c++ 读取、输出txt文件

下面这段话转自:https://blog.csdn.net/lightlater/article/details/6326338

关于文本文件的文件头

第一 ANSI文件的文件头为空,不需要处理;

第二 UNICODE文件(即Windows记事本所说的"Unicode",实际为UTF-16 LE编码)的文件头(BOM)为0xFF,0xFE共计两个字节,读取时需要偏移两个字节后再行读取;

第三 带BOM的UTF-8文件的文件头为0xEF,0xBB,0xBF共计三个字节,读取时需要偏移三个字节后再行读取(不带BOM的UTF-8文件没有文件头,无需偏移);

1.ansi格式txt文件

 /**
  * Reads the whole of "ansi.txt" (opened in the default text mode, no BOM
  * handling) and prints its contents to std::cout followed by a newline.
  *
  * If the file cannot be opened, prints "open failed!" and returns without
  * printing anything else.
  */
 void readAnsiTXT(){
     const std::string filename = "ansi.txt";
     std::ifstream fin(filename.c_str());
     if (!fin.is_open()){
         std::cout << "open failed!\n";
         return;  // nothing to read; do not fall through and print an empty message
     }
     // Accumulate the file one character at a time; get() stops at end of file.
     std::string msg;
     char ch;
     while (fin.get(ch)){
         msg += ch;
     }
     std::cout << msg << "\n";
 }

2.Unicode格式

转载:https://blog.csdn.net/hxfhq1314/article/details/80344669

memset函数:https://baike.baidu.com/item/memset/4747579?fr=aladdin

setlocale函数:https://www.runoob.com/cprogramming/c-function-setlocale.html

// ws2s is defined further down in this file; declare it so the call below compiles.
std::string ws2s(const std::wstring& ws);

/**
 * Reads "unicode.txt" as little-endian UTF-16, converts it to a multibyte
 * string with ws2s() and prints the result to std::cout.
 *
 * The first two bytes of the file are skipped unconditionally (they are
 * expected to be the 0xFF 0xFE byte-order mark). Each following pair of bytes
 * becomes one wchar_t; surrogate pairs are not combined, so where wchar_t is
 * wider than 16 bits characters outside the BMP are not decoded correctly.
 *
 * If the file cannot be opened, prints "open failed!" and returns.
 */
void readUnicodeTXT(){
    const std::string filename = "unicode.txt";
    std::ifstream fin(filename, std::ios::binary);
    if (!fin.is_open()){
        // Without this check a loop on !eof() would never end: a failed
        // stream sets failbit, not eofbit.
        std::cout << "open failed!\n";
        return;
    }
    fin.seekg(2, std::ios::beg);  // skip the 2-byte BOM

    std::wstring wstrLine;
    unsigned char unit[2];
    // Test the read itself rather than eof(): the loop body then only runs for
    // complete 2-byte units, so no bogus trailing character is appended.
    while (fin.read(reinterpret_cast<char*>(unit), 2)){
        // Assemble the code unit explicitly (low byte first) so the value does
        // not depend on sizeof(wchar_t) or on uninitialised upper bytes.
        wstrLine.push_back(static_cast<wchar_t>(unit[0] | (unit[1] << 8)));
    }
    std::cout << ws2s(wstrLine) << std::endl;
}

/**
 * Converts a wide string to a multibyte string using the C library's
 * wcstombs().
 *
 * The conversion is attempted under the "chs" locale (a Windows locale name);
 * if that locale cannot be selected, the locale that was active on entry is
 * used instead. The previously active locale is restored before returning.
 *
 * @param ws  wide string to convert; conversion stops at the first L'\0'.
 * @return    the converted bytes. If a character cannot be represented in the
 *            locale in use, the result holds whatever bytes wcstombs produced
 *            before it failed (possibly nothing).
 */
std::string ws2s(const std::wstring& ws)
{
    // Remember the active locale so it can be restored afterwards.
    const char* active = std::setlocale(LC_ALL, NULL);
    const std::string curLocale = active ? active : "C";
    std::setlocale(LC_ALL, "chs");

    // MB_CUR_MAX is the longest multibyte sequence in the locale now in effect,
    // so this capacity can never be exhausted; the extra byte guarantees a
    // terminating NUL even when wcstombs fails part-way through.
    const std::size_t capacity = ws.size() * MB_CUR_MAX + 1;
    std::string buffer(capacity, '\0');
    std::wcstombs(&buffer[0], ws.c_str(), capacity);

    std::setlocale(LC_ALL, curLocale.c_str());
    // Trim to the first NUL, i.e. to the bytes actually written.
    return std::string(buffer.c_str());
}

3.UTF-8格式:

 /**
  * Reads "utf8.txt" as UTF-8 (a leading BOM, if present, is consumed by the
  * conversion facet), and prints the decoded text to std::wcout followed by
  * std::endl.
  *
  * Line endings are normalised to '\n': CRLF, bare LF and bare CR all end a
  * line. NUL and '?' characters are dropped from the text, as in the original
  * snippet.
  * NOTE(review): dropping '?' also removes genuine question marks from the
  * file - confirm that this is wanted.
  *
  * If the file cannot be opened, prints "open failed!" and returns.
  */
 void readUtf8TXT(){
     const std::string str = "utf8.txt";

     // "chs" is a Windows locale name; elsewhere std::locale throws
     // std::runtime_error for it, in which case wcout keeps its current locale.
     try {
         std::wcout.imbue(std::locale("chs"));
     } catch (const std::runtime_error&) {
     }

     // Install the UTF-8 facet before opening the file. The std::locale takes
     // ownership of the facet, so it must not be deleted manually and is not
     // leaked when the open fails.
     std::wifstream wif;
     wif.imbue(std::locale(wif.getloc(),
         new std::codecvt_utf8<wchar_t, 0x10ffff, std::consume_header>));
     wif.open(str, std::ios::binary);
     if (!wif.is_open()){
         std::wcout << L"open failed!\n";
         return;
     }

     std::wstring res;
     std::wstring wline;
     while (std::getline(wif, wline)){
         // The stream is binary, so a CRLF file leaves '\r' at the end of each line.
         const bool endsWithCr = !wline.empty() && wline.back() == L'\r';
         if (endsWithCr){
             wline.pop_back();
         }
         for (wchar_t c : wline){
             if (c == L'\0' || c == L'?'){
                 continue;
             }
             // A '\r' inside the line is a bare-CR line break.
             res += (c == L'\r') ? L'\n' : c;
         }
         // Put the line break back unless this was a final, unterminated line
         // (getline sets eofbit when it stops at end of file, not at '\n').
         if (endsWithCr || !wif.eof()){
             res += L'\n';
         }
     }
     std::wcout << res << std::endl;
 }

转载:http://101.132.192.87/2019/08/29/windows%e4%b8%8b%e7%94%a8stdwifstream%e8%af%bb%e5%8f%96unicode%e6%96%87%e6%9c%ac/

Windows下使用std::wifstream读取Unicode文本的方法:

 // Fragment (statements intended for use inside a function body): opens a file
 // with std::wifstream, installs a UTF-16 conversion facet on it, and prints
 // the file to wcout line by line.
 1 std::locale loc("chs");                // works on Windows (per the original author)
 2     std::wcout.imbue(loc);
 3     // open as a byte stream
 4     std::wifstream wif("路径", std::ios::binary);
 5     std::codecvt_utf16<wchar_t, 0x10ffff, std::consume_header>* codecvtToUnicode = new std::codecvt_utf16 < wchar_t, 0x10ffff, std::consume_header >;
 6     if (wif.is_open())
 7     {
 8         // apply BOM-sensitive UTF-16 facet
 9         wif.imbue(std::locale(wif.getloc(), codecvtToUnicode));
10         std::wstring wline;
11         while (std::getline(wif, wline))
12         {
13             // Copy the line, dropping NUL and '?' characters.
14             std::wstring convert;
15             for (auto c : wline)
16             {
17                 if (c != L'\0' && c != L'?')
18                     convert += c;
19             }
20             wcout << convert << endl;
21         }
22         wif.close();
23         //delete codecvtToUnicode;     // Do not delete the facet manually: the original author reports that deleting it anywhere crashes - presumably because the std::locale built above takes ownership of it.
24     }

Windows下使用std::wifstream读取UTF-8文本的方法:

 // Fragment (statements intended for use inside a function body): opens a file
 // with std::wifstream, installs a UTF-8 conversion facet on it, and prints
 // the file to wcout line by line.
 1 std::locale loc("chs");                // works on Windows (per the original author)
 2     std::wcout.imbue(loc);
 3     // open as a byte stream
 4     std::wifstream wif("路径", std::ios::binary);
 5     std::codecvt_utf8<wchar_t, 0x10ffff, std::consume_header>* codecvtToUnicode = new std::codecvt_utf8 < wchar_t, 0x10ffff, std::consume_header >;
 6     if (wif.is_open())
 7     {
 8         // apply BOM-sensitive UTF-8 facet
 9         wif.imbue(std::locale(wif.getloc(), codecvtToUnicode));
10         std::wstring wline;
11         while (std::getline(wif, wline))
12         {
13             // Copy the line, dropping NUL and '?' characters.
14             std::wstring convert;
15             for (auto c : wline)
16             {
17                 if (c != L'\0' && c != L'?')
18                     convert += c;
19             }
20             wcout << convert << endl;
21         }
22         wif.close();
23         //delete codecvtToUnicode;     // Do not delete the facet manually: the original author reports that deleting it anywhere crashes - presumably because the std::locale built above takes ownership of it.
24     }