C++ Stream与编码转换

1.自己动手改造codecvt来读写Unicode(LE)代码:

#include <iostream>
#include <fstream>
#include <locale>
#include "TextCodeChange.h"
using namespace std;

//#define endl L"\r\n"
int main()
{
    locale loc(locale::classic(), new class NullCodecvt);
    wstring wstr;
    wifstream wfin(L"test.txt", ios_base::binary);
    wofstream wfout(L"test1.txt", ios_base::binary);;
    wfout.imbue(loc);
    wchar_t header[1] = { 0xFEFF };
    wfout.write(header, 1);

    wfin.imbue(loc);
    wcout.imbue(locale(""));
    while (std::getline(wfin, wstr))
    //while (wfin >> wstr)
    {
        wcout << wstr << endl;
        wfout << wstr << endl;
    }
    wfin.close();
    wfout.close();
    return 0;
}

头文件TextCodeChange.h

using std::codecvt ;
typedef codecvt < wchar_t , char , std::mbstate_t > NullCodecvtBase ;

// A "do nothing" wchar_t <-> char conversion facet.
//
// Every conversion hook reports `noconv`, and do_always_noconv() returns
// true, so a stream imbued with this facet is told that no character
// conversion is required. do_max_length() and do_encoding() both report 2.
//
// The hooks are marked `override` so that a signature mismatch with the base
// class is a compile error instead of a silently unused function.
class NullCodecvt
    : public NullCodecvtBase
{

public:
    // Kept for backward compatibility with code that names these typedefs.
    typedef wchar_t _E ;
    typedef char _To ;
    typedef std::mbstate_t _St ;

    // refs == 0 lets the owning std::locale delete the facet; a non-zero
    // value leaves the lifetime to the caller.
    explicit NullCodecvt( std::size_t refs = 0 ) : NullCodecvtBase(refs) { }

protected:
    // External -> internal: nothing is converted. The "next" pointers are set
    // to the range starts, as the codecvt contract requires for `noconv`.
    result do_in( state_type& ,
                  const extern_type* from , const extern_type* ,
                  const extern_type*& from_next ,
                  intern_type* to , intern_type* , intern_type*& to_next
                  ) const override
    {
        from_next = from ;
        to_next = to ;
        return noconv ;
    }

    // Internal -> external: nothing is converted. The destination limit is an
    // extern_type*, matching the base-class virtual.
    result do_out( state_type& ,
                   const intern_type* from , const intern_type* ,
                   const intern_type*& from_next ,
                   extern_type* to , extern_type* , extern_type*& to_next
                   ) const override
    {
        from_next = from ;
        to_next = to ;
        return noconv ;
    }

    // No shift state is used, so no termination sequence is ever written.
    result do_unshift( state_type& ,
                       extern_type* to , extern_type* ,
                       extern_type*& to_next ) const override
    {
        to_next = to ;
        return noconv ;
    }

    // Returns min(max, from_end - from).
    int do_length( state_type& , const extern_type* from ,
                   const extern_type* from_end ,
                   std::size_t max ) const noexcept override
    {
        const std::size_t available = static_cast<std::size_t>(from_end - from) ;
        return static_cast<int>( max < available ? max : available ) ;
    }

    bool do_always_noconv() const noexcept override
    {
        return true ;
    }

    int do_max_length() const noexcept override
    {
        return 2 ;
    }

    int do_encoding() const noexcept override
    {
        return 2 ;
    }
} ;

2.利用C++11标准中提供的 codecvt_utf16来读写UTF-16LE文件,读出的UTF-16LE字符放入wchar_t中处理。

#include <iostream>
#include <fstream>
#include <locale>
#include <codecvt>
using namespace std;

// Copies the UTF-16LE file test.txt to test1.txt one whitespace-separated
// token per line (CRLF terminated), echoing each token to the console.
int main()
{
    // little_endian : byte order used for writing, and for reading when the
    //                 input carries no BOM.
    // consume_header: a BOM at the start of the input is skipped instead of
    //                 being delivered as the first character of the first
    //                 token (which would duplicate it in the output).
    typedef std::codecvt_utf16<wchar_t, 0x10ffff,
        static_cast<std::codecvt_mode>(std::little_endian | std::consume_header)>
        Utf16LeCodecvt;
    // The locale takes ownership of the facet.
    const std::locale loc(std::locale(), new Utf16LeCodecvt);

    // UTF-16 data must go through the streams in binary mode so that no
    // newline translation is applied to the individual bytes.
    wifstream wfin("test.txt", std::ios::binary);
    std::wofstream wfout("test1.txt", std::ios::binary);
    wfin.imbue(loc);
    wfout.imbue(loc);
    wcout.imbue(locale(""));

    // BOM; the little-endian facet encodes U+FEFF as the bytes FF FE.
    const wchar_t header[1] = { 0xFEFF };
    wfout.write(header, 1);

    wstring wstr;
    while (wfin >> wstr)
    {
        wcout << wstr << endl;
        wfout << wstr << L"\r\n";
    }
    wfin.close();
    wfout.close();
    return 0;
}

3.利用C++11标准中提供的 codecvt_utf16来读写UTF-16BE文件,读出的UTF-16BE字符放入wchar_t中处理。

#include <iostream>
#include <fstream>
#include <locale>
#include <codecvt>
using namespace std;

// Copies the UTF-16BE file test.txt to test1.txt one whitespace-separated
// token per line (CRLF terminated), echoing each token to the console.
int main()
{
    // codecvt_utf16 is big-endian unless little_endian is requested.
    // consume_header makes the facet skip a BOM at the start of the input
    // rather than handing it over as part of the first token.
    typedef std::codecvt_utf16<wchar_t, 0x10ffff, std::consume_header>
        Utf16BeCodecvt;
    // The locale takes ownership of the facet.
    const std::locale loc(std::locale(), new Utf16BeCodecvt);

    // UTF-16 data must go through the streams in binary mode so that no
    // newline translation is applied to the individual bytes.
    wifstream wfin("test.txt", std::ios::binary);
    std::wofstream wfout("test1.txt", std::ios::binary);
    wfin.imbue(loc);
    wfout.imbue(loc);
    wcout.imbue(locale(""));

    // The BOM is always the code point U+FEFF; the big-endian facet encodes
    // it as the bytes FE FF. It is written exactly once, here, so the facet
    // is not additionally asked to generate a header.
    const wchar_t header[1] = { 0xFEFF };
    wfout.write(header, 1);

    wstring wstr;
    while (wfin >> wstr)
    {
        wcout << wstr << endl;
        wfout << wstr << L"\r\n";
    }
    wfin.close();
    wfout.close();
    return 0;
}

4.利用C++11标准中提供的codecvt_utf8来读写UTF-8文件,读出的UTF-8字符放入wchar_t中处理。

#include <iostream>
#include <fstream>
#include <locale>
#include <codecvt>
using namespace std;

// Copies the UTF-8 file test.txt to test1.txt one whitespace-separated token
// per line, echoing each token to the console.
int main()
{
    // consume_header makes the facet skip a UTF-8 BOM at the start of the
    // input instead of delivering U+FEFF as part of the first token. The
    // output file is therefore written without a BOM.
    typedef std::codecvt_utf8<wchar_t, 0x10ffff, std::consume_header> Utf8Codecvt;
    // The locale takes ownership of the facet.
    const std::locale loc(std::locale(), new Utf8Codecvt);

    wifstream wfin("test.txt");
    wofstream wfout("test1.txt");
    wfin.imbue(loc);
    wfout.imbue(loc);
    wcout.imbue(locale(""));

    wstring wstr;
    while (wfin >> wstr)
    {
        wcout << wstr << endl;
        wfout << wstr << endl;
    }
    wfin.close();
    wfout.close();
    return 0;
}

5.UTF-8编码与UTF-16编码相互转换的应用

#include <iostream>
#include <fstream>
#include <locale>
#include <codecvt>
#include <cvt\wstring>
using namespace std;

int main()
{
    //读取UTF-8文件的内容并转换为UTF16编码放入wstring中
    std::wifstream wfin("test.txt");
    wfin.imbue(std::locale(wfin.getloc(), new std::codecvt_utf8_utf16<wchar_t>));

    wstring wstr;
    wfin >> wstr;

    //创建一个utf8编码转换器
    wstring_convert<codecvt_utf8<wchar_t> > myconv;
    //将UTF16编码转换为UTF8编码
    string mbstring = myconv.to_bytes(wstr);
    cout << mbstring;
    //将UTF8编码转换为UTF16编码
    wstr = myconv.from_bytes(mbstring);
    wcout.imbue(locale(""));
    wcout << wstr;
    wfin.close();
    return 0;
}