Delphi操作Unicode字符

===================================Delphi:===================================================

当前版本(2007)中的默认状态下, String 就是 AnsiString

在 Delphi 2009 中:

string = UnicodeString; (同样: PString = PUnicodeString;)

Char = WideChar; (同样: PChar = PWideChar;)

// Fragment: writes a string list to a file with an explicit encoding, reads it
// back with the same encoding, and shows the result in Memo2.
// NOTE(review): the procedure header is incomplete in this source (no name or
// parameters), and str, FilePath and Memo2 are not declared in the fragment.
procedure

var

List: TStrings;

begin

List := TStringList.Create;

List.Text := str;

// Save the list contents to FilePath using the ASCII encoding.
List.SaveToFile(FilePath, TEncoding.ASCII);

// Load the same file back using the ASCII encoding; the trailing comment
// names TEncoding.UTF8 as the alternative encoding to pass here.
List.LoadFromFile(FilePath, TEncoding.ASCII); // TEncoding.UTF8;

Memo2.Lines := List;

List.Free;

end;

请教下万老师: 我发现 Delphi 2009 里用 IdHTTP 来获取网页, 如果网页源码是 UTF-8 的话可以直接用 htmsrc := idhttp.get(url), 而如果是 GB 编码的话只能用 stream 方式。但是用 stream 取得的 GB 编码网页怎样把它转成 UTF-8 呢? 如果不转的话在非中文操作系统下会乱码。

{ Re-encodes the text of Memo1 from code page 54936 (GB18030) to UTF-8 and
  appends a hex dump of the bytes of each encoding to Memo1.

  Sender: the control that raised the click event (unused). }
procedure TForm1.Button1Click(Sender: TObject);
var
  stream1, stream2: TStringStream;
  b: Byte;
  bs: string;
begin
  stream2 := nil; { so the finally block can free it even if creation fails }

  { First stream: the Memo text encoded with code page 54936 }
  stream1 := TStringStream.Create(Memo1.Text, 54936);
  try
    { Show the bytes of the first stream as zero-padded hex pairs.
      The dump is built by concatenation; the accumulated text is never
      used as a format string. }
    bs := '';
    for b in stream1.Bytes do
      bs := bs + IntToHex(b, 2) + ' ';
    Memo1.Lines.Add(bs);

    { Second stream: the decoded text of the first stream, encoded as UTF-8 }
    stream2 := TStringStream.Create(stream1.DataString, TEncoding.UTF8);

    { Show the bytes of the second stream the same way }
    bs := '';
    for b in stream2.Bytes do
      bs := bs + IntToHex(b, 2) + ' ';
    Memo1.Lines.Add(bs);
  finally
    stream2.Free;
    stream1.Free;
  end;
end;

{ Converts text to a hex string: every 16-bit character of the WideString form
  of text becomes four uppercase hexadecimal digits.

  text:    the string to encode.
  Returns: the concatenated 4-digit hex groups ('' for empty input). }
function Str_Gb2UniCode(text: string): String;
var
  wide: WideString;
  idx: Integer;
begin
  Result := '';
  wide := text;
  for idx := 1 to Length(wide) do
    Result := Result + Format('%4.4X', [Ord(wide[idx])]);
end;

{ Decodes a string of hexadecimal groups back into text. Starting at the first
  character, each group of up to four characters is parsed as a hexadecimal
  number and appended as one WideChar.

  text:    hex digits, normally a multiple of four in length.
  Returns: the decoded string ('' for empty input).
  Raises:  EConvertError (from StrToInt) if a group is not valid hex. }
function Unicode_str(text: string):string;
var
  offset, total: Integer;
  decoded: WideString;
begin
  decoded := '';
  total := Length(text);
  offset := 1;
  while offset < total do
  begin
    decoded := decoded + WideChar(StrToInt('$' + Copy(text, offset, 4)));
    Inc(offset, 4);
  end;
  Result := decoded;
end;

{ Demonstrates the round trip between text and its hex code-unit form.
  The string literal was restored from mis-decoded (mojibake) text so that it
  matches the expected output noted in the trailing comments.

  Sender: the control that raised the click event (unused). }
procedure TForm1.Button1Click(Sender: TObject);
begin
  ShowMessage(Str_Gb2UniCode('万一')); //4E074E00
  ShowMessage(Unicode_str('4E074E00')); //万一
end;

Unicode和字符相互转化的函数

2009-11-09 14:52

// Converts a string to the hex form of its Unicode (UTF-16) code units.
//
// Each 16-bit character is written as four uppercase hex digits, high byte
// first. The conversion goes through a WideString, so it does not depend on
// whether Char is one or two bytes wide and has no fixed length limit.
//
// Ansi:    the string to encode.
// Returns: the concatenated 4-digit hex groups ('' for empty input).
function AnsiToUnicode(Ansi: string):string;
var
  ws: WideString;
  i: Integer;
begin
  Result := '';
  ws := Ansi;
  for i := 1 to Length(ws) do
  begin
    // The earlier buffer-based version stopped at the first NUL character;
    // keep that behavior for callers that rely on it.
    if ws[i] = #0 then
      Break;
    Result := Result + IntToHex(Ord(ws[i]), 4);
  end;
end;

// 将Unicode转化成字符

{ Parses AString as a hexadecimal number.

  AString: hex digits without any prefix.
  Returns: the integer value.
  Raises:  EConvertError (from StrToInt) if AString is not valid hex. }
function ReadHex(AString:string):integer;
const
  HexPrefix = '$';
begin
  Result := StrToInt(HexPrefix + AString);
end;

{ Converts a string of 4-digit hex groups (one group per 16-bit character,
  high byte first) back into text.

  The loop in the source was garbled by markup extraction; it is reconstructed
  here and decodes each group straight into a WideString, so the result does
  not depend on whether Char is one or two bytes wide.

  Unicode: hex digits; an incomplete trailing group (fewer than four digits)
           is ignored.
  Returns: the decoded string ('' for empty input).
  Raises:  EConvertError (via ReadHex) if a group is not valid hex. }
function UnicodeToAnsi(Unicode: string):string;
var
  ws: WideString;
  i: Integer;
begin
  ws := '';
  i := 1;
  while i + 3 <= Length(Unicode) do
  begin
    ws := ws + WideChar(ReadHex(Copy(Unicode, i, 4)));
    Inc(i, 4);
  end;
  Result := ws;
end;

//WideChar 兼容了 AnsiChar 的 #0..#255; 但占用了 2 字节大小

//UniCode 字符 WideChar; 和 AnsiChar 不同, WideChar 是占 2 字节大小.

{ Demo fragment: shows that a WideChar holding an ANSI-range value counts as
  one character but occupies two bytes. The closing "end;" missing from the
  source has been restored. }
var
  c: WideChar; {WideChar ranges over #0..#65535, i.e. #$0..#$FFFF in hexadecimal}
begin
  {WideChar covers AnsiChar's #0..#255 range but takes 2 bytes of storage}
  c := #65;
  ShowMessage(c); {A}
  ShowMessage(IntToStr(Length(c))); {1; the length in characters}
  ShowMessage(IntToStr(SizeOf(c))); {2; but it occupies 2 bytes}
end;

Navigation: 问与答 >

汉字与多字节编码的转换

汉字与多字节编码的转换 - 回复 "不知道" 的问题

问题来源:

TEncoding.Default 编码下的十六进制字节 CE D2 C3 C7 如何转成汉字呢?

汉字为'我们';

--------------------------------------------------------------------------------

在 Delphi 2009 中, TEncoding.Default 表示系统当前的 ANSI 代码页, 也就是多字节编码(MBCS).

下面是多字节编码与汉字之间转换的例子:

--------------------------------------------------------------------------------

{Chinese characters to multi-byte encoding}
{ Encodes a Chinese string with TEncoding.Default and shows its bytes as
  space-separated hex. The truncated declaration of s and the truncated
  Format call from the source have been restored.

  Sender: the control that raised the click event (unused). }
procedure TForm1.Button1Click(Sender: TObject);
var
  stream: TStringStream;
  b: Byte;
  s: string;
begin
  stream := TStringStream.Create('我们', TEncoding.Default);
  try
    s := '';
    { Append each byte in hex, followed by a space }
    for b in stream.Bytes do
      s := Format('%s%x ', [s, b]);
    ShowMessage(s); {CE D2 C3 C7}
  finally
    stream.Free;
  end;
end;

{Multi-byte encoding to Chinese characters}
{ Fills a string stream with four fixed bytes and shows the string the stream
  decodes them to.

  Sender: the control that raised the click event (unused). }
procedure TForm1.Button2Click(Sender: TObject);
const
  EncodedBytes: array[0..3] of Byte = ($CE, $D2, $C3, $C7);
var
  stream: TStringStream;
  idx: Integer;
begin
  stream := TStringStream.Create;
  stream.Size := Length(EncodedBytes);
  for idx := Low(EncodedBytes) to High(EncodedBytes) do
    stream.Bytes[idx] := EncodedBytes[idx];
  ShowMessage(stream.DataString);
  stream.Free;
end;

{Convert a hex string of multi-byte encoded bytes to Chinese characters}
{ Parses a string of hex digit pairs into the bytes of a string stream and
  shows the string the stream decodes them to.

  Fixes relative to the source: str is now declared, "1to" is written as
  "1 to"-style valid syntax via a pair-indexed loop, a trailing unpaired digit
  is never indexed, and the stream is released in a finally block.

  Sender: the control that raised the click event (unused). }
procedure TForm1.Button3Click(Sender: TObject);
var
  stream: TStringStream;
  i, byteCount: Integer;
  str: string;
begin
  str := 'CED2C3C7';
  stream := TStringStream.Create;
  try
    byteCount := Length(str) div 2;
    stream.Size := byteCount;
    { Pair number i (0-based) occupies characters 2*i+1 and 2*i+2 of str;
      a pair that is not valid hex is stored as 0. }
    for i := 0 to byteCount - 1 do
      stream.Bytes[i] := StrToIntDef('$' + Copy(str, 2 * i + 1, 2), 0);
    ShowMessage(stream.DataString); {我们}
  finally
    stream.Free;
  end;
end;

end.

获取所有汉字与 Unicode 的对照表

{ Script fragment: writes one "$XXXX=<character>" line for every code point
  from $4E00 to $9FA5 to c:\temp\Unicode-Hz.txt.
  The List.Add call was truncated in the source; it is restored here so that
  each line pairs the hex code with the character built from it. }
var
  w: WideString;
  i: Integer;
  s: string;
  List: TStringList;
begin
  List := TStringList.Create;
  try
    for i := $4e00 to $9fa5 do
    begin
      s := #36 + IntToHex(i,4); {#36 is the $ character}
      w := WideChar(i);
      List.Add(s + '=' + w);
    end;
    List.SaveToFile('c:\temp\Unicode-Hz.txt');
  finally
    List.Free;
  end;
end;

汉字与 Unicode 转换

{感谢 robin(xuebin418@163.com)提供}

//转换

{ Converts text to a hex string: every 16-bit character of the WideString form
  of text becomes four uppercase hexadecimal digits.
  The function header was garbled in the source and the statement "i := 1"
  lacked its semicolon; both are restored.

  text:    the string to encode.
  Returns: the concatenated 4-digit hex groups ('' for empty input). }
function Str_Gb2UniCode(text: string): String;
var
  i,len: Integer;
  cur: Integer;
  t: String;
  ws: WideString;
begin
  Result := '';
  ws := text;
  len := Length(ws);
  i := 1;
  while i <= len do
  begin
    cur := Ord(ws[i]);
    FmtStr(t,'%4.4X',[cur]);
    Result := Result + t;
    Inc(i);
  end;
end;

//恢复

{ Decodes a string of hexadecimal groups back into text. Starting at the first
  character, each group of up to four characters is parsed as a hexadecimal
  number and appended as one WideChar.
  The "function" keyword was missing from the header in the source and is
  restored.

  text:    hex digits, normally a multiple of four in length.
  Returns: the decoded string ('' for empty input).
  Raises:  EConvertError (from StrToInt) if a group is not valid hex. }
function Unicode_str(text: string):string;
var
  i,len: Integer;
  ws: WideString;
begin
  ws := '';
  i := 1;
  len := Length(text);
  while i < len do
  begin
    ws := ws + Widechar(StrToInt('$' + Copy(text,i,4)));
    i := i+4;
  end;
  Result := ws;
end;

//测试

{ Demonstrates the round trip between text and its hex code-unit form by
  showing the result of each conversion in a message box.

  Sender: the control that raised the click event (unused). }
procedure TForm1.Button1Click(Sender: TObject);
var
  encoded, decoded: string;
begin
  encoded := Str_Gb2UniCode('万一');
  ShowMessage(encoded); //4E074E00

  decoded := Unicode_str('4E074E00');
  ShowMessage(decoded); //万一
end;

Windows API 中的字符串对应着 Delphi 的 PChar(PAnsiChar); 在 API 中使用 Delphi 的字符串还是比较灵活的.

定长字符串不是 #0 结束的, 和 API 不好兼容, 一般不用于 API 中.

// Assignment method 1: pass a string literal directly as the API argument.

begin

SetWindowText(Handle, '新标题');

end;

--------------------------------------------------------------------------------

// Assignment method 2: declare a variable of the type the API expects (PChar),
// assign the literal to it, and pass the variable.

var

p: PChar;

begin

p := '新标题';

SetWindowText(Handle, p);

end;

--------------------------------------------------------------------------------

// Assignment method 3: keep the text in a Delphi string and cast it to PChar
// at the call site.

var

str: string;

begin

str := '新标题';

SetWindowText(Handle, PChar(str));

end;

// Assignment method 4: store the text in a zero-based array of Char and pass
// the array itself as the API argument.

var

arr: array[0..255] of Char;

begin

arr := '新标题';

SetWindowText(Handle, arr);

end;

--------------------------------------------------------------------------------