日期:2011-03-22 13:56:00 来源:本站整理
支持UNICODE/UTF8/ANSI之间的转换的类[VC/C++编程]
本文“支持UNICODE/UTF8/ANSI之间的转换的类[VC/C++编程]”是由七道奇为您精心收集,来源于网络转载,文章版权归文章作者所有,本站不对其观点以及内容做任何评价,请读者自行判断,以下是其具体内容:
ZUtf8_16.h文件:
//---------------------------------------------------------------------------
#ifndef ZUtf8_16H
#define ZUtf8_16H
//---------------------------------------------------------------------------
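/*
A class for converting text between UNICODE, UNICODE big-endian, UTF-8 and ASCII.
Date:    2007-06-15
Version: 1.0
Author:  小笨象
Note: you may use this file freely. If you fix a bug in it, or otherwise
improve it, please let me know as well, so that I too can enjoy the
benefits of open source. Thank you.
*/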
// Text encodings distinguished by this converter. The numeric values are
// spelled out explicitly so they stay stable if enumerators are reordered.
enum EncodingType
{
    uni8Bit      = 0, // plain 8-bit text; the default (ASCII)
    uni16BE      = 1, // UTF-16, big-endian
    uni16LE      = 2, // UTF-16, little-endian: the Windows default, i.e. "UNICODE"
    uniUTF8      = 3, // UTF-8 with a leading signature (BOM)
    uniUTF8NOBOM = 4  // UTF-8 file without the UTF-8 signature
};
classZUtf8_16
{
private:
EncodingTypem_unicodeMode;//编码方法
intisUTF8_16(constchar*s,unsignedintlen,unsigned*cchUnused);
EncodingType__fastcallDetermineEncoding(unsignedchar*data,size_tiLen);
public:
__fastcallZUtf8_16();
__fastcall~ZUtf8_16();
EncodingType__fastcallGetEncodingType(void){returnm_unicodeMode;};
bool__fastcallLoadFromStream(TMemoryStream*pStream,AnsiString&DestText);
bool__fastcallStreamSaveToFile(TMemoryStream*pStream,
AnsiStringFileNameA,EncodingTypeunicodeMode);
};
#endif
ZUtf8_16.cpp文件:
//---------------------------------------------------------------------------
#include <vcl.h>
#pragma hdrstop
#include <stdio.h>
#include <vector>
#include "ZUtf8_16.h"
#pragma package(smart_init)
__fastcallZUtf8_16::ZUtf8_16()
{
m_unicodeMode=uni8Bit;
}
//---------------------------------------------------------------------------
__fastcallZUtf8_16::~ZUtf8_16()
{
}
//---------------------------------------------------------------------------
intZUtf8_16::isUTF8_16(constchar*s,unsignedintlen,unsigned*cchUnused)
{
intrv=1;
intASCII7only=1;
constunsignedchar*sx=(unsignedchar*)s,*endx=sx+len;
while(sx<endx)
{
if(!*sx)
{
//Fordetection,we'llsaythatNULmeansnotUTF8
ASCII7only=0;
rv=0;
break;
}
elseif(*sx<0x80)
{
//0nnnnnnnIfthebyte'sfirsthexcodebeginswith0-7,itisanASCIIcharacter.
sx++;
}
elseif(*sx<(0x80+0x40))
{
//10nnnnnn8throughBcannotbefirsthexcodes
ASCII7only=0;
rv=0;
break;
}
elseif(*sx<(0x80+0x40+0x20))
{
//110xxxvv10nnnnnn IfitbeginswithCorD,itisan11bitcharacter
ASCII7only=0;
if(sx>=endx-1)break;
if(!(*sx&0x1F)||(sx[1]&(0x80+0x40))!=0x80){rv=0;break;}
sx+=2;
}
elseif(*sx<(0x80+0x40+0x20+0x10))
{
//1110qqqq10xxxxvv10nnnnnnIfitbeginswithE,itis16bit
ASCII7only=0;
if(sx>=endx-2)break;
if(!(*sx&0xF)||(sx[1]&(0x80+0x40))!=0x80||(sx[2]&(0x80+0x40))!=0x80)
{rv=0;break;}
sx+=3;
}
else
{
/*morethan16bitsarenotallowedhere*/
ASCII7only=0;
rv=0;
break;
}
}
if(cchUnused)*cchUnused=endx-sx;
return(ASCII7only?0:rv);
}
//---------------------------------------------------------------------------
EncodingType__fastcallZUtf8_16::DetermineEncoding(unsignedchar*data,size_tiLen)
{
//TODO:判断当前文件的编码范例.
m_unicodeMode=uni8Bit;//默许ASCII
if(data[0]==0xFE&&data[1]==0xFF)//Bigendian==UNICODE-BIG UTF16
{
m_unicodeMode=uni16BE;
}
elseif(data[0]==0xFF&&data[1]==0xFE)//Littleendian==UNICODE UTF16
{
m_unicodeMode=uni16LE;//Unicode
}
elseif(data[0]==0xEF&&data[1]==0xBB&&data[2]==0xBF)//UTF8
{
m_unicodeMode=uniUTF8;
}
elseif(isUTF8_16(data,iLen,NULL)==1)
{
m_unicodeMode=uniUTF8NOBOM;
}
return m_unicodeMode;
}
//---------------------------------------------------------------------------
bool__fastcallZUtf8_16::LoadFromStream(TMemoryStream*pSourceStream,AnsiString&DestText)
{
//TODO:从流中读取数据
//先判断字符编码
pSourceStream->Position=0;
if(pSourceStream->Size==0)returntrue;
// 本文转自 C++Builder 研究 - http://www.ccrun.com/article.asp?i=1023&d=cbj0f7
m_unicodeMode=DetermineEncoding((char*)pSourceStream->Memory,pSourceStream->Size);
pSourceStream->Position=0;
//再根椐呼应的编码做呼应的事.
switch(m_unicodeMode)
{
caseuni8Bit:
{
//什么都不做.以保证翻开一些大的文件时速度快一些.
//所以调用者需求自己在调的之后判断字符编码,
//假如是uni8Bit,则需求自己处理.
// intiLength=pSourceStream->Size;
// char *szUnicode=newchar[iLength+1];
// memset(szUnicode,0x00,iLength+1);
// pSourceStream->Read(szUnicode,iLength);
// DestText=AnsiString(szUnicode);
// delete[]szUnicode;
// szUnicode=NULL;
break;
}
caseuni16BE:
{
//UCBigendian
pSourceStream->Position=2;
intiLength=pSourceStream->Size-2;
chartemp;
char*szUnicode=newchar[iLength+2];
memset(szUnicode,0x00,iLength+2);
pSourceStream->Read(szUnicode,iLength);
//只要把每两个字节的位置交换一下,就是UNICODELE了.So...
for(inti=0;i<iLength;i+=2)
{
temp=szUnicode[i];
szUnicode[i]=szUnicode[i+1];
szUnicode[i+1]=temp;
Application->ProcessMessages();
}
DestText=WideCharLenToString((wchar_t*)(szUnicode),iLength/2);
delete[]szUnicode;
szUnicode=NULL;
break;
}
caseuni16LE:
{
//UNICODE Littleendian
pSourceStream->Position=2;
intiLength=pSourceStream->Size-2;
wchar_t *szUnicode=newwchar_t[iLength+2];
memset(szUnicode,0x00,iLength+2);
pSourceStream->Read(szUnicode,iLength);
WideStringWideStr=WideString(szUnicode);
DestText=WideStr;
delete[]szUnicode;
szUnicode=NULL;
break;
}
caseuniUTF8:
{
//UTF8
pSourceStream->Position=3;
intiLength=pSourceStream->Size-3;
char*szUTF8=newchar[iLength+3];
memset(szUTF8,0x00,iLength+3);
pSourceStream->Read(szUTF8,iLength);
AnsiStringUtf8Str=Utf8ToAnsi(szUTF8);
if(Utf8Str=="")
DestText=AnsiString((char*)pSourceStream->Memory);
else
DestText=Utf8Str;
delete[]szUTF8;
szUTF8=NULL;
break;
}
caseuniUTF8NOBOM:
{
//UTF8没有头标识的情形.
intiLength=pSourceStream->Size;
char*szUTF8=newchar[iLength+3];
memset(szUTF8,0x00,iLength+3);
pSourceStream->Read(szUTF8,iLength);
AnsiStringUtf8Str=Utf8ToAnsi(szUTF8);
if(Utf8Str=="")
DestText=AnsiString((char*)pSourceStream->Memory);
else
DestText=Utf8Str;
delete[]szUTF8;
szUTF8=NULL;
break;
}
}
returntrue;
}
//---------------------------------------------------------------------------
bool__fastcallZUtf8_16::StreamSaveToFile(TMemoryStream*pStream,
AnsiStringFileNameA,EncodingTypeunicodeMode)
{
//TODO:把流内容按指定的格局保存到文件中.
try
{
pStream->Position=0;
switch(unicodeMode)
{
caseuni8Bit:
{
//什么都不做.直接保存.
pStream->SaveToFile(FileNameA);
break;
}
caseuni16BE:
{
//UCBigendian
intiLength=pStream->Size;
chartemp;
char*pSource=newchar[iLength+2];
memset(pSource,0x00,iLength+2);
pStream->Read(pSource,iLength);
//先看看转成的宽字节数返到nLen
intnLen=MultiByteToWideChar(CP_ACP,0,pSource,iLength,NULL,NULL);
LPWSTRlpwsz=newWCHAR[nLen];
MultiByteToWideChar(CP_ACP,0,pSource,-1,lpwsz,nLen);
intiNewLen=lstrlenW(lpwsz)*sizeof(WCHAR);
char*pDest=newchar[iNewLen];
memcpy(pDest,lpwsz,iNewLen);
//只要把每两个字节的位置交换一下,就是UNICODEBig了.So...
for(inti=0;i<iNewLen;i+=2)
{
temp=pDest[i];
pDest[i]=pDest[i+1];
pDest[i+1]=temp;
Application->ProcessMessages();
}
FILE*f=fopen(FileNameA.c_str(),"wb");
//写UnicodeBig头
fputc(0xFE,f);
fputc(0xFF,f);
fwrite(pDest,1,iNewLen,f);
fclose(f);
delete[]pDest;
pDest=NULL;
delete[]lpwsz;
lpwsz=NULL;
delete[]pSource;
pSource=NULL;
break;
}
caseuni16LE:
{
//UNICODE Littleendian
intiLength=pStream->Size;
char*pSource=newchar[iLength+2];
memset(pSource,0x00,iLength+2);
pStream->Read(pSource,iLength);
//先看看转成的宽字节数返到nLen
intnLen=MultiByteToWideChar(CP_ACP,0,pSource,iLength,NULL,NULL);
LPWSTRlpwsz=newWCHAR[nLen];
MultiByteToWideChar(CP_ACP,0,pSource,-1,lpwsz,nLen);
FILE*f=fopen(FileNameA.c_str(),"wb");
//写Unicode头
fputc(0xFF,f);
fputc(0xFE,f);
//一个宽字节占两个字节
fwrite(lpwsz,1,lstrlenW(lpwsz)*sizeof(WCHAR),f);
fclose(f);
delete[]lpwsz;
lpwsz=NULL;
delete[]pSource;
pSource=NULL;
break;
}
caseuniUTF8:
{
//UTF8
intiLen=pStream->Size;
char*pSource=newchar[iLen+3];
memset(pSource,0x00,iLen+3);
pStream->Read(pSource,iLen);
AnsiStringUtf8Str=AnsiToUtf8(pSource);
delete[]pSource;
pSource=NULL;
FILE*f=fopen(FileNameA.c_str(),"wb");
//写UTF8头
fputc(0xEF,f);
fputc(0xBB,f);
fputc(0xBF,f);
//一个宽字节占两个字节
fwrite(Utf8Str.c_str(),1,Utf8Str.Length(),f);
fclose(f);
break;
}
caseuniUTF8NOBOM:
{
//UTF8没有标识头的情形.
intiLen=pStream->Size;
char*pSource=newchar[iLen+3];
memset(pSource,0x00,iLen+3);
pStream->Read(pSource,iLen);
AnsiStringUtf8Str=AnsiToUtf8(pSource);
delete[]pSource;
pSource=NULL;
FILE*f=fopen(FileNameA.c_str(),"wb");
//一个宽字节占两个字节
fwrite(Utf8Str.c_str(),1,Utf8Str.Length(),f);
fclose(f);
break;
}
}//endofswitch
}
catch(...)
{
returnfalse;
}
returntrue;
}
//---------------------------------------------------------------------------
// Usage example:
#include"ZUtf8_16.h"
bool__fastcallLoadFile(AnsiStringstrFileName,TStrings*pList)
{
EncodingTypeunicodeMode;
//TODO:装入文件.
//假如装入成功,则返回true
AnsiStringErrMsg;
boolbReturn=true;
ErrMsg.sprintf("装入%s文档时出错, 该文档不存在"
"大概被别的程序以独占方法翻开!",strFileName);
if(!FileExists(strFileName))
{
MessageBox(0, ErrMsg.c_str(),"错误",MB_OK|MB_ICONERROR);
returnfalse;
}
AnsiStringReturnTxt;
ZUtf8_16zutf8_16;
TMemoryStream*ReadStream=newTMemoryStream();
ReadStream->LoadFromFile(strFileName);
bReturn=zutf8_16.LoadFromStream(ReadStream,ReturnTxt);
if(bReturn)
{
unicodeMode=zutf8_16.GetEncodingType();
if(unicodeMode==uni8Bit)
pList->LoadFromStream(ReadStream);
else
pList->Text=ReturnTxt;
}
else
{
MessageBox(0, ErrMsg.c_str(),"错误",MB_OK|MB_ICONERROR);
}
deleteReadStream;
ReadStream=NULL;
returnbReturn;
}
以上是“支持UNICODE/UTF8/ANSI之间的转换的类[VC/C++编程]”的内容,如果你对以上该文章内容感兴趣,你可以看看七道奇为您推荐以下文章:
本文地址: | 与您的QQ/BBS好友分享! |
评论内容只代表网友观点,与本站立场无关!
评论摘要(共 0 条,得分 0 分,平均 0 分)
查看完整评论