一个开源的Asp.net2.0博客系统
最近一段时间在做一些关于文字编码方面的东西,常常涉及到各种编码字符之间的转换。主要是做中日文方面的,包括中文 GB2312、日文 JIS、SHIFT-JIS,以及它们和 Unicode 码之间的转换。
一 GBK <==> Unicode
- unsigned short GBK2UNI(unsigned short usGBK)
- {
- unsigned char szEUC[2] = { usGBK >> 8, usGBK & 0xFF };
- unsigned short usUNI;
- MultiByteToWideChar(
- 936,
- 0,
- (LPCSTR)szEUC,
- 2,
- &usUNI,
- 1
- );
- return usUNI;
- }
- unsigned short UNI2GBK(unsigned short usUNI)
- {
- unsigned char szGBK[3]={0};
- unsigned short wzUNI[2] = { usUNI, 0 };
- unsigned short usGBK;
- WideCharToMultiByte(
- 936,
- 0,
- wzUNI,
- 2,
- (LPSTR)szGBK,
- 2,
- 0,
- 0
- );
- usGBK = (szGBK[0] << 8) | szGBK[1];
- return usGBK;
- }
二 SHIFT-JIS <==> Unicode
- unsigned short SJIS2UNI(unsigned short usSJIS)
- {
- unsigned char szEUC[2] = { usSJIS >> 8, usSJIS & 0xFF };
- unsigned short usUNI;
- MultiByteToWideChar(
- 932,
- 0,
- (LPCSTR)szEUC,
- 2,
- &usUNI,
- 1
- );
- return usUNI;
- }
- unsigned short UNI2SJIS(unsigned short usUNI)
- {
- unsigned char szSJIS[3] = { 0 };
- unsigned short wzUNI[2] = { usUNI, 0 };
- unsigned short usSJIS;
- WideCharToMultiByte(
- 932,
- 0,
- wzUNI,
- 2,
- (LPSTR)szSJIS,
- 2,
- 0,
- 0
- );
- usSJIS = (szSJIS[0] << 8) | szSJIS[1];
- return usSJIS;
- }
三 JIS <=> Unicode
- unsigned short JIS2UNI(unsigned short usJIS)
- {
- unsigned char szEUC[2] = { (usJIS | 0x8080) >> 8, (usJIS | 0x8080) & 0xFF };
- unsigned short usUNI;
- MultiByteToWideChar(
- 20932,
- 0,
- (LPCSTR)szEUC,
- 2,
- &usUNI,
- 1
- );
- return usUNI;
- }
- unsigned short UNI2JIS(unsigned short usUNI)
- {
- unsigned char szJIS[3] = { 0 };
- unsigned short wzUNI[2] = { usUNI, 0 };
- unsigned short usJIS;
- WideCharToMultiByte(
- 20932,
- 0,
- wzUNI,
- 2,
- (LPSTR)szJIS,
- 2,
- 0,
- 0
- );
- usJIS = (szJIS[0] << 8) | szJIS[1];
- return usJIS;
- }
四 JIS <=> SHIFT-JIS
/*
 * Convert a double-byte Shift-JIS code to the corresponding 7-bit JIS
 * code by arithmetic only (no table lookup).
 *
 * sjis  Shift-JIS code packed as (lead << 8) | trail.
 *
 * Returns the JIS code packed as (byte0 << 8) | byte1, or 0 when the
 * input lies outside 0x8140..0x9FFC and 0xE040..0xEFFC or its trail
 * byte is not a valid Shift-JIS trail byte.
 */
unsigned short SJIS2JIS( unsigned short sjis )
{
    const unsigned lead  = sjis >> 8;
    const unsigned trail = sjis & 0x00FFu;
    const int in_low_block  = (sjis >= 0x8140 && sjis <= 0x9FFC);
    const int in_high_block = (sjis >= 0xE040 && sjis <= 0xEFFC);
    unsigned row;
    unsigned cell;

    if (!in_low_block && !in_high_block)
    {
        return 0;
    }

    /* Valid trail bytes are 0x40..0x7E and 0x80..0xFC. */
    if (trail < 0x40 || trail == 0x7F || trail > 0xFC)
    {
        return 0;
    }

    /* Each lead byte covers two JIS rows; start from the odd one. */
    row = ((lead - (lead >= 0xE0 ? 0xC0u : 0x80u)) << 1) + 0x1F;

    if (trail >= 0x9F)
    {
        /* Upper half of the trail range belongs to the following row. */
        row += 1;
        cell = trail - 0x7E;
    }
    else
    {
        /* Skip the 0x7F gap in the trail range before rebasing. */
        cell = trail - (trail >= 0x80 ? 0x20u : 0x1Fu);
    }

    return (unsigned short)((row << 8) + cell);
}
/*
 * Convert a 7-bit double-byte JIS code to the corresponding Shift-JIS
 * code by arithmetic only (no table lookup).
 *
 * jis  JIS code packed as (byte0 << 8) | byte1; both bytes must be in
 *      0x21..0x7E.
 *
 * Returns the Shift-JIS code packed as (lead << 8) | trail, or 0 when
 * either input byte is outside 0x21..0x7E.
 */
unsigned short JIS2SJIS( unsigned short jis )
{
    const unsigned row  = jis >> 8;
    const unsigned cell = jis & 0x00FFu;
    unsigned lead;
    unsigned trail;

    /*
     * Reject invalid bytes up front. Without the upper bound on the low
     * byte, inputs such as 0x217F or 0x227F produce Shift-JIS codes that
     * SJIS2JIS would refuse (or map to a different character), and very
     * large low bytes overflow into the lead byte.
     */
    if (row < 0x21 || row > 0x7E || cell < 0x21 || cell > 0x7E)
    {
        return 0;
    }

    if (row & 0x0001u)
    {
        /* Odd row: trail bytes 0x40..0x9E, skipping 0x7F. */
        trail = cell + 0x1F;
        if (trail >= 0x7F)
        {
            trail++;
        }
    }
    else
    {
        /* Even row: trail bytes 0x9F..0xFC. */
        trail = cell + 0x7E;
    }

    /* Two JIS rows share one lead byte; lead bytes skip 0xA0..0xDF. */
    lead = ((row - 0x21) >> 1) + 0x81;
    if (lead >= 0xA0)
    {
        lead += 0x40;
    }

    return (unsigned short)((lead << 8) + trail);
}
UNI2JIS这个函数好像不太好用,其他的都经过测试,没有问题的。现在还不知道具体原因,现在从Unicode转到JIS是分两个步骤的,第一个步骤是先将Unicode转到SHIFT-JIS,然后由SHIFT-JIS转到JIS。