当前位置: 首页 >> 程序设计 >> Unicode与UTF8互转源代码
 

Unicode与UTF8互转源代码

作者:      来源:http://blog.csdn.net/flying8127     发表时间:2007-05-15     浏览次数:      字号:    

 int UTF2Uni(const char* src, std::wstring &t)
{
 if (src == NULL)
 {
  return -1;
 }

 int size_s = strlen(src);
 int size_d = size_s + 10;          //?
 
 wchar_t *des = new wchar_t[size_d];
 memset(des, 0, size_d * sizeof(wchar_t));
 
 int s = 0, d = 0;
 bool toomuchbyte = true; //set true to skip error prefix.
 
 while (s < size_s && d < size_d)
 {
  unsigned char c = src[s];
  if ((c & 0x80) == 0)
  {
   des[d++] += src[s++];
  }
  else if((c & 0xE0) == 0xC0)  ///< 110x-xxxx 10xx-xxxx
  {
   WCHAR &wideChar = des[d++];
   wideChar  = (src[s + 0] & 0x3F) << 6;
   wideChar |= (src[s + 1] & 0x3F);
   
   s += 2;
  }
  else if((c & 0xF0) == 0xE0)  ///< 1110-xxxx 10xx-xxxx 10xx-xxxx
  {
   WCHAR &wideChar = des[d++];
   
   wideChar  = (src[s + 0] & 0x1F) << 12;
   wideChar |= (src[s + 1] & 0x3F) << 6;
   wideChar |= (src[s + 2] & 0x3F);
   
   s += 3;
  }
  else if((c & 0xF8) == 0xF0)  ///< 1111-0xxx 10xx-xxxx 10xx-xxxx 10xx-xxxx
  {
   WCHAR &wideChar = des[d++];
   
   wideChar  = (src[s + 0] & 0x0F) << 18;
   wideChar  = (src[s + 1] & 0x3F) << 12;
   wideChar |= (src[s + 2] & 0x3F) << 6;
   wideChar |= (src[s + 3] & 0x3F);
   
   s += 4;
  }
  else
  {
   WCHAR &wideChar = des[d++]; ///< 1111-10xx 10xx-xxxx 10xx-xxxx 10xx-xxxx 10xx-xxxx
   
   wideChar  = (src[s + 0] & 0x07) << 24;
   wideChar  = (src[s + 1] & 0x3F) << 18;
   wideChar  = (src[s + 2] & 0x3F) << 12;
   wideChar |= (src[s + 3] & 0x3F) << 6;
   wideChar |= (src[s + 4] & 0x3F);

   s += 5;
  }
 }
 
 t = des;
 delete[] des;
 des = NULL;
 
 return 0;
}

 

/**
 * Encode a single wide character as UTF-8.
 *
 * @param wchar  Value to encode, interpreted as a Unicode code point.
 * @param utf8   Output buffer; must have room for at least 4 bytes.
 *               No terminating NUL is written.
 * @return Number of bytes written (1 to 4), or -1 if utf8 is NULL or wchar
 *         is not in the range 0..0x10FFFF (this includes negative values on
 *         platforms where wchar_t is signed).
 *
 * Values in the surrogate range 0xD800..0xDFFF are encoded as ordinary
 * three-byte sequences, exactly as any other value below 0x10000. A single
 * wchar_t argument cannot carry a surrogate pair, so combining UTF-16
 * surrogates is left to the caller.
 */
int Uni2UTF(wchar_t wchar, char *utf8)
{
 if (utf8 == NULL) {
  return -1;
 }

 // Work on an unsigned copy: a negative wchar_t then becomes a huge value
 // and is rejected below instead of passing the "< 0x80" test.
 const unsigned long cp = static_cast<unsigned long>(wchar);
 int len = 0;

 if (cp < 0x80)
 {
  //length = 1;
  utf8[len++] = static_cast<char>(cp);
 }
 else if (cp < 0x800)
 {
  //length = 2;
  utf8[len++] = static_cast<char>(0xc0 | (cp >> 6));
  utf8[len++] = static_cast<char>(0x80 | (cp & 0x3f));
 }
 else if (cp < 0x10000)
 {
  //length = 3;
  utf8[len++] = static_cast<char>(0xe0 | (cp >> 12));
  utf8[len++] = static_cast<char>(0x80 | ((cp >> 6) & 0x3f));
  utf8[len++] = static_cast<char>(0x80 | (cp & 0x3f));
 }
 else if (cp <= 0x10FFFF)
 {
  //length = 4;
  utf8[len++] = static_cast<char>(0xf0 | (cp >> 18));
  utf8[len++] = static_cast<char>(0x80 | ((cp >> 12) & 0x3f));
  utf8[len++] = static_cast<char>(0x80 | ((cp >> 6) & 0x3f));
  utf8[len++] = static_cast<char>(0x80 | (cp & 0x3f));
 }
 else
 {
  // Outside the Unicode code space: nothing is written.
  return -1;
 }
 return len;
}

责任编辑 webmaster

 
 
 
 
 
评论更多>>
 
 
 
发表
 
姓名: QQ:
性别: MSN:
E-mail: 主页:
评分: 1 2 3 4 5
评论内容:
验证码:
  
  • 请遵守《互联网电子公告服务管理规定》及中华人民共和国其他各项有关法律法规。
  • 严禁发表危害国家安全、损害国家利益、破坏民族团结、破坏国家宗教政策、破坏社会稳定、侮辱、诽谤、教唆、淫秽等内容的评论 。
  • 用户需对自己在使用本站服务过程中的行为承担法律责任(直接或间接导致的)。
  • 本站管理员有权保留或删除评论内容。
  • 评论内容只代表网友个人观点,与本网站立场无关。
  •