查看过 UTF-8 的编码规则后,觉得它还是挺不可思议的:简单而实用,这一点很佩服国外的牛人。
下面贴出源代码,供大家分析:
#include <stdint.h>
#include <stdio.h>
/* Sample text as six 16-bit Unicode code points:
 * U+7535 U+5B50 U+4EA7 U+54C1 U+4E16 U+754C ("Electronic Products World"). */
const uint16_t strUni[6] = {
    0x7535,
    0x5B50,
    0x4EA7,
    0x54C1,
    0x4E16,
    0x754C
};

/* The same six characters encoded as UTF-8, three bytes per character. */
const uint8_t strUTF[] = {
    0xE7, 0x94, 0xB5,
    0xE5, 0xAD, 0x90,
    0xE4, 0xBA, 0xA7,
    0xE5, 0x93, 0x81,
    0xE4, 0xB8, 0x96,
    0xE7, 0x95, 0x8C
};
/**
 * Encode a single code point as a UTF-8 byte sequence.
 *
 * @param unichar  Code point to encode (0 .. 0x7FFFFFFF).
 * @param pUTF8Buf Output buffer; the caller must provide room for 6 bytes.
 * @param plength  Receives the number of bytes written (1..6), or 0 when
 *                 unichar is above 0x7FFFFFFF, in which case nothing is
 *                 written to pUTF8Buf.
 *
 * Layout by range:
 *   U-00000000 - U-0000007F: 0xxxxxxx
 *   U-00000080 - U-000007FF: 110xxxxx 10xxxxxx
 *   U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
 *   U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 *   U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 *   U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 *
 * The 5- and 6-byte forms (and 4-byte values above U+10FFFF) belong to the
 * original UTF-8 definition and are not valid under RFC 3629; they are kept
 * so existing callers see the same output. Surrogate values
 * (U+D800..U+DFFF) are not rejected either.
 */
void TransUni2UTF(uint32_t unichar, uint8_t *pUTF8Buf, uint8_t *plength)
{
    /* Lead-byte prefix for an n-byte sequence, indexed by n (0 and 1 unused). */
    static const uint8_t leadPrefix[7] = {0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC};
    uint8_t count;
    uint8_t pos;

    if (unichar <= 0x0000007F) {
        pUTF8Buf[0] = (uint8_t)unichar;
        *plength = 1;
        return;
    }

    if (unichar <= 0x000007FF) {
        count = 2;
    } else if (unichar <= 0x0000FFFF) {
        count = 3;
    } else if (unichar <= 0x001FFFFF) {
        count = 4;
    } else if (unichar <= 0x03FFFFFF) {
        count = 5;
    } else if (unichar <= 0x7FFFFFFF) {
        count = 6;
    } else {
        /* Not encodable: always report a defined length to the caller. */
        *plength = 0;
        return;
    }

    /* Fill continuation bytes from the tail, consuming 6 payload bits each. */
    for (pos = (uint8_t)(count - 1); pos > 0; pos--) {
        pUTF8Buf[pos] = (uint8_t)(0x80 | (unichar & 0x3F));
        unichar >>= 6;
    }

    /* Whatever bits remain fit in the payload area of the lead byte. */
    pUTF8Buf[0] = (uint8_t)(leadPrefix[count] | unichar);
    *plength = count;
}
/**
 * Return the byte length of a UTF-8 sequence given its first byte.
 *
 * @param UTF8Head First byte of the sequence.
 * @return 1 for a single-byte (ASCII, 0xxxxxxx) lead; otherwise the number
 *         of leading 1 bits in UTF8Head, capped at 6 (110xxxxx -> 2,
 *         1110xxxx -> 3, 11110xxx -> 4, 111110xx -> 5, 1111110x -> 6).
 *
 * No validation is performed: a continuation byte (10xxxxxx) yields 1 and
 * 0xFE/0xFF yield 6, although neither is a legal lead byte.
 */
uint8_t GetUTF8Length(uint8_t UTF8Head)
{
    uint8_t len = 0;

    /* A clear top bit means the byte stands alone; its length is 1, not 0. */
    if ((UTF8Head & 0x80) == 0) {
        return 1;
    }

    /* The count of leading 1 bits is the length of a multi-byte sequence. */
    while (len < 6 && (UTF8Head & 0x80) != 0) {
        len++;
        UTF8Head = (uint8_t)(UTF8Head << 1);
    }
    return len;
}
/**
 * Decode one UTF-8 sequence into a 16-bit code point.
 *
 * @param pUTF8Buf Start of the sequence. The input is assumed to be
 *                 well-formed: continuation bytes are read without being
 *                 validated, and 2 or 3 bytes are read depending on the
 *                 lead byte.
 * @param pUniBuf  Receives the decoded code point. Only 1-, 2- and 3-byte
 *                 sequences (U+0000..U+FFFF) fit in 16 bits; for any other
 *                 lead byte 0 is stored.
 * @param plength  Receives the sequence length: 1 for an ASCII byte,
 *                 otherwise whatever GetUTF8Length() reports for the lead
 *                 byte, so a caller can step over sequences that were not
 *                 decoded.
 */
void TransUTF2Uni(const uint8_t *pUTF8Buf, uint16_t *pUniBuf, uint8_t *plength)
{
    const uint8_t lead = pUTF8Buf[0];
    uint16_t code = 0;

    /* 0xxxxxxx: the byte is the code point. Handled here so the result does
     * not depend on how the length helper classifies single-byte leads. */
    if (lead < 0x80) {
        *pUniBuf = lead;
        *plength = 1;
        return;
    }

    *plength = GetUTF8Length(lead);
    switch (*plength) {
    case 2:
        /* 110xxxxx 10xxxxxx */
        code = (uint16_t)(((lead & 0x1F) << 6) |
                          (pUTF8Buf[1] & 0x3F));
        break;
    case 3:
        /* 1110xxxx 10xxxxxx 10xxxxxx */
        code = (uint16_t)(((lead & 0x0F) << 12) |
                          ((pUTF8Buf[1] & 0x3F) << 6) |
                          (pUTF8Buf[2] & 0x3F));
        break;
    default:
        /* Longer sequences exceed 16 bits; other lead bytes are not
         * decodable. Leave the result at 0. */
        break;
    }
    *pUniBuf = code;
}
/**
 * Print one conversion result as a line of hexadecimal text.
 *
 * Output format: "0x<code point, 4 hex digits> 0x<bytes, 2 hex digits each>\n".
 * Both fields are zero-padded so that small values (e.g. code point 0x41 or
 * byte 0x0A) stay unambiguous in the concatenated dump.
 *
 * @param unichar 16-bit code point to print.
 * @param pTxd    Bytes to print; only read by this function.
 * @param length  Number of bytes to print from pTxd.
 */
void OutputResult(uint16_t unichar, uint8_t *pTxd, uint8_t length)
{
    printf("0x%04x 0x", (unsigned)unichar);
    for (uint8_t i = 0; i < length; i++) {
        printf("%02x", (unsigned)pTxd[i]);
    }
    printf("\n");
}
/* Zero-initialised scratch buffer that main() passes as the UTF-8 output area. */
uint8_t TxdBuf[37] = { 0 };

/* Receives the code point decoded from each UTF-8 sequence in main(). */
uint16_t gUni;
/*
 * Demo entry point: encodes each entry of strUni to UTF-8 and prints it,
 * then walks strUTF decoding six sequences back to 16-bit code points and
 * prints those, and finally spins forever.
 *
 * NOTE(review): `void main` plus the endless loop at the end suggests a
 * bare-metal target where main must not return - confirm; a hosted build
 * would normally use `int main(void)` and return.
 */
void main(void)
{
    uint8_t idx;
    uint8_t nbytes;
    /* OutputResult() takes a non-const pointer, hence the cast; the table is only read. */
    uint8_t *cursor = (uint8_t *)strUTF;

    printf("unicode ==> UTF8\n");
    idx = 0;
    while (idx < 6) {
        TransUni2UTF(strUni[idx], TxdBuf, &nbytes);
        OutputResult(strUni[idx], TxdBuf, nbytes);
        idx++;
    }

    printf("unicode <== UTF8\n");
    /* Step through the byte stream by the length each decode reports. */
    for (idx = 0; idx < 6; idx++, cursor += nbytes) {
        TransUTF2Uni(cursor, &gUni, &nbytes);
        OutputResult(gUni, cursor, nbytes);
    }

    /* Never return. */
    for (;;) {
    }
}
再上一张截图

我要赚赏金
