查看过编码规则后,觉得它简单而实用,巧妙得有些不可思议,这一点挺佩服国外牛人的。
下面贴出源代码,供大家分析:
#include <stdint.h>
#include <stdio.h>

/*
 * Demo: converting between Unicode code points and UTF-8.
 *
 * The encoder implements the original six-byte UTF-8 scheme (code points up
 * to 0x7FFFFFFF); the decoder produces 16-bit values and therefore only
 * decodes sequences of one to three bytes.
 */

/* Sample text: six CJK characters ("Electronic Products World"). */
const uint16_t strUni[6] = {0x7535, 0x5B50, 0x4EA7, 0x54C1, 0x4E16, 0x754C};

/* The same six characters encoded as UTF-8, three bytes per character. */
const uint8_t strUTF[] = {0xe7, 0x94, 0xb5, 0xe5, 0xad, 0x90,
                          0xe4, 0xba, 0xa7, 0xe5, 0x93, 0x81,
                          0xe4, 0xb8, 0x96, 0xe7, 0x95, 0x8c};

/*
 * Encode one code point as UTF-8.
 *
 *   unichar   code point to encode, 0 .. 0x7FFFFFFF
 *   pUTF8Buf  output buffer; must have room for up to 6 bytes
 *   plength   receives the number of bytes written (1..6), or 0 when
 *             unichar is above 0x7FFFFFFF and cannot be encoded (the buffer
 *             is left untouched in that case)
 *
 * Bit layout by range:
 *   U-00000000 - U-0000007F: 0xxxxxxx
 *   U-00000080 - U-000007FF: 110xxxxx 10xxxxxx
 *   U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
 *   U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 *   U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 *   U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 */
void TransUni2UTF(uint32_t unichar, uint8_t *pUTF8Buf, uint8_t *plength)
{
    /* Marker bits of the lead byte, indexed by total sequence length. */
    static const uint8_t lead_mark[7] = {0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC};
    uint8_t n;
    uint8_t i;

    if (unichar <= 0x0000007F) {
        n = 1;
    } else if (unichar <= 0x000007FF) {
        n = 2;
    } else if (unichar <= 0x0000FFFF) {
        n = 3;
    } else if (unichar <= 0x001FFFFF) {
        n = 4;
    } else if (unichar <= 0x03FFFFFF) {
        n = 5;
    } else if (unichar <= 0x7FFFFFFF) {
        n = 6;
    } else {
        /* Out of range: always report a length so the caller never reads
         * an uninitialised value. */
        *plength = 0;
        return;
    }

    /* Fill the continuation bytes from the last one backwards, consuming
     * six payload bits each time. */
    for (i = (uint8_t)(n - 1); i > 0; i--) {
        pUTF8Buf[i] = (uint8_t)((unichar & 0x3F) | 0x80);
        unichar >>= 6;
    }

    /* Whatever is left fits in the payload bits of the lead byte. */
    pUTF8Buf[0] = (uint8_t)(unichar | lead_mark[n]);
    *plength = n;
}

/*
 * Count the leading 1 bits of a byte, capped at 6.
 *
 * For a UTF-8 lead byte this is the sequence length (2..6). Note the two
 * special results: 0 for an ASCII byte (0xxxxxxx) and 1 for a continuation
 * byte (10xxxxxx).
 */
uint8_t GetUTF8Length(uint8_t UTF8Head)
{
    uint8_t len = 0;

    while (len < 6 && (UTF8Head & 0x80) != 0) {
        len++;
        UTF8Head = (uint8_t)(UTF8Head << 1);
    }

    return len;
}

/*
 * Decode one UTF-8 sequence into a 16-bit code point.
 *
 *   pUTF8Buf  first byte of the sequence; the caller must make sure all
 *             bytes of the sequence are readable (no length is passed in)
 *   pUniBuf   receives the decoded value, or 0 when the sequence is not
 *             decoded (stray continuation byte, or a 4..6 byte sequence
 *             whose value does not fit in 16 bits)
 *   plength   receives the number of bytes to skip to reach the next
 *             sequence; always at least 1
 *
 * Continuation bytes are not validated.
 */
void TransUTF2Uni(const uint8_t *pUTF8Buf, uint16_t *pUniBuf, uint8_t *plength)
{
    uint16_t tempUni = 0;
    uint8_t ones = GetUTF8Length(pUTF8Buf[0]);

    switch (ones) {
    case 0:
        /* 0xxxxxxx: plain ASCII occupies exactly one byte. */
        tempUni = pUTF8Buf[0];
        *plength = 1;
        break;

    case 2:
        /* 110xxxxx 10xxxxxx */
        tempUni = (uint16_t)((pUTF8Buf[0] & 0x1F) << 6);
        tempUni |= (uint16_t)(pUTF8Buf[1] & 0x3F);
        *plength = 2;
        break;

    case 3:
        /* 1110xxxx 10xxxxxx 10xxxxxx */
        tempUni = (uint16_t)((pUTF8Buf[0] & 0x0F) << 12);
        tempUni |= (uint16_t)((pUTF8Buf[1] & 0x3F) << 6);
        tempUni |= (uint16_t)(pUTF8Buf[2] & 0x3F);
        *plength = 3;
        break;

    default:
        /* 1 (stray continuation byte) or 4..6 (beyond 16 bits): report the
         * length so the caller can skip it, leave the value at 0. */
        *plength = ones;
        break;
    }

    *pUniBuf = tempUni;
}

/*
 * Print one line: the code point as four hex digits, then the UTF-8 bytes
 * as two hex digits each, e.g. "0x7535 0xe794b5".
 */
void OutputResult(uint16_t unichar, const uint8_t *pTxd, uint8_t length)
{
    uint8_t i;

    /* Zero-padded so that small values stay unambiguous in the dump. */
    printf("0x%04x 0x", (unsigned)unichar);
    for (i = 0; i < length; i++) {
        printf("%02x", (unsigned)pTxd[i]);
    }
    printf("\n");
}

uint8_t TxdBuf[37] = {0, };  /* scratch buffer for encoded bytes */
uint16_t gUni;               /* last decoded code point */

/*
 * Demo driver. Define UTF8_DEMO_NO_MAIN to compile the converters without
 * it, e.g. when linking them into a test or another program.
 */
#ifndef UTF8_DEMO_NO_MAIN
int main(void)
{
    uint8_t i;
    uint8_t len = 0;
    const uint8_t *p = strUTF;
    const uint8_t *end = strUTF + sizeof strUTF;

    printf("unicode ==> UTF8\n");
    for (i = 0; i < sizeof strUni / sizeof strUni[0]; i++) {
        TransUni2UTF(strUni[i], &TxdBuf[0], &len);
        OutputResult(strUni[i], &TxdBuf[0], len);
    }

    printf("unicode <== UTF8\n");
    /* The sample data is well-formed, so every step advances by a whole
     * sequence and lands exactly on `end`. */
    while (p < end) {
        TransUTF2Uni(p, &gUni, &len);
        OutputResult(gUni, p, len);
        p += len;
    }

    /* NOTE(review): the program deliberately never returns; presumably this
     * keeps the console window open or mirrors a bare-metal main loop.
     * Confirm before replacing it with `return 0;`. */
    while (1) {
        ;
    }
}
#endif /* UTF8_DEMO_NO_MAIN */
再上一张截图