看了百度百科哈夫曼的介绍,感觉挺好玩的,堆代码试玩一下:
数据压缩部分:
1、计算出每个数据出现的次数
2、构建哈夫曼树
3、得到哈夫曼编码
4、压缩数据
最终数据:
|头信息 | 每个数据出现的次数信息 | 压缩后的数据|
解压数据部分:
1、头信息提取每个数据出现的次数
2、构建树
3、得到编码
4、解码解压
下面是代码:
#include "stdio.h" #include "stdlib.h" #include "string.h" typedef unsigned char uint8_t; typedef unsigned short uint16_t; typedef unsigned int uint32_t; #define MAX_NUMBER 4096 #define PRIORITY_BASE_NUMBER 0X55 #pragma pack(1) struct huffman_tree_node { uint32_t weight_value; uint8_t value; uint32_t priority; struct huffman_tree_node *parent; struct huffman_tree_node *lchild_bit_zero; struct huffman_tree_node *rchild_bit_one; }; typedef struct huffman_tree_node huffman_tree_node_t; typedef struct { huffman_tree_node_t** phuffman_tree_bottom_node_address_array; //weight value --> node information uint32_t number; huffman_tree_node_t* phuffman_tree_list_head; }huffman_tree_info_t; struct huffman_record_times { uint8_t value; uint32_t times; struct huffman_record_times *pnext; }; typedef struct huffman_record_times huffman_record_times_t; typedef struct { uint8_t value; uint32_t times; }huffman_times_info_t; typedef struct { uint8_t value; uint8_t weight_value; uint8_t bitcount; uint8_t encode_value; }huffman_encode_info_t; const uint8_t head_string[8] = "huffman"; /** ********************** head information ********************** times_table ********************** data ********************** */ typedef struct { uint8_t head[7]; uint32_t times_table_len; uint32_t bitcount; uint32_t len_before_compress; uint8_t buf[1]; }huffman_compress_package_t; typedef struct { uint8_t head[7]; uint32_t times_table_len; uint32_t bitcount; uint32_t len_before_compress; uint8_t *pbuf; }huffman_compress_package_info_t; #pragma pack() uint8_t test_buf[MAX_NUMBER]; void huffman_init_test_buf() { uint32_t i = 0; for(i = 0;i < MAX_NUMBER;i++) { srand(i + 5); test_buf[i] = (uint8_t)(rand() & 0x0f); printf("%d ", test_buf[i]); } printf("\n\r"); } static int huffman_calculate_frequency(uint8_t *pbuf, uint32_t len, huffman_record_times_t **phuffman_record_times_list_head) { uint32_t i = 0; huffman_record_times_t *ptmp = NULL, *p = NULL; if(pbuf == NULL || len == 0) { return -1; } do { if(ptmp == NULL) //create the list head { ptmp = (huffman_record_times_t *)malloc(sizeof(huffman_record_times_t)); if(ptmp == NULL) { return -1; printf("memory error...\n\r"); } ptmp->pnext = NULL; ptmp->times = 0; ptmp->value = pbuf[0]; p = ptmp; i = 0; } for(;;) //add times or create new node { if(p->value == pbuf[i]) { p->times++; break; } else if(p->pnext != NULL) { p = p->pnext; } else { p->pnext = (huffman_record_times_t *)malloc(sizeof(huffman_record_times_t)); if(p->pnext == NULL) { *phuffman_record_times_list_head = ptmp; return -1; } p->pnext->pnext = NULL; p->pnext->value = pbuf[i]; p->pnext->times = 1; break; } } i++; p = ptmp; }while(i < len); *phuffman_record_times_list_head = ptmp; #if 0 p = *phuffman_record_times_list_head; do { if(p != NULL) { printf("value: %d number: %d \n\r", p->value, p->times); p = p->pnext; } }while(p != NULL); #endif return 0; } static int huffman_find_out_two_min(huffman_tree_node_t **record_times_node_array, uint32_t number, huffman_tree_node_t **pmin1, huffman_tree_node_t **pmin2) { uint8_t j = 0; uint32_t min = 0xffffffff; uint32_t i = 0; huffman_tree_node_t *psave_min1 = NULL, *ptmp = NULL, *p = NULL; for(j = 0;j < 2;j++) { min = 0xffffffff; for(i = 0;i < number;i++) { p = record_times_node_array[i]; if(p->parent == NULL) { if(p->weight_value <= min && psave_min1 != p) { min = p->weight_value; ptmp = p; } } else { while(p->parent != NULL) { p = p->parent; } if(p->weight_value < min && psave_min1 != p) { min = p->weight_value; ptmp = p; } } } if(psave_min1 == NULL) { *pmin1 = ptmp; psave_min1 = ptmp; } else { *pmin2 = ptmp; } } return 0; } static int huffman_create_tree(huffman_record_times_t *phuffman_record_times, huffman_tree_info_t* phuffman_tree) { huffman_record_times_t *p = phuffman_record_times; huffman_tree_node_t **record_times_node_array = NULL; uint32_t record_times_number = 0, i = 0; huffman_tree_node_t *pmin1, *pmin2, *pnew_node; if(phuffman_record_times == NULL) { return -1; } while(p != NULL) { record_times_number++; //calculate the number of weight values p = p->pnext; } record_times_node_array = malloc(record_times_number * sizeof(huffman_tree_node_t *)); if(record_times_node_array == NULL) { return -1; } /* create the bottom nodes */ p = phuffman_record_times; for(i = 0;i < record_times_number;i++) { record_times_node_array[i] = malloc(sizeof(huffman_tree_node_t )); if(record_times_node_array[i] == NULL) { return -1; } record_times_node_array[i]->lchild_bit_zero = NULL; record_times_node_array[i]->parent = NULL; record_times_node_array[i]->priority = PRIORITY_BASE_NUMBER; record_times_node_array[i]->rchild_bit_one = NULL; record_times_node_array[i]->value = p->value; record_times_node_array[i]->weight_value = p->times; p = p->pnext; #if 0 printf("lchild: %p, rchild: %p, parent: %p, value: %d ,weight: %d\n\r", record_times_node_array[i]->lchild_bit_zero ,record_times_node_array[i]->rchild_bit_one, record_times_node_array[i]->parent, record_times_node_array[i]->value ,record_times_node_array[i]->weight_value); #endif } /* create huffman tree */ for(i = 0;i < (record_times_number - 1);i++) { huffman_find_out_two_min(record_times_node_array, record_times_number, &pmin1, &pmin2); pnew_node = (huffman_tree_node_t *)malloc(sizeof(huffman_tree_node_t )); if(pnew_node == NULL) { return -1; } pnew_node->lchild_bit_zero = pmin2; pnew_node->parent = NULL; pnew_node->priority = PRIORITY_BASE_NUMBER + i + 1; pnew_node->rchild_bit_one = pmin1; pnew_node->weight_value = pmin1->weight_value + pmin2->weight_value; if(pmin1->priority > pmin2->priority) { pnew_node->lchild_bit_zero = pmin1; pnew_node->rchild_bit_one = pmin2; } pmin1->parent = pnew_node; pmin2->parent = pnew_node; #if 0 printf("%d + %d -> %d\n\r", pnew_node->lchild_bit_zero->weight_value, pnew_node->rchild_bit_one->weight_value, pnew_node->weight_value); #endif } phuffman_tree->number = record_times_number; phuffman_tree->phuffman_tree_bottom_node_address_array = record_times_node_array; phuffman_tree->phuffman_tree_list_head = pnew_node; return 0; } static int huffman_create_encode_table(huffman_tree_info_t *phuffman_tree, huffman_encode_info_t **pencode_encode_array) { uint32_t i = 0; uint32_t number; huffman_tree_node_t **p; huffman_tree_node_t *ptmp; uint8_t bitcount = 0, encode_value = 0; if(phuffman_tree == NULL) { return -1; } number = phuffman_tree->number; p = phuffman_tree->phuffman_tree_bottom_node_address_array; *pencode_encode_array = (huffman_encode_info_t *)malloc(number * sizeof(huffman_encode_info_t)); if(*pencode_encode_array == NULL) { return -1; } for(i = 0;i < number;i++) { ptmp = p[i]; bitcount = 0; encode_value = 0; for(;;) { if(ptmp->parent != NULL) { if(ptmp->parent->rchild_bit_one == ptmp) { encode_value |= (0x1 << bitcount); } bitcount++; ptmp = ptmp->parent; } else { break; } } (*pencode_encode_array)[i].bitcount = bitcount; (*pencode_encode_array)[i].encode_value = encode_value; (*pencode_encode_array)[i].value = p[i]->value; (*pencode_encode_array)[i].weight_value = p[i]->weight_value; #if 1 printf("value: %d weight: %d bitcount: %d encode: %d\n\r", p[i]->value, p[i]->weight_value, bitcount, encode_value); #endif } return 0; } int huffman_destroy_record_times_list(huffman_record_times_t* head) { huffman_record_times_t *ptmp; for(;;) { if(head->pnext != NULL) { ptmp = head->pnext->pnext; free(head->pnext); head->pnext = ptmp; } else { free(head); return 0; } } } int huffman_destroy_tree(huffman_tree_info_t *tree) { uint32_t i = 0; huffman_tree_node_t* ptmp; for(i = 0;i < tree->number;i++) { for(;;) { ptmp = tree->phuffman_tree_bottom_node_address_array[i]; while(ptmp->parent != NULL) { ptmp = ptmp->parent; } if(ptmp == tree->phuffman_tree_bottom_node_address_array[i]) { break; } ptmp->lchild_bit_zero->parent = NULL; ptmp->rchild_bit_one->parent = NULL; free(ptmp); } free(ptmp); } free(tree->phuffman_tree_bottom_node_address_array); return 0; } int huffman_compress_start(uint8_t *pbuf, uint32_t len, huffman_compress_package_info_t *pinfo) { huffman_record_times_t *phuffman_record_times_list_head, *precord_times_tmp; huffman_encode_info_t *pencode_encode_array; huffman_tree_info_t huffman_tree; uint32_t i = 0, j = 0, k = 0, number = 0, bitcount = 0; uint32_t total_bitcount = 0; uint8_t *presultbuf; huffman_times_info_t *ptimes_info; if(pbuf == NULL || len == 0) { return -1; } huffman_calculate_frequency(pbuf, len, &phuffman_record_times_list_head); huffman_create_tree(phuffman_record_times_list_head, &huffman_tree); huffman_create_encode_table(&huffman_tree, &pencode_encode_array); number = huffman_tree.number; //the number of encode value for(i = 0;i < len;i++) { for(j = 0;j < number;j++) { if(pbuf[i] == pencode_encode_array[j].value) { total_bitcount += pencode_encode_array[j].bitcount; } } } presultbuf = malloc((total_bitcount + 8) / 8); if(presultbuf == NULL) { return -1; } for(i = 0;i < len;i++) { for(j = 0;j < number;j++) { if(pbuf[i] == pencode_encode_array[j].value) { for(k = pencode_encode_array[j].bitcount;k > 0;k--) { if(pencode_encode_array[j].encode_value & (0x1 << (k - 1))) { presultbuf[bitcount / 8] |= (0x1 << (bitcount % 8)); } else { presultbuf[bitcount / 8] &= ~(0x1 << (bitcount % 8)); } bitcount++; } break; } } } huffman_destroy_tree(&huffman_tree); free(pencode_encode_array); memcpy(pinfo->head, head_string, sizeof(pinfo->head)); pinfo->bitcount = bitcount; pinfo->times_table_len = number * sizeof(huffman_times_info_t); pinfo->len_before_compress = len; pinfo->pbuf = malloc(pinfo->times_table_len + ((pinfo->bitcount + 8) / 8)); if(pinfo->pbuf == NULL) { return -1; } ptimes_info = (huffman_times_info_t *)(pinfo->pbuf); precord_times_tmp = phuffman_record_times_list_head; for(i = 0;i < number;i++) { ptimes_info[i].value = precord_times_tmp->value; ptimes_info[i].times = precord_times_tmp->times; precord_times_tmp = precord_times_tmp->pnext; } memcpy(pinfo->pbuf + pinfo->times_table_len, presultbuf, (pinfo->bitcount + 8) / 8); huffman_destroy_record_times_list(phuffman_record_times_list_head); #if 1 printf("bitcount: %d\n\r", bitcount); for(i = 0;i < bitcount;i++) { if(presultbuf[i / 8] & (0x1 << (i % 8))) { printf("1"); } else { printf("0"); } } printf("\n\r"); #endif free(presultbuf); return 0; } int huffman_uncompress_start(huffman_compress_package_t *pinfo, uint8_t **pbuf, uint32_t *size) { huffman_record_times_t *precord_times_list_head = NULL, *ptmp = NULL; huffman_times_info_t *ptimes_info; huffman_tree_info_t huffman_tree; huffman_tree_node_t *pnode; uint32_t i = 0, number = 0, index = 0; uint8_t *pdata; if(pinfo == NULL) { return -1; } if(strncmp(head_string, pinfo->head, sizeof(pinfo->head)) != 0) { return -1; } if(pinfo->len_before_compress == 0) { return -1; } *size = pinfo->len_before_compress; *pbuf = malloc(pinfo->len_before_compress); if(pbuf == NULL) { return -1; } number = pinfo->times_table_len / sizeof(huffman_times_info_t); ptimes_info = (huffman_times_info_t *)pinfo->buf; for(i = 0;i < number;i++) { if(precord_times_list_head == NULL) { precord_times_list_head = (huffman_record_times_t *)malloc(sizeof(huffman_record_times_t)); if(precord_times_list_head == NULL) { return -1; } precord_times_list_head->times = ptimes_info[i].times; precord_times_list_head->value = ptimes_info[i].value; precord_times_list_head->pnext = NULL; ptmp = precord_times_list_head; continue; } ptmp->pnext = (huffman_record_times_t *)malloc(sizeof(huffman_record_times_t)); if(ptmp->pnext == NULL) { return -1; } ptmp->pnext->pnext = NULL; ptmp->pnext->times = ptimes_info[i].times; ptmp->pnext->value = ptimes_info[i].value; ptmp = ptmp->pnext; } huffman_create_tree(precord_times_list_head, &huffman_tree); pdata = pinfo->buf + pinfo->times_table_len; pnode = huffman_tree.phuffman_tree_list_head; for(i = 0;i < pinfo->bitcount;i++) { if(pdata[i / 8] & (0x1 << (i % 8))) { pnode = pnode->rchild_bit_one; } else { pnode = pnode->lchild_bit_zero; } if(pnode->lchild_bit_zero == NULL && pnode->rchild_bit_one == NULL) { (*pbuf)[index++] = pnode->value; pnode = huffman_tree.phuffman_tree_list_head; } } huffman_destroy_record_times_list(precord_times_list_head); huffman_destroy_tree(&huffman_tree); #if 1 for(i = 0;i < pinfo->len_before_compress;i++) { printf("%d ", (*pbuf)[i]); } printf("\n\r"); #endif return 0; } int main() { huffman_compress_package_info_t huffman_compress_package_info; uint32_t len; huffman_compress_package_t *p; uint8_t *pbuf; uint32_t size; uint32_t i = 0; huffman_init_test_buf(); huffman_compress_start(test_buf, MAX_NUMBER, &huffman_compress_package_info); /*****************************************************************************************/ len = sizeof(huffman_compress_package_info) - sizeof(uint8_t *) + huffman_compress_package_info.times_table_len + (huffman_compress_package_info.bitcount + 8) / 8; printf("*****len: %d******\n\r", len); p = (huffman_compress_package_t *)malloc(len); if(p == NULL) { printf("error\n\r"); return 0; } memcpy(p, &huffman_compress_package_info, sizeof(huffman_compress_package_info) - sizeof(uint8_t *)); memcpy(p->buf, huffman_compress_package_info.pbuf, huffman_compress_package_info.times_table_len + (huffman_compress_package_info.bitcount + 8) / 8); /****************************************************************************************/ free(huffman_compress_package_info.pbuf); huffman_uncompress_start(p, &pbuf, &size); free(p); free(pbuf); return 0; }
压缩:
int huffman_compress_start(uint8_t *pbuf, uint32_t len, huffman_compress_package_info_t *pinfo);
解压:
int huffman_uncompress_start(huffman_compress_package_t *pinfo, uint8_t **pbuf, uint32_t *size);
运行后的log:
打印有点多,提取有用信息:
value: 11 weight: 257 bitcount: 4 encode: 7
value: 13 weight: 266 bitcount: 4 encode: 2
value: 5 weight: 233 bitcount: 4 encode: 14
value: 8 weight: 262 bitcount: 4 encode: 5
value: 3 weight: 264 bitcount: 4 encode: 3
value: 15 weight: 261 bitcount: 4 encode: 6
value: 0 weight: 264 bitcount: 4 encode: 4
value: 10 weight: 271 bitcount: 4 encode: 1
value: 12 weight: 254 bitcount: 4 encode: 9
value: 9 weight: 257 bitcount: 4 encode: 8
value: 6 weight: 249 bitcount: 4 encode: 11
value: 7 weight: 244 bitcount: 4 encode: 13
value: 2 weight: 231 bitcount: 4 encode: 15
value: 14 weight: 247 bitcount: 4 encode: 12
value: 4 weight: 284 bitcount: 4 encode: 0
value: 1 weight: 252 bitcount: 4 encode: 10
bitcount: 16384 //4K数据后压缩到 16384 bits
*****len: 2148****** //打包后是2148个字节数据
。。。。。。。。。。。。。。。。。。。。。。。。
在MCU应用中,不知道有没有谁使用过数据压缩算法。有啥好的迷你压缩算法也希望共享一下