看了百度百科哈夫曼的介绍,感觉挺好玩的,堆代码试玩一下:
数据压缩部分:
1、计算出每个数据出现的次数
2、构建哈夫曼树
3、得到哈夫曼编码
4、压缩数据
最终数据:
|头信息 | 每个数据出现的次数信息 | 压缩后的数据|
解压数据部分:
1、头信息提取每个数据出现的次数
2、构建树
3、得到编码
4、解码解压
下面是代码:
#include "stdio.h"
#include "stdlib.h"
#include "string.h"
typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef unsigned int uint32_t;
#define MAX_NUMBER 4096
#define PRIORITY_BASE_NUMBER 0X55
#pragma pack(1)
struct huffman_tree_node
{
uint32_t weight_value;
uint8_t value;
uint32_t priority;
struct huffman_tree_node *parent;
struct huffman_tree_node *lchild_bit_zero;
struct huffman_tree_node *rchild_bit_one;
};
typedef struct huffman_tree_node huffman_tree_node_t;
typedef struct
{
huffman_tree_node_t** phuffman_tree_bottom_node_address_array; //weight value --> node information
uint32_t number;
huffman_tree_node_t* phuffman_tree_list_head;
}huffman_tree_info_t;
struct huffman_record_times
{
uint8_t value;
uint32_t times;
struct huffman_record_times *pnext;
};
typedef struct huffman_record_times huffman_record_times_t;
typedef struct
{
uint8_t value;
uint32_t times;
}huffman_times_info_t;
typedef struct
{
uint8_t value;
uint8_t weight_value;
uint8_t bitcount;
uint8_t encode_value;
}huffman_encode_info_t;
const uint8_t head_string[8] = "huffman";
/**
**********************
head information
**********************
times_table
**********************
data
**********************
*/
typedef struct
{
uint8_t head[7];
uint32_t times_table_len;
uint32_t bitcount;
uint32_t len_before_compress;
uint8_t buf[1];
}huffman_compress_package_t;
typedef struct
{
uint8_t head[7];
uint32_t times_table_len;
uint32_t bitcount;
uint32_t len_before_compress;
uint8_t *pbuf;
}huffman_compress_package_info_t;
#pragma pack()
uint8_t test_buf[MAX_NUMBER];
void huffman_init_test_buf()
{
uint32_t i = 0;
for(i = 0;i < MAX_NUMBER;i++)
{
srand(i + 5);
test_buf[i] = (uint8_t)(rand() & 0x0f);
printf("%d ", test_buf[i]);
}
printf("\n\r");
}
static int huffman_calculate_frequency(uint8_t *pbuf, uint32_t len, huffman_record_times_t **phuffman_record_times_list_head)
{
uint32_t i = 0;
huffman_record_times_t *ptmp = NULL, *p = NULL;
if(pbuf == NULL || len == 0)
{
return -1;
}
do
{
if(ptmp == NULL) //create the list head
{
ptmp = (huffman_record_times_t *)malloc(sizeof(huffman_record_times_t));
if(ptmp == NULL)
{
return -1;
printf("memory error...\n\r");
}
ptmp->pnext = NULL;
ptmp->times = 0;
ptmp->value = pbuf[0];
p = ptmp;
i = 0;
}
for(;;) //add times or create new node
{
if(p->value == pbuf[i])
{
p->times++;
break;
}
else if(p->pnext != NULL)
{
p = p->pnext;
}
else
{
p->pnext = (huffman_record_times_t *)malloc(sizeof(huffman_record_times_t));
if(p->pnext == NULL)
{
*phuffman_record_times_list_head = ptmp;
return -1;
}
p->pnext->pnext = NULL;
p->pnext->value = pbuf[i];
p->pnext->times = 1;
break;
}
}
i++;
p = ptmp;
}while(i < len);
*phuffman_record_times_list_head = ptmp;
#if 0
p = *phuffman_record_times_list_head;
do
{
if(p != NULL)
{
printf("value: %d number: %d \n\r", p->value, p->times);
p = p->pnext;
}
}while(p != NULL);
#endif
return 0;
}
static int huffman_find_out_two_min(huffman_tree_node_t **record_times_node_array, uint32_t number, huffman_tree_node_t **pmin1, huffman_tree_node_t **pmin2)
{
uint8_t j = 0;
uint32_t min = 0xffffffff;
uint32_t i = 0;
huffman_tree_node_t *psave_min1 = NULL, *ptmp = NULL, *p = NULL;
for(j = 0;j < 2;j++)
{
min = 0xffffffff;
for(i = 0;i < number;i++)
{
p = record_times_node_array[i];
if(p->parent == NULL)
{
if(p->weight_value <= min && psave_min1 != p)
{
min = p->weight_value;
ptmp = p;
}
}
else
{
while(p->parent != NULL)
{
p = p->parent;
}
if(p->weight_value < min && psave_min1 != p)
{
min = p->weight_value;
ptmp = p;
}
}
}
if(psave_min1 == NULL)
{
*pmin1 = ptmp;
psave_min1 = ptmp;
}
else
{
*pmin2 = ptmp;
}
}
return 0;
}
static int huffman_create_tree(huffman_record_times_t *phuffman_record_times, huffman_tree_info_t* phuffman_tree)
{
huffman_record_times_t *p = phuffman_record_times;
huffman_tree_node_t **record_times_node_array = NULL;
uint32_t record_times_number = 0, i = 0;
huffman_tree_node_t *pmin1, *pmin2, *pnew_node;
if(phuffman_record_times == NULL)
{
return -1;
}
while(p != NULL)
{
record_times_number++; //calculate the number of weight values
p = p->pnext;
}
record_times_node_array = malloc(record_times_number * sizeof(huffman_tree_node_t *));
if(record_times_node_array == NULL)
{
return -1;
}
/* create the bottom nodes */
p = phuffman_record_times;
for(i = 0;i < record_times_number;i++)
{
record_times_node_array[i] = malloc(sizeof(huffman_tree_node_t ));
if(record_times_node_array[i] == NULL)
{
return -1;
}
record_times_node_array[i]->lchild_bit_zero = NULL;
record_times_node_array[i]->parent = NULL;
record_times_node_array[i]->priority = PRIORITY_BASE_NUMBER;
record_times_node_array[i]->rchild_bit_one = NULL;
record_times_node_array[i]->value = p->value;
record_times_node_array[i]->weight_value = p->times;
p = p->pnext;
#if 0
printf("lchild: %p, rchild: %p, parent: %p, value: %d ,weight: %d\n\r", record_times_node_array[i]->lchild_bit_zero
,record_times_node_array[i]->rchild_bit_one, record_times_node_array[i]->parent, record_times_node_array[i]->value
,record_times_node_array[i]->weight_value);
#endif
}
/* create huffman tree */
for(i = 0;i < (record_times_number - 1);i++)
{
huffman_find_out_two_min(record_times_node_array, record_times_number, &pmin1, &pmin2);
pnew_node = (huffman_tree_node_t *)malloc(sizeof(huffman_tree_node_t ));
if(pnew_node == NULL)
{
return -1;
}
pnew_node->lchild_bit_zero = pmin2;
pnew_node->parent = NULL;
pnew_node->priority = PRIORITY_BASE_NUMBER + i + 1;
pnew_node->rchild_bit_one = pmin1;
pnew_node->weight_value = pmin1->weight_value + pmin2->weight_value;
if(pmin1->priority > pmin2->priority)
{
pnew_node->lchild_bit_zero = pmin1;
pnew_node->rchild_bit_one = pmin2;
}
pmin1->parent = pnew_node;
pmin2->parent = pnew_node;
#if 0
printf("%d + %d -> %d\n\r", pnew_node->lchild_bit_zero->weight_value, pnew_node->rchild_bit_one->weight_value, pnew_node->weight_value);
#endif
}
phuffman_tree->number = record_times_number;
phuffman_tree->phuffman_tree_bottom_node_address_array = record_times_node_array;
phuffman_tree->phuffman_tree_list_head = pnew_node;
return 0;
}
static int huffman_create_encode_table(huffman_tree_info_t *phuffman_tree, huffman_encode_info_t **pencode_encode_array)
{
uint32_t i = 0;
uint32_t number;
huffman_tree_node_t **p;
huffman_tree_node_t *ptmp;
uint8_t bitcount = 0, encode_value = 0;
if(phuffman_tree == NULL)
{
return -1;
}
number = phuffman_tree->number;
p = phuffman_tree->phuffman_tree_bottom_node_address_array;
*pencode_encode_array = (huffman_encode_info_t *)malloc(number * sizeof(huffman_encode_info_t));
if(*pencode_encode_array == NULL)
{
return -1;
}
for(i = 0;i < number;i++)
{
ptmp = p[i];
bitcount = 0;
encode_value = 0;
for(;;)
{
if(ptmp->parent != NULL)
{
if(ptmp->parent->rchild_bit_one == ptmp)
{
encode_value |= (0x1 << bitcount);
}
bitcount++;
ptmp = ptmp->parent;
}
else
{
break;
}
}
(*pencode_encode_array)[i].bitcount = bitcount;
(*pencode_encode_array)[i].encode_value = encode_value;
(*pencode_encode_array)[i].value = p[i]->value;
(*pencode_encode_array)[i].weight_value = p[i]->weight_value;
#if 1
printf("value: %d weight: %d bitcount: %d encode: %d\n\r", p[i]->value, p[i]->weight_value, bitcount, encode_value);
#endif
}
return 0;
}
int huffman_destroy_record_times_list(huffman_record_times_t* head)
{
huffman_record_times_t *ptmp;
for(;;)
{
if(head->pnext != NULL)
{
ptmp = head->pnext->pnext;
free(head->pnext);
head->pnext = ptmp;
}
else
{
free(head);
return 0;
}
}
}
int huffman_destroy_tree(huffman_tree_info_t *tree)
{
uint32_t i = 0;
huffman_tree_node_t* ptmp;
for(i = 0;i < tree->number;i++)
{
for(;;)
{
ptmp = tree->phuffman_tree_bottom_node_address_array[i];
while(ptmp->parent != NULL)
{
ptmp = ptmp->parent;
}
if(ptmp == tree->phuffman_tree_bottom_node_address_array[i])
{
break;
}
ptmp->lchild_bit_zero->parent = NULL;
ptmp->rchild_bit_one->parent = NULL;
free(ptmp);
}
free(ptmp);
}
free(tree->phuffman_tree_bottom_node_address_array);
return 0;
}
int huffman_compress_start(uint8_t *pbuf, uint32_t len, huffman_compress_package_info_t *pinfo)
{
huffman_record_times_t *phuffman_record_times_list_head, *precord_times_tmp;
huffman_encode_info_t *pencode_encode_array;
huffman_tree_info_t huffman_tree;
uint32_t i = 0, j = 0, k = 0, number = 0, bitcount = 0;
uint32_t total_bitcount = 0;
uint8_t *presultbuf;
huffman_times_info_t *ptimes_info;
if(pbuf == NULL || len == 0)
{
return -1;
}
huffman_calculate_frequency(pbuf, len, &phuffman_record_times_list_head);
huffman_create_tree(phuffman_record_times_list_head, &huffman_tree);
huffman_create_encode_table(&huffman_tree, &pencode_encode_array);
number = huffman_tree.number; //the number of encode value
for(i = 0;i < len;i++)
{
for(j = 0;j < number;j++)
{
if(pbuf[i] == pencode_encode_array[j].value)
{
total_bitcount += pencode_encode_array[j].bitcount;
}
}
}
presultbuf = malloc((total_bitcount + 8) / 8);
if(presultbuf == NULL)
{
return -1;
}
for(i = 0;i < len;i++)
{
for(j = 0;j < number;j++)
{
if(pbuf[i] == pencode_encode_array[j].value)
{
for(k = pencode_encode_array[j].bitcount;k > 0;k--)
{
if(pencode_encode_array[j].encode_value & (0x1 << (k - 1)))
{
presultbuf[bitcount / 8] |= (0x1 << (bitcount % 8));
}
else
{
presultbuf[bitcount / 8] &= ~(0x1 << (bitcount % 8));
}
bitcount++;
}
break;
}
}
}
huffman_destroy_tree(&huffman_tree);
free(pencode_encode_array);
memcpy(pinfo->head, head_string, sizeof(pinfo->head));
pinfo->bitcount = bitcount;
pinfo->times_table_len = number * sizeof(huffman_times_info_t);
pinfo->len_before_compress = len;
pinfo->pbuf = malloc(pinfo->times_table_len + ((pinfo->bitcount + 8) / 8));
if(pinfo->pbuf == NULL)
{
return -1;
}
ptimes_info = (huffman_times_info_t *)(pinfo->pbuf);
precord_times_tmp = phuffman_record_times_list_head;
for(i = 0;i < number;i++)
{
ptimes_info[i].value = precord_times_tmp->value;
ptimes_info[i].times = precord_times_tmp->times;
precord_times_tmp = precord_times_tmp->pnext;
}
memcpy(pinfo->pbuf + pinfo->times_table_len, presultbuf, (pinfo->bitcount + 8) / 8);
huffman_destroy_record_times_list(phuffman_record_times_list_head);
#if 1
printf("bitcount: %d\n\r", bitcount);
for(i = 0;i < bitcount;i++)
{
if(presultbuf[i / 8] & (0x1 << (i % 8)))
{
printf("1");
}
else
{
printf("0");
}
}
printf("\n\r");
#endif
free(presultbuf);
return 0;
}
int huffman_uncompress_start(huffman_compress_package_t *pinfo, uint8_t **pbuf, uint32_t *size)
{
huffman_record_times_t *precord_times_list_head = NULL, *ptmp = NULL;
huffman_times_info_t *ptimes_info;
huffman_tree_info_t huffman_tree;
huffman_tree_node_t *pnode;
uint32_t i = 0, number = 0, index = 0;
uint8_t *pdata;
if(pinfo == NULL)
{
return -1;
}
if(strncmp(head_string, pinfo->head, sizeof(pinfo->head)) != 0)
{
return -1;
}
if(pinfo->len_before_compress == 0)
{
return -1;
}
*size = pinfo->len_before_compress;
*pbuf = malloc(pinfo->len_before_compress);
if(pbuf == NULL)
{
return -1;
}
number = pinfo->times_table_len / sizeof(huffman_times_info_t);
ptimes_info = (huffman_times_info_t *)pinfo->buf;
for(i = 0;i < number;i++)
{
if(precord_times_list_head == NULL)
{
precord_times_list_head = (huffman_record_times_t *)malloc(sizeof(huffman_record_times_t));
if(precord_times_list_head == NULL)
{
return -1;
}
precord_times_list_head->times = ptimes_info[i].times;
precord_times_list_head->value = ptimes_info[i].value;
precord_times_list_head->pnext = NULL;
ptmp = precord_times_list_head;
continue;
}
ptmp->pnext = (huffman_record_times_t *)malloc(sizeof(huffman_record_times_t));
if(ptmp->pnext == NULL)
{
return -1;
}
ptmp->pnext->pnext = NULL;
ptmp->pnext->times = ptimes_info[i].times;
ptmp->pnext->value = ptimes_info[i].value;
ptmp = ptmp->pnext;
}
huffman_create_tree(precord_times_list_head, &huffman_tree);
pdata = pinfo->buf + pinfo->times_table_len;
pnode = huffman_tree.phuffman_tree_list_head;
for(i = 0;i < pinfo->bitcount;i++)
{
if(pdata[i / 8] & (0x1 << (i % 8)))
{
pnode = pnode->rchild_bit_one;
}
else
{
pnode = pnode->lchild_bit_zero;
}
if(pnode->lchild_bit_zero == NULL && pnode->rchild_bit_one == NULL)
{
(*pbuf)[index++] = pnode->value;
pnode = huffman_tree.phuffman_tree_list_head;
}
}
huffman_destroy_record_times_list(precord_times_list_head);
huffman_destroy_tree(&huffman_tree);
#if 1
for(i = 0;i < pinfo->len_before_compress;i++)
{
printf("%d ", (*pbuf)[i]);
}
printf("\n\r");
#endif
return 0;
}
int main()
{
huffman_compress_package_info_t huffman_compress_package_info;
uint32_t len;
huffman_compress_package_t *p;
uint8_t *pbuf;
uint32_t size;
uint32_t i = 0;
huffman_init_test_buf();
huffman_compress_start(test_buf, MAX_NUMBER, &huffman_compress_package_info);
/*****************************************************************************************/
len = sizeof(huffman_compress_package_info) - sizeof(uint8_t *) + huffman_compress_package_info.times_table_len + (huffman_compress_package_info.bitcount + 8) / 8;
printf("*****len: %d******\n\r", len);
p = (huffman_compress_package_t *)malloc(len);
if(p == NULL)
{
printf("error\n\r");
return 0;
}
memcpy(p, &huffman_compress_package_info, sizeof(huffman_compress_package_info) - sizeof(uint8_t *));
memcpy(p->buf, huffman_compress_package_info.pbuf, huffman_compress_package_info.times_table_len + (huffman_compress_package_info.bitcount + 8) / 8);
/****************************************************************************************/
free(huffman_compress_package_info.pbuf);
huffman_uncompress_start(p, &pbuf, &size);
free(p);
free(pbuf);
return 0;
}压缩:
int huffman_compress_start(uint8_t *pbuf, uint32_t len, huffman_compress_package_info_t *pinfo);
解压:
int huffman_uncompress_start(huffman_compress_package_t *pinfo, uint8_t **pbuf, uint32_t *size);
运行后的log:
打印有点多,提取有用信息:
value: 11 weight: 257 bitcount: 4 encode: 7
value: 13 weight: 266 bitcount: 4 encode: 2
value: 5 weight: 233 bitcount: 4 encode: 14
value: 8 weight: 262 bitcount: 4 encode: 5
value: 3 weight: 264 bitcount: 4 encode: 3
value: 15 weight: 261 bitcount: 4 encode: 6
value: 0 weight: 264 bitcount: 4 encode: 4
value: 10 weight: 271 bitcount: 4 encode: 1
value: 12 weight: 254 bitcount: 4 encode: 9
value: 9 weight: 257 bitcount: 4 encode: 8
value: 6 weight: 249 bitcount: 4 encode: 11
value: 7 weight: 244 bitcount: 4 encode: 13
value: 2 weight: 231 bitcount: 4 encode: 15
value: 14 weight: 247 bitcount: 4 encode: 12
value: 4 weight: 284 bitcount: 4 encode: 0
value: 1 weight: 252 bitcount: 4 encode: 10
bitcount: 16384 //4K数据后压缩到 16384 bits
*****len: 2148****** //打包后是2148个字节数据
。。。。。。。。。。。。。。。。。。。。。。。。
在MCU应用中,不知道有没有谁使用过数据压缩算法。有啥好的迷你压缩算法也希望共享一下
我要赚赏金
