这些小活动你都参加了吗?快来围观一下吧!>>
电子产品世界 » 论坛首页 » 嵌入式开发 » STM32 » 利用哈夫曼编码玩数据的压缩和解压

共1条 1/1 1 跳转至

利用哈夫曼编码玩数据的压缩和解压

高工
2017-10-20 17:01:21     打赏

看了百度百科哈夫曼的介绍,感觉挺好玩的,堆代码试玩一下:

哈夫曼编码(Huffman Coding)


数据压缩部分:

1、计算出每个数据出现的次数

2、构建哈夫曼树

3、得到哈夫曼编码

4、压缩数据

最终数据:

|头信息 | 每个数据出现的次数信息 | 压缩后的数据|

解压数据部分:

1、头信息提取每个数据出现的次数

2、构建树

3、得到编码

4、解码解压

下面是代码:

#include "stdio.h"
#include "stdlib.h"
#include "string.h"

typedef unsigned char  uint8_t;
typedef unsigned short uint16_t;
typedef unsigned int   uint32_t;

#define MAX_NUMBER              4096

#define PRIORITY_BASE_NUMBER    0X55

#pragma pack(1)

struct huffman_tree_node
{
	uint32_t  weight_value;
	uint8_t   value;
	uint32_t  priority;
	struct huffman_tree_node *parent;
	struct huffman_tree_node *lchild_bit_zero;
	struct huffman_tree_node *rchild_bit_one;
};
typedef struct huffman_tree_node huffman_tree_node_t;

typedef struct
{
	huffman_tree_node_t** phuffman_tree_bottom_node_address_array;      //weight value  -->  node information
	uint32_t  number;
	huffman_tree_node_t* phuffman_tree_list_head;
}huffman_tree_info_t;


struct huffman_record_times
{
	uint8_t    value;
	uint32_t   times;
	struct huffman_record_times *pnext;
};
typedef struct huffman_record_times huffman_record_times_t;

typedef struct
{
	uint8_t  value;
	uint32_t times;
}huffman_times_info_t;

typedef struct
{
	uint8_t value;
	uint8_t weight_value;
	uint8_t bitcount;
	uint8_t encode_value;
}huffman_encode_info_t;

const uint8_t head_string[8] = "huffman";


/**
  **********************
    head information
  **********************
     times_table
  **********************
         data
  **********************
*/
typedef struct
{
	uint8_t   head[7];
	uint32_t  times_table_len;
	uint32_t  bitcount;
	uint32_t  len_before_compress;
	uint8_t   buf[1];
}huffman_compress_package_t;

typedef struct
{
	uint8_t   head[7];
	uint32_t  times_table_len;
	uint32_t  bitcount;
	uint32_t  len_before_compress;
	uint8_t   *pbuf;
}huffman_compress_package_info_t;

#pragma pack()



uint8_t test_buf[MAX_NUMBER];
void huffman_init_test_buf()
{
	uint32_t i = 0;
	for(i = 0;i < MAX_NUMBER;i++)
	{
		srand(i + 5);
		test_buf[i] = (uint8_t)(rand() & 0x0f);
		printf("%d ", test_buf[i]);
	}
	printf("\n\r");
}

static int huffman_calculate_frequency(uint8_t *pbuf, uint32_t len, huffman_record_times_t **phuffman_record_times_list_head)
{
	uint32_t i = 0;
	huffman_record_times_t *ptmp = NULL, *p = NULL;
	if(pbuf == NULL || len == 0)
	{
		return -1;
	}
	do
	{
		if(ptmp == NULL)     //create the list head
		{
			ptmp = (huffman_record_times_t *)malloc(sizeof(huffman_record_times_t));
			if(ptmp == NULL)
			{
				return -1;
				printf("memory error...\n\r");
			}
			ptmp->pnext = NULL;
			ptmp->times = 0;
			ptmp->value = pbuf[0];
			p = ptmp;  
			i = 0;
		}
		for(;;)     //add times or create new node
		{
			if(p->value == pbuf[i])
			{
				p->times++;
				break;
			}
			else if(p->pnext != NULL)
			{
				p = p->pnext;
			}
			else
			{
				p->pnext = (huffman_record_times_t *)malloc(sizeof(huffman_record_times_t));
				if(p->pnext == NULL)
				{
					*phuffman_record_times_list_head = ptmp;
					return -1;
				}
				p->pnext->pnext = NULL;
				p->pnext->value = pbuf[i];
				p->pnext->times = 1;
				break;
			}
		}
		i++;
		p = ptmp; 
	}while(i < len);
	*phuffman_record_times_list_head = ptmp;
#if 0
	p = *phuffman_record_times_list_head;
	do
	{	
		if(p != NULL)
		{
			printf("value: %d number: %d \n\r", p->value, p->times);
			p = p->pnext;
		}
	}while(p != NULL);
#endif	
	return 0;	
}


static int huffman_find_out_two_min(huffman_tree_node_t **record_times_node_array, uint32_t number, huffman_tree_node_t **pmin1, huffman_tree_node_t **pmin2)
{
	uint8_t j = 0;
	uint32_t min = 0xffffffff;
	uint32_t i = 0;
	huffman_tree_node_t *psave_min1 = NULL, *ptmp = NULL, *p = NULL;
	
	for(j = 0;j < 2;j++)
	{
		min = 0xffffffff;
		for(i = 0;i < number;i++)
		{
			p = record_times_node_array[i];
			if(p->parent == NULL)
			{
				if(p->weight_value <= min && psave_min1 != p)
				{
					min = p->weight_value;
					ptmp = p;
				}
			}
			else
			{
				while(p->parent != NULL)
				{
					p = p->parent;
				}
				if(p->weight_value < min && psave_min1 != p)
				{
					min = p->weight_value;
					ptmp = p;
				}
			}
		}
		if(psave_min1 == NULL)
		{
			*pmin1 = ptmp;
			psave_min1 = ptmp;
		}
		else
		{
			*pmin2 = ptmp;
		}
	}
	return 0;
}
	


static int huffman_create_tree(huffman_record_times_t *phuffman_record_times, huffman_tree_info_t* phuffman_tree)
{
	huffman_record_times_t *p = phuffman_record_times;
	huffman_tree_node_t **record_times_node_array = NULL;
	uint32_t record_times_number = 0, i = 0;
	huffman_tree_node_t *pmin1, *pmin2, *pnew_node;
	
	if(phuffman_record_times == NULL)
	{
		return -1;
	}
	while(p != NULL)
	{
		record_times_number++;    //calculate the number of weight values
		p = p->pnext;
	}
	record_times_node_array = malloc(record_times_number * sizeof(huffman_tree_node_t *));
	if(record_times_node_array == NULL)
	{
		return -1;
	}
	/* create the bottom nodes  */
	p = phuffman_record_times;
	for(i = 0;i < record_times_number;i++)
	{
		record_times_node_array[i] = malloc(sizeof(huffman_tree_node_t ));
		if(record_times_node_array[i] == NULL)
		{
			return -1;
		}
		record_times_node_array[i]->lchild_bit_zero = NULL;
		record_times_node_array[i]->parent = NULL;
		record_times_node_array[i]->priority = PRIORITY_BASE_NUMBER;
		record_times_node_array[i]->rchild_bit_one = NULL;
		record_times_node_array[i]->value = p->value;
		record_times_node_array[i]->weight_value = p->times;
		p = p->pnext;
#if 0
		printf("lchild: %p, rchild: %p, parent: %p, value: %d ,weight: %d\n\r", record_times_node_array[i]->lchild_bit_zero
		,record_times_node_array[i]->rchild_bit_one, record_times_node_array[i]->parent, record_times_node_array[i]->value
		,record_times_node_array[i]->weight_value);
#endif
	}
	/* create huffman tree */
	for(i = 0;i < (record_times_number - 1);i++)
	{
		huffman_find_out_two_min(record_times_node_array, record_times_number, &pmin1, &pmin2);
		pnew_node = (huffman_tree_node_t *)malloc(sizeof(huffman_tree_node_t ));
		if(pnew_node == NULL)
		{
			return -1;
		}
		pnew_node->lchild_bit_zero = pmin2;
		pnew_node->parent = NULL;
		pnew_node->priority = PRIORITY_BASE_NUMBER + i + 1;
		pnew_node->rchild_bit_one = pmin1;
		pnew_node->weight_value = pmin1->weight_value + pmin2->weight_value;
		if(pmin1->priority > pmin2->priority)
		{
			pnew_node->lchild_bit_zero = pmin1;
			pnew_node->rchild_bit_one = pmin2;
		}
		pmin1->parent = pnew_node;
		pmin2->parent = pnew_node;
#if 0
		printf("%d  + %d  -> %d\n\r", pnew_node->lchild_bit_zero->weight_value, pnew_node->rchild_bit_one->weight_value, pnew_node->weight_value);
#endif
	}

	phuffman_tree->number = record_times_number;
	phuffman_tree->phuffman_tree_bottom_node_address_array = record_times_node_array;
	phuffman_tree->phuffman_tree_list_head = pnew_node;
	return 0;
}


static int huffman_create_encode_table(huffman_tree_info_t *phuffman_tree, huffman_encode_info_t **pencode_encode_array)
{
	uint32_t i = 0;
	uint32_t number;
	huffman_tree_node_t **p;
	huffman_tree_node_t *ptmp;
	uint8_t bitcount = 0, encode_value = 0;
	if(phuffman_tree == NULL)
	{
		return -1;
	}
	number = phuffman_tree->number;
	p = phuffman_tree->phuffman_tree_bottom_node_address_array;	
	*pencode_encode_array = (huffman_encode_info_t *)malloc(number * sizeof(huffman_encode_info_t));
	if(*pencode_encode_array == NULL)
	{
		return -1;
	}
	for(i = 0;i < number;i++)
	{
		ptmp = p[i];
		bitcount = 0;
		encode_value = 0;
		for(;;)
		{
			if(ptmp->parent != NULL)
			{
				if(ptmp->parent->rchild_bit_one == ptmp)
				{
					encode_value |= (0x1 << bitcount);
				}
				bitcount++;
				ptmp = ptmp->parent;
			}
			else
			{
				break;
			}
		}
		(*pencode_encode_array)[i].bitcount = bitcount;
		(*pencode_encode_array)[i].encode_value = encode_value;
		(*pencode_encode_array)[i].value = p[i]->value;
		(*pencode_encode_array)[i].weight_value = p[i]->weight_value;
#if 1
		printf("value: %d weight: %d bitcount: %d encode: %d\n\r", p[i]->value, p[i]->weight_value, bitcount, encode_value);
#endif
	}
	return 0;
}


int huffman_destroy_record_times_list(huffman_record_times_t* head)
{
	huffman_record_times_t *ptmp;
	for(;;)
	{
		if(head->pnext != NULL)
		{
			ptmp = head->pnext->pnext;
			free(head->pnext);
			head->pnext = ptmp;
		}
		else
		{
			free(head);
			return 0;
		}
	}
}


int huffman_destroy_tree(huffman_tree_info_t *tree)
{
	uint32_t i = 0;
	huffman_tree_node_t* ptmp;
	for(i = 0;i < tree->number;i++)
	{
		for(;;)
		{
			ptmp = tree->phuffman_tree_bottom_node_address_array[i];
			while(ptmp->parent != NULL)
			{
				ptmp = ptmp->parent;
			}
			if(ptmp == tree->phuffman_tree_bottom_node_address_array[i])
			{
				break;
			}
			ptmp->lchild_bit_zero->parent = NULL;
			ptmp->rchild_bit_one->parent = NULL;
			free(ptmp);
		}
		free(ptmp);
	}
	free(tree->phuffman_tree_bottom_node_address_array);
	return 0;
}


int huffman_compress_start(uint8_t *pbuf, uint32_t len, huffman_compress_package_info_t *pinfo)
{
	huffman_record_times_t *phuffman_record_times_list_head, *precord_times_tmp; 
	huffman_encode_info_t *pencode_encode_array;
	huffman_tree_info_t huffman_tree;
	uint32_t i = 0, j = 0, k = 0, number = 0, bitcount = 0;
	uint32_t total_bitcount = 0;
	uint8_t *presultbuf;
	huffman_times_info_t *ptimes_info;
	if(pbuf == NULL || len == 0)
	{
		return -1;
	}
	huffman_calculate_frequency(pbuf, len, &phuffman_record_times_list_head);
	huffman_create_tree(phuffman_record_times_list_head, &huffman_tree);
	huffman_create_encode_table(&huffman_tree, &pencode_encode_array);
	number = huffman_tree.number;                    //the number of encode value
	for(i = 0;i < len;i++)
	{
		for(j = 0;j < number;j++)
		{
			if(pbuf[i] == pencode_encode_array[j].value)
			{
				total_bitcount += pencode_encode_array[j].bitcount;
			}
		}
	}	
	presultbuf = malloc((total_bitcount + 8) / 8);
	if(presultbuf == NULL)
	{
		return -1;
	}
	for(i = 0;i < len;i++)
	{
		for(j = 0;j < number;j++)
		{
			if(pbuf[i] == pencode_encode_array[j].value)
			{
				for(k = pencode_encode_array[j].bitcount;k > 0;k--)
				{
					if(pencode_encode_array[j].encode_value & (0x1 << (k - 1)))
					{
						presultbuf[bitcount / 8] |= (0x1 << (bitcount % 8));
					}
					else
					{
						presultbuf[bitcount / 8] &= ~(0x1 << (bitcount % 8));
					}
					bitcount++;
				}
				break;
			}
		}
	}
	huffman_destroy_tree(&huffman_tree);
	free(pencode_encode_array);	
	
	memcpy(pinfo->head, head_string, sizeof(pinfo->head));
	pinfo->bitcount = bitcount;
	pinfo->times_table_len = number * sizeof(huffman_times_info_t);
	pinfo->len_before_compress = len;
	pinfo->pbuf = malloc(pinfo->times_table_len + ((pinfo->bitcount + 8) / 8));
	if(pinfo->pbuf == NULL)
	{
		return -1;
	}
	ptimes_info = (huffman_times_info_t *)(pinfo->pbuf);
	precord_times_tmp = phuffman_record_times_list_head;
	for(i = 0;i < number;i++)
	{
		ptimes_info[i].value = precord_times_tmp->value;
		ptimes_info[i].times = precord_times_tmp->times;
		precord_times_tmp = precord_times_tmp->pnext;
		
	}
	memcpy(pinfo->pbuf + pinfo->times_table_len, presultbuf, (pinfo->bitcount + 8) / 8);
	huffman_destroy_record_times_list(phuffman_record_times_list_head);
#if 1
	printf("bitcount: %d\n\r", bitcount);
	for(i = 0;i < bitcount;i++)
	{
		if(presultbuf[i / 8] & (0x1 << (i % 8)))
		{
			printf("1");
		}
		else
		{
			printf("0");
		}
	}
	printf("\n\r");
#endif
	free(presultbuf);
	return 0;
}


int huffman_uncompress_start(huffman_compress_package_t *pinfo, uint8_t **pbuf, uint32_t *size)
{
	huffman_record_times_t *precord_times_list_head = NULL, *ptmp = NULL;
	huffman_times_info_t *ptimes_info;
	huffman_tree_info_t huffman_tree;
	huffman_tree_node_t *pnode;
	uint32_t i = 0, number = 0, index = 0;
	uint8_t *pdata;
	if(pinfo == NULL)
	{
		return -1;
	}
	if(strncmp(head_string, pinfo->head, sizeof(pinfo->head)) != 0)
	{
		return -1;
	}
	if(pinfo->len_before_compress == 0)
	{
		return -1;
	}
	*size = pinfo->len_before_compress;
	*pbuf = malloc(pinfo->len_before_compress);
	if(pbuf == NULL)
	{
		return -1;
	}
	number = pinfo->times_table_len / sizeof(huffman_times_info_t);
	ptimes_info = (huffman_times_info_t *)pinfo->buf;
	for(i = 0;i < number;i++)
	{
		if(precord_times_list_head == NULL)
		{
			precord_times_list_head = (huffman_record_times_t *)malloc(sizeof(huffman_record_times_t));
			if(precord_times_list_head == NULL)
			{
				return -1;
			}
			precord_times_list_head->times = ptimes_info[i].times;
			precord_times_list_head->value = ptimes_info[i].value;
			precord_times_list_head->pnext = NULL;
			ptmp = precord_times_list_head;
			continue;
		}
		ptmp->pnext = (huffman_record_times_t *)malloc(sizeof(huffman_record_times_t));
		if(ptmp->pnext == NULL)
		{
			return -1;
		}
		ptmp->pnext->pnext = NULL;
		ptmp->pnext->times = ptimes_info[i].times;
		ptmp->pnext->value = ptimes_info[i].value;
		ptmp = ptmp->pnext;
	}
	huffman_create_tree(precord_times_list_head, &huffman_tree);
	pdata = pinfo->buf + pinfo->times_table_len;
	pnode = huffman_tree.phuffman_tree_list_head;
	for(i = 0;i < pinfo->bitcount;i++)
	{
		if(pdata[i / 8] & (0x1 << (i % 8)))
		{
			pnode = pnode->rchild_bit_one;
		}
		else
		{
			pnode = pnode->lchild_bit_zero;
		}
		if(pnode->lchild_bit_zero == NULL && pnode->rchild_bit_one == NULL)
		{
			(*pbuf)[index++] = pnode->value;
			pnode = huffman_tree.phuffman_tree_list_head;
		}
	}
	huffman_destroy_record_times_list(precord_times_list_head);
	huffman_destroy_tree(&huffman_tree);
#if 1
	for(i = 0;i < pinfo->len_before_compress;i++)
	{
		printf("%d ", (*pbuf)[i]);
	}
	printf("\n\r");
#endif
	return 0;
	
}



int main()
{
	huffman_compress_package_info_t huffman_compress_package_info;
	uint32_t len;
	huffman_compress_package_t *p;
	uint8_t *pbuf;
	uint32_t size;
	uint32_t i = 0;
	huffman_init_test_buf();
	huffman_compress_start(test_buf, MAX_NUMBER, &huffman_compress_package_info);
/*****************************************************************************************/
	len = sizeof(huffman_compress_package_info) - sizeof(uint8_t *) + huffman_compress_package_info.times_table_len + (huffman_compress_package_info.bitcount + 8) / 8;
	printf("*****len: %d******\n\r", len);
	p = (huffman_compress_package_t *)malloc(len);
	if(p == NULL)
	{
		printf("error\n\r");
		return 0;
	}
	memcpy(p, &huffman_compress_package_info, sizeof(huffman_compress_package_info) - sizeof(uint8_t *));
	memcpy(p->buf, huffman_compress_package_info.pbuf, huffman_compress_package_info.times_table_len + (huffman_compress_package_info.bitcount + 8) / 8);
/****************************************************************************************/	
	free(huffman_compress_package_info.pbuf);
	huffman_uncompress_start(p, &pbuf, &size);
	free(p);
	free(pbuf);
	return 0;
}

压缩:

int huffman_compress_start(uint8_t *pbuf, uint32_t len, huffman_compress_package_info_t *pinfo);

解压:

int huffman_uncompress_start(huffman_compress_package_t *pinfo, uint8_t **pbuf, uint32_t *size);

运行后的log:

打印有点多,提取有用信息:

value: 11 weight: 257 bitcount: 4 encode: 7



value: 13 weight: 266 bitcount: 4 encode: 2



value: 5 weight: 233 bitcount: 4 encode: 14



value: 8 weight: 262 bitcount: 4 encode: 5



value: 3 weight: 264 bitcount: 4 encode: 3



value: 15 weight: 261 bitcount: 4 encode: 6



value: 0 weight: 264 bitcount: 4 encode: 4



value: 10 weight: 271 bitcount: 4 encode: 1



value: 12 weight: 254 bitcount: 4 encode: 9



value: 9 weight: 257 bitcount: 4 encode: 8



value: 6 weight: 249 bitcount: 4 encode: 11



value: 7 weight: 244 bitcount: 4 encode: 13



value: 2 weight: 231 bitcount: 4 encode: 15



value: 14 weight: 247 bitcount: 4 encode: 12



value: 4 weight: 284 bitcount: 4 encode: 0



value: 1 weight: 252 bitcount: 4 encode: 10



bitcount: 16384      //4K数据后压缩到 16384 bits

*****len: 2148******   //打包后是2148个字节数据

。。。。。。。。。。。。。。。。。。。。。。。。


在MCU应用中,不知道有没有谁使用过数据压缩算法。有啥好的迷你压缩算法也希望共享一下




关键词: 哈夫曼     编码     数据     压缩     解压    

共1条 1/1 1 跳转至

回复

匿名不能发帖!请先 [ 登陆 注册 ]