import os
import cv2
from queue import PriorityQueue
import numpy as np
import math
import struct
import matplotlib.pyplot as plt
class HuffmanNode(object):
def __init__(self,value,key=None,symbol='',left_child=None,right_child=None):
self.left_child=left_child
self.right_child=right_child
self.value=value
self.key=key
assert symbol==''
self.symbol=symbol
def __eq__(self,other):
return self.value==other.value
def __gt__(self,other):
return self.value>other.value
def __lt__(self,other):
return self.value < other.value
def createTree(hist_dict:dict)->HuffmanNode:
q=PriorityQueue()
for k, v in hist_dict.items():
q.put(HuffmanNode(value=v,key=k))
while q.qsize()>1:
l_freq, r_freq=q.get(),q.get()
node=HuffmanNode(value=l_freq.value r_freq.value,left_child=l_freq,right_child=r_freq)
q.put(node)
return q.get()
def walkTree_VLR(root_node:HuffmanNode,symbol=''):
global Huffman_encode_dict
if isinstance(root_node,HuffmanNode):
root_node.symbol =symbol
if root_node.key!=None:
Huffman_encode_dict[root_node.key]=root_node.symbol
walkTree_VLR(root_node.left_child,symbol=root_node.symbol '0')
walkTree_VLR(root_node.right_child, symbol=root_node.symbol '1')
return
def encodeImage(src_img: np.ndarray, encode_dict: dict):
img_encode=""
assert len(src_img.shape)==1, '`src_img` must be a vector'
for pixel in src_img:
img_encode =encode_dict[pixel]
return img_encode
def writeBinImage(img_encode:str,huffman_file:str):
with open(huffman_file,'wb') as f:
for i in range(0,len(img_encode),8):
img_encode_dec=int(img_encode[i:i 8],2)
img_encode_bin=struct.pack('>B',img_encode_dec)
f.write(img_encode_bin)
def readBinImage(huffman_file: str,img_encode_len:int):
code_bin_str=""
with open(huffman_file,'rb') as f:
content=f.read()
code_dec_tuple=struct.unpack('>' 'B'*len(content),content)
for code_dec in code_dec_tuple:
code_bin_str =bin(code_dec)[2:].zfill(8)
len_diff=len(code_bin_str)-img_encode_len
code_bin_str=code_bin_str[:-8] code_bin_str[-(8-len_diff):]
return code_bin_str
def decodeHuffman(img_encode:str,huffman_tree_root:HuffmanNode):
img_src_val_list=[]
root_node=huffman_tree_root
for code in img_encode:
if code=='0':
root_node=root_node.left_child
elif code=='1':
root_node=root_node.right_child
if root_node.key!=None:
img_src_val_list.append(root_node.key)
root_node=huffman_tree_root
return np.asarray(img_src_val_list)
def decodeHuffmanByDict(img_encode:str,encode_dict:dict):
img_src_val_list=[]
decode_dict={}
for k, v in encode_dict.items():
decode_dict[v]=k
s=0
while len(img_encode)>s 1:
for k in decode_dict.keys():
if k==img_encode[s:s len(k)]:
img_src_val_list.append(decode_dict[k])
s =len(k)
break
return np.asarray(img_src_val_list)
def put(path):
src_img=cv2.imread(path,cv2.IMREAD_GRAYSCALE)
src_img_w, src_img_h=src_img.shape[:2]
src_img_ravel=src_img.ravel()
hist_dict={}
for p in src_img_ravel:
if p not in hist_dict:
hist_dict[p]=1
else:
hist_dict[p] =1
#构造哈夫曼树
huffman_root_node=createTree(hist_dict)
#遍历哈夫曼树
walkTree_VLR(huffman_root_node)
global Huffman_encode_dict
print('哈夫曼编码字典:',Huffman_encode_dict)
img_encode=encodeImage(src_img_ravel,Huffman_encode_dict)
writeBinImage(img_encode,'huffman_bin_img_file.bin')
img_read_code=readBinImage('huffman_bin_img_file.bin',len(img_encode))
#解码二进制编码数据字符串得到原始图像展开的向量
#根据哈夫曼树进行解码的方式
img_src_val_array = decodeHuffman(img_read_code, huffman_root_node)
assert len(img_src_val_array)==src_img_w*src_img_h
#恢复原始图像
img_decode=np.reshape(img_src_val_array,[src_img_w,src_img_h])
#计算平均编码长度和编码效率
total_code_len=0
total_code_num=sum(hist_dict.values())
avg_code_len=0
I_entropy=0
for key in hist_dict.keys():
count=hist_dict[key]
code_len=len(Huffman_encode_dict[key])
prob=count/total_code_num
avg_code_len =prob*code_len
I_entropy =-(prob*math.log2(prob))
S_eff=I_entropy/avg_code_len
print("平均编码长度为{:.3f}".format(avg_code_len))
print("编码效率为{:.6f}".format(S_eff))
#压缩率
ori_size=src_img_w*src_img_h*8/(1024*8)
comp_size=len(img_encode)/(1024*8)
comp_rate=1-comp_size/ori_size
print('原图灰度图大小',ori_size,'KB 压缩后大小',comp_size,'KB 压缩率',comp_rate,'%')
plt.rcParams['font.sans-serif']=['SimHei']
plt.subplot(121),plt.imshow(src_img,plt.cm.gray),plt.title('原图灰度图像'),plt.axis('off')
plt.subplot(122),plt.imshow(img_decode,plt.cm.gray),plt.title('解压后'),plt.axis('off')
plt.show()
if __name__=='__main__':
#哈夫曼编码字典{pixel_value:code}
Huffman_encode_dict={}
put(r'C:/Users/xpp/Desktop/Lena.png')
哈夫曼编码字典: {103: '0000000', 227: '00000010000', 229: '000000100010', 228: '000000100011', 224: '0000001001', 225: '00000010100', 222: '00000010101', 223: '0000001011', 185: '00000011', 87: '0000010', 106: '0000011', 61: '00001', 80: '0001000', 120: '0001001', 68: '000101', 122: '0001100', 93: '0001101', 125: '0001110', 90: '0001111', 56: '00100', 78: '0010100', 89: '0010101', 132: '0010110', 123: '0010111', 127: '0011000', 184: '0011001', 129: '0011010', 134: '0011011', 119: '0011100', 215: '00111010', 219: '001110110', 218: '001110111', 76: '0011110', 133: '0011111', 164: '010000', 107: '0100010', 124: '0100011', 126: '0100100', 131: '0100101', 130: '0100110', 128: '0100111', 59: '01010', 117: '0101100', 109: '0101101', 145: '0101110', 141: '0101111', 183: '01100000', 174: '01100001', 118: '0110001', 144: '011001', 143: '0110100', 204: '011010100', 189: '011010101', 181: '01101011', 139: '0110110', 136: '0110111', 135: '0111000', 214: '01110010', 206: '01110011', 160: '011101', 110: '0111100', 163: '0111101', 226: '01111100000', 230: '0111110000100', 231: '01111100001010', 232: '011111000010110', 235: '01111100001011100', 34: '01111100001011101', 239: '011111000010111100', 238: '011111000010111101', 233: '01111100001011111', 39: '011111000011', 221: '0111110001', 211: '011111001', 177: '01111101', 173: '0111111', 175: '1000000', 161: '1000001', 142: '1000010', 146: '1000011', 74: '1000100', 170: '1000101', 208: '100011000', 191: '100011001', 212: '10001101', 116: '1000111', 162: '1001000', 182: '1001001', 138: '1001010', 179: '10010110', 207: '10010111', 72: '1001100', 147: '1001101', 159: '1001110', 190: '10011110', 43: '100111110', 201: '100111111', 66: '101000', 53: '101001', 140: '1010100', 194: '10101010', 176: '10101011', 188: '10101100', 210: '10101101', 158: '1010111', 178: '1011000', 213: '10110010', 192: '10110011', 111: '1011010', 209: '10110110', 171: '10110111', 167: '1011100', 115: '1011101', 156: '101111', 148: '1100000', 149: '1100001', 157: '1100010', 202: '11000110', 168: '11000111', 137: '1100100', 180: '1100101', 187: '110011000', 217: '110011001', 205: '11001101', 114: '1100111', 50: '1101000', 172: '11010010', 220: '1101001100', 216: '1101001101', 196: '110100111', 70: '1101010', 195: '11010110', 186: '11010111', 203: '11011000', 199: '11011001', 154: '1101101', 169: '11011100', 98: '11011101', 155: '1101111', 151: '111000', 112: '1110010', 46: '11100110', 193: '111001110', 198: '111001111', 64: '111010', 150: '1110110', 197: '11101110', 99: '11101111', 200: '11110000', 165: '11110001', 153: '1111001', 96: '11110100', 85: '11110101', 152: '1111011', 102: '11111000', 95: '11111001', 105: '11111010', 101: '11111011', 166: '11111100', 92: '11111101', 84: '11111110', 82: '11111111'} 平均编码长度为6.995 编码效率为0.996009 原图灰度图大小 206.640625 KB 压缩后大小 180.6787109375 KB 压缩率 0.12563799621928162 %
算法:哈夫曼编码是一种根据词频变化的变长二进制编码方式,多用于压缩算法。