1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
|
package com.eh.ftd.dsa.ds;
import com.alibaba.fastjson.JSON;
import com.eh.ftd.dsa.ds.utils.ByteUtils;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import java.util.List;
import java.util.Map;
/**
 * Huffman encoder: builds a Huffman tree from the byte frequencies of the input,
 * derives a per-byte bit code from the tree, and packs the concatenated codes
 * into a byte array.
 *
 * @author David Li
 * @create 2020/06/28 18:41
 */
public class HuffmanEncode {

    /**
     * Output of {@link #encode(byte[])}: the packed bit stream plus the
     * information needed to interpret it.
     */
    static class HuffmanEncodeResult {
        // Packed Huffman bit stream. The last byte may carry fewer than 8
        // significant bits; see bitLength.
        byte[] data;
        // JSON object mapping each source byte (as a decimal string) to its
        // Huffman code (a string of '0'/'1' characters).
        String keyJson;
        // Number of bits in the Huffman code string before it was packed into
        // data. Packing parses the final chunk as a plain binary number, so its
        // leading zeros are not recoverable from data alone; this count lets a
        // reader restore them.
        int bitLength;

        public byte[] getData() {
            return data;
        }

        public void setData(byte[] data) {
            this.data = data;
        }

        public String getKeyJson() {
            return keyJson;
        }

        public void setKeyJson(String keyJson) {
            this.keyJson = keyJson;
        }

        public int getBitLength() {
            return bitLength;
        }

        public void setBitLength(int bitLength) {
            this.bitLength = bitLength;
        }
    }

    /**
     * Huffman tree node. Leaves carry a source byte in {@code data}; internal
     * nodes only carry the combined weight of their children.
     */
    static class Node {
        int weight;
        byte data;
        Node left;
        Node right;

        /**
         * Creates a leaf node.
         *
         * @param data   the source byte this leaf represents
         * @param weight number of occurrences of {@code data}
         */
        public Node(byte data, int weight) {
            this.data = data;
            this.weight = weight;
        }

        /**
         * Creates an internal node; {@code data} is left at its default value.
         *
         * @param weight combined weight of the two children
         * @param left   left child (edge bit 0)
         * @param right  right child (edge bit 1)
         */
        public Node(int weight, Node left, Node right) {
            this.weight = weight;
            this.left = left;
            this.right = right;
        }

        @Override
        public String toString() {
            return String.valueOf(data);
        }
    }

    /**
     * Huffman-encodes {@code content}.
     *
     * <p>An empty input yields an empty {@code data} array, a {@code bitLength}
     * of 0 and an empty JSON object as key. An input consisting of a single
     * distinct byte value is encoded with the one-bit code {@code "0"} per
     * occurrence.
     *
     * @param content bytes to encode; must not be {@code null}
     * @return the packed bit stream, its bit length and the code table as JSON
     * @throws NullPointerException if {@code content} is {@code null}
     */
    public static HuffmanEncodeResult encode(byte[] content) {
        if (content == null) {
            throw new NullPointerException("content must not be null");
        }
        HuffmanEncodeResult res = new HuffmanEncodeResult();
        // 1. Count how often each byte value occurs.
        Map<Byte, Integer> characterWeightMap = getCharacterWeightMap(content);
        // 2. Build the Huffman tree (null when there is nothing to encode).
        Node huffmanTree = createHuffmanTree(characterWeightMap);
        // 3. Derive the byte -> code table from the tree.
        Map<Byte, String> characterHuffmanCodeMap = getCharacterHuffmanCodeMap(huffmanTree);
        // 4. Translate the content into one long string of '0'/'1' characters.
        String huffmanCode = getHuffmanCode(content, characterHuffmanCodeMap);
        // The result is transported as byte[], so pack the bit string.
        res.setData(ByteUtils.convertBinaryStr2ByteArr(huffmanCode));
        res.setBitLength(huffmanCode.length());
        res.setKeyJson(convertKey2JsonString(characterHuffmanCodeMap));
        return res;
    }

    /**
     * Serializes the code table to a JSON string.
     *
     * <p>Keys are converted to strings first: per the original author's note,
     * {@code Byte} map keys come back as {@code Integer} after a JSON round
     * trip, so everything is kept as {@code String} to avoid that pitfall.
     *
     * @param characterHuffmanCodeMap byte -> Huffman code table
     * @return JSON object whose keys are the decimal byte values
     */
    private static String convertKey2JsonString(Map<Byte, String> characterHuffmanCodeMap) {
        Map<String, String> res = Maps.newHashMap();
        for (Map.Entry<Byte, String> entry : characterHuffmanCodeMap.entrySet()) {
            res.put(String.valueOf(entry.getKey()), entry.getValue());
        }
        return JSON.toJSONString(res);
    }

    /**
     * Counts the occurrences of every byte value in {@code content}.
     *
     * @param content bytes to analyse
     * @return map from byte value to its number of occurrences
     */
    private static Map<Byte, Integer> getCharacterWeightMap(byte[] content) {
        Map<Byte, Integer> res = Maps.newHashMap();
        for (byte b : content) {
            res.merge(b, 1, Integer::sum);
        }
        return res;
    }

    /**
     * Builds the Huffman tree for the given frequencies.
     *
     * @param characterWeightMap byte value -> number of occurrences
     * @return the root of the tree, or {@code null} if the map is empty
     */
    private static Node createHuffmanTree(Map<Byte, Integer> characterWeightMap) {
        if (characterWeightMap.isEmpty()) {
            return null;
        }
        // 1. One leaf per distinct byte value.
        List<Node> nodeList = Lists.newArrayList();
        for (Map.Entry<Byte, Integer> entry : characterWeightMap.entrySet()) {
            nodeList.add(new Node(entry.getKey(), entry.getValue()));
        }
        // 2. Repeatedly merge the two lightest trees until one remains.
        while (nodeList.size() > 1) {
            // List.sort is stable, so ties keep their current relative order and
            // the resulting tree shape is deterministic for a given list order.
            nodeList.sort((o1, o2) -> Integer.compare(o1.weight, o2.weight));
            Node n1 = nodeList.remove(0);
            Node n2 = nodeList.remove(0);
            // The merged tree's weight is the sum of its two subtrees.
            nodeList.add(new Node(n1.weight + n2.weight, n1, n2));
        }
        return nodeList.get(0);
    }

    /**
     * Derives the byte -> Huffman code table from a Huffman tree.
     *
     * @param node root of the tree; may be {@code null} for an empty input
     * @return code table; empty when {@code node} is {@code null}
     */
    private static Map<Byte, String> getCharacterHuffmanCodeMap(Node node) {
        Map<Byte, String> characterHuffmanCodeMap = Maps.newHashMap();
        if (node == null) {
            return characterHuffmanCodeMap;
        }
        if (node.left == null && node.right == null) {
            // Only one distinct byte value: the root is a leaf and has no edges,
            // which would give an empty code. Assign a one-bit code instead so
            // every occurrence still contributes a bit to the output.
            characterHuffmanCodeMap.put(node.data, "0");
            return characterHuffmanCodeMap;
        }
        getCharacterHuffmanCodeMap(node, characterHuffmanCodeMap, new StringBuilder());
        return characterHuffmanCodeMap;
    }

    /**
     * Recursive worker: walks the tree and records the path to every leaf.
     *
     * @param node                    current node
     * @param characterHuffmanCodeMap table being filled
     * @param sb                      path from the root to {@code node}; restored
     *                                to its incoming content before returning
     */
    private static void getCharacterHuffmanCodeMap(Node node, Map<Byte, String> characterHuffmanCodeMap, StringBuilder sb) {
        // Leaf: the accumulated path is this byte's code.
        if (node.left == null && node.right == null) {
            characterHuffmanCodeMap.put(node.data, sb.toString());
            return;
        }
        int depth = sb.length();
        // Left edge contributes bit 0.
        if (node.left != null) {
            sb.append('0');
            getCharacterHuffmanCodeMap(node.left, characterHuffmanCodeMap, sb);
            sb.setLength(depth);
        }
        // Right edge contributes bit 1.
        if (node.right != null) {
            sb.append('1');
            getCharacterHuffmanCodeMap(node.right, characterHuffmanCodeMap, sb);
            sb.setLength(depth);
        }
    }

    /**
     * Concatenates the Huffman codes of all bytes in {@code content}.
     *
     * @param content                 original bytes
     * @param characterHuffmanCodeMap byte -> Huffman code table
     * @return the encoded content as a string of '0'/'1' characters
     */
    private static String getHuffmanCode(byte[] content, Map<Byte, String> characterHuffmanCodeMap) {
        StringBuilder sb = new StringBuilder();
        for (byte b : content) {
            sb.append(characterHuffmanCodeMap.get(b));
        }
        return sb.toString();
    }

    /** Demo: encodes a sample sentence and prints the packed bytes and the code table. */
    public static void main(String[] args) {
        String s = "i like like like java do you like a java";
        // Explicit charset so the demo does not depend on the platform default.
        HuffmanEncodeResult res = encode(s.getBytes(java.nio.charset.StandardCharsets.UTF_8));
        byte[] bytes = res.getData();
        for (byte b : bytes) {
            System.out.print(b + " ");
        }
        System.out.println();
        System.out.println(res.getKeyJson());
    }
}
package com.eh.ftd.dsa.ds.utils;
/**
 * Conversions between strings of '0'/'1' characters and packed byte arrays.
 *
 * @author David Li
 * @create 2020/06/28 19:57
 */
public class ByteUtils {

    /**
     * Packs a binary string into a byte array, 8 characters per byte.
     *
     * <p>If the length is not a multiple of 8, the final (shorter) chunk is
     * parsed as a plain binary number, so leading zeros in that chunk are not
     * represented in the resulting byte.
     *
     * @param str string of '0'/'1' characters
     * @return packed bytes; empty when {@code str} is empty
     * @throws NumberFormatException if a chunk is not a valid base-2 number
     */
    public static byte[] convertBinaryStr2ByteArr(String str) {
        int bitCount = str.length();
        // Round up so a trailing partial chunk gets its own byte.
        byte[] res = new byte[(bitCount + 7) / 8];
        int index = 0;
        for (int i = 0; i < bitCount; i += 8) {
            String strByte = str.substring(i, Math.min(i + 8, bitCount));
            // Values 128..255 wrap to negative bytes; the low 8 bits are preserved.
            res[index++] = (byte) Integer.parseInt(strByte, 2);
        }
        return res;
    }

    /**
     * Expands a byte array into a binary string.
     *
     * <p>Every byte except the last is rendered as exactly 8 bits. The last byte
     * is rendered without leading zeros (for example 28 becomes {@code 11100}),
     * mirroring how {@link #convertBinaryStr2ByteArr(String)} packs a trailing
     * partial chunk.
     *
     * @param arr packed bytes
     * @return string of '0'/'1' characters; empty when {@code arr} is empty
     */
    public static String convertByteArr2BinaryStr(byte[] arr) {
        StringBuilder sb = new StringBuilder(arr.length * 8);
        int last = arr.length - 1;
        for (int i = 0; i < arr.length; i++) {
            // Mask to the unsigned value: a negative byte would otherwise be
            // sign-extended and print as 32 bits.
            int unsigned = arr[i] & 0xFF;
            if (i != last) {
                // Setting bit 8 forces a 9-digit result; dropping the leading '1'
                // leaves the byte zero-padded to 8 digits.
                sb.append(Integer.toBinaryString(unsigned | 0x100).substring(1));
            } else {
                // Last byte: no padding, since the original chunk may have been
                // shorter than 8 bits.
                sb.append(Integer.toBinaryString(unsigned));
            }
        }
        return sb.toString();
    }

    /** Demo: packs a sample bit string and prints each byte with its binary form. */
    public static void main(String[] args) {
        byte[] arr = convertBinaryStr2ByteArr("1010100010111111110010001011111111001000101111111100100101001101110001110000011011101000111100101000101111111100110001001010011011100");
        for (byte b : arr) {
            String s = Integer.toBinaryString(b);
            System.out.println(b + "\t" + s);
        }
    }
}
|