]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /********************************************************************** |
2 | Copyright(c) 2011-2016 Intel Corporation All rights reserved. | |
3 | ||
4 | Redistribution and use in source and binary forms, with or without | |
5 | modification, are permitted provided that the following conditions | |
6 | are met: | |
7 | * Redistributions of source code must retain the above copyright | |
8 | notice, this list of conditions and the following disclaimer. | |
9 | * Redistributions in binary form must reproduce the above copyright | |
10 | notice, this list of conditions and the following disclaimer in | |
11 | the documentation and/or other materials provided with the | |
12 | distribution. | |
13 | * Neither the name of Intel Corporation nor the names of its | |
14 | contributors may be used to endorse or promote products derived | |
15 | from this software without specific prior written permission. | |
16 | ||
17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
28 | **********************************************************************/ | |
29 | ||
30 | #ifndef HUFF_CODES_H | |
31 | #define HUFF_CODES_H | |
32 | ||
33 | #include <immintrin.h> | |
34 | #include <stdint.h> | |
35 | #include <string.h> | |
36 | #include <assert.h> | |
37 | #include "igzip_lib.h" | |
38 | #include "bitbuf2.h" | |
39 | ||
40 | #ifdef _MSC_VER | |
41 | # include <intrin.h> | |
42 | #else | |
43 | # include <x86intrin.h> | |
44 | #endif | |
45 | ||
46 | #define LIT_LEN IGZIP_LIT_LEN | |
47 | #define DIST_LEN IGZIP_DIST_LEN | |
48 | #define CODE_LEN_CODES 19 | |
49 | #define HUFF_LEN 19 | |
50 | #ifdef LONGER_HUFFTABLE | |
51 | # define DCODE_OFFSET 26 | |
52 | #else | |
53 | # define DCODE_OFFSET 20 | |
54 | #endif | |
55 | #define DYN_HDR_START_LEN 17 | |
56 | #define MAX_HISTHEAP_SIZE LIT_LEN | |
57 | #define MAX_HUFF_TREE_DEPTH 15 | |
58 | #define D IGZIP_D /* Amount of history */ | |
59 | ||
60 | #define MAX_DEFLATE_CODE_LEN 15 | |
61 | #define MAX_SAFE_LIT_CODE_LEN 13 | |
62 | #define MAX_SAFE_DIST_CODE_LEN 12 | |
63 | ||
64 | #define LONG_DIST_TABLE_SIZE 8192 | |
65 | #define SHORT_DIST_TABLE_SIZE 1024 | |
66 | #define LEN_TABLE_SIZE 256 | |
67 | #define LIT_TABLE_SIZE 257 | |
68 | #define LAST_BLOCK 1 | |
69 | ||
70 | #define LEN_EXTRA_BITS_START 264 | |
71 | #define LEN_EXTRA_BITS_INTERVAL 4 | |
72 | #define DIST_EXTRA_BITS_START 3 | |
73 | #define DIST_EXTRA_BITS_INTERVAL 2 | |
74 | ||
75 | #define INVALID_LIT_LEN_HUFFCODE 1 | |
76 | #define INVALID_DIST_HUFFCODE 1 | |
77 | #define INVALID_HUFFCODE 1 | |
78 | ||
79 | /** | |
80 | * @brief Structure used to store huffman codes | |
81 | */ | |
82 | struct huff_code { | |
83 | uint16_t code; | |
84 | uint8_t length; | |
85 | }; | |
86 | ||
87 | /** | |
88 | * @brief Binary tree used to store and create a huffman tree. | |
89 | */ | |
90 | struct huff_tree { | |
91 | uint16_t value; | |
92 | uint64_t frequency; | |
93 | struct huff_tree *left; | |
94 | struct huff_tree *right; | |
95 | }; | |
96 | ||
97 | /** | |
98 | * @brief Nodes in a doubly linked list. | |
99 | */ | |
100 | struct linked_list_node { | |
101 | uint16_t value; | |
102 | struct linked_list_node *next; | |
103 | struct linked_list_node *previous; | |
104 | }; | |
105 | ||
106 | /** | |
107 | * @brief This structure is a doubly linked list. | |
108 | */ | |
109 | struct linked_list { | |
110 | uint64_t length; | |
111 | struct linked_list_node *start; | |
112 | struct linked_list_node *end; | |
113 | }; | |
114 | ||
115 | /** | |
116 | * @brief This is a binary minheap structure which stores huffman trees. | |
117 | * @details The huffman trees are sorted by the frequency of the root. | |
118 | * The structure is represented in a fixed sized array. | |
119 | */ | |
120 | struct histheap { | |
121 | struct huff_tree tree[MAX_HISTHEAP_SIZE]; | |
122 | uint16_t size; | |
123 | }; | |
124 | ||
125 | /** | |
126 | * @brief Inserts a hufftree into a histheap. | |
127 | * @param element: the hufftree to be inserted | |
128 | * @param heap: the heap which element is being inserted into. | |
129 | * @requires This function assumes the heap has enough allocated space. | |
130 | * @returns Returns the index in heap of the inserted element | |
131 | */ | |
132 | int heap_push(struct huff_tree element, struct histheap *heap); | |
133 | ||
134 | /** | |
135 | * @brief Removes the top element from the heap and returns it. | |
136 | */ | |
137 | struct huff_tree heap_pop(struct histheap *heap); | |
138 | ||
139 | /** | |
140 | * @brief Removes the first element from list and returns it. | |
141 | */ | |
142 | struct linked_list_node *pop_from_front(struct linked_list *list); | |
143 | ||
144 | /** | |
145 | * @brief Adds new_element to the front of list. | |
146 | */ | |
147 | void append_to_front(struct linked_list *list, struct linked_list_node *new_element); | |
148 | ||
149 | /** | |
150 | * @brief Adds new_element to the end of list. | |
151 | */ | |
152 | void append_to_back(struct linked_list *list, struct linked_list_node *new_element); | |
153 | ||
154 | /** | |
155 | * @brief Returns the deflate symbol value for a repeat length. | |
156 | */ | |
157 | uint32_t convert_length_to_len_sym(uint32_t length); | |
158 | ||
159 | /** | |
160 | * @brief Returns the deflate symbol value for a look back distance. | |
161 | */ | |
162 | uint32_t convert_dist_to_dist_sym(uint32_t dist); | |
163 | ||
164 | /** | |
165 | * Constructs a huffman tree on tree_array which only uses elements with non-zero frequency. | |
166 | * @requires Assumes there will be at least two symbols in the produced tree. | |
167 | * @requires tree_array must have length at least 2*size-1, and size must be less than 286. | |
168 | * @param tree_array: array of huff_tree elements used to create a huffman tree, the first | |
169 | * size elements of the array are the leaf elements in the huffman tree. | |
170 | * @param histogram: a histogram of the frequency of elements in tree_array. | |
171 | * @param size: the number of leaf elements in the huffman tree. | |
172 | */ | |
173 | struct huff_tree create_symbol_subset_huff_tree(struct huff_tree *tree_array, | |
174 | uint64_t * histogram, uint32_t size); | |
175 | ||
176 | /** | |
177 | * @brief Construct a huffman tree on tree_array which uses every symbol. | |
178 | * @requires tree_array must have length at least 2*size-1, and size must be less than 286. | |
179 | * @param tree_array: array of huff_tree elements used to create a huffman tree, the first | |
180 | * @param size elements of the array are the leaf elements in the huffman tree. | |
181 | * @param histogram: a histogram of the frequency of elements in tree_array. | |
182 | * @param size: the number of leaf elements in the huffman tree. | |
183 | */ | |
184 | struct huff_tree create_huff_tree(struct huff_tree *tree_array, uint64_t * histogram, | |
185 | uint32_t size); | |
186 | ||
187 | /** | |
188 | * @brief Creates a deflate compliant huffman tree with maximum depth max_depth. | |
189 | * @details The huffman tree is represented as a lookup table. | |
190 | * @param huff_lookup_table: The output lookup table. | |
191 | * @param table_length: The length of table. | |
192 | * @param root: the input huffman tree the created tree is based on. | |
193 | * @param max_depth: maximum depth the huffman tree can have | |
194 | * @returns Returns 0 if sucessful and returns 1 otherwise. | |
195 | */ | |
196 | int create_huff_lookup(struct huff_code *huff_lookup_table, int table_length, | |
197 | struct huff_tree root, uint8_t max_depth); | |
198 | ||
199 | /** | |
200 | * @brief Determines the code length for every value in a huffmant tree. | |
201 | * @param huff_lookup_table: An output lookup table used to store the code lengths | |
202 | * @param corresponding to the possible values | |
203 | * @param count: An output histogram representing code length versus number of occurences. | |
204 | * @param current_node: A node of the huffman tree being analyzed currently. | |
205 | * @param current_depth: The depth of the current node in the huffman tree. | |
206 | * @returns Returns 0 if sucessful and returns 1 otherwise. | |
207 | */ | |
208 | int find_code_lengths(struct huff_code *huff_lookup_table, uint16_t * count, | |
209 | struct huff_tree root, uint8_t max_depth); | |
210 | ||
211 | /** | |
212 | * @brief Creates an array of linked lists. | |
213 | * @detail Each linked list contains all the elements with codes of a given length for | |
214 | * lengths less than 16, and an list for all elements with codes at least 16. These lists | |
215 | * are sorted by frequency from least frequent to most frequent within any given code length. | |
216 | * @param depth_array: depth_array[i] is a linked list of elements with code length i | |
217 | * @param linked_lists: An input structure the linked lists in depth array are built on. | |
218 | * @param current_node: the current node being visited in a huffman tree | |
219 | * @param current_depth: the depth of current_node in a huffman tree | |
220 | */ | |
221 | void huffman_tree_traversal(struct linked_list *depth_array, | |
222 | struct linked_list_node *linked_lists, uint16_t * extra_nodes, | |
223 | uint8_t max_depth, struct huff_tree current_node, | |
224 | uint16_t current_depth); | |
225 | ||
226 | /** | |
227 | * @brief Determines the code each element of a deflate compliant huffman tree and stores | |
228 | * it in a lookup table | |
229 | * @requires table has been initialized to already contain the code length for each element. | |
230 | * @param table: A lookup table used to store the codes. | |
231 | * @param table_length: The length of table. | |
232 | * @param count: a histogram representing the number of occurences of codes of a given length | |
233 | */ | |
234 | void set_huff_codes(struct huff_code *table, int table_length, uint16_t * count); | |
235 | ||
236 | /* Reverse the first length bits in bits and returns that value */ | |
237 | uint16_t bit_reverse(uint16_t bits, uint8_t length); | |
238 | ||
239 | /** | |
240 | * @brief Checks if a literal/length huffman table can be stored in the igzip hufftables files. | |
241 | * @param table: A literal/length huffman code lookup table. | |
242 | * @returns index of the first symbol which fails and 0xFFFF otherwise. | |
243 | */ | |
244 | uint16_t valid_lit_huff_table(struct huff_code *huff_code_table); | |
245 | ||
246 | /** | |
247 | * @brief Checks if a distance huffman table can be stored in the igzip hufftables files. | |
248 | * @param table: A distance huffman code lookup table. | |
249 | * @returnsthe index of the first symbol which fails and 0xFFFF otherwise. | |
250 | */ | |
251 | uint16_t valid_dist_huff_table(struct huff_code *huff_code_table); | |
252 | ||
253 | /** | |
254 | * @brief Creates the dynamic huffman deflate header. | |
255 | * @returns Returns the length of header in bits. | |
256 | * @requires This function requires header is large enough to store the whole header. | |
257 | * @param header: The output header. | |
258 | * @param lit_huff_table: A literal/length code huffman lookup table. | |
259 | * @param dist_huff_table: A distance huffman code lookup table. | |
260 | * @param end_of_block: Value determining whether end of block header is produced or not; | |
261 | * 0 corresponds to not end of block and all other inputs correspond to end of block. | |
262 | */ | |
263 | int create_header(uint8_t *header, uint32_t header_length, struct huff_code *lit_huff_table, | |
264 | struct huff_code *dist_huff_table, uint32_t end_of_block); | |
265 | ||
266 | /** | |
267 | * @brief Creates a run length encoded reprsentation of huff_table. | |
268 | * @details Also creates a histogram representing the frequency of each symbols | |
269 | * @returns Returns the number of symbols written into huffman_rep. | |
270 | * @param huffman_rep: The output run length encoded version of huff_table. | |
271 | * @param histogram: The output histogram of frequencies of elements in huffman_rep. | |
272 | * @param extra_bits: An output table storing extra bits associated with huffman_rep. | |
273 | * @param huff_table: The input huffman_table or concatonation of huffman_tables. | |
274 | * @parma len: The length of huff_table. | |
275 | */ | |
276 | uint16_t create_huffman_rep(uint16_t * huffman_rep, uint64_t * histogram, | |
277 | uint16_t * extra_bits, struct huff_code *huff_table, uint16_t len); | |
278 | ||
279 | /** | |
280 | * @brief Flushes the symbols for a repeat of last_code for length run_length into huffman_rep. | |
281 | * @param huffman_rep: pointer to array containing the output huffman_rep. | |
282 | * @param histogram: histogram of elements seen in huffman_rep. | |
283 | * @param extra_bits: an array holding extra bits for the corresponding symbol in huffman_rep. | |
284 | * @param huff_table: a concatenated list of huffman lookup tables. | |
285 | * @param current_index: The next spot elements will be written in huffman_rep. | |
286 | */ | |
287 | uint16_t flush_repeats(uint16_t * huffman_rep, uint64_t * histogram, uint16_t * extra_bits, | |
288 | uint16_t last_code, uint16_t run_length, uint16_t current_index); | |
289 | ||
290 | /** | |
291 | * @brief Creates the header for run length encoded huffman trees. | |
292 | * @param header: the output header. | |
293 | * @param lookup_table: a huffman lookup table. | |
294 | * @param huffman_rep: a run length encoded huffman tree. | |
295 | * @extra_bits: extra bits associated with the corresponding spot in huffman_rep | |
296 | * @param huffman_rep_length: the length of huffman_rep. | |
297 | * @param end_of_block: Value determining whether end of block header is produced or not; | |
298 | * 0 corresponds to not end of block and all other inputs correspond to end of block. | |
299 | * @param hclen: Length of huffman code for huffman codes minus 4. | |
300 | * @param hlit: Length of literal/length table minus 257. | |
301 | * @parm hdist: Length of distance table minus 1. | |
302 | */ | |
303 | int create_huffman_header(uint8_t *header, uint32_t header_length, struct huff_code *lookup_table, | |
304 | uint16_t * huffman_rep, uint16_t * extra_bits, | |
305 | uint16_t huffman_rep_length, uint32_t end_of_block, uint32_t hclen, | |
306 | uint32_t hlit, uint32_t hdist); | |
307 | ||
308 | /** | |
309 | * @brief Creates a two table representation of huffman codes. | |
310 | * @param code_table: output table containing the code | |
311 | * @param code_size_table: output table containing the code length | |
312 | * @param length: the lenght of hufftable | |
313 | * @param hufftable: a huffman lookup table | |
314 | */ | |
315 | void create_code_tables(uint16_t * code_table, uint8_t * code_length_table, | |
316 | uint32_t length, struct huff_code *hufftable); | |
317 | ||
318 | /** | |
319 | * @brief Creates a packed representation of length huffman codes. | |
320 | * @details In packed_table, bits 32:8 contain the extra bits appended to the huffman | |
321 | * code and bits 8:0 contain the code length. | |
322 | * @param packed_table: the output table | |
323 | * @param length: the length of lit_len_hufftable | |
324 | * @param lit_len_hufftable: a literal/length huffman lookup table | |
325 | */ | |
326 | void create_packed_len_table(uint32_t * packed_table, struct huff_code *lit_len_hufftable); | |
327 | ||
328 | /** | |
329 | * @brief Creates a packed representation of distance huffman codes. | |
330 | * @details In packed_table, bits 32:8 contain the extra bits appended to the huffman | |
331 | * code and bits 8:0 contain the code length. | |
332 | * @param packed_table: the output table | |
333 | * @param length: the length of lit_len_hufftable | |
334 | * @param dist_hufftable: a distance huffman lookup table | |
335 | */ | |
336 | void create_packed_dist_table(uint32_t * packed_table, uint32_t length, | |
337 | struct huff_code *dist_hufftable); | |
338 | ||
339 | /** | |
340 | * @brief Checks to see if the hufftable is usable by igzip | |
341 | * | |
342 | * @param lit_len_hufftable: literal/lenght huffman code | |
343 | * @param dist_hufftable: distance huffman code | |
344 | * @returns Returns 0 if the table is usable | |
345 | */ | |
346 | int are_hufftables_useable(struct huff_code *lit_len_hufftable, | |
347 | struct huff_code *dist_hufftable); | |
348 | #endif |