]> git.proxmox.com Git - ceph.git/blame - ceph/src/isa-l/igzip/generate_custom_hufftables.c
bump version to 18.2.4-pve3
[ceph.git] / ceph / src / isa-l / igzip / generate_custom_hufftables.c
CommitLineData
7c673cae
FG
1/**********************************************************************
2 Copyright(c) 2011-2016 Intel Corporation All rights reserved.
3
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
6 are met:
7 * Redistributions of source code must retain the above copyright
8 notice, this list of conditions and the following disclaimer.
9 * Redistributions in binary form must reproduce the above copyright
10 notice, this list of conditions and the following disclaimer in
11 the documentation and/or other materials provided with the
12 distribution.
13 * Neither the name of Intel Corporation nor the names of its
14 contributors may be used to endorse or promote products derived
15 from this software without specific prior written permission.
16
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28**********************************************************************/
29
30/* This program can be used to generate custom a custom huffman encoding to get
31 * better data compression. This is most useful when the type of data being
32 * compressed is well known.
33 *
34 * To use generate_custom_hufftables, pass a sequence of files to the program
35 * that together form an accurate representation of the data that is being
36 * compressed. Generate_custom_hufftables will then produce the file
37 * hufftables_c.c, which should be moved to replace its counterpart in the igzip
38 * source folder. After recompiling the Isa-l library, the igzip compression
39 * functions will use the new hufftables.
40 *
41 * Generate_custom_hufftables should be compiled with the same compile time
42 * parameters as the igzip source code. Generating custom hufftables with
43 * different compile time parameters may cause igzip to produce invalid output
44 * for the reasons described below. The default parameters used by
45 * generate_custom_hufftables are the same as the default parameters used by
46 * igzip.
47 *
224ce89b
WB
48 * *WARNING* generate custom hufftables must be compiled with a IGZIP_HIST_SIZE
49 * that is at least as large as the IGZIP_HIST_SIZE used by igzip. By default
50 * IGZIP_HIST_SIZE is 32K, the maximum usable IGZIP_HIST_SIZE is 32K. The reason
51 * for this is to generate better compression. Igzip cannot produce look back
52 * distances with sizes larger than the IGZIP_HIST_SIZE igzip was compiled with,
53 * so look back distances with sizes larger than IGZIP_HIST_SIZE are not
54 * assigned a huffman code. The definition of LONGER_HUFFTABLES must be
55 * consistent as well since that definition changes the size of the structures
56 * printed by this tool.
7c673cae 57 *
7c673cae
FG
58 */
59
60#include <stdint.h>
61#include <stdio.h>
62#include <inttypes.h>
224ce89b
WB
63#include <string.h>
64#include <stdlib.h>
65#include "igzip_lib.h"
7c673cae 66
20effc67
TL
67#include "huff_codes.h"
68#include "huffman.h"
69
7c673cae
FG
70/*These max code lengths are limited by how the data is stored in
71 * hufftables.asm. The deflate standard max is 15.*/
72
224ce89b 73#define MAX_HEADER_SIZE ISAL_DEF_MAX_HDR_SIZE
7c673cae
FG
74
75#define GZIP_HEADER_SIZE 10
76#define GZIP_TRAILER_SIZE 8
f91f0fd5
TL
77#define ZLIB_HEADER_SIZE 2
78#define ZLIB_TRAILER_SIZE 4
7c673cae
FG
79
80/**
81 * @brief Prints a table of uint8_t elements to a file.
82 * @param outfile: the file the table is printed to.
83 * @param table: the table to be printed.
84 * @param length: number of elements to be printed.
85 * @param header: header to append in front of the table.
86 * @param footer: footer to append at the end of the table.
87 * @param begin_line: string printed at beginning of new line
88 */
89void fprint_uint8_table(FILE * outfile, uint8_t * table, uint64_t length, char *header,
90 char *footer, char *begin_line)
91{
92 int i;
93 fprintf(outfile, "%s", header);
94 for (i = 0; i < length - 1; i++) {
95 if ((i & 7) == 0)
96 fprintf(outfile, "\n%s", begin_line);
97 else
98 fprintf(outfile, " ");
99 fprintf(outfile, "0x%02x,", table[i]);
100 }
101
102 if ((i & 7) == 0)
103 fprintf(outfile, "\n%s", begin_line);
104 else
105 fprintf(outfile, " ");
106 fprintf(outfile, "0x%02x", table[i]);
107 fprintf(outfile, "%s", footer);
108
109}
110
111/**
112 * @brief Prints a table of uint16_t elements to a file.
113 * @param outfile: the file the table is printed to.
114 * @param table: the table to be printed.
115 * @param length: number of elements to be printed.
116 * @param header: header to append in front of the table.
117 * @param footer: footer to append at the end of the table.
118 * @param begin_line: string printed at beginning of new line
119 */
120void fprint_uint16_table(FILE * outfile, uint16_t * table, uint64_t length, char *header,
121 char *footer, char *begin_line)
122{
123 int i;
124 fprintf(outfile, "%s", header);
125 for (i = 0; i < length - 1; i++) {
126 if ((i & 7) == 0)
127 fprintf(outfile, "\n%s", begin_line);
128 else
129 fprintf(outfile, " ");
130 fprintf(outfile, "0x%04x,", table[i]);
131 }
132
133 if ((i & 7) == 0)
134 fprintf(outfile, "\n%s", begin_line);
135 else
136 fprintf(outfile, " ");
137 fprintf(outfile, "0x%04x", table[i]);
138 fprintf(outfile, "%s", footer);
139
140}
141
142/**
143 * @brief Prints a table of uint32_t elements to a file.
144 * @param outfile: the file the table is printed to.
145 * @param table: the table to be printed.
146 * @param length: number of elements to be printed.
147 * @param header: header to append in front of the table.
148 * @param footer: footer to append at the end of the table.
149 * @param begin_line: string printed at beginning of new line
150 */
151void fprint_uint32_table(FILE * outfile, uint32_t * table, uint64_t length, char *header,
152 char *footer, char *begin_line)
153{
154 int i;
155 fprintf(outfile, "%s", header);
156 for (i = 0; i < length - 1; i++) {
157 if ((i & 3) == 0)
158 fprintf(outfile, "\n%s", begin_line);
159 else
160 fprintf(outfile, " ");
161 fprintf(outfile, "0x%08x,", table[i]);
162 }
163
164 if ((i & 3) == 0)
165 fprintf(outfile, "%s", begin_line);
166 else
167 fprintf(outfile, " ");
168 fprintf(outfile, "0x%08x", table[i]);
169 fprintf(outfile, "%s", footer);
170
171}
172
224ce89b
WB
173void fprint_hufftables(FILE * output_file, char *hufftables_name,
174 struct isal_hufftables *hufftables)
7c673cae 175{
224ce89b
WB
176 fprintf(output_file, "struct isal_hufftables %s = {\n\n", hufftables_name);
177
178 fprint_uint8_table(output_file, hufftables->deflate_hdr,
179 hufftables->deflate_hdr_count +
180 (hufftables->deflate_hdr_extra_bits + 7) / 8,
181 "\t.deflate_hdr = {", "},\n\n", "\t\t");
182
183 fprintf(output_file, "\t.deflate_hdr_count = %d,\n", hufftables->deflate_hdr_count);
184 fprintf(output_file, "\t.deflate_hdr_extra_bits = %d,\n\n",
185 hufftables->deflate_hdr_extra_bits);
186
187 fprint_uint32_table(output_file, hufftables->dist_table, IGZIP_DIST_TABLE_SIZE,
188 "\t.dist_table = {", "},\n\n", "\t\t");
189
190 fprint_uint32_table(output_file, hufftables->len_table, IGZIP_LEN_TABLE_SIZE,
191 "\t.len_table = {", "},\n\n", "\t\t");
192
193 fprint_uint16_table(output_file, hufftables->lit_table, IGZIP_LIT_TABLE_SIZE,
194 "\t.lit_table = {", "},\n\n", "\t\t");
195 fprint_uint8_table(output_file, hufftables->lit_table_sizes, IGZIP_LIT_TABLE_SIZE,
196 "\t.lit_table_sizes = {", "},\n\n", "\t\t");
197
198 fprint_uint16_table(output_file, hufftables->dcodes,
199 ISAL_DEF_DIST_SYMBOLS - IGZIP_DECODE_OFFSET,
200 "\t.dcodes = {", "},\n\n", "\t\t");
201 fprint_uint8_table(output_file, hufftables->dcodes_sizes,
202 ISAL_DEF_DIST_SYMBOLS - IGZIP_DECODE_OFFSET,
203 "\t.dcodes_sizes = {", "}\n", "\t\t");
7c673cae
FG
204 fprintf(output_file, "};\n");
205}
206
224ce89b 207void fprint_header(FILE * output_file)
7c673cae 208{
224ce89b 209
7c673cae
FG
210 fprintf(output_file, "#include <stdint.h>\n");
211 fprintf(output_file, "#include <igzip_lib.h>\n\n");
212
224ce89b
WB
213 fprintf(output_file, "#if IGZIP_HIST_SIZE > %d\n"
214 "# error \"Invalid history size for the custom hufftable\"\n"
215 "#endif\n", IGZIP_HIST_SIZE);
216
217#ifdef LONGER_HUFFTABLE
218 fprintf(output_file, "#ifndef LONGER_HUFFTABLE\n"
219 "# error \"Custom hufftable requires LONGER_HUFFTABLE to be defined \"\n"
220 "#endif\n");
221#else
222 fprintf(output_file, "#ifdef LONGER_HUFFTABLE\n"
223 "# error \"Custom hufftable requires LONGER_HUFFTABLE to not be defined \"\n"
224 "#endif\n");
225#endif
226 fprintf(output_file, "\n");
227
7c673cae
FG
228 fprintf(output_file, "const uint8_t gzip_hdr[] = {\n"
229 "\t0x1f, 0x8b, 0x08, 0x00, 0x00,\n" "\t0x00, 0x00, 0x00, 0x00, 0xff\t};\n\n");
230
231 fprintf(output_file, "const uint32_t gzip_hdr_bytes = %d;\n", GZIP_HEADER_SIZE);
f91f0fd5
TL
232 fprintf(output_file, "const uint32_t gzip_trl_bytes = %d;\n\n", GZIP_TRAILER_SIZE);
233
234 fprintf(output_file, "const uint8_t zlib_hdr[] = { 0x78, 0x01 };\n\n");
235 fprintf(output_file, "const uint32_t zlib_hdr_bytes = %d;\n", ZLIB_HEADER_SIZE);
236 fprintf(output_file, "const uint32_t zlib_trl_bytes = %d;\n", ZLIB_TRAILER_SIZE);
7c673cae
FG
237}
238
20effc67
TL
239static uint32_t convert_dist_to_dist_sym(uint32_t dist)
240{
241 assert(dist <= 32768 && dist > 0);
242 if (dist <= 32768) {
243 uint32_t msb = dist > 4 ? bsr(dist - 1) - 2 : 0;
244 return (msb * 2) + ((dist - 1) >> msb);
245 } else {
246 return ~0;
247 }
248}
249
250/**
251 * @brief Returns the deflate symbol value for a repeat length.
252 */
253static uint32_t convert_length_to_len_sym(uint32_t length)
254{
255 assert(length > 2 && length < 259);
256
257 /* Based on tables on page 11 in RFC 1951 */
258 if (length < 11)
259 return 257 + length - 3;
260 else if (length < 19)
261 return 261 + (length - 3) / 2;
262 else if (length < 35)
263 return 265 + (length - 3) / 4;
264 else if (length < 67)
265 return 269 + (length - 3) / 8;
266 else if (length < 131)
267 return 273 + (length - 3) / 16;
268 else if (length < 258)
269 return 277 + (length - 3) / 32;
270 else
271 return 285;
272}
273
274void isal_update_histogram_dict(uint8_t * start_stream, int dict_length, int length,
275 struct isal_huff_histogram *histogram)
276{
277 uint32_t literal = 0, hash;
278 uint16_t seen, *last_seen = histogram->hash_table;
279 uint8_t *current, *end_stream, *next_hash, *end, *end_dict;
280 uint32_t match_length;
281 uint32_t dist;
282 uint64_t *lit_len_histogram = histogram->lit_len_histogram;
283 uint64_t *dist_histogram = histogram->dist_histogram;
284
285 if (length <= 0)
286 return;
287
288 end_stream = start_stream + dict_length + length;
289 end_dict = start_stream + dict_length;
290
291 memset(last_seen, 0, sizeof(histogram->hash_table)); /* Initialize last_seen to be 0. */
292
293 for (current = start_stream; current < end_dict - 4; current++) {
294 literal = load_u32(current);
295 hash = compute_hash(literal) & LVL0_HASH_MASK;
296 last_seen[hash] = (current - start_stream) & 0xFFFF;
297 }
298
299 for (current = start_stream + dict_length; current < end_stream - 3; current++) {
300 literal = load_u32(current);
301 hash = compute_hash(literal) & LVL0_HASH_MASK;
302 seen = last_seen[hash];
303 last_seen[hash] = (current - start_stream) & 0xFFFF;
304 dist = (current - start_stream - seen) & 0xFFFF;
305 if (dist - 1 < D - 1) {
306 assert(start_stream <= current - dist);
307 match_length =
308 compare258(current - dist, current, end_stream - current);
309 if (match_length >= SHORTEST_MATCH) {
310 next_hash = current;
311#ifdef ISAL_LIMIT_HASH_UPDATE
312 end = next_hash + 3;
313#else
314 end = next_hash + match_length;
315#endif
316 if (end > end_stream - 3)
317 end = end_stream - 3;
318 next_hash++;
319 for (; next_hash < end; next_hash++) {
320 literal = load_u32(next_hash);
321 hash = compute_hash(literal) & LVL0_HASH_MASK;
322 last_seen[hash] = (next_hash - start_stream) & 0xFFFF;
323 }
324
325 dist_histogram[convert_dist_to_dist_sym(dist)] += 1;
326 lit_len_histogram[convert_length_to_len_sym(match_length)] +=
327 1;
328 current += match_length - 1;
329 continue;
330 }
331 }
332 lit_len_histogram[literal & 0xFF] += 1;
333 }
334
335 for (; current < end_stream; current++)
336 lit_len_histogram[*current] += 1;
337
338 lit_len_histogram[256] += 1;
339 return;
340}
341
7c673cae
FG
342int main(int argc, char *argv[])
343{
344 long int file_length;
20effc67 345 int argi = 1;
7c673cae 346 uint8_t *stream = NULL;
224ce89b 347 struct isal_hufftables hufftables;
7c673cae 348 struct isal_huff_histogram histogram;
224ce89b 349 struct isal_zstream tmp_stream;
20effc67
TL
350 FILE *file = NULL;
351 FILE *dict_file = NULL;
352 FILE *hist_file = NULL;
353 long int dict_file_length = 0;
354 long int hist_file_length = 0;
355 uint8_t *dict_stream = NULL;
7c673cae
FG
356
357 if (argc == 1) {
358 printf("Error, no input file.\n");
359 return 1;
360 }
361
20effc67
TL
362 if (argc > 3 && argv[1][0] == '-' && argv[1][1] == 'd') {
363 dict_file = fopen(argv[2], "r");
364
365 fseek(dict_file, 0, SEEK_END);
366 dict_file_length = ftell(dict_file);
367 fseek(dict_file, 0, SEEK_SET);
368 dict_file_length -= ftell(dict_file);
369 dict_stream = malloc(dict_file_length);
370 if (dict_stream == NULL) {
371 printf("Failed to allocate memory to read in dictionary file\n");
372 fclose(dict_file);
373 return 1;
374 }
375 if (fread(dict_stream, 1, dict_file_length, dict_file) != dict_file_length) {
376 printf("Error occurred when reading dictionary file");
377 fclose(dict_file);
378 free(dict_stream);
379 return 1;
380 }
381 isal_update_histogram(dict_stream, dict_file_length, &histogram);
382
383 printf("Read %ld bytes of dictionary file %s\n", dict_file_length, argv[2]);
384 argi += 2;
385 fclose(dict_file);
386 free(dict_stream);
387 }
388
389 if ((argc > argi + 1) && argv[argi][0] == '-' && argv[argi][1] == 'h') {
390 hist_file = fopen(argv[argi + 1], "r+");
391 fseek(hist_file, 0, SEEK_END);
392 hist_file_length = ftell(hist_file);
393 fseek(hist_file, 0, SEEK_SET);
394 hist_file_length -= ftell(hist_file);
395 if (hist_file_length > sizeof(histogram)) {
396 printf("Histogram file too long\n");
397 return 1;
398 }
399 if (fread(&histogram, 1, hist_file_length, hist_file) != hist_file_length) {
400 printf("Error occurred when reading history file");
401 fclose(hist_file);
402 return 1;
403 }
404 fseek(hist_file, 0, SEEK_SET);
7c673cae 405
20effc67
TL
406 printf("Read %ld bytes of history file %s\n", hist_file_length,
407 argv[argi + 1]);
408 argi += 2;
409 } else
410 memset(&histogram, 0, sizeof(histogram)); /* Initialize histograms. */
411
412 while (argi < argc) {
413 printf("Processing %s\n", argv[argi]);
414 file = fopen(argv[argi], "r");
7c673cae
FG
415 if (file == NULL) {
416 printf("Error opening file\n");
417 return 1;
418 }
419 fseek(file, 0, SEEK_END);
420 file_length = ftell(file);
421 fseek(file, 0, SEEK_SET);
422 file_length -= ftell(file);
20effc67 423 stream = malloc(file_length + dict_file_length);
7c673cae
FG
424 if (stream == NULL) {
425 printf("Failed to allocate memory to read in file\n");
426 fclose(file);
427 return 1;
428 }
20effc67
TL
429 if (dict_file_length > 0)
430 memcpy(stream, dict_stream, dict_file_length);
431
432 if (fread(&stream[dict_file_length], 1, file_length, file) != file_length) {
7c673cae
FG
433 printf("Error occurred when reading file");
434 fclose(file);
435 free(stream);
436 return 1;
437 }
438
439 /* Create a histogram of frequency of symbols found in stream to
440 * generate the huffman tree.*/
20effc67
TL
441 if (0 == dict_file_length)
442 isal_update_histogram(stream, file_length, &histogram);
443 else
444 isal_update_histogram_dict(stream, dict_file_length, file_length,
445 &histogram);
7c673cae
FG
446
447 fclose(file);
448 free(stream);
20effc67 449 argi++;
7c673cae
FG
450 }
451
224ce89b 452 isal_create_hufftables(&hufftables, &histogram);
7c673cae 453
224ce89b
WB
454 file = fopen("hufftables_c.c", "w");
455 if (file == NULL) {
456 printf("Error creating file hufftables_c.c\n");
7c673cae
FG
457 return 1;
458 }
459
224ce89b 460 fprint_header(file);
7c673cae 461
224ce89b 462 fprintf(file, "\n");
7c673cae 463
224ce89b 464 fprint_hufftables(file, "hufftables_default", &hufftables);
7c673cae 465
224ce89b 466 fprintf(file, "\n");
7c673cae 467
224ce89b
WB
468 isal_deflate_stateless_init(&tmp_stream);
469 isal_deflate_set_hufftables(&tmp_stream, NULL, IGZIP_HUFFTABLE_STATIC);
470 fprint_hufftables(file, "hufftables_static", tmp_stream.hufftables);
7c673cae
FG
471
472 fclose(file);
473
20effc67
TL
474 if (hist_file) {
475 int len = fwrite(&histogram, 1, sizeof(histogram), hist_file);
476 printf("wrote %d bytes of histogram file\n", len);
477 fclose(hist_file);
478 }
7c673cae
FG
479 return 0;
480}