/* Copyright 2015 Google Inc. All Rights Reserved.

   Distributed under MIT license.
   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

/* Function for fast encoding of an input fragment, independently of the input
   history. This function uses one-pass processing: when we find a backward
   match, we immediately emit the corresponding command and literal codes to
   the bit stream.

   Adapted from the CompressFragment() function in
   https://github.com/google/snappy/blob/master/snappy.cc */

#include "./compress_fragment.h"

#include <string.h>  /* memcmp, memcpy, memset */

#include "../common/types.h"
#include "./brotli_bit_stream.h"
#include "./entropy_encode.h"
#include "./fast_log.h"
#include "./find_match_length.h"
#include "./memory.h"
#include "./port.h"
#include "./write_bits.h"

#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif

/* kHashMul32 multiplier has these properties:
   * The multiplier must be odd. Otherwise we may lose the highest bit.
   * No long streaks of 1s or 0s.
   * There is no effort to ensure that it is a prime; being odd is enough
     for this use.
   * The number has been tuned heuristically against compression benchmarks. */
static const uint32_t kHashMul32 = 0x1e35a7bd;

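/* Note on coverage: on little-endian targets the 64-bit load below, shifted
   left by 24, keeps exactly bytes p[0..4] in the high bits, so the hash
   depends on the same five bytes that IsMatch() compares. */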
static BROTLI_INLINE uint32_t Hash(const uint8_t* p, size_t shift) {
  const uint64_t h = (BROTLI_UNALIGNED_LOAD64(p) << 24) * kHashMul32;
  return (uint32_t)(h >> shift);
}

static BROTLI_INLINE uint32_t HashBytesAtOffset(
    uint64_t v, int offset, size_t shift) {
  assert(offset >= 0);
  assert(offset <= 3);
  {
    const uint64_t h = ((v >> (8 * offset)) << 24) * kHashMul32;
    return (uint32_t)(h >> shift);
  }
}

static BROTLI_INLINE BROTLI_BOOL IsMatch(const uint8_t* p1, const uint8_t* p2) {
  return TO_BROTLI_BOOL(
      BROTLI_UNALIGNED_LOAD32(p1) == BROTLI_UNALIGNED_LOAD32(p2) &&
      p1[4] == p2[4]);
}

/* Builds a literal prefix code into "depths" and "bits" based on the statistics
   of the "input" string and stores it into the bit stream.
   Note that the prefix code here is built from the pre-LZ77 input, therefore
   we can only approximate the statistics of the actual literal stream.
   Moreover, for long inputs we build a histogram from a sample of the input
   and thus have to assign a non-zero depth for each literal.
   Returns the estimated compression ratio in millibytes/char for encoding the
   given input with the generated code. */
static size_t BuildAndStoreLiteralPrefixCode(MemoryManager* m,
                                             const uint8_t* input,
                                             const size_t input_size,
                                             uint8_t depths[256],
                                             uint16_t bits[256],
                                             size_t* storage_ix,
                                             uint8_t* storage) {
  uint32_t histogram[256] = { 0 };
  size_t histogram_total;
  size_t i;
  if (input_size < (1 << 15)) {
    for (i = 0; i < input_size; ++i) {
      ++histogram[input[i]];
    }
    histogram_total = input_size;
    for (i = 0; i < 256; ++i) {
      /* We weigh the first 11 samples with weight 3 to account for the
         balancing effect of the LZ77 phase on the histogram. */
      const uint32_t adjust = 2 * BROTLI_MIN(uint32_t, histogram[i], 11u);
      histogram[i] += adjust;
      histogram_total += adjust;
    }
  } else {
    static const size_t kSampleRate = 29;
    for (i = 0; i < input_size; i += kSampleRate) {
      ++histogram[input[i]];
    }
    histogram_total = (input_size + kSampleRate - 1) / kSampleRate;
    for (i = 0; i < 256; ++i) {
      /* We add 1 to each population count to avoid 0 bit depths (since this is
         only a sample and we don't know if the symbol appears or not), and we
         weigh the first 11 samples with weight 3 to account for the balancing
         effect of the LZ77 phase on the histogram (more frequent symbols are
         more likely to be in backward references instead of as literals). */
      const uint32_t adjust = 1 + 2 * BROTLI_MIN(uint32_t, histogram[i], 11u);
      histogram[i] += adjust;
      histogram_total += adjust;
    }
  }
  BrotliBuildAndStoreHuffmanTreeFast(m, histogram, histogram_total,
                                     /* max_bits = */ 8,
                                     depths, bits, storage_ix, storage);
  if (BROTLI_IS_OOM(m)) return 0;
  {
    size_t literal_ratio = 0;
    for (i = 0; i < 256; ++i) {
      if (histogram[i]) literal_ratio += histogram[i] * depths[i];
    }
    /* Estimated encoding ratio, millibytes per symbol. */
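    /* Since 125 == 1000 / 8, this converts an average code length in bits to
       millibytes per character: e.g. an average depth of 4 bits yields a
       literal_ratio of 500, i.e. about half a byte per literal. */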
    return (literal_ratio * 125) / histogram_total;
  }
}

/* Builds a command and distance prefix code (each 64 symbols) into "depth" and
   "bits" based on "histogram" and stores it into the bit stream. */
static void BuildAndStoreCommandPrefixCode(const uint32_t histogram[128],
    uint8_t depth[128], uint16_t bits[128], size_t* storage_ix,
    uint8_t* storage) {
  /* Tree size for building a tree over 64 symbols is 2 * 64 + 1. */
  HuffmanTree tree[129];
  uint8_t cmd_depth[BROTLI_NUM_COMMAND_SYMBOLS] = { 0 };
  uint16_t cmd_bits[64];

  BrotliCreateHuffmanTree(histogram, 64, 15, tree, depth);
  BrotliCreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]);
  /* We have to jump through a few hoops here in order to compute
     the command bits because the symbols are in a different order than in
     the full alphabet. This looks complicated, but having the symbols
     in this order in the command bits saves a few branches in the Emit*
     functions. */
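  /* The depth copies below place reduced-alphabet slots 0-23, 24-31, 32-39,
     40-47, 48-55 and 56-63 at command symbols 0-23, 32-39, 48-55, 24-31,
     40-47 and 56-63, respectively; the bit copies afterwards apply the
     inverse permutation. */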
  memcpy(cmd_depth, depth, 24);
  memcpy(cmd_depth + 24, depth + 40, 8);
  memcpy(cmd_depth + 32, depth + 24, 8);
  memcpy(cmd_depth + 40, depth + 48, 8);
  memcpy(cmd_depth + 48, depth + 32, 8);
  memcpy(cmd_depth + 56, depth + 56, 8);
  BrotliConvertBitDepthsToSymbols(cmd_depth, 64, cmd_bits);
  memcpy(bits, cmd_bits, 48);
  memcpy(bits + 24, cmd_bits + 32, 16);
  memcpy(bits + 32, cmd_bits + 48, 16);
  memcpy(bits + 40, cmd_bits + 24, 16);
  memcpy(bits + 48, cmd_bits + 40, 16);
  memcpy(bits + 56, cmd_bits + 56, 16);
  BrotliConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
  {
    /* Create the bit length array for the full command alphabet. */
    size_t i;
    memset(cmd_depth, 0, 64);  /* only the first 64 values were used */
    memcpy(cmd_depth, depth, 8);
    memcpy(cmd_depth + 64, depth + 8, 8);
    memcpy(cmd_depth + 128, depth + 16, 8);
    memcpy(cmd_depth + 192, depth + 24, 8);
    memcpy(cmd_depth + 384, depth + 32, 8);
    for (i = 0; i < 8; ++i) {
      cmd_depth[128 + 8 * i] = depth[40 + i];
      cmd_depth[256 + 8 * i] = depth[48 + i];
      cmd_depth[448 + 8 * i] = depth[56 + i];
    }
    BrotliStoreHuffmanTree(
        cmd_depth, BROTLI_NUM_COMMAND_SYMBOLS, tree, storage_ix, storage);
  }
  BrotliStoreHuffmanTree(&depth[64], 64, tree, storage_ix, storage);
}

/* REQUIRES: insertlen < 6210 */
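/* Worked example: insertlen == 100 takes the second branch below:
   tail == 98, nbits == Log2FloorNonZero(98) - 1 == 5, prefix == 98 >> 5 == 3,
   inscode == (5 << 1) + 3 + 42 == 55, and the 5 extra bits carry
   98 - (3 << 5) == 2. */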
static BROTLI_INLINE void EmitInsertLen(size_t insertlen,
                                        const uint8_t depth[128],
                                        const uint16_t bits[128],
                                        uint32_t histo[128],
                                        size_t* storage_ix,
                                        uint8_t* storage) {
  if (insertlen < 6) {
    const size_t code = insertlen + 40;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    ++histo[code];
  } else if (insertlen < 130) {
    const size_t tail = insertlen - 2;
    const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
    const size_t prefix = tail >> nbits;
    const size_t inscode = (nbits << 1) + prefix + 42;
    BrotliWriteBits(depth[inscode], bits[inscode], storage_ix, storage);
    BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
    ++histo[inscode];
  } else if (insertlen < 2114) {
    const size_t tail = insertlen - 66;
    const uint32_t nbits = Log2FloorNonZero(tail);
    const size_t code = nbits + 50;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
    ++histo[code];
  } else {
    BrotliWriteBits(depth[61], bits[61], storage_ix, storage);
    BrotliWriteBits(12, insertlen - 2114, storage_ix, storage);
    ++histo[21];
  }
}

static BROTLI_INLINE void EmitLongInsertLen(size_t insertlen,
                                            const uint8_t depth[128],
                                            const uint16_t bits[128],
                                            uint32_t histo[128],
                                            size_t* storage_ix,
                                            uint8_t* storage) {
  if (insertlen < 22594) {
    BrotliWriteBits(depth[62], bits[62], storage_ix, storage);
    BrotliWriteBits(14, insertlen - 6210, storage_ix, storage);
    ++histo[22];
  } else {
    BrotliWriteBits(depth[63], bits[63], storage_ix, storage);
    BrotliWriteBits(24, insertlen - 22594, storage_ix, storage);
    ++histo[23];
  }
}

static BROTLI_INLINE void EmitCopyLen(size_t copylen,
                                      const uint8_t depth[128],
                                      const uint16_t bits[128],
                                      uint32_t histo[128],
                                      size_t* storage_ix,
                                      uint8_t* storage) {
  if (copylen < 10) {
    BrotliWriteBits(
        depth[copylen + 14], bits[copylen + 14], storage_ix, storage);
    ++histo[copylen + 14];
  } else if (copylen < 134) {
    const size_t tail = copylen - 6;
    const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
    const size_t prefix = tail >> nbits;
    const size_t code = (nbits << 1) + prefix + 20;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
    ++histo[code];
  } else if (copylen < 2118) {
    const size_t tail = copylen - 70;
    const uint32_t nbits = Log2FloorNonZero(tail);
    const size_t code = nbits + 28;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
    ++histo[code];
  } else {
    BrotliWriteBits(depth[39], bits[39], storage_ix, storage);
    BrotliWriteBits(24, copylen - 2118, storage_ix, storage);
    ++histo[47];
  }
}

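/* Like EmitCopyLen, but for a copy that reuses the previous distance. Short
   copies (codes below 16) map to command codes that already imply "use the
   last distance", so no distance symbol follows them; longer copies emit a
   regular copy code followed by symbol 64, the first distance slot of the
   reduced alphabet, which stands for distance code 0 ("repeat the last
   distance"). */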
static BROTLI_INLINE void EmitCopyLenLastDistance(size_t copylen,
                                                  const uint8_t depth[128],
                                                  const uint16_t bits[128],
                                                  uint32_t histo[128],
                                                  size_t* storage_ix,
                                                  uint8_t* storage) {
  if (copylen < 12) {
    BrotliWriteBits(depth[copylen - 4], bits[copylen - 4], storage_ix, storage);
    ++histo[copylen - 4];
  } else if (copylen < 72) {
    const size_t tail = copylen - 8;
    const uint32_t nbits = Log2FloorNonZero(tail) - 1;
    const size_t prefix = tail >> nbits;
    const size_t code = (nbits << 1) + prefix + 4;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
    ++histo[code];
  } else if (copylen < 136) {
    const size_t tail = copylen - 8;
    const size_t code = (tail >> 5) + 30;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(5, tail & 31, storage_ix, storage);
    BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
    ++histo[code];
    ++histo[64];
  } else if (copylen < 2120) {
    const size_t tail = copylen - 72;
    const uint32_t nbits = Log2FloorNonZero(tail);
    const size_t code = nbits + 28;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
    BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
    ++histo[code];
    ++histo[64];
  } else {
    BrotliWriteBits(depth[39], bits[39], storage_ix, storage);
    BrotliWriteBits(24, copylen - 2120, storage_ix, storage);
    BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
    ++histo[47];
    ++histo[64];
  }
}

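/* Worked example: distance == 10 gives d == 13, nbits == 2,
   prefix == (13 >> 2) & 1 == 1, offset == (2 + 1) << 2 == 12,
   distcode == 2 * (2 - 1) + 1 + 80 == 83, and the 2 extra bits carry
   d - offset == 1. */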
static BROTLI_INLINE void EmitDistance(size_t distance,
                                       const uint8_t depth[128],
                                       const uint16_t bits[128],
                                       uint32_t histo[128],
                                       size_t* storage_ix, uint8_t* storage) {
  const size_t d = distance + 3;
  const uint32_t nbits = Log2FloorNonZero(d) - 1u;
  const size_t prefix = (d >> nbits) & 1;
  const size_t offset = (2 + prefix) << nbits;
  const size_t distcode = 2 * (nbits - 1) + prefix + 80;
  BrotliWriteBits(depth[distcode], bits[distcode], storage_ix, storage);
  BrotliWriteBits(nbits, d - offset, storage_ix, storage);
  ++histo[distcode];
}

static BROTLI_INLINE void EmitLiterals(const uint8_t* input, const size_t len,
                                       const uint8_t depth[256],
                                       const uint16_t bits[256],
                                       size_t* storage_ix, uint8_t* storage) {
  size_t j;
  for (j = 0; j < len; j++) {
    const uint8_t lit = input[j];
    BrotliWriteBits(depth[lit], bits[lit], storage_ix, storage);
  }
}

/* REQUIRES: len <= 1 << 20. */
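/* For example, len == 65536 still fits MNIBBLES == 4 and is stored as the
   16-bit value len - 1 == 65535, while len == 65537 switches to
   MNIBBLES == 5 with a 20-bit length field. */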
static void BrotliStoreMetaBlockHeader(
    size_t len, BROTLI_BOOL is_uncompressed, size_t* storage_ix,
    uint8_t* storage) {
  /* ISLAST */
  BrotliWriteBits(1, 0, storage_ix, storage);
  if (len <= (1U << 16)) {
    /* MNIBBLES is 4 */
    BrotliWriteBits(2, 0, storage_ix, storage);
    BrotliWriteBits(16, len - 1, storage_ix, storage);
  } else {
    /* MNIBBLES is 5 */
    BrotliWriteBits(2, 1, storage_ix, storage);
    BrotliWriteBits(20, len - 1, storage_ix, storage);
  }
  /* ISUNCOMPRESSED */
  BrotliWriteBits(1, (uint64_t)is_uncompressed, storage_ix, storage);
}

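/* Overwrites "n_bits" bits of "bits" starting at bit position "pos" of
   "array", at most one byte per iteration. Worked example: pos == 11,
   n_bits == 6 gives byte_pos == 1, n_unchanged_bits == 3,
   n_changed_bits == 5 and mask == 0xFFFFFF07, so bits 3..7 of array[1] are
   rewritten; the remaining bit is written at pos == 16 in the next
   iteration. */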
static void UpdateBits(size_t n_bits, uint32_t bits, size_t pos,
    uint8_t *array) {
  while (n_bits > 0) {
    size_t byte_pos = pos >> 3;
    size_t n_unchanged_bits = pos & 7;
    size_t n_changed_bits = BROTLI_MIN(size_t, n_bits, 8 - n_unchanged_bits);
    size_t total_bits = n_unchanged_bits + n_changed_bits;
    uint32_t mask =
        (~((1u << total_bits) - 1u)) | ((1u << n_unchanged_bits) - 1u);
    uint32_t unchanged_bits = array[byte_pos] & mask;
    uint32_t changed_bits = bits & ((1u << n_changed_bits) - 1u);
    array[byte_pos] =
        (uint8_t)((changed_bits << n_unchanged_bits) | unchanged_bits);
    n_bits -= n_changed_bits;
    bits >>= n_changed_bits;
    pos += n_changed_bits;
  }
}

static void RewindBitPosition(const size_t new_storage_ix,
                              size_t* storage_ix, uint8_t* storage) {
  const size_t bitpos = new_storage_ix & 7;
  const size_t mask = (1u << bitpos) - 1;
  storage[new_storage_ix >> 3] &= (uint8_t)mask;
  *storage_ix = new_storage_ix;
}

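/* Decides whether to extend the current meta-block by "len" more bytes by
   comparing, on a 1-in-43 sample, the cost of coding the sample with the
   current literal depths against the sample's empirical entropy: rearranged,
   the test below accepts the merge when the current code spends at most
   about 0.5 bits per sample plus a 200-bit slack over that entropy bound. */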
static BROTLI_BOOL ShouldMergeBlock(
    const uint8_t* data, size_t len, const uint8_t* depths) {
  size_t histo[256] = { 0 };
  static const size_t kSampleRate = 43;
  size_t i;
  for (i = 0; i < len; i += kSampleRate) {
    ++histo[data[i]];
  }
  {
    const size_t total = (len + kSampleRate - 1) / kSampleRate;
    double r = (FastLog2(total) + 0.5) * (double)total + 200;
    for (i = 0; i < 256; ++i) {
      r -= (double)histo[i] * (depths[i] + FastLog2(histo[i]));
    }
    return TO_BROTLI_BOOL(r >= 0.0);
  }
}

/* Acceptable loss for uncompressible speedup is 2% */
#define MIN_RATIO 980

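/* Uncompressed mode is used only when the pending literal run is at least
   50 times longer than everything emitted for this meta-block so far, and
   the literal code costs more than MIN_RATIO millibytes per character, i.e.
   compression would save less than 2%. */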
static BROTLI_INLINE BROTLI_BOOL ShouldUseUncompressedMode(
    const uint8_t* metablock_start, const uint8_t* next_emit,
    const size_t insertlen, const size_t literal_ratio) {
  const size_t compressed = (size_t)(next_emit - metablock_start);
  if (compressed * 50 > insertlen) {
    return BROTLI_FALSE;
  } else {
    return TO_BROTLI_BOOL(literal_ratio > MIN_RATIO);
  }
}

static void EmitUncompressedMetaBlock(const uint8_t* begin, const uint8_t* end,
                                      const size_t storage_ix_start,
                                      size_t* storage_ix, uint8_t* storage) {
  const size_t len = (size_t)(end - begin);
  RewindBitPosition(storage_ix_start, storage_ix, storage);
  BrotliStoreMetaBlockHeader(len, 1, storage_ix, storage);
  *storage_ix = (*storage_ix + 7u) & ~7u;
  memcpy(&storage[*storage_ix >> 3], begin, len);
  *storage_ix += len << 3;
  storage[*storage_ix >> 3] = 0;
}

static uint32_t kCmdHistoSeed[128] = {
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 0, 0, 0, 0,
};

void BrotliCompressFragmentFast(MemoryManager* m,
                                const uint8_t* input, size_t input_size,
                                BROTLI_BOOL is_last,
                                int* table, size_t table_size,
                                uint8_t cmd_depth[128], uint16_t cmd_bits[128],
                                size_t* cmd_code_numbits, uint8_t* cmd_code,
                                size_t* storage_ix, uint8_t* storage) {
  uint32_t cmd_histo[128];
  const uint8_t* ip_end;

  /* "next_emit" is a pointer to the first byte that is not covered by a
     previous copy. Bytes between "next_emit" and the start of the next copy or
     the end of the input will be emitted as literal bytes. */
  const uint8_t* next_emit = input;
  /* Save the start of the first block for position and distance
     computations. */
  const uint8_t* base_ip = input;

  static const size_t kFirstBlockSize = 3 << 15;
  static const size_t kMergeBlockSize = 1 << 16;

  const size_t kInputMarginBytes = 16;
  const size_t kMinMatchLen = 5;

  const uint8_t* metablock_start = input;
  size_t block_size = BROTLI_MIN(size_t, input_size, kFirstBlockSize);
  size_t total_block_size = block_size;
  /* Save the bit position of the MLEN field of the meta-block header, so that
     we can update it later if we decide to extend this meta-block. */
  size_t mlen_storage_ix = *storage_ix + 3;

  uint8_t lit_depth[256];
  uint16_t lit_bits[256];

  size_t literal_ratio;

  const uint8_t* ip;
  int last_distance;

  const size_t shift = 64u - Log2FloorNonZero(table_size);
  assert(table_size);
  assert(table_size <= (1u << 31));
  /* table must be a power of two */
  assert((table_size & (table_size - 1)) == 0);
  assert(table_size - 1 ==
      (size_t)(MAKE_UINT64_T(0xFFFFFFFF, 0xFFFFFF) >> shift));
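  /* E.g. table_size == (1 << 15) gives shift == 49, so Hash() produces
     indices in [0, 1 << 15). */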

  if (input_size == 0) {
    assert(is_last);
    BrotliWriteBits(1, 1, storage_ix, storage);  /* islast */
    BrotliWriteBits(1, 1, storage_ix, storage);  /* isempty */
    *storage_ix = (*storage_ix + 7u) & ~7u;
    return;
  }

  BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
  /* No block splits, no contexts. */
  BrotliWriteBits(13, 0, storage_ix, storage);

  literal_ratio = BuildAndStoreLiteralPrefixCode(
      m, input, block_size, lit_depth, lit_bits, storage_ix, storage);
  if (BROTLI_IS_OOM(m)) return;

  {
    /* Store the pre-compressed command and distance prefix codes. */
    size_t i;
    for (i = 0; i + 7 < *cmd_code_numbits; i += 8) {
      BrotliWriteBits(8, cmd_code[i >> 3], storage_ix, storage);
    }
  }
  BrotliWriteBits(*cmd_code_numbits & 7, cmd_code[*cmd_code_numbits >> 3],
                  storage_ix, storage);

emit_commands:
  /* Initialize the command and distance histograms. We will gather
     statistics of command and distance codes during the processing
     of this block and use it to update the command and distance
     prefix codes for the next block. */
  memcpy(cmd_histo, kCmdHistoSeed, sizeof(kCmdHistoSeed));

  /* "ip" is the input pointer. */
  ip = input;
  last_distance = -1;
  ip_end = input + block_size;

  if (PREDICT_TRUE(block_size >= kInputMarginBytes)) {
    /* For the last block, we need to keep a 16-byte margin so that we can be
       sure that all distances are at most window size - 16.
       For all other blocks, we only need to keep a margin of 5 bytes so that
       we don't go over the block size with a copy. */
    const size_t len_limit = BROTLI_MIN(size_t, block_size - kMinMatchLen,
                                        input_size - kInputMarginBytes);
    const uint8_t* ip_limit = input + len_limit;

    uint32_t next_hash;
    for (next_hash = Hash(++ip, shift); ; ) {
      /* Step 1: Scan forward in the input looking for a 5-byte-long match.
         If we get close to exhausting the input then goto emit_remainder.

         Heuristic match skipping: If 32 bytes are scanned with no matches
         found, start looking only at every other byte. If 32 more bytes are
         scanned, look at every third byte, etc. When a match is found,
         immediately go back to looking at every byte. This is a small loss
         (~5% performance, ~0.1% density) for compressible data due to more
         bookkeeping, but for non-compressible data (such as JPEG) it's a huge
         win since the compressor quickly "realizes" the data is incompressible
         and doesn't bother looking for matches everywhere.

         The "skip" variable keeps track of how many bytes there are since the
         last match; dividing it by 32 (i.e. right-shifting by five) gives the
         number of bytes to move ahead for each iteration. */
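      /* Concretely: "skip" starts at 32 below, so "skip++ >> 5" advances by
         1 byte for the first 32 probes, by 2 bytes for the next 32, by 3 for
         the next 32, and so on. */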
      uint32_t skip = 32;

      const uint8_t* next_ip = ip;
      const uint8_t* candidate;
      assert(next_emit < ip);

      do {
        uint32_t hash = next_hash;
        uint32_t bytes_between_hash_lookups = skip++ >> 5;
        assert(hash == Hash(next_ip, shift));
        ip = next_ip;
        next_ip = ip + bytes_between_hash_lookups;
        if (PREDICT_FALSE(next_ip > ip_limit)) {
          goto emit_remainder;
        }
        next_hash = Hash(next_ip, shift);
        candidate = ip - last_distance;
        if (IsMatch(ip, candidate)) {
          if (PREDICT_TRUE(candidate < ip)) {
            table[hash] = (int)(ip - base_ip);
            break;
          }
        }
        candidate = base_ip + table[hash];
        assert(candidate >= base_ip);
        assert(candidate < ip);

        table[hash] = (int)(ip - base_ip);
      } while (PREDICT_TRUE(!IsMatch(ip, candidate)));

      /* Step 2: Emit the found match together with the literal bytes from
         "next_emit" to the bit stream, and then see if we can find a next
         match immediately afterwards. Repeat until we find no match for the
         input without emitting some literal bytes. */

      {
        /* We have a 5-byte match at ip, and we need to emit bytes in
           [next_emit, ip). */
        const uint8_t* base = ip;
        size_t matched = 5 + FindMatchLengthWithLimit(
            candidate + 5, ip + 5, (size_t)(ip_end - ip) - 5);
        int distance = (int)(base - candidate);  /* > 0 */
        size_t insert = (size_t)(base - next_emit);
        ip += matched;
        assert(0 == memcmp(base, candidate, matched));
        if (PREDICT_TRUE(insert < 6210)) {
          EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
                        storage_ix, storage);
        } else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
                                             literal_ratio)) {
          EmitUncompressedMetaBlock(metablock_start, base, mlen_storage_ix - 3,
                                    storage_ix, storage);
          input_size -= (size_t)(base - input);
          input = base;
          next_emit = input;
          goto next_block;
        } else {
          EmitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
                            storage_ix, storage);
        }
        EmitLiterals(next_emit, insert, lit_depth, lit_bits,
                     storage_ix, storage);
        if (distance == last_distance) {
          BrotliWriteBits(cmd_depth[64], cmd_bits[64], storage_ix, storage);
          ++cmd_histo[64];
        } else {
          EmitDistance((size_t)distance, cmd_depth, cmd_bits,
                       cmd_histo, storage_ix, storage);
          last_distance = distance;
        }
        EmitCopyLenLastDistance(matched, cmd_depth, cmd_bits, cmd_histo,
                                storage_ix, storage);

        next_emit = ip;
        if (PREDICT_FALSE(ip >= ip_limit)) {
          goto emit_remainder;
        }
        /* We could immediately start working at ip now, but to improve
           compression we first update "table" with the hashes of some
           positions within the last copy. */
        {
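          /* With the little-endian load, input_bytes covers ip[-3..4], so
             HashBytesAtOffset(input_bytes, k, shift) hashes the 5 bytes
             starting at ip - 3 + k. */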
          uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 3);
          uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
          uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
          table[prev_hash] = (int)(ip - base_ip - 3);
          prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
          table[prev_hash] = (int)(ip - base_ip - 2);
          prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
          table[prev_hash] = (int)(ip - base_ip - 1);

          candidate = base_ip + table[cur_hash];
          table[cur_hash] = (int)(ip - base_ip);
        }
      }

      while (IsMatch(ip, candidate)) {
        /* We have a 5-byte match at ip, and no need to emit any literal bytes
           prior to ip. */
        const uint8_t* base = ip;
        size_t matched = 5 + FindMatchLengthWithLimit(
            candidate + 5, ip + 5, (size_t)(ip_end - ip) - 5);
        ip += matched;
        last_distance = (int)(base - candidate);  /* > 0 */
        assert(0 == memcmp(base, candidate, matched));
        EmitCopyLen(matched, cmd_depth, cmd_bits, cmd_histo,
                    storage_ix, storage);
        EmitDistance((size_t)last_distance, cmd_depth, cmd_bits,
                     cmd_histo, storage_ix, storage);

        next_emit = ip;
        if (PREDICT_FALSE(ip >= ip_limit)) {
          goto emit_remainder;
        }
        /* We could immediately start working at ip now, but to improve
           compression we first update "table" with the hashes of some
           positions within the last copy. */
        {
          uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 3);
          uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
          uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
          table[prev_hash] = (int)(ip - base_ip - 3);
          prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
          table[prev_hash] = (int)(ip - base_ip - 2);
          prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
          table[prev_hash] = (int)(ip - base_ip - 1);

          candidate = base_ip + table[cur_hash];
          table[cur_hash] = (int)(ip - base_ip);
        }
      }

      next_hash = Hash(++ip, shift);
    }
  }

emit_remainder:
  assert(next_emit <= ip_end);
  input += block_size;
  input_size -= block_size;
  block_size = BROTLI_MIN(size_t, input_size, kMergeBlockSize);

  /* Decide if we want to continue this meta-block instead of emitting the
     last insert-only command. */
  if (input_size > 0 &&
      total_block_size + block_size <= (1 << 20) &&
      ShouldMergeBlock(input, block_size, lit_depth)) {
    assert(total_block_size > (1 << 16));
    /* Update the size of the current meta-block and continue emitting
       commands. We can do this because the current size and the new size
       both have 5 nibbles. */
    total_block_size += block_size;
    UpdateBits(20, (uint32_t)(total_block_size - 1), mlen_storage_ix, storage);
    goto emit_commands;
  }

  /* Emit the remaining bytes as literals. */
  if (next_emit < ip_end) {
    const size_t insert = (size_t)(ip_end - next_emit);
    if (PREDICT_TRUE(insert < 6210)) {
      EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
                    storage_ix, storage);
      EmitLiterals(next_emit, insert, lit_depth, lit_bits, storage_ix, storage);
    } else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
                                         literal_ratio)) {
      EmitUncompressedMetaBlock(metablock_start, ip_end, mlen_storage_ix - 3,
                                storage_ix, storage);
    } else {
      EmitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
                        storage_ix, storage);
      EmitLiterals(next_emit, insert, lit_depth, lit_bits,
                   storage_ix, storage);
    }
  }
  next_emit = ip_end;

next_block:
  /* If we have more data, write a new meta-block header and prefix codes and
     then continue emitting commands. */
  if (input_size > 0) {
    metablock_start = input;
    block_size = BROTLI_MIN(size_t, input_size, kFirstBlockSize);
    total_block_size = block_size;
    /* Save the bit position of the MLEN field of the meta-block header, so
       that we can update it later if we decide to extend this meta-block. */
    mlen_storage_ix = *storage_ix + 3;
    BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
    /* No block splits, no contexts. */
    BrotliWriteBits(13, 0, storage_ix, storage);
    literal_ratio = BuildAndStoreLiteralPrefixCode(
        m, input, block_size, lit_depth, lit_bits, storage_ix, storage);
    if (BROTLI_IS_OOM(m)) return;
    BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depth, cmd_bits,
                                   storage_ix, storage);
    goto emit_commands;
  }

  if (is_last) {
    BrotliWriteBits(1, 1, storage_ix, storage);  /* islast */
    BrotliWriteBits(1, 1, storage_ix, storage);  /* isempty */
    *storage_ix = (*storage_ix + 7u) & ~7u;
  } else {
    /* If this is not the last block, update the command and distance prefix
       codes for the next block and store the compressed forms. */
    cmd_code[0] = 0;
    *cmd_code_numbits = 0;
    BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depth, cmd_bits,
                                   cmd_code_numbits, cmd_code);
  }
}

#if defined(__cplusplus) || defined(c_plusplus)
}  /* extern "C" */
#endif