]>
Commit | Line | Data |
---|---|---|
11b7501a SB |
1 | /* Copyright 2010 Google Inc. All Rights Reserved.\r |
2 | \r | |
3 | Distributed under MIT license.\r | |
4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT\r | |
5 | */\r | |
6 | \r | |
7 | /* A (forgetful) hash table to the data seen by the compressor, to\r | |
8 | help create backward references to previous data. */\r | |
9 | \r | |
10 | #ifndef BROTLI_ENC_HASH_H_\r | |
11 | #define BROTLI_ENC_HASH_H_\r | |
12 | \r | |
13 | #include <string.h> /* memcmp, memset */\r | |
14 | \r | |
15 | #include "../common/constants.h"\r | |
16 | #include "../common/dictionary.h"\r | |
17 | #include "../common/types.h"\r | |
18 | #include "./dictionary_hash.h"\r | |
19 | #include "./fast_log.h"\r | |
20 | #include "./find_match_length.h"\r | |
21 | #include "./memory.h"\r | |
22 | #include "./port.h"\r | |
23 | #include "./quality.h"\r | |
24 | #include "./static_dict.h"\r | |
25 | \r | |
26 | #if defined(__cplusplus) || defined(c_plusplus)\r | |
27 | extern "C" {\r | |
28 | #endif\r | |
29 | \r | |
30 | #define MAX_TREE_SEARCH_DEPTH 64\r | |
31 | #define MAX_TREE_COMP_LENGTH 128\r | |
32 | #define score_t size_t\r | |
33 | \r | |
34 | static const uint32_t kDistanceCacheIndex[] = {\r | |
35 | 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,\r | |
36 | };\r | |
37 | static const int kDistanceCacheOffset[] = {\r | |
38 | 0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3\r | |
39 | };\r | |
40 | \r | |
41 | static const uint32_t kCutoffTransformsCount = 10;\r | |
42 | static const uint8_t kCutoffTransforms[] = {\r | |
43 | 0, 12, 27, 23, 42, 63, 56, 48, 59, 64\r | |
44 | };\r | |
45 | \r | |
46 | typedef struct HasherSearchResult {\r | |
47 | size_t len;\r | |
48 | size_t len_x_code; /* == len ^ len_code */\r | |
49 | size_t distance;\r | |
50 | score_t score;\r | |
51 | } HasherSearchResult;\r | |
52 | \r | |
53 | typedef struct DictionarySearchStatictics {\r | |
54 | size_t num_lookups;\r | |
55 | size_t num_matches;\r | |
56 | } DictionarySearchStatictics;\r | |
57 | \r | |
58 | /* kHashMul32 multiplier has these properties:\r | |
59 | * The multiplier must be odd. Otherwise we may lose the highest bit.\r | |
60 | * No long streaks of 1s or 0s.\r | |
61 | * There is no effort to ensure that it is a prime, the oddity is enough\r | |
62 | for this use.\r | |
63 | * The number has been tuned heuristically against compression benchmarks. */\r | |
64 | static const uint32_t kHashMul32 = 0x1e35a7bd;\r | |
65 | \r | |
66 | static BROTLI_INLINE uint32_t Hash14(const uint8_t* data) {\r | |
67 | uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;\r | |
68 | /* The higher bits contain more mixture from the multiplication,\r | |
69 | so we take our results from there. */\r | |
70 | return h >> (32 - 14);\r | |
71 | }\r | |
72 | \r | |
73 | #define BROTLI_LITERAL_BYTE_SCORE 540\r | |
74 | #define BROTLI_DISTANCE_BIT_PENALTY 120\r | |
75 | /* Score must be positive after applying maximal penalty. */\r | |
76 | #define BROTLI_SCORE_BASE (BROTLI_DISTANCE_BIT_PENALTY * 8 * sizeof(size_t))\r | |
77 | \r | |
78 | /* Usually, we always choose the longest backward reference. This function\r | |
79 | allows for the exception of that rule.\r | |
80 | \r | |
81 | If we choose a backward reference that is further away, it will\r | |
82 | usually be coded with more bits. We approximate this by assuming\r | |
83 | log2(distance). If the distance can be expressed in terms of the\r | |
84 | last four distances, we use some heuristic constants to estimate\r | |
85 | the bits cost. For the first up to four literals we use the bit\r | |
86 | cost of the literals from the literal cost model, after that we\r | |
87 | use the average bit cost of the cost model.\r | |
88 | \r | |
89 | This function is used to sometimes discard a longer backward reference\r | |
90 | when it is not much longer and the bit cost for encoding it is more\r | |
91 | than the saved literals.\r | |
92 | \r | |
93 | backward_reference_offset MUST be positive. */\r | |
94 | static BROTLI_INLINE score_t BackwardReferenceScore(\r | |
95 | size_t copy_length, size_t backward_reference_offset) {\r | |
96 | return BROTLI_SCORE_BASE + BROTLI_LITERAL_BYTE_SCORE * (score_t)copy_length -\r | |
97 | BROTLI_DISTANCE_BIT_PENALTY * Log2FloorNonZero(backward_reference_offset);\r | |
98 | }\r | |
99 | \r | |
100 | static const score_t kDistanceShortCodeCost[BROTLI_NUM_DISTANCE_SHORT_CODES] = {\r | |
101 | /* Repeat last */\r | |
102 | BROTLI_SCORE_BASE + 60,\r | |
103 | /* 2nd, 3rd, 4th last */\r | |
104 | BROTLI_SCORE_BASE - 95,\r | |
105 | BROTLI_SCORE_BASE - 117,\r | |
106 | BROTLI_SCORE_BASE - 127,\r | |
107 | /* Last with offset */\r | |
108 | BROTLI_SCORE_BASE - 93,\r | |
109 | BROTLI_SCORE_BASE - 93,\r | |
110 | BROTLI_SCORE_BASE - 96,\r | |
111 | BROTLI_SCORE_BASE - 96,\r | |
112 | BROTLI_SCORE_BASE - 99,\r | |
113 | BROTLI_SCORE_BASE - 99,\r | |
114 | /* 2nd last with offset */\r | |
115 | BROTLI_SCORE_BASE - 105,\r | |
116 | BROTLI_SCORE_BASE - 105,\r | |
117 | BROTLI_SCORE_BASE - 115,\r | |
118 | BROTLI_SCORE_BASE - 115,\r | |
119 | BROTLI_SCORE_BASE - 125,\r | |
120 | BROTLI_SCORE_BASE - 125\r | |
121 | };\r | |
122 | \r | |
123 | static BROTLI_INLINE score_t BackwardReferenceScoreUsingLastDistance(\r | |
124 | size_t copy_length, size_t distance_short_code) {\r | |
125 | return BROTLI_LITERAL_BYTE_SCORE * (score_t)copy_length +\r | |
126 | kDistanceShortCodeCost[distance_short_code];\r | |
127 | }\r | |
128 | \r | |
129 | static BROTLI_INLINE void DictionarySearchStaticticsReset(\r | |
130 | DictionarySearchStatictics* self) {\r | |
131 | self->num_lookups = 0;\r | |
132 | self->num_matches = 0;\r | |
133 | }\r | |
134 | \r | |
135 | static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem(\r | |
136 | size_t item, const uint8_t* data, size_t max_length, size_t max_backward,\r | |
137 | HasherSearchResult* out) {\r | |
138 | size_t len;\r | |
139 | size_t dist;\r | |
140 | size_t offset;\r | |
141 | size_t matchlen;\r | |
142 | size_t backward;\r | |
143 | score_t score;\r | |
144 | len = item & 31;\r | |
145 | dist = item >> 5;\r | |
146 | offset = kBrotliDictionaryOffsetsByLength[len] + len * dist;\r | |
147 | if (len > max_length) {\r | |
148 | return BROTLI_FALSE;\r | |
149 | }\r | |
150 | \r | |
151 | matchlen = FindMatchLengthWithLimit(data, &kBrotliDictionary[offset], len);\r | |
152 | if (matchlen + kCutoffTransformsCount <= len || matchlen == 0) {\r | |
153 | return BROTLI_FALSE;\r | |
154 | }\r | |
155 | {\r | |
156 | size_t transform_id = kCutoffTransforms[len - matchlen];\r | |
157 | backward = max_backward + dist + 1 +\r | |
158 | (transform_id << kBrotliDictionarySizeBitsByLength[len]);\r | |
159 | }\r | |
160 | score = BackwardReferenceScore(matchlen, backward);\r | |
161 | if (score < out->score) {\r | |
162 | return BROTLI_FALSE;\r | |
163 | }\r | |
164 | out->len = matchlen;\r | |
165 | out->len_x_code = len ^ matchlen;\r | |
166 | out->distance = backward;\r | |
167 | out->score = score;\r | |
168 | return BROTLI_TRUE;\r | |
169 | }\r | |
170 | \r | |
171 | static BROTLI_INLINE BROTLI_BOOL SearchInStaticDictionary(\r | |
172 | DictionarySearchStatictics* self, const uint8_t* data, size_t max_length,\r | |
173 | size_t max_backward, HasherSearchResult* out, BROTLI_BOOL shallow) {\r | |
174 | size_t key;\r | |
175 | size_t i;\r | |
176 | BROTLI_BOOL is_match_found = BROTLI_FALSE;\r | |
177 | if (self->num_matches < (self->num_lookups >> 7)) {\r | |
178 | return BROTLI_FALSE;\r | |
179 | }\r | |
180 | key = Hash14(data) << 1;\r | |
181 | for (i = 0; i < (shallow ? 1 : 2); ++i, ++key) {\r | |
182 | size_t item = kStaticDictionaryHash[key];\r | |
183 | self->num_lookups++;\r | |
184 | if (item != 0 &&\r | |
185 | TestStaticDictionaryItem(item, data, max_length, max_backward, out)) {\r | |
186 | self->num_matches++;\r | |
187 | is_match_found = BROTLI_TRUE;\r | |
188 | }\r | |
189 | }\r | |
190 | return is_match_found;\r | |
191 | }\r | |
192 | \r | |
193 | typedef struct BackwardMatch {\r | |
194 | uint32_t distance;\r | |
195 | uint32_t length_and_code;\r | |
196 | } BackwardMatch;\r | |
197 | \r | |
198 | static BROTLI_INLINE void InitBackwardMatch(BackwardMatch* self,\r | |
199 | size_t dist, size_t len) {\r | |
200 | self->distance = (uint32_t)dist;\r | |
201 | self->length_and_code = (uint32_t)(len << 5);\r | |
202 | }\r | |
203 | \r | |
204 | static BROTLI_INLINE void InitDictionaryBackwardMatch(BackwardMatch* self,\r | |
205 | size_t dist, size_t len, size_t len_code) {\r | |
206 | self->distance = (uint32_t)dist;\r | |
207 | self->length_and_code =\r | |
208 | (uint32_t)((len << 5) | (len == len_code ? 0 : len_code));\r | |
209 | }\r | |
210 | \r | |
211 | static BROTLI_INLINE size_t BackwardMatchLength(const BackwardMatch* self) {\r | |
212 | return self->length_and_code >> 5;\r | |
213 | }\r | |
214 | \r | |
215 | static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {\r | |
216 | size_t code = self->length_and_code & 31;\r | |
217 | return code ? code : BackwardMatchLength(self);\r | |
218 | }\r | |
219 | \r | |
220 | #define EXPAND_CAT(a, b) CAT(a, b)\r | |
221 | #define CAT(a, b) a ## b\r | |
222 | #define FN(X) EXPAND_CAT(X, HASHER())\r | |
223 | \r | |
224 | #define MAX_NUM_MATCHES_H10 (64 + MAX_TREE_SEARCH_DEPTH)\r | |
225 | \r | |
226 | #define HASHER() H10\r | |
227 | #define HashToBinaryTree HASHER()\r | |
228 | \r | |
229 | #define BUCKET_BITS 17\r | |
230 | #define BUCKET_SIZE (1 << BUCKET_BITS)\r | |
231 | \r | |
232 | static size_t FN(HashTypeLength)(void) { return 4; }\r | |
233 | static size_t FN(StoreLookahead)(void) { return MAX_TREE_COMP_LENGTH; }\r | |
234 | \r | |
235 | static uint32_t FN(HashBytes)(const uint8_t *data) {\r | |
236 | uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;\r | |
237 | /* The higher bits contain more mixture from the multiplication,\r | |
238 | so we take our results from there. */\r | |
239 | return h >> (32 - BUCKET_BITS);\r | |
240 | }\r | |
241 | \r | |
242 | /* A (forgetful) hash table where each hash bucket contains a binary tree of\r | |
243 | sequences whose first 4 bytes share the same hash code.\r | |
244 | Each sequence is MAX_TREE_COMP_LENGTH long and is identified by its starting\r | |
245 | position in the input data. The binary tree is sorted by the lexicographic\r | |
246 | order of the sequences, and it is also a max-heap with respect to the\r | |
247 | starting positions. */\r | |
248 | typedef struct HashToBinaryTree {\r | |
249 | /* The window size minus 1 */\r | |
250 | size_t window_mask_;\r | |
251 | \r | |
252 | /* Hash table that maps the 4-byte hashes of the sequence to the last\r | |
253 | position where this hash was found, which is the root of the binary\r | |
254 | tree of sequences that share this hash bucket. */\r | |
255 | uint32_t buckets_[BUCKET_SIZE];\r | |
256 | \r | |
257 | /* The union of the binary trees of each hash bucket. The root of the tree\r | |
258 | corresponding to a hash is a sequence starting at buckets_[hash] and\r | |
259 | the left and right children of a sequence starting at pos are\r | |
260 | forest_[2 * pos] and forest_[2 * pos + 1]. */\r | |
261 | uint32_t* forest_;\r | |
262 | \r | |
263 | /* A position used to mark a non-existent sequence, i.e. a tree is empty if\r | |
264 | its root is at invalid_pos_ and a node is a leaf if both its children\r | |
265 | are at invalid_pos_. */\r | |
266 | uint32_t invalid_pos_;\r | |
267 | \r | |
268 | size_t forest_size_;\r | |
269 | BROTLI_BOOL is_dirty_;\r | |
270 | } HashToBinaryTree;\r | |
271 | \r | |
272 | static void FN(Reset)(HashToBinaryTree* self) {\r | |
273 | self->is_dirty_ = BROTLI_TRUE;\r | |
274 | }\r | |
275 | \r | |
276 | static void FN(Initialize)(HashToBinaryTree* self) {\r | |
277 | self->forest_ = NULL;\r | |
278 | self->forest_size_ = 0;\r | |
279 | FN(Reset)(self);\r | |
280 | }\r | |
281 | \r | |
282 | static void FN(Cleanup)(MemoryManager* m, HashToBinaryTree* self) {\r | |
283 | BROTLI_FREE(m, self->forest_);\r | |
284 | }\r | |
285 | \r | |
286 | static void FN(Init)(\r | |
287 | MemoryManager* m, HashToBinaryTree* self, const uint8_t* data,\r | |
288 | const BrotliEncoderParams* params, size_t position, size_t bytes,\r | |
289 | BROTLI_BOOL is_last) {\r | |
290 | if (self->is_dirty_) {\r | |
291 | uint32_t invalid_pos;\r | |
292 | size_t num_nodes;\r | |
293 | uint32_t i;\r | |
294 | BROTLI_UNUSED(data);\r | |
295 | self->window_mask_ = (1u << params->lgwin) - 1u;\r | |
296 | invalid_pos = (uint32_t)(0 - self->window_mask_);\r | |
297 | self->invalid_pos_ = invalid_pos;\r | |
298 | for (i = 0; i < BUCKET_SIZE; i++) {\r | |
299 | self->buckets_[i] = invalid_pos;\r | |
300 | }\r | |
301 | num_nodes = (position == 0 && is_last) ? bytes : self->window_mask_ + 1;\r | |
302 | if (num_nodes > self->forest_size_) {\r | |
303 | BROTLI_FREE(m, self->forest_);\r | |
304 | self->forest_ = BROTLI_ALLOC(m, uint32_t, 2 * num_nodes);\r | |
305 | if (BROTLI_IS_OOM(m)) return;\r | |
306 | self->forest_size_ = num_nodes;\r | |
307 | }\r | |
308 | self->is_dirty_ = BROTLI_FALSE;\r | |
309 | }\r | |
310 | }\r | |
311 | \r | |
312 | static BROTLI_INLINE size_t FN(LeftChildIndex)(HashToBinaryTree* self,\r | |
313 | const size_t pos) {\r | |
314 | return 2 * (pos & self->window_mask_);\r | |
315 | }\r | |
316 | \r | |
317 | static BROTLI_INLINE size_t FN(RightChildIndex)(HashToBinaryTree* self,\r | |
318 | const size_t pos) {\r | |
319 | return 2 * (pos & self->window_mask_) + 1;\r | |
320 | }\r | |
321 | \r | |
322 | /* Stores the hash of the next 4 bytes and in a single tree-traversal, the\r | |
323 | hash bucket's binary tree is searched for matches and is re-rooted at the\r | |
324 | current position.\r | |
325 | \r | |
326 | If less than MAX_TREE_COMP_LENGTH data is available, the hash bucket of the\r | |
327 | current position is searched for matches, but the state of the hash table\r | |
328 | is not changed, since we can not know the final sorting order of the\r | |
329 | current (incomplete) sequence.\r | |
330 | \r | |
331 | This function must be called with increasing cur_ix positions. */\r | |
332 | static BROTLI_INLINE BackwardMatch* FN(StoreAndFindMatches)(\r | |
333 | HashToBinaryTree* self, const uint8_t* const BROTLI_RESTRICT data,\r | |
334 | const size_t cur_ix, const size_t ring_buffer_mask, const size_t max_length,\r | |
335 | const size_t max_backward, size_t* const BROTLI_RESTRICT best_len,\r | |
336 | BackwardMatch* BROTLI_RESTRICT matches) {\r | |
337 | const size_t cur_ix_masked = cur_ix & ring_buffer_mask;\r | |
338 | const size_t max_comp_len =\r | |
339 | BROTLI_MIN(size_t, max_length, MAX_TREE_COMP_LENGTH);\r | |
340 | const BROTLI_BOOL should_reroot_tree =\r | |
341 | TO_BROTLI_BOOL(max_length >= MAX_TREE_COMP_LENGTH);\r | |
342 | const uint32_t key = FN(HashBytes)(&data[cur_ix_masked]);\r | |
343 | size_t prev_ix = self->buckets_[key];\r | |
344 | /* The forest index of the rightmost node of the left subtree of the new\r | |
345 | root, updated as we traverse and reroot the tree of the hash bucket. */\r | |
346 | size_t node_left = FN(LeftChildIndex)(self, cur_ix);\r | |
347 | /* The forest index of the leftmost node of the right subtree of the new\r | |
348 | root, updated as we traverse and reroot the tree of the hash bucket. */\r | |
349 | size_t node_right = FN(RightChildIndex)(self, cur_ix);\r | |
350 | /* The match length of the rightmost node of the left subtree of the new\r | |
351 | root, updated as we traverse and reroot the tree of the hash bucket. */\r | |
352 | size_t best_len_left = 0;\r | |
353 | /* The match length of the leftmost node of the right subtree of the new\r | |
354 | root, updated as we traverse and reroot the tree of the hash bucket. */\r | |
355 | size_t best_len_right = 0;\r | |
356 | size_t depth_remaining;\r | |
357 | if (should_reroot_tree) {\r | |
358 | self->buckets_[key] = (uint32_t)cur_ix;\r | |
359 | }\r | |
360 | for (depth_remaining = MAX_TREE_SEARCH_DEPTH; ; --depth_remaining) {\r | |
361 | const size_t backward = cur_ix - prev_ix;\r | |
362 | const size_t prev_ix_masked = prev_ix & ring_buffer_mask;\r | |
363 | if (backward == 0 || backward > max_backward || depth_remaining == 0) {\r | |
364 | if (should_reroot_tree) {\r | |
365 | self->forest_[node_left] = self->invalid_pos_;\r | |
366 | self->forest_[node_right] = self->invalid_pos_;\r | |
367 | }\r | |
368 | break;\r | |
369 | }\r | |
370 | {\r | |
371 | const size_t cur_len = BROTLI_MIN(size_t, best_len_left, best_len_right);\r | |
372 | size_t len;\r | |
373 | assert(cur_len <= MAX_TREE_COMP_LENGTH);\r | |
374 | len = cur_len +\r | |
375 | FindMatchLengthWithLimit(&data[cur_ix_masked + cur_len],\r | |
376 | &data[prev_ix_masked + cur_len],\r | |
377 | max_length - cur_len);\r | |
378 | assert(0 == memcmp(&data[cur_ix_masked], &data[prev_ix_masked], len));\r | |
379 | if (matches && len > *best_len) {\r | |
380 | *best_len = len;\r | |
381 | InitBackwardMatch(matches++, backward, len);\r | |
382 | }\r | |
383 | if (len >= max_comp_len) {\r | |
384 | if (should_reroot_tree) {\r | |
385 | self->forest_[node_left] =\r | |
386 | self->forest_[FN(LeftChildIndex)(self, prev_ix)];\r | |
387 | self->forest_[node_right] =\r | |
388 | self->forest_[FN(RightChildIndex)(self, prev_ix)];\r | |
389 | }\r | |
390 | break;\r | |
391 | }\r | |
392 | if (data[cur_ix_masked + len] > data[prev_ix_masked + len]) {\r | |
393 | best_len_left = len;\r | |
394 | if (should_reroot_tree) {\r | |
395 | self->forest_[node_left] = (uint32_t)prev_ix;\r | |
396 | }\r | |
397 | node_left = FN(RightChildIndex)(self, prev_ix);\r | |
398 | prev_ix = self->forest_[node_left];\r | |
399 | } else {\r | |
400 | best_len_right = len;\r | |
401 | if (should_reroot_tree) {\r | |
402 | self->forest_[node_right] = (uint32_t)prev_ix;\r | |
403 | }\r | |
404 | node_right = FN(LeftChildIndex)(self, prev_ix);\r | |
405 | prev_ix = self->forest_[node_right];\r | |
406 | }\r | |
407 | }\r | |
408 | }\r | |
409 | return matches;\r | |
410 | }\r | |
411 | \r | |
412 | /* Finds all backward matches of &data[cur_ix & ring_buffer_mask] up to the\r | |
413 | length of max_length and stores the position cur_ix in the hash table.\r | |
414 | \r | |
415 | Sets *num_matches to the number of matches found, and stores the found\r | |
416 | matches in matches[0] to matches[*num_matches - 1]. The matches will be\r | |
417 | sorted by strictly increasing length and (non-strictly) increasing\r | |
418 | distance. */\r | |
419 | static BROTLI_INLINE size_t FN(FindAllMatches)(HashToBinaryTree* self,\r | |
420 | const uint8_t* data, const size_t ring_buffer_mask, const size_t cur_ix,\r | |
421 | const size_t max_length, const size_t max_backward,\r | |
422 | const BrotliEncoderParams* params, BackwardMatch* matches) {\r | |
423 | BackwardMatch* const orig_matches = matches;\r | |
424 | const size_t cur_ix_masked = cur_ix & ring_buffer_mask;\r | |
425 | size_t best_len = 1;\r | |
426 | const size_t short_match_max_backward =\r | |
427 | params->quality != HQ_ZOPFLIFICATION_QUALITY ? 16 : 64;\r | |
428 | size_t stop = cur_ix - short_match_max_backward;\r | |
429 | uint32_t dict_matches[BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1];\r | |
430 | size_t i;\r | |
431 | if (cur_ix < short_match_max_backward) { stop = 0; }\r | |
432 | for (i = cur_ix - 1; i > stop && best_len <= 2; --i) {\r | |
433 | size_t prev_ix = i;\r | |
434 | const size_t backward = cur_ix - prev_ix;\r | |
435 | if (PREDICT_FALSE(backward > max_backward)) {\r | |
436 | break;\r | |
437 | }\r | |
438 | prev_ix &= ring_buffer_mask;\r | |
439 | if (data[cur_ix_masked] != data[prev_ix] ||\r | |
440 | data[cur_ix_masked + 1] != data[prev_ix + 1]) {\r | |
441 | continue;\r | |
442 | }\r | |
443 | {\r | |
444 | const size_t len =\r | |
445 | FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],\r | |
446 | max_length);\r | |
447 | if (len > best_len) {\r | |
448 | best_len = len;\r | |
449 | InitBackwardMatch(matches++, backward, len);\r | |
450 | }\r | |
451 | }\r | |
452 | }\r | |
453 | if (best_len < max_length) {\r | |
454 | matches = FN(StoreAndFindMatches)(self, data, cur_ix, ring_buffer_mask,\r | |
455 | max_length, max_backward, &best_len, matches);\r | |
456 | }\r | |
457 | for (i = 0; i <= BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN; ++i) {\r | |
458 | dict_matches[i] = kInvalidMatch;\r | |
459 | }\r | |
460 | {\r | |
461 | size_t minlen = BROTLI_MAX(size_t, 4, best_len + 1);\r | |
462 | if (BrotliFindAllStaticDictionaryMatches(&data[cur_ix_masked], minlen,\r | |
463 | max_length, &dict_matches[0])) {\r | |
464 | size_t maxlen = BROTLI_MIN(\r | |
465 | size_t, BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN, max_length);\r | |
466 | size_t l;\r | |
467 | for (l = minlen; l <= maxlen; ++l) {\r | |
468 | uint32_t dict_id = dict_matches[l];\r | |
469 | if (dict_id < kInvalidMatch) {\r | |
470 | InitDictionaryBackwardMatch(matches++,\r | |
471 | max_backward + (dict_id >> 5) + 1, l, dict_id & 31);\r | |
472 | }\r | |
473 | }\r | |
474 | }\r | |
475 | }\r | |
476 | return (size_t)(matches - orig_matches);\r | |
477 | }\r | |
478 | \r | |
479 | /* Stores the hash of the next 4 bytes and re-roots the binary tree at the\r | |
480 | current sequence, without returning any matches.\r | |
481 | REQUIRES: ix + MAX_TREE_COMP_LENGTH <= end-of-current-block */\r | |
482 | static BROTLI_INLINE void FN(Store)(HashToBinaryTree* self, const uint8_t *data,\r | |
483 | const size_t mask, const size_t ix) {\r | |
484 | /* Maximum distance is window size - 16, see section 9.1. of the spec. */\r | |
485 | const size_t max_backward = self->window_mask_ - 15;\r | |
486 | FN(StoreAndFindMatches)(self, data, ix, mask, MAX_TREE_COMP_LENGTH,\r | |
487 | max_backward, NULL, NULL);\r | |
488 | }\r | |
489 | \r | |
490 | static BROTLI_INLINE void FN(StoreRange)(HashToBinaryTree* self,\r | |
491 | const uint8_t *data, const size_t mask, const size_t ix_start,\r | |
492 | const size_t ix_end) {\r | |
493 | size_t i = ix_start + 63 <= ix_end ? ix_end - 63 : ix_start;\r | |
494 | for (; i < ix_end; ++i) {\r | |
495 | FN(Store)(self, data, mask, i);\r | |
496 | }\r | |
497 | }\r | |
498 | \r | |
499 | static BROTLI_INLINE void FN(StitchToPreviousBlock)(HashToBinaryTree* self,\r | |
500 | size_t num_bytes, size_t position, const uint8_t* ringbuffer,\r | |
501 | size_t ringbuffer_mask) {\r | |
502 | if (num_bytes >= FN(HashTypeLength)() - 1 &&\r | |
503 | position >= MAX_TREE_COMP_LENGTH) {\r | |
504 | /* Store the last `MAX_TREE_COMP_LENGTH - 1` positions in the hasher.\r | |
505 | These could not be calculated before, since they require knowledge\r | |
506 | of both the previous and the current block. */\r | |
507 | const size_t i_start = position - MAX_TREE_COMP_LENGTH + 1;\r | |
508 | const size_t i_end = BROTLI_MIN(size_t, position, i_start + num_bytes);\r | |
509 | size_t i;\r | |
510 | for (i = i_start; i < i_end; ++i) {\r | |
511 | /* Maximum distance is window size - 16, see section 9.1. of the spec.\r | |
512 | Furthermore, we have to make sure that we don't look further back\r | |
513 | from the start of the next block than the window size, otherwise we\r | |
514 | could access already overwritten areas of the ringbuffer. */\r | |
515 | const size_t max_backward =\r | |
516 | self->window_mask_ - BROTLI_MAX(size_t, 15, position - i);\r | |
517 | /* We know that i + MAX_TREE_COMP_LENGTH <= position + num_bytes, i.e. the\r | |
518 | end of the current block and that we have at least\r | |
519 | MAX_TREE_COMP_LENGTH tail in the ringbuffer. */\r | |
520 | FN(StoreAndFindMatches)(self, ringbuffer, i, ringbuffer_mask,\r | |
521 | MAX_TREE_COMP_LENGTH, max_backward, NULL, NULL);\r | |
522 | }\r | |
523 | }\r | |
524 | }\r | |
525 | \r | |
526 | #undef BUCKET_SIZE\r | |
527 | #undef BUCKET_BITS\r | |
528 | \r | |
529 | #undef HASHER\r | |
530 | \r | |
531 | /* For BUCKET_SWEEP == 1, enabling the dictionary lookup makes compression\r | |
532 | a little faster (0.5% - 1%) and it compresses 0.15% better on small text\r | |
533 | and html inputs. */\r | |
534 | \r | |
535 | #define HASHER() H2\r | |
536 | #define BUCKET_BITS 16\r | |
537 | #define BUCKET_SWEEP 1\r | |
538 | #define USE_DICTIONARY 1\r | |
539 | #include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */\r | |
540 | #undef BUCKET_SWEEP\r | |
541 | #undef USE_DICTIONARY\r | |
542 | #undef HASHER\r | |
543 | \r | |
544 | #define HASHER() H3\r | |
545 | #define BUCKET_SWEEP 2\r | |
546 | #define USE_DICTIONARY 0\r | |
547 | #include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */\r | |
548 | #undef USE_DICTIONARY\r | |
549 | #undef BUCKET_SWEEP\r | |
550 | #undef BUCKET_BITS\r | |
551 | #undef HASHER\r | |
552 | \r | |
553 | #define HASHER() H4\r | |
554 | #define BUCKET_BITS 17\r | |
555 | #define BUCKET_SWEEP 4\r | |
556 | #define USE_DICTIONARY 1\r | |
557 | #include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */\r | |
558 | #undef USE_DICTIONARY\r | |
559 | #undef BUCKET_SWEEP\r | |
560 | #undef BUCKET_BITS\r | |
561 | #undef HASHER\r | |
562 | \r | |
563 | #define HASHER() H5\r | |
564 | #define BUCKET_BITS 14\r | |
565 | #define BLOCK_BITS 4\r | |
566 | #define NUM_LAST_DISTANCES_TO_CHECK 4\r | |
567 | #include "./hash_longest_match_inc.h" /* NOLINT(build/include) */\r | |
568 | #undef BLOCK_BITS\r | |
569 | #undef HASHER\r | |
570 | \r | |
571 | #define HASHER() H6\r | |
572 | #define BLOCK_BITS 5\r | |
573 | #include "./hash_longest_match_inc.h" /* NOLINT(build/include) */\r | |
574 | #undef NUM_LAST_DISTANCES_TO_CHECK\r | |
575 | #undef BLOCK_BITS\r | |
576 | #undef BUCKET_BITS\r | |
577 | #undef HASHER\r | |
578 | \r | |
579 | #define HASHER() H7\r | |
580 | #define BUCKET_BITS 15\r | |
581 | #define BLOCK_BITS 6\r | |
582 | #define NUM_LAST_DISTANCES_TO_CHECK 10\r | |
583 | #include "./hash_longest_match_inc.h" /* NOLINT(build/include) */\r | |
584 | #undef BLOCK_BITS\r | |
585 | #undef HASHER\r | |
586 | \r | |
587 | #define HASHER() H8\r | |
588 | #define BLOCK_BITS 7\r | |
589 | #include "./hash_longest_match_inc.h" /* NOLINT(build/include) */\r | |
590 | #undef NUM_LAST_DISTANCES_TO_CHECK\r | |
591 | #undef BLOCK_BITS\r | |
592 | #undef HASHER\r | |
593 | \r | |
594 | #define HASHER() H9\r | |
595 | #define BLOCK_BITS 8\r | |
596 | #define NUM_LAST_DISTANCES_TO_CHECK 16\r | |
597 | #include "./hash_longest_match_inc.h" /* NOLINT(build/include) */\r | |
598 | #undef NUM_LAST_DISTANCES_TO_CHECK\r | |
599 | #undef BLOCK_BITS\r | |
600 | #undef BUCKET_BITS\r | |
601 | #undef HASHER\r | |
602 | \r | |
603 | #define BUCKET_BITS 15\r | |
604 | \r | |
605 | #define NUM_LAST_DISTANCES_TO_CHECK 4\r | |
606 | #define NUM_BANKS 1\r | |
607 | #define BANK_BITS 16\r | |
608 | #define HASHER() H40\r | |
609 | #include "./hash_forgetful_chain_inc.h" /* NOLINT(build/include) */\r | |
610 | #undef HASHER\r | |
611 | #undef NUM_LAST_DISTANCES_TO_CHECK\r | |
612 | \r | |
613 | #define NUM_LAST_DISTANCES_TO_CHECK 10\r | |
614 | #define HASHER() H41\r | |
615 | #include "./hash_forgetful_chain_inc.h" /* NOLINT(build/include) */\r | |
616 | #undef HASHER\r | |
617 | #undef NUM_LAST_DISTANCES_TO_CHECK\r | |
618 | #undef NUM_BANKS\r | |
619 | #undef BANK_BITS\r | |
620 | \r | |
621 | #define NUM_LAST_DISTANCES_TO_CHECK 16\r | |
622 | #define NUM_BANKS 512\r | |
623 | #define BANK_BITS 9\r | |
624 | #define HASHER() H42\r | |
625 | #include "./hash_forgetful_chain_inc.h" /* NOLINT(build/include) */\r | |
626 | #undef HASHER\r | |
627 | #undef NUM_LAST_DISTANCES_TO_CHECK\r | |
628 | #undef NUM_BANKS\r | |
629 | #undef BANK_BITS\r | |
630 | \r | |
631 | #undef BUCKET_BITS\r | |
632 | \r | |
633 | #undef FN\r | |
634 | #undef CAT\r | |
635 | #undef EXPAND_CAT\r | |
636 | \r | |
637 | #define FOR_GENERIC_HASHERS(H) H(2) H(3) H(4) H(5) H(6) H(7) H(8) H(9) \\r | |
638 | H(40) H(41) H(42)\r | |
639 | #define FOR_ALL_HASHERS(H) FOR_GENERIC_HASHERS(H) H(10)\r | |
640 | \r | |
641 | typedef struct Hashers {\r | |
642 | #define _MEMBER(N) H ## N* h ## N;\r | |
643 | FOR_ALL_HASHERS(_MEMBER)\r | |
644 | #undef _MEMBER\r | |
645 | } Hashers;\r | |
646 | \r | |
647 | static BROTLI_INLINE void InitHashers(Hashers* self) {\r | |
648 | #define _INIT(N) self->h ## N = 0;\r | |
649 | FOR_ALL_HASHERS(_INIT)\r | |
650 | #undef _INIT\r | |
651 | }\r | |
652 | \r | |
653 | static BROTLI_INLINE void DestroyHashers(MemoryManager* m, Hashers* self) {\r | |
654 | if (self->h10) CleanupH10(m, self->h10);\r | |
655 | #define _CLEANUP(N) BROTLI_FREE(m, self->h ## N)\r | |
656 | FOR_ALL_HASHERS(_CLEANUP)\r | |
657 | #undef _CLEANUP\r | |
658 | }\r | |
659 | \r | |
660 | static BROTLI_INLINE void HashersReset(Hashers* self, int type) {\r | |
661 | switch (type) {\r | |
662 | #define _RESET(N) case N: ResetH ## N(self->h ## N); break;\r | |
663 | FOR_ALL_HASHERS(_RESET)\r | |
664 | #undef _RESET\r | |
665 | default: break;\r | |
666 | }\r | |
667 | }\r | |
668 | \r | |
669 | static BROTLI_INLINE void HashersSetup(\r | |
670 | MemoryManager* m, Hashers* self, int type) {\r | |
671 | switch (type) {\r | |
672 | #define _SETUP(N) case N: self->h ## N = BROTLI_ALLOC(m, H ## N, 1); break;\r | |
673 | FOR_ALL_HASHERS(_SETUP)\r | |
674 | #undef _SETUP\r | |
675 | default: break;\r | |
676 | }\r | |
677 | if (BROTLI_IS_OOM(m)) return;\r | |
678 | if (type == 10) InitializeH10(self->h10);\r | |
679 | HashersReset(self, type);\r | |
680 | }\r | |
681 | \r | |
682 | #define _WARMUP_HASH(N) \\r | |
683 | static BROTLI_INLINE void WarmupHashH ## N(MemoryManager* m, \\r | |
684 | const BrotliEncoderParams* params, const size_t size, const uint8_t* dict, \\r | |
685 | H ## N* hasher) { \\r | |
686 | size_t overlap = (StoreLookaheadH ## N()) - 1; \\r | |
687 | size_t i; \\r | |
688 | InitH ## N(m, hasher, dict, params, 0, size, BROTLI_FALSE); \\r | |
689 | if (BROTLI_IS_OOM(m)) return; \\r | |
690 | for (i = 0; i + overlap < size; i++) { \\r | |
691 | StoreH ## N(hasher, dict, ~(size_t)0, i); \\r | |
692 | } \\r | |
693 | }\r | |
694 | FOR_ALL_HASHERS(_WARMUP_HASH)\r | |
695 | #undef _WARMUP_HASH\r | |
696 | \r | |
697 | /* Custom LZ77 window. */\r | |
698 | static BROTLI_INLINE void HashersPrependCustomDictionary(\r | |
699 | MemoryManager* m, Hashers* self, const BrotliEncoderParams* params,\r | |
700 | const size_t size, const uint8_t* dict) {\r | |
701 | int hasher_type = ChooseHasher(params);\r | |
702 | switch (hasher_type) {\r | |
703 | #define _PREPEND(N) \\r | |
704 | case N: WarmupHashH ## N(m, params, size, dict, self->h ## N); break;\r | |
705 | FOR_ALL_HASHERS(_PREPEND)\r | |
706 | #undef _PREPEND\r | |
707 | default: break;\r | |
708 | }\r | |
709 | if (BROTLI_IS_OOM(m)) return;\r | |
710 | }\r | |
711 | \r | |
712 | \r | |
713 | #if defined(__cplusplus) || defined(c_plusplus)\r | |
714 | } /* extern "C" */\r | |
715 | #endif\r | |
716 | \r | |
717 | #endif /* BROTLI_ENC_HASH_H_ */\r |