]>
Commit | Line | Data |
---|---|---|
dd4f667e LG |
1 | /* NOLINT(build/header_guard) */\r |
2 | /* Copyright 2016 Google Inc. All Rights Reserved.\r | |
3 | \r | |
4 | Distributed under MIT license.\r | |
5 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT\r | |
6 | */\r | |
7 | \r | |
8 | /* template parameters: FN, BUCKET_BITS, MAX_TREE_COMP_LENGTH,\r | |
9 | MAX_TREE_SEARCH_DEPTH */\r | |
10 | \r | |
11 | /* A (forgetful) hash table where each hash bucket contains a binary tree of\r | |
12 | sequences whose first 4 bytes share the same hash code.\r | |
13 | Each sequence is MAX_TREE_COMP_LENGTH long and is identified by its starting\r | |
14 | position in the input data. The binary tree is sorted by the lexicographic\r | |
15 | order of the sequences, and it is also a max-heap with respect to the\r | |
16 | starting positions. */\r | |
17 | \r | |
18 | #define HashToBinaryTree HASHER()\r | |
19 | \r | |
20 | #define BUCKET_SIZE (1 << BUCKET_BITS)\r | |
21 | \r | |
22 | static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; }\r | |
23 | static BROTLI_INLINE size_t FN(StoreLookahead)(void) {\r | |
24 | return MAX_TREE_COMP_LENGTH;\r | |
25 | }\r | |
26 | \r | |
27 | static uint32_t FN(HashBytes)(const uint8_t* data) {\r | |
28 | uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32;\r | |
29 | /* The higher bits contain more mixture from the multiplication,\r | |
30 | so we take our results from there. */\r | |
31 | return h >> (32 - BUCKET_BITS);\r | |
32 | }\r | |
33 | \r | |
34 | typedef struct HashToBinaryTree {\r | |
35 | /* The window size minus 1 */\r | |
36 | size_t window_mask_;\r | |
37 | \r | |
38 | /* Hash table that maps the 4-byte hashes of the sequence to the last\r | |
39 | position where this hash was found, which is the root of the binary\r | |
40 | tree of sequences that share this hash bucket. */\r | |
41 | uint32_t buckets_[BUCKET_SIZE];\r | |
42 | \r | |
43 | /* A position used to mark a non-existent sequence, i.e. a tree is empty if\r | |
44 | its root is at invalid_pos_ and a node is a leaf if both its children\r | |
45 | are at invalid_pos_. */\r | |
46 | uint32_t invalid_pos_;\r | |
47 | \r | |
48 | /* --- Dynamic size members --- */\r | |
49 | \r | |
50 | /* The union of the binary trees of each hash bucket. The root of the tree\r | |
51 | corresponding to a hash is a sequence starting at buckets_[hash] and\r | |
52 | the left and right children of a sequence starting at pos are\r | |
53 | forest_[2 * pos] and forest_[2 * pos + 1]. */\r | |
54 | /* uint32_t forest[2 * num_nodes] */\r | |
55 | } HashToBinaryTree;\r | |
56 | \r | |
57 | static BROTLI_INLINE HashToBinaryTree* FN(Self)(HasherHandle handle) {\r | |
58 | return (HashToBinaryTree*)&(GetHasherCommon(handle)[1]);\r | |
59 | }\r | |
60 | \r | |
61 | static BROTLI_INLINE uint32_t* FN(Forest)(HashToBinaryTree* self) {\r | |
62 | return (uint32_t*)(&self[1]);\r | |
63 | }\r | |
64 | \r | |
65 | static void FN(Initialize)(\r | |
66 | HasherHandle handle, const BrotliEncoderParams* params) {\r | |
67 | HashToBinaryTree* self = FN(Self)(handle);\r | |
68 | self->window_mask_ = (1u << params->lgwin) - 1u;\r | |
69 | self->invalid_pos_ = (uint32_t)(0 - self->window_mask_);\r | |
70 | }\r | |
71 | \r | |
72 | static void FN(Prepare)(HasherHandle handle, BROTLI_BOOL one_shot,\r | |
73 | size_t input_size, const uint8_t* data) {\r | |
74 | HashToBinaryTree* self = FN(Self)(handle);\r | |
75 | uint32_t invalid_pos = self->invalid_pos_;\r | |
76 | uint32_t i;\r | |
77 | BROTLI_UNUSED(data);\r | |
78 | BROTLI_UNUSED(one_shot);\r | |
79 | BROTLI_UNUSED(input_size);\r | |
80 | for (i = 0; i < BUCKET_SIZE; i++) {\r | |
81 | self->buckets_[i] = invalid_pos;\r | |
82 | }\r | |
83 | }\r | |
84 | \r | |
85 | static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(\r | |
86 | const BrotliEncoderParams* params, BROTLI_BOOL one_shot,\r | |
87 | size_t input_size) {\r | |
88 | size_t num_nodes = (size_t)1 << params->lgwin;\r | |
89 | if (one_shot && input_size < num_nodes) {\r | |
90 | num_nodes = input_size;\r | |
91 | }\r | |
92 | return sizeof(HashToBinaryTree) + 2 * sizeof(uint32_t) * num_nodes;\r | |
93 | }\r | |
94 | \r | |
95 | static BROTLI_INLINE size_t FN(LeftChildIndex)(HashToBinaryTree* self,\r | |
96 | const size_t pos) {\r | |
97 | return 2 * (pos & self->window_mask_);\r | |
98 | }\r | |
99 | \r | |
100 | static BROTLI_INLINE size_t FN(RightChildIndex)(HashToBinaryTree* self,\r | |
101 | const size_t pos) {\r | |
102 | return 2 * (pos & self->window_mask_) + 1;\r | |
103 | }\r | |
104 | \r | |
105 | /* Stores the hash of the next 4 bytes and in a single tree-traversal, the\r | |
106 | hash bucket's binary tree is searched for matches and is re-rooted at the\r | |
107 | current position.\r | |
108 | \r | |
109 | If less than MAX_TREE_COMP_LENGTH data is available, the hash bucket of the\r | |
110 | current position is searched for matches, but the state of the hash table\r | |
111 | is not changed, since we can not know the final sorting order of the\r | |
112 | current (incomplete) sequence.\r | |
113 | \r | |
114 | This function must be called with increasing cur_ix positions. */\r | |
115 | static BROTLI_INLINE BackwardMatch* FN(StoreAndFindMatches)(\r | |
116 | HashToBinaryTree* self, const uint8_t* const BROTLI_RESTRICT data,\r | |
117 | const size_t cur_ix, const size_t ring_buffer_mask, const size_t max_length,\r | |
118 | const size_t max_backward, size_t* const BROTLI_RESTRICT best_len,\r | |
119 | BackwardMatch* BROTLI_RESTRICT matches) {\r | |
120 | const size_t cur_ix_masked = cur_ix & ring_buffer_mask;\r | |
121 | const size_t max_comp_len =\r | |
122 | BROTLI_MIN(size_t, max_length, MAX_TREE_COMP_LENGTH);\r | |
123 | const BROTLI_BOOL should_reroot_tree =\r | |
124 | TO_BROTLI_BOOL(max_length >= MAX_TREE_COMP_LENGTH);\r | |
125 | const uint32_t key = FN(HashBytes)(&data[cur_ix_masked]);\r | |
126 | uint32_t* forest = FN(Forest)(self);\r | |
127 | size_t prev_ix = self->buckets_[key];\r | |
128 | /* The forest index of the rightmost node of the left subtree of the new\r | |
129 | root, updated as we traverse and re-root the tree of the hash bucket. */\r | |
130 | size_t node_left = FN(LeftChildIndex)(self, cur_ix);\r | |
131 | /* The forest index of the leftmost node of the right subtree of the new\r | |
132 | root, updated as we traverse and re-root the tree of the hash bucket. */\r | |
133 | size_t node_right = FN(RightChildIndex)(self, cur_ix);\r | |
134 | /* The match length of the rightmost node of the left subtree of the new\r | |
135 | root, updated as we traverse and re-root the tree of the hash bucket. */\r | |
136 | size_t best_len_left = 0;\r | |
137 | /* The match length of the leftmost node of the right subtree of the new\r | |
138 | root, updated as we traverse and re-root the tree of the hash bucket. */\r | |
139 | size_t best_len_right = 0;\r | |
140 | size_t depth_remaining;\r | |
141 | if (should_reroot_tree) {\r | |
142 | self->buckets_[key] = (uint32_t)cur_ix;\r | |
143 | }\r | |
144 | for (depth_remaining = MAX_TREE_SEARCH_DEPTH; ; --depth_remaining) {\r | |
145 | const size_t backward = cur_ix - prev_ix;\r | |
146 | const size_t prev_ix_masked = prev_ix & ring_buffer_mask;\r | |
147 | if (backward == 0 || backward > max_backward || depth_remaining == 0) {\r | |
148 | if (should_reroot_tree) {\r | |
149 | forest[node_left] = self->invalid_pos_;\r | |
150 | forest[node_right] = self->invalid_pos_;\r | |
151 | }\r | |
152 | break;\r | |
153 | }\r | |
154 | {\r | |
155 | const size_t cur_len = BROTLI_MIN(size_t, best_len_left, best_len_right);\r | |
156 | size_t len;\r | |
157 | BROTLI_DCHECK(cur_len <= MAX_TREE_COMP_LENGTH);\r | |
158 | len = cur_len +\r | |
159 | FindMatchLengthWithLimit(&data[cur_ix_masked + cur_len],\r | |
160 | &data[prev_ix_masked + cur_len],\r | |
161 | max_length - cur_len);\r | |
162 | BROTLI_DCHECK(\r | |
163 | 0 == memcmp(&data[cur_ix_masked], &data[prev_ix_masked], len));\r | |
164 | if (matches && len > *best_len) {\r | |
165 | *best_len = len;\r | |
166 | InitBackwardMatch(matches++, backward, len);\r | |
167 | }\r | |
168 | if (len >= max_comp_len) {\r | |
169 | if (should_reroot_tree) {\r | |
170 | forest[node_left] = forest[FN(LeftChildIndex)(self, prev_ix)];\r | |
171 | forest[node_right] = forest[FN(RightChildIndex)(self, prev_ix)];\r | |
172 | }\r | |
173 | break;\r | |
174 | }\r | |
175 | if (data[cur_ix_masked + len] > data[prev_ix_masked + len]) {\r | |
176 | best_len_left = len;\r | |
177 | if (should_reroot_tree) {\r | |
178 | forest[node_left] = (uint32_t)prev_ix;\r | |
179 | }\r | |
180 | node_left = FN(RightChildIndex)(self, prev_ix);\r | |
181 | prev_ix = forest[node_left];\r | |
182 | } else {\r | |
183 | best_len_right = len;\r | |
184 | if (should_reroot_tree) {\r | |
185 | forest[node_right] = (uint32_t)prev_ix;\r | |
186 | }\r | |
187 | node_right = FN(LeftChildIndex)(self, prev_ix);\r | |
188 | prev_ix = forest[node_right];\r | |
189 | }\r | |
190 | }\r | |
191 | }\r | |
192 | return matches;\r | |
193 | }\r | |
194 | \r | |
195 | /* Finds all backward matches of &data[cur_ix & ring_buffer_mask] up to the\r | |
196 | length of max_length and stores the position cur_ix in the hash table.\r | |
197 | \r | |
198 | Sets *num_matches to the number of matches found, and stores the found\r | |
199 | matches in matches[0] to matches[*num_matches - 1]. The matches will be\r | |
200 | sorted by strictly increasing length and (non-strictly) increasing\r | |
201 | distance. */\r | |
202 | static BROTLI_INLINE size_t FN(FindAllMatches)(HasherHandle handle,\r | |
203 | const BrotliEncoderDictionary* dictionary, const uint8_t* data,\r | |
204 | const size_t ring_buffer_mask, const size_t cur_ix,\r | |
205 | const size_t max_length, const size_t max_backward, const size_t gap,\r | |
206 | const BrotliEncoderParams* params, BackwardMatch* matches) {\r | |
207 | BackwardMatch* const orig_matches = matches;\r | |
208 | const size_t cur_ix_masked = cur_ix & ring_buffer_mask;\r | |
209 | size_t best_len = 1;\r | |
210 | const size_t short_match_max_backward =\r | |
211 | params->quality != HQ_ZOPFLIFICATION_QUALITY ? 16 : 64;\r | |
212 | size_t stop = cur_ix - short_match_max_backward;\r | |
213 | uint32_t dict_matches[BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1];\r | |
214 | size_t i;\r | |
215 | if (cur_ix < short_match_max_backward) { stop = 0; }\r | |
216 | for (i = cur_ix - 1; i > stop && best_len <= 2; --i) {\r | |
217 | size_t prev_ix = i;\r | |
218 | const size_t backward = cur_ix - prev_ix;\r | |
219 | if (BROTLI_PREDICT_FALSE(backward > max_backward)) {\r | |
220 | break;\r | |
221 | }\r | |
222 | prev_ix &= ring_buffer_mask;\r | |
223 | if (data[cur_ix_masked] != data[prev_ix] ||\r | |
224 | data[cur_ix_masked + 1] != data[prev_ix + 1]) {\r | |
225 | continue;\r | |
226 | }\r | |
227 | {\r | |
228 | const size_t len =\r | |
229 | FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],\r | |
230 | max_length);\r | |
231 | if (len > best_len) {\r | |
232 | best_len = len;\r | |
233 | InitBackwardMatch(matches++, backward, len);\r | |
234 | }\r | |
235 | }\r | |
236 | }\r | |
237 | if (best_len < max_length) {\r | |
238 | matches = FN(StoreAndFindMatches)(FN(Self)(handle), data, cur_ix,\r | |
239 | ring_buffer_mask, max_length, max_backward, &best_len, matches);\r | |
240 | }\r | |
241 | for (i = 0; i <= BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN; ++i) {\r | |
242 | dict_matches[i] = kInvalidMatch;\r | |
243 | }\r | |
244 | {\r | |
245 | size_t minlen = BROTLI_MAX(size_t, 4, best_len + 1);\r | |
246 | if (BrotliFindAllStaticDictionaryMatches(dictionary,\r | |
247 | &data[cur_ix_masked], minlen, max_length, &dict_matches[0])) {\r | |
248 | size_t maxlen = BROTLI_MIN(\r | |
249 | size_t, BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN, max_length);\r | |
250 | size_t l;\r | |
251 | for (l = minlen; l <= maxlen; ++l) {\r | |
252 | uint32_t dict_id = dict_matches[l];\r | |
253 | if (dict_id < kInvalidMatch) {\r | |
254 | size_t distance = max_backward + gap + (dict_id >> 5) + 1;\r | |
255 | if (distance <= params->dist.max_distance) {\r | |
256 | InitDictionaryBackwardMatch(matches++, distance, l, dict_id & 31);\r | |
257 | }\r | |
258 | }\r | |
259 | }\r | |
260 | }\r | |
261 | }\r | |
262 | return (size_t)(matches - orig_matches);\r | |
263 | }\r | |
264 | \r | |
265 | /* Stores the hash of the next 4 bytes and re-roots the binary tree at the\r | |
266 | current sequence, without returning any matches.\r | |
267 | REQUIRES: ix + MAX_TREE_COMP_LENGTH <= end-of-current-block */\r | |
268 | static BROTLI_INLINE void FN(Store)(HasherHandle handle, const uint8_t* data,\r | |
269 | const size_t mask, const size_t ix) {\r | |
270 | HashToBinaryTree* self = FN(Self)(handle);\r | |
271 | /* Maximum distance is window size - 16, see section 9.1. of the spec. */\r | |
272 | const size_t max_backward = self->window_mask_ - BROTLI_WINDOW_GAP + 1;\r | |
273 | FN(StoreAndFindMatches)(self, data, ix, mask, MAX_TREE_COMP_LENGTH,\r | |
274 | max_backward, NULL, NULL);\r | |
275 | }\r | |
276 | \r | |
277 | static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle,\r | |
278 | const uint8_t* data, const size_t mask, const size_t ix_start,\r | |
279 | const size_t ix_end) {\r | |
280 | size_t i = ix_start;\r | |
281 | size_t j = ix_start;\r | |
282 | if (ix_start + 63 <= ix_end) {\r | |
283 | i = ix_end - 63;\r | |
284 | }\r | |
285 | if (ix_start + 512 <= i) {\r | |
286 | for (; j < i; j += 8) {\r | |
287 | FN(Store)(handle, data, mask, j);\r | |
288 | }\r | |
289 | }\r | |
290 | for (; i < ix_end; ++i) {\r | |
291 | FN(Store)(handle, data, mask, i);\r | |
292 | }\r | |
293 | }\r | |
294 | \r | |
295 | static BROTLI_INLINE void FN(StitchToPreviousBlock)(HasherHandle handle,\r | |
296 | size_t num_bytes, size_t position, const uint8_t* ringbuffer,\r | |
297 | size_t ringbuffer_mask) {\r | |
298 | HashToBinaryTree* self = FN(Self)(handle);\r | |
299 | if (num_bytes >= FN(HashTypeLength)() - 1 &&\r | |
300 | position >= MAX_TREE_COMP_LENGTH) {\r | |
301 | /* Store the last `MAX_TREE_COMP_LENGTH - 1` positions in the hasher.\r | |
302 | These could not be calculated before, since they require knowledge\r | |
303 | of both the previous and the current block. */\r | |
304 | const size_t i_start = position - MAX_TREE_COMP_LENGTH + 1;\r | |
305 | const size_t i_end = BROTLI_MIN(size_t, position, i_start + num_bytes);\r | |
306 | size_t i;\r | |
307 | for (i = i_start; i < i_end; ++i) {\r | |
308 | /* Maximum distance is window size - 16, see section 9.1. of the spec.\r | |
309 | Furthermore, we have to make sure that we don't look further back\r | |
310 | from the start of the next block than the window size, otherwise we\r | |
311 | could access already overwritten areas of the ring-buffer. */\r | |
312 | const size_t max_backward =\r | |
313 | self->window_mask_ - BROTLI_MAX(size_t,\r | |
314 | BROTLI_WINDOW_GAP - 1,\r | |
315 | position - i);\r | |
316 | /* We know that i + MAX_TREE_COMP_LENGTH <= position + num_bytes, i.e. the\r | |
317 | end of the current block and that we have at least\r | |
318 | MAX_TREE_COMP_LENGTH tail in the ring-buffer. */\r | |
319 | FN(StoreAndFindMatches)(self, ringbuffer, i, ringbuffer_mask,\r | |
320 | MAX_TREE_COMP_LENGTH, max_backward, NULL, NULL);\r | |
321 | }\r | |
322 | }\r | |
323 | }\r | |
324 | \r | |
325 | #undef BUCKET_SIZE\r | |
326 | \r | |
327 | #undef HashToBinaryTree\r |