]>
Commit | Line | Data |
---|---|---|
11b7501a SB |
1 | /* NOLINT(build/header_guard) */\r |
2 | /* Copyright 2016 Google Inc. All Rights Reserved.\r | |
3 | \r | |
4 | Distributed under MIT license.\r | |
5 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT\r | |
6 | */\r | |
7 | \r | |
8 | /* template parameters: FN, BUCKET_BITS, NUM_BANKS, BANK_BITS,\r | |
9 | NUM_LAST_DISTANCES_TO_CHECK */\r | |
10 | \r | |
11 | /* A (forgetful) hash table to the data seen by the compressor, to\r | |
12 | help create backward references to previous data.\r | |
13 | \r | |
14 | Hashes are stored in chains which are bucketed to groups. Group of chains\r | |
15 | share a storage "bank". When more than "bank size" chain nodes are added,\r | |
16 | oldest nodes are replaced; this way several chains may share a tail. */\r | |
17 | \r | |
18 | #define HashForgetfulChain HASHER()\r | |
19 | \r | |
20 | #define BANK_SIZE (1 << BANK_BITS)\r | |
21 | \r | |
22 | /* Number of hash buckets. */\r | |
23 | #define BUCKET_SIZE (1 << BUCKET_BITS)\r | |
24 | \r | |
25 | #define CAPPED_CHAINS 0\r | |
26 | \r | |
27 | static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; }\r | |
28 | static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; }\r | |
29 | \r | |
30 | /* HashBytes is the function that chooses the bucket to place the address in.*/\r | |
31 | static BROTLI_INLINE size_t FN(HashBytes)(const uint8_t *data) {\r | |
32 | const uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;\r | |
33 | /* The higher bits contain more mixture from the multiplication,\r | |
34 | so we take our results from there. */\r | |
35 | return h >> (32 - BUCKET_BITS);\r | |
36 | }\r | |
37 | \r | |
38 | typedef struct FN(Slot) {\r | |
39 | uint16_t delta;\r | |
40 | uint16_t next;\r | |
41 | } FN(Slot);\r | |
42 | \r | |
43 | typedef struct FN(Bank) {\r | |
44 | FN(Slot) slots[BANK_SIZE];\r | |
45 | } FN(Bank);\r | |
46 | \r | |
47 | typedef struct HashForgetfulChain {\r | |
48 | uint32_t addr[BUCKET_SIZE];\r | |
49 | uint16_t head[BUCKET_SIZE];\r | |
50 | /* Truncated hash used for quick rejection of "distance cache" candidates. */\r | |
51 | uint8_t tiny_hash[65536];\r | |
52 | FN(Bank) banks[NUM_BANKS];\r | |
53 | uint16_t free_slot_idx[NUM_BANKS];\r | |
54 | BROTLI_BOOL is_dirty_;\r | |
55 | DictionarySearchStatictics dict_search_stats_;\r | |
56 | size_t max_hops;\r | |
57 | } HashForgetfulChain;\r | |
58 | \r | |
59 | static void FN(Reset)(HashForgetfulChain* self) {\r | |
60 | self->is_dirty_ = BROTLI_TRUE;\r | |
61 | DictionarySearchStaticticsReset(&self->dict_search_stats_);\r | |
62 | }\r | |
63 | \r | |
64 | static void FN(InitEmpty)(HashForgetfulChain* self) {\r | |
65 | if (self->is_dirty_) {\r | |
66 | /* Fill |addr| array with 0xCCCCCCCC value. Because of wrapping, position\r | |
67 | processed by hasher never reaches 3GB + 64M; this makes all new chains\r | |
68 | to be terminated after the first node. */\r | |
69 | memset(self->addr, 0xCC, sizeof(self->addr));\r | |
70 | memset(self->head, 0, sizeof(self->head));\r | |
71 | memset(self->tiny_hash, 0, sizeof(self->tiny_hash));\r | |
72 | memset(self->free_slot_idx, 0, sizeof(self->free_slot_idx));\r | |
73 | self->is_dirty_ = BROTLI_FALSE;\r | |
74 | }\r | |
75 | }\r | |
76 | \r | |
77 | static void FN(InitForData)(HashForgetfulChain* self, const uint8_t* data,\r | |
78 | size_t num) {\r | |
79 | size_t i;\r | |
80 | for (i = 0; i < num; ++i) {\r | |
81 | size_t bucket = FN(HashBytes)(&data[i]);\r | |
82 | /* See InitEmpty comment. */\r | |
83 | self->addr[bucket] = 0xCCCCCCCC;\r | |
84 | self->head[bucket] = 0xCCCC;\r | |
85 | }\r | |
86 | memset(self->tiny_hash, 0, sizeof(self->tiny_hash));\r | |
87 | memset(self->free_slot_idx, 0, sizeof(self->free_slot_idx));\r | |
88 | if (num != 0) {\r | |
89 | self->is_dirty_ = BROTLI_FALSE;\r | |
90 | }\r | |
91 | }\r | |
92 | \r | |
93 | static void FN(Init)(\r | |
94 | MemoryManager* m, HashForgetfulChain* self, const uint8_t* data,\r | |
95 | const BrotliEncoderParams* params, size_t position, size_t bytes,\r | |
96 | BROTLI_BOOL is_last) {\r | |
97 | /* Choose which init method is faster.\r | |
98 | Init() is about 100 times faster than InitForData(). */\r | |
99 | const size_t kMaxBytesForPartialHashInit = BUCKET_SIZE >> 6;\r | |
100 | BROTLI_UNUSED(m);\r | |
101 | self->max_hops = (params->quality > 6 ? 7u : 8u) << (params->quality - 4);\r | |
102 | if (position == 0 && is_last && bytes <= kMaxBytesForPartialHashInit) {\r | |
103 | FN(InitForData)(self, data, bytes);\r | |
104 | } else {\r | |
105 | FN(InitEmpty)(self);\r | |
106 | }\r | |
107 | }\r | |
108 | \r | |
109 | /* Look at 4 bytes at &data[ix & mask]. Compute a hash from these, and prepend\r | |
110 | node to corresponding chain; also update tiny_hash for current position. */\r | |
111 | static BROTLI_INLINE void FN(Store)(HashForgetfulChain* BROTLI_RESTRICT self,\r | |
112 | const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) {\r | |
113 | const size_t key = FN(HashBytes)(&data[ix & mask]);\r | |
114 | const size_t bank = key & (NUM_BANKS - 1);\r | |
115 | const size_t idx = self->free_slot_idx[bank]++ & (BANK_SIZE - 1);\r | |
116 | size_t delta = ix - self->addr[key];\r | |
117 | self->tiny_hash[(uint16_t)ix] = (uint8_t)key;\r | |
118 | if (delta > 0xFFFF) delta = CAPPED_CHAINS ? 0 : 0xFFFF;\r | |
119 | self->banks[bank].slots[idx].delta = (uint16_t)delta;\r | |
120 | self->banks[bank].slots[idx].next = self->head[key];\r | |
121 | self->addr[key] = (uint32_t)ix;\r | |
122 | self->head[key] = (uint16_t)idx;\r | |
123 | }\r | |
124 | \r | |
125 | static BROTLI_INLINE void FN(StoreRange)(HashForgetfulChain* self,\r | |
126 | const uint8_t *data, const size_t mask, const size_t ix_start,\r | |
127 | const size_t ix_end) {\r | |
128 | size_t i;\r | |
129 | for (i = ix_start; i < ix_end; ++i) {\r | |
130 | FN(Store)(self, data, mask, i);\r | |
131 | }\r | |
132 | }\r | |
133 | \r | |
134 | static BROTLI_INLINE void FN(StitchToPreviousBlock)(HashForgetfulChain* self,\r | |
135 | size_t num_bytes, size_t position, const uint8_t* ringbuffer,\r | |
136 | size_t ring_buffer_mask) {\r | |
137 | if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) {\r | |
138 | /* Prepare the hashes for three last bytes of the last write.\r | |
139 | These could not be calculated before, since they require knowledge\r | |
140 | of both the previous and the current block. */\r | |
141 | FN(Store)(self, ringbuffer, ring_buffer_mask, position - 3);\r | |
142 | FN(Store)(self, ringbuffer, ring_buffer_mask, position - 2);\r | |
143 | FN(Store)(self, ringbuffer, ring_buffer_mask, position - 1);\r | |
144 | }\r | |
145 | }\r | |
146 | \r | |
147 | /* Find a longest backward match of &data[cur_ix] up to the length of\r | |
148 | max_length and stores the position cur_ix in the hash table.\r | |
149 | \r | |
150 | Does not look for matches longer than max_length.\r | |
151 | Does not look for matches further away than max_backward.\r | |
152 | Writes the best match into |out|.\r | |
153 | Returns 1 when match is found, otherwise 0. */\r | |
154 | static BROTLI_INLINE BROTLI_BOOL FN(FindLongestMatch)(\r | |
155 | HashForgetfulChain* self, const uint8_t* BROTLI_RESTRICT data,\r | |
156 | const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache,\r | |
157 | const size_t cur_ix, const size_t max_length, const size_t max_backward,\r | |
158 | HasherSearchResult* BROTLI_RESTRICT out) {\r | |
159 | const size_t cur_ix_masked = cur_ix & ring_buffer_mask;\r | |
160 | BROTLI_BOOL is_match_found = BROTLI_FALSE;\r | |
161 | /* Don't accept a short copy from far away. */\r | |
162 | score_t best_score = out->score;\r | |
163 | size_t best_len = out->len;\r | |
164 | size_t i;\r | |
165 | const size_t key = FN(HashBytes)(&data[cur_ix_masked]);\r | |
166 | const uint8_t tiny_hash = (uint8_t)(key);\r | |
167 | out->len = 0;\r | |
168 | out->len_x_code = 0;\r | |
169 | /* Try last distance first. */\r | |
170 | for (i = 0; i < NUM_LAST_DISTANCES_TO_CHECK; ++i) {\r | |
171 | const size_t idx = kDistanceCacheIndex[i];\r | |
172 | const size_t backward =\r | |
173 | (size_t)(distance_cache[idx] + kDistanceCacheOffset[i]);\r | |
174 | size_t prev_ix = (cur_ix - backward);\r | |
175 | if (i > 0 && self->tiny_hash[(uint16_t)prev_ix] != tiny_hash) continue;\r | |
176 | if (prev_ix >= cur_ix || backward > max_backward) {\r | |
177 | continue;\r | |
178 | }\r | |
179 | prev_ix &= ring_buffer_mask;\r | |
180 | {\r | |
181 | const size_t len = FindMatchLengthWithLimit(&data[prev_ix],\r | |
182 | &data[cur_ix_masked],\r | |
183 | max_length);\r | |
184 | if (len >= 2) {\r | |
185 | score_t score = BackwardReferenceScoreUsingLastDistance(len, i);\r | |
186 | if (best_score < score) {\r | |
187 | best_score = score;\r | |
188 | best_len = len;\r | |
189 | out->len = best_len;\r | |
190 | out->distance = backward;\r | |
191 | out->score = best_score;\r | |
192 | is_match_found = BROTLI_TRUE;\r | |
193 | }\r | |
194 | }\r | |
195 | }\r | |
196 | }\r | |
197 | {\r | |
198 | const size_t bank = key & (NUM_BANKS - 1);\r | |
199 | size_t backward = 0;\r | |
200 | size_t hops = self->max_hops;\r | |
201 | size_t delta = cur_ix - self->addr[key];\r | |
202 | size_t slot = self->head[key];\r | |
203 | while (hops--) {\r | |
204 | size_t prev_ix;\r | |
205 | size_t last = slot;\r | |
206 | backward += delta;\r | |
207 | if (backward > max_backward || (CAPPED_CHAINS && !delta)) break;\r | |
208 | prev_ix = (cur_ix - backward) & ring_buffer_mask;\r | |
209 | slot = self->banks[bank].slots[last].next;\r | |
210 | delta = self->banks[bank].slots[last].delta;\r | |
211 | if (cur_ix_masked + best_len > ring_buffer_mask ||\r | |
212 | prev_ix + best_len > ring_buffer_mask ||\r | |
213 | data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {\r | |
214 | continue;\r | |
215 | }\r | |
216 | {\r | |
217 | const size_t len = FindMatchLengthWithLimit(&data[prev_ix],\r | |
218 | &data[cur_ix_masked],\r | |
219 | max_length);\r | |
220 | if (len >= 4) {\r | |
221 | /* Comparing for >= 3 does not change the semantics, but just saves\r | |
222 | for a few unnecessary binary logarithms in backward reference\r | |
223 | score, since we are not interested in such short matches. */\r | |
224 | score_t score = BackwardReferenceScore(len, backward);\r | |
225 | if (best_score < score) {\r | |
226 | best_score = score;\r | |
227 | best_len = len;\r | |
228 | out->len = best_len;\r | |
229 | out->distance = backward;\r | |
230 | out->score = best_score;\r | |
231 | is_match_found = BROTLI_TRUE;\r | |
232 | }\r | |
233 | }\r | |
234 | }\r | |
235 | }\r | |
236 | FN(Store)(self, data, ring_buffer_mask, cur_ix);\r | |
237 | }\r | |
238 | if (!is_match_found) {\r | |
239 | is_match_found = SearchInStaticDictionary(&self->dict_search_stats_,\r | |
240 | &data[cur_ix_masked], max_length, max_backward, out, BROTLI_FALSE);\r | |
241 | }\r | |
242 | return is_match_found;\r | |
243 | }\r | |
244 | \r | |
245 | #undef BANK_SIZE\r | |
246 | #undef BUCKET_SIZE\r | |
247 | #undef CAPPED_CHAINS\r | |
248 | \r | |
249 | #undef HashForgetfulChain\r |