]> git.proxmox.com Git - mirror_edk2.git/blame - BaseTools/Source/C/BrotliCompress/enc/hash_forgetful_chain_inc.h
BaseTools: Copy Brotli algorithm 3rd party source code for tool
[mirror_edk2.git] / BaseTools / Source / C / BrotliCompress / enc / hash_forgetful_chain_inc.h
CommitLineData
11b7501a
SB
1/* NOLINT(build/header_guard) */\r
2/* Copyright 2016 Google Inc. All Rights Reserved.\r
3\r
4 Distributed under MIT license.\r
5 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT\r
6*/\r
7\r
8/* template parameters: FN, BUCKET_BITS, NUM_BANKS, BANK_BITS,\r
9 NUM_LAST_DISTANCES_TO_CHECK */\r
10\r
11/* A (forgetful) hash table to the data seen by the compressor, to\r
12 help create backward references to previous data.\r
13\r
14 Hashes are stored in chains which are bucketed to groups. Group of chains\r
15 share a storage "bank". When more than "bank size" chain nodes are added,\r
16 oldest nodes are replaced; this way several chains may share a tail. */\r
17\r
18#define HashForgetfulChain HASHER()\r
19\r
20#define BANK_SIZE (1 << BANK_BITS)\r
21\r
22/* Number of hash buckets. */\r
23#define BUCKET_SIZE (1 << BUCKET_BITS)\r
24\r
25#define CAPPED_CHAINS 0\r
26\r
27static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; }\r
28static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; }\r
29\r
30/* HashBytes is the function that chooses the bucket to place the address in.*/\r
31static BROTLI_INLINE size_t FN(HashBytes)(const uint8_t *data) {\r
32 const uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;\r
33 /* The higher bits contain more mixture from the multiplication,\r
34 so we take our results from there. */\r
35 return h >> (32 - BUCKET_BITS);\r
36}\r
37\r
38typedef struct FN(Slot) {\r
39 uint16_t delta;\r
40 uint16_t next;\r
41} FN(Slot);\r
42\r
43typedef struct FN(Bank) {\r
44 FN(Slot) slots[BANK_SIZE];\r
45} FN(Bank);\r
46\r
47typedef struct HashForgetfulChain {\r
48 uint32_t addr[BUCKET_SIZE];\r
49 uint16_t head[BUCKET_SIZE];\r
50 /* Truncated hash used for quick rejection of "distance cache" candidates. */\r
51 uint8_t tiny_hash[65536];\r
52 FN(Bank) banks[NUM_BANKS];\r
53 uint16_t free_slot_idx[NUM_BANKS];\r
54 BROTLI_BOOL is_dirty_;\r
55 DictionarySearchStatictics dict_search_stats_;\r
56 size_t max_hops;\r
57} HashForgetfulChain;\r
58\r
59static void FN(Reset)(HashForgetfulChain* self) {\r
60 self->is_dirty_ = BROTLI_TRUE;\r
61 DictionarySearchStaticticsReset(&self->dict_search_stats_);\r
62}\r
63\r
64static void FN(InitEmpty)(HashForgetfulChain* self) {\r
65 if (self->is_dirty_) {\r
66 /* Fill |addr| array with 0xCCCCCCCC value. Because of wrapping, position\r
67 processed by hasher never reaches 3GB + 64M; this makes all new chains\r
68 to be terminated after the first node. */\r
69 memset(self->addr, 0xCC, sizeof(self->addr));\r
70 memset(self->head, 0, sizeof(self->head));\r
71 memset(self->tiny_hash, 0, sizeof(self->tiny_hash));\r
72 memset(self->free_slot_idx, 0, sizeof(self->free_slot_idx));\r
73 self->is_dirty_ = BROTLI_FALSE;\r
74 }\r
75}\r
76\r
77static void FN(InitForData)(HashForgetfulChain* self, const uint8_t* data,\r
78 size_t num) {\r
79 size_t i;\r
80 for (i = 0; i < num; ++i) {\r
81 size_t bucket = FN(HashBytes)(&data[i]);\r
82 /* See InitEmpty comment. */\r
83 self->addr[bucket] = 0xCCCCCCCC;\r
84 self->head[bucket] = 0xCCCC;\r
85 }\r
86 memset(self->tiny_hash, 0, sizeof(self->tiny_hash));\r
87 memset(self->free_slot_idx, 0, sizeof(self->free_slot_idx));\r
88 if (num != 0) {\r
89 self->is_dirty_ = BROTLI_FALSE;\r
90 }\r
91}\r
92\r
93static void FN(Init)(\r
94 MemoryManager* m, HashForgetfulChain* self, const uint8_t* data,\r
95 const BrotliEncoderParams* params, size_t position, size_t bytes,\r
96 BROTLI_BOOL is_last) {\r
97 /* Choose which init method is faster.\r
98 Init() is about 100 times faster than InitForData(). */\r
99 const size_t kMaxBytesForPartialHashInit = BUCKET_SIZE >> 6;\r
100 BROTLI_UNUSED(m);\r
101 self->max_hops = (params->quality > 6 ? 7u : 8u) << (params->quality - 4);\r
102 if (position == 0 && is_last && bytes <= kMaxBytesForPartialHashInit) {\r
103 FN(InitForData)(self, data, bytes);\r
104 } else {\r
105 FN(InitEmpty)(self);\r
106 }\r
107}\r
108\r
109/* Look at 4 bytes at &data[ix & mask]. Compute a hash from these, and prepend\r
110 node to corresponding chain; also update tiny_hash for current position. */\r
111static BROTLI_INLINE void FN(Store)(HashForgetfulChain* BROTLI_RESTRICT self,\r
112 const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) {\r
113 const size_t key = FN(HashBytes)(&data[ix & mask]);\r
114 const size_t bank = key & (NUM_BANKS - 1);\r
115 const size_t idx = self->free_slot_idx[bank]++ & (BANK_SIZE - 1);\r
116 size_t delta = ix - self->addr[key];\r
117 self->tiny_hash[(uint16_t)ix] = (uint8_t)key;\r
118 if (delta > 0xFFFF) delta = CAPPED_CHAINS ? 0 : 0xFFFF;\r
119 self->banks[bank].slots[idx].delta = (uint16_t)delta;\r
120 self->banks[bank].slots[idx].next = self->head[key];\r
121 self->addr[key] = (uint32_t)ix;\r
122 self->head[key] = (uint16_t)idx;\r
123}\r
124\r
125static BROTLI_INLINE void FN(StoreRange)(HashForgetfulChain* self,\r
126 const uint8_t *data, const size_t mask, const size_t ix_start,\r
127 const size_t ix_end) {\r
128 size_t i;\r
129 for (i = ix_start; i < ix_end; ++i) {\r
130 FN(Store)(self, data, mask, i);\r
131 }\r
132}\r
133\r
134static BROTLI_INLINE void FN(StitchToPreviousBlock)(HashForgetfulChain* self,\r
135 size_t num_bytes, size_t position, const uint8_t* ringbuffer,\r
136 size_t ring_buffer_mask) {\r
137 if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) {\r
138 /* Prepare the hashes for three last bytes of the last write.\r
139 These could not be calculated before, since they require knowledge\r
140 of both the previous and the current block. */\r
141 FN(Store)(self, ringbuffer, ring_buffer_mask, position - 3);\r
142 FN(Store)(self, ringbuffer, ring_buffer_mask, position - 2);\r
143 FN(Store)(self, ringbuffer, ring_buffer_mask, position - 1);\r
144 }\r
145}\r
146\r
147/* Find a longest backward match of &data[cur_ix] up to the length of\r
148 max_length and stores the position cur_ix in the hash table.\r
149\r
150 Does not look for matches longer than max_length.\r
151 Does not look for matches further away than max_backward.\r
152 Writes the best match into |out|.\r
153 Returns 1 when match is found, otherwise 0. */\r
154static BROTLI_INLINE BROTLI_BOOL FN(FindLongestMatch)(\r
155 HashForgetfulChain* self, const uint8_t* BROTLI_RESTRICT data,\r
156 const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache,\r
157 const size_t cur_ix, const size_t max_length, const size_t max_backward,\r
158 HasherSearchResult* BROTLI_RESTRICT out) {\r
159 const size_t cur_ix_masked = cur_ix & ring_buffer_mask;\r
160 BROTLI_BOOL is_match_found = BROTLI_FALSE;\r
161 /* Don't accept a short copy from far away. */\r
162 score_t best_score = out->score;\r
163 size_t best_len = out->len;\r
164 size_t i;\r
165 const size_t key = FN(HashBytes)(&data[cur_ix_masked]);\r
166 const uint8_t tiny_hash = (uint8_t)(key);\r
167 out->len = 0;\r
168 out->len_x_code = 0;\r
169 /* Try last distance first. */\r
170 for (i = 0; i < NUM_LAST_DISTANCES_TO_CHECK; ++i) {\r
171 const size_t idx = kDistanceCacheIndex[i];\r
172 const size_t backward =\r
173 (size_t)(distance_cache[idx] + kDistanceCacheOffset[i]);\r
174 size_t prev_ix = (cur_ix - backward);\r
175 if (i > 0 && self->tiny_hash[(uint16_t)prev_ix] != tiny_hash) continue;\r
176 if (prev_ix >= cur_ix || backward > max_backward) {\r
177 continue;\r
178 }\r
179 prev_ix &= ring_buffer_mask;\r
180 {\r
181 const size_t len = FindMatchLengthWithLimit(&data[prev_ix],\r
182 &data[cur_ix_masked],\r
183 max_length);\r
184 if (len >= 2) {\r
185 score_t score = BackwardReferenceScoreUsingLastDistance(len, i);\r
186 if (best_score < score) {\r
187 best_score = score;\r
188 best_len = len;\r
189 out->len = best_len;\r
190 out->distance = backward;\r
191 out->score = best_score;\r
192 is_match_found = BROTLI_TRUE;\r
193 }\r
194 }\r
195 }\r
196 }\r
197 {\r
198 const size_t bank = key & (NUM_BANKS - 1);\r
199 size_t backward = 0;\r
200 size_t hops = self->max_hops;\r
201 size_t delta = cur_ix - self->addr[key];\r
202 size_t slot = self->head[key];\r
203 while (hops--) {\r
204 size_t prev_ix;\r
205 size_t last = slot;\r
206 backward += delta;\r
207 if (backward > max_backward || (CAPPED_CHAINS && !delta)) break;\r
208 prev_ix = (cur_ix - backward) & ring_buffer_mask;\r
209 slot = self->banks[bank].slots[last].next;\r
210 delta = self->banks[bank].slots[last].delta;\r
211 if (cur_ix_masked + best_len > ring_buffer_mask ||\r
212 prev_ix + best_len > ring_buffer_mask ||\r
213 data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {\r
214 continue;\r
215 }\r
216 {\r
217 const size_t len = FindMatchLengthWithLimit(&data[prev_ix],\r
218 &data[cur_ix_masked],\r
219 max_length);\r
220 if (len >= 4) {\r
221 /* Comparing for >= 3 does not change the semantics, but just saves\r
222 for a few unnecessary binary logarithms in backward reference\r
223 score, since we are not interested in such short matches. */\r
224 score_t score = BackwardReferenceScore(len, backward);\r
225 if (best_score < score) {\r
226 best_score = score;\r
227 best_len = len;\r
228 out->len = best_len;\r
229 out->distance = backward;\r
230 out->score = best_score;\r
231 is_match_found = BROTLI_TRUE;\r
232 }\r
233 }\r
234 }\r
235 }\r
236 FN(Store)(self, data, ring_buffer_mask, cur_ix);\r
237 }\r
238 if (!is_match_found) {\r
239 is_match_found = SearchInStaticDictionary(&self->dict_search_stats_,\r
240 &data[cur_ix_masked], max_length, max_backward, out, BROTLI_FALSE);\r
241 }\r
242 return is_match_found;\r
243}\r
244\r
245#undef BANK_SIZE\r
246#undef BUCKET_SIZE\r
247#undef CAPPED_CHAINS\r
248\r
249#undef HashForgetfulChain\r