]> git.proxmox.com Git - mirror_edk2.git/blame - BaseTools/Source/C/BrotliCompress/enc/static_dict.c
BaseTools: Copy Brotli algorithm 3rd party source code for tool
[mirror_edk2.git] / BaseTools / Source / C / BrotliCompress / enc / static_dict.c
CommitLineData
11b7501a
SB
1/* Copyright 2013 Google Inc. All Rights Reserved.\r
2\r
3 Distributed under MIT license.\r
4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT\r
5*/\r
6\r
7#include "./static_dict.h"\r
8\r
9#include "../common/dictionary.h"\r
10#include "./find_match_length.h"\r
11#include "./port.h"\r
12#include "./static_dict_lut.h"\r
13\r
14#if defined(__cplusplus) || defined(c_plusplus)\r
15extern "C" {\r
16#endif\r
17\r
/* Value of DictWord.transform that denotes the "uppercase first" variant of a
   dictionary word; any other non-zero value is handled as "uppercase all". */
static const uint8_t kUppercaseFirst = 10;

/* Transform id of "" + kOmitLastN + "", indexed by N, the number of bytes
   dropped from the end of the word. Entry 0 is the identity transform. */
static const uint8_t kOmitLastNTransforms[10] = {
  /* N = 0 .. 4 */ 0, 12, 27, 23, 42,
  /* N = 5 .. 9 */ 63, 56, 48, 59, 64,
};
22\r
23static BROTLI_INLINE uint32_t Hash(const uint8_t *data) {\r
24 uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kDictHashMul32;\r
25 /* The higher bits contain more mixture from the multiplication,\r
26 so we take our results from there. */\r
27 return h >> (32 - kDictNumBits);\r
28}\r
29\r
30static BROTLI_INLINE void AddMatch(size_t distance, size_t len, size_t len_code,\r
31 uint32_t* matches) {\r
32 uint32_t match = (uint32_t)((distance << 5) + len_code);\r
33 matches[len] = BROTLI_MIN(uint32_t, matches[len], match);\r
34}\r
35\r
36static BROTLI_INLINE size_t DictMatchLength(const uint8_t* data,\r
37 size_t id,\r
38 size_t len,\r
39 size_t maxlen) {\r
40 const size_t offset = kBrotliDictionaryOffsetsByLength[len] + len * id;\r
41 return FindMatchLengthWithLimit(&kBrotliDictionary[offset], data,\r
42 BROTLI_MIN(size_t, len, maxlen));\r
43}\r
44\r
45static BROTLI_INLINE BROTLI_BOOL IsMatch(\r
46 DictWord w, const uint8_t* data, size_t max_length) {\r
47 if (w.len > max_length) {\r
48 return BROTLI_FALSE;\r
49 } else {\r
50 const size_t offset = kBrotliDictionaryOffsetsByLength[w.len] +\r
51 (size_t)w.len * (size_t)w.idx;\r
52 const uint8_t* dict = &kBrotliDictionary[offset];\r
53 if (w.transform == 0) {\r
54 /* Match against base dictionary word. */\r
55 return\r
56 TO_BROTLI_BOOL(FindMatchLengthWithLimit(dict, data, w.len) == w.len);\r
57 } else if (w.transform == 10) {\r
58 /* Match against uppercase first transform.\r
59 Note that there are only ASCII uppercase words in the lookup table. */\r
60 return TO_BROTLI_BOOL(dict[0] >= 'a' && dict[0] <= 'z' &&\r
61 (dict[0] ^ 32) == data[0] &&\r
62 FindMatchLengthWithLimit(&dict[1], &data[1], w.len - 1u) ==\r
63 w.len - 1u);\r
64 } else {\r
65 /* Match against uppercase all transform.\r
66 Note that there are only ASCII uppercase words in the lookup table. */\r
67 size_t i;\r
68 for (i = 0; i < w.len; ++i) {\r
69 if (dict[i] >= 'a' && dict[i] <= 'z') {\r
70 if ((dict[i] ^ 32) != data[i]) return BROTLI_FALSE;\r
71 } else {\r
72 if (dict[i] != data[i]) return BROTLI_FALSE;\r
73 }\r
74 }\r
75 return BROTLI_TRUE;\r
76 }\r
77 }\r
78}\r
79\r
80BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(\r
81 const uint8_t* data, size_t min_length, size_t max_length,\r
82 uint32_t* matches) {\r
83 BROTLI_BOOL has_found_match = BROTLI_FALSE;\r
84 {\r
85 size_t offset = kStaticDictionaryBuckets[Hash(data)];\r
86 BROTLI_BOOL end = !offset;\r
87 while (!end) {\r
88 DictWord w = kStaticDictionaryWords[offset++];\r
89 const size_t l = w.len & 0x7F;\r
90 const size_t n = (size_t)1 << kBrotliDictionarySizeBitsByLength[l];\r
91 const size_t id = w.idx;\r
92 end = !!(w.len & 0x80);\r
93 w.len = (uint8_t)l;\r
94 if (w.transform == 0) {\r
95 const size_t matchlen = DictMatchLength(data, id, l, max_length);\r
96 const uint8_t* s;\r
97 size_t minlen;\r
98 size_t maxlen;\r
99 size_t len;\r
100 /* Transform "" + kIdentity + "" */\r
101 if (matchlen == l) {\r
102 AddMatch(id, l, l, matches);\r
103 has_found_match = BROTLI_TRUE;\r
104 }\r
105 /* Transforms "" + kOmitLast1 + "" and "" + kOmitLast1 + "ing " */\r
106 if (matchlen >= l - 1) {\r
107 AddMatch(id + 12 * n, l - 1, l, matches);\r
108 if (l + 2 < max_length &&\r
109 data[l - 1] == 'i' && data[l] == 'n' && data[l + 1] == 'g' &&\r
110 data[l + 2] == ' ') {\r
111 AddMatch(id + 49 * n, l + 3, l, matches);\r
112 }\r
113 has_found_match = BROTLI_TRUE;\r
114 }\r
115 /* Transform "" + kOmitLastN + "" (N = 2 .. 9) */\r
116 minlen = min_length;\r
117 if (l > 9) minlen = BROTLI_MAX(size_t, minlen, l - 9);\r
118 maxlen = BROTLI_MIN(size_t, matchlen, l - 2);\r
119 for (len = minlen; len <= maxlen; ++len) {\r
120 AddMatch(id + kOmitLastNTransforms[l - len] * n, len, l, matches);\r
121 has_found_match = BROTLI_TRUE;\r
122 }\r
123 if (matchlen < l || l + 6 >= max_length) {\r
124 continue;\r
125 }\r
126 s = &data[l];\r
127 /* Transforms "" + kIdentity + <suffix> */\r
128 if (s[0] == ' ') {\r
129 AddMatch(id + n, l + 1, l, matches);\r
130 if (s[1] == 'a') {\r
131 if (s[2] == ' ') {\r
132 AddMatch(id + 28 * n, l + 3, l, matches);\r
133 } else if (s[2] == 's') {\r
134 if (s[3] == ' ') AddMatch(id + 46 * n, l + 4, l, matches);\r
135 } else if (s[2] == 't') {\r
136 if (s[3] == ' ') AddMatch(id + 60 * n, l + 4, l, matches);\r
137 } else if (s[2] == 'n') {\r
138 if (s[3] == 'd' && s[4] == ' ') {\r
139 AddMatch(id + 10 * n, l + 5, l, matches);\r
140 }\r
141 }\r
142 } else if (s[1] == 'b') {\r
143 if (s[2] == 'y' && s[3] == ' ') {\r
144 AddMatch(id + 38 * n, l + 4, l, matches);\r
145 }\r
146 } else if (s[1] == 'i') {\r
147 if (s[2] == 'n') {\r
148 if (s[3] == ' ') AddMatch(id + 16 * n, l + 4, l, matches);\r
149 } else if (s[2] == 's') {\r
150 if (s[3] == ' ') AddMatch(id + 47 * n, l + 4, l, matches);\r
151 }\r
152 } else if (s[1] == 'f') {\r
153 if (s[2] == 'o') {\r
154 if (s[3] == 'r' && s[4] == ' ') {\r
155 AddMatch(id + 25 * n, l + 5, l, matches);\r
156 }\r
157 } else if (s[2] == 'r') {\r
158 if (s[3] == 'o' && s[4] == 'm' && s[5] == ' ') {\r
159 AddMatch(id + 37 * n, l + 6, l, matches);\r
160 }\r
161 }\r
162 } else if (s[1] == 'o') {\r
163 if (s[2] == 'f') {\r
164 if (s[3] == ' ') AddMatch(id + 8 * n, l + 4, l, matches);\r
165 } else if (s[2] == 'n') {\r
166 if (s[3] == ' ') AddMatch(id + 45 * n, l + 4, l, matches);\r
167 }\r
168 } else if (s[1] == 'n') {\r
169 if (s[2] == 'o' && s[3] == 't' && s[4] == ' ') {\r
170 AddMatch(id + 80 * n, l + 5, l, matches);\r
171 }\r
172 } else if (s[1] == 't') {\r
173 if (s[2] == 'h') {\r
174 if (s[3] == 'e') {\r
175 if (s[4] == ' ') AddMatch(id + 5 * n, l + 5, l, matches);\r
176 } else if (s[3] == 'a') {\r
177 if (s[4] == 't' && s[5] == ' ') {\r
178 AddMatch(id + 29 * n, l + 6, l, matches);\r
179 }\r
180 }\r
181 } else if (s[2] == 'o') {\r
182 if (s[3] == ' ') AddMatch(id + 17 * n, l + 4, l, matches);\r
183 }\r
184 } else if (s[1] == 'w') {\r
185 if (s[2] == 'i' && s[3] == 't' && s[4] == 'h' && s[5] == ' ') {\r
186 AddMatch(id + 35 * n, l + 6, l, matches);\r
187 }\r
188 }\r
189 } else if (s[0] == '"') {\r
190 AddMatch(id + 19 * n, l + 1, l, matches);\r
191 if (s[1] == '>') {\r
192 AddMatch(id + 21 * n, l + 2, l, matches);\r
193 }\r
194 } else if (s[0] == '.') {\r
195 AddMatch(id + 20 * n, l + 1, l, matches);\r
196 if (s[1] == ' ') {\r
197 AddMatch(id + 31 * n, l + 2, l, matches);\r
198 if (s[2] == 'T' && s[3] == 'h') {\r
199 if (s[4] == 'e') {\r
200 if (s[5] == ' ') AddMatch(id + 43 * n, l + 6, l, matches);\r
201 } else if (s[4] == 'i') {\r
202 if (s[5] == 's' && s[6] == ' ') {\r
203 AddMatch(id + 75 * n, l + 7, l, matches);\r
204 }\r
205 }\r
206 }\r
207 }\r
208 } else if (s[0] == ',') {\r
209 AddMatch(id + 76 * n, l + 1, l, matches);\r
210 if (s[1] == ' ') {\r
211 AddMatch(id + 14 * n, l + 2, l, matches);\r
212 }\r
213 } else if (s[0] == '\n') {\r
214 AddMatch(id + 22 * n, l + 1, l, matches);\r
215 if (s[1] == '\t') {\r
216 AddMatch(id + 50 * n, l + 2, l, matches);\r
217 }\r
218 } else if (s[0] == ']') {\r
219 AddMatch(id + 24 * n, l + 1, l, matches);\r
220 } else if (s[0] == '\'') {\r
221 AddMatch(id + 36 * n, l + 1, l, matches);\r
222 } else if (s[0] == ':') {\r
223 AddMatch(id + 51 * n, l + 1, l, matches);\r
224 } else if (s[0] == '(') {\r
225 AddMatch(id + 57 * n, l + 1, l, matches);\r
226 } else if (s[0] == '=') {\r
227 if (s[1] == '"') {\r
228 AddMatch(id + 70 * n, l + 2, l, matches);\r
229 } else if (s[1] == '\'') {\r
230 AddMatch(id + 86 * n, l + 2, l, matches);\r
231 }\r
232 } else if (s[0] == 'a') {\r
233 if (s[1] == 'l' && s[2] == ' ') {\r
234 AddMatch(id + 84 * n, l + 3, l, matches);\r
235 }\r
236 } else if (s[0] == 'e') {\r
237 if (s[1] == 'd') {\r
238 if (s[2] == ' ') AddMatch(id + 53 * n, l + 3, l, matches);\r
239 } else if (s[1] == 'r') {\r
240 if (s[2] == ' ') AddMatch(id + 82 * n, l + 3, l, matches);\r
241 } else if (s[1] == 's') {\r
242 if (s[2] == 't' && s[3] == ' ') {\r
243 AddMatch(id + 95 * n, l + 4, l, matches);\r
244 }\r
245 }\r
246 } else if (s[0] == 'f') {\r
247 if (s[1] == 'u' && s[2] == 'l' && s[3] == ' ') {\r
248 AddMatch(id + 90 * n, l + 4, l, matches);\r
249 }\r
250 } else if (s[0] == 'i') {\r
251 if (s[1] == 'v') {\r
252 if (s[2] == 'e' && s[3] == ' ') {\r
253 AddMatch(id + 92 * n, l + 4, l, matches);\r
254 }\r
255 } else if (s[1] == 'z') {\r
256 if (s[2] == 'e' && s[3] == ' ') {\r
257 AddMatch(id + 100 * n, l + 4, l, matches);\r
258 }\r
259 }\r
260 } else if (s[0] == 'l') {\r
261 if (s[1] == 'e') {\r
262 if (s[2] == 's' && s[3] == 's' && s[4] == ' ') {\r
263 AddMatch(id + 93 * n, l + 5, l, matches);\r
264 }\r
265 } else if (s[1] == 'y') {\r
266 if (s[2] == ' ') AddMatch(id + 61 * n, l + 3, l, matches);\r
267 }\r
268 } else if (s[0] == 'o') {\r
269 if (s[1] == 'u' && s[2] == 's' && s[3] == ' ') {\r
270 AddMatch(id + 106 * n, l + 4, l, matches);\r
271 }\r
272 }\r
273 } else {\r
274 /* Set is_all_caps=0 for kUppercaseFirst and\r
275 is_all_caps=1 otherwise (kUppercaseAll) transform. */\r
276 const BROTLI_BOOL is_all_caps =\r
277 TO_BROTLI_BOOL(w.transform != kUppercaseFirst);\r
278 const uint8_t* s;\r
279 if (!IsMatch(w, data, max_length)) {\r
280 continue;\r
281 }\r
282 /* Transform "" + kUppercase{First,All} + "" */\r
283 AddMatch(id + (is_all_caps ? 44 : 9) * n, l, l, matches);\r
284 has_found_match = BROTLI_TRUE;\r
285 if (l + 1 >= max_length) {\r
286 continue;\r
287 }\r
288 /* Transforms "" + kUppercase{First,All} + <suffix> */\r
289 s = &data[l];\r
290 if (s[0] == ' ') {\r
291 AddMatch(id + (is_all_caps ? 68 : 4) * n, l + 1, l, matches);\r
292 } else if (s[0] == '"') {\r
293 AddMatch(id + (is_all_caps ? 87 : 66) * n, l + 1, l, matches);\r
294 if (s[1] == '>') {\r
295 AddMatch(id + (is_all_caps ? 97 : 69) * n, l + 2, l, matches);\r
296 }\r
297 } else if (s[0] == '.') {\r
298 AddMatch(id + (is_all_caps ? 101 : 79) * n, l + 1, l, matches);\r
299 if (s[1] == ' ') {\r
300 AddMatch(id + (is_all_caps ? 114 : 88) * n, l + 2, l, matches);\r
301 }\r
302 } else if (s[0] == ',') {\r
303 AddMatch(id + (is_all_caps ? 112 : 99) * n, l + 1, l, matches);\r
304 if (s[1] == ' ') {\r
305 AddMatch(id + (is_all_caps ? 107 : 58) * n, l + 2, l, matches);\r
306 }\r
307 } else if (s[0] == '\'') {\r
308 AddMatch(id + (is_all_caps ? 94 : 74) * n, l + 1, l, matches);\r
309 } else if (s[0] == '(') {\r
310 AddMatch(id + (is_all_caps ? 113 : 78) * n, l + 1, l, matches);\r
311 } else if (s[0] == '=') {\r
312 if (s[1] == '"') {\r
313 AddMatch(id + (is_all_caps ? 105 : 104) * n, l + 2, l, matches);\r
314 } else if (s[1] == '\'') {\r
315 AddMatch(id + (is_all_caps ? 116 : 108) * n, l + 2, l, matches);\r
316 }\r
317 }\r
318 }\r
319 }\r
320 }\r
321 /* Transforms with prefixes " " and "." */\r
322 if (max_length >= 5 && (data[0] == ' ' || data[0] == '.')) {\r
323 BROTLI_BOOL is_space = TO_BROTLI_BOOL(data[0] == ' ');\r
324 size_t offset = kStaticDictionaryBuckets[Hash(&data[1])];\r
325 BROTLI_BOOL end = !offset;\r
326 while (!end) {\r
327 DictWord w = kStaticDictionaryWords[offset++];\r
328 const size_t l = w.len & 0x7F;\r
329 const size_t n = (size_t)1 << kBrotliDictionarySizeBitsByLength[l];\r
330 const size_t id = w.idx;\r
331 end = !!(w.len & 0x80);\r
332 w.len = (uint8_t)l;\r
333 if (w.transform == 0) {\r
334 const uint8_t* s;\r
335 if (!IsMatch(w, &data[1], max_length - 1)) {\r
336 continue;\r
337 }\r
338 /* Transforms " " + kIdentity + "" and "." + kIdentity + "" */\r
339 AddMatch(id + (is_space ? 6 : 32) * n, l + 1, l, matches);\r
340 has_found_match = BROTLI_TRUE;\r
341 if (l + 2 >= max_length) {\r
342 continue;\r
343 }\r
344 /* Transforms " " + kIdentity + <suffix> and "." + kIdentity + <suffix>\r
345 */\r
346 s = &data[l + 1];\r
347 if (s[0] == ' ') {\r
348 AddMatch(id + (is_space ? 2 : 77) * n, l + 2, l, matches);\r
349 } else if (s[0] == '(') {\r
350 AddMatch(id + (is_space ? 89 : 67) * n, l + 2, l, matches);\r
351 } else if (is_space) {\r
352 if (s[0] == ',') {\r
353 AddMatch(id + 103 * n, l + 2, l, matches);\r
354 if (s[1] == ' ') {\r
355 AddMatch(id + 33 * n, l + 3, l, matches);\r
356 }\r
357 } else if (s[0] == '.') {\r
358 AddMatch(id + 71 * n, l + 2, l, matches);\r
359 if (s[1] == ' ') {\r
360 AddMatch(id + 52 * n, l + 3, l, matches);\r
361 }\r
362 } else if (s[0] == '=') {\r
363 if (s[1] == '"') {\r
364 AddMatch(id + 81 * n, l + 3, l, matches);\r
365 } else if (s[1] == '\'') {\r
366 AddMatch(id + 98 * n, l + 3, l, matches);\r
367 }\r
368 }\r
369 }\r
370 } else if (is_space) {\r
371 /* Set is_all_caps=0 for kUppercaseFirst and\r
372 is_all_caps=1 otherwise (kUppercaseAll) transform. */\r
373 const BROTLI_BOOL is_all_caps =\r
374 TO_BROTLI_BOOL(w.transform != kUppercaseFirst);\r
375 const uint8_t* s;\r
376 if (!IsMatch(w, &data[1], max_length - 1)) {\r
377 continue;\r
378 }\r
379 /* Transforms " " + kUppercase{First,All} + "" */\r
380 AddMatch(id + (is_all_caps ? 85 : 30) * n, l + 1, l, matches);\r
381 has_found_match = BROTLI_TRUE;\r
382 if (l + 2 >= max_length) {\r
383 continue;\r
384 }\r
385 /* Transforms " " + kUppercase{First,All} + <suffix> */\r
386 s = &data[l + 1];\r
387 if (s[0] == ' ') {\r
388 AddMatch(id + (is_all_caps ? 83 : 15) * n, l + 2, l, matches);\r
389 } else if (s[0] == ',') {\r
390 if (!is_all_caps) {\r
391 AddMatch(id + 109 * n, l + 2, l, matches);\r
392 }\r
393 if (s[1] == ' ') {\r
394 AddMatch(id + (is_all_caps ? 111 : 65) * n, l + 3, l, matches);\r
395 }\r
396 } else if (s[0] == '.') {\r
397 AddMatch(id + (is_all_caps ? 115 : 96) * n, l + 2, l, matches);\r
398 if (s[1] == ' ') {\r
399 AddMatch(id + (is_all_caps ? 117 : 91) * n, l + 3, l, matches);\r
400 }\r
401 } else if (s[0] == '=') {\r
402 if (s[1] == '"') {\r
403 AddMatch(id + (is_all_caps ? 110 : 118) * n, l + 3, l, matches);\r
404 } else if (s[1] == '\'') {\r
405 AddMatch(id + (is_all_caps ? 119 : 120) * n, l + 3, l, matches);\r
406 }\r
407 }\r
408 }\r
409 }\r
410 }\r
411 if (max_length >= 6) {\r
412 /* Transforms with prefixes "e ", "s ", ", " and "\xc2\xa0" */\r
413 if ((data[1] == ' ' &&\r
414 (data[0] == 'e' || data[0] == 's' || data[0] == ',')) ||\r
415 (data[0] == 0xc2 && data[1] == 0xa0)) {\r
416 size_t offset = kStaticDictionaryBuckets[Hash(&data[2])];\r
417 BROTLI_BOOL end = !offset;\r
418 while (!end) {\r
419 DictWord w = kStaticDictionaryWords[offset++];\r
420 const size_t l = w.len & 0x7F;\r
421 const size_t n = (size_t)1 << kBrotliDictionarySizeBitsByLength[l];\r
422 const size_t id = w.idx;\r
423 end = !!(w.len & 0x80);\r
424 w.len = (uint8_t)l;\r
425 if (w.transform == 0 && IsMatch(w, &data[2], max_length - 2)) {\r
426 if (data[0] == 0xc2) {\r
427 AddMatch(id + 102 * n, l + 2, l, matches);\r
428 has_found_match = BROTLI_TRUE;\r
429 } else if (l + 2 < max_length && data[l + 2] == ' ') {\r
430 size_t t = data[0] == 'e' ? 18 : (data[0] == 's' ? 7 : 13);\r
431 AddMatch(id + t * n, l + 3, l, matches);\r
432 has_found_match = BROTLI_TRUE;\r
433 }\r
434 }\r
435 }\r
436 }\r
437 }\r
438 if (max_length >= 9) {\r
439 /* Transforms with prefixes " the " and ".com/" */\r
440 if ((data[0] == ' ' && data[1] == 't' && data[2] == 'h' &&\r
441 data[3] == 'e' && data[4] == ' ') ||\r
442 (data[0] == '.' && data[1] == 'c' && data[2] == 'o' &&\r
443 data[3] == 'm' && data[4] == '/')) {\r
444 size_t offset = kStaticDictionaryBuckets[Hash(&data[5])];\r
445 BROTLI_BOOL end = !offset;\r
446 while (!end) {\r
447 DictWord w = kStaticDictionaryWords[offset++];\r
448 const size_t l = w.len & 0x7F;\r
449 const size_t n = (size_t)1 << kBrotliDictionarySizeBitsByLength[l];\r
450 const size_t id = w.idx;\r
451 end = !!(w.len & 0x80);\r
452 w.len = (uint8_t)l;\r
453 if (w.transform == 0 && IsMatch(w, &data[5], max_length - 5)) {\r
454 AddMatch(id + (data[0] == ' ' ? 41 : 72) * n, l + 5, l, matches);\r
455 has_found_match = BROTLI_TRUE;\r
456 if (l + 5 < max_length) {\r
457 const uint8_t* s = &data[l + 5];\r
458 if (data[0] == ' ') {\r
459 if (l + 8 < max_length &&\r
460 s[0] == ' ' && s[1] == 'o' && s[2] == 'f' && s[3] == ' ') {\r
461 AddMatch(id + 62 * n, l + 9, l, matches);\r
462 if (l + 12 < max_length &&\r
463 s[4] == 't' && s[5] == 'h' && s[6] == 'e' && s[7] == ' ') {\r
464 AddMatch(id + 73 * n, l + 13, l, matches);\r
465 }\r
466 }\r
467 }\r
468 }\r
469 }\r
470 }\r
471 }\r
472 }\r
473 return has_found_match;\r
474}\r
475\r
476#if defined(__cplusplus) || defined(c_plusplus)\r
477} /* extern "C" */\r
478#endif\r