]> git.proxmox.com Git - mirror_edk2.git/blob - BaseTools/Source/C/BrotliCompress/enc/static_dict.c
BaseTools: Copy Brotli algorithm 3rd party source code for tool
[mirror_edk2.git] / BaseTools / Source / C / BrotliCompress / enc / static_dict.c
1 /* Copyright 2013 Google Inc. All Rights Reserved.
2
3 Distributed under MIT license.
4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 */
6
7 #include "./static_dict.h"
8
9 #include "../common/dictionary.h"
10 #include "./find_match_length.h"
11 #include "./port.h"
12 #include "./static_dict_lut.h"
13
14 #if defined(__cplusplus) || defined(c_plusplus)
15 extern "C" {
16 #endif
17
/* Value of DictWord.transform that marks the "uppercase first" variant of a
   word. The lookup code below treats any other non-zero transform value as
   "uppercase all". */
18 static const uint8_t kUppercaseFirst = 10;
/* Transform numbers for "drop the last N bytes of the word", indexed by N
   (the lookup code indexes it with word length minus matched length).
   Entry 0 is the identity transform and entry 1 equals the kOmitLast1
   transform number (12) that the lookup code also uses directly. */
19 static const uint8_t kOmitLastNTransforms[10] = {
20 0, 12, 27, 23, 42, 63, 56, 48, 59, 64,
21 };
22
23 static BROTLI_INLINE uint32_t Hash(const uint8_t *data) {
24 uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kDictHashMul32;
25 /* The higher bits contain more mixture from the multiplication,
26 so we take our results from there. */
27 return h >> (32 - kDictNumBits);
28 }
29
30 static BROTLI_INLINE void AddMatch(size_t distance, size_t len, size_t len_code,
31 uint32_t* matches) {
32 uint32_t match = (uint32_t)((distance << 5) + len_code);
33 matches[len] = BROTLI_MIN(uint32_t, matches[len], match);
34 }
35
36 static BROTLI_INLINE size_t DictMatchLength(const uint8_t* data,
37 size_t id,
38 size_t len,
39 size_t maxlen) {
40 const size_t offset = kBrotliDictionaryOffsetsByLength[len] + len * id;
41 return FindMatchLengthWithLimit(&kBrotliDictionary[offset], data,
42 BROTLI_MIN(size_t, len, maxlen));
43 }
44
45 static BROTLI_INLINE BROTLI_BOOL IsMatch(
46 DictWord w, const uint8_t* data, size_t max_length) {
47 if (w.len > max_length) {
48 return BROTLI_FALSE;
49 } else {
50 const size_t offset = kBrotliDictionaryOffsetsByLength[w.len] +
51 (size_t)w.len * (size_t)w.idx;
52 const uint8_t* dict = &kBrotliDictionary[offset];
53 if (w.transform == 0) {
54 /* Match against base dictionary word. */
55 return
56 TO_BROTLI_BOOL(FindMatchLengthWithLimit(dict, data, w.len) == w.len);
57 } else if (w.transform == 10) {
58 /* Match against uppercase first transform.
59 Note that there are only ASCII uppercase words in the lookup table. */
60 return TO_BROTLI_BOOL(dict[0] >= 'a' && dict[0] <= 'z' &&
61 (dict[0] ^ 32) == data[0] &&
62 FindMatchLengthWithLimit(&dict[1], &data[1], w.len - 1u) ==
63 w.len - 1u);
64 } else {
65 /* Match against uppercase all transform.
66 Note that there are only ASCII uppercase words in the lookup table. */
67 size_t i;
68 for (i = 0; i < w.len; ++i) {
69 if (dict[i] >= 'a' && dict[i] <= 'z') {
70 if ((dict[i] ^ 32) != data[i]) return BROTLI_FALSE;
71 } else {
72 if (dict[i] != data[i]) return BROTLI_FALSE;
73 }
74 }
75 return BROTLI_TRUE;
76 }
77 }
78 }
79
/* Collects static-dictionary matches (plain words and transformed words) for
   the input starting at |data|.

   data       - input bytes; positions below |max_length| are the ones the
                bounds checks in this function allow to be compared.
                NOTE(review): Hash(data) is applied unconditionally, so the
                caller presumably guarantees at least four readable bytes
                even when max_length is smaller - confirm against callers.
   min_length - lower bound used only for the kOmitLastN (N = 2 .. 9)
                candidates; the other candidates are not filtered by it.
   max_length - number of input bytes available for matching.
   matches    - table indexed by matched length. Every candidate is written
                through AddMatch, which keeps the smaller of the existing
                entry and (distance << 5) + len_code. The largest index
                written here is word length + 13.
                NOTE(review): entries are only ever lowered, so the caller
                presumably pre-fills the table with a large sentinel -
                confirm.

   Returns BROTLI_TRUE when at least one candidate was recorded.

   The function makes four passes, one per possible prefix length in front of
   the dictionary word (0, 1, 2 and 5 bytes). Each pass hashes the four bytes
   where the word would start, walks the corresponding bucket of
   kStaticDictionaryWords and, for every word that matches, also probes the
   input after the word for the suffixes that some transform appends. A
   candidate for transform number t of word id is recorded with the distance
   id + t * n, where n is 1 << kBrotliDictionarySizeBitsByLength[l]. */
80 BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
81 const uint8_t* data, size_t min_length, size_t max_length,
82 uint32_t* matches) {
83 BROTLI_BOOL has_found_match = BROTLI_FALSE;
/* Pass 1: the dictionary word starts at data[0] (no prefix). */
84 {
85 size_t offset = kStaticDictionaryBuckets[Hash(data)];
/* A bucket value of 0 means there is nothing to scan for this hash. */
86 BROTLI_BOOL end = !offset;
87 while (!end) {
88 DictWord w = kStaticDictionaryWords[offset++];
/* The low 7 bits of w.len hold the word length; the top bit flags the last
   entry of the bucket and is stripped before w is handed to IsMatch. */
89 const size_t l = w.len & 0x7F;
90 const size_t n = (size_t)1 << kBrotliDictionarySizeBitsByLength[l];
91 const size_t id = w.idx;
92 end = !!(w.len & 0x80);
93 w.len = (uint8_t)l;
94 if (w.transform == 0) {
95 const size_t matchlen = DictMatchLength(data, id, l, max_length);
96 const uint8_t* s;
97 size_t minlen;
98 size_t maxlen;
99 size_t len;
100 /* Transform "" + kIdentity + "" */
101 if (matchlen == l) {
102 AddMatch(id, l, l, matches);
103 has_found_match = BROTLI_TRUE;
104 }
105 /* Transforms "" + kOmitLast1 + "" and "" + kOmitLast1 + "ing " */
106 if (matchlen >= l - 1) {
107 AddMatch(id + 12 * n, l - 1, l, matches);
108 if (l + 2 < max_length &&
109 data[l - 1] == 'i' && data[l] == 'n' && data[l + 1] == 'g' &&
110 data[l + 2] == ' ') {
111 AddMatch(id + 49 * n, l + 3, l, matches);
112 }
113 has_found_match = BROTLI_TRUE;
114 }
115 /* Transform "" + kOmitLastN + "" (N = 2 .. 9) */
116 minlen = min_length;
/* Keep l - len within 0 .. 9, the valid index range of
   kOmitLastNTransforms. */
117 if (l > 9) minlen = BROTLI_MAX(size_t, minlen, l - 9);
118 maxlen = BROTLI_MIN(size_t, matchlen, l - 2);
119 for (len = minlen; len <= maxlen; ++len) {
120 AddMatch(id + kOmitLastNTransforms[l - len] * n, len, l, matches);
121 has_found_match = BROTLI_TRUE;
122 }
/* The suffix probes below read up to s[6] == data[l + 6], so they are only
   attempted for a full word match with at least seven more input bytes. */
123 if (matchlen < l || l + 6 >= max_length) {
124 continue;
125 }
126 s = &data[l];
127 /* Transforms "" + kIdentity + <suffix> */
128 if (s[0] == ' ') {
129 AddMatch(id + n, l + 1, l, matches);
130 if (s[1] == 'a') {
131 if (s[2] == ' ') {
132 AddMatch(id + 28 * n, l + 3, l, matches);
133 } else if (s[2] == 's') {
134 if (s[3] == ' ') AddMatch(id + 46 * n, l + 4, l, matches);
135 } else if (s[2] == 't') {
136 if (s[3] == ' ') AddMatch(id + 60 * n, l + 4, l, matches);
137 } else if (s[2] == 'n') {
138 if (s[3] == 'd' && s[4] == ' ') {
139 AddMatch(id + 10 * n, l + 5, l, matches);
140 }
141 }
142 } else if (s[1] == 'b') {
143 if (s[2] == 'y' && s[3] == ' ') {
144 AddMatch(id + 38 * n, l + 4, l, matches);
145 }
146 } else if (s[1] == 'i') {
147 if (s[2] == 'n') {
148 if (s[3] == ' ') AddMatch(id + 16 * n, l + 4, l, matches);
149 } else if (s[2] == 's') {
150 if (s[3] == ' ') AddMatch(id + 47 * n, l + 4, l, matches);
151 }
152 } else if (s[1] == 'f') {
153 if (s[2] == 'o') {
154 if (s[3] == 'r' && s[4] == ' ') {
155 AddMatch(id + 25 * n, l + 5, l, matches);
156 }
157 } else if (s[2] == 'r') {
158 if (s[3] == 'o' && s[4] == 'm' && s[5] == ' ') {
159 AddMatch(id + 37 * n, l + 6, l, matches);
160 }
161 }
162 } else if (s[1] == 'o') {
163 if (s[2] == 'f') {
164 if (s[3] == ' ') AddMatch(id + 8 * n, l + 4, l, matches);
165 } else if (s[2] == 'n') {
166 if (s[3] == ' ') AddMatch(id + 45 * n, l + 4, l, matches);
167 }
168 } else if (s[1] == 'n') {
169 if (s[2] == 'o' && s[3] == 't' && s[4] == ' ') {
170 AddMatch(id + 80 * n, l + 5, l, matches);
171 }
172 } else if (s[1] == 't') {
173 if (s[2] == 'h') {
174 if (s[3] == 'e') {
175 if (s[4] == ' ') AddMatch(id + 5 * n, l + 5, l, matches);
176 } else if (s[3] == 'a') {
177 if (s[4] == 't' && s[5] == ' ') {
178 AddMatch(id + 29 * n, l + 6, l, matches);
179 }
180 }
181 } else if (s[2] == 'o') {
182 if (s[3] == ' ') AddMatch(id + 17 * n, l + 4, l, matches);
183 }
184 } else if (s[1] == 'w') {
185 if (s[2] == 'i' && s[3] == 't' && s[4] == 'h' && s[5] == ' ') {
186 AddMatch(id + 35 * n, l + 6, l, matches);
187 }
188 }
189 } else if (s[0] == '"') {
190 AddMatch(id + 19 * n, l + 1, l, matches);
191 if (s[1] == '>') {
192 AddMatch(id + 21 * n, l + 2, l, matches);
193 }
194 } else if (s[0] == '.') {
195 AddMatch(id + 20 * n, l + 1, l, matches);
196 if (s[1] == ' ') {
197 AddMatch(id + 31 * n, l + 2, l, matches);
198 if (s[2] == 'T' && s[3] == 'h') {
199 if (s[4] == 'e') {
200 if (s[5] == ' ') AddMatch(id + 43 * n, l + 6, l, matches);
201 } else if (s[4] == 'i') {
202 if (s[5] == 's' && s[6] == ' ') {
203 AddMatch(id + 75 * n, l + 7, l, matches);
204 }
205 }
206 }
207 }
208 } else if (s[0] == ',') {
209 AddMatch(id + 76 * n, l + 1, l, matches);
210 if (s[1] == ' ') {
211 AddMatch(id + 14 * n, l + 2, l, matches);
212 }
213 } else if (s[0] == '\n') {
214 AddMatch(id + 22 * n, l + 1, l, matches);
215 if (s[1] == '\t') {
216 AddMatch(id + 50 * n, l + 2, l, matches);
217 }
218 } else if (s[0] == ']') {
219 AddMatch(id + 24 * n, l + 1, l, matches);
220 } else if (s[0] == '\'') {
221 AddMatch(id + 36 * n, l + 1, l, matches);
222 } else if (s[0] == ':') {
223 AddMatch(id + 51 * n, l + 1, l, matches);
224 } else if (s[0] == '(') {
225 AddMatch(id + 57 * n, l + 1, l, matches);
226 } else if (s[0] == '=') {
227 if (s[1] == '"') {
228 AddMatch(id + 70 * n, l + 2, l, matches);
229 } else if (s[1] == '\'') {
230 AddMatch(id + 86 * n, l + 2, l, matches);
231 }
232 } else if (s[0] == 'a') {
233 if (s[1] == 'l' && s[2] == ' ') {
234 AddMatch(id + 84 * n, l + 3, l, matches);
235 }
236 } else if (s[0] == 'e') {
237 if (s[1] == 'd') {
238 if (s[2] == ' ') AddMatch(id + 53 * n, l + 3, l, matches);
239 } else if (s[1] == 'r') {
240 if (s[2] == ' ') AddMatch(id + 82 * n, l + 3, l, matches);
241 } else if (s[1] == 's') {
242 if (s[2] == 't' && s[3] == ' ') {
243 AddMatch(id + 95 * n, l + 4, l, matches);
244 }
245 }
246 } else if (s[0] == 'f') {
247 if (s[1] == 'u' && s[2] == 'l' && s[3] == ' ') {
248 AddMatch(id + 90 * n, l + 4, l, matches);
249 }
250 } else if (s[0] == 'i') {
251 if (s[1] == 'v') {
252 if (s[2] == 'e' && s[3] == ' ') {
253 AddMatch(id + 92 * n, l + 4, l, matches);
254 }
255 } else if (s[1] == 'z') {
256 if (s[2] == 'e' && s[3] == ' ') {
257 AddMatch(id + 100 * n, l + 4, l, matches);
258 }
259 }
260 } else if (s[0] == 'l') {
261 if (s[1] == 'e') {
262 if (s[2] == 's' && s[3] == 's' && s[4] == ' ') {
263 AddMatch(id + 93 * n, l + 5, l, matches);
264 }
265 } else if (s[1] == 'y') {
266 if (s[2] == ' ') AddMatch(id + 61 * n, l + 3, l, matches);
267 }
268 } else if (s[0] == 'o') {
269 if (s[1] == 'u' && s[2] == 's' && s[3] == ' ') {
270 AddMatch(id + 106 * n, l + 4, l, matches);
271 }
272 }
273 } else {
274 /* Set is_all_caps=0 for kUppercaseFirst and
275 is_all_caps=1 otherwise (kUppercaseAll) transform. */
276 const BROTLI_BOOL is_all_caps =
277 TO_BROTLI_BOOL(w.transform != kUppercaseFirst);
278 const uint8_t* s;
279 if (!IsMatch(w, data, max_length)) {
280 continue;
281 }
282 /* Transform "" + kUppercase{First,All} + "" */
283 AddMatch(id + (is_all_caps ? 44 : 9) * n, l, l, matches);
284 has_found_match = BROTLI_TRUE;
/* The suffix probes below read s[0] and s[1], i.e. up to data[l + 1]. */
285 if (l + 1 >= max_length) {
286 continue;
287 }
288 /* Transforms "" + kUppercase{First,All} + <suffix> */
289 s = &data[l];
290 if (s[0] == ' ') {
291 AddMatch(id + (is_all_caps ? 68 : 4) * n, l + 1, l, matches);
292 } else if (s[0] == '"') {
293 AddMatch(id + (is_all_caps ? 87 : 66) * n, l + 1, l, matches);
294 if (s[1] == '>') {
295 AddMatch(id + (is_all_caps ? 97 : 69) * n, l + 2, l, matches);
296 }
297 } else if (s[0] == '.') {
298 AddMatch(id + (is_all_caps ? 101 : 79) * n, l + 1, l, matches);
299 if (s[1] == ' ') {
300 AddMatch(id + (is_all_caps ? 114 : 88) * n, l + 2, l, matches);
301 }
302 } else if (s[0] == ',') {
303 AddMatch(id + (is_all_caps ? 112 : 99) * n, l + 1, l, matches);
304 if (s[1] == ' ') {
305 AddMatch(id + (is_all_caps ? 107 : 58) * n, l + 2, l, matches);
306 }
307 } else if (s[0] == '\'') {
308 AddMatch(id + (is_all_caps ? 94 : 74) * n, l + 1, l, matches);
309 } else if (s[0] == '(') {
310 AddMatch(id + (is_all_caps ? 113 : 78) * n, l + 1, l, matches);
311 } else if (s[0] == '=') {
312 if (s[1] == '"') {
313 AddMatch(id + (is_all_caps ? 105 : 104) * n, l + 2, l, matches);
314 } else if (s[1] == '\'') {
315 AddMatch(id + (is_all_caps ? 116 : 108) * n, l + 2, l, matches);
316 }
317 }
318 }
319 }
320 }
/* Pass 2: a one-byte prefix, so the word starts at data[1]. The hash covers
   data[1 .. 4], hence the max_length >= 5 requirement. */
321 /* Transforms with prefixes " " and "." */
322 if (max_length >= 5 && (data[0] == ' ' || data[0] == '.')) {
323 BROTLI_BOOL is_space = TO_BROTLI_BOOL(data[0] == ' ');
324 size_t offset = kStaticDictionaryBuckets[Hash(&data[1])];
325 BROTLI_BOOL end = !offset;
326 while (!end) {
327 DictWord w = kStaticDictionaryWords[offset++];
328 const size_t l = w.len & 0x7F;
329 const size_t n = (size_t)1 << kBrotliDictionarySizeBitsByLength[l];
330 const size_t id = w.idx;
331 end = !!(w.len & 0x80);
332 w.len = (uint8_t)l;
333 if (w.transform == 0) {
334 const uint8_t* s;
335 if (!IsMatch(w, &data[1], max_length - 1)) {
336 continue;
337 }
338 /* Transforms " " + kIdentity + "" and "." + kIdentity + "" */
339 AddMatch(id + (is_space ? 6 : 32) * n, l + 1, l, matches);
340 has_found_match = BROTLI_TRUE;
/* The suffix probes below read s[0] and s[1], i.e. up to data[l + 2]. */
341 if (l + 2 >= max_length) {
342 continue;
343 }
344 /* Transforms " " + kIdentity + <suffix> and "." + kIdentity + <suffix>
345 */
346 s = &data[l + 1];
347 if (s[0] == ' ') {
348 AddMatch(id + (is_space ? 2 : 77) * n, l + 2, l, matches);
349 } else if (s[0] == '(') {
350 AddMatch(id + (is_space ? 89 : 67) * n, l + 2, l, matches);
351 } else if (is_space) {
352 if (s[0] == ',') {
353 AddMatch(id + 103 * n, l + 2, l, matches);
354 if (s[1] == ' ') {
355 AddMatch(id + 33 * n, l + 3, l, matches);
356 }
357 } else if (s[0] == '.') {
358 AddMatch(id + 71 * n, l + 2, l, matches);
359 if (s[1] == ' ') {
360 AddMatch(id + 52 * n, l + 3, l, matches);
361 }
362 } else if (s[0] == '=') {
363 if (s[1] == '"') {
364 AddMatch(id + 81 * n, l + 3, l, matches);
365 } else if (s[1] == '\'') {
366 AddMatch(id + 98 * n, l + 3, l, matches);
367 }
368 }
369 }
/* Uppercase variants are only probed behind the " " prefix, not behind ".". */
370 } else if (is_space) {
371 /* Set is_all_caps=0 for kUppercaseFirst and
372 is_all_caps=1 otherwise (kUppercaseAll) transform. */
373 const BROTLI_BOOL is_all_caps =
374 TO_BROTLI_BOOL(w.transform != kUppercaseFirst);
375 const uint8_t* s;
376 if (!IsMatch(w, &data[1], max_length - 1)) {
377 continue;
378 }
379 /* Transforms " " + kUppercase{First,All} + "" */
380 AddMatch(id + (is_all_caps ? 85 : 30) * n, l + 1, l, matches);
381 has_found_match = BROTLI_TRUE;
382 if (l + 2 >= max_length) {
383 continue;
384 }
385 /* Transforms " " + kUppercase{First,All} + <suffix> */
386 s = &data[l + 1];
387 if (s[0] == ' ') {
388 AddMatch(id + (is_all_caps ? 83 : 15) * n, l + 2, l, matches);
389 } else if (s[0] == ',') {
/* Only the uppercase-first variant records a candidate for the bare ","
   suffix here. */
390 if (!is_all_caps) {
391 AddMatch(id + 109 * n, l + 2, l, matches);
392 }
393 if (s[1] == ' ') {
394 AddMatch(id + (is_all_caps ? 111 : 65) * n, l + 3, l, matches);
395 }
396 } else if (s[0] == '.') {
397 AddMatch(id + (is_all_caps ? 115 : 96) * n, l + 2, l, matches);
398 if (s[1] == ' ') {
399 AddMatch(id + (is_all_caps ? 117 : 91) * n, l + 3, l, matches);
400 }
401 } else if (s[0] == '=') {
402 if (s[1] == '"') {
403 AddMatch(id + (is_all_caps ? 110 : 118) * n, l + 3, l, matches);
404 } else if (s[1] == '\'') {
405 AddMatch(id + (is_all_caps ? 119 : 120) * n, l + 3, l, matches);
406 }
407 }
408 }
409 }
410 }
/* Pass 3: a two-byte prefix, so the word starts at data[2]. The hash covers
   data[2 .. 5], hence the max_length >= 6 requirement. */
411 if (max_length >= 6) {
412 /* Transforms with prefixes "e ", "s ", ", " and "\xc2\xa0" */
413 if ((data[1] == ' ' &&
414 (data[0] == 'e' || data[0] == 's' || data[0] == ',')) ||
415 (data[0] == 0xc2 && data[1] == 0xa0)) {
416 size_t offset = kStaticDictionaryBuckets[Hash(&data[2])];
417 BROTLI_BOOL end = !offset;
418 while (!end) {
419 DictWord w = kStaticDictionaryWords[offset++];
420 const size_t l = w.len & 0x7F;
421 const size_t n = (size_t)1 << kBrotliDictionarySizeBitsByLength[l];
422 const size_t id = w.idx;
423 end = !!(w.len & 0x80);
424 w.len = (uint8_t)l;
425 if (w.transform == 0 && IsMatch(w, &data[2], max_length - 2)) {
/* The "\xc2\xa0" prefix needs no suffix; the "e ", "s " and ", " prefixes
   are only recorded when the word is followed by a space. */
426 if (data[0] == 0xc2) {
427 AddMatch(id + 102 * n, l + 2, l, matches);
428 has_found_match = BROTLI_TRUE;
429 } else if (l + 2 < max_length && data[l + 2] == ' ') {
430 size_t t = data[0] == 'e' ? 18 : (data[0] == 's' ? 7 : 13);
431 AddMatch(id + t * n, l + 3, l, matches);
432 has_found_match = BROTLI_TRUE;
433 }
434 }
435 }
436 }
437 }
/* Pass 4: a five-byte prefix, so the word starts at data[5]. The hash covers
   data[5 .. 8], hence the max_length >= 9 requirement. */
438 if (max_length >= 9) {
439 /* Transforms with prefixes " the " and ".com/" */
440 if ((data[0] == ' ' && data[1] == 't' && data[2] == 'h' &&
441 data[3] == 'e' && data[4] == ' ') ||
442 (data[0] == '.' && data[1] == 'c' && data[2] == 'o' &&
443 data[3] == 'm' && data[4] == '/')) {
444 size_t offset = kStaticDictionaryBuckets[Hash(&data[5])];
445 BROTLI_BOOL end = !offset;
446 while (!end) {
447 DictWord w = kStaticDictionaryWords[offset++];
448 const size_t l = w.len & 0x7F;
449 const size_t n = (size_t)1 << kBrotliDictionarySizeBitsByLength[l];
450 const size_t id = w.idx;
451 end = !!(w.len & 0x80);
452 w.len = (uint8_t)l;
453 if (w.transform == 0 && IsMatch(w, &data[5], max_length - 5)) {
454 AddMatch(id + (data[0] == ' ' ? 41 : 72) * n, l + 5, l, matches);
455 has_found_match = BROTLI_TRUE;
456 if (l + 5 < max_length) {
457 const uint8_t* s = &data[l + 5];
/* Only the " the " prefix has suffixed variants: " of " and " of the ". */
458 if (data[0] == ' ') {
459 if (l + 8 < max_length &&
460 s[0] == ' ' && s[1] == 'o' && s[2] == 'f' && s[3] == ' ') {
461 AddMatch(id + 62 * n, l + 9, l, matches);
462 if (l + 12 < max_length &&
463 s[4] == 't' && s[5] == 'h' && s[6] == 'e' && s[7] == ' ') {
464 AddMatch(id + 73 * n, l + 13, l, matches);
465 }
466 }
467 }
468 }
469 }
470 }
471 }
472 }
473 return has_found_match;
474 }
475
476 #if defined(__cplusplus) || defined(c_plusplus)
477 } /* extern "C" */
478 #endif