]>
Commit | Line | Data |
---|---|---|
11b7501a SB |
1 | /* Copyright 2013 Google Inc. All Rights Reserved.\r |
2 | \r | |
3 | Distributed under MIT license.\r | |
4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT\r | |
5 | */\r | |
6 | \r | |
7 | #include "./static_dict.h"\r | |
8 | \r | |
9 | #include "../common/dictionary.h"\r | |
dd4f667e LG |
10 | #include "../common/platform.h"\r |
11 | #include "../common/transform.h"\r | |
12 | #include "./encoder_dict.h"\r | |
11b7501a | 13 | #include "./find_match_length.h"\r |
11b7501a SB |
14 | \r |
15 | #if defined(__cplusplus) || defined(c_plusplus)\r | |
16 | extern "C" {\r | |
17 | #endif\r | |
18 | \r | |
dd4f667e LG |
19 | static BROTLI_INLINE uint32_t Hash(const uint8_t* data) {\r |
20 | uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kDictHashMul32;\r | |
11b7501a SB |
21 | /* The higher bits contain more mixture from the multiplication,\r |
22 | so we take our results from there. */\r | |
23 | return h >> (32 - kDictNumBits);\r | |
24 | }\r | |
25 | \r | |
26 | static BROTLI_INLINE void AddMatch(size_t distance, size_t len, size_t len_code,\r | |
27 | uint32_t* matches) {\r | |
28 | uint32_t match = (uint32_t)((distance << 5) + len_code);\r | |
29 | matches[len] = BROTLI_MIN(uint32_t, matches[len], match);\r | |
30 | }\r | |
31 | \r | |
dd4f667e LG |
32 | static BROTLI_INLINE size_t DictMatchLength(const BrotliDictionary* dictionary,\r |
33 | const uint8_t* data,\r | |
11b7501a SB |
34 | size_t id,\r |
35 | size_t len,\r | |
36 | size_t maxlen) {\r | |
dd4f667e LG |
37 | const size_t offset = dictionary->offsets_by_length[len] + len * id;\r |
38 | return FindMatchLengthWithLimit(&dictionary->data[offset], data,\r | |
11b7501a SB |
39 | BROTLI_MIN(size_t, len, maxlen));\r |
40 | }\r | |
41 | \r | |
dd4f667e | 42 | static BROTLI_INLINE BROTLI_BOOL IsMatch(const BrotliDictionary* dictionary,\r |
11b7501a SB |
43 | DictWord w, const uint8_t* data, size_t max_length) {\r |
44 | if (w.len > max_length) {\r | |
45 | return BROTLI_FALSE;\r | |
46 | } else {\r | |
dd4f667e | 47 | const size_t offset = dictionary->offsets_by_length[w.len] +\r |
11b7501a | 48 | (size_t)w.len * (size_t)w.idx;\r |
dd4f667e | 49 | const uint8_t* dict = &dictionary->data[offset];\r |
11b7501a SB |
50 | if (w.transform == 0) {\r |
51 | /* Match against base dictionary word. */\r | |
52 | return\r | |
53 | TO_BROTLI_BOOL(FindMatchLengthWithLimit(dict, data, w.len) == w.len);\r | |
54 | } else if (w.transform == 10) {\r | |
55 | /* Match against uppercase first transform.\r | |
56 | Note that there are only ASCII uppercase words in the lookup table. */\r | |
57 | return TO_BROTLI_BOOL(dict[0] >= 'a' && dict[0] <= 'z' &&\r | |
58 | (dict[0] ^ 32) == data[0] &&\r | |
59 | FindMatchLengthWithLimit(&dict[1], &data[1], w.len - 1u) ==\r | |
60 | w.len - 1u);\r | |
61 | } else {\r | |
62 | /* Match against uppercase all transform.\r | |
63 | Note that there are only ASCII uppercase words in the lookup table. */\r | |
64 | size_t i;\r | |
65 | for (i = 0; i < w.len; ++i) {\r | |
66 | if (dict[i] >= 'a' && dict[i] <= 'z') {\r | |
67 | if ((dict[i] ^ 32) != data[i]) return BROTLI_FALSE;\r | |
68 | } else {\r | |
69 | if (dict[i] != data[i]) return BROTLI_FALSE;\r | |
70 | }\r | |
71 | }\r | |
72 | return BROTLI_TRUE;\r | |
73 | }\r | |
74 | }\r | |
75 | }\r | |
76 | \r | |
77 | BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(\r | |
dd4f667e LG |
78 | const BrotliEncoderDictionary* dictionary, const uint8_t* data,\r |
79 | size_t min_length, size_t max_length, uint32_t* matches) {\r | |
11b7501a SB |
80 | BROTLI_BOOL has_found_match = BROTLI_FALSE;\r |
81 | {\r | |
dd4f667e | 82 | size_t offset = dictionary->buckets[Hash(data)];\r |
11b7501a SB |
83 | BROTLI_BOOL end = !offset;\r |
84 | while (!end) {\r | |
dd4f667e LG |
85 | DictWord w = dictionary->dict_words[offset++];\r |
86 | const size_t l = w.len & 0x1F;\r | |
87 | const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];\r | |
11b7501a SB |
88 | const size_t id = w.idx;\r |
89 | end = !!(w.len & 0x80);\r | |
90 | w.len = (uint8_t)l;\r | |
91 | if (w.transform == 0) {\r | |
dd4f667e LG |
92 | const size_t matchlen =\r |
93 | DictMatchLength(dictionary->words, data, id, l, max_length);\r | |
11b7501a SB |
94 | const uint8_t* s;\r |
95 | size_t minlen;\r | |
96 | size_t maxlen;\r | |
97 | size_t len;\r | |
dd4f667e | 98 | /* Transform "" + BROTLI_TRANSFORM_IDENTITY + "" */\r |
11b7501a SB |
99 | if (matchlen == l) {\r |
100 | AddMatch(id, l, l, matches);\r | |
101 | has_found_match = BROTLI_TRUE;\r | |
102 | }\r | |
dd4f667e LG |
103 | /* Transforms "" + BROTLI_TRANSFORM_OMIT_LAST_1 + "" and\r |
104 | "" + BROTLI_TRANSFORM_OMIT_LAST_1 + "ing " */\r | |
11b7501a SB |
105 | if (matchlen >= l - 1) {\r |
106 | AddMatch(id + 12 * n, l - 1, l, matches);\r | |
107 | if (l + 2 < max_length &&\r | |
108 | data[l - 1] == 'i' && data[l] == 'n' && data[l + 1] == 'g' &&\r | |
109 | data[l + 2] == ' ') {\r | |
110 | AddMatch(id + 49 * n, l + 3, l, matches);\r | |
111 | }\r | |
112 | has_found_match = BROTLI_TRUE;\r | |
113 | }\r | |
dd4f667e | 114 | /* Transform "" + BROTLI_TRANSFORM_OMIT_LAST_# + "" (# = 2 .. 9) */\r |
11b7501a SB |
115 | minlen = min_length;\r |
116 | if (l > 9) minlen = BROTLI_MAX(size_t, minlen, l - 9);\r | |
117 | maxlen = BROTLI_MIN(size_t, matchlen, l - 2);\r | |
118 | for (len = minlen; len <= maxlen; ++len) {\r | |
dd4f667e LG |
119 | size_t cut = l - len;\r |
120 | size_t transform_id = (cut << 2) +\r | |
121 | (size_t)((dictionary->cutoffTransforms >> (cut * 6)) & 0x3F);\r | |
122 | AddMatch(id + transform_id * n, len, l, matches);\r | |
11b7501a SB |
123 | has_found_match = BROTLI_TRUE;\r |
124 | }\r | |
125 | if (matchlen < l || l + 6 >= max_length) {\r | |
126 | continue;\r | |
127 | }\r | |
128 | s = &data[l];\r | |
dd4f667e | 129 | /* Transforms "" + BROTLI_TRANSFORM_IDENTITY + <suffix> */\r |
11b7501a SB |
130 | if (s[0] == ' ') {\r |
131 | AddMatch(id + n, l + 1, l, matches);\r | |
132 | if (s[1] == 'a') {\r | |
133 | if (s[2] == ' ') {\r | |
134 | AddMatch(id + 28 * n, l + 3, l, matches);\r | |
135 | } else if (s[2] == 's') {\r | |
136 | if (s[3] == ' ') AddMatch(id + 46 * n, l + 4, l, matches);\r | |
137 | } else if (s[2] == 't') {\r | |
138 | if (s[3] == ' ') AddMatch(id + 60 * n, l + 4, l, matches);\r | |
139 | } else if (s[2] == 'n') {\r | |
140 | if (s[3] == 'd' && s[4] == ' ') {\r | |
141 | AddMatch(id + 10 * n, l + 5, l, matches);\r | |
142 | }\r | |
143 | }\r | |
144 | } else if (s[1] == 'b') {\r | |
145 | if (s[2] == 'y' && s[3] == ' ') {\r | |
146 | AddMatch(id + 38 * n, l + 4, l, matches);\r | |
147 | }\r | |
148 | } else if (s[1] == 'i') {\r | |
149 | if (s[2] == 'n') {\r | |
150 | if (s[3] == ' ') AddMatch(id + 16 * n, l + 4, l, matches);\r | |
151 | } else if (s[2] == 's') {\r | |
152 | if (s[3] == ' ') AddMatch(id + 47 * n, l + 4, l, matches);\r | |
153 | }\r | |
154 | } else if (s[1] == 'f') {\r | |
155 | if (s[2] == 'o') {\r | |
156 | if (s[3] == 'r' && s[4] == ' ') {\r | |
157 | AddMatch(id + 25 * n, l + 5, l, matches);\r | |
158 | }\r | |
159 | } else if (s[2] == 'r') {\r | |
160 | if (s[3] == 'o' && s[4] == 'm' && s[5] == ' ') {\r | |
161 | AddMatch(id + 37 * n, l + 6, l, matches);\r | |
162 | }\r | |
163 | }\r | |
164 | } else if (s[1] == 'o') {\r | |
165 | if (s[2] == 'f') {\r | |
166 | if (s[3] == ' ') AddMatch(id + 8 * n, l + 4, l, matches);\r | |
167 | } else if (s[2] == 'n') {\r | |
168 | if (s[3] == ' ') AddMatch(id + 45 * n, l + 4, l, matches);\r | |
169 | }\r | |
170 | } else if (s[1] == 'n') {\r | |
171 | if (s[2] == 'o' && s[3] == 't' && s[4] == ' ') {\r | |
172 | AddMatch(id + 80 * n, l + 5, l, matches);\r | |
173 | }\r | |
174 | } else if (s[1] == 't') {\r | |
175 | if (s[2] == 'h') {\r | |
176 | if (s[3] == 'e') {\r | |
177 | if (s[4] == ' ') AddMatch(id + 5 * n, l + 5, l, matches);\r | |
178 | } else if (s[3] == 'a') {\r | |
179 | if (s[4] == 't' && s[5] == ' ') {\r | |
180 | AddMatch(id + 29 * n, l + 6, l, matches);\r | |
181 | }\r | |
182 | }\r | |
183 | } else if (s[2] == 'o') {\r | |
184 | if (s[3] == ' ') AddMatch(id + 17 * n, l + 4, l, matches);\r | |
185 | }\r | |
186 | } else if (s[1] == 'w') {\r | |
187 | if (s[2] == 'i' && s[3] == 't' && s[4] == 'h' && s[5] == ' ') {\r | |
188 | AddMatch(id + 35 * n, l + 6, l, matches);\r | |
189 | }\r | |
190 | }\r | |
191 | } else if (s[0] == '"') {\r | |
192 | AddMatch(id + 19 * n, l + 1, l, matches);\r | |
193 | if (s[1] == '>') {\r | |
194 | AddMatch(id + 21 * n, l + 2, l, matches);\r | |
195 | }\r | |
196 | } else if (s[0] == '.') {\r | |
197 | AddMatch(id + 20 * n, l + 1, l, matches);\r | |
198 | if (s[1] == ' ') {\r | |
199 | AddMatch(id + 31 * n, l + 2, l, matches);\r | |
200 | if (s[2] == 'T' && s[3] == 'h') {\r | |
201 | if (s[4] == 'e') {\r | |
202 | if (s[5] == ' ') AddMatch(id + 43 * n, l + 6, l, matches);\r | |
203 | } else if (s[4] == 'i') {\r | |
204 | if (s[5] == 's' && s[6] == ' ') {\r | |
205 | AddMatch(id + 75 * n, l + 7, l, matches);\r | |
206 | }\r | |
207 | }\r | |
208 | }\r | |
209 | }\r | |
210 | } else if (s[0] == ',') {\r | |
211 | AddMatch(id + 76 * n, l + 1, l, matches);\r | |
212 | if (s[1] == ' ') {\r | |
213 | AddMatch(id + 14 * n, l + 2, l, matches);\r | |
214 | }\r | |
215 | } else if (s[0] == '\n') {\r | |
216 | AddMatch(id + 22 * n, l + 1, l, matches);\r | |
217 | if (s[1] == '\t') {\r | |
218 | AddMatch(id + 50 * n, l + 2, l, matches);\r | |
219 | }\r | |
220 | } else if (s[0] == ']') {\r | |
221 | AddMatch(id + 24 * n, l + 1, l, matches);\r | |
222 | } else if (s[0] == '\'') {\r | |
223 | AddMatch(id + 36 * n, l + 1, l, matches);\r | |
224 | } else if (s[0] == ':') {\r | |
225 | AddMatch(id + 51 * n, l + 1, l, matches);\r | |
226 | } else if (s[0] == '(') {\r | |
227 | AddMatch(id + 57 * n, l + 1, l, matches);\r | |
228 | } else if (s[0] == '=') {\r | |
229 | if (s[1] == '"') {\r | |
230 | AddMatch(id + 70 * n, l + 2, l, matches);\r | |
231 | } else if (s[1] == '\'') {\r | |
232 | AddMatch(id + 86 * n, l + 2, l, matches);\r | |
233 | }\r | |
234 | } else if (s[0] == 'a') {\r | |
235 | if (s[1] == 'l' && s[2] == ' ') {\r | |
236 | AddMatch(id + 84 * n, l + 3, l, matches);\r | |
237 | }\r | |
238 | } else if (s[0] == 'e') {\r | |
239 | if (s[1] == 'd') {\r | |
240 | if (s[2] == ' ') AddMatch(id + 53 * n, l + 3, l, matches);\r | |
241 | } else if (s[1] == 'r') {\r | |
242 | if (s[2] == ' ') AddMatch(id + 82 * n, l + 3, l, matches);\r | |
243 | } else if (s[1] == 's') {\r | |
244 | if (s[2] == 't' && s[3] == ' ') {\r | |
245 | AddMatch(id + 95 * n, l + 4, l, matches);\r | |
246 | }\r | |
247 | }\r | |
248 | } else if (s[0] == 'f') {\r | |
249 | if (s[1] == 'u' && s[2] == 'l' && s[3] == ' ') {\r | |
250 | AddMatch(id + 90 * n, l + 4, l, matches);\r | |
251 | }\r | |
252 | } else if (s[0] == 'i') {\r | |
253 | if (s[1] == 'v') {\r | |
254 | if (s[2] == 'e' && s[3] == ' ') {\r | |
255 | AddMatch(id + 92 * n, l + 4, l, matches);\r | |
256 | }\r | |
257 | } else if (s[1] == 'z') {\r | |
258 | if (s[2] == 'e' && s[3] == ' ') {\r | |
259 | AddMatch(id + 100 * n, l + 4, l, matches);\r | |
260 | }\r | |
261 | }\r | |
262 | } else if (s[0] == 'l') {\r | |
263 | if (s[1] == 'e') {\r | |
264 | if (s[2] == 's' && s[3] == 's' && s[4] == ' ') {\r | |
265 | AddMatch(id + 93 * n, l + 5, l, matches);\r | |
266 | }\r | |
267 | } else if (s[1] == 'y') {\r | |
268 | if (s[2] == ' ') AddMatch(id + 61 * n, l + 3, l, matches);\r | |
269 | }\r | |
270 | } else if (s[0] == 'o') {\r | |
271 | if (s[1] == 'u' && s[2] == 's' && s[3] == ' ') {\r | |
272 | AddMatch(id + 106 * n, l + 4, l, matches);\r | |
273 | }\r | |
274 | }\r | |
275 | } else {\r | |
dd4f667e LG |
276 | /* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and\r |
277 | is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL)\r | |
278 | transform. */\r | |
11b7501a | 279 | const BROTLI_BOOL is_all_caps =\r |
dd4f667e | 280 | TO_BROTLI_BOOL(w.transform != BROTLI_TRANSFORM_UPPERCASE_FIRST);\r |
11b7501a | 281 | const uint8_t* s;\r |
dd4f667e | 282 | if (!IsMatch(dictionary->words, w, data, max_length)) {\r |
11b7501a SB |
283 | continue;\r |
284 | }\r | |
285 | /* Transform "" + kUppercase{First,All} + "" */\r | |
286 | AddMatch(id + (is_all_caps ? 44 : 9) * n, l, l, matches);\r | |
287 | has_found_match = BROTLI_TRUE;\r | |
288 | if (l + 1 >= max_length) {\r | |
289 | continue;\r | |
290 | }\r | |
291 | /* Transforms "" + kUppercase{First,All} + <suffix> */\r | |
292 | s = &data[l];\r | |
293 | if (s[0] == ' ') {\r | |
294 | AddMatch(id + (is_all_caps ? 68 : 4) * n, l + 1, l, matches);\r | |
295 | } else if (s[0] == '"') {\r | |
296 | AddMatch(id + (is_all_caps ? 87 : 66) * n, l + 1, l, matches);\r | |
297 | if (s[1] == '>') {\r | |
298 | AddMatch(id + (is_all_caps ? 97 : 69) * n, l + 2, l, matches);\r | |
299 | }\r | |
300 | } else if (s[0] == '.') {\r | |
301 | AddMatch(id + (is_all_caps ? 101 : 79) * n, l + 1, l, matches);\r | |
302 | if (s[1] == ' ') {\r | |
303 | AddMatch(id + (is_all_caps ? 114 : 88) * n, l + 2, l, matches);\r | |
304 | }\r | |
305 | } else if (s[0] == ',') {\r | |
306 | AddMatch(id + (is_all_caps ? 112 : 99) * n, l + 1, l, matches);\r | |
307 | if (s[1] == ' ') {\r | |
308 | AddMatch(id + (is_all_caps ? 107 : 58) * n, l + 2, l, matches);\r | |
309 | }\r | |
310 | } else if (s[0] == '\'') {\r | |
311 | AddMatch(id + (is_all_caps ? 94 : 74) * n, l + 1, l, matches);\r | |
312 | } else if (s[0] == '(') {\r | |
313 | AddMatch(id + (is_all_caps ? 113 : 78) * n, l + 1, l, matches);\r | |
314 | } else if (s[0] == '=') {\r | |
315 | if (s[1] == '"') {\r | |
316 | AddMatch(id + (is_all_caps ? 105 : 104) * n, l + 2, l, matches);\r | |
317 | } else if (s[1] == '\'') {\r | |
318 | AddMatch(id + (is_all_caps ? 116 : 108) * n, l + 2, l, matches);\r | |
319 | }\r | |
320 | }\r | |
321 | }\r | |
322 | }\r | |
323 | }\r | |
324 | /* Transforms with prefixes " " and "." */\r | |
325 | if (max_length >= 5 && (data[0] == ' ' || data[0] == '.')) {\r | |
326 | BROTLI_BOOL is_space = TO_BROTLI_BOOL(data[0] == ' ');\r | |
dd4f667e | 327 | size_t offset = dictionary->buckets[Hash(&data[1])];\r |
11b7501a SB |
328 | BROTLI_BOOL end = !offset;\r |
329 | while (!end) {\r | |
dd4f667e LG |
330 | DictWord w = dictionary->dict_words[offset++];\r |
331 | const size_t l = w.len & 0x1F;\r | |
332 | const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];\r | |
11b7501a SB |
333 | const size_t id = w.idx;\r |
334 | end = !!(w.len & 0x80);\r | |
335 | w.len = (uint8_t)l;\r | |
336 | if (w.transform == 0) {\r | |
337 | const uint8_t* s;\r | |
dd4f667e | 338 | if (!IsMatch(dictionary->words, w, &data[1], max_length - 1)) {\r |
11b7501a SB |
339 | continue;\r |
340 | }\r | |
dd4f667e LG |
341 | /* Transforms " " + BROTLI_TRANSFORM_IDENTITY + "" and\r |
342 | "." + BROTLI_TRANSFORM_IDENTITY + "" */\r | |
11b7501a SB |
343 | AddMatch(id + (is_space ? 6 : 32) * n, l + 1, l, matches);\r |
344 | has_found_match = BROTLI_TRUE;\r | |
345 | if (l + 2 >= max_length) {\r | |
346 | continue;\r | |
347 | }\r | |
dd4f667e LG |
348 | /* Transforms " " + BROTLI_TRANSFORM_IDENTITY + <suffix> and\r |
349 | "." + BROTLI_TRANSFORM_IDENTITY + <suffix>\r | |
11b7501a SB |
350 | */\r |
351 | s = &data[l + 1];\r | |
352 | if (s[0] == ' ') {\r | |
353 | AddMatch(id + (is_space ? 2 : 77) * n, l + 2, l, matches);\r | |
354 | } else if (s[0] == '(') {\r | |
355 | AddMatch(id + (is_space ? 89 : 67) * n, l + 2, l, matches);\r | |
356 | } else if (is_space) {\r | |
357 | if (s[0] == ',') {\r | |
358 | AddMatch(id + 103 * n, l + 2, l, matches);\r | |
359 | if (s[1] == ' ') {\r | |
360 | AddMatch(id + 33 * n, l + 3, l, matches);\r | |
361 | }\r | |
362 | } else if (s[0] == '.') {\r | |
363 | AddMatch(id + 71 * n, l + 2, l, matches);\r | |
364 | if (s[1] == ' ') {\r | |
365 | AddMatch(id + 52 * n, l + 3, l, matches);\r | |
366 | }\r | |
367 | } else if (s[0] == '=') {\r | |
368 | if (s[1] == '"') {\r | |
369 | AddMatch(id + 81 * n, l + 3, l, matches);\r | |
370 | } else if (s[1] == '\'') {\r | |
371 | AddMatch(id + 98 * n, l + 3, l, matches);\r | |
372 | }\r | |
373 | }\r | |
374 | }\r | |
375 | } else if (is_space) {\r | |
dd4f667e LG |
376 | /* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and\r |
377 | is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL)\r | |
378 | transform. */\r | |
11b7501a | 379 | const BROTLI_BOOL is_all_caps =\r |
dd4f667e | 380 | TO_BROTLI_BOOL(w.transform != BROTLI_TRANSFORM_UPPERCASE_FIRST);\r |
11b7501a | 381 | const uint8_t* s;\r |
dd4f667e | 382 | if (!IsMatch(dictionary->words, w, &data[1], max_length - 1)) {\r |
11b7501a SB |
383 | continue;\r |
384 | }\r | |
385 | /* Transforms " " + kUppercase{First,All} + "" */\r | |
386 | AddMatch(id + (is_all_caps ? 85 : 30) * n, l + 1, l, matches);\r | |
387 | has_found_match = BROTLI_TRUE;\r | |
388 | if (l + 2 >= max_length) {\r | |
389 | continue;\r | |
390 | }\r | |
391 | /* Transforms " " + kUppercase{First,All} + <suffix> */\r | |
392 | s = &data[l + 1];\r | |
393 | if (s[0] == ' ') {\r | |
394 | AddMatch(id + (is_all_caps ? 83 : 15) * n, l + 2, l, matches);\r | |
395 | } else if (s[0] == ',') {\r | |
396 | if (!is_all_caps) {\r | |
397 | AddMatch(id + 109 * n, l + 2, l, matches);\r | |
398 | }\r | |
399 | if (s[1] == ' ') {\r | |
400 | AddMatch(id + (is_all_caps ? 111 : 65) * n, l + 3, l, matches);\r | |
401 | }\r | |
402 | } else if (s[0] == '.') {\r | |
403 | AddMatch(id + (is_all_caps ? 115 : 96) * n, l + 2, l, matches);\r | |
404 | if (s[1] == ' ') {\r | |
405 | AddMatch(id + (is_all_caps ? 117 : 91) * n, l + 3, l, matches);\r | |
406 | }\r | |
407 | } else if (s[0] == '=') {\r | |
408 | if (s[1] == '"') {\r | |
409 | AddMatch(id + (is_all_caps ? 110 : 118) * n, l + 3, l, matches);\r | |
410 | } else if (s[1] == '\'') {\r | |
411 | AddMatch(id + (is_all_caps ? 119 : 120) * n, l + 3, l, matches);\r | |
412 | }\r | |
413 | }\r | |
414 | }\r | |
415 | }\r | |
416 | }\r | |
417 | if (max_length >= 6) {\r | |
dd4f667e | 418 | /* Transforms with prefixes "e ", "s ", ", " and "\xC2\xA0" */\r |
11b7501a SB |
419 | if ((data[1] == ' ' &&\r |
420 | (data[0] == 'e' || data[0] == 's' || data[0] == ',')) ||\r | |
dd4f667e LG |
421 | (data[0] == 0xC2 && data[1] == 0xA0)) {\r |
422 | size_t offset = dictionary->buckets[Hash(&data[2])];\r | |
11b7501a SB |
423 | BROTLI_BOOL end = !offset;\r |
424 | while (!end) {\r | |
dd4f667e LG |
425 | DictWord w = dictionary->dict_words[offset++];\r |
426 | const size_t l = w.len & 0x1F;\r | |
427 | const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];\r | |
11b7501a SB |
428 | const size_t id = w.idx;\r |
429 | end = !!(w.len & 0x80);\r | |
430 | w.len = (uint8_t)l;\r | |
dd4f667e LG |
431 | if (w.transform == 0 &&\r |
432 | IsMatch(dictionary->words, w, &data[2], max_length - 2)) {\r | |
433 | if (data[0] == 0xC2) {\r | |
11b7501a SB |
434 | AddMatch(id + 102 * n, l + 2, l, matches);\r |
435 | has_found_match = BROTLI_TRUE;\r | |
436 | } else if (l + 2 < max_length && data[l + 2] == ' ') {\r | |
437 | size_t t = data[0] == 'e' ? 18 : (data[0] == 's' ? 7 : 13);\r | |
438 | AddMatch(id + t * n, l + 3, l, matches);\r | |
439 | has_found_match = BROTLI_TRUE;\r | |
440 | }\r | |
441 | }\r | |
442 | }\r | |
443 | }\r | |
444 | }\r | |
445 | if (max_length >= 9) {\r | |
446 | /* Transforms with prefixes " the " and ".com/" */\r | |
447 | if ((data[0] == ' ' && data[1] == 't' && data[2] == 'h' &&\r | |
448 | data[3] == 'e' && data[4] == ' ') ||\r | |
449 | (data[0] == '.' && data[1] == 'c' && data[2] == 'o' &&\r | |
450 | data[3] == 'm' && data[4] == '/')) {\r | |
dd4f667e | 451 | size_t offset = dictionary->buckets[Hash(&data[5])];\r |
11b7501a SB |
452 | BROTLI_BOOL end = !offset;\r |
453 | while (!end) {\r | |
dd4f667e LG |
454 | DictWord w = dictionary->dict_words[offset++];\r |
455 | const size_t l = w.len & 0x1F;\r | |
456 | const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];\r | |
11b7501a SB |
457 | const size_t id = w.idx;\r |
458 | end = !!(w.len & 0x80);\r | |
459 | w.len = (uint8_t)l;\r | |
dd4f667e LG |
460 | if (w.transform == 0 &&\r |
461 | IsMatch(dictionary->words, w, &data[5], max_length - 5)) {\r | |
11b7501a SB |
462 | AddMatch(id + (data[0] == ' ' ? 41 : 72) * n, l + 5, l, matches);\r |
463 | has_found_match = BROTLI_TRUE;\r | |
464 | if (l + 5 < max_length) {\r | |
465 | const uint8_t* s = &data[l + 5];\r | |
466 | if (data[0] == ' ') {\r | |
467 | if (l + 8 < max_length &&\r | |
468 | s[0] == ' ' && s[1] == 'o' && s[2] == 'f' && s[3] == ' ') {\r | |
469 | AddMatch(id + 62 * n, l + 9, l, matches);\r | |
470 | if (l + 12 < max_length &&\r | |
471 | s[4] == 't' && s[5] == 'h' && s[6] == 'e' && s[7] == ' ') {\r | |
472 | AddMatch(id + 73 * n, l + 13, l, matches);\r | |
473 | }\r | |
474 | }\r | |
475 | }\r | |
476 | }\r | |
477 | }\r | |
478 | }\r | |
479 | }\r | |
480 | }\r | |
481 | return has_found_match;\r | |
482 | }\r | |
483 | \r | |
484 | #if defined(__cplusplus) || defined(c_plusplus)\r | |
485 | } /* extern "C" */\r | |
486 | #endif\r |