]>
Commit | Line | Data |
---|---|---|
36ff6d80 SB |
1 | /* Copyright 2013 Google Inc. All Rights Reserved.\r |
2 | \r | |
3 | Distributed under MIT license.\r | |
4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT\r | |
5 | */\r | |
6 | \r | |
7 | /* Transformations on dictionary words. */\r | |
8 | \r | |
9 | #ifndef BROTLI_DEC_TRANSFORM_H_\r | |
10 | #define BROTLI_DEC_TRANSFORM_H_\r | |
11 | \r | |
12 | #include "../common/types.h"\r | |
13 | #include "./port.h"\r | |
14 | \r | |
15 | #if defined(__cplusplus) || defined(c_plusplus)\r | |
16 | extern "C" {\r | |
17 | #endif\r | |
18 | \r | |
/* Kinds of elementary transformation applied to a dictionary word.
   The numeric values are significant: TransformDictionaryWord() derives the
   number of bytes to drop directly from them (kOmitLastN == N and
   kOmitFirstN == kOmitFirst1 - 1 + N). */
enum WordTransformType {
  /* Copy the word unchanged. */
  kIdentity       = 0,

  /* Drop the last N bytes of the word. */
  kOmitLast1      = 1,
  kOmitLast2      = 2,
  kOmitLast3      = 3,
  kOmitLast4      = 4,
  kOmitLast5      = 5,
  kOmitLast6      = 6,
  kOmitLast7      = 7,
  kOmitLast8      = 8,
  kOmitLast9      = 9,

  /* Uppercase the first character / every character of the word. */
  kUppercaseFirst = 10,
  kUppercaseAll   = 11,

  /* Drop the first N bytes of the word. */
  kOmitFirst1     = 12,
  kOmitFirst2     = 13,
  kOmitFirst3     = 14,
  kOmitFirst4     = 15,
  kOmitFirst5     = 16,
  kOmitFirst6     = 17,
  kOmitFirst7     = 18,
  kOmitFirst8     = 19,
  kOmitFirst9     = 20
};
42 | \r | |
/* One entry of the transform table: a prefix, an elementary word transform
   and a suffix. The prefix and suffix are stored as byte offsets into
   kPrefixSuffix (the kPFix_* constants). */
typedef struct {
  const uint8_t prefix_id;  /* Offset of the prefix string in kPrefixSuffix. */
  const uint8_t transform;  /* One of enum WordTransformType. */
  const uint8_t suffix_id;  /* Offset of the suffix string in kPrefixSuffix. */
} Transform;
48 | \r | |
/* All prefix/suffix strings packed back to back, each terminated by NUL.
   The number after each piece is the byte offset at which it starts; those
   offsets are the values of the kPFix_* constants. Offsets 9 and 12 point
   into the middle of the entry starting at offset 6, reusing its tail. */
static const char kPrefixSuffix[208] =
    "\0"          /*   0 */
    " \0"         /*   1 */
    ", \0"        /*   3 */
    " of the \0"  /*   6 (tails reused at 9 and 12) */
    " of \0"      /*  15 */
    "s \0"        /*  20 */
    ".\0"         /*  23 */
    " and \0"     /*  25 */
    " in \0"      /*  31 */
    "\"\0"        /*  36 */
    " to \0"      /*  38 */
    "\">\0"       /*  43 */
    "\n\0"        /*  46 */
    ". \0"        /*  48 */
    "]\0"         /*  51 */
    " for \0"     /*  53 */
    " a \0"       /*  59 */
    " that \0"    /*  63 */
    "\'\0"        /*  70 */
    " with \0"    /*  72 */
    " from \0"    /*  79 */
    " by \0"      /*  86 */
    "(\0"         /*  91 */
    ". The \0"    /*  93 */
    " on \0"      /* 100 */
    " as \0"      /* 105 */
    " is \0"      /* 110 */
    "ing \0"      /* 115 */
    "\n\t\0"      /* 120 */
    ":\0"         /* 123 */
    "ed \0"       /* 125 */
    "=\"\0"       /* 129 */
    " at \0"      /* 132 */
    "ly \0"       /* 137 */
    ",\0"         /* 141 */
    "=\'\0"       /* 143 */
    ".com/\0"     /* 146 */
    ". This \0"   /* 152 */
    " not \0"     /* 160 */
    "er \0"       /* 166 */
    "al \0"       /* 170 */
    "ful \0"      /* 174 */
    "ive \0"      /* 179 */
    "less \0"     /* 184 */
    "est \0"      /* 190 */
    "ize \0"      /* 195 */
    "\xc2\xa0\0"  /* 200 */
    "ous ";       /* 203; the literal's own terminator is byte 207 */
54 | \r | |
/* Byte offsets of the individual prefix/suffix strings inside kPrefixSuffix.
   Each name spells out the string it points at, using these abbreviations:
     EMPTY   = ""
     SP      = " "
     DQUOT   = "\""
     SQUOT   = "'"
     CLOSEBR = "]"
     OPEN    = "("
     SLASH   = "/"
     NBSP    = non-breaking space "\xc2\xa0"
   The trailing comment on each constant shows the string found at that
   offset. */
enum {
  kPFix_EMPTY       = 0,    /* "" */
  kPFix_SP          = 1,    /* " " */
  kPFix_COMMASP     = 3,    /* ", " */
  kPFix_SPofSPtheSP = 6,    /* " of the " */
  kPFix_SPtheSP     = 9,    /* " the " (tail of the entry at offset 6) */
  kPFix_eSP         = 12,   /* "e " (tail of the entry at offset 6) */
  kPFix_SPofSP      = 15,   /* " of " */
  kPFix_sSP         = 20,   /* "s " */
  kPFix_DOT         = 23,   /* "." */
  kPFix_SPandSP     = 25,   /* " and " */
  kPFix_SPinSP      = 31,   /* " in " */
  kPFix_DQUOT       = 36,   /* "\"" */
  kPFix_SPtoSP      = 38,   /* " to " */
  kPFix_DQUOTGT     = 43,   /* "\">" */
  kPFix_NEWLINE     = 46,   /* "\n" */
  kPFix_DOTSP       = 48,   /* ". " */
  kPFix_CLOSEBR     = 51,   /* "]" */
  kPFix_SPforSP     = 53,   /* " for " */
  kPFix_SPaSP       = 59,   /* " a " */
  kPFix_SPthatSP    = 63,   /* " that " */
  kPFix_SQUOT       = 70,   /* "'" */
  kPFix_SPwithSP    = 72,   /* " with " */
  kPFix_SPfromSP    = 79,   /* " from " */
  kPFix_SPbySP      = 86,   /* " by " */
  kPFix_OPEN        = 91,   /* "(" */
  kPFix_DOTSPTheSP  = 93,   /* ". The " */
  kPFix_SPonSP      = 100,  /* " on " */
  kPFix_SPasSP      = 105,  /* " as " */
  kPFix_SPisSP      = 110,  /* " is " */
  kPFix_ingSP       = 115,  /* "ing " */
  kPFix_NEWLINETAB  = 120,  /* "\n\t" */
  kPFix_COLON       = 123,  /* ":" */
  kPFix_edSP        = 125,  /* "ed " */
  kPFix_EQDQUOT     = 129,  /* "=\"" */
  kPFix_SPatSP      = 132,  /* " at " */
  kPFix_lySP        = 137,  /* "ly " */
  kPFix_COMMA       = 141,  /* "," */
  kPFix_EQSQUOT     = 143,  /* "='" */
  kPFix_DOTcomSLASH = 146,  /* ".com/" */
  kPFix_DOTSPThisSP = 152,  /* ". This " */
  kPFix_SPnotSP     = 160,  /* " not " */
  kPFix_erSP        = 166,  /* "er " */
  kPFix_alSP        = 170,  /* "al " */
  kPFix_fulSP       = 174,  /* "ful " */
  kPFix_iveSP       = 179,  /* "ive " */
  kPFix_lessSP      = 184,  /* "less " */
  kPFix_estSP       = 190,  /* "est " */
  kPFix_izeSP       = 195,  /* "ize " */
  kPFix_NBSP        = 200,  /* "\xc2\xa0" */
  kPFix_ousSP       = 203   /* "ous " */
};
116 | \r | |
117 | static const Transform kTransforms[] = {\r | |
118 | { kPFix_EMPTY, kIdentity, kPFix_EMPTY },\r | |
119 | { kPFix_EMPTY, kIdentity, kPFix_SP },\r | |
120 | { kPFix_SP, kIdentity, kPFix_SP },\r | |
121 | { kPFix_EMPTY, kOmitFirst1, kPFix_EMPTY },\r | |
122 | { kPFix_EMPTY, kUppercaseFirst, kPFix_SP },\r | |
123 | { kPFix_EMPTY, kIdentity, kPFix_SPtheSP },\r | |
124 | { kPFix_SP, kIdentity, kPFix_EMPTY },\r | |
125 | { kPFix_sSP, kIdentity, kPFix_SP },\r | |
126 | { kPFix_EMPTY, kIdentity, kPFix_SPofSP },\r | |
127 | { kPFix_EMPTY, kUppercaseFirst, kPFix_EMPTY },\r | |
128 | { kPFix_EMPTY, kIdentity, kPFix_SPandSP },\r | |
129 | { kPFix_EMPTY, kOmitFirst2, kPFix_EMPTY },\r | |
130 | { kPFix_EMPTY, kOmitLast1, kPFix_EMPTY },\r | |
131 | { kPFix_COMMASP, kIdentity, kPFix_SP },\r | |
132 | { kPFix_EMPTY, kIdentity, kPFix_COMMASP },\r | |
133 | { kPFix_SP, kUppercaseFirst, kPFix_SP },\r | |
134 | { kPFix_EMPTY, kIdentity, kPFix_SPinSP },\r | |
135 | { kPFix_EMPTY, kIdentity, kPFix_SPtoSP },\r | |
136 | { kPFix_eSP, kIdentity, kPFix_SP },\r | |
137 | { kPFix_EMPTY, kIdentity, kPFix_DQUOT },\r | |
138 | { kPFix_EMPTY, kIdentity, kPFix_DOT },\r | |
139 | { kPFix_EMPTY, kIdentity, kPFix_DQUOTGT },\r | |
140 | { kPFix_EMPTY, kIdentity, kPFix_NEWLINE },\r | |
141 | { kPFix_EMPTY, kOmitLast3, kPFix_EMPTY },\r | |
142 | { kPFix_EMPTY, kIdentity, kPFix_CLOSEBR },\r | |
143 | { kPFix_EMPTY, kIdentity, kPFix_SPforSP },\r | |
144 | { kPFix_EMPTY, kOmitFirst3, kPFix_EMPTY },\r | |
145 | { kPFix_EMPTY, kOmitLast2, kPFix_EMPTY },\r | |
146 | { kPFix_EMPTY, kIdentity, kPFix_SPaSP },\r | |
147 | { kPFix_EMPTY, kIdentity, kPFix_SPthatSP },\r | |
148 | { kPFix_SP, kUppercaseFirst, kPFix_EMPTY },\r | |
149 | { kPFix_EMPTY, kIdentity, kPFix_DOTSP },\r | |
150 | { kPFix_DOT, kIdentity, kPFix_EMPTY },\r | |
151 | { kPFix_SP, kIdentity, kPFix_COMMASP },\r | |
152 | { kPFix_EMPTY, kOmitFirst4, kPFix_EMPTY },\r | |
153 | { kPFix_EMPTY, kIdentity, kPFix_SPwithSP },\r | |
154 | { kPFix_EMPTY, kIdentity, kPFix_SQUOT },\r | |
155 | { kPFix_EMPTY, kIdentity, kPFix_SPfromSP },\r | |
156 | { kPFix_EMPTY, kIdentity, kPFix_SPbySP },\r | |
157 | { kPFix_EMPTY, kOmitFirst5, kPFix_EMPTY },\r | |
158 | { kPFix_EMPTY, kOmitFirst6, kPFix_EMPTY },\r | |
159 | { kPFix_SPtheSP, kIdentity, kPFix_EMPTY },\r | |
160 | { kPFix_EMPTY, kOmitLast4, kPFix_EMPTY },\r | |
161 | { kPFix_EMPTY, kIdentity, kPFix_DOTSPTheSP },\r | |
162 | { kPFix_EMPTY, kUppercaseAll, kPFix_EMPTY },\r | |
163 | { kPFix_EMPTY, kIdentity, kPFix_SPonSP },\r | |
164 | { kPFix_EMPTY, kIdentity, kPFix_SPasSP },\r | |
165 | { kPFix_EMPTY, kIdentity, kPFix_SPisSP },\r | |
166 | { kPFix_EMPTY, kOmitLast7, kPFix_EMPTY },\r | |
167 | { kPFix_EMPTY, kOmitLast1, kPFix_ingSP },\r | |
168 | { kPFix_EMPTY, kIdentity, kPFix_NEWLINETAB },\r | |
169 | { kPFix_EMPTY, kIdentity, kPFix_COLON },\r | |
170 | { kPFix_SP, kIdentity, kPFix_DOTSP },\r | |
171 | { kPFix_EMPTY, kIdentity, kPFix_edSP },\r | |
172 | { kPFix_EMPTY, kOmitFirst9, kPFix_EMPTY },\r | |
173 | { kPFix_EMPTY, kOmitFirst7, kPFix_EMPTY },\r | |
174 | { kPFix_EMPTY, kOmitLast6, kPFix_EMPTY },\r | |
175 | { kPFix_EMPTY, kIdentity, kPFix_OPEN },\r | |
176 | { kPFix_EMPTY, kUppercaseFirst, kPFix_COMMASP },\r | |
177 | { kPFix_EMPTY, kOmitLast8, kPFix_EMPTY },\r | |
178 | { kPFix_EMPTY, kIdentity, kPFix_SPatSP },\r | |
179 | { kPFix_EMPTY, kIdentity, kPFix_lySP },\r | |
180 | { kPFix_SPtheSP, kIdentity, kPFix_SPofSP },\r | |
181 | { kPFix_EMPTY, kOmitLast5, kPFix_EMPTY },\r | |
182 | { kPFix_EMPTY, kOmitLast9, kPFix_EMPTY },\r | |
183 | { kPFix_SP, kUppercaseFirst, kPFix_COMMASP },\r | |
184 | { kPFix_EMPTY, kUppercaseFirst, kPFix_DQUOT },\r | |
185 | { kPFix_DOT, kIdentity, kPFix_OPEN },\r | |
186 | { kPFix_EMPTY, kUppercaseAll, kPFix_SP },\r | |
187 | { kPFix_EMPTY, kUppercaseFirst, kPFix_DQUOTGT },\r | |
188 | { kPFix_EMPTY, kIdentity, kPFix_EQDQUOT },\r | |
189 | { kPFix_SP, kIdentity, kPFix_DOT },\r | |
190 | { kPFix_DOTcomSLASH, kIdentity, kPFix_EMPTY },\r | |
191 | { kPFix_SPtheSP, kIdentity, kPFix_SPofSPtheSP },\r | |
192 | { kPFix_EMPTY, kUppercaseFirst, kPFix_SQUOT },\r | |
193 | { kPFix_EMPTY, kIdentity, kPFix_DOTSPThisSP },\r | |
194 | { kPFix_EMPTY, kIdentity, kPFix_COMMA },\r | |
195 | { kPFix_DOT, kIdentity, kPFix_SP },\r | |
196 | { kPFix_EMPTY, kUppercaseFirst, kPFix_OPEN },\r | |
197 | { kPFix_EMPTY, kUppercaseFirst, kPFix_DOT },\r | |
198 | { kPFix_EMPTY, kIdentity, kPFix_SPnotSP },\r | |
199 | { kPFix_SP, kIdentity, kPFix_EQDQUOT },\r | |
200 | { kPFix_EMPTY, kIdentity, kPFix_erSP },\r | |
201 | { kPFix_SP, kUppercaseAll, kPFix_SP },\r | |
202 | { kPFix_EMPTY, kIdentity, kPFix_alSP },\r | |
203 | { kPFix_SP, kUppercaseAll, kPFix_EMPTY },\r | |
204 | { kPFix_EMPTY, kIdentity, kPFix_EQSQUOT },\r | |
205 | { kPFix_EMPTY, kUppercaseAll, kPFix_DQUOT },\r | |
206 | { kPFix_EMPTY, kUppercaseFirst, kPFix_DOTSP },\r | |
207 | { kPFix_SP, kIdentity, kPFix_OPEN },\r | |
208 | { kPFix_EMPTY, kIdentity, kPFix_fulSP },\r | |
209 | { kPFix_SP, kUppercaseFirst, kPFix_DOTSP },\r | |
210 | { kPFix_EMPTY, kIdentity, kPFix_iveSP },\r | |
211 | { kPFix_EMPTY, kIdentity, kPFix_lessSP },\r | |
212 | { kPFix_EMPTY, kUppercaseAll, kPFix_SQUOT },\r | |
213 | { kPFix_EMPTY, kIdentity, kPFix_estSP },\r | |
214 | { kPFix_SP, kUppercaseFirst, kPFix_DOT },\r | |
215 | { kPFix_EMPTY, kUppercaseAll, kPFix_DQUOTGT },\r | |
216 | { kPFix_SP, kIdentity, kPFix_EQSQUOT },\r | |
217 | { kPFix_EMPTY, kUppercaseFirst, kPFix_COMMA },\r | |
218 | { kPFix_EMPTY, kIdentity, kPFix_izeSP },\r | |
219 | { kPFix_EMPTY, kUppercaseAll, kPFix_DOT },\r | |
220 | { kPFix_NBSP, kIdentity, kPFix_EMPTY },\r | |
221 | { kPFix_SP, kIdentity, kPFix_COMMA },\r | |
222 | { kPFix_EMPTY, kUppercaseFirst, kPFix_EQDQUOT },\r | |
223 | { kPFix_EMPTY, kUppercaseAll, kPFix_EQDQUOT },\r | |
224 | { kPFix_EMPTY, kIdentity, kPFix_ousSP },\r | |
225 | { kPFix_EMPTY, kUppercaseAll, kPFix_COMMASP },\r | |
226 | { kPFix_EMPTY, kUppercaseFirst, kPFix_EQSQUOT },\r | |
227 | { kPFix_SP, kUppercaseFirst, kPFix_COMMA },\r | |
228 | { kPFix_SP, kUppercaseAll, kPFix_EQDQUOT },\r | |
229 | { kPFix_SP, kUppercaseAll, kPFix_COMMASP },\r | |
230 | { kPFix_EMPTY, kUppercaseAll, kPFix_COMMA },\r | |
231 | { kPFix_EMPTY, kUppercaseAll, kPFix_OPEN },\r | |
232 | { kPFix_EMPTY, kUppercaseAll, kPFix_DOTSP },\r | |
233 | { kPFix_SP, kUppercaseAll, kPFix_DOT },\r | |
234 | { kPFix_EMPTY, kUppercaseAll, kPFix_EQSQUOT },\r | |
235 | { kPFix_SP, kUppercaseAll, kPFix_DOTSP },\r | |
236 | { kPFix_SP, kUppercaseFirst, kPFix_EQDQUOT },\r | |
237 | { kPFix_SP, kUppercaseAll, kPFix_EQSQUOT },\r | |
238 | { kPFix_SP, kUppercaseFirst, kPFix_EQSQUOT },\r | |
239 | };\r | |
240 | \r | |
241 | static const int kNumTransforms = sizeof(kTransforms) / sizeof(kTransforms[0]);\r | |
242 | \r | |
/* Uppercases, in place, the single "character" that starts at |p| and returns
   how many bytes that character occupies (1, 2 or 3), as decided by the lead
   byte p[0] alone:
     - below 0xc0: one byte; 'a'..'z' are turned into 'A'..'Z', anything else
       is left untouched;
     - below 0xe0: two bytes; bit 5 of the second byte is flipped (an overly
       simplified uppercasing model for utf-8);
     - otherwise: three bytes; the third byte is XORed with 5 (an arbitrary
       transform for three byte characters).
   No length is passed in, so the caller must make sure that the 1, 2 or 3
   bytes touched here are accessible. */
static int ToUpperCase(uint8_t* p) {
  const uint8_t lead = p[0];
  if (lead >= 0xe0) {
    p[2] ^= 5;
    return 3;
  }
  if (lead >= 0xc0) {
    p[1] ^= 32;
    return 2;
  }
  if ('a' <= lead && lead <= 'z') {
    p[0] = (uint8_t)(lead ^ 32);
  }
  return 1;
}
259 | \r | |
260 | static BROTLI_NOINLINE int TransformDictionaryWord(\r | |
261 | uint8_t* dst, const uint8_t* word, int len, int transform) {\r | |
262 | int idx = 0;\r | |
263 | {\r | |
264 | const char* prefix = &kPrefixSuffix[kTransforms[transform].prefix_id];\r | |
265 | while (*prefix) { dst[idx++] = (uint8_t)*prefix++; }\r | |
266 | }\r | |
267 | {\r | |
268 | const int t = kTransforms[transform].transform;\r | |
269 | int i = 0;\r | |
270 | int skip = t - (kOmitFirst1 - 1);\r | |
271 | if (skip > 0) {\r | |
272 | word += skip;\r | |
273 | len -= skip;\r | |
274 | } else if (t <= kOmitLast9) {\r | |
275 | len -= t;\r | |
276 | }\r | |
277 | while (i < len) { dst[idx++] = word[i++]; }\r | |
278 | if (t == kUppercaseFirst) {\r | |
279 | ToUpperCase(&dst[idx - len]);\r | |
280 | } else if (t == kUppercaseAll) {\r | |
281 | uint8_t* uppercase = &dst[idx - len];\r | |
282 | while (len > 0) {\r | |
283 | int step = ToUpperCase(uppercase);\r | |
284 | uppercase += step;\r | |
285 | len -= step;\r | |
286 | }\r | |
287 | }\r | |
288 | }\r | |
289 | {\r | |
290 | const char* suffix = &kPrefixSuffix[kTransforms[transform].suffix_id];\r | |
291 | while (*suffix) { dst[idx++] = (uint8_t)*suffix++; }\r | |
292 | return idx;\r | |
293 | }\r | |
294 | }\r | |
295 | \r | |
296 | #if defined(__cplusplus) || defined(c_plusplus)\r | |
297 | } /* extern "C" */\r | |
298 | #endif\r | |
299 | \r | |
300 | #endif /* BROTLI_DEC_TRANSFORM_H_ */\r |