1 /* Copyright 2013 Google Inc. All Rights Reserved.
3 Distributed under MIT license.
4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
7 /* Transformations on dictionary words. */
9 #ifndef BROTLI_DEC_TRANSFORM_H_
10 #define BROTLI_DEC_TRANSFORM_H_
12 #include "../common/types.h"
15 #if defined(__cplusplus) || defined(c_plusplus)
19 enum WordTransformType
{
44 const uint8_t prefix_id
;
45 const uint8_t transform
;
46 const uint8_t suffix_id
;
/* Packed table of every prefix and suffix string used by the transforms.
   Each entry is terminated by a NUL byte and is identified by its byte
   offset into this array: offset 0 is the empty string, offset 1 is " ",
   offset 6 is " of the ", and so on. The prefix_id and suffix_id fields of
   a transform hold such offsets, and the string is copied out byte by byte
   until the terminating NUL is reached.
   The entries occupy 207 bytes; the NUL implicitly appended to the string
   literal terminates the last entry (" ous" family: "ous ") and brings the
   total to the declared 208.
   Keep the escapes exactly as written: every "\0" is followed by a
   non-digit and every "\x.." by a non-hex character, so none of them
   merges with the text that follows. */
49 static const char kPrefixSuffix
[208] =
50 "\0 \0, \0 of the \0 of \0s \0.\0 and \0 in \0\"\0 to \0\">\0\n\0. \0]\0"
51 " for \0 a \0 that \0\'\0 with \0 from \0 by \0(\0. The \0 on \0 as \0"
52 " is \0ing \0\n\t\0:\0ed \0=\"\0 at \0ly \0,\0=\'\0.com/\0. This \0"
53 " not \0er \0al \0ful \0ive \0less \0est \0ize \0\xc2\xa0\0ous ";
63 NBSP = non-breaking space "\xc2\xa0"
68 kPFix_SPofSPtheSP
= 6,
90 kPFix_DOTSPTheSP
= 93,
95 kPFix_NEWLINETAB
= 120,
103 kPFix_DOTcomSLASH
= 146,
104 kPFix_DOTSPThisSP
= 152,
117 static const Transform kTransforms
[] = {
118 { kPFix_EMPTY
, kIdentity
, kPFix_EMPTY
},
119 { kPFix_EMPTY
, kIdentity
, kPFix_SP
},
120 { kPFix_SP
, kIdentity
, kPFix_SP
},
121 { kPFix_EMPTY
, kOmitFirst1
, kPFix_EMPTY
},
122 { kPFix_EMPTY
, kUppercaseFirst
, kPFix_SP
},
123 { kPFix_EMPTY
, kIdentity
, kPFix_SPtheSP
},
124 { kPFix_SP
, kIdentity
, kPFix_EMPTY
},
125 { kPFix_sSP
, kIdentity
, kPFix_SP
},
126 { kPFix_EMPTY
, kIdentity
, kPFix_SPofSP
},
127 { kPFix_EMPTY
, kUppercaseFirst
, kPFix_EMPTY
},
128 { kPFix_EMPTY
, kIdentity
, kPFix_SPandSP
},
129 { kPFix_EMPTY
, kOmitFirst2
, kPFix_EMPTY
},
130 { kPFix_EMPTY
, kOmitLast1
, kPFix_EMPTY
},
131 { kPFix_COMMASP
, kIdentity
, kPFix_SP
},
132 { kPFix_EMPTY
, kIdentity
, kPFix_COMMASP
},
133 { kPFix_SP
, kUppercaseFirst
, kPFix_SP
},
134 { kPFix_EMPTY
, kIdentity
, kPFix_SPinSP
},
135 { kPFix_EMPTY
, kIdentity
, kPFix_SPtoSP
},
136 { kPFix_eSP
, kIdentity
, kPFix_SP
},
137 { kPFix_EMPTY
, kIdentity
, kPFix_DQUOT
},
138 { kPFix_EMPTY
, kIdentity
, kPFix_DOT
},
139 { kPFix_EMPTY
, kIdentity
, kPFix_DQUOTGT
},
140 { kPFix_EMPTY
, kIdentity
, kPFix_NEWLINE
},
141 { kPFix_EMPTY
, kOmitLast3
, kPFix_EMPTY
},
142 { kPFix_EMPTY
, kIdentity
, kPFix_CLOSEBR
},
143 { kPFix_EMPTY
, kIdentity
, kPFix_SPforSP
},
144 { kPFix_EMPTY
, kOmitFirst3
, kPFix_EMPTY
},
145 { kPFix_EMPTY
, kOmitLast2
, kPFix_EMPTY
},
146 { kPFix_EMPTY
, kIdentity
, kPFix_SPaSP
},
147 { kPFix_EMPTY
, kIdentity
, kPFix_SPthatSP
},
148 { kPFix_SP
, kUppercaseFirst
, kPFix_EMPTY
},
149 { kPFix_EMPTY
, kIdentity
, kPFix_DOTSP
},
150 { kPFix_DOT
, kIdentity
, kPFix_EMPTY
},
151 { kPFix_SP
, kIdentity
, kPFix_COMMASP
},
152 { kPFix_EMPTY
, kOmitFirst4
, kPFix_EMPTY
},
153 { kPFix_EMPTY
, kIdentity
, kPFix_SPwithSP
},
154 { kPFix_EMPTY
, kIdentity
, kPFix_SQUOT
},
155 { kPFix_EMPTY
, kIdentity
, kPFix_SPfromSP
},
156 { kPFix_EMPTY
, kIdentity
, kPFix_SPbySP
},
157 { kPFix_EMPTY
, kOmitFirst5
, kPFix_EMPTY
},
158 { kPFix_EMPTY
, kOmitFirst6
, kPFix_EMPTY
},
159 { kPFix_SPtheSP
, kIdentity
, kPFix_EMPTY
},
160 { kPFix_EMPTY
, kOmitLast4
, kPFix_EMPTY
},
161 { kPFix_EMPTY
, kIdentity
, kPFix_DOTSPTheSP
},
162 { kPFix_EMPTY
, kUppercaseAll
, kPFix_EMPTY
},
163 { kPFix_EMPTY
, kIdentity
, kPFix_SPonSP
},
164 { kPFix_EMPTY
, kIdentity
, kPFix_SPasSP
},
165 { kPFix_EMPTY
, kIdentity
, kPFix_SPisSP
},
166 { kPFix_EMPTY
, kOmitLast7
, kPFix_EMPTY
},
167 { kPFix_EMPTY
, kOmitLast1
, kPFix_ingSP
},
168 { kPFix_EMPTY
, kIdentity
, kPFix_NEWLINETAB
},
169 { kPFix_EMPTY
, kIdentity
, kPFix_COLON
},
170 { kPFix_SP
, kIdentity
, kPFix_DOTSP
},
171 { kPFix_EMPTY
, kIdentity
, kPFix_edSP
},
172 { kPFix_EMPTY
, kOmitFirst9
, kPFix_EMPTY
},
173 { kPFix_EMPTY
, kOmitFirst7
, kPFix_EMPTY
},
174 { kPFix_EMPTY
, kOmitLast6
, kPFix_EMPTY
},
175 { kPFix_EMPTY
, kIdentity
, kPFix_OPEN
},
176 { kPFix_EMPTY
, kUppercaseFirst
, kPFix_COMMASP
},
177 { kPFix_EMPTY
, kOmitLast8
, kPFix_EMPTY
},
178 { kPFix_EMPTY
, kIdentity
, kPFix_SPatSP
},
179 { kPFix_EMPTY
, kIdentity
, kPFix_lySP
},
180 { kPFix_SPtheSP
, kIdentity
, kPFix_SPofSP
},
181 { kPFix_EMPTY
, kOmitLast5
, kPFix_EMPTY
},
182 { kPFix_EMPTY
, kOmitLast9
, kPFix_EMPTY
},
183 { kPFix_SP
, kUppercaseFirst
, kPFix_COMMASP
},
184 { kPFix_EMPTY
, kUppercaseFirst
, kPFix_DQUOT
},
185 { kPFix_DOT
, kIdentity
, kPFix_OPEN
},
186 { kPFix_EMPTY
, kUppercaseAll
, kPFix_SP
},
187 { kPFix_EMPTY
, kUppercaseFirst
, kPFix_DQUOTGT
},
188 { kPFix_EMPTY
, kIdentity
, kPFix_EQDQUOT
},
189 { kPFix_SP
, kIdentity
, kPFix_DOT
},
190 { kPFix_DOTcomSLASH
, kIdentity
, kPFix_EMPTY
},
191 { kPFix_SPtheSP
, kIdentity
, kPFix_SPofSPtheSP
},
192 { kPFix_EMPTY
, kUppercaseFirst
, kPFix_SQUOT
},
193 { kPFix_EMPTY
, kIdentity
, kPFix_DOTSPThisSP
},
194 { kPFix_EMPTY
, kIdentity
, kPFix_COMMA
},
195 { kPFix_DOT
, kIdentity
, kPFix_SP
},
196 { kPFix_EMPTY
, kUppercaseFirst
, kPFix_OPEN
},
197 { kPFix_EMPTY
, kUppercaseFirst
, kPFix_DOT
},
198 { kPFix_EMPTY
, kIdentity
, kPFix_SPnotSP
},
199 { kPFix_SP
, kIdentity
, kPFix_EQDQUOT
},
200 { kPFix_EMPTY
, kIdentity
, kPFix_erSP
},
201 { kPFix_SP
, kUppercaseAll
, kPFix_SP
},
202 { kPFix_EMPTY
, kIdentity
, kPFix_alSP
},
203 { kPFix_SP
, kUppercaseAll
, kPFix_EMPTY
},
204 { kPFix_EMPTY
, kIdentity
, kPFix_EQSQUOT
},
205 { kPFix_EMPTY
, kUppercaseAll
, kPFix_DQUOT
},
206 { kPFix_EMPTY
, kUppercaseFirst
, kPFix_DOTSP
},
207 { kPFix_SP
, kIdentity
, kPFix_OPEN
},
208 { kPFix_EMPTY
, kIdentity
, kPFix_fulSP
},
209 { kPFix_SP
, kUppercaseFirst
, kPFix_DOTSP
},
210 { kPFix_EMPTY
, kIdentity
, kPFix_iveSP
},
211 { kPFix_EMPTY
, kIdentity
, kPFix_lessSP
},
212 { kPFix_EMPTY
, kUppercaseAll
, kPFix_SQUOT
},
213 { kPFix_EMPTY
, kIdentity
, kPFix_estSP
},
214 { kPFix_SP
, kUppercaseFirst
, kPFix_DOT
},
215 { kPFix_EMPTY
, kUppercaseAll
, kPFix_DQUOTGT
},
216 { kPFix_SP
, kIdentity
, kPFix_EQSQUOT
},
217 { kPFix_EMPTY
, kUppercaseFirst
, kPFix_COMMA
},
218 { kPFix_EMPTY
, kIdentity
, kPFix_izeSP
},
219 { kPFix_EMPTY
, kUppercaseAll
, kPFix_DOT
},
220 { kPFix_NBSP
, kIdentity
, kPFix_EMPTY
},
221 { kPFix_SP
, kIdentity
, kPFix_COMMA
},
222 { kPFix_EMPTY
, kUppercaseFirst
, kPFix_EQDQUOT
},
223 { kPFix_EMPTY
, kUppercaseAll
, kPFix_EQDQUOT
},
224 { kPFix_EMPTY
, kIdentity
, kPFix_ousSP
},
225 { kPFix_EMPTY
, kUppercaseAll
, kPFix_COMMASP
},
226 { kPFix_EMPTY
, kUppercaseFirst
, kPFix_EQSQUOT
},
227 { kPFix_SP
, kUppercaseFirst
, kPFix_COMMA
},
228 { kPFix_SP
, kUppercaseAll
, kPFix_EQDQUOT
},
229 { kPFix_SP
, kUppercaseAll
, kPFix_COMMASP
},
230 { kPFix_EMPTY
, kUppercaseAll
, kPFix_COMMA
},
231 { kPFix_EMPTY
, kUppercaseAll
, kPFix_OPEN
},
232 { kPFix_EMPTY
, kUppercaseAll
, kPFix_DOTSP
},
233 { kPFix_SP
, kUppercaseAll
, kPFix_DOT
},
234 { kPFix_EMPTY
, kUppercaseAll
, kPFix_EQSQUOT
},
235 { kPFix_SP
, kUppercaseAll
, kPFix_DOTSP
},
236 { kPFix_SP
, kUppercaseFirst
, kPFix_EQDQUOT
},
237 { kPFix_SP
, kUppercaseAll
, kPFix_EQSQUOT
},
238 { kPFix_SP
, kUppercaseFirst
, kPFix_EQSQUOT
},
/* Number of entries in kTransforms. It is derived from the array's size, so
   it follows the table automatically when entries are added or removed.
   The size_t quotient is implicitly converted to int. */
241 static const int kNumTransforms
= sizeof(kTransforms
) / sizeof(kTransforms
[0]);
243 static int ToUpperCase(uint8_t* p
) {
245 if (p
[0] >= 'a' && p
[0] <= 'z') {
250 /* An overly simplified uppercasing model for utf-8. */
255 /* An arbitrary transform for three byte characters. */
260 static BROTLI_NOINLINE
int TransformDictionaryWord(
261 uint8_t* dst
, const uint8_t* word
, int len
, int transform
) {
264 const char* prefix
= &kPrefixSuffix
[kTransforms
[transform
].prefix_id
];
265 while (*prefix
) { dst
[idx
++] = (uint8_t)*prefix
++; }
268 const int t
= kTransforms
[transform
].transform
;
270 int skip
= t
- (kOmitFirst1
- 1);
274 } else if (t
<= kOmitLast9
) {
277 while (i
< len
) { dst
[idx
++] = word
[i
++]; }
278 if (t
== kUppercaseFirst
) {
279 ToUpperCase(&dst
[idx
- len
]);
280 } else if (t
== kUppercaseAll
) {
281 uint8_t* uppercase
= &dst
[idx
- len
];
283 int step
= ToUpperCase(uppercase
);
290 const char* suffix
= &kPrefixSuffix
[kTransforms
[transform
].suffix_id
];
291 while (*suffix
) { dst
[idx
++] = (uint8_t)*suffix
++; }
296 #if defined(__cplusplus) || defined(c_plusplus)
300 #endif /* BROTLI_DEC_TRANSFORM_H_ */