]> git.proxmox.com Git - mirror_edk2.git/blame - MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regenc.c
MdeModulePkg: Regular expression protocol
[mirror_edk2.git] / MdeModulePkg / Universal / RegularExpressionDxe / Oniguruma / regenc.c
CommitLineData
db3b92b4
CS
1/**********************************************************************\r
2 regenc.c - Oniguruma (regular expression library)\r
3**********************************************************************/\r
4/*-\r
5 * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>\r
6 * All rights reserved.\r
7 * \r
8 * Copyright (c) 2015, Hewlett Packard Enterprise Development, L.P.<BR>\r
9 *\r
10 * Redistribution and use in source and binary forms, with or without\r
11 * modification, are permitted provided that the following conditions\r
12 * are met:\r
13 * 1. Redistributions of source code must retain the above copyright\r
14 * notice, this list of conditions and the following disclaimer.\r
15 * 2. Redistributions in binary form must reproduce the above copyright\r
16 * notice, this list of conditions and the following disclaimer in the\r
17 * documentation and/or other materials provided with the distribution.\r
18 *\r
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND\r
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\r
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\r
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE\r
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\r
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS\r
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)\r
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT\r
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY\r
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF\r
29 * SUCH DAMAGE.\r
30 */\r
31\r
32#include "regint.h"\r
33\r
34OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT;\r
35\r
/*
 * Encoding-layer initialisation hook.
 *
 * There is nothing to set up here; the function always returns 0.
 */
extern int
onigenc_init(void)
{
  const int status = 0;

  return status;
}
41\r
42extern OnigEncoding\r
43onigenc_get_default_encoding(void)\r
44{\r
45 return OnigEncDefaultCharEncoding;\r
46}\r
47\r
48extern int\r
49onigenc_set_default_encoding(OnigEncoding enc)\r
50{\r
51 OnigEncDefaultCharEncoding = enc;\r
52 return 0;\r
53}\r
54\r
55extern UChar*\r
56onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s)\r
57{\r
58 UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);\r
59 if (p < s) {\r
60 p += enclen(enc, p);\r
61 }\r
62 return p;\r
63}\r
64\r
65extern UChar*\r
66onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,\r
67 const UChar* start, const UChar* s, const UChar** prev)\r
68{\r
69 UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);\r
70\r
71 if (p < s) {\r
72 if (prev) *prev = (const UChar* )p;\r
73 p += enclen(enc, p);\r
74 }\r
75 else {\r
76 if (prev) *prev = (const UChar* )NULL; /* Sorry */\r
77 }\r
78 return p;\r
79}\r
80\r
81extern UChar*\r
82onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s)\r
83{\r
84 if (s <= start)\r
85 return (UChar* )NULL;\r
86\r
87 return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);\r
88}\r
89\r
90extern UChar*\r
91onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, int n)\r
92{\r
93 while (ONIG_IS_NOT_NULL(s) && n-- > 0) {\r
94 if (s <= start)\r
95 return (UChar* )NULL;\r
96\r
97 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);\r
98 }\r
99 return (UChar* )s;\r
100}\r
101\r
102extern UChar*\r
103onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n)\r
104{\r
105 UChar* q = (UChar* )p;\r
106 while (n-- > 0) {\r
107 q += ONIGENC_MBC_ENC_LEN(enc, q);\r
108 }\r
109 return (q <= end ? q : NULL);\r
110}\r
111\r
112extern int\r
113onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end)\r
114{\r
115 int n = 0;\r
116 UChar* q = (UChar* )p;\r
117 \r
118 while (q < end) {\r
119 q += ONIGENC_MBC_ENC_LEN(enc, q);\r
120 n++;\r
121 }\r
122 return n;\r
123}\r
124\r
125extern int\r
126onigenc_strlen_null(OnigEncoding enc, const UChar* s)\r
127{\r
128 int n = 0;\r
129 UChar* p = (UChar* )s;\r
130 \r
131 while (1) {\r
132 if (*p == '\0') {\r
133 UChar* q;\r
134 int len = ONIGENC_MBC_MINLEN(enc);\r
135\r
136 if (len == 1) return n;\r
137 q = p + 1;\r
138 while (len > 1) {\r
139 if (*q != '\0') break;\r
140 q++;\r
141 len--;\r
142 }\r
143 if (len == 1) return n;\r
144 }\r
145 p += ONIGENC_MBC_ENC_LEN(enc, p);\r
146 n++;\r
147 }\r
148}\r
149\r
150extern int\r
151onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)\r
152{\r
153 UChar* start = (UChar* )s;\r
154 UChar* p = (UChar* )s;\r
155\r
156 while (1) {\r
157 if (*p == '\0') {\r
158 UChar* q;\r
159 int len = ONIGENC_MBC_MINLEN(enc);\r
160\r
161 if (len == 1) return (int )(p - start);\r
162 q = p + 1;\r
163 while (len > 1) {\r
164 if (*q != '\0') break;\r
165 q++;\r
166 len--;\r
167 }\r
168 if (len == 1) return (int )(p - start);\r
169 }\r
170 p += ONIGENC_MBC_ENC_LEN(enc, p);\r
171 }\r
172}\r
173\r
174const UChar OnigEncAsciiToLowerCaseTable[] = {\r
175 0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u,\r
176 10u, 11u, 12u, 13u, 14u, 15u, 16u, 17u,\r
177 20u, 21u, 22u, 23u, 24u, 25u, 26u, 27u,\r
178 30u, 31u, 32u, 33u, 34u, 35u, 36u, 37u,\r
179 40u, 41u, 42u, 43u, 44u, 45u, 46u, 47u,\r
180 50u, 51u, 52u, 53u, 54u, 55u, 56u, 57u,\r
181 60u, 61u, 62u, 63u, 64u, 65u, 66u, 67u,\r
182 70u, 71u, 72u, 73u, 74u, 75u, 76u, 77u,\r
183 100u, 141u, 142u, 143u, 144u, 145u, 146u, 147u,\r
184 150u, 151u, 152u, 153u, 154u, 155u, 156u, 157u,\r
185 160u, 161u, 162u, 163u, 164u, 165u, 166u, 167u,\r
186 170u, 171u, 172u, 133u, 134u, 135u, 136u, 137u,\r
187 140u, 141u, 142u, 143u, 144u, 145u, 146u, 147u,\r
188 150u, 151u, 152u, 153u, 154u, 155u, 156u, 157u,\r
189 160u, 161u, 162u, 163u, 164u, 165u, 166u, 167u,\r
190 170u, 171u, 172u, 173u, 174u, 175u, 176u, 177u,\r
191 200u, 201u, 202u, 203u, 204u, 205u, 206u, 207u,\r
192 210u, 211u, 212u, 213u, 214u, 215u, 216u, 217u,\r
193 220u, 221u, 222u, 223u, 224u, 225u, 226u, 227u,\r
194 230u, 231u, 232u, 233u, 234u, 235u, 236u, 237u,\r
195 240u, 241u, 242u, 243u, 244u, 245u, 246u, 247u,\r
196 250u, 251u, 252u, 253u, 254u, 255u, 256u, 257u,\r
197 260u, 261u, 262u, 263u, 264u, 265u, 266u, 267u,\r
198 270u, 271u, 272u, 273u, 274u, 275u, 276u, 277u,\r
199 300u, 301u, 302u, 303u, 304u, 305u, 306u, 307u,\r
200 310u, 311u, 312u, 313u, 314u, 315u, 316u, 317u,\r
201 320u, 321u, 322u, 323u, 324u, 325u, 326u, 327u,\r
202 330u, 331u, 332u, 333u, 334u, 335u, 336u, 337u,\r
203 340u, 341u, 342u, 343u, 344u, 345u, 346u, 347u,\r
204 350u, 351u, 352u, 353u, 354u, 355u, 356u, 357u,\r
205 360u, 361u, 362u, 363u, 364u, 365u, 366u, 367u,\r
206 370u, 371u, 372u, 373u, 374u, 375u, 376u, 377u,\r
207};\r
208\r
209#ifdef USE_UPPER_CASE_TABLE\r
210const UChar OnigEncAsciiToUpperCaseTable[256] = {\r
211 0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u,\r
212 10u, 11u, 12u, 13u, 14u, 15u, 16u, 17u,\r
213 20u, 21u, 22u, 23u, 24u, 25u, 26u, 27u,\r
214 30u, 31u, 32u, 33u, 34u, 35u, 36u, 37u,\r
215 40u, 41u, 42u, 43u, 44u, 45u, 46u, 47u,\r
216 50u, 51u, 52u, 53u, 54u, 55u, 56u, 57u,\r
217 60u, 61u, 62u, 63u, 64u, 65u, 66u, 67u,\r
218 70u, 71u, 72u, 73u, 74u, 75u, 76u, 77u,\r
219 100u, 101u, 102u, 103u, 104u, 105u, 106u, 107u,\r
220 110u, 111u, 112u, 113u, 114u, 115u, 116u, 117u,\r
221 120u, 121u, 122u, 123u, 124u, 125u, 126u, 127u,\r
222 130u, 131u, 132u, 133u, 134u, 135u, 136u, 137u,\r
223 140u, 101u, 102u, 103u, 104u, 105u, 106u, 107u,\r
224 110u, 111u, 112u, 113u, 114u, 115u, 116u, 117u,\r
225 120u, 121u, 122u, 123u, 124u, 125u, 126u, 127u,\r
226 130u, 131u, 132u, 173u, 174u, 175u, 176u, 177u,\r
227 200u, 201u, 202u, 203u, 204u, 205u, 206u, 207u,\r
228 210u, 211u, 212u, 213u, 214u, 215u, 216u, 217u,\r
229 220u, 221u, 222u, 223u, 224u, 225u, 226u, 227u,\r
230 230u, 231u, 232u, 233u, 234u, 235u, 236u, 237u,\r
231 240u, 241u, 242u, 243u, 244u, 245u, 246u, 247u,\r
232 250u, 251u, 252u, 253u, 254u, 255u, 256u, 257u,\r
233 260u, 261u, 262u, 263u, 264u, 265u, 266u, 267u,\r
234 270u, 271u, 272u, 273u, 274u, 275u, 276u, 277u,\r
235 300u, 301u, 302u, 303u, 304u, 305u, 306u, 307u,\r
236 310u, 311u, 312u, 313u, 314u, 315u, 316u, 317u,\r
237 320u, 321u, 322u, 323u, 324u, 325u, 326u, 327u,\r
238 330u, 331u, 332u, 333u, 334u, 335u, 336u, 337u,\r
239 340u, 341u, 342u, 343u, 344u, 345u, 346u, 347u,\r
240 350u, 351u, 352u, 353u, 354u, 355u, 356u, 357u,\r
241 360u, 361u, 362u, 363u, 364u, 365u, 366u, 367u,\r
242 370u, 371u, 372u, 373u, 374u, 375u, 376u, 377u,\r
243};\r
244#endif\r
245\r
/*
 * Character-type bit masks for ASCII, indexed by byte value (eight
 * entries per row).  Only bytes 0x00-0x7f carry any bits; entries for
 * 0x80-0xff are not listed and are therefore zero-initialised, since the
 * array has an explicit size of 256.
 */
const unsigned short OnigEncAsciiCtypeTable[256] = {
  /* 0x00 */ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x08 */ 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
  /* 0x10 */ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x18 */ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x20 */ 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x28 */ 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x30 */ 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
  /* 0x38 */ 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x40 */ 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
  /* 0x48 */ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  /* 0x50 */ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  /* 0x58 */ 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
  /* 0x60 */ 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
  /* 0x68 */ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  /* 0x70 */ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  /* 0x78 */ 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008
  /* 0x80 .. 0xff: implicitly 0x0000 */
};
280\r
281const UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = {\r
282 0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u,\r
283 10u, 11u, 12u, 13u, 14u, 15u, 16u, 17u,\r
284 20u, 21u, 22u, 23u, 24u, 25u, 26u, 27u,\r
285 30u, 31u, 32u, 33u, 34u, 35u, 36u, 37u,\r
286 40u, 41u, 42u, 43u, 44u, 45u, 46u, 47u,\r
287 50u, 51u, 52u, 53u, 54u, 55u, 56u, 57u,\r
288 60u, 61u, 62u, 63u, 64u, 65u, 66u, 67u,\r
289 70u, 71u, 72u, 73u, 74u, 75u, 76u, 77u,\r
290 100u, 141u, 142u, 143u, 144u, 145u, 146u, 147u,\r
291 150u, 151u, 152u, 153u, 154u, 155u, 156u, 157u,\r
292 160u, 161u, 162u, 163u, 164u, 165u, 166u, 167u,\r
293 170u, 171u, 172u, 133u, 134u, 135u, 136u, 137u,\r
294 140u, 141u, 142u, 143u, 144u, 145u, 146u, 147u,\r
295 150u, 151u, 152u, 153u, 154u, 155u, 156u, 157u,\r
296 160u, 161u, 162u, 163u, 164u, 165u, 166u, 167u,\r
297 170u, 171u, 172u, 173u, 174u, 175u, 176u, 177u,\r
298 200u, 201u, 202u, 203u, 204u, 205u, 206u, 207u,\r
299 210u, 211u, 212u, 213u, 214u, 215u, 216u, 217u,\r
300 220u, 221u, 222u, 223u, 224u, 225u, 226u, 227u,\r
301 230u, 231u, 232u, 233u, 234u, 235u, 236u, 237u,\r
302 240u, 241u, 242u, 243u, 244u, 245u, 246u, 247u,\r
303 250u, 251u, 252u, 253u, 254u, 255u, 256u, 257u,\r
304 260u, 261u, 262u, 263u, 264u, 265u, 266u, 267u,\r
305 270u, 271u, 272u, 273u, 274u, 275u, 276u, 277u,\r
306 340u, 341u, 342u, 343u, 344u, 345u, 346u, 347u,\r
307 350u, 351u, 352u, 353u, 354u, 355u, 356u, 357u,\r
308 360u, 361u, 362u, 363u, 364u, 365u, 366u, 327u,\r
309 370u, 371u, 372u, 373u, 374u, 375u, 376u, 337u,\r
310 340u, 341u, 342u, 343u, 344u, 345u, 346u, 347u,\r
311 350u, 351u, 352u, 353u, 354u, 355u, 356u, 357u,\r
312 360u, 361u, 362u, 363u, 364u, 365u, 366u, 367u,\r
313 370u, 371u, 372u, 373u, 374u, 375u, 376u, 377u,\r
314};\r
315\r
316#ifdef USE_UPPER_CASE_TABLE\r
317const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = {\r
318 0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u,\r
319 10u, 11u, 12u, 13u, 14u, 15u, 16u, 17u,\r
320 20u, 21u, 22u, 23u, 24u, 25u, 26u, 27u,\r
321 30u, 31u, 32u, 33u, 34u, 35u, 36u, 37u,\r
322 40u, 41u, 42u, 43u, 44u, 45u, 46u, 47u,\r
323 50u, 51u, 52u, 53u, 54u, 55u, 56u, 57u,\r
324 60u, 61u, 62u, 63u, 64u, 65u, 66u, 67u,\r
325 70u, 71u, 72u, 73u, 74u, 75u, 76u, 77u,\r
326 100u, 101u, 102u, 103u, 104u, 105u, 106u, 107u,\r
327 110u, 111u, 112u, 113u, 114u, 115u, 116u, 117u,\r
328 120u, 121u, 122u, 123u, 124u, 125u, 126u, 127u,\r
329 130u, 131u, 132u, 133u, 134u, 135u, 136u, 137u,\r
330 140u, 101u, 102u, 103u, 104u, 105u, 106u, 107u,\r
331 110u, 111u, 112u, 113u, 114u, 115u, 116u, 117u,\r
332 120u, 121u, 122u, 123u, 124u, 125u, 126u, 127u,\r
333 130u, 131u, 132u, 173u, 174u, 175u, 176u, 177u,\r
334 200u, 201u, 202u, 203u, 204u, 205u, 206u, 207u,\r
335 210u, 211u, 212u, 213u, 214u, 215u, 216u, 217u,\r
336 220u, 221u, 222u, 223u, 224u, 225u, 226u, 227u,\r
337 230u, 231u, 232u, 233u, 234u, 235u, 236u, 237u,\r
338 240u, 241u, 242u, 243u, 244u, 245u, 246u, 247u,\r
339 250u, 251u, 252u, 253u, 254u, 255u, 256u, 257u,\r
340 260u, 261u, 262u, 263u, 264u, 265u, 266u, 267u,\r
341 270u, 271u, 272u, 273u, 274u, 275u, 276u, 277u,\r
342 300u, 301u, 302u, 303u, 304u, 305u, 306u, 307u,\r
343 310u, 311u, 312u, 313u, 314u, 315u, 316u, 317u,\r
344 320u, 321u, 322u, 323u, 324u, 325u, 326u, 327u,\r
345 330u, 331u, 332u, 333u, 334u, 335u, 336u, 337u,\r
346 300u, 301u, 302u, 303u, 304u, 305u, 306u, 307u,\r
347 310u, 311u, 312u, 313u, 314u, 315u, 316u, 317u,\r
348 320u, 321u, 322u, 323u, 324u, 325u, 326u, 367u,\r
349 330u, 331u, 332u, 333u, 334u, 335u, 336u, 377u,\r
350};\r
351#endif\r
352\r
353extern void\r
354onigenc_set_default_caseconv_table(const UChar* table ARG_UNUSED)\r
355{\r
356 /* nothing */\r
357 /* obsoleted. */\r
358}\r
359\r
360extern UChar*\r
361onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s)\r
362{\r
363 return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);\r
364}\r
365\r
366const OnigPairCaseFoldCodes OnigAsciiLowerMap[] = {\r
367 { 0x41, 0x61 },\r
368 { 0x42, 0x62 },\r
369 { 0x43, 0x63 },\r
370 { 0x44, 0x64 },\r
371 { 0x45, 0x65 },\r
372 { 0x46, 0x66 },\r
373 { 0x47, 0x67 },\r
374 { 0x48, 0x68 },\r
375 { 0x49, 0x69 },\r
376 { 0x4a, 0x6a },\r
377 { 0x4b, 0x6b },\r
378 { 0x4c, 0x6c },\r
379 { 0x4d, 0x6d },\r
380 { 0x4e, 0x6e },\r
381 { 0x4f, 0x6f },\r
382 { 0x50, 0x70 },\r
383 { 0x51, 0x71 },\r
384 { 0x52, 0x72 },\r
385 { 0x53, 0x73 },\r
386 { 0x54, 0x74 },\r
387 { 0x55, 0x75 },\r
388 { 0x56, 0x76 },\r
389 { 0x57, 0x77 },\r
390 { 0x58, 0x78 },\r
391 { 0x59, 0x79 },\r
392 { 0x5a, 0x7a }\r
393};\r
394\r
395extern int\r
396onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,\r
397 OnigApplyAllCaseFoldFunc f, void* arg)\r
398{\r
399 OnigCodePoint code;\r
400 int i, r;\r
401\r
402 for (i = 0;\r
403 i < (int )(sizeof(OnigAsciiLowerMap)/sizeof(OnigPairCaseFoldCodes));\r
404 i++) {\r
405 code = OnigAsciiLowerMap[i].to;\r
406 r = (*f)(OnigAsciiLowerMap[i].from, &code, 1, arg);\r
407 if (r != 0) return r;\r
408\r
409 code = OnigAsciiLowerMap[i].from;\r
410 r = (*f)(OnigAsciiLowerMap[i].to, &code, 1, arg);\r
411 if (r != 0) return r;\r
412 }\r
413\r
414 return 0;\r
415}\r
416\r
417extern int\r
418onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED,\r
419 const OnigUChar* p, const OnigUChar* end ARG_UNUSED,\r
420 OnigCaseFoldCodeItem items[])\r
421{\r
422 if (0x41 <= *p && *p <= 0x5a) {\r
423 items[0].byte_len = 1;\r
424 items[0].code_len = 1;\r
425 items[0].code[0] = (OnigCodePoint )(*p + 0x20);\r
426 return 1;\r
427 }\r
428 else if (0x61 <= *p && *p <= 0x7a) {\r
429 items[0].byte_len = 1;\r
430 items[0].code_len = 1;\r
431 items[0].code[0] = (OnigCodePoint )(*p - 0x20);\r
432 return 1;\r
433 }\r
434 else\r
435 return 0;\r
436}\r
437\r
438static int\r
439ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,\r
440 OnigApplyAllCaseFoldFunc f, void* arg)\r
441{\r
442 static OnigCodePoint ss[] = { 0x73, 0x73 };\r
443\r
444 return (*f)((OnigCodePoint )0xdf, ss, 2, arg);\r
445}\r
446\r
447extern int\r
448onigenc_apply_all_case_fold_with_map(int map_size,\r
449 const OnigPairCaseFoldCodes map[],\r
450 int ess_tsett_flag, OnigCaseFoldType flag,\r
451 OnigApplyAllCaseFoldFunc f, void* arg)\r
452{\r
453 OnigCodePoint code;\r
454 int i, r;\r
455\r
456 r = onigenc_ascii_apply_all_case_fold(flag, f, arg);\r
457 if (r != 0) return r;\r
458\r
459 for (i = 0; i < map_size; i++) {\r
460 code = map[i].to;\r
461 r = (*f)(map[i].from, &code, 1, arg);\r
462 if (r != 0) return r;\r
463\r
464 code = map[i].from;\r
465 r = (*f)(map[i].to, &code, 1, arg);\r
466 if (r != 0) return r;\r
467 }\r
468\r
469 if (ess_tsett_flag != 0)\r
470 return ss_apply_all_case_fold(flag, f, arg);\r
471\r
472 return 0;\r
473}\r
474\r
475extern int\r
476onigenc_get_case_fold_codes_by_str_with_map(int map_size,\r
477 const OnigPairCaseFoldCodes map[],\r
478 int ess_tsett_flag, OnigCaseFoldType flag ARG_UNUSED,\r
479 const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])\r
480{\r
481 if (0x41 <= *p && *p <= 0x5a) {\r
482 items[0].byte_len = 1;\r
483 items[0].code_len = 1;\r
484 items[0].code[0] = (OnigCodePoint )(*p + 0x20);\r
485 if (*p == 0x53 && ess_tsett_flag != 0 && end > p + 1\r
486 && (*(p+1) == 0x53 || *(p+1) == 0x73)) {\r
487 /* SS */\r
488 items[1].byte_len = 2;\r
489 items[1].code_len = 1;\r
490 items[1].code[0] = (OnigCodePoint )0xdf;\r
491 return 2;\r
492 }\r
493 else\r
494 return 1;\r
495 }\r
496 else if (0x61 <= *p && *p <= 0x7a) {\r
497 items[0].byte_len = 1;\r
498 items[0].code_len = 1;\r
499 items[0].code[0] = (OnigCodePoint )(*p - 0x20);\r
500 if (*p == 0x73 && ess_tsett_flag != 0 && end > p + 1\r
501 && (*(p+1) == 0x73 || *(p+1) == 0x53)) {\r
502 /* ss */\r
503 items[1].byte_len = 2;\r
504 items[1].code_len = 1;\r
505 items[1].code[0] = (OnigCodePoint )0xdf;\r
506 return 2;\r
507 }\r
508 else\r
509 return 1;\r
510 }\r
511 else if (*p == 0xdf && ess_tsett_flag != 0) {\r
512 items[0].byte_len = 1;\r
513 items[0].code_len = 2;\r
514 items[0].code[0] = (OnigCodePoint )'s';\r
515 items[0].code[1] = (OnigCodePoint )'s';\r
516\r
517 items[1].byte_len = 1;\r
518 items[1].code_len = 2;\r
519 items[1].code[0] = (OnigCodePoint )'S';\r
520 items[1].code[1] = (OnigCodePoint )'S';\r
521\r
522 items[2].byte_len = 1;\r
523 items[2].code_len = 2;\r
524 items[2].code[0] = (OnigCodePoint )'s';\r
525 items[2].code[1] = (OnigCodePoint )'S';\r
526\r
527 items[3].byte_len = 1;\r
528 items[3].code_len = 2;\r
529 items[3].code[0] = (OnigCodePoint )'S';\r
530 items[3].code[1] = (OnigCodePoint )'s';\r
531\r
532 return 4;\r
533 }\r
534 else {\r
535 int i;\r
536\r
537 for (i = 0; i < map_size; i++) {\r
538 if (*p == map[i].from) {\r
539 items[0].byte_len = 1;\r
540 items[0].code_len = 1;\r
541 items[0].code[0] = map[i].to;\r
542 return 1;\r
543 }\r
544 else if (*p == map[i].to) {\r
545 items[0].byte_len = 1;\r
546 items[0].code_len = 1;\r
547 items[0].code[0] = map[i].from;\r
548 return 1;\r
549 }\r
550 }\r
551 }\r
552\r
553 return 0;\r
554}\r
555\r
556\r
557extern int\r
558onigenc_not_support_get_ctype_code_range(OnigCtype ctype ARG_UNUSED,\r
559 OnigCodePoint* sb_out ARG_UNUSED,\r
560 const OnigCodePoint* ranges[] ARG_UNUSED)\r
561{\r
562 return ONIG_NO_SUPPORT_CONFIG;\r
563}\r
564\r
565extern int\r
566onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end)\r
567{\r
568 if (p < end) {\r
569 if (*p == 0x0a) return 1;\r
570 }\r
571 return 0;\r
572}\r
573\r
574/* for single byte encodings */\r
575extern int\r
576onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p,\r
577 const UChar*end ARG_UNUSED, UChar* lower)\r
578{\r
579 *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p);\r
580\r
581 (*p)++;\r
582 return 1; /* return byte length of converted char to lower */\r
583}\r
584\r
#if 0
/*
 * Disabled code.  Advances *pp by one byte and reports
 * ONIGENC_IS_ASCII_CODE_CASE_AMBIG for the byte that was skipped.
 */
extern int
onigenc_ascii_is_mbc_ambiguous(OnigCaseFoldType flag,
			       const UChar** pp, const UChar* end)
{
  const UChar* cur = *pp;

  *pp += 1;
  return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*cur);
}
#endif
596\r
597extern int\r
598onigenc_single_byte_mbc_enc_len(const UChar* p ARG_UNUSED)\r
599{\r
600 return 1;\r
601}\r
602\r
603extern OnigCodePoint\r
604onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)\r
605{\r
606 return (OnigCodePoint )(*p);\r
607}\r
608\r
609extern int\r
610onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED)\r
611{\r
612 return (code < 0x100 ? 1 : ONIGERR_INVALID_CODE_POINT_VALUE);\r
613}\r
614\r
615extern int\r
616onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf)\r
617{\r
618 *buf = (UChar )(code & 0xff);\r
619 return 1;\r
620}\r
621\r
622extern UChar*\r
623onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED,\r
624 const UChar* s)\r
625{\r
626 return (UChar* )s;\r
627}\r
628\r
629extern int\r
630onigenc_always_true_is_allowed_reverse_match(const UChar* s ARG_UNUSED,\r
631 const UChar* end ARG_UNUSED)\r
632{\r
633 return TRUE;\r
634}\r
635\r
636extern int\r
637onigenc_always_false_is_allowed_reverse_match(const UChar* s ARG_UNUSED,\r
638 const UChar* end ARG_UNUSED)\r
639{\r
640 return FALSE;\r
641}\r
642\r
643extern OnigCodePoint\r
644onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end)\r
645{\r
646 int c, i, len;\r
647 OnigCodePoint n;\r
648\r
649 len = enclen(enc, p);\r
650 n = (OnigCodePoint )(*p++);\r
651 if (len == 1) return n;\r
652\r
653 for (i = 1; i < len; i++) {\r
654 if (p >= end) break;\r
655 c = *p++;\r
656 n <<= 8; n += c;\r
657 }\r
658 return n;\r
659}\r
660\r
661extern int\r
662onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED,\r
663 const UChar** pp, const UChar* end ARG_UNUSED,\r
664 UChar* lower)\r
665{\r
666 int len;\r
667 const UChar *p = *pp;\r
668\r
669 if (ONIGENC_IS_MBC_ASCII(p)) {\r
670 *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);\r
671 (*pp)++;\r
672 return 1;\r
673 }\r
674 else {\r
675 int i;\r
676\r
677 len = enclen(enc, p);\r
678 for (i = 0; i < len; i++) {\r
679 *lower++ = *p++;\r
680 }\r
681 (*pp) += len;\r
682 return len; /* return byte length of converted to lower char */\r
683 }\r
684}\r
685\r
#if 0
/*
 * Disabled code.  Advances *pp past one character and reports whether
 * it is case-ambiguous: ONIGENC_IS_ASCII_CODE_CASE_AMBIG for an ASCII
 * character, FALSE for anything else.
 */
extern int
onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag,
                             const UChar** pp, const UChar* end)
{
  const UChar* cur = *pp;

  if (!ONIGENC_IS_MBC_ASCII(cur)) {
    (*pp) += enclen(enc, cur);
    return FALSE;
  }

  (*pp)++;
  return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*cur);
}
#endif
702\r
703extern int\r
704onigenc_mb2_code_to_mbclen(OnigCodePoint code)\r
705{\r
706 if ((code & 0xff00) != 0) return 2;\r
707 else return 1;\r
708}\r
709\r
710extern int\r
711onigenc_mb4_code_to_mbclen(OnigCodePoint code)\r
712{\r
713 if ((code & 0xff000000) != 0) return 4;\r
714 else if ((code & 0xff0000) != 0) return 3;\r
715 else if ((code & 0xff00) != 0) return 2;\r
716 else return 1;\r
717}\r
718\r
719extern int\r
720onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)\r
721{\r
722 UChar *p = buf;\r
723\r
724 if ((code & 0xff00) != 0) {\r
725 *p++ = (UChar )((code >> 8) & 0xff);\r
726 }\r
727 *p++ = (UChar )(code & 0xff);\r
728\r
729#if 1\r
730 if (enclen(enc, buf) != (p - buf))\r
731 return ONIGERR_INVALID_CODE_POINT_VALUE;\r
732#endif\r
733 return (int)(p - buf);\r
734}\r
735\r
736extern int\r
737onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)\r
738{\r
739 UChar *p = buf;\r
740\r
741 if ((code & 0xff000000) != 0) {\r
742 *p++ = (UChar )((code >> 24) & 0xff);\r
743 }\r
744 if ((code & 0xff0000) != 0 || p != buf) {\r
745 *p++ = (UChar )((code >> 16) & 0xff);\r
746 }\r
747 if ((code & 0xff00) != 0 || p != buf) {\r
748 *p++ = (UChar )((code >> 8) & 0xff);\r
749 }\r
750 *p++ = (UChar )(code & 0xff);\r
751\r
752#if 1\r
753 if (enclen(enc, buf) != (p - buf))\r
754 return ONIGERR_INVALID_CODE_POINT_VALUE;\r
755#endif\r
756 return (int)(p - buf);\r
757}\r
758\r
759extern int\r
760onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end)\r
761{\r
762 static PosixBracketEntryType PBS[] = {\r
763 { (UChar* )"Alnum", ONIGENC_CTYPE_ALNUM, 5 },\r
764 { (UChar* )"Alpha", ONIGENC_CTYPE_ALPHA, 5 },\r
765 { (UChar* )"Blank", ONIGENC_CTYPE_BLANK, 5 },\r
766 { (UChar* )"Cntrl", ONIGENC_CTYPE_CNTRL, 5 },\r
767 { (UChar* )"Digit", ONIGENC_CTYPE_DIGIT, 5 },\r
768 { (UChar* )"Graph", ONIGENC_CTYPE_GRAPH, 5 },\r
769 { (UChar* )"Lower", ONIGENC_CTYPE_LOWER, 5 },\r
770 { (UChar* )"Print", ONIGENC_CTYPE_PRINT, 5 },\r
771 { (UChar* )"Punct", ONIGENC_CTYPE_PUNCT, 5 },\r
772 { (UChar* )"Space", ONIGENC_CTYPE_SPACE, 5 },\r
773 { (UChar* )"Upper", ONIGENC_CTYPE_UPPER, 5 },\r
774 { (UChar* )"XDigit", ONIGENC_CTYPE_XDIGIT, 6 },\r
775 { (UChar* )"ASCII", ONIGENC_CTYPE_ASCII, 5 },\r
776 { (UChar* )"Word", ONIGENC_CTYPE_WORD, 4 },\r
777 { (UChar* )NULL, -1, 0 }\r
778 };\r
779\r
780 PosixBracketEntryType *pb;\r
781 int len;\r
782\r
783 len = onigenc_strlen(enc, p, end);\r
784 for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {\r
785 if (len == pb->len &&\r
786 onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0)\r
787 return pb->ctype;\r
788 }\r
789\r
790 return ONIGERR_INVALID_CHAR_PROPERTY_NAME;\r
791}\r
792\r
793extern int\r
794onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code,\r
795 unsigned int ctype)\r
796{\r
797 if (code < 128)\r
798 return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);\r
799 else {\r
800 if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {\r
801 return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);\r
802 }\r
803 }\r
804\r
805 return FALSE;\r
806}\r
807\r
808extern int\r
809onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code,\r
810 unsigned int ctype)\r
811{\r
812 if (code < 128)\r
813 return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);\r
814 else {\r
815 if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {\r
816 return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);\r
817 }\r
818 }\r
819\r
820 return FALSE;\r
821}\r
822\r
823extern int\r
824onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end,\r
825 const UChar* sascii /* ascii */, int n)\r
826{\r
827 int x, c;\r
828\r
829 while (n-- > 0) {\r
830 if (p >= end) return (int )(*sascii);\r
831\r
832 c = (int )ONIGENC_MBC_TO_CODE(enc, p, end);\r
833 x = *sascii - c;\r
834 if (x) return x;\r
835\r
836 sascii++;\r
837 p += enclen(enc, p);\r
838 }\r
839 return 0;\r
840}\r
841\r
842/* Property management */\r
843static int\r
844resize_property_list(int new_size, const OnigCodePoint*** plist, int* psize)\r
845{\r
846 int size;\r
847 const OnigCodePoint **list = *plist;\r
848\r
849 size = sizeof(OnigCodePoint*) * new_size;\r
850 if (IS_NULL(list)) {\r
851 list = (const OnigCodePoint** )xmalloc(size);\r
852 }\r
853 else {\r
854 list = (const OnigCodePoint** )xrealloc((void* )list, size, *psize * sizeof(OnigCodePoint*));\r
855 }\r
856\r
857 if (IS_NULL(list)) return ONIGERR_MEMORY;\r
858\r
859 *plist = list;\r
860 *psize = new_size;\r
861\r
862 return 0;\r
863}\r
864\r
865extern int\r
866onigenc_property_list_add_property(UChar* name, const OnigCodePoint* prop,\r
867 hash_table_type **table, const OnigCodePoint*** plist, int *pnum,\r
868 int *psize)\r
869{\r
870#define PROP_INIT_SIZE 16\r
871\r
872 int r;\r
873\r
874 if (*psize <= *pnum) {\r
875 int new_size = (*psize == 0 ? PROP_INIT_SIZE : *psize * 2);\r
876 r = resize_property_list(new_size, plist, psize);\r
877 if (r != 0) return r;\r
878 }\r
879\r
880 (*plist)[*pnum] = prop;\r
881\r
882 if (ONIG_IS_NULL(*table)) {\r
883 *table = onig_st_init_strend_table_with_size(PROP_INIT_SIZE);\r
884 if (ONIG_IS_NULL(*table)) return ONIGERR_MEMORY;\r
885 }\r
886\r
887 *pnum = *pnum + 1;\r
888 onig_st_insert_strend(*table, name, name + strlen_s((char* )name, MAX_STRING_SIZE),\r
889 (hash_data_type )(*pnum + ONIGENC_MAX_STD_CTYPE));\r
890 return 0;\r
891}\r
892\r
893extern int\r
894onigenc_property_list_init(int (*f)(void))\r
895{\r
896 int r;\r
897\r
898 THREAD_ATOMIC_START;\r
899\r
900 r = f();\r
901\r
902 THREAD_ATOMIC_END;\r
903 return r;\r
904}\r