git.proxmox.com Git - mirror_edk2.git/blame - MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regenc.c
MdeModulePkg RegularExpressionDxe: Update Oniguruma to 6.9.0
[mirror_edk2.git] / MdeModulePkg / Universal / RegularExpressionDxe / Oniguruma / regenc.c
CommitLineData
14b0e578
CS
1/**********************************************************************\r
2 regenc.c - Oniguruma (regular expression library)\r
3**********************************************************************/\r
4/*-\r
b602265d 5 * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>\r
14b0e578 6 * All rights reserved.\r
14b0e578
CS
7 *\r
8 * Redistribution and use in source and binary forms, with or without\r
9 * modification, are permitted provided that the following conditions\r
10 * are met:\r
11 * 1. Redistributions of source code must retain the above copyright\r
12 * notice, this list of conditions and the following disclaimer.\r
13 * 2. Redistributions in binary form must reproduce the above copyright\r
14 * notice, this list of conditions and the following disclaimer in the\r
15 * documentation and/or other materials provided with the distribution.\r
16 *\r
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND\r
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\r
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\r
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE\r
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\r
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS\r
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)\r
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT\r
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY\r
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF\r
27 * SUCH DAMAGE.\r
28 */\r
29\r
30#include "regint.h"\r
31\r
/* Global default encoding; returned by onigenc_get_default_encoding() and
   replaced by onigenc_set_default_encoding(). */
32OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT;\r
33\r
b602265d
DG
34#define INITED_LIST_SIZE 20\r
35\r
36static int InitedListNum;\r
37\r
38static struct {\r
39 OnigEncoding enc;\r
40 int inited;\r
41} InitedList[INITED_LIST_SIZE];\r
42\r
43static int\r
44enc_inited_entry(OnigEncoding enc)\r
45{\r
46 int i;\r
47\r
48 for (i = 0; i < InitedListNum; i++) {\r
49 if (InitedList[i].enc == enc) {\r
50 InitedList[i].inited = 1;\r
51 return i;\r
52 }\r
53 }\r
54\r
55 i = InitedListNum;\r
56 if (i < INITED_LIST_SIZE - 1) {\r
57 InitedList[i].enc = enc;\r
58 InitedList[i].inited = 1;\r
59 InitedListNum++;\r
60 return i;\r
61 }\r
62\r
63 return -1;\r
64}\r
65\r
66static int\r
67enc_is_inited(OnigEncoding enc)\r
68{\r
69 int i;\r
70\r
71 for (i = 0; i < InitedListNum; i++) {\r
72 if (InitedList[i].enc == enc) {\r
73 return InitedList[i].inited;\r
74 }\r
75 }\r
76\r
77 return 0;\r
78}\r
79\r
80extern int\r
81onigenc_end(void)\r
82{\r
83 int i;\r
84\r
85 for (i = 0; i < InitedListNum; i++) {\r
86 InitedList[i].enc = 0;\r
87 InitedList[i].inited = 0;\r
88 }\r
89\r
90 InitedListNum = 0;\r
91 return ONIG_NORMAL;\r
92}\r
93\r
14b0e578
CS
/* Module-level initialization hook; does no work and always returns 0. */
extern int
onigenc_init(void)
{
  const int status = 0;

  return status;
}
99\r
b602265d
DG
100extern int\r
101onig_initialize_encoding(OnigEncoding enc)\r
102{\r
103 int r;\r
104\r
105 if (enc != ONIG_ENCODING_ASCII &&\r
106 ONIGENC_IS_ASCII_COMPATIBLE_ENCODING(enc)) {\r
107 OnigEncoding ascii = ONIG_ENCODING_ASCII;\r
108 if (ascii->init != 0 && enc_is_inited(ascii) == 0) {\r
109 r = ascii->init();\r
110 if (r != ONIG_NORMAL) return r;\r
111 enc_inited_entry(ascii);\r
112 }\r
113 }\r
114\r
115 if (enc->init != 0 &&\r
116 enc_is_inited(enc) == 0) {\r
117 r = (enc->init)();\r
118 if (r == ONIG_NORMAL)\r
119 enc_inited_entry(enc);\r
120 return r;\r
121 }\r
122\r
123 return 0;\r
124}\r
125\r
14b0e578
CS
126extern OnigEncoding\r
127onigenc_get_default_encoding(void)\r
128{\r
129 return OnigEncDefaultCharEncoding;\r
130}\r
131\r
132extern int\r
133onigenc_set_default_encoding(OnigEncoding enc)\r
134{\r
135 OnigEncDefaultCharEncoding = enc;\r
136 return 0;\r
137}\r
138\r
b602265d
DG
139extern UChar*\r
140onigenc_strdup(OnigEncoding enc, const UChar* s, const UChar* end)\r
141{\r
142 int slen, term_len, i;\r
143 UChar *r;\r
144\r
145 slen = (int )(end - s);\r
146 term_len = ONIGENC_MBC_MINLEN(enc);\r
147\r
148 r = (UChar* )xmalloc(slen + term_len);\r
149 CHECK_NULL_RETURN(r);\r
150 xmemcpy(r, s, slen);\r
151\r
152 for (i = 0; i < term_len; i++)\r
153 r[slen + i] = (UChar )0;\r
154\r
155 return r;\r
156}\r
157\r
14b0e578
CS
158extern UChar*\r
159onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s)\r
160{\r
161 UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);\r
162 if (p < s) {\r
163 p += enclen(enc, p);\r
164 }\r
165 return p;\r
166}\r
167\r
168extern UChar*\r
169onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,\r
170 const UChar* start, const UChar* s, const UChar** prev)\r
171{\r
172 UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);\r
173\r
174 if (p < s) {\r
175 if (prev) *prev = (const UChar* )p;\r
176 p += enclen(enc, p);\r
177 }\r
178 else {\r
179 if (prev) *prev = (const UChar* )NULL; /* Sorry */\r
180 }\r
181 return p;\r
182}\r
183\r
184extern UChar*\r
185onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s)\r
186{\r
187 if (s <= start)\r
188 return (UChar* )NULL;\r
189\r
190 return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);\r
191}\r
192\r
193extern UChar*\r
194onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, int n)\r
195{\r
196 while (ONIG_IS_NOT_NULL(s) && n-- > 0) {\r
197 if (s <= start)\r
198 return (UChar* )NULL;\r
199\r
200 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);\r
201 }\r
202 return (UChar* )s;\r
203}\r
204\r
b602265d
DG
#if 0
/* Disabled: encoded length of the character at `p`, clamped to the number
   of bytes left before `end`. */
extern int
onigenc_mbc_enc_len_end(OnigEncoding enc, const UChar* p, const UChar* end)
{
  int char_len = ONIGENC_MBC_ENC_LEN(enc, p);
  int remain   = (int )(end - p);

  if (remain < char_len)
    return remain;

  return char_len;
}
#endif
218\r
14b0e578
CS
219extern UChar*\r
220onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n)\r
221{\r
222 UChar* q = (UChar* )p;\r
223 while (n-- > 0) {\r
224 q += ONIGENC_MBC_ENC_LEN(enc, q);\r
225 }\r
226 return (q <= end ? q : NULL);\r
227}\r
228\r
229extern int\r
230onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end)\r
231{\r
232 int n = 0;\r
233 UChar* q = (UChar* )p;\r
234 \r
235 while (q < end) {\r
236 q += ONIGENC_MBC_ENC_LEN(enc, q);\r
237 n++;\r
238 }\r
239 return n;\r
240}\r
241\r
242extern int\r
243onigenc_strlen_null(OnigEncoding enc, const UChar* s)\r
244{\r
245 int n = 0;\r
246 UChar* p = (UChar* )s;\r
247 \r
248 while (1) {\r
249 if (*p == '\0') {\r
250 UChar* q;\r
251 int len = ONIGENC_MBC_MINLEN(enc);\r
252\r
253 if (len == 1) return n;\r
254 q = p + 1;\r
255 while (len > 1) {\r
256 if (*q != '\0') break;\r
257 q++;\r
258 len--;\r
259 }\r
260 if (len == 1) return n;\r
261 }\r
262 p += ONIGENC_MBC_ENC_LEN(enc, p);\r
263 n++;\r
264 }\r
265}\r
266\r
267extern int\r
268onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)\r
269{\r
270 UChar* start = (UChar* )s;\r
271 UChar* p = (UChar* )s;\r
272\r
273 while (1) {\r
274 if (*p == '\0') {\r
275 UChar* q;\r
276 int len = ONIGENC_MBC_MINLEN(enc);\r
277\r
278 if (len == 1) return (int )(p - start);\r
279 q = p + 1;\r
280 while (len > 1) {\r
281 if (*q != '\0') break;\r
282 q++;\r
283 len--;\r
284 }\r
285 if (len == 1) return (int )(p - start);\r
286 }\r
287 p += ONIGENC_MBC_ENC_LEN(enc, p);\r
288 }\r
289}\r
290\r
/* ASCII lower-casing table indexed by byte value: the entries for 'A'-'Z'
   ('\101'-'\132') hold 'a'-'z' ('\141'-'\172'); every other entry holds its
   own index. */
291const UChar OnigEncAsciiToLowerCaseTable[] = {\r
b602265d
DG
292 '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',\r
293 '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',\r
294 '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',\r
295 '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',\r
296 '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',\r
297 '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',\r
298 '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',\r
299 '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',\r
300 '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',\r
301 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',\r
302 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',\r
303 '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',\r
304 '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',\r
305 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',\r
306 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',\r
307 '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',\r
308 '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',\r
309 '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',\r
310 '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',\r
311 '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',\r
312 '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',\r
313 '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',\r
314 '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',\r
315 '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',\r
316 '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',\r
317 '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',\r
318 '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',\r
319 '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',\r
320 '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',\r
321 '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',\r
322 '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',\r
323 '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',\r
14b0e578
CS
324};\r
325\r
/* ASCII upper-casing table, compiled only when USE_UPPER_CASE_TABLE is
   defined.  Indexed by byte value: the entries for 'a'-'z' ('\141'-'\172')
   hold 'A'-'Z' ('\101'-'\132'); every other entry holds its own index. */
326#ifdef USE_UPPER_CASE_TABLE\r
327const UChar OnigEncAsciiToUpperCaseTable[256] = {\r
b602265d
DG
328 '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',\r
329 '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',\r
330 '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',\r
331 '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',\r
332 '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',\r
333 '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',\r
334 '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',\r
335 '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',\r
336 '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',\r
337 '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',\r
338 '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',\r
339 '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',\r
340 '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',\r
341 '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',\r
342 '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',\r
343 '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',\r
344 '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',\r
345 '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',\r
346 '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',\r
347 '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',\r
348 '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',\r
349 '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',\r
350 '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',\r
351 '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',\r
352 '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',\r
353 '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',\r
354 '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',\r
355 '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',\r
356 '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',\r
357 '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',\r
358 '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',\r
359 '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',\r
14b0e578
CS
360};\r
361#endif\r
362\r
/* Per-byte 16-bit mask table indexed by byte value.  Entries 0-127 carry
   non-zero masks; entries 128-255 are all zero.
   NOTE(review): the meaning of the individual bits is not visible here;
   presumably one bit per ONIGENC_CTYPE_* class, to be confirmed against
   the header that defines them. */
363const unsigned short OnigEncAsciiCtypeTable[256] = {\r
364 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,\r
365 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,\r
366 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,\r
367 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,\r
368 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,\r
369 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,\r
370 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,\r
371 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,\r
372 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,\r
373 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,\r
374 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,\r
375 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,\r
376 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,\r
377 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,\r
378 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,\r
379 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,\r
380 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,\r
381 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,\r
382 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,\r
383 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,\r
384 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,\r
385 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,\r
386 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,\r
387 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,\r
388 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,\r
389 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,\r
390 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,\r
391 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,\r
392 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,\r
393 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,\r
394 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,\r
395 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000\r
396};\r
397\r
/* ISO-8859-1 lower-casing table indexed by byte value.  'A'-'Z' map to
   'a'-'z', and '\300'-'\336' map to '\340'-'\376', except '\327', which
   maps to itself.  Every other entry, including '\337', holds its own
   index. */
398const UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = {\r
b602265d
DG
399 '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',\r
400 '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',\r
401 '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',\r
402 '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',\r
403 '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',\r
404 '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',\r
405 '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',\r
406 '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',\r
407 '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',\r
408 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',\r
409 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',\r
410 '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',\r
411 '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',\r
412 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',\r
413 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',\r
414 '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',\r
415 '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',\r
416 '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',\r
417 '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',\r
418 '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',\r
419 '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',\r
420 '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',\r
421 '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',\r
422 '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',\r
423 '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',\r
424 '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',\r
425 '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',\r
426 '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',\r
427 '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',\r
428 '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',\r
429 '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',\r
430 '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'\r
14b0e578
CS
431};\r
432\r
/* ISO-8859-1 upper-casing table, compiled only when USE_UPPER_CASE_TABLE is
   defined.  Indexed by byte value: 'a'-'z' map to 'A'-'Z', and
   '\340'-'\376' map to '\300'-'\336', except '\367', which maps to itself.
   Every other entry, including '\377', holds its own index. */
433#ifdef USE_UPPER_CASE_TABLE\r
434const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = {\r
b602265d
DG
435 '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',\r
436 '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',\r
437 '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',\r
438 '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',\r
439 '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',\r
440 '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',\r
441 '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',\r
442 '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',\r
443 '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',\r
444 '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',\r
445 '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',\r
446 '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',\r
447 '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',\r
448 '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',\r
449 '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',\r
450 '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',\r
451 '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',\r
452 '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',\r
453 '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',\r
454 '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',\r
455 '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',\r
456 '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',\r
457 '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',\r
458 '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',\r
459 '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',\r
460 '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',\r
461 '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',\r
462 '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',\r
463 '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',\r
464 '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',\r
465 '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\367',\r
466 '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\377',\r
14b0e578
CS
467};\r
468#endif\r
469\r
470extern void\r
471onigenc_set_default_caseconv_table(const UChar* table ARG_UNUSED)\r
472{\r
473 /* nothing */\r
474 /* obsoleted. */\r
475}\r
476\r
477extern UChar*\r
478onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s)\r
479{\r
480 return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);\r
481}\r
482\r
483const OnigPairCaseFoldCodes OnigAsciiLowerMap[] = {\r
484 { 0x41, 0x61 },\r
485 { 0x42, 0x62 },\r
486 { 0x43, 0x63 },\r
487 { 0x44, 0x64 },\r
488 { 0x45, 0x65 },\r
489 { 0x46, 0x66 },\r
490 { 0x47, 0x67 },\r
491 { 0x48, 0x68 },\r
492 { 0x49, 0x69 },\r
493 { 0x4a, 0x6a },\r
494 { 0x4b, 0x6b },\r
495 { 0x4c, 0x6c },\r
496 { 0x4d, 0x6d },\r
497 { 0x4e, 0x6e },\r
498 { 0x4f, 0x6f },\r
499 { 0x50, 0x70 },\r
500 { 0x51, 0x71 },\r
501 { 0x52, 0x72 },\r
502 { 0x53, 0x73 },\r
503 { 0x54, 0x74 },\r
504 { 0x55, 0x75 },\r
505 { 0x56, 0x76 },\r
506 { 0x57, 0x77 },\r
507 { 0x58, 0x78 },\r
508 { 0x59, 0x79 },\r
509 { 0x5a, 0x7a }\r
510};\r
511\r
512extern int\r
513onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,\r
514 OnigApplyAllCaseFoldFunc f, void* arg)\r
515{\r
516 OnigCodePoint code;\r
517 int i, r;\r
518\r
519 for (i = 0;\r
520 i < (int )(sizeof(OnigAsciiLowerMap)/sizeof(OnigPairCaseFoldCodes));\r
521 i++) {\r
522 code = OnigAsciiLowerMap[i].to;\r
523 r = (*f)(OnigAsciiLowerMap[i].from, &code, 1, arg);\r
524 if (r != 0) return r;\r
525\r
526 code = OnigAsciiLowerMap[i].from;\r
527 r = (*f)(OnigAsciiLowerMap[i].to, &code, 1, arg);\r
528 if (r != 0) return r;\r
529 }\r
530\r
531 return 0;\r
532}\r
533\r
534extern int\r
535onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED,\r
536 const OnigUChar* p, const OnigUChar* end ARG_UNUSED,\r
537 OnigCaseFoldCodeItem items[])\r
538{\r
539 if (0x41 <= *p && *p <= 0x5a) {\r
540 items[0].byte_len = 1;\r
541 items[0].code_len = 1;\r
542 items[0].code[0] = (OnigCodePoint )(*p + 0x20);\r
543 return 1;\r
544 }\r
545 else if (0x61 <= *p && *p <= 0x7a) {\r
546 items[0].byte_len = 1;\r
547 items[0].code_len = 1;\r
548 items[0].code[0] = (OnigCodePoint )(*p - 0x20);\r
549 return 1;\r
550 }\r
551 else\r
552 return 0;\r
553}\r
554\r
555static int\r
556ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,\r
557 OnigApplyAllCaseFoldFunc f, void* arg)\r
558{\r
559 static OnigCodePoint ss[] = { 0x73, 0x73 };\r
560\r
561 return (*f)((OnigCodePoint )0xdf, ss, 2, arg);\r
562}\r
563\r
564extern int\r
565onigenc_apply_all_case_fold_with_map(int map_size,\r
566 const OnigPairCaseFoldCodes map[],\r
567 int ess_tsett_flag, OnigCaseFoldType flag,\r
568 OnigApplyAllCaseFoldFunc f, void* arg)\r
569{\r
570 OnigCodePoint code;\r
571 int i, r;\r
572\r
573 r = onigenc_ascii_apply_all_case_fold(flag, f, arg);\r
574 if (r != 0) return r;\r
575\r
576 for (i = 0; i < map_size; i++) {\r
577 code = map[i].to;\r
578 r = (*f)(map[i].from, &code, 1, arg);\r
579 if (r != 0) return r;\r
580\r
581 code = map[i].from;\r
582 r = (*f)(map[i].to, &code, 1, arg);\r
583 if (r != 0) return r;\r
584 }\r
585\r
586 if (ess_tsett_flag != 0)\r
587 return ss_apply_all_case_fold(flag, f, arg);\r
588\r
589 return 0;\r
590}\r
591\r
592extern int\r
593onigenc_get_case_fold_codes_by_str_with_map(int map_size,\r
594 const OnigPairCaseFoldCodes map[],\r
595 int ess_tsett_flag, OnigCaseFoldType flag ARG_UNUSED,\r
596 const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])\r
597{\r
598 if (0x41 <= *p && *p <= 0x5a) {\r
599 items[0].byte_len = 1;\r
600 items[0].code_len = 1;\r
601 items[0].code[0] = (OnigCodePoint )(*p + 0x20);\r
602 if (*p == 0x53 && ess_tsett_flag != 0 && end > p + 1\r
603 && (*(p+1) == 0x53 || *(p+1) == 0x73)) {\r
604 /* SS */\r
605 items[1].byte_len = 2;\r
606 items[1].code_len = 1;\r
607 items[1].code[0] = (OnigCodePoint )0xdf;\r
608 return 2;\r
609 }\r
610 else\r
611 return 1;\r
612 }\r
613 else if (0x61 <= *p && *p <= 0x7a) {\r
614 items[0].byte_len = 1;\r
615 items[0].code_len = 1;\r
616 items[0].code[0] = (OnigCodePoint )(*p - 0x20);\r
617 if (*p == 0x73 && ess_tsett_flag != 0 && end > p + 1\r
618 && (*(p+1) == 0x73 || *(p+1) == 0x53)) {\r
619 /* ss */\r
620 items[1].byte_len = 2;\r
621 items[1].code_len = 1;\r
622 items[1].code[0] = (OnigCodePoint )0xdf;\r
623 return 2;\r
624 }\r
625 else\r
626 return 1;\r
627 }\r
628 else if (*p == 0xdf && ess_tsett_flag != 0) {\r
629 items[0].byte_len = 1;\r
630 items[0].code_len = 2;\r
631 items[0].code[0] = (OnigCodePoint )'s';\r
632 items[0].code[1] = (OnigCodePoint )'s';\r
633\r
634 items[1].byte_len = 1;\r
635 items[1].code_len = 2;\r
636 items[1].code[0] = (OnigCodePoint )'S';\r
637 items[1].code[1] = (OnigCodePoint )'S';\r
638\r
639 items[2].byte_len = 1;\r
640 items[2].code_len = 2;\r
641 items[2].code[0] = (OnigCodePoint )'s';\r
642 items[2].code[1] = (OnigCodePoint )'S';\r
643\r
644 items[3].byte_len = 1;\r
645 items[3].code_len = 2;\r
646 items[3].code[0] = (OnigCodePoint )'S';\r
647 items[3].code[1] = (OnigCodePoint )'s';\r
648\r
649 return 4;\r
650 }\r
651 else {\r
652 int i;\r
653\r
654 for (i = 0; i < map_size; i++) {\r
655 if (*p == map[i].from) {\r
656 items[0].byte_len = 1;\r
657 items[0].code_len = 1;\r
658 items[0].code[0] = map[i].to;\r
659 return 1;\r
660 }\r
661 else if (*p == map[i].to) {\r
662 items[0].byte_len = 1;\r
663 items[0].code_len = 1;\r
664 items[0].code[0] = map[i].from;\r
665 return 1;\r
666 }\r
667 }\r
668 }\r
669\r
670 return 0;\r
671}\r
672\r
673\r
674extern int\r
675onigenc_not_support_get_ctype_code_range(OnigCtype ctype ARG_UNUSED,\r
676 OnigCodePoint* sb_out ARG_UNUSED,\r
677 const OnigCodePoint* ranges[] ARG_UNUSED)\r
678{\r
679 return ONIG_NO_SUPPORT_CONFIG;\r
680}\r
681\r
682extern int\r
683onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end)\r
684{\r
685 if (p < end) {\r
686 if (*p == 0x0a) return 1;\r
687 }\r
688 return 0;\r
689}\r
690\r
691/* for single byte encodings */\r
692extern int\r
693onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p,\r
694 const UChar*end ARG_UNUSED, UChar* lower)\r
695{\r
696 *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p);\r
697\r
698 (*p)++;\r
699 return 1; /* return byte length of converted char to lower */\r
700}\r
701\r
#if 0
/* Disabled: advances `*pp` by one byte and reports whether the byte that was
   skipped satisfies ONIGENC_IS_ASCII_CODE_CASE_AMBIG. */
extern int
onigenc_ascii_is_mbc_ambiguous(OnigCaseFoldType flag,
                               const UChar** pp, const UChar* end)
{
  const UChar* cur = *pp;

  *pp = cur + 1;
  return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*cur);
}
#endif
713\r
714extern int\r
715onigenc_single_byte_mbc_enc_len(const UChar* p ARG_UNUSED)\r
716{\r
717 return 1;\r
718}\r
719\r
720extern OnigCodePoint\r
721onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)\r
722{\r
723 return (OnigCodePoint )(*p);\r
724}\r
725\r
726extern int\r
727onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED)\r
728{\r
729 return (code < 0x100 ? 1 : ONIGERR_INVALID_CODE_POINT_VALUE);\r
730}\r
731\r
732extern int\r
733onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf)\r
734{\r
735 *buf = (UChar )(code & 0xff);\r
736 return 1;\r
737}\r
738\r
739extern UChar*\r
740onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED,\r
741 const UChar* s)\r
742{\r
743 return (UChar* )s;\r
744}\r
745\r
746extern int\r
747onigenc_always_true_is_allowed_reverse_match(const UChar* s ARG_UNUSED,\r
748 const UChar* end ARG_UNUSED)\r
749{\r
750 return TRUE;\r
751}\r
752\r
753extern int\r
754onigenc_always_false_is_allowed_reverse_match(const UChar* s ARG_UNUSED,\r
755 const UChar* end ARG_UNUSED)\r
756{\r
757 return FALSE;\r
758}\r
759\r
b602265d
DG
760extern int\r
761onigenc_always_true_is_valid_mbc_string(const UChar* s ARG_UNUSED,\r
762 const UChar* end ARG_UNUSED)\r
763{\r
764 return TRUE;\r
765}\r
766\r
767extern int\r
768onigenc_length_check_is_valid_mbc_string(OnigEncoding enc,\r
769 const UChar* p, const UChar* end)\r
770{\r
771 while (p < end) {\r
772 p += enclen(enc, p);\r
773 }\r
774\r
775 if (p != end)\r
776 return FALSE;\r
777 else\r
778 return TRUE;\r
779}\r
780\r
781extern int\r
782onigenc_is_valid_mbc_string(OnigEncoding enc, const UChar* s, const UChar* end)\r
783{\r
784 return ONIGENC_IS_VALID_MBC_STRING(enc, s, end);\r
785}\r
786\r
14b0e578
CS
787extern OnigCodePoint\r
788onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end)\r
789{\r
790 int c, i, len;\r
791 OnigCodePoint n;\r
792\r
793 len = enclen(enc, p);\r
794 n = (OnigCodePoint )(*p++);\r
795 if (len == 1) return n;\r
796\r
797 for (i = 1; i < len; i++) {\r
798 if (p >= end) break;\r
799 c = *p++;\r
800 n <<= 8; n += c;\r
801 }\r
802 return n;\r
803}\r
804\r
805extern int\r
806onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED,\r
807 const UChar** pp, const UChar* end ARG_UNUSED,\r
808 UChar* lower)\r
809{\r
810 int len;\r
811 const UChar *p = *pp;\r
812\r
813 if (ONIGENC_IS_MBC_ASCII(p)) {\r
814 *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);\r
815 (*pp)++;\r
816 return 1;\r
817 }\r
818 else {\r
819 int i;\r
820\r
821 len = enclen(enc, p);\r
822 for (i = 0; i < len; i++) {\r
823 *lower++ = *p++;\r
824 }\r
825 (*pp) += len;\r
826 return len; /* return byte length of converted to lower char */\r
827 }\r
828}\r
829\r
#if 0
/* Disabled: advances `*pp` past one character; an ASCII byte is tested with
   ONIGENC_IS_ASCII_CODE_CASE_AMBIG, any other character yields FALSE. */
extern int
onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag,
                             const UChar** pp, const UChar* end)
{
  const UChar* cur = *pp;

  if (! ONIGENC_IS_MBC_ASCII(cur)) {
    (*pp) += enclen(enc, cur);
    return FALSE;
  }

  (*pp)++;
  return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*cur);
}
#endif
846\r
847extern int\r
848onigenc_mb2_code_to_mbclen(OnigCodePoint code)\r
849{\r
850 if ((code & 0xff00) != 0) return 2;\r
851 else return 1;\r
852}\r
853\r
854extern int\r
855onigenc_mb4_code_to_mbclen(OnigCodePoint code)\r
856{\r
857 if ((code & 0xff000000) != 0) return 4;\r
858 else if ((code & 0xff0000) != 0) return 3;\r
859 else if ((code & 0xff00) != 0) return 2;\r
860 else return 1;\r
861}\r
862\r
863extern int\r
864onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)\r
865{\r
866 UChar *p = buf;\r
867\r
868 if ((code & 0xff00) != 0) {\r
869 *p++ = (UChar )((code >> 8) & 0xff);\r
870 }\r
871 *p++ = (UChar )(code & 0xff);\r
872\r
873#if 1\r
874 if (enclen(enc, buf) != (p - buf))\r
875 return ONIGERR_INVALID_CODE_POINT_VALUE;\r
876#endif\r
b602265d 877 return (int )(p - buf);\r
14b0e578
CS
878}\r
879\r
880extern int\r
881onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)\r
882{\r
883 UChar *p = buf;\r
884\r
885 if ((code & 0xff000000) != 0) {\r
886 *p++ = (UChar )((code >> 24) & 0xff);\r
887 }\r
888 if ((code & 0xff0000) != 0 || p != buf) {\r
889 *p++ = (UChar )((code >> 16) & 0xff);\r
890 }\r
891 if ((code & 0xff00) != 0 || p != buf) {\r
892 *p++ = (UChar )((code >> 8) & 0xff);\r
893 }\r
894 *p++ = (UChar )(code & 0xff);\r
895\r
896#if 1\r
897 if (enclen(enc, buf) != (p - buf))\r
898 return ONIGERR_INVALID_CODE_POINT_VALUE;\r
899#endif\r
b602265d 900 return (int )(p - buf);\r
14b0e578
CS
901}\r
902\r
903extern int\r
904onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end)\r
905{\r
906 static PosixBracketEntryType PBS[] = {\r
907 { (UChar* )"Alnum", ONIGENC_CTYPE_ALNUM, 5 },\r
908 { (UChar* )"Alpha", ONIGENC_CTYPE_ALPHA, 5 },\r
909 { (UChar* )"Blank", ONIGENC_CTYPE_BLANK, 5 },\r
910 { (UChar* )"Cntrl", ONIGENC_CTYPE_CNTRL, 5 },\r
911 { (UChar* )"Digit", ONIGENC_CTYPE_DIGIT, 5 },\r
912 { (UChar* )"Graph", ONIGENC_CTYPE_GRAPH, 5 },\r
913 { (UChar* )"Lower", ONIGENC_CTYPE_LOWER, 5 },\r
914 { (UChar* )"Print", ONIGENC_CTYPE_PRINT, 5 },\r
915 { (UChar* )"Punct", ONIGENC_CTYPE_PUNCT, 5 },\r
916 { (UChar* )"Space", ONIGENC_CTYPE_SPACE, 5 },\r
917 { (UChar* )"Upper", ONIGENC_CTYPE_UPPER, 5 },\r
918 { (UChar* )"XDigit", ONIGENC_CTYPE_XDIGIT, 6 },\r
919 { (UChar* )"ASCII", ONIGENC_CTYPE_ASCII, 5 },\r
920 { (UChar* )"Word", ONIGENC_CTYPE_WORD, 4 },\r
921 { (UChar* )NULL, -1, 0 }\r
922 };\r
923\r
924 PosixBracketEntryType *pb;\r
925 int len;\r
926\r
927 len = onigenc_strlen(enc, p, end);\r
928 for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {\r
929 if (len == pb->len &&\r
930 onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0)\r
931 return pb->ctype;\r
932 }\r
933\r
934 return ONIGERR_INVALID_CHAR_PROPERTY_NAME;\r
935}\r
936\r
b602265d
DG
937extern int\r
938onigenc_is_mbc_word_ascii(OnigEncoding enc, UChar* s, const UChar* end)\r
939{\r
940 OnigCodePoint code = ONIGENC_MBC_TO_CODE(enc, s, end);\r
941\r
942 if (code > 127) return 0;\r
943\r
944 return ONIGENC_IS_ASCII_CODE_WORD(code);\r
945}\r
946\r
14b0e578
CS
947extern int\r
948onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code,\r
949 unsigned int ctype)\r
950{\r
951 if (code < 128)\r
952 return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);\r
953 else {\r
954 if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {\r
955 return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);\r
956 }\r
957 }\r
958\r
959 return FALSE;\r
960}\r
961\r
962extern int\r
963onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code,\r
964 unsigned int ctype)\r
965{\r
966 if (code < 128)\r
967 return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);\r
968 else {\r
969 if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {\r
970 return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);\r
971 }\r
972 }\r
973\r
974 return FALSE;\r
975}\r
976\r
977extern int\r
978onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end,\r
979 const UChar* sascii /* ascii */, int n)\r
980{\r
981 int x, c;\r
982\r
983 while (n-- > 0) {\r
984 if (p >= end) return (int )(*sascii);\r
985\r
986 c = (int )ONIGENC_MBC_TO_CODE(enc, p, end);\r
987 x = *sascii - c;\r
988 if (x) return x;\r
989\r
990 sascii++;\r
991 p += enclen(enc, p);\r
992 }\r
993 return 0;\r
994}\r
995\r
14b0e578 996extern int\r
b602265d 997onig_codes_cmp(OnigCodePoint a[], OnigCodePoint b[], int n)\r
14b0e578 998{\r
b602265d 999 int i;\r
14b0e578 1000\r
b602265d
DG
1001 for (i = 0; i < n; i++) {\r
1002 if (a[i] != b[i])\r
1003 return -1;\r
14b0e578
CS
1004 }\r
1005\r
14b0e578
CS
1006 return 0;\r
1007}\r
1008\r
1009extern int\r
b602265d 1010onig_codes_byte_at(OnigCodePoint codes[], int at)\r
14b0e578 1011{\r
b602265d
DG
1012 int index;\r
1013 int b;\r
1014 OnigCodePoint code;\r
14b0e578 1015\r
b602265d
DG
1016 index = at / 3;\r
1017 b = at % 3;\r
1018 code = codes[index];\r
14b0e578 1019\r
b602265d 1020 return ((code >> ((2 - b) * 8)) & 0xff);\r
14b0e578 1021}\r