git.proxmox.com Git - mirror_edk2.git/blob - MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regenc.c
ArmVirtPkg: Include NVMe support in ArmVirtQemu*
[mirror_edk2.git] / MdeModulePkg / Universal / RegularExpressionDxe / Oniguruma / regenc.c
1 /**********************************************************************
2 regenc.c - Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5 * Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 #include "regint.h"
31
32 OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT;
33
34 #define INITED_LIST_SIZE 20
35
36 static int InitedListNum;
37
38 static struct {
39 OnigEncoding enc;
40 int inited;
41 } InitedList[INITED_LIST_SIZE];
42
43 static int
44 enc_inited_entry(OnigEncoding enc)
45 {
46 int i;
47
48 for (i = 0; i < InitedListNum; i++) {
49 if (InitedList[i].enc == enc) {
50 InitedList[i].inited = 1;
51 return i;
52 }
53 }
54
55 i = InitedListNum;
56 if (i < INITED_LIST_SIZE - 1) {
57 InitedList[i].enc = enc;
58 InitedList[i].inited = 1;
59 InitedListNum++;
60 return i;
61 }
62
63 return -1;
64 }
65
66 static int
67 enc_is_inited(OnigEncoding enc)
68 {
69 int i;
70
71 for (i = 0; i < InitedListNum; i++) {
72 if (InitedList[i].enc == enc) {
73 return InitedList[i].inited;
74 }
75 }
76
77 return 0;
78 }
79
/* Non-zero once onigenc_init() has run; cleared again by onigenc_end(). */
static int OnigEncInited;

/*
 * Set the module-level "initialized" flag.  Calling it again is harmless.
 * Always returns 0.
 */
extern int
onigenc_init(void)
{
  if (OnigEncInited == 0)
    OnigEncInited = 1;

  return 0;
}
90
91 extern int
92 onigenc_end(void)
93 {
94 int i;
95
96 for (i = 0; i < InitedListNum; i++) {
97 InitedList[i].enc = 0;
98 InitedList[i].inited = 0;
99 }
100 InitedListNum = 0;
101
102 OnigEncInited = 0;
103 return ONIG_NORMAL;
104 }
105
106 extern int
107 onig_initialize_encoding(OnigEncoding enc)
108 {
109 int r;
110
111 if (enc != ONIG_ENCODING_ASCII &&
112 ONIGENC_IS_ASCII_COMPATIBLE_ENCODING(enc)) {
113 OnigEncoding ascii = ONIG_ENCODING_ASCII;
114 if (ascii->init != 0 && enc_is_inited(ascii) == 0) {
115 r = ascii->init();
116 if (r != ONIG_NORMAL) return r;
117 enc_inited_entry(ascii);
118 }
119 }
120
121 if (enc->init != 0 &&
122 enc_is_inited(enc) == 0) {
123 r = (enc->init)();
124 if (r == ONIG_NORMAL)
125 enc_inited_entry(enc);
126 return r;
127 }
128
129 return 0;
130 }
131
132 extern OnigEncoding
133 onigenc_get_default_encoding(void)
134 {
135 return OnigEncDefaultCharEncoding;
136 }
137
138 extern int
139 onigenc_set_default_encoding(OnigEncoding enc)
140 {
141 OnigEncDefaultCharEncoding = enc;
142 return 0;
143 }
144
145 extern UChar*
146 onigenc_strdup(OnigEncoding enc, const UChar* s, const UChar* end)
147 {
148 int slen, term_len, i;
149 UChar *r;
150
151 slen = (int )(end - s);
152 term_len = ONIGENC_MBC_MINLEN(enc);
153
154 r = (UChar* )xmalloc(slen + term_len);
155 CHECK_NULL_RETURN(r);
156 xmemcpy(r, s, slen);
157
158 for (i = 0; i < term_len; i++)
159 r[slen + i] = (UChar )0;
160
161 return r;
162 }
163
164 extern UChar*
165 onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
166 {
167 UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
168 if (p < s) {
169 p += enclen(enc, p);
170 }
171 return p;
172 }
173
174 extern UChar*
175 onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,
176 const UChar* start, const UChar* s, const UChar** prev)
177 {
178 UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
179
180 if (p < s) {
181 if (prev) *prev = (const UChar* )p;
182 p += enclen(enc, p);
183 }
184 else {
185 if (prev) *prev = (const UChar* )NULL; /* Sorry */
186 }
187 return p;
188 }
189
190 extern UChar*
191 onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
192 {
193 if (s <= start)
194 return (UChar* )NULL;
195
196 return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);
197 }
198
199 extern UChar*
200 onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, int n)
201 {
202 while (ONIG_IS_NOT_NULL(s) && n-- > 0) {
203 if (s <= start)
204 return (UChar* )NULL;
205
206 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);
207 }
208 return (UChar* )s;
209 }
210
#if 0
/* Disabled: character length at `p`, clamped to the bytes left before `end`. */
extern int
onigenc_mbc_enc_len_end(OnigEncoding enc, const UChar* p, const UChar* end)
{
  int enc_len   = ONIGENC_MBC_ENC_LEN(enc, p);
  int remaining = (int )(end - p);

  if (remaining < enc_len)
    return remaining;

  return enc_len;
}
#endif
224
225 extern UChar*
226 onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n)
227 {
228 UChar* q = (UChar* )p;
229 while (n-- > 0) {
230 q += ONIGENC_MBC_ENC_LEN(enc, q);
231 }
232 return (q <= end ? q : NULL);
233 }
234
235 extern int
236 onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end)
237 {
238 int n = 0;
239 UChar* q = (UChar* )p;
240
241 while (q < end) {
242 q += ONIGENC_MBC_ENC_LEN(enc, q);
243 n++;
244 }
245 return n;
246 }
247
248 extern int
249 onigenc_strlen_null(OnigEncoding enc, const UChar* s)
250 {
251 int n = 0;
252 UChar* p = (UChar* )s;
253
254 while (1) {
255 if (*p == '\0') {
256 UChar* q;
257 int len = ONIGENC_MBC_MINLEN(enc);
258
259 if (len == 1) return n;
260 q = p + 1;
261 while (len > 1) {
262 if (*q != '\0') break;
263 q++;
264 len--;
265 }
266 if (len == 1) return n;
267 }
268 p += ONIGENC_MBC_ENC_LEN(enc, p);
269 n++;
270 }
271 }
272
273 extern int
274 onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
275 {
276 UChar* start = (UChar* )s;
277 UChar* p = (UChar* )s;
278
279 while (1) {
280 if (*p == '\0') {
281 UChar* q;
282 int len = ONIGENC_MBC_MINLEN(enc);
283
284 if (len == 1) return (int )(p - start);
285 q = p + 1;
286 while (len > 1) {
287 if (*q != '\0') break;
288 q++;
289 len--;
290 }
291 if (len == 1) return (int )(p - start);
292 }
293 p += ONIGENC_MBC_ENC_LEN(enc, p);
294 }
295 }
296
297 const UChar OnigEncAsciiToLowerCaseTable[] = {
298 '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
299 '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
300 '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
301 '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
302 '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
303 '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
304 '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
305 '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
306 '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
307 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
308 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
309 '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
310 '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
311 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
312 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
313 '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
314 '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
315 '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
316 '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
317 '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
318 '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
319 '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
320 '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
321 '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
322 '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
323 '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
324 '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
325 '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
326 '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
327 '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
328 '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
329 '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
330 };
331
#ifdef USE_UPPER_CASE_TABLE
/* Byte -> byte map: 0x61..0x7a ('a'..'z') go to 0x41..0x5a ('A'..'Z'),
   every other byte maps to itself.  One row per high nibble. */
const UChar OnigEncAsciiToUpperCaseTable[256] = {
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
  0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
  0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
  0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
  0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
  0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
  0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
  0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
  0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
  0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
  0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
  0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
  0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
  0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff
};
#endif
368
/* Character-type bit mask for each byte value.  Only 0x00..0x7f carry
   bits; the initializers for 0x80..0xff are omitted on purpose and are
   therefore zero-initialized, as in an explicit all-zero upper half. */
const unsigned short OnigEncAsciiCtypeTable[256] = {
  /* 0x00 */ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x08 */ 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
  /* 0x10 */ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x18 */ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x20 */ 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x28 */ 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x30 */ 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
  /* 0x38 */ 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x40 */ 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
  /* 0x48 */ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  /* 0x50 */ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  /* 0x58 */ 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
  /* 0x60 */ 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
  /* 0x68 */ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  /* 0x70 */ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  /* 0x78 */ 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008
};
403
404 const UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = {
405 '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
406 '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
407 '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
408 '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
409 '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
410 '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
411 '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
412 '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
413 '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
414 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
415 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
416 '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
417 '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
418 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
419 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
420 '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
421 '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
422 '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
423 '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
424 '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
425 '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
426 '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
427 '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
428 '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
429 '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
430 '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
431 '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
432 '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
433 '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
434 '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
435 '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
436 '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
437 };
438
#ifdef USE_UPPER_CASE_TABLE
/* Byte -> byte map: 0x61..0x7a go to 0x41..0x5a and 0xe0..0xfe go to
   0xc0..0xde, except 0xf7 which maps to itself; every other byte
   (including 0xff) maps to itself.  One row per high nibble. */
const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = {
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
  0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
  0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
  0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
  0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
  0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
  0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
  0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
  0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
  0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
  0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
  0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
  0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
  0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xf7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xff
};
#endif
475
476 extern void
477 onigenc_set_default_caseconv_table(const UChar* table ARG_UNUSED)
478 {
479 /* nothing */
480 /* obsoleted. */
481 }
482
483 extern UChar*
484 onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
485 {
486 return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
487 }
488
489 const OnigPairCaseFoldCodes OnigAsciiLowerMap[] = {
490 { 0x41, 0x61 },
491 { 0x42, 0x62 },
492 { 0x43, 0x63 },
493 { 0x44, 0x64 },
494 { 0x45, 0x65 },
495 { 0x46, 0x66 },
496 { 0x47, 0x67 },
497 { 0x48, 0x68 },
498 { 0x49, 0x69 },
499 { 0x4a, 0x6a },
500 { 0x4b, 0x6b },
501 { 0x4c, 0x6c },
502 { 0x4d, 0x6d },
503 { 0x4e, 0x6e },
504 { 0x4f, 0x6f },
505 { 0x50, 0x70 },
506 { 0x51, 0x71 },
507 { 0x52, 0x72 },
508 { 0x53, 0x73 },
509 { 0x54, 0x74 },
510 { 0x55, 0x75 },
511 { 0x56, 0x76 },
512 { 0x57, 0x77 },
513 { 0x58, 0x78 },
514 { 0x59, 0x79 },
515 { 0x5a, 0x7a }
516 };
517
518 extern int
519 onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
520 OnigApplyAllCaseFoldFunc f, void* arg)
521 {
522 OnigCodePoint code;
523 int i, r;
524
525 for (i = 0;
526 i < (int )(sizeof(OnigAsciiLowerMap)/sizeof(OnigPairCaseFoldCodes));
527 i++) {
528 code = OnigAsciiLowerMap[i].to;
529 r = (*f)(OnigAsciiLowerMap[i].from, &code, 1, arg);
530 if (r != 0) return r;
531
532 code = OnigAsciiLowerMap[i].from;
533 r = (*f)(OnigAsciiLowerMap[i].to, &code, 1, arg);
534 if (r != 0) return r;
535 }
536
537 return 0;
538 }
539
540 extern int
541 onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED,
542 const OnigUChar* p, const OnigUChar* end ARG_UNUSED,
543 OnigCaseFoldCodeItem items[])
544 {
545 if (0x41 <= *p && *p <= 0x5a) {
546 items[0].byte_len = 1;
547 items[0].code_len = 1;
548 items[0].code[0] = (OnigCodePoint )(*p + 0x20);
549 return 1;
550 }
551 else if (0x61 <= *p && *p <= 0x7a) {
552 items[0].byte_len = 1;
553 items[0].code_len = 1;
554 items[0].code[0] = (OnigCodePoint )(*p - 0x20);
555 return 1;
556 }
557 else
558 return 0;
559 }
560
561 static int
562 ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
563 OnigApplyAllCaseFoldFunc f, void* arg)
564 {
565 static OnigCodePoint ss[] = { 0x73, 0x73 };
566
567 return (*f)((OnigCodePoint )0xdf, ss, 2, arg);
568 }
569
570 extern int
571 onigenc_apply_all_case_fold_with_map(int map_size,
572 const OnigPairCaseFoldCodes map[],
573 int ess_tsett_flag, OnigCaseFoldType flag,
574 OnigApplyAllCaseFoldFunc f, void* arg)
575 {
576 OnigCodePoint code;
577 int i, r;
578
579 r = onigenc_ascii_apply_all_case_fold(flag, f, arg);
580 if (r != 0) return r;
581
582 for (i = 0; i < map_size; i++) {
583 code = map[i].to;
584 r = (*f)(map[i].from, &code, 1, arg);
585 if (r != 0) return r;
586
587 code = map[i].from;
588 r = (*f)(map[i].to, &code, 1, arg);
589 if (r != 0) return r;
590 }
591
592 if (ess_tsett_flag != 0)
593 return ss_apply_all_case_fold(flag, f, arg);
594
595 return 0;
596 }
597
598 extern int
599 onigenc_get_case_fold_codes_by_str_with_map(int map_size,
600 const OnigPairCaseFoldCodes map[],
601 int ess_tsett_flag, OnigCaseFoldType flag ARG_UNUSED,
602 const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
603 {
604 if (0x41 <= *p && *p <= 0x5a) {
605 items[0].byte_len = 1;
606 items[0].code_len = 1;
607 items[0].code[0] = (OnigCodePoint )(*p + 0x20);
608 if (*p == 0x53 && ess_tsett_flag != 0 && end > p + 1
609 && (*(p+1) == 0x53 || *(p+1) == 0x73)) {
610 /* SS */
611 items[1].byte_len = 2;
612 items[1].code_len = 1;
613 items[1].code[0] = (OnigCodePoint )0xdf;
614 return 2;
615 }
616 else
617 return 1;
618 }
619 else if (0x61 <= *p && *p <= 0x7a) {
620 items[0].byte_len = 1;
621 items[0].code_len = 1;
622 items[0].code[0] = (OnigCodePoint )(*p - 0x20);
623 if (*p == 0x73 && ess_tsett_flag != 0 && end > p + 1
624 && (*(p+1) == 0x73 || *(p+1) == 0x53)) {
625 /* ss */
626 items[1].byte_len = 2;
627 items[1].code_len = 1;
628 items[1].code[0] = (OnigCodePoint )0xdf;
629 return 2;
630 }
631 else
632 return 1;
633 }
634 else if (*p == 0xdf && ess_tsett_flag != 0) {
635 items[0].byte_len = 1;
636 items[0].code_len = 2;
637 items[0].code[0] = (OnigCodePoint )'s';
638 items[0].code[1] = (OnigCodePoint )'s';
639
640 items[1].byte_len = 1;
641 items[1].code_len = 2;
642 items[1].code[0] = (OnigCodePoint )'S';
643 items[1].code[1] = (OnigCodePoint )'S';
644
645 items[2].byte_len = 1;
646 items[2].code_len = 2;
647 items[2].code[0] = (OnigCodePoint )'s';
648 items[2].code[1] = (OnigCodePoint )'S';
649
650 items[3].byte_len = 1;
651 items[3].code_len = 2;
652 items[3].code[0] = (OnigCodePoint )'S';
653 items[3].code[1] = (OnigCodePoint )'s';
654
655 return 4;
656 }
657 else {
658 int i;
659
660 for (i = 0; i < map_size; i++) {
661 if (*p == map[i].from) {
662 items[0].byte_len = 1;
663 items[0].code_len = 1;
664 items[0].code[0] = map[i].to;
665 return 1;
666 }
667 else if (*p == map[i].to) {
668 items[0].byte_len = 1;
669 items[0].code_len = 1;
670 items[0].code[0] = map[i].from;
671 return 1;
672 }
673 }
674 }
675
676 return 0;
677 }
678
679
680 extern int
681 onigenc_not_support_get_ctype_code_range(OnigCtype ctype ARG_UNUSED,
682 OnigCodePoint* sb_out ARG_UNUSED,
683 const OnigCodePoint* ranges[] ARG_UNUSED)
684 {
685 return ONIG_NO_SUPPORT_CONFIG;
686 }
687
688 extern int
689 onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end)
690 {
691 if (p < end) {
692 if (*p == 0x0a) return 1;
693 }
694 return 0;
695 }
696
697 /* for single byte encodings */
698 extern int
699 onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p,
700 const UChar*end ARG_UNUSED, UChar* lower)
701 {
702 *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p);
703
704 (*p)++;
705 return 1; /* return byte length of converted char to lower */
706 }
707
#if 0
/* Disabled: advance *pp by one byte and report whether the byte it pointed
   at satisfies ONIGENC_IS_ASCII_CODE_CASE_AMBIG. */
extern int
onigenc_ascii_is_mbc_ambiguous(OnigCaseFoldType flag,
                               const UChar** pp, const UChar* end)
{
  const UChar* cur = *pp;

  *pp += 1;
  return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*cur);
}
#endif
719
720 extern int
721 onigenc_single_byte_mbc_enc_len(const UChar* p ARG_UNUSED)
722 {
723 return 1;
724 }
725
726 extern OnigCodePoint
727 onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
728 {
729 return (OnigCodePoint )(*p);
730 }
731
732 extern int
733 onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED)
734 {
735 return (code < 0x100 ? 1 : ONIGERR_INVALID_CODE_POINT_VALUE);
736 }
737
738 extern int
739 onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf)
740 {
741 *buf = (UChar )(code & 0xff);
742 return 1;
743 }
744
745 extern UChar*
746 onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED,
747 const UChar* s)
748 {
749 return (UChar* )s;
750 }
751
752 extern int
753 onigenc_always_true_is_allowed_reverse_match(const UChar* s ARG_UNUSED,
754 const UChar* end ARG_UNUSED)
755 {
756 return TRUE;
757 }
758
759 extern int
760 onigenc_always_false_is_allowed_reverse_match(const UChar* s ARG_UNUSED,
761 const UChar* end ARG_UNUSED)
762 {
763 return FALSE;
764 }
765
766 extern int
767 onigenc_always_true_is_valid_mbc_string(const UChar* s ARG_UNUSED,
768 const UChar* end ARG_UNUSED)
769 {
770 return TRUE;
771 }
772
773 extern int
774 onigenc_length_check_is_valid_mbc_string(OnigEncoding enc,
775 const UChar* p, const UChar* end)
776 {
777 while (p < end) {
778 p += enclen(enc, p);
779 }
780
781 if (p != end)
782 return FALSE;
783 else
784 return TRUE;
785 }
786
787 extern int
788 onigenc_is_valid_mbc_string(OnigEncoding enc, const UChar* s, const UChar* end)
789 {
790 return ONIGENC_IS_VALID_MBC_STRING(enc, s, end);
791 }
792
793 extern OnigCodePoint
794 onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end)
795 {
796 int c, i, len;
797 OnigCodePoint n;
798
799 len = enclen(enc, p);
800 n = (OnigCodePoint )(*p++);
801 if (len == 1) return n;
802
803 for (i = 1; i < len; i++) {
804 if (p >= end) break;
805 c = *p++;
806 n <<= 8; n += c;
807 }
808 return n;
809 }
810
811 extern int
812 onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED,
813 const UChar** pp, const UChar* end ARG_UNUSED,
814 UChar* lower)
815 {
816 int len;
817 const UChar *p = *pp;
818
819 if (ONIGENC_IS_MBC_ASCII(p)) {
820 *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
821 (*pp)++;
822 return 1;
823 }
824 else {
825 int i;
826
827 len = enclen(enc, p);
828 for (i = 0; i < len; i++) {
829 *lower++ = *p++;
830 }
831 (*pp) += len;
832 return len; /* return byte length of converted to lower char */
833 }
834 }
835
#if 0
/* Disabled: advance *pp over one character; only a byte accepted by
   ONIGENC_IS_MBC_ASCII can be reported as case-ambiguous. */
extern int
onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag,
                             const UChar** pp, const UChar* end)
{
  const UChar* cur = *pp;

  if (! ONIGENC_IS_MBC_ASCII(cur)) {
    (*pp) += enclen(enc, cur);
    return FALSE;
  }

  (*pp)++;
  return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*cur);
}
#endif
852
853 extern int
854 onigenc_mb2_code_to_mbclen(OnigCodePoint code)
855 {
856 if ((code & (~0xffff)) != 0) return ONIGERR_INVALID_CODE_POINT_VALUE;
857
858 if ((code & 0xff00) != 0) return 2;
859 else return 1;
860 }
861
862 extern int
863 onigenc_mb4_code_to_mbclen(OnigCodePoint code)
864 {
865 if ((code & 0xff000000) != 0) return 4;
866 else if ((code & 0xff0000) != 0) return 3;
867 else if ((code & 0xff00) != 0) return 2;
868 else return 1;
869 }
870
871 extern int
872 onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
873 {
874 UChar *p = buf;
875
876 if ((code & 0xff00) != 0) {
877 *p++ = (UChar )((code >> 8) & 0xff);
878 }
879 *p++ = (UChar )(code & 0xff);
880
881 #if 1
882 if (enclen(enc, buf) != (p - buf))
883 return ONIGERR_INVALID_CODE_POINT_VALUE;
884 #endif
885 return (int )(p - buf);
886 }
887
888 extern int
889 onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
890 {
891 UChar *p = buf;
892
893 if ((code & 0xff000000) != 0) {
894 *p++ = (UChar )((code >> 24) & 0xff);
895 }
896 if ((code & 0xff0000) != 0 || p != buf) {
897 *p++ = (UChar )((code >> 16) & 0xff);
898 }
899 if ((code & 0xff00) != 0 || p != buf) {
900 *p++ = (UChar )((code >> 8) & 0xff);
901 }
902 *p++ = (UChar )(code & 0xff);
903
904 #if 1
905 if (enclen(enc, buf) != (p - buf))
906 return ONIGERR_INVALID_CODE_POINT_VALUE;
907 #endif
908 return (int )(p - buf);
909 }
910
911 extern int
912 onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end)
913 {
914 static PosixBracketEntryType PBS[] = {
915 { (UChar* )"Alnum", ONIGENC_CTYPE_ALNUM, 5 },
916 { (UChar* )"Alpha", ONIGENC_CTYPE_ALPHA, 5 },
917 { (UChar* )"Blank", ONIGENC_CTYPE_BLANK, 5 },
918 { (UChar* )"Cntrl", ONIGENC_CTYPE_CNTRL, 5 },
919 { (UChar* )"Digit", ONIGENC_CTYPE_DIGIT, 5 },
920 { (UChar* )"Graph", ONIGENC_CTYPE_GRAPH, 5 },
921 { (UChar* )"Lower", ONIGENC_CTYPE_LOWER, 5 },
922 { (UChar* )"Print", ONIGENC_CTYPE_PRINT, 5 },
923 { (UChar* )"Punct", ONIGENC_CTYPE_PUNCT, 5 },
924 { (UChar* )"Space", ONIGENC_CTYPE_SPACE, 5 },
925 { (UChar* )"Upper", ONIGENC_CTYPE_UPPER, 5 },
926 { (UChar* )"XDigit", ONIGENC_CTYPE_XDIGIT, 6 },
927 { (UChar* )"ASCII", ONIGENC_CTYPE_ASCII, 5 },
928 { (UChar* )"Word", ONIGENC_CTYPE_WORD, 4 },
929 { (UChar* )NULL, -1, 0 }
930 };
931
932 PosixBracketEntryType *pb;
933 int len;
934
935 len = onigenc_strlen(enc, p, end);
936 for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
937 if (len == pb->len &&
938 onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0)
939 return pb->ctype;
940 }
941
942 return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
943 }
944
945 extern int
946 onigenc_is_mbc_word_ascii(OnigEncoding enc, UChar* s, const UChar* end)
947 {
948 OnigCodePoint code = ONIGENC_MBC_TO_CODE(enc, s, end);
949
950 if (code > 127) return 0;
951
952 return ONIGENC_IS_ASCII_CODE_WORD(code);
953 }
954
955 extern int
956 onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
957 unsigned int ctype)
958 {
959 if (code < 128)
960 return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
961 else {
962 if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
963 return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
964 }
965 }
966
967 return FALSE;
968 }
969
970 extern int
971 onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
972 unsigned int ctype)
973 {
974 if (code < 128)
975 return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
976 else {
977 if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
978 return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
979 }
980 }
981
982 return FALSE;
983 }
984
985 extern int
986 onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end,
987 const UChar* sascii /* ascii */, int n)
988 {
989 int x, c;
990
991 while (n-- > 0) {
992 if (p >= end) return (int )(*sascii);
993
994 c = (int )ONIGENC_MBC_TO_CODE(enc, p, end);
995 x = *sascii - c;
996 if (x) return x;
997
998 sascii++;
999 p += enclen(enc, p);
1000 }
1001 return 0;
1002 }
1003
1004 extern int
1005 onig_codes_cmp(OnigCodePoint a[], OnigCodePoint b[], int n)
1006 {
1007 int i;
1008
1009 for (i = 0; i < n; i++) {
1010 if (a[i] != b[i])
1011 return -1;
1012 }
1013
1014 return 0;
1015 }
1016
1017 extern int
1018 onig_codes_byte_at(OnigCodePoint codes[], int at)
1019 {
1020 int index;
1021 int b;
1022 OnigCodePoint code;
1023
1024 index = at / 3;
1025 b = at % 3;
1026 code = codes[index];
1027
1028 return ((code >> ((2 - b) * 8)) & 0xff);
1029 }