git.proxmox.com Git - mirror_edk2.git/blob - MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regenc.c
MdeModulePkg RegularExpressionDxe: Update Oniguruma to 6.9.0
[mirror_edk2.git] / MdeModulePkg / Universal / RegularExpressionDxe / Oniguruma / regenc.c
1 /**********************************************************************
2 regenc.c - Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5 * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 #include "regint.h"
31
32 OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT;
33
34 #define INITED_LIST_SIZE 20
35
36 static int InitedListNum;
37
38 static struct {
39 OnigEncoding enc;
40 int inited;
41 } InitedList[INITED_LIST_SIZE];
42
43 static int
44 enc_inited_entry(OnigEncoding enc)
45 {
46 int i;
47
48 for (i = 0; i < InitedListNum; i++) {
49 if (InitedList[i].enc == enc) {
50 InitedList[i].inited = 1;
51 return i;
52 }
53 }
54
55 i = InitedListNum;
56 if (i < INITED_LIST_SIZE - 1) {
57 InitedList[i].enc = enc;
58 InitedList[i].inited = 1;
59 InitedListNum++;
60 return i;
61 }
62
63 return -1;
64 }
65
66 static int
67 enc_is_inited(OnigEncoding enc)
68 {
69 int i;
70
71 for (i = 0; i < InitedListNum; i++) {
72 if (InitedList[i].enc == enc) {
73 return InitedList[i].inited;
74 }
75 }
76
77 return 0;
78 }
79
80 extern int
81 onigenc_end(void)
82 {
83 int i;
84
85 for (i = 0; i < InitedListNum; i++) {
86 InitedList[i].enc = 0;
87 InitedList[i].inited = 0;
88 }
89
90 InitedListNum = 0;
91 return ONIG_NORMAL;
92 }
93
/* Encoding-layer start-up hook; performs no work and returns 0. */
extern int
onigenc_init(void)
{
  const int status = 0;

  return status;
}
99
100 extern int
101 onig_initialize_encoding(OnigEncoding enc)
102 {
103 int r;
104
105 if (enc != ONIG_ENCODING_ASCII &&
106 ONIGENC_IS_ASCII_COMPATIBLE_ENCODING(enc)) {
107 OnigEncoding ascii = ONIG_ENCODING_ASCII;
108 if (ascii->init != 0 && enc_is_inited(ascii) == 0) {
109 r = ascii->init();
110 if (r != ONIG_NORMAL) return r;
111 enc_inited_entry(ascii);
112 }
113 }
114
115 if (enc->init != 0 &&
116 enc_is_inited(enc) == 0) {
117 r = (enc->init)();
118 if (r == ONIG_NORMAL)
119 enc_inited_entry(enc);
120 return r;
121 }
122
123 return 0;
124 }
125
126 extern OnigEncoding
127 onigenc_get_default_encoding(void)
128 {
129 return OnigEncDefaultCharEncoding;
130 }
131
132 extern int
133 onigenc_set_default_encoding(OnigEncoding enc)
134 {
135 OnigEncDefaultCharEncoding = enc;
136 return 0;
137 }
138
139 extern UChar*
140 onigenc_strdup(OnigEncoding enc, const UChar* s, const UChar* end)
141 {
142 int slen, term_len, i;
143 UChar *r;
144
145 slen = (int )(end - s);
146 term_len = ONIGENC_MBC_MINLEN(enc);
147
148 r = (UChar* )xmalloc(slen + term_len);
149 CHECK_NULL_RETURN(r);
150 xmemcpy(r, s, slen);
151
152 for (i = 0; i < term_len; i++)
153 r[slen + i] = (UChar )0;
154
155 return r;
156 }
157
158 extern UChar*
159 onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
160 {
161 UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
162 if (p < s) {
163 p += enclen(enc, p);
164 }
165 return p;
166 }
167
168 extern UChar*
169 onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,
170 const UChar* start, const UChar* s, const UChar** prev)
171 {
172 UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
173
174 if (p < s) {
175 if (prev) *prev = (const UChar* )p;
176 p += enclen(enc, p);
177 }
178 else {
179 if (prev) *prev = (const UChar* )NULL; /* Sorry */
180 }
181 return p;
182 }
183
184 extern UChar*
185 onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
186 {
187 if (s <= start)
188 return (UChar* )NULL;
189
190 return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);
191 }
192
193 extern UChar*
194 onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, int n)
195 {
196 while (ONIG_IS_NOT_NULL(s) && n-- > 0) {
197 if (s <= start)
198 return (UChar* )NULL;
199
200 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);
201 }
202 return (UChar* )s;
203 }
204
#if 0
/* Disabled: character length at `p`, clamped to the bytes left before `end`. */
extern int
onigenc_mbc_enc_len_end(OnigEncoding enc, const UChar* p, const UChar* end)
{
  int char_len  = ONIGENC_MBC_ENC_LEN(enc, p);
  int remaining = (int )(end - p);

  if (remaining < char_len)
    return remaining;

  return char_len;
}
#endif
218
219 extern UChar*
220 onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n)
221 {
222 UChar* q = (UChar* )p;
223 while (n-- > 0) {
224 q += ONIGENC_MBC_ENC_LEN(enc, q);
225 }
226 return (q <= end ? q : NULL);
227 }
228
229 extern int
230 onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end)
231 {
232 int n = 0;
233 UChar* q = (UChar* )p;
234
235 while (q < end) {
236 q += ONIGENC_MBC_ENC_LEN(enc, q);
237 n++;
238 }
239 return n;
240 }
241
242 extern int
243 onigenc_strlen_null(OnigEncoding enc, const UChar* s)
244 {
245 int n = 0;
246 UChar* p = (UChar* )s;
247
248 while (1) {
249 if (*p == '\0') {
250 UChar* q;
251 int len = ONIGENC_MBC_MINLEN(enc);
252
253 if (len == 1) return n;
254 q = p + 1;
255 while (len > 1) {
256 if (*q != '\0') break;
257 q++;
258 len--;
259 }
260 if (len == 1) return n;
261 }
262 p += ONIGENC_MBC_ENC_LEN(enc, p);
263 n++;
264 }
265 }
266
267 extern int
268 onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
269 {
270 UChar* start = (UChar* )s;
271 UChar* p = (UChar* )s;
272
273 while (1) {
274 if (*p == '\0') {
275 UChar* q;
276 int len = ONIGENC_MBC_MINLEN(enc);
277
278 if (len == 1) return (int )(p - start);
279 q = p + 1;
280 while (len > 1) {
281 if (*q != '\0') break;
282 q++;
283 len--;
284 }
285 if (len == 1) return (int )(p - start);
286 }
287 p += ONIGENC_MBC_ENC_LEN(enc, p);
288 }
289 }
290
291 const UChar OnigEncAsciiToLowerCaseTable[] = {
292 '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
293 '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
294 '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
295 '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
296 '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
297 '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
298 '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
299 '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
300 '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
301 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
302 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
303 '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
304 '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
305 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
306 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
307 '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
308 '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
309 '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
310 '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
311 '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
312 '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
313 '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
314 '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
315 '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
316 '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
317 '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
318 '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
319 '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
320 '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
321 '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
322 '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
323 '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
324 };
325
#ifdef USE_UPPER_CASE_TABLE
/*
 * Byte -> upper-case byte.  Identity everywhere except 0x61..0x7a ('a'..'z'),
 * which map to 0x41..0x5a ('A'..'Z').
 */
const UChar OnigEncAsciiToUpperCaseTable[256] = {
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
  0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
  0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
  0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
  0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
  0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
  0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
  0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
  0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
  0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,  /* ` a-g -> A-G */
  0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,  /* h-o -> H-O */
  0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,  /* p-w -> P-W */
  0x58, 0x59, 0x5a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,  /* x-z -> X-Z */
  0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
  0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
  0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
  0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
  0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
  0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
  0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
  0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
  0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
  0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
  0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
  0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
  0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
  0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
  0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
  0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
};
#endif
362
/*
 * Per-byte character-type bit masks, indexed by byte value.
 * Only bytes 0x00..0x7f carry bits; entries 0x80..0xff are all zero and are
 * left to the implicit zero-initialisation of the remaining array elements.
 */
const unsigned short OnigEncAsciiCtypeTable[256] = {
  /* 0x00 */ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x08 */ 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
  /* 0x10 */ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x18 */ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x20 */ 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x28 */ 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x30 */ 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
  /* 0x38 */ 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x40 */ 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
  /* 0x48 */ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  /* 0x50 */ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  /* 0x58 */ 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
  /* 0x60 */ 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
  /* 0x68 */ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  /* 0x70 */ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  /* 0x78 */ 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008
  /* 0x80 .. 0xff: 0x0000 */
};
397
398 const UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = {
399 '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
400 '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
401 '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
402 '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
403 '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
404 '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
405 '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
406 '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
407 '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
408 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
409 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
410 '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
411 '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
412 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
413 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
414 '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
415 '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
416 '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
417 '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
418 '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
419 '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
420 '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
421 '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
422 '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
423 '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
424 '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
425 '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
426 '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
427 '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
428 '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
429 '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
430 '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
431 };
432
#ifdef USE_UPPER_CASE_TABLE
/*
 * Byte -> upper-case byte for ISO-8859-1.
 * 0x61..0x7a map to 0x41..0x5a, 0xe0..0xf6 to 0xc0..0xd6 and 0xf8..0xfe to
 * 0xd8..0xde; 0xf7 and 0xff map to themselves, as does every other byte.
 */
const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = {
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
  0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
  0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
  0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
  0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
  0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
  0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
  0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
  0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
  0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,  /* 0x61.. -> 0x41.. */
  0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
  0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
  0x58, 0x59, 0x5a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,  /* ..0x7a -> ..0x5a */
  0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
  0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
  0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
  0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
  0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
  0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
  0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
  0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
  0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
  0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
  0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
  0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
  0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,  /* 0xe0.. -> 0xc0.. */
  0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
  0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xf7,  /* 0xf7 unchanged */
  0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xff,  /* 0xff unchanged */
};
#endif
469
470 extern void
471 onigenc_set_default_caseconv_table(const UChar* table ARG_UNUSED)
472 {
473 /* nothing */
474 /* obsoleted. */
475 }
476
477 extern UChar*
478 onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
479 {
480 return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
481 }
482
483 const OnigPairCaseFoldCodes OnigAsciiLowerMap[] = {
484 { 0x41, 0x61 },
485 { 0x42, 0x62 },
486 { 0x43, 0x63 },
487 { 0x44, 0x64 },
488 { 0x45, 0x65 },
489 { 0x46, 0x66 },
490 { 0x47, 0x67 },
491 { 0x48, 0x68 },
492 { 0x49, 0x69 },
493 { 0x4a, 0x6a },
494 { 0x4b, 0x6b },
495 { 0x4c, 0x6c },
496 { 0x4d, 0x6d },
497 { 0x4e, 0x6e },
498 { 0x4f, 0x6f },
499 { 0x50, 0x70 },
500 { 0x51, 0x71 },
501 { 0x52, 0x72 },
502 { 0x53, 0x73 },
503 { 0x54, 0x74 },
504 { 0x55, 0x75 },
505 { 0x56, 0x76 },
506 { 0x57, 0x77 },
507 { 0x58, 0x78 },
508 { 0x59, 0x79 },
509 { 0x5a, 0x7a }
510 };
511
512 extern int
513 onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
514 OnigApplyAllCaseFoldFunc f, void* arg)
515 {
516 OnigCodePoint code;
517 int i, r;
518
519 for (i = 0;
520 i < (int )(sizeof(OnigAsciiLowerMap)/sizeof(OnigPairCaseFoldCodes));
521 i++) {
522 code = OnigAsciiLowerMap[i].to;
523 r = (*f)(OnigAsciiLowerMap[i].from, &code, 1, arg);
524 if (r != 0) return r;
525
526 code = OnigAsciiLowerMap[i].from;
527 r = (*f)(OnigAsciiLowerMap[i].to, &code, 1, arg);
528 if (r != 0) return r;
529 }
530
531 return 0;
532 }
533
534 extern int
535 onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED,
536 const OnigUChar* p, const OnigUChar* end ARG_UNUSED,
537 OnigCaseFoldCodeItem items[])
538 {
539 if (0x41 <= *p && *p <= 0x5a) {
540 items[0].byte_len = 1;
541 items[0].code_len = 1;
542 items[0].code[0] = (OnigCodePoint )(*p + 0x20);
543 return 1;
544 }
545 else if (0x61 <= *p && *p <= 0x7a) {
546 items[0].byte_len = 1;
547 items[0].code_len = 1;
548 items[0].code[0] = (OnigCodePoint )(*p - 0x20);
549 return 1;
550 }
551 else
552 return 0;
553 }
554
555 static int
556 ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
557 OnigApplyAllCaseFoldFunc f, void* arg)
558 {
559 static OnigCodePoint ss[] = { 0x73, 0x73 };
560
561 return (*f)((OnigCodePoint )0xdf, ss, 2, arg);
562 }
563
564 extern int
565 onigenc_apply_all_case_fold_with_map(int map_size,
566 const OnigPairCaseFoldCodes map[],
567 int ess_tsett_flag, OnigCaseFoldType flag,
568 OnigApplyAllCaseFoldFunc f, void* arg)
569 {
570 OnigCodePoint code;
571 int i, r;
572
573 r = onigenc_ascii_apply_all_case_fold(flag, f, arg);
574 if (r != 0) return r;
575
576 for (i = 0; i < map_size; i++) {
577 code = map[i].to;
578 r = (*f)(map[i].from, &code, 1, arg);
579 if (r != 0) return r;
580
581 code = map[i].from;
582 r = (*f)(map[i].to, &code, 1, arg);
583 if (r != 0) return r;
584 }
585
586 if (ess_tsett_flag != 0)
587 return ss_apply_all_case_fold(flag, f, arg);
588
589 return 0;
590 }
591
592 extern int
593 onigenc_get_case_fold_codes_by_str_with_map(int map_size,
594 const OnigPairCaseFoldCodes map[],
595 int ess_tsett_flag, OnigCaseFoldType flag ARG_UNUSED,
596 const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
597 {
598 if (0x41 <= *p && *p <= 0x5a) {
599 items[0].byte_len = 1;
600 items[0].code_len = 1;
601 items[0].code[0] = (OnigCodePoint )(*p + 0x20);
602 if (*p == 0x53 && ess_tsett_flag != 0 && end > p + 1
603 && (*(p+1) == 0x53 || *(p+1) == 0x73)) {
604 /* SS */
605 items[1].byte_len = 2;
606 items[1].code_len = 1;
607 items[1].code[0] = (OnigCodePoint )0xdf;
608 return 2;
609 }
610 else
611 return 1;
612 }
613 else if (0x61 <= *p && *p <= 0x7a) {
614 items[0].byte_len = 1;
615 items[0].code_len = 1;
616 items[0].code[0] = (OnigCodePoint )(*p - 0x20);
617 if (*p == 0x73 && ess_tsett_flag != 0 && end > p + 1
618 && (*(p+1) == 0x73 || *(p+1) == 0x53)) {
619 /* ss */
620 items[1].byte_len = 2;
621 items[1].code_len = 1;
622 items[1].code[0] = (OnigCodePoint )0xdf;
623 return 2;
624 }
625 else
626 return 1;
627 }
628 else if (*p == 0xdf && ess_tsett_flag != 0) {
629 items[0].byte_len = 1;
630 items[0].code_len = 2;
631 items[0].code[0] = (OnigCodePoint )'s';
632 items[0].code[1] = (OnigCodePoint )'s';
633
634 items[1].byte_len = 1;
635 items[1].code_len = 2;
636 items[1].code[0] = (OnigCodePoint )'S';
637 items[1].code[1] = (OnigCodePoint )'S';
638
639 items[2].byte_len = 1;
640 items[2].code_len = 2;
641 items[2].code[0] = (OnigCodePoint )'s';
642 items[2].code[1] = (OnigCodePoint )'S';
643
644 items[3].byte_len = 1;
645 items[3].code_len = 2;
646 items[3].code[0] = (OnigCodePoint )'S';
647 items[3].code[1] = (OnigCodePoint )'s';
648
649 return 4;
650 }
651 else {
652 int i;
653
654 for (i = 0; i < map_size; i++) {
655 if (*p == map[i].from) {
656 items[0].byte_len = 1;
657 items[0].code_len = 1;
658 items[0].code[0] = map[i].to;
659 return 1;
660 }
661 else if (*p == map[i].to) {
662 items[0].byte_len = 1;
663 items[0].code_len = 1;
664 items[0].code[0] = map[i].from;
665 return 1;
666 }
667 }
668 }
669
670 return 0;
671 }
672
673
674 extern int
675 onigenc_not_support_get_ctype_code_range(OnigCtype ctype ARG_UNUSED,
676 OnigCodePoint* sb_out ARG_UNUSED,
677 const OnigCodePoint* ranges[] ARG_UNUSED)
678 {
679 return ONIG_NO_SUPPORT_CONFIG;
680 }
681
682 extern int
683 onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end)
684 {
685 if (p < end) {
686 if (*p == 0x0a) return 1;
687 }
688 return 0;
689 }
690
691 /* for single byte encodings */
692 extern int
693 onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p,
694 const UChar*end ARG_UNUSED, UChar* lower)
695 {
696 *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p);
697
698 (*p)++;
699 return 1; /* return byte length of converted char to lower */
700 }
701
#if 0
/* Disabled: advance `*pp` by one byte and test the skipped byte with
   ONIGENC_IS_ASCII_CODE_CASE_AMBIG. */
extern int
onigenc_ascii_is_mbc_ambiguous(OnigCaseFoldType flag,
                               const UChar** pp, const UChar* end)
{
  const UChar* cur = *pp;

  *pp = cur + 1;
  return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*cur);
}
#endif
713
714 extern int
715 onigenc_single_byte_mbc_enc_len(const UChar* p ARG_UNUSED)
716 {
717 return 1;
718 }
719
720 extern OnigCodePoint
721 onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
722 {
723 return (OnigCodePoint )(*p);
724 }
725
726 extern int
727 onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED)
728 {
729 return (code < 0x100 ? 1 : ONIGERR_INVALID_CODE_POINT_VALUE);
730 }
731
732 extern int
733 onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf)
734 {
735 *buf = (UChar )(code & 0xff);
736 return 1;
737 }
738
739 extern UChar*
740 onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED,
741 const UChar* s)
742 {
743 return (UChar* )s;
744 }
745
746 extern int
747 onigenc_always_true_is_allowed_reverse_match(const UChar* s ARG_UNUSED,
748 const UChar* end ARG_UNUSED)
749 {
750 return TRUE;
751 }
752
753 extern int
754 onigenc_always_false_is_allowed_reverse_match(const UChar* s ARG_UNUSED,
755 const UChar* end ARG_UNUSED)
756 {
757 return FALSE;
758 }
759
760 extern int
761 onigenc_always_true_is_valid_mbc_string(const UChar* s ARG_UNUSED,
762 const UChar* end ARG_UNUSED)
763 {
764 return TRUE;
765 }
766
767 extern int
768 onigenc_length_check_is_valid_mbc_string(OnigEncoding enc,
769 const UChar* p, const UChar* end)
770 {
771 while (p < end) {
772 p += enclen(enc, p);
773 }
774
775 if (p != end)
776 return FALSE;
777 else
778 return TRUE;
779 }
780
781 extern int
782 onigenc_is_valid_mbc_string(OnigEncoding enc, const UChar* s, const UChar* end)
783 {
784 return ONIGENC_IS_VALID_MBC_STRING(enc, s, end);
785 }
786
787 extern OnigCodePoint
788 onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end)
789 {
790 int c, i, len;
791 OnigCodePoint n;
792
793 len = enclen(enc, p);
794 n = (OnigCodePoint )(*p++);
795 if (len == 1) return n;
796
797 for (i = 1; i < len; i++) {
798 if (p >= end) break;
799 c = *p++;
800 n <<= 8; n += c;
801 }
802 return n;
803 }
804
805 extern int
806 onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED,
807 const UChar** pp, const UChar* end ARG_UNUSED,
808 UChar* lower)
809 {
810 int len;
811 const UChar *p = *pp;
812
813 if (ONIGENC_IS_MBC_ASCII(p)) {
814 *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
815 (*pp)++;
816 return 1;
817 }
818 else {
819 int i;
820
821 len = enclen(enc, p);
822 for (i = 0; i < len; i++) {
823 *lower++ = *p++;
824 }
825 (*pp) += len;
826 return len; /* return byte length of converted to lower char */
827 }
828 }
829
#if 0
/* Disabled: skip one character at `*pp`; only an ASCII character can be
   reported as case-ambiguous, any other character yields FALSE. */
extern int
onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag,
                             const UChar** pp, const UChar* end)
{
  const UChar* cur = *pp;

  if (! ONIGENC_IS_MBC_ASCII(cur)) {
    (*pp) += enclen(enc, cur);
    return FALSE;
  }

  (*pp)++;
  return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*cur);
}
#endif
846
847 extern int
848 onigenc_mb2_code_to_mbclen(OnigCodePoint code)
849 {
850 if ((code & 0xff00) != 0) return 2;
851 else return 1;
852 }
853
854 extern int
855 onigenc_mb4_code_to_mbclen(OnigCodePoint code)
856 {
857 if ((code & 0xff000000) != 0) return 4;
858 else if ((code & 0xff0000) != 0) return 3;
859 else if ((code & 0xff00) != 0) return 2;
860 else return 1;
861 }
862
863 extern int
864 onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
865 {
866 UChar *p = buf;
867
868 if ((code & 0xff00) != 0) {
869 *p++ = (UChar )((code >> 8) & 0xff);
870 }
871 *p++ = (UChar )(code & 0xff);
872
873 #if 1
874 if (enclen(enc, buf) != (p - buf))
875 return ONIGERR_INVALID_CODE_POINT_VALUE;
876 #endif
877 return (int )(p - buf);
878 }
879
880 extern int
881 onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
882 {
883 UChar *p = buf;
884
885 if ((code & 0xff000000) != 0) {
886 *p++ = (UChar )((code >> 24) & 0xff);
887 }
888 if ((code & 0xff0000) != 0 || p != buf) {
889 *p++ = (UChar )((code >> 16) & 0xff);
890 }
891 if ((code & 0xff00) != 0 || p != buf) {
892 *p++ = (UChar )((code >> 8) & 0xff);
893 }
894 *p++ = (UChar )(code & 0xff);
895
896 #if 1
897 if (enclen(enc, buf) != (p - buf))
898 return ONIGERR_INVALID_CODE_POINT_VALUE;
899 #endif
900 return (int )(p - buf);
901 }
902
903 extern int
904 onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end)
905 {
906 static PosixBracketEntryType PBS[] = {
907 { (UChar* )"Alnum", ONIGENC_CTYPE_ALNUM, 5 },
908 { (UChar* )"Alpha", ONIGENC_CTYPE_ALPHA, 5 },
909 { (UChar* )"Blank", ONIGENC_CTYPE_BLANK, 5 },
910 { (UChar* )"Cntrl", ONIGENC_CTYPE_CNTRL, 5 },
911 { (UChar* )"Digit", ONIGENC_CTYPE_DIGIT, 5 },
912 { (UChar* )"Graph", ONIGENC_CTYPE_GRAPH, 5 },
913 { (UChar* )"Lower", ONIGENC_CTYPE_LOWER, 5 },
914 { (UChar* )"Print", ONIGENC_CTYPE_PRINT, 5 },
915 { (UChar* )"Punct", ONIGENC_CTYPE_PUNCT, 5 },
916 { (UChar* )"Space", ONIGENC_CTYPE_SPACE, 5 },
917 { (UChar* )"Upper", ONIGENC_CTYPE_UPPER, 5 },
918 { (UChar* )"XDigit", ONIGENC_CTYPE_XDIGIT, 6 },
919 { (UChar* )"ASCII", ONIGENC_CTYPE_ASCII, 5 },
920 { (UChar* )"Word", ONIGENC_CTYPE_WORD, 4 },
921 { (UChar* )NULL, -1, 0 }
922 };
923
924 PosixBracketEntryType *pb;
925 int len;
926
927 len = onigenc_strlen(enc, p, end);
928 for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
929 if (len == pb->len &&
930 onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0)
931 return pb->ctype;
932 }
933
934 return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
935 }
936
937 extern int
938 onigenc_is_mbc_word_ascii(OnigEncoding enc, UChar* s, const UChar* end)
939 {
940 OnigCodePoint code = ONIGENC_MBC_TO_CODE(enc, s, end);
941
942 if (code > 127) return 0;
943
944 return ONIGENC_IS_ASCII_CODE_WORD(code);
945 }
946
947 extern int
948 onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
949 unsigned int ctype)
950 {
951 if (code < 128)
952 return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
953 else {
954 if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
955 return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
956 }
957 }
958
959 return FALSE;
960 }
961
962 extern int
963 onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
964 unsigned int ctype)
965 {
966 if (code < 128)
967 return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
968 else {
969 if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
970 return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
971 }
972 }
973
974 return FALSE;
975 }
976
977 extern int
978 onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end,
979 const UChar* sascii /* ascii */, int n)
980 {
981 int x, c;
982
983 while (n-- > 0) {
984 if (p >= end) return (int )(*sascii);
985
986 c = (int )ONIGENC_MBC_TO_CODE(enc, p, end);
987 x = *sascii - c;
988 if (x) return x;
989
990 sascii++;
991 p += enclen(enc, p);
992 }
993 return 0;
994 }
995
996 extern int
997 onig_codes_cmp(OnigCodePoint a[], OnigCodePoint b[], int n)
998 {
999 int i;
1000
1001 for (i = 0; i < n; i++) {
1002 if (a[i] != b[i])
1003 return -1;
1004 }
1005
1006 return 0;
1007 }
1008
1009 extern int
1010 onig_codes_byte_at(OnigCodePoint codes[], int at)
1011 {
1012 int index;
1013 int b;
1014 OnigCodePoint code;
1015
1016 index = at / 3;
1017 b = at % 3;
1018 code = codes[index];
1019
1020 return ((code >> ((2 - b) * 8)) & 0xff);
1021 }