1 /**********************************************************************
2 regenc.c - Oniguruma (regular expression library)
3 **********************************************************************/
5 * Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 OnigEncoding OnigEncDefaultCharEncoding
= ONIG_ENCODING_INIT_DEFAULT
;
34 #define INITED_LIST_SIZE 20
36 static int InitedListNum
;
41 } InitedList
[INITED_LIST_SIZE
];
44 enc_inited_entry(OnigEncoding enc
)
48 for (i
= 0; i
< InitedListNum
; i
++) {
49 if (InitedList
[i
].enc
== enc
) {
50 InitedList
[i
].inited
= 1;
56 if (i
< INITED_LIST_SIZE
- 1) {
57 InitedList
[i
].enc
= enc
;
58 InitedList
[i
].inited
= 1;
67 enc_is_inited(OnigEncoding enc
)
71 for (i
= 0; i
< InitedListNum
; i
++) {
72 if (InitedList
[i
].enc
== enc
) {
73 return InitedList
[i
].inited
;
/*
 * Non-zero once the encoding subsystem has been initialised; the init
 * path returns early when this flag is already set.
 */
static int OnigEncInited;
85 if (OnigEncInited
!= 0) return 0;
96 for (i
= 0; i
< InitedListNum
; i
++) {
97 InitedList
[i
].enc
= 0;
98 InitedList
[i
].inited
= 0;
107 onig_initialize_encoding(OnigEncoding enc
)
111 if (enc
!= ONIG_ENCODING_ASCII
&&
112 ONIGENC_IS_ASCII_COMPATIBLE_ENCODING(enc
)) {
113 OnigEncoding ascii
= ONIG_ENCODING_ASCII
;
114 if (ascii
->init
!= 0 && enc_is_inited(ascii
) == 0) {
116 if (r
!= ONIG_NORMAL
) return r
;
117 enc_inited_entry(ascii
);
121 if (enc
->init
!= 0 &&
122 enc_is_inited(enc
) == 0) {
124 if (r
== ONIG_NORMAL
)
125 enc_inited_entry(enc
);
133 onigenc_get_default_encoding(void)
135 return OnigEncDefaultCharEncoding
;
139 onigenc_set_default_encoding(OnigEncoding enc
)
141 OnigEncDefaultCharEncoding
= enc
;
146 onigenc_strdup(OnigEncoding enc
, const UChar
* s
, const UChar
* end
)
148 int slen
, term_len
, i
;
151 slen
= (int )(end
- s
);
152 term_len
= ONIGENC_MBC_MINLEN(enc
);
154 r
= (UChar
* )xmalloc(slen
+ term_len
);
155 CHECK_NULL_RETURN(r
);
158 for (i
= 0; i
< term_len
; i
++)
159 r
[slen
+ i
] = (UChar
)0;
165 onigenc_get_right_adjust_char_head(OnigEncoding enc
, const UChar
* start
, const UChar
* s
)
167 UChar
* p
= ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc
, start
, s
);
175 onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc
,
176 const UChar
* start
, const UChar
* s
, const UChar
** prev
)
178 UChar
* p
= ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc
, start
, s
);
181 if (prev
) *prev
= (const UChar
* )p
;
185 if (prev
) *prev
= (const UChar
* )NULL
; /* Sorry */
191 onigenc_get_prev_char_head(OnigEncoding enc
, const UChar
* start
, const UChar
* s
)
194 return (UChar
* )NULL
;
196 return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc
, start
, s
- 1);
200 onigenc_step_back(OnigEncoding enc
, const UChar
* start
, const UChar
* s
, int n
)
202 while (ONIG_IS_NOT_NULL(s
) && n
-- > 0) {
204 return (UChar
* )NULL
;
206 s
= ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc
, start
, s
- 1);
213 onigenc_mbc_enc_len_end(OnigEncoding enc
, const UChar
* p
, const UChar
* end
)
218 len
= ONIGENC_MBC_ENC_LEN(enc
, p
);
221 return (n
< len
? n
: len
);
226 onigenc_step(OnigEncoding enc
, const UChar
* p
, const UChar
* end
, int n
)
228 UChar
* q
= (UChar
* )p
;
230 q
+= ONIGENC_MBC_ENC_LEN(enc
, q
);
232 return (q
<= end
? q
: NULL
);
236 onigenc_strlen(OnigEncoding enc
, const UChar
* p
, const UChar
* end
)
239 UChar
* q
= (UChar
* )p
;
242 q
+= ONIGENC_MBC_ENC_LEN(enc
, q
);
249 onigenc_strlen_null(OnigEncoding enc
, const UChar
* s
)
252 UChar
* p
= (UChar
* )s
;
257 int len
= ONIGENC_MBC_MINLEN(enc
);
259 if (len
== 1) return n
;
262 if (*q
!= '\0') break;
266 if (len
== 1) return n
;
268 p
+= ONIGENC_MBC_ENC_LEN(enc
, p
);
274 onigenc_str_bytelen_null(OnigEncoding enc
, const UChar
* s
)
276 UChar
* start
= (UChar
* )s
;
277 UChar
* p
= (UChar
* )s
;
282 int len
= ONIGENC_MBC_MINLEN(enc
);
284 if (len
== 1) return (int )(p
- start
);
287 if (*q
!= '\0') break;
291 if (len
== 1) return (int )(p
- start
);
293 p
+= ONIGENC_MBC_ENC_LEN(enc
, p
);
297 const UChar OnigEncAsciiToLowerCaseTable
[] = {
298 '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
299 '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
300 '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
301 '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
302 '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
303 '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
304 '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
305 '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
306 '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
307 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
308 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
309 '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
310 '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
311 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
312 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
313 '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
314 '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
315 '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
316 '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
317 '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
318 '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
319 '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
320 '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
321 '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
322 '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
323 '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
324 '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
325 '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
326 '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
327 '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
328 '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
329 '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
#ifdef USE_UPPER_CASE_TABLE
/*
 * Byte-indexed upper-case map (256 entries), compiled only when
 * USE_UPPER_CASE_TABLE is defined.
 * 'a'..'z' (octal 141..172) map to 'A'..'Z' (octal 101..132);
 * every other byte value maps to itself.
 */
const UChar OnigEncAsciiToUpperCaseTable[256] = {
  '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
  '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
  '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
  '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
  '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
  '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
  '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
  '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
  '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
  '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
  '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
  '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
  '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
  '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
  '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
  '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
  '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
  '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
  '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
  '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
  '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
  '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
  '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
  '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
  '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
  '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
  '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
  '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
  '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
  '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
  '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
  '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
};
#endif
/*
 * Byte-indexed ctype bit-set table (256 entries).
 * Entries 0x00..0x7f carry a non-zero bit mask per ASCII character;
 * entries 0x80..0xff are all zero.
 */
const unsigned short OnigEncAsciiCtypeTable[256] = {
  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
  0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
  0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
};
404 const UChar OnigEncISO_8859_1_ToLowerCaseTable
[256] = {
405 '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
406 '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
407 '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
408 '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
409 '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
410 '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
411 '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
412 '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
413 '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
414 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
415 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
416 '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
417 '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
418 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
419 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
420 '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
421 '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
422 '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
423 '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
424 '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
425 '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
426 '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
427 '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
428 '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
429 '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
430 '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
431 '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
432 '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
433 '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
434 '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
435 '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
436 '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
#ifdef USE_UPPER_CASE_TABLE
/*
 * Byte-indexed upper-case map for ISO-8859-1 (256 entries), compiled
 * only when USE_UPPER_CASE_TABLE is defined.
 * 'a'..'z' (octal 141..172) map to 'A'..'Z' (octal 101..132) and
 * octal 340..376 map to octal 300..336, except 367 which maps to
 * itself.  Every other byte value, including 377, maps to itself.
 */
const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = {
  '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
  '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
  '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
  '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
  '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
  '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
  '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
  '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
  '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
  '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
  '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
  '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
  '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
  '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
  '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
  '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
  '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
  '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
  '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
  '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
  '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
  '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
  '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
  '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
  '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
  '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
  '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
  '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
  '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
  '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
  '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\367',
  '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\377',
};
#endif
477 onigenc_set_default_caseconv_table(const UChar
* table ARG_UNUSED
)
484 onigenc_get_left_adjust_char_head(OnigEncoding enc
, const UChar
* start
, const UChar
* s
)
486 return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc
, start
, s
);
489 const OnigPairCaseFoldCodes OnigAsciiLowerMap
[] = {
519 onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED
,
520 OnigApplyAllCaseFoldFunc f
, void* arg
)
526 i
< (int )(sizeof(OnigAsciiLowerMap
)/sizeof(OnigPairCaseFoldCodes
));
528 code
= OnigAsciiLowerMap
[i
].to
;
529 r
= (*f
)(OnigAsciiLowerMap
[i
].from
, &code
, 1, arg
);
530 if (r
!= 0) return r
;
532 code
= OnigAsciiLowerMap
[i
].from
;
533 r
= (*f
)(OnigAsciiLowerMap
[i
].to
, &code
, 1, arg
);
534 if (r
!= 0) return r
;
541 onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED
,
542 const OnigUChar
* p
, const OnigUChar
* end ARG_UNUSED
,
543 OnigCaseFoldCodeItem items
[])
545 if (0x41 <= *p
&& *p
<= 0x5a) {
546 items
[0].byte_len
= 1;
547 items
[0].code_len
= 1;
548 items
[0].code
[0] = (OnigCodePoint
)(*p
+ 0x20);
551 else if (0x61 <= *p
&& *p
<= 0x7a) {
552 items
[0].byte_len
= 1;
553 items
[0].code_len
= 1;
554 items
[0].code
[0] = (OnigCodePoint
)(*p
- 0x20);
562 ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED
,
563 OnigApplyAllCaseFoldFunc f
, void* arg
)
565 static OnigCodePoint ss
[] = { 0x73, 0x73 };
567 return (*f
)((OnigCodePoint
)0xdf, ss
, 2, arg
);
571 onigenc_apply_all_case_fold_with_map(int map_size
,
572 const OnigPairCaseFoldCodes map
[],
573 int ess_tsett_flag
, OnigCaseFoldType flag
,
574 OnigApplyAllCaseFoldFunc f
, void* arg
)
579 r
= onigenc_ascii_apply_all_case_fold(flag
, f
, arg
);
580 if (r
!= 0) return r
;
582 for (i
= 0; i
< map_size
; i
++) {
584 r
= (*f
)(map
[i
].from
, &code
, 1, arg
);
585 if (r
!= 0) return r
;
588 r
= (*f
)(map
[i
].to
, &code
, 1, arg
);
589 if (r
!= 0) return r
;
592 if (ess_tsett_flag
!= 0)
593 return ss_apply_all_case_fold(flag
, f
, arg
);
599 onigenc_get_case_fold_codes_by_str_with_map(int map_size
,
600 const OnigPairCaseFoldCodes map
[],
601 int ess_tsett_flag
, OnigCaseFoldType flag ARG_UNUSED
,
602 const OnigUChar
* p
, const OnigUChar
* end
, OnigCaseFoldCodeItem items
[])
604 if (0x41 <= *p
&& *p
<= 0x5a) {
605 items
[0].byte_len
= 1;
606 items
[0].code_len
= 1;
607 items
[0].code
[0] = (OnigCodePoint
)(*p
+ 0x20);
608 if (*p
== 0x53 && ess_tsett_flag
!= 0 && end
> p
+ 1
609 && (*(p
+1) == 0x53 || *(p
+1) == 0x73)) {
611 items
[1].byte_len
= 2;
612 items
[1].code_len
= 1;
613 items
[1].code
[0] = (OnigCodePoint
)0xdf;
619 else if (0x61 <= *p
&& *p
<= 0x7a) {
620 items
[0].byte_len
= 1;
621 items
[0].code_len
= 1;
622 items
[0].code
[0] = (OnigCodePoint
)(*p
- 0x20);
623 if (*p
== 0x73 && ess_tsett_flag
!= 0 && end
> p
+ 1
624 && (*(p
+1) == 0x73 || *(p
+1) == 0x53)) {
626 items
[1].byte_len
= 2;
627 items
[1].code_len
= 1;
628 items
[1].code
[0] = (OnigCodePoint
)0xdf;
634 else if (*p
== 0xdf && ess_tsett_flag
!= 0) {
635 items
[0].byte_len
= 1;
636 items
[0].code_len
= 2;
637 items
[0].code
[0] = (OnigCodePoint
)'s';
638 items
[0].code
[1] = (OnigCodePoint
)'s';
640 items
[1].byte_len
= 1;
641 items
[1].code_len
= 2;
642 items
[1].code
[0] = (OnigCodePoint
)'S';
643 items
[1].code
[1] = (OnigCodePoint
)'S';
645 items
[2].byte_len
= 1;
646 items
[2].code_len
= 2;
647 items
[2].code
[0] = (OnigCodePoint
)'s';
648 items
[2].code
[1] = (OnigCodePoint
)'S';
650 items
[3].byte_len
= 1;
651 items
[3].code_len
= 2;
652 items
[3].code
[0] = (OnigCodePoint
)'S';
653 items
[3].code
[1] = (OnigCodePoint
)'s';
660 for (i
= 0; i
< map_size
; i
++) {
661 if (*p
== map
[i
].from
) {
662 items
[0].byte_len
= 1;
663 items
[0].code_len
= 1;
664 items
[0].code
[0] = map
[i
].to
;
667 else if (*p
== map
[i
].to
) {
668 items
[0].byte_len
= 1;
669 items
[0].code_len
= 1;
670 items
[0].code
[0] = map
[i
].from
;
681 onigenc_not_support_get_ctype_code_range(OnigCtype ctype ARG_UNUSED
,
682 OnigCodePoint
* sb_out ARG_UNUSED
,
683 const OnigCodePoint
* ranges
[] ARG_UNUSED
)
685 return ONIG_NO_SUPPORT_CONFIG
;
689 onigenc_is_mbc_newline_0x0a(const UChar
* p
, const UChar
* end
)
692 if (*p
== 0x0a) return 1;
697 /* for single byte encodings */
699 onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED
, const UChar
** p
,
700 const UChar
*end ARG_UNUSED
, UChar
* lower
)
702 *lower
= ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p
);
705 return 1; /* return byte length of converted char to lower */
710 onigenc_ascii_is_mbc_ambiguous(OnigCaseFoldType flag
,
711 const UChar
** pp
, const UChar
* end
)
713 const UChar
* p
= *pp
;
716 return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p
);
721 onigenc_single_byte_mbc_enc_len(const UChar
* p ARG_UNUSED
)
727 onigenc_single_byte_mbc_to_code(const UChar
* p
, const UChar
* end ARG_UNUSED
)
729 return (OnigCodePoint
)(*p
);
733 onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED
)
735 return (code
< 0x100 ? 1 : ONIGERR_INVALID_CODE_POINT_VALUE
);
739 onigenc_single_byte_code_to_mbc(OnigCodePoint code
, UChar
*buf
)
741 *buf
= (UChar
)(code
& 0xff);
746 onigenc_single_byte_left_adjust_char_head(const UChar
* start ARG_UNUSED
,
753 onigenc_always_true_is_allowed_reverse_match(const UChar
* s ARG_UNUSED
,
754 const UChar
* end ARG_UNUSED
)
760 onigenc_always_false_is_allowed_reverse_match(const UChar
* s ARG_UNUSED
,
761 const UChar
* end ARG_UNUSED
)
767 onigenc_always_true_is_valid_mbc_string(const UChar
* s ARG_UNUSED
,
768 const UChar
* end ARG_UNUSED
)
774 onigenc_length_check_is_valid_mbc_string(OnigEncoding enc
,
775 const UChar
* p
, const UChar
* end
)
788 onigenc_is_valid_mbc_string(OnigEncoding enc
, const UChar
* s
, const UChar
* end
)
790 return ONIGENC_IS_VALID_MBC_STRING(enc
, s
, end
);
794 onigenc_mbn_mbc_to_code(OnigEncoding enc
, const UChar
* p
, const UChar
* end
)
799 len
= enclen(enc
, p
);
800 n
= (OnigCodePoint
)(*p
++);
801 if (len
== 1) return n
;
803 for (i
= 1; i
< len
; i
++) {
812 onigenc_mbn_mbc_case_fold(OnigEncoding enc
, OnigCaseFoldType flag ARG_UNUSED
,
813 const UChar
** pp
, const UChar
* end ARG_UNUSED
,
817 const UChar
*p
= *pp
;
819 if (ONIGENC_IS_MBC_ASCII(p
)) {
820 *lower
= ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p
);
827 len
= enclen(enc
, p
);
828 for (i
= 0; i
< len
; i
++) {
832 return len
; /* return byte length of converted to lower char */
838 onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc
, OnigCaseFoldType flag
,
839 const UChar
** pp
, const UChar
* end
)
841 const UChar
* p
= *pp
;
843 if (ONIGENC_IS_MBC_ASCII(p
)) {
845 return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p
);
848 (*pp
) += enclen(enc
, p
);
854 onigenc_mb2_code_to_mbclen(OnigCodePoint code
)
856 if ((code
& (~0xffff)) != 0) return ONIGERR_INVALID_CODE_POINT_VALUE
;
858 if ((code
& 0xff00) != 0) return 2;
863 onigenc_mb4_code_to_mbclen(OnigCodePoint code
)
865 if ((code
& 0xff000000) != 0) return 4;
866 else if ((code
& 0xff0000) != 0) return 3;
867 else if ((code
& 0xff00) != 0) return 2;
872 onigenc_mb2_code_to_mbc(OnigEncoding enc
, OnigCodePoint code
, UChar
*buf
)
876 if ((code
& 0xff00) != 0) {
877 *p
++ = (UChar
)((code
>> 8) & 0xff);
879 *p
++ = (UChar
)(code
& 0xff);
882 if (enclen(enc
, buf
) != (p
- buf
))
883 return ONIGERR_INVALID_CODE_POINT_VALUE
;
885 return (int )(p
- buf
);
889 onigenc_mb4_code_to_mbc(OnigEncoding enc
, OnigCodePoint code
, UChar
*buf
)
893 if ((code
& 0xff000000) != 0) {
894 *p
++ = (UChar
)((code
>> 24) & 0xff);
896 if ((code
& 0xff0000) != 0 || p
!= buf
) {
897 *p
++ = (UChar
)((code
>> 16) & 0xff);
899 if ((code
& 0xff00) != 0 || p
!= buf
) {
900 *p
++ = (UChar
)((code
>> 8) & 0xff);
902 *p
++ = (UChar
)(code
& 0xff);
905 if (enclen(enc
, buf
) != (p
- buf
))
906 return ONIGERR_INVALID_CODE_POINT_VALUE
;
908 return (int )(p
- buf
);
912 onigenc_minimum_property_name_to_ctype(OnigEncoding enc
, UChar
* p
, UChar
* end
)
914 static PosixBracketEntryType PBS
[] = {
915 { (UChar
* )"Alnum", ONIGENC_CTYPE_ALNUM
, 5 },
916 { (UChar
* )"Alpha", ONIGENC_CTYPE_ALPHA
, 5 },
917 { (UChar
* )"Blank", ONIGENC_CTYPE_BLANK
, 5 },
918 { (UChar
* )"Cntrl", ONIGENC_CTYPE_CNTRL
, 5 },
919 { (UChar
* )"Digit", ONIGENC_CTYPE_DIGIT
, 5 },
920 { (UChar
* )"Graph", ONIGENC_CTYPE_GRAPH
, 5 },
921 { (UChar
* )"Lower", ONIGENC_CTYPE_LOWER
, 5 },
922 { (UChar
* )"Print", ONIGENC_CTYPE_PRINT
, 5 },
923 { (UChar
* )"Punct", ONIGENC_CTYPE_PUNCT
, 5 },
924 { (UChar
* )"Space", ONIGENC_CTYPE_SPACE
, 5 },
925 { (UChar
* )"Upper", ONIGENC_CTYPE_UPPER
, 5 },
926 { (UChar
* )"XDigit", ONIGENC_CTYPE_XDIGIT
, 6 },
927 { (UChar
* )"ASCII", ONIGENC_CTYPE_ASCII
, 5 },
928 { (UChar
* )"Word", ONIGENC_CTYPE_WORD
, 4 },
929 { (UChar
* )NULL
, -1, 0 }
932 PosixBracketEntryType
*pb
;
935 len
= onigenc_strlen(enc
, p
, end
);
936 for (pb
= PBS
; IS_NOT_NULL(pb
->name
); pb
++) {
937 if (len
== pb
->len
&&
938 onigenc_with_ascii_strncmp(enc
, p
, end
, pb
->name
, pb
->len
) == 0)
942 return ONIGERR_INVALID_CHAR_PROPERTY_NAME
;
946 onigenc_is_mbc_word_ascii(OnigEncoding enc
, UChar
* s
, const UChar
* end
)
948 OnigCodePoint code
= ONIGENC_MBC_TO_CODE(enc
, s
, end
);
950 if (code
> 127) return 0;
952 return ONIGENC_IS_ASCII_CODE_WORD(code
);
956 onigenc_mb2_is_code_ctype(OnigEncoding enc
, OnigCodePoint code
,
960 return ONIGENC_IS_ASCII_CODE_CTYPE(code
, ctype
);
962 if (CTYPE_IS_WORD_GRAPH_PRINT(ctype
)) {
963 return (ONIGENC_CODE_TO_MBCLEN(enc
, code
) > 1 ? TRUE
: FALSE
);
971 onigenc_mb4_is_code_ctype(OnigEncoding enc
, OnigCodePoint code
,
975 return ONIGENC_IS_ASCII_CODE_CTYPE(code
, ctype
);
977 if (CTYPE_IS_WORD_GRAPH_PRINT(ctype
)) {
978 return (ONIGENC_CODE_TO_MBCLEN(enc
, code
) > 1 ? TRUE
: FALSE
);
986 onigenc_with_ascii_strncmp(OnigEncoding enc
, const UChar
* p
, const UChar
* end
,
987 const UChar
* sascii
/* ascii */, int n
)
992 if (p
>= end
) return (int )(*sascii
);
994 c
= (int )ONIGENC_MBC_TO_CODE(enc
, p
, end
);
1005 onig_codes_cmp(OnigCodePoint a
[], OnigCodePoint b
[], int n
)
1009 for (i
= 0; i
< n
; i
++) {
1018 onig_codes_byte_at(OnigCodePoint codes
[], int at
)
1026 code
= codes
[index
];
1028 return ((code
>> ((2 - b
) * 8)) & 0xff);