1 /**********************************************************************
2 regenc.c - Oniguruma (regular expression library)
3 **********************************************************************/
5 * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
/* Library-wide default character encoding, initialised to
 * ONIG_ENCODING_INIT_DEFAULT.  It is returned by
 * onigenc_get_default_encoding() and overwritten by
 * onigenc_set_default_encoding(). */
32 OnigEncoding OnigEncDefaultCharEncoding
= ONIG_ENCODING_INIT_DEFAULT
;
/* Number of elements in the InitedList array declared below. */
34 #define INITED_LIST_SIZE 20
/* Number of InitedList slots currently in use; the lookup loops in this file
 * scan indices [0, InitedListNum). */
36 static int InitedListNum
;
/* Table of per-encoding records with .enc and .inited members.
 * NOTE(review): the element type's declaration is not visible in this
 * excerpt; the member names are taken from their uses below. */
41 } InitedList
[INITED_LIST_SIZE
];
/* Record `enc` as initialised in InitedList.
 *
 * The loop scans the slots in use; a slot whose .enc equals `enc` gets its
 * .inited flag set to 1.  The later branch, taken only while
 * i < INITED_LIST_SIZE - 1, fills slot i with `enc` and .inited = 1.
 *
 * NOTE(review): the return statements, the assignment of i before the second
 * test and any update of InitedListNum are not visible in this excerpt;
 * presumably the slot index is returned, or a negative value when the table
 * is full -- confirm against the full source. */
44 enc_inited_entry(OnigEncoding enc
)
48 for (i
= 0; i
< InitedListNum
; i
++) {
49 if (InitedList
[i
].enc
== enc
) {
50 InitedList
[i
].inited
= 1;
56 if (i
< INITED_LIST_SIZE
- 1) {
57 InitedList
[i
].enc
= enc
;
58 InitedList
[i
].inited
= 1;
/* Look `enc` up in the in-use part of InitedList and, when a slot holds it,
 * return that slot's .inited flag.
 *
 * NOTE(review): the value returned when `enc` is not found is not visible
 * here; callers compare the result with 0, so it is presumably 0. */
67 enc_is_inited(OnigEncoding enc
)
71 for (i
= 0; i
< InitedListNum
; i
++) {
72 if (InitedList
[i
].enc
== enc
) {
73 return InitedList
[i
].inited
;
85 for (i
= 0; i
< InitedListNum
; i
++) {
86 InitedList
[i
].enc
= 0;
87 InitedList
[i
].inited
= 0;
/* Initialise `enc`, at most once per encoding.
 *
 * For an ASCII-compatible encoding other than ONIG_ENCODING_ASCII, the ASCII
 * encoding is handled first: if it has an init hook and is not yet marked as
 * initialised, a result r other than ONIG_NORMAL is returned immediately,
 * otherwise ASCII is recorded with enc_inited_entry().  The same check is
 * then made for `enc` itself, which is recorded only when r == ONIG_NORMAL.
 *
 * NOTE(review): the statements that assign r (presumably calls to the init
 * hooks) and the final return are not visible in this excerpt. */
101 onig_initialize_encoding(OnigEncoding enc
)
105 if (enc
!= ONIG_ENCODING_ASCII
&&
106 ONIGENC_IS_ASCII_COMPATIBLE_ENCODING(enc
)) {
107 OnigEncoding ascii
= ONIG_ENCODING_ASCII
;
108 if (ascii
->init
!= 0 && enc_is_inited(ascii
) == 0) {
110 if (r
!= ONIG_NORMAL
) return r
;
111 enc_inited_entry(ascii
);
115 if (enc
->init
!= 0 &&
116 enc_is_inited(enc
) == 0) {
118 if (r
== ONIG_NORMAL
)
119 enc_inited_entry(enc
);
/* Return the current value of the global OnigEncDefaultCharEncoding. */
127 onigenc_get_default_encoding(void)
129 return OnigEncDefaultCharEncoding
;
/* Store `enc` in the global OnigEncDefaultCharEncoding.
 * NOTE(review): the return statement is not visible in this excerpt. */
133 onigenc_set_default_encoding(OnigEncoding enc
)
135 OnigEncDefaultCharEncoding
= enc
;
/* Allocate a buffer for a copy of the bytes [s, end) plus a terminator.
 *
 * The buffer is obtained from xmalloc() with size
 * (end - s) + ONIGENC_MBC_MINLEN(enc); CHECK_NULL_RETURN handles allocation
 * failure.  The trailing term_len bytes are set to zero, so the terminator
 * is as wide as ONIGENC_MBC_MINLEN(enc).
 *
 * Note that the source length is narrowed to int.
 *
 * NOTE(review): the copy of the source bytes into r, the declaration of r
 * and the return statement are not visible in this excerpt; presumably the
 * caller owns and frees the returned buffer -- confirm. */
140 onigenc_strdup(OnigEncoding enc
, const UChar
* s
, const UChar
* end
)
142 int slen
, term_len
, i
;
145 slen
= (int )(end
- s
);
146 term_len
= ONIGENC_MBC_MINLEN(enc
);
148 r
= (UChar
* )xmalloc(slen
+ term_len
);
149 CHECK_NULL_RETURN(r
);
152 for (i
= 0; i
< term_len
; i
++)
153 r
[slen
+ i
] = (UChar
)0;
/* Starts from p, the result of ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s).
 *
 * NOTE(review): the step that moves p forward and the return statement are
 * not visible in this excerpt; judging by the name, the function presumably
 * returns the first character head at or after s. */
159 onigenc_get_right_adjust_char_head(OnigEncoding enc
, const UChar
* start
, const UChar
* s
)
161 UChar
* p
= ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc
, start
, s
);
/* Variant of the right-adjust lookup that can also report a previous
 * character head through `prev`.
 *
 * p starts as ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s).  `prev` is
 * optional: it is only written when non-NULL, and receives either p or NULL
 * depending on the path taken.
 *
 * NOTE(review): the conditions selecting between the two assignments, the
 * advance of p and the return statement are not visible in this excerpt. */
169 onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc
,
170 const UChar
* start
, const UChar
* s
, const UChar
** prev
)
172 UChar
* p
= ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc
, start
, s
);
175 if (prev
) *prev
= (const UChar
* )p
;
179 if (prev
) *prev
= (const UChar
* )NULL
; /* Sorry */
/* Return the head of the character that precedes s, computed as
 * ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1), or NULL on the early-out
 * path.
 *
 * NOTE(review): the condition guarding the NULL return is not visible in
 * this excerpt (presumably s <= start). */
185 onigenc_get_prev_char_head(OnigEncoding enc
, const UChar
* start
, const UChar
* s
)
188 return (UChar
* )NULL
;
190 return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc
, start
, s
- 1);
/* Move s backwards by up to n characters.
 *
 * Each iteration replaces s with the character head found by left-adjusting
 * s - 1; the loop ends when n is used up or s becomes NULL.  One path inside
 * the loop returns NULL.
 *
 * NOTE(review): the condition guarding the NULL return (presumably
 * s <= start) and the final return are not visible in this excerpt. */
194 onigenc_step_back(OnigEncoding enc
, const UChar
* start
, const UChar
* s
, int n
)
196 while (ONIG_IS_NOT_NULL(s
) && n
-- > 0) {
198 return (UChar
* )NULL
;
200 s
= ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc
, start
, s
- 1);
/* Return the smaller of len and n, where len is
 * ONIGENC_MBC_ENC_LEN(enc, p).
 *
 * NOTE(review): the assignment of n is not visible in this excerpt; given
 * the `end` parameter it is presumably the number of bytes left, end - p,
 * so that the result never reaches past `end`. */
207 onigenc_mbc_enc_len_end(OnigEncoding enc
, const UChar
* p
, const UChar
* end
)
212 len
= ONIGENC_MBC_ENC_LEN(enc
, p
);
215 return (n
< len
? n
: len
);
/* Advance from p by whole characters, using ONIGENC_MBC_ENC_LEN for each
 * step, and return the resulting position; NULL is returned when that
 * position lies beyond `end`.
 *
 * NOTE(review): the loop header that repeats the advance (presumably n
 * times) is not visible in this excerpt. */
220 onigenc_step(OnigEncoding enc
, const UChar
* p
, const UChar
* end
, int n
)
222 UChar
* q
= (UChar
* )p
;
224 q
+= ONIGENC_MBC_ENC_LEN(enc
, q
);
226 return (q
<= end
? q
: NULL
);
/* Walk from p one character at a time, advancing q by
 * ONIGENC_MBC_ENC_LEN(enc, q) per step.
 *
 * NOTE(review): the loop condition, the counter and the return statement are
 * not visible in this excerpt; presumably the function returns the number of
 * characters in [p, end). */
230 onigenc_strlen(OnigEncoding enc
, const UChar
* p
, const UChar
* end
)
233 UChar
* q
= (UChar
* )p
;
236 q
+= ONIGENC_MBC_ENC_LEN(enc
, q
);
/* Character count of a NUL-terminated string s; the count n is what is
 * returned.
 *
 * len is ONIGENC_MBC_MINLEN(enc).  When len is 1 the count is returned at
 * once; for wider values further bytes are inspected through q, and a
 * non-zero byte ends that inspection without returning.  Between
 * terminator checks p advances by ONIGENC_MBC_ENC_LEN(enc, p).
 *
 * NOTE(review): the outer loop, the test that detects the first zero byte,
 * and the updates of n, q and len are not visible in this excerpt. */
243 onigenc_strlen_null(OnigEncoding enc
, const UChar
* s
)
246 UChar
* p
= (UChar
* )s
;
251 int len
= ONIGENC_MBC_MINLEN(enc
);
253 if (len
== 1) return n
;
256 if (*q
!= '\0') break;
260 if (len
== 1) return n
;
262 p
+= ONIGENC_MBC_ENC_LEN(enc
, p
);
/* Byte length of a NUL-terminated string s: the value returned is
 * (int)(p - start), the distance from the beginning of the string to the
 * position where the terminator was recognised.
 *
 * len is ONIGENC_MBC_MINLEN(enc).  When len is 1 the length is returned at
 * once; for wider values further bytes are inspected through q, and a
 * non-zero byte ends that inspection without returning.  Between
 * terminator checks p advances by ONIGENC_MBC_ENC_LEN(enc, p).
 *
 * NOTE(review): the outer loop, the test that detects the first zero byte,
 * and the updates of q and len are not visible in this excerpt. */
268 onigenc_str_bytelen_null(OnigEncoding enc
, const UChar
* s
)
270 UChar
* start
= (UChar
* )s
;
271 UChar
* p
= (UChar
* )s
;
276 int len
= ONIGENC_MBC_MINLEN(enc
);
278 if (len
== 1) return (int )(p
- start
);
281 if (*q
!= '\0') break;
285 if (len
== 1) return (int )(p
- start
);
287 p
+= ONIGENC_MBC_ENC_LEN(enc
, p
);
/* Byte-indexed lower-case mapping for ASCII (256 entries).
 * Entries 0x41..0x5A ('A'..'Z') hold 0x61..0x7A ('a'..'z'); every other
 * entry, including 0x80..0xFF, maps to itself. */
291 const UChar OnigEncAsciiToLowerCaseTable
[] = {
292 '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
293 '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
294 '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
295 '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
296 '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
297 '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
298 '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
299 '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
300 '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
301 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
302 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
303 '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
304 '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
305 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
306 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
307 '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
308 '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
309 '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
310 '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
311 '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
312 '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
313 '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
314 '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
315 '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
316 '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
317 '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
318 '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
319 '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
320 '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
321 '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
322 '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
323 '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
/* Byte-indexed upper-case mapping for ASCII (256 entries), compiled only
 * when USE_UPPER_CASE_TABLE is defined.
 * Entries 0x61..0x7A ('a'..'z') hold 0x41..0x5A ('A'..'Z'); every other
 * entry, including 0x80..0xFF, maps to itself. */
326 #ifdef USE_UPPER_CASE_TABLE
327 const UChar OnigEncAsciiToUpperCaseTable
[256] = {
328 '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
329 '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
330 '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
331 '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
332 '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
333 '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
334 '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
335 '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
336 '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
337 '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
338 '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
339 '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
340 '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
341 '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
342 '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
343 '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
344 '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
345 '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
346 '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
347 '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
348 '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
349 '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
350 '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
351 '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
352 '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
353 '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
354 '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
355 '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
356 '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
357 '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
358 '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
359 '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
/* Byte-indexed table of 16-bit character-class flag words (256 entries).
 * Entries 0x00..0x7F carry non-zero flag words; entries 0x80..0xFF are all
 * 0x0000.
 * NOTE(review): the meaning of the individual bits is defined outside this
 * excerpt (presumably one bit per ONIGENC_CTYPE_* class) -- confirm before
 * editing any value. */
363 const unsigned short OnigEncAsciiCtypeTable
[256] = {
364 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
365 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
366 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
367 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
368 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
369 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
370 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
371 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
372 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
373 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
374 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
375 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
376 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
377 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
378 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
379 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
380 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
381 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
382 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
383 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
384 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
385 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
386 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
387 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
388 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
389 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
390 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
391 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
392 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
393 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
394 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
395 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
/* Byte-indexed lower-case mapping for ISO-8859-1 (256 entries).
 * As in the ASCII table, 0x41..0x5A map to 0x61..0x7A.  In the upper half,
 * 0xC0..0xDE map to 0xE0..0xFE, except that 0xD7 maps to itself; 0xDF also
 * maps to itself.  All remaining entries map to themselves. */
398 const UChar OnigEncISO_8859_1_ToLowerCaseTable
[256] = {
399 '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
400 '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
401 '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
402 '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
403 '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
404 '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
405 '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
406 '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
407 '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
408 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
409 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
410 '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
411 '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
412 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
413 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
414 '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
415 '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
416 '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
417 '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
418 '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
419 '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
420 '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
421 '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
422 '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
423 '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
424 '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
425 '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
426 '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
427 '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
428 '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
429 '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
430 '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
/* Byte-indexed upper-case mapping for ISO-8859-1 (256 entries), compiled
 * only when USE_UPPER_CASE_TABLE is defined.
 * As in the ASCII table, 0x61..0x7A map to 0x41..0x5A.  In the upper half,
 * 0xE0..0xFE map to 0xC0..0xDE, except that 0xF7 maps to itself; 0xFF also
 * maps to itself.  All remaining entries map to themselves. */
433 #ifdef USE_UPPER_CASE_TABLE
434 const UChar OnigEncISO_8859_1_ToUpperCaseTable
[256] = {
435 '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
436 '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
437 '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
438 '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
439 '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
440 '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
441 '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
442 '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
443 '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
444 '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
445 '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
446 '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
447 '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
448 '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
449 '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
450 '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
451 '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
452 '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
453 '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
454 '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
455 '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
456 '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
457 '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
458 '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
459 '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
460 '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
461 '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
462 '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
463 '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
464 '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
465 '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\367',
466 '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\377',
471 onigenc_set_default_caseconv_table(const UChar
* table ARG_UNUSED
)
/* Function wrapper around the ONIGENC_LEFT_ADJUST_CHAR_HEAD macro: returns
 * its result for (enc, start, s) unchanged. */
478 onigenc_get_left_adjust_char_head(OnigEncoding enc
, const UChar
* start
, const UChar
* s
)
480 return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc
, start
, s
);
483 const OnigPairCaseFoldCodes OnigAsciiLowerMap
[] = {
/* Report every pair in OnigAsciiLowerMap to the callback f, in both
 * directions.
 *
 * For each entry, f is first called as f(from, &to, 1, arg) and then as
 * f(to, &from, 1, arg).  A non-zero result from f stops the iteration and is
 * returned to the caller.  The entry count is derived with sizeof, so it
 * follows the map's definition.  `flag` is unused.
 *
 * NOTE(review): the loop header's initialisation/increment and the final
 * return (presumably 0) are not visible in this excerpt. */
513 onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED
,
514 OnigApplyAllCaseFoldFunc f
, void* arg
)
520 i
< (int )(sizeof(OnigAsciiLowerMap
)/sizeof(OnigPairCaseFoldCodes
));
522 code
= OnigAsciiLowerMap
[i
].to
;
523 r
= (*f
)(OnigAsciiLowerMap
[i
].from
, &code
, 1, arg
);
524 if (r
!= 0) return r
;
526 code
= OnigAsciiLowerMap
[i
].from
;
527 r
= (*f
)(OnigAsciiLowerMap
[i
].to
, &code
, 1, arg
);
528 if (r
!= 0) return r
;
/* Fill items[0] with the other-case form of the ASCII letter at *p.
 *
 * For 0x41..0x5A ('A'..'Z') the code is *p + 0x20; for 0x61..0x7A
 * ('a'..'z') it is *p - 0x20.  In both cases byte_len and code_len are 1.
 * `flag` and `end` are unused, so the caller must make sure *p is readable.
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably the number of items written (1 for a letter, 0 otherwise). */
535 onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED
,
536 const OnigUChar
* p
, const OnigUChar
* end ARG_UNUSED
,
537 OnigCaseFoldCodeItem items
[])
539 if (0x41 <= *p
&& *p
<= 0x5a) {
540 items
[0].byte_len
= 1;
541 items
[0].code_len
= 1;
542 items
[0].code
[0] = (OnigCodePoint
)(*p
+ 0x20);
545 else if (0x61 <= *p
&& *p
<= 0x7a) {
546 items
[0].byte_len
= 1;
547 items
[0].code_len
= 1;
548 items
[0].code
[0] = (OnigCodePoint
)(*p
- 0x20);
/* Report the single fold from code point 0xdf to the two-code-point
 * sequence { 0x73, 0x73 } (ASCII "ss") to the callback f, and return f's
 * result.  `flag` is unused. */
556 ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED
,
557 OnigApplyAllCaseFoldFunc f
, void* arg
)
559 static OnigCodePoint ss
[] = { 0x73, 0x73 };
561 return (*f
)((OnigCodePoint
)0xdf, ss
, 2, arg
);
/* Report all case folds of an encoding described by `map` to the callback f.
 *
 * Order of work:
 *   1. the ASCII folds, via onigenc_ascii_apply_all_case_fold();
 *   2. each of the map_size entries of `map`, once with .from as the source
 *      and once with .to as the source;
 *   3. when ess_tsett_flag is non-zero, the 0xdf fold, via
 *      ss_apply_all_case_fold().
 * Any non-zero result from a step is returned immediately.
 *
 * NOTE(review): the assignments to `code` before each callback and the
 * return taken when ess_tsett_flag is zero are not visible in this
 * excerpt. */
565 onigenc_apply_all_case_fold_with_map(int map_size
,
566 const OnigPairCaseFoldCodes map
[],
567 int ess_tsett_flag
, OnigCaseFoldType flag
,
568 OnigApplyAllCaseFoldFunc f
, void* arg
)
573 r
= onigenc_ascii_apply_all_case_fold(flag
, f
, arg
);
574 if (r
!= 0) return r
;
576 for (i
= 0; i
< map_size
; i
++) {
578 r
= (*f
)(map
[i
].from
, &code
, 1, arg
);
579 if (r
!= 0) return r
;
582 r
= (*f
)(map
[i
].to
, &code
, 1, arg
);
583 if (r
!= 0) return r
;
586 if (ess_tsett_flag
!= 0)
587 return ss_apply_all_case_fold(flag
, f
, arg
);
/* Fill items[] with the case-fold alternatives for the byte at *p, for a
 * single-byte encoding whose non-ASCII pairs are listed in `map`.
 *
 * Cases handled, in order:
 *   - 0x41..0x5A: items[0] is the lower-case letter (*p + 0x20).  If *p is
 *     0x53 ('S'), ess_tsett_flag is non-zero, a second byte lies before
 *     `end` and that byte is 0x53 or 0x73, items[1] additionally describes
 *     the two-byte sequence folding to 0xdf (byte_len 2, code_len 1).
 *   - 0x61..0x7A: the mirror image, with *p - 0x20 and the check on 0x73.
 *   - 0xdf with ess_tsett_flag non-zero: four items, each one byte long and
 *     two code points wide: "ss", "SS", "sS", "Ss".
 *   - otherwise `map` is searched: a match on .from yields .to and a match
 *     on .to yields .from, as a one-byte, one-code-point item.
 * `flag` is unused.
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably the number of items written (0 when nothing matches). */
593 onigenc_get_case_fold_codes_by_str_with_map(int map_size
,
594 const OnigPairCaseFoldCodes map
[],
595 int ess_tsett_flag
, OnigCaseFoldType flag ARG_UNUSED
,
596 const OnigUChar
* p
, const OnigUChar
* end
, OnigCaseFoldCodeItem items
[])
598 if (0x41 <= *p
&& *p
<= 0x5a) {
599 items
[0].byte_len
= 1;
600 items
[0].code_len
= 1;
601 items
[0].code
[0] = (OnigCodePoint
)(*p
+ 0x20);
602 if (*p
== 0x53 && ess_tsett_flag
!= 0 && end
> p
+ 1
603 && (*(p
+1) == 0x53 || *(p
+1) == 0x73)) {
605 items
[1].byte_len
= 2;
606 items
[1].code_len
= 1;
607 items
[1].code
[0] = (OnigCodePoint
)0xdf;
613 else if (0x61 <= *p
&& *p
<= 0x7a) {
614 items
[0].byte_len
= 1;
615 items
[0].code_len
= 1;
616 items
[0].code
[0] = (OnigCodePoint
)(*p
- 0x20);
617 if (*p
== 0x73 && ess_tsett_flag
!= 0 && end
> p
+ 1
618 && (*(p
+1) == 0x73 || *(p
+1) == 0x53)) {
620 items
[1].byte_len
= 2;
621 items
[1].code_len
= 1;
622 items
[1].code
[0] = (OnigCodePoint
)0xdf;
628 else if (*p
== 0xdf && ess_tsett_flag
!= 0) {
629 items
[0].byte_len
= 1;
630 items
[0].code_len
= 2;
631 items
[0].code
[0] = (OnigCodePoint
)'s';
632 items
[0].code
[1] = (OnigCodePoint
)'s';
634 items
[1].byte_len
= 1;
635 items
[1].code_len
= 2;
636 items
[1].code
[0] = (OnigCodePoint
)'S';
637 items
[1].code
[1] = (OnigCodePoint
)'S';
639 items
[2].byte_len
= 1;
640 items
[2].code_len
= 2;
641 items
[2].code
[0] = (OnigCodePoint
)'s';
642 items
[2].code
[1] = (OnigCodePoint
)'S';
644 items
[3].byte_len
= 1;
645 items
[3].code_len
= 2;
646 items
[3].code
[0] = (OnigCodePoint
)'S';
647 items
[3].code
[1] = (OnigCodePoint
)'s';
654 for (i
= 0; i
< map_size
; i
++) {
655 if (*p
== map
[i
].from
) {
656 items
[0].byte_len
= 1;
657 items
[0].code_len
= 1;
658 items
[0].code
[0] = map
[i
].to
;
661 else if (*p
== map
[i
].to
) {
662 items
[0].byte_len
= 1;
663 items
[0].code_len
= 1;
664 items
[0].code
[0] = map
[i
].from
;
/* Stub that returns ONIG_NO_SUPPORT_CONFIG.  All three parameters are
 * unused and neither output parameter is written. */
675 onigenc_not_support_get_ctype_code_range(OnigCtype ctype ARG_UNUSED
,
676 OnigCodePoint
* sb_out ARG_UNUSED
,
677 const OnigCodePoint
* ranges
[] ARG_UNUSED
)
679 return ONIG_NO_SUPPORT_CONFIG
;
/* Newline test: returns 1 when the byte at p is 0x0a.
 *
 * NOTE(review): the bounds check against `end` and the fall-through return
 * (presumably 0) are not visible in this excerpt. */
683 onigenc_is_mbc_newline_0x0a(const UChar
* p
, const UChar
* end
)
686 if (*p
== 0x0a) return 1;
691 /* for single byte encodings */
/* Case-fold one byte: store ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p) in *lower
 * and return 1, the byte length of the folded character.  `flag` and `end`
 * are unused.
 *
 * NOTE(review): any advance of *p past the consumed byte is not visible in
 * this excerpt. */
693 onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED
, const UChar
** p
,
694 const UChar
*end ARG_UNUSED
, UChar
* lower
)
696 *lower
= ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p
);
699 return 1; /* return byte length of converted char to lower */
/* Return ONIGENC_IS_ASCII_CODE_CASE_AMBIG() for the byte that *pp points to
 * on entry.
 *
 * NOTE(review): any advance of *pp is not visible in this excerpt, and
 * `flag` and `end` are not referenced by the visible lines. */
704 onigenc_ascii_is_mbc_ambiguous(OnigCaseFoldType flag
,
705 const UChar
** pp
, const UChar
* end
)
707 const UChar
* p
= *pp
;
710 return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p
);
715 onigenc_single_byte_mbc_enc_len(const UChar
* p ARG_UNUSED
)
/* Single-byte decoding: the code point is the value of the byte at p.
 * `end` is unused, so the caller must make sure *p is readable. */
721 onigenc_single_byte_mbc_to_code(const UChar
* p
, const UChar
* end ARG_UNUSED
)
723 return (OnigCodePoint
)(*p
);
/* Encoded length of `code` in a single-byte encoding: 1 for code points
 * below 0x100, otherwise ONIGERR_INVALID_CODE_POINT_VALUE.
 *
 * NOTE(review): `code` is marked ARG_UNUSED although the return expression
 * reads it; the marker looks stale. */
727 onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED
)
729 return (code
< 0x100 ? 1 : ONIGERR_INVALID_CODE_POINT_VALUE
);
/* Encode `code` as one byte: the low 8 bits are stored in *buf; higher bits
 * are discarded by the mask.
 *
 * NOTE(review): the return statement is not visible in this excerpt
 * (presumably 1, the number of bytes written). */
733 onigenc_single_byte_code_to_mbc(OnigCodePoint code
, UChar
*buf
)
735 *buf
= (UChar
)(code
& 0xff);
740 onigenc_single_byte_left_adjust_char_head(const UChar
* start ARG_UNUSED
,
747 onigenc_always_true_is_allowed_reverse_match(const UChar
* s ARG_UNUSED
,
748 const UChar
* end ARG_UNUSED
)
754 onigenc_always_false_is_allowed_reverse_match(const UChar
* s ARG_UNUSED
,
755 const UChar
* end ARG_UNUSED
)
761 onigenc_always_true_is_valid_mbc_string(const UChar
* s ARG_UNUSED
,
762 const UChar
* end ARG_UNUSED
)
768 onigenc_length_check_is_valid_mbc_string(OnigEncoding enc
,
769 const UChar
* p
, const UChar
* end
)
/* Function wrapper around the ONIGENC_IS_VALID_MBC_STRING macro: returns its
 * result for (enc, s, end) unchanged. */
782 onigenc_is_valid_mbc_string(OnigEncoding enc
, const UChar
* s
, const UChar
* end
)
784 return ONIGENC_IS_VALID_MBC_STRING(enc
, s
, end
);
/* Decode the multi-byte character at p into a code point.
 *
 * len is enclen(enc, p).  n starts as the first byte; a one-byte character
 * returns it directly.  Otherwise the loop visits the remaining len - 1
 * bytes.
 *
 * NOTE(review): the loop body and the final return are not visible in this
 * excerpt; presumably each further byte is appended to n as the next lower
 * 8 bits, stopping early at `end`. */
788 onigenc_mbn_mbc_to_code(OnigEncoding enc
, const UChar
* p
, const UChar
* end
)
793 len
= enclen(enc
, p
);
794 n
= (OnigCodePoint
)(*p
++);
795 if (len
== 1) return n
;
797 for (i
= 1; i
< len
; i
++) {
/* Case-fold the character at *pp for a multi-byte encoding.
 *
 * When ONIGENC_IS_MBC_ASCII(p) holds, the byte's lower-case form
 * (ONIGENC_ASCII_CODE_TO_LOWER_CASE) is stored through `lower`.  Otherwise
 * len = enclen(enc, p) and a loop runs over those len bytes; that path
 * returns len.  `flag` and `end` are unused.
 *
 * NOTE(review): the rest of the parameter list, the ASCII path's pointer
 * advance and return, and the body of the byte loop are not visible in this
 * excerpt; presumably the bytes are copied to `lower` unchanged and *pp is
 * advanced. */
806 onigenc_mbn_mbc_case_fold(OnigEncoding enc
, OnigCaseFoldType flag ARG_UNUSED
,
807 const UChar
** pp
, const UChar
* end ARG_UNUSED
,
811 const UChar
*p
= *pp
;
813 if (ONIGENC_IS_MBC_ASCII(p
)) {
814 *lower
= ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p
);
821 len
= enclen(enc
, p
);
822 for (i
= 0; i
< len
; i
++) {
826 return len
; /* return byte length of converted to lower char */
/* Case-ambiguity test for the character at *pp in a multi-byte encoding.
 *
 * When ONIGENC_IS_MBC_ASCII(p) holds, the result is
 * ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p).  On the other path *pp is advanced
 * by enclen(enc, p).
 *
 * NOTE(review): the advance of *pp on the ASCII path and the return value of
 * the non-ASCII path (presumably FALSE) are not visible in this excerpt. */
832 onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc
, OnigCaseFoldType flag
,
833 const UChar
** pp
, const UChar
* end
)
835 const UChar
* p
= *pp
;
837 if (ONIGENC_IS_MBC_ASCII(p
)) {
839 return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p
);
842 (*pp
) += enclen(enc
, p
);
/* Encoded length of `code` for an encoding of at most two bytes: 2 when any
 * of bits 8..15 is set.
 *
 * NOTE(review): the fall-through return (presumably 1) is not visible in
 * this excerpt. */
848 onigenc_mb2_code_to_mbclen(OnigCodePoint code
)
850 if ((code
& 0xff00) != 0) return 2;
/* Encoded length of `code` for an encoding of at most four bytes, chosen by
 * the highest non-zero byte: 4 for bits 24..31, 3 for bits 16..23, 2 for
 * bits 8..15.
 *
 * NOTE(review): the fall-through return (presumably 1) is not visible in
 * this excerpt. */
855 onigenc_mb4_code_to_mbclen(OnigCodePoint code
)
857 if ((code
& 0xff000000) != 0) return 4;
858 else if ((code
& 0xff0000) != 0) return 3;
859 else if ((code
& 0xff00) != 0) return 2;
/* Encode `code` into buf as one or two bytes, most significant byte first.
 *
 * The high byte is written only when bits 8..15 are non-zero; the low byte
 * is always written.  The result is validated by comparing enclen(enc, buf)
 * with the number of bytes written: a mismatch yields
 * ONIGERR_INVALID_CODE_POINT_VALUE, otherwise the byte count is returned.
 *
 * NOTE(review): the declaration and initialisation of p (presumably
 * p = buf) are not visible in this excerpt. */
864 onigenc_mb2_code_to_mbc(OnigEncoding enc
, OnigCodePoint code
, UChar
*buf
)
868 if ((code
& 0xff00) != 0) {
869 *p
++ = (UChar
)((code
>> 8) & 0xff);
871 *p
++ = (UChar
)(code
& 0xff);
874 if (enclen(enc
, buf
) != (p
- buf
))
875 return ONIGERR_INVALID_CODE_POINT_VALUE
;
877 return (int )(p
- buf
);
/* Encode `code` into buf as one to four bytes, most significant byte first.
 *
 * Leading zero bytes are skipped.  The `p != buf` tests make sure that once
 * a higher byte has been written, every lower byte follows even if it is
 * zero.  The low byte is always written.  The result is validated by
 * comparing enclen(enc, buf) with the number of bytes written: a mismatch
 * yields ONIGERR_INVALID_CODE_POINT_VALUE, otherwise the byte count is
 * returned.
 *
 * NOTE(review): the declaration and initialisation of p (presumably
 * p = buf) are not visible in this excerpt. */
881 onigenc_mb4_code_to_mbc(OnigEncoding enc
, OnigCodePoint code
, UChar
*buf
)
885 if ((code
& 0xff000000) != 0) {
886 *p
++ = (UChar
)((code
>> 24) & 0xff);
888 if ((code
& 0xff0000) != 0 || p
!= buf
) {
889 *p
++ = (UChar
)((code
>> 16) & 0xff);
891 if ((code
& 0xff00) != 0 || p
!= buf
) {
892 *p
++ = (UChar
)((code
>> 8) & 0xff);
894 *p
++ = (UChar
)(code
& 0xff);
897 if (enclen(enc
, buf
) != (p
- buf
))
898 return ONIGERR_INVALID_CODE_POINT_VALUE
;
900 return (int )(p
- buf
);
/* Map a property name in [p, end) to a character-type constant using the
 * local table PBS.
 *
 * PBS lists fourteen names (Alnum, Alpha, Blank, Cntrl, Digit, Graph, Lower,
 * Print, Punct, Space, Upper, XDigit, ASCII, Word) with their
 * ONIGENC_CTYPE_* value and name length, and ends with a NULL-name
 * sentinel.  The name's character count is taken with onigenc_strlen(); an
 * entry matches when the lengths are equal and
 * onigenc_with_ascii_strncmp() returns 0.  If no entry matches,
 * ONIGERR_INVALID_CHAR_PROPERTY_NAME is returned.
 *
 * NOTE(review): the statement executed on a match is not visible in this
 * excerpt (presumably it returns the entry's ctype). */
904 onigenc_minimum_property_name_to_ctype(OnigEncoding enc
, UChar
* p
, UChar
* end
)
906 static PosixBracketEntryType PBS
[] = {
907 { (UChar
* )"Alnum", ONIGENC_CTYPE_ALNUM
, 5 },
908 { (UChar
* )"Alpha", ONIGENC_CTYPE_ALPHA
, 5 },
909 { (UChar
* )"Blank", ONIGENC_CTYPE_BLANK
, 5 },
910 { (UChar
* )"Cntrl", ONIGENC_CTYPE_CNTRL
, 5 },
911 { (UChar
* )"Digit", ONIGENC_CTYPE_DIGIT
, 5 },
912 { (UChar
* )"Graph", ONIGENC_CTYPE_GRAPH
, 5 },
913 { (UChar
* )"Lower", ONIGENC_CTYPE_LOWER
, 5 },
914 { (UChar
* )"Print", ONIGENC_CTYPE_PRINT
, 5 },
915 { (UChar
* )"Punct", ONIGENC_CTYPE_PUNCT
, 5 },
916 { (UChar
* )"Space", ONIGENC_CTYPE_SPACE
, 5 },
917 { (UChar
* )"Upper", ONIGENC_CTYPE_UPPER
, 5 },
918 { (UChar
* )"XDigit", ONIGENC_CTYPE_XDIGIT
, 6 },
919 { (UChar
* )"ASCII", ONIGENC_CTYPE_ASCII
, 5 },
920 { (UChar
* )"Word", ONIGENC_CTYPE_WORD
, 4 },
921 { (UChar
* )NULL
, -1, 0 }
924 PosixBracketEntryType
*pb
;
927 len
= onigenc_strlen(enc
, p
, end
);
928 for (pb
= PBS
; IS_NOT_NULL(pb
->name
); pb
++) {
929 if (len
== pb
->len
&&
930 onigenc_with_ascii_strncmp(enc
, p
, end
, pb
->name
, pb
->len
) == 0)
934 return ONIGERR_INVALID_CHAR_PROPERTY_NAME
;
/* Word-character test restricted to ASCII: decodes the character at s with
 * ONIGENC_MBC_TO_CODE, returns 0 for any code point above 127, and otherwise
 * returns ONIGENC_IS_ASCII_CODE_WORD(code). */
938 onigenc_is_mbc_word_ascii(OnigEncoding enc
, UChar
* s
, const UChar
* end
)
940 OnigCodePoint code
= ONIGENC_MBC_TO_CODE(enc
, s
, end
);
942 if (code
> 127) return 0;
944 return ONIGENC_IS_ASCII_CODE_WORD(code
);
/* Character-type test for a two-byte encoding.
 *
 * One path defers to ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype).  On the other
 * path, for the types selected by CTYPE_IS_WORD_GRAPH_PRINT(ctype), the
 * result is TRUE exactly when ONIGENC_CODE_TO_MBCLEN(enc, code) is greater
 * than 1.
 *
 * NOTE(review): the rest of the parameter list, the condition choosing the
 * ASCII path (presumably code < 128) and the final return (presumably
 * FALSE) are not visible in this excerpt. */
948 onigenc_mb2_is_code_ctype(OnigEncoding enc
, OnigCodePoint code
,
952 return ONIGENC_IS_ASCII_CODE_CTYPE(code
, ctype
);
954 if (CTYPE_IS_WORD_GRAPH_PRINT(ctype
)) {
955 return (ONIGENC_CODE_TO_MBCLEN(enc
, code
) > 1 ? TRUE
: FALSE
);
/* Character-type test for a four-byte encoding.
 *
 * One path defers to ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype).  On the other
 * path, for the types selected by CTYPE_IS_WORD_GRAPH_PRINT(ctype), the
 * result is TRUE exactly when ONIGENC_CODE_TO_MBCLEN(enc, code) is greater
 * than 1.
 *
 * NOTE(review): the rest of the parameter list, the condition choosing the
 * ASCII path (presumably code < 128) and the final return (presumably
 * FALSE) are not visible in this excerpt. */
963 onigenc_mb4_is_code_ctype(OnigEncoding enc
, OnigCodePoint code
,
967 return ONIGENC_IS_ASCII_CODE_CTYPE(code
, ctype
);
969 if (CTYPE_IS_WORD_GRAPH_PRINT(ctype
)) {
970 return (ONIGENC_CODE_TO_MBCLEN(enc
, code
) > 1 ? TRUE
: FALSE
);
/* Compare the encoded string [p, end) with the ASCII string `sascii`, up to
 * n characters.
 *
 * If the encoded string is exhausted (p >= end), the current byte of
 * `sascii` is returned, which is 0 only when `sascii` has ended as well.
 * Otherwise the next character is decoded with ONIGENC_MBC_TO_CODE for
 * comparison.
 *
 * NOTE(review): the loop over n, the comparison itself, the pointer
 * advances and the final return are not visible in this excerpt. */
978 onigenc_with_ascii_strncmp(OnigEncoding enc
, const UChar
* p
, const UChar
* end
,
979 const UChar
* sascii
/* ascii */, int n
)
984 if (p
>= end
) return (int )(*sascii
);
986 c
= (int )ONIGENC_MBC_TO_CODE(enc
, p
, end
);
997 onig_codes_cmp(OnigCodePoint a
[], OnigCodePoint b
[], int n
)
1001 for (i
= 0; i
< n
; i
++) {
/* Return one byte taken from the code point array `codes`.
 *
 * The element codes[index] is shifted right by (2 - b) * 8 bits and masked
 * with 0xff, so b = 0 selects bits 16..23, b = 1 bits 8..15 and b = 2 bits
 * 0..7.
 *
 * NOTE(review): how index and b are derived from `at` is not visible in this
 * excerpt (presumably at / 3 and at % 3, i.e. three bytes per code
 * point). */
1010 onig_codes_byte_at(OnigCodePoint codes
[], int at
)
1018 code
= codes
[index
];
1020 return ((code
>> ((2 - b
) * 8)) & 0xff);