/* Origin: git.proxmox.com, mirror_edk2.git (blob view)
 * Path:   MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regparse.c
 * Commit: "MdeModulePkg RegularExpressionDxe: Update Oniguruma to 6.9.0" */
1 /**********************************************************************
2 regparse.c - Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5 * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 #include "regparse.h"
31 #include "st.h"
32
33 #ifdef DEBUG_NODE_FREE
34 #include <stdio.h>
35 #endif
36
/* Initial allocation count for tag names (the consumer is outside this part of the file). */
#define INIT_TAG_NAMES_ALLOC_NUM 5

/* Buffer size for warning messages -- presumably; the user is not visible here. */
#define WARN_BUFSIZE 256

/* Feature switch; per its name, case folding is applied inside negated
   character classes. NOTE(review): confirm at the use site. */
#define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
42
/*
 * Character predicates for callout names and callout tag names.
 * Both accept the ASCII letters A-Z and a-z, the digits 0-9 and '_'.
 *
 * The argument is parenthesized at every use so that an expression argument
 * (for example a conditional expression) keeps its meaning.  It is still
 * evaluated several times, so it must not have side effects.
 */
#define IS_ALLOWED_CODE_IN_CALLOUT_NAME(c) \
  (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z') || ((c) >= '0' && (c) <= '9') || (c) == '_' /* || (c) == '!' */)
#define IS_ALLOWED_CODE_IN_CALLOUT_TAG_NAME(c) \
  (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z') || ((c) >= '0' && (c) <= '9') || (c) == '_')
47
48
/*
 * Built-in syntax definition "Oniguruma".
 *
 * The initializer supplies, in order:
 *   1. a mask of ONIG_SYN_OP_* flags (SYN_GNU_REGEX_OP plus extras, with
 *      ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END cleared),
 *   2. a mask of ONIG_SYN_OP2_* flags,
 *   3. a mask of syntax behavior flags (SYN_GNU_REGEX_BV plus extras),
 *   4. ONIG_OPTION_NONE,
 *   5. a table of meta characters in which only the escape character ('\\')
 *      is set; every other slot is ONIG_INEFFECTIVE_META_CHAR.
 * NOTE(review): the field names of OnigSyntaxType are declared elsewhere.
 */
OnigSyntaxType OnigSyntaxOniguruma = {
  (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
     ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
     ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_O_BRACE_OCTAL |
     ONIG_SYN_OP_ESC_CONTROL_CHARS |
     ONIG_SYN_OP_ESC_C_CONTROL )
   & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
  , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
      ONIG_SYN_OP2_OPTION_RUBY |
      ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
      ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE |
      ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP |
      ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS |
      ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME |
      ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER |
      ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE |
      ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT |
      ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP |
      ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |
      ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
      ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
      ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
      ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |
      ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |
      ONIG_SYN_OP2_ESC_H_XDIGIT | ONIG_SYN_OP2_ESC_U_HEX4 )
  , ( SYN_GNU_REGEX_BV |
      ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |
      ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |
      ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
      ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |
      ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |
      ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |
      ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )
  , ONIG_OPTION_NONE
  ,
  {
      (OnigCodePoint )'\\'                       /* esc */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
  }
};
93
/*
 * Built-in syntax definition "Ruby".
 *
 * Same layout as the other syntax tables in this file: ONIG_SYN_OP_* mask,
 * ONIG_SYN_OP2_* mask, behavior mask, ONIG_OPTION_NONE, meta-character table
 * (only the escape character is set).  Compared with OnigSyntaxOniguruma,
 * the OP2 mask here does not include
 * ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS,
 * ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME or
 * ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT.
 */
OnigSyntaxType OnigSyntaxRuby = {
  (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
     ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
     ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_O_BRACE_OCTAL |
     ONIG_SYN_OP_ESC_CONTROL_CHARS |
     ONIG_SYN_OP_ESC_C_CONTROL )
   & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
  , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
      ONIG_SYN_OP2_OPTION_RUBY |
      ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
      ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE |
      ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP |
      ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER |
      ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE |
      ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP |
      ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |
      ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
      ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
      ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
      ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |
      ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |
      ONIG_SYN_OP2_ESC_H_XDIGIT | ONIG_SYN_OP2_ESC_U_HEX4 )
  , ( SYN_GNU_REGEX_BV |
      ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |
      ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |
      ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
      ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |
      ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |
      ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |
      ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )
  , ONIG_OPTION_NONE
  ,
  {
      (OnigCodePoint )'\\'                       /* esc */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
  }
};
135
/* Default syntax pointer; starts out as ONIG_SYNTAX_ONIGURUMA. */
OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_ONIGURUMA;

/* Warning handler that does nothing with its message. */
extern void onig_null_warn(const char* s ARG_UNUSED) { }

/* Current handler for warnings: DEFAULT_WARN_FUNCTION when the build defines
   it, otherwise the do-nothing handler. */
#ifdef DEFAULT_WARN_FUNCTION
static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION;
#else
static OnigWarnFunc onig_warn = onig_null_warn;
#endif

/* Current handler for verbose warnings, selected the same way from
   DEFAULT_VERB_WARN_FUNCTION. */
#ifdef DEFAULT_VERB_WARN_FUNCTION
static OnigWarnFunc onig_verb_warn = (OnigWarnFunc )DEFAULT_VERB_WARN_FUNCTION;
#else
static OnigWarnFunc onig_verb_warn = onig_null_warn;
#endif
151
152 extern void onig_set_warn_func(OnigWarnFunc f)
153 {
154 onig_warn = f;
155 }
156
157 extern void onig_set_verb_warn_func(OnigWarnFunc f)
158 {
159 onig_verb_warn = f;
160 }
161
162 extern void
163 onig_warning(const char* s)
164 {
165 if (onig_warn == onig_null_warn) return ;
166
167 (*onig_warn)(s);
168 }
169
#define DEFAULT_MAX_CAPTURE_NUM   32767

/* Capture-number limit; changed only through onig_set_capture_num_limit(). */
static int MaxCaptureNum = DEFAULT_MAX_CAPTURE_NUM;

/*
 * Store num as the new capture-number limit.
 * Returns 0 on success, or -1 (limit left unchanged) when num is negative.
 */
extern int
onig_set_capture_num_limit(int num)
{
  int status = -1;

  if (num >= 0) {
    MaxCaptureNum = num;
    status = 0;
  }
  return status;
}
182
183 static unsigned int ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT;
184
185 extern unsigned int
186 onig_get_parse_depth_limit(void)
187 {
188 return ParseDepthLimit;
189 }
190
191 extern int
192 onig_set_parse_depth_limit(unsigned int depth)
193 {
194 if (depth == 0)
195 ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT;
196 else
197 ParseDepthLimit = depth;
198 return 0;
199 }
200
/*
 * Multiply two non-negative ints with an overflow check.
 *
 * Returns x * y when the product fits in an int, 0 when either factor is 0,
 * and -1 when the product would exceed INT_MAX.
 *
 * For positive factors, x * y <= INT_MAX holds exactly when
 * x <= INT_MAX / y, so the comparison is inclusive; a strict '<' would
 * wrongly reject products that land exactly on the quotient
 * (for example INT_MAX * 1).
 */
static int
positive_int_multiply(int x, int y)
{
  if (x == 0 || y == 0) return 0;

  if (x <= INT_MAX / y)
    return x * y;

  return -1;
}
211
212 static void
213 bbuf_free(BBuf* bbuf)
214 {
215 if (IS_NOT_NULL(bbuf)) {
216 if (IS_NOT_NULL(bbuf->p)) xfree(bbuf->p);
217 xfree(bbuf);
218 }
219 }
220
221 static int
222 bbuf_clone(BBuf** rto, BBuf* from)
223 {
224 int r;
225 BBuf *to;
226
227 *rto = to = (BBuf* )xmalloc(sizeof(BBuf));
228 CHECK_NULL_RETURN_MEMERR(to);
229 r = BB_INIT(to, from->alloc);
230 if (r != 0) {
231 xfree(to->p);
232 *rto = 0;
233 return r;
234 }
235 to->used = from->used;
236 xmemcpy(to->p, from->p, from->used);
237 return 0;
238 }
239
240 static int backref_rel_to_abs(int rel_no, ScanEnv* env)
241 {
242 if (rel_no > 0) {
243 return env->num_mem + rel_no;
244 }
245 else {
246 return env->num_mem + 1 + rel_no;
247 }
248 }
249
/* Set (OPTION_ON) or clear (OPTION_OFF) the flag bits f in the lvalue v. */
#define OPTION_ON(v,f)     ((v) |= (f))
#define OPTION_OFF(v,f)    ((v) &= ~(f))

/* Clear f in v when `negative` is true, set it otherwise.  The whole
   conditional is parenthesized so the macro can be used inside a larger
   expression without the surrounding operators binding to `negative`. */
#define OPTION_NEGATE(v,f,negative)    ((negative) ? ((v) &= ~(f)) : ((v) |= (f)))
254
/* First code point of the multi-byte range for enc: 0 when the encoding's
   minimum character length is greater than one byte, 0x80 otherwise. */
#define MBCODE_START_POS(enc) \
  (OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80)

/* Add the range [MBCODE_START_POS(enc), all-ones code point] to pbuf;
   evaluates to the result of add_code_range_to_buf(). */
#define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \
  add_code_range_to_buf(pbuf, MBCODE_START_POS(enc), ~((OnigCodePoint )0))

/* For encodings that are not single-byte, add the full multi-byte range to
   mbuf.  Uses a variable `r` from the expanding scope and RETURNS from the
   enclosing function when the addition fails. */
#define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\
  if (! ONIGENC_IS_SINGLEBYTE(enc)) {\
    r = SET_ALL_MULTI_BYTE_RANGE(enc, &(mbuf));\
    if (r != 0) return r;\
  }\
} while (0)
267
268
/* Set `empty` to 1 when every word of bit set bs is zero, to 0 otherwise.
   Declares its own loop variable `i` inside the do-block. */
#define BITSET_IS_EMPTY(bs,empty) do {\
  int i;\
  empty = 1;\
  for (i = 0; i < (int )BITSET_SIZE; i++) {\
    if ((bs)[i] != 0) {\
      empty = 0; break;\
    }\
  }\
} while (0)
278
279 static void
280 bitset_set_range(BitSetRef bs, int from, int to)
281 {
282 int i;
283 for (i = from; i <= to && i < SINGLE_BYTE_SIZE; i++) {
284 BITSET_SET_BIT(bs, i);
285 }
286 }
287
/* Disabled helper (compiled out by "#if 0"): would set every word of bs to
   all ones. */
#if 0
static void
bitset_set_all(BitSetRef bs)
{
  int i;
  for (i = 0; i < BITSET_SIZE; i++) { bs[i] = ~((Bits )0); }
}
#endif
296
297 static void
298 bitset_invert(BitSetRef bs)
299 {
300 int i;
301 for (i = 0; i < (int )BITSET_SIZE; i++) { bs[i] = ~(bs[i]); }
302 }
303
304 static void
305 bitset_invert_to(BitSetRef from, BitSetRef to)
306 {
307 int i;
308 for (i = 0; i < (int )BITSET_SIZE; i++) { to[i] = ~(from[i]); }
309 }
310
311 static void
312 bitset_and(BitSetRef dest, BitSetRef bs)
313 {
314 int i;
315 for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] &= bs[i]; }
316 }
317
318 static void
319 bitset_or(BitSetRef dest, BitSetRef bs)
320 {
321 int i;
322 for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] |= bs[i]; }
323 }
324
325 static void
326 bitset_copy(BitSetRef dest, BitSetRef bs)
327 {
328 int i;
329 for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] = bs[i]; }
330 }
331
332 extern int
333 onig_strncmp(const UChar* s1, const UChar* s2, int n)
334 {
335 int x;
336
337 while (n-- > 0) {
338 x = *s2++ - *s1++;
339 if (x) return x;
340 }
341 return 0;
342 }
343
344 extern void
345 onig_strcpy(UChar* dest, const UChar* src, const UChar* end)
346 {
347 int len = (int )(end - src);
348 if (len > 0) {
349 xmemcpy(dest, src, len);
350 dest[len] = (UChar )0;
351 }
352 }
353
354 static int
355 save_entry(ScanEnv* env, enum SaveType type, int* id)
356 {
357 int nid = env->save_num;
358
359 #if 0
360 if (IS_NULL(env->saves)) {
361 int n = 10;
362 env->saves = (SaveItem* )xmalloc(sizeof(SaveItem) * n);
363 CHECK_NULL_RETURN_MEMERR(env->saves);
364 env->save_alloc_num = n;
365 }
366 else if (env->save_alloc_num <= nid) {
367 int n = env->save_alloc_num * 2;
368 SaveItem* p = (SaveItem* )xrealloc(env->saves, sizeof(SaveItem) * n, sizeof(SaveItem)*env->save_alloc_num);
369 CHECK_NULL_RETURN_MEMERR(p);
370 env->saves = p;
371 env->save_alloc_num = n;
372 }
373
374 env->saves[nid].type = type;
375 #endif
376
377 env->save_num++;
378 *id = nid;
379 return 0;
380 }
381
/* scan pattern methods
 *
 * These macros operate on variables of the expanding scope: `p` (current
 * position), `end` (end of pattern), `enc` (encoding) and, for the
 * non-"_S" forms, `pfetch_prev`, which PFETCH_READY declares.
 *
 *   PEND        1 when p has reached end, else 0
 *   PINC        remember p in pfetch_prev, then step over one character
 *   PFETCH(c)   read the character at p into c, then do what PINC does
 *   PUNFETCH    step back to the position remembered by PINC/PFETCH
 *   PINC_S / PFETCH_S(c)   same as PINC / PFETCH without touching pfetch_prev
 *   PPEEK       character at p without advancing; PEND_VALUE at end
 *   PPEEK_IS(c) whether PPEEK equals c (argument parenthesized so that an
 *               expression argument is cast as a whole)
 */
#define PEND_VALUE   0

#define PFETCH_READY  UChar* pfetch_prev
#define PEND         (p < end ?  0 : 1)
#define PUNFETCH     p = pfetch_prev
#define PINC       do { \
  pfetch_prev = p; \
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
} while (0)
#define PFETCH(c)  do { \
  c = ONIGENC_MBC_TO_CODE(enc, p, end); \
  pfetch_prev = p; \
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
} while (0)

#define PINC_S     do { \
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
} while (0)
#define PFETCH_S(c) do { \
  c = ONIGENC_MBC_TO_CODE(enc, p, end); \
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
} while (0)

#define PPEEK        (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE)
#define PPEEK_IS(c)  (PPEEK == (OnigCodePoint )(c))
408
409 static UChar*
410 strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end,
411 int capa, int oldCapa)
412 {
413 UChar* r;
414
415 if (dest)
416 r = (UChar* )xrealloc(dest, capa + 1, oldCapa);
417 else
418 r = (UChar* )xmalloc(capa + 1);
419
420 CHECK_NULL_RETURN(r);
421 onig_strcpy(r + (dest_end - dest), src, src_end);
422 return r;
423 }
424
425 /* dest on static area */
426 static UChar*
427 strcat_capa_from_static(UChar* dest, UChar* dest_end,
428 const UChar* src, const UChar* src_end, int capa)
429 {
430 UChar* r;
431
432 r = (UChar* )xmalloc(capa + 1);
433 CHECK_NULL_RETURN(r);
434 onig_strcpy(r, dest, dest_end);
435 onig_strcpy(r + (dest_end - dest), src, src_end);
436 return r;
437 }
438
439
440 #ifdef USE_ST_LIBRARY
441
442 typedef struct {
443 UChar* s;
444 UChar* end;
445 } st_str_end_key;
446
447 static int
448 str_end_cmp(st_str_end_key* x, st_str_end_key* y)
449 {
450 UChar *p, *q;
451 int c;
452
453 if ((x->end - x->s) != (y->end - y->s))
454 return 1;
455
456 p = x->s;
457 q = y->s;
458 while (p < x->end) {
459 c = (int )*p - (int )*q;
460 if (c != 0) return c;
461
462 p++; q++;
463 }
464
465 return 0;
466 }
467
468 static int
469 str_end_hash(st_str_end_key* x)
470 {
471 UChar *p;
472 int val = 0;
473
474 p = x->s;
475 while (p < x->end) {
476 val = val * 997 + (int )*p++;
477 }
478
479 return val + (val >> 5);
480 }
481
482 extern hash_table_type*
483 onig_st_init_strend_table_with_size(int size)
484 {
485 static struct st_hash_type hashType = {
486 str_end_cmp,
487 str_end_hash,
488 };
489
490 return (hash_table_type* )
491 onig_st_init_table_with_size(&hashType, size);
492 }
493
494 extern int
495 onig_st_lookup_strend(hash_table_type* table, const UChar* str_key,
496 const UChar* end_key, hash_data_type *value)
497 {
498 st_str_end_key key;
499
500 key.s = (UChar* )str_key;
501 key.end = (UChar* )end_key;
502
503 return onig_st_lookup(table, (st_data_t )(&key), value);
504 }
505
506 extern int
507 onig_st_insert_strend(hash_table_type* table, const UChar* str_key,
508 const UChar* end_key, hash_data_type value)
509 {
510 st_str_end_key* key;
511 int result;
512
513 key = (st_str_end_key* )xmalloc(sizeof(st_str_end_key));
514 CHECK_NULL_RETURN_MEMERR(key);
515
516 key->s = (UChar* )str_key;
517 key->end = (UChar* )end_key;
518 result = onig_st_insert(table, (st_data_t )key, value);
519 if (result) {
520 xfree(key);
521 }
522 return result;
523 }
524
525
526 typedef struct {
527 OnigEncoding enc;
528 int type; /* callout type: single or not */
529 UChar* s;
530 UChar* end;
531 } st_callout_name_key;
532
533 static int
534 callout_name_table_cmp(st_callout_name_key* x, st_callout_name_key* y)
535 {
536 UChar *p, *q;
537 int c;
538
539 if (x->enc != y->enc) return 1;
540 if (x->type != y->type) return 1;
541 if ((x->end - x->s) != (y->end - y->s))
542 return 1;
543
544 p = x->s;
545 q = y->s;
546 while (p < x->end) {
547 c = (int )*p - (int )*q;
548 if (c != 0) return c;
549
550 p++; q++;
551 }
552
553 return 0;
554 }
555
556 static int
557 callout_name_table_hash(st_callout_name_key* x)
558 {
559 UChar *p;
560 int val = 0;
561
562 p = x->s;
563 while (p < x->end) {
564 val = val * 997 + (int )*p++;
565 }
566
567 /* use intptr_t for escape warning in Windows */
568 return val + (val >> 5) + ((intptr_t )x->enc & 0xffff) + x->type;
569 }
570
571 extern hash_table_type*
572 onig_st_init_callout_name_table_with_size(int size)
573 {
574 static struct st_hash_type hashType = {
575 callout_name_table_cmp,
576 callout_name_table_hash,
577 };
578
579 return (hash_table_type* )
580 onig_st_init_table_with_size(&hashType, size);
581 }
582
583 extern int
584 onig_st_lookup_callout_name_table(hash_table_type* table,
585 OnigEncoding enc,
586 int type,
587 const UChar* str_key,
588 const UChar* end_key,
589 hash_data_type *value)
590 {
591 st_callout_name_key key;
592
593 key.enc = enc;
594 key.type = type;
595 key.s = (UChar* )str_key;
596 key.end = (UChar* )end_key;
597
598 return onig_st_lookup(table, (st_data_t )(&key), value);
599 }
600
601 static int
602 st_insert_callout_name_table(hash_table_type* table,
603 OnigEncoding enc, int type,
604 UChar* str_key, UChar* end_key,
605 hash_data_type value)
606 {
607 st_callout_name_key* key;
608 int result;
609
610 key = (st_callout_name_key* )xmalloc(sizeof(st_callout_name_key));
611 CHECK_NULL_RETURN_MEMERR(key);
612
613 /* key->s: don't duplicate, because str_key is duped in callout_name_entry() */
614 key->enc = enc;
615 key->type = type;
616 key->s = str_key;
617 key->end = end_key;
618 result = onig_st_insert(table, (st_data_t )key, value);
619 if (result) {
620 xfree(key);
621 }
622 return result;
623 }
624
625 #endif /* USE_ST_LIBRARY */
626
627
/* Number of slots allocated for back_refs when a name gets its second
   group number. */
#define INIT_NAME_BACKREFS_ALLOC_NUM 8

/* One named group: the name plus the group number(s) defined under it.
   A single number is kept inline in back_ref1; from the second number on,
   all numbers live in the back_refs array. */
typedef struct {
  UChar* name;
  int name_len; /* byte length */
  int back_num; /* number of backrefs */
  int back_alloc; /* allocated slots in back_refs */
  int back_ref1; /* the group number while back_num == 1 */
  int* back_refs; /* group numbers once back_num > 1 */
} NameEntry;
638
639 #ifdef USE_ST_LIBRARY
640
/* Initial size passed to the hash table when the name table is created. */
#define INIT_NAMES_ALLOC_NUM 5

/* With the st library the name table is an st hash table. */
typedef st_table NameTable;
typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */

/* Name buffer sizes; the users are outside this part of the file. */
#define NAMEBUF_SIZE 24
#define NAMEBUF_SIZE_1 25
648
649 #ifdef ONIG_DEBUG
650 static int
651 i_print_name_entry(UChar* key, NameEntry* e, void* arg)
652 {
653 int i;
654 FILE* fp = (FILE* )arg;
655
656 fprintf(fp, "%s: ", e->name);
657 if (e->back_num == 0)
658 fputs("-", fp);
659 else if (e->back_num == 1)
660 fprintf(fp, "%d", e->back_ref1);
661 else {
662 for (i = 0; i < e->back_num; i++) {
663 if (i > 0) fprintf(fp, ", ");
664 fprintf(fp, "%d", e->back_refs[i]);
665 }
666 }
667 fputs("\n", fp);
668 return ST_CONTINUE;
669 }
670
671 extern int
672 onig_print_names(FILE* fp, regex_t* reg)
673 {
674 NameTable* t = (NameTable* )reg->name_table;
675
676 if (IS_NOT_NULL(t)) {
677 fprintf(fp, "name table\n");
678 onig_st_foreach(t, i_print_name_entry, (HashDataType )fp);
679 fputs("\n", fp);
680 }
681 return 0;
682 }
683 #endif /* ONIG_DEBUG */
684
685 static int
686 i_free_name_entry(UChar* key, NameEntry* e, void* arg ARG_UNUSED)
687 {
688 xfree(e->name);
689 if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
690 xfree(key);
691 xfree(e);
692 return ST_DELETE;
693 }
694
695 static int
696 names_clear(regex_t* reg)
697 {
698 NameTable* t = (NameTable* )reg->name_table;
699
700 if (IS_NOT_NULL(t)) {
701 onig_st_foreach(t, i_free_name_entry, 0);
702 }
703 return 0;
704 }
705
706 extern int
707 onig_names_free(regex_t* reg)
708 {
709 int r;
710 NameTable* t;
711
712 r = names_clear(reg);
713 if (r != 0) return r;
714
715 t = (NameTable* )reg->name_table;
716 if (IS_NOT_NULL(t)) onig_st_free_table(t);
717 reg->name_table = (void* )NULL;
718 return 0;
719 }
720
721 static NameEntry*
722 name_find(regex_t* reg, const UChar* name, const UChar* name_end)
723 {
724 NameEntry* e;
725 NameTable* t = (NameTable* )reg->name_table;
726
727 e = (NameEntry* )NULL;
728 if (IS_NOT_NULL(t)) {
729 onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));
730 }
731 return e;
732 }
733
734 typedef struct {
735 int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*);
736 regex_t* reg;
737 void* arg;
738 int ret;
739 OnigEncoding enc;
740 } INamesArg;
741
742 static int
743 i_names(UChar* key ARG_UNUSED, NameEntry* e, INamesArg* arg)
744 {
745 int r = (*(arg->func))(e->name,
746 e->name + e->name_len,
747 e->back_num,
748 (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
749 arg->reg, arg->arg);
750 if (r != 0) {
751 arg->ret = r;
752 return ST_STOP;
753 }
754 return ST_CONTINUE;
755 }
756
757 extern int
758 onig_foreach_name(regex_t* reg,
759 int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
760 {
761 INamesArg narg;
762 NameTable* t = (NameTable* )reg->name_table;
763
764 narg.ret = 0;
765 if (IS_NOT_NULL(t)) {
766 narg.func = func;
767 narg.reg = reg;
768 narg.arg = arg;
769 narg.enc = reg->enc; /* should be pattern encoding. */
770 onig_st_foreach(t, i_names, (HashDataType )&narg);
771 }
772 return narg.ret;
773 }
774
775 static int
776 i_renumber_name(UChar* key ARG_UNUSED, NameEntry* e, GroupNumRemap* map)
777 {
778 int i;
779
780 if (e->back_num > 1) {
781 for (i = 0; i < e->back_num; i++) {
782 e->back_refs[i] = map[e->back_refs[i]].new_val;
783 }
784 }
785 else if (e->back_num == 1) {
786 e->back_ref1 = map[e->back_ref1].new_val;
787 }
788
789 return ST_CONTINUE;
790 }
791
792 extern int
793 onig_renumber_name_table(regex_t* reg, GroupNumRemap* map)
794 {
795 NameTable* t = (NameTable* )reg->name_table;
796
797 if (IS_NOT_NULL(t)) {
798 onig_st_foreach(t, i_renumber_name, (HashDataType )map);
799 }
800 return 0;
801 }
802
803
804 extern int
805 onig_number_of_names(regex_t* reg)
806 {
807 NameTable* t = (NameTable* )reg->name_table;
808
809 if (IS_NOT_NULL(t))
810 return t->num_entries;
811 else
812 return 0;
813 }
814
815 #else /* USE_ST_LIBRARY */
816
/* Initial number of entries allocated for the array-based name table. */
#define INIT_NAMES_ALLOC_NUM 8

/* Name table used when the st library is not available: a growable array. */
typedef struct {
  NameEntry* e; /* entry array */
  int num; /* entries in use */
  int alloc; /* entries allocated */
} NameTable;
824
825 #ifdef ONIG_DEBUG
826 extern int
827 onig_print_names(FILE* fp, regex_t* reg)
828 {
829 int i, j;
830 NameEntry* e;
831 NameTable* t = (NameTable* )reg->name_table;
832
833 if (IS_NOT_NULL(t) && t->num > 0) {
834 fprintf(fp, "name table\n");
835 for (i = 0; i < t->num; i++) {
836 e = &(t->e[i]);
837 fprintf(fp, "%s: ", e->name);
838 if (e->back_num == 0) {
839 fputs("-", fp);
840 }
841 else if (e->back_num == 1) {
842 fprintf(fp, "%d", e->back_ref1);
843 }
844 else {
845 for (j = 0; j < e->back_num; j++) {
846 if (j > 0) fprintf(fp, ", ");
847 fprintf(fp, "%d", e->back_refs[j]);
848 }
849 }
850 fputs("\n", fp);
851 }
852 fputs("\n", fp);
853 }
854 return 0;
855 }
856 #endif
857
858 static int
859 names_clear(regex_t* reg)
860 {
861 int i;
862 NameEntry* e;
863 NameTable* t = (NameTable* )reg->name_table;
864
865 if (IS_NOT_NULL(t)) {
866 for (i = 0; i < t->num; i++) {
867 e = &(t->e[i]);
868 if (IS_NOT_NULL(e->name)) {
869 xfree(e->name);
870 e->name = NULL;
871 e->name_len = 0;
872 e->back_num = 0;
873 e->back_alloc = 0;
874 if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
875 e->back_refs = (int* )NULL;
876 }
877 }
878 if (IS_NOT_NULL(t->e)) {
879 xfree(t->e);
880 t->e = NULL;
881 }
882 t->num = 0;
883 }
884 return 0;
885 }
886
887 extern int
888 onig_names_free(regex_t* reg)
889 {
890 int r;
891 NameTable* t;
892
893 r = names_clear(reg);
894 if (r != 0) return r;
895
896 t = (NameTable* )reg->name_table;
897 if (IS_NOT_NULL(t)) xfree(t);
898 reg->name_table = NULL;
899 return 0;
900 }
901
902 static NameEntry*
903 name_find(regex_t* reg, UChar* name, UChar* name_end)
904 {
905 int i, len;
906 NameEntry* e;
907 NameTable* t = (NameTable* )reg->name_table;
908
909 if (IS_NOT_NULL(t)) {
910 len = name_end - name;
911 for (i = 0; i < t->num; i++) {
912 e = &(t->e[i]);
913 if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)
914 return e;
915 }
916 }
917 return (NameEntry* )NULL;
918 }
919
920 extern int
921 onig_foreach_name(regex_t* reg,
922 int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
923 {
924 int i, r;
925 NameEntry* e;
926 NameTable* t = (NameTable* )reg->name_table;
927
928 if (IS_NOT_NULL(t)) {
929 for (i = 0; i < t->num; i++) {
930 e = &(t->e[i]);
931 r = (*func)(e->name, e->name + e->name_len, e->back_num,
932 (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
933 reg, arg);
934 if (r != 0) return r;
935 }
936 }
937 return 0;
938 }
939
940 extern int
941 onig_number_of_names(regex_t* reg)
942 {
943 NameTable* t = (NameTable* )reg->name_table;
944
945 if (IS_NOT_NULL(t))
946 return t->num;
947 else
948 return 0;
949 }
950
951 #endif /* else USE_ST_LIBRARY */
952
953 static int
954 name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)
955 {
956 int r;
957 int alloc;
958 NameEntry* e;
959 NameTable* t = (NameTable* )reg->name_table;
960
961 if (name_end - name <= 0)
962 return ONIGERR_EMPTY_GROUP_NAME;
963
964 e = name_find(reg, name, name_end);
965 if (IS_NULL(e)) {
966 #ifdef USE_ST_LIBRARY
967 if (IS_NULL(t)) {
968 t = onig_st_init_strend_table_with_size(INIT_NAMES_ALLOC_NUM);
969 reg->name_table = (void* )t;
970 }
971 e = (NameEntry* )xmalloc(sizeof(NameEntry));
972 CHECK_NULL_RETURN_MEMERR(e);
973
974 e->name = onigenc_strdup(reg->enc, name, name_end);
975 if (IS_NULL(e->name)) {
976 xfree(e); return ONIGERR_MEMORY;
977 }
978 r = onig_st_insert_strend(t, e->name, (e->name + (name_end - name)),
979 (HashDataType )e);
980 if (r < 0) return r;
981
982 e->name_len = (int )(name_end - name);
983 e->back_num = 0;
984 e->back_alloc = 0;
985 e->back_refs = (int* )NULL;
986
987 #else
988
989 if (IS_NULL(t)) {
990 alloc = INIT_NAMES_ALLOC_NUM;
991 t = (NameTable* )xmalloc(sizeof(NameTable));
992 CHECK_NULL_RETURN_MEMERR(t);
993 t->e = NULL;
994 t->alloc = 0;
995 t->num = 0;
996
997 t->e = (NameEntry* )xmalloc(sizeof(NameEntry) * alloc);
998 if (IS_NULL(t->e)) {
999 xfree(t);
1000 return ONIGERR_MEMORY;
1001 }
1002 t->alloc = alloc;
1003 reg->name_table = t;
1004 goto clear;
1005 }
1006 else if (t->num == t->alloc) {
1007 int i;
1008
1009 alloc = t->alloc * 2;
1010 t->e = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc, sizeof(NameEntry) * t->alloc);
1011 CHECK_NULL_RETURN_MEMERR(t->e);
1012 t->alloc = alloc;
1013
1014 clear:
1015 for (i = t->num; i < t->alloc; i++) {
1016 t->e[i].name = NULL;
1017 t->e[i].name_len = 0;
1018 t->e[i].back_num = 0;
1019 t->e[i].back_alloc = 0;
1020 t->e[i].back_refs = (int* )NULL;
1021 }
1022 }
1023 e = &(t->e[t->num]);
1024 t->num++;
1025 e->name = onigenc_strdup(reg->enc, name, name_end);
1026 if (IS_NULL(e->name)) return ONIGERR_MEMORY;
1027 e->name_len = name_end - name;
1028 #endif
1029 }
1030
1031 if (e->back_num >= 1 &&
1032 ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME)) {
1033 onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME,
1034 name, name_end);
1035 return ONIGERR_MULTIPLEX_DEFINED_NAME;
1036 }
1037
1038 e->back_num++;
1039 if (e->back_num == 1) {
1040 e->back_ref1 = backref;
1041 }
1042 else {
1043 if (e->back_num == 2) {
1044 alloc = INIT_NAME_BACKREFS_ALLOC_NUM;
1045 e->back_refs = (int* )xmalloc(sizeof(int) * alloc);
1046 CHECK_NULL_RETURN_MEMERR(e->back_refs);
1047 e->back_alloc = alloc;
1048 e->back_refs[0] = e->back_ref1;
1049 e->back_refs[1] = backref;
1050 }
1051 else {
1052 if (e->back_num > e->back_alloc) {
1053 alloc = e->back_alloc * 2;
1054 e->back_refs = (int* )xrealloc(e->back_refs, sizeof(int) * alloc, sizeof(int) * e->back_alloc);
1055 CHECK_NULL_RETURN_MEMERR(e->back_refs);
1056 e->back_alloc = alloc;
1057 }
1058 e->back_refs[e->back_num - 1] = backref;
1059 }
1060 }
1061
1062 return 0;
1063 }
1064
1065 extern int
1066 onig_name_to_group_numbers(regex_t* reg, const UChar* name,
1067 const UChar* name_end, int** nums)
1068 {
1069 NameEntry* e = name_find(reg, name, name_end);
1070
1071 if (IS_NULL(e)) return ONIGERR_UNDEFINED_NAME_REFERENCE;
1072
1073 switch (e->back_num) {
1074 case 0:
1075 break;
1076 case 1:
1077 *nums = &(e->back_ref1);
1078 break;
1079 default:
1080 *nums = e->back_refs;
1081 break;
1082 }
1083 return e->back_num;
1084 }
1085
1086 extern int
1087 onig_name_to_backref_number(regex_t* reg, const UChar* name,
1088 const UChar* name_end, OnigRegion *region)
1089 {
1090 int i, n, *nums;
1091
1092 n = onig_name_to_group_numbers(reg, name, name_end, &nums);
1093 if (n < 0)
1094 return n;
1095 else if (n == 0)
1096 return ONIGERR_PARSER_BUG;
1097 else if (n == 1)
1098 return nums[0];
1099 else {
1100 if (IS_NOT_NULL(region)) {
1101 for (i = n - 1; i >= 0; i--) {
1102 if (region->beg[nums[i]] != ONIG_REGION_NOTPOS)
1103 return nums[i];
1104 }
1105 }
1106 return nums[n - 1];
1107 }
1108 }
1109
1110 extern int
1111 onig_noname_group_capture_is_active(regex_t* reg)
1112 {
1113 if (ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_DONT_CAPTURE_GROUP))
1114 return 0;
1115
1116 if (onig_number_of_names(reg) > 0 &&
1117 IS_SYNTAX_BV(reg->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
1118 !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
1119 return 0;
1120 }
1121
1122 return 1;
1123 }
1124
1125 #ifdef USE_CALLOUT
1126
/* Registration record of one named callout. */
typedef struct {
  OnigCalloutType type;
  int in; /* NOTE(review): meaning not visible in this part of the file */
  OnigCalloutFunc start_func;
  OnigCalloutFunc end_func;
  int arg_num; /* total number of arguments */
  int opt_arg_num; /* how many of them, counted from the end, are optional */
  unsigned int arg_types[ONIG_CALLOUT_MAX_ARGS_NUM];
  OnigValue opt_defaults[ONIG_CALLOUT_MAX_ARGS_NUM]; /* string defaults are freed with the list */
  UChar* name; /* reference to GlobalCalloutNameTable entry: e->name */
} CalloutNameListEntry;

/* Growable array of callout registration records. */
typedef struct {
  int n; /* records in use */
  int alloc; /* records allocated */
  CalloutNameListEntry* v;
} CalloutNameListType;

/* The process-wide list of registered named callouts. */
static CalloutNameListType* GlobalCalloutNameList;
1146
1147 static int
1148 make_callout_func_list(CalloutNameListType** rs, int init_size)
1149 {
1150 CalloutNameListType* s;
1151 CalloutNameListEntry* v;
1152
1153 *rs = 0;
1154
1155 s = xmalloc(sizeof(*s));
1156 if (IS_NULL(s)) return ONIGERR_MEMORY;
1157
1158 v = (CalloutNameListEntry* )xmalloc(sizeof(CalloutNameListEntry) * init_size);
1159 if (IS_NULL(v)) {
1160 xfree(s);
1161 return ONIGERR_MEMORY;
1162 }
1163
1164 s->n = 0;
1165 s->alloc = init_size;
1166 s->v = v;
1167
1168 *rs = s;
1169 return ONIG_NORMAL;
1170 }
1171
1172 static void
1173 free_callout_func_list(CalloutNameListType* s)
1174 {
1175 if (IS_NOT_NULL(s)) {
1176 if (IS_NOT_NULL(s->v)) {
1177 int i, j;
1178
1179 for (i = 0; i < s->n; i++) {
1180 CalloutNameListEntry* e = s->v + i;
1181 for (j = e->arg_num - e->opt_arg_num; j < e->arg_num; j++) {
1182 if (e->arg_types[j] == ONIG_TYPE_STRING) {
1183 UChar* p = e->opt_defaults[j].s.start;
1184 if (IS_NOT_NULL(p)) xfree(p);
1185 }
1186 }
1187 }
1188 xfree(s->v);
1189 }
1190 xfree(s);
1191 }
1192 }
1193
1194 static int
1195 callout_func_list_add(CalloutNameListType* s, int* rid)
1196 {
1197 if (s->n >= s->alloc) {
1198 int new_size = s->alloc * 2;
1199 CalloutNameListEntry* nv = (CalloutNameListEntry* )
1200 xrealloc(s->v, sizeof(CalloutNameListEntry) * new_size, sizeof(CalloutNameListEntry)*s->alloc);
1201 if (IS_NULL(nv)) return ONIGERR_MEMORY;
1202
1203 s->alloc = new_size;
1204 s->v = nv;
1205 }
1206
1207 *rid = s->n;
1208
1209 xmemset(&(s->v[s->n]), 0, sizeof(*(s->v)));
1210 s->n++;
1211 return ONIG_NORMAL;
1212 }
1213
1214
/* One entry of the callout name table: the name and its id. */
typedef struct {
  UChar* name;
  int name_len; /* byte length */
  int id;
} CalloutNameEntry;

/* The callout name table is an st hash table when the st library is used,
   otherwise a growable array of entries. */
#ifdef USE_ST_LIBRARY
typedef st_table CalloutNameTable;
#else
typedef struct {
  CalloutNameEntry* e;
  int num;
  int alloc;
} CalloutNameTable;
#endif

/* Process-wide callout name table and id counter; both are reset by
   global_callout_name_table_free(). */
static CalloutNameTable* GlobalCalloutNameTable;
static int CalloutNameIDCounter;
1233
1234 #ifdef USE_ST_LIBRARY
1235
1236 static int
1237 i_free_callout_name_entry(st_callout_name_key* key, CalloutNameEntry* e,
1238 void* arg ARG_UNUSED)
1239 {
1240 xfree(e->name);
1241 /*xfree(key->s); */ /* is same as e->name */
1242 xfree(key);
1243 xfree(e);
1244 return ST_DELETE;
1245 }
1246
1247 static int
1248 callout_name_table_clear(CalloutNameTable* t)
1249 {
1250 if (IS_NOT_NULL(t)) {
1251 onig_st_foreach(t, i_free_callout_name_entry, 0);
1252 }
1253 return 0;
1254 }
1255
1256 static int
1257 global_callout_name_table_free(void)
1258 {
1259 if (IS_NOT_NULL(GlobalCalloutNameTable)) {
1260 int r = callout_name_table_clear(GlobalCalloutNameTable);
1261 if (r != 0) return r;
1262
1263 onig_st_free_table(GlobalCalloutNameTable);
1264 GlobalCalloutNameTable = 0;
1265 CalloutNameIDCounter = 0;
1266 }
1267
1268 return 0;
1269 }
1270
1271 static CalloutNameEntry*
1272 callout_name_find(OnigEncoding enc, int is_not_single,
1273 const UChar* name, const UChar* name_end)
1274 {
1275 int r;
1276 CalloutNameEntry* e;
1277 CalloutNameTable* t = GlobalCalloutNameTable;
1278
1279 e = (CalloutNameEntry* )NULL;
1280 if (IS_NOT_NULL(t)) {
1281 r = onig_st_lookup_callout_name_table(t, enc, is_not_single, name, name_end,
1282 (HashDataType* )((void* )(&e)));
1283 if (r == 0) { /* not found */
1284 if (enc != ONIG_ENCODING_ASCII &&
1285 ONIGENC_IS_ASCII_COMPATIBLE_ENCODING(enc)) {
1286 enc = ONIG_ENCODING_ASCII;
1287 onig_st_lookup_callout_name_table(t, enc, is_not_single, name, name_end,
1288 (HashDataType* )((void* )(&e)));
1289 }
1290 }
1291 }
1292 return e;
1293 }
1294
1295 #else
1296
1297 static int
1298 callout_name_table_clear(CalloutNameTable* t)
1299 {
1300 int i;
1301 CalloutNameEntry* e;
1302
1303 if (IS_NOT_NULL(t)) {
1304 for (i = 0; i < t->num; i++) {
1305 e = &(t->e[i]);
1306 if (IS_NOT_NULL(e->name)) {
1307 xfree(e->name);
1308 e->name = NULL;
1309 e->name_len = 0;
1310 e->id = 0;
1311 e->func = 0;
1312 }
1313 }
1314 if (IS_NOT_NULL(t->e)) {
1315 xfree(t->e);
1316 t->e = NULL;
1317 }
1318 t->num = 0;
1319 }
1320 return 0;
1321 }
1322
1323 static int
1324 global_callout_name_table_free(void)
1325 {
1326 if (IS_NOT_NULL(GlobalCalloutNameTable)) {
1327 int r = callout_name_table_clear(GlobalCalloutNameTable);
1328 if (r != 0) return r;
1329
1330 xfree(GlobalCalloutNameTable);
1331 GlobalCalloutNameTable = 0;
1332 CalloutNameIDCounter = 0;
1333 }
1334 return 0;
1335 }
1336
1337 static CalloutNameEntry*
1338 callout_name_find(UChar* name, UChar* name_end)
1339 {
1340 int i, len;
1341 CalloutNameEntry* e;
1342 CalloutNameTable* t = Calloutnames;
1343
1344 if (IS_NOT_NULL(t)) {
1345 len = name_end - name;
1346 for (i = 0; i < t->num; i++) {
1347 e = &(t->e[i]);
1348 if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)
1349 return e;
1350 }
1351 }
1352 return (CalloutNameEntry* )NULL;
1353 }
1354
1355 #endif
1356
1357 /* name string must be single byte char string. */
1358 static int
1359 callout_name_entry(CalloutNameEntry** rentry, OnigEncoding enc,
1360 int is_not_single, UChar* name, UChar* name_end)
1361 {
1362 int r;
1363 CalloutNameEntry* e;
1364 CalloutNameTable* t = GlobalCalloutNameTable;
1365
1366 *rentry = 0;
1367 if (name_end - name <= 0)
1368 return ONIGERR_INVALID_CALLOUT_NAME;
1369
1370 e = callout_name_find(enc, is_not_single, name, name_end);
1371 if (IS_NULL(e)) {
1372 #ifdef USE_ST_LIBRARY
1373 if (IS_NULL(t)) {
1374 t = onig_st_init_callout_name_table_with_size(INIT_NAMES_ALLOC_NUM);
1375 GlobalCalloutNameTable = t;
1376 }
1377 e = (CalloutNameEntry* )xmalloc(sizeof(CalloutNameEntry));
1378 CHECK_NULL_RETURN_MEMERR(e);
1379
1380 e->name = onigenc_strdup(enc, name, name_end);
1381 if (IS_NULL(e->name)) {
1382 xfree(e); return ONIGERR_MEMORY;
1383 }
1384
1385 r = st_insert_callout_name_table(t, enc, is_not_single,
1386 e->name, (e->name + (name_end - name)),
1387 (HashDataType )e);
1388 if (r < 0) return r;
1389
1390 #else
1391
1392 int alloc;
1393
1394 if (IS_NULL(t)) {
1395 alloc = INIT_NAMES_ALLOC_NUM;
1396 t = (CalloutNameTable* )xmalloc(sizeof(CalloutNameTable));
1397 CHECK_NULL_RETURN_MEMERR(t);
1398 t->e = NULL;
1399 t->alloc = 0;
1400 t->num = 0;
1401
1402 t->e = (CalloutNameEntry* )xmalloc(sizeof(CalloutNameEntry) * alloc);
1403 if (IS_NULL(t->e)) {
1404 xfree(t);
1405 return ONIGERR_MEMORY;
1406 }
1407 t->alloc = alloc;
1408 GlobalCalloutNameTable = t;
1409 goto clear;
1410 }
1411 else if (t->num == t->alloc) {
1412 int i;
1413
1414 alloc = t->alloc * 2;
1415 t->e = (CalloutNameEntry* )xrealloc(t->e, sizeof(CalloutNameEntry) * alloc, sizeof(CalloutNameEntry)*t->alloc);
1416 CHECK_NULL_RETURN_MEMERR(t->e);
1417 t->alloc = alloc;
1418
1419 clear:
1420 for (i = t->num; i < t->alloc; i++) {
1421 t->e[i].name = NULL;
1422 t->e[i].name_len = 0;
1423 t->e[i].id = 0;
1424 }
1425 }
1426 e = &(t->e[t->num]);
1427 t->num++;
1428 e->name = onigenc_strdup(enc, name, name_end);
1429 if (IS_NULL(e->name)) return ONIGERR_MEMORY;
1430 #endif
1431
1432 CalloutNameIDCounter++;
1433 e->id = CalloutNameIDCounter;
1434 e->name_len = (int )(name_end - name);
1435 }
1436
1437 *rentry = e;
1438 return e->id;
1439 }
1440
1441 static int
1442 is_allowed_callout_name(OnigEncoding enc, UChar* name, UChar* name_end)
1443 {
1444 UChar* p;
1445 OnigCodePoint c;
1446
1447 if (name >= name_end) return 0;
1448
1449 p = name;
1450 while (p < name_end) {
1451 c = ONIGENC_MBC_TO_CODE(enc, p, name_end);
1452 if (! IS_ALLOWED_CODE_IN_CALLOUT_NAME(c))
1453 return 0;
1454
1455 if (p == name) {
1456 if (c >= '0' && c <= '9') return 0;
1457 }
1458
1459 p += ONIGENC_MBC_ENC_LEN(enc, p);
1460 }
1461
1462 return 1;
1463 }
1464
1465 static int
1466 is_allowed_callout_tag_name(OnigEncoding enc, UChar* name, UChar* name_end)
1467 {
1468 UChar* p;
1469 OnigCodePoint c;
1470
1471 if (name >= name_end) return 0;
1472
1473 p = name;
1474 while (p < name_end) {
1475 c = ONIGENC_MBC_TO_CODE(enc, p, name_end);
1476 if (! IS_ALLOWED_CODE_IN_CALLOUT_TAG_NAME(c))
1477 return 0;
1478
1479 if (p == name) {
1480 if (c >= '0' && c <= '9') return 0;
1481 }
1482
1483 p += ONIGENC_MBC_ENC_LEN(enc, p);
1484 }
1485
1486 return 1;
1487 }
1488
1489 extern int
1490 onig_set_callout_of_name(OnigEncoding enc, OnigCalloutType callout_type,
1491 UChar* name, UChar* name_end, int in,
1492 OnigCalloutFunc start_func,
1493 OnigCalloutFunc end_func,
1494 int arg_num, unsigned int arg_types[],
1495 int opt_arg_num, OnigValue opt_defaults[])
1496 {
1497 int r;
1498 int i;
1499 int j;
1500 int id;
1501 int is_not_single;
1502 CalloutNameEntry* e;
1503 CalloutNameListEntry* fe;
1504
1505 if (callout_type != ONIG_CALLOUT_TYPE_SINGLE)
1506 return ONIGERR_INVALID_ARGUMENT;
1507
1508 if (arg_num < 0 || arg_num > ONIG_CALLOUT_MAX_ARGS_NUM)
1509 return ONIGERR_INVALID_CALLOUT_ARG;
1510
1511 if (opt_arg_num < 0 || opt_arg_num > arg_num)
1512 return ONIGERR_INVALID_CALLOUT_ARG;
1513
1514 if (start_func == 0 && end_func == 0)
1515 return ONIGERR_INVALID_CALLOUT_ARG;
1516
1517 if ((in & ONIG_CALLOUT_IN_PROGRESS) == 0 && (in & ONIG_CALLOUT_IN_RETRACTION) == 0)
1518 return ONIGERR_INVALID_CALLOUT_ARG;
1519
1520 for (i = 0; i < arg_num; i++) {
1521 unsigned int t = arg_types[i];
1522 if (t == ONIG_TYPE_VOID)
1523 return ONIGERR_INVALID_CALLOUT_ARG;
1524 else {
1525 if (i >= arg_num - opt_arg_num) {
1526 if (t != ONIG_TYPE_LONG && t != ONIG_TYPE_CHAR && t != ONIG_TYPE_STRING &&
1527 t != ONIG_TYPE_TAG)
1528 return ONIGERR_INVALID_CALLOUT_ARG;
1529 }
1530 else {
1531 if (t != ONIG_TYPE_LONG) {
1532 t = t & ~ONIG_TYPE_LONG;
1533 if (t != ONIG_TYPE_CHAR && t != ONIG_TYPE_STRING && t != ONIG_TYPE_TAG)
1534 return ONIGERR_INVALID_CALLOUT_ARG;
1535 }
1536 }
1537 }
1538 }
1539
1540 if (! is_allowed_callout_name(enc, name, name_end)) {
1541 return ONIGERR_INVALID_CALLOUT_NAME;
1542 }
1543
1544 is_not_single = (callout_type != ONIG_CALLOUT_TYPE_SINGLE);
1545 id = callout_name_entry(&e, enc, is_not_single, name, name_end);
1546 if (id < 0) return id;
1547
1548 r = ONIG_NORMAL;
1549 if (IS_NULL(GlobalCalloutNameList)) {
1550 r = make_callout_func_list(&GlobalCalloutNameList, 10);
1551 if (r != ONIG_NORMAL) return r;
1552 }
1553
1554 while (id >= GlobalCalloutNameList->n) {
1555 int rid;
1556 r = callout_func_list_add(GlobalCalloutNameList, &rid);
1557 if (r != ONIG_NORMAL) return r;
1558 }
1559
1560 fe = GlobalCalloutNameList->v + id;
1561 fe->type = callout_type;
1562 fe->in = in;
1563 fe->start_func = start_func;
1564 fe->end_func = end_func;
1565 fe->arg_num = arg_num;
1566 fe->opt_arg_num = opt_arg_num;
1567 fe->name = e->name;
1568
1569 for (i = 0; i < arg_num; i++) {
1570 fe->arg_types[i] = arg_types[i];
1571 }
1572 for (i = arg_num - opt_arg_num, j = 0; i < arg_num; i++, j++) {
1573 if (fe->arg_types[i] == ONIG_TYPE_STRING) {
1574 OnigValue* val = opt_defaults + j;
1575 UChar* ds = onigenc_strdup(enc, val->s.start, val->s.end);
1576 CHECK_NULL_RETURN_MEMERR(ds);
1577
1578 fe->opt_defaults[i].s.start = ds;
1579 fe->opt_defaults[i].s.end = ds + (val->s.end - val->s.start);
1580 }
1581 else {
1582 fe->opt_defaults[i] = opt_defaults[j];
1583 }
1584 }
1585
1586 r = id;
1587 return r;
1588 }
1589
1590 static int
1591 get_callout_name_id_by_name(OnigEncoding enc, int is_not_single,
1592 UChar* name, UChar* name_end, int* rid)
1593 {
1594 int r;
1595 CalloutNameEntry* e;
1596
1597 if (! is_allowed_callout_name(enc, name, name_end)) {
1598 return ONIGERR_INVALID_CALLOUT_NAME;
1599 }
1600
1601 e = callout_name_find(enc, is_not_single, name, name_end);
1602 if (IS_NULL(e)) {
1603 return ONIGERR_UNDEFINED_CALLOUT_NAME;
1604 }
1605
1606 r = ONIG_NORMAL;
1607 *rid = e->id;
1608
1609 return r;
1610 }
1611
1612 extern OnigCalloutFunc
1613 onig_get_callout_start_func(regex_t* reg, int callout_num)
1614 {
1615 /* If used for callouts of contents, return 0. */
1616 CalloutListEntry* e;
1617
1618 e = onig_reg_callout_list_at(reg, callout_num);
1619 return e->start_func;
1620 }
1621
1622 extern const UChar*
1623 onig_get_callout_tag_start(regex_t* reg, int callout_num)
1624 {
1625 CalloutListEntry* e = onig_reg_callout_list_at(reg, callout_num);
1626 return e->tag_start;
1627 }
1628
1629 extern const UChar*
1630 onig_get_callout_tag_end(regex_t* reg, int callout_num)
1631 {
1632 CalloutListEntry* e = onig_reg_callout_list_at(reg, callout_num);
1633 return e->tag_end;
1634 }
1635
1636
1637 extern OnigCalloutType
1638 onig_get_callout_type_by_name_id(int name_id)
1639 {
1640 if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
1641 return 0;
1642
1643 return GlobalCalloutNameList->v[name_id].type;
1644 }
1645
1646 extern OnigCalloutFunc
1647 onig_get_callout_start_func_by_name_id(int name_id)
1648 {
1649 if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
1650 return 0;
1651
1652 return GlobalCalloutNameList->v[name_id].start_func;
1653 }
1654
1655 extern OnigCalloutFunc
1656 onig_get_callout_end_func_by_name_id(int name_id)
1657 {
1658 if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
1659 return 0;
1660
1661 return GlobalCalloutNameList->v[name_id].end_func;
1662 }
1663
1664 extern int
1665 onig_get_callout_in_by_name_id(int name_id)
1666 {
1667 if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
1668 return 0;
1669
1670 return GlobalCalloutNameList->v[name_id].in;
1671 }
1672
1673 static int
1674 get_callout_arg_num_by_name_id(int name_id)
1675 {
1676 return GlobalCalloutNameList->v[name_id].arg_num;
1677 }
1678
1679 static int
1680 get_callout_opt_arg_num_by_name_id(int name_id)
1681 {
1682 return GlobalCalloutNameList->v[name_id].opt_arg_num;
1683 }
1684
1685 static unsigned int
1686 get_callout_arg_type_by_name_id(int name_id, int index)
1687 {
1688 return GlobalCalloutNameList->v[name_id].arg_types[index];
1689 }
1690
1691 static OnigValue
1692 get_callout_opt_default_by_name_id(int name_id, int index)
1693 {
1694 return GlobalCalloutNameList->v[name_id].opt_defaults[index];
1695 }
1696
1697 extern UChar*
1698 onig_get_callout_name_by_name_id(int name_id)
1699 {
1700 if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
1701 return 0;
1702
1703 return GlobalCalloutNameList->v[name_id].name;
1704 }
1705
1706 extern int
1707 onig_global_callout_names_free(void)
1708 {
1709 free_callout_func_list(GlobalCalloutNameList);
1710 GlobalCalloutNameList = 0;
1711
1712 global_callout_name_table_free();
1713 return ONIG_NORMAL;
1714 }
1715
1716
/* Per-regex table mapping a tag name to a CalloutTagVal. */
typedef st_table CalloutTagTable;
/* Value stored per tag; i_callout_callout_list_set() uses (value - 1) as the
   callout_list index. */
typedef intptr_t CalloutTagVal;

/* Set in CalloutListEntry.flag for callouts that have a tag. */
#define CALLOUT_TAG_LIST_FLAG_TAG_EXIST (1<<0)
1721
1722 static int
1723 i_callout_callout_list_set(UChar* key, CalloutTagVal e, void* arg)
1724 {
1725 int num;
1726 RegexExt* ext = (RegexExt* )arg;
1727
1728 num = (int )e - 1;
1729 ext->callout_list[num].flag |= CALLOUT_TAG_LIST_FLAG_TAG_EXIST;
1730 return ST_CONTINUE;
1731 }
1732
1733 static int
1734 setup_ext_callout_list_values(regex_t* reg)
1735 {
1736 int i, j;
1737 RegexExt* ext;
1738
1739 ext = REG_EXTP(reg);
1740 if (IS_NOT_NULL(ext->tag_table)) {
1741 onig_st_foreach((CalloutTagTable *)ext->tag_table, i_callout_callout_list_set,
1742 (st_data_t )ext);
1743 }
1744
1745 for (i = 0; i < ext->callout_num; i++) {
1746 CalloutListEntry* e = ext->callout_list + i;
1747 if (e->of == ONIG_CALLOUT_OF_NAME) {
1748 for (j = 0; j < e->u.arg.num; j++) {
1749 if (e->u.arg.types[j] == ONIG_TYPE_TAG) {
1750 UChar* start;
1751 UChar* end;
1752 int num;
1753 start = e->u.arg.vals[j].s.start;
1754 end = e->u.arg.vals[j].s.end;
1755 num = onig_get_callout_num_by_tag(reg, start, end);
1756 if (num < 0) return num;
1757 e->u.arg.vals[j].tag = num;
1758 }
1759 }
1760 }
1761 }
1762
1763 return ONIG_NORMAL;
1764 }
1765
1766 extern int
1767 onig_callout_tag_is_exist_at_callout_num(regex_t* reg, int callout_num)
1768 {
1769 RegexExt* ext = REG_EXTP(reg);
1770
1771 if (IS_NULL(ext) || IS_NULL(ext->callout_list)) return 0;
1772 if (callout_num > ext->callout_num) return 0;
1773
1774 return (ext->callout_list[callout_num].flag &
1775 CALLOUT_TAG_LIST_FLAG_TAG_EXIST) != 0 ? 1 : 0;
1776 }
1777
1778 static int
1779 i_free_callout_tag_entry(UChar* key, CalloutTagVal e, void* arg ARG_UNUSED)
1780 {
1781 xfree(key);
1782 return ST_DELETE;
1783 }
1784
1785 static int
1786 callout_tag_table_clear(CalloutTagTable* t)
1787 {
1788 if (IS_NOT_NULL(t)) {
1789 onig_st_foreach(t, i_free_callout_tag_entry, 0);
1790 }
1791 return 0;
1792 }
1793
1794 extern int
1795 onig_callout_tag_table_free(void* table)
1796 {
1797 CalloutTagTable* t = (CalloutTagTable* )table;
1798
1799 if (IS_NOT_NULL(t)) {
1800 int r = callout_tag_table_clear(t);
1801 if (r != 0) return r;
1802
1803 onig_st_free_table(t);
1804 }
1805
1806 return 0;
1807 }
1808
1809 extern int
1810 onig_get_callout_num_by_tag(regex_t* reg,
1811 const UChar* tag, const UChar* tag_end)
1812 {
1813 int r;
1814 RegexExt* ext;
1815 CalloutTagVal e;
1816
1817 ext = REG_EXTP(reg);
1818 if (IS_NULL(ext) || IS_NULL(ext->tag_table))
1819 return ONIGERR_INVALID_CALLOUT_TAG_NAME;
1820
1821 r = onig_st_lookup_strend(ext->tag_table, tag, tag_end,
1822 (HashDataType* )((void* )(&e)));
1823 if (r == 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME;
1824 return (int )e;
1825 }
1826
1827 static CalloutTagVal
1828 callout_tag_find(CalloutTagTable* t, const UChar* name, const UChar* name_end)
1829 {
1830 CalloutTagVal e;
1831
1832 e = -1;
1833 if (IS_NOT_NULL(t)) {
1834 onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));
1835 }
1836 return e;
1837 }
1838
1839 static int
1840 callout_tag_table_new(CalloutTagTable** rt)
1841 {
1842 CalloutTagTable* t;
1843
1844 *rt = 0;
1845 t = onig_st_init_strend_table_with_size(INIT_TAG_NAMES_ALLOC_NUM);
1846 CHECK_NULL_RETURN_MEMERR(t);
1847
1848 *rt = t;
1849 return ONIG_NORMAL;
1850 }
1851
1852 static int
1853 callout_tag_entry_raw(CalloutTagTable* t, UChar* name, UChar* name_end,
1854 CalloutTagVal entry_val)
1855 {
1856 int r;
1857 CalloutTagVal val;
1858
1859 if (name_end - name <= 0)
1860 return ONIGERR_INVALID_CALLOUT_TAG_NAME;
1861
1862 val = callout_tag_find(t, name, name_end);
1863 if (val >= 0)
1864 return ONIGERR_MULTIPLEX_DEFINED_NAME;
1865
1866 r = onig_st_insert_strend(t, name, name_end, (HashDataType )entry_val);
1867 if (r < 0) return r;
1868
1869 return ONIG_NORMAL;
1870 }
1871
1872 static int
1873 ext_ensure_tag_table(regex_t* reg)
1874 {
1875 int r;
1876 RegexExt* ext;
1877 CalloutTagTable* t;
1878
1879 ext = onig_get_regex_ext(reg);
1880 CHECK_NULL_RETURN_MEMERR(ext);
1881
1882 if (IS_NULL(ext->tag_table)) {
1883 r = callout_tag_table_new(&t);
1884 if (r != ONIG_NORMAL) return r;
1885
1886 ext->tag_table = t;
1887 }
1888
1889 return ONIG_NORMAL;
1890 }
1891
1892 static int
1893 callout_tag_entry(regex_t* reg, UChar* name, UChar* name_end,
1894 CalloutTagVal entry_val)
1895 {
1896 int r;
1897 RegexExt* ext;
1898 CalloutListEntry* e;
1899
1900 r = ext_ensure_tag_table(reg);
1901 if (r != ONIG_NORMAL) return r;
1902
1903 ext = onig_get_regex_ext(reg);
1904 r = callout_tag_entry_raw(ext->tag_table, name, name_end, entry_val);
1905
1906 e = onig_reg_callout_list_at(reg, (int )entry_val);
1907 e->tag_start = name;
1908 e->tag_end = name_end;
1909
1910 return r;
1911 }
1912
1913 #endif /* USE_CALLOUT */
1914
1915
/* Element count of the first heap-allocated MemEnv array (see scan_env_add_mem_entry). */
#define INIT_SCANENV_MEMENV_ALLOC_SIZE 16
1917
1918 static void
1919 scan_env_clear(ScanEnv* env)
1920 {
1921 MEM_STATUS_CLEAR(env->capture_history);
1922 MEM_STATUS_CLEAR(env->bt_mem_start);
1923 MEM_STATUS_CLEAR(env->bt_mem_end);
1924 MEM_STATUS_CLEAR(env->backrefed_mem);
1925 env->error = (UChar* )NULL;
1926 env->error_end = (UChar* )NULL;
1927 env->num_call = 0;
1928
1929 #ifdef USE_CALL
1930 env->unset_addr_list = NULL;
1931 env->has_call_zero = 0;
1932 #endif
1933
1934 env->num_mem = 0;
1935 env->num_named = 0;
1936 env->mem_alloc = 0;
1937 env->mem_env_dynamic = (MemEnv* )NULL;
1938
1939 xmemset(env->mem_env_static, 0, sizeof(env->mem_env_static));
1940
1941 env->parse_depth = 0;
1942 env->keep_num = 0;
1943 env->save_num = 0;
1944 env->save_alloc_num = 0;
1945 env->saves = 0;
1946 }
1947
1948 static int
1949 scan_env_add_mem_entry(ScanEnv* env)
1950 {
1951 int i, need, alloc;
1952 MemEnv* p;
1953
1954 need = env->num_mem + 1;
1955 if (need > MaxCaptureNum && MaxCaptureNum != 0)
1956 return ONIGERR_TOO_MANY_CAPTURES;
1957
1958 if (need >= SCANENV_MEMENV_SIZE) {
1959 if (env->mem_alloc <= need) {
1960 if (IS_NULL(env->mem_env_dynamic)) {
1961 alloc = INIT_SCANENV_MEMENV_ALLOC_SIZE;
1962 p = (MemEnv* )xmalloc(sizeof(MemEnv) * alloc);
1963 CHECK_NULL_RETURN_MEMERR(p);
1964 xmemcpy(p, env->mem_env_static, sizeof(env->mem_env_static));
1965 }
1966 else {
1967 alloc = env->mem_alloc * 2;
1968 p = (MemEnv* )xrealloc(env->mem_env_dynamic, sizeof(MemEnv) * alloc, sizeof(MemEnv)*env->mem_alloc);
1969 CHECK_NULL_RETURN_MEMERR(p);
1970 }
1971
1972 for (i = env->num_mem + 1; i < alloc; i++) {
1973 p[i].node = NULL_NODE;
1974 #if 0
1975 p[i].in = 0;
1976 p[i].recursion = 0;
1977 #endif
1978 }
1979
1980 env->mem_env_dynamic = p;
1981 env->mem_alloc = alloc;
1982 }
1983 }
1984
1985 env->num_mem++;
1986 return env->num_mem;
1987 }
1988
1989 static int
1990 scan_env_set_mem_node(ScanEnv* env, int num, Node* node)
1991 {
1992 if (env->num_mem >= num)
1993 SCANENV_MEMENV(env)[num].node = node;
1994 else
1995 return ONIGERR_PARSER_BUG;
1996 return 0;
1997 }
1998
1999 extern void
2000 onig_node_free(Node* node)
2001 {
2002 start:
2003 if (IS_NULL(node)) return ;
2004
2005 #ifdef DEBUG_NODE_FREE
2006 fprintf(stderr, "onig_node_free: %p\n", node);
2007 #endif
2008
2009 switch (NODE_TYPE(node)) {
2010 case NODE_STRING:
2011 if (STR_(node)->capa != 0 &&
2012 IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) {
2013 xfree(STR_(node)->s);
2014 }
2015 break;
2016
2017 case NODE_LIST:
2018 case NODE_ALT:
2019 onig_node_free(NODE_CAR(node));
2020 {
2021 Node* next_node = NODE_CDR(node);
2022
2023 xfree(node);
2024 node = next_node;
2025 goto start;
2026 }
2027 break;
2028
2029 case NODE_CCLASS:
2030 {
2031 CClassNode* cc = CCLASS_(node);
2032
2033 if (cc->mbuf)
2034 bbuf_free(cc->mbuf);
2035 }
2036 break;
2037
2038 case NODE_BACKREF:
2039 if (IS_NOT_NULL(BACKREF_(node)->back_dynamic))
2040 xfree(BACKREF_(node)->back_dynamic);
2041 break;
2042
2043 case NODE_ENCLOSURE:
2044 if (NODE_BODY(node))
2045 onig_node_free(NODE_BODY(node));
2046
2047 {
2048 EnclosureNode* en = ENCLOSURE_(node);
2049 if (en->type == ENCLOSURE_IF_ELSE) {
2050 onig_node_free(en->te.Then);
2051 onig_node_free(en->te.Else);
2052 }
2053 }
2054 break;
2055
2056 case NODE_QUANT:
2057 case NODE_ANCHOR:
2058 if (NODE_BODY(node))
2059 onig_node_free(NODE_BODY(node));
2060 break;
2061
2062 case NODE_CTYPE:
2063 case NODE_CALL:
2064 case NODE_GIMMICK:
2065 break;
2066 }
2067
2068 xfree(node);
2069 }
2070
2071 static void
2072 cons_node_free_alone(Node* node)
2073 {
2074 NODE_CAR(node) = 0;
2075 NODE_CDR(node) = 0;
2076 onig_node_free(node);
2077 }
2078
2079 static Node*
2080 node_new(void)
2081 {
2082 Node* node;
2083
2084 node = (Node* )xmalloc(sizeof(Node));
2085 xmemset(node, 0, sizeof(*node));
2086
2087 #ifdef DEBUG_NODE_FREE
2088 fprintf(stderr, "node_new: %p\n", node);
2089 #endif
2090 return node;
2091 }
2092
2093
2094 static void
2095 initialize_cclass(CClassNode* cc)
2096 {
2097 BITSET_CLEAR(cc->bs);
2098 cc->flags = 0;
2099 cc->mbuf = NULL;
2100 }
2101
2102 static Node*
2103 node_new_cclass(void)
2104 {
2105 Node* node = node_new();
2106 CHECK_NULL_RETURN(node);
2107
2108 NODE_SET_TYPE(node, NODE_CCLASS);
2109 initialize_cclass(CCLASS_(node));
2110 return node;
2111 }
2112
2113 static Node*
2114 node_new_ctype(int type, int not, OnigOptionType options)
2115 {
2116 Node* node = node_new();
2117 CHECK_NULL_RETURN(node);
2118
2119 NODE_SET_TYPE(node, NODE_CTYPE);
2120 CTYPE_(node)->ctype = type;
2121 CTYPE_(node)->not = not;
2122 CTYPE_(node)->options = options;
2123 CTYPE_(node)->ascii_mode = IS_ASCII_MODE_CTYPE_OPTION(type, options);
2124 return node;
2125 }
2126
2127 static Node*
2128 node_new_anychar(void)
2129 {
2130 Node* node = node_new_ctype(CTYPE_ANYCHAR, 0, ONIG_OPTION_NONE);
2131 return node;
2132 }
2133
2134 static Node*
2135 node_new_anychar_with_fixed_option(OnigOptionType option)
2136 {
2137 CtypeNode* ct;
2138 Node* node;
2139
2140 node = node_new_anychar();
2141 ct = CTYPE_(node);
2142 ct->options = option;
2143 NODE_STATUS_ADD(node, FIXED_OPTION);
2144 return node;
2145 }
2146
2147 static int
2148 node_new_no_newline(Node** node, ScanEnv* env)
2149 {
2150 Node* n;
2151
2152 n = node_new_anychar_with_fixed_option(ONIG_OPTION_NONE);
2153 CHECK_NULL_RETURN_MEMERR(n);
2154 *node = n;
2155 return 0;
2156 }
2157
2158 static int
2159 node_new_true_anychar(Node** node, ScanEnv* env)
2160 {
2161 Node* n;
2162
2163 n = node_new_anychar_with_fixed_option(ONIG_OPTION_MULTILINE);
2164 CHECK_NULL_RETURN_MEMERR(n);
2165 *node = n;
2166 return 0;
2167 }
2168
2169 static Node*
2170 node_new_list(Node* left, Node* right)
2171 {
2172 Node* node = node_new();
2173 CHECK_NULL_RETURN(node);
2174
2175 NODE_SET_TYPE(node, NODE_LIST);
2176 NODE_CAR(node) = left;
2177 NODE_CDR(node) = right;
2178 return node;
2179 }
2180
2181 extern Node*
2182 onig_node_new_list(Node* left, Node* right)
2183 {
2184 return node_new_list(left, right);
2185 }
2186
2187 extern Node*
2188 onig_node_list_add(Node* list, Node* x)
2189 {
2190 Node *n;
2191
2192 n = onig_node_new_list(x, NULL);
2193 if (IS_NULL(n)) return NULL_NODE;
2194
2195 if (IS_NOT_NULL(list)) {
2196 while (IS_NOT_NULL(NODE_CDR(list)))
2197 list = NODE_CDR(list);
2198
2199 NODE_CDR(list) = n;
2200 }
2201
2202 return n;
2203 }
2204
2205 extern Node*
2206 onig_node_new_alt(Node* left, Node* right)
2207 {
2208 Node* node = node_new();
2209 CHECK_NULL_RETURN(node);
2210
2211 NODE_SET_TYPE(node, NODE_ALT);
2212 NODE_CAR(node) = left;
2213 NODE_CDR(node) = right;
2214 return node;
2215 }
2216
2217 static Node*
2218 make_list_or_alt(NodeType type, int n, Node* ns[])
2219 {
2220 Node* r;
2221
2222 if (n <= 0) return NULL_NODE;
2223
2224 if (n == 1) {
2225 r = node_new();
2226 CHECK_NULL_RETURN(r);
2227 NODE_SET_TYPE(r, type);
2228 NODE_CAR(r) = ns[0];
2229 NODE_CDR(r) = NULL_NODE;
2230 }
2231 else {
2232 Node* right;
2233
2234 r = node_new();
2235 CHECK_NULL_RETURN(r);
2236
2237 right = make_list_or_alt(type, n - 1, ns + 1);
2238 if (IS_NULL(right)) {
2239 onig_node_free(r);
2240 return NULL_NODE;
2241 }
2242
2243 NODE_SET_TYPE(r, type);
2244 NODE_CAR(r) = ns[0];
2245 NODE_CDR(r) = right;
2246 }
2247
2248 return r;
2249 }
2250
2251 static Node*
2252 make_list(int n, Node* ns[])
2253 {
2254 return make_list_or_alt(NODE_LIST, n, ns);
2255 }
2256
2257 static Node*
2258 make_alt(int n, Node* ns[])
2259 {
2260 return make_list_or_alt(NODE_ALT, n, ns);
2261 }
2262
2263 extern Node*
2264 onig_node_new_anchor(int type, int ascii_mode)
2265 {
2266 Node* node = node_new();
2267 CHECK_NULL_RETURN(node);
2268
2269 NODE_SET_TYPE(node, NODE_ANCHOR);
2270 ANCHOR_(node)->type = type;
2271 ANCHOR_(node)->char_len = -1;
2272 ANCHOR_(node)->ascii_mode = ascii_mode;
2273 return node;
2274 }
2275
2276 static Node*
2277 node_new_backref(int back_num, int* backrefs, int by_name,
2278 #ifdef USE_BACKREF_WITH_LEVEL
2279 int exist_level, int nest_level,
2280 #endif
2281 ScanEnv* env)
2282 {
2283 int i;
2284 Node* node = node_new();
2285
2286 CHECK_NULL_RETURN(node);
2287
2288 NODE_SET_TYPE(node, NODE_BACKREF);
2289 BACKREF_(node)->back_num = back_num;
2290 BACKREF_(node)->back_dynamic = (int* )NULL;
2291 if (by_name != 0)
2292 NODE_STATUS_ADD(node, BY_NAME);
2293
2294 #ifdef USE_BACKREF_WITH_LEVEL
2295 if (exist_level != 0) {
2296 NODE_STATUS_ADD(node, NEST_LEVEL);
2297 BACKREF_(node)->nest_level = nest_level;
2298 }
2299 #endif
2300
2301 for (i = 0; i < back_num; i++) {
2302 if (backrefs[i] <= env->num_mem &&
2303 IS_NULL(SCANENV_MEMENV(env)[backrefs[i]].node)) {
2304 NODE_STATUS_ADD(node, RECURSION); /* /...(\1).../ */
2305 break;
2306 }
2307 }
2308
2309 if (back_num <= NODE_BACKREFS_SIZE) {
2310 for (i = 0; i < back_num; i++)
2311 BACKREF_(node)->back_static[i] = backrefs[i];
2312 }
2313 else {
2314 int* p = (int* )xmalloc(sizeof(int) * back_num);
2315 if (IS_NULL(p)) {
2316 onig_node_free(node);
2317 return NULL;
2318 }
2319 BACKREF_(node)->back_dynamic = p;
2320 for (i = 0; i < back_num; i++)
2321 p[i] = backrefs[i];
2322 }
2323 return node;
2324 }
2325
2326 static Node*
2327 node_new_backref_checker(int back_num, int* backrefs, int by_name,
2328 #ifdef USE_BACKREF_WITH_LEVEL
2329 int exist_level, int nest_level,
2330 #endif
2331 ScanEnv* env)
2332 {
2333 Node* node;
2334
2335 node = node_new_backref(back_num, backrefs, by_name,
2336 #ifdef USE_BACKREF_WITH_LEVEL
2337 exist_level, nest_level,
2338 #endif
2339 env);
2340 CHECK_NULL_RETURN(node);
2341
2342 NODE_STATUS_ADD(node, CHECKER);
2343 return node;
2344 }
2345
2346 #ifdef USE_CALL
2347 static Node*
2348 node_new_call(UChar* name, UChar* name_end, int gnum, int by_number)
2349 {
2350 Node* node = node_new();
2351 CHECK_NULL_RETURN(node);
2352
2353 NODE_SET_TYPE(node, NODE_CALL);
2354 CALL_(node)->by_number = by_number;
2355 CALL_(node)->name = name;
2356 CALL_(node)->name_end = name_end;
2357 CALL_(node)->group_num = gnum;
2358 CALL_(node)->entry_count = 1;
2359 return node;
2360 }
2361 #endif
2362
2363 static Node*
2364 node_new_quantifier(int lower, int upper, int by_number)
2365 {
2366 Node* node = node_new();
2367 CHECK_NULL_RETURN(node);
2368
2369 NODE_SET_TYPE(node, NODE_QUANT);
2370 QUANT_(node)->lower = lower;
2371 QUANT_(node)->upper = upper;
2372 QUANT_(node)->greedy = 1;
2373 QUANT_(node)->body_empty_info = QUANT_BODY_IS_NOT_EMPTY;
2374 QUANT_(node)->head_exact = NULL_NODE;
2375 QUANT_(node)->next_head_exact = NULL_NODE;
2376 QUANT_(node)->is_refered = 0;
2377 if (by_number != 0)
2378 NODE_STATUS_ADD(node, BY_NUMBER);
2379
2380 return node;
2381 }
2382
2383 static Node*
2384 node_new_enclosure(enum EnclosureType type)
2385 {
2386 Node* node = node_new();
2387 CHECK_NULL_RETURN(node);
2388
2389 NODE_SET_TYPE(node, NODE_ENCLOSURE);
2390 ENCLOSURE_(node)->type = type;
2391
2392 switch (type) {
2393 case ENCLOSURE_MEMORY:
2394 ENCLOSURE_(node)->m.regnum = 0;
2395 ENCLOSURE_(node)->m.called_addr = -1;
2396 ENCLOSURE_(node)->m.entry_count = 1;
2397 ENCLOSURE_(node)->m.called_state = 0;
2398 break;
2399
2400 case ENCLOSURE_OPTION:
2401 ENCLOSURE_(node)->o.options = 0;
2402 break;
2403
2404 case ENCLOSURE_STOP_BACKTRACK:
2405 break;
2406
2407 case ENCLOSURE_IF_ELSE:
2408 ENCLOSURE_(node)->te.Then = 0;
2409 ENCLOSURE_(node)->te.Else = 0;
2410 break;
2411 }
2412
2413 ENCLOSURE_(node)->opt_count = 0;
2414 return node;
2415 }
2416
2417 extern Node*
2418 onig_node_new_enclosure(int type)
2419 {
2420 return node_new_enclosure(type);
2421 }
2422
2423 static Node*
2424 node_new_enclosure_if_else(Node* cond, Node* Then, Node* Else)
2425 {
2426 Node* n;
2427 n = node_new_enclosure(ENCLOSURE_IF_ELSE);
2428 CHECK_NULL_RETURN(n);
2429
2430 NODE_BODY(n) = cond;
2431 ENCLOSURE_(n)->te.Then = Then;
2432 ENCLOSURE_(n)->te.Else = Else;
2433 return n;
2434 }
2435
2436 static Node*
2437 node_new_memory(int is_named)
2438 {
2439 Node* node = node_new_enclosure(ENCLOSURE_MEMORY);
2440 CHECK_NULL_RETURN(node);
2441 if (is_named != 0)
2442 NODE_STATUS_ADD(node, NAMED_GROUP);
2443
2444 return node;
2445 }
2446
2447 static Node*
2448 node_new_option(OnigOptionType option)
2449 {
2450 Node* node = node_new_enclosure(ENCLOSURE_OPTION);
2451 CHECK_NULL_RETURN(node);
2452 ENCLOSURE_(node)->o.options = option;
2453 return node;
2454 }
2455
2456 static int
2457 node_new_fail(Node** node, ScanEnv* env)
2458 {
2459 *node = node_new();
2460 CHECK_NULL_RETURN_MEMERR(*node);
2461
2462 NODE_SET_TYPE(*node, NODE_GIMMICK);
2463 GIMMICK_(*node)->type = GIMMICK_FAIL;
2464 return ONIG_NORMAL;
2465 }
2466
2467 static int
2468 node_new_save_gimmick(Node** node, enum SaveType save_type, ScanEnv* env)
2469 {
2470 int id;
2471 int r;
2472
2473 r = save_entry(env, save_type, &id);
2474 if (r != ONIG_NORMAL) return r;
2475
2476 *node = node_new();
2477 CHECK_NULL_RETURN_MEMERR(*node);
2478
2479 NODE_SET_TYPE(*node, NODE_GIMMICK);
2480 GIMMICK_(*node)->id = id;
2481 GIMMICK_(*node)->type = GIMMICK_SAVE;
2482 GIMMICK_(*node)->detail_type = (int )save_type;
2483
2484 return ONIG_NORMAL;
2485 }
2486
2487 static int
2488 node_new_update_var_gimmick(Node** node, enum UpdateVarType update_var_type,
2489 int id, ScanEnv* env)
2490 {
2491 *node = node_new();
2492 CHECK_NULL_RETURN_MEMERR(*node);
2493
2494 NODE_SET_TYPE(*node, NODE_GIMMICK);
2495 GIMMICK_(*node)->id = id;
2496 GIMMICK_(*node)->type = GIMMICK_UPDATE_VAR;
2497 GIMMICK_(*node)->detail_type = (int )update_var_type;
2498
2499 return ONIG_NORMAL;
2500 }
2501
2502 static int
2503 node_new_keep(Node** node, ScanEnv* env)
2504 {
2505 int r;
2506
2507 r = node_new_save_gimmick(node, SAVE_KEEP, env);
2508 if (r != 0) return r;
2509
2510 env->keep_num++;
2511 return ONIG_NORMAL;
2512 }
2513
2514 #ifdef USE_CALLOUT
2515
2516 extern void
2517 onig_free_reg_callout_list(int n, CalloutListEntry* list)
2518 {
2519 int i;
2520 int j;
2521
2522 if (IS_NULL(list)) return ;
2523
2524 for (i = 0; i < n; i++) {
2525 if (list[i].of == ONIG_CALLOUT_OF_NAME) {
2526 for (j = 0; j < list[i].u.arg.passed_num; j++) {
2527 if (list[i].u.arg.types[j] == ONIG_TYPE_STRING) {
2528 if (IS_NOT_NULL(list[i].u.arg.vals[j].s.start))
2529 xfree(list[i].u.arg.vals[j].s.start);
2530 }
2531 }
2532 }
2533 else { /* ONIG_CALLOUT_OF_CONTENTS */
2534 if (IS_NOT_NULL(list[i].u.content.start)) {
2535 xfree((void* )list[i].u.content.start);
2536 }
2537 }
2538 }
2539
2540 xfree(list);
2541 }
2542
2543 extern CalloutListEntry*
2544 onig_reg_callout_list_at(regex_t* reg, int num)
2545 {
2546 RegexExt* ext = REG_EXTP(reg);
2547 CHECK_NULL_RETURN(ext);
2548
2549 if (num <= 0 || num > ext->callout_num)
2550 return 0;
2551
2552 num--;
2553 return ext->callout_list + num;
2554 }
2555
2556 static int
2557 reg_callout_list_entry(ScanEnv* env, int* rnum)
2558 {
2559 #define INIT_CALLOUT_LIST_NUM 3
2560
2561 int num;
2562 CalloutListEntry* list;
2563 CalloutListEntry* e;
2564 RegexExt* ext;
2565
2566 ext = onig_get_regex_ext(env->reg);
2567 CHECK_NULL_RETURN_MEMERR(ext);
2568
2569 if (IS_NULL(ext->callout_list)) {
2570 list = (CalloutListEntry* )xmalloc(sizeof(*list) * INIT_CALLOUT_LIST_NUM);
2571 CHECK_NULL_RETURN_MEMERR(list);
2572
2573 ext->callout_list = list;
2574 ext->callout_list_alloc = INIT_CALLOUT_LIST_NUM;
2575 ext->callout_num = 0;
2576 }
2577
2578 num = ext->callout_num + 1;
2579 if (num > ext->callout_list_alloc) {
2580 int alloc = ext->callout_list_alloc * 2;
2581 list = (CalloutListEntry* )xrealloc(ext->callout_list,
2582 sizeof(CalloutListEntry) * alloc,
2583 sizeof(CalloutListEntry) * ext->callout_list_alloc);
2584 CHECK_NULL_RETURN_MEMERR(list);
2585
2586 ext->callout_list = list;
2587 ext->callout_list_alloc = alloc;
2588 }
2589
2590 e = ext->callout_list + (num - 1);
2591
2592 e->flag = 0;
2593 e->of = 0;
2594 e->in = ONIG_CALLOUT_OF_CONTENTS;
2595 e->type = 0;
2596 e->tag_start = 0;
2597 e->tag_end = 0;
2598 e->start_func = 0;
2599 e->end_func = 0;
2600 e->u.arg.num = 0;
2601 e->u.arg.passed_num = 0;
2602
2603 ext->callout_num = num;
2604 *rnum = num;
2605 return ONIG_NORMAL;
2606 }
2607
2608 static int
2609 node_new_callout(Node** node, OnigCalloutOf callout_of, int num, int id,
2610 ScanEnv* env)
2611 {
2612 *node = node_new();
2613 CHECK_NULL_RETURN_MEMERR(*node);
2614
2615 NODE_SET_TYPE(*node, NODE_GIMMICK);
2616 GIMMICK_(*node)->id = id;
2617 GIMMICK_(*node)->num = num;
2618 GIMMICK_(*node)->type = GIMMICK_CALLOUT;
2619 GIMMICK_(*node)->detail_type = (int )callout_of;
2620
2621 return ONIG_NORMAL;
2622 }
2623 #endif
2624
2625 static int
2626 make_extended_grapheme_cluster(Node** node, ScanEnv* env)
2627 {
2628 int r;
2629 int i;
2630 Node* x;
2631 Node* ns[2];
2632
2633 /* \X == (?>\O(?:\Y\O)*) */
2634
2635 ns[1] = NULL_NODE;
2636
2637 r = ONIGERR_MEMORY;
2638 ns[0] = onig_node_new_anchor(ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY, 0);
2639 if (IS_NULL(ns[0])) goto err;
2640
2641 r = node_new_true_anychar(&ns[1], env);
2642 if (r != 0) goto err1;
2643
2644 x = make_list(2, ns);
2645 if (IS_NULL(x)) goto err;
2646 ns[0] = x;
2647 ns[1] = NULL_NODE;
2648
2649 x = node_new_quantifier(0, REPEAT_INFINITE, 1);
2650 if (IS_NULL(x)) goto err;
2651
2652 NODE_BODY(x) = ns[0];
2653 ns[0] = NULL_NODE;
2654 ns[1] = x;
2655
2656 r = node_new_true_anychar(&ns[0], env);
2657 if (r != 0) goto err1;
2658
2659 x = make_list(2, ns);
2660 if (IS_NULL(x)) goto err;
2661
2662 ns[0] = x;
2663 ns[1] = NULL_NODE;
2664
2665 x = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);
2666 if (IS_NULL(x)) goto err;
2667
2668 NODE_BODY(x) = ns[0];
2669
2670 *node = x;
2671 return ONIG_NORMAL;
2672
2673 err:
2674 r = ONIGERR_MEMORY;
2675 err1:
2676 for (i = 0; i < 2; i++) onig_node_free(ns[i]);
2677 return r;
2678 }
2679
2680 static int
2681 make_absent_engine(Node** node, int pre_save_right_id, Node* absent,
2682 Node* step_one, int lower, int upper, int possessive,
2683 int is_range_cutter, ScanEnv* env)
2684 {
2685 int r;
2686 int i;
2687 int id;
2688 Node* x;
2689 Node* ns[4];
2690
2691 for (i = 0; i < 4; i++) ns[i] = NULL_NODE;
2692
2693 ns[1] = absent;
2694 ns[3] = step_one; /* for err */
2695 r = node_new_save_gimmick(&ns[0], SAVE_S, env);
2696 if (r != 0) goto err;
2697
2698 id = GIMMICK_(ns[0])->id;
2699 r = node_new_update_var_gimmick(&ns[2], UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK,
2700 id, env);
2701 if (r != 0) goto err;
2702
2703 r = node_new_fail(&ns[3], env);
2704 if (r != 0) goto err;
2705
2706 x = make_list(4, ns);
2707 if (IS_NULL(x)) goto err0;
2708
2709 ns[0] = x;
2710 ns[1] = step_one;
2711 ns[2] = ns[3] = NULL_NODE;
2712
2713 x = make_alt(2, ns);
2714 if (IS_NULL(x)) goto err0;
2715
2716 ns[0] = x;
2717
2718 x = node_new_quantifier(lower, upper, 0);
2719 if (IS_NULL(x)) goto err0;
2720
2721 NODE_BODY(x) = ns[0];
2722 ns[0] = x;
2723
2724 if (possessive != 0) {
2725 x = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);
2726 if (IS_NULL(x)) goto err0;
2727
2728 NODE_BODY(x) = ns[0];
2729 ns[0] = x;
2730 }
2731
2732 r = node_new_update_var_gimmick(&ns[1], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
2733 pre_save_right_id, env);
2734 if (r != 0) goto err;
2735
2736 r = node_new_fail(&ns[2], env);
2737 if (r != 0) goto err;
2738
2739 x = make_list(2, ns + 1);
2740 if (IS_NULL(x)) goto err0;
2741
2742 ns[1] = x; ns[2] = NULL_NODE;
2743
2744 x = make_alt(2, ns);
2745 if (IS_NULL(x)) goto err0;
2746
2747 if (is_range_cutter != 0)
2748 NODE_STATUS_ADD(x, SUPER);
2749
2750 *node = x;
2751 return ONIG_NORMAL;
2752
2753 err0:
2754 r = ONIGERR_MEMORY;
2755 err:
2756 for (i = 0; i < 4; i++) onig_node_free(ns[i]);
2757 return r;
2758 }
2759
2760 static int
2761 make_absent_tail(Node** node1, Node** node2, int pre_save_right_id,
2762 ScanEnv* env)
2763 {
2764 int r;
2765 int id;
2766 Node* save;
2767 Node* x;
2768 Node* ns[2];
2769
2770 *node1 = *node2 = NULL_NODE;
2771 save = ns[0] = ns[1] = NULL_NODE;
2772
2773 r = node_new_save_gimmick(&save, SAVE_RIGHT_RANGE, env);
2774 if (r != 0) goto err;
2775
2776 id = GIMMICK_(save)->id;
2777 r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
2778 id, env);
2779 if (r != 0) goto err;
2780
2781 r = node_new_fail(&ns[1], env);
2782 if (r != 0) goto err;
2783
2784 x = make_list(2, ns);
2785 if (IS_NULL(x)) goto err0;
2786
2787 ns[0] = NULL_NODE; ns[1] = x;
2788
2789 r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
2790 pre_save_right_id, env);
2791 if (r != 0) goto err;
2792
2793 x = make_alt(2, ns);
2794 if (IS_NULL(x)) goto err0;
2795
2796 *node1 = save;
2797 *node2 = x;
2798 return ONIG_NORMAL;
2799
2800 err0:
2801 r = ONIGERR_MEMORY;
2802 err:
2803 onig_node_free(save);
2804 onig_node_free(ns[0]);
2805 onig_node_free(ns[1]);
2806 return r;
2807 }
2808
2809 static int
2810 make_range_clear(Node** node, ScanEnv* env)
2811 {
2812 int r;
2813 int id;
2814 Node* save;
2815 Node* x;
2816 Node* ns[2];
2817
2818 *node = NULL_NODE;
2819 save = ns[0] = ns[1] = NULL_NODE;
2820
2821 r = node_new_save_gimmick(&save, SAVE_RIGHT_RANGE, env);
2822 if (r != 0) goto err;
2823
2824 id = GIMMICK_(save)->id;
2825 r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
2826 id, env);
2827 if (r != 0) goto err;
2828
2829 r = node_new_fail(&ns[1], env);
2830 if (r != 0) goto err;
2831
2832 x = make_list(2, ns);
2833 if (IS_NULL(x)) goto err0;
2834
2835 ns[0] = NULL_NODE; ns[1] = x;
2836
2837 r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_INIT, 0, env);
2838 if (r != 0) goto err;
2839
2840 x = make_alt(2, ns);
2841 if (IS_NULL(x)) goto err0;
2842
2843 NODE_STATUS_ADD(x, SUPER);
2844
2845 ns[0] = save;
2846 ns[1] = x;
2847 save = NULL_NODE;
2848 x = make_list(2, ns);
2849 if (IS_NULL(x)) goto err0;
2850
2851 *node = x;
2852 return ONIG_NORMAL;
2853
2854 err0:
2855 r = ONIGERR_MEMORY;
2856 err:
2857 onig_node_free(save);
2858 onig_node_free(ns[0]);
2859 onig_node_free(ns[1]);
2860 return r;
2861 }
2862
2863 static int
2864 is_simple_one_char_repeat(Node* node, Node** rquant, Node** rbody,
2865 int* is_possessive, ScanEnv* env)
2866 {
2867 Node* quant;
2868 Node* body;
2869
2870 *rquant = *rbody = 0;
2871 *is_possessive = 0;
2872
2873 if (NODE_TYPE(node) == NODE_QUANT) {
2874 quant = node;
2875 }
2876 else {
2877 if (NODE_TYPE(node) == NODE_ENCLOSURE) {
2878 EnclosureNode* en = ENCLOSURE_(node);
2879 if (en->type == ENCLOSURE_STOP_BACKTRACK) {
2880 *is_possessive = 1;
2881 quant = NODE_ENCLOSURE_BODY(en);
2882 if (NODE_TYPE(quant) != NODE_QUANT)
2883 return 0;
2884 }
2885 else
2886 return 0;
2887 }
2888 else
2889 return 0;
2890 }
2891
2892 if (QUANT_(quant)->greedy == 0)
2893 return 0;
2894
2895 body = NODE_BODY(quant);
2896 switch (NODE_TYPE(body)) {
2897 case NODE_STRING:
2898 {
2899 int len;
2900 StrNode* sn = STR_(body);
2901 UChar *s = sn->s;
2902
2903 len = 0;
2904 while (s < sn->end) {
2905 s += enclen(env->enc, s);
2906 len++;
2907 }
2908 if (len != 1)
2909 return 0;
2910 }
2911
2912 case NODE_CCLASS:
2913 break;
2914
2915 default:
2916 return 0;
2917 break;
2918 }
2919
2920 if (node != quant) {
2921 NODE_BODY(node) = 0;
2922 onig_node_free(node);
2923 }
2924 NODE_BODY(quant) = NULL_NODE;
2925 *rquant = quant;
2926 *rbody = body;
2927 return 1;
2928 }
2929
2930 static int
2931 make_absent_tree_for_simple_one_char_repeat(Node** node, Node* absent, Node* quant,
2932 Node* body, int possessive, ScanEnv* env)
2933 {
2934 int r;
2935 int i;
2936 int id1;
2937 int lower, upper;
2938 Node* x;
2939 Node* ns[4];
2940
2941 *node = NULL_NODE;
2942 r = ONIGERR_MEMORY;
2943 ns[0] = ns[1] = NULL_NODE;
2944 ns[2] = body, ns[3] = absent;
2945
2946 lower = QUANT_(quant)->lower;
2947 upper = QUANT_(quant)->upper;
2948 onig_node_free(quant);
2949
2950 r = node_new_save_gimmick(&ns[0], SAVE_RIGHT_RANGE, env);
2951 if (r != 0) goto err;
2952
2953 id1 = GIMMICK_(ns[0])->id;
2954
2955 r = make_absent_engine(&ns[1], id1, absent, body, lower, upper, possessive,
2956 0, env);
2957 if (r != 0) goto err;
2958
2959 ns[2] = ns[3] = NULL_NODE;
2960
2961 r = node_new_update_var_gimmick(&ns[2], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
2962 id1, env);
2963 if (r != 0) goto err;
2964
2965 x = make_list(3, ns);
2966 if (IS_NULL(x)) goto err0;
2967
2968 *node = x;
2969 return ONIG_NORMAL;
2970
2971 err0:
2972 r = ONIGERR_MEMORY;
2973 err:
2974 for (i = 0; i < 4; i++) onig_node_free(ns[i]);
2975 return r;
2976 }
2977
2978 static int
2979 make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter,
2980 ScanEnv* env)
2981 {
2982 int r;
2983 int i;
2984 int id1, id2;
2985 int possessive;
2986 Node* x;
2987 Node* ns[7];
2988
2989 r = ONIGERR_MEMORY;
2990 for (i = 0; i < 7; i++) ns[i] = NULL_NODE;
2991 ns[4] = expr; ns[5] = absent;
2992
2993 if (is_range_cutter == 0) {
2994 Node* quant;
2995 Node* body;
2996
2997 if (expr == NULL_NODE) {
2998 /* default expr \O* */
2999 quant = node_new_quantifier(0, REPEAT_INFINITE, 0);
3000 if (IS_NULL(quant)) goto err0;
3001
3002 r = node_new_true_anychar(&body, env);
3003 if (r != 0) {
3004 onig_node_free(quant);
3005 goto err;
3006 }
3007 possessive = 0;
3008 goto simple;
3009 }
3010 else {
3011 if (is_simple_one_char_repeat(expr, &quant, &body, &possessive, env)) {
3012 simple:
3013 r = make_absent_tree_for_simple_one_char_repeat(node, absent, quant,
3014 body, possessive, env);
3015 if (r != 0) {
3016 ns[4] = NULL_NODE;
3017 onig_node_free(quant);
3018 onig_node_free(body);
3019 goto err;
3020 }
3021
3022 return ONIG_NORMAL;
3023 }
3024 }
3025 }
3026
3027 r = node_new_save_gimmick(&ns[0], SAVE_RIGHT_RANGE, env);
3028 if (r != 0) goto err;
3029
3030 id1 = GIMMICK_(ns[0])->id;
3031
3032 r = node_new_save_gimmick(&ns[1], SAVE_S, env);
3033 if (r != 0) goto err;
3034
3035 id2 = GIMMICK_(ns[1])->id;
3036
3037 r = node_new_true_anychar(&ns[3], env);
3038 if (r != 0) goto err;
3039
3040 possessive = 1;
3041 r = make_absent_engine(&ns[2], id1, absent, ns[3], 0, REPEAT_INFINITE,
3042 possessive, is_range_cutter, env);
3043 if (r != 0) goto err;
3044
3045 ns[3] = NULL_NODE;
3046 ns[5] = NULL_NODE;
3047
3048 r = node_new_update_var_gimmick(&ns[3], UPDATE_VAR_S_FROM_STACK, id2, env);
3049 if (r != 0) goto err;
3050
3051 if (is_range_cutter != 0) {
3052 x = make_list(4, ns);
3053 if (IS_NULL(x)) goto err0;
3054 }
3055 else {
3056 r = make_absent_tail(&ns[5], &ns[6], id1, env);
3057 if (r != 0) goto err;
3058
3059 x = make_list(7, ns);
3060 if (IS_NULL(x)) goto err0;
3061 }
3062
3063 *node = x;
3064 return ONIG_NORMAL;
3065
3066 err0:
3067 r = ONIGERR_MEMORY;
3068 err:
3069 for (i = 0; i < 7; i++) onig_node_free(ns[i]);
3070 return r;
3071 }
3072
3073 extern int
3074 onig_node_str_cat(Node* node, const UChar* s, const UChar* end)
3075 {
3076 int addlen = (int )(end - s);
3077
3078 if (addlen > 0) {
3079 int len = (int )(STR_(node)->end - STR_(node)->s);
3080
3081 if (STR_(node)->capa > 0 || (len + addlen > NODE_STRING_BUF_SIZE - 1)) {
3082 UChar* p;
3083 int capa = len + addlen + NODE_STRING_MARGIN;
3084
3085 if (capa <= STR_(node)->capa) {
3086 onig_strcpy(STR_(node)->s + len, s, end);
3087 }
3088 else {
3089 if (STR_(node)->s == STR_(node)->buf)
3090 p = strcat_capa_from_static(STR_(node)->s, STR_(node)->end,
3091 s, end, capa);
3092 else
3093 p = strcat_capa(STR_(node)->s, STR_(node)->end, s, end, capa, STR_(node)->capa);
3094
3095 CHECK_NULL_RETURN_MEMERR(p);
3096 STR_(node)->s = p;
3097 STR_(node)->capa = capa;
3098 }
3099 }
3100 else {
3101 onig_strcpy(STR_(node)->s + len, s, end);
3102 }
3103 STR_(node)->end = STR_(node)->s + len + addlen;
3104 }
3105
3106 return 0;
3107 }
3108
3109 extern int
3110 onig_node_str_set(Node* node, const UChar* s, const UChar* end)
3111 {
3112 onig_node_str_clear(node);
3113 return onig_node_str_cat(node, s, end);
3114 }
3115
3116 static int
3117 node_str_cat_char(Node* node, UChar c)
3118 {
3119 UChar s[1];
3120
3121 s[0] = c;
3122 return onig_node_str_cat(node, s, s + 1);
3123 }
3124
3125 extern void
3126 onig_node_conv_to_str_node(Node* node, int flag)
3127 {
3128 NODE_SET_TYPE(node, NODE_STRING);
3129 STR_(node)->flag = flag;
3130 STR_(node)->capa = 0;
3131 STR_(node)->s = STR_(node)->buf;
3132 STR_(node)->end = STR_(node)->buf;
3133 }
3134
3135 extern void
3136 onig_node_str_clear(Node* node)
3137 {
3138 if (STR_(node)->capa != 0 &&
3139 IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) {
3140 xfree(STR_(node)->s);
3141 }
3142
3143 STR_(node)->capa = 0;
3144 STR_(node)->flag = 0;
3145 STR_(node)->s = STR_(node)->buf;
3146 STR_(node)->end = STR_(node)->buf;
3147 }
3148
3149 static Node*
3150 node_new_str(const UChar* s, const UChar* end)
3151 {
3152 Node* node = node_new();
3153 CHECK_NULL_RETURN(node);
3154
3155 NODE_SET_TYPE(node, NODE_STRING);
3156 STR_(node)->capa = 0;
3157 STR_(node)->flag = 0;
3158 STR_(node)->s = STR_(node)->buf;
3159 STR_(node)->end = STR_(node)->buf;
3160 if (onig_node_str_cat(node, s, end)) {
3161 onig_node_free(node);
3162 return NULL;
3163 }
3164 return node;
3165 }
3166
3167 extern Node*
3168 onig_node_new_str(const UChar* s, const UChar* end)
3169 {
3170 return node_new_str(s, end);
3171 }
3172
3173 static Node*
3174 node_new_str_raw(UChar* s, UChar* end)
3175 {
3176 Node* node = node_new_str(s, end);
3177 NODE_STRING_SET_RAW(node);
3178 return node;
3179 }
3180
3181 static Node*
3182 node_new_empty(void)
3183 {
3184 return node_new_str(NULL, NULL);
3185 }
3186
3187 static Node*
3188 node_new_str_raw_char(UChar c)
3189 {
3190 UChar p[1];
3191
3192 p[0] = c;
3193 return node_new_str_raw(p, p + 1);
3194 }
3195
3196 static Node*
3197 str_node_split_last_char(Node* node, OnigEncoding enc)
3198 {
3199 const UChar *p;
3200 Node* rn;
3201 StrNode* sn;
3202
3203 sn = STR_(node);
3204 rn = NULL_NODE;
3205 if (sn->end > sn->s) {
3206 p = onigenc_get_prev_char_head(enc, sn->s, sn->end);
3207 if (p && p > sn->s) { /* can be split. */
3208 rn = node_new_str(p, sn->end);
3209 if (NODE_STRING_IS_RAW(node))
3210 NODE_STRING_SET_RAW(rn);
3211
3212 sn->end = (UChar* )p;
3213 }
3214 }
3215 return rn;
3216 }
3217
3218 static int
3219 str_node_can_be_split(Node* node, OnigEncoding enc)
3220 {
3221 StrNode* sn = STR_(node);
3222 if (sn->end > sn->s) {
3223 return ((enclen(enc, sn->s) < sn->end - sn->s) ? 1 : 0);
3224 }
3225 return 0;
3226 }
3227
3228 #ifdef USE_PAD_TO_SHORT_BYTE_CHAR
3229 static int
3230 node_str_head_pad(StrNode* sn, int num, UChar val)
3231 {
3232 UChar buf[NODE_STRING_BUF_SIZE];
3233 int i, len;
3234
3235 len = sn->end - sn->s;
3236 onig_strcpy(buf, sn->s, sn->end);
3237 onig_strcpy(&(sn->s[num]), buf, buf + len);
3238 sn->end += num;
3239
3240 for (i = 0; i < num; i++) {
3241 sn->s[i] = val;
3242 }
3243 }
3244 #endif
3245
3246 extern int
3247 onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc)
3248 {
3249 unsigned int num, val;
3250 OnigCodePoint c;
3251 UChar* p = *src;
3252 PFETCH_READY;
3253
3254 num = 0;
3255 while (! PEND) {
3256 PFETCH(c);
3257 if (IS_CODE_DIGIT_ASCII(enc, c)) {
3258 val = (unsigned int )DIGITVAL(c);
3259 if ((INT_MAX_LIMIT - val) / 10UL < num)
3260 return -1; /* overflow */
3261
3262 num = num * 10 + val;
3263 }
3264 else {
3265 PUNFETCH;
3266 break;
3267 }
3268 }
3269 *src = p;
3270 return num;
3271 }
3272
3273 static int
3274 scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int minlen,
3275 int maxlen, OnigEncoding enc)
3276 {
3277 OnigCodePoint c;
3278 unsigned int num, val;
3279 int n;
3280 UChar* p = *src;
3281 PFETCH_READY;
3282
3283 num = 0;
3284 n = 0;
3285 while (! PEND && n < maxlen) {
3286 PFETCH(c);
3287 if (IS_CODE_XDIGIT_ASCII(enc, c)) {
3288 n++;
3289 val = (unsigned int )XDIGITVAL(enc,c);
3290 if ((INT_MAX_LIMIT - val) / 16UL < num)
3291 return ONIGERR_TOO_BIG_NUMBER; /* overflow */
3292
3293 num = (num << 4) + XDIGITVAL(enc,c);
3294 }
3295 else {
3296 PUNFETCH;
3297 break;
3298 }
3299 }
3300
3301 if (n < minlen)
3302 return ONIGERR_INVALID_CODE_POINT_VALUE;
3303
3304 *src = p;
3305 return num;
3306 }
3307
3308 static int
3309 scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen,
3310 OnigEncoding enc)
3311 {
3312 OnigCodePoint c;
3313 unsigned int num, val;
3314 UChar* p = *src;
3315 PFETCH_READY;
3316
3317 num = 0;
3318 while (! PEND && maxlen-- != 0) {
3319 PFETCH(c);
3320 if (IS_CODE_DIGIT_ASCII(enc, c) && c < '8') {
3321 val = ODIGITVAL(c);
3322 if ((INT_MAX_LIMIT - val) / 8UL < num)
3323 return -1; /* overflow */
3324
3325 num = (num << 3) + val;
3326 }
3327 else {
3328 PUNFETCH;
3329 break;
3330 }
3331 }
3332 *src = p;
3333 return num;
3334 }
3335
3336
3337 #define BB_WRITE_CODE_POINT(bbuf,pos,code) \
3338 BB_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT)
3339
3340 /* data format:
3341 [n][from-1][to-1][from-2][to-2] ... [from-n][to-n]
3342 (all data size is OnigCodePoint)
3343 */
3344 static int
3345 new_code_range(BBuf** pbuf)
3346 {
3347 #define INIT_MULTI_BYTE_RANGE_SIZE (SIZE_CODE_POINT * 5)
3348 int r;
3349 OnigCodePoint n;
3350 BBuf* bbuf;
3351
3352 bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf));
3353 CHECK_NULL_RETURN_MEMERR(bbuf);
3354 r = BB_INIT(bbuf, INIT_MULTI_BYTE_RANGE_SIZE);
3355 if (r != 0) {
3356 xfree(bbuf);
3357 *pbuf = 0;
3358 return r;
3359 }
3360
3361 n = 0;
3362 BB_WRITE_CODE_POINT(bbuf, 0, n);
3363 return 0;
3364 }
3365
3366 static int
3367 add_code_range_to_buf(BBuf** pbuf, OnigCodePoint from, OnigCodePoint to)
3368 {
3369 int r, inc_n, pos;
3370 int low, high, bound, x;
3371 OnigCodePoint n, *data;
3372 BBuf* bbuf;
3373
3374 if (from > to) {
3375 n = from; from = to; to = n;
3376 }
3377
3378 if (IS_NULL(*pbuf)) {
3379 r = new_code_range(pbuf);
3380 if (r != 0) return r;
3381 bbuf = *pbuf;
3382 n = 0;
3383 }
3384 else {
3385 bbuf = *pbuf;
3386 GET_CODE_POINT(n, bbuf->p);
3387 }
3388 data = (OnigCodePoint* )(bbuf->p);
3389 data++;
3390
3391 for (low = 0, bound = n; low < bound; ) {
3392 x = (low + bound) >> 1;
3393 if (from > data[x*2 + 1])
3394 low = x + 1;
3395 else
3396 bound = x;
3397 }
3398
3399 high = (to == ~((OnigCodePoint )0)) ? n : low;
3400 for (bound = n; high < bound; ) {
3401 x = (high + bound) >> 1;
3402 if (to + 1 >= data[x*2])
3403 high = x + 1;
3404 else
3405 bound = x;
3406 }
3407
3408 inc_n = low + 1 - high;
3409 if (n + inc_n > ONIG_MAX_MULTI_BYTE_RANGES_NUM)
3410 return ONIGERR_TOO_MANY_MULTI_BYTE_RANGES;
3411
3412 if (inc_n != 1) {
3413 if (from > data[low*2])
3414 from = data[low*2];
3415 if (to < data[(high - 1)*2 + 1])
3416 to = data[(high - 1)*2 + 1];
3417 }
3418
3419 if (inc_n != 0 && (OnigCodePoint )high < n) {
3420 int from_pos = SIZE_CODE_POINT * (1 + high * 2);
3421 int to_pos = SIZE_CODE_POINT * (1 + (low + 1) * 2);
3422 int size = (n - high) * 2 * SIZE_CODE_POINT;
3423
3424 if (inc_n > 0) {
3425 BB_MOVE_RIGHT(bbuf, from_pos, to_pos, size);
3426 }
3427 else {
3428 BB_MOVE_LEFT_REDUCE(bbuf, from_pos, to_pos);
3429 }
3430 }
3431
3432 pos = SIZE_CODE_POINT * (1 + low * 2);
3433 BB_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2);
3434 BB_WRITE_CODE_POINT(bbuf, pos, from);
3435 BB_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to);
3436 n += inc_n;
3437 BB_WRITE_CODE_POINT(bbuf, 0, n);
3438
3439 return 0;
3440 }
3441
3442 static int
3443 add_code_range(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to)
3444 {
3445 if (from > to) {
3446 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
3447 return 0;
3448 else
3449 return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
3450 }
3451
3452 return add_code_range_to_buf(pbuf, from, to);
3453 }
3454
3455 static int
3456 not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf)
3457 {
3458 int r, i, n;
3459 OnigCodePoint pre, from, *data, to = 0;
3460
3461 *pbuf = (BBuf* )NULL;
3462 if (IS_NULL(bbuf)) {
3463 set_all:
3464 return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
3465 }
3466
3467 data = (OnigCodePoint* )(bbuf->p);
3468 GET_CODE_POINT(n, data);
3469 data++;
3470 if (n <= 0) goto set_all;
3471
3472 r = 0;
3473 pre = MBCODE_START_POS(enc);
3474 for (i = 0; i < n; i++) {
3475 from = data[i*2];
3476 to = data[i*2+1];
3477 if (pre <= from - 1) {
3478 r = add_code_range_to_buf(pbuf, pre, from - 1);
3479 if (r != 0) return r;
3480 }
3481 if (to == ~((OnigCodePoint )0)) break;
3482 pre = to + 1;
3483 }
3484 if (to < ~((OnigCodePoint )0)) {
3485 r = add_code_range_to_buf(pbuf, to + 1, ~((OnigCodePoint )0));
3486 }
3487 return r;
3488 }
3489
3490 #define SWAP_BB_NOT(bbuf1, not1, bbuf2, not2) do {\
3491 BBuf *tbuf; \
3492 int tnot; \
3493 tnot = not1; not1 = not2; not2 = tnot; \
3494 tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf; \
3495 } while (0)
3496
3497 static int
3498 or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1,
3499 BBuf* bbuf2, int not2, BBuf** pbuf)
3500 {
3501 int r;
3502 OnigCodePoint i, n1, *data1;
3503 OnigCodePoint from, to;
3504
3505 *pbuf = (BBuf* )NULL;
3506 if (IS_NULL(bbuf1) && IS_NULL(bbuf2)) {
3507 if (not1 != 0 || not2 != 0)
3508 return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
3509 return 0;
3510 }
3511
3512 r = 0;
3513 if (IS_NULL(bbuf2))
3514 SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);
3515
3516 if (IS_NULL(bbuf1)) {
3517 if (not1 != 0) {
3518 return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
3519 }
3520 else {
3521 if (not2 == 0) {
3522 return bbuf_clone(pbuf, bbuf2);
3523 }
3524 else {
3525 return not_code_range_buf(enc, bbuf2, pbuf);
3526 }
3527 }
3528 }
3529
3530 if (not1 != 0)
3531 SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);
3532
3533 data1 = (OnigCodePoint* )(bbuf1->p);
3534 GET_CODE_POINT(n1, data1);
3535 data1++;
3536
3537 if (not2 == 0 && not1 == 0) { /* 1 OR 2 */
3538 r = bbuf_clone(pbuf, bbuf2);
3539 }
3540 else if (not1 == 0) { /* 1 OR (not 2) */
3541 r = not_code_range_buf(enc, bbuf2, pbuf);
3542 }
3543 if (r != 0) return r;
3544
3545 for (i = 0; i < n1; i++) {
3546 from = data1[i*2];
3547 to = data1[i*2+1];
3548 r = add_code_range_to_buf(pbuf, from, to);
3549 if (r != 0) return r;
3550 }
3551 return 0;
3552 }
3553
3554 static int
3555 and_code_range1(BBuf** pbuf, OnigCodePoint from1, OnigCodePoint to1,
3556 OnigCodePoint* data, int n)
3557 {
3558 int i, r;
3559 OnigCodePoint from2, to2;
3560
3561 for (i = 0; i < n; i++) {
3562 from2 = data[i*2];
3563 to2 = data[i*2+1];
3564 if (from2 < from1) {
3565 if (to2 < from1) continue;
3566 else {
3567 from1 = to2 + 1;
3568 }
3569 }
3570 else if (from2 <= to1) {
3571 if (to2 < to1) {
3572 if (from1 <= from2 - 1) {
3573 r = add_code_range_to_buf(pbuf, from1, from2-1);
3574 if (r != 0) return r;
3575 }
3576 from1 = to2 + 1;
3577 }
3578 else {
3579 to1 = from2 - 1;
3580 }
3581 }
3582 else {
3583 from1 = from2;
3584 }
3585 if (from1 > to1) break;
3586 }
3587 if (from1 <= to1) {
3588 r = add_code_range_to_buf(pbuf, from1, to1);
3589 if (r != 0) return r;
3590 }
3591 return 0;
3592 }
3593
3594 static int
3595 and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)
3596 {
3597 int r;
3598 OnigCodePoint i, j, n1, n2, *data1, *data2;
3599 OnigCodePoint from, to, from1, to1, from2, to2;
3600
3601 *pbuf = (BBuf* )NULL;
3602 if (IS_NULL(bbuf1)) {
3603 if (not1 != 0 && IS_NOT_NULL(bbuf2)) /* not1 != 0 -> not2 == 0 */
3604 return bbuf_clone(pbuf, bbuf2);
3605 return 0;
3606 }
3607 else if (IS_NULL(bbuf2)) {
3608 if (not2 != 0)
3609 return bbuf_clone(pbuf, bbuf1);
3610 return 0;
3611 }
3612
3613 if (not1 != 0)
3614 SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);
3615
3616 data1 = (OnigCodePoint* )(bbuf1->p);
3617 data2 = (OnigCodePoint* )(bbuf2->p);
3618 GET_CODE_POINT(n1, data1);
3619 GET_CODE_POINT(n2, data2);
3620 data1++;
3621 data2++;
3622
3623 if (not2 == 0 && not1 == 0) { /* 1 AND 2 */
3624 for (i = 0; i < n1; i++) {
3625 from1 = data1[i*2];
3626 to1 = data1[i*2+1];
3627 for (j = 0; j < n2; j++) {
3628 from2 = data2[j*2];
3629 to2 = data2[j*2+1];
3630 if (from2 > to1) break;
3631 if (to2 < from1) continue;
3632 from = MAX(from1, from2);
3633 to = MIN(to1, to2);
3634 r = add_code_range_to_buf(pbuf, from, to);
3635 if (r != 0) return r;
3636 }
3637 }
3638 }
3639 else if (not1 == 0) { /* 1 AND (not 2) */
3640 for (i = 0; i < n1; i++) {
3641 from1 = data1[i*2];
3642 to1 = data1[i*2+1];
3643 r = and_code_range1(pbuf, from1, to1, data2, n2);
3644 if (r != 0) return r;
3645 }
3646 }
3647
3648 return 0;
3649 }
3650
3651 static int
3652 and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
3653 {
3654 int r, not1, not2;
3655 BBuf *buf1, *buf2, *pbuf;
3656 BitSetRef bsr1, bsr2;
3657 BitSet bs1, bs2;
3658
3659 not1 = IS_NCCLASS_NOT(dest);
3660 bsr1 = dest->bs;
3661 buf1 = dest->mbuf;
3662 not2 = IS_NCCLASS_NOT(cc);
3663 bsr2 = cc->bs;
3664 buf2 = cc->mbuf;
3665
3666 if (not1 != 0) {
3667 bitset_invert_to(bsr1, bs1);
3668 bsr1 = bs1;
3669 }
3670 if (not2 != 0) {
3671 bitset_invert_to(bsr2, bs2);
3672 bsr2 = bs2;
3673 }
3674 bitset_and(bsr1, bsr2);
3675 if (bsr1 != dest->bs) {
3676 bitset_copy(dest->bs, bsr1);
3677 }
3678 if (not1 != 0) {
3679 bitset_invert(dest->bs);
3680 }
3681
3682 if (! ONIGENC_IS_SINGLEBYTE(enc)) {
3683 if (not1 != 0 && not2 != 0) {
3684 r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf);
3685 }
3686 else {
3687 r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf);
3688 if (r == 0 && not1 != 0) {
3689 BBuf *tbuf;
3690 r = not_code_range_buf(enc, pbuf, &tbuf);
3691 if (r != 0) {
3692 bbuf_free(pbuf);
3693 return r;
3694 }
3695 bbuf_free(pbuf);
3696 pbuf = tbuf;
3697 }
3698 }
3699 if (r != 0) return r;
3700
3701 dest->mbuf = pbuf;
3702 bbuf_free(buf1);
3703 return r;
3704 }
3705 return 0;
3706 }
3707
3708 static int
3709 or_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
3710 {
3711 int r, not1, not2;
3712 BBuf *buf1, *buf2, *pbuf;
3713 BitSetRef bsr1, bsr2;
3714 BitSet bs1, bs2;
3715
3716 not1 = IS_NCCLASS_NOT(dest);
3717 bsr1 = dest->bs;
3718 buf1 = dest->mbuf;
3719 not2 = IS_NCCLASS_NOT(cc);
3720 bsr2 = cc->bs;
3721 buf2 = cc->mbuf;
3722
3723 if (not1 != 0) {
3724 bitset_invert_to(bsr1, bs1);
3725 bsr1 = bs1;
3726 }
3727 if (not2 != 0) {
3728 bitset_invert_to(bsr2, bs2);
3729 bsr2 = bs2;
3730 }
3731 bitset_or(bsr1, bsr2);
3732 if (bsr1 != dest->bs) {
3733 bitset_copy(dest->bs, bsr1);
3734 }
3735 if (not1 != 0) {
3736 bitset_invert(dest->bs);
3737 }
3738
3739 if (! ONIGENC_IS_SINGLEBYTE(enc)) {
3740 if (not1 != 0 && not2 != 0) {
3741 r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf);
3742 }
3743 else {
3744 r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf);
3745 if (r == 0 && not1 != 0) {
3746 BBuf *tbuf;
3747 r = not_code_range_buf(enc, pbuf, &tbuf);
3748 if (r != 0) {
3749 bbuf_free(pbuf);
3750 return r;
3751 }
3752 bbuf_free(pbuf);
3753 pbuf = tbuf;
3754 }
3755 }
3756 if (r != 0) return r;
3757
3758 dest->mbuf = pbuf;
3759 bbuf_free(buf1);
3760 return r;
3761 }
3762 else
3763 return 0;
3764 }
3765
3766 static OnigCodePoint
3767 conv_backslash_value(OnigCodePoint c, ScanEnv* env)
3768 {
3769 if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_CONTROL_CHARS)) {
3770 switch (c) {
3771 case 'n': return '\n';
3772 case 't': return '\t';
3773 case 'r': return '\r';
3774 case 'f': return '\f';
3775 case 'a': return '\007';
3776 case 'b': return '\010';
3777 case 'e': return '\033';
3778 case 'v':
3779 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_V_VTAB))
3780 return '\v';
3781 break;
3782
3783 default:
3784 break;
3785 }
3786 }
3787 return c;
3788 }
3789
3790 static int
3791 is_invalid_quantifier_target(Node* node)
3792 {
3793 switch (NODE_TYPE(node)) {
3794 case NODE_ANCHOR:
3795 case NODE_GIMMICK:
3796 return 1;
3797 break;
3798
3799 case NODE_ENCLOSURE:
3800 /* allow enclosed elements */
3801 /* return is_invalid_quantifier_target(NODE_BODY(node)); */
3802 break;
3803
3804 case NODE_LIST:
3805 do {
3806 if (! is_invalid_quantifier_target(NODE_CAR(node))) return 0;
3807 } while (IS_NOT_NULL(node = NODE_CDR(node)));
3808 return 0;
3809 break;
3810
3811 case NODE_ALT:
3812 do {
3813 if (is_invalid_quantifier_target(NODE_CAR(node))) return 1;
3814 } while (IS_NOT_NULL(node = NODE_CDR(node)));
3815 break;
3816
3817 default:
3818 break;
3819 }
3820 return 0;
3821 }
3822
3823 /* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */
3824 static int
3825 quantifier_type_num(QuantNode* q)
3826 {
3827 if (q->greedy) {
3828 if (q->lower == 0) {
3829 if (q->upper == 1) return 0;
3830 else if (IS_REPEAT_INFINITE(q->upper)) return 1;
3831 }
3832 else if (q->lower == 1) {
3833 if (IS_REPEAT_INFINITE(q->upper)) return 2;
3834 }
3835 }
3836 else {
3837 if (q->lower == 0) {
3838 if (q->upper == 1) return 3;
3839 else if (IS_REPEAT_INFINITE(q->upper)) return 4;
3840 }
3841 else if (q->lower == 1) {
3842 if (IS_REPEAT_INFINITE(q->upper)) return 5;
3843 }
3844 }
3845 return -1;
3846 }
3847
3848
/* How a quantifier applied directly to another quantifier is reduced. */
enum ReduceType {
  RQ_ASIS = 0, /* as is */
  RQ_DEL  = 1, /* delete parent */
  RQ_A    = 2, /* to '*'    */
  RQ_AQ   = 3, /* to '*?'   */
  RQ_QQ   = 4, /* to '??'   */
  RQ_P_QQ = 5, /* to '+)??' */
  RQ_PQ_Q = 6  /* to '+?)?' */
};

/*
 * Reduction table, indexed as [child type][parent type] with the
 * numbers produced by quantifier_type_num(); each row is labelled with
 * the child form it belongs to.
 */
static enum ReduceType ReduceTypeTable[6][6] = {
  /* '?'  */ {RQ_DEL,  RQ_A,    RQ_A,   RQ_QQ,   RQ_AQ,   RQ_ASIS},
  /* '*'  */ {RQ_DEL,  RQ_DEL,  RQ_DEL, RQ_P_QQ, RQ_P_QQ, RQ_DEL},
  /* '+'  */ {RQ_A,    RQ_A,    RQ_DEL, RQ_ASIS, RQ_P_QQ, RQ_DEL},
  /* '??' */ {RQ_DEL,  RQ_AQ,   RQ_AQ,  RQ_DEL,  RQ_AQ,   RQ_AQ},
  /* '*?' */ {RQ_DEL,  RQ_DEL,  RQ_DEL, RQ_DEL,  RQ_DEL,  RQ_DEL},
  /* '+?' */ {RQ_ASIS, RQ_PQ_Q, RQ_DEL, RQ_AQ,   RQ_AQ,   RQ_DEL}
};
3867
3868 extern void
3869 onig_reduce_nested_quantifier(Node* pnode, Node* cnode)
3870 {
3871 int pnum, cnum;
3872 QuantNode *p, *c;
3873
3874 p = QUANT_(pnode);
3875 c = QUANT_(cnode);
3876 pnum = quantifier_type_num(p);
3877 cnum = quantifier_type_num(c);
3878 if (pnum < 0 || cnum < 0) {
3879 if ((p->lower == p->upper) && ! IS_REPEAT_INFINITE(p->upper)) {
3880 if ((c->lower == c->upper) && ! IS_REPEAT_INFINITE(c->upper)) {
3881 int n = positive_int_multiply(p->lower, c->lower);
3882 if (n >= 0) {
3883 p->lower = p->upper = n;
3884 NODE_BODY(pnode) = NODE_BODY(cnode);
3885 goto remove_cnode;
3886 }
3887 }
3888 }
3889
3890 return ;
3891 }
3892
3893 switch(ReduceTypeTable[cnum][pnum]) {
3894 case RQ_DEL:
3895 *pnode = *cnode;
3896 break;
3897 case RQ_A:
3898 NODE_BODY(pnode) = NODE_BODY(cnode);
3899 p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 1;
3900 break;
3901 case RQ_AQ:
3902 NODE_BODY(pnode) = NODE_BODY(cnode);
3903 p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 0;
3904 break;
3905 case RQ_QQ:
3906 NODE_BODY(pnode) = NODE_BODY(cnode);
3907 p->lower = 0; p->upper = 1; p->greedy = 0;
3908 break;
3909 case RQ_P_QQ:
3910 NODE_BODY(pnode) = cnode;
3911 p->lower = 0; p->upper = 1; p->greedy = 0;
3912 c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 1;
3913 return ;
3914 break;
3915 case RQ_PQ_Q:
3916 NODE_BODY(pnode) = cnode;
3917 p->lower = 0; p->upper = 1; p->greedy = 1;
3918 c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 0;
3919 return ;
3920 break;
3921 case RQ_ASIS:
3922 NODE_BODY(pnode) = cnode;
3923 return ;
3924 break;
3925 }
3926
3927 remove_cnode:
3928 NODE_BODY(cnode) = NULL_NODE;
3929 onig_node_free(cnode);
3930 }
3931
3932 static int
3933 node_new_general_newline(Node** node, ScanEnv* env)
3934 {
3935 int r;
3936 int dlen, alen;
3937 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN * 2];
3938 Node* crnl;
3939 Node* ncc;
3940 Node* x;
3941 CClassNode* cc;
3942
3943 dlen = ONIGENC_CODE_TO_MBC(env->enc, 0x0d, buf);
3944 if (dlen < 0) return dlen;
3945 alen = ONIGENC_CODE_TO_MBC(env->enc, 0x0a, buf + dlen);
3946 if (alen < 0) return alen;
3947
3948 crnl = node_new_str_raw(buf, buf + dlen + alen);
3949 CHECK_NULL_RETURN_MEMERR(crnl);
3950
3951 ncc = node_new_cclass();
3952 if (IS_NULL(ncc)) goto err2;
3953
3954 cc = CCLASS_(ncc);
3955 if (dlen == 1) {
3956 bitset_set_range(cc->bs, 0x0a, 0x0d);
3957 }
3958 else {
3959 r = add_code_range(&(cc->mbuf), env, 0x0a, 0x0d);
3960 if (r != 0) {
3961 err1:
3962 onig_node_free(ncc);
3963 err2:
3964 onig_node_free(crnl);
3965 return ONIGERR_MEMORY;
3966 }
3967 }
3968
3969 if (ONIGENC_IS_UNICODE_ENCODING(env->enc)) {
3970 r = add_code_range(&(cc->mbuf), env, 0x85, 0x85);
3971 if (r != 0) goto err1;
3972 r = add_code_range(&(cc->mbuf), env, 0x2028, 0x2029);
3973 if (r != 0) goto err1;
3974 }
3975
3976 x = node_new_enclosure_if_else(crnl, 0, ncc);
3977 if (IS_NULL(x)) goto err1;
3978
3979 *node = x;
3980 return 0;
3981 }
3982
/* Kind of a token; stored in OnigToken.type by the tokenizer functions. */
enum TokenSyms {
  TK_EOT = 0,                    /* end of token */
  TK_RAW_BYTE,                   /* byte value given in octal or 2-digit hex */
  TK_CHAR,
  TK_STRING,
  TK_CODE_POINT,                 /* code point given as a number */
  TK_ANYCHAR,
  TK_CHAR_TYPE,                  /* \w \d \s \h and their negations */
  TK_BACKREF,
  TK_CALL,
  TK_ANCHOR,
  TK_OP_REPEAT,                  /* '*', '+', '?' style repeat operators */
  TK_INTERVAL,                   /* {n,m} */
  TK_ANYCHAR_ANYTIME,            /* SQL '%' == .* */
  TK_ALT,
  TK_SUBEXP_OPEN,
  TK_SUBEXP_CLOSE,
  TK_CC_OPEN,
  TK_QUOTE_OPEN,
  TK_CHAR_PROPERTY,              /* \p{...}, \P{...} */
  TK_KEEP,                       /* \K */
  TK_GENERAL_NEWLINE,            /* \R */
  TK_NO_NEWLINE,                 /* \N */
  TK_TRUE_ANYCHAR,               /* \O */
  TK_EXTENDED_GRAPHEME_CLUSTER,  /* \X */

  /* the following are produced only inside a character class */
  TK_CC_CLOSE,                   /* ] */
  TK_CC_RANGE,                   /* - */
  TK_POSIX_BRACKET_OPEN,         /* [: */
  TK_CC_AND,                     /* && */
  TK_CC_CC_OPEN                  /* [ */
};
4016
4017 typedef struct {
4018 enum TokenSyms type;
4019 int escaped;
4020 int base; /* is number: 8, 16 (used in [....]) */
4021 UChar* backp;
4022 union {
4023 UChar* s;
4024 int c;
4025 OnigCodePoint code;
4026 int anchor;
4027 int subtype;
4028 struct {
4029 int lower;
4030 int upper;
4031 int greedy;
4032 int possessive;
4033 } repeat;
4034 struct {
4035 int num;
4036 int ref1;
4037 int* refs;
4038 int by_name;
4039 #ifdef USE_BACKREF_WITH_LEVEL
4040 int exist_level;
4041 int level; /* \k<name+n> */
4042 #endif
4043 } backref;
4044 struct {
4045 UChar* name;
4046 UChar* name_end;
4047 int gnum;
4048 int by_number;
4049 } call;
4050 struct {
4051 int ctype;
4052 int not;
4053 } prop;
4054 } u;
4055 } OnigToken;
4056
4057
4058 static int
4059 fetch_range_quantifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env)
4060 {
4061 int low, up, syn_allow, non_low = 0;
4062 int r = 0;
4063 OnigCodePoint c;
4064 OnigEncoding enc = env->enc;
4065 UChar* p = *src;
4066 PFETCH_READY;
4067
4068 syn_allow = IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INVALID_INTERVAL);
4069
4070 if (PEND) {
4071 if (syn_allow)
4072 return 1; /* "....{" : OK! */
4073 else
4074 return ONIGERR_END_PATTERN_AT_LEFT_BRACE; /* "....{" syntax error */
4075 }
4076
4077 if (! syn_allow) {
4078 c = PPEEK;
4079 if (c == ')' || c == '(' || c == '|') {
4080 return ONIGERR_END_PATTERN_AT_LEFT_BRACE;
4081 }
4082 }
4083
4084 low = onig_scan_unsigned_number(&p, end, env->enc);
4085 if (low < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
4086 if (low > ONIG_MAX_REPEAT_NUM)
4087 return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
4088
4089 if (p == *src) { /* can't read low */
4090 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV)) {
4091 /* allow {,n} as {0,n} */
4092 low = 0;
4093 non_low = 1;
4094 }
4095 else
4096 goto invalid;
4097 }
4098
4099 if (PEND) goto invalid;
4100 PFETCH(c);
4101 if (c == ',') {
4102 UChar* prev = p;
4103 up = onig_scan_unsigned_number(&p, end, env->enc);
4104 if (up < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
4105 if (up > ONIG_MAX_REPEAT_NUM)
4106 return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
4107
4108 if (p == prev) {
4109 if (non_low != 0)
4110 goto invalid;
4111 up = REPEAT_INFINITE; /* {n,} : {n,infinite} */
4112 }
4113 }
4114 else {
4115 if (non_low != 0)
4116 goto invalid;
4117
4118 PUNFETCH;
4119 up = low; /* {n} : exact n times */
4120 r = 2; /* fixed */
4121 }
4122
4123 if (PEND) goto invalid;
4124 PFETCH(c);
4125 if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) {
4126 if (c != MC_ESC(env->syntax)) goto invalid;
4127 PFETCH(c);
4128 }
4129 if (c != '}') goto invalid;
4130
4131 if (!IS_REPEAT_INFINITE(up) && low > up) {
4132 return ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE;
4133 }
4134
4135 tok->type = TK_INTERVAL;
4136 tok->u.repeat.lower = low;
4137 tok->u.repeat.upper = up;
4138 *src = p;
4139 return r; /* 0: normal {n,m}, 2: fixed {n} */
4140
4141 invalid:
4142 if (syn_allow) {
4143 /* *src = p; */ /* !!! Don't do this line !!! */
4144 return 1; /* OK */
4145 }
4146 else
4147 return ONIGERR_INVALID_REPEAT_RANGE_PATTERN;
4148 }
4149
4150 /* \M-, \C-, \c, or \... */
4151 static int
4152 fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env, OnigCodePoint* val)
4153 {
4154 int v;
4155 OnigCodePoint c;
4156 OnigEncoding enc = env->enc;
4157 UChar* p = *src;
4158
4159 if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
4160
4161 PFETCH_S(c);
4162 switch (c) {
4163 case 'M':
4164 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META)) {
4165 if (PEND) return ONIGERR_END_PATTERN_AT_META;
4166 PFETCH_S(c);
4167 if (c != '-') return ONIGERR_META_CODE_SYNTAX;
4168 if (PEND) return ONIGERR_END_PATTERN_AT_META;
4169 PFETCH_S(c);
4170 if (c == MC_ESC(env->syntax)) {
4171 v = fetch_escaped_value(&p, end, env, &c);
4172 if (v < 0) return v;
4173 }
4174 c = ((c & 0xff) | 0x80);
4175 }
4176 else
4177 goto backslash;
4178 break;
4179
4180 case 'C':
4181 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL)) {
4182 if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
4183 PFETCH_S(c);
4184 if (c != '-') return ONIGERR_CONTROL_CODE_SYNTAX;
4185 goto control;
4186 }
4187 else
4188 goto backslash;
4189
4190 case 'c':
4191 if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_C_CONTROL)) {
4192 control:
4193 if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
4194 PFETCH_S(c);
4195 if (c == '?') {
4196 c = 0177;
4197 }
4198 else {
4199 if (c == MC_ESC(env->syntax)) {
4200 v = fetch_escaped_value(&p, end, env, &c);
4201 if (v < 0) return v;
4202 }
4203 c &= 0x9f;
4204 }
4205 break;
4206 }
4207 /* fall through */
4208
4209 default:
4210 {
4211 backslash:
4212 c = conv_backslash_value(c, env);
4213 }
4214 break;
4215 }
4216
4217 *src = p;
4218 *val = c;
4219 return 0;
4220 }
4221
4222 static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env);
4223
4224 static OnigCodePoint
4225 get_name_end_code_point(OnigCodePoint start)
4226 {
4227 switch (start) {
4228 case '<': return (OnigCodePoint )'>'; break;
4229 case '\'': return (OnigCodePoint )'\''; break;
4230 case '(': return (OnigCodePoint )')'; break;
4231 default:
4232 break;
4233 }
4234
4235 return (OnigCodePoint )0;
4236 }
4237
/* How a group reference was written. */
enum REF_NUM {
  IS_NOT_NUM = 0,  /* a name */
  IS_ABS_NUM,      /* an unsigned number */
  IS_REL_NUM       /* a number with leading '+' or '-' */
};
4243
#ifdef USE_BACKREF_WITH_LEVEL
/*
   Parse a back reference target with an optional nest level:

   \k<name+n>, \k<name-n>
   \k<num+n>,  \k<num-n>
   \k<-num+n>, \k<-num-n>
   \k<+num+n>, \k<+num-n>

   *src points behind the opening delimiter (start_code).

   outputs on success:
     *rname_end  end of the name / number part
     *rback_num  signed group number when the target is numeric, else 0
     *rlevel     signed level, written only when a level is present
     *num_type   IS_NOT_NUM, IS_ABS_NUM or IS_REL_NUM
     *src        advanced behind the closing delimiter

   returns 1 if a level was given, 0 if not, a negative ONIGERR_* code on
   failure (for most failures the offending range is also recorded with
   onig_scan_env_set_error_string()).
*/
static int
fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
                      UChar** rname_end, ScanEnv* env,
                      int* rback_num, int* rlevel, enum REF_NUM* num_type)
{
  int r = 0;
  int sign = 1;
  int exist_level = 0;
  int digit_count = 0;
  OnigCodePoint end_code;
  OnigCodePoint c = 0;
  OnigEncoding enc = env->enc;
  UChar *name_end = end;
  UChar *pnum_head = *src;
  UChar *p = *src;
  PFETCH_READY;

  *rback_num = 0;
  *num_type  = IS_NOT_NUM;

  end_code = get_name_end_code_point(start_code);

  if (PEND) return ONIGERR_EMPTY_GROUP_NAME;

  /* the first character decides between name, number and relative number */
  PFETCH(c);
  if (c == end_code)
    return ONIGERR_EMPTY_GROUP_NAME;

  if (IS_CODE_DIGIT_ASCII(enc, c)) {
    *num_type = IS_ABS_NUM;
    digit_count++;
  }
  else if (c == '-' || c == '+') {
    *num_type = IS_REL_NUM;
    sign = (c == '-' ? -1 : 1);
    pnum_head = p;   /* digits start behind the sign */
  }
  else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
    r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
  }

  /* scan up to the terminator or to the sign that introduces the level */
  while (!PEND) {
    name_end = p;
    PFETCH(c);
    if (c == end_code || c == ')' || c == '+' || c == '-') {
      if (*num_type != IS_NOT_NUM && digit_count == 0)
        r = ONIGERR_INVALID_GROUP_NAME;
      break;
    }

    if (*num_type == IS_NOT_NUM) {
      if (!ONIGENC_IS_CODE_WORD(enc, c))
        r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
    }
    else if (IS_CODE_DIGIT_ASCII(enc, c)) {
      digit_count++;
    }
    else {
      r = ONIGERR_INVALID_GROUP_NAME;
      *num_type = IS_NOT_NUM;
    }
  }

  if (r == 0 && c != end_code) {
    int closed = 0;  /* becomes 1 once "<sign><digits><end_code>" was read */

    if (c == '+' || c == '-') {
      int level;
      int flag = (c == '-' ? -1 : 1);

      if (PEND) {
        /* nothing behind the sign; name_end keeps its current value */
        r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
      }
      else {
        PFETCH(c);
        if (IS_CODE_DIGIT_ASCII(enc, c)) {
          PUNFETCH;
          level = onig_scan_unsigned_number(&p, end, enc);
          if (level < 0) return ONIGERR_TOO_BIG_NUMBER;
          *rlevel = (level * flag);
          exist_level = 1;

          if (!PEND) {
            PFETCH(c);
            if (c == end_code)
              closed = 1;
          }
        }
      }
    }

    if (r == 0 && ! closed) {
      /* no proper level / terminator: report the rest of the pattern */
      name_end = end;
      r = ONIGERR_INVALID_GROUP_NAME;
    }
  }

  if (r == 0 && *num_type != IS_NOT_NUM) {
    *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
    if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;

    if (*rback_num == 0 && *num_type == IS_REL_NUM)
      r = ONIGERR_INVALID_GROUP_NAME;   /* "+0" / "-0" is not allowed */
    else
      *rback_num *= sign;
  }

  if (r != 0) {
    onig_scan_env_set_error_string(env, r, *src, name_end);
    return r;
  }

  *rname_end = name_end;
  *src = p;
  return (exist_level ? 1 : 0);
}
#endif /* USE_BACKREF_WITH_LEVEL */
4380
4381 /*
4382 ref: 0 -> define name (don't allow number name)
4383 1 -> reference name (allow number name)
4384 */
4385 static int
4386 fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
4387 UChar** rname_end, ScanEnv* env, int* rback_num,
4388 enum REF_NUM* num_type, int ref)
4389 {
4390 int r, sign;
4391 int digit_count;
4392 OnigCodePoint end_code;
4393 OnigCodePoint c = 0;
4394 OnigEncoding enc = env->enc;
4395 UChar *name_end;
4396 UChar *pnum_head;
4397 UChar *p = *src;
4398
4399 *rback_num = 0;
4400
4401 end_code = get_name_end_code_point(start_code);
4402
4403 digit_count = 0;
4404 name_end = end;
4405 pnum_head = *src;
4406 r = 0;
4407 *num_type = IS_NOT_NUM;
4408 sign = 1;
4409 if (PEND) {
4410 return ONIGERR_EMPTY_GROUP_NAME;
4411 }
4412 else {
4413 PFETCH_S(c);
4414 if (c == end_code)
4415 return ONIGERR_EMPTY_GROUP_NAME;
4416
4417 if (IS_CODE_DIGIT_ASCII(enc, c)) {
4418 if (ref == 1)
4419 *num_type = IS_ABS_NUM;
4420 else {
4421 r = ONIGERR_INVALID_GROUP_NAME;
4422 }
4423 digit_count++;
4424 }
4425 else if (c == '-') {
4426 if (ref == 1) {
4427 *num_type = IS_REL_NUM;
4428 sign = -1;
4429 pnum_head = p;
4430 }
4431 else {
4432 r = ONIGERR_INVALID_GROUP_NAME;
4433 }
4434 }
4435 else if (c == '+') {
4436 if (ref == 1) {
4437 *num_type = IS_REL_NUM;
4438 sign = 1;
4439 pnum_head = p;
4440 }
4441 else {
4442 r = ONIGERR_INVALID_GROUP_NAME;
4443 }
4444 }
4445 else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
4446 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
4447 }
4448 }
4449
4450 if (r == 0) {
4451 while (!PEND) {
4452 name_end = p;
4453 PFETCH_S(c);
4454 if (c == end_code || c == ')') {
4455 if (*num_type != IS_NOT_NUM && digit_count == 0)
4456 r = ONIGERR_INVALID_GROUP_NAME;
4457 break;
4458 }
4459
4460 if (*num_type != IS_NOT_NUM) {
4461 if (IS_CODE_DIGIT_ASCII(enc, c)) {
4462 digit_count++;
4463 }
4464 else {
4465 if (!ONIGENC_IS_CODE_WORD(enc, c))
4466 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
4467 else
4468 r = ONIGERR_INVALID_GROUP_NAME;
4469
4470 *num_type = IS_NOT_NUM;
4471 }
4472 }
4473 else {
4474 if (!ONIGENC_IS_CODE_WORD(enc, c)) {
4475 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
4476 }
4477 }
4478 }
4479
4480 if (c != end_code) {
4481 r = ONIGERR_INVALID_GROUP_NAME;
4482 goto err;
4483 }
4484
4485 if (*num_type != IS_NOT_NUM) {
4486 *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
4487 if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
4488 else if (*rback_num == 0) {
4489 if (*num_type == IS_REL_NUM) {
4490 r = ONIGERR_INVALID_GROUP_NAME;
4491 goto err;
4492 }
4493 }
4494
4495 *rback_num *= sign;
4496 }
4497
4498 *rname_end = name_end;
4499 *src = p;
4500 return 0;
4501 }
4502 else {
4503 while (!PEND) {
4504 name_end = p;
4505 PFETCH_S(c);
4506 if (c == end_code || c == ')')
4507 break;
4508 }
4509 if (PEND)
4510 name_end = end;
4511
4512 err:
4513 onig_scan_env_set_error_string(env, r, *src, name_end);
4514 return r;
4515 }
4516 }
4517
4518 static void
4519 CC_ESC_WARN(ScanEnv* env, UChar *c)
4520 {
4521 if (onig_warn == onig_null_warn) return ;
4522
4523 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED) &&
4524 IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) {
4525 UChar buf[WARN_BUFSIZE];
4526 onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
4527 env->pattern, env->pattern_end,
4528 (UChar* )"character class has '%s' without escape",
4529 c);
4530 (*onig_warn)((char* )buf);
4531 }
4532 }
4533
4534 static void
4535 CLOSE_BRACKET_WITHOUT_ESC_WARN(ScanEnv* env, UChar* c)
4536 {
4537 if (onig_warn == onig_null_warn) return ;
4538
4539 if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) {
4540 UChar buf[WARN_BUFSIZE];
4541 onig_snprintf_with_pattern(buf, WARN_BUFSIZE, (env)->enc,
4542 (env)->pattern, (env)->pattern_end,
4543 (UChar* )"regular expression has '%s' without escape", c);
4544 (*onig_warn)((char* )buf);
4545 }
4546 }
4547
4548 static UChar*
4549 find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to,
4550 UChar **next, OnigEncoding enc)
4551 {
4552 int i;
4553 OnigCodePoint x;
4554 UChar *q;
4555 UChar *p = from;
4556
4557 while (p < to) {
4558 x = ONIGENC_MBC_TO_CODE(enc, p, to);
4559 q = p + enclen(enc, p);
4560 if (x == s[0]) {
4561 for (i = 1; i < n && q < to; i++) {
4562 x = ONIGENC_MBC_TO_CODE(enc, q, to);
4563 if (x != s[i]) break;
4564 q += enclen(enc, q);
4565 }
4566 if (i >= n) {
4567 if (IS_NOT_NULL(next))
4568 *next = q;
4569 return p;
4570 }
4571 }
4572 p = q;
4573 }
4574 return NULL_UCHARP;
4575 }
4576
4577 static int
4578 str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to,
4579 OnigCodePoint bad, OnigEncoding enc, OnigSyntaxType* syn)
4580 {
4581 int i, in_esc;
4582 OnigCodePoint x;
4583 UChar *q;
4584 UChar *p = from;
4585
4586 in_esc = 0;
4587 while (p < to) {
4588 if (in_esc) {
4589 in_esc = 0;
4590 p += enclen(enc, p);
4591 }
4592 else {
4593 x = ONIGENC_MBC_TO_CODE(enc, p, to);
4594 q = p + enclen(enc, p);
4595 if (x == s[0]) {
4596 for (i = 1; i < n && q < to; i++) {
4597 x = ONIGENC_MBC_TO_CODE(enc, q, to);
4598 if (x != s[i]) break;
4599 q += enclen(enc, q);
4600 }
4601 if (i >= n) return 1;
4602 p += enclen(enc, p);
4603 }
4604 else {
4605 x = ONIGENC_MBC_TO_CODE(enc, p, to);
4606 if (x == bad) return 0;
4607 else if (x == MC_ESC(syn)) in_esc = 1;
4608 p = q;
4609 }
4610 }
4611 }
4612 return 0;
4613 }
4614
4615 static int
4616 fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
4617 {
4618 int num;
4619 OnigCodePoint c, c2;
4620 OnigSyntaxType* syn = env->syntax;
4621 OnigEncoding enc = env->enc;
4622 UChar* prev;
4623 UChar* p = *src;
4624 PFETCH_READY;
4625
4626 if (PEND) {
4627 tok->type = TK_EOT;
4628 return tok->type;
4629 }
4630
4631 PFETCH(c);
4632 tok->type = TK_CHAR;
4633 tok->base = 0;
4634 tok->u.c = c;
4635 tok->escaped = 0;
4636
4637 if (c == ']') {
4638 tok->type = TK_CC_CLOSE;
4639 }
4640 else if (c == '-') {
4641 tok->type = TK_CC_RANGE;
4642 }
4643 else if (c == MC_ESC(syn)) {
4644 if (! IS_SYNTAX_BV(syn, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC))
4645 goto end;
4646
4647 if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
4648
4649 PFETCH(c);
4650 tok->escaped = 1;
4651 tok->u.c = c;
4652 switch (c) {
4653 case 'w':
4654 tok->type = TK_CHAR_TYPE;
4655 tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
4656 tok->u.prop.not = 0;
4657 break;
4658 case 'W':
4659 tok->type = TK_CHAR_TYPE;
4660 tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
4661 tok->u.prop.not = 1;
4662 break;
4663 case 'd':
4664 tok->type = TK_CHAR_TYPE;
4665 tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
4666 tok->u.prop.not = 0;
4667 break;
4668 case 'D':
4669 tok->type = TK_CHAR_TYPE;
4670 tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
4671 tok->u.prop.not = 1;
4672 break;
4673 case 's':
4674 tok->type = TK_CHAR_TYPE;
4675 tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
4676 tok->u.prop.not = 0;
4677 break;
4678 case 'S':
4679 tok->type = TK_CHAR_TYPE;
4680 tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
4681 tok->u.prop.not = 1;
4682 break;
4683 case 'h':
4684 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
4685 tok->type = TK_CHAR_TYPE;
4686 tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
4687 tok->u.prop.not = 0;
4688 break;
4689 case 'H':
4690 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
4691 tok->type = TK_CHAR_TYPE;
4692 tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
4693 tok->u.prop.not = 1;
4694 break;
4695
4696 case 'p':
4697 case 'P':
4698 if (PEND) break;
4699
4700 c2 = PPEEK;
4701 if (c2 == '{' &&
4702 IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {
4703 PINC;
4704 tok->type = TK_CHAR_PROPERTY;
4705 tok->u.prop.not = (c == 'P' ? 1 : 0);
4706
4707 if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
4708 PFETCH(c2);
4709 if (c2 == '^') {
4710 tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);
4711 }
4712 else
4713 PUNFETCH;
4714 }
4715 }
4716 break;
4717
4718 case 'o':
4719 if (PEND) break;
4720
4721 prev = p;
4722 if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_O_BRACE_OCTAL)) {
4723 PINC;
4724 num = scan_unsigned_octal_number(&p, end, 11, enc);
4725 if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
4726 if (!PEND) {
4727 c2 = PPEEK;
4728 if (IS_CODE_DIGIT_ASCII(enc, c2))
4729 return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
4730 }
4731
4732 if (p > prev + enclen(enc, prev) && !PEND && (PPEEK_IS('}'))) {
4733 PINC;
4734 tok->type = TK_CODE_POINT;
4735 tok->base = 8;
4736 tok->u.code = (OnigCodePoint )num;
4737 }
4738 else {
4739 /* can't read nothing or invalid format */
4740 p = prev;
4741 }
4742 }
4743 break;
4744
4745 case 'x':
4746 if (PEND) break;
4747
4748 prev = p;
4749 if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {
4750 PINC;
4751 num = scan_unsigned_hexadecimal_number(&p, end, 0, 8, enc);
4752 if (num < 0) {
4753 if (num == ONIGERR_TOO_BIG_NUMBER)
4754 return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
4755 else
4756 return num;
4757 }
4758 if (!PEND) {
4759 c2 = PPEEK;
4760 if (IS_CODE_XDIGIT_ASCII(enc, c2))
4761 return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
4762 }
4763
4764 if (p > prev + enclen(enc, prev) && !PEND && (PPEEK_IS('}'))) {
4765 PINC;
4766 tok->type = TK_CODE_POINT;
4767 tok->base = 16;
4768 tok->u.code = (OnigCodePoint )num;
4769 }
4770 else {
4771 /* can't read nothing or invalid format */
4772 p = prev;
4773 }
4774 }
4775 else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
4776 num = scan_unsigned_hexadecimal_number(&p, end, 0, 2, enc);
4777 if (num < 0) return num;
4778 if (p == prev) { /* can't read nothing. */
4779 num = 0; /* but, it's not error */
4780 }
4781 tok->type = TK_RAW_BYTE;
4782 tok->base = 16;
4783 tok->u.c = num;
4784 }
4785 break;
4786
4787 case 'u':
4788 if (PEND) break;
4789
4790 prev = p;
4791 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {
4792 num = scan_unsigned_hexadecimal_number(&p, end, 4, 4, enc);
4793 if (num < 0) return num;
4794 if (p == prev) { /* can't read nothing. */
4795 num = 0; /* but, it's not error */
4796 }
4797 tok->type = TK_CODE_POINT;
4798 tok->base = 16;
4799 tok->u.code = (OnigCodePoint )num;
4800 }
4801 break;
4802
4803 case '0':
4804 case '1': case '2': case '3': case '4': case '5': case '6': case '7':
4805 if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {
4806 PUNFETCH;
4807 prev = p;
4808 num = scan_unsigned_octal_number(&p, end, 3, enc);
4809 if (num < 0 || num >= 256) return ONIGERR_TOO_BIG_NUMBER;
4810 if (p == prev) { /* can't read nothing. */
4811 num = 0; /* but, it's not error */
4812 }
4813 tok->type = TK_RAW_BYTE;
4814 tok->base = 8;
4815 tok->u.c = num;
4816 }
4817 break;
4818
4819 default:
4820 PUNFETCH;
4821 num = fetch_escaped_value(&p, end, env, &c2);
4822 if (num < 0) return num;
4823 if (tok->u.c != c2) {
4824 tok->u.code = c2;
4825 tok->type = TK_CODE_POINT;
4826 }
4827 break;
4828 }
4829 }
4830 else if (c == '[') {
4831 if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_POSIX_BRACKET) && (PPEEK_IS(':'))) {
4832 OnigCodePoint send[] = { (OnigCodePoint )':', (OnigCodePoint )']' };
4833 tok->backp = p; /* point at '[' is read */
4834 PINC;
4835 if (str_exist_check_with_esc(send, 2, p, end,
4836 (OnigCodePoint )']', enc, syn)) {
4837 tok->type = TK_POSIX_BRACKET_OPEN;
4838 }
4839 else {
4840 PUNFETCH;
4841 goto cc_in_cc;
4842 }
4843 }
4844 else {
4845 cc_in_cc:
4846 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP)) {
4847 tok->type = TK_CC_CC_OPEN;
4848 }
4849 else {
4850 CC_ESC_WARN(env, (UChar* )"[");
4851 }
4852 }
4853 }
4854 else if (c == '&') {
4855 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP) &&
4856 !PEND && (PPEEK_IS('&'))) {
4857 PINC;
4858 tok->type = TK_CC_AND;
4859 }
4860 }
4861
4862 end:
4863 *src = p;
4864 return tok->type;
4865 }
4866
4867 static int
4868 fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
4869 {
4870 int r, num;
4871 OnigCodePoint c;
4872 OnigEncoding enc = env->enc;
4873 OnigSyntaxType* syn = env->syntax;
4874 UChar* prev;
4875 UChar* p = *src;
4876 PFETCH_READY;
4877
4878 start:
4879 if (PEND) {
4880 tok->type = TK_EOT;
4881 return tok->type;
4882 }
4883
4884 tok->type = TK_STRING;
4885 tok->base = 0;
4886 tok->backp = p;
4887
4888 PFETCH(c);
4889 if (IS_MC_ESC_CODE(c, syn)) {
4890 if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
4891
4892 tok->backp = p;
4893 PFETCH(c);
4894
4895 tok->u.c = c;
4896 tok->escaped = 1;
4897 switch (c) {
4898 case '*':
4899 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF)) break;
4900 tok->type = TK_OP_REPEAT;
4901 tok->u.repeat.lower = 0;
4902 tok->u.repeat.upper = REPEAT_INFINITE;
4903 goto greedy_check;
4904 break;
4905
4906 case '+':
4907 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_PLUS_ONE_INF)) break;
4908 tok->type = TK_OP_REPEAT;
4909 tok->u.repeat.lower = 1;
4910 tok->u.repeat.upper = REPEAT_INFINITE;
4911 goto greedy_check;
4912 break;
4913
4914 case '?':
4915 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_QMARK_ZERO_ONE)) break;
4916 tok->type = TK_OP_REPEAT;
4917 tok->u.repeat.lower = 0;
4918 tok->u.repeat.upper = 1;
4919 greedy_check:
4920 if (!PEND && PPEEK_IS('?') &&
4921 IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY)) {
4922 PFETCH(c);
4923 tok->u.repeat.greedy = 0;
4924 tok->u.repeat.possessive = 0;
4925 }
4926 else {
4927 possessive_check:
4928 if (!PEND && PPEEK_IS('+') &&
4929 ((IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT) &&
4930 tok->type != TK_INTERVAL) ||
4931 (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL) &&
4932 tok->type == TK_INTERVAL))) {
4933 PFETCH(c);
4934 tok->u.repeat.greedy = 1;
4935 tok->u.repeat.possessive = 1;
4936 }
4937 else {
4938 tok->u.repeat.greedy = 1;
4939 tok->u.repeat.possessive = 0;
4940 }
4941 }
4942 break;
4943
4944 case '{':
4945 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break;
4946 r = fetch_range_quantifier(&p, end, tok, env);
4947 if (r < 0) return r; /* error */
4948 if (r == 0) goto greedy_check;
4949 else if (r == 2) { /* {n} */
4950 if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))
4951 goto possessive_check;
4952
4953 goto greedy_check;
4954 }
4955 /* r == 1 : normal char */
4956 break;
4957
4958 case '|':
4959 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_VBAR_ALT)) break;
4960 tok->type = TK_ALT;
4961 break;
4962
4963 case '(':
4964 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;
4965 tok->type = TK_SUBEXP_OPEN;
4966 break;
4967
4968 case ')':
4969 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;
4970 tok->type = TK_SUBEXP_CLOSE;
4971 break;
4972
4973 case 'w':
4974 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
4975 tok->type = TK_CHAR_TYPE;
4976 tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
4977 tok->u.prop.not = 0;
4978 break;
4979
4980 case 'W':
4981 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
4982 tok->type = TK_CHAR_TYPE;
4983 tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
4984 tok->u.prop.not = 1;
4985 break;
4986
4987 case 'b':
4988 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
4989 tok->type = TK_ANCHOR;
4990 tok->u.anchor = ANCHOR_WORD_BOUNDARY;
4991 break;
4992
4993 case 'B':
4994 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
4995 tok->type = TK_ANCHOR;
4996 tok->u.anchor = ANCHOR_NO_WORD_BOUNDARY;
4997 break;
4998
4999 case 'y':
5000 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER)) break;
5001 tok->type = TK_ANCHOR;
5002 tok->u.anchor = ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY;
5003 break;
5004
5005 case 'Y':
5006 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER)) break;
5007 tok->type = TK_ANCHOR;
5008 tok->u.anchor = ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY;
5009 break;
5010
5011 #ifdef USE_WORD_BEGIN_END
5012 case '<':
5013 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;
5014 tok->type = TK_ANCHOR;
5015 tok->u.anchor = ANCHOR_WORD_BEGIN;
5016 break;
5017
5018 case '>':
5019 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;
5020 tok->type = TK_ANCHOR;
5021 tok->u.anchor = ANCHOR_WORD_END;
5022 break;
5023 #endif
5024
5025 case 's':
5026 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
5027 tok->type = TK_CHAR_TYPE;
5028 tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
5029 tok->u.prop.not = 0;
5030 break;
5031
5032 case 'S':
5033 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
5034 tok->type = TK_CHAR_TYPE;
5035 tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
5036 tok->u.prop.not = 1;
5037 break;
5038
5039 case 'd':
5040 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
5041 tok->type = TK_CHAR_TYPE;
5042 tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
5043 tok->u.prop.not = 0;
5044 break;
5045
5046 case 'D':
5047 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
5048 tok->type = TK_CHAR_TYPE;
5049 tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
5050 tok->u.prop.not = 1;
5051 break;
5052
5053 case 'h':
5054 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
5055 tok->type = TK_CHAR_TYPE;
5056 tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
5057 tok->u.prop.not = 0;
5058 break;
5059
5060 case 'H':
5061 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
5062 tok->type = TK_CHAR_TYPE;
5063 tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
5064 tok->u.prop.not = 1;
5065 break;
5066
5067 case 'K':
5068 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP)) break;
5069 tok->type = TK_KEEP;
5070 break;
5071
5072 case 'R':
5073 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE)) break;
5074 tok->type = TK_GENERAL_NEWLINE;
5075 break;
5076
5077 case 'N':
5078 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT)) break;
5079 tok->type = TK_NO_NEWLINE;
5080 break;
5081
5082 case 'O':
5083 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT)) break;
5084 tok->type = TK_TRUE_ANYCHAR;
5085 break;
5086
5087 case 'X':
5088 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER)) break;
5089 tok->type = TK_EXTENDED_GRAPHEME_CLUSTER;
5090 break;
5091
5092 case 'A':
5093 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
5094 begin_buf:
5095 tok->type = TK_ANCHOR;
5096 tok->u.subtype = ANCHOR_BEGIN_BUF;
5097 break;
5098
5099 case 'Z':
5100 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
5101 tok->type = TK_ANCHOR;
5102 tok->u.subtype = ANCHOR_SEMI_END_BUF;
5103 break;
5104
5105 case 'z':
5106 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
5107 end_buf:
5108 tok->type = TK_ANCHOR;
5109 tok->u.subtype = ANCHOR_END_BUF;
5110 break;
5111
5112 case 'G':
5113 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR)) break;
5114 tok->type = TK_ANCHOR;
5115 tok->u.subtype = ANCHOR_BEGIN_POSITION;
5116 break;
5117
5118 case '`':
5119 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;
5120 goto begin_buf;
5121 break;
5122
5123 case '\'':
5124 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;
5125 goto end_buf;
5126 break;
5127
5128 case 'o':
5129 if (PEND) break;
5130
5131 prev = p;
5132 if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_O_BRACE_OCTAL)) {
5133 PINC;
5134 num = scan_unsigned_octal_number(&p, end, 11, enc);
5135 if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
5136 if (!PEND) {
5137 if (IS_CODE_DIGIT_ASCII(enc, PPEEK))
5138 return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
5139 }
5140
5141 if ((p > prev + enclen(enc, prev)) && !PEND && PPEEK_IS('}')) {
5142 PINC;
5143 tok->type = TK_CODE_POINT;
5144 tok->u.code = (OnigCodePoint )num;
5145 }
5146 else {
5147 /* can't read nothing or invalid format */
5148 p = prev;
5149 }
5150 }
5151 break;
5152
5153 case 'x':
5154 if (PEND) break;
5155
5156 prev = p;
5157 if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {
5158 PINC;
5159 num = scan_unsigned_hexadecimal_number(&p, end, 0, 8, enc);
5160 if (num < 0) {
5161 if (num == ONIGERR_TOO_BIG_NUMBER)
5162 return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
5163 else
5164 return num;
5165 }
5166 if (!PEND) {
5167 if (IS_CODE_XDIGIT_ASCII(enc, PPEEK))
5168 return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
5169 }
5170
5171 if ((p > prev + enclen(enc, prev)) && !PEND && PPEEK_IS('}')) {
5172 PINC;
5173 tok->type = TK_CODE_POINT;
5174 tok->u.code = (OnigCodePoint )num;
5175 }
5176 else {
5177 /* can't read nothing or invalid format */
5178 p = prev;
5179 }
5180 }
5181 else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
5182 num = scan_unsigned_hexadecimal_number(&p, end, 0, 2, enc);
5183 if (num < 0) return num;
5184 if (p == prev) { /* can't read nothing. */
5185 num = 0; /* but, it's not error */
5186 }
5187 tok->type = TK_RAW_BYTE;
5188 tok->base = 16;
5189 tok->u.c = num;
5190 }
5191 break;
5192
5193 case 'u':
5194 if (PEND) break;
5195
5196 prev = p;
5197 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {
5198 num = scan_unsigned_hexadecimal_number(&p, end, 4, 4, enc);
5199 if (num < 0) return num;
5200 if (p == prev) { /* can't read nothing. */
5201 num = 0; /* but, it's not error */
5202 }
5203 tok->type = TK_CODE_POINT;
5204 tok->base = 16;
5205 tok->u.code = (OnigCodePoint )num;
5206 }
5207 break;
5208
5209 case '1': case '2': case '3': case '4':
5210 case '5': case '6': case '7': case '8': case '9':
5211 PUNFETCH;
5212 prev = p;
5213 num = onig_scan_unsigned_number(&p, end, enc);
5214 if (num < 0 || num > ONIG_MAX_BACKREF_NUM) {
5215 goto skip_backref;
5216 }
5217
5218 if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) &&
5219 (num <= env->num_mem || num <= 9)) { /* This spec. from GNU regex */
5220 if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
5221 if (num > env->num_mem || IS_NULL(SCANENV_MEMENV(env)[num].node))
5222 return ONIGERR_INVALID_BACKREF;
5223 }
5224
5225 tok->type = TK_BACKREF;
5226 tok->u.backref.num = 1;
5227 tok->u.backref.ref1 = num;
5228 tok->u.backref.by_name = 0;
5229 #ifdef USE_BACKREF_WITH_LEVEL
5230 tok->u.backref.exist_level = 0;
5231 #endif
5232 break;
5233 }
5234
5235 skip_backref:
5236 if (c == '8' || c == '9') {
5237 /* normal char */
5238 p = prev; PINC;
5239 break;
5240 }
5241
5242 p = prev;
5243 /* fall through */
5244 case '0':
5245 if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {
5246 prev = p;
5247 num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), enc);
5248 if (num < 0 || num >= 256) return ONIGERR_TOO_BIG_NUMBER;
5249 if (p == prev) { /* can't read nothing. */
5250 num = 0; /* but, it's not error */
5251 }
5252 tok->type = TK_RAW_BYTE;
5253 tok->base = 8;
5254 tok->u.c = num;
5255 }
5256 else if (c != '0') {
5257 PINC;
5258 }
5259 break;
5260
5261 case 'k':
5262 if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) {
5263 PFETCH(c);
5264 if (c == '<' || c == '\'') {
5265 UChar* name_end;
5266 int* backs;
5267 int back_num;
5268 enum REF_NUM num_type;
5269
5270 prev = p;
5271
5272 #ifdef USE_BACKREF_WITH_LEVEL
5273 name_end = NULL_UCHARP; /* no need. escape gcc warning. */
5274 r = fetch_name_with_level((OnigCodePoint )c, &p, end, &name_end,
5275 env, &back_num, &tok->u.backref.level, &num_type);
5276 if (r == 1) tok->u.backref.exist_level = 1;
5277 else tok->u.backref.exist_level = 0;
5278 #else
5279 r = fetch_name(c, &p, end, &name_end, env, &back_num, &num_type, 1);
5280 #endif
5281 if (r < 0) return r;
5282
5283 if (num_type != IS_NOT_NUM) {
5284 if (num_type == IS_REL_NUM) {
5285 back_num = backref_rel_to_abs(back_num, env);
5286 }
5287 if (back_num <= 0)
5288 return ONIGERR_INVALID_BACKREF;
5289
5290 if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
5291 if (back_num > env->num_mem ||
5292 IS_NULL(SCANENV_MEMENV(env)[back_num].node))
5293 return ONIGERR_INVALID_BACKREF;
5294 }
5295 tok->type = TK_BACKREF;
5296 tok->u.backref.by_name = 0;
5297 tok->u.backref.num = 1;
5298 tok->u.backref.ref1 = back_num;
5299 }
5300 else {
5301 num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);
5302 if (num <= 0) {
5303 onig_scan_env_set_error_string(env,
5304 ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);
5305 return ONIGERR_UNDEFINED_NAME_REFERENCE;
5306 }
5307 if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
5308 int i;
5309 for (i = 0; i < num; i++) {
5310 if (backs[i] > env->num_mem ||
5311 IS_NULL(SCANENV_MEMENV(env)[backs[i]].node))
5312 return ONIGERR_INVALID_BACKREF;
5313 }
5314 }
5315
5316 tok->type = TK_BACKREF;
5317 tok->u.backref.by_name = 1;
5318 if (num == 1) {
5319 tok->u.backref.num = 1;
5320 tok->u.backref.ref1 = backs[0];
5321 }
5322 else {
5323 tok->u.backref.num = num;
5324 tok->u.backref.refs = backs;
5325 }
5326 }
5327 }
5328 else
5329 PUNFETCH;
5330 }
5331 break;
5332
5333 #ifdef USE_CALL
5334 case 'g':
5335 if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) {
5336 PFETCH(c);
5337 if (c == '<' || c == '\'') {
5338 int gnum;
5339 UChar* name_end;
5340 enum REF_NUM num_type;
5341
5342 prev = p;
5343 r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env,
5344 &gnum, &num_type, 1);
5345 if (r < 0) return r;
5346
5347 if (num_type != IS_NOT_NUM) {
5348 if (num_type == IS_REL_NUM) {
5349 gnum = backref_rel_to_abs(gnum, env);
5350 if (gnum < 0) {
5351 onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,
5352 prev, name_end);
5353 return ONIGERR_UNDEFINED_GROUP_REFERENCE;
5354 }
5355 }
5356 tok->u.call.by_number = 1;
5357 tok->u.call.gnum = gnum;
5358 }
5359 else {
5360 tok->u.call.by_number = 0;
5361 tok->u.call.gnum = 0;
5362 }
5363
5364 tok->type = TK_CALL;
5365 tok->u.call.name = prev;
5366 tok->u.call.name_end = name_end;
5367 }
5368 else
5369 PUNFETCH;
5370 }
5371 break;
5372 #endif
5373
5374 case 'Q':
5375 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE)) {
5376 tok->type = TK_QUOTE_OPEN;
5377 }
5378 break;
5379
5380 case 'p':
5381 case 'P':
5382 if (!PEND && PPEEK_IS('{') &&
5383 IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {
5384 PINC;
5385 tok->type = TK_CHAR_PROPERTY;
5386 tok->u.prop.not = (c == 'P' ? 1 : 0);
5387
5388 if (!PEND &&
5389 IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
5390 PFETCH(c);
5391 if (c == '^') {
5392 tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);
5393 }
5394 else
5395 PUNFETCH;
5396 }
5397 }
5398 break;
5399
5400 default:
5401 {
5402 OnigCodePoint c2;
5403
5404 PUNFETCH;
5405 num = fetch_escaped_value(&p, end, env, &c2);
5406 if (num < 0) return num;
5407 /* set_raw: */
5408 if (tok->u.c != c2) {
5409 tok->type = TK_CODE_POINT;
5410 tok->u.code = c2;
5411 }
5412 else { /* string */
5413 p = tok->backp + enclen(enc, tok->backp);
5414 }
5415 }
5416 break;
5417 }
5418 }
5419 else {
5420 tok->u.c = c;
5421 tok->escaped = 0;
5422
5423 #ifdef USE_VARIABLE_META_CHARS
5424 if ((c != ONIG_INEFFECTIVE_META_CHAR) &&
5425 IS_SYNTAX_OP(syn, ONIG_SYN_OP_VARIABLE_META_CHARACTERS)) {
5426 if (c == MC_ANYCHAR(syn))
5427 goto any_char;
5428 else if (c == MC_ANYTIME(syn))
5429 goto anytime;
5430 else if (c == MC_ZERO_OR_ONE_TIME(syn))
5431 goto zero_or_one_time;
5432 else if (c == MC_ONE_OR_MORE_TIME(syn))
5433 goto one_or_more_time;
5434 else if (c == MC_ANYCHAR_ANYTIME(syn)) {
5435 tok->type = TK_ANYCHAR_ANYTIME;
5436 goto out;
5437 }
5438 }
5439 #endif
5440
5441 switch (c) {
5442 case '.':
5443 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_DOT_ANYCHAR)) break;
5444 #ifdef USE_VARIABLE_META_CHARS
5445 any_char:
5446 #endif
5447 tok->type = TK_ANYCHAR;
5448 break;
5449
5450 case '*':
5451 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ASTERISK_ZERO_INF)) break;
5452 #ifdef USE_VARIABLE_META_CHARS
5453 anytime:
5454 #endif
5455 tok->type = TK_OP_REPEAT;
5456 tok->u.repeat.lower = 0;
5457 tok->u.repeat.upper = REPEAT_INFINITE;
5458 goto greedy_check;
5459 break;
5460
5461 case '+':
5462 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_PLUS_ONE_INF)) break;
5463 #ifdef USE_VARIABLE_META_CHARS
5464 one_or_more_time:
5465 #endif
5466 tok->type = TK_OP_REPEAT;
5467 tok->u.repeat.lower = 1;
5468 tok->u.repeat.upper = REPEAT_INFINITE;
5469 goto greedy_check;
5470 break;
5471
5472 case '?':
5473 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_ZERO_ONE)) break;
5474 #ifdef USE_VARIABLE_META_CHARS
5475 zero_or_one_time:
5476 #endif
5477 tok->type = TK_OP_REPEAT;
5478 tok->u.repeat.lower = 0;
5479 tok->u.repeat.upper = 1;
5480 goto greedy_check;
5481 break;
5482
5483 case '{':
5484 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break;
5485 r = fetch_range_quantifier(&p, end, tok, env);
5486 if (r < 0) return r; /* error */
5487 if (r == 0) goto greedy_check;
5488 else if (r == 2) { /* {n} */
5489 if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))
5490 goto possessive_check;
5491
5492 goto greedy_check;
5493 }
5494 /* r == 1 : normal char */
5495 break;
5496
5497 case '|':
5498 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_VBAR_ALT)) break;
5499 tok->type = TK_ALT;
5500 break;
5501
5502 case '(':
5503 if (!PEND && PPEEK_IS('?') &&
5504 IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {
5505 PINC;
5506 if (! PEND) {
5507 c = PPEEK;
5508 if (c == '#') {
5509 PFETCH(c);
5510 while (1) {
5511 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
5512 PFETCH(c);
5513 if (c == MC_ESC(syn)) {
5514 if (! PEND) PFETCH(c);
5515 }
5516 else {
5517 if (c == ')') break;
5518 }
5519 }
5520 goto start;
5521 }
5522 else if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_PERL_SUBEXP_CALL)) {
5523 int gnum;
5524 UChar* name;
5525 UChar* name_end;
5526 enum REF_NUM num_type;
5527
5528 switch (c) {
5529 case '&':
5530 {
5531 PINC;
5532 name = p;
5533 r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &gnum,
5534 &num_type, 0);
5535 if (r < 0) return r;
5536
5537 tok->type = TK_CALL;
5538 tok->u.call.by_number = 0;
5539 tok->u.call.gnum = 0;
5540 tok->u.call.name = name;
5541 tok->u.call.name_end = name_end;
5542 }
5543 break;
5544
5545 case 'R':
5546 tok->type = TK_CALL;
5547 tok->u.call.by_number = 1;
5548 tok->u.call.gnum = 0;
5549 tok->u.call.name = p;
5550 PINC;
5551 if (! PPEEK_IS(')')) return ONIGERR_INVALID_GROUP_NAME;
5552 tok->u.call.name_end = p;
5553 break;
5554
5555 case '-':
5556 case '+':
5557 goto lparen_qmark_num;
5558 break;
5559 default:
5560 if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto lparen_qmark_end;
5561
5562 lparen_qmark_num:
5563 {
5564 name = p;
5565 r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env,
5566 &gnum, &num_type, 1);
5567 if (r < 0) return r;
5568
5569 if (num_type == IS_NOT_NUM) {
5570 return ONIGERR_INVALID_GROUP_NAME;
5571 }
5572 else {
5573 if (num_type == IS_REL_NUM) {
5574 gnum = backref_rel_to_abs(gnum, env);
5575 if (gnum < 0) {
5576 onig_scan_env_set_error_string(env,
5577 ONIGERR_UNDEFINED_NAME_REFERENCE, name, name_end);
5578 return ONIGERR_UNDEFINED_GROUP_REFERENCE;
5579 }
5580 }
5581 tok->u.call.by_number = 1;
5582 tok->u.call.gnum = gnum;
5583 }
5584
5585 tok->type = TK_CALL;
5586 tok->u.call.name = name;
5587 tok->u.call.name_end = name_end;
5588 }
5589 break;
5590 }
5591 }
5592 }
5593 lparen_qmark_end:
5594 PUNFETCH;
5595 }
5596
5597 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
5598 tok->type = TK_SUBEXP_OPEN;
5599 break;
5600
5601 case ')':
5602 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
5603 tok->type = TK_SUBEXP_CLOSE;
5604 break;
5605
5606 case '^':
5607 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
5608 tok->type = TK_ANCHOR;
5609 tok->u.subtype = (IS_SINGLELINE(env->options)
5610 ? ANCHOR_BEGIN_BUF : ANCHOR_BEGIN_LINE);
5611 break;
5612
5613 case '$':
5614 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
5615 tok->type = TK_ANCHOR;
5616 tok->u.subtype = (IS_SINGLELINE(env->options)
5617 ? ANCHOR_SEMI_END_BUF : ANCHOR_END_LINE);
5618 break;
5619
5620 case '[':
5621 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACKET_CC)) break;
5622 tok->type = TK_CC_OPEN;
5623 break;
5624
5625 case ']':
5626 if (*src > env->pattern) /* /].../ is allowed. */
5627 CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]");
5628 break;
5629
5630 case '#':
5631 if (IS_EXTEND(env->options)) {
5632 while (!PEND) {
5633 PFETCH(c);
5634 if (ONIGENC_IS_CODE_NEWLINE(enc, c))
5635 break;
5636 }
5637 goto start;
5638 break;
5639 }
5640 break;
5641
5642 case ' ': case '\t': case '\n': case '\r': case '\f':
5643 if (IS_EXTEND(env->options))
5644 goto start;
5645 break;
5646
5647 default:
5648 /* string */
5649 break;
5650 }
5651 }
5652
5653 #ifdef USE_VARIABLE_META_CHARS
5654 out:
5655 #endif
5656 *src = p;
5657 return tok->type;
5658 }
5659
5660 static int
5661 add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not,
5662 OnigEncoding enc ARG_UNUSED, OnigCodePoint sb_out,
5663 const OnigCodePoint mbr[])
5664 {
5665 int i, r;
5666 OnigCodePoint j;
5667
5668 int n = ONIGENC_CODE_RANGE_NUM(mbr);
5669
5670 if (not == 0) {
5671 for (i = 0; i < n; i++) {
5672 for (j = ONIGENC_CODE_RANGE_FROM(mbr, i);
5673 j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) {
5674 if (j >= sb_out) {
5675 if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) {
5676 r = add_code_range_to_buf(&(cc->mbuf), j,
5677 ONIGENC_CODE_RANGE_TO(mbr, i));
5678 if (r != 0) return r;
5679 i++;
5680 }
5681
5682 goto sb_end;
5683 }
5684 BITSET_SET_BIT(cc->bs, j);
5685 }
5686 }
5687
5688 sb_end:
5689 for ( ; i < n; i++) {
5690 r = add_code_range_to_buf(&(cc->mbuf),
5691 ONIGENC_CODE_RANGE_FROM(mbr, i),
5692 ONIGENC_CODE_RANGE_TO(mbr, i));
5693 if (r != 0) return r;
5694 }
5695 }
5696 else {
5697 OnigCodePoint prev = 0;
5698
5699 for (i = 0; i < n; i++) {
5700 for (j = prev; j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) {
5701 if (j >= sb_out) {
5702 goto sb_end2;
5703 }
5704 BITSET_SET_BIT(cc->bs, j);
5705 }
5706 prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
5707 }
5708 for (j = prev; j < sb_out; j++) {
5709 BITSET_SET_BIT(cc->bs, j);
5710 }
5711
5712 sb_end2:
5713 prev = sb_out;
5714
5715 for (i = 0; i < n; i++) {
5716 if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) {
5717 r = add_code_range_to_buf(&(cc->mbuf), prev,
5718 ONIGENC_CODE_RANGE_FROM(mbr, i) - 1);
5719 if (r != 0) return r;
5720 }
5721 prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
5722 if (prev == 0) goto end;
5723 }
5724
5725 r = add_code_range_to_buf(&(cc->mbuf), prev, MAX_CODE_POINT);
5726 if (r != 0) return r;
5727 }
5728
5729 end:
5730 return 0;
5731 }
5732
5733 static int
5734 add_ctype_to_cc_by_range_limit(CClassNode* cc, int ctype ARG_UNUSED, int not,
5735 OnigEncoding enc ARG_UNUSED,
5736 OnigCodePoint sb_out,
5737 const OnigCodePoint mbr[], OnigCodePoint limit)
5738 {
5739 int i, r;
5740 OnigCodePoint j;
5741 OnigCodePoint from;
5742 OnigCodePoint to;
5743
5744 int n = ONIGENC_CODE_RANGE_NUM(mbr);
5745
5746 if (not == 0) {
5747 for (i = 0; i < n; i++) {
5748 for (j = ONIGENC_CODE_RANGE_FROM(mbr, i);
5749 j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) {
5750 if (j > limit) goto end;
5751 if (j >= sb_out) {
5752 if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) {
5753 to = ONIGENC_CODE_RANGE_TO(mbr, i);
5754 if (to > limit) to = limit;
5755 r = add_code_range_to_buf(&(cc->mbuf), j, to);
5756 if (r != 0) return r;
5757 i++;
5758 }
5759
5760 goto sb_end;
5761 }
5762 BITSET_SET_BIT(cc->bs, j);
5763 }
5764 }
5765
5766 sb_end:
5767 for ( ; i < n; i++) {
5768 from = ONIGENC_CODE_RANGE_FROM(mbr, i);
5769 to = ONIGENC_CODE_RANGE_TO(mbr, i);
5770 if (from > limit) break;
5771 if (to > limit) to = limit;
5772 r = add_code_range_to_buf(&(cc->mbuf), from, to);
5773 if (r != 0) return r;
5774 }
5775 }
5776 else {
5777 OnigCodePoint prev = 0;
5778
5779 for (i = 0; i < n; i++) {
5780 from = ONIGENC_CODE_RANGE_FROM(mbr, i);
5781 if (from > limit) {
5782 for (j = prev; j < sb_out; j++) {
5783 BITSET_SET_BIT(cc->bs, j);
5784 }
5785 goto sb_end2;
5786 }
5787 for (j = prev; j < from; j++) {
5788 if (j >= sb_out) goto sb_end2;
5789 BITSET_SET_BIT(cc->bs, j);
5790 }
5791 prev = ONIGENC_CODE_RANGE_TO(mbr, i);
5792 if (prev > limit) prev = limit;
5793 prev++;
5794 if (prev == 0) goto end;
5795 }
5796 for (j = prev; j < sb_out; j++) {
5797 BITSET_SET_BIT(cc->bs, j);
5798 }
5799
5800 sb_end2:
5801 prev = sb_out;
5802
5803 for (i = 0; i < n; i++) {
5804 from = ONIGENC_CODE_RANGE_FROM(mbr, i);
5805 if (from > limit) goto last;
5806
5807 if (prev < from) {
5808 r = add_code_range_to_buf(&(cc->mbuf), prev, from - 1);
5809 if (r != 0) return r;
5810 }
5811 prev = ONIGENC_CODE_RANGE_TO(mbr, i);
5812 if (prev > limit) prev = limit;
5813 prev++;
5814 if (prev == 0) goto end;
5815 }
5816
5817 last:
5818 r = add_code_range_to_buf(&(cc->mbuf), prev, MAX_CODE_POINT);
5819 if (r != 0) return r;
5820 }
5821
5822 end:
5823 return 0;
5824 }
5825
5826 static int
5827 add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
5828 {
5829 #define ASCII_LIMIT 127
5830
5831 int c, r;
5832 int ascii_mode;
5833 const OnigCodePoint *ranges;
5834 OnigCodePoint limit;
5835 OnigCodePoint sb_out;
5836 OnigEncoding enc = env->enc;
5837
5838 ascii_mode = IS_ASCII_MODE_CTYPE_OPTION(ctype, env->options);
5839
5840 r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges);
5841 if (r == 0) {
5842 if (ascii_mode == 0)
5843 r = add_ctype_to_cc_by_range(cc, ctype, not, env->enc, sb_out, ranges);
5844 else
5845 r = add_ctype_to_cc_by_range_limit(cc, ctype, not, env->enc, sb_out,
5846 ranges, ASCII_LIMIT);
5847 return r;
5848 }
5849 else if (r != ONIG_NO_SUPPORT_CONFIG) {
5850 return r;
5851 }
5852
5853 r = 0;
5854 limit = ascii_mode ? ASCII_LIMIT : SINGLE_BYTE_SIZE;
5855
5856 switch (ctype) {
5857 case ONIGENC_CTYPE_ALPHA:
5858 case ONIGENC_CTYPE_BLANK:
5859 case ONIGENC_CTYPE_CNTRL:
5860 case ONIGENC_CTYPE_DIGIT:
5861 case ONIGENC_CTYPE_LOWER:
5862 case ONIGENC_CTYPE_PUNCT:
5863 case ONIGENC_CTYPE_SPACE:
5864 case ONIGENC_CTYPE_UPPER:
5865 case ONIGENC_CTYPE_XDIGIT:
5866 case ONIGENC_CTYPE_ASCII:
5867 case ONIGENC_CTYPE_ALNUM:
5868 if (not != 0) {
5869 for (c = 0; c < (int )limit; c++) {
5870 if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
5871 BITSET_SET_BIT(cc->bs, c);
5872 }
5873 for (c = limit; c < SINGLE_BYTE_SIZE; c++) {
5874 BITSET_SET_BIT(cc->bs, c);
5875 }
5876
5877 ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
5878 }
5879 else {
5880 for (c = 0; c < (int )limit; c++) {
5881 if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
5882 BITSET_SET_BIT(cc->bs, c);
5883 }
5884 }
5885 break;
5886
5887 case ONIGENC_CTYPE_GRAPH:
5888 case ONIGENC_CTYPE_PRINT:
5889 case ONIGENC_CTYPE_WORD:
5890 if (not != 0) {
5891 for (c = 0; c < (int )limit; c++) {
5892 if (ONIGENC_CODE_TO_MBCLEN(enc, c) > 0 /* check invalid code point */
5893 && ! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
5894 BITSET_SET_BIT(cc->bs, c);
5895 }
5896 for (c = limit; c < SINGLE_BYTE_SIZE; c++) {
5897 if (ONIGENC_CODE_TO_MBCLEN(enc, c) > 0)
5898 BITSET_SET_BIT(cc->bs, c);
5899 }
5900 }
5901 else {
5902 for (c = 0; c < (int )limit; c++) {
5903 if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
5904 BITSET_SET_BIT(cc->bs, c);
5905 }
5906 if (ascii_mode == 0)
5907 ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
5908 }
5909 break;
5910
5911 default:
5912 return ONIGERR_PARSER_BUG;
5913 break;
5914 }
5915
5916 return r;
5917 }
5918
5919 static int
5920 parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
5921 {
5922 #define POSIX_BRACKET_CHECK_LIMIT_LENGTH 20
5923 #define POSIX_BRACKET_NAME_MIN_LEN 4
5924
5925 static PosixBracketEntryType PBS[] = {
5926 { (UChar* )"alnum", ONIGENC_CTYPE_ALNUM, 5 },
5927 { (UChar* )"alpha", ONIGENC_CTYPE_ALPHA, 5 },
5928 { (UChar* )"blank", ONIGENC_CTYPE_BLANK, 5 },
5929 { (UChar* )"cntrl", ONIGENC_CTYPE_CNTRL, 5 },
5930 { (UChar* )"digit", ONIGENC_CTYPE_DIGIT, 5 },
5931 { (UChar* )"graph", ONIGENC_CTYPE_GRAPH, 5 },
5932 { (UChar* )"lower", ONIGENC_CTYPE_LOWER, 5 },
5933 { (UChar* )"print", ONIGENC_CTYPE_PRINT, 5 },
5934 { (UChar* )"punct", ONIGENC_CTYPE_PUNCT, 5 },
5935 { (UChar* )"space", ONIGENC_CTYPE_SPACE, 5 },
5936 { (UChar* )"upper", ONIGENC_CTYPE_UPPER, 5 },
5937 { (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 },
5938 { (UChar* )"ascii", ONIGENC_CTYPE_ASCII, 5 },
5939 { (UChar* )"word", ONIGENC_CTYPE_WORD, 4 },
5940 { (UChar* )NULL, -1, 0 }
5941 };
5942
5943 PosixBracketEntryType *pb;
5944 int not, i, r;
5945 OnigCodePoint c;
5946 OnigEncoding enc = env->enc;
5947 UChar *p = *src;
5948
5949 if (PPEEK_IS('^')) {
5950 PINC_S;
5951 not = 1;
5952 }
5953 else
5954 not = 0;
5955
5956 if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MIN_LEN + 3)
5957 goto not_posix_bracket;
5958
5959 for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
5960 if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) {
5961 p = (UChar* )onigenc_step(enc, p, end, pb->len);
5962 if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0)
5963 return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
5964
5965 r = add_ctype_to_cc(cc, pb->ctype, not, env);
5966 if (r != 0) return r;
5967
5968 PINC_S; PINC_S;
5969 *src = p;
5970 return 0;
5971 }
5972 }
5973
5974 not_posix_bracket:
5975 c = 0;
5976 i = 0;
5977 while (!PEND && ((c = PPEEK) != ':') && c != ']') {
5978 PINC_S;
5979 if (++i > POSIX_BRACKET_CHECK_LIMIT_LENGTH) break;
5980 }
5981 if (c == ':' && ! PEND) {
5982 PINC_S;
5983 if (! PEND) {
5984 PFETCH_S(c);
5985 if (c == ']')
5986 return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
5987 }
5988 }
5989
5990 return 1; /* 1: is not POSIX bracket, but no error. */
5991 }
5992
5993 static int
5994 fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env)
5995 {
5996 int r;
5997 OnigCodePoint c;
5998 OnigEncoding enc = env->enc;
5999 UChar *prev, *start, *p = *src;
6000
6001 r = 0;
6002 start = prev = p;
6003
6004 while (!PEND) {
6005 prev = p;
6006 PFETCH_S(c);
6007 if (c == '}') {
6008 r = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, start, prev);
6009 if (r < 0) break;
6010
6011 *src = p;
6012 return r;
6013 }
6014 else if (c == '(' || c == ')' || c == '{' || c == '|') {
6015 r = ONIGERR_INVALID_CHAR_PROPERTY_NAME;
6016 break;
6017 }
6018 }
6019
6020 onig_scan_env_set_error_string(env, r, *src, prev);
6021 return r;
6022 }
6023
6024 static int
6025 parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
6026 {
6027 int r, ctype;
6028 CClassNode* cc;
6029
6030 ctype = fetch_char_property_to_ctype(src, end, env);
6031 if (ctype < 0) return ctype;
6032
6033 *np = node_new_cclass();
6034 CHECK_NULL_RETURN_MEMERR(*np);
6035 cc = CCLASS_(*np);
6036 r = add_ctype_to_cc(cc, ctype, 0, env);
6037 if (r != 0) return r;
6038 if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
6039
6040 return 0;
6041 }
6042
6043
/* Parser state while reading the members of a character class
   (driven by next_state_val(), next_state_class() and parse_char_class()). */
enum CCSTATE {
  CCS_VALUE,     /* a value has been read and is pending; it is stored when the next item arrives */
  CCS_RANGE,     /* a range operator followed a value; the next value is the range end */
  CCS_COMPLETE,  /* a range has just been completed */
  CCS_START      /* nothing read yet (class start, or right after "&&") */
};
6050
/* Kind of the most recently read character class item. */
enum CCVALTYPE {
  CCV_SB,          /* value that is stored in the class bitset */
  CCV_CODE_POINT,  /* value that is stored in the multi-byte range buffer */
  CCV_CLASS        /* a whole class was added (set by next_state_class()) */
};
6056
6057 static int
6058 next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type,
6059 enum CCSTATE* state, ScanEnv* env)
6060 {
6061 int r;
6062
6063 if (*state == CCS_RANGE)
6064 return ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE;
6065
6066 if (*state == CCS_VALUE && *type != CCV_CLASS) {
6067 if (*type == CCV_SB)
6068 BITSET_SET_BIT(cc->bs, (int )(*vs));
6069 else if (*type == CCV_CODE_POINT) {
6070 r = add_code_range(&(cc->mbuf), env, *vs, *vs);
6071 if (r < 0) return r;
6072 }
6073 }
6074
6075 *state = CCS_VALUE;
6076 *type = CCV_CLASS;
6077 return 0;
6078 }
6079
6080 static int
6081 next_state_val(CClassNode* cc, OnigCodePoint *from, OnigCodePoint to,
6082 int* from_israw, int to_israw,
6083 enum CCVALTYPE intype, enum CCVALTYPE* type,
6084 enum CCSTATE* state, ScanEnv* env)
6085 {
6086 int r;
6087
6088 switch (*state) {
6089 case CCS_VALUE:
6090 if (*type == CCV_SB) {
6091 if (*from > 0xff)
6092 return ONIGERR_INVALID_CODE_POINT_VALUE;
6093
6094 BITSET_SET_BIT(cc->bs, (int )(*from));
6095 }
6096 else if (*type == CCV_CODE_POINT) {
6097 r = add_code_range(&(cc->mbuf), env, *from, *from);
6098 if (r < 0) return r;
6099 }
6100 break;
6101
6102 case CCS_RANGE:
6103 if (intype == *type) {
6104 if (intype == CCV_SB) {
6105 if (*from > 0xff || to > 0xff)
6106 return ONIGERR_INVALID_CODE_POINT_VALUE;
6107
6108 if (*from > to) {
6109 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
6110 goto ccs_range_end;
6111 else
6112 return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
6113 }
6114 bitset_set_range(cc->bs, (int )*from, (int )to);
6115 }
6116 else {
6117 r = add_code_range(&(cc->mbuf), env, *from, to);
6118 if (r < 0) return r;
6119 }
6120 }
6121 else {
6122 if (*from > to) {
6123 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
6124 goto ccs_range_end;
6125 else
6126 return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
6127 }
6128 bitset_set_range(cc->bs, (int )*from, (int )(to < 0xff ? to : 0xff));
6129 r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*from, to);
6130 if (r < 0) return r;
6131 }
6132 ccs_range_end:
6133 *state = CCS_COMPLETE;
6134 break;
6135
6136 case CCS_COMPLETE:
6137 case CCS_START:
6138 *state = CCS_VALUE;
6139 break;
6140
6141 default:
6142 break;
6143 }
6144
6145 *from_israw = to_israw;
6146 *from = to;
6147 *type = intype;
6148 return 0;
6149 }
6150
6151 static int
6152 code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,
6153 ScanEnv* env)
6154 {
6155 int in_esc;
6156 OnigCodePoint code;
6157 OnigEncoding enc = env->enc;
6158 UChar* p = from;
6159
6160 in_esc = 0;
6161 while (! PEND) {
6162 if (ignore_escaped && in_esc) {
6163 in_esc = 0;
6164 }
6165 else {
6166 PFETCH_S(code);
6167 if (code == c) return 1;
6168 if (code == MC_ESC(env->syntax)) in_esc = 1;
6169 }
6170 }
6171 return 0;
6172 }
6173
6174 static int
6175 parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
6176 {
6177 int r, neg, len, fetched, and_start;
6178 OnigCodePoint v, vs;
6179 UChar *p;
6180 Node* node;
6181 CClassNode *cc, *prev_cc;
6182 CClassNode work_cc;
6183
6184 enum CCSTATE state;
6185 enum CCVALTYPE val_type, in_type;
6186 int val_israw, in_israw;
6187
6188 *np = NULL_NODE;
6189 env->parse_depth++;
6190 if (env->parse_depth > ParseDepthLimit)
6191 return ONIGERR_PARSE_DEPTH_LIMIT_OVER;
6192 prev_cc = (CClassNode* )NULL;
6193 r = fetch_token_in_cc(tok, src, end, env);
6194 if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) {
6195 neg = 1;
6196 r = fetch_token_in_cc(tok, src, end, env);
6197 }
6198 else {
6199 neg = 0;
6200 }
6201
6202 if (r < 0) return r;
6203 if (r == TK_CC_CLOSE) {
6204 if (! code_exist_check((OnigCodePoint )']',
6205 *src, env->pattern_end, 1, env))
6206 return ONIGERR_EMPTY_CHAR_CLASS;
6207
6208 CC_ESC_WARN(env, (UChar* )"]");
6209 r = tok->type = TK_CHAR; /* allow []...] */
6210 }
6211
6212 *np = node = node_new_cclass();
6213 CHECK_NULL_RETURN_MEMERR(node);
6214 cc = CCLASS_(node);
6215
6216 and_start = 0;
6217 state = CCS_START;
6218 p = *src;
6219 while (r != TK_CC_CLOSE) {
6220 fetched = 0;
6221 switch (r) {
6222 case TK_CHAR:
6223 any_char_in:
6224 len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.c);
6225 if (len > 1) {
6226 in_type = CCV_CODE_POINT;
6227 }
6228 else if (len < 0) {
6229 r = len;
6230 goto err;
6231 }
6232 else {
6233 /* sb_char: */
6234 in_type = CCV_SB;
6235 }
6236 v = (OnigCodePoint )tok->u.c;
6237 in_israw = 0;
6238 goto val_entry2;
6239 break;
6240
6241 case TK_RAW_BYTE:
6242 /* tok->base != 0 : octal or hexadec. */
6243 if (! ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base != 0) {
6244 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
6245 UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN;
6246 UChar* psave = p;
6247 int i, base = tok->base;
6248
6249 buf[0] = tok->u.c;
6250 for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) {
6251 r = fetch_token_in_cc(tok, &p, end, env);
6252 if (r < 0) goto err;
6253 if (r != TK_RAW_BYTE || tok->base != base) {
6254 fetched = 1;
6255 break;
6256 }
6257 buf[i] = tok->u.c;
6258 }
6259
6260 if (i < ONIGENC_MBC_MINLEN(env->enc)) {
6261 r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
6262 goto err;
6263 }
6264
6265 len = enclen(env->enc, buf);
6266 if (i < len) {
6267 r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
6268 goto err;
6269 }
6270 else if (i > len) { /* fetch back */
6271 p = psave;
6272 for (i = 1; i < len; i++) {
6273 r = fetch_token_in_cc(tok, &p, end, env);
6274 }
6275 fetched = 0;
6276 }
6277
6278 if (i == 1) {
6279 v = (OnigCodePoint )buf[0];
6280 goto raw_single;
6281 }
6282 else {
6283 v = ONIGENC_MBC_TO_CODE(env->enc, buf, bufe);
6284 in_type = CCV_CODE_POINT;
6285 }
6286 }
6287 else {
6288 v = (OnigCodePoint )tok->u.c;
6289 raw_single:
6290 in_type = CCV_SB;
6291 }
6292 in_israw = 1;
6293 goto val_entry2;
6294 break;
6295
6296 case TK_CODE_POINT:
6297 v = tok->u.code;
6298 in_israw = 1;
6299 val_entry:
6300 len = ONIGENC_CODE_TO_MBCLEN(env->enc, v);
6301 if (len < 0) {
6302 r = len;
6303 goto err;
6304 }
6305 in_type = (len == 1 ? CCV_SB : CCV_CODE_POINT);
6306 val_entry2:
6307 r = next_state_val(cc, &vs, v, &val_israw, in_israw, in_type, &val_type,
6308 &state, env);
6309 if (r != 0) goto err;
6310 break;
6311
6312 case TK_POSIX_BRACKET_OPEN:
6313 r = parse_posix_bracket(cc, &p, end, env);
6314 if (r < 0) goto err;
6315 if (r == 1) { /* is not POSIX bracket */
6316 CC_ESC_WARN(env, (UChar* )"[");
6317 p = tok->backp;
6318 v = (OnigCodePoint )tok->u.c;
6319 in_israw = 0;
6320 goto val_entry;
6321 }
6322 goto next_class;
6323 break;
6324
6325 case TK_CHAR_TYPE:
6326 r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not, env);
6327 if (r != 0) goto err;
6328
6329 next_class:
6330 r = next_state_class(cc, &vs, &val_type, &state, env);
6331 if (r != 0) goto err;
6332 break;
6333
6334 case TK_CHAR_PROPERTY:
6335 {
6336 int ctype = fetch_char_property_to_ctype(&p, end, env);
6337 if (ctype < 0) {
6338 r = ctype;
6339 goto err;
6340 }
6341 r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, env);
6342 if (r != 0) goto err;
6343 goto next_class;
6344 }
6345 break;
6346
6347 case TK_CC_RANGE:
6348 if (state == CCS_VALUE) {
6349 r = fetch_token_in_cc(tok, &p, end, env);
6350 if (r < 0) goto err;
6351 fetched = 1;
6352 if (r == TK_CC_CLOSE) { /* allow [x-] */
6353 range_end_val:
6354 v = (OnigCodePoint )'-';
6355 in_israw = 0;
6356 goto val_entry;
6357 }
6358 else if (r == TK_CC_AND) {
6359 CC_ESC_WARN(env, (UChar* )"-");
6360 goto range_end_val;
6361 }
6362
6363 if (val_type == CCV_CLASS) {
6364 r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;
6365 goto err;
6366 }
6367
6368 state = CCS_RANGE;
6369 }
6370 else if (state == CCS_START) {
6371 /* [-xa] is allowed */
6372 v = (OnigCodePoint )tok->u.c;
6373 in_israw = 0;
6374
6375 r = fetch_token_in_cc(tok, &p, end, env);
6376 if (r < 0) goto err;
6377 fetched = 1;
6378 /* [--x] or [a&&-x] is warned. */
6379 if (r == TK_CC_RANGE || and_start != 0)
6380 CC_ESC_WARN(env, (UChar* )"-");
6381
6382 goto val_entry;
6383 }
6384 else if (state == CCS_RANGE) {
6385 CC_ESC_WARN(env, (UChar* )"-");
6386 goto any_char_in; /* [!--x] is allowed */
6387 }
6388 else { /* CCS_COMPLETE */
6389 r = fetch_token_in_cc(tok, &p, end, env);
6390 if (r < 0) goto err;
6391 fetched = 1;
6392 if (r == TK_CC_CLOSE) goto range_end_val; /* allow [a-b-] */
6393 else if (r == TK_CC_AND) {
6394 CC_ESC_WARN(env, (UChar* )"-");
6395 goto range_end_val;
6396 }
6397
6398 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) {
6399 CC_ESC_WARN(env, (UChar* )"-");
6400 goto range_end_val; /* [0-9-a] is allowed as [0-9\-a] */
6401 }
6402 r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;
6403 goto err;
6404 }
6405 break;
6406
6407 case TK_CC_CC_OPEN: /* [ */
6408 {
6409 Node *anode;
6410 CClassNode* acc;
6411
6412 r = parse_char_class(&anode, tok, &p, end, env);
6413 if (r != 0) {
6414 onig_node_free(anode);
6415 goto cc_open_err;
6416 }
6417 acc = CCLASS_(anode);
6418 r = or_cclass(cc, acc, env->enc);
6419 onig_node_free(anode);
6420
6421 cc_open_err:
6422 if (r != 0) goto err;
6423 }
6424 break;
6425
6426 case TK_CC_AND: /* && */
6427 {
6428 if (state == CCS_VALUE) {
6429 r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,
6430 &val_type, &state, env);
6431 if (r != 0) goto err;
6432 }
6433 /* initialize local variables */
6434 and_start = 1;
6435 state = CCS_START;
6436
6437 if (IS_NOT_NULL(prev_cc)) {
6438 r = and_cclass(prev_cc, cc, env->enc);
6439 if (r != 0) goto err;
6440 bbuf_free(cc->mbuf);
6441 }
6442 else {
6443 prev_cc = cc;
6444 cc = &work_cc;
6445 }
6446 initialize_cclass(cc);
6447 }
6448 break;
6449
6450 case TK_EOT:
6451 r = ONIGERR_PREMATURE_END_OF_CHAR_CLASS;
6452 goto err;
6453 break;
6454 default:
6455 r = ONIGERR_PARSER_BUG;
6456 goto err;
6457 break;
6458 }
6459
6460 if (fetched)
6461 r = tok->type;
6462 else {
6463 r = fetch_token_in_cc(tok, &p, end, env);
6464 if (r < 0) goto err;
6465 }
6466 }
6467
6468 if (state == CCS_VALUE) {
6469 r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,
6470 &val_type, &state, env);
6471 if (r != 0) goto err;
6472 }
6473
6474 if (IS_NOT_NULL(prev_cc)) {
6475 r = and_cclass(prev_cc, cc, env->enc);
6476 if (r != 0) goto err;
6477 bbuf_free(cc->mbuf);
6478 cc = prev_cc;
6479 }
6480
6481 if (neg != 0)
6482 NCCLASS_SET_NOT(cc);
6483 else
6484 NCCLASS_CLEAR_NOT(cc);
6485 if (IS_NCCLASS_NOT(cc) &&
6486 IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) {
6487 int is_empty = (IS_NULL(cc->mbuf) ? 1 : 0);
6488 if (is_empty != 0)
6489 BITSET_IS_EMPTY(cc->bs, is_empty);
6490
6491 if (is_empty == 0) {
6492 #define NEWLINE_CODE 0x0a
6493
6494 if (ONIGENC_IS_CODE_NEWLINE(env->enc, NEWLINE_CODE)) {
6495 if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1)
6496 BITSET_SET_BIT(cc->bs, NEWLINE_CODE);
6497 else
6498 add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE);
6499 }
6500 }
6501 }
6502 *src = p;
6503 env->parse_depth--;
6504 return 0;
6505
6506 err:
6507 if (cc != CCLASS_(*np))
6508 bbuf_free(cc->mbuf);
6509 return r;
6510 }
6511
/* Forward declaration, so that parse_subexp() can be called before the
   point in this file where it is defined. */
static int parse_subexp(Node** top, OnigToken* tok, int term,
                        UChar** src, UChar* end, ScanEnv* env);
6514
6515 #ifdef USE_CALLOUT
6516
6517 /* (?{...}[tag][+-]) (?{{...}}[tag][+-]) */
6518 static int
6519 parse_callout_of_contents(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* env)
6520 {
6521 int r;
6522 int i;
6523 int in;
6524 int num;
6525 OnigCodePoint c;
6526 UChar* code_start;
6527 UChar* code_end;
6528 UChar* contents;
6529 UChar* tag_start;
6530 UChar* tag_end;
6531 int brace_nest;
6532 CalloutListEntry* e;
6533 RegexExt* ext;
6534 OnigEncoding enc = env->enc;
6535 UChar* p = *src;
6536
6537 if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
6538
6539 brace_nest = 0;
6540 while (PPEEK_IS('{')) {
6541 brace_nest++;
6542 PINC_S;
6543 if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
6544 }
6545
6546 in = ONIG_CALLOUT_IN_PROGRESS;
6547 code_start = p;
6548 while (1) {
6549 if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
6550
6551 code_end = p;
6552 PFETCH_S(c);
6553 if (c == '}') {
6554 i = brace_nest;
6555 while (i > 0) {
6556 if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
6557 PFETCH_S(c);
6558 if (c == '}') i--;
6559 else break;
6560 }
6561 if (i == 0) break;
6562 }
6563 }
6564
6565 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
6566
6567 PFETCH_S(c);
6568 if (c == '[') {
6569 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
6570 tag_start = p;
6571 while (! PEND) {
6572 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
6573 tag_end = p;
6574 PFETCH_S(c);
6575 if (c == ']') break;
6576 }
6577 if (! is_allowed_callout_tag_name(enc, tag_start, tag_end))
6578 return ONIGERR_INVALID_CALLOUT_TAG_NAME;
6579
6580 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
6581 PFETCH_S(c);
6582 }
6583 else {
6584 tag_start = tag_end = 0;
6585 }
6586
6587 if (c == 'X') {
6588 in |= ONIG_CALLOUT_IN_RETRACTION;
6589 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
6590 PFETCH_S(c);
6591 }
6592 else if (c == '<') {
6593 in = ONIG_CALLOUT_IN_RETRACTION;
6594 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
6595 PFETCH_S(c);
6596 }
6597 else if (c == '>') { /* no needs (default) */
6598 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
6599 PFETCH_S(c);
6600 }
6601
6602 if (c != cterm)
6603 return ONIGERR_INVALID_CALLOUT_PATTERN;
6604
6605 r = reg_callout_list_entry(env, &num);
6606 if (r != 0) return r;
6607
6608 ext = onig_get_regex_ext(env->reg);
6609 if (IS_NULL(ext->pattern)) {
6610 r = onig_ext_set_pattern(env->reg, env->pattern, env->pattern_end);
6611 if (r != ONIG_NORMAL) return r;
6612 }
6613
6614 if (tag_start != tag_end) {
6615 r = callout_tag_entry(env->reg, tag_start, tag_end, num);
6616 if (r != ONIG_NORMAL) return r;
6617 }
6618
6619 contents = onigenc_strdup(enc, code_start, code_end);
6620 CHECK_NULL_RETURN_MEMERR(contents);
6621
6622 r = node_new_callout(np, ONIG_CALLOUT_OF_CONTENTS, num, ONIG_NON_NAME_ID, env);
6623 if (r != 0) {
6624 xfree(contents);
6625 return r;
6626 }
6627
6628 e = onig_reg_callout_list_at(env->reg, num);
6629 e->of = ONIG_CALLOUT_OF_CONTENTS;
6630 e->in = in;
6631 e->name_id = ONIG_NON_NAME_ID;
6632 e->u.content.start = contents;
6633 e->u.content.end = contents + (code_end - code_start);
6634
6635 *src = p;
6636 return 0;
6637 }
6638
6639 static long
6640 parse_long(OnigEncoding enc, UChar* s, UChar* end, int sign_on, long max, long* rl)
6641 {
6642 long v;
6643 long d;
6644 int flag;
6645 UChar* p;
6646 OnigCodePoint c;
6647
6648 if (s >= end) return ONIGERR_INVALID_CALLOUT_ARG;
6649
6650 flag = 1;
6651 v = 0;
6652 p = s;
6653 while (p < end) {
6654 c = ONIGENC_MBC_TO_CODE(enc, p, end);
6655 p += ONIGENC_MBC_ENC_LEN(enc, p);
6656 if (c >= '0' && c <= '9') {
6657 d = (long )(c - '0');
6658 if (v > (max - d) / 10)
6659 return ONIGERR_INVALID_CALLOUT_ARG;
6660
6661 v = v * 10 + d;
6662 }
6663 else if (sign_on != 0 && (c == '-' || c == '+')) {
6664 if (c == '-') flag = -1;
6665 }
6666 else
6667 return ONIGERR_INVALID_CALLOUT_ARG;
6668
6669 sign_on = 0;
6670 }
6671
6672 *rl = flag * v;
6673 return ONIG_NORMAL;
6674 }
6675
6676 static int
6677 parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end,
6678 unsigned int types[], OnigValue vals[], ScanEnv* env)
6679 {
6680 #define MAX_CALLOUT_ARG_BYTE_LENGTH 128
6681
6682 int r;
6683 int n;
6684 int esc;
6685 int cn;
6686 UChar* s;
6687 UChar* e;
6688 UChar* eesc;
6689 OnigCodePoint c;
6690 UChar* bufend;
6691 UChar buf[MAX_CALLOUT_ARG_BYTE_LENGTH];
6692 OnigEncoding enc = env->enc;
6693 UChar* p = *src;
6694
6695 if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
6696
6697 n = 0;
6698 while (n < ONIG_CALLOUT_MAX_ARGS_NUM) {
6699 c = 0;
6700 cn = 0;
6701 esc = 0;
6702 eesc = 0;
6703 bufend = buf;
6704 s = e = p;
6705 while (1) {
6706 if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
6707
6708 e = p;
6709 PFETCH_S(c);
6710 if (esc != 0) {
6711 esc = 0;
6712 if (c == '\\' || c == cterm || c == ',') {
6713 /* */
6714 }
6715 else {
6716 e = eesc;
6717 cn++;
6718 }
6719 goto add_char;
6720 }
6721 else {
6722 if (c == '\\') {
6723 esc = 1;
6724 eesc = e;
6725 }
6726 else if (c == cterm || c == ',')
6727 break;
6728 else {
6729 size_t clen;
6730
6731 add_char:
6732 if (skip_mode == 0) {
6733 clen = p - e;
6734 if (bufend + clen > buf + MAX_CALLOUT_ARG_BYTE_LENGTH)
6735 return ONIGERR_INVALID_CALLOUT_ARG; /* too long argument */
6736
6737 xmemcpy(bufend, e, clen);
6738 bufend += clen;
6739 }
6740 cn++;
6741 }
6742 }
6743 }
6744
6745 if (cn != 0) {
6746 if (skip_mode == 0) {
6747 if ((types[n] & ONIG_TYPE_LONG) != 0) {
6748 int fixed = 0;
6749 if (cn > 0) {
6750 long rl;
6751 r = parse_long(enc, buf, bufend, 1, LONG_MAX, &rl);
6752 if (r == ONIG_NORMAL) {
6753 vals[n].l = rl;
6754 fixed = 1;
6755 types[n] = ONIG_TYPE_LONG;
6756 }
6757 }
6758
6759 if (fixed == 0) {
6760 types[n] = (types[n] & ~ONIG_TYPE_LONG);
6761 if (types[n] == ONIG_TYPE_VOID)
6762 return ONIGERR_INVALID_CALLOUT_ARG;
6763 }
6764 }
6765
6766 switch (types[n]) {
6767 case ONIG_TYPE_LONG:
6768 break;
6769
6770 case ONIG_TYPE_CHAR:
6771 if (cn != 1) return ONIGERR_INVALID_CALLOUT_ARG;
6772 vals[n].c = ONIGENC_MBC_TO_CODE(enc, buf, bufend);
6773 break;
6774
6775 case ONIG_TYPE_STRING:
6776 {
6777 UChar* rs = onigenc_strdup(enc, buf, bufend);
6778 CHECK_NULL_RETURN_MEMERR(rs);
6779 vals[n].s.start = rs;
6780 vals[n].s.end = rs + (e - s);
6781 }
6782 break;
6783
6784 case ONIG_TYPE_TAG:
6785 if (eesc != 0 || ! is_allowed_callout_tag_name(enc, s, e))
6786 return ONIGERR_INVALID_CALLOUT_TAG_NAME;
6787
6788 vals[n].s.start = s;
6789 vals[n].s.end = e;
6790 break;
6791
6792 case ONIG_TYPE_VOID:
6793 case ONIG_TYPE_POINTER:
6794 return ONIGERR_PARSER_BUG;
6795 break;
6796 }
6797 }
6798
6799 n++;
6800 }
6801
6802 if (c == cterm) break;
6803 }
6804
6805 if (c != cterm) return ONIGERR_INVALID_CALLOUT_PATTERN;
6806
6807 *src = p;
6808 return n;
6809 }
6810
6811 /* (*name[TAG]) (*name[TAG]{a,b,..}) */
6812 static int
6813 parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* env)
6814 {
6815 int r;
6816 int i;
6817 int in;
6818 int num;
6819 int name_id;
6820 int arg_num;
6821 int max_arg_num;
6822 int opt_arg_num;
6823 int is_not_single;
6824 OnigCodePoint c;
6825 UChar* name_start;
6826 UChar* name_end;
6827 UChar* tag_start;
6828 UChar* tag_end;
6829 Node* node;
6830 CalloutListEntry* e;
6831 RegexExt* ext;
6832 unsigned int types[ONIG_CALLOUT_MAX_ARGS_NUM];
6833 OnigValue vals[ONIG_CALLOUT_MAX_ARGS_NUM];
6834 OnigEncoding enc = env->enc;
6835 UChar* p = *src;
6836
6837 /* PFETCH_READY; */
6838 if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
6839
6840 node = 0;
6841 name_start = p;
6842 while (1) {
6843 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
6844 name_end = p;
6845 PFETCH_S(c);
6846 if (c == cterm || c == '[' || c == '{') break;
6847 }
6848
6849 if (! is_allowed_callout_name(enc, name_start, name_end))
6850 return ONIGERR_INVALID_CALLOUT_NAME;
6851
6852 if (c == '[') {
6853 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
6854 tag_start = p;
6855 while (! PEND) {
6856 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
6857 tag_end = p;
6858 PFETCH_S(c);
6859 if (c == ']') break;
6860 }
6861 if (! is_allowed_callout_tag_name(enc, tag_start, tag_end))
6862 return ONIGERR_INVALID_CALLOUT_TAG_NAME;
6863
6864 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
6865 PFETCH_S(c);
6866 }
6867 else {
6868 tag_start = tag_end = 0;
6869 }
6870
6871 if (c == '{') {
6872 UChar* save;
6873
6874 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
6875
6876 /* read for single check only */
6877 save = p;
6878 arg_num = parse_callout_args(1, '}', &p, end, 0, 0, env);
6879 if (arg_num < 0) return arg_num;
6880
6881 is_not_single = PPEEK_IS(cterm) ? 0 : 1;
6882 p = save;
6883 r = get_callout_name_id_by_name(enc, is_not_single, name_start, name_end,
6884 &name_id);
6885 if (r != ONIG_NORMAL) return r;
6886
6887 max_arg_num = get_callout_arg_num_by_name_id(name_id);
6888 for (i = 0; i < max_arg_num; i++) {
6889 types[i] = get_callout_arg_type_by_name_id(name_id, i);
6890 }
6891
6892 arg_num = parse_callout_args(0, '}', &p, end, types, vals, env);
6893 if (arg_num < 0) return arg_num;
6894
6895 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
6896 PFETCH_S(c);
6897 }
6898 else {
6899 arg_num = 0;
6900
6901 is_not_single = 0;
6902 r = get_callout_name_id_by_name(enc, is_not_single, name_start, name_end,
6903 &name_id);
6904 if (r != ONIG_NORMAL) return r;
6905
6906 max_arg_num = get_callout_arg_num_by_name_id(name_id);
6907 for (i = 0; i < max_arg_num; i++) {
6908 types[i] = get_callout_arg_type_by_name_id(name_id, i);
6909 }
6910 }
6911
6912 in = onig_get_callout_in_by_name_id(name_id);
6913 opt_arg_num = get_callout_opt_arg_num_by_name_id(name_id);
6914 if (arg_num > max_arg_num || arg_num < (max_arg_num - opt_arg_num))
6915 return ONIGERR_INVALID_CALLOUT_ARG;
6916
6917 if (c != cterm)
6918 return ONIGERR_INVALID_CALLOUT_PATTERN;
6919
6920 r = reg_callout_list_entry(env, &num);
6921 if (r != 0) return r;
6922
6923 ext = onig_get_regex_ext(env->reg);
6924 if (IS_NULL(ext->pattern)) {
6925 r = onig_ext_set_pattern(env->reg, env->pattern, env->pattern_end);
6926 if (r != ONIG_NORMAL) return r;
6927 }
6928
6929 if (tag_start != tag_end) {
6930 r = callout_tag_entry(env->reg, tag_start, tag_end, num);
6931 if (r != ONIG_NORMAL) return r;
6932 }
6933
6934 r = node_new_callout(&node, ONIG_CALLOUT_OF_NAME, num, name_id, env);
6935 if (r != ONIG_NORMAL) return r;
6936
6937 e = onig_reg_callout_list_at(env->reg, num);
6938 e->of = ONIG_CALLOUT_OF_NAME;
6939 e->in = in;
6940 e->name_id = name_id;
6941 e->type = onig_get_callout_type_by_name_id(name_id);
6942 e->start_func = onig_get_callout_start_func_by_name_id(name_id);
6943 e->end_func = onig_get_callout_end_func_by_name_id(name_id);
6944 e->u.arg.num = max_arg_num;
6945 e->u.arg.passed_num = arg_num;
6946 for (i = 0; i < max_arg_num; i++) {
6947 e->u.arg.types[i] = types[i];
6948 if (i < arg_num)
6949 e->u.arg.vals[i] = vals[i];
6950 else
6951 e->u.arg.vals[i] = get_callout_opt_default_by_name_id(name_id, i);
6952 }
6953
6954 *np = node;
6955 *src = p;
6956 return 0;
6957 }
6958 #endif
6959
6960 static int
6961 parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
6962 ScanEnv* env)
6963 {
6964 int r, num;
6965 Node *target;
6966 OnigOptionType option;
6967 OnigCodePoint c;
6968 int list_capture;
6969 OnigEncoding enc = env->enc;
6970
6971 UChar* p = *src;
6972 PFETCH_READY;
6973
6974 *np = NULL;
6975 if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
6976
6977 option = env->options;
6978 c = PPEEK;
6979 if (c == '?' && IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {
6980 PINC;
6981 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
6982
6983 PFETCH(c);
6984 switch (c) {
6985 case ':': /* (?:...) grouping only */
6986 group:
6987 r = fetch_token(tok, &p, end, env);
6988 if (r < 0) return r;
6989 r = parse_subexp(np, tok, term, &p, end, env);
6990 if (r < 0) return r;
6991 *src = p;
6992 return 1; /* group */
6993 break;
6994
6995 case '=':
6996 *np = onig_node_new_anchor(ANCHOR_PREC_READ, 0);
6997 break;
6998 case '!': /* preceding read */
6999 *np = onig_node_new_anchor(ANCHOR_PREC_READ_NOT, 0);
7000 break;
7001 case '>': /* (?>...) stop backtrack */
7002 *np = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);
7003 break;
7004
7005 case '\'':
7006 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
7007 goto named_group1;
7008 }
7009 else
7010 return ONIGERR_UNDEFINED_GROUP_OPTION;
7011 break;
7012
7013 case '<': /* look behind (?<=...), (?<!...) */
7014 if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
7015 PFETCH(c);
7016 if (c == '=')
7017 *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND, 0);
7018 else if (c == '!')
7019 *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND_NOT, 0);
7020 else {
7021 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
7022 UChar *name;
7023 UChar *name_end;
7024 enum REF_NUM num_type;
7025
7026 PUNFETCH;
7027 c = '<';
7028
7029 named_group1:
7030 list_capture = 0;
7031
7032 named_group2:
7033 name = p;
7034 r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num,
7035 &num_type, 0);
7036 if (r < 0) return r;
7037
7038 num = scan_env_add_mem_entry(env);
7039 if (num < 0) return num;
7040 if (list_capture != 0 && num >= (int )MEM_STATUS_BITS_NUM)
7041 return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
7042
7043 r = name_add(env->reg, name, name_end, num, env);
7044 if (r != 0) return r;
7045 *np = node_new_memory(1);
7046 CHECK_NULL_RETURN_MEMERR(*np);
7047 ENCLOSURE_(*np)->m.regnum = num;
7048 if (list_capture != 0)
7049 MEM_STATUS_ON_SIMPLE(env->capture_history, num);
7050 env->num_named++;
7051 }
7052 else {
7053 return ONIGERR_UNDEFINED_GROUP_OPTION;
7054 }
7055 }
7056 break;
7057
7058 case '~':
7059 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP)) {
7060 Node* absent;
7061 Node* expr;
7062 int head_bar;
7063 int is_range_cutter;
7064
7065 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7066
7067 if (PPEEK_IS('|')) { /* (?~|generator|absent) */
7068 PINC;
7069 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7070
7071 head_bar = 1;
7072 if (PPEEK_IS(')')) { /* (?~|) : range clear */
7073 PINC;
7074 r = make_range_clear(np, env);
7075 if (r != 0) return r;
7076 goto end;
7077 }
7078 }
7079 else
7080 head_bar = 0;
7081
7082 r = fetch_token(tok, &p, end, env);
7083 if (r < 0) return r;
7084 r = parse_subexp(&absent, tok, term, &p, end, env);
7085 if (r < 0) {
7086 onig_node_free(absent);
7087 return r;
7088 }
7089
7090 expr = NULL_NODE;
7091 is_range_cutter = 0;
7092 if (head_bar != 0) {
7093 Node* top = absent;
7094 if (NODE_TYPE(top) != NODE_ALT || IS_NULL(NODE_CDR(top))) {
7095 expr = NULL_NODE;
7096 is_range_cutter = 1;
7097 /* return ONIGERR_INVALID_ABSENT_GROUP_GENERATOR_PATTERN; */
7098 }
7099 else {
7100 absent = NODE_CAR(top);
7101 expr = NODE_CDR(top);
7102 NODE_CAR(top) = NULL_NODE;
7103 NODE_CDR(top) = NULL_NODE;
7104 onig_node_free(top);
7105 if (IS_NULL(NODE_CDR(expr))) {
7106 top = expr;
7107 expr = NODE_CAR(top);
7108 NODE_CAR(top) = NULL_NODE;
7109 onig_node_free(top);
7110 }
7111 }
7112 }
7113
7114 r = make_absent_tree(np, absent, expr, is_range_cutter, env);
7115 if (r != 0) {
7116 return r;
7117 }
7118 goto end;
7119 }
7120 else {
7121 return ONIGERR_UNDEFINED_GROUP_OPTION;
7122 }
7123 break;
7124
7125 #ifdef USE_CALLOUT
7126 case '{':
7127 if (! IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS))
7128 return ONIGERR_UNDEFINED_GROUP_OPTION;
7129
7130 r = parse_callout_of_contents(np, ')', &p, end, env);
7131 if (r != 0) return r;
7132
7133 goto end;
7134 break;
7135 #endif
7136
7137 case '(':
7138 /* (?()...) */
7139 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE)) {
7140 UChar *prev;
7141 Node* condition;
7142 int condition_is_checker;
7143
7144 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7145 PFETCH(c);
7146 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7147
7148 if (IS_CODE_DIGIT_ASCII(enc, c)
7149 || c == '-' || c == '+' || c == '<' || c == '\'') {
7150 UChar* name_end;
7151 int back_num;
7152 int exist_level;
7153 int level;
7154 enum REF_NUM num_type;
7155 int is_enclosed;
7156
7157 is_enclosed = (c == '<' || c == '\'') ? 1 : 0;
7158 if (! is_enclosed)
7159 PUNFETCH;
7160 prev = p;
7161 exist_level = 0;
7162 #ifdef USE_BACKREF_WITH_LEVEL
7163 name_end = NULL_UCHARP; /* no need. escape gcc warning. */
7164 r = fetch_name_with_level(
7165 (OnigCodePoint )(is_enclosed != 0 ? c : '('),
7166 &p, end, &name_end,
7167 env, &back_num, &level, &num_type);
7168 if (r == 1) exist_level = 1;
7169 #else
7170 r = fetch_name((OnigCodePoint )(is_enclosed != 0 ? c : '('),
7171 &p, end, &name_end, env, &back_num, &num_type, 1);
7172 #endif
7173 if (r < 0) {
7174 if (is_enclosed == 0) {
7175 goto any_condition;
7176 }
7177 else
7178 return r;
7179 }
7180
7181 condition_is_checker = 1;
7182 if (num_type != IS_NOT_NUM) {
7183 if (num_type == IS_REL_NUM) {
7184 back_num = backref_rel_to_abs(back_num, env);
7185 }
7186 if (back_num <= 0)
7187 return ONIGERR_INVALID_BACKREF;
7188
7189 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_STRICT_CHECK_BACKREF)) {
7190 if (back_num > env->num_mem ||
7191 IS_NULL(SCANENV_MEMENV(env)[back_num].node))
7192 return ONIGERR_INVALID_BACKREF;
7193 }
7194
7195 condition = node_new_backref_checker(1, &back_num, 0,
7196 #ifdef USE_BACKREF_WITH_LEVEL
7197 exist_level, level,
7198 #endif
7199 env);
7200 }
7201 else {
7202 int num;
7203 int* backs;
7204
7205 num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);
7206 if (num <= 0) {
7207 onig_scan_env_set_error_string(env,
7208 ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);
7209 return ONIGERR_UNDEFINED_NAME_REFERENCE;
7210 }
7211 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_STRICT_CHECK_BACKREF)) {
7212 int i;
7213 for (i = 0; i < num; i++) {
7214 if (backs[i] > env->num_mem ||
7215 IS_NULL(SCANENV_MEMENV(env)[backs[i]].node))
7216 return ONIGERR_INVALID_BACKREF;
7217 }
7218 }
7219
7220 condition = node_new_backref_checker(num, backs, 1,
7221 #ifdef USE_BACKREF_WITH_LEVEL
7222 exist_level, level,
7223 #endif
7224 env);
7225 }
7226
7227 if (is_enclosed != 0) {
7228 if (PEND) goto err_if_else;
7229 PFETCH(c);
7230 if (c != ')') goto err_if_else;
7231 }
7232 }
7233 #ifdef USE_CALLOUT
7234 else if (c == '?') {
7235 if (IS_SYNTAX_OP2(env->syntax,
7236 ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS)) {
7237 if (! PEND && PPEEK_IS('{')) {
7238 /* condition part is callouts of contents: (?(?{...})THEN|ELSE) */
7239 condition_is_checker = 0;
7240 PFETCH(c);
7241 r = parse_callout_of_contents(&condition, ')', &p, end, env);
7242 if (r != 0) return r;
7243 goto end_condition;
7244 }
7245 }
7246 goto any_condition;
7247 }
7248 else if (c == '*' &&
7249 IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME)) {
7250 condition_is_checker = 0;
7251 r = parse_callout_of_name(&condition, ')', &p, end, env);
7252 if (r != 0) return r;
7253 goto end_condition;
7254 }
7255 #endif
7256 else {
7257 any_condition:
7258 PUNFETCH;
7259 condition_is_checker = 0;
7260 r = fetch_token(tok, &p, end, env);
7261 if (r < 0) return r;
7262 r = parse_subexp(&condition, tok, term, &p, end, env);
7263 if (r < 0) {
7264 onig_node_free(condition);
7265 return r;
7266 }
7267 }
7268
7269 end_condition:
7270 CHECK_NULL_RETURN_MEMERR(condition);
7271
7272 if (PEND) {
7273 err_if_else:
7274 onig_node_free(condition);
7275 return ONIGERR_END_PATTERN_IN_GROUP;
7276 }
7277
7278 if (PPEEK_IS(')')) { /* case: empty body: make backref checker */
7279 if (condition_is_checker == 0) {
7280 onig_node_free(condition);
7281 return ONIGERR_INVALID_IF_ELSE_SYNTAX;
7282 }
7283 PFETCH(c);
7284 *np = condition;
7285 }
7286 else { /* if-else */
7287 int then_is_empty;
7288 Node *Then, *Else;
7289
7290 if (PPEEK_IS('|')) {
7291 PFETCH(c);
7292 Then = 0;
7293 then_is_empty = 1;
7294 }
7295 else
7296 then_is_empty = 0;
7297
7298 r = fetch_token(tok, &p, end, env);
7299 if (r < 0) {
7300 onig_node_free(condition);
7301 return r;
7302 }
7303 r = parse_subexp(&target, tok, term, &p, end, env);
7304 if (r < 0) {
7305 onig_node_free(condition);
7306 onig_node_free(target);
7307 return r;
7308 }
7309
7310 if (then_is_empty != 0) {
7311 Else = target;
7312 }
7313 else {
7314 if (NODE_TYPE(target) == NODE_ALT) {
7315 Then = NODE_CAR(target);
7316 if (NODE_CDR(NODE_CDR(target)) == NULL_NODE) {
7317 Else = NODE_CAR(NODE_CDR(target));
7318 cons_node_free_alone(NODE_CDR(target));
7319 }
7320 else {
7321 Else = NODE_CDR(target);
7322 }
7323 cons_node_free_alone(target);
7324 }
7325 else {
7326 Then = target;
7327 Else = 0;
7328 }
7329 }
7330
7331 *np = node_new_enclosure_if_else(condition, Then, Else);
7332 if (IS_NULL(*np)) {
7333 onig_node_free(condition);
7334 onig_node_free(Then);
7335 onig_node_free(Else);
7336 return ONIGERR_MEMORY;
7337 }
7338 }
7339 goto end;
7340 }
7341 else {
7342 return ONIGERR_UNDEFINED_GROUP_OPTION;
7343 }
7344 break;
7345
7346 case '@':
7347 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) {
7348 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
7349 PFETCH(c);
7350 if (c == '<' || c == '\'') {
7351 list_capture = 1;
7352 goto named_group2; /* (?@<name>...) */
7353 }
7354 PUNFETCH;
7355 }
7356
7357 *np = node_new_memory(0);
7358 CHECK_NULL_RETURN_MEMERR(*np);
7359 num = scan_env_add_mem_entry(env);
7360 if (num < 0) {
7361 return num;
7362 }
7363 else if (num >= (int )MEM_STATUS_BITS_NUM) {
7364 return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
7365 }
7366 ENCLOSURE_(*np)->m.regnum = num;
7367 MEM_STATUS_ON_SIMPLE(env->capture_history, num);
7368 }
7369 else {
7370 return ONIGERR_UNDEFINED_GROUP_OPTION;
7371 }
7372 break;
7373
7374 #ifdef USE_POSIXLINE_OPTION
7375 case 'p':
7376 #endif
7377 case '-': case 'i': case 'm': case 's': case 'x':
7378 case 'W': case 'D': case 'S': case 'P':
7379 {
7380 int neg = 0;
7381
7382 while (1) {
7383 switch (c) {
7384 case ':':
7385 case ')':
7386 break;
7387
7388 case '-': neg = 1; break;
7389 case 'x': OPTION_NEGATE(option, ONIG_OPTION_EXTEND, neg); break;
7390 case 'i': OPTION_NEGATE(option, ONIG_OPTION_IGNORECASE, neg); break;
7391 case 's':
7392 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
7393 OPTION_NEGATE(option, ONIG_OPTION_MULTILINE, neg);
7394 }
7395 else
7396 return ONIGERR_UNDEFINED_GROUP_OPTION;
7397 break;
7398
7399 case 'm':
7400 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
7401 OPTION_NEGATE(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 1 : 0));
7402 }
7403 else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) {
7404 OPTION_NEGATE(option, ONIG_OPTION_MULTILINE, neg);
7405 }
7406 else
7407 return ONIGERR_UNDEFINED_GROUP_OPTION;
7408 break;
7409 #ifdef USE_POSIXLINE_OPTION
7410 case 'p':
7411 OPTION_NEGATE(option, ONIG_OPTION_MULTILINE|ONIG_OPTION_SINGLELINE, neg);
7412 break;
7413 #endif
7414 case 'W': OPTION_NEGATE(option, ONIG_OPTION_WORD_IS_ASCII, neg); break;
7415 case 'D': OPTION_NEGATE(option, ONIG_OPTION_DIGIT_IS_ASCII, neg); break;
7416 case 'S': OPTION_NEGATE(option, ONIG_OPTION_SPACE_IS_ASCII, neg); break;
7417 case 'P': OPTION_NEGATE(option, ONIG_OPTION_POSIX_IS_ASCII, neg); break;
7418
7419 default:
7420 return ONIGERR_UNDEFINED_GROUP_OPTION;
7421 }
7422
7423 if (c == ')') {
7424 *np = node_new_option(option);
7425 CHECK_NULL_RETURN_MEMERR(*np);
7426 *src = p;
7427 return 2; /* option only */
7428 }
7429 else if (c == ':') {
7430 OnigOptionType prev = env->options;
7431
7432 env->options = option;
7433 r = fetch_token(tok, &p, end, env);
7434 if (r < 0) return r;
7435 r = parse_subexp(&target, tok, term, &p, end, env);
7436 env->options = prev;
7437 if (r < 0) {
7438 onig_node_free(target);
7439 return r;
7440 }
7441 *np = node_new_option(option);
7442 CHECK_NULL_RETURN_MEMERR(*np);
7443 NODE_BODY(*np) = target;
7444 *src = p;
7445 return 0;
7446 }
7447
7448 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7449 PFETCH(c);
7450 }
7451 }
7452 break;
7453
7454 default:
7455 return ONIGERR_UNDEFINED_GROUP_OPTION;
7456 }
7457 }
7458 #ifdef USE_CALLOUT
7459 else if (c == '*' &&
7460 IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME)) {
7461 PINC;
7462 r = parse_callout_of_name(np, ')', &p, end, env);
7463 if (r != 0) return r;
7464
7465 goto end;
7466 }
7467 #endif
7468 else {
7469 if (ONIG_IS_OPTION_ON(env->options, ONIG_OPTION_DONT_CAPTURE_GROUP))
7470 goto group;
7471
7472 *np = node_new_memory(0);
7473 CHECK_NULL_RETURN_MEMERR(*np);
7474 num = scan_env_add_mem_entry(env);
7475 if (num < 0) return num;
7476 ENCLOSURE_(*np)->m.regnum = num;
7477 }
7478
7479 CHECK_NULL_RETURN_MEMERR(*np);
7480 r = fetch_token(tok, &p, end, env);
7481 if (r < 0) return r;
7482 r = parse_subexp(&target, tok, term, &p, end, env);
7483 if (r < 0) {
7484 onig_node_free(target);
7485 return r;
7486 }
7487
7488 NODE_BODY(*np) = target;
7489
7490 if (NODE_TYPE(*np) == NODE_ENCLOSURE) {
7491 if (ENCLOSURE_(*np)->type == ENCLOSURE_MEMORY) {
7492 /* Don't move this to previous of parse_subexp() */
7493 r = scan_env_set_mem_node(env, ENCLOSURE_(*np)->m.regnum, *np);
7494 if (r != 0) return r;
7495 }
7496 }
7497
7498 end:
7499 *src = p;
7500 return 0;
7501 }
7502
/* Spelling of each common quantifier, indexed by the value that
   quantifier_type_num() yields; used when building warning messages. */
static const char* PopularQStr[] = {
  "?",    /* 0 */
  "*",    /* 1 */
  "+",    /* 2 */
  "??",   /* 3 */
  "*?",   /* 4 */
  "+?"    /* 5 */
};
7506
/* Text describing the replacement for a pair of nested quantifiers,
   indexed by the ReduceTypeTable entry for that pair; used when building
   warning messages.  The first two entries are empty strings. */
static const char* ReduceQStr[] = {
  "",           /* 0 */
  "",           /* 1 */
  "*",          /* 2 */
  "*?",         /* 3 */
  "??",         /* 4 */
  "+ and ??",   /* 5 */
  "+? and ?"    /* 6 */
};
7510
7511 static int
7512 set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)
7513 {
7514 QuantNode* qn;
7515
7516 qn = QUANT_(qnode);
7517 if (qn->lower == 1 && qn->upper == 1)
7518 return 1;
7519
7520 switch (NODE_TYPE(target)) {
7521 case NODE_STRING:
7522 if (! group) {
7523 if (str_node_can_be_split(target, env->enc)) {
7524 Node* n = str_node_split_last_char(target, env->enc);
7525 if (IS_NOT_NULL(n)) {
7526 NODE_BODY(qnode) = n;
7527 return 2;
7528 }
7529 }
7530 }
7531 break;
7532
7533 case NODE_QUANT:
7534 { /* check redundant double repeat. */
7535 /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */
7536 QuantNode* qnt = QUANT_(target);
7537 int nestq_num = quantifier_type_num(qn);
7538 int targetq_num = quantifier_type_num(qnt);
7539
7540 #ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
7541 if (targetq_num >= 0 && nestq_num >= 0 &&
7542 IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {
7543 UChar buf[WARN_BUFSIZE];
7544
7545 switch(ReduceTypeTable[targetq_num][nestq_num]) {
7546 case RQ_ASIS:
7547 break;
7548
7549 case RQ_DEL:
7550 if (onig_verb_warn != onig_null_warn) {
7551 onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
7552 env->pattern, env->pattern_end,
7553 (UChar* )"redundant nested repeat operator");
7554 (*onig_verb_warn)((char* )buf);
7555 }
7556 goto warn_exit;
7557 break;
7558
7559 default:
7560 if (onig_verb_warn != onig_null_warn) {
7561 onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
7562 env->pattern, env->pattern_end,
7563 (UChar* )"nested repeat operator %s and %s was replaced with '%s'",
7564 PopularQStr[targetq_num], PopularQStr[nestq_num],
7565 ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]);
7566 (*onig_verb_warn)((char* )buf);
7567 }
7568 goto warn_exit;
7569 break;
7570 }
7571 }
7572
7573 warn_exit:
7574 #endif
7575 if (targetq_num >= 0 && nestq_num < 0) {
7576 if (targetq_num == 1 || targetq_num == 2) { /* * or + */
7577 /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */
7578 if (! IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) {
7579 qn->upper = (qn->lower == 0 ? 1 : qn->lower);
7580 }
7581 }
7582 }
7583 else {
7584 NODE_BODY(qnode) = target;
7585 onig_reduce_nested_quantifier(qnode, target);
7586 goto q_exit;
7587 }
7588 }
7589 break;
7590
7591 default:
7592 break;
7593 }
7594
7595 NODE_BODY(qnode) = target;
7596 q_exit:
7597 return 0;
7598 }
7599
7600
7601 #ifndef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
7602 static int
7603 clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc)
7604 {
7605 BBuf *tbuf;
7606 int r;
7607
7608 if (IS_NCCLASS_NOT(cc)) {
7609 bitset_invert(cc->bs);
7610
7611 if (! ONIGENC_IS_SINGLEBYTE(enc)) {
7612 r = not_code_range_buf(enc, cc->mbuf, &tbuf);
7613 if (r != 0) return r;
7614
7615 bbuf_free(cc->mbuf);
7616 cc->mbuf = tbuf;
7617 }
7618
7619 NCCLASS_CLEAR_NOT(cc);
7620 }
7621
7622 return 0;
7623 }
7624 #endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */
7625
7626 typedef struct {
7627 ScanEnv* env;
7628 CClassNode* cc;
7629 Node* alt_root;
7630 Node** ptail;
7631 } IApplyCaseFoldArg;
7632
7633 static int
7634 i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], int to_len, void* arg)
7635 {
7636 IApplyCaseFoldArg* iarg;
7637 ScanEnv* env;
7638 CClassNode* cc;
7639 BitSetRef bs;
7640
7641 iarg = (IApplyCaseFoldArg* )arg;
7642 env = iarg->env;
7643 cc = iarg->cc;
7644 bs = cc->bs;
7645
7646 if (to_len == 1) {
7647 int is_in = onig_is_code_in_cc(env->enc, from, cc);
7648 #ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
7649 if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) ||
7650 (is_in == 0 && IS_NCCLASS_NOT(cc))) {
7651 if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
7652 add_code_range(&(cc->mbuf), env, *to, *to);
7653 }
7654 else {
7655 BITSET_SET_BIT(bs, *to);
7656 }
7657 }
7658 #else
7659 if (is_in != 0) {
7660 if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
7661 if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc);
7662 add_code_range(&(cc->mbuf), env, *to, *to);
7663 }
7664 else {
7665 if (IS_NCCLASS_NOT(cc)) {
7666 BITSET_CLEAR_BIT(bs, *to);
7667 }
7668 else
7669 BITSET_SET_BIT(bs, *to);
7670 }
7671 }
7672 #endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */
7673 }
7674 else {
7675 int r, i, len;
7676 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
7677 Node *snode = NULL_NODE;
7678
7679 if (onig_is_code_in_cc(env->enc, from, cc)
7680 #ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
7681 && !IS_NCCLASS_NOT(cc)
7682 #endif
7683 ) {
7684 for (i = 0; i < to_len; i++) {
7685 len = ONIGENC_CODE_TO_MBC(env->enc, to[i], buf);
7686 if (i == 0) {
7687 snode = onig_node_new_str(buf, buf + len);
7688 CHECK_NULL_RETURN_MEMERR(snode);
7689
7690 /* char-class expanded multi-char only
7691 compare with string folded at match time. */
7692 NODE_STRING_SET_AMBIG(snode);
7693 }
7694 else {
7695 r = onig_node_str_cat(snode, buf, buf + len);
7696 if (r < 0) {
7697 onig_node_free(snode);
7698 return r;
7699 }
7700 }
7701 }
7702
7703 *(iarg->ptail) = onig_node_new_alt(snode, NULL_NODE);
7704 CHECK_NULL_RETURN_MEMERR(*(iarg->ptail));
7705 iarg->ptail = &(NODE_CDR((*(iarg->ptail))));
7706 }
7707 }
7708
7709 return 0;
7710 }
7711
7712 static int
7713 parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
7714 ScanEnv* env)
7715 {
7716 int r, len, group = 0;
7717 Node* qn;
7718 Node** targetp;
7719
7720 *np = NULL;
7721 if (tok->type == (enum TokenSyms )term)
7722 goto end_of_token;
7723
7724 switch (tok->type) {
7725 case TK_ALT:
7726 case TK_EOT:
7727 end_of_token:
7728 *np = node_new_empty();
7729 return tok->type;
7730 break;
7731
7732 case TK_SUBEXP_OPEN:
7733 r = parse_enclosure(np, tok, TK_SUBEXP_CLOSE, src, end, env);
7734 if (r < 0) return r;
7735 if (r == 1) group = 1;
7736 else if (r == 2) { /* option only */
7737 Node* target;
7738 OnigOptionType prev = env->options;
7739
7740 env->options = ENCLOSURE_(*np)->o.options;
7741 r = fetch_token(tok, src, end, env);
7742 if (r < 0) return r;
7743 r = parse_subexp(&target, tok, term, src, end, env);
7744 env->options = prev;
7745 if (r < 0) {
7746 onig_node_free(target);
7747 return r;
7748 }
7749 NODE_BODY(*np) = target;
7750 return tok->type;
7751 }
7752 break;
7753
7754 case TK_SUBEXP_CLOSE:
7755 if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP))
7756 return ONIGERR_UNMATCHED_CLOSE_PARENTHESIS;
7757
7758 if (tok->escaped) goto tk_raw_byte;
7759 else goto tk_byte;
7760 break;
7761
7762 case TK_STRING:
7763 tk_byte:
7764 {
7765 *np = node_new_str(tok->backp, *src);
7766 CHECK_NULL_RETURN_MEMERR(*np);
7767
7768 while (1) {
7769 r = fetch_token(tok, src, end, env);
7770 if (r < 0) return r;
7771 if (r != TK_STRING) break;
7772
7773 r = onig_node_str_cat(*np, tok->backp, *src);
7774 if (r < 0) return r;
7775 }
7776
7777 string_end:
7778 targetp = np;
7779 goto repeat;
7780 }
7781 break;
7782
7783 case TK_RAW_BYTE:
7784 tk_raw_byte:
7785 {
7786 *np = node_new_str_raw_char((UChar )tok->u.c);
7787 CHECK_NULL_RETURN_MEMERR(*np);
7788 len = 1;
7789 while (1) {
7790 if (len >= ONIGENC_MBC_MINLEN(env->enc)) {
7791 if (len == enclen(env->enc, STR_(*np)->s)) {/* should not enclen_end() */
7792 r = fetch_token(tok, src, end, env);
7793 NODE_STRING_CLEAR_RAW(*np);
7794 goto string_end;
7795 }
7796 }
7797
7798 r = fetch_token(tok, src, end, env);
7799 if (r < 0) return r;
7800 if (r != TK_RAW_BYTE) {
7801 /* Don't use this, it is wrong for little endian encodings. */
7802 #ifdef USE_PAD_TO_SHORT_BYTE_CHAR
7803 int rem;
7804 if (len < ONIGENC_MBC_MINLEN(env->enc)) {
7805 rem = ONIGENC_MBC_MINLEN(env->enc) - len;
7806 (void )node_str_head_pad(STR_(*np), rem, (UChar )0);
7807 if (len + rem == enclen(env->enc, STR_(*np)->s)) {
7808 NODE_STRING_CLEAR_RAW(*np);
7809 goto string_end;
7810 }
7811 }
7812 #endif
7813 return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
7814 }
7815
7816 r = node_str_cat_char(*np, (UChar )tok->u.c);
7817 if (r < 0) return r;
7818
7819 len++;
7820 }
7821 }
7822 break;
7823
7824 case TK_CODE_POINT:
7825 {
7826 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
7827 int num = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf);
7828 if (num < 0) return num;
7829 #ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
7830 *np = node_new_str_raw(buf, buf + num);
7831 #else
7832 *np = node_new_str(buf, buf + num);
7833 #endif
7834 CHECK_NULL_RETURN_MEMERR(*np);
7835 }
7836 break;
7837
7838 case TK_QUOTE_OPEN:
7839 {
7840 OnigCodePoint end_op[2];
7841 UChar *qstart, *qend, *nextp;
7842
7843 end_op[0] = (OnigCodePoint )MC_ESC(env->syntax);
7844 end_op[1] = (OnigCodePoint )'E';
7845 qstart = *src;
7846 qend = find_str_position(end_op, 2, qstart, end, &nextp, env->enc);
7847 if (IS_NULL(qend)) {
7848 nextp = qend = end;
7849 }
7850 *np = node_new_str(qstart, qend);
7851 CHECK_NULL_RETURN_MEMERR(*np);
7852 *src = nextp;
7853 }
7854 break;
7855
7856 case TK_CHAR_TYPE:
7857 {
7858 switch (tok->u.prop.ctype) {
7859 case ONIGENC_CTYPE_WORD:
7860 *np = node_new_ctype(tok->u.prop.ctype, tok->u.prop.not, env->options);
7861 CHECK_NULL_RETURN_MEMERR(*np);
7862 break;
7863
7864 case ONIGENC_CTYPE_SPACE:
7865 case ONIGENC_CTYPE_DIGIT:
7866 case ONIGENC_CTYPE_XDIGIT:
7867 {
7868 CClassNode* cc;
7869
7870 *np = node_new_cclass();
7871 CHECK_NULL_RETURN_MEMERR(*np);
7872 cc = CCLASS_(*np);
7873 add_ctype_to_cc(cc, tok->u.prop.ctype, 0, env);
7874 if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
7875 }
7876 break;
7877
7878 default:
7879 return ONIGERR_PARSER_BUG;
7880 break;
7881 }
7882 }
7883 break;
7884
7885 case TK_CHAR_PROPERTY:
7886 r = parse_char_property(np, tok, src, end, env);
7887 if (r != 0) return r;
7888 break;
7889
7890 case TK_CC_OPEN:
7891 {
7892 CClassNode* cc;
7893
7894 r = parse_char_class(np, tok, src, end, env);
7895 if (r != 0) return r;
7896
7897 cc = CCLASS_(*np);
7898 if (IS_IGNORECASE(env->options)) {
7899 IApplyCaseFoldArg iarg;
7900
7901 iarg.env = env;
7902 iarg.cc = cc;
7903 iarg.alt_root = NULL_NODE;
7904 iarg.ptail = &(iarg.alt_root);
7905
7906 r = ONIGENC_APPLY_ALL_CASE_FOLD(env->enc, env->case_fold_flag,
7907 i_apply_case_fold, &iarg);
7908 if (r != 0) {
7909 onig_node_free(iarg.alt_root);
7910 return r;
7911 }
7912 if (IS_NOT_NULL(iarg.alt_root)) {
7913 Node* work = onig_node_new_alt(*np, iarg.alt_root);
7914 if (IS_NULL(work)) {
7915 onig_node_free(iarg.alt_root);
7916 return ONIGERR_MEMORY;
7917 }
7918 *np = work;
7919 }
7920 }
7921 }
7922 break;
7923
7924 case TK_ANYCHAR:
7925 *np = node_new_anychar();
7926 CHECK_NULL_RETURN_MEMERR(*np);
7927 break;
7928
7929 case TK_ANYCHAR_ANYTIME:
7930 *np = node_new_anychar();
7931 CHECK_NULL_RETURN_MEMERR(*np);
7932 qn = node_new_quantifier(0, REPEAT_INFINITE, 0);
7933 CHECK_NULL_RETURN_MEMERR(qn);
7934 NODE_BODY(qn) = *np;
7935 *np = qn;
7936 break;
7937
7938 case TK_BACKREF:
7939 len = tok->u.backref.num;
7940 *np = node_new_backref(len,
7941 (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)),
7942 tok->u.backref.by_name,
7943 #ifdef USE_BACKREF_WITH_LEVEL
7944 tok->u.backref.exist_level,
7945 tok->u.backref.level,
7946 #endif
7947 env);
7948 CHECK_NULL_RETURN_MEMERR(*np);
7949 break;
7950
7951 #ifdef USE_CALL
7952 case TK_CALL:
7953 {
7954 int gnum = tok->u.call.gnum;
7955
7956 *np = node_new_call(tok->u.call.name, tok->u.call.name_end,
7957 gnum, tok->u.call.by_number);
7958 CHECK_NULL_RETURN_MEMERR(*np);
7959 env->num_call++;
7960 if (tok->u.call.by_number != 0 && gnum == 0) {
7961 env->has_call_zero = 1;
7962 }
7963 }
7964 break;
7965 #endif
7966
7967 case TK_ANCHOR:
7968 {
7969 int ascii_mode =
7970 IS_WORD_ASCII(env->options) && IS_WORD_ANCHOR_TYPE(tok->u.anchor) ? 1 : 0;
7971 *np = onig_node_new_anchor(tok->u.anchor, ascii_mode);
7972 }
7973 break;
7974
7975 case TK_OP_REPEAT:
7976 case TK_INTERVAL:
7977 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS)) {
7978 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS))
7979 return ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED;
7980 else
7981 *np = node_new_empty();
7982 }
7983 else {
7984 goto tk_byte;
7985 }
7986 break;
7987
7988 case TK_KEEP:
7989 r = node_new_keep(np, env);
7990 if (r < 0) return r;
7991 break;
7992
7993 case TK_GENERAL_NEWLINE:
7994 r = node_new_general_newline(np, env);
7995 if (r < 0) return r;
7996 break;
7997
7998 case TK_NO_NEWLINE:
7999 r = node_new_no_newline(np, env);
8000 if (r < 0) return r;
8001 break;
8002
8003 case TK_TRUE_ANYCHAR:
8004 r = node_new_true_anychar(np, env);
8005 if (r < 0) return r;
8006 break;
8007
8008 case TK_EXTENDED_GRAPHEME_CLUSTER:
8009 r = make_extended_grapheme_cluster(np, env);
8010 if (r < 0) return r;
8011 break;
8012
8013 default:
8014 return ONIGERR_PARSER_BUG;
8015 break;
8016 }
8017
8018 {
8019 targetp = np;
8020
8021 re_entry:
8022 r = fetch_token(tok, src, end, env);
8023 if (r < 0) return r;
8024
8025 repeat:
8026 if (r == TK_OP_REPEAT || r == TK_INTERVAL) {
8027 if (is_invalid_quantifier_target(*targetp))
8028 return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID;
8029
8030 qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper,
8031 (r == TK_INTERVAL ? 1 : 0));
8032 CHECK_NULL_RETURN_MEMERR(qn);
8033 QUANT_(qn)->greedy = tok->u.repeat.greedy;
8034 r = set_quantifier(qn, *targetp, group, env);
8035 if (r < 0) {
8036 onig_node_free(qn);
8037 return r;
8038 }
8039
8040 if (tok->u.repeat.possessive != 0) {
8041 Node* en;
8042 en = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);
8043 if (IS_NULL(en)) {
8044 onig_node_free(qn);
8045 return ONIGERR_MEMORY;
8046 }
8047 NODE_BODY(en) = qn;
8048 qn = en;
8049 }
8050
8051 if (r == 0) {
8052 *targetp = qn;
8053 }
8054 else if (r == 1) {
8055 onig_node_free(qn);
8056 }
8057 else if (r == 2) { /* split case: /abc+/ */
8058 Node *tmp;
8059
8060 *targetp = node_new_list(*targetp, NULL);
8061 if (IS_NULL(*targetp)) {
8062 onig_node_free(qn);
8063 return ONIGERR_MEMORY;
8064 }
8065 tmp = NODE_CDR(*targetp) = node_new_list(qn, NULL);
8066 if (IS_NULL(tmp)) {
8067 onig_node_free(qn);
8068 return ONIGERR_MEMORY;
8069 }
8070 targetp = &(NODE_CAR(tmp));
8071 }
8072 goto re_entry;
8073 }
8074 }
8075
8076 return r;
8077 }
8078
8079 static int
8080 parse_branch(Node** top, OnigToken* tok, int term, UChar** src, UChar* end,
8081 ScanEnv* env)
8082 {
8083 int r;
8084 Node *node, **headp;
8085
8086 *top = NULL;
8087 r = parse_exp(&node, tok, term, src, end, env);
8088 if (r < 0) {
8089 onig_node_free(node);
8090 return r;
8091 }
8092
8093 if (r == TK_EOT || r == term || r == TK_ALT) {
8094 *top = node;
8095 }
8096 else {
8097 *top = node_new_list(node, NULL);
8098 headp = &(NODE_CDR(*top));
8099 while (r != TK_EOT && r != term && r != TK_ALT) {
8100 r = parse_exp(&node, tok, term, src, end, env);
8101 if (r < 0) {
8102 onig_node_free(node);
8103 return r;
8104 }
8105
8106 if (NODE_TYPE(node) == NODE_LIST) {
8107 *headp = node;
8108 while (IS_NOT_NULL(NODE_CDR(node))) node = NODE_CDR(node);
8109 headp = &(NODE_CDR(node));
8110 }
8111 else {
8112 *headp = node_new_list(node, NULL);
8113 headp = &(NODE_CDR(*headp));
8114 }
8115 }
8116 }
8117
8118 return r;
8119 }
8120
8121 /* term_tok: TK_EOT or TK_SUBEXP_CLOSE */
8122 static int
8123 parse_subexp(Node** top, OnigToken* tok, int term, UChar** src, UChar* end,
8124 ScanEnv* env)
8125 {
8126 int r;
8127 Node *node, **headp;
8128
8129 *top = NULL;
8130 env->parse_depth++;
8131 if (env->parse_depth > ParseDepthLimit)
8132 return ONIGERR_PARSE_DEPTH_LIMIT_OVER;
8133 r = parse_branch(&node, tok, term, src, end, env);
8134 if (r < 0) {
8135 onig_node_free(node);
8136 return r;
8137 }
8138
8139 if (r == term) {
8140 *top = node;
8141 }
8142 else if (r == TK_ALT) {
8143 *top = onig_node_new_alt(node, NULL);
8144 headp = &(NODE_CDR(*top));
8145 while (r == TK_ALT) {
8146 r = fetch_token(tok, src, end, env);
8147 if (r < 0) return r;
8148 r = parse_branch(&node, tok, term, src, end, env);
8149 if (r < 0) {
8150 onig_node_free(node);
8151 return r;
8152 }
8153 *headp = onig_node_new_alt(node, NULL);
8154 headp = &(NODE_CDR(*headp));
8155 }
8156
8157 if (tok->type != (enum TokenSyms )term)
8158 goto err;
8159 }
8160 else {
8161 onig_node_free(node);
8162 err:
8163 if (term == TK_SUBEXP_CLOSE)
8164 return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
8165 else
8166 return ONIGERR_PARSER_BUG;
8167 }
8168
8169 env->parse_depth--;
8170 return r;
8171 }
8172
8173 static int
8174 parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)
8175 {
8176 int r;
8177 OnigToken tok;
8178
8179 r = fetch_token(&tok, src, end, env);
8180 if (r < 0) return r;
8181 r = parse_subexp(top, &tok, TK_EOT, src, end, env);
8182 if (r < 0) return r;
8183
8184 return 0;
8185 }
8186
8187 #ifdef USE_CALL
8188 static int
8189 make_call_zero_body(Node* node, ScanEnv* env, Node** rnode)
8190 {
8191 int r;
8192
8193 Node* x = node_new_memory(0 /* 0: is not named */);
8194 CHECK_NULL_RETURN_MEMERR(x);
8195
8196 NODE_BODY(x) = node;
8197 ENCLOSURE_(x)->m.regnum = 0;
8198 r = scan_env_set_mem_node(env, 0, x);
8199 if (r != 0) {
8200 onig_node_free(x);
8201 return r;
8202 }
8203
8204 *rnode = x;
8205 return 0;
8206 }
8207 #endif
8208
8209 extern int
8210 onig_parse_tree(Node** root, const UChar* pattern, const UChar* end,
8211 regex_t* reg, ScanEnv* env)
8212 {
8213 int r;
8214 UChar* p;
8215 #ifdef USE_CALLOUT
8216 RegexExt* ext;
8217 #endif
8218
8219 names_clear(reg);
8220
8221 scan_env_clear(env);
8222 env->options = reg->options;
8223 env->case_fold_flag = reg->case_fold_flag;
8224 env->enc = reg->enc;
8225 env->syntax = reg->syntax;
8226 env->pattern = (UChar* )pattern;
8227 env->pattern_end = (UChar* )end;
8228 env->reg = reg;
8229
8230 *root = NULL;
8231
8232 if (! ONIGENC_IS_VALID_MBC_STRING(env->enc, pattern, end))
8233 return ONIGERR_INVALID_WIDE_CHAR_VALUE;
8234
8235 p = (UChar* )pattern;
8236 r = parse_regexp(root, &p, (UChar* )end, env);
8237
8238 #ifdef USE_CALL
8239 if (r != 0) return r;
8240
8241 if (env->has_call_zero != 0) {
8242 Node* zero_node;
8243 r = make_call_zero_body(*root, env, &zero_node);
8244 if (r != 0) return r;
8245
8246 *root = zero_node;
8247 }
8248 #endif
8249
8250 reg->num_mem = env->num_mem;
8251
8252 #ifdef USE_CALLOUT
8253 ext = REG_EXTP(reg);
8254 if (IS_NOT_NULL(ext) && ext->callout_num > 0) {
8255 r = setup_ext_callout_list_values(reg);
8256 }
8257 #endif
8258
8259 return r;
8260 }
8261
8262 extern void
8263 onig_scan_env_set_error_string(ScanEnv* env, int ecode ARG_UNUSED,
8264 UChar* arg, UChar* arg_end)
8265 {
8266 env->error = arg;
8267 env->error_end = arg_end;
8268 }