]> git.proxmox.com Git - mirror_edk2.git/blob - MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regparse.c
c71ae5fa700c55fbe6c60fac0e145c6549ecd329
[mirror_edk2.git] / MdeModulePkg / Universal / RegularExpressionDxe / Oniguruma / regparse.c
1 /**********************************************************************
2 regparse.c - Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5 * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 #include "regparse.h"
31 #include "st.h"
32
33 #ifdef DEBUG_NODE_FREE
34 #include <stdio.h>
35 #endif
36
37 #define INIT_TAG_NAMES_ALLOC_NUM 5
38
39 #define WARN_BUFSIZE 256
40
41 #define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
42
/*
 * Character tests for callout names and callout tag names: both accept the
 * ASCII letters, the ASCII digits and '_'.
 * The argument is parenthesized so that an operator expression can be passed
 * safely.  It is still evaluated several times, so it must not have side
 * effects.
 */
#define IS_ALLOWED_CODE_IN_CALLOUT_NAME(c) \
  (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z') || \
   ((c) >= '0' && (c) <= '9') || (c) == '_' /* || (c) == '!' */)
#define IS_ALLOWED_CODE_IN_CALLOUT_TAG_NAME(c) \
  (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z') || \
   ((c) >= '0' && (c) <= '9') || (c) == '_')
47
48
49 OnigSyntaxType OnigSyntaxOniguruma = {
50 (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
51 ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
52 ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_O_BRACE_OCTAL |
53 ONIG_SYN_OP_ESC_CONTROL_CHARS |
54 ONIG_SYN_OP_ESC_C_CONTROL )
55 & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
56 , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
57 ONIG_SYN_OP2_OPTION_RUBY |
58 ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
59 ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE |
60 ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP |
61 ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS |
62 ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME |
63 ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER |
64 ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE |
65 ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT |
66 ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP |
67 ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |
68 ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
69 ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
70 ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
71 ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |
72 ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |
73 ONIG_SYN_OP2_ESC_H_XDIGIT | ONIG_SYN_OP2_ESC_U_HEX4 )
74 , ( SYN_GNU_REGEX_BV |
75 ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |
76 ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |
77 ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
78 ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |
79 ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |
80 ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |
81 ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )
82 , ONIG_OPTION_NONE
83 ,
84 {
85 (OnigCodePoint )'\\' /* esc */
86 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
87 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
88 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
89 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
90 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
91 }
92 };
93
94 OnigSyntaxType OnigSyntaxRuby = {
95 (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
96 ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
97 ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_O_BRACE_OCTAL |
98 ONIG_SYN_OP_ESC_CONTROL_CHARS |
99 ONIG_SYN_OP_ESC_C_CONTROL )
100 & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
101 , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
102 ONIG_SYN_OP2_OPTION_RUBY |
103 ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
104 ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE |
105 ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP |
106 ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER |
107 ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE |
108 ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP |
109 ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |
110 ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
111 ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
112 ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
113 ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |
114 ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |
115 ONIG_SYN_OP2_ESC_H_XDIGIT | ONIG_SYN_OP2_ESC_U_HEX4 )
116 , ( SYN_GNU_REGEX_BV |
117 ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |
118 ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |
119 ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
120 ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |
121 ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |
122 ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |
123 ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )
124 , ONIG_OPTION_NONE
125 ,
126 {
127 (OnigCodePoint )'\\' /* esc */
128 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
129 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
130 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
131 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
132 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
133 }
134 };
135
136 OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_ONIGURUMA;
137
138 extern void onig_null_warn(const char* s ARG_UNUSED) { }
139
140 #ifdef DEFAULT_WARN_FUNCTION
141 static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION;
142 #else
143 static OnigWarnFunc onig_warn = onig_null_warn;
144 #endif
145
146 #ifdef DEFAULT_VERB_WARN_FUNCTION
147 static OnigWarnFunc onig_verb_warn = (OnigWarnFunc )DEFAULT_VERB_WARN_FUNCTION;
148 #else
149 static OnigWarnFunc onig_verb_warn = onig_null_warn;
150 #endif
151
152 extern void onig_set_warn_func(OnigWarnFunc f)
153 {
154 onig_warn = f;
155 }
156
157 extern void onig_set_verb_warn_func(OnigWarnFunc f)
158 {
159 onig_verb_warn = f;
160 }
161
162 extern void
163 onig_warning(const char* s)
164 {
165 if (onig_warn == onig_null_warn) return ;
166
167 (*onig_warn)(s);
168 }
169
/* Initial value of the capture-number limit. */
#define DEFAULT_MAX_CAPTURE_NUM   32767

/* Current capture-number limit. */
static int MaxCaptureNum = DEFAULT_MAX_CAPTURE_NUM;

/*
 * Replace the capture-number limit.
 * Returns 0 on success, or -1 (leaving the limit untouched) when num is
 * negative.
 */
extern int
onig_set_capture_num_limit(int num)
{
  if (num >= 0) {
    MaxCaptureNum = num;
    return 0;
  }

  return -1;
}
182
183 static unsigned int ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT;
184
185 extern unsigned int
186 onig_get_parse_depth_limit(void)
187 {
188 return ParseDepthLimit;
189 }
190
191 extern int
192 onig_set_parse_depth_limit(unsigned int depth)
193 {
194 if (depth == 0)
195 ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT;
196 else
197 ParseDepthLimit = depth;
198 return 0;
199 }
200
/*
 * Multiply two ints with an overflow guard.
 * Returns 0 when either operand is 0, the product when x is strictly below
 * INT_MAX / y, and -1 otherwise.
 */
static int
positive_int_multiply(int x, int y)
{
  if (x != 0 && y != 0) {
    return (x < INT_MAX / y) ? x * y : -1;
  }

  return 0;
}
211
212 static void
213 bbuf_free(BBuf* bbuf)
214 {
215 if (IS_NOT_NULL(bbuf)) {
216 if (IS_NOT_NULL(bbuf->p)) xfree(bbuf->p);
217 xfree(bbuf);
218 }
219 }
220
221 static int
222 bbuf_clone(BBuf** rto, BBuf* from)
223 {
224 int r;
225 BBuf *to;
226
227 *rto = to = (BBuf* )xmalloc(sizeof(BBuf));
228 CHECK_NULL_RETURN_MEMERR(to);
229 r = BB_INIT(to, from->alloc);
230 if (r != 0) {
231 xfree(to->p);
232 *rto = 0;
233 return r;
234 }
235 to->used = from->used;
236 xmemcpy(to->p, from->p, from->used);
237 return 0;
238 }
239
240 static int backref_rel_to_abs(int rel_no, ScanEnv* env)
241 {
242 if (rel_no > 0) {
243 return env->num_mem + rel_no;
244 }
245 else {
246 return env->num_mem + 1 + rel_no;
247 }
248 }
249
/* Set (OPTION_ON) or clear (OPTION_OFF) the bits f in the lvalue v. */
#define OPTION_ON(v,f)     ((v) |= (f))
#define OPTION_OFF(v,f)    ((v) &= ~(f))

/*
 * Clear the bits f in v when `negative` is non-zero, set them otherwise.
 * The whole conditional is parenthesized so the macro stays one operand when
 * it is used inside a larger expression.
 */
#define OPTION_NEGATE(v,f,negative)    ((negative) ? ((v) &= ~(f)) : ((v) |= (f)))
254
/* First code point of the multi-byte range: 0 when the encoding's minimum
   character length exceeds one byte, 0x80 otherwise. */
#define MBCODE_START_POS(enc) \
  (OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80)

/* Add the range [MBCODE_START_POS(enc), all-ones code point] to pbuf. */
#define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \
  add_code_range_to_buf(pbuf, MBCODE_START_POS(enc), ~((OnigCodePoint )0))

/*
 * For encodings that are not single-byte, add the whole multi-byte range to
 * mbuf.  Uses a variable `r` of the enclosing function and returns from that
 * function when the add fails.
 */
#define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\
  if (! ONIGENC_IS_SINGLEBYTE(enc)) {\
    r = SET_ALL_MULTI_BYTE_RANGE(enc, &(mbuf));\
    if (r != 0) return r;\
  }\
} while (0)
267
268
/*
 * Store 1 in `empty` when every word of bit set bs is zero, 0 otherwise.
 * The macro declares its own loop counter named `i`.
 */
#define BITSET_IS_EMPTY(bs,empty) do {\
  int i;\
  empty = 1;\
  for (i = 0; i < (int )BITSET_SIZE; i++) {\
    if ((bs)[i] != 0) {\
      empty = 0; break;\
    }\
  }\
} while (0)
278
279 static void
280 bitset_set_range(BitSetRef bs, int from, int to)
281 {
282 int i;
283 for (i = from; i <= to && i < SINGLE_BYTE_SIZE; i++) {
284 BITSET_SET_BIT(bs, i);
285 }
286 }
287
288 #if 0
289 static void
290 bitset_set_all(BitSetRef bs)
291 {
292 int i;
293 for (i = 0; i < BITSET_SIZE; i++) { bs[i] = ~((Bits )0); }
294 }
295 #endif
296
297 static void
298 bitset_invert(BitSetRef bs)
299 {
300 int i;
301 for (i = 0; i < (int )BITSET_SIZE; i++) { bs[i] = ~(bs[i]); }
302 }
303
304 static void
305 bitset_invert_to(BitSetRef from, BitSetRef to)
306 {
307 int i;
308 for (i = 0; i < (int )BITSET_SIZE; i++) { to[i] = ~(from[i]); }
309 }
310
311 static void
312 bitset_and(BitSetRef dest, BitSetRef bs)
313 {
314 int i;
315 for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] &= bs[i]; }
316 }
317
318 static void
319 bitset_or(BitSetRef dest, BitSetRef bs)
320 {
321 int i;
322 for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] |= bs[i]; }
323 }
324
325 static void
326 bitset_copy(BitSetRef dest, BitSetRef bs)
327 {
328 int i;
329 for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] = bs[i]; }
330 }
331
332 extern int
333 onig_strncmp(const UChar* s1, const UChar* s2, int n)
334 {
335 int x;
336
337 while (n-- > 0) {
338 x = *s2++ - *s1++;
339 if (x) return x;
340 }
341 return 0;
342 }
343
344 extern void
345 onig_strcpy(UChar* dest, const UChar* src, const UChar* end)
346 {
347 int len = (int )(end - src);
348 if (len > 0) {
349 xmemcpy(dest, src, len);
350 dest[len] = (UChar )0;
351 }
352 }
353
354 static int
355 save_entry(ScanEnv* env, enum SaveType type, int* id)
356 {
357 int nid = env->save_num;
358
359 #if 0
360 if (IS_NULL(env->saves)) {
361 int n = 10;
362 env->saves = (SaveItem* )xmalloc(sizeof(SaveItem) * n);
363 CHECK_NULL_RETURN_MEMERR(env->saves);
364 env->save_alloc_num = n;
365 }
366 else if (env->save_alloc_num <= nid) {
367 int n = env->save_alloc_num * 2;
368 SaveItem* p = (SaveItem* )xrealloc(env->saves, sizeof(SaveItem) * n, sizeof(SaveItem)*env->save_alloc_num);
369 CHECK_NULL_RETURN_MEMERR(p);
370 env->saves = p;
371 env->save_alloc_num = n;
372 }
373
374 env->saves[nid].type = type;
375 #endif
376
377 env->save_num++;
378 *id = nid;
379 return 0;
380 }
381
/*
 * Pattern scanning macros.  They operate on variables of the enclosing
 * function: `p` (current position), `end` (end of pattern), `enc`
 * (encoding) and, for the non-_S forms, `pfetch_prev`.
 */
#define PEND_VALUE   0

/* declares the variable remembering the position before the last fetch */
#define PFETCH_READY  UChar* pfetch_prev
/* 1 when p has reached end, 0 otherwise */
#define PEND         (p < end ?  0 : 1)
/* step back to the position saved by the last PINC / PFETCH */
#define PUNFETCH     p = pfetch_prev
/* skip one character, remembering where it started */
#define PINC       do { \
  pfetch_prev = p; \
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
} while (0)
/* read one character into c and advance, remembering where it started */
#define PFETCH(c)  do { \
  c = ONIGENC_MBC_TO_CODE(enc, p, end); \
  pfetch_prev = p; \
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
} while (0)

/* _S variants: same as above without touching pfetch_prev */
#define PINC_S     do { \
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
} while (0)
#define PFETCH_S(c) do { \
  c = ONIGENC_MBC_TO_CODE(enc, p, end); \
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
} while (0)

/* code point at p without advancing; PEND_VALUE at end of pattern */
#define PPEEK        (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE)
#define PPEEK_IS(c)  (PPEEK == (OnigCodePoint )c)
408
409 static UChar*
410 strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end,
411 int capa, int oldCapa)
412 {
413 UChar* r;
414
415 if (dest)
416 r = (UChar* )xrealloc(dest, capa + 1, oldCapa);
417 else
418 r = (UChar* )xmalloc(capa + 1);
419
420 CHECK_NULL_RETURN(r);
421 onig_strcpy(r + (dest_end - dest), src, src_end);
422 return r;
423 }
424
425 /* dest on static area */
426 static UChar*
427 strcat_capa_from_static(UChar* dest, UChar* dest_end,
428 const UChar* src, const UChar* src_end, int capa)
429 {
430 UChar* r;
431
432 r = (UChar* )xmalloc(capa + 1);
433 CHECK_NULL_RETURN(r);
434 onig_strcpy(r, dest, dest_end);
435 onig_strcpy(r + (dest_end - dest), src, src_end);
436 return r;
437 }
438
439
440 #ifdef USE_ST_LIBRARY
441
442 typedef struct {
443 UChar* s;
444 UChar* end;
445 } st_str_end_key;
446
447 static int
448 str_end_cmp(st_str_end_key* x, st_str_end_key* y)
449 {
450 UChar *p, *q;
451 int c;
452
453 if ((x->end - x->s) != (y->end - y->s))
454 return 1;
455
456 p = x->s;
457 q = y->s;
458 while (p < x->end) {
459 c = (int )*p - (int )*q;
460 if (c != 0) return c;
461
462 p++; q++;
463 }
464
465 return 0;
466 }
467
468 static int
469 str_end_hash(st_str_end_key* x)
470 {
471 UChar *p;
472 int val = 0;
473
474 p = x->s;
475 while (p < x->end) {
476 val = val * 997 + (int )*p++;
477 }
478
479 return val + (val >> 5);
480 }
481
482 extern hash_table_type*
483 onig_st_init_strend_table_with_size(int size)
484 {
485 static struct st_hash_type hashType = {
486 str_end_cmp,
487 str_end_hash,
488 };
489
490 return (hash_table_type* )
491 onig_st_init_table_with_size(&hashType, size);
492 }
493
494 extern int
495 onig_st_lookup_strend(hash_table_type* table, const UChar* str_key,
496 const UChar* end_key, hash_data_type *value)
497 {
498 st_str_end_key key;
499
500 key.s = (UChar* )str_key;
501 key.end = (UChar* )end_key;
502
503 return onig_st_lookup(table, (st_data_t )(&key), value);
504 }
505
506 extern int
507 onig_st_insert_strend(hash_table_type* table, const UChar* str_key,
508 const UChar* end_key, hash_data_type value)
509 {
510 st_str_end_key* key;
511 int result;
512
513 key = (st_str_end_key* )xmalloc(sizeof(st_str_end_key));
514 CHECK_NULL_RETURN_MEMERR(key);
515
516 key->s = (UChar* )str_key;
517 key->end = (UChar* )end_key;
518 result = onig_st_insert(table, (st_data_t )key, value);
519 if (result) {
520 xfree(key);
521 }
522 return result;
523 }
524
525
526 typedef struct {
527 OnigEncoding enc;
528 int type; /* callout type: single or not */
529 UChar* s;
530 UChar* end;
531 } st_callout_name_key;
532
533 static int
534 callout_name_table_cmp(st_callout_name_key* x, st_callout_name_key* y)
535 {
536 UChar *p, *q;
537 int c;
538
539 if (x->enc != y->enc) return 1;
540 if (x->type != y->type) return 1;
541 if ((x->end - x->s) != (y->end - y->s))
542 return 1;
543
544 p = x->s;
545 q = y->s;
546 while (p < x->end) {
547 c = (int )*p - (int )*q;
548 if (c != 0) return c;
549
550 p++; q++;
551 }
552
553 return 0;
554 }
555
556 static int
557 callout_name_table_hash(st_callout_name_key* x)
558 {
559 UChar *p;
560 int val = 0;
561
562 p = x->s;
563 while (p < x->end) {
564 val = val * 997 + (int )*p++;
565 }
566
567 /* use intptr_t for escape warning in Windows */
568 return val + (val >> 5) + ((intptr_t )x->enc & 0xffff) + x->type;
569 }
570
571 extern hash_table_type*
572 onig_st_init_callout_name_table_with_size(int size)
573 {
574 static struct st_hash_type hashType = {
575 callout_name_table_cmp,
576 callout_name_table_hash,
577 };
578
579 return (hash_table_type* )
580 onig_st_init_table_with_size(&hashType, size);
581 }
582
583 extern int
584 onig_st_lookup_callout_name_table(hash_table_type* table,
585 OnigEncoding enc,
586 int type,
587 const UChar* str_key,
588 const UChar* end_key,
589 hash_data_type *value)
590 {
591 st_callout_name_key key;
592
593 key.enc = enc;
594 key.type = type;
595 key.s = (UChar* )str_key;
596 key.end = (UChar* )end_key;
597
598 return onig_st_lookup(table, (st_data_t )(&key), value);
599 }
600
601 static int
602 st_insert_callout_name_table(hash_table_type* table,
603 OnigEncoding enc, int type,
604 UChar* str_key, UChar* end_key,
605 hash_data_type value)
606 {
607 st_callout_name_key* key;
608 int result;
609
610 key = (st_callout_name_key* )xmalloc(sizeof(st_callout_name_key));
611 CHECK_NULL_RETURN_MEMERR(key);
612
613 /* key->s: don't duplicate, because str_key is duped in callout_name_entry() */
614 key->enc = enc;
615 key->type = type;
616 key->s = str_key;
617 key->end = end_key;
618 result = onig_st_insert(table, (st_data_t )key, value);
619 if (result) {
620 xfree(key);
621 }
622 return result;
623 }
624
625 #endif /* USE_ST_LIBRARY */
626
627
628 #define INIT_NAME_BACKREFS_ALLOC_NUM 8
629
630 typedef struct {
631 UChar* name;
632 int name_len; /* byte length */
633 int back_num; /* number of backrefs */
634 int back_alloc;
635 int back_ref1;
636 int* back_refs;
637 } NameEntry;
638
639 #ifdef USE_ST_LIBRARY
640
641 #define INIT_NAMES_ALLOC_NUM 5
642
643 typedef st_table NameTable;
644 typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */
645
646 #define NAMEBUF_SIZE 24
647 #define NAMEBUF_SIZE_1 25
648
649 #ifdef ONIG_DEBUG
650 static int
651 i_print_name_entry(UChar* key, NameEntry* e, void* arg)
652 {
653 int i;
654 FILE* fp = (FILE* )arg;
655
656 fprintf(fp, "%s: ", e->name);
657 if (e->back_num == 0)
658 fputs("-", fp);
659 else if (e->back_num == 1)
660 fprintf(fp, "%d", e->back_ref1);
661 else {
662 for (i = 0; i < e->back_num; i++) {
663 if (i > 0) fprintf(fp, ", ");
664 fprintf(fp, "%d", e->back_refs[i]);
665 }
666 }
667 fputs("\n", fp);
668 return ST_CONTINUE;
669 }
670
671 extern int
672 onig_print_names(FILE* fp, regex_t* reg)
673 {
674 NameTable* t = (NameTable* )reg->name_table;
675
676 if (IS_NOT_NULL(t)) {
677 fprintf(fp, "name table\n");
678 onig_st_foreach(t, i_print_name_entry, (HashDataType )fp);
679 fputs("\n", fp);
680 }
681 return 0;
682 }
683 #endif /* ONIG_DEBUG */
684
685 static int
686 i_free_name_entry(UChar* key, NameEntry* e, void* arg ARG_UNUSED)
687 {
688 xfree(e->name);
689 if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
690 xfree(key);
691 xfree(e);
692 return ST_DELETE;
693 }
694
695 static int
696 names_clear(regex_t* reg)
697 {
698 NameTable* t = (NameTable* )reg->name_table;
699
700 if (IS_NOT_NULL(t)) {
701 onig_st_foreach(t, i_free_name_entry, 0);
702 }
703 return 0;
704 }
705
706 extern int
707 onig_names_free(regex_t* reg)
708 {
709 int r;
710 NameTable* t;
711
712 r = names_clear(reg);
713 if (r != 0) return r;
714
715 t = (NameTable* )reg->name_table;
716 if (IS_NOT_NULL(t)) onig_st_free_table(t);
717 reg->name_table = (void* )NULL;
718 return 0;
719 }
720
721 static NameEntry*
722 name_find(regex_t* reg, const UChar* name, const UChar* name_end)
723 {
724 NameEntry* e;
725 NameTable* t = (NameTable* )reg->name_table;
726
727 e = (NameEntry* )NULL;
728 if (IS_NOT_NULL(t)) {
729 onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));
730 }
731 return e;
732 }
733
734 typedef struct {
735 int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*);
736 regex_t* reg;
737 void* arg;
738 int ret;
739 OnigEncoding enc;
740 } INamesArg;
741
742 static int
743 i_names(UChar* key ARG_UNUSED, NameEntry* e, INamesArg* arg)
744 {
745 int r = (*(arg->func))(e->name,
746 e->name + e->name_len,
747 e->back_num,
748 (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
749 arg->reg, arg->arg);
750 if (r != 0) {
751 arg->ret = r;
752 return ST_STOP;
753 }
754 return ST_CONTINUE;
755 }
756
757 extern int
758 onig_foreach_name(regex_t* reg,
759 int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
760 {
761 INamesArg narg;
762 NameTable* t = (NameTable* )reg->name_table;
763
764 narg.ret = 0;
765 if (IS_NOT_NULL(t)) {
766 narg.func = func;
767 narg.reg = reg;
768 narg.arg = arg;
769 narg.enc = reg->enc; /* should be pattern encoding. */
770 onig_st_foreach(t, i_names, (HashDataType )&narg);
771 }
772 return narg.ret;
773 }
774
775 static int
776 i_renumber_name(UChar* key ARG_UNUSED, NameEntry* e, GroupNumRemap* map)
777 {
778 int i;
779
780 if (e->back_num > 1) {
781 for (i = 0; i < e->back_num; i++) {
782 e->back_refs[i] = map[e->back_refs[i]].new_val;
783 }
784 }
785 else if (e->back_num == 1) {
786 e->back_ref1 = map[e->back_ref1].new_val;
787 }
788
789 return ST_CONTINUE;
790 }
791
792 extern int
793 onig_renumber_name_table(regex_t* reg, GroupNumRemap* map)
794 {
795 NameTable* t = (NameTable* )reg->name_table;
796
797 if (IS_NOT_NULL(t)) {
798 onig_st_foreach(t, i_renumber_name, (HashDataType )map);
799 }
800 return 0;
801 }
802
803
804 extern int
805 onig_number_of_names(regex_t* reg)
806 {
807 NameTable* t = (NameTable* )reg->name_table;
808
809 if (IS_NOT_NULL(t))
810 return t->num_entries;
811 else
812 return 0;
813 }
814
815 #else /* USE_ST_LIBRARY */
816
817 #define INIT_NAMES_ALLOC_NUM 8
818
819 typedef struct {
820 NameEntry* e;
821 int num;
822 int alloc;
823 } NameTable;
824
825 #ifdef ONIG_DEBUG
826 extern int
827 onig_print_names(FILE* fp, regex_t* reg)
828 {
829 int i, j;
830 NameEntry* e;
831 NameTable* t = (NameTable* )reg->name_table;
832
833 if (IS_NOT_NULL(t) && t->num > 0) {
834 fprintf(fp, "name table\n");
835 for (i = 0; i < t->num; i++) {
836 e = &(t->e[i]);
837 fprintf(fp, "%s: ", e->name);
838 if (e->back_num == 0) {
839 fputs("-", fp);
840 }
841 else if (e->back_num == 1) {
842 fprintf(fp, "%d", e->back_ref1);
843 }
844 else {
845 for (j = 0; j < e->back_num; j++) {
846 if (j > 0) fprintf(fp, ", ");
847 fprintf(fp, "%d", e->back_refs[j]);
848 }
849 }
850 fputs("\n", fp);
851 }
852 fputs("\n", fp);
853 }
854 return 0;
855 }
856 #endif
857
858 static int
859 names_clear(regex_t* reg)
860 {
861 int i;
862 NameEntry* e;
863 NameTable* t = (NameTable* )reg->name_table;
864
865 if (IS_NOT_NULL(t)) {
866 for (i = 0; i < t->num; i++) {
867 e = &(t->e[i]);
868 if (IS_NOT_NULL(e->name)) {
869 xfree(e->name);
870 e->name = NULL;
871 e->name_len = 0;
872 e->back_num = 0;
873 e->back_alloc = 0;
874 if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
875 e->back_refs = (int* )NULL;
876 }
877 }
878 if (IS_NOT_NULL(t->e)) {
879 xfree(t->e);
880 t->e = NULL;
881 }
882 t->num = 0;
883 }
884 return 0;
885 }
886
887 extern int
888 onig_names_free(regex_t* reg)
889 {
890 int r;
891 NameTable* t;
892
893 r = names_clear(reg);
894 if (r != 0) return r;
895
896 t = (NameTable* )reg->name_table;
897 if (IS_NOT_NULL(t)) xfree(t);
898 reg->name_table = NULL;
899 return 0;
900 }
901
902 static NameEntry*
903 name_find(regex_t* reg, UChar* name, UChar* name_end)
904 {
905 int i, len;
906 NameEntry* e;
907 NameTable* t = (NameTable* )reg->name_table;
908
909 if (IS_NOT_NULL(t)) {
910 len = name_end - name;
911 for (i = 0; i < t->num; i++) {
912 e = &(t->e[i]);
913 if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)
914 return e;
915 }
916 }
917 return (NameEntry* )NULL;
918 }
919
920 extern int
921 onig_foreach_name(regex_t* reg,
922 int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
923 {
924 int i, r;
925 NameEntry* e;
926 NameTable* t = (NameTable* )reg->name_table;
927
928 if (IS_NOT_NULL(t)) {
929 for (i = 0; i < t->num; i++) {
930 e = &(t->e[i]);
931 r = (*func)(e->name, e->name + e->name_len, e->back_num,
932 (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
933 reg, arg);
934 if (r != 0) return r;
935 }
936 }
937 return 0;
938 }
939
940 extern int
941 onig_number_of_names(regex_t* reg)
942 {
943 NameTable* t = (NameTable* )reg->name_table;
944
945 if (IS_NOT_NULL(t))
946 return t->num;
947 else
948 return 0;
949 }
950
951 #endif /* else USE_ST_LIBRARY */
952
953 static int
954 name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)
955 {
956 int r;
957 int alloc;
958 NameEntry* e;
959 NameTable* t = (NameTable* )reg->name_table;
960
961 if (name_end - name <= 0)
962 return ONIGERR_EMPTY_GROUP_NAME;
963
964 e = name_find(reg, name, name_end);
965 if (IS_NULL(e)) {
966 #ifdef USE_ST_LIBRARY
967 if (IS_NULL(t)) {
968 t = onig_st_init_strend_table_with_size(INIT_NAMES_ALLOC_NUM);
969 CHECK_NULL_RETURN_MEMERR(t);
970 reg->name_table = (void* )t;
971 }
972 e = (NameEntry* )xmalloc(sizeof(NameEntry));
973 CHECK_NULL_RETURN_MEMERR(e);
974
975 e->name = onigenc_strdup(reg->enc, name, name_end);
976 if (IS_NULL(e->name)) {
977 xfree(e); return ONIGERR_MEMORY;
978 }
979 r = onig_st_insert_strend(t, e->name, (e->name + (name_end - name)),
980 (HashDataType )e);
981 if (r < 0) return r;
982
983 e->name_len = (int )(name_end - name);
984 e->back_num = 0;
985 e->back_alloc = 0;
986 e->back_refs = (int* )NULL;
987
988 #else
989
990 if (IS_NULL(t)) {
991 alloc = INIT_NAMES_ALLOC_NUM;
992 t = (NameTable* )xmalloc(sizeof(NameTable));
993 CHECK_NULL_RETURN_MEMERR(t);
994 t->e = NULL;
995 t->alloc = 0;
996 t->num = 0;
997
998 t->e = (NameEntry* )xmalloc(sizeof(NameEntry) * alloc);
999 if (IS_NULL(t->e)) {
1000 xfree(t);
1001 return ONIGERR_MEMORY;
1002 }
1003 t->alloc = alloc;
1004 reg->name_table = t;
1005 goto clear;
1006 }
1007 else if (t->num == t->alloc) {
1008 int i;
1009
1010 alloc = t->alloc * 2;
1011 t->e = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc, sizeof(NameEntry) * t->alloc);
1012 CHECK_NULL_RETURN_MEMERR(t->e);
1013 t->alloc = alloc;
1014
1015 clear:
1016 for (i = t->num; i < t->alloc; i++) {
1017 t->e[i].name = NULL;
1018 t->e[i].name_len = 0;
1019 t->e[i].back_num = 0;
1020 t->e[i].back_alloc = 0;
1021 t->e[i].back_refs = (int* )NULL;
1022 }
1023 }
1024 e = &(t->e[t->num]);
1025 t->num++;
1026 e->name = onigenc_strdup(reg->enc, name, name_end);
1027 if (IS_NULL(e->name)) return ONIGERR_MEMORY;
1028 e->name_len = name_end - name;
1029 #endif
1030 }
1031
1032 if (e->back_num >= 1 &&
1033 ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME)) {
1034 onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME,
1035 name, name_end);
1036 return ONIGERR_MULTIPLEX_DEFINED_NAME;
1037 }
1038
1039 e->back_num++;
1040 if (e->back_num == 1) {
1041 e->back_ref1 = backref;
1042 }
1043 else {
1044 if (e->back_num == 2) {
1045 alloc = INIT_NAME_BACKREFS_ALLOC_NUM;
1046 e->back_refs = (int* )xmalloc(sizeof(int) * alloc);
1047 CHECK_NULL_RETURN_MEMERR(e->back_refs);
1048 e->back_alloc = alloc;
1049 e->back_refs[0] = e->back_ref1;
1050 e->back_refs[1] = backref;
1051 }
1052 else {
1053 if (e->back_num > e->back_alloc) {
1054 alloc = e->back_alloc * 2;
1055 e->back_refs = (int* )xrealloc(e->back_refs, sizeof(int) * alloc, sizeof(int) * e->back_alloc);
1056 CHECK_NULL_RETURN_MEMERR(e->back_refs);
1057 e->back_alloc = alloc;
1058 }
1059 e->back_refs[e->back_num - 1] = backref;
1060 }
1061 }
1062
1063 return 0;
1064 }
1065
1066 extern int
1067 onig_name_to_group_numbers(regex_t* reg, const UChar* name,
1068 const UChar* name_end, int** nums)
1069 {
1070 NameEntry* e = name_find(reg, name, name_end);
1071
1072 if (IS_NULL(e)) return ONIGERR_UNDEFINED_NAME_REFERENCE;
1073
1074 switch (e->back_num) {
1075 case 0:
1076 break;
1077 case 1:
1078 *nums = &(e->back_ref1);
1079 break;
1080 default:
1081 *nums = e->back_refs;
1082 break;
1083 }
1084 return e->back_num;
1085 }
1086
1087 extern int
1088 onig_name_to_backref_number(regex_t* reg, const UChar* name,
1089 const UChar* name_end, OnigRegion *region)
1090 {
1091 int i, n, *nums;
1092
1093 n = onig_name_to_group_numbers(reg, name, name_end, &nums);
1094 if (n < 0)
1095 return n;
1096 else if (n == 0)
1097 return ONIGERR_PARSER_BUG;
1098 else if (n == 1)
1099 return nums[0];
1100 else {
1101 if (IS_NOT_NULL(region)) {
1102 for (i = n - 1; i >= 0; i--) {
1103 if (region->beg[nums[i]] != ONIG_REGION_NOTPOS)
1104 return nums[i];
1105 }
1106 }
1107 return nums[n - 1];
1108 }
1109 }
1110
1111 extern int
1112 onig_noname_group_capture_is_active(regex_t* reg)
1113 {
1114 if (ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_DONT_CAPTURE_GROUP))
1115 return 0;
1116
1117 if (onig_number_of_names(reg) > 0 &&
1118 IS_SYNTAX_BV(reg->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
1119 !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
1120 return 0;
1121 }
1122
1123 return 1;
1124 }
1125
1126 #ifdef USE_CALLOUT
1127
1128 typedef struct {
1129 OnigCalloutType type;
1130 int in;
1131 OnigCalloutFunc start_func;
1132 OnigCalloutFunc end_func;
1133 int arg_num;
1134 int opt_arg_num;
1135 unsigned int arg_types[ONIG_CALLOUT_MAX_ARGS_NUM];
1136 OnigValue opt_defaults[ONIG_CALLOUT_MAX_ARGS_NUM];
1137 UChar* name; /* reference to GlobalCalloutNameTable entry: e->name */
1138 } CalloutNameListEntry;
1139
1140 typedef struct {
1141 int n;
1142 int alloc;
1143 CalloutNameListEntry* v;
1144 } CalloutNameListType;
1145
1146 static CalloutNameListType* GlobalCalloutNameList;
1147
1148 static int
1149 make_callout_func_list(CalloutNameListType** rs, int init_size)
1150 {
1151 CalloutNameListType* s;
1152 CalloutNameListEntry* v;
1153
1154 *rs = 0;
1155
1156 s = xmalloc(sizeof(*s));
1157 if (IS_NULL(s)) return ONIGERR_MEMORY;
1158
1159 v = (CalloutNameListEntry* )xmalloc(sizeof(CalloutNameListEntry) * init_size);
1160 if (IS_NULL(v)) {
1161 xfree(s);
1162 return ONIGERR_MEMORY;
1163 }
1164
1165 s->n = 0;
1166 s->alloc = init_size;
1167 s->v = v;
1168
1169 *rs = s;
1170 return ONIG_NORMAL;
1171 }
1172
1173 static void
1174 free_callout_func_list(CalloutNameListType* s)
1175 {
1176 if (IS_NOT_NULL(s)) {
1177 if (IS_NOT_NULL(s->v)) {
1178 int i, j;
1179
1180 for (i = 0; i < s->n; i++) {
1181 CalloutNameListEntry* e = s->v + i;
1182 for (j = e->arg_num - e->opt_arg_num; j < e->arg_num; j++) {
1183 if (e->arg_types[j] == ONIG_TYPE_STRING) {
1184 UChar* p = e->opt_defaults[j].s.start;
1185 if (IS_NOT_NULL(p)) xfree(p);
1186 }
1187 }
1188 }
1189 xfree(s->v);
1190 }
1191 xfree(s);
1192 }
1193 }
1194
1195 static int
1196 callout_func_list_add(CalloutNameListType* s, int* rid)
1197 {
1198 if (s->n >= s->alloc) {
1199 int new_size = s->alloc * 2;
1200 CalloutNameListEntry* nv = (CalloutNameListEntry* )
1201 xrealloc(s->v, sizeof(CalloutNameListEntry) * new_size, sizeof(CalloutNameListEntry)*s->alloc);
1202 if (IS_NULL(nv)) return ONIGERR_MEMORY;
1203
1204 s->alloc = new_size;
1205 s->v = nv;
1206 }
1207
1208 *rid = s->n;
1209
1210 xmemset(&(s->v[s->n]), 0, sizeof(*(s->v)));
1211 s->n++;
1212 return ONIG_NORMAL;
1213 }
1214
1215
1216 typedef struct {
1217 UChar* name;
1218 int name_len; /* byte length */
1219 int id;
1220 } CalloutNameEntry;
1221
1222 #ifdef USE_ST_LIBRARY
1223 typedef st_table CalloutNameTable;
1224 #else
1225 typedef struct {
1226 CalloutNameEntry* e;
1227 int num;
1228 int alloc;
1229 } CalloutNameTable;
1230 #endif
1231
1232 static CalloutNameTable* GlobalCalloutNameTable;
1233 static int CalloutNameIDCounter;
1234
1235 #ifdef USE_ST_LIBRARY
1236
1237 static int
1238 i_free_callout_name_entry(st_callout_name_key* key, CalloutNameEntry* e,
1239 void* arg ARG_UNUSED)
1240 {
1241 xfree(e->name);
1242 /*xfree(key->s); */ /* is same as e->name */
1243 xfree(key);
1244 xfree(e);
1245 return ST_DELETE;
1246 }
1247
1248 static int
1249 callout_name_table_clear(CalloutNameTable* t)
1250 {
1251 if (IS_NOT_NULL(t)) {
1252 onig_st_foreach(t, i_free_callout_name_entry, 0);
1253 }
1254 return 0;
1255 }
1256
1257 static int
1258 global_callout_name_table_free(void)
1259 {
1260 if (IS_NOT_NULL(GlobalCalloutNameTable)) {
1261 int r = callout_name_table_clear(GlobalCalloutNameTable);
1262 if (r != 0) return r;
1263
1264 onig_st_free_table(GlobalCalloutNameTable);
1265 GlobalCalloutNameTable = 0;
1266 CalloutNameIDCounter = 0;
1267 }
1268
1269 return 0;
1270 }
1271
1272 static CalloutNameEntry*
1273 callout_name_find(OnigEncoding enc, int is_not_single,
1274 const UChar* name, const UChar* name_end)
1275 {
1276 int r;
1277 CalloutNameEntry* e;
1278 CalloutNameTable* t = GlobalCalloutNameTable;
1279
1280 e = (CalloutNameEntry* )NULL;
1281 if (IS_NOT_NULL(t)) {
1282 r = onig_st_lookup_callout_name_table(t, enc, is_not_single, name, name_end,
1283 (HashDataType* )((void* )(&e)));
1284 if (r == 0) { /* not found */
1285 if (enc != ONIG_ENCODING_ASCII &&
1286 ONIGENC_IS_ASCII_COMPATIBLE_ENCODING(enc)) {
1287 enc = ONIG_ENCODING_ASCII;
1288 onig_st_lookup_callout_name_table(t, enc, is_not_single, name, name_end,
1289 (HashDataType* )((void* )(&e)));
1290 }
1291 }
1292 }
1293 return e;
1294 }
1295
1296 #else
1297
1298 static int
1299 callout_name_table_clear(CalloutNameTable* t)
1300 {
1301 int i;
1302 CalloutNameEntry* e;
1303
1304 if (IS_NOT_NULL(t)) {
1305 for (i = 0; i < t->num; i++) {
1306 e = &(t->e[i]);
1307 if (IS_NOT_NULL(e->name)) {
1308 xfree(e->name);
1309 e->name = NULL;
1310 e->name_len = 0;
1311 e->id = 0;
1312 e->func = 0;
1313 }
1314 }
1315 if (IS_NOT_NULL(t->e)) {
1316 xfree(t->e);
1317 t->e = NULL;
1318 }
1319 t->num = 0;
1320 }
1321 return 0;
1322 }
1323
1324 static int
1325 global_callout_name_table_free(void)
1326 {
1327 if (IS_NOT_NULL(GlobalCalloutNameTable)) {
1328 int r = callout_name_table_clear(GlobalCalloutNameTable);
1329 if (r != 0) return r;
1330
1331 xfree(GlobalCalloutNameTable);
1332 GlobalCalloutNameTable = 0;
1333 CalloutNameIDCounter = 0;
1334 }
1335 return 0;
1336 }
1337
1338 static CalloutNameEntry*
1339 callout_name_find(UChar* name, UChar* name_end)
1340 {
1341 int i, len;
1342 CalloutNameEntry* e;
1343 CalloutNameTable* t = Calloutnames;
1344
1345 if (IS_NOT_NULL(t)) {
1346 len = name_end - name;
1347 for (i = 0; i < t->num; i++) {
1348 e = &(t->e[i]);
1349 if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)
1350 return e;
1351 }
1352 }
1353 return (CalloutNameEntry* )NULL;
1354 }
1355
1356 #endif
1357
1358 /* name string must be single byte char string. */
1359 static int
1360 callout_name_entry(CalloutNameEntry** rentry, OnigEncoding enc,
1361 int is_not_single, UChar* name, UChar* name_end)
1362 {
1363 int r;
1364 CalloutNameEntry* e;
1365 CalloutNameTable* t = GlobalCalloutNameTable;
1366
1367 *rentry = 0;
1368 if (name_end - name <= 0)
1369 return ONIGERR_INVALID_CALLOUT_NAME;
1370
1371 e = callout_name_find(enc, is_not_single, name, name_end);
1372 if (IS_NULL(e)) {
1373 #ifdef USE_ST_LIBRARY
1374 if (IS_NULL(t)) {
1375 t = onig_st_init_callout_name_table_with_size(INIT_NAMES_ALLOC_NUM);
1376 CHECK_NULL_RETURN_MEMERR(t);
1377 GlobalCalloutNameTable = t;
1378 }
1379 e = (CalloutNameEntry* )xmalloc(sizeof(CalloutNameEntry));
1380 CHECK_NULL_RETURN_MEMERR(e);
1381
1382 e->name = onigenc_strdup(enc, name, name_end);
1383 if (IS_NULL(e->name)) {
1384 xfree(e); return ONIGERR_MEMORY;
1385 }
1386
1387 r = st_insert_callout_name_table(t, enc, is_not_single,
1388 e->name, (e->name + (name_end - name)),
1389 (HashDataType )e);
1390 if (r < 0) return r;
1391
1392 #else
1393
1394 int alloc;
1395
1396 if (IS_NULL(t)) {
1397 alloc = INIT_NAMES_ALLOC_NUM;
1398 t = (CalloutNameTable* )xmalloc(sizeof(CalloutNameTable));
1399 CHECK_NULL_RETURN_MEMERR(t);
1400 t->e = NULL;
1401 t->alloc = 0;
1402 t->num = 0;
1403
1404 t->e = (CalloutNameEntry* )xmalloc(sizeof(CalloutNameEntry) * alloc);
1405 if (IS_NULL(t->e)) {
1406 xfree(t);
1407 return ONIGERR_MEMORY;
1408 }
1409 t->alloc = alloc;
1410 GlobalCalloutNameTable = t;
1411 goto clear;
1412 }
1413 else if (t->num == t->alloc) {
1414 int i;
1415
1416 alloc = t->alloc * 2;
1417 t->e = (CalloutNameEntry* )xrealloc(t->e, sizeof(CalloutNameEntry) * alloc, sizeof(CalloutNameEntry)*t->alloc);
1418 CHECK_NULL_RETURN_MEMERR(t->e);
1419 t->alloc = alloc;
1420
1421 clear:
1422 for (i = t->num; i < t->alloc; i++) {
1423 t->e[i].name = NULL;
1424 t->e[i].name_len = 0;
1425 t->e[i].id = 0;
1426 }
1427 }
1428 e = &(t->e[t->num]);
1429 t->num++;
1430 e->name = onigenc_strdup(enc, name, name_end);
1431 if (IS_NULL(e->name)) return ONIGERR_MEMORY;
1432 #endif
1433
1434 CalloutNameIDCounter++;
1435 e->id = CalloutNameIDCounter;
1436 e->name_len = (int )(name_end - name);
1437 }
1438
1439 *rentry = e;
1440 return e->id;
1441 }
1442
1443 static int
1444 is_allowed_callout_name(OnigEncoding enc, UChar* name, UChar* name_end)
1445 {
1446 UChar* p;
1447 OnigCodePoint c;
1448
1449 if (name >= name_end) return 0;
1450
1451 p = name;
1452 while (p < name_end) {
1453 c = ONIGENC_MBC_TO_CODE(enc, p, name_end);
1454 if (! IS_ALLOWED_CODE_IN_CALLOUT_NAME(c))
1455 return 0;
1456
1457 if (p == name) {
1458 if (c >= '0' && c <= '9') return 0;
1459 }
1460
1461 p += ONIGENC_MBC_ENC_LEN(enc, p);
1462 }
1463
1464 return 1;
1465 }
1466
1467 static int
1468 is_allowed_callout_tag_name(OnigEncoding enc, UChar* name, UChar* name_end)
1469 {
1470 UChar* p;
1471 OnigCodePoint c;
1472
1473 if (name >= name_end) return 0;
1474
1475 p = name;
1476 while (p < name_end) {
1477 c = ONIGENC_MBC_TO_CODE(enc, p, name_end);
1478 if (! IS_ALLOWED_CODE_IN_CALLOUT_TAG_NAME(c))
1479 return 0;
1480
1481 if (p == name) {
1482 if (c >= '0' && c <= '9') return 0;
1483 }
1484
1485 p += ONIGENC_MBC_ENC_LEN(enc, p);
1486 }
1487
1488 return 1;
1489 }
1490
1491 extern int
1492 onig_set_callout_of_name(OnigEncoding enc, OnigCalloutType callout_type,
1493 UChar* name, UChar* name_end, int in,
1494 OnigCalloutFunc start_func,
1495 OnigCalloutFunc end_func,
1496 int arg_num, unsigned int arg_types[],
1497 int opt_arg_num, OnigValue opt_defaults[])
1498 {
1499 int r;
1500 int i;
1501 int j;
1502 int id;
1503 int is_not_single;
1504 CalloutNameEntry* e;
1505 CalloutNameListEntry* fe;
1506
1507 if (callout_type != ONIG_CALLOUT_TYPE_SINGLE)
1508 return ONIGERR_INVALID_ARGUMENT;
1509
1510 if (arg_num < 0 || arg_num > ONIG_CALLOUT_MAX_ARGS_NUM)
1511 return ONIGERR_INVALID_CALLOUT_ARG;
1512
1513 if (opt_arg_num < 0 || opt_arg_num > arg_num)
1514 return ONIGERR_INVALID_CALLOUT_ARG;
1515
1516 if (start_func == 0 && end_func == 0)
1517 return ONIGERR_INVALID_CALLOUT_ARG;
1518
1519 if ((in & ONIG_CALLOUT_IN_PROGRESS) == 0 && (in & ONIG_CALLOUT_IN_RETRACTION) == 0)
1520 return ONIGERR_INVALID_CALLOUT_ARG;
1521
1522 for (i = 0; i < arg_num; i++) {
1523 unsigned int t = arg_types[i];
1524 if (t == ONIG_TYPE_VOID)
1525 return ONIGERR_INVALID_CALLOUT_ARG;
1526 else {
1527 if (i >= arg_num - opt_arg_num) {
1528 if (t != ONIG_TYPE_LONG && t != ONIG_TYPE_CHAR && t != ONIG_TYPE_STRING &&
1529 t != ONIG_TYPE_TAG)
1530 return ONIGERR_INVALID_CALLOUT_ARG;
1531 }
1532 else {
1533 if (t != ONIG_TYPE_LONG) {
1534 t = t & ~ONIG_TYPE_LONG;
1535 if (t != ONIG_TYPE_CHAR && t != ONIG_TYPE_STRING && t != ONIG_TYPE_TAG)
1536 return ONIGERR_INVALID_CALLOUT_ARG;
1537 }
1538 }
1539 }
1540 }
1541
1542 if (! is_allowed_callout_name(enc, name, name_end)) {
1543 return ONIGERR_INVALID_CALLOUT_NAME;
1544 }
1545
1546 is_not_single = (callout_type != ONIG_CALLOUT_TYPE_SINGLE);
1547 id = callout_name_entry(&e, enc, is_not_single, name, name_end);
1548 if (id < 0) return id;
1549
1550 r = ONIG_NORMAL;
1551 if (IS_NULL(GlobalCalloutNameList)) {
1552 r = make_callout_func_list(&GlobalCalloutNameList, 10);
1553 if (r != ONIG_NORMAL) return r;
1554 }
1555
1556 while (id >= GlobalCalloutNameList->n) {
1557 int rid;
1558 r = callout_func_list_add(GlobalCalloutNameList, &rid);
1559 if (r != ONIG_NORMAL) return r;
1560 }
1561
1562 fe = GlobalCalloutNameList->v + id;
1563 fe->type = callout_type;
1564 fe->in = in;
1565 fe->start_func = start_func;
1566 fe->end_func = end_func;
1567 fe->arg_num = arg_num;
1568 fe->opt_arg_num = opt_arg_num;
1569 fe->name = e->name;
1570
1571 for (i = 0; i < arg_num; i++) {
1572 fe->arg_types[i] = arg_types[i];
1573 }
1574 for (i = arg_num - opt_arg_num, j = 0; i < arg_num; i++, j++) {
1575 if (fe->arg_types[i] == ONIG_TYPE_STRING) {
1576 OnigValue* val = opt_defaults + j;
1577 UChar* ds = onigenc_strdup(enc, val->s.start, val->s.end);
1578 CHECK_NULL_RETURN_MEMERR(ds);
1579
1580 fe->opt_defaults[i].s.start = ds;
1581 fe->opt_defaults[i].s.end = ds + (val->s.end - val->s.start);
1582 }
1583 else {
1584 fe->opt_defaults[i] = opt_defaults[j];
1585 }
1586 }
1587
1588 r = id;
1589 return r;
1590 }
1591
1592 static int
1593 get_callout_name_id_by_name(OnigEncoding enc, int is_not_single,
1594 UChar* name, UChar* name_end, int* rid)
1595 {
1596 int r;
1597 CalloutNameEntry* e;
1598
1599 if (! is_allowed_callout_name(enc, name, name_end)) {
1600 return ONIGERR_INVALID_CALLOUT_NAME;
1601 }
1602
1603 e = callout_name_find(enc, is_not_single, name, name_end);
1604 if (IS_NULL(e)) {
1605 return ONIGERR_UNDEFINED_CALLOUT_NAME;
1606 }
1607
1608 r = ONIG_NORMAL;
1609 *rid = e->id;
1610
1611 return r;
1612 }
1613
1614 extern OnigCalloutFunc
1615 onig_get_callout_start_func(regex_t* reg, int callout_num)
1616 {
1617 /* If used for callouts of contents, return 0. */
1618 CalloutListEntry* e;
1619
1620 e = onig_reg_callout_list_at(reg, callout_num);
1621 CHECK_NULL_RETURN(e);
1622 return e->start_func;
1623 }
1624
1625 extern const UChar*
1626 onig_get_callout_tag_start(regex_t* reg, int callout_num)
1627 {
1628 CalloutListEntry* e = onig_reg_callout_list_at(reg, callout_num);
1629 CHECK_NULL_RETURN(e);
1630 return e->tag_start;
1631 }
1632
1633 extern const UChar*
1634 onig_get_callout_tag_end(regex_t* reg, int callout_num)
1635 {
1636 CalloutListEntry* e = onig_reg_callout_list_at(reg, callout_num);
1637 CHECK_NULL_RETURN(e);
1638 return e->tag_end;
1639 }
1640
1641
1642 extern OnigCalloutType
1643 onig_get_callout_type_by_name_id(int name_id)
1644 {
1645 if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
1646 return 0;
1647
1648 return GlobalCalloutNameList->v[name_id].type;
1649 }
1650
1651 extern OnigCalloutFunc
1652 onig_get_callout_start_func_by_name_id(int name_id)
1653 {
1654 if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
1655 return 0;
1656
1657 return GlobalCalloutNameList->v[name_id].start_func;
1658 }
1659
1660 extern OnigCalloutFunc
1661 onig_get_callout_end_func_by_name_id(int name_id)
1662 {
1663 if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
1664 return 0;
1665
1666 return GlobalCalloutNameList->v[name_id].end_func;
1667 }
1668
1669 extern int
1670 onig_get_callout_in_by_name_id(int name_id)
1671 {
1672 if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
1673 return 0;
1674
1675 return GlobalCalloutNameList->v[name_id].in;
1676 }
1677
1678 static int
1679 get_callout_arg_num_by_name_id(int name_id)
1680 {
1681 return GlobalCalloutNameList->v[name_id].arg_num;
1682 }
1683
1684 static int
1685 get_callout_opt_arg_num_by_name_id(int name_id)
1686 {
1687 return GlobalCalloutNameList->v[name_id].opt_arg_num;
1688 }
1689
1690 static unsigned int
1691 get_callout_arg_type_by_name_id(int name_id, int index)
1692 {
1693 return GlobalCalloutNameList->v[name_id].arg_types[index];
1694 }
1695
1696 static OnigValue
1697 get_callout_opt_default_by_name_id(int name_id, int index)
1698 {
1699 return GlobalCalloutNameList->v[name_id].opt_defaults[index];
1700 }
1701
1702 extern UChar*
1703 onig_get_callout_name_by_name_id(int name_id)
1704 {
1705 if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
1706 return 0;
1707
1708 return GlobalCalloutNameList->v[name_id].name;
1709 }
1710
1711 extern int
1712 onig_global_callout_names_free(void)
1713 {
1714 free_callout_func_list(GlobalCalloutNameList);
1715 GlobalCalloutNameList = 0;
1716
1717 global_callout_name_table_free();
1718 return ONIG_NORMAL;
1719 }
1720
1721
/* Hash table (st_table) that maps a callout tag name to its value. */
typedef st_table CalloutTagTable;
/* Value stored per tag; the code in this file uses it as a 1-based callout number. */
typedef intptr_t CalloutTagVal;

/* Bit set in CalloutListEntry.flag for a callout that has an entry in the tag table. */
#define CALLOUT_TAG_LIST_FLAG_TAG_EXIST (1<<0)
1726
1727 static int
1728 i_callout_callout_list_set(UChar* key, CalloutTagVal e, void* arg)
1729 {
1730 int num;
1731 RegexExt* ext = (RegexExt* )arg;
1732
1733 num = (int )e - 1;
1734 ext->callout_list[num].flag |= CALLOUT_TAG_LIST_FLAG_TAG_EXIST;
1735 return ST_CONTINUE;
1736 }
1737
1738 static int
1739 setup_ext_callout_list_values(regex_t* reg)
1740 {
1741 int i, j;
1742 RegexExt* ext;
1743
1744 ext = REG_EXTP(reg);
1745 if (IS_NOT_NULL(ext->tag_table)) {
1746 onig_st_foreach((CalloutTagTable *)ext->tag_table, i_callout_callout_list_set,
1747 (st_data_t )ext);
1748 }
1749
1750 for (i = 0; i < ext->callout_num; i++) {
1751 CalloutListEntry* e = ext->callout_list + i;
1752 if (e->of == ONIG_CALLOUT_OF_NAME) {
1753 for (j = 0; j < e->u.arg.num; j++) {
1754 if (e->u.arg.types[j] == ONIG_TYPE_TAG) {
1755 UChar* start;
1756 UChar* end;
1757 int num;
1758 start = e->u.arg.vals[j].s.start;
1759 end = e->u.arg.vals[j].s.end;
1760 num = onig_get_callout_num_by_tag(reg, start, end);
1761 if (num < 0) return num;
1762 e->u.arg.vals[j].tag = num;
1763 }
1764 }
1765 }
1766 }
1767
1768 return ONIG_NORMAL;
1769 }
1770
1771 extern int
1772 onig_callout_tag_is_exist_at_callout_num(regex_t* reg, int callout_num)
1773 {
1774 RegexExt* ext = REG_EXTP(reg);
1775
1776 if (IS_NULL(ext) || IS_NULL(ext->callout_list)) return 0;
1777 if (callout_num > ext->callout_num) return 0;
1778
1779 return (ext->callout_list[callout_num].flag &
1780 CALLOUT_TAG_LIST_FLAG_TAG_EXIST) != 0 ? 1 : 0;
1781 }
1782
1783 static int
1784 i_free_callout_tag_entry(UChar* key, CalloutTagVal e, void* arg ARG_UNUSED)
1785 {
1786 xfree(key);
1787 return ST_DELETE;
1788 }
1789
1790 static int
1791 callout_tag_table_clear(CalloutTagTable* t)
1792 {
1793 if (IS_NOT_NULL(t)) {
1794 onig_st_foreach(t, i_free_callout_tag_entry, 0);
1795 }
1796 return 0;
1797 }
1798
1799 extern int
1800 onig_callout_tag_table_free(void* table)
1801 {
1802 CalloutTagTable* t = (CalloutTagTable* )table;
1803
1804 if (IS_NOT_NULL(t)) {
1805 int r = callout_tag_table_clear(t);
1806 if (r != 0) return r;
1807
1808 onig_st_free_table(t);
1809 }
1810
1811 return 0;
1812 }
1813
1814 extern int
1815 onig_get_callout_num_by_tag(regex_t* reg,
1816 const UChar* tag, const UChar* tag_end)
1817 {
1818 int r;
1819 RegexExt* ext;
1820 CalloutTagVal e;
1821
1822 ext = REG_EXTP(reg);
1823 if (IS_NULL(ext) || IS_NULL(ext->tag_table))
1824 return ONIGERR_INVALID_CALLOUT_TAG_NAME;
1825
1826 r = onig_st_lookup_strend(ext->tag_table, tag, tag_end,
1827 (HashDataType* )((void* )(&e)));
1828 if (r == 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME;
1829 return (int )e;
1830 }
1831
1832 static CalloutTagVal
1833 callout_tag_find(CalloutTagTable* t, const UChar* name, const UChar* name_end)
1834 {
1835 CalloutTagVal e;
1836
1837 e = -1;
1838 if (IS_NOT_NULL(t)) {
1839 onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));
1840 }
1841 return e;
1842 }
1843
1844 static int
1845 callout_tag_table_new(CalloutTagTable** rt)
1846 {
1847 CalloutTagTable* t;
1848
1849 *rt = 0;
1850 t = onig_st_init_strend_table_with_size(INIT_TAG_NAMES_ALLOC_NUM);
1851 CHECK_NULL_RETURN_MEMERR(t);
1852
1853 *rt = t;
1854 return ONIG_NORMAL;
1855 }
1856
1857 static int
1858 callout_tag_entry_raw(CalloutTagTable* t, UChar* name, UChar* name_end,
1859 CalloutTagVal entry_val)
1860 {
1861 int r;
1862 CalloutTagVal val;
1863
1864 if (name_end - name <= 0)
1865 return ONIGERR_INVALID_CALLOUT_TAG_NAME;
1866
1867 val = callout_tag_find(t, name, name_end);
1868 if (val >= 0)
1869 return ONIGERR_MULTIPLEX_DEFINED_NAME;
1870
1871 r = onig_st_insert_strend(t, name, name_end, (HashDataType )entry_val);
1872 if (r < 0) return r;
1873
1874 return ONIG_NORMAL;
1875 }
1876
1877 static int
1878 ext_ensure_tag_table(regex_t* reg)
1879 {
1880 int r;
1881 RegexExt* ext;
1882 CalloutTagTable* t;
1883
1884 ext = onig_get_regex_ext(reg);
1885 CHECK_NULL_RETURN_MEMERR(ext);
1886
1887 if (IS_NULL(ext->tag_table)) {
1888 r = callout_tag_table_new(&t);
1889 if (r != ONIG_NORMAL) return r;
1890
1891 ext->tag_table = t;
1892 }
1893
1894 return ONIG_NORMAL;
1895 }
1896
1897 static int
1898 callout_tag_entry(regex_t* reg, UChar* name, UChar* name_end,
1899 CalloutTagVal entry_val)
1900 {
1901 int r;
1902 RegexExt* ext;
1903 CalloutListEntry* e;
1904
1905 r = ext_ensure_tag_table(reg);
1906 if (r != ONIG_NORMAL) return r;
1907
1908 ext = onig_get_regex_ext(reg);
1909 CHECK_NULL_RETURN_MEMERR(ext);
1910 r = callout_tag_entry_raw(ext->tag_table, name, name_end, entry_val);
1911
1912 e = onig_reg_callout_list_at(reg, (int )entry_val);
1913 CHECK_NULL_RETURN_MEMERR(e);
1914 e->tag_start = name;
1915 e->tag_end = name_end;
1916
1917 return r;
1918 }
1919
1920 #endif /* USE_CALLOUT */
1921
1922
/* Number of MemEnv slots allocated when a ScanEnv first switches from its static array to a heap array. */
#define INIT_SCANENV_MEMENV_ALLOC_SIZE 16
1924
1925 static void
1926 scan_env_clear(ScanEnv* env)
1927 {
1928 MEM_STATUS_CLEAR(env->capture_history);
1929 MEM_STATUS_CLEAR(env->bt_mem_start);
1930 MEM_STATUS_CLEAR(env->bt_mem_end);
1931 MEM_STATUS_CLEAR(env->backrefed_mem);
1932 env->error = (UChar* )NULL;
1933 env->error_end = (UChar* )NULL;
1934 env->num_call = 0;
1935
1936 #ifdef USE_CALL
1937 env->unset_addr_list = NULL;
1938 env->has_call_zero = 0;
1939 #endif
1940
1941 env->num_mem = 0;
1942 env->num_named = 0;
1943 env->mem_alloc = 0;
1944 env->mem_env_dynamic = (MemEnv* )NULL;
1945
1946 xmemset(env->mem_env_static, 0, sizeof(env->mem_env_static));
1947
1948 env->parse_depth = 0;
1949 env->keep_num = 0;
1950 env->save_num = 0;
1951 env->save_alloc_num = 0;
1952 env->saves = 0;
1953 }
1954
1955 static int
1956 scan_env_add_mem_entry(ScanEnv* env)
1957 {
1958 int i, need, alloc;
1959 MemEnv* p;
1960
1961 need = env->num_mem + 1;
1962 if (need > MaxCaptureNum && MaxCaptureNum != 0)
1963 return ONIGERR_TOO_MANY_CAPTURES;
1964
1965 if (need >= SCANENV_MEMENV_SIZE) {
1966 if (env->mem_alloc <= need) {
1967 if (IS_NULL(env->mem_env_dynamic)) {
1968 alloc = INIT_SCANENV_MEMENV_ALLOC_SIZE;
1969 p = (MemEnv* )xmalloc(sizeof(MemEnv) * alloc);
1970 CHECK_NULL_RETURN_MEMERR(p);
1971 xmemcpy(p, env->mem_env_static, sizeof(env->mem_env_static));
1972 }
1973 else {
1974 alloc = env->mem_alloc * 2;
1975 p = (MemEnv* )xrealloc(env->mem_env_dynamic, sizeof(MemEnv) * alloc, sizeof(MemEnv)*env->mem_alloc);
1976 CHECK_NULL_RETURN_MEMERR(p);
1977 }
1978
1979 for (i = env->num_mem + 1; i < alloc; i++) {
1980 p[i].node = NULL_NODE;
1981 #if 0
1982 p[i].in = 0;
1983 p[i].recursion = 0;
1984 #endif
1985 }
1986
1987 env->mem_env_dynamic = p;
1988 env->mem_alloc = alloc;
1989 }
1990 }
1991
1992 env->num_mem++;
1993 return env->num_mem;
1994 }
1995
1996 static int
1997 scan_env_set_mem_node(ScanEnv* env, int num, Node* node)
1998 {
1999 if (env->num_mem >= num)
2000 SCANENV_MEMENV(env)[num].node = node;
2001 else
2002 return ONIGERR_PARSER_BUG;
2003 return 0;
2004 }
2005
2006 extern void
2007 onig_node_free(Node* node)
2008 {
2009 start:
2010 if (IS_NULL(node)) return ;
2011
2012 #ifdef DEBUG_NODE_FREE
2013 fprintf(stderr, "onig_node_free: %p\n", node);
2014 #endif
2015
2016 switch (NODE_TYPE(node)) {
2017 case NODE_STRING:
2018 if (STR_(node)->capa != 0 &&
2019 IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) {
2020 xfree(STR_(node)->s);
2021 }
2022 break;
2023
2024 case NODE_LIST:
2025 case NODE_ALT:
2026 onig_node_free(NODE_CAR(node));
2027 {
2028 Node* next_node = NODE_CDR(node);
2029
2030 xfree(node);
2031 node = next_node;
2032 goto start;
2033 }
2034 break;
2035
2036 case NODE_CCLASS:
2037 {
2038 CClassNode* cc = CCLASS_(node);
2039
2040 if (cc->mbuf)
2041 bbuf_free(cc->mbuf);
2042 }
2043 break;
2044
2045 case NODE_BACKREF:
2046 if (IS_NOT_NULL(BACKREF_(node)->back_dynamic))
2047 xfree(BACKREF_(node)->back_dynamic);
2048 break;
2049
2050 case NODE_ENCLOSURE:
2051 if (NODE_BODY(node))
2052 onig_node_free(NODE_BODY(node));
2053
2054 {
2055 EnclosureNode* en = ENCLOSURE_(node);
2056 if (en->type == ENCLOSURE_IF_ELSE) {
2057 onig_node_free(en->te.Then);
2058 onig_node_free(en->te.Else);
2059 }
2060 }
2061 break;
2062
2063 case NODE_QUANT:
2064 case NODE_ANCHOR:
2065 if (NODE_BODY(node))
2066 onig_node_free(NODE_BODY(node));
2067 break;
2068
2069 case NODE_CTYPE:
2070 case NODE_CALL:
2071 case NODE_GIMMICK:
2072 break;
2073 }
2074
2075 xfree(node);
2076 }
2077
2078 static void
2079 cons_node_free_alone(Node* node)
2080 {
2081 NODE_CAR(node) = 0;
2082 NODE_CDR(node) = 0;
2083 onig_node_free(node);
2084 }
2085
2086 static Node*
2087 node_new(void)
2088 {
2089 Node* node;
2090
2091 node = (Node* )xmalloc(sizeof(Node));
2092 CHECK_NULL_RETURN(node);
2093 xmemset(node, 0, sizeof(*node));
2094
2095 #ifdef DEBUG_NODE_FREE
2096 fprintf(stderr, "node_new: %p\n", node);
2097 #endif
2098 return node;
2099 }
2100
2101
2102 static void
2103 initialize_cclass(CClassNode* cc)
2104 {
2105 BITSET_CLEAR(cc->bs);
2106 cc->flags = 0;
2107 cc->mbuf = NULL;
2108 }
2109
2110 static Node*
2111 node_new_cclass(void)
2112 {
2113 Node* node = node_new();
2114 CHECK_NULL_RETURN(node);
2115
2116 NODE_SET_TYPE(node, NODE_CCLASS);
2117 initialize_cclass(CCLASS_(node));
2118 return node;
2119 }
2120
2121 static Node*
2122 node_new_ctype(int type, int not, OnigOptionType options)
2123 {
2124 Node* node = node_new();
2125 CHECK_NULL_RETURN(node);
2126
2127 NODE_SET_TYPE(node, NODE_CTYPE);
2128 CTYPE_(node)->ctype = type;
2129 CTYPE_(node)->not = not;
2130 CTYPE_(node)->options = options;
2131 CTYPE_(node)->ascii_mode = IS_ASCII_MODE_CTYPE_OPTION(type, options);
2132 return node;
2133 }
2134
2135 static Node*
2136 node_new_anychar(void)
2137 {
2138 Node* node = node_new_ctype(CTYPE_ANYCHAR, 0, ONIG_OPTION_NONE);
2139 return node;
2140 }
2141
2142 static Node*
2143 node_new_anychar_with_fixed_option(OnigOptionType option)
2144 {
2145 CtypeNode* ct;
2146 Node* node;
2147
2148 node = node_new_anychar();
2149 CHECK_NULL_RETURN(node);
2150
2151 ct = CTYPE_(node);
2152 ct->options = option;
2153 NODE_STATUS_ADD(node, FIXED_OPTION);
2154 return node;
2155 }
2156
2157 static int
2158 node_new_no_newline(Node** node, ScanEnv* env)
2159 {
2160 Node* n;
2161
2162 n = node_new_anychar_with_fixed_option(ONIG_OPTION_NONE);
2163 CHECK_NULL_RETURN_MEMERR(n);
2164 *node = n;
2165 return 0;
2166 }
2167
2168 static int
2169 node_new_true_anychar(Node** node, ScanEnv* env)
2170 {
2171 Node* n;
2172
2173 n = node_new_anychar_with_fixed_option(ONIG_OPTION_MULTILINE);
2174 CHECK_NULL_RETURN_MEMERR(n);
2175 *node = n;
2176 return 0;
2177 }
2178
2179 static Node*
2180 node_new_list(Node* left, Node* right)
2181 {
2182 Node* node = node_new();
2183 CHECK_NULL_RETURN(node);
2184
2185 NODE_SET_TYPE(node, NODE_LIST);
2186 NODE_CAR(node) = left;
2187 NODE_CDR(node) = right;
2188 return node;
2189 }
2190
2191 extern Node*
2192 onig_node_new_list(Node* left, Node* right)
2193 {
2194 return node_new_list(left, right);
2195 }
2196
2197 extern Node*
2198 onig_node_list_add(Node* list, Node* x)
2199 {
2200 Node *n;
2201
2202 n = onig_node_new_list(x, NULL);
2203 if (IS_NULL(n)) return NULL_NODE;
2204
2205 if (IS_NOT_NULL(list)) {
2206 while (IS_NOT_NULL(NODE_CDR(list)))
2207 list = NODE_CDR(list);
2208
2209 NODE_CDR(list) = n;
2210 }
2211
2212 return n;
2213 }
2214
2215 extern Node*
2216 onig_node_new_alt(Node* left, Node* right)
2217 {
2218 Node* node = node_new();
2219 CHECK_NULL_RETURN(node);
2220
2221 NODE_SET_TYPE(node, NODE_ALT);
2222 NODE_CAR(node) = left;
2223 NODE_CDR(node) = right;
2224 return node;
2225 }
2226
2227 static Node*
2228 make_list_or_alt(NodeType type, int n, Node* ns[])
2229 {
2230 Node* r;
2231
2232 if (n <= 0) return NULL_NODE;
2233
2234 if (n == 1) {
2235 r = node_new();
2236 CHECK_NULL_RETURN(r);
2237 NODE_SET_TYPE(r, type);
2238 NODE_CAR(r) = ns[0];
2239 NODE_CDR(r) = NULL_NODE;
2240 }
2241 else {
2242 Node* right;
2243
2244 r = node_new();
2245 CHECK_NULL_RETURN(r);
2246
2247 right = make_list_or_alt(type, n - 1, ns + 1);
2248 if (IS_NULL(right)) {
2249 onig_node_free(r);
2250 return NULL_NODE;
2251 }
2252
2253 NODE_SET_TYPE(r, type);
2254 NODE_CAR(r) = ns[0];
2255 NODE_CDR(r) = right;
2256 }
2257
2258 return r;
2259 }
2260
2261 static Node*
2262 make_list(int n, Node* ns[])
2263 {
2264 return make_list_or_alt(NODE_LIST, n, ns);
2265 }
2266
2267 static Node*
2268 make_alt(int n, Node* ns[])
2269 {
2270 return make_list_or_alt(NODE_ALT, n, ns);
2271 }
2272
2273 extern Node*
2274 onig_node_new_anchor(int type, int ascii_mode)
2275 {
2276 Node* node = node_new();
2277 CHECK_NULL_RETURN(node);
2278
2279 NODE_SET_TYPE(node, NODE_ANCHOR);
2280 ANCHOR_(node)->type = type;
2281 ANCHOR_(node)->char_len = -1;
2282 ANCHOR_(node)->ascii_mode = ascii_mode;
2283 return node;
2284 }
2285
2286 static Node*
2287 node_new_backref(int back_num, int* backrefs, int by_name,
2288 #ifdef USE_BACKREF_WITH_LEVEL
2289 int exist_level, int nest_level,
2290 #endif
2291 ScanEnv* env)
2292 {
2293 int i;
2294 Node* node = node_new();
2295
2296 CHECK_NULL_RETURN(node);
2297
2298 NODE_SET_TYPE(node, NODE_BACKREF);
2299 BACKREF_(node)->back_num = back_num;
2300 BACKREF_(node)->back_dynamic = (int* )NULL;
2301 if (by_name != 0)
2302 NODE_STATUS_ADD(node, BY_NAME);
2303
2304 #ifdef USE_BACKREF_WITH_LEVEL
2305 if (exist_level != 0) {
2306 NODE_STATUS_ADD(node, NEST_LEVEL);
2307 BACKREF_(node)->nest_level = nest_level;
2308 }
2309 #endif
2310
2311 for (i = 0; i < back_num; i++) {
2312 if (backrefs[i] <= env->num_mem &&
2313 IS_NULL(SCANENV_MEMENV(env)[backrefs[i]].node)) {
2314 NODE_STATUS_ADD(node, RECURSION); /* /...(\1).../ */
2315 break;
2316 }
2317 }
2318
2319 if (back_num <= NODE_BACKREFS_SIZE) {
2320 for (i = 0; i < back_num; i++)
2321 BACKREF_(node)->back_static[i] = backrefs[i];
2322 }
2323 else {
2324 int* p = (int* )xmalloc(sizeof(int) * back_num);
2325 if (IS_NULL(p)) {
2326 onig_node_free(node);
2327 return NULL;
2328 }
2329 BACKREF_(node)->back_dynamic = p;
2330 for (i = 0; i < back_num; i++)
2331 p[i] = backrefs[i];
2332 }
2333 return node;
2334 }
2335
2336 static Node*
2337 node_new_backref_checker(int back_num, int* backrefs, int by_name,
2338 #ifdef USE_BACKREF_WITH_LEVEL
2339 int exist_level, int nest_level,
2340 #endif
2341 ScanEnv* env)
2342 {
2343 Node* node;
2344
2345 node = node_new_backref(back_num, backrefs, by_name,
2346 #ifdef USE_BACKREF_WITH_LEVEL
2347 exist_level, nest_level,
2348 #endif
2349 env);
2350 CHECK_NULL_RETURN(node);
2351
2352 NODE_STATUS_ADD(node, CHECKER);
2353 return node;
2354 }
2355
2356 #ifdef USE_CALL
2357 static Node*
2358 node_new_call(UChar* name, UChar* name_end, int gnum, int by_number)
2359 {
2360 Node* node = node_new();
2361 CHECK_NULL_RETURN(node);
2362
2363 NODE_SET_TYPE(node, NODE_CALL);
2364 CALL_(node)->by_number = by_number;
2365 CALL_(node)->name = name;
2366 CALL_(node)->name_end = name_end;
2367 CALL_(node)->group_num = gnum;
2368 CALL_(node)->entry_count = 1;
2369 return node;
2370 }
2371 #endif
2372
2373 static Node*
2374 node_new_quantifier(int lower, int upper, int by_number)
2375 {
2376 Node* node = node_new();
2377 CHECK_NULL_RETURN(node);
2378
2379 NODE_SET_TYPE(node, NODE_QUANT);
2380 QUANT_(node)->lower = lower;
2381 QUANT_(node)->upper = upper;
2382 QUANT_(node)->greedy = 1;
2383 QUANT_(node)->body_empty_info = QUANT_BODY_IS_NOT_EMPTY;
2384 QUANT_(node)->head_exact = NULL_NODE;
2385 QUANT_(node)->next_head_exact = NULL_NODE;
2386 QUANT_(node)->is_refered = 0;
2387 if (by_number != 0)
2388 NODE_STATUS_ADD(node, BY_NUMBER);
2389
2390 return node;
2391 }
2392
2393 static Node*
2394 node_new_enclosure(enum EnclosureType type)
2395 {
2396 Node* node = node_new();
2397 CHECK_NULL_RETURN(node);
2398
2399 NODE_SET_TYPE(node, NODE_ENCLOSURE);
2400 ENCLOSURE_(node)->type = type;
2401
2402 switch (type) {
2403 case ENCLOSURE_MEMORY:
2404 ENCLOSURE_(node)->m.regnum = 0;
2405 ENCLOSURE_(node)->m.called_addr = -1;
2406 ENCLOSURE_(node)->m.entry_count = 1;
2407 ENCLOSURE_(node)->m.called_state = 0;
2408 break;
2409
2410 case ENCLOSURE_OPTION:
2411 ENCLOSURE_(node)->o.options = 0;
2412 break;
2413
2414 case ENCLOSURE_STOP_BACKTRACK:
2415 break;
2416
2417 case ENCLOSURE_IF_ELSE:
2418 ENCLOSURE_(node)->te.Then = 0;
2419 ENCLOSURE_(node)->te.Else = 0;
2420 break;
2421 }
2422
2423 ENCLOSURE_(node)->opt_count = 0;
2424 return node;
2425 }
2426
2427 extern Node*
2428 onig_node_new_enclosure(int type)
2429 {
2430 return node_new_enclosure(type);
2431 }
2432
2433 static Node*
2434 node_new_enclosure_if_else(Node* cond, Node* Then, Node* Else)
2435 {
2436 Node* n;
2437 n = node_new_enclosure(ENCLOSURE_IF_ELSE);
2438 CHECK_NULL_RETURN(n);
2439
2440 NODE_BODY(n) = cond;
2441 ENCLOSURE_(n)->te.Then = Then;
2442 ENCLOSURE_(n)->te.Else = Else;
2443 return n;
2444 }
2445
2446 static Node*
2447 node_new_memory(int is_named)
2448 {
2449 Node* node = node_new_enclosure(ENCLOSURE_MEMORY);
2450 CHECK_NULL_RETURN(node);
2451 if (is_named != 0)
2452 NODE_STATUS_ADD(node, NAMED_GROUP);
2453
2454 return node;
2455 }
2456
2457 static Node*
2458 node_new_option(OnigOptionType option)
2459 {
2460 Node* node = node_new_enclosure(ENCLOSURE_OPTION);
2461 CHECK_NULL_RETURN(node);
2462 ENCLOSURE_(node)->o.options = option;
2463 return node;
2464 }
2465
2466 static int
2467 node_new_fail(Node** node, ScanEnv* env)
2468 {
2469 *node = node_new();
2470 CHECK_NULL_RETURN_MEMERR(*node);
2471
2472 NODE_SET_TYPE(*node, NODE_GIMMICK);
2473 GIMMICK_(*node)->type = GIMMICK_FAIL;
2474 return ONIG_NORMAL;
2475 }
2476
2477 static int
2478 node_new_save_gimmick(Node** node, enum SaveType save_type, ScanEnv* env)
2479 {
2480 int id;
2481 int r;
2482
2483 r = save_entry(env, save_type, &id);
2484 if (r != ONIG_NORMAL) return r;
2485
2486 *node = node_new();
2487 CHECK_NULL_RETURN_MEMERR(*node);
2488
2489 NODE_SET_TYPE(*node, NODE_GIMMICK);
2490 GIMMICK_(*node)->id = id;
2491 GIMMICK_(*node)->type = GIMMICK_SAVE;
2492 GIMMICK_(*node)->detail_type = (int )save_type;
2493
2494 return ONIG_NORMAL;
2495 }
2496
2497 static int
2498 node_new_update_var_gimmick(Node** node, enum UpdateVarType update_var_type,
2499 int id, ScanEnv* env)
2500 {
2501 *node = node_new();
2502 CHECK_NULL_RETURN_MEMERR(*node);
2503
2504 NODE_SET_TYPE(*node, NODE_GIMMICK);
2505 GIMMICK_(*node)->id = id;
2506 GIMMICK_(*node)->type = GIMMICK_UPDATE_VAR;
2507 GIMMICK_(*node)->detail_type = (int )update_var_type;
2508
2509 return ONIG_NORMAL;
2510 }
2511
2512 static int
2513 node_new_keep(Node** node, ScanEnv* env)
2514 {
2515 int r;
2516
2517 r = node_new_save_gimmick(node, SAVE_KEEP, env);
2518 if (r != 0) return r;
2519
2520 env->keep_num++;
2521 return ONIG_NORMAL;
2522 }
2523
2524 #ifdef USE_CALLOUT
2525
2526 extern void
2527 onig_free_reg_callout_list(int n, CalloutListEntry* list)
2528 {
2529 int i;
2530 int j;
2531
2532 if (IS_NULL(list)) return ;
2533
2534 for (i = 0; i < n; i++) {
2535 if (list[i].of == ONIG_CALLOUT_OF_NAME) {
2536 for (j = 0; j < list[i].u.arg.passed_num; j++) {
2537 if (list[i].u.arg.types[j] == ONIG_TYPE_STRING) {
2538 if (IS_NOT_NULL(list[i].u.arg.vals[j].s.start))
2539 xfree(list[i].u.arg.vals[j].s.start);
2540 }
2541 }
2542 }
2543 else { /* ONIG_CALLOUT_OF_CONTENTS */
2544 if (IS_NOT_NULL(list[i].u.content.start)) {
2545 xfree((void* )list[i].u.content.start);
2546 }
2547 }
2548 }
2549
2550 xfree(list);
2551 }
2552
2553 extern CalloutListEntry*
2554 onig_reg_callout_list_at(regex_t* reg, int num)
2555 {
2556 RegexExt* ext = REG_EXTP(reg);
2557 CHECK_NULL_RETURN(ext);
2558
2559 if (num <= 0 || num > ext->callout_num)
2560 return 0;
2561
2562 num--;
2563 return ext->callout_list + num;
2564 }
2565
2566 static int
2567 reg_callout_list_entry(ScanEnv* env, int* rnum)
2568 {
2569 #define INIT_CALLOUT_LIST_NUM 3
2570
2571 int num;
2572 CalloutListEntry* list;
2573 CalloutListEntry* e;
2574 RegexExt* ext;
2575
2576 ext = onig_get_regex_ext(env->reg);
2577 CHECK_NULL_RETURN_MEMERR(ext);
2578
2579 if (IS_NULL(ext->callout_list)) {
2580 list = (CalloutListEntry* )xmalloc(sizeof(*list) * INIT_CALLOUT_LIST_NUM);
2581 CHECK_NULL_RETURN_MEMERR(list);
2582
2583 ext->callout_list = list;
2584 ext->callout_list_alloc = INIT_CALLOUT_LIST_NUM;
2585 ext->callout_num = 0;
2586 }
2587
2588 num = ext->callout_num + 1;
2589 if (num > ext->callout_list_alloc) {
2590 int alloc = ext->callout_list_alloc * 2;
2591 list = (CalloutListEntry* )xrealloc(ext->callout_list,
2592 sizeof(CalloutListEntry) * alloc,
2593 sizeof(CalloutListEntry) * ext->callout_list_alloc);
2594 CHECK_NULL_RETURN_MEMERR(list);
2595
2596 ext->callout_list = list;
2597 ext->callout_list_alloc = alloc;
2598 }
2599
2600 e = ext->callout_list + (num - 1);
2601
2602 e->flag = 0;
2603 e->of = 0;
2604 e->in = ONIG_CALLOUT_OF_CONTENTS;
2605 e->type = 0;
2606 e->tag_start = 0;
2607 e->tag_end = 0;
2608 e->start_func = 0;
2609 e->end_func = 0;
2610 e->u.arg.num = 0;
2611 e->u.arg.passed_num = 0;
2612
2613 ext->callout_num = num;
2614 *rnum = num;
2615 return ONIG_NORMAL;
2616 }
2617
2618 static int
2619 node_new_callout(Node** node, OnigCalloutOf callout_of, int num, int id,
2620 ScanEnv* env)
2621 {
2622 *node = node_new();
2623 CHECK_NULL_RETURN_MEMERR(*node);
2624
2625 NODE_SET_TYPE(*node, NODE_GIMMICK);
2626 GIMMICK_(*node)->id = id;
2627 GIMMICK_(*node)->num = num;
2628 GIMMICK_(*node)->type = GIMMICK_CALLOUT;
2629 GIMMICK_(*node)->detail_type = (int )callout_of;
2630
2631 return ONIG_NORMAL;
2632 }
2633 #endif
2634
2635 static int
2636 make_extended_grapheme_cluster(Node** node, ScanEnv* env)
2637 {
2638 int r;
2639 int i;
2640 Node* x;
2641 Node* ns[2];
2642
2643 /* \X == (?>\O(?:\Y\O)*) */
2644
2645 ns[1] = NULL_NODE;
2646
2647 r = ONIGERR_MEMORY;
2648 ns[0] = onig_node_new_anchor(ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY, 0);
2649 if (IS_NULL(ns[0])) goto err;
2650
2651 r = node_new_true_anychar(&ns[1], env);
2652 if (r != 0) goto err1;
2653
2654 x = make_list(2, ns);
2655 if (IS_NULL(x)) goto err;
2656 ns[0] = x;
2657 ns[1] = NULL_NODE;
2658
2659 x = node_new_quantifier(0, REPEAT_INFINITE, 1);
2660 if (IS_NULL(x)) goto err;
2661
2662 NODE_BODY(x) = ns[0];
2663 ns[0] = NULL_NODE;
2664 ns[1] = x;
2665
2666 r = node_new_true_anychar(&ns[0], env);
2667 if (r != 0) goto err1;
2668
2669 x = make_list(2, ns);
2670 if (IS_NULL(x)) goto err;
2671
2672 ns[0] = x;
2673 ns[1] = NULL_NODE;
2674
2675 x = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);
2676 if (IS_NULL(x)) goto err;
2677
2678 NODE_BODY(x) = ns[0];
2679
2680 *node = x;
2681 return ONIG_NORMAL;
2682
2683 err:
2684 r = ONIGERR_MEMORY;
2685 err1:
2686 for (i = 0; i < 2; i++) onig_node_free(ns[i]);
2687 return r;
2688 }
2689
2690 static int
2691 make_absent_engine(Node** node, int pre_save_right_id, Node* absent,
2692 Node* step_one, int lower, int upper, int possessive,
2693 int is_range_cutter, ScanEnv* env)
2694 {
2695 int r;
2696 int i;
2697 int id;
2698 Node* x;
2699 Node* ns[4];
2700
2701 for (i = 0; i < 4; i++) ns[i] = NULL_NODE;
2702
2703 ns[1] = absent;
2704 ns[3] = step_one; /* for err */
2705 r = node_new_save_gimmick(&ns[0], SAVE_S, env);
2706 if (r != 0) goto err;
2707
2708 id = GIMMICK_(ns[0])->id;
2709 r = node_new_update_var_gimmick(&ns[2], UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK,
2710 id, env);
2711 if (r != 0) goto err;
2712
2713 r = node_new_fail(&ns[3], env);
2714 if (r != 0) goto err;
2715
2716 x = make_list(4, ns);
2717 if (IS_NULL(x)) goto err0;
2718
2719 ns[0] = x;
2720 ns[1] = step_one;
2721 ns[2] = ns[3] = NULL_NODE;
2722
2723 x = make_alt(2, ns);
2724 if (IS_NULL(x)) goto err0;
2725
2726 ns[0] = x;
2727
2728 x = node_new_quantifier(lower, upper, 0);
2729 if (IS_NULL(x)) goto err0;
2730
2731 NODE_BODY(x) = ns[0];
2732 ns[0] = x;
2733
2734 if (possessive != 0) {
2735 x = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);
2736 if (IS_NULL(x)) goto err0;
2737
2738 NODE_BODY(x) = ns[0];
2739 ns[0] = x;
2740 }
2741
2742 r = node_new_update_var_gimmick(&ns[1], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
2743 pre_save_right_id, env);
2744 if (r != 0) goto err;
2745
2746 r = node_new_fail(&ns[2], env);
2747 if (r != 0) goto err;
2748
2749 x = make_list(2, ns + 1);
2750 if (IS_NULL(x)) goto err0;
2751
2752 ns[1] = x; ns[2] = NULL_NODE;
2753
2754 x = make_alt(2, ns);
2755 if (IS_NULL(x)) goto err0;
2756
2757 if (is_range_cutter != 0)
2758 NODE_STATUS_ADD(x, SUPER);
2759
2760 *node = x;
2761 return ONIG_NORMAL;
2762
2763 err0:
2764 r = ONIGERR_MEMORY;
2765 err:
2766 for (i = 0; i < 4; i++) onig_node_free(ns[i]);
2767 return r;
2768 }
2769
2770 static int
2771 make_absent_tail(Node** node1, Node** node2, int pre_save_right_id,
2772 ScanEnv* env)
2773 {
2774 int r;
2775 int id;
2776 Node* save;
2777 Node* x;
2778 Node* ns[2];
2779
2780 *node1 = *node2 = NULL_NODE;
2781 save = ns[0] = ns[1] = NULL_NODE;
2782
2783 r = node_new_save_gimmick(&save, SAVE_RIGHT_RANGE, env);
2784 if (r != 0) goto err;
2785
2786 id = GIMMICK_(save)->id;
2787 r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
2788 id, env);
2789 if (r != 0) goto err;
2790
2791 r = node_new_fail(&ns[1], env);
2792 if (r != 0) goto err;
2793
2794 x = make_list(2, ns);
2795 if (IS_NULL(x)) goto err0;
2796
2797 ns[0] = NULL_NODE; ns[1] = x;
2798
2799 r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
2800 pre_save_right_id, env);
2801 if (r != 0) goto err;
2802
2803 x = make_alt(2, ns);
2804 if (IS_NULL(x)) goto err0;
2805
2806 *node1 = save;
2807 *node2 = x;
2808 return ONIG_NORMAL;
2809
2810 err0:
2811 r = ONIGERR_MEMORY;
2812 err:
2813 onig_node_free(save);
2814 onig_node_free(ns[0]);
2815 onig_node_free(ns[1]);
2816 return r;
2817 }
2818
2819 static int
2820 make_range_clear(Node** node, ScanEnv* env)
2821 {
2822 int r;
2823 int id;
2824 Node* save;
2825 Node* x;
2826 Node* ns[2];
2827
2828 *node = NULL_NODE;
2829 save = ns[0] = ns[1] = NULL_NODE;
2830
2831 r = node_new_save_gimmick(&save, SAVE_RIGHT_RANGE, env);
2832 if (r != 0) goto err;
2833
2834 id = GIMMICK_(save)->id;
2835 r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
2836 id, env);
2837 if (r != 0) goto err;
2838
2839 r = node_new_fail(&ns[1], env);
2840 if (r != 0) goto err;
2841
2842 x = make_list(2, ns);
2843 if (IS_NULL(x)) goto err0;
2844
2845 ns[0] = NULL_NODE; ns[1] = x;
2846
2847 r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_INIT, 0, env);
2848 if (r != 0) goto err;
2849
2850 x = make_alt(2, ns);
2851 if (IS_NULL(x)) goto err0;
2852
2853 NODE_STATUS_ADD(x, SUPER);
2854
2855 ns[0] = save;
2856 ns[1] = x;
2857 save = NULL_NODE;
2858 x = make_list(2, ns);
2859 if (IS_NULL(x)) goto err0;
2860
2861 *node = x;
2862 return ONIG_NORMAL;
2863
2864 err0:
2865 r = ONIGERR_MEMORY;
2866 err:
2867 onig_node_free(save);
2868 onig_node_free(ns[0]);
2869 onig_node_free(ns[1]);
2870 return r;
2871 }
2872
2873 static int
2874 is_simple_one_char_repeat(Node* node, Node** rquant, Node** rbody,
2875 int* is_possessive, ScanEnv* env)
2876 {
2877 Node* quant;
2878 Node* body;
2879
2880 *rquant = *rbody = 0;
2881 *is_possessive = 0;
2882
2883 if (NODE_TYPE(node) == NODE_QUANT) {
2884 quant = node;
2885 }
2886 else {
2887 if (NODE_TYPE(node) == NODE_ENCLOSURE) {
2888 EnclosureNode* en = ENCLOSURE_(node);
2889 if (en->type == ENCLOSURE_STOP_BACKTRACK) {
2890 *is_possessive = 1;
2891 quant = NODE_ENCLOSURE_BODY(en);
2892 if (NODE_TYPE(quant) != NODE_QUANT)
2893 return 0;
2894 }
2895 else
2896 return 0;
2897 }
2898 else
2899 return 0;
2900 }
2901
2902 if (QUANT_(quant)->greedy == 0)
2903 return 0;
2904
2905 body = NODE_BODY(quant);
2906 switch (NODE_TYPE(body)) {
2907 case NODE_STRING:
2908 {
2909 int len;
2910 StrNode* sn = STR_(body);
2911 UChar *s = sn->s;
2912
2913 len = 0;
2914 while (s < sn->end) {
2915 s += enclen(env->enc, s);
2916 len++;
2917 }
2918 if (len != 1)
2919 return 0;
2920 }
2921
2922 case NODE_CCLASS:
2923 break;
2924
2925 default:
2926 return 0;
2927 break;
2928 }
2929
2930 if (node != quant) {
2931 NODE_BODY(node) = 0;
2932 onig_node_free(node);
2933 }
2934 NODE_BODY(quant) = NULL_NODE;
2935 *rquant = quant;
2936 *rbody = body;
2937 return 1;
2938 }
2939
2940 static int
2941 make_absent_tree_for_simple_one_char_repeat(Node** node, Node* absent, Node* quant,
2942 Node* body, int possessive, ScanEnv* env)
2943 {
2944 int r;
2945 int i;
2946 int id1;
2947 int lower, upper;
2948 Node* x;
2949 Node* ns[4];
2950
2951 *node = NULL_NODE;
2952 r = ONIGERR_MEMORY;
2953 ns[0] = ns[1] = NULL_NODE;
2954 ns[2] = body, ns[3] = absent;
2955
2956 lower = QUANT_(quant)->lower;
2957 upper = QUANT_(quant)->upper;
2958 onig_node_free(quant);
2959
2960 r = node_new_save_gimmick(&ns[0], SAVE_RIGHT_RANGE, env);
2961 if (r != 0) goto err;
2962
2963 id1 = GIMMICK_(ns[0])->id;
2964
2965 r = make_absent_engine(&ns[1], id1, absent, body, lower, upper, possessive,
2966 0, env);
2967 if (r != 0) goto err;
2968
2969 ns[2] = ns[3] = NULL_NODE;
2970
2971 r = node_new_update_var_gimmick(&ns[2], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
2972 id1, env);
2973 if (r != 0) goto err;
2974
2975 x = make_list(3, ns);
2976 if (IS_NULL(x)) goto err0;
2977
2978 *node = x;
2979 return ONIG_NORMAL;
2980
2981 err0:
2982 r = ONIGERR_MEMORY;
2983 err:
2984 for (i = 0; i < 4; i++) onig_node_free(ns[i]);
2985 return r;
2986 }
2987
2988 static int
2989 make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter,
2990 ScanEnv* env)
2991 {
2992 int r;
2993 int i;
2994 int id1, id2;
2995 int possessive;
2996 Node* x;
2997 Node* ns[7];
2998
2999 r = ONIGERR_MEMORY;
3000 for (i = 0; i < 7; i++) ns[i] = NULL_NODE;
3001 ns[4] = expr; ns[5] = absent;
3002
3003 if (is_range_cutter == 0) {
3004 Node* quant;
3005 Node* body;
3006
3007 if (expr == NULL_NODE) {
3008 /* default expr \O* */
3009 quant = node_new_quantifier(0, REPEAT_INFINITE, 0);
3010 if (IS_NULL(quant)) goto err0;
3011
3012 r = node_new_true_anychar(&body, env);
3013 if (r != 0) {
3014 onig_node_free(quant);
3015 goto err;
3016 }
3017 possessive = 0;
3018 goto simple;
3019 }
3020 else {
3021 if (is_simple_one_char_repeat(expr, &quant, &body, &possessive, env)) {
3022 simple:
3023 r = make_absent_tree_for_simple_one_char_repeat(node, absent, quant,
3024 body, possessive, env);
3025 if (r != 0) {
3026 ns[4] = NULL_NODE;
3027 onig_node_free(quant);
3028 onig_node_free(body);
3029 goto err;
3030 }
3031
3032 return ONIG_NORMAL;
3033 }
3034 }
3035 }
3036
3037 r = node_new_save_gimmick(&ns[0], SAVE_RIGHT_RANGE, env);
3038 if (r != 0) goto err;
3039
3040 id1 = GIMMICK_(ns[0])->id;
3041
3042 r = node_new_save_gimmick(&ns[1], SAVE_S, env);
3043 if (r != 0) goto err;
3044
3045 id2 = GIMMICK_(ns[1])->id;
3046
3047 r = node_new_true_anychar(&ns[3], env);
3048 if (r != 0) goto err;
3049
3050 possessive = 1;
3051 r = make_absent_engine(&ns[2], id1, absent, ns[3], 0, REPEAT_INFINITE,
3052 possessive, is_range_cutter, env);
3053 if (r != 0) goto err;
3054
3055 ns[3] = NULL_NODE;
3056 ns[5] = NULL_NODE;
3057
3058 r = node_new_update_var_gimmick(&ns[3], UPDATE_VAR_S_FROM_STACK, id2, env);
3059 if (r != 0) goto err;
3060
3061 if (is_range_cutter != 0) {
3062 x = make_list(4, ns);
3063 if (IS_NULL(x)) goto err0;
3064 }
3065 else {
3066 r = make_absent_tail(&ns[5], &ns[6], id1, env);
3067 if (r != 0) goto err;
3068
3069 x = make_list(7, ns);
3070 if (IS_NULL(x)) goto err0;
3071 }
3072
3073 *node = x;
3074 return ONIG_NORMAL;
3075
3076 err0:
3077 r = ONIGERR_MEMORY;
3078 err:
3079 for (i = 0; i < 7; i++) onig_node_free(ns[i]);
3080 return r;
3081 }
3082
3083 extern int
3084 onig_node_str_cat(Node* node, const UChar* s, const UChar* end)
3085 {
3086 int addlen = (int )(end - s);
3087
3088 if (addlen > 0) {
3089 int len = (int )(STR_(node)->end - STR_(node)->s);
3090
3091 if (STR_(node)->capa > 0 || (len + addlen > NODE_STRING_BUF_SIZE - 1)) {
3092 UChar* p;
3093 int capa = len + addlen + NODE_STRING_MARGIN;
3094
3095 if (capa <= STR_(node)->capa) {
3096 onig_strcpy(STR_(node)->s + len, s, end);
3097 }
3098 else {
3099 if (STR_(node)->s == STR_(node)->buf)
3100 p = strcat_capa_from_static(STR_(node)->s, STR_(node)->end,
3101 s, end, capa);
3102 else
3103 p = strcat_capa(STR_(node)->s, STR_(node)->end, s, end, capa, STR_(node)->capa);
3104
3105 CHECK_NULL_RETURN_MEMERR(p);
3106 STR_(node)->s = p;
3107 STR_(node)->capa = capa;
3108 }
3109 }
3110 else {
3111 onig_strcpy(STR_(node)->s + len, s, end);
3112 }
3113 STR_(node)->end = STR_(node)->s + len + addlen;
3114 }
3115
3116 return 0;
3117 }
3118
3119 extern int
3120 onig_node_str_set(Node* node, const UChar* s, const UChar* end)
3121 {
3122 onig_node_str_clear(node);
3123 return onig_node_str_cat(node, s, end);
3124 }
3125
3126 static int
3127 node_str_cat_char(Node* node, UChar c)
3128 {
3129 UChar s[1];
3130
3131 s[0] = c;
3132 return onig_node_str_cat(node, s, s + 1);
3133 }
3134
3135 extern void
3136 onig_node_conv_to_str_node(Node* node, int flag)
3137 {
3138 NODE_SET_TYPE(node, NODE_STRING);
3139 STR_(node)->flag = flag;
3140 STR_(node)->capa = 0;
3141 STR_(node)->s = STR_(node)->buf;
3142 STR_(node)->end = STR_(node)->buf;
3143 }
3144
3145 extern void
3146 onig_node_str_clear(Node* node)
3147 {
3148 if (STR_(node)->capa != 0 &&
3149 IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) {
3150 xfree(STR_(node)->s);
3151 }
3152
3153 STR_(node)->capa = 0;
3154 STR_(node)->flag = 0;
3155 STR_(node)->s = STR_(node)->buf;
3156 STR_(node)->end = STR_(node)->buf;
3157 }
3158
3159 static Node*
3160 node_new_str(const UChar* s, const UChar* end)
3161 {
3162 Node* node = node_new();
3163 CHECK_NULL_RETURN(node);
3164
3165 NODE_SET_TYPE(node, NODE_STRING);
3166 STR_(node)->capa = 0;
3167 STR_(node)->flag = 0;
3168 STR_(node)->s = STR_(node)->buf;
3169 STR_(node)->end = STR_(node)->buf;
3170 if (onig_node_str_cat(node, s, end)) {
3171 onig_node_free(node);
3172 return NULL;
3173 }
3174 return node;
3175 }
3176
3177 extern Node*
3178 onig_node_new_str(const UChar* s, const UChar* end)
3179 {
3180 return node_new_str(s, end);
3181 }
3182
3183 static Node*
3184 node_new_str_raw(UChar* s, UChar* end)
3185 {
3186 Node* node = node_new_str(s, end);
3187 CHECK_NULL_RETURN(node);
3188 NODE_STRING_SET_RAW(node);
3189 return node;
3190 }
3191
3192 static Node*
3193 node_new_empty(void)
3194 {
3195 return node_new_str(NULL, NULL);
3196 }
3197
3198 static Node*
3199 node_new_str_raw_char(UChar c)
3200 {
3201 UChar p[1];
3202
3203 p[0] = c;
3204 return node_new_str_raw(p, p + 1);
3205 }
3206
3207 static Node*
3208 str_node_split_last_char(Node* node, OnigEncoding enc)
3209 {
3210 const UChar *p;
3211 Node* rn;
3212 StrNode* sn;
3213
3214 sn = STR_(node);
3215 rn = NULL_NODE;
3216 if (sn->end > sn->s) {
3217 p = onigenc_get_prev_char_head(enc, sn->s, sn->end);
3218 if (p && p > sn->s) { /* can be split. */
3219 rn = node_new_str(p, sn->end);
3220 CHECK_NULL_RETURN(rn);
3221 if (NODE_STRING_IS_RAW(node))
3222 NODE_STRING_SET_RAW(rn);
3223
3224 sn->end = (UChar* )p;
3225 }
3226 }
3227 return rn;
3228 }
3229
3230 static int
3231 str_node_can_be_split(Node* node, OnigEncoding enc)
3232 {
3233 StrNode* sn = STR_(node);
3234 if (sn->end > sn->s) {
3235 return ((enclen(enc, sn->s) < sn->end - sn->s) ? 1 : 0);
3236 }
3237 return 0;
3238 }
3239
3240 #ifdef USE_PAD_TO_SHORT_BYTE_CHAR
3241 static int
3242 node_str_head_pad(StrNode* sn, int num, UChar val)
3243 {
3244 UChar buf[NODE_STRING_BUF_SIZE];
3245 int i, len;
3246
3247 len = sn->end - sn->s;
3248 onig_strcpy(buf, sn->s, sn->end);
3249 onig_strcpy(&(sn->s[num]), buf, buf + len);
3250 sn->end += num;
3251
3252 for (i = 0; i < num; i++) {
3253 sn->s[i] = val;
3254 }
3255 }
3256 #endif
3257
3258 extern int
3259 onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc)
3260 {
3261 unsigned int num, val;
3262 OnigCodePoint c;
3263 UChar* p = *src;
3264 PFETCH_READY;
3265
3266 num = 0;
3267 while (! PEND) {
3268 PFETCH(c);
3269 if (IS_CODE_DIGIT_ASCII(enc, c)) {
3270 val = (unsigned int )DIGITVAL(c);
3271 if ((INT_MAX_LIMIT - val) / 10UL < num)
3272 return -1; /* overflow */
3273
3274 num = num * 10 + val;
3275 }
3276 else {
3277 PUNFETCH;
3278 break;
3279 }
3280 }
3281 *src = p;
3282 return num;
3283 }
3284
3285 static int
3286 scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int minlen,
3287 int maxlen, OnigEncoding enc)
3288 {
3289 OnigCodePoint c;
3290 unsigned int num, val;
3291 int n;
3292 UChar* p = *src;
3293 PFETCH_READY;
3294
3295 num = 0;
3296 n = 0;
3297 while (! PEND && n < maxlen) {
3298 PFETCH(c);
3299 if (IS_CODE_XDIGIT_ASCII(enc, c)) {
3300 n++;
3301 val = (unsigned int )XDIGITVAL(enc,c);
3302 if ((INT_MAX_LIMIT - val) / 16UL < num)
3303 return ONIGERR_TOO_BIG_NUMBER; /* overflow */
3304
3305 num = (num << 4) + XDIGITVAL(enc,c);
3306 }
3307 else {
3308 PUNFETCH;
3309 break;
3310 }
3311 }
3312
3313 if (n < minlen)
3314 return ONIGERR_INVALID_CODE_POINT_VALUE;
3315
3316 *src = p;
3317 return num;
3318 }
3319
3320 static int
3321 scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen,
3322 OnigEncoding enc)
3323 {
3324 OnigCodePoint c;
3325 unsigned int num, val;
3326 UChar* p = *src;
3327 PFETCH_READY;
3328
3329 num = 0;
3330 while (! PEND && maxlen-- != 0) {
3331 PFETCH(c);
3332 if (IS_CODE_DIGIT_ASCII(enc, c) && c < '8') {
3333 val = ODIGITVAL(c);
3334 if ((INT_MAX_LIMIT - val) / 8UL < num)
3335 return -1; /* overflow */
3336
3337 num = (num << 3) + val;
3338 }
3339 else {
3340 PUNFETCH;
3341 break;
3342 }
3343 }
3344 *src = p;
3345 return num;
3346 }
3347
3348
/* Write one code point (SIZE_CODE_POINT bytes) into bbuf at byte offset pos.
   code must be an lvalue because its address is taken. */
#define BB_WRITE_CODE_POINT(bbuf,pos,code)  BB_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT)
3351
3352 /* data format:
3353 [n][from-1][to-1][from-2][to-2] ... [from-n][to-n]
3354 (all data size is OnigCodePoint)
3355 */
3356 static int
3357 new_code_range(BBuf** pbuf)
3358 {
3359 #define INIT_MULTI_BYTE_RANGE_SIZE (SIZE_CODE_POINT * 5)
3360 int r;
3361 OnigCodePoint n;
3362 BBuf* bbuf;
3363
3364 bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf));
3365 CHECK_NULL_RETURN_MEMERR(bbuf);
3366 r = BB_INIT(bbuf, INIT_MULTI_BYTE_RANGE_SIZE);
3367 if (r != 0) {
3368 xfree(bbuf);
3369 *pbuf = 0;
3370 return r;
3371 }
3372
3373 n = 0;
3374 BB_WRITE_CODE_POINT(bbuf, 0, n);
3375 return 0;
3376 }
3377
3378 static int
3379 add_code_range_to_buf(BBuf** pbuf, OnigCodePoint from, OnigCodePoint to)
3380 {
3381 int r, inc_n, pos;
3382 int low, high, bound, x;
3383 OnigCodePoint n, *data;
3384 BBuf* bbuf;
3385
3386 if (from > to) {
3387 n = from; from = to; to = n;
3388 }
3389
3390 if (IS_NULL(*pbuf)) {
3391 r = new_code_range(pbuf);
3392 if (r != 0) return r;
3393 bbuf = *pbuf;
3394 n = 0;
3395 }
3396 else {
3397 bbuf = *pbuf;
3398 GET_CODE_POINT(n, bbuf->p);
3399 }
3400 data = (OnigCodePoint* )(bbuf->p);
3401 data++;
3402
3403 for (low = 0, bound = n; low < bound; ) {
3404 x = (low + bound) >> 1;
3405 if (from > data[x*2 + 1])
3406 low = x + 1;
3407 else
3408 bound = x;
3409 }
3410
3411 high = (to == ~((OnigCodePoint )0)) ? n : low;
3412 for (bound = n; high < bound; ) {
3413 x = (high + bound) >> 1;
3414 if (to + 1 >= data[x*2])
3415 high = x + 1;
3416 else
3417 bound = x;
3418 }
3419
3420 inc_n = low + 1 - high;
3421 if (n + inc_n > ONIG_MAX_MULTI_BYTE_RANGES_NUM)
3422 return ONIGERR_TOO_MANY_MULTI_BYTE_RANGES;
3423
3424 if (inc_n != 1) {
3425 if (from > data[low*2])
3426 from = data[low*2];
3427 if (to < data[(high - 1)*2 + 1])
3428 to = data[(high - 1)*2 + 1];
3429 }
3430
3431 if (inc_n != 0 && (OnigCodePoint )high < n) {
3432 int from_pos = SIZE_CODE_POINT * (1 + high * 2);
3433 int to_pos = SIZE_CODE_POINT * (1 + (low + 1) * 2);
3434 int size = (n - high) * 2 * SIZE_CODE_POINT;
3435
3436 if (inc_n > 0) {
3437 BB_MOVE_RIGHT(bbuf, from_pos, to_pos, size);
3438 }
3439 else {
3440 BB_MOVE_LEFT_REDUCE(bbuf, from_pos, to_pos);
3441 }
3442 }
3443
3444 pos = SIZE_CODE_POINT * (1 + low * 2);
3445 BB_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2);
3446 BB_WRITE_CODE_POINT(bbuf, pos, from);
3447 BB_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to);
3448 n += inc_n;
3449 BB_WRITE_CODE_POINT(bbuf, 0, n);
3450
3451 return 0;
3452 }
3453
3454 static int
3455 add_code_range(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to)
3456 {
3457 if (from > to) {
3458 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
3459 return 0;
3460 else
3461 return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
3462 }
3463
3464 return add_code_range_to_buf(pbuf, from, to);
3465 }
3466
3467 static int
3468 not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf)
3469 {
3470 int r, i, n;
3471 OnigCodePoint pre, from, *data, to = 0;
3472
3473 *pbuf = (BBuf* )NULL;
3474 if (IS_NULL(bbuf)) {
3475 set_all:
3476 return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
3477 }
3478
3479 data = (OnigCodePoint* )(bbuf->p);
3480 GET_CODE_POINT(n, data);
3481 data++;
3482 if (n <= 0) goto set_all;
3483
3484 r = 0;
3485 pre = MBCODE_START_POS(enc);
3486 for (i = 0; i < n; i++) {
3487 from = data[i*2];
3488 to = data[i*2+1];
3489 if (pre <= from - 1) {
3490 r = add_code_range_to_buf(pbuf, pre, from - 1);
3491 if (r != 0) return r;
3492 }
3493 if (to == ~((OnigCodePoint )0)) break;
3494 pre = to + 1;
3495 }
3496 if (to < ~((OnigCodePoint )0)) {
3497 r = add_code_range_to_buf(pbuf, to + 1, ~((OnigCodePoint )0));
3498 }
3499 return r;
3500 }
3501
/* Exchange two (code range buffer, negation flag) pairs.
   All four arguments must be assignable lvalues. */
#define SWAP_BB_NOT(bbuf1, not1, bbuf2, not2) do {\
  int   tnot = not1;\
  BBuf* tbuf = bbuf1;\
  not1  = not2;  not2  = tnot;\
  bbuf1 = bbuf2; bbuf2 = tbuf;\
} while (0)
3508
3509 static int
3510 or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1,
3511 BBuf* bbuf2, int not2, BBuf** pbuf)
3512 {
3513 int r;
3514 OnigCodePoint i, n1, *data1;
3515 OnigCodePoint from, to;
3516
3517 *pbuf = (BBuf* )NULL;
3518 if (IS_NULL(bbuf1) && IS_NULL(bbuf2)) {
3519 if (not1 != 0 || not2 != 0)
3520 return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
3521 return 0;
3522 }
3523
3524 r = 0;
3525 if (IS_NULL(bbuf2))
3526 SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);
3527
3528 if (IS_NULL(bbuf1)) {
3529 if (not1 != 0) {
3530 return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
3531 }
3532 else {
3533 if (not2 == 0) {
3534 return bbuf_clone(pbuf, bbuf2);
3535 }
3536 else {
3537 return not_code_range_buf(enc, bbuf2, pbuf);
3538 }
3539 }
3540 }
3541
3542 if (not1 != 0)
3543 SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);
3544
3545 data1 = (OnigCodePoint* )(bbuf1->p);
3546 GET_CODE_POINT(n1, data1);
3547 data1++;
3548
3549 if (not2 == 0 && not1 == 0) { /* 1 OR 2 */
3550 r = bbuf_clone(pbuf, bbuf2);
3551 }
3552 else if (not1 == 0) { /* 1 OR (not 2) */
3553 r = not_code_range_buf(enc, bbuf2, pbuf);
3554 }
3555 if (r != 0) return r;
3556
3557 for (i = 0; i < n1; i++) {
3558 from = data1[i*2];
3559 to = data1[i*2+1];
3560 r = add_code_range_to_buf(pbuf, from, to);
3561 if (r != 0) return r;
3562 }
3563 return 0;
3564 }
3565
3566 static int
3567 and_code_range1(BBuf** pbuf, OnigCodePoint from1, OnigCodePoint to1,
3568 OnigCodePoint* data, int n)
3569 {
3570 int i, r;
3571 OnigCodePoint from2, to2;
3572
3573 for (i = 0; i < n; i++) {
3574 from2 = data[i*2];
3575 to2 = data[i*2+1];
3576 if (from2 < from1) {
3577 if (to2 < from1) continue;
3578 else {
3579 from1 = to2 + 1;
3580 }
3581 }
3582 else if (from2 <= to1) {
3583 if (to2 < to1) {
3584 if (from1 <= from2 - 1) {
3585 r = add_code_range_to_buf(pbuf, from1, from2-1);
3586 if (r != 0) return r;
3587 }
3588 from1 = to2 + 1;
3589 }
3590 else {
3591 to1 = from2 - 1;
3592 }
3593 }
3594 else {
3595 from1 = from2;
3596 }
3597 if (from1 > to1) break;
3598 }
3599 if (from1 <= to1) {
3600 r = add_code_range_to_buf(pbuf, from1, to1);
3601 if (r != 0) return r;
3602 }
3603 return 0;
3604 }
3605
3606 static int
3607 and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)
3608 {
3609 int r;
3610 OnigCodePoint i, j, n1, n2, *data1, *data2;
3611 OnigCodePoint from, to, from1, to1, from2, to2;
3612
3613 *pbuf = (BBuf* )NULL;
3614 if (IS_NULL(bbuf1)) {
3615 if (not1 != 0 && IS_NOT_NULL(bbuf2)) /* not1 != 0 -> not2 == 0 */
3616 return bbuf_clone(pbuf, bbuf2);
3617 return 0;
3618 }
3619 else if (IS_NULL(bbuf2)) {
3620 if (not2 != 0)
3621 return bbuf_clone(pbuf, bbuf1);
3622 return 0;
3623 }
3624
3625 if (not1 != 0)
3626 SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);
3627
3628 data1 = (OnigCodePoint* )(bbuf1->p);
3629 data2 = (OnigCodePoint* )(bbuf2->p);
3630 GET_CODE_POINT(n1, data1);
3631 GET_CODE_POINT(n2, data2);
3632 data1++;
3633 data2++;
3634
3635 if (not2 == 0 && not1 == 0) { /* 1 AND 2 */
3636 for (i = 0; i < n1; i++) {
3637 from1 = data1[i*2];
3638 to1 = data1[i*2+1];
3639 for (j = 0; j < n2; j++) {
3640 from2 = data2[j*2];
3641 to2 = data2[j*2+1];
3642 if (from2 > to1) break;
3643 if (to2 < from1) continue;
3644 from = MAX(from1, from2);
3645 to = MIN(to1, to2);
3646 r = add_code_range_to_buf(pbuf, from, to);
3647 if (r != 0) return r;
3648 }
3649 }
3650 }
3651 else if (not1 == 0) { /* 1 AND (not 2) */
3652 for (i = 0; i < n1; i++) {
3653 from1 = data1[i*2];
3654 to1 = data1[i*2+1];
3655 r = and_code_range1(pbuf, from1, to1, data2, n2);
3656 if (r != 0) return r;
3657 }
3658 }
3659
3660 return 0;
3661 }
3662
3663 static int
3664 and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
3665 {
3666 int r, not1, not2;
3667 BBuf *buf1, *buf2, *pbuf;
3668 BitSetRef bsr1, bsr2;
3669 BitSet bs1, bs2;
3670
3671 not1 = IS_NCCLASS_NOT(dest);
3672 bsr1 = dest->bs;
3673 buf1 = dest->mbuf;
3674 not2 = IS_NCCLASS_NOT(cc);
3675 bsr2 = cc->bs;
3676 buf2 = cc->mbuf;
3677
3678 if (not1 != 0) {
3679 bitset_invert_to(bsr1, bs1);
3680 bsr1 = bs1;
3681 }
3682 if (not2 != 0) {
3683 bitset_invert_to(bsr2, bs2);
3684 bsr2 = bs2;
3685 }
3686 bitset_and(bsr1, bsr2);
3687 if (bsr1 != dest->bs) {
3688 bitset_copy(dest->bs, bsr1);
3689 }
3690 if (not1 != 0) {
3691 bitset_invert(dest->bs);
3692 }
3693
3694 if (! ONIGENC_IS_SINGLEBYTE(enc)) {
3695 if (not1 != 0 && not2 != 0) {
3696 r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf);
3697 }
3698 else {
3699 r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf);
3700 if (r == 0 && not1 != 0) {
3701 BBuf *tbuf;
3702 r = not_code_range_buf(enc, pbuf, &tbuf);
3703 if (r != 0) {
3704 bbuf_free(pbuf);
3705 return r;
3706 }
3707 bbuf_free(pbuf);
3708 pbuf = tbuf;
3709 }
3710 }
3711 if (r != 0) return r;
3712
3713 dest->mbuf = pbuf;
3714 bbuf_free(buf1);
3715 return r;
3716 }
3717 return 0;
3718 }
3719
3720 static int
3721 or_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
3722 {
3723 int r, not1, not2;
3724 BBuf *buf1, *buf2, *pbuf;
3725 BitSetRef bsr1, bsr2;
3726 BitSet bs1, bs2;
3727
3728 not1 = IS_NCCLASS_NOT(dest);
3729 bsr1 = dest->bs;
3730 buf1 = dest->mbuf;
3731 not2 = IS_NCCLASS_NOT(cc);
3732 bsr2 = cc->bs;
3733 buf2 = cc->mbuf;
3734
3735 if (not1 != 0) {
3736 bitset_invert_to(bsr1, bs1);
3737 bsr1 = bs1;
3738 }
3739 if (not2 != 0) {
3740 bitset_invert_to(bsr2, bs2);
3741 bsr2 = bs2;
3742 }
3743 bitset_or(bsr1, bsr2);
3744 if (bsr1 != dest->bs) {
3745 bitset_copy(dest->bs, bsr1);
3746 }
3747 if (not1 != 0) {
3748 bitset_invert(dest->bs);
3749 }
3750
3751 if (! ONIGENC_IS_SINGLEBYTE(enc)) {
3752 if (not1 != 0 && not2 != 0) {
3753 r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf);
3754 }
3755 else {
3756 r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf);
3757 if (r == 0 && not1 != 0) {
3758 BBuf *tbuf;
3759 r = not_code_range_buf(enc, pbuf, &tbuf);
3760 if (r != 0) {
3761 bbuf_free(pbuf);
3762 return r;
3763 }
3764 bbuf_free(pbuf);
3765 pbuf = tbuf;
3766 }
3767 }
3768 if (r != 0) return r;
3769
3770 dest->mbuf = pbuf;
3771 bbuf_free(buf1);
3772 return r;
3773 }
3774 else
3775 return 0;
3776 }
3777
3778 static OnigCodePoint
3779 conv_backslash_value(OnigCodePoint c, ScanEnv* env)
3780 {
3781 if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_CONTROL_CHARS)) {
3782 switch (c) {
3783 case 'n': return '\n';
3784 case 't': return '\t';
3785 case 'r': return '\r';
3786 case 'f': return '\f';
3787 case 'a': return '\007';
3788 case 'b': return '\010';
3789 case 'e': return '\033';
3790 case 'v':
3791 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_V_VTAB))
3792 return '\v';
3793 break;
3794
3795 default:
3796 break;
3797 }
3798 }
3799 return c;
3800 }
3801
3802 static int
3803 is_invalid_quantifier_target(Node* node)
3804 {
3805 switch (NODE_TYPE(node)) {
3806 case NODE_ANCHOR:
3807 case NODE_GIMMICK:
3808 return 1;
3809 break;
3810
3811 case NODE_ENCLOSURE:
3812 /* allow enclosed elements */
3813 /* return is_invalid_quantifier_target(NODE_BODY(node)); */
3814 break;
3815
3816 case NODE_LIST:
3817 do {
3818 if (! is_invalid_quantifier_target(NODE_CAR(node))) return 0;
3819 } while (IS_NOT_NULL(node = NODE_CDR(node)));
3820 return 0;
3821 break;
3822
3823 case NODE_ALT:
3824 do {
3825 if (is_invalid_quantifier_target(NODE_CAR(node))) return 1;
3826 } while (IS_NOT_NULL(node = NODE_CDR(node)));
3827 break;
3828
3829 default:
3830 break;
3831 }
3832 return 0;
3833 }
3834
3835 /* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */
3836 static int
3837 quantifier_type_num(QuantNode* q)
3838 {
3839 if (q->greedy) {
3840 if (q->lower == 0) {
3841 if (q->upper == 1) return 0;
3842 else if (IS_REPEAT_INFINITE(q->upper)) return 1;
3843 }
3844 else if (q->lower == 1) {
3845 if (IS_REPEAT_INFINITE(q->upper)) return 2;
3846 }
3847 }
3848 else {
3849 if (q->lower == 0) {
3850 if (q->upper == 1) return 3;
3851 else if (IS_REPEAT_INFINITE(q->upper)) return 4;
3852 }
3853 else if (q->lower == 1) {
3854 if (IS_REPEAT_INFINITE(q->upper)) return 5;
3855 }
3856 }
3857 return -1;
3858 }
3859
3860
/* What to do with a quantifier that is applied directly to another one. */
enum ReduceType {
  RQ_ASIS = 0,  /* as is */
  RQ_DEL  = 1,  /* delete parent */
  RQ_A    = 2,  /* to '*'    */
  RQ_AQ   = 3,  /* to '*?'   */
  RQ_QQ   = 4,  /* to '??'   */
  RQ_P_QQ = 5,  /* to '+)??' */
  RQ_PQ_Q = 6   /* to '+?)?' */
};
3870
3871 static enum ReduceType ReduceTypeTable[6][6] = {
3872 {RQ_DEL, RQ_A, RQ_A, RQ_QQ, RQ_AQ, RQ_ASIS}, /* '?' */
3873 {RQ_DEL, RQ_DEL, RQ_DEL, RQ_P_QQ, RQ_P_QQ, RQ_DEL}, /* '*' */
3874 {RQ_A, RQ_A, RQ_DEL, RQ_ASIS, RQ_P_QQ, RQ_DEL}, /* '+' */
3875 {RQ_DEL, RQ_AQ, RQ_AQ, RQ_DEL, RQ_AQ, RQ_AQ}, /* '??' */
3876 {RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL}, /* '*?' */
3877 {RQ_ASIS, RQ_PQ_Q, RQ_DEL, RQ_AQ, RQ_AQ, RQ_DEL} /* '+?' */
3878 };
3879
3880 extern void
3881 onig_reduce_nested_quantifier(Node* pnode, Node* cnode)
3882 {
3883 int pnum, cnum;
3884 QuantNode *p, *c;
3885
3886 p = QUANT_(pnode);
3887 c = QUANT_(cnode);
3888 pnum = quantifier_type_num(p);
3889 cnum = quantifier_type_num(c);
3890 if (pnum < 0 || cnum < 0) {
3891 if ((p->lower == p->upper) && ! IS_REPEAT_INFINITE(p->upper)) {
3892 if ((c->lower == c->upper) && ! IS_REPEAT_INFINITE(c->upper)) {
3893 int n = positive_int_multiply(p->lower, c->lower);
3894 if (n >= 0) {
3895 p->lower = p->upper = n;
3896 NODE_BODY(pnode) = NODE_BODY(cnode);
3897 goto remove_cnode;
3898 }
3899 }
3900 }
3901
3902 return ;
3903 }
3904
3905 switch(ReduceTypeTable[cnum][pnum]) {
3906 case RQ_DEL:
3907 *pnode = *cnode;
3908 break;
3909 case RQ_A:
3910 NODE_BODY(pnode) = NODE_BODY(cnode);
3911 p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 1;
3912 break;
3913 case RQ_AQ:
3914 NODE_BODY(pnode) = NODE_BODY(cnode);
3915 p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 0;
3916 break;
3917 case RQ_QQ:
3918 NODE_BODY(pnode) = NODE_BODY(cnode);
3919 p->lower = 0; p->upper = 1; p->greedy = 0;
3920 break;
3921 case RQ_P_QQ:
3922 NODE_BODY(pnode) = cnode;
3923 p->lower = 0; p->upper = 1; p->greedy = 0;
3924 c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 1;
3925 return ;
3926 break;
3927 case RQ_PQ_Q:
3928 NODE_BODY(pnode) = cnode;
3929 p->lower = 0; p->upper = 1; p->greedy = 1;
3930 c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 0;
3931 return ;
3932 break;
3933 case RQ_ASIS:
3934 NODE_BODY(pnode) = cnode;
3935 return ;
3936 break;
3937 }
3938
3939 remove_cnode:
3940 NODE_BODY(cnode) = NULL_NODE;
3941 onig_node_free(cnode);
3942 }
3943
3944 static int
3945 node_new_general_newline(Node** node, ScanEnv* env)
3946 {
3947 int r;
3948 int dlen, alen;
3949 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN * 2];
3950 Node* crnl;
3951 Node* ncc;
3952 Node* x;
3953 CClassNode* cc;
3954
3955 dlen = ONIGENC_CODE_TO_MBC(env->enc, 0x0d, buf);
3956 if (dlen < 0) return dlen;
3957 alen = ONIGENC_CODE_TO_MBC(env->enc, 0x0a, buf + dlen);
3958 if (alen < 0) return alen;
3959
3960 crnl = node_new_str_raw(buf, buf + dlen + alen);
3961 CHECK_NULL_RETURN_MEMERR(crnl);
3962
3963 ncc = node_new_cclass();
3964 if (IS_NULL(ncc)) goto err2;
3965
3966 cc = CCLASS_(ncc);
3967 if (dlen == 1) {
3968 bitset_set_range(cc->bs, 0x0a, 0x0d);
3969 }
3970 else {
3971 r = add_code_range(&(cc->mbuf), env, 0x0a, 0x0d);
3972 if (r != 0) {
3973 err1:
3974 onig_node_free(ncc);
3975 err2:
3976 onig_node_free(crnl);
3977 return ONIGERR_MEMORY;
3978 }
3979 }
3980
3981 if (ONIGENC_IS_UNICODE_ENCODING(env->enc)) {
3982 r = add_code_range(&(cc->mbuf), env, 0x85, 0x85);
3983 if (r != 0) goto err1;
3984 r = add_code_range(&(cc->mbuf), env, 0x2028, 0x2029);
3985 if (r != 0) goto err1;
3986 }
3987
3988 x = node_new_enclosure_if_else(crnl, 0, ncc);
3989 if (IS_NULL(x)) goto err1;
3990
3991 *node = x;
3992 return 0;
3993 }
3994
/*
 * Token kinds stored in OnigToken.type.
 * Values are assigned sequentially starting from TK_EOT == 0; the order
 * of the enumerators must not change.
 */
enum TokenSyms {
  TK_EOT      = 0,              /* end of token */
  TK_RAW_BYTE = 1,
  TK_CHAR,
  TK_STRING,
  TK_CODE_POINT,
  TK_ANYCHAR,
  TK_CHAR_TYPE,
  TK_BACKREF,
  TK_CALL,
  TK_ANCHOR,
  TK_OP_REPEAT,
  TK_INTERVAL,
  TK_ANYCHAR_ANYTIME,           /* SQL '%' == .* */
  TK_ALT,
  TK_SUBEXP_OPEN,
  TK_SUBEXP_CLOSE,
  TK_CC_OPEN,
  TK_QUOTE_OPEN,
  TK_CHAR_PROPERTY,             /* \p{...}, \P{...} */
  TK_KEEP,                      /* \K */
  TK_GENERAL_NEWLINE,           /* \R */
  TK_NO_NEWLINE,                /* \N */
  TK_TRUE_ANYCHAR,              /* \O */
  TK_EXTENDED_GRAPHEME_CLUSTER, /* \X */

  /* tokens that only occur inside a character class */
  TK_CC_CLOSE,                  /* ] */
  TK_CC_RANGE,                  /* - */
  TK_POSIX_BRACKET_OPEN,        /* [: */
  TK_CC_AND,                    /* && */
  TK_CC_CC_OPEN                 /* [ */
};
4028
4029 typedef struct {
4030 enum TokenSyms type;
4031 int escaped;
4032 int base; /* is number: 8, 16 (used in [....]) */
4033 UChar* backp;
4034 union {
4035 UChar* s;
4036 int c;
4037 OnigCodePoint code;
4038 int anchor;
4039 int subtype;
4040 struct {
4041 int lower;
4042 int upper;
4043 int greedy;
4044 int possessive;
4045 } repeat;
4046 struct {
4047 int num;
4048 int ref1;
4049 int* refs;
4050 int by_name;
4051 #ifdef USE_BACKREF_WITH_LEVEL
4052 int exist_level;
4053 int level; /* \k<name+n> */
4054 #endif
4055 } backref;
4056 struct {
4057 UChar* name;
4058 UChar* name_end;
4059 int gnum;
4060 int by_number;
4061 } call;
4062 struct {
4063 int ctype;
4064 int not;
4065 } prop;
4066 } u;
4067 } OnigToken;
4068
4069
4070 static int
4071 fetch_range_quantifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env)
4072 {
4073 int low, up, syn_allow, non_low = 0;
4074 int r = 0;
4075 OnigCodePoint c;
4076 OnigEncoding enc = env->enc;
4077 UChar* p = *src;
4078 PFETCH_READY;
4079
4080 syn_allow = IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INVALID_INTERVAL);
4081
4082 if (PEND) {
4083 if (syn_allow)
4084 return 1; /* "....{" : OK! */
4085 else
4086 return ONIGERR_END_PATTERN_AT_LEFT_BRACE; /* "....{" syntax error */
4087 }
4088
4089 if (! syn_allow) {
4090 c = PPEEK;
4091 if (c == ')' || c == '(' || c == '|') {
4092 return ONIGERR_END_PATTERN_AT_LEFT_BRACE;
4093 }
4094 }
4095
4096 low = onig_scan_unsigned_number(&p, end, env->enc);
4097 if (low < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
4098 if (low > ONIG_MAX_REPEAT_NUM)
4099 return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
4100
4101 if (p == *src) { /* can't read low */
4102 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV)) {
4103 /* allow {,n} as {0,n} */
4104 low = 0;
4105 non_low = 1;
4106 }
4107 else
4108 goto invalid;
4109 }
4110
4111 if (PEND) goto invalid;
4112 PFETCH(c);
4113 if (c == ',') {
4114 UChar* prev = p;
4115 up = onig_scan_unsigned_number(&p, end, env->enc);
4116 if (up < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
4117 if (up > ONIG_MAX_REPEAT_NUM)
4118 return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
4119
4120 if (p == prev) {
4121 if (non_low != 0)
4122 goto invalid;
4123 up = REPEAT_INFINITE; /* {n,} : {n,infinite} */
4124 }
4125 }
4126 else {
4127 if (non_low != 0)
4128 goto invalid;
4129
4130 PUNFETCH;
4131 up = low; /* {n} : exact n times */
4132 r = 2; /* fixed */
4133 }
4134
4135 if (PEND) goto invalid;
4136 PFETCH(c);
4137 if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) {
4138 if (c != MC_ESC(env->syntax)) goto invalid;
4139 PFETCH(c);
4140 }
4141 if (c != '}') goto invalid;
4142
4143 if (!IS_REPEAT_INFINITE(up) && low > up) {
4144 return ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE;
4145 }
4146
4147 tok->type = TK_INTERVAL;
4148 tok->u.repeat.lower = low;
4149 tok->u.repeat.upper = up;
4150 *src = p;
4151 return r; /* 0: normal {n,m}, 2: fixed {n} */
4152
4153 invalid:
4154 if (syn_allow) {
4155 /* *src = p; */ /* !!! Don't do this line !!! */
4156 return 1; /* OK */
4157 }
4158 else
4159 return ONIGERR_INVALID_REPEAT_RANGE_PATTERN;
4160 }
4161
4162 /* \M-, \C-, \c, or \... */
4163 static int
4164 fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env, OnigCodePoint* val)
4165 {
4166 int v;
4167 OnigCodePoint c;
4168 OnigEncoding enc = env->enc;
4169 UChar* p = *src;
4170
4171 if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
4172
4173 PFETCH_S(c);
4174 switch (c) {
4175 case 'M':
4176 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META)) {
4177 if (PEND) return ONIGERR_END_PATTERN_AT_META;
4178 PFETCH_S(c);
4179 if (c != '-') return ONIGERR_META_CODE_SYNTAX;
4180 if (PEND) return ONIGERR_END_PATTERN_AT_META;
4181 PFETCH_S(c);
4182 if (c == MC_ESC(env->syntax)) {
4183 v = fetch_escaped_value(&p, end, env, &c);
4184 if (v < 0) return v;
4185 }
4186 c = ((c & 0xff) | 0x80);
4187 }
4188 else
4189 goto backslash;
4190 break;
4191
4192 case 'C':
4193 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL)) {
4194 if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
4195 PFETCH_S(c);
4196 if (c != '-') return ONIGERR_CONTROL_CODE_SYNTAX;
4197 goto control;
4198 }
4199 else
4200 goto backslash;
4201
4202 case 'c':
4203 if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_C_CONTROL)) {
4204 control:
4205 if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
4206 PFETCH_S(c);
4207 if (c == '?') {
4208 c = 0177;
4209 }
4210 else {
4211 if (c == MC_ESC(env->syntax)) {
4212 v = fetch_escaped_value(&p, end, env, &c);
4213 if (v < 0) return v;
4214 }
4215 c &= 0x9f;
4216 }
4217 break;
4218 }
4219 /* fall through */
4220
4221 default:
4222 {
4223 backslash:
4224 c = conv_backslash_value(c, env);
4225 }
4226 break;
4227 }
4228
4229 *src = p;
4230 *val = c;
4231 return 0;
4232 }
4233
4234 static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env);
4235
4236 static OnigCodePoint
4237 get_name_end_code_point(OnigCodePoint start)
4238 {
4239 switch (start) {
4240 case '<': return (OnigCodePoint )'>'; break;
4241 case '\'': return (OnigCodePoint )'\''; break;
4242 case '(': return (OnigCodePoint )')'; break;
4243 default:
4244 break;
4245 }
4246
4247 return (OnigCodePoint )0;
4248 }
4249
/* How the text between the name delimiters was classified by fetch_name*(). */
enum REF_NUM {
  IS_NOT_NUM = 0,  /* a name, not a number */
  IS_ABS_NUM = 1,  /* unsigned number */
  IS_REL_NUM = 2   /* number with a leading '+' or '-' */
};
4255
#ifdef USE_BACKREF_WITH_LEVEL
/*
   \k<name+n>, \k<name-n>
   \k<num+n>,  \k<num-n>
   \k<-num+n>, \k<-num-n>
   \k<+num+n>, \k<+num-n>

   Parse a group name or number that may carry a "+n" / "-n" level suffix.
   *src points just after the opening delimiter (start_code).

   On success:
     *rname_end  end of the name/number part
     *rback_num  signed number when *num_type is not IS_NOT_NUM
     *rlevel     signed level when a suffix was present
     *src        advanced past the closing delimiter
     return value is 1 when a level suffix was present, 0 otherwise.
   On error a negative ONIGERR_* code is returned.  Except for the
   empty-name and too-big-number cases, the offending text is recorded
   with onig_scan_env_set_error_string().
*/
static int
fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
                      UChar** rname_end, ScanEnv* env,
                      int* rback_num, int* rlevel, enum REF_NUM* num_type)
{
  int r = 0;
  int sign = 1;
  int exist_level = 0;
  int digit_count = 0;
  int level, flag;
  OnigCodePoint end_code;
  OnigCodePoint c = 0;
  OnigEncoding enc = env->enc;
  UChar *name_end = end;
  UChar *pnum_head = *src;
  UChar *p = *src;
  PFETCH_READY;

  *rback_num = 0;
  *num_type = IS_NOT_NUM;
  end_code = get_name_end_code_point(start_code);

  if (PEND) return ONIGERR_EMPTY_GROUP_NAME;

  /* The first character decides between name, absolute and relative number. */
  PFETCH(c);
  if (c == end_code) return ONIGERR_EMPTY_GROUP_NAME;

  if (IS_CODE_DIGIT_ASCII(enc, c)) {
    *num_type = IS_ABS_NUM;
    digit_count++;
  }
  else if (c == '-' || c == '+') {
    *num_type = IS_REL_NUM;
    sign = (c == '-') ? -1 : 1;
    pnum_head = p;  /* digits start after the sign */
  }
  else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
    r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
  }

  /* Scan up to the closing delimiter, a ')' or the start of a level suffix. */
  while (!PEND) {
    name_end = p;
    PFETCH(c);
    if (c == end_code || c == ')' || c == '+' || c == '-') {
      if (*num_type != IS_NOT_NUM && digit_count == 0)
        r = ONIGERR_INVALID_GROUP_NAME;
      break;
    }

    if (*num_type == IS_NOT_NUM) {
      if (!ONIGENC_IS_CODE_WORD(enc, c))
        r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
    }
    else if (IS_CODE_DIGIT_ASCII(enc, c)) {
      digit_count++;
    }
    else {
      r = ONIGERR_INVALID_GROUP_NAME;
      *num_type = IS_NOT_NUM;
    }
  }

  if (r != 0) goto fail;

  if (c != end_code) {
    /* Only a level suffix followed by the closing delimiter is acceptable. */
    if (c != '+' && c != '-') goto bad_name_to_end;

    flag = (c == '-') ? -1 : 1;
    if (PEND) {
      r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
      goto fail;
    }
    PFETCH(c);
    if (! IS_CODE_DIGIT_ASCII(enc, c)) goto bad_name_to_end;
    PUNFETCH;
    level = onig_scan_unsigned_number(&p, end, enc);
    if (level < 0) return ONIGERR_TOO_BIG_NUMBER;
    *rlevel = (level * flag);
    exist_level = 1;

    if (PEND) goto bad_name_to_end;
    PFETCH(c);
    if (c != end_code) goto bad_name_to_end;
  }

  if (*num_type != IS_NOT_NUM) {
    *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
    if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
    if (*rback_num == 0 && *num_type == IS_REL_NUM) {
      /* a relative reference of zero is rejected */
      r = ONIGERR_INVALID_GROUP_NAME;
      goto fail;
    }

    *rback_num *= sign;
  }

  *rname_end = name_end;
  *src = p;
  return (exist_level ? 1 : 0);

 bad_name_to_end:
  name_end = end;
  r = ONIGERR_INVALID_GROUP_NAME;
 fail:
  onig_scan_env_set_error_string(env, r, *src, name_end);
  return r;
}
#endif /* USE_BACKREF_WITH_LEVEL */
4392
4393 /*
4394 ref: 0 -> define name (don't allow number name)
4395 1 -> reference name (allow number name)
4396 */
4397 static int
4398 fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
4399 UChar** rname_end, ScanEnv* env, int* rback_num,
4400 enum REF_NUM* num_type, int ref)
4401 {
4402 int r, sign;
4403 int digit_count;
4404 OnigCodePoint end_code;
4405 OnigCodePoint c = 0;
4406 OnigEncoding enc = env->enc;
4407 UChar *name_end;
4408 UChar *pnum_head;
4409 UChar *p = *src;
4410
4411 *rback_num = 0;
4412
4413 end_code = get_name_end_code_point(start_code);
4414
4415 digit_count = 0;
4416 name_end = end;
4417 pnum_head = *src;
4418 r = 0;
4419 *num_type = IS_NOT_NUM;
4420 sign = 1;
4421 if (PEND) {
4422 return ONIGERR_EMPTY_GROUP_NAME;
4423 }
4424 else {
4425 PFETCH_S(c);
4426 if (c == end_code)
4427 return ONIGERR_EMPTY_GROUP_NAME;
4428
4429 if (IS_CODE_DIGIT_ASCII(enc, c)) {
4430 if (ref == 1)
4431 *num_type = IS_ABS_NUM;
4432 else {
4433 r = ONIGERR_INVALID_GROUP_NAME;
4434 }
4435 digit_count++;
4436 }
4437 else if (c == '-') {
4438 if (ref == 1) {
4439 *num_type = IS_REL_NUM;
4440 sign = -1;
4441 pnum_head = p;
4442 }
4443 else {
4444 r = ONIGERR_INVALID_GROUP_NAME;
4445 }
4446 }
4447 else if (c == '+') {
4448 if (ref == 1) {
4449 *num_type = IS_REL_NUM;
4450 sign = 1;
4451 pnum_head = p;
4452 }
4453 else {
4454 r = ONIGERR_INVALID_GROUP_NAME;
4455 }
4456 }
4457 else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
4458 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
4459 }
4460 }
4461
4462 if (r == 0) {
4463 while (!PEND) {
4464 name_end = p;
4465 PFETCH_S(c);
4466 if (c == end_code || c == ')') {
4467 if (*num_type != IS_NOT_NUM && digit_count == 0)
4468 r = ONIGERR_INVALID_GROUP_NAME;
4469 break;
4470 }
4471
4472 if (*num_type != IS_NOT_NUM) {
4473 if (IS_CODE_DIGIT_ASCII(enc, c)) {
4474 digit_count++;
4475 }
4476 else {
4477 if (!ONIGENC_IS_CODE_WORD(enc, c))
4478 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
4479 else
4480 r = ONIGERR_INVALID_GROUP_NAME;
4481
4482 *num_type = IS_NOT_NUM;
4483 }
4484 }
4485 else {
4486 if (!ONIGENC_IS_CODE_WORD(enc, c)) {
4487 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
4488 }
4489 }
4490 }
4491
4492 if (c != end_code) {
4493 r = ONIGERR_INVALID_GROUP_NAME;
4494 goto err;
4495 }
4496
4497 if (*num_type != IS_NOT_NUM) {
4498 *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
4499 if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
4500 else if (*rback_num == 0) {
4501 if (*num_type == IS_REL_NUM) {
4502 r = ONIGERR_INVALID_GROUP_NAME;
4503 goto err;
4504 }
4505 }
4506
4507 *rback_num *= sign;
4508 }
4509
4510 *rname_end = name_end;
4511 *src = p;
4512 return 0;
4513 }
4514 else {
4515 while (!PEND) {
4516 name_end = p;
4517 PFETCH_S(c);
4518 if (c == end_code || c == ')')
4519 break;
4520 }
4521 if (PEND)
4522 name_end = end;
4523
4524 err:
4525 onig_scan_env_set_error_string(env, r, *src, name_end);
4526 return r;
4527 }
4528 }
4529
4530 static void
4531 CC_ESC_WARN(ScanEnv* env, UChar *c)
4532 {
4533 if (onig_warn == onig_null_warn) return ;
4534
4535 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED) &&
4536 IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) {
4537 UChar buf[WARN_BUFSIZE];
4538 onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
4539 env->pattern, env->pattern_end,
4540 (UChar* )"character class has '%s' without escape",
4541 c);
4542 (*onig_warn)((char* )buf);
4543 }
4544 }
4545
4546 static void
4547 CLOSE_BRACKET_WITHOUT_ESC_WARN(ScanEnv* env, UChar* c)
4548 {
4549 if (onig_warn == onig_null_warn) return ;
4550
4551 if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) {
4552 UChar buf[WARN_BUFSIZE];
4553 onig_snprintf_with_pattern(buf, WARN_BUFSIZE, (env)->enc,
4554 (env)->pattern, (env)->pattern_end,
4555 (UChar* )"regular expression has '%s' without escape", c);
4556 (*onig_warn)((char* )buf);
4557 }
4558 }
4559
4560 static UChar*
4561 find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to,
4562 UChar **next, OnigEncoding enc)
4563 {
4564 int i;
4565 OnigCodePoint x;
4566 UChar *q;
4567 UChar *p = from;
4568
4569 while (p < to) {
4570 x = ONIGENC_MBC_TO_CODE(enc, p, to);
4571 q = p + enclen(enc, p);
4572 if (x == s[0]) {
4573 for (i = 1; i < n && q < to; i++) {
4574 x = ONIGENC_MBC_TO_CODE(enc, q, to);
4575 if (x != s[i]) break;
4576 q += enclen(enc, q);
4577 }
4578 if (i >= n) {
4579 if (IS_NOT_NULL(next))
4580 *next = q;
4581 return p;
4582 }
4583 }
4584 p = q;
4585 }
4586 return NULL_UCHARP;
4587 }
4588
4589 static int
4590 str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to,
4591 OnigCodePoint bad, OnigEncoding enc, OnigSyntaxType* syn)
4592 {
4593 int i, in_esc;
4594 OnigCodePoint x;
4595 UChar *q;
4596 UChar *p = from;
4597
4598 in_esc = 0;
4599 while (p < to) {
4600 if (in_esc) {
4601 in_esc = 0;
4602 p += enclen(enc, p);
4603 }
4604 else {
4605 x = ONIGENC_MBC_TO_CODE(enc, p, to);
4606 q = p + enclen(enc, p);
4607 if (x == s[0]) {
4608 for (i = 1; i < n && q < to; i++) {
4609 x = ONIGENC_MBC_TO_CODE(enc, q, to);
4610 if (x != s[i]) break;
4611 q += enclen(enc, q);
4612 }
4613 if (i >= n) return 1;
4614 p += enclen(enc, p);
4615 }
4616 else {
4617 x = ONIGENC_MBC_TO_CODE(enc, p, to);
4618 if (x == bad) return 0;
4619 else if (x == MC_ESC(syn)) in_esc = 1;
4620 p = q;
4621 }
4622 }
4623 }
4624 return 0;
4625 }
4626
4627 static int
4628 fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
4629 {
4630 int num;
4631 OnigCodePoint c, c2;
4632 OnigSyntaxType* syn = env->syntax;
4633 OnigEncoding enc = env->enc;
4634 UChar* prev;
4635 UChar* p = *src;
4636 PFETCH_READY;
4637
4638 if (PEND) {
4639 tok->type = TK_EOT;
4640 return tok->type;
4641 }
4642
4643 PFETCH(c);
4644 tok->type = TK_CHAR;
4645 tok->base = 0;
4646 tok->u.c = c;
4647 tok->escaped = 0;
4648
4649 if (c == ']') {
4650 tok->type = TK_CC_CLOSE;
4651 }
4652 else if (c == '-') {
4653 tok->type = TK_CC_RANGE;
4654 }
4655 else if (c == MC_ESC(syn)) {
4656 if (! IS_SYNTAX_BV(syn, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC))
4657 goto end;
4658
4659 if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
4660
4661 PFETCH(c);
4662 tok->escaped = 1;
4663 tok->u.c = c;
4664 switch (c) {
4665 case 'w':
4666 tok->type = TK_CHAR_TYPE;
4667 tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
4668 tok->u.prop.not = 0;
4669 break;
4670 case 'W':
4671 tok->type = TK_CHAR_TYPE;
4672 tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
4673 tok->u.prop.not = 1;
4674 break;
4675 case 'd':
4676 tok->type = TK_CHAR_TYPE;
4677 tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
4678 tok->u.prop.not = 0;
4679 break;
4680 case 'D':
4681 tok->type = TK_CHAR_TYPE;
4682 tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
4683 tok->u.prop.not = 1;
4684 break;
4685 case 's':
4686 tok->type = TK_CHAR_TYPE;
4687 tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
4688 tok->u.prop.not = 0;
4689 break;
4690 case 'S':
4691 tok->type = TK_CHAR_TYPE;
4692 tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
4693 tok->u.prop.not = 1;
4694 break;
4695 case 'h':
4696 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
4697 tok->type = TK_CHAR_TYPE;
4698 tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
4699 tok->u.prop.not = 0;
4700 break;
4701 case 'H':
4702 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
4703 tok->type = TK_CHAR_TYPE;
4704 tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
4705 tok->u.prop.not = 1;
4706 break;
4707
4708 case 'p':
4709 case 'P':
4710 if (PEND) break;
4711
4712 c2 = PPEEK;
4713 if (c2 == '{' &&
4714 IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {
4715 PINC;
4716 tok->type = TK_CHAR_PROPERTY;
4717 tok->u.prop.not = (c == 'P' ? 1 : 0);
4718
4719 if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
4720 PFETCH(c2);
4721 if (c2 == '^') {
4722 tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);
4723 }
4724 else
4725 PUNFETCH;
4726 }
4727 }
4728 break;
4729
4730 case 'o':
4731 if (PEND) break;
4732
4733 prev = p;
4734 if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_O_BRACE_OCTAL)) {
4735 PINC;
4736 num = scan_unsigned_octal_number(&p, end, 11, enc);
4737 if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
4738 if (!PEND) {
4739 c2 = PPEEK;
4740 if (IS_CODE_DIGIT_ASCII(enc, c2))
4741 return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
4742 }
4743
4744 if (p > prev + enclen(enc, prev) && !PEND && (PPEEK_IS('}'))) {
4745 PINC;
4746 tok->type = TK_CODE_POINT;
4747 tok->base = 8;
4748 tok->u.code = (OnigCodePoint )num;
4749 }
4750 else {
4751 /* can't read nothing or invalid format */
4752 p = prev;
4753 }
4754 }
4755 break;
4756
4757 case 'x':
4758 if (PEND) break;
4759
4760 prev = p;
4761 if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {
4762 PINC;
4763 num = scan_unsigned_hexadecimal_number(&p, end, 0, 8, enc);
4764 if (num < 0) {
4765 if (num == ONIGERR_TOO_BIG_NUMBER)
4766 return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
4767 else
4768 return num;
4769 }
4770 if (!PEND) {
4771 c2 = PPEEK;
4772 if (IS_CODE_XDIGIT_ASCII(enc, c2))
4773 return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
4774 }
4775
4776 if (p > prev + enclen(enc, prev) && !PEND && (PPEEK_IS('}'))) {
4777 PINC;
4778 tok->type = TK_CODE_POINT;
4779 tok->base = 16;
4780 tok->u.code = (OnigCodePoint )num;
4781 }
4782 else {
4783 /* can't read nothing or invalid format */
4784 p = prev;
4785 }
4786 }
4787 else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
4788 num = scan_unsigned_hexadecimal_number(&p, end, 0, 2, enc);
4789 if (num < 0) return num;
4790 if (p == prev) { /* can't read nothing. */
4791 num = 0; /* but, it's not error */
4792 }
4793 tok->type = TK_RAW_BYTE;
4794 tok->base = 16;
4795 tok->u.c = num;
4796 }
4797 break;
4798
4799 case 'u':
4800 if (PEND) break;
4801
4802 prev = p;
4803 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {
4804 num = scan_unsigned_hexadecimal_number(&p, end, 4, 4, enc);
4805 if (num < 0) return num;
4806 if (p == prev) { /* can't read nothing. */
4807 num = 0; /* but, it's not error */
4808 }
4809 tok->type = TK_CODE_POINT;
4810 tok->base = 16;
4811 tok->u.code = (OnigCodePoint )num;
4812 }
4813 break;
4814
4815 case '0':
4816 case '1': case '2': case '3': case '4': case '5': case '6': case '7':
4817 if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {
4818 PUNFETCH;
4819 prev = p;
4820 num = scan_unsigned_octal_number(&p, end, 3, enc);
4821 if (num < 0 || num >= 256) return ONIGERR_TOO_BIG_NUMBER;
4822 if (p == prev) { /* can't read nothing. */
4823 num = 0; /* but, it's not error */
4824 }
4825 tok->type = TK_RAW_BYTE;
4826 tok->base = 8;
4827 tok->u.c = num;
4828 }
4829 break;
4830
4831 default:
4832 PUNFETCH;
4833 num = fetch_escaped_value(&p, end, env, &c2);
4834 if (num < 0) return num;
4835 if (tok->u.c != c2) {
4836 tok->u.code = c2;
4837 tok->type = TK_CODE_POINT;
4838 }
4839 break;
4840 }
4841 }
4842 else if (c == '[') {
4843 if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_POSIX_BRACKET) && (PPEEK_IS(':'))) {
4844 OnigCodePoint send[] = { (OnigCodePoint )':', (OnigCodePoint )']' };
4845 tok->backp = p; /* point at '[' is read */
4846 PINC;
4847 if (str_exist_check_with_esc(send, 2, p, end,
4848 (OnigCodePoint )']', enc, syn)) {
4849 tok->type = TK_POSIX_BRACKET_OPEN;
4850 }
4851 else {
4852 PUNFETCH;
4853 goto cc_in_cc;
4854 }
4855 }
4856 else {
4857 cc_in_cc:
4858 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP)) {
4859 tok->type = TK_CC_CC_OPEN;
4860 }
4861 else {
4862 CC_ESC_WARN(env, (UChar* )"[");
4863 }
4864 }
4865 }
4866 else if (c == '&') {
4867 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP) &&
4868 !PEND && (PPEEK_IS('&'))) {
4869 PINC;
4870 tok->type = TK_CC_AND;
4871 }
4872 }
4873
4874 end:
4875 *src = p;
4876 return tok->type;
4877 }
4878
4879 static int
4880 fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
4881 {
4882 int r, num;
4883 OnigCodePoint c;
4884 OnigEncoding enc = env->enc;
4885 OnigSyntaxType* syn = env->syntax;
4886 UChar* prev;
4887 UChar* p = *src;
4888 PFETCH_READY;
4889
4890 start:
4891 if (PEND) {
4892 tok->type = TK_EOT;
4893 return tok->type;
4894 }
4895
4896 tok->type = TK_STRING;
4897 tok->base = 0;
4898 tok->backp = p;
4899
4900 PFETCH(c);
4901 if (IS_MC_ESC_CODE(c, syn)) {
4902 if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
4903
4904 tok->backp = p;
4905 PFETCH(c);
4906
4907 tok->u.c = c;
4908 tok->escaped = 1;
4909 switch (c) {
4910 case '*':
4911 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF)) break;
4912 tok->type = TK_OP_REPEAT;
4913 tok->u.repeat.lower = 0;
4914 tok->u.repeat.upper = REPEAT_INFINITE;
4915 goto greedy_check;
4916 break;
4917
4918 case '+':
4919 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_PLUS_ONE_INF)) break;
4920 tok->type = TK_OP_REPEAT;
4921 tok->u.repeat.lower = 1;
4922 tok->u.repeat.upper = REPEAT_INFINITE;
4923 goto greedy_check;
4924 break;
4925
4926 case '?':
4927 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_QMARK_ZERO_ONE)) break;
4928 tok->type = TK_OP_REPEAT;
4929 tok->u.repeat.lower = 0;
4930 tok->u.repeat.upper = 1;
4931 greedy_check:
4932 if (!PEND && PPEEK_IS('?') &&
4933 IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY)) {
4934 PFETCH(c);
4935 tok->u.repeat.greedy = 0;
4936 tok->u.repeat.possessive = 0;
4937 }
4938 else {
4939 possessive_check:
4940 if (!PEND && PPEEK_IS('+') &&
4941 ((IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT) &&
4942 tok->type != TK_INTERVAL) ||
4943 (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL) &&
4944 tok->type == TK_INTERVAL))) {
4945 PFETCH(c);
4946 tok->u.repeat.greedy = 1;
4947 tok->u.repeat.possessive = 1;
4948 }
4949 else {
4950 tok->u.repeat.greedy = 1;
4951 tok->u.repeat.possessive = 0;
4952 }
4953 }
4954 break;
4955
4956 case '{':
4957 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break;
4958 r = fetch_range_quantifier(&p, end, tok, env);
4959 if (r < 0) return r; /* error */
4960 if (r == 0) goto greedy_check;
4961 else if (r == 2) { /* {n} */
4962 if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))
4963 goto possessive_check;
4964
4965 goto greedy_check;
4966 }
4967 /* r == 1 : normal char */
4968 break;
4969
4970 case '|':
4971 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_VBAR_ALT)) break;
4972 tok->type = TK_ALT;
4973 break;
4974
4975 case '(':
4976 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;
4977 tok->type = TK_SUBEXP_OPEN;
4978 break;
4979
4980 case ')':
4981 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;
4982 tok->type = TK_SUBEXP_CLOSE;
4983 break;
4984
4985 case 'w':
4986 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
4987 tok->type = TK_CHAR_TYPE;
4988 tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
4989 tok->u.prop.not = 0;
4990 break;
4991
4992 case 'W':
4993 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
4994 tok->type = TK_CHAR_TYPE;
4995 tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
4996 tok->u.prop.not = 1;
4997 break;
4998
4999 case 'b':
5000 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
5001 tok->type = TK_ANCHOR;
5002 tok->u.anchor = ANCHOR_WORD_BOUNDARY;
5003 break;
5004
5005 case 'B':
5006 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
5007 tok->type = TK_ANCHOR;
5008 tok->u.anchor = ANCHOR_NO_WORD_BOUNDARY;
5009 break;
5010
5011 case 'y':
5012 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER)) break;
5013 tok->type = TK_ANCHOR;
5014 tok->u.anchor = ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY;
5015 break;
5016
5017 case 'Y':
5018 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER)) break;
5019 tok->type = TK_ANCHOR;
5020 tok->u.anchor = ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY;
5021 break;
5022
5023 #ifdef USE_WORD_BEGIN_END
5024 case '<':
5025 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;
5026 tok->type = TK_ANCHOR;
5027 tok->u.anchor = ANCHOR_WORD_BEGIN;
5028 break;
5029
5030 case '>':
5031 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;
5032 tok->type = TK_ANCHOR;
5033 tok->u.anchor = ANCHOR_WORD_END;
5034 break;
5035 #endif
5036
5037 case 's':
5038 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
5039 tok->type = TK_CHAR_TYPE;
5040 tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
5041 tok->u.prop.not = 0;
5042 break;
5043
5044 case 'S':
5045 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
5046 tok->type = TK_CHAR_TYPE;
5047 tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
5048 tok->u.prop.not = 1;
5049 break;
5050
5051 case 'd':
5052 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
5053 tok->type = TK_CHAR_TYPE;
5054 tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
5055 tok->u.prop.not = 0;
5056 break;
5057
5058 case 'D':
5059 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
5060 tok->type = TK_CHAR_TYPE;
5061 tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
5062 tok->u.prop.not = 1;
5063 break;
5064
5065 case 'h':
5066 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
5067 tok->type = TK_CHAR_TYPE;
5068 tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
5069 tok->u.prop.not = 0;
5070 break;
5071
5072 case 'H':
5073 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
5074 tok->type = TK_CHAR_TYPE;
5075 tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
5076 tok->u.prop.not = 1;
5077 break;
5078
5079 case 'K':
5080 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP)) break;
5081 tok->type = TK_KEEP;
5082 break;
5083
5084 case 'R':
5085 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE)) break;
5086 tok->type = TK_GENERAL_NEWLINE;
5087 break;
5088
5089 case 'N':
5090 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT)) break;
5091 tok->type = TK_NO_NEWLINE;
5092 break;
5093
5094 case 'O':
5095 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT)) break;
5096 tok->type = TK_TRUE_ANYCHAR;
5097 break;
5098
5099 case 'X':
5100 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER)) break;
5101 tok->type = TK_EXTENDED_GRAPHEME_CLUSTER;
5102 break;
5103
5104 case 'A':
5105 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
5106 begin_buf:
5107 tok->type = TK_ANCHOR;
5108 tok->u.subtype = ANCHOR_BEGIN_BUF;
5109 break;
5110
5111 case 'Z':
5112 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
5113 tok->type = TK_ANCHOR;
5114 tok->u.subtype = ANCHOR_SEMI_END_BUF;
5115 break;
5116
5117 case 'z':
5118 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
5119 end_buf:
5120 tok->type = TK_ANCHOR;
5121 tok->u.subtype = ANCHOR_END_BUF;
5122 break;
5123
5124 case 'G':
5125 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR)) break;
5126 tok->type = TK_ANCHOR;
5127 tok->u.subtype = ANCHOR_BEGIN_POSITION;
5128 break;
5129
5130 case '`':
5131 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;
5132 goto begin_buf;
5133 break;
5134
5135 case '\'':
5136 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;
5137 goto end_buf;
5138 break;
5139
5140 case 'o':
5141 if (PEND) break;
5142
5143 prev = p;
5144 if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_O_BRACE_OCTAL)) {
5145 PINC;
5146 num = scan_unsigned_octal_number(&p, end, 11, enc);
5147 if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
5148 if (!PEND) {
5149 if (IS_CODE_DIGIT_ASCII(enc, PPEEK))
5150 return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
5151 }
5152
5153 if ((p > prev + enclen(enc, prev)) && !PEND && PPEEK_IS('}')) {
5154 PINC;
5155 tok->type = TK_CODE_POINT;
5156 tok->u.code = (OnigCodePoint )num;
5157 }
5158 else {
5159 /* can't read nothing or invalid format */
5160 p = prev;
5161 }
5162 }
5163 break;
5164
5165 case 'x':
5166 if (PEND) break;
5167
5168 prev = p;
5169 if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {
5170 PINC;
5171 num = scan_unsigned_hexadecimal_number(&p, end, 0, 8, enc);
5172 if (num < 0) {
5173 if (num == ONIGERR_TOO_BIG_NUMBER)
5174 return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
5175 else
5176 return num;
5177 }
5178 if (!PEND) {
5179 if (IS_CODE_XDIGIT_ASCII(enc, PPEEK))
5180 return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
5181 }
5182
5183 if ((p > prev + enclen(enc, prev)) && !PEND && PPEEK_IS('}')) {
5184 PINC;
5185 tok->type = TK_CODE_POINT;
5186 tok->u.code = (OnigCodePoint )num;
5187 }
5188 else {
5189 /* can't read nothing or invalid format */
5190 p = prev;
5191 }
5192 }
5193 else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
5194 num = scan_unsigned_hexadecimal_number(&p, end, 0, 2, enc);
5195 if (num < 0) return num;
5196 if (p == prev) { /* can't read nothing. */
5197 num = 0; /* but, it's not error */
5198 }
5199 tok->type = TK_RAW_BYTE;
5200 tok->base = 16;
5201 tok->u.c = num;
5202 }
5203 break;
5204
5205 case 'u':
5206 if (PEND) break;
5207
5208 prev = p;
5209 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {
5210 num = scan_unsigned_hexadecimal_number(&p, end, 4, 4, enc);
5211 if (num < 0) return num;
5212 if (p == prev) { /* can't read nothing. */
5213 num = 0; /* but, it's not error */
5214 }
5215 tok->type = TK_CODE_POINT;
5216 tok->base = 16;
5217 tok->u.code = (OnigCodePoint )num;
5218 }
5219 break;
5220
5221 case '1': case '2': case '3': case '4':
5222 case '5': case '6': case '7': case '8': case '9':
5223 PUNFETCH;
5224 prev = p;
5225 num = onig_scan_unsigned_number(&p, end, enc);
5226 if (num < 0 || num > ONIG_MAX_BACKREF_NUM) {
5227 goto skip_backref;
5228 }
5229
5230 if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) &&
5231 (num <= env->num_mem || num <= 9)) { /* This spec. from GNU regex */
5232 if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
5233 if (num > env->num_mem || IS_NULL(SCANENV_MEMENV(env)[num].node))
5234 return ONIGERR_INVALID_BACKREF;
5235 }
5236
5237 tok->type = TK_BACKREF;
5238 tok->u.backref.num = 1;
5239 tok->u.backref.ref1 = num;
5240 tok->u.backref.by_name = 0;
5241 #ifdef USE_BACKREF_WITH_LEVEL
5242 tok->u.backref.exist_level = 0;
5243 #endif
5244 break;
5245 }
5246
5247 skip_backref:
5248 if (c == '8' || c == '9') {
5249 /* normal char */
5250 p = prev; PINC;
5251 break;
5252 }
5253
5254 p = prev;
5255 /* fall through */
5256 case '0':
5257 if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {
5258 prev = p;
5259 num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), enc);
5260 if (num < 0 || num >= 256) return ONIGERR_TOO_BIG_NUMBER;
5261 if (p == prev) { /* can't read nothing. */
5262 num = 0; /* but, it's not error */
5263 }
5264 tok->type = TK_RAW_BYTE;
5265 tok->base = 8;
5266 tok->u.c = num;
5267 }
5268 else if (c != '0') {
5269 PINC;
5270 }
5271 break;
5272
5273 case 'k':
5274 if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) {
5275 PFETCH(c);
5276 if (c == '<' || c == '\'') {
5277 UChar* name_end;
5278 int* backs;
5279 int back_num;
5280 enum REF_NUM num_type;
5281
5282 prev = p;
5283
5284 #ifdef USE_BACKREF_WITH_LEVEL
5285 name_end = NULL_UCHARP; /* no need. escape gcc warning. */
5286 r = fetch_name_with_level((OnigCodePoint )c, &p, end, &name_end,
5287 env, &back_num, &tok->u.backref.level, &num_type);
5288 if (r == 1) tok->u.backref.exist_level = 1;
5289 else tok->u.backref.exist_level = 0;
5290 #else
5291 r = fetch_name(c, &p, end, &name_end, env, &back_num, &num_type, 1);
5292 #endif
5293 if (r < 0) return r;
5294
5295 if (num_type != IS_NOT_NUM) {
5296 if (num_type == IS_REL_NUM) {
5297 back_num = backref_rel_to_abs(back_num, env);
5298 }
5299 if (back_num <= 0)
5300 return ONIGERR_INVALID_BACKREF;
5301
5302 if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
5303 if (back_num > env->num_mem ||
5304 IS_NULL(SCANENV_MEMENV(env)[back_num].node))
5305 return ONIGERR_INVALID_BACKREF;
5306 }
5307 tok->type = TK_BACKREF;
5308 tok->u.backref.by_name = 0;
5309 tok->u.backref.num = 1;
5310 tok->u.backref.ref1 = back_num;
5311 }
5312 else {
5313 num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);
5314 if (num <= 0) {
5315 onig_scan_env_set_error_string(env,
5316 ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);
5317 return ONIGERR_UNDEFINED_NAME_REFERENCE;
5318 }
5319 if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
5320 int i;
5321 for (i = 0; i < num; i++) {
5322 if (backs[i] > env->num_mem ||
5323 IS_NULL(SCANENV_MEMENV(env)[backs[i]].node))
5324 return ONIGERR_INVALID_BACKREF;
5325 }
5326 }
5327
5328 tok->type = TK_BACKREF;
5329 tok->u.backref.by_name = 1;
5330 if (num == 1) {
5331 tok->u.backref.num = 1;
5332 tok->u.backref.ref1 = backs[0];
5333 }
5334 else {
5335 tok->u.backref.num = num;
5336 tok->u.backref.refs = backs;
5337 }
5338 }
5339 }
5340 else
5341 PUNFETCH;
5342 }
5343 break;
5344
5345 #ifdef USE_CALL
5346 case 'g':
5347 if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) {
5348 PFETCH(c);
5349 if (c == '<' || c == '\'') {
5350 int gnum;
5351 UChar* name_end;
5352 enum REF_NUM num_type;
5353
5354 prev = p;
5355 r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env,
5356 &gnum, &num_type, 1);
5357 if (r < 0) return r;
5358
5359 if (num_type != IS_NOT_NUM) {
5360 if (num_type == IS_REL_NUM) {
5361 gnum = backref_rel_to_abs(gnum, env);
5362 if (gnum < 0) {
5363 onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,
5364 prev, name_end);
5365 return ONIGERR_UNDEFINED_GROUP_REFERENCE;
5366 }
5367 }
5368 tok->u.call.by_number = 1;
5369 tok->u.call.gnum = gnum;
5370 }
5371 else {
5372 tok->u.call.by_number = 0;
5373 tok->u.call.gnum = 0;
5374 }
5375
5376 tok->type = TK_CALL;
5377 tok->u.call.name = prev;
5378 tok->u.call.name_end = name_end;
5379 }
5380 else
5381 PUNFETCH;
5382 }
5383 break;
5384 #endif
5385
5386 case 'Q':
5387 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE)) {
5388 tok->type = TK_QUOTE_OPEN;
5389 }
5390 break;
5391
5392 case 'p':
5393 case 'P':
5394 if (!PEND && PPEEK_IS('{') &&
5395 IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {
5396 PINC;
5397 tok->type = TK_CHAR_PROPERTY;
5398 tok->u.prop.not = (c == 'P' ? 1 : 0);
5399
5400 if (!PEND &&
5401 IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
5402 PFETCH(c);
5403 if (c == '^') {
5404 tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);
5405 }
5406 else
5407 PUNFETCH;
5408 }
5409 }
5410 break;
5411
5412 default:
5413 {
5414 OnigCodePoint c2;
5415
5416 PUNFETCH;
5417 num = fetch_escaped_value(&p, end, env, &c2);
5418 if (num < 0) return num;
5419 /* set_raw: */
5420 if (tok->u.c != c2) {
5421 tok->type = TK_CODE_POINT;
5422 tok->u.code = c2;
5423 }
5424 else { /* string */
5425 p = tok->backp + enclen(enc, tok->backp);
5426 }
5427 }
5428 break;
5429 }
5430 }
5431 else {
5432 tok->u.c = c;
5433 tok->escaped = 0;
5434
5435 #ifdef USE_VARIABLE_META_CHARS
5436 if ((c != ONIG_INEFFECTIVE_META_CHAR) &&
5437 IS_SYNTAX_OP(syn, ONIG_SYN_OP_VARIABLE_META_CHARACTERS)) {
5438 if (c == MC_ANYCHAR(syn))
5439 goto any_char;
5440 else if (c == MC_ANYTIME(syn))
5441 goto anytime;
5442 else if (c == MC_ZERO_OR_ONE_TIME(syn))
5443 goto zero_or_one_time;
5444 else if (c == MC_ONE_OR_MORE_TIME(syn))
5445 goto one_or_more_time;
5446 else if (c == MC_ANYCHAR_ANYTIME(syn)) {
5447 tok->type = TK_ANYCHAR_ANYTIME;
5448 goto out;
5449 }
5450 }
5451 #endif
5452
5453 switch (c) {
5454 case '.':
5455 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_DOT_ANYCHAR)) break;
5456 #ifdef USE_VARIABLE_META_CHARS
5457 any_char:
5458 #endif
5459 tok->type = TK_ANYCHAR;
5460 break;
5461
5462 case '*':
5463 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ASTERISK_ZERO_INF)) break;
5464 #ifdef USE_VARIABLE_META_CHARS
5465 anytime:
5466 #endif
5467 tok->type = TK_OP_REPEAT;
5468 tok->u.repeat.lower = 0;
5469 tok->u.repeat.upper = REPEAT_INFINITE;
5470 goto greedy_check;
5471 break;
5472
5473 case '+':
5474 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_PLUS_ONE_INF)) break;
5475 #ifdef USE_VARIABLE_META_CHARS
5476 one_or_more_time:
5477 #endif
5478 tok->type = TK_OP_REPEAT;
5479 tok->u.repeat.lower = 1;
5480 tok->u.repeat.upper = REPEAT_INFINITE;
5481 goto greedy_check;
5482 break;
5483
5484 case '?':
5485 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_ZERO_ONE)) break;
5486 #ifdef USE_VARIABLE_META_CHARS
5487 zero_or_one_time:
5488 #endif
5489 tok->type = TK_OP_REPEAT;
5490 tok->u.repeat.lower = 0;
5491 tok->u.repeat.upper = 1;
5492 goto greedy_check;
5493 break;
5494
5495 case '{':
5496 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break;
5497 r = fetch_range_quantifier(&p, end, tok, env);
5498 if (r < 0) return r; /* error */
5499 if (r == 0) goto greedy_check;
5500 else if (r == 2) { /* {n} */
5501 if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))
5502 goto possessive_check;
5503
5504 goto greedy_check;
5505 }
5506 /* r == 1 : normal char */
5507 break;
5508
5509 case '|':
5510 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_VBAR_ALT)) break;
5511 tok->type = TK_ALT;
5512 break;
5513
5514 case '(':
5515 if (!PEND && PPEEK_IS('?') &&
5516 IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {
5517 PINC;
5518 if (! PEND) {
5519 c = PPEEK;
5520 if (c == '#') {
5521 PFETCH(c);
5522 while (1) {
5523 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
5524 PFETCH(c);
5525 if (c == MC_ESC(syn)) {
5526 if (! PEND) PFETCH(c);
5527 }
5528 else {
5529 if (c == ')') break;
5530 }
5531 }
5532 goto start;
5533 }
5534 else if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_PERL_SUBEXP_CALL)) {
5535 int gnum;
5536 UChar* name;
5537 UChar* name_end;
5538 enum REF_NUM num_type;
5539
5540 switch (c) {
5541 case '&':
5542 {
5543 PINC;
5544 name = p;
5545 r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &gnum,
5546 &num_type, 0);
5547 if (r < 0) return r;
5548
5549 tok->type = TK_CALL;
5550 tok->u.call.by_number = 0;
5551 tok->u.call.gnum = 0;
5552 tok->u.call.name = name;
5553 tok->u.call.name_end = name_end;
5554 }
5555 break;
5556
5557 case 'R':
5558 tok->type = TK_CALL;
5559 tok->u.call.by_number = 1;
5560 tok->u.call.gnum = 0;
5561 tok->u.call.name = p;
5562 PINC;
5563 if (! PPEEK_IS(')')) return ONIGERR_INVALID_GROUP_NAME;
5564 tok->u.call.name_end = p;
5565 break;
5566
5567 case '-':
5568 case '+':
5569 goto lparen_qmark_num;
5570 break;
5571 default:
5572 if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto lparen_qmark_end;
5573
5574 lparen_qmark_num:
5575 {
5576 name = p;
5577 r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env,
5578 &gnum, &num_type, 1);
5579 if (r < 0) return r;
5580
5581 if (num_type == IS_NOT_NUM) {
5582 return ONIGERR_INVALID_GROUP_NAME;
5583 }
5584 else {
5585 if (num_type == IS_REL_NUM) {
5586 gnum = backref_rel_to_abs(gnum, env);
5587 if (gnum < 0) {
5588 onig_scan_env_set_error_string(env,
5589 ONIGERR_UNDEFINED_NAME_REFERENCE, name, name_end);
5590 return ONIGERR_UNDEFINED_GROUP_REFERENCE;
5591 }
5592 }
5593 tok->u.call.by_number = 1;
5594 tok->u.call.gnum = gnum;
5595 }
5596
5597 tok->type = TK_CALL;
5598 tok->u.call.name = name;
5599 tok->u.call.name_end = name_end;
5600 }
5601 break;
5602 }
5603 }
5604 }
5605 lparen_qmark_end:
5606 PUNFETCH;
5607 }
5608
5609 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
5610 tok->type = TK_SUBEXP_OPEN;
5611 break;
5612
5613 case ')':
5614 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
5615 tok->type = TK_SUBEXP_CLOSE;
5616 break;
5617
5618 case '^':
5619 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
5620 tok->type = TK_ANCHOR;
5621 tok->u.subtype = (IS_SINGLELINE(env->options)
5622 ? ANCHOR_BEGIN_BUF : ANCHOR_BEGIN_LINE);
5623 break;
5624
5625 case '$':
5626 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
5627 tok->type = TK_ANCHOR;
5628 tok->u.subtype = (IS_SINGLELINE(env->options)
5629 ? ANCHOR_SEMI_END_BUF : ANCHOR_END_LINE);
5630 break;
5631
5632 case '[':
5633 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACKET_CC)) break;
5634 tok->type = TK_CC_OPEN;
5635 break;
5636
5637 case ']':
5638 if (*src > env->pattern) /* /].../ is allowed. */
5639 CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]");
5640 break;
5641
5642 case '#':
5643 if (IS_EXTEND(env->options)) {
5644 while (!PEND) {
5645 PFETCH(c);
5646 if (ONIGENC_IS_CODE_NEWLINE(enc, c))
5647 break;
5648 }
5649 goto start;
5650 break;
5651 }
5652 break;
5653
5654 case ' ': case '\t': case '\n': case '\r': case '\f':
5655 if (IS_EXTEND(env->options))
5656 goto start;
5657 break;
5658
5659 default:
5660 /* string */
5661 break;
5662 }
5663 }
5664
5665 #ifdef USE_VARIABLE_META_CHARS
5666 out:
5667 #endif
5668 *src = p;
5669 return tok->type;
5670 }
5671
5672 static int
5673 add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not,
5674 OnigEncoding enc ARG_UNUSED, OnigCodePoint sb_out,
5675 const OnigCodePoint mbr[])
5676 {
5677 int i, r;
5678 OnigCodePoint j;
5679
5680 int n = ONIGENC_CODE_RANGE_NUM(mbr);
5681
5682 if (not == 0) {
5683 for (i = 0; i < n; i++) {
5684 for (j = ONIGENC_CODE_RANGE_FROM(mbr, i);
5685 j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) {
5686 if (j >= sb_out) {
5687 if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) {
5688 r = add_code_range_to_buf(&(cc->mbuf), j,
5689 ONIGENC_CODE_RANGE_TO(mbr, i));
5690 if (r != 0) return r;
5691 i++;
5692 }
5693
5694 goto sb_end;
5695 }
5696 BITSET_SET_BIT(cc->bs, j);
5697 }
5698 }
5699
5700 sb_end:
5701 for ( ; i < n; i++) {
5702 r = add_code_range_to_buf(&(cc->mbuf),
5703 ONIGENC_CODE_RANGE_FROM(mbr, i),
5704 ONIGENC_CODE_RANGE_TO(mbr, i));
5705 if (r != 0) return r;
5706 }
5707 }
5708 else {
5709 OnigCodePoint prev = 0;
5710
5711 for (i = 0; i < n; i++) {
5712 for (j = prev; j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) {
5713 if (j >= sb_out) {
5714 goto sb_end2;
5715 }
5716 BITSET_SET_BIT(cc->bs, j);
5717 }
5718 prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
5719 }
5720 for (j = prev; j < sb_out; j++) {
5721 BITSET_SET_BIT(cc->bs, j);
5722 }
5723
5724 sb_end2:
5725 prev = sb_out;
5726
5727 for (i = 0; i < n; i++) {
5728 if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) {
5729 r = add_code_range_to_buf(&(cc->mbuf), prev,
5730 ONIGENC_CODE_RANGE_FROM(mbr, i) - 1);
5731 if (r != 0) return r;
5732 }
5733 prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
5734 if (prev == 0) goto end;
5735 }
5736
5737 r = add_code_range_to_buf(&(cc->mbuf), prev, MAX_CODE_POINT);
5738 if (r != 0) return r;
5739 }
5740
5741 end:
5742 return 0;
5743 }
5744
5745 static int
5746 add_ctype_to_cc_by_range_limit(CClassNode* cc, int ctype ARG_UNUSED, int not,
5747 OnigEncoding enc ARG_UNUSED,
5748 OnigCodePoint sb_out,
5749 const OnigCodePoint mbr[], OnigCodePoint limit)
5750 {
5751 int i, r;
5752 OnigCodePoint j;
5753 OnigCodePoint from;
5754 OnigCodePoint to;
5755
5756 int n = ONIGENC_CODE_RANGE_NUM(mbr);
5757
5758 if (not == 0) {
5759 for (i = 0; i < n; i++) {
5760 for (j = ONIGENC_CODE_RANGE_FROM(mbr, i);
5761 j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) {
5762 if (j > limit) goto end;
5763 if (j >= sb_out) {
5764 if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) {
5765 to = ONIGENC_CODE_RANGE_TO(mbr, i);
5766 if (to > limit) to = limit;
5767 r = add_code_range_to_buf(&(cc->mbuf), j, to);
5768 if (r != 0) return r;
5769 i++;
5770 }
5771
5772 goto sb_end;
5773 }
5774 BITSET_SET_BIT(cc->bs, j);
5775 }
5776 }
5777
5778 sb_end:
5779 for ( ; i < n; i++) {
5780 from = ONIGENC_CODE_RANGE_FROM(mbr, i);
5781 to = ONIGENC_CODE_RANGE_TO(mbr, i);
5782 if (from > limit) break;
5783 if (to > limit) to = limit;
5784 r = add_code_range_to_buf(&(cc->mbuf), from, to);
5785 if (r != 0) return r;
5786 }
5787 }
5788 else {
5789 OnigCodePoint prev = 0;
5790
5791 for (i = 0; i < n; i++) {
5792 from = ONIGENC_CODE_RANGE_FROM(mbr, i);
5793 if (from > limit) {
5794 for (j = prev; j < sb_out; j++) {
5795 BITSET_SET_BIT(cc->bs, j);
5796 }
5797 goto sb_end2;
5798 }
5799 for (j = prev; j < from; j++) {
5800 if (j >= sb_out) goto sb_end2;
5801 BITSET_SET_BIT(cc->bs, j);
5802 }
5803 prev = ONIGENC_CODE_RANGE_TO(mbr, i);
5804 if (prev > limit) prev = limit;
5805 prev++;
5806 if (prev == 0) goto end;
5807 }
5808 for (j = prev; j < sb_out; j++) {
5809 BITSET_SET_BIT(cc->bs, j);
5810 }
5811
5812 sb_end2:
5813 prev = sb_out;
5814
5815 for (i = 0; i < n; i++) {
5816 from = ONIGENC_CODE_RANGE_FROM(mbr, i);
5817 if (from > limit) goto last;
5818
5819 if (prev < from) {
5820 r = add_code_range_to_buf(&(cc->mbuf), prev, from - 1);
5821 if (r != 0) return r;
5822 }
5823 prev = ONIGENC_CODE_RANGE_TO(mbr, i);
5824 if (prev > limit) prev = limit;
5825 prev++;
5826 if (prev == 0) goto end;
5827 }
5828
5829 last:
5830 r = add_code_range_to_buf(&(cc->mbuf), prev, MAX_CODE_POINT);
5831 if (r != 0) return r;
5832 }
5833
5834 end:
5835 return 0;
5836 }
5837
5838 static int
5839 add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
5840 {
5841 #define ASCII_LIMIT 127
5842
5843 int c, r;
5844 int ascii_mode;
5845 const OnigCodePoint *ranges;
5846 OnigCodePoint limit;
5847 OnigCodePoint sb_out;
5848 OnigEncoding enc = env->enc;
5849
5850 ascii_mode = IS_ASCII_MODE_CTYPE_OPTION(ctype, env->options);
5851
5852 r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges);
5853 if (r == 0) {
5854 if (ascii_mode == 0)
5855 r = add_ctype_to_cc_by_range(cc, ctype, not, env->enc, sb_out, ranges);
5856 else
5857 r = add_ctype_to_cc_by_range_limit(cc, ctype, not, env->enc, sb_out,
5858 ranges, ASCII_LIMIT);
5859 return r;
5860 }
5861 else if (r != ONIG_NO_SUPPORT_CONFIG) {
5862 return r;
5863 }
5864
5865 r = 0;
5866 limit = ascii_mode ? ASCII_LIMIT : SINGLE_BYTE_SIZE;
5867
5868 switch (ctype) {
5869 case ONIGENC_CTYPE_ALPHA:
5870 case ONIGENC_CTYPE_BLANK:
5871 case ONIGENC_CTYPE_CNTRL:
5872 case ONIGENC_CTYPE_DIGIT:
5873 case ONIGENC_CTYPE_LOWER:
5874 case ONIGENC_CTYPE_PUNCT:
5875 case ONIGENC_CTYPE_SPACE:
5876 case ONIGENC_CTYPE_UPPER:
5877 case ONIGENC_CTYPE_XDIGIT:
5878 case ONIGENC_CTYPE_ASCII:
5879 case ONIGENC_CTYPE_ALNUM:
5880 if (not != 0) {
5881 for (c = 0; c < (int )limit; c++) {
5882 if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
5883 BITSET_SET_BIT(cc->bs, c);
5884 }
5885 for (c = limit; c < SINGLE_BYTE_SIZE; c++) {
5886 BITSET_SET_BIT(cc->bs, c);
5887 }
5888
5889 ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
5890 }
5891 else {
5892 for (c = 0; c < (int )limit; c++) {
5893 if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
5894 BITSET_SET_BIT(cc->bs, c);
5895 }
5896 }
5897 break;
5898
5899 case ONIGENC_CTYPE_GRAPH:
5900 case ONIGENC_CTYPE_PRINT:
5901 case ONIGENC_CTYPE_WORD:
5902 if (not != 0) {
5903 for (c = 0; c < (int )limit; c++) {
5904 if (ONIGENC_CODE_TO_MBCLEN(enc, c) > 0 /* check invalid code point */
5905 && ! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
5906 BITSET_SET_BIT(cc->bs, c);
5907 }
5908 for (c = limit; c < SINGLE_BYTE_SIZE; c++) {
5909 if (ONIGENC_CODE_TO_MBCLEN(enc, c) > 0)
5910 BITSET_SET_BIT(cc->bs, c);
5911 }
5912 }
5913 else {
5914 for (c = 0; c < (int )limit; c++) {
5915 if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
5916 BITSET_SET_BIT(cc->bs, c);
5917 }
5918 if (ascii_mode == 0)
5919 ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
5920 }
5921 break;
5922
5923 default:
5924 return ONIGERR_PARSER_BUG;
5925 break;
5926 }
5927
5928 return r;
5929 }
5930
5931 static int
5932 parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
5933 {
5934 #define POSIX_BRACKET_CHECK_LIMIT_LENGTH 20
5935 #define POSIX_BRACKET_NAME_MIN_LEN 4
5936
5937 static PosixBracketEntryType PBS[] = {
5938 { (UChar* )"alnum", ONIGENC_CTYPE_ALNUM, 5 },
5939 { (UChar* )"alpha", ONIGENC_CTYPE_ALPHA, 5 },
5940 { (UChar* )"blank", ONIGENC_CTYPE_BLANK, 5 },
5941 { (UChar* )"cntrl", ONIGENC_CTYPE_CNTRL, 5 },
5942 { (UChar* )"digit", ONIGENC_CTYPE_DIGIT, 5 },
5943 { (UChar* )"graph", ONIGENC_CTYPE_GRAPH, 5 },
5944 { (UChar* )"lower", ONIGENC_CTYPE_LOWER, 5 },
5945 { (UChar* )"print", ONIGENC_CTYPE_PRINT, 5 },
5946 { (UChar* )"punct", ONIGENC_CTYPE_PUNCT, 5 },
5947 { (UChar* )"space", ONIGENC_CTYPE_SPACE, 5 },
5948 { (UChar* )"upper", ONIGENC_CTYPE_UPPER, 5 },
5949 { (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 },
5950 { (UChar* )"ascii", ONIGENC_CTYPE_ASCII, 5 },
5951 { (UChar* )"word", ONIGENC_CTYPE_WORD, 4 },
5952 { (UChar* )NULL, -1, 0 }
5953 };
5954
5955 PosixBracketEntryType *pb;
5956 int not, i, r;
5957 OnigCodePoint c;
5958 OnigEncoding enc = env->enc;
5959 UChar *p = *src;
5960
5961 if (PPEEK_IS('^')) {
5962 PINC_S;
5963 not = 1;
5964 }
5965 else
5966 not = 0;
5967
5968 if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MIN_LEN + 3)
5969 goto not_posix_bracket;
5970
5971 for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
5972 if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) {
5973 p = (UChar* )onigenc_step(enc, p, end, pb->len);
5974 if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0)
5975 return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
5976
5977 r = add_ctype_to_cc(cc, pb->ctype, not, env);
5978 if (r != 0) return r;
5979
5980 PINC_S; PINC_S;
5981 *src = p;
5982 return 0;
5983 }
5984 }
5985
5986 not_posix_bracket:
5987 c = 0;
5988 i = 0;
5989 while (!PEND && ((c = PPEEK) != ':') && c != ']') {
5990 PINC_S;
5991 if (++i > POSIX_BRACKET_CHECK_LIMIT_LENGTH) break;
5992 }
5993 if (c == ':' && ! PEND) {
5994 PINC_S;
5995 if (! PEND) {
5996 PFETCH_S(c);
5997 if (c == ']')
5998 return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
5999 }
6000 }
6001
6002 return 1; /* 1: is not POSIX bracket, but no error. */
6003 }
6004
6005 static int
6006 fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env)
6007 {
6008 int r;
6009 OnigCodePoint c;
6010 OnigEncoding enc = env->enc;
6011 UChar *prev, *start, *p = *src;
6012
6013 r = 0;
6014 start = prev = p;
6015
6016 while (!PEND) {
6017 prev = p;
6018 PFETCH_S(c);
6019 if (c == '}') {
6020 r = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, start, prev);
6021 if (r < 0) break;
6022
6023 *src = p;
6024 return r;
6025 }
6026 else if (c == '(' || c == ')' || c == '{' || c == '|') {
6027 r = ONIGERR_INVALID_CHAR_PROPERTY_NAME;
6028 break;
6029 }
6030 }
6031
6032 onig_scan_env_set_error_string(env, r, *src, prev);
6033 return r;
6034 }
6035
6036 static int
6037 parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
6038 {
6039 int r, ctype;
6040 CClassNode* cc;
6041
6042 ctype = fetch_char_property_to_ctype(src, end, env);
6043 if (ctype < 0) return ctype;
6044
6045 *np = node_new_cclass();
6046 CHECK_NULL_RETURN_MEMERR(*np);
6047 cc = CCLASS_(*np);
6048 r = add_ctype_to_cc(cc, ctype, 0, env);
6049 if (r != 0) return r;
6050 if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
6051
6052 return 0;
6053 }
6054
6055
/* Parser state while reading the members of a character class. */
enum CCSTATE {
  CCS_VALUE    = 0,  /* a value has been read and is still pending */
  CCS_RANGE    = 1,  /* a range operator follows the pending value */
  CCS_COMPLETE = 2,  /* a range has just been completed */
  CCS_START    = 3   /* nothing read yet (also set after "&&") */
};
6062
/* Kind of the value currently held by the character class parser. */
enum CCVALTYPE {
  CCV_SB         = 0,  /* single-byte value, recorded in the bitset */
  CCV_CODE_POINT = 1,  /* code point recorded in the range buffer */
  CCV_CLASS      = 2   /* a whole class (ctype, property, POSIX bracket) */
};
6068
6069 static int
6070 next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type,
6071 enum CCSTATE* state, ScanEnv* env)
6072 {
6073 int r;
6074
6075 if (*state == CCS_RANGE)
6076 return ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE;
6077
6078 if (*state == CCS_VALUE && *type != CCV_CLASS) {
6079 if (*type == CCV_SB)
6080 BITSET_SET_BIT(cc->bs, (int )(*vs));
6081 else if (*type == CCV_CODE_POINT) {
6082 r = add_code_range(&(cc->mbuf), env, *vs, *vs);
6083 if (r < 0) return r;
6084 }
6085 }
6086
6087 *state = CCS_VALUE;
6088 *type = CCV_CLASS;
6089 return 0;
6090 }
6091
6092 static int
6093 next_state_val(CClassNode* cc, OnigCodePoint *from, OnigCodePoint to,
6094 int* from_israw, int to_israw,
6095 enum CCVALTYPE intype, enum CCVALTYPE* type,
6096 enum CCSTATE* state, ScanEnv* env)
6097 {
6098 int r;
6099
6100 switch (*state) {
6101 case CCS_VALUE:
6102 if (*type == CCV_SB) {
6103 if (*from > 0xff)
6104 return ONIGERR_INVALID_CODE_POINT_VALUE;
6105
6106 BITSET_SET_BIT(cc->bs, (int )(*from));
6107 }
6108 else if (*type == CCV_CODE_POINT) {
6109 r = add_code_range(&(cc->mbuf), env, *from, *from);
6110 if (r < 0) return r;
6111 }
6112 break;
6113
6114 case CCS_RANGE:
6115 if (intype == *type) {
6116 if (intype == CCV_SB) {
6117 if (*from > 0xff || to > 0xff)
6118 return ONIGERR_INVALID_CODE_POINT_VALUE;
6119
6120 if (*from > to) {
6121 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
6122 goto ccs_range_end;
6123 else
6124 return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
6125 }
6126 bitset_set_range(cc->bs, (int )*from, (int )to);
6127 }
6128 else {
6129 r = add_code_range(&(cc->mbuf), env, *from, to);
6130 if (r < 0) return r;
6131 }
6132 }
6133 else {
6134 if (*from > to) {
6135 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
6136 goto ccs_range_end;
6137 else
6138 return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
6139 }
6140 bitset_set_range(cc->bs, (int )*from, (int )(to < 0xff ? to : 0xff));
6141 r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*from, to);
6142 if (r < 0) return r;
6143 }
6144 ccs_range_end:
6145 *state = CCS_COMPLETE;
6146 break;
6147
6148 case CCS_COMPLETE:
6149 case CCS_START:
6150 *state = CCS_VALUE;
6151 break;
6152
6153 default:
6154 break;
6155 }
6156
6157 *from_israw = to_israw;
6158 *from = to;
6159 *type = intype;
6160 return 0;
6161 }
6162
6163 static int
6164 code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,
6165 ScanEnv* env)
6166 {
6167 int in_esc;
6168 OnigCodePoint code;
6169 OnigEncoding enc = env->enc;
6170 UChar* p = from;
6171
6172 in_esc = 0;
6173 while (! PEND) {
6174 if (ignore_escaped && in_esc) {
6175 in_esc = 0;
6176 }
6177 else {
6178 PFETCH_S(code);
6179 if (code == c) return 1;
6180 if (code == MC_ESC(env->syntax)) in_esc = 1;
6181 }
6182 }
6183 return 0;
6184 }
6185
6186 static int
6187 parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
6188 {
6189 int r, neg, len, fetched, and_start;
6190 OnigCodePoint v, vs;
6191 UChar *p;
6192 Node* node;
6193 CClassNode *cc, *prev_cc;
6194 CClassNode work_cc;
6195
6196 enum CCSTATE state;
6197 enum CCVALTYPE val_type, in_type;
6198 int val_israw, in_israw;
6199
6200 *np = NULL_NODE;
6201 env->parse_depth++;
6202 if (env->parse_depth > ParseDepthLimit)
6203 return ONIGERR_PARSE_DEPTH_LIMIT_OVER;
6204 prev_cc = (CClassNode* )NULL;
6205 r = fetch_token_in_cc(tok, src, end, env);
6206 if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) {
6207 neg = 1;
6208 r = fetch_token_in_cc(tok, src, end, env);
6209 }
6210 else {
6211 neg = 0;
6212 }
6213
6214 if (r < 0) return r;
6215 if (r == TK_CC_CLOSE) {
6216 if (! code_exist_check((OnigCodePoint )']',
6217 *src, env->pattern_end, 1, env))
6218 return ONIGERR_EMPTY_CHAR_CLASS;
6219
6220 CC_ESC_WARN(env, (UChar* )"]");
6221 r = tok->type = TK_CHAR; /* allow []...] */
6222 }
6223
6224 *np = node = node_new_cclass();
6225 CHECK_NULL_RETURN_MEMERR(node);
6226 cc = CCLASS_(node);
6227
6228 and_start = 0;
6229 state = CCS_START;
6230 p = *src;
6231 while (r != TK_CC_CLOSE) {
6232 fetched = 0;
6233 switch (r) {
6234 case TK_CHAR:
6235 any_char_in:
6236 len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.c);
6237 if (len > 1) {
6238 in_type = CCV_CODE_POINT;
6239 }
6240 else if (len < 0) {
6241 r = len;
6242 goto err;
6243 }
6244 else {
6245 /* sb_char: */
6246 in_type = CCV_SB;
6247 }
6248 v = (OnigCodePoint )tok->u.c;
6249 in_israw = 0;
6250 goto val_entry2;
6251 break;
6252
6253 case TK_RAW_BYTE:
6254 /* tok->base != 0 : octal or hexadec. */
6255 if (! ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base != 0) {
6256 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
6257 UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN;
6258 UChar* psave = p;
6259 int i, base = tok->base;
6260
6261 buf[0] = tok->u.c;
6262 for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) {
6263 r = fetch_token_in_cc(tok, &p, end, env);
6264 if (r < 0) goto err;
6265 if (r != TK_RAW_BYTE || tok->base != base) {
6266 fetched = 1;
6267 break;
6268 }
6269 buf[i] = tok->u.c;
6270 }
6271
6272 if (i < ONIGENC_MBC_MINLEN(env->enc)) {
6273 r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
6274 goto err;
6275 }
6276
6277 len = enclen(env->enc, buf);
6278 if (i < len) {
6279 r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
6280 goto err;
6281 }
6282 else if (i > len) { /* fetch back */
6283 p = psave;
6284 for (i = 1; i < len; i++) {
6285 r = fetch_token_in_cc(tok, &p, end, env);
6286 }
6287 fetched = 0;
6288 }
6289
6290 if (i == 1) {
6291 v = (OnigCodePoint )buf[0];
6292 goto raw_single;
6293 }
6294 else {
6295 v = ONIGENC_MBC_TO_CODE(env->enc, buf, bufe);
6296 in_type = CCV_CODE_POINT;
6297 }
6298 }
6299 else {
6300 v = (OnigCodePoint )tok->u.c;
6301 raw_single:
6302 in_type = CCV_SB;
6303 }
6304 in_israw = 1;
6305 goto val_entry2;
6306 break;
6307
6308 case TK_CODE_POINT:
6309 v = tok->u.code;
6310 in_israw = 1;
6311 val_entry:
6312 len = ONIGENC_CODE_TO_MBCLEN(env->enc, v);
6313 if (len < 0) {
6314 r = len;
6315 goto err;
6316 }
6317 in_type = (len == 1 ? CCV_SB : CCV_CODE_POINT);
6318 val_entry2:
6319 r = next_state_val(cc, &vs, v, &val_israw, in_israw, in_type, &val_type,
6320 &state, env);
6321 if (r != 0) goto err;
6322 break;
6323
6324 case TK_POSIX_BRACKET_OPEN:
6325 r = parse_posix_bracket(cc, &p, end, env);
6326 if (r < 0) goto err;
6327 if (r == 1) { /* is not POSIX bracket */
6328 CC_ESC_WARN(env, (UChar* )"[");
6329 p = tok->backp;
6330 v = (OnigCodePoint )tok->u.c;
6331 in_israw = 0;
6332 goto val_entry;
6333 }
6334 goto next_class;
6335 break;
6336
6337 case TK_CHAR_TYPE:
6338 r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not, env);
6339 if (r != 0) goto err;
6340
6341 next_class:
6342 r = next_state_class(cc, &vs, &val_type, &state, env);
6343 if (r != 0) goto err;
6344 break;
6345
6346 case TK_CHAR_PROPERTY:
6347 {
6348 int ctype = fetch_char_property_to_ctype(&p, end, env);
6349 if (ctype < 0) {
6350 r = ctype;
6351 goto err;
6352 }
6353 r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, env);
6354 if (r != 0) goto err;
6355 goto next_class;
6356 }
6357 break;
6358
6359 case TK_CC_RANGE:
6360 if (state == CCS_VALUE) {
6361 r = fetch_token_in_cc(tok, &p, end, env);
6362 if (r < 0) goto err;
6363 fetched = 1;
6364 if (r == TK_CC_CLOSE) { /* allow [x-] */
6365 range_end_val:
6366 v = (OnigCodePoint )'-';
6367 in_israw = 0;
6368 goto val_entry;
6369 }
6370 else if (r == TK_CC_AND) {
6371 CC_ESC_WARN(env, (UChar* )"-");
6372 goto range_end_val;
6373 }
6374
6375 if (val_type == CCV_CLASS) {
6376 r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;
6377 goto err;
6378 }
6379
6380 state = CCS_RANGE;
6381 }
6382 else if (state == CCS_START) {
6383 /* [-xa] is allowed */
6384 v = (OnigCodePoint )tok->u.c;
6385 in_israw = 0;
6386
6387 r = fetch_token_in_cc(tok, &p, end, env);
6388 if (r < 0) goto err;
6389 fetched = 1;
6390 /* [--x] or [a&&-x] is warned. */
6391 if (r == TK_CC_RANGE || and_start != 0)
6392 CC_ESC_WARN(env, (UChar* )"-");
6393
6394 goto val_entry;
6395 }
6396 else if (state == CCS_RANGE) {
6397 CC_ESC_WARN(env, (UChar* )"-");
6398 goto any_char_in; /* [!--x] is allowed */
6399 }
6400 else { /* CCS_COMPLETE */
6401 r = fetch_token_in_cc(tok, &p, end, env);
6402 if (r < 0) goto err;
6403 fetched = 1;
6404 if (r == TK_CC_CLOSE) goto range_end_val; /* allow [a-b-] */
6405 else if (r == TK_CC_AND) {
6406 CC_ESC_WARN(env, (UChar* )"-");
6407 goto range_end_val;
6408 }
6409
6410 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) {
6411 CC_ESC_WARN(env, (UChar* )"-");
6412 goto range_end_val; /* [0-9-a] is allowed as [0-9\-a] */
6413 }
6414 r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;
6415 goto err;
6416 }
6417 break;
6418
6419 case TK_CC_CC_OPEN: /* [ */
6420 {
6421 Node *anode;
6422 CClassNode* acc;
6423
6424 r = parse_char_class(&anode, tok, &p, end, env);
6425 if (r != 0) {
6426 onig_node_free(anode);
6427 goto cc_open_err;
6428 }
6429 acc = CCLASS_(anode);
6430 r = or_cclass(cc, acc, env->enc);
6431 onig_node_free(anode);
6432
6433 cc_open_err:
6434 if (r != 0) goto err;
6435 }
6436 break;
6437
6438 case TK_CC_AND: /* && */
6439 {
6440 if (state == CCS_VALUE) {
6441 r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,
6442 &val_type, &state, env);
6443 if (r != 0) goto err;
6444 }
6445 /* initialize local variables */
6446 and_start = 1;
6447 state = CCS_START;
6448
6449 if (IS_NOT_NULL(prev_cc)) {
6450 r = and_cclass(prev_cc, cc, env->enc);
6451 if (r != 0) goto err;
6452 bbuf_free(cc->mbuf);
6453 }
6454 else {
6455 prev_cc = cc;
6456 cc = &work_cc;
6457 }
6458 initialize_cclass(cc);
6459 }
6460 break;
6461
6462 case TK_EOT:
6463 r = ONIGERR_PREMATURE_END_OF_CHAR_CLASS;
6464 goto err;
6465 break;
6466 default:
6467 r = ONIGERR_PARSER_BUG;
6468 goto err;
6469 break;
6470 }
6471
6472 if (fetched)
6473 r = tok->type;
6474 else {
6475 r = fetch_token_in_cc(tok, &p, end, env);
6476 if (r < 0) goto err;
6477 }
6478 }
6479
6480 if (state == CCS_VALUE) {
6481 r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,
6482 &val_type, &state, env);
6483 if (r != 0) goto err;
6484 }
6485
6486 if (IS_NOT_NULL(prev_cc)) {
6487 r = and_cclass(prev_cc, cc, env->enc);
6488 if (r != 0) goto err;
6489 bbuf_free(cc->mbuf);
6490 cc = prev_cc;
6491 }
6492
6493 if (neg != 0)
6494 NCCLASS_SET_NOT(cc);
6495 else
6496 NCCLASS_CLEAR_NOT(cc);
6497 if (IS_NCCLASS_NOT(cc) &&
6498 IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) {
6499 int is_empty = (IS_NULL(cc->mbuf) ? 1 : 0);
6500 if (is_empty != 0)
6501 BITSET_IS_EMPTY(cc->bs, is_empty);
6502
6503 if (is_empty == 0) {
6504 #define NEWLINE_CODE 0x0a
6505
6506 if (ONIGENC_IS_CODE_NEWLINE(env->enc, NEWLINE_CODE)) {
6507 if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1)
6508 BITSET_SET_BIT(cc->bs, NEWLINE_CODE);
6509 else
6510 add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE);
6511 }
6512 }
6513 }
6514 *src = p;
6515 env->parse_depth--;
6516 return 0;
6517
6518 err:
6519 if (cc != CCLASS_(*np))
6520 bbuf_free(cc->mbuf);
6521 return r;
6522 }
6523
6524 static int parse_subexp(Node** top, OnigToken* tok, int term,
6525 UChar** src, UChar* end, ScanEnv* env);
6526
6527 #ifdef USE_CALLOUT
6528
6529 /* (?{...}[tag][+-]) (?{{...}}[tag][+-]) */
6530 static int
6531 parse_callout_of_contents(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* env)
6532 {
6533 int r;
6534 int i;
6535 int in;
6536 int num;
6537 OnigCodePoint c;
6538 UChar* code_start;
6539 UChar* code_end;
6540 UChar* contents;
6541 UChar* tag_start;
6542 UChar* tag_end;
6543 int brace_nest;
6544 CalloutListEntry* e;
6545 RegexExt* ext;
6546 OnigEncoding enc = env->enc;
6547 UChar* p = *src;
6548
6549 if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
6550
6551 brace_nest = 0;
6552 while (PPEEK_IS('{')) {
6553 brace_nest++;
6554 PINC_S;
6555 if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
6556 }
6557
6558 in = ONIG_CALLOUT_IN_PROGRESS;
6559 code_start = p;
6560 while (1) {
6561 if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
6562
6563 code_end = p;
6564 PFETCH_S(c);
6565 if (c == '}') {
6566 i = brace_nest;
6567 while (i > 0) {
6568 if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
6569 PFETCH_S(c);
6570 if (c == '}') i--;
6571 else break;
6572 }
6573 if (i == 0) break;
6574 }
6575 }
6576
6577 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
6578
6579 PFETCH_S(c);
6580 if (c == '[') {
6581 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
6582 tag_start = p;
6583 while (! PEND) {
6584 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
6585 tag_end = p;
6586 PFETCH_S(c);
6587 if (c == ']') break;
6588 }
6589 if (! is_allowed_callout_tag_name(enc, tag_start, tag_end))
6590 return ONIGERR_INVALID_CALLOUT_TAG_NAME;
6591
6592 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
6593 PFETCH_S(c);
6594 }
6595 else {
6596 tag_start = tag_end = 0;
6597 }
6598
6599 if (c == 'X') {
6600 in |= ONIG_CALLOUT_IN_RETRACTION;
6601 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
6602 PFETCH_S(c);
6603 }
6604 else if (c == '<') {
6605 in = ONIG_CALLOUT_IN_RETRACTION;
6606 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
6607 PFETCH_S(c);
6608 }
6609 else if (c == '>') { /* no needs (default) */
6610 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
6611 PFETCH_S(c);
6612 }
6613
6614 if (c != cterm)
6615 return ONIGERR_INVALID_CALLOUT_PATTERN;
6616
6617 r = reg_callout_list_entry(env, &num);
6618 if (r != 0) return r;
6619
6620 ext = onig_get_regex_ext(env->reg);
6621 CHECK_NULL_RETURN_MEMERR(ext);
6622 if (IS_NULL(ext->pattern)) {
6623 r = onig_ext_set_pattern(env->reg, env->pattern, env->pattern_end);
6624 if (r != ONIG_NORMAL) return r;
6625 }
6626
6627 if (tag_start != tag_end) {
6628 r = callout_tag_entry(env->reg, tag_start, tag_end, num);
6629 if (r != ONIG_NORMAL) return r;
6630 }
6631
6632 contents = onigenc_strdup(enc, code_start, code_end);
6633 CHECK_NULL_RETURN_MEMERR(contents);
6634
6635 r = node_new_callout(np, ONIG_CALLOUT_OF_CONTENTS, num, ONIG_NON_NAME_ID, env);
6636 if (r != 0) {
6637 xfree(contents);
6638 return r;
6639 }
6640
6641 e = onig_reg_callout_list_at(env->reg, num);
6642 if (IS_NULL(e)) {
6643 xfree(contents);
6644 return ONIGERR_MEMORY;
6645 }
6646
6647 e->of = ONIG_CALLOUT_OF_CONTENTS;
6648 e->in = in;
6649 e->name_id = ONIG_NON_NAME_ID;
6650 e->u.content.start = contents;
6651 e->u.content.end = contents + (code_end - code_start);
6652
6653 *src = p;
6654 return 0;
6655 }
6656
6657 static long
6658 parse_long(OnigEncoding enc, UChar* s, UChar* end, int sign_on, long max, long* rl)
6659 {
6660 long v;
6661 long d;
6662 int flag;
6663 UChar* p;
6664 OnigCodePoint c;
6665
6666 if (s >= end) return ONIGERR_INVALID_CALLOUT_ARG;
6667
6668 flag = 1;
6669 v = 0;
6670 p = s;
6671 while (p < end) {
6672 c = ONIGENC_MBC_TO_CODE(enc, p, end);
6673 p += ONIGENC_MBC_ENC_LEN(enc, p);
6674 if (c >= '0' && c <= '9') {
6675 d = (long )(c - '0');
6676 if (v > (max - d) / 10)
6677 return ONIGERR_INVALID_CALLOUT_ARG;
6678
6679 v = v * 10 + d;
6680 }
6681 else if (sign_on != 0 && (c == '-' || c == '+')) {
6682 if (c == '-') flag = -1;
6683 }
6684 else
6685 return ONIGERR_INVALID_CALLOUT_ARG;
6686
6687 sign_on = 0;
6688 }
6689
6690 *rl = flag * v;
6691 return ONIG_NORMAL;
6692 }
6693
6694 static int
6695 parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end,
6696 unsigned int types[], OnigValue vals[], ScanEnv* env)
6697 {
6698 #define MAX_CALLOUT_ARG_BYTE_LENGTH 128
6699
6700 int r;
6701 int n;
6702 int esc;
6703 int cn;
6704 UChar* s;
6705 UChar* e;
6706 UChar* eesc;
6707 OnigCodePoint c;
6708 UChar* bufend;
6709 UChar buf[MAX_CALLOUT_ARG_BYTE_LENGTH];
6710 OnigEncoding enc = env->enc;
6711 UChar* p = *src;
6712
6713 if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
6714
6715 n = 0;
6716 while (n < ONIG_CALLOUT_MAX_ARGS_NUM) {
6717 c = 0;
6718 cn = 0;
6719 esc = 0;
6720 eesc = 0;
6721 bufend = buf;
6722 s = e = p;
6723 while (1) {
6724 if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
6725
6726 e = p;
6727 PFETCH_S(c);
6728 if (esc != 0) {
6729 esc = 0;
6730 if (c == '\\' || c == cterm || c == ',') {
6731 /* */
6732 }
6733 else {
6734 e = eesc;
6735 cn++;
6736 }
6737 goto add_char;
6738 }
6739 else {
6740 if (c == '\\') {
6741 esc = 1;
6742 eesc = e;
6743 }
6744 else if (c == cterm || c == ',')
6745 break;
6746 else {
6747 size_t clen;
6748
6749 add_char:
6750 if (skip_mode == 0) {
6751 clen = p - e;
6752 if (bufend + clen > buf + MAX_CALLOUT_ARG_BYTE_LENGTH)
6753 return ONIGERR_INVALID_CALLOUT_ARG; /* too long argument */
6754
6755 xmemcpy(bufend, e, clen);
6756 bufend += clen;
6757 }
6758 cn++;
6759 }
6760 }
6761 }
6762
6763 if (cn != 0) {
6764 if (skip_mode == 0) {
6765 if ((types[n] & ONIG_TYPE_LONG) != 0) {
6766 int fixed = 0;
6767 if (cn > 0) {
6768 long rl;
6769 r = parse_long(enc, buf, bufend, 1, LONG_MAX, &rl);
6770 if (r == ONIG_NORMAL) {
6771 vals[n].l = rl;
6772 fixed = 1;
6773 types[n] = ONIG_TYPE_LONG;
6774 }
6775 }
6776
6777 if (fixed == 0) {
6778 types[n] = (types[n] & ~ONIG_TYPE_LONG);
6779 if (types[n] == ONIG_TYPE_VOID)
6780 return ONIGERR_INVALID_CALLOUT_ARG;
6781 }
6782 }
6783
6784 switch (types[n]) {
6785 case ONIG_TYPE_LONG:
6786 break;
6787
6788 case ONIG_TYPE_CHAR:
6789 if (cn != 1) return ONIGERR_INVALID_CALLOUT_ARG;
6790 vals[n].c = ONIGENC_MBC_TO_CODE(enc, buf, bufend);
6791 break;
6792
6793 case ONIG_TYPE_STRING:
6794 {
6795 UChar* rs = onigenc_strdup(enc, buf, bufend);
6796 CHECK_NULL_RETURN_MEMERR(rs);
6797 vals[n].s.start = rs;
6798 vals[n].s.end = rs + (e - s);
6799 }
6800 break;
6801
6802 case ONIG_TYPE_TAG:
6803 if (eesc != 0 || ! is_allowed_callout_tag_name(enc, s, e))
6804 return ONIGERR_INVALID_CALLOUT_TAG_NAME;
6805
6806 vals[n].s.start = s;
6807 vals[n].s.end = e;
6808 break;
6809
6810 case ONIG_TYPE_VOID:
6811 case ONIG_TYPE_POINTER:
6812 return ONIGERR_PARSER_BUG;
6813 break;
6814 }
6815 }
6816
6817 n++;
6818 }
6819
6820 if (c == cterm) break;
6821 }
6822
6823 if (c != cterm) return ONIGERR_INVALID_CALLOUT_PATTERN;
6824
6825 *src = p;
6826 return n;
6827 }
6828
6829 /* (*name[TAG]) (*name[TAG]{a,b,..}) */
6830 static int
6831 parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* env)
6832 {
6833 int r;
6834 int i;
6835 int in;
6836 int num;
6837 int name_id;
6838 int arg_num;
6839 int max_arg_num;
6840 int opt_arg_num;
6841 int is_not_single;
6842 OnigCodePoint c;
6843 UChar* name_start;
6844 UChar* name_end;
6845 UChar* tag_start;
6846 UChar* tag_end;
6847 Node* node;
6848 CalloutListEntry* e;
6849 RegexExt* ext;
6850 unsigned int types[ONIG_CALLOUT_MAX_ARGS_NUM];
6851 OnigValue vals[ONIG_CALLOUT_MAX_ARGS_NUM];
6852 OnigEncoding enc = env->enc;
6853 UChar* p = *src;
6854
6855 /* PFETCH_READY; */
6856 if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
6857
6858 node = 0;
6859 name_start = p;
6860 while (1) {
6861 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
6862 name_end = p;
6863 PFETCH_S(c);
6864 if (c == cterm || c == '[' || c == '{') break;
6865 }
6866
6867 if (! is_allowed_callout_name(enc, name_start, name_end))
6868 return ONIGERR_INVALID_CALLOUT_NAME;
6869
6870 if (c == '[') {
6871 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
6872 tag_start = p;
6873 while (! PEND) {
6874 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
6875 tag_end = p;
6876 PFETCH_S(c);
6877 if (c == ']') break;
6878 }
6879 if (! is_allowed_callout_tag_name(enc, tag_start, tag_end))
6880 return ONIGERR_INVALID_CALLOUT_TAG_NAME;
6881
6882 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
6883 PFETCH_S(c);
6884 }
6885 else {
6886 tag_start = tag_end = 0;
6887 }
6888
6889 if (c == '{') {
6890 UChar* save;
6891
6892 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
6893
6894 /* read for single check only */
6895 save = p;
6896 arg_num = parse_callout_args(1, '}', &p, end, 0, 0, env);
6897 if (arg_num < 0) return arg_num;
6898
6899 is_not_single = PPEEK_IS(cterm) ? 0 : 1;
6900 p = save;
6901 r = get_callout_name_id_by_name(enc, is_not_single, name_start, name_end,
6902 &name_id);
6903 if (r != ONIG_NORMAL) return r;
6904
6905 max_arg_num = get_callout_arg_num_by_name_id(name_id);
6906 for (i = 0; i < max_arg_num; i++) {
6907 types[i] = get_callout_arg_type_by_name_id(name_id, i);
6908 }
6909
6910 arg_num = parse_callout_args(0, '}', &p, end, types, vals, env);
6911 if (arg_num < 0) return arg_num;
6912
6913 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
6914 PFETCH_S(c);
6915 }
6916 else {
6917 arg_num = 0;
6918
6919 is_not_single = 0;
6920 r = get_callout_name_id_by_name(enc, is_not_single, name_start, name_end,
6921 &name_id);
6922 if (r != ONIG_NORMAL) return r;
6923
6924 max_arg_num = get_callout_arg_num_by_name_id(name_id);
6925 for (i = 0; i < max_arg_num; i++) {
6926 types[i] = get_callout_arg_type_by_name_id(name_id, i);
6927 }
6928 }
6929
6930 in = onig_get_callout_in_by_name_id(name_id);
6931 opt_arg_num = get_callout_opt_arg_num_by_name_id(name_id);
6932 if (arg_num > max_arg_num || arg_num < (max_arg_num - opt_arg_num))
6933 return ONIGERR_INVALID_CALLOUT_ARG;
6934
6935 if (c != cterm)
6936 return ONIGERR_INVALID_CALLOUT_PATTERN;
6937
6938 r = reg_callout_list_entry(env, &num);
6939 if (r != 0) return r;
6940
6941 ext = onig_get_regex_ext(env->reg);
6942 CHECK_NULL_RETURN_MEMERR(ext);
6943 if (IS_NULL(ext->pattern)) {
6944 r = onig_ext_set_pattern(env->reg, env->pattern, env->pattern_end);
6945 if (r != ONIG_NORMAL) return r;
6946 }
6947
6948 if (tag_start != tag_end) {
6949 r = callout_tag_entry(env->reg, tag_start, tag_end, num);
6950 if (r != ONIG_NORMAL) return r;
6951 }
6952
6953 r = node_new_callout(&node, ONIG_CALLOUT_OF_NAME, num, name_id, env);
6954 if (r != ONIG_NORMAL) return r;
6955
6956 e = onig_reg_callout_list_at(env->reg, num);
6957 CHECK_NULL_RETURN_MEMERR(e);
6958
6959 e->of = ONIG_CALLOUT_OF_NAME;
6960 e->in = in;
6961 e->name_id = name_id;
6962 e->type = onig_get_callout_type_by_name_id(name_id);
6963 e->start_func = onig_get_callout_start_func_by_name_id(name_id);
6964 e->end_func = onig_get_callout_end_func_by_name_id(name_id);
6965 e->u.arg.num = max_arg_num;
6966 e->u.arg.passed_num = arg_num;
6967 for (i = 0; i < max_arg_num; i++) {
6968 e->u.arg.types[i] = types[i];
6969 if (i < arg_num)
6970 e->u.arg.vals[i] = vals[i];
6971 else
6972 e->u.arg.vals[i] = get_callout_opt_default_by_name_id(name_id, i);
6973 }
6974
6975 *np = node;
6976 *src = p;
6977 return 0;
6978 }
6979 #endif
6980
/*
 * Parse the construct that follows an opening parenthesis.  parse_exp()
 * calls this for a TK_SUBEXP_OPEN token.  The first character at *src
 * selects the kind of group: '?' (when the syntax enables it) introduces
 * the extended forms handled by the switch below, '*' introduces a callout
 * of name (USE_CALLOUT only), anything else is a plain group.
 *
 *   np    receives the resulting node (set to NULL first)
 *   tok   token buffer, refilled via fetch_token()
 *   term  terminator token passed on to parse_subexp()
 *   src   in: current parse position, out: position after the construct
 *   end   end of the pattern
 *   env   scan environment
 *
 * Return value:
 *    0  *np holds a complete node
 *    1  "group": the parentheses only group; *np is the sub-expression
 *    2  "option only": *np is an option node whose body the caller
 *       (parse_exp) still has to parse and attach
 *   <0  an ONIGERR_* code
 */
6981 static int
6982 parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
6983 ScanEnv* env)
6984 {
6985 int r, num;
6986 Node *target;
6987 OnigOptionType option;
6988 OnigCodePoint c;
6989 int list_capture;
6990 OnigEncoding enc = env->enc;
6991
6992 UChar* p = *src;
6993 PFETCH_READY;
6994
6995 *np = NULL;
6996 if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
6997
6998 option = env->options;
6999 c = PPEEK;
7000 if (c == '?' && IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {
7001 PINC;
7002 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7003
7004 PFETCH(c);
/* c is the character after "(?"; each case either returns, jumps to
   "end", or breaks out to the common body parsing after the switch. */
7005 switch (c) {
7006 case ':': /* (?:...) grouping only */
/* "group" is also the target for plain parentheses when
   ONIG_OPTION_DONT_CAPTURE_GROUP is on (see the final else branch). */
7007 group:
7008 r = fetch_token(tok, &p, end, env);
7009 if (r < 0) return r;
7010 r = parse_subexp(np, tok, term, &p, end, env);
7011 if (r < 0) return r;
7012 *src = p;
7013 return 1; /* group */
7014 break;
7015
/* (?=...) */
7016 case '=':
7017 *np = onig_node_new_anchor(ANCHOR_PREC_READ, 0);
7018 break;
/* (?!...) */
7019 case '!': /* preceding read */
7020 *np = onig_node_new_anchor(ANCHOR_PREC_READ_NOT, 0);
7021 break;
7022 case '>': /* (?>...) stop backtrack */
7023 *np = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);
7024 break;
7025
/* (?'name'...): c stays '\'' and is passed to fetch_name() at
   named_group2. */
7026 case '\'':
7027 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
7028 goto named_group1;
7029 }
7030 else
7031 return ONIGERR_UNDEFINED_GROUP_OPTION;
7032 break;
7033
7034 case '<': /* look behind (?<=...), (?<!...) */
7035 if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
7036 PFETCH(c);
7037 if (c == '=')
7038 *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND, 0);
7039 else if (c == '!')
7040 *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND_NOT, 0);
7041 else {
/* Neither '=' nor '!': treat it as (?<name>...) if the syntax allows. */
7042 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
7043 UChar *name;
7044 UChar *name_end;
7045 enum REF_NUM num_type;
7046
/* Put back the character just read; it belongs to the name. */
7047 PUNFETCH;
7048 c = '<';
7049
/* Entered from case '\'' and by falling through from above:
   a named group without capture history. */
7050 named_group1:
7051 list_capture = 0;
7052
/* Entered from case '@' with list_capture = 1. */
7053 named_group2:
7054 name = p;
7055 r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num,
7056 &num_type, 0);
7057 if (r < 0) return r;
7058
7059 num = scan_env_add_mem_entry(env);
7060 if (num < 0) return num;
7061 if (list_capture != 0 && num >= (int )MEM_STATUS_BITS_NUM)
7062 return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
7063
7064 r = name_add(env->reg, name, name_end, num, env);
7065 if (r != 0) return r;
7066 *np = node_new_memory(1);
7067 CHECK_NULL_RETURN_MEMERR(*np);
7068 ENCLOSURE_(*np)->m.regnum = num;
7069 if (list_capture != 0)
7070 MEM_STATUS_ON_SIMPLE(env->capture_history, num);
7071 env->num_named++;
7072 }
7073 else {
7074 return ONIGERR_UNDEFINED_GROUP_OPTION;
7075 }
7076 }
7077 break;
7078
/* (?~...) absent group */
7079 case '~':
7080 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP)) {
7081 Node* absent;
7082 Node* expr;
7083 int head_bar;
7084 int is_range_cutter;
7085
7086 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7087
7088 if (PPEEK_IS('|')) { /* (?~|generator|absent) */
7089 PINC;
7090 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7091
7092 head_bar = 1;
7093 if (PPEEK_IS(')')) { /* (?~|) : range clear */
7094 PINC;
7095 r = make_range_clear(np, env);
7096 if (r != 0) return r;
7097 goto end;
7098 }
7099 }
7100 else
7101 head_bar = 0;
7102
7103 r = fetch_token(tok, &p, end, env);
7104 if (r < 0) return r;
7105 r = parse_subexp(&absent, tok, term, &p, end, env);
7106 if (r < 0) {
7107 onig_node_free(absent);
7108 return r;
7109 }
7110
7111 expr = NULL_NODE;
7112 is_range_cutter = 0;
7113 if (head_bar != 0) {
/* With a leading '|': a body that is not an alternation with a
   second branch is flagged as a range cutter; otherwise the first
   branch becomes "absent" and the remainder becomes "expr". */
7114 Node* top = absent;
7115 if (NODE_TYPE(top) != NODE_ALT || IS_NULL(NODE_CDR(top))) {
7116 expr = NULL_NODE;
7117 is_range_cutter = 1;
7118 /* return ONIGERR_INVALID_ABSENT_GROUP_GENERATOR_PATTERN; */
7119 }
7120 else {
7121 absent = NODE_CAR(top);
7122 expr = NODE_CDR(top);
7123 NODE_CAR(top) = NULL_NODE;
7124 NODE_CDR(top) = NULL_NODE;
7125 onig_node_free(top);
/* A single remaining branch is unwrapped from its ALT cell. */
7126 if (IS_NULL(NODE_CDR(expr))) {
7127 top = expr;
7128 expr = NODE_CAR(top);
7129 NODE_CAR(top) = NULL_NODE;
7130 onig_node_free(top);
7131 }
7132 }
7133 }
7134
7135 r = make_absent_tree(np, absent, expr, is_range_cutter, env);
7136 if (r != 0) {
7137 return r;
7138 }
7139 goto end;
7140 }
7141 else {
7142 return ONIGERR_UNDEFINED_GROUP_OPTION;
7143 }
7144 break;
7145
7146 #ifdef USE_CALLOUT
/* (?{...}) callout of contents */
7147 case '{':
7148 if (! IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS))
7149 return ONIGERR_UNDEFINED_GROUP_OPTION;
7150
7151 r = parse_callout_of_contents(np, ')', &p, end, env);
7152 if (r != 0) return r;
7153
7154 goto end;
7155 break;
7156 #endif
7157
7158 case '(':
7159 /* (?()...) */
7160 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE)) {
7161 UChar *prev;
7162 Node* condition;
7163 int condition_is_checker;
7164
7165 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7166 PFETCH(c);
7167 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7168
/* A condition starting with a digit, sign, '<' or '\'' is tried as a
   backreference check (by number or by name) first. */
7169 if (IS_CODE_DIGIT_ASCII(enc, c)
7170 || c == '-' || c == '+' || c == '<' || c == '\'') {
7171 UChar* name_end;
7172 int back_num;
7173 int exist_level;
7174 int level;
7175 enum REF_NUM num_type;
7176 int is_enclosed;
7177
7178 is_enclosed = (c == '<' || c == '\'') ? 1 : 0;
7179 if (! is_enclosed)
7180 PUNFETCH;
7181 prev = p;
7182 exist_level = 0;
7183 #ifdef USE_BACKREF_WITH_LEVEL
7184 name_end = NULL_UCHARP; /* no need. escape gcc warning. */
7185 r = fetch_name_with_level(
7186 (OnigCodePoint )(is_enclosed != 0 ? c : '('),
7187 &p, end, &name_end,
7188 env, &back_num, &level, &num_type);
7189 if (r == 1) exist_level = 1;
7190 #else
7191 r = fetch_name((OnigCodePoint )(is_enclosed != 0 ? c : '('),
7192 &p, end, &name_end, env, &back_num, &num_type, 1);
7193 #endif
7194 if (r < 0) {
/* Without <...> or '...' quoting a failed name fetch is not an
   error: the condition is parsed as an ordinary sub-expression. */
7195 if (is_enclosed == 0) {
7196 goto any_condition;
7197 }
7198 else
7199 return r;
7200 }
7201
7202 condition_is_checker = 1;
7203 if (num_type != IS_NOT_NUM) {
7204 if (num_type == IS_REL_NUM) {
7205 back_num = backref_rel_to_abs(back_num, env);
7206 }
7207 if (back_num <= 0)
7208 return ONIGERR_INVALID_BACKREF;
7209
7210 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_STRICT_CHECK_BACKREF)) {
7211 if (back_num > env->num_mem ||
7212 IS_NULL(SCANENV_MEMENV(env)[back_num].node))
7213 return ONIGERR_INVALID_BACKREF;
7214 }
7215
7216 condition = node_new_backref_checker(1, &back_num, 0,
7217 #ifdef USE_BACKREF_WITH_LEVEL
7218 exist_level, level,
7219 #endif
7220 env);
7221 }
7222 else {
7223 int num;
7224 int* backs;
7225
7226 num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);
7227 if (num <= 0) {
7228 onig_scan_env_set_error_string(env,
7229 ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);
7230 return ONIGERR_UNDEFINED_NAME_REFERENCE;
7231 }
7232 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_STRICT_CHECK_BACKREF)) {
7233 int i;
7234 for (i = 0; i < num; i++) {
7235 if (backs[i] > env->num_mem ||
7236 IS_NULL(SCANENV_MEMENV(env)[backs[i]].node))
7237 return ONIGERR_INVALID_BACKREF;
7238 }
7239 }
7240
7241 condition = node_new_backref_checker(num, backs, 1,
7242 #ifdef USE_BACKREF_WITH_LEVEL
7243 exist_level, level,
7244 #endif
7245 env);
7246 }
7247
/* A quoted name must be followed by the ')' that closes the condition. */
7248 if (is_enclosed != 0) {
7249 if (PEND) goto err_if_else;
7250 PFETCH(c);
7251 if (c != ')') goto err_if_else;
7252 }
7253 }
7254 #ifdef USE_CALLOUT
7255 else if (c == '?') {
7256 if (IS_SYNTAX_OP2(env->syntax,
7257 ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS)) {
7258 if (! PEND && PPEEK_IS('{')) {
7259 /* condition part is callouts of contents: (?(?{...})THEN|ELSE) */
7260 condition_is_checker = 0;
7261 PFETCH(c);
7262 r = parse_callout_of_contents(&condition, ')', &p, end, env);
7263 if (r != 0) return r;
7264 goto end_condition;
7265 }
7266 }
7267 goto any_condition;
7268 }
/* condition part is a callout of name */
7269 else if (c == '*' &&
7270 IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME)) {
7271 condition_is_checker = 0;
7272 r = parse_callout_of_name(&condition, ')', &p, end, env);
7273 if (r != 0) return r;
7274 goto end_condition;
7275 }
7276 #endif
7277 else {
/* Generic condition: put back the character read above and parse the
   condition as a sub-expression. */
7278 any_condition:
7279 PUNFETCH;
7280 condition_is_checker = 0;
7281 r = fetch_token(tok, &p, end, env);
7282 if (r < 0) return r;
7283 r = parse_subexp(&condition, tok, term, &p, end, env);
7284 if (r < 0) {
7285 onig_node_free(condition);
7286 return r;
7287 }
7288 }
7289
7290 end_condition:
7291 CHECK_NULL_RETURN_MEMERR(condition);
7292
7293 if (PEND) {
/* Shared failure exit of the condition parsing: releases the condition
   node and reports an unterminated group. */
7294 err_if_else:
7295 onig_node_free(condition);
7296 return ONIGERR_END_PATTERN_IN_GROUP;
7297 }
7298
7299 if (PPEEK_IS(')')) { /* case: empty body: make backref checker */
7300 if (condition_is_checker == 0) {
7301 onig_node_free(condition);
7302 return ONIGERR_INVALID_IF_ELSE_SYNTAX;
7303 }
7304 PFETCH(c);
7305 *np = condition;
7306 }
7307 else { /* if-else */
7308 int then_is_empty;
7309 Node *Then, *Else;
7310
/* A '|' directly after the condition means the THEN part is empty. */
7311 if (PPEEK_IS('|')) {
7312 PFETCH(c);
7313 Then = 0;
7314 then_is_empty = 1;
7315 }
7316 else
7317 then_is_empty = 0;
7318
7319 r = fetch_token(tok, &p, end, env);
7320 if (r < 0) {
7321 onig_node_free(condition);
7322 return r;
7323 }
7324 r = parse_subexp(&target, tok, term, &p, end, env);
7325 if (r < 0) {
7326 onig_node_free(condition);
7327 onig_node_free(target);
7328 return r;
7329 }
7330
7331 if (then_is_empty != 0) {
7332 Else = target;
7333 }
7334 else {
/* Split a top-level alternation: the first branch is THEN, the rest is
   ELSE (unwrapped from its ALT cell when only one branch remains). */
7335 if (NODE_TYPE(target) == NODE_ALT) {
7336 Then = NODE_CAR(target);
7337 if (NODE_CDR(NODE_CDR(target)) == NULL_NODE) {
7338 Else = NODE_CAR(NODE_CDR(target));
7339 cons_node_free_alone(NODE_CDR(target));
7340 }
7341 else {
7342 Else = NODE_CDR(target);
7343 }
7344 cons_node_free_alone(target);
7345 }
7346 else {
7347 Then = target;
7348 Else = 0;
7349 }
7350 }
7351
7352 *np = node_new_enclosure_if_else(condition, Then, Else);
7353 if (IS_NULL(*np)) {
7354 onig_node_free(condition);
7355 onig_node_free(Then);
7356 onig_node_free(Else);
7357 return ONIGERR_MEMORY;
7358 }
7359 }
7360 goto end;
7361 }
7362 else {
7363 return ONIGERR_UNDEFINED_GROUP_OPTION;
7364 }
7365 break;
7366
/* (?@...) capture history, optionally combined with a name:
   (?@<name>...) */
7367 case '@':
7368 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) {
7369 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
7370 PFETCH(c);
7371 if (c == '<' || c == '\'') {
7372 list_capture = 1;
7373 goto named_group2; /* (?@<name>...) */
7374 }
7375 PUNFETCH;
7376 }
7377
7378 *np = node_new_memory(0);
7379 CHECK_NULL_RETURN_MEMERR(*np);
7380 num = scan_env_add_mem_entry(env);
7381 if (num < 0) {
7382 return num;
7383 }
7384 else if (num >= (int )MEM_STATUS_BITS_NUM) {
7385 return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
7386 }
7387 ENCLOSURE_(*np)->m.regnum = num;
7388 MEM_STATUS_ON_SIMPLE(env->capture_history, num);
7389 }
7390 else {
7391 return ONIGERR_UNDEFINED_GROUP_OPTION;
7392 }
7393 break;
7394
/* Option letters: (?imx-imx) and (?imx-imx:...).  The loop consumes one
   character per iteration until it reaches ')' or ':'. */
7395 #ifdef USE_POSIXLINE_OPTION
7396 case 'p':
7397 #endif
7398 case '-': case 'i': case 'm': case 's': case 'x':
7399 case 'W': case 'D': case 'S': case 'P':
7400 {
/* neg becomes 1 once '-' has been seen; later letters then clear the
   option instead of setting it. */
7401 int neg = 0;
7402
7403 while (1) {
7404 switch (c) {
7405 case ':':
7406 case ')':
7407 break;
7408
7409 case '-': neg = 1; break;
7410 case 'x': OPTION_NEGATE(option, ONIG_OPTION_EXTEND, neg); break;
7411 case 'i': OPTION_NEGATE(option, ONIG_OPTION_IGNORECASE, neg); break;
7412 case 's':
7413 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
7414 OPTION_NEGATE(option, ONIG_OPTION_MULTILINE, neg);
7415 }
7416 else
7417 return ONIGERR_UNDEFINED_GROUP_OPTION;
7418 break;
7419
/* 'm' depends on the syntax: with OPTION_PERL it toggles
   ONIG_OPTION_SINGLELINE with the sense inverted, with OPTION_RUBY it
   toggles ONIG_OPTION_MULTILINE. */
7420 case 'm':
7421 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
7422 OPTION_NEGATE(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 1 : 0));
7423 }
7424 else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) {
7425 OPTION_NEGATE(option, ONIG_OPTION_MULTILINE, neg);
7426 }
7427 else
7428 return ONIGERR_UNDEFINED_GROUP_OPTION;
7429 break;
7430 #ifdef USE_POSIXLINE_OPTION
7431 case 'p':
7432 OPTION_NEGATE(option, ONIG_OPTION_MULTILINE|ONIG_OPTION_SINGLELINE, neg);
7433 break;
7434 #endif
7435 case 'W': OPTION_NEGATE(option, ONIG_OPTION_WORD_IS_ASCII, neg); break;
7436 case 'D': OPTION_NEGATE(option, ONIG_OPTION_DIGIT_IS_ASCII, neg); break;
7437 case 'S': OPTION_NEGATE(option, ONIG_OPTION_SPACE_IS_ASCII, neg); break;
7438 case 'P': OPTION_NEGATE(option, ONIG_OPTION_POSIX_IS_ASCII, neg); break;
7439
7440 default:
7441 return ONIGERR_UNDEFINED_GROUP_OPTION;
7442 }
7443
/* ')' right after the letters: return a body-less option node. */
7444 if (c == ')') {
7445 *np = node_new_option(option);
7446 CHECK_NULL_RETURN_MEMERR(*np);
7447 *src = p;
7448 return 2; /* option only */
7449 }
/* ':' after the letters: parse the body with the new options in effect,
   then restore the previous options. */
7450 else if (c == ':') {
7451 OnigOptionType prev = env->options;
7452
7453 env->options = option;
7454 r = fetch_token(tok, &p, end, env);
7455 if (r < 0) return r;
7456 r = parse_subexp(&target, tok, term, &p, end, env);
7457 env->options = prev;
7458 if (r < 0) {
7459 onig_node_free(target);
7460 return r;
7461 }
/* NOTE(review): target is not released if node_new_option() returns
   NULL here - confirm whether a caller cleans it up. */
7462 *np = node_new_option(option);
7463 CHECK_NULL_RETURN_MEMERR(*np);
7464 NODE_BODY(*np) = target;
7465 *src = p;
7466 return 0;
7467 }
7468
7469 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7470 PFETCH(c);
7471 }
7472 }
7473 break;
7474
7475 default:
7476 return ONIGERR_UNDEFINED_GROUP_OPTION;
7477 }
7478 }
7479 #ifdef USE_CALLOUT
/* (*name...) callout of name */
7480 else if (c == '*' &&
7481 IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME)) {
7482 PINC;
7483 r = parse_callout_of_name(np, ')', &p, end, env);
7484 if (r != 0) return r;
7485
7486 goto end;
7487 }
7488 #endif
7489 else {
/* Plain parentheses: a numbered capture group, unless capturing is
   disabled by option. */
7490 if (ONIG_IS_OPTION_ON(env->options, ONIG_OPTION_DONT_CAPTURE_GROUP))
7491 goto group;
7492
7493 *np = node_new_memory(0);
7494 CHECK_NULL_RETURN_MEMERR(*np);
7495 num = scan_env_add_mem_entry(env);
7496 if (num < 0) return num;
7497 ENCLOSURE_(*np)->m.regnum = num;
7498 }
7499
/* Common tail for the cases that created *np and left the switch with
   "break": parse the group body and attach it to *np. */
7500 CHECK_NULL_RETURN_MEMERR(*np);
7501 r = fetch_token(tok, &p, end, env);
7502 if (r < 0) return r;
7503 r = parse_subexp(&target, tok, term, &p, end, env);
7504 if (r < 0) {
7505 onig_node_free(target);
7506 return r;
7507 }
7508
7509 NODE_BODY(*np) = target;
7510
7511 if (NODE_TYPE(*np) == NODE_ENCLOSURE) {
7512 if (ENCLOSURE_(*np)->type == ENCLOSURE_MEMORY) {
7513 /* Don't move this to previous of parse_subexp() */
7514 r = scan_env_set_mem_node(env, ENCLOSURE_(*np)->m.regnum, *np);
7515 if (r != 0) return r;
7516 }
7517 }
7518
/* Success exit for the cases that built a complete node themselves. */
7519 end:
7520 *src = p;
7521 return 0;
7522 }
7523
/* Display strings for the quantifier kinds.  set_quantifier() indexes this
   table with the value returned by quantifier_type_num() when it builds
   the "nested repeat operator" warning. */
7524 static const char* PopularQStr[] = {
7525 "?", "*", "+", "??", "*?", "+?"
7526 };
7527
/* Display strings for the replacement named in that warning, indexed in
   set_quantifier() by the ReduceTypeTable entry for the two quantifiers. */
7528 static const char* ReduceQStr[] = {
7529 "", "", "*", "*?", "??", "+ and ??", "+? and ?"
7530 };
7531
7532 static int
7533 set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)
7534 {
7535 QuantNode* qn;
7536
7537 qn = QUANT_(qnode);
7538 if (qn->lower == 1 && qn->upper == 1)
7539 return 1;
7540
7541 switch (NODE_TYPE(target)) {
7542 case NODE_STRING:
7543 if (! group) {
7544 if (str_node_can_be_split(target, env->enc)) {
7545 Node* n = str_node_split_last_char(target, env->enc);
7546 if (IS_NOT_NULL(n)) {
7547 NODE_BODY(qnode) = n;
7548 return 2;
7549 }
7550 }
7551 }
7552 break;
7553
7554 case NODE_QUANT:
7555 { /* check redundant double repeat. */
7556 /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */
7557 QuantNode* qnt = QUANT_(target);
7558 int nestq_num = quantifier_type_num(qn);
7559 int targetq_num = quantifier_type_num(qnt);
7560
7561 #ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
7562 if (targetq_num >= 0 && nestq_num >= 0 &&
7563 IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {
7564 UChar buf[WARN_BUFSIZE];
7565
7566 switch(ReduceTypeTable[targetq_num][nestq_num]) {
7567 case RQ_ASIS:
7568 break;
7569
7570 case RQ_DEL:
7571 if (onig_verb_warn != onig_null_warn) {
7572 onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
7573 env->pattern, env->pattern_end,
7574 (UChar* )"redundant nested repeat operator");
7575 (*onig_verb_warn)((char* )buf);
7576 }
7577 goto warn_exit;
7578 break;
7579
7580 default:
7581 if (onig_verb_warn != onig_null_warn) {
7582 onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
7583 env->pattern, env->pattern_end,
7584 (UChar* )"nested repeat operator %s and %s was replaced with '%s'",
7585 PopularQStr[targetq_num], PopularQStr[nestq_num],
7586 ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]);
7587 (*onig_verb_warn)((char* )buf);
7588 }
7589 goto warn_exit;
7590 break;
7591 }
7592 }
7593
7594 warn_exit:
7595 #endif
7596 if (targetq_num >= 0 && nestq_num < 0) {
7597 if (targetq_num == 1 || targetq_num == 2) { /* * or + */
7598 /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */
7599 if (! IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) {
7600 qn->upper = (qn->lower == 0 ? 1 : qn->lower);
7601 }
7602 }
7603 }
7604 else {
7605 NODE_BODY(qnode) = target;
7606 onig_reduce_nested_quantifier(qnode, target);
7607 goto q_exit;
7608 }
7609 }
7610 break;
7611
7612 default:
7613 break;
7614 }
7615
7616 NODE_BODY(qnode) = target;
7617 q_exit:
7618 return 0;
7619 }
7620
7621
7622 #ifndef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
7623 static int
7624 clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc)
7625 {
7626 BBuf *tbuf;
7627 int r;
7628
7629 if (IS_NCCLASS_NOT(cc)) {
7630 bitset_invert(cc->bs);
7631
7632 if (! ONIGENC_IS_SINGLEBYTE(enc)) {
7633 r = not_code_range_buf(enc, cc->mbuf, &tbuf);
7634 if (r != 0) return r;
7635
7636 bbuf_free(cc->mbuf);
7637 cc->mbuf = tbuf;
7638 }
7639
7640 NCCLASS_CLEAR_NOT(cc);
7641 }
7642
7643 return 0;
7644 }
7645 #endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */
7646
/* Context handed to i_apply_case_fold() through its void* argument. */
7647 typedef struct {
/* scan environment; supplies the encoding */
7648 ScanEnv* env;
/* character class that is tested and extended */
7649 CClassNode* cc;
/* not accessed by i_apply_case_fold(); presumably the head of the
   alternative list that ptail extends - confirm at the call site */
7650 Node* alt_root;
/* slot that receives the next alternative node; advanced to the new
   node's CDR after each append */
7651 Node** ptail;
7652 } IApplyCaseFoldArg;
7653
7654 static int
7655 i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], int to_len, void* arg)
7656 {
7657 IApplyCaseFoldArg* iarg;
7658 ScanEnv* env;
7659 CClassNode* cc;
7660 BitSetRef bs;
7661
7662 iarg = (IApplyCaseFoldArg* )arg;
7663 env = iarg->env;
7664 cc = iarg->cc;
7665 bs = cc->bs;
7666
7667 if (to_len == 1) {
7668 int is_in = onig_is_code_in_cc(env->enc, from, cc);
7669 #ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
7670 if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) ||
7671 (is_in == 0 && IS_NCCLASS_NOT(cc))) {
7672 if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
7673 add_code_range(&(cc->mbuf), env, *to, *to);
7674 }
7675 else {
7676 BITSET_SET_BIT(bs, *to);
7677 }
7678 }
7679 #else
7680 if (is_in != 0) {
7681 if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
7682 if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc);
7683 add_code_range(&(cc->mbuf), env, *to, *to);
7684 }
7685 else {
7686 if (IS_NCCLASS_NOT(cc)) {
7687 BITSET_CLEAR_BIT(bs, *to);
7688 }
7689 else
7690 BITSET_SET_BIT(bs, *to);
7691 }
7692 }
7693 #endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */
7694 }
7695 else {
7696 int r, i, len;
7697 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
7698 Node *snode = NULL_NODE;
7699
7700 if (onig_is_code_in_cc(env->enc, from, cc)
7701 #ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
7702 && !IS_NCCLASS_NOT(cc)
7703 #endif
7704 ) {
7705 for (i = 0; i < to_len; i++) {
7706 len = ONIGENC_CODE_TO_MBC(env->enc, to[i], buf);
7707 if (i == 0) {
7708 snode = onig_node_new_str(buf, buf + len);
7709 CHECK_NULL_RETURN_MEMERR(snode);
7710
7711 /* char-class expanded multi-char only
7712 compare with string folded at match time. */
7713 NODE_STRING_SET_AMBIG(snode);
7714 }
7715 else {
7716 r = onig_node_str_cat(snode, buf, buf + len);
7717 if (r < 0) {
7718 onig_node_free(snode);
7719 return r;
7720 }
7721 }
7722 }
7723
7724 *(iarg->ptail) = onig_node_new_alt(snode, NULL_NODE);
7725 CHECK_NULL_RETURN_MEMERR(*(iarg->ptail));
7726 iarg->ptail = &(NODE_CDR((*(iarg->ptail))));
7727 }
7728 }
7729
7730 return 0;
7731 }
7732
7733 static int
7734 parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
7735 ScanEnv* env)
7736 {
7737 int r, len, group = 0;
7738 Node* qn;
7739 Node** targetp;
7740
7741 *np = NULL;
7742 if (tok->type == (enum TokenSyms )term)
7743 goto end_of_token;
7744
7745 switch (tok->type) {
7746 case TK_ALT:
7747 case TK_EOT:
7748 end_of_token:
7749 *np = node_new_empty();
7750 CHECK_NULL_RETURN_MEMERR(*np);
7751 return tok->type;
7752 break;
7753
7754 case TK_SUBEXP_OPEN:
7755 r = parse_enclosure(np, tok, TK_SUBEXP_CLOSE, src, end, env);
7756 if (r < 0) return r;
7757 if (r == 1) group = 1;
7758 else if (r == 2) { /* option only */
7759 Node* target;
7760 OnigOptionType prev = env->options;
7761
7762 env->options = ENCLOSURE_(*np)->o.options;
7763 r = fetch_token(tok, src, end, env);
7764 if (r < 0) return r;
7765 r = parse_subexp(&target, tok, term, src, end, env);
7766 env->options = prev;
7767 if (r < 0) {
7768 onig_node_free(target);
7769 return r;
7770 }
7771 NODE_BODY(*np) = target;
7772 return tok->type;
7773 }
7774 break;
7775
7776 case TK_SUBEXP_CLOSE:
7777 if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP))
7778 return ONIGERR_UNMATCHED_CLOSE_PARENTHESIS;
7779
7780 if (tok->escaped) goto tk_raw_byte;
7781 else goto tk_byte;
7782 break;
7783
7784 case TK_STRING:
7785 tk_byte:
7786 {
7787 *np = node_new_str(tok->backp, *src);
7788 CHECK_NULL_RETURN_MEMERR(*np);
7789
7790 while (1) {
7791 r = fetch_token(tok, src, end, env);
7792 if (r < 0) return r;
7793 if (r != TK_STRING) break;
7794
7795 r = onig_node_str_cat(*np, tok->backp, *src);
7796 if (r < 0) return r;
7797 }
7798
7799 string_end:
7800 targetp = np;
7801 goto repeat;
7802 }
7803 break;
7804
7805 case TK_RAW_BYTE:
7806 tk_raw_byte:
7807 {
7808 *np = node_new_str_raw_char((UChar )tok->u.c);
7809 CHECK_NULL_RETURN_MEMERR(*np);
7810 len = 1;
7811 while (1) {
7812 if (len >= ONIGENC_MBC_MINLEN(env->enc)) {
7813 if (len == enclen(env->enc, STR_(*np)->s)) {/* should not enclen_end() */
7814 r = fetch_token(tok, src, end, env);
7815 NODE_STRING_CLEAR_RAW(*np);
7816 goto string_end;
7817 }
7818 }
7819
7820 r = fetch_token(tok, src, end, env);
7821 if (r < 0) return r;
7822 if (r != TK_RAW_BYTE) {
7823 /* Don't use this, it is wrong for little endian encodings. */
7824 #ifdef USE_PAD_TO_SHORT_BYTE_CHAR
7825 int rem;
7826 if (len < ONIGENC_MBC_MINLEN(env->enc)) {
7827 rem = ONIGENC_MBC_MINLEN(env->enc) - len;
7828 (void )node_str_head_pad(STR_(*np), rem, (UChar )0);
7829 if (len + rem == enclen(env->enc, STR_(*np)->s)) {
7830 NODE_STRING_CLEAR_RAW(*np);
7831 goto string_end;
7832 }
7833 }
7834 #endif
7835 return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
7836 }
7837
7838 r = node_str_cat_char(*np, (UChar )tok->u.c);
7839 if (r < 0) return r;
7840
7841 len++;
7842 }
7843 }
7844 break;
7845
7846 case TK_CODE_POINT:
7847 {
7848 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
7849 int num = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf);
7850 if (num < 0) return num;
7851 #ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
7852 *np = node_new_str_raw(buf, buf + num);
7853 #else
7854 *np = node_new_str(buf, buf + num);
7855 #endif
7856 CHECK_NULL_RETURN_MEMERR(*np);
7857 }
7858 break;
7859
7860 case TK_QUOTE_OPEN:
7861 {
7862 OnigCodePoint end_op[2];
7863 UChar *qstart, *qend, *nextp;
7864
7865 end_op[0] = (OnigCodePoint )MC_ESC(env->syntax);
7866 end_op[1] = (OnigCodePoint )'E';
7867 qstart = *src;
7868 qend = find_str_position(end_op, 2, qstart, end, &nextp, env->enc);
7869 if (IS_NULL(qend)) {
7870 nextp = qend = end;
7871 }
7872 *np = node_new_str(qstart, qend);
7873 CHECK_NULL_RETURN_MEMERR(*np);
7874 *src = nextp;
7875 }
7876 break;
7877
7878 case TK_CHAR_TYPE:
7879 {
7880 switch (tok->u.prop.ctype) {
7881 case ONIGENC_CTYPE_WORD:
7882 *np = node_new_ctype(tok->u.prop.ctype, tok->u.prop.not, env->options);
7883 CHECK_NULL_RETURN_MEMERR(*np);
7884 break;
7885
7886 case ONIGENC_CTYPE_SPACE:
7887 case ONIGENC_CTYPE_DIGIT:
7888 case ONIGENC_CTYPE_XDIGIT:
7889 {
7890 CClassNode* cc;
7891
7892 *np = node_new_cclass();
7893 CHECK_NULL_RETURN_MEMERR(*np);
7894 cc = CCLASS_(*np);
7895 add_ctype_to_cc(cc, tok->u.prop.ctype, 0, env);
7896 if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
7897 }
7898 break;
7899
7900 default:
7901 return ONIGERR_PARSER_BUG;
7902 break;
7903 }
7904 }
7905 break;
7906
7907 case TK_CHAR_PROPERTY:
7908 r = parse_char_property(np, tok, src, end, env);
7909 if (r != 0) return r;
7910 break;
7911
7912 case TK_CC_OPEN:
7913 {
7914 CClassNode* cc;
7915
7916 r = parse_char_class(np, tok, src, end, env);
7917 if (r != 0) return r;
7918
7919 cc = CCLASS_(*np);
7920 if (IS_IGNORECASE(env->options)) {
7921 IApplyCaseFoldArg iarg;
7922
7923 iarg.env = env;
7924 iarg.cc = cc;
7925 iarg.alt_root = NULL_NODE;
7926 iarg.ptail = &(iarg.alt_root);
7927
7928 r = ONIGENC_APPLY_ALL_CASE_FOLD(env->enc, env->case_fold_flag,
7929 i_apply_case_fold, &iarg);
7930 if (r != 0) {
7931 onig_node_free(iarg.alt_root);
7932 return r;
7933 }
7934 if (IS_NOT_NULL(iarg.alt_root)) {
7935 Node* work = onig_node_new_alt(*np, iarg.alt_root);
7936 if (IS_NULL(work)) {
7937 onig_node_free(iarg.alt_root);
7938 return ONIGERR_MEMORY;
7939 }
7940 *np = work;
7941 }
7942 }
7943 }
7944 break;
7945
7946 case TK_ANYCHAR:
7947 *np = node_new_anychar();
7948 CHECK_NULL_RETURN_MEMERR(*np);
7949 break;
7950
7951 case TK_ANYCHAR_ANYTIME:
7952 *np = node_new_anychar();
7953 CHECK_NULL_RETURN_MEMERR(*np);
7954 qn = node_new_quantifier(0, REPEAT_INFINITE, 0);
7955 CHECK_NULL_RETURN_MEMERR(qn);
7956 NODE_BODY(qn) = *np;
7957 *np = qn;
7958 break;
7959
7960 case TK_BACKREF:
7961 len = tok->u.backref.num;
7962 *np = node_new_backref(len,
7963 (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)),
7964 tok->u.backref.by_name,
7965 #ifdef USE_BACKREF_WITH_LEVEL
7966 tok->u.backref.exist_level,
7967 tok->u.backref.level,
7968 #endif
7969 env);
7970 CHECK_NULL_RETURN_MEMERR(*np);
7971 break;
7972
7973 #ifdef USE_CALL
7974 case TK_CALL:
7975 {
7976 int gnum = tok->u.call.gnum;
7977
7978 *np = node_new_call(tok->u.call.name, tok->u.call.name_end,
7979 gnum, tok->u.call.by_number);
7980 CHECK_NULL_RETURN_MEMERR(*np);
7981 env->num_call++;
7982 if (tok->u.call.by_number != 0 && gnum == 0) {
7983 env->has_call_zero = 1;
7984 }
7985 }
7986 break;
7987 #endif
7988
7989 case TK_ANCHOR:
7990 {
7991 int ascii_mode =
7992 IS_WORD_ASCII(env->options) && IS_WORD_ANCHOR_TYPE(tok->u.anchor) ? 1 : 0;
7993 *np = onig_node_new_anchor(tok->u.anchor, ascii_mode);
7994 CHECK_NULL_RETURN_MEMERR(*np);
7995 }
7996 break;
7997
7998 case TK_OP_REPEAT:
7999 case TK_INTERVAL:
8000 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS)) {
8001 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS))
8002 return ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED;
8003 else {
8004 *np = node_new_empty();
8005 CHECK_NULL_RETURN_MEMERR(*np);
8006 }
8007 }
8008 else {
8009 goto tk_byte;
8010 }
8011 break;
8012
8013 case TK_KEEP:
8014 r = node_new_keep(np, env);
8015 if (r < 0) return r;
8016 break;
8017
8018 case TK_GENERAL_NEWLINE:
8019 r = node_new_general_newline(np, env);
8020 if (r < 0) return r;
8021 break;
8022
8023 case TK_NO_NEWLINE:
8024 r = node_new_no_newline(np, env);
8025 if (r < 0) return r;
8026 break;
8027
8028 case TK_TRUE_ANYCHAR:
8029 r = node_new_true_anychar(np, env);
8030 if (r < 0) return r;
8031 break;
8032
8033 case TK_EXTENDED_GRAPHEME_CLUSTER:
8034 r = make_extended_grapheme_cluster(np, env);
8035 if (r < 0) return r;
8036 break;
8037
8038 default:
8039 return ONIGERR_PARSER_BUG;
8040 break;
8041 }
8042
8043 {
8044 targetp = np;
8045
8046 re_entry:
8047 r = fetch_token(tok, src, end, env);
8048 if (r < 0) return r;
8049
8050 repeat:
8051 if (r == TK_OP_REPEAT || r == TK_INTERVAL) {
8052 if (is_invalid_quantifier_target(*targetp))
8053 return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID;
8054
8055 qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper,
8056 (r == TK_INTERVAL ? 1 : 0));
8057 CHECK_NULL_RETURN_MEMERR(qn);
8058 QUANT_(qn)->greedy = tok->u.repeat.greedy;
8059 r = set_quantifier(qn, *targetp, group, env);
8060 if (r < 0) {
8061 onig_node_free(qn);
8062 return r;
8063 }
8064
8065 if (tok->u.repeat.possessive != 0) {
8066 Node* en;
8067 en = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);
8068 if (IS_NULL(en)) {
8069 onig_node_free(qn);
8070 return ONIGERR_MEMORY;
8071 }
8072 NODE_BODY(en) = qn;
8073 qn = en;
8074 }
8075
8076 if (r == 0) {
8077 *targetp = qn;
8078 }
8079 else if (r == 1) {
8080 onig_node_free(qn);
8081 }
8082 else if (r == 2) { /* split case: /abc+/ */
8083 Node *tmp;
8084
8085 *targetp = node_new_list(*targetp, NULL);
8086 if (IS_NULL(*targetp)) {
8087 onig_node_free(qn);
8088 return ONIGERR_MEMORY;
8089 }
8090 tmp = NODE_CDR(*targetp) = node_new_list(qn, NULL);
8091 if (IS_NULL(tmp)) {
8092 onig_node_free(qn);
8093 return ONIGERR_MEMORY;
8094 }
8095 targetp = &(NODE_CAR(tmp));
8096 }
8097 goto re_entry;
8098 }
8099 }
8100
8101 return r;
8102 }
8103
8104 static int
8105 parse_branch(Node** top, OnigToken* tok, int term, UChar** src, UChar* end,
8106 ScanEnv* env)
8107 {
8108 int r;
8109 Node *node, **headp;
8110
8111 *top = NULL;
8112 r = parse_exp(&node, tok, term, src, end, env);
8113 if (r < 0) {
8114 onig_node_free(node);
8115 return r;
8116 }
8117
8118 if (r == TK_EOT || r == term || r == TK_ALT) {
8119 *top = node;
8120 }
8121 else {
8122 *top = node_new_list(node, NULL);
8123 if (IS_NULL(*top)) {
8124 onig_node_free(node);
8125 return ONIGERR_MEMORY;
8126 }
8127
8128 headp = &(NODE_CDR(*top));
8129 while (r != TK_EOT && r != term && r != TK_ALT) {
8130 r = parse_exp(&node, tok, term, src, end, env);
8131 if (r < 0) {
8132 onig_node_free(node);
8133 return r;
8134 }
8135
8136 if (NODE_TYPE(node) == NODE_LIST) {
8137 *headp = node;
8138 while (IS_NOT_NULL(NODE_CDR(node))) node = NODE_CDR(node);
8139 headp = &(NODE_CDR(node));
8140 }
8141 else {
8142 *headp = node_new_list(node, NULL);
8143 headp = &(NODE_CDR(*headp));
8144 }
8145 }
8146 }
8147
8148 return r;
8149 }
8150
8151 /* term_tok: TK_EOT or TK_SUBEXP_CLOSE */
8152 static int
8153 parse_subexp(Node** top, OnigToken* tok, int term, UChar** src, UChar* end,
8154 ScanEnv* env)
8155 {
8156 int r;
8157 Node *node, **headp;
8158
8159 *top = NULL;
8160 env->parse_depth++;
8161 if (env->parse_depth > ParseDepthLimit)
8162 return ONIGERR_PARSE_DEPTH_LIMIT_OVER;
8163
8164 r = parse_branch(&node, tok, term, src, end, env);
8165 if (r < 0) {
8166 onig_node_free(node);
8167 return r;
8168 }
8169
8170 if (r == term) {
8171 *top = node;
8172 }
8173 else if (r == TK_ALT) {
8174 *top = onig_node_new_alt(node, NULL);
8175 if (IS_NULL(*top)) {
8176 onig_node_free(node);
8177 return ONIGERR_MEMORY;
8178 }
8179
8180 headp = &(NODE_CDR(*top));
8181 while (r == TK_ALT) {
8182 r = fetch_token(tok, src, end, env);
8183 if (r < 0) return r;
8184 r = parse_branch(&node, tok, term, src, end, env);
8185 if (r < 0) {
8186 onig_node_free(node);
8187 return r;
8188 }
8189 *headp = onig_node_new_alt(node, NULL);
8190 if (IS_NULL(*headp)) {
8191 onig_node_free(node);
8192 onig_node_free(*top);
8193 return ONIGERR_MEMORY;
8194 }
8195
8196 headp = &(NODE_CDR(*headp));
8197 }
8198
8199 if (tok->type != (enum TokenSyms )term)
8200 goto err;
8201 }
8202 else {
8203 onig_node_free(node);
8204 err:
8205 if (term == TK_SUBEXP_CLOSE)
8206 return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
8207 else
8208 return ONIGERR_PARSER_BUG;
8209 }
8210
8211 env->parse_depth--;
8212 return r;
8213 }
8214
8215 static int
8216 parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)
8217 {
8218 int r;
8219 OnigToken tok;
8220
8221 r = fetch_token(&tok, src, end, env);
8222 if (r < 0) return r;
8223 r = parse_subexp(top, &tok, TK_EOT, src, end, env);
8224 if (r < 0) return r;
8225
8226 return 0;
8227 }
8228
#ifdef USE_CALL
/*
 * Wrap `node` in a new unnamed memory enclosure with group number 0 and
 * register that enclosure in env as memory node 0.
 *
 * node:  tree that becomes the body of the new enclosure.
 * rnode: receives the new enclosure on success; untouched on failure.
 *
 * Returns 0 on success, ONIGERR_MEMORY-style failure from the allocation
 * check, or the non-zero code from scan_env_set_mem_node().
 *
 * NOTE(review): on the scan_env_set_mem_node() failure path the wrapper is
 * freed while `node` is still attached as its body -- presumably
 * onig_node_free() releases the body too; confirm the caller does not
 * free `node` again.
 */
static int
make_call_zero_body(Node* node, ScanEnv* env, Node** rnode)
{
  int status;
  Node* wrapper;

  wrapper = node_new_memory(0 /* 0: is not named */);
  CHECK_NULL_RETURN_MEMERR(wrapper);

  NODE_BODY(wrapper) = node;
  ENCLOSURE_(wrapper)->m.regnum = 0;

  status = scan_env_set_mem_node(env, 0, wrapper);
  if (status == 0) {
    *rnode = wrapper;
    return 0;
  }

  onig_node_free(wrapper);
  return status;
}
#endif
8250
8251 extern int
8252 onig_parse_tree(Node** root, const UChar* pattern, const UChar* end,
8253 regex_t* reg, ScanEnv* env)
8254 {
8255 int r;
8256 UChar* p;
8257 #ifdef USE_CALLOUT
8258 RegexExt* ext;
8259 #endif
8260
8261 names_clear(reg);
8262
8263 scan_env_clear(env);
8264 env->options = reg->options;
8265 env->case_fold_flag = reg->case_fold_flag;
8266 env->enc = reg->enc;
8267 env->syntax = reg->syntax;
8268 env->pattern = (UChar* )pattern;
8269 env->pattern_end = (UChar* )end;
8270 env->reg = reg;
8271
8272 *root = NULL;
8273
8274 if (! ONIGENC_IS_VALID_MBC_STRING(env->enc, pattern, end))
8275 return ONIGERR_INVALID_WIDE_CHAR_VALUE;
8276
8277 p = (UChar* )pattern;
8278 r = parse_regexp(root, &p, (UChar* )end, env);
8279
8280 #ifdef USE_CALL
8281 if (r != 0) return r;
8282
8283 if (env->has_call_zero != 0) {
8284 Node* zero_node;
8285 r = make_call_zero_body(*root, env, &zero_node);
8286 if (r != 0) return r;
8287
8288 *root = zero_node;
8289 }
8290 #endif
8291
8292 reg->num_mem = env->num_mem;
8293
8294 #ifdef USE_CALLOUT
8295 ext = REG_EXTP(reg);
8296 if (IS_NOT_NULL(ext) && ext->callout_num > 0) {
8297 r = setup_ext_callout_list_values(reg);
8298 }
8299 #endif
8300
8301 return r;
8302 }
8303
8304 extern void
8305 onig_scan_env_set_error_string(ScanEnv* env, int ecode ARG_UNUSED,
8306 UChar* arg, UChar* arg_end)
8307 {
8308 env->error = arg;
8309 env->error_end = arg_end;
8310 }