]> git.proxmox.com Git - mirror_edk2.git/blob - MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regparse.c
MdeModulePkg/RegularExpressionDxe: Add two missing null pointer checks
[mirror_edk2.git] / MdeModulePkg / Universal / RegularExpressionDxe / Oniguruma / regparse.c
1 /**********************************************************************
2 regparse.c - Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5 * Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 #include "regparse.h"
31 #include "st.h"
32
33 #ifdef DEBUG_NODE_FREE
34 #include <stdio.h>
35 #endif
36
37 #define INIT_TAG_NAMES_ALLOC_NUM 5
38
39 #define WARN_BUFSIZE 256
40
41 #define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
42
/*
 * Character predicates for callout identifiers: both accept the ASCII
 * letters, the ASCII digits and '_'.
 * The argument is parenthesized so that compound expressions expand as
 * intended.  It is still evaluated more than once, so do not pass an
 * expression with side effects.
 */
#define IS_ALLOWED_CODE_IN_CALLOUT_NAME(c) \
  (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z') || ((c) >= '0' && (c) <= '9') || (c) == '_' /* || (c) == '!' */)
#define IS_ALLOWED_CODE_IN_CALLOUT_TAG_NAME(c) \
  (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z') || ((c) >= '0' && (c) <= '9') || (c) == '_')
47
48
/*
 * Syntax definition for the native Oniguruma flavour.
 * The initializer lists, in order: the ONIG_SYN_OP_* operator flags, the
 * ONIG_SYN_OP2_* operator flags, the behaviour flags (SYN_GNU_REGEX_BV and
 * friends), the default options, and the meta character table.
 */
OnigSyntaxType OnigSyntaxOniguruma = {
  /* ONIG_SYN_OP_* flags: GNU regex set plus extras, minus \< \> word anchors */
  (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
     ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
     ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_O_BRACE_OCTAL |
     ONIG_SYN_OP_ESC_CONTROL_CHARS |
     ONIG_SYN_OP_ESC_C_CONTROL )
   & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
  /* ONIG_SYN_OP2_* flags */
  , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
      ONIG_SYN_OP2_OPTION_ONIGURUMA |
      ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
      ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE |
      ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP |
      ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS |
      ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME |
      ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT |
      ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE |
      ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT |
      ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP |
      ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |
      ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
      ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
      ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
      ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |
      ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |
      ONIG_SYN_OP2_ESC_H_XDIGIT | ONIG_SYN_OP2_ESC_U_HEX4 )
  /* behaviour flags */
  , ( SYN_GNU_REGEX_BV |
      ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |
      ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |
      ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
      ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |
      ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |
      ONIG_SYN_ALLOW_INVALID_CODE_END_OF_RANGE_IN_CC |
      ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |
      ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )
  /* default options */
  , ONIG_OPTION_NONE
  ,
  /* meta characters: only the escape character is active */
  {
      (OnigCodePoint )'\\'                       /* esc */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
  }
};
94
/*
 * Syntax definition for the Ruby flavour.
 * Same layout as OnigSyntaxOniguruma.  Compared with that table it selects
 * ONIG_SYN_OP2_OPTION_RUBY and omits the callout flags,
 * ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT and
 * ONIG_SYN_ALLOW_INVALID_CODE_END_OF_RANGE_IN_CC.
 */
OnigSyntaxType OnigSyntaxRuby = {
  /* ONIG_SYN_OP_* flags: GNU regex set plus extras, minus \< \> word anchors */
  (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
     ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
     ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_O_BRACE_OCTAL |
     ONIG_SYN_OP_ESC_CONTROL_CHARS |
     ONIG_SYN_OP_ESC_C_CONTROL )
   & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
  /* ONIG_SYN_OP2_* flags */
  , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
      ONIG_SYN_OP2_OPTION_RUBY |
      ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
      ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE |
      ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP |
      ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT |
      ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE |
      ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP |
      ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |
      ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
      ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
      ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
      ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |
      ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |
      ONIG_SYN_OP2_ESC_H_XDIGIT | ONIG_SYN_OP2_ESC_U_HEX4 )
  /* behaviour flags */
  , ( SYN_GNU_REGEX_BV |
      ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |
      ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |
      ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
      ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |
      ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |
      ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |
      ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )
  /* default options */
  , ONIG_OPTION_NONE
  ,
  /* meta characters: only the escape character is active */
  {
      (OnigCodePoint )'\\'                       /* esc */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
  }
};
136
/* Default syntax pointer, initialised to ONIG_SYNTAX_ONIGURUMA. */
OnigSyntaxType*  OnigDefaultSyntax = ONIG_SYNTAX_ONIGURUMA;
138
/*
 * Warning handler that does nothing.  onig_warning() also compares the
 * installed handler against this function to decide whether to call it.
 */
extern void onig_null_warn(const char* s ARG_UNUSED) { }

/* Handler for regular warnings; a build may preset it via DEFAULT_WARN_FUNCTION. */
#ifdef DEFAULT_WARN_FUNCTION
static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION;
#else
static OnigWarnFunc onig_warn = onig_null_warn;
#endif

/* Handler for verbose warnings; a build may preset it via DEFAULT_VERB_WARN_FUNCTION. */
#ifdef DEFAULT_VERB_WARN_FUNCTION
static OnigWarnFunc onig_verb_warn = (OnigWarnFunc )DEFAULT_VERB_WARN_FUNCTION;
#else
static OnigWarnFunc onig_verb_warn = onig_null_warn;
#endif
152
153 extern void onig_set_warn_func(OnigWarnFunc f)
154 {
155 onig_warn = f;
156 }
157
158 extern void onig_set_verb_warn_func(OnigWarnFunc f)
159 {
160 onig_verb_warn = f;
161 }
162
163 extern void
164 onig_warning(const char* s)
165 {
166 if (onig_warn == onig_null_warn) return ;
167
168 (*onig_warn)(s);
169 }
170
#define DEFAULT_MAX_CAPTURE_NUM   32767

/* Current capture number limit; changed by onig_set_capture_num_limit(). */
static int MaxCaptureNum = DEFAULT_MAX_CAPTURE_NUM;

/*
 * Store `num` as the new capture number limit.
 * Returns 0 on success, or -1 (leaving the limit untouched) when `num`
 * is negative.
 */
extern int
onig_set_capture_num_limit(int num)
{
  int status = -1;

  if (num >= 0) {
    MaxCaptureNum = num;
    status = 0;
  }
  return status;
}
183
/* Current parse depth limit; starts at DEFAULT_PARSE_DEPTH_LIMIT. */
static unsigned int ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT;

/* Return the current parse depth limit. */
extern unsigned int
onig_get_parse_depth_limit(void)
{
  return ParseDepthLimit;
}
191
192 extern int
193 onig_set_parse_depth_limit(unsigned int depth)
194 {
195 if (depth == 0)
196 ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT;
197 else
198 ParseDepthLimit = depth;
199 return 0;
200 }
201
202 static int
203 bbuf_init(BBuf* buf, int size)
204 {
205 if (size <= 0) {
206 size = 0;
207 buf->p = NULL;
208 }
209 else {
210 buf->p = (UChar* )xmalloc(size);
211 if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
212 }
213
214 buf->alloc = size;
215 buf->used = 0;
216 return 0;
217 }
218
219 static void
220 bbuf_free(BBuf* bbuf)
221 {
222 if (IS_NOT_NULL(bbuf)) {
223 if (IS_NOT_NULL(bbuf->p)) xfree(bbuf->p);
224 xfree(bbuf);
225 }
226 }
227
228 static int
229 bbuf_clone(BBuf** rto, BBuf* from)
230 {
231 int r;
232 BBuf *to;
233
234 *rto = to = (BBuf* )xmalloc(sizeof(BBuf));
235 CHECK_NULL_RETURN_MEMERR(to);
236 r = BB_INIT(to, from->alloc);
237 if (r != 0) {
238 xfree(to->p);
239 *rto = 0;
240 return r;
241 }
242 to->used = from->used;
243 xmemcpy(to->p, from->p, from->used);
244 return 0;
245 }
246
247 static int backref_rel_to_abs(int rel_no, ScanEnv* env)
248 {
249 if (rel_no > 0) {
250 return env->num_mem + rel_no;
251 }
252 else {
253 return env->num_mem + 1 + rel_no;
254 }
255 }
256
/* Set / clear the bits `f` in the lvalue `v`. */
#define OPTION_ON(v,f)     ((v) |= (f))
#define OPTION_OFF(v,f)    ((v) &= ~(f))

/*
 * Clear `f` in `v` when `negative` is non-zero, otherwise set it.
 * The whole expansion is parenthesized so the macro behaves as a single
 * expression when embedded in a larger one.
 */
#define OPTION_NEGATE(v,f,negative)    ((negative) ? ((v) &= ~(f)) : ((v) |= (f)))
261
/* First code point of the range added below: 0 when the encoding's minimum
   character length exceeds one byte, 0x80 otherwise. */
#define MBCODE_START_POS(enc) \
  (OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80)

/* Add the range [MBCODE_START_POS(enc), ~0] to the code range buffer `pbuf`. */
#define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \
  add_code_range_to_buf(pbuf, MBCODE_START_POS(enc), ~((OnigCodePoint )0))

/* For non-single-byte encodings, add that range to `mbuf`.
   NOTE: assigns to a variable `r` of the enclosing function and returns
   from that function when the add fails. */
#define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\
  if (! ONIGENC_IS_SINGLEBYTE(enc)) {\
    r = SET_ALL_MULTI_BYTE_RANGE(enc, &(mbuf));\
    if (r != 0) return r;\
  }\
} while (0)
274
275
/* Set `empty` to 1 when every word of the bit set `bs` is zero, else to 0.
   NOTE: declares a local `i`, so `bs` and `empty` must not be named `i`. */
#define BITSET_IS_EMPTY(bs,empty) do {\
  int i;\
  empty = 1;\
  for (i = 0; i < (int )BITSET_SIZE; i++) {\
    if ((bs)[i] != 0) {\
      empty = 0; break;\
    }\
  }\
} while (0)
285
286 static void
287 bitset_set_range(BitSetRef bs, int from, int to)
288 {
289 int i;
290 for (i = from; i <= to && i < SINGLE_BYTE_SIZE; i++) {
291 BITSET_SET_BIT(bs, i);
292 }
293 }
294
#if 0
/* Disabled: would set every bit of `bs`. */
static void
bitset_set_all(BitSetRef bs)
{
  int i;
  for (i = 0; i < BITSET_SIZE; i++) { bs[i] = ~((Bits )0); }
}
#endif
303
304 static void
305 bitset_invert(BitSetRef bs)
306 {
307 int i;
308 for (i = 0; i < (int )BITSET_SIZE; i++) { bs[i] = ~(bs[i]); }
309 }
310
311 static void
312 bitset_invert_to(BitSetRef from, BitSetRef to)
313 {
314 int i;
315 for (i = 0; i < (int )BITSET_SIZE; i++) { to[i] = ~(from[i]); }
316 }
317
318 static void
319 bitset_and(BitSetRef dest, BitSetRef bs)
320 {
321 int i;
322 for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] &= bs[i]; }
323 }
324
325 static void
326 bitset_or(BitSetRef dest, BitSetRef bs)
327 {
328 int i;
329 for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] |= bs[i]; }
330 }
331
332 static void
333 bitset_copy(BitSetRef dest, BitSetRef bs)
334 {
335 int i;
336 for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] = bs[i]; }
337 }
338
339 extern int
340 onig_strncmp(const UChar* s1, const UChar* s2, int n)
341 {
342 int x;
343
344 while (n-- > 0) {
345 x = *s2++ - *s1++;
346 if (x) return x;
347 }
348 return 0;
349 }
350
351 extern void
352 onig_strcpy(UChar* dest, const UChar* src, const UChar* end)
353 {
354 int len = (int )(end - src);
355 if (len > 0) {
356 xmemcpy(dest, src, len);
357 dest[len] = (UChar )0;
358 }
359 }
360
361 static int
362 save_entry(ScanEnv* env, enum SaveType type, int* id)
363 {
364 int nid = env->save_num;
365
366 #if 0
367 if (IS_NULL(env->saves)) {
368 int n = 10;
369 env->saves = (SaveItem* )xmalloc(sizeof(SaveItem) * n);
370 CHECK_NULL_RETURN_MEMERR(env->saves);
371 env->save_alloc_num = n;
372 }
373 else if (env->save_alloc_num <= nid) {
374 int n = env->save_alloc_num * 2;
375 SaveItem* p = (SaveItem* )xrealloc(env->saves, sizeof(SaveItem) * n, sizeof(SaveItem)*env->save_alloc_num);
376 CHECK_NULL_RETURN_MEMERR(p);
377 env->saves = p;
378 env->save_alloc_num = n;
379 }
380
381 env->saves[nid].type = type;
382 #endif
383
384 env->save_num++;
385 *id = nid;
386 return 0;
387 }
388
/*
 * scan pattern methods
 *
 * These macros operate on variables of the enclosing function: the cursor
 * `p`, the limit `end`, the encoding `enc` and, for the non-_S variants,
 * `pfetch_prev` (declared with PFETCH_READY).
 */
#define PEND_VALUE   0

#define PFETCH_READY  UChar* pfetch_prev
/* 1 when the cursor has reached `end`, else 0 */
#define PEND         (p < end ?  0 : 1)
/* step back to the position saved by the last PINC / PFETCH */
#define PUNFETCH     p = pfetch_prev
/* skip one character, remembering the old position */
#define PINC       do { \
  pfetch_prev = p; \
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
} while (0)
/* read one character into `c` and advance, remembering the old position */
#define PFETCH(c)  do { \
  c = ONIGENC_MBC_TO_CODE(enc, p, end); \
  pfetch_prev = p; \
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
} while (0)

/* _S variants: same as above but without updating pfetch_prev */
#define PINC_S     do { \
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
} while (0)
#define PFETCH_S(c) do { \
  c = ONIGENC_MBC_TO_CODE(enc, p, end); \
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
} while (0)

/* code point at the cursor without advancing; PEND_VALUE at end of input */
#define PPEEK        (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE)
/* argument parenthesized so the cast applies to the whole expression */
#define PPEEK_IS(c)  (PPEEK == (OnigCodePoint )(c))
415
416 static UChar*
417 strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end,
418 int capa, int oldCapa)
419 {
420 UChar* r;
421
422 if (dest)
423 r = (UChar* )xrealloc(dest, capa + 1, oldCapa);
424 else
425 r = (UChar* )xmalloc(capa + 1);
426
427 CHECK_NULL_RETURN(r);
428 onig_strcpy(r + (dest_end - dest), src, src_end);
429 return r;
430 }
431
432 /* dest on static area */
433 static UChar*
434 strcat_capa_from_static(UChar* dest, UChar* dest_end,
435 const UChar* src, const UChar* src_end, int capa)
436 {
437 UChar* r;
438
439 r = (UChar* )xmalloc(capa + 1);
440 CHECK_NULL_RETURN(r);
441 onig_strcpy(r, dest, dest_end);
442 onig_strcpy(r + (dest_end - dest), src, src_end);
443 return r;
444 }
445
446
447 #ifdef USE_ST_LIBRARY
448
/* Hash key describing the byte range [s, end); the bytes are not owned by the key. */
typedef struct {
  UChar* s;
  UChar* end;
} st_str_end_key;
453
454 static int
455 str_end_cmp(st_str_end_key* x, st_str_end_key* y)
456 {
457 UChar *p, *q;
458 int c;
459
460 if ((x->end - x->s) != (y->end - y->s))
461 return 1;
462
463 p = x->s;
464 q = y->s;
465 while (p < x->end) {
466 c = (int )*p - (int )*q;
467 if (c != 0) return c;
468
469 p++; q++;
470 }
471
472 return 0;
473 }
474
475 static int
476 str_end_hash(st_str_end_key* x)
477 {
478 UChar *p;
479 int val = 0;
480
481 p = x->s;
482 while (p < x->end) {
483 val = val * 997 + (int )*p++;
484 }
485
486 return val + (val >> 5);
487 }
488
489 extern hash_table_type*
490 onig_st_init_strend_table_with_size(int size)
491 {
492 static struct st_hash_type hashType = {
493 str_end_cmp,
494 str_end_hash,
495 };
496
497 return (hash_table_type* )
498 onig_st_init_table_with_size(&hashType, size);
499 }
500
501 extern int
502 onig_st_lookup_strend(hash_table_type* table, const UChar* str_key,
503 const UChar* end_key, hash_data_type *value)
504 {
505 st_str_end_key key;
506
507 key.s = (UChar* )str_key;
508 key.end = (UChar* )end_key;
509
510 return onig_st_lookup(table, (st_data_t )(&key), value);
511 }
512
513 extern int
514 onig_st_insert_strend(hash_table_type* table, const UChar* str_key,
515 const UChar* end_key, hash_data_type value)
516 {
517 st_str_end_key* key;
518 int result;
519
520 key = (st_str_end_key* )xmalloc(sizeof(st_str_end_key));
521 CHECK_NULL_RETURN_MEMERR(key);
522
523 key->s = (UChar* )str_key;
524 key->end = (UChar* )end_key;
525 result = onig_st_insert(table, (st_data_t )key, value);
526 if (result) {
527 xfree(key);
528 }
529 return result;
530 }
531
532
533 #ifdef USE_CALLOUT
534
/* Hash key for the callout name table: encoding, callout type and the
   name bytes [s, end). */
typedef struct {
  OnigEncoding enc;
  int    type; /* callout type: single or not */
  UChar* s;
  UChar* end;
} st_callout_name_key;
541
542 static int
543 callout_name_table_cmp(st_callout_name_key* x, st_callout_name_key* y)
544 {
545 UChar *p, *q;
546 int c;
547
548 if (x->enc != y->enc) return 1;
549 if (x->type != y->type) return 1;
550 if ((x->end - x->s) != (y->end - y->s))
551 return 1;
552
553 p = x->s;
554 q = y->s;
555 while (p < x->end) {
556 c = (int )*p - (int )*q;
557 if (c != 0) return c;
558
559 p++; q++;
560 }
561
562 return 0;
563 }
564
565 static int
566 callout_name_table_hash(st_callout_name_key* x)
567 {
568 UChar *p;
569 int val = 0;
570
571 p = x->s;
572 while (p < x->end) {
573 val = val * 997 + (int )*p++;
574 }
575
576 /* use intptr_t for escape warning in Windows */
577 return val + (val >> 5) + ((intptr_t )x->enc & 0xffff) + x->type;
578 }
579
580 extern hash_table_type*
581 onig_st_init_callout_name_table_with_size(int size)
582 {
583 static struct st_hash_type hashType = {
584 callout_name_table_cmp,
585 callout_name_table_hash,
586 };
587
588 return (hash_table_type* )
589 onig_st_init_table_with_size(&hashType, size);
590 }
591
592 extern int
593 onig_st_lookup_callout_name_table(hash_table_type* table,
594 OnigEncoding enc,
595 int type,
596 const UChar* str_key,
597 const UChar* end_key,
598 hash_data_type *value)
599 {
600 st_callout_name_key key;
601
602 key.enc = enc;
603 key.type = type;
604 key.s = (UChar* )str_key;
605 key.end = (UChar* )end_key;
606
607 return onig_st_lookup(table, (st_data_t )(&key), value);
608 }
609
610 static int
611 st_insert_callout_name_table(hash_table_type* table,
612 OnigEncoding enc, int type,
613 UChar* str_key, UChar* end_key,
614 hash_data_type value)
615 {
616 st_callout_name_key* key;
617 int result;
618
619 key = (st_callout_name_key* )xmalloc(sizeof(st_callout_name_key));
620 CHECK_NULL_RETURN_MEMERR(key);
621
622 /* key->s: don't duplicate, because str_key is duped in callout_name_entry() */
623 key->enc = enc;
624 key->type = type;
625 key->s = str_key;
626 key->end = end_key;
627 result = onig_st_insert(table, (st_data_t )key, value);
628 if (result) {
629 xfree(key);
630 }
631 return result;
632 }
633 #endif
634
635 #endif /* USE_ST_LIBRARY */
636
637
/* Initial capacity of NameEntry.back_refs (allocated when a name gets its
   second group). */
#define INIT_NAME_BACKREFS_ALLOC_NUM   8

/*
 * One group name and the group numbers defined under it.
 * With exactly one group the number lives in back_ref1; with two or more
 * all numbers live in the heap array back_refs.
 */
typedef struct {
  UChar* name;
  int    name_len;   /* byte length */
  int    back_num;   /* number of backrefs */
  int    back_alloc; /* capacity of back_refs */
  int    back_ref1;
  int*   back_refs;
} NameEntry;
648
649 #ifdef USE_ST_LIBRARY
650
/* Size passed to onig_st_init_strend_table_with_size() for a new name table. */
#define INIT_NAMES_ALLOC_NUM    5

/* With the st library, the name table is an st hash table. */
typedef st_table  NameTable;
typedef st_data_t HashDataType;   /* 1.6 st.h doesn't define st_data_t type */

#define NAMEBUF_SIZE    24
#define NAMEBUF_SIZE_1  25
658
659 #ifdef ONIG_DEBUG
660 static int
661 i_print_name_entry(UChar* key, NameEntry* e, void* arg)
662 {
663 int i;
664 FILE* fp = (FILE* )arg;
665
666 fprintf(fp, "%s: ", e->name);
667 if (e->back_num == 0)
668 fputs("-", fp);
669 else if (e->back_num == 1)
670 fprintf(fp, "%d", e->back_ref1);
671 else {
672 for (i = 0; i < e->back_num; i++) {
673 if (i > 0) fprintf(fp, ", ");
674 fprintf(fp, "%d", e->back_refs[i]);
675 }
676 }
677 fputs("\n", fp);
678 return ST_CONTINUE;
679 }
680
681 extern int
682 onig_print_names(FILE* fp, regex_t* reg)
683 {
684 NameTable* t = (NameTable* )reg->name_table;
685
686 if (IS_NOT_NULL(t)) {
687 fprintf(fp, "name table\n");
688 onig_st_foreach(t, i_print_name_entry, (HashDataType )fp);
689 fputs("\n", fp);
690 }
691 return 0;
692 }
693 #endif /* ONIG_DEBUG */
694
695 static int
696 i_free_name_entry(UChar* key, NameEntry* e, void* arg ARG_UNUSED)
697 {
698 xfree(e->name);
699 if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
700 xfree(key);
701 xfree(e);
702 return ST_DELETE;
703 }
704
705 static int
706 names_clear(regex_t* reg)
707 {
708 NameTable* t = (NameTable* )reg->name_table;
709
710 if (IS_NOT_NULL(t)) {
711 onig_st_foreach(t, i_free_name_entry, 0);
712 }
713 return 0;
714 }
715
716 extern int
717 onig_names_free(regex_t* reg)
718 {
719 int r;
720 NameTable* t;
721
722 r = names_clear(reg);
723 if (r != 0) return r;
724
725 t = (NameTable* )reg->name_table;
726 if (IS_NOT_NULL(t)) onig_st_free_table(t);
727 reg->name_table = (void* )NULL;
728 return 0;
729 }
730
731 static NameEntry*
732 name_find(regex_t* reg, const UChar* name, const UChar* name_end)
733 {
734 NameEntry* e;
735 NameTable* t = (NameTable* )reg->name_table;
736
737 e = (NameEntry* )NULL;
738 if (IS_NOT_NULL(t)) {
739 onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));
740 }
741 return e;
742 }
743
/* Context handed to i_names() while iterating in onig_foreach_name(). */
typedef struct {
  int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*); /* user callback */
  regex_t* reg;
  void* arg;         /* opaque user argument forwarded to func */
  int ret;           /* first non-zero value returned by func, else 0 */
  OnigEncoding enc;
} INamesArg;
751
752 static int
753 i_names(UChar* key ARG_UNUSED, NameEntry* e, INamesArg* arg)
754 {
755 int r = (*(arg->func))(e->name,
756 e->name + e->name_len,
757 e->back_num,
758 (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
759 arg->reg, arg->arg);
760 if (r != 0) {
761 arg->ret = r;
762 return ST_STOP;
763 }
764 return ST_CONTINUE;
765 }
766
767 extern int
768 onig_foreach_name(regex_t* reg,
769 int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
770 {
771 INamesArg narg;
772 NameTable* t = (NameTable* )reg->name_table;
773
774 narg.ret = 0;
775 if (IS_NOT_NULL(t)) {
776 narg.func = func;
777 narg.reg = reg;
778 narg.arg = arg;
779 narg.enc = reg->enc; /* should be pattern encoding. */
780 onig_st_foreach(t, i_names, (HashDataType )&narg);
781 }
782 return narg.ret;
783 }
784
785 static int
786 i_renumber_name(UChar* key ARG_UNUSED, NameEntry* e, GroupNumRemap* map)
787 {
788 int i;
789
790 if (e->back_num > 1) {
791 for (i = 0; i < e->back_num; i++) {
792 e->back_refs[i] = map[e->back_refs[i]].new_val;
793 }
794 }
795 else if (e->back_num == 1) {
796 e->back_ref1 = map[e->back_ref1].new_val;
797 }
798
799 return ST_CONTINUE;
800 }
801
802 extern int
803 onig_renumber_name_table(regex_t* reg, GroupNumRemap* map)
804 {
805 NameTable* t = (NameTable* )reg->name_table;
806
807 if (IS_NOT_NULL(t)) {
808 onig_st_foreach(t, i_renumber_name, (HashDataType )map);
809 }
810 return 0;
811 }
812
813
814 extern int
815 onig_number_of_names(regex_t* reg)
816 {
817 NameTable* t = (NameTable* )reg->name_table;
818
819 if (IS_NOT_NULL(t))
820 return t->num_entries;
821 else
822 return 0;
823 }
824
825 #else /* USE_ST_LIBRARY */
826
/* Initial capacity of NameTable.e when the table is first created. */
#define INIT_NAMES_ALLOC_NUM    8

/* Without the st library, the name table is a growable array of entries. */
typedef struct {
  NameEntry* e;     /* entry array */
  int        num;   /* entries in use */
  int        alloc; /* allocated entries */
} NameTable;
834
835 #ifdef ONIG_DEBUG
836 extern int
837 onig_print_names(FILE* fp, regex_t* reg)
838 {
839 int i, j;
840 NameEntry* e;
841 NameTable* t = (NameTable* )reg->name_table;
842
843 if (IS_NOT_NULL(t) && t->num > 0) {
844 fprintf(fp, "name table\n");
845 for (i = 0; i < t->num; i++) {
846 e = &(t->e[i]);
847 fprintf(fp, "%s: ", e->name);
848 if (e->back_num == 0) {
849 fputs("-", fp);
850 }
851 else if (e->back_num == 1) {
852 fprintf(fp, "%d", e->back_ref1);
853 }
854 else {
855 for (j = 0; j < e->back_num; j++) {
856 if (j > 0) fprintf(fp, ", ");
857 fprintf(fp, "%d", e->back_refs[j]);
858 }
859 }
860 fputs("\n", fp);
861 }
862 fputs("\n", fp);
863 }
864 return 0;
865 }
866 #endif
867
868 static int
869 names_clear(regex_t* reg)
870 {
871 int i;
872 NameEntry* e;
873 NameTable* t = (NameTable* )reg->name_table;
874
875 if (IS_NOT_NULL(t)) {
876 for (i = 0; i < t->num; i++) {
877 e = &(t->e[i]);
878 if (IS_NOT_NULL(e->name)) {
879 xfree(e->name);
880 e->name = NULL;
881 e->name_len = 0;
882 e->back_num = 0;
883 e->back_alloc = 0;
884 if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
885 e->back_refs = (int* )NULL;
886 }
887 }
888 if (IS_NOT_NULL(t->e)) {
889 xfree(t->e);
890 t->e = NULL;
891 }
892 t->num = 0;
893 }
894 return 0;
895 }
896
897 extern int
898 onig_names_free(regex_t* reg)
899 {
900 int r;
901 NameTable* t;
902
903 r = names_clear(reg);
904 if (r != 0) return r;
905
906 t = (NameTable* )reg->name_table;
907 if (IS_NOT_NULL(t)) xfree(t);
908 reg->name_table = NULL;
909 return 0;
910 }
911
912 static NameEntry*
913 name_find(regex_t* reg, UChar* name, UChar* name_end)
914 {
915 int i, len;
916 NameEntry* e;
917 NameTable* t = (NameTable* )reg->name_table;
918
919 if (IS_NOT_NULL(t)) {
920 len = name_end - name;
921 for (i = 0; i < t->num; i++) {
922 e = &(t->e[i]);
923 if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)
924 return e;
925 }
926 }
927 return (NameEntry* )NULL;
928 }
929
930 extern int
931 onig_foreach_name(regex_t* reg,
932 int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
933 {
934 int i, r;
935 NameEntry* e;
936 NameTable* t = (NameTable* )reg->name_table;
937
938 if (IS_NOT_NULL(t)) {
939 for (i = 0; i < t->num; i++) {
940 e = &(t->e[i]);
941 r = (*func)(e->name, e->name + e->name_len, e->back_num,
942 (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
943 reg, arg);
944 if (r != 0) return r;
945 }
946 }
947 return 0;
948 }
949
950 extern int
951 onig_number_of_names(regex_t* reg)
952 {
953 NameTable* t = (NameTable* )reg->name_table;
954
955 if (IS_NOT_NULL(t))
956 return t->num;
957 else
958 return 0;
959 }
960
961 #endif /* else USE_ST_LIBRARY */
962
963 static int
964 name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)
965 {
966 int r;
967 int alloc;
968 NameEntry* e;
969 NameTable* t = (NameTable* )reg->name_table;
970
971 if (name_end - name <= 0)
972 return ONIGERR_EMPTY_GROUP_NAME;
973
974 e = name_find(reg, name, name_end);
975 if (IS_NULL(e)) {
976 #ifdef USE_ST_LIBRARY
977 if (IS_NULL(t)) {
978 t = onig_st_init_strend_table_with_size(INIT_NAMES_ALLOC_NUM);
979 CHECK_NULL_RETURN_MEMERR(t);
980 reg->name_table = (void* )t;
981 }
982 e = (NameEntry* )xmalloc(sizeof(NameEntry));
983 CHECK_NULL_RETURN_MEMERR(e);
984
985 e->name = onigenc_strdup(reg->enc, name, name_end);
986 if (IS_NULL(e->name)) {
987 xfree(e); return ONIGERR_MEMORY;
988 }
989 r = onig_st_insert_strend(t, e->name, (e->name + (name_end - name)),
990 (HashDataType )e);
991 if (r < 0) return r;
992
993 e->name_len = (int )(name_end - name);
994 e->back_num = 0;
995 e->back_alloc = 0;
996 e->back_refs = (int* )NULL;
997
998 #else
999
1000 if (IS_NULL(t)) {
1001 alloc = INIT_NAMES_ALLOC_NUM;
1002 t = (NameTable* )xmalloc(sizeof(NameTable));
1003 CHECK_NULL_RETURN_MEMERR(t);
1004 t->e = NULL;
1005 t->alloc = 0;
1006 t->num = 0;
1007
1008 t->e = (NameEntry* )xmalloc(sizeof(NameEntry) * alloc);
1009 if (IS_NULL(t->e)) {
1010 xfree(t);
1011 return ONIGERR_MEMORY;
1012 }
1013 t->alloc = alloc;
1014 reg->name_table = t;
1015 goto clear;
1016 }
1017 else if (t->num == t->alloc) {
1018 int i;
1019
1020 alloc = t->alloc * 2;
1021 t->e = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc, sizeof(NameEntry) * t->alloc);
1022 CHECK_NULL_RETURN_MEMERR(t->e);
1023 t->alloc = alloc;
1024
1025 clear:
1026 for (i = t->num; i < t->alloc; i++) {
1027 t->e[i].name = NULL;
1028 t->e[i].name_len = 0;
1029 t->e[i].back_num = 0;
1030 t->e[i].back_alloc = 0;
1031 t->e[i].back_refs = (int* )NULL;
1032 }
1033 }
1034 e = &(t->e[t->num]);
1035 t->num++;
1036 e->name = onigenc_strdup(reg->enc, name, name_end);
1037 if (IS_NULL(e->name)) return ONIGERR_MEMORY;
1038 e->name_len = name_end - name;
1039 #endif
1040 }
1041
1042 if (e->back_num >= 1 &&
1043 ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME)) {
1044 onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME,
1045 name, name_end);
1046 return ONIGERR_MULTIPLEX_DEFINED_NAME;
1047 }
1048
1049 e->back_num++;
1050 if (e->back_num == 1) {
1051 e->back_ref1 = backref;
1052 }
1053 else {
1054 if (e->back_num == 2) {
1055 alloc = INIT_NAME_BACKREFS_ALLOC_NUM;
1056 e->back_refs = (int* )xmalloc(sizeof(int) * alloc);
1057 CHECK_NULL_RETURN_MEMERR(e->back_refs);
1058 e->back_alloc = alloc;
1059 e->back_refs[0] = e->back_ref1;
1060 e->back_refs[1] = backref;
1061 }
1062 else {
1063 if (e->back_num > e->back_alloc) {
1064 alloc = e->back_alloc * 2;
1065 e->back_refs = (int* )xrealloc(e->back_refs, sizeof(int) * alloc, sizeof(int) * e->back_alloc);
1066 CHECK_NULL_RETURN_MEMERR(e->back_refs);
1067 e->back_alloc = alloc;
1068 }
1069 e->back_refs[e->back_num - 1] = backref;
1070 }
1071 }
1072
1073 return 0;
1074 }
1075
1076 extern int
1077 onig_name_to_group_numbers(regex_t* reg, const UChar* name,
1078 const UChar* name_end, int** nums)
1079 {
1080 NameEntry* e = name_find(reg, name, name_end);
1081
1082 if (IS_NULL(e)) return ONIGERR_UNDEFINED_NAME_REFERENCE;
1083
1084 switch (e->back_num) {
1085 case 0:
1086 break;
1087 case 1:
1088 *nums = &(e->back_ref1);
1089 break;
1090 default:
1091 *nums = e->back_refs;
1092 break;
1093 }
1094 return e->back_num;
1095 }
1096
1097 static int
1098 name_to_group_numbers(ScanEnv* env, const UChar* name, const UChar* name_end,
1099 int** nums)
1100 {
1101 regex_t* reg;
1102 NameEntry* e;
1103
1104 reg = env->reg;
1105 e = name_find(reg, name, name_end);
1106
1107 if (IS_NULL(e)) {
1108 onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,
1109 (UChar* )name, (UChar* )name_end);
1110 return ONIGERR_UNDEFINED_NAME_REFERENCE;
1111 }
1112
1113 switch (e->back_num) {
1114 case 0:
1115 break;
1116 case 1:
1117 *nums = &(e->back_ref1);
1118 break;
1119 default:
1120 *nums = e->back_refs;
1121 break;
1122 }
1123 return e->back_num;
1124 }
1125
1126 extern int
1127 onig_name_to_backref_number(regex_t* reg, const UChar* name,
1128 const UChar* name_end, OnigRegion *region)
1129 {
1130 int i, n, *nums;
1131
1132 n = onig_name_to_group_numbers(reg, name, name_end, &nums);
1133 if (n < 0)
1134 return n;
1135 else if (n == 0)
1136 return ONIGERR_PARSER_BUG;
1137 else if (n == 1)
1138 return nums[0];
1139 else {
1140 if (IS_NOT_NULL(region)) {
1141 for (i = n - 1; i >= 0; i--) {
1142 if (region->beg[nums[i]] != ONIG_REGION_NOTPOS)
1143 return nums[i];
1144 }
1145 }
1146 return nums[n - 1];
1147 }
1148 }
1149
1150 extern int
1151 onig_noname_group_capture_is_active(regex_t* reg)
1152 {
1153 if (ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_DONT_CAPTURE_GROUP))
1154 return 0;
1155
1156 if (onig_number_of_names(reg) > 0 &&
1157 IS_SYNTAX_BV(reg->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
1158 !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
1159 return 0;
1160 }
1161
1162 return 1;
1163 }
1164
1165 #ifdef USE_CALLOUT
1166
/* One entry of the callout list.  free_callout_func_list() treats the
   last opt_arg_num of the arg_num arguments as optional and frees their
   string defaults. */
typedef struct {
  OnigCalloutType type;
  int             in;
  OnigCalloutFunc start_func;
  OnigCalloutFunc end_func;
  int             arg_num;
  int             opt_arg_num;
  unsigned int    arg_types[ONIG_CALLOUT_MAX_ARGS_NUM];
  OnigValue       opt_defaults[ONIG_CALLOUT_MAX_ARGS_NUM];
  UChar*          name; /* reference to GlobalCalloutNameTable entry: e->name */
} CalloutNameListEntry;

/* Growable array of CalloutNameListEntry. */
typedef struct {
  int  n;      /* entries in use */
  int  alloc;  /* allocated entries */
  CalloutNameListEntry* v;
} CalloutNameListType;

static CalloutNameListType* GlobalCalloutNameList;
1186
1187 static int
1188 make_callout_func_list(CalloutNameListType** rs, int init_size)
1189 {
1190 CalloutNameListType* s;
1191 CalloutNameListEntry* v;
1192
1193 *rs = 0;
1194
1195 s = xmalloc(sizeof(*s));
1196 if (IS_NULL(s)) return ONIGERR_MEMORY;
1197
1198 v = (CalloutNameListEntry* )xmalloc(sizeof(CalloutNameListEntry) * init_size);
1199 if (IS_NULL(v)) {
1200 xfree(s);
1201 return ONIGERR_MEMORY;
1202 }
1203
1204 s->n = 0;
1205 s->alloc = init_size;
1206 s->v = v;
1207
1208 *rs = s;
1209 return ONIG_NORMAL;
1210 }
1211
1212 static void
1213 free_callout_func_list(CalloutNameListType* s)
1214 {
1215 if (IS_NOT_NULL(s)) {
1216 if (IS_NOT_NULL(s->v)) {
1217 int i, j;
1218
1219 for (i = 0; i < s->n; i++) {
1220 CalloutNameListEntry* e = s->v + i;
1221 for (j = e->arg_num - e->opt_arg_num; j < e->arg_num; j++) {
1222 if (e->arg_types[j] == ONIG_TYPE_STRING) {
1223 UChar* p = e->opt_defaults[j].s.start;
1224 if (IS_NOT_NULL(p)) xfree(p);
1225 }
1226 }
1227 }
1228 xfree(s->v);
1229 }
1230 xfree(s);
1231 }
1232 }
1233
1234 static int
1235 callout_func_list_add(CalloutNameListType* s, int* rid)
1236 {
1237 if (s->n >= s->alloc) {
1238 int new_size = s->alloc * 2;
1239 CalloutNameListEntry* nv = (CalloutNameListEntry* )
1240 xrealloc(s->v, sizeof(CalloutNameListEntry) * new_size, sizeof(CalloutNameListEntry)*s->alloc);
1241 if (IS_NULL(nv)) return ONIGERR_MEMORY;
1242
1243 s->alloc = new_size;
1244 s->v = nv;
1245 }
1246
1247 *rid = s->n;
1248
1249 xmemset(&(s->v[s->n]), 0, sizeof(*(s->v)));
1250 s->n++;
1251 return ONIG_NORMAL;
1252 }
1253
1254
/* One registered callout name and its id.
   NOTE(review): the non-st callout_name_table_clear() later in this file
   assigns e->func, which is not a member of this struct -- confirm. */
typedef struct {
  UChar* name;
  int    name_len;   /* byte length */
  int    id;
} CalloutNameEntry;

/* With the st library the callout name table is an st hash table;
   otherwise it is a growable array of entries. */
#ifdef USE_ST_LIBRARY
typedef st_table  CalloutNameTable;
#else
typedef struct {
  CalloutNameEntry* e;
  int               num;
  int               alloc;
} CalloutNameTable;
#endif

static CalloutNameTable* GlobalCalloutNameTable;
/* reset to 0 by global_callout_name_table_free() */
static int CalloutNameIDCounter;
1273
1274 #ifdef USE_ST_LIBRARY
1275
1276 static int
1277 i_free_callout_name_entry(st_callout_name_key* key, CalloutNameEntry* e,
1278 void* arg ARG_UNUSED)
1279 {
1280 xfree(e->name);
1281 /*xfree(key->s); */ /* is same as e->name */
1282 xfree(key);
1283 xfree(e);
1284 return ST_DELETE;
1285 }
1286
1287 static int
1288 callout_name_table_clear(CalloutNameTable* t)
1289 {
1290 if (IS_NOT_NULL(t)) {
1291 onig_st_foreach(t, i_free_callout_name_entry, 0);
1292 }
1293 return 0;
1294 }
1295
1296 static int
1297 global_callout_name_table_free(void)
1298 {
1299 if (IS_NOT_NULL(GlobalCalloutNameTable)) {
1300 int r = callout_name_table_clear(GlobalCalloutNameTable);
1301 if (r != 0) return r;
1302
1303 onig_st_free_table(GlobalCalloutNameTable);
1304 GlobalCalloutNameTable = 0;
1305 CalloutNameIDCounter = 0;
1306 }
1307
1308 return 0;
1309 }
1310
1311 static CalloutNameEntry*
1312 callout_name_find(OnigEncoding enc, int is_not_single,
1313 const UChar* name, const UChar* name_end)
1314 {
1315 int r;
1316 CalloutNameEntry* e;
1317 CalloutNameTable* t = GlobalCalloutNameTable;
1318
1319 e = (CalloutNameEntry* )NULL;
1320 if (IS_NOT_NULL(t)) {
1321 r = onig_st_lookup_callout_name_table(t, enc, is_not_single, name, name_end,
1322 (HashDataType* )((void* )(&e)));
1323 if (r == 0) { /* not found */
1324 if (enc != ONIG_ENCODING_ASCII &&
1325 ONIGENC_IS_ASCII_COMPATIBLE_ENCODING(enc)) {
1326 enc = ONIG_ENCODING_ASCII;
1327 onig_st_lookup_callout_name_table(t, enc, is_not_single, name, name_end,
1328 (HashDataType* )((void* )(&e)));
1329 }
1330 }
1331 }
1332 return e;
1333 }
1334
1335 #else
1336
1337 static int
1338 callout_name_table_clear(CalloutNameTable* t)
1339 {
1340 int i;
1341 CalloutNameEntry* e;
1342
1343 if (IS_NOT_NULL(t)) {
1344 for (i = 0; i < t->num; i++) {
1345 e = &(t->e[i]);
1346 if (IS_NOT_NULL(e->name)) {
1347 xfree(e->name);
1348 e->name = NULL;
1349 e->name_len = 0;
1350 e->id = 0;
1351 e->func = 0;
1352 }
1353 }
1354 if (IS_NOT_NULL(t->e)) {
1355 xfree(t->e);
1356 t->e = NULL;
1357 }
1358 t->num = 0;
1359 }
1360 return 0;
1361 }
1362
1363 static int
1364 global_callout_name_table_free(void)
1365 {
1366 if (IS_NOT_NULL(GlobalCalloutNameTable)) {
1367 int r = callout_name_table_clear(GlobalCalloutNameTable);
1368 if (r != 0) return r;
1369
1370 xfree(GlobalCalloutNameTable);
1371 GlobalCalloutNameTable = 0;
1372 CalloutNameIDCounter = 0;
1373 }
1374 return 0;
1375 }
1376
1377 static CalloutNameEntry*
1378 callout_name_find(UChar* name, UChar* name_end)
1379 {
1380 int i, len;
1381 CalloutNameEntry* e;
1382 CalloutNameTable* t = Calloutnames;
1383
1384 if (IS_NOT_NULL(t)) {
1385 len = name_end - name;
1386 for (i = 0; i < t->num; i++) {
1387 e = &(t->e[i]);
1388 if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)
1389 return e;
1390 }
1391 }
1392 return (CalloutNameEntry* )NULL;
1393 }
1394
1395 #endif
1396
1397 /* name string must be single byte char string. */
1398 static int
1399 callout_name_entry(CalloutNameEntry** rentry, OnigEncoding enc,
1400 int is_not_single, UChar* name, UChar* name_end)
1401 {
1402 int r;
1403 CalloutNameEntry* e;
1404 CalloutNameTable* t = GlobalCalloutNameTable;
1405
1406 *rentry = 0;
1407 if (name_end - name <= 0)
1408 return ONIGERR_INVALID_CALLOUT_NAME;
1409
1410 e = callout_name_find(enc, is_not_single, name, name_end);
1411 if (IS_NULL(e)) {
1412 #ifdef USE_ST_LIBRARY
1413 if (IS_NULL(t)) {
1414 t = onig_st_init_callout_name_table_with_size(INIT_NAMES_ALLOC_NUM);
1415 CHECK_NULL_RETURN_MEMERR(t);
1416 GlobalCalloutNameTable = t;
1417 }
1418 e = (CalloutNameEntry* )xmalloc(sizeof(CalloutNameEntry));
1419 CHECK_NULL_RETURN_MEMERR(e);
1420
1421 e->name = onigenc_strdup(enc, name, name_end);
1422 if (IS_NULL(e->name)) {
1423 xfree(e); return ONIGERR_MEMORY;
1424 }
1425
1426 r = st_insert_callout_name_table(t, enc, is_not_single,
1427 e->name, (e->name + (name_end - name)),
1428 (HashDataType )e);
1429 if (r < 0) return r;
1430
1431 #else
1432
1433 int alloc;
1434
1435 if (IS_NULL(t)) {
1436 alloc = INIT_NAMES_ALLOC_NUM;
1437 t = (CalloutNameTable* )xmalloc(sizeof(CalloutNameTable));
1438 CHECK_NULL_RETURN_MEMERR(t);
1439 t->e = NULL;
1440 t->alloc = 0;
1441 t->num = 0;
1442
1443 t->e = (CalloutNameEntry* )xmalloc(sizeof(CalloutNameEntry) * alloc);
1444 if (IS_NULL(t->e)) {
1445 xfree(t);
1446 return ONIGERR_MEMORY;
1447 }
1448 t->alloc = alloc;
1449 GlobalCalloutNameTable = t;
1450 goto clear;
1451 }
1452 else if (t->num == t->alloc) {
1453 int i;
1454
1455 alloc = t->alloc * 2;
1456 t->e = (CalloutNameEntry* )xrealloc(t->e, sizeof(CalloutNameEntry) * alloc, sizeof(CalloutNameEntry)*t->alloc);
1457 CHECK_NULL_RETURN_MEMERR(t->e);
1458 t->alloc = alloc;
1459
1460 clear:
1461 for (i = t->num; i < t->alloc; i++) {
1462 t->e[i].name = NULL;
1463 t->e[i].name_len = 0;
1464 t->e[i].id = 0;
1465 }
1466 }
1467 e = &(t->e[t->num]);
1468 t->num++;
1469 e->name = onigenc_strdup(enc, name, name_end);
1470 if (IS_NULL(e->name)) return ONIGERR_MEMORY;
1471 #endif
1472
1473 CalloutNameIDCounter++;
1474 e->id = CalloutNameIDCounter;
1475 e->name_len = (int )(name_end - name);
1476 }
1477
1478 *rentry = e;
1479 return e->id;
1480 }
1481
1482 static int
1483 is_allowed_callout_name(OnigEncoding enc, UChar* name, UChar* name_end)
1484 {
1485 UChar* p;
1486 OnigCodePoint c;
1487
1488 if (name >= name_end) return 0;
1489
1490 p = name;
1491 while (p < name_end) {
1492 c = ONIGENC_MBC_TO_CODE(enc, p, name_end);
1493 if (! IS_ALLOWED_CODE_IN_CALLOUT_NAME(c))
1494 return 0;
1495
1496 if (p == name) {
1497 if (c >= '0' && c <= '9') return 0;
1498 }
1499
1500 p += ONIGENC_MBC_ENC_LEN(enc, p);
1501 }
1502
1503 return 1;
1504 }
1505
1506 static int
1507 is_allowed_callout_tag_name(OnigEncoding enc, UChar* name, UChar* name_end)
1508 {
1509 UChar* p;
1510 OnigCodePoint c;
1511
1512 if (name >= name_end) return 0;
1513
1514 p = name;
1515 while (p < name_end) {
1516 c = ONIGENC_MBC_TO_CODE(enc, p, name_end);
1517 if (! IS_ALLOWED_CODE_IN_CALLOUT_TAG_NAME(c))
1518 return 0;
1519
1520 if (p == name) {
1521 if (c >= '0' && c <= '9') return 0;
1522 }
1523
1524 p += ONIGENC_MBC_ENC_LEN(enc, p);
1525 }
1526
1527 return 1;
1528 }
1529
1530 extern int
1531 onig_set_callout_of_name(OnigEncoding enc, OnigCalloutType callout_type,
1532 UChar* name, UChar* name_end, int in,
1533 OnigCalloutFunc start_func,
1534 OnigCalloutFunc end_func,
1535 int arg_num, unsigned int arg_types[],
1536 int opt_arg_num, OnigValue opt_defaults[])
1537 {
1538 int r;
1539 int i;
1540 int j;
1541 int id;
1542 int is_not_single;
1543 CalloutNameEntry* e;
1544 CalloutNameListEntry* fe;
1545
1546 if (callout_type != ONIG_CALLOUT_TYPE_SINGLE)
1547 return ONIGERR_INVALID_ARGUMENT;
1548
1549 if (arg_num < 0 || arg_num > ONIG_CALLOUT_MAX_ARGS_NUM)
1550 return ONIGERR_INVALID_CALLOUT_ARG;
1551
1552 if (opt_arg_num < 0 || opt_arg_num > arg_num)
1553 return ONIGERR_INVALID_CALLOUT_ARG;
1554
1555 if (start_func == 0 && end_func == 0)
1556 return ONIGERR_INVALID_CALLOUT_ARG;
1557
1558 if ((in & ONIG_CALLOUT_IN_PROGRESS) == 0 && (in & ONIG_CALLOUT_IN_RETRACTION) == 0)
1559 return ONIGERR_INVALID_CALLOUT_ARG;
1560
1561 for (i = 0; i < arg_num; i++) {
1562 unsigned int t = arg_types[i];
1563 if (t == ONIG_TYPE_VOID)
1564 return ONIGERR_INVALID_CALLOUT_ARG;
1565 else {
1566 if (i >= arg_num - opt_arg_num) {
1567 if (t != ONIG_TYPE_LONG && t != ONIG_TYPE_CHAR && t != ONIG_TYPE_STRING &&
1568 t != ONIG_TYPE_TAG)
1569 return ONIGERR_INVALID_CALLOUT_ARG;
1570 }
1571 else {
1572 if (t != ONIG_TYPE_LONG) {
1573 t = t & ~ONIG_TYPE_LONG;
1574 if (t != ONIG_TYPE_CHAR && t != ONIG_TYPE_STRING && t != ONIG_TYPE_TAG)
1575 return ONIGERR_INVALID_CALLOUT_ARG;
1576 }
1577 }
1578 }
1579 }
1580
1581 if (! is_allowed_callout_name(enc, name, name_end)) {
1582 return ONIGERR_INVALID_CALLOUT_NAME;
1583 }
1584
1585 is_not_single = (callout_type != ONIG_CALLOUT_TYPE_SINGLE);
1586 id = callout_name_entry(&e, enc, is_not_single, name, name_end);
1587 if (id < 0) return id;
1588
1589 r = ONIG_NORMAL;
1590 if (IS_NULL(GlobalCalloutNameList)) {
1591 r = make_callout_func_list(&GlobalCalloutNameList, 10);
1592 if (r != ONIG_NORMAL) return r;
1593 }
1594
1595 while (id >= GlobalCalloutNameList->n) {
1596 int rid;
1597 r = callout_func_list_add(GlobalCalloutNameList, &rid);
1598 if (r != ONIG_NORMAL) return r;
1599 }
1600
1601 fe = GlobalCalloutNameList->v + id;
1602 fe->type = callout_type;
1603 fe->in = in;
1604 fe->start_func = start_func;
1605 fe->end_func = end_func;
1606 fe->arg_num = arg_num;
1607 fe->opt_arg_num = opt_arg_num;
1608 fe->name = e->name;
1609
1610 for (i = 0; i < arg_num; i++) {
1611 fe->arg_types[i] = arg_types[i];
1612 }
1613 for (i = arg_num - opt_arg_num, j = 0; i < arg_num; i++, j++) {
1614 if(IS_NULL(opt_defaults))return ONIGERR_INVALID_ARGUMENT;
1615 if (fe->arg_types[i] == ONIG_TYPE_STRING) {
1616 OnigValue* val;
1617 UChar* ds;
1618
1619 if (IS_NULL(opt_defaults)) return ONIGERR_INVALID_ARGUMENT;
1620
1621 val = opt_defaults + j;
1622 ds = onigenc_strdup(enc, val->s.start, val->s.end);
1623 CHECK_NULL_RETURN_MEMERR(ds);
1624
1625 fe->opt_defaults[i].s.start = ds;
1626 fe->opt_defaults[i].s.end = ds + (val->s.end - val->s.start);
1627 }
1628 else {
1629 fe->opt_defaults[i] = opt_defaults[j];
1630 }
1631 }
1632
1633 r = id;
1634 return r;
1635 }
1636
1637 static int
1638 get_callout_name_id_by_name(OnigEncoding enc, int is_not_single,
1639 UChar* name, UChar* name_end, int* rid)
1640 {
1641 int r;
1642 CalloutNameEntry* e;
1643
1644 if (! is_allowed_callout_name(enc, name, name_end)) {
1645 return ONIGERR_INVALID_CALLOUT_NAME;
1646 }
1647
1648 e = callout_name_find(enc, is_not_single, name, name_end);
1649 if (IS_NULL(e)) {
1650 return ONIGERR_UNDEFINED_CALLOUT_NAME;
1651 }
1652
1653 r = ONIG_NORMAL;
1654 *rid = e->id;
1655
1656 return r;
1657 }
1658
1659 extern OnigCalloutFunc
1660 onig_get_callout_start_func(regex_t* reg, int callout_num)
1661 {
1662 /* If used for callouts of contents, return 0. */
1663 CalloutListEntry* e;
1664
1665 e = onig_reg_callout_list_at(reg, callout_num);
1666 CHECK_NULL_RETURN(e);
1667 return e->start_func;
1668 }
1669
1670 extern const UChar*
1671 onig_get_callout_tag_start(regex_t* reg, int callout_num)
1672 {
1673 CalloutListEntry* e = onig_reg_callout_list_at(reg, callout_num);
1674 CHECK_NULL_RETURN(e);
1675 return e->tag_start;
1676 }
1677
1678 extern const UChar*
1679 onig_get_callout_tag_end(regex_t* reg, int callout_num)
1680 {
1681 CalloutListEntry* e = onig_reg_callout_list_at(reg, callout_num);
1682 CHECK_NULL_RETURN(e);
1683 return e->tag_end;
1684 }
1685
1686
1687 extern OnigCalloutType
1688 onig_get_callout_type_by_name_id(int name_id)
1689 {
1690 if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
1691 return 0;
1692
1693 return GlobalCalloutNameList->v[name_id].type;
1694 }
1695
1696 extern OnigCalloutFunc
1697 onig_get_callout_start_func_by_name_id(int name_id)
1698 {
1699 if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
1700 return 0;
1701
1702 return GlobalCalloutNameList->v[name_id].start_func;
1703 }
1704
1705 extern OnigCalloutFunc
1706 onig_get_callout_end_func_by_name_id(int name_id)
1707 {
1708 if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
1709 return 0;
1710
1711 return GlobalCalloutNameList->v[name_id].end_func;
1712 }
1713
1714 extern int
1715 onig_get_callout_in_by_name_id(int name_id)
1716 {
1717 if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
1718 return 0;
1719
1720 return GlobalCalloutNameList->v[name_id].in;
1721 }
1722
1723 static int
1724 get_callout_arg_num_by_name_id(int name_id)
1725 {
1726 return GlobalCalloutNameList->v[name_id].arg_num;
1727 }
1728
1729 static int
1730 get_callout_opt_arg_num_by_name_id(int name_id)
1731 {
1732 return GlobalCalloutNameList->v[name_id].opt_arg_num;
1733 }
1734
1735 static unsigned int
1736 get_callout_arg_type_by_name_id(int name_id, int index)
1737 {
1738 return GlobalCalloutNameList->v[name_id].arg_types[index];
1739 }
1740
1741 static OnigValue
1742 get_callout_opt_default_by_name_id(int name_id, int index)
1743 {
1744 return GlobalCalloutNameList->v[name_id].opt_defaults[index];
1745 }
1746
1747 extern UChar*
1748 onig_get_callout_name_by_name_id(int name_id)
1749 {
1750 if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
1751 return 0;
1752
1753 return GlobalCalloutNameList->v[name_id].name;
1754 }
1755
1756 extern int
1757 onig_global_callout_names_free(void)
1758 {
1759 free_callout_func_list(GlobalCalloutNameList);
1760 GlobalCalloutNameList = 0;
1761
1762 global_callout_name_table_free();
1763 return ONIG_NORMAL;
1764 }
1765
1766
1767 typedef st_table CalloutTagTable;
1768 typedef intptr_t CalloutTagVal;
1769
1770 #define CALLOUT_TAG_LIST_FLAG_TAG_EXIST (1<<0)
1771
1772 static int
1773 i_callout_callout_list_set(UChar* key, CalloutTagVal e, void* arg)
1774 {
1775 int num;
1776 RegexExt* ext = (RegexExt* )arg;
1777
1778 num = (int )e - 1;
1779 ext->callout_list[num].flag |= CALLOUT_TAG_LIST_FLAG_TAG_EXIST;
1780 return ST_CONTINUE;
1781 }
1782
1783 static int
1784 setup_ext_callout_list_values(regex_t* reg)
1785 {
1786 int i, j;
1787 RegexExt* ext;
1788
1789 ext = reg->extp;
1790 if (IS_NOT_NULL(ext->tag_table)) {
1791 onig_st_foreach((CalloutTagTable *)ext->tag_table, i_callout_callout_list_set,
1792 (st_data_t )ext);
1793 }
1794
1795 for (i = 0; i < ext->callout_num; i++) {
1796 CalloutListEntry* e = ext->callout_list + i;
1797 if (e->of == ONIG_CALLOUT_OF_NAME) {
1798 for (j = 0; j < e->u.arg.num; j++) {
1799 if (e->u.arg.types[j] == ONIG_TYPE_TAG) {
1800 UChar* start;
1801 UChar* end;
1802 int num;
1803 start = e->u.arg.vals[j].s.start;
1804 end = e->u.arg.vals[j].s.end;
1805 num = onig_get_callout_num_by_tag(reg, start, end);
1806 if (num < 0) return num;
1807 e->u.arg.vals[j].tag = num;
1808 }
1809 }
1810 }
1811 }
1812
1813 return ONIG_NORMAL;
1814 }
1815
1816 extern int
1817 onig_callout_tag_is_exist_at_callout_num(regex_t* reg, int callout_num)
1818 {
1819 RegexExt* ext = reg->extp;
1820
1821 if (IS_NULL(ext) || IS_NULL(ext->callout_list)) return 0;
1822 if (callout_num > ext->callout_num) return 0;
1823
1824 return (ext->callout_list[callout_num].flag &
1825 CALLOUT_TAG_LIST_FLAG_TAG_EXIST) != 0;
1826 }
1827
1828 static int
1829 i_free_callout_tag_entry(UChar* key, CalloutTagVal e, void* arg ARG_UNUSED)
1830 {
1831 xfree(key);
1832 return ST_DELETE;
1833 }
1834
1835 static int
1836 callout_tag_table_clear(CalloutTagTable* t)
1837 {
1838 if (IS_NOT_NULL(t)) {
1839 onig_st_foreach(t, i_free_callout_tag_entry, 0);
1840 }
1841 return 0;
1842 }
1843
1844 extern int
1845 onig_callout_tag_table_free(void* table)
1846 {
1847 CalloutTagTable* t = (CalloutTagTable* )table;
1848
1849 if (IS_NOT_NULL(t)) {
1850 int r = callout_tag_table_clear(t);
1851 if (r != 0) return r;
1852
1853 onig_st_free_table(t);
1854 }
1855
1856 return 0;
1857 }
1858
1859 extern int
1860 onig_get_callout_num_by_tag(regex_t* reg,
1861 const UChar* tag, const UChar* tag_end)
1862 {
1863 int r;
1864 RegexExt* ext;
1865 CalloutTagVal e;
1866
1867 ext = reg->extp;
1868 if (IS_NULL(ext) || IS_NULL(ext->tag_table))
1869 return ONIGERR_INVALID_CALLOUT_TAG_NAME;
1870
1871 r = onig_st_lookup_strend(ext->tag_table, tag, tag_end,
1872 (HashDataType* )((void* )(&e)));
1873 if (r == 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME;
1874 return (int )e;
1875 }
1876
1877 static CalloutTagVal
1878 callout_tag_find(CalloutTagTable* t, const UChar* name, const UChar* name_end)
1879 {
1880 CalloutTagVal e;
1881
1882 e = -1;
1883 if (IS_NOT_NULL(t)) {
1884 onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));
1885 }
1886 return e;
1887 }
1888
1889 static int
1890 callout_tag_table_new(CalloutTagTable** rt)
1891 {
1892 CalloutTagTable* t;
1893
1894 *rt = 0;
1895 t = onig_st_init_strend_table_with_size(INIT_TAG_NAMES_ALLOC_NUM);
1896 CHECK_NULL_RETURN_MEMERR(t);
1897
1898 *rt = t;
1899 return ONIG_NORMAL;
1900 }
1901
1902 static int
1903 callout_tag_entry_raw(ScanEnv* env, CalloutTagTable* t, UChar* name,
1904 UChar* name_end, CalloutTagVal entry_val)
1905 {
1906 int r;
1907 CalloutTagVal val;
1908
1909 if (name_end - name <= 0)
1910 return ONIGERR_INVALID_CALLOUT_TAG_NAME;
1911
1912 val = callout_tag_find(t, name, name_end);
1913 if (val >= 0) {
1914 onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME,
1915 name, name_end);
1916 return ONIGERR_MULTIPLEX_DEFINED_NAME;
1917 }
1918
1919 r = onig_st_insert_strend(t, name, name_end, (HashDataType )entry_val);
1920 if (r < 0) return r;
1921
1922 return ONIG_NORMAL;
1923 }
1924
1925 static int
1926 ext_ensure_tag_table(regex_t* reg)
1927 {
1928 int r;
1929 RegexExt* ext;
1930 CalloutTagTable* t;
1931
1932 ext = onig_get_regex_ext(reg);
1933 CHECK_NULL_RETURN_MEMERR(ext);
1934
1935 if (IS_NULL(ext->tag_table)) {
1936 r = callout_tag_table_new(&t);
1937 if (r != ONIG_NORMAL) return r;
1938
1939 ext->tag_table = t;
1940 }
1941
1942 return ONIG_NORMAL;
1943 }
1944
1945 static int
1946 callout_tag_entry(ScanEnv* env, regex_t* reg, UChar* name, UChar* name_end,
1947 CalloutTagVal entry_val)
1948 {
1949 int r;
1950 RegexExt* ext;
1951 CalloutListEntry* e;
1952
1953 r = ext_ensure_tag_table(reg);
1954 if (r != ONIG_NORMAL) return r;
1955
1956 ext = onig_get_regex_ext(reg);
1957 CHECK_NULL_RETURN_MEMERR(ext);
1958 CHECK_NULL_RETURN_MEMERR(ext->tag_table);
1959 r = callout_tag_entry_raw(env, ext->tag_table, name, name_end, entry_val);
1960
1961 e = onig_reg_callout_list_at(reg, (int )entry_val);
1962 CHECK_NULL_RETURN_MEMERR(e);
1963 e->tag_start = name;
1964 e->tag_end = name_end;
1965
1966 return r;
1967 }
1968
1969 #endif /* USE_CALLOUT */
1970
1971
1972 #define INIT_SCANENV_MEMENV_ALLOC_SIZE 16
1973
1974 static void
1975 scan_env_clear(ScanEnv* env)
1976 {
1977 MEM_STATUS_CLEAR(env->capture_history);
1978 MEM_STATUS_CLEAR(env->bt_mem_start);
1979 MEM_STATUS_CLEAR(env->bt_mem_end);
1980 MEM_STATUS_CLEAR(env->backrefed_mem);
1981 env->error = (UChar* )NULL;
1982 env->error_end = (UChar* )NULL;
1983 env->num_call = 0;
1984
1985 #ifdef USE_CALL
1986 env->unset_addr_list = NULL;
1987 env->has_call_zero = 0;
1988 #endif
1989
1990 env->num_mem = 0;
1991 env->num_named = 0;
1992 env->mem_alloc = 0;
1993 env->mem_env_dynamic = (MemEnv* )NULL;
1994
1995 xmemset(env->mem_env_static, 0, sizeof(env->mem_env_static));
1996
1997 env->parse_depth = 0;
1998 env->keep_num = 0;
1999 env->save_num = 0;
2000 env->save_alloc_num = 0;
2001 env->saves = 0;
2002 }
2003
2004 static int
2005 scan_env_add_mem_entry(ScanEnv* env)
2006 {
2007 int i, need, alloc;
2008 MemEnv* p;
2009
2010 need = env->num_mem + 1;
2011 if (need > MaxCaptureNum && MaxCaptureNum != 0)
2012 return ONIGERR_TOO_MANY_CAPTURES;
2013
2014 if (need >= SCANENV_MEMENV_SIZE) {
2015 if (env->mem_alloc <= need) {
2016 if (IS_NULL(env->mem_env_dynamic)) {
2017 alloc = INIT_SCANENV_MEMENV_ALLOC_SIZE;
2018 p = (MemEnv* )xmalloc(sizeof(MemEnv) * alloc);
2019 CHECK_NULL_RETURN_MEMERR(p);
2020 xmemcpy(p, env->mem_env_static, sizeof(env->mem_env_static));
2021 }
2022 else {
2023 alloc = env->mem_alloc * 2;
2024 p = (MemEnv* )xrealloc(env->mem_env_dynamic, sizeof(MemEnv) * alloc, sizeof(MemEnv)*env->mem_alloc);
2025 CHECK_NULL_RETURN_MEMERR(p);
2026 }
2027
2028 for (i = env->num_mem + 1; i < alloc; i++) {
2029 p[i].node = NULL_NODE;
2030 #if 0
2031 p[i].in = 0;
2032 p[i].recursion = 0;
2033 #endif
2034 }
2035
2036 env->mem_env_dynamic = p;
2037 env->mem_alloc = alloc;
2038 }
2039 }
2040
2041 env->num_mem++;
2042 return env->num_mem;
2043 }
2044
2045 static int
2046 scan_env_set_mem_node(ScanEnv* env, int num, Node* node)
2047 {
2048 if (env->num_mem >= num)
2049 SCANENV_MEMENV(env)[num].node = node;
2050 else
2051 return ONIGERR_PARSER_BUG;
2052 return 0;
2053 }
2054
2055 extern void
2056 onig_node_free(Node* node)
2057 {
2058 start:
2059 if (IS_NULL(node)) return ;
2060
2061 #ifdef DEBUG_NODE_FREE
2062 fprintf(stderr, "onig_node_free: %p\n", node);
2063 #endif
2064
2065 switch (NODE_TYPE(node)) {
2066 case NODE_STRING:
2067 if (STR_(node)->capacity != 0 &&
2068 IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) {
2069 xfree(STR_(node)->s);
2070 }
2071 break;
2072
2073 case NODE_LIST:
2074 case NODE_ALT:
2075 onig_node_free(NODE_CAR(node));
2076 {
2077 Node* next_node = NODE_CDR(node);
2078
2079 xfree(node);
2080 node = next_node;
2081 goto start;
2082 }
2083 break;
2084
2085 case NODE_CCLASS:
2086 {
2087 CClassNode* cc = CCLASS_(node);
2088
2089 if (cc->mbuf)
2090 bbuf_free(cc->mbuf);
2091 }
2092 break;
2093
2094 case NODE_BACKREF:
2095 if (IS_NOT_NULL(BACKREF_(node)->back_dynamic))
2096 xfree(BACKREF_(node)->back_dynamic);
2097 break;
2098
2099 case NODE_BAG:
2100 if (NODE_BODY(node))
2101 onig_node_free(NODE_BODY(node));
2102
2103 {
2104 BagNode* en = BAG_(node);
2105 if (en->type == BAG_IF_ELSE) {
2106 onig_node_free(en->te.Then);
2107 onig_node_free(en->te.Else);
2108 }
2109 }
2110 break;
2111
2112 case NODE_QUANT:
2113 case NODE_ANCHOR:
2114 if (NODE_BODY(node))
2115 onig_node_free(NODE_BODY(node));
2116 break;
2117
2118 case NODE_CTYPE:
2119 case NODE_CALL:
2120 case NODE_GIMMICK:
2121 break;
2122 }
2123
2124 xfree(node);
2125 }
2126
2127 static void
2128 cons_node_free_alone(Node* node)
2129 {
2130 NODE_CAR(node) = 0;
2131 NODE_CDR(node) = 0;
2132 onig_node_free(node);
2133 }
2134
2135 static Node*
2136 node_new(void)
2137 {
2138 Node* node;
2139
2140 node = (Node* )xmalloc(sizeof(Node));
2141 CHECK_NULL_RETURN(node);
2142 xmemset(node, 0, sizeof(*node));
2143
2144 #ifdef DEBUG_NODE_FREE
2145 fprintf(stderr, "node_new: %p\n", node);
2146 #endif
2147 return node;
2148 }
2149
2150
2151 static void
2152 initialize_cclass(CClassNode* cc)
2153 {
2154 BITSET_CLEAR(cc->bs);
2155 cc->flags = 0;
2156 cc->mbuf = NULL;
2157 }
2158
2159 static Node*
2160 node_new_cclass(void)
2161 {
2162 Node* node = node_new();
2163 CHECK_NULL_RETURN(node);
2164
2165 NODE_SET_TYPE(node, NODE_CCLASS);
2166 initialize_cclass(CCLASS_(node));
2167 return node;
2168 }
2169
2170 static Node*
2171 node_new_ctype(int type, int not, OnigOptionType options)
2172 {
2173 Node* node = node_new();
2174 CHECK_NULL_RETURN(node);
2175
2176 NODE_SET_TYPE(node, NODE_CTYPE);
2177 CTYPE_(node)->ctype = type;
2178 CTYPE_(node)->not = not;
2179 CTYPE_(node)->options = options;
2180 CTYPE_(node)->ascii_mode = IS_ASCII_MODE_CTYPE_OPTION(type, options);
2181 return node;
2182 }
2183
2184 static Node*
2185 node_new_anychar(void)
2186 {
2187 Node* node = node_new_ctype(CTYPE_ANYCHAR, 0, ONIG_OPTION_NONE);
2188 return node;
2189 }
2190
2191 static Node*
2192 node_new_anychar_with_fixed_option(OnigOptionType option)
2193 {
2194 CtypeNode* ct;
2195 Node* node;
2196
2197 node = node_new_anychar();
2198 CHECK_NULL_RETURN(node);
2199
2200 ct = CTYPE_(node);
2201 ct->options = option;
2202 NODE_STATUS_ADD(node, FIXED_OPTION);
2203 return node;
2204 }
2205
2206 static int
2207 node_new_no_newline(Node** node, ScanEnv* env)
2208 {
2209 Node* n;
2210
2211 n = node_new_anychar_with_fixed_option(ONIG_OPTION_NONE);
2212 CHECK_NULL_RETURN_MEMERR(n);
2213 *node = n;
2214 return 0;
2215 }
2216
2217 static int
2218 node_new_true_anychar(Node** node, ScanEnv* env)
2219 {
2220 Node* n;
2221
2222 n = node_new_anychar_with_fixed_option(ONIG_OPTION_MULTILINE);
2223 CHECK_NULL_RETURN_MEMERR(n);
2224 *node = n;
2225 return 0;
2226 }
2227
2228 static Node*
2229 node_new_list(Node* left, Node* right)
2230 {
2231 Node* node = node_new();
2232 CHECK_NULL_RETURN(node);
2233
2234 NODE_SET_TYPE(node, NODE_LIST);
2235 NODE_CAR(node) = left;
2236 NODE_CDR(node) = right;
2237 return node;
2238 }
2239
2240 extern Node*
2241 onig_node_new_list(Node* left, Node* right)
2242 {
2243 return node_new_list(left, right);
2244 }
2245
2246 extern Node*
2247 onig_node_list_add(Node* list, Node* x)
2248 {
2249 Node *n;
2250
2251 n = onig_node_new_list(x, NULL);
2252 if (IS_NULL(n)) return NULL_NODE;
2253
2254 if (IS_NOT_NULL(list)) {
2255 while (IS_NOT_NULL(NODE_CDR(list)))
2256 list = NODE_CDR(list);
2257
2258 NODE_CDR(list) = n;
2259 }
2260
2261 return n;
2262 }
2263
2264 extern Node*
2265 onig_node_new_alt(Node* left, Node* right)
2266 {
2267 Node* node = node_new();
2268 CHECK_NULL_RETURN(node);
2269
2270 NODE_SET_TYPE(node, NODE_ALT);
2271 NODE_CAR(node) = left;
2272 NODE_CDR(node) = right;
2273 return node;
2274 }
2275
2276 static Node*
2277 make_list_or_alt(NodeType type, int n, Node* ns[])
2278 {
2279 Node* r;
2280
2281 if (n <= 0) return NULL_NODE;
2282
2283 if (n == 1) {
2284 r = node_new();
2285 CHECK_NULL_RETURN(r);
2286 NODE_SET_TYPE(r, type);
2287 NODE_CAR(r) = ns[0];
2288 NODE_CDR(r) = NULL_NODE;
2289 }
2290 else {
2291 Node* right;
2292
2293 r = node_new();
2294 CHECK_NULL_RETURN(r);
2295
2296 right = make_list_or_alt(type, n - 1, ns + 1);
2297 if (IS_NULL(right)) {
2298 onig_node_free(r);
2299 return NULL_NODE;
2300 }
2301
2302 NODE_SET_TYPE(r, type);
2303 NODE_CAR(r) = ns[0];
2304 NODE_CDR(r) = right;
2305 }
2306
2307 return r;
2308 }
2309
2310 static Node*
2311 make_list(int n, Node* ns[])
2312 {
2313 return make_list_or_alt(NODE_LIST, n, ns);
2314 }
2315
2316 static Node*
2317 make_alt(int n, Node* ns[])
2318 {
2319 return make_list_or_alt(NODE_ALT, n, ns);
2320 }
2321
2322 extern Node*
2323 onig_node_new_anchor(int type, int ascii_mode)
2324 {
2325 Node* node = node_new();
2326 CHECK_NULL_RETURN(node);
2327
2328 NODE_SET_TYPE(node, NODE_ANCHOR);
2329 ANCHOR_(node)->type = type;
2330 ANCHOR_(node)->char_len = -1;
2331 ANCHOR_(node)->ascii_mode = ascii_mode;
2332 return node;
2333 }
2334
2335 static Node*
2336 node_new_backref(int back_num, int* backrefs, int by_name,
2337 #ifdef USE_BACKREF_WITH_LEVEL
2338 int exist_level, int nest_level,
2339 #endif
2340 ScanEnv* env)
2341 {
2342 int i;
2343 Node* node = node_new();
2344
2345 CHECK_NULL_RETURN(node);
2346
2347 NODE_SET_TYPE(node, NODE_BACKREF);
2348 BACKREF_(node)->back_num = back_num;
2349 BACKREF_(node)->back_dynamic = (int* )NULL;
2350 if (by_name != 0)
2351 NODE_STATUS_ADD(node, BY_NAME);
2352
2353 #ifdef USE_BACKREF_WITH_LEVEL
2354 if (exist_level != 0) {
2355 NODE_STATUS_ADD(node, NEST_LEVEL);
2356 BACKREF_(node)->nest_level = nest_level;
2357 }
2358 #endif
2359
2360 for (i = 0; i < back_num; i++) {
2361 if (backrefs[i] <= env->num_mem &&
2362 IS_NULL(SCANENV_MEMENV(env)[backrefs[i]].node)) {
2363 NODE_STATUS_ADD(node, RECURSION); /* /...(\1).../ */
2364 break;
2365 }
2366 }
2367
2368 if (back_num <= NODE_BACKREFS_SIZE) {
2369 for (i = 0; i < back_num; i++)
2370 BACKREF_(node)->back_static[i] = backrefs[i];
2371 }
2372 else {
2373 int* p = (int* )xmalloc(sizeof(int) * back_num);
2374 if (IS_NULL(p)) {
2375 onig_node_free(node);
2376 return NULL;
2377 }
2378 BACKREF_(node)->back_dynamic = p;
2379 for (i = 0; i < back_num; i++)
2380 p[i] = backrefs[i];
2381 }
2382 return node;
2383 }
2384
2385 static Node*
2386 node_new_backref_checker(int back_num, int* backrefs, int by_name,
2387 #ifdef USE_BACKREF_WITH_LEVEL
2388 int exist_level, int nest_level,
2389 #endif
2390 ScanEnv* env)
2391 {
2392 Node* node;
2393
2394 node = node_new_backref(back_num, backrefs, by_name,
2395 #ifdef USE_BACKREF_WITH_LEVEL
2396 exist_level, nest_level,
2397 #endif
2398 env);
2399 CHECK_NULL_RETURN(node);
2400
2401 NODE_STATUS_ADD(node, CHECKER);
2402 return node;
2403 }
2404
2405 #ifdef USE_CALL
2406 static Node*
2407 node_new_call(UChar* name, UChar* name_end, int gnum, int by_number)
2408 {
2409 Node* node = node_new();
2410 CHECK_NULL_RETURN(node);
2411
2412 NODE_SET_TYPE(node, NODE_CALL);
2413 CALL_(node)->by_number = by_number;
2414 CALL_(node)->name = name;
2415 CALL_(node)->name_end = name_end;
2416 CALL_(node)->group_num = gnum;
2417 CALL_(node)->entry_count = 1;
2418 return node;
2419 }
2420 #endif
2421
2422 static Node*
2423 node_new_quantifier(int lower, int upper, int by_number)
2424 {
2425 Node* node = node_new();
2426 CHECK_NULL_RETURN(node);
2427
2428 NODE_SET_TYPE(node, NODE_QUANT);
2429 QUANT_(node)->lower = lower;
2430 QUANT_(node)->upper = upper;
2431 QUANT_(node)->greedy = 1;
2432 QUANT_(node)->emptiness = BODY_IS_NOT_EMPTY;
2433 QUANT_(node)->head_exact = NULL_NODE;
2434 QUANT_(node)->next_head_exact = NULL_NODE;
2435 QUANT_(node)->is_refered = 0;
2436 if (by_number != 0)
2437 NODE_STATUS_ADD(node, BY_NUMBER);
2438
2439 return node;
2440 }
2441
2442 static Node*
2443 node_new_bag(enum BagType type)
2444 {
2445 Node* node = node_new();
2446 CHECK_NULL_RETURN(node);
2447
2448 NODE_SET_TYPE(node, NODE_BAG);
2449 BAG_(node)->type = type;
2450
2451 switch (type) {
2452 case BAG_MEMORY:
2453 BAG_(node)->m.regnum = 0;
2454 BAG_(node)->m.called_addr = -1;
2455 BAG_(node)->m.entry_count = 1;
2456 BAG_(node)->m.called_state = 0;
2457 break;
2458
2459 case BAG_OPTION:
2460 BAG_(node)->o.options = 0;
2461 break;
2462
2463 case BAG_STOP_BACKTRACK:
2464 break;
2465
2466 case BAG_IF_ELSE:
2467 BAG_(node)->te.Then = 0;
2468 BAG_(node)->te.Else = 0;
2469 break;
2470 }
2471
2472 BAG_(node)->opt_count = 0;
2473 return node;
2474 }
2475
2476 extern Node*
2477 onig_node_new_bag(enum BagType type)
2478 {
2479 return node_new_bag(type);
2480 }
2481
2482 static Node*
2483 node_new_bag_if_else(Node* cond, Node* Then, Node* Else)
2484 {
2485 Node* n;
2486 n = node_new_bag(BAG_IF_ELSE);
2487 CHECK_NULL_RETURN(n);
2488
2489 NODE_BODY(n) = cond;
2490 BAG_(n)->te.Then = Then;
2491 BAG_(n)->te.Else = Else;
2492 return n;
2493 }
2494
2495 static Node*
2496 node_new_memory(int is_named)
2497 {
2498 Node* node = node_new_bag(BAG_MEMORY);
2499 CHECK_NULL_RETURN(node);
2500 if (is_named != 0)
2501 NODE_STATUS_ADD(node, NAMED_GROUP);
2502
2503 return node;
2504 }
2505
2506 static Node*
2507 node_new_option(OnigOptionType option)
2508 {
2509 Node* node = node_new_bag(BAG_OPTION);
2510 CHECK_NULL_RETURN(node);
2511 BAG_(node)->o.options = option;
2512 return node;
2513 }
2514
2515 static Node*
2516 node_new_group(Node* content)
2517 {
2518 Node* node;
2519
2520 node = node_new();
2521 CHECK_NULL_RETURN(node);
2522 NODE_SET_TYPE(node, NODE_LIST);
2523 NODE_CAR(node) = content;
2524 NODE_CDR(node) = NULL_NODE;
2525
2526 return node;
2527 }
2528
2529 static Node*
2530 node_drop_group(Node* group)
2531 {
2532 Node* content;
2533
2534 content = NODE_CAR(group);
2535 NODE_CAR(group) = NULL_NODE;
2536 onig_node_free(group);
2537 return content;
2538 }
2539
2540 static int
2541 node_new_fail(Node** node, ScanEnv* env)
2542 {
2543 *node = node_new();
2544 CHECK_NULL_RETURN_MEMERR(*node);
2545
2546 NODE_SET_TYPE(*node, NODE_GIMMICK);
2547 GIMMICK_(*node)->type = GIMMICK_FAIL;
2548 return ONIG_NORMAL;
2549 }
2550
2551 static int
2552 node_new_save_gimmick(Node** node, enum SaveType save_type, ScanEnv* env)
2553 {
2554 int id;
2555 int r;
2556
2557 r = save_entry(env, save_type, &id);
2558 if (r != ONIG_NORMAL) return r;
2559
2560 *node = node_new();
2561 CHECK_NULL_RETURN_MEMERR(*node);
2562
2563 NODE_SET_TYPE(*node, NODE_GIMMICK);
2564 GIMMICK_(*node)->id = id;
2565 GIMMICK_(*node)->type = GIMMICK_SAVE;
2566 GIMMICK_(*node)->detail_type = (int )save_type;
2567
2568 return ONIG_NORMAL;
2569 }
2570
2571 static int
2572 node_new_update_var_gimmick(Node** node, enum UpdateVarType update_var_type,
2573 int id, ScanEnv* env)
2574 {
2575 *node = node_new();
2576 CHECK_NULL_RETURN_MEMERR(*node);
2577
2578 NODE_SET_TYPE(*node, NODE_GIMMICK);
2579 GIMMICK_(*node)->id = id;
2580 GIMMICK_(*node)->type = GIMMICK_UPDATE_VAR;
2581 GIMMICK_(*node)->detail_type = (int )update_var_type;
2582
2583 return ONIG_NORMAL;
2584 }
2585
2586 static int
2587 node_new_keep(Node** node, ScanEnv* env)
2588 {
2589 int r;
2590
2591 r = node_new_save_gimmick(node, SAVE_KEEP, env);
2592 if (r != 0) return r;
2593
2594 env->keep_num++;
2595 return ONIG_NORMAL;
2596 }
2597
2598 #ifdef USE_CALLOUT
2599
2600 extern void
2601 onig_free_reg_callout_list(int n, CalloutListEntry* list)
2602 {
2603 int i;
2604 int j;
2605
2606 if (IS_NULL(list)) return ;
2607
2608 for (i = 0; i < n; i++) {
2609 if (list[i].of == ONIG_CALLOUT_OF_NAME) {
2610 for (j = 0; j < list[i].u.arg.passed_num; j++) {
2611 if (list[i].u.arg.types[j] == ONIG_TYPE_STRING) {
2612 if (IS_NOT_NULL(list[i].u.arg.vals[j].s.start))
2613 xfree(list[i].u.arg.vals[j].s.start);
2614 }
2615 }
2616 }
2617 else { /* ONIG_CALLOUT_OF_CONTENTS */
2618 if (IS_NOT_NULL(list[i].u.content.start)) {
2619 xfree((void* )list[i].u.content.start);
2620 }
2621 }
2622 }
2623
2624 xfree(list);
2625 }
2626
2627 extern CalloutListEntry*
2628 onig_reg_callout_list_at(regex_t* reg, int num)
2629 {
2630 RegexExt* ext = reg->extp;
2631 CHECK_NULL_RETURN(ext);
2632
2633 if (num <= 0 || num > ext->callout_num)
2634 return 0;
2635
2636 num--;
2637 return ext->callout_list + num;
2638 }
2639
2640 static int
2641 reg_callout_list_entry(ScanEnv* env, int* rnum)
2642 {
2643 #define INIT_CALLOUT_LIST_NUM 3
2644
2645 int num;
2646 CalloutListEntry* list;
2647 CalloutListEntry* e;
2648 RegexExt* ext;
2649
2650 ext = onig_get_regex_ext(env->reg);
2651 CHECK_NULL_RETURN_MEMERR(ext);
2652
2653 if (IS_NULL(ext->callout_list)) {
2654 list = (CalloutListEntry* )xmalloc(sizeof(*list) * INIT_CALLOUT_LIST_NUM);
2655 CHECK_NULL_RETURN_MEMERR(list);
2656
2657 ext->callout_list = list;
2658 ext->callout_list_alloc = INIT_CALLOUT_LIST_NUM;
2659 ext->callout_num = 0;
2660 }
2661
2662 num = ext->callout_num + 1;
2663 if (num > ext->callout_list_alloc) {
2664 int alloc = ext->callout_list_alloc * 2;
2665 list = (CalloutListEntry* )xrealloc(ext->callout_list,
2666 sizeof(CalloutListEntry) * alloc,
2667 sizeof(CalloutListEntry) * ext->callout_list_alloc);
2668 CHECK_NULL_RETURN_MEMERR(list);
2669
2670 ext->callout_list = list;
2671 ext->callout_list_alloc = alloc;
2672 }
2673
2674 e = ext->callout_list + (num - 1);
2675
2676 e->flag = 0;
2677 e->of = 0;
2678 e->in = ONIG_CALLOUT_OF_CONTENTS;
2679 e->type = 0;
2680 e->tag_start = 0;
2681 e->tag_end = 0;
2682 e->start_func = 0;
2683 e->end_func = 0;
2684 e->u.arg.num = 0;
2685 e->u.arg.passed_num = 0;
2686
2687 ext->callout_num = num;
2688 *rnum = num;
2689 return ONIG_NORMAL;
2690 }
2691
2692 static int
2693 node_new_callout(Node** node, OnigCalloutOf callout_of, int num, int id,
2694 ScanEnv* env)
2695 {
2696 *node = node_new();
2697 CHECK_NULL_RETURN_MEMERR(*node);
2698
2699 NODE_SET_TYPE(*node, NODE_GIMMICK);
2700 GIMMICK_(*node)->id = id;
2701 GIMMICK_(*node)->num = num;
2702 GIMMICK_(*node)->type = GIMMICK_CALLOUT;
2703 GIMMICK_(*node)->detail_type = (int )callout_of;
2704
2705 return ONIG_NORMAL;
2706 }
2707 #endif
2708
2709 static int
2710 make_text_segment(Node** node, ScanEnv* env)
2711 {
2712 int r;
2713 int i;
2714 Node* x;
2715 Node* ns[2];
2716
2717 /* \X == (?>\O(?:\Y\O)*) */
2718
2719 ns[1] = NULL_NODE;
2720
2721 r = ONIGERR_MEMORY;
2722 ns[0] = onig_node_new_anchor(ANCR_NO_TEXT_SEGMENT_BOUNDARY, 0);
2723 if (IS_NULL(ns[0])) goto err;
2724
2725 r = node_new_true_anychar(&ns[1], env);
2726 if (r != 0) goto err1;
2727
2728 x = make_list(2, ns);
2729 if (IS_NULL(x)) goto err;
2730 ns[0] = x;
2731 ns[1] = NULL_NODE;
2732
2733 x = node_new_quantifier(0, INFINITE_REPEAT, 1);
2734 if (IS_NULL(x)) goto err;
2735
2736 NODE_BODY(x) = ns[0];
2737 ns[0] = NULL_NODE;
2738 ns[1] = x;
2739
2740 r = node_new_true_anychar(&ns[0], env);
2741 if (r != 0) goto err1;
2742
2743 x = make_list(2, ns);
2744 if (IS_NULL(x)) goto err;
2745
2746 ns[0] = x;
2747 ns[1] = NULL_NODE;
2748
2749 x = node_new_bag(BAG_STOP_BACKTRACK);
2750 if (IS_NULL(x)) goto err;
2751
2752 NODE_BODY(x) = ns[0];
2753
2754 *node = x;
2755 return ONIG_NORMAL;
2756
2757 err:
2758 r = ONIGERR_MEMORY;
2759 err1:
2760 for (i = 0; i < 2; i++) onig_node_free(ns[i]);
2761 return r;
2762 }
2763
2764 static int
2765 make_absent_engine(Node** node, int pre_save_right_id, Node* absent,
2766 Node* step_one, int lower, int upper, int possessive,
2767 int is_range_cutter, ScanEnv* env)
2768 {
2769 int r;
2770 int i;
2771 int id;
2772 Node* x;
2773 Node* ns[4];
2774
2775 for (i = 0; i < 4; i++) ns[i] = NULL_NODE;
2776
2777 ns[1] = absent;
2778 ns[3] = step_one; /* for err */
2779 r = node_new_save_gimmick(&ns[0], SAVE_S, env);
2780 if (r != 0) goto err;
2781
2782 id = GIMMICK_(ns[0])->id;
2783 r = node_new_update_var_gimmick(&ns[2], UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK,
2784 id, env);
2785 if (r != 0) goto err;
2786
2787 r = node_new_fail(&ns[3], env);
2788 if (r != 0) goto err;
2789
2790 x = make_list(4, ns);
2791 if (IS_NULL(x)) goto err0;
2792
2793 ns[0] = x;
2794 ns[1] = step_one;
2795 ns[2] = ns[3] = NULL_NODE;
2796
2797 x = make_alt(2, ns);
2798 if (IS_NULL(x)) goto err0;
2799
2800 ns[0] = x;
2801
2802 x = node_new_quantifier(lower, upper, 0);
2803 if (IS_NULL(x)) goto err0;
2804
2805 NODE_BODY(x) = ns[0];
2806 ns[0] = x;
2807
2808 if (possessive != 0) {
2809 x = node_new_bag(BAG_STOP_BACKTRACK);
2810 if (IS_NULL(x)) goto err0;
2811
2812 NODE_BODY(x) = ns[0];
2813 ns[0] = x;
2814 }
2815
2816 r = node_new_update_var_gimmick(&ns[1], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
2817 pre_save_right_id, env);
2818 if (r != 0) goto err;
2819
2820 r = node_new_fail(&ns[2], env);
2821 if (r != 0) goto err;
2822
2823 x = make_list(2, ns + 1);
2824 if (IS_NULL(x)) goto err0;
2825
2826 ns[1] = x; ns[2] = NULL_NODE;
2827
2828 x = make_alt(2, ns);
2829 if (IS_NULL(x)) goto err0;
2830
2831 if (is_range_cutter != 0)
2832 NODE_STATUS_ADD(x, SUPER);
2833
2834 *node = x;
2835 return ONIG_NORMAL;
2836
2837 err0:
2838 r = ONIGERR_MEMORY;
2839 err:
2840 for (i = 0; i < 4; i++) onig_node_free(ns[i]);
2841 return r;
2842 }
2843
2844 static int
2845 make_absent_tail(Node** node1, Node** node2, int pre_save_right_id,
2846 ScanEnv* env)
2847 {
2848 int r;
2849 int id;
2850 Node* save;
2851 Node* x;
2852 Node* ns[2];
2853
2854 *node1 = *node2 = NULL_NODE;
2855 save = ns[0] = ns[1] = NULL_NODE;
2856
2857 r = node_new_save_gimmick(&save, SAVE_RIGHT_RANGE, env);
2858 if (r != 0) goto err;
2859
2860 id = GIMMICK_(save)->id;
2861 r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
2862 id, env);
2863 if (r != 0) goto err;
2864
2865 r = node_new_fail(&ns[1], env);
2866 if (r != 0) goto err;
2867
2868 x = make_list(2, ns);
2869 if (IS_NULL(x)) goto err0;
2870
2871 ns[0] = NULL_NODE; ns[1] = x;
2872
2873 r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
2874 pre_save_right_id, env);
2875 if (r != 0) goto err;
2876
2877 x = make_alt(2, ns);
2878 if (IS_NULL(x)) goto err0;
2879
2880 *node1 = save;
2881 *node2 = x;
2882 return ONIG_NORMAL;
2883
2884 err0:
2885 r = ONIGERR_MEMORY;
2886 err:
2887 onig_node_free(save);
2888 onig_node_free(ns[0]);
2889 onig_node_free(ns[1]);
2890 return r;
2891 }
2892
2893 static int
2894 make_range_clear(Node** node, ScanEnv* env)
2895 {
2896 int r;
2897 int id;
2898 Node* save;
2899 Node* x;
2900 Node* ns[2];
2901
2902 *node = NULL_NODE;
2903 save = ns[0] = ns[1] = NULL_NODE;
2904
2905 r = node_new_save_gimmick(&save, SAVE_RIGHT_RANGE, env);
2906 if (r != 0) goto err;
2907
2908 id = GIMMICK_(save)->id;
2909 r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
2910 id, env);
2911 if (r != 0) goto err;
2912
2913 r = node_new_fail(&ns[1], env);
2914 if (r != 0) goto err;
2915
2916 x = make_list(2, ns);
2917 if (IS_NULL(x)) goto err0;
2918
2919 ns[0] = NULL_NODE; ns[1] = x;
2920
2921 r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_INIT, 0, env);
2922 if (r != 0) goto err;
2923
2924 x = make_alt(2, ns);
2925 if (IS_NULL(x)) goto err0;
2926
2927 NODE_STATUS_ADD(x, SUPER);
2928
2929 ns[0] = save;
2930 ns[1] = x;
2931 save = NULL_NODE;
2932 x = make_list(2, ns);
2933 if (IS_NULL(x)) goto err0;
2934
2935 *node = x;
2936 return ONIG_NORMAL;
2937
2938 err0:
2939 r = ONIGERR_MEMORY;
2940 err:
2941 onig_node_free(save);
2942 onig_node_free(ns[0]);
2943 onig_node_free(ns[1]);
2944 return r;
2945 }
2946
2947 static int
2948 is_simple_one_char_repeat(Node* node, Node** rquant, Node** rbody,
2949 int* is_possessive, ScanEnv* env)
2950 {
2951 Node* quant;
2952 Node* body;
2953
2954 *rquant = *rbody = 0;
2955 *is_possessive = 0;
2956
2957 if (NODE_TYPE(node) == NODE_QUANT) {
2958 quant = node;
2959 }
2960 else {
2961 if (NODE_TYPE(node) == NODE_BAG) {
2962 BagNode* en = BAG_(node);
2963 if (en->type == BAG_STOP_BACKTRACK) {
2964 *is_possessive = 1;
2965 quant = NODE_BAG_BODY(en);
2966 if (NODE_TYPE(quant) != NODE_QUANT)
2967 return 0;
2968 }
2969 else
2970 return 0;
2971 }
2972 else
2973 return 0;
2974 }
2975
2976 if (QUANT_(quant)->greedy == 0)
2977 return 0;
2978
2979 body = NODE_BODY(quant);
2980 switch (NODE_TYPE(body)) {
2981 case NODE_STRING:
2982 {
2983 int len;
2984 StrNode* sn = STR_(body);
2985 UChar *s = sn->s;
2986
2987 len = 0;
2988 while (s < sn->end) {
2989 s += enclen(env->enc, s);
2990 len++;
2991 }
2992 if (len != 1)
2993 return 0;
2994 }
2995
2996 case NODE_CCLASS:
2997 break;
2998
2999 default:
3000 return 0;
3001 break;
3002 }
3003
3004 if (node != quant) {
3005 NODE_BODY(node) = 0;
3006 onig_node_free(node);
3007 }
3008 NODE_BODY(quant) = NULL_NODE;
3009 *rquant = quant;
3010 *rbody = body;
3011 return 1;
3012 }
3013
3014 static int
3015 make_absent_tree_for_simple_one_char_repeat(Node** node, Node* absent, Node* quant,
3016 Node* body, int possessive, ScanEnv* env)
3017 {
3018 int r;
3019 int i;
3020 int id1;
3021 int lower, upper;
3022 Node* x;
3023 Node* ns[4];
3024
3025 *node = NULL_NODE;
3026 r = ONIGERR_MEMORY;
3027 ns[0] = ns[1] = NULL_NODE;
3028 ns[2] = body, ns[3] = absent;
3029
3030 lower = QUANT_(quant)->lower;
3031 upper = QUANT_(quant)->upper;
3032 onig_node_free(quant);
3033
3034 r = node_new_save_gimmick(&ns[0], SAVE_RIGHT_RANGE, env);
3035 if (r != 0) goto err;
3036
3037 id1 = GIMMICK_(ns[0])->id;
3038
3039 r = make_absent_engine(&ns[1], id1, absent, body, lower, upper, possessive,
3040 0, env);
3041 if (r != 0) goto err;
3042
3043 ns[2] = ns[3] = NULL_NODE;
3044
3045 r = node_new_update_var_gimmick(&ns[2], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
3046 id1, env);
3047 if (r != 0) goto err;
3048
3049 x = make_list(3, ns);
3050 if (IS_NULL(x)) goto err0;
3051
3052 *node = x;
3053 return ONIG_NORMAL;
3054
3055 err0:
3056 r = ONIGERR_MEMORY;
3057 err:
3058 for (i = 0; i < 4; i++) onig_node_free(ns[i]);
3059 return r;
3060 }
3061
3062 static int
3063 make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter,
3064 ScanEnv* env)
3065 {
3066 int r;
3067 int i;
3068 int id1, id2;
3069 int possessive;
3070 Node* x;
3071 Node* ns[7];
3072
3073 r = ONIGERR_MEMORY;
3074 for (i = 0; i < 7; i++) ns[i] = NULL_NODE;
3075 ns[4] = expr; ns[5] = absent;
3076
3077 if (is_range_cutter == 0) {
3078 Node* quant;
3079 Node* body;
3080
3081 if (expr == NULL_NODE) {
3082 /* default expr \O* */
3083 quant = node_new_quantifier(0, INFINITE_REPEAT, 0);
3084 if (IS_NULL(quant)) goto err0;
3085
3086 r = node_new_true_anychar(&body, env);
3087 if (r != 0) {
3088 onig_node_free(quant);
3089 goto err;
3090 }
3091 possessive = 0;
3092 goto simple;
3093 }
3094 else {
3095 if (is_simple_one_char_repeat(expr, &quant, &body, &possessive, env)) {
3096 simple:
3097 r = make_absent_tree_for_simple_one_char_repeat(node, absent, quant,
3098 body, possessive, env);
3099 if (r != 0) {
3100 ns[4] = NULL_NODE;
3101 onig_node_free(quant);
3102 onig_node_free(body);
3103 goto err;
3104 }
3105
3106 return ONIG_NORMAL;
3107 }
3108 }
3109 }
3110
3111 r = node_new_save_gimmick(&ns[0], SAVE_RIGHT_RANGE, env);
3112 if (r != 0) goto err;
3113
3114 id1 = GIMMICK_(ns[0])->id;
3115
3116 r = node_new_save_gimmick(&ns[1], SAVE_S, env);
3117 if (r != 0) goto err;
3118
3119 id2 = GIMMICK_(ns[1])->id;
3120
3121 r = node_new_true_anychar(&ns[3], env);
3122 if (r != 0) goto err;
3123
3124 possessive = 1;
3125 r = make_absent_engine(&ns[2], id1, absent, ns[3], 0, INFINITE_REPEAT,
3126 possessive, is_range_cutter, env);
3127 if (r != 0) goto err;
3128
3129 ns[3] = NULL_NODE;
3130 ns[5] = NULL_NODE;
3131
3132 r = node_new_update_var_gimmick(&ns[3], UPDATE_VAR_S_FROM_STACK, id2, env);
3133 if (r != 0) goto err;
3134
3135 if (is_range_cutter != 0) {
3136 x = make_list(4, ns);
3137 if (IS_NULL(x)) goto err0;
3138 }
3139 else {
3140 r = make_absent_tail(&ns[5], &ns[6], id1, env);
3141 if (r != 0) goto err;
3142
3143 x = make_list(7, ns);
3144 if (IS_NULL(x)) goto err0;
3145 }
3146
3147 *node = x;
3148 return ONIG_NORMAL;
3149
3150 err0:
3151 r = ONIGERR_MEMORY;
3152 err:
3153 for (i = 0; i < 7; i++) onig_node_free(ns[i]);
3154 return r;
3155 }
3156
3157 extern int
3158 onig_node_str_cat(Node* node, const UChar* s, const UChar* end)
3159 {
3160 int addlen = (int )(end - s);
3161
3162 if (addlen > 0) {
3163 int len = (int )(STR_(node)->end - STR_(node)->s);
3164
3165 if (STR_(node)->capacity > 0 || (len + addlen > NODE_STRING_BUF_SIZE - 1)) {
3166 UChar* p;
3167 int capa = len + addlen + NODE_STRING_MARGIN;
3168
3169 if (capa <= STR_(node)->capacity) {
3170 onig_strcpy(STR_(node)->s + len, s, end);
3171 }
3172 else {
3173 if (STR_(node)->s == STR_(node)->buf)
3174 p = strcat_capa_from_static(STR_(node)->s, STR_(node)->end,
3175 s, end, capa);
3176 else
3177 p = strcat_capa(STR_(node)->s, STR_(node)->end, s, end, capa, STR_(node)->capacity);
3178
3179 CHECK_NULL_RETURN_MEMERR(p);
3180 STR_(node)->s = p;
3181 STR_(node)->capacity = capa;
3182 }
3183 }
3184 else {
3185 onig_strcpy(STR_(node)->s + len, s, end);
3186 }
3187 STR_(node)->end = STR_(node)->s + len + addlen;
3188 }
3189
3190 return 0;
3191 }
3192
3193 extern int
3194 onig_node_str_set(Node* node, const UChar* s, const UChar* end)
3195 {
3196 onig_node_str_clear(node);
3197 return onig_node_str_cat(node, s, end);
3198 }
3199
3200 static int
3201 node_str_cat_char(Node* node, UChar c)
3202 {
3203 UChar s[1];
3204
3205 s[0] = c;
3206 return onig_node_str_cat(node, s, s + 1);
3207 }
3208
3209 extern void
3210 onig_node_conv_to_str_node(Node* node, int flag)
3211 {
3212 NODE_SET_TYPE(node, NODE_STRING);
3213 STR_(node)->flag = flag;
3214 STR_(node)->capacity = 0;
3215 STR_(node)->s = STR_(node)->buf;
3216 STR_(node)->end = STR_(node)->buf;
3217 }
3218
3219 extern void
3220 onig_node_str_clear(Node* node)
3221 {
3222 if (STR_(node)->capacity != 0 &&
3223 IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) {
3224 xfree(STR_(node)->s);
3225 }
3226
3227 STR_(node)->capacity = 0;
3228 STR_(node)->flag = 0;
3229 STR_(node)->s = STR_(node)->buf;
3230 STR_(node)->end = STR_(node)->buf;
3231 }
3232
3233 static Node*
3234 node_new_str(const UChar* s, const UChar* end)
3235 {
3236 Node* node = node_new();
3237 CHECK_NULL_RETURN(node);
3238
3239 NODE_SET_TYPE(node, NODE_STRING);
3240 STR_(node)->capacity = 0;
3241 STR_(node)->flag = 0;
3242 STR_(node)->s = STR_(node)->buf;
3243 STR_(node)->end = STR_(node)->buf;
3244 if (onig_node_str_cat(node, s, end)) {
3245 onig_node_free(node);
3246 return NULL;
3247 }
3248 return node;
3249 }
3250
3251 extern Node*
3252 onig_node_new_str(const UChar* s, const UChar* end)
3253 {
3254 return node_new_str(s, end);
3255 }
3256
3257 static Node*
3258 node_new_str_raw(UChar* s, UChar* end)
3259 {
3260 Node* node = node_new_str(s, end);
3261 CHECK_NULL_RETURN(node);
3262 NODE_STRING_SET_RAW(node);
3263 return node;
3264 }
3265
3266 static Node*
3267 node_new_empty(void)
3268 {
3269 return node_new_str(NULL, NULL);
3270 }
3271
3272 static Node*
3273 node_new_str_raw_char(UChar c)
3274 {
3275 int i;
3276 UChar p[1];
3277 Node* node;
3278
3279 p[0] = c;
3280 node = node_new_str_raw(p, p + 1);
3281 CHECK_NULL_RETURN(node);
3282
3283 /* clear buf tail */
3284 for (i = 1; i < NODE_STRING_BUF_SIZE; i++)
3285 STR_(node)->buf[i] = '\0';
3286
3287 return node;
3288 }
3289
3290 static Node*
3291 str_node_split_last_char(Node* node, OnigEncoding enc)
3292 {
3293 const UChar *p;
3294 Node* rn;
3295 StrNode* sn;
3296
3297 sn = STR_(node);
3298 rn = NULL_NODE;
3299 if (sn->end > sn->s) {
3300 p = onigenc_get_prev_char_head(enc, sn->s, sn->end);
3301 if (p && p > sn->s) { /* can be split. */
3302 rn = node_new_str(p, sn->end);
3303 CHECK_NULL_RETURN(rn);
3304 if (NODE_STRING_IS_RAW(node))
3305 NODE_STRING_SET_RAW(rn);
3306
3307 sn->end = (UChar* )p;
3308 }
3309 }
3310 return rn;
3311 }
3312
3313 static int
3314 str_node_can_be_split(Node* node, OnigEncoding enc)
3315 {
3316 StrNode* sn = STR_(node);
3317 if (sn->end > sn->s) {
3318 return ((enclen(enc, sn->s) < sn->end - sn->s) ? 1 : 0);
3319 }
3320 return 0;
3321 }
3322
3323 extern int
3324 onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc)
3325 {
3326 unsigned int num, val;
3327 OnigCodePoint c;
3328 UChar* p = *src;
3329 PFETCH_READY;
3330
3331 num = 0;
3332 while (! PEND) {
3333 PFETCH(c);
3334 if (IS_CODE_DIGIT_ASCII(enc, c)) {
3335 val = (unsigned int )DIGITVAL(c);
3336 if ((INT_MAX_LIMIT - val) / 10UL < num)
3337 return -1; /* overflow */
3338
3339 num = num * 10 + val;
3340 }
3341 else {
3342 PUNFETCH;
3343 break;
3344 }
3345 }
3346 *src = p;
3347 return num;
3348 }
3349
3350 static int
3351 scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int minlen,
3352 int maxlen, OnigEncoding enc)
3353 {
3354 OnigCodePoint c;
3355 unsigned int num, val;
3356 int n;
3357 UChar* p = *src;
3358 PFETCH_READY;
3359
3360 num = 0;
3361 n = 0;
3362 while (! PEND && n < maxlen) {
3363 PFETCH(c);
3364 if (IS_CODE_XDIGIT_ASCII(enc, c)) {
3365 n++;
3366 val = (unsigned int )XDIGITVAL(enc,c);
3367 if ((INT_MAX_LIMIT - val) / 16UL < num)
3368 return ONIGERR_TOO_BIG_NUMBER; /* overflow */
3369
3370 num = (num << 4) + XDIGITVAL(enc,c);
3371 }
3372 else {
3373 PUNFETCH;
3374 break;
3375 }
3376 }
3377
3378 if (n < minlen)
3379 return ONIGERR_INVALID_CODE_POINT_VALUE;
3380
3381 *src = p;
3382 return num;
3383 }
3384
3385 static int
3386 scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen,
3387 OnigEncoding enc)
3388 {
3389 OnigCodePoint c;
3390 unsigned int num, val;
3391 UChar* p = *src;
3392 PFETCH_READY;
3393
3394 num = 0;
3395 while (! PEND && maxlen-- != 0) {
3396 PFETCH(c);
3397 if (IS_CODE_DIGIT_ASCII(enc, c) && c < '8') {
3398 val = ODIGITVAL(c);
3399 if ((INT_MAX_LIMIT - val) / 8UL < num)
3400 return -1; /* overflow */
3401
3402 num = (num << 3) + val;
3403 }
3404 else {
3405 PUNFETCH;
3406 break;
3407 }
3408 }
3409 *src = p;
3410 return num;
3411 }
3412
3413
3414 #define BB_WRITE_CODE_POINT(bbuf,pos,code) \
3415 BB_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT)
3416
3417 /* data format:
3418 [n][from-1][to-1][from-2][to-2] ... [from-n][to-n]
3419 (all data size is OnigCodePoint)
3420 */
3421 static int
3422 new_code_range(BBuf** pbuf)
3423 {
3424 #define INIT_MULTI_BYTE_RANGE_SIZE (SIZE_CODE_POINT * 5)
3425 int r;
3426 OnigCodePoint n;
3427 BBuf* bbuf;
3428
3429 bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf));
3430 CHECK_NULL_RETURN_MEMERR(bbuf);
3431 r = BB_INIT(bbuf, INIT_MULTI_BYTE_RANGE_SIZE);
3432 if (r != 0) {
3433 xfree(bbuf);
3434 *pbuf = 0;
3435 return r;
3436 }
3437
3438 n = 0;
3439 BB_WRITE_CODE_POINT(bbuf, 0, n);
3440 return 0;
3441 }
3442
3443 static int
3444 add_code_range_to_buf(BBuf** pbuf, OnigCodePoint from, OnigCodePoint to)
3445 {
3446 int r, inc_n, pos;
3447 int low, high, bound, x;
3448 OnigCodePoint n, *data;
3449 BBuf* bbuf;
3450
3451 if (from > to) {
3452 n = from; from = to; to = n;
3453 }
3454
3455 if (IS_NULL(*pbuf)) {
3456 r = new_code_range(pbuf);
3457 if (r != 0) return r;
3458 bbuf = *pbuf;
3459 n = 0;
3460 }
3461 else {
3462 bbuf = *pbuf;
3463 GET_CODE_POINT(n, bbuf->p);
3464 }
3465 data = (OnigCodePoint* )(bbuf->p);
3466 data++;
3467
3468 for (low = 0, bound = n; low < bound; ) {
3469 x = (low + bound) >> 1;
3470 if (from > data[x*2 + 1])
3471 low = x + 1;
3472 else
3473 bound = x;
3474 }
3475
3476 high = (to == ~((OnigCodePoint )0)) ? n : low;
3477 for (bound = n; high < bound; ) {
3478 x = (high + bound) >> 1;
3479 if (to + 1 >= data[x*2])
3480 high = x + 1;
3481 else
3482 bound = x;
3483 }
3484
3485 inc_n = low + 1 - high;
3486 if (n + inc_n > ONIG_MAX_MULTI_BYTE_RANGES_NUM)
3487 return ONIGERR_TOO_MANY_MULTI_BYTE_RANGES;
3488
3489 if (inc_n != 1) {
3490 if (from > data[low*2])
3491 from = data[low*2];
3492 if (to < data[(high - 1)*2 + 1])
3493 to = data[(high - 1)*2 + 1];
3494 }
3495
3496 if (inc_n != 0 && (OnigCodePoint )high < n) {
3497 int from_pos = SIZE_CODE_POINT * (1 + high * 2);
3498 int to_pos = SIZE_CODE_POINT * (1 + (low + 1) * 2);
3499 int size = (n - high) * 2 * SIZE_CODE_POINT;
3500
3501 if (inc_n > 0) {
3502 BB_MOVE_RIGHT(bbuf, from_pos, to_pos, size);
3503 }
3504 else {
3505 BB_MOVE_LEFT_REDUCE(bbuf, from_pos, to_pos);
3506 }
3507 }
3508
3509 pos = SIZE_CODE_POINT * (1 + low * 2);
3510 BB_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2);
3511 BB_WRITE_CODE_POINT(bbuf, pos, from);
3512 BB_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to);
3513 n += inc_n;
3514 BB_WRITE_CODE_POINT(bbuf, 0, n);
3515
3516 return 0;
3517 }
3518
3519 static int
3520 add_code_range(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to)
3521 {
3522 if (from > to) {
3523 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
3524 return 0;
3525 else
3526 return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
3527 }
3528
3529 return add_code_range_to_buf(pbuf, from, to);
3530 }
3531
3532 static int
3533 not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf)
3534 {
3535 int r, i, n;
3536 OnigCodePoint pre, from, *data, to = 0;
3537
3538 *pbuf = (BBuf* )NULL;
3539 if (IS_NULL(bbuf)) {
3540 set_all:
3541 return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
3542 }
3543
3544 data = (OnigCodePoint* )(bbuf->p);
3545 GET_CODE_POINT(n, data);
3546 data++;
3547 if (n <= 0) goto set_all;
3548
3549 r = 0;
3550 pre = MBCODE_START_POS(enc);
3551 for (i = 0; i < n; i++) {
3552 from = data[i*2];
3553 to = data[i*2+1];
3554 if (pre <= from - 1) {
3555 r = add_code_range_to_buf(pbuf, pre, from - 1);
3556 if (r != 0) return r;
3557 }
3558 if (to == ~((OnigCodePoint )0)) break;
3559 pre = to + 1;
3560 }
3561 if (to < ~((OnigCodePoint )0)) {
3562 r = add_code_range_to_buf(pbuf, to + 1, ~((OnigCodePoint )0));
3563 }
3564 return r;
3565 }
3566
3567 #define SWAP_BB_NOT(bbuf1, not1, bbuf2, not2) do {\
3568 BBuf *tbuf; \
3569 int tnot; \
3570 tnot = not1; not1 = not2; not2 = tnot; \
3571 tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf; \
3572 } while (0)
3573
3574 static int
3575 or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1,
3576 BBuf* bbuf2, int not2, BBuf** pbuf)
3577 {
3578 int r;
3579 OnigCodePoint i, n1, *data1;
3580 OnigCodePoint from, to;
3581
3582 *pbuf = (BBuf* )NULL;
3583 if (IS_NULL(bbuf1) && IS_NULL(bbuf2)) {
3584 if (not1 != 0 || not2 != 0)
3585 return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
3586 return 0;
3587 }
3588
3589 r = 0;
3590 if (IS_NULL(bbuf2))
3591 SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);
3592
3593 if (IS_NULL(bbuf1)) {
3594 if (not1 != 0) {
3595 return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
3596 }
3597 else {
3598 if (not2 == 0) {
3599 return bbuf_clone(pbuf, bbuf2);
3600 }
3601 else {
3602 return not_code_range_buf(enc, bbuf2, pbuf);
3603 }
3604 }
3605 }
3606
3607 if (not1 != 0)
3608 SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);
3609
3610 data1 = (OnigCodePoint* )(bbuf1->p);
3611 GET_CODE_POINT(n1, data1);
3612 data1++;
3613
3614 if (not2 == 0 && not1 == 0) { /* 1 OR 2 */
3615 r = bbuf_clone(pbuf, bbuf2);
3616 }
3617 else if (not1 == 0) { /* 1 OR (not 2) */
3618 r = not_code_range_buf(enc, bbuf2, pbuf);
3619 }
3620 if (r != 0) return r;
3621
3622 for (i = 0; i < n1; i++) {
3623 from = data1[i*2];
3624 to = data1[i*2+1];
3625 r = add_code_range_to_buf(pbuf, from, to);
3626 if (r != 0) return r;
3627 }
3628 return 0;
3629 }
3630
3631 static int
3632 and_code_range1(BBuf** pbuf, OnigCodePoint from1, OnigCodePoint to1,
3633 OnigCodePoint* data, int n)
3634 {
3635 int i, r;
3636 OnigCodePoint from2, to2;
3637
3638 for (i = 0; i < n; i++) {
3639 from2 = data[i*2];
3640 to2 = data[i*2+1];
3641 if (from2 < from1) {
3642 if (to2 < from1) continue;
3643 else {
3644 from1 = to2 + 1;
3645 }
3646 }
3647 else if (from2 <= to1) {
3648 if (to2 < to1) {
3649 if (from1 <= from2 - 1) {
3650 r = add_code_range_to_buf(pbuf, from1, from2-1);
3651 if (r != 0) return r;
3652 }
3653 from1 = to2 + 1;
3654 }
3655 else {
3656 to1 = from2 - 1;
3657 }
3658 }
3659 else {
3660 from1 = from2;
3661 }
3662 if (from1 > to1) break;
3663 }
3664 if (from1 <= to1) {
3665 r = add_code_range_to_buf(pbuf, from1, to1);
3666 if (r != 0) return r;
3667 }
3668 return 0;
3669 }
3670
3671 static int
3672 and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)
3673 {
3674 int r;
3675 OnigCodePoint i, j, n1, n2, *data1, *data2;
3676 OnigCodePoint from, to, from1, to1, from2, to2;
3677
3678 *pbuf = (BBuf* )NULL;
3679 if (IS_NULL(bbuf1)) {
3680 if (not1 != 0 && IS_NOT_NULL(bbuf2)) /* not1 != 0 -> not2 == 0 */
3681 return bbuf_clone(pbuf, bbuf2);
3682 return 0;
3683 }
3684 else if (IS_NULL(bbuf2)) {
3685 if (not2 != 0)
3686 return bbuf_clone(pbuf, bbuf1);
3687 return 0;
3688 }
3689
3690 if (not1 != 0)
3691 SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);
3692
3693 data1 = (OnigCodePoint* )(bbuf1->p);
3694 data2 = (OnigCodePoint* )(bbuf2->p);
3695 GET_CODE_POINT(n1, data1);
3696 GET_CODE_POINT(n2, data2);
3697 data1++;
3698 data2++;
3699
3700 if (not2 == 0 && not1 == 0) { /* 1 AND 2 */
3701 for (i = 0; i < n1; i++) {
3702 from1 = data1[i*2];
3703 to1 = data1[i*2+1];
3704 for (j = 0; j < n2; j++) {
3705 from2 = data2[j*2];
3706 to2 = data2[j*2+1];
3707 if (from2 > to1) break;
3708 if (to2 < from1) continue;
3709 from = MAX(from1, from2);
3710 to = MIN(to1, to2);
3711 r = add_code_range_to_buf(pbuf, from, to);
3712 if (r != 0) return r;
3713 }
3714 }
3715 }
3716 else if (not1 == 0) { /* 1 AND (not 2) */
3717 for (i = 0; i < n1; i++) {
3718 from1 = data1[i*2];
3719 to1 = data1[i*2+1];
3720 r = and_code_range1(pbuf, from1, to1, data2, n2);
3721 if (r != 0) return r;
3722 }
3723 }
3724
3725 return 0;
3726 }
3727
3728 static int
3729 and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
3730 {
3731 int r, not1, not2;
3732 BBuf *buf1, *buf2, *pbuf;
3733 BitSetRef bsr1, bsr2;
3734 BitSet bs1, bs2;
3735
3736 not1 = IS_NCCLASS_NOT(dest);
3737 bsr1 = dest->bs;
3738 buf1 = dest->mbuf;
3739 not2 = IS_NCCLASS_NOT(cc);
3740 bsr2 = cc->bs;
3741 buf2 = cc->mbuf;
3742
3743 if (not1 != 0) {
3744 bitset_invert_to(bsr1, bs1);
3745 bsr1 = bs1;
3746 }
3747 if (not2 != 0) {
3748 bitset_invert_to(bsr2, bs2);
3749 bsr2 = bs2;
3750 }
3751 bitset_and(bsr1, bsr2);
3752 if (bsr1 != dest->bs) {
3753 bitset_copy(dest->bs, bsr1);
3754 }
3755 if (not1 != 0) {
3756 bitset_invert(dest->bs);
3757 }
3758
3759 if (! ONIGENC_IS_SINGLEBYTE(enc)) {
3760 if (not1 != 0 && not2 != 0) {
3761 r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf);
3762 }
3763 else {
3764 r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf);
3765 if (r == 0 && not1 != 0) {
3766 BBuf *tbuf;
3767 r = not_code_range_buf(enc, pbuf, &tbuf);
3768 if (r != 0) {
3769 bbuf_free(pbuf);
3770 return r;
3771 }
3772 bbuf_free(pbuf);
3773 pbuf = tbuf;
3774 }
3775 }
3776 if (r != 0) return r;
3777
3778 dest->mbuf = pbuf;
3779 bbuf_free(buf1);
3780 return r;
3781 }
3782 return 0;
3783 }
3784
3785 static int
3786 or_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
3787 {
3788 int r, not1, not2;
3789 BBuf *buf1, *buf2, *pbuf;
3790 BitSetRef bsr1, bsr2;
3791 BitSet bs1, bs2;
3792
3793 not1 = IS_NCCLASS_NOT(dest);
3794 bsr1 = dest->bs;
3795 buf1 = dest->mbuf;
3796 not2 = IS_NCCLASS_NOT(cc);
3797 bsr2 = cc->bs;
3798 buf2 = cc->mbuf;
3799
3800 if (not1 != 0) {
3801 bitset_invert_to(bsr1, bs1);
3802 bsr1 = bs1;
3803 }
3804 if (not2 != 0) {
3805 bitset_invert_to(bsr2, bs2);
3806 bsr2 = bs2;
3807 }
3808 bitset_or(bsr1, bsr2);
3809 if (bsr1 != dest->bs) {
3810 bitset_copy(dest->bs, bsr1);
3811 }
3812 if (not1 != 0) {
3813 bitset_invert(dest->bs);
3814 }
3815
3816 if (! ONIGENC_IS_SINGLEBYTE(enc)) {
3817 if (not1 != 0 && not2 != 0) {
3818 r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf);
3819 }
3820 else {
3821 r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf);
3822 if (r == 0 && not1 != 0) {
3823 BBuf *tbuf;
3824 r = not_code_range_buf(enc, pbuf, &tbuf);
3825 if (r != 0) {
3826 bbuf_free(pbuf);
3827 return r;
3828 }
3829 bbuf_free(pbuf);
3830 pbuf = tbuf;
3831 }
3832 }
3833 if (r != 0) return r;
3834
3835 dest->mbuf = pbuf;
3836 bbuf_free(buf1);
3837 return r;
3838 }
3839 else
3840 return 0;
3841 }
3842
3843 static OnigCodePoint
3844 conv_backslash_value(OnigCodePoint c, ScanEnv* env)
3845 {
3846 if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_CONTROL_CHARS)) {
3847 switch (c) {
3848 case 'n': return '\n';
3849 case 't': return '\t';
3850 case 'r': return '\r';
3851 case 'f': return '\f';
3852 case 'a': return '\007';
3853 case 'b': return '\010';
3854 case 'e': return '\033';
3855 case 'v':
3856 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_V_VTAB))
3857 return '\v';
3858 break;
3859
3860 default:
3861 break;
3862 }
3863 }
3864 return c;
3865 }
3866
3867 static int
3868 is_invalid_quantifier_target(Node* node)
3869 {
3870 switch (NODE_TYPE(node)) {
3871 case NODE_ANCHOR:
3872 case NODE_GIMMICK:
3873 return 1;
3874 break;
3875
3876 case NODE_BAG:
3877 /* allow enclosed elements */
3878 /* return is_invalid_quantifier_target(NODE_BODY(node)); */
3879 break;
3880
3881 case NODE_LIST:
3882 do {
3883 if (! is_invalid_quantifier_target(NODE_CAR(node))) return 0;
3884 } while (IS_NOT_NULL(node = NODE_CDR(node)));
3885 return 0;
3886 break;
3887
3888 case NODE_ALT:
3889 do {
3890 if (is_invalid_quantifier_target(NODE_CAR(node))) return 1;
3891 } while (IS_NOT_NULL(node = NODE_CDR(node)));
3892 break;
3893
3894 default:
3895 break;
3896 }
3897 return 0;
3898 }
3899
3900 /* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */
3901 static int
3902 quantifier_type_num(QuantNode* q)
3903 {
3904 if (q->greedy) {
3905 if (q->lower == 0) {
3906 if (q->upper == 1) return 0;
3907 else if (IS_INFINITE_REPEAT(q->upper)) return 1;
3908 }
3909 else if (q->lower == 1) {
3910 if (IS_INFINITE_REPEAT(q->upper)) return 2;
3911 }
3912 }
3913 else {
3914 if (q->lower == 0) {
3915 if (q->upper == 1) return 3;
3916 else if (IS_INFINITE_REPEAT(q->upper)) return 4;
3917 }
3918 else if (q->lower == 1) {
3919 if (IS_INFINITE_REPEAT(q->upper)) return 5;
3920 }
3921 }
3922 return -1;
3923 }
3924
3925
/* What to do with a quantifier that is applied directly to another one. */
enum ReduceType {
  RQ_ASIS = 0, /* as is */
  RQ_DEL  = 1, /* delete parent */
  RQ_A    = 2, /* to '*'    */
  RQ_AQ   = 3, /* to '*?'   */
  RQ_QQ   = 4, /* to '??'   */
  RQ_P_QQ = 5, /* to '+)??' */
  RQ_PQ_Q = 6  /* to '+?)?' */
};

/*
 * Reduction to apply, indexed by the quantifier_type_num() of the two
 * quantifiers involved.  Each row is labelled with the form it stands for;
 * the columns use the same order: '?', '*', '+', '??', '*?', '+?'.
 */
static enum ReduceType ReduceTypeTable[6][6] = {
  /* '?'  */ {RQ_DEL,  RQ_A,    RQ_A,   RQ_QQ,   RQ_AQ,   RQ_ASIS},
  /* '*'  */ {RQ_DEL,  RQ_DEL,  RQ_DEL, RQ_P_QQ, RQ_P_QQ, RQ_DEL},
  /* '+'  */ {RQ_A,    RQ_A,    RQ_DEL, RQ_ASIS, RQ_P_QQ, RQ_DEL},
  /* '??' */ {RQ_DEL,  RQ_AQ,   RQ_AQ,  RQ_DEL,  RQ_AQ,   RQ_AQ},
  /* '*?' */ {RQ_DEL,  RQ_DEL,  RQ_DEL, RQ_DEL,  RQ_DEL,  RQ_DEL},
  /* '+?' */ {RQ_ASIS, RQ_PQ_Q, RQ_DEL, RQ_AQ,   RQ_AQ,   RQ_DEL}
};
3944
3945 extern void
3946 onig_reduce_nested_quantifier(Node* pnode, Node* cnode)
3947 {
3948 int pnum, cnum;
3949 QuantNode *p, *c;
3950
3951 p = QUANT_(pnode);
3952 c = QUANT_(cnode);
3953 pnum = quantifier_type_num(p);
3954 cnum = quantifier_type_num(c);
3955 if (pnum < 0 || cnum < 0) {
3956 if ((p->lower == p->upper) && ! IS_INFINITE_REPEAT(p->upper)) {
3957 if ((c->lower == c->upper) && ! IS_INFINITE_REPEAT(c->upper)) {
3958 int n = onig_positive_int_multiply(p->lower, c->lower);
3959 if (n >= 0) {
3960 p->lower = p->upper = n;
3961 NODE_BODY(pnode) = NODE_BODY(cnode);
3962 goto remove_cnode;
3963 }
3964 }
3965 }
3966
3967 return ;
3968 }
3969
3970 switch(ReduceTypeTable[cnum][pnum]) {
3971 case RQ_DEL:
3972 *pnode = *cnode;
3973 break;
3974 case RQ_A:
3975 NODE_BODY(pnode) = NODE_BODY(cnode);
3976 p->lower = 0; p->upper = INFINITE_REPEAT; p->greedy = 1;
3977 break;
3978 case RQ_AQ:
3979 NODE_BODY(pnode) = NODE_BODY(cnode);
3980 p->lower = 0; p->upper = INFINITE_REPEAT; p->greedy = 0;
3981 break;
3982 case RQ_QQ:
3983 NODE_BODY(pnode) = NODE_BODY(cnode);
3984 p->lower = 0; p->upper = 1; p->greedy = 0;
3985 break;
3986 case RQ_P_QQ:
3987 NODE_BODY(pnode) = cnode;
3988 p->lower = 0; p->upper = 1; p->greedy = 0;
3989 c->lower = 1; c->upper = INFINITE_REPEAT; c->greedy = 1;
3990 return ;
3991 break;
3992 case RQ_PQ_Q:
3993 NODE_BODY(pnode) = cnode;
3994 p->lower = 0; p->upper = 1; p->greedy = 1;
3995 c->lower = 1; c->upper = INFINITE_REPEAT; c->greedy = 0;
3996 return ;
3997 break;
3998 case RQ_ASIS:
3999 NODE_BODY(pnode) = cnode;
4000 return ;
4001 break;
4002 }
4003
4004 remove_cnode:
4005 NODE_BODY(cnode) = NULL_NODE;
4006 onig_node_free(cnode);
4007 }
4008
4009 static int
4010 node_new_general_newline(Node** node, ScanEnv* env)
4011 {
4012 int r;
4013 int dlen, alen;
4014 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN * 2];
4015 Node* crnl;
4016 Node* ncc;
4017 Node* x;
4018 CClassNode* cc;
4019
4020 dlen = ONIGENC_CODE_TO_MBC(env->enc, 0x0d, buf);
4021 if (dlen < 0) return dlen;
4022 alen = ONIGENC_CODE_TO_MBC(env->enc, 0x0a, buf + dlen);
4023 if (alen < 0) return alen;
4024
4025 crnl = node_new_str_raw(buf, buf + dlen + alen);
4026 CHECK_NULL_RETURN_MEMERR(crnl);
4027
4028 ncc = node_new_cclass();
4029 if (IS_NULL(ncc)) goto err2;
4030
4031 cc = CCLASS_(ncc);
4032 if (dlen == 1) {
4033 bitset_set_range(cc->bs, 0x0a, 0x0d);
4034 }
4035 else {
4036 r = add_code_range(&(cc->mbuf), env, 0x0a, 0x0d);
4037 if (r != 0) {
4038 err1:
4039 onig_node_free(ncc);
4040 err2:
4041 onig_node_free(crnl);
4042 return ONIGERR_MEMORY;
4043 }
4044 }
4045
4046 if (ONIGENC_IS_UNICODE_ENCODING(env->enc)) {
4047 r = add_code_range(&(cc->mbuf), env, 0x85, 0x85);
4048 if (r != 0) goto err1;
4049 r = add_code_range(&(cc->mbuf), env, 0x2028, 0x2029);
4050 if (r != 0) goto err1;
4051 }
4052
4053 x = node_new_bag_if_else(crnl, 0, ncc);
4054 if (IS_NULL(x)) goto err1;
4055
4056 *node = x;
4057 return 0;
4058 }
4059
/* Token kinds produced by fetch_token() and fetch_token_in_cc(). */
enum TokenSyms {
  TK_EOT      = 0,      /* end of token */
  TK_RAW_BYTE = 1,      /* \xHH or octal escape */
  TK_CHAR,
  TK_STRING,
  TK_CODE_POINT,        /* \x{...}, \o{...}, \uHHHH */
  TK_ANYCHAR,
  TK_CHAR_TYPE,         /* \w \W \d \D \s \S \h \H */
  TK_BACKREF,
  TK_CALL,
  TK_ANCHOR,
  TK_REPEAT,
  TK_INTERVAL,
  TK_ANYCHAR_ANYTIME,   /* SQL '%' == .* */
  TK_ALT,
  TK_SUBEXP_OPEN,
  TK_SUBEXP_CLOSE,
  TK_CC_OPEN,
  TK_QUOTE_OPEN,
  TK_CHAR_PROPERTY,     /* \p{...}, \P{...} */
  TK_KEEP,              /* \K */
  TK_GENERAL_NEWLINE,   /* \R */
  TK_NO_NEWLINE,        /* \N */
  TK_TRUE_ANYCHAR,      /* \O */
  TK_TEXT_SEGMENT,      /* \X */

  /* tokens that only occur inside a character class */
  TK_CC_CLOSE,          /* ] */
  TK_CC_RANGE,          /* - */
  TK_POSIX_BRACKET_OPEN,
  TK_CC_AND,            /* && */
  TK_CC_CC_OPEN         /* [ */
};
4093
4094 typedef struct {
4095 enum TokenSyms type;
4096 int escaped;
4097 int base; /* is number: 8, 16 (used in [....]) */
4098 UChar* backp;
4099 union {
4100 UChar* s;
4101 int c;
4102 OnigCodePoint code;
4103 int anchor;
4104 int subtype;
4105 struct {
4106 int lower;
4107 int upper;
4108 int greedy;
4109 int possessive;
4110 } repeat;
4111 struct {
4112 int num;
4113 int ref1;
4114 int* refs;
4115 int by_name;
4116 #ifdef USE_BACKREF_WITH_LEVEL
4117 int exist_level;
4118 int level; /* \k<name+n> */
4119 #endif
4120 } backref;
4121 struct {
4122 UChar* name;
4123 UChar* name_end;
4124 int gnum;
4125 int by_number;
4126 } call;
4127 struct {
4128 int ctype;
4129 int not;
4130 } prop;
4131 } u;
4132 } PToken;
4133
4134
4135 static int
4136 fetch_interval_quantifier(UChar** src, UChar* end, PToken* tok, ScanEnv* env)
4137 {
4138 int low, up, syn_allow, non_low = 0;
4139 int r = 0;
4140 OnigCodePoint c;
4141 OnigEncoding enc = env->enc;
4142 UChar* p = *src;
4143 PFETCH_READY;
4144
4145 syn_allow = IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INVALID_INTERVAL);
4146
4147 if (PEND) {
4148 if (syn_allow)
4149 return 1; /* "....{" : OK! */
4150 else
4151 return ONIGERR_END_PATTERN_AT_LEFT_BRACE; /* "....{" syntax error */
4152 }
4153
4154 if (! syn_allow) {
4155 c = PPEEK;
4156 if (c == ')' || c == '(' || c == '|') {
4157 return ONIGERR_END_PATTERN_AT_LEFT_BRACE;
4158 }
4159 }
4160
4161 low = onig_scan_unsigned_number(&p, end, env->enc);
4162 if (low < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
4163 if (low > ONIG_MAX_REPEAT_NUM)
4164 return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
4165
4166 if (p == *src) { /* can't read low */
4167 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV)) {
4168 /* allow {,n} as {0,n} */
4169 low = 0;
4170 non_low = 1;
4171 }
4172 else
4173 goto invalid;
4174 }
4175
4176 if (PEND) goto invalid;
4177 PFETCH(c);
4178 if (c == ',') {
4179 UChar* prev = p;
4180 up = onig_scan_unsigned_number(&p, end, env->enc);
4181 if (up < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
4182 if (up > ONIG_MAX_REPEAT_NUM)
4183 return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
4184
4185 if (p == prev) {
4186 if (non_low != 0)
4187 goto invalid;
4188 up = INFINITE_REPEAT; /* {n,} : {n,infinite} */
4189 }
4190 }
4191 else {
4192 if (non_low != 0)
4193 goto invalid;
4194
4195 PUNFETCH;
4196 up = low; /* {n} : exact n times */
4197 r = 2; /* fixed */
4198 }
4199
4200 if (PEND) goto invalid;
4201 PFETCH(c);
4202 if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) {
4203 if (c != MC_ESC(env->syntax)) goto invalid;
4204 PFETCH(c);
4205 }
4206 if (c != '}') goto invalid;
4207
4208 if (!IS_INFINITE_REPEAT(up) && low > up) {
4209 /* {n,m}+ supported case */
4210 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL))
4211 return ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE;
4212
4213 tok->u.repeat.possessive = 1;
4214 {
4215 int tmp;
4216 tmp = low; low = up; up = tmp;
4217 }
4218 }
4219 else
4220 tok->u.repeat.possessive = 0;
4221
4222 tok->type = TK_INTERVAL;
4223 tok->u.repeat.lower = low;
4224 tok->u.repeat.upper = up;
4225 *src = p;
4226 return r; /* 0: normal {n,m}, 2: fixed {n} */
4227
4228 invalid:
4229 if (syn_allow) {
4230 /* *src = p; */ /* !!! Don't do this line !!! */
4231 return 1; /* OK */
4232 }
4233 else
4234 return ONIGERR_INVALID_REPEAT_RANGE_PATTERN;
4235 }
4236
4237 /* \M-, \C-, \c, or \... */
4238 static int
4239 fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env, OnigCodePoint* val)
4240 {
4241 int v;
4242 OnigCodePoint c;
4243 OnigEncoding enc = env->enc;
4244 UChar* p = *src;
4245
4246 if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
4247
4248 PFETCH_S(c);
4249 switch (c) {
4250 case 'M':
4251 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META)) {
4252 if (PEND) return ONIGERR_END_PATTERN_AT_META;
4253 PFETCH_S(c);
4254 if (c != '-') return ONIGERR_META_CODE_SYNTAX;
4255 if (PEND) return ONIGERR_END_PATTERN_AT_META;
4256 PFETCH_S(c);
4257 if (c == MC_ESC(env->syntax)) {
4258 v = fetch_escaped_value(&p, end, env, &c);
4259 if (v < 0) return v;
4260 }
4261 c = ((c & 0xff) | 0x80);
4262 }
4263 else
4264 goto backslash;
4265 break;
4266
4267 case 'C':
4268 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL)) {
4269 if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
4270 PFETCH_S(c);
4271 if (c != '-') return ONIGERR_CONTROL_CODE_SYNTAX;
4272 goto control;
4273 }
4274 else
4275 goto backslash;
4276
4277 case 'c':
4278 if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_C_CONTROL)) {
4279 control:
4280 if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
4281 PFETCH_S(c);
4282 if (c == '?') {
4283 c = 0177;
4284 }
4285 else {
4286 if (c == MC_ESC(env->syntax)) {
4287 v = fetch_escaped_value(&p, end, env, &c);
4288 if (v < 0) return v;
4289 }
4290 c &= 0x9f;
4291 }
4292 break;
4293 }
4294 /* fall through */
4295
4296 default:
4297 {
4298 backslash:
4299 c = conv_backslash_value(c, env);
4300 }
4301 break;
4302 }
4303
4304 *src = p;
4305 *val = c;
4306 return 0;
4307 }
4308
4309 static int fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env);
4310
4311 static OnigCodePoint
4312 get_name_end_code_point(OnigCodePoint start)
4313 {
4314 switch (start) {
4315 case '<': return (OnigCodePoint )'>'; break;
4316 case '\'': return (OnigCodePoint )'\''; break;
4317 case '(': return (OnigCodePoint )')'; break;
4318 default:
4319 break;
4320 }
4321
4322 return (OnigCodePoint )0;
4323 }
4324
/* How a group reference was written, as reported by the fetch_name*() functions. */
enum REF_NUM {
  IS_NOT_NUM = 0,  /* a name, not a number */
  IS_ABS_NUM = 1,  /* starts with a digit */
  IS_REL_NUM = 2   /* starts with '+' or '-' */
};
4330
4331 #ifdef USE_BACKREF_WITH_LEVEL
4332 /*
4333 \k<name+n>, \k<name-n>
4334 \k<num+n>, \k<num-n>
4335 \k<-num+n>, \k<-num-n>
4336 \k<+num+n>, \k<+num-n>
4337 */
4338 static int
4339 fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
4340 UChar** rname_end, ScanEnv* env,
4341 int* rback_num, int* rlevel, enum REF_NUM* num_type)
4342 {
4343 int r, sign, exist_level;
4344 int digit_count;
4345 OnigCodePoint end_code;
4346 OnigCodePoint c = 0;
4347 OnigEncoding enc = env->enc;
4348 UChar *name_end;
4349 UChar *pnum_head;
4350 UChar *p = *src;
4351 PFETCH_READY;
4352
4353 *rback_num = 0;
4354 exist_level = 0;
4355 *num_type = IS_NOT_NUM;
4356 sign = 1;
4357 pnum_head = *src;
4358
4359 end_code = get_name_end_code_point(start_code);
4360
4361 digit_count = 0;
4362 name_end = end;
4363 r = 0;
4364 if (PEND) {
4365 return ONIGERR_EMPTY_GROUP_NAME;
4366 }
4367 else {
4368 PFETCH(c);
4369 if (c == end_code)
4370 return ONIGERR_EMPTY_GROUP_NAME;
4371
4372 if (IS_CODE_DIGIT_ASCII(enc, c)) {
4373 *num_type = IS_ABS_NUM;
4374 digit_count++;
4375 }
4376 else if (c == '-') {
4377 *num_type = IS_REL_NUM;
4378 sign = -1;
4379 pnum_head = p;
4380 }
4381 else if (c == '+') {
4382 *num_type = IS_REL_NUM;
4383 sign = 1;
4384 pnum_head = p;
4385 }
4386 else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
4387 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
4388 }
4389 }
4390
4391 while (!PEND) {
4392 name_end = p;
4393 PFETCH(c);
4394 if (c == end_code || c == ')' || c == '+' || c == '-') {
4395 if (*num_type != IS_NOT_NUM && digit_count == 0)
4396 r = ONIGERR_INVALID_GROUP_NAME;
4397 break;
4398 }
4399
4400 if (*num_type != IS_NOT_NUM) {
4401 if (IS_CODE_DIGIT_ASCII(enc, c)) {
4402 digit_count++;
4403 }
4404 else {
4405 r = ONIGERR_INVALID_GROUP_NAME;
4406 *num_type = IS_NOT_NUM;
4407 }
4408 }
4409 else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
4410 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
4411 }
4412 }
4413
4414 if (r == 0 && c != end_code) {
4415 if (c == '+' || c == '-') {
4416 int level;
4417 int flag = (c == '-' ? -1 : 1);
4418
4419 if (PEND) {
4420 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
4421 goto end;
4422 }
4423 PFETCH(c);
4424 if (! IS_CODE_DIGIT_ASCII(enc, c)) goto err;
4425 PUNFETCH;
4426 level = onig_scan_unsigned_number(&p, end, enc);
4427 if (level < 0) return ONIGERR_TOO_BIG_NUMBER;
4428 *rlevel = (level * flag);
4429 exist_level = 1;
4430
4431 if (!PEND) {
4432 PFETCH(c);
4433 if (c == end_code)
4434 goto end;
4435 }
4436 }
4437
4438 err:
4439 name_end = end;
4440 err2:
4441 r = ONIGERR_INVALID_GROUP_NAME;
4442 }
4443
4444 end:
4445 if (r == 0) {
4446 if (*num_type != IS_NOT_NUM) {
4447 *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
4448 if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
4449 else if (*rback_num == 0) {
4450 if (*num_type == IS_REL_NUM)
4451 goto err2;
4452 }
4453
4454 *rback_num *= sign;
4455 }
4456
4457 *rname_end = name_end;
4458 *src = p;
4459 return (exist_level ? 1 : 0);
4460 }
4461 else {
4462 onig_scan_env_set_error_string(env, r, *src, name_end);
4463 return r;
4464 }
4465 }
4466 #endif /* USE_BACKREF_WITH_LEVEL */
4467
4468 /*
4469 ref: 0 -> define name (don't allow number name)
4470 1 -> reference name (allow number name)
4471 */
4472 static int
4473 fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
4474 UChar** rname_end, ScanEnv* env, int* rback_num,
4475 enum REF_NUM* num_type, int ref)
4476 {
4477 int r, sign;
4478 int digit_count;
4479 OnigCodePoint end_code;
4480 OnigCodePoint c = 0;
4481 OnigEncoding enc = env->enc;
4482 UChar *name_end;
4483 UChar *pnum_head;
4484 UChar *p = *src;
4485
4486 *rback_num = 0;
4487
4488 end_code = get_name_end_code_point(start_code);
4489
4490 digit_count = 0;
4491 name_end = end;
4492 pnum_head = *src;
4493 r = 0;
4494 *num_type = IS_NOT_NUM;
4495 sign = 1;
4496 if (PEND) {
4497 return ONIGERR_EMPTY_GROUP_NAME;
4498 }
4499 else {
4500 PFETCH_S(c);
4501 if (c == end_code)
4502 return ONIGERR_EMPTY_GROUP_NAME;
4503
4504 if (IS_CODE_DIGIT_ASCII(enc, c)) {
4505 if (ref == 1)
4506 *num_type = IS_ABS_NUM;
4507 else {
4508 r = ONIGERR_INVALID_GROUP_NAME;
4509 }
4510 digit_count++;
4511 }
4512 else if (c == '-') {
4513 if (ref == 1) {
4514 *num_type = IS_REL_NUM;
4515 sign = -1;
4516 pnum_head = p;
4517 }
4518 else {
4519 r = ONIGERR_INVALID_GROUP_NAME;
4520 }
4521 }
4522 else if (c == '+') {
4523 if (ref == 1) {
4524 *num_type = IS_REL_NUM;
4525 sign = 1;
4526 pnum_head = p;
4527 }
4528 else {
4529 r = ONIGERR_INVALID_GROUP_NAME;
4530 }
4531 }
4532 else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
4533 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
4534 }
4535 }
4536
4537 if (r == 0) {
4538 while (!PEND) {
4539 name_end = p;
4540 PFETCH_S(c);
4541 if (c == end_code || c == ')') {
4542 if (*num_type != IS_NOT_NUM && digit_count == 0)
4543 r = ONIGERR_INVALID_GROUP_NAME;
4544 break;
4545 }
4546
4547 if (*num_type != IS_NOT_NUM) {
4548 if (IS_CODE_DIGIT_ASCII(enc, c)) {
4549 digit_count++;
4550 }
4551 else {
4552 if (!ONIGENC_IS_CODE_WORD(enc, c))
4553 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
4554 else
4555 r = ONIGERR_INVALID_GROUP_NAME;
4556
4557 *num_type = IS_NOT_NUM;
4558 }
4559 }
4560 else {
4561 if (!ONIGENC_IS_CODE_WORD(enc, c)) {
4562 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
4563 }
4564 }
4565 }
4566
4567 if (c != end_code) {
4568 r = ONIGERR_INVALID_GROUP_NAME;
4569 goto err;
4570 }
4571
4572 if (*num_type != IS_NOT_NUM) {
4573 *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
4574 if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
4575 else if (*rback_num == 0) {
4576 if (*num_type == IS_REL_NUM) {
4577 r = ONIGERR_INVALID_GROUP_NAME;
4578 goto err;
4579 }
4580 }
4581
4582 *rback_num *= sign;
4583 }
4584
4585 *rname_end = name_end;
4586 *src = p;
4587 return 0;
4588 }
4589 else {
4590 while (!PEND) {
4591 name_end = p;
4592 PFETCH_S(c);
4593 if (c == end_code || c == ')')
4594 break;
4595 }
4596 if (PEND)
4597 name_end = end;
4598
4599 err:
4600 onig_scan_env_set_error_string(env, r, *src, name_end);
4601 return r;
4602 }
4603 }
4604
4605 static void
4606 CC_ESC_WARN(ScanEnv* env, UChar *c)
4607 {
4608 if (onig_warn == onig_null_warn) return ;
4609
4610 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED) &&
4611 IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) {
4612 UChar buf[WARN_BUFSIZE];
4613 onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
4614 env->pattern, env->pattern_end,
4615 (UChar* )"character class has '%s' without escape",
4616 c);
4617 (*onig_warn)((char* )buf);
4618 }
4619 }
4620
4621 static void
4622 CLOSE_BRACKET_WITHOUT_ESC_WARN(ScanEnv* env, UChar* c)
4623 {
4624 if (onig_warn == onig_null_warn) return ;
4625
4626 if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) {
4627 UChar buf[WARN_BUFSIZE];
4628 onig_snprintf_with_pattern(buf, WARN_BUFSIZE, (env)->enc,
4629 (env)->pattern, (env)->pattern_end,
4630 (UChar* )"regular expression has '%s' without escape", c);
4631 (*onig_warn)((char* )buf);
4632 }
4633 }
4634
4635 static UChar*
4636 find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to,
4637 UChar **next, OnigEncoding enc)
4638 {
4639 int i;
4640 OnigCodePoint x;
4641 UChar *q;
4642 UChar *p = from;
4643
4644 while (p < to) {
4645 x = ONIGENC_MBC_TO_CODE(enc, p, to);
4646 q = p + enclen(enc, p);
4647 if (x == s[0]) {
4648 for (i = 1; i < n && q < to; i++) {
4649 x = ONIGENC_MBC_TO_CODE(enc, q, to);
4650 if (x != s[i]) break;
4651 q += enclen(enc, q);
4652 }
4653 if (i >= n) {
4654 if (IS_NOT_NULL(next))
4655 *next = q;
4656 return p;
4657 }
4658 }
4659 p = q;
4660 }
4661 return NULL_UCHARP;
4662 }
4663
4664 static int
4665 str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to,
4666 OnigCodePoint bad, OnigEncoding enc, OnigSyntaxType* syn)
4667 {
4668 int i, in_esc;
4669 OnigCodePoint x;
4670 UChar *q;
4671 UChar *p = from;
4672
4673 in_esc = 0;
4674 while (p < to) {
4675 if (in_esc) {
4676 in_esc = 0;
4677 p += enclen(enc, p);
4678 }
4679 else {
4680 x = ONIGENC_MBC_TO_CODE(enc, p, to);
4681 q = p + enclen(enc, p);
4682 if (x == s[0]) {
4683 for (i = 1; i < n && q < to; i++) {
4684 x = ONIGENC_MBC_TO_CODE(enc, q, to);
4685 if (x != s[i]) break;
4686 q += enclen(enc, q);
4687 }
4688 if (i >= n) return 1;
4689 p += enclen(enc, p);
4690 }
4691 else {
4692 x = ONIGENC_MBC_TO_CODE(enc, p, to);
4693 if (x == bad) return 0;
4694 else if (x == MC_ESC(syn)) in_esc = 1;
4695 p = q;
4696 }
4697 }
4698 }
4699 return 0;
4700 }
4701
4702 static int
4703 fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
4704 {
4705 int num;
4706 OnigCodePoint c, c2;
4707 OnigSyntaxType* syn = env->syntax;
4708 OnigEncoding enc = env->enc;
4709 UChar* prev;
4710 UChar* p = *src;
4711 PFETCH_READY;
4712
4713 if (PEND) {
4714 tok->type = TK_EOT;
4715 return tok->type;
4716 }
4717
4718 PFETCH(c);
4719 tok->type = TK_CHAR;
4720 tok->base = 0;
4721 tok->u.c = c;
4722 tok->escaped = 0;
4723
4724 if (c == ']') {
4725 tok->type = TK_CC_CLOSE;
4726 }
4727 else if (c == '-') {
4728 tok->type = TK_CC_RANGE;
4729 }
4730 else if (c == MC_ESC(syn)) {
4731 if (! IS_SYNTAX_BV(syn, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC))
4732 goto end;
4733
4734 if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
4735
4736 PFETCH(c);
4737 tok->escaped = 1;
4738 tok->u.c = c;
4739 switch (c) {
4740 case 'w':
4741 tok->type = TK_CHAR_TYPE;
4742 tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
4743 tok->u.prop.not = 0;
4744 break;
4745 case 'W':
4746 tok->type = TK_CHAR_TYPE;
4747 tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
4748 tok->u.prop.not = 1;
4749 break;
4750 case 'd':
4751 tok->type = TK_CHAR_TYPE;
4752 tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
4753 tok->u.prop.not = 0;
4754 break;
4755 case 'D':
4756 tok->type = TK_CHAR_TYPE;
4757 tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
4758 tok->u.prop.not = 1;
4759 break;
4760 case 's':
4761 tok->type = TK_CHAR_TYPE;
4762 tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
4763 tok->u.prop.not = 0;
4764 break;
4765 case 'S':
4766 tok->type = TK_CHAR_TYPE;
4767 tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
4768 tok->u.prop.not = 1;
4769 break;
4770 case 'h':
4771 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
4772 tok->type = TK_CHAR_TYPE;
4773 tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
4774 tok->u.prop.not = 0;
4775 break;
4776 case 'H':
4777 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
4778 tok->type = TK_CHAR_TYPE;
4779 tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
4780 tok->u.prop.not = 1;
4781 break;
4782
4783 case 'p':
4784 case 'P':
4785 if (PEND) break;
4786
4787 c2 = PPEEK;
4788 if (c2 == '{' &&
4789 IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {
4790 PINC;
4791 tok->type = TK_CHAR_PROPERTY;
4792 tok->u.prop.not = c == 'P';
4793
4794 if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
4795 PFETCH(c2);
4796 if (c2 == '^') {
4797 tok->u.prop.not = tok->u.prop.not == 0;
4798 }
4799 else
4800 PUNFETCH;
4801 }
4802 }
4803 break;
4804
4805 case 'o':
4806 if (PEND) break;
4807
4808 prev = p;
4809 if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_O_BRACE_OCTAL)) {
4810 PINC;
4811 num = scan_unsigned_octal_number(&p, end, 11, enc);
4812 if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
4813 if (!PEND) {
4814 c2 = PPEEK;
4815 if (IS_CODE_DIGIT_ASCII(enc, c2))
4816 return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
4817 }
4818
4819 if (p > prev + enclen(enc, prev) && !PEND && (PPEEK_IS('}'))) {
4820 PINC;
4821 tok->type = TK_CODE_POINT;
4822 tok->base = 8;
4823 tok->u.code = (OnigCodePoint )num;
4824 }
4825 else {
4826 /* can't read nothing or invalid format */
4827 p = prev;
4828 }
4829 }
4830 break;
4831
4832 case 'x':
4833 if (PEND) break;
4834
4835 prev = p;
4836 if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {
4837 PINC;
4838 num = scan_unsigned_hexadecimal_number(&p, end, 0, 8, enc);
4839 if (num < 0) {
4840 if (num == ONIGERR_TOO_BIG_NUMBER)
4841 return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
4842 else
4843 return num;
4844 }
4845 if (!PEND) {
4846 c2 = PPEEK;
4847 if (IS_CODE_XDIGIT_ASCII(enc, c2))
4848 return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
4849 }
4850
4851 if (p > prev + enclen(enc, prev) && !PEND && (PPEEK_IS('}'))) {
4852 PINC;
4853 tok->type = TK_CODE_POINT;
4854 tok->base = 16;
4855 tok->u.code = (OnigCodePoint )num;
4856 }
4857 else {
4858 /* can't read nothing or invalid format */
4859 p = prev;
4860 }
4861 }
4862 else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
4863 num = scan_unsigned_hexadecimal_number(&p, end, 0, 2, enc);
4864 if (num < 0) return num;
4865 if (p == prev) { /* can't read nothing. */
4866 num = 0; /* but, it's not error */
4867 }
4868 tok->type = TK_RAW_BYTE;
4869 tok->base = 16;
4870 tok->u.c = num;
4871 }
4872 break;
4873
4874 case 'u':
4875 if (PEND) break;
4876
4877 prev = p;
4878 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {
4879 num = scan_unsigned_hexadecimal_number(&p, end, 4, 4, enc);
4880 if (num < 0) return num;
4881 if (p == prev) { /* can't read nothing. */
4882 num = 0; /* but, it's not error */
4883 }
4884 tok->type = TK_CODE_POINT;
4885 tok->base = 16;
4886 tok->u.code = (OnigCodePoint )num;
4887 }
4888 break;
4889
4890 case '0':
4891 case '1': case '2': case '3': case '4': case '5': case '6': case '7':
4892 if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {
4893 PUNFETCH;
4894 prev = p;
4895 num = scan_unsigned_octal_number(&p, end, 3, enc);
4896 if (num < 0 || num >= 256) return ONIGERR_TOO_BIG_NUMBER;
4897 if (p == prev) { /* can't read nothing. */
4898 num = 0; /* but, it's not error */
4899 }
4900 tok->type = TK_RAW_BYTE;
4901 tok->base = 8;
4902 tok->u.c = num;
4903 }
4904 break;
4905
4906 default:
4907 PUNFETCH;
4908 num = fetch_escaped_value(&p, end, env, &c2);
4909 if (num < 0) return num;
4910 if (tok->u.c != c2) {
4911 tok->u.code = c2;
4912 tok->type = TK_CODE_POINT;
4913 }
4914 break;
4915 }
4916 }
4917 else if (c == '[') {
4918 if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_POSIX_BRACKET) && (PPEEK_IS(':'))) {
4919 OnigCodePoint send[] = { (OnigCodePoint )':', (OnigCodePoint )']' };
4920 tok->backp = p; /* point at '[' is read */
4921 PINC;
4922 if (str_exist_check_with_esc(send, 2, p, end,
4923 (OnigCodePoint )']', enc, syn)) {
4924 tok->type = TK_POSIX_BRACKET_OPEN;
4925 }
4926 else {
4927 PUNFETCH;
4928 goto cc_in_cc;
4929 }
4930 }
4931 else {
4932 cc_in_cc:
4933 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP)) {
4934 tok->type = TK_CC_CC_OPEN;
4935 }
4936 else {
4937 CC_ESC_WARN(env, (UChar* )"[");
4938 }
4939 }
4940 }
4941 else if (c == '&') {
4942 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP) &&
4943 !PEND && (PPEEK_IS('&'))) {
4944 PINC;
4945 tok->type = TK_CC_AND;
4946 }
4947 }
4948
4949 end:
4950 *src = p;
4951 return tok->type;
4952 }
4953
4954 static int
4955 fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
4956 {
4957 int r, num;
4958 OnigCodePoint c;
4959 OnigEncoding enc = env->enc;
4960 OnigSyntaxType* syn = env->syntax;
4961 UChar* prev;
4962 UChar* p = *src;
4963 PFETCH_READY;
4964
4965 start:
4966 if (PEND) {
4967 tok->type = TK_EOT;
4968 return tok->type;
4969 }
4970
4971 tok->type = TK_STRING;
4972 tok->base = 0;
4973 tok->backp = p;
4974
4975 PFETCH(c);
4976 if (IS_MC_ESC_CODE(c, syn)) {
4977 if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
4978
4979 tok->backp = p;
4980 PFETCH(c);
4981
4982 tok->u.c = c;
4983 tok->escaped = 1;
4984 switch (c) {
4985 case '*':
4986 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF)) break;
4987 tok->type = TK_REPEAT;
4988 tok->u.repeat.lower = 0;
4989 tok->u.repeat.upper = INFINITE_REPEAT;
4990 goto greedy_check;
4991 break;
4992
4993 case '+':
4994 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_PLUS_ONE_INF)) break;
4995 tok->type = TK_REPEAT;
4996 tok->u.repeat.lower = 1;
4997 tok->u.repeat.upper = INFINITE_REPEAT;
4998 goto greedy_check;
4999 break;
5000
5001 case '?':
5002 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_QMARK_ZERO_ONE)) break;
5003 tok->type = TK_REPEAT;
5004 tok->u.repeat.lower = 0;
5005 tok->u.repeat.upper = 1;
5006 greedy_check:
5007 tok->u.repeat.possessive = 0;
5008 greedy_check2:
5009 if (!PEND && PPEEK_IS('?') &&
5010 IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY) &&
5011 tok->u.repeat.possessive == 0) {
5012 PFETCH(c);
5013 tok->u.repeat.greedy = 0;
5014 tok->u.repeat.possessive = 0;
5015 }
5016 else {
5017 possessive_check:
5018 tok->u.repeat.greedy = 1;
5019 if (!PEND && PPEEK_IS('+') &&
5020 ((IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT) &&
5021 tok->type != TK_INTERVAL) ||
5022 (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL) &&
5023 tok->type == TK_INTERVAL)) &&
5024 tok->u.repeat.possessive == 0) {
5025 PFETCH(c);
5026 tok->u.repeat.possessive = 1;
5027 }
5028 }
5029 break;
5030
5031 case '{':
5032 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break;
5033 r = fetch_interval_quantifier(&p, end, tok, env);
5034 if (r < 0) return r; /* error */
5035 if (r == 0) goto greedy_check2;
5036 else if (r == 2) { /* {n} */
5037 if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))
5038 goto possessive_check;
5039
5040 goto greedy_check2;
5041 }
5042 /* r == 1 : normal char */
5043 break;
5044
5045 case '|':
5046 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_VBAR_ALT)) break;
5047 tok->type = TK_ALT;
5048 break;
5049
5050 case '(':
5051 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;
5052 tok->type = TK_SUBEXP_OPEN;
5053 break;
5054
5055 case ')':
5056 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;
5057 tok->type = TK_SUBEXP_CLOSE;
5058 break;
5059
5060 case 'w':
5061 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
5062 tok->type = TK_CHAR_TYPE;
5063 tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
5064 tok->u.prop.not = 0;
5065 break;
5066
5067 case 'W':
5068 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
5069 tok->type = TK_CHAR_TYPE;
5070 tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
5071 tok->u.prop.not = 1;
5072 break;
5073
5074 case 'b':
5075 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
5076 tok->type = TK_ANCHOR;
5077 tok->u.anchor = ANCR_WORD_BOUNDARY;
5078 break;
5079
5080 case 'B':
5081 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
5082 tok->type = TK_ANCHOR;
5083 tok->u.anchor = ANCR_NO_WORD_BOUNDARY;
5084 break;
5085
5086 case 'y':
5087 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT)) break;
5088 tok->type = TK_ANCHOR;
5089 tok->u.anchor = ANCR_TEXT_SEGMENT_BOUNDARY;
5090 break;
5091
5092 case 'Y':
5093 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT)) break;
5094 tok->type = TK_ANCHOR;
5095 tok->u.anchor = ANCR_NO_TEXT_SEGMENT_BOUNDARY;
5096 break;
5097
5098 #ifdef USE_WORD_BEGIN_END
5099 case '<':
5100 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;
5101 tok->type = TK_ANCHOR;
5102 tok->u.anchor = ANCR_WORD_BEGIN;
5103 break;
5104
5105 case '>':
5106 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;
5107 tok->type = TK_ANCHOR;
5108 tok->u.anchor = ANCR_WORD_END;
5109 break;
5110 #endif
5111
5112 case 's':
5113 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
5114 tok->type = TK_CHAR_TYPE;
5115 tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
5116 tok->u.prop.not = 0;
5117 break;
5118
5119 case 'S':
5120 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
5121 tok->type = TK_CHAR_TYPE;
5122 tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
5123 tok->u.prop.not = 1;
5124 break;
5125
5126 case 'd':
5127 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
5128 tok->type = TK_CHAR_TYPE;
5129 tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
5130 tok->u.prop.not = 0;
5131 break;
5132
5133 case 'D':
5134 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
5135 tok->type = TK_CHAR_TYPE;
5136 tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
5137 tok->u.prop.not = 1;
5138 break;
5139
5140 case 'h':
5141 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
5142 tok->type = TK_CHAR_TYPE;
5143 tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
5144 tok->u.prop.not = 0;
5145 break;
5146
5147 case 'H':
5148 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
5149 tok->type = TK_CHAR_TYPE;
5150 tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
5151 tok->u.prop.not = 1;
5152 break;
5153
5154 case 'K':
5155 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP)) break;
5156 tok->type = TK_KEEP;
5157 break;
5158
5159 case 'R':
5160 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE)) break;
5161 tok->type = TK_GENERAL_NEWLINE;
5162 break;
5163
5164 case 'N':
5165 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT)) break;
5166 tok->type = TK_NO_NEWLINE;
5167 break;
5168
5169 case 'O':
5170 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT)) break;
5171 tok->type = TK_TRUE_ANYCHAR;
5172 break;
5173
5174 case 'X':
5175 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT)) break;
5176 tok->type = TK_TEXT_SEGMENT;
5177 break;
5178
5179 case 'A':
5180 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
5181 begin_buf:
5182 tok->type = TK_ANCHOR;
5183 tok->u.subtype = ANCR_BEGIN_BUF;
5184 break;
5185
5186 case 'Z':
5187 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
5188 tok->type = TK_ANCHOR;
5189 tok->u.subtype = ANCR_SEMI_END_BUF;
5190 break;
5191
5192 case 'z':
5193 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
5194 end_buf:
5195 tok->type = TK_ANCHOR;
5196 tok->u.subtype = ANCR_END_BUF;
5197 break;
5198
5199 case 'G':
5200 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR)) break;
5201 tok->type = TK_ANCHOR;
5202 tok->u.subtype = ANCR_BEGIN_POSITION;
5203 break;
5204
5205 case '`':
5206 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;
5207 goto begin_buf;
5208 break;
5209
5210 case '\'':
5211 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;
5212 goto end_buf;
5213 break;
5214
5215 case 'o':
5216 if (PEND) break;
5217
5218 prev = p;
5219 if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_O_BRACE_OCTAL)) {
5220 PINC;
5221 num = scan_unsigned_octal_number(&p, end, 11, enc);
5222 if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
5223 if (!PEND) {
5224 if (IS_CODE_DIGIT_ASCII(enc, PPEEK))
5225 return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
5226 }
5227
5228 if ((p > prev + enclen(enc, prev)) && !PEND && PPEEK_IS('}')) {
5229 PINC;
5230 tok->type = TK_CODE_POINT;
5231 tok->u.code = (OnigCodePoint )num;
5232 }
5233 else {
5234 /* can't read nothing or invalid format */
5235 p = prev;
5236 }
5237 }
5238 break;
5239
5240 case 'x':
5241 if (PEND) break;
5242
5243 prev = p;
5244 if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {
5245 PINC;
5246 num = scan_unsigned_hexadecimal_number(&p, end, 0, 8, enc);
5247 if (num < 0) {
5248 if (num == ONIGERR_TOO_BIG_NUMBER)
5249 return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
5250 else
5251 return num;
5252 }
5253 if (!PEND) {
5254 if (IS_CODE_XDIGIT_ASCII(enc, PPEEK))
5255 return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
5256 }
5257
5258 if ((p > prev + enclen(enc, prev)) && !PEND && PPEEK_IS('}')) {
5259 PINC;
5260 tok->type = TK_CODE_POINT;
5261 tok->u.code = (OnigCodePoint )num;
5262 }
5263 else {
5264 /* can't read nothing or invalid format */
5265 p = prev;
5266 }
5267 }
5268 else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
5269 num = scan_unsigned_hexadecimal_number(&p, end, 0, 2, enc);
5270 if (num < 0) return num;
5271 if (p == prev) { /* can't read nothing. */
5272 num = 0; /* but, it's not error */
5273 }
5274 tok->type = TK_RAW_BYTE;
5275 tok->base = 16;
5276 tok->u.c = num;
5277 }
5278 break;
5279
5280 case 'u':
5281 if (PEND) break;
5282
5283 prev = p;
5284 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {
5285 num = scan_unsigned_hexadecimal_number(&p, end, 4, 4, enc);
5286 if (num < 0) return num;
5287 if (p == prev) { /* can't read nothing. */
5288 num = 0; /* but, it's not error */
5289 }
5290 tok->type = TK_CODE_POINT;
5291 tok->base = 16;
5292 tok->u.code = (OnigCodePoint )num;
5293 }
5294 break;
5295
5296 case '1': case '2': case '3': case '4':
5297 case '5': case '6': case '7': case '8': case '9':
5298 PUNFETCH;
5299 prev = p;
5300 num = onig_scan_unsigned_number(&p, end, enc);
5301 if (num < 0 || num > ONIG_MAX_BACKREF_NUM) {
5302 goto skip_backref;
5303 }
5304
5305 if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) &&
5306 (num <= env->num_mem || num <= 9)) { /* This spec. from GNU regex */
5307 if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
5308 if (num > env->num_mem || IS_NULL(SCANENV_MEMENV(env)[num].node))
5309 return ONIGERR_INVALID_BACKREF;
5310 }
5311
5312 tok->type = TK_BACKREF;
5313 tok->u.backref.num = 1;
5314 tok->u.backref.ref1 = num;
5315 tok->u.backref.by_name = 0;
5316 #ifdef USE_BACKREF_WITH_LEVEL
5317 tok->u.backref.exist_level = 0;
5318 #endif
5319 break;
5320 }
5321
5322 skip_backref:
5323 if (c == '8' || c == '9') {
5324 /* normal char */
5325 p = prev; PINC;
5326 break;
5327 }
5328
5329 p = prev;
5330 /* fall through */
5331 case '0':
5332 if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {
5333 prev = p;
5334 num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), enc);
5335 if (num < 0 || num >= 256) return ONIGERR_TOO_BIG_NUMBER;
5336 if (p == prev) { /* can't read nothing. */
5337 num = 0; /* but, it's not error */
5338 }
5339 tok->type = TK_RAW_BYTE;
5340 tok->base = 8;
5341 tok->u.c = num;
5342 }
5343 else if (c != '0') {
5344 PINC;
5345 }
5346 break;
5347
5348 case 'k':
5349 if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) {
5350 PFETCH(c);
5351 if (c == '<' || c == '\'') {
5352 UChar* name_end;
5353 int* backs;
5354 int back_num;
5355 enum REF_NUM num_type;
5356
5357 prev = p;
5358
5359 #ifdef USE_BACKREF_WITH_LEVEL
5360 name_end = NULL_UCHARP; /* no need. escape gcc warning. */
5361 r = fetch_name_with_level((OnigCodePoint )c, &p, end, &name_end,
5362 env, &back_num, &tok->u.backref.level, &num_type);
5363 if (r == 1) tok->u.backref.exist_level = 1;
5364 else tok->u.backref.exist_level = 0;
5365 #else
5366 r = fetch_name(c, &p, end, &name_end, env, &back_num, &num_type, 1);
5367 #endif
5368 if (r < 0) return r;
5369
5370 if (num_type != IS_NOT_NUM) {
5371 if (num_type == IS_REL_NUM) {
5372 back_num = backref_rel_to_abs(back_num, env);
5373 }
5374 if (back_num <= 0)
5375 return ONIGERR_INVALID_BACKREF;
5376
5377 if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
5378 if (back_num > env->num_mem ||
5379 IS_NULL(SCANENV_MEMENV(env)[back_num].node))
5380 return ONIGERR_INVALID_BACKREF;
5381 }
5382 tok->type = TK_BACKREF;
5383 tok->u.backref.by_name = 0;
5384 tok->u.backref.num = 1;
5385 tok->u.backref.ref1 = back_num;
5386 }
5387 else {
5388 num = name_to_group_numbers(env, prev, name_end, &backs);
5389 if (num <= 0) {
5390 return ONIGERR_UNDEFINED_NAME_REFERENCE;
5391 }
5392 if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
5393 int i;
5394 for (i = 0; i < num; i++) {
5395 if (backs[i] > env->num_mem ||
5396 IS_NULL(SCANENV_MEMENV(env)[backs[i]].node))
5397 return ONIGERR_INVALID_BACKREF;
5398 }
5399 }
5400
5401 tok->type = TK_BACKREF;
5402 tok->u.backref.by_name = 1;
5403 if (num == 1) {
5404 tok->u.backref.num = 1;
5405 tok->u.backref.ref1 = backs[0];
5406 }
5407 else {
5408 tok->u.backref.num = num;
5409 tok->u.backref.refs = backs;
5410 }
5411 }
5412 }
5413 else
5414 PUNFETCH;
5415 }
5416 break;
5417
5418 #ifdef USE_CALL
5419 case 'g':
5420 if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) {
5421 PFETCH(c);
5422 if (c == '<' || c == '\'') {
5423 int gnum;
5424 UChar* name_end;
5425 enum REF_NUM num_type;
5426
5427 prev = p;
5428 r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env,
5429 &gnum, &num_type, 1);
5430 if (r < 0) return r;
5431
5432 if (num_type != IS_NOT_NUM) {
5433 if (num_type == IS_REL_NUM) {
5434 gnum = backref_rel_to_abs(gnum, env);
5435 if (gnum < 0) {
5436 onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,
5437 prev, name_end);
5438 return ONIGERR_UNDEFINED_GROUP_REFERENCE;
5439 }
5440 }
5441 tok->u.call.by_number = 1;
5442 tok->u.call.gnum = gnum;
5443 }
5444 else {
5445 tok->u.call.by_number = 0;
5446 tok->u.call.gnum = 0;
5447 }
5448
5449 tok->type = TK_CALL;
5450 tok->u.call.name = prev;
5451 tok->u.call.name_end = name_end;
5452 }
5453 else
5454 PUNFETCH;
5455 }
5456 break;
5457 #endif
5458
5459 case 'Q':
5460 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE)) {
5461 tok->type = TK_QUOTE_OPEN;
5462 }
5463 break;
5464
5465 case 'p':
5466 case 'P':
5467 if (!PEND && PPEEK_IS('{') &&
5468 IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {
5469 PINC;
5470 tok->type = TK_CHAR_PROPERTY;
5471 tok->u.prop.not = c == 'P';
5472
5473 if (!PEND &&
5474 IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
5475 PFETCH(c);
5476 if (c == '^') {
5477 tok->u.prop.not = tok->u.prop.not == 0;
5478 }
5479 else
5480 PUNFETCH;
5481 }
5482 }
5483 break;
5484
5485 default:
5486 {
5487 OnigCodePoint c2;
5488
5489 PUNFETCH;
5490 num = fetch_escaped_value(&p, end, env, &c2);
5491 if (num < 0) return num;
5492 /* set_raw: */
5493 if (tok->u.c != c2) {
5494 tok->type = TK_CODE_POINT;
5495 tok->u.code = c2;
5496 }
5497 else { /* string */
5498 p = tok->backp + enclen(enc, tok->backp);
5499 }
5500 }
5501 break;
5502 }
5503 }
5504 else {
5505 tok->u.c = c;
5506 tok->escaped = 0;
5507
5508 #ifdef USE_VARIABLE_META_CHARS
5509 if ((c != ONIG_INEFFECTIVE_META_CHAR) &&
5510 IS_SYNTAX_OP(syn, ONIG_SYN_OP_VARIABLE_META_CHARACTERS)) {
5511 if (c == MC_ANYCHAR(syn))
5512 goto any_char;
5513 else if (c == MC_ANYTIME(syn))
5514 goto anytime;
5515 else if (c == MC_ZERO_OR_ONE_TIME(syn))
5516 goto zero_or_one_time;
5517 else if (c == MC_ONE_OR_MORE_TIME(syn))
5518 goto one_or_more_time;
5519 else if (c == MC_ANYCHAR_ANYTIME(syn)) {
5520 tok->type = TK_ANYCHAR_ANYTIME;
5521 goto out;
5522 }
5523 }
5524 #endif
5525
5526 switch (c) {
5527 case '.':
5528 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_DOT_ANYCHAR)) break;
5529 #ifdef USE_VARIABLE_META_CHARS
5530 any_char:
5531 #endif
5532 tok->type = TK_ANYCHAR;
5533 break;
5534
5535 case '*':
5536 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ASTERISK_ZERO_INF)) break;
5537 #ifdef USE_VARIABLE_META_CHARS
5538 anytime:
5539 #endif
5540 tok->type = TK_REPEAT;
5541 tok->u.repeat.lower = 0;
5542 tok->u.repeat.upper = INFINITE_REPEAT;
5543 goto greedy_check;
5544 break;
5545
5546 case '+':
5547 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_PLUS_ONE_INF)) break;
5548 #ifdef USE_VARIABLE_META_CHARS
5549 one_or_more_time:
5550 #endif
5551 tok->type = TK_REPEAT;
5552 tok->u.repeat.lower = 1;
5553 tok->u.repeat.upper = INFINITE_REPEAT;
5554 goto greedy_check;
5555 break;
5556
5557 case '?':
5558 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_ZERO_ONE)) break;
5559 #ifdef USE_VARIABLE_META_CHARS
5560 zero_or_one_time:
5561 #endif
5562 tok->type = TK_REPEAT;
5563 tok->u.repeat.lower = 0;
5564 tok->u.repeat.upper = 1;
5565 goto greedy_check;
5566 break;
5567
5568 case '{':
5569 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break;
5570 r = fetch_interval_quantifier(&p, end, tok, env);
5571 if (r < 0) return r; /* error */
5572 if (r == 0) goto greedy_check2;
5573 else if (r == 2) { /* {n} */
5574 if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))
5575 goto possessive_check;
5576
5577 goto greedy_check2;
5578 }
5579 /* r == 1 : normal char */
5580 break;
5581
5582 case '|':
5583 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_VBAR_ALT)) break;
5584 tok->type = TK_ALT;
5585 break;
5586
5587 case '(':
5588 if (!PEND && PPEEK_IS('?') &&
5589 IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {
5590 PINC;
5591 if (! PEND) {
5592 c = PPEEK;
5593 if (c == '#') {
5594 PFETCH(c);
5595 while (1) {
5596 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
5597 PFETCH(c);
5598 if (c == MC_ESC(syn)) {
5599 if (! PEND) PFETCH(c);
5600 }
5601 else {
5602 if (c == ')') break;
5603 }
5604 }
5605 goto start;
5606 }
5607 else if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_PERL_SUBEXP_CALL)) {
5608 int gnum;
5609 UChar* name;
5610 UChar* name_end;
5611 enum REF_NUM num_type;
5612
5613 switch (c) {
5614 case '&':
5615 {
5616 PINC;
5617 name = p;
5618 r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &gnum,
5619 &num_type, 0);
5620 if (r < 0) return r;
5621
5622 tok->type = TK_CALL;
5623 tok->u.call.by_number = 0;
5624 tok->u.call.gnum = 0;
5625 tok->u.call.name = name;
5626 tok->u.call.name_end = name_end;
5627 }
5628 break;
5629
5630 case 'R':
5631 tok->type = TK_CALL;
5632 tok->u.call.by_number = 1;
5633 tok->u.call.gnum = 0;
5634 tok->u.call.name = p;
5635 PINC;
5636 if (! PPEEK_IS(')')) return ONIGERR_UNDEFINED_GROUP_OPTION;
5637 tok->u.call.name_end = p;
5638 break;
5639
5640 case '-':
5641 case '+':
5642 goto lparen_qmark_num;
5643 break;
5644 default:
5645 if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto lparen_qmark_end;
5646
5647 lparen_qmark_num:
5648 {
5649 name = p;
5650 r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env,
5651 &gnum, &num_type, 1);
5652 if (r < 0) return r;
5653
5654 if (num_type == IS_NOT_NUM) {
5655 return ONIGERR_INVALID_GROUP_NAME;
5656 }
5657 else {
5658 if (num_type == IS_REL_NUM) {
5659 gnum = backref_rel_to_abs(gnum, env);
5660 if (gnum < 0) {
5661 onig_scan_env_set_error_string(env,
5662 ONIGERR_UNDEFINED_NAME_REFERENCE, name, name_end);
5663 return ONIGERR_UNDEFINED_GROUP_REFERENCE;
5664 }
5665 }
5666 tok->u.call.by_number = 1;
5667 tok->u.call.gnum = gnum;
5668 }
5669
5670 tok->type = TK_CALL;
5671 tok->u.call.name = name;
5672 tok->u.call.name_end = name_end;
5673 }
5674 break;
5675 }
5676 }
5677 }
5678 lparen_qmark_end:
5679 PUNFETCH;
5680 }
5681
5682 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
5683 tok->type = TK_SUBEXP_OPEN;
5684 break;
5685
5686 case ')':
5687 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
5688 tok->type = TK_SUBEXP_CLOSE;
5689 break;
5690
5691 case '^':
5692 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
5693 tok->type = TK_ANCHOR;
5694 tok->u.subtype = (IS_SINGLELINE(env->options)
5695 ? ANCR_BEGIN_BUF : ANCR_BEGIN_LINE);
5696 break;
5697
5698 case '$':
5699 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
5700 tok->type = TK_ANCHOR;
5701 tok->u.subtype = (IS_SINGLELINE(env->options)
5702 ? ANCR_SEMI_END_BUF : ANCR_END_LINE);
5703 break;
5704
5705 case '[':
5706 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACKET_CC)) break;
5707 tok->type = TK_CC_OPEN;
5708 break;
5709
5710 case ']':
5711 if (*src > env->pattern) /* /].../ is allowed. */
5712 CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]");
5713 break;
5714
5715 case '#':
5716 if (IS_EXTEND(env->options)) {
5717 while (!PEND) {
5718 PFETCH(c);
5719 if (ONIGENC_IS_CODE_NEWLINE(enc, c))
5720 break;
5721 }
5722 goto start;
5723 break;
5724 }
5725 break;
5726
5727 case ' ': case '\t': case '\n': case '\r': case '\f':
5728 if (IS_EXTEND(env->options))
5729 goto start;
5730 break;
5731
5732 default:
5733 /* string */
5734 break;
5735 }
5736 }
5737
5738 #ifdef USE_VARIABLE_META_CHARS
5739 out:
5740 #endif
5741 *src = p;
5742 return tok->type;
5743 }
5744
5745 static int
5746 add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not,
5747 OnigEncoding enc ARG_UNUSED, OnigCodePoint sb_out,
5748 const OnigCodePoint mbr[])
5749 {
5750 int i, r;
5751 OnigCodePoint j;
5752
5753 int n = ONIGENC_CODE_RANGE_NUM(mbr);
5754
5755 if (not == 0) {
5756 for (i = 0; i < n; i++) {
5757 for (j = ONIGENC_CODE_RANGE_FROM(mbr, i);
5758 j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) {
5759 if (j >= sb_out) {
5760 if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) {
5761 r = add_code_range_to_buf(&(cc->mbuf), j,
5762 ONIGENC_CODE_RANGE_TO(mbr, i));
5763 if (r != 0) return r;
5764 i++;
5765 }
5766
5767 goto sb_end;
5768 }
5769 BITSET_SET_BIT(cc->bs, j);
5770 }
5771 }
5772
5773 sb_end:
5774 for ( ; i < n; i++) {
5775 r = add_code_range_to_buf(&(cc->mbuf),
5776 ONIGENC_CODE_RANGE_FROM(mbr, i),
5777 ONIGENC_CODE_RANGE_TO(mbr, i));
5778 if (r != 0) return r;
5779 }
5780 }
5781 else {
5782 OnigCodePoint prev = 0;
5783
5784 for (i = 0; i < n; i++) {
5785 for (j = prev; j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) {
5786 if (j >= sb_out) {
5787 goto sb_end2;
5788 }
5789 BITSET_SET_BIT(cc->bs, j);
5790 }
5791 prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
5792 }
5793 for (j = prev; j < sb_out; j++) {
5794 BITSET_SET_BIT(cc->bs, j);
5795 }
5796
5797 sb_end2:
5798 prev = sb_out;
5799
5800 for (i = 0; i < n; i++) {
5801 if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) {
5802 r = add_code_range_to_buf(&(cc->mbuf), prev,
5803 ONIGENC_CODE_RANGE_FROM(mbr, i) - 1);
5804 if (r != 0) return r;
5805 }
5806 prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
5807 if (prev == 0) goto end;
5808 }
5809
5810 r = add_code_range_to_buf(&(cc->mbuf), prev, MAX_CODE_POINT);
5811 if (r != 0) return r;
5812 }
5813
5814 end:
5815 return 0;
5816 }
5817
5818 static int
5819 add_ctype_to_cc_by_range_limit(CClassNode* cc, int ctype ARG_UNUSED, int not,
5820 OnigEncoding enc ARG_UNUSED,
5821 OnigCodePoint sb_out,
5822 const OnigCodePoint mbr[], OnigCodePoint limit)
5823 {
5824 int i, r;
5825 OnigCodePoint j;
5826 OnigCodePoint from;
5827 OnigCodePoint to;
5828
5829 int n = ONIGENC_CODE_RANGE_NUM(mbr);
5830
5831 if (not == 0) {
5832 for (i = 0; i < n; i++) {
5833 for (j = ONIGENC_CODE_RANGE_FROM(mbr, i);
5834 j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) {
5835 if (j > limit) goto end;
5836 if (j >= sb_out) {
5837 if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) {
5838 to = ONIGENC_CODE_RANGE_TO(mbr, i);
5839 if (to > limit) to = limit;
5840 r = add_code_range_to_buf(&(cc->mbuf), j, to);
5841 if (r != 0) return r;
5842 i++;
5843 }
5844
5845 goto sb_end;
5846 }
5847 BITSET_SET_BIT(cc->bs, j);
5848 }
5849 }
5850
5851 sb_end:
5852 for ( ; i < n; i++) {
5853 from = ONIGENC_CODE_RANGE_FROM(mbr, i);
5854 to = ONIGENC_CODE_RANGE_TO(mbr, i);
5855 if (from > limit) break;
5856 if (to > limit) to = limit;
5857 r = add_code_range_to_buf(&(cc->mbuf), from, to);
5858 if (r != 0) return r;
5859 }
5860 }
5861 else {
5862 OnigCodePoint prev = 0;
5863
5864 for (i = 0; i < n; i++) {
5865 from = ONIGENC_CODE_RANGE_FROM(mbr, i);
5866 if (from > limit) {
5867 for (j = prev; j < sb_out; j++) {
5868 BITSET_SET_BIT(cc->bs, j);
5869 }
5870 goto sb_end2;
5871 }
5872 for (j = prev; j < from; j++) {
5873 if (j >= sb_out) goto sb_end2;
5874 BITSET_SET_BIT(cc->bs, j);
5875 }
5876 prev = ONIGENC_CODE_RANGE_TO(mbr, i);
5877 if (prev > limit) prev = limit;
5878 prev++;
5879 if (prev == 0) goto end;
5880 }
5881 for (j = prev; j < sb_out; j++) {
5882 BITSET_SET_BIT(cc->bs, j);
5883 }
5884
5885 sb_end2:
5886 prev = sb_out;
5887
5888 for (i = 0; i < n; i++) {
5889 from = ONIGENC_CODE_RANGE_FROM(mbr, i);
5890 if (from > limit) goto last;
5891
5892 if (prev < from) {
5893 r = add_code_range_to_buf(&(cc->mbuf), prev, from - 1);
5894 if (r != 0) return r;
5895 }
5896 prev = ONIGENC_CODE_RANGE_TO(mbr, i);
5897 if (prev > limit) prev = limit;
5898 prev++;
5899 if (prev == 0) goto end;
5900 }
5901
5902 last:
5903 r = add_code_range_to_buf(&(cc->mbuf), prev, MAX_CODE_POINT);
5904 if (r != 0) return r;
5905 }
5906
5907 end:
5908 return 0;
5909 }
5910
5911 static int
5912 add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
5913 {
5914 #define ASCII_LIMIT 127
5915
5916 int c, r;
5917 int ascii_mode;
5918 const OnigCodePoint *ranges;
5919 OnigCodePoint limit;
5920 OnigCodePoint sb_out;
5921 OnigEncoding enc = env->enc;
5922
5923 ascii_mode = IS_ASCII_MODE_CTYPE_OPTION(ctype, env->options);
5924
5925 r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges);
5926 if (r == 0) {
5927 if (ascii_mode == 0)
5928 r = add_ctype_to_cc_by_range(cc, ctype, not, env->enc, sb_out, ranges);
5929 else
5930 r = add_ctype_to_cc_by_range_limit(cc, ctype, not, env->enc, sb_out,
5931 ranges, ASCII_LIMIT);
5932 return r;
5933 }
5934 else if (r != ONIG_NO_SUPPORT_CONFIG) {
5935 return r;
5936 }
5937
5938 r = 0;
5939 limit = ascii_mode ? ASCII_LIMIT : SINGLE_BYTE_SIZE;
5940
5941 switch (ctype) {
5942 case ONIGENC_CTYPE_ALPHA:
5943 case ONIGENC_CTYPE_BLANK:
5944 case ONIGENC_CTYPE_CNTRL:
5945 case ONIGENC_CTYPE_DIGIT:
5946 case ONIGENC_CTYPE_LOWER:
5947 case ONIGENC_CTYPE_PUNCT:
5948 case ONIGENC_CTYPE_SPACE:
5949 case ONIGENC_CTYPE_UPPER:
5950 case ONIGENC_CTYPE_XDIGIT:
5951 case ONIGENC_CTYPE_ASCII:
5952 case ONIGENC_CTYPE_ALNUM:
5953 if (not != 0) {
5954 for (c = 0; c < (int )limit; c++) {
5955 if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
5956 BITSET_SET_BIT(cc->bs, c);
5957 }
5958 for (c = limit; c < SINGLE_BYTE_SIZE; c++) {
5959 BITSET_SET_BIT(cc->bs, c);
5960 }
5961
5962 ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
5963 }
5964 else {
5965 for (c = 0; c < (int )limit; c++) {
5966 if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
5967 BITSET_SET_BIT(cc->bs, c);
5968 }
5969 }
5970 break;
5971
5972 case ONIGENC_CTYPE_GRAPH:
5973 case ONIGENC_CTYPE_PRINT:
5974 case ONIGENC_CTYPE_WORD:
5975 if (not != 0) {
5976 for (c = 0; c < (int )limit; c++) {
5977 if (ONIGENC_CODE_TO_MBCLEN(enc, c) > 0 /* check invalid code point */
5978 && ! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
5979 BITSET_SET_BIT(cc->bs, c);
5980 }
5981 for (c = limit; c < SINGLE_BYTE_SIZE; c++) {
5982 if (ONIGENC_CODE_TO_MBCLEN(enc, c) > 0)
5983 BITSET_SET_BIT(cc->bs, c);
5984 }
5985 }
5986 else {
5987 for (c = 0; c < (int )limit; c++) {
5988 if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
5989 BITSET_SET_BIT(cc->bs, c);
5990 }
5991 if (ascii_mode == 0)
5992 ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
5993 }
5994 break;
5995
5996 default:
5997 return ONIGERR_PARSER_BUG;
5998 break;
5999 }
6000
6001 return r;
6002 }
6003
6004 static int
6005 parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
6006 {
6007 #define POSIX_BRACKET_CHECK_LIMIT_LENGTH 20
6008 #define POSIX_BRACKET_NAME_MIN_LEN 4
6009
6010 static PosixBracketEntryType PBS[] = {
6011 { (UChar* )"alnum", ONIGENC_CTYPE_ALNUM, 5 },
6012 { (UChar* )"alpha", ONIGENC_CTYPE_ALPHA, 5 },
6013 { (UChar* )"blank", ONIGENC_CTYPE_BLANK, 5 },
6014 { (UChar* )"cntrl", ONIGENC_CTYPE_CNTRL, 5 },
6015 { (UChar* )"digit", ONIGENC_CTYPE_DIGIT, 5 },
6016 { (UChar* )"graph", ONIGENC_CTYPE_GRAPH, 5 },
6017 { (UChar* )"lower", ONIGENC_CTYPE_LOWER, 5 },
6018 { (UChar* )"print", ONIGENC_CTYPE_PRINT, 5 },
6019 { (UChar* )"punct", ONIGENC_CTYPE_PUNCT, 5 },
6020 { (UChar* )"space", ONIGENC_CTYPE_SPACE, 5 },
6021 { (UChar* )"upper", ONIGENC_CTYPE_UPPER, 5 },
6022 { (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 },
6023 { (UChar* )"ascii", ONIGENC_CTYPE_ASCII, 5 },
6024 { (UChar* )"word", ONIGENC_CTYPE_WORD, 4 },
6025 { (UChar* )NULL, -1, 0 }
6026 };
6027
6028 PosixBracketEntryType *pb;
6029 int not, i, r;
6030 OnigCodePoint c;
6031 OnigEncoding enc = env->enc;
6032 UChar *p = *src;
6033
6034 if (PPEEK_IS('^')) {
6035 PINC_S;
6036 not = 1;
6037 }
6038 else
6039 not = 0;
6040
6041 if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MIN_LEN + 3)
6042 goto not_posix_bracket;
6043
6044 for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
6045 if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) {
6046 p = (UChar* )onigenc_step(enc, p, end, pb->len);
6047 if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0)
6048 return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
6049
6050 r = add_ctype_to_cc(cc, pb->ctype, not, env);
6051 if (r != 0) return r;
6052
6053 PINC_S; PINC_S;
6054 *src = p;
6055 return 0;
6056 }
6057 }
6058
6059 not_posix_bracket:
6060 c = 0;
6061 i = 0;
6062 while (!PEND && ((c = PPEEK) != ':') && c != ']') {
6063 PINC_S;
6064 if (++i > POSIX_BRACKET_CHECK_LIMIT_LENGTH) break;
6065 }
6066 if (c == ':' && ! PEND) {
6067 PINC_S;
6068 if (! PEND) {
6069 PFETCH_S(c);
6070 if (c == ']')
6071 return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
6072 }
6073 }
6074
6075 return 1; /* 1: is not POSIX bracket, but no error. */
6076 }
6077
6078 static int
6079 fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env)
6080 {
6081 int r;
6082 OnigCodePoint c;
6083 OnigEncoding enc = env->enc;
6084 UChar *prev, *start, *p = *src;
6085
6086 r = 0;
6087 start = prev = p;
6088
6089 while (!PEND) {
6090 prev = p;
6091 PFETCH_S(c);
6092 if (c == '}') {
6093 r = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, start, prev);
6094 if (r < 0) break;
6095
6096 *src = p;
6097 return r;
6098 }
6099 else if (c == '(' || c == ')' || c == '{' || c == '|') {
6100 r = ONIGERR_INVALID_CHAR_PROPERTY_NAME;
6101 break;
6102 }
6103 }
6104
6105 onig_scan_env_set_error_string(env, r, *src, prev);
6106 return r;
6107 }
6108
6109 static int
6110 parse_char_property(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
6111 {
6112 int r, ctype;
6113 CClassNode* cc;
6114
6115 ctype = fetch_char_property_to_ctype(src, end, env);
6116 if (ctype < 0) return ctype;
6117
6118 *np = node_new_cclass();
6119 CHECK_NULL_RETURN_MEMERR(*np);
6120 cc = CCLASS_(*np);
6121 r = add_ctype_to_cc(cc, ctype, 0, env);
6122 if (r != 0) return r;
6123 if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
6124
6125 return 0;
6126 }
6127
6128
/* Parser state while reading the contents of a character class. */
enum CCSTATE {
  CCS_VALUE    = 0,  /* a value or class has been read */
  CCS_RANGE    = 1,  /* a range operator has been read after a value */
  CCS_COMPLETE = 2,  /* a range has just been completed */
  CCS_START    = 3   /* nothing read yet (also set again after "&&") */
};
6135
/* Kind of the value most recently read inside a character class. */
enum CCVALTYPE {
  CCV_SB         = 0,  /* value stored in the class bitset */
  CCV_CODE_POINT = 1,  /* value stored in the class code range buffer */
  CCV_CLASS      = 2   /* a whole class (ctype, POSIX bracket, property) */
};
6141
6142 static int
6143 next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type,
6144 enum CCSTATE* state, ScanEnv* env)
6145 {
6146 int r;
6147
6148 if (*state == CCS_RANGE)
6149 return ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE;
6150
6151 if (*state == CCS_VALUE && *type != CCV_CLASS) {
6152 if (*type == CCV_SB)
6153 BITSET_SET_BIT(cc->bs, (int )(*vs));
6154 else if (*type == CCV_CODE_POINT) {
6155 r = add_code_range(&(cc->mbuf), env, *vs, *vs);
6156 if (r < 0) return r;
6157 }
6158 }
6159
6160 *state = CCS_VALUE;
6161 *type = CCV_CLASS;
6162 return 0;
6163 }
6164
6165 static int
6166 next_state_val(CClassNode* cc, OnigCodePoint *from, OnigCodePoint to,
6167 int* from_israw, int to_israw,
6168 enum CCVALTYPE intype, enum CCVALTYPE* type,
6169 enum CCSTATE* state, ScanEnv* env)
6170 {
6171 int r;
6172
6173 switch (*state) {
6174 case CCS_VALUE:
6175 if (*type == CCV_SB) {
6176 if (*from > 0xff)
6177 return ONIGERR_INVALID_CODE_POINT_VALUE;
6178
6179 BITSET_SET_BIT(cc->bs, (int )(*from));
6180 }
6181 else if (*type == CCV_CODE_POINT) {
6182 r = add_code_range(&(cc->mbuf), env, *from, *from);
6183 if (r < 0) return r;
6184 }
6185 break;
6186
6187 case CCS_RANGE:
6188 if (intype == *type) {
6189 if (intype == CCV_SB) {
6190 if (*from > 0xff || to > 0xff)
6191 return ONIGERR_INVALID_CODE_POINT_VALUE;
6192
6193 if (*from > to) {
6194 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
6195 goto ccs_range_end;
6196 else
6197 return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
6198 }
6199 bitset_set_range(cc->bs, (int )*from, (int )to);
6200 }
6201 else {
6202 r = add_code_range(&(cc->mbuf), env, *from, to);
6203 if (r < 0) return r;
6204 }
6205 }
6206 else {
6207 if (*from > to) {
6208 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
6209 goto ccs_range_end;
6210 else
6211 return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
6212 }
6213 bitset_set_range(cc->bs, (int )*from, (int )(to < 0xff ? to : 0xff));
6214 r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*from, to);
6215 if (r < 0) return r;
6216 }
6217 ccs_range_end:
6218 *state = CCS_COMPLETE;
6219 break;
6220
6221 case CCS_COMPLETE:
6222 case CCS_START:
6223 *state = CCS_VALUE;
6224 break;
6225
6226 default:
6227 break;
6228 }
6229
6230 *from_israw = to_israw;
6231 *from = to;
6232 *type = intype;
6233 return 0;
6234 }
6235
6236 static int
6237 code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,
6238 ScanEnv* env)
6239 {
6240 int in_esc;
6241 OnigCodePoint code;
6242 OnigEncoding enc = env->enc;
6243 UChar* p = from;
6244
6245 in_esc = 0;
6246 while (! PEND) {
6247 if (ignore_escaped && in_esc) {
6248 in_esc = 0;
6249 }
6250 else {
6251 PFETCH_S(code);
6252 if (code == c) return 1;
6253 if (code == MC_ESC(env->syntax)) in_esc = 1;
6254 }
6255 }
6256 return 0;
6257 }
6258
6259 static int
6260 parse_char_class(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
6261 {
6262 int r, neg, len, fetched, and_start;
6263 OnigCodePoint v, vs;
6264 UChar *p;
6265 Node* node;
6266 CClassNode *cc, *prev_cc;
6267 CClassNode work_cc;
6268
6269 enum CCSTATE state;
6270 enum CCVALTYPE val_type, in_type;
6271 int val_israw, in_israw;
6272
6273 *np = NULL_NODE;
6274 env->parse_depth++;
6275 if (env->parse_depth > ParseDepthLimit)
6276 return ONIGERR_PARSE_DEPTH_LIMIT_OVER;
6277
6278 prev_cc = (CClassNode* )NULL;
6279 r = fetch_token_in_cc(tok, src, end, env);
6280 if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) {
6281 neg = 1;
6282 r = fetch_token_in_cc(tok, src, end, env);
6283 }
6284 else {
6285 neg = 0;
6286 }
6287
6288 if (r < 0) return r;
6289 if (r == TK_CC_CLOSE) {
6290 if (! code_exist_check((OnigCodePoint )']',
6291 *src, env->pattern_end, 1, env))
6292 return ONIGERR_EMPTY_CHAR_CLASS;
6293
6294 CC_ESC_WARN(env, (UChar* )"]");
6295 r = tok->type = TK_CHAR; /* allow []...] */
6296 }
6297
6298 *np = node = node_new_cclass();
6299 CHECK_NULL_RETURN_MEMERR(node);
6300 cc = CCLASS_(node);
6301
6302 and_start = 0;
6303 state = CCS_START;
6304 p = *src;
6305 while (r != TK_CC_CLOSE) {
6306 fetched = 0;
6307 switch (r) {
6308 case TK_CHAR:
6309 any_char_in:
6310 len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.c);
6311 if (len > 1) {
6312 in_type = CCV_CODE_POINT;
6313 }
6314 else if (len < 0) {
6315 r = len;
6316 goto err;
6317 }
6318 else {
6319 /* sb_char: */
6320 in_type = CCV_SB;
6321 }
6322 v = (OnigCodePoint )tok->u.c;
6323 in_israw = 0;
6324 goto val_entry2;
6325 break;
6326
6327 case TK_RAW_BYTE:
6328 /* tok->base != 0 : octal or hexadec. */
6329 if (! ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base != 0) {
6330 int i, j;
6331 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
6332 UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN;
6333 UChar* psave = p;
6334 int base = tok->base;
6335
6336 buf[0] = tok->u.c;
6337 for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) {
6338 r = fetch_token_in_cc(tok, &p, end, env);
6339 if (r < 0) goto err;
6340 if (r != TK_RAW_BYTE || tok->base != base) {
6341 fetched = 1;
6342 break;
6343 }
6344 buf[i] = tok->u.c;
6345 }
6346
6347 if (i < ONIGENC_MBC_MINLEN(env->enc)) {
6348 r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
6349 goto err;
6350 }
6351
6352 /* clear buf tail */
6353 for (j = i; j < ONIGENC_CODE_TO_MBC_MAXLEN; j++) buf[j] = '\0';
6354
6355 len = enclen(env->enc, buf);
6356 if (i < len) {
6357 r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
6358 goto err;
6359 }
6360 else if (i > len) { /* fetch back */
6361 p = psave;
6362 for (i = 1; i < len; i++) {
6363 r = fetch_token_in_cc(tok, &p, end, env);
6364 }
6365 fetched = 0;
6366 }
6367
6368 if (i == 1) {
6369 v = (OnigCodePoint )buf[0];
6370 goto raw_single;
6371 }
6372 else {
6373 v = ONIGENC_MBC_TO_CODE(env->enc, buf, bufe);
6374 in_type = CCV_CODE_POINT;
6375 }
6376 }
6377 else {
6378 v = (OnigCodePoint )tok->u.c;
6379 raw_single:
6380 in_type = CCV_SB;
6381 }
6382 in_israw = 1;
6383 goto val_entry2;
6384 break;
6385
6386 case TK_CODE_POINT:
6387 v = tok->u.code;
6388 in_israw = 1;
6389 val_entry:
6390 len = ONIGENC_CODE_TO_MBCLEN(env->enc, v);
6391 if (len < 0) {
6392 if (state != CCS_RANGE ||
6393 ! IS_SYNTAX_BV(env->syntax,
6394 ONIG_SYN_ALLOW_INVALID_CODE_END_OF_RANGE_IN_CC) ||
6395 v < 0x100 || ONIGENC_MBC_MAXLEN(env->enc) == 1) {
6396 r = len;
6397 goto err;
6398 }
6399 }
6400 in_type = (len == 1 ? CCV_SB : CCV_CODE_POINT);
6401 val_entry2:
6402 r = next_state_val(cc, &vs, v, &val_israw, in_israw, in_type, &val_type,
6403 &state, env);
6404 if (r != 0) goto err;
6405 break;
6406
6407 case TK_POSIX_BRACKET_OPEN:
6408 r = parse_posix_bracket(cc, &p, end, env);
6409 if (r < 0) goto err;
6410 if (r == 1) { /* is not POSIX bracket */
6411 CC_ESC_WARN(env, (UChar* )"[");
6412 p = tok->backp;
6413 v = (OnigCodePoint )tok->u.c;
6414 in_israw = 0;
6415 goto val_entry;
6416 }
6417 goto next_class;
6418 break;
6419
6420 case TK_CHAR_TYPE:
6421 r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not, env);
6422 if (r != 0) goto err;
6423
6424 next_class:
6425 r = next_state_class(cc, &vs, &val_type, &state, env);
6426 if (r != 0) goto err;
6427 break;
6428
6429 case TK_CHAR_PROPERTY:
6430 {
6431 int ctype = fetch_char_property_to_ctype(&p, end, env);
6432 if (ctype < 0) {
6433 r = ctype;
6434 goto err;
6435 }
6436 r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, env);
6437 if (r != 0) goto err;
6438 goto next_class;
6439 }
6440 break;
6441
6442 case TK_CC_RANGE:
6443 if (state == CCS_VALUE) {
6444 r = fetch_token_in_cc(tok, &p, end, env);
6445 if (r < 0) goto err;
6446 fetched = 1;
6447 if (r == TK_CC_CLOSE) { /* allow [x-] */
6448 range_end_val:
6449 v = (OnigCodePoint )'-';
6450 in_israw = 0;
6451 goto val_entry;
6452 }
6453 else if (r == TK_CC_AND) {
6454 CC_ESC_WARN(env, (UChar* )"-");
6455 goto range_end_val;
6456 }
6457
6458 if (val_type == CCV_CLASS) {
6459 r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;
6460 goto err;
6461 }
6462
6463 state = CCS_RANGE;
6464 }
6465 else if (state == CCS_START) {
6466 /* [-xa] is allowed */
6467 v = (OnigCodePoint )tok->u.c;
6468 in_israw = 0;
6469
6470 r = fetch_token_in_cc(tok, &p, end, env);
6471 if (r < 0) goto err;
6472 fetched = 1;
6473 /* [--x] or [a&&-x] is warned. */
6474 if (r == TK_CC_RANGE || and_start != 0)
6475 CC_ESC_WARN(env, (UChar* )"-");
6476
6477 goto val_entry;
6478 }
6479 else if (state == CCS_RANGE) {
6480 CC_ESC_WARN(env, (UChar* )"-");
6481 goto any_char_in; /* [!--x] is allowed */
6482 }
6483 else { /* CCS_COMPLETE */
6484 r = fetch_token_in_cc(tok, &p, end, env);
6485 if (r < 0) goto err;
6486 fetched = 1;
6487 if (r == TK_CC_CLOSE) goto range_end_val; /* allow [a-b-] */
6488 else if (r == TK_CC_AND) {
6489 CC_ESC_WARN(env, (UChar* )"-");
6490 goto range_end_val;
6491 }
6492
6493 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) {
6494 CC_ESC_WARN(env, (UChar* )"-");
6495 goto range_end_val; /* [0-9-a] is allowed as [0-9\-a] */
6496 }
6497 r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;
6498 goto err;
6499 }
6500 break;
6501
6502 case TK_CC_CC_OPEN: /* [ */
6503 {
6504 Node *anode;
6505 CClassNode* acc;
6506
6507 r = parse_char_class(&anode, tok, &p, end, env);
6508 if (r != 0) {
6509 onig_node_free(anode);
6510 goto cc_open_err;
6511 }
6512 acc = CCLASS_(anode);
6513 r = or_cclass(cc, acc, env->enc);
6514 onig_node_free(anode);
6515
6516 cc_open_err:
6517 if (r != 0) goto err;
6518 }
6519 break;
6520
6521 case TK_CC_AND: /* && */
6522 {
6523 if (state == CCS_VALUE) {
6524 r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,
6525 &val_type, &state, env);
6526 if (r != 0) goto err;
6527 }
6528 /* initialize local variables */
6529 and_start = 1;
6530 state = CCS_START;
6531
6532 if (IS_NOT_NULL(prev_cc)) {
6533 r = and_cclass(prev_cc, cc, env->enc);
6534 if (r != 0) goto err;
6535 bbuf_free(cc->mbuf);
6536 }
6537 else {
6538 prev_cc = cc;
6539 cc = &work_cc;
6540 }
6541 initialize_cclass(cc);
6542 }
6543 break;
6544
6545 case TK_EOT:
6546 r = ONIGERR_PREMATURE_END_OF_CHAR_CLASS;
6547 goto err;
6548 break;
6549 default:
6550 r = ONIGERR_PARSER_BUG;
6551 goto err;
6552 break;
6553 }
6554
6555 if (fetched)
6556 r = tok->type;
6557 else {
6558 r = fetch_token_in_cc(tok, &p, end, env);
6559 if (r < 0) goto err;
6560 }
6561 }
6562
6563 if (state == CCS_VALUE) {
6564 r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,
6565 &val_type, &state, env);
6566 if (r != 0) goto err;
6567 }
6568
6569 if (IS_NOT_NULL(prev_cc)) {
6570 r = and_cclass(prev_cc, cc, env->enc);
6571 if (r != 0) goto err;
6572 bbuf_free(cc->mbuf);
6573 cc = prev_cc;
6574 }
6575
6576 if (neg != 0)
6577 NCCLASS_SET_NOT(cc);
6578 else
6579 NCCLASS_CLEAR_NOT(cc);
6580 if (IS_NCCLASS_NOT(cc) &&
6581 IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) {
6582 int is_empty = (IS_NULL(cc->mbuf) ? 1 : 0);
6583 if (is_empty != 0)
6584 BITSET_IS_EMPTY(cc->bs, is_empty);
6585
6586 if (is_empty == 0) {
6587 #define NEWLINE_CODE 0x0a
6588
6589 if (ONIGENC_IS_CODE_NEWLINE(env->enc, NEWLINE_CODE)) {
6590 if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1)
6591 BITSET_SET_BIT(cc->bs, NEWLINE_CODE);
6592 else
6593 add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE);
6594 }
6595 }
6596 }
6597 *src = p;
6598 env->parse_depth--;
6599 return 0;
6600
6601 err:
6602 if (cc != CCLASS_(*np))
6603 bbuf_free(cc->mbuf);
6604 return r;
6605 }
6606
6607 static int parse_subexp(Node** top, PToken* tok, int term,
6608 UChar** src, UChar* end, ScanEnv* env, int group_head);
6609
6610 #ifdef USE_CALLOUT
6611
6612 /* (?{...}[tag][+-]) (?{{...}}[tag][+-]) */
6613 static int
6614 parse_callout_of_contents(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* env)
6615 {
6616 int r;
6617 int i;
6618 int in;
6619 int num;
6620 OnigCodePoint c;
6621 UChar* code_start;
6622 UChar* code_end;
6623 UChar* contents;
6624 UChar* tag_start;
6625 UChar* tag_end;
6626 int brace_nest;
6627 CalloutListEntry* e;
6628 RegexExt* ext;
6629 OnigEncoding enc = env->enc;
6630 UChar* p = *src;
6631
6632 if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
6633
6634 brace_nest = 0;
6635 while (PPEEK_IS('{')) {
6636 brace_nest++;
6637 PINC_S;
6638 if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
6639 }
6640
6641 in = ONIG_CALLOUT_IN_PROGRESS;
6642 code_start = p;
6643 while (1) {
6644 if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
6645
6646 code_end = p;
6647 PFETCH_S(c);
6648 if (c == '}') {
6649 i = brace_nest;
6650 while (i > 0) {
6651 if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
6652 PFETCH_S(c);
6653 if (c == '}') i--;
6654 else break;
6655 }
6656 if (i == 0) break;
6657 }
6658 }
6659
6660 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
6661
6662 PFETCH_S(c);
6663 if (c == '[') {
6664 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
6665 tag_end = tag_start = p;
6666 while (! PEND) {
6667 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
6668 tag_end = p;
6669 PFETCH_S(c);
6670 if (c == ']') break;
6671 }
6672 if (! is_allowed_callout_tag_name(enc, tag_start, tag_end))
6673 return ONIGERR_INVALID_CALLOUT_TAG_NAME;
6674
6675 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
6676 PFETCH_S(c);
6677 }
6678 else {
6679 tag_start = tag_end = 0;
6680 }
6681
6682 if (c == 'X') {
6683 in |= ONIG_CALLOUT_IN_RETRACTION;
6684 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
6685 PFETCH_S(c);
6686 }
6687 else if (c == '<') {
6688 in = ONIG_CALLOUT_IN_RETRACTION;
6689 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
6690 PFETCH_S(c);
6691 }
6692 else if (c == '>') { /* no needs (default) */
6693 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
6694 PFETCH_S(c);
6695 }
6696
6697 if (c != cterm)
6698 return ONIGERR_INVALID_CALLOUT_PATTERN;
6699
6700 r = reg_callout_list_entry(env, &num);
6701 if (r != 0) return r;
6702
6703 ext = onig_get_regex_ext(env->reg);
6704 CHECK_NULL_RETURN_MEMERR(ext);
6705 if (IS_NULL(ext->pattern)) {
6706 r = onig_ext_set_pattern(env->reg, env->pattern, env->pattern_end);
6707 if (r != ONIG_NORMAL) return r;
6708 }
6709
6710 if (tag_start != tag_end) {
6711 r = callout_tag_entry(env, env->reg, tag_start, tag_end, num);
6712 if (r != ONIG_NORMAL) return r;
6713 }
6714
6715 contents = onigenc_strdup(enc, code_start, code_end);
6716 CHECK_NULL_RETURN_MEMERR(contents);
6717
6718 r = node_new_callout(np, ONIG_CALLOUT_OF_CONTENTS, num, ONIG_NON_NAME_ID, env);
6719 if (r != 0) {
6720 xfree(contents);
6721 return r;
6722 }
6723
6724 e = onig_reg_callout_list_at(env->reg, num);
6725 if (IS_NULL(e)) {
6726 xfree(contents);
6727 return ONIGERR_MEMORY;
6728 }
6729
6730 e->of = ONIG_CALLOUT_OF_CONTENTS;
6731 e->in = in;
6732 e->name_id = ONIG_NON_NAME_ID;
6733 e->u.content.start = contents;
6734 e->u.content.end = contents + (code_end - code_start);
6735
6736 *src = p;
6737 return 0;
6738 }
6739
6740 static long
6741 parse_long(OnigEncoding enc, UChar* s, UChar* end, int sign_on, long max, long* rl)
6742 {
6743 long v;
6744 long d;
6745 int flag;
6746 UChar* p;
6747 OnigCodePoint c;
6748
6749 if (s >= end) return ONIGERR_INVALID_CALLOUT_ARG;
6750
6751 flag = 1;
6752 v = 0;
6753 p = s;
6754 while (p < end) {
6755 c = ONIGENC_MBC_TO_CODE(enc, p, end);
6756 p += ONIGENC_MBC_ENC_LEN(enc, p);
6757 if (c >= '0' && c <= '9') {
6758 d = (long )(c - '0');
6759 if (v > (max - d) / 10)
6760 return ONIGERR_INVALID_CALLOUT_ARG;
6761
6762 v = v * 10 + d;
6763 }
6764 else if (sign_on != 0 && (c == '-' || c == '+')) {
6765 if (c == '-') flag = -1;
6766 }
6767 else
6768 return ONIGERR_INVALID_CALLOUT_ARG;
6769
6770 sign_on = 0;
6771 }
6772
6773 *rl = flag * v;
6774 return ONIG_NORMAL;
6775 }
6776
6777 static int
6778 parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end,
6779 unsigned int types[], OnigValue vals[], ScanEnv* env)
6780 {
6781 #define MAX_CALLOUT_ARG_BYTE_LENGTH 128
6782
6783 int r;
6784 int n;
6785 int esc;
6786 int cn;
6787 UChar* s;
6788 UChar* e;
6789 UChar* eesc;
6790 OnigCodePoint c;
6791 UChar* bufend;
6792 UChar buf[MAX_CALLOUT_ARG_BYTE_LENGTH];
6793 OnigEncoding enc = env->enc;
6794 UChar* p = *src;
6795
6796 if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
6797
6798 n = 0;
6799 while (n < ONIG_CALLOUT_MAX_ARGS_NUM) {
6800 c = 0;
6801 cn = 0;
6802 esc = 0;
6803 eesc = 0;
6804 bufend = buf;
6805 s = e = p;
6806 while (1) {
6807 if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
6808
6809 e = p;
6810 PFETCH_S(c);
6811 if (esc != 0) {
6812 esc = 0;
6813 if (c == '\\' || c == cterm || c == ',') {
6814 /* */
6815 }
6816 else {
6817 e = eesc;
6818 cn++;
6819 }
6820 goto add_char;
6821 }
6822 else {
6823 if (c == '\\') {
6824 esc = 1;
6825 eesc = e;
6826 }
6827 else if (c == cterm || c == ',')
6828 break;
6829 else {
6830 size_t clen;
6831
6832 add_char:
6833 if (skip_mode == 0) {
6834 clen = p - e;
6835 if (bufend + clen > buf + MAX_CALLOUT_ARG_BYTE_LENGTH)
6836 return ONIGERR_INVALID_CALLOUT_ARG; /* too long argument */
6837
6838 xmemcpy(bufend, e, clen);
6839 bufend += clen;
6840 }
6841 cn++;
6842 }
6843 }
6844 }
6845
6846 if (cn != 0) {
6847 if (skip_mode == 0) {
6848 if ((types[n] & ONIG_TYPE_LONG) != 0) {
6849 int fixed = 0;
6850 if (cn > 0) {
6851 long rl;
6852 r = parse_long(enc, buf, bufend, 1, LONG_MAX, &rl);
6853 if (r == ONIG_NORMAL) {
6854 vals[n].l = rl;
6855 fixed = 1;
6856 types[n] = ONIG_TYPE_LONG;
6857 }
6858 }
6859
6860 if (fixed == 0) {
6861 types[n] = (types[n] & ~ONIG_TYPE_LONG);
6862 if (types[n] == ONIG_TYPE_VOID)
6863 return ONIGERR_INVALID_CALLOUT_ARG;
6864 }
6865 }
6866
6867 switch (types[n]) {
6868 case ONIG_TYPE_LONG:
6869 break;
6870
6871 case ONIG_TYPE_CHAR:
6872 if (cn != 1) return ONIGERR_INVALID_CALLOUT_ARG;
6873 vals[n].c = ONIGENC_MBC_TO_CODE(enc, buf, bufend);
6874 break;
6875
6876 case ONIG_TYPE_STRING:
6877 {
6878 UChar* rs = onigenc_strdup(enc, buf, bufend);
6879 CHECK_NULL_RETURN_MEMERR(rs);
6880 vals[n].s.start = rs;
6881 vals[n].s.end = rs + (e - s);
6882 }
6883 break;
6884
6885 case ONIG_TYPE_TAG:
6886 if (eesc != 0 || ! is_allowed_callout_tag_name(enc, s, e))
6887 return ONIGERR_INVALID_CALLOUT_TAG_NAME;
6888
6889 vals[n].s.start = s;
6890 vals[n].s.end = e;
6891 break;
6892
6893 case ONIG_TYPE_VOID:
6894 case ONIG_TYPE_POINTER:
6895 return ONIGERR_PARSER_BUG;
6896 break;
6897 }
6898 }
6899
6900 n++;
6901 }
6902
6903 if (c == cterm) break;
6904 }
6905
6906 if (c != cterm) return ONIGERR_INVALID_CALLOUT_PATTERN;
6907
6908 *src = p;
6909 return n;
6910 }
6911
6912 /* (*name[TAG]) (*name[TAG]{a,b,..}) */
6913 static int
6914 parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* env)
6915 {
6916 int r;
6917 int i;
6918 int in;
6919 int num;
6920 int name_id;
6921 int arg_num;
6922 int max_arg_num;
6923 int opt_arg_num;
6924 int is_not_single;
6925 OnigCodePoint c;
6926 UChar* name_start;
6927 UChar* name_end;
6928 UChar* tag_start;
6929 UChar* tag_end;
6930 Node* node;
6931 CalloutListEntry* e;
6932 RegexExt* ext;
6933 unsigned int types[ONIG_CALLOUT_MAX_ARGS_NUM];
6934 OnigValue vals[ONIG_CALLOUT_MAX_ARGS_NUM];
6935 OnigEncoding enc = env->enc;
6936 UChar* p = *src;
6937
6938 /* PFETCH_READY; */
6939 if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
6940
6941 node = 0;
6942 name_start = p;
6943 while (1) {
6944 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
6945 name_end = p;
6946 PFETCH_S(c);
6947 if (c == cterm || c == '[' || c == '{') break;
6948 }
6949
6950 if (! is_allowed_callout_name(enc, name_start, name_end))
6951 return ONIGERR_INVALID_CALLOUT_NAME;
6952
6953 if (c == '[') {
6954 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
6955 tag_end = tag_start = p;
6956 while (! PEND) {
6957 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
6958 tag_end = p;
6959 PFETCH_S(c);
6960 if (c == ']') break;
6961 }
6962 if (! is_allowed_callout_tag_name(enc, tag_start, tag_end))
6963 return ONIGERR_INVALID_CALLOUT_TAG_NAME;
6964
6965 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
6966 PFETCH_S(c);
6967 }
6968 else {
6969 tag_start = tag_end = 0;
6970 }
6971
6972 if (c == '{') {
6973 UChar* save;
6974
6975 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
6976
6977 /* read for single check only */
6978 save = p;
6979 arg_num = parse_callout_args(1, '}', &p, end, 0, 0, env);
6980 if (arg_num < 0) return arg_num;
6981
6982 is_not_single = PPEEK_IS(cterm) ? 0 : 1;
6983 p = save;
6984 r = get_callout_name_id_by_name(enc, is_not_single, name_start, name_end,
6985 &name_id);
6986 if (r != ONIG_NORMAL) return r;
6987
6988 max_arg_num = get_callout_arg_num_by_name_id(name_id);
6989 for (i = 0; i < max_arg_num; i++) {
6990 types[i] = get_callout_arg_type_by_name_id(name_id, i);
6991 }
6992
6993 arg_num = parse_callout_args(0, '}', &p, end, types, vals, env);
6994 if (arg_num < 0) return arg_num;
6995
6996 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
6997 PFETCH_S(c);
6998 }
6999 else {
7000 arg_num = 0;
7001
7002 is_not_single = 0;
7003 r = get_callout_name_id_by_name(enc, is_not_single, name_start, name_end,
7004 &name_id);
7005 if (r != ONIG_NORMAL) return r;
7006
7007 max_arg_num = get_callout_arg_num_by_name_id(name_id);
7008 for (i = 0; i < max_arg_num; i++) {
7009 types[i] = get_callout_arg_type_by_name_id(name_id, i);
7010 }
7011 }
7012
7013 in = onig_get_callout_in_by_name_id(name_id);
7014 opt_arg_num = get_callout_opt_arg_num_by_name_id(name_id);
7015 if (arg_num > max_arg_num || arg_num < (max_arg_num - opt_arg_num))
7016 return ONIGERR_INVALID_CALLOUT_ARG;
7017
7018 if (c != cterm)
7019 return ONIGERR_INVALID_CALLOUT_PATTERN;
7020
7021 r = reg_callout_list_entry(env, &num);
7022 if (r != 0) return r;
7023
7024 ext = onig_get_regex_ext(env->reg);
7025 CHECK_NULL_RETURN_MEMERR(ext);
7026 if (IS_NULL(ext->pattern)) {
7027 r = onig_ext_set_pattern(env->reg, env->pattern, env->pattern_end);
7028 if (r != ONIG_NORMAL) return r;
7029 }
7030
7031 if (tag_start != tag_end) {
7032 r = callout_tag_entry(env, env->reg, tag_start, tag_end, num);
7033 if (r != ONIG_NORMAL) return r;
7034 }
7035
7036 r = node_new_callout(&node, ONIG_CALLOUT_OF_NAME, num, name_id, env);
7037 if (r != ONIG_NORMAL) return r;
7038
7039 e = onig_reg_callout_list_at(env->reg, num);
7040 CHECK_NULL_RETURN_MEMERR(e);
7041
7042 e->of = ONIG_CALLOUT_OF_NAME;
7043 e->in = in;
7044 e->name_id = name_id;
7045 e->type = onig_get_callout_type_by_name_id(name_id);
7046 e->start_func = onig_get_callout_start_func_by_name_id(name_id);
7047 e->end_func = onig_get_callout_end_func_by_name_id(name_id);
7048 e->u.arg.num = max_arg_num;
7049 e->u.arg.passed_num = arg_num;
7050 for (i = 0; i < max_arg_num; i++) {
7051 e->u.arg.types[i] = types[i];
7052 if (i < arg_num)
7053 e->u.arg.vals[i] = vals[i];
7054 else
7055 e->u.arg.vals[i] = get_callout_opt_default_by_name_id(name_id, i);
7056 }
7057
7058 *np = node;
7059 *src = p;
7060 return 0;
7061 }
7062 #endif
7063
7064 static int
7065 parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
7066 ScanEnv* env)
7067 {
7068 int r, num;
7069 Node *target;
7070 OnigOptionType option;
7071 OnigCodePoint c;
7072 int list_capture;
7073 OnigEncoding enc = env->enc;
7074
7075 UChar* p = *src;
7076 PFETCH_READY;
7077
7078 *np = NULL;
7079 if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
7080
7081 option = env->options;
7082 c = PPEEK;
7083 if (c == '?' && IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {
7084 PINC;
7085 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7086
7087 PFETCH(c);
7088 switch (c) {
7089 case ':': /* (?:...) grouping only */
7090 group:
7091 r = fetch_token(tok, &p, end, env);
7092 if (r < 0) return r;
7093 r = parse_subexp(np, tok, term, &p, end, env, 0);
7094 if (r < 0) return r;
7095 *src = p;
7096 return 1; /* group */
7097 break;
7098
7099 case '=':
7100 *np = onig_node_new_anchor(ANCR_PREC_READ, 0);
7101 break;
7102 case '!': /* preceding read */
7103 *np = onig_node_new_anchor(ANCR_PREC_READ_NOT, 0);
7104 break;
7105 case '>': /* (?>...) stop backtrack */
7106 *np = node_new_bag(BAG_STOP_BACKTRACK);
7107 break;
7108
7109 case '\'':
7110 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
7111 goto named_group1;
7112 }
7113 else
7114 return ONIGERR_UNDEFINED_GROUP_OPTION;
7115 break;
7116
7117 case '<': /* look behind (?<=...), (?<!...) */
7118 if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
7119 PFETCH(c);
7120 if (c == '=')
7121 *np = onig_node_new_anchor(ANCR_LOOK_BEHIND, 0);
7122 else if (c == '!')
7123 *np = onig_node_new_anchor(ANCR_LOOK_BEHIND_NOT, 0);
7124 else {
7125 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
7126 UChar *name;
7127 UChar *name_end;
7128 enum REF_NUM num_type;
7129
7130 PUNFETCH;
7131 c = '<';
7132
7133 named_group1:
7134 list_capture = 0;
7135
7136 named_group2:
7137 name = p;
7138 r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num,
7139 &num_type, 0);
7140 if (r < 0) return r;
7141
7142 num = scan_env_add_mem_entry(env);
7143 if (num < 0) return num;
7144 if (list_capture != 0 && num >= (int )MEM_STATUS_BITS_NUM)
7145 return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
7146
7147 r = name_add(env->reg, name, name_end, num, env);
7148 if (r != 0) return r;
7149 *np = node_new_memory(1);
7150 CHECK_NULL_RETURN_MEMERR(*np);
7151 BAG_(*np)->m.regnum = num;
7152 if (list_capture != 0)
7153 MEM_STATUS_ON_SIMPLE(env->capture_history, num);
7154 env->num_named++;
7155 }
7156 else {
7157 return ONIGERR_UNDEFINED_GROUP_OPTION;
7158 }
7159 }
7160 break;
7161
7162 case '~':
7163 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP)) {
7164 Node* absent;
7165 Node* expr;
7166 int head_bar;
7167 int is_range_cutter;
7168
7169 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7170
7171 if (PPEEK_IS('|')) { /* (?~|generator|absent) */
7172 PINC;
7173 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7174
7175 head_bar = 1;
7176 if (PPEEK_IS(')')) { /* (?~|) : range clear */
7177 PINC;
7178 r = make_range_clear(np, env);
7179 if (r != 0) return r;
7180 goto end;
7181 }
7182 }
7183 else
7184 head_bar = 0;
7185
7186 r = fetch_token(tok, &p, end, env);
7187 if (r < 0) return r;
7188 r = parse_subexp(&absent, tok, term, &p, end, env, 1);
7189 if (r < 0) {
7190 onig_node_free(absent);
7191 return r;
7192 }
7193
7194 expr = NULL_NODE;
7195 is_range_cutter = 0;
7196 if (head_bar != 0) {
7197 Node* top = absent;
7198 if (NODE_TYPE(top) != NODE_ALT || IS_NULL(NODE_CDR(top))) {
7199 expr = NULL_NODE;
7200 is_range_cutter = 1;
7201 /* return ONIGERR_INVALID_ABSENT_GROUP_GENERATOR_PATTERN; */
7202 }
7203 else {
7204 absent = NODE_CAR(top);
7205 expr = NODE_CDR(top);
7206 NODE_CAR(top) = NULL_NODE;
7207 NODE_CDR(top) = NULL_NODE;
7208 onig_node_free(top);
7209 if (IS_NULL(NODE_CDR(expr))) {
7210 top = expr;
7211 expr = NODE_CAR(top);
7212 NODE_CAR(top) = NULL_NODE;
7213 onig_node_free(top);
7214 }
7215 }
7216 }
7217
7218 r = make_absent_tree(np, absent, expr, is_range_cutter, env);
7219 if (r != 0) {
7220 return r;
7221 }
7222 goto end;
7223 }
7224 else {
7225 return ONIGERR_UNDEFINED_GROUP_OPTION;
7226 }
7227 break;
7228
7229 #ifdef USE_CALLOUT
7230 case '{':
7231 if (! IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS))
7232 return ONIGERR_UNDEFINED_GROUP_OPTION;
7233
7234 r = parse_callout_of_contents(np, ')', &p, end, env);
7235 if (r != 0) return r;
7236
7237 goto end;
7238 break;
7239 #endif
7240
7241 case '(':
7242 /* (?()...) */
7243 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE)) {
7244 UChar *prev;
7245 Node* condition;
7246 int condition_is_checker;
7247
7248 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7249 PFETCH(c);
7250 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7251
7252 if (IS_CODE_DIGIT_ASCII(enc, c)
7253 || c == '-' || c == '+' || c == '<' || c == '\'') {
7254 UChar* name_end;
7255 int back_num;
7256 int exist_level;
7257 int level;
7258 enum REF_NUM num_type;
7259 int is_enclosed;
7260
7261 is_enclosed = (c == '<' || c == '\'') ? 1 : 0;
7262 if (! is_enclosed)
7263 PUNFETCH;
7264 prev = p;
7265 exist_level = 0;
7266 #ifdef USE_BACKREF_WITH_LEVEL
7267 name_end = NULL_UCHARP; /* no need. escape gcc warning. */
7268 r = fetch_name_with_level(
7269 (OnigCodePoint )(is_enclosed != 0 ? c : '('),
7270 &p, end, &name_end,
7271 env, &back_num, &level, &num_type);
7272 if (r == 1) exist_level = 1;
7273 #else
7274 r = fetch_name((OnigCodePoint )(is_enclosed != 0 ? c : '('),
7275 &p, end, &name_end, env, &back_num, &num_type, 1);
7276 #endif
7277 if (r < 0) {
7278 if (is_enclosed == 0) {
7279 goto any_condition;
7280 }
7281 else
7282 return r;
7283 }
7284
7285 condition_is_checker = 1;
7286 if (num_type != IS_NOT_NUM) {
7287 if (num_type == IS_REL_NUM) {
7288 back_num = backref_rel_to_abs(back_num, env);
7289 }
7290 if (back_num <= 0)
7291 return ONIGERR_INVALID_BACKREF;
7292
7293 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_STRICT_CHECK_BACKREF)) {
7294 if (back_num > env->num_mem ||
7295 IS_NULL(SCANENV_MEMENV(env)[back_num].node))
7296 return ONIGERR_INVALID_BACKREF;
7297 }
7298
7299 condition = node_new_backref_checker(1, &back_num, 0,
7300 #ifdef USE_BACKREF_WITH_LEVEL
7301 exist_level, level,
7302 #endif
7303 env);
7304 }
7305 else {
7306 int num;
7307 int* backs;
7308
7309 num = name_to_group_numbers(env, prev, name_end, &backs);
7310 if (num <= 0) {
7311 return ONIGERR_UNDEFINED_NAME_REFERENCE;
7312 }
7313 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_STRICT_CHECK_BACKREF)) {
7314 int i;
7315 for (i = 0; i < num; i++) {
7316 if (backs[i] > env->num_mem ||
7317 IS_NULL(SCANENV_MEMENV(env)[backs[i]].node))
7318 return ONIGERR_INVALID_BACKREF;
7319 }
7320 }
7321
7322 condition = node_new_backref_checker(num, backs, 1,
7323 #ifdef USE_BACKREF_WITH_LEVEL
7324 exist_level, level,
7325 #endif
7326 env);
7327 }
7328
7329 if (is_enclosed != 0) {
7330 if (PEND) goto err_if_else;
7331 PFETCH(c);
7332 if (c != ')') goto err_if_else;
7333 }
7334 }
7335 #ifdef USE_CALLOUT
7336 else if (c == '?') {
7337 if (IS_SYNTAX_OP2(env->syntax,
7338 ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS)) {
7339 if (! PEND && PPEEK_IS('{')) {
7340 /* condition part is callouts of contents: (?(?{...})THEN|ELSE) */
7341 condition_is_checker = 0;
7342 PFETCH(c);
7343 r = parse_callout_of_contents(&condition, ')', &p, end, env);
7344 if (r != 0) return r;
7345 goto end_condition;
7346 }
7347 }
7348 goto any_condition;
7349 }
7350 else if (c == '*' &&
7351 IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME)) {
7352 condition_is_checker = 0;
7353 r = parse_callout_of_name(&condition, ')', &p, end, env);
7354 if (r != 0) return r;
7355 goto end_condition;
7356 }
7357 #endif
7358 else {
7359 any_condition:
7360 PUNFETCH;
7361 condition_is_checker = 0;
7362 r = fetch_token(tok, &p, end, env);
7363 if (r < 0) return r;
7364 r = parse_subexp(&condition, tok, term, &p, end, env, 0);
7365 if (r < 0) {
7366 onig_node_free(condition);
7367 return r;
7368 }
7369 }
7370
7371 #ifdef USE_CALLOUT
7372 end_condition:
7373 #endif
7374 CHECK_NULL_RETURN_MEMERR(condition);
7375
7376 if (PEND) {
7377 err_if_else:
7378 onig_node_free(condition);
7379 return ONIGERR_END_PATTERN_IN_GROUP;
7380 }
7381
7382 if (PPEEK_IS(')')) { /* case: empty body: make backref checker */
7383 if (condition_is_checker == 0) {
7384 onig_node_free(condition);
7385 return ONIGERR_INVALID_IF_ELSE_SYNTAX;
7386 }
7387 PFETCH(c);
7388 *np = condition;
7389 }
7390 else { /* if-else */
7391 int then_is_empty;
7392 Node *Then, *Else;
7393
7394 Then = 0;
7395 if (PPEEK_IS('|')) {
7396 PFETCH(c);
7397 then_is_empty = 1;
7398 }
7399 else
7400 then_is_empty = 0;
7401
7402 r = fetch_token(tok, &p, end, env);
7403 if (r < 0) {
7404 onig_node_free(condition);
7405 return r;
7406 }
7407 r = parse_subexp(&target, tok, term, &p, end, env, 1);
7408 if (r < 0) {
7409 onig_node_free(condition);
7410 onig_node_free(target);
7411 return r;
7412 }
7413
7414 if (then_is_empty != 0) {
7415 Else = target;
7416 }
7417 else {
7418 if (NODE_TYPE(target) == NODE_ALT) {
7419 Then = NODE_CAR(target);
7420 if (NODE_CDR(NODE_CDR(target)) == NULL_NODE) {
7421 Else = NODE_CAR(NODE_CDR(target));
7422 cons_node_free_alone(NODE_CDR(target));
7423 }
7424 else {
7425 Else = NODE_CDR(target);
7426 }
7427 cons_node_free_alone(target);
7428 }
7429 else {
7430 Then = target;
7431 Else = 0;
7432 }
7433 }
7434
7435 *np = node_new_bag_if_else(condition, Then, Else);
7436 if (IS_NULL(*np)) {
7437 onig_node_free(condition);
7438 onig_node_free(Then);
7439 onig_node_free(Else);
7440 return ONIGERR_MEMORY;
7441 }
7442 }
7443 goto end;
7444 }
7445 else {
7446 return ONIGERR_UNDEFINED_GROUP_OPTION;
7447 }
7448 break;
7449
7450 #ifdef USE_CAPTURE_HISTORY
7451 case '@':
7452 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) {
7453 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
7454 PFETCH(c);
7455 if (c == '<' || c == '\'') {
7456 list_capture = 1;
7457 goto named_group2; /* (?@<name>...) */
7458 }
7459 PUNFETCH;
7460 }
7461
7462 *np = node_new_memory(0);
7463 CHECK_NULL_RETURN_MEMERR(*np);
7464 num = scan_env_add_mem_entry(env);
7465 if (num < 0) {
7466 return num;
7467 }
7468 else if (num >= (int )MEM_STATUS_BITS_NUM) {
7469 return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
7470 }
7471 BAG_(*np)->m.regnum = num;
7472 MEM_STATUS_ON_SIMPLE(env->capture_history, num);
7473 }
7474 else {
7475 return ONIGERR_UNDEFINED_GROUP_OPTION;
7476 }
7477 break;
7478 #endif
7479
7480 #ifdef USE_POSIXLINE_OPTION
7481 case 'p':
7482 #endif
7483 case '-': case 'i': case 'm': case 's': case 'x':
7484 case 'W': case 'D': case 'S': case 'P':
7485 case 'y':
7486 {
7487 int neg = 0;
7488
7489 while (1) {
7490 switch (c) {
7491 case ':':
7492 case ')':
7493 break;
7494
7495 case '-': neg = 1; break;
7496 case 'x': OPTION_NEGATE(option, ONIG_OPTION_EXTEND, neg); break;
7497 case 'i': OPTION_NEGATE(option, ONIG_OPTION_IGNORECASE, neg); break;
7498 case 's':
7499 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
7500 OPTION_NEGATE(option, ONIG_OPTION_MULTILINE, neg);
7501 }
7502 else
7503 return ONIGERR_UNDEFINED_GROUP_OPTION;
7504 break;
7505
7506 case 'm':
7507 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
7508 OPTION_NEGATE(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 1 : 0));
7509 }
7510 else if (IS_SYNTAX_OP2(env->syntax,
7511 ONIG_SYN_OP2_OPTION_ONIGURUMA|ONIG_SYN_OP2_OPTION_RUBY)) {
7512 OPTION_NEGATE(option, ONIG_OPTION_MULTILINE, neg);
7513 }
7514 else
7515 return ONIGERR_UNDEFINED_GROUP_OPTION;
7516 break;
7517 #ifdef USE_POSIXLINE_OPTION
7518 case 'p':
7519 OPTION_NEGATE(option, ONIG_OPTION_MULTILINE|ONIG_OPTION_SINGLELINE, neg);
7520 break;
7521 #endif
7522 case 'W': OPTION_NEGATE(option, ONIG_OPTION_WORD_IS_ASCII, neg); break;
7523 case 'D': OPTION_NEGATE(option, ONIG_OPTION_DIGIT_IS_ASCII, neg); break;
7524 case 'S': OPTION_NEGATE(option, ONIG_OPTION_SPACE_IS_ASCII, neg); break;
7525 case 'P': OPTION_NEGATE(option, ONIG_OPTION_POSIX_IS_ASCII, neg); break;
7526
7527 case 'y': /* y{g}, y{w} */
7528 {
7529 if (! IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_ONIGURUMA))
7530 return ONIGERR_UNDEFINED_GROUP_OPTION;
7531
7532 if (neg != 0) return ONIGERR_UNDEFINED_GROUP_OPTION;
7533
7534 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7535 if (! PPEEK_IS('{')) return ONIGERR_UNDEFINED_GROUP_OPTION;
7536 PFETCH(c);
7537 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7538 PFETCH(c);
7539 switch (c) {
7540 case 'g':
7541 if (! ONIGENC_IS_UNICODE_ENCODING(enc))
7542 return ONIGERR_UNDEFINED_GROUP_OPTION;
7543
7544 OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER, 0);
7545 OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_WORD, 1);
7546 break;
7547 #ifdef USE_UNICODE_WORD_BREAK
7548 case 'w':
7549 if (! ONIGENC_IS_UNICODE_ENCODING(enc))
7550 return ONIGERR_UNDEFINED_GROUP_OPTION;
7551
7552 OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_WORD, 0);
7553 OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER, 1);
7554 break;
7555 #endif
7556 default:
7557 return ONIGERR_UNDEFINED_GROUP_OPTION;
7558 break;
7559 }
7560 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7561 PFETCH(c);
7562 if (c != '}')
7563 return ONIGERR_UNDEFINED_GROUP_OPTION;
7564 break;
7565 } /* case 'y' */
7566
7567 default:
7568 return ONIGERR_UNDEFINED_GROUP_OPTION;
7569 }
7570
7571 if (c == ')') {
7572 *np = node_new_option(option);
7573 CHECK_NULL_RETURN_MEMERR(*np);
7574 *src = p;
7575 return 2; /* option only */
7576 }
7577 else if (c == ':') {
7578 OnigOptionType prev = env->options;
7579
7580 env->options = option;
7581 r = fetch_token(tok, &p, end, env);
7582 if (r < 0) return r;
7583 r = parse_subexp(&target, tok, term, &p, end, env, 0);
7584 env->options = prev;
7585 if (r < 0) {
7586 onig_node_free(target);
7587 return r;
7588 }
7589 *np = node_new_option(option);
7590 CHECK_NULL_RETURN_MEMERR(*np);
7591 NODE_BODY(*np) = target;
7592 *src = p;
7593 return 0;
7594 }
7595
7596 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7597 PFETCH(c);
7598 } /* while (1) */
7599 }
7600 break;
7601
7602 default:
7603 return ONIGERR_UNDEFINED_GROUP_OPTION;
7604 }
7605 }
7606 #ifdef USE_CALLOUT
7607 else if (c == '*' &&
7608 IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME)) {
7609 PINC;
7610 r = parse_callout_of_name(np, ')', &p, end, env);
7611 if (r != 0) return r;
7612
7613 goto end;
7614 }
7615 #endif
7616 else {
7617 if (ONIG_IS_OPTION_ON(env->options, ONIG_OPTION_DONT_CAPTURE_GROUP))
7618 goto group;
7619
7620 *np = node_new_memory(0);
7621 CHECK_NULL_RETURN_MEMERR(*np);
7622 num = scan_env_add_mem_entry(env);
7623 if (num < 0) return num;
7624 BAG_(*np)->m.regnum = num;
7625 }
7626
7627 CHECK_NULL_RETURN_MEMERR(*np);
7628 r = fetch_token(tok, &p, end, env);
7629 if (r < 0) return r;
7630 r = parse_subexp(&target, tok, term, &p, end, env, 0);
7631 if (r < 0) {
7632 onig_node_free(target);
7633 return r;
7634 }
7635
7636 NODE_BODY(*np) = target;
7637
7638 if (NODE_TYPE(*np) == NODE_BAG) {
7639 if (BAG_(*np)->type == BAG_MEMORY) {
7640 /* Don't move this to previous of parse_subexp() */
7641 r = scan_env_set_mem_node(env, BAG_(*np)->m.regnum, *np);
7642 if (r != 0) return r;
7643 }
7644 }
7645
7646 end:
7647 *src = p;
7648 return 0;
7649 }
7650
/* Textual form of the common quantifiers, used in the nested-repeat warning
   of set_quantifier() and indexed by the value of quantifier_type_num(). */
static const char* PopularQStr[] = {
  "?",
  "*",
  "+",
  "??",
  "*?",
  "+?"
};

/* Replacement text for the nested-repeat warning of set_quantifier(),
   indexed by the ReduceTypeTable entry of the two quantifiers. */
static const char* ReduceQStr[] = {
  "",
  "",
  "*",
  "*?",
  "??",
  "+ and ??",
  "+? and ?"
};
7658
7659 static int
7660 set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)
7661 {
7662 QuantNode* qn;
7663
7664 qn = QUANT_(qnode);
7665 if (qn->lower == 1 && qn->upper == 1)
7666 return 1;
7667
7668 switch (NODE_TYPE(target)) {
7669 case NODE_STRING:
7670 if (group == 0) {
7671 if (str_node_can_be_split(target, env->enc)) {
7672 Node* n = str_node_split_last_char(target, env->enc);
7673 if (IS_NOT_NULL(n)) {
7674 NODE_BODY(qnode) = n;
7675 return 2;
7676 }
7677 }
7678 }
7679 break;
7680
7681 case NODE_QUANT:
7682 { /* check redundant double repeat. */
7683 /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */
7684 QuantNode* qnt = QUANT_(target);
7685 int nestq_num = quantifier_type_num(qn);
7686 int targetq_num = quantifier_type_num(qnt);
7687
7688 #ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
7689 if (targetq_num >= 0 && nestq_num >= 0 &&
7690 IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {
7691 UChar buf[WARN_BUFSIZE];
7692
7693 switch(ReduceTypeTable[targetq_num][nestq_num]) {
7694 case RQ_ASIS:
7695 break;
7696
7697 case RQ_DEL:
7698 if (onig_verb_warn != onig_null_warn) {
7699 onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
7700 env->pattern, env->pattern_end,
7701 (UChar* )"redundant nested repeat operator");
7702 (*onig_verb_warn)((char* )buf);
7703 }
7704 goto warn_exit;
7705 break;
7706
7707 default:
7708 if (onig_verb_warn != onig_null_warn) {
7709 onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
7710 env->pattern, env->pattern_end,
7711 (UChar* )"nested repeat operator %s and %s was replaced with '%s'",
7712 PopularQStr[targetq_num], PopularQStr[nestq_num],
7713 ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]);
7714 (*onig_verb_warn)((char* )buf);
7715 }
7716 goto warn_exit;
7717 break;
7718 }
7719 }
7720
7721 warn_exit:
7722 #endif
7723 if (targetq_num >= 0 && nestq_num < 0) {
7724 if (targetq_num == 1 || targetq_num == 2) { /* * or + */
7725 /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */
7726 if (! IS_INFINITE_REPEAT(qn->upper) && qn->upper > 1 && qn->greedy) {
7727 qn->upper = (qn->lower == 0 ? 1 : qn->lower);
7728 }
7729 }
7730 }
7731 else {
7732 NODE_BODY(qnode) = target;
7733 onig_reduce_nested_quantifier(qnode, target);
7734 goto q_exit;
7735 }
7736 }
7737 break;
7738
7739 default:
7740 break;
7741 }
7742
7743 NODE_BODY(qnode) = target;
7744 q_exit:
7745 return 0;
7746 }
7747
7748
7749 #ifndef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
7750 static int
7751 clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc)
7752 {
7753 BBuf *tbuf;
7754 int r;
7755
7756 if (IS_NCCLASS_NOT(cc)) {
7757 bitset_invert(cc->bs);
7758
7759 if (! ONIGENC_IS_SINGLEBYTE(enc)) {
7760 r = not_code_range_buf(enc, cc->mbuf, &tbuf);
7761 if (r != 0) return r;
7762
7763 bbuf_free(cc->mbuf);
7764 cc->mbuf = tbuf;
7765 }
7766
7767 NCCLASS_CLEAR_NOT(cc);
7768 }
7769
7770 return 0;
7771 }
7772 #endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */
7773
7774 typedef struct {
7775 ScanEnv* env;
7776 CClassNode* cc;
7777 Node* alt_root;
7778 Node** ptail;
7779 } IApplyCaseFoldArg;
7780
7781 static int
7782 i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], int to_len, void* arg)
7783 {
7784 IApplyCaseFoldArg* iarg;
7785 ScanEnv* env;
7786 CClassNode* cc;
7787 BitSetRef bs;
7788
7789 iarg = (IApplyCaseFoldArg* )arg;
7790 env = iarg->env;
7791 cc = iarg->cc;
7792 bs = cc->bs;
7793
7794 if (to_len == 1) {
7795 int is_in = onig_is_code_in_cc(env->enc, from, cc);
7796 #ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
7797 if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) ||
7798 (is_in == 0 && IS_NCCLASS_NOT(cc))) {
7799 if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
7800 add_code_range(&(cc->mbuf), env, *to, *to);
7801 }
7802 else {
7803 BITSET_SET_BIT(bs, *to);
7804 }
7805 }
7806 #else
7807 if (is_in != 0) {
7808 if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
7809 if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc);
7810 add_code_range(&(cc->mbuf), env, *to, *to);
7811 }
7812 else {
7813 if (IS_NCCLASS_NOT(cc)) {
7814 BITSET_CLEAR_BIT(bs, *to);
7815 }
7816 else
7817 BITSET_SET_BIT(bs, *to);
7818 }
7819 }
7820 #endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */
7821 }
7822 else {
7823 int r, i, len;
7824 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
7825 Node *snode = NULL_NODE;
7826
7827 if (onig_is_code_in_cc(env->enc, from, cc)
7828 #ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
7829 && !IS_NCCLASS_NOT(cc)
7830 #endif
7831 ) {
7832 for (i = 0; i < to_len; i++) {
7833 len = ONIGENC_CODE_TO_MBC(env->enc, to[i], buf);
7834 if (i == 0) {
7835 snode = onig_node_new_str(buf, buf + len);
7836 CHECK_NULL_RETURN_MEMERR(snode);
7837
7838 /* char-class expanded multi-char only
7839 compare with string folded at match time. */
7840 NODE_STRING_SET_AMBIG(snode);
7841 }
7842 else {
7843 r = onig_node_str_cat(snode, buf, buf + len);
7844 if (r < 0) {
7845 onig_node_free(snode);
7846 return r;
7847 }
7848 }
7849 }
7850
7851 *(iarg->ptail) = onig_node_new_alt(snode, NULL_NODE);
7852 CHECK_NULL_RETURN_MEMERR(*(iarg->ptail));
7853 iarg->ptail = &(NODE_CDR((*(iarg->ptail))));
7854 }
7855 }
7856
7857 return 0;
7858 }
7859
7860 static int
7861 parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
7862 ScanEnv* env, int group_head)
7863 {
7864 int r, len, group;
7865 Node* qn;
7866 Node** tp;
7867 unsigned int parse_depth;
7868
7869 group = 0;
7870 *np = NULL;
7871 if (tok->type == (enum TokenSyms )term)
7872 goto end_of_token;
7873
7874 parse_depth = env->parse_depth;
7875
7876 switch (tok->type) {
7877 case TK_ALT:
7878 case TK_EOT:
7879 end_of_token:
7880 *np = node_new_empty();
7881 CHECK_NULL_RETURN_MEMERR(*np);
7882 return tok->type;
7883 break;
7884
7885 case TK_SUBEXP_OPEN:
7886 r = parse_bag(np, tok, TK_SUBEXP_CLOSE, src, end, env);
7887 if (r < 0) return r;
7888 if (r == 1) { /* group */
7889 if (group_head == 0)
7890 group = 1;
7891 else {
7892 Node* target = *np;
7893 *np = node_new_group(target);
7894 if (IS_NULL(*np)) {
7895 onig_node_free(target);
7896 return ONIGERR_MEMORY;
7897 }
7898 group = 2;
7899 }
7900 }
7901 else if (r == 2) { /* option only */
7902 Node* target;
7903 OnigOptionType prev = env->options;
7904
7905 env->options = BAG_(*np)->o.options;
7906 r = fetch_token(tok, src, end, env);
7907 if (r < 0) return r;
7908 r = parse_subexp(&target, tok, term, src, end, env, 0);
7909 env->options = prev;
7910 if (r < 0) {
7911 onig_node_free(target);
7912 return r;
7913 }
7914 NODE_BODY(*np) = target;
7915 return tok->type;
7916 }
7917 break;
7918
7919 case TK_SUBEXP_CLOSE:
7920 if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP))
7921 return ONIGERR_UNMATCHED_CLOSE_PARENTHESIS;
7922
7923 if (tok->escaped) goto tk_raw_byte;
7924 else goto tk_byte;
7925 break;
7926
7927 case TK_STRING:
7928 tk_byte:
7929 {
7930 *np = node_new_str(tok->backp, *src);
7931 CHECK_NULL_RETURN_MEMERR(*np);
7932
7933 while (1) {
7934 r = fetch_token(tok, src, end, env);
7935 if (r < 0) return r;
7936 if (r != TK_STRING) break;
7937
7938 r = onig_node_str_cat(*np, tok->backp, *src);
7939 if (r < 0) return r;
7940 }
7941
7942 string_end:
7943 tp = np;
7944 goto repeat;
7945 }
7946 break;
7947
7948 case TK_RAW_BYTE:
7949 tk_raw_byte:
7950 {
7951 *np = node_new_str_raw_char((UChar )tok->u.c);
7952 CHECK_NULL_RETURN_MEMERR(*np);
7953 len = 1;
7954 while (1) {
7955 if (len >= ONIGENC_MBC_MINLEN(env->enc)) {
7956 if (len == enclen(env->enc, STR_(*np)->s)) {
7957 r = fetch_token(tok, src, end, env);
7958 goto tk_raw_byte_end;
7959 }
7960 }
7961
7962 r = fetch_token(tok, src, end, env);
7963 if (r < 0) return r;
7964 if (r != TK_RAW_BYTE)
7965 return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
7966
7967 r = node_str_cat_char(*np, (UChar )tok->u.c);
7968 if (r < 0) return r;
7969
7970 len++;
7971 }
7972
7973 tk_raw_byte_end:
7974 if (! ONIGENC_IS_VALID_MBC_STRING(env->enc, STR_(*np)->s, STR_(*np)->end))
7975 return ONIGERR_INVALID_WIDE_CHAR_VALUE;
7976
7977 NODE_STRING_CLEAR_RAW(*np);
7978 goto string_end;
7979 }
7980 break;
7981
7982 case TK_CODE_POINT:
7983 {
7984 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
7985 len = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf);
7986 if (len < 0) return len;
7987 #ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
7988 *np = node_new_str_raw(buf, buf + len);
7989 #else
7990 *np = node_new_str(buf, buf + len);
7991 #endif
7992 CHECK_NULL_RETURN_MEMERR(*np);
7993 }
7994 break;
7995
7996 case TK_QUOTE_OPEN:
7997 {
7998 OnigCodePoint end_op[2];
7999 UChar *qstart, *qend, *nextp;
8000
8001 end_op[0] = (OnigCodePoint )MC_ESC(env->syntax);
8002 end_op[1] = (OnigCodePoint )'E';
8003 qstart = *src;
8004 qend = find_str_position(end_op, 2, qstart, end, &nextp, env->enc);
8005 if (IS_NULL(qend)) {
8006 nextp = qend = end;
8007 }
8008 *np = node_new_str(qstart, qend);
8009 CHECK_NULL_RETURN_MEMERR(*np);
8010 *src = nextp;
8011 }
8012 break;
8013
8014 case TK_CHAR_TYPE:
8015 {
8016 switch (tok->u.prop.ctype) {
8017 case ONIGENC_CTYPE_WORD:
8018 *np = node_new_ctype(tok->u.prop.ctype, tok->u.prop.not, env->options);
8019 CHECK_NULL_RETURN_MEMERR(*np);
8020 break;
8021
8022 case ONIGENC_CTYPE_SPACE:
8023 case ONIGENC_CTYPE_DIGIT:
8024 case ONIGENC_CTYPE_XDIGIT:
8025 {
8026 CClassNode* cc;
8027
8028 *np = node_new_cclass();
8029 CHECK_NULL_RETURN_MEMERR(*np);
8030 cc = CCLASS_(*np);
8031 add_ctype_to_cc(cc, tok->u.prop.ctype, 0, env);
8032 if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
8033 }
8034 break;
8035
8036 default:
8037 return ONIGERR_PARSER_BUG;
8038 break;
8039 }
8040 }
8041 break;
8042
8043 case TK_CHAR_PROPERTY:
8044 r = parse_char_property(np, tok, src, end, env);
8045 if (r != 0) return r;
8046 break;
8047
8048 case TK_CC_OPEN:
8049 {
8050 CClassNode* cc;
8051
8052 r = parse_char_class(np, tok, src, end, env);
8053 if (r != 0) return r;
8054
8055 cc = CCLASS_(*np);
8056 if (IS_IGNORECASE(env->options)) {
8057 IApplyCaseFoldArg iarg;
8058
8059 iarg.env = env;
8060 iarg.cc = cc;
8061 iarg.alt_root = NULL_NODE;
8062 iarg.ptail = &(iarg.alt_root);
8063
8064 r = ONIGENC_APPLY_ALL_CASE_FOLD(env->enc, env->case_fold_flag,
8065 i_apply_case_fold, &iarg);
8066 if (r != 0) {
8067 onig_node_free(iarg.alt_root);
8068 return r;
8069 }
8070 if (IS_NOT_NULL(iarg.alt_root)) {
8071 Node* work = onig_node_new_alt(*np, iarg.alt_root);
8072 if (IS_NULL(work)) {
8073 onig_node_free(iarg.alt_root);
8074 return ONIGERR_MEMORY;
8075 }
8076 *np = work;
8077 }
8078 }
8079 }
8080 break;
8081
8082 case TK_ANYCHAR:
8083 *np = node_new_anychar();
8084 CHECK_NULL_RETURN_MEMERR(*np);
8085 break;
8086
8087 case TK_ANYCHAR_ANYTIME:
8088 *np = node_new_anychar();
8089 CHECK_NULL_RETURN_MEMERR(*np);
8090 qn = node_new_quantifier(0, INFINITE_REPEAT, 0);
8091 CHECK_NULL_RETURN_MEMERR(qn);
8092 NODE_BODY(qn) = *np;
8093 *np = qn;
8094 break;
8095
8096 case TK_BACKREF:
8097 len = tok->u.backref.num;
8098 *np = node_new_backref(len,
8099 (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)),
8100 tok->u.backref.by_name,
8101 #ifdef USE_BACKREF_WITH_LEVEL
8102 tok->u.backref.exist_level,
8103 tok->u.backref.level,
8104 #endif
8105 env);
8106 CHECK_NULL_RETURN_MEMERR(*np);
8107 break;
8108
8109 #ifdef USE_CALL
8110 case TK_CALL:
8111 {
8112 int gnum = tok->u.call.gnum;
8113
8114 *np = node_new_call(tok->u.call.name, tok->u.call.name_end,
8115 gnum, tok->u.call.by_number);
8116 CHECK_NULL_RETURN_MEMERR(*np);
8117 env->num_call++;
8118 if (tok->u.call.by_number != 0 && gnum == 0) {
8119 env->has_call_zero = 1;
8120 }
8121 }
8122 break;
8123 #endif
8124
8125 case TK_ANCHOR:
8126 {
8127 int ascii_mode =
8128 IS_WORD_ASCII(env->options) && IS_WORD_ANCHOR_TYPE(tok->u.anchor) ? 1 : 0;
8129 *np = onig_node_new_anchor(tok->u.anchor, ascii_mode);
8130 CHECK_NULL_RETURN_MEMERR(*np);
8131 }
8132 break;
8133
8134 case TK_REPEAT:
8135 case TK_INTERVAL:
8136 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS)) {
8137 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS))
8138 return ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED;
8139 else {
8140 *np = node_new_empty();
8141 CHECK_NULL_RETURN_MEMERR(*np);
8142 }
8143 }
8144 else {
8145 goto tk_byte;
8146 }
8147 break;
8148
8149 case TK_KEEP:
8150 r = node_new_keep(np, env);
8151 if (r < 0) return r;
8152 break;
8153
8154 case TK_GENERAL_NEWLINE:
8155 r = node_new_general_newline(np, env);
8156 if (r < 0) return r;
8157 break;
8158
8159 case TK_NO_NEWLINE:
8160 r = node_new_no_newline(np, env);
8161 if (r < 0) return r;
8162 break;
8163
8164 case TK_TRUE_ANYCHAR:
8165 r = node_new_true_anychar(np, env);
8166 if (r < 0) return r;
8167 break;
8168
8169 case TK_TEXT_SEGMENT:
8170 r = make_text_segment(np, env);
8171 if (r < 0) return r;
8172 break;
8173
8174 default:
8175 return ONIGERR_PARSER_BUG;
8176 break;
8177 }
8178
8179 {
8180 tp = np;
8181
8182 re_entry:
8183 r = fetch_token(tok, src, end, env);
8184 if (r < 0) return r;
8185
8186 repeat:
8187 if (r == TK_REPEAT || r == TK_INTERVAL) {
8188 Node* target;
8189
8190 if (is_invalid_quantifier_target(*tp))
8191 return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID;
8192
8193 parse_depth++;
8194 if (parse_depth > ParseDepthLimit)
8195 return ONIGERR_PARSE_DEPTH_LIMIT_OVER;
8196
8197 qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper,
8198 r == TK_INTERVAL);
8199 CHECK_NULL_RETURN_MEMERR(qn);
8200 QUANT_(qn)->greedy = tok->u.repeat.greedy;
8201 if (group == 2) {
8202 target = node_drop_group(*tp);
8203 *tp = NULL_NODE;
8204 }
8205 else {
8206 target = *tp;
8207 }
8208 r = set_quantifier(qn, target, group, env);
8209 if (r < 0) {
8210 onig_node_free(qn);
8211 return r;
8212 }
8213
8214 if (tok->u.repeat.possessive != 0) {
8215 Node* en;
8216 en = node_new_bag(BAG_STOP_BACKTRACK);
8217 if (IS_NULL(en)) {
8218 onig_node_free(qn);
8219 return ONIGERR_MEMORY;
8220 }
8221 NODE_BODY(en) = qn;
8222 qn = en;
8223 }
8224
8225 if (r == 0) {
8226 *tp = qn;
8227 }
8228 else if (r == 1) { /* x{1,1} ==> x */
8229 onig_node_free(qn);
8230 *tp = target;
8231 }
8232 else if (r == 2) { /* split case: /abc+/ */
8233 Node *tmp;
8234
8235 *tp = node_new_list(*tp, NULL);
8236 if (IS_NULL(*tp)) {
8237 onig_node_free(qn);
8238 return ONIGERR_MEMORY;
8239 }
8240 tmp = NODE_CDR(*tp) = node_new_list(qn, NULL);
8241 if (IS_NULL(tmp)) {
8242 onig_node_free(qn);
8243 return ONIGERR_MEMORY;
8244 }
8245 tp = &(NODE_CAR(tmp));
8246 }
8247 group = 0;
8248 goto re_entry;
8249 }
8250 }
8251
8252 return r;
8253 }
8254
8255 static int
8256 parse_branch(Node** top, PToken* tok, int term, UChar** src, UChar* end,
8257 ScanEnv* env, int group_head)
8258 {
8259 int r;
8260 Node *node, **headp;
8261
8262 *top = NULL;
8263 r = parse_exp(&node, tok, term, src, end, env, group_head);
8264 if (r < 0) {
8265 onig_node_free(node);
8266 return r;
8267 }
8268
8269 if (r == TK_EOT || r == term || r == TK_ALT) {
8270 *top = node;
8271 }
8272 else {
8273 *top = node_new_list(node, NULL);
8274 if (IS_NULL(*top)) {
8275 onig_node_free(node);
8276 return ONIGERR_MEMORY;
8277 }
8278
8279 headp = &(NODE_CDR(*top));
8280 while (r != TK_EOT && r != term && r != TK_ALT) {
8281 r = parse_exp(&node, tok, term, src, end, env, 0);
8282 if (r < 0) {
8283 onig_node_free(node);
8284 return r;
8285 }
8286
8287 if (NODE_TYPE(node) == NODE_LIST) {
8288 *headp = node;
8289 while (IS_NOT_NULL(NODE_CDR(node))) node = NODE_CDR(node);
8290 headp = &(NODE_CDR(node));
8291 }
8292 else {
8293 *headp = node_new_list(node, NULL);
8294 headp = &(NODE_CDR(*headp));
8295 }
8296 }
8297 }
8298
8299 return r;
8300 }
8301
8302 /* term_tok: TK_EOT or TK_SUBEXP_CLOSE */
8303 static int
8304 parse_subexp(Node** top, PToken* tok, int term, UChar** src, UChar* end,
8305 ScanEnv* env, int group_head)
8306 {
8307 int r;
8308 Node *node, **headp;
8309
8310 *top = NULL;
8311 env->parse_depth++;
8312 if (env->parse_depth > ParseDepthLimit)
8313 return ONIGERR_PARSE_DEPTH_LIMIT_OVER;
8314
8315 r = parse_branch(&node, tok, term, src, end, env, group_head);
8316 if (r < 0) {
8317 onig_node_free(node);
8318 return r;
8319 }
8320
8321 if (r == term) {
8322 *top = node;
8323 }
8324 else if (r == TK_ALT) {
8325 *top = onig_node_new_alt(node, NULL);
8326 if (IS_NULL(*top)) {
8327 onig_node_free(node);
8328 return ONIGERR_MEMORY;
8329 }
8330
8331 headp = &(NODE_CDR(*top));
8332 while (r == TK_ALT) {
8333 r = fetch_token(tok, src, end, env);
8334 if (r < 0) return r;
8335 r = parse_branch(&node, tok, term, src, end, env, 0);
8336 if (r < 0) {
8337 onig_node_free(node);
8338 return r;
8339 }
8340 *headp = onig_node_new_alt(node, NULL);
8341 if (IS_NULL(*headp)) {
8342 onig_node_free(node);
8343 onig_node_free(*top);
8344 return ONIGERR_MEMORY;
8345 }
8346
8347 headp = &(NODE_CDR(*headp));
8348 }
8349
8350 if (tok->type != (enum TokenSyms )term)
8351 goto err;
8352 }
8353 else {
8354 onig_node_free(node);
8355 err:
8356 if (term == TK_SUBEXP_CLOSE)
8357 return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
8358 else
8359 return ONIGERR_PARSER_BUG;
8360 }
8361
8362 env->parse_depth--;
8363 return r;
8364 }
8365
8366 static int
8367 parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)
8368 {
8369 int r;
8370 PToken tok;
8371
8372 r = fetch_token(&tok, src, end, env);
8373 if (r < 0) return r;
8374 r = parse_subexp(top, &tok, TK_EOT, src, end, env, 0);
8375 if (r < 0) return r;
8376
8377 return 0;
8378 }
8379
#ifdef USE_CALL
/*
 * Wrap `node` in a new unnamed memory node with regnum 0 and register that
 * node with the scan environment as memory 0.
 *
 * On success stores the wrapper in *rnode and returns 0.  Returns
 * ONIGERR_MEMORY when the wrapper cannot be allocated, or the non-zero
 * result of scan_env_set_mem_node() after freeing the wrapper.
 */
static int
make_call_zero_body(Node* node, ScanEnv* env, Node** rnode)
{
  int status;
  Node* wrapper;

  wrapper = node_new_memory(0 /* 0: is not named */);
  CHECK_NULL_RETURN_MEMERR(wrapper);

  NODE_BODY(wrapper) = node;
  BAG_(wrapper)->m.regnum = 0;

  status = scan_env_set_mem_node(env, 0, wrapper);
  if (status == 0) {
    *rnode = wrapper;
    return 0;
  }

  onig_node_free(wrapper);
  return status;
}
#endif
8401
8402 extern int
8403 onig_parse_tree(Node** root, const UChar* pattern, const UChar* end,
8404 regex_t* reg, ScanEnv* env)
8405 {
8406 int r;
8407 UChar* p;
8408 #ifdef USE_CALLOUT
8409 RegexExt* ext;
8410 #endif
8411
8412 names_clear(reg);
8413
8414 scan_env_clear(env);
8415 env->options = reg->options;
8416 env->case_fold_flag = reg->case_fold_flag;
8417 env->enc = reg->enc;
8418 env->syntax = reg->syntax;
8419 env->pattern = (UChar* )pattern;
8420 env->pattern_end = (UChar* )end;
8421 env->reg = reg;
8422
8423 *root = NULL;
8424
8425 if (! ONIGENC_IS_VALID_MBC_STRING(env->enc, pattern, end))
8426 return ONIGERR_INVALID_WIDE_CHAR_VALUE;
8427
8428 p = (UChar* )pattern;
8429 r = parse_regexp(root, &p, (UChar* )end, env);
8430
8431 #ifdef USE_CALL
8432 if (r != 0) return r;
8433
8434 if (env->has_call_zero != 0) {
8435 Node* zero_node;
8436 r = make_call_zero_body(*root, env, &zero_node);
8437 if (r != 0) return r;
8438
8439 *root = zero_node;
8440 }
8441 #endif
8442
8443 reg->num_mem = env->num_mem;
8444
8445 #ifdef USE_CALLOUT
8446 ext = reg->extp;
8447 if (IS_NOT_NULL(ext) && ext->callout_num > 0) {
8448 r = setup_ext_callout_list_values(reg);
8449 }
8450 #endif
8451
8452 return r;
8453 }
8454
8455 extern void
8456 onig_scan_env_set_error_string(ScanEnv* env, int ecode ARG_UNUSED,
8457 UChar* arg, UChar* arg_end)
8458 {
8459 env->error = arg;
8460 env->error_end = arg_end;
8461 }