]> git.proxmox.com Git - mirror_edk2.git/blob - MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regcomp.c
MdeModulePkg: Regular expression protocol
[mirror_edk2.git] / MdeModulePkg / Universal / RegularExpressionDxe / Oniguruma / regcomp.c
1 /**********************************************************************
2 regcomp.c - Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5 * Copyright (c) 2002-2013 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * All rights reserved.
7 *
8 * Copyright (c) 2015, Hewlett Packard Enterprise Development, L.P.<BR>
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 #include "regparse.h"
33
34 OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
35
36 extern OnigCaseFoldType
37 onig_get_default_case_fold_flag(void)
38 {
39 return OnigDefaultCaseFoldFlag;
40 }
41
42 extern int
43 onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
44 {
45 OnigDefaultCaseFoldFlag = case_fold_flag;
46 return 0;
47 }
48
49
50 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
51 static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
52 #endif
53
54 static UChar*
55 str_dup(UChar* s, UChar* end)
56 {
57 int len = (int)(end - s);
58
59 if (len > 0) {
60 UChar* r = (UChar* )xmalloc(len + 1);
61 CHECK_NULL_RETURN(r);
62 xmemcpy(r, s, len);
63 r[len] = (UChar )0;
64 return r;
65 }
66 else return NULL;
67 }
68
69 static void
70 swap_node(Node* a, Node* b)
71 {
72 Node c;
73 c = *a; *a = *b; *b = c;
74
75 if (NTYPE(a) == NT_STR) {
76 StrNode* sn = NSTR(a);
77 if (sn->capa == 0) {
78 int len = (int)(sn->end - sn->s);
79 sn->s = sn->buf;
80 sn->end = sn->s + len;
81 }
82 }
83
84 if (NTYPE(b) == NT_STR) {
85 StrNode* sn = NSTR(b);
86 if (sn->capa == 0) {
87 int len = (int)(sn->end - sn->s);
88 sn->s = sn->buf;
89 sn->end = sn->s + len;
90 }
91 }
92 }
93
94 static OnigDistance
95 distance_add(OnigDistance d1, OnigDistance d2)
96 {
97 if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)
98 return ONIG_INFINITE_DISTANCE;
99 else {
100 if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
101 else return ONIG_INFINITE_DISTANCE;
102 }
103 }
104
105 static OnigDistance
106 distance_multiply(OnigDistance d, int m)
107 {
108 if (m == 0) return 0;
109
110 if (d < ONIG_INFINITE_DISTANCE / m)
111 return d * m;
112 else
113 return ONIG_INFINITE_DISTANCE;
114 }
115
116 static int
117 bitset_is_empty(BitSetRef bs)
118 {
119 int i;
120 for (i = 0; i < (int )BITSET_SIZE; i++) {
121 if (bs[i] != 0) return 0;
122 }
123 return 1;
124 }
125
126 #ifdef ONIG_DEBUG
127 static int
128 bitset_on_num(BitSetRef bs)
129 {
130 int i, n;
131
132 n = 0;
133 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
134 if (BITSET_AT(bs, i)) n++;
135 }
136 return n;
137 }
138 #endif
139
140 extern int
141 onig_bbuf_init(BBuf* buf, int size)
142 {
143 if (size <= 0) {
144 size = 0;
145 buf->p = NULL;
146 }
147 else {
148 buf->p = (UChar* )xmalloc(size);
149 if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
150 }
151
152 buf->alloc = size;
153 buf->used = 0;
154 return 0;
155 }
156
157
158 #ifdef USE_SUBEXP_CALL
159
160 static int
161 unset_addr_list_init(UnsetAddrList* uslist, int size)
162 {
163 UnsetAddr* p;
164
165 p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
166 CHECK_NULL_RETURN_MEMERR(p);
167 uslist->num = 0;
168 uslist->alloc = size;
169 uslist->us = p;
170 return 0;
171 }
172
173 static void
174 unset_addr_list_end(UnsetAddrList* uslist)
175 {
176 if (IS_NOT_NULL(uslist->us))
177 xfree(uslist->us);
178 }
179
180 static int
181 unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
182 {
183 UnsetAddr* p;
184 int size;
185
186 if (uslist->num >= uslist->alloc) {
187 size = uslist->alloc * 2;
188 p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size, sizeof(UnsetAddr) * uslist->alloc);
189 CHECK_NULL_RETURN_MEMERR(p);
190 uslist->alloc = size;
191 uslist->us = p;
192 }
193
194 uslist->us[uslist->num].offset = offset;
195 uslist->us[uslist->num].target = node;
196 uslist->num++;
197 return 0;
198 }
199 #endif /* USE_SUBEXP_CALL */
200
201
202 static int
203 add_opcode(regex_t* reg, int opcode)
204 {
205 BBUF_ADD1(reg, ((unsigned char)opcode));
206 return 0;
207 }
208
209 #ifdef USE_COMBINATION_EXPLOSION_CHECK
210 static int
211 add_state_check_num(regex_t* reg, int num)
212 {
213 StateCheckNumType n = (StateCheckNumType )num;
214
215 BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
216 return 0;
217 }
218 #endif
219
220 static int
221 add_rel_addr(regex_t* reg, int addr)
222 {
223 RelAddrType ra = (RelAddrType )addr;
224
225 BBUF_ADD(reg, &ra, SIZE_RELADDR);
226 return 0;
227 }
228
229 static int
230 add_abs_addr(regex_t* reg, int addr)
231 {
232 AbsAddrType ra = (AbsAddrType )addr;
233
234 BBUF_ADD(reg, &ra, SIZE_ABSADDR);
235 return 0;
236 }
237
238 static int
239 add_length(regex_t* reg, int len)
240 {
241 LengthType l = (LengthType )len;
242
243 BBUF_ADD(reg, &l, SIZE_LENGTH);
244 return 0;
245 }
246
247 static int
248 add_mem_num(regex_t* reg, int num)
249 {
250 MemNumType n = (MemNumType )num;
251
252 BBUF_ADD(reg, &n, SIZE_MEMNUM);
253 return 0;
254 }
255
256 static int
257 add_pointer(regex_t* reg, void* addr)
258 {
259 PointerType ptr = (PointerType )addr;
260
261 BBUF_ADD(reg, &ptr, SIZE_POINTER);
262 return 0;
263 }
264
265 static int
266 add_option(regex_t* reg, OnigOptionType option)
267 {
268 BBUF_ADD(reg, &option, SIZE_OPTION);
269 return 0;
270 }
271
272 static int
273 add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
274 {
275 int r;
276
277 r = add_opcode(reg, opcode);
278 if (r) return r;
279 r = add_rel_addr(reg, addr);
280 return r;
281 }
282
283 static int
284 add_bytes(regex_t* reg, UChar* bytes, int len)
285 {
286 BBUF_ADD(reg, bytes, len);
287 return 0;
288 }
289
290 static int
291 add_bitset(regex_t* reg, BitSetRef bs)
292 {
293 BBUF_ADD(reg, bs, SIZE_BITSET);
294 return 0;
295 }
296
297 static int
298 add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
299 {
300 int r;
301
302 r = add_opcode(reg, opcode);
303 if (r) return r;
304 r = add_option(reg, option);
305 return r;
306 }
307
308 static int compile_length_tree(Node* node, regex_t* reg);
309 static int compile_tree(Node* node, regex_t* reg);
310
311
312 #define IS_NEED_STR_LEN_OP_EXACT(op) \
313 ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
314 (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
315
316 static int
317 select_str_opcode(int mb_len, int str_len, int ignore_case)
318 {
319 int op;
320
321 if (ignore_case) {
322 switch (str_len) {
323 case 1: op = OP_EXACT1_IC; break;
324 default: op = OP_EXACTN_IC; break;
325 }
326 }
327 else {
328 switch (mb_len) {
329 case 1:
330 switch (str_len) {
331 case 1: op = OP_EXACT1; break;
332 case 2: op = OP_EXACT2; break;
333 case 3: op = OP_EXACT3; break;
334 case 4: op = OP_EXACT4; break;
335 case 5: op = OP_EXACT5; break;
336 default: op = OP_EXACTN; break;
337 }
338 break;
339
340 case 2:
341 switch (str_len) {
342 case 1: op = OP_EXACTMB2N1; break;
343 case 2: op = OP_EXACTMB2N2; break;
344 case 3: op = OP_EXACTMB2N3; break;
345 default: op = OP_EXACTMB2N; break;
346 }
347 break;
348
349 case 3:
350 op = OP_EXACTMB3N;
351 break;
352
353 default:
354 op = OP_EXACTMBN;
355 break;
356 }
357 }
358 return op;
359 }
360
361 static int
362 compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
363 {
364 int r;
365 int saved_num_null_check = reg->num_null_check;
366
367 if (empty_info != 0) {
368 r = add_opcode(reg, OP_NULL_CHECK_START);
369 if (r) return r;
370 r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
371 if (r) return r;
372 reg->num_null_check++;
373 }
374
375 r = compile_tree(node, reg);
376 if (r) return r;
377
378 if (empty_info != 0) {
379 if (empty_info == NQ_TARGET_IS_EMPTY)
380 r = add_opcode(reg, OP_NULL_CHECK_END);
381 else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
382 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
383 else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
384 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
385
386 if (r) return r;
387 r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
388 }
389 return r;
390 }
391
392 #ifdef USE_SUBEXP_CALL
393 static int
394 compile_call(CallNode* node, regex_t* reg)
395 {
396 int r;
397
398 r = add_opcode(reg, OP_CALL);
399 if (r) return r;
400 r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
401 node->target);
402 if (r) return r;
403 r = add_abs_addr(reg, 0 /*dummy addr.*/);
404 return r;
405 }
406 #endif
407
408 static int
409 compile_tree_n_times(Node* node, int n, regex_t* reg)
410 {
411 int i, r;
412
413 for (i = 0; i < n; i++) {
414 r = compile_tree(node, reg);
415 if (r) return r;
416 }
417 return 0;
418 }
419
420 static int
421 add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, int str_len,
422 regex_t* reg ARG_UNUSED, int ignore_case)
423 {
424 int len;
425 int op = select_str_opcode(mb_len, str_len, ignore_case);
426
427 len = SIZE_OPCODE;
428
429 if (op == OP_EXACTMBN) len += SIZE_LENGTH;
430 if (IS_NEED_STR_LEN_OP_EXACT(op))
431 len += SIZE_LENGTH;
432
433 len += mb_len * str_len;
434 return len;
435 }
436
437 static int
438 add_compile_string(UChar* s, int mb_len, int str_len,
439 regex_t* reg, int ignore_case)
440 {
441 int op = select_str_opcode(mb_len, str_len, ignore_case);
442 add_opcode(reg, op);
443
444 if (op == OP_EXACTMBN)
445 add_length(reg, mb_len);
446
447 if (IS_NEED_STR_LEN_OP_EXACT(op)) {
448 if (op == OP_EXACTN_IC)
449 add_length(reg, mb_len * str_len);
450 else
451 add_length(reg, str_len);
452 }
453
454 add_bytes(reg, s, mb_len * str_len);
455 return 0;
456 }
457
458
459 static int
460 compile_length_string_node(Node* node, regex_t* reg)
461 {
462 int rlen, r, len, prev_len, slen, ambig;
463 OnigEncoding enc = reg->enc;
464 UChar *p, *prev;
465 StrNode* sn;
466
467 sn = NSTR(node);
468 if (sn->end <= sn->s)
469 return 0;
470
471 ambig = NSTRING_IS_AMBIG(node);
472
473 p = prev = sn->s;
474 prev_len = enclen(enc, p);
475 p += prev_len;
476 slen = 1;
477 rlen = 0;
478
479 for (; p < sn->end; ) {
480 len = enclen(enc, p);
481 if (len == prev_len) {
482 slen++;
483 }
484 else {
485 r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
486 rlen += r;
487 prev = p;
488 slen = 1;
489 prev_len = len;
490 }
491 p += len;
492 }
493 r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
494 rlen += r;
495 return rlen;
496 }
497
498 static int
499 compile_length_string_raw_node(StrNode* sn, regex_t* reg)
500 {
501 if (sn->end <= sn->s)
502 return 0;
503
504 return add_compile_string_length(sn->s, 1 /* sb */, (int)(sn->end - sn->s), reg, 0);
505 }
506
507 static int
508 compile_string_node(Node* node, regex_t* reg)
509 {
510 int r, len, prev_len, slen, ambig;
511 OnigEncoding enc = reg->enc;
512 UChar *p, *prev, *end;
513 StrNode* sn;
514
515 sn = NSTR(node);
516 if (sn->end <= sn->s)
517 return 0;
518
519 end = sn->end;
520 ambig = NSTRING_IS_AMBIG(node);
521
522 p = prev = sn->s;
523 prev_len = enclen(enc, p);
524 p += prev_len;
525 slen = 1;
526
527 for (; p < end; ) {
528 len = enclen(enc, p);
529 if (len == prev_len) {
530 slen++;
531 }
532 else {
533 r = add_compile_string(prev, prev_len, slen, reg, ambig);
534 if (r) return r;
535
536 prev = p;
537 slen = 1;
538 prev_len = len;
539 }
540
541 p += len;
542 }
543 return add_compile_string(prev, prev_len, slen, reg, ambig);
544 }
545
546 static int
547 compile_string_raw_node(StrNode* sn, regex_t* reg)
548 {
549 if (sn->end <= sn->s)
550 return 0;
551
552 return add_compile_string(sn->s, 1 /* sb */, (int)(sn->end - sn->s), reg, 0);
553 }
554
555 static int
556 add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
557 {
558 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
559 add_length(reg, mbuf->used);
560 return add_bytes(reg, mbuf->p, mbuf->used);
561 #else
562 int r, pad_size;
563 UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
564
565 GET_ALIGNMENT_PAD_SIZE(p, pad_size);
566 add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
567 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
568
569 r = add_bytes(reg, mbuf->p, mbuf->used);
570
571 /* padding for return value from compile_length_cclass_node() to be fix. */
572 pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
573 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
574 return r;
575 #endif
576 }
577
578 static int
579 compile_length_cclass_node(CClassNode* cc, regex_t* reg)
580 {
581 int len;
582
583 if (IS_NCCLASS_SHARE(cc)) {
584 len = SIZE_OPCODE + SIZE_POINTER;
585 return len;
586 }
587
588 if (IS_NULL(cc->mbuf)) {
589 len = SIZE_OPCODE + SIZE_BITSET;
590 }
591 else {
592 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
593 len = SIZE_OPCODE;
594 }
595 else {
596 len = SIZE_OPCODE + SIZE_BITSET;
597 }
598 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
599 len += SIZE_LENGTH + cc->mbuf->used;
600 #else
601 len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
602 #endif
603 }
604
605 return len;
606 }
607
608 static int
609 compile_cclass_node(CClassNode* cc, regex_t* reg)
610 {
611 int r;
612
613 if (IS_NCCLASS_SHARE(cc)) {
614 add_opcode(reg, OP_CCLASS_NODE);
615 r = add_pointer(reg, cc);
616 return r;
617 }
618
619 if (IS_NULL(cc->mbuf)) {
620 if (IS_NCCLASS_NOT(cc))
621 add_opcode(reg, OP_CCLASS_NOT);
622 else
623 add_opcode(reg, OP_CCLASS);
624
625 r = add_bitset(reg, cc->bs);
626 }
627 else {
628 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
629 if (IS_NCCLASS_NOT(cc))
630 add_opcode(reg, OP_CCLASS_MB_NOT);
631 else
632 add_opcode(reg, OP_CCLASS_MB);
633
634 r = add_multi_byte_cclass(cc->mbuf, reg);
635 }
636 else {
637 if (IS_NCCLASS_NOT(cc))
638 add_opcode(reg, OP_CCLASS_MIX_NOT);
639 else
640 add_opcode(reg, OP_CCLASS_MIX);
641
642 r = add_bitset(reg, cc->bs);
643 if (r) return r;
644 r = add_multi_byte_cclass(cc->mbuf, reg);
645 }
646 }
647
648 return r;
649 }
650
651 static int
652 entry_repeat_range(regex_t* reg, int id, int lower, int upper)
653 {
654 #define REPEAT_RANGE_ALLOC 4
655
656 OnigRepeatRange* p;
657
658 if (reg->repeat_range_alloc == 0) {
659 p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
660 CHECK_NULL_RETURN_MEMERR(p);
661 reg->repeat_range = p;
662 reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
663 }
664 else if (reg->repeat_range_alloc <= id) {
665 int n;
666 n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
667 p = (OnigRepeatRange* )xrealloc(reg->repeat_range,
668 sizeof(OnigRepeatRange) * n,
669 sizeof(OnigRepeatRange) * reg->repeat_range_alloc);
670 CHECK_NULL_RETURN_MEMERR(p);
671 reg->repeat_range = p;
672 reg->repeat_range_alloc = n;
673 }
674 else {
675 p = reg->repeat_range;
676 }
677
678 p[id].lower = lower;
679 p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
680 return 0;
681 }
682
683 static int
684 compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info,
685 regex_t* reg)
686 {
687 int r;
688 int num_repeat = reg->num_repeat;
689
690 r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
691 if (r) return r;
692 r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
693 reg->num_repeat++;
694 if (r) return r;
695 r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
696 if (r) return r;
697
698 r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
699 if (r) return r;
700
701 r = compile_tree_empty_check(qn->target, reg, empty_info);
702 if (r) return r;
703
704 if (
705 #ifdef USE_SUBEXP_CALL
706 reg->num_call > 0 ||
707 #endif
708 IS_QUANTIFIER_IN_REPEAT(qn)) {
709 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
710 }
711 else {
712 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
713 }
714 if (r) return r;
715 r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
716 return r;
717 }
718
719 static int
720 is_anychar_star_quantifier(QtfrNode* qn)
721 {
722 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
723 NTYPE(qn->target) == NT_CANY)
724 return 1;
725 else
726 return 0;
727 }
728
729 #define QUANTIFIER_EXPAND_LIMIT_SIZE 50
730 #define CKN_ON (ckn > 0)
731
732 #ifdef USE_COMBINATION_EXPLOSION_CHECK
733
734 static int
735 compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
736 {
737 int len, mod_tlen, cklen;
738 int ckn;
739 int infinite = IS_REPEAT_INFINITE(qn->upper);
740 int empty_info = qn->target_empty_info;
741 int tlen = compile_length_tree(qn->target, reg);
742
743 if (tlen < 0) return tlen;
744
745 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
746
747 cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
748
749 /* anychar repeat */
750 if (NTYPE(qn->target) == NT_CANY) {
751 if (qn->greedy && infinite) {
752 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
753 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
754 else
755 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
756 }
757 }
758
759 if (empty_info != 0)
760 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
761 else
762 mod_tlen = tlen;
763
764 if (infinite && qn->lower <= 1) {
765 if (qn->greedy) {
766 if (qn->lower == 1)
767 len = SIZE_OP_JUMP;
768 else
769 len = 0;
770
771 len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
772 }
773 else {
774 if (qn->lower == 0)
775 len = SIZE_OP_JUMP;
776 else
777 len = 0;
778
779 len += mod_tlen + SIZE_OP_PUSH + cklen;
780 }
781 }
782 else if (qn->upper == 0) {
783 if (qn->is_refered != 0) /* /(?<n>..){0}/ */
784 len = SIZE_OP_JUMP + tlen;
785 else
786 len = 0;
787 }
788 else if (qn->upper == 1 && qn->greedy) {
789 if (qn->lower == 0) {
790 if (CKN_ON) {
791 len = SIZE_OP_STATE_CHECK_PUSH + tlen;
792 }
793 else {
794 len = SIZE_OP_PUSH + tlen;
795 }
796 }
797 else {
798 len = tlen;
799 }
800 }
801 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
802 len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
803 }
804 else {
805 len = SIZE_OP_REPEAT_INC
806 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
807 if (CKN_ON)
808 len += SIZE_OP_STATE_CHECK;
809 }
810
811 return len;
812 }
813
814 static int
815 compile_quantifier_node(QtfrNode* qn, regex_t* reg)
816 {
817 int r, mod_tlen;
818 int ckn;
819 int infinite = IS_REPEAT_INFINITE(qn->upper);
820 int empty_info = qn->target_empty_info;
821 int tlen = compile_length_tree(qn->target, reg);
822
823 if (tlen < 0) return tlen;
824
825 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
826
827 if (is_anychar_star_quantifier(qn)) {
828 r = compile_tree_n_times(qn->target, qn->lower, reg);
829 if (r) return r;
830 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
831 if (IS_MULTILINE(reg->options))
832 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
833 else
834 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
835 if (r) return r;
836 if (CKN_ON) {
837 r = add_state_check_num(reg, ckn);
838 if (r) return r;
839 }
840
841 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
842 }
843 else {
844 if (IS_MULTILINE(reg->options)) {
845 r = add_opcode(reg, (CKN_ON ?
846 OP_STATE_CHECK_ANYCHAR_ML_STAR
847 : OP_ANYCHAR_ML_STAR));
848 }
849 else {
850 r = add_opcode(reg, (CKN_ON ?
851 OP_STATE_CHECK_ANYCHAR_STAR
852 : OP_ANYCHAR_STAR));
853 }
854 if (r) return r;
855 if (CKN_ON)
856 r = add_state_check_num(reg, ckn);
857
858 return r;
859 }
860 }
861
862 if (empty_info != 0)
863 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
864 else
865 mod_tlen = tlen;
866
867 if (infinite && qn->lower <= 1) {
868 if (qn->greedy) {
869 if (qn->lower == 1) {
870 r = add_opcode_rel_addr(reg, OP_JUMP,
871 (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
872 if (r) return r;
873 }
874
875 if (CKN_ON) {
876 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
877 if (r) return r;
878 r = add_state_check_num(reg, ckn);
879 if (r) return r;
880 r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
881 }
882 else {
883 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
884 }
885 if (r) return r;
886 r = compile_tree_empty_check(qn->target, reg, empty_info);
887 if (r) return r;
888 r = add_opcode_rel_addr(reg, OP_JUMP,
889 -(mod_tlen + (int )SIZE_OP_JUMP
890 + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
891 }
892 else {
893 if (qn->lower == 0) {
894 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
895 if (r) return r;
896 }
897 r = compile_tree_empty_check(qn->target, reg, empty_info);
898 if (r) return r;
899 if (CKN_ON) {
900 r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
901 if (r) return r;
902 r = add_state_check_num(reg, ckn);
903 if (r) return r;
904 r = add_rel_addr(reg,
905 -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
906 }
907 else
908 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
909 }
910 }
911 else if (qn->upper == 0) {
912 if (qn->is_refered != 0) { /* /(?<n>..){0}/ */
913 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
914 if (r) return r;
915 r = compile_tree(qn->target, reg);
916 }
917 else
918 r = 0;
919 }
920 else if (qn->upper == 1 && qn->greedy) {
921 if (qn->lower == 0) {
922 if (CKN_ON) {
923 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
924 if (r) return r;
925 r = add_state_check_num(reg, ckn);
926 if (r) return r;
927 r = add_rel_addr(reg, tlen);
928 }
929 else {
930 r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
931 }
932 if (r) return r;
933 }
934
935 r = compile_tree(qn->target, reg);
936 }
937 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
938 if (CKN_ON) {
939 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
940 if (r) return r;
941 r = add_state_check_num(reg, ckn);
942 if (r) return r;
943 r = add_rel_addr(reg, SIZE_OP_JUMP);
944 }
945 else {
946 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
947 }
948
949 if (r) return r;
950 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
951 if (r) return r;
952 r = compile_tree(qn->target, reg);
953 }
954 else {
955 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
956 if (CKN_ON) {
957 if (r) return r;
958 r = add_opcode(reg, OP_STATE_CHECK);
959 if (r) return r;
960 r = add_state_check_num(reg, ckn);
961 }
962 }
963 return r;
964 }
965
966 #else /* USE_COMBINATION_EXPLOSION_CHECK */
967
968 static int
969 compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
970 {
971 int len, mod_tlen;
972 int infinite = IS_REPEAT_INFINITE(qn->upper);
973 int empty_info = qn->target_empty_info;
974 int tlen = compile_length_tree(qn->target, reg);
975
976 if (tlen < 0) return tlen;
977
978 /* anychar repeat */
979 if (NTYPE(qn->target) == NT_CANY) {
980 if (qn->greedy && infinite) {
981 if (IS_NOT_NULL(qn->next_head_exact))
982 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
983 else
984 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
985 }
986 }
987
988 if (empty_info != 0)
989 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
990 else
991 mod_tlen = tlen;
992
993 if (infinite &&
994 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
995 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
996 len = SIZE_OP_JUMP;
997 }
998 else {
999 len = tlen * qn->lower;
1000 }
1001
1002 if (qn->greedy) {
1003 if (IS_NOT_NULL(qn->head_exact))
1004 len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
1005 else if (IS_NOT_NULL(qn->next_head_exact))
1006 len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
1007 else
1008 len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
1009 }
1010 else
1011 len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
1012 }
1013 else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
1014 len = SIZE_OP_JUMP + tlen;
1015 }
1016 else if (!infinite && qn->greedy &&
1017 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1018 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1019 len = tlen * qn->lower;
1020 len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
1021 }
1022 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1023 len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
1024 }
1025 else {
1026 len = SIZE_OP_REPEAT_INC
1027 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
1028 }
1029
1030 return len;
1031 }
1032
1033 static int
1034 compile_quantifier_node(QtfrNode* qn, regex_t* reg)
1035 {
1036 int i, r, mod_tlen;
1037 int infinite = IS_REPEAT_INFINITE(qn->upper);
1038 int empty_info = qn->target_empty_info;
1039 int tlen = compile_length_tree(qn->target, reg);
1040
1041 if (tlen < 0) return tlen;
1042
1043 if (is_anychar_star_quantifier(qn)) {
1044 r = compile_tree_n_times(qn->target, qn->lower, reg);
1045 if (r) return r;
1046 if (IS_NOT_NULL(qn->next_head_exact)) {
1047 if (IS_MULTILINE(reg->options))
1048 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
1049 else
1050 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
1051 if (r) return r;
1052 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1053 }
1054 else {
1055 if (IS_MULTILINE(reg->options))
1056 return add_opcode(reg, OP_ANYCHAR_ML_STAR);
1057 else
1058 return add_opcode(reg, OP_ANYCHAR_STAR);
1059 }
1060 }
1061
1062 if (empty_info != 0)
1063 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
1064 else
1065 mod_tlen = tlen;
1066
1067 if (infinite &&
1068 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1069 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1070 if (qn->greedy) {
1071 if (IS_NOT_NULL(qn->head_exact))
1072 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
1073 else if (IS_NOT_NULL(qn->next_head_exact))
1074 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
1075 else
1076 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
1077 }
1078 else {
1079 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
1080 }
1081 if (r) return r;
1082 }
1083 else {
1084 r = compile_tree_n_times(qn->target, qn->lower, reg);
1085 if (r) return r;
1086 }
1087
1088 if (qn->greedy) {
1089 if (IS_NOT_NULL(qn->head_exact)) {
1090 r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
1091 mod_tlen + SIZE_OP_JUMP);
1092 if (r) return r;
1093 add_bytes(reg, NSTR(qn->head_exact)->s, 1);
1094 r = compile_tree_empty_check(qn->target, reg, empty_info);
1095 if (r) return r;
1096 r = add_opcode_rel_addr(reg, OP_JUMP,
1097 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
1098 }
1099 else if (IS_NOT_NULL(qn->next_head_exact)) {
1100 r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
1101 mod_tlen + SIZE_OP_JUMP);
1102 if (r) return r;
1103 add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1104 r = compile_tree_empty_check(qn->target, reg, empty_info);
1105 if (r) return r;
1106 r = add_opcode_rel_addr(reg, OP_JUMP,
1107 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
1108 }
1109 else {
1110 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
1111 if (r) return r;
1112 r = compile_tree_empty_check(qn->target, reg, empty_info);
1113 if (r) return r;
1114 r = add_opcode_rel_addr(reg, OP_JUMP,
1115 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
1116 }
1117 }
1118 else {
1119 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
1120 if (r) return r;
1121 r = compile_tree_empty_check(qn->target, reg, empty_info);
1122 if (r) return r;
1123 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
1124 }
1125 }
1126 else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
1127 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1128 if (r) return r;
1129 r = compile_tree(qn->target, reg);
1130 }
1131 else if (!infinite && qn->greedy &&
1132 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1133 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1134 int n = qn->upper - qn->lower;
1135
1136 r = compile_tree_n_times(qn->target, qn->lower, reg);
1137 if (r) return r;
1138
1139 for (i = 0; i < n; i++) {
1140 r = add_opcode_rel_addr(reg, OP_PUSH,
1141 (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
1142 if (r) return r;
1143 r = compile_tree(qn->target, reg);
1144 if (r) return r;
1145 }
1146 }
1147 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1148 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
1149 if (r) return r;
1150 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1151 if (r) return r;
1152 r = compile_tree(qn->target, reg);
1153 }
1154 else {
1155 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
1156 }
1157 return r;
1158 }
1159 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
1160
1161 static int
1162 compile_length_option_node(EncloseNode* node, regex_t* reg)
1163 {
1164 int tlen;
1165 OnigOptionType prev = reg->options;
1166
1167 reg->options = node->option;
1168 tlen = compile_length_tree(node->target, reg);
1169 reg->options = prev;
1170
1171 if (tlen < 0) return tlen;
1172
1173 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1174 return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL
1175 + tlen + SIZE_OP_SET_OPTION;
1176 }
1177 else
1178 return tlen;
1179 }
1180
1181 static int
1182 compile_option_node(EncloseNode* node, regex_t* reg)
1183 {
1184 int r;
1185 OnigOptionType prev = reg->options;
1186
1187 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1188 r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
1189 if (r) return r;
1190 r = add_opcode_option(reg, OP_SET_OPTION, prev);
1191 if (r) return r;
1192 r = add_opcode(reg, OP_FAIL);
1193 if (r) return r;
1194 }
1195
1196 reg->options = node->option;
1197 r = compile_tree(node->target, reg);
1198 reg->options = prev;
1199
1200 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1201 if (r) return r;
1202 r = add_opcode_option(reg, OP_SET_OPTION, prev);
1203 }
1204 return r;
1205 }
1206
1207 static int
1208 compile_length_enclose_node(EncloseNode* node, regex_t* reg)
1209 {
1210 int len;
1211 int tlen;
1212
1213 if (node->type == ENCLOSE_OPTION)
1214 return compile_length_option_node(node, reg);
1215
1216 if (node->target) {
1217 tlen = compile_length_tree(node->target, reg);
1218 if (tlen < 0) return tlen;
1219 }
1220 else
1221 tlen = 0;
1222
1223 switch (node->type) {
1224 case ENCLOSE_MEMORY:
1225 #ifdef USE_SUBEXP_CALL
1226 if (IS_ENCLOSE_CALLED(node)) {
1227 len = SIZE_OP_MEMORY_START_PUSH + tlen
1228 + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
1229 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1230 len += (IS_ENCLOSE_RECURSION(node)
1231 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1232 else
1233 len += (IS_ENCLOSE_RECURSION(node)
1234 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1235 }
1236 else
1237 #endif
1238 {
1239 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1240 len = SIZE_OP_MEMORY_START_PUSH;
1241 else
1242 len = SIZE_OP_MEMORY_START;
1243
1244 len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1245 ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
1246 }
1247 break;
1248
1249 case ENCLOSE_STOP_BACKTRACK:
1250 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
1251 QtfrNode* qn = NQTFR(node->target);
1252 tlen = compile_length_tree(qn->target, reg);
1253 if (tlen < 0) return tlen;
1254
1255 len = tlen * qn->lower
1256 + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
1257 }
1258 else {
1259 len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
1260 }
1261 break;
1262
1263 default:
1264 return ONIGERR_TYPE_BUG;
1265 break;
1266 }
1267
1268 return len;
1269 }
1270
1271 static int get_char_length_tree(Node* node, regex_t* reg, int* len);
1272
1273 static int
1274 compile_enclose_node(EncloseNode* node, regex_t* reg)
1275 {
1276 int r, len;
1277
1278 if (node->type == ENCLOSE_OPTION)
1279 return compile_option_node(node, reg);
1280
1281 switch (node->type) {
1282 case ENCLOSE_MEMORY:
1283 #ifdef USE_SUBEXP_CALL
1284 if (IS_ENCLOSE_CALLED(node)) {
1285 r = add_opcode(reg, OP_CALL);
1286 if (r) return r;
1287 node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
1288 node->state |= NST_ADDR_FIXED;
1289 r = add_abs_addr(reg, (int )node->call_addr);
1290 if (r) return r;
1291 len = compile_length_tree(node->target, reg);
1292 len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
1293 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1294 len += (IS_ENCLOSE_RECURSION(node)
1295 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1296 else
1297 len += (IS_ENCLOSE_RECURSION(node)
1298 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1299
1300 r = add_opcode_rel_addr(reg, OP_JUMP, len);
1301 if (r) return r;
1302 }
1303 #endif
1304 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1305 r = add_opcode(reg, OP_MEMORY_START_PUSH);
1306 else
1307 r = add_opcode(reg, OP_MEMORY_START);
1308 if (r) return r;
1309 r = add_mem_num(reg, node->regnum);
1310 if (r) return r;
1311 r = compile_tree(node->target, reg);
1312 if (r) return r;
1313 #ifdef USE_SUBEXP_CALL
1314 if (IS_ENCLOSE_CALLED(node)) {
1315 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1316 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1317 ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
1318 else
1319 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1320 ? OP_MEMORY_END_REC : OP_MEMORY_END));
1321
1322 if (r) return r;
1323 r = add_mem_num(reg, node->regnum);
1324 if (r) return r;
1325 r = add_opcode(reg, OP_RETURN);
1326 }
1327 else
1328 #endif
1329 {
1330 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1331 r = add_opcode(reg, OP_MEMORY_END_PUSH);
1332 else
1333 r = add_opcode(reg, OP_MEMORY_END);
1334 if (r) return r;
1335 r = add_mem_num(reg, node->regnum);
1336 }
1337 break;
1338
1339 case ENCLOSE_STOP_BACKTRACK:
1340 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
1341 QtfrNode* qn = NQTFR(node->target);
1342 r = compile_tree_n_times(qn->target, qn->lower, reg);
1343 if (r) return r;
1344
1345 len = compile_length_tree(qn->target, reg);
1346 if (len < 0) return len;
1347
1348 r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);
1349 if (r) return r;
1350 r = compile_tree(qn->target, reg);
1351 if (r) return r;
1352 r = add_opcode(reg, OP_POP);
1353 if (r) return r;
1354 r = add_opcode_rel_addr(reg, OP_JUMP,
1355 -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
1356 }
1357 else {
1358 r = add_opcode(reg, OP_PUSH_STOP_BT);
1359 if (r) return r;
1360 r = compile_tree(node->target, reg);
1361 if (r) return r;
1362 r = add_opcode(reg, OP_POP_STOP_BT);
1363 }
1364 break;
1365
1366 default:
1367 return ONIGERR_TYPE_BUG;
1368 break;
1369 }
1370
1371 return r;
1372 }
1373
1374 static int
1375 compile_length_anchor_node(AnchorNode* node, regex_t* reg)
1376 {
1377 int len;
1378 int tlen = 0;
1379
1380 if (node->target) {
1381 tlen = compile_length_tree(node->target, reg);
1382 if (tlen < 0) return tlen;
1383 }
1384
1385 switch (node->type) {
1386 case ANCHOR_PREC_READ:
1387 len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
1388 break;
1389 case ANCHOR_PREC_READ_NOT:
1390 len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
1391 break;
1392 case ANCHOR_LOOK_BEHIND:
1393 len = SIZE_OP_LOOK_BEHIND + tlen;
1394 break;
1395 case ANCHOR_LOOK_BEHIND_NOT:
1396 len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT;
1397 break;
1398
1399 default:
1400 len = SIZE_OPCODE;
1401 break;
1402 }
1403
1404 return len;
1405 }
1406
1407 static int
1408 compile_anchor_node(AnchorNode* node, regex_t* reg)
1409 {
1410 int r, len;
1411
1412 switch (node->type) {
1413 case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break;
1414 case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break;
1415 case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break;
1416 case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break;
1417 case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break;
1418 case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
1419
1420 case ANCHOR_WORD_BOUND: r = add_opcode(reg, OP_WORD_BOUND); break;
1421 case ANCHOR_NOT_WORD_BOUND: r = add_opcode(reg, OP_NOT_WORD_BOUND); break;
1422 #ifdef USE_WORD_BEGIN_END
1423 case ANCHOR_WORD_BEGIN: r = add_opcode(reg, OP_WORD_BEGIN); break;
1424 case ANCHOR_WORD_END: r = add_opcode(reg, OP_WORD_END); break;
1425 #endif
1426
1427 case ANCHOR_PREC_READ:
1428 r = add_opcode(reg, OP_PUSH_POS);
1429 if (r) return r;
1430 r = compile_tree(node->target, reg);
1431 if (r) return r;
1432 r = add_opcode(reg, OP_POP_POS);
1433 break;
1434
1435 case ANCHOR_PREC_READ_NOT:
1436 len = compile_length_tree(node->target, reg);
1437 if (len < 0) return len;
1438 r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS);
1439 if (r) return r;
1440 r = compile_tree(node->target, reg);
1441 if (r) return r;
1442 r = add_opcode(reg, OP_FAIL_POS);
1443 break;
1444
1445 case ANCHOR_LOOK_BEHIND:
1446 {
1447 int n;
1448 r = add_opcode(reg, OP_LOOK_BEHIND);
1449 if (r) return r;
1450 if (node->char_len < 0) {
1451 r = get_char_length_tree(node->target, reg, &n);
1452 if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1453 }
1454 else
1455 n = node->char_len;
1456 r = add_length(reg, n);
1457 if (r) return r;
1458 r = compile_tree(node->target, reg);
1459 }
1460 break;
1461
1462 case ANCHOR_LOOK_BEHIND_NOT:
1463 {
1464 int n;
1465 len = compile_length_tree(node->target, reg);
1466 r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
1467 len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
1468 if (r) return r;
1469 if (node->char_len < 0) {
1470 r = get_char_length_tree(node->target, reg, &n);
1471 if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1472 }
1473 else
1474 n = node->char_len;
1475 r = add_length(reg, n);
1476 if (r) return r;
1477 r = compile_tree(node->target, reg);
1478 if (r) return r;
1479 r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
1480 }
1481 break;
1482
1483 default:
1484 return ONIGERR_TYPE_BUG;
1485 break;
1486 }
1487
1488 return r;
1489 }
1490
1491 static int
1492 compile_length_tree(Node* node, regex_t* reg)
1493 {
1494 int len, type, r;
1495
1496 type = NTYPE(node);
1497 switch (type) {
1498 case NT_LIST:
1499 len = 0;
1500 do {
1501 r = compile_length_tree(NCAR(node), reg);
1502 if (r < 0) return r;
1503 len += r;
1504 } while (IS_NOT_NULL(node = NCDR(node)));
1505 r = len;
1506 break;
1507
1508 case NT_ALT:
1509 {
1510 int n;
1511
1512 n = r = 0;
1513 do {
1514 r += compile_length_tree(NCAR(node), reg);
1515 n++;
1516 } while (IS_NOT_NULL(node = NCDR(node)));
1517 r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
1518 }
1519 break;
1520
1521 case NT_STR:
1522 if (NSTRING_IS_RAW(node))
1523 r = compile_length_string_raw_node(NSTR(node), reg);
1524 else
1525 r = compile_length_string_node(node, reg);
1526 break;
1527
1528 case NT_CCLASS:
1529 r = compile_length_cclass_node(NCCLASS(node), reg);
1530 break;
1531
1532 case NT_CTYPE:
1533 case NT_CANY:
1534 r = SIZE_OPCODE;
1535 break;
1536
1537 case NT_BREF:
1538 {
1539 BRefNode* br = NBREF(node);
1540
1541 #ifdef USE_BACKREF_WITH_LEVEL
1542 if (IS_BACKREF_NEST_LEVEL(br)) {
1543 r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
1544 SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1545 }
1546 else
1547 #endif
1548 if (br->back_num == 1) {
1549 r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
1550 ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
1551 }
1552 else {
1553 r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1554 }
1555 }
1556 break;
1557
1558 #ifdef USE_SUBEXP_CALL
1559 case NT_CALL:
1560 r = SIZE_OP_CALL;
1561 break;
1562 #endif
1563
1564 case NT_QTFR:
1565 r = compile_length_quantifier_node(NQTFR(node), reg);
1566 break;
1567
1568 case NT_ENCLOSE:
1569 r = compile_length_enclose_node(NENCLOSE(node), reg);
1570 break;
1571
1572 case NT_ANCHOR:
1573 r = compile_length_anchor_node(NANCHOR(node), reg);
1574 break;
1575
1576 default:
1577 return ONIGERR_TYPE_BUG;
1578 break;
1579 }
1580
1581 return r;
1582 }
1583
1584 static int
1585 compile_tree(Node* node, regex_t* reg)
1586 {
1587 int n, type, len, pos, r = 0;
1588
1589 type = NTYPE(node);
1590 switch (type) {
1591 case NT_LIST:
1592 do {
1593 r = compile_tree(NCAR(node), reg);
1594 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1595 break;
1596
1597 case NT_ALT:
1598 {
1599 Node* x = node;
1600 len = 0;
1601 do {
1602 len += compile_length_tree(NCAR(x), reg);
1603 if (NCDR(x) != NULL) {
1604 len += SIZE_OP_PUSH + SIZE_OP_JUMP;
1605 }
1606 } while (IS_NOT_NULL(x = NCDR(x)));
1607 pos = reg->used + len; /* goal position */
1608
1609 do {
1610 len = compile_length_tree(NCAR(node), reg);
1611 if (IS_NOT_NULL(NCDR(node))) {
1612 r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
1613 if (r) break;
1614 }
1615 r = compile_tree(NCAR(node), reg);
1616 if (r) break;
1617 if (IS_NOT_NULL(NCDR(node))) {
1618 len = pos - (reg->used + SIZE_OP_JUMP);
1619 r = add_opcode_rel_addr(reg, OP_JUMP, len);
1620 if (r) break;
1621 }
1622 } while (IS_NOT_NULL(node = NCDR(node)));
1623 }
1624 break;
1625
1626 case NT_STR:
1627 if (NSTRING_IS_RAW(node))
1628 r = compile_string_raw_node(NSTR(node), reg);
1629 else
1630 r = compile_string_node(node, reg);
1631 break;
1632
1633 case NT_CCLASS:
1634 r = compile_cclass_node(NCCLASS(node), reg);
1635 break;
1636
1637 case NT_CTYPE:
1638 {
1639 int op;
1640
1641 switch (NCTYPE(node)->ctype) {
1642 case ONIGENC_CTYPE_WORD:
1643 if (NCTYPE(node)->not != 0) op = OP_NOT_WORD;
1644 else op = OP_WORD;
1645 break;
1646 default:
1647 return ONIGERR_TYPE_BUG;
1648 break;
1649 }
1650 r = add_opcode(reg, op);
1651 }
1652 break;
1653
1654 case NT_CANY:
1655 if (IS_MULTILINE(reg->options))
1656 r = add_opcode(reg, OP_ANYCHAR_ML);
1657 else
1658 r = add_opcode(reg, OP_ANYCHAR);
1659 break;
1660
1661 case NT_BREF:
1662 {
1663 BRefNode* br = NBREF(node);
1664
1665 #ifdef USE_BACKREF_WITH_LEVEL
1666 if (IS_BACKREF_NEST_LEVEL(br)) {
1667 r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
1668 if (r) return r;
1669 r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
1670 if (r) return r;
1671 r = add_length(reg, br->nest_level);
1672 if (r) return r;
1673
1674 goto add_bacref_mems;
1675 }
1676 else
1677 #endif
1678 if (br->back_num == 1) {
1679 n = br->back_static[0];
1680 if (IS_IGNORECASE(reg->options)) {
1681 r = add_opcode(reg, OP_BACKREFN_IC);
1682 if (r) return r;
1683 r = add_mem_num(reg, n);
1684 }
1685 else {
1686 switch (n) {
1687 case 1: r = add_opcode(reg, OP_BACKREF1); break;
1688 case 2: r = add_opcode(reg, OP_BACKREF2); break;
1689 default:
1690 r = add_opcode(reg, OP_BACKREFN);
1691 if (r) return r;
1692 r = add_mem_num(reg, n);
1693 break;
1694 }
1695 }
1696 }
1697 else {
1698 int i;
1699 int* p;
1700
1701 if (IS_IGNORECASE(reg->options)) {
1702 r = add_opcode(reg, OP_BACKREF_MULTI_IC);
1703 }
1704 else {
1705 r = add_opcode(reg, OP_BACKREF_MULTI);
1706 }
1707 if (r) return r;
1708
1709 #ifdef USE_BACKREF_WITH_LEVEL
1710 add_bacref_mems:
1711 #endif
1712 r = add_length(reg, br->back_num);
1713 if (r) return r;
1714 p = BACKREFS_P(br);
1715 for (i = br->back_num - 1; i >= 0; i--) {
1716 r = add_mem_num(reg, p[i]);
1717 if (r) return r;
1718 }
1719 }
1720 }
1721 break;
1722
1723 #ifdef USE_SUBEXP_CALL
1724 case NT_CALL:
1725 r = compile_call(NCALL(node), reg);
1726 break;
1727 #endif
1728
1729 case NT_QTFR:
1730 r = compile_quantifier_node(NQTFR(node), reg);
1731 break;
1732
1733 case NT_ENCLOSE:
1734 r = compile_enclose_node(NENCLOSE(node), reg);
1735 break;
1736
1737 case NT_ANCHOR:
1738 r = compile_anchor_node(NANCHOR(node), reg);
1739 break;
1740
1741 default:
1742 #ifdef ONIG_DEBUG
1743 fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
1744 #endif
1745 break;
1746 }
1747
1748 return r;
1749 }
1750
1751 #ifdef USE_NAMED_GROUP
1752
1753 static int
1754 noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
1755 {
1756 int r = 0;
1757 Node* node = *plink;
1758
1759 switch (NTYPE(node)) {
1760 case NT_LIST:
1761 case NT_ALT:
1762 do {
1763 r = noname_disable_map(&(NCAR(node)), map, counter);
1764 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1765 break;
1766
1767 case NT_QTFR:
1768 {
1769 Node** ptarget = &(NQTFR(node)->target);
1770 Node* old = *ptarget;
1771 r = noname_disable_map(ptarget, map, counter);
1772 if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
1773 onig_reduce_nested_quantifier(node, *ptarget);
1774 }
1775 }
1776 break;
1777
1778 case NT_ENCLOSE:
1779 {
1780 EncloseNode* en = NENCLOSE(node);
1781 if (en->type == ENCLOSE_MEMORY) {
1782 if (IS_ENCLOSE_NAMED_GROUP(en)) {
1783 (*counter)++;
1784 map[en->regnum].new_val = *counter;
1785 en->regnum = *counter;
1786 r = noname_disable_map(&(en->target), map, counter);
1787 }
1788 else {
1789 *plink = en->target;
1790 en->target = NULL_NODE;
1791 onig_node_free(node);
1792 r = noname_disable_map(plink, map, counter);
1793 }
1794 }
1795 else
1796 r = noname_disable_map(&(en->target), map, counter);
1797 }
1798 break;
1799
1800 default:
1801 break;
1802 }
1803
1804 return r;
1805 }
1806
1807 static int
1808 renumber_node_backref(Node* node, GroupNumRemap* map)
1809 {
1810 int i, pos, n, old_num;
1811 int *backs;
1812 BRefNode* bn = NBREF(node);
1813
1814 if (! IS_BACKREF_NAME_REF(bn))
1815 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
1816
1817 old_num = bn->back_num;
1818 if (IS_NULL(bn->back_dynamic))
1819 backs = bn->back_static;
1820 else
1821 backs = bn->back_dynamic;
1822
1823 for (i = 0, pos = 0; i < old_num; i++) {
1824 n = map[backs[i]].new_val;
1825 if (n > 0) {
1826 backs[pos] = n;
1827 pos++;
1828 }
1829 }
1830
1831 bn->back_num = pos;
1832 return 0;
1833 }
1834
1835 static int
1836 renumber_by_map(Node* node, GroupNumRemap* map)
1837 {
1838 int r = 0;
1839
1840 switch (NTYPE(node)) {
1841 case NT_LIST:
1842 case NT_ALT:
1843 do {
1844 r = renumber_by_map(NCAR(node), map);
1845 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1846 break;
1847 case NT_QTFR:
1848 r = renumber_by_map(NQTFR(node)->target, map);
1849 break;
1850 case NT_ENCLOSE:
1851 r = renumber_by_map(NENCLOSE(node)->target, map);
1852 break;
1853
1854 case NT_BREF:
1855 r = renumber_node_backref(node, map);
1856 break;
1857
1858 default:
1859 break;
1860 }
1861
1862 return r;
1863 }
1864
1865 static int
1866 numbered_ref_check(Node* node)
1867 {
1868 int r = 0;
1869
1870 switch (NTYPE(node)) {
1871 case NT_LIST:
1872 case NT_ALT:
1873 do {
1874 r = numbered_ref_check(NCAR(node));
1875 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1876 break;
1877 case NT_QTFR:
1878 r = numbered_ref_check(NQTFR(node)->target);
1879 break;
1880 case NT_ENCLOSE:
1881 r = numbered_ref_check(NENCLOSE(node)->target);
1882 break;
1883
1884 case NT_BREF:
1885 if (! IS_BACKREF_NAME_REF(NBREF(node)))
1886 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
1887 break;
1888
1889 default:
1890 break;
1891 }
1892
1893 return r;
1894 }
1895
1896 static int
1897 disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
1898 {
1899 int r, i, pos, counter;
1900 int Result;
1901 BitStatusType loc;
1902 GroupNumRemap* map;
1903
1904 map = (GroupNumRemap* )xmalloc(sizeof(GroupNumRemap) * (env->num_mem + 1));
1905 CHECK_NULL_RETURN_MEMERR(map);
1906 for (i = 1; i <= env->num_mem; i++) {
1907 map[i].new_val = 0;
1908 }
1909 counter = 0;
1910 r = noname_disable_map(root, map, &counter);
1911 if (r != 0) return r;
1912
1913 r = renumber_by_map(*root, map);
1914 if (r != 0) return r;
1915
1916 for (i = 1, pos = 1; i <= env->num_mem; i++) {
1917 if (map[i].new_val > 0) {
1918 SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
1919 pos++;
1920 }
1921 }
1922
1923 loc = env->capture_history;
1924 BIT_STATUS_CLEAR(env->capture_history);
1925 for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
1926 if (BIT_STATUS_AT(loc, i)) {
1927 BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);
1928 }
1929 }
1930
1931 env->num_mem = env->num_named;
1932 reg->num_mem = env->num_named;
1933
1934 Result = onig_renumber_name_table(reg, map);
1935 xfree(map);
1936 return Result;
1937 }
1938 #endif /* USE_NAMED_GROUP */
1939
1940 #ifdef USE_SUBEXP_CALL
1941 static int
1942 unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
1943 {
1944 int i, offset;
1945 EncloseNode* en;
1946 AbsAddrType addr;
1947
1948 for (i = 0; i < uslist->num; i++) {
1949 en = NENCLOSE(uslist->us[i].target);
1950 if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
1951 addr = en->call_addr;
1952 offset = uslist->us[i].offset;
1953
1954 BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
1955 }
1956 return 0;
1957 }
1958 #endif
1959
1960 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
1961 static int
1962 quantifiers_memory_node_info(Node* node)
1963 {
1964 int r = 0;
1965
1966 switch (NTYPE(node)) {
1967 case NT_LIST:
1968 case NT_ALT:
1969 {
1970 int v;
1971 do {
1972 v = quantifiers_memory_node_info(NCAR(node));
1973 if (v > r) r = v;
1974 } while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
1975 }
1976 break;
1977
1978 #ifdef USE_SUBEXP_CALL
1979 case NT_CALL:
1980 if (IS_CALL_RECURSION(NCALL(node))) {
1981 return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
1982 }
1983 else
1984 r = quantifiers_memory_node_info(NCALL(node)->target);
1985 break;
1986 #endif
1987
1988 case NT_QTFR:
1989 {
1990 QtfrNode* qn = NQTFR(node);
1991 if (qn->upper != 0) {
1992 r = quantifiers_memory_node_info(qn->target);
1993 }
1994 }
1995 break;
1996
1997 case NT_ENCLOSE:
1998 {
1999 EncloseNode* en = NENCLOSE(node);
2000 switch (en->type) {
2001 case ENCLOSE_MEMORY:
2002 return NQ_TARGET_IS_EMPTY_MEM;
2003 break;
2004
2005 case ENCLOSE_OPTION:
2006 case ENCLOSE_STOP_BACKTRACK:
2007 r = quantifiers_memory_node_info(en->target);
2008 break;
2009 default:
2010 break;
2011 }
2012 }
2013 break;
2014
2015 case NT_BREF:
2016 case NT_STR:
2017 case NT_CTYPE:
2018 case NT_CCLASS:
2019 case NT_CANY:
2020 case NT_ANCHOR:
2021 default:
2022 break;
2023 }
2024
2025 return r;
2026 }
2027 #endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */
2028
2029 static int
2030 get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
2031 {
2032 OnigDistance tmin;
2033 int r = 0;
2034
2035 *min = 0;
2036 switch (NTYPE(node)) {
2037 case NT_BREF:
2038 {
2039 int i;
2040 int* backs;
2041 Node** nodes = SCANENV_MEM_NODES(env);
2042 BRefNode* br = NBREF(node);
2043 if (br->state & NST_RECURSION) break;
2044
2045 backs = BACKREFS_P(br);
2046 if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2047 r = get_min_match_length(nodes[backs[0]], min, env);
2048 if (r != 0) break;
2049 for (i = 1; i < br->back_num; i++) {
2050 if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2051 r = get_min_match_length(nodes[backs[i]], &tmin, env);
2052 if (r != 0) break;
2053 if (*min > tmin) *min = tmin;
2054 }
2055 }
2056 break;
2057
2058 #ifdef USE_SUBEXP_CALL
2059 case NT_CALL:
2060 if (IS_CALL_RECURSION(NCALL(node))) {
2061 EncloseNode* en = NENCLOSE(NCALL(node)->target);
2062 if (IS_ENCLOSE_MIN_FIXED(en))
2063 *min = en->min_len;
2064 }
2065 else
2066 r = get_min_match_length(NCALL(node)->target, min, env);
2067 break;
2068 #endif
2069
2070 case NT_LIST:
2071 do {
2072 r = get_min_match_length(NCAR(node), &tmin, env);
2073 if (r == 0) *min += tmin;
2074 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2075 break;
2076
2077 case NT_ALT:
2078 {
2079 Node *x, *y;
2080 y = node;
2081 do {
2082 x = NCAR(y);
2083 r = get_min_match_length(x, &tmin, env);
2084 if (r != 0) break;
2085 if (y == node) *min = tmin;
2086 else if (*min > tmin) *min = tmin;
2087 } while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
2088 }
2089 break;
2090
2091 case NT_STR:
2092 {
2093 StrNode* sn = NSTR(node);
2094 *min = (OnigDistance)(sn->end - sn->s);
2095 }
2096 break;
2097
2098 case NT_CTYPE:
2099 *min = 1;
2100 break;
2101
2102 case NT_CCLASS:
2103 case NT_CANY:
2104 *min = 1;
2105 break;
2106
2107 case NT_QTFR:
2108 {
2109 QtfrNode* qn = NQTFR(node);
2110
2111 if (qn->lower > 0) {
2112 r = get_min_match_length(qn->target, min, env);
2113 if (r == 0)
2114 *min = distance_multiply(*min, qn->lower);
2115 }
2116 }
2117 break;
2118
2119 case NT_ENCLOSE:
2120 {
2121 EncloseNode* en = NENCLOSE(node);
2122 switch (en->type) {
2123 case ENCLOSE_MEMORY:
2124 #ifdef USE_SUBEXP_CALL
2125 if (IS_ENCLOSE_MIN_FIXED(en))
2126 *min = en->min_len;
2127 else {
2128 r = get_min_match_length(en->target, min, env);
2129 if (r == 0) {
2130 en->min_len = *min;
2131 SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);
2132 }
2133 }
2134 break;
2135 #endif
2136 case ENCLOSE_OPTION:
2137 case ENCLOSE_STOP_BACKTRACK:
2138 r = get_min_match_length(en->target, min, env);
2139 break;
2140 }
2141 }
2142 break;
2143
2144 case NT_ANCHOR:
2145 default:
2146 break;
2147 }
2148
2149 return r;
2150 }
2151
2152 static int
2153 get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
2154 {
2155 OnigDistance tmax;
2156 int r = 0;
2157
2158 *max = 0;
2159 switch (NTYPE(node)) {
2160 case NT_LIST:
2161 do {
2162 r = get_max_match_length(NCAR(node), &tmax, env);
2163 if (r == 0)
2164 *max = distance_add(*max, tmax);
2165 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2166 break;
2167
2168 case NT_ALT:
2169 do {
2170 r = get_max_match_length(NCAR(node), &tmax, env);
2171 if (r == 0 && *max < tmax) *max = tmax;
2172 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2173 break;
2174
2175 case NT_STR:
2176 {
2177 StrNode* sn = NSTR(node);
2178 *max = (OnigDistance)(sn->end - sn->s);
2179 }
2180 break;
2181
2182 case NT_CTYPE:
2183 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2184 break;
2185
2186 case NT_CCLASS:
2187 case NT_CANY:
2188 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2189 break;
2190
2191 case NT_BREF:
2192 {
2193 int i;
2194 int* backs;
2195 Node** nodes = SCANENV_MEM_NODES(env);
2196 BRefNode* br = NBREF(node);
2197 if (br->state & NST_RECURSION) {
2198 *max = ONIG_INFINITE_DISTANCE;
2199 break;
2200 }
2201 backs = BACKREFS_P(br);
2202 for (i = 0; i < br->back_num; i++) {
2203 if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2204 r = get_max_match_length(nodes[backs[i]], &tmax, env);
2205 if (r != 0) break;
2206 if (*max < tmax) *max = tmax;
2207 }
2208 }
2209 break;
2210
2211 #ifdef USE_SUBEXP_CALL
2212 case NT_CALL:
2213 if (! IS_CALL_RECURSION(NCALL(node)))
2214 r = get_max_match_length(NCALL(node)->target, max, env);
2215 else
2216 *max = ONIG_INFINITE_DISTANCE;
2217 break;
2218 #endif
2219
2220 case NT_QTFR:
2221 {
2222 QtfrNode* qn = NQTFR(node);
2223
2224 if (qn->upper != 0) {
2225 r = get_max_match_length(qn->target, max, env);
2226 if (r == 0 && *max != 0) {
2227 if (! IS_REPEAT_INFINITE(qn->upper))
2228 *max = distance_multiply(*max, qn->upper);
2229 else
2230 *max = ONIG_INFINITE_DISTANCE;
2231 }
2232 }
2233 }
2234 break;
2235
2236 case NT_ENCLOSE:
2237 {
2238 EncloseNode* en = NENCLOSE(node);
2239 switch (en->type) {
2240 case ENCLOSE_MEMORY:
2241 #ifdef USE_SUBEXP_CALL
2242 if (IS_ENCLOSE_MAX_FIXED(en))
2243 *max = en->max_len;
2244 else {
2245 r = get_max_match_length(en->target, max, env);
2246 if (r == 0) {
2247 en->max_len = *max;
2248 SET_ENCLOSE_STATUS(node, NST_MAX_FIXED);
2249 }
2250 }
2251 break;
2252 #endif
2253 case ENCLOSE_OPTION:
2254 case ENCLOSE_STOP_BACKTRACK:
2255 r = get_max_match_length(en->target, max, env);
2256 break;
2257 }
2258 }
2259 break;
2260
2261 case NT_ANCHOR:
2262 default:
2263 break;
2264 }
2265
2266 return r;
2267 }
2268
2269 #define GET_CHAR_LEN_VARLEN -1
2270 #define GET_CHAR_LEN_TOP_ALT_VARLEN -2
2271
2272 /* fixed size pattern node only */
2273 static int
2274 get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
2275 {
2276 int tlen;
2277 int r = 0;
2278
2279 level++;
2280 *len = 0;
2281 switch (NTYPE(node)) {
2282 case NT_LIST:
2283 do {
2284 r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2285 if (r == 0)
2286 *len = distance_add(*len, tlen);
2287 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2288 break;
2289
2290 case NT_ALT:
2291 {
2292 int tlen2;
2293 int varlen = 0;
2294
2295 r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2296 while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
2297 r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
2298 if (r == 0) {
2299 if (tlen != tlen2)
2300 varlen = 1;
2301 }
2302 }
2303 if (r == 0) {
2304 if (varlen != 0) {
2305 if (level == 1)
2306 r = GET_CHAR_LEN_TOP_ALT_VARLEN;
2307 else
2308 r = GET_CHAR_LEN_VARLEN;
2309 }
2310 else
2311 *len = tlen;
2312 }
2313 }
2314 break;
2315
2316 case NT_STR:
2317 {
2318 StrNode* sn = NSTR(node);
2319 UChar *s = sn->s;
2320 while (s < sn->end) {
2321 s += enclen(reg->enc, s);
2322 (*len)++;
2323 }
2324 }
2325 break;
2326
2327 case NT_QTFR:
2328 {
2329 QtfrNode* qn = NQTFR(node);
2330 if (qn->lower == qn->upper) {
2331 r = get_char_length_tree1(qn->target, reg, &tlen, level);
2332 if (r == 0)
2333 *len = distance_multiply(tlen, qn->lower);
2334 }
2335 else
2336 r = GET_CHAR_LEN_VARLEN;
2337 }
2338 break;
2339
2340 #ifdef USE_SUBEXP_CALL
2341 case NT_CALL:
2342 if (! IS_CALL_RECURSION(NCALL(node)))
2343 r = get_char_length_tree1(NCALL(node)->target, reg, len, level);
2344 else
2345 r = GET_CHAR_LEN_VARLEN;
2346 break;
2347 #endif
2348
2349 case NT_CTYPE:
2350 *len = 1;
2351 break;
2352
2353 case NT_CCLASS:
2354 case NT_CANY:
2355 *len = 1;
2356 break;
2357
2358 case NT_ENCLOSE:
2359 {
2360 EncloseNode* en = NENCLOSE(node);
2361 switch (en->type) {
2362 case ENCLOSE_MEMORY:
2363 #ifdef USE_SUBEXP_CALL
2364 if (IS_ENCLOSE_CLEN_FIXED(en))
2365 *len = en->char_len;
2366 else {
2367 r = get_char_length_tree1(en->target, reg, len, level);
2368 if (r == 0) {
2369 en->char_len = *len;
2370 SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED);
2371 }
2372 }
2373 break;
2374 #endif
2375 case ENCLOSE_OPTION:
2376 case ENCLOSE_STOP_BACKTRACK:
2377 r = get_char_length_tree1(en->target, reg, len, level);
2378 break;
2379 default:
2380 break;
2381 }
2382 }
2383 break;
2384
2385 case NT_ANCHOR:
2386 break;
2387
2388 default:
2389 r = GET_CHAR_LEN_VARLEN;
2390 break;
2391 }
2392
2393 return r;
2394 }
2395
2396 static int
2397 get_char_length_tree(Node* node, regex_t* reg, int* len)
2398 {
2399 return get_char_length_tree1(node, reg, len, 0);
2400 }
2401
2402 /* x is not included y ==> 1 : 0 */
2403 static int
2404 is_not_included(Node* x, Node* y, regex_t* reg)
2405 {
2406 int i, len;
2407 OnigCodePoint code;
2408 UChar *p;
2409 int ytype;
2410
2411 retry:
2412 ytype = NTYPE(y);
2413 switch (NTYPE(x)) {
2414 case NT_CTYPE:
2415 {
2416 switch (ytype) {
2417 case NT_CTYPE:
2418 if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
2419 NCTYPE(y)->not != NCTYPE(x)->not)
2420 return 1;
2421 else
2422 return 0;
2423 break;
2424
2425 case NT_CCLASS:
2426 swap:
2427 {
2428 Node* tmp;
2429 tmp = x; x = y; y = tmp;
2430 goto retry;
2431 }
2432 break;
2433
2434 case NT_STR:
2435 goto swap;
2436 break;
2437
2438 default:
2439 break;
2440 }
2441 }
2442 break;
2443
2444 case NT_CCLASS:
2445 {
2446 CClassNode* xc = NCCLASS(x);
2447 switch (ytype) {
2448 case NT_CTYPE:
2449 switch (NCTYPE(y)->ctype) {
2450 case ONIGENC_CTYPE_WORD:
2451 if (NCTYPE(y)->not == 0) {
2452 if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
2453 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2454 if (BITSET_AT(xc->bs, i)) {
2455 if (IS_CODE_SB_WORD(reg->enc, i)) return 0;
2456 }
2457 }
2458 return 1;
2459 }
2460 return 0;
2461 }
2462 else {
2463 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2464 if (! IS_CODE_SB_WORD(reg->enc, i)) {
2465 if (!IS_NCCLASS_NOT(xc)) {
2466 if (BITSET_AT(xc->bs, i))
2467 return 0;
2468 }
2469 else {
2470 if (! BITSET_AT(xc->bs, i))
2471 return 0;
2472 }
2473 }
2474 }
2475 return 1;
2476 }
2477 break;
2478
2479 default:
2480 break;
2481 }
2482 break;
2483
2484 case NT_CCLASS:
2485 {
2486 int v;
2487 CClassNode* yc = NCCLASS(y);
2488
2489 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2490 v = BITSET_AT(xc->bs, i);
2491 if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
2492 (v == 0 && IS_NCCLASS_NOT(xc))) {
2493 v = BITSET_AT(yc->bs, i);
2494 if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
2495 (v == 0 && IS_NCCLASS_NOT(yc)))
2496 return 0;
2497 }
2498 }
2499 if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
2500 (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
2501 return 1;
2502 return 0;
2503 }
2504 break;
2505
2506 case NT_STR:
2507 goto swap;
2508 break;
2509
2510 default:
2511 break;
2512 }
2513 }
2514 break;
2515
2516 case NT_STR:
2517 {
2518 StrNode* xs = NSTR(x);
2519 if (NSTRING_LEN(x) == 0)
2520 break;
2521
2522 //c = *(xs->s);
2523 switch (ytype) {
2524 case NT_CTYPE:
2525 switch (NCTYPE(y)->ctype) {
2526 case ONIGENC_CTYPE_WORD:
2527 if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
2528 return NCTYPE(y)->not;
2529 else
2530 return !(NCTYPE(y)->not);
2531 break;
2532 default:
2533 break;
2534 }
2535 break;
2536
2537 case NT_CCLASS:
2538 {
2539 CClassNode* cc = NCCLASS(y);
2540
2541 code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
2542 xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
2543 return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
2544 }
2545 break;
2546
2547 case NT_STR:
2548 {
2549 UChar *q;
2550 StrNode* ys = NSTR(y);
2551 len = NSTRING_LEN(x);
2552 if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
2553 if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
2554 /* tiny version */
2555 return 0;
2556 }
2557 else {
2558 for (i = 0, p = ys->s, q = xs->s; i < len; i++, p++, q++) {
2559 if (*p != *q) return 1;
2560 }
2561 }
2562 }
2563 break;
2564
2565 default:
2566 break;
2567 }
2568 }
2569 break;
2570
2571 default:
2572 break;
2573 }
2574
2575 return 0;
2576 }
2577
2578 static Node*
2579 get_head_value_node(Node* node, int exact, regex_t* reg)
2580 {
2581 Node* n = NULL_NODE;
2582
2583 switch (NTYPE(node)) {
2584 case NT_BREF:
2585 case NT_ALT:
2586 case NT_CANY:
2587 #ifdef USE_SUBEXP_CALL
2588 case NT_CALL:
2589 #endif
2590 break;
2591
2592 case NT_CTYPE:
2593 case NT_CCLASS:
2594 if (exact == 0) {
2595 n = node;
2596 }
2597 break;
2598
2599 case NT_LIST:
2600 n = get_head_value_node(NCAR(node), exact, reg);
2601 break;
2602
2603 case NT_STR:
2604 {
2605 StrNode* sn = NSTR(node);
2606
2607 if (sn->end <= sn->s)
2608 break;
2609
2610 if (exact != 0 &&
2611 !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
2612 }
2613 else {
2614 n = node;
2615 }
2616 }
2617 break;
2618
2619 case NT_QTFR:
2620 {
2621 QtfrNode* qn = NQTFR(node);
2622 if (qn->lower > 0) {
2623 if (IS_NOT_NULL(qn->head_exact))
2624 n = qn->head_exact;
2625 else
2626 n = get_head_value_node(qn->target, exact, reg);
2627 }
2628 }
2629 break;
2630
2631 case NT_ENCLOSE:
2632 {
2633 EncloseNode* en = NENCLOSE(node);
2634 switch (en->type) {
2635 case ENCLOSE_OPTION:
2636 {
2637 OnigOptionType options = reg->options;
2638
2639 reg->options = NENCLOSE(node)->option;
2640 n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
2641 reg->options = options;
2642 }
2643 break;
2644
2645 case ENCLOSE_MEMORY:
2646 case ENCLOSE_STOP_BACKTRACK:
2647 n = get_head_value_node(en->target, exact, reg);
2648 break;
2649 }
2650 }
2651 break;
2652
2653 case NT_ANCHOR:
2654 if (NANCHOR(node)->type == ANCHOR_PREC_READ)
2655 n = get_head_value_node(NANCHOR(node)->target, exact, reg);
2656 break;
2657
2658 default:
2659 break;
2660 }
2661
2662 return n;
2663 }
2664
2665 static int
2666 check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)
2667 {
2668 int type, r = 0;
2669
2670 type = NTYPE(node);
2671 if ((NTYPE2BIT(type) & type_mask) == 0)
2672 return 1;
2673
2674 switch (type) {
2675 case NT_LIST:
2676 case NT_ALT:
2677 do {
2678 r = check_type_tree(NCAR(node), type_mask, enclose_mask,
2679 anchor_mask);
2680 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2681 break;
2682
2683 case NT_QTFR:
2684 r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
2685 anchor_mask);
2686 break;
2687
2688 case NT_ENCLOSE:
2689 {
2690 EncloseNode* en = NENCLOSE(node);
2691 if ((en->type & enclose_mask) == 0)
2692 return 1;
2693
2694 r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
2695 }
2696 break;
2697
2698 case NT_ANCHOR:
2699 type = NANCHOR(node)->type;
2700 if ((type & anchor_mask) == 0)
2701 return 1;
2702
2703 if (NANCHOR(node)->target)
2704 r = check_type_tree(NANCHOR(node)->target,
2705 type_mask, enclose_mask, anchor_mask);
2706 break;
2707
2708 default:
2709 break;
2710 }
2711 return r;
2712 }
2713
2714 #ifdef USE_SUBEXP_CALL
2715
2716 #define RECURSION_EXIST 1
2717 #define RECURSION_INFINITE 2
2718
2719 static int
2720 subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
2721 {
2722 int type;
2723 int r = 0;
2724
2725 type = NTYPE(node);
2726 switch (type) {
2727 case NT_LIST:
2728 {
2729 Node *x;
2730 OnigDistance min;
2731 int ret;
2732
2733 x = node;
2734 do {
2735 ret = subexp_inf_recursive_check(NCAR(x), env, head);
2736 if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2737 r |= ret;
2738 if (head) {
2739 ret = get_min_match_length(NCAR(x), &min, env);
2740 if (ret != 0) return ret;
2741 if (min != 0) head = 0;
2742 }
2743 } while (IS_NOT_NULL(x = NCDR(x)));
2744 }
2745 break;
2746
2747 case NT_ALT:
2748 {
2749 int ret;
2750 r = RECURSION_EXIST;
2751 do {
2752 ret = subexp_inf_recursive_check(NCAR(node), env, head);
2753 if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2754 r &= ret;
2755 } while (IS_NOT_NULL(node = NCDR(node)));
2756 }
2757 break;
2758
2759 case NT_QTFR:
2760 r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
2761 if (r == RECURSION_EXIST) {
2762 if (NQTFR(node)->lower == 0) r = 0;
2763 }
2764 break;
2765
2766 case NT_ANCHOR:
2767 {
2768 AnchorNode* an = NANCHOR(node);
2769 switch (an->type) {
2770 case ANCHOR_PREC_READ:
2771 case ANCHOR_PREC_READ_NOT:
2772 case ANCHOR_LOOK_BEHIND:
2773 case ANCHOR_LOOK_BEHIND_NOT:
2774 r = subexp_inf_recursive_check(an->target, env, head);
2775 break;
2776 }
2777 }
2778 break;
2779
2780 case NT_CALL:
2781 r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
2782 break;
2783
2784 case NT_ENCLOSE:
2785 if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
2786 return 0;
2787 else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2788 return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
2789 else {
2790 SET_ENCLOSE_STATUS(node, NST_MARK2);
2791 r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
2792 CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
2793 }
2794 break;
2795
2796 default:
2797 break;
2798 }
2799
2800 return r;
2801 }
2802
2803 static int
2804 subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
2805 {
2806 int type;
2807 int r = 0;
2808
2809 type = NTYPE(node);
2810 switch (type) {
2811 case NT_LIST:
2812 case NT_ALT:
2813 do {
2814 r = subexp_inf_recursive_check_trav(NCAR(node), env);
2815 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2816 break;
2817
2818 case NT_QTFR:
2819 r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
2820 break;
2821
2822 case NT_ANCHOR:
2823 {
2824 AnchorNode* an = NANCHOR(node);
2825 switch (an->type) {
2826 case ANCHOR_PREC_READ:
2827 case ANCHOR_PREC_READ_NOT:
2828 case ANCHOR_LOOK_BEHIND:
2829 case ANCHOR_LOOK_BEHIND_NOT:
2830 r = subexp_inf_recursive_check_trav(an->target, env);
2831 break;
2832 }
2833 }
2834 break;
2835
2836 case NT_ENCLOSE:
2837 {
2838 EncloseNode* en = NENCLOSE(node);
2839
2840 if (IS_ENCLOSE_RECURSION(en)) {
2841 SET_ENCLOSE_STATUS(node, NST_MARK1);
2842 r = subexp_inf_recursive_check(en->target, env, 1);
2843 if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
2844 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
2845 }
2846 r = subexp_inf_recursive_check_trav(en->target, env);
2847 }
2848
2849 break;
2850
2851 default:
2852 break;
2853 }
2854
2855 return r;
2856 }
2857
2858 static int
2859 subexp_recursive_check(Node* node)
2860 {
2861 int r = 0;
2862
2863 switch (NTYPE(node)) {
2864 case NT_LIST:
2865 case NT_ALT:
2866 do {
2867 r |= subexp_recursive_check(NCAR(node));
2868 } while (IS_NOT_NULL(node = NCDR(node)));
2869 break;
2870
2871 case NT_QTFR:
2872 r = subexp_recursive_check(NQTFR(node)->target);
2873 break;
2874
2875 case NT_ANCHOR:
2876 {
2877 AnchorNode* an = NANCHOR(node);
2878 switch (an->type) {
2879 case ANCHOR_PREC_READ:
2880 case ANCHOR_PREC_READ_NOT:
2881 case ANCHOR_LOOK_BEHIND:
2882 case ANCHOR_LOOK_BEHIND_NOT:
2883 r = subexp_recursive_check(an->target);
2884 break;
2885 }
2886 }
2887 break;
2888
2889 case NT_CALL:
2890 r = subexp_recursive_check(NCALL(node)->target);
2891 if (r != 0) SET_CALL_RECURSION(node);
2892 break;
2893
2894 case NT_ENCLOSE:
2895 if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
2896 return 0;
2897 else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2898 return 1; /* recursion */
2899 else {
2900 SET_ENCLOSE_STATUS(node, NST_MARK2);
2901 r = subexp_recursive_check(NENCLOSE(node)->target);
2902 CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
2903 }
2904 break;
2905
2906 default:
2907 break;
2908 }
2909
2910 return r;
2911 }
2912
2913
2914 static int
2915 subexp_recursive_check_trav(Node* node, ScanEnv* env)
2916 {
2917 #define FOUND_CALLED_NODE 1
2918
2919 int type;
2920 int r = 0;
2921
2922 type = NTYPE(node);
2923 switch (type) {
2924 case NT_LIST:
2925 case NT_ALT:
2926 {
2927 int ret;
2928 do {
2929 ret = subexp_recursive_check_trav(NCAR(node), env);
2930 if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
2931 else if (ret < 0) return ret;
2932 } while (IS_NOT_NULL(node = NCDR(node)));
2933 }
2934 break;
2935
2936 case NT_QTFR:
2937 r = subexp_recursive_check_trav(NQTFR(node)->target, env);
2938 if (NQTFR(node)->upper == 0) {
2939 if (r == FOUND_CALLED_NODE)
2940 NQTFR(node)->is_refered = 1;
2941 }
2942 break;
2943
2944 case NT_ANCHOR:
2945 {
2946 AnchorNode* an = NANCHOR(node);
2947 switch (an->type) {
2948 case ANCHOR_PREC_READ:
2949 case ANCHOR_PREC_READ_NOT:
2950 case ANCHOR_LOOK_BEHIND:
2951 case ANCHOR_LOOK_BEHIND_NOT:
2952 r = subexp_recursive_check_trav(an->target, env);
2953 break;
2954 }
2955 }
2956 break;
2957
2958 case NT_ENCLOSE:
2959 {
2960 EncloseNode* en = NENCLOSE(node);
2961
2962 if (! IS_ENCLOSE_RECURSION(en)) {
2963 if (IS_ENCLOSE_CALLED(en)) {
2964 SET_ENCLOSE_STATUS(node, NST_MARK1);
2965 r = subexp_recursive_check(en->target);
2966 if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
2967 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
2968 }
2969 }
2970 r = subexp_recursive_check_trav(en->target, env);
2971 if (IS_ENCLOSE_CALLED(en))
2972 r |= FOUND_CALLED_NODE;
2973 }
2974 break;
2975
2976 default:
2977 break;
2978 }
2979
2980 return r;
2981 }
2982
2983 static int
2984 setup_subexp_call(Node* node, ScanEnv* env)
2985 {
2986 int type;
2987 int r = 0;
2988
2989 type = NTYPE(node);
2990 switch (type) {
2991 case NT_LIST:
2992 do {
2993 r = setup_subexp_call(NCAR(node), env);
2994 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2995 break;
2996
2997 case NT_ALT:
2998 do {
2999 r = setup_subexp_call(NCAR(node), env);
3000 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3001 break;
3002
3003 case NT_QTFR:
3004 r = setup_subexp_call(NQTFR(node)->target, env);
3005 break;
3006 case NT_ENCLOSE:
3007 r = setup_subexp_call(NENCLOSE(node)->target, env);
3008 break;
3009
3010 case NT_CALL:
3011 {
3012 CallNode* cn = NCALL(node);
3013 Node** nodes = SCANENV_MEM_NODES(env);
3014
3015 if (cn->group_num != 0) {
3016 int gnum = cn->group_num;
3017
3018 #ifdef USE_NAMED_GROUP
3019 if (env->num_named > 0 &&
3020 IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
3021 !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
3022 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
3023 }
3024 #endif
3025 if (gnum > env->num_mem) {
3026 onig_scan_env_set_error_string(env,
3027 ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
3028 return ONIGERR_UNDEFINED_GROUP_REFERENCE;
3029 }
3030
3031 #ifdef USE_NAMED_GROUP
3032 set_call_attr:
3033 #endif
3034 cn->target = nodes[cn->group_num];
3035 if (IS_NULL(cn->target)) {
3036 onig_scan_env_set_error_string(env,
3037 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
3038 return ONIGERR_UNDEFINED_NAME_REFERENCE;
3039 }
3040 SET_ENCLOSE_STATUS(cn->target, NST_CALLED);
3041 BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);
3042 cn->unset_addr_list = env->unset_addr_list;
3043 }
3044 #ifdef USE_NAMED_GROUP
3045 else {
3046 int *refs;
3047
3048 int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
3049 &refs);
3050 if (n <= 0) {
3051 onig_scan_env_set_error_string(env,
3052 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
3053 return ONIGERR_UNDEFINED_NAME_REFERENCE;
3054 }
3055 else if (n > 1) {
3056 onig_scan_env_set_error_string(env,
3057 ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
3058 return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
3059 }
3060 else {
3061 cn->group_num = refs[0];
3062 goto set_call_attr;
3063 }
3064 }
3065 #endif
3066 }
3067 break;
3068
3069 case NT_ANCHOR:
3070 {
3071 AnchorNode* an = NANCHOR(node);
3072
3073 switch (an->type) {
3074 case ANCHOR_PREC_READ:
3075 case ANCHOR_PREC_READ_NOT:
3076 case ANCHOR_LOOK_BEHIND:
3077 case ANCHOR_LOOK_BEHIND_NOT:
3078 r = setup_subexp_call(an->target, env);
3079 break;
3080 }
3081 }
3082 break;
3083
3084 default:
3085 break;
3086 }
3087
3088 return r;
3089 }
3090 #endif
3091
3092 /* divide different length alternatives in look-behind.
3093 (?<=A|B) ==> (?<=A)|(?<=B)
3094 (?<!A|B) ==> (?<!A)(?<!B)
3095 */
3096 static int
3097 divide_look_behind_alternatives(Node* node)
3098 {
3099 Node *head, *np, *insert_node;
3100 AnchorNode* an = NANCHOR(node);
3101 int anc_type = an->type;
3102
3103 head = an->target;
3104 np = NCAR(head);
3105 swap_node(node, head);
3106 NCAR(node) = head;
3107 NANCHOR(head)->target = np;
3108
3109 np = node;
3110 while ((np = NCDR(np)) != NULL_NODE) {
3111 insert_node = onig_node_new_anchor(anc_type);
3112 CHECK_NULL_RETURN_MEMERR(insert_node);
3113 NANCHOR(insert_node)->target = NCAR(np);
3114 NCAR(np) = insert_node;
3115 }
3116
3117 if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
3118 np = node;
3119 do {
3120 SET_NTYPE(np, NT_LIST); /* alt -> list */
3121 } while ((np = NCDR(np)) != NULL_NODE);
3122 }
3123 return 0;
3124 }
3125
3126 static int
3127 setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
3128 {
3129 int r, len;
3130 AnchorNode* an = NANCHOR(node);
3131
3132 r = get_char_length_tree(an->target, reg, &len);
3133 if (r == 0)
3134 an->char_len = len;
3135 else if (r == GET_CHAR_LEN_VARLEN)
3136 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3137 else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
3138 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
3139 r = divide_look_behind_alternatives(node);
3140 else
3141 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3142 }
3143
3144 return r;
3145 }
3146
3147 static int
3148 next_setup(Node* node, Node* next_node, regex_t* reg)
3149 {
3150 int type;
3151
3152 retry:
3153 type = NTYPE(node);
3154 if (type == NT_QTFR) {
3155 QtfrNode* qn = NQTFR(node);
3156 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
3157 #ifdef USE_QTFR_PEEK_NEXT
3158 Node* n = get_head_value_node(next_node, 1, reg);
3159 /* '\0': for UTF-16BE etc... */
3160 if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') {
3161 qn->next_head_exact = n;
3162 }
3163 #endif
3164 /* automatic posseivation a*b ==> (?>a*)b */
3165 if (qn->lower <= 1) {
3166 int ttype = NTYPE(qn->target);
3167 if (IS_NODE_TYPE_SIMPLE(ttype)) {
3168 Node *x, *y;
3169 x = get_head_value_node(qn->target, 0, reg);
3170 if (IS_NOT_NULL(x)) {
3171 y = get_head_value_node(next_node, 0, reg);
3172 if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
3173 Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK);
3174 CHECK_NULL_RETURN_MEMERR(en);
3175 SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);
3176 swap_node(node, en);
3177 NENCLOSE(node)->target = en;
3178 }
3179 }
3180 }
3181 }
3182 }
3183 }
3184 else if (type == NT_ENCLOSE) {
3185 EncloseNode* en = NENCLOSE(node);
3186 if (en->type == ENCLOSE_MEMORY) {
3187 node = en->target;
3188 goto retry;
3189 }
3190 }
3191 return 0;
3192 }
3193
3194
3195 static int
3196 update_string_node_case_fold(regex_t* reg, Node *node)
3197 {
3198 UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
3199 UChar *sbuf, *ebuf, *sp;
3200 int r, i, len, sbuf_size;
3201 StrNode* sn = NSTR(node);
3202
3203 end = sn->end;
3204 sbuf_size = (int)(end - sn->s) * 2;
3205 sbuf = (UChar* )xmalloc(sbuf_size);
3206 CHECK_NULL_RETURN_MEMERR(sbuf);
3207 ebuf = sbuf + sbuf_size;
3208
3209 sp = sbuf;
3210 p = sn->s;
3211 while (p < end) {
3212 len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
3213 for (i = 0; i < len; i++) {
3214 if (sp >= ebuf) {
3215 sbuf = (UChar* )xrealloc(sbuf, sbuf_size * 2, sbuf_size);
3216 CHECK_NULL_RETURN_MEMERR(sbuf);
3217 sp = sbuf + sbuf_size;
3218 sbuf_size *= 2;
3219 ebuf = sbuf + sbuf_size;
3220 }
3221
3222 *sp++ = buf[i];
3223 }
3224 }
3225
3226 r = onig_node_str_set(node, sbuf, sp);
3227 if (r != 0) {
3228 xfree(sbuf);
3229 return r;
3230 }
3231
3232 xfree(sbuf);
3233 return 0;
3234 }
3235
3236 static int
3237 expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end,
3238 regex_t* reg)
3239 {
3240 int r;
3241 Node *node;
3242
3243 node = onig_node_new_str(s, end);
3244 if (IS_NULL(node)) return ONIGERR_MEMORY;
3245
3246 r = update_string_node_case_fold(reg, node);
3247 if (r != 0) {
3248 onig_node_free(node);
3249 return r;
3250 }
3251
3252 NSTRING_SET_AMBIG(node);
3253 NSTRING_SET_DONT_GET_OPT_INFO(node);
3254 *rnode = node;
3255 return 0;
3256 }
3257
3258 static int
3259 expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
3260 UChar *p, int slen, UChar *end,
3261 regex_t* reg, Node **rnode)
3262 {
3263 int r, i, j, len, varlen;
3264 Node *anode, *var_anode, *snode, *xnode, *an;
3265 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
3266
3267 *rnode = var_anode = NULL_NODE;
3268
3269 varlen = 0;
3270 for (i = 0; i < item_num; i++) {
3271 if (items[i].byte_len != slen) {
3272 varlen = 1;
3273 break;
3274 }
3275 }
3276
3277 if (varlen != 0) {
3278 *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3279 if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
3280
3281 xnode = onig_node_new_list(NULL, NULL);
3282 if (IS_NULL(xnode)) goto mem_err;
3283 NCAR(var_anode) = xnode;
3284
3285 anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3286 if (IS_NULL(anode)) goto mem_err;
3287 NCAR(xnode) = anode;
3288 }
3289 else {
3290 *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3291 if (IS_NULL(anode)) return ONIGERR_MEMORY;
3292 }
3293
3294 snode = onig_node_new_str(p, p + slen);
3295 if (IS_NULL(snode)) goto mem_err;
3296
3297 NCAR(anode) = snode;
3298
3299 for (i = 0; i < item_num; i++) {
3300 snode = onig_node_new_str(NULL, NULL);
3301 if (IS_NULL(snode)) goto mem_err;
3302
3303 for (j = 0; j < items[i].code_len; j++) {
3304 len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
3305 if (len < 0) {
3306 r = len;
3307 goto mem_err2;
3308 }
3309
3310 r = onig_node_str_cat(snode, buf, buf + len);
3311 if (r != 0) goto mem_err2;
3312 }
3313
3314 an = onig_node_new_alt(NULL_NODE, NULL_NODE);
3315 if (IS_NULL(an)) {
3316 goto mem_err2;
3317 }
3318
3319 if (items[i].byte_len != slen) {
3320 Node *rem;
3321 UChar *q = p + items[i].byte_len;
3322
3323 if (q < end) {
3324 r = expand_case_fold_make_rem_string(&rem, q, end, reg);
3325 if (r != 0) {
3326 onig_node_free(an);
3327 goto mem_err2;
3328 }
3329
3330 xnode = onig_node_list_add(NULL_NODE, snode);
3331 if (IS_NULL(xnode)) {
3332 onig_node_free(an);
3333 onig_node_free(rem);
3334 goto mem_err2;
3335 }
3336 if (IS_NULL(onig_node_list_add(xnode, rem))) {
3337 onig_node_free(an);
3338 onig_node_free(xnode);
3339 onig_node_free(rem);
3340 goto mem_err;
3341 }
3342
3343 NCAR(an) = xnode;
3344 }
3345 else {
3346 NCAR(an) = snode;
3347 }
3348
3349 NCDR(var_anode) = an;
3350 var_anode = an;
3351 }
3352 else {
3353 NCAR(an) = snode;
3354 NCDR(anode) = an;
3355 anode = an;
3356 }
3357 }
3358
3359 return varlen;
3360
3361 mem_err2:
3362 onig_node_free(snode);
3363
3364 mem_err:
3365 onig_node_free(*rnode);
3366
3367 return ONIGERR_MEMORY;
3368 }
3369
3370 static int
3371 expand_case_fold_string(Node* node, regex_t* reg)
3372 {
3373 #define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
3374
3375 int r, n, len, alt_num;
3376 UChar *start, *end, *p;
3377 Node *top_root, *root, *snode, *prev_node;
3378 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
3379 StrNode* sn = NSTR(node);
3380
3381 if (NSTRING_IS_AMBIG(node)) return 0;
3382
3383 start = sn->s;
3384 end = sn->end;
3385 if (start >= end) return 0;
3386
3387 r = 0;
3388 top_root = root = prev_node = snode = NULL_NODE;
3389 alt_num = 1;
3390 p = start;
3391 while (p < end) {
3392 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,
3393 p, end, items);
3394 if (n < 0) {
3395 r = n;
3396 goto err;
3397 }
3398
3399 len = enclen(reg->enc, p);
3400
3401 if (n == 0) {
3402 if (IS_NULL(snode)) {
3403 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3404 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3405 if (IS_NULL(root)) {
3406 onig_node_free(prev_node);
3407 goto mem_err;
3408 }
3409 }
3410
3411 prev_node = snode = onig_node_new_str(NULL, NULL);
3412 if (IS_NULL(snode)) goto mem_err;
3413 if (IS_NOT_NULL(root)) {
3414 if (IS_NULL(onig_node_list_add(root, snode))) {
3415 onig_node_free(snode);
3416 goto mem_err;
3417 }
3418 }
3419 }
3420
3421 r = onig_node_str_cat(snode, p, p + len);
3422 if (r != 0) goto err;
3423 }
3424 else {
3425 alt_num *= (n + 1);
3426 if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
3427
3428 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3429 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3430 if (IS_NULL(root)) {
3431 onig_node_free(prev_node);
3432 goto mem_err;
3433 }
3434 }
3435
3436 r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
3437 if (r < 0) goto mem_err;
3438 if (r == 1) {
3439 if (IS_NULL(root)) {
3440 top_root = prev_node;
3441 }
3442 else {
3443 if (IS_NULL(onig_node_list_add(root, prev_node))) {
3444 onig_node_free(prev_node);
3445 goto mem_err;
3446 }
3447 }
3448
3449 root = NCAR(prev_node);
3450 }
3451 else { /* r == 0 */
3452 if (IS_NOT_NULL(root)) {
3453 if (IS_NULL(onig_node_list_add(root, prev_node))) {
3454 onig_node_free(prev_node);
3455 goto mem_err;
3456 }
3457 }
3458 }
3459
3460 snode = NULL_NODE;
3461 }
3462
3463 p += len;
3464 }
3465
3466 if (p < end) {
3467 Node *srem;
3468
3469 r = expand_case_fold_make_rem_string(&srem, p, end, reg);
3470 if (r != 0) goto mem_err;
3471
3472 if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
3473 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3474 if (IS_NULL(root)) {
3475 onig_node_free(srem);
3476 onig_node_free(prev_node);
3477 goto mem_err;
3478 }
3479 }
3480
3481 if (IS_NULL(root)) {
3482 prev_node = srem;
3483 }
3484 else {
3485 if (IS_NULL(onig_node_list_add(root, srem))) {
3486 onig_node_free(srem);
3487 goto mem_err;
3488 }
3489 }
3490 }
3491
3492 /* ending */
3493 top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
3494 swap_node(node, top_root);
3495 onig_node_free(top_root);
3496 return 0;
3497
3498 mem_err:
3499 r = ONIGERR_MEMORY;
3500
3501 err:
3502 onig_node_free(top_root);
3503 return r;
3504 }
3505
3506
3507 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3508
3509 #define CEC_THRES_NUM_BIG_REPEAT 512
3510 #define CEC_INFINITE_NUM 0x7fffffff
3511
3512 #define CEC_IN_INFINITE_REPEAT (1<<0)
3513 #define CEC_IN_FINITE_REPEAT (1<<1)
3514 #define CEC_CONT_BIG_REPEAT (1<<2)
3515
3516 static int
3517 setup_comb_exp_check(Node* node, int state, ScanEnv* env)
3518 {
3519 int type;
3520 int r = state;
3521
3522 type = NTYPE(node);
3523 switch (type) {
3524 case NT_LIST:
3525 {
3526 Node* prev = NULL_NODE;
3527 do {
3528 r = setup_comb_exp_check(NCAR(node), r, env);
3529 prev = NCAR(node);
3530 } while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
3531 }
3532 break;
3533
3534 case NT_ALT:
3535 {
3536 int ret;
3537 do {
3538 ret = setup_comb_exp_check(NCAR(node), state, env);
3539 r |= ret;
3540 } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));
3541 }
3542 break;
3543
3544 case NT_QTFR:
3545 {
3546 int child_state = state;
3547 int add_state = 0;
3548 QtfrNode* qn = NQTFR(node);
3549 Node* target = qn->target;
3550 int var_num;
3551
3552 if (! IS_REPEAT_INFINITE(qn->upper)) {
3553 if (qn->upper > 1) {
3554 /* {0,1}, {1,1} are allowed */
3555 child_state |= CEC_IN_FINITE_REPEAT;
3556
3557 /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
3558 if (env->backrefed_mem == 0) {
3559 if (NTYPE(qn->target) == NT_ENCLOSE) {
3560 EncloseNode* en = NENCLOSE(qn->target);
3561 if (en->type == ENCLOSE_MEMORY) {
3562 if (NTYPE(en->target) == NT_QTFR) {
3563 QtfrNode* q = NQTFR(en->target);
3564 if (IS_REPEAT_INFINITE(q->upper)
3565 && q->greedy == qn->greedy) {
3566 qn->upper = (qn->lower == 0 ? 1 : qn->lower);
3567 if (qn->upper == 1)
3568 child_state = state;
3569 }
3570 }
3571 }
3572 }
3573 }
3574 }
3575 }
3576
3577 if (state & CEC_IN_FINITE_REPEAT) {
3578 qn->comb_exp_check_num = -1;
3579 }
3580 else {
3581 if (IS_REPEAT_INFINITE(qn->upper)) {
3582 var_num = CEC_INFINITE_NUM;
3583 child_state |= CEC_IN_INFINITE_REPEAT;
3584 }
3585 else {
3586 var_num = qn->upper - qn->lower;
3587 }
3588
3589 if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
3590 add_state |= CEC_CONT_BIG_REPEAT;
3591
3592 if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
3593 ((state & CEC_CONT_BIG_REPEAT) != 0 &&
3594 var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
3595 if (qn->comb_exp_check_num == 0) {
3596 env->num_comb_exp_check++;
3597 qn->comb_exp_check_num = env->num_comb_exp_check;
3598 if (env->curr_max_regnum > env->comb_exp_max_regnum)
3599 env->comb_exp_max_regnum = env->curr_max_regnum;
3600 }
3601 }
3602 }
3603
3604 r = setup_comb_exp_check(target, child_state, env);
3605 r |= add_state;
3606 }
3607 break;
3608
3609 case NT_ENCLOSE:
3610 {
3611 EncloseNode* en = NENCLOSE(node);
3612
3613 switch (en->type) {
3614 case ENCLOSE_MEMORY:
3615 {
3616 if (env->curr_max_regnum < en->regnum)
3617 env->curr_max_regnum = en->regnum;
3618
3619 r = setup_comb_exp_check(en->target, state, env);
3620 }
3621 break;
3622
3623 default:
3624 r = setup_comb_exp_check(en->target, state, env);
3625 break;
3626 }
3627 }
3628 break;
3629
3630 #ifdef USE_SUBEXP_CALL
3631 case NT_CALL:
3632 if (IS_CALL_RECURSION(NCALL(node)))
3633 env->has_recursion = 1;
3634 else
3635 r = setup_comb_exp_check(NCALL(node)->target, state, env);
3636 break;
3637 #endif
3638
3639 default:
3640 break;
3641 }
3642
3643 return r;
3644 }
3645 #endif
3646
3647 #define IN_ALT (1<<0)
3648 #define IN_NOT (1<<1)
3649 #define IN_REPEAT (1<<2)
3650 #define IN_VAR_REPEAT (1<<3)
3651
3652 /* setup_tree does the following work.
3653 1. check empty loop. (set qn->target_empty_info)
3654 2. expand ignore-case in char class.
3655 3. set memory status bit flags. (reg->mem_stats)
3656 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
3657 5. find invalid patterns in look-behind.
3658 6. expand repeated string.
3659 */
3660 static int
3661 setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
3662 {
3663 int type;
3664 int r = 0;
3665
3666 type = NTYPE(node);
3667 switch (type) {
3668 case NT_LIST:
3669 {
3670 Node* prev = NULL_NODE;
3671 do {
3672 r = setup_tree(NCAR(node), reg, state, env);
3673 if (IS_NOT_NULL(prev) && r == 0) {
3674 r = next_setup(prev, NCAR(node), reg);
3675 }
3676 prev = NCAR(node);
3677 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3678 }
3679 break;
3680
3681 case NT_ALT:
3682 do {
3683 r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);
3684 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3685 break;
3686
3687 case NT_CCLASS:
3688 break;
3689
3690 case NT_STR:
3691 if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
3692 r = expand_case_fold_string(node, reg);
3693 }
3694 break;
3695
3696 case NT_CTYPE:
3697 case NT_CANY:
3698 break;
3699
3700 #ifdef USE_SUBEXP_CALL
3701 case NT_CALL:
3702 break;
3703 #endif
3704
3705 case NT_BREF:
3706 {
3707 int i;
3708 int* p;
3709 Node** nodes = SCANENV_MEM_NODES(env);
3710 BRefNode* br = NBREF(node);
3711 p = BACKREFS_P(br);
3712 for (i = 0; i < br->back_num; i++) {
3713 if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
3714 BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
3715 BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
3716 #ifdef USE_BACKREF_WITH_LEVEL
3717 if (IS_BACKREF_NEST_LEVEL(br)) {
3718 BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
3719 }
3720 #endif
3721 SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
3722 }
3723 }
3724 break;
3725
3726 case NT_QTFR:
3727 {
3728 OnigDistance d;
3729 QtfrNode* qn = NQTFR(node);
3730 Node* target = qn->target;
3731
3732 if ((state & IN_REPEAT) != 0) {
3733 qn->state |= NST_IN_REPEAT;
3734 }
3735
3736 if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
3737 r = get_min_match_length(target, &d, env);
3738 if (r) break;
3739 if (d == 0) {
3740 qn->target_empty_info = NQ_TARGET_IS_EMPTY;
3741 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3742 r = quantifiers_memory_node_info(target);
3743 if (r < 0) break;
3744 if (r > 0) {
3745 qn->target_empty_info = r;
3746 }
3747 #endif
3748 #if 0
3749 r = get_max_match_length(target, &d, env);
3750 if (r == 0 && d == 0) {
3751 /* ()* ==> ()?, ()+ ==> () */
3752 qn->upper = 1;
3753 if (qn->lower > 1) qn->lower = 1;
3754 if (NTYPE(target) == NT_STR) {
3755 qn->upper = qn->lower = 0; /* /(?:)+/ ==> // */
3756 }
3757 }
3758 #endif
3759 }
3760 }
3761
3762 state |= IN_REPEAT;
3763 if (qn->lower != qn->upper)
3764 state |= IN_VAR_REPEAT;
3765 r = setup_tree(target, reg, state, env);
3766 if (r) break;
3767
3768 /* expand string */
3769 #define EXPAND_STRING_MAX_LENGTH 100
3770 if (NTYPE(target) == NT_STR) {
3771 if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper &&
3772 qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) {
3773 int len = NSTRING_LEN(target);
3774 StrNode* sn = NSTR(target);
3775
3776 if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) {
3777 int i, n = qn->lower;
3778 onig_node_conv_to_str_node(node, NSTR(target)->flag);
3779 for (i = 0; i < n; i++) {
3780 r = onig_node_str_cat(node, sn->s, sn->end);
3781 if (r) break;
3782 }
3783 onig_node_free(target);
3784 break; /* break case NT_QTFR: */
3785 }
3786 }
3787 }
3788
3789 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
3790 if (qn->greedy && (qn->target_empty_info != 0)) {
3791 if (NTYPE(target) == NT_QTFR) {
3792 QtfrNode* tqn = NQTFR(target);
3793 if (IS_NOT_NULL(tqn->head_exact)) {
3794 qn->head_exact = tqn->head_exact;
3795 tqn->head_exact = NULL;
3796 }
3797 }
3798 else {
3799 qn->head_exact = get_head_value_node(qn->target, 1, reg);
3800 }
3801 }
3802 #endif
3803 }
3804 break;
3805
3806 case NT_ENCLOSE:
3807 {
3808 EncloseNode* en = NENCLOSE(node);
3809
3810 switch (en->type) {
3811 case ENCLOSE_OPTION:
3812 {
3813 OnigOptionType options = reg->options;
3814 reg->options = NENCLOSE(node)->option;
3815 r = setup_tree(NENCLOSE(node)->target, reg, state, env);
3816 reg->options = options;
3817 }
3818 break;
3819
3820 case ENCLOSE_MEMORY:
3821 if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) {
3822 BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
3823 /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
3824 }
3825 r = setup_tree(en->target, reg, state, env);
3826 break;
3827
3828 case ENCLOSE_STOP_BACKTRACK:
3829 {
3830 Node* target = en->target;
3831 r = setup_tree(target, reg, state, env);
3832 if (NTYPE(target) == NT_QTFR) {
3833 QtfrNode* tqn = NQTFR(target);
3834 if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
3835 tqn->greedy != 0) { /* (?>a*), a*+ etc... */
3836 int qtype = NTYPE(tqn->target);
3837 if (IS_NODE_TYPE_SIMPLE(qtype))
3838 SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);
3839 }
3840 }
3841 }
3842 break;
3843 }
3844 }
3845 break;
3846
3847 case NT_ANCHOR:
3848 {
3849 AnchorNode* an = NANCHOR(node);
3850
3851 switch (an->type) {
3852 case ANCHOR_PREC_READ:
3853 r = setup_tree(an->target, reg, state, env);
3854 break;
3855 case ANCHOR_PREC_READ_NOT:
3856 r = setup_tree(an->target, reg, (state | IN_NOT), env);
3857 break;
3858
3859 /* allowed node types in look-behind */
3860 #define ALLOWED_TYPE_IN_LB \
3861 ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
3862 BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
3863
3864 #define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY )
3865 #define ALLOWED_ENCLOSE_IN_LB_NOT 0
3866
3867 #define ALLOWED_ANCHOR_IN_LB \
3868 ( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
3869 #define ALLOWED_ANCHOR_IN_LB_NOT \
3870 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
3871
3872 case ANCHOR_LOOK_BEHIND:
3873 {
3874 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
3875 ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB);
3876 if (r < 0) return r;
3877 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3878 r = setup_look_behind(node, reg, env);
3879 if (r != 0) return r;
3880 r = setup_tree(an->target, reg, state, env);
3881 }
3882 break;
3883
3884 case ANCHOR_LOOK_BEHIND_NOT:
3885 {
3886 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
3887 ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
3888 if (r < 0) return r;
3889 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3890 r = setup_look_behind(node, reg, env);
3891 if (r != 0) return r;
3892 r = setup_tree(an->target, reg, (state | IN_NOT), env);
3893 }
3894 break;
3895 }
3896 }
3897 break;
3898
3899 default:
3900 break;
3901 }
3902
3903 return r;
3904 }
3905
3906 /* set skip map for Boyer-Moor search */
3907 static int
3908 set_bm_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
3909 UChar skip[], int** int_skip)
3910 {
3911 int i, len;
3912
3913 len = (int)(end - s);
3914 if (len < ONIG_CHAR_TABLE_SIZE) {
3915 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar)len;
3916
3917 for (i = 0; i < len - 1; i++)
3918 skip[s[i]] = (UChar)(len - 1 - i);
3919 }
3920 else {
3921 if (IS_NULL(*int_skip)) {
3922 *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
3923 if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
3924 }
3925 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = len;
3926
3927 for (i = 0; i < len - 1; i++)
3928 (*int_skip)[s[i]] = len - 1 - i;
3929 }
3930 return 0;
3931 }
3932
3933 #define OPT_EXACT_MAXLEN 24
3934
3935 typedef struct {
3936 OnigDistance min; /* min byte length */
3937 OnigDistance max; /* max byte length */
3938 } MinMaxLen;
3939
3940 typedef struct {
3941 MinMaxLen mmd;
3942 OnigEncoding enc;
3943 OnigOptionType options;
3944 OnigCaseFoldType case_fold_flag;
3945 ScanEnv* scan_env;
3946 } OptEnv;
3947
3948 typedef struct {
3949 int left_anchor;
3950 int right_anchor;
3951 } OptAncInfo;
3952
3953 typedef struct {
3954 MinMaxLen mmd; /* info position */
3955 OptAncInfo anc;
3956
3957 int reach_end;
3958 int ignore_case;
3959 int len;
3960 UChar s[OPT_EXACT_MAXLEN];
3961 } OptExactInfo;
3962
3963 typedef struct {
3964 MinMaxLen mmd; /* info position */
3965 OptAncInfo anc;
3966
3967 int value; /* weighted value */
3968 UChar map[ONIG_CHAR_TABLE_SIZE];
3969 } OptMapInfo;
3970
3971 typedef struct {
3972 MinMaxLen len;
3973
3974 OptAncInfo anc;
3975 OptExactInfo exb; /* boundary */
3976 OptExactInfo exm; /* middle */
3977 OptExactInfo expr; /* prec read (?=...) */
3978
3979 OptMapInfo map; /* boundary */
3980 } NodeOptInfo;
3981
3982
3983 static int
3984 map_position_value(OnigEncoding enc, int i)
3985 {
3986 static const short int ByteValTable[] = {
3987 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
3988 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3989 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
3990 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
3991 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
3992 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
3993 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
3994 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
3995 };
3996
3997 if (i < (int )(sizeof(ByteValTable)/sizeof(ByteValTable[0]))) {
3998 if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
3999 return 20;
4000 else
4001 return (int )ByteValTable[i];
4002 }
4003 else
4004 return 4; /* Take it easy. */
4005 }
4006
4007 static int
4008 distance_value(MinMaxLen* mm)
4009 {
4010 /* 1000 / (min-max-dist + 1) */
4011 static const short int dist_vals[] = {
4012 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
4013 91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
4014 48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
4015 32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
4016 24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
4017 20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
4018 16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
4019 14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
4020 12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
4021 11, 11, 11, 11, 11, 10, 10, 10, 10, 10
4022 };
4023
4024 int d;
4025
4026 if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
4027
4028 d = mm->max - mm->min;
4029 if (d < (int )(sizeof(dist_vals)/sizeof(dist_vals[0])))
4030 /* return dist_vals[d] * 16 / (mm->min + 12); */
4031 return (int )dist_vals[d];
4032 else
4033 return 1;
4034 }
4035
4036 static int
4037 comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2)
4038 {
4039 if (v2 <= 0) return -1;
4040 if (v1 <= 0) return 1;
4041
4042 v1 *= distance_value(d1);
4043 v2 *= distance_value(d2);
4044
4045 if (v2 > v1) return 1;
4046 if (v2 < v1) return -1;
4047
4048 if (d2->min < d1->min) return 1;
4049 if (d2->min > d1->min) return -1;
4050 return 0;
4051 }
4052
4053 static int
4054 is_equal_mml(MinMaxLen* a, MinMaxLen* b)
4055 {
4056 return (a->min == b->min && a->max == b->max) ? 1 : 0;
4057 }
4058
4059
4060 static void
4061 set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max)
4062 {
4063 mml->min = min;
4064 mml->max = max;
4065 }
4066
4067 static void
4068 clear_mml(MinMaxLen* mml)
4069 {
4070 mml->min = mml->max = 0;
4071 }
4072
4073 static void
4074 copy_mml(MinMaxLen* to, MinMaxLen* from)
4075 {
4076 to->min = from->min;
4077 to->max = from->max;
4078 }
4079
4080 static void
4081 add_mml(MinMaxLen* to, MinMaxLen* from)
4082 {
4083 to->min = distance_add(to->min, from->min);
4084 to->max = distance_add(to->max, from->max);
4085 }
4086
4087 #if 0
4088 static void
4089 add_len_mml(MinMaxLen* to, OnigDistance len)
4090 {
4091 to->min = distance_add(to->min, len);
4092 to->max = distance_add(to->max, len);
4093 }
4094 #endif
4095
4096 static void
4097 alt_merge_mml(MinMaxLen* to, MinMaxLen* from)
4098 {
4099 if (to->min > from->min) to->min = from->min;
4100 if (to->max < from->max) to->max = from->max;
4101 }
4102
4103 static void
4104 copy_opt_env(OptEnv* to, OptEnv* from)
4105 {
4106 *to = *from;
4107 }
4108
4109 static void
4110 clear_opt_anc_info(OptAncInfo* anc)
4111 {
4112 anc->left_anchor = 0;
4113 anc->right_anchor = 0;
4114 }
4115
4116 static void
4117 copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from)
4118 {
4119 *to = *from;
4120 }
4121
4122 static void
4123 concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,
4124 OnigDistance left_len, OnigDistance right_len)
4125 {
4126 clear_opt_anc_info(to);
4127
4128 to->left_anchor = left->left_anchor;
4129 if (left_len == 0) {
4130 to->left_anchor |= right->left_anchor;
4131 }
4132
4133 to->right_anchor = right->right_anchor;
4134 if (right_len == 0) {
4135 to->right_anchor |= left->right_anchor;
4136 }
4137 }
4138
4139 static int
4140 is_left_anchor(int anc)
4141 {
4142 if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||
4143 anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||
4144 anc == ANCHOR_PREC_READ_NOT)
4145 return 0;
4146
4147 return 1;
4148 }
4149
4150 static int
4151 is_set_opt_anc_info(OptAncInfo* to, int anc)
4152 {
4153 if ((to->left_anchor & anc) != 0) return 1;
4154
4155 return ((to->right_anchor & anc) != 0 ? 1 : 0);
4156 }
4157
4158 static void
4159 add_opt_anc_info(OptAncInfo* to, int anc)
4160 {
4161 if (is_left_anchor(anc))
4162 to->left_anchor |= anc;
4163 else
4164 to->right_anchor |= anc;
4165 }
4166
4167 static void
4168 remove_opt_anc_info(OptAncInfo* to, int anc)
4169 {
4170 if (is_left_anchor(anc))
4171 to->left_anchor &= ~anc;
4172 else
4173 to->right_anchor &= ~anc;
4174 }
4175
4176 static void
4177 alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add)
4178 {
4179 to->left_anchor &= add->left_anchor;
4180 to->right_anchor &= add->right_anchor;
4181 }
4182
4183 static int
4184 is_full_opt_exact_info(OptExactInfo* ex)
4185 {
4186 return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);
4187 }
4188
4189 static void
4190 clear_opt_exact_info(OptExactInfo* ex)
4191 {
4192 clear_mml(&ex->mmd);
4193 clear_opt_anc_info(&ex->anc);
4194 ex->reach_end = 0;
4195 ex->ignore_case = 0;
4196 ex->len = 0;
4197 ex->s[0] = '\0';
4198 }
4199
4200 static void
4201 copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from)
4202 {
4203 *to = *from;
4204 }
4205
4206 static void
4207 concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
4208 {
4209 int i, j, len;
4210 UChar *p, *end;
4211 OptAncInfo tanc;
4212
4213 if (! to->ignore_case && add->ignore_case) {
4214 if (to->len >= add->len) return ; /* avoid */
4215
4216 to->ignore_case = 1;
4217 }
4218
4219 p = add->s;
4220 end = p + add->len;
4221 for (i = to->len; p < end; ) {
4222 len = enclen(enc, p);
4223 if (i + len > OPT_EXACT_MAXLEN) break;
4224 for (j = 0; j < len && p < end; j++)
4225 to->s[i++] = *p++;
4226 }
4227
4228 to->len = i;
4229 to->reach_end = (p == end ? add->reach_end : 0);
4230
4231 concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
4232 if (! to->reach_end) tanc.right_anchor = 0;
4233 copy_opt_anc_info(&to->anc, &tanc);
4234 }
4235
4236 static void
4237 concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end,
4238 int raw ARG_UNUSED, OnigEncoding enc)
4239 {
4240 int i, j, len;
4241 UChar *p;
4242
4243 for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
4244 len = enclen(enc, p);
4245 if (i + len > OPT_EXACT_MAXLEN) break;
4246 for (j = 0; j < len && p < end; j++)
4247 to->s[i++] = *p++;
4248 }
4249
4250 to->len = i;
4251 }
4252
4253 static void
4254 alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
4255 {
4256 int i, j, len;
4257
4258 if (add->len == 0 || to->len == 0) {
4259 clear_opt_exact_info(to);
4260 return ;
4261 }
4262
4263 if (! is_equal_mml(&to->mmd, &add->mmd)) {
4264 clear_opt_exact_info(to);
4265 return ;
4266 }
4267
4268 for (i = 0; i < to->len && i < add->len; ) {
4269 if (to->s[i] != add->s[i]) break;
4270 len = enclen(env->enc, to->s + i);
4271
4272 for (j = 1; j < len; j++) {
4273 if (to->s[i+j] != add->s[i+j]) break;
4274 }
4275 if (j < len) break;
4276 i += len;
4277 }
4278
4279 if (! add->reach_end || i < add->len || i < to->len) {
4280 to->reach_end = 0;
4281 }
4282 to->len = i;
4283 to->ignore_case |= add->ignore_case;
4284
4285 alt_merge_opt_anc_info(&to->anc, &add->anc);
4286 if (! to->reach_end) to->anc.right_anchor = 0;
4287 }
4288
4289 static void
4290 select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
4291 {
4292 int v1, v2;
4293
4294 v1 = now->len;
4295 v2 = alt->len;
4296
4297 if (v2 == 0) {
4298 return ;
4299 }
4300 else if (v1 == 0) {
4301 copy_opt_exact_info(now, alt);
4302 return ;
4303 }
4304 else if (v1 <= 2 && v2 <= 2) {
4305 /* ByteValTable[x] is big value --> low price */
4306 v2 = map_position_value(enc, now->s[0]);
4307 v1 = map_position_value(enc, alt->s[0]);
4308
4309 if (now->len > 1) v1 += 5;
4310 if (alt->len > 1) v2 += 5;
4311 }
4312
4313 if (now->ignore_case == 0) v1 *= 2;
4314 if (alt->ignore_case == 0) v2 *= 2;
4315
4316 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4317 copy_opt_exact_info(now, alt);
4318 }
4319
4320 static void
4321 clear_opt_map_info(OptMapInfo* map)
4322 {
4323 static const OptMapInfo clean_info = {
4324 {0, 0}, {0, 0}, 0,
4325 {
4326 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4327 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4328 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4329 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4330 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4331 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4332 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4333 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4334 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4335 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4336 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4337 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4338 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4339 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4340 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4341 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4342 }
4343 };
4344
4345 xmemcpy(map, &clean_info, sizeof(OptMapInfo));
4346 }
4347
4348 static void
4349 copy_opt_map_info(OptMapInfo* to, OptMapInfo* from)
4350 {
4351 *to = *from;
4352 }
4353
4354 static void
4355 add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc)
4356 {
4357 if (map->map[c] == 0) {
4358 map->map[c] = 1;
4359 map->value += map_position_value(enc, c);
4360 }
4361 }
4362
4363 static int
4364 add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
4365 OnigEncoding enc, OnigCaseFoldType case_fold_flag)
4366 {
4367 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
4368 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
4369 int i, n;
4370
4371 add_char_opt_map_info(map, p[0], enc);
4372
4373 case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
4374 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);
4375 if (n < 0) return n;
4376
4377 for (i = 0; i < n; i++) {
4378 ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
4379 add_char_opt_map_info(map, buf[0], enc);
4380 }
4381
4382 return 0;
4383 }
4384
4385 static void
4386 select_opt_map_info(OptMapInfo* now, OptMapInfo* alt)
4387 {
4388 static int z = 1<<15; /* 32768: something big value */
4389
4390 int v1, v2;
4391
4392 if (alt->value == 0) return ;
4393 if (now->value == 0) {
4394 copy_opt_map_info(now, alt);
4395 return ;
4396 }
4397
4398 v1 = z / now->value;
4399 v2 = z / alt->value;
4400 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4401 copy_opt_map_info(now, alt);
4402 }
4403
4404 static int
4405 comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m)
4406 {
4407 #define COMP_EM_BASE 20
4408 int ve, vm;
4409
4410 if (m->value <= 0) return -1;
4411
4412 ve = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2);
4413 vm = COMP_EM_BASE * 5 * 2 / m->value;
4414 return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
4415 }
4416
4417 static void
4418 alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add)
4419 {
4420 int i, val;
4421
4422 /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
4423 if (to->value == 0) return ;
4424 if (add->value == 0 || to->mmd.max < add->mmd.min) {
4425 clear_opt_map_info(to);
4426 return ;
4427 }
4428
4429 alt_merge_mml(&to->mmd, &add->mmd);
4430
4431 val = 0;
4432 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
4433 if (add->map[i])
4434 to->map[i] = 1;
4435
4436 if (to->map[i])
4437 val += map_position_value(enc, i);
4438 }
4439 to->value = val;
4440
4441 alt_merge_opt_anc_info(&to->anc, &add->anc);
4442 }
4443
4444 static void
4445 set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd)
4446 {
4447 copy_mml(&(opt->exb.mmd), mmd);
4448 copy_mml(&(opt->expr.mmd), mmd);
4449 copy_mml(&(opt->map.mmd), mmd);
4450 }
4451
4452 static void
4453 clear_node_opt_info(NodeOptInfo* opt)
4454 {
4455 clear_mml(&opt->len);
4456 clear_opt_anc_info(&opt->anc);
4457 clear_opt_exact_info(&opt->exb);
4458 clear_opt_exact_info(&opt->exm);
4459 clear_opt_exact_info(&opt->expr);
4460 clear_opt_map_info(&opt->map);
4461 }
4462
4463 static void
4464 copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from)
4465 {
4466 *to = *from;
4467 }
4468
4469 static void
4470 concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
4471 {
4472 int exb_reach, exm_reach;
4473 OptAncInfo tanc;
4474
4475 concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
4476 copy_opt_anc_info(&to->anc, &tanc);
4477
4478 if (add->exb.len > 0 && to->len.max == 0) {
4479 concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
4480 to->len.max, add->len.max);
4481 copy_opt_anc_info(&add->exb.anc, &tanc);
4482 }
4483
4484 if (add->map.value > 0 && to->len.max == 0) {
4485 if (add->map.mmd.max == 0)
4486 add->map.anc.left_anchor |= to->anc.left_anchor;
4487 }
4488
4489 exb_reach = to->exb.reach_end;
4490 exm_reach = to->exm.reach_end;
4491
4492 if (add->len.max != 0)
4493 to->exb.reach_end = to->exm.reach_end = 0;
4494
4495 if (add->exb.len > 0) {
4496 if (exb_reach) {
4497 concat_opt_exact_info(&to->exb, &add->exb, enc);
4498 clear_opt_exact_info(&add->exb);
4499 }
4500 else if (exm_reach) {
4501 concat_opt_exact_info(&to->exm, &add->exb, enc);
4502 clear_opt_exact_info(&add->exb);
4503 }
4504 }
4505 select_opt_exact_info(enc, &to->exm, &add->exb);
4506 select_opt_exact_info(enc, &to->exm, &add->exm);
4507
4508 if (to->expr.len > 0) {
4509 if (add->len.max > 0) {
4510 if (to->expr.len > (int )add->len.max)
4511 to->expr.len = add->len.max;
4512
4513 if (to->expr.mmd.max == 0)
4514 select_opt_exact_info(enc, &to->exb, &to->expr);
4515 else
4516 select_opt_exact_info(enc, &to->exm, &to->expr);
4517 }
4518 }
4519 else if (add->expr.len > 0) {
4520 copy_opt_exact_info(&to->expr, &add->expr);
4521 }
4522
4523 select_opt_map_info(&to->map, &add->map);
4524
4525 add_mml(&to->len, &add->len);
4526 }
4527
4528 static void
4529 alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env)
4530 {
4531 alt_merge_opt_anc_info (&to->anc, &add->anc);
4532 alt_merge_opt_exact_info(&to->exb, &add->exb, env);
4533 alt_merge_opt_exact_info(&to->exm, &add->exm, env);
4534 alt_merge_opt_exact_info(&to->expr, &add->expr, env);
4535 alt_merge_opt_map_info(env->enc, &to->map, &add->map);
4536
4537 alt_merge_mml(&to->len, &add->len);
4538 }
4539
4540
4541 #define MAX_NODE_OPT_INFO_REF_COUNT 5
4542
4543 static int
4544 optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
4545 {
4546 int type;
4547 int r = 0;
4548
4549 clear_node_opt_info(opt);
4550 set_bound_node_opt_info(opt, &env->mmd);
4551
4552 type = NTYPE(node);
4553 switch (type) {
4554 case NT_LIST:
4555 {
4556 OptEnv nenv;
4557 NodeOptInfo nopt;
4558 Node* nd = node;
4559
4560 copy_opt_env(&nenv, env);
4561 do {
4562 r = optimize_node_left(NCAR(nd), &nopt, &nenv);
4563 if (r == 0) {
4564 add_mml(&nenv.mmd, &nopt.len);
4565 concat_left_node_opt_info(env->enc, opt, &nopt);
4566 }
4567 } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
4568 }
4569 break;
4570
4571 case NT_ALT:
4572 {
4573 NodeOptInfo nopt;
4574 Node* nd = node;
4575
4576 do {
4577 r = optimize_node_left(NCAR(nd), &nopt, env);
4578 if (r == 0) {
4579 if (nd == node) copy_node_opt_info(opt, &nopt);
4580 else alt_merge_node_opt_info(opt, &nopt, env);
4581 }
4582 } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
4583 }
4584 break;
4585
4586 case NT_STR:
4587 {
4588 StrNode* sn = NSTR(node);
4589 int slen = (int)(sn->end - sn->s);
4590 int is_raw = NSTRING_IS_RAW(node);
4591
4592 if (! NSTRING_IS_AMBIG(node)) {
4593 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
4594 NSTRING_IS_RAW(node), env->enc);
4595 if (slen > 0) {
4596 add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
4597 }
4598 set_mml(&opt->len, slen, slen);
4599 }
4600 else {
4601 int max;
4602
4603 if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
4604 int n = onigenc_strlen(env->enc, sn->s, sn->end);
4605 max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;
4606 }
4607 else {
4608 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
4609 is_raw, env->enc);
4610 opt->exb.ignore_case = 1;
4611
4612 if (slen > 0) {
4613 r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
4614 env->enc, env->case_fold_flag);
4615 if (r != 0) break;
4616 }
4617
4618 max = slen;
4619 }
4620
4621 set_mml(&opt->len, slen, max);
4622 }
4623
4624 if (opt->exb.len == slen)
4625 opt->exb.reach_end = 1;
4626 }
4627 break;
4628
4629 case NT_CCLASS:
4630 {
4631 int i, z;
4632 CClassNode* cc = NCCLASS(node);
4633
4634 /* no need to check ignore case. (setted in setup_tree()) */
4635
4636 if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
4637 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
4638 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
4639
4640 set_mml(&opt->len, min, max);
4641 }
4642 else {
4643 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
4644 z = BITSET_AT(cc->bs, i);
4645 if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
4646 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
4647 }
4648 }
4649 set_mml(&opt->len, 1, 1);
4650 }
4651 }
4652 break;
4653
4654 case NT_CTYPE:
4655 {
4656 int i, min, max;
4657
4658 max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
4659
4660 if (max == 1) {
4661 min = 1;
4662
4663 switch (NCTYPE(node)->ctype) {
4664 case ONIGENC_CTYPE_WORD:
4665 if (NCTYPE(node)->not != 0) {
4666 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
4667 if (! ONIGENC_IS_CODE_WORD(env->enc, i)) {
4668 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
4669 }
4670 }
4671 }
4672 else {
4673 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
4674 if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
4675 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
4676 }
4677 }
4678 }
4679 break;
4680 }
4681 }
4682 else {
4683 min = ONIGENC_MBC_MINLEN(env->enc);
4684 }
4685 set_mml(&opt->len, min, max);
4686 }
4687 break;
4688
4689 case NT_CANY:
4690 {
4691 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
4692 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
4693 set_mml(&opt->len, min, max);
4694 }
4695 break;
4696
4697 case NT_ANCHOR:
4698 switch (NANCHOR(node)->type) {
4699 case ANCHOR_BEGIN_BUF:
4700 case ANCHOR_BEGIN_POSITION:
4701 case ANCHOR_BEGIN_LINE:
4702 case ANCHOR_END_BUF:
4703 case ANCHOR_SEMI_END_BUF:
4704 case ANCHOR_END_LINE:
4705 add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
4706 break;
4707
4708 case ANCHOR_PREC_READ:
4709 {
4710 NodeOptInfo nopt;
4711
4712 r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
4713 if (r == 0) {
4714 if (nopt.exb.len > 0)
4715 copy_opt_exact_info(&opt->expr, &nopt.exb);
4716 else if (nopt.exm.len > 0)
4717 copy_opt_exact_info(&opt->expr, &nopt.exm);
4718
4719 opt->expr.reach_end = 0;
4720
4721 if (nopt.map.value > 0)
4722 copy_opt_map_info(&opt->map, &nopt.map);
4723 }
4724 }
4725 break;
4726
4727 case ANCHOR_PREC_READ_NOT:
4728 case ANCHOR_LOOK_BEHIND: /* Sorry, I can't make use of it. */
4729 case ANCHOR_LOOK_BEHIND_NOT:
4730 break;
4731 }
4732 break;
4733
4734 case NT_BREF:
4735 {
4736 int i;
4737 int* backs;
4738 OnigDistance min, max, tmin, tmax;
4739 Node** nodes = SCANENV_MEM_NODES(env->scan_env);
4740 BRefNode* br = NBREF(node);
4741
4742 if (br->state & NST_RECURSION) {
4743 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
4744 break;
4745 }
4746 backs = BACKREFS_P(br);
4747 r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
4748 if (r != 0) break;
4749 r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
4750 if (r != 0) break;
4751 for (i = 1; i < br->back_num; i++) {
4752 r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
4753 if (r != 0) break;
4754 r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
4755 if (r != 0) break;
4756 if (min > tmin) min = tmin;
4757 if (max < tmax) max = tmax;
4758 }
4759 if (r == 0) set_mml(&opt->len, min, max);
4760 }
4761 break;
4762
4763 #ifdef USE_SUBEXP_CALL
4764 case NT_CALL:
4765 if (IS_CALL_RECURSION(NCALL(node)))
4766 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
4767 else {
4768 OnigOptionType save = env->options;
4769 env->options = NENCLOSE(NCALL(node)->target)->option;
4770 r = optimize_node_left(NCALL(node)->target, opt, env);
4771 env->options = save;
4772 }
4773 break;
4774 #endif
4775
4776 case NT_QTFR:
4777 {
4778 int i;
4779 OnigDistance min, max;
4780 NodeOptInfo nopt;
4781 QtfrNode* qn = NQTFR(node);
4782
4783 r = optimize_node_left(qn->target, &nopt, env);
4784 if (r) break;
4785
4786 if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) {
4787 if (env->mmd.max == 0 &&
4788 NTYPE(qn->target) == NT_CANY && qn->greedy) {
4789 if (IS_MULTILINE(env->options))
4790 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
4791 else
4792 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
4793 }
4794 }
4795 else {
4796 if (qn->lower > 0) {
4797 copy_node_opt_info(opt, &nopt);
4798 if (nopt.exb.len > 0) {
4799 if (nopt.exb.reach_end) {
4800 for (i = 2; i <= qn->lower &&
4801 ! is_full_opt_exact_info(&opt->exb); i++) {
4802 concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
4803 }
4804 if (i < qn->lower) {
4805 opt->exb.reach_end = 0;
4806 }
4807 }
4808 }
4809
4810 if (qn->lower != qn->upper) {
4811 opt->exb.reach_end = 0;
4812 opt->exm.reach_end = 0;
4813 }
4814 if (qn->lower > 1)
4815 opt->exm.reach_end = 0;
4816 }
4817 }
4818
4819 min = distance_multiply(nopt.len.min, qn->lower);
4820 if (IS_REPEAT_INFINITE(qn->upper))
4821 max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
4822 else
4823 max = distance_multiply(nopt.len.max, qn->upper);
4824
4825 set_mml(&opt->len, min, max);
4826 }
4827 break;
4828
4829 case NT_ENCLOSE:
4830 {
4831 EncloseNode* en = NENCLOSE(node);
4832
4833 switch (en->type) {
4834 case ENCLOSE_OPTION:
4835 {
4836 OnigOptionType save = env->options;
4837
4838 env->options = en->option;
4839 r = optimize_node_left(en->target, opt, env);
4840 env->options = save;
4841 }
4842 break;
4843
4844 case ENCLOSE_MEMORY:
4845 #ifdef USE_SUBEXP_CALL
4846 en->opt_count++;
4847 if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
4848 OnigDistance min, max;
4849
4850 min = 0;
4851 max = ONIG_INFINITE_DISTANCE;
4852 if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
4853 if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
4854 set_mml(&opt->len, min, max);
4855 }
4856 else
4857 #endif
4858 {
4859 r = optimize_node_left(en->target, opt, env);
4860
4861 if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
4862 if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
4863 remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
4864 }
4865 }
4866 break;
4867
4868 case ENCLOSE_STOP_BACKTRACK:
4869 r = optimize_node_left(en->target, opt, env);
4870 break;
4871 }
4872 }
4873 break;
4874
4875 default:
4876 #ifdef ONIG_DEBUG
4877 fprintf(stderr, "optimize_node_left: undefined node type %d\n",
4878 NTYPE(node));
4879 #endif
4880 r = ONIGERR_TYPE_BUG;
4881 break;
4882 }
4883
4884 return r;
4885 }
4886
4887 static int
4888 set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
4889 {
4890 int r;
4891
4892 if (e->len == 0) return 0;
4893
4894 if (e->ignore_case) {
4895 reg->exact = (UChar* )xmalloc(e->len);
4896 CHECK_NULL_RETURN_MEMERR(reg->exact);
4897 xmemcpy(reg->exact, e->s, e->len);
4898 reg->exact_end = reg->exact + e->len;
4899 reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
4900 }
4901 else {
4902 int allow_reverse;
4903
4904 reg->exact = str_dup(e->s, e->s + e->len);
4905 CHECK_NULL_RETURN_MEMERR(reg->exact);
4906 reg->exact_end = reg->exact + e->len;
4907
4908 allow_reverse =
4909 ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
4910
4911 if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
4912 r = set_bm_skip(reg->exact, reg->exact_end, reg->enc,
4913 reg->map, &(reg->int_map));
4914 if (r) return r;
4915
4916 reg->optimize = (allow_reverse != 0
4917 ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
4918 }
4919 else {
4920 reg->optimize = ONIG_OPTIMIZE_EXACT;
4921 }
4922 }
4923
4924 reg->dmin = e->mmd.min;
4925 reg->dmax = e->mmd.max;
4926
4927 if (reg->dmin != ONIG_INFINITE_DISTANCE) {
4928 reg->threshold_len = reg->dmin + (int)(reg->exact_end - reg->exact);
4929 }
4930
4931 return 0;
4932 }
4933
4934 static void
4935 set_optimize_map_info(regex_t* reg, OptMapInfo* m)
4936 {
4937 int i;
4938
4939 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
4940 reg->map[i] = m->map[i];
4941
4942 reg->optimize = ONIG_OPTIMIZE_MAP;
4943 reg->dmin = m->mmd.min;
4944 reg->dmax = m->mmd.max;
4945
4946 if (reg->dmin != ONIG_INFINITE_DISTANCE) {
4947 reg->threshold_len = reg->dmin + 1;
4948 }
4949 }
4950
4951 static void
4952 set_sub_anchor(regex_t* reg, OptAncInfo* anc)
4953 {
4954 reg->sub_anchor |= anc->left_anchor & ANCHOR_BEGIN_LINE;
4955 reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
4956 }
4957
4958 #ifdef ONIG_DEBUG
4959 static void print_optimize_info(FILE* f, regex_t* reg);
4960 #endif
4961
4962 static int
4963 set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
4964 {
4965
4966 int r;
4967 NodeOptInfo opt;
4968 OptEnv env;
4969
4970 env.enc = reg->enc;
4971 env.options = reg->options;
4972 env.case_fold_flag = reg->case_fold_flag;
4973 env.scan_env = scan_env;
4974 clear_mml(&env.mmd);
4975
4976 r = optimize_node_left(node, &opt, &env);
4977 if (r) return r;
4978
4979 reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
4980 ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML);
4981
4982 reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF);
4983
4984 if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
4985 reg->anchor_dmin = opt.len.min;
4986 reg->anchor_dmax = opt.len.max;
4987 }
4988
4989 if (opt.exb.len > 0 || opt.exm.len > 0) {
4990 select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
4991 if (opt.map.value > 0 &&
4992 comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
4993 goto set_map;
4994 }
4995 else {
4996 r = set_optimize_exact_info(reg, &opt.exb);
4997 set_sub_anchor(reg, &opt.exb.anc);
4998 }
4999 }
5000 else if (opt.map.value > 0) {
5001 set_map:
5002 set_optimize_map_info(reg, &opt.map);
5003 set_sub_anchor(reg, &opt.map.anc);
5004 }
5005 else {
5006 reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE;
5007 if (opt.len.max == 0)
5008 reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE;
5009 }
5010
5011 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5012 print_optimize_info(stderr, reg);
5013 #endif
5014 return r;
5015 }
5016
5017 static void
5018 clear_optimize_info(regex_t* reg)
5019 {
5020 reg->optimize = ONIG_OPTIMIZE_NONE;
5021 reg->anchor = 0;
5022 reg->anchor_dmin = 0;
5023 reg->anchor_dmax = 0;
5024 reg->sub_anchor = 0;
5025 reg->exact_end = (UChar* )NULL;
5026 reg->threshold_len = 0;
5027 if (IS_NOT_NULL(reg->exact)) {
5028 xfree(reg->exact);
5029 reg->exact = (UChar* )NULL;
5030 }
5031 }
5032
5033 #ifdef ONIG_DEBUG
5034
5035 static void print_enc_string(FILE* fp, OnigEncoding enc,
5036 const UChar *s, const UChar *end)
5037 {
5038 fprintf(fp, "\nPATTERN: /");
5039
5040 if (ONIGENC_MBC_MINLEN(enc) > 1) {
5041 const UChar *p;
5042 OnigCodePoint code;
5043
5044 p = s;
5045 while (p < end) {
5046 code = ONIGENC_MBC_TO_CODE(enc, p, end);
5047 if (code >= 0x80) {
5048 fprintf(fp, " 0x%04x ", (int )code);
5049 }
5050 else {
5051 fputc((int )code, fp);
5052 }
5053
5054 p += enclen(enc, p);
5055 }
5056 }
5057 else {
5058 while (s < end) {
5059 fputc((int )*s, fp);
5060 s++;
5061 }
5062 }
5063
5064 fprintf(fp, "/\n");
5065 }
5066
5067 static void
5068 print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
5069 {
5070 if (a == ONIG_INFINITE_DISTANCE)
5071 fputs("inf", f);
5072 else
5073 fprintf(f, "(%u)", a);
5074
5075 fputs("-", f);
5076
5077 if (b == ONIG_INFINITE_DISTANCE)
5078 fputs("inf", f);
5079 else
5080 fprintf(f, "(%u)", b);
5081 }
5082
5083 static void
5084 print_anchor(FILE* f, int anchor)
5085 {
5086 int q = 0;
5087
5088 fprintf(f, "[");
5089
5090 if (anchor & ANCHOR_BEGIN_BUF) {
5091 fprintf(f, "begin-buf");
5092 q = 1;
5093 }
5094 if (anchor & ANCHOR_BEGIN_LINE) {
5095 if (q) fprintf(f, ", ");
5096 q = 1;
5097 fprintf(f, "begin-line");
5098 }
5099 if (anchor & ANCHOR_BEGIN_POSITION) {
5100 if (q) fprintf(f, ", ");
5101 q = 1;
5102 fprintf(f, "begin-pos");
5103 }
5104 if (anchor & ANCHOR_END_BUF) {
5105 if (q) fprintf(f, ", ");
5106 q = 1;
5107 fprintf(f, "end-buf");
5108 }
5109 if (anchor & ANCHOR_SEMI_END_BUF) {
5110 if (q) fprintf(f, ", ");
5111 q = 1;
5112 fprintf(f, "semi-end-buf");
5113 }
5114 if (anchor & ANCHOR_END_LINE) {
5115 if (q) fprintf(f, ", ");
5116 q = 1;
5117 fprintf(f, "end-line");
5118 }
5119 if (anchor & ANCHOR_ANYCHAR_STAR) {
5120 if (q) fprintf(f, ", ");
5121 q = 1;
5122 fprintf(f, "anychar-star");
5123 }
5124 if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
5125 if (q) fprintf(f, ", ");
5126 fprintf(f, "anychar-star-pl");
5127 }
5128
5129 fprintf(f, "]");
5130 }
5131
5132 static void
5133 print_optimize_info(FILE* f, regex_t* reg)
5134 {
5135 static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
5136 "EXACT_IC", "MAP" };
5137
5138 fprintf(f, "optimize: %s\n", on[reg->optimize]);
5139 fprintf(f, " anchor: "); print_anchor(f, reg->anchor);
5140 if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
5141 print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
5142 fprintf(f, "\n");
5143
5144 if (reg->optimize) {
5145 fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor);
5146 fprintf(f, "\n");
5147 }
5148 fprintf(f, "\n");
5149
5150 if (reg->exact) {
5151 UChar *p;
5152 fprintf(f, "exact: [");
5153 for (p = reg->exact; p < reg->exact_end; p++) {
5154 fputc(*p, f);
5155 }
5156 fprintf(f, "]: length: %d\n", (reg->exact_end - reg->exact));
5157 }
5158 else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
5159 int c, i, n = 0;
5160
5161 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5162 if (reg->map[i]) n++;
5163
5164 fprintf(f, "map: n=%d\n", n);
5165 if (n > 0) {
5166 c = 0;
5167 fputc('[', f);
5168 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
5169 if (reg->map[i] != 0) {
5170 if (c > 0) fputs(", ", f);
5171 c++;
5172 if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
5173 ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
5174 fputc(i, f);
5175 else
5176 fprintf(f, "%d", i);
5177 }
5178 }
5179 fprintf(f, "]\n");
5180 }
5181 }
5182 }
5183 #endif /* ONIG_DEBUG */
5184
5185
5186 extern void
5187 onig_free_body(regex_t* reg)
5188 {
5189 if (IS_NOT_NULL(reg)) {
5190 if (IS_NOT_NULL(reg->p)) xfree(reg->p);
5191 if (IS_NOT_NULL(reg->exact)) xfree(reg->exact);
5192 if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map);
5193 if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward);
5194 if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range);
5195 if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain);
5196
5197 #ifdef USE_NAMED_GROUP
5198 onig_names_free(reg);
5199 #endif
5200 }
5201 }
5202
5203 extern void
5204 onig_free(regex_t* reg)
5205 {
5206 if (IS_NOT_NULL(reg)) {
5207 onig_free_body(reg);
5208 xfree(reg);
5209 }
5210 }
5211
5212 #define REGEX_TRANSFER(to,from) do {\
5213 (to)->state = ONIG_STATE_MODIFY;\
5214 onig_free_body(to);\
5215 xmemcpy(to, from, sizeof(regex_t));\
5216 xfree(from);\
5217 } while (0)
5218
5219 extern void
5220 onig_transfer(regex_t* to, regex_t* from)
5221 {
5222 THREAD_ATOMIC_START;
5223 REGEX_TRANSFER(to, from);
5224 THREAD_ATOMIC_END;
5225 }
5226
5227 #define REGEX_CHAIN_HEAD(reg) do {\
5228 while (IS_NOT_NULL((reg)->chain)) {\
5229 (reg) = (reg)->chain;\
5230 }\
5231 } while (0)
5232
5233 extern void
5234 onig_chain_link_add(regex_t* to, regex_t* add)
5235 {
5236 THREAD_ATOMIC_START;
5237 REGEX_CHAIN_HEAD(to);
5238 to->chain = add;
5239 THREAD_ATOMIC_END;
5240 }
5241
5242 extern void
5243 onig_chain_reduce(regex_t* reg)
5244 {
5245 regex_t *head, *prev;
5246
5247 prev = reg;
5248 head = prev->chain;
5249 if (IS_NOT_NULL(head)) {
5250 reg->state = ONIG_STATE_MODIFY;
5251 while (IS_NOT_NULL(head->chain)) {
5252 prev = head;
5253 head = head->chain;
5254 }
5255 prev->chain = (regex_t* )NULL;
5256 REGEX_TRANSFER(reg, head);
5257 }
5258 }
5259
5260 #ifdef ONIG_DEBUG
5261 static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg));
5262 #endif
5263 #ifdef ONIG_DEBUG_PARSE_TREE
5264 static void print_tree P_((FILE* f, Node* node));
5265 #endif
5266
5267 extern int
5268 onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5269 OnigErrorInfo* einfo)
5270 {
5271 #define COMPILE_INIT_SIZE 20
5272
5273 int r, init_size;
5274 Node* root;
5275 ScanEnv scan_env;
5276 #ifdef USE_SUBEXP_CALL
5277 UnsetAddrList uslist;
5278 #endif
5279
5280 if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
5281
5282 reg->state = ONIG_STATE_COMPILING;
5283
5284 #ifdef ONIG_DEBUG
5285 print_enc_string(stderr, reg->enc, pattern, pattern_end);
5286 #endif
5287
5288 if (reg->alloc == 0) {
5289 init_size = ((int)(pattern_end - pattern)) * 2;
5290 if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
5291 r = BBUF_INIT(reg, init_size);
5292 if (r != 0) goto end;
5293 }
5294 else
5295 reg->used = 0;
5296
5297 reg->num_mem = 0;
5298 reg->num_repeat = 0;
5299 reg->num_null_check = 0;
5300 reg->repeat_range_alloc = 0;
5301 reg->repeat_range = (OnigRepeatRange* )NULL;
5302 #ifdef USE_COMBINATION_EXPLOSION_CHECK
5303 reg->num_comb_exp_check = 0;
5304 #endif
5305
5306 r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
5307 if (r != 0) goto err;
5308
5309 #ifdef USE_NAMED_GROUP
5310 /* mixed use named group and no-named group */
5311 if (scan_env.num_named > 0 &&
5312 IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
5313 !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
5314 if (scan_env.num_named != scan_env.num_mem)
5315 r = disable_noname_group_capture(&root, reg, &scan_env);
5316 else
5317 r = numbered_ref_check(root);
5318
5319 if (r != 0) goto err;
5320 }
5321 #endif
5322
5323 #ifdef USE_SUBEXP_CALL
5324 if (scan_env.num_call > 0) {
5325 r = unset_addr_list_init(&uslist, scan_env.num_call);
5326 if (r != 0) goto err;
5327 scan_env.unset_addr_list = &uslist;
5328 r = setup_subexp_call(root, &scan_env);
5329 if (r != 0) goto err_unset;
5330 r = subexp_recursive_check_trav(root, &scan_env);
5331 if (r < 0) goto err_unset;
5332 r = subexp_inf_recursive_check_trav(root, &scan_env);
5333 if (r != 0) goto err_unset;
5334
5335 reg->num_call = scan_env.num_call;
5336 }
5337 else
5338 reg->num_call = 0;
5339 #endif
5340
5341 r = setup_tree(root, reg, 0, &scan_env);
5342 if (r != 0) goto err_unset;
5343
5344 #ifdef ONIG_DEBUG_PARSE_TREE
5345 print_tree(stderr, root);
5346 #endif
5347
5348 reg->capture_history = scan_env.capture_history;
5349 reg->bt_mem_start = scan_env.bt_mem_start;
5350 reg->bt_mem_start |= reg->capture_history;
5351 if (IS_FIND_CONDITION(reg->options))
5352 BIT_STATUS_ON_ALL(reg->bt_mem_end);
5353 else {
5354 reg->bt_mem_end = scan_env.bt_mem_end;
5355 reg->bt_mem_end |= reg->capture_history;
5356 }
5357
5358 #ifdef USE_COMBINATION_EXPLOSION_CHECK
5359 if (scan_env.backrefed_mem == 0
5360 #ifdef USE_SUBEXP_CALL
5361 || scan_env.num_call == 0
5362 #endif
5363 ) {
5364 setup_comb_exp_check(root, 0, &scan_env);
5365 #ifdef USE_SUBEXP_CALL
5366 if (scan_env.has_recursion != 0) {
5367 scan_env.num_comb_exp_check = 0;
5368 }
5369 else
5370 #endif
5371 if (scan_env.comb_exp_max_regnum > 0) {
5372 int i;
5373 for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
5374 if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
5375 scan_env.num_comb_exp_check = 0;
5376 break;
5377 }
5378 }
5379 }
5380 }
5381
5382 reg->num_comb_exp_check = scan_env.num_comb_exp_check;
5383 #endif
5384
5385 clear_optimize_info(reg);
5386 #ifndef ONIG_DONT_OPTIMIZE
5387 r = set_optimize_info_from_tree(root, reg, &scan_env);
5388 if (r != 0) goto err_unset;
5389 #endif
5390
5391 if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
5392 xfree(scan_env.mem_nodes_dynamic);
5393 scan_env.mem_nodes_dynamic = (Node** )NULL;
5394 }
5395
5396 r = compile_tree(root, reg);
5397 if (r == 0) {
5398 r = add_opcode(reg, OP_END);
5399 #ifdef USE_SUBEXP_CALL
5400 if (scan_env.num_call > 0) {
5401 r = unset_addr_list_fix(&uslist, reg);
5402 unset_addr_list_end(&uslist);
5403 if (r) goto err;
5404 }
5405 #endif
5406
5407 if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
5408 reg->stack_pop_level = STACK_POP_LEVEL_ALL;
5409 else {
5410 if (reg->bt_mem_start != 0)
5411 reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
5412 else
5413 reg->stack_pop_level = STACK_POP_LEVEL_FREE;
5414 }
5415 }
5416 #ifdef USE_SUBEXP_CALL
5417 else if (scan_env.num_call > 0) {
5418 unset_addr_list_end(&uslist);
5419 }
5420 #endif
5421 onig_node_free(root);
5422
5423 #ifdef ONIG_DEBUG_COMPILE
5424 #ifdef USE_NAMED_GROUP
5425 onig_print_names(stderr, reg);
5426 #endif
5427 print_compiled_byte_code_list(stderr, reg);
5428 #endif
5429
5430 end:
5431 reg->state = ONIG_STATE_NORMAL;
5432 return r;
5433
5434 err_unset:
5435 #ifdef USE_SUBEXP_CALL
5436 if (scan_env.num_call > 0) {
5437 unset_addr_list_end(&uslist);
5438 }
5439 #endif
5440 err:
5441 if (IS_NOT_NULL(scan_env.error)) {
5442 if (IS_NOT_NULL(einfo)) {
5443 einfo->enc = scan_env.enc;
5444 einfo->par = scan_env.error;
5445 einfo->par_end = scan_env.error_end;
5446 }
5447 }
5448
5449 onig_node_free(root);
5450 if (IS_NOT_NULL(scan_env.mem_nodes_dynamic))
5451 xfree(scan_env.mem_nodes_dynamic);
5452 return r;
5453 }
5454
5455 #ifdef USE_RECOMPILE_API
5456 extern int
5457 onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5458 OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
5459 OnigErrorInfo* einfo)
5460 {
5461 int r;
5462 regex_t *new_reg;
5463
5464 r = onig_new(&new_reg, pattern, pattern_end, option, enc, syntax, einfo);
5465 if (r) return r;
5466 if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
5467 onig_transfer(reg, new_reg);
5468 }
5469 else {
5470 onig_chain_link_add(reg, new_reg);
5471 }
5472 return 0;
5473 }
5474 #endif
5475
5476 static int onig_inited = 0;
5477
5478 extern int
5479 onig_reg_init(regex_t* reg, OnigOptionType option,
5480 OnigCaseFoldType case_fold_flag,
5481 OnigEncoding enc, OnigSyntaxType* syntax)
5482 {
5483 if (! onig_inited)
5484 onig_init();
5485
5486 if (IS_NULL(reg))
5487 return ONIGERR_INVALID_ARGUMENT;
5488
5489 if (ONIGENC_IS_UNDEF(enc))
5490 return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED;
5491
5492 if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
5493 == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
5494 return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
5495 }
5496
5497 (reg)->state = ONIG_STATE_MODIFY;
5498
5499 if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
5500 option |= syntax->options;
5501 option &= ~ONIG_OPTION_SINGLELINE;
5502 }
5503 else
5504 option |= syntax->options;
5505
5506 (reg)->enc = enc;
5507 (reg)->options = option;
5508 (reg)->syntax = syntax;
5509 (reg)->optimize = 0;
5510 (reg)->exact = (UChar* )NULL;
5511 (reg)->int_map = (int* )NULL;
5512 (reg)->int_map_backward = (int* )NULL;
5513 (reg)->chain = (regex_t* )NULL;
5514
5515 (reg)->p = (UChar* )NULL;
5516 (reg)->alloc = 0;
5517 (reg)->used = 0;
5518 (reg)->name_table = (void* )NULL;
5519
5520 (reg)->case_fold_flag = case_fold_flag;
5521 return 0;
5522 }
5523
5524 extern int
5525 onig_new_without_alloc(regex_t* reg, const UChar* pattern,
5526 const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,
5527 OnigSyntaxType* syntax, OnigErrorInfo* einfo)
5528 {
5529 int r;
5530
5531 r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
5532 if (r) return r;
5533
5534 r = onig_compile(reg, pattern, pattern_end, einfo);
5535 return r;
5536 }
5537
5538 extern int
5539 onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
5540 OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
5541 OnigErrorInfo* einfo)
5542 {
5543 int r;
5544
5545 *reg = (regex_t* )xmalloc(sizeof(regex_t));
5546 if (IS_NULL(*reg)) return ONIGERR_MEMORY;
5547
5548 r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
5549 if (r) goto err;
5550
5551 r = onig_compile(*reg, pattern, pattern_end, einfo);
5552 if (r) {
5553 err:
5554 onig_free(*reg);
5555 *reg = NULL;
5556 }
5557 return r;
5558 }
5559
5560
5561 extern int
5562 onig_init(void)
5563 {
5564 if (onig_inited != 0)
5565 return 0;
5566
5567 THREAD_SYSTEM_INIT;
5568 THREAD_ATOMIC_START;
5569
5570 onig_inited = 1;
5571
5572 onigenc_init();
5573 /* onigenc_set_default_caseconv_table((UChar* )0); */
5574
5575 #ifdef ONIG_DEBUG_STATISTICS
5576 onig_statistics_init();
5577 #endif
5578
5579 THREAD_ATOMIC_END;
5580 return 0;
5581 }
5582
5583
5584 static OnigEndCallListItemType* EndCallTop;
5585
5586 extern void onig_add_end_call(void (*func)(void))
5587 {
5588 OnigEndCallListItemType* item;
5589
5590 item = (OnigEndCallListItemType* )xmalloc(sizeof(*item));
5591 if (item == 0) return ;
5592
5593 item->next = EndCallTop;
5594 item->func = func;
5595
5596 EndCallTop = item;
5597 }
5598
5599 static void
5600 exec_end_call_list(void)
5601 {
5602 OnigEndCallListItemType* prev;
5603 void (*func)(void);
5604
5605 while (EndCallTop != 0) {
5606 func = EndCallTop->func;
5607 (*func)();
5608
5609 prev = EndCallTop;
5610 EndCallTop = EndCallTop->next;
5611 xfree(prev);
5612 }
5613 }
5614
5615 extern int
5616 onig_end(void)
5617 {
5618 THREAD_ATOMIC_START;
5619
5620 exec_end_call_list();
5621
5622 #ifdef ONIG_DEBUG_STATISTICS
5623 onig_print_statistics(stderr);
5624 #endif
5625
5626 #ifdef USE_SHARED_CCLASS_TABLE
5627 onig_free_shared_cclass_table();
5628 #endif
5629
5630 #ifdef USE_PARSE_TREE_NODE_RECYCLE
5631 onig_free_node_list();
5632 #endif
5633
5634 onig_inited = 0;
5635
5636 THREAD_ATOMIC_END;
5637 THREAD_SYSTEM_END;
5638 return 0;
5639 }
5640
5641 extern int
5642 onig_is_in_code_range(const UChar* p, OnigCodePoint code)
5643 {
5644 OnigCodePoint n, *data;
5645 OnigCodePoint low, high, x;
5646
5647 GET_CODE_POINT(n, p);
5648 data = (OnigCodePoint* )p;
5649 data++;
5650
5651 for (low = 0, high = n; low < high; ) {
5652 x = (low + high) >> 1;
5653 if (code > data[x * 2 + 1])
5654 low = x + 1;
5655 else
5656 high = x;
5657 }
5658
5659 return ((low < n && code >= data[low * 2]) ? 1 : 0);
5660 }
5661
5662 extern int
5663 onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc)
5664 {
5665 int found;
5666
5667 if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
5668 if (IS_NULL(cc->mbuf)) {
5669 found = 0;
5670 }
5671 else {
5672 found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
5673 }
5674 }
5675 else {
5676 found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
5677 }
5678
5679 if (IS_NCCLASS_NOT(cc))
5680 return !found;
5681 else
5682 return found;
5683 }
5684
5685 extern int
5686 onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
5687 {
5688 int len;
5689
5690 if (ONIGENC_MBC_MINLEN(enc) > 1) {
5691 len = 2;
5692 }
5693 else {
5694 len = ONIGENC_CODE_TO_MBCLEN(enc, code);
5695 }
5696 return onig_is_code_in_cc_len(len, code, cc);
5697 }
5698
5699
5700 #ifdef ONIG_DEBUG
5701
5702 /* arguments type */
5703 #define ARG_SPECIAL -1
5704 #define ARG_NON 0
5705 #define ARG_RELADDR 1
5706 #define ARG_ABSADDR 2
5707 #define ARG_LENGTH 3
5708 #define ARG_MEMNUM 4
5709 #define ARG_OPTION 5
5710 #define ARG_STATE_CHECK 6
5711
5712 OnigOpInfoType OnigOpInfo[] = {
5713 { OP_FINISH, "finish", ARG_NON },
5714 { OP_END, "end", ARG_NON },
5715 { OP_EXACT1, "exact1", ARG_SPECIAL },
5716 { OP_EXACT2, "exact2", ARG_SPECIAL },
5717 { OP_EXACT3, "exact3", ARG_SPECIAL },
5718 { OP_EXACT4, "exact4", ARG_SPECIAL },
5719 { OP_EXACT5, "exact5", ARG_SPECIAL },
5720 { OP_EXACTN, "exactn", ARG_SPECIAL },
5721 { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL },
5722 { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL },
5723 { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL },
5724 { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL },
5725 { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL },
5726 { OP_EXACTMBN, "exactmbn", ARG_SPECIAL },
5727 { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL },
5728 { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL },
5729 { OP_CCLASS, "cclass", ARG_SPECIAL },
5730 { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL },
5731 { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL },
5732 { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL },
5733 { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL },
5734 { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL },
5735 { OP_CCLASS_NODE, "cclass-node", ARG_SPECIAL },
5736 { OP_ANYCHAR, "anychar", ARG_NON },
5737 { OP_ANYCHAR_ML, "anychar-ml", ARG_NON },
5738 { OP_ANYCHAR_STAR, "anychar*", ARG_NON },
5739 { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON },
5740 { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
5741 { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
5742 { OP_WORD, "word", ARG_NON },
5743 { OP_NOT_WORD, "not-word", ARG_NON },
5744 { OP_WORD_BOUND, "word-bound", ARG_NON },
5745 { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON },
5746 { OP_WORD_BEGIN, "word-begin", ARG_NON },
5747 { OP_WORD_END, "word-end", ARG_NON },
5748 { OP_BEGIN_BUF, "begin-buf", ARG_NON },
5749 { OP_END_BUF, "end-buf", ARG_NON },
5750 { OP_BEGIN_LINE, "begin-line", ARG_NON },
5751 { OP_END_LINE, "end-line", ARG_NON },
5752 { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON },
5753 { OP_BEGIN_POSITION, "begin-position", ARG_NON },
5754 { OP_BACKREF1, "backref1", ARG_NON },
5755 { OP_BACKREF2, "backref2", ARG_NON },
5756 { OP_BACKREFN, "backrefn", ARG_MEMNUM },
5757 { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL },
5758 { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },
5759 { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL },
5760 { OP_BACKREF_WITH_LEVEL, "backref_at_level", ARG_SPECIAL },
5761 { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM },
5762 { OP_MEMORY_START, "mem-start", ARG_MEMNUM },
5763 { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM },
5764 { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM },
5765 { OP_MEMORY_END, "mem-end", ARG_MEMNUM },
5766 { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM },
5767 { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION },
5768 { OP_SET_OPTION, "set-option", ARG_OPTION },
5769 { OP_FAIL, "fail", ARG_NON },
5770 { OP_JUMP, "jump", ARG_RELADDR },
5771 { OP_PUSH, "push", ARG_RELADDR },
5772 { OP_POP, "pop", ARG_NON },
5773 { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL },
5774 { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL },
5775 { OP_REPEAT, "repeat", ARG_SPECIAL },
5776 { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL },
5777 { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM },
5778 { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM },
5779 { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM },
5780 { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM },
5781 { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM },
5782 { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM },
5783 { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM },
5784 { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM },
5785 { OP_PUSH_POS, "push-pos", ARG_NON },
5786 { OP_POP_POS, "pop-pos", ARG_NON },
5787 { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR },
5788 { OP_FAIL_POS, "fail-pos", ARG_NON },
5789 { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON },
5790 { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON },
5791 { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL },
5792 { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
5793 { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
5794 { OP_CALL, "call", ARG_ABSADDR },
5795 { OP_RETURN, "return", ARG_NON },
5796 { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL },
5797 { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
5798 { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK },
5799 { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK },
5800 { OP_STATE_CHECK_ANYCHAR_ML_STAR,
5801 "state-check-anychar-ml*", ARG_STATE_CHECK },
5802 { -1, "", ARG_NON }
5803 };
5804
5805 static char*
5806 op2name(int opcode)
5807 {
5808 int i;
5809
5810 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
5811 if (opcode == OnigOpInfo[i].opcode)
5812 return OnigOpInfo[i].name;
5813 }
5814 return "";
5815 }
5816
5817 static int
5818 op2arg_type(int opcode)
5819 {
5820 int i;
5821
5822 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
5823 if (opcode == OnigOpInfo[i].opcode)
5824 return OnigOpInfo[i].arg_type;
5825 }
5826 return ARG_SPECIAL;
5827 }
5828
5829 static void
5830 Indent(FILE* f, int indent)
5831 {
5832 int i;
5833 for (i = 0; i < indent; i++) putc(' ', f);
5834 }
5835
5836 static void
5837 p_string(FILE* f, int len, UChar* s)
5838 {
5839 fputs(":", f);
5840 while (len-- > 0) { fputc(*s++, f); }
5841 }
5842
5843 static void
5844 p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
5845 {
5846 int x = len * mb_len;
5847
5848 fprintf(f, ":%d:", len);
5849 while (x-- > 0) { fputc(*s++, f); }
5850 }
5851
5852 extern void
5853 onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp,
5854 OnigEncoding enc)
5855 {
5856 int i, n, arg_type;
5857 RelAddrType addr;
5858 LengthType len;
5859 MemNumType mem;
5860 StateCheckNumType scn;
5861 OnigCodePoint code;
5862 UChar *q;
5863
5864 fprintf(f, "[%s", op2name(*bp));
5865 arg_type = op2arg_type(*bp);
5866 if (arg_type != ARG_SPECIAL) {
5867 bp++;
5868 switch (arg_type) {
5869 case ARG_NON:
5870 break;
5871 case ARG_RELADDR:
5872 GET_RELADDR_INC(addr, bp);
5873 fprintf(f, ":(%d)", addr);
5874 break;
5875 case ARG_ABSADDR:
5876 GET_ABSADDR_INC(addr, bp);
5877 fprintf(f, ":(%d)", addr);
5878 break;
5879 case ARG_LENGTH:
5880 GET_LENGTH_INC(len, bp);
5881 fprintf(f, ":%d", len);
5882 break;
5883 case ARG_MEMNUM:
5884 mem = *((MemNumType* )bp);
5885 bp += SIZE_MEMNUM;
5886 fprintf(f, ":%d", mem);
5887 break;
5888 case ARG_OPTION:
5889 {
5890 OnigOptionType option = *((OnigOptionType* )bp);
5891 bp += SIZE_OPTION;
5892 fprintf(f, ":%d", option);
5893 }
5894 break;
5895
5896 case ARG_STATE_CHECK:
5897 scn = *((StateCheckNumType* )bp);
5898 bp += SIZE_STATE_CHECK_NUM;
5899 fprintf(f, ":%d", scn);
5900 break;
5901 }
5902 }
5903 else {
5904 switch (*bp++) {
5905 case OP_EXACT1:
5906 case OP_ANYCHAR_STAR_PEEK_NEXT:
5907 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
5908 p_string(f, 1, bp++); break;
5909 case OP_EXACT2:
5910 p_string(f, 2, bp); bp += 2; break;
5911 case OP_EXACT3:
5912 p_string(f, 3, bp); bp += 3; break;
5913 case OP_EXACT4:
5914 p_string(f, 4, bp); bp += 4; break;
5915 case OP_EXACT5:
5916 p_string(f, 5, bp); bp += 5; break;
5917 case OP_EXACTN:
5918 GET_LENGTH_INC(len, bp);
5919 p_len_string(f, len, 1, bp);
5920 bp += len;
5921 break;
5922
5923 case OP_EXACTMB2N1:
5924 p_string(f, 2, bp); bp += 2; break;
5925 case OP_EXACTMB2N2:
5926 p_string(f, 4, bp); bp += 4; break;
5927 case OP_EXACTMB2N3:
5928 p_string(f, 6, bp); bp += 6; break;
5929 case OP_EXACTMB2N:
5930 GET_LENGTH_INC(len, bp);
5931 p_len_string(f, len, 2, bp);
5932 bp += len * 2;
5933 break;
5934 case OP_EXACTMB3N:
5935 GET_LENGTH_INC(len, bp);
5936 p_len_string(f, len, 3, bp);
5937 bp += len * 3;
5938 break;
5939 case OP_EXACTMBN:
5940 {
5941 int mb_len;
5942
5943 GET_LENGTH_INC(mb_len, bp);
5944 GET_LENGTH_INC(len, bp);
5945 fprintf(f, ":%d:%d:", mb_len, len);
5946 n = len * mb_len;
5947 while (n-- > 0) { fputc(*bp++, f); }
5948 }
5949 break;
5950
5951 case OP_EXACT1_IC:
5952 len = enclen(enc, bp);
5953 p_string(f, len, bp);
5954 bp += len;
5955 break;
5956 case OP_EXACTN_IC:
5957 GET_LENGTH_INC(len, bp);
5958 p_len_string(f, len, 1, bp);
5959 bp += len;
5960 break;
5961
5962 case OP_CCLASS:
5963 n = bitset_on_num((BitSetRef )bp);
5964 bp += SIZE_BITSET;
5965 fprintf(f, ":%d", n);
5966 break;
5967
5968 case OP_CCLASS_NOT:
5969 n = bitset_on_num((BitSetRef )bp);
5970 bp += SIZE_BITSET;
5971 fprintf(f, ":%d", n);
5972 break;
5973
5974 case OP_CCLASS_MB:
5975 case OP_CCLASS_MB_NOT:
5976 GET_LENGTH_INC(len, bp);
5977 q = bp;
5978 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
5979 ALIGNMENT_RIGHT(q);
5980 #endif
5981 GET_CODE_POINT(code, q);
5982 bp += len;
5983 fprintf(f, ":%d:%d", (int )code, len);
5984 break;
5985
5986 case OP_CCLASS_MIX:
5987 case OP_CCLASS_MIX_NOT:
5988 n = bitset_on_num((BitSetRef )bp);
5989 bp += SIZE_BITSET;
5990 GET_LENGTH_INC(len, bp);
5991 q = bp;
5992 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
5993 ALIGNMENT_RIGHT(q);
5994 #endif
5995 GET_CODE_POINT(code, q);
5996 bp += len;
5997 fprintf(f, ":%d:%d:%d", n, (int )code, len);
5998 break;
5999
6000 case OP_CCLASS_NODE:
6001 {
6002 CClassNode *cc;
6003
6004 GET_POINTER_INC(cc, bp);
6005 n = bitset_on_num(cc->bs);
6006 fprintf(f, ":%u:%d", (unsigned int )cc, n);
6007 }
6008 break;
6009
6010 case OP_BACKREFN_IC:
6011 mem = *((MemNumType* )bp);
6012 bp += SIZE_MEMNUM;
6013 fprintf(f, ":%d", mem);
6014 break;
6015
6016 case OP_BACKREF_MULTI_IC:
6017 case OP_BACKREF_MULTI:
6018 fputs(" ", f);
6019 GET_LENGTH_INC(len, bp);
6020 for (i = 0; i < len; i++) {
6021 GET_MEMNUM_INC(mem, bp);
6022 if (i > 0) fputs(", ", f);
6023 fprintf(f, "%d", mem);
6024 }
6025 break;
6026
6027 case OP_BACKREF_WITH_LEVEL:
6028 {
6029 OnigOptionType option;
6030 LengthType level;
6031
6032 GET_OPTION_INC(option, bp);
6033 fprintf(f, ":%d", option);
6034 GET_LENGTH_INC(level, bp);
6035 fprintf(f, ":%d", level);
6036
6037 fputs(" ", f);
6038 GET_LENGTH_INC(len, bp);
6039 for (i = 0; i < len; i++) {
6040 GET_MEMNUM_INC(mem, bp);
6041 if (i > 0) fputs(", ", f);
6042 fprintf(f, "%d", mem);
6043 }
6044 }
6045 break;
6046
6047 case OP_REPEAT:
6048 case OP_REPEAT_NG:
6049 {
6050 mem = *((MemNumType* )bp);
6051 bp += SIZE_MEMNUM;
6052 addr = *((RelAddrType* )bp);
6053 bp += SIZE_RELADDR;
6054 fprintf(f, ":%d:%d", mem, addr);
6055 }
6056 break;
6057
6058 case OP_PUSH_OR_JUMP_EXACT1:
6059 case OP_PUSH_IF_PEEK_NEXT:
6060 addr = *((RelAddrType* )bp);
6061 bp += SIZE_RELADDR;
6062 fprintf(f, ":(%d)", addr);
6063 p_string(f, 1, bp);
6064 bp += 1;
6065 break;
6066
6067 case OP_LOOK_BEHIND:
6068 GET_LENGTH_INC(len, bp);
6069 fprintf(f, ":%d", len);
6070 break;
6071
6072 case OP_PUSH_LOOK_BEHIND_NOT:
6073 GET_RELADDR_INC(addr, bp);
6074 GET_LENGTH_INC(len, bp);
6075 fprintf(f, ":%d:(%d)", len, addr);
6076 break;
6077
6078 case OP_STATE_CHECK_PUSH:
6079 case OP_STATE_CHECK_PUSH_OR_JUMP:
6080 scn = *((StateCheckNumType* )bp);
6081 bp += SIZE_STATE_CHECK_NUM;
6082 addr = *((RelAddrType* )bp);
6083 bp += SIZE_RELADDR;
6084 fprintf(f, ":%d:(%d)", scn, addr);
6085 break;
6086
6087 default:
6088 fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
6089 *--bp);
6090 }
6091 }
6092 fputs("]", f);
6093 if (nextp) *nextp = bp;
6094 }
6095
6096 static void
6097 print_compiled_byte_code_list(FILE* f, regex_t* reg)
6098 {
6099 int ncode;
6100 UChar* bp = reg->p;
6101 UChar* end = reg->p + reg->used;
6102
6103 fprintf(f, "code length: %d\n", reg->used);
6104
6105 ncode = 0;
6106 while (bp < end) {
6107 ncode++;
6108 if (bp > reg->p) {
6109 if (ncode % 5 == 0)
6110 fprintf(f, "\n");
6111 else
6112 fputs(" ", f);
6113 }
6114 onig_print_compiled_byte_code(f, bp, &bp, reg->enc);
6115 }
6116
6117 fprintf(f, "\n");
6118 }
6119
6120 static void
6121 print_indent_tree(FILE* f, Node* node, int indent)
6122 {
6123 int i, type;
6124 int add = 3;
6125 UChar* p;
6126
6127 Indent(f, indent);
6128 if (IS_NULL(node)) {
6129 fprintf(f, "ERROR: null node!!!\n");
6130 exit (0);
6131 }
6132
6133 type = NTYPE(node);
6134 switch (type) {
6135 case NT_LIST:
6136 case NT_ALT:
6137 if (NTYPE(node) == NT_LIST)
6138 fprintf(f, "<list:%x>\n", (int )node);
6139 else
6140 fprintf(f, "<alt:%x>\n", (int )node);
6141
6142 print_indent_tree(f, NCAR(node), indent + add);
6143 while (IS_NOT_NULL(node = NCDR(node))) {
6144 if (NTYPE(node) != type) {
6145 fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
6146 exit(0);
6147 }
6148 print_indent_tree(f, NCAR(node), indent + add);
6149 }
6150 break;
6151
6152 case NT_STR:
6153 fprintf(f, "<string%s:%x>",
6154 (NSTRING_IS_RAW(node) ? "-raw" : ""), (int )node);
6155 for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
6156 if (*p >= 0x20 && *p < 0x7f)
6157 fputc(*p, f);
6158 else {
6159 fprintf(f, " 0x%02x", *p);
6160 }
6161 }
6162 break;
6163
6164 case NT_CCLASS:
6165 fprintf(f, "<cclass:%x>", (int )node);
6166 if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(" not", f);
6167 if (NCCLASS(node)->mbuf) {
6168 BBuf* bbuf = NCCLASS(node)->mbuf;
6169 for (i = 0; i < bbuf->used; i++) {
6170 if (i > 0) fprintf(f, ",");
6171 fprintf(f, "%0x", bbuf->p[i]);
6172 }
6173 }
6174 break;
6175
6176 case NT_CTYPE:
6177 fprintf(f, "<ctype:%x> ", (int )node);
6178 switch (NCTYPE(node)->ctype) {
6179 case ONIGENC_CTYPE_WORD:
6180 if (NCTYPE(node)->not != 0)
6181 fputs("not word", f);
6182 else
6183 fputs("word", f);
6184 break;
6185
6186 default:
6187 fprintf(f, "ERROR: undefined ctype.\n");
6188 exit(0);
6189 }
6190 break;
6191
6192 case NT_CANY:
6193 fprintf(f, "<anychar:%x>", (int )node);
6194 break;
6195
6196 case NT_ANCHOR:
6197 fprintf(f, "<anchor:%x> ", (int )node);
6198 switch (NANCHOR(node)->type) {
6199 case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break;
6200 case ANCHOR_END_BUF: fputs("end buf", f); break;
6201 case ANCHOR_BEGIN_LINE: fputs("begin line", f); break;
6202 case ANCHOR_END_LINE: fputs("end line", f); break;
6203 case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break;
6204 case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
6205
6206 case ANCHOR_WORD_BOUND: fputs("word bound", f); break;
6207 case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break;
6208 #ifdef USE_WORD_BEGIN_END
6209 case ANCHOR_WORD_BEGIN: fputs("word begin", f); break;
6210 case ANCHOR_WORD_END: fputs("word end", f); break;
6211 #endif
6212 case ANCHOR_PREC_READ: fputs("prec read", f); break;
6213 case ANCHOR_PREC_READ_NOT: fputs("prec read not", f); break;
6214 case ANCHOR_LOOK_BEHIND: fputs("look_behind", f); break;
6215 case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); break;
6216
6217 default:
6218 fprintf(f, "ERROR: undefined anchor type.\n");
6219 break;
6220 }
6221 break;
6222
6223 case NT_BREF:
6224 {
6225 int* p;
6226 BRefNode* br = NBREF(node);
6227 p = BACKREFS_P(br);
6228 fprintf(f, "<backref:%x>", (int )node);
6229 for (i = 0; i < br->back_num; i++) {
6230 if (i > 0) fputs(", ", f);
6231 fprintf(f, "%d", p[i]);
6232 }
6233 }
6234 break;
6235
6236 #ifdef USE_SUBEXP_CALL
6237 case NT_CALL:
6238 {
6239 CallNode* cn = NCALL(node);
6240 fprintf(f, "<call:%x>", (int )node);
6241 p_string(f, cn->name_end - cn->name, cn->name);
6242 }
6243 break;
6244 #endif
6245
6246 case NT_QTFR:
6247 fprintf(f, "<quantifier:%x>{%d,%d}%s\n", (int )node,
6248 NQTFR(node)->lower, NQTFR(node)->upper,
6249 (NQTFR(node)->greedy ? "" : "?"));
6250 print_indent_tree(f, NQTFR(node)->target, indent + add);
6251 break;
6252
6253 case NT_ENCLOSE:
6254 fprintf(f, "<enclose:%x> ", (int )node);
6255 switch (NENCLOSE(node)->type) {
6256 case ENCLOSE_OPTION:
6257 fprintf(f, "option:%d", NENCLOSE(node)->option);
6258 break;
6259 case ENCLOSE_MEMORY:
6260 fprintf(f, "memory:%d", NENCLOSE(node)->regnum);
6261 break;
6262 case ENCLOSE_STOP_BACKTRACK:
6263 fprintf(f, "stop-bt");
6264 break;
6265
6266 default:
6267 break;
6268 }
6269 fprintf(f, "\n");
6270 print_indent_tree(f, NENCLOSE(node)->target, indent + add);
6271 break;
6272
6273 default:
6274 fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node));
6275 break;
6276 }
6277
6278 if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&
6279 type != NT_ENCLOSE)
6280 fprintf(f, "\n");
6281 fflush(f);
6282 }
6283 #endif /* ONIG_DEBUG */
6284
6285 #ifdef ONIG_DEBUG_PARSE_TREE
6286 static void
6287 print_tree(FILE* f, Node* node)
6288 {
6289 print_indent_tree(f, node, 0);
6290 }
6291 #endif