]> git.proxmox.com Git - mirror_edk2.git/blob - MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regcomp.c
MdeModulePkg: Add NULL pointer check for RegularExpressionDxe.
[mirror_edk2.git] / MdeModulePkg / Universal / RegularExpressionDxe / Oniguruma / regcomp.c
1 /**********************************************************************
2 regcomp.c - Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5 * Copyright (c) 2002-2013 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * All rights reserved.
7 *
8 * (C) Copyright 2015 Hewlett Packard Enterprise Development LP<BR>
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 #include "regparse.h"
33
34 OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
35
36 extern OnigCaseFoldType
37 onig_get_default_case_fold_flag(void)
38 {
39 return OnigDefaultCaseFoldFlag;
40 }
41
42 extern int
43 onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
44 {
45 OnigDefaultCaseFoldFlag = case_fold_flag;
46 return 0;
47 }
48
49
50 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
51 static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
52 #endif
53
54 static UChar*
55 str_dup(UChar* s, UChar* end)
56 {
57 int len = (int)(end - s);
58
59 if (len > 0) {
60 UChar* r = (UChar* )xmalloc(len + 1);
61 CHECK_NULL_RETURN(r);
62 xmemcpy(r, s, len);
63 r[len] = (UChar )0;
64 return r;
65 }
66 else return NULL;
67 }
68
69 static void
70 swap_node(Node* a, Node* b)
71 {
72 Node c;
73 c = *a; *a = *b; *b = c;
74
75 if (NTYPE(a) == NT_STR) {
76 StrNode* sn = NSTR(a);
77 if (sn->capa == 0) {
78 int len = (int)(sn->end - sn->s);
79 sn->s = sn->buf;
80 sn->end = sn->s + len;
81 }
82 }
83
84 if (NTYPE(b) == NT_STR) {
85 StrNode* sn = NSTR(b);
86 if (sn->capa == 0) {
87 int len = (int)(sn->end - sn->s);
88 sn->s = sn->buf;
89 sn->end = sn->s + len;
90 }
91 }
92 }
93
94 static OnigDistance
95 distance_add(OnigDistance d1, OnigDistance d2)
96 {
97 if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)
98 return ONIG_INFINITE_DISTANCE;
99 else {
100 if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
101 else return ONIG_INFINITE_DISTANCE;
102 }
103 }
104
105 static OnigDistance
106 distance_multiply(OnigDistance d, int m)
107 {
108 if (m == 0) return 0;
109
110 if (d < ONIG_INFINITE_DISTANCE / m)
111 return d * m;
112 else
113 return ONIG_INFINITE_DISTANCE;
114 }
115
116 static int
117 bitset_is_empty(BitSetRef bs)
118 {
119 int i;
120 for (i = 0; i < (int )BITSET_SIZE; i++) {
121 if (bs[i] != 0) return 0;
122 }
123 return 1;
124 }
125
126 #ifdef ONIG_DEBUG
127 static int
128 bitset_on_num(BitSetRef bs)
129 {
130 int i, n;
131
132 n = 0;
133 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
134 if (BITSET_AT(bs, i)) n++;
135 }
136 return n;
137 }
138 #endif
139
140 extern int
141 onig_bbuf_init(BBuf* buf, int size)
142 {
143 if (size <= 0) {
144 size = 0;
145 buf->p = NULL;
146 }
147 else {
148 buf->p = (UChar* )xmalloc(size);
149 if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
150 }
151
152 buf->alloc = size;
153 buf->used = 0;
154 return 0;
155 }
156
157
158 #ifdef USE_SUBEXP_CALL
159
160 static int
161 unset_addr_list_init(UnsetAddrList* uslist, int size)
162 {
163 UnsetAddr* p;
164
165 p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
166 CHECK_NULL_RETURN_MEMERR(p);
167 uslist->num = 0;
168 uslist->alloc = size;
169 uslist->us = p;
170 return 0;
171 }
172
173 static void
174 unset_addr_list_end(UnsetAddrList* uslist)
175 {
176 if (IS_NOT_NULL(uslist->us))
177 xfree(uslist->us);
178 }
179
180 static int
181 unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
182 {
183 UnsetAddr* p;
184 int size;
185
186 if (uslist->num >= uslist->alloc) {
187 size = uslist->alloc * 2;
188 p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size, sizeof(UnsetAddr) * uslist->alloc);
189 CHECK_NULL_RETURN_MEMERR(p);
190 uslist->alloc = size;
191 uslist->us = p;
192 }
193
194 uslist->us[uslist->num].offset = offset;
195 uslist->us[uslist->num].target = node;
196 uslist->num++;
197 return 0;
198 }
199 #endif /* USE_SUBEXP_CALL */
200
201
202 static int
203 add_opcode(regex_t* reg, int opcode)
204 {
205 BBUF_ADD1(reg, ((unsigned char)opcode));
206 return 0;
207 }
208
209 #ifdef USE_COMBINATION_EXPLOSION_CHECK
210 static int
211 add_state_check_num(regex_t* reg, int num)
212 {
213 StateCheckNumType n = (StateCheckNumType )num;
214
215 BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
216 return 0;
217 }
218 #endif
219
220 static int
221 add_rel_addr(regex_t* reg, int addr)
222 {
223 RelAddrType ra = (RelAddrType )addr;
224
225 BBUF_ADD(reg, &ra, SIZE_RELADDR);
226 return 0;
227 }
228
229 static int
230 add_abs_addr(regex_t* reg, int addr)
231 {
232 AbsAddrType ra = (AbsAddrType )addr;
233
234 BBUF_ADD(reg, &ra, SIZE_ABSADDR);
235 return 0;
236 }
237
238 static int
239 add_length(regex_t* reg, int len)
240 {
241 LengthType l = (LengthType )len;
242
243 BBUF_ADD(reg, &l, SIZE_LENGTH);
244 return 0;
245 }
246
247 static int
248 add_mem_num(regex_t* reg, int num)
249 {
250 MemNumType n = (MemNumType )num;
251
252 BBUF_ADD(reg, &n, SIZE_MEMNUM);
253 return 0;
254 }
255
256 static int
257 add_pointer(regex_t* reg, void* addr)
258 {
259 PointerType ptr = (PointerType )addr;
260
261 BBUF_ADD(reg, &ptr, SIZE_POINTER);
262 return 0;
263 }
264
265 static int
266 add_option(regex_t* reg, OnigOptionType option)
267 {
268 BBUF_ADD(reg, &option, SIZE_OPTION);
269 return 0;
270 }
271
272 static int
273 add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
274 {
275 int r;
276
277 r = add_opcode(reg, opcode);
278 if (r) return r;
279 r = add_rel_addr(reg, addr);
280 return r;
281 }
282
283 static int
284 add_bytes(regex_t* reg, UChar* bytes, int len)
285 {
286 BBUF_ADD(reg, bytes, len);
287 return 0;
288 }
289
290 static int
291 add_bitset(regex_t* reg, BitSetRef bs)
292 {
293 BBUF_ADD(reg, bs, SIZE_BITSET);
294 return 0;
295 }
296
297 static int
298 add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
299 {
300 int r;
301
302 r = add_opcode(reg, opcode);
303 if (r) return r;
304 r = add_option(reg, option);
305 return r;
306 }
307
308 static int compile_length_tree(Node* node, regex_t* reg);
309 static int compile_tree(Node* node, regex_t* reg);
310
311
312 #define IS_NEED_STR_LEN_OP_EXACT(op) \
313 ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
314 (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
315
316 static int
317 select_str_opcode(int mb_len, int str_len, int ignore_case)
318 {
319 int op;
320
321 if (ignore_case) {
322 switch (str_len) {
323 case 1: op = OP_EXACT1_IC; break;
324 default: op = OP_EXACTN_IC; break;
325 }
326 }
327 else {
328 switch (mb_len) {
329 case 1:
330 switch (str_len) {
331 case 1: op = OP_EXACT1; break;
332 case 2: op = OP_EXACT2; break;
333 case 3: op = OP_EXACT3; break;
334 case 4: op = OP_EXACT4; break;
335 case 5: op = OP_EXACT5; break;
336 default: op = OP_EXACTN; break;
337 }
338 break;
339
340 case 2:
341 switch (str_len) {
342 case 1: op = OP_EXACTMB2N1; break;
343 case 2: op = OP_EXACTMB2N2; break;
344 case 3: op = OP_EXACTMB2N3; break;
345 default: op = OP_EXACTMB2N; break;
346 }
347 break;
348
349 case 3:
350 op = OP_EXACTMB3N;
351 break;
352
353 default:
354 op = OP_EXACTMBN;
355 break;
356 }
357 }
358 return op;
359 }
360
361 static int
362 compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
363 {
364 int r;
365 int saved_num_null_check = reg->num_null_check;
366
367 if (empty_info != 0) {
368 r = add_opcode(reg, OP_NULL_CHECK_START);
369 if (r) return r;
370 r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
371 if (r) return r;
372 reg->num_null_check++;
373 }
374
375 r = compile_tree(node, reg);
376 if (r) return r;
377
378 if (empty_info != 0) {
379 if (empty_info == NQ_TARGET_IS_EMPTY)
380 r = add_opcode(reg, OP_NULL_CHECK_END);
381 else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
382 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
383 else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
384 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
385
386 if (r) return r;
387 r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
388 }
389 return r;
390 }
391
392 #ifdef USE_SUBEXP_CALL
393 static int
394 compile_call(CallNode* node, regex_t* reg)
395 {
396 int r;
397
398 r = add_opcode(reg, OP_CALL);
399 if (r) return r;
400 r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
401 node->target);
402 if (r) return r;
403 r = add_abs_addr(reg, 0 /*dummy addr.*/);
404 return r;
405 }
406 #endif
407
408 static int
409 compile_tree_n_times(Node* node, int n, regex_t* reg)
410 {
411 int i, r;
412
413 for (i = 0; i < n; i++) {
414 r = compile_tree(node, reg);
415 if (r) return r;
416 }
417 return 0;
418 }
419
420 static int
421 add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, int str_len,
422 regex_t* reg ARG_UNUSED, int ignore_case)
423 {
424 int len;
425 int op = select_str_opcode(mb_len, str_len, ignore_case);
426
427 len = SIZE_OPCODE;
428
429 if (op == OP_EXACTMBN) len += SIZE_LENGTH;
430 if (IS_NEED_STR_LEN_OP_EXACT(op))
431 len += SIZE_LENGTH;
432
433 len += mb_len * str_len;
434 return len;
435 }
436
437 static int
438 add_compile_string(UChar* s, int mb_len, int str_len,
439 regex_t* reg, int ignore_case)
440 {
441 int op = select_str_opcode(mb_len, str_len, ignore_case);
442 add_opcode(reg, op);
443
444 if (op == OP_EXACTMBN)
445 add_length(reg, mb_len);
446
447 if (IS_NEED_STR_LEN_OP_EXACT(op)) {
448 if (op == OP_EXACTN_IC)
449 add_length(reg, mb_len * str_len);
450 else
451 add_length(reg, str_len);
452 }
453
454 add_bytes(reg, s, mb_len * str_len);
455 return 0;
456 }
457
458
459 static int
460 compile_length_string_node(Node* node, regex_t* reg)
461 {
462 int rlen, r, len, prev_len, slen, ambig;
463 OnigEncoding enc = reg->enc;
464 UChar *p, *prev;
465 StrNode* sn;
466
467 sn = NSTR(node);
468 if (sn->end <= sn->s)
469 return 0;
470
471 ambig = NSTRING_IS_AMBIG(node);
472
473 p = prev = sn->s;
474 prev_len = enclen(enc, p);
475 p += prev_len;
476 slen = 1;
477 rlen = 0;
478
479 for (; p < sn->end; ) {
480 len = enclen(enc, p);
481 if (len == prev_len) {
482 slen++;
483 }
484 else {
485 r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
486 rlen += r;
487 prev = p;
488 slen = 1;
489 prev_len = len;
490 }
491 p += len;
492 }
493 r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
494 rlen += r;
495 return rlen;
496 }
497
498 static int
499 compile_length_string_raw_node(StrNode* sn, regex_t* reg)
500 {
501 if (sn->end <= sn->s)
502 return 0;
503
504 return add_compile_string_length(sn->s, 1 /* sb */, (int)(sn->end - sn->s), reg, 0);
505 }
506
507 static int
508 compile_string_node(Node* node, regex_t* reg)
509 {
510 int r, len, prev_len, slen, ambig;
511 OnigEncoding enc = reg->enc;
512 UChar *p, *prev, *end;
513 StrNode* sn;
514
515 sn = NSTR(node);
516 if (sn->end <= sn->s)
517 return 0;
518
519 end = sn->end;
520 ambig = NSTRING_IS_AMBIG(node);
521
522 p = prev = sn->s;
523 prev_len = enclen(enc, p);
524 p += prev_len;
525 slen = 1;
526
527 for (; p < end; ) {
528 len = enclen(enc, p);
529 if (len == prev_len) {
530 slen++;
531 }
532 else {
533 r = add_compile_string(prev, prev_len, slen, reg, ambig);
534 if (r) return r;
535
536 prev = p;
537 slen = 1;
538 prev_len = len;
539 }
540
541 p += len;
542 }
543 return add_compile_string(prev, prev_len, slen, reg, ambig);
544 }
545
546 static int
547 compile_string_raw_node(StrNode* sn, regex_t* reg)
548 {
549 if (sn->end <= sn->s)
550 return 0;
551
552 return add_compile_string(sn->s, 1 /* sb */, (int)(sn->end - sn->s), reg, 0);
553 }
554
555 static int
556 add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
557 {
558 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
559 add_length(reg, mbuf->used);
560 return add_bytes(reg, mbuf->p, mbuf->used);
561 #else
562 int r, pad_size;
563 UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
564
565 GET_ALIGNMENT_PAD_SIZE(p, pad_size);
566 add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
567 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
568
569 r = add_bytes(reg, mbuf->p, mbuf->used);
570
571 /* padding for return value from compile_length_cclass_node() to be fix. */
572 pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
573 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
574 return r;
575 #endif
576 }
577
578 static int
579 compile_length_cclass_node(CClassNode* cc, regex_t* reg)
580 {
581 int len;
582
583 if (IS_NCCLASS_SHARE(cc)) {
584 len = SIZE_OPCODE + SIZE_POINTER;
585 return len;
586 }
587
588 if (IS_NULL(cc->mbuf)) {
589 len = SIZE_OPCODE + SIZE_BITSET;
590 }
591 else {
592 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
593 len = SIZE_OPCODE;
594 }
595 else {
596 len = SIZE_OPCODE + SIZE_BITSET;
597 }
598 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
599 len += SIZE_LENGTH + cc->mbuf->used;
600 #else
601 len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
602 #endif
603 }
604
605 return len;
606 }
607
608 static int
609 compile_cclass_node(CClassNode* cc, regex_t* reg)
610 {
611 int r;
612
613 if (IS_NCCLASS_SHARE(cc)) {
614 add_opcode(reg, OP_CCLASS_NODE);
615 r = add_pointer(reg, cc);
616 return r;
617 }
618
619 if (IS_NULL(cc->mbuf)) {
620 if (IS_NCCLASS_NOT(cc))
621 add_opcode(reg, OP_CCLASS_NOT);
622 else
623 add_opcode(reg, OP_CCLASS);
624
625 r = add_bitset(reg, cc->bs);
626 }
627 else {
628 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
629 if (IS_NCCLASS_NOT(cc))
630 add_opcode(reg, OP_CCLASS_MB_NOT);
631 else
632 add_opcode(reg, OP_CCLASS_MB);
633
634 r = add_multi_byte_cclass(cc->mbuf, reg);
635 }
636 else {
637 if (IS_NCCLASS_NOT(cc))
638 add_opcode(reg, OP_CCLASS_MIX_NOT);
639 else
640 add_opcode(reg, OP_CCLASS_MIX);
641
642 r = add_bitset(reg, cc->bs);
643 if (r) return r;
644 r = add_multi_byte_cclass(cc->mbuf, reg);
645 }
646 }
647
648 return r;
649 }
650
651 static int
652 entry_repeat_range(regex_t* reg, int id, int lower, int upper)
653 {
654 #define REPEAT_RANGE_ALLOC 4
655
656 OnigRepeatRange* p;
657
658 if (reg->repeat_range_alloc == 0) {
659 p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
660 CHECK_NULL_RETURN_MEMERR(p);
661 reg->repeat_range = p;
662 reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
663 }
664 else if (reg->repeat_range_alloc <= id) {
665 int n;
666 n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
667 p = (OnigRepeatRange* )xrealloc(reg->repeat_range,
668 sizeof(OnigRepeatRange) * n,
669 sizeof(OnigRepeatRange) * reg->repeat_range_alloc);
670 CHECK_NULL_RETURN_MEMERR(p);
671 reg->repeat_range = p;
672 reg->repeat_range_alloc = n;
673 }
674 else {
675 p = reg->repeat_range;
676 }
677
678 p[id].lower = lower;
679 p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
680 return 0;
681 }
682
683 static int
684 compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info,
685 regex_t* reg)
686 {
687 int r;
688 int num_repeat = reg->num_repeat;
689
690 r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
691 if (r) return r;
692 r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
693 reg->num_repeat++;
694 if (r) return r;
695 r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
696 if (r) return r;
697
698 r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
699 if (r) return r;
700
701 r = compile_tree_empty_check(qn->target, reg, empty_info);
702 if (r) return r;
703
704 if (
705 #ifdef USE_SUBEXP_CALL
706 reg->num_call > 0 ||
707 #endif
708 IS_QUANTIFIER_IN_REPEAT(qn)) {
709 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
710 }
711 else {
712 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
713 }
714 if (r) return r;
715 r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
716 return r;
717 }
718
719 static int
720 is_anychar_star_quantifier(QtfrNode* qn)
721 {
722 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
723 NTYPE(qn->target) == NT_CANY)
724 return 1;
725 else
726 return 0;
727 }
728
729 #define QUANTIFIER_EXPAND_LIMIT_SIZE 50
730 #define CKN_ON (ckn > 0)
731
732 #ifdef USE_COMBINATION_EXPLOSION_CHECK
733
734 static int
735 compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
736 {
737 int len, mod_tlen, cklen;
738 int ckn;
739 int infinite = IS_REPEAT_INFINITE(qn->upper);
740 int empty_info = qn->target_empty_info;
741 int tlen = compile_length_tree(qn->target, reg);
742
743 if (tlen < 0) return tlen;
744
745 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
746
747 cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
748
749 /* anychar repeat */
750 if (NTYPE(qn->target) == NT_CANY) {
751 if (qn->greedy && infinite) {
752 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
753 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
754 else
755 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
756 }
757 }
758
759 if (empty_info != 0)
760 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
761 else
762 mod_tlen = tlen;
763
764 if (infinite && qn->lower <= 1) {
765 if (qn->greedy) {
766 if (qn->lower == 1)
767 len = SIZE_OP_JUMP;
768 else
769 len = 0;
770
771 len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
772 }
773 else {
774 if (qn->lower == 0)
775 len = SIZE_OP_JUMP;
776 else
777 len = 0;
778
779 len += mod_tlen + SIZE_OP_PUSH + cklen;
780 }
781 }
782 else if (qn->upper == 0) {
783 if (qn->is_refered != 0) /* /(?<n>..){0}/ */
784 len = SIZE_OP_JUMP + tlen;
785 else
786 len = 0;
787 }
788 else if (qn->upper == 1 && qn->greedy) {
789 if (qn->lower == 0) {
790 if (CKN_ON) {
791 len = SIZE_OP_STATE_CHECK_PUSH + tlen;
792 }
793 else {
794 len = SIZE_OP_PUSH + tlen;
795 }
796 }
797 else {
798 len = tlen;
799 }
800 }
801 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
802 len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
803 }
804 else {
805 len = SIZE_OP_REPEAT_INC
806 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
807 if (CKN_ON)
808 len += SIZE_OP_STATE_CHECK;
809 }
810
811 return len;
812 }
813
814 static int
815 compile_quantifier_node(QtfrNode* qn, regex_t* reg)
816 {
817 int r, mod_tlen;
818 int ckn;
819 int infinite = IS_REPEAT_INFINITE(qn->upper);
820 int empty_info = qn->target_empty_info;
821 int tlen = compile_length_tree(qn->target, reg);
822
823 if (tlen < 0) return tlen;
824
825 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
826
827 if (is_anychar_star_quantifier(qn)) {
828 r = compile_tree_n_times(qn->target, qn->lower, reg);
829 if (r) return r;
830 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
831 if (IS_MULTILINE(reg->options))
832 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
833 else
834 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
835 if (r) return r;
836 if (CKN_ON) {
837 r = add_state_check_num(reg, ckn);
838 if (r) return r;
839 }
840
841 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
842 }
843 else {
844 if (IS_MULTILINE(reg->options)) {
845 r = add_opcode(reg, (CKN_ON ?
846 OP_STATE_CHECK_ANYCHAR_ML_STAR
847 : OP_ANYCHAR_ML_STAR));
848 }
849 else {
850 r = add_opcode(reg, (CKN_ON ?
851 OP_STATE_CHECK_ANYCHAR_STAR
852 : OP_ANYCHAR_STAR));
853 }
854 if (r) return r;
855 if (CKN_ON)
856 r = add_state_check_num(reg, ckn);
857
858 return r;
859 }
860 }
861
862 if (empty_info != 0)
863 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
864 else
865 mod_tlen = tlen;
866
867 if (infinite && qn->lower <= 1) {
868 if (qn->greedy) {
869 if (qn->lower == 1) {
870 r = add_opcode_rel_addr(reg, OP_JUMP,
871 (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
872 if (r) return r;
873 }
874
875 if (CKN_ON) {
876 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
877 if (r) return r;
878 r = add_state_check_num(reg, ckn);
879 if (r) return r;
880 r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
881 }
882 else {
883 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
884 }
885 if (r) return r;
886 r = compile_tree_empty_check(qn->target, reg, empty_info);
887 if (r) return r;
888 r = add_opcode_rel_addr(reg, OP_JUMP,
889 -(mod_tlen + (int )SIZE_OP_JUMP
890 + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
891 }
892 else {
893 if (qn->lower == 0) {
894 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
895 if (r) return r;
896 }
897 r = compile_tree_empty_check(qn->target, reg, empty_info);
898 if (r) return r;
899 if (CKN_ON) {
900 r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
901 if (r) return r;
902 r = add_state_check_num(reg, ckn);
903 if (r) return r;
904 r = add_rel_addr(reg,
905 -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
906 }
907 else
908 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
909 }
910 }
911 else if (qn->upper == 0) {
912 if (qn->is_refered != 0) { /* /(?<n>..){0}/ */
913 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
914 if (r) return r;
915 r = compile_tree(qn->target, reg);
916 }
917 else
918 r = 0;
919 }
920 else if (qn->upper == 1 && qn->greedy) {
921 if (qn->lower == 0) {
922 if (CKN_ON) {
923 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
924 if (r) return r;
925 r = add_state_check_num(reg, ckn);
926 if (r) return r;
927 r = add_rel_addr(reg, tlen);
928 }
929 else {
930 r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
931 }
932 if (r) return r;
933 }
934
935 r = compile_tree(qn->target, reg);
936 }
937 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
938 if (CKN_ON) {
939 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
940 if (r) return r;
941 r = add_state_check_num(reg, ckn);
942 if (r) return r;
943 r = add_rel_addr(reg, SIZE_OP_JUMP);
944 }
945 else {
946 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
947 }
948
949 if (r) return r;
950 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
951 if (r) return r;
952 r = compile_tree(qn->target, reg);
953 }
954 else {
955 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
956 if (CKN_ON) {
957 if (r) return r;
958 r = add_opcode(reg, OP_STATE_CHECK);
959 if (r) return r;
960 r = add_state_check_num(reg, ckn);
961 }
962 }
963 return r;
964 }
965
966 #else /* USE_COMBINATION_EXPLOSION_CHECK */
967
968 static int
969 compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
970 {
971 int len, mod_tlen;
972 int infinite = IS_REPEAT_INFINITE(qn->upper);
973 int empty_info = qn->target_empty_info;
974 int tlen = compile_length_tree(qn->target, reg);
975
976 if (tlen < 0) return tlen;
977
978 /* anychar repeat */
979 if (NTYPE(qn->target) == NT_CANY) {
980 if (qn->greedy && infinite) {
981 if (IS_NOT_NULL(qn->next_head_exact))
982 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
983 else
984 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
985 }
986 }
987
988 if (empty_info != 0)
989 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
990 else
991 mod_tlen = tlen;
992
993 if (infinite &&
994 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
995 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
996 len = SIZE_OP_JUMP;
997 }
998 else {
999 len = tlen * qn->lower;
1000 }
1001
1002 if (qn->greedy) {
1003 if (IS_NOT_NULL(qn->head_exact))
1004 len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
1005 else if (IS_NOT_NULL(qn->next_head_exact))
1006 len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
1007 else
1008 len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
1009 }
1010 else
1011 len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
1012 }
1013 else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
1014 len = SIZE_OP_JUMP + tlen;
1015 }
1016 else if (!infinite && qn->greedy &&
1017 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1018 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1019 len = tlen * qn->lower;
1020 len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
1021 }
1022 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1023 len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
1024 }
1025 else {
1026 len = SIZE_OP_REPEAT_INC
1027 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
1028 }
1029
1030 return len;
1031 }
1032
1033 static int
1034 compile_quantifier_node(QtfrNode* qn, regex_t* reg)
1035 {
1036 int i, r, mod_tlen;
1037 int infinite = IS_REPEAT_INFINITE(qn->upper);
1038 int empty_info = qn->target_empty_info;
1039 int tlen = compile_length_tree(qn->target, reg);
1040
1041 if (tlen < 0) return tlen;
1042
1043 if (is_anychar_star_quantifier(qn)) {
1044 r = compile_tree_n_times(qn->target, qn->lower, reg);
1045 if (r) return r;
1046 if (IS_NOT_NULL(qn->next_head_exact)) {
1047 if (IS_MULTILINE(reg->options))
1048 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
1049 else
1050 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
1051 if (r) return r;
1052 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1053 }
1054 else {
1055 if (IS_MULTILINE(reg->options))
1056 return add_opcode(reg, OP_ANYCHAR_ML_STAR);
1057 else
1058 return add_opcode(reg, OP_ANYCHAR_STAR);
1059 }
1060 }
1061
1062 if (empty_info != 0)
1063 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
1064 else
1065 mod_tlen = tlen;
1066
1067 if (infinite &&
1068 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1069 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1070 if (qn->greedy) {
1071 if (IS_NOT_NULL(qn->head_exact))
1072 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
1073 else if (IS_NOT_NULL(qn->next_head_exact))
1074 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
1075 else
1076 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
1077 }
1078 else {
1079 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
1080 }
1081 if (r) return r;
1082 }
1083 else {
1084 r = compile_tree_n_times(qn->target, qn->lower, reg);
1085 if (r) return r;
1086 }
1087
1088 if (qn->greedy) {
1089 if (IS_NOT_NULL(qn->head_exact)) {
1090 r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
1091 mod_tlen + SIZE_OP_JUMP);
1092 if (r) return r;
1093 add_bytes(reg, NSTR(qn->head_exact)->s, 1);
1094 r = compile_tree_empty_check(qn->target, reg, empty_info);
1095 if (r) return r;
1096 r = add_opcode_rel_addr(reg, OP_JUMP,
1097 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
1098 }
1099 else if (IS_NOT_NULL(qn->next_head_exact)) {
1100 r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
1101 mod_tlen + SIZE_OP_JUMP);
1102 if (r) return r;
1103 add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1104 r = compile_tree_empty_check(qn->target, reg, empty_info);
1105 if (r) return r;
1106 r = add_opcode_rel_addr(reg, OP_JUMP,
1107 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
1108 }
1109 else {
1110 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
1111 if (r) return r;
1112 r = compile_tree_empty_check(qn->target, reg, empty_info);
1113 if (r) return r;
1114 r = add_opcode_rel_addr(reg, OP_JUMP,
1115 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
1116 }
1117 }
1118 else {
1119 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
1120 if (r) return r;
1121 r = compile_tree_empty_check(qn->target, reg, empty_info);
1122 if (r) return r;
1123 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
1124 }
1125 }
1126 else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
1127 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1128 if (r) return r;
1129 r = compile_tree(qn->target, reg);
1130 }
1131 else if (!infinite && qn->greedy &&
1132 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1133 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1134 int n = qn->upper - qn->lower;
1135
1136 r = compile_tree_n_times(qn->target, qn->lower, reg);
1137 if (r) return r;
1138
1139 for (i = 0; i < n; i++) {
1140 r = add_opcode_rel_addr(reg, OP_PUSH,
1141 (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
1142 if (r) return r;
1143 r = compile_tree(qn->target, reg);
1144 if (r) return r;
1145 }
1146 }
1147 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1148 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
1149 if (r) return r;
1150 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1151 if (r) return r;
1152 r = compile_tree(qn->target, reg);
1153 }
1154 else {
1155 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
1156 }
1157 return r;
1158 }
1159 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
1160
1161 static int
1162 compile_length_option_node(EncloseNode* node, regex_t* reg)
1163 {
1164 int tlen;
1165 OnigOptionType prev = reg->options;
1166
1167 reg->options = node->option;
1168 tlen = compile_length_tree(node->target, reg);
1169 reg->options = prev;
1170
1171 if (tlen < 0) return tlen;
1172
1173 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1174 return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL
1175 + tlen + SIZE_OP_SET_OPTION;
1176 }
1177 else
1178 return tlen;
1179 }
1180
1181 static int
1182 compile_option_node(EncloseNode* node, regex_t* reg)
1183 {
1184 int r;
1185 OnigOptionType prev = reg->options;
1186
1187 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1188 r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
1189 if (r) return r;
1190 r = add_opcode_option(reg, OP_SET_OPTION, prev);
1191 if (r) return r;
1192 r = add_opcode(reg, OP_FAIL);
1193 if (r) return r;
1194 }
1195
1196 reg->options = node->option;
1197 r = compile_tree(node->target, reg);
1198 reg->options = prev;
1199
1200 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1201 if (r) return r;
1202 r = add_opcode_option(reg, OP_SET_OPTION, prev);
1203 }
1204 return r;
1205 }
1206
1207 static int
1208 compile_length_enclose_node(EncloseNode* node, regex_t* reg)
1209 {
1210 int len;
1211 int tlen;
1212
1213 if (node->type == ENCLOSE_OPTION)
1214 return compile_length_option_node(node, reg);
1215
1216 if (node->target) {
1217 tlen = compile_length_tree(node->target, reg);
1218 if (tlen < 0) return tlen;
1219 }
1220 else
1221 tlen = 0;
1222
1223 switch (node->type) {
1224 case ENCLOSE_MEMORY:
1225 #ifdef USE_SUBEXP_CALL
1226 if (IS_ENCLOSE_CALLED(node)) {
1227 len = SIZE_OP_MEMORY_START_PUSH + tlen
1228 + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
1229 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1230 len += (IS_ENCLOSE_RECURSION(node)
1231 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1232 else
1233 len += (IS_ENCLOSE_RECURSION(node)
1234 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1235 }
1236 else
1237 #endif
1238 {
1239 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1240 len = SIZE_OP_MEMORY_START_PUSH;
1241 else
1242 len = SIZE_OP_MEMORY_START;
1243
1244 len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1245 ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
1246 }
1247 break;
1248
1249 case ENCLOSE_STOP_BACKTRACK:
1250 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
1251 if (node->target == NULL) {
1252 CHECK_NULL_RETURN_MEMERR(node->target);
1253 }
1254 QtfrNode* qn = NQTFR(node->target);
1255 tlen = compile_length_tree(qn->target, reg);
1256 if (tlen < 0) return tlen;
1257
1258 len = tlen * qn->lower
1259 + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
1260 }
1261 else {
1262 len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
1263 }
1264 break;
1265
1266 default:
1267 return ONIGERR_TYPE_BUG;
1268 break;
1269 }
1270
1271 return len;
1272 }
1273
1274 static int get_char_length_tree(Node* node, regex_t* reg, int* len);
1275
1276 static int
1277 compile_enclose_node(EncloseNode* node, regex_t* reg)
1278 {
1279 int r, len;
1280
1281 if (node->type == ENCLOSE_OPTION)
1282 return compile_option_node(node, reg);
1283
1284 switch (node->type) {
1285 case ENCLOSE_MEMORY:
1286 #ifdef USE_SUBEXP_CALL
1287 if (IS_ENCLOSE_CALLED(node)) {
1288 r = add_opcode(reg, OP_CALL);
1289 if (r) return r;
1290 node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
1291 node->state |= NST_ADDR_FIXED;
1292 r = add_abs_addr(reg, (int )node->call_addr);
1293 if (r) return r;
1294 len = compile_length_tree(node->target, reg);
1295 len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
1296 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1297 len += (IS_ENCLOSE_RECURSION(node)
1298 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1299 else
1300 len += (IS_ENCLOSE_RECURSION(node)
1301 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1302
1303 r = add_opcode_rel_addr(reg, OP_JUMP, len);
1304 if (r) return r;
1305 }
1306 #endif
1307 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1308 r = add_opcode(reg, OP_MEMORY_START_PUSH);
1309 else
1310 r = add_opcode(reg, OP_MEMORY_START);
1311 if (r) return r;
1312 r = add_mem_num(reg, node->regnum);
1313 if (r) return r;
1314 r = compile_tree(node->target, reg);
1315 if (r) return r;
1316 #ifdef USE_SUBEXP_CALL
1317 if (IS_ENCLOSE_CALLED(node)) {
1318 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1319 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1320 ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
1321 else
1322 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1323 ? OP_MEMORY_END_REC : OP_MEMORY_END));
1324
1325 if (r) return r;
1326 r = add_mem_num(reg, node->regnum);
1327 if (r) return r;
1328 r = add_opcode(reg, OP_RETURN);
1329 }
1330 else
1331 #endif
1332 {
1333 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1334 r = add_opcode(reg, OP_MEMORY_END_PUSH);
1335 else
1336 r = add_opcode(reg, OP_MEMORY_END);
1337 if (r) return r;
1338 r = add_mem_num(reg, node->regnum);
1339 }
1340 break;
1341
1342 case ENCLOSE_STOP_BACKTRACK:
1343 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
1344 QtfrNode* qn = NQTFR(node->target);
1345 r = compile_tree_n_times(qn->target, qn->lower, reg);
1346 if (r) return r;
1347
1348 len = compile_length_tree(qn->target, reg);
1349 if (len < 0) return len;
1350
1351 r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);
1352 if (r) return r;
1353 r = compile_tree(qn->target, reg);
1354 if (r) return r;
1355 r = add_opcode(reg, OP_POP);
1356 if (r) return r;
1357 r = add_opcode_rel_addr(reg, OP_JUMP,
1358 -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
1359 }
1360 else {
1361 r = add_opcode(reg, OP_PUSH_STOP_BT);
1362 if (r) return r;
1363 r = compile_tree(node->target, reg);
1364 if (r) return r;
1365 r = add_opcode(reg, OP_POP_STOP_BT);
1366 }
1367 break;
1368
1369 default:
1370 return ONIGERR_TYPE_BUG;
1371 break;
1372 }
1373
1374 return r;
1375 }
1376
1377 static int
1378 compile_length_anchor_node(AnchorNode* node, regex_t* reg)
1379 {
1380 int len;
1381 int tlen = 0;
1382
1383 if (node->target) {
1384 tlen = compile_length_tree(node->target, reg);
1385 if (tlen < 0) return tlen;
1386 }
1387
1388 switch (node->type) {
1389 case ANCHOR_PREC_READ:
1390 len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
1391 break;
1392 case ANCHOR_PREC_READ_NOT:
1393 len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
1394 break;
1395 case ANCHOR_LOOK_BEHIND:
1396 len = SIZE_OP_LOOK_BEHIND + tlen;
1397 break;
1398 case ANCHOR_LOOK_BEHIND_NOT:
1399 len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT;
1400 break;
1401
1402 default:
1403 len = SIZE_OPCODE;
1404 break;
1405 }
1406
1407 return len;
1408 }
1409
1410 static int
1411 compile_anchor_node(AnchorNode* node, regex_t* reg)
1412 {
1413 int r, len;
1414
1415 switch (node->type) {
1416 case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break;
1417 case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break;
1418 case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break;
1419 case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break;
1420 case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break;
1421 case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
1422
1423 case ANCHOR_WORD_BOUND: r = add_opcode(reg, OP_WORD_BOUND); break;
1424 case ANCHOR_NOT_WORD_BOUND: r = add_opcode(reg, OP_NOT_WORD_BOUND); break;
1425 #ifdef USE_WORD_BEGIN_END
1426 case ANCHOR_WORD_BEGIN: r = add_opcode(reg, OP_WORD_BEGIN); break;
1427 case ANCHOR_WORD_END: r = add_opcode(reg, OP_WORD_END); break;
1428 #endif
1429
1430 case ANCHOR_PREC_READ:
1431 r = add_opcode(reg, OP_PUSH_POS);
1432 if (r) return r;
1433 r = compile_tree(node->target, reg);
1434 if (r) return r;
1435 r = add_opcode(reg, OP_POP_POS);
1436 break;
1437
1438 case ANCHOR_PREC_READ_NOT:
1439 len = compile_length_tree(node->target, reg);
1440 if (len < 0) return len;
1441 r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS);
1442 if (r) return r;
1443 r = compile_tree(node->target, reg);
1444 if (r) return r;
1445 r = add_opcode(reg, OP_FAIL_POS);
1446 break;
1447
1448 case ANCHOR_LOOK_BEHIND:
1449 {
1450 int n;
1451 r = add_opcode(reg, OP_LOOK_BEHIND);
1452 if (r) return r;
1453 if (node->char_len < 0) {
1454 r = get_char_length_tree(node->target, reg, &n);
1455 if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1456 }
1457 else
1458 n = node->char_len;
1459 r = add_length(reg, n);
1460 if (r) return r;
1461 r = compile_tree(node->target, reg);
1462 }
1463 break;
1464
1465 case ANCHOR_LOOK_BEHIND_NOT:
1466 {
1467 int n;
1468 len = compile_length_tree(node->target, reg);
1469 r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
1470 len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
1471 if (r) return r;
1472 if (node->char_len < 0) {
1473 r = get_char_length_tree(node->target, reg, &n);
1474 if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1475 }
1476 else
1477 n = node->char_len;
1478 r = add_length(reg, n);
1479 if (r) return r;
1480 r = compile_tree(node->target, reg);
1481 if (r) return r;
1482 r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
1483 }
1484 break;
1485
1486 default:
1487 return ONIGERR_TYPE_BUG;
1488 break;
1489 }
1490
1491 return r;
1492 }
1493
1494 static int
1495 compile_length_tree(Node* node, regex_t* reg)
1496 {
1497 int len, type, r;
1498
1499 type = NTYPE(node);
1500 switch (type) {
1501 case NT_LIST:
1502 len = 0;
1503 do {
1504 r = compile_length_tree(NCAR(node), reg);
1505 if (r < 0) return r;
1506 len += r;
1507 } while (IS_NOT_NULL(node = NCDR(node)));
1508 r = len;
1509 break;
1510
1511 case NT_ALT:
1512 {
1513 int n;
1514
1515 n = r = 0;
1516 do {
1517 r += compile_length_tree(NCAR(node), reg);
1518 n++;
1519 } while (IS_NOT_NULL(node = NCDR(node)));
1520 r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
1521 }
1522 break;
1523
1524 case NT_STR:
1525 if (NSTRING_IS_RAW(node))
1526 r = compile_length_string_raw_node(NSTR(node), reg);
1527 else
1528 r = compile_length_string_node(node, reg);
1529 break;
1530
1531 case NT_CCLASS:
1532 r = compile_length_cclass_node(NCCLASS(node), reg);
1533 break;
1534
1535 case NT_CTYPE:
1536 case NT_CANY:
1537 r = SIZE_OPCODE;
1538 break;
1539
1540 case NT_BREF:
1541 {
1542 BRefNode* br = NBREF(node);
1543
1544 #ifdef USE_BACKREF_WITH_LEVEL
1545 if (IS_BACKREF_NEST_LEVEL(br)) {
1546 r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
1547 SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1548 }
1549 else
1550 #endif
1551 if (br->back_num == 1) {
1552 r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
1553 ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
1554 }
1555 else {
1556 r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1557 }
1558 }
1559 break;
1560
1561 #ifdef USE_SUBEXP_CALL
1562 case NT_CALL:
1563 r = SIZE_OP_CALL;
1564 break;
1565 #endif
1566
1567 case NT_QTFR:
1568 r = compile_length_quantifier_node(NQTFR(node), reg);
1569 break;
1570
1571 case NT_ENCLOSE:
1572 r = compile_length_enclose_node(NENCLOSE(node), reg);
1573 break;
1574
1575 case NT_ANCHOR:
1576 r = compile_length_anchor_node(NANCHOR(node), reg);
1577 break;
1578
1579 default:
1580 return ONIGERR_TYPE_BUG;
1581 break;
1582 }
1583
1584 return r;
1585 }
1586
1587 static int
1588 compile_tree(Node* node, regex_t* reg)
1589 {
1590 int n, type, len, pos, r = 0;
1591
1592 type = NTYPE(node);
1593 switch (type) {
1594 case NT_LIST:
1595 do {
1596 r = compile_tree(NCAR(node), reg);
1597 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1598 break;
1599
1600 case NT_ALT:
1601 {
1602 Node* x = node;
1603 len = 0;
1604 do {
1605 len += compile_length_tree(NCAR(x), reg);
1606 if (NCDR(x) != NULL) {
1607 len += SIZE_OP_PUSH + SIZE_OP_JUMP;
1608 }
1609 } while (IS_NOT_NULL(x = NCDR(x)));
1610 pos = reg->used + len; /* goal position */
1611
1612 do {
1613 len = compile_length_tree(NCAR(node), reg);
1614 if (IS_NOT_NULL(NCDR(node))) {
1615 r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
1616 if (r) break;
1617 }
1618 r = compile_tree(NCAR(node), reg);
1619 if (r) break;
1620 if (IS_NOT_NULL(NCDR(node))) {
1621 len = pos - (reg->used + SIZE_OP_JUMP);
1622 r = add_opcode_rel_addr(reg, OP_JUMP, len);
1623 if (r) break;
1624 }
1625 } while (IS_NOT_NULL(node = NCDR(node)));
1626 }
1627 break;
1628
1629 case NT_STR:
1630 if (NSTRING_IS_RAW(node))
1631 r = compile_string_raw_node(NSTR(node), reg);
1632 else
1633 r = compile_string_node(node, reg);
1634 break;
1635
1636 case NT_CCLASS:
1637 r = compile_cclass_node(NCCLASS(node), reg);
1638 break;
1639
1640 case NT_CTYPE:
1641 {
1642 int op;
1643
1644 switch (NCTYPE(node)->ctype) {
1645 case ONIGENC_CTYPE_WORD:
1646 if (NCTYPE(node)->not != 0) op = OP_NOT_WORD;
1647 else op = OP_WORD;
1648 break;
1649 default:
1650 return ONIGERR_TYPE_BUG;
1651 break;
1652 }
1653 r = add_opcode(reg, op);
1654 }
1655 break;
1656
1657 case NT_CANY:
1658 if (IS_MULTILINE(reg->options))
1659 r = add_opcode(reg, OP_ANYCHAR_ML);
1660 else
1661 r = add_opcode(reg, OP_ANYCHAR);
1662 break;
1663
1664 case NT_BREF:
1665 {
1666 BRefNode* br = NBREF(node);
1667
1668 #ifdef USE_BACKREF_WITH_LEVEL
1669 if (IS_BACKREF_NEST_LEVEL(br)) {
1670 r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
1671 if (r) return r;
1672 r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
1673 if (r) return r;
1674 r = add_length(reg, br->nest_level);
1675 if (r) return r;
1676
1677 goto add_bacref_mems;
1678 }
1679 else
1680 #endif
1681 if (br->back_num == 1) {
1682 n = br->back_static[0];
1683 if (IS_IGNORECASE(reg->options)) {
1684 r = add_opcode(reg, OP_BACKREFN_IC);
1685 if (r) return r;
1686 r = add_mem_num(reg, n);
1687 }
1688 else {
1689 switch (n) {
1690 case 1: r = add_opcode(reg, OP_BACKREF1); break;
1691 case 2: r = add_opcode(reg, OP_BACKREF2); break;
1692 default:
1693 r = add_opcode(reg, OP_BACKREFN);
1694 if (r) return r;
1695 r = add_mem_num(reg, n);
1696 break;
1697 }
1698 }
1699 }
1700 else {
1701 int i;
1702 int* p;
1703
1704 if (IS_IGNORECASE(reg->options)) {
1705 r = add_opcode(reg, OP_BACKREF_MULTI_IC);
1706 }
1707 else {
1708 r = add_opcode(reg, OP_BACKREF_MULTI);
1709 }
1710 if (r) return r;
1711
1712 #ifdef USE_BACKREF_WITH_LEVEL
1713 add_bacref_mems:
1714 #endif
1715 r = add_length(reg, br->back_num);
1716 if (r) return r;
1717 p = BACKREFS_P(br);
1718 for (i = br->back_num - 1; i >= 0; i--) {
1719 r = add_mem_num(reg, p[i]);
1720 if (r) return r;
1721 }
1722 }
1723 }
1724 break;
1725
1726 #ifdef USE_SUBEXP_CALL
1727 case NT_CALL:
1728 r = compile_call(NCALL(node), reg);
1729 break;
1730 #endif
1731
1732 case NT_QTFR:
1733 r = compile_quantifier_node(NQTFR(node), reg);
1734 break;
1735
1736 case NT_ENCLOSE:
1737 r = compile_enclose_node(NENCLOSE(node), reg);
1738 break;
1739
1740 case NT_ANCHOR:
1741 r = compile_anchor_node(NANCHOR(node), reg);
1742 break;
1743
1744 default:
1745 #ifdef ONIG_DEBUG
1746 fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
1747 #endif
1748 break;
1749 }
1750
1751 return r;
1752 }
1753
1754 #ifdef USE_NAMED_GROUP
1755
1756 static int
1757 noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
1758 {
1759 int r = 0;
1760 Node* node = *plink;
1761
1762 switch (NTYPE(node)) {
1763 case NT_LIST:
1764 case NT_ALT:
1765 do {
1766 r = noname_disable_map(&(NCAR(node)), map, counter);
1767 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1768 break;
1769
1770 case NT_QTFR:
1771 {
1772 Node** ptarget = &(NQTFR(node)->target);
1773 Node* old = *ptarget;
1774 r = noname_disable_map(ptarget, map, counter);
1775 if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
1776 onig_reduce_nested_quantifier(node, *ptarget);
1777 }
1778 }
1779 break;
1780
1781 case NT_ENCLOSE:
1782 {
1783 EncloseNode* en = NENCLOSE(node);
1784 if (en->type == ENCLOSE_MEMORY) {
1785 if (IS_ENCLOSE_NAMED_GROUP(en)) {
1786 (*counter)++;
1787 map[en->regnum].new_val = *counter;
1788 en->regnum = *counter;
1789 r = noname_disable_map(&(en->target), map, counter);
1790 }
1791 else {
1792 *plink = en->target;
1793 en->target = NULL_NODE;
1794 onig_node_free(node);
1795 r = noname_disable_map(plink, map, counter);
1796 }
1797 }
1798 else
1799 r = noname_disable_map(&(en->target), map, counter);
1800 }
1801 break;
1802
1803 default:
1804 break;
1805 }
1806
1807 return r;
1808 }
1809
1810 static int
1811 renumber_node_backref(Node* node, GroupNumRemap* map)
1812 {
1813 int i, pos, n, old_num;
1814 int *backs;
1815 BRefNode* bn = NBREF(node);
1816
1817 if (! IS_BACKREF_NAME_REF(bn))
1818 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
1819
1820 old_num = bn->back_num;
1821 if (IS_NULL(bn->back_dynamic))
1822 backs = bn->back_static;
1823 else
1824 backs = bn->back_dynamic;
1825
1826 for (i = 0, pos = 0; i < old_num; i++) {
1827 n = map[backs[i]].new_val;
1828 if (n > 0) {
1829 backs[pos] = n;
1830 pos++;
1831 }
1832 }
1833
1834 bn->back_num = pos;
1835 return 0;
1836 }
1837
1838 static int
1839 renumber_by_map(Node* node, GroupNumRemap* map)
1840 {
1841 int r = 0;
1842
1843 switch (NTYPE(node)) {
1844 case NT_LIST:
1845 case NT_ALT:
1846 do {
1847 r = renumber_by_map(NCAR(node), map);
1848 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1849 break;
1850 case NT_QTFR:
1851 r = renumber_by_map(NQTFR(node)->target, map);
1852 break;
1853 case NT_ENCLOSE:
1854 r = renumber_by_map(NENCLOSE(node)->target, map);
1855 break;
1856
1857 case NT_BREF:
1858 r = renumber_node_backref(node, map);
1859 break;
1860
1861 default:
1862 break;
1863 }
1864
1865 return r;
1866 }
1867
1868 static int
1869 numbered_ref_check(Node* node)
1870 {
1871 int r = 0;
1872
1873 switch (NTYPE(node)) {
1874 case NT_LIST:
1875 case NT_ALT:
1876 do {
1877 r = numbered_ref_check(NCAR(node));
1878 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1879 break;
1880 case NT_QTFR:
1881 r = numbered_ref_check(NQTFR(node)->target);
1882 break;
1883 case NT_ENCLOSE:
1884 r = numbered_ref_check(NENCLOSE(node)->target);
1885 break;
1886
1887 case NT_BREF:
1888 if (! IS_BACKREF_NAME_REF(NBREF(node)))
1889 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
1890 break;
1891
1892 default:
1893 break;
1894 }
1895
1896 return r;
1897 }
1898
1899 static int
1900 disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
1901 {
1902 int r, i, pos, counter;
1903 int Result;
1904 BitStatusType loc;
1905 GroupNumRemap* map;
1906
1907 map = (GroupNumRemap* )xmalloc(sizeof(GroupNumRemap) * (env->num_mem + 1));
1908 CHECK_NULL_RETURN_MEMERR(map);
1909 for (i = 1; i <= env->num_mem; i++) {
1910 map[i].new_val = 0;
1911 }
1912 counter = 0;
1913 r = noname_disable_map(root, map, &counter);
1914 if (r != 0) return r;
1915
1916 r = renumber_by_map(*root, map);
1917 if (r != 0) return r;
1918
1919 for (i = 1, pos = 1; i <= env->num_mem; i++) {
1920 if (map[i].new_val > 0) {
1921 SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
1922 pos++;
1923 }
1924 }
1925
1926 loc = env->capture_history;
1927 BIT_STATUS_CLEAR(env->capture_history);
1928 for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
1929 if (BIT_STATUS_AT(loc, i)) {
1930 BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);
1931 }
1932 }
1933
1934 env->num_mem = env->num_named;
1935 reg->num_mem = env->num_named;
1936
1937 Result = onig_renumber_name_table(reg, map);
1938 xfree(map);
1939 return Result;
1940 }
1941 #endif /* USE_NAMED_GROUP */
1942
1943 #ifdef USE_SUBEXP_CALL
1944 static int
1945 unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
1946 {
1947 int i, offset;
1948 EncloseNode* en;
1949 AbsAddrType addr;
1950
1951 for (i = 0; i < uslist->num; i++) {
1952 en = NENCLOSE(uslist->us[i].target);
1953 if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
1954 addr = en->call_addr;
1955 offset = uslist->us[i].offset;
1956
1957 BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
1958 }
1959 return 0;
1960 }
1961 #endif
1962
1963 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
1964 static int
1965 quantifiers_memory_node_info(Node* node)
1966 {
1967 int r = 0;
1968
1969 switch (NTYPE(node)) {
1970 case NT_LIST:
1971 case NT_ALT:
1972 {
1973 int v;
1974 do {
1975 v = quantifiers_memory_node_info(NCAR(node));
1976 if (v > r) r = v;
1977 } while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
1978 }
1979 break;
1980
1981 #ifdef USE_SUBEXP_CALL
1982 case NT_CALL:
1983 if (IS_CALL_RECURSION(NCALL(node))) {
1984 return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
1985 }
1986 else
1987 r = quantifiers_memory_node_info(NCALL(node)->target);
1988 break;
1989 #endif
1990
1991 case NT_QTFR:
1992 {
1993 QtfrNode* qn = NQTFR(node);
1994 if (qn->upper != 0) {
1995 r = quantifiers_memory_node_info(qn->target);
1996 }
1997 }
1998 break;
1999
2000 case NT_ENCLOSE:
2001 {
2002 EncloseNode* en = NENCLOSE(node);
2003 switch (en->type) {
2004 case ENCLOSE_MEMORY:
2005 return NQ_TARGET_IS_EMPTY_MEM;
2006 break;
2007
2008 case ENCLOSE_OPTION:
2009 case ENCLOSE_STOP_BACKTRACK:
2010 r = quantifiers_memory_node_info(en->target);
2011 break;
2012 default:
2013 break;
2014 }
2015 }
2016 break;
2017
2018 case NT_BREF:
2019 case NT_STR:
2020 case NT_CTYPE:
2021 case NT_CCLASS:
2022 case NT_CANY:
2023 case NT_ANCHOR:
2024 default:
2025 break;
2026 }
2027
2028 return r;
2029 }
2030 #endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */
2031
2032 static int
2033 get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
2034 {
2035 OnigDistance tmin;
2036 int r = 0;
2037
2038 *min = 0;
2039 switch (NTYPE(node)) {
2040 case NT_BREF:
2041 {
2042 int i;
2043 int* backs;
2044 Node** nodes = SCANENV_MEM_NODES(env);
2045 BRefNode* br = NBREF(node);
2046 if (br->state & NST_RECURSION) break;
2047
2048 backs = BACKREFS_P(br);
2049 if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2050 r = get_min_match_length(nodes[backs[0]], min, env);
2051 if (r != 0) break;
2052 for (i = 1; i < br->back_num; i++) {
2053 if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2054 r = get_min_match_length(nodes[backs[i]], &tmin, env);
2055 if (r != 0) break;
2056 if (*min > tmin) *min = tmin;
2057 }
2058 }
2059 break;
2060
2061 #ifdef USE_SUBEXP_CALL
2062 case NT_CALL:
2063 if (IS_CALL_RECURSION(NCALL(node))) {
2064 EncloseNode* en = NENCLOSE(NCALL(node)->target);
2065 if (IS_ENCLOSE_MIN_FIXED(en))
2066 *min = en->min_len;
2067 }
2068 else
2069 r = get_min_match_length(NCALL(node)->target, min, env);
2070 break;
2071 #endif
2072
2073 case NT_LIST:
2074 do {
2075 r = get_min_match_length(NCAR(node), &tmin, env);
2076 if (r == 0) *min += tmin;
2077 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2078 break;
2079
2080 case NT_ALT:
2081 {
2082 Node *x, *y;
2083 y = node;
2084 do {
2085 x = NCAR(y);
2086 r = get_min_match_length(x, &tmin, env);
2087 if (r != 0) break;
2088 if (y == node) *min = tmin;
2089 else if (*min > tmin) *min = tmin;
2090 } while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
2091 }
2092 break;
2093
2094 case NT_STR:
2095 {
2096 StrNode* sn = NSTR(node);
2097 *min = (OnigDistance)(sn->end - sn->s);
2098 }
2099 break;
2100
2101 case NT_CTYPE:
2102 *min = 1;
2103 break;
2104
2105 case NT_CCLASS:
2106 case NT_CANY:
2107 *min = 1;
2108 break;
2109
2110 case NT_QTFR:
2111 {
2112 QtfrNode* qn = NQTFR(node);
2113
2114 if (qn->lower > 0) {
2115 r = get_min_match_length(qn->target, min, env);
2116 if (r == 0)
2117 *min = distance_multiply(*min, qn->lower);
2118 }
2119 }
2120 break;
2121
2122 case NT_ENCLOSE:
2123 {
2124 EncloseNode* en = NENCLOSE(node);
2125 switch (en->type) {
2126 case ENCLOSE_MEMORY:
2127 #ifdef USE_SUBEXP_CALL
2128 if (IS_ENCLOSE_MIN_FIXED(en))
2129 *min = en->min_len;
2130 else {
2131 r = get_min_match_length(en->target, min, env);
2132 if (r == 0) {
2133 en->min_len = *min;
2134 SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);
2135 }
2136 }
2137 break;
2138 #endif
2139 case ENCLOSE_OPTION:
2140 case ENCLOSE_STOP_BACKTRACK:
2141 r = get_min_match_length(en->target, min, env);
2142 break;
2143 }
2144 }
2145 break;
2146
2147 case NT_ANCHOR:
2148 default:
2149 break;
2150 }
2151
2152 return r;
2153 }
2154
2155 static int
2156 get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
2157 {
2158 OnigDistance tmax;
2159 int r = 0;
2160
2161 *max = 0;
2162 switch (NTYPE(node)) {
2163 case NT_LIST:
2164 do {
2165 r = get_max_match_length(NCAR(node), &tmax, env);
2166 if (r == 0)
2167 *max = distance_add(*max, tmax);
2168 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2169 break;
2170
2171 case NT_ALT:
2172 do {
2173 r = get_max_match_length(NCAR(node), &tmax, env);
2174 if (r == 0 && *max < tmax) *max = tmax;
2175 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2176 break;
2177
2178 case NT_STR:
2179 {
2180 StrNode* sn = NSTR(node);
2181 *max = (OnigDistance)(sn->end - sn->s);
2182 }
2183 break;
2184
2185 case NT_CTYPE:
2186 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2187 break;
2188
2189 case NT_CCLASS:
2190 case NT_CANY:
2191 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2192 break;
2193
2194 case NT_BREF:
2195 {
2196 int i;
2197 int* backs;
2198 Node** nodes = SCANENV_MEM_NODES(env);
2199 BRefNode* br = NBREF(node);
2200 if (br->state & NST_RECURSION) {
2201 *max = ONIG_INFINITE_DISTANCE;
2202 break;
2203 }
2204 backs = BACKREFS_P(br);
2205 for (i = 0; i < br->back_num; i++) {
2206 if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2207 r = get_max_match_length(nodes[backs[i]], &tmax, env);
2208 if (r != 0) break;
2209 if (*max < tmax) *max = tmax;
2210 }
2211 }
2212 break;
2213
2214 #ifdef USE_SUBEXP_CALL
2215 case NT_CALL:
2216 if (! IS_CALL_RECURSION(NCALL(node)))
2217 r = get_max_match_length(NCALL(node)->target, max, env);
2218 else
2219 *max = ONIG_INFINITE_DISTANCE;
2220 break;
2221 #endif
2222
2223 case NT_QTFR:
2224 {
2225 QtfrNode* qn = NQTFR(node);
2226
2227 if (qn->upper != 0) {
2228 r = get_max_match_length(qn->target, max, env);
2229 if (r == 0 && *max != 0) {
2230 if (! IS_REPEAT_INFINITE(qn->upper))
2231 *max = distance_multiply(*max, qn->upper);
2232 else
2233 *max = ONIG_INFINITE_DISTANCE;
2234 }
2235 }
2236 }
2237 break;
2238
2239 case NT_ENCLOSE:
2240 {
2241 EncloseNode* en = NENCLOSE(node);
2242 switch (en->type) {
2243 case ENCLOSE_MEMORY:
2244 #ifdef USE_SUBEXP_CALL
2245 if (IS_ENCLOSE_MAX_FIXED(en))
2246 *max = en->max_len;
2247 else {
2248 r = get_max_match_length(en->target, max, env);
2249 if (r == 0) {
2250 en->max_len = *max;
2251 SET_ENCLOSE_STATUS(node, NST_MAX_FIXED);
2252 }
2253 }
2254 break;
2255 #endif
2256 case ENCLOSE_OPTION:
2257 case ENCLOSE_STOP_BACKTRACK:
2258 r = get_max_match_length(en->target, max, env);
2259 break;
2260 }
2261 }
2262 break;
2263
2264 case NT_ANCHOR:
2265 default:
2266 break;
2267 }
2268
2269 return r;
2270 }
2271
2272 #define GET_CHAR_LEN_VARLEN -1
2273 #define GET_CHAR_LEN_TOP_ALT_VARLEN -2
2274
2275 /* fixed size pattern node only */
2276 static int
2277 get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
2278 {
2279 int tlen;
2280 int r = 0;
2281
2282 level++;
2283 *len = 0;
2284 switch (NTYPE(node)) {
2285 case NT_LIST:
2286 do {
2287 r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2288 if (r == 0)
2289 *len = distance_add(*len, tlen);
2290 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2291 break;
2292
2293 case NT_ALT:
2294 {
2295 int tlen2;
2296 int varlen = 0;
2297
2298 r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2299 while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
2300 r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
2301 if (r == 0) {
2302 if (tlen != tlen2)
2303 varlen = 1;
2304 }
2305 }
2306 if (r == 0) {
2307 if (varlen != 0) {
2308 if (level == 1)
2309 r = GET_CHAR_LEN_TOP_ALT_VARLEN;
2310 else
2311 r = GET_CHAR_LEN_VARLEN;
2312 }
2313 else
2314 *len = tlen;
2315 }
2316 }
2317 break;
2318
2319 case NT_STR:
2320 {
2321 StrNode* sn = NSTR(node);
2322 UChar *s = sn->s;
2323 while (s < sn->end) {
2324 s += enclen(reg->enc, s);
2325 (*len)++;
2326 }
2327 }
2328 break;
2329
2330 case NT_QTFR:
2331 {
2332 QtfrNode* qn = NQTFR(node);
2333 if (qn->lower == qn->upper) {
2334 r = get_char_length_tree1(qn->target, reg, &tlen, level);
2335 if (r == 0)
2336 *len = distance_multiply(tlen, qn->lower);
2337 }
2338 else
2339 r = GET_CHAR_LEN_VARLEN;
2340 }
2341 break;
2342
2343 #ifdef USE_SUBEXP_CALL
2344 case NT_CALL:
2345 if (! IS_CALL_RECURSION(NCALL(node)))
2346 r = get_char_length_tree1(NCALL(node)->target, reg, len, level);
2347 else
2348 r = GET_CHAR_LEN_VARLEN;
2349 break;
2350 #endif
2351
2352 case NT_CTYPE:
2353 *len = 1;
2354 break;
2355
2356 case NT_CCLASS:
2357 case NT_CANY:
2358 *len = 1;
2359 break;
2360
2361 case NT_ENCLOSE:
2362 {
2363 EncloseNode* en = NENCLOSE(node);
2364 switch (en->type) {
2365 case ENCLOSE_MEMORY:
2366 #ifdef USE_SUBEXP_CALL
2367 if (IS_ENCLOSE_CLEN_FIXED(en))
2368 *len = en->char_len;
2369 else {
2370 r = get_char_length_tree1(en->target, reg, len, level);
2371 if (r == 0) {
2372 en->char_len = *len;
2373 SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED);
2374 }
2375 }
2376 break;
2377 #endif
2378 case ENCLOSE_OPTION:
2379 case ENCLOSE_STOP_BACKTRACK:
2380 r = get_char_length_tree1(en->target, reg, len, level);
2381 break;
2382 default:
2383 break;
2384 }
2385 }
2386 break;
2387
2388 case NT_ANCHOR:
2389 break;
2390
2391 default:
2392 r = GET_CHAR_LEN_VARLEN;
2393 break;
2394 }
2395
2396 return r;
2397 }
2398
2399 static int
2400 get_char_length_tree(Node* node, regex_t* reg, int* len)
2401 {
2402 return get_char_length_tree1(node, reg, len, 0);
2403 }
2404
2405 /* x is not included y ==> 1 : 0 */
2406 static int
2407 is_not_included(Node* x, Node* y, regex_t* reg)
2408 {
2409 int i, len;
2410 OnigCodePoint code;
2411 UChar *p;
2412 int ytype;
2413
2414 retry:
2415 ytype = NTYPE(y);
2416 switch (NTYPE(x)) {
2417 case NT_CTYPE:
2418 {
2419 switch (ytype) {
2420 case NT_CTYPE:
2421 if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
2422 NCTYPE(y)->not != NCTYPE(x)->not)
2423 return 1;
2424 else
2425 return 0;
2426 break;
2427
2428 case NT_CCLASS:
2429 swap:
2430 {
2431 Node* tmp;
2432 tmp = x; x = y; y = tmp;
2433 goto retry;
2434 }
2435 break;
2436
2437 case NT_STR:
2438 goto swap;
2439 break;
2440
2441 default:
2442 break;
2443 }
2444 }
2445 break;
2446
2447 case NT_CCLASS:
2448 {
2449 CClassNode* xc = NCCLASS(x);
2450 switch (ytype) {
2451 case NT_CTYPE:
2452 switch (NCTYPE(y)->ctype) {
2453 case ONIGENC_CTYPE_WORD:
2454 if (NCTYPE(y)->not == 0) {
2455 if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
2456 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2457 if (BITSET_AT(xc->bs, i)) {
2458 if (IS_CODE_SB_WORD(reg->enc, i)) return 0;
2459 }
2460 }
2461 return 1;
2462 }
2463 return 0;
2464 }
2465 else {
2466 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2467 if (! IS_CODE_SB_WORD(reg->enc, i)) {
2468 if (!IS_NCCLASS_NOT(xc)) {
2469 if (BITSET_AT(xc->bs, i))
2470 return 0;
2471 }
2472 else {
2473 if (! BITSET_AT(xc->bs, i))
2474 return 0;
2475 }
2476 }
2477 }
2478 return 1;
2479 }
2480 break;
2481
2482 default:
2483 break;
2484 }
2485 break;
2486
2487 case NT_CCLASS:
2488 {
2489 int v;
2490 CClassNode* yc = NCCLASS(y);
2491
2492 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2493 v = BITSET_AT(xc->bs, i);
2494 if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
2495 (v == 0 && IS_NCCLASS_NOT(xc))) {
2496 v = BITSET_AT(yc->bs, i);
2497 if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
2498 (v == 0 && IS_NCCLASS_NOT(yc)))
2499 return 0;
2500 }
2501 }
2502 if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
2503 (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
2504 return 1;
2505 return 0;
2506 }
2507 break;
2508
2509 case NT_STR:
2510 goto swap;
2511 break;
2512
2513 default:
2514 break;
2515 }
2516 }
2517 break;
2518
2519 case NT_STR:
2520 {
2521 StrNode* xs = NSTR(x);
2522 if (NSTRING_LEN(x) == 0)
2523 break;
2524
2525 //c = *(xs->s);
2526 switch (ytype) {
2527 case NT_CTYPE:
2528 switch (NCTYPE(y)->ctype) {
2529 case ONIGENC_CTYPE_WORD:
2530 if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
2531 return NCTYPE(y)->not;
2532 else
2533 return !(NCTYPE(y)->not);
2534 break;
2535 default:
2536 break;
2537 }
2538 break;
2539
2540 case NT_CCLASS:
2541 {
2542 CClassNode* cc = NCCLASS(y);
2543
2544 code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
2545 xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
2546 return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
2547 }
2548 break;
2549
2550 case NT_STR:
2551 {
2552 UChar *q;
2553 StrNode* ys = NSTR(y);
2554 len = NSTRING_LEN(x);
2555 if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
2556 if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
2557 /* tiny version */
2558 return 0;
2559 }
2560 else {
2561 for (i = 0, p = ys->s, q = xs->s; i < len; i++, p++, q++) {
2562 if (*p != *q) return 1;
2563 }
2564 }
2565 }
2566 break;
2567
2568 default:
2569 break;
2570 }
2571 }
2572 break;
2573
2574 default:
2575 break;
2576 }
2577
2578 return 0;
2579 }
2580
2581 static Node*
2582 get_head_value_node(Node* node, int exact, regex_t* reg)
2583 {
2584 Node* n = NULL_NODE;
2585
2586 switch (NTYPE(node)) {
2587 case NT_BREF:
2588 case NT_ALT:
2589 case NT_CANY:
2590 #ifdef USE_SUBEXP_CALL
2591 case NT_CALL:
2592 #endif
2593 break;
2594
2595 case NT_CTYPE:
2596 case NT_CCLASS:
2597 if (exact == 0) {
2598 n = node;
2599 }
2600 break;
2601
2602 case NT_LIST:
2603 n = get_head_value_node(NCAR(node), exact, reg);
2604 break;
2605
2606 case NT_STR:
2607 {
2608 StrNode* sn = NSTR(node);
2609
2610 if (sn->end <= sn->s)
2611 break;
2612
2613 if (exact != 0 &&
2614 !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
2615 }
2616 else {
2617 n = node;
2618 }
2619 }
2620 break;
2621
2622 case NT_QTFR:
2623 {
2624 QtfrNode* qn = NQTFR(node);
2625 if (qn->lower > 0) {
2626 if (IS_NOT_NULL(qn->head_exact))
2627 n = qn->head_exact;
2628 else
2629 n = get_head_value_node(qn->target, exact, reg);
2630 }
2631 }
2632 break;
2633
2634 case NT_ENCLOSE:
2635 {
2636 EncloseNode* en = NENCLOSE(node);
2637 switch (en->type) {
2638 case ENCLOSE_OPTION:
2639 {
2640 OnigOptionType options = reg->options;
2641
2642 reg->options = NENCLOSE(node)->option;
2643 n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
2644 reg->options = options;
2645 }
2646 break;
2647
2648 case ENCLOSE_MEMORY:
2649 case ENCLOSE_STOP_BACKTRACK:
2650 n = get_head_value_node(en->target, exact, reg);
2651 break;
2652 }
2653 }
2654 break;
2655
2656 case NT_ANCHOR:
2657 if (NANCHOR(node)->type == ANCHOR_PREC_READ)
2658 n = get_head_value_node(NANCHOR(node)->target, exact, reg);
2659 break;
2660
2661 default:
2662 break;
2663 }
2664
2665 return n;
2666 }
2667
2668 static int
2669 check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)
2670 {
2671 int type, r = 0;
2672
2673 type = NTYPE(node);
2674 if ((NTYPE2BIT(type) & type_mask) == 0)
2675 return 1;
2676
2677 switch (type) {
2678 case NT_LIST:
2679 case NT_ALT:
2680 do {
2681 r = check_type_tree(NCAR(node), type_mask, enclose_mask,
2682 anchor_mask);
2683 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2684 break;
2685
2686 case NT_QTFR:
2687 r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
2688 anchor_mask);
2689 break;
2690
2691 case NT_ENCLOSE:
2692 {
2693 EncloseNode* en = NENCLOSE(node);
2694 if ((en->type & enclose_mask) == 0)
2695 return 1;
2696
2697 r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
2698 }
2699 break;
2700
2701 case NT_ANCHOR:
2702 type = NANCHOR(node)->type;
2703 if ((type & anchor_mask) == 0)
2704 return 1;
2705
2706 if (NANCHOR(node)->target)
2707 r = check_type_tree(NANCHOR(node)->target,
2708 type_mask, enclose_mask, anchor_mask);
2709 break;
2710
2711 default:
2712 break;
2713 }
2714 return r;
2715 }
2716
2717 #ifdef USE_SUBEXP_CALL
2718
2719 #define RECURSION_EXIST 1
2720 #define RECURSION_INFINITE 2
2721
2722 static int
2723 subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
2724 {
2725 int type;
2726 int r = 0;
2727
2728 type = NTYPE(node);
2729 switch (type) {
2730 case NT_LIST:
2731 {
2732 Node *x;
2733 OnigDistance min;
2734 int ret;
2735
2736 x = node;
2737 do {
2738 ret = subexp_inf_recursive_check(NCAR(x), env, head);
2739 if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2740 r |= ret;
2741 if (head) {
2742 ret = get_min_match_length(NCAR(x), &min, env);
2743 if (ret != 0) return ret;
2744 if (min != 0) head = 0;
2745 }
2746 } while (IS_NOT_NULL(x = NCDR(x)));
2747 }
2748 break;
2749
2750 case NT_ALT:
2751 {
2752 int ret;
2753 r = RECURSION_EXIST;
2754 do {
2755 ret = subexp_inf_recursive_check(NCAR(node), env, head);
2756 if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2757 r &= ret;
2758 } while (IS_NOT_NULL(node = NCDR(node)));
2759 }
2760 break;
2761
2762 case NT_QTFR:
2763 r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
2764 if (r == RECURSION_EXIST) {
2765 if (NQTFR(node)->lower == 0) r = 0;
2766 }
2767 break;
2768
2769 case NT_ANCHOR:
2770 {
2771 AnchorNode* an = NANCHOR(node);
2772 switch (an->type) {
2773 case ANCHOR_PREC_READ:
2774 case ANCHOR_PREC_READ_NOT:
2775 case ANCHOR_LOOK_BEHIND:
2776 case ANCHOR_LOOK_BEHIND_NOT:
2777 r = subexp_inf_recursive_check(an->target, env, head);
2778 break;
2779 }
2780 }
2781 break;
2782
2783 case NT_CALL:
2784 r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
2785 break;
2786
2787 case NT_ENCLOSE:
2788 if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
2789 return 0;
2790 else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2791 return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
2792 else {
2793 SET_ENCLOSE_STATUS(node, NST_MARK2);
2794 r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
2795 CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
2796 }
2797 break;
2798
2799 default:
2800 break;
2801 }
2802
2803 return r;
2804 }
2805
2806 static int
2807 subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
2808 {
2809 int type;
2810 int r = 0;
2811
2812 type = NTYPE(node);
2813 switch (type) {
2814 case NT_LIST:
2815 case NT_ALT:
2816 do {
2817 r = subexp_inf_recursive_check_trav(NCAR(node), env);
2818 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2819 break;
2820
2821 case NT_QTFR:
2822 r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
2823 break;
2824
2825 case NT_ANCHOR:
2826 {
2827 AnchorNode* an = NANCHOR(node);
2828 switch (an->type) {
2829 case ANCHOR_PREC_READ:
2830 case ANCHOR_PREC_READ_NOT:
2831 case ANCHOR_LOOK_BEHIND:
2832 case ANCHOR_LOOK_BEHIND_NOT:
2833 r = subexp_inf_recursive_check_trav(an->target, env);
2834 break;
2835 }
2836 }
2837 break;
2838
2839 case NT_ENCLOSE:
2840 {
2841 EncloseNode* en = NENCLOSE(node);
2842
2843 if (IS_ENCLOSE_RECURSION(en)) {
2844 SET_ENCLOSE_STATUS(node, NST_MARK1);
2845 r = subexp_inf_recursive_check(en->target, env, 1);
2846 if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
2847 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
2848 }
2849 r = subexp_inf_recursive_check_trav(en->target, env);
2850 }
2851
2852 break;
2853
2854 default:
2855 break;
2856 }
2857
2858 return r;
2859 }
2860
2861 static int
2862 subexp_recursive_check(Node* node)
2863 {
2864 int r = 0;
2865
2866 switch (NTYPE(node)) {
2867 case NT_LIST:
2868 case NT_ALT:
2869 do {
2870 r |= subexp_recursive_check(NCAR(node));
2871 } while (IS_NOT_NULL(node = NCDR(node)));
2872 break;
2873
2874 case NT_QTFR:
2875 r = subexp_recursive_check(NQTFR(node)->target);
2876 break;
2877
2878 case NT_ANCHOR:
2879 {
2880 AnchorNode* an = NANCHOR(node);
2881 switch (an->type) {
2882 case ANCHOR_PREC_READ:
2883 case ANCHOR_PREC_READ_NOT:
2884 case ANCHOR_LOOK_BEHIND:
2885 case ANCHOR_LOOK_BEHIND_NOT:
2886 r = subexp_recursive_check(an->target);
2887 break;
2888 }
2889 }
2890 break;
2891
2892 case NT_CALL:
2893 r = subexp_recursive_check(NCALL(node)->target);
2894 if (r != 0) SET_CALL_RECURSION(node);
2895 break;
2896
2897 case NT_ENCLOSE:
2898 if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
2899 return 0;
2900 else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2901 return 1; /* recursion */
2902 else {
2903 SET_ENCLOSE_STATUS(node, NST_MARK2);
2904 r = subexp_recursive_check(NENCLOSE(node)->target);
2905 CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
2906 }
2907 break;
2908
2909 default:
2910 break;
2911 }
2912
2913 return r;
2914 }
2915
2916
2917 static int
2918 subexp_recursive_check_trav(Node* node, ScanEnv* env)
2919 {
2920 #define FOUND_CALLED_NODE 1
2921
2922 int type;
2923 int r = 0;
2924
2925 type = NTYPE(node);
2926 switch (type) {
2927 case NT_LIST:
2928 case NT_ALT:
2929 {
2930 int ret;
2931 do {
2932 ret = subexp_recursive_check_trav(NCAR(node), env);
2933 if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
2934 else if (ret < 0) return ret;
2935 } while (IS_NOT_NULL(node = NCDR(node)));
2936 }
2937 break;
2938
2939 case NT_QTFR:
2940 r = subexp_recursive_check_trav(NQTFR(node)->target, env);
2941 if (NQTFR(node)->upper == 0) {
2942 if (r == FOUND_CALLED_NODE)
2943 NQTFR(node)->is_refered = 1;
2944 }
2945 break;
2946
2947 case NT_ANCHOR:
2948 {
2949 AnchorNode* an = NANCHOR(node);
2950 switch (an->type) {
2951 case ANCHOR_PREC_READ:
2952 case ANCHOR_PREC_READ_NOT:
2953 case ANCHOR_LOOK_BEHIND:
2954 case ANCHOR_LOOK_BEHIND_NOT:
2955 r = subexp_recursive_check_trav(an->target, env);
2956 break;
2957 }
2958 }
2959 break;
2960
2961 case NT_ENCLOSE:
2962 {
2963 EncloseNode* en = NENCLOSE(node);
2964
2965 if (! IS_ENCLOSE_RECURSION(en)) {
2966 if (IS_ENCLOSE_CALLED(en)) {
2967 SET_ENCLOSE_STATUS(node, NST_MARK1);
2968 r = subexp_recursive_check(en->target);
2969 if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
2970 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
2971 }
2972 }
2973 r = subexp_recursive_check_trav(en->target, env);
2974 if (IS_ENCLOSE_CALLED(en))
2975 r |= FOUND_CALLED_NODE;
2976 }
2977 break;
2978
2979 default:
2980 break;
2981 }
2982
2983 return r;
2984 }
2985
2986 static int
2987 setup_subexp_call(Node* node, ScanEnv* env)
2988 {
2989 int type;
2990 int r = 0;
2991
2992 type = NTYPE(node);
2993 switch (type) {
2994 case NT_LIST:
2995 do {
2996 r = setup_subexp_call(NCAR(node), env);
2997 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2998 break;
2999
3000 case NT_ALT:
3001 do {
3002 r = setup_subexp_call(NCAR(node), env);
3003 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3004 break;
3005
3006 case NT_QTFR:
3007 r = setup_subexp_call(NQTFR(node)->target, env);
3008 break;
3009 case NT_ENCLOSE:
3010 r = setup_subexp_call(NENCLOSE(node)->target, env);
3011 break;
3012
3013 case NT_CALL:
3014 {
3015 CallNode* cn = NCALL(node);
3016 Node** nodes = SCANENV_MEM_NODES(env);
3017
3018 if (cn->group_num != 0) {
3019 int gnum = cn->group_num;
3020
3021 #ifdef USE_NAMED_GROUP
3022 if (env->num_named > 0 &&
3023 IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
3024 !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
3025 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
3026 }
3027 #endif
3028 if (gnum > env->num_mem) {
3029 onig_scan_env_set_error_string(env,
3030 ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
3031 return ONIGERR_UNDEFINED_GROUP_REFERENCE;
3032 }
3033
3034 #ifdef USE_NAMED_GROUP
3035 set_call_attr:
3036 #endif
3037 cn->target = nodes[cn->group_num];
3038 if (IS_NULL(cn->target)) {
3039 onig_scan_env_set_error_string(env,
3040 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
3041 return ONIGERR_UNDEFINED_NAME_REFERENCE;
3042 }
3043 SET_ENCLOSE_STATUS(cn->target, NST_CALLED);
3044 BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);
3045 cn->unset_addr_list = env->unset_addr_list;
3046 }
3047 #ifdef USE_NAMED_GROUP
3048 else {
3049 int *refs;
3050
3051 int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
3052 &refs);
3053 if (n <= 0) {
3054 onig_scan_env_set_error_string(env,
3055 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
3056 return ONIGERR_UNDEFINED_NAME_REFERENCE;
3057 }
3058 else if (n > 1) {
3059 onig_scan_env_set_error_string(env,
3060 ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
3061 return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
3062 }
3063 else {
3064 cn->group_num = refs[0];
3065 goto set_call_attr;
3066 }
3067 }
3068 #endif
3069 }
3070 break;
3071
3072 case NT_ANCHOR:
3073 {
3074 AnchorNode* an = NANCHOR(node);
3075
3076 switch (an->type) {
3077 case ANCHOR_PREC_READ:
3078 case ANCHOR_PREC_READ_NOT:
3079 case ANCHOR_LOOK_BEHIND:
3080 case ANCHOR_LOOK_BEHIND_NOT:
3081 r = setup_subexp_call(an->target, env);
3082 break;
3083 }
3084 }
3085 break;
3086
3087 default:
3088 break;
3089 }
3090
3091 return r;
3092 }
3093 #endif
3094
3095 /* divide different length alternatives in look-behind.
3096 (?<=A|B) ==> (?<=A)|(?<=B)
3097 (?<!A|B) ==> (?<!A)(?<!B)
3098 */
3099 static int
3100 divide_look_behind_alternatives(Node* node)
3101 {
3102 Node *head, *np, *insert_node;
3103 AnchorNode* an = NANCHOR(node);
3104 int anc_type = an->type;
3105
3106 head = an->target;
3107 np = NCAR(head);
3108 swap_node(node, head);
3109 NCAR(node) = head;
3110 NANCHOR(head)->target = np;
3111
3112 np = node;
3113 while ((np = NCDR(np)) != NULL_NODE) {
3114 insert_node = onig_node_new_anchor(anc_type);
3115 CHECK_NULL_RETURN_MEMERR(insert_node);
3116 NANCHOR(insert_node)->target = NCAR(np);
3117 NCAR(np) = insert_node;
3118 }
3119
3120 if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
3121 np = node;
3122 do {
3123 SET_NTYPE(np, NT_LIST); /* alt -> list */
3124 } while ((np = NCDR(np)) != NULL_NODE);
3125 }
3126 return 0;
3127 }
3128
3129 static int
3130 setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
3131 {
3132 int r, len;
3133 AnchorNode* an = NANCHOR(node);
3134
3135 r = get_char_length_tree(an->target, reg, &len);
3136 if (r == 0)
3137 an->char_len = len;
3138 else if (r == GET_CHAR_LEN_VARLEN)
3139 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3140 else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
3141 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
3142 r = divide_look_behind_alternatives(node);
3143 else
3144 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3145 }
3146
3147 return r;
3148 }
3149
3150 static int
3151 next_setup(Node* node, Node* next_node, regex_t* reg)
3152 {
3153 int type;
3154
3155 retry:
3156 type = NTYPE(node);
3157 if (type == NT_QTFR) {
3158 QtfrNode* qn = NQTFR(node);
3159 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
3160 #ifdef USE_QTFR_PEEK_NEXT
3161 Node* n = get_head_value_node(next_node, 1, reg);
3162 /* '\0': for UTF-16BE etc... */
3163 if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') {
3164 qn->next_head_exact = n;
3165 }
3166 #endif
3167 /* automatic posseivation a*b ==> (?>a*)b */
3168 if (qn->lower <= 1) {
3169 int ttype = NTYPE(qn->target);
3170 if (IS_NODE_TYPE_SIMPLE(ttype)) {
3171 Node *x, *y;
3172 x = get_head_value_node(qn->target, 0, reg);
3173 if (IS_NOT_NULL(x)) {
3174 y = get_head_value_node(next_node, 0, reg);
3175 if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
3176 Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK);
3177 CHECK_NULL_RETURN_MEMERR(en);
3178 SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);
3179 swap_node(node, en);
3180 NENCLOSE(node)->target = en;
3181 }
3182 }
3183 }
3184 }
3185 }
3186 }
3187 else if (type == NT_ENCLOSE) {
3188 EncloseNode* en = NENCLOSE(node);
3189 if (en->type == ENCLOSE_MEMORY) {
3190 node = en->target;
3191 goto retry;
3192 }
3193 }
3194 return 0;
3195 }
3196
3197
3198 static int
3199 update_string_node_case_fold(regex_t* reg, Node *node)
3200 {
3201 UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
3202 UChar *sbuf, *ebuf, *sp;
3203 int r, i, len, sbuf_size;
3204 StrNode* sn = NSTR(node);
3205
3206 end = sn->end;
3207 sbuf_size = (int)(end - sn->s) * 2;
3208 sbuf = (UChar* )xmalloc(sbuf_size);
3209 CHECK_NULL_RETURN_MEMERR(sbuf);
3210 ebuf = sbuf + sbuf_size;
3211
3212 sp = sbuf;
3213 p = sn->s;
3214 while (p < end) {
3215 len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
3216 for (i = 0; i < len; i++) {
3217 if (sp >= ebuf) {
3218 sbuf = (UChar* )xrealloc(sbuf, sbuf_size * 2, sbuf_size);
3219 CHECK_NULL_RETURN_MEMERR(sbuf);
3220 sp = sbuf + sbuf_size;
3221 sbuf_size *= 2;
3222 ebuf = sbuf + sbuf_size;
3223 }
3224
3225 *sp++ = buf[i];
3226 }
3227 }
3228
3229 r = onig_node_str_set(node, sbuf, sp);
3230 if (r != 0) {
3231 xfree(sbuf);
3232 return r;
3233 }
3234
3235 xfree(sbuf);
3236 return 0;
3237 }
3238
3239 static int
3240 expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end,
3241 regex_t* reg)
3242 {
3243 int r;
3244 Node *node;
3245
3246 node = onig_node_new_str(s, end);
3247 if (IS_NULL(node)) return ONIGERR_MEMORY;
3248
3249 r = update_string_node_case_fold(reg, node);
3250 if (r != 0) {
3251 onig_node_free(node);
3252 return r;
3253 }
3254
3255 NSTRING_SET_AMBIG(node);
3256 NSTRING_SET_DONT_GET_OPT_INFO(node);
3257 *rnode = node;
3258 return 0;
3259 }
3260
3261 static int
3262 expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
3263 UChar *p, int slen, UChar *end,
3264 regex_t* reg, Node **rnode)
3265 {
3266 int r, i, j, len, varlen;
3267 Node *anode, *var_anode, *snode, *xnode, *an;
3268 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
3269 xnode = NULL_NODE;
3270
3271 *rnode = var_anode = NULL_NODE;
3272
3273 varlen = 0;
3274 for (i = 0; i < item_num; i++) {
3275 if (items[i].byte_len != slen) {
3276 varlen = 1;
3277 break;
3278 }
3279 }
3280
3281 if (varlen != 0) {
3282 *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3283 if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
3284
3285 xnode = onig_node_new_list(NULL, NULL);
3286 if (IS_NULL(xnode)) goto mem_err;
3287 NCAR(var_anode) = xnode;
3288
3289 anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3290 if (IS_NULL(anode)) goto mem_err;
3291 NCAR(xnode) = anode;
3292 }
3293 else {
3294 *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3295 if (IS_NULL(anode)) return ONIGERR_MEMORY;
3296 }
3297
3298 snode = onig_node_new_str(p, p + slen);
3299 if (IS_NULL(snode)) goto mem_err;
3300
3301 NCAR(anode) = snode;
3302
3303 for (i = 0; i < item_num; i++) {
3304 snode = onig_node_new_str(NULL, NULL);
3305 if (IS_NULL(snode)) goto mem_err;
3306
3307 for (j = 0; j < items[i].code_len; j++) {
3308 len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
3309 if (len < 0) {
3310 r = len;
3311 goto mem_err2;
3312 }
3313
3314 r = onig_node_str_cat(snode, buf, buf + len);
3315 if (r != 0) goto mem_err2;
3316 }
3317
3318 an = onig_node_new_alt(NULL_NODE, NULL_NODE);
3319 if (IS_NULL(an)) {
3320 goto mem_err2;
3321 }
3322
3323 if (items[i].byte_len != slen) {
3324 Node *rem = NULL_NODE;
3325 UChar *q = p + items[i].byte_len;
3326
3327 if (q < end) {
3328 r = expand_case_fold_make_rem_string(&rem, q, end, reg);
3329 if (r != 0) {
3330 onig_node_free(an);
3331 goto mem_err2;
3332 }
3333
3334 xnode = onig_node_list_add(NULL_NODE, snode);
3335 if (IS_NULL(xnode)) {
3336 onig_node_free(an);
3337 onig_node_free(rem);
3338 goto mem_err2;
3339 }
3340 if (IS_NULL(onig_node_list_add(xnode, rem))) {
3341 onig_node_free(an);
3342 onig_node_free(xnode);
3343 onig_node_free(rem);
3344 goto mem_err;
3345 }
3346
3347 NCAR(an) = xnode;
3348 }
3349 else {
3350 NCAR(an) = snode;
3351 }
3352
3353 if (var_anode == NULL) {
3354 onig_node_free(an);
3355 onig_node_free(xnode);
3356 onig_node_free(rem);
3357 goto mem_err2;
3358 }
3359 NCDR(var_anode) = an;
3360 var_anode = an;
3361 }
3362 else {
3363 NCAR(an) = snode;
3364 NCDR(anode) = an;
3365 anode = an;
3366 }
3367 }
3368
3369 return varlen;
3370
3371 mem_err2:
3372 onig_node_free(snode);
3373
3374 mem_err:
3375 onig_node_free(*rnode);
3376
3377 return ONIGERR_MEMORY;
3378 }
3379
3380 static int
3381 expand_case_fold_string(Node* node, regex_t* reg)
3382 {
3383 #define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
3384
3385 int r, n, len, alt_num;
3386 UChar *start, *end, *p;
3387 Node *top_root, *root, *snode, *prev_node;
3388 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
3389 StrNode* sn = NSTR(node);
3390
3391 if (NSTRING_IS_AMBIG(node)) return 0;
3392
3393 start = sn->s;
3394 end = sn->end;
3395 if (start >= end) return 0;
3396
3397 r = 0;
3398 top_root = root = prev_node = snode = NULL_NODE;
3399 alt_num = 1;
3400 p = start;
3401 while (p < end) {
3402 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,
3403 p, end, items);
3404 if (n < 0) {
3405 r = n;
3406 goto err;
3407 }
3408
3409 len = enclen(reg->enc, p);
3410
3411 if (n == 0) {
3412 if (IS_NULL(snode)) {
3413 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3414 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3415 if (IS_NULL(root)) {
3416 onig_node_free(prev_node);
3417 goto mem_err;
3418 }
3419 }
3420
3421 prev_node = snode = onig_node_new_str(NULL, NULL);
3422 if (IS_NULL(snode)) goto mem_err;
3423 if (IS_NOT_NULL(root)) {
3424 if (IS_NULL(onig_node_list_add(root, snode))) {
3425 onig_node_free(snode);
3426 goto mem_err;
3427 }
3428 }
3429 }
3430
3431 r = onig_node_str_cat(snode, p, p + len);
3432 if (r != 0) goto err;
3433 }
3434 else {
3435 alt_num *= (n + 1);
3436 if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
3437
3438 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3439 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3440 if (IS_NULL(root)) {
3441 onig_node_free(prev_node);
3442 goto mem_err;
3443 }
3444 }
3445
3446 r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
3447 if (r < 0) goto mem_err;
3448 if (r == 1) {
3449 if (IS_NULL(root)) {
3450 top_root = prev_node;
3451 }
3452 else {
3453 if (IS_NULL(onig_node_list_add(root, prev_node))) {
3454 onig_node_free(prev_node);
3455 goto mem_err;
3456 }
3457 }
3458
3459 root = NCAR(prev_node);
3460 }
3461 else { /* r == 0 */
3462 if (IS_NOT_NULL(root)) {
3463 if (IS_NULL(onig_node_list_add(root, prev_node))) {
3464 onig_node_free(prev_node);
3465 goto mem_err;
3466 }
3467 }
3468 }
3469
3470 snode = NULL_NODE;
3471 }
3472
3473 p += len;
3474 }
3475
3476 if (p < end) {
3477 Node *srem;
3478
3479 r = expand_case_fold_make_rem_string(&srem, p, end, reg);
3480 if (r != 0) goto mem_err;
3481
3482 if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
3483 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3484 if (IS_NULL(root)) {
3485 onig_node_free(srem);
3486 onig_node_free(prev_node);
3487 goto mem_err;
3488 }
3489 }
3490
3491 if (IS_NULL(root)) {
3492 prev_node = srem;
3493 }
3494 else {
3495 if (IS_NULL(onig_node_list_add(root, srem))) {
3496 onig_node_free(srem);
3497 goto mem_err;
3498 }
3499 }
3500 }
3501
3502 /* ending */
3503 top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
3504 swap_node(node, top_root);
3505 onig_node_free(top_root);
3506 return 0;
3507
3508 mem_err:
3509 r = ONIGERR_MEMORY;
3510
3511 err:
3512 onig_node_free(top_root);
3513 return r;
3514 }
3515
3516
3517 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3518
3519 #define CEC_THRES_NUM_BIG_REPEAT 512
3520 #define CEC_INFINITE_NUM 0x7fffffff
3521
3522 #define CEC_IN_INFINITE_REPEAT (1<<0)
3523 #define CEC_IN_FINITE_REPEAT (1<<1)
3524 #define CEC_CONT_BIG_REPEAT (1<<2)
3525
3526 static int
3527 setup_comb_exp_check(Node* node, int state, ScanEnv* env)
3528 {
3529 int type;
3530 int r = state;
3531
3532 type = NTYPE(node);
3533 switch (type) {
3534 case NT_LIST:
3535 {
3536 Node* prev = NULL_NODE;
3537 do {
3538 r = setup_comb_exp_check(NCAR(node), r, env);
3539 prev = NCAR(node);
3540 } while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
3541 }
3542 break;
3543
3544 case NT_ALT:
3545 {
3546 int ret;
3547 do {
3548 ret = setup_comb_exp_check(NCAR(node), state, env);
3549 r |= ret;
3550 } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));
3551 }
3552 break;
3553
3554 case NT_QTFR:
3555 {
3556 int child_state = state;
3557 int add_state = 0;
3558 QtfrNode* qn = NQTFR(node);
3559 Node* target = qn->target;
3560 int var_num;
3561
3562 if (! IS_REPEAT_INFINITE(qn->upper)) {
3563 if (qn->upper > 1) {
3564 /* {0,1}, {1,1} are allowed */
3565 child_state |= CEC_IN_FINITE_REPEAT;
3566
3567 /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
3568 if (env->backrefed_mem == 0) {
3569 if (NTYPE(qn->target) == NT_ENCLOSE) {
3570 EncloseNode* en = NENCLOSE(qn->target);
3571 if (en->type == ENCLOSE_MEMORY) {
3572 if (NTYPE(en->target) == NT_QTFR) {
3573 QtfrNode* q = NQTFR(en->target);
3574 if (IS_REPEAT_INFINITE(q->upper)
3575 && q->greedy == qn->greedy) {
3576 qn->upper = (qn->lower == 0 ? 1 : qn->lower);
3577 if (qn->upper == 1)
3578 child_state = state;
3579 }
3580 }
3581 }
3582 }
3583 }
3584 }
3585 }
3586
3587 if (state & CEC_IN_FINITE_REPEAT) {
3588 qn->comb_exp_check_num = -1;
3589 }
3590 else {
3591 if (IS_REPEAT_INFINITE(qn->upper)) {
3592 var_num = CEC_INFINITE_NUM;
3593 child_state |= CEC_IN_INFINITE_REPEAT;
3594 }
3595 else {
3596 var_num = qn->upper - qn->lower;
3597 }
3598
3599 if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
3600 add_state |= CEC_CONT_BIG_REPEAT;
3601
3602 if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
3603 ((state & CEC_CONT_BIG_REPEAT) != 0 &&
3604 var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
3605 if (qn->comb_exp_check_num == 0) {
3606 env->num_comb_exp_check++;
3607 qn->comb_exp_check_num = env->num_comb_exp_check;
3608 if (env->curr_max_regnum > env->comb_exp_max_regnum)
3609 env->comb_exp_max_regnum = env->curr_max_regnum;
3610 }
3611 }
3612 }
3613
3614 r = setup_comb_exp_check(target, child_state, env);
3615 r |= add_state;
3616 }
3617 break;
3618
3619 case NT_ENCLOSE:
3620 {
3621 EncloseNode* en = NENCLOSE(node);
3622
3623 switch (en->type) {
3624 case ENCLOSE_MEMORY:
3625 {
3626 if (env->curr_max_regnum < en->regnum)
3627 env->curr_max_regnum = en->regnum;
3628
3629 r = setup_comb_exp_check(en->target, state, env);
3630 }
3631 break;
3632
3633 default:
3634 r = setup_comb_exp_check(en->target, state, env);
3635 break;
3636 }
3637 }
3638 break;
3639
3640 #ifdef USE_SUBEXP_CALL
3641 case NT_CALL:
3642 if (IS_CALL_RECURSION(NCALL(node)))
3643 env->has_recursion = 1;
3644 else
3645 r = setup_comb_exp_check(NCALL(node)->target, state, env);
3646 break;
3647 #endif
3648
3649 default:
3650 break;
3651 }
3652
3653 return r;
3654 }
3655 #endif
3656
3657 #define IN_ALT (1<<0)
3658 #define IN_NOT (1<<1)
3659 #define IN_REPEAT (1<<2)
3660 #define IN_VAR_REPEAT (1<<3)
3661
3662 /* setup_tree does the following work.
3663 1. check empty loop. (set qn->target_empty_info)
3664 2. expand ignore-case in char class.
3665 3. set memory status bit flags. (reg->mem_stats)
3666 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
3667 5. find invalid patterns in look-behind.
3668 6. expand repeated string.
3669 */
3670 static int
3671 setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
3672 {
3673 int type;
3674 int r = 0;
3675
3676 type = NTYPE(node);
3677 switch (type) {
3678 case NT_LIST:
3679 {
3680 Node* prev = NULL_NODE;
3681 do {
3682 r = setup_tree(NCAR(node), reg, state, env);
3683 if (IS_NOT_NULL(prev) && r == 0) {
3684 r = next_setup(prev, NCAR(node), reg);
3685 }
3686 prev = NCAR(node);
3687 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3688 }
3689 break;
3690
3691 case NT_ALT:
3692 do {
3693 r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);
3694 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3695 break;
3696
3697 case NT_CCLASS:
3698 break;
3699
3700 case NT_STR:
3701 if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
3702 r = expand_case_fold_string(node, reg);
3703 }
3704 break;
3705
3706 case NT_CTYPE:
3707 case NT_CANY:
3708 break;
3709
3710 #ifdef USE_SUBEXP_CALL
3711 case NT_CALL:
3712 break;
3713 #endif
3714
3715 case NT_BREF:
3716 {
3717 int i;
3718 int* p;
3719 Node** nodes = SCANENV_MEM_NODES(env);
3720 BRefNode* br = NBREF(node);
3721 p = BACKREFS_P(br);
3722 for (i = 0; i < br->back_num; i++) {
3723 if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
3724 BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
3725 BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
3726 #ifdef USE_BACKREF_WITH_LEVEL
3727 if (IS_BACKREF_NEST_LEVEL(br)) {
3728 BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
3729 }
3730 #endif
3731 SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
3732 }
3733 }
3734 break;
3735
3736 case NT_QTFR:
3737 {
3738 OnigDistance d;
3739 QtfrNode* qn = NQTFR(node);
3740 Node* target = qn->target;
3741
3742 if ((state & IN_REPEAT) != 0) {
3743 qn->state |= NST_IN_REPEAT;
3744 }
3745
3746 if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
3747 r = get_min_match_length(target, &d, env);
3748 if (r) break;
3749 if (d == 0) {
3750 qn->target_empty_info = NQ_TARGET_IS_EMPTY;
3751 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3752 r = quantifiers_memory_node_info(target);
3753 if (r < 0) break;
3754 if (r > 0) {
3755 qn->target_empty_info = r;
3756 }
3757 #endif
3758 #if 0
3759 r = get_max_match_length(target, &d, env);
3760 if (r == 0 && d == 0) {
3761 /* ()* ==> ()?, ()+ ==> () */
3762 qn->upper = 1;
3763 if (qn->lower > 1) qn->lower = 1;
3764 if (NTYPE(target) == NT_STR) {
3765 qn->upper = qn->lower = 0; /* /(?:)+/ ==> // */
3766 }
3767 }
3768 #endif
3769 }
3770 }
3771
3772 state |= IN_REPEAT;
3773 if (qn->lower != qn->upper)
3774 state |= IN_VAR_REPEAT;
3775 r = setup_tree(target, reg, state, env);
3776 if (r) break;
3777
3778 /* expand string */
3779 #define EXPAND_STRING_MAX_LENGTH 100
3780 if (NTYPE(target) == NT_STR) {
3781 if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper &&
3782 qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) {
3783 int len = NSTRING_LEN(target);
3784 StrNode* sn = NSTR(target);
3785
3786 if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) {
3787 int i, n = qn->lower;
3788 onig_node_conv_to_str_node(node, NSTR(target)->flag);
3789 for (i = 0; i < n; i++) {
3790 r = onig_node_str_cat(node, sn->s, sn->end);
3791 if (r) break;
3792 }
3793 onig_node_free(target);
3794 break; /* break case NT_QTFR: */
3795 }
3796 }
3797 }
3798
3799 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
3800 if (qn->greedy && (qn->target_empty_info != 0)) {
3801 if (NTYPE(target) == NT_QTFR) {
3802 QtfrNode* tqn = NQTFR(target);
3803 if (IS_NOT_NULL(tqn->head_exact)) {
3804 qn->head_exact = tqn->head_exact;
3805 tqn->head_exact = NULL;
3806 }
3807 }
3808 else {
3809 qn->head_exact = get_head_value_node(qn->target, 1, reg);
3810 }
3811 }
3812 #endif
3813 }
3814 break;
3815
3816 case NT_ENCLOSE:
3817 {
3818 EncloseNode* en = NENCLOSE(node);
3819
3820 switch (en->type) {
3821 case ENCLOSE_OPTION:
3822 {
3823 OnigOptionType options = reg->options;
3824 reg->options = NENCLOSE(node)->option;
3825 r = setup_tree(NENCLOSE(node)->target, reg, state, env);
3826 reg->options = options;
3827 }
3828 break;
3829
3830 case ENCLOSE_MEMORY:
3831 if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) {
3832 BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
3833 /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
3834 }
3835 r = setup_tree(en->target, reg, state, env);
3836 break;
3837
3838 case ENCLOSE_STOP_BACKTRACK:
3839 {
3840 Node* target = en->target;
3841 r = setup_tree(target, reg, state, env);
3842 if (NTYPE(target) == NT_QTFR) {
3843 QtfrNode* tqn = NQTFR(target);
3844 if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
3845 tqn->greedy != 0) { /* (?>a*), a*+ etc... */
3846 int qtype = NTYPE(tqn->target);
3847 if (IS_NODE_TYPE_SIMPLE(qtype))
3848 SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);
3849 }
3850 }
3851 }
3852 break;
3853 }
3854 }
3855 break;
3856
3857 case NT_ANCHOR:
3858 {
3859 AnchorNode* an = NANCHOR(node);
3860
3861 switch (an->type) {
3862 case ANCHOR_PREC_READ:
3863 r = setup_tree(an->target, reg, state, env);
3864 break;
3865 case ANCHOR_PREC_READ_NOT:
3866 r = setup_tree(an->target, reg, (state | IN_NOT), env);
3867 break;
3868
3869 /* allowed node types in look-behind */
3870 #define ALLOWED_TYPE_IN_LB \
3871 ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
3872 BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
3873
3874 #define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY )
3875 #define ALLOWED_ENCLOSE_IN_LB_NOT 0
3876
3877 #define ALLOWED_ANCHOR_IN_LB \
3878 ( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
3879 #define ALLOWED_ANCHOR_IN_LB_NOT \
3880 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
3881
3882 case ANCHOR_LOOK_BEHIND:
3883 {
3884 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
3885 ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB);
3886 if (r < 0) return r;
3887 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3888 r = setup_look_behind(node, reg, env);
3889 if (r != 0) return r;
3890 r = setup_tree(an->target, reg, state, env);
3891 }
3892 break;
3893
3894 case ANCHOR_LOOK_BEHIND_NOT:
3895 {
3896 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
3897 ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
3898 if (r < 0) return r;
3899 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3900 r = setup_look_behind(node, reg, env);
3901 if (r != 0) return r;
3902 r = setup_tree(an->target, reg, (state | IN_NOT), env);
3903 }
3904 break;
3905 }
3906 }
3907 break;
3908
3909 default:
3910 break;
3911 }
3912
3913 return r;
3914 }
3915
3916 /* set skip map for Boyer-Moor search */
3917 static int
3918 set_bm_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
3919 UChar skip[], int** int_skip)
3920 {
3921 int i, len;
3922
3923 len = (int)(end - s);
3924 if (len < ONIG_CHAR_TABLE_SIZE) {
3925 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar)len;
3926
3927 for (i = 0; i < len - 1; i++)
3928 skip[s[i]] = (UChar)(len - 1 - i);
3929 }
3930 else {
3931 if (IS_NULL(*int_skip)) {
3932 *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
3933 if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
3934 }
3935 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = len;
3936
3937 for (i = 0; i < len - 1; i++)
3938 (*int_skip)[s[i]] = len - 1 - i;
3939 }
3940 return 0;
3941 }
3942
3943 #define OPT_EXACT_MAXLEN 24
3944
3945 typedef struct {
3946 OnigDistance min; /* min byte length */
3947 OnigDistance max; /* max byte length */
3948 } MinMaxLen;
3949
3950 typedef struct {
3951 MinMaxLen mmd;
3952 OnigEncoding enc;
3953 OnigOptionType options;
3954 OnigCaseFoldType case_fold_flag;
3955 ScanEnv* scan_env;
3956 } OptEnv;
3957
3958 typedef struct {
3959 int left_anchor;
3960 int right_anchor;
3961 } OptAncInfo;
3962
3963 typedef struct {
3964 MinMaxLen mmd; /* info position */
3965 OptAncInfo anc;
3966
3967 int reach_end;
3968 int ignore_case;
3969 int len;
3970 UChar s[OPT_EXACT_MAXLEN];
3971 } OptExactInfo;
3972
3973 typedef struct {
3974 MinMaxLen mmd; /* info position */
3975 OptAncInfo anc;
3976
3977 int value; /* weighted value */
3978 UChar map[ONIG_CHAR_TABLE_SIZE];
3979 } OptMapInfo;
3980
3981 typedef struct {
3982 MinMaxLen len;
3983
3984 OptAncInfo anc;
3985 OptExactInfo exb; /* boundary */
3986 OptExactInfo exm; /* middle */
3987 OptExactInfo expr; /* prec read (?=...) */
3988
3989 OptMapInfo map; /* boundary */
3990 } NodeOptInfo;
3991
3992
3993 static int
3994 map_position_value(OnigEncoding enc, int i)
3995 {
3996 static const short int ByteValTable[] = {
3997 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
3998 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3999 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
4000 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
4001 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4002 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
4003 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4004 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
4005 };
4006
4007 if (i < (int )(sizeof(ByteValTable)/sizeof(ByteValTable[0]))) {
4008 if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
4009 return 20;
4010 else
4011 return (int )ByteValTable[i];
4012 }
4013 else
4014 return 4; /* Take it easy. */
4015 }
4016
4017 static int
4018 distance_value(MinMaxLen* mm)
4019 {
4020 /* 1000 / (min-max-dist + 1) */
4021 static const short int dist_vals[] = {
4022 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
4023 91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
4024 48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
4025 32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
4026 24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
4027 20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
4028 16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
4029 14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
4030 12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
4031 11, 11, 11, 11, 11, 10, 10, 10, 10, 10
4032 };
4033
4034 int d;
4035
4036 if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
4037
4038 d = mm->max - mm->min;
4039 if (d < (int )(sizeof(dist_vals)/sizeof(dist_vals[0])))
4040 /* return dist_vals[d] * 16 / (mm->min + 12); */
4041 return (int )dist_vals[d];
4042 else
4043 return 1;
4044 }
4045
4046 static int
4047 comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2)
4048 {
4049 if (v2 <= 0) return -1;
4050 if (v1 <= 0) return 1;
4051
4052 v1 *= distance_value(d1);
4053 v2 *= distance_value(d2);
4054
4055 if (v2 > v1) return 1;
4056 if (v2 < v1) return -1;
4057
4058 if (d2->min < d1->min) return 1;
4059 if (d2->min > d1->min) return -1;
4060 return 0;
4061 }
4062
4063 static int
4064 is_equal_mml(MinMaxLen* a, MinMaxLen* b)
4065 {
4066 return (a->min == b->min && a->max == b->max) ? 1 : 0;
4067 }
4068
4069
4070 static void
4071 set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max)
4072 {
4073 mml->min = min;
4074 mml->max = max;
4075 }
4076
4077 static void
4078 clear_mml(MinMaxLen* mml)
4079 {
4080 mml->min = mml->max = 0;
4081 }
4082
4083 static void
4084 copy_mml(MinMaxLen* to, MinMaxLen* from)
4085 {
4086 to->min = from->min;
4087 to->max = from->max;
4088 }
4089
4090 static void
4091 add_mml(MinMaxLen* to, MinMaxLen* from)
4092 {
4093 to->min = distance_add(to->min, from->min);
4094 to->max = distance_add(to->max, from->max);
4095 }
4096
4097 #if 0
4098 static void
4099 add_len_mml(MinMaxLen* to, OnigDistance len)
4100 {
4101 to->min = distance_add(to->min, len);
4102 to->max = distance_add(to->max, len);
4103 }
4104 #endif
4105
4106 static void
4107 alt_merge_mml(MinMaxLen* to, MinMaxLen* from)
4108 {
4109 if (to->min > from->min) to->min = from->min;
4110 if (to->max < from->max) to->max = from->max;
4111 }
4112
4113 static void
4114 copy_opt_env(OptEnv* to, OptEnv* from)
4115 {
4116 *to = *from;
4117 }
4118
4119 static void
4120 clear_opt_anc_info(OptAncInfo* anc)
4121 {
4122 anc->left_anchor = 0;
4123 anc->right_anchor = 0;
4124 }
4125
4126 static void
4127 copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from)
4128 {
4129 *to = *from;
4130 }
4131
4132 static void
4133 concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,
4134 OnigDistance left_len, OnigDistance right_len)
4135 {
4136 clear_opt_anc_info(to);
4137
4138 to->left_anchor = left->left_anchor;
4139 if (left_len == 0) {
4140 to->left_anchor |= right->left_anchor;
4141 }
4142
4143 to->right_anchor = right->right_anchor;
4144 if (right_len == 0) {
4145 to->right_anchor |= left->right_anchor;
4146 }
4147 }
4148
4149 static int
4150 is_left_anchor(int anc)
4151 {
4152 if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||
4153 anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||
4154 anc == ANCHOR_PREC_READ_NOT)
4155 return 0;
4156
4157 return 1;
4158 }
4159
4160 static int
4161 is_set_opt_anc_info(OptAncInfo* to, int anc)
4162 {
4163 if ((to->left_anchor & anc) != 0) return 1;
4164
4165 return ((to->right_anchor & anc) != 0 ? 1 : 0);
4166 }
4167
4168 static void
4169 add_opt_anc_info(OptAncInfo* to, int anc)
4170 {
4171 if (is_left_anchor(anc))
4172 to->left_anchor |= anc;
4173 else
4174 to->right_anchor |= anc;
4175 }
4176
4177 static void
4178 remove_opt_anc_info(OptAncInfo* to, int anc)
4179 {
4180 if (is_left_anchor(anc))
4181 to->left_anchor &= ~anc;
4182 else
4183 to->right_anchor &= ~anc;
4184 }
4185
4186 static void
4187 alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add)
4188 {
4189 to->left_anchor &= add->left_anchor;
4190 to->right_anchor &= add->right_anchor;
4191 }
4192
4193 static int
4194 is_full_opt_exact_info(OptExactInfo* ex)
4195 {
4196 return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);
4197 }
4198
4199 static void
4200 clear_opt_exact_info(OptExactInfo* ex)
4201 {
4202 clear_mml(&ex->mmd);
4203 clear_opt_anc_info(&ex->anc);
4204 ex->reach_end = 0;
4205 ex->ignore_case = 0;
4206 ex->len = 0;
4207 ex->s[0] = '\0';
4208 }
4209
4210 static void
4211 copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from)
4212 {
4213 *to = *from;
4214 }
4215
4216 static void
4217 concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
4218 {
4219 int i, j, len;
4220 UChar *p, *end;
4221 OptAncInfo tanc;
4222
4223 if (! to->ignore_case && add->ignore_case) {
4224 if (to->len >= add->len) return ; /* avoid */
4225
4226 to->ignore_case = 1;
4227 }
4228
4229 p = add->s;
4230 end = p + add->len;
4231 for (i = to->len; p < end; ) {
4232 len = enclen(enc, p);
4233 if (i + len > OPT_EXACT_MAXLEN) break;
4234 for (j = 0; j < len && p < end; j++)
4235 to->s[i++] = *p++;
4236 }
4237
4238 to->len = i;
4239 to->reach_end = (p == end ? add->reach_end : 0);
4240
4241 concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
4242 if (! to->reach_end) tanc.right_anchor = 0;
4243 copy_opt_anc_info(&to->anc, &tanc);
4244 }
4245
4246 static void
4247 concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end,
4248 int raw ARG_UNUSED, OnigEncoding enc)
4249 {
4250 int i, j, len;
4251 UChar *p;
4252
4253 for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
4254 len = enclen(enc, p);
4255 if (i + len > OPT_EXACT_MAXLEN) break;
4256 for (j = 0; j < len && p < end; j++)
4257 to->s[i++] = *p++;
4258 }
4259
4260 to->len = i;
4261 }
4262
4263 static void
4264 alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
4265 {
4266 int i, j, len;
4267
4268 if (add->len == 0 || to->len == 0) {
4269 clear_opt_exact_info(to);
4270 return ;
4271 }
4272
4273 if (! is_equal_mml(&to->mmd, &add->mmd)) {
4274 clear_opt_exact_info(to);
4275 return ;
4276 }
4277
4278 for (i = 0; i < to->len && i < add->len; ) {
4279 if (to->s[i] != add->s[i]) break;
4280 len = enclen(env->enc, to->s + i);
4281
4282 for (j = 1; j < len; j++) {
4283 if (to->s[i+j] != add->s[i+j]) break;
4284 }
4285 if (j < len) break;
4286 i += len;
4287 }
4288
4289 if (! add->reach_end || i < add->len || i < to->len) {
4290 to->reach_end = 0;
4291 }
4292 to->len = i;
4293 to->ignore_case |= add->ignore_case;
4294
4295 alt_merge_opt_anc_info(&to->anc, &add->anc);
4296 if (! to->reach_end) to->anc.right_anchor = 0;
4297 }
4298
4299 static void
4300 select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
4301 {
4302 int v1, v2;
4303
4304 v1 = now->len;
4305 v2 = alt->len;
4306
4307 if (v2 == 0) {
4308 return ;
4309 }
4310 else if (v1 == 0) {
4311 copy_opt_exact_info(now, alt);
4312 return ;
4313 }
4314 else if (v1 <= 2 && v2 <= 2) {
4315 /* ByteValTable[x] is big value --> low price */
4316 v2 = map_position_value(enc, now->s[0]);
4317 v1 = map_position_value(enc, alt->s[0]);
4318
4319 if (now->len > 1) v1 += 5;
4320 if (alt->len > 1) v2 += 5;
4321 }
4322
4323 if (now->ignore_case == 0) v1 *= 2;
4324 if (alt->ignore_case == 0) v2 *= 2;
4325
4326 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4327 copy_opt_exact_info(now, alt);
4328 }
4329
4330 static void
4331 clear_opt_map_info(OptMapInfo* map)
4332 {
4333 static const OptMapInfo clean_info = {
4334 {0, 0}, {0, 0}, 0,
4335 {
4336 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4337 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4338 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4339 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4340 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4341 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4342 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4343 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4344 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4345 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4346 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4347 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4348 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4349 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4350 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4351 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4352 }
4353 };
4354
4355 xmemcpy(map, &clean_info, sizeof(OptMapInfo));
4356 }
4357
4358 static void
4359 copy_opt_map_info(OptMapInfo* to, OptMapInfo* from)
4360 {
4361 *to = *from;
4362 }
4363
4364 static void
4365 add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc)
4366 {
4367 if (map->map[c] == 0) {
4368 map->map[c] = 1;
4369 map->value += map_position_value(enc, c);
4370 }
4371 }
4372
4373 static int
4374 add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
4375 OnigEncoding enc, OnigCaseFoldType case_fold_flag)
4376 {
4377 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
4378 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
4379 int i, n;
4380
4381 add_char_opt_map_info(map, p[0], enc);
4382
4383 case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
4384 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);
4385 if (n < 0) return n;
4386
4387 for (i = 0; i < n; i++) {
4388 ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
4389 add_char_opt_map_info(map, buf[0], enc);
4390 }
4391
4392 return 0;
4393 }
4394
4395 static void
4396 select_opt_map_info(OptMapInfo* now, OptMapInfo* alt)
4397 {
4398 static int z = 1<<15; /* 32768: something big value */
4399
4400 int v1, v2;
4401
4402 if (alt->value == 0) return ;
4403 if (now->value == 0) {
4404 copy_opt_map_info(now, alt);
4405 return ;
4406 }
4407
4408 v1 = z / now->value;
4409 v2 = z / alt->value;
4410 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4411 copy_opt_map_info(now, alt);
4412 }
4413
4414 static int
4415 comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m)
4416 {
4417 #define COMP_EM_BASE 20
4418 int ve, vm;
4419
4420 if (m->value <= 0) return -1;
4421
4422 ve = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2);
4423 vm = COMP_EM_BASE * 5 * 2 / m->value;
4424 return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
4425 }
4426
4427 static void
4428 alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add)
4429 {
4430 int i, val;
4431
4432 /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
4433 if (to->value == 0) return ;
4434 if (add->value == 0 || to->mmd.max < add->mmd.min) {
4435 clear_opt_map_info(to);
4436 return ;
4437 }
4438
4439 alt_merge_mml(&to->mmd, &add->mmd);
4440
4441 val = 0;
4442 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
4443 if (add->map[i])
4444 to->map[i] = 1;
4445
4446 if (to->map[i])
4447 val += map_position_value(enc, i);
4448 }
4449 to->value = val;
4450
4451 alt_merge_opt_anc_info(&to->anc, &add->anc);
4452 }
4453
4454 static void
4455 set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd)
4456 {
4457 copy_mml(&(opt->exb.mmd), mmd);
4458 copy_mml(&(opt->expr.mmd), mmd);
4459 copy_mml(&(opt->map.mmd), mmd);
4460 }
4461
4462 static void
4463 clear_node_opt_info(NodeOptInfo* opt)
4464 {
4465 clear_mml(&opt->len);
4466 clear_opt_anc_info(&opt->anc);
4467 clear_opt_exact_info(&opt->exb);
4468 clear_opt_exact_info(&opt->exm);
4469 clear_opt_exact_info(&opt->expr);
4470 clear_opt_map_info(&opt->map);
4471 }
4472
4473 static void
4474 copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from)
4475 {
4476 *to = *from;
4477 }
4478
4479 static void
4480 concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
4481 {
4482 int exb_reach, exm_reach;
4483 OptAncInfo tanc;
4484
4485 concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
4486 copy_opt_anc_info(&to->anc, &tanc);
4487
4488 if (add->exb.len > 0 && to->len.max == 0) {
4489 concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
4490 to->len.max, add->len.max);
4491 copy_opt_anc_info(&add->exb.anc, &tanc);
4492 }
4493
4494 if (add->map.value > 0 && to->len.max == 0) {
4495 if (add->map.mmd.max == 0)
4496 add->map.anc.left_anchor |= to->anc.left_anchor;
4497 }
4498
4499 exb_reach = to->exb.reach_end;
4500 exm_reach = to->exm.reach_end;
4501
4502 if (add->len.max != 0)
4503 to->exb.reach_end = to->exm.reach_end = 0;
4504
4505 if (add->exb.len > 0) {
4506 if (exb_reach) {
4507 concat_opt_exact_info(&to->exb, &add->exb, enc);
4508 clear_opt_exact_info(&add->exb);
4509 }
4510 else if (exm_reach) {
4511 concat_opt_exact_info(&to->exm, &add->exb, enc);
4512 clear_opt_exact_info(&add->exb);
4513 }
4514 }
4515 select_opt_exact_info(enc, &to->exm, &add->exb);
4516 select_opt_exact_info(enc, &to->exm, &add->exm);
4517
4518 if (to->expr.len > 0) {
4519 if (add->len.max > 0) {
4520 if (to->expr.len > (int )add->len.max)
4521 to->expr.len = add->len.max;
4522
4523 if (to->expr.mmd.max == 0)
4524 select_opt_exact_info(enc, &to->exb, &to->expr);
4525 else
4526 select_opt_exact_info(enc, &to->exm, &to->expr);
4527 }
4528 }
4529 else if (add->expr.len > 0) {
4530 copy_opt_exact_info(&to->expr, &add->expr);
4531 }
4532
4533 select_opt_map_info(&to->map, &add->map);
4534
4535 add_mml(&to->len, &add->len);
4536 }
4537
4538 static void
4539 alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env)
4540 {
4541 alt_merge_opt_anc_info (&to->anc, &add->anc);
4542 alt_merge_opt_exact_info(&to->exb, &add->exb, env);
4543 alt_merge_opt_exact_info(&to->exm, &add->exm, env);
4544 alt_merge_opt_exact_info(&to->expr, &add->expr, env);
4545 alt_merge_opt_map_info(env->enc, &to->map, &add->map);
4546
4547 alt_merge_mml(&to->len, &add->len);
4548 }
4549
4550
4551 #define MAX_NODE_OPT_INFO_REF_COUNT 5
4552
4553 static int
4554 optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
4555 {
4556 int type;
4557 int r = 0;
4558
4559 clear_node_opt_info(opt);
4560 set_bound_node_opt_info(opt, &env->mmd);
4561
4562 type = NTYPE(node);
4563 switch (type) {
4564 case NT_LIST:
4565 {
4566 OptEnv nenv;
4567 NodeOptInfo nopt;
4568 Node* nd = node;
4569
4570 copy_opt_env(&nenv, env);
4571 do {
4572 r = optimize_node_left(NCAR(nd), &nopt, &nenv);
4573 if (r == 0) {
4574 add_mml(&nenv.mmd, &nopt.len);
4575 concat_left_node_opt_info(env->enc, opt, &nopt);
4576 }
4577 } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
4578 }
4579 break;
4580
4581 case NT_ALT:
4582 {
4583 NodeOptInfo nopt;
4584 Node* nd = node;
4585
4586 do {
4587 r = optimize_node_left(NCAR(nd), &nopt, env);
4588 if (r == 0) {
4589 if (nd == node) copy_node_opt_info(opt, &nopt);
4590 else alt_merge_node_opt_info(opt, &nopt, env);
4591 }
4592 } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
4593 }
4594 break;
4595
4596 case NT_STR:
4597 {
4598 StrNode* sn = NSTR(node);
4599 int slen = (int)(sn->end - sn->s);
4600 int is_raw = NSTRING_IS_RAW(node);
4601
4602 if (! NSTRING_IS_AMBIG(node)) {
4603 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
4604 NSTRING_IS_RAW(node), env->enc);
4605 if (slen > 0) {
4606 add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
4607 }
4608 set_mml(&opt->len, slen, slen);
4609 }
4610 else {
4611 int max;
4612
4613 if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
4614 int n = onigenc_strlen(env->enc, sn->s, sn->end);
4615 max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;
4616 }
4617 else {
4618 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
4619 is_raw, env->enc);
4620 opt->exb.ignore_case = 1;
4621
4622 if (slen > 0) {
4623 r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
4624 env->enc, env->case_fold_flag);
4625 if (r != 0) break;
4626 }
4627
4628 max = slen;
4629 }
4630
4631 set_mml(&opt->len, slen, max);
4632 }
4633
4634 if (opt->exb.len == slen)
4635 opt->exb.reach_end = 1;
4636 }
4637 break;
4638
4639 case NT_CCLASS:
4640 {
4641 int i, z;
4642 CClassNode* cc = NCCLASS(node);
4643
4644 /* no need to check ignore case. (setted in setup_tree()) */
4645
4646 if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
4647 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
4648 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
4649
4650 set_mml(&opt->len, min, max);
4651 }
4652 else {
4653 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
4654 z = BITSET_AT(cc->bs, i);
4655 if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
4656 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
4657 }
4658 }
4659 set_mml(&opt->len, 1, 1);
4660 }
4661 }
4662 break;
4663
4664 case NT_CTYPE:
4665 {
4666 int i, min, max;
4667
4668 max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
4669
4670 if (max == 1) {
4671 min = 1;
4672
4673 switch (NCTYPE(node)->ctype) {
4674 case ONIGENC_CTYPE_WORD:
4675 if (NCTYPE(node)->not != 0) {
4676 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
4677 if (! ONIGENC_IS_CODE_WORD(env->enc, i)) {
4678 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
4679 }
4680 }
4681 }
4682 else {
4683 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
4684 if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
4685 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
4686 }
4687 }
4688 }
4689 break;
4690 }
4691 }
4692 else {
4693 min = ONIGENC_MBC_MINLEN(env->enc);
4694 }
4695 set_mml(&opt->len, min, max);
4696 }
4697 break;
4698
4699 case NT_CANY:
4700 {
4701 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
4702 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
4703 set_mml(&opt->len, min, max);
4704 }
4705 break;
4706
4707 case NT_ANCHOR:
4708 switch (NANCHOR(node)->type) {
4709 case ANCHOR_BEGIN_BUF:
4710 case ANCHOR_BEGIN_POSITION:
4711 case ANCHOR_BEGIN_LINE:
4712 case ANCHOR_END_BUF:
4713 case ANCHOR_SEMI_END_BUF:
4714 case ANCHOR_END_LINE:
4715 add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
4716 break;
4717
4718 case ANCHOR_PREC_READ:
4719 {
4720 NodeOptInfo nopt;
4721
4722 r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
4723 if (r == 0) {
4724 if (nopt.exb.len > 0)
4725 copy_opt_exact_info(&opt->expr, &nopt.exb);
4726 else if (nopt.exm.len > 0)
4727 copy_opt_exact_info(&opt->expr, &nopt.exm);
4728
4729 opt->expr.reach_end = 0;
4730
4731 if (nopt.map.value > 0)
4732 copy_opt_map_info(&opt->map, &nopt.map);
4733 }
4734 }
4735 break;
4736
4737 case ANCHOR_PREC_READ_NOT:
4738 case ANCHOR_LOOK_BEHIND: /* Sorry, I can't make use of it. */
4739 case ANCHOR_LOOK_BEHIND_NOT:
4740 break;
4741 }
4742 break;
4743
4744 case NT_BREF:
4745 {
4746 int i;
4747 int* backs;
4748 OnigDistance min, max, tmin, tmax;
4749 Node** nodes = SCANENV_MEM_NODES(env->scan_env);
4750 BRefNode* br = NBREF(node);
4751
4752 if (br->state & NST_RECURSION) {
4753 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
4754 break;
4755 }
4756 backs = BACKREFS_P(br);
4757 r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
4758 if (r != 0) break;
4759 r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
4760 if (r != 0) break;
4761 for (i = 1; i < br->back_num; i++) {
4762 r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
4763 if (r != 0) break;
4764 r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
4765 if (r != 0) break;
4766 if (min > tmin) min = tmin;
4767 if (max < tmax) max = tmax;
4768 }
4769 if (r == 0) set_mml(&opt->len, min, max);
4770 }
4771 break;
4772
4773 #ifdef USE_SUBEXP_CALL
4774 case NT_CALL:
4775 if (IS_CALL_RECURSION(NCALL(node)))
4776 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
4777 else {
4778 OnigOptionType save = env->options;
4779 env->options = NENCLOSE(NCALL(node)->target)->option;
4780 r = optimize_node_left(NCALL(node)->target, opt, env);
4781 env->options = save;
4782 }
4783 break;
4784 #endif
4785
4786 case NT_QTFR:
4787 {
4788 int i;
4789 OnigDistance min, max;
4790 NodeOptInfo nopt;
4791 QtfrNode* qn = NQTFR(node);
4792
4793 r = optimize_node_left(qn->target, &nopt, env);
4794 if (r) break;
4795
4796 if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) {
4797 if (env->mmd.max == 0 &&
4798 NTYPE(qn->target) == NT_CANY && qn->greedy) {
4799 if (IS_MULTILINE(env->options))
4800 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
4801 else
4802 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
4803 }
4804 }
4805 else {
4806 if (qn->lower > 0) {
4807 copy_node_opt_info(opt, &nopt);
4808 if (nopt.exb.len > 0) {
4809 if (nopt.exb.reach_end) {
4810 for (i = 2; i <= qn->lower &&
4811 ! is_full_opt_exact_info(&opt->exb); i++) {
4812 concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
4813 }
4814 if (i < qn->lower) {
4815 opt->exb.reach_end = 0;
4816 }
4817 }
4818 }
4819
4820 if (qn->lower != qn->upper) {
4821 opt->exb.reach_end = 0;
4822 opt->exm.reach_end = 0;
4823 }
4824 if (qn->lower > 1)
4825 opt->exm.reach_end = 0;
4826 }
4827 }
4828
4829 min = distance_multiply(nopt.len.min, qn->lower);
4830 if (IS_REPEAT_INFINITE(qn->upper))
4831 max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
4832 else
4833 max = distance_multiply(nopt.len.max, qn->upper);
4834
4835 set_mml(&opt->len, min, max);
4836 }
4837 break;
4838
4839 case NT_ENCLOSE:
4840 {
4841 EncloseNode* en = NENCLOSE(node);
4842
4843 switch (en->type) {
4844 case ENCLOSE_OPTION:
4845 {
4846 OnigOptionType save = env->options;
4847
4848 env->options = en->option;
4849 r = optimize_node_left(en->target, opt, env);
4850 env->options = save;
4851 }
4852 break;
4853
4854 case ENCLOSE_MEMORY:
4855 #ifdef USE_SUBEXP_CALL
4856 en->opt_count++;
4857 if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
4858 OnigDistance min, max;
4859
4860 min = 0;
4861 max = ONIG_INFINITE_DISTANCE;
4862 if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
4863 if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
4864 set_mml(&opt->len, min, max);
4865 }
4866 else
4867 #endif
4868 {
4869 r = optimize_node_left(en->target, opt, env);
4870
4871 if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
4872 if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
4873 remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
4874 }
4875 }
4876 break;
4877
4878 case ENCLOSE_STOP_BACKTRACK:
4879 r = optimize_node_left(en->target, opt, env);
4880 break;
4881 }
4882 }
4883 break;
4884
4885 default:
4886 #ifdef ONIG_DEBUG
4887 fprintf(stderr, "optimize_node_left: undefined node type %d\n",
4888 NTYPE(node));
4889 #endif
4890 r = ONIGERR_TYPE_BUG;
4891 break;
4892 }
4893
4894 return r;
4895 }
4896
4897 static int
4898 set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
4899 {
4900 int r;
4901
4902 if (e->len == 0) return 0;
4903
4904 if (e->ignore_case) {
4905 reg->exact = (UChar* )xmalloc(e->len);
4906 CHECK_NULL_RETURN_MEMERR(reg->exact);
4907 xmemcpy(reg->exact, e->s, e->len);
4908 reg->exact_end = reg->exact + e->len;
4909 reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
4910 }
4911 else {
4912 int allow_reverse;
4913
4914 reg->exact = str_dup(e->s, e->s + e->len);
4915 CHECK_NULL_RETURN_MEMERR(reg->exact);
4916 reg->exact_end = reg->exact + e->len;
4917
4918 allow_reverse =
4919 ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
4920
4921 if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
4922 r = set_bm_skip(reg->exact, reg->exact_end, reg->enc,
4923 reg->map, &(reg->int_map));
4924 if (r) return r;
4925
4926 reg->optimize = (allow_reverse != 0
4927 ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
4928 }
4929 else {
4930 reg->optimize = ONIG_OPTIMIZE_EXACT;
4931 }
4932 }
4933
4934 reg->dmin = e->mmd.min;
4935 reg->dmax = e->mmd.max;
4936
4937 if (reg->dmin != ONIG_INFINITE_DISTANCE) {
4938 reg->threshold_len = reg->dmin + (int)(reg->exact_end - reg->exact);
4939 }
4940
4941 return 0;
4942 }
4943
4944 static void
4945 set_optimize_map_info(regex_t* reg, OptMapInfo* m)
4946 {
4947 int i;
4948
4949 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
4950 reg->map[i] = m->map[i];
4951
4952 reg->optimize = ONIG_OPTIMIZE_MAP;
4953 reg->dmin = m->mmd.min;
4954 reg->dmax = m->mmd.max;
4955
4956 if (reg->dmin != ONIG_INFINITE_DISTANCE) {
4957 reg->threshold_len = reg->dmin + 1;
4958 }
4959 }
4960
4961 static void
4962 set_sub_anchor(regex_t* reg, OptAncInfo* anc)
4963 {
4964 reg->sub_anchor |= anc->left_anchor & ANCHOR_BEGIN_LINE;
4965 reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
4966 }
4967
4968 #ifdef ONIG_DEBUG
4969 static void print_optimize_info(FILE* f, regex_t* reg);
4970 #endif
4971
4972 static int
4973 set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
4974 {
4975
4976 int r;
4977 NodeOptInfo opt;
4978 OptEnv env;
4979
4980 env.enc = reg->enc;
4981 env.options = reg->options;
4982 env.case_fold_flag = reg->case_fold_flag;
4983 env.scan_env = scan_env;
4984 clear_mml(&env.mmd);
4985
4986 r = optimize_node_left(node, &opt, &env);
4987 if (r) return r;
4988
4989 reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
4990 ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML);
4991
4992 reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF);
4993
4994 if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
4995 reg->anchor_dmin = opt.len.min;
4996 reg->anchor_dmax = opt.len.max;
4997 }
4998
4999 if (opt.exb.len > 0 || opt.exm.len > 0) {
5000 select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
5001 if (opt.map.value > 0 &&
5002 comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
5003 goto set_map;
5004 }
5005 else {
5006 r = set_optimize_exact_info(reg, &opt.exb);
5007 set_sub_anchor(reg, &opt.exb.anc);
5008 }
5009 }
5010 else if (opt.map.value > 0) {
5011 set_map:
5012 set_optimize_map_info(reg, &opt.map);
5013 set_sub_anchor(reg, &opt.map.anc);
5014 }
5015 else {
5016 reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE;
5017 if (opt.len.max == 0)
5018 reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE;
5019 }
5020
5021 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5022 print_optimize_info(stderr, reg);
5023 #endif
5024 return r;
5025 }
5026
5027 static void
5028 clear_optimize_info(regex_t* reg)
5029 {
5030 reg->optimize = ONIG_OPTIMIZE_NONE;
5031 reg->anchor = 0;
5032 reg->anchor_dmin = 0;
5033 reg->anchor_dmax = 0;
5034 reg->sub_anchor = 0;
5035 reg->exact_end = (UChar* )NULL;
5036 reg->threshold_len = 0;
5037 if (IS_NOT_NULL(reg->exact)) {
5038 xfree(reg->exact);
5039 reg->exact = (UChar* )NULL;
5040 }
5041 }
5042
5043 #ifdef ONIG_DEBUG
5044
5045 static void print_enc_string(FILE* fp, OnigEncoding enc,
5046 const UChar *s, const UChar *end)
5047 {
5048 fprintf(fp, "\nPATTERN: /");
5049
5050 if (ONIGENC_MBC_MINLEN(enc) > 1) {
5051 const UChar *p;
5052 OnigCodePoint code;
5053
5054 p = s;
5055 while (p < end) {
5056 code = ONIGENC_MBC_TO_CODE(enc, p, end);
5057 if (code >= 0x80) {
5058 fprintf(fp, " 0x%04x ", (int )code);
5059 }
5060 else {
5061 fputc((int )code, fp);
5062 }
5063
5064 p += enclen(enc, p);
5065 }
5066 }
5067 else {
5068 while (s < end) {
5069 fputc((int )*s, fp);
5070 s++;
5071 }
5072 }
5073
5074 fprintf(fp, "/\n");
5075 }
5076
5077 static void
5078 print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
5079 {
5080 if (a == ONIG_INFINITE_DISTANCE)
5081 fputs("inf", f);
5082 else
5083 fprintf(f, "(%u)", a);
5084
5085 fputs("-", f);
5086
5087 if (b == ONIG_INFINITE_DISTANCE)
5088 fputs("inf", f);
5089 else
5090 fprintf(f, "(%u)", b);
5091 }
5092
5093 static void
5094 print_anchor(FILE* f, int anchor)
5095 {
5096 int q = 0;
5097
5098 fprintf(f, "[");
5099
5100 if (anchor & ANCHOR_BEGIN_BUF) {
5101 fprintf(f, "begin-buf");
5102 q = 1;
5103 }
5104 if (anchor & ANCHOR_BEGIN_LINE) {
5105 if (q) fprintf(f, ", ");
5106 q = 1;
5107 fprintf(f, "begin-line");
5108 }
5109 if (anchor & ANCHOR_BEGIN_POSITION) {
5110 if (q) fprintf(f, ", ");
5111 q = 1;
5112 fprintf(f, "begin-pos");
5113 }
5114 if (anchor & ANCHOR_END_BUF) {
5115 if (q) fprintf(f, ", ");
5116 q = 1;
5117 fprintf(f, "end-buf");
5118 }
5119 if (anchor & ANCHOR_SEMI_END_BUF) {
5120 if (q) fprintf(f, ", ");
5121 q = 1;
5122 fprintf(f, "semi-end-buf");
5123 }
5124 if (anchor & ANCHOR_END_LINE) {
5125 if (q) fprintf(f, ", ");
5126 q = 1;
5127 fprintf(f, "end-line");
5128 }
5129 if (anchor & ANCHOR_ANYCHAR_STAR) {
5130 if (q) fprintf(f, ", ");
5131 q = 1;
5132 fprintf(f, "anychar-star");
5133 }
5134 if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
5135 if (q) fprintf(f, ", ");
5136 fprintf(f, "anychar-star-pl");
5137 }
5138
5139 fprintf(f, "]");
5140 }
5141
5142 static void
5143 print_optimize_info(FILE* f, regex_t* reg)
5144 {
5145 static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
5146 "EXACT_IC", "MAP" };
5147
5148 fprintf(f, "optimize: %s\n", on[reg->optimize]);
5149 fprintf(f, " anchor: "); print_anchor(f, reg->anchor);
5150 if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
5151 print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
5152 fprintf(f, "\n");
5153
5154 if (reg->optimize) {
5155 fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor);
5156 fprintf(f, "\n");
5157 }
5158 fprintf(f, "\n");
5159
5160 if (reg->exact) {
5161 UChar *p;
5162 fprintf(f, "exact: [");
5163 for (p = reg->exact; p < reg->exact_end; p++) {
5164 fputc(*p, f);
5165 }
5166 fprintf(f, "]: length: %d\n", (reg->exact_end - reg->exact));
5167 }
5168 else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
5169 int c, i, n = 0;
5170
5171 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5172 if (reg->map[i]) n++;
5173
5174 fprintf(f, "map: n=%d\n", n);
5175 if (n > 0) {
5176 c = 0;
5177 fputc('[', f);
5178 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
5179 if (reg->map[i] != 0) {
5180 if (c > 0) fputs(", ", f);
5181 c++;
5182 if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
5183 ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
5184 fputc(i, f);
5185 else
5186 fprintf(f, "%d", i);
5187 }
5188 }
5189 fprintf(f, "]\n");
5190 }
5191 }
5192 }
5193 #endif /* ONIG_DEBUG */
5194
5195
5196 extern void
5197 onig_free_body(regex_t* reg)
5198 {
5199 if (IS_NOT_NULL(reg)) {
5200 if (IS_NOT_NULL(reg->p)) xfree(reg->p);
5201 if (IS_NOT_NULL(reg->exact)) xfree(reg->exact);
5202 if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map);
5203 if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward);
5204 if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range);
5205 if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain);
5206
5207 #ifdef USE_NAMED_GROUP
5208 onig_names_free(reg);
5209 #endif
5210 }
5211 }
5212
5213 extern void
5214 onig_free(regex_t* reg)
5215 {
5216 if (IS_NOT_NULL(reg)) {
5217 onig_free_body(reg);
5218 xfree(reg);
5219 }
5220 }
5221
5222 #define REGEX_TRANSFER(to,from) do {\
5223 (to)->state = ONIG_STATE_MODIFY;\
5224 onig_free_body(to);\
5225 xmemcpy(to, from, sizeof(regex_t));\
5226 xfree(from);\
5227 } while (0)
5228
5229 extern void
5230 onig_transfer(regex_t* to, regex_t* from)
5231 {
5232 THREAD_ATOMIC_START;
5233 REGEX_TRANSFER(to, from);
5234 THREAD_ATOMIC_END;
5235 }
5236
5237 #define REGEX_CHAIN_HEAD(reg) do {\
5238 while (IS_NOT_NULL((reg)->chain)) {\
5239 (reg) = (reg)->chain;\
5240 }\
5241 } while (0)
5242
5243 extern void
5244 onig_chain_link_add(regex_t* to, regex_t* add)
5245 {
5246 THREAD_ATOMIC_START;
5247 REGEX_CHAIN_HEAD(to);
5248 to->chain = add;
5249 THREAD_ATOMIC_END;
5250 }
5251
5252 extern void
5253 onig_chain_reduce(regex_t* reg)
5254 {
5255 regex_t *head, *prev;
5256
5257 prev = reg;
5258 head = prev->chain;
5259 if (IS_NOT_NULL(head)) {
5260 reg->state = ONIG_STATE_MODIFY;
5261 while (IS_NOT_NULL(head->chain)) {
5262 prev = head;
5263 head = head->chain;
5264 }
5265 prev->chain = (regex_t* )NULL;
5266 REGEX_TRANSFER(reg, head);
5267 }
5268 }
5269
5270 #ifdef ONIG_DEBUG
5271 static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg));
5272 #endif
5273 #ifdef ONIG_DEBUG_PARSE_TREE
5274 static void print_tree P_((FILE* f, Node* node));
5275 #endif
5276
5277 extern int
5278 onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5279 OnigErrorInfo* einfo)
5280 {
5281 #define COMPILE_INIT_SIZE 20
5282
5283 int r, init_size;
5284 Node* root;
5285 ScanEnv scan_env;
5286 #ifdef USE_SUBEXP_CALL
5287 UnsetAddrList uslist;
5288 #endif
5289
5290 if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
5291
5292 reg->state = ONIG_STATE_COMPILING;
5293
5294 #ifdef ONIG_DEBUG
5295 print_enc_string(stderr, reg->enc, pattern, pattern_end);
5296 #endif
5297
5298 if (reg->alloc == 0) {
5299 init_size = ((int)(pattern_end - pattern)) * 2;
5300 if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
5301 r = BBUF_INIT(reg, init_size);
5302 if (r != 0) goto end;
5303 }
5304 else
5305 reg->used = 0;
5306
5307 reg->num_mem = 0;
5308 reg->num_repeat = 0;
5309 reg->num_null_check = 0;
5310 reg->repeat_range_alloc = 0;
5311 reg->repeat_range = (OnigRepeatRange* )NULL;
5312 #ifdef USE_COMBINATION_EXPLOSION_CHECK
5313 reg->num_comb_exp_check = 0;
5314 #endif
5315
5316 r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
5317 if (r != 0 || root == NULL) goto err;
5318
5319 #ifdef USE_NAMED_GROUP
5320 /* mixed use named group and no-named group */
5321 if (scan_env.num_named > 0 &&
5322 IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
5323 !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
5324 if (scan_env.num_named != scan_env.num_mem)
5325 r = disable_noname_group_capture(&root, reg, &scan_env);
5326 else
5327 r = numbered_ref_check(root);
5328
5329 if (r != 0) goto err;
5330 }
5331 #endif
5332
5333 #ifdef USE_SUBEXP_CALL
5334 if (scan_env.num_call > 0) {
5335 r = unset_addr_list_init(&uslist, scan_env.num_call);
5336 if (r != 0) goto err;
5337 scan_env.unset_addr_list = &uslist;
5338 r = setup_subexp_call(root, &scan_env);
5339 if (r != 0) goto err_unset;
5340 r = subexp_recursive_check_trav(root, &scan_env);
5341 if (r < 0) goto err_unset;
5342 r = subexp_inf_recursive_check_trav(root, &scan_env);
5343 if (r != 0) goto err_unset;
5344
5345 reg->num_call = scan_env.num_call;
5346 }
5347 else
5348 reg->num_call = 0;
5349 #endif
5350
5351 r = setup_tree(root, reg, 0, &scan_env);
5352 if (r != 0) goto err_unset;
5353
5354 #ifdef ONIG_DEBUG_PARSE_TREE
5355 print_tree(stderr, root);
5356 #endif
5357
5358 reg->capture_history = scan_env.capture_history;
5359 reg->bt_mem_start = scan_env.bt_mem_start;
5360 reg->bt_mem_start |= reg->capture_history;
5361 if (IS_FIND_CONDITION(reg->options))
5362 BIT_STATUS_ON_ALL(reg->bt_mem_end);
5363 else {
5364 reg->bt_mem_end = scan_env.bt_mem_end;
5365 reg->bt_mem_end |= reg->capture_history;
5366 }
5367
5368 #ifdef USE_COMBINATION_EXPLOSION_CHECK
5369 if (scan_env.backrefed_mem == 0
5370 #ifdef USE_SUBEXP_CALL
5371 || scan_env.num_call == 0
5372 #endif
5373 ) {
5374 setup_comb_exp_check(root, 0, &scan_env);
5375 #ifdef USE_SUBEXP_CALL
5376 if (scan_env.has_recursion != 0) {
5377 scan_env.num_comb_exp_check = 0;
5378 }
5379 else
5380 #endif
5381 if (scan_env.comb_exp_max_regnum > 0) {
5382 int i;
5383 for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
5384 if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
5385 scan_env.num_comb_exp_check = 0;
5386 break;
5387 }
5388 }
5389 }
5390 }
5391
5392 reg->num_comb_exp_check = scan_env.num_comb_exp_check;
5393 #endif
5394
5395 clear_optimize_info(reg);
5396 #ifndef ONIG_DONT_OPTIMIZE
5397 r = set_optimize_info_from_tree(root, reg, &scan_env);
5398 if (r != 0) goto err_unset;
5399 #endif
5400
5401 if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
5402 xfree(scan_env.mem_nodes_dynamic);
5403 scan_env.mem_nodes_dynamic = (Node** )NULL;
5404 }
5405
5406 r = compile_tree(root, reg);
5407 if (r == 0) {
5408 r = add_opcode(reg, OP_END);
5409 #ifdef USE_SUBEXP_CALL
5410 if (scan_env.num_call > 0) {
5411 r = unset_addr_list_fix(&uslist, reg);
5412 unset_addr_list_end(&uslist);
5413 if (r) goto err;
5414 }
5415 #endif
5416
5417 if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
5418 reg->stack_pop_level = STACK_POP_LEVEL_ALL;
5419 else {
5420 if (reg->bt_mem_start != 0)
5421 reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
5422 else
5423 reg->stack_pop_level = STACK_POP_LEVEL_FREE;
5424 }
5425 }
5426 #ifdef USE_SUBEXP_CALL
5427 else if (scan_env.num_call > 0) {
5428 unset_addr_list_end(&uslist);
5429 }
5430 #endif
5431 onig_node_free(root);
5432
5433 #ifdef ONIG_DEBUG_COMPILE
5434 #ifdef USE_NAMED_GROUP
5435 onig_print_names(stderr, reg);
5436 #endif
5437 print_compiled_byte_code_list(stderr, reg);
5438 #endif
5439
5440 end:
5441 reg->state = ONIG_STATE_NORMAL;
5442 return r;
5443
5444 err_unset:
5445 #ifdef USE_SUBEXP_CALL
5446 if (scan_env.num_call > 0) {
5447 unset_addr_list_end(&uslist);
5448 }
5449 #endif
5450 err:
5451 if (IS_NOT_NULL(scan_env.error)) {
5452 if (IS_NOT_NULL(einfo)) {
5453 einfo->enc = scan_env.enc;
5454 einfo->par = scan_env.error;
5455 einfo->par_end = scan_env.error_end;
5456 }
5457 }
5458
5459 onig_node_free(root);
5460 if (IS_NOT_NULL(scan_env.mem_nodes_dynamic))
5461 xfree(scan_env.mem_nodes_dynamic);
5462 return r;
5463 }
5464
5465 #ifdef USE_RECOMPILE_API
5466 extern int
5467 onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5468 OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
5469 OnigErrorInfo* einfo)
5470 {
5471 int r;
5472 regex_t *new_reg;
5473
5474 r = onig_new(&new_reg, pattern, pattern_end, option, enc, syntax, einfo);
5475 if (r) return r;
5476 if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
5477 onig_transfer(reg, new_reg);
5478 }
5479 else {
5480 onig_chain_link_add(reg, new_reg);
5481 }
5482 return 0;
5483 }
5484 #endif
5485
5486 static int onig_inited = 0;
5487
5488 extern int
5489 onig_reg_init(regex_t* reg, OnigOptionType option,
5490 OnigCaseFoldType case_fold_flag,
5491 OnigEncoding enc, OnigSyntaxType* syntax)
5492 {
5493 if (! onig_inited)
5494 onig_init();
5495
5496 if (IS_NULL(reg))
5497 return ONIGERR_INVALID_ARGUMENT;
5498
5499 if (ONIGENC_IS_UNDEF(enc))
5500 return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED;
5501
5502 if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
5503 == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
5504 return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
5505 }
5506
5507 (reg)->state = ONIG_STATE_MODIFY;
5508
5509 if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
5510 option |= syntax->options;
5511 option &= ~ONIG_OPTION_SINGLELINE;
5512 }
5513 else
5514 option |= syntax->options;
5515
5516 (reg)->enc = enc;
5517 (reg)->options = option;
5518 (reg)->syntax = syntax;
5519 (reg)->optimize = 0;
5520 (reg)->exact = (UChar* )NULL;
5521 (reg)->int_map = (int* )NULL;
5522 (reg)->int_map_backward = (int* )NULL;
5523 (reg)->chain = (regex_t* )NULL;
5524
5525 (reg)->p = (UChar* )NULL;
5526 (reg)->alloc = 0;
5527 (reg)->used = 0;
5528 (reg)->name_table = (void* )NULL;
5529
5530 (reg)->case_fold_flag = case_fold_flag;
5531 return 0;
5532 }
5533
5534 extern int
5535 onig_new_without_alloc(regex_t* reg, const UChar* pattern,
5536 const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,
5537 OnigSyntaxType* syntax, OnigErrorInfo* einfo)
5538 {
5539 int r;
5540
5541 r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
5542 if (r) return r;
5543
5544 r = onig_compile(reg, pattern, pattern_end, einfo);
5545 return r;
5546 }
5547
5548 extern int
5549 onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
5550 OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
5551 OnigErrorInfo* einfo)
5552 {
5553 int r;
5554
5555 *reg = (regex_t* )xmalloc(sizeof(regex_t));
5556 if (IS_NULL(*reg)) return ONIGERR_MEMORY;
5557
5558 r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
5559 if (r) goto err;
5560
5561 r = onig_compile(*reg, pattern, pattern_end, einfo);
5562 if (r) {
5563 err:
5564 onig_free(*reg);
5565 *reg = NULL;
5566 }
5567 return r;
5568 }
5569
5570
5571 extern int
5572 onig_init(void)
5573 {
5574 if (onig_inited != 0)
5575 return 0;
5576
5577 THREAD_SYSTEM_INIT;
5578 THREAD_ATOMIC_START;
5579
5580 onig_inited = 1;
5581
5582 onigenc_init();
5583 /* onigenc_set_default_caseconv_table((UChar* )0); */
5584
5585 #ifdef ONIG_DEBUG_STATISTICS
5586 onig_statistics_init();
5587 #endif
5588
5589 THREAD_ATOMIC_END;
5590 return 0;
5591 }
5592
5593
5594 static OnigEndCallListItemType* EndCallTop;
5595
5596 extern void onig_add_end_call(void (*func)(void))
5597 {
5598 OnigEndCallListItemType* item;
5599
5600 item = (OnigEndCallListItemType* )xmalloc(sizeof(*item));
5601 if (item == 0) return ;
5602
5603 item->next = EndCallTop;
5604 item->func = func;
5605
5606 EndCallTop = item;
5607 }
5608
5609 static void
5610 exec_end_call_list(void)
5611 {
5612 OnigEndCallListItemType* prev;
5613 void (*func)(void);
5614
5615 while (EndCallTop != 0) {
5616 func = EndCallTop->func;
5617 (*func)();
5618
5619 prev = EndCallTop;
5620 EndCallTop = EndCallTop->next;
5621 xfree(prev);
5622 }
5623 }
5624
5625 extern int
5626 onig_end(void)
5627 {
5628 THREAD_ATOMIC_START;
5629
5630 exec_end_call_list();
5631
5632 #ifdef ONIG_DEBUG_STATISTICS
5633 onig_print_statistics(stderr);
5634 #endif
5635
5636 #ifdef USE_SHARED_CCLASS_TABLE
5637 onig_free_shared_cclass_table();
5638 #endif
5639
5640 #ifdef USE_PARSE_TREE_NODE_RECYCLE
5641 onig_free_node_list();
5642 #endif
5643
5644 onig_inited = 0;
5645
5646 THREAD_ATOMIC_END;
5647 THREAD_SYSTEM_END;
5648 return 0;
5649 }
5650
5651 extern int
5652 onig_is_in_code_range(const UChar* p, OnigCodePoint code)
5653 {
5654 OnigCodePoint n, *data;
5655 OnigCodePoint low, high, x;
5656
5657 GET_CODE_POINT(n, p);
5658 data = (OnigCodePoint* )p;
5659 data++;
5660
5661 for (low = 0, high = n; low < high; ) {
5662 x = (low + high) >> 1;
5663 if (code > data[x * 2 + 1])
5664 low = x + 1;
5665 else
5666 high = x;
5667 }
5668
5669 return ((low < n && code >= data[low * 2]) ? 1 : 0);
5670 }
5671
5672 extern int
5673 onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc)
5674 {
5675 int found;
5676
5677 if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
5678 if (IS_NULL(cc->mbuf)) {
5679 found = 0;
5680 }
5681 else {
5682 found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
5683 }
5684 }
5685 else {
5686 found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
5687 }
5688
5689 if (IS_NCCLASS_NOT(cc))
5690 return !found;
5691 else
5692 return found;
5693 }
5694
5695 extern int
5696 onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
5697 {
5698 int len;
5699
5700 if (ONIGENC_MBC_MINLEN(enc) > 1) {
5701 len = 2;
5702 }
5703 else {
5704 len = ONIGENC_CODE_TO_MBCLEN(enc, code);
5705 }
5706 return onig_is_code_in_cc_len(len, code, cc);
5707 }
5708
5709
5710 #ifdef ONIG_DEBUG
5711
5712 /* arguments type */
5713 #define ARG_SPECIAL -1
5714 #define ARG_NON 0
5715 #define ARG_RELADDR 1
5716 #define ARG_ABSADDR 2
5717 #define ARG_LENGTH 3
5718 #define ARG_MEMNUM 4
5719 #define ARG_OPTION 5
5720 #define ARG_STATE_CHECK 6
5721
5722 OnigOpInfoType OnigOpInfo[] = {
5723 { OP_FINISH, "finish", ARG_NON },
5724 { OP_END, "end", ARG_NON },
5725 { OP_EXACT1, "exact1", ARG_SPECIAL },
5726 { OP_EXACT2, "exact2", ARG_SPECIAL },
5727 { OP_EXACT3, "exact3", ARG_SPECIAL },
5728 { OP_EXACT4, "exact4", ARG_SPECIAL },
5729 { OP_EXACT5, "exact5", ARG_SPECIAL },
5730 { OP_EXACTN, "exactn", ARG_SPECIAL },
5731 { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL },
5732 { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL },
5733 { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL },
5734 { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL },
5735 { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL },
5736 { OP_EXACTMBN, "exactmbn", ARG_SPECIAL },
5737 { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL },
5738 { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL },
5739 { OP_CCLASS, "cclass", ARG_SPECIAL },
5740 { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL },
5741 { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL },
5742 { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL },
5743 { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL },
5744 { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL },
5745 { OP_CCLASS_NODE, "cclass-node", ARG_SPECIAL },
5746 { OP_ANYCHAR, "anychar", ARG_NON },
5747 { OP_ANYCHAR_ML, "anychar-ml", ARG_NON },
5748 { OP_ANYCHAR_STAR, "anychar*", ARG_NON },
5749 { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON },
5750 { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
5751 { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
5752 { OP_WORD, "word", ARG_NON },
5753 { OP_NOT_WORD, "not-word", ARG_NON },
5754 { OP_WORD_BOUND, "word-bound", ARG_NON },
5755 { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON },
5756 { OP_WORD_BEGIN, "word-begin", ARG_NON },
5757 { OP_WORD_END, "word-end", ARG_NON },
5758 { OP_BEGIN_BUF, "begin-buf", ARG_NON },
5759 { OP_END_BUF, "end-buf", ARG_NON },
5760 { OP_BEGIN_LINE, "begin-line", ARG_NON },
5761 { OP_END_LINE, "end-line", ARG_NON },
5762 { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON },
5763 { OP_BEGIN_POSITION, "begin-position", ARG_NON },
5764 { OP_BACKREF1, "backref1", ARG_NON },
5765 { OP_BACKREF2, "backref2", ARG_NON },
5766 { OP_BACKREFN, "backrefn", ARG_MEMNUM },
5767 { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL },
5768 { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },
5769 { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL },
5770 { OP_BACKREF_WITH_LEVEL, "backref_at_level", ARG_SPECIAL },
5771 { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM },
5772 { OP_MEMORY_START, "mem-start", ARG_MEMNUM },
5773 { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM },
5774 { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM },
5775 { OP_MEMORY_END, "mem-end", ARG_MEMNUM },
5776 { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM },
5777 { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION },
5778 { OP_SET_OPTION, "set-option", ARG_OPTION },
5779 { OP_FAIL, "fail", ARG_NON },
5780 { OP_JUMP, "jump", ARG_RELADDR },
5781 { OP_PUSH, "push", ARG_RELADDR },
5782 { OP_POP, "pop", ARG_NON },
5783 { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL },
5784 { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL },
5785 { OP_REPEAT, "repeat", ARG_SPECIAL },
5786 { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL },
5787 { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM },
5788 { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM },
5789 { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM },
5790 { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM },
5791 { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM },
5792 { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM },
5793 { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM },
5794 { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM },
5795 { OP_PUSH_POS, "push-pos", ARG_NON },
5796 { OP_POP_POS, "pop-pos", ARG_NON },
5797 { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR },
5798 { OP_FAIL_POS, "fail-pos", ARG_NON },
5799 { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON },
5800 { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON },
5801 { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL },
5802 { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
5803 { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
5804 { OP_CALL, "call", ARG_ABSADDR },
5805 { OP_RETURN, "return", ARG_NON },
5806 { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL },
5807 { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
5808 { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK },
5809 { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK },
5810 { OP_STATE_CHECK_ANYCHAR_ML_STAR,
5811 "state-check-anychar-ml*", ARG_STATE_CHECK },
5812 { -1, "", ARG_NON }
5813 };
5814
5815 static char*
5816 op2name(int opcode)
5817 {
5818 int i;
5819
5820 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
5821 if (opcode == OnigOpInfo[i].opcode)
5822 return OnigOpInfo[i].name;
5823 }
5824 return "";
5825 }
5826
5827 static int
5828 op2arg_type(int opcode)
5829 {
5830 int i;
5831
5832 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
5833 if (opcode == OnigOpInfo[i].opcode)
5834 return OnigOpInfo[i].arg_type;
5835 }
5836 return ARG_SPECIAL;
5837 }
5838
5839 static void
5840 Indent(FILE* f, int indent)
5841 {
5842 int i;
5843 for (i = 0; i < indent; i++) putc(' ', f);
5844 }
5845
5846 static void
5847 p_string(FILE* f, int len, UChar* s)
5848 {
5849 fputs(":", f);
5850 while (len-- > 0) { fputc(*s++, f); }
5851 }
5852
5853 static void
5854 p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
5855 {
5856 int x = len * mb_len;
5857
5858 fprintf(f, ":%d:", len);
5859 while (x-- > 0) { fputc(*s++, f); }
5860 }
5861
5862 extern void
5863 onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp,
5864 OnigEncoding enc)
5865 {
5866 int i, n, arg_type;
5867 RelAddrType addr;
5868 LengthType len;
5869 MemNumType mem;
5870 StateCheckNumType scn;
5871 OnigCodePoint code;
5872 UChar *q;
5873
5874 fprintf(f, "[%s", op2name(*bp));
5875 arg_type = op2arg_type(*bp);
5876 if (arg_type != ARG_SPECIAL) {
5877 bp++;
5878 switch (arg_type) {
5879 case ARG_NON:
5880 break;
5881 case ARG_RELADDR:
5882 GET_RELADDR_INC(addr, bp);
5883 fprintf(f, ":(%d)", addr);
5884 break;
5885 case ARG_ABSADDR:
5886 GET_ABSADDR_INC(addr, bp);
5887 fprintf(f, ":(%d)", addr);
5888 break;
5889 case ARG_LENGTH:
5890 GET_LENGTH_INC(len, bp);
5891 fprintf(f, ":%d", len);
5892 break;
5893 case ARG_MEMNUM:
5894 mem = *((MemNumType* )bp);
5895 bp += SIZE_MEMNUM;
5896 fprintf(f, ":%d", mem);
5897 break;
5898 case ARG_OPTION:
5899 {
5900 OnigOptionType option = *((OnigOptionType* )bp);
5901 bp += SIZE_OPTION;
5902 fprintf(f, ":%d", option);
5903 }
5904 break;
5905
5906 case ARG_STATE_CHECK:
5907 scn = *((StateCheckNumType* )bp);
5908 bp += SIZE_STATE_CHECK_NUM;
5909 fprintf(f, ":%d", scn);
5910 break;
5911 }
5912 }
5913 else {
5914 switch (*bp++) {
5915 case OP_EXACT1:
5916 case OP_ANYCHAR_STAR_PEEK_NEXT:
5917 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
5918 p_string(f, 1, bp++); break;
5919 case OP_EXACT2:
5920 p_string(f, 2, bp); bp += 2; break;
5921 case OP_EXACT3:
5922 p_string(f, 3, bp); bp += 3; break;
5923 case OP_EXACT4:
5924 p_string(f, 4, bp); bp += 4; break;
5925 case OP_EXACT5:
5926 p_string(f, 5, bp); bp += 5; break;
5927 case OP_EXACTN:
5928 GET_LENGTH_INC(len, bp);
5929 p_len_string(f, len, 1, bp);
5930 bp += len;
5931 break;
5932
5933 case OP_EXACTMB2N1:
5934 p_string(f, 2, bp); bp += 2; break;
5935 case OP_EXACTMB2N2:
5936 p_string(f, 4, bp); bp += 4; break;
5937 case OP_EXACTMB2N3:
5938 p_string(f, 6, bp); bp += 6; break;
5939 case OP_EXACTMB2N:
5940 GET_LENGTH_INC(len, bp);
5941 p_len_string(f, len, 2, bp);
5942 bp += len * 2;
5943 break;
5944 case OP_EXACTMB3N:
5945 GET_LENGTH_INC(len, bp);
5946 p_len_string(f, len, 3, bp);
5947 bp += len * 3;
5948 break;
5949 case OP_EXACTMBN:
5950 {
5951 int mb_len;
5952
5953 GET_LENGTH_INC(mb_len, bp);
5954 GET_LENGTH_INC(len, bp);
5955 fprintf(f, ":%d:%d:", mb_len, len);
5956 n = len * mb_len;
5957 while (n-- > 0) { fputc(*bp++, f); }
5958 }
5959 break;
5960
5961 case OP_EXACT1_IC:
5962 len = enclen(enc, bp);
5963 p_string(f, len, bp);
5964 bp += len;
5965 break;
5966 case OP_EXACTN_IC:
5967 GET_LENGTH_INC(len, bp);
5968 p_len_string(f, len, 1, bp);
5969 bp += len;
5970 break;
5971
5972 case OP_CCLASS:
5973 n = bitset_on_num((BitSetRef )bp);
5974 bp += SIZE_BITSET;
5975 fprintf(f, ":%d", n);
5976 break;
5977
5978 case OP_CCLASS_NOT:
5979 n = bitset_on_num((BitSetRef )bp);
5980 bp += SIZE_BITSET;
5981 fprintf(f, ":%d", n);
5982 break;
5983
5984 case OP_CCLASS_MB:
5985 case OP_CCLASS_MB_NOT:
5986 GET_LENGTH_INC(len, bp);
5987 q = bp;
5988 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
5989 ALIGNMENT_RIGHT(q);
5990 #endif
5991 GET_CODE_POINT(code, q);
5992 bp += len;
5993 fprintf(f, ":%d:%d", (int )code, len);
5994 break;
5995
5996 case OP_CCLASS_MIX:
5997 case OP_CCLASS_MIX_NOT:
5998 n = bitset_on_num((BitSetRef )bp);
5999 bp += SIZE_BITSET;
6000 GET_LENGTH_INC(len, bp);
6001 q = bp;
6002 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6003 ALIGNMENT_RIGHT(q);
6004 #endif
6005 GET_CODE_POINT(code, q);
6006 bp += len;
6007 fprintf(f, ":%d:%d:%d", n, (int )code, len);
6008 break;
6009
6010 case OP_CCLASS_NODE:
6011 {
6012 CClassNode *cc;
6013
6014 GET_POINTER_INC(cc, bp);
6015 n = bitset_on_num(cc->bs);
6016 fprintf(f, ":%u:%d", (unsigned int )cc, n);
6017 }
6018 break;
6019
6020 case OP_BACKREFN_IC:
6021 mem = *((MemNumType* )bp);
6022 bp += SIZE_MEMNUM;
6023 fprintf(f, ":%d", mem);
6024 break;
6025
6026 case OP_BACKREF_MULTI_IC:
6027 case OP_BACKREF_MULTI:
6028 fputs(" ", f);
6029 GET_LENGTH_INC(len, bp);
6030 for (i = 0; i < len; i++) {
6031 GET_MEMNUM_INC(mem, bp);
6032 if (i > 0) fputs(", ", f);
6033 fprintf(f, "%d", mem);
6034 }
6035 break;
6036
6037 case OP_BACKREF_WITH_LEVEL:
6038 {
6039 OnigOptionType option;
6040 LengthType level;
6041
6042 GET_OPTION_INC(option, bp);
6043 fprintf(f, ":%d", option);
6044 GET_LENGTH_INC(level, bp);
6045 fprintf(f, ":%d", level);
6046
6047 fputs(" ", f);
6048 GET_LENGTH_INC(len, bp);
6049 for (i = 0; i < len; i++) {
6050 GET_MEMNUM_INC(mem, bp);
6051 if (i > 0) fputs(", ", f);
6052 fprintf(f, "%d", mem);
6053 }
6054 }
6055 break;
6056
6057 case OP_REPEAT:
6058 case OP_REPEAT_NG:
6059 {
6060 mem = *((MemNumType* )bp);
6061 bp += SIZE_MEMNUM;
6062 addr = *((RelAddrType* )bp);
6063 bp += SIZE_RELADDR;
6064 fprintf(f, ":%d:%d", mem, addr);
6065 }
6066 break;
6067
6068 case OP_PUSH_OR_JUMP_EXACT1:
6069 case OP_PUSH_IF_PEEK_NEXT:
6070 addr = *((RelAddrType* )bp);
6071 bp += SIZE_RELADDR;
6072 fprintf(f, ":(%d)", addr);
6073 p_string(f, 1, bp);
6074 bp += 1;
6075 break;
6076
6077 case OP_LOOK_BEHIND:
6078 GET_LENGTH_INC(len, bp);
6079 fprintf(f, ":%d", len);
6080 break;
6081
6082 case OP_PUSH_LOOK_BEHIND_NOT:
6083 GET_RELADDR_INC(addr, bp);
6084 GET_LENGTH_INC(len, bp);
6085 fprintf(f, ":%d:(%d)", len, addr);
6086 break;
6087
6088 case OP_STATE_CHECK_PUSH:
6089 case OP_STATE_CHECK_PUSH_OR_JUMP:
6090 scn = *((StateCheckNumType* )bp);
6091 bp += SIZE_STATE_CHECK_NUM;
6092 addr = *((RelAddrType* )bp);
6093 bp += SIZE_RELADDR;
6094 fprintf(f, ":%d:(%d)", scn, addr);
6095 break;
6096
6097 default:
6098 fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
6099 *--bp);
6100 }
6101 }
6102 fputs("]", f);
6103 if (nextp) *nextp = bp;
6104 }
6105
6106 static void
6107 print_compiled_byte_code_list(FILE* f, regex_t* reg)
6108 {
6109 int ncode;
6110 UChar* bp = reg->p;
6111 UChar* end = reg->p + reg->used;
6112
6113 fprintf(f, "code length: %d\n", reg->used);
6114
6115 ncode = 0;
6116 while (bp < end) {
6117 ncode++;
6118 if (bp > reg->p) {
6119 if (ncode % 5 == 0)
6120 fprintf(f, "\n");
6121 else
6122 fputs(" ", f);
6123 }
6124 onig_print_compiled_byte_code(f, bp, &bp, reg->enc);
6125 }
6126
6127 fprintf(f, "\n");
6128 }
6129
6130 static void
6131 print_indent_tree(FILE* f, Node* node, int indent)
6132 {
6133 int i, type;
6134 int add = 3;
6135 UChar* p;
6136
6137 Indent(f, indent);
6138 if (IS_NULL(node)) {
6139 fprintf(f, "ERROR: null node!!!\n");
6140 exit (0);
6141 }
6142
6143 type = NTYPE(node);
6144 switch (type) {
6145 case NT_LIST:
6146 case NT_ALT:
6147 if (NTYPE(node) == NT_LIST)
6148 fprintf(f, "<list:%x>\n", (int )node);
6149 else
6150 fprintf(f, "<alt:%x>\n", (int )node);
6151
6152 print_indent_tree(f, NCAR(node), indent + add);
6153 while (IS_NOT_NULL(node = NCDR(node))) {
6154 if (NTYPE(node) != type) {
6155 fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
6156 exit(0);
6157 }
6158 print_indent_tree(f, NCAR(node), indent + add);
6159 }
6160 break;
6161
6162 case NT_STR:
6163 fprintf(f, "<string%s:%x>",
6164 (NSTRING_IS_RAW(node) ? "-raw" : ""), (int )node);
6165 for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
6166 if (*p >= 0x20 && *p < 0x7f)
6167 fputc(*p, f);
6168 else {
6169 fprintf(f, " 0x%02x", *p);
6170 }
6171 }
6172 break;
6173
6174 case NT_CCLASS:
6175 fprintf(f, "<cclass:%x>", (int )node);
6176 if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(" not", f);
6177 if (NCCLASS(node)->mbuf) {
6178 BBuf* bbuf = NCCLASS(node)->mbuf;
6179 for (i = 0; i < bbuf->used; i++) {
6180 if (i > 0) fprintf(f, ",");
6181 fprintf(f, "%0x", bbuf->p[i]);
6182 }
6183 }
6184 break;
6185
6186 case NT_CTYPE:
6187 fprintf(f, "<ctype:%x> ", (int )node);
6188 switch (NCTYPE(node)->ctype) {
6189 case ONIGENC_CTYPE_WORD:
6190 if (NCTYPE(node)->not != 0)
6191 fputs("not word", f);
6192 else
6193 fputs("word", f);
6194 break;
6195
6196 default:
6197 fprintf(f, "ERROR: undefined ctype.\n");
6198 exit(0);
6199 }
6200 break;
6201
6202 case NT_CANY:
6203 fprintf(f, "<anychar:%x>", (int )node);
6204 break;
6205
6206 case NT_ANCHOR:
6207 fprintf(f, "<anchor:%x> ", (int )node);
6208 switch (NANCHOR(node)->type) {
6209 case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break;
6210 case ANCHOR_END_BUF: fputs("end buf", f); break;
6211 case ANCHOR_BEGIN_LINE: fputs("begin line", f); break;
6212 case ANCHOR_END_LINE: fputs("end line", f); break;
6213 case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break;
6214 case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
6215
6216 case ANCHOR_WORD_BOUND: fputs("word bound", f); break;
6217 case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break;
6218 #ifdef USE_WORD_BEGIN_END
6219 case ANCHOR_WORD_BEGIN: fputs("word begin", f); break;
6220 case ANCHOR_WORD_END: fputs("word end", f); break;
6221 #endif
6222 case ANCHOR_PREC_READ: fputs("prec read", f); break;
6223 case ANCHOR_PREC_READ_NOT: fputs("prec read not", f); break;
6224 case ANCHOR_LOOK_BEHIND: fputs("look_behind", f); break;
6225 case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); break;
6226
6227 default:
6228 fprintf(f, "ERROR: undefined anchor type.\n");
6229 break;
6230 }
6231 break;
6232
6233 case NT_BREF:
6234 {
6235 int* p;
6236 BRefNode* br = NBREF(node);
6237 p = BACKREFS_P(br);
6238 fprintf(f, "<backref:%x>", (int )node);
6239 for (i = 0; i < br->back_num; i++) {
6240 if (i > 0) fputs(", ", f);
6241 fprintf(f, "%d", p[i]);
6242 }
6243 }
6244 break;
6245
6246 #ifdef USE_SUBEXP_CALL
6247 case NT_CALL:
6248 {
6249 CallNode* cn = NCALL(node);
6250 fprintf(f, "<call:%x>", (int )node);
6251 p_string(f, cn->name_end - cn->name, cn->name);
6252 }
6253 break;
6254 #endif
6255
6256 case NT_QTFR:
6257 fprintf(f, "<quantifier:%x>{%d,%d}%s\n", (int )node,
6258 NQTFR(node)->lower, NQTFR(node)->upper,
6259 (NQTFR(node)->greedy ? "" : "?"));
6260 print_indent_tree(f, NQTFR(node)->target, indent + add);
6261 break;
6262
6263 case NT_ENCLOSE:
6264 fprintf(f, "<enclose:%x> ", (int )node);
6265 switch (NENCLOSE(node)->type) {
6266 case ENCLOSE_OPTION:
6267 fprintf(f, "option:%d", NENCLOSE(node)->option);
6268 break;
6269 case ENCLOSE_MEMORY:
6270 fprintf(f, "memory:%d", NENCLOSE(node)->regnum);
6271 break;
6272 case ENCLOSE_STOP_BACKTRACK:
6273 fprintf(f, "stop-bt");
6274 break;
6275
6276 default:
6277 break;
6278 }
6279 fprintf(f, "\n");
6280 print_indent_tree(f, NENCLOSE(node)->target, indent + add);
6281 break;
6282
6283 default:
6284 fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node));
6285 break;
6286 }
6287
6288 if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&
6289 type != NT_ENCLOSE)
6290 fprintf(f, "\n");
6291 fflush(f);
6292 }
6293 #endif /* ONIG_DEBUG */
6294
6295 #ifdef ONIG_DEBUG_PARSE_TREE
6296 static void
6297 print_tree(FILE* f, Node* node)
6298 {
6299 print_indent_tree(f, node, 0);
6300 }
6301 #endif