1 /**********************************************************************
2 regcomp.c - Oniguruma (regular expression library)
3 **********************************************************************/
5 * Copyright (c) 2002-2013 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
8 * (C) Copyright 2015 Hewlett Packard Enterprise Development LP<BR>
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 OnigCaseFoldType OnigDefaultCaseFoldFlag
= ONIGENC_CASE_FOLD_MIN
;
36 extern OnigCaseFoldType
37 onig_get_default_case_fold_flag(void)
39 return OnigDefaultCaseFoldFlag
;
43 onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag
)
45 OnigDefaultCaseFoldFlag
= case_fold_flag
;
50 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
51 static unsigned char PadBuf
[WORD_ALIGNMENT_SIZE
];
55 str_dup(UChar
* s
, UChar
* end
)
57 int len
= (int)(end
- s
);
60 UChar
* r
= (UChar
* )xmalloc(len
+ 1);
70 swap_node(Node
* a
, Node
* b
)
73 CopyMem (&c
, a
, sizeof (Node
));
74 CopyMem (a
, b
, sizeof (Node
));
75 CopyMem (b
, &c
, sizeof (Node
));
77 if (NTYPE(a
) == NT_STR
) {
78 StrNode
* sn
= NSTR(a
);
80 int len
= (int)(sn
->end
- sn
->s
);
82 sn
->end
= sn
->s
+ len
;
86 if (NTYPE(b
) == NT_STR
) {
87 StrNode
* sn
= NSTR(b
);
89 int len
= (int)(sn
->end
- sn
->s
);
91 sn
->end
= sn
->s
+ len
;
97 distance_add(OnigDistance d1
, OnigDistance d2
)
99 if (d1
== ONIG_INFINITE_DISTANCE
|| d2
== ONIG_INFINITE_DISTANCE
)
100 return ONIG_INFINITE_DISTANCE
;
102 if (d1
<= ONIG_INFINITE_DISTANCE
- d2
) return d1
+ d2
;
103 else return ONIG_INFINITE_DISTANCE
;
108 distance_multiply(OnigDistance d
, int m
)
110 if (m
== 0) return 0;
112 if (d
< ONIG_INFINITE_DISTANCE
/ m
)
115 return ONIG_INFINITE_DISTANCE
;
119 bitset_is_empty(BitSetRef bs
)
122 for (i
= 0; i
< (int )BITSET_SIZE
; i
++) {
123 if (bs
[i
] != 0) return 0;
130 bitset_on_num(BitSetRef bs
)
135 for (i
= 0; i
< SINGLE_BYTE_SIZE
; i
++) {
136 if (BITSET_AT(bs
, i
)) n
++;
143 onig_bbuf_init(BBuf
* buf
, int size
)
150 buf
->p
= (UChar
* )xmalloc(size
);
151 if (IS_NULL(buf
->p
)) return(ONIGERR_MEMORY
);
160 #ifdef USE_SUBEXP_CALL
163 unset_addr_list_init(UnsetAddrList
* uslist
, int size
)
167 p
= (UnsetAddr
* )xmalloc(sizeof(UnsetAddr
)* size
);
168 CHECK_NULL_RETURN_MEMERR(p
);
170 uslist
->alloc
= size
;
176 unset_addr_list_end(UnsetAddrList
* uslist
)
178 if (IS_NOT_NULL(uslist
->us
))
183 unset_addr_list_add(UnsetAddrList
* uslist
, int offset
, struct _Node
* node
)
188 if (uslist
->num
>= uslist
->alloc
) {
189 size
= uslist
->alloc
* 2;
190 p
= (UnsetAddr
* )xrealloc(uslist
->us
, sizeof(UnsetAddr
) * size
, sizeof(UnsetAddr
) * uslist
->alloc
);
191 CHECK_NULL_RETURN_MEMERR(p
);
192 uslist
->alloc
= size
;
196 uslist
->us
[uslist
->num
].offset
= offset
;
197 uslist
->us
[uslist
->num
].target
= node
;
201 #endif /* USE_SUBEXP_CALL */
205 add_opcode(regex_t
* reg
, int opcode
)
207 BBUF_ADD1(reg
, ((unsigned char)opcode
));
211 #ifdef USE_COMBINATION_EXPLOSION_CHECK
213 add_state_check_num(regex_t
* reg
, int num
)
215 StateCheckNumType n
= (StateCheckNumType
)num
;
217 BBUF_ADD(reg
, &n
, SIZE_STATE_CHECK_NUM
);
223 add_rel_addr(regex_t
* reg
, int addr
)
225 RelAddrType ra
= (RelAddrType
)addr
;
227 BBUF_ADD(reg
, &ra
, SIZE_RELADDR
);
232 add_abs_addr(regex_t
* reg
, int addr
)
234 AbsAddrType ra
= (AbsAddrType
)addr
;
236 BBUF_ADD(reg
, &ra
, SIZE_ABSADDR
);
241 add_length(regex_t
* reg
, int len
)
243 LengthType l
= (LengthType
)len
;
245 BBUF_ADD(reg
, &l
, SIZE_LENGTH
);
250 add_mem_num(regex_t
* reg
, int num
)
252 MemNumType n
= (MemNumType
)num
;
254 BBUF_ADD(reg
, &n
, SIZE_MEMNUM
);
259 add_pointer(regex_t
* reg
, void* addr
)
261 PointerType ptr
= (PointerType
)addr
;
263 BBUF_ADD(reg
, &ptr
, SIZE_POINTER
);
268 add_option(regex_t
* reg
, OnigOptionType option
)
270 BBUF_ADD(reg
, &option
, SIZE_OPTION
);
275 add_opcode_rel_addr(regex_t
* reg
, int opcode
, int addr
)
279 r
= add_opcode(reg
, opcode
);
281 r
= add_rel_addr(reg
, addr
);
286 add_bytes(regex_t
* reg
, UChar
* bytes
, int len
)
288 BBUF_ADD(reg
, bytes
, len
);
293 add_bitset(regex_t
* reg
, BitSetRef bs
)
295 BBUF_ADD(reg
, bs
, SIZE_BITSET
);
300 add_opcode_option(regex_t
* reg
, int opcode
, OnigOptionType option
)
304 r
= add_opcode(reg
, opcode
);
306 r
= add_option(reg
, option
);
310 static int compile_length_tree(Node
* node
, regex_t
* reg
);
311 static int compile_tree(Node
* node
, regex_t
* reg
);
314 #define IS_NEED_STR_LEN_OP_EXACT(op) \
315 ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
316 (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
319 select_str_opcode(int mb_len
, int str_len
, int ignore_case
)
325 case 1: op
= OP_EXACT1_IC
; break;
326 default: op
= OP_EXACTN_IC
; break;
333 case 1: op
= OP_EXACT1
; break;
334 case 2: op
= OP_EXACT2
; break;
335 case 3: op
= OP_EXACT3
; break;
336 case 4: op
= OP_EXACT4
; break;
337 case 5: op
= OP_EXACT5
; break;
338 default: op
= OP_EXACTN
; break;
344 case 1: op
= OP_EXACTMB2N1
; break;
345 case 2: op
= OP_EXACTMB2N2
; break;
346 case 3: op
= OP_EXACTMB2N3
; break;
347 default: op
= OP_EXACTMB2N
; break;
364 compile_tree_empty_check(Node
* node
, regex_t
* reg
, int empty_info
)
367 int saved_num_null_check
= reg
->num_null_check
;
369 if (empty_info
!= 0) {
370 r
= add_opcode(reg
, OP_NULL_CHECK_START
);
372 r
= add_mem_num(reg
, reg
->num_null_check
); /* NULL CHECK ID */
374 reg
->num_null_check
++;
377 r
= compile_tree(node
, reg
);
380 if (empty_info
!= 0) {
381 if (empty_info
== NQ_TARGET_IS_EMPTY
)
382 r
= add_opcode(reg
, OP_NULL_CHECK_END
);
383 else if (empty_info
== NQ_TARGET_IS_EMPTY_MEM
)
384 r
= add_opcode(reg
, OP_NULL_CHECK_END_MEMST
);
385 else if (empty_info
== NQ_TARGET_IS_EMPTY_REC
)
386 r
= add_opcode(reg
, OP_NULL_CHECK_END_MEMST_PUSH
);
389 r
= add_mem_num(reg
, saved_num_null_check
); /* NULL CHECK ID */
394 #ifdef USE_SUBEXP_CALL
396 compile_call(CallNode
* node
, regex_t
* reg
)
400 r
= add_opcode(reg
, OP_CALL
);
402 r
= unset_addr_list_add(node
->unset_addr_list
, BBUF_GET_OFFSET_POS(reg
),
405 r
= add_abs_addr(reg
, 0 /*dummy addr.*/);
411 compile_tree_n_times(Node
* node
, int n
, regex_t
* reg
)
415 for (i
= 0; i
< n
; i
++) {
416 r
= compile_tree(node
, reg
);
423 add_compile_string_length(UChar
* s ARG_UNUSED
, int mb_len
, int str_len
,
424 regex_t
* reg ARG_UNUSED
, int ignore_case
)
427 int op
= select_str_opcode(mb_len
, str_len
, ignore_case
);
431 if (op
== OP_EXACTMBN
) len
+= SIZE_LENGTH
;
432 if (IS_NEED_STR_LEN_OP_EXACT(op
))
435 len
+= mb_len
* str_len
;
440 add_compile_string(UChar
* s
, int mb_len
, int str_len
,
441 regex_t
* reg
, int ignore_case
)
443 int op
= select_str_opcode(mb_len
, str_len
, ignore_case
);
446 if (op
== OP_EXACTMBN
)
447 add_length(reg
, mb_len
);
449 if (IS_NEED_STR_LEN_OP_EXACT(op
)) {
450 if (op
== OP_EXACTN_IC
)
451 add_length(reg
, mb_len
* str_len
);
453 add_length(reg
, str_len
);
456 add_bytes(reg
, s
, mb_len
* str_len
);
462 compile_length_string_node(Node
* node
, regex_t
* reg
)
464 int rlen
, r
, len
, prev_len
, slen
, ambig
;
465 OnigEncoding enc
= reg
->enc
;
470 if (sn
->end
<= sn
->s
)
473 ambig
= NSTRING_IS_AMBIG(node
);
476 prev_len
= enclen(enc
, p
);
481 for (; p
< sn
->end
; ) {
482 len
= enclen(enc
, p
);
483 if (len
== prev_len
) {
487 r
= add_compile_string_length(prev
, prev_len
, slen
, reg
, ambig
);
495 r
= add_compile_string_length(prev
, prev_len
, slen
, reg
, ambig
);
501 compile_length_string_raw_node(StrNode
* sn
, regex_t
* reg
)
503 if (sn
->end
<= sn
->s
)
506 return add_compile_string_length(sn
->s
, 1 /* sb */, (int)(sn
->end
- sn
->s
), reg
, 0);
510 compile_string_node(Node
* node
, regex_t
* reg
)
512 int r
, len
, prev_len
, slen
, ambig
;
513 OnigEncoding enc
= reg
->enc
;
514 UChar
*p
, *prev
, *end
;
518 if (sn
->end
<= sn
->s
)
522 ambig
= NSTRING_IS_AMBIG(node
);
525 prev_len
= enclen(enc
, p
);
530 len
= enclen(enc
, p
);
531 if (len
== prev_len
) {
535 r
= add_compile_string(prev
, prev_len
, slen
, reg
, ambig
);
545 return add_compile_string(prev
, prev_len
, slen
, reg
, ambig
);
549 compile_string_raw_node(StrNode
* sn
, regex_t
* reg
)
551 if (sn
->end
<= sn
->s
)
554 return add_compile_string(sn
->s
, 1 /* sb */, (int)(sn
->end
- sn
->s
), reg
, 0);
558 add_multi_byte_cclass(BBuf
* mbuf
, regex_t
* reg
)
560 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
561 add_length(reg
, mbuf
->used
);
562 return add_bytes(reg
, mbuf
->p
, mbuf
->used
);
565 UChar
* p
= BBUF_GET_ADD_ADDRESS(reg
) + SIZE_LENGTH
;
567 GET_ALIGNMENT_PAD_SIZE(p
, pad_size
);
568 add_length(reg
, mbuf
->used
+ (WORD_ALIGNMENT_SIZE
- 1));
569 if (pad_size
!= 0) add_bytes(reg
, PadBuf
, pad_size
);
571 r
= add_bytes(reg
, mbuf
->p
, mbuf
->used
);
573 /* padding for return value from compile_length_cclass_node() to be fix. */
574 pad_size
= (WORD_ALIGNMENT_SIZE
- 1) - pad_size
;
575 if (pad_size
!= 0) add_bytes(reg
, PadBuf
, pad_size
);
581 compile_length_cclass_node(CClassNode
* cc
, regex_t
* reg
)
585 if (IS_NCCLASS_SHARE(cc
)) {
586 len
= SIZE_OPCODE
+ SIZE_POINTER
;
590 if (IS_NULL(cc
->mbuf
)) {
591 len
= SIZE_OPCODE
+ SIZE_BITSET
;
594 if (ONIGENC_MBC_MINLEN(reg
->enc
) > 1 || bitset_is_empty(cc
->bs
)) {
598 len
= SIZE_OPCODE
+ SIZE_BITSET
;
600 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
601 len
+= SIZE_LENGTH
+ cc
->mbuf
->used
;
603 len
+= SIZE_LENGTH
+ cc
->mbuf
->used
+ (WORD_ALIGNMENT_SIZE
- 1);
611 compile_cclass_node(CClassNode
* cc
, regex_t
* reg
)
615 if (IS_NCCLASS_SHARE(cc
)) {
616 add_opcode(reg
, OP_CCLASS_NODE
);
617 r
= add_pointer(reg
, cc
);
621 if (IS_NULL(cc
->mbuf
)) {
622 if (IS_NCCLASS_NOT(cc
))
623 add_opcode(reg
, OP_CCLASS_NOT
);
625 add_opcode(reg
, OP_CCLASS
);
627 r
= add_bitset(reg
, cc
->bs
);
630 if (ONIGENC_MBC_MINLEN(reg
->enc
) > 1 || bitset_is_empty(cc
->bs
)) {
631 if (IS_NCCLASS_NOT(cc
))
632 add_opcode(reg
, OP_CCLASS_MB_NOT
);
634 add_opcode(reg
, OP_CCLASS_MB
);
636 r
= add_multi_byte_cclass(cc
->mbuf
, reg
);
639 if (IS_NCCLASS_NOT(cc
))
640 add_opcode(reg
, OP_CCLASS_MIX_NOT
);
642 add_opcode(reg
, OP_CCLASS_MIX
);
644 r
= add_bitset(reg
, cc
->bs
);
646 r
= add_multi_byte_cclass(cc
->mbuf
, reg
);
654 entry_repeat_range(regex_t
* reg
, int id
, int lower
, int upper
)
656 #define REPEAT_RANGE_ALLOC 4
660 if (reg
->repeat_range_alloc
== 0) {
661 p
= (OnigRepeatRange
* )xmalloc(sizeof(OnigRepeatRange
) * REPEAT_RANGE_ALLOC
);
662 CHECK_NULL_RETURN_MEMERR(p
);
663 reg
->repeat_range
= p
;
664 reg
->repeat_range_alloc
= REPEAT_RANGE_ALLOC
;
666 else if (reg
->repeat_range_alloc
<= id
) {
668 n
= reg
->repeat_range_alloc
+ REPEAT_RANGE_ALLOC
;
669 p
= (OnigRepeatRange
* )xrealloc(reg
->repeat_range
,
670 sizeof(OnigRepeatRange
) * n
,
671 sizeof(OnigRepeatRange
) * reg
->repeat_range_alloc
);
672 CHECK_NULL_RETURN_MEMERR(p
);
673 reg
->repeat_range
= p
;
674 reg
->repeat_range_alloc
= n
;
677 p
= reg
->repeat_range
;
681 p
[id
].upper
= (IS_REPEAT_INFINITE(upper
) ? 0x7fffffff : upper
);
686 compile_range_repeat_node(QtfrNode
* qn
, int target_len
, int empty_info
,
690 int num_repeat
= reg
->num_repeat
;
692 r
= add_opcode(reg
, qn
->greedy
? OP_REPEAT
: OP_REPEAT_NG
);
694 r
= add_mem_num(reg
, num_repeat
); /* OP_REPEAT ID */
697 r
= add_rel_addr(reg
, target_len
+ SIZE_OP_REPEAT_INC
);
700 r
= entry_repeat_range(reg
, num_repeat
, qn
->lower
, qn
->upper
);
703 r
= compile_tree_empty_check(qn
->target
, reg
, empty_info
);
707 #ifdef USE_SUBEXP_CALL
710 IS_QUANTIFIER_IN_REPEAT(qn
)) {
711 r
= add_opcode(reg
, qn
->greedy
? OP_REPEAT_INC_SG
: OP_REPEAT_INC_NG_SG
);
714 r
= add_opcode(reg
, qn
->greedy
? OP_REPEAT_INC
: OP_REPEAT_INC_NG
);
717 r
= add_mem_num(reg
, num_repeat
); /* OP_REPEAT ID */
722 is_anychar_star_quantifier(QtfrNode
* qn
)
724 if (qn
->greedy
&& IS_REPEAT_INFINITE(qn
->upper
) &&
725 NTYPE(qn
->target
) == NT_CANY
)
731 #define QUANTIFIER_EXPAND_LIMIT_SIZE 50
732 #define CKN_ON (ckn > 0)
734 #ifdef USE_COMBINATION_EXPLOSION_CHECK
737 compile_length_quantifier_node(QtfrNode
* qn
, regex_t
* reg
)
739 int len
, mod_tlen
, cklen
;
741 int infinite
= IS_REPEAT_INFINITE(qn
->upper
);
742 int empty_info
= qn
->target_empty_info
;
743 int tlen
= compile_length_tree(qn
->target
, reg
);
745 if (tlen
< 0) return tlen
;
747 ckn
= ((reg
->num_comb_exp_check
> 0) ? qn
->comb_exp_check_num
: 0);
749 cklen
= (CKN_ON
? SIZE_STATE_CHECK_NUM
: 0);
752 if (NTYPE(qn
->target
) == NT_CANY
) {
753 if (qn
->greedy
&& infinite
) {
754 if (IS_NOT_NULL(qn
->next_head_exact
) && !CKN_ON
)
755 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT
+ tlen
* qn
->lower
+ cklen
;
757 return SIZE_OP_ANYCHAR_STAR
+ tlen
* qn
->lower
+ cklen
;
762 mod_tlen
= tlen
+ (SIZE_OP_NULL_CHECK_START
+ SIZE_OP_NULL_CHECK_END
);
766 if (infinite
&& qn
->lower
<= 1) {
773 len
+= SIZE_OP_PUSH
+ cklen
+ mod_tlen
+ SIZE_OP_JUMP
;
781 len
+= mod_tlen
+ SIZE_OP_PUSH
+ cklen
;
784 else if (qn
->upper
== 0) {
785 if (qn
->is_refered
!= 0) /* /(?<n>..){0}/ */
786 len
= SIZE_OP_JUMP
+ tlen
;
790 else if (qn
->upper
== 1 && qn
->greedy
) {
791 if (qn
->lower
== 0) {
793 len
= SIZE_OP_STATE_CHECK_PUSH
+ tlen
;
796 len
= SIZE_OP_PUSH
+ tlen
;
803 else if (!qn
->greedy
&& qn
->upper
== 1 && qn
->lower
== 0) { /* '??' */
804 len
= SIZE_OP_PUSH
+ cklen
+ SIZE_OP_JUMP
+ tlen
;
807 len
= SIZE_OP_REPEAT_INC
808 + mod_tlen
+ SIZE_OPCODE
+ SIZE_RELADDR
+ SIZE_MEMNUM
;
810 len
+= SIZE_OP_STATE_CHECK
;
817 compile_quantifier_node(QtfrNode
* qn
, regex_t
* reg
)
821 int infinite
= IS_REPEAT_INFINITE(qn
->upper
);
822 int empty_info
= qn
->target_empty_info
;
823 int tlen
= compile_length_tree(qn
->target
, reg
);
825 if (tlen
< 0) return tlen
;
827 ckn
= ((reg
->num_comb_exp_check
> 0) ? qn
->comb_exp_check_num
: 0);
829 if (is_anychar_star_quantifier(qn
)) {
830 r
= compile_tree_n_times(qn
->target
, qn
->lower
, reg
);
832 if (IS_NOT_NULL(qn
->next_head_exact
) && !CKN_ON
) {
833 if (IS_MULTILINE(reg
->options
))
834 r
= add_opcode(reg
, OP_ANYCHAR_ML_STAR_PEEK_NEXT
);
836 r
= add_opcode(reg
, OP_ANYCHAR_STAR_PEEK_NEXT
);
839 r
= add_state_check_num(reg
, ckn
);
843 return add_bytes(reg
, NSTR(qn
->next_head_exact
)->s
, 1);
846 if (IS_MULTILINE(reg
->options
)) {
847 r
= add_opcode(reg
, (CKN_ON
?
848 OP_STATE_CHECK_ANYCHAR_ML_STAR
849 : OP_ANYCHAR_ML_STAR
));
852 r
= add_opcode(reg
, (CKN_ON
?
853 OP_STATE_CHECK_ANYCHAR_STAR
858 r
= add_state_check_num(reg
, ckn
);
865 mod_tlen
= tlen
+ (SIZE_OP_NULL_CHECK_START
+ SIZE_OP_NULL_CHECK_END
);
869 if (infinite
&& qn
->lower
<= 1) {
871 if (qn
->lower
== 1) {
872 r
= add_opcode_rel_addr(reg
, OP_JUMP
,
873 (CKN_ON
? SIZE_OP_STATE_CHECK_PUSH
: SIZE_OP_PUSH
));
878 r
= add_opcode(reg
, OP_STATE_CHECK_PUSH
);
880 r
= add_state_check_num(reg
, ckn
);
882 r
= add_rel_addr(reg
, mod_tlen
+ SIZE_OP_JUMP
);
885 r
= add_opcode_rel_addr(reg
, OP_PUSH
, mod_tlen
+ SIZE_OP_JUMP
);
888 r
= compile_tree_empty_check(qn
->target
, reg
, empty_info
);
890 r
= add_opcode_rel_addr(reg
, OP_JUMP
,
891 -(mod_tlen
+ (int )SIZE_OP_JUMP
892 + (int )(CKN_ON
? SIZE_OP_STATE_CHECK_PUSH
: SIZE_OP_PUSH
)));
895 if (qn
->lower
== 0) {
896 r
= add_opcode_rel_addr(reg
, OP_JUMP
, mod_tlen
);
899 r
= compile_tree_empty_check(qn
->target
, reg
, empty_info
);
902 r
= add_opcode(reg
, OP_STATE_CHECK_PUSH_OR_JUMP
);
904 r
= add_state_check_num(reg
, ckn
);
906 r
= add_rel_addr(reg
,
907 -(mod_tlen
+ (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP
));
910 r
= add_opcode_rel_addr(reg
, OP_PUSH
, -(mod_tlen
+ (int )SIZE_OP_PUSH
));
913 else if (qn
->upper
== 0) {
914 if (qn
->is_refered
!= 0) { /* /(?<n>..){0}/ */
915 r
= add_opcode_rel_addr(reg
, OP_JUMP
, tlen
);
917 r
= compile_tree(qn
->target
, reg
);
922 else if (qn
->upper
== 1 && qn
->greedy
) {
923 if (qn
->lower
== 0) {
925 r
= add_opcode(reg
, OP_STATE_CHECK_PUSH
);
927 r
= add_state_check_num(reg
, ckn
);
929 r
= add_rel_addr(reg
, tlen
);
932 r
= add_opcode_rel_addr(reg
, OP_PUSH
, tlen
);
937 r
= compile_tree(qn
->target
, reg
);
939 else if (!qn
->greedy
&& qn
->upper
== 1 && qn
->lower
== 0) { /* '??' */
941 r
= add_opcode(reg
, OP_STATE_CHECK_PUSH
);
943 r
= add_state_check_num(reg
, ckn
);
945 r
= add_rel_addr(reg
, SIZE_OP_JUMP
);
948 r
= add_opcode_rel_addr(reg
, OP_PUSH
, SIZE_OP_JUMP
);
952 r
= add_opcode_rel_addr(reg
, OP_JUMP
, tlen
);
954 r
= compile_tree(qn
->target
, reg
);
957 r
= compile_range_repeat_node(qn
, mod_tlen
, empty_info
, reg
);
960 r
= add_opcode(reg
, OP_STATE_CHECK
);
962 r
= add_state_check_num(reg
, ckn
);
968 #else /* USE_COMBINATION_EXPLOSION_CHECK */
971 compile_length_quantifier_node(QtfrNode
* qn
, regex_t
* reg
)
974 int infinite
= IS_REPEAT_INFINITE(qn
->upper
);
975 int empty_info
= qn
->target_empty_info
;
976 int tlen
= compile_length_tree(qn
->target
, reg
);
978 if (tlen
< 0) return tlen
;
981 if (NTYPE(qn
->target
) == NT_CANY
) {
982 if (qn
->greedy
&& infinite
) {
983 if (IS_NOT_NULL(qn
->next_head_exact
))
984 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT
+ tlen
* qn
->lower
;
986 return SIZE_OP_ANYCHAR_STAR
+ tlen
* qn
->lower
;
991 mod_tlen
= tlen
+ (SIZE_OP_NULL_CHECK_START
+ SIZE_OP_NULL_CHECK_END
);
996 (qn
->lower
<= 1 || tlen
* qn
->lower
<= QUANTIFIER_EXPAND_LIMIT_SIZE
)) {
997 if (qn
->lower
== 1 && tlen
> QUANTIFIER_EXPAND_LIMIT_SIZE
) {
1001 len
= tlen
* qn
->lower
;
1005 if (IS_NOT_NULL(qn
->head_exact
))
1006 len
+= SIZE_OP_PUSH_OR_JUMP_EXACT1
+ mod_tlen
+ SIZE_OP_JUMP
;
1007 else if (IS_NOT_NULL(qn
->next_head_exact
))
1008 len
+= SIZE_OP_PUSH_IF_PEEK_NEXT
+ mod_tlen
+ SIZE_OP_JUMP
;
1010 len
+= SIZE_OP_PUSH
+ mod_tlen
+ SIZE_OP_JUMP
;
1013 len
+= SIZE_OP_JUMP
+ mod_tlen
+ SIZE_OP_PUSH
;
1015 else if (qn
->upper
== 0 && qn
->is_refered
!= 0) { /* /(?<n>..){0}/ */
1016 len
= SIZE_OP_JUMP
+ tlen
;
1018 else if (!infinite
&& qn
->greedy
&&
1019 (qn
->upper
== 1 || (tlen
+ SIZE_OP_PUSH
) * qn
->upper
1020 <= QUANTIFIER_EXPAND_LIMIT_SIZE
)) {
1021 len
= tlen
* qn
->lower
;
1022 len
+= (SIZE_OP_PUSH
+ tlen
) * (qn
->upper
- qn
->lower
);
1024 else if (!qn
->greedy
&& qn
->upper
== 1 && qn
->lower
== 0) { /* '??' */
1025 len
= SIZE_OP_PUSH
+ SIZE_OP_JUMP
+ tlen
;
1028 len
= SIZE_OP_REPEAT_INC
1029 + mod_tlen
+ SIZE_OPCODE
+ SIZE_RELADDR
+ SIZE_MEMNUM
;
1036 compile_quantifier_node(QtfrNode
* qn
, regex_t
* reg
)
1039 int infinite
= IS_REPEAT_INFINITE(qn
->upper
);
1040 int empty_info
= qn
->target_empty_info
;
1041 int tlen
= compile_length_tree(qn
->target
, reg
);
1043 if (tlen
< 0) return tlen
;
1045 if (is_anychar_star_quantifier(qn
)) {
1046 r
= compile_tree_n_times(qn
->target
, qn
->lower
, reg
);
1048 if (IS_NOT_NULL(qn
->next_head_exact
)) {
1049 if (IS_MULTILINE(reg
->options
))
1050 r
= add_opcode(reg
, OP_ANYCHAR_ML_STAR_PEEK_NEXT
);
1052 r
= add_opcode(reg
, OP_ANYCHAR_STAR_PEEK_NEXT
);
1054 return add_bytes(reg
, NSTR(qn
->next_head_exact
)->s
, 1);
1057 if (IS_MULTILINE(reg
->options
))
1058 return add_opcode(reg
, OP_ANYCHAR_ML_STAR
);
1060 return add_opcode(reg
, OP_ANYCHAR_STAR
);
1064 if (empty_info
!= 0)
1065 mod_tlen
= tlen
+ (SIZE_OP_NULL_CHECK_START
+ SIZE_OP_NULL_CHECK_END
);
1070 (qn
->lower
<= 1 || tlen
* qn
->lower
<= QUANTIFIER_EXPAND_LIMIT_SIZE
)) {
1071 if (qn
->lower
== 1 && tlen
> QUANTIFIER_EXPAND_LIMIT_SIZE
) {
1073 if (IS_NOT_NULL(qn
->head_exact
))
1074 r
= add_opcode_rel_addr(reg
, OP_JUMP
, SIZE_OP_PUSH_OR_JUMP_EXACT1
);
1075 else if (IS_NOT_NULL(qn
->next_head_exact
))
1076 r
= add_opcode_rel_addr(reg
, OP_JUMP
, SIZE_OP_PUSH_IF_PEEK_NEXT
);
1078 r
= add_opcode_rel_addr(reg
, OP_JUMP
, SIZE_OP_PUSH
);
1081 r
= add_opcode_rel_addr(reg
, OP_JUMP
, SIZE_OP_JUMP
);
1086 r
= compile_tree_n_times(qn
->target
, qn
->lower
, reg
);
1091 if (IS_NOT_NULL(qn
->head_exact
)) {
1092 r
= add_opcode_rel_addr(reg
, OP_PUSH_OR_JUMP_EXACT1
,
1093 mod_tlen
+ SIZE_OP_JUMP
);
1095 add_bytes(reg
, NSTR(qn
->head_exact
)->s
, 1);
1096 r
= compile_tree_empty_check(qn
->target
, reg
, empty_info
);
1098 r
= add_opcode_rel_addr(reg
, OP_JUMP
,
1099 -(mod_tlen
+ (int )SIZE_OP_JUMP
+ (int )SIZE_OP_PUSH_OR_JUMP_EXACT1
));
1101 else if (IS_NOT_NULL(qn
->next_head_exact
)) {
1102 r
= add_opcode_rel_addr(reg
, OP_PUSH_IF_PEEK_NEXT
,
1103 mod_tlen
+ SIZE_OP_JUMP
);
1105 add_bytes(reg
, NSTR(qn
->next_head_exact
)->s
, 1);
1106 r
= compile_tree_empty_check(qn
->target
, reg
, empty_info
);
1108 r
= add_opcode_rel_addr(reg
, OP_JUMP
,
1109 -(mod_tlen
+ (int )SIZE_OP_JUMP
+ (int )SIZE_OP_PUSH_IF_PEEK_NEXT
));
1112 r
= add_opcode_rel_addr(reg
, OP_PUSH
, mod_tlen
+ SIZE_OP_JUMP
);
1114 r
= compile_tree_empty_check(qn
->target
, reg
, empty_info
);
1116 r
= add_opcode_rel_addr(reg
, OP_JUMP
,
1117 -(mod_tlen
+ (int )SIZE_OP_JUMP
+ (int )SIZE_OP_PUSH
));
1121 r
= add_opcode_rel_addr(reg
, OP_JUMP
, mod_tlen
);
1123 r
= compile_tree_empty_check(qn
->target
, reg
, empty_info
);
1125 r
= add_opcode_rel_addr(reg
, OP_PUSH
, -(mod_tlen
+ (int )SIZE_OP_PUSH
));
1128 else if (qn
->upper
== 0 && qn
->is_refered
!= 0) { /* /(?<n>..){0}/ */
1129 r
= add_opcode_rel_addr(reg
, OP_JUMP
, tlen
);
1131 r
= compile_tree(qn
->target
, reg
);
1133 else if (!infinite
&& qn
->greedy
&&
1134 (qn
->upper
== 1 || (tlen
+ SIZE_OP_PUSH
) * qn
->upper
1135 <= QUANTIFIER_EXPAND_LIMIT_SIZE
)) {
1136 int n
= qn
->upper
- qn
->lower
;
1138 r
= compile_tree_n_times(qn
->target
, qn
->lower
, reg
);
1141 for (i
= 0; i
< n
; i
++) {
1142 r
= add_opcode_rel_addr(reg
, OP_PUSH
,
1143 (n
- i
) * tlen
+ (n
- i
- 1) * SIZE_OP_PUSH
);
1145 r
= compile_tree(qn
->target
, reg
);
1149 else if (!qn
->greedy
&& qn
->upper
== 1 && qn
->lower
== 0) { /* '??' */
1150 r
= add_opcode_rel_addr(reg
, OP_PUSH
, SIZE_OP_JUMP
);
1152 r
= add_opcode_rel_addr(reg
, OP_JUMP
, tlen
);
1154 r
= compile_tree(qn
->target
, reg
);
1157 r
= compile_range_repeat_node(qn
, mod_tlen
, empty_info
, reg
);
1161 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
1164 compile_length_option_node(EncloseNode
* node
, regex_t
* reg
)
1167 OnigOptionType prev
= reg
->options
;
1169 reg
->options
= node
->option
;
1170 tlen
= compile_length_tree(node
->target
, reg
);
1171 reg
->options
= prev
;
1173 if (tlen
< 0) return tlen
;
1175 if (IS_DYNAMIC_OPTION(prev
^ node
->option
)) {
1176 return SIZE_OP_SET_OPTION_PUSH
+ SIZE_OP_SET_OPTION
+ SIZE_OP_FAIL
1177 + tlen
+ SIZE_OP_SET_OPTION
;
1184 compile_option_node(EncloseNode
* node
, regex_t
* reg
)
1187 OnigOptionType prev
= reg
->options
;
1189 if (IS_DYNAMIC_OPTION(prev
^ node
->option
)) {
1190 r
= add_opcode_option(reg
, OP_SET_OPTION_PUSH
, node
->option
);
1192 r
= add_opcode_option(reg
, OP_SET_OPTION
, prev
);
1194 r
= add_opcode(reg
, OP_FAIL
);
1198 reg
->options
= node
->option
;
1199 r
= compile_tree(node
->target
, reg
);
1200 reg
->options
= prev
;
1202 if (IS_DYNAMIC_OPTION(prev
^ node
->option
)) {
1204 r
= add_opcode_option(reg
, OP_SET_OPTION
, prev
);
1210 compile_length_enclose_node(EncloseNode
* node
, regex_t
* reg
)
1218 if (node
->type
== ENCLOSE_OPTION
)
1219 return compile_length_option_node(node
, reg
);
1222 tlen
= compile_length_tree(node
->target
, reg
);
1223 if (tlen
< 0) return tlen
;
1228 switch (node
->type
) {
1229 case ENCLOSE_MEMORY
:
1230 #ifdef USE_SUBEXP_CALL
1231 if (IS_ENCLOSE_CALLED(node
)) {
1232 len
= SIZE_OP_MEMORY_START_PUSH
+ tlen
1233 + SIZE_OP_CALL
+ SIZE_OP_JUMP
+ SIZE_OP_RETURN
;
1234 if (BIT_STATUS_AT(reg
->bt_mem_end
, node
->regnum
))
1235 len
+= (IS_ENCLOSE_RECURSION(node
)
1236 ? SIZE_OP_MEMORY_END_PUSH_REC
: SIZE_OP_MEMORY_END_PUSH
);
1238 len
+= (IS_ENCLOSE_RECURSION(node
)
1239 ? SIZE_OP_MEMORY_END_REC
: SIZE_OP_MEMORY_END
);
1244 if (BIT_STATUS_AT(reg
->bt_mem_start
, node
->regnum
))
1245 len
= SIZE_OP_MEMORY_START_PUSH
;
1247 len
= SIZE_OP_MEMORY_START
;
1249 len
+= tlen
+ (BIT_STATUS_AT(reg
->bt_mem_end
, node
->regnum
)
1250 ? SIZE_OP_MEMORY_END_PUSH
: SIZE_OP_MEMORY_END
);
1254 case ENCLOSE_STOP_BACKTRACK
:
1255 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node
)) {
1256 if (node
->target
== NULL
) {
1257 CHECK_NULL_RETURN_MEMERR(node
->target
);
1259 qn
= NQTFR(node
->target
);
1260 tlen
= compile_length_tree(qn
->target
, reg
);
1261 if (tlen
< 0) return tlen
;
1263 len
= tlen
* qn
->lower
1264 + SIZE_OP_PUSH
+ tlen
+ SIZE_OP_POP
+ SIZE_OP_JUMP
;
1267 len
= SIZE_OP_PUSH_STOP_BT
+ tlen
+ SIZE_OP_POP_STOP_BT
;
1272 return ONIGERR_TYPE_BUG
;
1279 static int get_char_length_tree(Node
* node
, regex_t
* reg
, int* len
);
1282 compile_enclose_node(EncloseNode
* node
, regex_t
* reg
)
1286 if (node
->type
== ENCLOSE_OPTION
)
1287 return compile_option_node(node
, reg
);
1289 switch (node
->type
) {
1290 case ENCLOSE_MEMORY
:
1291 #ifdef USE_SUBEXP_CALL
1292 if (IS_ENCLOSE_CALLED(node
)) {
1293 r
= add_opcode(reg
, OP_CALL
);
1295 node
->call_addr
= BBUF_GET_OFFSET_POS(reg
) + SIZE_ABSADDR
+ SIZE_OP_JUMP
;
1296 node
->state
|= NST_ADDR_FIXED
;
1297 r
= add_abs_addr(reg
, (int )node
->call_addr
);
1299 len
= compile_length_tree(node
->target
, reg
);
1300 len
+= (SIZE_OP_MEMORY_START_PUSH
+ SIZE_OP_RETURN
);
1301 if (BIT_STATUS_AT(reg
->bt_mem_end
, node
->regnum
))
1302 len
+= (IS_ENCLOSE_RECURSION(node
)
1303 ? SIZE_OP_MEMORY_END_PUSH_REC
: SIZE_OP_MEMORY_END_PUSH
);
1305 len
+= (IS_ENCLOSE_RECURSION(node
)
1306 ? SIZE_OP_MEMORY_END_REC
: SIZE_OP_MEMORY_END
);
1308 r
= add_opcode_rel_addr(reg
, OP_JUMP
, len
);
1312 if (BIT_STATUS_AT(reg
->bt_mem_start
, node
->regnum
))
1313 r
= add_opcode(reg
, OP_MEMORY_START_PUSH
);
1315 r
= add_opcode(reg
, OP_MEMORY_START
);
1317 r
= add_mem_num(reg
, node
->regnum
);
1319 r
= compile_tree(node
->target
, reg
);
1321 #ifdef USE_SUBEXP_CALL
1322 if (IS_ENCLOSE_CALLED(node
)) {
1323 if (BIT_STATUS_AT(reg
->bt_mem_end
, node
->regnum
))
1324 r
= add_opcode(reg
, (IS_ENCLOSE_RECURSION(node
)
1325 ? OP_MEMORY_END_PUSH_REC
: OP_MEMORY_END_PUSH
));
1327 r
= add_opcode(reg
, (IS_ENCLOSE_RECURSION(node
)
1328 ? OP_MEMORY_END_REC
: OP_MEMORY_END
));
1331 r
= add_mem_num(reg
, node
->regnum
);
1333 r
= add_opcode(reg
, OP_RETURN
);
1338 if (BIT_STATUS_AT(reg
->bt_mem_end
, node
->regnum
))
1339 r
= add_opcode(reg
, OP_MEMORY_END_PUSH
);
1341 r
= add_opcode(reg
, OP_MEMORY_END
);
1343 r
= add_mem_num(reg
, node
->regnum
);
1347 case ENCLOSE_STOP_BACKTRACK
:
1348 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node
)) {
1349 QtfrNode
* qn
= NQTFR(node
->target
);
1350 r
= compile_tree_n_times(qn
->target
, qn
->lower
, reg
);
1353 len
= compile_length_tree(qn
->target
, reg
);
1354 if (len
< 0) return len
;
1356 r
= add_opcode_rel_addr(reg
, OP_PUSH
, len
+ SIZE_OP_POP
+ SIZE_OP_JUMP
);
1358 r
= compile_tree(qn
->target
, reg
);
1360 r
= add_opcode(reg
, OP_POP
);
1362 r
= add_opcode_rel_addr(reg
, OP_JUMP
,
1363 -((int )SIZE_OP_PUSH
+ len
+ (int )SIZE_OP_POP
+ (int )SIZE_OP_JUMP
));
1366 r
= add_opcode(reg
, OP_PUSH_STOP_BT
);
1368 r
= compile_tree(node
->target
, reg
);
1370 r
= add_opcode(reg
, OP_POP_STOP_BT
);
1375 return ONIGERR_TYPE_BUG
;
1383 compile_length_anchor_node(AnchorNode
* node
, regex_t
* reg
)
1389 tlen
= compile_length_tree(node
->target
, reg
);
1390 if (tlen
< 0) return tlen
;
1393 switch (node
->type
) {
1394 case ANCHOR_PREC_READ
:
1395 len
= SIZE_OP_PUSH_POS
+ tlen
+ SIZE_OP_POP_POS
;
1397 case ANCHOR_PREC_READ_NOT
:
1398 len
= SIZE_OP_PUSH_POS_NOT
+ tlen
+ SIZE_OP_FAIL_POS
;
1400 case ANCHOR_LOOK_BEHIND
:
1401 len
= SIZE_OP_LOOK_BEHIND
+ tlen
;
1403 case ANCHOR_LOOK_BEHIND_NOT
:
1404 len
= SIZE_OP_PUSH_LOOK_BEHIND_NOT
+ tlen
+ SIZE_OP_FAIL_LOOK_BEHIND_NOT
;
1416 compile_anchor_node(AnchorNode
* node
, regex_t
* reg
)
1420 switch (node
->type
) {
1421 case ANCHOR_BEGIN_BUF
: r
= add_opcode(reg
, OP_BEGIN_BUF
); break;
1422 case ANCHOR_END_BUF
: r
= add_opcode(reg
, OP_END_BUF
); break;
1423 case ANCHOR_BEGIN_LINE
: r
= add_opcode(reg
, OP_BEGIN_LINE
); break;
1424 case ANCHOR_END_LINE
: r
= add_opcode(reg
, OP_END_LINE
); break;
1425 case ANCHOR_SEMI_END_BUF
: r
= add_opcode(reg
, OP_SEMI_END_BUF
); break;
1426 case ANCHOR_BEGIN_POSITION
: r
= add_opcode(reg
, OP_BEGIN_POSITION
); break;
1428 case ANCHOR_WORD_BOUND
: r
= add_opcode(reg
, OP_WORD_BOUND
); break;
1429 case ANCHOR_NOT_WORD_BOUND
: r
= add_opcode(reg
, OP_NOT_WORD_BOUND
); break;
1430 #ifdef USE_WORD_BEGIN_END
1431 case ANCHOR_WORD_BEGIN
: r
= add_opcode(reg
, OP_WORD_BEGIN
); break;
1432 case ANCHOR_WORD_END
: r
= add_opcode(reg
, OP_WORD_END
); break;
1435 case ANCHOR_PREC_READ
:
1436 r
= add_opcode(reg
, OP_PUSH_POS
);
1438 r
= compile_tree(node
->target
, reg
);
1440 r
= add_opcode(reg
, OP_POP_POS
);
1443 case ANCHOR_PREC_READ_NOT
:
1444 len
= compile_length_tree(node
->target
, reg
);
1445 if (len
< 0) return len
;
1446 r
= add_opcode_rel_addr(reg
, OP_PUSH_POS_NOT
, len
+ SIZE_OP_FAIL_POS
);
1448 r
= compile_tree(node
->target
, reg
);
1450 r
= add_opcode(reg
, OP_FAIL_POS
);
1453 case ANCHOR_LOOK_BEHIND
:
1456 r
= add_opcode(reg
, OP_LOOK_BEHIND
);
1458 if (node
->char_len
< 0) {
1459 r
= get_char_length_tree(node
->target
, reg
, &n
);
1460 if (r
) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN
;
1464 r
= add_length(reg
, n
);
1466 r
= compile_tree(node
->target
, reg
);
1470 case ANCHOR_LOOK_BEHIND_NOT
:
1473 len
= compile_length_tree(node
->target
, reg
);
1474 r
= add_opcode_rel_addr(reg
, OP_PUSH_LOOK_BEHIND_NOT
,
1475 len
+ SIZE_OP_FAIL_LOOK_BEHIND_NOT
);
1477 if (node
->char_len
< 0) {
1478 r
= get_char_length_tree(node
->target
, reg
, &n
);
1479 if (r
) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN
;
1483 r
= add_length(reg
, n
);
1485 r
= compile_tree(node
->target
, reg
);
1487 r
= add_opcode(reg
, OP_FAIL_LOOK_BEHIND_NOT
);
1492 return ONIGERR_TYPE_BUG
;
1500 compile_length_tree(Node
* node
, regex_t
* reg
)
1509 r
= compile_length_tree(NCAR(node
), reg
);
1510 if (r
< 0) return r
;
1512 } while (IS_NOT_NULL(node
= NCDR(node
)));
1522 r
+= compile_length_tree(NCAR(node
), reg
);
1524 } while (IS_NOT_NULL(node
= NCDR(node
)));
1525 r
+= (SIZE_OP_PUSH
+ SIZE_OP_JUMP
) * (n
- 1);
1530 if (NSTRING_IS_RAW(node
))
1531 r
= compile_length_string_raw_node(NSTR(node
), reg
);
1533 r
= compile_length_string_node(node
, reg
);
1537 r
= compile_length_cclass_node(NCCLASS(node
), reg
);
1547 BRefNode
* br
= NBREF(node
);
1549 #ifdef USE_BACKREF_WITH_LEVEL
1550 if (IS_BACKREF_NEST_LEVEL(br
)) {
1551 r
= SIZE_OPCODE
+ SIZE_OPTION
+ SIZE_LENGTH
+
1552 SIZE_LENGTH
+ (SIZE_MEMNUM
* br
->back_num
);
1556 if (br
->back_num
== 1) {
1557 r
= ((!IS_IGNORECASE(reg
->options
) && br
->back_static
[0] <= 2)
1558 ? SIZE_OPCODE
: (SIZE_OPCODE
+ SIZE_MEMNUM
));
1561 r
= SIZE_OPCODE
+ SIZE_LENGTH
+ (SIZE_MEMNUM
* br
->back_num
);
1566 #ifdef USE_SUBEXP_CALL
1573 r
= compile_length_quantifier_node(NQTFR(node
), reg
);
1577 r
= compile_length_enclose_node(NENCLOSE(node
), reg
);
1581 r
= compile_length_anchor_node(NANCHOR(node
), reg
);
1585 return ONIGERR_TYPE_BUG
;
1593 compile_tree(Node
* node
, regex_t
* reg
)
1595 int n
, type
, len
, pos
, r
= 0;
1601 r
= compile_tree(NCAR(node
), reg
);
1602 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
1610 len
+= compile_length_tree(NCAR(x
), reg
);
1611 if (NCDR(x
) != NULL
) {
1612 len
+= SIZE_OP_PUSH
+ SIZE_OP_JUMP
;
1614 } while (IS_NOT_NULL(x
= NCDR(x
)));
1615 pos
= reg
->used
+ len
; /* goal position */
1618 len
= compile_length_tree(NCAR(node
), reg
);
1619 if (IS_NOT_NULL(NCDR(node
))) {
1620 r
= add_opcode_rel_addr(reg
, OP_PUSH
, len
+ SIZE_OP_JUMP
);
1623 r
= compile_tree(NCAR(node
), reg
);
1625 if (IS_NOT_NULL(NCDR(node
))) {
1626 len
= pos
- (reg
->used
+ SIZE_OP_JUMP
);
1627 r
= add_opcode_rel_addr(reg
, OP_JUMP
, len
);
1630 } while (IS_NOT_NULL(node
= NCDR(node
)));
1635 if (NSTRING_IS_RAW(node
))
1636 r
= compile_string_raw_node(NSTR(node
), reg
);
1638 r
= compile_string_node(node
, reg
);
1642 r
= compile_cclass_node(NCCLASS(node
), reg
);
1649 switch (NCTYPE(node
)->ctype
) {
1650 case ONIGENC_CTYPE_WORD
:
1651 if (NCTYPE(node
)->not != 0) op
= OP_NOT_WORD
;
1655 return ONIGERR_TYPE_BUG
;
1658 r
= add_opcode(reg
, op
);
1663 if (IS_MULTILINE(reg
->options
))
1664 r
= add_opcode(reg
, OP_ANYCHAR_ML
);
1666 r
= add_opcode(reg
, OP_ANYCHAR
);
1671 BRefNode
* br
= NBREF(node
);
1673 #ifdef USE_BACKREF_WITH_LEVEL
1674 if (IS_BACKREF_NEST_LEVEL(br
)) {
1675 r
= add_opcode(reg
, OP_BACKREF_WITH_LEVEL
);
1677 r
= add_option(reg
, (reg
->options
& ONIG_OPTION_IGNORECASE
));
1679 r
= add_length(reg
, br
->nest_level
);
1682 goto add_bacref_mems
;
1686 if (br
->back_num
== 1) {
1687 n
= br
->back_static
[0];
1688 if (IS_IGNORECASE(reg
->options
)) {
1689 r
= add_opcode(reg
, OP_BACKREFN_IC
);
1691 r
= add_mem_num(reg
, n
);
1695 case 1: r
= add_opcode(reg
, OP_BACKREF1
); break;
1696 case 2: r
= add_opcode(reg
, OP_BACKREF2
); break;
1698 r
= add_opcode(reg
, OP_BACKREFN
);
1700 r
= add_mem_num(reg
, n
);
1709 if (IS_IGNORECASE(reg
->options
)) {
1710 r
= add_opcode(reg
, OP_BACKREF_MULTI_IC
);
1713 r
= add_opcode(reg
, OP_BACKREF_MULTI
);
1717 #ifdef USE_BACKREF_WITH_LEVEL
1720 r
= add_length(reg
, br
->back_num
);
1723 for (i
= br
->back_num
- 1; i
>= 0; i
--) {
1724 r
= add_mem_num(reg
, p
[i
]);
1731 #ifdef USE_SUBEXP_CALL
1733 r
= compile_call(NCALL(node
), reg
);
1738 r
= compile_quantifier_node(NQTFR(node
), reg
);
1742 r
= compile_enclose_node(NENCLOSE(node
), reg
);
1746 r
= compile_anchor_node(NANCHOR(node
), reg
);
1751 fprintf(stderr
, "compile_tree: undefined node type %d\n", NTYPE(node
));
1759 #ifdef USE_NAMED_GROUP
1762 noname_disable_map(Node
** plink
, GroupNumRemap
* map
, int* counter
)
1765 Node
* node
= *plink
;
1767 switch (NTYPE(node
)) {
1771 r
= noname_disable_map(&(NCAR(node
)), map
, counter
);
1772 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
1777 Node
** ptarget
= &(NQTFR(node
)->target
);
1778 Node
* old
= *ptarget
;
1779 r
= noname_disable_map(ptarget
, map
, counter
);
1780 if (*ptarget
!= old
&& NTYPE(*ptarget
) == NT_QTFR
) {
1781 onig_reduce_nested_quantifier(node
, *ptarget
);
1788 EncloseNode
* en
= NENCLOSE(node
);
1789 if (en
->type
== ENCLOSE_MEMORY
) {
1790 if (IS_ENCLOSE_NAMED_GROUP(en
)) {
1792 map
[en
->regnum
].new_val
= *counter
;
1793 en
->regnum
= *counter
;
1794 r
= noname_disable_map(&(en
->target
), map
, counter
);
1797 *plink
= en
->target
;
1798 en
->target
= NULL_NODE
;
1799 onig_node_free(node
);
1800 r
= noname_disable_map(plink
, map
, counter
);
1804 r
= noname_disable_map(&(en
->target
), map
, counter
);
1816 renumber_node_backref(Node
* node
, GroupNumRemap
* map
)
1818 int i
, pos
, n
, old_num
;
1820 BRefNode
* bn
= NBREF(node
);
1822 if (! IS_BACKREF_NAME_REF(bn
))
1823 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED
;
1825 old_num
= bn
->back_num
;
1826 if (IS_NULL(bn
->back_dynamic
))
1827 backs
= bn
->back_static
;
1829 backs
= bn
->back_dynamic
;
1831 for (i
= 0, pos
= 0; i
< old_num
; i
++) {
1832 n
= map
[backs
[i
]].new_val
;
1844 renumber_by_map(Node
* node
, GroupNumRemap
* map
)
1848 switch (NTYPE(node
)) {
1852 r
= renumber_by_map(NCAR(node
), map
);
1853 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
1856 r
= renumber_by_map(NQTFR(node
)->target
, map
);
1859 r
= renumber_by_map(NENCLOSE(node
)->target
, map
);
1863 r
= renumber_node_backref(node
, map
);
1874 numbered_ref_check(Node
* node
)
1878 switch (NTYPE(node
)) {
1882 r
= numbered_ref_check(NCAR(node
));
1883 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
1886 r
= numbered_ref_check(NQTFR(node
)->target
);
1889 r
= numbered_ref_check(NENCLOSE(node
)->target
);
1893 if (! IS_BACKREF_NAME_REF(NBREF(node
)))
1894 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED
;
1905 disable_noname_group_capture(Node
** root
, regex_t
* reg
, ScanEnv
* env
)
1907 int r
, i
, pos
, counter
;
1912 map
= (GroupNumRemap
* )xmalloc(sizeof(GroupNumRemap
) * (env
->num_mem
+ 1));
1913 CHECK_NULL_RETURN_MEMERR(map
);
1914 for (i
= 1; i
<= env
->num_mem
; i
++) {
1918 r
= noname_disable_map(root
, map
, &counter
);
1919 if (r
!= 0) return r
;
1921 r
= renumber_by_map(*root
, map
);
1922 if (r
!= 0) return r
;
1924 for (i
= 1, pos
= 1; i
<= env
->num_mem
; i
++) {
1925 if (map
[i
].new_val
> 0) {
1926 SCANENV_MEM_NODES(env
)[pos
] = SCANENV_MEM_NODES(env
)[i
];
1931 loc
= env
->capture_history
;
1932 BIT_STATUS_CLEAR(env
->capture_history
);
1933 for (i
= 1; i
<= ONIG_MAX_CAPTURE_HISTORY_GROUP
; i
++) {
1934 if (BIT_STATUS_AT(loc
, i
)) {
1935 BIT_STATUS_ON_AT_SIMPLE(env
->capture_history
, map
[i
].new_val
);
1939 env
->num_mem
= env
->num_named
;
1940 reg
->num_mem
= env
->num_named
;
1942 Result
= onig_renumber_name_table(reg
, map
);
1946 #endif /* USE_NAMED_GROUP */
1948 #ifdef USE_SUBEXP_CALL
1950 unset_addr_list_fix(UnsetAddrList
* uslist
, regex_t
* reg
)
1956 for (i
= 0; i
< uslist
->num
; i
++) {
1957 en
= NENCLOSE(uslist
->us
[i
].target
);
1958 if (! IS_ENCLOSE_ADDR_FIXED(en
)) return ONIGERR_PARSER_BUG
;
1959 addr
= en
->call_addr
;
1960 offset
= uslist
->us
[i
].offset
;
1962 BBUF_WRITE(reg
, offset
, &addr
, SIZE_ABSADDR
);
1968 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
1970 quantifiers_memory_node_info(Node
* node
)
1974 switch (NTYPE(node
)) {
1980 v
= quantifiers_memory_node_info(NCAR(node
));
1982 } while (v
>= 0 && IS_NOT_NULL(node
= NCDR(node
)));
1986 #ifdef USE_SUBEXP_CALL
1988 if (IS_CALL_RECURSION(NCALL(node
))) {
1989 return NQ_TARGET_IS_EMPTY_REC
; /* tiny version */
1992 r
= quantifiers_memory_node_info(NCALL(node
)->target
);
1998 QtfrNode
* qn
= NQTFR(node
);
1999 if (qn
->upper
!= 0) {
2000 r
= quantifiers_memory_node_info(qn
->target
);
2007 EncloseNode
* en
= NENCLOSE(node
);
2009 case ENCLOSE_MEMORY
:
2010 return NQ_TARGET_IS_EMPTY_MEM
;
2013 case ENCLOSE_OPTION
:
2014 case ENCLOSE_STOP_BACKTRACK
:
2015 r
= quantifiers_memory_node_info(en
->target
);
2035 #endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */
2038 get_min_match_length(Node
* node
, OnigDistance
*min
, ScanEnv
* env
)
2044 switch (NTYPE(node
)) {
2049 Node
** nodes
= SCANENV_MEM_NODES(env
);
2050 BRefNode
* br
= NBREF(node
);
2051 if (br
->state
& NST_RECURSION
) break;
2053 backs
= BACKREFS_P(br
);
2054 if (backs
[0] > env
->num_mem
) return ONIGERR_INVALID_BACKREF
;
2055 r
= get_min_match_length(nodes
[backs
[0]], min
, env
);
2057 for (i
= 1; i
< br
->back_num
; i
++) {
2058 if (backs
[i
] > env
->num_mem
) return ONIGERR_INVALID_BACKREF
;
2059 r
= get_min_match_length(nodes
[backs
[i
]], &tmin
, env
);
2061 if (*min
> tmin
) *min
= tmin
;
2066 #ifdef USE_SUBEXP_CALL
2068 if (IS_CALL_RECURSION(NCALL(node
))) {
2069 EncloseNode
* en
= NENCLOSE(NCALL(node
)->target
);
2070 if (IS_ENCLOSE_MIN_FIXED(en
))
2074 r
= get_min_match_length(NCALL(node
)->target
, min
, env
);
2080 r
= get_min_match_length(NCAR(node
), &tmin
, env
);
2081 if (r
== 0) *min
+= tmin
;
2082 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
2091 r
= get_min_match_length(x
, &tmin
, env
);
2093 if (y
== node
) *min
= tmin
;
2094 else if (*min
> tmin
) *min
= tmin
;
2095 } while (r
== 0 && IS_NOT_NULL(y
= NCDR(y
)));
2101 StrNode
* sn
= NSTR(node
);
2102 *min
= (OnigDistance
)(sn
->end
- sn
->s
);
2117 QtfrNode
* qn
= NQTFR(node
);
2119 if (qn
->lower
> 0) {
2120 r
= get_min_match_length(qn
->target
, min
, env
);
2122 *min
= distance_multiply(*min
, qn
->lower
);
2129 EncloseNode
* en
= NENCLOSE(node
);
2131 case ENCLOSE_MEMORY
:
2132 #ifdef USE_SUBEXP_CALL
2133 if (IS_ENCLOSE_MIN_FIXED(en
))
2136 r
= get_min_match_length(en
->target
, min
, env
);
2139 SET_ENCLOSE_STATUS(node
, NST_MIN_FIXED
);
2144 case ENCLOSE_OPTION
:
2145 case ENCLOSE_STOP_BACKTRACK
:
2146 r
= get_min_match_length(en
->target
, min
, env
);
2161 get_max_match_length(Node
* node
, OnigDistance
*max
, ScanEnv
* env
)
2167 switch (NTYPE(node
)) {
2170 r
= get_max_match_length(NCAR(node
), &tmax
, env
);
2172 *max
= distance_add(*max
, tmax
);
2173 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
2178 r
= get_max_match_length(NCAR(node
), &tmax
, env
);
2179 if (r
== 0 && *max
< tmax
) *max
= tmax
;
2180 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
2185 StrNode
* sn
= NSTR(node
);
2186 *max
= (OnigDistance
)(sn
->end
- sn
->s
);
2191 *max
= ONIGENC_MBC_MAXLEN_DIST(env
->enc
);
2196 *max
= ONIGENC_MBC_MAXLEN_DIST(env
->enc
);
2203 Node
** nodes
= SCANENV_MEM_NODES(env
);
2204 BRefNode
* br
= NBREF(node
);
2205 if (br
->state
& NST_RECURSION
) {
2206 *max
= ONIG_INFINITE_DISTANCE
;
2209 backs
= BACKREFS_P(br
);
2210 for (i
= 0; i
< br
->back_num
; i
++) {
2211 if (backs
[i
] > env
->num_mem
) return ONIGERR_INVALID_BACKREF
;
2212 r
= get_max_match_length(nodes
[backs
[i
]], &tmax
, env
);
2214 if (*max
< tmax
) *max
= tmax
;
2219 #ifdef USE_SUBEXP_CALL
2221 if (! IS_CALL_RECURSION(NCALL(node
)))
2222 r
= get_max_match_length(NCALL(node
)->target
, max
, env
);
2224 *max
= ONIG_INFINITE_DISTANCE
;
2230 QtfrNode
* qn
= NQTFR(node
);
2232 if (qn
->upper
!= 0) {
2233 r
= get_max_match_length(qn
->target
, max
, env
);
2234 if (r
== 0 && *max
!= 0) {
2235 if (! IS_REPEAT_INFINITE(qn
->upper
))
2236 *max
= distance_multiply(*max
, qn
->upper
);
2238 *max
= ONIG_INFINITE_DISTANCE
;
2246 EncloseNode
* en
= NENCLOSE(node
);
2248 case ENCLOSE_MEMORY
:
2249 #ifdef USE_SUBEXP_CALL
2250 if (IS_ENCLOSE_MAX_FIXED(en
))
2253 r
= get_max_match_length(en
->target
, max
, env
);
2256 SET_ENCLOSE_STATUS(node
, NST_MAX_FIXED
);
2261 case ENCLOSE_OPTION
:
2262 case ENCLOSE_STOP_BACKTRACK
:
2263 r
= get_max_match_length(en
->target
, max
, env
);
2277 #define GET_CHAR_LEN_VARLEN -1
2278 #define GET_CHAR_LEN_TOP_ALT_VARLEN -2
2280 /* fixed size pattern node only */
2282 get_char_length_tree1(Node
* node
, regex_t
* reg
, int* len
, int level
)
2289 switch (NTYPE(node
)) {
2292 r
= get_char_length_tree1(NCAR(node
), reg
, &tlen
, level
);
2294 *len
= distance_add(*len
, tlen
);
2295 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
2303 r
= get_char_length_tree1(NCAR(node
), reg
, &tlen
, level
);
2304 while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
))) {
2305 r
= get_char_length_tree1(NCAR(node
), reg
, &tlen2
, level
);
2314 r
= GET_CHAR_LEN_TOP_ALT_VARLEN
;
2316 r
= GET_CHAR_LEN_VARLEN
;
2326 StrNode
* sn
= NSTR(node
);
2328 while (s
< sn
->end
) {
2329 s
+= enclen(reg
->enc
, s
);
2337 QtfrNode
* qn
= NQTFR(node
);
2338 if (qn
->lower
== qn
->upper
) {
2339 r
= get_char_length_tree1(qn
->target
, reg
, &tlen
, level
);
2341 *len
= distance_multiply(tlen
, qn
->lower
);
2344 r
= GET_CHAR_LEN_VARLEN
;
2348 #ifdef USE_SUBEXP_CALL
2350 if (! IS_CALL_RECURSION(NCALL(node
)))
2351 r
= get_char_length_tree1(NCALL(node
)->target
, reg
, len
, level
);
2353 r
= GET_CHAR_LEN_VARLEN
;
2368 EncloseNode
* en
= NENCLOSE(node
);
2370 case ENCLOSE_MEMORY
:
2371 #ifdef USE_SUBEXP_CALL
2372 if (IS_ENCLOSE_CLEN_FIXED(en
))
2373 *len
= en
->char_len
;
2375 r
= get_char_length_tree1(en
->target
, reg
, len
, level
);
2377 en
->char_len
= *len
;
2378 SET_ENCLOSE_STATUS(node
, NST_CLEN_FIXED
);
2383 case ENCLOSE_OPTION
:
2384 case ENCLOSE_STOP_BACKTRACK
:
2385 r
= get_char_length_tree1(en
->target
, reg
, len
, level
);
2397 r
= GET_CHAR_LEN_VARLEN
;
2405 get_char_length_tree(Node
* node
, regex_t
* reg
, int* len
)
2407 return get_char_length_tree1(node
, reg
, len
, 0);
2410 /* x is not included y ==> 1 : 0 */
2412 is_not_included(Node
* x
, Node
* y
, regex_t
* reg
)
2426 if (NCTYPE(y
)->ctype
== NCTYPE(x
)->ctype
&&
2427 NCTYPE(y
)->not != NCTYPE(x
)->not)
2437 tmp
= x
; x
= y
; y
= tmp
;
2454 CClassNode
* xc
= NCCLASS(x
);
2457 switch (NCTYPE(y
)->ctype
) {
2458 case ONIGENC_CTYPE_WORD
:
2459 if (NCTYPE(y
)->not == 0) {
2460 if (IS_NULL(xc
->mbuf
) && !IS_NCCLASS_NOT(xc
)) {
2461 for (i
= 0; i
< SINGLE_BYTE_SIZE
; i
++) {
2462 if (BITSET_AT(xc
->bs
, i
)) {
2463 if (IS_CODE_SB_WORD(reg
->enc
, i
)) return 0;
2471 for (i
= 0; i
< SINGLE_BYTE_SIZE
; i
++) {
2472 if (! IS_CODE_SB_WORD(reg
->enc
, i
)) {
2473 if (!IS_NCCLASS_NOT(xc
)) {
2474 if (BITSET_AT(xc
->bs
, i
))
2478 if (! BITSET_AT(xc
->bs
, i
))
2495 CClassNode
* yc
= NCCLASS(y
);
2497 for (i
= 0; i
< SINGLE_BYTE_SIZE
; i
++) {
2498 v
= BITSET_AT(xc
->bs
, i
);
2499 if ((v
!= 0 && !IS_NCCLASS_NOT(xc
)) ||
2500 (v
== 0 && IS_NCCLASS_NOT(xc
))) {
2501 v
= BITSET_AT(yc
->bs
, i
);
2502 if ((v
!= 0 && !IS_NCCLASS_NOT(yc
)) ||
2503 (v
== 0 && IS_NCCLASS_NOT(yc
)))
2507 if ((IS_NULL(xc
->mbuf
) && !IS_NCCLASS_NOT(xc
)) ||
2508 (IS_NULL(yc
->mbuf
) && !IS_NCCLASS_NOT(yc
)))
2526 StrNode
* xs
= NSTR(x
);
2527 if (NSTRING_LEN(x
) == 0)
2533 switch (NCTYPE(y
)->ctype
) {
2534 case ONIGENC_CTYPE_WORD
:
2535 if (ONIGENC_IS_MBC_WORD(reg
->enc
, xs
->s
, xs
->end
))
2536 return NCTYPE(y
)->not;
2538 return !(NCTYPE(y
)->not);
2547 CClassNode
* cc
= NCCLASS(y
);
2549 code
= ONIGENC_MBC_TO_CODE(reg
->enc
, xs
->s
,
2550 xs
->s
+ ONIGENC_MBC_MAXLEN(reg
->enc
));
2551 return (onig_is_code_in_cc(reg
->enc
, code
, cc
) != 0 ? 0 : 1);
2558 StrNode
* ys
= NSTR(y
);
2559 len
= NSTRING_LEN(x
);
2560 if (len
> NSTRING_LEN(y
)) len
= NSTRING_LEN(y
);
2561 if (NSTRING_IS_AMBIG(x
) || NSTRING_IS_AMBIG(y
)) {
2566 for (i
= 0, p
= ys
->s
, q
= xs
->s
; i
< len
; i
++, p
++, q
++) {
2567 if (*p
!= *q
) return 1;
2587 get_head_value_node(Node
* node
, int exact
, regex_t
* reg
)
2589 Node
* n
= NULL_NODE
;
2591 switch (NTYPE(node
)) {
2595 #ifdef USE_SUBEXP_CALL
2608 n
= get_head_value_node(NCAR(node
), exact
, reg
);
2613 StrNode
* sn
= NSTR(node
);
2615 if (sn
->end
<= sn
->s
)
2619 !NSTRING_IS_RAW(node
) && IS_IGNORECASE(reg
->options
)) {
2629 QtfrNode
* qn
= NQTFR(node
);
2630 if (qn
->lower
> 0) {
2631 if (IS_NOT_NULL(qn
->head_exact
))
2634 n
= get_head_value_node(qn
->target
, exact
, reg
);
2641 EncloseNode
* en
= NENCLOSE(node
);
2643 case ENCLOSE_OPTION
:
2645 OnigOptionType options
= reg
->options
;
2647 reg
->options
= NENCLOSE(node
)->option
;
2648 n
= get_head_value_node(NENCLOSE(node
)->target
, exact
, reg
);
2649 reg
->options
= options
;
2653 case ENCLOSE_MEMORY
:
2654 case ENCLOSE_STOP_BACKTRACK
:
2655 n
= get_head_value_node(en
->target
, exact
, reg
);
2662 if (NANCHOR(node
)->type
== ANCHOR_PREC_READ
)
2663 n
= get_head_value_node(NANCHOR(node
)->target
, exact
, reg
);
2674 check_type_tree(Node
* node
, int type_mask
, int enclose_mask
, int anchor_mask
)
2679 if ((NTYPE2BIT(type
) & type_mask
) == 0)
2686 r
= check_type_tree(NCAR(node
), type_mask
, enclose_mask
,
2688 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
2692 r
= check_type_tree(NQTFR(node
)->target
, type_mask
, enclose_mask
,
2698 EncloseNode
* en
= NENCLOSE(node
);
2699 if ((en
->type
& enclose_mask
) == 0)
2702 r
= check_type_tree(en
->target
, type_mask
, enclose_mask
, anchor_mask
);
2707 type
= NANCHOR(node
)->type
;
2708 if ((type
& anchor_mask
) == 0)
2711 if (NANCHOR(node
)->target
)
2712 r
= check_type_tree(NANCHOR(node
)->target
,
2713 type_mask
, enclose_mask
, anchor_mask
);
2722 #ifdef USE_SUBEXP_CALL
2724 #define RECURSION_EXIST 1
2725 #define RECURSION_INFINITE 2
2728 subexp_inf_recursive_check(Node
* node
, ScanEnv
* env
, int head
)
2743 ret
= subexp_inf_recursive_check(NCAR(x
), env
, head
);
2744 if (ret
< 0 || ret
== RECURSION_INFINITE
) return ret
;
2747 ret
= get_min_match_length(NCAR(x
), &min
, env
);
2748 if (ret
!= 0) return ret
;
2749 if (min
!= 0) head
= 0;
2751 } while (IS_NOT_NULL(x
= NCDR(x
)));
2758 r
= RECURSION_EXIST
;
2760 ret
= subexp_inf_recursive_check(NCAR(node
), env
, head
);
2761 if (ret
< 0 || ret
== RECURSION_INFINITE
) return ret
;
2763 } while (IS_NOT_NULL(node
= NCDR(node
)));
2768 r
= subexp_inf_recursive_check(NQTFR(node
)->target
, env
, head
);
2769 if (r
== RECURSION_EXIST
) {
2770 if (NQTFR(node
)->lower
== 0) r
= 0;
2776 AnchorNode
* an
= NANCHOR(node
);
2778 case ANCHOR_PREC_READ
:
2779 case ANCHOR_PREC_READ_NOT
:
2780 case ANCHOR_LOOK_BEHIND
:
2781 case ANCHOR_LOOK_BEHIND_NOT
:
2782 r
= subexp_inf_recursive_check(an
->target
, env
, head
);
2789 r
= subexp_inf_recursive_check(NCALL(node
)->target
, env
, head
);
2793 if (IS_ENCLOSE_MARK2(NENCLOSE(node
)))
2795 else if (IS_ENCLOSE_MARK1(NENCLOSE(node
)))
2796 return (head
== 0 ? RECURSION_EXIST
: RECURSION_INFINITE
);
2798 SET_ENCLOSE_STATUS(node
, NST_MARK2
);
2799 r
= subexp_inf_recursive_check(NENCLOSE(node
)->target
, env
, head
);
2800 CLEAR_ENCLOSE_STATUS(node
, NST_MARK2
);
2812 subexp_inf_recursive_check_trav(Node
* node
, ScanEnv
* env
)
2822 r
= subexp_inf_recursive_check_trav(NCAR(node
), env
);
2823 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
2827 r
= subexp_inf_recursive_check_trav(NQTFR(node
)->target
, env
);
2832 AnchorNode
* an
= NANCHOR(node
);
2834 case ANCHOR_PREC_READ
:
2835 case ANCHOR_PREC_READ_NOT
:
2836 case ANCHOR_LOOK_BEHIND
:
2837 case ANCHOR_LOOK_BEHIND_NOT
:
2838 r
= subexp_inf_recursive_check_trav(an
->target
, env
);
2846 EncloseNode
* en
= NENCLOSE(node
);
2848 if (IS_ENCLOSE_RECURSION(en
)) {
2849 SET_ENCLOSE_STATUS(node
, NST_MARK1
);
2850 r
= subexp_inf_recursive_check(en
->target
, env
, 1);
2851 if (r
> 0) return ONIGERR_NEVER_ENDING_RECURSION
;
2852 CLEAR_ENCLOSE_STATUS(node
, NST_MARK1
);
2854 r
= subexp_inf_recursive_check_trav(en
->target
, env
);
2867 subexp_recursive_check(Node
* node
)
2871 switch (NTYPE(node
)) {
2875 r
|= subexp_recursive_check(NCAR(node
));
2876 } while (IS_NOT_NULL(node
= NCDR(node
)));
2880 r
= subexp_recursive_check(NQTFR(node
)->target
);
2885 AnchorNode
* an
= NANCHOR(node
);
2887 case ANCHOR_PREC_READ
:
2888 case ANCHOR_PREC_READ_NOT
:
2889 case ANCHOR_LOOK_BEHIND
:
2890 case ANCHOR_LOOK_BEHIND_NOT
:
2891 r
= subexp_recursive_check(an
->target
);
2898 r
= subexp_recursive_check(NCALL(node
)->target
);
2899 if (r
!= 0) SET_CALL_RECURSION(node
);
2903 if (IS_ENCLOSE_MARK2(NENCLOSE(node
)))
2905 else if (IS_ENCLOSE_MARK1(NENCLOSE(node
)))
2906 return 1; /* recursion */
2908 SET_ENCLOSE_STATUS(node
, NST_MARK2
);
2909 r
= subexp_recursive_check(NENCLOSE(node
)->target
);
2910 CLEAR_ENCLOSE_STATUS(node
, NST_MARK2
);
2923 subexp_recursive_check_trav(Node
* node
, ScanEnv
* env
)
2925 #define FOUND_CALLED_NODE 1
2937 ret
= subexp_recursive_check_trav(NCAR(node
), env
);
2938 if (ret
== FOUND_CALLED_NODE
) r
= FOUND_CALLED_NODE
;
2939 else if (ret
< 0) return ret
;
2940 } while (IS_NOT_NULL(node
= NCDR(node
)));
2945 r
= subexp_recursive_check_trav(NQTFR(node
)->target
, env
);
2946 if (NQTFR(node
)->upper
== 0) {
2947 if (r
== FOUND_CALLED_NODE
)
2948 NQTFR(node
)->is_refered
= 1;
2954 AnchorNode
* an
= NANCHOR(node
);
2956 case ANCHOR_PREC_READ
:
2957 case ANCHOR_PREC_READ_NOT
:
2958 case ANCHOR_LOOK_BEHIND
:
2959 case ANCHOR_LOOK_BEHIND_NOT
:
2960 r
= subexp_recursive_check_trav(an
->target
, env
);
2968 EncloseNode
* en
= NENCLOSE(node
);
2970 if (! IS_ENCLOSE_RECURSION(en
)) {
2971 if (IS_ENCLOSE_CALLED(en
)) {
2972 SET_ENCLOSE_STATUS(node
, NST_MARK1
);
2973 r
= subexp_recursive_check(en
->target
);
2974 if (r
!= 0) SET_ENCLOSE_STATUS(node
, NST_RECURSION
);
2975 CLEAR_ENCLOSE_STATUS(node
, NST_MARK1
);
2978 r
= subexp_recursive_check_trav(en
->target
, env
);
2979 if (IS_ENCLOSE_CALLED(en
))
2980 r
|= FOUND_CALLED_NODE
;
2992 setup_subexp_call(Node
* node
, ScanEnv
* env
)
3001 r
= setup_subexp_call(NCAR(node
), env
);
3002 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
3007 r
= setup_subexp_call(NCAR(node
), env
);
3008 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
3012 r
= setup_subexp_call(NQTFR(node
)->target
, env
);
3015 r
= setup_subexp_call(NENCLOSE(node
)->target
, env
);
3020 CallNode
* cn
= NCALL(node
);
3021 Node
** nodes
= SCANENV_MEM_NODES(env
);
3023 if (cn
->group_num
!= 0) {
3024 int gnum
= cn
->group_num
;
3026 #ifdef USE_NAMED_GROUP
3027 if (env
->num_named
> 0 &&
3028 IS_SYNTAX_BV(env
->syntax
, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP
) &&
3029 !ONIG_IS_OPTION_ON(env
->option
, ONIG_OPTION_CAPTURE_GROUP
)) {
3030 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED
;
3033 if (gnum
> env
->num_mem
) {
3034 onig_scan_env_set_error_string(env
,
3035 ONIGERR_UNDEFINED_GROUP_REFERENCE
, cn
->name
, cn
->name_end
);
3036 return ONIGERR_UNDEFINED_GROUP_REFERENCE
;
3039 #ifdef USE_NAMED_GROUP
3042 cn
->target
= nodes
[cn
->group_num
];
3043 if (IS_NULL(cn
->target
)) {
3044 onig_scan_env_set_error_string(env
,
3045 ONIGERR_UNDEFINED_NAME_REFERENCE
, cn
->name
, cn
->name_end
);
3046 return ONIGERR_UNDEFINED_NAME_REFERENCE
;
3048 SET_ENCLOSE_STATUS(cn
->target
, NST_CALLED
);
3049 BIT_STATUS_ON_AT(env
->bt_mem_start
, cn
->group_num
);
3050 cn
->unset_addr_list
= env
->unset_addr_list
;
3052 #ifdef USE_NAMED_GROUP
3056 int n
= onig_name_to_group_numbers(env
->reg
, cn
->name
, cn
->name_end
,
3059 onig_scan_env_set_error_string(env
,
3060 ONIGERR_UNDEFINED_NAME_REFERENCE
, cn
->name
, cn
->name_end
);
3061 return ONIGERR_UNDEFINED_NAME_REFERENCE
;
3064 onig_scan_env_set_error_string(env
,
3065 ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL
, cn
->name
, cn
->name_end
);
3066 return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL
;
3069 cn
->group_num
= refs
[0];
3079 AnchorNode
* an
= NANCHOR(node
);
3082 case ANCHOR_PREC_READ
:
3083 case ANCHOR_PREC_READ_NOT
:
3084 case ANCHOR_LOOK_BEHIND
:
3085 case ANCHOR_LOOK_BEHIND_NOT
:
3086 r
= setup_subexp_call(an
->target
, env
);
3100 /* divide different length alternatives in look-behind.
3101 (?<=A|B) ==> (?<=A)|(?<=B)
3102 (?<!A|B) ==> (?<!A)(?<!B)
3105 divide_look_behind_alternatives(Node
* node
)
3107 Node
*head
, *np
, *insert_node
;
3108 AnchorNode
* an
= NANCHOR(node
);
3109 int anc_type
= an
->type
;
3113 swap_node(node
, head
);
3115 NANCHOR(head
)->target
= np
;
3118 while ((np
= NCDR(np
)) != NULL_NODE
) {
3119 insert_node
= onig_node_new_anchor(anc_type
);
3120 CHECK_NULL_RETURN_MEMERR(insert_node
);
3121 NANCHOR(insert_node
)->target
= NCAR(np
);
3122 NCAR(np
) = insert_node
;
3125 if (anc_type
== ANCHOR_LOOK_BEHIND_NOT
) {
3128 SET_NTYPE(np
, NT_LIST
); /* alt -> list */
3129 } while ((np
= NCDR(np
)) != NULL_NODE
);
3135 setup_look_behind(Node
* node
, regex_t
* reg
, ScanEnv
* env
)
3138 AnchorNode
* an
= NANCHOR(node
);
3140 r
= get_char_length_tree(an
->target
, reg
, &len
);
3143 else if (r
== GET_CHAR_LEN_VARLEN
)
3144 r
= ONIGERR_INVALID_LOOK_BEHIND_PATTERN
;
3145 else if (r
== GET_CHAR_LEN_TOP_ALT_VARLEN
) {
3146 if (IS_SYNTAX_BV(env
->syntax
, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND
))
3147 r
= divide_look_behind_alternatives(node
);
3149 r
= ONIGERR_INVALID_LOOK_BEHIND_PATTERN
;
3156 next_setup(Node
* node
, Node
* next_node
, regex_t
* reg
)
3162 if (type
== NT_QTFR
) {
3163 QtfrNode
* qn
= NQTFR(node
);
3164 if (qn
->greedy
&& IS_REPEAT_INFINITE(qn
->upper
)) {
3165 #ifdef USE_QTFR_PEEK_NEXT
3166 Node
* n
= get_head_value_node(next_node
, 1, reg
);
3167 /* '\0': for UTF-16BE etc... */
3168 if (IS_NOT_NULL(n
) && NSTR(n
)->s
[0] != '\0') {
3169 qn
->next_head_exact
= n
;
3172 /* automatic posseivation a*b ==> (?>a*)b */
3173 if (qn
->lower
<= 1) {
3174 int ttype
= NTYPE(qn
->target
);
3175 if (IS_NODE_TYPE_SIMPLE(ttype
)) {
3177 x
= get_head_value_node(qn
->target
, 0, reg
);
3178 if (IS_NOT_NULL(x
)) {
3179 y
= get_head_value_node(next_node
, 0, reg
);
3180 if (IS_NOT_NULL(y
) && is_not_included(x
, y
, reg
)) {
3181 Node
* en
= onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK
);
3182 CHECK_NULL_RETURN_MEMERR(en
);
3183 SET_ENCLOSE_STATUS(en
, NST_STOP_BT_SIMPLE_REPEAT
);
3184 swap_node(node
, en
);
3185 NENCLOSE(node
)->target
= en
;
3192 else if (type
== NT_ENCLOSE
) {
3193 EncloseNode
* en
= NENCLOSE(node
);
3194 if (en
->type
== ENCLOSE_MEMORY
) {
3204 update_string_node_case_fold(regex_t
* reg
, Node
*node
)
3206 UChar
*p
, *end
, buf
[ONIGENC_MBC_CASE_FOLD_MAXLEN
];
3207 UChar
*sbuf
, *ebuf
, *sp
;
3208 int r
, i
, len
, sbuf_size
;
3209 StrNode
* sn
= NSTR(node
);
3212 sbuf_size
= (int)(end
- sn
->s
) * 2;
3213 sbuf
= (UChar
* )xmalloc(sbuf_size
);
3214 CHECK_NULL_RETURN_MEMERR(sbuf
);
3215 ebuf
= sbuf
+ sbuf_size
;
3220 len
= ONIGENC_MBC_CASE_FOLD(reg
->enc
, reg
->case_fold_flag
, &p
, end
, buf
);
3221 for (i
= 0; i
< len
; i
++) {
3223 sbuf
= (UChar
* )xrealloc(sbuf
, sbuf_size
* 2, sbuf_size
);
3224 CHECK_NULL_RETURN_MEMERR(sbuf
);
3225 sp
= sbuf
+ sbuf_size
;
3227 ebuf
= sbuf
+ sbuf_size
;
3234 r
= onig_node_str_set(node
, sbuf
, sp
);
3245 expand_case_fold_make_rem_string(Node
** rnode
, UChar
*s
, UChar
*end
,
3251 node
= onig_node_new_str(s
, end
);
3252 if (IS_NULL(node
)) return ONIGERR_MEMORY
;
3254 r
= update_string_node_case_fold(reg
, node
);
3256 onig_node_free(node
);
3260 NSTRING_SET_AMBIG(node
);
3261 NSTRING_SET_DONT_GET_OPT_INFO(node
);
3267 expand_case_fold_string_alt(int item_num
, OnigCaseFoldCodeItem items
[],
3268 UChar
*p
, int slen
, UChar
*end
,
3269 regex_t
* reg
, Node
**rnode
)
3271 int r
, i
, j
, len
, varlen
;
3272 Node
*anode
, *var_anode
, *snode
, *xnode
, *an
;
3273 UChar buf
[ONIGENC_CODE_TO_MBC_MAXLEN
];
3276 *rnode
= var_anode
= NULL_NODE
;
3279 for (i
= 0; i
< item_num
; i
++) {
3280 if (items
[i
].byte_len
!= slen
) {
3287 *rnode
= var_anode
= onig_node_new_alt(NULL_NODE
, NULL_NODE
);
3288 if (IS_NULL(var_anode
)) return ONIGERR_MEMORY
;
3290 xnode
= onig_node_new_list(NULL
, NULL
);
3291 if (IS_NULL(xnode
)) goto mem_err
;
3292 NCAR(var_anode
) = xnode
;
3294 anode
= onig_node_new_alt(NULL_NODE
, NULL_NODE
);
3295 if (IS_NULL(anode
)) goto mem_err
;
3296 NCAR(xnode
) = anode
;
3299 *rnode
= anode
= onig_node_new_alt(NULL_NODE
, NULL_NODE
);
3300 if (IS_NULL(anode
)) return ONIGERR_MEMORY
;
3303 snode
= onig_node_new_str(p
, p
+ slen
);
3304 if (IS_NULL(snode
)) goto mem_err
;
3306 NCAR(anode
) = snode
;
3308 for (i
= 0; i
< item_num
; i
++) {
3309 snode
= onig_node_new_str(NULL
, NULL
);
3310 if (IS_NULL(snode
)) goto mem_err
;
3312 for (j
= 0; j
< items
[i
].code_len
; j
++) {
3313 len
= ONIGENC_CODE_TO_MBC(reg
->enc
, items
[i
].code
[j
], buf
);
3319 r
= onig_node_str_cat(snode
, buf
, buf
+ len
);
3320 if (r
!= 0) goto mem_err2
;
3323 an
= onig_node_new_alt(NULL_NODE
, NULL_NODE
);
3328 if (items
[i
].byte_len
!= slen
) {
3329 Node
*rem
= NULL_NODE
;
3330 UChar
*q
= p
+ items
[i
].byte_len
;
3333 r
= expand_case_fold_make_rem_string(&rem
, q
, end
, reg
);
3339 xnode
= onig_node_list_add(NULL_NODE
, snode
);
3340 if (IS_NULL(xnode
)) {
3342 onig_node_free(rem
);
3345 if (IS_NULL(onig_node_list_add(xnode
, rem
))) {
3347 onig_node_free(xnode
);
3348 onig_node_free(rem
);
3358 if (var_anode
== NULL
) {
3360 onig_node_free(xnode
);
3361 onig_node_free(rem
);
3364 NCDR(var_anode
) = an
;
3377 onig_node_free(snode
);
3380 onig_node_free(*rnode
);
3382 return ONIGERR_MEMORY
;
3386 expand_case_fold_string(Node
* node
, regex_t
* reg
)
3388 #define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
3390 int r
, n
, len
, alt_num
;
3391 UChar
*start
, *end
, *p
;
3392 Node
*top_root
, *root
, *snode
, *prev_node
;
3393 OnigCaseFoldCodeItem items
[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM
];
3394 StrNode
* sn
= NSTR(node
);
3396 if (NSTRING_IS_AMBIG(node
)) return 0;
3400 if (start
>= end
) return 0;
3403 top_root
= root
= prev_node
= snode
= NULL_NODE
;
3407 n
= ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg
->enc
, reg
->case_fold_flag
,
3414 len
= enclen(reg
->enc
, p
);
3417 if (IS_NULL(snode
)) {
3418 if (IS_NULL(root
) && IS_NOT_NULL(prev_node
)) {
3419 top_root
= root
= onig_node_list_add(NULL_NODE
, prev_node
);
3420 if (IS_NULL(root
)) {
3421 onig_node_free(prev_node
);
3426 prev_node
= snode
= onig_node_new_str(NULL
, NULL
);
3427 if (IS_NULL(snode
)) goto mem_err
;
3428 if (IS_NOT_NULL(root
)) {
3429 if (IS_NULL(onig_node_list_add(root
, snode
))) {
3430 onig_node_free(snode
);
3436 r
= onig_node_str_cat(snode
, p
, p
+ len
);
3437 if (r
!= 0) goto err
;
3441 if (alt_num
> THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION
) break;
3443 if (IS_NULL(root
) && IS_NOT_NULL(prev_node
)) {
3444 top_root
= root
= onig_node_list_add(NULL_NODE
, prev_node
);
3445 if (IS_NULL(root
)) {
3446 onig_node_free(prev_node
);
3451 r
= expand_case_fold_string_alt(n
, items
, p
, len
, end
, reg
, &prev_node
);
3452 if (r
< 0) goto mem_err
;
3454 if (IS_NULL(root
)) {
3455 top_root
= prev_node
;
3458 if (IS_NULL(onig_node_list_add(root
, prev_node
))) {
3459 onig_node_free(prev_node
);
3464 root
= NCAR(prev_node
);
3467 if (IS_NOT_NULL(root
)) {
3468 if (IS_NULL(onig_node_list_add(root
, prev_node
))) {
3469 onig_node_free(prev_node
);
3484 r
= expand_case_fold_make_rem_string(&srem
, p
, end
, reg
);
3485 if (r
!= 0) goto mem_err
;
3487 if (IS_NOT_NULL(prev_node
) && IS_NULL(root
)) {
3488 top_root
= root
= onig_node_list_add(NULL_NODE
, prev_node
);
3489 if (IS_NULL(root
)) {
3490 onig_node_free(srem
);
3491 onig_node_free(prev_node
);
3496 if (IS_NULL(root
)) {
3500 if (IS_NULL(onig_node_list_add(root
, srem
))) {
3501 onig_node_free(srem
);
3508 top_root
= (IS_NOT_NULL(top_root
) ? top_root
: prev_node
);
3509 swap_node(node
, top_root
);
3510 onig_node_free(top_root
);
3517 onig_node_free(top_root
);
3522 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3524 #define CEC_THRES_NUM_BIG_REPEAT 512
3525 #define CEC_INFINITE_NUM 0x7fffffff
3527 #define CEC_IN_INFINITE_REPEAT (1<<0)
3528 #define CEC_IN_FINITE_REPEAT (1<<1)
3529 #define CEC_CONT_BIG_REPEAT (1<<2)
3532 setup_comb_exp_check(Node
* node
, int state
, ScanEnv
* env
)
3541 Node
* prev
= NULL_NODE
;
3543 r
= setup_comb_exp_check(NCAR(node
), r
, env
);
3545 } while (r
>= 0 && IS_NOT_NULL(node
= NCDR(node
)));
3553 ret
= setup_comb_exp_check(NCAR(node
), state
, env
);
3555 } while (ret
>= 0 && IS_NOT_NULL(node
= NCDR(node
)));
3561 int child_state
= state
;
3563 QtfrNode
* qn
= NQTFR(node
);
3564 Node
* target
= qn
->target
;
3567 if (! IS_REPEAT_INFINITE(qn
->upper
)) {
3568 if (qn
->upper
> 1) {
3569 /* {0,1}, {1,1} are allowed */
3570 child_state
|= CEC_IN_FINITE_REPEAT
;
3572 /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
3573 if (env
->backrefed_mem
== 0) {
3574 if (NTYPE(qn
->target
) == NT_ENCLOSE
) {
3575 EncloseNode
* en
= NENCLOSE(qn
->target
);
3576 if (en
->type
== ENCLOSE_MEMORY
) {
3577 if (NTYPE(en
->target
) == NT_QTFR
) {
3578 QtfrNode
* q
= NQTFR(en
->target
);
3579 if (IS_REPEAT_INFINITE(q
->upper
)
3580 && q
->greedy
== qn
->greedy
) {
3581 qn
->upper
= (qn
->lower
== 0 ? 1 : qn
->lower
);
3583 child_state
= state
;
3592 if (state
& CEC_IN_FINITE_REPEAT
) {
3593 qn
->comb_exp_check_num
= -1;
3596 if (IS_REPEAT_INFINITE(qn
->upper
)) {
3597 var_num
= CEC_INFINITE_NUM
;
3598 child_state
|= CEC_IN_INFINITE_REPEAT
;
3601 var_num
= qn
->upper
- qn
->lower
;
3604 if (var_num
>= CEC_THRES_NUM_BIG_REPEAT
)
3605 add_state
|= CEC_CONT_BIG_REPEAT
;
3607 if (((state
& CEC_IN_INFINITE_REPEAT
) != 0 && var_num
!= 0) ||
3608 ((state
& CEC_CONT_BIG_REPEAT
) != 0 &&
3609 var_num
>= CEC_THRES_NUM_BIG_REPEAT
)) {
3610 if (qn
->comb_exp_check_num
== 0) {
3611 env
->num_comb_exp_check
++;
3612 qn
->comb_exp_check_num
= env
->num_comb_exp_check
;
3613 if (env
->curr_max_regnum
> env
->comb_exp_max_regnum
)
3614 env
->comb_exp_max_regnum
= env
->curr_max_regnum
;
3619 r
= setup_comb_exp_check(target
, child_state
, env
);
3626 EncloseNode
* en
= NENCLOSE(node
);
3629 case ENCLOSE_MEMORY
:
3631 if (env
->curr_max_regnum
< en
->regnum
)
3632 env
->curr_max_regnum
= en
->regnum
;
3634 r
= setup_comb_exp_check(en
->target
, state
, env
);
3639 r
= setup_comb_exp_check(en
->target
, state
, env
);
3645 #ifdef USE_SUBEXP_CALL
3647 if (IS_CALL_RECURSION(NCALL(node
)))
3648 env
->has_recursion
= 1;
3650 r
= setup_comb_exp_check(NCALL(node
)->target
, state
, env
);
3662 #define IN_ALT (1<<0)
3663 #define IN_NOT (1<<1)
3664 #define IN_REPEAT (1<<2)
3665 #define IN_VAR_REPEAT (1<<3)
3667 /* setup_tree does the following work.
3668 1. check empty loop. (set qn->target_empty_info)
3669 2. expand ignore-case in char class.
3670 3. set memory status bit flags. (reg->mem_stats)
3671 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
3672 5. find invalid patterns in look-behind.
3673 6. expand repeated string.
3676 setup_tree(Node
* node
, regex_t
* reg
, int state
, ScanEnv
* env
)
3685 Node
* prev
= NULL_NODE
;
3687 r
= setup_tree(NCAR(node
), reg
, state
, env
);
3688 if (IS_NOT_NULL(prev
) && r
== 0) {
3689 r
= next_setup(prev
, NCAR(node
), reg
);
3692 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
3698 r
= setup_tree(NCAR(node
), reg
, (state
| IN_ALT
), env
);
3699 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
3706 if (IS_IGNORECASE(reg
->options
) && !NSTRING_IS_RAW(node
)) {
3707 r
= expand_case_fold_string(node
, reg
);
3715 #ifdef USE_SUBEXP_CALL
3724 Node
** nodes
= SCANENV_MEM_NODES(env
);
3725 BRefNode
* br
= NBREF(node
);
3727 for (i
= 0; i
< br
->back_num
; i
++) {
3728 if (p
[i
] > env
->num_mem
) return ONIGERR_INVALID_BACKREF
;
3729 BIT_STATUS_ON_AT(env
->backrefed_mem
, p
[i
]);
3730 BIT_STATUS_ON_AT(env
->bt_mem_start
, p
[i
]);
3731 #ifdef USE_BACKREF_WITH_LEVEL
3732 if (IS_BACKREF_NEST_LEVEL(br
)) {
3733 BIT_STATUS_ON_AT(env
->bt_mem_end
, p
[i
]);
3736 SET_ENCLOSE_STATUS(nodes
[p
[i
]], NST_MEM_BACKREFED
);
3744 QtfrNode
* qn
= NQTFR(node
);
3745 Node
* target
= qn
->target
;
3747 if ((state
& IN_REPEAT
) != 0) {
3748 qn
->state
|= NST_IN_REPEAT
;
3751 if (IS_REPEAT_INFINITE(qn
->upper
) || qn
->upper
>= 1) {
3752 r
= get_min_match_length(target
, &d
, env
);
3755 qn
->target_empty_info
= NQ_TARGET_IS_EMPTY
;
3756 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3757 r
= quantifiers_memory_node_info(target
);
3760 qn
->target_empty_info
= r
;
3764 r
= get_max_match_length(target
, &d
, env
);
3765 if (r
== 0 && d
== 0) {
3766 /* ()* ==> ()?, ()+ ==> () */
3768 if (qn
->lower
> 1) qn
->lower
= 1;
3769 if (NTYPE(target
) == NT_STR
) {
3770 qn
->upper
= qn
->lower
= 0; /* /(?:)+/ ==> // */
3778 if (qn
->lower
!= qn
->upper
)
3779 state
|= IN_VAR_REPEAT
;
3780 r
= setup_tree(target
, reg
, state
, env
);
3784 #define EXPAND_STRING_MAX_LENGTH 100
3785 if (NTYPE(target
) == NT_STR
) {
3786 if (!IS_REPEAT_INFINITE(qn
->lower
) && qn
->lower
== qn
->upper
&&
3787 qn
->lower
> 1 && qn
->lower
<= EXPAND_STRING_MAX_LENGTH
) {
3788 int len
= NSTRING_LEN(target
);
3789 StrNode
* sn
= NSTR(target
);
3791 if (len
* qn
->lower
<= EXPAND_STRING_MAX_LENGTH
) {
3792 int i
, n
= qn
->lower
;
3793 onig_node_conv_to_str_node(node
, NSTR(target
)->flag
);
3794 for (i
= 0; i
< n
; i
++) {
3795 r
= onig_node_str_cat(node
, sn
->s
, sn
->end
);
3798 onig_node_free(target
);
3799 break; /* break case NT_QTFR: */
3804 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
3805 if (qn
->greedy
&& (qn
->target_empty_info
!= 0)) {
3806 if (NTYPE(target
) == NT_QTFR
) {
3807 QtfrNode
* tqn
= NQTFR(target
);
3808 if (IS_NOT_NULL(tqn
->head_exact
)) {
3809 qn
->head_exact
= tqn
->head_exact
;
3810 tqn
->head_exact
= NULL
;
3814 qn
->head_exact
= get_head_value_node(qn
->target
, 1, reg
);
3823 EncloseNode
* en
= NENCLOSE(node
);
3826 case ENCLOSE_OPTION
:
3828 OnigOptionType options
= reg
->options
;
3829 reg
->options
= NENCLOSE(node
)->option
;
3830 r
= setup_tree(NENCLOSE(node
)->target
, reg
, state
, env
);
3831 reg
->options
= options
;
3835 case ENCLOSE_MEMORY
:
3836 if ((state
& (IN_ALT
| IN_NOT
| IN_VAR_REPEAT
)) != 0) {
3837 BIT_STATUS_ON_AT(env
->bt_mem_start
, en
->regnum
);
3838 /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
3840 r
= setup_tree(en
->target
, reg
, state
, env
);
3843 case ENCLOSE_STOP_BACKTRACK
:
3845 Node
* target
= en
->target
;
3846 r
= setup_tree(target
, reg
, state
, env
);
3847 if (NTYPE(target
) == NT_QTFR
) {
3848 QtfrNode
* tqn
= NQTFR(target
);
3849 if (IS_REPEAT_INFINITE(tqn
->upper
) && tqn
->lower
<= 1 &&
3850 tqn
->greedy
!= 0) { /* (?>a*), a*+ etc... */
3851 int qtype
= NTYPE(tqn
->target
);
3852 if (IS_NODE_TYPE_SIMPLE(qtype
))
3853 SET_ENCLOSE_STATUS(node
, NST_STOP_BT_SIMPLE_REPEAT
);
3864 AnchorNode
* an
= NANCHOR(node
);
3867 case ANCHOR_PREC_READ
:
3868 r
= setup_tree(an
->target
, reg
, state
, env
);
3870 case ANCHOR_PREC_READ_NOT
:
3871 r
= setup_tree(an
->target
, reg
, (state
| IN_NOT
), env
);
3874 /* allowed node types in look-behind */
3875 #define ALLOWED_TYPE_IN_LB \
3876 ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
3877 BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
3879 #define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY )
3880 #define ALLOWED_ENCLOSE_IN_LB_NOT 0
3882 #define ALLOWED_ANCHOR_IN_LB \
3883 ( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
3884 #define ALLOWED_ANCHOR_IN_LB_NOT \
3885 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
3887 case ANCHOR_LOOK_BEHIND
:
3889 r
= check_type_tree(an
->target
, ALLOWED_TYPE_IN_LB
,
3890 ALLOWED_ENCLOSE_IN_LB
, ALLOWED_ANCHOR_IN_LB
);
3891 if (r
< 0) return r
;
3892 if (r
> 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN
;
3893 r
= setup_look_behind(node
, reg
, env
);
3894 if (r
!= 0) return r
;
3895 r
= setup_tree(an
->target
, reg
, state
, env
);
3899 case ANCHOR_LOOK_BEHIND_NOT
:
3901 r
= check_type_tree(an
->target
, ALLOWED_TYPE_IN_LB
,
3902 ALLOWED_ENCLOSE_IN_LB_NOT
, ALLOWED_ANCHOR_IN_LB_NOT
);
3903 if (r
< 0) return r
;
3904 if (r
> 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN
;
3905 r
= setup_look_behind(node
, reg
, env
);
3906 if (r
!= 0) return r
;
3907 r
= setup_tree(an
->target
, reg
, (state
| IN_NOT
), env
);
3921 /* set skip map for Boyer-Moor search */
3923 set_bm_skip(UChar
* s
, UChar
* end
, OnigEncoding enc ARG_UNUSED
,
3924 UChar skip
[], int** int_skip
)
3928 len
= (int)(end
- s
);
3929 if (len
< ONIG_CHAR_TABLE_SIZE
) {
3930 for (i
= 0; i
< ONIG_CHAR_TABLE_SIZE
; i
++) skip
[i
] = (UChar
)len
;
3932 for (i
= 0; i
< len
- 1; i
++)
3933 skip
[s
[i
]] = (UChar
)(len
- 1 - i
);
3936 if (IS_NULL(*int_skip
)) {
3937 *int_skip
= (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE
);
3938 if (IS_NULL(*int_skip
)) return ONIGERR_MEMORY
;
3940 for (i
= 0; i
< ONIG_CHAR_TABLE_SIZE
; i
++) (*int_skip
)[i
] = len
;
3942 for (i
= 0; i
< len
- 1; i
++)
3943 (*int_skip
)[s
[i
]] = len
- 1 - i
;
3948 #define OPT_EXACT_MAXLEN 24
3951 OnigDistance min
; /* min byte length */
3952 OnigDistance max
; /* max byte length */
3958 OnigOptionType options
;
3959 OnigCaseFoldType case_fold_flag
;
3969 MinMaxLen mmd
; /* info position */
3975 UChar s
[OPT_EXACT_MAXLEN
];
3979 MinMaxLen mmd
; /* info position */
3982 int value
; /* weighted value */
3983 UChar map
[ONIG_CHAR_TABLE_SIZE
];
3990 OptExactInfo exb
; /* boundary */
3991 OptExactInfo exm
; /* middle */
3992 OptExactInfo expr
; /* prec read (?=...) */
3994 OptMapInfo map
; /* boundary */
3999 map_position_value(OnigEncoding enc
, int i
)
4001 static const short int ByteValTable
[] = {
4002 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
4003 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4004 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
4005 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
4006 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4007 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
4008 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4009 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
4012 if (i
< (int )(sizeof(ByteValTable
)/sizeof(ByteValTable
[0]))) {
4013 if (i
== 0 && ONIGENC_MBC_MINLEN(enc
) > 1)
4016 return (int )ByteValTable
[i
];
4019 return 4; /* Take it easy. */
4023 distance_value(MinMaxLen
* mm
)
4025 /* 1000 / (min-max-dist + 1) */
4026 static const short int dist_vals
[] = {
4027 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
4028 91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
4029 48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
4030 32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
4031 24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
4032 20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
4033 16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
4034 14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
4035 12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
4036 11, 11, 11, 11, 11, 10, 10, 10, 10, 10
4041 if (mm
->max
== ONIG_INFINITE_DISTANCE
) return 0;
4043 d
= mm
->max
- mm
->min
;
4044 if (d
< (int )(sizeof(dist_vals
)/sizeof(dist_vals
[0])))
4045 /* return dist_vals[d] * 16 / (mm->min + 12); */
4046 return (int )dist_vals
[d
];
4052 comp_distance_value(MinMaxLen
* d1
, MinMaxLen
* d2
, int v1
, int v2
)
4054 if (v2
<= 0) return -1;
4055 if (v1
<= 0) return 1;
4057 v1
*= distance_value(d1
);
4058 v2
*= distance_value(d2
);
4060 if (v2
> v1
) return 1;
4061 if (v2
< v1
) return -1;
4063 if (d2
->min
< d1
->min
) return 1;
4064 if (d2
->min
> d1
->min
) return -1;
4069 is_equal_mml(MinMaxLen
* a
, MinMaxLen
* b
)
4071 return (a
->min
== b
->min
&& a
->max
== b
->max
) ? 1 : 0;
4076 set_mml(MinMaxLen
* mml
, OnigDistance min
, OnigDistance max
)
4083 clear_mml(MinMaxLen
* mml
)
4085 mml
->min
= mml
->max
= 0;
4089 copy_mml(MinMaxLen
* to
, MinMaxLen
* from
)
4091 to
->min
= from
->min
;
4092 to
->max
= from
->max
;
4096 add_mml(MinMaxLen
* to
, MinMaxLen
* from
)
4098 to
->min
= distance_add(to
->min
, from
->min
);
4099 to
->max
= distance_add(to
->max
, from
->max
);
4104 add_len_mml(MinMaxLen
* to
, OnigDistance len
)
4106 to
->min
= distance_add(to
->min
, len
);
4107 to
->max
= distance_add(to
->max
, len
);
4112 alt_merge_mml(MinMaxLen
* to
, MinMaxLen
* from
)
4114 if (to
->min
> from
->min
) to
->min
= from
->min
;
4115 if (to
->max
< from
->max
) to
->max
= from
->max
;
4119 copy_opt_env(OptEnv
* to
, OptEnv
* from
)
4121 CopyMem (to
, from
, sizeof (OptEnv
));
4125 clear_opt_anc_info(OptAncInfo
* anc
)
4127 anc
->left_anchor
= 0;
4128 anc
->right_anchor
= 0;
4132 copy_opt_anc_info(OptAncInfo
* to
, OptAncInfo
* from
)
4134 CopyMem (to
, from
, sizeof (OptAncInfo
));
4138 concat_opt_anc_info(OptAncInfo
* to
, OptAncInfo
* left
, OptAncInfo
* right
,
4139 OnigDistance left_len
, OnigDistance right_len
)
4141 clear_opt_anc_info(to
);
4143 to
->left_anchor
= left
->left_anchor
;
4144 if (left_len
== 0) {
4145 to
->left_anchor
|= right
->left_anchor
;
4148 to
->right_anchor
= right
->right_anchor
;
4149 if (right_len
== 0) {
4150 to
->right_anchor
|= left
->right_anchor
;
4155 is_left_anchor(int anc
)
4157 if (anc
== ANCHOR_END_BUF
|| anc
== ANCHOR_SEMI_END_BUF
||
4158 anc
== ANCHOR_END_LINE
|| anc
== ANCHOR_PREC_READ
||
4159 anc
== ANCHOR_PREC_READ_NOT
)
4166 is_set_opt_anc_info(OptAncInfo
* to
, int anc
)
4168 if ((to
->left_anchor
& anc
) != 0) return 1;
4170 return ((to
->right_anchor
& anc
) != 0 ? 1 : 0);
4174 add_opt_anc_info(OptAncInfo
* to
, int anc
)
4176 if (is_left_anchor(anc
))
4177 to
->left_anchor
|= anc
;
4179 to
->right_anchor
|= anc
;
4183 remove_opt_anc_info(OptAncInfo
* to
, int anc
)
4185 if (is_left_anchor(anc
))
4186 to
->left_anchor
&= ~anc
;
4188 to
->right_anchor
&= ~anc
;
4192 alt_merge_opt_anc_info(OptAncInfo
* to
, OptAncInfo
* add
)
4194 to
->left_anchor
&= add
->left_anchor
;
4195 to
->right_anchor
&= add
->right_anchor
;
4199 is_full_opt_exact_info(OptExactInfo
* ex
)
4201 return (ex
->len
>= OPT_EXACT_MAXLEN
? 1 : 0);
4205 clear_opt_exact_info(OptExactInfo
* ex
)
4207 clear_mml(&ex
->mmd
);
4208 clear_opt_anc_info(&ex
->anc
);
4210 ex
->ignore_case
= 0;
4216 copy_opt_exact_info(OptExactInfo
* to
, OptExactInfo
* from
)
4218 CopyMem (to
, from
, sizeof (OptExactInfo
));
4222 concat_opt_exact_info(OptExactInfo
* to
, OptExactInfo
* add
, OnigEncoding enc
)
4228 if (! to
->ignore_case
&& add
->ignore_case
) {
4229 if (to
->len
>= add
->len
) return ; /* avoid */
4231 to
->ignore_case
= 1;
4236 for (i
= to
->len
; p
< end
; ) {
4237 len
= enclen(enc
, p
);
4238 if (i
+ len
> OPT_EXACT_MAXLEN
) break;
4239 for (j
= 0; j
< len
&& p
< end
; j
++)
4244 to
->reach_end
= (p
== end
? add
->reach_end
: 0);
4246 concat_opt_anc_info(&tanc
, &to
->anc
, &add
->anc
, 1, 1);
4247 if (! to
->reach_end
) tanc
.right_anchor
= 0;
4248 copy_opt_anc_info(&to
->anc
, &tanc
);
4252 concat_opt_exact_info_str(OptExactInfo
* to
, UChar
* s
, UChar
* end
,
4253 int raw ARG_UNUSED
, OnigEncoding enc
)
4258 for (i
= to
->len
, p
= s
; p
< end
&& i
< OPT_EXACT_MAXLEN
; ) {
4259 len
= enclen(enc
, p
);
4260 if (i
+ len
> OPT_EXACT_MAXLEN
) break;
4261 for (j
= 0; j
< len
&& p
< end
; j
++)
4269 alt_merge_opt_exact_info(OptExactInfo
* to
, OptExactInfo
* add
, OptEnv
* env
)
4273 if (add
->len
== 0 || to
->len
== 0) {
4274 clear_opt_exact_info(to
);
4278 if (! is_equal_mml(&to
->mmd
, &add
->mmd
)) {
4279 clear_opt_exact_info(to
);
4283 for (i
= 0; i
< to
->len
&& i
< add
->len
; ) {
4284 if (to
->s
[i
] != add
->s
[i
]) break;
4285 len
= enclen(env
->enc
, to
->s
+ i
);
4287 for (j
= 1; j
< len
; j
++) {
4288 if (to
->s
[i
+j
] != add
->s
[i
+j
]) break;
4294 if (! add
->reach_end
|| i
< add
->len
|| i
< to
->len
) {
4298 to
->ignore_case
|= add
->ignore_case
;
4300 alt_merge_opt_anc_info(&to
->anc
, &add
->anc
);
4301 if (! to
->reach_end
) to
->anc
.right_anchor
= 0;
4305 select_opt_exact_info(OnigEncoding enc
, OptExactInfo
* now
, OptExactInfo
* alt
)
4316 copy_opt_exact_info(now
, alt
);
4319 else if (v1
<= 2 && v2
<= 2) {
4320 /* ByteValTable[x] is big value --> low price */
4321 v2
= map_position_value(enc
, now
->s
[0]);
4322 v1
= map_position_value(enc
, alt
->s
[0]);
4324 if (now
->len
> 1) v1
+= 5;
4325 if (alt
->len
> 1) v2
+= 5;
4328 if (now
->ignore_case
== 0) v1
*= 2;
4329 if (alt
->ignore_case
== 0) v2
*= 2;
4331 if (comp_distance_value(&now
->mmd
, &alt
->mmd
, v1
, v2
) > 0)
4332 copy_opt_exact_info(now
, alt
);
4336 clear_opt_map_info(OptMapInfo
* map
)
4338 static const OptMapInfo clean_info
= {
4341 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4342 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4343 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4344 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4345 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4346 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4347 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4348 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4349 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4350 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4351 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4352 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4353 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4354 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4355 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4356 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4360 xmemcpy(map
, &clean_info
, sizeof(OptMapInfo
));
4364 copy_opt_map_info(OptMapInfo
* to
, OptMapInfo
* from
)
4366 CopyMem (to
, from
, sizeof (OptMapInfo
));
4370 add_char_opt_map_info(OptMapInfo
* map
, UChar c
, OnigEncoding enc
)
4372 if (map
->map
[c
] == 0) {
4374 map
->value
+= map_position_value(enc
, c
);
4379 add_char_amb_opt_map_info(OptMapInfo
* map
, UChar
* p
, UChar
* end
,
4380 OnigEncoding enc
, OnigCaseFoldType case_fold_flag
)
4382 OnigCaseFoldCodeItem items
[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM
];
4383 UChar buf
[ONIGENC_CODE_TO_MBC_MAXLEN
];
4386 add_char_opt_map_info(map
, p
[0], enc
);
4388 case_fold_flag
= DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag
);
4389 n
= ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc
, case_fold_flag
, p
, end
, items
);
4390 if (n
< 0) return n
;
4392 for (i
= 0; i
< n
; i
++) {
4393 ONIGENC_CODE_TO_MBC(enc
, items
[i
].code
[0], buf
);
4394 add_char_opt_map_info(map
, buf
[0], enc
);
4401 select_opt_map_info(OptMapInfo
* now
, OptMapInfo
* alt
)
4403 static int z
= 1<<15; /* 32768: something big value */
4407 if (alt
->value
== 0) return ;
4408 if (now
->value
== 0) {
4409 copy_opt_map_info(now
, alt
);
4413 v1
= z
/ now
->value
;
4414 v2
= z
/ alt
->value
;
4415 if (comp_distance_value(&now
->mmd
, &alt
->mmd
, v1
, v2
) > 0)
4416 copy_opt_map_info(now
, alt
);
4420 comp_opt_exact_or_map_info(OptExactInfo
* e
, OptMapInfo
* m
)
4422 #define COMP_EM_BASE 20
4425 if (m
->value
<= 0) return -1;
4427 ve
= COMP_EM_BASE
* e
->len
* (e
->ignore_case
? 1 : 2);
4428 vm
= COMP_EM_BASE
* 5 * 2 / m
->value
;
4429 return comp_distance_value(&e
->mmd
, &m
->mmd
, ve
, vm
);
4433 alt_merge_opt_map_info(OnigEncoding enc
, OptMapInfo
* to
, OptMapInfo
* add
)
4437 /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
4438 if (to
->value
== 0) return ;
4439 if (add
->value
== 0 || to
->mmd
.max
< add
->mmd
.min
) {
4440 clear_opt_map_info(to
);
4444 alt_merge_mml(&to
->mmd
, &add
->mmd
);
4447 for (i
= 0; i
< ONIG_CHAR_TABLE_SIZE
; i
++) {
4452 val
+= map_position_value(enc
, i
);
4456 alt_merge_opt_anc_info(&to
->anc
, &add
->anc
);
4460 set_bound_node_opt_info(NodeOptInfo
* opt
, MinMaxLen
* mmd
)
4462 copy_mml(&(opt
->exb
.mmd
), mmd
);
4463 copy_mml(&(opt
->expr
.mmd
), mmd
);
4464 copy_mml(&(opt
->map
.mmd
), mmd
);
4468 clear_node_opt_info(NodeOptInfo
* opt
)
4470 clear_mml(&opt
->len
);
4471 clear_opt_anc_info(&opt
->anc
);
4472 clear_opt_exact_info(&opt
->exb
);
4473 clear_opt_exact_info(&opt
->exm
);
4474 clear_opt_exact_info(&opt
->expr
);
4475 clear_opt_map_info(&opt
->map
);
4479 copy_node_opt_info(NodeOptInfo
* to
, NodeOptInfo
* from
)
4481 CopyMem (to
, from
, sizeof (NodeOptInfo
));
4485 concat_left_node_opt_info(OnigEncoding enc
, NodeOptInfo
* to
, NodeOptInfo
* add
)
4487 int exb_reach
, exm_reach
;
4490 concat_opt_anc_info(&tanc
, &to
->anc
, &add
->anc
, to
->len
.max
, add
->len
.max
);
4491 copy_opt_anc_info(&to
->anc
, &tanc
);
4493 if (add
->exb
.len
> 0 && to
->len
.max
== 0) {
4494 concat_opt_anc_info(&tanc
, &to
->anc
, &add
->exb
.anc
,
4495 to
->len
.max
, add
->len
.max
);
4496 copy_opt_anc_info(&add
->exb
.anc
, &tanc
);
4499 if (add
->map
.value
> 0 && to
->len
.max
== 0) {
4500 if (add
->map
.mmd
.max
== 0)
4501 add
->map
.anc
.left_anchor
|= to
->anc
.left_anchor
;
4504 exb_reach
= to
->exb
.reach_end
;
4505 exm_reach
= to
->exm
.reach_end
;
4507 if (add
->len
.max
!= 0)
4508 to
->exb
.reach_end
= to
->exm
.reach_end
= 0;
4510 if (add
->exb
.len
> 0) {
4512 concat_opt_exact_info(&to
->exb
, &add
->exb
, enc
);
4513 clear_opt_exact_info(&add
->exb
);
4515 else if (exm_reach
) {
4516 concat_opt_exact_info(&to
->exm
, &add
->exb
, enc
);
4517 clear_opt_exact_info(&add
->exb
);
4520 select_opt_exact_info(enc
, &to
->exm
, &add
->exb
);
4521 select_opt_exact_info(enc
, &to
->exm
, &add
->exm
);
4523 if (to
->expr
.len
> 0) {
4524 if (add
->len
.max
> 0) {
4525 if (to
->expr
.len
> (int )add
->len
.max
)
4526 to
->expr
.len
= add
->len
.max
;
4528 if (to
->expr
.mmd
.max
== 0)
4529 select_opt_exact_info(enc
, &to
->exb
, &to
->expr
);
4531 select_opt_exact_info(enc
, &to
->exm
, &to
->expr
);
4534 else if (add
->expr
.len
> 0) {
4535 copy_opt_exact_info(&to
->expr
, &add
->expr
);
4538 select_opt_map_info(&to
->map
, &add
->map
);
4540 add_mml(&to
->len
, &add
->len
);
4544 alt_merge_node_opt_info(NodeOptInfo
* to
, NodeOptInfo
* add
, OptEnv
* env
)
4546 alt_merge_opt_anc_info (&to
->anc
, &add
->anc
);
4547 alt_merge_opt_exact_info(&to
->exb
, &add
->exb
, env
);
4548 alt_merge_opt_exact_info(&to
->exm
, &add
->exm
, env
);
4549 alt_merge_opt_exact_info(&to
->expr
, &add
->expr
, env
);
4550 alt_merge_opt_map_info(env
->enc
, &to
->map
, &add
->map
);
4552 alt_merge_mml(&to
->len
, &add
->len
);
4556 #define MAX_NODE_OPT_INFO_REF_COUNT 5
4559 optimize_node_left(Node
* node
, NodeOptInfo
* opt
, OptEnv
* env
)
4564 clear_node_opt_info(opt
);
4565 set_bound_node_opt_info(opt
, &env
->mmd
);
4575 copy_opt_env(&nenv
, env
);
4577 r
= optimize_node_left(NCAR(nd
), &nopt
, &nenv
);
4579 add_mml(&nenv
.mmd
, &nopt
.len
);
4580 concat_left_node_opt_info(env
->enc
, opt
, &nopt
);
4582 } while (r
== 0 && IS_NOT_NULL(nd
= NCDR(nd
)));
4592 r
= optimize_node_left(NCAR(nd
), &nopt
, env
);
4594 if (nd
== node
) copy_node_opt_info(opt
, &nopt
);
4595 else alt_merge_node_opt_info(opt
, &nopt
, env
);
4597 } while ((r
== 0) && IS_NOT_NULL(nd
= NCDR(nd
)));
4603 StrNode
* sn
= NSTR(node
);
4604 int slen
= (int)(sn
->end
- sn
->s
);
4605 int is_raw
= NSTRING_IS_RAW(node
);
4607 if (! NSTRING_IS_AMBIG(node
)) {
4608 concat_opt_exact_info_str(&opt
->exb
, sn
->s
, sn
->end
,
4609 NSTRING_IS_RAW(node
), env
->enc
);
4611 add_char_opt_map_info(&opt
->map
, *(sn
->s
), env
->enc
);
4613 set_mml(&opt
->len
, slen
, slen
);
4618 if (NSTRING_IS_DONT_GET_OPT_INFO(node
)) {
4619 int n
= onigenc_strlen(env
->enc
, sn
->s
, sn
->end
);
4620 max
= ONIGENC_MBC_MAXLEN_DIST(env
->enc
) * n
;
4623 concat_opt_exact_info_str(&opt
->exb
, sn
->s
, sn
->end
,
4625 opt
->exb
.ignore_case
= 1;
4628 r
= add_char_amb_opt_map_info(&opt
->map
, sn
->s
, sn
->end
,
4629 env
->enc
, env
->case_fold_flag
);
4636 set_mml(&opt
->len
, slen
, max
);
4639 if (opt
->exb
.len
== slen
)
4640 opt
->exb
.reach_end
= 1;
4647 CClassNode
* cc
= NCCLASS(node
);
4649 /* no need to check ignore case. (setted in setup_tree()) */
4651 if (IS_NOT_NULL(cc
->mbuf
) || IS_NCCLASS_NOT(cc
)) {
4652 OnigDistance min
= ONIGENC_MBC_MINLEN(env
->enc
);
4653 OnigDistance max
= ONIGENC_MBC_MAXLEN_DIST(env
->enc
);
4655 set_mml(&opt
->len
, min
, max
);
4658 for (i
= 0; i
< SINGLE_BYTE_SIZE
; i
++) {
4659 z
= BITSET_AT(cc
->bs
, i
);
4660 if ((z
&& !IS_NCCLASS_NOT(cc
)) || (!z
&& IS_NCCLASS_NOT(cc
))) {
4661 add_char_opt_map_info(&opt
->map
, (UChar
)i
, env
->enc
);
4664 set_mml(&opt
->len
, 1, 1);
4673 max
= ONIGENC_MBC_MAXLEN_DIST(env
->enc
);
4678 switch (NCTYPE(node
)->ctype
) {
4679 case ONIGENC_CTYPE_WORD
:
4680 if (NCTYPE(node
)->not != 0) {
4681 for (i
= 0; i
< SINGLE_BYTE_SIZE
; i
++) {
4682 if (! ONIGENC_IS_CODE_WORD(env
->enc
, i
)) {
4683 add_char_opt_map_info(&opt
->map
, (UChar
)i
, env
->enc
);
4688 for (i
= 0; i
< SINGLE_BYTE_SIZE
; i
++) {
4689 if (ONIGENC_IS_CODE_WORD(env
->enc
, i
)) {
4690 add_char_opt_map_info(&opt
->map
, (UChar
)i
, env
->enc
);
4698 min
= ONIGENC_MBC_MINLEN(env
->enc
);
4700 set_mml(&opt
->len
, min
, max
);
4706 OnigDistance min
= ONIGENC_MBC_MINLEN(env
->enc
);
4707 OnigDistance max
= ONIGENC_MBC_MAXLEN_DIST(env
->enc
);
4708 set_mml(&opt
->len
, min
, max
);
4713 switch (NANCHOR(node
)->type
) {
4714 case ANCHOR_BEGIN_BUF
:
4715 case ANCHOR_BEGIN_POSITION
:
4716 case ANCHOR_BEGIN_LINE
:
4717 case ANCHOR_END_BUF
:
4718 case ANCHOR_SEMI_END_BUF
:
4719 case ANCHOR_END_LINE
:
4720 add_opt_anc_info(&opt
->anc
, NANCHOR(node
)->type
);
4723 case ANCHOR_PREC_READ
:
4727 r
= optimize_node_left(NANCHOR(node
)->target
, &nopt
, env
);
4729 if (nopt
.exb
.len
> 0)
4730 copy_opt_exact_info(&opt
->expr
, &nopt
.exb
);
4731 else if (nopt
.exm
.len
> 0)
4732 copy_opt_exact_info(&opt
->expr
, &nopt
.exm
);
4734 opt
->expr
.reach_end
= 0;
4736 if (nopt
.map
.value
> 0)
4737 copy_opt_map_info(&opt
->map
, &nopt
.map
);
4742 case ANCHOR_PREC_READ_NOT
:
4743 case ANCHOR_LOOK_BEHIND
: /* Sorry, I can't make use of it. */
4744 case ANCHOR_LOOK_BEHIND_NOT
:
4753 OnigDistance min
, max
, tmin
, tmax
;
4754 Node
** nodes
= SCANENV_MEM_NODES(env
->scan_env
);
4755 BRefNode
* br
= NBREF(node
);
4757 if (br
->state
& NST_RECURSION
) {
4758 set_mml(&opt
->len
, 0, ONIG_INFINITE_DISTANCE
);
4761 backs
= BACKREFS_P(br
);
4762 r
= get_min_match_length(nodes
[backs
[0]], &min
, env
->scan_env
);
4764 r
= get_max_match_length(nodes
[backs
[0]], &max
, env
->scan_env
);
4766 for (i
= 1; i
< br
->back_num
; i
++) {
4767 r
= get_min_match_length(nodes
[backs
[i
]], &tmin
, env
->scan_env
);
4769 r
= get_max_match_length(nodes
[backs
[i
]], &tmax
, env
->scan_env
);
4771 if (min
> tmin
) min
= tmin
;
4772 if (max
< tmax
) max
= tmax
;
4774 if (r
== 0) set_mml(&opt
->len
, min
, max
);
4778 #ifdef USE_SUBEXP_CALL
4780 if (IS_CALL_RECURSION(NCALL(node
)))
4781 set_mml(&opt
->len
, 0, ONIG_INFINITE_DISTANCE
);
4783 OnigOptionType save
= env
->options
;
4784 env
->options
= NENCLOSE(NCALL(node
)->target
)->option
;
4785 r
= optimize_node_left(NCALL(node
)->target
, opt
, env
);
4786 env
->options
= save
;
4794 OnigDistance min
, max
;
4796 QtfrNode
* qn
= NQTFR(node
);
4798 r
= optimize_node_left(qn
->target
, &nopt
, env
);
4801 if (qn
->lower
== 0 && IS_REPEAT_INFINITE(qn
->upper
)) {
4802 if (env
->mmd
.max
== 0 &&
4803 NTYPE(qn
->target
) == NT_CANY
&& qn
->greedy
) {
4804 if (IS_MULTILINE(env
->options
))
4805 add_opt_anc_info(&opt
->anc
, ANCHOR_ANYCHAR_STAR_ML
);
4807 add_opt_anc_info(&opt
->anc
, ANCHOR_ANYCHAR_STAR
);
4811 if (qn
->lower
> 0) {
4812 copy_node_opt_info(opt
, &nopt
);
4813 if (nopt
.exb
.len
> 0) {
4814 if (nopt
.exb
.reach_end
) {
4815 for (i
= 2; i
<= qn
->lower
&&
4816 ! is_full_opt_exact_info(&opt
->exb
); i
++) {
4817 concat_opt_exact_info(&opt
->exb
, &nopt
.exb
, env
->enc
);
4819 if (i
< qn
->lower
) {
4820 opt
->exb
.reach_end
= 0;
4825 if (qn
->lower
!= qn
->upper
) {
4826 opt
->exb
.reach_end
= 0;
4827 opt
->exm
.reach_end
= 0;
4830 opt
->exm
.reach_end
= 0;
4834 min
= distance_multiply(nopt
.len
.min
, qn
->lower
);
4835 if (IS_REPEAT_INFINITE(qn
->upper
))
4836 max
= (nopt
.len
.max
> 0 ? ONIG_INFINITE_DISTANCE
: 0);
4838 max
= distance_multiply(nopt
.len
.max
, qn
->upper
);
4840 set_mml(&opt
->len
, min
, max
);
4846 EncloseNode
* en
= NENCLOSE(node
);
4849 case ENCLOSE_OPTION
:
4851 OnigOptionType save
= env
->options
;
4853 env
->options
= en
->option
;
4854 r
= optimize_node_left(en
->target
, opt
, env
);
4855 env
->options
= save
;
4859 case ENCLOSE_MEMORY
:
4860 #ifdef USE_SUBEXP_CALL
4862 if (en
->opt_count
> MAX_NODE_OPT_INFO_REF_COUNT
) {
4863 OnigDistance min
, max
;
4866 max
= ONIG_INFINITE_DISTANCE
;
4867 if (IS_ENCLOSE_MIN_FIXED(en
)) min
= en
->min_len
;
4868 if (IS_ENCLOSE_MAX_FIXED(en
)) max
= en
->max_len
;
4869 set_mml(&opt
->len
, min
, max
);
4874 r
= optimize_node_left(en
->target
, opt
, env
);
4876 if (is_set_opt_anc_info(&opt
->anc
, ANCHOR_ANYCHAR_STAR_MASK
)) {
4877 if (BIT_STATUS_AT(env
->scan_env
->backrefed_mem
, en
->regnum
))
4878 remove_opt_anc_info(&opt
->anc
, ANCHOR_ANYCHAR_STAR_MASK
);
4883 case ENCLOSE_STOP_BACKTRACK
:
4884 r
= optimize_node_left(en
->target
, opt
, env
);
4892 fprintf(stderr
, "optimize_node_left: undefined node type %d\n",
4895 r
= ONIGERR_TYPE_BUG
;
4903 set_optimize_exact_info(regex_t
* reg
, OptExactInfo
* e
)
4907 if (e
->len
== 0) return 0;
4909 if (e
->ignore_case
) {
4910 reg
->exact
= (UChar
* )xmalloc(e
->len
);
4911 CHECK_NULL_RETURN_MEMERR(reg
->exact
);
4912 xmemcpy(reg
->exact
, e
->s
, e
->len
);
4913 reg
->exact_end
= reg
->exact
+ e
->len
;
4914 reg
->optimize
= ONIG_OPTIMIZE_EXACT_IC
;
4919 reg
->exact
= str_dup(e
->s
, e
->s
+ e
->len
);
4920 CHECK_NULL_RETURN_MEMERR(reg
->exact
);
4921 reg
->exact_end
= reg
->exact
+ e
->len
;
4924 ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg
->enc
, reg
->exact
, reg
->exact_end
);
4926 if (e
->len
>= 3 || (e
->len
>= 2 && allow_reverse
)) {
4927 r
= set_bm_skip(reg
->exact
, reg
->exact_end
, reg
->enc
,
4928 reg
->map
, &(reg
->int_map
));
4931 reg
->optimize
= (allow_reverse
!= 0
4932 ? ONIG_OPTIMIZE_EXACT_BM
: ONIG_OPTIMIZE_EXACT_BM_NOT_REV
);
4935 reg
->optimize
= ONIG_OPTIMIZE_EXACT
;
4939 reg
->dmin
= e
->mmd
.min
;
4940 reg
->dmax
= e
->mmd
.max
;
4942 if (reg
->dmin
!= ONIG_INFINITE_DISTANCE
) {
4943 reg
->threshold_len
= reg
->dmin
+ (int)(reg
->exact_end
- reg
->exact
);
4950 set_optimize_map_info(regex_t
* reg
, OptMapInfo
* m
)
4954 for (i
= 0; i
< ONIG_CHAR_TABLE_SIZE
; i
++)
4955 reg
->map
[i
] = m
->map
[i
];
4957 reg
->optimize
= ONIG_OPTIMIZE_MAP
;
4958 reg
->dmin
= m
->mmd
.min
;
4959 reg
->dmax
= m
->mmd
.max
;
4961 if (reg
->dmin
!= ONIG_INFINITE_DISTANCE
) {
4962 reg
->threshold_len
= reg
->dmin
+ 1;
4967 set_sub_anchor(regex_t
* reg
, OptAncInfo
* anc
)
4969 reg
->sub_anchor
|= anc
->left_anchor
& ANCHOR_BEGIN_LINE
;
4970 reg
->sub_anchor
|= anc
->right_anchor
& ANCHOR_END_LINE
;
4974 static void print_optimize_info(FILE* f
, regex_t
* reg
);
4978 set_optimize_info_from_tree(Node
* node
, regex_t
* reg
, ScanEnv
* scan_env
)
4986 env
.options
= reg
->options
;
4987 env
.case_fold_flag
= reg
->case_fold_flag
;
4988 env
.scan_env
= scan_env
;
4989 clear_mml(&env
.mmd
);
4991 r
= optimize_node_left(node
, &opt
, &env
);
4994 reg
->anchor
= opt
.anc
.left_anchor
& (ANCHOR_BEGIN_BUF
|
4995 ANCHOR_BEGIN_POSITION
| ANCHOR_ANYCHAR_STAR
| ANCHOR_ANYCHAR_STAR_ML
);
4997 reg
->anchor
|= opt
.anc
.right_anchor
& (ANCHOR_END_BUF
| ANCHOR_SEMI_END_BUF
);
4999 if (reg
->anchor
& (ANCHOR_END_BUF
| ANCHOR_SEMI_END_BUF
)) {
5000 reg
->anchor_dmin
= opt
.len
.min
;
5001 reg
->anchor_dmax
= opt
.len
.max
;
5004 if (opt
.exb
.len
> 0 || opt
.exm
.len
> 0) {
5005 select_opt_exact_info(reg
->enc
, &opt
.exb
, &opt
.exm
);
5006 if (opt
.map
.value
> 0 &&
5007 comp_opt_exact_or_map_info(&opt
.exb
, &opt
.map
) > 0) {
5011 r
= set_optimize_exact_info(reg
, &opt
.exb
);
5012 set_sub_anchor(reg
, &opt
.exb
.anc
);
5015 else if (opt
.map
.value
> 0) {
5017 set_optimize_map_info(reg
, &opt
.map
);
5018 set_sub_anchor(reg
, &opt
.map
.anc
);
5021 reg
->sub_anchor
|= opt
.anc
.left_anchor
& ANCHOR_BEGIN_LINE
;
5022 if (opt
.len
.max
== 0)
5023 reg
->sub_anchor
|= opt
.anc
.right_anchor
& ANCHOR_END_LINE
;
5026 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5027 print_optimize_info(stderr
, reg
);
5033 clear_optimize_info(regex_t
* reg
)
5035 reg
->optimize
= ONIG_OPTIMIZE_NONE
;
5037 reg
->anchor_dmin
= 0;
5038 reg
->anchor_dmax
= 0;
5039 reg
->sub_anchor
= 0;
5040 reg
->exact_end
= (UChar
* )NULL
;
5041 reg
->threshold_len
= 0;
5042 if (IS_NOT_NULL(reg
->exact
)) {
5044 reg
->exact
= (UChar
* )NULL
;
5050 static void print_enc_string(FILE* fp
, OnigEncoding enc
,
5051 const UChar
*s
, const UChar
*end
)
5053 fprintf(fp
, "\nPATTERN: /");
5055 if (ONIGENC_MBC_MINLEN(enc
) > 1) {
5061 code
= ONIGENC_MBC_TO_CODE(enc
, p
, end
);
5063 fprintf(fp
, " 0x%04x ", (int )code
);
5066 fputc((int )code
, fp
);
5069 p
+= enclen(enc
, p
);
5074 fputc((int )*s
, fp
);
5083 print_distance_range(FILE* f
, OnigDistance a
, OnigDistance b
)
5085 if (a
== ONIG_INFINITE_DISTANCE
)
5088 fprintf(f
, "(%u)", a
);
5092 if (b
== ONIG_INFINITE_DISTANCE
)
5095 fprintf(f
, "(%u)", b
);
5099 print_anchor(FILE* f
, int anchor
)
5105 if (anchor
& ANCHOR_BEGIN_BUF
) {
5106 fprintf(f
, "begin-buf");
5109 if (anchor
& ANCHOR_BEGIN_LINE
) {
5110 if (q
) fprintf(f
, ", ");
5112 fprintf(f
, "begin-line");
5114 if (anchor
& ANCHOR_BEGIN_POSITION
) {
5115 if (q
) fprintf(f
, ", ");
5117 fprintf(f
, "begin-pos");
5119 if (anchor
& ANCHOR_END_BUF
) {
5120 if (q
) fprintf(f
, ", ");
5122 fprintf(f
, "end-buf");
5124 if (anchor
& ANCHOR_SEMI_END_BUF
) {
5125 if (q
) fprintf(f
, ", ");
5127 fprintf(f
, "semi-end-buf");
5129 if (anchor
& ANCHOR_END_LINE
) {
5130 if (q
) fprintf(f
, ", ");
5132 fprintf(f
, "end-line");
5134 if (anchor
& ANCHOR_ANYCHAR_STAR
) {
5135 if (q
) fprintf(f
, ", ");
5137 fprintf(f
, "anychar-star");
5139 if (anchor
& ANCHOR_ANYCHAR_STAR_ML
) {
5140 if (q
) fprintf(f
, ", ");
5141 fprintf(f
, "anychar-star-pl");
5148 print_optimize_info(FILE* f
, regex_t
* reg
)
5150 static const char* on
[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
5151 "EXACT_IC", "MAP" };
5153 fprintf(f
, "optimize: %s\n", on
[reg
->optimize
]);
5154 fprintf(f
, " anchor: "); print_anchor(f
, reg
->anchor
);
5155 if ((reg
->anchor
& ANCHOR_END_BUF_MASK
) != 0)
5156 print_distance_range(f
, reg
->anchor_dmin
, reg
->anchor_dmax
);
5159 if (reg
->optimize
) {
5160 fprintf(f
, " sub anchor: "); print_anchor(f
, reg
->sub_anchor
);
5167 fprintf(f
, "exact: [");
5168 for (p
= reg
->exact
; p
< reg
->exact_end
; p
++) {
5171 fprintf(f
, "]: length: %d\n", (reg
->exact_end
- reg
->exact
));
5173 else if (reg
->optimize
& ONIG_OPTIMIZE_MAP
) {
5176 for (i
= 0; i
< ONIG_CHAR_TABLE_SIZE
; i
++)
5177 if (reg
->map
[i
]) n
++;
5179 fprintf(f
, "map: n=%d\n", n
);
5183 for (i
= 0; i
< ONIG_CHAR_TABLE_SIZE
; i
++) {
5184 if (reg
->map
[i
] != 0) {
5185 if (c
> 0) fputs(", ", f
);
5187 if (ONIGENC_MBC_MAXLEN(reg
->enc
) == 1 &&
5188 ONIGENC_IS_CODE_PRINT(reg
->enc
, (OnigCodePoint
)i
))
5191 fprintf(f
, "%d", i
);
5198 #endif /* ONIG_DEBUG */
5202 onig_free_body(regex_t
* reg
)
5204 if (IS_NOT_NULL(reg
)) {
5205 if (IS_NOT_NULL(reg
->p
)) xfree(reg
->p
);
5206 if (IS_NOT_NULL(reg
->exact
)) xfree(reg
->exact
);
5207 if (IS_NOT_NULL(reg
->int_map
)) xfree(reg
->int_map
);
5208 if (IS_NOT_NULL(reg
->int_map_backward
)) xfree(reg
->int_map_backward
);
5209 if (IS_NOT_NULL(reg
->repeat_range
)) xfree(reg
->repeat_range
);
5210 if (IS_NOT_NULL(reg
->chain
)) onig_free(reg
->chain
);
5212 #ifdef USE_NAMED_GROUP
5213 onig_names_free(reg
);
5219 onig_free(regex_t
* reg
)
5221 if (IS_NOT_NULL(reg
)) {
5222 onig_free_body(reg
);
5227 #define REGEX_TRANSFER(to,from) do {\
5228 (to)->state = ONIG_STATE_MODIFY;\
5229 onig_free_body(to);\
5230 xmemcpy(to, from, sizeof(regex_t));\
5235 onig_transfer(regex_t
* to
, regex_t
* from
)
5237 THREAD_ATOMIC_START
;
5238 REGEX_TRANSFER(to
, from
);
5242 #define REGEX_CHAIN_HEAD(reg) do {\
5243 while (IS_NOT_NULL((reg)->chain)) {\
5244 (reg) = (reg)->chain;\
5249 onig_chain_link_add(regex_t
* to
, regex_t
* add
)
5251 THREAD_ATOMIC_START
;
5252 REGEX_CHAIN_HEAD(to
);
5258 onig_chain_reduce(regex_t
* reg
)
5260 regex_t
*head
, *prev
;
5264 if (IS_NOT_NULL(head
)) {
5265 reg
->state
= ONIG_STATE_MODIFY
;
5266 while (IS_NOT_NULL(head
->chain
)) {
5270 prev
->chain
= (regex_t
* )NULL
;
5271 REGEX_TRANSFER(reg
, head
);
5276 static void print_compiled_byte_code_list
P_((FILE* f
, regex_t
* reg
));
5278 #ifdef ONIG_DEBUG_PARSE_TREE
5279 static void print_tree
P_((FILE* f
, Node
* node
));
5283 onig_compile(regex_t
* reg
, const UChar
* pattern
, const UChar
* pattern_end
,
5284 OnigErrorInfo
* einfo
)
5286 #define COMPILE_INIT_SIZE 20
5291 #ifdef USE_SUBEXP_CALL
5292 UnsetAddrList uslist
;
5295 if (IS_NOT_NULL(einfo
)) einfo
->par
= (UChar
* )NULL
;
5297 reg
->state
= ONIG_STATE_COMPILING
;
5300 print_enc_string(stderr
, reg
->enc
, pattern
, pattern_end
);
5303 if (reg
->alloc
== 0) {
5304 init_size
= ((int)(pattern_end
- pattern
)) * 2;
5305 if (init_size
<= 0) init_size
= COMPILE_INIT_SIZE
;
5306 r
= BBUF_INIT(reg
, init_size
);
5307 if (r
!= 0) goto end
;
5313 reg
->num_repeat
= 0;
5314 reg
->num_null_check
= 0;
5315 reg
->repeat_range_alloc
= 0;
5316 reg
->repeat_range
= (OnigRepeatRange
* )NULL
;
5317 #ifdef USE_COMBINATION_EXPLOSION_CHECK
5318 reg
->num_comb_exp_check
= 0;
5321 r
= onig_parse_make_tree(&root
, pattern
, pattern_end
, reg
, &scan_env
);
5322 if (r
!= 0 || root
== NULL
) goto err
;
5324 #ifdef USE_NAMED_GROUP
5325 /* mixed use named group and no-named group */
5326 if (scan_env
.num_named
> 0 &&
5327 IS_SYNTAX_BV(scan_env
.syntax
, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP
) &&
5328 !ONIG_IS_OPTION_ON(reg
->options
, ONIG_OPTION_CAPTURE_GROUP
)) {
5329 if (scan_env
.num_named
!= scan_env
.num_mem
)
5330 r
= disable_noname_group_capture(&root
, reg
, &scan_env
);
5332 r
= numbered_ref_check(root
);
5334 if (r
!= 0) goto err
;
5338 #ifdef USE_SUBEXP_CALL
5339 if (scan_env
.num_call
> 0) {
5340 r
= unset_addr_list_init(&uslist
, scan_env
.num_call
);
5341 if (r
!= 0) goto err
;
5342 scan_env
.unset_addr_list
= &uslist
;
5343 r
= setup_subexp_call(root
, &scan_env
);
5344 if (r
!= 0) goto err_unset
;
5345 r
= subexp_recursive_check_trav(root
, &scan_env
);
5346 if (r
< 0) goto err_unset
;
5347 r
= subexp_inf_recursive_check_trav(root
, &scan_env
);
5348 if (r
!= 0) goto err_unset
;
5350 reg
->num_call
= scan_env
.num_call
;
5356 r
= setup_tree(root
, reg
, 0, &scan_env
);
5357 if (r
!= 0) goto err_unset
;
5359 #ifdef ONIG_DEBUG_PARSE_TREE
5360 print_tree(stderr
, root
);
5363 reg
->capture_history
= scan_env
.capture_history
;
5364 reg
->bt_mem_start
= scan_env
.bt_mem_start
;
5365 reg
->bt_mem_start
|= reg
->capture_history
;
5366 if (IS_FIND_CONDITION(reg
->options
))
5367 BIT_STATUS_ON_ALL(reg
->bt_mem_end
);
5369 reg
->bt_mem_end
= scan_env
.bt_mem_end
;
5370 reg
->bt_mem_end
|= reg
->capture_history
;
5373 #ifdef USE_COMBINATION_EXPLOSION_CHECK
5374 if (scan_env
.backrefed_mem
== 0
5375 #ifdef USE_SUBEXP_CALL
5376 || scan_env
.num_call
== 0
5379 setup_comb_exp_check(root
, 0, &scan_env
);
5380 #ifdef USE_SUBEXP_CALL
5381 if (scan_env
.has_recursion
!= 0) {
5382 scan_env
.num_comb_exp_check
= 0;
5386 if (scan_env
.comb_exp_max_regnum
> 0) {
5388 for (i
= 1; i
<= scan_env
.comb_exp_max_regnum
; i
++) {
5389 if (BIT_STATUS_AT(scan_env
.backrefed_mem
, i
) != 0) {
5390 scan_env
.num_comb_exp_check
= 0;
5397 reg
->num_comb_exp_check
= scan_env
.num_comb_exp_check
;
5400 clear_optimize_info(reg
);
5401 #ifndef ONIG_DONT_OPTIMIZE
5402 r
= set_optimize_info_from_tree(root
, reg
, &scan_env
);
5403 if (r
!= 0) goto err_unset
;
5406 if (IS_NOT_NULL(scan_env
.mem_nodes_dynamic
)) {
5407 xfree(scan_env
.mem_nodes_dynamic
);
5408 scan_env
.mem_nodes_dynamic
= (Node
** )NULL
;
5411 r
= compile_tree(root
, reg
);
5413 r
= add_opcode(reg
, OP_END
);
5414 #ifdef USE_SUBEXP_CALL
5415 if (scan_env
.num_call
> 0) {
5416 r
= unset_addr_list_fix(&uslist
, reg
);
5417 unset_addr_list_end(&uslist
);
5422 if ((reg
->num_repeat
!= 0) || (reg
->bt_mem_end
!= 0))
5423 reg
->stack_pop_level
= STACK_POP_LEVEL_ALL
;
5425 if (reg
->bt_mem_start
!= 0)
5426 reg
->stack_pop_level
= STACK_POP_LEVEL_MEM_START
;
5428 reg
->stack_pop_level
= STACK_POP_LEVEL_FREE
;
5431 #ifdef USE_SUBEXP_CALL
5432 else if (scan_env
.num_call
> 0) {
5433 unset_addr_list_end(&uslist
);
5436 onig_node_free(root
);
5438 #ifdef ONIG_DEBUG_COMPILE
5439 #ifdef USE_NAMED_GROUP
5440 onig_print_names(stderr
, reg
);
5442 print_compiled_byte_code_list(stderr
, reg
);
5446 reg
->state
= ONIG_STATE_NORMAL
;
5450 #ifdef USE_SUBEXP_CALL
5451 if (scan_env
.num_call
> 0) {
5452 unset_addr_list_end(&uslist
);
5456 if (IS_NOT_NULL(scan_env
.error
)) {
5457 if (IS_NOT_NULL(einfo
)) {
5458 einfo
->enc
= scan_env
.enc
;
5459 einfo
->par
= scan_env
.error
;
5460 einfo
->par_end
= scan_env
.error_end
;
5464 onig_node_free(root
);
5465 if (IS_NOT_NULL(scan_env
.mem_nodes_dynamic
))
5466 xfree(scan_env
.mem_nodes_dynamic
);
5470 #ifdef USE_RECOMPILE_API
5472 onig_recompile(regex_t
* reg
, const UChar
* pattern
, const UChar
* pattern_end
,
5473 OnigOptionType option
, OnigEncoding enc
, OnigSyntaxType
* syntax
,
5474 OnigErrorInfo
* einfo
)
5479 r
= onig_new(&new_reg
, pattern
, pattern_end
, option
, enc
, syntax
, einfo
);
5481 if (ONIG_STATE(reg
) == ONIG_STATE_NORMAL
) {
5482 onig_transfer(reg
, new_reg
);
5485 onig_chain_link_add(reg
, new_reg
);
5491 static int onig_inited
= 0;
5494 onig_reg_init(regex_t
* reg
, OnigOptionType option
,
5495 OnigCaseFoldType case_fold_flag
,
5496 OnigEncoding enc
, OnigSyntaxType
* syntax
)
5502 return ONIGERR_INVALID_ARGUMENT
;
5504 if (ONIGENC_IS_UNDEF(enc
))
5505 return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED
;
5507 if ((option
& (ONIG_OPTION_DONT_CAPTURE_GROUP
|ONIG_OPTION_CAPTURE_GROUP
))
5508 == (ONIG_OPTION_DONT_CAPTURE_GROUP
|ONIG_OPTION_CAPTURE_GROUP
)) {
5509 return ONIGERR_INVALID_COMBINATION_OF_OPTIONS
;
5512 (reg
)->state
= ONIG_STATE_MODIFY
;
5514 if ((option
& ONIG_OPTION_NEGATE_SINGLELINE
) != 0) {
5515 option
|= syntax
->options
;
5516 option
&= ~ONIG_OPTION_SINGLELINE
;
5519 option
|= syntax
->options
;
5522 (reg
)->options
= option
;
5523 (reg
)->syntax
= syntax
;
5524 (reg
)->optimize
= 0;
5525 (reg
)->exact
= (UChar
* )NULL
;
5526 (reg
)->int_map
= (int* )NULL
;
5527 (reg
)->int_map_backward
= (int* )NULL
;
5528 (reg
)->chain
= (regex_t
* )NULL
;
5530 (reg
)->p
= (UChar
* )NULL
;
5533 (reg
)->name_table
= (void* )NULL
;
5535 (reg
)->case_fold_flag
= case_fold_flag
;
5540 onig_new_without_alloc(regex_t
* reg
, const UChar
* pattern
,
5541 const UChar
* pattern_end
, OnigOptionType option
, OnigEncoding enc
,
5542 OnigSyntaxType
* syntax
, OnigErrorInfo
* einfo
)
5546 r
= onig_reg_init(reg
, option
, ONIGENC_CASE_FOLD_DEFAULT
, enc
, syntax
);
5549 r
= onig_compile(reg
, pattern
, pattern_end
, einfo
);
5554 onig_new(regex_t
** reg
, const UChar
* pattern
, const UChar
* pattern_end
,
5555 OnigOptionType option
, OnigEncoding enc
, OnigSyntaxType
* syntax
,
5556 OnigErrorInfo
* einfo
)
5560 *reg
= (regex_t
* )xmalloc(sizeof(regex_t
));
5561 if (IS_NULL(*reg
)) return ONIGERR_MEMORY
;
5563 r
= onig_reg_init(*reg
, option
, ONIGENC_CASE_FOLD_DEFAULT
, enc
, syntax
);
5566 r
= onig_compile(*reg
, pattern
, pattern_end
, einfo
);
5579 if (onig_inited
!= 0)
5583 THREAD_ATOMIC_START
;
5588 /* onigenc_set_default_caseconv_table((UChar* )0); */
5590 #ifdef ONIG_DEBUG_STATISTICS
5591 onig_statistics_init();
5599 static OnigEndCallListItemType
* EndCallTop
;
5601 extern void onig_add_end_call(void (*func
)(void))
5603 OnigEndCallListItemType
* item
;
5605 item
= (OnigEndCallListItemType
* )xmalloc(sizeof(*item
));
5606 if (item
== 0) return ;
5608 item
->next
= EndCallTop
;
5615 exec_end_call_list(void)
5617 OnigEndCallListItemType
* prev
;
5620 while (EndCallTop
!= 0) {
5621 func
= EndCallTop
->func
;
5625 EndCallTop
= EndCallTop
->next
;
5633 THREAD_ATOMIC_START
;
5635 exec_end_call_list();
5637 #ifdef ONIG_DEBUG_STATISTICS
5638 onig_print_statistics(stderr
);
5641 #ifdef USE_SHARED_CCLASS_TABLE
5642 onig_free_shared_cclass_table();
5645 #ifdef USE_PARSE_TREE_NODE_RECYCLE
5646 onig_free_node_list();
5657 onig_is_in_code_range(const UChar
* p
, OnigCodePoint code
)
5659 OnigCodePoint n
, *data
;
5660 OnigCodePoint low
, high
, x
;
5662 GET_CODE_POINT(n
, p
);
5663 data
= (OnigCodePoint
* )p
;
5666 for (low
= 0, high
= n
; low
< high
; ) {
5667 x
= (low
+ high
) >> 1;
5668 if (code
> data
[x
* 2 + 1])
5674 return ((low
< n
&& code
>= data
[low
* 2]) ? 1 : 0);
5678 onig_is_code_in_cc_len(int elen
, OnigCodePoint code
, CClassNode
* cc
)
5682 if (elen
> 1 || (code
>= SINGLE_BYTE_SIZE
)) {
5683 if (IS_NULL(cc
->mbuf
)) {
5687 found
= (onig_is_in_code_range(cc
->mbuf
->p
, code
) != 0 ? 1 : 0);
5691 found
= (BITSET_AT(cc
->bs
, code
) == 0 ? 0 : 1);
5694 if (IS_NCCLASS_NOT(cc
))
5701 onig_is_code_in_cc(OnigEncoding enc
, OnigCodePoint code
, CClassNode
* cc
)
5705 if (ONIGENC_MBC_MINLEN(enc
) > 1) {
5709 len
= ONIGENC_CODE_TO_MBCLEN(enc
, code
);
5711 return onig_is_code_in_cc_len(len
, code
, cc
);
5717 /* arguments type */
5718 #define ARG_SPECIAL -1
5720 #define ARG_RELADDR 1
5721 #define ARG_ABSADDR 2
5722 #define ARG_LENGTH 3
5723 #define ARG_MEMNUM 4
5724 #define ARG_OPTION 5
5725 #define ARG_STATE_CHECK 6
5727 OnigOpInfoType OnigOpInfo
[] = {
5728 { OP_FINISH
, "finish", ARG_NON
},
5729 { OP_END
, "end", ARG_NON
},
5730 { OP_EXACT1
, "exact1", ARG_SPECIAL
},
5731 { OP_EXACT2
, "exact2", ARG_SPECIAL
},
5732 { OP_EXACT3
, "exact3", ARG_SPECIAL
},
5733 { OP_EXACT4
, "exact4", ARG_SPECIAL
},
5734 { OP_EXACT5
, "exact5", ARG_SPECIAL
},
5735 { OP_EXACTN
, "exactn", ARG_SPECIAL
},
5736 { OP_EXACTMB2N1
, "exactmb2-n1", ARG_SPECIAL
},
5737 { OP_EXACTMB2N2
, "exactmb2-n2", ARG_SPECIAL
},
5738 { OP_EXACTMB2N3
, "exactmb2-n3", ARG_SPECIAL
},
5739 { OP_EXACTMB2N
, "exactmb2-n", ARG_SPECIAL
},
5740 { OP_EXACTMB3N
, "exactmb3n" , ARG_SPECIAL
},
5741 { OP_EXACTMBN
, "exactmbn", ARG_SPECIAL
},
5742 { OP_EXACT1_IC
, "exact1-ic", ARG_SPECIAL
},
5743 { OP_EXACTN_IC
, "exactn-ic", ARG_SPECIAL
},
5744 { OP_CCLASS
, "cclass", ARG_SPECIAL
},
5745 { OP_CCLASS_MB
, "cclass-mb", ARG_SPECIAL
},
5746 { OP_CCLASS_MIX
, "cclass-mix", ARG_SPECIAL
},
5747 { OP_CCLASS_NOT
, "cclass-not", ARG_SPECIAL
},
5748 { OP_CCLASS_MB_NOT
, "cclass-mb-not", ARG_SPECIAL
},
5749 { OP_CCLASS_MIX_NOT
, "cclass-mix-not", ARG_SPECIAL
},
5750 { OP_CCLASS_NODE
, "cclass-node", ARG_SPECIAL
},
5751 { OP_ANYCHAR
, "anychar", ARG_NON
},
5752 { OP_ANYCHAR_ML
, "anychar-ml", ARG_NON
},
5753 { OP_ANYCHAR_STAR
, "anychar*", ARG_NON
},
5754 { OP_ANYCHAR_ML_STAR
, "anychar-ml*", ARG_NON
},
5755 { OP_ANYCHAR_STAR_PEEK_NEXT
, "anychar*-peek-next", ARG_SPECIAL
},
5756 { OP_ANYCHAR_ML_STAR_PEEK_NEXT
, "anychar-ml*-peek-next", ARG_SPECIAL
},
5757 { OP_WORD
, "word", ARG_NON
},
5758 { OP_NOT_WORD
, "not-word", ARG_NON
},
5759 { OP_WORD_BOUND
, "word-bound", ARG_NON
},
5760 { OP_NOT_WORD_BOUND
, "not-word-bound", ARG_NON
},
5761 { OP_WORD_BEGIN
, "word-begin", ARG_NON
},
5762 { OP_WORD_END
, "word-end", ARG_NON
},
5763 { OP_BEGIN_BUF
, "begin-buf", ARG_NON
},
5764 { OP_END_BUF
, "end-buf", ARG_NON
},
5765 { OP_BEGIN_LINE
, "begin-line", ARG_NON
},
5766 { OP_END_LINE
, "end-line", ARG_NON
},
5767 { OP_SEMI_END_BUF
, "semi-end-buf", ARG_NON
},
5768 { OP_BEGIN_POSITION
, "begin-position", ARG_NON
},
5769 { OP_BACKREF1
, "backref1", ARG_NON
},
5770 { OP_BACKREF2
, "backref2", ARG_NON
},
5771 { OP_BACKREFN
, "backrefn", ARG_MEMNUM
},
5772 { OP_BACKREFN_IC
, "backrefn-ic", ARG_SPECIAL
},
5773 { OP_BACKREF_MULTI
, "backref_multi", ARG_SPECIAL
},
5774 { OP_BACKREF_MULTI_IC
, "backref_multi-ic", ARG_SPECIAL
},
5775 { OP_BACKREF_WITH_LEVEL
, "backref_at_level", ARG_SPECIAL
},
5776 { OP_MEMORY_START_PUSH
, "mem-start-push", ARG_MEMNUM
},
5777 { OP_MEMORY_START
, "mem-start", ARG_MEMNUM
},
5778 { OP_MEMORY_END_PUSH
, "mem-end-push", ARG_MEMNUM
},
5779 { OP_MEMORY_END_PUSH_REC
, "mem-end-push-rec", ARG_MEMNUM
},
5780 { OP_MEMORY_END
, "mem-end", ARG_MEMNUM
},
5781 { OP_MEMORY_END_REC
, "mem-end-rec", ARG_MEMNUM
},
5782 { OP_SET_OPTION_PUSH
, "set-option-push", ARG_OPTION
},
5783 { OP_SET_OPTION
, "set-option", ARG_OPTION
},
5784 { OP_FAIL
, "fail", ARG_NON
},
5785 { OP_JUMP
, "jump", ARG_RELADDR
},
5786 { OP_PUSH
, "push", ARG_RELADDR
},
5787 { OP_POP
, "pop", ARG_NON
},
5788 { OP_PUSH_OR_JUMP_EXACT1
, "push-or-jump-e1", ARG_SPECIAL
},
5789 { OP_PUSH_IF_PEEK_NEXT
, "push-if-peek-next", ARG_SPECIAL
},
5790 { OP_REPEAT
, "repeat", ARG_SPECIAL
},
5791 { OP_REPEAT_NG
, "repeat-ng", ARG_SPECIAL
},
5792 { OP_REPEAT_INC
, "repeat-inc", ARG_MEMNUM
},
5793 { OP_REPEAT_INC_NG
, "repeat-inc-ng", ARG_MEMNUM
},
5794 { OP_REPEAT_INC_SG
, "repeat-inc-sg", ARG_MEMNUM
},
5795 { OP_REPEAT_INC_NG_SG
, "repeat-inc-ng-sg", ARG_MEMNUM
},
5796 { OP_NULL_CHECK_START
, "null-check-start", ARG_MEMNUM
},
5797 { OP_NULL_CHECK_END
, "null-check-end", ARG_MEMNUM
},
5798 { OP_NULL_CHECK_END_MEMST
,"null-check-end-memst", ARG_MEMNUM
},
5799 { OP_NULL_CHECK_END_MEMST_PUSH
,"null-check-end-memst-push", ARG_MEMNUM
},
5800 { OP_PUSH_POS
, "push-pos", ARG_NON
},
5801 { OP_POP_POS
, "pop-pos", ARG_NON
},
5802 { OP_PUSH_POS_NOT
, "push-pos-not", ARG_RELADDR
},
5803 { OP_FAIL_POS
, "fail-pos", ARG_NON
},
5804 { OP_PUSH_STOP_BT
, "push-stop-bt", ARG_NON
},
5805 { OP_POP_STOP_BT
, "pop-stop-bt", ARG_NON
},
5806 { OP_LOOK_BEHIND
, "look-behind", ARG_SPECIAL
},
5807 { OP_PUSH_LOOK_BEHIND_NOT
, "push-look-behind-not", ARG_SPECIAL
},
5808 { OP_FAIL_LOOK_BEHIND_NOT
, "fail-look-behind-not", ARG_NON
},
5809 { OP_CALL
, "call", ARG_ABSADDR
},
5810 { OP_RETURN
, "return", ARG_NON
},
5811 { OP_STATE_CHECK_PUSH
, "state-check-push", ARG_SPECIAL
},
5812 { OP_STATE_CHECK_PUSH_OR_JUMP
, "state-check-push-or-jump", ARG_SPECIAL
},
5813 { OP_STATE_CHECK
, "state-check", ARG_STATE_CHECK
},
5814 { OP_STATE_CHECK_ANYCHAR_STAR
, "state-check-anychar*", ARG_STATE_CHECK
},
5815 { OP_STATE_CHECK_ANYCHAR_ML_STAR
,
5816 "state-check-anychar-ml*", ARG_STATE_CHECK
},
5825 for (i
= 0; OnigOpInfo
[i
].opcode
>= 0; i
++) {
5826 if (opcode
== OnigOpInfo
[i
].opcode
)
5827 return OnigOpInfo
[i
].name
;
5833 op2arg_type(int opcode
)
5837 for (i
= 0; OnigOpInfo
[i
].opcode
>= 0; i
++) {
5838 if (opcode
== OnigOpInfo
[i
].opcode
)
5839 return OnigOpInfo
[i
].arg_type
;
5845 Indent(FILE* f
, int indent
)
5848 for (i
= 0; i
< indent
; i
++) putc(' ', f
);
5852 p_string(FILE* f
, int len
, UChar
* s
)
5855 while (len
-- > 0) { fputc(*s
++, f
); }
5859 p_len_string(FILE* f
, LengthType len
, int mb_len
, UChar
* s
)
5861 int x
= len
* mb_len
;
5863 fprintf(f
, ":%d:", len
);
5864 while (x
-- > 0) { fputc(*s
++, f
); }
5868 onig_print_compiled_byte_code(FILE* f
, UChar
* bp
, UChar
** nextp
,
5875 StateCheckNumType scn
;
5879 fprintf(f
, "[%s", op2name(*bp
));
5880 arg_type
= op2arg_type(*bp
);
5881 if (arg_type
!= ARG_SPECIAL
) {
5887 GET_RELADDR_INC(addr
, bp
);
5888 fprintf(f
, ":(%d)", addr
);
5891 GET_ABSADDR_INC(addr
, bp
);
5892 fprintf(f
, ":(%d)", addr
);
5895 GET_LENGTH_INC(len
, bp
);
5896 fprintf(f
, ":%d", len
);
5899 mem
= *((MemNumType
* )bp
);
5901 fprintf(f
, ":%d", mem
);
5905 OnigOptionType option
= *((OnigOptionType
* )bp
);
5907 fprintf(f
, ":%d", option
);
5911 case ARG_STATE_CHECK
:
5912 scn
= *((StateCheckNumType
* )bp
);
5913 bp
+= SIZE_STATE_CHECK_NUM
;
5914 fprintf(f
, ":%d", scn
);
5921 case OP_ANYCHAR_STAR_PEEK_NEXT
:
5922 case OP_ANYCHAR_ML_STAR_PEEK_NEXT
:
5923 p_string(f
, 1, bp
++); break;
5925 p_string(f
, 2, bp
); bp
+= 2; break;
5927 p_string(f
, 3, bp
); bp
+= 3; break;
5929 p_string(f
, 4, bp
); bp
+= 4; break;
5931 p_string(f
, 5, bp
); bp
+= 5; break;
5933 GET_LENGTH_INC(len
, bp
);
5934 p_len_string(f
, len
, 1, bp
);
5939 p_string(f
, 2, bp
); bp
+= 2; break;
5941 p_string(f
, 4, bp
); bp
+= 4; break;
5943 p_string(f
, 6, bp
); bp
+= 6; break;
5945 GET_LENGTH_INC(len
, bp
);
5946 p_len_string(f
, len
, 2, bp
);
5950 GET_LENGTH_INC(len
, bp
);
5951 p_len_string(f
, len
, 3, bp
);
5958 GET_LENGTH_INC(mb_len
, bp
);
5959 GET_LENGTH_INC(len
, bp
);
5960 fprintf(f
, ":%d:%d:", mb_len
, len
);
5962 while (n
-- > 0) { fputc(*bp
++, f
); }
5967 len
= enclen(enc
, bp
);
5968 p_string(f
, len
, bp
);
5972 GET_LENGTH_INC(len
, bp
);
5973 p_len_string(f
, len
, 1, bp
);
5978 n
= bitset_on_num((BitSetRef
)bp
);
5980 fprintf(f
, ":%d", n
);
5984 n
= bitset_on_num((BitSetRef
)bp
);
5986 fprintf(f
, ":%d", n
);
5990 case OP_CCLASS_MB_NOT
:
5991 GET_LENGTH_INC(len
, bp
);
5993 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
5996 GET_CODE_POINT(code
, q
);
5998 fprintf(f
, ":%d:%d", (int )code
, len
);
6002 case OP_CCLASS_MIX_NOT
:
6003 n
= bitset_on_num((BitSetRef
)bp
);
6005 GET_LENGTH_INC(len
, bp
);
6007 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6010 GET_CODE_POINT(code
, q
);
6012 fprintf(f
, ":%d:%d:%d", n
, (int )code
, len
);
6015 case OP_CCLASS_NODE
:
6019 GET_POINTER_INC(cc
, bp
);
6020 n
= bitset_on_num(cc
->bs
);
6021 fprintf(f
, ":%u:%d", (unsigned int )cc
, n
);
6025 case OP_BACKREFN_IC
:
6026 mem
= *((MemNumType
* )bp
);
6028 fprintf(f
, ":%d", mem
);
6031 case OP_BACKREF_MULTI_IC
:
6032 case OP_BACKREF_MULTI
:
6034 GET_LENGTH_INC(len
, bp
);
6035 for (i
= 0; i
< len
; i
++) {
6036 GET_MEMNUM_INC(mem
, bp
);
6037 if (i
> 0) fputs(", ", f
);
6038 fprintf(f
, "%d", mem
);
6042 case OP_BACKREF_WITH_LEVEL
:
6044 OnigOptionType option
;
6047 GET_OPTION_INC(option
, bp
);
6048 fprintf(f
, ":%d", option
);
6049 GET_LENGTH_INC(level
, bp
);
6050 fprintf(f
, ":%d", level
);
6053 GET_LENGTH_INC(len
, bp
);
6054 for (i
= 0; i
< len
; i
++) {
6055 GET_MEMNUM_INC(mem
, bp
);
6056 if (i
> 0) fputs(", ", f
);
6057 fprintf(f
, "%d", mem
);
6065 mem
= *((MemNumType
* )bp
);
6067 addr
= *((RelAddrType
* )bp
);
6069 fprintf(f
, ":%d:%d", mem
, addr
);
6073 case OP_PUSH_OR_JUMP_EXACT1
:
6074 case OP_PUSH_IF_PEEK_NEXT
:
6075 addr
= *((RelAddrType
* )bp
);
6077 fprintf(f
, ":(%d)", addr
);
6082 case OP_LOOK_BEHIND
:
6083 GET_LENGTH_INC(len
, bp
);
6084 fprintf(f
, ":%d", len
);
6087 case OP_PUSH_LOOK_BEHIND_NOT
:
6088 GET_RELADDR_INC(addr
, bp
);
6089 GET_LENGTH_INC(len
, bp
);
6090 fprintf(f
, ":%d:(%d)", len
, addr
);
6093 case OP_STATE_CHECK_PUSH
:
6094 case OP_STATE_CHECK_PUSH_OR_JUMP
:
6095 scn
= *((StateCheckNumType
* )bp
);
6096 bp
+= SIZE_STATE_CHECK_NUM
;
6097 addr
= *((RelAddrType
* )bp
);
6099 fprintf(f
, ":%d:(%d)", scn
, addr
);
6103 fprintf(stderr
, "onig_print_compiled_byte_code: undefined code %d\n",
6108 if (nextp
) *nextp
= bp
;
6112 print_compiled_byte_code_list(FILE* f
, regex_t
* reg
)
6116 UChar
* end
= reg
->p
+ reg
->used
;
6118 fprintf(f
, "code length: %d\n", reg
->used
);
6129 onig_print_compiled_byte_code(f
, bp
, &bp
, reg
->enc
);
6136 print_indent_tree(FILE* f
, Node
* node
, int indent
)
6143 if (IS_NULL(node
)) {
6144 fprintf(f
, "ERROR: null node!!!\n");
6152 if (NTYPE(node
) == NT_LIST
)
6153 fprintf(f
, "<list:%x>\n", (int )node
);
6155 fprintf(f
, "<alt:%x>\n", (int )node
);
6157 print_indent_tree(f
, NCAR(node
), indent
+ add
);
6158 while (IS_NOT_NULL(node
= NCDR(node
))) {
6159 if (NTYPE(node
) != type
) {
6160 fprintf(f
, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node
));
6163 print_indent_tree(f
, NCAR(node
), indent
+ add
);
6168 fprintf(f
, "<string%s:%x>",
6169 (NSTRING_IS_RAW(node
) ? "-raw" : ""), (int )node
);
6170 for (p
= NSTR(node
)->s
; p
< NSTR(node
)->end
; p
++) {
6171 if (*p
>= 0x20 && *p
< 0x7f)
6174 fprintf(f
, " 0x%02x", *p
);
6180 fprintf(f
, "<cclass:%x>", (int )node
);
6181 if (IS_NCCLASS_NOT(NCCLASS(node
))) fputs(" not", f
);
6182 if (NCCLASS(node
)->mbuf
) {
6183 BBuf
* bbuf
= NCCLASS(node
)->mbuf
;
6184 for (i
= 0; i
< bbuf
->used
; i
++) {
6185 if (i
> 0) fprintf(f
, ",");
6186 fprintf(f
, "%0x", bbuf
->p
[i
]);
6192 fprintf(f
, "<ctype:%x> ", (int )node
);
6193 switch (NCTYPE(node
)->ctype
) {
6194 case ONIGENC_CTYPE_WORD
:
6195 if (NCTYPE(node
)->not != 0)
6196 fputs("not word", f
);
6202 fprintf(f
, "ERROR: undefined ctype.\n");
6208 fprintf(f
, "<anychar:%x>", (int )node
);
6212 fprintf(f
, "<anchor:%x> ", (int )node
);
6213 switch (NANCHOR(node
)->type
) {
6214 case ANCHOR_BEGIN_BUF
: fputs("begin buf", f
); break;
6215 case ANCHOR_END_BUF
: fputs("end buf", f
); break;
6216 case ANCHOR_BEGIN_LINE
: fputs("begin line", f
); break;
6217 case ANCHOR_END_LINE
: fputs("end line", f
); break;
6218 case ANCHOR_SEMI_END_BUF
: fputs("semi end buf", f
); break;
6219 case ANCHOR_BEGIN_POSITION
: fputs("begin position", f
); break;
6221 case ANCHOR_WORD_BOUND
: fputs("word bound", f
); break;
6222 case ANCHOR_NOT_WORD_BOUND
: fputs("not word bound", f
); break;
6223 #ifdef USE_WORD_BEGIN_END
6224 case ANCHOR_WORD_BEGIN
: fputs("word begin", f
); break;
6225 case ANCHOR_WORD_END
: fputs("word end", f
); break;
6227 case ANCHOR_PREC_READ
: fputs("prec read", f
); break;
6228 case ANCHOR_PREC_READ_NOT
: fputs("prec read not", f
); break;
6229 case ANCHOR_LOOK_BEHIND
: fputs("look_behind", f
); break;
6230 case ANCHOR_LOOK_BEHIND_NOT
: fputs("look_behind_not",f
); break;
6233 fprintf(f
, "ERROR: undefined anchor type.\n");
6241 BRefNode
* br
= NBREF(node
);
6243 fprintf(f
, "<backref:%x>", (int )node
);
6244 for (i
= 0; i
< br
->back_num
; i
++) {
6245 if (i
> 0) fputs(", ", f
);
6246 fprintf(f
, "%d", p
[i
]);
6251 #ifdef USE_SUBEXP_CALL
6254 CallNode
* cn
= NCALL(node
);
6255 fprintf(f
, "<call:%x>", (int )node
);
6256 p_string(f
, cn
->name_end
- cn
->name
, cn
->name
);
6262 fprintf(f
, "<quantifier:%x>{%d,%d}%s\n", (int )node
,
6263 NQTFR(node
)->lower
, NQTFR(node
)->upper
,
6264 (NQTFR(node
)->greedy
? "" : "?"));
6265 print_indent_tree(f
, NQTFR(node
)->target
, indent
+ add
);
6269 fprintf(f
, "<enclose:%x> ", (int )node
);
6270 switch (NENCLOSE(node
)->type
) {
6271 case ENCLOSE_OPTION
:
6272 fprintf(f
, "option:%d", NENCLOSE(node
)->option
);
6274 case ENCLOSE_MEMORY
:
6275 fprintf(f
, "memory:%d", NENCLOSE(node
)->regnum
);
6277 case ENCLOSE_STOP_BACKTRACK
:
6278 fprintf(f
, "stop-bt");
6285 print_indent_tree(f
, NENCLOSE(node
)->target
, indent
+ add
);
6289 fprintf(f
, "print_indent_tree: undefined node type %d\n", NTYPE(node
));
6293 if (type
!= NT_LIST
&& type
!= NT_ALT
&& type
!= NT_QTFR
&&
6298 #endif /* ONIG_DEBUG */
6300 #ifdef ONIG_DEBUG_PARSE_TREE
6302 print_tree(FILE* f
, Node
* node
)
6304 print_indent_tree(f
, node
, 0);