1 /**********************************************************************
2 regcomp.c - Oniguruma (regular expression library)
3 **********************************************************************/
5 * Copyright (c) 2002-2013 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
8 * Copyright (c) 2015, Hewlett Packard Enterprise Development, L.P.<BR>
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 OnigCaseFoldType OnigDefaultCaseFoldFlag
= ONIGENC_CASE_FOLD_MIN
;
36 extern OnigCaseFoldType
37 onig_get_default_case_fold_flag(void)
39 return OnigDefaultCaseFoldFlag
;
43 onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag
)
45 OnigDefaultCaseFoldFlag
= case_fold_flag
;
50 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
51 static unsigned char PadBuf
[WORD_ALIGNMENT_SIZE
];
55 str_dup(UChar
* s
, UChar
* end
)
57 int len
= (int)(end
- s
);
60 UChar
* r
= (UChar
* )xmalloc(len
+ 1);
70 swap_node(Node
* a
, Node
* b
)
73 c
= *a
; *a
= *b
; *b
= c
;
75 if (NTYPE(a
) == NT_STR
) {
76 StrNode
* sn
= NSTR(a
);
78 int len
= (int)(sn
->end
- sn
->s
);
80 sn
->end
= sn
->s
+ len
;
84 if (NTYPE(b
) == NT_STR
) {
85 StrNode
* sn
= NSTR(b
);
87 int len
= (int)(sn
->end
- sn
->s
);
89 sn
->end
= sn
->s
+ len
;
95 distance_add(OnigDistance d1
, OnigDistance d2
)
97 if (d1
== ONIG_INFINITE_DISTANCE
|| d2
== ONIG_INFINITE_DISTANCE
)
98 return ONIG_INFINITE_DISTANCE
;
100 if (d1
<= ONIG_INFINITE_DISTANCE
- d2
) return d1
+ d2
;
101 else return ONIG_INFINITE_DISTANCE
;
106 distance_multiply(OnigDistance d
, int m
)
108 if (m
== 0) return 0;
110 if (d
< ONIG_INFINITE_DISTANCE
/ m
)
113 return ONIG_INFINITE_DISTANCE
;
117 bitset_is_empty(BitSetRef bs
)
120 for (i
= 0; i
< (int )BITSET_SIZE
; i
++) {
121 if (bs
[i
] != 0) return 0;
128 bitset_on_num(BitSetRef bs
)
133 for (i
= 0; i
< SINGLE_BYTE_SIZE
; i
++) {
134 if (BITSET_AT(bs
, i
)) n
++;
141 onig_bbuf_init(BBuf
* buf
, int size
)
148 buf
->p
= (UChar
* )xmalloc(size
);
149 if (IS_NULL(buf
->p
)) return(ONIGERR_MEMORY
);
158 #ifdef USE_SUBEXP_CALL
161 unset_addr_list_init(UnsetAddrList
* uslist
, int size
)
165 p
= (UnsetAddr
* )xmalloc(sizeof(UnsetAddr
)* size
);
166 CHECK_NULL_RETURN_MEMERR(p
);
168 uslist
->alloc
= size
;
174 unset_addr_list_end(UnsetAddrList
* uslist
)
176 if (IS_NOT_NULL(uslist
->us
))
181 unset_addr_list_add(UnsetAddrList
* uslist
, int offset
, struct _Node
* node
)
186 if (uslist
->num
>= uslist
->alloc
) {
187 size
= uslist
->alloc
* 2;
188 p
= (UnsetAddr
* )xrealloc(uslist
->us
, sizeof(UnsetAddr
) * size
, sizeof(UnsetAddr
) * uslist
->alloc
);
189 CHECK_NULL_RETURN_MEMERR(p
);
190 uslist
->alloc
= size
;
194 uslist
->us
[uslist
->num
].offset
= offset
;
195 uslist
->us
[uslist
->num
].target
= node
;
199 #endif /* USE_SUBEXP_CALL */
203 add_opcode(regex_t
* reg
, int opcode
)
205 BBUF_ADD1(reg
, ((unsigned char)opcode
));
209 #ifdef USE_COMBINATION_EXPLOSION_CHECK
211 add_state_check_num(regex_t
* reg
, int num
)
213 StateCheckNumType n
= (StateCheckNumType
)num
;
215 BBUF_ADD(reg
, &n
, SIZE_STATE_CHECK_NUM
);
221 add_rel_addr(regex_t
* reg
, int addr
)
223 RelAddrType ra
= (RelAddrType
)addr
;
225 BBUF_ADD(reg
, &ra
, SIZE_RELADDR
);
230 add_abs_addr(regex_t
* reg
, int addr
)
232 AbsAddrType ra
= (AbsAddrType
)addr
;
234 BBUF_ADD(reg
, &ra
, SIZE_ABSADDR
);
239 add_length(regex_t
* reg
, int len
)
241 LengthType l
= (LengthType
)len
;
243 BBUF_ADD(reg
, &l
, SIZE_LENGTH
);
248 add_mem_num(regex_t
* reg
, int num
)
250 MemNumType n
= (MemNumType
)num
;
252 BBUF_ADD(reg
, &n
, SIZE_MEMNUM
);
257 add_pointer(regex_t
* reg
, void* addr
)
259 PointerType ptr
= (PointerType
)addr
;
261 BBUF_ADD(reg
, &ptr
, SIZE_POINTER
);
266 add_option(regex_t
* reg
, OnigOptionType option
)
268 BBUF_ADD(reg
, &option
, SIZE_OPTION
);
273 add_opcode_rel_addr(regex_t
* reg
, int opcode
, int addr
)
277 r
= add_opcode(reg
, opcode
);
279 r
= add_rel_addr(reg
, addr
);
284 add_bytes(regex_t
* reg
, UChar
* bytes
, int len
)
286 BBUF_ADD(reg
, bytes
, len
);
291 add_bitset(regex_t
* reg
, BitSetRef bs
)
293 BBUF_ADD(reg
, bs
, SIZE_BITSET
);
298 add_opcode_option(regex_t
* reg
, int opcode
, OnigOptionType option
)
302 r
= add_opcode(reg
, opcode
);
304 r
= add_option(reg
, option
);
308 static int compile_length_tree(Node
* node
, regex_t
* reg
);
309 static int compile_tree(Node
* node
, regex_t
* reg
);
312 #define IS_NEED_STR_LEN_OP_EXACT(op) \
313 ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
314 (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
317 select_str_opcode(int mb_len
, int str_len
, int ignore_case
)
323 case 1: op
= OP_EXACT1_IC
; break;
324 default: op
= OP_EXACTN_IC
; break;
331 case 1: op
= OP_EXACT1
; break;
332 case 2: op
= OP_EXACT2
; break;
333 case 3: op
= OP_EXACT3
; break;
334 case 4: op
= OP_EXACT4
; break;
335 case 5: op
= OP_EXACT5
; break;
336 default: op
= OP_EXACTN
; break;
342 case 1: op
= OP_EXACTMB2N1
; break;
343 case 2: op
= OP_EXACTMB2N2
; break;
344 case 3: op
= OP_EXACTMB2N3
; break;
345 default: op
= OP_EXACTMB2N
; break;
362 compile_tree_empty_check(Node
* node
, regex_t
* reg
, int empty_info
)
365 int saved_num_null_check
= reg
->num_null_check
;
367 if (empty_info
!= 0) {
368 r
= add_opcode(reg
, OP_NULL_CHECK_START
);
370 r
= add_mem_num(reg
, reg
->num_null_check
); /* NULL CHECK ID */
372 reg
->num_null_check
++;
375 r
= compile_tree(node
, reg
);
378 if (empty_info
!= 0) {
379 if (empty_info
== NQ_TARGET_IS_EMPTY
)
380 r
= add_opcode(reg
, OP_NULL_CHECK_END
);
381 else if (empty_info
== NQ_TARGET_IS_EMPTY_MEM
)
382 r
= add_opcode(reg
, OP_NULL_CHECK_END_MEMST
);
383 else if (empty_info
== NQ_TARGET_IS_EMPTY_REC
)
384 r
= add_opcode(reg
, OP_NULL_CHECK_END_MEMST_PUSH
);
387 r
= add_mem_num(reg
, saved_num_null_check
); /* NULL CHECK ID */
392 #ifdef USE_SUBEXP_CALL
394 compile_call(CallNode
* node
, regex_t
* reg
)
398 r
= add_opcode(reg
, OP_CALL
);
400 r
= unset_addr_list_add(node
->unset_addr_list
, BBUF_GET_OFFSET_POS(reg
),
403 r
= add_abs_addr(reg
, 0 /*dummy addr.*/);
409 compile_tree_n_times(Node
* node
, int n
, regex_t
* reg
)
413 for (i
= 0; i
< n
; i
++) {
414 r
= compile_tree(node
, reg
);
421 add_compile_string_length(UChar
* s ARG_UNUSED
, int mb_len
, int str_len
,
422 regex_t
* reg ARG_UNUSED
, int ignore_case
)
425 int op
= select_str_opcode(mb_len
, str_len
, ignore_case
);
429 if (op
== OP_EXACTMBN
) len
+= SIZE_LENGTH
;
430 if (IS_NEED_STR_LEN_OP_EXACT(op
))
433 len
+= mb_len
* str_len
;
438 add_compile_string(UChar
* s
, int mb_len
, int str_len
,
439 regex_t
* reg
, int ignore_case
)
441 int op
= select_str_opcode(mb_len
, str_len
, ignore_case
);
444 if (op
== OP_EXACTMBN
)
445 add_length(reg
, mb_len
);
447 if (IS_NEED_STR_LEN_OP_EXACT(op
)) {
448 if (op
== OP_EXACTN_IC
)
449 add_length(reg
, mb_len
* str_len
);
451 add_length(reg
, str_len
);
454 add_bytes(reg
, s
, mb_len
* str_len
);
460 compile_length_string_node(Node
* node
, regex_t
* reg
)
462 int rlen
, r
, len
, prev_len
, slen
, ambig
;
463 OnigEncoding enc
= reg
->enc
;
468 if (sn
->end
<= sn
->s
)
471 ambig
= NSTRING_IS_AMBIG(node
);
474 prev_len
= enclen(enc
, p
);
479 for (; p
< sn
->end
; ) {
480 len
= enclen(enc
, p
);
481 if (len
== prev_len
) {
485 r
= add_compile_string_length(prev
, prev_len
, slen
, reg
, ambig
);
493 r
= add_compile_string_length(prev
, prev_len
, slen
, reg
, ambig
);
499 compile_length_string_raw_node(StrNode
* sn
, regex_t
* reg
)
501 if (sn
->end
<= sn
->s
)
504 return add_compile_string_length(sn
->s
, 1 /* sb */, (int)(sn
->end
- sn
->s
), reg
, 0);
508 compile_string_node(Node
* node
, regex_t
* reg
)
510 int r
, len
, prev_len
, slen
, ambig
;
511 OnigEncoding enc
= reg
->enc
;
512 UChar
*p
, *prev
, *end
;
516 if (sn
->end
<= sn
->s
)
520 ambig
= NSTRING_IS_AMBIG(node
);
523 prev_len
= enclen(enc
, p
);
528 len
= enclen(enc
, p
);
529 if (len
== prev_len
) {
533 r
= add_compile_string(prev
, prev_len
, slen
, reg
, ambig
);
543 return add_compile_string(prev
, prev_len
, slen
, reg
, ambig
);
547 compile_string_raw_node(StrNode
* sn
, regex_t
* reg
)
549 if (sn
->end
<= sn
->s
)
552 return add_compile_string(sn
->s
, 1 /* sb */, (int)(sn
->end
- sn
->s
), reg
, 0);
556 add_multi_byte_cclass(BBuf
* mbuf
, regex_t
* reg
)
558 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
559 add_length(reg
, mbuf
->used
);
560 return add_bytes(reg
, mbuf
->p
, mbuf
->used
);
563 UChar
* p
= BBUF_GET_ADD_ADDRESS(reg
) + SIZE_LENGTH
;
565 GET_ALIGNMENT_PAD_SIZE(p
, pad_size
);
566 add_length(reg
, mbuf
->used
+ (WORD_ALIGNMENT_SIZE
- 1));
567 if (pad_size
!= 0) add_bytes(reg
, PadBuf
, pad_size
);
569 r
= add_bytes(reg
, mbuf
->p
, mbuf
->used
);
571 /* padding for return value from compile_length_cclass_node() to be fix. */
572 pad_size
= (WORD_ALIGNMENT_SIZE
- 1) - pad_size
;
573 if (pad_size
!= 0) add_bytes(reg
, PadBuf
, pad_size
);
579 compile_length_cclass_node(CClassNode
* cc
, regex_t
* reg
)
583 if (IS_NCCLASS_SHARE(cc
)) {
584 len
= SIZE_OPCODE
+ SIZE_POINTER
;
588 if (IS_NULL(cc
->mbuf
)) {
589 len
= SIZE_OPCODE
+ SIZE_BITSET
;
592 if (ONIGENC_MBC_MINLEN(reg
->enc
) > 1 || bitset_is_empty(cc
->bs
)) {
596 len
= SIZE_OPCODE
+ SIZE_BITSET
;
598 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
599 len
+= SIZE_LENGTH
+ cc
->mbuf
->used
;
601 len
+= SIZE_LENGTH
+ cc
->mbuf
->used
+ (WORD_ALIGNMENT_SIZE
- 1);
609 compile_cclass_node(CClassNode
* cc
, regex_t
* reg
)
613 if (IS_NCCLASS_SHARE(cc
)) {
614 add_opcode(reg
, OP_CCLASS_NODE
);
615 r
= add_pointer(reg
, cc
);
619 if (IS_NULL(cc
->mbuf
)) {
620 if (IS_NCCLASS_NOT(cc
))
621 add_opcode(reg
, OP_CCLASS_NOT
);
623 add_opcode(reg
, OP_CCLASS
);
625 r
= add_bitset(reg
, cc
->bs
);
628 if (ONIGENC_MBC_MINLEN(reg
->enc
) > 1 || bitset_is_empty(cc
->bs
)) {
629 if (IS_NCCLASS_NOT(cc
))
630 add_opcode(reg
, OP_CCLASS_MB_NOT
);
632 add_opcode(reg
, OP_CCLASS_MB
);
634 r
= add_multi_byte_cclass(cc
->mbuf
, reg
);
637 if (IS_NCCLASS_NOT(cc
))
638 add_opcode(reg
, OP_CCLASS_MIX_NOT
);
640 add_opcode(reg
, OP_CCLASS_MIX
);
642 r
= add_bitset(reg
, cc
->bs
);
644 r
= add_multi_byte_cclass(cc
->mbuf
, reg
);
652 entry_repeat_range(regex_t
* reg
, int id
, int lower
, int upper
)
654 #define REPEAT_RANGE_ALLOC 4
658 if (reg
->repeat_range_alloc
== 0) {
659 p
= (OnigRepeatRange
* )xmalloc(sizeof(OnigRepeatRange
) * REPEAT_RANGE_ALLOC
);
660 CHECK_NULL_RETURN_MEMERR(p
);
661 reg
->repeat_range
= p
;
662 reg
->repeat_range_alloc
= REPEAT_RANGE_ALLOC
;
664 else if (reg
->repeat_range_alloc
<= id
) {
666 n
= reg
->repeat_range_alloc
+ REPEAT_RANGE_ALLOC
;
667 p
= (OnigRepeatRange
* )xrealloc(reg
->repeat_range
,
668 sizeof(OnigRepeatRange
) * n
,
669 sizeof(OnigRepeatRange
) * reg
->repeat_range_alloc
);
670 CHECK_NULL_RETURN_MEMERR(p
);
671 reg
->repeat_range
= p
;
672 reg
->repeat_range_alloc
= n
;
675 p
= reg
->repeat_range
;
679 p
[id
].upper
= (IS_REPEAT_INFINITE(upper
) ? 0x7fffffff : upper
);
684 compile_range_repeat_node(QtfrNode
* qn
, int target_len
, int empty_info
,
688 int num_repeat
= reg
->num_repeat
;
690 r
= add_opcode(reg
, qn
->greedy
? OP_REPEAT
: OP_REPEAT_NG
);
692 r
= add_mem_num(reg
, num_repeat
); /* OP_REPEAT ID */
695 r
= add_rel_addr(reg
, target_len
+ SIZE_OP_REPEAT_INC
);
698 r
= entry_repeat_range(reg
, num_repeat
, qn
->lower
, qn
->upper
);
701 r
= compile_tree_empty_check(qn
->target
, reg
, empty_info
);
705 #ifdef USE_SUBEXP_CALL
708 IS_QUANTIFIER_IN_REPEAT(qn
)) {
709 r
= add_opcode(reg
, qn
->greedy
? OP_REPEAT_INC_SG
: OP_REPEAT_INC_NG_SG
);
712 r
= add_opcode(reg
, qn
->greedy
? OP_REPEAT_INC
: OP_REPEAT_INC_NG
);
715 r
= add_mem_num(reg
, num_repeat
); /* OP_REPEAT ID */
720 is_anychar_star_quantifier(QtfrNode
* qn
)
722 if (qn
->greedy
&& IS_REPEAT_INFINITE(qn
->upper
) &&
723 NTYPE(qn
->target
) == NT_CANY
)
729 #define QUANTIFIER_EXPAND_LIMIT_SIZE 50
730 #define CKN_ON (ckn > 0)
732 #ifdef USE_COMBINATION_EXPLOSION_CHECK
735 compile_length_quantifier_node(QtfrNode
* qn
, regex_t
* reg
)
737 int len
, mod_tlen
, cklen
;
739 int infinite
= IS_REPEAT_INFINITE(qn
->upper
);
740 int empty_info
= qn
->target_empty_info
;
741 int tlen
= compile_length_tree(qn
->target
, reg
);
743 if (tlen
< 0) return tlen
;
745 ckn
= ((reg
->num_comb_exp_check
> 0) ? qn
->comb_exp_check_num
: 0);
747 cklen
= (CKN_ON
? SIZE_STATE_CHECK_NUM
: 0);
750 if (NTYPE(qn
->target
) == NT_CANY
) {
751 if (qn
->greedy
&& infinite
) {
752 if (IS_NOT_NULL(qn
->next_head_exact
) && !CKN_ON
)
753 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT
+ tlen
* qn
->lower
+ cklen
;
755 return SIZE_OP_ANYCHAR_STAR
+ tlen
* qn
->lower
+ cklen
;
760 mod_tlen
= tlen
+ (SIZE_OP_NULL_CHECK_START
+ SIZE_OP_NULL_CHECK_END
);
764 if (infinite
&& qn
->lower
<= 1) {
771 len
+= SIZE_OP_PUSH
+ cklen
+ mod_tlen
+ SIZE_OP_JUMP
;
779 len
+= mod_tlen
+ SIZE_OP_PUSH
+ cklen
;
782 else if (qn
->upper
== 0) {
783 if (qn
->is_refered
!= 0) /* /(?<n>..){0}/ */
784 len
= SIZE_OP_JUMP
+ tlen
;
788 else if (qn
->upper
== 1 && qn
->greedy
) {
789 if (qn
->lower
== 0) {
791 len
= SIZE_OP_STATE_CHECK_PUSH
+ tlen
;
794 len
= SIZE_OP_PUSH
+ tlen
;
801 else if (!qn
->greedy
&& qn
->upper
== 1 && qn
->lower
== 0) { /* '??' */
802 len
= SIZE_OP_PUSH
+ cklen
+ SIZE_OP_JUMP
+ tlen
;
805 len
= SIZE_OP_REPEAT_INC
806 + mod_tlen
+ SIZE_OPCODE
+ SIZE_RELADDR
+ SIZE_MEMNUM
;
808 len
+= SIZE_OP_STATE_CHECK
;
815 compile_quantifier_node(QtfrNode
* qn
, regex_t
* reg
)
819 int infinite
= IS_REPEAT_INFINITE(qn
->upper
);
820 int empty_info
= qn
->target_empty_info
;
821 int tlen
= compile_length_tree(qn
->target
, reg
);
823 if (tlen
< 0) return tlen
;
825 ckn
= ((reg
->num_comb_exp_check
> 0) ? qn
->comb_exp_check_num
: 0);
827 if (is_anychar_star_quantifier(qn
)) {
828 r
= compile_tree_n_times(qn
->target
, qn
->lower
, reg
);
830 if (IS_NOT_NULL(qn
->next_head_exact
) && !CKN_ON
) {
831 if (IS_MULTILINE(reg
->options
))
832 r
= add_opcode(reg
, OP_ANYCHAR_ML_STAR_PEEK_NEXT
);
834 r
= add_opcode(reg
, OP_ANYCHAR_STAR_PEEK_NEXT
);
837 r
= add_state_check_num(reg
, ckn
);
841 return add_bytes(reg
, NSTR(qn
->next_head_exact
)->s
, 1);
844 if (IS_MULTILINE(reg
->options
)) {
845 r
= add_opcode(reg
, (CKN_ON
?
846 OP_STATE_CHECK_ANYCHAR_ML_STAR
847 : OP_ANYCHAR_ML_STAR
));
850 r
= add_opcode(reg
, (CKN_ON
?
851 OP_STATE_CHECK_ANYCHAR_STAR
856 r
= add_state_check_num(reg
, ckn
);
863 mod_tlen
= tlen
+ (SIZE_OP_NULL_CHECK_START
+ SIZE_OP_NULL_CHECK_END
);
867 if (infinite
&& qn
->lower
<= 1) {
869 if (qn
->lower
== 1) {
870 r
= add_opcode_rel_addr(reg
, OP_JUMP
,
871 (CKN_ON
? SIZE_OP_STATE_CHECK_PUSH
: SIZE_OP_PUSH
));
876 r
= add_opcode(reg
, OP_STATE_CHECK_PUSH
);
878 r
= add_state_check_num(reg
, ckn
);
880 r
= add_rel_addr(reg
, mod_tlen
+ SIZE_OP_JUMP
);
883 r
= add_opcode_rel_addr(reg
, OP_PUSH
, mod_tlen
+ SIZE_OP_JUMP
);
886 r
= compile_tree_empty_check(qn
->target
, reg
, empty_info
);
888 r
= add_opcode_rel_addr(reg
, OP_JUMP
,
889 -(mod_tlen
+ (int )SIZE_OP_JUMP
890 + (int )(CKN_ON
? SIZE_OP_STATE_CHECK_PUSH
: SIZE_OP_PUSH
)));
893 if (qn
->lower
== 0) {
894 r
= add_opcode_rel_addr(reg
, OP_JUMP
, mod_tlen
);
897 r
= compile_tree_empty_check(qn
->target
, reg
, empty_info
);
900 r
= add_opcode(reg
, OP_STATE_CHECK_PUSH_OR_JUMP
);
902 r
= add_state_check_num(reg
, ckn
);
904 r
= add_rel_addr(reg
,
905 -(mod_tlen
+ (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP
));
908 r
= add_opcode_rel_addr(reg
, OP_PUSH
, -(mod_tlen
+ (int )SIZE_OP_PUSH
));
911 else if (qn
->upper
== 0) {
912 if (qn
->is_refered
!= 0) { /* /(?<n>..){0}/ */
913 r
= add_opcode_rel_addr(reg
, OP_JUMP
, tlen
);
915 r
= compile_tree(qn
->target
, reg
);
920 else if (qn
->upper
== 1 && qn
->greedy
) {
921 if (qn
->lower
== 0) {
923 r
= add_opcode(reg
, OP_STATE_CHECK_PUSH
);
925 r
= add_state_check_num(reg
, ckn
);
927 r
= add_rel_addr(reg
, tlen
);
930 r
= add_opcode_rel_addr(reg
, OP_PUSH
, tlen
);
935 r
= compile_tree(qn
->target
, reg
);
937 else if (!qn
->greedy
&& qn
->upper
== 1 && qn
->lower
== 0) { /* '??' */
939 r
= add_opcode(reg
, OP_STATE_CHECK_PUSH
);
941 r
= add_state_check_num(reg
, ckn
);
943 r
= add_rel_addr(reg
, SIZE_OP_JUMP
);
946 r
= add_opcode_rel_addr(reg
, OP_PUSH
, SIZE_OP_JUMP
);
950 r
= add_opcode_rel_addr(reg
, OP_JUMP
, tlen
);
952 r
= compile_tree(qn
->target
, reg
);
955 r
= compile_range_repeat_node(qn
, mod_tlen
, empty_info
, reg
);
958 r
= add_opcode(reg
, OP_STATE_CHECK
);
960 r
= add_state_check_num(reg
, ckn
);
966 #else /* USE_COMBINATION_EXPLOSION_CHECK */
969 compile_length_quantifier_node(QtfrNode
* qn
, regex_t
* reg
)
972 int infinite
= IS_REPEAT_INFINITE(qn
->upper
);
973 int empty_info
= qn
->target_empty_info
;
974 int tlen
= compile_length_tree(qn
->target
, reg
);
976 if (tlen
< 0) return tlen
;
979 if (NTYPE(qn
->target
) == NT_CANY
) {
980 if (qn
->greedy
&& infinite
) {
981 if (IS_NOT_NULL(qn
->next_head_exact
))
982 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT
+ tlen
* qn
->lower
;
984 return SIZE_OP_ANYCHAR_STAR
+ tlen
* qn
->lower
;
989 mod_tlen
= tlen
+ (SIZE_OP_NULL_CHECK_START
+ SIZE_OP_NULL_CHECK_END
);
994 (qn
->lower
<= 1 || tlen
* qn
->lower
<= QUANTIFIER_EXPAND_LIMIT_SIZE
)) {
995 if (qn
->lower
== 1 && tlen
> QUANTIFIER_EXPAND_LIMIT_SIZE
) {
999 len
= tlen
* qn
->lower
;
1003 if (IS_NOT_NULL(qn
->head_exact
))
1004 len
+= SIZE_OP_PUSH_OR_JUMP_EXACT1
+ mod_tlen
+ SIZE_OP_JUMP
;
1005 else if (IS_NOT_NULL(qn
->next_head_exact
))
1006 len
+= SIZE_OP_PUSH_IF_PEEK_NEXT
+ mod_tlen
+ SIZE_OP_JUMP
;
1008 len
+= SIZE_OP_PUSH
+ mod_tlen
+ SIZE_OP_JUMP
;
1011 len
+= SIZE_OP_JUMP
+ mod_tlen
+ SIZE_OP_PUSH
;
1013 else if (qn
->upper
== 0 && qn
->is_refered
!= 0) { /* /(?<n>..){0}/ */
1014 len
= SIZE_OP_JUMP
+ tlen
;
1016 else if (!infinite
&& qn
->greedy
&&
1017 (qn
->upper
== 1 || (tlen
+ SIZE_OP_PUSH
) * qn
->upper
1018 <= QUANTIFIER_EXPAND_LIMIT_SIZE
)) {
1019 len
= tlen
* qn
->lower
;
1020 len
+= (SIZE_OP_PUSH
+ tlen
) * (qn
->upper
- qn
->lower
);
1022 else if (!qn
->greedy
&& qn
->upper
== 1 && qn
->lower
== 0) { /* '??' */
1023 len
= SIZE_OP_PUSH
+ SIZE_OP_JUMP
+ tlen
;
1026 len
= SIZE_OP_REPEAT_INC
1027 + mod_tlen
+ SIZE_OPCODE
+ SIZE_RELADDR
+ SIZE_MEMNUM
;
1034 compile_quantifier_node(QtfrNode
* qn
, regex_t
* reg
)
1037 int infinite
= IS_REPEAT_INFINITE(qn
->upper
);
1038 int empty_info
= qn
->target_empty_info
;
1039 int tlen
= compile_length_tree(qn
->target
, reg
);
1041 if (tlen
< 0) return tlen
;
1043 if (is_anychar_star_quantifier(qn
)) {
1044 r
= compile_tree_n_times(qn
->target
, qn
->lower
, reg
);
1046 if (IS_NOT_NULL(qn
->next_head_exact
)) {
1047 if (IS_MULTILINE(reg
->options
))
1048 r
= add_opcode(reg
, OP_ANYCHAR_ML_STAR_PEEK_NEXT
);
1050 r
= add_opcode(reg
, OP_ANYCHAR_STAR_PEEK_NEXT
);
1052 return add_bytes(reg
, NSTR(qn
->next_head_exact
)->s
, 1);
1055 if (IS_MULTILINE(reg
->options
))
1056 return add_opcode(reg
, OP_ANYCHAR_ML_STAR
);
1058 return add_opcode(reg
, OP_ANYCHAR_STAR
);
1062 if (empty_info
!= 0)
1063 mod_tlen
= tlen
+ (SIZE_OP_NULL_CHECK_START
+ SIZE_OP_NULL_CHECK_END
);
1068 (qn
->lower
<= 1 || tlen
* qn
->lower
<= QUANTIFIER_EXPAND_LIMIT_SIZE
)) {
1069 if (qn
->lower
== 1 && tlen
> QUANTIFIER_EXPAND_LIMIT_SIZE
) {
1071 if (IS_NOT_NULL(qn
->head_exact
))
1072 r
= add_opcode_rel_addr(reg
, OP_JUMP
, SIZE_OP_PUSH_OR_JUMP_EXACT1
);
1073 else if (IS_NOT_NULL(qn
->next_head_exact
))
1074 r
= add_opcode_rel_addr(reg
, OP_JUMP
, SIZE_OP_PUSH_IF_PEEK_NEXT
);
1076 r
= add_opcode_rel_addr(reg
, OP_JUMP
, SIZE_OP_PUSH
);
1079 r
= add_opcode_rel_addr(reg
, OP_JUMP
, SIZE_OP_JUMP
);
1084 r
= compile_tree_n_times(qn
->target
, qn
->lower
, reg
);
1089 if (IS_NOT_NULL(qn
->head_exact
)) {
1090 r
= add_opcode_rel_addr(reg
, OP_PUSH_OR_JUMP_EXACT1
,
1091 mod_tlen
+ SIZE_OP_JUMP
);
1093 add_bytes(reg
, NSTR(qn
->head_exact
)->s
, 1);
1094 r
= compile_tree_empty_check(qn
->target
, reg
, empty_info
);
1096 r
= add_opcode_rel_addr(reg
, OP_JUMP
,
1097 -(mod_tlen
+ (int )SIZE_OP_JUMP
+ (int )SIZE_OP_PUSH_OR_JUMP_EXACT1
));
1099 else if (IS_NOT_NULL(qn
->next_head_exact
)) {
1100 r
= add_opcode_rel_addr(reg
, OP_PUSH_IF_PEEK_NEXT
,
1101 mod_tlen
+ SIZE_OP_JUMP
);
1103 add_bytes(reg
, NSTR(qn
->next_head_exact
)->s
, 1);
1104 r
= compile_tree_empty_check(qn
->target
, reg
, empty_info
);
1106 r
= add_opcode_rel_addr(reg
, OP_JUMP
,
1107 -(mod_tlen
+ (int )SIZE_OP_JUMP
+ (int )SIZE_OP_PUSH_IF_PEEK_NEXT
));
1110 r
= add_opcode_rel_addr(reg
, OP_PUSH
, mod_tlen
+ SIZE_OP_JUMP
);
1112 r
= compile_tree_empty_check(qn
->target
, reg
, empty_info
);
1114 r
= add_opcode_rel_addr(reg
, OP_JUMP
,
1115 -(mod_tlen
+ (int )SIZE_OP_JUMP
+ (int )SIZE_OP_PUSH
));
1119 r
= add_opcode_rel_addr(reg
, OP_JUMP
, mod_tlen
);
1121 r
= compile_tree_empty_check(qn
->target
, reg
, empty_info
);
1123 r
= add_opcode_rel_addr(reg
, OP_PUSH
, -(mod_tlen
+ (int )SIZE_OP_PUSH
));
1126 else if (qn
->upper
== 0 && qn
->is_refered
!= 0) { /* /(?<n>..){0}/ */
1127 r
= add_opcode_rel_addr(reg
, OP_JUMP
, tlen
);
1129 r
= compile_tree(qn
->target
, reg
);
1131 else if (!infinite
&& qn
->greedy
&&
1132 (qn
->upper
== 1 || (tlen
+ SIZE_OP_PUSH
) * qn
->upper
1133 <= QUANTIFIER_EXPAND_LIMIT_SIZE
)) {
1134 int n
= qn
->upper
- qn
->lower
;
1136 r
= compile_tree_n_times(qn
->target
, qn
->lower
, reg
);
1139 for (i
= 0; i
< n
; i
++) {
1140 r
= add_opcode_rel_addr(reg
, OP_PUSH
,
1141 (n
- i
) * tlen
+ (n
- i
- 1) * SIZE_OP_PUSH
);
1143 r
= compile_tree(qn
->target
, reg
);
1147 else if (!qn
->greedy
&& qn
->upper
== 1 && qn
->lower
== 0) { /* '??' */
1148 r
= add_opcode_rel_addr(reg
, OP_PUSH
, SIZE_OP_JUMP
);
1150 r
= add_opcode_rel_addr(reg
, OP_JUMP
, tlen
);
1152 r
= compile_tree(qn
->target
, reg
);
1155 r
= compile_range_repeat_node(qn
, mod_tlen
, empty_info
, reg
);
1159 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
1162 compile_length_option_node(EncloseNode
* node
, regex_t
* reg
)
1165 OnigOptionType prev
= reg
->options
;
1167 reg
->options
= node
->option
;
1168 tlen
= compile_length_tree(node
->target
, reg
);
1169 reg
->options
= prev
;
1171 if (tlen
< 0) return tlen
;
1173 if (IS_DYNAMIC_OPTION(prev
^ node
->option
)) {
1174 return SIZE_OP_SET_OPTION_PUSH
+ SIZE_OP_SET_OPTION
+ SIZE_OP_FAIL
1175 + tlen
+ SIZE_OP_SET_OPTION
;
1182 compile_option_node(EncloseNode
* node
, regex_t
* reg
)
1185 OnigOptionType prev
= reg
->options
;
1187 if (IS_DYNAMIC_OPTION(prev
^ node
->option
)) {
1188 r
= add_opcode_option(reg
, OP_SET_OPTION_PUSH
, node
->option
);
1190 r
= add_opcode_option(reg
, OP_SET_OPTION
, prev
);
1192 r
= add_opcode(reg
, OP_FAIL
);
1196 reg
->options
= node
->option
;
1197 r
= compile_tree(node
->target
, reg
);
1198 reg
->options
= prev
;
1200 if (IS_DYNAMIC_OPTION(prev
^ node
->option
)) {
1202 r
= add_opcode_option(reg
, OP_SET_OPTION
, prev
);
1208 compile_length_enclose_node(EncloseNode
* node
, regex_t
* reg
)
1213 if (node
->type
== ENCLOSE_OPTION
)
1214 return compile_length_option_node(node
, reg
);
1217 tlen
= compile_length_tree(node
->target
, reg
);
1218 if (tlen
< 0) return tlen
;
1223 switch (node
->type
) {
1224 case ENCLOSE_MEMORY
:
1225 #ifdef USE_SUBEXP_CALL
1226 if (IS_ENCLOSE_CALLED(node
)) {
1227 len
= SIZE_OP_MEMORY_START_PUSH
+ tlen
1228 + SIZE_OP_CALL
+ SIZE_OP_JUMP
+ SIZE_OP_RETURN
;
1229 if (BIT_STATUS_AT(reg
->bt_mem_end
, node
->regnum
))
1230 len
+= (IS_ENCLOSE_RECURSION(node
)
1231 ? SIZE_OP_MEMORY_END_PUSH_REC
: SIZE_OP_MEMORY_END_PUSH
);
1233 len
+= (IS_ENCLOSE_RECURSION(node
)
1234 ? SIZE_OP_MEMORY_END_REC
: SIZE_OP_MEMORY_END
);
1239 if (BIT_STATUS_AT(reg
->bt_mem_start
, node
->regnum
))
1240 len
= SIZE_OP_MEMORY_START_PUSH
;
1242 len
= SIZE_OP_MEMORY_START
;
1244 len
+= tlen
+ (BIT_STATUS_AT(reg
->bt_mem_end
, node
->regnum
)
1245 ? SIZE_OP_MEMORY_END_PUSH
: SIZE_OP_MEMORY_END
);
1249 case ENCLOSE_STOP_BACKTRACK
:
1250 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node
)) {
1251 QtfrNode
* qn
= NQTFR(node
->target
);
1252 tlen
= compile_length_tree(qn
->target
, reg
);
1253 if (tlen
< 0) return tlen
;
1255 len
= tlen
* qn
->lower
1256 + SIZE_OP_PUSH
+ tlen
+ SIZE_OP_POP
+ SIZE_OP_JUMP
;
1259 len
= SIZE_OP_PUSH_STOP_BT
+ tlen
+ SIZE_OP_POP_STOP_BT
;
1264 return ONIGERR_TYPE_BUG
;
1271 static int get_char_length_tree(Node
* node
, regex_t
* reg
, int* len
);
1274 compile_enclose_node(EncloseNode
* node
, regex_t
* reg
)
1278 if (node
->type
== ENCLOSE_OPTION
)
1279 return compile_option_node(node
, reg
);
1281 switch (node
->type
) {
1282 case ENCLOSE_MEMORY
:
1283 #ifdef USE_SUBEXP_CALL
1284 if (IS_ENCLOSE_CALLED(node
)) {
1285 r
= add_opcode(reg
, OP_CALL
);
1287 node
->call_addr
= BBUF_GET_OFFSET_POS(reg
) + SIZE_ABSADDR
+ SIZE_OP_JUMP
;
1288 node
->state
|= NST_ADDR_FIXED
;
1289 r
= add_abs_addr(reg
, (int )node
->call_addr
);
1291 len
= compile_length_tree(node
->target
, reg
);
1292 len
+= (SIZE_OP_MEMORY_START_PUSH
+ SIZE_OP_RETURN
);
1293 if (BIT_STATUS_AT(reg
->bt_mem_end
, node
->regnum
))
1294 len
+= (IS_ENCLOSE_RECURSION(node
)
1295 ? SIZE_OP_MEMORY_END_PUSH_REC
: SIZE_OP_MEMORY_END_PUSH
);
1297 len
+= (IS_ENCLOSE_RECURSION(node
)
1298 ? SIZE_OP_MEMORY_END_REC
: SIZE_OP_MEMORY_END
);
1300 r
= add_opcode_rel_addr(reg
, OP_JUMP
, len
);
1304 if (BIT_STATUS_AT(reg
->bt_mem_start
, node
->regnum
))
1305 r
= add_opcode(reg
, OP_MEMORY_START_PUSH
);
1307 r
= add_opcode(reg
, OP_MEMORY_START
);
1309 r
= add_mem_num(reg
, node
->regnum
);
1311 r
= compile_tree(node
->target
, reg
);
1313 #ifdef USE_SUBEXP_CALL
1314 if (IS_ENCLOSE_CALLED(node
)) {
1315 if (BIT_STATUS_AT(reg
->bt_mem_end
, node
->regnum
))
1316 r
= add_opcode(reg
, (IS_ENCLOSE_RECURSION(node
)
1317 ? OP_MEMORY_END_PUSH_REC
: OP_MEMORY_END_PUSH
));
1319 r
= add_opcode(reg
, (IS_ENCLOSE_RECURSION(node
)
1320 ? OP_MEMORY_END_REC
: OP_MEMORY_END
));
1323 r
= add_mem_num(reg
, node
->regnum
);
1325 r
= add_opcode(reg
, OP_RETURN
);
1330 if (BIT_STATUS_AT(reg
->bt_mem_end
, node
->regnum
))
1331 r
= add_opcode(reg
, OP_MEMORY_END_PUSH
);
1333 r
= add_opcode(reg
, OP_MEMORY_END
);
1335 r
= add_mem_num(reg
, node
->regnum
);
1339 case ENCLOSE_STOP_BACKTRACK
:
1340 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node
)) {
1341 QtfrNode
* qn
= NQTFR(node
->target
);
1342 r
= compile_tree_n_times(qn
->target
, qn
->lower
, reg
);
1345 len
= compile_length_tree(qn
->target
, reg
);
1346 if (len
< 0) return len
;
1348 r
= add_opcode_rel_addr(reg
, OP_PUSH
, len
+ SIZE_OP_POP
+ SIZE_OP_JUMP
);
1350 r
= compile_tree(qn
->target
, reg
);
1352 r
= add_opcode(reg
, OP_POP
);
1354 r
= add_opcode_rel_addr(reg
, OP_JUMP
,
1355 -((int )SIZE_OP_PUSH
+ len
+ (int )SIZE_OP_POP
+ (int )SIZE_OP_JUMP
));
1358 r
= add_opcode(reg
, OP_PUSH_STOP_BT
);
1360 r
= compile_tree(node
->target
, reg
);
1362 r
= add_opcode(reg
, OP_POP_STOP_BT
);
1367 return ONIGERR_TYPE_BUG
;
1375 compile_length_anchor_node(AnchorNode
* node
, regex_t
* reg
)
1381 tlen
= compile_length_tree(node
->target
, reg
);
1382 if (tlen
< 0) return tlen
;
1385 switch (node
->type
) {
1386 case ANCHOR_PREC_READ
:
1387 len
= SIZE_OP_PUSH_POS
+ tlen
+ SIZE_OP_POP_POS
;
1389 case ANCHOR_PREC_READ_NOT
:
1390 len
= SIZE_OP_PUSH_POS_NOT
+ tlen
+ SIZE_OP_FAIL_POS
;
1392 case ANCHOR_LOOK_BEHIND
:
1393 len
= SIZE_OP_LOOK_BEHIND
+ tlen
;
1395 case ANCHOR_LOOK_BEHIND_NOT
:
1396 len
= SIZE_OP_PUSH_LOOK_BEHIND_NOT
+ tlen
+ SIZE_OP_FAIL_LOOK_BEHIND_NOT
;
1408 compile_anchor_node(AnchorNode
* node
, regex_t
* reg
)
1412 switch (node
->type
) {
1413 case ANCHOR_BEGIN_BUF
: r
= add_opcode(reg
, OP_BEGIN_BUF
); break;
1414 case ANCHOR_END_BUF
: r
= add_opcode(reg
, OP_END_BUF
); break;
1415 case ANCHOR_BEGIN_LINE
: r
= add_opcode(reg
, OP_BEGIN_LINE
); break;
1416 case ANCHOR_END_LINE
: r
= add_opcode(reg
, OP_END_LINE
); break;
1417 case ANCHOR_SEMI_END_BUF
: r
= add_opcode(reg
, OP_SEMI_END_BUF
); break;
1418 case ANCHOR_BEGIN_POSITION
: r
= add_opcode(reg
, OP_BEGIN_POSITION
); break;
1420 case ANCHOR_WORD_BOUND
: r
= add_opcode(reg
, OP_WORD_BOUND
); break;
1421 case ANCHOR_NOT_WORD_BOUND
: r
= add_opcode(reg
, OP_NOT_WORD_BOUND
); break;
1422 #ifdef USE_WORD_BEGIN_END
1423 case ANCHOR_WORD_BEGIN
: r
= add_opcode(reg
, OP_WORD_BEGIN
); break;
1424 case ANCHOR_WORD_END
: r
= add_opcode(reg
, OP_WORD_END
); break;
1427 case ANCHOR_PREC_READ
:
1428 r
= add_opcode(reg
, OP_PUSH_POS
);
1430 r
= compile_tree(node
->target
, reg
);
1432 r
= add_opcode(reg
, OP_POP_POS
);
1435 case ANCHOR_PREC_READ_NOT
:
1436 len
= compile_length_tree(node
->target
, reg
);
1437 if (len
< 0) return len
;
1438 r
= add_opcode_rel_addr(reg
, OP_PUSH_POS_NOT
, len
+ SIZE_OP_FAIL_POS
);
1440 r
= compile_tree(node
->target
, reg
);
1442 r
= add_opcode(reg
, OP_FAIL_POS
);
1445 case ANCHOR_LOOK_BEHIND
:
1448 r
= add_opcode(reg
, OP_LOOK_BEHIND
);
1450 if (node
->char_len
< 0) {
1451 r
= get_char_length_tree(node
->target
, reg
, &n
);
1452 if (r
) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN
;
1456 r
= add_length(reg
, n
);
1458 r
= compile_tree(node
->target
, reg
);
1462 case ANCHOR_LOOK_BEHIND_NOT
:
1465 len
= compile_length_tree(node
->target
, reg
);
1466 r
= add_opcode_rel_addr(reg
, OP_PUSH_LOOK_BEHIND_NOT
,
1467 len
+ SIZE_OP_FAIL_LOOK_BEHIND_NOT
);
1469 if (node
->char_len
< 0) {
1470 r
= get_char_length_tree(node
->target
, reg
, &n
);
1471 if (r
) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN
;
1475 r
= add_length(reg
, n
);
1477 r
= compile_tree(node
->target
, reg
);
1479 r
= add_opcode(reg
, OP_FAIL_LOOK_BEHIND_NOT
);
1484 return ONIGERR_TYPE_BUG
;
1492 compile_length_tree(Node
* node
, regex_t
* reg
)
1501 r
= compile_length_tree(NCAR(node
), reg
);
1502 if (r
< 0) return r
;
1504 } while (IS_NOT_NULL(node
= NCDR(node
)));
1514 r
+= compile_length_tree(NCAR(node
), reg
);
1516 } while (IS_NOT_NULL(node
= NCDR(node
)));
1517 r
+= (SIZE_OP_PUSH
+ SIZE_OP_JUMP
) * (n
- 1);
1522 if (NSTRING_IS_RAW(node
))
1523 r
= compile_length_string_raw_node(NSTR(node
), reg
);
1525 r
= compile_length_string_node(node
, reg
);
1529 r
= compile_length_cclass_node(NCCLASS(node
), reg
);
1539 BRefNode
* br
= NBREF(node
);
1541 #ifdef USE_BACKREF_WITH_LEVEL
1542 if (IS_BACKREF_NEST_LEVEL(br
)) {
1543 r
= SIZE_OPCODE
+ SIZE_OPTION
+ SIZE_LENGTH
+
1544 SIZE_LENGTH
+ (SIZE_MEMNUM
* br
->back_num
);
1548 if (br
->back_num
== 1) {
1549 r
= ((!IS_IGNORECASE(reg
->options
) && br
->back_static
[0] <= 2)
1550 ? SIZE_OPCODE
: (SIZE_OPCODE
+ SIZE_MEMNUM
));
1553 r
= SIZE_OPCODE
+ SIZE_LENGTH
+ (SIZE_MEMNUM
* br
->back_num
);
1558 #ifdef USE_SUBEXP_CALL
1565 r
= compile_length_quantifier_node(NQTFR(node
), reg
);
1569 r
= compile_length_enclose_node(NENCLOSE(node
), reg
);
1573 r
= compile_length_anchor_node(NANCHOR(node
), reg
);
1577 return ONIGERR_TYPE_BUG
;
1585 compile_tree(Node
* node
, regex_t
* reg
)
1587 int n
, type
, len
, pos
, r
= 0;
1593 r
= compile_tree(NCAR(node
), reg
);
1594 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
1602 len
+= compile_length_tree(NCAR(x
), reg
);
1603 if (NCDR(x
) != NULL
) {
1604 len
+= SIZE_OP_PUSH
+ SIZE_OP_JUMP
;
1606 } while (IS_NOT_NULL(x
= NCDR(x
)));
1607 pos
= reg
->used
+ len
; /* goal position */
1610 len
= compile_length_tree(NCAR(node
), reg
);
1611 if (IS_NOT_NULL(NCDR(node
))) {
1612 r
= add_opcode_rel_addr(reg
, OP_PUSH
, len
+ SIZE_OP_JUMP
);
1615 r
= compile_tree(NCAR(node
), reg
);
1617 if (IS_NOT_NULL(NCDR(node
))) {
1618 len
= pos
- (reg
->used
+ SIZE_OP_JUMP
);
1619 r
= add_opcode_rel_addr(reg
, OP_JUMP
, len
);
1622 } while (IS_NOT_NULL(node
= NCDR(node
)));
1627 if (NSTRING_IS_RAW(node
))
1628 r
= compile_string_raw_node(NSTR(node
), reg
);
1630 r
= compile_string_node(node
, reg
);
1634 r
= compile_cclass_node(NCCLASS(node
), reg
);
1641 switch (NCTYPE(node
)->ctype
) {
1642 case ONIGENC_CTYPE_WORD
:
1643 if (NCTYPE(node
)->not != 0) op
= OP_NOT_WORD
;
1647 return ONIGERR_TYPE_BUG
;
1650 r
= add_opcode(reg
, op
);
1655 if (IS_MULTILINE(reg
->options
))
1656 r
= add_opcode(reg
, OP_ANYCHAR_ML
);
1658 r
= add_opcode(reg
, OP_ANYCHAR
);
1663 BRefNode
* br
= NBREF(node
);
1665 #ifdef USE_BACKREF_WITH_LEVEL
1666 if (IS_BACKREF_NEST_LEVEL(br
)) {
1667 r
= add_opcode(reg
, OP_BACKREF_WITH_LEVEL
);
1669 r
= add_option(reg
, (reg
->options
& ONIG_OPTION_IGNORECASE
));
1671 r
= add_length(reg
, br
->nest_level
);
1674 goto add_bacref_mems
;
1678 if (br
->back_num
== 1) {
1679 n
= br
->back_static
[0];
1680 if (IS_IGNORECASE(reg
->options
)) {
1681 r
= add_opcode(reg
, OP_BACKREFN_IC
);
1683 r
= add_mem_num(reg
, n
);
1687 case 1: r
= add_opcode(reg
, OP_BACKREF1
); break;
1688 case 2: r
= add_opcode(reg
, OP_BACKREF2
); break;
1690 r
= add_opcode(reg
, OP_BACKREFN
);
1692 r
= add_mem_num(reg
, n
);
1701 if (IS_IGNORECASE(reg
->options
)) {
1702 r
= add_opcode(reg
, OP_BACKREF_MULTI_IC
);
1705 r
= add_opcode(reg
, OP_BACKREF_MULTI
);
1709 #ifdef USE_BACKREF_WITH_LEVEL
1712 r
= add_length(reg
, br
->back_num
);
1715 for (i
= br
->back_num
- 1; i
>= 0; i
--) {
1716 r
= add_mem_num(reg
, p
[i
]);
1723 #ifdef USE_SUBEXP_CALL
1725 r
= compile_call(NCALL(node
), reg
);
1730 r
= compile_quantifier_node(NQTFR(node
), reg
);
1734 r
= compile_enclose_node(NENCLOSE(node
), reg
);
1738 r
= compile_anchor_node(NANCHOR(node
), reg
);
1743 fprintf(stderr
, "compile_tree: undefined node type %d\n", NTYPE(node
));
1751 #ifdef USE_NAMED_GROUP
1754 noname_disable_map(Node
** plink
, GroupNumRemap
* map
, int* counter
)
1757 Node
* node
= *plink
;
1759 switch (NTYPE(node
)) {
1763 r
= noname_disable_map(&(NCAR(node
)), map
, counter
);
1764 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
1769 Node
** ptarget
= &(NQTFR(node
)->target
);
1770 Node
* old
= *ptarget
;
1771 r
= noname_disable_map(ptarget
, map
, counter
);
1772 if (*ptarget
!= old
&& NTYPE(*ptarget
) == NT_QTFR
) {
1773 onig_reduce_nested_quantifier(node
, *ptarget
);
1780 EncloseNode
* en
= NENCLOSE(node
);
1781 if (en
->type
== ENCLOSE_MEMORY
) {
1782 if (IS_ENCLOSE_NAMED_GROUP(en
)) {
1784 map
[en
->regnum
].new_val
= *counter
;
1785 en
->regnum
= *counter
;
1786 r
= noname_disable_map(&(en
->target
), map
, counter
);
1789 *plink
= en
->target
;
1790 en
->target
= NULL_NODE
;
1791 onig_node_free(node
);
1792 r
= noname_disable_map(plink
, map
, counter
);
1796 r
= noname_disable_map(&(en
->target
), map
, counter
);
1808 renumber_node_backref(Node
* node
, GroupNumRemap
* map
)
1810 int i
, pos
, n
, old_num
;
1812 BRefNode
* bn
= NBREF(node
);
1814 if (! IS_BACKREF_NAME_REF(bn
))
1815 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED
;
1817 old_num
= bn
->back_num
;
1818 if (IS_NULL(bn
->back_dynamic
))
1819 backs
= bn
->back_static
;
1821 backs
= bn
->back_dynamic
;
1823 for (i
= 0, pos
= 0; i
< old_num
; i
++) {
1824 n
= map
[backs
[i
]].new_val
;
1836 renumber_by_map(Node
* node
, GroupNumRemap
* map
)
1840 switch (NTYPE(node
)) {
1844 r
= renumber_by_map(NCAR(node
), map
);
1845 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
1848 r
= renumber_by_map(NQTFR(node
)->target
, map
);
1851 r
= renumber_by_map(NENCLOSE(node
)->target
, map
);
1855 r
= renumber_node_backref(node
, map
);
1866 numbered_ref_check(Node
* node
)
1870 switch (NTYPE(node
)) {
1874 r
= numbered_ref_check(NCAR(node
));
1875 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
1878 r
= numbered_ref_check(NQTFR(node
)->target
);
1881 r
= numbered_ref_check(NENCLOSE(node
)->target
);
1885 if (! IS_BACKREF_NAME_REF(NBREF(node
)))
1886 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED
;
1897 disable_noname_group_capture(Node
** root
, regex_t
* reg
, ScanEnv
* env
)
1899 int r
, i
, pos
, counter
;
1904 map
= (GroupNumRemap
* )xmalloc(sizeof(GroupNumRemap
) * (env
->num_mem
+ 1));
1905 CHECK_NULL_RETURN_MEMERR(map
);
1906 for (i
= 1; i
<= env
->num_mem
; i
++) {
1910 r
= noname_disable_map(root
, map
, &counter
);
1911 if (r
!= 0) return r
;
1913 r
= renumber_by_map(*root
, map
);
1914 if (r
!= 0) return r
;
1916 for (i
= 1, pos
= 1; i
<= env
->num_mem
; i
++) {
1917 if (map
[i
].new_val
> 0) {
1918 SCANENV_MEM_NODES(env
)[pos
] = SCANENV_MEM_NODES(env
)[i
];
1923 loc
= env
->capture_history
;
1924 BIT_STATUS_CLEAR(env
->capture_history
);
1925 for (i
= 1; i
<= ONIG_MAX_CAPTURE_HISTORY_GROUP
; i
++) {
1926 if (BIT_STATUS_AT(loc
, i
)) {
1927 BIT_STATUS_ON_AT_SIMPLE(env
->capture_history
, map
[i
].new_val
);
1931 env
->num_mem
= env
->num_named
;
1932 reg
->num_mem
= env
->num_named
;
1934 Result
= onig_renumber_name_table(reg
, map
);
1938 #endif /* USE_NAMED_GROUP */
1940 #ifdef USE_SUBEXP_CALL
1942 unset_addr_list_fix(UnsetAddrList
* uslist
, regex_t
* reg
)
1948 for (i
= 0; i
< uslist
->num
; i
++) {
1949 en
= NENCLOSE(uslist
->us
[i
].target
);
1950 if (! IS_ENCLOSE_ADDR_FIXED(en
)) return ONIGERR_PARSER_BUG
;
1951 addr
= en
->call_addr
;
1952 offset
= uslist
->us
[i
].offset
;
1954 BBUF_WRITE(reg
, offset
, &addr
, SIZE_ABSADDR
);
1960 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
1962 quantifiers_memory_node_info(Node
* node
)
1966 switch (NTYPE(node
)) {
1972 v
= quantifiers_memory_node_info(NCAR(node
));
1974 } while (v
>= 0 && IS_NOT_NULL(node
= NCDR(node
)));
1978 #ifdef USE_SUBEXP_CALL
1980 if (IS_CALL_RECURSION(NCALL(node
))) {
1981 return NQ_TARGET_IS_EMPTY_REC
; /* tiny version */
1984 r
= quantifiers_memory_node_info(NCALL(node
)->target
);
1990 QtfrNode
* qn
= NQTFR(node
);
1991 if (qn
->upper
!= 0) {
1992 r
= quantifiers_memory_node_info(qn
->target
);
1999 EncloseNode
* en
= NENCLOSE(node
);
2001 case ENCLOSE_MEMORY
:
2002 return NQ_TARGET_IS_EMPTY_MEM
;
2005 case ENCLOSE_OPTION
:
2006 case ENCLOSE_STOP_BACKTRACK
:
2007 r
= quantifiers_memory_node_info(en
->target
);
2027 #endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */
2030 get_min_match_length(Node
* node
, OnigDistance
*min
, ScanEnv
* env
)
2036 switch (NTYPE(node
)) {
2041 Node
** nodes
= SCANENV_MEM_NODES(env
);
2042 BRefNode
* br
= NBREF(node
);
2043 if (br
->state
& NST_RECURSION
) break;
2045 backs
= BACKREFS_P(br
);
2046 if (backs
[0] > env
->num_mem
) return ONIGERR_INVALID_BACKREF
;
2047 r
= get_min_match_length(nodes
[backs
[0]], min
, env
);
2049 for (i
= 1; i
< br
->back_num
; i
++) {
2050 if (backs
[i
] > env
->num_mem
) return ONIGERR_INVALID_BACKREF
;
2051 r
= get_min_match_length(nodes
[backs
[i
]], &tmin
, env
);
2053 if (*min
> tmin
) *min
= tmin
;
2058 #ifdef USE_SUBEXP_CALL
2060 if (IS_CALL_RECURSION(NCALL(node
))) {
2061 EncloseNode
* en
= NENCLOSE(NCALL(node
)->target
);
2062 if (IS_ENCLOSE_MIN_FIXED(en
))
2066 r
= get_min_match_length(NCALL(node
)->target
, min
, env
);
2072 r
= get_min_match_length(NCAR(node
), &tmin
, env
);
2073 if (r
== 0) *min
+= tmin
;
2074 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
2083 r
= get_min_match_length(x
, &tmin
, env
);
2085 if (y
== node
) *min
= tmin
;
2086 else if (*min
> tmin
) *min
= tmin
;
2087 } while (r
== 0 && IS_NOT_NULL(y
= NCDR(y
)));
2093 StrNode
* sn
= NSTR(node
);
2094 *min
= (OnigDistance
)(sn
->end
- sn
->s
);
2109 QtfrNode
* qn
= NQTFR(node
);
2111 if (qn
->lower
> 0) {
2112 r
= get_min_match_length(qn
->target
, min
, env
);
2114 *min
= distance_multiply(*min
, qn
->lower
);
2121 EncloseNode
* en
= NENCLOSE(node
);
2123 case ENCLOSE_MEMORY
:
2124 #ifdef USE_SUBEXP_CALL
2125 if (IS_ENCLOSE_MIN_FIXED(en
))
2128 r
= get_min_match_length(en
->target
, min
, env
);
2131 SET_ENCLOSE_STATUS(node
, NST_MIN_FIXED
);
2136 case ENCLOSE_OPTION
:
2137 case ENCLOSE_STOP_BACKTRACK
:
2138 r
= get_min_match_length(en
->target
, min
, env
);
2153 get_max_match_length(Node
* node
, OnigDistance
*max
, ScanEnv
* env
)
2159 switch (NTYPE(node
)) {
2162 r
= get_max_match_length(NCAR(node
), &tmax
, env
);
2164 *max
= distance_add(*max
, tmax
);
2165 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
2170 r
= get_max_match_length(NCAR(node
), &tmax
, env
);
2171 if (r
== 0 && *max
< tmax
) *max
= tmax
;
2172 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
2177 StrNode
* sn
= NSTR(node
);
2178 *max
= (OnigDistance
)(sn
->end
- sn
->s
);
2183 *max
= ONIGENC_MBC_MAXLEN_DIST(env
->enc
);
2188 *max
= ONIGENC_MBC_MAXLEN_DIST(env
->enc
);
2195 Node
** nodes
= SCANENV_MEM_NODES(env
);
2196 BRefNode
* br
= NBREF(node
);
2197 if (br
->state
& NST_RECURSION
) {
2198 *max
= ONIG_INFINITE_DISTANCE
;
2201 backs
= BACKREFS_P(br
);
2202 for (i
= 0; i
< br
->back_num
; i
++) {
2203 if (backs
[i
] > env
->num_mem
) return ONIGERR_INVALID_BACKREF
;
2204 r
= get_max_match_length(nodes
[backs
[i
]], &tmax
, env
);
2206 if (*max
< tmax
) *max
= tmax
;
2211 #ifdef USE_SUBEXP_CALL
2213 if (! IS_CALL_RECURSION(NCALL(node
)))
2214 r
= get_max_match_length(NCALL(node
)->target
, max
, env
);
2216 *max
= ONIG_INFINITE_DISTANCE
;
2222 QtfrNode
* qn
= NQTFR(node
);
2224 if (qn
->upper
!= 0) {
2225 r
= get_max_match_length(qn
->target
, max
, env
);
2226 if (r
== 0 && *max
!= 0) {
2227 if (! IS_REPEAT_INFINITE(qn
->upper
))
2228 *max
= distance_multiply(*max
, qn
->upper
);
2230 *max
= ONIG_INFINITE_DISTANCE
;
2238 EncloseNode
* en
= NENCLOSE(node
);
2240 case ENCLOSE_MEMORY
:
2241 #ifdef USE_SUBEXP_CALL
2242 if (IS_ENCLOSE_MAX_FIXED(en
))
2245 r
= get_max_match_length(en
->target
, max
, env
);
2248 SET_ENCLOSE_STATUS(node
, NST_MAX_FIXED
);
2253 case ENCLOSE_OPTION
:
2254 case ENCLOSE_STOP_BACKTRACK
:
2255 r
= get_max_match_length(en
->target
, max
, env
);
2269 #define GET_CHAR_LEN_VARLEN -1
2270 #define GET_CHAR_LEN_TOP_ALT_VARLEN -2
2272 /* fixed size pattern node only */
2274 get_char_length_tree1(Node
* node
, regex_t
* reg
, int* len
, int level
)
2281 switch (NTYPE(node
)) {
2284 r
= get_char_length_tree1(NCAR(node
), reg
, &tlen
, level
);
2286 *len
= distance_add(*len
, tlen
);
2287 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
2295 r
= get_char_length_tree1(NCAR(node
), reg
, &tlen
, level
);
2296 while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
))) {
2297 r
= get_char_length_tree1(NCAR(node
), reg
, &tlen2
, level
);
2306 r
= GET_CHAR_LEN_TOP_ALT_VARLEN
;
2308 r
= GET_CHAR_LEN_VARLEN
;
2318 StrNode
* sn
= NSTR(node
);
2320 while (s
< sn
->end
) {
2321 s
+= enclen(reg
->enc
, s
);
2329 QtfrNode
* qn
= NQTFR(node
);
2330 if (qn
->lower
== qn
->upper
) {
2331 r
= get_char_length_tree1(qn
->target
, reg
, &tlen
, level
);
2333 *len
= distance_multiply(tlen
, qn
->lower
);
2336 r
= GET_CHAR_LEN_VARLEN
;
2340 #ifdef USE_SUBEXP_CALL
2342 if (! IS_CALL_RECURSION(NCALL(node
)))
2343 r
= get_char_length_tree1(NCALL(node
)->target
, reg
, len
, level
);
2345 r
= GET_CHAR_LEN_VARLEN
;
2360 EncloseNode
* en
= NENCLOSE(node
);
2362 case ENCLOSE_MEMORY
:
2363 #ifdef USE_SUBEXP_CALL
2364 if (IS_ENCLOSE_CLEN_FIXED(en
))
2365 *len
= en
->char_len
;
2367 r
= get_char_length_tree1(en
->target
, reg
, len
, level
);
2369 en
->char_len
= *len
;
2370 SET_ENCLOSE_STATUS(node
, NST_CLEN_FIXED
);
2375 case ENCLOSE_OPTION
:
2376 case ENCLOSE_STOP_BACKTRACK
:
2377 r
= get_char_length_tree1(en
->target
, reg
, len
, level
);
2389 r
= GET_CHAR_LEN_VARLEN
;
2397 get_char_length_tree(Node
* node
, regex_t
* reg
, int* len
)
2399 return get_char_length_tree1(node
, reg
, len
, 0);
2402 /* x is not included y ==> 1 : 0 */
2404 is_not_included(Node
* x
, Node
* y
, regex_t
* reg
)
2418 if (NCTYPE(y
)->ctype
== NCTYPE(x
)->ctype
&&
2419 NCTYPE(y
)->not != NCTYPE(x
)->not)
2429 tmp
= x
; x
= y
; y
= tmp
;
2446 CClassNode
* xc
= NCCLASS(x
);
2449 switch (NCTYPE(y
)->ctype
) {
2450 case ONIGENC_CTYPE_WORD
:
2451 if (NCTYPE(y
)->not == 0) {
2452 if (IS_NULL(xc
->mbuf
) && !IS_NCCLASS_NOT(xc
)) {
2453 for (i
= 0; i
< SINGLE_BYTE_SIZE
; i
++) {
2454 if (BITSET_AT(xc
->bs
, i
)) {
2455 if (IS_CODE_SB_WORD(reg
->enc
, i
)) return 0;
2463 for (i
= 0; i
< SINGLE_BYTE_SIZE
; i
++) {
2464 if (! IS_CODE_SB_WORD(reg
->enc
, i
)) {
2465 if (!IS_NCCLASS_NOT(xc
)) {
2466 if (BITSET_AT(xc
->bs
, i
))
2470 if (! BITSET_AT(xc
->bs
, i
))
2487 CClassNode
* yc
= NCCLASS(y
);
2489 for (i
= 0; i
< SINGLE_BYTE_SIZE
; i
++) {
2490 v
= BITSET_AT(xc
->bs
, i
);
2491 if ((v
!= 0 && !IS_NCCLASS_NOT(xc
)) ||
2492 (v
== 0 && IS_NCCLASS_NOT(xc
))) {
2493 v
= BITSET_AT(yc
->bs
, i
);
2494 if ((v
!= 0 && !IS_NCCLASS_NOT(yc
)) ||
2495 (v
== 0 && IS_NCCLASS_NOT(yc
)))
2499 if ((IS_NULL(xc
->mbuf
) && !IS_NCCLASS_NOT(xc
)) ||
2500 (IS_NULL(yc
->mbuf
) && !IS_NCCLASS_NOT(yc
)))
2518 StrNode
* xs
= NSTR(x
);
2519 if (NSTRING_LEN(x
) == 0)
2525 switch (NCTYPE(y
)->ctype
) {
2526 case ONIGENC_CTYPE_WORD
:
2527 if (ONIGENC_IS_MBC_WORD(reg
->enc
, xs
->s
, xs
->end
))
2528 return NCTYPE(y
)->not;
2530 return !(NCTYPE(y
)->not);
2539 CClassNode
* cc
= NCCLASS(y
);
2541 code
= ONIGENC_MBC_TO_CODE(reg
->enc
, xs
->s
,
2542 xs
->s
+ ONIGENC_MBC_MAXLEN(reg
->enc
));
2543 return (onig_is_code_in_cc(reg
->enc
, code
, cc
) != 0 ? 0 : 1);
2550 StrNode
* ys
= NSTR(y
);
2551 len
= NSTRING_LEN(x
);
2552 if (len
> NSTRING_LEN(y
)) len
= NSTRING_LEN(y
);
2553 if (NSTRING_IS_AMBIG(x
) || NSTRING_IS_AMBIG(y
)) {
2558 for (i
= 0, p
= ys
->s
, q
= xs
->s
; i
< len
; i
++, p
++, q
++) {
2559 if (*p
!= *q
) return 1;
2579 get_head_value_node(Node
* node
, int exact
, regex_t
* reg
)
2581 Node
* n
= NULL_NODE
;
2583 switch (NTYPE(node
)) {
2587 #ifdef USE_SUBEXP_CALL
2600 n
= get_head_value_node(NCAR(node
), exact
, reg
);
2605 StrNode
* sn
= NSTR(node
);
2607 if (sn
->end
<= sn
->s
)
2611 !NSTRING_IS_RAW(node
) && IS_IGNORECASE(reg
->options
)) {
2621 QtfrNode
* qn
= NQTFR(node
);
2622 if (qn
->lower
> 0) {
2623 if (IS_NOT_NULL(qn
->head_exact
))
2626 n
= get_head_value_node(qn
->target
, exact
, reg
);
2633 EncloseNode
* en
= NENCLOSE(node
);
2635 case ENCLOSE_OPTION
:
2637 OnigOptionType options
= reg
->options
;
2639 reg
->options
= NENCLOSE(node
)->option
;
2640 n
= get_head_value_node(NENCLOSE(node
)->target
, exact
, reg
);
2641 reg
->options
= options
;
2645 case ENCLOSE_MEMORY
:
2646 case ENCLOSE_STOP_BACKTRACK
:
2647 n
= get_head_value_node(en
->target
, exact
, reg
);
2654 if (NANCHOR(node
)->type
== ANCHOR_PREC_READ
)
2655 n
= get_head_value_node(NANCHOR(node
)->target
, exact
, reg
);
2666 check_type_tree(Node
* node
, int type_mask
, int enclose_mask
, int anchor_mask
)
2671 if ((NTYPE2BIT(type
) & type_mask
) == 0)
2678 r
= check_type_tree(NCAR(node
), type_mask
, enclose_mask
,
2680 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
2684 r
= check_type_tree(NQTFR(node
)->target
, type_mask
, enclose_mask
,
2690 EncloseNode
* en
= NENCLOSE(node
);
2691 if ((en
->type
& enclose_mask
) == 0)
2694 r
= check_type_tree(en
->target
, type_mask
, enclose_mask
, anchor_mask
);
2699 type
= NANCHOR(node
)->type
;
2700 if ((type
& anchor_mask
) == 0)
2703 if (NANCHOR(node
)->target
)
2704 r
= check_type_tree(NANCHOR(node
)->target
,
2705 type_mask
, enclose_mask
, anchor_mask
);
2714 #ifdef USE_SUBEXP_CALL
2716 #define RECURSION_EXIST 1
2717 #define RECURSION_INFINITE 2
2720 subexp_inf_recursive_check(Node
* node
, ScanEnv
* env
, int head
)
2735 ret
= subexp_inf_recursive_check(NCAR(x
), env
, head
);
2736 if (ret
< 0 || ret
== RECURSION_INFINITE
) return ret
;
2739 ret
= get_min_match_length(NCAR(x
), &min
, env
);
2740 if (ret
!= 0) return ret
;
2741 if (min
!= 0) head
= 0;
2743 } while (IS_NOT_NULL(x
= NCDR(x
)));
2750 r
= RECURSION_EXIST
;
2752 ret
= subexp_inf_recursive_check(NCAR(node
), env
, head
);
2753 if (ret
< 0 || ret
== RECURSION_INFINITE
) return ret
;
2755 } while (IS_NOT_NULL(node
= NCDR(node
)));
2760 r
= subexp_inf_recursive_check(NQTFR(node
)->target
, env
, head
);
2761 if (r
== RECURSION_EXIST
) {
2762 if (NQTFR(node
)->lower
== 0) r
= 0;
2768 AnchorNode
* an
= NANCHOR(node
);
2770 case ANCHOR_PREC_READ
:
2771 case ANCHOR_PREC_READ_NOT
:
2772 case ANCHOR_LOOK_BEHIND
:
2773 case ANCHOR_LOOK_BEHIND_NOT
:
2774 r
= subexp_inf_recursive_check(an
->target
, env
, head
);
2781 r
= subexp_inf_recursive_check(NCALL(node
)->target
, env
, head
);
2785 if (IS_ENCLOSE_MARK2(NENCLOSE(node
)))
2787 else if (IS_ENCLOSE_MARK1(NENCLOSE(node
)))
2788 return (head
== 0 ? RECURSION_EXIST
: RECURSION_INFINITE
);
2790 SET_ENCLOSE_STATUS(node
, NST_MARK2
);
2791 r
= subexp_inf_recursive_check(NENCLOSE(node
)->target
, env
, head
);
2792 CLEAR_ENCLOSE_STATUS(node
, NST_MARK2
);
2804 subexp_inf_recursive_check_trav(Node
* node
, ScanEnv
* env
)
2814 r
= subexp_inf_recursive_check_trav(NCAR(node
), env
);
2815 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
2819 r
= subexp_inf_recursive_check_trav(NQTFR(node
)->target
, env
);
2824 AnchorNode
* an
= NANCHOR(node
);
2826 case ANCHOR_PREC_READ
:
2827 case ANCHOR_PREC_READ_NOT
:
2828 case ANCHOR_LOOK_BEHIND
:
2829 case ANCHOR_LOOK_BEHIND_NOT
:
2830 r
= subexp_inf_recursive_check_trav(an
->target
, env
);
2838 EncloseNode
* en
= NENCLOSE(node
);
2840 if (IS_ENCLOSE_RECURSION(en
)) {
2841 SET_ENCLOSE_STATUS(node
, NST_MARK1
);
2842 r
= subexp_inf_recursive_check(en
->target
, env
, 1);
2843 if (r
> 0) return ONIGERR_NEVER_ENDING_RECURSION
;
2844 CLEAR_ENCLOSE_STATUS(node
, NST_MARK1
);
2846 r
= subexp_inf_recursive_check_trav(en
->target
, env
);
2859 subexp_recursive_check(Node
* node
)
2863 switch (NTYPE(node
)) {
2867 r
|= subexp_recursive_check(NCAR(node
));
2868 } while (IS_NOT_NULL(node
= NCDR(node
)));
2872 r
= subexp_recursive_check(NQTFR(node
)->target
);
2877 AnchorNode
* an
= NANCHOR(node
);
2879 case ANCHOR_PREC_READ
:
2880 case ANCHOR_PREC_READ_NOT
:
2881 case ANCHOR_LOOK_BEHIND
:
2882 case ANCHOR_LOOK_BEHIND_NOT
:
2883 r
= subexp_recursive_check(an
->target
);
2890 r
= subexp_recursive_check(NCALL(node
)->target
);
2891 if (r
!= 0) SET_CALL_RECURSION(node
);
2895 if (IS_ENCLOSE_MARK2(NENCLOSE(node
)))
2897 else if (IS_ENCLOSE_MARK1(NENCLOSE(node
)))
2898 return 1; /* recursion */
2900 SET_ENCLOSE_STATUS(node
, NST_MARK2
);
2901 r
= subexp_recursive_check(NENCLOSE(node
)->target
);
2902 CLEAR_ENCLOSE_STATUS(node
, NST_MARK2
);
2915 subexp_recursive_check_trav(Node
* node
, ScanEnv
* env
)
2917 #define FOUND_CALLED_NODE 1
2929 ret
= subexp_recursive_check_trav(NCAR(node
), env
);
2930 if (ret
== FOUND_CALLED_NODE
) r
= FOUND_CALLED_NODE
;
2931 else if (ret
< 0) return ret
;
2932 } while (IS_NOT_NULL(node
= NCDR(node
)));
2937 r
= subexp_recursive_check_trav(NQTFR(node
)->target
, env
);
2938 if (NQTFR(node
)->upper
== 0) {
2939 if (r
== FOUND_CALLED_NODE
)
2940 NQTFR(node
)->is_refered
= 1;
2946 AnchorNode
* an
= NANCHOR(node
);
2948 case ANCHOR_PREC_READ
:
2949 case ANCHOR_PREC_READ_NOT
:
2950 case ANCHOR_LOOK_BEHIND
:
2951 case ANCHOR_LOOK_BEHIND_NOT
:
2952 r
= subexp_recursive_check_trav(an
->target
, env
);
2960 EncloseNode
* en
= NENCLOSE(node
);
2962 if (! IS_ENCLOSE_RECURSION(en
)) {
2963 if (IS_ENCLOSE_CALLED(en
)) {
2964 SET_ENCLOSE_STATUS(node
, NST_MARK1
);
2965 r
= subexp_recursive_check(en
->target
);
2966 if (r
!= 0) SET_ENCLOSE_STATUS(node
, NST_RECURSION
);
2967 CLEAR_ENCLOSE_STATUS(node
, NST_MARK1
);
2970 r
= subexp_recursive_check_trav(en
->target
, env
);
2971 if (IS_ENCLOSE_CALLED(en
))
2972 r
|= FOUND_CALLED_NODE
;
2984 setup_subexp_call(Node
* node
, ScanEnv
* env
)
2993 r
= setup_subexp_call(NCAR(node
), env
);
2994 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
2999 r
= setup_subexp_call(NCAR(node
), env
);
3000 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
3004 r
= setup_subexp_call(NQTFR(node
)->target
, env
);
3007 r
= setup_subexp_call(NENCLOSE(node
)->target
, env
);
3012 CallNode
* cn
= NCALL(node
);
3013 Node
** nodes
= SCANENV_MEM_NODES(env
);
3015 if (cn
->group_num
!= 0) {
3016 int gnum
= cn
->group_num
;
3018 #ifdef USE_NAMED_GROUP
3019 if (env
->num_named
> 0 &&
3020 IS_SYNTAX_BV(env
->syntax
, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP
) &&
3021 !ONIG_IS_OPTION_ON(env
->option
, ONIG_OPTION_CAPTURE_GROUP
)) {
3022 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED
;
3025 if (gnum
> env
->num_mem
) {
3026 onig_scan_env_set_error_string(env
,
3027 ONIGERR_UNDEFINED_GROUP_REFERENCE
, cn
->name
, cn
->name_end
);
3028 return ONIGERR_UNDEFINED_GROUP_REFERENCE
;
3031 #ifdef USE_NAMED_GROUP
3034 cn
->target
= nodes
[cn
->group_num
];
3035 if (IS_NULL(cn
->target
)) {
3036 onig_scan_env_set_error_string(env
,
3037 ONIGERR_UNDEFINED_NAME_REFERENCE
, cn
->name
, cn
->name_end
);
3038 return ONIGERR_UNDEFINED_NAME_REFERENCE
;
3040 SET_ENCLOSE_STATUS(cn
->target
, NST_CALLED
);
3041 BIT_STATUS_ON_AT(env
->bt_mem_start
, cn
->group_num
);
3042 cn
->unset_addr_list
= env
->unset_addr_list
;
3044 #ifdef USE_NAMED_GROUP
3048 int n
= onig_name_to_group_numbers(env
->reg
, cn
->name
, cn
->name_end
,
3051 onig_scan_env_set_error_string(env
,
3052 ONIGERR_UNDEFINED_NAME_REFERENCE
, cn
->name
, cn
->name_end
);
3053 return ONIGERR_UNDEFINED_NAME_REFERENCE
;
3056 onig_scan_env_set_error_string(env
,
3057 ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL
, cn
->name
, cn
->name_end
);
3058 return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL
;
3061 cn
->group_num
= refs
[0];
3071 AnchorNode
* an
= NANCHOR(node
);
3074 case ANCHOR_PREC_READ
:
3075 case ANCHOR_PREC_READ_NOT
:
3076 case ANCHOR_LOOK_BEHIND
:
3077 case ANCHOR_LOOK_BEHIND_NOT
:
3078 r
= setup_subexp_call(an
->target
, env
);
3092 /* divide different length alternatives in look-behind.
3093 (?<=A|B) ==> (?<=A)|(?<=B)
3094 (?<!A|B) ==> (?<!A)(?<!B)
3097 divide_look_behind_alternatives(Node
* node
)
3099 Node
*head
, *np
, *insert_node
;
3100 AnchorNode
* an
= NANCHOR(node
);
3101 int anc_type
= an
->type
;
3105 swap_node(node
, head
);
3107 NANCHOR(head
)->target
= np
;
3110 while ((np
= NCDR(np
)) != NULL_NODE
) {
3111 insert_node
= onig_node_new_anchor(anc_type
);
3112 CHECK_NULL_RETURN_MEMERR(insert_node
);
3113 NANCHOR(insert_node
)->target
= NCAR(np
);
3114 NCAR(np
) = insert_node
;
3117 if (anc_type
== ANCHOR_LOOK_BEHIND_NOT
) {
3120 SET_NTYPE(np
, NT_LIST
); /* alt -> list */
3121 } while ((np
= NCDR(np
)) != NULL_NODE
);
3127 setup_look_behind(Node
* node
, regex_t
* reg
, ScanEnv
* env
)
3130 AnchorNode
* an
= NANCHOR(node
);
3132 r
= get_char_length_tree(an
->target
, reg
, &len
);
3135 else if (r
== GET_CHAR_LEN_VARLEN
)
3136 r
= ONIGERR_INVALID_LOOK_BEHIND_PATTERN
;
3137 else if (r
== GET_CHAR_LEN_TOP_ALT_VARLEN
) {
3138 if (IS_SYNTAX_BV(env
->syntax
, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND
))
3139 r
= divide_look_behind_alternatives(node
);
3141 r
= ONIGERR_INVALID_LOOK_BEHIND_PATTERN
;
3148 next_setup(Node
* node
, Node
* next_node
, regex_t
* reg
)
3154 if (type
== NT_QTFR
) {
3155 QtfrNode
* qn
= NQTFR(node
);
3156 if (qn
->greedy
&& IS_REPEAT_INFINITE(qn
->upper
)) {
3157 #ifdef USE_QTFR_PEEK_NEXT
3158 Node
* n
= get_head_value_node(next_node
, 1, reg
);
3159 /* '\0': for UTF-16BE etc... */
3160 if (IS_NOT_NULL(n
) && NSTR(n
)->s
[0] != '\0') {
3161 qn
->next_head_exact
= n
;
3164 /* automatic posseivation a*b ==> (?>a*)b */
3165 if (qn
->lower
<= 1) {
3166 int ttype
= NTYPE(qn
->target
);
3167 if (IS_NODE_TYPE_SIMPLE(ttype
)) {
3169 x
= get_head_value_node(qn
->target
, 0, reg
);
3170 if (IS_NOT_NULL(x
)) {
3171 y
= get_head_value_node(next_node
, 0, reg
);
3172 if (IS_NOT_NULL(y
) && is_not_included(x
, y
, reg
)) {
3173 Node
* en
= onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK
);
3174 CHECK_NULL_RETURN_MEMERR(en
);
3175 SET_ENCLOSE_STATUS(en
, NST_STOP_BT_SIMPLE_REPEAT
);
3176 swap_node(node
, en
);
3177 NENCLOSE(node
)->target
= en
;
3184 else if (type
== NT_ENCLOSE
) {
3185 EncloseNode
* en
= NENCLOSE(node
);
3186 if (en
->type
== ENCLOSE_MEMORY
) {
3196 update_string_node_case_fold(regex_t
* reg
, Node
*node
)
3198 UChar
*p
, *end
, buf
[ONIGENC_MBC_CASE_FOLD_MAXLEN
];
3199 UChar
*sbuf
, *ebuf
, *sp
;
3200 int r
, i
, len
, sbuf_size
;
3201 StrNode
* sn
= NSTR(node
);
3204 sbuf_size
= (int)(end
- sn
->s
) * 2;
3205 sbuf
= (UChar
* )xmalloc(sbuf_size
);
3206 CHECK_NULL_RETURN_MEMERR(sbuf
);
3207 ebuf
= sbuf
+ sbuf_size
;
3212 len
= ONIGENC_MBC_CASE_FOLD(reg
->enc
, reg
->case_fold_flag
, &p
, end
, buf
);
3213 for (i
= 0; i
< len
; i
++) {
3215 sbuf
= (UChar
* )xrealloc(sbuf
, sbuf_size
* 2, sbuf_size
);
3216 CHECK_NULL_RETURN_MEMERR(sbuf
);
3217 sp
= sbuf
+ sbuf_size
;
3219 ebuf
= sbuf
+ sbuf_size
;
3226 r
= onig_node_str_set(node
, sbuf
, sp
);
3237 expand_case_fold_make_rem_string(Node
** rnode
, UChar
*s
, UChar
*end
,
3243 node
= onig_node_new_str(s
, end
);
3244 if (IS_NULL(node
)) return ONIGERR_MEMORY
;
3246 r
= update_string_node_case_fold(reg
, node
);
3248 onig_node_free(node
);
3252 NSTRING_SET_AMBIG(node
);
3253 NSTRING_SET_DONT_GET_OPT_INFO(node
);
3259 expand_case_fold_string_alt(int item_num
, OnigCaseFoldCodeItem items
[],
3260 UChar
*p
, int slen
, UChar
*end
,
3261 regex_t
* reg
, Node
**rnode
)
3263 int r
, i
, j
, len
, varlen
;
3264 Node
*anode
, *var_anode
, *snode
, *xnode
, *an
;
3265 UChar buf
[ONIGENC_CODE_TO_MBC_MAXLEN
];
3267 *rnode
= var_anode
= NULL_NODE
;
3270 for (i
= 0; i
< item_num
; i
++) {
3271 if (items
[i
].byte_len
!= slen
) {
3278 *rnode
= var_anode
= onig_node_new_alt(NULL_NODE
, NULL_NODE
);
3279 if (IS_NULL(var_anode
)) return ONIGERR_MEMORY
;
3281 xnode
= onig_node_new_list(NULL
, NULL
);
3282 if (IS_NULL(xnode
)) goto mem_err
;
3283 NCAR(var_anode
) = xnode
;
3285 anode
= onig_node_new_alt(NULL_NODE
, NULL_NODE
);
3286 if (IS_NULL(anode
)) goto mem_err
;
3287 NCAR(xnode
) = anode
;
3290 *rnode
= anode
= onig_node_new_alt(NULL_NODE
, NULL_NODE
);
3291 if (IS_NULL(anode
)) return ONIGERR_MEMORY
;
3294 snode
= onig_node_new_str(p
, p
+ slen
);
3295 if (IS_NULL(snode
)) goto mem_err
;
3297 NCAR(anode
) = snode
;
3299 for (i
= 0; i
< item_num
; i
++) {
3300 snode
= onig_node_new_str(NULL
, NULL
);
3301 if (IS_NULL(snode
)) goto mem_err
;
3303 for (j
= 0; j
< items
[i
].code_len
; j
++) {
3304 len
= ONIGENC_CODE_TO_MBC(reg
->enc
, items
[i
].code
[j
], buf
);
3310 r
= onig_node_str_cat(snode
, buf
, buf
+ len
);
3311 if (r
!= 0) goto mem_err2
;
3314 an
= onig_node_new_alt(NULL_NODE
, NULL_NODE
);
3319 if (items
[i
].byte_len
!= slen
) {
3321 UChar
*q
= p
+ items
[i
].byte_len
;
3324 r
= expand_case_fold_make_rem_string(&rem
, q
, end
, reg
);
3330 xnode
= onig_node_list_add(NULL_NODE
, snode
);
3331 if (IS_NULL(xnode
)) {
3333 onig_node_free(rem
);
3336 if (IS_NULL(onig_node_list_add(xnode
, rem
))) {
3338 onig_node_free(xnode
);
3339 onig_node_free(rem
);
3349 NCDR(var_anode
) = an
;
3362 onig_node_free(snode
);
3365 onig_node_free(*rnode
);
3367 return ONIGERR_MEMORY
;
3371 expand_case_fold_string(Node
* node
, regex_t
* reg
)
3373 #define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
3375 int r
, n
, len
, alt_num
;
3376 UChar
*start
, *end
, *p
;
3377 Node
*top_root
, *root
, *snode
, *prev_node
;
3378 OnigCaseFoldCodeItem items
[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM
];
3379 StrNode
* sn
= NSTR(node
);
3381 if (NSTRING_IS_AMBIG(node
)) return 0;
3385 if (start
>= end
) return 0;
3388 top_root
= root
= prev_node
= snode
= NULL_NODE
;
3392 n
= ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg
->enc
, reg
->case_fold_flag
,
3399 len
= enclen(reg
->enc
, p
);
3402 if (IS_NULL(snode
)) {
3403 if (IS_NULL(root
) && IS_NOT_NULL(prev_node
)) {
3404 top_root
= root
= onig_node_list_add(NULL_NODE
, prev_node
);
3405 if (IS_NULL(root
)) {
3406 onig_node_free(prev_node
);
3411 prev_node
= snode
= onig_node_new_str(NULL
, NULL
);
3412 if (IS_NULL(snode
)) goto mem_err
;
3413 if (IS_NOT_NULL(root
)) {
3414 if (IS_NULL(onig_node_list_add(root
, snode
))) {
3415 onig_node_free(snode
);
3421 r
= onig_node_str_cat(snode
, p
, p
+ len
);
3422 if (r
!= 0) goto err
;
3426 if (alt_num
> THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION
) break;
3428 if (IS_NULL(root
) && IS_NOT_NULL(prev_node
)) {
3429 top_root
= root
= onig_node_list_add(NULL_NODE
, prev_node
);
3430 if (IS_NULL(root
)) {
3431 onig_node_free(prev_node
);
3436 r
= expand_case_fold_string_alt(n
, items
, p
, len
, end
, reg
, &prev_node
);
3437 if (r
< 0) goto mem_err
;
3439 if (IS_NULL(root
)) {
3440 top_root
= prev_node
;
3443 if (IS_NULL(onig_node_list_add(root
, prev_node
))) {
3444 onig_node_free(prev_node
);
3449 root
= NCAR(prev_node
);
3452 if (IS_NOT_NULL(root
)) {
3453 if (IS_NULL(onig_node_list_add(root
, prev_node
))) {
3454 onig_node_free(prev_node
);
3469 r
= expand_case_fold_make_rem_string(&srem
, p
, end
, reg
);
3470 if (r
!= 0) goto mem_err
;
3472 if (IS_NOT_NULL(prev_node
) && IS_NULL(root
)) {
3473 top_root
= root
= onig_node_list_add(NULL_NODE
, prev_node
);
3474 if (IS_NULL(root
)) {
3475 onig_node_free(srem
);
3476 onig_node_free(prev_node
);
3481 if (IS_NULL(root
)) {
3485 if (IS_NULL(onig_node_list_add(root
, srem
))) {
3486 onig_node_free(srem
);
3493 top_root
= (IS_NOT_NULL(top_root
) ? top_root
: prev_node
);
3494 swap_node(node
, top_root
);
3495 onig_node_free(top_root
);
3502 onig_node_free(top_root
);
3507 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3509 #define CEC_THRES_NUM_BIG_REPEAT 512
3510 #define CEC_INFINITE_NUM 0x7fffffff
3512 #define CEC_IN_INFINITE_REPEAT (1<<0)
3513 #define CEC_IN_FINITE_REPEAT (1<<1)
3514 #define CEC_CONT_BIG_REPEAT (1<<2)
3517 setup_comb_exp_check(Node
* node
, int state
, ScanEnv
* env
)
3526 Node
* prev
= NULL_NODE
;
3528 r
= setup_comb_exp_check(NCAR(node
), r
, env
);
3530 } while (r
>= 0 && IS_NOT_NULL(node
= NCDR(node
)));
3538 ret
= setup_comb_exp_check(NCAR(node
), state
, env
);
3540 } while (ret
>= 0 && IS_NOT_NULL(node
= NCDR(node
)));
3546 int child_state
= state
;
3548 QtfrNode
* qn
= NQTFR(node
);
3549 Node
* target
= qn
->target
;
3552 if (! IS_REPEAT_INFINITE(qn
->upper
)) {
3553 if (qn
->upper
> 1) {
3554 /* {0,1}, {1,1} are allowed */
3555 child_state
|= CEC_IN_FINITE_REPEAT
;
3557 /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
3558 if (env
->backrefed_mem
== 0) {
3559 if (NTYPE(qn
->target
) == NT_ENCLOSE
) {
3560 EncloseNode
* en
= NENCLOSE(qn
->target
);
3561 if (en
->type
== ENCLOSE_MEMORY
) {
3562 if (NTYPE(en
->target
) == NT_QTFR
) {
3563 QtfrNode
* q
= NQTFR(en
->target
);
3564 if (IS_REPEAT_INFINITE(q
->upper
)
3565 && q
->greedy
== qn
->greedy
) {
3566 qn
->upper
= (qn
->lower
== 0 ? 1 : qn
->lower
);
3568 child_state
= state
;
3577 if (state
& CEC_IN_FINITE_REPEAT
) {
3578 qn
->comb_exp_check_num
= -1;
3581 if (IS_REPEAT_INFINITE(qn
->upper
)) {
3582 var_num
= CEC_INFINITE_NUM
;
3583 child_state
|= CEC_IN_INFINITE_REPEAT
;
3586 var_num
= qn
->upper
- qn
->lower
;
3589 if (var_num
>= CEC_THRES_NUM_BIG_REPEAT
)
3590 add_state
|= CEC_CONT_BIG_REPEAT
;
3592 if (((state
& CEC_IN_INFINITE_REPEAT
) != 0 && var_num
!= 0) ||
3593 ((state
& CEC_CONT_BIG_REPEAT
) != 0 &&
3594 var_num
>= CEC_THRES_NUM_BIG_REPEAT
)) {
3595 if (qn
->comb_exp_check_num
== 0) {
3596 env
->num_comb_exp_check
++;
3597 qn
->comb_exp_check_num
= env
->num_comb_exp_check
;
3598 if (env
->curr_max_regnum
> env
->comb_exp_max_regnum
)
3599 env
->comb_exp_max_regnum
= env
->curr_max_regnum
;
3604 r
= setup_comb_exp_check(target
, child_state
, env
);
3611 EncloseNode
* en
= NENCLOSE(node
);
3614 case ENCLOSE_MEMORY
:
3616 if (env
->curr_max_regnum
< en
->regnum
)
3617 env
->curr_max_regnum
= en
->regnum
;
3619 r
= setup_comb_exp_check(en
->target
, state
, env
);
3624 r
= setup_comb_exp_check(en
->target
, state
, env
);
3630 #ifdef USE_SUBEXP_CALL
3632 if (IS_CALL_RECURSION(NCALL(node
)))
3633 env
->has_recursion
= 1;
3635 r
= setup_comb_exp_check(NCALL(node
)->target
, state
, env
);
3647 #define IN_ALT (1<<0)
3648 #define IN_NOT (1<<1)
3649 #define IN_REPEAT (1<<2)
3650 #define IN_VAR_REPEAT (1<<3)
3652 /* setup_tree does the following work.
3653 1. check empty loop. (set qn->target_empty_info)
3654 2. expand ignore-case in char class.
3655 3. set memory status bit flags. (reg->mem_stats)
3656 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
3657 5. find invalid patterns in look-behind.
3658 6. expand repeated string.
3661 setup_tree(Node
* node
, regex_t
* reg
, int state
, ScanEnv
* env
)
3670 Node
* prev
= NULL_NODE
;
3672 r
= setup_tree(NCAR(node
), reg
, state
, env
);
3673 if (IS_NOT_NULL(prev
) && r
== 0) {
3674 r
= next_setup(prev
, NCAR(node
), reg
);
3677 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
3683 r
= setup_tree(NCAR(node
), reg
, (state
| IN_ALT
), env
);
3684 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
3691 if (IS_IGNORECASE(reg
->options
) && !NSTRING_IS_RAW(node
)) {
3692 r
= expand_case_fold_string(node
, reg
);
3700 #ifdef USE_SUBEXP_CALL
3709 Node
** nodes
= SCANENV_MEM_NODES(env
);
3710 BRefNode
* br
= NBREF(node
);
3712 for (i
= 0; i
< br
->back_num
; i
++) {
3713 if (p
[i
] > env
->num_mem
) return ONIGERR_INVALID_BACKREF
;
3714 BIT_STATUS_ON_AT(env
->backrefed_mem
, p
[i
]);
3715 BIT_STATUS_ON_AT(env
->bt_mem_start
, p
[i
]);
3716 #ifdef USE_BACKREF_WITH_LEVEL
3717 if (IS_BACKREF_NEST_LEVEL(br
)) {
3718 BIT_STATUS_ON_AT(env
->bt_mem_end
, p
[i
]);
3721 SET_ENCLOSE_STATUS(nodes
[p
[i
]], NST_MEM_BACKREFED
);
3729 QtfrNode
* qn
= NQTFR(node
);
3730 Node
* target
= qn
->target
;
3732 if ((state
& IN_REPEAT
) != 0) {
3733 qn
->state
|= NST_IN_REPEAT
;
3736 if (IS_REPEAT_INFINITE(qn
->upper
) || qn
->upper
>= 1) {
3737 r
= get_min_match_length(target
, &d
, env
);
3740 qn
->target_empty_info
= NQ_TARGET_IS_EMPTY
;
3741 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3742 r
= quantifiers_memory_node_info(target
);
3745 qn
->target_empty_info
= r
;
3749 r
= get_max_match_length(target
, &d
, env
);
3750 if (r
== 0 && d
== 0) {
3751 /* ()* ==> ()?, ()+ ==> () */
3753 if (qn
->lower
> 1) qn
->lower
= 1;
3754 if (NTYPE(target
) == NT_STR
) {
3755 qn
->upper
= qn
->lower
= 0; /* /(?:)+/ ==> // */
3763 if (qn
->lower
!= qn
->upper
)
3764 state
|= IN_VAR_REPEAT
;
3765 r
= setup_tree(target
, reg
, state
, env
);
3769 #define EXPAND_STRING_MAX_LENGTH 100
3770 if (NTYPE(target
) == NT_STR
) {
3771 if (!IS_REPEAT_INFINITE(qn
->lower
) && qn
->lower
== qn
->upper
&&
3772 qn
->lower
> 1 && qn
->lower
<= EXPAND_STRING_MAX_LENGTH
) {
3773 int len
= NSTRING_LEN(target
);
3774 StrNode
* sn
= NSTR(target
);
3776 if (len
* qn
->lower
<= EXPAND_STRING_MAX_LENGTH
) {
3777 int i
, n
= qn
->lower
;
3778 onig_node_conv_to_str_node(node
, NSTR(target
)->flag
);
3779 for (i
= 0; i
< n
; i
++) {
3780 r
= onig_node_str_cat(node
, sn
->s
, sn
->end
);
3783 onig_node_free(target
);
3784 break; /* break case NT_QTFR: */
3789 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
3790 if (qn
->greedy
&& (qn
->target_empty_info
!= 0)) {
3791 if (NTYPE(target
) == NT_QTFR
) {
3792 QtfrNode
* tqn
= NQTFR(target
);
3793 if (IS_NOT_NULL(tqn
->head_exact
)) {
3794 qn
->head_exact
= tqn
->head_exact
;
3795 tqn
->head_exact
= NULL
;
3799 qn
->head_exact
= get_head_value_node(qn
->target
, 1, reg
);
3808 EncloseNode
* en
= NENCLOSE(node
);
3811 case ENCLOSE_OPTION
:
3813 OnigOptionType options
= reg
->options
;
3814 reg
->options
= NENCLOSE(node
)->option
;
3815 r
= setup_tree(NENCLOSE(node
)->target
, reg
, state
, env
);
3816 reg
->options
= options
;
3820 case ENCLOSE_MEMORY
:
3821 if ((state
& (IN_ALT
| IN_NOT
| IN_VAR_REPEAT
)) != 0) {
3822 BIT_STATUS_ON_AT(env
->bt_mem_start
, en
->regnum
);
3823 /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
3825 r
= setup_tree(en
->target
, reg
, state
, env
);
3828 case ENCLOSE_STOP_BACKTRACK
:
3830 Node
* target
= en
->target
;
3831 r
= setup_tree(target
, reg
, state
, env
);
3832 if (NTYPE(target
) == NT_QTFR
) {
3833 QtfrNode
* tqn
= NQTFR(target
);
3834 if (IS_REPEAT_INFINITE(tqn
->upper
) && tqn
->lower
<= 1 &&
3835 tqn
->greedy
!= 0) { /* (?>a*), a*+ etc... */
3836 int qtype
= NTYPE(tqn
->target
);
3837 if (IS_NODE_TYPE_SIMPLE(qtype
))
3838 SET_ENCLOSE_STATUS(node
, NST_STOP_BT_SIMPLE_REPEAT
);
3849 AnchorNode
* an
= NANCHOR(node
);
3852 case ANCHOR_PREC_READ
:
3853 r
= setup_tree(an
->target
, reg
, state
, env
);
3855 case ANCHOR_PREC_READ_NOT
:
3856 r
= setup_tree(an
->target
, reg
, (state
| IN_NOT
), env
);
3859 /* allowed node types in look-behind */
3860 #define ALLOWED_TYPE_IN_LB \
3861 ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
3862 BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
3864 #define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY )
3865 #define ALLOWED_ENCLOSE_IN_LB_NOT 0
3867 #define ALLOWED_ANCHOR_IN_LB \
3868 ( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
3869 #define ALLOWED_ANCHOR_IN_LB_NOT \
3870 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
3872 case ANCHOR_LOOK_BEHIND
:
3874 r
= check_type_tree(an
->target
, ALLOWED_TYPE_IN_LB
,
3875 ALLOWED_ENCLOSE_IN_LB
, ALLOWED_ANCHOR_IN_LB
);
3876 if (r
< 0) return r
;
3877 if (r
> 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN
;
3878 r
= setup_look_behind(node
, reg
, env
);
3879 if (r
!= 0) return r
;
3880 r
= setup_tree(an
->target
, reg
, state
, env
);
3884 case ANCHOR_LOOK_BEHIND_NOT
:
3886 r
= check_type_tree(an
->target
, ALLOWED_TYPE_IN_LB
,
3887 ALLOWED_ENCLOSE_IN_LB_NOT
, ALLOWED_ANCHOR_IN_LB_NOT
);
3888 if (r
< 0) return r
;
3889 if (r
> 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN
;
3890 r
= setup_look_behind(node
, reg
, env
);
3891 if (r
!= 0) return r
;
3892 r
= setup_tree(an
->target
, reg
, (state
| IN_NOT
), env
);
3906 /* set skip map for Boyer-Moor search */
3908 set_bm_skip(UChar
* s
, UChar
* end
, OnigEncoding enc ARG_UNUSED
,
3909 UChar skip
[], int** int_skip
)
3913 len
= (int)(end
- s
);
3914 if (len
< ONIG_CHAR_TABLE_SIZE
) {
3915 for (i
= 0; i
< ONIG_CHAR_TABLE_SIZE
; i
++) skip
[i
] = (UChar
)len
;
3917 for (i
= 0; i
< len
- 1; i
++)
3918 skip
[s
[i
]] = (UChar
)(len
- 1 - i
);
3921 if (IS_NULL(*int_skip
)) {
3922 *int_skip
= (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE
);
3923 if (IS_NULL(*int_skip
)) return ONIGERR_MEMORY
;
3925 for (i
= 0; i
< ONIG_CHAR_TABLE_SIZE
; i
++) (*int_skip
)[i
] = len
;
3927 for (i
= 0; i
< len
- 1; i
++)
3928 (*int_skip
)[s
[i
]] = len
- 1 - i
;
3933 #define OPT_EXACT_MAXLEN 24
3936 OnigDistance min
; /* min byte length */
3937 OnigDistance max
; /* max byte length */
3943 OnigOptionType options
;
3944 OnigCaseFoldType case_fold_flag
;
3954 MinMaxLen mmd
; /* info position */
3960 UChar s
[OPT_EXACT_MAXLEN
];
3964 MinMaxLen mmd
; /* info position */
3967 int value
; /* weighted value */
3968 UChar map
[ONIG_CHAR_TABLE_SIZE
];
3975 OptExactInfo exb
; /* boundary */
3976 OptExactInfo exm
; /* middle */
3977 OptExactInfo expr
; /* prec read (?=...) */
3979 OptMapInfo map
; /* boundary */
3984 map_position_value(OnigEncoding enc
, int i
)
3986 static const short int ByteValTable
[] = {
3987 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
3988 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3989 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
3990 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
3991 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
3992 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
3993 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
3994 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
3997 if (i
< (int )(sizeof(ByteValTable
)/sizeof(ByteValTable
[0]))) {
3998 if (i
== 0 && ONIGENC_MBC_MINLEN(enc
) > 1)
4001 return (int )ByteValTable
[i
];
4004 return 4; /* Take it easy. */
4008 distance_value(MinMaxLen
* mm
)
4010 /* 1000 / (min-max-dist + 1) */
4011 static const short int dist_vals
[] = {
4012 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
4013 91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
4014 48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
4015 32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
4016 24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
4017 20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
4018 16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
4019 14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
4020 12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
4021 11, 11, 11, 11, 11, 10, 10, 10, 10, 10
4026 if (mm
->max
== ONIG_INFINITE_DISTANCE
) return 0;
4028 d
= mm
->max
- mm
->min
;
4029 if (d
< (int )(sizeof(dist_vals
)/sizeof(dist_vals
[0])))
4030 /* return dist_vals[d] * 16 / (mm->min + 12); */
4031 return (int )dist_vals
[d
];
4037 comp_distance_value(MinMaxLen
* d1
, MinMaxLen
* d2
, int v1
, int v2
)
4039 if (v2
<= 0) return -1;
4040 if (v1
<= 0) return 1;
4042 v1
*= distance_value(d1
);
4043 v2
*= distance_value(d2
);
4045 if (v2
> v1
) return 1;
4046 if (v2
< v1
) return -1;
4048 if (d2
->min
< d1
->min
) return 1;
4049 if (d2
->min
> d1
->min
) return -1;
4054 is_equal_mml(MinMaxLen
* a
, MinMaxLen
* b
)
4056 return (a
->min
== b
->min
&& a
->max
== b
->max
) ? 1 : 0;
4061 set_mml(MinMaxLen
* mml
, OnigDistance min
, OnigDistance max
)
4068 clear_mml(MinMaxLen
* mml
)
4070 mml
->min
= mml
->max
= 0;
4074 copy_mml(MinMaxLen
* to
, MinMaxLen
* from
)
4076 to
->min
= from
->min
;
4077 to
->max
= from
->max
;
4081 add_mml(MinMaxLen
* to
, MinMaxLen
* from
)
4083 to
->min
= distance_add(to
->min
, from
->min
);
4084 to
->max
= distance_add(to
->max
, from
->max
);
4089 add_len_mml(MinMaxLen
* to
, OnigDistance len
)
4091 to
->min
= distance_add(to
->min
, len
);
4092 to
->max
= distance_add(to
->max
, len
);
4097 alt_merge_mml(MinMaxLen
* to
, MinMaxLen
* from
)
4099 if (to
->min
> from
->min
) to
->min
= from
->min
;
4100 if (to
->max
< from
->max
) to
->max
= from
->max
;
4104 copy_opt_env(OptEnv
* to
, OptEnv
* from
)
4110 clear_opt_anc_info(OptAncInfo
* anc
)
4112 anc
->left_anchor
= 0;
4113 anc
->right_anchor
= 0;
4117 copy_opt_anc_info(OptAncInfo
* to
, OptAncInfo
* from
)
4123 concat_opt_anc_info(OptAncInfo
* to
, OptAncInfo
* left
, OptAncInfo
* right
,
4124 OnigDistance left_len
, OnigDistance right_len
)
4126 clear_opt_anc_info(to
);
4128 to
->left_anchor
= left
->left_anchor
;
4129 if (left_len
== 0) {
4130 to
->left_anchor
|= right
->left_anchor
;
4133 to
->right_anchor
= right
->right_anchor
;
4134 if (right_len
== 0) {
4135 to
->right_anchor
|= left
->right_anchor
;
4140 is_left_anchor(int anc
)
4142 if (anc
== ANCHOR_END_BUF
|| anc
== ANCHOR_SEMI_END_BUF
||
4143 anc
== ANCHOR_END_LINE
|| anc
== ANCHOR_PREC_READ
||
4144 anc
== ANCHOR_PREC_READ_NOT
)
4151 is_set_opt_anc_info(OptAncInfo
* to
, int anc
)
4153 if ((to
->left_anchor
& anc
) != 0) return 1;
4155 return ((to
->right_anchor
& anc
) != 0 ? 1 : 0);
4159 add_opt_anc_info(OptAncInfo
* to
, int anc
)
4161 if (is_left_anchor(anc
))
4162 to
->left_anchor
|= anc
;
4164 to
->right_anchor
|= anc
;
4168 remove_opt_anc_info(OptAncInfo
* to
, int anc
)
4170 if (is_left_anchor(anc
))
4171 to
->left_anchor
&= ~anc
;
4173 to
->right_anchor
&= ~anc
;
4177 alt_merge_opt_anc_info(OptAncInfo
* to
, OptAncInfo
* add
)
4179 to
->left_anchor
&= add
->left_anchor
;
4180 to
->right_anchor
&= add
->right_anchor
;
4184 is_full_opt_exact_info(OptExactInfo
* ex
)
4186 return (ex
->len
>= OPT_EXACT_MAXLEN
? 1 : 0);
4190 clear_opt_exact_info(OptExactInfo
* ex
)
4192 clear_mml(&ex
->mmd
);
4193 clear_opt_anc_info(&ex
->anc
);
4195 ex
->ignore_case
= 0;
4201 copy_opt_exact_info(OptExactInfo
* to
, OptExactInfo
* from
)
4207 concat_opt_exact_info(OptExactInfo
* to
, OptExactInfo
* add
, OnigEncoding enc
)
4213 if (! to
->ignore_case
&& add
->ignore_case
) {
4214 if (to
->len
>= add
->len
) return ; /* avoid */
4216 to
->ignore_case
= 1;
4221 for (i
= to
->len
; p
< end
; ) {
4222 len
= enclen(enc
, p
);
4223 if (i
+ len
> OPT_EXACT_MAXLEN
) break;
4224 for (j
= 0; j
< len
&& p
< end
; j
++)
4229 to
->reach_end
= (p
== end
? add
->reach_end
: 0);
4231 concat_opt_anc_info(&tanc
, &to
->anc
, &add
->anc
, 1, 1);
4232 if (! to
->reach_end
) tanc
.right_anchor
= 0;
4233 copy_opt_anc_info(&to
->anc
, &tanc
);
4237 concat_opt_exact_info_str(OptExactInfo
* to
, UChar
* s
, UChar
* end
,
4238 int raw ARG_UNUSED
, OnigEncoding enc
)
4243 for (i
= to
->len
, p
= s
; p
< end
&& i
< OPT_EXACT_MAXLEN
; ) {
4244 len
= enclen(enc
, p
);
4245 if (i
+ len
> OPT_EXACT_MAXLEN
) break;
4246 for (j
= 0; j
< len
&& p
< end
; j
++)
4254 alt_merge_opt_exact_info(OptExactInfo
* to
, OptExactInfo
* add
, OptEnv
* env
)
4258 if (add
->len
== 0 || to
->len
== 0) {
4259 clear_opt_exact_info(to
);
4263 if (! is_equal_mml(&to
->mmd
, &add
->mmd
)) {
4264 clear_opt_exact_info(to
);
4268 for (i
= 0; i
< to
->len
&& i
< add
->len
; ) {
4269 if (to
->s
[i
] != add
->s
[i
]) break;
4270 len
= enclen(env
->enc
, to
->s
+ i
);
4272 for (j
= 1; j
< len
; j
++) {
4273 if (to
->s
[i
+j
] != add
->s
[i
+j
]) break;
4279 if (! add
->reach_end
|| i
< add
->len
|| i
< to
->len
) {
4283 to
->ignore_case
|= add
->ignore_case
;
4285 alt_merge_opt_anc_info(&to
->anc
, &add
->anc
);
4286 if (! to
->reach_end
) to
->anc
.right_anchor
= 0;
4290 select_opt_exact_info(OnigEncoding enc
, OptExactInfo
* now
, OptExactInfo
* alt
)
4301 copy_opt_exact_info(now
, alt
);
4304 else if (v1
<= 2 && v2
<= 2) {
4305 /* ByteValTable[x] is big value --> low price */
4306 v2
= map_position_value(enc
, now
->s
[0]);
4307 v1
= map_position_value(enc
, alt
->s
[0]);
4309 if (now
->len
> 1) v1
+= 5;
4310 if (alt
->len
> 1) v2
+= 5;
4313 if (now
->ignore_case
== 0) v1
*= 2;
4314 if (alt
->ignore_case
== 0) v2
*= 2;
4316 if (comp_distance_value(&now
->mmd
, &alt
->mmd
, v1
, v2
) > 0)
4317 copy_opt_exact_info(now
, alt
);
4321 clear_opt_map_info(OptMapInfo
* map
)
4323 static const OptMapInfo clean_info
= {
4326 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4327 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4328 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4329 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4330 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4331 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4332 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4333 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4334 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4335 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4336 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4337 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4338 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4339 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4340 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4341 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4345 xmemcpy(map
, &clean_info
, sizeof(OptMapInfo
));
4349 copy_opt_map_info(OptMapInfo
* to
, OptMapInfo
* from
)
4355 add_char_opt_map_info(OptMapInfo
* map
, UChar c
, OnigEncoding enc
)
4357 if (map
->map
[c
] == 0) {
4359 map
->value
+= map_position_value(enc
, c
);
4364 add_char_amb_opt_map_info(OptMapInfo
* map
, UChar
* p
, UChar
* end
,
4365 OnigEncoding enc
, OnigCaseFoldType case_fold_flag
)
4367 OnigCaseFoldCodeItem items
[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM
];
4368 UChar buf
[ONIGENC_CODE_TO_MBC_MAXLEN
];
4371 add_char_opt_map_info(map
, p
[0], enc
);
4373 case_fold_flag
= DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag
);
4374 n
= ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc
, case_fold_flag
, p
, end
, items
);
4375 if (n
< 0) return n
;
4377 for (i
= 0; i
< n
; i
++) {
4378 ONIGENC_CODE_TO_MBC(enc
, items
[i
].code
[0], buf
);
4379 add_char_opt_map_info(map
, buf
[0], enc
);
4386 select_opt_map_info(OptMapInfo
* now
, OptMapInfo
* alt
)
4388 static int z
= 1<<15; /* 32768: something big value */
4392 if (alt
->value
== 0) return ;
4393 if (now
->value
== 0) {
4394 copy_opt_map_info(now
, alt
);
4398 v1
= z
/ now
->value
;
4399 v2
= z
/ alt
->value
;
4400 if (comp_distance_value(&now
->mmd
, &alt
->mmd
, v1
, v2
) > 0)
4401 copy_opt_map_info(now
, alt
);
4405 comp_opt_exact_or_map_info(OptExactInfo
* e
, OptMapInfo
* m
)
4407 #define COMP_EM_BASE 20
4410 if (m
->value
<= 0) return -1;
4412 ve
= COMP_EM_BASE
* e
->len
* (e
->ignore_case
? 1 : 2);
4413 vm
= COMP_EM_BASE
* 5 * 2 / m
->value
;
4414 return comp_distance_value(&e
->mmd
, &m
->mmd
, ve
, vm
);
4418 alt_merge_opt_map_info(OnigEncoding enc
, OptMapInfo
* to
, OptMapInfo
* add
)
4422 /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
4423 if (to
->value
== 0) return ;
4424 if (add
->value
== 0 || to
->mmd
.max
< add
->mmd
.min
) {
4425 clear_opt_map_info(to
);
4429 alt_merge_mml(&to
->mmd
, &add
->mmd
);
4432 for (i
= 0; i
< ONIG_CHAR_TABLE_SIZE
; i
++) {
4437 val
+= map_position_value(enc
, i
);
4441 alt_merge_opt_anc_info(&to
->anc
, &add
->anc
);
4445 set_bound_node_opt_info(NodeOptInfo
* opt
, MinMaxLen
* mmd
)
4447 copy_mml(&(opt
->exb
.mmd
), mmd
);
4448 copy_mml(&(opt
->expr
.mmd
), mmd
);
4449 copy_mml(&(opt
->map
.mmd
), mmd
);
4453 clear_node_opt_info(NodeOptInfo
* opt
)
4455 clear_mml(&opt
->len
);
4456 clear_opt_anc_info(&opt
->anc
);
4457 clear_opt_exact_info(&opt
->exb
);
4458 clear_opt_exact_info(&opt
->exm
);
4459 clear_opt_exact_info(&opt
->expr
);
4460 clear_opt_map_info(&opt
->map
);
4464 copy_node_opt_info(NodeOptInfo
* to
, NodeOptInfo
* from
)
4470 concat_left_node_opt_info(OnigEncoding enc
, NodeOptInfo
* to
, NodeOptInfo
* add
)
4472 int exb_reach
, exm_reach
;
4475 concat_opt_anc_info(&tanc
, &to
->anc
, &add
->anc
, to
->len
.max
, add
->len
.max
);
4476 copy_opt_anc_info(&to
->anc
, &tanc
);
4478 if (add
->exb
.len
> 0 && to
->len
.max
== 0) {
4479 concat_opt_anc_info(&tanc
, &to
->anc
, &add
->exb
.anc
,
4480 to
->len
.max
, add
->len
.max
);
4481 copy_opt_anc_info(&add
->exb
.anc
, &tanc
);
4484 if (add
->map
.value
> 0 && to
->len
.max
== 0) {
4485 if (add
->map
.mmd
.max
== 0)
4486 add
->map
.anc
.left_anchor
|= to
->anc
.left_anchor
;
4489 exb_reach
= to
->exb
.reach_end
;
4490 exm_reach
= to
->exm
.reach_end
;
4492 if (add
->len
.max
!= 0)
4493 to
->exb
.reach_end
= to
->exm
.reach_end
= 0;
4495 if (add
->exb
.len
> 0) {
4497 concat_opt_exact_info(&to
->exb
, &add
->exb
, enc
);
4498 clear_opt_exact_info(&add
->exb
);
4500 else if (exm_reach
) {
4501 concat_opt_exact_info(&to
->exm
, &add
->exb
, enc
);
4502 clear_opt_exact_info(&add
->exb
);
4505 select_opt_exact_info(enc
, &to
->exm
, &add
->exb
);
4506 select_opt_exact_info(enc
, &to
->exm
, &add
->exm
);
4508 if (to
->expr
.len
> 0) {
4509 if (add
->len
.max
> 0) {
4510 if (to
->expr
.len
> (int )add
->len
.max
)
4511 to
->expr
.len
= add
->len
.max
;
4513 if (to
->expr
.mmd
.max
== 0)
4514 select_opt_exact_info(enc
, &to
->exb
, &to
->expr
);
4516 select_opt_exact_info(enc
, &to
->exm
, &to
->expr
);
4519 else if (add
->expr
.len
> 0) {
4520 copy_opt_exact_info(&to
->expr
, &add
->expr
);
4523 select_opt_map_info(&to
->map
, &add
->map
);
4525 add_mml(&to
->len
, &add
->len
);
4529 alt_merge_node_opt_info(NodeOptInfo
* to
, NodeOptInfo
* add
, OptEnv
* env
)
4531 alt_merge_opt_anc_info (&to
->anc
, &add
->anc
);
4532 alt_merge_opt_exact_info(&to
->exb
, &add
->exb
, env
);
4533 alt_merge_opt_exact_info(&to
->exm
, &add
->exm
, env
);
4534 alt_merge_opt_exact_info(&to
->expr
, &add
->expr
, env
);
4535 alt_merge_opt_map_info(env
->enc
, &to
->map
, &add
->map
);
4537 alt_merge_mml(&to
->len
, &add
->len
);
4541 #define MAX_NODE_OPT_INFO_REF_COUNT 5
4544 optimize_node_left(Node
* node
, NodeOptInfo
* opt
, OptEnv
* env
)
4549 clear_node_opt_info(opt
);
4550 set_bound_node_opt_info(opt
, &env
->mmd
);
4560 copy_opt_env(&nenv
, env
);
4562 r
= optimize_node_left(NCAR(nd
), &nopt
, &nenv
);
4564 add_mml(&nenv
.mmd
, &nopt
.len
);
4565 concat_left_node_opt_info(env
->enc
, opt
, &nopt
);
4567 } while (r
== 0 && IS_NOT_NULL(nd
= NCDR(nd
)));
4577 r
= optimize_node_left(NCAR(nd
), &nopt
, env
);
4579 if (nd
== node
) copy_node_opt_info(opt
, &nopt
);
4580 else alt_merge_node_opt_info(opt
, &nopt
, env
);
4582 } while ((r
== 0) && IS_NOT_NULL(nd
= NCDR(nd
)));
4588 StrNode
* sn
= NSTR(node
);
4589 int slen
= (int)(sn
->end
- sn
->s
);
4590 int is_raw
= NSTRING_IS_RAW(node
);
4592 if (! NSTRING_IS_AMBIG(node
)) {
4593 concat_opt_exact_info_str(&opt
->exb
, sn
->s
, sn
->end
,
4594 NSTRING_IS_RAW(node
), env
->enc
);
4596 add_char_opt_map_info(&opt
->map
, *(sn
->s
), env
->enc
);
4598 set_mml(&opt
->len
, slen
, slen
);
4603 if (NSTRING_IS_DONT_GET_OPT_INFO(node
)) {
4604 int n
= onigenc_strlen(env
->enc
, sn
->s
, sn
->end
);
4605 max
= ONIGENC_MBC_MAXLEN_DIST(env
->enc
) * n
;
4608 concat_opt_exact_info_str(&opt
->exb
, sn
->s
, sn
->end
,
4610 opt
->exb
.ignore_case
= 1;
4613 r
= add_char_amb_opt_map_info(&opt
->map
, sn
->s
, sn
->end
,
4614 env
->enc
, env
->case_fold_flag
);
4621 set_mml(&opt
->len
, slen
, max
);
4624 if (opt
->exb
.len
== slen
)
4625 opt
->exb
.reach_end
= 1;
4632 CClassNode
* cc
= NCCLASS(node
);
4634 /* no need to check ignore case. (setted in setup_tree()) */
4636 if (IS_NOT_NULL(cc
->mbuf
) || IS_NCCLASS_NOT(cc
)) {
4637 OnigDistance min
= ONIGENC_MBC_MINLEN(env
->enc
);
4638 OnigDistance max
= ONIGENC_MBC_MAXLEN_DIST(env
->enc
);
4640 set_mml(&opt
->len
, min
, max
);
4643 for (i
= 0; i
< SINGLE_BYTE_SIZE
; i
++) {
4644 z
= BITSET_AT(cc
->bs
, i
);
4645 if ((z
&& !IS_NCCLASS_NOT(cc
)) || (!z
&& IS_NCCLASS_NOT(cc
))) {
4646 add_char_opt_map_info(&opt
->map
, (UChar
)i
, env
->enc
);
4649 set_mml(&opt
->len
, 1, 1);
4658 max
= ONIGENC_MBC_MAXLEN_DIST(env
->enc
);
4663 switch (NCTYPE(node
)->ctype
) {
4664 case ONIGENC_CTYPE_WORD
:
4665 if (NCTYPE(node
)->not != 0) {
4666 for (i
= 0; i
< SINGLE_BYTE_SIZE
; i
++) {
4667 if (! ONIGENC_IS_CODE_WORD(env
->enc
, i
)) {
4668 add_char_opt_map_info(&opt
->map
, (UChar
)i
, env
->enc
);
4673 for (i
= 0; i
< SINGLE_BYTE_SIZE
; i
++) {
4674 if (ONIGENC_IS_CODE_WORD(env
->enc
, i
)) {
4675 add_char_opt_map_info(&opt
->map
, (UChar
)i
, env
->enc
);
4683 min
= ONIGENC_MBC_MINLEN(env
->enc
);
4685 set_mml(&opt
->len
, min
, max
);
4691 OnigDistance min
= ONIGENC_MBC_MINLEN(env
->enc
);
4692 OnigDistance max
= ONIGENC_MBC_MAXLEN_DIST(env
->enc
);
4693 set_mml(&opt
->len
, min
, max
);
4698 switch (NANCHOR(node
)->type
) {
4699 case ANCHOR_BEGIN_BUF
:
4700 case ANCHOR_BEGIN_POSITION
:
4701 case ANCHOR_BEGIN_LINE
:
4702 case ANCHOR_END_BUF
:
4703 case ANCHOR_SEMI_END_BUF
:
4704 case ANCHOR_END_LINE
:
4705 add_opt_anc_info(&opt
->anc
, NANCHOR(node
)->type
);
4708 case ANCHOR_PREC_READ
:
4712 r
= optimize_node_left(NANCHOR(node
)->target
, &nopt
, env
);
4714 if (nopt
.exb
.len
> 0)
4715 copy_opt_exact_info(&opt
->expr
, &nopt
.exb
);
4716 else if (nopt
.exm
.len
> 0)
4717 copy_opt_exact_info(&opt
->expr
, &nopt
.exm
);
4719 opt
->expr
.reach_end
= 0;
4721 if (nopt
.map
.value
> 0)
4722 copy_opt_map_info(&opt
->map
, &nopt
.map
);
4727 case ANCHOR_PREC_READ_NOT
:
4728 case ANCHOR_LOOK_BEHIND
: /* Sorry, I can't make use of it. */
4729 case ANCHOR_LOOK_BEHIND_NOT
:
4738 OnigDistance min
, max
, tmin
, tmax
;
4739 Node
** nodes
= SCANENV_MEM_NODES(env
->scan_env
);
4740 BRefNode
* br
= NBREF(node
);
4742 if (br
->state
& NST_RECURSION
) {
4743 set_mml(&opt
->len
, 0, ONIG_INFINITE_DISTANCE
);
4746 backs
= BACKREFS_P(br
);
4747 r
= get_min_match_length(nodes
[backs
[0]], &min
, env
->scan_env
);
4749 r
= get_max_match_length(nodes
[backs
[0]], &max
, env
->scan_env
);
4751 for (i
= 1; i
< br
->back_num
; i
++) {
4752 r
= get_min_match_length(nodes
[backs
[i
]], &tmin
, env
->scan_env
);
4754 r
= get_max_match_length(nodes
[backs
[i
]], &tmax
, env
->scan_env
);
4756 if (min
> tmin
) min
= tmin
;
4757 if (max
< tmax
) max
= tmax
;
4759 if (r
== 0) set_mml(&opt
->len
, min
, max
);
4763 #ifdef USE_SUBEXP_CALL
4765 if (IS_CALL_RECURSION(NCALL(node
)))
4766 set_mml(&opt
->len
, 0, ONIG_INFINITE_DISTANCE
);
4768 OnigOptionType save
= env
->options
;
4769 env
->options
= NENCLOSE(NCALL(node
)->target
)->option
;
4770 r
= optimize_node_left(NCALL(node
)->target
, opt
, env
);
4771 env
->options
= save
;
4779 OnigDistance min
, max
;
4781 QtfrNode
* qn
= NQTFR(node
);
4783 r
= optimize_node_left(qn
->target
, &nopt
, env
);
4786 if (qn
->lower
== 0 && IS_REPEAT_INFINITE(qn
->upper
)) {
4787 if (env
->mmd
.max
== 0 &&
4788 NTYPE(qn
->target
) == NT_CANY
&& qn
->greedy
) {
4789 if (IS_MULTILINE(env
->options
))
4790 add_opt_anc_info(&opt
->anc
, ANCHOR_ANYCHAR_STAR_ML
);
4792 add_opt_anc_info(&opt
->anc
, ANCHOR_ANYCHAR_STAR
);
4796 if (qn
->lower
> 0) {
4797 copy_node_opt_info(opt
, &nopt
);
4798 if (nopt
.exb
.len
> 0) {
4799 if (nopt
.exb
.reach_end
) {
4800 for (i
= 2; i
<= qn
->lower
&&
4801 ! is_full_opt_exact_info(&opt
->exb
); i
++) {
4802 concat_opt_exact_info(&opt
->exb
, &nopt
.exb
, env
->enc
);
4804 if (i
< qn
->lower
) {
4805 opt
->exb
.reach_end
= 0;
4810 if (qn
->lower
!= qn
->upper
) {
4811 opt
->exb
.reach_end
= 0;
4812 opt
->exm
.reach_end
= 0;
4815 opt
->exm
.reach_end
= 0;
4819 min
= distance_multiply(nopt
.len
.min
, qn
->lower
);
4820 if (IS_REPEAT_INFINITE(qn
->upper
))
4821 max
= (nopt
.len
.max
> 0 ? ONIG_INFINITE_DISTANCE
: 0);
4823 max
= distance_multiply(nopt
.len
.max
, qn
->upper
);
4825 set_mml(&opt
->len
, min
, max
);
4831 EncloseNode
* en
= NENCLOSE(node
);
4834 case ENCLOSE_OPTION
:
4836 OnigOptionType save
= env
->options
;
4838 env
->options
= en
->option
;
4839 r
= optimize_node_left(en
->target
, opt
, env
);
4840 env
->options
= save
;
4844 case ENCLOSE_MEMORY
:
4845 #ifdef USE_SUBEXP_CALL
4847 if (en
->opt_count
> MAX_NODE_OPT_INFO_REF_COUNT
) {
4848 OnigDistance min
, max
;
4851 max
= ONIG_INFINITE_DISTANCE
;
4852 if (IS_ENCLOSE_MIN_FIXED(en
)) min
= en
->min_len
;
4853 if (IS_ENCLOSE_MAX_FIXED(en
)) max
= en
->max_len
;
4854 set_mml(&opt
->len
, min
, max
);
4859 r
= optimize_node_left(en
->target
, opt
, env
);
4861 if (is_set_opt_anc_info(&opt
->anc
, ANCHOR_ANYCHAR_STAR_MASK
)) {
4862 if (BIT_STATUS_AT(env
->scan_env
->backrefed_mem
, en
->regnum
))
4863 remove_opt_anc_info(&opt
->anc
, ANCHOR_ANYCHAR_STAR_MASK
);
4868 case ENCLOSE_STOP_BACKTRACK
:
4869 r
= optimize_node_left(en
->target
, opt
, env
);
4877 fprintf(stderr
, "optimize_node_left: undefined node type %d\n",
4880 r
= ONIGERR_TYPE_BUG
;
4888 set_optimize_exact_info(regex_t
* reg
, OptExactInfo
* e
)
4892 if (e
->len
== 0) return 0;
4894 if (e
->ignore_case
) {
4895 reg
->exact
= (UChar
* )xmalloc(e
->len
);
4896 CHECK_NULL_RETURN_MEMERR(reg
->exact
);
4897 xmemcpy(reg
->exact
, e
->s
, e
->len
);
4898 reg
->exact_end
= reg
->exact
+ e
->len
;
4899 reg
->optimize
= ONIG_OPTIMIZE_EXACT_IC
;
4904 reg
->exact
= str_dup(e
->s
, e
->s
+ e
->len
);
4905 CHECK_NULL_RETURN_MEMERR(reg
->exact
);
4906 reg
->exact_end
= reg
->exact
+ e
->len
;
4909 ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg
->enc
, reg
->exact
, reg
->exact_end
);
4911 if (e
->len
>= 3 || (e
->len
>= 2 && allow_reverse
)) {
4912 r
= set_bm_skip(reg
->exact
, reg
->exact_end
, reg
->enc
,
4913 reg
->map
, &(reg
->int_map
));
4916 reg
->optimize
= (allow_reverse
!= 0
4917 ? ONIG_OPTIMIZE_EXACT_BM
: ONIG_OPTIMIZE_EXACT_BM_NOT_REV
);
4920 reg
->optimize
= ONIG_OPTIMIZE_EXACT
;
4924 reg
->dmin
= e
->mmd
.min
;
4925 reg
->dmax
= e
->mmd
.max
;
4927 if (reg
->dmin
!= ONIG_INFINITE_DISTANCE
) {
4928 reg
->threshold_len
= reg
->dmin
+ (int)(reg
->exact_end
- reg
->exact
);
4935 set_optimize_map_info(regex_t
* reg
, OptMapInfo
* m
)
4939 for (i
= 0; i
< ONIG_CHAR_TABLE_SIZE
; i
++)
4940 reg
->map
[i
] = m
->map
[i
];
4942 reg
->optimize
= ONIG_OPTIMIZE_MAP
;
4943 reg
->dmin
= m
->mmd
.min
;
4944 reg
->dmax
= m
->mmd
.max
;
4946 if (reg
->dmin
!= ONIG_INFINITE_DISTANCE
) {
4947 reg
->threshold_len
= reg
->dmin
+ 1;
4952 set_sub_anchor(regex_t
* reg
, OptAncInfo
* anc
)
4954 reg
->sub_anchor
|= anc
->left_anchor
& ANCHOR_BEGIN_LINE
;
4955 reg
->sub_anchor
|= anc
->right_anchor
& ANCHOR_END_LINE
;
4959 static void print_optimize_info(FILE* f
, regex_t
* reg
);
4963 set_optimize_info_from_tree(Node
* node
, regex_t
* reg
, ScanEnv
* scan_env
)
4971 env
.options
= reg
->options
;
4972 env
.case_fold_flag
= reg
->case_fold_flag
;
4973 env
.scan_env
= scan_env
;
4974 clear_mml(&env
.mmd
);
4976 r
= optimize_node_left(node
, &opt
, &env
);
4979 reg
->anchor
= opt
.anc
.left_anchor
& (ANCHOR_BEGIN_BUF
|
4980 ANCHOR_BEGIN_POSITION
| ANCHOR_ANYCHAR_STAR
| ANCHOR_ANYCHAR_STAR_ML
);
4982 reg
->anchor
|= opt
.anc
.right_anchor
& (ANCHOR_END_BUF
| ANCHOR_SEMI_END_BUF
);
4984 if (reg
->anchor
& (ANCHOR_END_BUF
| ANCHOR_SEMI_END_BUF
)) {
4985 reg
->anchor_dmin
= opt
.len
.min
;
4986 reg
->anchor_dmax
= opt
.len
.max
;
4989 if (opt
.exb
.len
> 0 || opt
.exm
.len
> 0) {
4990 select_opt_exact_info(reg
->enc
, &opt
.exb
, &opt
.exm
);
4991 if (opt
.map
.value
> 0 &&
4992 comp_opt_exact_or_map_info(&opt
.exb
, &opt
.map
) > 0) {
4996 r
= set_optimize_exact_info(reg
, &opt
.exb
);
4997 set_sub_anchor(reg
, &opt
.exb
.anc
);
5000 else if (opt
.map
.value
> 0) {
5002 set_optimize_map_info(reg
, &opt
.map
);
5003 set_sub_anchor(reg
, &opt
.map
.anc
);
5006 reg
->sub_anchor
|= opt
.anc
.left_anchor
& ANCHOR_BEGIN_LINE
;
5007 if (opt
.len
.max
== 0)
5008 reg
->sub_anchor
|= opt
.anc
.right_anchor
& ANCHOR_END_LINE
;
5011 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5012 print_optimize_info(stderr
, reg
);
5018 clear_optimize_info(regex_t
* reg
)
5020 reg
->optimize
= ONIG_OPTIMIZE_NONE
;
5022 reg
->anchor_dmin
= 0;
5023 reg
->anchor_dmax
= 0;
5024 reg
->sub_anchor
= 0;
5025 reg
->exact_end
= (UChar
* )NULL
;
5026 reg
->threshold_len
= 0;
5027 if (IS_NOT_NULL(reg
->exact
)) {
5029 reg
->exact
= (UChar
* )NULL
;
5035 static void print_enc_string(FILE* fp
, OnigEncoding enc
,
5036 const UChar
*s
, const UChar
*end
)
5038 fprintf(fp
, "\nPATTERN: /");
5040 if (ONIGENC_MBC_MINLEN(enc
) > 1) {
5046 code
= ONIGENC_MBC_TO_CODE(enc
, p
, end
);
5048 fprintf(fp
, " 0x%04x ", (int )code
);
5051 fputc((int )code
, fp
);
5054 p
+= enclen(enc
, p
);
5059 fputc((int )*s
, fp
);
5068 print_distance_range(FILE* f
, OnigDistance a
, OnigDistance b
)
5070 if (a
== ONIG_INFINITE_DISTANCE
)
5073 fprintf(f
, "(%u)", a
);
5077 if (b
== ONIG_INFINITE_DISTANCE
)
5080 fprintf(f
, "(%u)", b
);
5084 print_anchor(FILE* f
, int anchor
)
5090 if (anchor
& ANCHOR_BEGIN_BUF
) {
5091 fprintf(f
, "begin-buf");
5094 if (anchor
& ANCHOR_BEGIN_LINE
) {
5095 if (q
) fprintf(f
, ", ");
5097 fprintf(f
, "begin-line");
5099 if (anchor
& ANCHOR_BEGIN_POSITION
) {
5100 if (q
) fprintf(f
, ", ");
5102 fprintf(f
, "begin-pos");
5104 if (anchor
& ANCHOR_END_BUF
) {
5105 if (q
) fprintf(f
, ", ");
5107 fprintf(f
, "end-buf");
5109 if (anchor
& ANCHOR_SEMI_END_BUF
) {
5110 if (q
) fprintf(f
, ", ");
5112 fprintf(f
, "semi-end-buf");
5114 if (anchor
& ANCHOR_END_LINE
) {
5115 if (q
) fprintf(f
, ", ");
5117 fprintf(f
, "end-line");
5119 if (anchor
& ANCHOR_ANYCHAR_STAR
) {
5120 if (q
) fprintf(f
, ", ");
5122 fprintf(f
, "anychar-star");
5124 if (anchor
& ANCHOR_ANYCHAR_STAR_ML
) {
5125 if (q
) fprintf(f
, ", ");
5126 fprintf(f
, "anychar-star-pl");
5133 print_optimize_info(FILE* f
, regex_t
* reg
)
5135 static const char* on
[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
5136 "EXACT_IC", "MAP" };
5138 fprintf(f
, "optimize: %s\n", on
[reg
->optimize
]);
5139 fprintf(f
, " anchor: "); print_anchor(f
, reg
->anchor
);
5140 if ((reg
->anchor
& ANCHOR_END_BUF_MASK
) != 0)
5141 print_distance_range(f
, reg
->anchor_dmin
, reg
->anchor_dmax
);
5144 if (reg
->optimize
) {
5145 fprintf(f
, " sub anchor: "); print_anchor(f
, reg
->sub_anchor
);
5152 fprintf(f
, "exact: [");
5153 for (p
= reg
->exact
; p
< reg
->exact_end
; p
++) {
5156 fprintf(f
, "]: length: %d\n", (reg
->exact_end
- reg
->exact
));
5158 else if (reg
->optimize
& ONIG_OPTIMIZE_MAP
) {
5161 for (i
= 0; i
< ONIG_CHAR_TABLE_SIZE
; i
++)
5162 if (reg
->map
[i
]) n
++;
5164 fprintf(f
, "map: n=%d\n", n
);
5168 for (i
= 0; i
< ONIG_CHAR_TABLE_SIZE
; i
++) {
5169 if (reg
->map
[i
] != 0) {
5170 if (c
> 0) fputs(", ", f
);
5172 if (ONIGENC_MBC_MAXLEN(reg
->enc
) == 1 &&
5173 ONIGENC_IS_CODE_PRINT(reg
->enc
, (OnigCodePoint
)i
))
5176 fprintf(f
, "%d", i
);
5183 #endif /* ONIG_DEBUG */
5187 onig_free_body(regex_t
* reg
)
5189 if (IS_NOT_NULL(reg
)) {
5190 if (IS_NOT_NULL(reg
->p
)) xfree(reg
->p
);
5191 if (IS_NOT_NULL(reg
->exact
)) xfree(reg
->exact
);
5192 if (IS_NOT_NULL(reg
->int_map
)) xfree(reg
->int_map
);
5193 if (IS_NOT_NULL(reg
->int_map_backward
)) xfree(reg
->int_map_backward
);
5194 if (IS_NOT_NULL(reg
->repeat_range
)) xfree(reg
->repeat_range
);
5195 if (IS_NOT_NULL(reg
->chain
)) onig_free(reg
->chain
);
5197 #ifdef USE_NAMED_GROUP
5198 onig_names_free(reg
);
5204 onig_free(regex_t
* reg
)
5206 if (IS_NOT_NULL(reg
)) {
5207 onig_free_body(reg
);
5212 #define REGEX_TRANSFER(to,from) do {\
5213 (to)->state = ONIG_STATE_MODIFY;\
5214 onig_free_body(to);\
5215 xmemcpy(to, from, sizeof(regex_t));\
5220 onig_transfer(regex_t
* to
, regex_t
* from
)
5222 THREAD_ATOMIC_START
;
5223 REGEX_TRANSFER(to
, from
);
5227 #define REGEX_CHAIN_HEAD(reg) do {\
5228 while (IS_NOT_NULL((reg)->chain)) {\
5229 (reg) = (reg)->chain;\
5234 onig_chain_link_add(regex_t
* to
, regex_t
* add
)
5236 THREAD_ATOMIC_START
;
5237 REGEX_CHAIN_HEAD(to
);
5243 onig_chain_reduce(regex_t
* reg
)
5245 regex_t
*head
, *prev
;
5249 if (IS_NOT_NULL(head
)) {
5250 reg
->state
= ONIG_STATE_MODIFY
;
5251 while (IS_NOT_NULL(head
->chain
)) {
5255 prev
->chain
= (regex_t
* )NULL
;
5256 REGEX_TRANSFER(reg
, head
);
5261 static void print_compiled_byte_code_list
P_((FILE* f
, regex_t
* reg
));
5263 #ifdef ONIG_DEBUG_PARSE_TREE
5264 static void print_tree
P_((FILE* f
, Node
* node
));
5268 onig_compile(regex_t
* reg
, const UChar
* pattern
, const UChar
* pattern_end
,
5269 OnigErrorInfo
* einfo
)
5271 #define COMPILE_INIT_SIZE 20
5276 #ifdef USE_SUBEXP_CALL
5277 UnsetAddrList uslist
;
5280 if (IS_NOT_NULL(einfo
)) einfo
->par
= (UChar
* )NULL
;
5282 reg
->state
= ONIG_STATE_COMPILING
;
5285 print_enc_string(stderr
, reg
->enc
, pattern
, pattern_end
);
5288 if (reg
->alloc
== 0) {
5289 init_size
= ((int)(pattern_end
- pattern
)) * 2;
5290 if (init_size
<= 0) init_size
= COMPILE_INIT_SIZE
;
5291 r
= BBUF_INIT(reg
, init_size
);
5292 if (r
!= 0) goto end
;
5298 reg
->num_repeat
= 0;
5299 reg
->num_null_check
= 0;
5300 reg
->repeat_range_alloc
= 0;
5301 reg
->repeat_range
= (OnigRepeatRange
* )NULL
;
5302 #ifdef USE_COMBINATION_EXPLOSION_CHECK
5303 reg
->num_comb_exp_check
= 0;
5306 r
= onig_parse_make_tree(&root
, pattern
, pattern_end
, reg
, &scan_env
);
5307 if (r
!= 0) goto err
;
5309 #ifdef USE_NAMED_GROUP
5310 /* mixed use named group and no-named group */
5311 if (scan_env
.num_named
> 0 &&
5312 IS_SYNTAX_BV(scan_env
.syntax
, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP
) &&
5313 !ONIG_IS_OPTION_ON(reg
->options
, ONIG_OPTION_CAPTURE_GROUP
)) {
5314 if (scan_env
.num_named
!= scan_env
.num_mem
)
5315 r
= disable_noname_group_capture(&root
, reg
, &scan_env
);
5317 r
= numbered_ref_check(root
);
5319 if (r
!= 0) goto err
;
5323 #ifdef USE_SUBEXP_CALL
5324 if (scan_env
.num_call
> 0) {
5325 r
= unset_addr_list_init(&uslist
, scan_env
.num_call
);
5326 if (r
!= 0) goto err
;
5327 scan_env
.unset_addr_list
= &uslist
;
5328 r
= setup_subexp_call(root
, &scan_env
);
5329 if (r
!= 0) goto err_unset
;
5330 r
= subexp_recursive_check_trav(root
, &scan_env
);
5331 if (r
< 0) goto err_unset
;
5332 r
= subexp_inf_recursive_check_trav(root
, &scan_env
);
5333 if (r
!= 0) goto err_unset
;
5335 reg
->num_call
= scan_env
.num_call
;
5341 r
= setup_tree(root
, reg
, 0, &scan_env
);
5342 if (r
!= 0) goto err_unset
;
5344 #ifdef ONIG_DEBUG_PARSE_TREE
5345 print_tree(stderr
, root
);
5348 reg
->capture_history
= scan_env
.capture_history
;
5349 reg
->bt_mem_start
= scan_env
.bt_mem_start
;
5350 reg
->bt_mem_start
|= reg
->capture_history
;
5351 if (IS_FIND_CONDITION(reg
->options
))
5352 BIT_STATUS_ON_ALL(reg
->bt_mem_end
);
5354 reg
->bt_mem_end
= scan_env
.bt_mem_end
;
5355 reg
->bt_mem_end
|= reg
->capture_history
;
5358 #ifdef USE_COMBINATION_EXPLOSION_CHECK
5359 if (scan_env
.backrefed_mem
== 0
5360 #ifdef USE_SUBEXP_CALL
5361 || scan_env
.num_call
== 0
5364 setup_comb_exp_check(root
, 0, &scan_env
);
5365 #ifdef USE_SUBEXP_CALL
5366 if (scan_env
.has_recursion
!= 0) {
5367 scan_env
.num_comb_exp_check
= 0;
5371 if (scan_env
.comb_exp_max_regnum
> 0) {
5373 for (i
= 1; i
<= scan_env
.comb_exp_max_regnum
; i
++) {
5374 if (BIT_STATUS_AT(scan_env
.backrefed_mem
, i
) != 0) {
5375 scan_env
.num_comb_exp_check
= 0;
5382 reg
->num_comb_exp_check
= scan_env
.num_comb_exp_check
;
5385 clear_optimize_info(reg
);
5386 #ifndef ONIG_DONT_OPTIMIZE
5387 r
= set_optimize_info_from_tree(root
, reg
, &scan_env
);
5388 if (r
!= 0) goto err_unset
;
5391 if (IS_NOT_NULL(scan_env
.mem_nodes_dynamic
)) {
5392 xfree(scan_env
.mem_nodes_dynamic
);
5393 scan_env
.mem_nodes_dynamic
= (Node
** )NULL
;
5396 r
= compile_tree(root
, reg
);
5398 r
= add_opcode(reg
, OP_END
);
5399 #ifdef USE_SUBEXP_CALL
5400 if (scan_env
.num_call
> 0) {
5401 r
= unset_addr_list_fix(&uslist
, reg
);
5402 unset_addr_list_end(&uslist
);
5407 if ((reg
->num_repeat
!= 0) || (reg
->bt_mem_end
!= 0))
5408 reg
->stack_pop_level
= STACK_POP_LEVEL_ALL
;
5410 if (reg
->bt_mem_start
!= 0)
5411 reg
->stack_pop_level
= STACK_POP_LEVEL_MEM_START
;
5413 reg
->stack_pop_level
= STACK_POP_LEVEL_FREE
;
5416 #ifdef USE_SUBEXP_CALL
5417 else if (scan_env
.num_call
> 0) {
5418 unset_addr_list_end(&uslist
);
5421 onig_node_free(root
);
5423 #ifdef ONIG_DEBUG_COMPILE
5424 #ifdef USE_NAMED_GROUP
5425 onig_print_names(stderr
, reg
);
5427 print_compiled_byte_code_list(stderr
, reg
);
5431 reg
->state
= ONIG_STATE_NORMAL
;
5435 #ifdef USE_SUBEXP_CALL
5436 if (scan_env
.num_call
> 0) {
5437 unset_addr_list_end(&uslist
);
5441 if (IS_NOT_NULL(scan_env
.error
)) {
5442 if (IS_NOT_NULL(einfo
)) {
5443 einfo
->enc
= scan_env
.enc
;
5444 einfo
->par
= scan_env
.error
;
5445 einfo
->par_end
= scan_env
.error_end
;
5449 onig_node_free(root
);
5450 if (IS_NOT_NULL(scan_env
.mem_nodes_dynamic
))
5451 xfree(scan_env
.mem_nodes_dynamic
);
5455 #ifdef USE_RECOMPILE_API
5457 onig_recompile(regex_t
* reg
, const UChar
* pattern
, const UChar
* pattern_end
,
5458 OnigOptionType option
, OnigEncoding enc
, OnigSyntaxType
* syntax
,
5459 OnigErrorInfo
* einfo
)
5464 r
= onig_new(&new_reg
, pattern
, pattern_end
, option
, enc
, syntax
, einfo
);
5466 if (ONIG_STATE(reg
) == ONIG_STATE_NORMAL
) {
5467 onig_transfer(reg
, new_reg
);
5470 onig_chain_link_add(reg
, new_reg
);
5476 static int onig_inited
= 0;
5479 onig_reg_init(regex_t
* reg
, OnigOptionType option
,
5480 OnigCaseFoldType case_fold_flag
,
5481 OnigEncoding enc
, OnigSyntaxType
* syntax
)
5487 return ONIGERR_INVALID_ARGUMENT
;
5489 if (ONIGENC_IS_UNDEF(enc
))
5490 return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED
;
5492 if ((option
& (ONIG_OPTION_DONT_CAPTURE_GROUP
|ONIG_OPTION_CAPTURE_GROUP
))
5493 == (ONIG_OPTION_DONT_CAPTURE_GROUP
|ONIG_OPTION_CAPTURE_GROUP
)) {
5494 return ONIGERR_INVALID_COMBINATION_OF_OPTIONS
;
5497 (reg
)->state
= ONIG_STATE_MODIFY
;
5499 if ((option
& ONIG_OPTION_NEGATE_SINGLELINE
) != 0) {
5500 option
|= syntax
->options
;
5501 option
&= ~ONIG_OPTION_SINGLELINE
;
5504 option
|= syntax
->options
;
5507 (reg
)->options
= option
;
5508 (reg
)->syntax
= syntax
;
5509 (reg
)->optimize
= 0;
5510 (reg
)->exact
= (UChar
* )NULL
;
5511 (reg
)->int_map
= (int* )NULL
;
5512 (reg
)->int_map_backward
= (int* )NULL
;
5513 (reg
)->chain
= (regex_t
* )NULL
;
5515 (reg
)->p
= (UChar
* )NULL
;
5518 (reg
)->name_table
= (void* )NULL
;
5520 (reg
)->case_fold_flag
= case_fold_flag
;
5525 onig_new_without_alloc(regex_t
* reg
, const UChar
* pattern
,
5526 const UChar
* pattern_end
, OnigOptionType option
, OnigEncoding enc
,
5527 OnigSyntaxType
* syntax
, OnigErrorInfo
* einfo
)
5531 r
= onig_reg_init(reg
, option
, ONIGENC_CASE_FOLD_DEFAULT
, enc
, syntax
);
5534 r
= onig_compile(reg
, pattern
, pattern_end
, einfo
);
5539 onig_new(regex_t
** reg
, const UChar
* pattern
, const UChar
* pattern_end
,
5540 OnigOptionType option
, OnigEncoding enc
, OnigSyntaxType
* syntax
,
5541 OnigErrorInfo
* einfo
)
5545 *reg
= (regex_t
* )xmalloc(sizeof(regex_t
));
5546 if (IS_NULL(*reg
)) return ONIGERR_MEMORY
;
5548 r
= onig_reg_init(*reg
, option
, ONIGENC_CASE_FOLD_DEFAULT
, enc
, syntax
);
5551 r
= onig_compile(*reg
, pattern
, pattern_end
, einfo
);
5564 if (onig_inited
!= 0)
5568 THREAD_ATOMIC_START
;
5573 /* onigenc_set_default_caseconv_table((UChar* )0); */
5575 #ifdef ONIG_DEBUG_STATISTICS
5576 onig_statistics_init();
5584 static OnigEndCallListItemType
* EndCallTop
;
5586 extern void onig_add_end_call(void (*func
)(void))
5588 OnigEndCallListItemType
* item
;
5590 item
= (OnigEndCallListItemType
* )xmalloc(sizeof(*item
));
5591 if (item
== 0) return ;
5593 item
->next
= EndCallTop
;
5600 exec_end_call_list(void)
5602 OnigEndCallListItemType
* prev
;
5605 while (EndCallTop
!= 0) {
5606 func
= EndCallTop
->func
;
5610 EndCallTop
= EndCallTop
->next
;
5618 THREAD_ATOMIC_START
;
5620 exec_end_call_list();
5622 #ifdef ONIG_DEBUG_STATISTICS
5623 onig_print_statistics(stderr
);
5626 #ifdef USE_SHARED_CCLASS_TABLE
5627 onig_free_shared_cclass_table();
5630 #ifdef USE_PARSE_TREE_NODE_RECYCLE
5631 onig_free_node_list();
5642 onig_is_in_code_range(const UChar
* p
, OnigCodePoint code
)
5644 OnigCodePoint n
, *data
;
5645 OnigCodePoint low
, high
, x
;
5647 GET_CODE_POINT(n
, p
);
5648 data
= (OnigCodePoint
* )p
;
5651 for (low
= 0, high
= n
; low
< high
; ) {
5652 x
= (low
+ high
) >> 1;
5653 if (code
> data
[x
* 2 + 1])
5659 return ((low
< n
&& code
>= data
[low
* 2]) ? 1 : 0);
5663 onig_is_code_in_cc_len(int elen
, OnigCodePoint code
, CClassNode
* cc
)
5667 if (elen
> 1 || (code
>= SINGLE_BYTE_SIZE
)) {
5668 if (IS_NULL(cc
->mbuf
)) {
5672 found
= (onig_is_in_code_range(cc
->mbuf
->p
, code
) != 0 ? 1 : 0);
5676 found
= (BITSET_AT(cc
->bs
, code
) == 0 ? 0 : 1);
5679 if (IS_NCCLASS_NOT(cc
))
5686 onig_is_code_in_cc(OnigEncoding enc
, OnigCodePoint code
, CClassNode
* cc
)
5690 if (ONIGENC_MBC_MINLEN(enc
) > 1) {
5694 len
= ONIGENC_CODE_TO_MBCLEN(enc
, code
);
5696 return onig_is_code_in_cc_len(len
, code
, cc
);
5702 /* arguments type */
5703 #define ARG_SPECIAL -1
5705 #define ARG_RELADDR 1
5706 #define ARG_ABSADDR 2
5707 #define ARG_LENGTH 3
5708 #define ARG_MEMNUM 4
5709 #define ARG_OPTION 5
5710 #define ARG_STATE_CHECK 6
5712 OnigOpInfoType OnigOpInfo
[] = {
5713 { OP_FINISH
, "finish", ARG_NON
},
5714 { OP_END
, "end", ARG_NON
},
5715 { OP_EXACT1
, "exact1", ARG_SPECIAL
},
5716 { OP_EXACT2
, "exact2", ARG_SPECIAL
},
5717 { OP_EXACT3
, "exact3", ARG_SPECIAL
},
5718 { OP_EXACT4
, "exact4", ARG_SPECIAL
},
5719 { OP_EXACT5
, "exact5", ARG_SPECIAL
},
5720 { OP_EXACTN
, "exactn", ARG_SPECIAL
},
5721 { OP_EXACTMB2N1
, "exactmb2-n1", ARG_SPECIAL
},
5722 { OP_EXACTMB2N2
, "exactmb2-n2", ARG_SPECIAL
},
5723 { OP_EXACTMB2N3
, "exactmb2-n3", ARG_SPECIAL
},
5724 { OP_EXACTMB2N
, "exactmb2-n", ARG_SPECIAL
},
5725 { OP_EXACTMB3N
, "exactmb3n" , ARG_SPECIAL
},
5726 { OP_EXACTMBN
, "exactmbn", ARG_SPECIAL
},
5727 { OP_EXACT1_IC
, "exact1-ic", ARG_SPECIAL
},
5728 { OP_EXACTN_IC
, "exactn-ic", ARG_SPECIAL
},
5729 { OP_CCLASS
, "cclass", ARG_SPECIAL
},
5730 { OP_CCLASS_MB
, "cclass-mb", ARG_SPECIAL
},
5731 { OP_CCLASS_MIX
, "cclass-mix", ARG_SPECIAL
},
5732 { OP_CCLASS_NOT
, "cclass-not", ARG_SPECIAL
},
5733 { OP_CCLASS_MB_NOT
, "cclass-mb-not", ARG_SPECIAL
},
5734 { OP_CCLASS_MIX_NOT
, "cclass-mix-not", ARG_SPECIAL
},
5735 { OP_CCLASS_NODE
, "cclass-node", ARG_SPECIAL
},
5736 { OP_ANYCHAR
, "anychar", ARG_NON
},
5737 { OP_ANYCHAR_ML
, "anychar-ml", ARG_NON
},
5738 { OP_ANYCHAR_STAR
, "anychar*", ARG_NON
},
5739 { OP_ANYCHAR_ML_STAR
, "anychar-ml*", ARG_NON
},
5740 { OP_ANYCHAR_STAR_PEEK_NEXT
, "anychar*-peek-next", ARG_SPECIAL
},
5741 { OP_ANYCHAR_ML_STAR_PEEK_NEXT
, "anychar-ml*-peek-next", ARG_SPECIAL
},
5742 { OP_WORD
, "word", ARG_NON
},
5743 { OP_NOT_WORD
, "not-word", ARG_NON
},
5744 { OP_WORD_BOUND
, "word-bound", ARG_NON
},
5745 { OP_NOT_WORD_BOUND
, "not-word-bound", ARG_NON
},
5746 { OP_WORD_BEGIN
, "word-begin", ARG_NON
},
5747 { OP_WORD_END
, "word-end", ARG_NON
},
5748 { OP_BEGIN_BUF
, "begin-buf", ARG_NON
},
5749 { OP_END_BUF
, "end-buf", ARG_NON
},
5750 { OP_BEGIN_LINE
, "begin-line", ARG_NON
},
5751 { OP_END_LINE
, "end-line", ARG_NON
},
5752 { OP_SEMI_END_BUF
, "semi-end-buf", ARG_NON
},
5753 { OP_BEGIN_POSITION
, "begin-position", ARG_NON
},
5754 { OP_BACKREF1
, "backref1", ARG_NON
},
5755 { OP_BACKREF2
, "backref2", ARG_NON
},
5756 { OP_BACKREFN
, "backrefn", ARG_MEMNUM
},
5757 { OP_BACKREFN_IC
, "backrefn-ic", ARG_SPECIAL
},
5758 { OP_BACKREF_MULTI
, "backref_multi", ARG_SPECIAL
},
5759 { OP_BACKREF_MULTI_IC
, "backref_multi-ic", ARG_SPECIAL
},
5760 { OP_BACKREF_WITH_LEVEL
, "backref_at_level", ARG_SPECIAL
},
5761 { OP_MEMORY_START_PUSH
, "mem-start-push", ARG_MEMNUM
},
5762 { OP_MEMORY_START
, "mem-start", ARG_MEMNUM
},
5763 { OP_MEMORY_END_PUSH
, "mem-end-push", ARG_MEMNUM
},
5764 { OP_MEMORY_END_PUSH_REC
, "mem-end-push-rec", ARG_MEMNUM
},
5765 { OP_MEMORY_END
, "mem-end", ARG_MEMNUM
},
5766 { OP_MEMORY_END_REC
, "mem-end-rec", ARG_MEMNUM
},
5767 { OP_SET_OPTION_PUSH
, "set-option-push", ARG_OPTION
},
5768 { OP_SET_OPTION
, "set-option", ARG_OPTION
},
5769 { OP_FAIL
, "fail", ARG_NON
},
5770 { OP_JUMP
, "jump", ARG_RELADDR
},
5771 { OP_PUSH
, "push", ARG_RELADDR
},
5772 { OP_POP
, "pop", ARG_NON
},
5773 { OP_PUSH_OR_JUMP_EXACT1
, "push-or-jump-e1", ARG_SPECIAL
},
5774 { OP_PUSH_IF_PEEK_NEXT
, "push-if-peek-next", ARG_SPECIAL
},
5775 { OP_REPEAT
, "repeat", ARG_SPECIAL
},
5776 { OP_REPEAT_NG
, "repeat-ng", ARG_SPECIAL
},
5777 { OP_REPEAT_INC
, "repeat-inc", ARG_MEMNUM
},
5778 { OP_REPEAT_INC_NG
, "repeat-inc-ng", ARG_MEMNUM
},
5779 { OP_REPEAT_INC_SG
, "repeat-inc-sg", ARG_MEMNUM
},
5780 { OP_REPEAT_INC_NG_SG
, "repeat-inc-ng-sg", ARG_MEMNUM
},
5781 { OP_NULL_CHECK_START
, "null-check-start", ARG_MEMNUM
},
5782 { OP_NULL_CHECK_END
, "null-check-end", ARG_MEMNUM
},
5783 { OP_NULL_CHECK_END_MEMST
,"null-check-end-memst", ARG_MEMNUM
},
5784 { OP_NULL_CHECK_END_MEMST_PUSH
,"null-check-end-memst-push", ARG_MEMNUM
},
5785 { OP_PUSH_POS
, "push-pos", ARG_NON
},
5786 { OP_POP_POS
, "pop-pos", ARG_NON
},
5787 { OP_PUSH_POS_NOT
, "push-pos-not", ARG_RELADDR
},
5788 { OP_FAIL_POS
, "fail-pos", ARG_NON
},
5789 { OP_PUSH_STOP_BT
, "push-stop-bt", ARG_NON
},
5790 { OP_POP_STOP_BT
, "pop-stop-bt", ARG_NON
},
5791 { OP_LOOK_BEHIND
, "look-behind", ARG_SPECIAL
},
5792 { OP_PUSH_LOOK_BEHIND_NOT
, "push-look-behind-not", ARG_SPECIAL
},
5793 { OP_FAIL_LOOK_BEHIND_NOT
, "fail-look-behind-not", ARG_NON
},
5794 { OP_CALL
, "call", ARG_ABSADDR
},
5795 { OP_RETURN
, "return", ARG_NON
},
5796 { OP_STATE_CHECK_PUSH
, "state-check-push", ARG_SPECIAL
},
5797 { OP_STATE_CHECK_PUSH_OR_JUMP
, "state-check-push-or-jump", ARG_SPECIAL
},
5798 { OP_STATE_CHECK
, "state-check", ARG_STATE_CHECK
},
5799 { OP_STATE_CHECK_ANYCHAR_STAR
, "state-check-anychar*", ARG_STATE_CHECK
},
5800 { OP_STATE_CHECK_ANYCHAR_ML_STAR
,
5801 "state-check-anychar-ml*", ARG_STATE_CHECK
},
5810 for (i
= 0; OnigOpInfo
[i
].opcode
>= 0; i
++) {
5811 if (opcode
== OnigOpInfo
[i
].opcode
)
5812 return OnigOpInfo
[i
].name
;
5818 op2arg_type(int opcode
)
5822 for (i
= 0; OnigOpInfo
[i
].opcode
>= 0; i
++) {
5823 if (opcode
== OnigOpInfo
[i
].opcode
)
5824 return OnigOpInfo
[i
].arg_type
;
5830 Indent(FILE* f
, int indent
)
5833 for (i
= 0; i
< indent
; i
++) putc(' ', f
);
5837 p_string(FILE* f
, int len
, UChar
* s
)
5840 while (len
-- > 0) { fputc(*s
++, f
); }
5844 p_len_string(FILE* f
, LengthType len
, int mb_len
, UChar
* s
)
5846 int x
= len
* mb_len
;
5848 fprintf(f
, ":%d:", len
);
5849 while (x
-- > 0) { fputc(*s
++, f
); }
5853 onig_print_compiled_byte_code(FILE* f
, UChar
* bp
, UChar
** nextp
,
5860 StateCheckNumType scn
;
5864 fprintf(f
, "[%s", op2name(*bp
));
5865 arg_type
= op2arg_type(*bp
);
5866 if (arg_type
!= ARG_SPECIAL
) {
5872 GET_RELADDR_INC(addr
, bp
);
5873 fprintf(f
, ":(%d)", addr
);
5876 GET_ABSADDR_INC(addr
, bp
);
5877 fprintf(f
, ":(%d)", addr
);
5880 GET_LENGTH_INC(len
, bp
);
5881 fprintf(f
, ":%d", len
);
5884 mem
= *((MemNumType
* )bp
);
5886 fprintf(f
, ":%d", mem
);
5890 OnigOptionType option
= *((OnigOptionType
* )bp
);
5892 fprintf(f
, ":%d", option
);
5896 case ARG_STATE_CHECK
:
5897 scn
= *((StateCheckNumType
* )bp
);
5898 bp
+= SIZE_STATE_CHECK_NUM
;
5899 fprintf(f
, ":%d", scn
);
5906 case OP_ANYCHAR_STAR_PEEK_NEXT
:
5907 case OP_ANYCHAR_ML_STAR_PEEK_NEXT
:
5908 p_string(f
, 1, bp
++); break;
5910 p_string(f
, 2, bp
); bp
+= 2; break;
5912 p_string(f
, 3, bp
); bp
+= 3; break;
5914 p_string(f
, 4, bp
); bp
+= 4; break;
5916 p_string(f
, 5, bp
); bp
+= 5; break;
5918 GET_LENGTH_INC(len
, bp
);
5919 p_len_string(f
, len
, 1, bp
);
5924 p_string(f
, 2, bp
); bp
+= 2; break;
5926 p_string(f
, 4, bp
); bp
+= 4; break;
5928 p_string(f
, 6, bp
); bp
+= 6; break;
5930 GET_LENGTH_INC(len
, bp
);
5931 p_len_string(f
, len
, 2, bp
);
5935 GET_LENGTH_INC(len
, bp
);
5936 p_len_string(f
, len
, 3, bp
);
5943 GET_LENGTH_INC(mb_len
, bp
);
5944 GET_LENGTH_INC(len
, bp
);
5945 fprintf(f
, ":%d:%d:", mb_len
, len
);
5947 while (n
-- > 0) { fputc(*bp
++, f
); }
5952 len
= enclen(enc
, bp
);
5953 p_string(f
, len
, bp
);
5957 GET_LENGTH_INC(len
, bp
);
5958 p_len_string(f
, len
, 1, bp
);
5963 n
= bitset_on_num((BitSetRef
)bp
);
5965 fprintf(f
, ":%d", n
);
5969 n
= bitset_on_num((BitSetRef
)bp
);
5971 fprintf(f
, ":%d", n
);
5975 case OP_CCLASS_MB_NOT
:
5976 GET_LENGTH_INC(len
, bp
);
5978 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
5981 GET_CODE_POINT(code
, q
);
5983 fprintf(f
, ":%d:%d", (int )code
, len
);
5987 case OP_CCLASS_MIX_NOT
:
5988 n
= bitset_on_num((BitSetRef
)bp
);
5990 GET_LENGTH_INC(len
, bp
);
5992 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
5995 GET_CODE_POINT(code
, q
);
5997 fprintf(f
, ":%d:%d:%d", n
, (int )code
, len
);
6000 case OP_CCLASS_NODE
:
6004 GET_POINTER_INC(cc
, bp
);
6005 n
= bitset_on_num(cc
->bs
);
6006 fprintf(f
, ":%u:%d", (unsigned int )cc
, n
);
6010 case OP_BACKREFN_IC
:
6011 mem
= *((MemNumType
* )bp
);
6013 fprintf(f
, ":%d", mem
);
6016 case OP_BACKREF_MULTI_IC
:
6017 case OP_BACKREF_MULTI
:
6019 GET_LENGTH_INC(len
, bp
);
6020 for (i
= 0; i
< len
; i
++) {
6021 GET_MEMNUM_INC(mem
, bp
);
6022 if (i
> 0) fputs(", ", f
);
6023 fprintf(f
, "%d", mem
);
6027 case OP_BACKREF_WITH_LEVEL
:
6029 OnigOptionType option
;
6032 GET_OPTION_INC(option
, bp
);
6033 fprintf(f
, ":%d", option
);
6034 GET_LENGTH_INC(level
, bp
);
6035 fprintf(f
, ":%d", level
);
6038 GET_LENGTH_INC(len
, bp
);
6039 for (i
= 0; i
< len
; i
++) {
6040 GET_MEMNUM_INC(mem
, bp
);
6041 if (i
> 0) fputs(", ", f
);
6042 fprintf(f
, "%d", mem
);
6050 mem
= *((MemNumType
* )bp
);
6052 addr
= *((RelAddrType
* )bp
);
6054 fprintf(f
, ":%d:%d", mem
, addr
);
6058 case OP_PUSH_OR_JUMP_EXACT1
:
6059 case OP_PUSH_IF_PEEK_NEXT
:
6060 addr
= *((RelAddrType
* )bp
);
6062 fprintf(f
, ":(%d)", addr
);
6067 case OP_LOOK_BEHIND
:
6068 GET_LENGTH_INC(len
, bp
);
6069 fprintf(f
, ":%d", len
);
6072 case OP_PUSH_LOOK_BEHIND_NOT
:
6073 GET_RELADDR_INC(addr
, bp
);
6074 GET_LENGTH_INC(len
, bp
);
6075 fprintf(f
, ":%d:(%d)", len
, addr
);
6078 case OP_STATE_CHECK_PUSH
:
6079 case OP_STATE_CHECK_PUSH_OR_JUMP
:
6080 scn
= *((StateCheckNumType
* )bp
);
6081 bp
+= SIZE_STATE_CHECK_NUM
;
6082 addr
= *((RelAddrType
* )bp
);
6084 fprintf(f
, ":%d:(%d)", scn
, addr
);
6088 fprintf(stderr
, "onig_print_compiled_byte_code: undefined code %d\n",
6093 if (nextp
) *nextp
= bp
;
6097 print_compiled_byte_code_list(FILE* f
, regex_t
* reg
)
6101 UChar
* end
= reg
->p
+ reg
->used
;
6103 fprintf(f
, "code length: %d\n", reg
->used
);
6114 onig_print_compiled_byte_code(f
, bp
, &bp
, reg
->enc
);
6121 print_indent_tree(FILE* f
, Node
* node
, int indent
)
6128 if (IS_NULL(node
)) {
6129 fprintf(f
, "ERROR: null node!!!\n");
6137 if (NTYPE(node
) == NT_LIST
)
6138 fprintf(f
, "<list:%x>\n", (int )node
);
6140 fprintf(f
, "<alt:%x>\n", (int )node
);
6142 print_indent_tree(f
, NCAR(node
), indent
+ add
);
6143 while (IS_NOT_NULL(node
= NCDR(node
))) {
6144 if (NTYPE(node
) != type
) {
6145 fprintf(f
, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node
));
6148 print_indent_tree(f
, NCAR(node
), indent
+ add
);
6153 fprintf(f
, "<string%s:%x>",
6154 (NSTRING_IS_RAW(node
) ? "-raw" : ""), (int )node
);
6155 for (p
= NSTR(node
)->s
; p
< NSTR(node
)->end
; p
++) {
6156 if (*p
>= 0x20 && *p
< 0x7f)
6159 fprintf(f
, " 0x%02x", *p
);
6165 fprintf(f
, "<cclass:%x>", (int )node
);
6166 if (IS_NCCLASS_NOT(NCCLASS(node
))) fputs(" not", f
);
6167 if (NCCLASS(node
)->mbuf
) {
6168 BBuf
* bbuf
= NCCLASS(node
)->mbuf
;
6169 for (i
= 0; i
< bbuf
->used
; i
++) {
6170 if (i
> 0) fprintf(f
, ",");
6171 fprintf(f
, "%0x", bbuf
->p
[i
]);
6177 fprintf(f
, "<ctype:%x> ", (int )node
);
6178 switch (NCTYPE(node
)->ctype
) {
6179 case ONIGENC_CTYPE_WORD
:
6180 if (NCTYPE(node
)->not != 0)
6181 fputs("not word", f
);
6187 fprintf(f
, "ERROR: undefined ctype.\n");
6193 fprintf(f
, "<anychar:%x>", (int )node
);
6197 fprintf(f
, "<anchor:%x> ", (int )node
);
6198 switch (NANCHOR(node
)->type
) {
6199 case ANCHOR_BEGIN_BUF
: fputs("begin buf", f
); break;
6200 case ANCHOR_END_BUF
: fputs("end buf", f
); break;
6201 case ANCHOR_BEGIN_LINE
: fputs("begin line", f
); break;
6202 case ANCHOR_END_LINE
: fputs("end line", f
); break;
6203 case ANCHOR_SEMI_END_BUF
: fputs("semi end buf", f
); break;
6204 case ANCHOR_BEGIN_POSITION
: fputs("begin position", f
); break;
6206 case ANCHOR_WORD_BOUND
: fputs("word bound", f
); break;
6207 case ANCHOR_NOT_WORD_BOUND
: fputs("not word bound", f
); break;
6208 #ifdef USE_WORD_BEGIN_END
6209 case ANCHOR_WORD_BEGIN
: fputs("word begin", f
); break;
6210 case ANCHOR_WORD_END
: fputs("word end", f
); break;
6212 case ANCHOR_PREC_READ
: fputs("prec read", f
); break;
6213 case ANCHOR_PREC_READ_NOT
: fputs("prec read not", f
); break;
6214 case ANCHOR_LOOK_BEHIND
: fputs("look_behind", f
); break;
6215 case ANCHOR_LOOK_BEHIND_NOT
: fputs("look_behind_not",f
); break;
6218 fprintf(f
, "ERROR: undefined anchor type.\n");
6226 BRefNode
* br
= NBREF(node
);
6228 fprintf(f
, "<backref:%x>", (int )node
);
6229 for (i
= 0; i
< br
->back_num
; i
++) {
6230 if (i
> 0) fputs(", ", f
);
6231 fprintf(f
, "%d", p
[i
]);
6236 #ifdef USE_SUBEXP_CALL
6239 CallNode
* cn
= NCALL(node
);
6240 fprintf(f
, "<call:%x>", (int )node
);
6241 p_string(f
, cn
->name_end
- cn
->name
, cn
->name
);
6247 fprintf(f
, "<quantifier:%x>{%d,%d}%s\n", (int )node
,
6248 NQTFR(node
)->lower
, NQTFR(node
)->upper
,
6249 (NQTFR(node
)->greedy
? "" : "?"));
6250 print_indent_tree(f
, NQTFR(node
)->target
, indent
+ add
);
6254 fprintf(f
, "<enclose:%x> ", (int )node
);
6255 switch (NENCLOSE(node
)->type
) {
6256 case ENCLOSE_OPTION
:
6257 fprintf(f
, "option:%d", NENCLOSE(node
)->option
);
6259 case ENCLOSE_MEMORY
:
6260 fprintf(f
, "memory:%d", NENCLOSE(node
)->regnum
);
6262 case ENCLOSE_STOP_BACKTRACK
:
6263 fprintf(f
, "stop-bt");
6270 print_indent_tree(f
, NENCLOSE(node
)->target
, indent
+ add
);
6274 fprintf(f
, "print_indent_tree: undefined node type %d\n", NTYPE(node
));
6278 if (type
!= NT_LIST
&& type
!= NT_ALT
&& type
!= NT_QTFR
&&
6283 #endif /* ONIG_DEBUG */
6285 #ifdef ONIG_DEBUG_PARSE_TREE
6287 print_tree(FILE* f
, Node
* node
)
6289 print_indent_tree(f
, node
, 0);