1 /**********************************************************************
2 regcomp.c - Oniguruma (regular expression library)
3 **********************************************************************/
5 * Copyright (c) 2002-2013 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
8 * (C) Copyright 2015 Hewlett Packard Enterprise Development LP<BR>
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 OnigCaseFoldType OnigDefaultCaseFoldFlag
= ONIGENC_CASE_FOLD_MIN
;
36 extern OnigCaseFoldType
37 onig_get_default_case_fold_flag(void)
39 return OnigDefaultCaseFoldFlag
;
43 onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag
)
45 OnigDefaultCaseFoldFlag
= case_fold_flag
;
50 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
51 static unsigned char PadBuf
[WORD_ALIGNMENT_SIZE
];
55 str_dup(UChar
* s
, UChar
* end
)
57 int len
= (int)(end
- s
);
60 UChar
* r
= (UChar
* )xmalloc(len
+ 1);
70 swap_node(Node
* a
, Node
* b
)
73 c
= *a
; *a
= *b
; *b
= c
;
75 if (NTYPE(a
) == NT_STR
) {
76 StrNode
* sn
= NSTR(a
);
78 int len
= (int)(sn
->end
- sn
->s
);
80 sn
->end
= sn
->s
+ len
;
84 if (NTYPE(b
) == NT_STR
) {
85 StrNode
* sn
= NSTR(b
);
87 int len
= (int)(sn
->end
- sn
->s
);
89 sn
->end
= sn
->s
+ len
;
95 distance_add(OnigDistance d1
, OnigDistance d2
)
97 if (d1
== ONIG_INFINITE_DISTANCE
|| d2
== ONIG_INFINITE_DISTANCE
)
98 return ONIG_INFINITE_DISTANCE
;
100 if (d1
<= ONIG_INFINITE_DISTANCE
- d2
) return d1
+ d2
;
101 else return ONIG_INFINITE_DISTANCE
;
106 distance_multiply(OnigDistance d
, int m
)
108 if (m
== 0) return 0;
110 if (d
< ONIG_INFINITE_DISTANCE
/ m
)
113 return ONIG_INFINITE_DISTANCE
;
117 bitset_is_empty(BitSetRef bs
)
120 for (i
= 0; i
< (int )BITSET_SIZE
; i
++) {
121 if (bs
[i
] != 0) return 0;
128 bitset_on_num(BitSetRef bs
)
133 for (i
= 0; i
< SINGLE_BYTE_SIZE
; i
++) {
134 if (BITSET_AT(bs
, i
)) n
++;
141 onig_bbuf_init(BBuf
* buf
, int size
)
148 buf
->p
= (UChar
* )xmalloc(size
);
149 if (IS_NULL(buf
->p
)) return(ONIGERR_MEMORY
);
158 #ifdef USE_SUBEXP_CALL
161 unset_addr_list_init(UnsetAddrList
* uslist
, int size
)
165 p
= (UnsetAddr
* )xmalloc(sizeof(UnsetAddr
)* size
);
166 CHECK_NULL_RETURN_MEMERR(p
);
168 uslist
->alloc
= size
;
174 unset_addr_list_end(UnsetAddrList
* uslist
)
176 if (IS_NOT_NULL(uslist
->us
))
181 unset_addr_list_add(UnsetAddrList
* uslist
, int offset
, struct _Node
* node
)
186 if (uslist
->num
>= uslist
->alloc
) {
187 size
= uslist
->alloc
* 2;
188 p
= (UnsetAddr
* )xrealloc(uslist
->us
, sizeof(UnsetAddr
) * size
, sizeof(UnsetAddr
) * uslist
->alloc
);
189 CHECK_NULL_RETURN_MEMERR(p
);
190 uslist
->alloc
= size
;
194 uslist
->us
[uslist
->num
].offset
= offset
;
195 uslist
->us
[uslist
->num
].target
= node
;
199 #endif /* USE_SUBEXP_CALL */
203 add_opcode(regex_t
* reg
, int opcode
)
205 BBUF_ADD1(reg
, ((unsigned char)opcode
));
209 #ifdef USE_COMBINATION_EXPLOSION_CHECK
211 add_state_check_num(regex_t
* reg
, int num
)
213 StateCheckNumType n
= (StateCheckNumType
)num
;
215 BBUF_ADD(reg
, &n
, SIZE_STATE_CHECK_NUM
);
221 add_rel_addr(regex_t
* reg
, int addr
)
223 RelAddrType ra
= (RelAddrType
)addr
;
225 BBUF_ADD(reg
, &ra
, SIZE_RELADDR
);
230 add_abs_addr(regex_t
* reg
, int addr
)
232 AbsAddrType ra
= (AbsAddrType
)addr
;
234 BBUF_ADD(reg
, &ra
, SIZE_ABSADDR
);
239 add_length(regex_t
* reg
, int len
)
241 LengthType l
= (LengthType
)len
;
243 BBUF_ADD(reg
, &l
, SIZE_LENGTH
);
248 add_mem_num(regex_t
* reg
, int num
)
250 MemNumType n
= (MemNumType
)num
;
252 BBUF_ADD(reg
, &n
, SIZE_MEMNUM
);
257 add_pointer(regex_t
* reg
, void* addr
)
259 PointerType ptr
= (PointerType
)addr
;
261 BBUF_ADD(reg
, &ptr
, SIZE_POINTER
);
266 add_option(regex_t
* reg
, OnigOptionType option
)
268 BBUF_ADD(reg
, &option
, SIZE_OPTION
);
273 add_opcode_rel_addr(regex_t
* reg
, int opcode
, int addr
)
277 r
= add_opcode(reg
, opcode
);
279 r
= add_rel_addr(reg
, addr
);
284 add_bytes(regex_t
* reg
, UChar
* bytes
, int len
)
286 BBUF_ADD(reg
, bytes
, len
);
291 add_bitset(regex_t
* reg
, BitSetRef bs
)
293 BBUF_ADD(reg
, bs
, SIZE_BITSET
);
298 add_opcode_option(regex_t
* reg
, int opcode
, OnigOptionType option
)
302 r
= add_opcode(reg
, opcode
);
304 r
= add_option(reg
, option
);
308 static int compile_length_tree(Node
* node
, regex_t
* reg
);
309 static int compile_tree(Node
* node
, regex_t
* reg
);
312 #define IS_NEED_STR_LEN_OP_EXACT(op) \
313 ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
314 (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
317 select_str_opcode(int mb_len
, int str_len
, int ignore_case
)
323 case 1: op
= OP_EXACT1_IC
; break;
324 default: op
= OP_EXACTN_IC
; break;
331 case 1: op
= OP_EXACT1
; break;
332 case 2: op
= OP_EXACT2
; break;
333 case 3: op
= OP_EXACT3
; break;
334 case 4: op
= OP_EXACT4
; break;
335 case 5: op
= OP_EXACT5
; break;
336 default: op
= OP_EXACTN
; break;
342 case 1: op
= OP_EXACTMB2N1
; break;
343 case 2: op
= OP_EXACTMB2N2
; break;
344 case 3: op
= OP_EXACTMB2N3
; break;
345 default: op
= OP_EXACTMB2N
; break;
362 compile_tree_empty_check(Node
* node
, regex_t
* reg
, int empty_info
)
365 int saved_num_null_check
= reg
->num_null_check
;
367 if (empty_info
!= 0) {
368 r
= add_opcode(reg
, OP_NULL_CHECK_START
);
370 r
= add_mem_num(reg
, reg
->num_null_check
); /* NULL CHECK ID */
372 reg
->num_null_check
++;
375 r
= compile_tree(node
, reg
);
378 if (empty_info
!= 0) {
379 if (empty_info
== NQ_TARGET_IS_EMPTY
)
380 r
= add_opcode(reg
, OP_NULL_CHECK_END
);
381 else if (empty_info
== NQ_TARGET_IS_EMPTY_MEM
)
382 r
= add_opcode(reg
, OP_NULL_CHECK_END_MEMST
);
383 else if (empty_info
== NQ_TARGET_IS_EMPTY_REC
)
384 r
= add_opcode(reg
, OP_NULL_CHECK_END_MEMST_PUSH
);
387 r
= add_mem_num(reg
, saved_num_null_check
); /* NULL CHECK ID */
392 #ifdef USE_SUBEXP_CALL
394 compile_call(CallNode
* node
, regex_t
* reg
)
398 r
= add_opcode(reg
, OP_CALL
);
400 r
= unset_addr_list_add(node
->unset_addr_list
, BBUF_GET_OFFSET_POS(reg
),
403 r
= add_abs_addr(reg
, 0 /*dummy addr.*/);
409 compile_tree_n_times(Node
* node
, int n
, regex_t
* reg
)
413 for (i
= 0; i
< n
; i
++) {
414 r
= compile_tree(node
, reg
);
421 add_compile_string_length(UChar
* s ARG_UNUSED
, int mb_len
, int str_len
,
422 regex_t
* reg ARG_UNUSED
, int ignore_case
)
425 int op
= select_str_opcode(mb_len
, str_len
, ignore_case
);
429 if (op
== OP_EXACTMBN
) len
+= SIZE_LENGTH
;
430 if (IS_NEED_STR_LEN_OP_EXACT(op
))
433 len
+= mb_len
* str_len
;
438 add_compile_string(UChar
* s
, int mb_len
, int str_len
,
439 regex_t
* reg
, int ignore_case
)
441 int op
= select_str_opcode(mb_len
, str_len
, ignore_case
);
444 if (op
== OP_EXACTMBN
)
445 add_length(reg
, mb_len
);
447 if (IS_NEED_STR_LEN_OP_EXACT(op
)) {
448 if (op
== OP_EXACTN_IC
)
449 add_length(reg
, mb_len
* str_len
);
451 add_length(reg
, str_len
);
454 add_bytes(reg
, s
, mb_len
* str_len
);
460 compile_length_string_node(Node
* node
, regex_t
* reg
)
462 int rlen
, r
, len
, prev_len
, slen
, ambig
;
463 OnigEncoding enc
= reg
->enc
;
468 if (sn
->end
<= sn
->s
)
471 ambig
= NSTRING_IS_AMBIG(node
);
474 prev_len
= enclen(enc
, p
);
479 for (; p
< sn
->end
; ) {
480 len
= enclen(enc
, p
);
481 if (len
== prev_len
) {
485 r
= add_compile_string_length(prev
, prev_len
, slen
, reg
, ambig
);
493 r
= add_compile_string_length(prev
, prev_len
, slen
, reg
, ambig
);
499 compile_length_string_raw_node(StrNode
* sn
, regex_t
* reg
)
501 if (sn
->end
<= sn
->s
)
504 return add_compile_string_length(sn
->s
, 1 /* sb */, (int)(sn
->end
- sn
->s
), reg
, 0);
508 compile_string_node(Node
* node
, regex_t
* reg
)
510 int r
, len
, prev_len
, slen
, ambig
;
511 OnigEncoding enc
= reg
->enc
;
512 UChar
*p
, *prev
, *end
;
516 if (sn
->end
<= sn
->s
)
520 ambig
= NSTRING_IS_AMBIG(node
);
523 prev_len
= enclen(enc
, p
);
528 len
= enclen(enc
, p
);
529 if (len
== prev_len
) {
533 r
= add_compile_string(prev
, prev_len
, slen
, reg
, ambig
);
543 return add_compile_string(prev
, prev_len
, slen
, reg
, ambig
);
547 compile_string_raw_node(StrNode
* sn
, regex_t
* reg
)
549 if (sn
->end
<= sn
->s
)
552 return add_compile_string(sn
->s
, 1 /* sb */, (int)(sn
->end
- sn
->s
), reg
, 0);
556 add_multi_byte_cclass(BBuf
* mbuf
, regex_t
* reg
)
558 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
559 add_length(reg
, mbuf
->used
);
560 return add_bytes(reg
, mbuf
->p
, mbuf
->used
);
563 UChar
* p
= BBUF_GET_ADD_ADDRESS(reg
) + SIZE_LENGTH
;
565 GET_ALIGNMENT_PAD_SIZE(p
, pad_size
);
566 add_length(reg
, mbuf
->used
+ (WORD_ALIGNMENT_SIZE
- 1));
567 if (pad_size
!= 0) add_bytes(reg
, PadBuf
, pad_size
);
569 r
= add_bytes(reg
, mbuf
->p
, mbuf
->used
);
571 /* padding for return value from compile_length_cclass_node() to be fix. */
572 pad_size
= (WORD_ALIGNMENT_SIZE
- 1) - pad_size
;
573 if (pad_size
!= 0) add_bytes(reg
, PadBuf
, pad_size
);
579 compile_length_cclass_node(CClassNode
* cc
, regex_t
* reg
)
583 if (IS_NCCLASS_SHARE(cc
)) {
584 len
= SIZE_OPCODE
+ SIZE_POINTER
;
588 if (IS_NULL(cc
->mbuf
)) {
589 len
= SIZE_OPCODE
+ SIZE_BITSET
;
592 if (ONIGENC_MBC_MINLEN(reg
->enc
) > 1 || bitset_is_empty(cc
->bs
)) {
596 len
= SIZE_OPCODE
+ SIZE_BITSET
;
598 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
599 len
+= SIZE_LENGTH
+ cc
->mbuf
->used
;
601 len
+= SIZE_LENGTH
+ cc
->mbuf
->used
+ (WORD_ALIGNMENT_SIZE
- 1);
609 compile_cclass_node(CClassNode
* cc
, regex_t
* reg
)
613 if (IS_NCCLASS_SHARE(cc
)) {
614 add_opcode(reg
, OP_CCLASS_NODE
);
615 r
= add_pointer(reg
, cc
);
619 if (IS_NULL(cc
->mbuf
)) {
620 if (IS_NCCLASS_NOT(cc
))
621 add_opcode(reg
, OP_CCLASS_NOT
);
623 add_opcode(reg
, OP_CCLASS
);
625 r
= add_bitset(reg
, cc
->bs
);
628 if (ONIGENC_MBC_MINLEN(reg
->enc
) > 1 || bitset_is_empty(cc
->bs
)) {
629 if (IS_NCCLASS_NOT(cc
))
630 add_opcode(reg
, OP_CCLASS_MB_NOT
);
632 add_opcode(reg
, OP_CCLASS_MB
);
634 r
= add_multi_byte_cclass(cc
->mbuf
, reg
);
637 if (IS_NCCLASS_NOT(cc
))
638 add_opcode(reg
, OP_CCLASS_MIX_NOT
);
640 add_opcode(reg
, OP_CCLASS_MIX
);
642 r
= add_bitset(reg
, cc
->bs
);
644 r
= add_multi_byte_cclass(cc
->mbuf
, reg
);
652 entry_repeat_range(regex_t
* reg
, int id
, int lower
, int upper
)
654 #define REPEAT_RANGE_ALLOC 4
658 if (reg
->repeat_range_alloc
== 0) {
659 p
= (OnigRepeatRange
* )xmalloc(sizeof(OnigRepeatRange
) * REPEAT_RANGE_ALLOC
);
660 CHECK_NULL_RETURN_MEMERR(p
);
661 reg
->repeat_range
= p
;
662 reg
->repeat_range_alloc
= REPEAT_RANGE_ALLOC
;
664 else if (reg
->repeat_range_alloc
<= id
) {
666 n
= reg
->repeat_range_alloc
+ REPEAT_RANGE_ALLOC
;
667 p
= (OnigRepeatRange
* )xrealloc(reg
->repeat_range
,
668 sizeof(OnigRepeatRange
) * n
,
669 sizeof(OnigRepeatRange
) * reg
->repeat_range_alloc
);
670 CHECK_NULL_RETURN_MEMERR(p
);
671 reg
->repeat_range
= p
;
672 reg
->repeat_range_alloc
= n
;
675 p
= reg
->repeat_range
;
679 p
[id
].upper
= (IS_REPEAT_INFINITE(upper
) ? 0x7fffffff : upper
);
684 compile_range_repeat_node(QtfrNode
* qn
, int target_len
, int empty_info
,
688 int num_repeat
= reg
->num_repeat
;
690 r
= add_opcode(reg
, qn
->greedy
? OP_REPEAT
: OP_REPEAT_NG
);
692 r
= add_mem_num(reg
, num_repeat
); /* OP_REPEAT ID */
695 r
= add_rel_addr(reg
, target_len
+ SIZE_OP_REPEAT_INC
);
698 r
= entry_repeat_range(reg
, num_repeat
, qn
->lower
, qn
->upper
);
701 r
= compile_tree_empty_check(qn
->target
, reg
, empty_info
);
705 #ifdef USE_SUBEXP_CALL
708 IS_QUANTIFIER_IN_REPEAT(qn
)) {
709 r
= add_opcode(reg
, qn
->greedy
? OP_REPEAT_INC_SG
: OP_REPEAT_INC_NG_SG
);
712 r
= add_opcode(reg
, qn
->greedy
? OP_REPEAT_INC
: OP_REPEAT_INC_NG
);
715 r
= add_mem_num(reg
, num_repeat
); /* OP_REPEAT ID */
720 is_anychar_star_quantifier(QtfrNode
* qn
)
722 if (qn
->greedy
&& IS_REPEAT_INFINITE(qn
->upper
) &&
723 NTYPE(qn
->target
) == NT_CANY
)
729 #define QUANTIFIER_EXPAND_LIMIT_SIZE 50
730 #define CKN_ON (ckn > 0)
732 #ifdef USE_COMBINATION_EXPLOSION_CHECK
735 compile_length_quantifier_node(QtfrNode
* qn
, regex_t
* reg
)
737 int len
, mod_tlen
, cklen
;
739 int infinite
= IS_REPEAT_INFINITE(qn
->upper
);
740 int empty_info
= qn
->target_empty_info
;
741 int tlen
= compile_length_tree(qn
->target
, reg
);
743 if (tlen
< 0) return tlen
;
745 ckn
= ((reg
->num_comb_exp_check
> 0) ? qn
->comb_exp_check_num
: 0);
747 cklen
= (CKN_ON
? SIZE_STATE_CHECK_NUM
: 0);
750 if (NTYPE(qn
->target
) == NT_CANY
) {
751 if (qn
->greedy
&& infinite
) {
752 if (IS_NOT_NULL(qn
->next_head_exact
) && !CKN_ON
)
753 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT
+ tlen
* qn
->lower
+ cklen
;
755 return SIZE_OP_ANYCHAR_STAR
+ tlen
* qn
->lower
+ cklen
;
760 mod_tlen
= tlen
+ (SIZE_OP_NULL_CHECK_START
+ SIZE_OP_NULL_CHECK_END
);
764 if (infinite
&& qn
->lower
<= 1) {
771 len
+= SIZE_OP_PUSH
+ cklen
+ mod_tlen
+ SIZE_OP_JUMP
;
779 len
+= mod_tlen
+ SIZE_OP_PUSH
+ cklen
;
782 else if (qn
->upper
== 0) {
783 if (qn
->is_refered
!= 0) /* /(?<n>..){0}/ */
784 len
= SIZE_OP_JUMP
+ tlen
;
788 else if (qn
->upper
== 1 && qn
->greedy
) {
789 if (qn
->lower
== 0) {
791 len
= SIZE_OP_STATE_CHECK_PUSH
+ tlen
;
794 len
= SIZE_OP_PUSH
+ tlen
;
801 else if (!qn
->greedy
&& qn
->upper
== 1 && qn
->lower
== 0) { /* '??' */
802 len
= SIZE_OP_PUSH
+ cklen
+ SIZE_OP_JUMP
+ tlen
;
805 len
= SIZE_OP_REPEAT_INC
806 + mod_tlen
+ SIZE_OPCODE
+ SIZE_RELADDR
+ SIZE_MEMNUM
;
808 len
+= SIZE_OP_STATE_CHECK
;
815 compile_quantifier_node(QtfrNode
* qn
, regex_t
* reg
)
819 int infinite
= IS_REPEAT_INFINITE(qn
->upper
);
820 int empty_info
= qn
->target_empty_info
;
821 int tlen
= compile_length_tree(qn
->target
, reg
);
823 if (tlen
< 0) return tlen
;
825 ckn
= ((reg
->num_comb_exp_check
> 0) ? qn
->comb_exp_check_num
: 0);
827 if (is_anychar_star_quantifier(qn
)) {
828 r
= compile_tree_n_times(qn
->target
, qn
->lower
, reg
);
830 if (IS_NOT_NULL(qn
->next_head_exact
) && !CKN_ON
) {
831 if (IS_MULTILINE(reg
->options
))
832 r
= add_opcode(reg
, OP_ANYCHAR_ML_STAR_PEEK_NEXT
);
834 r
= add_opcode(reg
, OP_ANYCHAR_STAR_PEEK_NEXT
);
837 r
= add_state_check_num(reg
, ckn
);
841 return add_bytes(reg
, NSTR(qn
->next_head_exact
)->s
, 1);
844 if (IS_MULTILINE(reg
->options
)) {
845 r
= add_opcode(reg
, (CKN_ON
?
846 OP_STATE_CHECK_ANYCHAR_ML_STAR
847 : OP_ANYCHAR_ML_STAR
));
850 r
= add_opcode(reg
, (CKN_ON
?
851 OP_STATE_CHECK_ANYCHAR_STAR
856 r
= add_state_check_num(reg
, ckn
);
863 mod_tlen
= tlen
+ (SIZE_OP_NULL_CHECK_START
+ SIZE_OP_NULL_CHECK_END
);
867 if (infinite
&& qn
->lower
<= 1) {
869 if (qn
->lower
== 1) {
870 r
= add_opcode_rel_addr(reg
, OP_JUMP
,
871 (CKN_ON
? SIZE_OP_STATE_CHECK_PUSH
: SIZE_OP_PUSH
));
876 r
= add_opcode(reg
, OP_STATE_CHECK_PUSH
);
878 r
= add_state_check_num(reg
, ckn
);
880 r
= add_rel_addr(reg
, mod_tlen
+ SIZE_OP_JUMP
);
883 r
= add_opcode_rel_addr(reg
, OP_PUSH
, mod_tlen
+ SIZE_OP_JUMP
);
886 r
= compile_tree_empty_check(qn
->target
, reg
, empty_info
);
888 r
= add_opcode_rel_addr(reg
, OP_JUMP
,
889 -(mod_tlen
+ (int )SIZE_OP_JUMP
890 + (int )(CKN_ON
? SIZE_OP_STATE_CHECK_PUSH
: SIZE_OP_PUSH
)));
893 if (qn
->lower
== 0) {
894 r
= add_opcode_rel_addr(reg
, OP_JUMP
, mod_tlen
);
897 r
= compile_tree_empty_check(qn
->target
, reg
, empty_info
);
900 r
= add_opcode(reg
, OP_STATE_CHECK_PUSH_OR_JUMP
);
902 r
= add_state_check_num(reg
, ckn
);
904 r
= add_rel_addr(reg
,
905 -(mod_tlen
+ (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP
));
908 r
= add_opcode_rel_addr(reg
, OP_PUSH
, -(mod_tlen
+ (int )SIZE_OP_PUSH
));
911 else if (qn
->upper
== 0) {
912 if (qn
->is_refered
!= 0) { /* /(?<n>..){0}/ */
913 r
= add_opcode_rel_addr(reg
, OP_JUMP
, tlen
);
915 r
= compile_tree(qn
->target
, reg
);
920 else if (qn
->upper
== 1 && qn
->greedy
) {
921 if (qn
->lower
== 0) {
923 r
= add_opcode(reg
, OP_STATE_CHECK_PUSH
);
925 r
= add_state_check_num(reg
, ckn
);
927 r
= add_rel_addr(reg
, tlen
);
930 r
= add_opcode_rel_addr(reg
, OP_PUSH
, tlen
);
935 r
= compile_tree(qn
->target
, reg
);
937 else if (!qn
->greedy
&& qn
->upper
== 1 && qn
->lower
== 0) { /* '??' */
939 r
= add_opcode(reg
, OP_STATE_CHECK_PUSH
);
941 r
= add_state_check_num(reg
, ckn
);
943 r
= add_rel_addr(reg
, SIZE_OP_JUMP
);
946 r
= add_opcode_rel_addr(reg
, OP_PUSH
, SIZE_OP_JUMP
);
950 r
= add_opcode_rel_addr(reg
, OP_JUMP
, tlen
);
952 r
= compile_tree(qn
->target
, reg
);
955 r
= compile_range_repeat_node(qn
, mod_tlen
, empty_info
, reg
);
958 r
= add_opcode(reg
, OP_STATE_CHECK
);
960 r
= add_state_check_num(reg
, ckn
);
966 #else /* USE_COMBINATION_EXPLOSION_CHECK */
969 compile_length_quantifier_node(QtfrNode
* qn
, regex_t
* reg
)
972 int infinite
= IS_REPEAT_INFINITE(qn
->upper
);
973 int empty_info
= qn
->target_empty_info
;
974 int tlen
= compile_length_tree(qn
->target
, reg
);
976 if (tlen
< 0) return tlen
;
979 if (NTYPE(qn
->target
) == NT_CANY
) {
980 if (qn
->greedy
&& infinite
) {
981 if (IS_NOT_NULL(qn
->next_head_exact
))
982 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT
+ tlen
* qn
->lower
;
984 return SIZE_OP_ANYCHAR_STAR
+ tlen
* qn
->lower
;
989 mod_tlen
= tlen
+ (SIZE_OP_NULL_CHECK_START
+ SIZE_OP_NULL_CHECK_END
);
994 (qn
->lower
<= 1 || tlen
* qn
->lower
<= QUANTIFIER_EXPAND_LIMIT_SIZE
)) {
995 if (qn
->lower
== 1 && tlen
> QUANTIFIER_EXPAND_LIMIT_SIZE
) {
999 len
= tlen
* qn
->lower
;
1003 if (IS_NOT_NULL(qn
->head_exact
))
1004 len
+= SIZE_OP_PUSH_OR_JUMP_EXACT1
+ mod_tlen
+ SIZE_OP_JUMP
;
1005 else if (IS_NOT_NULL(qn
->next_head_exact
))
1006 len
+= SIZE_OP_PUSH_IF_PEEK_NEXT
+ mod_tlen
+ SIZE_OP_JUMP
;
1008 len
+= SIZE_OP_PUSH
+ mod_tlen
+ SIZE_OP_JUMP
;
1011 len
+= SIZE_OP_JUMP
+ mod_tlen
+ SIZE_OP_PUSH
;
1013 else if (qn
->upper
== 0 && qn
->is_refered
!= 0) { /* /(?<n>..){0}/ */
1014 len
= SIZE_OP_JUMP
+ tlen
;
1016 else if (!infinite
&& qn
->greedy
&&
1017 (qn
->upper
== 1 || (tlen
+ SIZE_OP_PUSH
) * qn
->upper
1018 <= QUANTIFIER_EXPAND_LIMIT_SIZE
)) {
1019 len
= tlen
* qn
->lower
;
1020 len
+= (SIZE_OP_PUSH
+ tlen
) * (qn
->upper
- qn
->lower
);
1022 else if (!qn
->greedy
&& qn
->upper
== 1 && qn
->lower
== 0) { /* '??' */
1023 len
= SIZE_OP_PUSH
+ SIZE_OP_JUMP
+ tlen
;
1026 len
= SIZE_OP_REPEAT_INC
1027 + mod_tlen
+ SIZE_OPCODE
+ SIZE_RELADDR
+ SIZE_MEMNUM
;
1034 compile_quantifier_node(QtfrNode
* qn
, regex_t
* reg
)
1037 int infinite
= IS_REPEAT_INFINITE(qn
->upper
);
1038 int empty_info
= qn
->target_empty_info
;
1039 int tlen
= compile_length_tree(qn
->target
, reg
);
1041 if (tlen
< 0) return tlen
;
1043 if (is_anychar_star_quantifier(qn
)) {
1044 r
= compile_tree_n_times(qn
->target
, qn
->lower
, reg
);
1046 if (IS_NOT_NULL(qn
->next_head_exact
)) {
1047 if (IS_MULTILINE(reg
->options
))
1048 r
= add_opcode(reg
, OP_ANYCHAR_ML_STAR_PEEK_NEXT
);
1050 r
= add_opcode(reg
, OP_ANYCHAR_STAR_PEEK_NEXT
);
1052 return add_bytes(reg
, NSTR(qn
->next_head_exact
)->s
, 1);
1055 if (IS_MULTILINE(reg
->options
))
1056 return add_opcode(reg
, OP_ANYCHAR_ML_STAR
);
1058 return add_opcode(reg
, OP_ANYCHAR_STAR
);
1062 if (empty_info
!= 0)
1063 mod_tlen
= tlen
+ (SIZE_OP_NULL_CHECK_START
+ SIZE_OP_NULL_CHECK_END
);
1068 (qn
->lower
<= 1 || tlen
* qn
->lower
<= QUANTIFIER_EXPAND_LIMIT_SIZE
)) {
1069 if (qn
->lower
== 1 && tlen
> QUANTIFIER_EXPAND_LIMIT_SIZE
) {
1071 if (IS_NOT_NULL(qn
->head_exact
))
1072 r
= add_opcode_rel_addr(reg
, OP_JUMP
, SIZE_OP_PUSH_OR_JUMP_EXACT1
);
1073 else if (IS_NOT_NULL(qn
->next_head_exact
))
1074 r
= add_opcode_rel_addr(reg
, OP_JUMP
, SIZE_OP_PUSH_IF_PEEK_NEXT
);
1076 r
= add_opcode_rel_addr(reg
, OP_JUMP
, SIZE_OP_PUSH
);
1079 r
= add_opcode_rel_addr(reg
, OP_JUMP
, SIZE_OP_JUMP
);
1084 r
= compile_tree_n_times(qn
->target
, qn
->lower
, reg
);
1089 if (IS_NOT_NULL(qn
->head_exact
)) {
1090 r
= add_opcode_rel_addr(reg
, OP_PUSH_OR_JUMP_EXACT1
,
1091 mod_tlen
+ SIZE_OP_JUMP
);
1093 add_bytes(reg
, NSTR(qn
->head_exact
)->s
, 1);
1094 r
= compile_tree_empty_check(qn
->target
, reg
, empty_info
);
1096 r
= add_opcode_rel_addr(reg
, OP_JUMP
,
1097 -(mod_tlen
+ (int )SIZE_OP_JUMP
+ (int )SIZE_OP_PUSH_OR_JUMP_EXACT1
));
1099 else if (IS_NOT_NULL(qn
->next_head_exact
)) {
1100 r
= add_opcode_rel_addr(reg
, OP_PUSH_IF_PEEK_NEXT
,
1101 mod_tlen
+ SIZE_OP_JUMP
);
1103 add_bytes(reg
, NSTR(qn
->next_head_exact
)->s
, 1);
1104 r
= compile_tree_empty_check(qn
->target
, reg
, empty_info
);
1106 r
= add_opcode_rel_addr(reg
, OP_JUMP
,
1107 -(mod_tlen
+ (int )SIZE_OP_JUMP
+ (int )SIZE_OP_PUSH_IF_PEEK_NEXT
));
1110 r
= add_opcode_rel_addr(reg
, OP_PUSH
, mod_tlen
+ SIZE_OP_JUMP
);
1112 r
= compile_tree_empty_check(qn
->target
, reg
, empty_info
);
1114 r
= add_opcode_rel_addr(reg
, OP_JUMP
,
1115 -(mod_tlen
+ (int )SIZE_OP_JUMP
+ (int )SIZE_OP_PUSH
));
1119 r
= add_opcode_rel_addr(reg
, OP_JUMP
, mod_tlen
);
1121 r
= compile_tree_empty_check(qn
->target
, reg
, empty_info
);
1123 r
= add_opcode_rel_addr(reg
, OP_PUSH
, -(mod_tlen
+ (int )SIZE_OP_PUSH
));
1126 else if (qn
->upper
== 0 && qn
->is_refered
!= 0) { /* /(?<n>..){0}/ */
1127 r
= add_opcode_rel_addr(reg
, OP_JUMP
, tlen
);
1129 r
= compile_tree(qn
->target
, reg
);
1131 else if (!infinite
&& qn
->greedy
&&
1132 (qn
->upper
== 1 || (tlen
+ SIZE_OP_PUSH
) * qn
->upper
1133 <= QUANTIFIER_EXPAND_LIMIT_SIZE
)) {
1134 int n
= qn
->upper
- qn
->lower
;
1136 r
= compile_tree_n_times(qn
->target
, qn
->lower
, reg
);
1139 for (i
= 0; i
< n
; i
++) {
1140 r
= add_opcode_rel_addr(reg
, OP_PUSH
,
1141 (n
- i
) * tlen
+ (n
- i
- 1) * SIZE_OP_PUSH
);
1143 r
= compile_tree(qn
->target
, reg
);
1147 else if (!qn
->greedy
&& qn
->upper
== 1 && qn
->lower
== 0) { /* '??' */
1148 r
= add_opcode_rel_addr(reg
, OP_PUSH
, SIZE_OP_JUMP
);
1150 r
= add_opcode_rel_addr(reg
, OP_JUMP
, tlen
);
1152 r
= compile_tree(qn
->target
, reg
);
1155 r
= compile_range_repeat_node(qn
, mod_tlen
, empty_info
, reg
);
1159 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
1162 compile_length_option_node(EncloseNode
* node
, regex_t
* reg
)
1165 OnigOptionType prev
= reg
->options
;
1167 reg
->options
= node
->option
;
1168 tlen
= compile_length_tree(node
->target
, reg
);
1169 reg
->options
= prev
;
1171 if (tlen
< 0) return tlen
;
1173 if (IS_DYNAMIC_OPTION(prev
^ node
->option
)) {
1174 return SIZE_OP_SET_OPTION_PUSH
+ SIZE_OP_SET_OPTION
+ SIZE_OP_FAIL
1175 + tlen
+ SIZE_OP_SET_OPTION
;
1182 compile_option_node(EncloseNode
* node
, regex_t
* reg
)
1185 OnigOptionType prev
= reg
->options
;
1187 if (IS_DYNAMIC_OPTION(prev
^ node
->option
)) {
1188 r
= add_opcode_option(reg
, OP_SET_OPTION_PUSH
, node
->option
);
1190 r
= add_opcode_option(reg
, OP_SET_OPTION
, prev
);
1192 r
= add_opcode(reg
, OP_FAIL
);
1196 reg
->options
= node
->option
;
1197 r
= compile_tree(node
->target
, reg
);
1198 reg
->options
= prev
;
1200 if (IS_DYNAMIC_OPTION(prev
^ node
->option
)) {
1202 r
= add_opcode_option(reg
, OP_SET_OPTION
, prev
);
1208 compile_length_enclose_node(EncloseNode
* node
, regex_t
* reg
)
1213 if (node
->type
== ENCLOSE_OPTION
)
1214 return compile_length_option_node(node
, reg
);
1217 tlen
= compile_length_tree(node
->target
, reg
);
1218 if (tlen
< 0) return tlen
;
1223 switch (node
->type
) {
1224 case ENCLOSE_MEMORY
:
1225 #ifdef USE_SUBEXP_CALL
1226 if (IS_ENCLOSE_CALLED(node
)) {
1227 len
= SIZE_OP_MEMORY_START_PUSH
+ tlen
1228 + SIZE_OP_CALL
+ SIZE_OP_JUMP
+ SIZE_OP_RETURN
;
1229 if (BIT_STATUS_AT(reg
->bt_mem_end
, node
->regnum
))
1230 len
+= (IS_ENCLOSE_RECURSION(node
)
1231 ? SIZE_OP_MEMORY_END_PUSH_REC
: SIZE_OP_MEMORY_END_PUSH
);
1233 len
+= (IS_ENCLOSE_RECURSION(node
)
1234 ? SIZE_OP_MEMORY_END_REC
: SIZE_OP_MEMORY_END
);
1239 if (BIT_STATUS_AT(reg
->bt_mem_start
, node
->regnum
))
1240 len
= SIZE_OP_MEMORY_START_PUSH
;
1242 len
= SIZE_OP_MEMORY_START
;
1244 len
+= tlen
+ (BIT_STATUS_AT(reg
->bt_mem_end
, node
->regnum
)
1245 ? SIZE_OP_MEMORY_END_PUSH
: SIZE_OP_MEMORY_END
);
1249 case ENCLOSE_STOP_BACKTRACK
:
1250 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node
)) {
1251 if (node
->target
== NULL
) {
1252 CHECK_NULL_RETURN_MEMERR(node
->target
);
1254 QtfrNode
* qn
= NQTFR(node
->target
);
1255 tlen
= compile_length_tree(qn
->target
, reg
);
1256 if (tlen
< 0) return tlen
;
1258 len
= tlen
* qn
->lower
1259 + SIZE_OP_PUSH
+ tlen
+ SIZE_OP_POP
+ SIZE_OP_JUMP
;
1262 len
= SIZE_OP_PUSH_STOP_BT
+ tlen
+ SIZE_OP_POP_STOP_BT
;
1267 return ONIGERR_TYPE_BUG
;
1274 static int get_char_length_tree(Node
* node
, regex_t
* reg
, int* len
);
1277 compile_enclose_node(EncloseNode
* node
, regex_t
* reg
)
1281 if (node
->type
== ENCLOSE_OPTION
)
1282 return compile_option_node(node
, reg
);
1284 switch (node
->type
) {
1285 case ENCLOSE_MEMORY
:
1286 #ifdef USE_SUBEXP_CALL
1287 if (IS_ENCLOSE_CALLED(node
)) {
1288 r
= add_opcode(reg
, OP_CALL
);
1290 node
->call_addr
= BBUF_GET_OFFSET_POS(reg
) + SIZE_ABSADDR
+ SIZE_OP_JUMP
;
1291 node
->state
|= NST_ADDR_FIXED
;
1292 r
= add_abs_addr(reg
, (int )node
->call_addr
);
1294 len
= compile_length_tree(node
->target
, reg
);
1295 len
+= (SIZE_OP_MEMORY_START_PUSH
+ SIZE_OP_RETURN
);
1296 if (BIT_STATUS_AT(reg
->bt_mem_end
, node
->regnum
))
1297 len
+= (IS_ENCLOSE_RECURSION(node
)
1298 ? SIZE_OP_MEMORY_END_PUSH_REC
: SIZE_OP_MEMORY_END_PUSH
);
1300 len
+= (IS_ENCLOSE_RECURSION(node
)
1301 ? SIZE_OP_MEMORY_END_REC
: SIZE_OP_MEMORY_END
);
1303 r
= add_opcode_rel_addr(reg
, OP_JUMP
, len
);
1307 if (BIT_STATUS_AT(reg
->bt_mem_start
, node
->regnum
))
1308 r
= add_opcode(reg
, OP_MEMORY_START_PUSH
);
1310 r
= add_opcode(reg
, OP_MEMORY_START
);
1312 r
= add_mem_num(reg
, node
->regnum
);
1314 r
= compile_tree(node
->target
, reg
);
1316 #ifdef USE_SUBEXP_CALL
1317 if (IS_ENCLOSE_CALLED(node
)) {
1318 if (BIT_STATUS_AT(reg
->bt_mem_end
, node
->regnum
))
1319 r
= add_opcode(reg
, (IS_ENCLOSE_RECURSION(node
)
1320 ? OP_MEMORY_END_PUSH_REC
: OP_MEMORY_END_PUSH
));
1322 r
= add_opcode(reg
, (IS_ENCLOSE_RECURSION(node
)
1323 ? OP_MEMORY_END_REC
: OP_MEMORY_END
));
1326 r
= add_mem_num(reg
, node
->regnum
);
1328 r
= add_opcode(reg
, OP_RETURN
);
1333 if (BIT_STATUS_AT(reg
->bt_mem_end
, node
->regnum
))
1334 r
= add_opcode(reg
, OP_MEMORY_END_PUSH
);
1336 r
= add_opcode(reg
, OP_MEMORY_END
);
1338 r
= add_mem_num(reg
, node
->regnum
);
1342 case ENCLOSE_STOP_BACKTRACK
:
1343 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node
)) {
1344 QtfrNode
* qn
= NQTFR(node
->target
);
1345 r
= compile_tree_n_times(qn
->target
, qn
->lower
, reg
);
1348 len
= compile_length_tree(qn
->target
, reg
);
1349 if (len
< 0) return len
;
1351 r
= add_opcode_rel_addr(reg
, OP_PUSH
, len
+ SIZE_OP_POP
+ SIZE_OP_JUMP
);
1353 r
= compile_tree(qn
->target
, reg
);
1355 r
= add_opcode(reg
, OP_POP
);
1357 r
= add_opcode_rel_addr(reg
, OP_JUMP
,
1358 -((int )SIZE_OP_PUSH
+ len
+ (int )SIZE_OP_POP
+ (int )SIZE_OP_JUMP
));
1361 r
= add_opcode(reg
, OP_PUSH_STOP_BT
);
1363 r
= compile_tree(node
->target
, reg
);
1365 r
= add_opcode(reg
, OP_POP_STOP_BT
);
1370 return ONIGERR_TYPE_BUG
;
1378 compile_length_anchor_node(AnchorNode
* node
, regex_t
* reg
)
1384 tlen
= compile_length_tree(node
->target
, reg
);
1385 if (tlen
< 0) return tlen
;
1388 switch (node
->type
) {
1389 case ANCHOR_PREC_READ
:
1390 len
= SIZE_OP_PUSH_POS
+ tlen
+ SIZE_OP_POP_POS
;
1392 case ANCHOR_PREC_READ_NOT
:
1393 len
= SIZE_OP_PUSH_POS_NOT
+ tlen
+ SIZE_OP_FAIL_POS
;
1395 case ANCHOR_LOOK_BEHIND
:
1396 len
= SIZE_OP_LOOK_BEHIND
+ tlen
;
1398 case ANCHOR_LOOK_BEHIND_NOT
:
1399 len
= SIZE_OP_PUSH_LOOK_BEHIND_NOT
+ tlen
+ SIZE_OP_FAIL_LOOK_BEHIND_NOT
;
1411 compile_anchor_node(AnchorNode
* node
, regex_t
* reg
)
1415 switch (node
->type
) {
1416 case ANCHOR_BEGIN_BUF
: r
= add_opcode(reg
, OP_BEGIN_BUF
); break;
1417 case ANCHOR_END_BUF
: r
= add_opcode(reg
, OP_END_BUF
); break;
1418 case ANCHOR_BEGIN_LINE
: r
= add_opcode(reg
, OP_BEGIN_LINE
); break;
1419 case ANCHOR_END_LINE
: r
= add_opcode(reg
, OP_END_LINE
); break;
1420 case ANCHOR_SEMI_END_BUF
: r
= add_opcode(reg
, OP_SEMI_END_BUF
); break;
1421 case ANCHOR_BEGIN_POSITION
: r
= add_opcode(reg
, OP_BEGIN_POSITION
); break;
1423 case ANCHOR_WORD_BOUND
: r
= add_opcode(reg
, OP_WORD_BOUND
); break;
1424 case ANCHOR_NOT_WORD_BOUND
: r
= add_opcode(reg
, OP_NOT_WORD_BOUND
); break;
1425 #ifdef USE_WORD_BEGIN_END
1426 case ANCHOR_WORD_BEGIN
: r
= add_opcode(reg
, OP_WORD_BEGIN
); break;
1427 case ANCHOR_WORD_END
: r
= add_opcode(reg
, OP_WORD_END
); break;
1430 case ANCHOR_PREC_READ
:
1431 r
= add_opcode(reg
, OP_PUSH_POS
);
1433 r
= compile_tree(node
->target
, reg
);
1435 r
= add_opcode(reg
, OP_POP_POS
);
1438 case ANCHOR_PREC_READ_NOT
:
1439 len
= compile_length_tree(node
->target
, reg
);
1440 if (len
< 0) return len
;
1441 r
= add_opcode_rel_addr(reg
, OP_PUSH_POS_NOT
, len
+ SIZE_OP_FAIL_POS
);
1443 r
= compile_tree(node
->target
, reg
);
1445 r
= add_opcode(reg
, OP_FAIL_POS
);
1448 case ANCHOR_LOOK_BEHIND
:
1451 r
= add_opcode(reg
, OP_LOOK_BEHIND
);
1453 if (node
->char_len
< 0) {
1454 r
= get_char_length_tree(node
->target
, reg
, &n
);
1455 if (r
) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN
;
1459 r
= add_length(reg
, n
);
1461 r
= compile_tree(node
->target
, reg
);
1465 case ANCHOR_LOOK_BEHIND_NOT
:
1468 len
= compile_length_tree(node
->target
, reg
);
1469 r
= add_opcode_rel_addr(reg
, OP_PUSH_LOOK_BEHIND_NOT
,
1470 len
+ SIZE_OP_FAIL_LOOK_BEHIND_NOT
);
1472 if (node
->char_len
< 0) {
1473 r
= get_char_length_tree(node
->target
, reg
, &n
);
1474 if (r
) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN
;
1478 r
= add_length(reg
, n
);
1480 r
= compile_tree(node
->target
, reg
);
1482 r
= add_opcode(reg
, OP_FAIL_LOOK_BEHIND_NOT
);
1487 return ONIGERR_TYPE_BUG
;
1495 compile_length_tree(Node
* node
, regex_t
* reg
)
1504 r
= compile_length_tree(NCAR(node
), reg
);
1505 if (r
< 0) return r
;
1507 } while (IS_NOT_NULL(node
= NCDR(node
)));
1517 r
+= compile_length_tree(NCAR(node
), reg
);
1519 } while (IS_NOT_NULL(node
= NCDR(node
)));
1520 r
+= (SIZE_OP_PUSH
+ SIZE_OP_JUMP
) * (n
- 1);
1525 if (NSTRING_IS_RAW(node
))
1526 r
= compile_length_string_raw_node(NSTR(node
), reg
);
1528 r
= compile_length_string_node(node
, reg
);
1532 r
= compile_length_cclass_node(NCCLASS(node
), reg
);
1542 BRefNode
* br
= NBREF(node
);
1544 #ifdef USE_BACKREF_WITH_LEVEL
1545 if (IS_BACKREF_NEST_LEVEL(br
)) {
1546 r
= SIZE_OPCODE
+ SIZE_OPTION
+ SIZE_LENGTH
+
1547 SIZE_LENGTH
+ (SIZE_MEMNUM
* br
->back_num
);
1551 if (br
->back_num
== 1) {
1552 r
= ((!IS_IGNORECASE(reg
->options
) && br
->back_static
[0] <= 2)
1553 ? SIZE_OPCODE
: (SIZE_OPCODE
+ SIZE_MEMNUM
));
1556 r
= SIZE_OPCODE
+ SIZE_LENGTH
+ (SIZE_MEMNUM
* br
->back_num
);
1561 #ifdef USE_SUBEXP_CALL
1568 r
= compile_length_quantifier_node(NQTFR(node
), reg
);
1572 r
= compile_length_enclose_node(NENCLOSE(node
), reg
);
1576 r
= compile_length_anchor_node(NANCHOR(node
), reg
);
1580 return ONIGERR_TYPE_BUG
;
1588 compile_tree(Node
* node
, regex_t
* reg
)
1590 int n
, type
, len
, pos
, r
= 0;
1596 r
= compile_tree(NCAR(node
), reg
);
1597 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
1605 len
+= compile_length_tree(NCAR(x
), reg
);
1606 if (NCDR(x
) != NULL
) {
1607 len
+= SIZE_OP_PUSH
+ SIZE_OP_JUMP
;
1609 } while (IS_NOT_NULL(x
= NCDR(x
)));
1610 pos
= reg
->used
+ len
; /* goal position */
1613 len
= compile_length_tree(NCAR(node
), reg
);
1614 if (IS_NOT_NULL(NCDR(node
))) {
1615 r
= add_opcode_rel_addr(reg
, OP_PUSH
, len
+ SIZE_OP_JUMP
);
1618 r
= compile_tree(NCAR(node
), reg
);
1620 if (IS_NOT_NULL(NCDR(node
))) {
1621 len
= pos
- (reg
->used
+ SIZE_OP_JUMP
);
1622 r
= add_opcode_rel_addr(reg
, OP_JUMP
, len
);
1625 } while (IS_NOT_NULL(node
= NCDR(node
)));
1630 if (NSTRING_IS_RAW(node
))
1631 r
= compile_string_raw_node(NSTR(node
), reg
);
1633 r
= compile_string_node(node
, reg
);
1637 r
= compile_cclass_node(NCCLASS(node
), reg
);
1644 switch (NCTYPE(node
)->ctype
) {
1645 case ONIGENC_CTYPE_WORD
:
1646 if (NCTYPE(node
)->not != 0) op
= OP_NOT_WORD
;
1650 return ONIGERR_TYPE_BUG
;
1653 r
= add_opcode(reg
, op
);
1658 if (IS_MULTILINE(reg
->options
))
1659 r
= add_opcode(reg
, OP_ANYCHAR_ML
);
1661 r
= add_opcode(reg
, OP_ANYCHAR
);
1666 BRefNode
* br
= NBREF(node
);
1668 #ifdef USE_BACKREF_WITH_LEVEL
1669 if (IS_BACKREF_NEST_LEVEL(br
)) {
1670 r
= add_opcode(reg
, OP_BACKREF_WITH_LEVEL
);
1672 r
= add_option(reg
, (reg
->options
& ONIG_OPTION_IGNORECASE
));
1674 r
= add_length(reg
, br
->nest_level
);
1677 goto add_bacref_mems
;
1681 if (br
->back_num
== 1) {
1682 n
= br
->back_static
[0];
1683 if (IS_IGNORECASE(reg
->options
)) {
1684 r
= add_opcode(reg
, OP_BACKREFN_IC
);
1686 r
= add_mem_num(reg
, n
);
1690 case 1: r
= add_opcode(reg
, OP_BACKREF1
); break;
1691 case 2: r
= add_opcode(reg
, OP_BACKREF2
); break;
1693 r
= add_opcode(reg
, OP_BACKREFN
);
1695 r
= add_mem_num(reg
, n
);
1704 if (IS_IGNORECASE(reg
->options
)) {
1705 r
= add_opcode(reg
, OP_BACKREF_MULTI_IC
);
1708 r
= add_opcode(reg
, OP_BACKREF_MULTI
);
1712 #ifdef USE_BACKREF_WITH_LEVEL
1715 r
= add_length(reg
, br
->back_num
);
1718 for (i
= br
->back_num
- 1; i
>= 0; i
--) {
1719 r
= add_mem_num(reg
, p
[i
]);
1726 #ifdef USE_SUBEXP_CALL
1728 r
= compile_call(NCALL(node
), reg
);
1733 r
= compile_quantifier_node(NQTFR(node
), reg
);
1737 r
= compile_enclose_node(NENCLOSE(node
), reg
);
1741 r
= compile_anchor_node(NANCHOR(node
), reg
);
1746 fprintf(stderr
, "compile_tree: undefined node type %d\n", NTYPE(node
));
1754 #ifdef USE_NAMED_GROUP
1757 noname_disable_map(Node
** plink
, GroupNumRemap
* map
, int* counter
)
1760 Node
* node
= *plink
;
1762 switch (NTYPE(node
)) {
1766 r
= noname_disable_map(&(NCAR(node
)), map
, counter
);
1767 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
1772 Node
** ptarget
= &(NQTFR(node
)->target
);
1773 Node
* old
= *ptarget
;
1774 r
= noname_disable_map(ptarget
, map
, counter
);
1775 if (*ptarget
!= old
&& NTYPE(*ptarget
) == NT_QTFR
) {
1776 onig_reduce_nested_quantifier(node
, *ptarget
);
1783 EncloseNode
* en
= NENCLOSE(node
);
1784 if (en
->type
== ENCLOSE_MEMORY
) {
1785 if (IS_ENCLOSE_NAMED_GROUP(en
)) {
1787 map
[en
->regnum
].new_val
= *counter
;
1788 en
->regnum
= *counter
;
1789 r
= noname_disable_map(&(en
->target
), map
, counter
);
1792 *plink
= en
->target
;
1793 en
->target
= NULL_NODE
;
1794 onig_node_free(node
);
1795 r
= noname_disable_map(plink
, map
, counter
);
1799 r
= noname_disable_map(&(en
->target
), map
, counter
);
1811 renumber_node_backref(Node
* node
, GroupNumRemap
* map
)
1813 int i
, pos
, n
, old_num
;
1815 BRefNode
* bn
= NBREF(node
);
1817 if (! IS_BACKREF_NAME_REF(bn
))
1818 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED
;
1820 old_num
= bn
->back_num
;
1821 if (IS_NULL(bn
->back_dynamic
))
1822 backs
= bn
->back_static
;
1824 backs
= bn
->back_dynamic
;
1826 for (i
= 0, pos
= 0; i
< old_num
; i
++) {
1827 n
= map
[backs
[i
]].new_val
;
1839 renumber_by_map(Node
* node
, GroupNumRemap
* map
)
1843 switch (NTYPE(node
)) {
1847 r
= renumber_by_map(NCAR(node
), map
);
1848 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
1851 r
= renumber_by_map(NQTFR(node
)->target
, map
);
1854 r
= renumber_by_map(NENCLOSE(node
)->target
, map
);
1858 r
= renumber_node_backref(node
, map
);
1869 numbered_ref_check(Node
* node
)
1873 switch (NTYPE(node
)) {
1877 r
= numbered_ref_check(NCAR(node
));
1878 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
1881 r
= numbered_ref_check(NQTFR(node
)->target
);
1884 r
= numbered_ref_check(NENCLOSE(node
)->target
);
1888 if (! IS_BACKREF_NAME_REF(NBREF(node
)))
1889 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED
;
1900 disable_noname_group_capture(Node
** root
, regex_t
* reg
, ScanEnv
* env
)
1902 int r
, i
, pos
, counter
;
1907 map
= (GroupNumRemap
* )xmalloc(sizeof(GroupNumRemap
) * (env
->num_mem
+ 1));
1908 CHECK_NULL_RETURN_MEMERR(map
);
1909 for (i
= 1; i
<= env
->num_mem
; i
++) {
1913 r
= noname_disable_map(root
, map
, &counter
);
1914 if (r
!= 0) return r
;
1916 r
= renumber_by_map(*root
, map
);
1917 if (r
!= 0) return r
;
1919 for (i
= 1, pos
= 1; i
<= env
->num_mem
; i
++) {
1920 if (map
[i
].new_val
> 0) {
1921 SCANENV_MEM_NODES(env
)[pos
] = SCANENV_MEM_NODES(env
)[i
];
1926 loc
= env
->capture_history
;
1927 BIT_STATUS_CLEAR(env
->capture_history
);
1928 for (i
= 1; i
<= ONIG_MAX_CAPTURE_HISTORY_GROUP
; i
++) {
1929 if (BIT_STATUS_AT(loc
, i
)) {
1930 BIT_STATUS_ON_AT_SIMPLE(env
->capture_history
, map
[i
].new_val
);
1934 env
->num_mem
= env
->num_named
;
1935 reg
->num_mem
= env
->num_named
;
1937 Result
= onig_renumber_name_table(reg
, map
);
1941 #endif /* USE_NAMED_GROUP */
1943 #ifdef USE_SUBEXP_CALL
1945 unset_addr_list_fix(UnsetAddrList
* uslist
, regex_t
* reg
)
1951 for (i
= 0; i
< uslist
->num
; i
++) {
1952 en
= NENCLOSE(uslist
->us
[i
].target
);
1953 if (! IS_ENCLOSE_ADDR_FIXED(en
)) return ONIGERR_PARSER_BUG
;
1954 addr
= en
->call_addr
;
1955 offset
= uslist
->us
[i
].offset
;
1957 BBUF_WRITE(reg
, offset
, &addr
, SIZE_ABSADDR
);
1963 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
1965 quantifiers_memory_node_info(Node
* node
)
1969 switch (NTYPE(node
)) {
1975 v
= quantifiers_memory_node_info(NCAR(node
));
1977 } while (v
>= 0 && IS_NOT_NULL(node
= NCDR(node
)));
1981 #ifdef USE_SUBEXP_CALL
1983 if (IS_CALL_RECURSION(NCALL(node
))) {
1984 return NQ_TARGET_IS_EMPTY_REC
; /* tiny version */
1987 r
= quantifiers_memory_node_info(NCALL(node
)->target
);
1993 QtfrNode
* qn
= NQTFR(node
);
1994 if (qn
->upper
!= 0) {
1995 r
= quantifiers_memory_node_info(qn
->target
);
2002 EncloseNode
* en
= NENCLOSE(node
);
2004 case ENCLOSE_MEMORY
:
2005 return NQ_TARGET_IS_EMPTY_MEM
;
2008 case ENCLOSE_OPTION
:
2009 case ENCLOSE_STOP_BACKTRACK
:
2010 r
= quantifiers_memory_node_info(en
->target
);
2030 #endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */
2033 get_min_match_length(Node
* node
, OnigDistance
*min
, ScanEnv
* env
)
2039 switch (NTYPE(node
)) {
2044 Node
** nodes
= SCANENV_MEM_NODES(env
);
2045 BRefNode
* br
= NBREF(node
);
2046 if (br
->state
& NST_RECURSION
) break;
2048 backs
= BACKREFS_P(br
);
2049 if (backs
[0] > env
->num_mem
) return ONIGERR_INVALID_BACKREF
;
2050 r
= get_min_match_length(nodes
[backs
[0]], min
, env
);
2052 for (i
= 1; i
< br
->back_num
; i
++) {
2053 if (backs
[i
] > env
->num_mem
) return ONIGERR_INVALID_BACKREF
;
2054 r
= get_min_match_length(nodes
[backs
[i
]], &tmin
, env
);
2056 if (*min
> tmin
) *min
= tmin
;
2061 #ifdef USE_SUBEXP_CALL
2063 if (IS_CALL_RECURSION(NCALL(node
))) {
2064 EncloseNode
* en
= NENCLOSE(NCALL(node
)->target
);
2065 if (IS_ENCLOSE_MIN_FIXED(en
))
2069 r
= get_min_match_length(NCALL(node
)->target
, min
, env
);
2075 r
= get_min_match_length(NCAR(node
), &tmin
, env
);
2076 if (r
== 0) *min
+= tmin
;
2077 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
2086 r
= get_min_match_length(x
, &tmin
, env
);
2088 if (y
== node
) *min
= tmin
;
2089 else if (*min
> tmin
) *min
= tmin
;
2090 } while (r
== 0 && IS_NOT_NULL(y
= NCDR(y
)));
2096 StrNode
* sn
= NSTR(node
);
2097 *min
= (OnigDistance
)(sn
->end
- sn
->s
);
2112 QtfrNode
* qn
= NQTFR(node
);
2114 if (qn
->lower
> 0) {
2115 r
= get_min_match_length(qn
->target
, min
, env
);
2117 *min
= distance_multiply(*min
, qn
->lower
);
2124 EncloseNode
* en
= NENCLOSE(node
);
2126 case ENCLOSE_MEMORY
:
2127 #ifdef USE_SUBEXP_CALL
2128 if (IS_ENCLOSE_MIN_FIXED(en
))
2131 r
= get_min_match_length(en
->target
, min
, env
);
2134 SET_ENCLOSE_STATUS(node
, NST_MIN_FIXED
);
2139 case ENCLOSE_OPTION
:
2140 case ENCLOSE_STOP_BACKTRACK
:
2141 r
= get_min_match_length(en
->target
, min
, env
);
2156 get_max_match_length(Node
* node
, OnigDistance
*max
, ScanEnv
* env
)
2162 switch (NTYPE(node
)) {
2165 r
= get_max_match_length(NCAR(node
), &tmax
, env
);
2167 *max
= distance_add(*max
, tmax
);
2168 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
2173 r
= get_max_match_length(NCAR(node
), &tmax
, env
);
2174 if (r
== 0 && *max
< tmax
) *max
= tmax
;
2175 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
2180 StrNode
* sn
= NSTR(node
);
2181 *max
= (OnigDistance
)(sn
->end
- sn
->s
);
2186 *max
= ONIGENC_MBC_MAXLEN_DIST(env
->enc
);
2191 *max
= ONIGENC_MBC_MAXLEN_DIST(env
->enc
);
2198 Node
** nodes
= SCANENV_MEM_NODES(env
);
2199 BRefNode
* br
= NBREF(node
);
2200 if (br
->state
& NST_RECURSION
) {
2201 *max
= ONIG_INFINITE_DISTANCE
;
2204 backs
= BACKREFS_P(br
);
2205 for (i
= 0; i
< br
->back_num
; i
++) {
2206 if (backs
[i
] > env
->num_mem
) return ONIGERR_INVALID_BACKREF
;
2207 r
= get_max_match_length(nodes
[backs
[i
]], &tmax
, env
);
2209 if (*max
< tmax
) *max
= tmax
;
2214 #ifdef USE_SUBEXP_CALL
2216 if (! IS_CALL_RECURSION(NCALL(node
)))
2217 r
= get_max_match_length(NCALL(node
)->target
, max
, env
);
2219 *max
= ONIG_INFINITE_DISTANCE
;
2225 QtfrNode
* qn
= NQTFR(node
);
2227 if (qn
->upper
!= 0) {
2228 r
= get_max_match_length(qn
->target
, max
, env
);
2229 if (r
== 0 && *max
!= 0) {
2230 if (! IS_REPEAT_INFINITE(qn
->upper
))
2231 *max
= distance_multiply(*max
, qn
->upper
);
2233 *max
= ONIG_INFINITE_DISTANCE
;
2241 EncloseNode
* en
= NENCLOSE(node
);
2243 case ENCLOSE_MEMORY
:
2244 #ifdef USE_SUBEXP_CALL
2245 if (IS_ENCLOSE_MAX_FIXED(en
))
2248 r
= get_max_match_length(en
->target
, max
, env
);
2251 SET_ENCLOSE_STATUS(node
, NST_MAX_FIXED
);
2256 case ENCLOSE_OPTION
:
2257 case ENCLOSE_STOP_BACKTRACK
:
2258 r
= get_max_match_length(en
->target
, max
, env
);
2272 #define GET_CHAR_LEN_VARLEN -1
2273 #define GET_CHAR_LEN_TOP_ALT_VARLEN -2
2275 /* fixed size pattern node only */
2277 get_char_length_tree1(Node
* node
, regex_t
* reg
, int* len
, int level
)
2284 switch (NTYPE(node
)) {
2287 r
= get_char_length_tree1(NCAR(node
), reg
, &tlen
, level
);
2289 *len
= distance_add(*len
, tlen
);
2290 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
2298 r
= get_char_length_tree1(NCAR(node
), reg
, &tlen
, level
);
2299 while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
))) {
2300 r
= get_char_length_tree1(NCAR(node
), reg
, &tlen2
, level
);
2309 r
= GET_CHAR_LEN_TOP_ALT_VARLEN
;
2311 r
= GET_CHAR_LEN_VARLEN
;
2321 StrNode
* sn
= NSTR(node
);
2323 while (s
< sn
->end
) {
2324 s
+= enclen(reg
->enc
, s
);
2332 QtfrNode
* qn
= NQTFR(node
);
2333 if (qn
->lower
== qn
->upper
) {
2334 r
= get_char_length_tree1(qn
->target
, reg
, &tlen
, level
);
2336 *len
= distance_multiply(tlen
, qn
->lower
);
2339 r
= GET_CHAR_LEN_VARLEN
;
2343 #ifdef USE_SUBEXP_CALL
2345 if (! IS_CALL_RECURSION(NCALL(node
)))
2346 r
= get_char_length_tree1(NCALL(node
)->target
, reg
, len
, level
);
2348 r
= GET_CHAR_LEN_VARLEN
;
2363 EncloseNode
* en
= NENCLOSE(node
);
2365 case ENCLOSE_MEMORY
:
2366 #ifdef USE_SUBEXP_CALL
2367 if (IS_ENCLOSE_CLEN_FIXED(en
))
2368 *len
= en
->char_len
;
2370 r
= get_char_length_tree1(en
->target
, reg
, len
, level
);
2372 en
->char_len
= *len
;
2373 SET_ENCLOSE_STATUS(node
, NST_CLEN_FIXED
);
2378 case ENCLOSE_OPTION
:
2379 case ENCLOSE_STOP_BACKTRACK
:
2380 r
= get_char_length_tree1(en
->target
, reg
, len
, level
);
2392 r
= GET_CHAR_LEN_VARLEN
;
2400 get_char_length_tree(Node
* node
, regex_t
* reg
, int* len
)
2402 return get_char_length_tree1(node
, reg
, len
, 0);
2405 /* x is not included y ==> 1 : 0 */
2407 is_not_included(Node
* x
, Node
* y
, regex_t
* reg
)
2421 if (NCTYPE(y
)->ctype
== NCTYPE(x
)->ctype
&&
2422 NCTYPE(y
)->not != NCTYPE(x
)->not)
2432 tmp
= x
; x
= y
; y
= tmp
;
2449 CClassNode
* xc
= NCCLASS(x
);
2452 switch (NCTYPE(y
)->ctype
) {
2453 case ONIGENC_CTYPE_WORD
:
2454 if (NCTYPE(y
)->not == 0) {
2455 if (IS_NULL(xc
->mbuf
) && !IS_NCCLASS_NOT(xc
)) {
2456 for (i
= 0; i
< SINGLE_BYTE_SIZE
; i
++) {
2457 if (BITSET_AT(xc
->bs
, i
)) {
2458 if (IS_CODE_SB_WORD(reg
->enc
, i
)) return 0;
2466 for (i
= 0; i
< SINGLE_BYTE_SIZE
; i
++) {
2467 if (! IS_CODE_SB_WORD(reg
->enc
, i
)) {
2468 if (!IS_NCCLASS_NOT(xc
)) {
2469 if (BITSET_AT(xc
->bs
, i
))
2473 if (! BITSET_AT(xc
->bs
, i
))
2490 CClassNode
* yc
= NCCLASS(y
);
2492 for (i
= 0; i
< SINGLE_BYTE_SIZE
; i
++) {
2493 v
= BITSET_AT(xc
->bs
, i
);
2494 if ((v
!= 0 && !IS_NCCLASS_NOT(xc
)) ||
2495 (v
== 0 && IS_NCCLASS_NOT(xc
))) {
2496 v
= BITSET_AT(yc
->bs
, i
);
2497 if ((v
!= 0 && !IS_NCCLASS_NOT(yc
)) ||
2498 (v
== 0 && IS_NCCLASS_NOT(yc
)))
2502 if ((IS_NULL(xc
->mbuf
) && !IS_NCCLASS_NOT(xc
)) ||
2503 (IS_NULL(yc
->mbuf
) && !IS_NCCLASS_NOT(yc
)))
2521 StrNode
* xs
= NSTR(x
);
2522 if (NSTRING_LEN(x
) == 0)
2528 switch (NCTYPE(y
)->ctype
) {
2529 case ONIGENC_CTYPE_WORD
:
2530 if (ONIGENC_IS_MBC_WORD(reg
->enc
, xs
->s
, xs
->end
))
2531 return NCTYPE(y
)->not;
2533 return !(NCTYPE(y
)->not);
2542 CClassNode
* cc
= NCCLASS(y
);
2544 code
= ONIGENC_MBC_TO_CODE(reg
->enc
, xs
->s
,
2545 xs
->s
+ ONIGENC_MBC_MAXLEN(reg
->enc
));
2546 return (onig_is_code_in_cc(reg
->enc
, code
, cc
) != 0 ? 0 : 1);
2553 StrNode
* ys
= NSTR(y
);
2554 len
= NSTRING_LEN(x
);
2555 if (len
> NSTRING_LEN(y
)) len
= NSTRING_LEN(y
);
2556 if (NSTRING_IS_AMBIG(x
) || NSTRING_IS_AMBIG(y
)) {
2561 for (i
= 0, p
= ys
->s
, q
= xs
->s
; i
< len
; i
++, p
++, q
++) {
2562 if (*p
!= *q
) return 1;
2582 get_head_value_node(Node
* node
, int exact
, regex_t
* reg
)
2584 Node
* n
= NULL_NODE
;
2586 switch (NTYPE(node
)) {
2590 #ifdef USE_SUBEXP_CALL
2603 n
= get_head_value_node(NCAR(node
), exact
, reg
);
2608 StrNode
* sn
= NSTR(node
);
2610 if (sn
->end
<= sn
->s
)
2614 !NSTRING_IS_RAW(node
) && IS_IGNORECASE(reg
->options
)) {
2624 QtfrNode
* qn
= NQTFR(node
);
2625 if (qn
->lower
> 0) {
2626 if (IS_NOT_NULL(qn
->head_exact
))
2629 n
= get_head_value_node(qn
->target
, exact
, reg
);
2636 EncloseNode
* en
= NENCLOSE(node
);
2638 case ENCLOSE_OPTION
:
2640 OnigOptionType options
= reg
->options
;
2642 reg
->options
= NENCLOSE(node
)->option
;
2643 n
= get_head_value_node(NENCLOSE(node
)->target
, exact
, reg
);
2644 reg
->options
= options
;
2648 case ENCLOSE_MEMORY
:
2649 case ENCLOSE_STOP_BACKTRACK
:
2650 n
= get_head_value_node(en
->target
, exact
, reg
);
2657 if (NANCHOR(node
)->type
== ANCHOR_PREC_READ
)
2658 n
= get_head_value_node(NANCHOR(node
)->target
, exact
, reg
);
2669 check_type_tree(Node
* node
, int type_mask
, int enclose_mask
, int anchor_mask
)
2674 if ((NTYPE2BIT(type
) & type_mask
) == 0)
2681 r
= check_type_tree(NCAR(node
), type_mask
, enclose_mask
,
2683 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
2687 r
= check_type_tree(NQTFR(node
)->target
, type_mask
, enclose_mask
,
2693 EncloseNode
* en
= NENCLOSE(node
);
2694 if ((en
->type
& enclose_mask
) == 0)
2697 r
= check_type_tree(en
->target
, type_mask
, enclose_mask
, anchor_mask
);
2702 type
= NANCHOR(node
)->type
;
2703 if ((type
& anchor_mask
) == 0)
2706 if (NANCHOR(node
)->target
)
2707 r
= check_type_tree(NANCHOR(node
)->target
,
2708 type_mask
, enclose_mask
, anchor_mask
);
2717 #ifdef USE_SUBEXP_CALL
2719 #define RECURSION_EXIST 1
2720 #define RECURSION_INFINITE 2
2723 subexp_inf_recursive_check(Node
* node
, ScanEnv
* env
, int head
)
2738 ret
= subexp_inf_recursive_check(NCAR(x
), env
, head
);
2739 if (ret
< 0 || ret
== RECURSION_INFINITE
) return ret
;
2742 ret
= get_min_match_length(NCAR(x
), &min
, env
);
2743 if (ret
!= 0) return ret
;
2744 if (min
!= 0) head
= 0;
2746 } while (IS_NOT_NULL(x
= NCDR(x
)));
2753 r
= RECURSION_EXIST
;
2755 ret
= subexp_inf_recursive_check(NCAR(node
), env
, head
);
2756 if (ret
< 0 || ret
== RECURSION_INFINITE
) return ret
;
2758 } while (IS_NOT_NULL(node
= NCDR(node
)));
2763 r
= subexp_inf_recursive_check(NQTFR(node
)->target
, env
, head
);
2764 if (r
== RECURSION_EXIST
) {
2765 if (NQTFR(node
)->lower
== 0) r
= 0;
2771 AnchorNode
* an
= NANCHOR(node
);
2773 case ANCHOR_PREC_READ
:
2774 case ANCHOR_PREC_READ_NOT
:
2775 case ANCHOR_LOOK_BEHIND
:
2776 case ANCHOR_LOOK_BEHIND_NOT
:
2777 r
= subexp_inf_recursive_check(an
->target
, env
, head
);
2784 r
= subexp_inf_recursive_check(NCALL(node
)->target
, env
, head
);
2788 if (IS_ENCLOSE_MARK2(NENCLOSE(node
)))
2790 else if (IS_ENCLOSE_MARK1(NENCLOSE(node
)))
2791 return (head
== 0 ? RECURSION_EXIST
: RECURSION_INFINITE
);
2793 SET_ENCLOSE_STATUS(node
, NST_MARK2
);
2794 r
= subexp_inf_recursive_check(NENCLOSE(node
)->target
, env
, head
);
2795 CLEAR_ENCLOSE_STATUS(node
, NST_MARK2
);
2807 subexp_inf_recursive_check_trav(Node
* node
, ScanEnv
* env
)
2817 r
= subexp_inf_recursive_check_trav(NCAR(node
), env
);
2818 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
2822 r
= subexp_inf_recursive_check_trav(NQTFR(node
)->target
, env
);
2827 AnchorNode
* an
= NANCHOR(node
);
2829 case ANCHOR_PREC_READ
:
2830 case ANCHOR_PREC_READ_NOT
:
2831 case ANCHOR_LOOK_BEHIND
:
2832 case ANCHOR_LOOK_BEHIND_NOT
:
2833 r
= subexp_inf_recursive_check_trav(an
->target
, env
);
2841 EncloseNode
* en
= NENCLOSE(node
);
2843 if (IS_ENCLOSE_RECURSION(en
)) {
2844 SET_ENCLOSE_STATUS(node
, NST_MARK1
);
2845 r
= subexp_inf_recursive_check(en
->target
, env
, 1);
2846 if (r
> 0) return ONIGERR_NEVER_ENDING_RECURSION
;
2847 CLEAR_ENCLOSE_STATUS(node
, NST_MARK1
);
2849 r
= subexp_inf_recursive_check_trav(en
->target
, env
);
2862 subexp_recursive_check(Node
* node
)
2866 switch (NTYPE(node
)) {
2870 r
|= subexp_recursive_check(NCAR(node
));
2871 } while (IS_NOT_NULL(node
= NCDR(node
)));
2875 r
= subexp_recursive_check(NQTFR(node
)->target
);
2880 AnchorNode
* an
= NANCHOR(node
);
2882 case ANCHOR_PREC_READ
:
2883 case ANCHOR_PREC_READ_NOT
:
2884 case ANCHOR_LOOK_BEHIND
:
2885 case ANCHOR_LOOK_BEHIND_NOT
:
2886 r
= subexp_recursive_check(an
->target
);
2893 r
= subexp_recursive_check(NCALL(node
)->target
);
2894 if (r
!= 0) SET_CALL_RECURSION(node
);
2898 if (IS_ENCLOSE_MARK2(NENCLOSE(node
)))
2900 else if (IS_ENCLOSE_MARK1(NENCLOSE(node
)))
2901 return 1; /* recursion */
2903 SET_ENCLOSE_STATUS(node
, NST_MARK2
);
2904 r
= subexp_recursive_check(NENCLOSE(node
)->target
);
2905 CLEAR_ENCLOSE_STATUS(node
, NST_MARK2
);
2918 subexp_recursive_check_trav(Node
* node
, ScanEnv
* env
)
2920 #define FOUND_CALLED_NODE 1
2932 ret
= subexp_recursive_check_trav(NCAR(node
), env
);
2933 if (ret
== FOUND_CALLED_NODE
) r
= FOUND_CALLED_NODE
;
2934 else if (ret
< 0) return ret
;
2935 } while (IS_NOT_NULL(node
= NCDR(node
)));
2940 r
= subexp_recursive_check_trav(NQTFR(node
)->target
, env
);
2941 if (NQTFR(node
)->upper
== 0) {
2942 if (r
== FOUND_CALLED_NODE
)
2943 NQTFR(node
)->is_refered
= 1;
2949 AnchorNode
* an
= NANCHOR(node
);
2951 case ANCHOR_PREC_READ
:
2952 case ANCHOR_PREC_READ_NOT
:
2953 case ANCHOR_LOOK_BEHIND
:
2954 case ANCHOR_LOOK_BEHIND_NOT
:
2955 r
= subexp_recursive_check_trav(an
->target
, env
);
2963 EncloseNode
* en
= NENCLOSE(node
);
2965 if (! IS_ENCLOSE_RECURSION(en
)) {
2966 if (IS_ENCLOSE_CALLED(en
)) {
2967 SET_ENCLOSE_STATUS(node
, NST_MARK1
);
2968 r
= subexp_recursive_check(en
->target
);
2969 if (r
!= 0) SET_ENCLOSE_STATUS(node
, NST_RECURSION
);
2970 CLEAR_ENCLOSE_STATUS(node
, NST_MARK1
);
2973 r
= subexp_recursive_check_trav(en
->target
, env
);
2974 if (IS_ENCLOSE_CALLED(en
))
2975 r
|= FOUND_CALLED_NODE
;
2987 setup_subexp_call(Node
* node
, ScanEnv
* env
)
2996 r
= setup_subexp_call(NCAR(node
), env
);
2997 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
3002 r
= setup_subexp_call(NCAR(node
), env
);
3003 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
3007 r
= setup_subexp_call(NQTFR(node
)->target
, env
);
3010 r
= setup_subexp_call(NENCLOSE(node
)->target
, env
);
3015 CallNode
* cn
= NCALL(node
);
3016 Node
** nodes
= SCANENV_MEM_NODES(env
);
3018 if (cn
->group_num
!= 0) {
3019 int gnum
= cn
->group_num
;
3021 #ifdef USE_NAMED_GROUP
3022 if (env
->num_named
> 0 &&
3023 IS_SYNTAX_BV(env
->syntax
, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP
) &&
3024 !ONIG_IS_OPTION_ON(env
->option
, ONIG_OPTION_CAPTURE_GROUP
)) {
3025 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED
;
3028 if (gnum
> env
->num_mem
) {
3029 onig_scan_env_set_error_string(env
,
3030 ONIGERR_UNDEFINED_GROUP_REFERENCE
, cn
->name
, cn
->name_end
);
3031 return ONIGERR_UNDEFINED_GROUP_REFERENCE
;
3034 #ifdef USE_NAMED_GROUP
3037 cn
->target
= nodes
[cn
->group_num
];
3038 if (IS_NULL(cn
->target
)) {
3039 onig_scan_env_set_error_string(env
,
3040 ONIGERR_UNDEFINED_NAME_REFERENCE
, cn
->name
, cn
->name_end
);
3041 return ONIGERR_UNDEFINED_NAME_REFERENCE
;
3043 SET_ENCLOSE_STATUS(cn
->target
, NST_CALLED
);
3044 BIT_STATUS_ON_AT(env
->bt_mem_start
, cn
->group_num
);
3045 cn
->unset_addr_list
= env
->unset_addr_list
;
3047 #ifdef USE_NAMED_GROUP
3051 int n
= onig_name_to_group_numbers(env
->reg
, cn
->name
, cn
->name_end
,
3054 onig_scan_env_set_error_string(env
,
3055 ONIGERR_UNDEFINED_NAME_REFERENCE
, cn
->name
, cn
->name_end
);
3056 return ONIGERR_UNDEFINED_NAME_REFERENCE
;
3059 onig_scan_env_set_error_string(env
,
3060 ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL
, cn
->name
, cn
->name_end
);
3061 return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL
;
3064 cn
->group_num
= refs
[0];
3074 AnchorNode
* an
= NANCHOR(node
);
3077 case ANCHOR_PREC_READ
:
3078 case ANCHOR_PREC_READ_NOT
:
3079 case ANCHOR_LOOK_BEHIND
:
3080 case ANCHOR_LOOK_BEHIND_NOT
:
3081 r
= setup_subexp_call(an
->target
, env
);
3095 /* divide different length alternatives in look-behind.
3096 (?<=A|B) ==> (?<=A)|(?<=B)
3097 (?<!A|B) ==> (?<!A)(?<!B)
3100 divide_look_behind_alternatives(Node
* node
)
3102 Node
*head
, *np
, *insert_node
;
3103 AnchorNode
* an
= NANCHOR(node
);
3104 int anc_type
= an
->type
;
3108 swap_node(node
, head
);
3110 NANCHOR(head
)->target
= np
;
3113 while ((np
= NCDR(np
)) != NULL_NODE
) {
3114 insert_node
= onig_node_new_anchor(anc_type
);
3115 CHECK_NULL_RETURN_MEMERR(insert_node
);
3116 NANCHOR(insert_node
)->target
= NCAR(np
);
3117 NCAR(np
) = insert_node
;
3120 if (anc_type
== ANCHOR_LOOK_BEHIND_NOT
) {
3123 SET_NTYPE(np
, NT_LIST
); /* alt -> list */
3124 } while ((np
= NCDR(np
)) != NULL_NODE
);
3130 setup_look_behind(Node
* node
, regex_t
* reg
, ScanEnv
* env
)
3133 AnchorNode
* an
= NANCHOR(node
);
3135 r
= get_char_length_tree(an
->target
, reg
, &len
);
3138 else if (r
== GET_CHAR_LEN_VARLEN
)
3139 r
= ONIGERR_INVALID_LOOK_BEHIND_PATTERN
;
3140 else if (r
== GET_CHAR_LEN_TOP_ALT_VARLEN
) {
3141 if (IS_SYNTAX_BV(env
->syntax
, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND
))
3142 r
= divide_look_behind_alternatives(node
);
3144 r
= ONIGERR_INVALID_LOOK_BEHIND_PATTERN
;
3151 next_setup(Node
* node
, Node
* next_node
, regex_t
* reg
)
3157 if (type
== NT_QTFR
) {
3158 QtfrNode
* qn
= NQTFR(node
);
3159 if (qn
->greedy
&& IS_REPEAT_INFINITE(qn
->upper
)) {
3160 #ifdef USE_QTFR_PEEK_NEXT
3161 Node
* n
= get_head_value_node(next_node
, 1, reg
);
3162 /* '\0': for UTF-16BE etc... */
3163 if (IS_NOT_NULL(n
) && NSTR(n
)->s
[0] != '\0') {
3164 qn
->next_head_exact
= n
;
3167 /* automatic posseivation a*b ==> (?>a*)b */
3168 if (qn
->lower
<= 1) {
3169 int ttype
= NTYPE(qn
->target
);
3170 if (IS_NODE_TYPE_SIMPLE(ttype
)) {
3172 x
= get_head_value_node(qn
->target
, 0, reg
);
3173 if (IS_NOT_NULL(x
)) {
3174 y
= get_head_value_node(next_node
, 0, reg
);
3175 if (IS_NOT_NULL(y
) && is_not_included(x
, y
, reg
)) {
3176 Node
* en
= onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK
);
3177 CHECK_NULL_RETURN_MEMERR(en
);
3178 SET_ENCLOSE_STATUS(en
, NST_STOP_BT_SIMPLE_REPEAT
);
3179 swap_node(node
, en
);
3180 NENCLOSE(node
)->target
= en
;
3187 else if (type
== NT_ENCLOSE
) {
3188 EncloseNode
* en
= NENCLOSE(node
);
3189 if (en
->type
== ENCLOSE_MEMORY
) {
3199 update_string_node_case_fold(regex_t
* reg
, Node
*node
)
3201 UChar
*p
, *end
, buf
[ONIGENC_MBC_CASE_FOLD_MAXLEN
];
3202 UChar
*sbuf
, *ebuf
, *sp
;
3203 int r
, i
, len
, sbuf_size
;
3204 StrNode
* sn
= NSTR(node
);
3207 sbuf_size
= (int)(end
- sn
->s
) * 2;
3208 sbuf
= (UChar
* )xmalloc(sbuf_size
);
3209 CHECK_NULL_RETURN_MEMERR(sbuf
);
3210 ebuf
= sbuf
+ sbuf_size
;
3215 len
= ONIGENC_MBC_CASE_FOLD(reg
->enc
, reg
->case_fold_flag
, &p
, end
, buf
);
3216 for (i
= 0; i
< len
; i
++) {
3218 sbuf
= (UChar
* )xrealloc(sbuf
, sbuf_size
* 2, sbuf_size
);
3219 CHECK_NULL_RETURN_MEMERR(sbuf
);
3220 sp
= sbuf
+ sbuf_size
;
3222 ebuf
= sbuf
+ sbuf_size
;
3229 r
= onig_node_str_set(node
, sbuf
, sp
);
3240 expand_case_fold_make_rem_string(Node
** rnode
, UChar
*s
, UChar
*end
,
3246 node
= onig_node_new_str(s
, end
);
3247 if (IS_NULL(node
)) return ONIGERR_MEMORY
;
3249 r
= update_string_node_case_fold(reg
, node
);
3251 onig_node_free(node
);
3255 NSTRING_SET_AMBIG(node
);
3256 NSTRING_SET_DONT_GET_OPT_INFO(node
);
3262 expand_case_fold_string_alt(int item_num
, OnigCaseFoldCodeItem items
[],
3263 UChar
*p
, int slen
, UChar
*end
,
3264 regex_t
* reg
, Node
**rnode
)
3266 int r
, i
, j
, len
, varlen
;
3267 Node
*anode
, *var_anode
, *snode
, *xnode
, *an
;
3268 UChar buf
[ONIGENC_CODE_TO_MBC_MAXLEN
];
3271 *rnode
= var_anode
= NULL_NODE
;
3274 for (i
= 0; i
< item_num
; i
++) {
3275 if (items
[i
].byte_len
!= slen
) {
3282 *rnode
= var_anode
= onig_node_new_alt(NULL_NODE
, NULL_NODE
);
3283 if (IS_NULL(var_anode
)) return ONIGERR_MEMORY
;
3285 xnode
= onig_node_new_list(NULL
, NULL
);
3286 if (IS_NULL(xnode
)) goto mem_err
;
3287 NCAR(var_anode
) = xnode
;
3289 anode
= onig_node_new_alt(NULL_NODE
, NULL_NODE
);
3290 if (IS_NULL(anode
)) goto mem_err
;
3291 NCAR(xnode
) = anode
;
3294 *rnode
= anode
= onig_node_new_alt(NULL_NODE
, NULL_NODE
);
3295 if (IS_NULL(anode
)) return ONIGERR_MEMORY
;
3298 snode
= onig_node_new_str(p
, p
+ slen
);
3299 if (IS_NULL(snode
)) goto mem_err
;
3301 NCAR(anode
) = snode
;
3303 for (i
= 0; i
< item_num
; i
++) {
3304 snode
= onig_node_new_str(NULL
, NULL
);
3305 if (IS_NULL(snode
)) goto mem_err
;
3307 for (j
= 0; j
< items
[i
].code_len
; j
++) {
3308 len
= ONIGENC_CODE_TO_MBC(reg
->enc
, items
[i
].code
[j
], buf
);
3314 r
= onig_node_str_cat(snode
, buf
, buf
+ len
);
3315 if (r
!= 0) goto mem_err2
;
3318 an
= onig_node_new_alt(NULL_NODE
, NULL_NODE
);
3323 if (items
[i
].byte_len
!= slen
) {
3324 Node
*rem
= NULL_NODE
;
3325 UChar
*q
= p
+ items
[i
].byte_len
;
3328 r
= expand_case_fold_make_rem_string(&rem
, q
, end
, reg
);
3334 xnode
= onig_node_list_add(NULL_NODE
, snode
);
3335 if (IS_NULL(xnode
)) {
3337 onig_node_free(rem
);
3340 if (IS_NULL(onig_node_list_add(xnode
, rem
))) {
3342 onig_node_free(xnode
);
3343 onig_node_free(rem
);
3353 if (var_anode
== NULL
) {
3355 onig_node_free(xnode
);
3356 onig_node_free(rem
);
3359 NCDR(var_anode
) = an
;
3372 onig_node_free(snode
);
3375 onig_node_free(*rnode
);
3377 return ONIGERR_MEMORY
;
3381 expand_case_fold_string(Node
* node
, regex_t
* reg
)
3383 #define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
3385 int r
, n
, len
, alt_num
;
3386 UChar
*start
, *end
, *p
;
3387 Node
*top_root
, *root
, *snode
, *prev_node
;
3388 OnigCaseFoldCodeItem items
[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM
];
3389 StrNode
* sn
= NSTR(node
);
3391 if (NSTRING_IS_AMBIG(node
)) return 0;
3395 if (start
>= end
) return 0;
3398 top_root
= root
= prev_node
= snode
= NULL_NODE
;
3402 n
= ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg
->enc
, reg
->case_fold_flag
,
3409 len
= enclen(reg
->enc
, p
);
3412 if (IS_NULL(snode
)) {
3413 if (IS_NULL(root
) && IS_NOT_NULL(prev_node
)) {
3414 top_root
= root
= onig_node_list_add(NULL_NODE
, prev_node
);
3415 if (IS_NULL(root
)) {
3416 onig_node_free(prev_node
);
3421 prev_node
= snode
= onig_node_new_str(NULL
, NULL
);
3422 if (IS_NULL(snode
)) goto mem_err
;
3423 if (IS_NOT_NULL(root
)) {
3424 if (IS_NULL(onig_node_list_add(root
, snode
))) {
3425 onig_node_free(snode
);
3431 r
= onig_node_str_cat(snode
, p
, p
+ len
);
3432 if (r
!= 0) goto err
;
3436 if (alt_num
> THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION
) break;
3438 if (IS_NULL(root
) && IS_NOT_NULL(prev_node
)) {
3439 top_root
= root
= onig_node_list_add(NULL_NODE
, prev_node
);
3440 if (IS_NULL(root
)) {
3441 onig_node_free(prev_node
);
3446 r
= expand_case_fold_string_alt(n
, items
, p
, len
, end
, reg
, &prev_node
);
3447 if (r
< 0) goto mem_err
;
3449 if (IS_NULL(root
)) {
3450 top_root
= prev_node
;
3453 if (IS_NULL(onig_node_list_add(root
, prev_node
))) {
3454 onig_node_free(prev_node
);
3459 root
= NCAR(prev_node
);
3462 if (IS_NOT_NULL(root
)) {
3463 if (IS_NULL(onig_node_list_add(root
, prev_node
))) {
3464 onig_node_free(prev_node
);
3479 r
= expand_case_fold_make_rem_string(&srem
, p
, end
, reg
);
3480 if (r
!= 0) goto mem_err
;
3482 if (IS_NOT_NULL(prev_node
) && IS_NULL(root
)) {
3483 top_root
= root
= onig_node_list_add(NULL_NODE
, prev_node
);
3484 if (IS_NULL(root
)) {
3485 onig_node_free(srem
);
3486 onig_node_free(prev_node
);
3491 if (IS_NULL(root
)) {
3495 if (IS_NULL(onig_node_list_add(root
, srem
))) {
3496 onig_node_free(srem
);
3503 top_root
= (IS_NOT_NULL(top_root
) ? top_root
: prev_node
);
3504 swap_node(node
, top_root
);
3505 onig_node_free(top_root
);
3512 onig_node_free(top_root
);
3517 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3519 #define CEC_THRES_NUM_BIG_REPEAT 512
3520 #define CEC_INFINITE_NUM 0x7fffffff
3522 #define CEC_IN_INFINITE_REPEAT (1<<0)
3523 #define CEC_IN_FINITE_REPEAT (1<<1)
3524 #define CEC_CONT_BIG_REPEAT (1<<2)
3527 setup_comb_exp_check(Node
* node
, int state
, ScanEnv
* env
)
3536 Node
* prev
= NULL_NODE
;
3538 r
= setup_comb_exp_check(NCAR(node
), r
, env
);
3540 } while (r
>= 0 && IS_NOT_NULL(node
= NCDR(node
)));
3548 ret
= setup_comb_exp_check(NCAR(node
), state
, env
);
3550 } while (ret
>= 0 && IS_NOT_NULL(node
= NCDR(node
)));
3556 int child_state
= state
;
3558 QtfrNode
* qn
= NQTFR(node
);
3559 Node
* target
= qn
->target
;
3562 if (! IS_REPEAT_INFINITE(qn
->upper
)) {
3563 if (qn
->upper
> 1) {
3564 /* {0,1}, {1,1} are allowed */
3565 child_state
|= CEC_IN_FINITE_REPEAT
;
3567 /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
3568 if (env
->backrefed_mem
== 0) {
3569 if (NTYPE(qn
->target
) == NT_ENCLOSE
) {
3570 EncloseNode
* en
= NENCLOSE(qn
->target
);
3571 if (en
->type
== ENCLOSE_MEMORY
) {
3572 if (NTYPE(en
->target
) == NT_QTFR
) {
3573 QtfrNode
* q
= NQTFR(en
->target
);
3574 if (IS_REPEAT_INFINITE(q
->upper
)
3575 && q
->greedy
== qn
->greedy
) {
3576 qn
->upper
= (qn
->lower
== 0 ? 1 : qn
->lower
);
3578 child_state
= state
;
3587 if (state
& CEC_IN_FINITE_REPEAT
) {
3588 qn
->comb_exp_check_num
= -1;
3591 if (IS_REPEAT_INFINITE(qn
->upper
)) {
3592 var_num
= CEC_INFINITE_NUM
;
3593 child_state
|= CEC_IN_INFINITE_REPEAT
;
3596 var_num
= qn
->upper
- qn
->lower
;
3599 if (var_num
>= CEC_THRES_NUM_BIG_REPEAT
)
3600 add_state
|= CEC_CONT_BIG_REPEAT
;
3602 if (((state
& CEC_IN_INFINITE_REPEAT
) != 0 && var_num
!= 0) ||
3603 ((state
& CEC_CONT_BIG_REPEAT
) != 0 &&
3604 var_num
>= CEC_THRES_NUM_BIG_REPEAT
)) {
3605 if (qn
->comb_exp_check_num
== 0) {
3606 env
->num_comb_exp_check
++;
3607 qn
->comb_exp_check_num
= env
->num_comb_exp_check
;
3608 if (env
->curr_max_regnum
> env
->comb_exp_max_regnum
)
3609 env
->comb_exp_max_regnum
= env
->curr_max_regnum
;
3614 r
= setup_comb_exp_check(target
, child_state
, env
);
3621 EncloseNode
* en
= NENCLOSE(node
);
3624 case ENCLOSE_MEMORY
:
3626 if (env
->curr_max_regnum
< en
->regnum
)
3627 env
->curr_max_regnum
= en
->regnum
;
3629 r
= setup_comb_exp_check(en
->target
, state
, env
);
3634 r
= setup_comb_exp_check(en
->target
, state
, env
);
3640 #ifdef USE_SUBEXP_CALL
3642 if (IS_CALL_RECURSION(NCALL(node
)))
3643 env
->has_recursion
= 1;
3645 r
= setup_comb_exp_check(NCALL(node
)->target
, state
, env
);
3657 #define IN_ALT (1<<0)
3658 #define IN_NOT (1<<1)
3659 #define IN_REPEAT (1<<2)
3660 #define IN_VAR_REPEAT (1<<3)
3662 /* setup_tree does the following work.
3663 1. check empty loop. (set qn->target_empty_info)
3664 2. expand ignore-case in char class.
3665 3. set memory status bit flags. (reg->mem_stats)
3666 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
3667 5. find invalid patterns in look-behind.
3668 6. expand repeated string.
3671 setup_tree(Node
* node
, regex_t
* reg
, int state
, ScanEnv
* env
)
3680 Node
* prev
= NULL_NODE
;
3682 r
= setup_tree(NCAR(node
), reg
, state
, env
);
3683 if (IS_NOT_NULL(prev
) && r
== 0) {
3684 r
= next_setup(prev
, NCAR(node
), reg
);
3687 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
3693 r
= setup_tree(NCAR(node
), reg
, (state
| IN_ALT
), env
);
3694 } while (r
== 0 && IS_NOT_NULL(node
= NCDR(node
)));
3701 if (IS_IGNORECASE(reg
->options
) && !NSTRING_IS_RAW(node
)) {
3702 r
= expand_case_fold_string(node
, reg
);
3710 #ifdef USE_SUBEXP_CALL
3719 Node
** nodes
= SCANENV_MEM_NODES(env
);
3720 BRefNode
* br
= NBREF(node
);
3722 for (i
= 0; i
< br
->back_num
; i
++) {
3723 if (p
[i
] > env
->num_mem
) return ONIGERR_INVALID_BACKREF
;
3724 BIT_STATUS_ON_AT(env
->backrefed_mem
, p
[i
]);
3725 BIT_STATUS_ON_AT(env
->bt_mem_start
, p
[i
]);
3726 #ifdef USE_BACKREF_WITH_LEVEL
3727 if (IS_BACKREF_NEST_LEVEL(br
)) {
3728 BIT_STATUS_ON_AT(env
->bt_mem_end
, p
[i
]);
3731 SET_ENCLOSE_STATUS(nodes
[p
[i
]], NST_MEM_BACKREFED
);
3739 QtfrNode
* qn
= NQTFR(node
);
3740 Node
* target
= qn
->target
;
3742 if ((state
& IN_REPEAT
) != 0) {
3743 qn
->state
|= NST_IN_REPEAT
;
3746 if (IS_REPEAT_INFINITE(qn
->upper
) || qn
->upper
>= 1) {
3747 r
= get_min_match_length(target
, &d
, env
);
3750 qn
->target_empty_info
= NQ_TARGET_IS_EMPTY
;
3751 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3752 r
= quantifiers_memory_node_info(target
);
3755 qn
->target_empty_info
= r
;
3759 r
= get_max_match_length(target
, &d
, env
);
3760 if (r
== 0 && d
== 0) {
3761 /* ()* ==> ()?, ()+ ==> () */
3763 if (qn
->lower
> 1) qn
->lower
= 1;
3764 if (NTYPE(target
) == NT_STR
) {
3765 qn
->upper
= qn
->lower
= 0; /* /(?:)+/ ==> // */
3773 if (qn
->lower
!= qn
->upper
)
3774 state
|= IN_VAR_REPEAT
;
3775 r
= setup_tree(target
, reg
, state
, env
);
3779 #define EXPAND_STRING_MAX_LENGTH 100
3780 if (NTYPE(target
) == NT_STR
) {
3781 if (!IS_REPEAT_INFINITE(qn
->lower
) && qn
->lower
== qn
->upper
&&
3782 qn
->lower
> 1 && qn
->lower
<= EXPAND_STRING_MAX_LENGTH
) {
3783 int len
= NSTRING_LEN(target
);
3784 StrNode
* sn
= NSTR(target
);
3786 if (len
* qn
->lower
<= EXPAND_STRING_MAX_LENGTH
) {
3787 int i
, n
= qn
->lower
;
3788 onig_node_conv_to_str_node(node
, NSTR(target
)->flag
);
3789 for (i
= 0; i
< n
; i
++) {
3790 r
= onig_node_str_cat(node
, sn
->s
, sn
->end
);
3793 onig_node_free(target
);
3794 break; /* break case NT_QTFR: */
3799 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
3800 if (qn
->greedy
&& (qn
->target_empty_info
!= 0)) {
3801 if (NTYPE(target
) == NT_QTFR
) {
3802 QtfrNode
* tqn
= NQTFR(target
);
3803 if (IS_NOT_NULL(tqn
->head_exact
)) {
3804 qn
->head_exact
= tqn
->head_exact
;
3805 tqn
->head_exact
= NULL
;
3809 qn
->head_exact
= get_head_value_node(qn
->target
, 1, reg
);
3818 EncloseNode
* en
= NENCLOSE(node
);
3821 case ENCLOSE_OPTION
:
3823 OnigOptionType options
= reg
->options
;
3824 reg
->options
= NENCLOSE(node
)->option
;
3825 r
= setup_tree(NENCLOSE(node
)->target
, reg
, state
, env
);
3826 reg
->options
= options
;
3830 case ENCLOSE_MEMORY
:
3831 if ((state
& (IN_ALT
| IN_NOT
| IN_VAR_REPEAT
)) != 0) {
3832 BIT_STATUS_ON_AT(env
->bt_mem_start
, en
->regnum
);
3833 /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
3835 r
= setup_tree(en
->target
, reg
, state
, env
);
3838 case ENCLOSE_STOP_BACKTRACK
:
3840 Node
* target
= en
->target
;
3841 r
= setup_tree(target
, reg
, state
, env
);
3842 if (NTYPE(target
) == NT_QTFR
) {
3843 QtfrNode
* tqn
= NQTFR(target
);
3844 if (IS_REPEAT_INFINITE(tqn
->upper
) && tqn
->lower
<= 1 &&
3845 tqn
->greedy
!= 0) { /* (?>a*), a*+ etc... */
3846 int qtype
= NTYPE(tqn
->target
);
3847 if (IS_NODE_TYPE_SIMPLE(qtype
))
3848 SET_ENCLOSE_STATUS(node
, NST_STOP_BT_SIMPLE_REPEAT
);
3859 AnchorNode
* an
= NANCHOR(node
);
3862 case ANCHOR_PREC_READ
:
3863 r
= setup_tree(an
->target
, reg
, state
, env
);
3865 case ANCHOR_PREC_READ_NOT
:
3866 r
= setup_tree(an
->target
, reg
, (state
| IN_NOT
), env
);
3869 /* allowed node types in look-behind */
3870 #define ALLOWED_TYPE_IN_LB \
3871 ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
3872 BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
3874 #define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY )
3875 #define ALLOWED_ENCLOSE_IN_LB_NOT 0
3877 #define ALLOWED_ANCHOR_IN_LB \
3878 ( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
3879 #define ALLOWED_ANCHOR_IN_LB_NOT \
3880 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
3882 case ANCHOR_LOOK_BEHIND
:
3884 r
= check_type_tree(an
->target
, ALLOWED_TYPE_IN_LB
,
3885 ALLOWED_ENCLOSE_IN_LB
, ALLOWED_ANCHOR_IN_LB
);
3886 if (r
< 0) return r
;
3887 if (r
> 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN
;
3888 r
= setup_look_behind(node
, reg
, env
);
3889 if (r
!= 0) return r
;
3890 r
= setup_tree(an
->target
, reg
, state
, env
);
3894 case ANCHOR_LOOK_BEHIND_NOT
:
3896 r
= check_type_tree(an
->target
, ALLOWED_TYPE_IN_LB
,
3897 ALLOWED_ENCLOSE_IN_LB_NOT
, ALLOWED_ANCHOR_IN_LB_NOT
);
3898 if (r
< 0) return r
;
3899 if (r
> 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN
;
3900 r
= setup_look_behind(node
, reg
, env
);
3901 if (r
!= 0) return r
;
3902 r
= setup_tree(an
->target
, reg
, (state
| IN_NOT
), env
);
3916 /* set skip map for Boyer-Moor search */
3918 set_bm_skip(UChar
* s
, UChar
* end
, OnigEncoding enc ARG_UNUSED
,
3919 UChar skip
[], int** int_skip
)
3923 len
= (int)(end
- s
);
3924 if (len
< ONIG_CHAR_TABLE_SIZE
) {
3925 for (i
= 0; i
< ONIG_CHAR_TABLE_SIZE
; i
++) skip
[i
] = (UChar
)len
;
3927 for (i
= 0; i
< len
- 1; i
++)
3928 skip
[s
[i
]] = (UChar
)(len
- 1 - i
);
3931 if (IS_NULL(*int_skip
)) {
3932 *int_skip
= (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE
);
3933 if (IS_NULL(*int_skip
)) return ONIGERR_MEMORY
;
3935 for (i
= 0; i
< ONIG_CHAR_TABLE_SIZE
; i
++) (*int_skip
)[i
] = len
;
3937 for (i
= 0; i
< len
- 1; i
++)
3938 (*int_skip
)[s
[i
]] = len
- 1 - i
;
3943 #define OPT_EXACT_MAXLEN 24
3946 OnigDistance min
; /* min byte length */
3947 OnigDistance max
; /* max byte length */
3953 OnigOptionType options
;
3954 OnigCaseFoldType case_fold_flag
;
3964 MinMaxLen mmd
; /* info position */
3970 UChar s
[OPT_EXACT_MAXLEN
];
3974 MinMaxLen mmd
; /* info position */
3977 int value
; /* weighted value */
3978 UChar map
[ONIG_CHAR_TABLE_SIZE
];
3985 OptExactInfo exb
; /* boundary */
3986 OptExactInfo exm
; /* middle */
3987 OptExactInfo expr
; /* prec read (?=...) */
3989 OptMapInfo map
; /* boundary */
3994 map_position_value(OnigEncoding enc
, int i
)
3996 static const short int ByteValTable
[] = {
3997 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
3998 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
3999 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
4000 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
4001 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4002 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
4003 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4004 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
4007 if (i
< (int )(sizeof(ByteValTable
)/sizeof(ByteValTable
[0]))) {
4008 if (i
== 0 && ONIGENC_MBC_MINLEN(enc
) > 1)
4011 return (int )ByteValTable
[i
];
4014 return 4; /* Take it easy. */
4018 distance_value(MinMaxLen
* mm
)
4020 /* 1000 / (min-max-dist + 1) */
4021 static const short int dist_vals
[] = {
4022 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
4023 91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
4024 48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
4025 32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
4026 24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
4027 20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
4028 16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
4029 14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
4030 12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
4031 11, 11, 11, 11, 11, 10, 10, 10, 10, 10
4036 if (mm
->max
== ONIG_INFINITE_DISTANCE
) return 0;
4038 d
= mm
->max
- mm
->min
;
4039 if (d
< (int )(sizeof(dist_vals
)/sizeof(dist_vals
[0])))
4040 /* return dist_vals[d] * 16 / (mm->min + 12); */
4041 return (int )dist_vals
[d
];
4047 comp_distance_value(MinMaxLen
* d1
, MinMaxLen
* d2
, int v1
, int v2
)
4049 if (v2
<= 0) return -1;
4050 if (v1
<= 0) return 1;
4052 v1
*= distance_value(d1
);
4053 v2
*= distance_value(d2
);
4055 if (v2
> v1
) return 1;
4056 if (v2
< v1
) return -1;
4058 if (d2
->min
< d1
->min
) return 1;
4059 if (d2
->min
> d1
->min
) return -1;
4064 is_equal_mml(MinMaxLen
* a
, MinMaxLen
* b
)
4066 return (a
->min
== b
->min
&& a
->max
== b
->max
) ? 1 : 0;
4071 set_mml(MinMaxLen
* mml
, OnigDistance min
, OnigDistance max
)
4078 clear_mml(MinMaxLen
* mml
)
4080 mml
->min
= mml
->max
= 0;
4084 copy_mml(MinMaxLen
* to
, MinMaxLen
* from
)
4086 to
->min
= from
->min
;
4087 to
->max
= from
->max
;
4091 add_mml(MinMaxLen
* to
, MinMaxLen
* from
)
4093 to
->min
= distance_add(to
->min
, from
->min
);
4094 to
->max
= distance_add(to
->max
, from
->max
);
4099 add_len_mml(MinMaxLen
* to
, OnigDistance len
)
4101 to
->min
= distance_add(to
->min
, len
);
4102 to
->max
= distance_add(to
->max
, len
);
4107 alt_merge_mml(MinMaxLen
* to
, MinMaxLen
* from
)
4109 if (to
->min
> from
->min
) to
->min
= from
->min
;
4110 if (to
->max
< from
->max
) to
->max
= from
->max
;
4114 copy_opt_env(OptEnv
* to
, OptEnv
* from
)
4120 clear_opt_anc_info(OptAncInfo
* anc
)
4122 anc
->left_anchor
= 0;
4123 anc
->right_anchor
= 0;
4127 copy_opt_anc_info(OptAncInfo
* to
, OptAncInfo
* from
)
4133 concat_opt_anc_info(OptAncInfo
* to
, OptAncInfo
* left
, OptAncInfo
* right
,
4134 OnigDistance left_len
, OnigDistance right_len
)
4136 clear_opt_anc_info(to
);
4138 to
->left_anchor
= left
->left_anchor
;
4139 if (left_len
== 0) {
4140 to
->left_anchor
|= right
->left_anchor
;
4143 to
->right_anchor
= right
->right_anchor
;
4144 if (right_len
== 0) {
4145 to
->right_anchor
|= left
->right_anchor
;
4150 is_left_anchor(int anc
)
4152 if (anc
== ANCHOR_END_BUF
|| anc
== ANCHOR_SEMI_END_BUF
||
4153 anc
== ANCHOR_END_LINE
|| anc
== ANCHOR_PREC_READ
||
4154 anc
== ANCHOR_PREC_READ_NOT
)
4161 is_set_opt_anc_info(OptAncInfo
* to
, int anc
)
4163 if ((to
->left_anchor
& anc
) != 0) return 1;
4165 return ((to
->right_anchor
& anc
) != 0 ? 1 : 0);
4169 add_opt_anc_info(OptAncInfo
* to
, int anc
)
4171 if (is_left_anchor(anc
))
4172 to
->left_anchor
|= anc
;
4174 to
->right_anchor
|= anc
;
4178 remove_opt_anc_info(OptAncInfo
* to
, int anc
)
4180 if (is_left_anchor(anc
))
4181 to
->left_anchor
&= ~anc
;
4183 to
->right_anchor
&= ~anc
;
4187 alt_merge_opt_anc_info(OptAncInfo
* to
, OptAncInfo
* add
)
4189 to
->left_anchor
&= add
->left_anchor
;
4190 to
->right_anchor
&= add
->right_anchor
;
4194 is_full_opt_exact_info(OptExactInfo
* ex
)
4196 return (ex
->len
>= OPT_EXACT_MAXLEN
? 1 : 0);
4200 clear_opt_exact_info(OptExactInfo
* ex
)
4202 clear_mml(&ex
->mmd
);
4203 clear_opt_anc_info(&ex
->anc
);
4205 ex
->ignore_case
= 0;
4211 copy_opt_exact_info(OptExactInfo
* to
, OptExactInfo
* from
)
4217 concat_opt_exact_info(OptExactInfo
* to
, OptExactInfo
* add
, OnigEncoding enc
)
4223 if (! to
->ignore_case
&& add
->ignore_case
) {
4224 if (to
->len
>= add
->len
) return ; /* avoid */
4226 to
->ignore_case
= 1;
4231 for (i
= to
->len
; p
< end
; ) {
4232 len
= enclen(enc
, p
);
4233 if (i
+ len
> OPT_EXACT_MAXLEN
) break;
4234 for (j
= 0; j
< len
&& p
< end
; j
++)
4239 to
->reach_end
= (p
== end
? add
->reach_end
: 0);
4241 concat_opt_anc_info(&tanc
, &to
->anc
, &add
->anc
, 1, 1);
4242 if (! to
->reach_end
) tanc
.right_anchor
= 0;
4243 copy_opt_anc_info(&to
->anc
, &tanc
);
4247 concat_opt_exact_info_str(OptExactInfo
* to
, UChar
* s
, UChar
* end
,
4248 int raw ARG_UNUSED
, OnigEncoding enc
)
4253 for (i
= to
->len
, p
= s
; p
< end
&& i
< OPT_EXACT_MAXLEN
; ) {
4254 len
= enclen(enc
, p
);
4255 if (i
+ len
> OPT_EXACT_MAXLEN
) break;
4256 for (j
= 0; j
< len
&& p
< end
; j
++)
4264 alt_merge_opt_exact_info(OptExactInfo
* to
, OptExactInfo
* add
, OptEnv
* env
)
4268 if (add
->len
== 0 || to
->len
== 0) {
4269 clear_opt_exact_info(to
);
4273 if (! is_equal_mml(&to
->mmd
, &add
->mmd
)) {
4274 clear_opt_exact_info(to
);
4278 for (i
= 0; i
< to
->len
&& i
< add
->len
; ) {
4279 if (to
->s
[i
] != add
->s
[i
]) break;
4280 len
= enclen(env
->enc
, to
->s
+ i
);
4282 for (j
= 1; j
< len
; j
++) {
4283 if (to
->s
[i
+j
] != add
->s
[i
+j
]) break;
4289 if (! add
->reach_end
|| i
< add
->len
|| i
< to
->len
) {
4293 to
->ignore_case
|= add
->ignore_case
;
4295 alt_merge_opt_anc_info(&to
->anc
, &add
->anc
);
4296 if (! to
->reach_end
) to
->anc
.right_anchor
= 0;
4300 select_opt_exact_info(OnigEncoding enc
, OptExactInfo
* now
, OptExactInfo
* alt
)
4311 copy_opt_exact_info(now
, alt
);
4314 else if (v1
<= 2 && v2
<= 2) {
4315 /* ByteValTable[x] is big value --> low price */
4316 v2
= map_position_value(enc
, now
->s
[0]);
4317 v1
= map_position_value(enc
, alt
->s
[0]);
4319 if (now
->len
> 1) v1
+= 5;
4320 if (alt
->len
> 1) v2
+= 5;
4323 if (now
->ignore_case
== 0) v1
*= 2;
4324 if (alt
->ignore_case
== 0) v2
*= 2;
4326 if (comp_distance_value(&now
->mmd
, &alt
->mmd
, v1
, v2
) > 0)
4327 copy_opt_exact_info(now
, alt
);
4331 clear_opt_map_info(OptMapInfo
* map
)
4333 static const OptMapInfo clean_info
= {
4336 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4337 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4338 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4339 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4340 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4341 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4342 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4343 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4344 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4345 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4346 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4347 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4348 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4349 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4350 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4351 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4355 xmemcpy(map
, &clean_info
, sizeof(OptMapInfo
));
4359 copy_opt_map_info(OptMapInfo
* to
, OptMapInfo
* from
)
4365 add_char_opt_map_info(OptMapInfo
* map
, UChar c
, OnigEncoding enc
)
4367 if (map
->map
[c
] == 0) {
4369 map
->value
+= map_position_value(enc
, c
);
4374 add_char_amb_opt_map_info(OptMapInfo
* map
, UChar
* p
, UChar
* end
,
4375 OnigEncoding enc
, OnigCaseFoldType case_fold_flag
)
4377 OnigCaseFoldCodeItem items
[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM
];
4378 UChar buf
[ONIGENC_CODE_TO_MBC_MAXLEN
];
4381 add_char_opt_map_info(map
, p
[0], enc
);
4383 case_fold_flag
= DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag
);
4384 n
= ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc
, case_fold_flag
, p
, end
, items
);
4385 if (n
< 0) return n
;
4387 for (i
= 0; i
< n
; i
++) {
4388 ONIGENC_CODE_TO_MBC(enc
, items
[i
].code
[0], buf
);
4389 add_char_opt_map_info(map
, buf
[0], enc
);
4396 select_opt_map_info(OptMapInfo
* now
, OptMapInfo
* alt
)
4398 static int z
= 1<<15; /* 32768: something big value */
4402 if (alt
->value
== 0) return ;
4403 if (now
->value
== 0) {
4404 copy_opt_map_info(now
, alt
);
4408 v1
= z
/ now
->value
;
4409 v2
= z
/ alt
->value
;
4410 if (comp_distance_value(&now
->mmd
, &alt
->mmd
, v1
, v2
) > 0)
4411 copy_opt_map_info(now
, alt
);
4415 comp_opt_exact_or_map_info(OptExactInfo
* e
, OptMapInfo
* m
)
4417 #define COMP_EM_BASE 20
4420 if (m
->value
<= 0) return -1;
4422 ve
= COMP_EM_BASE
* e
->len
* (e
->ignore_case
? 1 : 2);
4423 vm
= COMP_EM_BASE
* 5 * 2 / m
->value
;
4424 return comp_distance_value(&e
->mmd
, &m
->mmd
, ve
, vm
);
4428 alt_merge_opt_map_info(OnigEncoding enc
, OptMapInfo
* to
, OptMapInfo
* add
)
4432 /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
4433 if (to
->value
== 0) return ;
4434 if (add
->value
== 0 || to
->mmd
.max
< add
->mmd
.min
) {
4435 clear_opt_map_info(to
);
4439 alt_merge_mml(&to
->mmd
, &add
->mmd
);
4442 for (i
= 0; i
< ONIG_CHAR_TABLE_SIZE
; i
++) {
4447 val
+= map_position_value(enc
, i
);
4451 alt_merge_opt_anc_info(&to
->anc
, &add
->anc
);
4455 set_bound_node_opt_info(NodeOptInfo
* opt
, MinMaxLen
* mmd
)
4457 copy_mml(&(opt
->exb
.mmd
), mmd
);
4458 copy_mml(&(opt
->expr
.mmd
), mmd
);
4459 copy_mml(&(opt
->map
.mmd
), mmd
);
4463 clear_node_opt_info(NodeOptInfo
* opt
)
4465 clear_mml(&opt
->len
);
4466 clear_opt_anc_info(&opt
->anc
);
4467 clear_opt_exact_info(&opt
->exb
);
4468 clear_opt_exact_info(&opt
->exm
);
4469 clear_opt_exact_info(&opt
->expr
);
4470 clear_opt_map_info(&opt
->map
);
4474 copy_node_opt_info(NodeOptInfo
* to
, NodeOptInfo
* from
)
4480 concat_left_node_opt_info(OnigEncoding enc
, NodeOptInfo
* to
, NodeOptInfo
* add
)
4482 int exb_reach
, exm_reach
;
4485 concat_opt_anc_info(&tanc
, &to
->anc
, &add
->anc
, to
->len
.max
, add
->len
.max
);
4486 copy_opt_anc_info(&to
->anc
, &tanc
);
4488 if (add
->exb
.len
> 0 && to
->len
.max
== 0) {
4489 concat_opt_anc_info(&tanc
, &to
->anc
, &add
->exb
.anc
,
4490 to
->len
.max
, add
->len
.max
);
4491 copy_opt_anc_info(&add
->exb
.anc
, &tanc
);
4494 if (add
->map
.value
> 0 && to
->len
.max
== 0) {
4495 if (add
->map
.mmd
.max
== 0)
4496 add
->map
.anc
.left_anchor
|= to
->anc
.left_anchor
;
4499 exb_reach
= to
->exb
.reach_end
;
4500 exm_reach
= to
->exm
.reach_end
;
4502 if (add
->len
.max
!= 0)
4503 to
->exb
.reach_end
= to
->exm
.reach_end
= 0;
4505 if (add
->exb
.len
> 0) {
4507 concat_opt_exact_info(&to
->exb
, &add
->exb
, enc
);
4508 clear_opt_exact_info(&add
->exb
);
4510 else if (exm_reach
) {
4511 concat_opt_exact_info(&to
->exm
, &add
->exb
, enc
);
4512 clear_opt_exact_info(&add
->exb
);
4515 select_opt_exact_info(enc
, &to
->exm
, &add
->exb
);
4516 select_opt_exact_info(enc
, &to
->exm
, &add
->exm
);
4518 if (to
->expr
.len
> 0) {
4519 if (add
->len
.max
> 0) {
4520 if (to
->expr
.len
> (int )add
->len
.max
)
4521 to
->expr
.len
= add
->len
.max
;
4523 if (to
->expr
.mmd
.max
== 0)
4524 select_opt_exact_info(enc
, &to
->exb
, &to
->expr
);
4526 select_opt_exact_info(enc
, &to
->exm
, &to
->expr
);
4529 else if (add
->expr
.len
> 0) {
4530 copy_opt_exact_info(&to
->expr
, &add
->expr
);
4533 select_opt_map_info(&to
->map
, &add
->map
);
4535 add_mml(&to
->len
, &add
->len
);
4539 alt_merge_node_opt_info(NodeOptInfo
* to
, NodeOptInfo
* add
, OptEnv
* env
)
4541 alt_merge_opt_anc_info (&to
->anc
, &add
->anc
);
4542 alt_merge_opt_exact_info(&to
->exb
, &add
->exb
, env
);
4543 alt_merge_opt_exact_info(&to
->exm
, &add
->exm
, env
);
4544 alt_merge_opt_exact_info(&to
->expr
, &add
->expr
, env
);
4545 alt_merge_opt_map_info(env
->enc
, &to
->map
, &add
->map
);
4547 alt_merge_mml(&to
->len
, &add
->len
);
4551 #define MAX_NODE_OPT_INFO_REF_COUNT 5
4554 optimize_node_left(Node
* node
, NodeOptInfo
* opt
, OptEnv
* env
)
4559 clear_node_opt_info(opt
);
4560 set_bound_node_opt_info(opt
, &env
->mmd
);
4570 copy_opt_env(&nenv
, env
);
4572 r
= optimize_node_left(NCAR(nd
), &nopt
, &nenv
);
4574 add_mml(&nenv
.mmd
, &nopt
.len
);
4575 concat_left_node_opt_info(env
->enc
, opt
, &nopt
);
4577 } while (r
== 0 && IS_NOT_NULL(nd
= NCDR(nd
)));
4587 r
= optimize_node_left(NCAR(nd
), &nopt
, env
);
4589 if (nd
== node
) copy_node_opt_info(opt
, &nopt
);
4590 else alt_merge_node_opt_info(opt
, &nopt
, env
);
4592 } while ((r
== 0) && IS_NOT_NULL(nd
= NCDR(nd
)));
4598 StrNode
* sn
= NSTR(node
);
4599 int slen
= (int)(sn
->end
- sn
->s
);
4600 int is_raw
= NSTRING_IS_RAW(node
);
4602 if (! NSTRING_IS_AMBIG(node
)) {
4603 concat_opt_exact_info_str(&opt
->exb
, sn
->s
, sn
->end
,
4604 NSTRING_IS_RAW(node
), env
->enc
);
4606 add_char_opt_map_info(&opt
->map
, *(sn
->s
), env
->enc
);
4608 set_mml(&opt
->len
, slen
, slen
);
4613 if (NSTRING_IS_DONT_GET_OPT_INFO(node
)) {
4614 int n
= onigenc_strlen(env
->enc
, sn
->s
, sn
->end
);
4615 max
= ONIGENC_MBC_MAXLEN_DIST(env
->enc
) * n
;
4618 concat_opt_exact_info_str(&opt
->exb
, sn
->s
, sn
->end
,
4620 opt
->exb
.ignore_case
= 1;
4623 r
= add_char_amb_opt_map_info(&opt
->map
, sn
->s
, sn
->end
,
4624 env
->enc
, env
->case_fold_flag
);
4631 set_mml(&opt
->len
, slen
, max
);
4634 if (opt
->exb
.len
== slen
)
4635 opt
->exb
.reach_end
= 1;
4642 CClassNode
* cc
= NCCLASS(node
);
4644 /* no need to check ignore case. (setted in setup_tree()) */
4646 if (IS_NOT_NULL(cc
->mbuf
) || IS_NCCLASS_NOT(cc
)) {
4647 OnigDistance min
= ONIGENC_MBC_MINLEN(env
->enc
);
4648 OnigDistance max
= ONIGENC_MBC_MAXLEN_DIST(env
->enc
);
4650 set_mml(&opt
->len
, min
, max
);
4653 for (i
= 0; i
< SINGLE_BYTE_SIZE
; i
++) {
4654 z
= BITSET_AT(cc
->bs
, i
);
4655 if ((z
&& !IS_NCCLASS_NOT(cc
)) || (!z
&& IS_NCCLASS_NOT(cc
))) {
4656 add_char_opt_map_info(&opt
->map
, (UChar
)i
, env
->enc
);
4659 set_mml(&opt
->len
, 1, 1);
4668 max
= ONIGENC_MBC_MAXLEN_DIST(env
->enc
);
4673 switch (NCTYPE(node
)->ctype
) {
4674 case ONIGENC_CTYPE_WORD
:
4675 if (NCTYPE(node
)->not != 0) {
4676 for (i
= 0; i
< SINGLE_BYTE_SIZE
; i
++) {
4677 if (! ONIGENC_IS_CODE_WORD(env
->enc
, i
)) {
4678 add_char_opt_map_info(&opt
->map
, (UChar
)i
, env
->enc
);
4683 for (i
= 0; i
< SINGLE_BYTE_SIZE
; i
++) {
4684 if (ONIGENC_IS_CODE_WORD(env
->enc
, i
)) {
4685 add_char_opt_map_info(&opt
->map
, (UChar
)i
, env
->enc
);
4693 min
= ONIGENC_MBC_MINLEN(env
->enc
);
4695 set_mml(&opt
->len
, min
, max
);
4701 OnigDistance min
= ONIGENC_MBC_MINLEN(env
->enc
);
4702 OnigDistance max
= ONIGENC_MBC_MAXLEN_DIST(env
->enc
);
4703 set_mml(&opt
->len
, min
, max
);
4708 switch (NANCHOR(node
)->type
) {
4709 case ANCHOR_BEGIN_BUF
:
4710 case ANCHOR_BEGIN_POSITION
:
4711 case ANCHOR_BEGIN_LINE
:
4712 case ANCHOR_END_BUF
:
4713 case ANCHOR_SEMI_END_BUF
:
4714 case ANCHOR_END_LINE
:
4715 add_opt_anc_info(&opt
->anc
, NANCHOR(node
)->type
);
4718 case ANCHOR_PREC_READ
:
4722 r
= optimize_node_left(NANCHOR(node
)->target
, &nopt
, env
);
4724 if (nopt
.exb
.len
> 0)
4725 copy_opt_exact_info(&opt
->expr
, &nopt
.exb
);
4726 else if (nopt
.exm
.len
> 0)
4727 copy_opt_exact_info(&opt
->expr
, &nopt
.exm
);
4729 opt
->expr
.reach_end
= 0;
4731 if (nopt
.map
.value
> 0)
4732 copy_opt_map_info(&opt
->map
, &nopt
.map
);
4737 case ANCHOR_PREC_READ_NOT
:
4738 case ANCHOR_LOOK_BEHIND
: /* Sorry, I can't make use of it. */
4739 case ANCHOR_LOOK_BEHIND_NOT
:
4748 OnigDistance min
, max
, tmin
, tmax
;
4749 Node
** nodes
= SCANENV_MEM_NODES(env
->scan_env
);
4750 BRefNode
* br
= NBREF(node
);
4752 if (br
->state
& NST_RECURSION
) {
4753 set_mml(&opt
->len
, 0, ONIG_INFINITE_DISTANCE
);
4756 backs
= BACKREFS_P(br
);
4757 r
= get_min_match_length(nodes
[backs
[0]], &min
, env
->scan_env
);
4759 r
= get_max_match_length(nodes
[backs
[0]], &max
, env
->scan_env
);
4761 for (i
= 1; i
< br
->back_num
; i
++) {
4762 r
= get_min_match_length(nodes
[backs
[i
]], &tmin
, env
->scan_env
);
4764 r
= get_max_match_length(nodes
[backs
[i
]], &tmax
, env
->scan_env
);
4766 if (min
> tmin
) min
= tmin
;
4767 if (max
< tmax
) max
= tmax
;
4769 if (r
== 0) set_mml(&opt
->len
, min
, max
);
4773 #ifdef USE_SUBEXP_CALL
4775 if (IS_CALL_RECURSION(NCALL(node
)))
4776 set_mml(&opt
->len
, 0, ONIG_INFINITE_DISTANCE
);
4778 OnigOptionType save
= env
->options
;
4779 env
->options
= NENCLOSE(NCALL(node
)->target
)->option
;
4780 r
= optimize_node_left(NCALL(node
)->target
, opt
, env
);
4781 env
->options
= save
;
4789 OnigDistance min
, max
;
4791 QtfrNode
* qn
= NQTFR(node
);
4793 r
= optimize_node_left(qn
->target
, &nopt
, env
);
4796 if (qn
->lower
== 0 && IS_REPEAT_INFINITE(qn
->upper
)) {
4797 if (env
->mmd
.max
== 0 &&
4798 NTYPE(qn
->target
) == NT_CANY
&& qn
->greedy
) {
4799 if (IS_MULTILINE(env
->options
))
4800 add_opt_anc_info(&opt
->anc
, ANCHOR_ANYCHAR_STAR_ML
);
4802 add_opt_anc_info(&opt
->anc
, ANCHOR_ANYCHAR_STAR
);
4806 if (qn
->lower
> 0) {
4807 copy_node_opt_info(opt
, &nopt
);
4808 if (nopt
.exb
.len
> 0) {
4809 if (nopt
.exb
.reach_end
) {
4810 for (i
= 2; i
<= qn
->lower
&&
4811 ! is_full_opt_exact_info(&opt
->exb
); i
++) {
4812 concat_opt_exact_info(&opt
->exb
, &nopt
.exb
, env
->enc
);
4814 if (i
< qn
->lower
) {
4815 opt
->exb
.reach_end
= 0;
4820 if (qn
->lower
!= qn
->upper
) {
4821 opt
->exb
.reach_end
= 0;
4822 opt
->exm
.reach_end
= 0;
4825 opt
->exm
.reach_end
= 0;
4829 min
= distance_multiply(nopt
.len
.min
, qn
->lower
);
4830 if (IS_REPEAT_INFINITE(qn
->upper
))
4831 max
= (nopt
.len
.max
> 0 ? ONIG_INFINITE_DISTANCE
: 0);
4833 max
= distance_multiply(nopt
.len
.max
, qn
->upper
);
4835 set_mml(&opt
->len
, min
, max
);
4841 EncloseNode
* en
= NENCLOSE(node
);
4844 case ENCLOSE_OPTION
:
4846 OnigOptionType save
= env
->options
;
4848 env
->options
= en
->option
;
4849 r
= optimize_node_left(en
->target
, opt
, env
);
4850 env
->options
= save
;
4854 case ENCLOSE_MEMORY
:
4855 #ifdef USE_SUBEXP_CALL
4857 if (en
->opt_count
> MAX_NODE_OPT_INFO_REF_COUNT
) {
4858 OnigDistance min
, max
;
4861 max
= ONIG_INFINITE_DISTANCE
;
4862 if (IS_ENCLOSE_MIN_FIXED(en
)) min
= en
->min_len
;
4863 if (IS_ENCLOSE_MAX_FIXED(en
)) max
= en
->max_len
;
4864 set_mml(&opt
->len
, min
, max
);
4869 r
= optimize_node_left(en
->target
, opt
, env
);
4871 if (is_set_opt_anc_info(&opt
->anc
, ANCHOR_ANYCHAR_STAR_MASK
)) {
4872 if (BIT_STATUS_AT(env
->scan_env
->backrefed_mem
, en
->regnum
))
4873 remove_opt_anc_info(&opt
->anc
, ANCHOR_ANYCHAR_STAR_MASK
);
4878 case ENCLOSE_STOP_BACKTRACK
:
4879 r
= optimize_node_left(en
->target
, opt
, env
);
4887 fprintf(stderr
, "optimize_node_left: undefined node type %d\n",
4890 r
= ONIGERR_TYPE_BUG
;
4898 set_optimize_exact_info(regex_t
* reg
, OptExactInfo
* e
)
4902 if (e
->len
== 0) return 0;
4904 if (e
->ignore_case
) {
4905 reg
->exact
= (UChar
* )xmalloc(e
->len
);
4906 CHECK_NULL_RETURN_MEMERR(reg
->exact
);
4907 xmemcpy(reg
->exact
, e
->s
, e
->len
);
4908 reg
->exact_end
= reg
->exact
+ e
->len
;
4909 reg
->optimize
= ONIG_OPTIMIZE_EXACT_IC
;
4914 reg
->exact
= str_dup(e
->s
, e
->s
+ e
->len
);
4915 CHECK_NULL_RETURN_MEMERR(reg
->exact
);
4916 reg
->exact_end
= reg
->exact
+ e
->len
;
4919 ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg
->enc
, reg
->exact
, reg
->exact_end
);
4921 if (e
->len
>= 3 || (e
->len
>= 2 && allow_reverse
)) {
4922 r
= set_bm_skip(reg
->exact
, reg
->exact_end
, reg
->enc
,
4923 reg
->map
, &(reg
->int_map
));
4926 reg
->optimize
= (allow_reverse
!= 0
4927 ? ONIG_OPTIMIZE_EXACT_BM
: ONIG_OPTIMIZE_EXACT_BM_NOT_REV
);
4930 reg
->optimize
= ONIG_OPTIMIZE_EXACT
;
4934 reg
->dmin
= e
->mmd
.min
;
4935 reg
->dmax
= e
->mmd
.max
;
4937 if (reg
->dmin
!= ONIG_INFINITE_DISTANCE
) {
4938 reg
->threshold_len
= reg
->dmin
+ (int)(reg
->exact_end
- reg
->exact
);
4945 set_optimize_map_info(regex_t
* reg
, OptMapInfo
* m
)
4949 for (i
= 0; i
< ONIG_CHAR_TABLE_SIZE
; i
++)
4950 reg
->map
[i
] = m
->map
[i
];
4952 reg
->optimize
= ONIG_OPTIMIZE_MAP
;
4953 reg
->dmin
= m
->mmd
.min
;
4954 reg
->dmax
= m
->mmd
.max
;
4956 if (reg
->dmin
!= ONIG_INFINITE_DISTANCE
) {
4957 reg
->threshold_len
= reg
->dmin
+ 1;
4962 set_sub_anchor(regex_t
* reg
, OptAncInfo
* anc
)
4964 reg
->sub_anchor
|= anc
->left_anchor
& ANCHOR_BEGIN_LINE
;
4965 reg
->sub_anchor
|= anc
->right_anchor
& ANCHOR_END_LINE
;
4969 static void print_optimize_info(FILE* f
, regex_t
* reg
);
4973 set_optimize_info_from_tree(Node
* node
, regex_t
* reg
, ScanEnv
* scan_env
)
4981 env
.options
= reg
->options
;
4982 env
.case_fold_flag
= reg
->case_fold_flag
;
4983 env
.scan_env
= scan_env
;
4984 clear_mml(&env
.mmd
);
4986 r
= optimize_node_left(node
, &opt
, &env
);
4989 reg
->anchor
= opt
.anc
.left_anchor
& (ANCHOR_BEGIN_BUF
|
4990 ANCHOR_BEGIN_POSITION
| ANCHOR_ANYCHAR_STAR
| ANCHOR_ANYCHAR_STAR_ML
);
4992 reg
->anchor
|= opt
.anc
.right_anchor
& (ANCHOR_END_BUF
| ANCHOR_SEMI_END_BUF
);
4994 if (reg
->anchor
& (ANCHOR_END_BUF
| ANCHOR_SEMI_END_BUF
)) {
4995 reg
->anchor_dmin
= opt
.len
.min
;
4996 reg
->anchor_dmax
= opt
.len
.max
;
4999 if (opt
.exb
.len
> 0 || opt
.exm
.len
> 0) {
5000 select_opt_exact_info(reg
->enc
, &opt
.exb
, &opt
.exm
);
5001 if (opt
.map
.value
> 0 &&
5002 comp_opt_exact_or_map_info(&opt
.exb
, &opt
.map
) > 0) {
5006 r
= set_optimize_exact_info(reg
, &opt
.exb
);
5007 set_sub_anchor(reg
, &opt
.exb
.anc
);
5010 else if (opt
.map
.value
> 0) {
5012 set_optimize_map_info(reg
, &opt
.map
);
5013 set_sub_anchor(reg
, &opt
.map
.anc
);
5016 reg
->sub_anchor
|= opt
.anc
.left_anchor
& ANCHOR_BEGIN_LINE
;
5017 if (opt
.len
.max
== 0)
5018 reg
->sub_anchor
|= opt
.anc
.right_anchor
& ANCHOR_END_LINE
;
5021 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5022 print_optimize_info(stderr
, reg
);
5028 clear_optimize_info(regex_t
* reg
)
5030 reg
->optimize
= ONIG_OPTIMIZE_NONE
;
5032 reg
->anchor_dmin
= 0;
5033 reg
->anchor_dmax
= 0;
5034 reg
->sub_anchor
= 0;
5035 reg
->exact_end
= (UChar
* )NULL
;
5036 reg
->threshold_len
= 0;
5037 if (IS_NOT_NULL(reg
->exact
)) {
5039 reg
->exact
= (UChar
* )NULL
;
5045 static void print_enc_string(FILE* fp
, OnigEncoding enc
,
5046 const UChar
*s
, const UChar
*end
)
5048 fprintf(fp
, "\nPATTERN: /");
5050 if (ONIGENC_MBC_MINLEN(enc
) > 1) {
5056 code
= ONIGENC_MBC_TO_CODE(enc
, p
, end
);
5058 fprintf(fp
, " 0x%04x ", (int )code
);
5061 fputc((int )code
, fp
);
5064 p
+= enclen(enc
, p
);
5069 fputc((int )*s
, fp
);
5078 print_distance_range(FILE* f
, OnigDistance a
, OnigDistance b
)
5080 if (a
== ONIG_INFINITE_DISTANCE
)
5083 fprintf(f
, "(%u)", a
);
5087 if (b
== ONIG_INFINITE_DISTANCE
)
5090 fprintf(f
, "(%u)", b
);
5094 print_anchor(FILE* f
, int anchor
)
5100 if (anchor
& ANCHOR_BEGIN_BUF
) {
5101 fprintf(f
, "begin-buf");
5104 if (anchor
& ANCHOR_BEGIN_LINE
) {
5105 if (q
) fprintf(f
, ", ");
5107 fprintf(f
, "begin-line");
5109 if (anchor
& ANCHOR_BEGIN_POSITION
) {
5110 if (q
) fprintf(f
, ", ");
5112 fprintf(f
, "begin-pos");
5114 if (anchor
& ANCHOR_END_BUF
) {
5115 if (q
) fprintf(f
, ", ");
5117 fprintf(f
, "end-buf");
5119 if (anchor
& ANCHOR_SEMI_END_BUF
) {
5120 if (q
) fprintf(f
, ", ");
5122 fprintf(f
, "semi-end-buf");
5124 if (anchor
& ANCHOR_END_LINE
) {
5125 if (q
) fprintf(f
, ", ");
5127 fprintf(f
, "end-line");
5129 if (anchor
& ANCHOR_ANYCHAR_STAR
) {
5130 if (q
) fprintf(f
, ", ");
5132 fprintf(f
, "anychar-star");
5134 if (anchor
& ANCHOR_ANYCHAR_STAR_ML
) {
5135 if (q
) fprintf(f
, ", ");
5136 fprintf(f
, "anychar-star-pl");
5143 print_optimize_info(FILE* f
, regex_t
* reg
)
5145 static const char* on
[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
5146 "EXACT_IC", "MAP" };
5148 fprintf(f
, "optimize: %s\n", on
[reg
->optimize
]);
5149 fprintf(f
, " anchor: "); print_anchor(f
, reg
->anchor
);
5150 if ((reg
->anchor
& ANCHOR_END_BUF_MASK
) != 0)
5151 print_distance_range(f
, reg
->anchor_dmin
, reg
->anchor_dmax
);
5154 if (reg
->optimize
) {
5155 fprintf(f
, " sub anchor: "); print_anchor(f
, reg
->sub_anchor
);
5162 fprintf(f
, "exact: [");
5163 for (p
= reg
->exact
; p
< reg
->exact_end
; p
++) {
5166 fprintf(f
, "]: length: %d\n", (reg
->exact_end
- reg
->exact
));
5168 else if (reg
->optimize
& ONIG_OPTIMIZE_MAP
) {
5171 for (i
= 0; i
< ONIG_CHAR_TABLE_SIZE
; i
++)
5172 if (reg
->map
[i
]) n
++;
5174 fprintf(f
, "map: n=%d\n", n
);
5178 for (i
= 0; i
< ONIG_CHAR_TABLE_SIZE
; i
++) {
5179 if (reg
->map
[i
] != 0) {
5180 if (c
> 0) fputs(", ", f
);
5182 if (ONIGENC_MBC_MAXLEN(reg
->enc
) == 1 &&
5183 ONIGENC_IS_CODE_PRINT(reg
->enc
, (OnigCodePoint
)i
))
5186 fprintf(f
, "%d", i
);
5193 #endif /* ONIG_DEBUG */
5197 onig_free_body(regex_t
* reg
)
5199 if (IS_NOT_NULL(reg
)) {
5200 if (IS_NOT_NULL(reg
->p
)) xfree(reg
->p
);
5201 if (IS_NOT_NULL(reg
->exact
)) xfree(reg
->exact
);
5202 if (IS_NOT_NULL(reg
->int_map
)) xfree(reg
->int_map
);
5203 if (IS_NOT_NULL(reg
->int_map_backward
)) xfree(reg
->int_map_backward
);
5204 if (IS_NOT_NULL(reg
->repeat_range
)) xfree(reg
->repeat_range
);
5205 if (IS_NOT_NULL(reg
->chain
)) onig_free(reg
->chain
);
5207 #ifdef USE_NAMED_GROUP
5208 onig_names_free(reg
);
5214 onig_free(regex_t
* reg
)
5216 if (IS_NOT_NULL(reg
)) {
5217 onig_free_body(reg
);
5222 #define REGEX_TRANSFER(to,from) do {\
5223 (to)->state = ONIG_STATE_MODIFY;\
5224 onig_free_body(to);\
5225 xmemcpy(to, from, sizeof(regex_t));\
5230 onig_transfer(regex_t
* to
, regex_t
* from
)
5232 THREAD_ATOMIC_START
;
5233 REGEX_TRANSFER(to
, from
);
5237 #define REGEX_CHAIN_HEAD(reg) do {\
5238 while (IS_NOT_NULL((reg)->chain)) {\
5239 (reg) = (reg)->chain;\
5244 onig_chain_link_add(regex_t
* to
, regex_t
* add
)
5246 THREAD_ATOMIC_START
;
5247 REGEX_CHAIN_HEAD(to
);
5253 onig_chain_reduce(regex_t
* reg
)
5255 regex_t
*head
, *prev
;
5259 if (IS_NOT_NULL(head
)) {
5260 reg
->state
= ONIG_STATE_MODIFY
;
5261 while (IS_NOT_NULL(head
->chain
)) {
5265 prev
->chain
= (regex_t
* )NULL
;
5266 REGEX_TRANSFER(reg
, head
);
5271 static void print_compiled_byte_code_list
P_((FILE* f
, regex_t
* reg
));
5273 #ifdef ONIG_DEBUG_PARSE_TREE
5274 static void print_tree
P_((FILE* f
, Node
* node
));
5278 onig_compile(regex_t
* reg
, const UChar
* pattern
, const UChar
* pattern_end
,
5279 OnigErrorInfo
* einfo
)
5281 #define COMPILE_INIT_SIZE 20
5286 #ifdef USE_SUBEXP_CALL
5287 UnsetAddrList uslist
;
5290 if (IS_NOT_NULL(einfo
)) einfo
->par
= (UChar
* )NULL
;
5292 reg
->state
= ONIG_STATE_COMPILING
;
5295 print_enc_string(stderr
, reg
->enc
, pattern
, pattern_end
);
5298 if (reg
->alloc
== 0) {
5299 init_size
= ((int)(pattern_end
- pattern
)) * 2;
5300 if (init_size
<= 0) init_size
= COMPILE_INIT_SIZE
;
5301 r
= BBUF_INIT(reg
, init_size
);
5302 if (r
!= 0) goto end
;
5308 reg
->num_repeat
= 0;
5309 reg
->num_null_check
= 0;
5310 reg
->repeat_range_alloc
= 0;
5311 reg
->repeat_range
= (OnigRepeatRange
* )NULL
;
5312 #ifdef USE_COMBINATION_EXPLOSION_CHECK
5313 reg
->num_comb_exp_check
= 0;
5316 r
= onig_parse_make_tree(&root
, pattern
, pattern_end
, reg
, &scan_env
);
5317 if (r
!= 0 || root
== NULL
) goto err
;
5319 #ifdef USE_NAMED_GROUP
5320 /* mixed use named group and no-named group */
5321 if (scan_env
.num_named
> 0 &&
5322 IS_SYNTAX_BV(scan_env
.syntax
, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP
) &&
5323 !ONIG_IS_OPTION_ON(reg
->options
, ONIG_OPTION_CAPTURE_GROUP
)) {
5324 if (scan_env
.num_named
!= scan_env
.num_mem
)
5325 r
= disable_noname_group_capture(&root
, reg
, &scan_env
);
5327 r
= numbered_ref_check(root
);
5329 if (r
!= 0) goto err
;
5333 #ifdef USE_SUBEXP_CALL
5334 if (scan_env
.num_call
> 0) {
5335 r
= unset_addr_list_init(&uslist
, scan_env
.num_call
);
5336 if (r
!= 0) goto err
;
5337 scan_env
.unset_addr_list
= &uslist
;
5338 r
= setup_subexp_call(root
, &scan_env
);
5339 if (r
!= 0) goto err_unset
;
5340 r
= subexp_recursive_check_trav(root
, &scan_env
);
5341 if (r
< 0) goto err_unset
;
5342 r
= subexp_inf_recursive_check_trav(root
, &scan_env
);
5343 if (r
!= 0) goto err_unset
;
5345 reg
->num_call
= scan_env
.num_call
;
5351 r
= setup_tree(root
, reg
, 0, &scan_env
);
5352 if (r
!= 0) goto err_unset
;
5354 #ifdef ONIG_DEBUG_PARSE_TREE
5355 print_tree(stderr
, root
);
5358 reg
->capture_history
= scan_env
.capture_history
;
5359 reg
->bt_mem_start
= scan_env
.bt_mem_start
;
5360 reg
->bt_mem_start
|= reg
->capture_history
;
5361 if (IS_FIND_CONDITION(reg
->options
))
5362 BIT_STATUS_ON_ALL(reg
->bt_mem_end
);
5364 reg
->bt_mem_end
= scan_env
.bt_mem_end
;
5365 reg
->bt_mem_end
|= reg
->capture_history
;
5368 #ifdef USE_COMBINATION_EXPLOSION_CHECK
5369 if (scan_env
.backrefed_mem
== 0
5370 #ifdef USE_SUBEXP_CALL
5371 || scan_env
.num_call
== 0
5374 setup_comb_exp_check(root
, 0, &scan_env
);
5375 #ifdef USE_SUBEXP_CALL
5376 if (scan_env
.has_recursion
!= 0) {
5377 scan_env
.num_comb_exp_check
= 0;
5381 if (scan_env
.comb_exp_max_regnum
> 0) {
5383 for (i
= 1; i
<= scan_env
.comb_exp_max_regnum
; i
++) {
5384 if (BIT_STATUS_AT(scan_env
.backrefed_mem
, i
) != 0) {
5385 scan_env
.num_comb_exp_check
= 0;
5392 reg
->num_comb_exp_check
= scan_env
.num_comb_exp_check
;
5395 clear_optimize_info(reg
);
5396 #ifndef ONIG_DONT_OPTIMIZE
5397 r
= set_optimize_info_from_tree(root
, reg
, &scan_env
);
5398 if (r
!= 0) goto err_unset
;
5401 if (IS_NOT_NULL(scan_env
.mem_nodes_dynamic
)) {
5402 xfree(scan_env
.mem_nodes_dynamic
);
5403 scan_env
.mem_nodes_dynamic
= (Node
** )NULL
;
5406 r
= compile_tree(root
, reg
);
5408 r
= add_opcode(reg
, OP_END
);
5409 #ifdef USE_SUBEXP_CALL
5410 if (scan_env
.num_call
> 0) {
5411 r
= unset_addr_list_fix(&uslist
, reg
);
5412 unset_addr_list_end(&uslist
);
5417 if ((reg
->num_repeat
!= 0) || (reg
->bt_mem_end
!= 0))
5418 reg
->stack_pop_level
= STACK_POP_LEVEL_ALL
;
5420 if (reg
->bt_mem_start
!= 0)
5421 reg
->stack_pop_level
= STACK_POP_LEVEL_MEM_START
;
5423 reg
->stack_pop_level
= STACK_POP_LEVEL_FREE
;
5426 #ifdef USE_SUBEXP_CALL
5427 else if (scan_env
.num_call
> 0) {
5428 unset_addr_list_end(&uslist
);
5431 onig_node_free(root
);
5433 #ifdef ONIG_DEBUG_COMPILE
5434 #ifdef USE_NAMED_GROUP
5435 onig_print_names(stderr
, reg
);
5437 print_compiled_byte_code_list(stderr
, reg
);
5441 reg
->state
= ONIG_STATE_NORMAL
;
5445 #ifdef USE_SUBEXP_CALL
5446 if (scan_env
.num_call
> 0) {
5447 unset_addr_list_end(&uslist
);
5451 if (IS_NOT_NULL(scan_env
.error
)) {
5452 if (IS_NOT_NULL(einfo
)) {
5453 einfo
->enc
= scan_env
.enc
;
5454 einfo
->par
= scan_env
.error
;
5455 einfo
->par_end
= scan_env
.error_end
;
5459 onig_node_free(root
);
5460 if (IS_NOT_NULL(scan_env
.mem_nodes_dynamic
))
5461 xfree(scan_env
.mem_nodes_dynamic
);
5465 #ifdef USE_RECOMPILE_API
5467 onig_recompile(regex_t
* reg
, const UChar
* pattern
, const UChar
* pattern_end
,
5468 OnigOptionType option
, OnigEncoding enc
, OnigSyntaxType
* syntax
,
5469 OnigErrorInfo
* einfo
)
5474 r
= onig_new(&new_reg
, pattern
, pattern_end
, option
, enc
, syntax
, einfo
);
5476 if (ONIG_STATE(reg
) == ONIG_STATE_NORMAL
) {
5477 onig_transfer(reg
, new_reg
);
5480 onig_chain_link_add(reg
, new_reg
);
5486 static int onig_inited
= 0;
5489 onig_reg_init(regex_t
* reg
, OnigOptionType option
,
5490 OnigCaseFoldType case_fold_flag
,
5491 OnigEncoding enc
, OnigSyntaxType
* syntax
)
5497 return ONIGERR_INVALID_ARGUMENT
;
5499 if (ONIGENC_IS_UNDEF(enc
))
5500 return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED
;
5502 if ((option
& (ONIG_OPTION_DONT_CAPTURE_GROUP
|ONIG_OPTION_CAPTURE_GROUP
))
5503 == (ONIG_OPTION_DONT_CAPTURE_GROUP
|ONIG_OPTION_CAPTURE_GROUP
)) {
5504 return ONIGERR_INVALID_COMBINATION_OF_OPTIONS
;
5507 (reg
)->state
= ONIG_STATE_MODIFY
;
5509 if ((option
& ONIG_OPTION_NEGATE_SINGLELINE
) != 0) {
5510 option
|= syntax
->options
;
5511 option
&= ~ONIG_OPTION_SINGLELINE
;
5514 option
|= syntax
->options
;
5517 (reg
)->options
= option
;
5518 (reg
)->syntax
= syntax
;
5519 (reg
)->optimize
= 0;
5520 (reg
)->exact
= (UChar
* )NULL
;
5521 (reg
)->int_map
= (int* )NULL
;
5522 (reg
)->int_map_backward
= (int* )NULL
;
5523 (reg
)->chain
= (regex_t
* )NULL
;
5525 (reg
)->p
= (UChar
* )NULL
;
5528 (reg
)->name_table
= (void* )NULL
;
5530 (reg
)->case_fold_flag
= case_fold_flag
;
5535 onig_new_without_alloc(regex_t
* reg
, const UChar
* pattern
,
5536 const UChar
* pattern_end
, OnigOptionType option
, OnigEncoding enc
,
5537 OnigSyntaxType
* syntax
, OnigErrorInfo
* einfo
)
5541 r
= onig_reg_init(reg
, option
, ONIGENC_CASE_FOLD_DEFAULT
, enc
, syntax
);
5544 r
= onig_compile(reg
, pattern
, pattern_end
, einfo
);
5549 onig_new(regex_t
** reg
, const UChar
* pattern
, const UChar
* pattern_end
,
5550 OnigOptionType option
, OnigEncoding enc
, OnigSyntaxType
* syntax
,
5551 OnigErrorInfo
* einfo
)
5555 *reg
= (regex_t
* )xmalloc(sizeof(regex_t
));
5556 if (IS_NULL(*reg
)) return ONIGERR_MEMORY
;
5558 r
= onig_reg_init(*reg
, option
, ONIGENC_CASE_FOLD_DEFAULT
, enc
, syntax
);
5561 r
= onig_compile(*reg
, pattern
, pattern_end
, einfo
);
5574 if (onig_inited
!= 0)
5578 THREAD_ATOMIC_START
;
5583 /* onigenc_set_default_caseconv_table((UChar* )0); */
5585 #ifdef ONIG_DEBUG_STATISTICS
5586 onig_statistics_init();
5594 static OnigEndCallListItemType
* EndCallTop
;
5596 extern void onig_add_end_call(void (*func
)(void))
5598 OnigEndCallListItemType
* item
;
5600 item
= (OnigEndCallListItemType
* )xmalloc(sizeof(*item
));
5601 if (item
== 0) return ;
5603 item
->next
= EndCallTop
;
5610 exec_end_call_list(void)
5612 OnigEndCallListItemType
* prev
;
5615 while (EndCallTop
!= 0) {
5616 func
= EndCallTop
->func
;
5620 EndCallTop
= EndCallTop
->next
;
5628 THREAD_ATOMIC_START
;
5630 exec_end_call_list();
5632 #ifdef ONIG_DEBUG_STATISTICS
5633 onig_print_statistics(stderr
);
5636 #ifdef USE_SHARED_CCLASS_TABLE
5637 onig_free_shared_cclass_table();
5640 #ifdef USE_PARSE_TREE_NODE_RECYCLE
5641 onig_free_node_list();
5652 onig_is_in_code_range(const UChar
* p
, OnigCodePoint code
)
5654 OnigCodePoint n
, *data
;
5655 OnigCodePoint low
, high
, x
;
5657 GET_CODE_POINT(n
, p
);
5658 data
= (OnigCodePoint
* )p
;
5661 for (low
= 0, high
= n
; low
< high
; ) {
5662 x
= (low
+ high
) >> 1;
5663 if (code
> data
[x
* 2 + 1])
5669 return ((low
< n
&& code
>= data
[low
* 2]) ? 1 : 0);
5673 onig_is_code_in_cc_len(int elen
, OnigCodePoint code
, CClassNode
* cc
)
5677 if (elen
> 1 || (code
>= SINGLE_BYTE_SIZE
)) {
5678 if (IS_NULL(cc
->mbuf
)) {
5682 found
= (onig_is_in_code_range(cc
->mbuf
->p
, code
) != 0 ? 1 : 0);
5686 found
= (BITSET_AT(cc
->bs
, code
) == 0 ? 0 : 1);
5689 if (IS_NCCLASS_NOT(cc
))
5696 onig_is_code_in_cc(OnigEncoding enc
, OnigCodePoint code
, CClassNode
* cc
)
5700 if (ONIGENC_MBC_MINLEN(enc
) > 1) {
5704 len
= ONIGENC_CODE_TO_MBCLEN(enc
, code
);
5706 return onig_is_code_in_cc_len(len
, code
, cc
);
5712 /* arguments type */
5713 #define ARG_SPECIAL -1
5715 #define ARG_RELADDR 1
5716 #define ARG_ABSADDR 2
5717 #define ARG_LENGTH 3
5718 #define ARG_MEMNUM 4
5719 #define ARG_OPTION 5
5720 #define ARG_STATE_CHECK 6
5722 OnigOpInfoType OnigOpInfo
[] = {
5723 { OP_FINISH
, "finish", ARG_NON
},
5724 { OP_END
, "end", ARG_NON
},
5725 { OP_EXACT1
, "exact1", ARG_SPECIAL
},
5726 { OP_EXACT2
, "exact2", ARG_SPECIAL
},
5727 { OP_EXACT3
, "exact3", ARG_SPECIAL
},
5728 { OP_EXACT4
, "exact4", ARG_SPECIAL
},
5729 { OP_EXACT5
, "exact5", ARG_SPECIAL
},
5730 { OP_EXACTN
, "exactn", ARG_SPECIAL
},
5731 { OP_EXACTMB2N1
, "exactmb2-n1", ARG_SPECIAL
},
5732 { OP_EXACTMB2N2
, "exactmb2-n2", ARG_SPECIAL
},
5733 { OP_EXACTMB2N3
, "exactmb2-n3", ARG_SPECIAL
},
5734 { OP_EXACTMB2N
, "exactmb2-n", ARG_SPECIAL
},
5735 { OP_EXACTMB3N
, "exactmb3n" , ARG_SPECIAL
},
5736 { OP_EXACTMBN
, "exactmbn", ARG_SPECIAL
},
5737 { OP_EXACT1_IC
, "exact1-ic", ARG_SPECIAL
},
5738 { OP_EXACTN_IC
, "exactn-ic", ARG_SPECIAL
},
5739 { OP_CCLASS
, "cclass", ARG_SPECIAL
},
5740 { OP_CCLASS_MB
, "cclass-mb", ARG_SPECIAL
},
5741 { OP_CCLASS_MIX
, "cclass-mix", ARG_SPECIAL
},
5742 { OP_CCLASS_NOT
, "cclass-not", ARG_SPECIAL
},
5743 { OP_CCLASS_MB_NOT
, "cclass-mb-not", ARG_SPECIAL
},
5744 { OP_CCLASS_MIX_NOT
, "cclass-mix-not", ARG_SPECIAL
},
5745 { OP_CCLASS_NODE
, "cclass-node", ARG_SPECIAL
},
5746 { OP_ANYCHAR
, "anychar", ARG_NON
},
5747 { OP_ANYCHAR_ML
, "anychar-ml", ARG_NON
},
5748 { OP_ANYCHAR_STAR
, "anychar*", ARG_NON
},
5749 { OP_ANYCHAR_ML_STAR
, "anychar-ml*", ARG_NON
},
5750 { OP_ANYCHAR_STAR_PEEK_NEXT
, "anychar*-peek-next", ARG_SPECIAL
},
5751 { OP_ANYCHAR_ML_STAR_PEEK_NEXT
, "anychar-ml*-peek-next", ARG_SPECIAL
},
5752 { OP_WORD
, "word", ARG_NON
},
5753 { OP_NOT_WORD
, "not-word", ARG_NON
},
5754 { OP_WORD_BOUND
, "word-bound", ARG_NON
},
5755 { OP_NOT_WORD_BOUND
, "not-word-bound", ARG_NON
},
5756 { OP_WORD_BEGIN
, "word-begin", ARG_NON
},
5757 { OP_WORD_END
, "word-end", ARG_NON
},
5758 { OP_BEGIN_BUF
, "begin-buf", ARG_NON
},
5759 { OP_END_BUF
, "end-buf", ARG_NON
},
5760 { OP_BEGIN_LINE
, "begin-line", ARG_NON
},
5761 { OP_END_LINE
, "end-line", ARG_NON
},
5762 { OP_SEMI_END_BUF
, "semi-end-buf", ARG_NON
},
5763 { OP_BEGIN_POSITION
, "begin-position", ARG_NON
},
5764 { OP_BACKREF1
, "backref1", ARG_NON
},
5765 { OP_BACKREF2
, "backref2", ARG_NON
},
5766 { OP_BACKREFN
, "backrefn", ARG_MEMNUM
},
5767 { OP_BACKREFN_IC
, "backrefn-ic", ARG_SPECIAL
},
5768 { OP_BACKREF_MULTI
, "backref_multi", ARG_SPECIAL
},
5769 { OP_BACKREF_MULTI_IC
, "backref_multi-ic", ARG_SPECIAL
},
5770 { OP_BACKREF_WITH_LEVEL
, "backref_at_level", ARG_SPECIAL
},
5771 { OP_MEMORY_START_PUSH
, "mem-start-push", ARG_MEMNUM
},
5772 { OP_MEMORY_START
, "mem-start", ARG_MEMNUM
},
5773 { OP_MEMORY_END_PUSH
, "mem-end-push", ARG_MEMNUM
},
5774 { OP_MEMORY_END_PUSH_REC
, "mem-end-push-rec", ARG_MEMNUM
},
5775 { OP_MEMORY_END
, "mem-end", ARG_MEMNUM
},
5776 { OP_MEMORY_END_REC
, "mem-end-rec", ARG_MEMNUM
},
5777 { OP_SET_OPTION_PUSH
, "set-option-push", ARG_OPTION
},
5778 { OP_SET_OPTION
, "set-option", ARG_OPTION
},
5779 { OP_FAIL
, "fail", ARG_NON
},
5780 { OP_JUMP
, "jump", ARG_RELADDR
},
5781 { OP_PUSH
, "push", ARG_RELADDR
},
5782 { OP_POP
, "pop", ARG_NON
},
5783 { OP_PUSH_OR_JUMP_EXACT1
, "push-or-jump-e1", ARG_SPECIAL
},
5784 { OP_PUSH_IF_PEEK_NEXT
, "push-if-peek-next", ARG_SPECIAL
},
5785 { OP_REPEAT
, "repeat", ARG_SPECIAL
},
5786 { OP_REPEAT_NG
, "repeat-ng", ARG_SPECIAL
},
5787 { OP_REPEAT_INC
, "repeat-inc", ARG_MEMNUM
},
5788 { OP_REPEAT_INC_NG
, "repeat-inc-ng", ARG_MEMNUM
},
5789 { OP_REPEAT_INC_SG
, "repeat-inc-sg", ARG_MEMNUM
},
5790 { OP_REPEAT_INC_NG_SG
, "repeat-inc-ng-sg", ARG_MEMNUM
},
5791 { OP_NULL_CHECK_START
, "null-check-start", ARG_MEMNUM
},
5792 { OP_NULL_CHECK_END
, "null-check-end", ARG_MEMNUM
},
5793 { OP_NULL_CHECK_END_MEMST
,"null-check-end-memst", ARG_MEMNUM
},
5794 { OP_NULL_CHECK_END_MEMST_PUSH
,"null-check-end-memst-push", ARG_MEMNUM
},
5795 { OP_PUSH_POS
, "push-pos", ARG_NON
},
5796 { OP_POP_POS
, "pop-pos", ARG_NON
},
5797 { OP_PUSH_POS_NOT
, "push-pos-not", ARG_RELADDR
},
5798 { OP_FAIL_POS
, "fail-pos", ARG_NON
},
5799 { OP_PUSH_STOP_BT
, "push-stop-bt", ARG_NON
},
5800 { OP_POP_STOP_BT
, "pop-stop-bt", ARG_NON
},
5801 { OP_LOOK_BEHIND
, "look-behind", ARG_SPECIAL
},
5802 { OP_PUSH_LOOK_BEHIND_NOT
, "push-look-behind-not", ARG_SPECIAL
},
5803 { OP_FAIL_LOOK_BEHIND_NOT
, "fail-look-behind-not", ARG_NON
},
5804 { OP_CALL
, "call", ARG_ABSADDR
},
5805 { OP_RETURN
, "return", ARG_NON
},
5806 { OP_STATE_CHECK_PUSH
, "state-check-push", ARG_SPECIAL
},
5807 { OP_STATE_CHECK_PUSH_OR_JUMP
, "state-check-push-or-jump", ARG_SPECIAL
},
5808 { OP_STATE_CHECK
, "state-check", ARG_STATE_CHECK
},
5809 { OP_STATE_CHECK_ANYCHAR_STAR
, "state-check-anychar*", ARG_STATE_CHECK
},
5810 { OP_STATE_CHECK_ANYCHAR_ML_STAR
,
5811 "state-check-anychar-ml*", ARG_STATE_CHECK
},
5820 for (i
= 0; OnigOpInfo
[i
].opcode
>= 0; i
++) {
5821 if (opcode
== OnigOpInfo
[i
].opcode
)
5822 return OnigOpInfo
[i
].name
;
5828 op2arg_type(int opcode
)
5832 for (i
= 0; OnigOpInfo
[i
].opcode
>= 0; i
++) {
5833 if (opcode
== OnigOpInfo
[i
].opcode
)
5834 return OnigOpInfo
[i
].arg_type
;
5840 Indent(FILE* f
, int indent
)
5843 for (i
= 0; i
< indent
; i
++) putc(' ', f
);
5847 p_string(FILE* f
, int len
, UChar
* s
)
5850 while (len
-- > 0) { fputc(*s
++, f
); }
5854 p_len_string(FILE* f
, LengthType len
, int mb_len
, UChar
* s
)
5856 int x
= len
* mb_len
;
5858 fprintf(f
, ":%d:", len
);
5859 while (x
-- > 0) { fputc(*s
++, f
); }
5863 onig_print_compiled_byte_code(FILE* f
, UChar
* bp
, UChar
** nextp
,
5870 StateCheckNumType scn
;
5874 fprintf(f
, "[%s", op2name(*bp
));
5875 arg_type
= op2arg_type(*bp
);
5876 if (arg_type
!= ARG_SPECIAL
) {
5882 GET_RELADDR_INC(addr
, bp
);
5883 fprintf(f
, ":(%d)", addr
);
5886 GET_ABSADDR_INC(addr
, bp
);
5887 fprintf(f
, ":(%d)", addr
);
5890 GET_LENGTH_INC(len
, bp
);
5891 fprintf(f
, ":%d", len
);
5894 mem
= *((MemNumType
* )bp
);
5896 fprintf(f
, ":%d", mem
);
5900 OnigOptionType option
= *((OnigOptionType
* )bp
);
5902 fprintf(f
, ":%d", option
);
5906 case ARG_STATE_CHECK
:
5907 scn
= *((StateCheckNumType
* )bp
);
5908 bp
+= SIZE_STATE_CHECK_NUM
;
5909 fprintf(f
, ":%d", scn
);
5916 case OP_ANYCHAR_STAR_PEEK_NEXT
:
5917 case OP_ANYCHAR_ML_STAR_PEEK_NEXT
:
5918 p_string(f
, 1, bp
++); break;
5920 p_string(f
, 2, bp
); bp
+= 2; break;
5922 p_string(f
, 3, bp
); bp
+= 3; break;
5924 p_string(f
, 4, bp
); bp
+= 4; break;
5926 p_string(f
, 5, bp
); bp
+= 5; break;
5928 GET_LENGTH_INC(len
, bp
);
5929 p_len_string(f
, len
, 1, bp
);
5934 p_string(f
, 2, bp
); bp
+= 2; break;
5936 p_string(f
, 4, bp
); bp
+= 4; break;
5938 p_string(f
, 6, bp
); bp
+= 6; break;
5940 GET_LENGTH_INC(len
, bp
);
5941 p_len_string(f
, len
, 2, bp
);
5945 GET_LENGTH_INC(len
, bp
);
5946 p_len_string(f
, len
, 3, bp
);
5953 GET_LENGTH_INC(mb_len
, bp
);
5954 GET_LENGTH_INC(len
, bp
);
5955 fprintf(f
, ":%d:%d:", mb_len
, len
);
5957 while (n
-- > 0) { fputc(*bp
++, f
); }
5962 len
= enclen(enc
, bp
);
5963 p_string(f
, len
, bp
);
5967 GET_LENGTH_INC(len
, bp
);
5968 p_len_string(f
, len
, 1, bp
);
5973 n
= bitset_on_num((BitSetRef
)bp
);
5975 fprintf(f
, ":%d", n
);
5979 n
= bitset_on_num((BitSetRef
)bp
);
5981 fprintf(f
, ":%d", n
);
5985 case OP_CCLASS_MB_NOT
:
5986 GET_LENGTH_INC(len
, bp
);
5988 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
5991 GET_CODE_POINT(code
, q
);
5993 fprintf(f
, ":%d:%d", (int )code
, len
);
5997 case OP_CCLASS_MIX_NOT
:
5998 n
= bitset_on_num((BitSetRef
)bp
);
6000 GET_LENGTH_INC(len
, bp
);
6002 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6005 GET_CODE_POINT(code
, q
);
6007 fprintf(f
, ":%d:%d:%d", n
, (int )code
, len
);
6010 case OP_CCLASS_NODE
:
6014 GET_POINTER_INC(cc
, bp
);
6015 n
= bitset_on_num(cc
->bs
);
6016 fprintf(f
, ":%u:%d", (unsigned int )cc
, n
);
6020 case OP_BACKREFN_IC
:
6021 mem
= *((MemNumType
* )bp
);
6023 fprintf(f
, ":%d", mem
);
6026 case OP_BACKREF_MULTI_IC
:
6027 case OP_BACKREF_MULTI
:
6029 GET_LENGTH_INC(len
, bp
);
6030 for (i
= 0; i
< len
; i
++) {
6031 GET_MEMNUM_INC(mem
, bp
);
6032 if (i
> 0) fputs(", ", f
);
6033 fprintf(f
, "%d", mem
);
6037 case OP_BACKREF_WITH_LEVEL
:
6039 OnigOptionType option
;
6042 GET_OPTION_INC(option
, bp
);
6043 fprintf(f
, ":%d", option
);
6044 GET_LENGTH_INC(level
, bp
);
6045 fprintf(f
, ":%d", level
);
6048 GET_LENGTH_INC(len
, bp
);
6049 for (i
= 0; i
< len
; i
++) {
6050 GET_MEMNUM_INC(mem
, bp
);
6051 if (i
> 0) fputs(", ", f
);
6052 fprintf(f
, "%d", mem
);
6060 mem
= *((MemNumType
* )bp
);
6062 addr
= *((RelAddrType
* )bp
);
6064 fprintf(f
, ":%d:%d", mem
, addr
);
6068 case OP_PUSH_OR_JUMP_EXACT1
:
6069 case OP_PUSH_IF_PEEK_NEXT
:
6070 addr
= *((RelAddrType
* )bp
);
6072 fprintf(f
, ":(%d)", addr
);
6077 case OP_LOOK_BEHIND
:
6078 GET_LENGTH_INC(len
, bp
);
6079 fprintf(f
, ":%d", len
);
6082 case OP_PUSH_LOOK_BEHIND_NOT
:
6083 GET_RELADDR_INC(addr
, bp
);
6084 GET_LENGTH_INC(len
, bp
);
6085 fprintf(f
, ":%d:(%d)", len
, addr
);
6088 case OP_STATE_CHECK_PUSH
:
6089 case OP_STATE_CHECK_PUSH_OR_JUMP
:
6090 scn
= *((StateCheckNumType
* )bp
);
6091 bp
+= SIZE_STATE_CHECK_NUM
;
6092 addr
= *((RelAddrType
* )bp
);
6094 fprintf(f
, ":%d:(%d)", scn
, addr
);
6098 fprintf(stderr
, "onig_print_compiled_byte_code: undefined code %d\n",
6103 if (nextp
) *nextp
= bp
;
6107 print_compiled_byte_code_list(FILE* f
, regex_t
* reg
)
6111 UChar
* end
= reg
->p
+ reg
->used
;
6113 fprintf(f
, "code length: %d\n", reg
->used
);
6124 onig_print_compiled_byte_code(f
, bp
, &bp
, reg
->enc
);
6131 print_indent_tree(FILE* f
, Node
* node
, int indent
)
6138 if (IS_NULL(node
)) {
6139 fprintf(f
, "ERROR: null node!!!\n");
6147 if (NTYPE(node
) == NT_LIST
)
6148 fprintf(f
, "<list:%x>\n", (int )node
);
6150 fprintf(f
, "<alt:%x>\n", (int )node
);
6152 print_indent_tree(f
, NCAR(node
), indent
+ add
);
6153 while (IS_NOT_NULL(node
= NCDR(node
))) {
6154 if (NTYPE(node
) != type
) {
6155 fprintf(f
, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node
));
6158 print_indent_tree(f
, NCAR(node
), indent
+ add
);
6163 fprintf(f
, "<string%s:%x>",
6164 (NSTRING_IS_RAW(node
) ? "-raw" : ""), (int )node
);
6165 for (p
= NSTR(node
)->s
; p
< NSTR(node
)->end
; p
++) {
6166 if (*p
>= 0x20 && *p
< 0x7f)
6169 fprintf(f
, " 0x%02x", *p
);
6175 fprintf(f
, "<cclass:%x>", (int )node
);
6176 if (IS_NCCLASS_NOT(NCCLASS(node
))) fputs(" not", f
);
6177 if (NCCLASS(node
)->mbuf
) {
6178 BBuf
* bbuf
= NCCLASS(node
)->mbuf
;
6179 for (i
= 0; i
< bbuf
->used
; i
++) {
6180 if (i
> 0) fprintf(f
, ",");
6181 fprintf(f
, "%0x", bbuf
->p
[i
]);
6187 fprintf(f
, "<ctype:%x> ", (int )node
);
6188 switch (NCTYPE(node
)->ctype
) {
6189 case ONIGENC_CTYPE_WORD
:
6190 if (NCTYPE(node
)->not != 0)
6191 fputs("not word", f
);
6197 fprintf(f
, "ERROR: undefined ctype.\n");
6203 fprintf(f
, "<anychar:%x>", (int )node
);
6207 fprintf(f
, "<anchor:%x> ", (int )node
);
6208 switch (NANCHOR(node
)->type
) {
6209 case ANCHOR_BEGIN_BUF
: fputs("begin buf", f
); break;
6210 case ANCHOR_END_BUF
: fputs("end buf", f
); break;
6211 case ANCHOR_BEGIN_LINE
: fputs("begin line", f
); break;
6212 case ANCHOR_END_LINE
: fputs("end line", f
); break;
6213 case ANCHOR_SEMI_END_BUF
: fputs("semi end buf", f
); break;
6214 case ANCHOR_BEGIN_POSITION
: fputs("begin position", f
); break;
6216 case ANCHOR_WORD_BOUND
: fputs("word bound", f
); break;
6217 case ANCHOR_NOT_WORD_BOUND
: fputs("not word bound", f
); break;
6218 #ifdef USE_WORD_BEGIN_END
6219 case ANCHOR_WORD_BEGIN
: fputs("word begin", f
); break;
6220 case ANCHOR_WORD_END
: fputs("word end", f
); break;
6222 case ANCHOR_PREC_READ
: fputs("prec read", f
); break;
6223 case ANCHOR_PREC_READ_NOT
: fputs("prec read not", f
); break;
6224 case ANCHOR_LOOK_BEHIND
: fputs("look_behind", f
); break;
6225 case ANCHOR_LOOK_BEHIND_NOT
: fputs("look_behind_not",f
); break;
6228 fprintf(f
, "ERROR: undefined anchor type.\n");
6236 BRefNode
* br
= NBREF(node
);
6238 fprintf(f
, "<backref:%x>", (int )node
);
6239 for (i
= 0; i
< br
->back_num
; i
++) {
6240 if (i
> 0) fputs(", ", f
);
6241 fprintf(f
, "%d", p
[i
]);
6246 #ifdef USE_SUBEXP_CALL
6249 CallNode
* cn
= NCALL(node
);
6250 fprintf(f
, "<call:%x>", (int )node
);
6251 p_string(f
, cn
->name_end
- cn
->name
, cn
->name
);
6257 fprintf(f
, "<quantifier:%x>{%d,%d}%s\n", (int )node
,
6258 NQTFR(node
)->lower
, NQTFR(node
)->upper
,
6259 (NQTFR(node
)->greedy
? "" : "?"));
6260 print_indent_tree(f
, NQTFR(node
)->target
, indent
+ add
);
6264 fprintf(f
, "<enclose:%x> ", (int )node
);
6265 switch (NENCLOSE(node
)->type
) {
6266 case ENCLOSE_OPTION
:
6267 fprintf(f
, "option:%d", NENCLOSE(node
)->option
);
6269 case ENCLOSE_MEMORY
:
6270 fprintf(f
, "memory:%d", NENCLOSE(node
)->regnum
);
6272 case ENCLOSE_STOP_BACKTRACK
:
6273 fprintf(f
, "stop-bt");
6280 print_indent_tree(f
, NENCLOSE(node
)->target
, indent
+ add
);
6284 fprintf(f
, "print_indent_tree: undefined node type %d\n", NTYPE(node
));
6288 if (type
!= NT_LIST
&& type
!= NT_ALT
&& type
!= NT_QTFR
&&
6293 #endif /* ONIG_DEBUG */
6295 #ifdef ONIG_DEBUG_PARSE_TREE
6297 print_tree(FILE* f
, Node
* node
)
6299 print_indent_tree(f
, node
, 0);