1 /**********************************************************************
2 regcomp.c - Oniguruma (regular expression library)
3 **********************************************************************/
5 * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 OnigCaseFoldType OnigDefaultCaseFoldFlag
= ONIGENC_CASE_FOLD_MIN
;
42 make_int_stack(int_stack
** rs
, int init_size
)
49 s
= xmalloc(sizeof(*s
));
50 if (IS_NULL(s
)) return ONIGERR_MEMORY
;
52 v
= (int* )xmalloc(sizeof(int) * init_size
);
55 return ONIGERR_MEMORY
;
67 free_int_stack(int_stack
* s
)
70 if (IS_NOT_NULL(s
->v
))
77 int_stack_push(int_stack
* s
, int v
)
79 if (s
->n
>= s
->alloc
) {
80 int new_size
= s
->alloc
* 2;
81 int* nv
= (int* )xrealloc(s
->v
, sizeof(int) * new_size
);
82 if (IS_NULL(nv
)) return ONIGERR_MEMORY
;
94 int_stack_pop(int_stack
* s
)
100 fprintf(stderr
, "int_stack_pop: fail empty. %p\n", s
);
111 extern OnigCaseFoldType
112 onig_get_default_case_fold_flag(void)
114 return OnigDefaultCaseFoldFlag
;
118 onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag
)
120 OnigDefaultCaseFoldFlag
= case_fold_flag
;
125 int_multiply_cmp(int x
, int y
, int v
)
127 if (x
== 0 || y
== 0) return -1;
129 if (x
< INT_MAX
/ y
) {
131 if (xy
> v
) return 1;
133 if (xy
== v
) return 0;
142 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
143 static unsigned char PadBuf
[WORD_ALIGNMENT_SIZE
];
147 swap_node(Node
* a
, Node
* b
)
151 c
= *a
; *a
= *b
; *b
= c
;
153 if (NODE_TYPE(a
) == NODE_STRING
) {
154 StrNode
* sn
= STR_(a
);
156 int len
= (int )(sn
->end
- sn
->s
);
158 sn
->end
= sn
->s
+ len
;
162 if (NODE_TYPE(b
) == NODE_STRING
) {
163 StrNode
* sn
= STR_(b
);
165 int len
= (int )(sn
->end
- sn
->s
);
167 sn
->end
= sn
->s
+ len
;
173 distance_add(OnigLen d1
, OnigLen d2
)
175 if (d1
== INFINITE_LEN
|| d2
== INFINITE_LEN
)
178 if (d1
<= INFINITE_LEN
- d2
) return d1
+ d2
;
179 else return INFINITE_LEN
;
184 distance_multiply(OnigLen d
, int m
)
186 if (m
== 0) return 0;
188 if (d
< INFINITE_LEN
/ m
)
195 bitset_is_empty(BitSetRef bs
)
199 for (i
= 0; i
< (int )BITSET_SIZE
; i
++) {
200 if (bs
[i
] != 0) return 0;
206 onig_bbuf_init(BBuf
* buf
, int size
)
213 buf
->p
= (UChar
* )xmalloc(size
);
214 if (IS_NULL(buf
->p
)) return(ONIGERR_MEMORY
);
226 unset_addr_list_init(UnsetAddrList
* list
, int size
)
228 UnsetAddr
* p
= (UnsetAddr
* )xmalloc(sizeof(UnsetAddr
)* size
);
229 CHECK_NULL_RETURN_MEMERR(p
);
238 unset_addr_list_end(UnsetAddrList
* list
)
240 if (IS_NOT_NULL(list
->us
))
245 unset_addr_list_add(UnsetAddrList
* list
, int offset
, struct _Node
* node
)
250 if (list
->num
>= list
->alloc
) {
251 size
= list
->alloc
* 2;
252 p
= (UnsetAddr
* )xrealloc(list
->us
, sizeof(UnsetAddr
) * size
, sizeof(UnsetAddr
)* list
->alloc
);
253 CHECK_NULL_RETURN_MEMERR(p
);
258 list
->us
[list
->num
].offset
= offset
;
259 list
->us
[list
->num
].target
= node
;
263 #endif /* USE_CALL */
267 add_opcode(regex_t
* reg
, int opcode
)
269 BB_ADD1(reg
, opcode
);
274 add_rel_addr(regex_t
* reg
, int addr
)
276 RelAddrType ra
= (RelAddrType
)addr
;
278 BB_ADD(reg
, &ra
, SIZE_RELADDR
);
283 add_abs_addr(regex_t
* reg
, int addr
)
285 AbsAddrType ra
= (AbsAddrType
)addr
;
287 BB_ADD(reg
, &ra
, SIZE_ABSADDR
);
292 add_length(regex_t
* reg
, int len
)
294 LengthType l
= (LengthType
)len
;
296 BB_ADD(reg
, &l
, SIZE_LENGTH
);
301 add_mem_num(regex_t
* reg
, int num
)
303 MemNumType n
= (MemNumType
)num
;
305 BB_ADD(reg
, &n
, SIZE_MEMNUM
);
311 add_pointer(regex_t
* reg
, void* addr
)
313 PointerType ptr
= (PointerType
)addr
;
315 BB_ADD(reg
, &ptr
, SIZE_POINTER
);
321 add_option(regex_t
* reg
, OnigOptionType option
)
323 BB_ADD(reg
, &option
, SIZE_OPTION
);
328 add_save_type(regex_t
* reg
, enum SaveType type
)
330 SaveType t
= (SaveType
)type
;
332 BB_ADD(reg
, &t
, SIZE_SAVE_TYPE
);
337 add_update_var_type(regex_t
* reg
, enum UpdateVarType type
)
339 UpdateVarType t
= (UpdateVarType
)type
;
341 BB_ADD(reg
, &t
, SIZE_UPDATE_VAR_TYPE
);
346 add_mode(regex_t
* reg
, ModeType mode
)
348 BB_ADD(reg
, &mode
, SIZE_MODE
);
353 add_opcode_rel_addr(regex_t
* reg
, int opcode
, int addr
)
357 r
= add_opcode(reg
, opcode
);
358 if (r
!= 0) return r
;
359 r
= add_rel_addr(reg
, addr
);
364 add_bytes(regex_t
* reg
, UChar
* bytes
, int len
)
366 BB_ADD(reg
, bytes
, len
);
371 add_bitset(regex_t
* reg
, BitSetRef bs
)
373 BB_ADD(reg
, bs
, SIZE_BITSET
);
377 static int compile_length_tree(Node
* node
, regex_t
* reg
);
378 static int compile_tree(Node
* node
, regex_t
* reg
, ScanEnv
* env
);
381 #define IS_NEED_STR_LEN_OP_EXACT(op) \
382 ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
383 (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
386 select_str_opcode(int mb_len
, int str_len
, int ignore_case
)
392 case 1: op
= OP_EXACT1_IC
; break;
393 default: op
= OP_EXACTN_IC
; break;
400 case 1: op
= OP_EXACT1
; break;
401 case 2: op
= OP_EXACT2
; break;
402 case 3: op
= OP_EXACT3
; break;
403 case 4: op
= OP_EXACT4
; break;
404 case 5: op
= OP_EXACT5
; break;
405 default: op
= OP_EXACTN
; break;
411 case 1: op
= OP_EXACTMB2N1
; break;
412 case 2: op
= OP_EXACTMB2N2
; break;
413 case 3: op
= OP_EXACTMB2N3
; break;
414 default: op
= OP_EXACTMB2N
; break;
431 compile_tree_empty_check(Node
* node
, regex_t
* reg
, int empty_info
, ScanEnv
* env
)
434 int saved_num_null_check
= reg
->num_null_check
;
436 if (empty_info
!= QUANT_BODY_IS_NOT_EMPTY
) {
437 r
= add_opcode(reg
, OP_EMPTY_CHECK_START
);
438 if (r
!= 0) return r
;
439 r
= add_mem_num(reg
, reg
->num_null_check
); /* NULL CHECK ID */
440 if (r
!= 0) return r
;
441 reg
->num_null_check
++;
444 r
= compile_tree(node
, reg
, env
);
445 if (r
!= 0) return r
;
447 if (empty_info
!= QUANT_BODY_IS_NOT_EMPTY
) {
448 if (empty_info
== QUANT_BODY_IS_EMPTY
)
449 r
= add_opcode(reg
, OP_EMPTY_CHECK_END
);
450 else if (empty_info
== QUANT_BODY_IS_EMPTY_MEM
)
451 r
= add_opcode(reg
, OP_EMPTY_CHECK_END_MEMST
);
452 else if (empty_info
== QUANT_BODY_IS_EMPTY_REC
)
453 r
= add_opcode(reg
, OP_EMPTY_CHECK_END_MEMST_PUSH
);
455 if (r
!= 0) return r
;
456 r
= add_mem_num(reg
, saved_num_null_check
); /* NULL CHECK ID */
463 compile_call(CallNode
* node
, regex_t
* reg
, ScanEnv
* env
)
467 r
= add_opcode(reg
, OP_CALL
);
468 if (r
!= 0) return r
;
469 r
= unset_addr_list_add(env
->unset_addr_list
, BB_GET_OFFSET_POS(reg
),
470 NODE_CALL_BODY(node
));
471 if (r
!= 0) return r
;
472 r
= add_abs_addr(reg
, 0 /*dummy addr.*/);
478 compile_tree_n_times(Node
* node
, int n
, regex_t
* reg
, ScanEnv
* env
)
482 for (i
= 0; i
< n
; i
++) {
483 r
= compile_tree(node
, reg
, env
);
484 if (r
!= 0) return r
;
490 add_compile_string_length(UChar
* s ARG_UNUSED
, int mb_len
, int str_len
,
491 regex_t
* reg ARG_UNUSED
, int ignore_case
)
494 int op
= select_str_opcode(mb_len
, str_len
, ignore_case
);
498 if (op
== OP_EXACTMBN
) len
+= SIZE_LENGTH
;
499 if (IS_NEED_STR_LEN_OP_EXACT(op
))
502 len
+= mb_len
* str_len
;
507 add_compile_string(UChar
* s
, int mb_len
, int str_len
,
508 regex_t
* reg
, int ignore_case
)
510 int op
= select_str_opcode(mb_len
, str_len
, ignore_case
);
513 if (op
== OP_EXACTMBN
)
514 add_length(reg
, mb_len
);
516 if (IS_NEED_STR_LEN_OP_EXACT(op
)) {
517 if (op
== OP_EXACTN_IC
)
518 add_length(reg
, mb_len
* str_len
);
520 add_length(reg
, str_len
);
523 add_bytes(reg
, s
, mb_len
* str_len
);
529 compile_length_string_node(Node
* node
, regex_t
* reg
)
531 int rlen
, r
, len
, prev_len
, slen
, ambig
;
534 OnigEncoding enc
= reg
->enc
;
537 if (sn
->end
<= sn
->s
)
540 ambig
= NODE_STRING_IS_AMBIG(node
);
543 prev_len
= enclen(enc
, p
);
548 for (; p
< sn
->end
; ) {
549 len
= enclen(enc
, p
);
550 if (len
== prev_len
) {
554 r
= add_compile_string_length(prev
, prev_len
, slen
, reg
, ambig
);
563 r
= add_compile_string_length(prev
, prev_len
, slen
, reg
, ambig
);
569 compile_length_string_raw_node(StrNode
* sn
, regex_t
* reg
)
571 if (sn
->end
<= sn
->s
)
574 return add_compile_string_length(sn
->s
, 1 /* sb */, (int )(sn
->end
- sn
->s
),
579 compile_string_node(Node
* node
, regex_t
* reg
)
581 int r
, len
, prev_len
, slen
, ambig
;
582 UChar
*p
, *prev
, *end
;
584 OnigEncoding enc
= reg
->enc
;
587 if (sn
->end
<= sn
->s
)
591 ambig
= NODE_STRING_IS_AMBIG(node
);
594 prev_len
= enclen(enc
, p
);
599 len
= enclen(enc
, p
);
600 if (len
== prev_len
) {
604 r
= add_compile_string(prev
, prev_len
, slen
, reg
, ambig
);
605 if (r
!= 0) return r
;
615 return add_compile_string(prev
, prev_len
, slen
, reg
, ambig
);
619 compile_string_raw_node(StrNode
* sn
, regex_t
* reg
)
621 if (sn
->end
<= sn
->s
)
624 return add_compile_string(sn
->s
, 1 /* sb */, (int )(sn
->end
- sn
->s
), reg
, 0);
628 add_multi_byte_cclass(BBuf
* mbuf
, regex_t
* reg
)
630 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
631 add_length(reg
, mbuf
->used
);
632 return add_bytes(reg
, mbuf
->p
, mbuf
->used
);
635 UChar
* p
= BB_GET_ADD_ADDRESS(reg
) + SIZE_LENGTH
;
637 GET_ALIGNMENT_PAD_SIZE(p
, pad_size
);
638 add_length(reg
, mbuf
->used
+ (WORD_ALIGNMENT_SIZE
- 1));
639 if (pad_size
!= 0) add_bytes(reg
, PadBuf
, pad_size
);
641 r
= add_bytes(reg
, mbuf
->p
, mbuf
->used
);
643 /* padding for return value from compile_length_cclass_node() to be fix. */
644 pad_size
= (WORD_ALIGNMENT_SIZE
- 1) - pad_size
;
645 if (pad_size
!= 0) add_bytes(reg
, PadBuf
, pad_size
);
651 compile_length_cclass_node(CClassNode
* cc
, regex_t
* reg
)
655 if (IS_NULL(cc
->mbuf
)) {
656 len
= SIZE_OPCODE
+ SIZE_BITSET
;
659 if (ONIGENC_MBC_MINLEN(reg
->enc
) > 1 || bitset_is_empty(cc
->bs
)) {
663 len
= SIZE_OPCODE
+ SIZE_BITSET
;
665 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
666 len
+= SIZE_LENGTH
+ cc
->mbuf
->used
;
668 len
+= SIZE_LENGTH
+ cc
->mbuf
->used
+ (WORD_ALIGNMENT_SIZE
- 1);
676 compile_cclass_node(CClassNode
* cc
, regex_t
* reg
)
680 if (IS_NULL(cc
->mbuf
)) {
681 if (IS_NCCLASS_NOT(cc
))
682 add_opcode(reg
, OP_CCLASS_NOT
);
684 add_opcode(reg
, OP_CCLASS
);
686 r
= add_bitset(reg
, cc
->bs
);
689 if (ONIGENC_MBC_MINLEN(reg
->enc
) > 1 || bitset_is_empty(cc
->bs
)) {
690 if (IS_NCCLASS_NOT(cc
))
691 add_opcode(reg
, OP_CCLASS_MB_NOT
);
693 add_opcode(reg
, OP_CCLASS_MB
);
695 r
= add_multi_byte_cclass(cc
->mbuf
, reg
);
698 if (IS_NCCLASS_NOT(cc
))
699 add_opcode(reg
, OP_CCLASS_MIX_NOT
);
701 add_opcode(reg
, OP_CCLASS_MIX
);
703 r
= add_bitset(reg
, cc
->bs
);
704 if (r
!= 0) return r
;
705 r
= add_multi_byte_cclass(cc
->mbuf
, reg
);
713 entry_repeat_range(regex_t
* reg
, int id
, int lower
, int upper
)
715 #define REPEAT_RANGE_ALLOC 4
719 if (reg
->repeat_range_alloc
== 0) {
720 p
= (OnigRepeatRange
* )xmalloc(sizeof(OnigRepeatRange
) * REPEAT_RANGE_ALLOC
);
721 CHECK_NULL_RETURN_MEMERR(p
);
722 reg
->repeat_range
= p
;
723 reg
->repeat_range_alloc
= REPEAT_RANGE_ALLOC
;
725 else if (reg
->repeat_range_alloc
<= id
) {
727 n
= reg
->repeat_range_alloc
+ REPEAT_RANGE_ALLOC
;
728 p
= (OnigRepeatRange
* )xrealloc(reg
->repeat_range
,
729 sizeof(OnigRepeatRange
) * n
,
730 sizeof(OnigRepeatRange
) * reg
->repeat_range_alloc
);
731 CHECK_NULL_RETURN_MEMERR(p
);
732 reg
->repeat_range
= p
;
733 reg
->repeat_range_alloc
= n
;
736 p
= reg
->repeat_range
;
740 p
[id
].upper
= (IS_REPEAT_INFINITE(upper
) ? 0x7fffffff : upper
);
745 compile_range_repeat_node(QuantNode
* qn
, int target_len
, int empty_info
,
746 regex_t
* reg
, ScanEnv
* env
)
749 int num_repeat
= reg
->num_repeat
;
751 r
= add_opcode(reg
, qn
->greedy
? OP_REPEAT
: OP_REPEAT_NG
);
752 if (r
!= 0) return r
;
753 r
= add_mem_num(reg
, num_repeat
); /* OP_REPEAT ID */
755 if (r
!= 0) return r
;
756 r
= add_rel_addr(reg
, target_len
+ SIZE_OP_REPEAT_INC
);
757 if (r
!= 0) return r
;
759 r
= entry_repeat_range(reg
, num_repeat
, qn
->lower
, qn
->upper
);
760 if (r
!= 0) return r
;
762 r
= compile_tree_empty_check(NODE_QUANT_BODY(qn
), reg
, empty_info
, env
);
763 if (r
!= 0) return r
;
767 NODE_IS_IN_MULTI_ENTRY(qn
) ||
769 NODE_IS_IN_REAL_REPEAT(qn
)) {
770 r
= add_opcode(reg
, qn
->greedy
? OP_REPEAT_INC_SG
: OP_REPEAT_INC_NG_SG
);
773 r
= add_opcode(reg
, qn
->greedy
? OP_REPEAT_INC
: OP_REPEAT_INC_NG
);
775 if (r
!= 0) return r
;
776 r
= add_mem_num(reg
, num_repeat
); /* OP_REPEAT ID */
781 is_anychar_infinite_greedy(QuantNode
* qn
)
783 if (qn
->greedy
&& IS_REPEAT_INFINITE(qn
->upper
) &&
784 NODE_IS_ANYCHAR(NODE_QUANT_BODY(qn
)))
790 #define QUANTIFIER_EXPAND_LIMIT_SIZE 50
791 #define CKN_ON (ckn > 0)
794 compile_length_quantifier_node(QuantNode
* qn
, regex_t
* reg
)
797 int infinite
= IS_REPEAT_INFINITE(qn
->upper
);
798 enum QuantBodyEmpty empty_info
= qn
->body_empty_info
;
799 int tlen
= compile_length_tree(NODE_QUANT_BODY(qn
), reg
);
801 if (tlen
< 0) return tlen
;
802 if (tlen
== 0) return 0;
805 if (is_anychar_infinite_greedy(qn
)) {
806 if (qn
->lower
<= 1 ||
807 int_multiply_cmp(tlen
, qn
->lower
, QUANTIFIER_EXPAND_LIMIT_SIZE
) <= 0) {
808 if (IS_NOT_NULL(qn
->next_head_exact
))
809 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT
+ tlen
* qn
->lower
;
811 return SIZE_OP_ANYCHAR_STAR
+ tlen
* qn
->lower
;
815 if (empty_info
== QUANT_BODY_IS_NOT_EMPTY
)
818 mod_tlen
= tlen
+ (SIZE_OP_EMPTY_CHECK_START
+ SIZE_OP_EMPTY_CHECK_END
);
822 int_multiply_cmp(tlen
, qn
->lower
, QUANTIFIER_EXPAND_LIMIT_SIZE
) <= 0)) {
823 if (qn
->lower
== 1 && tlen
> QUANTIFIER_EXPAND_LIMIT_SIZE
) {
827 len
= tlen
* qn
->lower
;
831 if (IS_NOT_NULL(qn
->head_exact
))
832 len
+= SIZE_OP_PUSH_OR_JUMP_EXACT1
+ mod_tlen
+ SIZE_OP_JUMP
;
833 else if (IS_NOT_NULL(qn
->next_head_exact
))
834 len
+= SIZE_OP_PUSH_IF_PEEK_NEXT
+ mod_tlen
+ SIZE_OP_JUMP
;
836 len
+= SIZE_OP_PUSH
+ mod_tlen
+ SIZE_OP_JUMP
;
839 len
+= SIZE_OP_JUMP
+ mod_tlen
+ SIZE_OP_PUSH
;
841 else if (qn
->upper
== 0 && qn
->is_refered
!= 0) { /* /(?<n>..){0}/ */
842 len
= SIZE_OP_JUMP
+ tlen
;
844 else if (!infinite
&& qn
->greedy
&&
846 int_multiply_cmp(tlen
+ SIZE_OP_PUSH
, qn
->upper
,
847 QUANTIFIER_EXPAND_LIMIT_SIZE
) <= 0)) {
848 len
= tlen
* qn
->lower
;
849 len
+= (SIZE_OP_PUSH
+ tlen
) * (qn
->upper
- qn
->lower
);
851 else if (!qn
->greedy
&& qn
->upper
== 1 && qn
->lower
== 0) { /* '??' */
852 len
= SIZE_OP_PUSH
+ SIZE_OP_JUMP
+ tlen
;
855 len
= SIZE_OP_REPEAT_INC
856 + mod_tlen
+ SIZE_OPCODE
+ SIZE_RELADDR
+ SIZE_MEMNUM
;
863 compile_quantifier_node(QuantNode
* qn
, regex_t
* reg
, ScanEnv
* env
)
866 int infinite
= IS_REPEAT_INFINITE(qn
->upper
);
867 enum QuantBodyEmpty empty_info
= qn
->body_empty_info
;
868 int tlen
= compile_length_tree(NODE_QUANT_BODY(qn
), reg
);
870 if (tlen
< 0) return tlen
;
871 if (tlen
== 0) return 0;
873 if (is_anychar_infinite_greedy(qn
) &&
875 int_multiply_cmp(tlen
, qn
->lower
, QUANTIFIER_EXPAND_LIMIT_SIZE
) <= 0)) {
876 r
= compile_tree_n_times(NODE_QUANT_BODY(qn
), qn
->lower
, reg
, env
);
877 if (r
!= 0) return r
;
878 if (IS_NOT_NULL(qn
->next_head_exact
)) {
879 if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn
), reg
)))
880 r
= add_opcode(reg
, OP_ANYCHAR_ML_STAR_PEEK_NEXT
);
882 r
= add_opcode(reg
, OP_ANYCHAR_STAR_PEEK_NEXT
);
883 if (r
!= 0) return r
;
884 return add_bytes(reg
, STR_(qn
->next_head_exact
)->s
, 1);
887 if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn
), reg
)))
888 return add_opcode(reg
, OP_ANYCHAR_ML_STAR
);
890 return add_opcode(reg
, OP_ANYCHAR_STAR
);
894 if (empty_info
== QUANT_BODY_IS_NOT_EMPTY
)
897 mod_tlen
= tlen
+ (SIZE_OP_EMPTY_CHECK_START
+ SIZE_OP_EMPTY_CHECK_END
);
901 int_multiply_cmp(tlen
, qn
->lower
, QUANTIFIER_EXPAND_LIMIT_SIZE
) <= 0)) {
902 if (qn
->lower
== 1 && tlen
> QUANTIFIER_EXPAND_LIMIT_SIZE
) {
904 if (IS_NOT_NULL(qn
->head_exact
))
905 r
= add_opcode_rel_addr(reg
, OP_JUMP
, SIZE_OP_PUSH_OR_JUMP_EXACT1
);
906 else if (IS_NOT_NULL(qn
->next_head_exact
))
907 r
= add_opcode_rel_addr(reg
, OP_JUMP
, SIZE_OP_PUSH_IF_PEEK_NEXT
);
909 r
= add_opcode_rel_addr(reg
, OP_JUMP
, SIZE_OP_PUSH
);
912 r
= add_opcode_rel_addr(reg
, OP_JUMP
, SIZE_OP_JUMP
);
914 if (r
!= 0) return r
;
917 r
= compile_tree_n_times(NODE_QUANT_BODY(qn
), qn
->lower
, reg
, env
);
918 if (r
!= 0) return r
;
922 if (IS_NOT_NULL(qn
->head_exact
)) {
923 r
= add_opcode_rel_addr(reg
, OP_PUSH_OR_JUMP_EXACT1
,
924 mod_tlen
+ SIZE_OP_JUMP
);
925 if (r
!= 0) return r
;
926 add_bytes(reg
, STR_(qn
->head_exact
)->s
, 1);
927 r
= compile_tree_empty_check(NODE_QUANT_BODY(qn
), reg
, empty_info
, env
);
928 if (r
!= 0) return r
;
929 r
= add_opcode_rel_addr(reg
, OP_JUMP
,
930 -(mod_tlen
+ (int )SIZE_OP_JUMP
+ (int )SIZE_OP_PUSH_OR_JUMP_EXACT1
));
932 else if (IS_NOT_NULL(qn
->next_head_exact
)) {
933 r
= add_opcode_rel_addr(reg
, OP_PUSH_IF_PEEK_NEXT
,
934 mod_tlen
+ SIZE_OP_JUMP
);
935 if (r
!= 0) return r
;
936 add_bytes(reg
, STR_(qn
->next_head_exact
)->s
, 1);
937 r
= compile_tree_empty_check(NODE_QUANT_BODY(qn
), reg
, empty_info
, env
);
938 if (r
!= 0) return r
;
939 r
= add_opcode_rel_addr(reg
, OP_JUMP
,
940 -(mod_tlen
+ (int )SIZE_OP_JUMP
+ (int )SIZE_OP_PUSH_IF_PEEK_NEXT
));
943 r
= add_opcode_rel_addr(reg
, OP_PUSH
, mod_tlen
+ SIZE_OP_JUMP
);
944 if (r
!= 0) return r
;
945 r
= compile_tree_empty_check(NODE_QUANT_BODY(qn
), reg
, empty_info
, env
);
946 if (r
!= 0) return r
;
947 r
= add_opcode_rel_addr(reg
, OP_JUMP
,
948 -(mod_tlen
+ (int )SIZE_OP_JUMP
+ (int )SIZE_OP_PUSH
));
952 r
= add_opcode_rel_addr(reg
, OP_JUMP
, mod_tlen
);
953 if (r
!= 0) return r
;
954 r
= compile_tree_empty_check(NODE_QUANT_BODY(qn
), reg
, empty_info
, env
);
955 if (r
!= 0) return r
;
956 r
= add_opcode_rel_addr(reg
, OP_PUSH
, -(mod_tlen
+ (int )SIZE_OP_PUSH
));
959 else if (qn
->upper
== 0 && qn
->is_refered
!= 0) { /* /(?<n>..){0}/ */
960 r
= add_opcode_rel_addr(reg
, OP_JUMP
, tlen
);
961 if (r
!= 0) return r
;
962 r
= compile_tree(NODE_QUANT_BODY(qn
), reg
, env
);
964 else if (! infinite
&& qn
->greedy
&&
966 int_multiply_cmp(tlen
+ SIZE_OP_PUSH
, qn
->upper
,
967 QUANTIFIER_EXPAND_LIMIT_SIZE
) <= 0)) {
968 int n
= qn
->upper
- qn
->lower
;
970 r
= compile_tree_n_times(NODE_QUANT_BODY(qn
), qn
->lower
, reg
, env
);
971 if (r
!= 0) return r
;
973 for (i
= 0; i
< n
; i
++) {
974 r
= add_opcode_rel_addr(reg
, OP_PUSH
,
975 (n
- i
) * tlen
+ (n
- i
- 1) * SIZE_OP_PUSH
);
976 if (r
!= 0) return r
;
977 r
= compile_tree(NODE_QUANT_BODY(qn
), reg
, env
);
978 if (r
!= 0) return r
;
981 else if (! qn
->greedy
&& qn
->upper
== 1 && qn
->lower
== 0) { /* '??' */
982 r
= add_opcode_rel_addr(reg
, OP_PUSH
, SIZE_OP_JUMP
);
983 if (r
!= 0) return r
;
984 r
= add_opcode_rel_addr(reg
, OP_JUMP
, tlen
);
985 if (r
!= 0) return r
;
986 r
= compile_tree(NODE_QUANT_BODY(qn
), reg
, env
);
989 r
= compile_range_repeat_node(qn
, mod_tlen
, empty_info
, reg
, env
);
995 compile_length_option_node(EnclosureNode
* node
, regex_t
* reg
)
998 OnigOptionType prev
= reg
->options
;
1000 reg
->options
= node
->o
.options
;
1001 tlen
= compile_length_tree(NODE_ENCLOSURE_BODY(node
), reg
);
1002 reg
->options
= prev
;
1008 compile_option_node(EnclosureNode
* node
, regex_t
* reg
, ScanEnv
* env
)
1011 OnigOptionType prev
= reg
->options
;
1013 reg
->options
= node
->o
.options
;
1014 r
= compile_tree(NODE_ENCLOSURE_BODY(node
), reg
, env
);
1015 reg
->options
= prev
;
1021 compile_length_enclosure_node(EnclosureNode
* node
, regex_t
* reg
)
1026 if (node
->type
== ENCLOSURE_OPTION
)
1027 return compile_length_option_node(node
, reg
);
1029 if (NODE_ENCLOSURE_BODY(node
)) {
1030 tlen
= compile_length_tree(NODE_ENCLOSURE_BODY(node
), reg
);
1031 if (tlen
< 0) return tlen
;
1036 switch (node
->type
) {
1037 case ENCLOSURE_MEMORY
:
1040 if (node
->m
.regnum
== 0 && NODE_IS_CALLED(node
)) {
1041 len
= tlen
+ SIZE_OP_CALL
+ SIZE_OP_JUMP
+ SIZE_OP_RETURN
;
1045 if (NODE_IS_CALLED(node
)) {
1046 len
= SIZE_OP_MEMORY_START_PUSH
+ tlen
1047 + SIZE_OP_CALL
+ SIZE_OP_JUMP
+ SIZE_OP_RETURN
;
1048 if (MEM_STATUS_AT0(reg
->bt_mem_end
, node
->m
.regnum
))
1049 len
+= (NODE_IS_RECURSION(node
)
1050 ? SIZE_OP_MEMORY_END_PUSH_REC
: SIZE_OP_MEMORY_END_PUSH
);
1052 len
+= (NODE_IS_RECURSION(node
)
1053 ? SIZE_OP_MEMORY_END_REC
: SIZE_OP_MEMORY_END
);
1055 else if (NODE_IS_RECURSION(node
)) {
1056 len
= SIZE_OP_MEMORY_START_PUSH
;
1057 len
+= tlen
+ (MEM_STATUS_AT0(reg
->bt_mem_end
, node
->m
.regnum
)
1058 ? SIZE_OP_MEMORY_END_PUSH_REC
: SIZE_OP_MEMORY_END_REC
);
1063 if (MEM_STATUS_AT0(reg
->bt_mem_start
, node
->m
.regnum
))
1064 len
= SIZE_OP_MEMORY_START_PUSH
;
1066 len
= SIZE_OP_MEMORY_START
;
1068 len
+= tlen
+ (MEM_STATUS_AT0(reg
->bt_mem_end
, node
->m
.regnum
)
1069 ? SIZE_OP_MEMORY_END_PUSH
: SIZE_OP_MEMORY_END
);
1073 case ENCLOSURE_STOP_BACKTRACK
:
1074 if (NODE_IS_STOP_BT_SIMPLE_REPEAT(node
)) {
1075 QuantNode
* qn
= QUANT_(NODE_ENCLOSURE_BODY(node
));
1076 tlen
= compile_length_tree(NODE_QUANT_BODY(qn
), reg
);
1077 if (tlen
< 0) return tlen
;
1079 len
= tlen
* qn
->lower
1080 + SIZE_OP_PUSH
+ tlen
+ SIZE_OP_POP_OUT
+ SIZE_OP_JUMP
;
1083 len
= SIZE_OP_ATOMIC_START
+ tlen
+ SIZE_OP_ATOMIC_END
;
1087 case ENCLOSURE_IF_ELSE
:
1089 Node
* cond
= NODE_ENCLOSURE_BODY(node
);
1090 Node
* Then
= node
->te
.Then
;
1091 Node
* Else
= node
->te
.Else
;
1093 len
= compile_length_tree(cond
, reg
);
1094 if (len
< 0) return len
;
1095 len
+= SIZE_OP_PUSH
;
1096 len
+= SIZE_OP_ATOMIC_START
+ SIZE_OP_ATOMIC_END
;
1098 if (IS_NOT_NULL(Then
)) {
1099 tlen
= compile_length_tree(Then
, reg
);
1100 if (tlen
< 0) return tlen
;
1104 if (IS_NOT_NULL(Else
)) {
1105 len
+= SIZE_OP_JUMP
;
1106 tlen
= compile_length_tree(Else
, reg
);
1107 if (tlen
< 0) return tlen
;
1114 return ONIGERR_TYPE_BUG
;
1121 static int get_char_length_tree(Node
* node
, regex_t
* reg
, int* len
);
1124 compile_enclosure_memory_node(EnclosureNode
* node
, regex_t
* reg
, ScanEnv
* env
)
1130 if (node
->m
.regnum
== 0 && NODE_IS_CALLED(node
)) {
1131 r
= add_opcode(reg
, OP_CALL
);
1132 if (r
!= 0) return r
;
1133 node
->m
.called_addr
= BB_GET_OFFSET_POS(reg
) + SIZE_ABSADDR
+ SIZE_OP_JUMP
;
1134 NODE_STATUS_ADD(node
, ADDR_FIXED
);
1135 r
= add_abs_addr(reg
, (int )node
->m
.called_addr
);
1136 if (r
!= 0) return r
;
1137 len
= compile_length_tree(NODE_ENCLOSURE_BODY(node
), reg
);
1138 len
+= SIZE_OP_RETURN
;
1139 r
= add_opcode_rel_addr(reg
, OP_JUMP
, len
);
1140 if (r
!= 0) return r
;
1142 r
= compile_tree(NODE_ENCLOSURE_BODY(node
), reg
, env
);
1143 if (r
!= 0) return r
;
1144 r
= add_opcode(reg
, OP_RETURN
);
1148 if (NODE_IS_CALLED(node
)) {
1149 r
= add_opcode(reg
, OP_CALL
);
1150 if (r
!= 0) return r
;
1151 node
->m
.called_addr
= BB_GET_OFFSET_POS(reg
) + SIZE_ABSADDR
+ SIZE_OP_JUMP
;
1152 NODE_STATUS_ADD(node
, ADDR_FIXED
);
1153 r
= add_abs_addr(reg
, (int )node
->m
.called_addr
);
1154 if (r
!= 0) return r
;
1155 len
= compile_length_tree(NODE_ENCLOSURE_BODY(node
), reg
);
1156 len
+= (SIZE_OP_MEMORY_START_PUSH
+ SIZE_OP_RETURN
);
1157 if (MEM_STATUS_AT0(reg
->bt_mem_end
, node
->m
.regnum
))
1158 len
+= (NODE_IS_RECURSION(node
)
1159 ? SIZE_OP_MEMORY_END_PUSH_REC
: SIZE_OP_MEMORY_END_PUSH
);
1161 len
+= (NODE_IS_RECURSION(node
)
1162 ? SIZE_OP_MEMORY_END_REC
: SIZE_OP_MEMORY_END
);
1164 r
= add_opcode_rel_addr(reg
, OP_JUMP
, len
);
1165 if (r
!= 0) return r
;
1169 if (MEM_STATUS_AT0(reg
->bt_mem_start
, node
->m
.regnum
))
1170 r
= add_opcode(reg
, OP_MEMORY_START_PUSH
);
1172 r
= add_opcode(reg
, OP_MEMORY_START
);
1173 if (r
!= 0) return r
;
1174 r
= add_mem_num(reg
, node
->m
.regnum
);
1175 if (r
!= 0) return r
;
1176 r
= compile_tree(NODE_ENCLOSURE_BODY(node
), reg
, env
);
1177 if (r
!= 0) return r
;
1180 if (MEM_STATUS_AT0(reg
->bt_mem_end
, node
->m
.regnum
))
1181 r
= add_opcode(reg
, (NODE_IS_RECURSION(node
)
1182 ? OP_MEMORY_END_PUSH_REC
: OP_MEMORY_END_PUSH
));
1184 r
= add_opcode(reg
, (NODE_IS_RECURSION(node
)
1185 ? OP_MEMORY_END_REC
: OP_MEMORY_END
));
1186 if (r
!= 0) return r
;
1187 r
= add_mem_num(reg
, node
->m
.regnum
);
1188 if (NODE_IS_CALLED(node
)) {
1189 if (r
!= 0) return r
;
1190 r
= add_opcode(reg
, OP_RETURN
);
1193 if (MEM_STATUS_AT0(reg
->bt_mem_end
, node
->m
.regnum
))
1194 r
= add_opcode(reg
, OP_MEMORY_END_PUSH
);
1196 r
= add_opcode(reg
, OP_MEMORY_END
);
1197 if (r
!= 0) return r
;
1198 r
= add_mem_num(reg
, node
->m
.regnum
);
1205 compile_enclosure_node(EnclosureNode
* node
, regex_t
* reg
, ScanEnv
* env
)
1209 switch (node
->type
) {
1210 case ENCLOSURE_MEMORY
:
1211 r
= compile_enclosure_memory_node(node
, reg
, env
);
1214 case ENCLOSURE_OPTION
:
1215 r
= compile_option_node(node
, reg
, env
);
1218 case ENCLOSURE_STOP_BACKTRACK
:
1219 if (NODE_IS_STOP_BT_SIMPLE_REPEAT(node
)) {
1220 QuantNode
* qn
= QUANT_(NODE_ENCLOSURE_BODY(node
));
1221 r
= compile_tree_n_times(NODE_QUANT_BODY(qn
), qn
->lower
, reg
, env
);
1222 if (r
!= 0) return r
;
1224 len
= compile_length_tree(NODE_QUANT_BODY(qn
), reg
);
1225 if (len
< 0) return len
;
1227 r
= add_opcode_rel_addr(reg
, OP_PUSH
, len
+ SIZE_OP_POP_OUT
+ SIZE_OP_JUMP
);
1228 if (r
!= 0) return r
;
1229 r
= compile_tree(NODE_QUANT_BODY(qn
), reg
, env
);
1230 if (r
!= 0) return r
;
1231 r
= add_opcode(reg
, OP_POP_OUT
);
1232 if (r
!= 0) return r
;
1233 r
= add_opcode_rel_addr(reg
, OP_JUMP
,
1234 -((int )SIZE_OP_PUSH
+ len
+ (int )SIZE_OP_POP_OUT
+ (int )SIZE_OP_JUMP
));
1237 r
= add_opcode(reg
, OP_ATOMIC_START
);
1238 if (r
!= 0) return r
;
1239 r
= compile_tree(NODE_ENCLOSURE_BODY(node
), reg
, env
);
1240 if (r
!= 0) return r
;
1241 r
= add_opcode(reg
, OP_ATOMIC_END
);
1245 case ENCLOSURE_IF_ELSE
:
1247 int cond_len
, then_len
, jump_len
;
1248 Node
* cond
= NODE_ENCLOSURE_BODY(node
);
1249 Node
* Then
= node
->te
.Then
;
1250 Node
* Else
= node
->te
.Else
;
1252 r
= add_opcode(reg
, OP_ATOMIC_START
);
1253 if (r
!= 0) return r
;
1255 cond_len
= compile_length_tree(cond
, reg
);
1256 if (cond_len
< 0) return cond_len
;
1257 if (IS_NOT_NULL(Then
)) {
1258 then_len
= compile_length_tree(Then
, reg
);
1259 if (then_len
< 0) return then_len
;
1264 jump_len
= cond_len
+ then_len
+ SIZE_OP_ATOMIC_END
;
1265 if (IS_NOT_NULL(Else
)) jump_len
+= SIZE_OP_JUMP
;
1267 r
= add_opcode_rel_addr(reg
, OP_PUSH
, jump_len
);
1268 if (r
!= 0) return r
;
1269 r
= compile_tree(cond
, reg
, env
);
1270 if (r
!= 0) return r
;
1271 r
= add_opcode(reg
, OP_ATOMIC_END
);
1272 if (r
!= 0) return r
;
1274 if (IS_NOT_NULL(Then
)) {
1275 r
= compile_tree(Then
, reg
, env
);
1276 if (r
!= 0) return r
;
1279 if (IS_NOT_NULL(Else
)) {
1280 int else_len
= compile_length_tree(Else
, reg
);
1281 r
= add_opcode_rel_addr(reg
, OP_JUMP
, else_len
);
1282 if (r
!= 0) return r
;
1283 r
= compile_tree(Else
, reg
, env
);
1289 return ONIGERR_TYPE_BUG
;
1297 compile_length_anchor_node(AnchorNode
* node
, regex_t
* reg
)
1302 if (IS_NOT_NULL(NODE_ANCHOR_BODY(node
))) {
1303 tlen
= compile_length_tree(NODE_ANCHOR_BODY(node
), reg
);
1304 if (tlen
< 0) return tlen
;
1307 switch (node
->type
) {
1308 case ANCHOR_PREC_READ
:
1309 len
= SIZE_OP_PREC_READ_START
+ tlen
+ SIZE_OP_PREC_READ_END
;
1311 case ANCHOR_PREC_READ_NOT
:
1312 len
= SIZE_OP_PREC_READ_NOT_START
+ tlen
+ SIZE_OP_PREC_READ_NOT_END
;
1314 case ANCHOR_LOOK_BEHIND
:
1315 len
= SIZE_OP_LOOK_BEHIND
+ tlen
;
1317 case ANCHOR_LOOK_BEHIND_NOT
:
1318 len
= SIZE_OP_LOOK_BEHIND_NOT_START
+ tlen
+ SIZE_OP_LOOK_BEHIND_NOT_END
;
1321 case ANCHOR_WORD_BOUNDARY
:
1322 case ANCHOR_NO_WORD_BOUNDARY
:
1323 #ifdef USE_WORD_BEGIN_END
1324 case ANCHOR_WORD_BEGIN
:
1325 case ANCHOR_WORD_END
:
1327 len
= SIZE_OP_WORD_BOUNDARY
;
1330 case ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY
:
1331 case ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY
:
1344 compile_anchor_node(AnchorNode
* node
, regex_t
* reg
, ScanEnv
* env
)
1349 switch (node
->type
) {
1350 case ANCHOR_BEGIN_BUF
: r
= add_opcode(reg
, OP_BEGIN_BUF
); break;
1351 case ANCHOR_END_BUF
: r
= add_opcode(reg
, OP_END_BUF
); break;
1352 case ANCHOR_BEGIN_LINE
: r
= add_opcode(reg
, OP_BEGIN_LINE
); break;
1353 case ANCHOR_END_LINE
: r
= add_opcode(reg
, OP_END_LINE
); break;
1354 case ANCHOR_SEMI_END_BUF
: r
= add_opcode(reg
, OP_SEMI_END_BUF
); break;
1355 case ANCHOR_BEGIN_POSITION
: r
= add_opcode(reg
, OP_BEGIN_POSITION
); break;
1357 case ANCHOR_WORD_BOUNDARY
:
1358 op
= OP_WORD_BOUNDARY
;
1360 r
= add_opcode(reg
, op
);
1361 if (r
!= 0) return r
;
1362 r
= add_mode(reg
, (ModeType
)node
->ascii_mode
);
1365 case ANCHOR_NO_WORD_BOUNDARY
:
1366 op
= OP_NO_WORD_BOUNDARY
; goto word
;
1368 #ifdef USE_WORD_BEGIN_END
1369 case ANCHOR_WORD_BEGIN
:
1370 op
= OP_WORD_BEGIN
; goto word
;
1372 case ANCHOR_WORD_END
:
1373 op
= OP_WORD_END
; goto word
;
1377 case ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY
:
1378 r
= add_opcode(reg
, OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY
);
1381 case ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY
:
1382 r
= add_opcode(reg
, OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY
);
1385 case ANCHOR_PREC_READ
:
1386 r
= add_opcode(reg
, OP_PREC_READ_START
);
1387 if (r
!= 0) return r
;
1388 r
= compile_tree(NODE_ANCHOR_BODY(node
), reg
, env
);
1389 if (r
!= 0) return r
;
1390 r
= add_opcode(reg
, OP_PREC_READ_END
);
1393 case ANCHOR_PREC_READ_NOT
:
1394 len
= compile_length_tree(NODE_ANCHOR_BODY(node
), reg
);
1395 if (len
< 0) return len
;
1396 r
= add_opcode_rel_addr(reg
, OP_PREC_READ_NOT_START
, len
+ SIZE_OP_PREC_READ_NOT_END
);
1397 if (r
!= 0) return r
;
1398 r
= compile_tree(NODE_ANCHOR_BODY(node
), reg
, env
);
1399 if (r
!= 0) return r
;
1400 r
= add_opcode(reg
, OP_PREC_READ_NOT_END
);
1403 case ANCHOR_LOOK_BEHIND
:
1406 r
= add_opcode(reg
, OP_LOOK_BEHIND
);
1407 if (r
!= 0) return r
;
1408 if (node
->char_len
< 0) {
1409 r
= get_char_length_tree(NODE_ANCHOR_BODY(node
), reg
, &n
);
1410 if (r
!= 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN
;
1415 r
= add_length(reg
, n
);
1416 if (r
!= 0) return r
;
1417 r
= compile_tree(NODE_ANCHOR_BODY(node
), reg
, env
);
1421 case ANCHOR_LOOK_BEHIND_NOT
:
1425 len
= compile_length_tree(NODE_ANCHOR_BODY(node
), reg
);
1426 r
= add_opcode_rel_addr(reg
, OP_LOOK_BEHIND_NOT_START
,
1427 len
+ SIZE_OP_LOOK_BEHIND_NOT_END
);
1428 if (r
!= 0) return r
;
1429 if (node
->char_len
< 0) {
1430 r
= get_char_length_tree(NODE_ANCHOR_BODY(node
), reg
, &n
);
1431 if (r
!= 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN
;
1435 r
= add_length(reg
, n
);
1436 if (r
!= 0) return r
;
1437 r
= compile_tree(NODE_ANCHOR_BODY(node
), reg
, env
);
1438 if (r
!= 0) return r
;
1439 r
= add_opcode(reg
, OP_LOOK_BEHIND_NOT_END
);
1444 return ONIGERR_TYPE_BUG
;
1452 compile_gimmick_node(GimmickNode
* node
, regex_t
* reg
)
1456 switch (node
->type
) {
1458 r
= add_opcode(reg
, OP_FAIL
);
1462 r
= add_opcode(reg
, OP_PUSH_SAVE_VAL
);
1463 if (r
!= 0) return r
;
1464 r
= add_save_type(reg
, SAVE_KEEP
);
1465 if (r
!= 0) return r
;
1466 r
= add_mem_num(reg
, node
->id
);
1470 r
= add_opcode(reg
, OP_PUSH_SAVE_VAL
);
1471 if (r
!= 0) return r
;
1472 r
= add_save_type(reg
, node
->detail_type
);
1473 if (r
!= 0) return r
;
1474 r
= add_mem_num(reg
, node
->id
);
1477 case GIMMICK_UPDATE_VAR
:
1478 r
= add_opcode(reg
, OP_UPDATE_VAR
);
1479 if (r
!= 0) return r
;
1480 r
= add_update_var_type(reg
, node
->detail_type
);
1481 if (r
!= 0) return r
;
1482 r
= add_mem_num(reg
, node
->id
);
1486 case GIMMICK_CALLOUT
:
1487 switch (node
->detail_type
) {
1488 case ONIG_CALLOUT_OF_CONTENTS
:
1489 case ONIG_CALLOUT_OF_NAME
:
1491 r
= add_opcode(reg
, (node
->detail_type
== ONIG_CALLOUT_OF_CONTENTS
) ?
1492 OP_CALLOUT_CONTENTS
: OP_CALLOUT_NAME
);
1493 if (r
!= 0) return r
;
1494 if (node
->detail_type
== ONIG_CALLOUT_OF_NAME
) {
1495 r
= add_mem_num(reg
, node
->id
);
1496 if (r
!= 0) return r
;
1498 r
= add_mem_num(reg
, node
->num
);
1499 if (r
!= 0) return r
;
1504 r
= ONIGERR_TYPE_BUG
;
1514 compile_length_gimmick_node(GimmickNode
* node
, regex_t
* reg
)
1518 switch (node
->type
) {
1525 len
= SIZE_OP_PUSH_SAVE_VAL
;
1528 case GIMMICK_UPDATE_VAR
:
1529 len
= SIZE_OP_UPDATE_VAR
;
1533 case GIMMICK_CALLOUT
:
1534 switch (node
->detail_type
) {
1535 case ONIG_CALLOUT_OF_CONTENTS
:
1536 len
= SIZE_OP_CALLOUT_CONTENTS
;
1538 case ONIG_CALLOUT_OF_NAME
:
1539 len
= SIZE_OP_CALLOUT_NAME
;
1543 len
= ONIGERR_TYPE_BUG
;
1554 compile_length_tree(Node
* node
, regex_t
* reg
)
1558 switch (NODE_TYPE(node
)) {
1562 r
= compile_length_tree(NODE_CAR(node
), reg
);
1563 if (r
< 0) return r
;
1565 } while (IS_NOT_NULL(node
= NODE_CDR(node
)));
1575 r
+= compile_length_tree(NODE_CAR(node
), reg
);
1577 } while (IS_NOT_NULL(node
= NODE_CDR(node
)));
1578 r
+= (SIZE_OP_PUSH
+ SIZE_OP_JUMP
) * (n
- 1);
1583 if (NODE_STRING_IS_RAW(node
))
1584 r
= compile_length_string_raw_node(STR_(node
), reg
);
1586 r
= compile_length_string_node(node
, reg
);
1590 r
= compile_length_cclass_node(CCLASS_(node
), reg
);
1599 BackRefNode
* br
= BACKREF_(node
);
1601 if (NODE_IS_CHECKER(node
)) {
1602 #ifdef USE_BACKREF_WITH_LEVEL
1603 if (NODE_IS_NEST_LEVEL(node
)) {
1604 r
= SIZE_OPCODE
+ SIZE_LENGTH
+ SIZE_LENGTH
+ (SIZE_MEMNUM
* br
->back_num
);
1608 r
= SIZE_OPCODE
+ SIZE_LENGTH
+ (SIZE_MEMNUM
* br
->back_num
);
1611 #ifdef USE_BACKREF_WITH_LEVEL
1612 if (NODE_IS_NEST_LEVEL(node
)) {
1613 r
= SIZE_OPCODE
+ SIZE_OPTION
+ SIZE_LENGTH
+
1614 SIZE_LENGTH
+ (SIZE_MEMNUM
* br
->back_num
);
1618 if (br
->back_num
== 1) {
1619 r
= ((!IS_IGNORECASE(reg
->options
) && br
->back_static
[0] <= 2)
1620 ? SIZE_OPCODE
: (SIZE_OPCODE
+ SIZE_MEMNUM
));
1623 r
= SIZE_OPCODE
+ SIZE_LENGTH
+ (SIZE_MEMNUM
* br
->back_num
);
1636 r
= compile_length_quantifier_node(QUANT_(node
), reg
);
1639 case NODE_ENCLOSURE
:
1640 r
= compile_length_enclosure_node(ENCLOSURE_(node
), reg
);
1644 r
= compile_length_anchor_node(ANCHOR_(node
), reg
);
1648 r
= compile_length_gimmick_node(GIMMICK_(node
), reg
);
1652 return ONIGERR_TYPE_BUG
;
1660 compile_tree(Node
* node
, regex_t
* reg
, ScanEnv
* env
)
1662 int n
, len
, pos
, r
= 0;
1664 switch (NODE_TYPE(node
)) {
1667 r
= compile_tree(NODE_CAR(node
), reg
, env
);
1668 } while (r
== 0 && IS_NOT_NULL(node
= NODE_CDR(node
)));
1676 len
+= compile_length_tree(NODE_CAR(x
), reg
);
1677 if (IS_NOT_NULL(NODE_CDR(x
))) {
1678 len
+= SIZE_OP_PUSH
+ SIZE_OP_JUMP
;
1680 } while (IS_NOT_NULL(x
= NODE_CDR(x
)));
1681 pos
= reg
->used
+ len
; /* goal position */
1684 len
= compile_length_tree(NODE_CAR(node
), reg
);
1685 if (IS_NOT_NULL(NODE_CDR(node
))) {
1686 enum OpCode push
= NODE_IS_SUPER(node
) ? OP_PUSH_SUPER
: OP_PUSH
;
1687 r
= add_opcode_rel_addr(reg
, push
, len
+ SIZE_OP_JUMP
);
1690 r
= compile_tree(NODE_CAR(node
), reg
, env
);
1692 if (IS_NOT_NULL(NODE_CDR(node
))) {
1693 len
= pos
- (reg
->used
+ SIZE_OP_JUMP
);
1694 r
= add_opcode_rel_addr(reg
, OP_JUMP
, len
);
1697 } while (IS_NOT_NULL(node
= NODE_CDR(node
)));
1702 if (NODE_STRING_IS_RAW(node
))
1703 r
= compile_string_raw_node(STR_(node
), reg
);
1705 r
= compile_string_node(node
, reg
);
1709 r
= compile_cclass_node(CCLASS_(node
), reg
);
1716 switch (CTYPE_(node
)->ctype
) {
1718 if (IS_MULTILINE(CTYPE_OPTION(node
, reg
)))
1719 r
= add_opcode(reg
, OP_ANYCHAR_ML
);
1721 r
= add_opcode(reg
, OP_ANYCHAR
);
1724 case ONIGENC_CTYPE_WORD
:
1725 if (CTYPE_(node
)->ascii_mode
== 0) {
1726 op
= CTYPE_(node
)->not != 0 ? OP_NO_WORD
: OP_WORD
;
1729 op
= CTYPE_(node
)->not != 0 ? OP_NO_WORD_ASCII
: OP_WORD_ASCII
;
1731 r
= add_opcode(reg
, op
);
1735 return ONIGERR_TYPE_BUG
;
1743 BackRefNode
* br
= BACKREF_(node
);
1745 if (NODE_IS_CHECKER(node
)) {
1746 #ifdef USE_BACKREF_WITH_LEVEL
1747 if (NODE_IS_NEST_LEVEL(node
)) {
1748 r
= add_opcode(reg
, OP_BACKREF_CHECK_WITH_LEVEL
);
1749 if (r
!= 0) return r
;
1750 r
= add_length(reg
, br
->nest_level
);
1751 if (r
!= 0) return r
;
1756 r
= add_opcode(reg
, OP_BACKREF_CHECK
);
1757 if (r
!= 0) return r
;
1760 goto add_bacref_mems
;
1763 #ifdef USE_BACKREF_WITH_LEVEL
1764 if (NODE_IS_NEST_LEVEL(node
)) {
1765 r
= add_opcode(reg
, OP_BACKREF_WITH_LEVEL
);
1766 if (r
!= 0) return r
;
1767 r
= add_option(reg
, (reg
->options
& ONIG_OPTION_IGNORECASE
));
1768 if (r
!= 0) return r
;
1769 r
= add_length(reg
, br
->nest_level
);
1770 if (r
!= 0) return r
;
1772 goto add_bacref_mems
;
1776 if (br
->back_num
== 1) {
1777 n
= br
->back_static
[0];
1778 if (IS_IGNORECASE(reg
->options
)) {
1779 r
= add_opcode(reg
, OP_BACKREF_N_IC
);
1780 if (r
!= 0) return r
;
1781 r
= add_mem_num(reg
, n
);
1785 case 1: r
= add_opcode(reg
, OP_BACKREF1
); break;
1786 case 2: r
= add_opcode(reg
, OP_BACKREF2
); break;
1788 r
= add_opcode(reg
, OP_BACKREF_N
);
1789 if (r
!= 0) return r
;
1790 r
= add_mem_num(reg
, n
);
1799 if (IS_IGNORECASE(reg
->options
)) {
1800 r
= add_opcode(reg
, OP_BACKREF_MULTI_IC
);
1803 r
= add_opcode(reg
, OP_BACKREF_MULTI
);
1805 if (r
!= 0) return r
;
1808 r
= add_length(reg
, br
->back_num
);
1809 if (r
!= 0) return r
;
1811 for (i
= br
->back_num
- 1; i
>= 0; i
--) {
1812 r
= add_mem_num(reg
, p
[i
]);
1813 if (r
!= 0) return r
;
1822 r
= compile_call(CALL_(node
), reg
, env
);
1827 r
= compile_quantifier_node(QUANT_(node
), reg
, env
);
1830 case NODE_ENCLOSURE
:
1831 r
= compile_enclosure_node(ENCLOSURE_(node
), reg
, env
);
1835 r
= compile_anchor_node(ANCHOR_(node
), reg
, env
);
1839 r
= compile_gimmick_node(GIMMICK_(node
), reg
);
1844 fprintf(stderr
, "compile_tree: undefined node type %d\n", NODE_TYPE(node
));
1853 noname_disable_map(Node
** plink
, GroupNumRemap
* map
, int* counter
)
1856 Node
* node
= *plink
;
1858 switch (NODE_TYPE(node
)) {
1862 r
= noname_disable_map(&(NODE_CAR(node
)), map
, counter
);
1863 } while (r
== 0 && IS_NOT_NULL(node
= NODE_CDR(node
)));
1868 Node
** ptarget
= &(NODE_BODY(node
));
1869 Node
* old
= *ptarget
;
1870 r
= noname_disable_map(ptarget
, map
, counter
);
1871 if (*ptarget
!= old
&& NODE_TYPE(*ptarget
) == NODE_QUANT
) {
1872 onig_reduce_nested_quantifier(node
, *ptarget
);
1877 case NODE_ENCLOSURE
:
1879 EnclosureNode
* en
= ENCLOSURE_(node
);
1880 if (en
->type
== ENCLOSURE_MEMORY
) {
1881 if (NODE_IS_NAMED_GROUP(node
)) {
1883 map
[en
->m
.regnum
].new_val
= *counter
;
1884 en
->m
.regnum
= *counter
;
1885 r
= noname_disable_map(&(NODE_BODY(node
)), map
, counter
);
1888 *plink
= NODE_BODY(node
);
1889 NODE_BODY(node
) = NULL_NODE
;
1890 onig_node_free(node
);
1891 r
= noname_disable_map(plink
, map
, counter
);
1894 else if (en
->type
== ENCLOSURE_IF_ELSE
) {
1895 r
= noname_disable_map(&(NODE_ENCLOSURE_BODY(en
)), map
, counter
);
1896 if (r
!= 0) return r
;
1897 if (IS_NOT_NULL(en
->te
.Then
)) {
1898 r
= noname_disable_map(&(en
->te
.Then
), map
, counter
);
1899 if (r
!= 0) return r
;
1901 if (IS_NOT_NULL(en
->te
.Else
)) {
1902 r
= noname_disable_map(&(en
->te
.Else
), map
, counter
);
1903 if (r
!= 0) return r
;
1907 r
= noname_disable_map(&(NODE_BODY(node
)), map
, counter
);
1912 if (IS_NOT_NULL(NODE_BODY(node
)))
1913 r
= noname_disable_map(&(NODE_BODY(node
)), map
, counter
);
1924 renumber_node_backref(Node
* node
, GroupNumRemap
* map
)
1926 int i
, pos
, n
, old_num
;
1928 BackRefNode
* bn
= BACKREF_(node
);
1930 if (! NODE_IS_BY_NAME(node
))
1931 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED
;
1933 old_num
= bn
->back_num
;
1934 if (IS_NULL(bn
->back_dynamic
))
1935 backs
= bn
->back_static
;
1937 backs
= bn
->back_dynamic
;
1939 for (i
= 0, pos
= 0; i
< old_num
; i
++) {
1940 n
= map
[backs
[i
]].new_val
;
1952 renumber_by_map(Node
* node
, GroupNumRemap
* map
)
1956 switch (NODE_TYPE(node
)) {
1960 r
= renumber_by_map(NODE_CAR(node
), map
);
1961 } while (r
== 0 && IS_NOT_NULL(node
= NODE_CDR(node
)));
1965 r
= renumber_by_map(NODE_BODY(node
), map
);
1968 case NODE_ENCLOSURE
:
1970 EnclosureNode
* en
= ENCLOSURE_(node
);
1972 r
= renumber_by_map(NODE_BODY(node
), map
);
1973 if (r
!= 0) return r
;
1975 if (en
->type
== ENCLOSURE_IF_ELSE
) {
1976 if (IS_NOT_NULL(en
->te
.Then
)) {
1977 r
= renumber_by_map(en
->te
.Then
, map
);
1978 if (r
!= 0) return r
;
1980 if (IS_NOT_NULL(en
->te
.Else
)) {
1981 r
= renumber_by_map(en
->te
.Else
, map
);
1982 if (r
!= 0) return r
;
1989 r
= renumber_node_backref(node
, map
);
1993 if (IS_NOT_NULL(NODE_BODY(node
)))
1994 r
= renumber_by_map(NODE_BODY(node
), map
);
2005 numbered_ref_check(Node
* node
)
2009 switch (NODE_TYPE(node
)) {
2013 r
= numbered_ref_check(NODE_CAR(node
));
2014 } while (r
== 0 && IS_NOT_NULL(node
= NODE_CDR(node
)));
2018 if (IS_NULL(NODE_BODY(node
)))
2022 r
= numbered_ref_check(NODE_BODY(node
));
2025 case NODE_ENCLOSURE
:
2027 EnclosureNode
* en
= ENCLOSURE_(node
);
2029 r
= numbered_ref_check(NODE_BODY(node
));
2030 if (r
!= 0) return r
;
2032 if (en
->type
== ENCLOSURE_IF_ELSE
) {
2033 if (IS_NOT_NULL(en
->te
.Then
)) {
2034 r
= numbered_ref_check(en
->te
.Then
);
2035 if (r
!= 0) return r
;
2037 if (IS_NOT_NULL(en
->te
.Else
)) {
2038 r
= numbered_ref_check(en
->te
.Else
);
2039 if (r
!= 0) return r
;
2047 if (! NODE_IS_BY_NAME(node
))
2048 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED
;
2059 disable_noname_group_capture(Node
** root
, regex_t
* reg
, ScanEnv
* env
)
2061 int r
, i
, pos
, counter
;
2066 map
= (GroupNumRemap
* )xmalloc(sizeof(GroupNumRemap
) * (env
->num_mem
+ 1));
2067 CHECK_NULL_RETURN_MEMERR(map
);
2068 for (i
= 1; i
<= env
->num_mem
; i
++) {
2072 r
= noname_disable_map(root
, map
, &counter
);
2073 if (r
!= 0) return r
;
2075 r
= renumber_by_map(*root
, map
);
2076 if (r
!= 0) return r
;
2078 for (i
= 1, pos
= 1; i
<= env
->num_mem
; i
++) {
2079 if (map
[i
].new_val
> 0) {
2080 SCANENV_MEMENV(env
)[pos
] = SCANENV_MEMENV(env
)[i
];
2085 loc
= env
->capture_history
;
2086 MEM_STATUS_CLEAR(env
->capture_history
);
2087 for (i
= 1; i
<= ONIG_MAX_CAPTURE_HISTORY_GROUP
; i
++) {
2088 if (MEM_STATUS_AT(loc
, i
)) {
2089 MEM_STATUS_ON_SIMPLE(env
->capture_history
, map
[i
].new_val
);
2093 env
->num_mem
= env
->num_named
;
2094 reg
->num_mem
= env
->num_named
;
2095 result
= onig_renumber_name_table(reg
, map
);
2102 fix_unset_addr_list(UnsetAddrList
* uslist
, regex_t
* reg
)
2108 for (i
= 0; i
< uslist
->num
; i
++) {
2109 if (! NODE_IS_ADDR_FIXED(uslist
->us
[i
].target
))
2110 return ONIGERR_PARSER_BUG
;
2112 en
= ENCLOSURE_(uslist
->us
[i
].target
);
2113 addr
= en
->m
.called_addr
;
2114 offset
= uslist
->us
[i
].offset
;
2116 BB_WRITE(reg
, offset
, &addr
, SIZE_ABSADDR
);
2123 #define GET_CHAR_LEN_VARLEN -1
2124 #define GET_CHAR_LEN_TOP_ALT_VARLEN -2
2126 /* fixed size pattern node only */
2128 get_char_length_tree1(Node
* node
, regex_t
* reg
, int* len
, int level
)
2135 switch (NODE_TYPE(node
)) {
2138 r
= get_char_length_tree1(NODE_CAR(node
), reg
, &tlen
, level
);
2140 *len
= distance_add(*len
, tlen
);
2141 } while (r
== 0 && IS_NOT_NULL(node
= NODE_CDR(node
)));
2149 r
= get_char_length_tree1(NODE_CAR(node
), reg
, &tlen
, level
);
2150 while (r
== 0 && IS_NOT_NULL(node
= NODE_CDR(node
))) {
2151 r
= get_char_length_tree1(NODE_CAR(node
), reg
, &tlen2
, level
);
2160 r
= GET_CHAR_LEN_TOP_ALT_VARLEN
;
2162 r
= GET_CHAR_LEN_VARLEN
;
2172 StrNode
* sn
= STR_(node
);
2175 while (s
< sn
->end
) {
2176 s
+= enclen(reg
->enc
, s
);
2184 QuantNode
* qn
= QUANT_(node
);
2186 if (qn
->lower
== qn
->upper
) {
2187 if (qn
->upper
== 0) {
2191 r
= get_char_length_tree1(NODE_BODY(node
), reg
, &tlen
, level
);
2193 *len
= distance_multiply(tlen
, qn
->lower
);
2197 r
= GET_CHAR_LEN_VARLEN
;
2203 if (! NODE_IS_RECURSION(node
))
2204 r
= get_char_length_tree1(NODE_BODY(node
), reg
, len
, level
);
2206 r
= GET_CHAR_LEN_VARLEN
;
2215 case NODE_ENCLOSURE
:
2217 EnclosureNode
* en
= ENCLOSURE_(node
);
2220 case ENCLOSURE_MEMORY
:
2222 if (NODE_IS_CLEN_FIXED(node
))
2223 *len
= en
->char_len
;
2225 r
= get_char_length_tree1(NODE_BODY(node
), reg
, len
, level
);
2227 en
->char_len
= *len
;
2228 NODE_STATUS_ADD(node
, CLEN_FIXED
);
2233 case ENCLOSURE_OPTION
:
2234 case ENCLOSURE_STOP_BACKTRACK
:
2235 r
= get_char_length_tree1(NODE_BODY(node
), reg
, len
, level
);
2237 case ENCLOSURE_IF_ELSE
:
2241 r
= get_char_length_tree1(NODE_BODY(node
), reg
, &clen
, level
);
2243 if (IS_NOT_NULL(en
->te
.Then
)) {
2244 r
= get_char_length_tree1(en
->te
.Then
, reg
, &tlen
, level
);
2248 if (IS_NOT_NULL(en
->te
.Else
)) {
2249 r
= get_char_length_tree1(en
->te
.Else
, reg
, &elen
, level
);
2254 if (clen
+ tlen
!= elen
) {
2255 r
= GET_CHAR_LEN_VARLEN
;
2275 if (NODE_IS_CHECKER(node
))
2279 r
= GET_CHAR_LEN_VARLEN
;
2287 get_char_length_tree(Node
* node
, regex_t
* reg
, int* len
)
2289 return get_char_length_tree1(node
, reg
, len
, 0);
2292 /* x is not included y ==> 1 : 0 */
2294 is_exclusive(Node
* x
, Node
* y
, regex_t
* reg
)
2302 ytype
= NODE_TYPE(y
);
2303 switch (NODE_TYPE(x
)) {
2306 if (CTYPE_(x
)->ctype
== CTYPE_ANYCHAR
||
2307 CTYPE_(y
)->ctype
== CTYPE_ANYCHAR
)
2312 if (CTYPE_(y
)->ctype
== CTYPE_(x
)->ctype
&&
2313 CTYPE_(y
)->not != CTYPE_(x
)->not &&
2314 CTYPE_(y
)->ascii_mode
== CTYPE_(x
)->ascii_mode
)
2324 tmp
= x
; x
= y
; y
= tmp
;
2342 CClassNode
* xc
= CCLASS_(x
);
2346 switch (CTYPE_(y
)->ctype
) {
2351 case ONIGENC_CTYPE_WORD
:
2352 if (CTYPE_(y
)->not == 0) {
2353 if (IS_NULL(xc
->mbuf
) && !IS_NCCLASS_NOT(xc
)) {
2354 range
= CTYPE_(y
)->ascii_mode
!= 0 ? 128 : SINGLE_BYTE_SIZE
;
2355 for (i
= 0; i
< range
; i
++) {
2356 if (BITSET_AT(xc
->bs
, i
)) {
2357 if (ONIGENC_IS_CODE_WORD(reg
->enc
, i
)) return 0;
2365 if (IS_NOT_NULL(xc
->mbuf
)) return 0;
2366 if (IS_NCCLASS_NOT(xc
)) return 0;
2368 range
= CTYPE_(y
)->ascii_mode
!= 0 ? 128 : SINGLE_BYTE_SIZE
;
2369 for (i
= 0; i
< range
; i
++) {
2370 if (! ONIGENC_IS_CODE_WORD(reg
->enc
, i
)) {
2371 if (BITSET_AT(xc
->bs
, i
))
2375 for (i
= range
; i
< SINGLE_BYTE_SIZE
; i
++) {
2376 if (BITSET_AT(xc
->bs
, i
)) return 0;
2390 CClassNode
* yc
= CCLASS_(y
);
2392 for (i
= 0; i
< SINGLE_BYTE_SIZE
; i
++) {
2393 v
= BITSET_AT(xc
->bs
, i
);
2394 if ((v
!= 0 && !IS_NCCLASS_NOT(xc
)) || (v
== 0 && IS_NCCLASS_NOT(xc
))) {
2395 v
= BITSET_AT(yc
->bs
, i
);
2396 if ((v
!= 0 && !IS_NCCLASS_NOT(yc
)) ||
2397 (v
== 0 && IS_NCCLASS_NOT(yc
)))
2401 if ((IS_NULL(xc
->mbuf
) && !IS_NCCLASS_NOT(xc
)) ||
2402 (IS_NULL(yc
->mbuf
) && !IS_NCCLASS_NOT(yc
)))
2420 StrNode
* xs
= STR_(x
);
2422 if (NODE_STRING_LEN(x
) == 0)
2427 switch (CTYPE_(y
)->ctype
) {
2431 case ONIGENC_CTYPE_WORD
:
2432 if (CTYPE_(y
)->ascii_mode
== 0) {
2433 if (ONIGENC_IS_MBC_WORD(reg
->enc
, xs
->s
, xs
->end
))
2434 return CTYPE_(y
)->not;
2436 return !(CTYPE_(y
)->not);
2439 if (ONIGENC_IS_MBC_WORD_ASCII(reg
->enc
, xs
->s
, xs
->end
))
2440 return CTYPE_(y
)->not;
2442 return !(CTYPE_(y
)->not);
2452 CClassNode
* cc
= CCLASS_(y
);
2454 code
= ONIGENC_MBC_TO_CODE(reg
->enc
, xs
->s
,
2455 xs
->s
+ ONIGENC_MBC_MAXLEN(reg
->enc
));
2456 return (onig_is_code_in_cc(reg
->enc
, code
, cc
) != 0 ? 0 : 1);
2463 StrNode
* ys
= STR_(y
);
2465 len
= NODE_STRING_LEN(x
);
2466 if (len
> NODE_STRING_LEN(y
)) len
= NODE_STRING_LEN(y
);
2467 if (NODE_STRING_IS_AMBIG(x
) || NODE_STRING_IS_AMBIG(y
)) {
2472 for (i
= 0, p
= ys
->s
, q
= xs
->s
; i
< len
; i
++, p
++, q
++) {
2473 if (*p
!= *q
) return 1;
2493 get_head_value_node(Node
* node
, int exact
, regex_t
* reg
)
2495 Node
* n
= NULL_NODE
;
2497 switch (NODE_TYPE(node
)) {
2506 if (CTYPE_(node
)->ctype
== CTYPE_ANYCHAR
)
2516 n
= get_head_value_node(NODE_CAR(node
), exact
, reg
);
2521 StrNode
* sn
= STR_(node
);
2523 if (sn
->end
<= sn
->s
)
2527 !NODE_STRING_IS_RAW(node
) && IS_IGNORECASE(reg
->options
)) {
2537 QuantNode
* qn
= QUANT_(node
);
2538 if (qn
->lower
> 0) {
2539 if (IS_NOT_NULL(qn
->head_exact
))
2542 n
= get_head_value_node(NODE_BODY(node
), exact
, reg
);
2547 case NODE_ENCLOSURE
:
2549 EnclosureNode
* en
= ENCLOSURE_(node
);
2551 case ENCLOSURE_OPTION
:
2553 OnigOptionType options
= reg
->options
;
2555 reg
->options
= ENCLOSURE_(node
)->o
.options
;
2556 n
= get_head_value_node(NODE_BODY(node
), exact
, reg
);
2557 reg
->options
= options
;
2561 case ENCLOSURE_MEMORY
:
2562 case ENCLOSURE_STOP_BACKTRACK
:
2563 case ENCLOSURE_IF_ELSE
:
2564 n
= get_head_value_node(NODE_BODY(node
), exact
, reg
);
2571 if (ANCHOR_(node
)->type
== ANCHOR_PREC_READ
)
2572 n
= get_head_value_node(NODE_BODY(node
), exact
, reg
);
2584 check_type_tree(Node
* node
, int type_mask
, int enclosure_mask
, int anchor_mask
)
2589 type
= NODE_TYPE(node
);
2590 if ((NODE_TYPE2BIT(type
) & type_mask
) == 0)
2597 r
= check_type_tree(NODE_CAR(node
), type_mask
, enclosure_mask
,
2599 } while (r
== 0 && IS_NOT_NULL(node
= NODE_CDR(node
)));
2603 r
= check_type_tree(NODE_BODY(node
), type_mask
, enclosure_mask
, anchor_mask
);
2606 case NODE_ENCLOSURE
:
2608 EnclosureNode
* en
= ENCLOSURE_(node
);
2609 if (((1<<en
->type
) & enclosure_mask
) == 0)
2612 r
= check_type_tree(NODE_BODY(node
), type_mask
, enclosure_mask
, anchor_mask
);
2613 if (r
== 0 && en
->type
== ENCLOSURE_IF_ELSE
) {
2614 if (IS_NOT_NULL(en
->te
.Then
)) {
2615 r
= check_type_tree(en
->te
.Then
, type_mask
, enclosure_mask
, anchor_mask
);
2618 if (IS_NOT_NULL(en
->te
.Else
)) {
2619 r
= check_type_tree(en
->te
.Else
, type_mask
, enclosure_mask
, anchor_mask
);
2626 type
= ANCHOR_(node
)->type
;
2627 if ((type
& anchor_mask
) == 0)
2630 if (IS_NOT_NULL(NODE_BODY(node
)))
2631 r
= check_type_tree(NODE_BODY(node
), type_mask
, enclosure_mask
, anchor_mask
);
2642 tree_min_len(Node
* node
, ScanEnv
* env
)
2648 switch (NODE_TYPE(node
)) {
2650 if (! NODE_IS_CHECKER(node
)) {
2653 MemEnv
* mem_env
= SCANENV_MEMENV(env
);
2654 BackRefNode
* br
= BACKREF_(node
);
2655 if (NODE_IS_RECURSION(node
)) break;
2657 backs
= BACKREFS_P(br
);
2658 len
= tree_min_len(mem_env
[backs
[0]].node
, env
);
2659 for (i
= 1; i
< br
->back_num
; i
++) {
2660 tmin
= tree_min_len(mem_env
[backs
[i
]].node
, env
);
2661 if (len
> tmin
) len
= tmin
;
2669 Node
* t
= NODE_BODY(node
);
2670 if (NODE_IS_RECURSION(node
)) {
2671 if (NODE_IS_MIN_FIXED(t
))
2672 len
= ENCLOSURE_(t
)->min_len
;
2675 len
= tree_min_len(t
, env
);
2682 tmin
= tree_min_len(NODE_CAR(node
), env
);
2683 len
= distance_add(len
, tmin
);
2684 } while (IS_NOT_NULL(node
= NODE_CDR(node
)));
2693 tmin
= tree_min_len(x
, env
);
2694 if (y
== node
) len
= tmin
;
2695 else if (len
> tmin
) len
= tmin
;
2696 } while (IS_NOT_NULL(y
= NODE_CDR(y
)));
2702 StrNode
* sn
= STR_(node
);
2703 len
= (int )(sn
->end
- sn
->s
);
2709 len
= ONIGENC_MBC_MINLEN(env
->enc
);
2714 QuantNode
* qn
= QUANT_(node
);
2716 if (qn
->lower
> 0) {
2717 len
= tree_min_len(NODE_BODY(node
), env
);
2718 len
= distance_multiply(len
, qn
->lower
);
2723 case NODE_ENCLOSURE
:
2725 EnclosureNode
* en
= ENCLOSURE_(node
);
2727 case ENCLOSURE_MEMORY
:
2728 if (NODE_IS_MIN_FIXED(node
))
2731 if (NODE_IS_MARK1(node
))
2732 len
= 0; /* recursive */
2734 NODE_STATUS_ADD(node
, MARK1
);
2735 len
= tree_min_len(NODE_BODY(node
), env
);
2736 NODE_STATUS_REMOVE(node
, MARK1
);
2739 NODE_STATUS_ADD(node
, MIN_FIXED
);
2744 case ENCLOSURE_OPTION
:
2745 case ENCLOSURE_STOP_BACKTRACK
:
2746 len
= tree_min_len(NODE_BODY(node
), env
);
2748 case ENCLOSURE_IF_ELSE
:
2752 len
= tree_min_len(NODE_BODY(node
), env
);
2753 if (IS_NOT_NULL(en
->te
.Then
))
2754 len
+= tree_min_len(en
->te
.Then
, env
);
2755 if (IS_NOT_NULL(en
->te
.Else
))
2756 elen
= tree_min_len(en
->te
.Else
, env
);
2759 if (elen
< len
) len
= elen
;
2768 GimmickNode
* g
= GIMMICK_(node
);
2769 if (g
->type
== GIMMICK_FAIL
) {
2784 tree_max_len(Node
* node
, ScanEnv
* env
)
2790 switch (NODE_TYPE(node
)) {
2793 tmax
= tree_max_len(NODE_CAR(node
), env
);
2794 len
= distance_add(len
, tmax
);
2795 } while (IS_NOT_NULL(node
= NODE_CDR(node
)));
2800 tmax
= tree_max_len(NODE_CAR(node
), env
);
2801 if (len
< tmax
) len
= tmax
;
2802 } while (IS_NOT_NULL(node
= NODE_CDR(node
)));
2807 StrNode
* sn
= STR_(node
);
2808 len
= (OnigLen
)(sn
->end
- sn
->s
);
2814 len
= ONIGENC_MBC_MAXLEN_DIST(env
->enc
);
2818 if (! NODE_IS_CHECKER(node
)) {
2821 MemEnv
* mem_env
= SCANENV_MEMENV(env
);
2822 BackRefNode
* br
= BACKREF_(node
);
2823 if (NODE_IS_RECURSION(node
)) {
2827 backs
= BACKREFS_P(br
);
2828 for (i
= 0; i
< br
->back_num
; i
++) {
2829 tmax
= tree_max_len(mem_env
[backs
[i
]].node
, env
);
2830 if (len
< tmax
) len
= tmax
;
2837 if (! NODE_IS_RECURSION(node
))
2838 len
= tree_max_len(NODE_BODY(node
), env
);
2846 QuantNode
* qn
= QUANT_(node
);
2848 if (qn
->upper
!= 0) {
2849 len
= tree_max_len(NODE_BODY(node
), env
);
2851 if (! IS_REPEAT_INFINITE(qn
->upper
))
2852 len
= distance_multiply(len
, qn
->upper
);
2860 case NODE_ENCLOSURE
:
2862 EnclosureNode
* en
= ENCLOSURE_(node
);
2864 case ENCLOSURE_MEMORY
:
2865 if (NODE_IS_MAX_FIXED(node
))
2868 if (NODE_IS_MARK1(node
))
2871 NODE_STATUS_ADD(node
, MARK1
);
2872 len
= tree_max_len(NODE_BODY(node
), env
);
2873 NODE_STATUS_REMOVE(node
, MARK1
);
2876 NODE_STATUS_ADD(node
, MAX_FIXED
);
2881 case ENCLOSURE_OPTION
:
2882 case ENCLOSURE_STOP_BACKTRACK
:
2883 len
= tree_max_len(NODE_BODY(node
), env
);
2885 case ENCLOSURE_IF_ELSE
:
2889 len
= tree_max_len(NODE_BODY(node
), env
);
2890 if (IS_NOT_NULL(en
->te
.Then
)) {
2891 tlen
= tree_max_len(en
->te
.Then
, env
);
2892 len
= distance_add(len
, tlen
);
2894 if (IS_NOT_NULL(en
->te
.Else
))
2895 elen
= tree_max_len(en
->te
.Else
, env
);
2898 if (elen
> len
) len
= elen
;
2915 check_backrefs(Node
* node
, ScanEnv
* env
)
2919 switch (NODE_TYPE(node
)) {
2923 r
= check_backrefs(NODE_CAR(node
), env
);
2924 } while (r
== 0 && IS_NOT_NULL(node
= NODE_CDR(node
)));
2928 if (! ANCHOR_HAS_BODY(ANCHOR_(node
))) {
2934 r
= check_backrefs(NODE_BODY(node
), env
);
2937 case NODE_ENCLOSURE
:
2938 r
= check_backrefs(NODE_BODY(node
), env
);
2940 EnclosureNode
* en
= ENCLOSURE_(node
);
2942 if (en
->type
== ENCLOSURE_IF_ELSE
) {
2943 if (r
!= 0) return r
;
2944 if (IS_NOT_NULL(en
->te
.Then
)) {
2945 r
= check_backrefs(en
->te
.Then
, env
);
2946 if (r
!= 0) return r
;
2948 if (IS_NOT_NULL(en
->te
.Else
)) {
2949 r
= check_backrefs(en
->te
.Else
, env
);
2958 BackRefNode
* br
= BACKREF_(node
);
2959 int* backs
= BACKREFS_P(br
);
2960 MemEnv
* mem_env
= SCANENV_MEMENV(env
);
2962 for (i
= 0; i
< br
->back_num
; i
++) {
2963 if (backs
[i
] > env
->num_mem
)
2964 return ONIGERR_INVALID_BACKREF
;
2966 NODE_STATUS_ADD(mem_env
[backs
[i
]].node
, BACKREF
);
2983 #define RECURSION_EXIST (1<<0)
2984 #define RECURSION_MUST (1<<1)
2985 #define RECURSION_INFINITE (1<<2)
2988 infinite_recursive_call_check(Node
* node
, ScanEnv
* env
, int head
)
2993 switch (NODE_TYPE(node
)) {
3001 ret
= infinite_recursive_call_check(NODE_CAR(x
), env
, head
);
3002 if (ret
< 0 || (ret
& RECURSION_INFINITE
) != 0) return ret
;
3005 min
= tree_min_len(NODE_CAR(x
), env
);
3006 if (min
!= 0) head
= 0;
3008 } while (IS_NOT_NULL(x
= NODE_CDR(x
)));
3016 must
= RECURSION_MUST
;
3018 ret
= infinite_recursive_call_check(NODE_CAR(node
), env
, head
);
3019 if (ret
< 0 || (ret
& RECURSION_INFINITE
) != 0) return ret
;
3021 r
|= (ret
& RECURSION_EXIST
);
3023 } while (IS_NOT_NULL(node
= NODE_CDR(node
)));
3029 r
= infinite_recursive_call_check(NODE_BODY(node
), env
, head
);
3030 if (r
< 0) return r
;
3031 if ((r
& RECURSION_MUST
) != 0) {
3032 if (QUANT_(node
)->lower
== 0)
3033 r
&= ~RECURSION_MUST
;
3038 if (! ANCHOR_HAS_BODY(ANCHOR_(node
)))
3042 r
= infinite_recursive_call_check(NODE_BODY(node
), env
, head
);
3045 case NODE_ENCLOSURE
:
3047 EnclosureNode
* en
= ENCLOSURE_(node
);
3049 if (en
->type
== ENCLOSURE_MEMORY
) {
3050 if (NODE_IS_MARK2(node
))
3052 else if (NODE_IS_MARK1(node
))
3053 return (head
== 0 ? RECURSION_EXIST
| RECURSION_MUST
3054 : RECURSION_EXIST
| RECURSION_MUST
| RECURSION_INFINITE
);
3056 NODE_STATUS_ADD(node
, MARK2
);
3057 r
= infinite_recursive_call_check(NODE_BODY(node
), env
, head
);
3058 NODE_STATUS_REMOVE(node
, MARK2
);
3061 else if (en
->type
== ENCLOSURE_IF_ELSE
) {
3064 ret
= infinite_recursive_call_check(NODE_BODY(node
), env
, head
);
3065 if (ret
< 0 || (ret
& RECURSION_INFINITE
) != 0) return ret
;
3067 if (IS_NOT_NULL(en
->te
.Then
)) {
3070 min
= tree_min_len(NODE_BODY(node
), env
);
3074 ret
= infinite_recursive_call_check(en
->te
.Then
, env
, min
!= 0 ? 0:head
);
3075 if (ret
< 0 || (ret
& RECURSION_INFINITE
) != 0) return ret
;
3078 if (IS_NOT_NULL(en
->te
.Else
)) {
3079 eret
= infinite_recursive_call_check(en
->te
.Else
, env
, head
);
3080 if (eret
< 0 || (eret
& RECURSION_INFINITE
) != 0) return eret
;
3081 r
|= (eret
& RECURSION_EXIST
);
3082 if ((eret
& RECURSION_MUST
) == 0)
3083 r
&= ~RECURSION_MUST
;
3087 r
= infinite_recursive_call_check(NODE_BODY(node
), env
, head
);
3100 infinite_recursive_call_check_trav(Node
* node
, ScanEnv
* env
)
3104 switch (NODE_TYPE(node
)) {
3108 r
= infinite_recursive_call_check_trav(NODE_CAR(node
), env
);
3109 } while (r
== 0 && IS_NOT_NULL(node
= NODE_CDR(node
)));
3113 if (! ANCHOR_HAS_BODY(ANCHOR_(node
))) {
3119 r
= infinite_recursive_call_check_trav(NODE_BODY(node
), env
);
3122 case NODE_ENCLOSURE
:
3124 EnclosureNode
* en
= ENCLOSURE_(node
);
3126 if (en
->type
== ENCLOSURE_MEMORY
) {
3127 if (NODE_IS_RECURSION(node
) && NODE_IS_CALLED(node
)) {
3130 NODE_STATUS_ADD(node
, MARK1
);
3132 ret
= infinite_recursive_call_check(NODE_BODY(node
), env
, 1);
3133 if (ret
< 0) return ret
;
3134 else if ((ret
& (RECURSION_MUST
| RECURSION_INFINITE
)) != 0)
3135 return ONIGERR_NEVER_ENDING_RECURSION
;
3137 NODE_STATUS_REMOVE(node
, MARK1
);
3140 else if (en
->type
== ENCLOSURE_IF_ELSE
) {
3141 if (IS_NOT_NULL(en
->te
.Then
)) {
3142 r
= infinite_recursive_call_check_trav(en
->te
.Then
, env
);
3143 if (r
!= 0) return r
;
3145 if (IS_NOT_NULL(en
->te
.Else
)) {
3146 r
= infinite_recursive_call_check_trav(en
->te
.Else
, env
);
3147 if (r
!= 0) return r
;
3152 r
= infinite_recursive_call_check_trav(NODE_BODY(node
), env
);
3164 recursive_call_check(Node
* node
)
3168 switch (NODE_TYPE(node
)) {
3173 r
|= recursive_call_check(NODE_CAR(node
));
3174 } while (IS_NOT_NULL(node
= NODE_CDR(node
)));
3178 if (! ANCHOR_HAS_BODY(ANCHOR_(node
))) {
3184 r
= recursive_call_check(NODE_BODY(node
));
3188 r
= recursive_call_check(NODE_BODY(node
));
3190 if (NODE_IS_MARK1(NODE_BODY(node
)))
3191 NODE_STATUS_ADD(node
, RECURSION
);
3195 case NODE_ENCLOSURE
:
3197 EnclosureNode
* en
= ENCLOSURE_(node
);
3199 if (en
->type
== ENCLOSURE_MEMORY
) {
3200 if (NODE_IS_MARK2(node
))
3202 else if (NODE_IS_MARK1(node
))
3203 return 1; /* recursion */
3205 NODE_STATUS_ADD(node
, MARK2
);
3206 r
= recursive_call_check(NODE_BODY(node
));
3207 NODE_STATUS_REMOVE(node
, MARK2
);
3210 else if (en
->type
== ENCLOSURE_IF_ELSE
) {
3212 if (IS_NOT_NULL(en
->te
.Then
)) {
3213 r
|= recursive_call_check(en
->te
.Then
);
3215 if (IS_NOT_NULL(en
->te
.Else
)) {
3216 r
|= recursive_call_check(en
->te
.Else
);
3218 r
|= recursive_call_check(NODE_BODY(node
));
3221 r
= recursive_call_check(NODE_BODY(node
));
3234 #define IN_RECURSION (1<<0)
3235 #define FOUND_CALLED_NODE 1
3238 recursive_call_check_trav(Node
* node
, ScanEnv
* env
, int state
)
3242 switch (NODE_TYPE(node
)) {
3248 ret
= recursive_call_check_trav(NODE_CAR(node
), env
, state
);
3249 if (ret
== FOUND_CALLED_NODE
) r
= FOUND_CALLED_NODE
;
3250 else if (ret
< 0) return ret
;
3251 } while (IS_NOT_NULL(node
= NODE_CDR(node
)));
3256 r
= recursive_call_check_trav(NODE_BODY(node
), env
, state
);
3257 if (QUANT_(node
)->upper
== 0) {
3258 if (r
== FOUND_CALLED_NODE
)
3259 QUANT_(node
)->is_refered
= 1;
3265 AnchorNode
* an
= ANCHOR_(node
);
3266 if (ANCHOR_HAS_BODY(an
))
3267 r
= recursive_call_check_trav(NODE_ANCHOR_BODY(an
), env
, state
);
3271 case NODE_ENCLOSURE
:
3275 EnclosureNode
* en
= ENCLOSURE_(node
);
3277 if (en
->type
== ENCLOSURE_MEMORY
) {
3278 if (NODE_IS_CALLED(node
) || (state
& IN_RECURSION
) != 0) {
3279 if (! NODE_IS_RECURSION(node
)) {
3280 NODE_STATUS_ADD(node
, MARK1
);
3281 r
= recursive_call_check(NODE_BODY(node
));
3283 NODE_STATUS_ADD(node
, RECURSION
);
3284 NODE_STATUS_REMOVE(node
, MARK1
);
3287 if (NODE_IS_CALLED(node
))
3288 r
= FOUND_CALLED_NODE
;
3293 if (NODE_IS_RECURSION(node
))
3294 state1
|= IN_RECURSION
;
3296 ret
= recursive_call_check_trav(NODE_BODY(node
), env
, state1
);
3297 if (ret
== FOUND_CALLED_NODE
)
3298 r
= FOUND_CALLED_NODE
;
3300 if (en
->type
== ENCLOSURE_IF_ELSE
) {
3301 if (IS_NOT_NULL(en
->te
.Then
)) {
3302 ret
= recursive_call_check_trav(en
->te
.Then
, env
, state1
);
3303 if (ret
== FOUND_CALLED_NODE
)
3304 r
= FOUND_CALLED_NODE
;
3306 if (IS_NOT_NULL(en
->te
.Else
)) {
3307 ret
= recursive_call_check_trav(en
->te
.Else
, env
, state1
);
3308 if (ret
== FOUND_CALLED_NODE
)
3309 r
= FOUND_CALLED_NODE
;
3324 /* divide different length alternatives in look-behind.
3325 (?<=A|B) ==> (?<=A)|(?<=B)
3326 (?<!A|B) ==> (?<!A)(?<!B)
3329 divide_look_behind_alternatives(Node
* node
)
3331 Node
*head
, *np
, *insert_node
;
3332 AnchorNode
* an
= ANCHOR_(node
);
3333 int anc_type
= an
->type
;
3335 head
= NODE_ANCHOR_BODY(an
);
3336 np
= NODE_CAR(head
);
3337 swap_node(node
, head
);
3338 NODE_CAR(node
) = head
;
3339 NODE_BODY(head
) = np
;
3342 while (IS_NOT_NULL(np
= NODE_CDR(np
))) {
3343 insert_node
= onig_node_new_anchor(anc_type
, an
->ascii_mode
);
3344 CHECK_NULL_RETURN_MEMERR(insert_node
);
3345 NODE_BODY(insert_node
) = NODE_CAR(np
);
3346 NODE_CAR(np
) = insert_node
;
3349 if (anc_type
== ANCHOR_LOOK_BEHIND_NOT
) {
3352 NODE_SET_TYPE(np
, NODE_LIST
); /* alt -> list */
3353 } while (IS_NOT_NULL(np
= NODE_CDR(np
)));
3359 setup_look_behind(Node
* node
, regex_t
* reg
, ScanEnv
* env
)
3362 AnchorNode
* an
= ANCHOR_(node
);
3364 r
= get_char_length_tree(NODE_ANCHOR_BODY(an
), reg
, &len
);
3367 else if (r
== GET_CHAR_LEN_VARLEN
)
3368 r
= ONIGERR_INVALID_LOOK_BEHIND_PATTERN
;
3369 else if (r
== GET_CHAR_LEN_TOP_ALT_VARLEN
) {
3370 if (IS_SYNTAX_BV(env
->syntax
, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND
))
3371 r
= divide_look_behind_alternatives(node
);
3373 r
= ONIGERR_INVALID_LOOK_BEHIND_PATTERN
;
3380 next_setup(Node
* node
, Node
* next_node
, regex_t
* reg
)
3385 type
= NODE_TYPE(node
);
3386 if (type
== NODE_QUANT
) {
3387 QuantNode
* qn
= QUANT_(node
);
3388 if (qn
->greedy
&& IS_REPEAT_INFINITE(qn
->upper
)) {
3389 #ifdef USE_QUANT_PEEK_NEXT
3390 Node
* n
= get_head_value_node(next_node
, 1, reg
);
3391 /* '\0': for UTF-16BE etc... */
3392 if (IS_NOT_NULL(n
) && STR_(n
)->s
[0] != '\0') {
3393 qn
->next_head_exact
= n
;
3396 /* automatic posseivation a*b ==> (?>a*)b */
3397 if (qn
->lower
<= 1) {
3398 if (NODE_IS_SIMPLE_TYPE(NODE_BODY(node
))) {
3400 x
= get_head_value_node(NODE_BODY(node
), 0, reg
);
3401 if (IS_NOT_NULL(x
)) {
3402 y
= get_head_value_node(next_node
, 0, reg
);
3403 if (IS_NOT_NULL(y
) && is_exclusive(x
, y
, reg
)) {
3404 Node
* en
= onig_node_new_enclosure(ENCLOSURE_STOP_BACKTRACK
);
3405 CHECK_NULL_RETURN_MEMERR(en
);
3406 NODE_STATUS_ADD(en
, STOP_BT_SIMPLE_REPEAT
);
3407 swap_node(node
, en
);
3408 NODE_BODY(node
) = en
;
3415 else if (type
== NODE_ENCLOSURE
) {
3416 EnclosureNode
* en
= ENCLOSURE_(node
);
3417 if (en
->type
== ENCLOSURE_MEMORY
) {
3418 node
= NODE_BODY(node
);
3427 update_string_node_case_fold(regex_t
* reg
, Node
*node
)
3429 UChar
*p
, *end
, buf
[ONIGENC_MBC_CASE_FOLD_MAXLEN
];
3430 UChar
*sbuf
, *ebuf
, *sp
;
3431 int r
, i
, len
, sbuf_size
;
3432 StrNode
* sn
= STR_(node
);
3435 sbuf_size
= (int )(end
- sn
->s
) * 2;
3436 sbuf
= (UChar
* )xmalloc(sbuf_size
);
3437 CHECK_NULL_RETURN_MEMERR(sbuf
);
3438 ebuf
= sbuf
+ sbuf_size
;
3443 len
= ONIGENC_MBC_CASE_FOLD(reg
->enc
, reg
->case_fold_flag
, &p
, end
, buf
);
3444 for (i
= 0; i
< len
; i
++) {
3446 sbuf
= (UChar
* )xrealloc(sbuf
, sbuf_size
* 2, sbuf_size
);
3447 CHECK_NULL_RETURN_MEMERR(sbuf
);
3448 sp
= sbuf
+ sbuf_size
;
3450 ebuf
= sbuf
+ sbuf_size
;
3457 r
= onig_node_str_set(node
, sbuf
, sp
);
3468 expand_case_fold_make_rem_string(Node
** rnode
, UChar
*s
, UChar
*end
, regex_t
* reg
)
3473 node
= onig_node_new_str(s
, end
);
3474 if (IS_NULL(node
)) return ONIGERR_MEMORY
;
3476 r
= update_string_node_case_fold(reg
, node
);
3478 onig_node_free(node
);
3482 NODE_STRING_SET_AMBIG(node
);
3483 NODE_STRING_SET_DONT_GET_OPT_INFO(node
);
3489 expand_case_fold_string_alt(int item_num
, OnigCaseFoldCodeItem items
[], UChar
*p
,
3490 int slen
, UChar
*end
, regex_t
* reg
, Node
**rnode
)
3495 Node
*anode
, *var_anode
, *snode
, *xnode
, *an
;
3496 UChar buf
[ONIGENC_CODE_TO_MBC_MAXLEN
];
3498 *rnode
= var_anode
= NULL_NODE
;
3501 for (i
= 0; i
< item_num
; i
++) {
3502 if (items
[i
].byte_len
!= slen
) {
3509 *rnode
= var_anode
= onig_node_new_alt(NULL_NODE
, NULL_NODE
);
3510 if (IS_NULL(var_anode
)) return ONIGERR_MEMORY
;
3512 xnode
= onig_node_new_list(NULL
, NULL
);
3513 if (IS_NULL(xnode
)) goto mem_err
;
3514 NODE_CAR(var_anode
) = xnode
;
3516 anode
= onig_node_new_alt(NULL_NODE
, NULL_NODE
);
3517 if (IS_NULL(anode
)) goto mem_err
;
3518 NODE_CAR(xnode
) = anode
;
3521 *rnode
= anode
= onig_node_new_alt(NULL_NODE
, NULL_NODE
);
3522 if (IS_NULL(anode
)) return ONIGERR_MEMORY
;
3525 snode
= onig_node_new_str(p
, p
+ slen
);
3526 if (IS_NULL(snode
)) goto mem_err
;
3528 NODE_CAR(anode
) = snode
;
3530 for (i
= 0; i
< item_num
; i
++) {
3531 snode
= onig_node_new_str(NULL
, NULL
);
3532 if (IS_NULL(snode
)) goto mem_err
;
3534 for (j
= 0; j
< items
[i
].code_len
; j
++) {
3535 len
= ONIGENC_CODE_TO_MBC(reg
->enc
, items
[i
].code
[j
], buf
);
3541 r
= onig_node_str_cat(snode
, buf
, buf
+ len
);
3542 if (r
!= 0) goto mem_err2
;
3545 an
= onig_node_new_alt(NULL_NODE
, NULL_NODE
);
3550 if (items
[i
].byte_len
!= slen
) {
3552 UChar
*q
= p
+ items
[i
].byte_len
;
3555 r
= expand_case_fold_make_rem_string(&rem
, q
, end
, reg
);
3561 xnode
= onig_node_list_add(NULL_NODE
, snode
);
3562 if (IS_NULL(xnode
)) {
3564 onig_node_free(rem
);
3567 if (IS_NULL(onig_node_list_add(xnode
, rem
))) {
3569 onig_node_free(xnode
);
3570 onig_node_free(rem
);
3574 NODE_CAR(an
) = xnode
;
3577 NODE_CAR(an
) = snode
;
3580 NODE_CDR(var_anode
) = an
;
3584 NODE_CAR(an
) = snode
;
3585 NODE_CDR(anode
) = an
;
3593 onig_node_free(snode
);
3596 onig_node_free(*rnode
);
3598 return ONIGERR_MEMORY
;
3602 expand_case_fold_string(Node
* node
, regex_t
* reg
)
3604 #define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
3606 int r
, n
, len
, alt_num
;
3607 UChar
*start
, *end
, *p
;
3608 Node
*top_root
, *root
, *snode
, *prev_node
;
3609 OnigCaseFoldCodeItem items
[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM
];
3610 StrNode
* sn
= STR_(node
);
3612 if (NODE_STRING_IS_AMBIG(node
)) return 0;
3616 if (start
>= end
) return 0;
3619 top_root
= root
= prev_node
= snode
= NULL_NODE
;
3623 n
= ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg
->enc
, reg
->case_fold_flag
, p
, end
,
3630 len
= enclen(reg
->enc
, p
);
3633 if (IS_NULL(snode
)) {
3634 if (IS_NULL(root
) && IS_NOT_NULL(prev_node
)) {
3635 top_root
= root
= onig_node_list_add(NULL_NODE
, prev_node
);
3636 if (IS_NULL(root
)) {
3637 onig_node_free(prev_node
);
3642 prev_node
= snode
= onig_node_new_str(NULL
, NULL
);
3643 if (IS_NULL(snode
)) goto mem_err
;
3644 if (IS_NOT_NULL(root
)) {
3645 if (IS_NULL(onig_node_list_add(root
, snode
))) {
3646 onig_node_free(snode
);
3652 r
= onig_node_str_cat(snode
, p
, p
+ len
);
3653 if (r
!= 0) goto err
;
3657 if (alt_num
> THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION
) break;
3659 if (IS_NULL(root
) && IS_NOT_NULL(prev_node
)) {
3660 top_root
= root
= onig_node_list_add(NULL_NODE
, prev_node
);
3661 if (IS_NULL(root
)) {
3662 onig_node_free(prev_node
);
3667 r
= expand_case_fold_string_alt(n
, items
, p
, len
, end
, reg
, &prev_node
);
3668 if (r
< 0) goto mem_err
;
3670 if (IS_NULL(root
)) {
3671 top_root
= prev_node
;
3674 if (IS_NULL(onig_node_list_add(root
, prev_node
))) {
3675 onig_node_free(prev_node
);
3680 root
= NODE_CAR(prev_node
);
3683 if (IS_NOT_NULL(root
)) {
3684 if (IS_NULL(onig_node_list_add(root
, prev_node
))) {
3685 onig_node_free(prev_node
);
3700 r
= expand_case_fold_make_rem_string(&srem
, p
, end
, reg
);
3701 if (r
!= 0) goto mem_err
;
3703 if (IS_NOT_NULL(prev_node
) && IS_NULL(root
)) {
3704 top_root
= root
= onig_node_list_add(NULL_NODE
, prev_node
);
3705 if (IS_NULL(root
)) {
3706 onig_node_free(srem
);
3707 onig_node_free(prev_node
);
3712 if (IS_NULL(root
)) {
3716 if (IS_NULL(onig_node_list_add(root
, srem
))) {
3717 onig_node_free(srem
);
3724 top_root
= (IS_NOT_NULL(top_root
) ? top_root
: prev_node
);
3725 swap_node(node
, top_root
);
3726 onig_node_free(top_root
);
3733 onig_node_free(top_root
);
3737 #ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT
3738 static enum QuantBodyEmpty
3739 quantifiers_memory_node_info(Node
* node
)
3741 int r
= QUANT_BODY_IS_EMPTY
;
3743 switch (NODE_TYPE(node
)) {
3749 v
= quantifiers_memory_node_info(NODE_CAR(node
));
3751 } while (IS_NOT_NULL(node
= NODE_CDR(node
)));
3757 if (NODE_IS_RECURSION(node
)) {
3758 return QUANT_BODY_IS_EMPTY_REC
; /* tiny version */
3761 r
= quantifiers_memory_node_info(NODE_BODY(node
));
3767 QuantNode
* qn
= QUANT_(node
);
3768 if (qn
->upper
!= 0) {
3769 r
= quantifiers_memory_node_info(NODE_BODY(node
));
3774 case NODE_ENCLOSURE
:
3776 EnclosureNode
* en
= ENCLOSURE_(node
);
3778 case ENCLOSURE_MEMORY
:
3779 if (NODE_IS_RECURSION(node
)) {
3780 return QUANT_BODY_IS_EMPTY_REC
;
3782 return QUANT_BODY_IS_EMPTY_MEM
;
3785 case ENCLOSURE_OPTION
:
3786 case ENCLOSURE_STOP_BACKTRACK
:
3787 r
= quantifiers_memory_node_info(NODE_BODY(node
));
3789 case ENCLOSURE_IF_ELSE
:
3792 r
= quantifiers_memory_node_info(NODE_BODY(node
));
3793 if (IS_NOT_NULL(en
->te
.Then
)) {
3794 v
= quantifiers_memory_node_info(en
->te
.Then
);
3797 if (IS_NOT_NULL(en
->te
.Else
)) {
3798 v
= quantifiers_memory_node_info(en
->te
.Else
);
3821 #endif /* USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT */
3824 #define IN_ALT (1<<0)
3825 #define IN_NOT (1<<1)
3826 #define IN_REAL_REPEAT (1<<2)
3827 #define IN_VAR_REPEAT (1<<3)
3828 #define IN_ZERO_REPEAT (1<<4)
3829 #define IN_MULTI_ENTRY (1<<5)
3837 setup_call_node_call(CallNode
* cn
, ScanEnv
* env
, int state
)
3839 MemEnv
* mem_env
= SCANENV_MEMENV(env
);
3841 if (cn
->by_number
!= 0) {
3842 int gnum
= cn
->group_num
;
3844 if (env
->num_named
> 0 &&
3845 IS_SYNTAX_BV(env
->syntax
, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP
) &&
3846 ! ONIG_IS_OPTION_ON(env
->options
, ONIG_OPTION_CAPTURE_GROUP
)) {
3847 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED
;
3850 if (gnum
> env
->num_mem
) {
3851 onig_scan_env_set_error_string(env
, ONIGERR_UNDEFINED_GROUP_REFERENCE
,
3852 cn
->name
, cn
->name_end
);
3853 return ONIGERR_UNDEFINED_GROUP_REFERENCE
;
3857 NODE_CALL_BODY(cn
) = mem_env
[cn
->group_num
].node
;
3858 if (IS_NULL(NODE_CALL_BODY(cn
))) {
3859 onig_scan_env_set_error_string(env
, ONIGERR_UNDEFINED_NAME_REFERENCE
,
3860 cn
->name
, cn
->name_end
);
3861 return ONIGERR_UNDEFINED_NAME_REFERENCE
;
3867 int n
= onig_name_to_group_numbers(env
->reg
, cn
->name
, cn
->name_end
, &refs
);
3869 onig_scan_env_set_error_string(env
, ONIGERR_UNDEFINED_NAME_REFERENCE
,
3870 cn
->name
, cn
->name_end
);
3871 return ONIGERR_UNDEFINED_NAME_REFERENCE
;
3874 onig_scan_env_set_error_string(env
, ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL
,
3875 cn
->name
, cn
->name_end
);
3876 return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL
;
3879 cn
->group_num
= refs
[0];
3888 setup_call2_call(Node
* node
)
3890 switch (NODE_TYPE(node
)) {
3894 setup_call2_call(NODE_CAR(node
));
3895 } while (IS_NOT_NULL(node
= NODE_CDR(node
)));
3899 setup_call2_call(NODE_BODY(node
));
3903 if (ANCHOR_HAS_BODY(ANCHOR_(node
)))
3904 setup_call2_call(NODE_BODY(node
));
3907 case NODE_ENCLOSURE
:
3909 EnclosureNode
* en
= ENCLOSURE_(node
);
3911 if (en
->type
== ENCLOSURE_MEMORY
) {
3912 if (! NODE_IS_MARK1(node
)) {
3913 NODE_STATUS_ADD(node
, MARK1
);
3914 setup_call2_call(NODE_BODY(node
));
3915 NODE_STATUS_REMOVE(node
, MARK1
);
3918 else if (en
->type
== ENCLOSURE_IF_ELSE
) {
3919 setup_call2_call(NODE_BODY(node
));
3920 if (IS_NOT_NULL(en
->te
.Then
))
3921 setup_call2_call(en
->te
.Then
);
3922 if (IS_NOT_NULL(en
->te
.Else
))
3923 setup_call2_call(en
->te
.Else
);
3926 setup_call2_call(NODE_BODY(node
));
3932 if (! NODE_IS_MARK1(node
)) {
3933 NODE_STATUS_ADD(node
, MARK1
);
3935 CallNode
* cn
= CALL_(node
);
3936 Node
* called
= NODE_CALL_BODY(cn
);
3940 NODE_STATUS_ADD(called
, CALLED
);
3941 ENCLOSURE_(called
)->m
.entry_count
++;
3942 setup_call2_call(called
);
3944 NODE_STATUS_REMOVE(node
, MARK1
);
3954 setup_call(Node
* node
, ScanEnv
* env
, int state
)
3958 switch (NODE_TYPE(node
)) {
3962 r
= setup_call(NODE_CAR(node
), env
, state
);
3963 } while (r
== 0 && IS_NOT_NULL(node
= NODE_CDR(node
)));
3967 if (QUANT_(node
)->upper
== 0)
3968 state
|= IN_ZERO_REPEAT
;
3970 r
= setup_call(NODE_BODY(node
), env
, state
);
3974 if (ANCHOR_HAS_BODY(ANCHOR_(node
)))
3975 r
= setup_call(NODE_BODY(node
), env
, state
);
3980 case NODE_ENCLOSURE
:
3982 EnclosureNode
* en
= ENCLOSURE_(node
);
3984 if (en
->type
== ENCLOSURE_MEMORY
) {
3985 if ((state
& IN_ZERO_REPEAT
) != 0) {
3986 NODE_STATUS_ADD(node
, IN_ZERO_REPEAT
);
3987 ENCLOSURE_(node
)->m
.entry_count
--;
3989 r
= setup_call(NODE_BODY(node
), env
, state
);
3991 else if (en
->type
== ENCLOSURE_IF_ELSE
) {
3992 r
= setup_call(NODE_BODY(node
), env
, state
);
3993 if (r
!= 0) return r
;
3994 if (IS_NOT_NULL(en
->te
.Then
)) {
3995 r
= setup_call(en
->te
.Then
, env
, state
);
3996 if (r
!= 0) return r
;
3998 if (IS_NOT_NULL(en
->te
.Else
))
3999 r
= setup_call(en
->te
.Else
, env
, state
);
4002 r
= setup_call(NODE_BODY(node
), env
, state
);
4007 if ((state
& IN_ZERO_REPEAT
) != 0) {
4008 NODE_STATUS_ADD(node
, IN_ZERO_REPEAT
);
4009 CALL_(node
)->entry_count
--;
4012 r
= setup_call_node_call(CALL_(node
), env
, state
);
4024 setup_call2(Node
* node
)
4028 switch (NODE_TYPE(node
)) {
4032 r
= setup_call2(NODE_CAR(node
));
4033 } while (r
== 0 && IS_NOT_NULL(node
= NODE_CDR(node
)));
4037 if (QUANT_(node
)->upper
!= 0)
4038 r
= setup_call2(NODE_BODY(node
));
4042 if (ANCHOR_HAS_BODY(ANCHOR_(node
)))
4043 r
= setup_call2(NODE_BODY(node
));
4046 case NODE_ENCLOSURE
:
4047 if (! NODE_IS_IN_ZERO_REPEAT(node
))
4048 r
= setup_call2(NODE_BODY(node
));
4051 EnclosureNode
* en
= ENCLOSURE_(node
);
4053 if (r
!= 0) return r
;
4054 if (en
->type
== ENCLOSURE_IF_ELSE
) {
4055 if (IS_NOT_NULL(en
->te
.Then
)) {
4056 r
= setup_call2(en
->te
.Then
);
4057 if (r
!= 0) return r
;
4059 if (IS_NOT_NULL(en
->te
.Else
))
4060 r
= setup_call2(en
->te
.Else
);
4066 if (! NODE_IS_IN_ZERO_REPEAT(node
)) {
4067 setup_call2_call(node
);
4080 setup_called_state_call(Node
* node
, int state
)
4082 switch (NODE_TYPE(node
)) {
4088 setup_called_state_call(NODE_CAR(node
), state
);
4089 } while (IS_NOT_NULL(node
= NODE_CDR(node
)));
4094 QuantNode
* qn
= QUANT_(node
);
4096 if (IS_REPEAT_INFINITE(qn
->upper
) || qn
->upper
>= 2)
4097 state
|= IN_REAL_REPEAT
;
4098 if (qn
->lower
!= qn
->upper
)
4099 state
|= IN_VAR_REPEAT
;
4101 setup_called_state_call(NODE_QUANT_BODY(qn
), state
);
4107 AnchorNode
* an
= ANCHOR_(node
);
4110 case ANCHOR_PREC_READ_NOT
:
4111 case ANCHOR_LOOK_BEHIND_NOT
:
4114 case ANCHOR_PREC_READ
:
4115 case ANCHOR_LOOK_BEHIND
:
4116 setup_called_state_call(NODE_ANCHOR_BODY(an
), state
);
4124 case NODE_ENCLOSURE
:
4126 EnclosureNode
* en
= ENCLOSURE_(node
);
4128 if (en
->type
== ENCLOSURE_MEMORY
) {
4129 if (NODE_IS_MARK1(node
)) {
4130 if ((~en
->m
.called_state
& state
) != 0) {
4131 en
->m
.called_state
|= state
;
4132 setup_called_state_call(NODE_BODY(node
), state
);
4136 NODE_STATUS_ADD(node
, MARK1
);
4137 en
->m
.called_state
|= state
;
4138 setup_called_state_call(NODE_BODY(node
), state
);
4139 NODE_STATUS_REMOVE(node
, MARK1
);
4142 else if (en
->type
== ENCLOSURE_IF_ELSE
) {
4143 if (IS_NOT_NULL(en
->te
.Then
)) {
4144 setup_called_state_call(en
->te
.Then
, state
);
4146 if (IS_NOT_NULL(en
->te
.Else
))
4147 setup_called_state_call(en
->te
.Else
, state
);
4150 setup_called_state_call(NODE_BODY(node
), state
);
4156 setup_called_state_call(NODE_BODY(node
), state
);
4165 setup_called_state(Node
* node
, int state
)
4167 switch (NODE_TYPE(node
)) {
4173 setup_called_state(NODE_CAR(node
), state
);
4174 } while (IS_NOT_NULL(node
= NODE_CDR(node
)));
4179 setup_called_state_call(node
, state
);
4183 case NODE_ENCLOSURE
:
4185 EnclosureNode
* en
= ENCLOSURE_(node
);
4188 case ENCLOSURE_MEMORY
:
4189 if (en
->m
.entry_count
> 1)
4190 state
|= IN_MULTI_ENTRY
;
4192 en
->m
.called_state
|= state
;
4194 case ENCLOSURE_OPTION
:
4195 case ENCLOSURE_STOP_BACKTRACK
:
4196 setup_called_state(NODE_BODY(node
), state
);
4198 case ENCLOSURE_IF_ELSE
:
4199 setup_called_state(NODE_BODY(node
), state
);
4200 if (IS_NOT_NULL(en
->te
.Then
))
4201 setup_called_state(en
->te
.Then
, state
);
4202 if (IS_NOT_NULL(en
->te
.Else
))
4203 setup_called_state(en
->te
.Else
, state
);
4211 QuantNode
* qn
= QUANT_(node
);
4213 if (IS_REPEAT_INFINITE(qn
->upper
) || qn
->upper
>= 2)
4214 state
|= IN_REAL_REPEAT
;
4215 if (qn
->lower
!= qn
->upper
)
4216 state
|= IN_VAR_REPEAT
;
4218 setup_called_state(NODE_QUANT_BODY(qn
), state
);
4224 AnchorNode
* an
= ANCHOR_(node
);
4227 case ANCHOR_PREC_READ_NOT
:
4228 case ANCHOR_LOOK_BEHIND_NOT
:
4231 case ANCHOR_PREC_READ
:
4232 case ANCHOR_LOOK_BEHIND
:
4233 setup_called_state(NODE_ANCHOR_BODY(an
), state
);
4251 #endif /* USE_CALL */
4254 static int setup_tree(Node
* node
, regex_t
* reg
, int state
, ScanEnv
* env
);
4260 setup_anchor(Node
* node
, regex_t
* reg
, int state
, ScanEnv
* env
)
4262 /* allowed node types in look-behind */
4263 #define ALLOWED_TYPE_IN_LB \
4264 ( NODE_BIT_LIST | NODE_BIT_ALT | NODE_BIT_STRING | NODE_BIT_CCLASS \
4265 | NODE_BIT_CTYPE | NODE_BIT_ANCHOR | NODE_BIT_ENCLOSURE | NODE_BIT_QUANT \
4266 | NODE_BIT_CALL | NODE_BIT_GIMMICK)
4268 #define ALLOWED_ENCLOSURE_IN_LB ( 1<<ENCLOSURE_MEMORY | 1<<ENCLOSURE_OPTION )
4269 #define ALLOWED_ENCLOSURE_IN_LB_NOT (1<<ENCLOSURE_OPTION)
4271 #define ALLOWED_ANCHOR_IN_LB \
4272 ( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF \
4273 | ANCHOR_BEGIN_POSITION | ANCHOR_WORD_BOUNDARY | ANCHOR_NO_WORD_BOUNDARY \
4274 | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END \
4275 | ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY \
4276 | ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY )
4278 #define ALLOWED_ANCHOR_IN_LB_NOT \
4279 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE \
4280 | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_WORD_BOUNDARY \
4281 | ANCHOR_NO_WORD_BOUNDARY | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END \
4282 | ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY \
4283 | ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY )
4286 AnchorNode
* an
= ANCHOR_(node
);
4289 case ANCHOR_PREC_READ
:
4290 r
= setup_tree(NODE_ANCHOR_BODY(an
), reg
, state
, env
);
4292 case ANCHOR_PREC_READ_NOT
:
4293 r
= setup_tree(NODE_ANCHOR_BODY(an
), reg
, (state
| IN_NOT
), env
);
4296 case ANCHOR_LOOK_BEHIND
:
4298 r
= check_type_tree(NODE_ANCHOR_BODY(an
), ALLOWED_TYPE_IN_LB
,
4299 ALLOWED_ENCLOSURE_IN_LB
, ALLOWED_ANCHOR_IN_LB
);
4300 if (r
< 0) return r
;
4301 if (r
> 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN
;
4302 r
= setup_tree(NODE_ANCHOR_BODY(an
), reg
, state
, env
);
4303 if (r
!= 0) return r
;
4304 r
= setup_look_behind(node
, reg
, env
);
4308 case ANCHOR_LOOK_BEHIND_NOT
:
4310 r
= check_type_tree(NODE_ANCHOR_BODY(an
), ALLOWED_TYPE_IN_LB
,
4311 ALLOWED_ENCLOSURE_IN_LB_NOT
, ALLOWED_ANCHOR_IN_LB_NOT
);
4312 if (r
< 0) return r
;
4313 if (r
> 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN
;
4314 r
= setup_tree(NODE_ANCHOR_BODY(an
), reg
, (state
| IN_NOT
), env
);
4315 if (r
!= 0) return r
;
4316 r
= setup_look_behind(node
, reg
, env
);
4332 setup_quant(Node
* node
, regex_t
* reg
, int state
, ScanEnv
* env
)
4336 QuantNode
* qn
= QUANT_(node
);
4337 Node
* body
= NODE_BODY(node
);
4339 if ((state
& IN_REAL_REPEAT
) != 0) {
4340 NODE_STATUS_ADD(node
, IN_REAL_REPEAT
);
4342 if ((state
& IN_MULTI_ENTRY
) != 0) {
4343 NODE_STATUS_ADD(node
, IN_MULTI_ENTRY
);
4346 if (IS_REPEAT_INFINITE(qn
->upper
) || qn
->upper
>= 1) {
4347 d
= tree_min_len(body
, env
);
4349 #ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT
4350 qn
->body_empty_info
= quantifiers_memory_node_info(body
);
4351 if (qn
->body_empty_info
== QUANT_BODY_IS_EMPTY_REC
) {
4352 if (NODE_TYPE(body
) == NODE_ENCLOSURE
&&
4353 ENCLOSURE_(body
)->type
== ENCLOSURE_MEMORY
) {
4354 MEM_STATUS_ON(env
->bt_mem_end
, ENCLOSURE_(body
)->m
.regnum
);
4358 qn
->body_empty_info
= QUANT_BODY_IS_EMPTY
;
4363 if (IS_REPEAT_INFINITE(qn
->upper
) || qn
->upper
>= 2)
4364 state
|= IN_REAL_REPEAT
;
4365 if (qn
->lower
!= qn
->upper
)
4366 state
|= IN_VAR_REPEAT
;
4368 r
= setup_tree(body
, reg
, state
, env
);
4369 if (r
!= 0) return r
;
4372 #define EXPAND_STRING_MAX_LENGTH 100
4373 if (NODE_TYPE(body
) == NODE_STRING
) {
4374 if (!IS_REPEAT_INFINITE(qn
->lower
) && qn
->lower
== qn
->upper
&&
4375 qn
->lower
> 1 && qn
->lower
<= EXPAND_STRING_MAX_LENGTH
) {
4376 int len
= NODE_STRING_LEN(body
);
4377 StrNode
* sn
= STR_(body
);
4379 if (len
* qn
->lower
<= EXPAND_STRING_MAX_LENGTH
) {
4380 int i
, n
= qn
->lower
;
4381 onig_node_conv_to_str_node(node
, STR_(body
)->flag
);
4382 for (i
= 0; i
< n
; i
++) {
4383 r
= onig_node_str_cat(node
, sn
->s
, sn
->end
);
4384 if (r
!= 0) return r
;
4386 onig_node_free(body
);
4392 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
4393 if (qn
->greedy
&& (qn
->body_empty_info
!= QUANT_BODY_IS_NOT_EMPTY
)) {
4394 if (NODE_TYPE(body
) == NODE_QUANT
) {
4395 QuantNode
* tqn
= QUANT_(body
);
4396 if (IS_NOT_NULL(tqn
->head_exact
)) {
4397 qn
->head_exact
= tqn
->head_exact
;
4398 tqn
->head_exact
= NULL
;
4402 qn
->head_exact
= get_head_value_node(NODE_BODY(node
), 1, reg
);
4410 /* setup_tree does the following work.
4411 1. check empty loop. (set qn->body_empty_info)
4412 2. expand ignore-case in char class.
4413 3. set memory status bit flags. (reg->mem_stats)
4414 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
4415 5. find invalid patterns in look-behind.
4416 6. expand repeated string.
4419 setup_tree(Node
* node
, regex_t
* reg
, int state
, ScanEnv
* env
)
4423 switch (NODE_TYPE(node
)) {
4426 Node
* prev
= NULL_NODE
;
4428 r
= setup_tree(NODE_CAR(node
), reg
, state
, env
);
4429 if (IS_NOT_NULL(prev
) && r
== 0) {
4430 r
= next_setup(prev
, NODE_CAR(node
), reg
);
4432 prev
= NODE_CAR(node
);
4433 } while (r
== 0 && IS_NOT_NULL(node
= NODE_CDR(node
)));
4439 r
= setup_tree(NODE_CAR(node
), reg
, (state
| IN_ALT
), env
);
4440 } while (r
== 0 && IS_NOT_NULL(node
= NODE_CDR(node
)));
4444 if (IS_IGNORECASE(reg
->options
) && !NODE_STRING_IS_RAW(node
)) {
4445 r
= expand_case_fold_string(node
, reg
);
4453 BackRefNode
* br
= BACKREF_(node
);
4455 for (i
= 0; i
< br
->back_num
; i
++) {
4456 if (p
[i
] > env
->num_mem
) return ONIGERR_INVALID_BACKREF
;
4457 MEM_STATUS_ON(env
->backrefed_mem
, p
[i
]);
4458 MEM_STATUS_ON(env
->bt_mem_start
, p
[i
]);
4459 #ifdef USE_BACKREF_WITH_LEVEL
4460 if (NODE_IS_NEST_LEVEL(node
)) {
4461 MEM_STATUS_ON(env
->bt_mem_end
, p
[i
]);
4468 case NODE_ENCLOSURE
:
4470 EnclosureNode
* en
= ENCLOSURE_(node
);
4473 case ENCLOSURE_OPTION
:
4475 OnigOptionType options
= reg
->options
;
4476 reg
->options
= ENCLOSURE_(node
)->o
.options
;
4477 r
= setup_tree(NODE_BODY(node
), reg
, state
, env
);
4478 reg
->options
= options
;
4482 case ENCLOSURE_MEMORY
:
4484 state
|= en
->m
.called_state
;
4487 if ((state
& (IN_ALT
| IN_NOT
| IN_VAR_REPEAT
| IN_MULTI_ENTRY
)) != 0
4488 || NODE_IS_RECURSION(node
)) {
4489 MEM_STATUS_ON(env
->bt_mem_start
, en
->m
.regnum
);
4491 r
= setup_tree(NODE_BODY(node
), reg
, state
, env
);
4494 case ENCLOSURE_STOP_BACKTRACK
:
4496 Node
* target
= NODE_BODY(node
);
4497 r
= setup_tree(target
, reg
, state
, env
);
4498 if (NODE_TYPE(target
) == NODE_QUANT
) {
4499 QuantNode
* tqn
= QUANT_(target
);
4500 if (IS_REPEAT_INFINITE(tqn
->upper
) && tqn
->lower
<= 1 &&
4501 tqn
->greedy
!= 0) { /* (?>a*), a*+ etc... */
4502 if (NODE_IS_SIMPLE_TYPE(NODE_BODY(target
)))
4503 NODE_STATUS_ADD(node
, STOP_BT_SIMPLE_REPEAT
);
4509 case ENCLOSURE_IF_ELSE
:
4510 r
= setup_tree(NODE_BODY(node
), reg
, (state
| IN_ALT
), env
);
4511 if (r
!= 0) return r
;
4512 if (IS_NOT_NULL(en
->te
.Then
)) {
4513 r
= setup_tree(en
->te
.Then
, reg
, (state
| IN_ALT
), env
);
4514 if (r
!= 0) return r
;
4516 if (IS_NOT_NULL(en
->te
.Else
))
4517 r
= setup_tree(en
->te
.Else
, reg
, (state
| IN_ALT
), env
);
4524 r
= setup_quant(node
, reg
, state
, env
);
4528 r
= setup_anchor(node
, reg
, state
, env
);
4544 /* set skip map for Boyer-Moore search */
4546 set_bm_skip(UChar
* s
, UChar
* end
, OnigEncoding enc ARG_UNUSED
,
4547 UChar skip
[], int** int_skip
)
4551 len
= (int )(end
- s
);
4552 if (len
< ONIG_CHAR_TABLE_SIZE
) {
4553 for (i
= 0; i
< ONIG_CHAR_TABLE_SIZE
; i
++) skip
[i
] = len
;
4555 for (i
= 0; i
< len
- 1; i
++)
4556 skip
[s
[i
]] = len
- 1 - i
;
4559 if (IS_NULL(*int_skip
)) {
4560 *int_skip
= (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE
);
4561 if (IS_NULL(*int_skip
)) return ONIGERR_MEMORY
;
4563 for (i
= 0; i
< ONIG_CHAR_TABLE_SIZE
; i
++) (*int_skip
)[i
] = len
;
4565 for (i
= 0; i
< len
- 1; i
++)
4566 (*int_skip
)[s
[i
]] = len
- 1 - i
;
4571 #define OPT_EXACT_MAXLEN 24
4574 OnigLen min
; /* min byte length */
4575 OnigLen max
; /* max byte length */
4581 OnigOptionType options
;
4582 OnigCaseFoldType case_fold_flag
;
4592 MinMax mmd
; /* position */
4597 UChar s
[OPT_EXACT_MAXLEN
];
4601 MinMax mmd
; /* position */
4603 int value
; /* weighted value */
4604 UChar map
[ONIG_CHAR_TABLE_SIZE
];
4610 OptExact exb
; /* boundary */
4611 OptExact exm
; /* middle */
4612 OptExact expr
; /* prec read (?=...) */
4613 OptMap map
; /* boundary */
4618 map_position_value(OnigEncoding enc
, int i
)
4620 static const short int Vals
[] = {
4621 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
4622 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4623 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
4624 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
4625 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4626 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
4627 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4628 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
4631 if (i
< (int )(sizeof(Vals
)/sizeof(Vals
[0]))) {
4632 if (i
== 0 && ONIGENC_MBC_MINLEN(enc
) > 1)
4635 return (int )Vals
[i
];
4638 return 4; /* Take it easy. */
4642 distance_value(MinMax
* mm
)
4644 /* 1000 / (min-max-dist + 1) */
4645 static const short int dist_vals
[] = {
4646 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
4647 91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
4648 48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
4649 32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
4650 24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
4651 20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
4652 16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
4653 14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
4654 12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
4655 11, 11, 11, 11, 11, 10, 10, 10, 10, 10
4660 if (mm
->max
== INFINITE_LEN
) return 0;
4662 d
= mm
->max
- mm
->min
;
4663 if (d
< (OnigLen
)(sizeof(dist_vals
)/sizeof(dist_vals
[0])))
4664 /* return dist_vals[d] * 16 / (mm->min + 12); */
4665 return (int )dist_vals
[d
];
4671 comp_distance_value(MinMax
* d1
, MinMax
* d2
, int v1
, int v2
)
4673 if (v2
<= 0) return -1;
4674 if (v1
<= 0) return 1;
4676 v1
*= distance_value(d1
);
4677 v2
*= distance_value(d2
);
4679 if (v2
> v1
) return 1;
4680 if (v2
< v1
) return -1;
4682 if (d2
->min
< d1
->min
) return 1;
4683 if (d2
->min
> d1
->min
) return -1;
4688 is_equal_mml(MinMax
* a
, MinMax
* b
)
4690 return (a
->min
== b
->min
&& a
->max
== b
->max
) ? 1 : 0;
4694 set_mml(MinMax
* l
, OnigLen min
, OnigLen max
)
4701 clear_mml(MinMax
* l
)
4703 l
->min
= l
->max
= 0;
4707 copy_mml(MinMax
* to
, MinMax
* from
)
4709 to
->min
= from
->min
;
4710 to
->max
= from
->max
;
4714 add_mml(MinMax
* to
, MinMax
* from
)
4716 to
->min
= distance_add(to
->min
, from
->min
);
4717 to
->max
= distance_add(to
->max
, from
->max
);
4721 alt_merge_mml(MinMax
* to
, MinMax
* from
)
4723 if (to
->min
> from
->min
) to
->min
= from
->min
;
4724 if (to
->max
< from
->max
) to
->max
= from
->max
;
4728 copy_opt_env(OptEnv
* to
, OptEnv
* from
)
4734 clear_opt_anc_info(OptAnc
* a
)
4741 copy_opt_anc_info(OptAnc
* to
, OptAnc
* from
)
4747 concat_opt_anc_info(OptAnc
* to
, OptAnc
* left
, OptAnc
* right
,
4748 OnigLen left_len
, OnigLen right_len
)
4750 clear_opt_anc_info(to
);
4752 to
->left
= left
->left
;
4753 if (left_len
== 0) {
4754 to
->left
|= right
->left
;
4757 to
->right
= right
->right
;
4758 if (right_len
== 0) {
4759 to
->right
|= left
->right
;
4762 to
->right
|= (left
->right
& ANCHOR_PREC_READ_NOT
);
4769 if (a
== ANCHOR_END_BUF
|| a
== ANCHOR_SEMI_END_BUF
||
4770 a
== ANCHOR_END_LINE
|| a
== ANCHOR_PREC_READ
|| a
== ANCHOR_PREC_READ_NOT
)
4777 is_set_opt_anc_info(OptAnc
* to
, int anc
)
4779 if ((to
->left
& anc
) != 0) return 1;
4781 return ((to
->right
& anc
) != 0 ? 1 : 0);
4785 add_opt_anc_info(OptAnc
* to
, int anc
)
4794 remove_opt_anc_info(OptAnc
* to
, int anc
)
4803 alt_merge_opt_anc_info(OptAnc
* to
, OptAnc
* add
)
4805 to
->left
&= add
->left
;
4806 to
->right
&= add
->right
;
4810 is_full_opt_exact(OptExact
* e
)
4812 return (e
->len
>= OPT_EXACT_MAXLEN
? 1 : 0);
4816 clear_opt_exact(OptExact
* e
)
4819 clear_opt_anc_info(&e
->anc
);
4827 copy_opt_exact(OptExact
* to
, OptExact
* from
)
4833 concat_opt_exact(OptExact
* to
, OptExact
* add
, OnigEncoding enc
)
4839 if (! to
->ignore_case
&& add
->ignore_case
) {
4840 if (to
->len
>= add
->len
) return 0; /* avoid */
4842 to
->ignore_case
= 1;
4848 for (i
= to
->len
; p
< end
; ) {
4849 len
= enclen(enc
, p
);
4850 if (i
+ len
> OPT_EXACT_MAXLEN
) {
4854 for (j
= 0; j
< len
&& p
< end
; j
++)
4859 to
->reach_end
= (p
== end
? add
->reach_end
: 0);
4861 concat_opt_anc_info(&tanc
, &to
->anc
, &add
->anc
, 1, 1);
4862 if (! to
->reach_end
) tanc
.right
= 0;
4863 copy_opt_anc_info(&to
->anc
, &tanc
);
4869 concat_opt_exact_str(OptExact
* to
, UChar
* s
, UChar
* end
, OnigEncoding enc
)
4874 for (i
= to
->len
, p
= s
; p
< end
&& i
< OPT_EXACT_MAXLEN
; ) {
4875 len
= enclen(enc
, p
);
4876 if (i
+ len
> OPT_EXACT_MAXLEN
) break;
4877 for (j
= 0; j
< len
&& p
< end
; j
++)
4885 alt_merge_opt_exact(OptExact
* to
, OptExact
* add
, OptEnv
* env
)
4889 if (add
->len
== 0 || to
->len
== 0) {
4890 clear_opt_exact(to
);
4894 if (! is_equal_mml(&to
->mmd
, &add
->mmd
)) {
4895 clear_opt_exact(to
);
4899 for (i
= 0; i
< to
->len
&& i
< add
->len
; ) {
4900 if (to
->s
[i
] != add
->s
[i
]) break;
4901 len
= enclen(env
->enc
, to
->s
+ i
);
4903 for (j
= 1; j
< len
; j
++) {
4904 if (to
->s
[i
+j
] != add
->s
[i
+j
]) break;
4910 if (! add
->reach_end
|| i
< add
->len
|| i
< to
->len
) {
4914 to
->ignore_case
|= add
->ignore_case
;
4916 alt_merge_opt_anc_info(&to
->anc
, &add
->anc
);
4917 if (! to
->reach_end
) to
->anc
.right
= 0;
4921 select_opt_exact(OnigEncoding enc
, OptExact
* now
, OptExact
* alt
)
4932 copy_opt_exact(now
, alt
);
4935 else if (vn
<= 2 && va
<= 2) {
4936 /* ByteValTable[x] is big value --> low price */
4937 va
= map_position_value(enc
, now
->s
[0]);
4938 vn
= map_position_value(enc
, alt
->s
[0]);
4940 if (now
->len
> 1) vn
+= 5;
4941 if (alt
->len
> 1) va
+= 5;
4944 if (now
->ignore_case
== 0) vn
*= 2;
4945 if (alt
->ignore_case
== 0) va
*= 2;
4947 if (comp_distance_value(&now
->mmd
, &alt
->mmd
, vn
, va
) > 0)
4948 copy_opt_exact(now
, alt
);
4952 clear_opt_map(OptMap
* map
)
4954 static const OptMap clean_info
= {
4957 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4958 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4959 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4960 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4961 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4962 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4963 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4964 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4965 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4966 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4967 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4968 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4969 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4970 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4971 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4972 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4976 xmemcpy(map
, &clean_info
, sizeof(OptMap
));
4980 copy_opt_map(OptMap
* to
, OptMap
* from
)
4982 xmemcpy(to
,from
,sizeof(OptMap
));
4986 add_char_opt_map(OptMap
* m
, UChar c
, OnigEncoding enc
)
4988 if (m
->map
[c
] == 0) {
4990 m
->value
+= map_position_value(enc
, c
);
4995 add_char_amb_opt_map(OptMap
* map
, UChar
* p
, UChar
* end
,
4996 OnigEncoding enc
, OnigCaseFoldType fold_flag
)
4998 OnigCaseFoldCodeItem items
[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM
];
4999 UChar buf
[ONIGENC_CODE_TO_MBC_MAXLEN
];
5002 add_char_opt_map(map
, p
[0], enc
);
5004 fold_flag
= DISABLE_CASE_FOLD_MULTI_CHAR(fold_flag
);
5005 n
= ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc
, fold_flag
, p
, end
, items
);
5006 if (n
< 0) return n
;
5008 for (i
= 0; i
< n
; i
++) {
5009 ONIGENC_CODE_TO_MBC(enc
, items
[i
].code
[0], buf
);
5010 add_char_opt_map(map
, buf
[0], enc
);
5017 select_opt_map(OptMap
* now
, OptMap
* alt
)
5019 static int z
= 1<<15; /* 32768: something big value */
5023 if (alt
->value
== 0) return ;
5024 if (now
->value
== 0) {
5025 copy_opt_map(now
, alt
);
5029 vn
= z
/ now
->value
;
5030 va
= z
/ alt
->value
;
5031 if (comp_distance_value(&now
->mmd
, &alt
->mmd
, vn
, va
) > 0)
5032 copy_opt_map(now
, alt
);
5036 comp_opt_exact_or_map(OptExact
* e
, OptMap
* m
)
5038 #define COMP_EM_BASE 20
5041 if (m
->value
<= 0) return -1;
5043 ae
= COMP_EM_BASE
* e
->len
* (e
->ignore_case
? 1 : 2);
5044 am
= COMP_EM_BASE
* 5 * 2 / m
->value
;
5045 return comp_distance_value(&e
->mmd
, &m
->mmd
, ae
, am
);
5049 alt_merge_opt_map(OnigEncoding enc
, OptMap
* to
, OptMap
* add
)
5053 /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
5054 if (to
->value
== 0) return ;
5055 if (add
->value
== 0 || to
->mmd
.max
< add
->mmd
.min
) {
5060 alt_merge_mml(&to
->mmd
, &add
->mmd
);
5063 for (i
= 0; i
< ONIG_CHAR_TABLE_SIZE
; i
++) {
5068 val
+= map_position_value(enc
, i
);
5072 alt_merge_opt_anc_info(&to
->anc
, &add
->anc
);
5076 set_bound_node_opt_info(NodeOpt
* opt
, MinMax
* plen
)
5078 copy_mml(&(opt
->exb
.mmd
), plen
);
5079 copy_mml(&(opt
->expr
.mmd
), plen
);
5080 copy_mml(&(opt
->map
.mmd
), plen
);
5084 clear_node_opt_info(NodeOpt
* opt
)
5086 clear_mml(&opt
->len
);
5087 clear_opt_anc_info(&opt
->anc
);
5088 clear_opt_exact(&opt
->exb
);
5089 clear_opt_exact(&opt
->exm
);
5090 clear_opt_exact(&opt
->expr
);
5091 clear_opt_map(&opt
->map
);
5095 copy_node_opt_info(NodeOpt
* to
, NodeOpt
* from
)
5097 xmemcpy(to
,from
,sizeof(NodeOpt
));
5101 concat_left_node_opt_info(OnigEncoding enc
, NodeOpt
* to
, NodeOpt
* add
)
5103 int exb_reach
, exm_reach
;
5106 concat_opt_anc_info(&tanc
, &to
->anc
, &add
->anc
, to
->len
.max
, add
->len
.max
);
5107 copy_opt_anc_info(&to
->anc
, &tanc
);
5109 if (add
->exb
.len
> 0 && to
->len
.max
== 0) {
5110 concat_opt_anc_info(&tanc
, &to
->anc
, &add
->exb
.anc
, to
->len
.max
, add
->len
.max
);
5111 copy_opt_anc_info(&add
->exb
.anc
, &tanc
);
5114 if (add
->map
.value
> 0 && to
->len
.max
== 0) {
5115 if (add
->map
.mmd
.max
== 0)
5116 add
->map
.anc
.left
|= to
->anc
.left
;
5119 exb_reach
= to
->exb
.reach_end
;
5120 exm_reach
= to
->exm
.reach_end
;
5122 if (add
->len
.max
!= 0)
5123 to
->exb
.reach_end
= to
->exm
.reach_end
= 0;
5125 if (add
->exb
.len
> 0) {
5127 concat_opt_exact(&to
->exb
, &add
->exb
, enc
);
5128 clear_opt_exact(&add
->exb
);
5130 else if (exm_reach
) {
5131 concat_opt_exact(&to
->exm
, &add
->exb
, enc
);
5132 clear_opt_exact(&add
->exb
);
5135 select_opt_exact(enc
, &to
->exm
, &add
->exb
);
5136 select_opt_exact(enc
, &to
->exm
, &add
->exm
);
5138 if (to
->expr
.len
> 0) {
5139 if (add
->len
.max
> 0) {
5140 if (to
->expr
.len
> (int )add
->len
.max
)
5141 to
->expr
.len
= add
->len
.max
;
5143 if (to
->expr
.mmd
.max
== 0)
5144 select_opt_exact(enc
, &to
->exb
, &to
->expr
);
5146 select_opt_exact(enc
, &to
->exm
, &to
->expr
);
5149 else if (add
->expr
.len
> 0) {
5150 copy_opt_exact(&to
->expr
, &add
->expr
);
5153 select_opt_map(&to
->map
, &add
->map
);
5154 add_mml(&to
->len
, &add
->len
);
5158 alt_merge_node_opt_info(NodeOpt
* to
, NodeOpt
* add
, OptEnv
* env
)
5160 alt_merge_opt_anc_info(&to
->anc
, &add
->anc
);
5161 alt_merge_opt_exact(&to
->exb
, &add
->exb
, env
);
5162 alt_merge_opt_exact(&to
->exm
, &add
->exm
, env
);
5163 alt_merge_opt_exact(&to
->expr
, &add
->expr
, env
);
5164 alt_merge_opt_map(env
->enc
, &to
->map
, &add
->map
);
5166 alt_merge_mml(&to
->len
, &add
->len
);
5170 #define MAX_NODE_OPT_INFO_REF_COUNT 5
5173 optimize_nodes(Node
* node
, NodeOpt
* opt
, OptEnv
* env
)
5182 clear_node_opt_info(opt
);
5183 set_bound_node_opt_info(opt
, &env
->mmd
);
5185 switch (NODE_TYPE(node
)) {
5191 copy_opt_env(&nenv
, env
);
5193 r
= optimize_nodes(NODE_CAR(nd
), &xo
, &nenv
);
5195 add_mml(&nenv
.mmd
, &xo
.len
);
5196 concat_left_node_opt_info(enc
, opt
, &xo
);
5198 } while (r
== 0 && IS_NOT_NULL(nd
= NODE_CDR(nd
)));
5207 r
= optimize_nodes(NODE_CAR(nd
), &xo
, env
);
5209 if (nd
== node
) copy_node_opt_info(opt
, &xo
);
5210 else alt_merge_node_opt_info(opt
, &xo
, env
);
5212 } while ((r
== 0) && IS_NOT_NULL(nd
= NODE_CDR(nd
)));
5218 StrNode
* sn
= STR_(node
);
5219 int slen
= (int )(sn
->end
- sn
->s
);
5220 /* int is_raw = NODE_STRING_IS_RAW(node); */
5222 if (! NODE_STRING_IS_AMBIG(node
)) {
5223 concat_opt_exact_str(&opt
->exb
, sn
->s
, sn
->end
, enc
);
5225 add_char_opt_map(&opt
->map
, *(sn
->s
), enc
);
5227 set_mml(&opt
->len
, slen
, slen
);
5232 if (NODE_STRING_IS_DONT_GET_OPT_INFO(node
)) {
5233 int n
= onigenc_strlen(enc
, sn
->s
, sn
->end
);
5234 max
= ONIGENC_MBC_MAXLEN_DIST(enc
) * n
;
5237 concat_opt_exact_str(&opt
->exb
, sn
->s
, sn
->end
, enc
);
5238 opt
->exb
.ignore_case
= 1;
5241 r
= add_char_amb_opt_map(&opt
->map
, sn
->s
, sn
->end
,
5242 enc
, env
->case_fold_flag
);
5249 set_mml(&opt
->len
, slen
, max
);
5252 if (opt
->exb
.len
== slen
)
5253 opt
->exb
.reach_end
= 1;
5260 CClassNode
* cc
= CCLASS_(node
);
5262 /* no need to check ignore case. (set in setup_tree()) */
5264 if (IS_NOT_NULL(cc
->mbuf
) || IS_NCCLASS_NOT(cc
)) {
5265 OnigLen min
= ONIGENC_MBC_MINLEN(enc
);
5266 OnigLen max
= ONIGENC_MBC_MAXLEN_DIST(enc
);
5268 set_mml(&opt
->len
, min
, max
);
5271 for (i
= 0; i
< SINGLE_BYTE_SIZE
; i
++) {
5272 z
= BITSET_AT(cc
->bs
, i
);
5273 if ((z
&& ! IS_NCCLASS_NOT(cc
)) || (! z
&& IS_NCCLASS_NOT(cc
))) {
5274 add_char_opt_map(&opt
->map
, (UChar
)i
, enc
);
5277 set_mml(&opt
->len
, 1, 1);
5287 max
= ONIGENC_MBC_MAXLEN_DIST(enc
);
5292 switch (CTYPE_(node
)->ctype
) {
5296 case ONIGENC_CTYPE_WORD
:
5297 range
= CTYPE_(node
)->ascii_mode
!= 0 ? 128 : SINGLE_BYTE_SIZE
;
5298 if (CTYPE_(node
)->not != 0) {
5299 for (i
= 0; i
< range
; i
++) {
5300 if (! ONIGENC_IS_CODE_WORD(enc
, i
)) {
5301 add_char_opt_map(&opt
->map
, (UChar
)i
, enc
);
5304 for (i
= range
; i
< SINGLE_BYTE_SIZE
; i
++) {
5305 add_char_opt_map(&opt
->map
, (UChar
)i
, enc
);
5309 for (i
= 0; i
< range
; i
++) {
5310 if (ONIGENC_IS_CODE_WORD(enc
, i
)) {
5311 add_char_opt_map(&opt
->map
, (UChar
)i
, enc
);
5319 min
= ONIGENC_MBC_MINLEN(enc
);
5321 set_mml(&opt
->len
, min
, max
);
5326 switch (ANCHOR_(node
)->type
) {
5327 case ANCHOR_BEGIN_BUF
:
5328 case ANCHOR_BEGIN_POSITION
:
5329 case ANCHOR_BEGIN_LINE
:
5330 case ANCHOR_END_BUF
:
5331 case ANCHOR_SEMI_END_BUF
:
5332 case ANCHOR_END_LINE
:
5333 case ANCHOR_PREC_READ_NOT
:
5334 case ANCHOR_LOOK_BEHIND
:
5335 add_opt_anc_info(&opt
->anc
, ANCHOR_(node
)->type
);
5338 case ANCHOR_PREC_READ
:
5340 r
= optimize_nodes(NODE_BODY(node
), &xo
, env
);
5343 copy_opt_exact(&opt
->expr
, &xo
.exb
);
5344 else if (xo
.exm
.len
> 0)
5345 copy_opt_exact(&opt
->expr
, &xo
.exm
);
5347 opt
->expr
.reach_end
= 0;
5349 if (xo
.map
.value
> 0)
5350 copy_opt_map(&opt
->map
, &xo
.map
);
5355 case ANCHOR_LOOK_BEHIND_NOT
:
5361 if (! NODE_IS_CHECKER(node
)) {
5363 OnigLen min
, max
, tmin
, tmax
;
5364 MemEnv
* mem_env
= SCANENV_MEMENV(env
->scan_env
);
5365 BackRefNode
* br
= BACKREF_(node
);
5367 if (NODE_IS_RECURSION(node
)) {
5368 set_mml(&opt
->len
, 0, INFINITE_LEN
);
5371 backs
= BACKREFS_P(br
);
5372 min
= tree_min_len(mem_env
[backs
[0]].node
, env
->scan_env
);
5373 max
= tree_max_len(mem_env
[backs
[0]].node
, env
->scan_env
);
5374 for (i
= 1; i
< br
->back_num
; i
++) {
5375 tmin
= tree_min_len(mem_env
[backs
[i
]].node
, env
->scan_env
);
5376 tmax
= tree_max_len(mem_env
[backs
[i
]].node
, env
->scan_env
);
5377 if (min
> tmin
) min
= tmin
;
5378 if (max
< tmax
) max
= tmax
;
5380 set_mml(&opt
->len
, min
, max
);
5386 if (NODE_IS_RECURSION(node
))
5387 set_mml(&opt
->len
, 0, INFINITE_LEN
);
5389 OnigOptionType save
= env
->options
;
5390 env
->options
= ENCLOSURE_(NODE_BODY(node
))->o
.options
;
5391 r
= optimize_nodes(NODE_BODY(node
), opt
, env
);
5392 env
->options
= save
;
5400 QuantNode
* qn
= QUANT_(node
);
5402 r
= optimize_nodes(NODE_BODY(node
), &xo
, env
);
5405 if (qn
->lower
> 0) {
5406 copy_node_opt_info(opt
, &xo
);
5407 if (xo
.exb
.len
> 0) {
5408 if (xo
.exb
.reach_end
) {
5409 for (i
= 2; i
<= qn
->lower
&& ! is_full_opt_exact(&opt
->exb
); i
++) {
5410 int rc
= concat_opt_exact(&opt
->exb
, &xo
.exb
, enc
);
5413 if (i
< qn
->lower
) opt
->exb
.reach_end
= 0;
5417 if (qn
->lower
!= qn
->upper
) {
5418 opt
->exb
.reach_end
= 0;
5419 opt
->exm
.reach_end
= 0;
5422 opt
->exm
.reach_end
= 0;
5425 if (IS_REPEAT_INFINITE(qn
->upper
)) {
5426 if (env
->mmd
.max
== 0 &&
5427 NODE_IS_ANYCHAR(NODE_BODY(node
)) && qn
->greedy
!= 0) {
5428 if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn
), env
)))
5429 add_opt_anc_info(&opt
->anc
, ANCHOR_ANYCHAR_INF_ML
);
5431 add_opt_anc_info(&opt
->anc
, ANCHOR_ANYCHAR_INF
);
5434 max
= (xo
.len
.max
> 0 ? INFINITE_LEN
: 0);
5437 max
= distance_multiply(xo
.len
.max
, qn
->upper
);
5440 min
= distance_multiply(xo
.len
.min
, qn
->lower
);
5441 set_mml(&opt
->len
, min
, max
);
5445 case NODE_ENCLOSURE
:
5447 EnclosureNode
* en
= ENCLOSURE_(node
);
5450 case ENCLOSURE_OPTION
:
5452 OnigOptionType save
= env
->options
;
5454 env
->options
= en
->o
.options
;
5455 r
= optimize_nodes(NODE_BODY(node
), opt
, env
);
5456 env
->options
= save
;
5460 case ENCLOSURE_MEMORY
:
5463 if (en
->opt_count
> MAX_NODE_OPT_INFO_REF_COUNT
) {
5468 if (NODE_IS_MIN_FIXED(node
)) min
= en
->min_len
;
5469 if (NODE_IS_MAX_FIXED(node
)) max
= en
->max_len
;
5470 set_mml(&opt
->len
, min
, max
);
5475 r
= optimize_nodes(NODE_BODY(node
), opt
, env
);
5476 if (is_set_opt_anc_info(&opt
->anc
, ANCHOR_ANYCHAR_INF_MASK
)) {
5477 if (MEM_STATUS_AT0(env
->scan_env
->backrefed_mem
, en
->m
.regnum
))
5478 remove_opt_anc_info(&opt
->anc
, ANCHOR_ANYCHAR_INF_MASK
);
5483 case ENCLOSURE_STOP_BACKTRACK
:
5484 r
= optimize_nodes(NODE_BODY(node
), opt
, env
);
5487 case ENCLOSURE_IF_ELSE
:
5491 copy_opt_env(&nenv
, env
);
5492 r
= optimize_nodes(NODE_ENCLOSURE_BODY(en
), &xo
, &nenv
);
5494 add_mml(&nenv
.mmd
, &xo
.len
);
5495 concat_left_node_opt_info(enc
, opt
, &xo
);
5496 if (IS_NOT_NULL(en
->te
.Then
)) {
5497 r
= optimize_nodes(en
->te
.Then
, &xo
, &nenv
);
5499 concat_left_node_opt_info(enc
, opt
, &xo
);
5503 if (IS_NOT_NULL(en
->te
.Else
)) {
5504 r
= optimize_nodes(en
->te
.Else
, &xo
, env
);
5506 alt_merge_node_opt_info(opt
, &xo
, env
);
5520 fprintf(stderr
, "optimize_nodes: undefined node type %d\n", NODE_TYPE(node
));
5522 r
= ONIGERR_TYPE_BUG
;
5530 set_optimize_exact(regex_t
* reg
, OptExact
* e
)
5534 if (e
->len
== 0) return 0;
5536 if (e
->ignore_case
) {
5537 reg
->exact
= (UChar
* )xmalloc(e
->len
);
5538 CHECK_NULL_RETURN_MEMERR(reg
->exact
);
5539 xmemcpy(reg
->exact
, e
->s
, e
->len
);
5540 reg
->exact_end
= reg
->exact
+ e
->len
;
5541 reg
->optimize
= OPTIMIZE_EXACT_IC
;
5546 reg
->exact
= onigenc_strdup(reg
->enc
, e
->s
, e
->s
+ e
->len
);
5547 CHECK_NULL_RETURN_MEMERR(reg
->exact
);
5548 reg
->exact_end
= reg
->exact
+ e
->len
;
5551 ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg
->enc
, reg
->exact
, reg
->exact_end
);
5553 if (e
->len
>= 3 || (e
->len
>= 2 && allow_reverse
)) {
5554 r
= set_bm_skip(reg
->exact
, reg
->exact_end
, reg
->enc
,
5555 reg
->map
, &(reg
->int_map
));
5556 if (r
!= 0) return r
;
5558 reg
->optimize
= (allow_reverse
!= 0
5559 ? OPTIMIZE_EXACT_BM
: OPTIMIZE_EXACT_BM_NO_REV
);
5562 reg
->optimize
= OPTIMIZE_EXACT
;
5566 reg
->dmin
= e
->mmd
.min
;
5567 reg
->dmax
= e
->mmd
.max
;
5569 if (reg
->dmin
!= INFINITE_LEN
) {
5570 reg
->threshold_len
= reg
->dmin
+ (int )(reg
->exact_end
- reg
->exact
);
5577 set_optimize_map(regex_t
* reg
, OptMap
* m
)
5581 for (i
= 0; i
< ONIG_CHAR_TABLE_SIZE
; i
++)
5582 reg
->map
[i
] = m
->map
[i
];
5584 reg
->optimize
= OPTIMIZE_MAP
;
5585 reg
->dmin
= m
->mmd
.min
;
5586 reg
->dmax
= m
->mmd
.max
;
5588 if (reg
->dmin
!= INFINITE_LEN
) {
5589 reg
->threshold_len
= reg
->dmin
+ 1;
5594 set_sub_anchor(regex_t
* reg
, OptAnc
* anc
)
5596 reg
->sub_anchor
|= anc
->left
& ANCHOR_BEGIN_LINE
;
5597 reg
->sub_anchor
|= anc
->right
& ANCHOR_END_LINE
;
5600 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5601 static void print_optimize_info(FILE* f
, regex_t
* reg
);
5605 set_optimize_info_from_tree(Node
* node
, regex_t
* reg
, ScanEnv
* scan_env
)
5612 env
.options
= reg
->options
;
5613 env
.case_fold_flag
= reg
->case_fold_flag
;
5614 env
.scan_env
= scan_env
;
5615 clear_mml(&env
.mmd
);
5617 r
= optimize_nodes(node
, &opt
, &env
);
5618 if (r
!= 0) return r
;
5620 reg
->anchor
= opt
.anc
.left
& (ANCHOR_BEGIN_BUF
|
5621 ANCHOR_BEGIN_POSITION
| ANCHOR_ANYCHAR_INF
| ANCHOR_ANYCHAR_INF_ML
|
5622 ANCHOR_LOOK_BEHIND
);
5624 if ((opt
.anc
.left
& (ANCHOR_LOOK_BEHIND
| ANCHOR_PREC_READ_NOT
)) != 0)
5625 reg
->anchor
&= ~ANCHOR_ANYCHAR_INF_ML
;
5627 reg
->anchor
|= opt
.anc
.right
& (ANCHOR_END_BUF
| ANCHOR_SEMI_END_BUF
|
5628 ANCHOR_PREC_READ_NOT
);
5630 if (reg
->anchor
& (ANCHOR_END_BUF
| ANCHOR_SEMI_END_BUF
)) {
5631 reg
->anchor_dmin
= opt
.len
.min
;
5632 reg
->anchor_dmax
= opt
.len
.max
;
5635 if (opt
.exb
.len
> 0 || opt
.exm
.len
> 0) {
5636 select_opt_exact(reg
->enc
, &opt
.exb
, &opt
.exm
);
5637 if (opt
.map
.value
> 0 && comp_opt_exact_or_map(&opt
.exb
, &opt
.map
) > 0) {
5641 r
= set_optimize_exact(reg
, &opt
.exb
);
5642 set_sub_anchor(reg
, &opt
.exb
.anc
);
5645 else if (opt
.map
.value
> 0) {
5647 set_optimize_map(reg
, &opt
.map
);
5648 set_sub_anchor(reg
, &opt
.map
.anc
);
5651 reg
->sub_anchor
|= opt
.anc
.left
& ANCHOR_BEGIN_LINE
;
5652 if (opt
.len
.max
== 0)
5653 reg
->sub_anchor
|= opt
.anc
.right
& ANCHOR_END_LINE
;
5656 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5657 print_optimize_info(stderr
, reg
);
5663 clear_optimize_info(regex_t
* reg
)
5665 reg
->optimize
= OPTIMIZE_NONE
;
5667 reg
->anchor_dmin
= 0;
5668 reg
->anchor_dmax
= 0;
5669 reg
->sub_anchor
= 0;
5670 reg
->exact_end
= (UChar
* )NULL
;
5671 reg
->threshold_len
= 0;
5672 if (IS_NOT_NULL(reg
->exact
)) {
5674 reg
->exact
= (UChar
* )NULL
;
5680 static void print_enc_string(FILE* fp
, OnigEncoding enc
,
5681 const UChar
*s
, const UChar
*end
)
5683 fprintf(fp
, "\nPATTERN: /");
5685 if (ONIGENC_MBC_MINLEN(enc
) > 1) {
5691 code
= ONIGENC_MBC_TO_CODE(enc
, p
, end
);
5693 fprintf(fp
, " 0x%04x ", (int )code
);
5696 fputc((int )code
, fp
);
5699 p
+= enclen(enc
, p
);
5704 fputc((int )*s
, fp
);
5712 #endif /* ONIG_DEBUG */
5714 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5717 print_distance_range(FILE* f
, OnigLen a
, OnigLen b
)
5719 if (a
== INFINITE_LEN
)
5722 fprintf(f
, "(%u)", a
);
5726 if (b
== INFINITE_LEN
)
5729 fprintf(f
, "(%u)", b
);
5733 print_anchor(FILE* f
, int anchor
)
5739 if (anchor
& ANCHOR_BEGIN_BUF
) {
5740 fprintf(f
, "begin-buf");
5743 if (anchor
& ANCHOR_BEGIN_LINE
) {
5744 if (q
) fprintf(f
, ", ");
5746 fprintf(f
, "begin-line");
5748 if (anchor
& ANCHOR_BEGIN_POSITION
) {
5749 if (q
) fprintf(f
, ", ");
5751 fprintf(f
, "begin-pos");
5753 if (anchor
& ANCHOR_END_BUF
) {
5754 if (q
) fprintf(f
, ", ");
5756 fprintf(f
, "end-buf");
5758 if (anchor
& ANCHOR_SEMI_END_BUF
) {
5759 if (q
) fprintf(f
, ", ");
5761 fprintf(f
, "semi-end-buf");
5763 if (anchor
& ANCHOR_END_LINE
) {
5764 if (q
) fprintf(f
, ", ");
5766 fprintf(f
, "end-line");
5768 if (anchor
& ANCHOR_ANYCHAR_INF
) {
5769 if (q
) fprintf(f
, ", ");
5771 fprintf(f
, "anychar-inf");
5773 if (anchor
& ANCHOR_ANYCHAR_INF_ML
) {
5774 if (q
) fprintf(f
, ", ");
5775 fprintf(f
, "anychar-inf-ml");
5782 print_optimize_info(FILE* f
, regex_t
* reg
)
5784 static const char* on
[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
5785 "EXACT_IC", "MAP" };
5787 fprintf(f
, "optimize: %s\n", on
[reg
->optimize
]);
5788 fprintf(f
, " anchor: "); print_anchor(f
, reg
->anchor
);
5789 if ((reg
->anchor
& ANCHOR_END_BUF_MASK
) != 0)
5790 print_distance_range(f
, reg
->anchor_dmin
, reg
->anchor_dmax
);
5793 if (reg
->optimize
) {
5794 fprintf(f
, " sub anchor: "); print_anchor(f
, reg
->sub_anchor
);
5801 fprintf(f
, "exact: [");
5802 for (p
= reg
->exact
; p
< reg
->exact_end
; p
++) {
5805 fprintf(f
, "]: length: %ld\n", (reg
->exact_end
- reg
->exact
));
5807 else if (reg
->optimize
& OPTIMIZE_MAP
) {
5810 for (i
= 0; i
< ONIG_CHAR_TABLE_SIZE
; i
++)
5811 if (reg
->map
[i
]) n
++;
5813 fprintf(f
, "map: n=%d\n", n
);
5817 for (i
= 0; i
< ONIG_CHAR_TABLE_SIZE
; i
++) {
5818 if (reg
->map
[i
] != 0) {
5819 if (c
> 0) fputs(", ", f
);
5821 if (ONIGENC_MBC_MAXLEN(reg
->enc
) == 1 &&
5822 ONIGENC_IS_CODE_PRINT(reg
->enc
, (OnigCodePoint
)i
))
5825 fprintf(f
, "%d", i
);
5836 onig_get_regex_ext(regex_t
* reg
)
5838 if (IS_NULL(REG_EXTP(reg
))) {
5839 RegexExt
* ext
= (RegexExt
* )xmalloc(sizeof(*ext
));
5840 if (IS_NULL(ext
)) return 0;
5843 ext
->pattern_end
= 0;
5846 ext
->callout_num
= 0;
5847 ext
->callout_list_alloc
= 0;
5848 ext
->callout_list
= 0;
5851 REG_EXTPL(reg
) = (void* )ext
;
5854 return REG_EXTP(reg
);
5858 free_regex_ext(RegexExt
* ext
)
5860 if (IS_NOT_NULL(ext
)) {
5861 if (IS_NOT_NULL(ext
->pattern
))
5862 xfree((void* )ext
->pattern
);
5865 if (IS_NOT_NULL(ext
->tag_table
))
5866 onig_callout_tag_table_free(ext
->tag_table
);
5868 if (IS_NOT_NULL(ext
->callout_list
))
5869 onig_free_reg_callout_list(ext
->callout_num
, ext
->callout_list
);
5877 onig_ext_set_pattern(regex_t
* reg
, const UChar
* pattern
, const UChar
* pattern_end
)
5882 ext
= onig_get_regex_ext(reg
);
5883 CHECK_NULL_RETURN_MEMERR(ext
);
5885 s
= onigenc_strdup(reg
->enc
, pattern
, pattern_end
);
5886 CHECK_NULL_RETURN_MEMERR(s
);
5889 ext
->pattern_end
= s
+ (pattern_end
- pattern
);
5896 onig_free_body(regex_t
* reg
)
5898 if (IS_NOT_NULL(reg
)) {
5899 if (IS_NOT_NULL(reg
->p
)) xfree(reg
->p
);
5900 if (IS_NOT_NULL(reg
->exact
)) xfree(reg
->exact
);
5901 if (IS_NOT_NULL(reg
->int_map
)) xfree(reg
->int_map
);
5902 if (IS_NOT_NULL(reg
->int_map_backward
)) xfree(reg
->int_map_backward
);
5903 if (IS_NOT_NULL(reg
->repeat_range
)) xfree(reg
->repeat_range
);
5904 if (IS_NOT_NULL(REG_EXTP(reg
))) {
5905 free_regex_ext(REG_EXTP(reg
));
5909 onig_names_free(reg
);
5914 onig_free(regex_t
* reg
)
5916 if (IS_NOT_NULL(reg
)) {
5917 onig_free_body(reg
);
5922 #define REGEX_TRANSFER(to,from) do {\
5923 onig_free_body(to);\
5924 xmemcpy(to, from, sizeof(regex_t));\
5929 onig_transfer(regex_t
* to
, regex_t
* from
)
5931 REGEX_TRANSFER(to
, from
);
5935 #ifdef ONIG_DEBUG_PARSE
5936 static void print_tree
P_((FILE* f
, Node
* node
));
5940 onig_compile(regex_t
* reg
, const UChar
* pattern
, const UChar
* pattern_end
,
5941 OnigErrorInfo
* einfo
)
5943 #define COMPILE_INIT_SIZE 20
5949 UnsetAddrList uslist
;
5953 if (IS_NOT_NULL(einfo
)) {
5954 einfo
->enc
= reg
->enc
;
5955 einfo
->par
= (UChar
* )NULL
;
5959 print_enc_string(stderr
, reg
->enc
, pattern
, pattern_end
);
5962 if (reg
->alloc
== 0) {
5963 init_size
= (int )(pattern_end
- pattern
) * 2;
5964 if (init_size
<= 0) init_size
= COMPILE_INIT_SIZE
;
5965 r
= BB_INIT(reg
, init_size
);
5966 if (r
!= 0) goto end
;
5972 reg
->num_repeat
= 0;
5973 reg
->num_null_check
= 0;
5974 reg
->repeat_range_alloc
= 0;
5975 reg
->repeat_range
= (OnigRepeatRange
* )NULL
;
5977 r
= onig_parse_tree(&root
, pattern
, pattern_end
, reg
, &scan_env
);
5978 if (r
!= 0) goto err
;
5980 /* mixed use named group and no-named group */
5981 if (scan_env
.num_named
> 0 &&
5982 IS_SYNTAX_BV(scan_env
.syntax
, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP
) &&
5983 ! ONIG_IS_OPTION_ON(reg
->options
, ONIG_OPTION_CAPTURE_GROUP
)) {
5984 if (scan_env
.num_named
!= scan_env
.num_mem
)
5985 r
= disable_noname_group_capture(&root
, reg
, &scan_env
);
5987 r
= numbered_ref_check(root
);
5989 if (r
!= 0) goto err
;
5992 r
= check_backrefs(root
, &scan_env
);
5993 if (r
!= 0) goto err
;
5996 if (scan_env
.num_call
> 0) {
5997 r
= unset_addr_list_init(&uslist
, scan_env
.num_call
);
5998 if (r
!= 0) goto err
;
5999 scan_env
.unset_addr_list
= &uslist
;
6000 r
= setup_call(root
, &scan_env
, 0);
6001 if (r
!= 0) goto err_unset
;
6002 r
= setup_call2(root
);
6003 if (r
!= 0) goto err_unset
;
6004 r
= recursive_call_check_trav(root
, &scan_env
, 0);
6005 if (r
< 0) goto err_unset
;
6006 r
= infinite_recursive_call_check_trav(root
, &scan_env
);
6007 if (r
!= 0) goto err_unset
;
6009 setup_called_state(root
, 0);
6012 reg
->num_call
= scan_env
.num_call
;
6015 r
= setup_tree(root
, reg
, 0, &scan_env
);
6016 if (r
!= 0) goto err_unset
;
6018 #ifdef ONIG_DEBUG_PARSE
6019 print_tree(stderr
, root
);
6022 reg
->capture_history
= scan_env
.capture_history
;
6023 reg
->bt_mem_start
= scan_env
.bt_mem_start
;
6024 reg
->bt_mem_start
|= reg
->capture_history
;
6025 if (IS_FIND_CONDITION(reg
->options
))
6026 MEM_STATUS_ON_ALL(reg
->bt_mem_end
);
6028 reg
->bt_mem_end
= scan_env
.bt_mem_end
;
6029 reg
->bt_mem_end
|= reg
->capture_history
;
6031 reg
->bt_mem_start
|= reg
->bt_mem_end
;
6033 clear_optimize_info(reg
);
6034 #ifndef ONIG_DONT_OPTIMIZE
6035 r
= set_optimize_info_from_tree(root
, reg
, &scan_env
);
6036 if (r
!= 0) goto err_unset
;
6039 if (IS_NOT_NULL(scan_env
.mem_env_dynamic
)) {
6040 xfree(scan_env
.mem_env_dynamic
);
6041 scan_env
.mem_env_dynamic
= (MemEnv
* )NULL
;
6044 r
= compile_tree(root
, reg
, &scan_env
);
6046 if (scan_env
.keep_num
> 0) {
6047 r
= add_opcode(reg
, OP_UPDATE_VAR
);
6048 if (r
!= 0) goto err
;
6049 r
= add_update_var_type(reg
, UPDATE_VAR_KEEP_FROM_STACK_LAST
);
6050 if (r
!= 0) goto err
;
6051 r
= add_mem_num(reg
, 0 /* not used */);
6052 if (r
!= 0) goto err
;
6055 r
= add_opcode(reg
, OP_END
);
6057 if (scan_env
.num_call
> 0) {
6058 r
= fix_unset_addr_list(&uslist
, reg
);
6059 unset_addr_list_end(&uslist
);
6060 if (r
!= 0) goto err
;
6064 if ((reg
->num_repeat
!= 0) || (reg
->bt_mem_end
!= 0)
6066 || (IS_NOT_NULL(REG_EXTP(reg
)) && REG_EXTP(reg
)->callout_num
!= 0)
6069 reg
->stack_pop_level
= STACK_POP_LEVEL_ALL
;
6071 if (reg
->bt_mem_start
!= 0)
6072 reg
->stack_pop_level
= STACK_POP_LEVEL_MEM_START
;
6074 reg
->stack_pop_level
= STACK_POP_LEVEL_FREE
;
6078 else if (scan_env
.num_call
> 0) {
6079 unset_addr_list_end(&uslist
);
6082 onig_node_free(root
);
6084 #ifdef ONIG_DEBUG_COMPILE
6085 onig_print_names(stderr
, reg
);
6086 onig_print_compiled_byte_code_list(stderr
, reg
);
6094 if (scan_env
.num_call
> 0) {
6095 unset_addr_list_end(&uslist
);
6099 if (IS_NOT_NULL(scan_env
.error
)) {
6100 if (IS_NOT_NULL(einfo
)) {
6101 einfo
->par
= scan_env
.error
;
6102 einfo
->par_end
= scan_env
.error_end
;
6106 onig_node_free(root
);
6107 if (IS_NOT_NULL(scan_env
.mem_env_dynamic
))
6108 xfree(scan_env
.mem_env_dynamic
);
6113 static int onig_inited
= 0;
6116 onig_reg_init(regex_t
* reg
, OnigOptionType option
, OnigCaseFoldType case_fold_flag
,
6117 OnigEncoding enc
, OnigSyntaxType
* syntax
)
6121 xmemset(reg
, 0, sizeof(*reg
));
6123 if (onig_inited
== 0) {
6125 return ONIGERR_LIBRARY_IS_NOT_INITIALIZED
;
6127 r
= onig_initialize(&enc
, 1);
6129 return ONIGERR_FAIL_TO_INITIALIZE
;
6131 onig_warning("You didn't call onig_initialize() explicitly");
6136 return ONIGERR_INVALID_ARGUMENT
;
6138 if (ONIGENC_IS_UNDEF(enc
))
6139 return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED
;
6141 if ((option
& (ONIG_OPTION_DONT_CAPTURE_GROUP
|ONIG_OPTION_CAPTURE_GROUP
))
6142 == (ONIG_OPTION_DONT_CAPTURE_GROUP
|ONIG_OPTION_CAPTURE_GROUP
)) {
6143 return ONIGERR_INVALID_COMBINATION_OF_OPTIONS
;
6146 if ((option
& ONIG_OPTION_NEGATE_SINGLELINE
) != 0) {
6147 option
|= syntax
->options
;
6148 option
&= ~ONIG_OPTION_SINGLELINE
;
6151 option
|= syntax
->options
;
6154 (reg
)->options
= option
;
6155 (reg
)->syntax
= syntax
;
6156 (reg
)->optimize
= 0;
6157 (reg
)->exact
= (UChar
* )NULL
;
6158 (reg
)->int_map
= (int* )NULL
;
6159 (reg
)->int_map_backward
= (int* )NULL
;
6160 REG_EXTPL(reg
) = NULL
;
6162 (reg
)->p
= (UChar
* )NULL
;
6165 (reg
)->name_table
= (void* )NULL
;
6167 (reg
)->case_fold_flag
= case_fold_flag
;
6172 onig_new_without_alloc(regex_t
* reg
,
6173 const UChar
* pattern
, const UChar
* pattern_end
,
6174 OnigOptionType option
, OnigEncoding enc
,
6175 OnigSyntaxType
* syntax
, OnigErrorInfo
* einfo
)
6179 r
= onig_reg_init(reg
, option
, ONIGENC_CASE_FOLD_DEFAULT
, enc
, syntax
);
6180 if (r
!= 0) return r
;
6182 r
= onig_compile(reg
, pattern
, pattern_end
, einfo
);
6187 onig_new(regex_t
** reg
, const UChar
* pattern
, const UChar
* pattern_end
,
6188 OnigOptionType option
, OnigEncoding enc
, OnigSyntaxType
* syntax
,
6189 OnigErrorInfo
* einfo
)
6193 *reg
= (regex_t
* )xmalloc(sizeof(regex_t
));
6194 if (IS_NULL(*reg
)) return ONIGERR_MEMORY
;
6196 r
= onig_reg_init(*reg
, option
, ONIGENC_CASE_FOLD_DEFAULT
, enc
, syntax
);
6197 if (r
!= 0) goto err
;
6199 r
= onig_compile(*reg
, pattern
, pattern_end
, einfo
);
6209 onig_initialize(OnigEncoding encodings
[], int n
)
6214 if (onig_inited
!= 0)
6221 for (i
= 0; i
< n
; i
++) {
6222 OnigEncoding enc
= encodings
[i
];
6223 r
= onig_initialize_encoding(enc
);
6231 typedef struct EndCallListItem
{
6232 struct EndCallListItem
* next
;
6234 } EndCallListItemType
;
6236 static EndCallListItemType
* EndCallTop
;
6238 extern void onig_add_end_call(void (*func
)(void))
6240 EndCallListItemType
* item
;
6242 item
= (EndCallListItemType
* )xmalloc(sizeof(*item
));
6243 if (item
== 0) return ;
6245 item
->next
= EndCallTop
;
6252 exec_end_call_list(void)
6254 EndCallListItemType
* prev
;
6257 while (EndCallTop
!= 0) {
6258 func
= EndCallTop
->func
;
6262 EndCallTop
= EndCallTop
->next
;
6270 exec_end_call_list();
6273 onig_global_callout_names_free();
6284 onig_is_in_code_range(const UChar
* p
, OnigCodePoint code
)
6286 OnigCodePoint n
, *data
;
6287 OnigCodePoint low
, high
, x
;
6289 GET_CODE_POINT(n
, p
);
6290 data
= (OnigCodePoint
* )p
;
6293 for (low
= 0, high
= n
; low
< high
; ) {
6294 x
= (low
+ high
) >> 1;
6295 if (code
> data
[x
* 2 + 1])
6301 return ((low
< n
&& code
>= data
[low
* 2]) ? 1 : 0);
6305 onig_is_code_in_cc_len(int elen
, OnigCodePoint code
, /* CClassNode* */ void* cc_arg
)
6308 CClassNode
* cc
= (CClassNode
* )cc_arg
;
6310 if (elen
> 1 || (code
>= SINGLE_BYTE_SIZE
)) {
6311 if (IS_NULL(cc
->mbuf
)) {
6315 found
= (onig_is_in_code_range(cc
->mbuf
->p
, code
) != 0 ? 1 : 0);
6319 found
= (BITSET_AT(cc
->bs
, code
) == 0 ? 0 : 1);
6322 if (IS_NCCLASS_NOT(cc
))
6329 onig_is_code_in_cc(OnigEncoding enc
, OnigCodePoint code
, CClassNode
* cc
)
6333 if (ONIGENC_MBC_MINLEN(enc
) > 1) {
6337 len
= ONIGENC_CODE_TO_MBCLEN(enc
, code
);
6339 return onig_is_code_in_cc_len(len
, code
, cc
);
6343 #ifdef ONIG_DEBUG_PARSE
6346 p_string(FILE* f
, int len
, UChar
* s
)
6349 while (len
-- > 0) { fputc(*s
++, f
); }
6353 Indent(FILE* f
, int indent
)
6356 for (i
= 0; i
< indent
; i
++) putc(' ', f
);
6360 print_indent_tree(FILE* f
, Node
* node
, int indent
)
6368 if (IS_NULL(node
)) {
6369 fprintf(f
, "ERROR: null node!!!\n");
6373 type
= NODE_TYPE(node
);
6377 if (type
== NODE_LIST
)
6378 fprintf(f
, "<list:%p>\n", node
);
6380 fprintf(f
, "<alt:%p>\n", node
);
6382 print_indent_tree(f
, NODE_CAR(node
), indent
+ add
);
6383 while (IS_NOT_NULL(node
= NODE_CDR(node
))) {
6384 if (NODE_TYPE(node
) != type
) {
6385 fprintf(f
, "ERROR: list/alt right is not a cons. %d\n", NODE_TYPE(node
));
6388 print_indent_tree(f
, NODE_CAR(node
), indent
+ add
);
6393 fprintf(f
, "<string%s:%p>", (NODE_STRING_IS_RAW(node
) ? "-raw" : ""), node
);
6394 for (p
= STR_(node
)->s
; p
< STR_(node
)->end
; p
++) {
6395 if (*p
>= 0x20 && *p
< 0x7f)
6398 fprintf(f
, " 0x%02x", *p
);
6404 fprintf(f
, "<cclass:%p>", node
);
6405 if (IS_NCCLASS_NOT(CCLASS_(node
))) fputs(" not", f
);
6406 if (CCLASS_(node
)->mbuf
) {
6407 BBuf
* bbuf
= CCLASS_(node
)->mbuf
;
6408 for (i
= 0; i
< bbuf
->used
; i
++) {
6409 if (i
> 0) fprintf(f
, ",");
6410 fprintf(f
, "%0x", bbuf
->p
[i
]);
6416 fprintf(f
, "<ctype:%p> ", node
);
6417 switch (CTYPE_(node
)->ctype
) {
6419 fprintf(f
, "<anychar:%p>", node
);
6422 case ONIGENC_CTYPE_WORD
:
6423 if (CTYPE_(node
)->not != 0)
6424 fputs("not word", f
);
6428 if (CTYPE_(node
)->ascii_mode
!= 0)
6429 fputs(" (ascii)", f
);
6434 fprintf(f
, "ERROR: undefined ctype.\n");
6440 fprintf(f
, "<anchor:%p> ", node
);
6441 switch (ANCHOR_(node
)->type
) {
6442 case ANCHOR_BEGIN_BUF
: fputs("begin buf", f
); break;
6443 case ANCHOR_END_BUF
: fputs("end buf", f
); break;
6444 case ANCHOR_BEGIN_LINE
: fputs("begin line", f
); break;
6445 case ANCHOR_END_LINE
: fputs("end line", f
); break;
6446 case ANCHOR_SEMI_END_BUF
: fputs("semi end buf", f
); break;
6447 case ANCHOR_BEGIN_POSITION
: fputs("begin position", f
); break;
6449 case ANCHOR_WORD_BOUNDARY
: fputs("word boundary", f
); break;
6450 case ANCHOR_NO_WORD_BOUNDARY
: fputs("not word boundary", f
); break;
6451 #ifdef USE_WORD_BEGIN_END
6452 case ANCHOR_WORD_BEGIN
: fputs("word begin", f
); break;
6453 case ANCHOR_WORD_END
: fputs("word end", f
); break;
6455 case ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY
:
6456 fputs("extended-grapheme-cluster boundary", f
); break;
6457 case ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY
:
6458 fputs("no-extended-grapheme-cluster boundary", f
); break;
6459 case ANCHOR_PREC_READ
:
6460 fprintf(f
, "prec read\n");
6461 print_indent_tree(f
, NODE_BODY(node
), indent
+ add
);
6463 case ANCHOR_PREC_READ_NOT
:
6464 fprintf(f
, "prec read not\n");
6465 print_indent_tree(f
, NODE_BODY(node
), indent
+ add
);
6467 case ANCHOR_LOOK_BEHIND
:
6468 fprintf(f
, "look behind\n");
6469 print_indent_tree(f
, NODE_BODY(node
), indent
+ add
);
6471 case ANCHOR_LOOK_BEHIND_NOT
:
6472 fprintf(f
, "look behind not\n");
6473 print_indent_tree(f
, NODE_BODY(node
), indent
+ add
);
6477 fprintf(f
, "ERROR: undefined anchor type.\n");
6485 BackRefNode
* br
= BACKREF_(node
);
6487 fprintf(f
, "<backref%s:%p>", NODE_IS_CHECKER(node
) ? "-checker" : "", node
);
6488 for (i
= 0; i
< br
->back_num
; i
++) {
6489 if (i
> 0) fputs(", ", f
);
6490 fprintf(f
, "%d", p
[i
]);
6498 CallNode
* cn
= CALL_(node
);
6499 fprintf(f
, "<call:%p>", node
);
6500 p_string(f
, cn
->name_end
- cn
->name
, cn
->name
);
6506 fprintf(f
, "<quantifier:%p>{%d,%d}%s\n", node
,
6507 QUANT_(node
)->lower
, QUANT_(node
)->upper
,
6508 (QUANT_(node
)->greedy
? "" : "?"));
6509 print_indent_tree(f
, NODE_BODY(node
), indent
+ add
);
6512 case NODE_ENCLOSURE
:
6513 fprintf(f
, "<enclosure:%p> ", node
);
6514 switch (ENCLOSURE_(node
)->type
) {
6515 case ENCLOSURE_OPTION
:
6516 fprintf(f
, "option:%d", ENCLOSURE_(node
)->o
.options
);
6518 case ENCLOSURE_MEMORY
:
6519 fprintf(f
, "memory:%d", ENCLOSURE_(node
)->m
.regnum
);
6521 case ENCLOSURE_STOP_BACKTRACK
:
6522 fprintf(f
, "stop-bt");
6529 print_indent_tree(f
, NODE_BODY(node
), indent
+ add
);
6533 fprintf(f
, "<gimmick:%p> ", node
);
6534 switch (GIMMICK_(node
)->type
) {
6539 fprintf(f
, "keep:%d", GIMMICK_(node
)->id
);
6542 fprintf(f
, "save:%d:%d", GIMMICK_(node
)->detail_type
, GIMMICK_(node
)->id
);
6544 case GIMMICK_UPDATE_VAR
:
6545 fprintf(f
, "update_var:%d:%d", GIMMICK_(node
)->detail_type
, GIMMICK_(node
)->id
);
6548 case GIMMICK_CALLOUT
:
6549 switch (GIMMICK_(node
)->detail_type
) {
6550 case ONIG_CALLOUT_OF_CONTENTS
:
6551 fprintf(f
, "callout:contents:%d", GIMMICK_(node
)->num
);
6553 case ONIG_CALLOUT_OF_NAME
:
6554 fprintf(f
, "callout:name:%d:%d", GIMMICK_(node
)->id
, GIMMICK_(node
)->num
);
6562 fprintf(f
, "print_indent_tree: undefined node type %d\n", NODE_TYPE(node
));
6566 if (type
!= NODE_LIST
&& type
!= NODE_ALT
&& type
!= NODE_QUANT
&&
6567 type
!= NODE_ENCLOSURE
)
6573 print_tree(FILE* f
, Node
* node
)
6575 print_indent_tree(f
, node
, 0);