1 /**********************************************************************
2 regexec.c - Oniguruma (regular expression library)
3 **********************************************************************/
5 * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 #define IS_MBC_WORD_ASCII_MODE(enc,s,end,mode) \
32 ((mode) == 0 ? ONIGENC_IS_MBC_WORD(enc,s,end) : ONIGENC_IS_MBC_WORD_ASCII(enc,s,end))
34 #ifdef USE_CRNL_AS_LINE_TERMINATOR
35 #define ONIGENC_IS_MBC_CRNL(enc,p,end) \
36 (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
37 ONIGENC_IS_MBC_NEWLINE(enc,(p+enclen(enc,p)),end))
40 #define CHECK_INTERRUPT_IN_MATCH
44 int last_match_at_call_counter
;
48 } slot
[ONIG_CALLOUT_DATA_SLOT_NUM
];
52 struct OnigMatchParamStruct
{
53 unsigned int match_stack_limit
;
54 unsigned long retry_limit_in_match
;
56 OnigCalloutFunc progress_callout_of_contents
;
57 OnigCalloutFunc retraction_callout_of_contents
;
58 int match_at_call_counter
;
59 void* callout_user_data
;
60 CalloutData
* callout_data
;
61 int callout_data_alloc_num
;
66 onig_set_match_stack_limit_size_of_match_param(OnigMatchParam
* param
,
69 param
->match_stack_limit
= limit
;
74 onig_set_retry_limit_in_match_of_match_param(OnigMatchParam
* param
,
77 param
->retry_limit_in_match
= limit
;
82 onig_set_progress_callout_of_match_param(OnigMatchParam
* param
, OnigCalloutFunc f
)
85 param
->progress_callout_of_contents
= f
;
88 return ONIG_NO_SUPPORT_CONFIG
;
93 onig_set_retraction_callout_of_match_param(OnigMatchParam
* param
, OnigCalloutFunc f
)
96 param
->retraction_callout_of_contents
= f
;
99 return ONIG_NO_SUPPORT_CONFIG
;
104 onig_set_callout_user_data_of_match_param(OnigMatchParam
* param
, void* user_data
)
107 param
->callout_user_data
= user_data
;
110 return ONIG_NO_SUPPORT_CONFIG
;
119 OnigOptionType options
;
122 const UChar
* start
; /* search start position (for \G: BEGIN_POSITION) */
123 unsigned int match_stack_limit
;
124 unsigned long retry_limit_in_match
;
126 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
127 int best_len
; /* for ONIG_OPTION_FIND_LONGEST */
153 static OpInfoType OpInfo
[] = {
154 { OP_FINISH
, "finish", ARG_NON
},
155 { OP_END
, "end", ARG_NON
},
156 { OP_EXACT1
, "exact1", ARG_SPECIAL
},
157 { OP_EXACT2
, "exact2", ARG_SPECIAL
},
158 { OP_EXACT3
, "exact3", ARG_SPECIAL
},
159 { OP_EXACT4
, "exact4", ARG_SPECIAL
},
160 { OP_EXACT5
, "exact5", ARG_SPECIAL
},
161 { OP_EXACTN
, "exactn", ARG_SPECIAL
},
162 { OP_EXACTMB2N1
, "exactmb2-n1", ARG_SPECIAL
},
163 { OP_EXACTMB2N2
, "exactmb2-n2", ARG_SPECIAL
},
164 { OP_EXACTMB2N3
, "exactmb2-n3", ARG_SPECIAL
},
165 { OP_EXACTMB2N
, "exactmb2-n", ARG_SPECIAL
},
166 { OP_EXACTMB3N
, "exactmb3n" , ARG_SPECIAL
},
167 { OP_EXACTMBN
, "exactmbn", ARG_SPECIAL
},
168 { OP_EXACT1_IC
, "exact1-ic", ARG_SPECIAL
},
169 { OP_EXACTN_IC
, "exactn-ic", ARG_SPECIAL
},
170 { OP_CCLASS
, "cclass", ARG_SPECIAL
},
171 { OP_CCLASS_MB
, "cclass-mb", ARG_SPECIAL
},
172 { OP_CCLASS_MIX
, "cclass-mix", ARG_SPECIAL
},
173 { OP_CCLASS_NOT
, "cclass-not", ARG_SPECIAL
},
174 { OP_CCLASS_MB_NOT
, "cclass-mb-not", ARG_SPECIAL
},
175 { OP_CCLASS_MIX_NOT
, "cclass-mix-not", ARG_SPECIAL
},
176 #ifdef USE_OP_CCLASS_NODE
177 { OP_CCLASS_NODE
, "cclass-node", ARG_SPECIAL
},
179 { OP_ANYCHAR
, "anychar", ARG_NON
},
180 { OP_ANYCHAR_ML
, "anychar-ml", ARG_NON
},
181 { OP_ANYCHAR_STAR
, "anychar*", ARG_NON
},
182 { OP_ANYCHAR_ML_STAR
, "anychar-ml*", ARG_NON
},
183 { OP_ANYCHAR_STAR_PEEK_NEXT
, "anychar*-peek-next", ARG_SPECIAL
},
184 { OP_ANYCHAR_ML_STAR_PEEK_NEXT
, "anychar-ml*-peek-next", ARG_SPECIAL
},
185 { OP_WORD
, "word", ARG_NON
},
186 { OP_WORD_ASCII
, "word-ascii", ARG_NON
},
187 { OP_NO_WORD
, "not-word", ARG_NON
},
188 { OP_NO_WORD_ASCII
, "not-word-ascii", ARG_NON
},
189 { OP_WORD_BOUNDARY
, "word-boundary", ARG_MODE
},
190 { OP_NO_WORD_BOUNDARY
, "not-word-boundary", ARG_MODE
},
191 { OP_WORD_BEGIN
, "word-begin", ARG_MODE
},
192 { OP_WORD_END
, "word-end", ARG_MODE
},
193 { OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY
, "extended-grapheme-cluster-boundary", ARG_NON
},
194 { OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY
, "no-extended-grapheme-cluster-boundary", ARG_NON
},
195 { OP_BEGIN_BUF
, "begin-buf", ARG_NON
},
196 { OP_END_BUF
, "end-buf", ARG_NON
},
197 { OP_BEGIN_LINE
, "begin-line", ARG_NON
},
198 { OP_END_LINE
, "end-line", ARG_NON
},
199 { OP_SEMI_END_BUF
, "semi-end-buf", ARG_NON
},
200 { OP_BEGIN_POSITION
, "begin-position", ARG_NON
},
201 { OP_BACKREF1
, "backref1", ARG_NON
},
202 { OP_BACKREF2
, "backref2", ARG_NON
},
203 { OP_BACKREF_N
, "backref-n", ARG_MEMNUM
},
204 { OP_BACKREF_N_IC
, "backref-n-ic", ARG_SPECIAL
},
205 { OP_BACKREF_MULTI
, "backref_multi", ARG_SPECIAL
},
206 { OP_BACKREF_MULTI_IC
, "backref_multi-ic", ARG_SPECIAL
},
207 { OP_BACKREF_WITH_LEVEL
, "backref_with_level", ARG_SPECIAL
},
208 { OP_BACKREF_CHECK
, "backref_check", ARG_SPECIAL
},
209 { OP_BACKREF_CHECK_WITH_LEVEL
, "backref_check_with_level", ARG_SPECIAL
},
210 { OP_MEMORY_START_PUSH
, "mem-start-push", ARG_MEMNUM
},
211 { OP_MEMORY_START
, "mem-start", ARG_MEMNUM
},
212 { OP_MEMORY_END_PUSH
, "mem-end-push", ARG_MEMNUM
},
213 { OP_MEMORY_END_PUSH_REC
, "mem-end-push-rec", ARG_MEMNUM
},
214 { OP_MEMORY_END
, "mem-end", ARG_MEMNUM
},
215 { OP_MEMORY_END_REC
, "mem-end-rec", ARG_MEMNUM
},
216 { OP_FAIL
, "fail", ARG_NON
},
217 { OP_JUMP
, "jump", ARG_RELADDR
},
218 { OP_PUSH
, "push", ARG_RELADDR
},
219 { OP_PUSH_SUPER
, "push-super", ARG_RELADDR
},
220 { OP_POP_OUT
, "pop-out", ARG_NON
},
221 { OP_PUSH_OR_JUMP_EXACT1
, "push-or-jump-e1", ARG_SPECIAL
},
222 { OP_PUSH_IF_PEEK_NEXT
, "push-if-peek-next", ARG_SPECIAL
},
223 { OP_REPEAT
, "repeat", ARG_SPECIAL
},
224 { OP_REPEAT_NG
, "repeat-ng", ARG_SPECIAL
},
225 { OP_REPEAT_INC
, "repeat-inc", ARG_MEMNUM
},
226 { OP_REPEAT_INC_NG
, "repeat-inc-ng", ARG_MEMNUM
},
227 { OP_REPEAT_INC_SG
, "repeat-inc-sg", ARG_MEMNUM
},
228 { OP_REPEAT_INC_NG_SG
, "repeat-inc-ng-sg", ARG_MEMNUM
},
229 { OP_EMPTY_CHECK_START
, "empty-check-start", ARG_MEMNUM
},
230 { OP_EMPTY_CHECK_END
, "empty-check-end", ARG_MEMNUM
},
231 { OP_EMPTY_CHECK_END_MEMST
,"empty-check-end-memst", ARG_MEMNUM
},
232 { OP_EMPTY_CHECK_END_MEMST_PUSH
,"empty-check-end-memst-push", ARG_MEMNUM
},
233 { OP_PREC_READ_START
, "push-pos", ARG_NON
},
234 { OP_PREC_READ_END
, "pop-pos", ARG_NON
},
235 { OP_PREC_READ_NOT_START
, "prec-read-not-start", ARG_RELADDR
},
236 { OP_PREC_READ_NOT_END
, "prec-read-not-end", ARG_NON
},
237 { OP_ATOMIC_START
, "atomic-start", ARG_NON
},
238 { OP_ATOMIC_END
, "atomic-end", ARG_NON
},
239 { OP_LOOK_BEHIND
, "look-behind", ARG_SPECIAL
},
240 { OP_LOOK_BEHIND_NOT_START
, "look-behind-not-start", ARG_SPECIAL
},
241 { OP_LOOK_BEHIND_NOT_END
, "look-behind-not-end", ARG_NON
},
242 { OP_CALL
, "call", ARG_ABSADDR
},
243 { OP_RETURN
, "return", ARG_NON
},
244 { OP_PUSH_SAVE_VAL
, "push-save-val", ARG_SPECIAL
},
245 { OP_UPDATE_VAR
, "update-var", ARG_SPECIAL
},
247 { OP_CALLOUT_CONTENTS
, "callout-contents", ARG_SPECIAL
},
248 { OP_CALLOUT_NAME
, "callout-name", ARG_SPECIAL
},
258 for (i
= 0; OpInfo
[i
].opcode
>= 0; i
++) {
259 if (opcode
== OpInfo
[i
].opcode
)
260 return OpInfo
[i
].name
;
266 op2arg_type(int opcode
)
270 for (i
= 0; OpInfo
[i
].opcode
>= 0; i
++) {
271 if (opcode
== OpInfo
[i
].opcode
)
272 return OpInfo
[i
].arg_type
;
278 p_string(FILE* f
, int len
, UChar
* s
)
281 while (len
-- > 0) { fputc(*s
++, f
); }
285 p_len_string(FILE* f
, LengthType len
, int mb_len
, UChar
* s
)
287 int x
= len
* mb_len
;
289 fprintf(f
, ":%d:", len
);
290 while (x
-- > 0) { fputc(*s
++, f
); }
294 p_rel_addr(FILE* f
, RelAddrType rel_addr
, UChar
* p
, UChar
* start
)
296 RelAddrType curr
= (RelAddrType
)(p
- start
);
298 fprintf(f
, "{%d/%d}", rel_addr
, curr
+ rel_addr
);
302 bitset_on_num(BitSetRef bs
)
307 for (i
= 0; i
< SINGLE_BYTE_SIZE
; i
++) {
308 if (BITSET_AT(bs
, i
)) n
++;
314 onig_print_compiled_byte_code(FILE* f
, UChar
* bp
, UChar
** nextp
, UChar
* start
,
323 OnigOptionType option
;
327 fprintf(f
, "%s", op2name(*bp
));
328 arg_type
= op2arg_type(*bp
);
329 if (arg_type
!= ARG_SPECIAL
) {
335 GET_RELADDR_INC(addr
, bp
);
337 p_rel_addr(f
, addr
, bp
, start
);
340 GET_ABSADDR_INC(addr
, bp
);
341 fprintf(f
, ":{/%d}", addr
);
344 GET_LENGTH_INC(len
, bp
);
345 fprintf(f
, ":%d", len
);
348 mem
= *((MemNumType
* )bp
);
350 fprintf(f
, ":%d", mem
);
354 OnigOptionType option
= *((OnigOptionType
* )bp
);
356 fprintf(f
, ":%d", option
);
360 mode
= *((ModeType
* )bp
);
362 fprintf(f
, ":%d", mode
);
371 case OP_ANYCHAR_STAR_PEEK_NEXT
:
372 case OP_ANYCHAR_ML_STAR_PEEK_NEXT
:
373 p_string(f
, 1, bp
++); break;
375 p_string(f
, 2, bp
); bp
+= 2; break;
377 p_string(f
, 3, bp
); bp
+= 3; break;
379 p_string(f
, 4, bp
); bp
+= 4; break;
381 p_string(f
, 5, bp
); bp
+= 5; break;
383 GET_LENGTH_INC(len
, bp
);
384 p_len_string(f
, len
, 1, bp
);
389 p_string(f
, 2, bp
); bp
+= 2; break;
391 p_string(f
, 4, bp
); bp
+= 4; break;
393 p_string(f
, 6, bp
); bp
+= 6; break;
395 GET_LENGTH_INC(len
, bp
);
396 p_len_string(f
, len
, 2, bp
);
400 GET_LENGTH_INC(len
, bp
);
401 p_len_string(f
, len
, 3, bp
);
408 GET_LENGTH_INC(mb_len
, bp
);
409 GET_LENGTH_INC(len
, bp
);
410 fprintf(f
, ":%d:%d:", mb_len
, len
);
412 while (n
-- > 0) { fputc(*bp
++, f
); }
417 len
= enclen(enc
, bp
);
418 p_string(f
, len
, bp
);
422 GET_LENGTH_INC(len
, bp
);
423 p_len_string(f
, len
, 1, bp
);
428 n
= bitset_on_num((BitSetRef
)bp
);
430 fprintf(f
, ":%d", n
);
434 n
= bitset_on_num((BitSetRef
)bp
);
436 fprintf(f
, ":%d", n
);
440 case OP_CCLASS_MB_NOT
:
441 GET_LENGTH_INC(len
, bp
);
443 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
446 GET_CODE_POINT(code
, q
);
448 fprintf(f
, ":%d:%d", (int )code
, len
);
452 case OP_CCLASS_MIX_NOT
:
453 n
= bitset_on_num((BitSetRef
)bp
);
455 GET_LENGTH_INC(len
, bp
);
457 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
460 GET_CODE_POINT(code
, q
);
462 fprintf(f
, ":%d:%d:%d", n
, (int )code
, len
);
465 #ifdef USE_OP_CCLASS_NODE
470 GET_POINTER_INC(cc
, bp
);
471 n
= bitset_on_num(cc
->bs
);
472 fprintf(f
, ":%p:%d", cc
, n
);
477 case OP_BACKREF_N_IC
:
478 mem
= *((MemNumType
* )bp
);
480 fprintf(f
, ":%d", mem
);
483 case OP_BACKREF_MULTI_IC
:
484 case OP_BACKREF_MULTI
:
485 case OP_BACKREF_CHECK
:
487 GET_LENGTH_INC(len
, bp
);
488 for (i
= 0; i
< len
; i
++) {
489 GET_MEMNUM_INC(mem
, bp
);
490 if (i
> 0) fputs(", ", f
);
491 fprintf(f
, "%d", mem
);
495 case OP_BACKREF_WITH_LEVEL
:
496 GET_OPTION_INC(option
, bp
);
497 fprintf(f
, ":%d", option
);
499 case OP_BACKREF_CHECK_WITH_LEVEL
:
503 GET_LENGTH_INC(level
, bp
);
504 fprintf(f
, ":%d", level
);
507 GET_LENGTH_INC(len
, bp
);
508 for (i
= 0; i
< len
; i
++) {
509 GET_MEMNUM_INC(mem
, bp
);
510 if (i
> 0) fputs(", ", f
);
511 fprintf(f
, "%d", mem
);
519 mem
= *((MemNumType
* )bp
);
521 addr
= *((RelAddrType
* )bp
);
523 fprintf(f
, ":%d:%d", mem
, addr
);
527 case OP_PUSH_OR_JUMP_EXACT1
:
528 case OP_PUSH_IF_PEEK_NEXT
:
529 addr
= *((RelAddrType
* )bp
);
532 p_rel_addr(f
, addr
, bp
, start
);
538 GET_LENGTH_INC(len
, bp
);
539 fprintf(f
, ":%d", len
);
542 case OP_LOOK_BEHIND_NOT_START
:
543 GET_RELADDR_INC(addr
, bp
);
544 GET_LENGTH_INC(len
, bp
);
545 fprintf(f
, ":%d:", len
);
546 p_rel_addr(f
, addr
, bp
, start
);
549 case OP_PUSH_SAVE_VAL
:
552 GET_SAVE_TYPE_INC(type
, bp
);
553 GET_MEMNUM_INC(mem
, bp
);
554 fprintf(f
, ":%d:%d", type
, mem
);
561 GET_UPDATE_VAR_TYPE_INC(type
, bp
);
562 GET_MEMNUM_INC(mem
, bp
);
563 fprintf(f
, ":%d:%d", type
, mem
);
568 case OP_CALLOUT_CONTENTS
:
570 GET_MEMNUM_INC(mem
, bp
); /* number */
571 fprintf(f
, ":%d", mem
);
575 case OP_CALLOUT_NAME
:
579 GET_MEMNUM_INC(id
, bp
); /* id */
580 GET_MEMNUM_INC(mem
, bp
); /* number */
582 fprintf(f
, ":%d:%d", id
, mem
);
588 fprintf(stderr
, "onig_print_compiled_byte_code: undefined code %d\n", *--bp
);
591 if (nextp
) *nextp
= bp
;
593 #endif /* ONIG_DEBUG */
595 #ifdef ONIG_DEBUG_COMPILE
597 onig_print_compiled_byte_code_list(FILE* f
, regex_t
* reg
)
600 UChar
* start
= reg
->p
;
601 UChar
* end
= reg
->p
+ reg
->used
;
603 fprintf(f
, "bt_mem_start: 0x%x, bt_mem_end: 0x%x\n",
604 reg
->bt_mem_start
, reg
->bt_mem_end
);
605 fprintf(f
, "code-length: %d\n", reg
->used
);
609 int pos
= bp
- start
;
611 fprintf(f
, "%4d: ", pos
);
612 onig_print_compiled_byte_code(f
, bp
, &bp
, start
, reg
->enc
);
620 #ifdef USE_CAPTURE_HISTORY
621 static void history_tree_free(OnigCaptureTreeNode
* node
);
624 history_tree_clear(OnigCaptureTreeNode
* node
)
628 if (IS_NOT_NULL(node
)) {
629 for (i
= 0; i
< node
->num_childs
; i
++) {
630 if (IS_NOT_NULL(node
->childs
[i
])) {
631 history_tree_free(node
->childs
[i
]);
634 for (i
= 0; i
< node
->allocated
; i
++) {
635 node
->childs
[i
] = (OnigCaptureTreeNode
* )0;
637 node
->num_childs
= 0;
638 node
->beg
= ONIG_REGION_NOTPOS
;
639 node
->end
= ONIG_REGION_NOTPOS
;
645 history_tree_free(OnigCaptureTreeNode
* node
)
647 history_tree_clear(node
);
652 history_root_free(OnigRegion
* r
)
654 if (IS_NOT_NULL(r
->history_root
)) {
655 history_tree_free(r
->history_root
);
656 r
->history_root
= (OnigCaptureTreeNode
* )0;
660 static OnigCaptureTreeNode
*
661 history_node_new(void)
663 OnigCaptureTreeNode
* node
;
665 node
= (OnigCaptureTreeNode
* )xmalloc(sizeof(OnigCaptureTreeNode
));
666 CHECK_NULL_RETURN(node
);
667 node
->childs
= (OnigCaptureTreeNode
** )0;
669 node
->num_childs
= 0;
671 node
->beg
= ONIG_REGION_NOTPOS
;
672 node
->end
= ONIG_REGION_NOTPOS
;
678 history_tree_add_child(OnigCaptureTreeNode
* parent
, OnigCaptureTreeNode
* child
)
680 #define HISTORY_TREE_INIT_ALLOC_SIZE 8
682 if (parent
->num_childs
>= parent
->allocated
) {
685 if (IS_NULL(parent
->childs
)) {
686 n
= HISTORY_TREE_INIT_ALLOC_SIZE
;
688 (OnigCaptureTreeNode
** )xmalloc(sizeof(OnigCaptureTreeNode
*) * n
);
691 n
= parent
->allocated
* 2;
693 (OnigCaptureTreeNode
** )xrealloc(parent
->childs
,
694 sizeof(OnigCaptureTreeNode
*) * n
,
695 sizeof(OnigCaptureTreeNode
*) * parent
->allocated
);
697 CHECK_NULL_RETURN_MEMERR(parent
->childs
);
698 for (i
= parent
->allocated
; i
< n
; i
++) {
699 parent
->childs
[i
] = (OnigCaptureTreeNode
* )0;
701 parent
->allocated
= n
;
704 parent
->childs
[parent
->num_childs
] = child
;
705 parent
->num_childs
++;
709 static OnigCaptureTreeNode
*
710 history_tree_clone(OnigCaptureTreeNode
* node
)
713 OnigCaptureTreeNode
*clone
, *child
;
715 clone
= history_node_new();
716 CHECK_NULL_RETURN(clone
);
718 clone
->beg
= node
->beg
;
719 clone
->end
= node
->end
;
720 for (i
= 0; i
< node
->num_childs
; i
++) {
721 child
= history_tree_clone(node
->childs
[i
]);
722 if (IS_NULL(child
)) {
723 history_tree_free(clone
);
724 return (OnigCaptureTreeNode
* )0;
726 history_tree_add_child(clone
, child
);
732 extern OnigCaptureTreeNode
*
733 onig_get_capture_tree(OnigRegion
* region
)
735 return region
->history_root
;
737 #endif /* USE_CAPTURE_HISTORY */
740 onig_region_clear(OnigRegion
* region
)
744 for (i
= 0; i
< region
->num_regs
; i
++) {
745 region
->beg
[i
] = region
->end
[i
] = ONIG_REGION_NOTPOS
;
747 #ifdef USE_CAPTURE_HISTORY
748 history_root_free(region
);
753 onig_region_resize(OnigRegion
* region
, int n
)
755 region
->num_regs
= n
;
757 if (n
< ONIG_NREGION
)
760 if (region
->allocated
== 0) {
761 region
->beg
= (int* )xmalloc(n
* sizeof(int));
762 region
->end
= (int* )xmalloc(n
* sizeof(int));
764 if (region
->beg
== 0 || region
->end
== 0)
765 return ONIGERR_MEMORY
;
767 region
->allocated
= n
;
769 else if (region
->allocated
< n
) {
770 region
->beg
= (int* )xrealloc(region
->beg
, n
* sizeof(int), region
->allocated
* sizeof(int));
771 region
->end
= (int* )xrealloc(region
->end
, n
* sizeof(int), region
->allocated
* sizeof(int));
773 if (region
->beg
== 0 || region
->end
== 0)
774 return ONIGERR_MEMORY
;
776 region
->allocated
= n
;
783 onig_region_resize_clear(OnigRegion
* region
, int n
)
787 r
= onig_region_resize(region
, n
);
788 if (r
!= 0) return r
;
789 onig_region_clear(region
);
794 onig_region_set(OnigRegion
* region
, int at
, int beg
, int end
)
796 if (at
< 0) return ONIGERR_INVALID_ARGUMENT
;
798 if (at
>= region
->allocated
) {
799 int r
= onig_region_resize(region
, at
+ 1);
803 region
->beg
[at
] = beg
;
804 region
->end
[at
] = end
;
809 onig_region_init(OnigRegion
* region
)
811 region
->num_regs
= 0;
812 region
->allocated
= 0;
813 region
->beg
= (int* )0;
814 region
->end
= (int* )0;
815 region
->history_root
= (OnigCaptureTreeNode
* )0;
819 onig_region_new(void)
823 r
= (OnigRegion
* )xmalloc(sizeof(OnigRegion
));
824 CHECK_NULL_RETURN(r
);
830 onig_region_free(OnigRegion
* r
, int free_self
)
833 if (r
->allocated
> 0) {
834 if (r
->beg
) xfree(r
->beg
);
835 if (r
->end
) xfree(r
->end
);
838 #ifdef USE_CAPTURE_HISTORY
839 history_root_free(r
);
841 if (free_self
) xfree(r
);
846 onig_region_copy(OnigRegion
* to
, OnigRegion
* from
)
848 #define RREGC_SIZE (sizeof(int) * from->num_regs)
851 if (to
== from
) return;
853 if (to
->allocated
== 0) {
854 if (from
->num_regs
> 0) {
855 to
->beg
= (int* )xmalloc(RREGC_SIZE
);
856 if (IS_NULL(to
->beg
)) return;
857 to
->end
= (int* )xmalloc(RREGC_SIZE
);
858 if (IS_NULL(to
->end
)) return;
859 to
->allocated
= from
->num_regs
;
862 else if (to
->allocated
< from
->num_regs
) {
863 to
->beg
= (int* )xrealloc(to
->beg
, RREGC_SIZE
, sizeof(int) * to
->allocated
);
864 if (IS_NULL(to
->beg
)) return;
865 to
->end
= (int* )xrealloc(to
->end
, RREGC_SIZE
, sizeof(int) * to
->allocated
);
866 if (IS_NULL(to
->end
)) return;
867 to
->allocated
= from
->num_regs
;
870 for (i
= 0; i
< from
->num_regs
; i
++) {
871 to
->beg
[i
] = from
->beg
[i
];
872 to
->end
[i
] = from
->end
[i
];
874 to
->num_regs
= from
->num_regs
;
876 #ifdef USE_CAPTURE_HISTORY
877 history_root_free(to
);
879 if (IS_NOT_NULL(from
->history_root
)) {
880 to
->history_root
= history_tree_clone(from
->history_root
);
886 #define CALLOUT_BODY(func, ain, aname_id, anum, user, args, result) do { \
888 args.name_id = (aname_id);\
892 args.string_end = end;\
893 args.start = sstart;\
894 args.right_range = right_range;\
896 args.retry_in_match_counter = retry_in_match_counter;\
898 args.stk_base = stk_base;\
900 args.mem_start_stk = mem_start_stk;\
901 args.mem_end_stk = mem_end_stk;\
902 result = (func)(&args, user);\
905 #define RETRACTION_CALLOUT(func, aname_id, anum, user) do {\
907 OnigCalloutArgs args;\
908 CALLOUT_BODY(func, ONIG_CALLOUT_IN_RETRACTION, aname_id, anum, user, args, result);\
910 case ONIG_CALLOUT_FAIL:\
911 case ONIG_CALLOUT_SUCCESS:\
915 result = ONIGERR_INVALID_ARGUMENT;\
926 #define INVALID_STACK_INDEX -1
928 #define STK_ALT_FLAG 0x0001
931 /* used by normal-POP */
932 #define STK_SUPER_ALT STK_ALT_FLAG
933 #define STK_ALT (0x0002 | STK_ALT_FLAG)
934 #define STK_ALT_PREC_READ_NOT (0x0004 | STK_ALT_FLAG)
935 #define STK_ALT_LOOK_BEHIND_NOT (0x0006 | STK_ALT_FLAG)
937 /* handled by normal-POP */
938 #define STK_MEM_START 0x0010
939 #define STK_MEM_END 0x8030
940 #define STK_REPEAT_INC 0x0050
942 #define STK_CALLOUT 0x0070
945 /* avoided by normal-POP */
946 #define STK_VOID 0x0000 /* for fill a blank */
947 #define STK_EMPTY_CHECK_START 0x3000
948 #define STK_EMPTY_CHECK_END 0x5000 /* for recursive call */
949 #define STK_MEM_END_MARK 0x8100
950 #define STK_TO_VOID_START 0x1200 /* mark for "(?>...)" */
951 #define STK_REPEAT 0x0300
952 #define STK_CALL_FRAME 0x0400
953 #define STK_RETURN 0x0500
954 #define STK_SAVE_VAL 0x0600
956 /* stack type check mask */
957 #define STK_MASK_POP_USED STK_ALT_FLAG
958 #define STK_MASK_POP_HANDLED 0x0010
959 #define STK_MASK_POP_HANDLED_TIL (STK_MASK_POP_HANDLED | 0x0004)
960 #define STK_MASK_TO_VOID_TARGET 0x100e
961 #define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */
963 typedef intptr_t StackIndex
;
965 typedef struct _StackType
{
970 UChar
*pcode
; /* byte code position */
971 UChar
*pstr
; /* string position */
972 UChar
*pstr_prev
; /* previous char position of pstr */
975 int count
; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */
976 UChar
*pcode
; /* byte code position (head of repeated target) */
979 StackIndex si
; /* index of stack */
982 UChar
*pstr
; /* start/end position */
983 /* Following information is set, if this stack type is MEM-START */
984 StackIndex prev_start
; /* prev. info (for backtrack "(...)*" ) */
985 StackIndex prev_end
; /* prev. info (for backtrack "(...)*" ) */
988 UChar
*pstr
; /* start position */
992 UChar
*ret_addr
; /* byte code position */
993 UChar
*pstr
; /* string position */
1004 OnigCalloutFunc func
;
1012 struct OnigCalloutArgsStruct
{
1014 int name_id
; /* name id or ONIG_NON_NAME_ID */
1017 const OnigUChar
* string
;
1018 const OnigUChar
* string_end
;
1019 const OnigUChar
* start
;
1020 const OnigUChar
* right_range
;
1021 const OnigUChar
* current
; /* current matching position */
1022 unsigned long retry_in_match_counter
;
1024 /* invisible to users */
1026 StackType
* stk_base
;
1028 StackIndex
* mem_start_stk
;
1029 StackIndex
* mem_end_stk
;
1035 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
1036 #define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mp) do { \
1037 (msa).stack_p = (void* )0;\
1038 (msa).options = (arg_option);\
1039 (msa).region = (arg_region);\
1040 (msa).start = (arg_start);\
1041 (msa).match_stack_limit = (mp)->match_stack_limit;\
1042 (msa).retry_limit_in_match = (mp)->retry_limit_in_match;\
1044 (msa).best_len = ONIG_MISMATCH;\
1045 (msa).ptr_num = (reg)->num_repeat + ((reg)->num_mem + 1) * 2; \
1048 #define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mp) do { \
1049 (msa).stack_p = (void* )0;\
1050 (msa).options = (arg_option);\
1051 (msa).region = (arg_region);\
1052 (msa).start = (arg_start);\
1053 (msa).match_stack_limit = (mp)->match_stack_limit;\
1054 (msa).retry_limit_in_match = (mp)->retry_limit_in_match;\
1056 (msa).ptr_num = (reg)->num_repeat + ((reg)->num_mem + 1) * 2; \
1060 #define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p)
1063 #define ALLOCA_PTR_NUM_LIMIT 50
1065 #define STACK_INIT(stack_num) do {\
1066 if (msa->stack_p) {\
1068 alloc_base = msa->stack_p;\
1069 stk_base = (StackType* )(alloc_base\
1070 + (sizeof(StackIndex) * msa->ptr_num));\
1072 stk_end = stk_base + msa->stack_n;\
1074 else if (msa->ptr_num > ALLOCA_PTR_NUM_LIMIT) {\
1076 alloc_base = (char* )xmalloc(sizeof(StackIndex) * msa->ptr_num\
1077 + sizeof(StackType) * (stack_num));\
1078 CHECK_NULL_RETURN_MEMERR(alloc_base);\
1079 stk_base = (StackType* )(alloc_base\
1080 + (sizeof(StackIndex) * msa->ptr_num));\
1082 stk_end = stk_base + (stack_num);\
1086 alloc_base = (char* )xmalloc(sizeof(StackIndex) * msa->ptr_num\
1087 + sizeof(StackType) * (stack_num));\
1088 CHECK_NULL_RETURN_MEMERR(alloc_base);\
1089 stk_base = (StackType* )(alloc_base\
1090 + (sizeof(StackIndex) * msa->ptr_num));\
1092 stk_end = stk_base + (stack_num);\
1097 #define STACK_SAVE do{\
1098 msa->stack_n = (int )(stk_end - stk_base);\
1099 if (is_alloca != 0) {\
1100 size_t size = sizeof(StackIndex) * msa->ptr_num \
1101 + sizeof(StackType) * msa->stack_n;\
1102 msa->stack_p = xmalloc(size);\
1103 CHECK_NULL_RETURN_MEMERR(msa->stack_p);\
1104 xmemcpy(msa->stack_p, alloc_base, size);\
1107 msa->stack_p = alloc_base;\
1111 #define UPDATE_FOR_STACK_REALLOC do{\
1112 repeat_stk = (StackIndex* )alloc_base;\
1113 mem_start_stk = (StackIndex* )(repeat_stk + reg->num_repeat);\
1114 mem_end_stk = mem_start_stk + num_mem + 1;\
1117 static unsigned int MatchStackLimit
= DEFAULT_MATCH_STACK_LIMIT_SIZE
;
1120 onig_get_match_stack_limit_size(void)
1122 return MatchStackLimit
;
1126 onig_set_match_stack_limit_size(unsigned int size
)
1128 MatchStackLimit
= size
;
1132 #ifdef USE_RETRY_LIMIT_IN_MATCH
1134 static unsigned long RetryLimitInMatch
= DEFAULT_RETRY_LIMIT_IN_MATCH
;
1136 #define CHECK_RETRY_LIMIT_IN_MATCH do {\
1137 if (retry_in_match_counter++ > retry_limit_in_match) goto retry_limit_in_match_over;\
1142 #define CHECK_RETRY_LIMIT_IN_MATCH
1144 #endif /* USE_RETRY_LIMIT_IN_MATCH */
1146 extern unsigned long
1147 onig_get_retry_limit_in_match(void)
1149 #ifdef USE_RETRY_LIMIT_IN_MATCH
1150 return RetryLimitInMatch
;
1152 /* return ONIG_NO_SUPPORT_CONFIG; */
1158 onig_set_retry_limit_in_match(unsigned long size
)
1160 #ifdef USE_RETRY_LIMIT_IN_MATCH
1161 RetryLimitInMatch
= size
;
1164 return ONIG_NO_SUPPORT_CONFIG
;
1168 static OnigCalloutFunc DefaultProgressCallout
;
1169 static OnigCalloutFunc DefaultRetractionCallout
;
1171 extern OnigMatchParam
*
1172 onig_new_match_param(void)
1176 p
= (OnigMatchParam
* )xmalloc(sizeof(*p
));
1177 if (IS_NOT_NULL(p
)) {
1178 onig_initialize_match_param(p
);
1185 onig_free_match_param_content(OnigMatchParam
* p
)
1188 if (IS_NOT_NULL(p
->callout_data
)) {
1189 xfree(p
->callout_data
);
1190 p
->callout_data
= 0;
1196 onig_free_match_param(OnigMatchParam
* p
)
1198 if (IS_NOT_NULL(p
)) {
1199 onig_free_match_param_content(p
);
1205 onig_initialize_match_param(OnigMatchParam
* mp
)
1207 mp
->match_stack_limit
= MatchStackLimit
;
1208 #ifdef USE_RETRY_LIMIT_IN_MATCH
1209 mp
->retry_limit_in_match
= RetryLimitInMatch
;
1211 mp
->progress_callout_of_contents
= DefaultProgressCallout
;
1212 mp
->retraction_callout_of_contents
= DefaultRetractionCallout
;
1215 mp
->match_at_call_counter
= 0;
1216 mp
->callout_user_data
= 0;
1217 mp
->callout_data
= 0;
1218 mp
->callout_data_alloc_num
= 0;
1227 adjust_match_param(regex_t
* reg
, OnigMatchParam
* mp
)
1229 RegexExt
* ext
= REG_EXTP(reg
);
1231 mp
->match_at_call_counter
= 0;
1233 if (IS_NULL(ext
) || ext
->callout_num
== 0) return ONIG_NORMAL
;
1235 if (ext
->callout_num
> mp
->callout_data_alloc_num
) {
1237 size_t n
= ext
->callout_num
* sizeof(*d
);
1238 if (IS_NOT_NULL(mp
->callout_data
))
1239 d
= (CalloutData
* )xrealloc(mp
->callout_data
, n
, mp
->callout_data_alloc_num
* sizeof(*d
));
1241 d
= (CalloutData
* )xmalloc(n
);
1242 CHECK_NULL_RETURN_MEMERR(d
);
1244 mp
->callout_data
= d
;
1245 mp
->callout_data_alloc_num
= ext
->callout_num
;
1248 xmemset(mp
->callout_data
, 0, mp
->callout_data_alloc_num
* sizeof(CalloutData
));
1252 #define ADJUST_MATCH_PARAM(reg, mp) \
1253 r = adjust_match_param(reg, mp);\
1254 if (r != ONIG_NORMAL) return r;
1256 #define CALLOUT_DATA_AT_NUM(mp, num) ((mp)->callout_data + ((num) - 1))
1259 onig_check_callout_data_and_clear_old_values(OnigCalloutArgs
* args
)
1268 d
= CALLOUT_DATA_AT_NUM(mp
, num
);
1269 if (d
->last_match_at_call_counter
!= mp
->match_at_call_counter
) {
1270 xmemset(d
, 0, sizeof(*d
));
1271 d
->last_match_at_call_counter
= mp
->match_at_call_counter
;
1272 return d
->last_match_at_call_counter
;
1279 onig_get_callout_data_dont_clear_old(regex_t
* reg
, OnigMatchParam
* mp
,
1280 int callout_num
, int slot
,
1281 OnigType
* type
, OnigValue
* val
)
1286 if (callout_num
<= 0) return ONIGERR_INVALID_ARGUMENT
;
1288 d
= CALLOUT_DATA_AT_NUM(mp
, callout_num
);
1289 t
= d
->slot
[slot
].type
;
1290 if (IS_NOT_NULL(type
)) *type
= t
;
1291 if (IS_NOT_NULL(val
)) *val
= d
->slot
[slot
].val
;
1292 return (t
== ONIG_TYPE_VOID
? 1 : ONIG_NORMAL
);
1296 onig_get_callout_data_by_callout_args_self_dont_clear_old(OnigCalloutArgs
* args
,
1297 int slot
, OnigType
* type
,
1300 return onig_get_callout_data_dont_clear_old(args
->regex
, args
->msa
->mp
,
1301 args
->num
, slot
, type
, val
);
1305 onig_get_callout_data(regex_t
* reg
, OnigMatchParam
* mp
,
1306 int callout_num
, int slot
,
1307 OnigType
* type
, OnigValue
* val
)
1312 if (callout_num
<= 0) return ONIGERR_INVALID_ARGUMENT
;
1314 d
= CALLOUT_DATA_AT_NUM(mp
, callout_num
);
1315 if (d
->last_match_at_call_counter
!= mp
->match_at_call_counter
) {
1316 xmemset(d
, 0, sizeof(*d
));
1317 d
->last_match_at_call_counter
= mp
->match_at_call_counter
;
1320 t
= d
->slot
[slot
].type
;
1321 if (IS_NOT_NULL(type
)) *type
= t
;
1322 if (IS_NOT_NULL(val
)) *val
= d
->slot
[slot
].val
;
1323 return (t
== ONIG_TYPE_VOID
? 1 : ONIG_NORMAL
);
1327 onig_get_callout_data_by_tag(regex_t
* reg
, OnigMatchParam
* mp
,
1328 const UChar
* tag
, const UChar
* tag_end
, int slot
,
1329 OnigType
* type
, OnigValue
* val
)
1333 num
= onig_get_callout_num_by_tag(reg
, tag
, tag_end
);
1334 if (num
< 0) return num
;
1335 if (num
== 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME
;
1337 return onig_get_callout_data(reg
, mp
, num
, slot
, type
, val
);
1341 onig_get_callout_data_by_callout_args(OnigCalloutArgs
* args
,
1342 int callout_num
, int slot
,
1343 OnigType
* type
, OnigValue
* val
)
1345 return onig_get_callout_data(args
->regex
, args
->msa
->mp
, callout_num
, slot
,
1350 onig_get_callout_data_by_callout_args_self(OnigCalloutArgs
* args
,
1351 int slot
, OnigType
* type
, OnigValue
* val
)
1353 return onig_get_callout_data(args
->regex
, args
->msa
->mp
, args
->num
, slot
,
1358 onig_set_callout_data(regex_t
* reg
, OnigMatchParam
* mp
,
1359 int callout_num
, int slot
,
1360 OnigType type
, OnigValue
* val
)
1364 if (callout_num
<= 0) return ONIGERR_INVALID_ARGUMENT
;
1366 d
= CALLOUT_DATA_AT_NUM(mp
, callout_num
);
1367 d
->slot
[slot
].type
= type
;
1368 d
->slot
[slot
].val
= *val
;
1369 d
->last_match_at_call_counter
= mp
->match_at_call_counter
;
1375 onig_set_callout_data_by_tag(regex_t
* reg
, OnigMatchParam
* mp
,
1376 const UChar
* tag
, const UChar
* tag_end
, int slot
,
1377 OnigType type
, OnigValue
* val
)
1381 num
= onig_get_callout_num_by_tag(reg
, tag
, tag_end
);
1382 if (num
< 0) return num
;
1383 if (num
== 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME
;
1385 return onig_set_callout_data(reg
, mp
, num
, slot
, type
, val
);
1389 onig_set_callout_data_by_callout_args(OnigCalloutArgs
* args
,
1390 int callout_num
, int slot
,
1391 OnigType type
, OnigValue
* val
)
1393 return onig_set_callout_data(args
->regex
, args
->msa
->mp
, callout_num
, slot
,
1398 onig_set_callout_data_by_callout_args_self(OnigCalloutArgs
* args
,
1399 int slot
, OnigType type
, OnigValue
* val
)
1401 return onig_set_callout_data(args
->regex
, args
->msa
->mp
, args
->num
, slot
,
1406 #define ADJUST_MATCH_PARAM(reg, mp)
1407 #endif /* USE_CALLOUT */
/*
 * Grow the single memory block that holds the StackIndex pointer area
 * (msa->ptr_num entries) followed by the StackType match stack.
 *
 * - is_alloca != 0: a new block of new_size bytes is obtained with
 *   xmalloc() and the first `size` bytes of the old block are copied in.
 * - otherwise: the block is resized with xrealloc().  In this path the
 *   entry count n is capped at msa->match_stack_limit (when that limit is
 *   non-zero), and ONIGERR_MATCH_STACK_LIMIT_OVER is returned if the stack
 *   already holds exactly that many entries.
 *
 * On success *arg_alloc_base, *arg_stk_base, *arg_stk_end and *arg_stk are
 * rewritten to point into the new block; *arg_stk keeps the same offset
 * (`used`) from the stack base.  ONIGERR_MEMORY is returned when the
 * allocation fails.
 *
 * NOTE(review): presumably n is doubled between the `size` and `new_size`
 * computations -- that statement is not visible in this excerpt; confirm.
 */
1411 stack_double(int is_alloca
, char** arg_alloc_base
,
1412 StackType
** arg_stk_base
, StackType
** arg_stk_end
, StackType
** arg_stk
,
1420 char* new_alloc_base
;
1421 StackType
*stk_base
, *stk_end
, *stk
;
1423 alloc_base
= *arg_alloc_base
;
1424 stk_base
= *arg_stk_base
;
1425 stk_end
= *arg_stk_end
;
1428 n
= (unsigned int )(stk_end
- stk_base
);
1429 size
= sizeof(StackIndex
) * msa
->ptr_num
+ sizeof(StackType
) * n
;
1431 new_size
= sizeof(StackIndex
) * msa
->ptr_num
+ sizeof(StackType
) * n
;
1432 if (is_alloca
!= 0) {
1433 new_alloc_base
= (char* )xmalloc(new_size
);
1434 if (IS_NULL(new_alloc_base
)) {
1436 return ONIGERR_MEMORY
;
1438 xmemcpy(new_alloc_base
, alloc_base
, size
);
/* NOTE(review): new_size was computed above, before n is capped here, so
   the xrealloc() request below is not reduced by the cap -- the block can
   be larger than the n entries published through *arg_stk_end. */
1441 if (msa
->match_stack_limit
!= 0 && n
> msa
->match_stack_limit
) {
1442 if ((unsigned int )(stk_end
- stk_base
) == msa
->match_stack_limit
)
1443 return ONIGERR_MATCH_STACK_LIMIT_OVER
;
1445 n
= msa
->match_stack_limit
;
1447 new_alloc_base
= (char* )xrealloc(alloc_base
, new_size
, size
);
1448 if (IS_NULL(new_alloc_base
)) {
1450 return ONIGERR_MEMORY
;
1454 alloc_base
= new_alloc_base
;
1455 used
= (int )(stk
- stk_base
);
1456 *arg_alloc_base
= alloc_base
;
1457 *arg_stk_base
= (StackType
* )(alloc_base
1458 + (sizeof(StackIndex
) * msa
->ptr_num
));
1459 *arg_stk
= *arg_stk_base
+ used
;
1460 *arg_stk_end
= *arg_stk_base
+ n
;
/*
 * Basic backtrack-stack helpers.  They expand inside the matcher and use
 * its locals directly (stk, stk_base, stk_end, alloc_base, is_alloca, msa).
 *
 *   STACK_ENSURE(n)        - if fewer than n frames remain between stk and
 *                            stk_end, call stack_double(); on a non-zero
 *                            result run STACK_SAVE and `return r` from the
 *                            enclosing function, then run
 *                            UPDATE_FOR_STACK_REALLOC.
 *   STACK_AT(index)        - frame pointer for a stack index.
 *   GET_STACK_INDEX(stk)   - stack index for a frame pointer.
 *   STACK_PUSH_TYPE(t)     - push a frame that carries only a type.
 *   IS_TO_VOID_TARGET(stk) - true when the frame type has any bit of
 *                            STK_MASK_TO_VOID_TARGET set.
 */
1464 #define STACK_ENSURE(n) do {\
1465 if ((int )(stk_end - stk) < (n)) {\
1466 int r = stack_double(is_alloca, &alloc_base, &stk_base, &stk_end, &stk, msa);\
1467 if (r != 0) { STACK_SAVE; return r; } \
1469 UPDATE_FOR_STACK_REALLOC;\
1473 #define STACK_AT(index) (stk_base + (index))
1474 #define GET_STACK_INDEX(stk) ((stk) - stk_base)
1476 #define STACK_PUSH_TYPE(stack_type) do {\
1478 stk->type = (stack_type);\
1482 #define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
/*
 * State-frame push macros.
 *
 *   STACK_PUSH(type, pat, s, sprev) - records the frame type, the bytecode
 *       pointer (u.state.pcode) and the string positions (u.state.pstr,
 *       u.state.pstr_prev).
 *   STACK_PUSH_ENSURED(type, pat)   - records type and bytecode pointer only.
 *   STACK_PUSH_BOTTOM(type, pat)    - two variants: under ONIG_DEBUG_MATCH it
 *       also stores the enclosing function's `s` and `sprev`; the other
 *       variant stores type and bytecode pointer only.
 *
 * The remaining macros are fixed-type wrappers around STACK_PUSH /
 * STACK_PUSH_TYPE: STK_ALT, STK_SUPER_ALT, STK_TO_VOID_START (STACK_PUSH_POS
 * passes NULL_UCHARP as the bytecode pointer), STK_ALT_PREC_READ_NOT and
 * STK_ALT_LOOK_BEHIND_NOT.
 */
1484 #define STACK_PUSH(stack_type,pat,s,sprev) do {\
1486 stk->type = (stack_type);\
1487 stk->u.state.pcode = (pat);\
1488 stk->u.state.pstr = (s);\
1489 stk->u.state.pstr_prev = (sprev);\
1493 #define STACK_PUSH_ENSURED(stack_type,pat) do {\
1494 stk->type = (stack_type);\
1495 stk->u.state.pcode = (pat);\
1499 #ifdef ONIG_DEBUG_MATCH
1500 #define STACK_PUSH_BOTTOM(stack_type,pat) do {\
1501 stk->type = (stack_type);\
1502 stk->u.state.pcode = (pat);\
1503 stk->u.state.pstr = s;\
1504 stk->u.state.pstr_prev = sprev;\
1508 #define STACK_PUSH_BOTTOM(stack_type,pat) do {\
1509 stk->type = (stack_type);\
1510 stk->u.state.pcode = (pat);\
1515 #define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev)
1516 #define STACK_PUSH_SUPER_ALT(pat,s,sprev) STACK_PUSH(STK_SUPER_ALT,pat,s,sprev)
1517 #define STACK_PUSH_POS(s,sprev) \
1518 STACK_PUSH(STK_TO_VOID_START,NULL_UCHARP,s,sprev)
1519 #define STACK_PUSH_ALT_PREC_READ_NOT(pat,s,sprev) \
1520 STACK_PUSH(STK_ALT_PREC_READ_NOT,pat,s,sprev)
1521 #define STACK_PUSH_TO_VOID_START STACK_PUSH_TYPE(STK_TO_VOID_START)
1522 #define STACK_PUSH_ALT_LOOK_BEHIND_NOT(pat,s,sprev) \
1523 STACK_PUSH(STK_ALT_LOOK_BEHIND_NOT,pat,s,sprev)
/*
 * Typed-frame push macros and the stack lookups that pair with them.
 *
 * Push:
 *   STACK_PUSH_REPEAT(sid, pat)    - STK_REPEAT frame; stores the bytecode
 *                                    pointer and starts count at 0.
 *   STACK_PUSH_REPEAT_INC(sindex)  - STK_REPEAT_INC frame referring to the
 *                                    stack index of its STK_REPEAT frame.
 *   STACK_PUSH_MEM_START(mnum, s)  - saves the current mem_start_stk[mnum] /
 *                                    mem_end_stk[mnum] in prev_start /
 *                                    prev_end, then points
 *                                    mem_start_stk[mnum] at this frame and
 *                                    resets mem_end_stk[mnum] to
 *                                    INVALID_STACK_INDEX.
 *   STACK_PUSH_MEM_END(mnum, s)    - same save, then points
 *                                    mem_end_stk[mnum] at this frame.
 *   STACK_PUSH_MEM_END_MARK(mnum)  - STK_MEM_END_MARK frame.
 *   STACK_PUSH_EMPTY_CHECK_START / _END, STACK_PUSH_CALL_FRAME (stores the
 *   return address), STACK_PUSH_RETURN.
 *   STACK_PUSH_SAVE_VAL(sid, stype, sval) - STK_SAVE_VAL frame with a value
 *       type and a value cast to UChar*; the _WITH_SPREV variant also stores
 *       the enclosing function's `sprev` in u.val.v2.
 *   STACK_PUSH_CALLOUT_CONTENTS / _NAME - STK_CALLOUT frame holding callout
 *       number and function; the CONTENTS variant sets zid to
 *       ONIG_NON_NAME_ID.
 *
 * Lookup (each loops over frames while k > stk_base):
 *   STACK_GET_MEM_START(mnum, k)   - finds the STK_MEM_START frame for mnum,
 *                                    using `level` to skip nested pairs.
 *   STACK_GET_MEM_RANGE(k, mnum, start, end)
 *   STACK_GET_SAVE_VAL_TYPE_LAST(stype, sval)
 *   STACK_GET_SAVE_VAL_TYPE_LAST_ID(stype, sid, sval)
 *   STACK_GET_SAVE_VAL_TYPE_LAST_ID_WITH_SPREV - also restores `sprev`.
 *   STACK_GET_SAVE_VAL_TYPE_LAST_ID_FROM - starts from stk_from, not stk.
 *
 * STACK_BASE_CHECK(p, at) has two definitions: one reports on stderr when p
 * is below stk_base, the other expands to nothing.
 *
 * NOTE(review): STACK_GET_MEM_RANGE compares k->u.mem.num and
 * STACK_GET_SAVE_VAL_TYPE_LAST_ID_FROM compares k->u.val.id, while the
 * sibling macros compare k->zid -- confirm those fields still exist.
 * NOTE(review): STACK_GET_SAVE_VAL_TYPE_LAST_ID_WITH_SPREV passes the label
 * "STACK_GET_SAVE_VAL_TYPE_LAST_ID" to STACK_BASE_CHECK (looks copy-pasted).
 * NOTE(review): the non-empty STACK_BASE_CHECK is a bare `if`, not wrapped
 * in do/while(0).
 */
1525 #define STACK_PUSH_REPEAT(sid, pat) do {\
1527 stk->type = STK_REPEAT;\
1529 stk->u.repeat.pcode = (pat);\
1530 stk->u.repeat.count = 0;\
1534 #define STACK_PUSH_REPEAT_INC(sindex) do {\
1536 stk->type = STK_REPEAT_INC;\
1537 stk->u.repeat_inc.si = (sindex);\
1541 #define STACK_PUSH_MEM_START(mnum, s) do {\
1543 stk->type = STK_MEM_START;\
1545 stk->u.mem.pstr = (s);\
1546 stk->u.mem.prev_start = mem_start_stk[mnum];\
1547 stk->u.mem.prev_end = mem_end_stk[mnum];\
1548 mem_start_stk[mnum] = GET_STACK_INDEX(stk);\
1549 mem_end_stk[mnum] = INVALID_STACK_INDEX;\
1553 #define STACK_PUSH_MEM_END(mnum, s) do {\
1555 stk->type = STK_MEM_END;\
1557 stk->u.mem.pstr = (s);\
1558 stk->u.mem.prev_start = mem_start_stk[mnum];\
1559 stk->u.mem.prev_end = mem_end_stk[mnum];\
1560 mem_end_stk[mnum] = GET_STACK_INDEX(stk);\
1564 #define STACK_PUSH_MEM_END_MARK(mnum) do {\
1566 stk->type = STK_MEM_END_MARK;\
1571 #define STACK_GET_MEM_START(mnum, k) do {\
1574 while (k > stk_base) {\
1576 if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
1577 && k->zid == (mnum)) {\
1580 else if (k->type == STK_MEM_START && k->zid == (mnum)) {\
1581 if (level == 0) break;\
1587 #define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\
1590 if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
1591 if (level == 0) (start) = k->u.mem.pstr;\
1594 else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\
1597 (end) = k->u.mem.pstr;\
1605 #define STACK_PUSH_EMPTY_CHECK_START(cnum, s) do {\
1607 stk->type = STK_EMPTY_CHECK_START;\
1609 stk->u.empty_check.pstr = (s);\
1613 #define STACK_PUSH_EMPTY_CHECK_END(cnum) do {\
1615 stk->type = STK_EMPTY_CHECK_END;\
1620 #define STACK_PUSH_CALL_FRAME(pat) do {\
1622 stk->type = STK_CALL_FRAME;\
1623 stk->u.call_frame.ret_addr = (pat);\
1627 #define STACK_PUSH_RETURN do {\
1629 stk->type = STK_RETURN;\
1633 #define STACK_PUSH_SAVE_VAL(sid, stype, sval) do {\
1635 stk->type = STK_SAVE_VAL;\
1637 stk->u.val.type = (stype);\
1638 stk->u.val.v = (UChar* )(sval);\
1642 #define STACK_PUSH_SAVE_VAL_WITH_SPREV(sid, stype, sval) do {\
1644 stk->type = STK_SAVE_VAL;\
1646 stk->u.val.type = (stype);\
1647 stk->u.val.v = (UChar* )(sval);\
1648 stk->u.val.v2 = sprev;\
1652 #define STACK_GET_SAVE_VAL_TYPE_LAST(stype, sval) do {\
1653 StackType *k = stk;\
1654 while (k > stk_base) {\
1656 STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST"); \
1657 if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)) {\
1658 (sval) = k->u.val.v;\
1664 #define STACK_GET_SAVE_VAL_TYPE_LAST_ID(stype, sid, sval) do { \
1666 StackType *k = stk;\
1667 while (k > stk_base) {\
1669 STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID"); \
1670 if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\
1671 && k->zid == (sid)) {\
1673 (sval) = k->u.val.v;\
1677 else if (k->type == STK_CALL_FRAME)\
1679 else if (k->type == STK_RETURN)\
1684 #define STACK_GET_SAVE_VAL_TYPE_LAST_ID_WITH_SPREV(stype, sid, sval) do { \
1686 StackType *k = stk;\
1687 while (k > stk_base) {\
1689 STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID"); \
1690 if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\
1691 && k->zid == (sid)) {\
1693 (sval) = k->u.val.v;\
1694 sprev = k->u.val.v2;\
1698 else if (k->type == STK_CALL_FRAME)\
1700 else if (k->type == STK_RETURN)\
1705 #define STACK_GET_SAVE_VAL_TYPE_LAST_ID_FROM(stype, sid, sval, stk_from) do { \
1707 StackType *k = (stk_from);\
1708 while (k > stk_base) {\
1709 STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID_FROM"); \
1710 if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\
1711 && k->u.val.id == (sid)) {\
1713 (sval) = k->u.val.v;\
1717 else if (k->type == STK_CALL_FRAME)\
1719 else if (k->type == STK_RETURN)\
1725 #define STACK_PUSH_CALLOUT_CONTENTS(anum, func) do {\
1727 stk->type = STK_CALLOUT;\
1728 stk->zid = ONIG_NON_NAME_ID;\
1729 stk->u.callout.num = (anum);\
1730 stk->u.callout.func = (func);\
1734 #define STACK_PUSH_CALLOUT_NAME(aid, anum, func) do {\
1736 stk->type = STK_CALLOUT;\
1738 stk->u.callout.num = (anum);\
1739 stk->u.callout.func = (func);\
1744 #define STACK_BASE_CHECK(p, at) \
1745 if ((p) < stk_base) {\
1746 fprintf(stderr, "at %s\n", at);\
1750 #define STACK_BASE_CHECK(p, at)
/*
 * Pop / unwind macros and empty-loop checks.
 *
 *   STACK_POP_ONE      - pop a single frame (with STACK_BASE_CHECK).
 *   POP_CALLOUT_CASE   - two definitions: one adds an `else if` arm that runs
 *                        RETRACTION_CALLOUT for STK_CALLOUT frames with
 *                        msa->mp->callout_user_data; the other is empty.
 *   STACK_POP          - switches on the enclosing function's pop_level.
 *                        Every level stops at a frame whose type has a
 *                        STK_MASK_POP_USED bit.  STACK_POP_LEVEL_MEM_START
 *                        additionally restores mem_start_stk / mem_end_stk
 *                        from STK_MEM_START frames; the last level also
 *                        undoes STK_REPEAT_INC (decrementing the count of the
 *                        referenced repeat frame) and STK_MEM_END.
 *   POP_TIL_BODY(aname, til_type) - same restoration for frames matching
 *                        STK_MASK_POP_HANDLED_TIL, stopping at til_type;
 *                        per the inline comment, callouts are not run here.
 *   STACK_POP_TIL_ALT_PREC_READ_NOT / STACK_POP_TIL_ALT_LOOK_BEHIND_NOT -
 *                        POP_TIL_BODY with the matching frame type.
 *   STACK_EXEC_TO_VOID(k) - rewrites IS_TO_VOID_TARGET frames to STK_VOID,
 *                        with a separate arm for STK_TO_VOID_START.
 *   STACK_EMPTY_CHECK(isnull, sid, s) - finds the STK_EMPTY_CHECK_START
 *                        frame with zid == sid; isnull is true when the
 *                        recorded position equals s.
 *   STACK_MEM_START_GET_PREV_END_ADDR(k, reg, addr) - resolves a frame's
 *                        prev_end: through STACK_AT() when the group is
 *                        flagged in reg->bt_mem_end, otherwise prev_end is
 *                        itself reinterpreted as the UChar* address.
 *   STACK_EMPTY_CHECK_MEM / STACK_EMPTY_CHECK_MEM_REC - like
 *                        STACK_EMPTY_CHECK but also inspect STK_MEM_START
 *                        frames; isnull becomes 0, or -1 for "empty, but
 *                        position changed".  The _REC form tracks nesting of
 *                        EMPTY_CHECK_START/END with `level`.
 *   STACK_EMPTY_CHECK_REC(isnull, id, s) - defined before the closing #endif.
 *
 * NOTE(review): STACK_EMPTY_CHECK_REC compares k->u.empty_check.num while
 * the other empty-check macros compare k->zid -- confirm that field exists.
 */
1753 #define STACK_POP_ONE do {\
1755 STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
1760 #define POP_CALLOUT_CASE \
1761 else if (stk->type == STK_CALLOUT) {\
1762 RETRACTION_CALLOUT(stk->u.callout.func, stk->zid, stk->u.callout.num, msa->mp->callout_user_data);\
1765 #define POP_CALLOUT_CASE
1768 #define STACK_POP do {\
1769 switch (pop_level) {\
1770 case STACK_POP_LEVEL_FREE:\
1773 STACK_BASE_CHECK(stk, "STACK_POP"); \
1774 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
1777 case STACK_POP_LEVEL_MEM_START:\
1780 STACK_BASE_CHECK(stk, "STACK_POP 2"); \
1781 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
1782 else if (stk->type == STK_MEM_START) {\
1783 mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
1784 mem_end_stk[stk->zid] = stk->u.mem.prev_end;\
1791 STACK_BASE_CHECK(stk, "STACK_POP 3"); \
1792 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
1793 else if ((stk->type & STK_MASK_POP_HANDLED) != 0) {\
1794 if (stk->type == STK_MEM_START) {\
1795 mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
1796 mem_end_stk[stk->zid] = stk->u.mem.prev_end;\
1798 else if (stk->type == STK_REPEAT_INC) {\
1799 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
1801 else if (stk->type == STK_MEM_END) {\
1802 mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
1803 mem_end_stk[stk->zid] = stk->u.mem.prev_end;\
1812 #define POP_TIL_BODY(aname, til_type) do {\
1815 STACK_BASE_CHECK(stk, (aname));\
1816 if ((stk->type & STK_MASK_POP_HANDLED_TIL) != 0) {\
1817 if (stk->type == (til_type)) break;\
1819 if (stk->type == STK_MEM_START) {\
1820 mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
1821 mem_end_stk[stk->zid] = stk->u.mem.prev_end;\
1823 else if (stk->type == STK_REPEAT_INC) {\
1824 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
1826 else if (stk->type == STK_MEM_END) {\
1827 mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
1828 mem_end_stk[stk->zid] = stk->u.mem.prev_end;\
1830 /* Don't call callout here because negation of total success by (?!..) (?<!..) */\
1836 #define STACK_POP_TIL_ALT_PREC_READ_NOT do {\
1837 POP_TIL_BODY("STACK_POP_TIL_ALT_PREC_READ_NOT", STK_ALT_PREC_READ_NOT);\
1840 #define STACK_POP_TIL_ALT_LOOK_BEHIND_NOT do {\
1841 POP_TIL_BODY("STACK_POP_TIL_ALT_LOOK_BEHIND_NOT", STK_ALT_LOOK_BEHIND_NOT);\
1845 #define STACK_EXEC_TO_VOID(k) do {\
1849 STACK_BASE_CHECK(k, "STACK_EXEC_TO_VOID"); \
1850 if (IS_TO_VOID_TARGET(k)) {\
1851 if (k->type == STK_TO_VOID_START) {\
1852 k->type = STK_VOID;\
1855 k->type = STK_VOID;\
1860 #define STACK_EMPTY_CHECK(isnull,sid,s) do {\
1861 StackType* k = stk;\
1864 STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK"); \
1865 if (k->type == STK_EMPTY_CHECK_START) {\
1866 if (k->zid == (sid)) {\
1867 (isnull) = (k->u.empty_check.pstr == (s));\
1874 #define STACK_MEM_START_GET_PREV_END_ADDR(k /* STK_MEM_START*/, reg, addr) do {\
1875 if (k->u.mem.prev_end == INVALID_STACK_INDEX) {\
1879 if (MEM_STATUS_AT((reg)->bt_mem_end, k->zid))\
1880 (addr) = STACK_AT(k->u.mem.prev_end)->u.mem.pstr;\
1882 (addr) = (UChar* )k->u.mem.prev_end;\
1886 #ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT
1887 #define STACK_EMPTY_CHECK_MEM(isnull,sid,s,reg) do {\
1888 StackType* k = stk;\
1891 STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEM"); \
1892 if (k->type == STK_EMPTY_CHECK_START) {\
1893 if (k->zid == (sid)) {\
1894 if (k->u.empty_check.pstr != (s)) {\
1902 if (k->type == STK_MEM_START) {\
1903 STACK_MEM_START_GET_PREV_END_ADDR(k, reg, endp);\
1905 (isnull) = 0; break;\
1907 else if (STACK_AT(k->u.mem.prev_start)->u.mem.pstr != endp) {\
1908 (isnull) = 0; break;\
1910 else if (endp != s) {\
1911 (isnull) = -1; /* empty, but position changed */ \
1923 #define STACK_EMPTY_CHECK_MEM_REC(isnull,sid,s,reg) do {\
1925 StackType* k = stk;\
1928 STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEM_REC");\
1929 if (k->type == STK_EMPTY_CHECK_START) {\
1930 if (k->zid == (sid)) {\
1932 if (k->u.empty_check.pstr != (s)) {\
1940 if (k->type == STK_MEM_START) {\
1942 STACK_MEM_START_GET_PREV_END_ADDR(k, reg, endp);\
1944 (isnull) = 0; break;\
1946 else if (STACK_AT(k->u.mem.prev_start)->u.mem.pstr != endp) { \
1947 (isnull) = 0; break;\
1949 else if (endp != s) {\
1950 (isnull) = -1; /* empty, but position changed */\
1954 else if (k->type == STK_EMPTY_CHECK_START) {\
1955 if (k->zid == (sid)) level++;\
1957 else if (k->type == STK_EMPTY_CHECK_END) {\
1958 if (k->zid == (sid)) level--;\
1970 else if (k->type == STK_EMPTY_CHECK_END) {\
1971 if (k->zid == (sid)) level++;\
1976 #define STACK_EMPTY_CHECK_REC(isnull,id,s) do {\
1978 StackType* k = stk;\
1981 STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_REC"); \
1982 if (k->type == STK_EMPTY_CHECK_START) {\
1983 if (k->u.empty_check.num == (id)) {\
1985 (isnull) = (k->u.empty_check.pstr == (s));\
1991 else if (k->type == STK_EMPTY_CHECK_END) {\
1996 #endif /* USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT */
/*
 * STACK_GET_REPEAT(sid, k) - locate the STK_REPEAT frame whose zid equals
 *     sid; STK_CALL_FRAME decrements and STK_RETURN increments `level`
 *     while scanning.
 * STACK_RETURN(addr)       - locate the STK_CALL_FRAME to return to and load
 *     its ret_addr into addr; STK_RETURN frames are handled in a separate
 *     arm.
 * STRING_CMP(s1, s2, len)  - byte-wise compare.  It advances s1 and s2,
 *     counts len down, and jumps to the enclosing function's `fail` label
 *     on the first difference, so arguments must be modifiable lvalues.
 * STRING_CMP_IC(case_fold_flag, s1, ps2, len) - case-insensitive compare via
 *     string_cmp_ic() using the enclosing function's `encode`; the action
 *     taken on a 0 result is not visible in this excerpt.
 */
1998 #define STACK_GET_REPEAT(sid, k) do {\
2003 STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
2004 if (k->type == STK_REPEAT) {\
2006 if (k->zid == (sid)) {\
2011 else if (k->type == STK_CALL_FRAME) level--;\
2012 else if (k->type == STK_RETURN) level++;\
2016 #define STACK_RETURN(addr) do {\
2018 StackType* k = stk;\
2021 STACK_BASE_CHECK(k, "STACK_RETURN"); \
2022 if (k->type == STK_CALL_FRAME) {\
2024 (addr) = k->u.call_frame.ret_addr;\
2029 else if (k->type == STK_RETURN)\
2035 #define STRING_CMP(s1,s2,len) do {\
2036 while (len-- > 0) {\
2037 if (*s1++ != *s2++) goto fail;\
2041 #define STRING_CMP_IC(case_fold_flag,s1,ps2,len) do {\
2042 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) \
/*
 * string_cmp_ic: case-insensitive comparison of two encoded strings.
 *
 *   enc            - encoding used for case folding
 *   case_fold_flag - folding options passed to ONIGENC_MBC_CASE_FOLD
 *   s1             - first string
 *   ps2            - address of the pointer to the second string
 *   mblen          - length to compare (its use is not visible here)
 *
 * Each side is case-folded one character at a time into a local buffer of
 * ONIGENC_MBC_CASE_FOLD_MAXLEN bytes.  The function returns 0 as soon as the
 * folded lengths differ or a folded byte differs.  Callers in this file test
 * the result against 0 to detect a mismatch.  The value returned on a full
 * match, and any update of *ps2, lie outside the visible lines.
 */
2046 static int string_cmp_ic(OnigEncoding enc
, int case_fold_flag
,
2047 UChar
* s1
, UChar
** ps2
, int mblen
)
2049 UChar buf1
[ONIGENC_MBC_CASE_FOLD_MAXLEN
];
2050 UChar buf2
[ONIGENC_MBC_CASE_FOLD_MAXLEN
];
2051 UChar
*p1
, *p2
, *end1
, *s2
, *end2
;
2058 len1
= ONIGENC_MBC_CASE_FOLD(enc
, case_fold_flag
, &s1
, end1
, buf1
);
2059 len2
= ONIGENC_MBC_CASE_FOLD(enc
, case_fold_flag
, &s2
, end2
, buf2
);
2060 if (len1
!= len2
) return 0;
2063 while (len1
-- > 0) {
2064 if (*p1
!= *p2
) return 0;
/*
 * Value-returning string compares and subject-range predicates.  They rely
 * on locals of the enclosing matcher: str, end, s, right_range,
 * in_right_range and encode.
 *
 *   STRING_CMP_VALUE(s1, s2, len, is_fail)   - byte-wise compare that sets
 *       is_fail = 1 and breaks on the first difference (no goto).
 *   STRING_CMP_VALUE_IC(...)                 - same idea via string_cmp_ic().
 *   IS_EMPTY_STR                             - subject is empty (str == end).
 *   ON_STR_BEGIN(s) / ON_STR_END(s)          - s is at str / at end.
 *   DATA_ENSURE_CHECK1 / DATA_ENSURE_CHECK(n)- at least 1 / n bytes remain
 *       before right_range.
 *   DATA_ENSURE(n)                           - `goto fail` unless n bytes
 *       remain before right_range.
 *   INIT_RIGHT_RANGE                         - load right_range from
 *       in_right_range.
 *
 * NOTE(review): DATA_ENSURE expands to a bare `if (...) goto fail` without a
 * do/while(0) wrapper, so using it directly before an `else` would capture
 * that `else`.
 */
2074 #define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
2076 while (len-- > 0) {\
2077 if (*s1++ != *s2++) {\
2078 is_fail = 1; break;\
2083 #define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,is_fail) do {\
2084 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) \
2091 #define IS_EMPTY_STR (str == end)
2092 #define ON_STR_BEGIN(s) ((s) == str)
2093 #define ON_STR_END(s) ((s) == end)
2094 #define DATA_ENSURE_CHECK1 (s < right_range)
2095 #define DATA_ENSURE_CHECK(n) (s + (n) <= right_range)
2096 #define DATA_ENSURE(n) if (s + (n) > right_range) goto fail
2098 #define INIT_RIGHT_RANGE right_range = (UChar* )in_right_range
2100 #ifdef USE_CAPTURE_HISTORY
/*
 * make_capture_history_tree: build the capture-history tree under `node`
 * from the frames of the backtrack stack.
 *
 *   node    - tree node currently being filled
 *   kp      - in/out cursor into the stack, shared across recursion levels
 *   stk_top - scanning stops when the cursor reaches this frame
 *   str     - start of the subject; positions are stored as offsets from it
 *   reg     - regex; reg->capture_history selects the recorded groups
 *
 * For every STK_MEM_START frame of a recorded group (n not above
 * ONIG_MAX_CAPTURE_HISTORY_GROUP) a child node is created, attached to
 * `node`, and filled by a recursive call.  An STK_MEM_END frame whose zid
 * equals node->group sets node->end.
 *
 * Returns a non-zero code from history_tree_add_child() or from the
 * recursive call as-is; reaching stk_top returns 1 ("root node ending").
 * Allocation failure leaves through CHECK_NULL_RETURN_MEMERR.
 *
 * NOTE(review): when history_tree_add_child() fails, `child` is returned
 * past without a visible free -- confirm whether add_child takes ownership
 * on failure; otherwise the node leaks.
 */
2102 make_capture_history_tree(OnigCaptureTreeNode
* node
, StackType
** kp
,
2103 StackType
* stk_top
, UChar
* str
, regex_t
* reg
)
2106 OnigCaptureTreeNode
* child
;
2109 while (k
< stk_top
) {
2110 if (k
->type
== STK_MEM_START
) {
2112 if (n
<= ONIG_MAX_CAPTURE_HISTORY_GROUP
&&
2113 MEM_STATUS_AT(reg
->capture_history
, n
) != 0) {
2114 child
= history_node_new();
2115 CHECK_NULL_RETURN_MEMERR(child
);
2117 child
->beg
= (int )(k
->u
.mem
.pstr
- str
);
2118 r
= history_tree_add_child(node
, child
);
2119 if (r
!= 0) return r
;
2121 r
= make_capture_history_tree(child
, kp
, stk_top
, str
, reg
);
2122 if (r
!= 0) return r
;
2125 child
->end
= (int )(k
->u
.mem
.pstr
- str
);
2128 else if (k
->type
== STK_MEM_END
) {
2129 if (k
->zid
== node
->group
) {
2130 node
->end
= (int )(k
->u
.mem
.pstr
- str
);
2138 return 1; /* 1: root node ending. */
2142 #ifdef USE_BACKREF_WITH_LEVEL
/*
 * mem_is_in_memp: test whether group number `mem` occurs in a packed list.
 *
 *   mem  - group number to look for
 *   num  - number of entries in the list
 *   memp - list of group numbers, read sequentially with GET_MEMNUM_INC
 *
 * Returns 1 on the first entry equal to `mem`; the not-found return is
 * outside the visible lines.
 */
2143 static int mem_is_in_memp(int mem
, int num
, UChar
* memp
)
2148 for (i
= 0; i
< num
; i
++) {
2149 GET_MEMNUM_INC(m
, memp
);
2150 if (mem
== (int )m
) return 1;
/*
 * backref_match_at_nested_level: match a back-reference that names a group
 * at a given call-nesting level.
 *
 *   reg                    - regex (reg->enc is used for case folding)
 *   top, stk_base          - stack range to scan (loop runs while
 *                            k >= stk_base)
 *   ignore_case            - non-zero selects the case-insensitive compare
 *   case_fold_flag         - folding options for string_cmp_ic()
 *   nest                   - nesting level at which frames are considered
 *   mem_num, memp          - candidate group list (see mem_is_in_memp)
 *   s                      - in/out subject position
 *   send                   - end of the subject
 *
 * STK_CALL_FRAME and STK_RETURN frames have their own arms (presumably
 * adjusting `level`; the bodies are not visible).  At level == nest an
 * STK_MEM_END frame of a candidate group records `pend`; the matching
 * STK_MEM_START frame supplies `pstart`, and the captured text
 * [pstart, pend) is compared with the subject at *s.
 *
 * Returns 0 when the capture is longer than the remaining subject or when
 * the comparison fails; other return values are outside the visible lines.
 */
2156 backref_match_at_nested_level(regex_t
* reg
,
2157 StackType
* top
, StackType
* stk_base
,
2158 int ignore_case
, int case_fold_flag
,
2159 int nest
, int mem_num
, UChar
* memp
,
2160 UChar
** s
, const UChar
* send
)
2162 UChar
*ss
, *p
, *pstart
, *pend
= NULL_UCHARP
;
2169 while (k
>= stk_base
) {
2170 if (k
->type
== STK_CALL_FRAME
) {
2173 else if (k
->type
== STK_RETURN
) {
2176 else if (level
== nest
) {
2177 if (k
->type
== STK_MEM_START
) {
2178 if (mem_is_in_memp(k
->zid
, mem_num
, memp
)) {
2179 pstart
= k
->u
.mem
.pstr
;
2180 if (IS_NOT_NULL(pend
)) {
2181 if (pend
- pstart
> send
- *s
) return 0; /* or goto next_mem; */
2185 if (ignore_case
!= 0) {
2186 if (string_cmp_ic(reg
->enc
, case_fold_flag
,
2187 pstart
, &ss
, (int )(pend
- pstart
)) == 0)
2188 return 0; /* or goto next_mem; */
2192 if (*p
++ != *ss
++) return 0; /* or goto next_mem; */
2201 else if (k
->type
== STK_MEM_END
) {
2202 if (mem_is_in_memp(k
->zid
, mem_num
, memp
)) {
2203 pend
= k
->u
.mem
.pstr
;
/*
 * backref_check_at_nested_level: scan the stack the same way as
 * backref_match_at_nested_level(), but only look for an STK_MEM_END frame of
 * one of the candidate groups (mem_num / memp) at nesting level `nest`.  No
 * subject text is compared here.
 *
 * The values returned for found / not found are outside the visible lines.
 */
2214 backref_check_at_nested_level(regex_t
* reg
,
2215 StackType
* top
, StackType
* stk_base
,
2216 int nest
, int mem_num
, UChar
* memp
)
2224 while (k
>= stk_base
) {
2225 if (k
->type
== STK_CALL_FRAME
) {
2228 else if (k
->type
== STK_RETURN
) {
2231 else if (level
== nest
) {
2232 if (k
->type
== STK_MEM_END
) {
2233 if (mem_is_in_memp(k
->zid
, mem_num
, memp
)) {
2243 #endif /* USE_BACKREF_WITH_LEVEL */
/*
 * Opcode statistics support, compiled only with ONIG_DEBUG_STATISTICS.
 *
 * Timing: USE_TIMEOFDAY is defined here, so GETTIME uses gettimeofday() and
 * TIMEDIFF yields microseconds.  The alternative definitions use times()
 * and compare tms_utime.
 *
 * Counters (256 entries each, indexed by opcode in SOP_IN / SOP_OUT):
 *   OpCounter[op]     - times opcode `op` was entered
 *   OpPrevCounter[op] - times `op` was the current opcode when the opcode in
 *                       OpPrevTarget (initially OP_FAIL) was entered
 *   OpTime[op]        - accumulated TIMEDIFF for `op`
 *   OpCurr            - opcode currently being timed (initially OP_FINISH)
 *   MaxStackDepth     - deepest stack seen
 *
 * SOP_IN(opcode) updates the counters on entry; SOP_OUT adds the elapsed
 * time to OpTime[OpCurr].
 */
2246 #ifdef ONIG_DEBUG_STATISTICS
2248 #define USE_TIMEOFDAY
2250 #ifdef USE_TIMEOFDAY
2251 #ifdef HAVE_SYS_TIME_H
2252 #include <sys/time.h>
2254 #ifdef HAVE_UNISTD_H
2257 static struct timeval ts
, te
;
2258 #define GETTIME(t) gettimeofday(&(t), (struct timezone* )0)
2259 #define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \
2260 (((te).tv_sec - (ts).tv_sec)*1000000))
2262 #ifdef HAVE_SYS_TIMES_H
2263 #include <sys/times.h>
2265 static struct tms ts
, te
;
2266 #define GETTIME(t) times(&(t))
2267 #define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime)
2270 static int OpCounter
[256];
2271 static int OpPrevCounter
[256];
2272 static unsigned long OpTime
[256];
2273 static int OpCurr
= OP_FINISH
;
2274 static int OpPrevTarget
= OP_FAIL
;
2275 static int MaxStackDepth
= 0;
2277 #define SOP_IN(opcode) do {\
2278 if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\
2280 OpCounter[opcode]++;\
2284 #define SOP_OUT do {\
2286 OpTime[OpCurr] += TIMEDIFF(te, ts);\
/*
 * onig_statistics_init: reset the per-opcode statistics by zeroing all 256
 * entries of OpCounter, OpPrevCounter and OpTime.
 */
2290 onig_statistics_init(void)
2293 for (i
= 0; i
< 256; i
++) {
2294 OpCounter
[i
] = OpPrevCounter
[i
] = 0; OpTime
[i
] = 0;
/*
 * onig_print_statistics: write the collected opcode statistics to `f`.
 *
 * Output is a header line, one row per OpInfo[] entry (the table ends at the
 * first entry with a negative opcode) giving count, prev-count, time and
 * opcode name, and finally the maximum stack depth.
 *
 * Returns -1 as soon as any fprintf() reports an error; the success return
 * is outside the visible lines.
 *
 * NOTE(review): OpTime[] is `unsigned long` but is printed with "%10ld";
 * "%10lu" would match the type.
 * NOTE(review): the counters are indexed by opcode in SOP_IN but by table
 * position `i` here -- this assumes OpInfo[i].opcode == i; confirm.
 */
2300 onig_print_statistics(FILE* f
)
2305 r
= fprintf(f
, " count prev time\n");
2306 if (r
< 0) return -1;
2308 for (i
= 0; OpInfo
[i
].opcode
>= 0; i
++) {
2309 r
= fprintf(f
, "%8d: %8d: %10ld: %s\n",
2310 OpCounter
[i
], OpPrevCounter
[i
], OpTime
[i
], OpInfo
[i
].name
);
2311 if (r
< 0) return -1;
2313 r
= fprintf(f
, "\nmax stack depth: %d\n", MaxStackDepth
);
2314 if (r
< 0) return -1;
/*
 * STACK_INC advances the stack top.  The first definition also records the
 * deepest stack seen in MaxStackDepth; the second is a plain `stk++`.
 * The SOP_IN definition at the end expands to nothing.
 */
2319 #define STACK_INC do {\
2321 if (stk - stk_base > MaxStackDepth) \
2322 MaxStackDepth = stk - stk_base;\
2326 #define STACK_INC stk++
2328 #define SOP_IN(opcode)
2333 /* matching region of POSIX API */
2334 typedef int regoff_t
;
2341 /* match data(str - end) from position (sstart). */
2342 /* if sstart == str then set sprev to NULL. */
2344 match_at(regex_t
* reg
, const UChar
* str
, const UChar
* end
,
2345 const UChar
* in_right_range
, const UChar
* sstart
, UChar
* sprev
,
2348 static UChar FinishCode
[] = { OP_FINISH
};
2350 int i
, n
, num_mem
, best_len
, pop_level
;
2351 LengthType tlen
, tlen2
;
2354 UChar
*s
, *q
, *sbegin
;
2358 StackType
*stk_base
, *stk
, *stk_end
;
2359 StackType
*stkp
; /* used as any purpose. */
2361 StackIndex
*repeat_stk
;
2362 StackIndex
*mem_start_stk
, *mem_end_stk
;
2364 #ifdef USE_RETRY_LIMIT_IN_MATCH
2365 unsigned long retry_limit_in_match
;
2366 unsigned long retry_in_match_counter
;
2374 OnigOptionType option
= reg
->options
;
2375 OnigEncoding encode
= reg
->enc
;
2376 OnigCaseFoldType case_fold_flag
= reg
->case_fold_flag
;
2379 msa
->mp
->match_at_call_counter
++;
2382 #ifdef USE_RETRY_LIMIT_IN_MATCH
2383 retry_limit_in_match
= msa
->retry_limit_in_match
;
2386 pop_level
= reg
->stack_pop_level
;
2387 num_mem
= reg
->num_mem
;
2388 STACK_INIT(INIT_MATCH_STACK_SIZE
);
2389 UPDATE_FOR_STACK_REALLOC
;
2390 for (i
= 1; i
<= num_mem
; i
++) {
2391 mem_start_stk
[i
] = mem_end_stk
[i
] = INVALID_STACK_INDEX
;
2394 #ifdef ONIG_DEBUG_MATCH
2395 fprintf(stderr
, "match_at: str: %p, end: %p, start: %p, sprev: %p\n",
2396 str
, end
, sstart
, sprev
);
2397 fprintf(stderr
, "size: %d, start offset: %d\n",
2398 (int )(end
- str
), (int )(sstart
- str
));
2401 best_len
= ONIG_MISMATCH
;
2402 keep
= s
= (UChar
* )sstart
;
2403 STACK_PUSH_BOTTOM(STK_ALT
, FinishCode
); /* bottom stack */
2406 #ifdef USE_RETRY_LIMIT_IN_MATCH
2407 retry_in_match_counter
= 0;
2411 #ifdef ONIG_DEBUG_MATCH
2413 static unsigned int counter
= 1;
2415 UChar
*q
, *bp
, buf
[50];
2417 fprintf(stderr
, "%7u: %7ld: %4d> \"",
2418 counter
, GET_STACK_INDEX(stk
), (int )(s
- str
));
2422 for (i
= 0, q
= s
; i
< 7 && q
< end
; i
++) {
2423 len
= enclen(encode
, q
);
2424 while (len
-- > 0) *bp
++ = *q
++;
2426 if (q
< end
) { xmemcpy(bp
, "...\"", 4); bp
+= 4; }
2427 else { xmemcpy(bp
, "\"", 1); bp
+= 1; }
2429 fputs((char* )buf
, stderr
);
2431 for (i
= 0; i
< 20 - (bp
- buf
); i
++) fputc(' ', stderr
);
2432 if (p
== FinishCode
)
2433 fprintf(stderr
, "----: ");
2435 fprintf(stderr
, "%4d: ", (int )(p
- reg
->p
));
2436 onig_print_compiled_byte_code(stderr
, p
, NULL
, reg
->p
, encode
);
2437 fprintf(stderr
, "\n");
2443 case OP_END
: SOP_IN(OP_END
);
2444 n
= (int )(s
- sstart
);
2447 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
2448 if (IS_FIND_LONGEST(option
)) {
2449 if (n
> msa
->best_len
) {
2451 msa
->best_s
= (UChar
* )sstart
;
2458 region
= msa
->region
;
2460 if (keep
> s
) keep
= s
;
2462 #ifdef USE_POSIX_API_REGION_OPTION
2463 if (IS_POSIX_REGION(msa
->options
)) {
2464 posix_regmatch_t
* rmt
= (posix_regmatch_t
* )region
;
2466 rmt
[0].rm_so
= (regoff_t
)(keep
- str
);
2467 rmt
[0].rm_eo
= (regoff_t
)(s
- str
);
2468 for (i
= 1; i
<= num_mem
; i
++) {
2469 if (mem_end_stk
[i
] != INVALID_STACK_INDEX
) {
2470 if (MEM_STATUS_AT(reg
->bt_mem_start
, i
))
2471 rmt
[i
].rm_so
= (regoff_t
)(STACK_AT(mem_start_stk
[i
])->u
.mem
.pstr
- str
);
2473 rmt
[i
].rm_so
= (regoff_t
)((UChar
* )((void* )(mem_start_stk
[i
])) - str
);
2475 rmt
[i
].rm_eo
= (regoff_t
)((MEM_STATUS_AT(reg
->bt_mem_end
, i
)
2476 ? STACK_AT(mem_end_stk
[i
])->u
.mem
.pstr
2477 : (UChar
* )((void* )mem_end_stk
[i
]))
2481 rmt
[i
].rm_so
= rmt
[i
].rm_eo
= ONIG_REGION_NOTPOS
;
2486 #endif /* USE_POSIX_API_REGION_OPTION */
2487 region
->beg
[0] = (int )(keep
- str
);
2488 region
->end
[0] = (int )(s
- str
);
2489 for (i
= 1; i
<= num_mem
; i
++) {
2490 if (mem_end_stk
[i
] != INVALID_STACK_INDEX
) {
2491 if (MEM_STATUS_AT(reg
->bt_mem_start
, i
))
2492 region
->beg
[i
] = (int )(STACK_AT(mem_start_stk
[i
])->u
.mem
.pstr
- str
);
2494 region
->beg
[i
] = (int )((UChar
* )((void* )mem_start_stk
[i
]) - str
);
2496 region
->end
[i
] = (int )((MEM_STATUS_AT(reg
->bt_mem_end
, i
)
2497 ? STACK_AT(mem_end_stk
[i
])->u
.mem
.pstr
2498 : (UChar
* )((void* )mem_end_stk
[i
])) - str
);
2501 region
->beg
[i
] = region
->end
[i
] = ONIG_REGION_NOTPOS
;
2505 #ifdef USE_CAPTURE_HISTORY
2506 if (reg
->capture_history
!= 0) {
2508 OnigCaptureTreeNode
* node
;
2510 if (IS_NULL(region
->history_root
)) {
2511 region
->history_root
= node
= history_node_new();
2512 CHECK_NULL_RETURN_MEMERR(node
);
2515 node
= region
->history_root
;
2516 history_tree_clear(node
);
2520 node
->beg
= (int )(keep
- str
);
2521 node
->end
= (int )(s
- str
);
2524 r
= make_capture_history_tree(region
->history_root
, &stkp
,
2525 stk
, (UChar
* )str
, reg
);
2527 best_len
= r
; /* error code */
2531 #endif /* USE_CAPTURE_HISTORY */
2532 #ifdef USE_POSIX_API_REGION_OPTION
2533 } /* else IS_POSIX_REGION() */
2536 } /* n > best_len */
2538 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
2543 if (IS_FIND_CONDITION(option
)) {
2544 if (IS_FIND_NOT_EMPTY(option
) && s
== sstart
) {
2545 best_len
= ONIG_MISMATCH
;
2546 goto fail
; /* for retry */
2548 if (IS_FIND_LONGEST(option
) && DATA_ENSURE_CHECK1
) {
2549 goto fail
; /* for retry */
2553 /* default behavior: return first-matching result. */
2557 case OP_EXACT1
: SOP_IN(OP_EXACT1
);
2559 if (*p
!= *s
) goto fail
;
2564 case OP_EXACT1_IC
: SOP_IN(OP_EXACT1_IC
);
2567 UChar
*q
, lowbuf
[ONIGENC_MBC_CASE_FOLD_MAXLEN
];
2570 len
= ONIGENC_MBC_CASE_FOLD(encode
,
2571 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
2586 case OP_EXACT2
: SOP_IN(OP_EXACT2
);
2588 if (*p
!= *s
) goto fail
;
2590 if (*p
!= *s
) goto fail
;
2597 case OP_EXACT3
: SOP_IN(OP_EXACT3
);
2599 if (*p
!= *s
) goto fail
;
2601 if (*p
!= *s
) goto fail
;
2603 if (*p
!= *s
) goto fail
;
2610 case OP_EXACT4
: SOP_IN(OP_EXACT4
);
2612 if (*p
!= *s
) goto fail
;
2614 if (*p
!= *s
) goto fail
;
2616 if (*p
!= *s
) goto fail
;
2618 if (*p
!= *s
) goto fail
;
2625 case OP_EXACT5
: SOP_IN(OP_EXACT5
);
2627 if (*p
!= *s
) goto fail
;
2629 if (*p
!= *s
) goto fail
;
2631 if (*p
!= *s
) goto fail
;
2633 if (*p
!= *s
) goto fail
;
2635 if (*p
!= *s
) goto fail
;
2642 case OP_EXACTN
: SOP_IN(OP_EXACTN
);
2643 GET_LENGTH_INC(tlen
, p
);
2645 while (tlen
-- > 0) {
2646 if (*p
++ != *s
++) goto fail
;
2653 case OP_EXACTN_IC
: SOP_IN(OP_EXACTN_IC
);
2656 UChar
*q
, *endp
, lowbuf
[ONIGENC_MBC_CASE_FOLD_MAXLEN
];
2658 GET_LENGTH_INC(tlen
, p
);
2664 len
= ONIGENC_MBC_CASE_FOLD(encode
,
2665 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
2671 if (*p
!= *q
) goto fail
;
2681 case OP_EXACTMB2N1
: SOP_IN(OP_EXACTMB2N1
);
2683 if (*p
!= *s
) goto fail
;
2685 if (*p
!= *s
) goto fail
;
2690 case OP_EXACTMB2N2
: SOP_IN(OP_EXACTMB2N2
);
2692 if (*p
!= *s
) goto fail
;
2694 if (*p
!= *s
) goto fail
;
2697 if (*p
!= *s
) goto fail
;
2699 if (*p
!= *s
) goto fail
;
2705 case OP_EXACTMB2N3
: SOP_IN(OP_EXACTMB2N3
);
2707 if (*p
!= *s
) goto fail
;
2709 if (*p
!= *s
) goto fail
;
2711 if (*p
!= *s
) goto fail
;
2713 if (*p
!= *s
) goto fail
;
2716 if (*p
!= *s
) goto fail
;
2718 if (*p
!= *s
) goto fail
;
2724 case OP_EXACTMB2N
: SOP_IN(OP_EXACTMB2N
);
2725 GET_LENGTH_INC(tlen
, p
);
2726 DATA_ENSURE(tlen
* 2);
2727 while (tlen
-- > 0) {
2728 if (*p
!= *s
) goto fail
;
2730 if (*p
!= *s
) goto fail
;
2738 case OP_EXACTMB3N
: SOP_IN(OP_EXACTMB3N
);
2739 GET_LENGTH_INC(tlen
, p
);
2740 DATA_ENSURE(tlen
* 3);
2741 while (tlen
-- > 0) {
2742 if (*p
!= *s
) goto fail
;
2744 if (*p
!= *s
) goto fail
;
2746 if (*p
!= *s
) goto fail
;
2754 case OP_EXACTMBN
: SOP_IN(OP_EXACTMBN
);
2755 GET_LENGTH_INC(tlen
, p
); /* mb-len */
2756 GET_LENGTH_INC(tlen2
, p
); /* string len */
2759 while (tlen2
-- > 0) {
2760 if (*p
!= *s
) goto fail
;
2768 case OP_CCLASS
: SOP_IN(OP_CCLASS
);
2770 if (BITSET_AT(((BitSetRef
)p
), *s
) == 0) goto fail
;
2772 s
+= enclen(encode
, s
); /* OP_CCLASS can match mb-code. \D, \S */
2776 case OP_CCLASS_MB
: SOP_IN(OP_CCLASS_MB
);
2777 if (! ONIGENC_IS_MBC_HEAD(encode
, s
)) goto fail
;
2780 GET_LENGTH_INC(tlen
, p
);
2787 mb_len
= enclen(encode
, s
);
2788 DATA_ENSURE(mb_len
);
2791 code
= ONIGENC_MBC_TO_CODE(encode
, ss
, s
);
2793 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
2794 if (! onig_is_in_code_range(p
, code
)) goto fail
;
2798 if (! onig_is_in_code_range(q
, code
)) goto fail
;
2805 case OP_CCLASS_MIX
: SOP_IN(OP_CCLASS_MIX
);
2807 if (ONIGENC_IS_MBC_HEAD(encode
, s
)) {
2812 if (BITSET_AT(((BitSetRef
)p
), *s
) == 0)
2816 GET_LENGTH_INC(tlen
, p
);
2823 case OP_CCLASS_NOT
: SOP_IN(OP_CCLASS_NOT
);
2825 if (BITSET_AT(((BitSetRef
)p
), *s
) != 0) goto fail
;
2827 s
+= enclen(encode
, s
);
2831 case OP_CCLASS_MB_NOT
: SOP_IN(OP_CCLASS_MB_NOT
);
2833 if (! ONIGENC_IS_MBC_HEAD(encode
, s
)) {
2835 GET_LENGTH_INC(tlen
, p
);
2837 goto cc_mb_not_success
;
2841 GET_LENGTH_INC(tlen
, p
);
2845 int mb_len
= enclen(encode
, s
);
2847 if (! DATA_ENSURE_CHECK(mb_len
)) {
2851 goto cc_mb_not_success
;
2856 code
= ONIGENC_MBC_TO_CODE(encode
, ss
, s
);
2858 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
2859 if (onig_is_in_code_range(p
, code
)) goto fail
;
2863 if (onig_is_in_code_range(q
, code
)) goto fail
;
2872 case OP_CCLASS_MIX_NOT
: SOP_IN(OP_CCLASS_MIX_NOT
);
2874 if (ONIGENC_IS_MBC_HEAD(encode
, s
)) {
2879 if (BITSET_AT(((BitSetRef
)p
), *s
) != 0)
2883 GET_LENGTH_INC(tlen
, p
);
2890 #ifdef USE_OP_CCLASS_NODE
2891 case OP_CCLASS_NODE
: SOP_IN(OP_CCLASS_NODE
);
2899 GET_POINTER_INC(node
, p
);
2900 mb_len
= enclen(encode
, s
);
2904 code
= ONIGENC_MBC_TO_CODE(encode
, ss
, s
);
2905 if (onig_is_code_in_cc_len(mb_len
, code
, node
) == 0) goto fail
;
2911 case OP_ANYCHAR
: SOP_IN(OP_ANYCHAR
);
2913 n
= enclen(encode
, s
);
2915 if (ONIGENC_IS_MBC_NEWLINE(encode
, s
, end
)) goto fail
;
2920 case OP_ANYCHAR_ML
: SOP_IN(OP_ANYCHAR_ML
);
2922 n
= enclen(encode
, s
);
2928 case OP_ANYCHAR_STAR
: SOP_IN(OP_ANYCHAR_STAR
);
2929 while (DATA_ENSURE_CHECK1
) {
2930 STACK_PUSH_ALT(p
, s
, sprev
);
2931 n
= enclen(encode
, s
);
2933 if (ONIGENC_IS_MBC_NEWLINE(encode
, s
, end
)) goto fail
;
2941 case OP_ANYCHAR_ML_STAR
: SOP_IN(OP_ANYCHAR_ML_STAR
);
2942 while (DATA_ENSURE_CHECK1
) {
2943 STACK_PUSH_ALT(p
, s
, sprev
);
2944 n
= enclen(encode
, s
);
2959 case OP_ANYCHAR_STAR_PEEK_NEXT
: SOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT
);
2960 while (DATA_ENSURE_CHECK1
) {
2962 STACK_PUSH_ALT(p
+ 1, s
, sprev
);
2964 n
= enclen(encode
, s
);
2966 if (ONIGENC_IS_MBC_NEWLINE(encode
, s
, end
)) goto fail
;
2974 case OP_ANYCHAR_ML_STAR_PEEK_NEXT
:SOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT
);
2975 while (DATA_ENSURE_CHECK1
) {
2977 STACK_PUSH_ALT(p
+ 1, s
, sprev
);
2979 n
= enclen(encode
, s
);
2994 case OP_WORD
: SOP_IN(OP_WORD
);
2996 if (! ONIGENC_IS_MBC_WORD(encode
, s
, end
))
2999 s
+= enclen(encode
, s
);
3003 case OP_WORD_ASCII
: SOP_IN(OP_WORD_ASCII
);
3005 if (! ONIGENC_IS_MBC_WORD_ASCII(encode
, s
, end
))
3008 s
+= enclen(encode
, s
);
3012 case OP_NO_WORD
: SOP_IN(OP_NO_WORD
);
3014 if (ONIGENC_IS_MBC_WORD(encode
, s
, end
))
3017 s
+= enclen(encode
, s
);
3021 case OP_NO_WORD_ASCII
: SOP_IN(OP_NO_WORD_ASCII
);
3023 if (ONIGENC_IS_MBC_WORD_ASCII(encode
, s
, end
))
3026 s
+= enclen(encode
, s
);
3030 case OP_WORD_BOUNDARY
: SOP_IN(OP_WORD_BOUNDARY
);
3033 GET_MODE_INC(mode
, p
); /* ascii_mode */
3035 if (ON_STR_BEGIN(s
)) {
3037 if (! IS_MBC_WORD_ASCII_MODE(encode
, s
, end
, mode
))
3040 else if (ON_STR_END(s
)) {
3041 if (! IS_MBC_WORD_ASCII_MODE(encode
, sprev
, end
, mode
))
3045 if (IS_MBC_WORD_ASCII_MODE(encode
, s
, end
, mode
)
3046 == IS_MBC_WORD_ASCII_MODE(encode
, sprev
, end
, mode
))
3054 case OP_NO_WORD_BOUNDARY
: SOP_IN(OP_NO_WORD_BOUNDARY
);
3057 GET_MODE_INC(mode
, p
); /* ascii_mode */
3059 if (ON_STR_BEGIN(s
)) {
3060 if (DATA_ENSURE_CHECK1
&& IS_MBC_WORD_ASCII_MODE(encode
, s
, end
, mode
))
3063 else if (ON_STR_END(s
)) {
3064 if (IS_MBC_WORD_ASCII_MODE(encode
, sprev
, end
, mode
))
3068 if (IS_MBC_WORD_ASCII_MODE(encode
, s
, end
, mode
)
3069 != IS_MBC_WORD_ASCII_MODE(encode
, sprev
, end
, mode
))
3077 #ifdef USE_WORD_BEGIN_END
3078 case OP_WORD_BEGIN
: SOP_IN(OP_WORD_BEGIN
);
3081 GET_MODE_INC(mode
, p
); /* ascii_mode */
3083 if (DATA_ENSURE_CHECK1
&& IS_MBC_WORD_ASCII_MODE(encode
, s
, end
, mode
)) {
3084 if (ON_STR_BEGIN(s
) || !IS_MBC_WORD_ASCII_MODE(encode
, sprev
, end
, mode
)) {
3093 case OP_WORD_END
: SOP_IN(OP_WORD_END
);
3096 GET_MODE_INC(mode
, p
); /* ascii_mode */
3098 if (!ON_STR_BEGIN(s
) && IS_MBC_WORD_ASCII_MODE(encode
, sprev
, end
, mode
)) {
3099 if (ON_STR_END(s
) || ! IS_MBC_WORD_ASCII_MODE(encode
, s
, end
, mode
)) {
3109 case OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY
:
3110 SOP_IN(OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY
);
3111 if (onigenc_egcb_is_break_position(encode
, s
, sprev
, str
, end
)) {
3118 case OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY
:
3119 SOP_IN(OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY
);
3120 if (onigenc_egcb_is_break_position(encode
, s
, sprev
, str
, end
))
3127 case OP_BEGIN_BUF
: SOP_IN(OP_BEGIN_BUF
);
3128 if (! ON_STR_BEGIN(s
)) goto fail
;
3134 case OP_END_BUF
: SOP_IN(OP_END_BUF
);
3135 if (! ON_STR_END(s
)) goto fail
;
3141 case OP_BEGIN_LINE
: SOP_IN(OP_BEGIN_LINE
);
3142 if (ON_STR_BEGIN(s
)) {
3143 if (IS_NOTBOL(msa
->options
)) goto fail
;
3147 else if (ONIGENC_IS_MBC_NEWLINE(encode
, sprev
, end
) && !ON_STR_END(s
)) {
3154 case OP_END_LINE
: SOP_IN(OP_END_LINE
);
3155 if (ON_STR_END(s
)) {
3156 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3157 if (IS_EMPTY_STR
|| !ONIGENC_IS_MBC_NEWLINE(encode
, sprev
, end
)) {
3159 if (IS_NOTEOL(msa
->options
)) goto fail
;
3162 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3166 else if (ONIGENC_IS_MBC_NEWLINE(encode
, s
, end
)) {
3170 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3171 else if (ONIGENC_IS_MBC_CRNL(encode
, s
, end
)) {
3179 case OP_SEMI_END_BUF
: SOP_IN(OP_SEMI_END_BUF
);
3180 if (ON_STR_END(s
)) {
3181 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3182 if (IS_EMPTY_STR
|| !ONIGENC_IS_MBC_NEWLINE(encode
, sprev
, end
)) {
3184 if (IS_NOTEOL(msa
->options
)) goto fail
;
3187 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3191 else if (ONIGENC_IS_MBC_NEWLINE(encode
, s
, end
) &&
3192 ON_STR_END(s
+ enclen(encode
, s
))) {
3196 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3197 else if (ONIGENC_IS_MBC_CRNL(encode
, s
, end
)) {
3198 UChar
* ss
= s
+ enclen(encode
, s
);
3199 ss
+= enclen(encode
, ss
);
3200 if (ON_STR_END(ss
)) {
3209 case OP_BEGIN_POSITION
: SOP_IN(OP_BEGIN_POSITION
);
3210 if (s
!= msa
->start
)
3217 case OP_MEMORY_START_PUSH
: SOP_IN(OP_MEMORY_START_PUSH
);
3218 GET_MEMNUM_INC(mem
, p
);
3219 STACK_PUSH_MEM_START(mem
, s
);
3224 case OP_MEMORY_START
: SOP_IN(OP_MEMORY_START
);
3225 GET_MEMNUM_INC(mem
, p
);
3226 mem_start_stk
[mem
] = (StackIndex
)((void* )s
);
3231 case OP_MEMORY_END_PUSH
: SOP_IN(OP_MEMORY_END_PUSH
);
3232 GET_MEMNUM_INC(mem
, p
);
3233 STACK_PUSH_MEM_END(mem
, s
);
3238 case OP_MEMORY_END
: SOP_IN(OP_MEMORY_END
);
3239 GET_MEMNUM_INC(mem
, p
);
3240 mem_end_stk
[mem
] = (StackIndex
)((void* )s
);
3246 case OP_MEMORY_END_PUSH_REC
: SOP_IN(OP_MEMORY_END_PUSH_REC
);
3247 GET_MEMNUM_INC(mem
, p
);
3248 STACK_GET_MEM_START(mem
, stkp
); /* should be before push mem-end. */
3249 STACK_PUSH_MEM_END(mem
, s
);
3250 mem_start_stk
[mem
] = GET_STACK_INDEX(stkp
);
3255 case OP_MEMORY_END_REC
: SOP_IN(OP_MEMORY_END_REC
);
3256 GET_MEMNUM_INC(mem
, p
);
3257 mem_end_stk
[mem
] = (StackIndex
)((void* )s
);
3258 STACK_GET_MEM_START(mem
, stkp
);
3260 if (MEM_STATUS_AT(reg
->bt_mem_start
, mem
))
3261 mem_start_stk
[mem
] = GET_STACK_INDEX(stkp
);
3263 mem_start_stk
[mem
] = (StackIndex
)((void* )stkp
->u
.mem
.pstr
);
3265 STACK_PUSH_MEM_END_MARK(mem
);
3271 case OP_BACKREF1
: SOP_IN(OP_BACKREF1
);
3276 case OP_BACKREF2
: SOP_IN(OP_BACKREF2
);
3281 case OP_BACKREF_N
: SOP_IN(OP_BACKREF_N
);
3282 GET_MEMNUM_INC(mem
, p
);
3286 UChar
*pstart
, *pend
;
3288 if (mem_end_stk
[mem
] == INVALID_STACK_INDEX
) goto fail
;
3289 if (mem_start_stk
[mem
] == INVALID_STACK_INDEX
) goto fail
;
3291 if (MEM_STATUS_AT(reg
->bt_mem_start
, mem
))
3292 pstart
= STACK_AT(mem_start_stk
[mem
])->u
.mem
.pstr
;
3294 pstart
= (UChar
* )((void* )mem_start_stk
[mem
]);
3296 pend
= (MEM_STATUS_AT(reg
->bt_mem_end
, mem
)
3297 ? STACK_AT(mem_end_stk
[mem
])->u
.mem
.pstr
3298 : (UChar
* )((void* )mem_end_stk
[mem
]));
3299 n
= (int )(pend
- pstart
);
3302 STRING_CMP(pstart
, s
, n
);
3303 while (sprev
+ (len
= enclen(encode
, sprev
)) < s
)
3311 case OP_BACKREF_N_IC
: SOP_IN(OP_BACKREF_N_IC
);
3312 GET_MEMNUM_INC(mem
, p
);
3315 UChar
*pstart
, *pend
;
3317 if (mem_end_stk
[mem
] == INVALID_STACK_INDEX
) goto fail
;
3318 if (mem_start_stk
[mem
] == INVALID_STACK_INDEX
) goto fail
;
3320 if (MEM_STATUS_AT(reg
->bt_mem_start
, mem
))
3321 pstart
= STACK_AT(mem_start_stk
[mem
])->u
.mem
.pstr
;
3323 pstart
= (UChar
* )((void* )mem_start_stk
[mem
]);
3325 pend
= (MEM_STATUS_AT(reg
->bt_mem_end
, mem
)
3326 ? STACK_AT(mem_end_stk
[mem
])->u
.mem
.pstr
3327 : (UChar
* )((void* )mem_end_stk
[mem
]));
3328 n
= (int )(pend
- pstart
);
3331 STRING_CMP_IC(case_fold_flag
, pstart
, &s
, n
);
3332 while (sprev
+ (len
= enclen(encode
, sprev
)) < s
)
3340 case OP_BACKREF_MULTI
: SOP_IN(OP_BACKREF_MULTI
);
3343 UChar
*pstart
, *pend
, *swork
;
3345 GET_LENGTH_INC(tlen
, p
);
3346 for (i
= 0; i
< tlen
; i
++) {
3347 GET_MEMNUM_INC(mem
, p
);
3349 if (mem_end_stk
[mem
] == INVALID_STACK_INDEX
) continue;
3350 if (mem_start_stk
[mem
] == INVALID_STACK_INDEX
) continue;
3352 if (MEM_STATUS_AT(reg
->bt_mem_start
, mem
))
3353 pstart
= STACK_AT(mem_start_stk
[mem
])->u
.mem
.pstr
;
3355 pstart
= (UChar
* )((void* )mem_start_stk
[mem
]);
3357 pend
= (MEM_STATUS_AT(reg
->bt_mem_end
, mem
)
3358 ? STACK_AT(mem_end_stk
[mem
])->u
.mem
.pstr
3359 : (UChar
* )((void* )mem_end_stk
[mem
]));
3360 n
= (int )(pend
- pstart
);
3364 STRING_CMP_VALUE(pstart
, swork
, n
, is_fail
);
3365 if (is_fail
) continue;
3367 while (sprev
+ (len
= enclen(encode
, sprev
)) < s
)
3370 p
+= (SIZE_MEMNUM
* (tlen
- i
- 1));
3371 break; /* success */
3373 if (i
== tlen
) goto fail
;
3379 case OP_BACKREF_MULTI_IC
: SOP_IN(OP_BACKREF_MULTI_IC
);
3382 UChar
*pstart
, *pend
, *swork
;
3384 GET_LENGTH_INC(tlen
, p
);
3385 for (i
= 0; i
< tlen
; i
++) {
3386 GET_MEMNUM_INC(mem
, p
);
3388 if (mem_end_stk
[mem
] == INVALID_STACK_INDEX
) continue;
3389 if (mem_start_stk
[mem
] == INVALID_STACK_INDEX
) continue;
3391 if (MEM_STATUS_AT(reg
->bt_mem_start
, mem
))
3392 pstart
= STACK_AT(mem_start_stk
[mem
])->u
.mem
.pstr
;
3394 pstart
= (UChar
* )((void* )mem_start_stk
[mem
]);
3396 pend
= (MEM_STATUS_AT(reg
->bt_mem_end
, mem
)
3397 ? STACK_AT(mem_end_stk
[mem
])->u
.mem
.pstr
3398 : (UChar
* )((void* )mem_end_stk
[mem
]));
3399 n
= (int )(pend
- pstart
);
3403 STRING_CMP_VALUE_IC(case_fold_flag
, pstart
, &swork
, n
, is_fail
);
3404 if (is_fail
) continue;
3406 while (sprev
+ (len
= enclen(encode
, sprev
)) < s
)
3409 p
+= (SIZE_MEMNUM
* (tlen
- i
- 1));
3410 break; /* success */
3412 if (i
== tlen
) goto fail
;
3418 #ifdef USE_BACKREF_WITH_LEVEL
3419 case OP_BACKREF_WITH_LEVEL
:
3425 GET_OPTION_INC(ic
, p
);
3426 GET_LENGTH_INC(level
, p
);
3427 GET_LENGTH_INC(tlen
, p
);
3430 if (backref_match_at_nested_level(reg
, stk
, stk_base
, ic
3431 , case_fold_flag
, (int )level
, (int )tlen
, p
, &s
, end
)) {
3433 while (sprev
+ (len
= enclen(encode
, sprev
)) < s
)
3436 p
+= (SIZE_MEMNUM
* tlen
);
3447 case OP_BACKREF_CHECK
: SOP_IN(OP_BACKREF_CHECK
);
3449 GET_LENGTH_INC(tlen
, p
);
3450 for (i
= 0; i
< tlen
; i
++) {
3451 GET_MEMNUM_INC(mem
, p
);
3453 if (mem_end_stk
[mem
] == INVALID_STACK_INDEX
) continue;
3454 if (mem_start_stk
[mem
] == INVALID_STACK_INDEX
) continue;
3456 p
+= (SIZE_MEMNUM
* (tlen
- i
- 1));
3457 break; /* success */
3459 if (i
== tlen
) goto fail
;
3465 #ifdef USE_BACKREF_WITH_LEVEL
3466 case OP_BACKREF_CHECK_WITH_LEVEL
:
3470 GET_LENGTH_INC(level
, p
);
3471 GET_LENGTH_INC(tlen
, p
);
3473 if (backref_check_at_nested_level(reg
, stk
, stk_base
,
3474 (int )level
, (int )tlen
, p
) != 0) {
3475 p
+= (SIZE_MEMNUM
* tlen
);
3486 case OP_EMPTY_CHECK_START
: SOP_IN(OP_EMPTY_CHECK_START
);
3487 GET_MEMNUM_INC(mem
, p
); /* mem: null check id */
3488 STACK_PUSH_EMPTY_CHECK_START(mem
, s
);
3493 case OP_EMPTY_CHECK_END
: SOP_IN(OP_EMPTY_CHECK_END
);
3497 GET_MEMNUM_INC(mem
, p
); /* mem: null check id */
3498 STACK_EMPTY_CHECK(is_empty
, mem
, s
);
3500 #ifdef ONIG_DEBUG_MATCH
3501 fprintf(stderr
, "EMPTY_CHECK_END: skip id:%d, s:%p\n", (int )mem
, s
);
3504 /* empty loop founded, skip next instruction */
3511 case OP_REPEAT_INC_NG
:
3512 case OP_REPEAT_INC_SG
:
3513 case OP_REPEAT_INC_NG_SG
:
3517 goto unexpected_bytecode_error
;
3526 #ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT
3527 case OP_EMPTY_CHECK_END_MEMST
: SOP_IN(OP_EMPTY_CHECK_END_MEMST
);
3531 GET_MEMNUM_INC(mem
, p
); /* mem: null check id */
3532 STACK_EMPTY_CHECK_MEM(is_empty
, mem
, s
, reg
);
3534 #ifdef ONIG_DEBUG_MATCH
3535 fprintf(stderr
, "EMPTY_CHECK_END_MEM: skip id:%d, s:%p\n", (int)mem
, s
);
3537 if (is_empty
== -1) goto fail
;
3538 goto empty_check_found
;
3547 case OP_EMPTY_CHECK_END_MEMST_PUSH
:
3548 SOP_IN(OP_EMPTY_CHECK_END_MEMST_PUSH
);
3552 GET_MEMNUM_INC(mem
, p
); /* mem: null check id */
3553 #ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT
3554 STACK_EMPTY_CHECK_MEM_REC(is_empty
, mem
, s
, reg
);
3556 STACK_EMPTY_CHECK_REC(is_empty
, mem
, s
);
3559 #ifdef ONIG_DEBUG_MATCH
3560 fprintf(stderr
, "EMPTY_CHECK_END_MEM_PUSH: skip id:%d, s:%p\n",
3563 if (is_empty
== -1) goto fail
;
3564 goto empty_check_found
;
3567 STACK_PUSH_EMPTY_CHECK_END(mem
);
3575 case OP_JUMP
: SOP_IN(OP_JUMP
);
3576 GET_RELADDR_INC(addr
, p
);
3579 CHECK_INTERRUPT_IN_MATCH
;
3583 case OP_PUSH
: SOP_IN(OP_PUSH
);
3584 GET_RELADDR_INC(addr
, p
);
3585 STACK_PUSH_ALT(p
+ addr
, s
, sprev
);
3590 case OP_PUSH_SUPER
: SOP_IN(OP_PUSH_SUPER
);
3591 GET_RELADDR_INC(addr
, p
);
3592 STACK_PUSH_SUPER_ALT(p
+ addr
, s
, sprev
);
3597 case OP_POP_OUT
: SOP_IN(OP_POP_OUT
);
3599 /* for stop backtrack */
3600 /* CHECK_RETRY_LIMIT_IN_MATCH; */
3605 case OP_PUSH_OR_JUMP_EXACT1
: SOP_IN(OP_PUSH_OR_JUMP_EXACT1
);
3606 GET_RELADDR_INC(addr
, p
);
3607 if (*p
== *s
&& DATA_ENSURE_CHECK1
) {
3609 STACK_PUSH_ALT(p
+ addr
, s
, sprev
);
3618 case OP_PUSH_IF_PEEK_NEXT
: SOP_IN(OP_PUSH_IF_PEEK_NEXT
);
3619 GET_RELADDR_INC(addr
, p
);
3622 STACK_PUSH_ALT(p
+ addr
, s
, sprev
);
3631 case OP_REPEAT
: SOP_IN(OP_REPEAT
);
3633 GET_MEMNUM_INC(mem
, p
); /* mem: OP_REPEAT ID */
3634 GET_RELADDR_INC(addr
, p
);
3637 repeat_stk
[mem
] = GET_STACK_INDEX(stk
);
3638 STACK_PUSH_REPEAT(mem
, p
);
3640 if (reg
->repeat_range
[mem
].lower
== 0) {
3641 STACK_PUSH_ALT(p
+ addr
, s
, sprev
);
3648 case OP_REPEAT_NG
: SOP_IN(OP_REPEAT_NG
);
3650 GET_MEMNUM_INC(mem
, p
); /* mem: OP_REPEAT ID */
3651 GET_RELADDR_INC(addr
, p
);
3654 repeat_stk
[mem
] = GET_STACK_INDEX(stk
);
3655 STACK_PUSH_REPEAT(mem
, p
);
3657 if (reg
->repeat_range
[mem
].lower
== 0) {
3658 STACK_PUSH_ALT(p
, s
, sprev
);
3666 case OP_REPEAT_INC
: SOP_IN(OP_REPEAT_INC
);
3667 GET_MEMNUM_INC(mem
, p
); /* mem: OP_REPEAT ID */
3668 si
= repeat_stk
[mem
];
3669 stkp
= STACK_AT(si
);
3672 stkp
->u
.repeat
.count
++;
3673 if (stkp
->u
.repeat
.count
>= reg
->repeat_range
[mem
].upper
) {
3674 /* end of repeat. Nothing to do. */
3676 else if (stkp
->u
.repeat
.count
>= reg
->repeat_range
[mem
].lower
) {
3677 STACK_PUSH_ALT(p
, s
, sprev
);
3678 p
= STACK_AT(si
)->u
.repeat
.pcode
; /* Don't use stkp after PUSH. */
3681 p
= stkp
->u
.repeat
.pcode
;
3683 STACK_PUSH_REPEAT_INC(si
);
3685 CHECK_INTERRUPT_IN_MATCH
;
3689 case OP_REPEAT_INC_SG
: SOP_IN(OP_REPEAT_INC_SG
);
3690 GET_MEMNUM_INC(mem
, p
); /* mem: OP_REPEAT ID */
3691 STACK_GET_REPEAT(mem
, stkp
);
3692 si
= GET_STACK_INDEX(stkp
);
3696 case OP_REPEAT_INC_NG
: SOP_IN(OP_REPEAT_INC_NG
);
3697 GET_MEMNUM_INC(mem
, p
); /* mem: OP_REPEAT ID */
3698 si
= repeat_stk
[mem
];
3699 stkp
= STACK_AT(si
);
3702 stkp
->u
.repeat
.count
++;
3703 if (stkp
->u
.repeat
.count
< reg
->repeat_range
[mem
].upper
) {
3704 if (stkp
->u
.repeat
.count
>= reg
->repeat_range
[mem
].lower
) {
3705 UChar
* pcode
= stkp
->u
.repeat
.pcode
;
3707 STACK_PUSH_REPEAT_INC(si
);
3708 STACK_PUSH_ALT(pcode
, s
, sprev
);
3711 p
= stkp
->u
.repeat
.pcode
;
3712 STACK_PUSH_REPEAT_INC(si
);
3715 else if (stkp
->u
.repeat
.count
== reg
->repeat_range
[mem
].upper
) {
3716 STACK_PUSH_REPEAT_INC(si
);
3719 CHECK_INTERRUPT_IN_MATCH
;
3723 case OP_REPEAT_INC_NG_SG
: SOP_IN(OP_REPEAT_INC_NG_SG
);
3724 GET_MEMNUM_INC(mem
, p
); /* mem: OP_REPEAT ID */
3725 STACK_GET_REPEAT(mem
, stkp
);
3726 si
= GET_STACK_INDEX(stkp
);
3730 case OP_PREC_READ_START
: SOP_IN(OP_PREC_READ_START
);
3731 STACK_PUSH_POS(s
, sprev
);
3736 case OP_PREC_READ_END
: SOP_IN(OP_PREC_READ_END
);
3738 STACK_EXEC_TO_VOID(stkp
);
3739 s
= stkp
->u
.state
.pstr
;
3740 sprev
= stkp
->u
.state
.pstr_prev
;
3746 case OP_PREC_READ_NOT_START
: SOP_IN(OP_PREC_READ_NOT_START
);
3747 GET_RELADDR_INC(addr
, p
);
3748 STACK_PUSH_ALT_PREC_READ_NOT(p
+ addr
, s
, sprev
);
3753 case OP_PREC_READ_NOT_END
: SOP_IN(OP_PREC_READ_NOT_END
);
3754 STACK_POP_TIL_ALT_PREC_READ_NOT
;
3758 case OP_ATOMIC_START
: SOP_IN(OP_ATOMIC_START
);
3759 STACK_PUSH_TO_VOID_START
;
3764 case OP_ATOMIC_END
: SOP_IN(OP_ATOMIC_END
);
3765 STACK_EXEC_TO_VOID(stkp
);
3770 case OP_LOOK_BEHIND
: SOP_IN(OP_LOOK_BEHIND
);
3771 GET_LENGTH_INC(tlen
, p
);
3772 s
= (UChar
* )ONIGENC_STEP_BACK(encode
, str
, s
, (int )tlen
);
3773 if (IS_NULL(s
)) goto fail
;
3774 sprev
= (UChar
* )onigenc_get_prev_char_head(encode
, str
, s
);
3779 case OP_LOOK_BEHIND_NOT_START
: SOP_IN(OP_LOOK_BEHIND_NOT_START
);
3780 GET_RELADDR_INC(addr
, p
);
3781 GET_LENGTH_INC(tlen
, p
);
3782 q
= (UChar
* )ONIGENC_STEP_BACK(encode
, str
, s
, (int )tlen
);
3784 /* too short case -> success. ex. /(?<!XXX)a/.match("a")
3785 If you want to change to fail, replace following line. */
3790 STACK_PUSH_ALT_LOOK_BEHIND_NOT(p
+ addr
, s
, sprev
);
3792 sprev
= (UChar
* )onigenc_get_prev_char_head(encode
, str
, s
);
3798 case OP_LOOK_BEHIND_NOT_END
: SOP_IN(OP_LOOK_BEHIND_NOT_END
);
3799 STACK_POP_TIL_ALT_LOOK_BEHIND_NOT
;
3804 case OP_CALL
: SOP_IN(OP_CALL
);
3805 GET_ABSADDR_INC(addr
, p
);
3806 STACK_PUSH_CALL_FRAME(p
);
3812 case OP_RETURN
: SOP_IN(OP_RETURN
);
3820 case OP_PUSH_SAVE_VAL
: SOP_IN(OP_PUSH_SAVE_VAL
);
3823 GET_SAVE_TYPE_INC(type
, p
);
3824 GET_MEMNUM_INC(mem
, p
); /* mem: save id */
3825 switch ((enum SaveType
)type
) {
3827 STACK_PUSH_SAVE_VAL(mem
, type
, s
);
3831 STACK_PUSH_SAVE_VAL_WITH_SPREV(mem
, type
, s
);
3834 case SAVE_RIGHT_RANGE
:
3835 STACK_PUSH_SAVE_VAL(mem
, SAVE_RIGHT_RANGE
, right_range
);
3843 case OP_UPDATE_VAR
: SOP_IN(OP_UPDATE_VAR
);
3846 enum SaveType save_type
;
3848 GET_UPDATE_VAR_TYPE_INC(type
, p
);
3849 GET_MEMNUM_INC(mem
, p
); /* mem: save id */
3850 switch ((enum UpdateVarType
)type
) {
3851 case UPDATE_VAR_KEEP_FROM_STACK_LAST
:
3852 STACK_GET_SAVE_VAL_TYPE_LAST(SAVE_KEEP
, keep
);
3854 case UPDATE_VAR_S_FROM_STACK
:
3855 STACK_GET_SAVE_VAL_TYPE_LAST_ID_WITH_SPREV(SAVE_S
, mem
, s
);
3857 case UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK
:
3859 goto get_save_val_type_last_id
;
3861 case UPDATE_VAR_RIGHT_RANGE_FROM_STACK
:
3862 save_type
= SAVE_RIGHT_RANGE
;
3863 get_save_val_type_last_id
:
3864 STACK_GET_SAVE_VAL_TYPE_LAST_ID(save_type
, mem
, right_range
);
3866 case UPDATE_VAR_RIGHT_RANGE_INIT
:
3876 case OP_CALLOUT_CONTENTS
: SOP_IN(OP_CALLOUT_CONTENTS
);
3877 of
= ONIG_CALLOUT_OF_CONTENTS
;
3878 goto callout_common_entry
;
3884 case OP_CALLOUT_NAME
: SOP_IN(OP_CALLOUT_NAME
);
3890 CalloutListEntry
* e
;
3891 OnigCalloutFunc func
;
3892 OnigCalloutArgs args
;
3894 of
= ONIG_CALLOUT_OF_NAME
;
3895 GET_MEMNUM_INC(name_id
, p
);
3897 callout_common_entry
:
3898 GET_MEMNUM_INC(num
, p
);
3899 e
= onig_reg_callout_list_at(reg
, num
);
3901 if (of
== ONIG_CALLOUT_OF_NAME
) {
3902 func
= onig_get_callout_start_func(reg
, num
);
3905 name_id
= ONIG_NON_NAME_ID
;
3906 func
= msa
->mp
->progress_callout_of_contents
;
3909 if (IS_NOT_NULL(func
) && (in
& ONIG_CALLOUT_IN_PROGRESS
) != 0) {
3910 CALLOUT_BODY(func
, ONIG_CALLOUT_IN_PROGRESS
, name_id
,
3911 num
, msa
->mp
->callout_user_data
, args
, call_result
);
3912 switch (call_result
) {
3913 case ONIG_CALLOUT_FAIL
:
3916 case ONIG_CALLOUT_SUCCESS
:
3917 goto retraction_callout2
;
3919 default: /* error code */
3920 if (call_result
> 0) {
3921 call_result
= ONIGERR_INVALID_ARGUMENT
;
3923 best_len
= call_result
;
3929 retraction_callout2
:
3930 if ((in
& ONIG_CALLOUT_IN_RETRACTION
) != 0) {
3931 if (of
== ONIG_CALLOUT_OF_NAME
) {
3932 if (IS_NOT_NULL(func
)) {
3933 STACK_PUSH_CALLOUT_NAME(name_id
, num
, func
);
3937 func
= msa
->mp
->retraction_callout_of_contents
;
3938 if (IS_NOT_NULL(func
)) {
3939 STACK_PUSH_CALLOUT_CONTENTS(num
, func
);
3957 case OP_FAIL
: SOP_IN(OP_FAIL
);
3959 p
= stk
->u
.state
.pcode
;
3960 s
= stk
->u
.state
.pstr
;
3961 sprev
= stk
->u
.state
.pstr_prev
;
3962 CHECK_RETRY_LIMIT_IN_MATCH
;
3968 goto bytecode_error
;
3970 } /* end of switch */
3972 } /* end of while(1) */
3981 return ONIGERR_STACK_BUG
;
3986 return ONIGERR_UNDEFINED_BYTECODE
;
3988 unexpected_bytecode_error
:
3990 return ONIGERR_UNEXPECTED_BYTECODE
;
3992 #ifdef USE_RETRY_LIMIT_IN_MATCH
3993 retry_limit_in_match_over
:
3995 return ONIGERR_RETRY_LIMIT_IN_MATCH_OVER
;
4001 slow_search(OnigEncoding enc
, UChar
* target
, UChar
* target_end
,
4002 const UChar
* text
, const UChar
* text_end
, UChar
* text_range
)
4004 UChar
*t
, *p
, *s
, *end
;
4006 end
= (UChar
* )text_end
;
4007 end
-= target_end
- target
- 1;
4008 if (end
> text_range
)
4014 if (*s
== *target
) {
4017 while (t
< target_end
) {
4022 if (t
== target_end
)
4025 s
+= enclen(enc
, s
);
4028 return (UChar
* )NULL
;
4032 str_lower_case_match(OnigEncoding enc
, int case_fold_flag
,
4033 const UChar
* t
, const UChar
* tend
,
4034 const UChar
* p
, const UChar
* end
)
4037 UChar
*q
, lowbuf
[ONIGENC_MBC_CASE_FOLD_MAXLEN
];
4040 lowlen
= ONIGENC_MBC_CASE_FOLD(enc
, case_fold_flag
, &p
, end
, lowbuf
);
4042 while (lowlen
> 0) {
4043 if (*t
++ != *q
++) return 0;
4052 slow_search_ic(OnigEncoding enc
, int case_fold_flag
,
4053 UChar
* target
, UChar
* target_end
,
4054 const UChar
* text
, const UChar
* text_end
, UChar
* text_range
)
4058 end
= (UChar
* )text_end
;
4059 end
-= target_end
- target
- 1;
4060 if (end
> text_range
)
4066 if (str_lower_case_match(enc
, case_fold_flag
, target
, target_end
,
4070 s
+= enclen(enc
, s
);
4073 return (UChar
* )NULL
;
4077 slow_search_backward(OnigEncoding enc
, UChar
* target
, UChar
* target_end
,
4078 const UChar
* text
, const UChar
* adjust_text
,
4079 const UChar
* text_end
, const UChar
* text_start
)
4083 s
= (UChar
* )text_end
;
4084 s
-= (target_end
- target
);
4086 s
= (UChar
* )text_start
;
4088 s
= ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc
, adjust_text
, s
);
4091 //if text is not null,the logic is correct.
4092 //this function is only invoked by backward_search_range,parameter text come
4093 //from range, which is checked by "if (range == 0) goto fail" in line 4512
4094 //so the check is just for passing static analysis.
4095 if(IS_NULL(s
))break;
4096 if (*s
== *target
) {
4099 while (t
< target_end
) {
4104 if (t
== target_end
)
4107 s
= (UChar
* )onigenc_get_prev_char_head(enc
, adjust_text
, s
);
4110 return (UChar
* )NULL
;
4114 slow_search_backward_ic(OnigEncoding enc
, int case_fold_flag
,
4115 UChar
* target
, UChar
* target_end
,
4116 const UChar
* text
, const UChar
* adjust_text
,
4117 const UChar
* text_end
, const UChar
* text_start
)
4121 s
= (UChar
* )text_end
;
4122 s
-= (target_end
- target
);
4124 s
= (UChar
* )text_start
;
4126 s
= ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc
, adjust_text
, s
);
4129 if (str_lower_case_match(enc
, case_fold_flag
,
4130 target
, target_end
, s
, text_end
))
4133 s
= (UChar
* )onigenc_get_prev_char_head(enc
, adjust_text
, s
);
4136 return (UChar
* )NULL
;
4140 bm_search_notrev(regex_t
* reg
, const UChar
* target
, const UChar
* target_end
,
4141 const UChar
* text
, const UChar
* text_end
,
4142 const UChar
* text_range
)
4144 const UChar
*s
, *se
, *t
, *p
, *end
;
4148 #ifdef ONIG_DEBUG_SEARCH
4149 fprintf(stderr
, "bm_search_notrev: text: %p, text_end: %p, text_range: %p\n",
4150 text
, text_end
, text_range
);
4153 tail
= target_end
- 1;
4154 tlen1
= (int )(tail
- target
);
4156 if (end
+ tlen1
> text_end
)
4157 end
= text_end
- tlen1
;
4161 if (IS_NULL(reg
->int_map
)) {
4166 if (t
== target
) return (UChar
* )s
;
4169 skip
= reg
->map
[*se
];
4172 s
+= enclen(reg
->enc
, s
);
4173 } while ((s
- t
) < skip
&& s
< end
);
4181 if (t
== target
) return (UChar
* )s
;
4184 skip
= reg
->int_map
[*se
];
4187 s
+= enclen(reg
->enc
, s
);
4188 } while ((s
- t
) < skip
&& s
< end
);
4192 return (UChar
* )NULL
;
4196 bm_search(regex_t
* reg
, const UChar
* target
, const UChar
* target_end
,
4197 const UChar
* text
, const UChar
* text_end
, const UChar
* text_range
)
4199 const UChar
*s
, *t
, *p
, *end
;
4202 end
= text_range
+ (target_end
- target
) - 1;
4206 tail
= target_end
- 1;
4207 s
= text
+ (target_end
- target
) - 1;
4208 if (IS_NULL(reg
->int_map
)) {
4213 if (t
== target
) return (UChar
* )p
;
4219 else { /* see int_map[] */
4224 if (t
== target
) return (UChar
* )p
;
4227 s
+= reg
->int_map
[*s
];
4230 return (UChar
* )NULL
;
#ifdef USE_INT_MAP_BACKWARD
/*
 * Build the skip table used by bm_search_backward() for the byte string
 * [s, end).
 *
 * *skip is allocated on first use (ONIG_CHAR_TABLE_SIZE ints).  Every entry
 * is first set to the string length; then, walking i from length-1 down to
 * 1, the entry for byte s[i] is overwritten with i.
 * Returns 0 on success, ONIGERR_MEMORY when the allocation fails.
 */
static int
set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, int** skip)
{
  int i, len;
  int* table;

  if (IS_NULL(*skip)) {
    *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
    if (IS_NULL(*skip)) return ONIGERR_MEMORY;
  }
  table = *skip;

  len = end - s;
  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
    table[i] = len;

  for (i = len - 1; i > 0; i--)
    table[s[i]] = i;

  return 0;
}

/*
 * Backward skip-table search for [target, target_end).
 *
 * Starts at min(text_start, text_end - target length) -- the latter moved
 * left to a character head -- and compares the target front to back at each
 * candidate.  On mismatch the candidate moves left by
 * reg->int_map_backward[*s] and is re-aligned to a character head.  The
 * search stops once the candidate drops below `text`.
 * Returns the matching position, or NULL.
 */
static UChar*
bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end,
                   const UChar* text, const UChar* adjust_text,
                   const UChar* text_end, const UChar* text_start)
{
  const UChar *s, *t, *p;

  s = text_end - (target_end - target);
  if (text_start < s)
    s = text_start;
  else
    s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s);

  while (s >= text) {
    for (p = s, t = target; t < target_end && *p == *t; p++, t++)
      ;
    if (t == target_end)
      return (UChar* )s;

    s -= reg->int_map_backward[*s];
    s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s);
  }

  return (UChar* )NULL;
}
#endif /* USE_INT_MAP_BACKWARD */
4285 map_search(OnigEncoding enc
, UChar map
[],
4286 const UChar
* text
, const UChar
* text_range
)
4288 const UChar
*s
= text
;
4290 while (s
< text_range
) {
4291 if (map
[*s
]) return (UChar
* )s
;
4293 s
+= enclen(enc
, s
);
4295 return (UChar
* )NULL
;
4299 map_search_backward(OnigEncoding enc
, UChar map
[],
4300 const UChar
* text
, const UChar
* adjust_text
,
4301 const UChar
* text_start
)
4303 const UChar
*s
= text_start
;
4306 //if text is not null,the logic is correct.
4307 //this function is only invoked by backward_search_range,parameter text come
4308 //from range, which is checked by "if (range == 0) goto fail" in line 4512
4309 //so the check is just for passing static analysis.
4310 if(IS_NULL(s
))break;
4311 if (map
[*s
]) return (UChar
* )s
;
4313 s
= onigenc_get_prev_char_head(enc
, adjust_text
, s
);
4315 return (UChar
* )NULL
;
4318 onig_match(regex_t
* reg
, const UChar
* str
, const UChar
* end
, const UChar
* at
,
4319 OnigRegion
* region
, OnigOptionType option
)
4324 onig_initialize_match_param(&mp
);
4325 r
= onig_match_with_param(reg
, str
, end
, at
, region
, option
, &mp
);
4326 onig_free_match_param_content(&mp
);
4331 onig_match_with_param(regex_t
* reg
, const UChar
* str
, const UChar
* end
,
4332 const UChar
* at
, OnigRegion
* region
, OnigOptionType option
,
4339 ADJUST_MATCH_PARAM(reg
, mp
);
4340 MATCH_ARG_INIT(msa
, reg
, option
, region
, at
, mp
);
4342 #ifdef USE_POSIX_API_REGION_OPTION
4343 && !IS_POSIX_REGION(option
)
4346 r
= onig_region_resize_clear(region
, reg
->num_mem
+ 1);
4352 if (ONIG_IS_OPTION_ON(option
, ONIG_OPTION_CHECK_VALIDITY_OF_STRING
)) {
4353 if (! ONIGENC_IS_VALID_MBC_STRING(reg
->enc
, str
, end
)) {
4354 r
= ONIGERR_INVALID_WIDE_CHAR_VALUE
;
4359 prev
= (UChar
* )onigenc_get_prev_char_head(reg
->enc
, str
, at
);
4360 r
= match_at(reg
, str
, end
, end
, at
, prev
, &msa
);
4364 MATCH_ARG_FREE(msa
);
4369 forward_search_range(regex_t
* reg
, const UChar
* str
, const UChar
* end
, UChar
* s
,
4370 UChar
* range
, UChar
** low
, UChar
** high
, UChar
** low_prev
)
4372 UChar
*p
, *pprev
= (UChar
* )NULL
;
4374 #ifdef ONIG_DEBUG_SEARCH
4375 fprintf(stderr
, "forward_search_range: str: %p, end: %p, s: %p, range: %p\n",
4376 str
, end
, s
, range
);
4380 if (reg
->dmin
> 0) {
4381 if (ONIGENC_IS_SINGLEBYTE(reg
->enc
)) {
4385 UChar
*q
= p
+ reg
->dmin
;
4387 if (q
>= end
) return 0; /* fail */
4388 while (p
< q
) p
+= enclen(reg
->enc
, p
);
4393 switch (reg
->optimize
) {
4394 case OPTIMIZE_EXACT
:
4395 p
= slow_search(reg
->enc
, reg
->exact
, reg
->exact_end
, p
, end
, range
);
4397 case OPTIMIZE_EXACT_IC
:
4398 p
= slow_search_ic(reg
->enc
, reg
->case_fold_flag
,
4399 reg
->exact
, reg
->exact_end
, p
, end
, range
);
4402 case OPTIMIZE_EXACT_BM
:
4403 p
= bm_search(reg
, reg
->exact
, reg
->exact_end
, p
, end
, range
);
4406 case OPTIMIZE_EXACT_BM_NO_REV
:
4407 p
= bm_search_notrev(reg
, reg
->exact
, reg
->exact_end
, p
, end
, range
);
4411 p
= map_search(reg
->enc
, reg
->map
, p
, range
);
4415 if (p
&& p
< range
) {
4416 if (p
- reg
->dmin
< s
) {
4419 p
+= enclen(reg
->enc
, p
);
4423 if (reg
->sub_anchor
) {
4426 switch (reg
->sub_anchor
) {
4427 case ANCHOR_BEGIN_LINE
:
4428 if (!ON_STR_BEGIN(p
)) {
4429 prev
= onigenc_get_prev_char_head(reg
->enc
,
4430 (pprev
? pprev
: str
), p
);
4431 if (!ONIGENC_IS_MBC_NEWLINE(reg
->enc
, prev
, end
))
4436 case ANCHOR_END_LINE
:
4437 if (ON_STR_END(p
)) {
4438 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
4439 prev
= (UChar
* )onigenc_get_prev_char_head(reg
->enc
,
4440 (pprev
? pprev
: str
), p
);
4441 if (prev
&& ONIGENC_IS_MBC_NEWLINE(reg
->enc
, prev
, end
))
4445 else if (! ONIGENC_IS_MBC_NEWLINE(reg
->enc
, p
, end
)
4446 #ifdef USE_CRNL_AS_LINE_TERMINATOR
4447 && ! ONIGENC_IS_MBC_CRNL(reg
->enc
, p
, end
)
4455 if (reg
->dmax
== 0) {
4459 *low_prev
= onigenc_get_prev_char_head(reg
->enc
, s
, p
);
4461 *low_prev
= onigenc_get_prev_char_head(reg
->enc
,
4462 (pprev
? pprev
: str
), p
);
4466 if (reg
->dmax
!= INFINITE_LEN
) {
4467 if (p
- str
< reg
->dmax
) {
4468 *low
= (UChar
* )str
;
4470 *low_prev
= onigenc_get_prev_char_head(reg
->enc
, str
, *low
);
4473 *low
= p
- reg
->dmax
;
4475 *low
= onigenc_get_right_adjust_char_head_with_prev(reg
->enc
, s
,
4476 *low
, (const UChar
** )low_prev
);
4477 if (low_prev
&& IS_NULL(*low_prev
))
4478 *low_prev
= onigenc_get_prev_char_head(reg
->enc
,
4479 (pprev
? pprev
: s
), *low
);
4483 *low_prev
= onigenc_get_prev_char_head(reg
->enc
,
4484 (pprev
? pprev
: str
), *low
);
4489 /* no needs to adjust *high, *high is used as range check only */
4490 *high
= p
- reg
->dmin
;
4492 #ifdef ONIG_DEBUG_SEARCH
4494 "forward_search_range success: low: %d, high: %d, dmin: %d, dmax: %d\n",
4495 (int )(*low
- str
), (int )(*high
- str
), reg
->dmin
, reg
->dmax
);
4497 return 1; /* success */
4500 return 0; /* fail */
4504 #define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100
4507 backward_search_range(regex_t
* reg
, const UChar
* str
, const UChar
* end
,
4508 UChar
* s
, const UChar
* range
, UChar
* adjrange
,
4509 UChar
** low
, UChar
** high
)
4512 if (range
== 0) goto fail
;
4517 switch (reg
->optimize
) {
4518 case OPTIMIZE_EXACT
:
4520 p
= slow_search_backward(reg
->enc
, reg
->exact
, reg
->exact_end
,
4521 range
, adjrange
, end
, p
);
4524 case OPTIMIZE_EXACT_IC
:
4525 p
= slow_search_backward_ic(reg
->enc
, reg
->case_fold_flag
,
4526 reg
->exact
, reg
->exact_end
,
4527 range
, adjrange
, end
, p
);
4530 case OPTIMIZE_EXACT_BM
:
4531 case OPTIMIZE_EXACT_BM_NO_REV
:
4532 #ifdef USE_INT_MAP_BACKWARD
4533 if (IS_NULL(reg
->int_map_backward
)) {
4536 if (s
- range
< BM_BACKWARD_SEARCH_LENGTH_THRESHOLD
)
4539 r
= set_bm_backward_skip(reg
->exact
, reg
->exact_end
, reg
->enc
,
4540 &(reg
->int_map_backward
));
4541 if (r
!= 0) return r
;
4543 p
= bm_search_backward(reg
, reg
->exact
, reg
->exact_end
, range
, adjrange
,
4551 p
= map_search_backward(reg
->enc
, reg
->map
, range
, adjrange
, p
);
4556 if (reg
->sub_anchor
) {
4559 switch (reg
->sub_anchor
) {
4560 case ANCHOR_BEGIN_LINE
:
4561 if (!ON_STR_BEGIN(p
)) {
4562 prev
= onigenc_get_prev_char_head(reg
->enc
, str
, p
);
4563 if (IS_NOT_NULL(prev
) && !ONIGENC_IS_MBC_NEWLINE(reg
->enc
, prev
, end
)) {
4570 case ANCHOR_END_LINE
:
4571 if (ON_STR_END(p
)) {
4572 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
4573 prev
= onigenc_get_prev_char_head(reg
->enc
, adjrange
, p
);
4574 if (IS_NULL(prev
)) goto fail
;
4575 if (ONIGENC_IS_MBC_NEWLINE(reg
->enc
, prev
, end
)) {
4581 else if (! ONIGENC_IS_MBC_NEWLINE(reg
->enc
, p
, end
)
4582 #ifdef USE_CRNL_AS_LINE_TERMINATOR
4583 && ! ONIGENC_IS_MBC_CRNL(reg
->enc
, p
, end
)
4586 p
= onigenc_get_prev_char_head(reg
->enc
, adjrange
, p
);
4587 if (IS_NULL(p
)) goto fail
;
4594 /* no needs to adjust *high, *high is used as range check only */
4595 if (reg
->dmax
!= INFINITE_LEN
) {
4596 *low
= p
- reg
->dmax
;
4597 *high
= p
- reg
->dmin
;
4598 *high
= onigenc_get_right_adjust_char_head(reg
->enc
, adjrange
, *high
);
4601 #ifdef ONIG_DEBUG_SEARCH
4602 fprintf(stderr
, "backward_search_range: low: %d, high: %d\n",
4603 (int )(*low
- str
), (int )(*high
- str
));
4605 return 1; /* success */
4609 #ifdef ONIG_DEBUG_SEARCH
4610 fprintf(stderr
, "backward_search_range: fail.\n");
4612 return 0; /* fail */
4617 onig_search(regex_t
* reg
, const UChar
* str
, const UChar
* end
,
4618 const UChar
* start
, const UChar
* range
, OnigRegion
* region
,
4619 OnigOptionType option
)
4624 onig_initialize_match_param(&mp
);
4625 r
= onig_search_with_param(reg
, str
, end
, start
, range
, region
, option
, &mp
);
4626 onig_free_match_param_content(&mp
);
4632 onig_search_with_param(regex_t
* reg
, const UChar
* str
, const UChar
* end
,
4633 const UChar
* start
, const UChar
* range
, OnigRegion
* region
,
4634 OnigOptionType option
, OnigMatchParam
* mp
)
4639 const UChar
*orig_start
= start
;
4640 const UChar
*orig_range
= range
;
4642 #ifdef ONIG_DEBUG_SEARCH
4644 "onig_search (entry point): str: %p, end: %d, start: %d, range: %d\n",
4645 str
, (int )(end
- str
), (int )(start
- str
), (int )(range
- str
));
4648 ADJUST_MATCH_PARAM(reg
, mp
);
4651 #ifdef USE_POSIX_API_REGION_OPTION
4652 && !IS_POSIX_REGION(option
)
4655 r
= onig_region_resize_clear(region
, reg
->num_mem
+ 1);
4656 if (r
!= 0) goto finish_no_msa
;
4659 if (start
> end
|| start
< str
) goto mismatch_no_msa
;
4661 if (ONIG_IS_OPTION_ON(option
, ONIG_OPTION_CHECK_VALIDITY_OF_STRING
)) {
4662 if (! ONIGENC_IS_VALID_MBC_STRING(reg
->enc
, str
, end
)) {
4663 r
= ONIGERR_INVALID_WIDE_CHAR_VALUE
;
4669 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
4670 #define MATCH_AND_RETURN_CHECK(upper_range) \
4671 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
4672 if (r != ONIG_MISMATCH) {\
4674 if (! IS_FIND_LONGEST(reg->options)) {\
4678 else goto finish; /* error */ \
4681 #define MATCH_AND_RETURN_CHECK(upper_range) \
4682 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
4683 if (r != ONIG_MISMATCH) {\
4687 else goto finish; /* error */ \
4689 #endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
4692 /* anchor optimize: resume search range */
4693 if (reg
->anchor
!= 0 && str
< end
) {
4694 UChar
*min_semi_end
, *max_semi_end
;
4696 if (reg
->anchor
& ANCHOR_BEGIN_POSITION
) {
4697 /* search start-position only */
4704 else if (reg
->anchor
& ANCHOR_BEGIN_BUF
) {
4705 /* search str-position only */
4706 if (range
> start
) {
4707 if (start
!= str
) goto mismatch_no_msa
;
4716 goto mismatch_no_msa
;
4719 else if (reg
->anchor
& ANCHOR_END_BUF
) {
4720 min_semi_end
= max_semi_end
= (UChar
* )end
;
4723 if ((OnigLen
)(max_semi_end
- str
) < reg
->anchor_dmin
)
4724 goto mismatch_no_msa
;
4726 if (range
> start
) {
4727 if ((OnigLen
)(min_semi_end
- start
) > reg
->anchor_dmax
) {
4728 start
= min_semi_end
- reg
->anchor_dmax
;
4730 start
= onigenc_get_right_adjust_char_head(reg
->enc
, str
, start
);
4732 if ((OnigLen
)(max_semi_end
- (range
- 1)) < reg
->anchor_dmin
) {
4733 range
= max_semi_end
- reg
->anchor_dmin
+ 1;
4736 if (start
> range
) goto mismatch_no_msa
;
4737 /* If start == range, match with empty at end.
4738 Backward search is used. */
4741 if ((OnigLen
)(min_semi_end
- range
) > reg
->anchor_dmax
) {
4742 range
= min_semi_end
- reg
->anchor_dmax
;
4744 if ((OnigLen
)(max_semi_end
- start
) < reg
->anchor_dmin
) {
4745 start
= max_semi_end
- reg
->anchor_dmin
;
4746 start
= ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg
->enc
, str
, start
);
4748 if (range
> start
) goto mismatch_no_msa
;
4751 else if (reg
->anchor
& ANCHOR_SEMI_END_BUF
) {
4753 UChar
* pre_end
= ONIGENC_STEP_BACK(reg
->enc
, str
, end
, 1);
4755 max_semi_end
= (UChar
* )end
;
4756 // only when str > end, pre_end will be null
4757 // line 4659 "if (start > end || start < str) goto mismatch_no_msa"
4758 // will guarantee str alwayls less than end
4759 // so pre_end won't be null,this check is just for passing staic analysis
4760 if (IS_NOT_NULL(pre_end
) && ONIGENC_IS_MBC_NEWLINE(reg
->enc
, pre_end
, end
)) {
4761 min_semi_end
= pre_end
;
4763 #ifdef USE_CRNL_AS_LINE_TERMINATOR
4764 pre_end
= ONIGENC_STEP_BACK(reg
->enc
, str
, pre_end
, 1);
4765 if (IS_NOT_NULL(pre_end
) &&
4766 ONIGENC_IS_MBC_CRNL(reg
->enc
, pre_end
, end
)) {
4767 min_semi_end
= pre_end
;
4770 if (min_semi_end
> str
&& start
<= min_semi_end
) {
4775 min_semi_end
= (UChar
* )end
;
4779 else if ((reg
->anchor
& ANCHOR_ANYCHAR_INF_ML
)) {
4780 goto begin_position
;
4783 else if (str
== end
) { /* empty string */
4784 static const UChar
* address_for_empty_string
= (UChar
* )"";
4786 #ifdef ONIG_DEBUG_SEARCH
4787 fprintf(stderr
, "onig_search: empty string.\n");
4790 if (reg
->threshold_len
== 0) {
4791 start
= end
= str
= address_for_empty_string
;
4793 prev
= (UChar
* )NULL
;
4795 MATCH_ARG_INIT(msa
, reg
, option
, region
, start
, mp
);
4796 MATCH_AND_RETURN_CHECK(end
);
4799 goto mismatch_no_msa
;
4802 #ifdef ONIG_DEBUG_SEARCH
4803 fprintf(stderr
, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
4804 (int )(end
- str
), (int )(start
- str
), (int )(range
- str
));
4807 MATCH_ARG_INIT(msa
, reg
, option
, region
, orig_start
, mp
);
4810 if (range
> start
) { /* forward search */
4812 prev
= onigenc_get_prev_char_head(reg
->enc
, str
, s
);
4814 prev
= (UChar
* )NULL
;
4816 if (reg
->optimize
!= OPTIMIZE_NONE
) {
4817 UChar
*sch_range
, *low
, *high
, *low_prev
;
4819 sch_range
= (UChar
* )range
;
4820 if (reg
->dmax
!= 0) {
4821 if (reg
->dmax
== INFINITE_LEN
)
4822 sch_range
= (UChar
* )end
;
4824 sch_range
+= reg
->dmax
;
4825 if (sch_range
> end
) sch_range
= (UChar
* )end
;
4829 if ((end
- start
) < reg
->threshold_len
)
4832 if (reg
->dmax
!= INFINITE_LEN
) {
4834 if (! forward_search_range(reg
, str
, end
, s
, sch_range
,
4835 &low
, &high
, &low_prev
)) goto mismatch
;
4841 MATCH_AND_RETURN_CHECK(orig_range
);
4843 s
+= enclen(reg
->enc
, s
);
4845 } while (s
< range
);
4848 else { /* check only. */
4849 if (! forward_search_range(reg
, str
, end
, s
, sch_range
,
4850 &low
, &high
, (UChar
** )NULL
)) goto mismatch
;
4852 if ((reg
->anchor
& ANCHOR_ANYCHAR_INF
) != 0) {
4854 MATCH_AND_RETURN_CHECK(orig_range
);
4856 s
+= enclen(reg
->enc
, s
);
4858 if ((reg
->anchor
& (ANCHOR_LOOK_BEHIND
| ANCHOR_PREC_READ_NOT
)) == 0) {
4859 while (!ONIGENC_IS_MBC_NEWLINE(reg
->enc
, prev
, end
) && s
< range
) {
4861 s
+= enclen(reg
->enc
, s
);
4864 } while (s
< range
);
4871 MATCH_AND_RETURN_CHECK(orig_range
);
4873 s
+= enclen(reg
->enc
, s
);
4874 } while (s
< range
);
4876 if (s
== range
) { /* because empty match with /$/. */
4877 MATCH_AND_RETURN_CHECK(orig_range
);
4880 else { /* backward search */
4881 if (orig_start
< end
)
4882 orig_start
+= enclen(reg
->enc
, orig_start
); /* is upper range */
4884 if (reg
->optimize
!= OPTIMIZE_NONE
) {
4885 UChar
*low
, *high
, *adjrange
, *sch_start
;
4888 adjrange
= ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg
->enc
, str
, range
);
4890 adjrange
= (UChar
* )end
;
4892 if (reg
->dmax
!= INFINITE_LEN
&&
4893 (end
- range
) >= reg
->threshold_len
) {
4895 sch_start
= s
+ reg
->dmax
;
4896 if (sch_start
> end
) sch_start
= (UChar
* )end
;
4897 if (backward_search_range(reg
, str
, end
, sch_start
, range
, adjrange
,
4905 prev
= onigenc_get_prev_char_head(reg
->enc
, str
, s
);
4906 MATCH_AND_RETURN_CHECK(orig_start
);
4909 // If range is not NULL, this check is not necessary.
4910 // The range is either the pointer to the end of the matched string
4911 // or is assigned by "range = str" in line 4708. In the RegularExpressionMatch
4912 // protocol, the matched string is the parameter String, and str in
4913 // line 4708 is that String, too. The range is calculated from
4914 // "Start + onigenc_str_bytelen_null (CHAR16_ENCODING, Start)" in
4915 // line 146 of RegularExpressionDxe.c. RegularExpressionMatch ensures
4916 // the String is not NULL, so in both situations the range cannot be NULL.
4917 // This check is just for passing static analysis.
4918 if(IS_NULL(s
))break;
4919 } while (s
>= range
);
4922 else { /* check only. */
4923 if ((end
- range
) < reg
->threshold_len
) goto mismatch
;
4926 if (reg
->dmax
!= 0) {
4927 if (reg
->dmax
== INFINITE_LEN
)
4928 sch_start
= (UChar
* )end
;
4930 sch_start
+= reg
->dmax
;
4931 if (sch_start
> end
) sch_start
= (UChar
* )end
;
4933 sch_start
= ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg
->enc
,
4937 if (backward_search_range(reg
, str
, end
, sch_start
, range
, adjrange
,
4938 &low
, &high
) <= 0) goto mismatch
;
4943 prev
= onigenc_get_prev_char_head(reg
->enc
, str
, s
);
4944 MATCH_AND_RETURN_CHECK(orig_start
);
4946 } while (s
>= range
);
4950 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
4951 if (IS_FIND_LONGEST(reg
->options
)) {
4952 if (msa
.best_len
>= 0) {
4961 MATCH_ARG_FREE(msa
);
4963 /* If result is mismatch and no FIND_NOT_EMPTY option,
4964 then the region is not set in match_at(). */
4965 if (IS_FIND_NOT_EMPTY(reg
->options
) && region
4966 #ifdef USE_POSIX_API_REGION_OPTION
4967 && !IS_POSIX_REGION(option
)
4970 onig_region_clear(region
);
4974 if (r
!= ONIG_MISMATCH
)
4975 fprintf(stderr
, "onig_search: error %d\n", r
);
4983 if (r
!= ONIG_MISMATCH
)
4984 fprintf(stderr
, "onig_search: error %d\n", r
);
4989 MATCH_ARG_FREE(msa
);
4990 return (int )(s
- str
);
4994 onig_scan(regex_t
* reg
, const UChar
* str
, const UChar
* end
,
4995 OnigRegion
* region
, OnigOptionType option
,
4996 int (*scan_callback
)(int, int, OnigRegion
*, void*),
5004 if (ONIG_IS_OPTION_ON(option
, ONIG_OPTION_CHECK_VALIDITY_OF_STRING
)) {
5005 if (! ONIGENC_IS_VALID_MBC_STRING(reg
->enc
, str
, end
))
5006 return ONIGERR_INVALID_WIDE_CHAR_VALUE
;
5008 ONIG_OPTION_OFF(option
, ONIG_OPTION_CHECK_VALIDITY_OF_STRING
);
5014 r
= onig_search(reg
, str
, end
, start
, end
, region
, option
);
5016 rs
= scan_callback(n
, r
, region
, callback_arg
);
5021 if (region
->end
[0] == start
- str
) {
5022 if (start
>= end
) break;
5023 start
+= enclen(reg
->enc
, start
);
5026 start
= str
+ region
->end
[0];
5031 else if (r
== ONIG_MISMATCH
) {
5043 onig_get_encoding(regex_t
* reg
)
5048 extern OnigOptionType
5049 onig_get_options(regex_t
* reg
)
5051 return reg
->options
;
5054 extern OnigCaseFoldType
5055 onig_get_case_fold_flag(regex_t
* reg
)
5057 return reg
->case_fold_flag
;
5060 extern OnigSyntaxType
*
5061 onig_get_syntax(regex_t
* reg
)
5067 onig_number_of_captures(regex_t
* reg
)
5069 return reg
->num_mem
;
5073 onig_number_of_capture_histories(regex_t
* reg
)
5075 #ifdef USE_CAPTURE_HISTORY
5079 for (i
= 0; i
<= ONIG_MAX_CAPTURE_HISTORY_GROUP
; i
++) {
5080 if (MEM_STATUS_AT(reg
->capture_history
, i
) != 0)
5090 onig_copy_encoding(OnigEncoding to
, OnigEncoding from
)
5096 /* for callout functions */
5100 extern OnigCalloutFunc
5101 onig_get_progress_callout(void)
5103 return DefaultProgressCallout
;
5107 onig_set_progress_callout(OnigCalloutFunc f
)
5109 DefaultProgressCallout
= f
;
5113 extern OnigCalloutFunc
5114 onig_get_retraction_callout(void)
5116 return DefaultRetractionCallout
;
5120 onig_set_retraction_callout(OnigCalloutFunc f
)
5122 DefaultRetractionCallout
= f
;
5127 onig_get_callout_num_by_callout_args(OnigCalloutArgs
* args
)
5132 extern OnigCalloutIn
5133 onig_get_callout_in_by_callout_args(OnigCalloutArgs
* args
)
5139 onig_get_name_id_by_callout_args(OnigCalloutArgs
* args
)
5141 return args
->name_id
;
5145 onig_get_contents_by_callout_args(OnigCalloutArgs
* args
)
5148 CalloutListEntry
* e
;
5151 e
= onig_reg_callout_list_at(args
->regex
, num
);
5152 if (IS_NULL(e
)) return 0;
5153 if (e
->of
== ONIG_CALLOUT_OF_CONTENTS
) {
5154 return e
->u
.content
.start
;
5161 onig_get_contents_end_by_callout_args(OnigCalloutArgs
* args
)
5164 CalloutListEntry
* e
;
5167 e
= onig_reg_callout_list_at(args
->regex
, num
);
5168 if (IS_NULL(e
)) return 0;
5169 if (e
->of
== ONIG_CALLOUT_OF_CONTENTS
) {
5170 return e
->u
.content
.end
;
5177 onig_get_args_num_by_callout_args(OnigCalloutArgs
* args
)
5180 CalloutListEntry
* e
;
5183 e
= onig_reg_callout_list_at(args
->regex
, num
);
5184 if (IS_NULL(e
)) return ONIGERR_INVALID_ARGUMENT
;
5185 if (e
->of
== ONIG_CALLOUT_OF_NAME
) {
5186 return e
->u
.arg
.num
;
5189 return ONIGERR_INVALID_ARGUMENT
;
5193 onig_get_passed_args_num_by_callout_args(OnigCalloutArgs
* args
)
5196 CalloutListEntry
* e
;
5199 e
= onig_reg_callout_list_at(args
->regex
, num
);
5200 if (IS_NULL(e
)) return ONIGERR_INVALID_ARGUMENT
;
5201 if (e
->of
== ONIG_CALLOUT_OF_NAME
) {
5202 return e
->u
.arg
.passed_num
;
5205 return ONIGERR_INVALID_ARGUMENT
;
5209 onig_get_arg_by_callout_args(OnigCalloutArgs
* args
, int index
,
5210 OnigType
* type
, OnigValue
* val
)
5213 CalloutListEntry
* e
;
5216 e
= onig_reg_callout_list_at(args
->regex
, num
);
5217 if (IS_NULL(e
)) return ONIGERR_INVALID_ARGUMENT
;
5218 if (e
->of
== ONIG_CALLOUT_OF_NAME
) {
5219 if (IS_NOT_NULL(type
)) *type
= e
->u
.arg
.types
[index
];
5220 if (IS_NOT_NULL(val
)) *val
= e
->u
.arg
.vals
[index
];
5224 return ONIGERR_INVALID_ARGUMENT
;
5228 onig_get_string_by_callout_args(OnigCalloutArgs
* args
)
5230 return args
->string
;
5234 onig_get_string_end_by_callout_args(OnigCalloutArgs
* args
)
5236 return args
->string_end
;
5240 onig_get_start_by_callout_args(OnigCalloutArgs
* args
)
5246 onig_get_right_range_by_callout_args(OnigCalloutArgs
* args
)
5248 return args
->right_range
;
5252 onig_get_current_by_callout_args(OnigCalloutArgs
* args
)
5254 return args
->current
;
5258 onig_get_regex_by_callout_args(OnigCalloutArgs
* args
)
5263 extern unsigned long
5264 onig_get_retry_counter_by_callout_args(OnigCalloutArgs
* args
)
5266 return args
->retry_in_match_counter
;
5271 onig_get_capture_range_in_callout(OnigCalloutArgs
* a
, int mem_num
, int* begin
, int* end
)
5275 StackType
* stk_base
;
5281 stk_base
= a
->stk_base
;
5284 if (a
->mem_end_stk
[i
] != INVALID_STACK_INDEX
) {
5285 if (MEM_STATUS_AT(reg
->bt_mem_start
, i
))
5286 *begin
= (int )(STACK_AT(a
->mem_start_stk
[i
])->u
.mem
.pstr
- str
);
5288 *begin
= (int )((UChar
* )((void* )a
->mem_start_stk
[i
]) - str
);
5290 *end
= (int )((MEM_STATUS_AT(reg
->bt_mem_end
, i
)
5291 ? STACK_AT(a
->mem_end_stk
[i
])->u
.mem
.pstr
5292 : (UChar
* )((void* )a
->mem_end_stk
[i
])) - str
);
5295 *begin
= *end
= ONIG_REGION_NOTPOS
;
5300 *begin
= a
->start
- str
;
5301 *end
= a
->current
- str
;
5303 return ONIGERR_INVALID_ARGUMENT
;
5307 return ONIGERR_INVALID_ARGUMENT
;
5313 onig_get_used_stack_size_in_callout(OnigCalloutArgs
* a
, int* used_num
, int* used_bytes
)
5317 n
= (int )(a
->stk
- a
->stk_base
);
5322 if (used_bytes
!= 0)
5323 *used_bytes
= n
* sizeof(StackType
);
5329 /* builtin callout functions */
5332 onig_builtin_fail(OnigCalloutArgs
* args ARG_UNUSED
, void* user_data ARG_UNUSED
)
5334 return ONIG_CALLOUT_FAIL
;
5338 onig_builtin_mismatch(OnigCalloutArgs
* args ARG_UNUSED
, void* user_data ARG_UNUSED
)
5340 return ONIG_MISMATCH
;
5345 onig_builtin_success(OnigCalloutArgs
* args ARG_UNUSED
, void* user_data ARG_UNUSED
)
5347 return ONIG_CALLOUT_SUCCESS
;
5352 onig_builtin_error(OnigCalloutArgs
* args
, void* user_data ARG_UNUSED
)
5358 r
= onig_get_arg_by_callout_args(args
, 0, 0, &val
);
5359 if (r
!= ONIG_NORMAL
) return r
;
5363 n
= ONIGERR_INVALID_CALLOUT_BODY
;
5370 onig_builtin_count(OnigCalloutArgs
* args
, void* user_data
)
5372 (void )onig_check_callout_data_and_clear_old_values(args
);
5374 return onig_builtin_total_count(args
, user_data
);
5378 onig_builtin_total_count(OnigCalloutArgs
* args
, void* user_data ARG_UNUSED
)
5385 OnigCodePoint count_type
;
5387 r
= onig_get_arg_by_callout_args(args
, 0, &type
, &aval
);
5388 if (r
!= ONIG_NORMAL
) return r
;
5390 count_type
= aval
.c
;
5391 if (count_type
!= '>' && count_type
!= 'X' && count_type
!= '<')
5392 return ONIGERR_INVALID_CALLOUT_ARG
;
5394 r
= onig_get_callout_data_by_callout_args_self_dont_clear_old(args
, 0,
5396 if (r
< ONIG_NORMAL
)
5398 else if (r
> ONIG_NORMAL
) {
5399 /* type == void: initial state */
5403 if (args
->in
== ONIG_CALLOUT_IN_RETRACTION
) {
5405 if (count_type
== '<')
5407 else if (count_type
== 'X')
5412 if (count_type
!= '<')
5416 r
= onig_set_callout_data_by_callout_args_self(args
, 0, ONIG_TYPE_LONG
, &val
);
5417 if (r
!= ONIG_NORMAL
) return r
;
5419 /* slot 1: in progress counter, slot 2: in retraction counter */
5420 r
= onig_get_callout_data_by_callout_args_self_dont_clear_old(args
, slot
,
5422 if (r
< ONIG_NORMAL
)
5424 else if (r
> ONIG_NORMAL
) {
5429 r
= onig_set_callout_data_by_callout_args_self(args
, slot
, ONIG_TYPE_LONG
, &val
);
5430 if (r
!= ONIG_NORMAL
) return r
;
5432 return ONIG_CALLOUT_SUCCESS
;
5436 onig_builtin_max(OnigCalloutArgs
* args
, void* user_data ARG_UNUSED
)
5441 OnigCodePoint count_type
;
5446 (void )onig_check_callout_data_and_clear_old_values(args
);
5449 r
= onig_get_callout_data_by_callout_args_self(args
, slot
, &type
, &val
);
5450 if (r
< ONIG_NORMAL
)
5452 else if (r
> ONIG_NORMAL
) {
5453 /* type == void: initial state */
5454 type
= ONIG_TYPE_LONG
;
5458 r
= onig_get_arg_by_callout_args(args
, 0, &type
, &aval
);
5459 if (r
!= ONIG_NORMAL
) return r
;
5460 if (type
== ONIG_TYPE_TAG
) {
5461 r
= onig_get_callout_data_by_callout_args(args
, aval
.tag
, 0, &type
, &aval
);
5462 if (r
< ONIG_NORMAL
) return r
;
5463 else if (r
> ONIG_NORMAL
)
5472 r
= onig_get_arg_by_callout_args(args
, 1, &type
, &aval
);
5473 if (r
!= ONIG_NORMAL
) return r
;
5475 count_type
= aval
.c
;
5476 if (count_type
!= '>' && count_type
!= 'X' && count_type
!= '<')
5477 return ONIGERR_INVALID_CALLOUT_ARG
;
5479 if (args
->in
== ONIG_CALLOUT_IN_RETRACTION
) {
5480 if (count_type
== '<') {
5481 if (val
.l
>= max_val
) return ONIG_CALLOUT_FAIL
;
5484 else if (count_type
== 'X')
5488 if (count_type
!= '<') {
5489 if (val
.l
>= max_val
) return ONIG_CALLOUT_FAIL
;
5494 r
= onig_set_callout_data_by_callout_args_self(args
, slot
, ONIG_TYPE_LONG
, &val
);
5495 if (r
!= ONIG_NORMAL
) return r
;
5497 return ONIG_CALLOUT_SUCCESS
;
5510 onig_builtin_cmp(OnigCalloutArgs
* args
, void* user_data ARG_UNUSED
)
5523 r
= onig_get_arg_by_callout_args(args
, 0, &type
, &val
);
5524 if (r
!= ONIG_NORMAL
) return r
;
5526 if (type
== ONIG_TYPE_TAG
) {
5527 r
= onig_get_callout_data_by_callout_args(args
, val
.tag
, 0, &type
, &val
);
5528 if (r
< ONIG_NORMAL
) return r
;
5529 else if (r
> ONIG_NORMAL
)
5534 else { /* ONIG_TYPE_LONG */
5538 r
= onig_get_arg_by_callout_args(args
, 2, &type
, &val
);
5539 if (r
!= ONIG_NORMAL
) return r
;
5541 if (type
== ONIG_TYPE_TAG
) {
5542 r
= onig_get_callout_data_by_callout_args(args
, val
.tag
, 0, &type
, &val
);
5543 if (r
< ONIG_NORMAL
) return r
;
5544 else if (r
> ONIG_NORMAL
)
5549 else { /* ONIG_TYPE_LONG */
5554 r
= onig_get_callout_data_by_callout_args_self(args
, slot
, &type
, &val
);
5555 if (r
< ONIG_NORMAL
)
5557 else if (r
> ONIG_NORMAL
) {
5558 /* type == void: initial state */
5559 OnigCodePoint c1
, c2
;
5562 r
= onig_get_arg_by_callout_args(args
, 1, &type
, &val
);
5563 if (r
!= ONIG_NORMAL
) return r
;
5566 c1
= ONIGENC_MBC_TO_CODE(reg
->enc
, p
, val
.s
.end
);
5567 p
+= ONIGENC_MBC_ENC_LEN(reg
->enc
, p
);
5568 if (p
< val
.s
.end
) {
5569 c2
= ONIGENC_MBC_TO_CODE(reg
->enc
, p
, val
.s
.end
);
5570 p
+= ONIGENC_MBC_ENC_LEN(reg
->enc
, p
);
5571 if (p
!= val
.s
.end
) return ONIGERR_INVALID_CALLOUT_ARG
;
5578 if (c2
!= '=') return ONIGERR_INVALID_CALLOUT_ARG
;
5582 if (c2
!= '=') return ONIGERR_INVALID_CALLOUT_ARG
;
5586 if (c2
== '=') op
= OP_LE
;
5587 else if (c2
== 0) op
= OP_LT
;
5588 else return ONIGERR_INVALID_CALLOUT_ARG
;
5591 if (c2
== '=') op
= OP_GE
;
5592 else if (c2
== 0) op
= OP_GT
;
5593 else return ONIGERR_INVALID_CALLOUT_ARG
;
5596 return ONIGERR_INVALID_CALLOUT_ARG
;
5600 r
= onig_set_callout_data_by_callout_args_self(args
, slot
, ONIG_TYPE_LONG
, &val
);
5601 if (r
!= ONIG_NORMAL
) return r
;
5604 op
= (enum OP_CMP
)val
.l
;
5608 case OP_EQ
: r
= (lv
== rv
); break;
5609 case OP_NE
: r
= (lv
!= rv
); break;
5610 case OP_LT
: r
= (lv
< rv
); break;
5611 case OP_GT
: r
= (lv
> rv
); break;
5612 case OP_LE
: r
= (lv
<= rv
); break;
5613 case OP_GE
: r
= (lv
>= rv
); break;
5616 return r
== 0 ? ONIG_CALLOUT_FAIL
: ONIG_CALLOUT_SUCCESS
;
5620 //#include <stdio.h>
5624 /* name start with "onig_" for macros. */
/*
 * Builtin monitor callout body.
 *
 * Reads callout argument 0 and the callout direction, and returns
 * ONIG_CALLOUT_SUCCESS early for some argument/direction combinations
 * (outside progress, whenever the argument is neither 'X' nor '<').
 * Otherwise it builds a short label in buf: either "#<callout number>" or a
 * copy of the callout tag, truncated to fit buf and NUL-terminated by hand.
 *
 * Returns ONIG_CALLOUT_SUCCESS, or the error code from reading argument 0.
 *
 * NOTE(review): the fprintf() lines near the end refer to variables whose
 * declarations and assignments are commented out in this block; presumably
 * the print itself is disabled in this port. Confirm against the full file.
 */
5626 onig_builtin_monitor(OnigCalloutArgs
* args
, void* user_data
)
5631 // const UChar* start;
5632 // const UChar* right;
5633 // const UChar* current;
5634 // const UChar* string;
5635 // const UChar* strend;
5636 const UChar
* tag_start
;
5637 const UChar
* tag_end
;
/* Argument 0 is compared with 'X' and '<' below to pick the monitored direction. */
5647 r
= onig_get_arg_by_callout_args(args
, 0, &type
, &val
);
5648 if (r
!= ONIG_NORMAL
) return r
;
5650 in
= onig_get_callout_in_by_callout_args(args
);
5651 if (in
== ONIG_CALLOUT_IN_PROGRESS
) {
5653 return ONIG_CALLOUT_SUCCESS
;
5656 if (val
.c
!= 'X' && val
.c
!= '<')
5657 return ONIG_CALLOUT_SUCCESS
;
5660 num
= onig_get_callout_num_by_callout_args(args
);
5661 // start = onig_get_start_by_callout_args(args);
5662 // right = onig_get_right_range_by_callout_args(args);
5663 // current = onig_get_current_by_callout_args(args);
5664 // string = onig_get_string_by_callout_args(args);
5665 // strend = onig_get_string_end_by_callout_args(args);
5666 reg
= onig_get_regex_by_callout_args(args
);
5667 tag_start
= onig_get_callout_tag_start(reg
, num
);
5668 tag_end
= onig_get_callout_tag_end(reg
, num
);
/* Label form "#<num>"; presumably used when the callout has no tag. */
5671 sprintf_s(buf
, sizeof(buf
), "#%d", num
);
5673 /* CAUTION: tag string is not terminated with NULL. */
5676 tag_len
= tag_end
- tag_start
;
/* Clamp so that the copy plus the terminating NUL always fit in buf. */
5677 if (tag_len
>= sizeof(buf
)) tag_len
= sizeof(buf
) - 1;
5678 for (i
= 0; i
< tag_len
; i
++) buf
[i
] = tag_start
[i
];
5679 buf
[tag_len
] = '\0';
5682 fprintf(fp, "ONIG-MONITOR: %-4s %s at: %d [%d - %d] len: %d\n",
5684 in == ONIG_CALLOUT_IN_PROGRESS ? "=>" : "<=",
5685 (int )(current - string),
5686 (int )(start - string),
5687 (int )(right - string),
5688 (int )(strend - string));
5691 return ONIG_CALLOUT_SUCCESS
;
/*
 * Set up the builtin monitor callout using ASCII-encoded names.
 *
 * fp: output stream passed as void* (a FILE* according to the parameter
 *     comment); it is tested for NULL before use.
 *
 * NOTE(review): the statements that store fp, set the callout name and fill
 * opts are not visible in this block; BC_B_O() presumably registers
 * onig_builtin_monitor under that name with one ONIG_TYPE_CHAR argument.
 * Confirm against the macro definition.
 */
5695 onig_setup_builtin_monitors_by_ascii_encoded_name(void* fp
/* FILE* */)
5703 if (IS_NOT_NULL(fp
))
5708 enc
= ONIG_ENCODING_ASCII
;
/* The single callout argument has type ONIG_TYPE_CHAR. */
5711 ts
[0] = ONIG_TYPE_CHAR
;
5713 BC_B_O(name
, monitor
, 1, ts
, 1, opts
);
5718 #endif /* USE_CALLOUT */