1 /**********************************************************************
2 regexec.c - Oniguruma (regular expression library)
3 **********************************************************************/
5 * Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
/* Word test for the character at s: mode == 0 selects ONIGENC_IS_MBC_WORD,
   any other mode value selects ONIGENC_IS_MBC_WORD_ASCII. */
#define IS_MBC_WORD_ASCII_MODE(enc,s,end,mode) \
  ((mode) != 0 ? ONIGENC_IS_MBC_WORD_ASCII(enc,s,end) : ONIGENC_IS_MBC_WORD(enc,s,end))
34 #ifdef USE_CRNL_AS_LINE_TERMINATOR
/* True when the character at p has code 13 and the character immediately
   after it (p advanced by enclen(enc,p)) satisfies ONIGENC_IS_MBC_NEWLINE.
   Every argument is parenthesized so that expression arguments such as
   `cond ? a : b` keep their meaning after expansion. */
#define ONIGENC_IS_MBC_CRNL(enc,p,end) \
  (ONIGENC_MBC_TO_CODE((enc),(p),(end)) == 13 && \
   ONIGENC_IS_MBC_NEWLINE((enc),((p) + enclen((enc),(p))),(end)))
40 #define CHECK_INTERRUPT_IN_MATCH
44 int last_match_at_call_counter
;
48 } slot
[ONIG_CALLOUT_DATA_SLOT_NUM
];
52 struct OnigMatchParamStruct
{
53 unsigned int match_stack_limit
;
54 unsigned long retry_limit_in_match
;
56 OnigCalloutFunc progress_callout_of_contents
;
57 OnigCalloutFunc retraction_callout_of_contents
;
58 int match_at_call_counter
;
59 void* callout_user_data
;
60 CalloutData
* callout_data
;
61 int callout_data_alloc_num
;
66 onig_set_match_stack_limit_size_of_match_param(OnigMatchParam
* param
,
69 param
->match_stack_limit
= limit
;
74 onig_set_retry_limit_in_match_of_match_param(OnigMatchParam
* param
,
77 param
->retry_limit_in_match
= limit
;
82 onig_set_progress_callout_of_match_param(OnigMatchParam
* param
, OnigCalloutFunc f
)
85 param
->progress_callout_of_contents
= f
;
88 return ONIG_NO_SUPPORT_CONFIG
;
93 onig_set_retraction_callout_of_match_param(OnigMatchParam
* param
, OnigCalloutFunc f
)
96 param
->retraction_callout_of_contents
= f
;
99 return ONIG_NO_SUPPORT_CONFIG
;
104 onig_set_callout_user_data_of_match_param(OnigMatchParam
* param
, void* user_data
)
107 param
->callout_user_data
= user_data
;
110 return ONIG_NO_SUPPORT_CONFIG
;
118 OnigOptionType options
;
121 const UChar
* start
; /* search start position (for \G: BEGIN_POSITION) */
122 unsigned int match_stack_limit
;
123 unsigned long retry_limit_in_match
;
125 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
126 int best_len
; /* for ONIG_OPTION_FIND_LONGEST */
151 static OpInfoType OpInfo
[] = {
152 { OP_FINISH
, "finish" },
154 { OP_EXACT1
, "exact1" },
155 { OP_EXACT2
, "exact2" },
156 { OP_EXACT3
, "exact3" },
157 { OP_EXACT4
, "exact4" },
158 { OP_EXACT5
, "exact5" },
159 { OP_EXACTN
, "exactn" },
160 { OP_EXACTMB2N1
, "exactmb2-n1" },
161 { OP_EXACTMB2N2
, "exactmb2-n2" },
162 { OP_EXACTMB2N3
, "exactmb2-n3" },
163 { OP_EXACTMB2N
, "exactmb2-n" },
164 { OP_EXACTMB3N
, "exactmb3n" },
165 { OP_EXACTMBN
, "exactmbn" },
166 { OP_EXACT1_IC
, "exact1-ic" },
167 { OP_EXACTN_IC
, "exactn-ic" },
168 { OP_CCLASS
, "cclass" },
169 { OP_CCLASS_MB
, "cclass-mb" },
170 { OP_CCLASS_MIX
, "cclass-mix" },
171 { OP_CCLASS_NOT
, "cclass-not" },
172 { OP_CCLASS_MB_NOT
, "cclass-mb-not" },
173 { OP_CCLASS_MIX_NOT
, "cclass-mix-not" },
174 { OP_ANYCHAR
, "anychar" },
175 { OP_ANYCHAR_ML
, "anychar-ml" },
176 { OP_ANYCHAR_STAR
, "anychar*" },
177 { OP_ANYCHAR_ML_STAR
, "anychar-ml*" },
178 { OP_ANYCHAR_STAR_PEEK_NEXT
, "anychar*-peek-next" },
179 { OP_ANYCHAR_ML_STAR_PEEK_NEXT
, "anychar-ml*-peek-next" },
181 { OP_WORD_ASCII
, "word-ascii" },
182 { OP_NO_WORD
, "not-word" },
183 { OP_NO_WORD_ASCII
, "not-word-ascii" },
184 { OP_WORD_BOUNDARY
, "word-boundary" },
185 { OP_NO_WORD_BOUNDARY
, "not-word-boundary" },
186 { OP_WORD_BEGIN
, "word-begin" },
187 { OP_WORD_END
, "word-end" },
188 { OP_TEXT_SEGMENT_BOUNDARY
, "text-segment-boundary" },
189 { OP_BEGIN_BUF
, "begin-buf" },
190 { OP_END_BUF
, "end-buf" },
191 { OP_BEGIN_LINE
, "begin-line" },
192 { OP_END_LINE
, "end-line" },
193 { OP_SEMI_END_BUF
, "semi-end-buf" },
194 { OP_BEGIN_POSITION
, "begin-position" },
195 { OP_BACKREF1
, "backref1" },
196 { OP_BACKREF2
, "backref2" },
197 { OP_BACKREF_N
, "backref-n" },
198 { OP_BACKREF_N_IC
, "backref-n-ic" },
199 { OP_BACKREF_MULTI
, "backref_multi" },
200 { OP_BACKREF_MULTI_IC
, "backref_multi-ic" },
201 { OP_BACKREF_WITH_LEVEL
, "backref_with_level" },
202 { OP_BACKREF_WITH_LEVEL_IC
, "backref_with_level-c" },
203 { OP_BACKREF_CHECK
, "backref_check" },
204 { OP_BACKREF_CHECK_WITH_LEVEL
, "backref_check_with_level" },
205 { OP_MEMORY_START_PUSH
, "mem-start-push" },
206 { OP_MEMORY_START
, "mem-start" },
207 { OP_MEMORY_END_PUSH
, "mem-end-push" },
208 { OP_MEMORY_END_PUSH_REC
, "mem-end-push-rec" },
209 { OP_MEMORY_END
, "mem-end" },
210 { OP_MEMORY_END_REC
, "mem-end-rec" },
214 { OP_PUSH_SUPER
, "push-super" },
215 { OP_POP_OUT
, "pop-out" },
216 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
217 { OP_PUSH_OR_JUMP_EXACT1
, "push-or-jump-e1" },
219 { OP_PUSH_IF_PEEK_NEXT
, "push-if-peek-next" },
220 { OP_REPEAT
, "repeat" },
221 { OP_REPEAT_NG
, "repeat-ng" },
222 { OP_REPEAT_INC
, "repeat-inc" },
223 { OP_REPEAT_INC_NG
, "repeat-inc-ng" },
224 { OP_REPEAT_INC_SG
, "repeat-inc-sg" },
225 { OP_REPEAT_INC_NG_SG
, "repeat-inc-ng-sg" },
226 { OP_EMPTY_CHECK_START
, "empty-check-start" },
227 { OP_EMPTY_CHECK_END
, "empty-check-end" },
228 { OP_EMPTY_CHECK_END_MEMST
, "empty-check-end-memst" },
229 { OP_EMPTY_CHECK_END_MEMST_PUSH
,"empty-check-end-memst-push" },
230 { OP_PREC_READ_START
, "push-pos" },
231 { OP_PREC_READ_END
, "pop-pos" },
232 { OP_PREC_READ_NOT_START
, "prec-read-not-start" },
233 { OP_PREC_READ_NOT_END
, "prec-read-not-end" },
234 { OP_ATOMIC_START
, "atomic-start" },
235 { OP_ATOMIC_END
, "atomic-end" },
236 { OP_LOOK_BEHIND
, "look-behind" },
237 { OP_LOOK_BEHIND_NOT_START
, "look-behind-not-start" },
238 { OP_LOOK_BEHIND_NOT_END
, "look-behind-not-end" },
240 { OP_RETURN
, "return" },
241 { OP_PUSH_SAVE_VAL
, "push-save-val" },
242 { OP_UPDATE_VAR
, "update-var" },
244 { OP_CALLOUT_CONTENTS
, "callout-contents" },
245 { OP_CALLOUT_NAME
, "callout-name" },
255 for (i
= 0; OpInfo
[i
].opcode
>= 0; i
++) {
256 if (opcode
== OpInfo
[i
].opcode
) return OpInfo
[i
].name
;
263 p_string(FILE* f
, int len
, UChar
* s
)
266 while (len
-- > 0) { fputc(*s
++, f
); }
270 p_len_string(FILE* f
, LengthType len
, int mb_len
, UChar
* s
)
272 int x
= len
* mb_len
;
274 fprintf(f
, ":%d:", len
);
275 while (x
-- > 0) { fputc(*s
++, f
); }
279 p_rel_addr(FILE* f
, RelAddrType rel_addr
, Operation
* p
, Operation
* start
)
281 RelAddrType curr
= (RelAddrType
)(p
- start
);
283 fprintf(f
, "{%d/%d}", rel_addr
, curr
+ rel_addr
);
287 bitset_on_num(BitSetRef bs
)
292 for (i
= 0; i
< SINGLE_BYTE_SIZE
; i
++) {
293 if (BITSET_AT(bs
, i
)) n
++;
300 print_compiled_byte_code(FILE* f
, regex_t
* reg
, int index
,
301 Operation
* start
, OnigEncoding enc
)
313 p
= reg
->ops
+ index
;
315 #ifdef USE_DIRECT_THREADED_CODE
316 opcode
= reg
->ocs
[index
];
321 fprintf(f
, "%s", op2name(opcode
));
324 p_string(f
, 1, p
->exact
.s
); break;
326 p_string(f
, 2, p
->exact
.s
); break;
328 p_string(f
, 3, p
->exact
.s
); break;
330 p_string(f
, 4, p
->exact
.s
); break;
332 p_string(f
, 5, p
->exact
.s
); break;
335 p_string(f
, len
, p
->exact_n
.s
); break;
337 p_string(f
, 2, p
->exact
.s
); break;
339 p_string(f
, 4, p
->exact
.s
); break;
341 p_string(f
, 3, p
->exact
.s
); break;
344 p_len_string(f
, len
, 2, p
->exact_n
.s
); break;
347 p_len_string(f
, len
, 3, p
->exact_n
.s
); break;
352 mb_len
= p
->exact_len_n
.len
;
353 len
= p
->exact_len_n
.n
;
354 q
= p
->exact_len_n
.s
;
355 fprintf(f
, ":%d:%d:", mb_len
, len
);
357 while (n
-- > 0) { fputc(*q
++, f
); }
361 len
= enclen(enc
, p
->exact
.s
);
362 p_string(f
, len
, p
->exact
.s
);
366 p_len_string(f
, len
, 1, p
->exact_n
.s
);
371 n
= bitset_on_num(p
->cclass
.bsp
);
372 fprintf(f
, ":%d", n
);
375 case OP_CCLASS_MB_NOT
:
378 OnigCodePoint
* codes
;
380 codes
= (OnigCodePoint
* )p
->cclass_mb
.mb
;
381 GET_CODE_POINT(ncode
, codes
);
383 GET_CODE_POINT(code
, codes
);
384 fprintf(f
, ":%u:%u", code
, ncode
);
388 case OP_CCLASS_MIX_NOT
:
391 OnigCodePoint
* codes
;
393 codes
= (OnigCodePoint
* )p
->cclass_mix
.mb
;
394 n
= bitset_on_num(p
->cclass_mix
.bsp
);
396 GET_CODE_POINT(ncode
, codes
);
398 GET_CODE_POINT(code
, codes
);
399 fprintf(f
, ":%d:%u:%u", n
, code
, ncode
);
403 case OP_ANYCHAR_STAR_PEEK_NEXT
:
404 case OP_ANYCHAR_ML_STAR_PEEK_NEXT
:
405 p_string(f
, 1, &(p
->anychar_star_peek_next
.c
));
408 case OP_WORD_BOUNDARY
:
409 case OP_NO_WORD_BOUNDARY
:
412 mode
= p
->word_boundary
.mode
;
413 fprintf(f
, ":%d", mode
);
417 case OP_BACKREF_N_IC
:
418 mem
= p
->backref_n
.n1
;
419 fprintf(f
, ":%d", mem
);
421 case OP_BACKREF_MULTI_IC
:
422 case OP_BACKREF_MULTI
:
423 case OP_BACKREF_CHECK
:
425 n
= p
->backref_general
.num
;
426 for (i
= 0; i
< n
; i
++) {
427 mem
= (n
== 1) ? p
->backref_general
.n1
: p
->backref_general
.ns
[i
];
428 if (i
> 0) fputs(", ", f
);
429 fprintf(f
, "%d", mem
);
432 case OP_BACKREF_WITH_LEVEL
:
433 case OP_BACKREF_WITH_LEVEL_IC
:
434 case OP_BACKREF_CHECK_WITH_LEVEL
:
438 level
= p
->backref_general
.nest_level
;
439 fprintf(f
, ":%d", level
);
441 n
= p
->backref_general
.num
;
442 for (i
= 0; i
< n
; i
++) {
443 mem
= (n
== 1) ? p
->backref_general
.n1
: p
->backref_general
.ns
[i
];
444 if (i
> 0) fputs(", ", f
);
445 fprintf(f
, "%d", mem
);
450 case OP_MEMORY_START
:
451 case OP_MEMORY_START_PUSH
:
452 mem
= p
->memory_start
.num
;
453 fprintf(f
, ":%d", mem
);
455 case OP_MEMORY_END_PUSH
:
456 case OP_MEMORY_END_PUSH_REC
:
458 case OP_MEMORY_END_REC
:
459 mem
= p
->memory_end
.num
;
460 fprintf(f
, ":%d", mem
);
466 p_rel_addr(f
, addr
, p
, start
);
473 p_rel_addr(f
, addr
, p
, start
);
476 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
477 case OP_PUSH_OR_JUMP_EXACT1
:
478 addr
= p
->push_or_jump_exact1
.addr
;
480 p_rel_addr(f
, addr
, p
, start
);
481 p_string(f
, 1, &(p
->push_or_jump_exact1
.c
));
485 case OP_PUSH_IF_PEEK_NEXT
:
486 addr
= p
->push_if_peek_next
.addr
;
488 p_rel_addr(f
, addr
, p
, start
);
489 p_string(f
, 1, &(p
->push_if_peek_next
.c
));
495 addr
= p
->repeat
.addr
;
496 fprintf(f
, ":%d:", mem
);
497 p_rel_addr(f
, addr
, p
, start
);
501 case OP_REPEAT_INC_NG
:
502 case OP_REPEAT_INC_SG
:
503 case OP_REPEAT_INC_NG_SG
:
505 fprintf(f
, ":%d", mem
);
508 case OP_EMPTY_CHECK_START
:
509 mem
= p
->empty_check_start
.mem
;
510 fprintf(f
, ":%d", mem
);
512 case OP_EMPTY_CHECK_END
:
513 case OP_EMPTY_CHECK_END_MEMST
:
514 case OP_EMPTY_CHECK_END_MEMST_PUSH
:
515 mem
= p
->empty_check_end
.mem
;
516 fprintf(f
, ":%d", mem
);
519 case OP_PREC_READ_NOT_START
:
520 addr
= p
->prec_read_not_start
.addr
;
522 p_rel_addr(f
, addr
, p
, start
);
526 len
= p
->look_behind
.len
;
527 fprintf(f
, ":%d", len
);
530 case OP_LOOK_BEHIND_NOT_START
:
531 addr
= p
->look_behind_not_start
.addr
;
532 len
= p
->look_behind_not_start
.len
;
533 fprintf(f
, ":%d:", len
);
534 p_rel_addr(f
, addr
, p
, start
);
539 fprintf(f
, ":{/%d}", addr
);
542 case OP_PUSH_SAVE_VAL
:
546 type
= p
->push_save_val
.type
;
547 mem
= p
->push_save_val
.id
;
548 fprintf(f
, ":%d:%d", type
, mem
);
556 type
= p
->update_var
.type
;
557 mem
= p
->update_var
.id
;
558 fprintf(f
, ":%d:%d", type
, mem
);
563 case OP_CALLOUT_CONTENTS
:
564 mem
= p
->callout_contents
.num
;
565 fprintf(f
, ":%d", mem
);
568 case OP_CALLOUT_NAME
:
572 id
= p
->callout_name
.id
;
573 mem
= p
->callout_name
.num
;
574 fprintf(f
, ":%d:%d", id
, mem
);
579 case OP_TEXT_SEGMENT_BOUNDARY
:
580 if (p
->text_segment_boundary
.not != 0)
588 case OP_ANYCHAR_STAR
:
589 case OP_ANYCHAR_ML_STAR
:
593 case OP_NO_WORD_ASCII
:
598 case OP_SEMI_END_BUF
:
599 case OP_BEGIN_POSITION
:
604 case OP_PREC_READ_START
:
605 case OP_PREC_READ_END
:
606 case OP_PREC_READ_NOT_END
:
607 case OP_ATOMIC_START
:
609 case OP_LOOK_BEHIND_NOT_END
:
614 fprintf(stderr
, "print_compiled_byte_code: undefined code %d\n", opcode
);
618 #endif /* ONIG_DEBUG */
620 #ifdef ONIG_DEBUG_COMPILE
622 onig_print_compiled_byte_code_list(FILE* f
, regex_t
* reg
)
625 Operation
* start
= reg
->ops
;
626 Operation
* end
= reg
->ops
+ reg
->ops_used
;
628 fprintf(f
, "bt_mem_start: 0x%x, bt_mem_end: 0x%x\n",
629 reg
->bt_mem_start
, reg
->bt_mem_end
);
630 fprintf(f
, "code-length: %d\n", reg
->ops_used
);
634 int pos
= bp
- start
;
636 fprintf(f
, "%4d: ", pos
);
637 print_compiled_byte_code(f
, reg
, pos
, start
, reg
->enc
);
646 #ifdef USE_CAPTURE_HISTORY
647 static void history_tree_free(OnigCaptureTreeNode
* node
);
650 history_tree_clear(OnigCaptureTreeNode
* node
)
654 if (IS_NULL(node
)) return ;
656 for (i
= 0; i
< node
->num_childs
; i
++) {
657 if (IS_NOT_NULL(node
->childs
[i
])) {
658 history_tree_free(node
->childs
[i
]);
661 for (i
= 0; i
< node
->allocated
; i
++) {
662 node
->childs
[i
] = (OnigCaptureTreeNode
* )0;
664 node
->num_childs
= 0;
665 node
->beg
= ONIG_REGION_NOTPOS
;
666 node
->end
= ONIG_REGION_NOTPOS
;
671 history_tree_free(OnigCaptureTreeNode
* node
)
673 history_tree_clear(node
);
674 if (IS_NOT_NULL(node
->childs
)) xfree(node
->childs
);
680 history_root_free(OnigRegion
* r
)
682 if (IS_NULL(r
->history_root
)) return ;
684 history_tree_free(r
->history_root
);
685 r
->history_root
= (OnigCaptureTreeNode
* )0;
688 static OnigCaptureTreeNode
*
689 history_node_new(void)
691 OnigCaptureTreeNode
* node
;
693 node
= (OnigCaptureTreeNode
* )xmalloc(sizeof(OnigCaptureTreeNode
));
694 CHECK_NULL_RETURN(node
);
696 node
->childs
= (OnigCaptureTreeNode
** )0;
698 node
->num_childs
= 0;
700 node
->beg
= ONIG_REGION_NOTPOS
;
701 node
->end
= ONIG_REGION_NOTPOS
;
707 history_tree_add_child(OnigCaptureTreeNode
* parent
, OnigCaptureTreeNode
* child
)
709 #define HISTORY_TREE_INIT_ALLOC_SIZE 8
711 if (parent
->num_childs
>= parent
->allocated
) {
714 if (IS_NULL(parent
->childs
)) {
715 n
= HISTORY_TREE_INIT_ALLOC_SIZE
;
717 (OnigCaptureTreeNode
** )xmalloc(sizeof(parent
->childs
[0]) * n
);
720 n
= parent
->allocated
* 2;
722 (OnigCaptureTreeNode
** )xrealloc(parent
->childs
,
723 sizeof(parent
->childs
[0]) * n
,
724 sizeof(parent
->childs
[0]) * parent
->allocated
);
726 CHECK_NULL_RETURN_MEMERR(parent
->childs
);
727 for (i
= parent
->allocated
; i
< n
; i
++) {
728 parent
->childs
[i
] = (OnigCaptureTreeNode
* )0;
730 parent
->allocated
= n
;
733 parent
->childs
[parent
->num_childs
] = child
;
734 parent
->num_childs
++;
738 static OnigCaptureTreeNode
*
739 history_tree_clone(OnigCaptureTreeNode
* node
)
742 OnigCaptureTreeNode
*clone
, *child
;
744 clone
= history_node_new();
745 CHECK_NULL_RETURN(clone
);
747 clone
->beg
= node
->beg
;
748 clone
->end
= node
->end
;
749 for (i
= 0; i
< node
->num_childs
; i
++) {
750 child
= history_tree_clone(node
->childs
[i
]);
751 if (IS_NULL(child
)) {
752 history_tree_free(clone
);
753 return (OnigCaptureTreeNode
* )0;
755 history_tree_add_child(clone
, child
);
761 extern OnigCaptureTreeNode
*
762 onig_get_capture_tree(OnigRegion
* region
)
764 return region
->history_root
;
766 #endif /* USE_CAPTURE_HISTORY */
769 onig_region_clear(OnigRegion
* region
)
773 for (i
= 0; i
< region
->num_regs
; i
++) {
774 region
->beg
[i
] = region
->end
[i
] = ONIG_REGION_NOTPOS
;
776 #ifdef USE_CAPTURE_HISTORY
777 history_root_free(region
);
782 onig_region_resize(OnigRegion
* region
, int n
)
784 region
->num_regs
= n
;
786 if (n
< ONIG_NREGION
)
789 if (region
->allocated
== 0) {
790 region
->beg
= (int* )xmalloc(n
* sizeof(int));
791 region
->end
= (int* )xmalloc(n
* sizeof(int));
793 if (region
->beg
== 0 || region
->end
== 0)
794 return ONIGERR_MEMORY
;
796 region
->allocated
= n
;
798 else if (region
->allocated
< n
) {
799 region
->beg
= (int* )xrealloc(region
->beg
, n
* sizeof(int), region
->allocated
* sizeof(int));
800 region
->end
= (int* )xrealloc(region
->end
, n
* sizeof(int), region
->allocated
* sizeof(int));
802 if (region
->beg
== 0 || region
->end
== 0)
803 return ONIGERR_MEMORY
;
805 region
->allocated
= n
;
812 onig_region_resize_clear(OnigRegion
* region
, int n
)
816 r
= onig_region_resize(region
, n
);
817 if (r
!= 0) return r
;
818 onig_region_clear(region
);
823 onig_region_set(OnigRegion
* region
, int at
, int beg
, int end
)
825 if (at
< 0) return ONIGERR_INVALID_ARGUMENT
;
827 if (at
>= region
->allocated
) {
828 int r
= onig_region_resize(region
, at
+ 1);
832 region
->beg
[at
] = beg
;
833 region
->end
[at
] = end
;
838 onig_region_init(OnigRegion
* region
)
840 region
->num_regs
= 0;
841 region
->allocated
= 0;
842 region
->beg
= (int* )0;
843 region
->end
= (int* )0;
844 region
->history_root
= (OnigCaptureTreeNode
* )0;
848 onig_region_new(void)
852 r
= (OnigRegion
* )xmalloc(sizeof(OnigRegion
));
853 CHECK_NULL_RETURN(r
);
859 onig_region_free(OnigRegion
* r
, int free_self
)
862 if (r
->allocated
> 0) {
863 if (r
->beg
) xfree(r
->beg
);
864 if (r
->end
) xfree(r
->end
);
867 #ifdef USE_CAPTURE_HISTORY
868 history_root_free(r
);
870 if (free_self
) xfree(r
);
875 onig_region_copy(OnigRegion
* to
, OnigRegion
* from
)
877 #define RREGC_SIZE (sizeof(int) * from->num_regs)
880 if (to
== from
) return;
882 if (to
->allocated
== 0) {
883 if (from
->num_regs
> 0) {
884 to
->beg
= (int* )xmalloc(RREGC_SIZE
);
885 if (IS_NULL(to
->beg
)) return;
886 to
->end
= (int* )xmalloc(RREGC_SIZE
);
887 if (IS_NULL(to
->end
)) return;
888 to
->allocated
= from
->num_regs
;
891 else if (to
->allocated
< from
->num_regs
) {
892 to
->beg
= (int* )xrealloc(to
->beg
, RREGC_SIZE
, sizeof(int) * to
->allocated
);
893 if (IS_NULL(to
->beg
)) return;
894 to
->end
= (int* )xrealloc(to
->end
, RREGC_SIZE
, sizeof(int) * to
->allocated
);
895 if (IS_NULL(to
->end
)) return;
896 to
->allocated
= from
->num_regs
;
899 for (i
= 0; i
< from
->num_regs
; i
++) {
900 to
->beg
[i
] = from
->beg
[i
];
901 to
->end
[i
] = from
->end
[i
];
903 to
->num_regs
= from
->num_regs
;
905 #ifdef USE_CAPTURE_HISTORY
906 history_root_free(to
);
908 if (IS_NOT_NULL(from
->history_root
)) {
909 to
->history_root
= history_tree_clone(from
->history_root
);
915 #define CALLOUT_BODY(func, ain, aname_id, anum, user, args, result) do { \
917 args.name_id = (aname_id);\
921 args.string_end = end;\
922 args.start = sstart;\
923 args.right_range = right_range;\
925 args.retry_in_match_counter = retry_in_match_counter;\
927 args.stk_base = stk_base;\
929 args.mem_start_stk = mem_start_stk;\
930 args.mem_end_stk = mem_end_stk;\
931 result = (func)(&args, user);\
934 #define RETRACTION_CALLOUT(func, aname_id, anum, user) do {\
936 OnigCalloutArgs args;\
937 CALLOUT_BODY(func, ONIG_CALLOUT_IN_RETRACTION, aname_id, anum, user, args, result);\
939 case ONIG_CALLOUT_FAIL:\
940 case ONIG_CALLOUT_SUCCESS:\
944 result = ONIGERR_INVALID_ARGUMENT;\
955 #define INVALID_STACK_INDEX -1
957 #define STK_ALT_FLAG 0x0001
960 /* used by normal-POP */
961 #define STK_SUPER_ALT STK_ALT_FLAG
962 #define STK_ALT (0x0002 | STK_ALT_FLAG)
963 #define STK_ALT_PREC_READ_NOT (0x0004 | STK_ALT_FLAG)
964 #define STK_ALT_LOOK_BEHIND_NOT (0x0006 | STK_ALT_FLAG)
966 /* handled by normal-POP */
967 #define STK_MEM_START 0x0010
968 #define STK_MEM_END 0x8030
969 #define STK_REPEAT_INC 0x0050
971 #define STK_CALLOUT 0x0070
974 /* avoided by normal-POP */
975 #define STK_VOID 0x0000 /* for fill a blank */
976 #define STK_EMPTY_CHECK_START 0x3000
977 #define STK_EMPTY_CHECK_END 0x5000 /* for recursive call */
978 #define STK_MEM_END_MARK 0x8100
979 #define STK_TO_VOID_START 0x1200 /* mark for "(?>...)" */
980 #define STK_REPEAT 0x0300
981 #define STK_CALL_FRAME 0x0400
982 #define STK_RETURN 0x0500
983 #define STK_SAVE_VAL 0x0600
984 #define STK_PREC_READ_START 0x0700
985 #define STK_PREC_READ_END 0x0800
/* stack type check mask */
/* Same bit as STK_ALT_FLAG; it is set in STK_SUPER_ALT, STK_ALT,
   STK_ALT_PREC_READ_NOT and STK_ALT_LOOK_BEHIND_NOT. */
#define STK_MASK_POP_USED STK_ALT_FLAG
/* Bit 0x0010; it is set in STK_MEM_START (0x0010), STK_MEM_END (0x8030),
   STK_REPEAT_INC (0x0050) and STK_CALLOUT (0x0070). */
#define STK_MASK_POP_HANDLED 0x0010
/* STK_MASK_POP_HANDLED plus bit 0x0004, i.e. 0x0014. */
#define STK_MASK_POP_HANDLED_TIL (STK_MASK_POP_HANDLED | 0x0004)
/* Tested by IS_TO_VOID_TARGET(). Matches any type with bit 0x1000
   (STK_EMPTY_CHECK_START, STK_EMPTY_CHECK_END, STK_TO_VOID_START) or with
   one of the bits 0x000e (STK_ALT, STK_ALT_PREC_READ_NOT,
   STK_ALT_LOOK_BEHIND_NOT); STK_SUPER_ALT (0x0001) does not match. */
#define STK_MASK_TO_VOID_TARGET 0x100e
/* Bit 0x8000; set in STK_MEM_END (0x8030) and STK_MEM_END_MARK (0x8100). */
#define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */
994 typedef intptr_t StackIndex
;
996 typedef struct _StackType
{
1001 Operation
* pcode
; /* byte code position */
1002 UChar
* pstr
; /* string position */
1003 UChar
* pstr_prev
; /* previous char position of pstr */
1006 int count
; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */
1007 Operation
* pcode
; /* byte code position (head of repeated target) */
1010 StackIndex si
; /* index of stack */
1013 UChar
*pstr
; /* start/end position */
1014 /* Following information is set, if this stack type is MEM-START */
1015 StackIndex prev_start
; /* prev. info (for backtrack "(...)*" ) */
1016 StackIndex prev_end
; /* prev. info (for backtrack "(...)*" ) */
1019 UChar
*pstr
; /* start position */
1023 Operation
*ret_addr
; /* byte code position */
1024 UChar
*pstr
; /* string position */
1035 OnigCalloutFunc func
;
1043 struct OnigCalloutArgsStruct
{
1045 int name_id
; /* name id or ONIG_NON_NAME_ID */
1048 const OnigUChar
* string
;
1049 const OnigUChar
* string_end
;
1050 const OnigUChar
* start
;
1051 const OnigUChar
* right_range
;
1052 const OnigUChar
* current
; /* current matching position */
1053 unsigned long retry_in_match_counter
;
1055 /* invisible to users */
1057 StackType
* stk_base
;
1059 StackIndex
* mem_start_stk
;
1060 StackIndex
* mem_end_stk
;
1066 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
1067 #define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mp) do { \
1068 (msa).stack_p = (void* )0;\
1069 (msa).options = (arg_option);\
1070 (msa).region = (arg_region);\
1071 (msa).start = (arg_start);\
1072 (msa).match_stack_limit = (mp)->match_stack_limit;\
1073 (msa).retry_limit_in_match = (mp)->retry_limit_in_match;\
1075 (msa).best_len = ONIG_MISMATCH;\
1076 (msa).ptr_num = (reg)->num_repeat + ((reg)->num_mem + 1) * 2; \
1079 #define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mp) do { \
1080 (msa).stack_p = (void* )0;\
1081 (msa).options = (arg_option);\
1082 (msa).region = (arg_region);\
1083 (msa).start = (arg_start);\
1084 (msa).match_stack_limit = (mp)->match_stack_limit;\
1085 (msa).retry_limit_in_match = (mp)->retry_limit_in_match;\
1087 (msa).ptr_num = (reg)->num_repeat + ((reg)->num_mem + 1) * 2; \
/* Release msa's stack buffer (stack_p) when one is attached.
   Wrapped in do { } while (0) so the macro behaves as a single statement:
   with a bare `if`, an `else` following the macro inside an unbraced
   if/else would silently bind to the macro's own `if`. */
#define MATCH_ARG_FREE(msa) do {\
  if ((msa).stack_p) xfree((msa).stack_p);\
} while (0)
1094 #define ALLOCA_PTR_NUM_LIMIT 50
1096 #define STACK_INIT(stack_num) do {\
1097 if (msa->stack_p) {\
1099 alloc_base = msa->stack_p;\
1100 stk_base = (StackType* )(alloc_base\
1101 + (sizeof(StackIndex) * msa->ptr_num));\
1103 stk_end = stk_base + msa->stack_n;\
1105 else if (msa->ptr_num > ALLOCA_PTR_NUM_LIMIT) {\
1107 alloc_base = (char* )xmalloc(sizeof(StackIndex) * msa->ptr_num\
1108 + sizeof(StackType) * (stack_num));\
1109 CHECK_NULL_RETURN_MEMERR(alloc_base);\
1110 stk_base = (StackType* )(alloc_base\
1111 + (sizeof(StackIndex) * msa->ptr_num));\
1113 stk_end = stk_base + (stack_num);\
1117 alloc_base = (char* )xmalloc(sizeof(StackIndex) * msa->ptr_num\
1118 + sizeof(StackType) * (stack_num));\
1119 CHECK_NULL_RETURN_MEMERR(alloc_base);\
1120 stk_base = (StackType* )(alloc_base\
1121 + (sizeof(StackIndex) * msa->ptr_num));\
1123 stk_end = stk_base + (stack_num);\
1128 #define STACK_SAVE do{\
1129 msa->stack_n = (int )(stk_end - stk_base);\
1130 if (is_alloca != 0) {\
1131 size_t size = sizeof(StackIndex) * msa->ptr_num \
1132 + sizeof(StackType) * msa->stack_n;\
1133 msa->stack_p = xmalloc(size);\
1134 CHECK_NULL_RETURN_MEMERR(msa->stack_p);\
1135 xmemcpy(msa->stack_p, alloc_base, size);\
1138 msa->stack_p = alloc_base;\
1142 #define UPDATE_FOR_STACK_REALLOC do{\
1143 repeat_stk = (StackIndex* )alloc_base;\
1144 mem_start_stk = (StackIndex* )(repeat_stk + reg->num_repeat);\
1145 mem_end_stk = mem_start_stk + num_mem + 1;\
1148 static unsigned int MatchStackLimit
= DEFAULT_MATCH_STACK_LIMIT_SIZE
;
1151 onig_get_match_stack_limit_size(void)
1153 return MatchStackLimit
;
1157 onig_set_match_stack_limit_size(unsigned int size
)
1159 MatchStackLimit
= size
;
1163 #ifdef USE_RETRY_LIMIT_IN_MATCH
1165 static unsigned long RetryLimitInMatch
= DEFAULT_RETRY_LIMIT_IN_MATCH
;
1167 #define CHECK_RETRY_LIMIT_IN_MATCH do {\
1168 if (retry_in_match_counter++ > retry_limit_in_match) goto retry_limit_in_match_over;\
1173 #define CHECK_RETRY_LIMIT_IN_MATCH
1175 #endif /* USE_RETRY_LIMIT_IN_MATCH */
1177 extern unsigned long
1178 onig_get_retry_limit_in_match(void)
1180 #ifdef USE_RETRY_LIMIT_IN_MATCH
1181 return RetryLimitInMatch
;
1183 /* return ONIG_NO_SUPPORT_CONFIG; */
1189 onig_set_retry_limit_in_match(unsigned long size
)
1191 #ifdef USE_RETRY_LIMIT_IN_MATCH
1192 RetryLimitInMatch
= size
;
1195 return ONIG_NO_SUPPORT_CONFIG
;
1200 static OnigCalloutFunc DefaultProgressCallout
;
1201 static OnigCalloutFunc DefaultRetractionCallout
;
1204 extern OnigMatchParam
*
1205 onig_new_match_param(void)
1209 p
= (OnigMatchParam
* )xmalloc(sizeof(*p
));
1210 if (IS_NOT_NULL(p
)) {
1211 onig_initialize_match_param(p
);
1218 onig_free_match_param_content(OnigMatchParam
* p
)
1221 if (IS_NOT_NULL(p
->callout_data
)) {
1222 xfree(p
->callout_data
);
1223 p
->callout_data
= 0;
1229 onig_free_match_param(OnigMatchParam
* p
)
1231 if (IS_NOT_NULL(p
)) {
1232 onig_free_match_param_content(p
);
1238 onig_initialize_match_param(OnigMatchParam
* mp
)
1240 mp
->match_stack_limit
= MatchStackLimit
;
1241 #ifdef USE_RETRY_LIMIT_IN_MATCH
1242 mp
->retry_limit_in_match
= RetryLimitInMatch
;
1246 mp
->progress_callout_of_contents
= DefaultProgressCallout
;
1247 mp
->retraction_callout_of_contents
= DefaultRetractionCallout
;
1248 mp
->match_at_call_counter
= 0;
1249 mp
->callout_user_data
= 0;
1250 mp
->callout_data
= 0;
1251 mp
->callout_data_alloc_num
= 0;
1260 adjust_match_param(regex_t
* reg
, OnigMatchParam
* mp
)
1262 RegexExt
* ext
= reg
->extp
;
1264 mp
->match_at_call_counter
= 0;
1266 if (IS_NULL(ext
) || ext
->callout_num
== 0) return ONIG_NORMAL
;
1268 if (ext
->callout_num
> mp
->callout_data_alloc_num
) {
1270 size_t n
= ext
->callout_num
* sizeof(*d
);
1271 if (IS_NOT_NULL(mp
->callout_data
))
1272 d
= (CalloutData
* )xrealloc(mp
->callout_data
, n
, mp
->callout_data_alloc_num
* sizeof(*d
));
1274 d
= (CalloutData
* )xmalloc(n
);
1275 CHECK_NULL_RETURN_MEMERR(d
);
1277 mp
->callout_data
= d
;
1278 mp
->callout_data_alloc_num
= ext
->callout_num
;
1281 xmemset(mp
->callout_data
, 0, mp
->callout_data_alloc_num
* sizeof(CalloutData
));
/* Call adjust_match_param(reg, mp), store the result in a variable named
   `r` that must exist at the expansion site, and return r from the
   enclosing function when it is not ONIG_NORMAL.
   NOTE(review): this expands to two bare statements (an assignment and an
   `if`), so it must not be used as the body of an unbraced if/else/loop. */
#define ADJUST_MATCH_PARAM(reg, mp) \
  r = adjust_match_param(reg, mp);\
  if (r != ONIG_NORMAL) return r;
1289 #define CALLOUT_DATA_AT_NUM(mp, num) ((mp)->callout_data + ((num) - 1))
1292 onig_check_callout_data_and_clear_old_values(OnigCalloutArgs
* args
)
1301 d
= CALLOUT_DATA_AT_NUM(mp
, num
);
1302 if (d
->last_match_at_call_counter
!= mp
->match_at_call_counter
) {
1303 xmemset(d
, 0, sizeof(*d
));
1304 d
->last_match_at_call_counter
= mp
->match_at_call_counter
;
1305 return d
->last_match_at_call_counter
;
1312 onig_get_callout_data_dont_clear_old(regex_t
* reg
, OnigMatchParam
* mp
,
1313 int callout_num
, int slot
,
1314 OnigType
* type
, OnigValue
* val
)
1319 if (callout_num
<= 0) return ONIGERR_INVALID_ARGUMENT
;
1321 d
= CALLOUT_DATA_AT_NUM(mp
, callout_num
);
1322 t
= d
->slot
[slot
].type
;
1323 if (IS_NOT_NULL(type
)) *type
= t
;
1324 if (IS_NOT_NULL(val
)) *val
= d
->slot
[slot
].val
;
1325 return (t
== ONIG_TYPE_VOID
? 1 : ONIG_NORMAL
);
1329 onig_get_callout_data_by_callout_args_self_dont_clear_old(OnigCalloutArgs
* args
,
1330 int slot
, OnigType
* type
,
1333 return onig_get_callout_data_dont_clear_old(args
->regex
, args
->msa
->mp
,
1334 args
->num
, slot
, type
, val
);
1338 onig_get_callout_data(regex_t
* reg
, OnigMatchParam
* mp
,
1339 int callout_num
, int slot
,
1340 OnigType
* type
, OnigValue
* val
)
1345 if (callout_num
<= 0) return ONIGERR_INVALID_ARGUMENT
;
1347 d
= CALLOUT_DATA_AT_NUM(mp
, callout_num
);
1348 if (d
->last_match_at_call_counter
!= mp
->match_at_call_counter
) {
1349 xmemset(d
, 0, sizeof(*d
));
1350 d
->last_match_at_call_counter
= mp
->match_at_call_counter
;
1353 t
= d
->slot
[slot
].type
;
1354 if (IS_NOT_NULL(type
)) *type
= t
;
1355 if (IS_NOT_NULL(val
)) *val
= d
->slot
[slot
].val
;
1356 return (t
== ONIG_TYPE_VOID
? 1 : ONIG_NORMAL
);
1360 onig_get_callout_data_by_tag(regex_t
* reg
, OnigMatchParam
* mp
,
1361 const UChar
* tag
, const UChar
* tag_end
, int slot
,
1362 OnigType
* type
, OnigValue
* val
)
1366 num
= onig_get_callout_num_by_tag(reg
, tag
, tag_end
);
1367 if (num
< 0) return num
;
1368 if (num
== 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME
;
1370 return onig_get_callout_data(reg
, mp
, num
, slot
, type
, val
);
1374 onig_get_callout_data_by_callout_args(OnigCalloutArgs
* args
,
1375 int callout_num
, int slot
,
1376 OnigType
* type
, OnigValue
* val
)
1378 return onig_get_callout_data(args
->regex
, args
->msa
->mp
, callout_num
, slot
,
1383 onig_get_callout_data_by_callout_args_self(OnigCalloutArgs
* args
,
1384 int slot
, OnigType
* type
, OnigValue
* val
)
1386 return onig_get_callout_data(args
->regex
, args
->msa
->mp
, args
->num
, slot
,
1391 onig_set_callout_data(regex_t
* reg
, OnigMatchParam
* mp
,
1392 int callout_num
, int slot
,
1393 OnigType type
, OnigValue
* val
)
1397 if (callout_num
<= 0) return ONIGERR_INVALID_ARGUMENT
;
1399 d
= CALLOUT_DATA_AT_NUM(mp
, callout_num
);
1400 d
->slot
[slot
].type
= type
;
1401 d
->slot
[slot
].val
= *val
;
1402 d
->last_match_at_call_counter
= mp
->match_at_call_counter
;
1408 onig_set_callout_data_by_tag(regex_t
* reg
, OnigMatchParam
* mp
,
1409 const UChar
* tag
, const UChar
* tag_end
, int slot
,
1410 OnigType type
, OnigValue
* val
)
1414 num
= onig_get_callout_num_by_tag(reg
, tag
, tag_end
);
1415 if (num
< 0) return num
;
1416 if (num
== 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME
;
1418 return onig_set_callout_data(reg
, mp
, num
, slot
, type
, val
);
1422 onig_set_callout_data_by_callout_args(OnigCalloutArgs
* args
,
1423 int callout_num
, int slot
,
1424 OnigType type
, OnigValue
* val
)
1426 return onig_set_callout_data(args
->regex
, args
->msa
->mp
, callout_num
, slot
,
1431 onig_set_callout_data_by_callout_args_self(OnigCalloutArgs
* args
,
1432 int slot
, OnigType type
, OnigValue
* val
)
1434 return onig_set_callout_data(args
->regex
, args
->msa
->mp
, args
->num
, slot
,
1439 #define ADJUST_MATCH_PARAM(reg, mp)
1440 #endif /* USE_CALLOUT */
/*
 * stack_double: enlarge the match-stack buffer and write the relocated
 * pointers back through arg_alloc_base / arg_stk_base / arg_stk_end /
 * arg_stk.
 *
 * The buffer holds msa->ptr_num StackIndex slots followed by the StackType
 * entries; both byte sizes below include that StackIndex prefix.
 *
 * Visible return values: ONIGERR_MEMORY when allocation fails,
 * ONIGERR_MATCH_STACK_LIMIT_OVER when the current capacity already equals
 * msa->match_stack_limit.
 *
 * NOTE(review): several original lines are missing from this excerpt (the
 * last parameter, some declarations, the line between the size and new_size
 * computations, the else that presumably separates the two allocation
 * paths, and the final return). Comments below describe visible code only.
 */
1444 stack_double(int is_alloca
, char** arg_alloc_base
,
1445 StackType
** arg_stk_base
, StackType
** arg_stk_end
, StackType
** arg_stk
,
1453 char* new_alloc_base
;
1454 StackType
*stk_base
, *stk_end
, *stk
;
1456 alloc_base
= *arg_alloc_base
;
1457 stk_base
= *arg_stk_base
;
1458 stk_end
= *arg_stk_end
;
/* n: current capacity in StackType entries; size: current buffer size in bytes. */
1461 n
= (unsigned int )(stk_end
- stk_base
);
1462 size
= sizeof(StackIndex
) * msa
->ptr_num
+ sizeof(StackType
) * n
;
/* new_size uses the same formula; n is presumably enlarged on the omitted
 * line between these two statements -- TODO confirm against the full file. */
1464 new_size
= sizeof(StackIndex
) * msa
->ptr_num
+ sizeof(StackType
) * n
;
/* is_alloca != 0: allocate a fresh block with xmalloc and copy the old
 * contents (size bytes) instead of reallocating in place. */
1465 if (is_alloca
!= 0) {
1466 new_alloc_base
= (char* )xmalloc(new_size
);
1467 if (IS_NULL(new_alloc_base
)) {
1469 return ONIGERR_MEMORY
;
1471 xmemcpy(new_alloc_base
, alloc_base
, size
);
/* Stack-limit handling: fail if the capacity is already at the limit,
 * otherwise clamp n to the limit.
 * NOTE(review): new_size was computed before this clamp, so the xrealloc
 * below is sized for the unclamped n while *arg_stk_end uses the clamped n.
 * Do not shrink new_size without checking that stk can never lie beyond the
 * clamped end. */
1474 if (msa
->match_stack_limit
!= 0 && n
> msa
->match_stack_limit
) {
1475 if ((unsigned int )(stk_end
- stk_base
) == msa
->match_stack_limit
)
1476 return ONIGERR_MATCH_STACK_LIMIT_OVER
;
1478 n
= msa
->match_stack_limit
;
1480 new_alloc_base
= (char* )xrealloc(alloc_base
, new_size
, size
);
1481 if (IS_NULL(new_alloc_base
)) {
1483 return ONIGERR_MEMORY
;
1487 alloc_base
= new_alloc_base
;
/* NOTE(review): used is derived from the old stk / stk_base values after
 * the reallocation above; computing it before reallocating would avoid
 * arithmetic on pointers into a possibly released block. */
1488 used
= (int )(stk
- stk_base
);
/* Publish the new layout: entries start right after the StackIndex prefix. */
1489 *arg_alloc_base
= alloc_base
;
1490 *arg_stk_base
= (StackType
* )(alloc_base
1491 + (sizeof(StackIndex
) * msa
->ptr_num
));
1492 *arg_stk
= *arg_stk_base
+ used
;
1493 *arg_stk_end
= *arg_stk_base
+ n
;
/* STACK_ENSURE(n): when fewer than n entries remain between stk and stk_end,
 * call stack_double(). A non-zero result runs STACK_SAVE and is returned
 * from the enclosing function; UPDATE_FOR_STACK_REALLOC follows.
 * Relies on is_alloca, alloc_base, stk_base, stk_end, stk and msa being in
 * scope at the expansion site. */
1497 #define STACK_ENSURE(n) do {\
1498 if ((int )(stk_end - stk) < (n)) {\
1499 int r = stack_double(is_alloca, &alloc_base, &stk_base, &stk_end, &stk, msa);\
1500 if (r != 0) { STACK_SAVE; return r; } \
1502 UPDATE_FOR_STACK_REALLOC;\
/* STACK_AT(index): pointer to the stack entry at offset index from stk_base.
 * GET_STACK_INDEX(stk): inverse mapping, offset of entry stk from stk_base.
 * Both use the stk_base visible at the expansion site. */
1506 #define STACK_AT(index) (stk_base + (index))
1507 #define GET_STACK_INDEX(stk) ((stk) - stk_base)
/*
 * Basic push helpers (visible statements only; the lines that reserve space
 * and advance stk are not part of this excerpt):
 *   STACK_PUSH_TYPE(t)        - store t in stk->type.
 *   IS_TO_VOID_TARGET(stk)    - non-zero when the entry's type has any
 *                               STK_MASK_TO_VOID_TARGET bit set.
 *   STACK_PUSH(t,pat,s,sprev) - store type plus u.state.pcode / pstr /
 *                               pstr_prev.
 *   STACK_PUSH_ENSURED(t,pat) - store type and u.state.pcode only.
 */
1509 #define STACK_PUSH_TYPE(stack_type) do {\
1511 stk->type = (stack_type);\
1515 #define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
1517 #define STACK_PUSH(stack_type,pat,s,sprev) do {\
1519 stk->type = (stack_type);\
1520 stk->u.state.pcode = (pat);\
1521 stk->u.state.pstr = (s);\
1522 stk->u.state.pstr_prev = (sprev);\
1526 #define STACK_PUSH_ENSURED(stack_type,pat) do {\
1527 stk->type = (stack_type);\
1528 stk->u.state.pcode = (pat);\
/* STACK_PUSH_BOTTOM(t,pat): two definitions. The one under ONIG_DEBUG_MATCH
 * also records the s and sprev variables of the expansion site in
 * u.state.pstr / pstr_prev; the other stores only type and pcode.
 * NOTE(review): the #else / #endif separating them are not in this excerpt. */
1532 #ifdef ONIG_DEBUG_MATCH
1533 #define STACK_PUSH_BOTTOM(stack_type,pat) do {\
1534 stk->type = (stack_type);\
1535 stk->u.state.pcode = (pat);\
1536 stk->u.state.pstr = s;\
1537 stk->u.state.pstr_prev = sprev;\
1541 #define STACK_PUSH_BOTTOM(stack_type,pat) do {\
1542 stk->type = (stack_type);\
1543 stk->u.state.pcode = (pat);\
/* Thin wrappers that fix the entry type and delegate to STACK_PUSH (or, for
 * STACK_PUSH_TO_VOID_START, to STACK_PUSH_TYPE). STACK_PUSH_PREC_READ_START
 * passes a null Operation pointer as the pcode. */
1548 #define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev)
1549 #define STACK_PUSH_SUPER_ALT(pat,s,sprev) STACK_PUSH(STK_SUPER_ALT,pat,s,sprev)
1550 #define STACK_PUSH_PREC_READ_START(s,sprev) \
1551 STACK_PUSH(STK_PREC_READ_START,(Operation* )0,s,sprev)
1552 #define STACK_PUSH_ALT_PREC_READ_NOT(pat,s,sprev) \
1553 STACK_PUSH(STK_ALT_PREC_READ_NOT,pat,s,sprev)
1554 #define STACK_PUSH_TO_VOID_START STACK_PUSH_TYPE(STK_TO_VOID_START)
1555 #define STACK_PUSH_ALT_LOOK_BEHIND_NOT(pat,s,sprev) \
1556 STACK_PUSH(STK_ALT_LOOK_BEHIND_NOT,pat,s,sprev)
/* STACK_PUSH_REPEAT(sid,pat): STK_REPEAT entry with u.repeat.pcode = pat and
 * a count starting at 0 (the use of sid is on a line not in this excerpt).
 * STACK_PUSH_REPEAT_INC(sindex): STK_REPEAT_INC entry remembering, in
 * u.repeat_inc.si, the stack index passed as sindex. */
1558 #define STACK_PUSH_REPEAT(sid, pat) do {\
1560 stk->type = STK_REPEAT;\
1562 stk->u.repeat.pcode = (pat);\
1563 stk->u.repeat.count = 0;\
1567 #define STACK_PUSH_REPEAT_INC(sindex) do {\
1569 stk->type = STK_REPEAT_INC;\
1570 stk->u.repeat_inc.si = (sindex);\
/*
 * Capture-group bookkeeping entries.
 *   STACK_PUSH_MEM_START(mnum,s): records s and the previous
 *     mem_start_stk[mnum] / mem_end_stk[mnum] values in the entry, then
 *     points mem_start_stk[mnum] at this entry and resets mem_end_stk[mnum]
 *     to INVALID_STACK_INDEX.
 *   STACK_PUSH_MEM_END(mnum,s): records s and the previous start/end values,
 *     then points mem_end_stk[mnum] at this entry.
 *   STACK_PUSH_MEM_END_MARK(mnum): only the type store is visible here.
 * The saved prev_start / prev_end are what the pop macros restore.
 */
1574 #define STACK_PUSH_MEM_START(mnum, s) do {\
1576 stk->type = STK_MEM_START;\
1578 stk->u.mem.pstr = (s);\
1579 stk->u.mem.prev_start = mem_start_stk[mnum];\
1580 stk->u.mem.prev_end = mem_end_stk[mnum];\
1581 mem_start_stk[mnum] = GET_STACK_INDEX(stk);\
1582 mem_end_stk[mnum] = INVALID_STACK_INDEX;\
1586 #define STACK_PUSH_MEM_END(mnum, s) do {\
1588 stk->type = STK_MEM_END;\
1590 stk->u.mem.pstr = (s);\
1591 stk->u.mem.prev_start = mem_start_stk[mnum];\
1592 stk->u.mem.prev_end = mem_end_stk[mnum];\
1593 mem_end_stk[mnum] = GET_STACK_INDEX(stk);\
1597 #define STACK_PUSH_MEM_END_MARK(mnum) do {\
1599 stk->type = STK_MEM_END_MARK;\
/*
 * STACK_GET_MEM_START(mnum,k): loops while k > stk_base looking at entries
 * whose zid equals mnum; an end/mark entry (STK_MASK_MEM_END_OR_MARK) and a
 * STK_MEM_START entry are handled in separate branches, and the loop stops
 * on a STK_MEM_START when level == 0. The level updates themselves are on
 * lines not in this excerpt.
 *
 * STACK_GET_MEM_RANGE(k,mnum,start,end): stores u.mem.pstr of matching
 * STK_MEM_START / STK_MEM_END entries into start / end.
 * NOTE(review): this macro matches on k->u.mem.num whereas
 * STACK_GET_MEM_START matches on k->zid -- confirm the field still exists
 * and that the macro is still used.
 */
1604 #define STACK_GET_MEM_START(mnum, k) do {\
1607 while (k > stk_base) {\
1609 if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
1610 && k->zid == (mnum)) {\
1613 else if (k->type == STK_MEM_START && k->zid == (mnum)) {\
1614 if (level == 0) break;\
1620 #define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\
1623 if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
1624 if (level == 0) (start) = k->u.mem.pstr;\
1627 else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\
1630 (end) = k->u.mem.pstr;\
/* STACK_PUSH_EMPTY_CHECK_START(cnum,s): STK_EMPTY_CHECK_START entry that
 * remembers s in u.empty_check.pstr.
 * STACK_PUSH_EMPTY_CHECK_END(cnum): STK_EMPTY_CHECK_END entry.
 * The use of cnum is on lines not in this excerpt. */
1638 #define STACK_PUSH_EMPTY_CHECK_START(cnum, s) do {\
1640 stk->type = STK_EMPTY_CHECK_START;\
1642 stk->u.empty_check.pstr = (s);\
1646 #define STACK_PUSH_EMPTY_CHECK_END(cnum) do {\
1648 stk->type = STK_EMPTY_CHECK_END;\
/* STACK_PUSH_CALL_FRAME(pat): STK_CALL_FRAME entry whose
 * u.call_frame.ret_addr is pat (read back by STACK_RETURN).
 * STACK_PUSH_RETURN: STK_RETURN entry. */
1653 #define STACK_PUSH_CALL_FRAME(pat) do {\
1655 stk->type = STK_CALL_FRAME;\
1656 stk->u.call_frame.ret_addr = (pat);\
1660 #define STACK_PUSH_RETURN do {\
1662 stk->type = STK_RETURN;\
/* STACK_PUSH_SAVE_VAL(sid,stype,sval): STK_SAVE_VAL entry holding the save
 * kind in u.val.type and sval (cast to UChar*) in u.val.v.
 * STACK_PUSH_SAVE_VAL_WITH_SPREV: same, and additionally stores the sprev
 * variable of the expansion site in u.val.v2.
 * The use of sid is on lines not in this excerpt. */
1666 #define STACK_PUSH_SAVE_VAL(sid, stype, sval) do {\
1668 stk->type = STK_SAVE_VAL;\
1670 stk->u.val.type = (stype);\
1671 stk->u.val.v = (UChar* )(sval);\
1675 #define STACK_PUSH_SAVE_VAL_WITH_SPREV(sid, stype, sval) do {\
1677 stk->type = STK_SAVE_VAL;\
1679 stk->u.val.type = (stype);\
1680 stk->u.val.v = (UChar* )(sval);\
1681 stk->u.val.v2 = sprev;\
/*
 * Lookup helpers for STK_SAVE_VAL entries. Each loops while k > stk_base and
 * copies u.val.v of a matching entry into sval.
 *   ..._LAST            - matches on save kind (u.val.type) only.
 *   ..._LAST_ID         - additionally requires k->zid == sid; has branches
 *                         for STK_CALL_FRAME / STK_RETURN entries.
 *   ..._LAST_ID_WITH_SPREV - as _LAST_ID, and also restores sprev from
 *                         u.val.v2.
 *   ..._LAST_ID_FROM    - as _LAST_ID but starts at stk_from.
 *
 * NOTE(review): _LAST_ID_FROM compares k->u.val.id while the other variants
 * compare k->zid -- confirm the field exists / the macro is still used.
 * NOTE(review): _WITH_SPREV passes the label "STACK_GET_SAVE_VAL_TYPE_LAST_ID"
 * to STACK_BASE_CHECK, so a debug message would name the wrong macro.
 */
1685 #define STACK_GET_SAVE_VAL_TYPE_LAST(stype, sval) do {\
1686 StackType *k = stk;\
1687 while (k > stk_base) {\
1689 STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST"); \
1690 if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)) {\
1691 (sval) = k->u.val.v;\
1697 #define STACK_GET_SAVE_VAL_TYPE_LAST_ID(stype, sid, sval) do { \
1699 StackType *k = stk;\
1700 while (k > stk_base) {\
1702 STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID"); \
1703 if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\
1704 && k->zid == (sid)) {\
1706 (sval) = k->u.val.v;\
1710 else if (k->type == STK_CALL_FRAME)\
1712 else if (k->type == STK_RETURN)\
1717 #define STACK_GET_SAVE_VAL_TYPE_LAST_ID_WITH_SPREV(stype, sid, sval) do { \
1719 StackType *k = stk;\
1720 while (k > stk_base) {\
1722 STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID"); \
1723 if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\
1724 && k->zid == (sid)) {\
1726 (sval) = k->u.val.v;\
1727 sprev = k->u.val.v2;\
1731 else if (k->type == STK_CALL_FRAME)\
1733 else if (k->type == STK_RETURN)\
1738 #define STACK_GET_SAVE_VAL_TYPE_LAST_ID_FROM(stype, sid, sval, stk_from) do { \
1740 StackType *k = (stk_from);\
1741 while (k > stk_base) {\
1742 STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID_FROM"); \
1743 if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\
1744 && k->u.val.id == (sid)) {\
1746 (sval) = k->u.val.v;\
1750 else if (k->type == STK_CALL_FRAME)\
1752 else if (k->type == STK_RETURN)\
/* STACK_PUSH_CALLOUT_CONTENTS(anum,func): STK_CALLOUT entry with
 * zid = ONIG_NON_NAME_ID, callout number anum and function func.
 * STACK_PUSH_CALLOUT_NAME(aid,anum,func): STK_CALLOUT entry with number and
 * function; the zid store (presumably from aid) is not in this excerpt. */
1758 #define STACK_PUSH_CALLOUT_CONTENTS(anum, func) do {\
1760 stk->type = STK_CALLOUT;\
1761 stk->zid = ONIG_NON_NAME_ID;\
1762 stk->u.callout.num = (anum);\
1763 stk->u.callout.func = (func);\
1767 #define STACK_PUSH_CALLOUT_NAME(aid, anum, func) do {\
1769 stk->type = STK_CALLOUT;\
1771 stk->u.callout.num = (anum);\
1772 stk->u.callout.func = (func);\
/* STACK_BASE_CHECK(p,at): two definitions. The first reports "at <at>" on
 * stderr when p has moved below stk_base; the second expands to nothing.
 * NOTE(review): the preprocessor condition choosing between them is not in
 * this excerpt. */
1777 #define STACK_BASE_CHECK(p, at) \
1778 if ((p) < stk_base) {\
1779 fprintf(stderr, "at %s\n", at);\
1783 #define STACK_BASE_CHECK(p, at)
/* STACK_POP_ONE: the visible part is the STACK_BASE_CHECK on stk.
 * POP_CALLOUT_CASE: else-branch fragment spliced into the pop macros. For a
 * STK_CALLOUT entry it invokes RETRACTION_CALLOUT with the entry's function,
 * zid and number plus msa->mp->callout_user_data. A second definition
 * expands to nothing (the selecting condition is not in this excerpt). */
1786 #define STACK_POP_ONE do {\
1788 STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
1793 #define POP_CALLOUT_CASE \
1794 else if (stk->type == STK_CALLOUT) {\
1795 RETRACTION_CALLOUT(stk->u.callout.func, stk->zid, stk->u.callout.num, msa->mp->callout_user_data);\
1798 #define POP_CALLOUT_CASE
/*
 * STACK_POP: unwind the stack according to pop_level. In every visible
 * variant the walk stops (break) at an entry whose type has a
 * STK_MASK_POP_USED bit.
 *   STACK_POP_LEVEL_FREE      - no bookkeeping is visible.
 *   STACK_POP_LEVEL_MEM_START - STK_MEM_START entries restore
 *                               mem_start_stk / mem_end_stk for their zid.
 *   third variant (its case label is not in this excerpt) - for entries with
 *     STK_MASK_POP_HANDLED bits: STK_MEM_START and STK_MEM_END restore the
 *     saved start/end indices, STK_REPEAT_INC decrements the count of the
 *     repeat entry at index u.repeat_inc.si.
 */
1801 #define STACK_POP do {\
1802 switch (pop_level) {\
1803 case STACK_POP_LEVEL_FREE:\
1806 STACK_BASE_CHECK(stk, "STACK_POP"); \
1807 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
1810 case STACK_POP_LEVEL_MEM_START:\
1813 STACK_BASE_CHECK(stk, "STACK_POP 2"); \
1814 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
1815 else if (stk->type == STK_MEM_START) {\
1816 mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
1817 mem_end_stk[stk->zid] = stk->u.mem.prev_end;\
1824 STACK_BASE_CHECK(stk, "STACK_POP 3"); \
1825 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
1826 else if ((stk->type & STK_MASK_POP_HANDLED) != 0) {\
1827 if (stk->type == STK_MEM_START) {\
1828 mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
1829 mem_end_stk[stk->zid] = stk->u.mem.prev_end;\
1831 else if (stk->type == STK_REPEAT_INC) {\
1832 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
1834 else if (stk->type == STK_MEM_END) {\
1835 mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
1836 mem_end_stk[stk->zid] = stk->u.mem.prev_end;\
/*
 * POP_TIL_BODY(aname,til_type): unwind until an entry of type til_type is
 * reached. Only entries with STK_MASK_POP_HANDLED_TIL bits are examined;
 * along the way STK_MEM_START / STK_MEM_END restore the saved capture
 * indices and STK_REPEAT_INC decrements the referenced repeat count.
 * aname is the label handed to STACK_BASE_CHECK.
 *
 * STACK_POP_TIL_ALT_PREC_READ_NOT / STACK_POP_TIL_ALT_LOOK_BEHIND_NOT
 * instantiate it for the negative look-ahead / look-behind entry types.
 */
1845 #define POP_TIL_BODY(aname, til_type) do {\
1848 STACK_BASE_CHECK(stk, (aname));\
1849 if ((stk->type & STK_MASK_POP_HANDLED_TIL) != 0) {\
1850 if (stk->type == (til_type)) break;\
1852 if (stk->type == STK_MEM_START) {\
1853 mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
1854 mem_end_stk[stk->zid] = stk->u.mem.prev_end;\
1856 else if (stk->type == STK_REPEAT_INC) {\
1857 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
1859 else if (stk->type == STK_MEM_END) {\
1860 mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
1861 mem_end_stk[stk->zid] = stk->u.mem.prev_end;\
1863 /* Don't call callout here because negation of total success by (?!..) (?<!..) */\
1869 #define STACK_POP_TIL_ALT_PREC_READ_NOT do {\
1870 POP_TIL_BODY("STACK_POP_TIL_ALT_PREC_READ_NOT", STK_ALT_PREC_READ_NOT);\
1873 #define STACK_POP_TIL_ALT_LOOK_BEHIND_NOT do {\
1874 POP_TIL_BODY("STACK_POP_TIL_ALT_LOOK_BEHIND_NOT", STK_ALT_LOOK_BEHIND_NOT);\
/* STACK_EXEC_TO_VOID(k): entries for which IS_TO_VOID_TARGET holds are
 * rewritten to STK_VOID; a STK_TO_VOID_START entry is handled in its own
 * branch (presumably ending the walk -- the loop control is not in this
 * excerpt).
 * STACK_GET_PREC_READ_START(k): also voids IS_TO_VOID_TARGET entries and has
 * separate branches for STK_PREC_READ_START / STK_PREC_READ_END whose bodies
 * are not in this excerpt. */
1878 #define STACK_EXEC_TO_VOID(k) do {\
1882 STACK_BASE_CHECK(k, "STACK_EXEC_TO_VOID"); \
1883 if (IS_TO_VOID_TARGET(k)) {\
1884 if (k->type == STK_TO_VOID_START) {\
1885 k->type = STK_VOID;\
1888 k->type = STK_VOID;\
1893 #define STACK_GET_PREC_READ_START(k) do {\
1898 STACK_BASE_CHECK(k, "STACK_GET_PREC_READ_START");\
1899 if (IS_TO_VOID_TARGET(k)) {\
1900 k->type = STK_VOID;\
1902 else if (k->type == STK_PREC_READ_START) {\
1908 else if (k->type == STK_PREC_READ_END) {\
/*
 * STACK_EMPTY_CHECK(isnull,sid,s): find the STK_EMPTY_CHECK_START entry with
 * zid == sid and set isnull to whether its recorded position equals s.
 *
 * STACK_MEM_START_GET_PREV_END_ADDR(k,reg,addr): derive the previous end
 * address saved in a STK_MEM_START entry. INVALID_STACK_INDEX gets its own
 * branch (body not in this excerpt); otherwise, when the group is flagged in
 * reg->bt_mem_end, prev_end is a stack index and the address is that entry's
 * pstr, else prev_end itself is reinterpreted as the address.
 */
1914 #define STACK_EMPTY_CHECK(isnull,sid,s) do {\
1915 StackType* k = stk;\
1918 STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK"); \
1919 if (k->type == STK_EMPTY_CHECK_START) {\
1920 if (k->zid == (sid)) {\
1921 (isnull) = (k->u.empty_check.pstr == (s));\
1928 #define STACK_MEM_START_GET_PREV_END_ADDR(k /* STK_MEM_START*/, reg, addr) do {\
1929 if (k->u.mem.prev_end == INVALID_STACK_INDEX) {\
1933 if (MEM_STATUS_AT((reg)->bt_mem_end, k->zid))\
1934 (addr) = STACK_AT(k->u.mem.prev_end)->u.mem.pstr;\
1936 (addr) = (UChar* )k->u.mem.prev_end;\
/*
 * STACK_EMPTY_CHECK_MEM(isnull,sid,s,reg): empty-loop check that also looks
 * at capture state. After locating the STK_EMPTY_CHECK_START entry for sid
 * it compares the recorded position with s and then inspects STK_MEM_START
 * entries at level 0 via STACK_MEM_START_GET_PREV_END_ADDR.
 * Visible results: isnull = 0 when a capture differs, isnull = -1 when the
 * loop body was empty but the position changed.
 * STK_PREC_READ_START / STK_PREC_READ_END have their own branches (bodies
 * not in this excerpt).
 */
1940 #ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT
1941 #define STACK_EMPTY_CHECK_MEM(isnull,sid,s,reg) do {\
1942 StackType* k = stk;\
1945 STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEM"); \
1946 if (k->type == STK_EMPTY_CHECK_START) {\
1947 if (k->zid == (sid)) {\
1948 if (k->u.empty_check.pstr != (s)) {\
1957 if (k->type == STK_MEM_START && level == 0) {\
1958 STACK_MEM_START_GET_PREV_END_ADDR(k, reg, endp);\
1960 (isnull) = 0; break;\
1962 else if (STACK_AT(k->u.mem.prev_start)->u.mem.pstr != endp) {\
1963 (isnull) = 0; break;\
1965 else if (endp != s) {\
1966 (isnull) = -1; /* empty, but position changed */ \
1969 else if (k->type == STK_PREC_READ_START) {\
1972 else if (k->type == STK_PREC_READ_END) {\
/*
 * STACK_EMPTY_CHECK_MEM_REC(isnull,sid,s,reg): variant of
 * STACK_EMPTY_CHECK_MEM that tracks nesting. level is adjusted on
 * STK_EMPTY_CHECK_START / STK_EMPTY_CHECK_END entries with zid == sid, and a
 * separate prec_level counter exists for the STK_PREC_READ_* branches.
 * Capture comparison happens only when level == 0 and prec_level == 0.
 * Visible results: isnull = 0 when a capture differs, isnull = -1 when empty
 * but the position changed.
 */
1984 #define STACK_EMPTY_CHECK_MEM_REC(isnull,sid,s,reg) do {\
1986 StackType* k = stk;\
1989 STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEM_REC");\
1990 if (k->type == STK_EMPTY_CHECK_START) {\
1991 if (k->zid == (sid)) {\
1993 if (k->u.empty_check.pstr != (s)) {\
1999 int prec_level = 0;\
2002 if (k->type == STK_MEM_START) {\
2003 if (level == 0 && prec_level == 0) {\
2004 STACK_MEM_START_GET_PREV_END_ADDR(k, reg, endp);\
2006 (isnull) = 0; break;\
2008 else if (STACK_AT(k->u.mem.prev_start)->u.mem.pstr != endp) { \
2009 (isnull) = 0; break;\
2011 else if (endp != s) {\
2012 (isnull) = -1; /* empty, but position changed */\
2016 else if (k->type == STK_EMPTY_CHECK_START) {\
2017 if (k->zid == (sid)) level++;\
2019 else if (k->type == STK_EMPTY_CHECK_END) {\
2020 if (k->zid == (sid)) level--;\
2022 else if (k->type == STK_PREC_READ_START) {\
2025 else if (k->type == STK_PREC_READ_END) {\
2038 else if (k->type == STK_EMPTY_CHECK_END) {\
2039 if (k->zid == (sid)) level++;\
/* STACK_EMPTY_CHECK_REC(isnull,id,s): position-only empty check with a
 * branch for STK_EMPTY_CHECK_END entries; sets isnull to whether the
 * recorded start position equals s.
 * NOTE(review): matches on k->u.empty_check.num, whereas the other
 * empty-check macros match on k->zid -- confirm the field exists and which
 * preprocessor branch this definition belongs to. */
2044 #define STACK_EMPTY_CHECK_REC(isnull,id,s) do {\
2046 StackType* k = stk;\
2049 STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_REC"); \
2050 if (k->type == STK_EMPTY_CHECK_START) {\
2051 if (k->u.empty_check.num == (id)) {\
2053 (isnull) = (k->u.empty_check.pstr == (s));\
2059 else if (k->type == STK_EMPTY_CHECK_END) {\
2064 #endif /* USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT */
/* STACK_GET_REPEAT(sid,k): search for the STK_REPEAT entry with zid == sid;
 * level is decremented on STK_CALL_FRAME and incremented on STK_RETURN
 * entries met on the way.
 * STACK_RETURN(addr): search for a STK_CALL_FRAME entry and copy its
 * ret_addr into addr; STK_RETURN entries are handled in a separate branch
 * (body not in this excerpt). */
2066 #define STACK_GET_REPEAT(sid, k) do {\
2071 STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
2072 if (k->type == STK_REPEAT) {\
2074 if (k->zid == (sid)) {\
2079 else if (k->type == STK_CALL_FRAME) level--;\
2080 else if (k->type == STK_RETURN) level++;\
2084 #define STACK_RETURN(addr) do {\
2086 StackType* k = stk;\
2089 STACK_BASE_CHECK(k, "STACK_RETURN"); \
2090 if (k->type == STK_CALL_FRAME) {\
2092 (addr) = k->u.call_frame.ret_addr;\
2097 else if (k->type == STK_RETURN)\
/* STRING_CMP(s1,s2,len): byte-wise comparison of len bytes; jumps to the
 * fail label of the enclosing function on the first mismatch. The arguments
 * are modified (s1 / s2 advance, len counts down), so pass plain lvalues.
 * STRING_CMP_IC: case-insensitive counterpart; acts when string_cmp_ic()
 * returns 0, using the encode variable of the expansion site. */
2103 #define STRING_CMP(s1,s2,len) do {\
2104 while (len-- > 0) {\
2105 if (*s1++ != *s2++) goto fail;\
2109 #define STRING_CMP_IC(case_fold_flag,s1,ps2,len) do {\
2110 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) \
/*
 * string_cmp_ic: case-insensitive comparison built on
 * ONIGENC_MBC_CASE_FOLD. Characters from s1 and from s2 are folded into
 * buf1 / buf2 and the folded bytes are compared.
 *
 * Visible results: returns 0 when the folded lengths differ or any folded
 * byte differs.
 *
 * NOTE(review): the loop header, the setup of end1 / s2 / end2 / p1 / p2,
 * the success return and any write-back through ps2 are on lines that are
 * not part of this excerpt.
 */
2114 static int string_cmp_ic(OnigEncoding enc
, int case_fold_flag
,
2115 UChar
* s1
, UChar
** ps2
, int mblen
)
2117 UChar buf1
[ONIGENC_MBC_CASE_FOLD_MAXLEN
];
2118 UChar buf2
[ONIGENC_MBC_CASE_FOLD_MAXLEN
];
2119 UChar
*p1
, *p2
, *end1
, *s2
, *end2
;
/* Fold the next character of each side; the macro receives &s1 / &s2. */
2126 len1
= ONIGENC_MBC_CASE_FOLD(enc
, case_fold_flag
, &s1
, end1
, buf1
);
2127 len2
= ONIGENC_MBC_CASE_FOLD(enc
, case_fold_flag
, &s2
, end2
, buf2
);
2128 if (len1
!= len2
) return 0;
2131 while (len1
-- > 0) {
2132 if (*p1
!= *p2
) return 0;
/* STRING_CMP_VALUE(s1,s2,len,is_fail): like STRING_CMP but reports a
 * mismatch by setting is_fail = 1 and leaving the loop instead of jumping to
 * fail. s1, s2 and len are modified.
 * STRING_CMP_VALUE_IC: case-insensitive counterpart based on
 * string_cmp_ic(), using the encode variable of the expansion site. */
2142 #define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
2144 while (len-- > 0) {\
2145 if (*s1++ != *s2++) {\
2146 is_fail = 1; break;\
2151 #define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,is_fail) do {\
2152 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) \
/*
 * Subject-position helpers; they use str, end, s, right_range and
 * in_right_range from the expansion site.
 *   IS_EMPTY_STR         - subject is empty (str == end).
 *   ON_STR_BEGIN / _END  - s is at the start / end of the subject.
 *   DATA_ENSURE_CHECK1   - at least one byte remains before right_range.
 *   DATA_ENSURE_CHECK(n) - at least n bytes remain before right_range.
 *   DATA_ENSURE(n)       - goto fail unless n bytes remain.
 *   INIT_RIGHT_RANGE     - reset right_range from in_right_range.
 * NOTE(review): DATA_ENSURE is a bare `if` (no do/while wrapper), so an
 * `else` following its use would bind to it.
 */
2159 #define IS_EMPTY_STR (str == end)
2160 #define ON_STR_BEGIN(s) ((s) == str)
2161 #define ON_STR_END(s) ((s) == end)
2162 #define DATA_ENSURE_CHECK1 (s < right_range)
2163 #define DATA_ENSURE_CHECK(n) (s + (n) <= right_range)
2164 #define DATA_ENSURE(n) if (s + (n) > right_range) goto fail
2166 #define INIT_RIGHT_RANGE right_range = (UChar* )in_right_range
2168 #ifdef USE_CAPTURE_HISTORY
/*
 * make_capture_history_tree: build the capture-history subtree below node
 * from the stack entries between the current position and stk_top.
 *
 * For a STK_MEM_START entry whose group number n is at most
 * ONIG_MAX_CAPTURE_HISTORY_GROUP and is flagged in reg->capture_history, a
 * child node is created, its beg offset (relative to str) is recorded, it is
 * attached to node, and the function recurses into it; afterwards the
 * child's end offset is taken from the current entry. A STK_MEM_END entry
 * whose zid equals node->group sets node->end.
 *
 * Visible returns: non-zero codes from history_tree_add_child() or the
 * recursive call are propagated; the final `return 1` signals that the root
 * node ended.
 *
 * NOTE(review): the initialisation and advancing of k, the assignment of n
 * and the write-back through kp are on lines not in this excerpt.
 * NOTE(review): when history_tree_add_child() fails, child is returned
 * without a visible release -- possible leak unless that function takes
 * ownership on failure; confirm.
 */
2170 make_capture_history_tree(OnigCaptureTreeNode
* node
, StackType
** kp
,
2171 StackType
* stk_top
, UChar
* str
, regex_t
* reg
)
2174 OnigCaptureTreeNode
* child
;
2177 while (k
< stk_top
) {
2178 if (k
->type
== STK_MEM_START
) {
2180 if (n
<= ONIG_MAX_CAPTURE_HISTORY_GROUP
&&
2181 MEM_STATUS_AT(reg
->capture_history
, n
) != 0) {
2182 child
= history_node_new();
2183 CHECK_NULL_RETURN_MEMERR(child
);
2185 child
->beg
= (int )(k
->u
.mem
.pstr
- str
);
2186 r
= history_tree_add_child(node
, child
);
2187 if (r
!= 0) return r
;
2189 r
= make_capture_history_tree(child
, kp
, stk_top
, str
, reg
);
2190 if (r
!= 0) return r
;
2193 child
->end
= (int )(k
->u
.mem
.pstr
- str
);
2196 else if (k
->type
== STK_MEM_END
) {
2197 if (k
->zid
== node
->group
) {
2198 node
->end
= (int )(k
->u
.mem
.pstr
- str
);
2206 return 1; /* 1: root node ending. */
2210 #ifdef USE_BACKREF_WITH_LEVEL
/* mem_is_in_memp: linear search of the num entries of memp for the group
 * number mem; returns 1 as soon as it is found.
 * NOTE(review): the not-found return is on a line not in this excerpt. */
2211 static int mem_is_in_memp(int mem
, int num
, MemNumType
* memp
)
2215 for (i
= 0; i
< num
; i
++) {
2216 if (mem
== (int )memp
[i
]) return 1;
/*
 * backref_match_at_nested_level: match a back-reference against a capture
 * recorded at call-nesting depth nest.
 *
 * The stack is walked while k >= stk_base. STK_CALL_FRAME / STK_RETURN
 * entries have their own branches (presumably adjusting level; bodies not
 * in this excerpt). At level == nest:
 *   - a STK_MEM_END entry for one of the groups in memp[0..mem_num) records
 *     the capture end in pend;
 *   - a STK_MEM_START entry for such a group yields pstart. If pend is
 *     already known, the captured text [pstart, pend) is compared with the
 *     subject at *s: first a length check against the remaining input
 *     (send - *s), then string_cmp_ic() when ignore_case is set, otherwise
 *     byte by byte.
 *
 * Visible returns: 0 whenever the capture is longer than the remaining
 * input or the comparison fails.
 *
 * NOTE(review): the success path (including any update of *s) and the
 * initialisation of k / level / ss / p are on lines not in this excerpt.
 */
2222 backref_match_at_nested_level(regex_t
* reg
,
2223 StackType
* top
, StackType
* stk_base
,
2224 int ignore_case
, int case_fold_flag
,
2225 int nest
, int mem_num
, MemNumType
* memp
,
2226 UChar
** s
, const UChar
* send
)
2228 UChar
*ss
, *p
, *pstart
, *pend
= NULL_UCHARP
;
2235 while (k
>= stk_base
) {
2236 if (k
->type
== STK_CALL_FRAME
) {
2239 else if (k
->type
== STK_RETURN
) {
2242 else if (level
== nest
) {
2243 if (k
->type
== STK_MEM_START
) {
2244 if (mem_is_in_memp(k
->zid
, mem_num
, memp
)) {
2245 pstart
= k
->u
.mem
.pstr
;
2246 if (IS_NOT_NULL(pend
)) {
2247 if (pend
- pstart
> send
- *s
) return 0; /* or goto next_mem; */
2251 if (ignore_case
!= 0) {
2252 if (string_cmp_ic(reg
->enc
, case_fold_flag
,
2253 pstart
, &ss
, (int )(pend
- pstart
)) == 0)
2254 return 0; /* or goto next_mem; */
2258 if (*p
++ != *ss
++) return 0; /* or goto next_mem; */
2267 else if (k
->type
== STK_MEM_END
) {
2268 if (mem_is_in_memp(k
->zid
, mem_num
, memp
)) {
2269 pend
= k
->u
.mem
.pstr
;
/*
 * backref_check_at_nested_level: walk the stack while k >= stk_base, with
 * separate branches for STK_CALL_FRAME / STK_RETURN entries, and at
 * level == nest look for a STK_MEM_END entry belonging to one of the groups
 * in memp[0..mem_num).
 *
 * NOTE(review): presumably reports whether such a capture exists; the
 * return statements and the initialisation of k / level are on lines not in
 * this excerpt.
 */
2280 backref_check_at_nested_level(regex_t
* reg
,
2281 StackType
* top
, StackType
* stk_base
,
2282 int nest
, int mem_num
, MemNumType
* memp
)
2290 while (k
>= stk_base
) {
2291 if (k
->type
== STK_CALL_FRAME
) {
2294 else if (k
->type
== STK_RETURN
) {
2297 else if (level
== nest
) {
2298 if (k
->type
== STK_MEM_END
) {
2299 if (mem_is_in_memp(k
->zid
, mem_num
, memp
)) {
2309 #endif /* USE_BACKREF_WITH_LEVEL */
/*
 * Timing back-end for the ONIG_DEBUG_STATISTICS build. Two variants of the
 * ts / te timestamps and the GETTIME / TIMEDIFF macros are defined:
 *   - struct timeval + gettimeofday(); TIMEDIFF is in microseconds
 *     (seconds difference * 1000000 plus the tv_usec difference);
 *   - struct tms + times(); TIMEDIFF is the difference of tms_utime.
 * USE_TIMEOFDAY is defined unconditionally just above its #ifdef.
 * NOTE(review): the #else / #endif lines between the variants are not in
 * this excerpt.
 */
2312 #ifdef ONIG_DEBUG_STATISTICS
2314 #define USE_TIMEOFDAY
2316 #ifdef USE_TIMEOFDAY
2317 #ifdef HAVE_SYS_TIME_H
2318 #include <sys/time.h>
2320 #ifdef HAVE_UNISTD_H
2323 static struct timeval ts
, te
;
2324 #define GETTIME(t) gettimeofday(&(t), (struct timezone* )0)
2325 #define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \
2326 (((te).tv_sec - (ts).tv_sec)*1000000))
2328 #ifdef HAVE_SYS_TIMES_H
2329 #include <sys/times.h>
2331 static struct tms ts
, te
;
2332 #define GETTIME(t) times(&(t))
2333 #define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime)
/*
 * Per-opcode statistics state (256 slots each):
 *   OpCounter[op]     - incremented by SOP_IN(op).
 *   OpPrevCounter[op] - incremented for the opcode in OpCurr when SOP_IN is
 *                       entered with an opcode equal to OpPrevTarget.
 *   OpTime[op]        - accumulated TIMEDIFF(te, ts) added by SOP_OUT for
 *                       OpCurr.
 * OpCurr starts as OP_FINISH, OpPrevTarget as OP_FAIL, MaxStackDepth as 0.
 * NOTE(review): the updates of OpCurr and the GETTIME calls are on lines not
 * in this excerpt; SOP_IN uses its argument unparenthesised.
 */
2336 static int OpCounter
[256];
2337 static int OpPrevCounter
[256];
2338 static unsigned long OpTime
[256];
2339 static int OpCurr
= OP_FINISH
;
2340 static int OpPrevTarget
= OP_FAIL
;
2341 static int MaxStackDepth
= 0;
2343 #define SOP_IN(opcode) do {\
2344 if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\
2346 OpCounter[opcode]++;\
2350 #define SOP_OUT do {\
2352 OpTime[OpCurr] += TIMEDIFF(te, ts);\
/* onig_statistics_init: reset the three 256-slot statistics tables
 * (OpCounter, OpPrevCounter, OpTime) to zero.
 * NOTE(review): whether MaxStackDepth is also reset cannot be seen in this
 * excerpt. */
2356 onig_statistics_init(void)
2359 for (i
= 0; i
< 256; i
++) {
2360 OpCounter
[i
] = OpPrevCounter
[i
] = 0; OpTime
[i
] = 0;
/*
 * onig_print_statistics: write the collected opcode statistics to f -- a
 * header line, one row per OpInfo[] entry (up to the entry whose opcode is
 * negative) with counter, prev-counter, accumulated time and name, and
 * finally the maximum stack depth.
 *
 * Returns -1 as soon as any fprintf() call fails.
 *
 * The time column is printed with %lu because OpTime[] is declared
 * unsigned long; the former %ld did not match the argument type.
 *
 * NOTE(review): rows index the tables by the OpInfo[] position i, while
 * SOP_IN indexes them by opcode; this assumes OpInfo[] is ordered by opcode
 * value -- confirm. The success return is on a line not in this excerpt.
 */
2366 onig_print_statistics(FILE* f
)
2371 r
= fprintf(f
, " count prev time\n");
2372 if (r
< 0) return -1;
2374 for (i
= 0; OpInfo
[i
].opcode
>= 0; i
++) {
2375 r
= fprintf(f
, "%8d: %8d: %10lu: %s\n",
2376 OpCounter
[i
], OpPrevCounter
[i
], OpTime
[i
], OpInfo
[i
].name
);
2377 if (r
< 0) return -1;
2379 r
= fprintf(f
, "\nmax stack depth: %d\n", MaxStackDepth
);
2380 if (r
< 0) return -1;
/* STACK_INC: two definitions. The statistics variant additionally records
 * the largest observed stk - stk_base in MaxStackDepth; the plain variant is
 * just stk++. The SOP_IN defined here expands to nothing.
 * NOTE(review): the #else / #endif lines separating the variants are not in
 * this excerpt. */
2385 #define STACK_INC do {\
2387 if (stk - stk_base > MaxStackDepth) \
2388 MaxStackDepth = stk - stk_base;\
2392 #define STACK_INC stk++
2394 #define SOP_IN(opcode)
2399 /* Offset type of the POSIX API match region; match_at() casts the rm_so / rm_eo values it stores to this type. */
2400 typedef int regoff_t
;
/*
 * Dispatch macros for the bytecode interpreter in match_at().
 *
 * Threaded variant (USE_THREADED_CODE): every opcode is a label L_<op> and
 * dispatch is a computed goto, either through p->opaddr
 * (USE_DIRECT_THREADED_CODE) or through opcode_to_label[p->opcode].
 * CASE_OP also records sbegin = s, and NEXT_OP sets sprev = sbegin before
 * jumping.
 *
 * Other variant: opcodes are `case` labels of a switch on p->opcode; NEXT_OP
 * is `break`, GOTO_OP is `continue`, and BYTECODE_INTERPRETER_END performs
 * sprev = sbegin after the switch.
 *
 * NEXT_OUT / JUMP_OUT / BREAK_OUT prepend SOP_OUT to the corresponding
 * operation; CHECK_INTERRUPT_JUMP_OUT additionally expands
 * CHECK_INTERRUPT_IN_MATCH before jumping.
 * NOTE(review): the #else / inner #endif lines and part of the non-threaded
 * BYTECODE_INTERPRETER_START body are not in this excerpt.
 */
2409 #ifdef USE_THREADED_CODE
2411 #define BYTECODE_INTERPRETER_START GOTO_OP;
2412 #define BYTECODE_INTERPRETER_END
2413 #define CASE_OP(x) L_##x: SOP_IN(OP_##x); sbegin = s; MATCH_DEBUG_OUT(0)
2414 #define DEFAULT_OP /* L_DEFAULT: */
2415 #define NEXT_OP sprev = sbegin; JUMP_OP
2416 #define JUMP_OP GOTO_OP
2417 #ifdef USE_DIRECT_THREADED_CODE
2418 #define GOTO_OP goto *(p->opaddr)
2420 #define GOTO_OP goto *opcode_to_label[p->opcode]
2422 #define BREAK_OP /* Nothing */
2426 #define BYTECODE_INTERPRETER_START \
2430 switch (p->opcode) {
2431 #define BYTECODE_INTERPRETER_END } sprev = sbegin; }
2432 #define CASE_OP(x) case OP_##x: SOP_IN(OP_##x);
2433 #define DEFAULT_OP default:
2434 #define NEXT_OP break
2435 #define JUMP_OP GOTO_OP
2436 #define GOTO_OP continue; break
2437 #define BREAK_OP break
2439 #endif /* USE_THREADED_CODE */
2442 #define NEXT_OUT SOP_OUT; NEXT_OP
2443 #define JUMP_OUT SOP_OUT; JUMP_OP
2444 #define BREAK_OUT SOP_OUT; BREAK_OP
2445 #define CHECK_INTERRUPT_JUMP_OUT SOP_OUT; CHECK_INTERRUPT_IN_MATCH; JUMP_OP
/*
 * MATCH_DEBUG_OUT(offset): trace one interpreter step to stderr when
 * ONIG_DEBUG_MATCH is defined -- the step counter, the current stack index,
 * the subject offset (or -1 when s is null), a quoted preview of up to seven
 * characters at s (followed by "..." when more input remains), padding, and
 * then either "----: finish" or the opcode index plus its disassembly from
 * print_compiled_byte_code(). The second definition expands to nothing.
 *
 * NOTE(review): the preview is assembled in a fixed buf[50]; the copy loop
 * trusts enclen() and does not re-check q < end per byte -- confirm this is
 * safe for truncated multibyte input.
 * NOTE(review): GET_STACK_INDEX(stk) is a pointer difference printed with
 * %7ld; confirm the type matches on all targets.
 */
2448 #ifdef ONIG_DEBUG_MATCH
2449 #define MATCH_DEBUG_OUT(offset) do {\
2451 UChar *q, *bp, buf[50];\
2453 spos = IS_NOT_NULL(s) ? (int )(s - str) : -1;\
2455 fprintf(stderr, "%7u: %7ld: %4d> \"",\
2456 counter, GET_STACK_INDEX(stk), spos);\
2459 if (IS_NOT_NULL(s)) {\
2460 for (i = 0, q = s; i < 7 && q < end; i++) {\
2461 len = enclen(encode, q);\
2462 while (len-- > 0) *bp++ = *q++;\
2464 if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; }\
2465 else { xmemcpy(bp, "\"", 1); bp += 1; }\
2468 xmemcpy(bp, "\"", 1); bp += 1;\
2471 fputs((char* )buf, stderr);\
2472 for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr);\
2473 if (xp == FinishCode)\
2474 fprintf(stderr, "----: finish");\
2476 fprintf(stderr, "%4d: ", (int )(xp - reg->ops));\
2477 print_compiled_byte_code(stderr, reg, (int )(xp - reg->ops), reg->ops, encode);\
2479 fprintf(stderr, "\n");\
2482 #define MATCH_DEBUG_OUT(offset)
2486 /* match data(str - end) from position (sstart). */
2487 /* if sstart == str then set sprev to NULL. */
2489 match_at(regex_t
* reg
, const UChar
* str
, const UChar
* end
,
2490 const UChar
* in_right_range
, const UChar
* sstart
, UChar
* sprev
,
2494 #if defined(USE_DIRECT_THREADED_CODE)
2495 static Operation FinishCode
[] = { { .opaddr
=&&L_FINISH
} };
2497 static Operation FinishCode
[] = { { OP_FINISH
} };
2500 #ifdef USE_THREADED_CODE
2501 static const void *opcode_to_label
[] = {
2527 &&L_ANYCHAR_ML_STAR
,
2528 &&L_ANYCHAR_STAR_PEEK_NEXT
,
2529 &&L_ANYCHAR_ML_STAR_PEEK_NEXT
,
2535 &&L_NO_WORD_BOUNDARY
,
2538 &&L_TEXT_SEGMENT_BOUNDARY
,
2550 &&L_BACKREF_MULTI_IC
,
2551 &&L_BACKREF_WITH_LEVEL
,
2552 &&L_BACKREF_WITH_LEVEL_IC
,
2554 &&L_BACKREF_CHECK_WITH_LEVEL
,
2556 &&L_MEMORY_START_PUSH
,
2557 &&L_MEMORY_END_PUSH
,
2558 &&L_MEMORY_END_PUSH_REC
,
2566 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
2567 &&L_PUSH_OR_JUMP_EXACT1
,
2569 &&L_PUSH_IF_PEEK_NEXT
,
2575 &&L_REPEAT_INC_NG_SG
,
2576 &&L_EMPTY_CHECK_START
,
2577 &&L_EMPTY_CHECK_END
,
2578 &&L_EMPTY_CHECK_END_MEMST
,
2579 &&L_EMPTY_CHECK_END_MEMST_PUSH
,
2580 &&L_PREC_READ_START
,
2582 &&L_PREC_READ_NOT_START
,
2583 &&L_PREC_READ_NOT_END
,
2587 &&L_LOOK_BEHIND_NOT_START
,
2588 &&L_LOOK_BEHIND_NOT_END
,
2594 &&L_CALLOUT_CONTENTS
,
2600 int i
, n
, num_mem
, best_len
, pop_level
;
2601 LengthType tlen
, tlen2
;
2604 UChar
*s
, *q
, *ps
, *sbegin
;
2608 StackType
*stk_base
, *stk
, *stk_end
;
2609 StackType
*stkp
; /* used as any purpose. */
2611 StackIndex
*repeat_stk
;
2612 StackIndex
*mem_start_stk
, *mem_end_stk
;
2614 #ifdef USE_RETRY_LIMIT_IN_MATCH
2615 unsigned long retry_limit_in_match
;
2616 unsigned long retry_in_match_counter
;
2623 Operation
* p
= reg
->ops
;
2624 OnigOptionType option
= reg
->options
;
2625 OnigEncoding encode
= reg
->enc
;
2626 OnigCaseFoldType case_fold_flag
= reg
->case_fold_flag
;
2628 #ifdef ONIG_DEBUG_MATCH
2629 static unsigned int counter
= 1;
2632 #ifdef USE_DIRECT_THREADED_CODE
2634 for (i
= 0; i
< reg
->ops_used
; i
++) {
2636 addr
= opcode_to_label
[reg
->ocs
[i
]];
2645 msa
->mp
->match_at_call_counter
++;
2648 #ifdef USE_RETRY_LIMIT_IN_MATCH
2649 retry_limit_in_match
= msa
->retry_limit_in_match
;
2652 pop_level
= reg
->stack_pop_level
;
2653 num_mem
= reg
->num_mem
;
2654 STACK_INIT(INIT_MATCH_STACK_SIZE
);
2655 UPDATE_FOR_STACK_REALLOC
;
2656 for (i
= 1; i
<= num_mem
; i
++) {
2657 mem_start_stk
[i
] = mem_end_stk
[i
] = INVALID_STACK_INDEX
;
2660 #ifdef ONIG_DEBUG_MATCH
2661 fprintf(stderr
, "match_at: str: %p, end: %p, start: %p, sprev: %p\n",
2662 str
, end
, sstart
, sprev
);
2663 fprintf(stderr
, "size: %d, start offset: %d\n",
2664 (int )(end
- str
), (int )(sstart
- str
));
2667 best_len
= ONIG_MISMATCH
;
2668 keep
= s
= (UChar
* )sstart
;
2669 STACK_PUSH_BOTTOM(STK_ALT
, FinishCode
); /* bottom stack */
2672 #ifdef USE_RETRY_LIMIT_IN_MATCH
2673 retry_in_match_counter
= 0;
2676 BYTECODE_INTERPRETER_START
{
2678 n
= (int )(s
- sstart
);
2681 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
2682 if (IS_FIND_LONGEST(option
)) {
2683 if (n
> msa
->best_len
) {
2685 msa
->best_s
= (UChar
* )sstart
;
2692 region
= msa
->region
;
2694 if (keep
> s
) keep
= s
;
2696 #ifdef USE_POSIX_API_REGION_OPTION
2697 if (IS_POSIX_REGION(msa
->options
)) {
2698 posix_regmatch_t
* rmt
= (posix_regmatch_t
* )region
;
2700 rmt
[0].rm_so
= (regoff_t
)(keep
- str
);
2701 rmt
[0].rm_eo
= (regoff_t
)(s
- str
);
2702 for (i
= 1; i
<= num_mem
; i
++) {
2703 if (mem_end_stk
[i
] != INVALID_STACK_INDEX
) {
2704 if (MEM_STATUS_AT(reg
->bt_mem_start
, i
))
2705 rmt
[i
].rm_so
= (regoff_t
)(STACK_AT(mem_start_stk
[i
])->u
.mem
.pstr
- str
);
2707 rmt
[i
].rm_so
= (regoff_t
)((UChar
* )((void* )(mem_start_stk
[i
])) - str
);
2709 rmt
[i
].rm_eo
= (regoff_t
)((MEM_STATUS_AT(reg
->bt_mem_end
, i
)
2710 ? STACK_AT(mem_end_stk
[i
])->u
.mem
.pstr
2711 : (UChar
* )((void* )mem_end_stk
[i
]))
2715 rmt
[i
].rm_so
= rmt
[i
].rm_eo
= ONIG_REGION_NOTPOS
;
2720 #endif /* USE_POSIX_API_REGION_OPTION */
2721 region
->beg
[0] = (int )(keep
- str
);
2722 region
->end
[0] = (int )(s
- str
);
2723 for (i
= 1; i
<= num_mem
; i
++) {
2724 if (mem_end_stk
[i
] != INVALID_STACK_INDEX
) {
2725 if (MEM_STATUS_AT(reg
->bt_mem_start
, i
))
2726 region
->beg
[i
] = (int )(STACK_AT(mem_start_stk
[i
])->u
.mem
.pstr
- str
);
2728 region
->beg
[i
] = (int )((UChar
* )((void* )mem_start_stk
[i
]) - str
);
2730 region
->end
[i
] = (int )((MEM_STATUS_AT(reg
->bt_mem_end
, i
)
2731 ? STACK_AT(mem_end_stk
[i
])->u
.mem
.pstr
2732 : (UChar
* )((void* )mem_end_stk
[i
])) - str
);
2735 region
->beg
[i
] = region
->end
[i
] = ONIG_REGION_NOTPOS
;
2739 #ifdef USE_CAPTURE_HISTORY
2740 if (reg
->capture_history
!= 0) {
2742 OnigCaptureTreeNode
* node
;
2744 if (IS_NULL(region
->history_root
)) {
2745 region
->history_root
= node
= history_node_new();
2746 CHECK_NULL_RETURN_MEMERR(node
);
2749 node
= region
->history_root
;
2750 history_tree_clear(node
);
2754 node
->beg
= (int )(keep
- str
);
2755 node
->end
= (int )(s
- str
);
2758 r
= make_capture_history_tree(region
->history_root
, &stkp
,
2759 stk
, (UChar
* )str
, reg
);
2761 best_len
= r
; /* error code */
2765 #endif /* USE_CAPTURE_HISTORY */
2766 #ifdef USE_POSIX_API_REGION_OPTION
2767 } /* else IS_POSIX_REGION() */
2770 } /* n > best_len */
2772 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
2777 if (IS_FIND_CONDITION(option
)) {
2778 if (IS_FIND_NOT_EMPTY(option
) && s
== sstart
) {
2779 best_len
= ONIG_MISMATCH
;
2780 goto fail
; /* for retry */
2782 if (IS_FIND_LONGEST(option
) && DATA_ENSURE_CHECK1
) {
2783 goto fail
; /* for retry */
2787 /* default behavior: return first-matching result. */
2793 if (*ps
!= *s
) goto fail
;
2801 UChar
*q
, lowbuf
[ONIGENC_MBC_CASE_FOLD_MAXLEN
];
2804 len
= ONIGENC_MBC_CASE_FOLD(encode
,
2805 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
2812 if (*ps
!= *q
) goto fail
;
2822 if (*ps
!= *s
) goto fail
;
2824 if (*ps
!= *s
) goto fail
;
2833 if (*ps
!= *s
) goto fail
;
2835 if (*ps
!= *s
) goto fail
;
2837 if (*ps
!= *s
) goto fail
;
2846 if (*ps
!= *s
) goto fail
;
2848 if (*ps
!= *s
) goto fail
;
2850 if (*ps
!= *s
) goto fail
;
2852 if (*ps
!= *s
) goto fail
;
2861 if (*ps
!= *s
) goto fail
;
2863 if (*ps
!= *s
) goto fail
;
2865 if (*ps
!= *s
) goto fail
;
2867 if (*ps
!= *s
) goto fail
;
2869 if (*ps
!= *s
) goto fail
;
2876 tlen
= p
->exact_n
.n
;
2879 while (tlen
-- > 0) {
2880 if (*ps
++ != *s
++) goto fail
;
2889 UChar
*q
, *endp
, lowbuf
[ONIGENC_MBC_CASE_FOLD_MAXLEN
];
2891 tlen
= p
->exact_n
.n
;
2897 len
= ONIGENC_MBC_CASE_FOLD(encode
,
2898 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
2904 if (*ps
!= *q
) goto fail
;
2916 if (*ps
!= *s
) goto fail
;
2918 if (*ps
!= *s
) goto fail
;
2926 if (*ps
!= *s
) goto fail
;
2928 if (*ps
!= *s
) goto fail
;
2931 if (*ps
!= *s
) goto fail
;
2933 if (*ps
!= *s
) goto fail
;
2941 if (*ps
!= *s
) goto fail
;
2943 if (*ps
!= *s
) goto fail
;
2945 if (*ps
!= *s
) goto fail
;
2947 if (*ps
!= *s
) goto fail
;
2950 if (*ps
!= *s
) goto fail
;
2952 if (*ps
!= *s
) goto fail
;
2958 tlen
= p
->exact_n
.n
;
2959 DATA_ENSURE(tlen
* 2);
2961 while (tlen
-- > 0) {
2962 if (*ps
!= *s
) goto fail
;
2964 if (*ps
!= *s
) goto fail
;
2972 tlen
= p
->exact_n
.n
;
2973 DATA_ENSURE(tlen
* 3);
2975 while (tlen
-- > 0) {
2976 if (*ps
!= *s
) goto fail
;
2978 if (*ps
!= *s
) goto fail
;
2980 if (*ps
!= *s
) goto fail
;
2988 tlen
= p
->exact_len_n
.len
; /* mb byte len */
2989 tlen2
= p
->exact_len_n
.n
; /* number of chars */
2992 ps
= p
->exact_len_n
.s
;
2993 while (tlen2
-- > 0) {
2994 if (*ps
!= *s
) goto fail
;
3003 if (BITSET_AT(p
->cclass
.bsp
, *s
) == 0) goto fail
;
3010 if (! ONIGENC_IS_MBC_HEAD(encode
, s
)) goto fail
;
3019 mb_len
= enclen(encode
, s
);
3020 DATA_ENSURE(mb_len
);
3023 code
= ONIGENC_MBC_TO_CODE(encode
, ss
, s
);
3024 if (! onig_is_in_code_range(p
->cclass_mb
.mb
, code
)) goto fail
;
3031 if (ONIGENC_IS_MBC_HEAD(encode
, s
)) {
3035 if (BITSET_AT(p
->cclass_mix
.bsp
, *s
) == 0)
3045 if (BITSET_AT(p
->cclass
.bsp
, *s
) != 0) goto fail
;
3046 s
+= enclen(encode
, s
);
3050 CASE_OP(CCLASS_MB_NOT
)
3052 if (! ONIGENC_IS_MBC_HEAD(encode
, s
)) {
3054 goto cc_mb_not_success
;
3061 int mb_len
= enclen(encode
, s
);
3063 if (! DATA_ENSURE_CHECK(mb_len
)) {
3066 goto cc_mb_not_success
;
3071 code
= ONIGENC_MBC_TO_CODE(encode
, ss
, s
);
3072 if (onig_is_in_code_range(p
->cclass_mb
.mb
, code
)) goto fail
;
3079 CASE_OP(CCLASS_MIX_NOT
)
3081 if (ONIGENC_IS_MBC_HEAD(encode
, s
)) {
3085 if (BITSET_AT(p
->cclass_mix
.bsp
, *s
) != 0)
3095 n
= enclen(encode
, s
);
3097 if (ONIGENC_IS_MBC_NEWLINE(encode
, s
, end
)) goto fail
;
3104 n
= enclen(encode
, s
);
3110 CASE_OP(ANYCHAR_STAR
)
3112 while (DATA_ENSURE_CHECK1
) {
3113 STACK_PUSH_ALT(p
, s
, sprev
);
3114 n
= enclen(encode
, s
);
3116 if (ONIGENC_IS_MBC_NEWLINE(encode
, s
, end
)) goto fail
;
3122 CASE_OP(ANYCHAR_ML_STAR
)
3124 while (DATA_ENSURE_CHECK1
) {
3125 STACK_PUSH_ALT(p
, s
, sprev
);
3126 n
= enclen(encode
, s
);
3139 CASE_OP(ANYCHAR_STAR_PEEK_NEXT
)
3143 c
= p
->anychar_star_peek_next
.c
;
3145 while (DATA_ENSURE_CHECK1
) {
3147 STACK_PUSH_ALT(p
, s
, sprev
);
3149 n
= enclen(encode
, s
);
3151 if (ONIGENC_IS_MBC_NEWLINE(encode
, s
, end
)) goto fail
;
3158 CASE_OP(ANYCHAR_ML_STAR_PEEK_NEXT
)
3162 c
= p
->anychar_star_peek_next
.c
;
3164 while (DATA_ENSURE_CHECK1
) {
3166 STACK_PUSH_ALT(p
, s
, sprev
);
3168 n
= enclen(encode
, s
);
3184 if (! ONIGENC_IS_MBC_WORD(encode
, s
, end
))
3187 s
+= enclen(encode
, s
);
3193 if (! ONIGENC_IS_MBC_WORD_ASCII(encode
, s
, end
))
3196 s
+= enclen(encode
, s
);
3202 if (ONIGENC_IS_MBC_WORD(encode
, s
, end
))
3205 s
+= enclen(encode
, s
);
3209 CASE_OP(NO_WORD_ASCII
)
3211 if (ONIGENC_IS_MBC_WORD_ASCII(encode
, s
, end
))
3214 s
+= enclen(encode
, s
);
3218 CASE_OP(WORD_BOUNDARY
)
3222 mode
= p
->word_boundary
.mode
;
3223 if (ON_STR_BEGIN(s
)) {
3225 if (! IS_MBC_WORD_ASCII_MODE(encode
, s
, end
, mode
))
3228 else if (ON_STR_END(s
)) {
3229 if (! IS_MBC_WORD_ASCII_MODE(encode
, sprev
, end
, mode
))
3233 if (IS_MBC_WORD_ASCII_MODE(encode
, s
, end
, mode
)
3234 == IS_MBC_WORD_ASCII_MODE(encode
, sprev
, end
, mode
))
3241 CASE_OP(NO_WORD_BOUNDARY
)
3245 mode
= p
->word_boundary
.mode
;
3246 if (ON_STR_BEGIN(s
)) {
3247 if (DATA_ENSURE_CHECK1
&& IS_MBC_WORD_ASCII_MODE(encode
, s
, end
, mode
))
3250 else if (ON_STR_END(s
)) {
3251 if (IS_MBC_WORD_ASCII_MODE(encode
, sprev
, end
, mode
))
3255 if (IS_MBC_WORD_ASCII_MODE(encode
, s
, end
, mode
)
3256 != IS_MBC_WORD_ASCII_MODE(encode
, sprev
, end
, mode
))
3263 #ifdef USE_WORD_BEGIN_END
3268 mode
= p
->word_boundary
.mode
;
3269 if (DATA_ENSURE_CHECK1
&& IS_MBC_WORD_ASCII_MODE(encode
, s
, end
, mode
)) {
3270 if (ON_STR_BEGIN(s
) || !IS_MBC_WORD_ASCII_MODE(encode
, sprev
, end
, mode
)) {
3282 mode
= p
->word_boundary
.mode
;
3283 if (!ON_STR_BEGIN(s
) && IS_MBC_WORD_ASCII_MODE(encode
, sprev
, end
, mode
)) {
3284 if (ON_STR_END(s
) || ! IS_MBC_WORD_ASCII_MODE(encode
, s
, end
, mode
)) {
3293 CASE_OP(TEXT_SEGMENT_BOUNDARY
)
3297 switch (p
->text_segment_boundary
.type
) {
3298 case EXTENDED_GRAPHEME_CLUSTER_BOUNDARY
:
3299 is_break
= onigenc_egcb_is_break_position(encode
, s
, sprev
, str
, end
);
3301 #ifdef USE_UNICODE_WORD_BREAK
3303 is_break
= onigenc_wb_is_break_position(encode
, s
, sprev
, str
, end
);
3307 goto bytecode_error
;
3311 if (p
->text_segment_boundary
.not != 0)
3312 is_break
= ! is_break
;
3314 if (is_break
!= 0) {
3324 if (! ON_STR_BEGIN(s
)) goto fail
;
3330 if (! ON_STR_END(s
)) goto fail
;
3336 if (ON_STR_BEGIN(s
)) {
3337 if (IS_NOTBOL(msa
->options
)) goto fail
;
3341 else if (ONIGENC_IS_MBC_NEWLINE(encode
, sprev
, end
) && !ON_STR_END(s
)) {
3348 if (ON_STR_END(s
)) {
3349 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3350 if (IS_EMPTY_STR
|| !ONIGENC_IS_MBC_NEWLINE(encode
, sprev
, end
)) {
3352 if (IS_NOTEOL(msa
->options
)) goto fail
;
3355 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3359 else if (ONIGENC_IS_MBC_NEWLINE(encode
, s
, end
)) {
3363 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3364 else if (ONIGENC_IS_MBC_CRNL(encode
, s
, end
)) {
3371 CASE_OP(SEMI_END_BUF
)
3372 if (ON_STR_END(s
)) {
3373 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3374 if (IS_EMPTY_STR
|| !ONIGENC_IS_MBC_NEWLINE(encode
, sprev
, end
)) {
3376 if (IS_NOTEOL(msa
->options
)) goto fail
;
3379 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3383 else if (ONIGENC_IS_MBC_NEWLINE(encode
, s
, end
) &&
3384 ON_STR_END(s
+ enclen(encode
, s
))) {
3388 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3389 else if (ONIGENC_IS_MBC_CRNL(encode
, s
, end
)) {
3390 UChar
* ss
= s
+ enclen(encode
, s
);
3391 ss
+= enclen(encode
, ss
);
3392 if (ON_STR_END(ss
)) {
3400 CASE_OP(BEGIN_POSITION
)
3401 if (s
!= msa
->start
)
3407 CASE_OP(MEMORY_START_PUSH
)
3408 mem
= p
->memory_start
.num
;
3409 STACK_PUSH_MEM_START(mem
, s
);
3413 CASE_OP(MEMORY_START
)
3414 mem
= p
->memory_start
.num
;
3415 mem_start_stk
[mem
] = (StackIndex
)((void* )s
);
3419 CASE_OP(MEMORY_END_PUSH
)
3420 mem
= p
->memory_end
.num
;
3421 STACK_PUSH_MEM_END(mem
, s
);
3426 mem
= p
->memory_end
.num
;
3427 mem_end_stk
[mem
] = (StackIndex
)((void* )s
);
3432 CASE_OP(MEMORY_END_PUSH_REC
)
3433 mem
= p
->memory_end
.num
;
3434 STACK_GET_MEM_START(mem
, stkp
); /* should be before push mem-end. */
3435 si
= GET_STACK_INDEX(stkp
);
3436 STACK_PUSH_MEM_END(mem
, s
);
3437 mem_start_stk
[mem
] = si
;
3441 CASE_OP(MEMORY_END_REC
)
3442 mem
= p
->memory_end
.num
;
3443 mem_end_stk
[mem
] = (StackIndex
)((void* )s
);
3444 STACK_GET_MEM_START(mem
, stkp
);
3446 if (MEM_STATUS_AT(reg
->bt_mem_start
, mem
))
3447 mem_start_stk
[mem
] = GET_STACK_INDEX(stkp
);
3449 mem_start_stk
[mem
] = (StackIndex
)((void* )stkp
->u
.mem
.pstr
);
3451 STACK_PUSH_MEM_END_MARK(mem
);
3465 mem
= p
->backref_n
.n1
;
3469 UChar
*pstart
, *pend
;
3471 if (mem_end_stk
[mem
] == INVALID_STACK_INDEX
) goto fail
;
3472 if (mem_start_stk
[mem
] == INVALID_STACK_INDEX
) goto fail
;
3474 if (MEM_STATUS_AT(reg
->bt_mem_start
, mem
))
3475 pstart
= STACK_AT(mem_start_stk
[mem
])->u
.mem
.pstr
;
3477 pstart
= (UChar
* )((void* )mem_start_stk
[mem
]);
3479 pend
= (MEM_STATUS_AT(reg
->bt_mem_end
, mem
)
3480 ? STACK_AT(mem_end_stk
[mem
])->u
.mem
.pstr
3481 : (UChar
* )((void* )mem_end_stk
[mem
]));
3482 n
= (int )(pend
- pstart
);
3486 STRING_CMP(s
, pstart
, n
);
3487 while (sprev
+ (len
= enclen(encode
, sprev
)) < s
)
3494 CASE_OP(BACKREF_N_IC
)
3495 mem
= p
->backref_n
.n1
;
3498 UChar
*pstart
, *pend
;
3500 if (mem_end_stk
[mem
] == INVALID_STACK_INDEX
) goto fail
;
3501 if (mem_start_stk
[mem
] == INVALID_STACK_INDEX
) goto fail
;
3503 if (MEM_STATUS_AT(reg
->bt_mem_start
, mem
))
3504 pstart
= STACK_AT(mem_start_stk
[mem
])->u
.mem
.pstr
;
3506 pstart
= (UChar
* )((void* )mem_start_stk
[mem
]);
3508 pend
= (MEM_STATUS_AT(reg
->bt_mem_end
, mem
)
3509 ? STACK_AT(mem_end_stk
[mem
])->u
.mem
.pstr
3510 : (UChar
* )((void* )mem_end_stk
[mem
]));
3511 n
= (int )(pend
- pstart
);
3515 STRING_CMP_IC(case_fold_flag
, pstart
, &s
, n
);
3516 while (sprev
+ (len
= enclen(encode
, sprev
)) < s
)
3523 CASE_OP(BACKREF_MULTI
)
3526 UChar
*pstart
, *pend
, *swork
;
3528 tlen
= p
->backref_general
.num
;
3529 for (i
= 0; i
< tlen
; i
++) {
3530 mem
= tlen
== 1 ? p
->backref_general
.n1
: p
->backref_general
.ns
[i
];
3532 if (mem_end_stk
[mem
] == INVALID_STACK_INDEX
) continue;
3533 if (mem_start_stk
[mem
] == INVALID_STACK_INDEX
) continue;
3535 if (MEM_STATUS_AT(reg
->bt_mem_start
, mem
))
3536 pstart
= STACK_AT(mem_start_stk
[mem
])->u
.mem
.pstr
;
3538 pstart
= (UChar
* )((void* )mem_start_stk
[mem
]);
3540 pend
= (MEM_STATUS_AT(reg
->bt_mem_end
, mem
)
3541 ? STACK_AT(mem_end_stk
[mem
])->u
.mem
.pstr
3542 : (UChar
* )((void* )mem_end_stk
[mem
]));
3543 n
= (int )(pend
- pstart
);
3548 STRING_CMP_VALUE(swork
, pstart
, n
, is_fail
);
3549 if (is_fail
) continue;
3551 while (sprev
+ (len
= enclen(encode
, sprev
)) < s
)
3554 break; /* success */
3556 if (i
== tlen
) goto fail
;
3561 CASE_OP(BACKREF_MULTI_IC
)
3564 UChar
*pstart
, *pend
, *swork
;
3566 tlen
= p
->backref_general
.num
;
3567 for (i
= 0; i
< tlen
; i
++) {
3568 mem
= tlen
== 1 ? p
->backref_general
.n1
: p
->backref_general
.ns
[i
];
3570 if (mem_end_stk
[mem
] == INVALID_STACK_INDEX
) continue;
3571 if (mem_start_stk
[mem
] == INVALID_STACK_INDEX
) continue;
3573 if (MEM_STATUS_AT(reg
->bt_mem_start
, mem
))
3574 pstart
= STACK_AT(mem_start_stk
[mem
])->u
.mem
.pstr
;
3576 pstart
= (UChar
* )((void* )mem_start_stk
[mem
]);
3578 pend
= (MEM_STATUS_AT(reg
->bt_mem_end
, mem
)
3579 ? STACK_AT(mem_end_stk
[mem
])->u
.mem
.pstr
3580 : (UChar
* )((void* )mem_end_stk
[mem
]));
3581 n
= (int )(pend
- pstart
);
3586 STRING_CMP_VALUE_IC(case_fold_flag
, pstart
, &swork
, n
, is_fail
);
3587 if (is_fail
) continue;
3589 while (sprev
+ (len
= enclen(encode
, sprev
)) < s
)
3592 break; /* success */
3594 if (i
== tlen
) goto fail
;
3599 #ifdef USE_BACKREF_WITH_LEVEL
3600 CASE_OP(BACKREF_WITH_LEVEL_IC
)
3601 n
= 1; /* ignore case */
3602 goto backref_with_level
;
3603 CASE_OP(BACKREF_WITH_LEVEL
)
3612 level
= p
->backref_general
.nest_level
;
3613 tlen
= p
->backref_general
.num
;
3614 mems
= tlen
== 1 ? &(p
->backref_general
.n1
) : p
->backref_general
.ns
;
3617 if (backref_match_at_nested_level(reg
, stk
, stk_base
, n
,
3618 case_fold_flag
, level
, (int )tlen
, mems
, &s
, end
)) {
3621 while (sprev
+ (len
= enclen(encode
, sprev
)) < s
)
3632 CASE_OP(BACKREF_CHECK
)
3636 tlen
= p
->backref_general
.num
;
3637 mems
= tlen
== 1 ? &(p
->backref_general
.n1
) : p
->backref_general
.ns
;
3639 for (i
= 0; i
< tlen
; i
++) {
3641 if (mem_end_stk
[mem
] == INVALID_STACK_INDEX
) continue;
3642 if (mem_start_stk
[mem
] == INVALID_STACK_INDEX
) continue;
3643 break; /* success */
3645 if (i
== tlen
) goto fail
;
3650 #ifdef USE_BACKREF_WITH_LEVEL
3651 CASE_OP(BACKREF_CHECK_WITH_LEVEL
)
3656 level
= p
->backref_general
.nest_level
;
3657 tlen
= p
->backref_general
.num
;
3658 mems
= tlen
== 1 ? &(p
->backref_general
.n1
) : p
->backref_general
.ns
;
3660 if (backref_check_at_nested_level(reg
, stk
, stk_base
,
3661 (int )level
, (int )tlen
, mems
) == 0)
3668 CASE_OP(EMPTY_CHECK_START
)
3669 mem
= p
->empty_check_start
.mem
; /* mem: null check id */
3670 STACK_PUSH_EMPTY_CHECK_START(mem
, s
);
3674 CASE_OP(EMPTY_CHECK_END
)
3678 mem
= p
->empty_check_end
.mem
; /* mem: null check id */
3679 STACK_EMPTY_CHECK(is_empty
, mem
, s
);
3682 #ifdef ONIG_DEBUG_MATCH
3683 fprintf(stderr
, "EMPTY_CHECK_END: skip id:%d, s:%p\n", (int )mem
, s
);
3686 /* empty loop founded, skip next instruction */
3687 #if defined(ONIG_DEBUG) && !defined(USE_DIRECT_THREADED_CODE)
3688 switch (p
->opcode
) {
3692 case OP_REPEAT_INC_NG
:
3693 case OP_REPEAT_INC_SG
:
3694 case OP_REPEAT_INC_NG_SG
:
3698 goto unexpected_bytecode_error
;
3708 #ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT
3709 CASE_OP(EMPTY_CHECK_END_MEMST
)
3713 mem
= p
->empty_check_end
.mem
; /* mem: null check id */
3714 STACK_EMPTY_CHECK_MEM(is_empty
, mem
, s
, reg
);
3717 #ifdef ONIG_DEBUG_MATCH
3718 fprintf(stderr
, "EMPTY_CHECK_END_MEM: skip id:%d, s:%p\n", (int)mem
, s
);
3720 if (is_empty
== -1) goto fail
;
3721 goto empty_check_found
;
3728 CASE_OP(EMPTY_CHECK_END_MEMST_PUSH
)
3732 mem
= p
->empty_check_end
.mem
; /* mem: null check id */
3733 #ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT
3734 STACK_EMPTY_CHECK_MEM_REC(is_empty
, mem
, s
, reg
);
3736 STACK_EMPTY_CHECK_REC(is_empty
, mem
, s
);
3740 #ifdef ONIG_DEBUG_MATCH
3741 fprintf(stderr
, "EMPTY_CHECK_END_MEM_PUSH: skip id:%d, s:%p\n",
3744 if (is_empty
== -1) goto fail
;
3745 goto empty_check_found
;
3748 STACK_PUSH_EMPTY_CHECK_END(mem
);
3755 addr
= p
->jump
.addr
;
3757 CHECK_INTERRUPT_JUMP_OUT
;
3760 addr
= p
->push
.addr
;
3761 STACK_PUSH_ALT(p
+ addr
, s
, sprev
);
3766 addr
= p
->push
.addr
;
3767 STACK_PUSH_SUPER_ALT(p
+ addr
, s
, sprev
);
3773 /* for stop backtrack */
3774 /* CHECK_RETRY_LIMIT_IN_MATCH; */
3778 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
3779 CASE_OP(PUSH_OR_JUMP_EXACT1
)
3783 addr
= p
->push_or_jump_exact1
.addr
;
3784 c
= p
->push_or_jump_exact1
.c
;
3785 if (DATA_ENSURE_CHECK1
&& c
== *s
) {
3786 STACK_PUSH_ALT(p
+ addr
, s
, sprev
);
3795 CASE_OP(PUSH_IF_PEEK_NEXT
)
3799 addr
= p
->push_if_peek_next
.addr
;
3800 c
= p
->push_if_peek_next
.c
;
3802 STACK_PUSH_ALT(p
+ addr
, s
, sprev
);
3811 mem
= p
->repeat
.id
; /* mem: OP_REPEAT ID */
3812 addr
= p
->repeat
.addr
;
3815 repeat_stk
[mem
] = GET_STACK_INDEX(stk
);
3816 STACK_PUSH_REPEAT(mem
, p
+ 1);
3818 if (reg
->repeat_range
[mem
].lower
== 0) {
3819 STACK_PUSH_ALT(p
+ addr
, s
, sprev
);
3825 mem
= p
->repeat
.id
; /* mem: OP_REPEAT ID */
3826 addr
= p
->repeat
.addr
;
3829 repeat_stk
[mem
] = GET_STACK_INDEX(stk
);
3830 STACK_PUSH_REPEAT(mem
, p
+ 1);
3832 if (reg
->repeat_range
[mem
].lower
== 0) {
3833 STACK_PUSH_ALT(p
+ 1, s
, sprev
);
3841 mem
= p
->repeat_inc
.id
; /* mem: OP_REPEAT ID */
3842 si
= repeat_stk
[mem
];
3843 stkp
= STACK_AT(si
);
3846 stkp
->u
.repeat
.count
++;
3847 if (stkp
->u
.repeat
.count
>= reg
->repeat_range
[mem
].upper
) {
3848 /* end of repeat. Nothing to do. */
3851 else if (stkp
->u
.repeat
.count
>= reg
->repeat_range
[mem
].lower
) {
3853 STACK_PUSH_ALT(p
, s
, sprev
);
3854 p
= STACK_AT(si
)->u
.repeat
.pcode
; /* Don't use stkp after PUSH. */
3857 p
= stkp
->u
.repeat
.pcode
;
3859 STACK_PUSH_REPEAT_INC(si
);
3860 CHECK_INTERRUPT_JUMP_OUT
;
3862 CASE_OP(REPEAT_INC_SG
)
3863 mem
= p
->repeat_inc
.id
; /* mem: OP_REPEAT ID */
3864 STACK_GET_REPEAT(mem
, stkp
);
3865 si
= GET_STACK_INDEX(stkp
);
3868 CASE_OP(REPEAT_INC_NG
)
3869 mem
= p
->repeat_inc
.id
; /* mem: OP_REPEAT ID */
3870 si
= repeat_stk
[mem
];
3871 stkp
= STACK_AT(si
);
3874 stkp
->u
.repeat
.count
++;
3875 if (stkp
->u
.repeat
.count
< reg
->repeat_range
[mem
].upper
) {
3876 if (stkp
->u
.repeat
.count
>= reg
->repeat_range
[mem
].lower
) {
3877 Operation
* pcode
= stkp
->u
.repeat
.pcode
;
3879 STACK_PUSH_REPEAT_INC(si
);
3880 STACK_PUSH_ALT(pcode
, s
, sprev
);
3884 p
= stkp
->u
.repeat
.pcode
;
3885 STACK_PUSH_REPEAT_INC(si
);
3888 else if (stkp
->u
.repeat
.count
== reg
->repeat_range
[mem
].upper
) {
3889 STACK_PUSH_REPEAT_INC(si
);
3892 CHECK_INTERRUPT_JUMP_OUT
;
3894 CASE_OP(REPEAT_INC_NG_SG
)
3895 mem
= p
->repeat_inc
.id
; /* mem: OP_REPEAT ID */
3896 STACK_GET_REPEAT(mem
, stkp
);
3897 si
= GET_STACK_INDEX(stkp
);
3900 CASE_OP(PREC_READ_START
)
3901 STACK_PUSH_PREC_READ_START(s
, sprev
);
3905 CASE_OP(PREC_READ_END
)
3906 STACK_GET_PREC_READ_START(stkp
);
3907 s
= stkp
->u
.state
.pstr
;
3908 sprev
= stkp
->u
.state
.pstr_prev
;
3909 STACK_PUSH(STK_PREC_READ_END
,0,0,0);
3913 CASE_OP(PREC_READ_NOT_START
)
3914 addr
= p
->prec_read_not_start
.addr
;
3915 STACK_PUSH_ALT_PREC_READ_NOT(p
+ addr
, s
, sprev
);
3919 CASE_OP(PREC_READ_NOT_END
)
3920 STACK_POP_TIL_ALT_PREC_READ_NOT
;
3923 CASE_OP(ATOMIC_START
)
3924 STACK_PUSH_TO_VOID_START
;
3929 STACK_EXEC_TO_VOID(stkp
);
3933 CASE_OP(LOOK_BEHIND
)
3934 tlen
= p
->look_behind
.len
;
3935 s
= (UChar
* )ONIGENC_STEP_BACK(encode
, str
, s
, (int )tlen
);
3936 if (IS_NULL(s
)) goto fail
;
3937 sprev
= (UChar
* )onigenc_get_prev_char_head(encode
, str
, s
);
3941 CASE_OP(LOOK_BEHIND_NOT_START
)
3942 addr
= p
->look_behind_not_start
.addr
;
3943 tlen
= p
->look_behind_not_start
.len
;
3944 q
= (UChar
* )ONIGENC_STEP_BACK(encode
, str
, s
, (int )tlen
);
3946 /* too short case -> success. ex. /(?<!XXX)a/.match("a")
3947 If you want to change to fail, replace following line. */
3952 STACK_PUSH_ALT_LOOK_BEHIND_NOT(p
+ addr
, s
, sprev
);
3954 sprev
= (UChar
* )onigenc_get_prev_char_head(encode
, str
, s
);
3959 CASE_OP(LOOK_BEHIND_NOT_END
)
3960 STACK_POP_TIL_ALT_LOOK_BEHIND_NOT
;
3966 addr
= p
->call
.addr
;
3967 INC_OP
; STACK_PUSH_CALL_FRAME(p
);
3968 p
= reg
->ops
+ addr
;
3977 CASE_OP(PUSH_SAVE_VAL
)
3981 type
= p
->push_save_val
.type
;
3982 mem
= p
->push_save_val
.id
; /* mem: save id */
3983 switch ((enum SaveType
)type
) {
3985 STACK_PUSH_SAVE_VAL(mem
, type
, s
);
3989 STACK_PUSH_SAVE_VAL_WITH_SPREV(mem
, type
, s
);
3992 case SAVE_RIGHT_RANGE
:
3993 STACK_PUSH_SAVE_VAL(mem
, SAVE_RIGHT_RANGE
, right_range
);
4003 enum SaveType save_type
;
4005 type
= p
->update_var
.type
;
4006 mem
= p
->update_var
.id
; /* mem: save id */
4008 switch ((enum UpdateVarType
)type
) {
4009 case UPDATE_VAR_KEEP_FROM_STACK_LAST
:
4010 STACK_GET_SAVE_VAL_TYPE_LAST(SAVE_KEEP
, keep
);
4012 case UPDATE_VAR_S_FROM_STACK
:
4013 STACK_GET_SAVE_VAL_TYPE_LAST_ID_WITH_SPREV(SAVE_S
, mem
, s
);
4015 case UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK
:
4017 goto get_save_val_type_last_id
;
4019 case UPDATE_VAR_RIGHT_RANGE_FROM_STACK
:
4020 save_type
= SAVE_RIGHT_RANGE
;
4021 get_save_val_type_last_id
:
4022 STACK_GET_SAVE_VAL_TYPE_LAST_ID(save_type
, mem
, right_range
);
4024 case UPDATE_VAR_RIGHT_RANGE_INIT
:
4033 CASE_OP(CALLOUT_CONTENTS
)
4034 of
= ONIG_CALLOUT_OF_CONTENTS
;
4035 mem
= p
->callout_contents
.num
;
4036 goto callout_common_entry
;
4039 CASE_OP(CALLOUT_NAME
)
4044 CalloutListEntry
* e
;
4045 OnigCalloutFunc func
;
4046 OnigCalloutArgs args
;
4048 of
= ONIG_CALLOUT_OF_NAME
;
4049 name_id
= p
->callout_name
.id
;
4050 mem
= p
->callout_name
.num
;
4052 callout_common_entry
:
4053 e
= onig_reg_callout_list_at(reg
, mem
);
4055 if (of
== ONIG_CALLOUT_OF_NAME
) {
4056 func
= onig_get_callout_start_func(reg
, mem
);
4059 name_id
= ONIG_NON_NAME_ID
;
4060 func
= msa
->mp
->progress_callout_of_contents
;
4063 if (IS_NOT_NULL(func
) && (in
& ONIG_CALLOUT_IN_PROGRESS
) != 0) {
4064 CALLOUT_BODY(func
, ONIG_CALLOUT_IN_PROGRESS
, name_id
,
4065 (int )mem
, msa
->mp
->callout_user_data
, args
, call_result
);
4066 switch (call_result
) {
4067 case ONIG_CALLOUT_FAIL
:
4070 case ONIG_CALLOUT_SUCCESS
:
4071 goto retraction_callout2
;
4073 default: /* error code */
4074 if (call_result
> 0) {
4075 call_result
= ONIGERR_INVALID_ARGUMENT
;
4077 best_len
= call_result
;
4083 retraction_callout2
:
4084 if ((in
& ONIG_CALLOUT_IN_RETRACTION
) != 0) {
4085 if (of
== ONIG_CALLOUT_OF_NAME
) {
4086 if (IS_NOT_NULL(func
)) {
4087 STACK_PUSH_CALLOUT_NAME(name_id
, mem
, func
);
4091 func
= msa
->mp
->retraction_callout_of_contents
;
4092 if (IS_NOT_NULL(func
)) {
4093 STACK_PUSH_CALLOUT_CONTENTS(mem
, func
);
4106 #ifdef ONIG_DEBUG_STATISTICS
4112 #ifdef ONIG_DEBUG_STATISTICS
4118 p
= stk
->u
.state
.pcode
;
4119 s
= stk
->u
.state
.pstr
;
4120 sprev
= stk
->u
.state
.pstr_prev
;
4121 CHECK_RETRY_LIMIT_IN_MATCH
;
4125 goto bytecode_error
;
4127 } BYTECODE_INTERPRETER_END
;
4136 return ONIGERR_STACK_BUG
;
4141 return ONIGERR_UNDEFINED_BYTECODE
;
4143 #if defined(ONIG_DEBUG) && !defined(USE_DIRECT_THREADED_CODE)
4144 unexpected_bytecode_error
:
4146 return ONIGERR_UNEXPECTED_BYTECODE
;
4149 #ifdef USE_RETRY_LIMIT_IN_MATCH
4150 retry_limit_in_match_over
:
4152 return ONIGERR_RETRY_LIMIT_IN_MATCH_OVER
;
4158 slow_search(OnigEncoding enc
, UChar
* target
, UChar
* target_end
,
4159 const UChar
* text
, const UChar
* text_end
, UChar
* text_range
)
4161 UChar
*t
, *p
, *s
, *end
;
4163 end
= (UChar
* )text_end
;
4164 end
-= target_end
- target
- 1;
4165 if (end
> text_range
)
4171 if (*s
== *target
) {
4174 while (t
< target_end
) {
4179 if (t
== target_end
)
4182 s
+= enclen(enc
, s
);
4185 return (UChar
* )NULL
;
4189 str_lower_case_match(OnigEncoding enc
, int case_fold_flag
,
4190 const UChar
* t
, const UChar
* tend
,
4191 const UChar
* p
, const UChar
* end
)
4194 UChar
*q
, lowbuf
[ONIGENC_MBC_CASE_FOLD_MAXLEN
];
4197 lowlen
= ONIGENC_MBC_CASE_FOLD(enc
, case_fold_flag
, &p
, end
, lowbuf
);
4199 while (lowlen
> 0) {
4200 if (*t
++ != *q
++) return 0;
4209 slow_search_ic(OnigEncoding enc
, int case_fold_flag
,
4210 UChar
* target
, UChar
* target_end
,
4211 const UChar
* text
, const UChar
* text_end
, UChar
* text_range
)
4215 end
= (UChar
* )text_end
;
4216 end
-= target_end
- target
- 1;
4217 if (end
> text_range
)
4223 if (str_lower_case_match(enc
, case_fold_flag
, target
, target_end
,
4227 s
+= enclen(enc
, s
);
4230 return (UChar
* )NULL
;
4234 slow_search_backward(OnigEncoding enc
, UChar
* target
, UChar
* target_end
,
4235 const UChar
* text
, const UChar
* adjust_text
,
4236 const UChar
* text_end
, const UChar
* text_start
)
4240 s
= (UChar
* )text_end
;
4241 s
-= (target_end
- target
);
4243 s
= (UChar
* )text_start
;
4245 s
= ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc
, adjust_text
, s
);
4248 //if text is not null,the logic is correct.
4249 //this function is only invoked by backward_search_range,parameter text come
4250 //from range, which is checked by "if (range == 0) goto fail" in line 4512
4251 //so the check is just for passing static analysis.
4252 if(IS_NULL(s
))break;
4253 if (*s
== *target
) {
4256 while (t
< target_end
) {
4261 if (t
== target_end
)
4264 s
= (UChar
* )onigenc_get_prev_char_head(enc
, adjust_text
, s
);
4267 return (UChar
* )NULL
;
4271 slow_search_backward_ic(OnigEncoding enc
, int case_fold_flag
,
4272 UChar
* target
, UChar
* target_end
,
4273 const UChar
* text
, const UChar
* adjust_text
,
4274 const UChar
* text_end
, const UChar
* text_start
)
4278 s
= (UChar
* )text_end
;
4279 s
-= (target_end
- target
);
4281 s
= (UChar
* )text_start
;
4283 s
= ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc
, adjust_text
, s
);
4286 if (str_lower_case_match(enc
, case_fold_flag
,
4287 target
, target_end
, s
, text_end
))
4290 s
= (UChar
* )onigenc_get_prev_char_head(enc
, adjust_text
, s
);
4293 return (UChar
* )NULL
;
4298 sunday_quick_search_step_forward(regex_t
* reg
,
4299 const UChar
* target
, const UChar
* target_end
,
4300 const UChar
* text
, const UChar
* text_end
,
4301 const UChar
* text_range
)
4303 const UChar
*s
, *se
, *t
, *p
, *end
;
4309 #ifdef ONIG_DEBUG_SEARCH
4311 "sunday_quick_search_step_forward: text: %p, text_end: %p, text_range: %p\n", text
, text_end
, text_range
);
4316 tail
= target_end
- 1;
4317 tlen1
= (int )(tail
- target
);
4319 if (end
+ tlen1
> text_end
)
4320 end
= text_end
- tlen1
;
4322 map_offset
= reg
->map_offset
;
4329 if (t
== target
) return (UChar
* )s
;
4332 if (se
+ map_offset
>= text_end
) break;
4333 skip
= reg
->map
[*(se
+ map_offset
)];
4337 s
+= enclen(enc
, s
);
4338 } while ((s
- t
) < skip
&& s
< end
);
4342 s
= onigenc_get_right_adjust_char_head(enc
, text
, s
);
4346 return (UChar
* )NULL
;
4350 sunday_quick_search(regex_t
* reg
, const UChar
* target
, const UChar
* target_end
,
4351 const UChar
* text
, const UChar
* text_end
,
4352 const UChar
* text_range
)
4354 const UChar
*s
, *t
, *p
, *end
;
4358 end
= text_range
+ (target_end
- target
);
4362 map_offset
= reg
->map_offset
;
4363 tail
= target_end
- 1;
4364 s
= text
+ (tail
- target
);
4370 if (t
== target
) return (UChar
* )p
;
4373 if (s
+ map_offset
>= text_end
) break;
4374 s
+= reg
->map
[*(s
+ map_offset
)];
4377 return (UChar
* )NULL
;
4381 sunday_quick_search_case_fold(regex_t
* reg
,
4382 const UChar
* target
, const UChar
* target_end
,
4383 const UChar
* text
, const UChar
* text_end
,
4384 const UChar
* text_range
)
4386 const UChar
*s
, *se
, *end
;
4393 #ifdef ONIG_DEBUG_SEARCH
4395 "sunday_quick_search_case_fold: text: %p, text_end: %p, text_range: %p\n", text
, text_end
, text_range
);
4399 case_fold_flag
= reg
->case_fold_flag
;
4401 tail
= target_end
- 1;
4402 tlen1
= (int )(tail
- target
);
4404 if (end
+ tlen1
> text_end
)
4405 end
= text_end
- tlen1
;
4407 map_offset
= reg
->map_offset
;
4411 if (str_lower_case_match(enc
, case_fold_flag
, target
, target_end
,
4416 if (se
+ map_offset
>= text_end
) break;
4417 skip
= reg
->map
[*(se
+ map_offset
)];
4421 s
+= enclen(enc
, s
);
4422 } while ((s
- p
) < skip
&& s
< end
);
4424 /* This is faster than prev code for long text. ex: /(?i)Twain/ */
4427 s
= onigenc_get_right_adjust_char_head(enc
, text
, s
);
4431 return (UChar
* )NULL
;
4435 map_search(OnigEncoding enc
, UChar map
[],
4436 const UChar
* text
, const UChar
* text_range
)
4438 const UChar
*s
= text
;
4440 while (s
< text_range
) {
4441 if (map
[*s
]) return (UChar
* )s
;
4443 s
+= enclen(enc
, s
);
4445 return (UChar
* )NULL
;
4449 map_search_backward(OnigEncoding enc
, UChar map
[],
4450 const UChar
* text
, const UChar
* adjust_text
,
4451 const UChar
* text_start
)
4453 const UChar
*s
= text_start
;
4456 //if text is not null,the logic is correct.
4457 //this function is only invoked by backward_search_range,parameter text come
4458 //from range, which is checked by "if (range == 0) goto fail" in line 4512
4459 //so the check is just for passing static analysis.
4460 if(IS_NULL(s
))break;
4461 if (map
[*s
]) return (UChar
* )s
;
4463 s
= onigenc_get_prev_char_head(enc
, adjust_text
, s
);
4465 return (UChar
* )NULL
;
4468 onig_match(regex_t
* reg
, const UChar
* str
, const UChar
* end
, const UChar
* at
,
4469 OnigRegion
* region
, OnigOptionType option
)
4474 onig_initialize_match_param(&mp
);
4475 r
= onig_match_with_param(reg
, str
, end
, at
, region
, option
, &mp
);
4476 onig_free_match_param_content(&mp
);
4481 onig_match_with_param(regex_t
* reg
, const UChar
* str
, const UChar
* end
,
4482 const UChar
* at
, OnigRegion
* region
, OnigOptionType option
,
4489 ADJUST_MATCH_PARAM(reg
, mp
);
4490 MATCH_ARG_INIT(msa
, reg
, option
, region
, at
, mp
);
4492 #ifdef USE_POSIX_API_REGION_OPTION
4493 && !IS_POSIX_REGION(option
)
4496 r
= onig_region_resize_clear(region
, reg
->num_mem
+ 1);
4502 if (ONIG_IS_OPTION_ON(option
, ONIG_OPTION_CHECK_VALIDITY_OF_STRING
)) {
4503 if (! ONIGENC_IS_VALID_MBC_STRING(reg
->enc
, str
, end
)) {
4504 r
= ONIGERR_INVALID_WIDE_CHAR_VALUE
;
4509 prev
= (UChar
* )onigenc_get_prev_char_head(reg
->enc
, str
, at
);
4510 r
= match_at(reg
, str
, end
, end
, at
, prev
, &msa
);
4514 MATCH_ARG_FREE(msa
);
4519 forward_search_range(regex_t
* reg
, const UChar
* str
, const UChar
* end
, UChar
* s
,
4520 UChar
* range
, UChar
** low
, UChar
** high
, UChar
** low_prev
)
4522 UChar
*p
, *pprev
= (UChar
* )NULL
;
4524 #ifdef ONIG_DEBUG_SEARCH
4525 fprintf(stderr
, "forward_search_range: str: %p, end: %p, s: %p, range: %p\n",
4526 str
, end
, s
, range
);
4530 if (reg
->dmin
> 0) {
4531 if (ONIGENC_IS_SINGLEBYTE(reg
->enc
)) {
4535 UChar
*q
= p
+ reg
->dmin
;
4537 if (q
>= end
) return 0; /* fail */
4538 while (p
< q
) p
+= enclen(reg
->enc
, p
);
4543 switch (reg
->optimize
) {
4545 p
= slow_search(reg
->enc
, reg
->exact
, reg
->exact_end
, p
, end
, range
);
4547 case OPTIMIZE_STR_CASE_FOLD
:
4548 p
= slow_search_ic(reg
->enc
, reg
->case_fold_flag
,
4549 reg
->exact
, reg
->exact_end
, p
, end
, range
);
4552 case OPTIMIZE_STR_CASE_FOLD_FAST
:
4553 p
= sunday_quick_search_case_fold(reg
, reg
->exact
, reg
->exact_end
, p
, end
,
4557 case OPTIMIZE_STR_FAST
:
4558 p
= sunday_quick_search(reg
, reg
->exact
, reg
->exact_end
, p
, end
, range
);
4561 case OPTIMIZE_STR_FAST_STEP_FORWARD
:
4562 p
= sunday_quick_search_step_forward(reg
, reg
->exact
, reg
->exact_end
,
4567 p
= map_search(reg
->enc
, reg
->map
, p
, range
);
4571 if (p
&& p
< range
) {
4572 if (p
- reg
->dmin
< s
) {
4575 p
+= enclen(reg
->enc
, p
);
4579 if (reg
->sub_anchor
) {
4582 switch (reg
->sub_anchor
) {
4583 case ANCR_BEGIN_LINE
:
4584 if (!ON_STR_BEGIN(p
)) {
4585 prev
= onigenc_get_prev_char_head(reg
->enc
,
4586 (pprev
? pprev
: str
), p
);
4587 if (!ONIGENC_IS_MBC_NEWLINE(reg
->enc
, prev
, end
))
4593 if (ON_STR_END(p
)) {
4594 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
4595 prev
= (UChar
* )onigenc_get_prev_char_head(reg
->enc
,
4596 (pprev
? pprev
: str
), p
);
4597 if (prev
&& ONIGENC_IS_MBC_NEWLINE(reg
->enc
, prev
, end
))
4601 else if (! ONIGENC_IS_MBC_NEWLINE(reg
->enc
, p
, end
)
4602 #ifdef USE_CRNL_AS_LINE_TERMINATOR
4603 && ! ONIGENC_IS_MBC_CRNL(reg
->enc
, p
, end
)
4611 if (reg
->dmax
== 0) {
4615 *low_prev
= onigenc_get_prev_char_head(reg
->enc
, s
, p
);
4617 *low_prev
= onigenc_get_prev_char_head(reg
->enc
,
4618 (pprev
? pprev
: str
), p
);
4622 if (reg
->dmax
!= INFINITE_LEN
) {
4623 if (p
- str
< reg
->dmax
) {
4624 *low
= (UChar
* )str
;
4626 *low_prev
= onigenc_get_prev_char_head(reg
->enc
, str
, *low
);
4629 *low
= p
- reg
->dmax
;
4631 *low
= onigenc_get_right_adjust_char_head_with_prev(reg
->enc
, s
,
4632 *low
, (const UChar
** )low_prev
);
4633 if (low_prev
&& IS_NULL(*low_prev
))
4634 *low_prev
= onigenc_get_prev_char_head(reg
->enc
,
4635 (pprev
? pprev
: s
), *low
);
4639 *low_prev
= onigenc_get_prev_char_head(reg
->enc
,
4640 (pprev
? pprev
: str
), *low
);
4645 /* no needs to adjust *high, *high is used as range check only */
4646 *high
= p
- reg
->dmin
;
4648 #ifdef ONIG_DEBUG_SEARCH
4650 "forward_search_range success: low: %d, high: %d, dmin: %d, dmax: %d\n",
4651 (int )(*low
- str
), (int )(*high
- str
), reg
->dmin
, reg
->dmax
);
4653 return 1; /* success */
4656 return 0; /* fail */
4661 backward_search_range(regex_t
* reg
, const UChar
* str
, const UChar
* end
,
4662 UChar
* s
, const UChar
* range
, UChar
* adjrange
,
4663 UChar
** low
, UChar
** high
)
4667 if (range
== 0) goto fail
;
4673 switch (reg
->optimize
) {
4676 p
= slow_search_backward(reg
->enc
, reg
->exact
, reg
->exact_end
,
4677 range
, adjrange
, end
, p
);
4680 case OPTIMIZE_STR_CASE_FOLD
:
4681 case OPTIMIZE_STR_CASE_FOLD_FAST
:
4682 p
= slow_search_backward_ic(reg
->enc
, reg
->case_fold_flag
,
4683 reg
->exact
, reg
->exact_end
,
4684 range
, adjrange
, end
, p
);
4687 case OPTIMIZE_STR_FAST
:
4688 case OPTIMIZE_STR_FAST_STEP_FORWARD
:
4693 p
= map_search_backward(reg
->enc
, reg
->map
, range
, adjrange
, p
);
4698 if (reg
->sub_anchor
) {
4701 switch (reg
->sub_anchor
) {
4702 case ANCR_BEGIN_LINE
:
4703 if (!ON_STR_BEGIN(p
)) {
4704 prev
= onigenc_get_prev_char_head(reg
->enc
, str
, p
);
4705 if (IS_NOT_NULL(prev
) && !ONIGENC_IS_MBC_NEWLINE(reg
->enc
, prev
, end
)) {
4713 if (ON_STR_END(p
)) {
4714 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
4715 prev
= onigenc_get_prev_char_head(reg
->enc
, adjrange
, p
);
4716 if (IS_NULL(prev
)) goto fail
;
4717 if (ONIGENC_IS_MBC_NEWLINE(reg
->enc
, prev
, end
)) {
4723 else if (! ONIGENC_IS_MBC_NEWLINE(reg
->enc
, p
, end
)
4724 #ifdef USE_CRNL_AS_LINE_TERMINATOR
4725 && ! ONIGENC_IS_MBC_CRNL(reg
->enc
, p
, end
)
4728 p
= onigenc_get_prev_char_head(reg
->enc
, adjrange
, p
);
4729 if (IS_NULL(p
)) goto fail
;
4736 /* no needs to adjust *high, *high is used as range check only */
4737 if (reg
->dmax
!= INFINITE_LEN
) {
4738 *low
= p
- reg
->dmax
;
4739 *high
= p
- reg
->dmin
;
4740 *high
= onigenc_get_right_adjust_char_head(reg
->enc
, adjrange
, *high
);
4743 #ifdef ONIG_DEBUG_SEARCH
4744 fprintf(stderr
, "backward_search_range: low: %d, high: %d\n",
4745 (int )(*low
- str
), (int )(*high
- str
));
4747 return 1; /* success */
4751 #ifdef ONIG_DEBUG_SEARCH
4752 fprintf(stderr
, "backward_search_range: fail.\n");
4754 return 0; /* fail */
4759 onig_search(regex_t
* reg
, const UChar
* str
, const UChar
* end
,
4760 const UChar
* start
, const UChar
* range
, OnigRegion
* region
,
4761 OnigOptionType option
)
4766 onig_initialize_match_param(&mp
);
4767 r
= onig_search_with_param(reg
, str
, end
, start
, range
, region
, option
, &mp
);
4768 onig_free_match_param_content(&mp
);
4774 onig_search_with_param(regex_t
* reg
, const UChar
* str
, const UChar
* end
,
4775 const UChar
* start
, const UChar
* range
, OnigRegion
* region
,
4776 OnigOptionType option
, OnigMatchParam
* mp
)
4781 const UChar
*orig_start
= start
;
4782 const UChar
*orig_range
= range
;
4784 #ifdef ONIG_DEBUG_SEARCH
4786 "onig_search (entry point): str: %p, end: %d, start: %d, range: %d\n",
4787 str
, (int )(end
- str
), (int )(start
- str
), (int )(range
- str
));
4790 ADJUST_MATCH_PARAM(reg
, mp
);
4793 #ifdef USE_POSIX_API_REGION_OPTION
4794 && !IS_POSIX_REGION(option
)
4797 r
= onig_region_resize_clear(region
, reg
->num_mem
+ 1);
4798 if (r
!= 0) goto finish_no_msa
;
4801 if (start
> end
|| start
< str
) goto mismatch_no_msa
;
4803 if (ONIG_IS_OPTION_ON(option
, ONIG_OPTION_CHECK_VALIDITY_OF_STRING
)) {
4804 if (! ONIGENC_IS_VALID_MBC_STRING(reg
->enc
, str
, end
)) {
4805 r
= ONIGERR_INVALID_WIDE_CHAR_VALUE
;
4811 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
4812 #define MATCH_AND_RETURN_CHECK(upper_range) \
4813 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
4814 if (r != ONIG_MISMATCH) {\
4816 if (! IS_FIND_LONGEST(reg->options)) {\
4820 else goto finish; /* error */ \
4823 #define MATCH_AND_RETURN_CHECK(upper_range) \
4824 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
4825 if (r != ONIG_MISMATCH) {\
4829 else goto finish; /* error */ \
4831 #endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
4834 /* anchor optimize: resume search range */
4835 if (reg
->anchor
!= 0 && str
< end
) {
4836 UChar
*min_semi_end
, *max_semi_end
;
4838 if (reg
->anchor
& ANCR_BEGIN_POSITION
) {
4839 /* search start-position only */
4846 else if (reg
->anchor
& ANCR_BEGIN_BUF
) {
4847 /* search str-position only */
4848 if (range
> start
) {
4849 if (start
!= str
) goto mismatch_no_msa
;
4858 goto mismatch_no_msa
;
4861 else if (reg
->anchor
& ANCR_END_BUF
) {
4862 min_semi_end
= max_semi_end
= (UChar
* )end
;
4865 if ((OnigLen
)(max_semi_end
- str
) < reg
->anchor_dmin
)
4866 goto mismatch_no_msa
;
4868 if (range
> start
) {
4869 if ((OnigLen
)(min_semi_end
- start
) > reg
->anchor_dmax
) {
4870 start
= min_semi_end
- reg
->anchor_dmax
;
4872 start
= onigenc_get_right_adjust_char_head(reg
->enc
, str
, start
);
4874 if ((OnigLen
)(max_semi_end
- (range
- 1)) < reg
->anchor_dmin
) {
4875 range
= max_semi_end
- reg
->anchor_dmin
+ 1;
4878 if (start
> range
) goto mismatch_no_msa
;
4879 /* If start == range, match with empty at end.
4880 Backward search is used. */
4883 if ((OnigLen
)(min_semi_end
- range
) > reg
->anchor_dmax
) {
4884 range
= min_semi_end
- reg
->anchor_dmax
;
4886 if ((OnigLen
)(max_semi_end
- start
) < reg
->anchor_dmin
) {
4887 start
= max_semi_end
- reg
->anchor_dmin
;
4888 start
= ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg
->enc
, str
, start
);
4890 if (range
> start
) goto mismatch_no_msa
;
4893 else if (reg
->anchor
& ANCR_SEMI_END_BUF
) {
4894 UChar
* pre_end
= ONIGENC_STEP_BACK(reg
->enc
, str
, end
, 1);
4896 max_semi_end
= (UChar
* )end
;
4897 // only when str > end, pre_end will be null
4898 // line 4659 "if (start > end || start < str) goto mismatch_no_msa"
4899 // will guarantee str alwayls less than end
4900 // so pre_end won't be null,this check is just for passing staic analysis
4901 if (IS_NOT_NULL(pre_end
) && ONIGENC_IS_MBC_NEWLINE(reg
->enc
, pre_end
, end
)) {
4902 min_semi_end
= pre_end
;
4904 #ifdef USE_CRNL_AS_LINE_TERMINATOR
4905 pre_end
= ONIGENC_STEP_BACK(reg
->enc
, str
, pre_end
, 1);
4906 if (IS_NOT_NULL(pre_end
) &&
4907 ONIGENC_IS_MBC_CRNL(reg
->enc
, pre_end
, end
)) {
4908 min_semi_end
= pre_end
;
4911 if (min_semi_end
> str
&& start
<= min_semi_end
) {
4916 min_semi_end
= (UChar
* )end
;
4920 else if ((reg
->anchor
& ANCR_ANYCHAR_INF_ML
)) {
4921 goto begin_position
;
4924 else if (str
== end
) { /* empty string */
4925 static const UChar
* address_for_empty_string
= (UChar
* )"";
4927 #ifdef ONIG_DEBUG_SEARCH
4928 fprintf(stderr
, "onig_search: empty string.\n");
4931 if (reg
->threshold_len
== 0) {
4932 start
= end
= str
= address_for_empty_string
;
4934 prev
= (UChar
* )NULL
;
4936 MATCH_ARG_INIT(msa
, reg
, option
, region
, start
, mp
);
4937 MATCH_AND_RETURN_CHECK(end
);
4940 goto mismatch_no_msa
;
4943 #ifdef ONIG_DEBUG_SEARCH
4944 fprintf(stderr
, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
4945 (int )(end
- str
), (int )(start
- str
), (int )(range
- str
));
4948 MATCH_ARG_INIT(msa
, reg
, option
, region
, orig_start
, mp
);
4951 if (range
> start
) { /* forward search */
4953 prev
= onigenc_get_prev_char_head(reg
->enc
, str
, s
);
4955 prev
= (UChar
* )NULL
;
4957 if (reg
->optimize
!= OPTIMIZE_NONE
) {
4958 UChar
*sch_range
, *low
, *high
, *low_prev
;
4960 sch_range
= (UChar
* )range
;
4961 if (reg
->dmax
!= 0) {
4962 if (reg
->dmax
== INFINITE_LEN
)
4963 sch_range
= (UChar
* )end
;
4965 sch_range
+= reg
->dmax
;
4966 if (sch_range
> end
) sch_range
= (UChar
* )end
;
4970 if ((end
- start
) < reg
->threshold_len
)
4973 if (reg
->dmax
!= INFINITE_LEN
) {
4975 if (! forward_search_range(reg
, str
, end
, s
, sch_range
,
4976 &low
, &high
, &low_prev
)) goto mismatch
;
4982 MATCH_AND_RETURN_CHECK(orig_range
);
4984 s
+= enclen(reg
->enc
, s
);
4986 } while (s
< range
);
4989 else { /* check only. */
4990 if (! forward_search_range(reg
, str
, end
, s
, sch_range
,
4991 &low
, &high
, (UChar
** )NULL
)) goto mismatch
;
4993 if ((reg
->anchor
& ANCR_ANYCHAR_INF
) != 0) {
4995 MATCH_AND_RETURN_CHECK(orig_range
);
4997 s
+= enclen(reg
->enc
, s
);
4999 if ((reg
->anchor
& (ANCR_LOOK_BEHIND
| ANCR_PREC_READ_NOT
)) == 0) {
5000 while (!ONIGENC_IS_MBC_NEWLINE(reg
->enc
, prev
, end
) && s
< range
) {
5002 s
+= enclen(reg
->enc
, s
);
5005 } while (s
< range
);
5012 MATCH_AND_RETURN_CHECK(orig_range
);
5014 s
+= enclen(reg
->enc
, s
);
5015 } while (s
< range
);
5017 if (s
== range
) { /* because empty match with /$/. */
5018 MATCH_AND_RETURN_CHECK(orig_range
);
5021 else { /* backward search */
5022 if (range
< str
) goto mismatch
;
5024 if (orig_start
< end
)
5025 orig_start
+= enclen(reg
->enc
, orig_start
); /* is upper range */
5027 if (reg
->optimize
!= OPTIMIZE_NONE
) {
5028 UChar
*low
, *high
, *adjrange
, *sch_start
;
5031 adjrange
= ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg
->enc
, str
, range
);
5033 adjrange
= (UChar
* )end
;
5035 if (reg
->dmax
!= INFINITE_LEN
&&
5036 (end
- range
) >= reg
->threshold_len
) {
5038 sch_start
= s
+ reg
->dmax
;
5039 if (sch_start
> end
) sch_start
= (UChar
* )end
;
5040 if (backward_search_range(reg
, str
, end
, sch_start
, range
, adjrange
,
5048 prev
= onigenc_get_prev_char_head(reg
->enc
, str
, s
);
5049 MATCH_AND_RETURN_CHECK(orig_start
);
5052 // If range is not NULL, the check is not necessary.
5053 // The range is actually the pointer to the end of the matched string
5054 // or is assigned by "range = str" in line 4708. In the RegularExpressionMatch
5055 // protocol, the matched string is the parameter String, and str in
5056 // line 4708 is the String, too. The range is calculated from
5057 // "Start + onigenc_str_bytelen_null (CHAR16_ENCODING, Start)" in
5058 // line 146 in RegularExpressionDxe.c. RegularExpressionMatch ensures that
5059 // the String is not NULL, so in both situations the range cannot be NULL.
5060 // This check is just for passing static analysis.
5061 if(IS_NULL(s
))break;
5062 } while (s
>= range
);
5065 else { /* check only. */
5066 if ((end
- range
) < reg
->threshold_len
) goto mismatch
;
5069 if (reg
->dmax
!= 0) {
5070 if (reg
->dmax
== INFINITE_LEN
)
5071 sch_start
= (UChar
* )end
;
5073 sch_start
+= reg
->dmax
;
5074 if (sch_start
> end
) sch_start
= (UChar
* )end
;
5076 sch_start
= ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg
->enc
,
5080 if (backward_search_range(reg
, str
, end
, sch_start
, range
, adjrange
,
5081 &low
, &high
) <= 0) goto mismatch
;
5086 prev
= onigenc_get_prev_char_head(reg
->enc
, str
, s
);
5087 MATCH_AND_RETURN_CHECK(orig_start
);
5089 } while (s
>= range
);
5093 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
5094 if (IS_FIND_LONGEST(reg
->options
)) {
5095 if (msa
.best_len
>= 0) {
5104 MATCH_ARG_FREE(msa
);
5106 /* If result is mismatch and no FIND_NOT_EMPTY option,
5107 then the region is not set in match_at(). */
5108 if (IS_FIND_NOT_EMPTY(reg
->options
) && region
5109 #ifdef USE_POSIX_API_REGION_OPTION
5110 && !IS_POSIX_REGION(option
)
5113 onig_region_clear(region
);
5117 if (r
!= ONIG_MISMATCH
)
5118 fprintf(stderr
, "onig_search: error %d\n", r
);
5126 if (r
!= ONIG_MISMATCH
)
5127 fprintf(stderr
, "onig_search: error %d\n", r
);
5132 MATCH_ARG_FREE(msa
);
5133 return (int )(s
- str
);
/*
 * onig_scan: repeatedly search [str, end) with `reg` and hand each result
 * to scan_callback.
 * NOTE(review): this listing is incomplete (return type, local declarations,
 * loop header and several branches are not visible); only the visible
 * statements are documented below.
 */
5137 onig_scan(regex_t
* reg
, const UChar
* str
, const UChar
* end
,
5138 OnigRegion
* region
, OnigOptionType option
,
5139 int (*scan_callback
)(int, int, OnigRegion
*, void*),
/* Optional up-front validation of the subject string; an invalid string
   is rejected with ONIGERR_INVALID_WIDE_CHAR_VALUE. */
5147 if (ONIG_IS_OPTION_ON(option
, ONIG_OPTION_CHECK_VALIDITY_OF_STRING
)) {
5148 if (! ONIGENC_IS_VALID_MBC_STRING(reg
->enc
, str
, end
))
5149 return ONIGERR_INVALID_WIDE_CHAR_VALUE
;
/* The option is switched off after the check, before onig_search() is called. */
5151 ONIG_OPTION_OFF(option
, ONIG_OPTION_CHECK_VALIDITY_OF_STRING
);
/* Search from the current `start` position up to `end`. */
5157 r
= onig_search(reg
, str
, end
, start
, end
, region
, option
);
/* Pass the counter `n`, the search result and the region to the callback. */
5159 rs
= scan_callback(n
, r
, region
, callback_arg
);
/* If the match ended exactly at `start`, step forward by one character
   (or stop when `start` has reached `end`). */
5164 if (region
->end
[0] == start
- str
) {
5165 if (start
>= end
) break;
5166 start
+= enclen(reg
->enc
, start
);
/* Otherwise continue from the end offset of the match just reported. */
5169 start
= str
+ region
->end
[0];
5174 else if (r
== ONIG_MISMATCH
) {
/*
 * onig_get_encoding: takes a regex object.
 * NOTE(review): return type and body are not visible in this listing;
 * presumably returns the encoding of `reg` -- confirm against the full file.
 */
5186 onig_get_encoding(regex_t
* reg
)
/* onig_get_options: return the `options` field stored in the regex object. */
5191 extern OnigOptionType
5192 onig_get_options(regex_t
* reg
)
5194 return reg
->options
;
/* onig_get_case_fold_flag: return the `case_fold_flag` field stored in the regex object. */
5197 extern OnigCaseFoldType
5198 onig_get_case_fold_flag(regex_t
* reg
)
5200 return reg
->case_fold_flag
;
/*
 * onig_get_syntax: takes a regex object and returns a pointer to OnigSyntaxType.
 * NOTE(review): the body is not visible in this listing.
 */
5203 extern OnigSyntaxType
*
5204 onig_get_syntax(regex_t
* reg
)
/* onig_number_of_captures: return the `num_mem` field stored in the regex object. */
5210 onig_number_of_captures(regex_t
* reg
)
5212 return reg
->num_mem
;
/*
 * onig_number_of_capture_histories: when USE_CAPTURE_HISTORY is defined,
 * walks indices 0..ONIG_MAX_CAPTURE_HISTORY_GROUP (inclusive) and tests the
 * corresponding bit of reg->capture_history.
 * NOTE(review): the statement executed for a set bit and the return
 * statements are not visible here; presumably a counter is incremented.
 */
5216 onig_number_of_capture_histories(regex_t
* reg
)
5218 #ifdef USE_CAPTURE_HISTORY
5222 for (i
= 0; i
<= ONIG_MAX_CAPTURE_HISTORY_GROUP
; i
++) {
5223 if (MEM_STATUS_AT(reg
->capture_history
, i
) != 0)
/*
 * onig_copy_encoding: takes a destination (`to`) and a source (`from`) encoding.
 * NOTE(review): return type and body are not visible in this listing.
 */
5233 onig_copy_encoding(OnigEncoding to
, OnigEncoding from
)
/*
 * onig_init_for_match_at (only under USE_DIRECT_THREADED_CODE): calls
 * match_at() with `reg` and NULL for every string pointer argument visible
 * here, and returns its result.
 * NOTE(review): the remaining match_at() arguments and the closing #endif
 * are not visible in this listing.
 */
5238 #ifdef USE_DIRECT_THREADED_CODE
5240 onig_init_for_match_at(regex_t
* reg
)
5242 return match_at(reg
, (const UChar
* )NULL
, (const UChar
* )NULL
,
5243 (const UChar
* )NULL
, (const UChar
* )NULL
, (UChar
* )NULL
,
5249 /* for callout functions */
/* onig_get_progress_callout: return the current value of DefaultProgressCallout. */
5253 extern OnigCalloutFunc
5254 onig_get_progress_callout(void)
5256 return DefaultProgressCallout
;
/*
 * onig_set_progress_callout: store `f` in DefaultProgressCallout.
 * NOTE(review): return type and return statement are not visible here.
 */
5260 onig_set_progress_callout(OnigCalloutFunc f
)
5262 DefaultProgressCallout
= f
;
/* onig_get_retraction_callout: return the current value of DefaultRetractionCallout. */
5266 extern OnigCalloutFunc
5267 onig_get_retraction_callout(void)
5269 return DefaultRetractionCallout
;
/*
 * onig_set_retraction_callout: store `f` in DefaultRetractionCallout.
 * NOTE(review): return type and return statement are not visible here.
 */
5273 onig_set_retraction_callout(OnigCalloutFunc f
)
5275 DefaultRetractionCallout
= f
;
/*
 * onig_get_callout_num_by_callout_args: takes the callout argument block.
 * NOTE(review): return type and body are not visible in this listing.
 */
5280 onig_get_callout_num_by_callout_args(OnigCalloutArgs
* args
)
/*
 * onig_get_callout_in_by_callout_args: takes the callout argument block and
 * returns an OnigCalloutIn value.
 * NOTE(review): the body is not visible in this listing.
 */
5285 extern OnigCalloutIn
5286 onig_get_callout_in_by_callout_args(OnigCalloutArgs
* args
)
/* onig_get_name_id_by_callout_args: return the `name_id` field of the callout arguments. */
5292 onig_get_name_id_by_callout_args(OnigCalloutArgs
* args
)
5294 return args
->name_id
;
/*
 * onig_get_contents_by_callout_args: look up the callout list entry for this
 * callout and, for a contents-style callout (ONIG_CALLOUT_OF_CONTENTS),
 * return the start of its content. Returns 0 when no entry is found.
 * NOTE(review): how `num` is obtained and the fall-through return are not
 * visible in this listing.
 */
5298 onig_get_contents_by_callout_args(OnigCalloutArgs
* args
)
5301 CalloutListEntry
* e
;
5304 e
= onig_reg_callout_list_at(args
->regex
, num
);
5305 if (IS_NULL(e
)) return 0;
5306 if (e
->of
== ONIG_CALLOUT_OF_CONTENTS
) {
5307 return e
->u
.content
.start
;
/*
 * onig_get_contents_end_by_callout_args: look up the callout list entry for
 * this callout and, for a contents-style callout (ONIG_CALLOUT_OF_CONTENTS),
 * return the end of its content. Returns 0 when no entry is found.
 * NOTE(review): how `num` is obtained and the fall-through return are not
 * visible in this listing.
 */
5314 onig_get_contents_end_by_callout_args(OnigCalloutArgs
* args
)
5317 CalloutListEntry
* e
;
5320 e
= onig_reg_callout_list_at(args
->regex
, num
);
5321 if (IS_NULL(e
)) return 0;
5322 if (e
->of
== ONIG_CALLOUT_OF_CONTENTS
) {
5323 return e
->u
.content
.end
;
/*
 * onig_get_args_num_by_callout_args: for a name-style callout
 * (ONIG_CALLOUT_OF_NAME) return the `num` field of its argument record.
 * Returns ONIGERR_INVALID_ARGUMENT when the entry is missing or the callout
 * is not of the name kind.
 */
5330 onig_get_args_num_by_callout_args(OnigCalloutArgs
* args
)
5333 CalloutListEntry
* e
;
5336 e
= onig_reg_callout_list_at(args
->regex
, num
);
5337 if (IS_NULL(e
)) return ONIGERR_INVALID_ARGUMENT
;
5338 if (e
->of
== ONIG_CALLOUT_OF_NAME
) {
5339 return e
->u
.arg
.num
;
5342 return ONIGERR_INVALID_ARGUMENT
;
/*
 * onig_get_passed_args_num_by_callout_args: for a name-style callout
 * (ONIG_CALLOUT_OF_NAME) return the `passed_num` field of its argument
 * record. Returns ONIGERR_INVALID_ARGUMENT when the entry is missing or the
 * callout is not of the name kind.
 */
5346 onig_get_passed_args_num_by_callout_args(OnigCalloutArgs
* args
)
5349 CalloutListEntry
* e
;
5352 e
= onig_reg_callout_list_at(args
->regex
, num
);
5353 if (IS_NULL(e
)) return ONIGERR_INVALID_ARGUMENT
;
5354 if (e
->of
== ONIG_CALLOUT_OF_NAME
) {
5355 return e
->u
.arg
.passed_num
;
5358 return ONIGERR_INVALID_ARGUMENT
;
/*
 * onig_get_arg_by_callout_args: fetch argument `index` of a name-style
 * callout. `type` and `val` are optional outputs; each is written only when
 * the pointer is non-NULL. Returns ONIGERR_INVALID_ARGUMENT when the entry is
 * missing or the callout is not of the name kind.
 * NOTE(review): no range check on `index` is visible in this listing before
 * it is used to index types[] / vals[] -- confirm the caller contract.
 * NOTE(review): the success return inside the name branch is not visible.
 */
5362 onig_get_arg_by_callout_args(OnigCalloutArgs
* args
, int index
,
5363 OnigType
* type
, OnigValue
* val
)
5366 CalloutListEntry
* e
;
5369 e
= onig_reg_callout_list_at(args
->regex
, num
);
5370 if (IS_NULL(e
)) return ONIGERR_INVALID_ARGUMENT
;
5371 if (e
->of
== ONIG_CALLOUT_OF_NAME
) {
5372 if (IS_NOT_NULL(type
)) *type
= e
->u
.arg
.types
[index
];
5373 if (IS_NOT_NULL(val
)) *val
= e
->u
.arg
.vals
[index
];
5377 return ONIGERR_INVALID_ARGUMENT
;
/* onig_get_string_by_callout_args: return the `string` field of the callout arguments. */
5381 onig_get_string_by_callout_args(OnigCalloutArgs
* args
)
5383 return args
->string
;
/* onig_get_string_end_by_callout_args: return the `string_end` field of the callout arguments. */
5387 onig_get_string_end_by_callout_args(OnigCalloutArgs
* args
)
5389 return args
->string_end
;
/*
 * onig_get_start_by_callout_args: takes the callout argument block.
 * NOTE(review): return type and body are not visible in this listing.
 */
5393 onig_get_start_by_callout_args(OnigCalloutArgs
* args
)
/* onig_get_right_range_by_callout_args: return the `right_range` field of the callout arguments. */
5399 onig_get_right_range_by_callout_args(OnigCalloutArgs
* args
)
5401 return args
->right_range
;
/* onig_get_current_by_callout_args: return the `current` field of the callout arguments. */
5405 onig_get_current_by_callout_args(OnigCalloutArgs
* args
)
5407 return args
->current
;
/*
 * onig_get_regex_by_callout_args: takes the callout argument block.
 * NOTE(review): return type and body are not visible in this listing.
 */
5411 onig_get_regex_by_callout_args(OnigCalloutArgs
* args
)
/* onig_get_retry_counter_by_callout_args: return the `retry_in_match_counter` field of the callout arguments. */
5416 extern unsigned long
5417 onig_get_retry_counter_by_callout_args(OnigCalloutArgs
* args
)
5419 return args
->retry_in_match_counter
;
/*
 * onig_get_capture_range_in_callout: report, through *begin and *end, the
 * offsets (relative to `str`) of capture group `mem_num` as seen from inside
 * a callout.
 * NOTE(review): local declarations, the conditions selecting between the
 * branches below, and the success return are not visible in this listing.
 */
5424 onig_get_capture_range_in_callout(OnigCalloutArgs
* a
, int mem_num
, int* begin
, int* end
)
5428 StackType
* stk_base
;
5434 stk_base
= a
->stk_base
;
/* A group whose end-stack slot is valid has a range to report. */
5437 if (a
->mem_end_stk
[i
] != INVALID_STACK_INDEX
) {
/* The start position is read either from a stack entry (when the group's
   bit is set in bt_mem_start) or directly from the slot reinterpreted as a
   pointer. */
5438 if (MEM_STATUS_AT(reg
->bt_mem_start
, i
))
5439 *begin
= (int )(STACK_AT(a
->mem_start_stk
[i
])->u
.mem
.pstr
- str
);
5441 *begin
= (int )((UChar
* )((void* )a
->mem_start_stk
[i
]) - str
);
/* Same two-way lookup for the end position, keyed on bt_mem_end. */
5443 *end
= (int )((MEM_STATUS_AT(reg
->bt_mem_end
, i
)
5444 ? STACK_AT(a
->mem_end_stk
[i
])->u
.mem
.pstr
5445 : (UChar
* )((void* )a
->mem_end_stk
[i
])) - str
);
/* No valid range: both outputs are set to ONIG_REGION_NOTPOS. */
5448 *begin
= *end
= ONIG_REGION_NOTPOS
;
/* Range taken from the callout's own start/current positions. */
5453 *begin
= a
->start
- str
;
5454 *end
= a
->current
- str
;
5456 return ONIGERR_INVALID_ARGUMENT
;
5460 return ONIGERR_INVALID_ARGUMENT
;
/*
 * onig_get_used_stack_size_in_callout: compute the number of stack entries
 * in use (a->stk - a->stk_base) and, when `used_bytes` is non-null, store
 * that count multiplied by sizeof(StackType).
 * NOTE(review): the handling of `used_num` and the return statement are not
 * visible in this listing.
 */
5466 onig_get_used_stack_size_in_callout(OnigCalloutArgs
* a
, int* used_num
, int* used_bytes
)
5470 n
= (int )(a
->stk
- a
->stk_base
);
5475 if (used_bytes
!= 0)
5476 *used_bytes
= n
* sizeof(StackType
);
5482 /* builtin callout functions */
/* onig_builtin_fail: builtin callout that ignores its arguments and returns ONIG_CALLOUT_FAIL. */
5485 onig_builtin_fail(OnigCalloutArgs
* args ARG_UNUSED
, void* user_data ARG_UNUSED
)
5487 return ONIG_CALLOUT_FAIL
;
/* onig_builtin_mismatch: builtin callout that ignores its arguments and returns ONIG_MISMATCH. */
5491 onig_builtin_mismatch(OnigCalloutArgs
* args ARG_UNUSED
, void* user_data ARG_UNUSED
)
5493 return ONIG_MISMATCH
;
/* onig_builtin_success: builtin callout that ignores its arguments and returns ONIG_CALLOUT_SUCCESS. */
5498 onig_builtin_success(OnigCalloutArgs
* args ARG_UNUSED
, void* user_data ARG_UNUSED
)
5500 return ONIG_CALLOUT_SUCCESS
;
/*
 * onig_builtin_error: builtin callout that reads its first argument (value
 * only; the type output is passed as 0) and derives a code `n` from it.
 * In two visible cases -- one of them when onig_is_error_code_needs_param(n)
 * is true -- `n` is replaced by ONIGERR_INVALID_CALLOUT_BODY.
 * NOTE(review): how `n` is taken from `val`, the first condition, and the
 * final return are not visible in this listing.
 */
5505 onig_builtin_error(OnigCalloutArgs
* args
, void* user_data ARG_UNUSED
)
5511 r
= onig_get_arg_by_callout_args(args
, 0, 0, &val
);
5512 if (r
!= ONIG_NORMAL
) return r
;
5516 n
= ONIGERR_INVALID_CALLOUT_BODY
;
5518 else if (onig_is_error_code_needs_param(n
)) {
5519 n
= ONIGERR_INVALID_CALLOUT_BODY
;
/*
 * onig_builtin_count: calls onig_check_callout_data_and_clear_old_values()
 * (result deliberately discarded) and then delegates to
 * onig_builtin_total_count() with the same arguments.
 */
5526 onig_builtin_count(OnigCalloutArgs
* args
, void* user_data
)
5528 (void )onig_check_callout_data_and_clear_old_values(args
);
5530 return onig_builtin_total_count(args
, user_data
);
/*
 * onig_builtin_total_count: builtin counting callout. Its first argument is
 * a single character selecting the counting mode; only '>', 'X' and '<' are
 * accepted, anything else yields ONIGERR_INVALID_CALLOUT_ARG. Callout data
 * slot 0 and one further slot (`slot`) are read without clearing old values
 * and written back as ONIG_TYPE_LONG.
 * NOTE(review): the actual counter updates and several branch bodies are not
 * visible in this listing.
 */
5534 onig_builtin_total_count(OnigCalloutArgs
* args
, void* user_data ARG_UNUSED
)
5541 OnigCodePoint count_type
;
5543 r
= onig_get_arg_by_callout_args(args
, 0, &type
, &aval
);
5544 if (r
!= ONIG_NORMAL
) return r
;
5546 count_type
= aval
.c
;
5547 if (count_type
!= '>' && count_type
!= 'X' && count_type
!= '<')
5548 return ONIGERR_INVALID_CALLOUT_ARG
;
/* Read slot 0; a negative result is treated separately from a positive one. */
5550 r
= onig_get_callout_data_by_callout_args_self_dont_clear_old(args
, 0,
5552 if (r
< ONIG_NORMAL
)
5554 else if (r
> ONIG_NORMAL
) {
5555 /* type == void: initial state */
/* Behaviour depends on whether the callout fires during retraction and on
   the selected count_type. */
5559 if (args
->in
== ONIG_CALLOUT_IN_RETRACTION
) {
5561 if (count_type
== '<')
5563 else if (count_type
== 'X')
5568 if (count_type
!= '<')
5572 r
= onig_set_callout_data_by_callout_args_self(args
, 0, ONIG_TYPE_LONG
, &val
);
5573 if (r
!= ONIG_NORMAL
) return r
;
5575 /* slot 1: in progress counter, slot 2: in retraction counter */
5576 r
= onig_get_callout_data_by_callout_args_self_dont_clear_old(args
, slot
,
5578 if (r
< ONIG_NORMAL
)
5580 else if (r
> ONIG_NORMAL
) {
5585 r
= onig_set_callout_data_by_callout_args_self(args
, slot
, ONIG_TYPE_LONG
, &val
);
5586 if (r
!= ONIG_NORMAL
) return r
;
5588 return ONIG_CALLOUT_SUCCESS
;
/*
 * onig_builtin_max: builtin callout that fails (ONIG_CALLOUT_FAIL) once a
 * per-callout counter stored in callout data has reached `max_val`.
 * Argument 0 supplies the maximum, either directly or as a tag whose callout
 * data is looked up; argument 1 is the counting mode character, which must
 * be '>', 'X' or '<' (otherwise ONIGERR_INVALID_CALLOUT_ARG).
 * NOTE(review): the counter increments/decrements and the assignment of
 * `max_val` are not visible in this listing.
 */
5592 onig_builtin_max(OnigCalloutArgs
* args
, void* user_data ARG_UNUSED
)
5597 OnigCodePoint count_type
;
5602 (void )onig_check_callout_data_and_clear_old_values(args
);
/* Load the current counter from this callout's own data slot. */
5605 r
= onig_get_callout_data_by_callout_args_self(args
, slot
, &type
, &val
);
5606 if (r
< ONIG_NORMAL
)
5608 else if (r
> ONIG_NORMAL
) {
5609 /* type == void: initial state */
5610 type
= ONIG_TYPE_LONG
;
/* Argument 0: the maximum, or a tag referring to another callout's data. */
5614 r
= onig_get_arg_by_callout_args(args
, 0, &type
, &aval
);
5615 if (r
!= ONIG_NORMAL
) return r
;
5616 if (type
== ONIG_TYPE_TAG
) {
5617 r
= onig_get_callout_data_by_callout_args(args
, aval
.tag
, 0, &type
, &aval
);
5618 if (r
< ONIG_NORMAL
) return r
;
5619 else if (r
> ONIG_NORMAL
)
/* Argument 1: the counting mode character. */
5628 r
= onig_get_arg_by_callout_args(args
, 1, &type
, &aval
);
5629 if (r
!= ONIG_NORMAL
) return r
;
5631 count_type
= aval
.c
;
5632 if (count_type
!= '>' && count_type
!= 'X' && count_type
!= '<')
5633 return ONIGERR_INVALID_CALLOUT_ARG
;
/* The limit check is applied in retraction for '<' and otherwise for any
   mode other than '<'. */
5635 if (args
->in
== ONIG_CALLOUT_IN_RETRACTION
) {
5636 if (count_type
== '<') {
5637 if (val
.l
>= max_val
) return ONIG_CALLOUT_FAIL
;
5640 else if (count_type
== 'X')
5644 if (count_type
!= '<') {
5645 if (val
.l
>= max_val
) return ONIG_CALLOUT_FAIL
;
/* Persist the updated counter. */
5650 r
= onig_set_callout_data_by_callout_args_self(args
, slot
, ONIG_TYPE_LONG
, &val
);
5651 if (r
!= ONIG_NORMAL
) return r
;
5653 return ONIG_CALLOUT_SUCCESS
;
/*
 * onig_builtin_cmp: builtin callout comparing two values `lv` and `rv` with
 * an operator, returning ONIG_CALLOUT_SUCCESS when the comparison holds and
 * ONIG_CALLOUT_FAIL otherwise. Arguments 0 and 2 are the operands (each
 * either a long or a tag whose callout data is looked up); argument 1 is the
 * operator string, parsed once and cached in this callout's data slot as an
 * enum OP_CMP value.
 * NOTE(review): assignments to lv/rv, the switch headers and several branch
 * bodies are not visible in this listing.
 */
5666 onig_builtin_cmp(OnigCalloutArgs
* args
, void* user_data ARG_UNUSED
)
/* Left operand (argument 0). */
5679 r
= onig_get_arg_by_callout_args(args
, 0, &type
, &val
);
5680 if (r
!= ONIG_NORMAL
) return r
;
5682 if (type
== ONIG_TYPE_TAG
) {
5683 r
= onig_get_callout_data_by_callout_args(args
, val
.tag
, 0, &type
, &val
);
5684 if (r
< ONIG_NORMAL
) return r
;
5685 else if (r
> ONIG_NORMAL
)
5690 else { /* ONIG_TYPE_LONG */
/* Right operand (argument 2). */
5694 r
= onig_get_arg_by_callout_args(args
, 2, &type
, &val
);
5695 if (r
!= ONIG_NORMAL
) return r
;
5697 if (type
== ONIG_TYPE_TAG
) {
5698 r
= onig_get_callout_data_by_callout_args(args
, val
.tag
, 0, &type
, &val
);
5699 if (r
< ONIG_NORMAL
) return r
;
5700 else if (r
> ONIG_NORMAL
)
5705 else { /* ONIG_TYPE_LONG */
/* Cached operator lookup; a positive result means nothing is cached yet. */
5710 r
= onig_get_callout_data_by_callout_args_self(args
, slot
, &type
, &val
);
5711 if (r
< ONIG_NORMAL
)
5713 else if (r
> ONIG_NORMAL
) {
5714 /* type == void: initial state */
5715 OnigCodePoint c1
, c2
;
/* Operator string (argument 1): decode at most two characters; anything
   left over after the second character is rejected. */
5718 r
= onig_get_arg_by_callout_args(args
, 1, &type
, &val
);
5719 if (r
!= ONIG_NORMAL
) return r
;
5722 c1
= ONIGENC_MBC_TO_CODE(reg
->enc
, p
, val
.s
.end
);
5723 p
+= ONIGENC_MBC_ENC_LEN(reg
->enc
, p
);
5724 if (p
< val
.s
.end
) {
5725 c2
= ONIGENC_MBC_TO_CODE(reg
->enc
, p
, val
.s
.end
);
5726 p
+= ONIGENC_MBC_ENC_LEN(reg
->enc
, p
);
5727 if (p
!= val
.s
.end
) return ONIGERR_INVALID_CALLOUT_ARG
;
/* Map (c1, c2) onto an OP_* value; invalid combinations are rejected. */
5734 if (c2
!= '=') return ONIGERR_INVALID_CALLOUT_ARG
;
5738 if (c2
!= '=') return ONIGERR_INVALID_CALLOUT_ARG
;
5742 if (c2
== '=') op
= OP_LE
;
5743 else if (c2
== 0) op
= OP_LT
;
5744 else return ONIGERR_INVALID_CALLOUT_ARG
;
5747 if (c2
== '=') op
= OP_GE
;
5748 else if (c2
== 0) op
= OP_GT
;
5749 else return ONIGERR_INVALID_CALLOUT_ARG
;
5752 return ONIGERR_INVALID_CALLOUT_ARG
;
/* Cache the parsed operator in the callout data slot. */
5756 r
= onig_set_callout_data_by_callout_args_self(args
, slot
, ONIG_TYPE_LONG
, &val
);
5757 if (r
!= ONIG_NORMAL
) return r
;
/* Operator was already cached: recover it from the stored long. */
5760 op
= (enum OP_CMP
)val
.l
;
/* Evaluate the comparison. */
5764 case OP_EQ
: r
= (lv
== rv
); break;
5765 case OP_NE
: r
= (lv
!= rv
); break;
5766 case OP_LT
: r
= (lv
< rv
); break;
5767 case OP_GT
: r
= (lv
> rv
); break;
5768 case OP_LE
: r
= (lv
<= rv
); break;
5769 case OP_GE
: r
= (lv
>= rv
); break;
5772 return r
== 0 ? ONIG_CALLOUT_FAIL
: ONIG_CALLOUT_SUCCESS
;
5776 //#include <stdio.h>
5780 /* The name starts with "onig_" for macros. */
/*
 * onig_builtin_monitor: builtin monitoring callout. Reads its first argument
 * (a mode character), decides from the callout direction whether to report,
 * builds a label in `buf` (either "#<num>" or a copy of the callout's tag,
 * truncated to fit and NUL-terminated), and returns ONIG_CALLOUT_SUCCESS.
 * NOTE(review): the fprintf near the end references start/right/current/
 * string/strend, whose declarations and assignments are commented out in
 * this function -- confirm whether that call is itself disabled in the full
 * file.
 * NOTE(review): several conditions and branch bodies are not visible in this
 * listing.
 */
5782 onig_builtin_monitor(OnigCalloutArgs
* args
, void* user_data
)
5787 // const UChar* start;
5788 // const UChar* right;
5789 // const UChar* current;
5790 // const UChar* string;
5791 // const UChar* strend;
5792 const UChar
* tag_start
;
5793 const UChar
* tag_end
;
5803 r
= onig_get_arg_by_callout_args(args
, 0, &type
, &val
);
5804 if (r
!= ONIG_NORMAL
) return r
;
/* Filter on the callout direction and on the mode character. */
5806 in
= onig_get_callout_in_by_callout_args(args
);
5807 if (in
== ONIG_CALLOUT_IN_PROGRESS
) {
5809 return ONIG_CALLOUT_SUCCESS
;
5812 if (val
.c
!= 'X' && val
.c
!= '<')
5813 return ONIG_CALLOUT_SUCCESS
;
5816 num
= onig_get_callout_num_by_callout_args(args
);
5817 // start = onig_get_start_by_callout_args(args);
5818 // right = onig_get_right_range_by_callout_args(args);
5819 // current = onig_get_current_by_callout_args(args);
5820 // string = onig_get_string_by_callout_args(args);
5821 // strend = onig_get_string_end_by_callout_args(args);
5822 reg
= onig_get_regex_by_callout_args(args
);
5823 tag_start
= onig_get_callout_tag_start(reg
, num
);
5824 tag_end
= onig_get_callout_tag_end(reg
, num
);
/* Label form 1: the callout number, formatted with a bounded write. */
5827 sprintf_s(buf
, sizeof(buf
), "#%d", num
);
5829 /* CAUTION: tag string is not terminated with NULL. */
/* Label form 2: copy the tag, clamped to leave room for the terminator. */
5832 tag_len
= tag_end
- tag_start
;
5833 if (tag_len
>= sizeof(buf
)) tag_len
= sizeof(buf
) - 1;
5834 for (i
= 0; i
< tag_len
; i
++) buf
[i
] = tag_start
[i
];
5835 buf
[tag_len
] = '\0';
5838 fprintf(fp, "ONIG-MONITOR: %-4s %s at: %d [%d - %d] len: %d\n",
5840 in == ONIG_CALLOUT_IN_PROGRESS ? "=>" : "<=",
5841 (int )(current - string),
5842 (int )(start - string),
5843 (int )(right - string),
5844 (int )(strend - string));
5847 return ONIG_CALLOUT_SUCCESS
;
/*
 * onig_setup_builtin_monitors_by_ascii_encoded_name: registers the builtin
 * "monitor" callout via the BC_B_O macro, using ONIG_ENCODING_ASCII and a
 * single ONIG_TYPE_CHAR argument type. `fp` is an untyped pointer that the
 * inline comment identifies as a FILE*.
 * NOTE(review): what happens when `fp` is non-null, the definition of `opts`
 * and the return statement are not visible in this listing.
 */
5851 onig_setup_builtin_monitors_by_ascii_encoded_name(void* fp
/* FILE* */)
5859 if (IS_NOT_NULL(fp
))
5864 enc
= ONIG_ENCODING_ASCII
;
5867 ts
[0] = ONIG_TYPE_CHAR
;
5869 BC_B_O(name
, monitor
, 1, ts
, 1, opts
);
5874 #endif /* USE_CALLOUT */