1 /**********************************************************************
2 regexec.c - Oniguruma (regular expression library)
3 **********************************************************************/
5 * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
8 * Copyright (c) 2015, Hewlett Packard Enterprise Development, L.P.<BR>
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 #define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
36 #ifdef USE_CRNL_AS_LINE_TERMINATOR
37 #define ONIGENC_IS_MBC_CRNL(enc,p,end) \
38 (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
39 ONIGENC_IS_MBC_NEWLINE(enc,(p+enclen(enc,p)),end))
42 #ifdef USE_CAPTURE_HISTORY
43 static void history_tree_free(OnigCaptureTreeNode
* node
);
46 history_tree_clear(OnigCaptureTreeNode
* node
)
50 if (IS_NOT_NULL(node
)) {
51 for (i
= 0; i
< node
->num_childs
; i
++) {
52 if (IS_NOT_NULL(node
->childs
[i
])) {
53 history_tree_free(node
->childs
[i
]);
56 for (i
= 0; i
< node
->allocated
; i
++) {
57 node
->childs
[i
] = (OnigCaptureTreeNode
* )0;
60 node
->beg
= ONIG_REGION_NOTPOS
;
61 node
->end
= ONIG_REGION_NOTPOS
;
67 history_tree_free(OnigCaptureTreeNode
* node
)
69 history_tree_clear(node
);
74 history_root_free(OnigRegion
* r
)
76 if (IS_NOT_NULL(r
->history_root
)) {
77 history_tree_free(r
->history_root
);
78 r
->history_root
= (OnigCaptureTreeNode
* )0;
82 static OnigCaptureTreeNode
*
83 history_node_new(void)
85 OnigCaptureTreeNode
* node
;
87 node
= (OnigCaptureTreeNode
* )xmalloc(sizeof(OnigCaptureTreeNode
));
88 CHECK_NULL_RETURN(node
);
89 node
->childs
= (OnigCaptureTreeNode
** )0;
93 node
->beg
= ONIG_REGION_NOTPOS
;
94 node
->end
= ONIG_REGION_NOTPOS
;
100 history_tree_add_child(OnigCaptureTreeNode
* parent
, OnigCaptureTreeNode
* child
)
102 #define HISTORY_TREE_INIT_ALLOC_SIZE 8
104 if (parent
->num_childs
>= parent
->allocated
) {
107 if (IS_NULL(parent
->childs
)) {
108 n
= HISTORY_TREE_INIT_ALLOC_SIZE
;
110 (OnigCaptureTreeNode
** )xmalloc(sizeof(OnigCaptureTreeNode
*) * n
);
113 n
= parent
->allocated
* 2;
115 (OnigCaptureTreeNode
** )xrealloc(parent
->childs
,
116 sizeof(OnigCaptureTreeNode
*) * n
,
117 sizeof(OnigCaptureTreeNode
*) * parent
->allocated
);
119 CHECK_NULL_RETURN_MEMERR(parent
->childs
);
120 for (i
= parent
->allocated
; i
< n
; i
++) {
121 parent
->childs
[i
] = (OnigCaptureTreeNode
* )0;
123 parent
->allocated
= n
;
126 parent
->childs
[parent
->num_childs
] = child
;
127 parent
->num_childs
++;
131 static OnigCaptureTreeNode
*
132 history_tree_clone(OnigCaptureTreeNode
* node
)
135 OnigCaptureTreeNode
*clone
, *child
;
137 clone
= history_node_new();
138 CHECK_NULL_RETURN(clone
);
140 clone
->beg
= node
->beg
;
141 clone
->end
= node
->end
;
142 for (i
= 0; i
< node
->num_childs
; i
++) {
143 child
= history_tree_clone(node
->childs
[i
]);
144 if (IS_NULL(child
)) {
145 history_tree_free(clone
);
146 return (OnigCaptureTreeNode
* )0;
148 history_tree_add_child(clone
, child
);
154 extern OnigCaptureTreeNode
*
155 onig_get_capture_tree(OnigRegion
* region
)
157 return region
->history_root
;
159 #endif /* USE_CAPTURE_HISTORY */
162 onig_region_clear(OnigRegion
* region
)
166 for (i
= 0; i
< region
->num_regs
; i
++) {
167 region
->beg
[i
] = region
->end
[i
] = ONIG_REGION_NOTPOS
;
169 #ifdef USE_CAPTURE_HISTORY
170 history_root_free(region
);
175 onig_region_resize(OnigRegion
* region
, int n
)
177 region
->num_regs
= n
;
179 if (n
< ONIG_NREGION
)
182 if (region
->allocated
== 0) {
183 region
->beg
= (int* )xmalloc(n
* sizeof(int));
184 region
->end
= (int* )xmalloc(n
* sizeof(int));
186 if (region
->beg
== 0 || region
->end
== 0)
187 return ONIGERR_MEMORY
;
189 region
->allocated
= n
;
191 else if (region
->allocated
< n
) {
192 region
->beg
= (int* )xrealloc(region
->beg
, n
* sizeof(int), region
->allocated
* sizeof(int));
193 region
->end
= (int* )xrealloc(region
->end
, n
* sizeof(int), region
->allocated
* sizeof(int));
195 if (region
->beg
== 0 || region
->end
== 0)
196 return ONIGERR_MEMORY
;
198 region
->allocated
= n
;
205 onig_region_resize_clear(OnigRegion
* region
, int n
)
209 r
= onig_region_resize(region
, n
);
210 if (r
!= 0) return r
;
211 onig_region_clear(region
);
216 onig_region_set(OnigRegion
* region
, int at
, int beg
, int end
)
218 if (at
< 0) return ONIGERR_INVALID_ARGUMENT
;
220 if (at
>= region
->allocated
) {
221 int r
= onig_region_resize(region
, at
+ 1);
225 region
->beg
[at
] = beg
;
226 region
->end
[at
] = end
;
231 onig_region_init(OnigRegion
* region
)
233 region
->num_regs
= 0;
234 region
->allocated
= 0;
235 region
->beg
= (int* )0;
236 region
->end
= (int* )0;
237 region
->history_root
= (OnigCaptureTreeNode
* )0;
241 onig_region_new(void)
245 r
= (OnigRegion
* )xmalloc(sizeof(OnigRegion
));
251 onig_region_free(OnigRegion
* r
, int free_self
)
254 if (r
->allocated
> 0) {
255 if (r
->beg
) xfree(r
->beg
);
256 if (r
->end
) xfree(r
->end
);
259 #ifdef USE_CAPTURE_HISTORY
260 history_root_free(r
);
262 if (free_self
) xfree(r
);
267 onig_region_copy(OnigRegion
* to
, OnigRegion
* from
)
269 #define RREGC_SIZE (sizeof(int) * from->num_regs)
272 if (to
== from
) return;
274 if (to
->allocated
== 0) {
275 if (from
->num_regs
> 0) {
276 to
->beg
= (int* )xmalloc(RREGC_SIZE
);
277 to
->end
= (int* )xmalloc(RREGC_SIZE
);
278 to
->allocated
= from
->num_regs
;
281 else if (to
->allocated
< from
->num_regs
) {
282 to
->beg
= (int* )xrealloc(to
->beg
, RREGC_SIZE
, sizeof(int) * to
->allocated
);
283 to
->end
= (int* )xrealloc(to
->end
, RREGC_SIZE
, sizeof(int) * to
->allocated
);
284 to
->allocated
= from
->num_regs
;
287 for (i
= 0; i
< from
->num_regs
; i
++) {
288 to
->beg
[i
] = from
->beg
[i
];
289 to
->end
[i
] = from
->end
[i
];
291 to
->num_regs
= from
->num_regs
;
293 #ifdef USE_CAPTURE_HISTORY
294 history_root_free(to
);
296 if (IS_NOT_NULL(from
->history_root
)) {
297 to
->history_root
= history_tree_clone(from
->history_root
);
304 #define INVALID_STACK_INDEX -1
307 /* used by normal-POP */
308 #define STK_ALT 0x0001
309 #define STK_LOOK_BEHIND_NOT 0x0002
310 #define STK_POS_NOT 0x0003
311 /* handled by normal-POP */
312 #define STK_MEM_START 0x0100
313 #define STK_MEM_END 0x8200
314 #define STK_REPEAT_INC 0x0300
315 #define STK_STATE_CHECK_MARK 0x1000
316 /* avoided by normal-POP */
317 #define STK_NULL_CHECK_START 0x3000
318 #define STK_NULL_CHECK_END 0x5000 /* for recursive call */
319 #define STK_MEM_END_MARK 0x8400
320 #define STK_POS 0x0500 /* used when POP-POS */
321 #define STK_STOP_BT 0x0600 /* mark for "(?>...)" */
322 #define STK_REPEAT 0x0700
323 #define STK_CALL_FRAME 0x0800
324 #define STK_RETURN 0x0900
325 #define STK_VOID 0x0a00 /* for fill a blank */
327 /* stack type check mask */
328 #define STK_MASK_POP_USED 0x00ff
329 #define STK_MASK_TO_VOID_TARGET 0x10ff
330 #define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */
332 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
333 #define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
334 (msa).stack_p = (void* )0;\
335 (msa).options = (arg_option);\
336 (msa).region = (arg_region);\
337 (msa).start = (arg_start);\
338 (msa).best_len = ONIG_MISMATCH;\
341 #define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
342 (msa).stack_p = (void* )0;\
343 (msa).options = (arg_option);\
344 (msa).region = (arg_region);\
345 (msa).start = (arg_start);\
349 #ifdef USE_COMBINATION_EXPLOSION_CHECK
351 #define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16
353 #define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \
354 if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\
355 unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\
356 offset = ((offset) * (state_num)) >> 3;\
357 if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\
358 if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) \
359 (msa).state_check_buff = (void* )xmalloc(size);\
361 (msa).state_check_buff = (void* )xalloca(size);\
362 xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \
363 (size_t )(size - (offset))); \
364 (msa).state_check_buff_size = size;\
367 (msa).state_check_buff = (void* )0;\
368 (msa).state_check_buff_size = 0;\
372 (msa).state_check_buff = (void* )0;\
373 (msa).state_check_buff_size = 0;\
377 #define MATCH_ARG_FREE(msa) do {\
378 if ((msa).stack_p) xfree((msa).stack_p);\
379 if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \
380 if ((msa).state_check_buff) xfree((msa).state_check_buff);\
384 #define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num)
385 #define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p)
390 #define STACK_INIT(alloc_addr, ptr_num, stack_num) do {\
392 alloc_addr = (char* )xmalloc(sizeof(char*) * (ptr_num));\
393 stk_alloc = (OnigStackType* )(msa->stack_p);\
394 stk_base = stk_alloc;\
396 stk_end = stk_base + msa->stack_n;\
399 alloc_addr = (char* )xmalloc(sizeof(char*) * (ptr_num)\
400 + sizeof(OnigStackType) * (stack_num));\
401 stk_alloc = (OnigStackType* )(alloc_addr + sizeof(char*) * (ptr_num));\
402 stk_base = stk_alloc;\
404 stk_end = stk_base + (stack_num);\
408 #define STACK_SAVE do{\
409 if (stk_base != stk_alloc) {\
410 msa->stack_p = stk_base;\
411 msa->stack_n = (int)(stk_end - stk_base);\
415 static unsigned int MatchStackLimitSize
= DEFAULT_MATCH_STACK_LIMIT_SIZE
;
418 onig_get_match_stack_limit_size(void)
420 return MatchStackLimitSize
;
424 onig_set_match_stack_limit_size(unsigned int size
)
426 MatchStackLimitSize
= size
;
431 stack_double(OnigStackType
** arg_stk_base
, OnigStackType
** arg_stk_end
,
432 OnigStackType
** arg_stk
, OnigStackType
* stk_alloc
, OnigMatchArg
* msa
)
435 OnigStackType
*x
, *stk_base
, *stk_end
, *stk
;
437 stk_base
= *arg_stk_base
;
438 stk_end
= *arg_stk_end
;
441 n
= (unsigned int)(stk_end
- stk_base
);
442 if (stk_base
== stk_alloc
&& IS_NULL(msa
->stack_p
)) {
443 x
= (OnigStackType
* )xmalloc(sizeof(OnigStackType
) * n
* 2);
446 return ONIGERR_MEMORY
;
448 xmemcpy(x
, stk_base
, n
* sizeof(OnigStackType
));
453 if (MatchStackLimitSize
!= 0 && n
> MatchStackLimitSize
) {
454 if ((unsigned int )(stk_end
- stk_base
) == MatchStackLimitSize
)
455 return ONIGERR_MATCH_STACK_LIMIT_OVER
;
457 n
= MatchStackLimitSize
;
459 x
= (OnigStackType
* )xrealloc(stk_base
, sizeof(OnigStackType
) * n
, sizeof(OnigStackType
) * (stk_end
- stk_base
));
462 return ONIGERR_MEMORY
;
465 *arg_stk
= x
+ (stk
- stk_base
);
467 *arg_stk_end
= x
+ n
;
471 #define STACK_ENSURE(n) do {\
472 if (stk_end - stk < (n)) {\
473 int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\
474 if (r != 0) { STACK_SAVE; return r; } \
478 #define STACK_AT(index) (stk_base + (index))
479 #define GET_STACK_INDEX(stk) ((OnigStackIndex)((stk) - stk_base))
481 #define STACK_PUSH_TYPE(stack_type) do {\
483 stk->type = (stack_type);\
487 #define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
489 #ifdef USE_COMBINATION_EXPLOSION_CHECK
490 #define STATE_CHECK_POS(s,snum) \
491 (((s) - str) * num_comb_exp_check + ((snum) - 1))
492 #define STATE_CHECK_VAL(v,snum) do {\
493 if (state_check_buff != NULL) {\
494 int x = STATE_CHECK_POS(s,snum);\
495 (v) = state_check_buff[x/8] & (1<<(x%8));\
501 #define ELSE_IF_STATE_CHECK_MARK(stk) \
502 else if ((stk)->type == STK_STATE_CHECK_MARK) { \
503 int x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\
504 state_check_buff[x/8] |= (1<<(x%8)); \
507 #define STACK_PUSH(stack_type,pat,s,sprev) do {\
509 stk->type = (stack_type);\
510 stk->u.state.pcode = (pat);\
511 stk->u.state.pstr = (s);\
512 stk->u.state.pstr_prev = (sprev);\
513 stk->u.state.state_check = 0;\
517 #define STACK_PUSH_ENSURED(stack_type,pat) do {\
518 stk->type = (stack_type);\
519 stk->u.state.pcode = (pat);\
520 stk->u.state.state_check = 0;\
524 #define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum) do {\
526 stk->type = STK_ALT;\
527 stk->u.state.pcode = (pat);\
528 stk->u.state.pstr = (s);\
529 stk->u.state.pstr_prev = (sprev);\
530 stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\
534 #define STACK_PUSH_STATE_CHECK(s,snum) do {\
535 if (state_check_buff != NULL) {\
537 stk->type = STK_STATE_CHECK_MARK;\
538 stk->u.state.pstr = (s);\
539 stk->u.state.state_check = (snum);\
544 #else /* USE_COMBINATION_EXPLOSION_CHECK */
546 #define ELSE_IF_STATE_CHECK_MARK(stk)
548 #define STACK_PUSH(stack_type,pat,s,sprev) do {\
550 stk->type = (stack_type);\
551 stk->u.state.pcode = (pat);\
552 stk->u.state.pstr = (s);\
553 stk->u.state.pstr_prev = (sprev);\
557 #define STACK_PUSH_ENSURED(stack_type,pat) do {\
558 stk->type = (stack_type);\
559 stk->u.state.pcode = (pat);\
562 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
564 #define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev)
565 #define STACK_PUSH_POS(s,sprev) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev)
566 #define STACK_PUSH_POS_NOT(pat,s,sprev) STACK_PUSH(STK_POS_NOT,pat,s,sprev)
567 #define STACK_PUSH_STOP_BT STACK_PUSH_TYPE(STK_STOP_BT)
568 #define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev) \
569 STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev)
571 #define STACK_PUSH_REPEAT(id, pat) do {\
573 stk->type = STK_REPEAT;\
574 stk->u.repeat.num = (id);\
575 stk->u.repeat.pcode = (pat);\
576 stk->u.repeat.count = 0;\
580 #define STACK_PUSH_REPEAT_INC(sindex) do {\
582 stk->type = STK_REPEAT_INC;\
583 stk->u.repeat_inc.si = (sindex);\
587 #define STACK_PUSH_MEM_START(mnum, s) do {\
589 stk->type = STK_MEM_START;\
590 stk->u.mem.num = (int)(mnum);\
591 stk->u.mem.pstr = (s);\
592 stk->u.mem.start = mem_start_stk[mnum];\
593 stk->u.mem.end = mem_end_stk[mnum];\
594 mem_start_stk[mnum] = GET_STACK_INDEX(stk);\
595 mem_end_stk[mnum] = INVALID_STACK_INDEX;\
599 #define STACK_PUSH_MEM_END(mnum, s) do {\
601 stk->type = STK_MEM_END;\
602 stk->u.mem.num = (mnum);\
603 stk->u.mem.pstr = (s);\
604 stk->u.mem.start = mem_start_stk[mnum];\
605 stk->u.mem.end = mem_end_stk[mnum];\
606 mem_end_stk[mnum] = GET_STACK_INDEX(stk);\
610 #define STACK_PUSH_MEM_END_MARK(mnum) do {\
612 stk->type = STK_MEM_END_MARK;\
613 stk->u.mem.num = (mnum);\
617 #define STACK_GET_MEM_START(mnum, k) do {\
620 while (k > stk_base) {\
622 if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
623 && k->u.mem.num == (mnum)) {\
626 else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
627 if (level == 0) break;\
633 #define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\
636 if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
637 if (level == 0) (start) = k->u.mem.pstr;\
640 else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\
643 (end) = k->u.mem.pstr;\
651 #define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\
653 stk->type = STK_NULL_CHECK_START;\
654 stk->u.null_check.num = (cnum);\
655 stk->u.null_check.pstr = (s);\
659 #define STACK_PUSH_NULL_CHECK_END(cnum) do {\
661 stk->type = STK_NULL_CHECK_END;\
662 stk->u.null_check.num = (cnum);\
666 #define STACK_PUSH_CALL_FRAME(pat) do {\
668 stk->type = STK_CALL_FRAME;\
669 stk->u.call_frame.ret_addr = (pat);\
673 #define STACK_PUSH_RETURN do {\
675 stk->type = STK_RETURN;\
681 #define STACK_BASE_CHECK(p, at) \
682 if ((p) < stk_base) {\
683 fprintf(stderr, "at %s\n", at);\
687 #define STACK_BASE_CHECK(p, at)
690 #define STACK_POP_ONE do {\
692 STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
695 #define STACK_POP do {\
696 switch (pop_level) {\
697 case STACK_POP_LEVEL_FREE:\
700 STACK_BASE_CHECK(stk, "STACK_POP"); \
701 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
702 ELSE_IF_STATE_CHECK_MARK(stk);\
705 case STACK_POP_LEVEL_MEM_START:\
708 STACK_BASE_CHECK(stk, "STACK_POP 2"); \
709 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
710 else if (stk->type == STK_MEM_START) {\
711 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
712 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
714 ELSE_IF_STATE_CHECK_MARK(stk);\
720 STACK_BASE_CHECK(stk, "STACK_POP 3"); \
721 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
722 else if (stk->type == STK_MEM_START) {\
723 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
724 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
726 else if (stk->type == STK_REPEAT_INC) {\
727 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
729 else if (stk->type == STK_MEM_END) {\
730 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
731 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
733 ELSE_IF_STATE_CHECK_MARK(stk);\
739 #define STACK_POP_TIL_POS_NOT do {\
742 STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \
743 if (stk->type == STK_POS_NOT) break;\
744 else if (stk->type == STK_MEM_START) {\
745 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
746 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
748 else if (stk->type == STK_REPEAT_INC) {\
749 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
751 else if (stk->type == STK_MEM_END) {\
752 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
753 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
755 ELSE_IF_STATE_CHECK_MARK(stk);\
759 #define STACK_POP_TIL_LOOK_BEHIND_NOT do {\
762 STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \
763 if (stk->type == STK_LOOK_BEHIND_NOT) break;\
764 else if (stk->type == STK_MEM_START) {\
765 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
766 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
768 else if (stk->type == STK_REPEAT_INC) {\
769 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
771 else if (stk->type == STK_MEM_END) {\
772 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
773 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
775 ELSE_IF_STATE_CHECK_MARK(stk);\
779 #define STACK_POS_END(k) do {\
783 STACK_BASE_CHECK(k, "STACK_POS_END"); \
784 if (IS_TO_VOID_TARGET(k)) {\
787 else if (k->type == STK_POS) {\
794 #define STACK_STOP_BT_END do {\
795 OnigStackType *k = stk;\
798 STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \
799 if (IS_TO_VOID_TARGET(k)) {\
802 else if (k->type == STK_STOP_BT) {\
809 #define STACK_NULL_CHECK(isnull,id,s) do {\
810 OnigStackType* k = stk;\
813 STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \
814 if (k->type == STK_NULL_CHECK_START) {\
815 if (k->u.null_check.num == (id)) {\
816 (isnull) = (k->u.null_check.pstr == (s));\
823 #define STACK_NULL_CHECK_REC(isnull,id,s) do {\
825 OnigStackType* k = stk;\
828 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \
829 if (k->type == STK_NULL_CHECK_START) {\
830 if (k->u.null_check.num == (id)) {\
832 (isnull) = (k->u.null_check.pstr == (s));\
838 else if (k->type == STK_NULL_CHECK_END) {\
844 #define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\
845 OnigStackType* k = stk;\
848 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \
849 if (k->type == STK_NULL_CHECK_START) {\
850 if (k->u.null_check.num == (id)) {\
851 if (k->u.null_check.pstr != (s)) {\
859 if (k->type == STK_MEM_START) {\
860 if (k->u.mem.end == INVALID_STACK_INDEX) {\
861 (isnull) = 0; break;\
863 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
864 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
866 endp = (UChar* )k->u.mem.end;\
867 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
868 (isnull) = 0; break;\
870 else if (endp != s) {\
871 (isnull) = -1; /* empty, but position changed */ \
883 #define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\
885 OnigStackType* k = stk;\
888 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \
889 if (k->type == STK_NULL_CHECK_START) {\
890 if (k->u.null_check.num == (id)) {\
892 if (k->u.null_check.pstr != (s)) {\
900 if (k->type == STK_MEM_START) {\
901 if (k->u.mem.end == INVALID_STACK_INDEX) {\
902 (isnull) = 0; break;\
904 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
905 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
907 endp = (UChar* )k->u.mem.end;\
908 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
909 (isnull) = 0; break;\
911 else if (endp != s) {\
912 (isnull) = -1; /* empty, but position changed */ \
925 else if (k->type == STK_NULL_CHECK_END) {\
926 if (k->u.null_check.num == (id)) level++;\
931 #define STACK_GET_REPEAT(id, k) do {\
936 STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
937 if (k->type == STK_REPEAT) {\
939 if (k->u.repeat.num == (id)) {\
944 else if (k->type == STK_CALL_FRAME) level--;\
945 else if (k->type == STK_RETURN) level++;\
949 #define STACK_RETURN(addr) do {\
951 OnigStackType* k = stk;\
954 STACK_BASE_CHECK(k, "STACK_RETURN"); \
955 if (k->type == STK_CALL_FRAME) {\
957 (addr) = k->u.call_frame.ret_addr;\
962 else if (k->type == STK_RETURN)\
968 #define STRING_CMP(s1,s2,len) do {\
970 if (*s1++ != *s2++) goto fail;\
974 #define STRING_CMP_IC(case_fold_flag,s1,ps2,len) do {\
975 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) \
979 static int string_cmp_ic(OnigEncoding enc
, int case_fold_flag
,
980 UChar
* s1
, UChar
** ps2
, int mblen
)
982 UChar buf1
[ONIGENC_MBC_CASE_FOLD_MAXLEN
];
983 UChar buf2
[ONIGENC_MBC_CASE_FOLD_MAXLEN
];
984 UChar
*p1
, *p2
, *end1
, *s2
, *end2
;
991 len1
= ONIGENC_MBC_CASE_FOLD(enc
, case_fold_flag
, &s1
, end1
, buf1
);
992 len2
= ONIGENC_MBC_CASE_FOLD(enc
, case_fold_flag
, &s2
, end2
, buf2
);
993 if (len1
!= len2
) return 0;
997 if (*p1
!= *p2
) return 0;
1007 #define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
1009 while (len-- > 0) {\
1010 if (*s1++ != *s2++) {\
1011 is_fail = 1; break;\
1016 #define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,is_fail) do {\
1017 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) \
1024 #define IS_EMPTY_STR (str == end)
1025 #define ON_STR_BEGIN(s) ((s) == str)
1026 #define ON_STR_END(s) ((s) == end)
1027 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
1028 #define DATA_ENSURE_CHECK1 (s < right_range)
1029 #define DATA_ENSURE_CHECK(n) (s + (n) <= right_range)
1030 #define DATA_ENSURE(n) if (s + (n) > right_range) goto fail
1032 #define DATA_ENSURE_CHECK1 (s < end)
1033 #define DATA_ENSURE_CHECK(n) (s + (n) <= end)
1034 #define DATA_ENSURE(n) if (s + (n) > end) goto fail
1035 #endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
1038 #ifdef USE_CAPTURE_HISTORY
1040 make_capture_history_tree(OnigCaptureTreeNode
* node
, OnigStackType
** kp
,
1041 OnigStackType
* stk_top
, UChar
* str
, regex_t
* reg
)
1044 OnigCaptureTreeNode
* child
;
1045 OnigStackType
* k
= *kp
;
1047 while (k
< stk_top
) {
1048 if (k
->type
== STK_MEM_START
) {
1050 if (n
<= ONIG_MAX_CAPTURE_HISTORY_GROUP
&&
1051 BIT_STATUS_AT(reg
->capture_history
, n
) != 0) {
1052 child
= history_node_new();
1053 CHECK_NULL_RETURN_MEMERR(child
);
1055 child
->beg
= (int )(k
->u
.mem
.pstr
- str
);
1056 r
= history_tree_add_child(node
, child
);
1057 if (r
!= 0) return r
;
1059 r
= make_capture_history_tree(child
, kp
, stk_top
, str
, reg
);
1060 if (r
!= 0) return r
;
1063 child
->end
= (int )(k
->u
.mem
.pstr
- str
);
1066 else if (k
->type
== STK_MEM_END
) {
1067 if (k
->u
.mem
.num
== node
->group
) {
1068 node
->end
= (int )(k
->u
.mem
.pstr
- str
);
1076 return 1; /* 1: root node ending. */
1080 #ifdef USE_BACKREF_WITH_LEVEL
1081 static int mem_is_in_memp(int mem
, int num
, UChar
* memp
)
1086 for (i
= 0; i
< num
; i
++) {
1087 GET_MEMNUM_INC(m
, memp
);
1088 if (mem
== (int )m
) return 1;
1093 static int backref_match_at_nested_level(regex_t
* reg
1094 , OnigStackType
* top
, OnigStackType
* stk_base
1095 , int ignore_case
, int case_fold_flag
1096 , int nest
, int mem_num
, UChar
* memp
, UChar
** s
, const UChar
* send
)
1098 UChar
*ss
, *p
, *pstart
, *pend
= NULL_UCHARP
;
1105 while (k
>= stk_base
) {
1106 if (k
->type
== STK_CALL_FRAME
) {
1109 else if (k
->type
== STK_RETURN
) {
1112 else if (level
== nest
) {
1113 if (k
->type
== STK_MEM_START
) {
1114 if (mem_is_in_memp(k
->u
.mem
.num
, mem_num
, memp
)) {
1115 pstart
= k
->u
.mem
.pstr
;
1116 if (pend
!= NULL_UCHARP
) {
1117 if (pend
- pstart
> send
- *s
) return 0; /* or goto next_mem; */
1121 if (ignore_case
!= 0) {
1122 if (string_cmp_ic(reg
->enc
, case_fold_flag
,
1123 pstart
, &ss
, (int )(pend
- pstart
)) == 0)
1124 return 0; /* or goto next_mem; */
1128 if (*p
++ != *ss
++) return 0; /* or goto next_mem; */
1137 else if (k
->type
== STK_MEM_END
) {
1138 if (mem_is_in_memp(k
->u
.mem
.num
, mem_num
, memp
)) {
1139 pend
= k
->u
.mem
.pstr
;
1148 #endif /* USE_BACKREF_WITH_LEVEL */
1151 #ifdef ONIG_DEBUG_STATISTICS
1153 #define USE_TIMEOFDAY
1155 #ifdef USE_TIMEOFDAY
1156 #ifdef HAVE_SYS_TIME_H
1157 #include <sys/time.h>
1159 #ifdef HAVE_UNISTD_H
1162 static struct timeval ts
, te
;
1163 #define GETTIME(t) gettimeofday(&(t), (struct timezone* )0)
1164 #define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \
1165 (((te).tv_sec - (ts).tv_sec)*1000000))
1167 #ifdef HAVE_SYS_TIMES_H
1168 #include <sys/times.h>
1170 static struct tms ts
, te
;
1171 #define GETTIME(t) times(&(t))
1172 #define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime)
1175 static int OpCounter
[256];
1176 static int OpPrevCounter
[256];
1177 static unsigned long OpTime
[256];
1178 static int OpCurr
= OP_FINISH
;
1179 static int OpPrevTarget
= OP_FAIL
;
1180 static int MaxStackDepth
= 0;
1182 #define MOP_IN(opcode) do {\
1183 if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\
1185 OpCounter[opcode]++;\
1189 #define MOP_OUT do {\
1191 OpTime[OpCurr] += TIMEDIFF(te, ts);\
1195 onig_statistics_init(void)
1198 for (i
= 0; i
< 256; i
++) {
1199 OpCounter
[i
] = OpPrevCounter
[i
] = 0; OpTime
[i
] = 0;
1205 onig_print_statistics(FILE* f
)
1208 fprintf(f
, " count prev time\n");
1209 for (i
= 0; OnigOpInfo
[i
].opcode
>= 0; i
++) {
1210 fprintf(f
, "%8d: %8d: %10ld: %s\n",
1211 OpCounter
[i
], OpPrevCounter
[i
], OpTime
[i
], OnigOpInfo
[i
].name
);
1213 fprintf(f
, "\nmax stack depth: %d\n", MaxStackDepth
);
1216 #define STACK_INC do {\
1218 if (stk - stk_base > MaxStackDepth) \
1219 MaxStackDepth = stk - stk_base;\
1223 #define STACK_INC stk++
1225 #define MOP_IN(opcode)
1230 /* matching region of POSIX API */
1231 typedef int regoff_t
;
1238 /* match data(str - end) from position (sstart). */
1239 /* if sstart == str then set sprev to NULL. */
1241 match_at(regex_t
* reg
, const UChar
* str
, const UChar
* end
,
1242 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
1243 const UChar
* right_range
,
1245 const UChar
* sstart
, UChar
* sprev
, OnigMatchArg
* msa
)
1247 static UChar FinishCode
[] = { OP_FINISH
};
1249 int i
, n
, num_mem
, best_len
, pop_level
;
1250 LengthType tlen
, tlen2
;
1253 OnigOptionType option
= reg
->options
;
1254 OnigEncoding encode
= reg
->enc
;
1255 OnigCaseFoldType case_fold_flag
= reg
->case_fold_flag
;
1256 UChar
*s
, *q
, *sbegin
;
1259 OnigStackType
*stk_alloc
, *stk_base
, *stk
, *stk_end
;
1260 OnigStackType
*stkp
; /* used as any purpose. */
1262 OnigStackIndex
*repeat_stk
;
1263 OnigStackIndex
*mem_start_stk
, *mem_end_stk
;
1264 #ifdef USE_COMBINATION_EXPLOSION_CHECK
1266 unsigned char* state_check_buff
= msa
->state_check_buff
;
1267 int num_comb_exp_check
= reg
->num_comb_exp_check
;
1269 n
= reg
->num_repeat
+ reg
->num_mem
* 2;
1271 STACK_INIT(alloca_base
, n
, INIT_MATCH_STACK_SIZE
);
1272 pop_level
= reg
->stack_pop_level
;
1273 num_mem
= reg
->num_mem
;
1274 repeat_stk
= (OnigStackIndex
* )alloca_base
;
1276 mem_start_stk
= (OnigStackIndex
* )(repeat_stk
+ reg
->num_repeat
);
1277 mem_end_stk
= mem_start_stk
+ num_mem
;
1278 mem_start_stk
--; /* for index start from 1,
1279 mem_start_stk[1]..mem_start_stk[num_mem] */
1280 mem_end_stk
--; /* for index start from 1,
1281 mem_end_stk[1]..mem_end_stk[num_mem] */
1282 for (i
= 1; i
<= num_mem
; i
++) {
1283 mem_start_stk
[i
] = mem_end_stk
[i
] = INVALID_STACK_INDEX
;
1286 #ifdef ONIG_DEBUG_MATCH
1287 fprintf(stderr
, "match_at: str: %d, end: %d, start: %d, sprev: %d\n",
1288 (int )str
, (int )end
, (int )sstart
, (int )sprev
);
1289 fprintf(stderr
, "size: %d, start offset: %d\n",
1290 (int )(end
- str
), (int )(sstart
- str
));
1293 STACK_PUSH_ENSURED(STK_ALT
, FinishCode
); /* bottom stack */
1294 best_len
= ONIG_MISMATCH
;
1295 s
= (UChar
* )sstart
;
1297 #ifdef ONIG_DEBUG_MATCH
1299 UChar
*q
, *bp
, buf
[50];
1301 fprintf(stderr
, "%4d> \"", (int )(s
- str
));
1303 for (i
= 0, q
= s
; i
< 7 && q
< end
; i
++) {
1304 len
= enclen(encode
, q
);
1305 while (len
-- > 0) *bp
++ = *q
++;
1307 if (q
< end
) { xmemcpy(bp
, "...\"", 4); bp
+= 4; }
1308 else { xmemcpy(bp
, "\"", 1); bp
+= 1; }
1310 fputs((char* )buf
, stderr
);
1311 for (i
= 0; i
< 20 - (bp
- buf
); i
++) fputc(' ', stderr
);
1312 onig_print_compiled_byte_code(stderr
, p
, NULL
, encode
);
1313 fprintf(stderr
, "\n");
1319 case OP_END
: MOP_IN(OP_END
);
1320 n
= (int)(s
- sstart
);
1323 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
1324 if (IS_FIND_LONGEST(option
)) {
1325 if (n
> msa
->best_len
) {
1327 msa
->best_s
= (UChar
* )sstart
;
1334 region
= msa
->region
;
1336 #ifdef USE_POSIX_API_REGION_OPTION
1337 if (IS_POSIX_REGION(msa
->options
)) {
1338 posix_regmatch_t
* rmt
= (posix_regmatch_t
* )region
;
1340 rmt
[0].rm_so
= (regoff_t
)(sstart
- str
);
1341 rmt
[0].rm_eo
= (regoff_t
)(s
- str
);
1342 for (i
= 1; i
<= num_mem
; i
++) {
1343 if (mem_end_stk
[i
] != INVALID_STACK_INDEX
) {
1344 if (BIT_STATUS_AT(reg
->bt_mem_start
, i
))
1345 rmt
[i
].rm_so
= (regoff_t
)(STACK_AT(mem_start_stk
[i
])->u
.mem
.pstr
- str
);
1347 rmt
[i
].rm_so
= (regoff_t
)((UChar
* )((void* )(mem_start_stk
[i
])) - str
);
1349 rmt
[i
].rm_eo
= (regoff_t
)((BIT_STATUS_AT(reg
->bt_mem_end
, i
)
1350 ? STACK_AT(mem_end_stk
[i
])->u
.mem
.pstr
1351 : (UChar
* )((void* )mem_end_stk
[i
])) - str
);
1354 rmt
[i
].rm_so
= rmt
[i
].rm_eo
= ONIG_REGION_NOTPOS
;
1359 #endif /* USE_POSIX_API_REGION_OPTION */
1360 region
->beg
[0] = (int)(sstart
- str
);
1361 region
->end
[0] = (int)(s
- str
);
1362 for (i
= 1; i
<= num_mem
; i
++) {
1363 if (mem_end_stk
[i
] != INVALID_STACK_INDEX
) {
1364 if (BIT_STATUS_AT(reg
->bt_mem_start
, i
))
1365 region
->beg
[i
] = (int)(STACK_AT(mem_start_stk
[i
])->u
.mem
.pstr
- str
);
1367 region
->beg
[i
] = (int)((UChar
* )((void* )mem_start_stk
[i
]) - str
);
1369 region
->end
[i
] = (int)((BIT_STATUS_AT(reg
->bt_mem_end
, i
)
1370 ? STACK_AT(mem_end_stk
[i
])->u
.mem
.pstr
1371 : (UChar
* )((void* )mem_end_stk
[i
])) - str
);
1374 region
->beg
[i
] = region
->end
[i
] = ONIG_REGION_NOTPOS
;
1378 #ifdef USE_CAPTURE_HISTORY
1379 if (reg
->capture_history
!= 0) {
1381 OnigCaptureTreeNode
* node
;
1383 if (IS_NULL(region
->history_root
)) {
1384 region
->history_root
= node
= history_node_new();
1385 CHECK_NULL_RETURN_MEMERR(node
);
1388 node
= region
->history_root
;
1389 history_tree_clear(node
);
1393 node
->beg
= (int)(sstart
- str
);
1394 node
->end
= (int)(s
- str
);
1397 r
= make_capture_history_tree(region
->history_root
, &stkp
,
1398 stk
, (UChar
* )str
, reg
);
1400 best_len
= r
; /* error code */
1404 #endif /* USE_CAPTURE_HISTORY */
1405 #ifdef USE_POSIX_API_REGION_OPTION
1406 } /* else IS_POSIX_REGION() */
1409 } /* n > best_len */
1411 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
1416 if (IS_FIND_CONDITION(option
)) {
1417 if (IS_FIND_NOT_EMPTY(option
) && s
== sstart
) {
1418 best_len
= ONIG_MISMATCH
;
1419 goto fail
; /* for retry */
1421 if (IS_FIND_LONGEST(option
) && DATA_ENSURE_CHECK1
) {
1422 goto fail
; /* for retry */
1426 /* default behavior: return first-matching result. */
1430 case OP_EXACT1
: MOP_IN(OP_EXACT1
);
1433 if (*p
!= *s
) goto fail
;
1436 if (*p
!= *s
++) goto fail
;
1442 case OP_EXACT1_IC
: MOP_IN(OP_EXACT1_IC
);
1445 UChar
*q
, lowbuf
[ONIGENC_MBC_CASE_FOLD_MAXLEN
];
1448 len
= ONIGENC_MBC_CASE_FOLD(encode
,
1449 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
1464 case OP_EXACT2
: MOP_IN(OP_EXACT2
);
1466 if (*p
!= *s
) goto fail
;
1468 if (*p
!= *s
) goto fail
;
1475 case OP_EXACT3
: MOP_IN(OP_EXACT3
);
1477 if (*p
!= *s
) goto fail
;
1479 if (*p
!= *s
) goto fail
;
1481 if (*p
!= *s
) goto fail
;
1488 case OP_EXACT4
: MOP_IN(OP_EXACT4
);
1490 if (*p
!= *s
) goto fail
;
1492 if (*p
!= *s
) goto fail
;
1494 if (*p
!= *s
) goto fail
;
1496 if (*p
!= *s
) goto fail
;
1503 case OP_EXACT5
: MOP_IN(OP_EXACT5
);
1505 if (*p
!= *s
) goto fail
;
1507 if (*p
!= *s
) goto fail
;
1509 if (*p
!= *s
) goto fail
;
1511 if (*p
!= *s
) goto fail
;
1513 if (*p
!= *s
) goto fail
;
1520 case OP_EXACTN
: MOP_IN(OP_EXACTN
);
1521 GET_LENGTH_INC(tlen
, p
);
1523 while (tlen
-- > 0) {
1524 if (*p
++ != *s
++) goto fail
;
1531 case OP_EXACTN_IC
: MOP_IN(OP_EXACTN_IC
);
1534 UChar
*q
, *endp
, lowbuf
[ONIGENC_MBC_CASE_FOLD_MAXLEN
];
1536 GET_LENGTH_INC(tlen
, p
);
1542 len
= ONIGENC_MBC_CASE_FOLD(encode
,
1543 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
1549 if (*p
!= *q
) goto fail
;
1559 case OP_EXACTMB2N1
: MOP_IN(OP_EXACTMB2N1
);
1561 if (*p
!= *s
) goto fail
;
1563 if (*p
!= *s
) goto fail
;
1568 case OP_EXACTMB2N2
: MOP_IN(OP_EXACTMB2N2
);
1570 if (*p
!= *s
) goto fail
;
1572 if (*p
!= *s
) goto fail
;
1575 if (*p
!= *s
) goto fail
;
1577 if (*p
!= *s
) goto fail
;
1583 case OP_EXACTMB2N3
: MOP_IN(OP_EXACTMB2N3
);
1585 if (*p
!= *s
) goto fail
;
1587 if (*p
!= *s
) goto fail
;
1589 if (*p
!= *s
) goto fail
;
1591 if (*p
!= *s
) goto fail
;
1594 if (*p
!= *s
) goto fail
;
1596 if (*p
!= *s
) goto fail
;
1602 case OP_EXACTMB2N
: MOP_IN(OP_EXACTMB2N
);
1603 GET_LENGTH_INC(tlen
, p
);
1604 DATA_ENSURE(tlen
* 2);
1605 while (tlen
-- > 0) {
1606 if (*p
!= *s
) goto fail
;
1608 if (*p
!= *s
) goto fail
;
1616 case OP_EXACTMB3N
: MOP_IN(OP_EXACTMB3N
);
1617 GET_LENGTH_INC(tlen
, p
);
1618 DATA_ENSURE(tlen
* 3);
1619 while (tlen
-- > 0) {
1620 if (*p
!= *s
) goto fail
;
1622 if (*p
!= *s
) goto fail
;
1624 if (*p
!= *s
) goto fail
;
1632 case OP_EXACTMBN
: MOP_IN(OP_EXACTMBN
);
1633 GET_LENGTH_INC(tlen
, p
); /* mb-len */
1634 GET_LENGTH_INC(tlen2
, p
); /* string len */
1637 while (tlen2
-- > 0) {
1638 if (*p
!= *s
) goto fail
;
1646 case OP_CCLASS
: MOP_IN(OP_CCLASS
);
1648 if (BITSET_AT(((BitSetRef
)p
), *s
) == 0) goto fail
;
1650 s
+= enclen(encode
, s
); /* OP_CCLASS can match mb-code. \D, \S */
1654 case OP_CCLASS_MB
: MOP_IN(OP_CCLASS_MB
);
1655 if (! ONIGENC_IS_MBC_HEAD(encode
, s
)) goto fail
;
1658 GET_LENGTH_INC(tlen
, p
);
1665 mb_len
= enclen(encode
, s
);
1666 DATA_ENSURE(mb_len
);
1669 code
= ONIGENC_MBC_TO_CODE(encode
, ss
, s
);
1671 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
1672 if (! onig_is_in_code_range(p
, code
)) goto fail
;
1676 if (! onig_is_in_code_range(q
, code
)) goto fail
;
1683 case OP_CCLASS_MIX
: MOP_IN(OP_CCLASS_MIX
);
1685 if (ONIGENC_IS_MBC_HEAD(encode
, s
)) {
1690 if (BITSET_AT(((BitSetRef
)p
), *s
) == 0)
1694 GET_LENGTH_INC(tlen
, p
);
1701 case OP_CCLASS_NOT
: MOP_IN(OP_CCLASS_NOT
);
1703 if (BITSET_AT(((BitSetRef
)p
), *s
) != 0) goto fail
;
1705 s
+= enclen(encode
, s
);
1709 case OP_CCLASS_MB_NOT
: MOP_IN(OP_CCLASS_MB_NOT
);
1711 if (! ONIGENC_IS_MBC_HEAD(encode
, s
)) {
1713 GET_LENGTH_INC(tlen
, p
);
1715 goto cc_mb_not_success
;
1719 GET_LENGTH_INC(tlen
, p
);
1723 int mb_len
= enclen(encode
, s
);
1725 if (! DATA_ENSURE_CHECK(mb_len
)) {
1729 goto cc_mb_not_success
;
1734 code
= ONIGENC_MBC_TO_CODE(encode
, ss
, s
);
1736 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
1737 if (onig_is_in_code_range(p
, code
)) goto fail
;
1741 if (onig_is_in_code_range(q
, code
)) goto fail
;
1750 case OP_CCLASS_MIX_NOT
: MOP_IN(OP_CCLASS_MIX_NOT
);
1752 if (ONIGENC_IS_MBC_HEAD(encode
, s
)) {
1757 if (BITSET_AT(((BitSetRef
)p
), *s
) != 0)
1761 GET_LENGTH_INC(tlen
, p
);
1768 case OP_CCLASS_NODE
: MOP_IN(OP_CCLASS_NODE
);
1776 GET_POINTER_INC(node
, p
);
1777 mb_len
= enclen(encode
, s
);
1781 code
= ONIGENC_MBC_TO_CODE(encode
, ss
, s
);
1782 if (onig_is_code_in_cc_len(mb_len
, code
, node
) == 0) goto fail
;
1787 case OP_ANYCHAR
: MOP_IN(OP_ANYCHAR
);
1789 n
= enclen(encode
, s
);
1791 if (ONIGENC_IS_MBC_NEWLINE(encode
, s
, end
)) goto fail
;
1796 case OP_ANYCHAR_ML
: MOP_IN(OP_ANYCHAR_ML
);
1798 n
= enclen(encode
, s
);
1804 case OP_ANYCHAR_STAR
: MOP_IN(OP_ANYCHAR_STAR
);
1805 while (DATA_ENSURE_CHECK1
) {
1806 STACK_PUSH_ALT(p
, s
, sprev
);
1807 n
= enclen(encode
, s
);
1809 if (ONIGENC_IS_MBC_NEWLINE(encode
, s
, end
)) goto fail
;
1816 case OP_ANYCHAR_ML_STAR
: MOP_IN(OP_ANYCHAR_ML_STAR
);
1817 while (DATA_ENSURE_CHECK1
) {
1818 STACK_PUSH_ALT(p
, s
, sprev
);
1819 n
= enclen(encode
, s
);
1833 case OP_ANYCHAR_STAR_PEEK_NEXT
: MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT
);
1834 while (DATA_ENSURE_CHECK1
) {
1836 STACK_PUSH_ALT(p
+ 1, s
, sprev
);
1838 n
= enclen(encode
, s
);
1840 if (ONIGENC_IS_MBC_NEWLINE(encode
, s
, end
)) goto fail
;
1848 case OP_ANYCHAR_ML_STAR_PEEK_NEXT
:MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT
);
1849 while (DATA_ENSURE_CHECK1
) {
1851 STACK_PUSH_ALT(p
+ 1, s
, sprev
);
1853 n
= enclen(encode
, s
);
1868 #ifdef USE_COMBINATION_EXPLOSION_CHECK
1869 case OP_STATE_CHECK_ANYCHAR_STAR
: MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR
);
1870 GET_STATE_CHECK_NUM_INC(mem
, p
);
1871 while (DATA_ENSURE_CHECK1
) {
1872 STATE_CHECK_VAL(scv
, mem
);
1875 STACK_PUSH_ALT_WITH_STATE_CHECK(p
, s
, sprev
, mem
);
1876 n
= enclen(encode
, s
);
1878 if (ONIGENC_IS_MBC_NEWLINE(encode
, s
, end
)) goto fail
;
1885 case OP_STATE_CHECK_ANYCHAR_ML_STAR
:
1886 MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR
);
1888 GET_STATE_CHECK_NUM_INC(mem
, p
);
1889 while (DATA_ENSURE_CHECK1
) {
1890 STATE_CHECK_VAL(scv
, mem
);
1893 STACK_PUSH_ALT_WITH_STATE_CHECK(p
, s
, sprev
, mem
);
1894 n
= enclen(encode
, s
);
1907 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
1909 case OP_WORD
: MOP_IN(OP_WORD
);
1911 if (! ONIGENC_IS_MBC_WORD(encode
, s
, end
))
1914 s
+= enclen(encode
, s
);
1918 case OP_NOT_WORD
: MOP_IN(OP_NOT_WORD
);
1920 if (ONIGENC_IS_MBC_WORD(encode
, s
, end
))
1923 s
+= enclen(encode
, s
);
1927 case OP_WORD_BOUND
: MOP_IN(OP_WORD_BOUND
);
1928 if (ON_STR_BEGIN(s
)) {
1930 if (! ONIGENC_IS_MBC_WORD(encode
, s
, end
))
1933 else if (ON_STR_END(s
)) {
1934 if (! ONIGENC_IS_MBC_WORD(encode
, sprev
, end
))
1938 if (ONIGENC_IS_MBC_WORD(encode
, s
, end
)
1939 == ONIGENC_IS_MBC_WORD(encode
, sprev
, end
))
1946 case OP_NOT_WORD_BOUND
: MOP_IN(OP_NOT_WORD_BOUND
);
1947 if (ON_STR_BEGIN(s
)) {
1948 if (DATA_ENSURE_CHECK1
&& ONIGENC_IS_MBC_WORD(encode
, s
, end
))
1951 else if (ON_STR_END(s
)) {
1952 if (ONIGENC_IS_MBC_WORD(encode
, sprev
, end
))
1956 if (ONIGENC_IS_MBC_WORD(encode
, s
, end
)
1957 != ONIGENC_IS_MBC_WORD(encode
, sprev
, end
))
1964 #ifdef USE_WORD_BEGIN_END
1965 case OP_WORD_BEGIN
: MOP_IN(OP_WORD_BEGIN
);
1966 if (DATA_ENSURE_CHECK1
&& ONIGENC_IS_MBC_WORD(encode
, s
, end
)) {
1967 if (ON_STR_BEGIN(s
) || !ONIGENC_IS_MBC_WORD(encode
, sprev
, end
)) {
1975 case OP_WORD_END
: MOP_IN(OP_WORD_END
);
1976 if (!ON_STR_BEGIN(s
) && ONIGENC_IS_MBC_WORD(encode
, sprev
, end
)) {
1977 if (ON_STR_END(s
) || !ONIGENC_IS_MBC_WORD(encode
, s
, end
)) {
1986 case OP_BEGIN_BUF
: MOP_IN(OP_BEGIN_BUF
);
1987 if (! ON_STR_BEGIN(s
)) goto fail
;
1993 case OP_END_BUF
: MOP_IN(OP_END_BUF
);
1994 if (! ON_STR_END(s
)) goto fail
;
2000 case OP_BEGIN_LINE
: MOP_IN(OP_BEGIN_LINE
);
2001 if (ON_STR_BEGIN(s
)) {
2002 if (IS_NOTBOL(msa
->options
)) goto fail
;
2006 else if (ONIGENC_IS_MBC_NEWLINE(encode
, sprev
, end
) && !ON_STR_END(s
)) {
2013 case OP_END_LINE
: MOP_IN(OP_END_LINE
);
2014 if (ON_STR_END(s
)) {
2015 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2016 if (IS_EMPTY_STR
|| !ONIGENC_IS_MBC_NEWLINE(encode
, sprev
, end
)) {
2018 if (IS_NOTEOL(msa
->options
)) goto fail
;
2021 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2025 else if (ONIGENC_IS_MBC_NEWLINE(encode
, s
, end
)) {
2029 #ifdef USE_CRNL_AS_LINE_TERMINATOR
2030 else if (ONIGENC_IS_MBC_CRNL(encode
, s
, end
)) {
2038 case OP_SEMI_END_BUF
: MOP_IN(OP_SEMI_END_BUF
);
2039 if (ON_STR_END(s
)) {
2040 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2041 if (IS_EMPTY_STR
|| !ONIGENC_IS_MBC_NEWLINE(encode
, sprev
, end
)) {
2043 if (IS_NOTEOL(msa
->options
)) goto fail
;
2046 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2050 else if (ONIGENC_IS_MBC_NEWLINE(encode
, s
, end
) &&
2051 ON_STR_END(s
+ enclen(encode
, s
))) {
2055 #ifdef USE_CRNL_AS_LINE_TERMINATOR
2056 else if (ONIGENC_IS_MBC_CRNL(encode
, s
, end
)) {
2057 UChar
* ss
= s
+ enclen(encode
, s
);
2058 ss
+= enclen(encode
, ss
);
2059 if (ON_STR_END(ss
)) {
2068 case OP_BEGIN_POSITION
: MOP_IN(OP_BEGIN_POSITION
);
2069 if (s
!= msa
->start
)
2076 case OP_MEMORY_START_PUSH
: MOP_IN(OP_MEMORY_START_PUSH
);
2077 GET_MEMNUM_INC(mem
, p
);
2078 STACK_PUSH_MEM_START(mem
, s
);
2083 case OP_MEMORY_START
: MOP_IN(OP_MEMORY_START
);
2084 GET_MEMNUM_INC(mem
, p
);
2085 mem_start_stk
[mem
] = (OnigStackIndex
)((void* )s
);
2090 case OP_MEMORY_END_PUSH
: MOP_IN(OP_MEMORY_END_PUSH
);
2091 GET_MEMNUM_INC(mem
, p
);
2092 STACK_PUSH_MEM_END(mem
, s
);
2097 case OP_MEMORY_END
: MOP_IN(OP_MEMORY_END
);
2098 GET_MEMNUM_INC(mem
, p
);
2099 mem_end_stk
[mem
] = (OnigStackIndex
)((void* )s
);
2104 #ifdef USE_SUBEXP_CALL
2105 case OP_MEMORY_END_PUSH_REC
: MOP_IN(OP_MEMORY_END_PUSH_REC
);
2106 GET_MEMNUM_INC(mem
, p
);
2107 STACK_GET_MEM_START(mem
, stkp
); /* should be before push mem-end. */
2108 STACK_PUSH_MEM_END(mem
, s
);
2109 mem_start_stk
[mem
] = GET_STACK_INDEX(stkp
);
2114 case OP_MEMORY_END_REC
: MOP_IN(OP_MEMORY_END_REC
);
2115 GET_MEMNUM_INC(mem
, p
);
2116 mem_end_stk
[mem
] = (OnigStackIndex
)((void* )s
);
2117 STACK_GET_MEM_START(mem
, stkp
);
2119 if (BIT_STATUS_AT(reg
->bt_mem_start
, mem
))
2120 mem_start_stk
[mem
] = GET_STACK_INDEX(stkp
);
2122 mem_start_stk
[mem
] = (OnigStackIndex
)((void* )stkp
->u
.mem
.pstr
);
2124 STACK_PUSH_MEM_END_MARK(mem
);
2130 case OP_BACKREF1
: MOP_IN(OP_BACKREF1
);
2135 case OP_BACKREF2
: MOP_IN(OP_BACKREF2
);
2140 case OP_BACKREFN
: MOP_IN(OP_BACKREFN
);
2141 GET_MEMNUM_INC(mem
, p
);
2145 UChar
*pstart
, *pend
;
2147 /* if you want to remove following line,
2148 you should check in parse and compile time. */
2149 if (mem
> num_mem
) goto fail
;
2150 if (mem_end_stk
[mem
] == INVALID_STACK_INDEX
) goto fail
;
2151 if (mem_start_stk
[mem
] == INVALID_STACK_INDEX
) goto fail
;
2153 if (BIT_STATUS_AT(reg
->bt_mem_start
, mem
))
2154 pstart
= STACK_AT(mem_start_stk
[mem
])->u
.mem
.pstr
;
2156 pstart
= (UChar
* )((void* )mem_start_stk
[mem
]);
2158 pend
= (BIT_STATUS_AT(reg
->bt_mem_end
, mem
)
2159 ? STACK_AT(mem_end_stk
[mem
])->u
.mem
.pstr
2160 : (UChar
* )((void* )mem_end_stk
[mem
]));
2161 n
= (int)(pend
- pstart
);
2164 STRING_CMP(pstart
, s
, n
);
2165 while (sprev
+ (len
= enclen(encode
, sprev
)) < s
)
2173 case OP_BACKREFN_IC
: MOP_IN(OP_BACKREFN_IC
);
2174 GET_MEMNUM_INC(mem
, p
);
2177 UChar
*pstart
, *pend
;
2179 /* if you want to remove following line,
2180 you should check in parse and compile time. */
2181 if (mem
> num_mem
) goto fail
;
2182 if (mem_end_stk
[mem
] == INVALID_STACK_INDEX
) goto fail
;
2183 if (mem_start_stk
[mem
] == INVALID_STACK_INDEX
) goto fail
;
2185 if (BIT_STATUS_AT(reg
->bt_mem_start
, mem
))
2186 pstart
= STACK_AT(mem_start_stk
[mem
])->u
.mem
.pstr
;
2188 pstart
= (UChar
* )((void* )mem_start_stk
[mem
]);
2190 pend
= (BIT_STATUS_AT(reg
->bt_mem_end
, mem
)
2191 ? STACK_AT(mem_end_stk
[mem
])->u
.mem
.pstr
2192 : (UChar
* )((void* )mem_end_stk
[mem
]));
2193 n
= (int)(pend
- pstart
);
2196 STRING_CMP_IC(case_fold_flag
, pstart
, &s
, n
);
2197 while (sprev
+ (len
= enclen(encode
, sprev
)) < s
)
2205 case OP_BACKREF_MULTI
: MOP_IN(OP_BACKREF_MULTI
);
2208 UChar
*pstart
, *pend
, *swork
;
2210 GET_LENGTH_INC(tlen
, p
);
2211 for (i
= 0; i
< tlen
; i
++) {
2212 GET_MEMNUM_INC(mem
, p
);
2214 if (mem_end_stk
[mem
] == INVALID_STACK_INDEX
) continue;
2215 if (mem_start_stk
[mem
] == INVALID_STACK_INDEX
) continue;
2217 if (BIT_STATUS_AT(reg
->bt_mem_start
, mem
))
2218 pstart
= STACK_AT(mem_start_stk
[mem
])->u
.mem
.pstr
;
2220 pstart
= (UChar
* )((void* )mem_start_stk
[mem
]);
2222 pend
= (BIT_STATUS_AT(reg
->bt_mem_end
, mem
)
2223 ? STACK_AT(mem_end_stk
[mem
])->u
.mem
.pstr
2224 : (UChar
* )((void* )mem_end_stk
[mem
]));
2225 n
= (int)(pend
- pstart
);
2229 STRING_CMP_VALUE(pstart
, swork
, n
, is_fail
);
2230 if (is_fail
) continue;
2232 while (sprev
+ (len
= enclen(encode
, sprev
)) < s
)
2235 p
+= (SIZE_MEMNUM
* (tlen
- i
- 1));
2236 break; /* success */
2238 if (i
== tlen
) goto fail
;
2244 case OP_BACKREF_MULTI_IC
: MOP_IN(OP_BACKREF_MULTI_IC
);
2247 UChar
*pstart
, *pend
, *swork
;
2249 GET_LENGTH_INC(tlen
, p
);
2250 for (i
= 0; i
< tlen
; i
++) {
2251 GET_MEMNUM_INC(mem
, p
);
2253 if (mem_end_stk
[mem
] == INVALID_STACK_INDEX
) continue;
2254 if (mem_start_stk
[mem
] == INVALID_STACK_INDEX
) continue;
2256 if (BIT_STATUS_AT(reg
->bt_mem_start
, mem
))
2257 pstart
= STACK_AT(mem_start_stk
[mem
])->u
.mem
.pstr
;
2259 pstart
= (UChar
* )((void* )mem_start_stk
[mem
]);
2261 pend
= (BIT_STATUS_AT(reg
->bt_mem_end
, mem
)
2262 ? STACK_AT(mem_end_stk
[mem
])->u
.mem
.pstr
2263 : (UChar
* )((void* )mem_end_stk
[mem
]));
2264 n
= (int)(pend
- pstart
);
2268 STRING_CMP_VALUE_IC(case_fold_flag
, pstart
, &swork
, n
, is_fail
);
2269 if (is_fail
) continue;
2271 while (sprev
+ (len
= enclen(encode
, sprev
)) < s
)
2274 p
+= (SIZE_MEMNUM
* (tlen
- i
- 1));
2275 break; /* success */
2277 if (i
== tlen
) goto fail
;
2283 #ifdef USE_BACKREF_WITH_LEVEL
2284 case OP_BACKREF_WITH_LEVEL
:
2290 GET_OPTION_INC(ic
, p
);
2291 GET_LENGTH_INC(level
, p
);
2292 GET_LENGTH_INC(tlen
, p
);
2295 if (backref_match_at_nested_level(reg
, stk
, stk_base
, ic
2296 , case_fold_flag
, (int )level
, (int )tlen
, p
, &s
, end
)) {
2297 while (sprev
+ (len
= enclen(encode
, sprev
)) < s
)
2300 p
+= (SIZE_MEMNUM
* tlen
);
2312 #if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */
2313 case OP_SET_OPTION_PUSH
: MOP_IN(OP_SET_OPTION_PUSH
);
2314 GET_OPTION_INC(option
, p
);
2315 STACK_PUSH_ALT(p
, s
, sprev
);
2316 p
+= SIZE_OP_SET_OPTION
+ SIZE_OP_FAIL
;
2321 case OP_SET_OPTION
: MOP_IN(OP_SET_OPTION
);
2322 GET_OPTION_INC(option
, p
);
2328 case OP_NULL_CHECK_START
: MOP_IN(OP_NULL_CHECK_START
);
2329 GET_MEMNUM_INC(mem
, p
); /* mem: null check id */
2330 STACK_PUSH_NULL_CHECK_START(mem
, s
);
2335 case OP_NULL_CHECK_END
: MOP_IN(OP_NULL_CHECK_END
);
2339 GET_MEMNUM_INC(mem
, p
); /* mem: null check id */
2340 STACK_NULL_CHECK(isnull
, mem
, s
);
2342 #ifdef ONIG_DEBUG_MATCH
2343 fprintf(stderr
, "NULL_CHECK_END: skip id:%d, s:%d\n",
2344 (int )mem
, (int )s
);
2347 /* empty loop founded, skip next instruction */
2354 case OP_REPEAT_INC_NG
:
2355 case OP_REPEAT_INC_SG
:
2356 case OP_REPEAT_INC_NG_SG
:
2360 goto unexpected_bytecode_error
;
2369 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2370 case OP_NULL_CHECK_END_MEMST
: MOP_IN(OP_NULL_CHECK_END_MEMST
);
2374 GET_MEMNUM_INC(mem
, p
); /* mem: null check id */
2375 STACK_NULL_CHECK_MEMST(isnull
, mem
, s
, reg
);
2377 #ifdef ONIG_DEBUG_MATCH
2378 fprintf(stderr
, "NULL_CHECK_END_MEMST: skip id:%d, s:%d\n",
2379 (int )mem
, (int )s
);
2381 if (isnull
== -1) goto fail
;
2382 goto null_check_found
;
2390 #ifdef USE_SUBEXP_CALL
2391 case OP_NULL_CHECK_END_MEMST_PUSH
:
2392 MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH
);
2396 GET_MEMNUM_INC(mem
, p
); /* mem: null check id */
2397 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2398 STACK_NULL_CHECK_MEMST_REC(isnull
, mem
, s
, reg
);
2400 STACK_NULL_CHECK_REC(isnull
, mem
, s
);
2403 #ifdef ONIG_DEBUG_MATCH
2404 fprintf(stderr
, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%d\n",
2405 (int )mem
, (int )s
);
2407 if (isnull
== -1) goto fail
;
2408 goto null_check_found
;
2411 STACK_PUSH_NULL_CHECK_END(mem
);
2419 case OP_JUMP
: MOP_IN(OP_JUMP
);
2420 GET_RELADDR_INC(addr
, p
);
2423 CHECK_INTERRUPT_IN_MATCH_AT
;
2427 case OP_PUSH
: MOP_IN(OP_PUSH
);
2428 GET_RELADDR_INC(addr
, p
);
2429 STACK_PUSH_ALT(p
+ addr
, s
, sprev
);
2434 #ifdef USE_COMBINATION_EXPLOSION_CHECK
2435 case OP_STATE_CHECK_PUSH
: MOP_IN(OP_STATE_CHECK_PUSH
);
2436 GET_STATE_CHECK_NUM_INC(mem
, p
);
2437 STATE_CHECK_VAL(scv
, mem
);
2440 GET_RELADDR_INC(addr
, p
);
2441 STACK_PUSH_ALT_WITH_STATE_CHECK(p
+ addr
, s
, sprev
, mem
);
2446 case OP_STATE_CHECK_PUSH_OR_JUMP
: MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP
);
2447 GET_STATE_CHECK_NUM_INC(mem
, p
);
2448 GET_RELADDR_INC(addr
, p
);
2449 STATE_CHECK_VAL(scv
, mem
);
2454 STACK_PUSH_ALT_WITH_STATE_CHECK(p
+ addr
, s
, sprev
, mem
);
2460 case OP_STATE_CHECK
: MOP_IN(OP_STATE_CHECK
);
2461 GET_STATE_CHECK_NUM_INC(mem
, p
);
2462 STATE_CHECK_VAL(scv
, mem
);
2465 STACK_PUSH_STATE_CHECK(s
, mem
);
2469 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
2471 case OP_POP
: MOP_IN(OP_POP
);
2477 case OP_PUSH_OR_JUMP_EXACT1
: MOP_IN(OP_PUSH_OR_JUMP_EXACT1
);
2478 GET_RELADDR_INC(addr
, p
);
2479 if (*p
== *s
&& DATA_ENSURE_CHECK1
) {
2481 STACK_PUSH_ALT(p
+ addr
, s
, sprev
);
2490 case OP_PUSH_IF_PEEK_NEXT
: MOP_IN(OP_PUSH_IF_PEEK_NEXT
);
2491 GET_RELADDR_INC(addr
, p
);
2494 STACK_PUSH_ALT(p
+ addr
, s
, sprev
);
2503 case OP_REPEAT
: MOP_IN(OP_REPEAT
);
2505 GET_MEMNUM_INC(mem
, p
); /* mem: OP_REPEAT ID */
2506 GET_RELADDR_INC(addr
, p
);
2509 repeat_stk
[mem
] = GET_STACK_INDEX(stk
);
2510 STACK_PUSH_REPEAT(mem
, p
);
2512 if (reg
->repeat_range
[mem
].lower
== 0) {
2513 STACK_PUSH_ALT(p
+ addr
, s
, sprev
);
2520 case OP_REPEAT_NG
: MOP_IN(OP_REPEAT_NG
);
2522 GET_MEMNUM_INC(mem
, p
); /* mem: OP_REPEAT ID */
2523 GET_RELADDR_INC(addr
, p
);
2526 repeat_stk
[mem
] = GET_STACK_INDEX(stk
);
2527 STACK_PUSH_REPEAT(mem
, p
);
2529 if (reg
->repeat_range
[mem
].lower
== 0) {
2530 STACK_PUSH_ALT(p
, s
, sprev
);
2538 case OP_REPEAT_INC
: MOP_IN(OP_REPEAT_INC
);
2539 GET_MEMNUM_INC(mem
, p
); /* mem: OP_REPEAT ID */
2540 si
= repeat_stk
[mem
];
2541 stkp
= STACK_AT(si
);
2544 stkp
->u
.repeat
.count
++;
2545 if (stkp
->u
.repeat
.count
>= reg
->repeat_range
[mem
].upper
) {
2546 /* end of repeat. Nothing to do. */
2548 else if (stkp
->u
.repeat
.count
>= reg
->repeat_range
[mem
].lower
) {
2549 STACK_PUSH_ALT(p
, s
, sprev
);
2550 p
= STACK_AT(si
)->u
.repeat
.pcode
; /* Don't use stkp after PUSH. */
2553 p
= stkp
->u
.repeat
.pcode
;
2555 STACK_PUSH_REPEAT_INC(si
);
2557 CHECK_INTERRUPT_IN_MATCH_AT
;
2561 case OP_REPEAT_INC_SG
: MOP_IN(OP_REPEAT_INC_SG
);
2562 GET_MEMNUM_INC(mem
, p
); /* mem: OP_REPEAT ID */
2563 STACK_GET_REPEAT(mem
, stkp
);
2564 si
= GET_STACK_INDEX(stkp
);
2568 case OP_REPEAT_INC_NG
: MOP_IN(OP_REPEAT_INC_NG
);
2569 GET_MEMNUM_INC(mem
, p
); /* mem: OP_REPEAT ID */
2570 si
= repeat_stk
[mem
];
2571 stkp
= STACK_AT(si
);
2574 stkp
->u
.repeat
.count
++;
2575 if (stkp
->u
.repeat
.count
< reg
->repeat_range
[mem
].upper
) {
2576 if (stkp
->u
.repeat
.count
>= reg
->repeat_range
[mem
].lower
) {
2577 UChar
* pcode
= stkp
->u
.repeat
.pcode
;
2579 STACK_PUSH_REPEAT_INC(si
);
2580 STACK_PUSH_ALT(pcode
, s
, sprev
);
2583 p
= stkp
->u
.repeat
.pcode
;
2584 STACK_PUSH_REPEAT_INC(si
);
2587 else if (stkp
->u
.repeat
.count
== reg
->repeat_range
[mem
].upper
) {
2588 STACK_PUSH_REPEAT_INC(si
);
2591 CHECK_INTERRUPT_IN_MATCH_AT
;
2595 case OP_REPEAT_INC_NG_SG
: MOP_IN(OP_REPEAT_INC_NG_SG
);
2596 GET_MEMNUM_INC(mem
, p
); /* mem: OP_REPEAT ID */
2597 STACK_GET_REPEAT(mem
, stkp
);
2598 si
= GET_STACK_INDEX(stkp
);
2602 case OP_PUSH_POS
: MOP_IN(OP_PUSH_POS
);
2603 STACK_PUSH_POS(s
, sprev
);
2608 case OP_POP_POS
: MOP_IN(OP_POP_POS
);
2610 STACK_POS_END(stkp
);
2611 s
= stkp
->u
.state
.pstr
;
2612 sprev
= stkp
->u
.state
.pstr_prev
;
2618 case OP_PUSH_POS_NOT
: MOP_IN(OP_PUSH_POS_NOT
);
2619 GET_RELADDR_INC(addr
, p
);
2620 STACK_PUSH_POS_NOT(p
+ addr
, s
, sprev
);
2625 case OP_FAIL_POS
: MOP_IN(OP_FAIL_POS
);
2626 STACK_POP_TIL_POS_NOT
;
2630 case OP_PUSH_STOP_BT
: MOP_IN(OP_PUSH_STOP_BT
);
2636 case OP_POP_STOP_BT
: MOP_IN(OP_POP_STOP_BT
);
2642 case OP_LOOK_BEHIND
: MOP_IN(OP_LOOK_BEHIND
);
2643 GET_LENGTH_INC(tlen
, p
);
2644 s
= (UChar
* )ONIGENC_STEP_BACK(encode
, str
, s
, (int )tlen
);
2645 if (IS_NULL(s
)) goto fail
;
2646 sprev
= (UChar
* )onigenc_get_prev_char_head(encode
, str
, s
);
2651 case OP_PUSH_LOOK_BEHIND_NOT
: MOP_IN(OP_PUSH_LOOK_BEHIND_NOT
);
2652 GET_RELADDR_INC(addr
, p
);
2653 GET_LENGTH_INC(tlen
, p
);
2654 q
= (UChar
* )ONIGENC_STEP_BACK(encode
, str
, s
, (int )tlen
);
2656 /* too short case -> success. ex. /(?<!XXX)a/.match("a")
2657 If you want to change to fail, replace following line. */
2662 STACK_PUSH_LOOK_BEHIND_NOT(p
+ addr
, s
, sprev
);
2664 sprev
= (UChar
* )onigenc_get_prev_char_head(encode
, str
, s
);
2670 case OP_FAIL_LOOK_BEHIND_NOT
: MOP_IN(OP_FAIL_LOOK_BEHIND_NOT
);
2671 STACK_POP_TIL_LOOK_BEHIND_NOT
;
2675 #ifdef USE_SUBEXP_CALL
2676 case OP_CALL
: MOP_IN(OP_CALL
);
2677 GET_ABSADDR_INC(addr
, p
);
2678 STACK_PUSH_CALL_FRAME(p
);
2684 case OP_RETURN
: MOP_IN(OP_RETURN
);
2699 case OP_FAIL
: MOP_IN(OP_FAIL
);
2701 p
= stk
->u
.state
.pcode
;
2702 s
= stk
->u
.state
.pstr
;
2703 sprev
= stk
->u
.state
.pstr_prev
;
2705 #ifdef USE_COMBINATION_EXPLOSION_CHECK
2706 if (stk
->u
.state
.state_check
!= 0) {
2707 stk
->type
= STK_STATE_CHECK_MARK
;
2717 goto bytecode_error
;
2719 } /* end of switch */
2721 } /* end of while(1) */
2732 return ONIGERR_STACK_BUG
;
2738 return ONIGERR_UNDEFINED_BYTECODE
;
2740 unexpected_bytecode_error
:
2743 return ONIGERR_UNEXPECTED_BYTECODE
;
/*
 * slow_search: forward scan of text for the byte string
 * [target, target_end), stepping one encoded character at a time.
 * NOTE(review): this listing is a lossy extraction -- the return-type line,
 * the braces and several statements (the source lines whose numbers are
 * skipped) are not visible; the comments describe only what is shown.
 */
2748 slow_search(OnigEncoding enc
, UChar
* target
, UChar
* target_end
,
2749 const UChar
* text
, const UChar
* text_end
, UChar
* text_range
)
2751 UChar
*t
, *p
, *s
, *end
;
/* Scan limit: text_end minus (target length - 1). */
2753 end
= (UChar
* )text_end
;
2754 end
-= target_end
- target
- 1;
/* The statement guarded by this test is not visible (presumably clamps end to text_range -- confirm). */
2755 if (end
> text_range
)
/* Candidate position: first byte of the text equals first byte of the target. */
2761 if (*s
== *target
) {
/* Walk the rest of the target; the loop body is not visible. */
2764 while (t
< target_end
) {
/* t reaching target_end means the whole target was consumed (the action taken is not visible). */
2769 if (t
== target_end
)
/* Advance by the byte length of the character at s. */
2772 s
+= enclen(enc
, s
);
/* Final return: NULL. */
2775 return (UChar
* )NULL
;
/*
 * str_lower_case_match: compares the bytes at t against the case-folded
 * form of the text at p.
 * NOTE(review): lossy listing -- the outer loop, the assignment of q and
 * the final return are not visible; the role of tend cannot be confirmed
 * from here.
 */
2779 str_lower_case_match(OnigEncoding enc
, int case_fold_flag
,
2780 const UChar
* t
, const UChar
* tend
,
2781 const UChar
* p
, const UChar
* end
)
/* Scratch buffer for the case-folded bytes of one character. */
2784 UChar
*q
, lowbuf
[ONIGENC_MBC_CASE_FOLD_MAXLEN
];
/* Fold at p into lowbuf; p is passed by address, so the macro may advance it. */
2787 lowlen
= ONIGENC_MBC_CASE_FOLD(enc
, case_fold_flag
, &p
, end
, lowbuf
);
/* Byte-wise comparison of t against q; any difference returns 0. */
2789 while (lowlen
> 0) {
2790 if (*t
++ != *q
++) return 0;
/*
 * slow_search_ic: forward scan of text that tests each position with
 * str_lower_case_match() (case-fold comparison against the target).
 * NOTE(review): lossy listing -- declarations, braces, the remaining
 * arguments of the str_lower_case_match() call and the success return
 * are not visible.
 */
2799 slow_search_ic(OnigEncoding enc
, int case_fold_flag
,
2800 UChar
* target
, UChar
* target_end
,
2801 const UChar
* text
, const UChar
* text_end
, UChar
* text_range
)
/* Scan limit: text_end minus (target length - 1). */
2805 end
= (UChar
* )text_end
;
2806 end
-= target_end
- target
- 1;
/* The statement guarded by this test is not visible (presumably clamps end to text_range -- confirm). */
2807 if (end
> text_range
)
/* Case-insensitive comparison at the current position (call is truncated in this listing). */
2813 if (str_lower_case_match(enc
, case_fold_flag
, target
, target_end
,
/* Advance by the byte length of the character at s. */
2817 s
+= enclen(enc
, s
);
/* Final return: NULL. */
2820 return (UChar
* )NULL
;
/*
 * slow_search_backward: backward scan for the byte string
 * [target, target_end), moving to the previous character head after each
 * position.
 * NOTE(review): lossy listing -- declarations, braces, the condition that
 * selects between the two initial values of s, the loop header and the
 * success return are not visible.
 */
2824 slow_search_backward(OnigEncoding enc
, UChar
* target
, UChar
* target_end
,
2825 const UChar
* text
, const UChar
* adjust_text
,
2826 const UChar
* text_end
, const UChar
* text_start
)
/* First candidate start: text_end minus the target length. */
2830 s
= (UChar
* )text_end
;
2831 s
-= (target_end
- target
);
/* Alternative start at text_start; the condition choosing it is not visible. */
2833 s
= (UChar
* )text_start
;
/* Snap s to a character head, using adjust_text as the base for the adjustment. */
2835 s
= ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc
, adjust_text
, s
);
/* Candidate position: first byte equals first byte of the target. */
2838 if (*s
== *target
) {
/* Walk the rest of the target; the loop body is not visible. */
2841 while (t
< target_end
) {
/* t reaching target_end means the whole target was consumed (the action taken is not visible). */
2846 if (t
== target_end
)
/* Step back to the head of the previous character. */
2849 s
= (UChar
* )onigenc_get_prev_char_head(enc
, adjust_text
, s
);
/* Final return: NULL. */
2852 return (UChar
* )NULL
;
/*
 * slow_search_backward_ic: backward scan that tests each position with
 * str_lower_case_match() (case-fold comparison against the target).
 * NOTE(review): lossy listing -- declarations, braces, the condition that
 * selects between the two initial values of s, the loop header and the
 * success return are not visible.
 */
2856 slow_search_backward_ic(OnigEncoding enc
, int case_fold_flag
,
2857 UChar
* target
, UChar
* target_end
,
2858 const UChar
* text
, const UChar
* adjust_text
,
2859 const UChar
* text_end
, const UChar
* text_start
)
/* First candidate start: text_end minus the target length. */
2863 s
= (UChar
* )text_end
;
2864 s
-= (target_end
- target
);
/* Alternative start at text_start; the condition choosing it is not visible. */
2866 s
= (UChar
* )text_start
;
/* Snap s to a character head, using adjust_text as the base for the adjustment. */
2868 s
= ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc
, adjust_text
, s
);
/* Case-insensitive comparison of the target against the text from s up to text_end. */
2871 if (str_lower_case_match(enc
, case_fold_flag
,
2872 target
, target_end
, s
, text_end
))
/* Step back to the head of the previous character. */
2875 s
= (UChar
* )onigenc_get_prev_char_head(enc
, adjust_text
, s
);
/* Final return: NULL. */
2878 return (UChar
* )NULL
;
/*
 * bm_search_notrev: skip-table driven forward search for
 * [target, target_end) that always advances s by whole encoded characters.
 * The skip distance is read from reg->map when reg->int_map is NULL and
 * from reg->int_map otherwise.
 * NOTE(review): lossy listing -- declarations of tail/tlen1/skip, the
 * initial values of s and end, the comparison loops and the braces are not
 * visible.
 */
2882 bm_search_notrev(regex_t
* reg
, const UChar
* target
, const UChar
* target_end
,
2883 const UChar
* text
, const UChar
* text_end
,
2884 const UChar
* text_range
)
2886 const UChar
*s
, *se
, *t
, *p
, *end
;
/* NOTE(review): the debug trace casts pointers to int for %d; on platforms
   where pointers are wider than int the printed values are truncated. */
2890 #ifdef ONIG_DEBUG_SEARCH
2891 fprintf(stderr
, "bm_search_notrev: text: %d, text_end: %d, text_range: %d\n",
2892 (int )text
, (int )text_end
, (int )text_range
);
/* tail: last byte of the target; tlen1: target length minus one. */
2895 tail
= target_end
- 1;
2896 tlen1
= (int)(tail
- target
);
/* Pull end back so that end + tlen1 does not pass text_end. */
2898 if (end
+ tlen1
> text_end
)
2899 end
= text_end
- tlen1
;
/* No int_map: the skip table is reg->map. */
2903 if (IS_NULL(reg
->int_map
)) {
/* t == target: returns s as the match position (the loop that moves t is not visible). */
2908 if (t
== target
) return (UChar
* )s
;
/* Skip distance looked up by the byte at se. */
2911 skip
= reg
->map
[*se
];
/* Advance character by character until the skip distance is covered or end is reached. */
2914 s
+= enclen(reg
->enc
, s
);
2915 } while ((s
- t
) < skip
&& s
< end
);
/* Same steps, with the skip distance taken from reg->int_map. */
2923 if (t
== target
) return (UChar
* )s
;
2926 skip
= reg
->int_map
[*se
];
2929 s
+= enclen(reg
->enc
, s
);
2930 } while ((s
- t
) < skip
&& s
< end
);
/* Final return: NULL. */
2934 return (UChar
* )NULL
;
/*
 * bm_search: skip-table driven forward search for [target, target_end).
 * s is positioned on the text byte aligned with the last target byte and
 * is advanced by a table lookup on *s.
 * NOTE(review): lossy listing -- the declaration of tail, the loop
 * headers, the comparison loops, the skip step of the first branch and the
 * braces are not visible.
 */
2938 bm_search(regex_t
* reg
, const UChar
* target
, const UChar
* target_end
,
2939 const UChar
* text
, const UChar
* text_end
, const UChar
* text_range
)
2941 const UChar
*s
, *t
, *p
, *end
;
/* end: text_range moved forward by (target length - 1). */
2944 end
= text_range
+ (target_end
- target
) - 1;
/* tail: last byte of the target. */
2948 tail
= target_end
- 1;
/* s starts at text + (target length - 1). */
2949 s
= text
+ (target_end
- target
) - 1;
/* Branch taken when reg->int_map is NULL (the table used here is not visible in this listing). */
2950 if (IS_NULL(reg
->int_map
)) {
/* t == target: returns p as the match position. */
2955 if (t
== target
) return (UChar
* )p
;
2961 else { /* see int_map[] */
2966 if (t
== target
) return (UChar
* )p
;
/* Skip ahead by the int_map entry for the byte at s. */
2969 s
+= reg
->int_map
[*s
];
/* Final return: NULL. */
2972 return (UChar
* )NULL
;
/*
 * set_bm_backward_skip: allocates *skip on first use
 * (ONIG_CHAR_TABLE_SIZE ints) and fills it from the bytes of [s, end).
 * Returns ONIGERR_MEMORY when the allocation fails.
 * NOTE(review): lossy listing -- the rest of the parameter list (including
 * the declaration of skip), both loop bodies and the success return are
 * not visible.
 */
2976 set_bm_backward_skip(UChar
* s
, UChar
* end
, OnigEncoding enc ARG_UNUSED
,
/* Allocate the table only when the caller's pointer is still NULL. */
2982 if (IS_NULL(*skip
)) {
2983 *skip
= (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE
);
2984 if (IS_NULL(*skip
)) return ONIGERR_MEMORY
;
/* len: number of bytes in [s, end). */
2987 len
= (int)(end
- s
);
/* Pass over every table entry (body not visible; presumably stores a default -- confirm). */
2988 for (i
= 0; i
< ONIG_CHAR_TABLE_SIZE
; i
++)
/* Pass over indices len-1 down to 1 (body not visible). */
2991 for (i
= len
- 1; i
> 0; i
--)
/*
 * bm_search_backward: backward search for [target, target_end) that moves
 * s toward the start of the text by reg->int_map_backward[*s] and then
 * re-aligns it to a character head.
 * NOTE(review): lossy listing -- the condition applied to text_start, the
 * outer loop header, the assignments of p and t, the success return and
 * the braces are not visible.
 */
2998 bm_search_backward(regex_t
* reg
, const UChar
* target
, const UChar
* target_end
,
2999 const UChar
* text
, const UChar
* adjust_text
,
3000 const UChar
* text_end
, const UChar
* text_start
)
3002 const UChar
*s
, *t
, *p
;
/* First candidate start: text_end minus the target length. */
3004 s
= text_end
- (target_end
- target
);
/* Snap s to a character head. */
3008 s
= ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg
->enc
, adjust_text
, s
);
/* Compare text (p) and target (t) while bytes agree. */
3013 while (t
< target_end
&& *p
== *t
) {
/* t reaching target_end means the whole target matched (the action taken is not visible). */
3016 if (t
== target_end
)
/* Move back by the backward skip entry for the byte at s, then re-align to a character head. */
3019 s
-= reg
->int_map_backward
[*s
];
3020 s
= ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg
->enc
, adjust_text
, s
);
/* Final return: NULL. */
3023 return (UChar
* )NULL
;
/*
 * map_search: scans forward from text and returns the first position
 * below text_range whose leading byte has a non-zero entry in map[];
 * the final return is NULL.
 * NOTE(review): lossy listing -- the return-type line and the braces are
 * not visible.
 */
3027 map_search(OnigEncoding enc
, UChar map
[],
3028 const UChar
* text
, const UChar
* text_range
)
3030 const UChar
*s
= text
;
3032 while (s
< text_range
) {
/* Only the byte at s is looked up in the map. */
3033 if (map
[*s
]) return (UChar
* )s
;
/* Advance by the byte length of the character at s. */
3035 s
+= enclen(enc
, s
);
3037 return (UChar
* )NULL
;
/*
 * map_search_backward: starts at text_start and steps to previous
 * character heads, returning the first position whose leading byte has a
 * non-zero entry in map[]; the final return is NULL.
 * NOTE(review): lossy listing -- the return-type line, the loop header
 * (and therefore the exact stop condition involving text) and the braces
 * are not visible.
 */
3041 map_search_backward(OnigEncoding enc
, UChar map
[],
3042 const UChar
* text
, const UChar
* adjust_text
,
3043 const UChar
* text_start
)
3045 const UChar
*s
= text_start
;
/* Only the byte at s is looked up in the map. */
3048 if (map
[*s
]) return (UChar
* )s
;
/* Step back to the head of the previous character, using adjust_text as the base. */
3050 s
= onigenc_get_prev_char_head(enc
, adjust_text
, s
);
3052 return (UChar
* )NULL
;
/*
 * onig_match: sets up the match arguments, prepares the region, and calls
 * match_at() for the position at inside [str, end).
 * NOTE(review): lossy listing -- local declarations, braces, most of the
 * match_at() argument list, the error handling after the region resize and
 * the final return are not visible.
 */
3056 onig_match(regex_t
* reg
, const UChar
* str
, const UChar
* end
, const UChar
* at
, OnigRegion
* region
,
3057 OnigOptionType option
)
/* Only with USE_RECOMPILE_API and USE_MULTI_THREAD_SYSTEM: state handling on reg before matching. */
3063 #if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
3065 THREAD_ATOMIC_START
;
3066 if (ONIG_STATE(reg
) >= ONIG_STATE_NORMAL
) {
3067 ONIG_STATE_INC(reg
);
/* A pending chain is reduced when the state equals ONIG_STATE_NORMAL, then the state is incremented again. */
3068 if (IS_NOT_NULL(reg
->chain
) && ONIG_STATE(reg
) == ONIG_STATE_NORMAL
) {
3069 onig_chain_reduce(reg
);
3070 ONIG_STATE_INC(reg
);
/* Loop while the state is below ONIG_STATE_NORMAL, giving up after THREAD_PASS_LIMIT_COUNT iterations. */
3078 while (ONIG_STATE(reg
) < ONIG_STATE_NORMAL
) {
3079 if (++n
> THREAD_PASS_LIMIT_COUNT
)
3080 return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT
;
3086 #endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
3088 MATCH_ARG_INIT(msa
, option
, region
, at
);
3089 #ifdef USE_COMBINATION_EXPLOSION_CHECK
/* NOTE(review): a pointer difference is stored in an int here; confirm inputs cannot exceed INT_MAX bytes. */
3091 int offset
= at
- str
;
3092 STATE_CHECK_BUFF_INIT(msa
, end
- str
, offset
, reg
->num_comb_exp_check
);
/* Part of the condition guarding the region resize: excluded for POSIX-style regions. */
3097 #ifdef USE_POSIX_API_REGION_OPTION
3098 && !IS_POSIX_REGION(option
)
/* Region is resized/cleared to hold reg->num_mem + 1 entries. */
3101 r
= onig_region_resize_clear(region
, reg
->num_mem
+ 1);
/* prev: head of the character before at. */
3107 prev
= (UChar
* )onigenc_get_prev_char_head(reg
->enc
, str
, at
);
3108 r
= match_at(reg
, str
, end
,
3109 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
/* Release the match arguments and undo the thread state increment. */
3115 MATCH_ARG_FREE(msa
);
3116 ONIG_STATE_DEC_THREAD(reg
);
/*
 * forward_search_range: runs the optimizer's forward search (selected by
 * reg->optimize) from s up to range and, on a hit at p, reports the window
 * of possible match starts through *low / *high and the character before
 * *low through *low_prev.  Returns 1 on success and 0 on failure.
 * NOTE(review): lossy listing -- the return-type line, braces, labels,
 * break/goto statements and several else arms are not visible; comments
 * describe only the statements shown.
 */
3121 forward_search_range(regex_t
* reg
, const UChar
* str
, const UChar
* end
, UChar
* s
,
3122 UChar
* range
, UChar
** low
, UChar
** high
, UChar
** low_prev
)
3124 UChar
*p
, *pprev
= (UChar
* )NULL
;
/* NOTE(review): the debug trace casts pointers to int for %d; on platforms
   where pointers are wider than int the printed values are truncated. */
3126 #ifdef ONIG_DEBUG_SEARCH
3127 fprintf(stderr
, "forward_search_range: str: %d, end: %d, s: %d, range: %d\n",
3128 (int )str
, (int )end
, (int )s
, (int )range
);
/* A non-zero minimum distance moves the search start forward. */
3132 if (reg
->dmin
> 0) {
/* Single-byte encodings take a separate path (its statement is not visible). */
3133 if (ONIGENC_IS_SINGLEBYTE(reg
->enc
)) {
/* Otherwise advance character by character until at least dmin bytes have been passed. */
3137 UChar
*q
= p
+ reg
->dmin
;
3138 while (p
< q
) p
+= enclen(reg
->enc
, p
);
/* Dispatch to the search routine matching the optimization kind. */
3143 switch (reg
->optimize
) {
3144 case ONIG_OPTIMIZE_EXACT
:
3145 p
= slow_search(reg
->enc
, reg
->exact
, reg
->exact_end
, p
, end
, range
);
3147 case ONIG_OPTIMIZE_EXACT_IC
:
3148 p
= slow_search_ic(reg
->enc
, reg
->case_fold_flag
,
3149 reg
->exact
, reg
->exact_end
, p
, end
, range
);
3152 case ONIG_OPTIMIZE_EXACT_BM
:
3153 p
= bm_search(reg
, reg
->exact
, reg
->exact_end
, p
, end
, range
);
3156 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV
:
3157 p
= bm_search_notrev(reg
, reg
->exact
, reg
->exact_end
, p
, end
, range
);
3160 case ONIG_OPTIMIZE_MAP
:
3161 p
= map_search(reg
->enc
, reg
->map
, p
, range
);
/* A hit counts only when it is non-NULL and below range. */
3165 if (p
&& p
< range
) {
/* Hit closer than dmin to s: p is moved one character forward (the retry jump is not visible). */
3166 if (p
- reg
->dmin
< s
) {
3169 p
+= enclen(reg
->enc
, p
);
/* Extra line-anchor verification of the hit. */
3173 if (reg
->sub_anchor
) {
3176 switch (reg
->sub_anchor
) {
3177 case ANCHOR_BEGIN_LINE
:
/* Not at string start: the previous character is tested for newline. */
3178 if (!ON_STR_BEGIN(p
)) {
3179 prev
= onigenc_get_prev_char_head(reg
->enc
,
3180 (pprev
? pprev
: str
), p
);
3181 if (!ONIGENC_IS_MBC_NEWLINE(reg
->enc
, prev
, end
))
3186 case ANCHOR_END_LINE
:
3187 if (ON_STR_END(p
)) {
/* At string end: checks whether the previous character is a newline (action not visible). */
3188 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3189 prev
= (UChar
* )onigenc_get_prev_char_head(reg
->enc
,
3190 (pprev
? pprev
: str
), p
);
3191 if (prev
&& ONIGENC_IS_MBC_NEWLINE(reg
->enc
, prev
, end
))
/* Not at string end: tests whether p is on a newline (or CR-NL when enabled). */
3195 else if (! ONIGENC_IS_MBC_NEWLINE(reg
->enc
, p
, end
)
3196 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3197 && ! ONIGENC_IS_MBC_CRNL(reg
->enc
, p
, end
)
/* dmax == 0 branch: *low_prev is derived from p (the *low assignment and the guards are not visible). */
3205 if (reg
->dmax
== 0) {
3209 *low_prev
= onigenc_get_prev_char_head(reg
->enc
, s
, p
);
3211 *low_prev
= onigenc_get_prev_char_head(reg
->enc
,
3212 (pprev
? pprev
: str
), p
);
/* Finite dmax: *low is p - dmax, then moved right to a character head. */
3216 if (reg
->dmax
!= ONIG_INFINITE_DISTANCE
) {
3217 *low
= p
- reg
->dmax
;
3219 *low
= onigenc_get_right_adjust_char_head_with_prev(reg
->enc
, s
,
3220 *low
, (const UChar
** )low_prev
);
/* When the adjust call left *low_prev NULL, compute it from *low. */
3221 if (low_prev
&& IS_NULL(*low_prev
))
3222 *low_prev
= onigenc_get_prev_char_head(reg
->enc
,
3223 (pprev
? pprev
: s
), *low
);
3227 *low_prev
= onigenc_get_prev_char_head(reg
->enc
,
3228 (pprev
? pprev
: str
), *low
);
3232 /* no needs to adjust *high, *high is used as range check only */
3233 *high
= p
- reg
->dmin
;
3235 #ifdef ONIG_DEBUG_SEARCH
3237 "forward_search_range success: low: %d, high: %d, dmin: %d, dmax: %d\n",
3238 (int )(*low
- str
), (int )(*high
- str
), reg
->dmin
, reg
->dmax
);
3240 return 1; /* success */
3243 return 0; /* fail */
/* Forward declaration of set_bm_backward_skip (wrapped in P_(());
   the rest of the parameter list is not visible in this listing). */
3246 static int set_bm_backward_skip
P_((UChar
* s
, UChar
* end
, OnigEncoding enc
,
/* Threshold compared against (s - range) in backward_search_range() when
   reg->int_map_backward has not been built yet. */
3249 #define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100
/*
 * backward_search_range: runs the optimizer's backward search (selected by
 * reg->optimize) and, on a hit at p, reports the window of possible match
 * starts through *low / *high.  Returns 1 on success and 0 on failure.
 * NOTE(review): lossy listing -- the return-type line, local declarations,
 * braces, labels, break/goto statements and several else arms are not
 * visible; comments describe only the statements shown.
 */
3252 backward_search_range(regex_t
* reg
, const UChar
* str
, const UChar
* end
,
3253 UChar
* s
, const UChar
* range
, UChar
* adjrange
,
3254 UChar
** low
, UChar
** high
)
/* Dispatch to the backward search routine matching the optimization kind. */
3263 switch (reg
->optimize
) {
3264 case ONIG_OPTIMIZE_EXACT
:
3266 p
= slow_search_backward(reg
->enc
, reg
->exact
, reg
->exact_end
,
3267 range
, adjrange
, end
, p
);
3270 case ONIG_OPTIMIZE_EXACT_IC
:
3271 p
= slow_search_backward_ic(reg
->enc
, reg
->case_fold_flag
,
3272 reg
->exact
, reg
->exact_end
,
3273 range
, adjrange
, end
, p
);
/* Both BM kinds share one backward path. */
3276 case ONIG_OPTIMIZE_EXACT_BM
:
3277 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV
:
/* Backward skip table not built yet. */
3278 if (IS_NULL(reg
->int_map_backward
)) {
/* Short distance test (the statement it guards is not visible). */
3279 if (s
- range
< BM_BACKWARD_SEARCH_LENGTH_THRESHOLD
)
/* Build the backward skip table from the exact string (handling of r is not visible). */
3282 r
= set_bm_backward_skip(reg
->exact
, reg
->exact_end
, reg
->enc
,
3283 &(reg
->int_map_backward
));
3286 p
= bm_search_backward(reg
, reg
->exact
, reg
->exact_end
, range
, adjrange
,
3290 case ONIG_OPTIMIZE_MAP
:
3291 p
= map_search_backward(reg
->enc
, reg
->map
, range
, adjrange
, p
);
/* Extra line-anchor verification of the hit. */
3296 if (reg
->sub_anchor
) {
3299 switch (reg
->sub_anchor
) {
3300 case ANCHOR_BEGIN_LINE
:
/* Not at string start: the previous character is tested for newline. */
3301 if (!ON_STR_BEGIN(p
)) {
3302 prev
= onigenc_get_prev_char_head(reg
->enc
, str
, p
);
3303 if (!ONIGENC_IS_MBC_NEWLINE(reg
->enc
, prev
, end
)) {
3310 case ANCHOR_END_LINE
:
3311 if (ON_STR_END(p
)) {
/* At string end: fails when there is no previous character; otherwise tests it for newline. */
3312 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3313 prev
= onigenc_get_prev_char_head(reg
->enc
, adjrange
, p
);
3314 if (IS_NULL(prev
)) goto fail
;
3315 if (ONIGENC_IS_MBC_NEWLINE(reg
->enc
, prev
, end
)) {
/* Not at string end: tests whether p is on a newline (or CR-NL when enabled). */
3321 else if (! ONIGENC_IS_MBC_NEWLINE(reg
->enc
, p
, end
)
3322 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3323 && ! ONIGENC_IS_MBC_CRNL(reg
->enc
, p
, end
)
/* Move p to the previous character head; fail when there is none. */
3326 p
= onigenc_get_prev_char_head(reg
->enc
, adjrange
, p
);
3327 if (IS_NULL(p
)) goto fail
;
3334 /* no needs to adjust *high, *high is used as range check only */
/* Finite dmax: window is [p - dmax, p - dmin], with *high moved right to a character head. */
3335 if (reg
->dmax
!= ONIG_INFINITE_DISTANCE
) {
3336 *low
= p
- reg
->dmax
;
3337 *high
= p
- reg
->dmin
;
3338 *high
= onigenc_get_right_adjust_char_head(reg
->enc
, adjrange
, *high
);
3341 #ifdef ONIG_DEBUG_SEARCH
3342 fprintf(stderr
, "backward_search_range: low: %d, high: %d\n",
3343 (int )(*low
- str
), (int )(*high
- str
));
3345 return 1; /* success */
3349 #ifdef ONIG_DEBUG_SEARCH
3350 fprintf(stderr
, "backward_search_range: fail.\n");
3352 return 0; /* fail */
/*
 * onig_search: look for a match of reg in the subject str..end, trying
 * candidate positions from start toward range.
 *
 *   reg     compiled regex
 *   str/end subject string bounds
 *   start   first candidate position; must satisfy str <= start <= end
 *   range   search limit; range > start selects the forward search,
 *           otherwise the backward search is used
 *   region  capture region; resized/cleared for reg->num_mem + 1 entries
 *   option  search-time options passed to MATCH_ARG_INIT
 *
 * On the match path the function returns (int)(s - str).  With
 * USE_RECOMPILE_API and USE_MULTI_THREAD_SYSTEM it can return
 * ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT.
 * NOTE(review): many statements (labels, braces, some returns) are missing
 * from this listing; confirm details against the complete source.
 */
3357 onig_search(regex_t
* reg
, const UChar
* str
, const UChar
* end
,
3358 const UChar
* start
, const UChar
* range
, OnigRegion
* region
, OnigOptionType option
)
/* Remember the caller's start (and range, when enabled) before the anchor
   logic below may move start/range. */
3363 const UChar
*orig_start
= start
;
3364 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
3365 const UChar
*orig_range
= range
;
/* Thread-aware builds: bump reg's state counter; while the state is below
   ONIG_STATE_NORMAL, retry up to THREAD_PASS_LIMIT_COUNT times. */
3368 #if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
3370 THREAD_ATOMIC_START
;
3371 if (ONIG_STATE(reg
) >= ONIG_STATE_NORMAL
) {
3372 ONIG_STATE_INC(reg
);
3373 if (IS_NOT_NULL(reg
->chain
) && ONIG_STATE(reg
) == ONIG_STATE_NORMAL
) {
3374 onig_chain_reduce(reg
);
3375 ONIG_STATE_INC(reg
);
3383 while (ONIG_STATE(reg
) < ONIG_STATE_NORMAL
) {
3384 if (++n
> THREAD_PASS_LIMIT_COUNT
)
3385 return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT
;
3391 #endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
3393 #ifdef ONIG_DEBUG_SEARCH
3395 "onig_search (entry point): str: %d, end: %d, start: %d, range: %d\n",
3396 (int )str
, (int )(end
- str
), (int )(start
- str
), (int )(range
- str
));
3400 #ifdef USE_POSIX_API_REGION_OPTION
3401 && !IS_POSIX_REGION(option
)
/* Prepare the region for num_mem + 1 entries; a non-zero result ends the
   search. */
3404 r
= onig_region_resize_clear(region
, reg
->num_mem
+ 1);
3405 if (r
) goto finish_no_msa
;
/* start has to lie inside [str, end]. */
3408 if (start
> end
|| start
< str
) goto mismatch_no_msa
;
/* MATCH_AND_RETURN_CHECK: run match_at() at the current position s and act
   on any result other than ONIG_MISMATCH.  Four variants exist, selected
   by the two USE_* switches below. */
3411 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
3412 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
3413 #define MATCH_AND_RETURN_CHECK(upper_range) \
3414 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
3415 if (r != ONIG_MISMATCH) {\
3417 if (! IS_FIND_LONGEST(reg->options)) {\
3421 else goto finish; /* error */ \
3424 #define MATCH_AND_RETURN_CHECK(upper_range) \
3425 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
3426 if (r != ONIG_MISMATCH) {\
3430 else goto finish; /* error */ \
3432 #endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
3434 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
3435 #define MATCH_AND_RETURN_CHECK(none) \
3436 r = match_at(reg, str, end, s, prev, &msa);\
3437 if (r != ONIG_MISMATCH) {\
3439 if (! IS_FIND_LONGEST(reg->options)) {\
3443 else goto finish; /* error */ \
3446 #define MATCH_AND_RETURN_CHECK(none) \
3447 r = match_at(reg, str, end, s, prev, &msa);\
3448 if (r != ONIG_MISMATCH) {\
3452 else goto finish; /* error */ \
3454 #endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
3455 #endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
3458 /* anchor optimize: resume search range */
3459 if (reg
->anchor
!= 0 && str
< end
) {
3460 UChar
*min_semi_end
, *max_semi_end
;
3462 if (reg
->anchor
& ANCHOR_BEGIN_POSITION
) {
3463 /* search start-position only */
3470 else if (reg
->anchor
& ANCHOR_BEGIN_BUF
) {
3471 /* search str-position only */
3472 if (range
> start
) {
3473 if (start
!= str
) goto mismatch_no_msa
;
3482 goto mismatch_no_msa
;
/* End-of-buffer anchor: both semi-end bounds are end itself. */
3485 else if (reg
->anchor
& ANCHOR_END_BUF
) {
3486 min_semi_end
= max_semi_end
= (UChar
* )end
;
/* A subject shorter than anchor_dmin cannot match. */
3489 if ((OnigDistance
)(max_semi_end
- str
) < reg
->anchor_dmin
)
3490 goto mismatch_no_msa
;
/* Forward search: pull start/range toward the window allowed by
   anchor_dmin/anchor_dmax, measured from the semi-end bounds. */
3492 if (range
> start
) {
3493 if ((OnigDistance
)(min_semi_end
- start
) > reg
->anchor_dmax
) {
3494 start
= min_semi_end
- reg
->anchor_dmax
;
3496 start
= onigenc_get_right_adjust_char_head(reg
->enc
, str
, start
);
3497 else { /* match with empty at end */
3498 start
= onigenc_get_prev_char_head(reg
->enc
, str
, end
);
3501 if ((OnigDistance
)(max_semi_end
- (range
- 1)) < reg
->anchor_dmin
) {
3502 range
= max_semi_end
- reg
->anchor_dmin
+ 1;
3505 if (start
>= range
) goto mismatch_no_msa
;
3508 if ((OnigDistance
)(min_semi_end
- range
) > reg
->anchor_dmax
) {
3509 range
= min_semi_end
- reg
->anchor_dmax
;
3511 if ((OnigDistance
)(max_semi_end
- start
) < reg
->anchor_dmin
) {
3512 start
= max_semi_end
- reg
->anchor_dmin
;
3513 start
= ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg
->enc
, str
, start
);
3515 if (range
> start
) goto mismatch_no_msa
;
/* Semi-end anchor: when the last character is a newline, min_semi_end is
   placed on it (and on a preceding CR-NL pair when that is enabled). */
3518 else if (reg
->anchor
& ANCHOR_SEMI_END_BUF
) {
3519 UChar
* pre_end
= ONIGENC_STEP_BACK(reg
->enc
, str
, end
, 1);
3521 max_semi_end
= (UChar
* )end
;
3522 if (ONIGENC_IS_MBC_NEWLINE(reg
->enc
, pre_end
, end
)) {
3523 min_semi_end
= pre_end
;
3525 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3526 pre_end
= ONIGENC_STEP_BACK(reg
->enc
, str
, pre_end
, 1);
3527 if (IS_NOT_NULL(pre_end
) &&
3528 ONIGENC_IS_MBC_CRNL(reg
->enc
, pre_end
, end
)) {
3529 min_semi_end
= pre_end
;
3532 if (min_semi_end
> str
&& start
<= min_semi_end
) {
3537 min_semi_end
= (UChar
* )end
;
3541 else if ((reg
->anchor
& ANCHOR_ANYCHAR_STAR_ML
)) {
3542 goto begin_position
;
3545 else if (str
== end
) { /* empty string */
3546 static const UChar
* address_for_empty_string
= (UChar
* )"";
3548 #ifdef ONIG_DEBUG_SEARCH
3549 fprintf(stderr
, "onig_search: empty string.\n");
/* A pattern with threshold_len == 0 is matched once against a static
   empty string. */
3552 if (reg
->threshold_len
== 0) {
3553 start
= end
= str
= address_for_empty_string
;
3555 prev
= (UChar
* )NULL
;
3557 MATCH_ARG_INIT(msa
, option
, region
, start
);
3558 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3559 msa
.state_check_buff
= (void* )0;
3560 msa
.state_check_buff_size
= 0; /* NO NEED, for valgrind */
3562 MATCH_AND_RETURN_CHECK(end
);
3565 goto mismatch_no_msa
;
3568 #ifdef ONIG_DEBUG_SEARCH
3569 fprintf(stderr
, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
3570 (int )(end
- str
), (int )(start
- str
), (int )(range
- str
));
/* The match arguments are initialised with the caller's original start,
   not the anchor-adjusted one. */
3573 MATCH_ARG_INIT(msa
, option
, region
, orig_start
);
3574 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3576 int offset
= (MIN(start
, range
) - str
);
3577 STATE_CHECK_BUFF_INIT(msa
, end
- str
, offset
, reg
->num_comb_exp_check
);
3582 if (range
> start
) { /* forward search */
3584 prev
= onigenc_get_prev_char_head(reg
->enc
, str
, s
);
3586 prev
= (UChar
* )NULL
;
3588 if (reg
->optimize
!= ONIG_OPTIMIZE_NONE
) {
3589 UChar
*sch_range
, *low
, *high
, *low_prev
;
/* sch_range: range, extended by dmax (or set to end when dmax is
   infinite) and clamped to end. */
3591 sch_range
= (UChar
* )range
;
3592 if (reg
->dmax
!= 0) {
3593 if (reg
->dmax
== ONIG_INFINITE_DISTANCE
)
3594 sch_range
= (UChar
* )end
;
3596 sch_range
+= reg
->dmax
;
3597 if (sch_range
> end
) sch_range
= (UChar
* )end
;
3601 if ((end
- start
) < reg
->threshold_len
)
3604 if (reg
->dmax
!= ONIG_INFINITE_DISTANCE
) {
3606 if (! forward_search_range(reg
, str
, end
, s
, sch_range
,
3607 &low
, &high
, &low_prev
)) goto mismatch
;
3613 MATCH_AND_RETURN_CHECK(orig_range
);
3615 s
+= enclen(reg
->enc
, s
);
3617 } while (s
< range
);
3620 else { /* check only. */
3621 if (! forward_search_range(reg
, str
, end
, s
, sch_range
,
3622 &low
, &high
, (UChar
** )NULL
)) goto mismatch
;
3624 if ((reg
->anchor
& ANCHOR_ANYCHAR_STAR
) != 0) {
3626 MATCH_AND_RETURN_CHECK(orig_range
);
3628 s
+= enclen(reg
->enc
, s
);
/* Advance s while prev is not a newline and s is still below range. */
3630 while (!ONIGENC_IS_MBC_NEWLINE(reg
->enc
, prev
, end
) && s
< range
) {
3632 s
+= enclen(reg
->enc
, s
);
3634 } while (s
< range
);
3641 MATCH_AND_RETURN_CHECK(orig_range
);
3643 s
+= enclen(reg
->enc
, s
);
3644 } while (s
< range
);
3646 if (s
== range
) { /* because empty match with /$/. */
3647 MATCH_AND_RETURN_CHECK(orig_range
);
3650 else { /* backward search */
3651 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
3652 if (orig_start
< end
)
3653 orig_start
+= enclen(reg
->enc
, orig_start
); /* is upper range */
3656 if (reg
->optimize
!= ONIG_OPTIMIZE_NONE
) {
3657 UChar
*low
, *high
, *adjrange
, *sch_start
;
3660 adjrange
= ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg
->enc
, str
, range
);
3662 adjrange
= (UChar
* )end
;
3664 if (reg
->dmax
!= ONIG_INFINITE_DISTANCE
&&
3665 (end
- range
) >= reg
->threshold_len
) {
/* sch_start: s moved forward by dmax, clamped to end. */
3667 sch_start
= s
+ reg
->dmax
;
3668 if (sch_start
> end
) sch_start
= (UChar
* )end
;
3669 if (backward_search_range(reg
, str
, end
, sch_start
, range
, adjrange
,
3677 prev
= onigenc_get_prev_char_head(reg
->enc
, str
, s
);
3678 MATCH_AND_RETURN_CHECK(orig_start
);
3681 } while (s
>= range
);
3684 else { /* check only. */
3685 if ((end
- range
) < reg
->threshold_len
) goto mismatch
;
3688 if (reg
->dmax
!= 0) {
3689 if (reg
->dmax
== ONIG_INFINITE_DISTANCE
)
3690 sch_start
= (UChar
* )end
;
3692 sch_start
+= reg
->dmax
;
3693 if (sch_start
> end
) sch_start
= (UChar
* )end
;
3695 sch_start
= ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg
->enc
,
3699 if (backward_search_range(reg
, str
, end
, sch_start
, range
, adjrange
,
3700 &low
, &high
) <= 0) goto mismatch
;
3705 prev
= onigenc_get_prev_char_head(reg
->enc
, str
, s
);
3706 MATCH_AND_RETURN_CHECK(orig_start
);
3708 } while (s
>= range
);
3712 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
3713 if (IS_FIND_LONGEST(reg
->options
)) {
3714 if (msa
.best_len
>= 0) {
3723 MATCH_ARG_FREE(msa
);
3724 ONIG_STATE_DEC_THREAD(reg
);
3726 /* If result is mismatch and no FIND_NOT_EMPTY option,
3727 then the region is not set in match_at(). */
3728 if (IS_FIND_NOT_EMPTY(reg
->options
) && region
3729 #ifdef USE_POSIX_API_REGION_OPTION
3730 && !IS_POSIX_REGION(option
)
3733 onig_region_clear(region
);
3737 if (r
!= ONIG_MISMATCH
)
3738 fprintf(stderr
, "onig_search: error %d\n", r
);
3745 ONIG_STATE_DEC_THREAD(reg
);
3747 if (r
!= ONIG_MISMATCH
)
3748 fprintf(stderr
, "onig_search: error %d\n", r
);
/* Match path: release the thread state and match arguments, then report
   the match position as an offset from str. */
3753 ONIG_STATE_DEC_THREAD(reg
);
3754 MATCH_ARG_FREE(msa
);
3755 return (int)(s
- str
);
/*
 * onig_get_encoding: accessor taking a compiled regex.
 * NOTE(review): the body is not visible in this listing; presumably it
 * returns the regex's encoding (reg->enc) -- confirm against the full
 * source.
 */
3759 onig_get_encoding(regex_t
* reg
)
3764 extern OnigOptionType
3765 onig_get_options(regex_t
* reg
)
3767 return reg
->options
;
3770 extern OnigCaseFoldType
3771 onig_get_case_fold_flag(regex_t
* reg
)
3773 return reg
->case_fold_flag
;
/*
 * onig_get_syntax: accessor taking a compiled regex and returning an
 * OnigSyntaxType pointer.
 * NOTE(review): the body is not visible in this listing; presumably it
 * returns the regex's syntax (reg->syntax) -- confirm against the full
 * source.
 */
3776 extern OnigSyntaxType
*
3777 onig_get_syntax(regex_t
* reg
)
/*
 * onig_number_of_captures: return reg->num_mem for the given compiled
 * regex (reg is dereferenced without a NULL check).
 */
3783 onig_number_of_captures(regex_t
* reg
)
3785 return reg
->num_mem
;
/*
 * onig_number_of_capture_histories: when USE_CAPTURE_HISTORY is defined,
 * walks bit positions 0..ONIG_MAX_CAPTURE_HISTORY_GROUP (inclusive) of
 * reg->capture_history and tests each one with BIT_STATUS_AT.
 * NOTE(review): the statement run for a set bit and the return statements
 * are missing from this listing; presumably the set bits are counted and
 * the count returned -- confirm against the full source.
 */
3789 onig_number_of_capture_histories(regex_t
* reg
)
3791 #ifdef USE_CAPTURE_HISTORY
3795 for (i
= 0; i
<= ONIG_MAX_CAPTURE_HISTORY_GROUP
; i
++) {
3796 if (BIT_STATUS_AT(reg
->capture_history
, i
) != 0)
3806 onig_copy_encoding(OnigEncoding to
, OnigEncoding from
)