]> git.proxmox.com Git - mirror_edk2.git/blob - MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regexec.c
5e3ffa1838e1fa942e08322b12bfcb93b47e1baa
[mirror_edk2.git] / MdeModulePkg / Universal / RegularExpressionDxe / Oniguruma / regexec.c
1 /**********************************************************************
2 regexec.c - Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5 * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * All rights reserved.
7 *
8 * (C) Copyright 2015 Hewlett Packard Enterprise Development LP<BR>
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 #include "regint.h"
33
34 #define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
35
36 #ifdef USE_CRNL_AS_LINE_TERMINATOR
37 #define ONIGENC_IS_MBC_CRNL(enc,p,end) \
38 (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
39 ONIGENC_IS_MBC_NEWLINE(enc,(p+enclen(enc,p)),end))
40 #endif
41
42 #ifdef USE_CAPTURE_HISTORY
43 static void history_tree_free(OnigCaptureTreeNode* node);
44
45 static void
46 history_tree_clear(OnigCaptureTreeNode* node)
47 {
48 int i;
49
50 if (IS_NOT_NULL(node)) {
51 for (i = 0; i < node->num_childs; i++) {
52 if (IS_NOT_NULL(node->childs[i])) {
53 history_tree_free(node->childs[i]);
54 }
55 }
56 for (i = 0; i < node->allocated; i++) {
57 node->childs[i] = (OnigCaptureTreeNode* )0;
58 }
59 node->num_childs = 0;
60 node->beg = ONIG_REGION_NOTPOS;
61 node->end = ONIG_REGION_NOTPOS;
62 node->group = -1;
63 }
64 }
65
66 static void
67 history_tree_free(OnigCaptureTreeNode* node)
68 {
69 history_tree_clear(node);
70 xfree(node);
71 }
72
73 static void
74 history_root_free(OnigRegion* r)
75 {
76 if (IS_NOT_NULL(r->history_root)) {
77 history_tree_free(r->history_root);
78 r->history_root = (OnigCaptureTreeNode* )0;
79 }
80 }
81
82 static OnigCaptureTreeNode*
83 history_node_new(void)
84 {
85 OnigCaptureTreeNode* node;
86
87 node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode));
88 CHECK_NULL_RETURN(node);
89 node->childs = (OnigCaptureTreeNode** )0;
90 node->allocated = 0;
91 node->num_childs = 0;
92 node->group = -1;
93 node->beg = ONIG_REGION_NOTPOS;
94 node->end = ONIG_REGION_NOTPOS;
95
96 return node;
97 }
98
99 static int
100 history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child)
101 {
102 #define HISTORY_TREE_INIT_ALLOC_SIZE 8
103
104 if (parent->num_childs >= parent->allocated) {
105 int n, i;
106
107 if (IS_NULL(parent->childs)) {
108 n = HISTORY_TREE_INIT_ALLOC_SIZE;
109 parent->childs =
110 (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n);
111 }
112 else {
113 n = parent->allocated * 2;
114 parent->childs =
115 (OnigCaptureTreeNode** )xrealloc(parent->childs,
116 sizeof(OnigCaptureTreeNode*) * n,
117 sizeof(OnigCaptureTreeNode*) * parent->allocated);
118 }
119 CHECK_NULL_RETURN_MEMERR(parent->childs);
120 for (i = parent->allocated; i < n; i++) {
121 parent->childs[i] = (OnigCaptureTreeNode* )0;
122 }
123 parent->allocated = n;
124 }
125
126 parent->childs[parent->num_childs] = child;
127 parent->num_childs++;
128 return 0;
129 }
130
131 static OnigCaptureTreeNode*
132 history_tree_clone(OnigCaptureTreeNode* node)
133 {
134 int i;
135 OnigCaptureTreeNode *clone, *child;
136
137 clone = history_node_new();
138 CHECK_NULL_RETURN(clone);
139
140 clone->beg = node->beg;
141 clone->end = node->end;
142 for (i = 0; i < node->num_childs; i++) {
143 child = history_tree_clone(node->childs[i]);
144 if (IS_NULL(child)) {
145 history_tree_free(clone);
146 return (OnigCaptureTreeNode* )0;
147 }
148 history_tree_add_child(clone, child);
149 }
150
151 return clone;
152 }
153
154 extern OnigCaptureTreeNode*
155 onig_get_capture_tree(OnigRegion* region)
156 {
157 return region->history_root;
158 }
159 #endif /* USE_CAPTURE_HISTORY */
160
161 extern void
162 onig_region_clear(OnigRegion* region)
163 {
164 int i;
165
166 for (i = 0; i < region->num_regs; i++) {
167 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
168 }
169 #ifdef USE_CAPTURE_HISTORY
170 history_root_free(region);
171 #endif
172 }
173
174 extern int
175 onig_region_resize(OnigRegion* region, int n)
176 {
177 region->num_regs = n;
178
179 if (n < ONIG_NREGION)
180 n = ONIG_NREGION;
181
182 if (region->allocated == 0) {
183 region->beg = (int* )xmalloc(n * sizeof(int));
184 region->end = (int* )xmalloc(n * sizeof(int));
185
186 if (region->beg == 0 || region->end == 0)
187 return ONIGERR_MEMORY;
188
189 region->allocated = n;
190 }
191 else if (region->allocated < n) {
192 region->beg = (int* )xrealloc(region->beg, n * sizeof(int), region->allocated * sizeof(int));
193 region->end = (int* )xrealloc(region->end, n * sizeof(int), region->allocated * sizeof(int));
194
195 if (region->beg == 0 || region->end == 0)
196 return ONIGERR_MEMORY;
197
198 region->allocated = n;
199 }
200
201 return 0;
202 }
203
204 static int
205 onig_region_resize_clear(OnigRegion* region, int n)
206 {
207 int r;
208
209 r = onig_region_resize(region, n);
210 if (r != 0) return r;
211 onig_region_clear(region);
212 return 0;
213 }
214
215 extern int
216 onig_region_set(OnigRegion* region, int at, int beg, int end)
217 {
218 if (at < 0) return ONIGERR_INVALID_ARGUMENT;
219
220 if (at >= region->allocated) {
221 int r = onig_region_resize(region, at + 1);
222 if (r < 0) return r;
223 }
224
225 region->beg[at] = beg;
226 region->end[at] = end;
227 return 0;
228 }
229
230 extern void
231 onig_region_init(OnigRegion* region)
232 {
233 region->num_regs = 0;
234 region->allocated = 0;
235 region->beg = (int* )0;
236 region->end = (int* )0;
237 region->history_root = (OnigCaptureTreeNode* )0;
238 }
239
240 extern OnigRegion*
241 onig_region_new(void)
242 {
243 OnigRegion* r;
244
245 r = (OnigRegion* )xmalloc(sizeof(OnigRegion));
246 onig_region_init(r);
247 return r;
248 }
249
250 extern void
251 onig_region_free(OnigRegion* r, int free_self)
252 {
253 if (r) {
254 if (r->allocated > 0) {
255 if (r->beg) xfree(r->beg);
256 if (r->end) xfree(r->end);
257 r->allocated = 0;
258 }
259 #ifdef USE_CAPTURE_HISTORY
260 history_root_free(r);
261 #endif
262 if (free_self) xfree(r);
263 }
264 }
265
266 extern void
267 onig_region_copy(OnigRegion* to, OnigRegion* from)
268 {
269 #define RREGC_SIZE (sizeof(int) * from->num_regs)
270 int i;
271
272 if (to == from) return;
273
274 if (to->allocated == 0) {
275 if (from->num_regs > 0) {
276 to->beg = (int* )xmalloc(RREGC_SIZE);
277 to->end = (int* )xmalloc(RREGC_SIZE);
278 to->allocated = from->num_regs;
279 }
280 }
281 else if (to->allocated < from->num_regs) {
282 to->beg = (int* )xrealloc(to->beg, RREGC_SIZE, sizeof(int) * to->allocated);
283 to->end = (int* )xrealloc(to->end, RREGC_SIZE, sizeof(int) * to->allocated);
284 to->allocated = from->num_regs;
285 }
286
287 for (i = 0; i < from->num_regs; i++) {
288 to->beg[i] = from->beg[i];
289 to->end[i] = from->end[i];
290 }
291 to->num_regs = from->num_regs;
292
293 #ifdef USE_CAPTURE_HISTORY
294 history_root_free(to);
295
296 if (IS_NOT_NULL(from->history_root)) {
297 to->history_root = history_tree_clone(from->history_root);
298 }
299 #endif
300 }
301
302
303 /** stack **/
304 #define INVALID_STACK_INDEX -1
305
306 /* stack type */
307 /* used by normal-POP */
308 #define STK_ALT 0x0001
309 #define STK_LOOK_BEHIND_NOT 0x0002
310 #define STK_POS_NOT 0x0003
311 /* handled by normal-POP */
312 #define STK_MEM_START 0x0100
313 #define STK_MEM_END 0x8200
314 #define STK_REPEAT_INC 0x0300
315 #define STK_STATE_CHECK_MARK 0x1000
316 /* avoided by normal-POP */
317 #define STK_NULL_CHECK_START 0x3000
318 #define STK_NULL_CHECK_END 0x5000 /* for recursive call */
319 #define STK_MEM_END_MARK 0x8400
320 #define STK_POS 0x0500 /* used when POP-POS */
321 #define STK_STOP_BT 0x0600 /* mark for "(?>...)" */
322 #define STK_REPEAT 0x0700
323 #define STK_CALL_FRAME 0x0800
324 #define STK_RETURN 0x0900
325 #define STK_VOID 0x0a00 /* for fill a blank */
326
327 /* stack type check mask */
328 #define STK_MASK_POP_USED 0x00ff
329 #define STK_MASK_TO_VOID_TARGET 0x10ff
330 #define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */
331
332 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
333 #define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
334 (msa).stack_p = (void* )0;\
335 (msa).options = (arg_option);\
336 (msa).region = (arg_region);\
337 (msa).start = (arg_start);\
338 (msa).best_len = ONIG_MISMATCH;\
339 } while(0)
340 #else
341 #define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
342 (msa).stack_p = (void* )0;\
343 (msa).options = (arg_option);\
344 (msa).region = (arg_region);\
345 (msa).start = (arg_start);\
346 } while(0)
347 #endif
348
349 #ifdef USE_COMBINATION_EXPLOSION_CHECK
350
351 #define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16
352
353 #define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \
354 if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\
355 unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\
356 offset = ((offset) * (state_num)) >> 3;\
357 if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\
358 if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) \
359 (msa).state_check_buff = (void* )xmalloc(size);\
360 else \
361 (msa).state_check_buff = (void* )xalloca(size);\
362 xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \
363 (size_t )(size - (offset))); \
364 (msa).state_check_buff_size = size;\
365 }\
366 else {\
367 (msa).state_check_buff = (void* )0;\
368 (msa).state_check_buff_size = 0;\
369 }\
370 }\
371 else {\
372 (msa).state_check_buff = (void* )0;\
373 (msa).state_check_buff_size = 0;\
374 }\
375 } while(0)
376
377 #define MATCH_ARG_FREE(msa) do {\
378 if ((msa).stack_p) xfree((msa).stack_p);\
379 if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \
380 if ((msa).state_check_buff) xfree((msa).state_check_buff);\
381 }\
382 } while(0)
383 #else
384 #define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num)
385 #define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p)
386 #endif
387
388
389
390 #define STACK_INIT(alloc_addr, ptr_num, stack_num) do {\
391 if (msa->stack_p) {\
392 alloc_addr = (char* )xmalloc(sizeof(char*) * (ptr_num));\
393 stk_alloc = (OnigStackType* )(msa->stack_p);\
394 stk_base = stk_alloc;\
395 stk = stk_base;\
396 stk_end = stk_base + msa->stack_n;\
397 }\
398 else {\
399 alloc_addr = (char* )xmalloc(sizeof(char*) * (ptr_num)\
400 + sizeof(OnigStackType) * (stack_num));\
401 stk_alloc = (OnigStackType* )(alloc_addr + sizeof(char*) * (ptr_num));\
402 stk_base = stk_alloc;\
403 stk = stk_base;\
404 stk_end = stk_base + (stack_num);\
405 }\
406 } while(0)
407
408 #define STACK_SAVE do{\
409 if (stk_base != stk_alloc) {\
410 msa->stack_p = stk_base;\
411 msa->stack_n = (int)(stk_end - stk_base);\
412 };\
413 } while(0)
414
415 static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE;
416
417 extern unsigned int
418 onig_get_match_stack_limit_size(void)
419 {
420 return MatchStackLimitSize;
421 }
422
423 extern int
424 onig_set_match_stack_limit_size(unsigned int size)
425 {
426 MatchStackLimitSize = size;
427 return 0;
428 }
429
430 static int
431 stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
432 OnigStackType** arg_stk, OnigStackType* stk_alloc, OnigMatchArg* msa)
433 {
434 unsigned int n;
435 OnigStackType *x, *stk_base, *stk_end, *stk;
436
437 stk_base = *arg_stk_base;
438 stk_end = *arg_stk_end;
439 stk = *arg_stk;
440
441 n = (unsigned int)(stk_end - stk_base);
442 if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) {
443 x = (OnigStackType* )xmalloc(sizeof(OnigStackType) * n * 2);
444 if (IS_NULL(x)) {
445 STACK_SAVE;
446 return ONIGERR_MEMORY;
447 }
448 xmemcpy(x, stk_base, n * sizeof(OnigStackType));
449 n *= 2;
450 }
451 else {
452 n *= 2;
453 if (MatchStackLimitSize != 0 && n > MatchStackLimitSize) {
454 if ((unsigned int )(stk_end - stk_base) == MatchStackLimitSize)
455 return ONIGERR_MATCH_STACK_LIMIT_OVER;
456 else
457 n = MatchStackLimitSize;
458 }
459 x = (OnigStackType* )xrealloc(stk_base, sizeof(OnigStackType) * n, sizeof(OnigStackType) * (stk_end - stk_base));
460 if (IS_NULL(x)) {
461 STACK_SAVE;
462 return ONIGERR_MEMORY;
463 }
464 }
465 *arg_stk = x + (stk - stk_base);
466 *arg_stk_base = x;
467 *arg_stk_end = x + n;
468 return 0;
469 }
470
471 #define STACK_ENSURE(n) do {\
472 if (stk_end - stk < (n)) {\
473 int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\
474 if (r != 0) { STACK_SAVE; return r; } \
475 }\
476 } while(0)
477
478 #define STACK_AT(index) (stk_base + (index))
479 #define GET_STACK_INDEX(stk) ((OnigStackIndex)((stk) - stk_base))
480
481 #define STACK_PUSH_TYPE(stack_type) do {\
482 STACK_ENSURE(1);\
483 stk->type = (stack_type);\
484 STACK_INC;\
485 } while(0)
486
487 #define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
488
489 #ifdef USE_COMBINATION_EXPLOSION_CHECK
490 #define STATE_CHECK_POS(s,snum) \
491 (((s) - str) * num_comb_exp_check + ((snum) - 1))
492 #define STATE_CHECK_VAL(v,snum) do {\
493 if (state_check_buff != NULL) {\
494 int x = STATE_CHECK_POS(s,snum);\
495 (v) = state_check_buff[x/8] & (1<<(x%8));\
496 }\
497 else (v) = 0;\
498 } while(0)
499
500
501 #define ELSE_IF_STATE_CHECK_MARK(stk) \
502 else if ((stk)->type == STK_STATE_CHECK_MARK) { \
503 int x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\
504 state_check_buff[x/8] |= (1<<(x%8)); \
505 }
506
507 #define STACK_PUSH(stack_type,pat,s,sprev) do {\
508 STACK_ENSURE(1);\
509 stk->type = (stack_type);\
510 stk->u.state.pcode = (pat);\
511 stk->u.state.pstr = (s);\
512 stk->u.state.pstr_prev = (sprev);\
513 stk->u.state.state_check = 0;\
514 STACK_INC;\
515 } while(0)
516
517 #define STACK_PUSH_ENSURED(stack_type,pat) do {\
518 stk->type = (stack_type);\
519 stk->u.state.pcode = (pat);\
520 stk->u.state.state_check = 0;\
521 STACK_INC;\
522 } while(0)
523
524 #define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum) do {\
525 STACK_ENSURE(1);\
526 stk->type = STK_ALT;\
527 stk->u.state.pcode = (pat);\
528 stk->u.state.pstr = (s);\
529 stk->u.state.pstr_prev = (sprev);\
530 stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\
531 STACK_INC;\
532 } while(0)
533
534 #define STACK_PUSH_STATE_CHECK(s,snum) do {\
535 if (state_check_buff != NULL) {\
536 STACK_ENSURE(1);\
537 stk->type = STK_STATE_CHECK_MARK;\
538 stk->u.state.pstr = (s);\
539 stk->u.state.state_check = (snum);\
540 STACK_INC;\
541 }\
542 } while(0)
543
544 #else /* USE_COMBINATION_EXPLOSION_CHECK */
545
546 #define ELSE_IF_STATE_CHECK_MARK(stk)
547
548 #define STACK_PUSH(stack_type,pat,s,sprev) do {\
549 STACK_ENSURE(1);\
550 stk->type = (stack_type);\
551 stk->u.state.pcode = (pat);\
552 stk->u.state.pstr = (s);\
553 stk->u.state.pstr_prev = (sprev);\
554 STACK_INC;\
555 } while(0)
556
557 #define STACK_PUSH_ENSURED(stack_type,pat) do {\
558 stk->type = (stack_type);\
559 stk->u.state.pcode = (pat);\
560 STACK_INC;\
561 } while(0)
562 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
563
564 #define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev)
565 #define STACK_PUSH_POS(s,sprev) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev)
566 #define STACK_PUSH_POS_NOT(pat,s,sprev) STACK_PUSH(STK_POS_NOT,pat,s,sprev)
567 #define STACK_PUSH_STOP_BT STACK_PUSH_TYPE(STK_STOP_BT)
568 #define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev) \
569 STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev)
570
571 #define STACK_PUSH_REPEAT(id, pat) do {\
572 STACK_ENSURE(1);\
573 stk->type = STK_REPEAT;\
574 stk->u.repeat.num = (id);\
575 stk->u.repeat.pcode = (pat);\
576 stk->u.repeat.count = 0;\
577 STACK_INC;\
578 } while(0)
579
580 #define STACK_PUSH_REPEAT_INC(sindex) do {\
581 STACK_ENSURE(1);\
582 stk->type = STK_REPEAT_INC;\
583 stk->u.repeat_inc.si = (sindex);\
584 STACK_INC;\
585 } while(0)
586
587 #define STACK_PUSH_MEM_START(mnum, s) do {\
588 STACK_ENSURE(1);\
589 stk->type = STK_MEM_START;\
590 stk->u.mem.num = (int)(mnum);\
591 stk->u.mem.pstr = (s);\
592 stk->u.mem.start = mem_start_stk[mnum];\
593 stk->u.mem.end = mem_end_stk[mnum];\
594 mem_start_stk[mnum] = GET_STACK_INDEX(stk);\
595 mem_end_stk[mnum] = INVALID_STACK_INDEX;\
596 STACK_INC;\
597 } while(0)
598
599 #define STACK_PUSH_MEM_END(mnum, s) do {\
600 STACK_ENSURE(1);\
601 stk->type = STK_MEM_END;\
602 stk->u.mem.num = (mnum);\
603 stk->u.mem.pstr = (s);\
604 stk->u.mem.start = mem_start_stk[mnum];\
605 stk->u.mem.end = mem_end_stk[mnum];\
606 mem_end_stk[mnum] = GET_STACK_INDEX(stk);\
607 STACK_INC;\
608 } while(0)
609
610 #define STACK_PUSH_MEM_END_MARK(mnum) do {\
611 STACK_ENSURE(1);\
612 stk->type = STK_MEM_END_MARK;\
613 stk->u.mem.num = (mnum);\
614 STACK_INC;\
615 } while(0)
616
617 #define STACK_GET_MEM_START(mnum, k) do {\
618 int level = 0;\
619 k = stk;\
620 while (k > stk_base) {\
621 k--;\
622 if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
623 && k->u.mem.num == (mnum)) {\
624 level++;\
625 }\
626 else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
627 if (level == 0) break;\
628 level--;\
629 }\
630 }\
631 } while(0)
632
633 #define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\
634 int level = 0;\
635 while (k < stk) {\
636 if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
637 if (level == 0) (start) = k->u.mem.pstr;\
638 level++;\
639 }\
640 else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\
641 level--;\
642 if (level == 0) {\
643 (end) = k->u.mem.pstr;\
644 break;\
645 }\
646 }\
647 k++;\
648 }\
649 } while(0)
650
651 #define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\
652 STACK_ENSURE(1);\
653 stk->type = STK_NULL_CHECK_START;\
654 stk->u.null_check.num = (cnum);\
655 stk->u.null_check.pstr = (s);\
656 STACK_INC;\
657 } while(0)
658
659 #define STACK_PUSH_NULL_CHECK_END(cnum) do {\
660 STACK_ENSURE(1);\
661 stk->type = STK_NULL_CHECK_END;\
662 stk->u.null_check.num = (cnum);\
663 STACK_INC;\
664 } while(0)
665
666 #define STACK_PUSH_CALL_FRAME(pat) do {\
667 STACK_ENSURE(1);\
668 stk->type = STK_CALL_FRAME;\
669 stk->u.call_frame.ret_addr = (pat);\
670 STACK_INC;\
671 } while(0)
672
673 #define STACK_PUSH_RETURN do {\
674 STACK_ENSURE(1);\
675 stk->type = STK_RETURN;\
676 STACK_INC;\
677 } while(0)
678
679
680 #ifdef ONIG_DEBUG
681 #define STACK_BASE_CHECK(p, at) \
682 if ((p) < stk_base) {\
683 fprintf(stderr, "at %s\n", at);\
684 goto stack_error;\
685 }
686 #else
687 #define STACK_BASE_CHECK(p, at)
688 #endif
689
690 #define STACK_POP_ONE do {\
691 stk--;\
692 STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
693 } while(0)
694
695 #define STACK_POP do {\
696 switch (pop_level) {\
697 case STACK_POP_LEVEL_FREE:\
698 while (1) {\
699 stk--;\
700 STACK_BASE_CHECK(stk, "STACK_POP"); \
701 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
702 ELSE_IF_STATE_CHECK_MARK(stk);\
703 }\
704 break;\
705 case STACK_POP_LEVEL_MEM_START:\
706 while (1) {\
707 stk--;\
708 STACK_BASE_CHECK(stk, "STACK_POP 2"); \
709 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
710 else if (stk->type == STK_MEM_START) {\
711 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
712 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
713 }\
714 ELSE_IF_STATE_CHECK_MARK(stk);\
715 }\
716 break;\
717 default:\
718 while (1) {\
719 stk--;\
720 STACK_BASE_CHECK(stk, "STACK_POP 3"); \
721 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
722 else if (stk->type == STK_MEM_START) {\
723 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
724 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
725 }\
726 else if (stk->type == STK_REPEAT_INC) {\
727 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
728 }\
729 else if (stk->type == STK_MEM_END) {\
730 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
731 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
732 }\
733 ELSE_IF_STATE_CHECK_MARK(stk);\
734 }\
735 break;\
736 }\
737 } while(0)
738
739 #define STACK_POP_TIL_POS_NOT do {\
740 while (1) {\
741 stk--;\
742 STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \
743 if (stk->type == STK_POS_NOT) break;\
744 else if (stk->type == STK_MEM_START) {\
745 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
746 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
747 }\
748 else if (stk->type == STK_REPEAT_INC) {\
749 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
750 }\
751 else if (stk->type == STK_MEM_END) {\
752 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
753 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
754 }\
755 ELSE_IF_STATE_CHECK_MARK(stk);\
756 }\
757 } while(0)
758
759 #define STACK_POP_TIL_LOOK_BEHIND_NOT do {\
760 while (1) {\
761 stk--;\
762 STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \
763 if (stk->type == STK_LOOK_BEHIND_NOT) break;\
764 else if (stk->type == STK_MEM_START) {\
765 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
766 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
767 }\
768 else if (stk->type == STK_REPEAT_INC) {\
769 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
770 }\
771 else if (stk->type == STK_MEM_END) {\
772 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
773 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
774 }\
775 ELSE_IF_STATE_CHECK_MARK(stk);\
776 }\
777 } while(0)
778
779 #define STACK_POS_END(k) do {\
780 k = stk;\
781 while (1) {\
782 k--;\
783 STACK_BASE_CHECK(k, "STACK_POS_END"); \
784 if (IS_TO_VOID_TARGET(k)) {\
785 k->type = STK_VOID;\
786 }\
787 else if (k->type == STK_POS) {\
788 k->type = STK_VOID;\
789 break;\
790 }\
791 }\
792 } while(0)
793
794 #define STACK_STOP_BT_END do {\
795 OnigStackType *k = stk;\
796 while (1) {\
797 k--;\
798 STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \
799 if (IS_TO_VOID_TARGET(k)) {\
800 k->type = STK_VOID;\
801 }\
802 else if (k->type == STK_STOP_BT) {\
803 k->type = STK_VOID;\
804 break;\
805 }\
806 }\
807 } while(0)
808
809 #define STACK_NULL_CHECK(isnull,id,s) do {\
810 OnigStackType* k = stk;\
811 while (1) {\
812 k--;\
813 STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \
814 if (k->type == STK_NULL_CHECK_START) {\
815 if (k->u.null_check.num == (id)) {\
816 (isnull) = (k->u.null_check.pstr == (s));\
817 break;\
818 }\
819 }\
820 }\
821 } while(0)
822
823 #define STACK_NULL_CHECK_REC(isnull,id,s) do {\
824 int level = 0;\
825 OnigStackType* k = stk;\
826 while (1) {\
827 k--;\
828 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \
829 if (k->type == STK_NULL_CHECK_START) {\
830 if (k->u.null_check.num == (id)) {\
831 if (level == 0) {\
832 (isnull) = (k->u.null_check.pstr == (s));\
833 break;\
834 }\
835 else level--;\
836 }\
837 }\
838 else if (k->type == STK_NULL_CHECK_END) {\
839 level++;\
840 }\
841 }\
842 } while(0)
843
844 #define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\
845 OnigStackType* k = stk;\
846 while (1) {\
847 k--;\
848 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \
849 if (k->type == STK_NULL_CHECK_START) {\
850 if (k->u.null_check.num == (id)) {\
851 if (k->u.null_check.pstr != (s)) {\
852 (isnull) = 0;\
853 break;\
854 }\
855 else {\
856 UChar* endp;\
857 (isnull) = 1;\
858 while (k < stk) {\
859 if (k->type == STK_MEM_START) {\
860 if (k->u.mem.end == INVALID_STACK_INDEX) {\
861 (isnull) = 0; break;\
862 }\
863 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
864 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
865 else\
866 endp = (UChar* )k->u.mem.end;\
867 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
868 (isnull) = 0; break;\
869 }\
870 else if (endp != s) {\
871 (isnull) = -1; /* empty, but position changed */ \
872 }\
873 }\
874 k++;\
875 }\
876 break;\
877 }\
878 }\
879 }\
880 }\
881 } while(0)
882
883 #define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\
884 int level = 0;\
885 OnigStackType* k = stk;\
886 while (1) {\
887 k--;\
888 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \
889 if (k->type == STK_NULL_CHECK_START) {\
890 if (k->u.null_check.num == (id)) {\
891 if (level == 0) {\
892 if (k->u.null_check.pstr != (s)) {\
893 (isnull) = 0;\
894 break;\
895 }\
896 else {\
897 UChar* endp;\
898 (isnull) = 1;\
899 while (k < stk) {\
900 if (k->type == STK_MEM_START) {\
901 if (k->u.mem.end == INVALID_STACK_INDEX) {\
902 (isnull) = 0; break;\
903 }\
904 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
905 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
906 else\
907 endp = (UChar* )k->u.mem.end;\
908 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
909 (isnull) = 0; break;\
910 }\
911 else if (endp != s) {\
912 (isnull) = -1; /* empty, but position changed */ \
913 }\
914 }\
915 k++;\
916 }\
917 break;\
918 }\
919 }\
920 else {\
921 level--;\
922 }\
923 }\
924 }\
925 else if (k->type == STK_NULL_CHECK_END) {\
926 if (k->u.null_check.num == (id)) level++;\
927 }\
928 }\
929 } while(0)
930
931 #define STACK_GET_REPEAT(id, k) do {\
932 int level = 0;\
933 k = stk;\
934 while (1) {\
935 k--;\
936 STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
937 if (k->type == STK_REPEAT) {\
938 if (level == 0) {\
939 if (k->u.repeat.num == (id)) {\
940 break;\
941 }\
942 }\
943 }\
944 else if (k->type == STK_CALL_FRAME) level--;\
945 else if (k->type == STK_RETURN) level++;\
946 }\
947 } while(0)
948
949 #define STACK_RETURN(addr) do {\
950 int level = 0;\
951 OnigStackType* k = stk;\
952 while (1) {\
953 k--;\
954 STACK_BASE_CHECK(k, "STACK_RETURN"); \
955 if (k->type == STK_CALL_FRAME) {\
956 if (level == 0) {\
957 (addr) = k->u.call_frame.ret_addr;\
958 break;\
959 }\
960 else level--;\
961 }\
962 else if (k->type == STK_RETURN)\
963 level++;\
964 }\
965 } while(0)
966
967
968 #define STRING_CMP(s1,s2,len) do {\
969 while (len-- > 0) {\
970 if (*s1++ != *s2++) goto fail;\
971 }\
972 } while(0)
973
974 #define STRING_CMP_IC(case_fold_flag,s1,ps2,len) do {\
975 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) \
976 goto fail; \
977 } while(0)
978
979 static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
980 UChar* s1, UChar** ps2, int mblen)
981 {
982 UChar buf1[ONIGENC_MBC_CASE_FOLD_MAXLEN];
983 UChar buf2[ONIGENC_MBC_CASE_FOLD_MAXLEN];
984 UChar *p1, *p2, *end1, *s2, *end2;
985 int len1, len2;
986
987 s2 = *ps2;
988 end1 = s1 + mblen;
989 end2 = s2 + mblen;
990 while (s1 < end1) {
991 len1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, end1, buf1);
992 len2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s2, end2, buf2);
993 if (len1 != len2) return 0;
994 p1 = buf1;
995 p2 = buf2;
996 while (len1-- > 0) {
997 if (*p1 != *p2) return 0;
998 p1++;
999 p2++;
1000 }
1001 }
1002
1003 *ps2 = s2;
1004 return 1;
1005 }
1006
1007 #define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
1008 is_fail = 0;\
1009 while (len-- > 0) {\
1010 if (*s1++ != *s2++) {\
1011 is_fail = 1; break;\
1012 }\
1013 }\
1014 } while(0)
1015
1016 #define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,is_fail) do {\
1017 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) \
1018 is_fail = 1; \
1019 else \
1020 is_fail = 0; \
1021 } while(0)
1022
1023
1024 #define IS_EMPTY_STR (str == end)
1025 #define ON_STR_BEGIN(s) ((s) == str)
1026 #define ON_STR_END(s) ((s) == end)
1027 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
1028 #define DATA_ENSURE_CHECK1 (s < right_range)
1029 #define DATA_ENSURE_CHECK(n) (s + (n) <= right_range)
1030 #define DATA_ENSURE(n) if (s + (n) > right_range) goto fail
1031 #else
1032 #define DATA_ENSURE_CHECK1 (s < end)
1033 #define DATA_ENSURE_CHECK(n) (s + (n) <= end)
1034 #define DATA_ENSURE(n) if (s + (n) > end) goto fail
1035 #endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
1036
1037
1038 #ifdef USE_CAPTURE_HISTORY
1039 static int
1040 make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp,
1041 OnigStackType* stk_top, UChar* str, regex_t* reg)
1042 {
1043 int n, r;
1044 OnigCaptureTreeNode* child;
1045 OnigStackType* k = *kp;
1046
1047 while (k < stk_top) {
1048 if (k->type == STK_MEM_START) {
1049 n = k->u.mem.num;
1050 if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP &&
1051 BIT_STATUS_AT(reg->capture_history, n) != 0) {
1052 child = history_node_new();
1053 CHECK_NULL_RETURN_MEMERR(child);
1054 child->group = n;
1055 child->beg = (int )(k->u.mem.pstr - str);
1056 r = history_tree_add_child(node, child);
1057 if (r != 0) return r;
1058 *kp = (k + 1);
1059 r = make_capture_history_tree(child, kp, stk_top, str, reg);
1060 if (r != 0) return r;
1061
1062 k = *kp;
1063 child->end = (int )(k->u.mem.pstr - str);
1064 }
1065 }
1066 else if (k->type == STK_MEM_END) {
1067 if (k->u.mem.num == node->group) {
1068 node->end = (int )(k->u.mem.pstr - str);
1069 *kp = k;
1070 return 0;
1071 }
1072 }
1073 k++;
1074 }
1075
1076 return 1; /* 1: root node ending. */
1077 }
1078 #endif
1079
1080 #ifdef USE_BACKREF_WITH_LEVEL
1081 static int mem_is_in_memp(int mem, int num, UChar* memp)
1082 {
1083 int i;
1084 MemNumType m;
1085
1086 for (i = 0; i < num; i++) {
1087 GET_MEMNUM_INC(m, memp);
1088 if (mem == (int )m) return 1;
1089 }
1090 return 0;
1091 }
1092
1093 static int backref_match_at_nested_level(regex_t* reg
1094 , OnigStackType* top, OnigStackType* stk_base
1095 , int ignore_case, int case_fold_flag
1096 , int nest, int mem_num, UChar* memp, UChar** s, const UChar* send)
1097 {
1098 UChar *ss, *p, *pstart, *pend = NULL_UCHARP;
1099 int level;
1100 OnigStackType* k;
1101
1102 level = 0;
1103 k = top;
1104 k--;
1105 while (k >= stk_base) {
1106 if (k->type == STK_CALL_FRAME) {
1107 level--;
1108 }
1109 else if (k->type == STK_RETURN) {
1110 level++;
1111 }
1112 else if (level == nest) {
1113 if (k->type == STK_MEM_START) {
1114 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
1115 pstart = k->u.mem.pstr;
1116 if (pend != NULL_UCHARP) {
1117 if (pend - pstart > send - *s) return 0; /* or goto next_mem; */
1118 p = pstart;
1119 ss = *s;
1120
1121 if (ignore_case != 0) {
1122 if (string_cmp_ic(reg->enc, case_fold_flag,
1123 pstart, &ss, (int )(pend - pstart)) == 0)
1124 return 0; /* or goto next_mem; */
1125 }
1126 else {
1127 while (p < pend) {
1128 if (*p++ != *ss++) return 0; /* or goto next_mem; */
1129 }
1130 }
1131
1132 *s = ss;
1133 return 1;
1134 }
1135 }
1136 }
1137 else if (k->type == STK_MEM_END) {
1138 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
1139 pend = k->u.mem.pstr;
1140 }
1141 }
1142 }
1143 k--;
1144 }
1145
1146 return 0;
1147 }
1148 #endif /* USE_BACKREF_WITH_LEVEL */
1149
1150
1151 #ifdef ONIG_DEBUG_STATISTICS
1152
1153 #define USE_TIMEOFDAY
1154
1155 #ifdef USE_TIMEOFDAY
1156 #ifdef HAVE_SYS_TIME_H
1157 #include <sys/time.h>
1158 #endif
1159 #ifdef HAVE_UNISTD_H
1160 #include <unistd.h>
1161 #endif
1162 static struct timeval ts, te;
1163 #define GETTIME(t) gettimeofday(&(t), (struct timezone* )0)
1164 #define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \
1165 (((te).tv_sec - (ts).tv_sec)*1000000))
1166 #else
1167 #ifdef HAVE_SYS_TIMES_H
1168 #include <sys/times.h>
1169 #endif
1170 static struct tms ts, te;
1171 #define GETTIME(t) times(&(t))
1172 #define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime)
1173 #endif
1174
1175 static int OpCounter[256];
1176 static int OpPrevCounter[256];
1177 static unsigned long OpTime[256];
1178 static int OpCurr = OP_FINISH;
1179 static int OpPrevTarget = OP_FAIL;
1180 static int MaxStackDepth = 0;
1181
1182 #define MOP_IN(opcode) do {\
1183 if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\
1184 OpCurr = opcode;\
1185 OpCounter[opcode]++;\
1186 GETTIME(ts);\
1187 } while(0)
1188
1189 #define MOP_OUT do {\
1190 GETTIME(te);\
1191 OpTime[OpCurr] += TIMEDIFF(te, ts);\
1192 } while(0)
1193
1194 extern void
1195 onig_statistics_init(void)
1196 {
1197 int i;
1198 for (i = 0; i < 256; i++) {
1199 OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0;
1200 }
1201 MaxStackDepth = 0;
1202 }
1203
1204 extern void
1205 onig_print_statistics(FILE* f)
1206 {
1207 int i;
1208 fprintf(f, " count prev time\n");
1209 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
1210 fprintf(f, "%8d: %8d: %10ld: %s\n",
1211 OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name);
1212 }
1213 fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth);
1214 }
1215
1216 #define STACK_INC do {\
1217 stk++;\
1218 if (stk - stk_base > MaxStackDepth) \
1219 MaxStackDepth = stk - stk_base;\
1220 } while(0)
1221
1222 #else
1223 #define STACK_INC stk++
1224
1225 #define MOP_IN(opcode)
1226 #define MOP_OUT
1227 #endif
1228
1229
1230 /* matching region of POSIX API */
1231 typedef int regoff_t;
1232
1233 typedef struct {
1234 regoff_t rm_so;
1235 regoff_t rm_eo;
1236 } posix_regmatch_t;
1237
1238 /* match data(str - end) from position (sstart). */
1239 /* if sstart == str then set sprev to NULL. */
1240 static int
1241 match_at(regex_t* reg, const UChar* str, const UChar* end,
1242 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
1243 const UChar* right_range,
1244 #endif
1245 const UChar* sstart, UChar* sprev, OnigMatchArg* msa)
1246 {
1247 static UChar FinishCode[] = { OP_FINISH };
1248
1249 int i, n, num_mem, best_len, pop_level;
1250 LengthType tlen, tlen2;
1251 MemNumType mem;
1252 RelAddrType addr;
1253 OnigOptionType option = reg->options;
1254 OnigEncoding encode = reg->enc;
1255 OnigCaseFoldType case_fold_flag = reg->case_fold_flag;
1256 UChar *s, *q, *sbegin;
1257 UChar *p = reg->p;
1258 char *alloca_base;
1259 OnigStackType *stk_alloc, *stk_base, *stk, *stk_end;
1260 OnigStackType *stkp; /* used as any purpose. */
1261 OnigStackIndex si;
1262 OnigStackIndex *repeat_stk;
1263 OnigStackIndex *mem_start_stk, *mem_end_stk;
1264 #ifdef USE_COMBINATION_EXPLOSION_CHECK
1265 int scv;
1266 unsigned char* state_check_buff = msa->state_check_buff;
1267 int num_comb_exp_check = reg->num_comb_exp_check;
1268 #endif
1269 n = reg->num_repeat + reg->num_mem * 2;
1270
1271 STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE);
1272 pop_level = reg->stack_pop_level;
1273 num_mem = reg->num_mem;
1274 repeat_stk = (OnigStackIndex* )alloca_base;
1275
1276 mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat);
1277 mem_end_stk = mem_start_stk + num_mem;
1278 mem_start_stk--; /* for index start from 1,
1279 mem_start_stk[1]..mem_start_stk[num_mem] */
1280 mem_end_stk--; /* for index start from 1,
1281 mem_end_stk[1]..mem_end_stk[num_mem] */
1282 for (i = 1; i <= num_mem; i++) {
1283 mem_start_stk[i] = mem_end_stk[i] = INVALID_STACK_INDEX;
1284 }
1285
1286 #ifdef ONIG_DEBUG_MATCH
1287 fprintf(stderr, "match_at: str: %d, end: %d, start: %d, sprev: %d\n",
1288 (int )str, (int )end, (int )sstart, (int )sprev);
1289 fprintf(stderr, "size: %d, start offset: %d\n",
1290 (int )(end - str), (int )(sstart - str));
1291 #endif
1292
1293 STACK_PUSH_ENSURED(STK_ALT, FinishCode); /* bottom stack */
1294 best_len = ONIG_MISMATCH;
1295 s = (UChar* )sstart;
1296 while (1) {
1297 #ifdef ONIG_DEBUG_MATCH
1298 {
1299 UChar *q, *bp, buf[50];
1300 int len;
1301 fprintf(stderr, "%4d> \"", (int )(s - str));
1302 bp = buf;
1303 for (i = 0, q = s; i < 7 && q < end; i++) {
1304 len = enclen(encode, q);
1305 while (len-- > 0) *bp++ = *q++;
1306 }
1307 if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; }
1308 else { xmemcpy(bp, "\"", 1); bp += 1; }
1309 *bp = 0;
1310 fputs((char* )buf, stderr);
1311 for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr);
1312 onig_print_compiled_byte_code(stderr, p, NULL, encode);
1313 fprintf(stderr, "\n");
1314 }
1315 #endif
1316
1317 sbegin = s;
1318 switch (*p++) {
1319 case OP_END: MOP_IN(OP_END);
1320 n = (int)(s - sstart);
1321 if (n > best_len) {
1322 OnigRegion* region;
1323 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
1324 if (IS_FIND_LONGEST(option)) {
1325 if (n > msa->best_len) {
1326 msa->best_len = n;
1327 msa->best_s = (UChar* )sstart;
1328 }
1329 else
1330 goto end_best_len;
1331 }
1332 #endif
1333 best_len = n;
1334 region = msa->region;
1335 if (region) {
1336 #ifdef USE_POSIX_API_REGION_OPTION
1337 if (IS_POSIX_REGION(msa->options)) {
1338 posix_regmatch_t* rmt = (posix_regmatch_t* )region;
1339
1340 rmt[0].rm_so = (regoff_t)(sstart - str);
1341 rmt[0].rm_eo = (regoff_t)(s - str);
1342 for (i = 1; i <= num_mem; i++) {
1343 if (mem_end_stk[i] != INVALID_STACK_INDEX) {
1344 if (BIT_STATUS_AT(reg->bt_mem_start, i))
1345 rmt[i].rm_so = (regoff_t)(STACK_AT(mem_start_stk[i])->u.mem.pstr - str);
1346 else
1347 rmt[i].rm_so = (regoff_t)((UChar* )((void* )(mem_start_stk[i])) - str);
1348
1349 rmt[i].rm_eo = (regoff_t)((BIT_STATUS_AT(reg->bt_mem_end, i)
1350 ? STACK_AT(mem_end_stk[i])->u.mem.pstr
1351 : (UChar* )((void* )mem_end_stk[i])) - str);
1352 }
1353 else {
1354 rmt[i].rm_so = rmt[i].rm_eo = ONIG_REGION_NOTPOS;
1355 }
1356 }
1357 }
1358 else {
1359 #endif /* USE_POSIX_API_REGION_OPTION */
1360 region->beg[0] = (int)(sstart - str);
1361 region->end[0] = (int)(s - str);
1362 for (i = 1; i <= num_mem; i++) {
1363 if (mem_end_stk[i] != INVALID_STACK_INDEX) {
1364 if (BIT_STATUS_AT(reg->bt_mem_start, i))
1365 region->beg[i] = (int)(STACK_AT(mem_start_stk[i])->u.mem.pstr - str);
1366 else
1367 region->beg[i] = (int)((UChar* )((void* )mem_start_stk[i]) - str);
1368
1369 region->end[i] = (int)((BIT_STATUS_AT(reg->bt_mem_end, i)
1370 ? STACK_AT(mem_end_stk[i])->u.mem.pstr
1371 : (UChar* )((void* )mem_end_stk[i])) - str);
1372 }
1373 else {
1374 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
1375 }
1376 }
1377
1378 #ifdef USE_CAPTURE_HISTORY
1379 if (reg->capture_history != 0) {
1380 int r;
1381 OnigCaptureTreeNode* node;
1382
1383 if (IS_NULL(region->history_root)) {
1384 region->history_root = node = history_node_new();
1385 CHECK_NULL_RETURN_MEMERR(node);
1386 }
1387 else {
1388 node = region->history_root;
1389 history_tree_clear(node);
1390 }
1391
1392 node->group = 0;
1393 node->beg = (int)(sstart - str);
1394 node->end = (int)(s - str);
1395
1396 stkp = stk_base;
1397 r = make_capture_history_tree(region->history_root, &stkp,
1398 stk, (UChar* )str, reg);
1399 if (r < 0) {
1400 best_len = r; /* error code */
1401 goto finish;
1402 }
1403 }
1404 #endif /* USE_CAPTURE_HISTORY */
1405 #ifdef USE_POSIX_API_REGION_OPTION
1406 } /* else IS_POSIX_REGION() */
1407 #endif
1408 } /* if (region) */
1409 } /* n > best_len */
1410
1411 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
1412 end_best_len:
1413 #endif
1414 MOP_OUT;
1415
1416 if (IS_FIND_CONDITION(option)) {
1417 if (IS_FIND_NOT_EMPTY(option) && s == sstart) {
1418 best_len = ONIG_MISMATCH;
1419 goto fail; /* for retry */
1420 }
1421 if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) {
1422 goto fail; /* for retry */
1423 }
1424 }
1425
1426 /* default behavior: return first-matching result. */
1427 goto finish;
1428 break;
1429
1430 case OP_EXACT1: MOP_IN(OP_EXACT1);
1431 #if 0
1432 DATA_ENSURE(1);
1433 if (*p != *s) goto fail;
1434 p++; s++;
1435 #endif
1436 if (*p != *s++) goto fail;
1437 DATA_ENSURE(0);
1438 p++;
1439 MOP_OUT;
1440 break;
1441
1442 case OP_EXACT1_IC: MOP_IN(OP_EXACT1_IC);
1443 {
1444 int len;
1445 UChar *q1, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1446
1447 DATA_ENSURE(1);
1448 len = ONIGENC_MBC_CASE_FOLD(encode,
1449 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
1450 case_fold_flag,
1451 &s, end, lowbuf);
1452 DATA_ENSURE(0);
1453 q1 = lowbuf;
1454 while (len-- > 0) {
1455 if (*p != *q1) {
1456 goto fail;
1457 }
1458 p++; q1++;
1459 }
1460 }
1461 MOP_OUT;
1462 break;
1463
1464 case OP_EXACT2: MOP_IN(OP_EXACT2);
1465 DATA_ENSURE(2);
1466 if (*p != *s) goto fail;
1467 p++; s++;
1468 if (*p != *s) goto fail;
1469 sprev = s;
1470 p++; s++;
1471 MOP_OUT;
1472 continue;
1473 break;
1474
1475 case OP_EXACT3: MOP_IN(OP_EXACT3);
1476 DATA_ENSURE(3);
1477 if (*p != *s) goto fail;
1478 p++; s++;
1479 if (*p != *s) goto fail;
1480 p++; s++;
1481 if (*p != *s) goto fail;
1482 sprev = s;
1483 p++; s++;
1484 MOP_OUT;
1485 continue;
1486 break;
1487
1488 case OP_EXACT4: MOP_IN(OP_EXACT4);
1489 DATA_ENSURE(4);
1490 if (*p != *s) goto fail;
1491 p++; s++;
1492 if (*p != *s) goto fail;
1493 p++; s++;
1494 if (*p != *s) goto fail;
1495 p++; s++;
1496 if (*p != *s) goto fail;
1497 sprev = s;
1498 p++; s++;
1499 MOP_OUT;
1500 continue;
1501 break;
1502
1503 case OP_EXACT5: MOP_IN(OP_EXACT5);
1504 DATA_ENSURE(5);
1505 if (*p != *s) goto fail;
1506 p++; s++;
1507 if (*p != *s) goto fail;
1508 p++; s++;
1509 if (*p != *s) goto fail;
1510 p++; s++;
1511 if (*p != *s) goto fail;
1512 p++; s++;
1513 if (*p != *s) goto fail;
1514 sprev = s;
1515 p++; s++;
1516 MOP_OUT;
1517 continue;
1518 break;
1519
1520 case OP_EXACTN: MOP_IN(OP_EXACTN);
1521 GET_LENGTH_INC(tlen, p);
1522 DATA_ENSURE(tlen);
1523 while (tlen-- > 0) {
1524 if (*p++ != *s++) goto fail;
1525 }
1526 sprev = s - 1;
1527 MOP_OUT;
1528 continue;
1529 break;
1530
1531 case OP_EXACTN_IC: MOP_IN(OP_EXACTN_IC);
1532 {
1533 int len;
1534 UChar *qn, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1535
1536 GET_LENGTH_INC(tlen, p);
1537 endp = p + tlen;
1538
1539 while (p < endp) {
1540 sprev = s;
1541 DATA_ENSURE(1);
1542 len = ONIGENC_MBC_CASE_FOLD(encode,
1543 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
1544 case_fold_flag,
1545 &s, end, lowbuf);
1546 DATA_ENSURE(0);
1547 qn = lowbuf;
1548 while (len-- > 0) {
1549 if (*p != *qn) goto fail;
1550 p++; qn++;
1551 }
1552 }
1553 }
1554
1555 MOP_OUT;
1556 continue;
1557 break;
1558
1559 case OP_EXACTMB2N1: MOP_IN(OP_EXACTMB2N1);
1560 DATA_ENSURE(2);
1561 if (*p != *s) goto fail;
1562 p++; s++;
1563 if (*p != *s) goto fail;
1564 p++; s++;
1565 MOP_OUT;
1566 break;
1567
1568 case OP_EXACTMB2N2: MOP_IN(OP_EXACTMB2N2);
1569 DATA_ENSURE(4);
1570 if (*p != *s) goto fail;
1571 p++; s++;
1572 if (*p != *s) goto fail;
1573 p++; s++;
1574 sprev = s;
1575 if (*p != *s) goto fail;
1576 p++; s++;
1577 if (*p != *s) goto fail;
1578 p++; s++;
1579 MOP_OUT;
1580 continue;
1581 break;
1582
1583 case OP_EXACTMB2N3: MOP_IN(OP_EXACTMB2N3);
1584 DATA_ENSURE(6);
1585 if (*p != *s) goto fail;
1586 p++; s++;
1587 if (*p != *s) goto fail;
1588 p++; s++;
1589 if (*p != *s) goto fail;
1590 p++; s++;
1591 if (*p != *s) goto fail;
1592 p++; s++;
1593 sprev = s;
1594 if (*p != *s) goto fail;
1595 p++; s++;
1596 if (*p != *s) goto fail;
1597 p++; s++;
1598 MOP_OUT;
1599 continue;
1600 break;
1601
1602 case OP_EXACTMB2N: MOP_IN(OP_EXACTMB2N);
1603 GET_LENGTH_INC(tlen, p);
1604 DATA_ENSURE(tlen * 2);
1605 while (tlen-- > 0) {
1606 if (*p != *s) goto fail;
1607 p++; s++;
1608 if (*p != *s) goto fail;
1609 p++; s++;
1610 }
1611 sprev = s - 2;
1612 MOP_OUT;
1613 continue;
1614 break;
1615
1616 case OP_EXACTMB3N: MOP_IN(OP_EXACTMB3N);
1617 GET_LENGTH_INC(tlen, p);
1618 DATA_ENSURE(tlen * 3);
1619 while (tlen-- > 0) {
1620 if (*p != *s) goto fail;
1621 p++; s++;
1622 if (*p != *s) goto fail;
1623 p++; s++;
1624 if (*p != *s) goto fail;
1625 p++; s++;
1626 }
1627 sprev = s - 3;
1628 MOP_OUT;
1629 continue;
1630 break;
1631
1632 case OP_EXACTMBN: MOP_IN(OP_EXACTMBN);
1633 GET_LENGTH_INC(tlen, p); /* mb-len */
1634 GET_LENGTH_INC(tlen2, p); /* string len */
1635 tlen2 *= tlen;
1636 DATA_ENSURE(tlen2);
1637 while (tlen2-- > 0) {
1638 if (*p != *s) goto fail;
1639 p++; s++;
1640 }
1641 sprev = s - tlen;
1642 MOP_OUT;
1643 continue;
1644 break;
1645
1646 case OP_CCLASS: MOP_IN(OP_CCLASS);
1647 DATA_ENSURE(1);
1648 if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail;
1649 p += SIZE_BITSET;
1650 s += enclen(encode, s); /* OP_CCLASS can match mb-code. \D, \S */
1651 MOP_OUT;
1652 break;
1653
1654 case OP_CCLASS_MB: MOP_IN(OP_CCLASS_MB);
1655 if (! ONIGENC_IS_MBC_HEAD(encode, s)) goto fail;
1656
1657 cclass_mb:
1658 GET_LENGTH_INC(tlen, p);
1659 {
1660 OnigCodePoint code;
1661 UChar *ss;
1662 int mb_len;
1663
1664 DATA_ENSURE(1);
1665 mb_len = enclen(encode, s);
1666 DATA_ENSURE(mb_len);
1667 ss = s;
1668 s += mb_len;
1669 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
1670
1671 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
1672 if (! onig_is_in_code_range(p, code)) goto fail;
1673 #else
1674 q = p;
1675 ALIGNMENT_RIGHT(q);
1676 if (! onig_is_in_code_range(q, code)) goto fail;
1677 #endif
1678 }
1679 p += tlen;
1680 MOP_OUT;
1681 break;
1682
1683 case OP_CCLASS_MIX: MOP_IN(OP_CCLASS_MIX);
1684 DATA_ENSURE(1);
1685 if (ONIGENC_IS_MBC_HEAD(encode, s)) {
1686 p += SIZE_BITSET;
1687 goto cclass_mb;
1688 }
1689 else {
1690 if (BITSET_AT(((BitSetRef )p), *s) == 0)
1691 goto fail;
1692
1693 p += SIZE_BITSET;
1694 GET_LENGTH_INC(tlen, p);
1695 p += tlen;
1696 s++;
1697 }
1698 MOP_OUT;
1699 break;
1700
1701 case OP_CCLASS_NOT: MOP_IN(OP_CCLASS_NOT);
1702 DATA_ENSURE(1);
1703 if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail;
1704 p += SIZE_BITSET;
1705 s += enclen(encode, s);
1706 MOP_OUT;
1707 break;
1708
1709 case OP_CCLASS_MB_NOT: MOP_IN(OP_CCLASS_MB_NOT);
1710 DATA_ENSURE(1);
1711 if (! ONIGENC_IS_MBC_HEAD(encode, s)) {
1712 s++;
1713 GET_LENGTH_INC(tlen, p);
1714 p += tlen;
1715 goto cc_mb_not_success;
1716 }
1717
1718 cclass_mb_not:
1719 GET_LENGTH_INC(tlen, p);
1720 {
1721 OnigCodePoint code;
1722 UChar *ss;
1723 int mb_len = enclen(encode, s);
1724
1725 if (! DATA_ENSURE_CHECK(mb_len)) {
1726 DATA_ENSURE(1);
1727 s = (UChar* )end;
1728 p += tlen;
1729 goto cc_mb_not_success;
1730 }
1731
1732 ss = s;
1733 s += mb_len;
1734 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
1735
1736 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
1737 if (onig_is_in_code_range(p, code)) goto fail;
1738 #else
1739 q = p;
1740 ALIGNMENT_RIGHT(q);
1741 if (onig_is_in_code_range(q, code)) goto fail;
1742 #endif
1743 }
1744 p += tlen;
1745
1746 cc_mb_not_success:
1747 MOP_OUT;
1748 break;
1749
1750 case OP_CCLASS_MIX_NOT: MOP_IN(OP_CCLASS_MIX_NOT);
1751 DATA_ENSURE(1);
1752 if (ONIGENC_IS_MBC_HEAD(encode, s)) {
1753 p += SIZE_BITSET;
1754 goto cclass_mb_not;
1755 }
1756 else {
1757 if (BITSET_AT(((BitSetRef )p), *s) != 0)
1758 goto fail;
1759
1760 p += SIZE_BITSET;
1761 GET_LENGTH_INC(tlen, p);
1762 p += tlen;
1763 s++;
1764 }
1765 MOP_OUT;
1766 break;
1767
1768 case OP_CCLASS_NODE: MOP_IN(OP_CCLASS_NODE);
1769 {
1770 OnigCodePoint code;
1771 void *node;
1772 int mb_len;
1773 UChar *ss;
1774
1775 DATA_ENSURE(1);
1776 GET_POINTER_INC(node, p);
1777 mb_len = enclen(encode, s);
1778 ss = s;
1779 s += mb_len;
1780 DATA_ENSURE(0);
1781 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
1782 if (onig_is_code_in_cc_len(mb_len, code, node) == 0) goto fail;
1783 }
1784 MOP_OUT;
1785 break;
1786
1787 case OP_ANYCHAR: MOP_IN(OP_ANYCHAR);
1788 DATA_ENSURE(1);
1789 n = enclen(encode, s);
1790 DATA_ENSURE(n);
1791 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
1792 s += n;
1793 MOP_OUT;
1794 break;
1795
1796 case OP_ANYCHAR_ML: MOP_IN(OP_ANYCHAR_ML);
1797 DATA_ENSURE(1);
1798 n = enclen(encode, s);
1799 DATA_ENSURE(n);
1800 s += n;
1801 MOP_OUT;
1802 break;
1803
1804 case OP_ANYCHAR_STAR: MOP_IN(OP_ANYCHAR_STAR);
1805 while (DATA_ENSURE_CHECK1) {
1806 STACK_PUSH_ALT(p, s, sprev);
1807 n = enclen(encode, s);
1808 DATA_ENSURE(n);
1809 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
1810 sprev = s;
1811 s += n;
1812 }
1813 MOP_OUT;
1814 break;
1815
1816 case OP_ANYCHAR_ML_STAR: MOP_IN(OP_ANYCHAR_ML_STAR);
1817 while (DATA_ENSURE_CHECK1) {
1818 STACK_PUSH_ALT(p, s, sprev);
1819 n = enclen(encode, s);
1820 if (n > 1) {
1821 DATA_ENSURE(n);
1822 sprev = s;
1823 s += n;
1824 }
1825 else {
1826 sprev = s;
1827 s++;
1828 }
1829 }
1830 MOP_OUT;
1831 break;
1832
1833 case OP_ANYCHAR_STAR_PEEK_NEXT: MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT);
1834 while (DATA_ENSURE_CHECK1) {
1835 if (*p == *s) {
1836 STACK_PUSH_ALT(p + 1, s, sprev);
1837 }
1838 n = enclen(encode, s);
1839 DATA_ENSURE(n);
1840 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
1841 sprev = s;
1842 s += n;
1843 }
1844 p++;
1845 MOP_OUT;
1846 break;
1847
1848 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);
1849 while (DATA_ENSURE_CHECK1) {
1850 if (*p == *s) {
1851 STACK_PUSH_ALT(p + 1, s, sprev);
1852 }
1853 n = enclen(encode, s);
1854 if (n > 1) {
1855 DATA_ENSURE(n);
1856 sprev = s;
1857 s += n;
1858 }
1859 else {
1860 sprev = s;
1861 s++;
1862 }
1863 }
1864 p++;
1865 MOP_OUT;
1866 break;
1867
1868 #ifdef USE_COMBINATION_EXPLOSION_CHECK
1869 case OP_STATE_CHECK_ANYCHAR_STAR: MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR);
1870 GET_STATE_CHECK_NUM_INC(mem, p);
1871 while (DATA_ENSURE_CHECK1) {
1872 STATE_CHECK_VAL(scv, mem);
1873 if (scv) goto fail;
1874
1875 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);
1876 n = enclen(encode, s);
1877 DATA_ENSURE(n);
1878 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
1879 sprev = s;
1880 s += n;
1881 }
1882 MOP_OUT;
1883 break;
1884
1885 case OP_STATE_CHECK_ANYCHAR_ML_STAR:
1886 MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR);
1887
1888 GET_STATE_CHECK_NUM_INC(mem, p);
1889 while (DATA_ENSURE_CHECK1) {
1890 STATE_CHECK_VAL(scv, mem);
1891 if (scv) goto fail;
1892
1893 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);
1894 n = enclen(encode, s);
1895 if (n > 1) {
1896 DATA_ENSURE(n);
1897 sprev = s;
1898 s += n;
1899 }
1900 else {
1901 sprev = s;
1902 s++;
1903 }
1904 }
1905 MOP_OUT;
1906 break;
1907 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
1908
1909 case OP_WORD: MOP_IN(OP_WORD);
1910 DATA_ENSURE(1);
1911 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
1912 goto fail;
1913
1914 s += enclen(encode, s);
1915 MOP_OUT;
1916 break;
1917
1918 case OP_NOT_WORD: MOP_IN(OP_NOT_WORD);
1919 DATA_ENSURE(1);
1920 if (ONIGENC_IS_MBC_WORD(encode, s, end))
1921 goto fail;
1922
1923 s += enclen(encode, s);
1924 MOP_OUT;
1925 break;
1926
1927 case OP_WORD_BOUND: MOP_IN(OP_WORD_BOUND);
1928 if (ON_STR_BEGIN(s)) {
1929 DATA_ENSURE(1);
1930 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
1931 goto fail;
1932 }
1933 else if (ON_STR_END(s)) {
1934 if (! ONIGENC_IS_MBC_WORD(encode, sprev, end))
1935 goto fail;
1936 }
1937 else {
1938 if (ONIGENC_IS_MBC_WORD(encode, s, end)
1939 == ONIGENC_IS_MBC_WORD(encode, sprev, end))
1940 goto fail;
1941 }
1942 MOP_OUT;
1943 continue;
1944 break;
1945
1946 case OP_NOT_WORD_BOUND: MOP_IN(OP_NOT_WORD_BOUND);
1947 if (ON_STR_BEGIN(s)) {
1948 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end))
1949 goto fail;
1950 }
1951 else if (ON_STR_END(s)) {
1952 if (ONIGENC_IS_MBC_WORD(encode, sprev, end))
1953 goto fail;
1954 }
1955 else {
1956 if (ONIGENC_IS_MBC_WORD(encode, s, end)
1957 != ONIGENC_IS_MBC_WORD(encode, sprev, end))
1958 goto fail;
1959 }
1960 MOP_OUT;
1961 continue;
1962 break;
1963
1964 #ifdef USE_WORD_BEGIN_END
1965 case OP_WORD_BEGIN: MOP_IN(OP_WORD_BEGIN);
1966 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) {
1967 if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
1968 MOP_OUT;
1969 continue;
1970 }
1971 }
1972 goto fail;
1973 break;
1974
1975 case OP_WORD_END: MOP_IN(OP_WORD_END);
1976 if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
1977 if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) {
1978 MOP_OUT;
1979 continue;
1980 }
1981 }
1982 goto fail;
1983 break;
1984 #endif
1985
1986 case OP_BEGIN_BUF: MOP_IN(OP_BEGIN_BUF);
1987 if (! ON_STR_BEGIN(s)) goto fail;
1988
1989 MOP_OUT;
1990 continue;
1991 break;
1992
1993 case OP_END_BUF: MOP_IN(OP_END_BUF);
1994 if (! ON_STR_END(s)) goto fail;
1995
1996 MOP_OUT;
1997 continue;
1998 break;
1999
2000 case OP_BEGIN_LINE: MOP_IN(OP_BEGIN_LINE);
2001 if (ON_STR_BEGIN(s)) {
2002 if (IS_NOTBOL(msa->options)) goto fail;
2003 MOP_OUT;
2004 continue;
2005 }
2006 else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) {
2007 MOP_OUT;
2008 continue;
2009 }
2010 goto fail;
2011 break;
2012
2013 case OP_END_LINE: MOP_IN(OP_END_LINE);
2014 if (ON_STR_END(s)) {
2015 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2016 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
2017 #endif
2018 if (IS_NOTEOL(msa->options)) goto fail;
2019 MOP_OUT;
2020 continue;
2021 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2022 }
2023 #endif
2024 }
2025 else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) {
2026 MOP_OUT;
2027 continue;
2028 }
2029 #ifdef USE_CRNL_AS_LINE_TERMINATOR
2030 else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
2031 MOP_OUT;
2032 continue;
2033 }
2034 #endif
2035 goto fail;
2036 break;
2037
2038 case OP_SEMI_END_BUF: MOP_IN(OP_SEMI_END_BUF);
2039 if (ON_STR_END(s)) {
2040 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2041 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
2042 #endif
2043 if (IS_NOTEOL(msa->options)) goto fail;
2044 MOP_OUT;
2045 continue;
2046 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2047 }
2048 #endif
2049 }
2050 else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) &&
2051 ON_STR_END(s + enclen(encode, s))) {
2052 MOP_OUT;
2053 continue;
2054 }
2055 #ifdef USE_CRNL_AS_LINE_TERMINATOR
2056 else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
2057 UChar* ss = s + enclen(encode, s);
2058 ss += enclen(encode, ss);
2059 if (ON_STR_END(ss)) {
2060 MOP_OUT;
2061 continue;
2062 }
2063 }
2064 #endif
2065 goto fail;
2066 break;
2067
2068 case OP_BEGIN_POSITION: MOP_IN(OP_BEGIN_POSITION);
2069 if (s != msa->start)
2070 goto fail;
2071
2072 MOP_OUT;
2073 continue;
2074 break;
2075
2076 case OP_MEMORY_START_PUSH: MOP_IN(OP_MEMORY_START_PUSH);
2077 GET_MEMNUM_INC(mem, p);
2078 STACK_PUSH_MEM_START(mem, s);
2079 MOP_OUT;
2080 continue;
2081 break;
2082
2083 case OP_MEMORY_START: MOP_IN(OP_MEMORY_START);
2084 GET_MEMNUM_INC(mem, p);
2085 mem_start_stk[mem] = (OnigStackIndex )((void* )s);
2086 MOP_OUT;
2087 continue;
2088 break;
2089
2090 case OP_MEMORY_END_PUSH: MOP_IN(OP_MEMORY_END_PUSH);
2091 GET_MEMNUM_INC(mem, p);
2092 STACK_PUSH_MEM_END(mem, s);
2093 MOP_OUT;
2094 continue;
2095 break;
2096
2097 case OP_MEMORY_END: MOP_IN(OP_MEMORY_END);
2098 GET_MEMNUM_INC(mem, p);
2099 mem_end_stk[mem] = (OnigStackIndex )((void* )s);
2100 MOP_OUT;
2101 continue;
2102 break;
2103
2104 #ifdef USE_SUBEXP_CALL
2105 case OP_MEMORY_END_PUSH_REC: MOP_IN(OP_MEMORY_END_PUSH_REC);
2106 GET_MEMNUM_INC(mem, p);
2107 STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
2108 STACK_PUSH_MEM_END(mem, s);
2109 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
2110 MOP_OUT;
2111 continue;
2112 break;
2113
2114 case OP_MEMORY_END_REC: MOP_IN(OP_MEMORY_END_REC);
2115 GET_MEMNUM_INC(mem, p);
2116 mem_end_stk[mem] = (OnigStackIndex )((void* )s);
2117 STACK_GET_MEM_START(mem, stkp);
2118
2119 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2120 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
2121 else
2122 mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr);
2123
2124 STACK_PUSH_MEM_END_MARK(mem);
2125 MOP_OUT;
2126 continue;
2127 break;
2128 #endif
2129
2130 case OP_BACKREF1: MOP_IN(OP_BACKREF1);
2131 mem = 1;
2132 goto backref;
2133 break;
2134
2135 case OP_BACKREF2: MOP_IN(OP_BACKREF2);
2136 mem = 2;
2137 goto backref;
2138 break;
2139
2140 case OP_BACKREFN: MOP_IN(OP_BACKREFN);
2141 GET_MEMNUM_INC(mem, p);
2142 backref:
2143 {
2144 int len;
2145 UChar *pstart, *pend;
2146
2147 /* if you want to remove following line,
2148 you should check in parse and compile time. */
2149 if (mem > num_mem) goto fail;
2150 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
2151 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
2152
2153 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2154 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2155 else
2156 pstart = (UChar* )((void* )mem_start_stk[mem]);
2157
2158 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2159 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2160 : (UChar* )((void* )mem_end_stk[mem]));
2161 n = (int)(pend - pstart);
2162 DATA_ENSURE(n);
2163 sprev = s;
2164 STRING_CMP(pstart, s, n);
2165 while (sprev + (len = enclen(encode, sprev)) < s)
2166 sprev += len;
2167
2168 MOP_OUT;
2169 continue;
2170 }
2171 break;
2172
2173 case OP_BACKREFN_IC: MOP_IN(OP_BACKREFN_IC);
2174 GET_MEMNUM_INC(mem, p);
2175 {
2176 int len;
2177 UChar *pstart, *pend;
2178
2179 /* if you want to remove following line,
2180 you should check in parse and compile time. */
2181 if (mem > num_mem) goto fail;
2182 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
2183 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
2184
2185 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2186 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2187 else
2188 pstart = (UChar* )((void* )mem_start_stk[mem]);
2189
2190 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2191 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2192 : (UChar* )((void* )mem_end_stk[mem]));
2193 n = (int)(pend - pstart);
2194 DATA_ENSURE(n);
2195 sprev = s;
2196 STRING_CMP_IC(case_fold_flag, pstart, &s, n);
2197 while (sprev + (len = enclen(encode, sprev)) < s)
2198 sprev += len;
2199
2200 MOP_OUT;
2201 continue;
2202 }
2203 break;
2204
2205 case OP_BACKREF_MULTI: MOP_IN(OP_BACKREF_MULTI);
2206 {
2207 int len, is_fail;
2208 UChar *pstart, *pend, *swork;
2209
2210 GET_LENGTH_INC(tlen, p);
2211 for (i = 0; i < tlen; i++) {
2212 GET_MEMNUM_INC(mem, p);
2213
2214 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
2215 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
2216
2217 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2218 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2219 else
2220 pstart = (UChar* )((void* )mem_start_stk[mem]);
2221
2222 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2223 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2224 : (UChar* )((void* )mem_end_stk[mem]));
2225 n = (int)(pend - pstart);
2226 DATA_ENSURE(n);
2227 sprev = s;
2228 swork = s;
2229 STRING_CMP_VALUE(pstart, swork, n, is_fail);
2230 if (is_fail) continue;
2231 s = swork;
2232 while (sprev + (len = enclen(encode, sprev)) < s)
2233 sprev += len;
2234
2235 p += (SIZE_MEMNUM * (tlen - i - 1));
2236 break; /* success */
2237 }
2238 if (i == tlen) goto fail;
2239 MOP_OUT;
2240 continue;
2241 }
2242 break;
2243
2244 case OP_BACKREF_MULTI_IC: MOP_IN(OP_BACKREF_MULTI_IC);
2245 {
2246 int len, is_fail;
2247 UChar *pstart, *pend, *swork;
2248
2249 GET_LENGTH_INC(tlen, p);
2250 for (i = 0; i < tlen; i++) {
2251 GET_MEMNUM_INC(mem, p);
2252
2253 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
2254 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
2255
2256 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2257 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2258 else
2259 pstart = (UChar* )((void* )mem_start_stk[mem]);
2260
2261 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2262 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2263 : (UChar* )((void* )mem_end_stk[mem]));
2264 n = (int)(pend - pstart);
2265 DATA_ENSURE(n);
2266 sprev = s;
2267 swork = s;
2268 STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail);
2269 if (is_fail) continue;
2270 s = swork;
2271 while (sprev + (len = enclen(encode, sprev)) < s)
2272 sprev += len;
2273
2274 p += (SIZE_MEMNUM * (tlen - i - 1));
2275 break; /* success */
2276 }
2277 if (i == tlen) goto fail;
2278 MOP_OUT;
2279 continue;
2280 }
2281 break;
2282
2283 #ifdef USE_BACKREF_WITH_LEVEL
2284 case OP_BACKREF_WITH_LEVEL:
2285 {
2286 int len;
2287 OnigOptionType ic;
2288 LengthType level;
2289
2290 GET_OPTION_INC(ic, p);
2291 GET_LENGTH_INC(level, p);
2292 GET_LENGTH_INC(tlen, p);
2293
2294 sprev = s;
2295 if (backref_match_at_nested_level(reg, stk, stk_base, ic
2296 , case_fold_flag, (int )level, (int )tlen, p, &s, end)) {
2297 while (sprev + (len = enclen(encode, sprev)) < s)
2298 sprev += len;
2299
2300 p += (SIZE_MEMNUM * tlen);
2301 }
2302 else
2303 goto fail;
2304
2305 MOP_OUT;
2306 continue;
2307 }
2308
2309 break;
2310 #endif
2311
2312 #if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */
2313 case OP_SET_OPTION_PUSH: MOP_IN(OP_SET_OPTION_PUSH);
2314 GET_OPTION_INC(option, p);
2315 STACK_PUSH_ALT(p, s, sprev);
2316 p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL;
2317 MOP_OUT;
2318 continue;
2319 break;
2320
2321 case OP_SET_OPTION: MOP_IN(OP_SET_OPTION);
2322 GET_OPTION_INC(option, p);
2323 MOP_OUT;
2324 continue;
2325 break;
2326 #endif
2327
2328 case OP_NULL_CHECK_START: MOP_IN(OP_NULL_CHECK_START);
2329 GET_MEMNUM_INC(mem, p); /* mem: null check id */
2330 STACK_PUSH_NULL_CHECK_START(mem, s);
2331 MOP_OUT;
2332 continue;
2333 break;
2334
2335 case OP_NULL_CHECK_END: MOP_IN(OP_NULL_CHECK_END);
2336 {
2337 int isnull;
2338
2339 GET_MEMNUM_INC(mem, p); /* mem: null check id */
2340 STACK_NULL_CHECK(isnull, mem, s);
2341 if (isnull) {
2342 #ifdef ONIG_DEBUG_MATCH
2343 fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%d\n",
2344 (int )mem, (int )s);
2345 #endif
2346 null_check_found:
2347 /* empty loop founded, skip next instruction */
2348 switch (*p++) {
2349 case OP_JUMP:
2350 case OP_PUSH:
2351 p += SIZE_RELADDR;
2352 break;
2353 case OP_REPEAT_INC:
2354 case OP_REPEAT_INC_NG:
2355 case OP_REPEAT_INC_SG:
2356 case OP_REPEAT_INC_NG_SG:
2357 p += SIZE_MEMNUM;
2358 break;
2359 default:
2360 goto unexpected_bytecode_error;
2361 break;
2362 }
2363 }
2364 }
2365 MOP_OUT;
2366 continue;
2367 break;
2368
2369 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2370 case OP_NULL_CHECK_END_MEMST: MOP_IN(OP_NULL_CHECK_END_MEMST);
2371 {
2372 int isnull;
2373
2374 GET_MEMNUM_INC(mem, p); /* mem: null check id */
2375 STACK_NULL_CHECK_MEMST(isnull, mem, s, reg);
2376 if (isnull) {
2377 #ifdef ONIG_DEBUG_MATCH
2378 fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%d\n",
2379 (int )mem, (int )s);
2380 #endif
2381 if (isnull == -1) goto fail;
2382 goto null_check_found;
2383 }
2384 }
2385 MOP_OUT;
2386 continue;
2387 break;
2388 #endif
2389
2390 #ifdef USE_SUBEXP_CALL
2391 case OP_NULL_CHECK_END_MEMST_PUSH:
2392 MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH);
2393 {
2394 int isnull;
2395
2396 GET_MEMNUM_INC(mem, p); /* mem: null check id */
2397 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2398 STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg);
2399 #else
2400 STACK_NULL_CHECK_REC(isnull, mem, s);
2401 #endif
2402 if (isnull) {
2403 #ifdef ONIG_DEBUG_MATCH
2404 fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%d\n",
2405 (int )mem, (int )s);
2406 #endif
2407 if (isnull == -1) goto fail;
2408 goto null_check_found;
2409 }
2410 else {
2411 STACK_PUSH_NULL_CHECK_END(mem);
2412 }
2413 }
2414 MOP_OUT;
2415 continue;
2416 break;
2417 #endif
2418
2419 case OP_JUMP: MOP_IN(OP_JUMP);
2420 GET_RELADDR_INC(addr, p);
2421 p += addr;
2422 MOP_OUT;
2423 CHECK_INTERRUPT_IN_MATCH_AT;
2424 continue;
2425 break;
2426
2427 case OP_PUSH: MOP_IN(OP_PUSH);
2428 GET_RELADDR_INC(addr, p);
2429 STACK_PUSH_ALT(p + addr, s, sprev);
2430 MOP_OUT;
2431 continue;
2432 break;
2433
2434 #ifdef USE_COMBINATION_EXPLOSION_CHECK
2435 case OP_STATE_CHECK_PUSH: MOP_IN(OP_STATE_CHECK_PUSH);
2436 GET_STATE_CHECK_NUM_INC(mem, p);
2437 STATE_CHECK_VAL(scv, mem);
2438 if (scv) goto fail;
2439
2440 GET_RELADDR_INC(addr, p);
2441 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem);
2442 MOP_OUT;
2443 continue;
2444 break;
2445
2446 case OP_STATE_CHECK_PUSH_OR_JUMP: MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP);
2447 GET_STATE_CHECK_NUM_INC(mem, p);
2448 GET_RELADDR_INC(addr, p);
2449 STATE_CHECK_VAL(scv, mem);
2450 if (scv) {
2451 p += addr;
2452 }
2453 else {
2454 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem);
2455 }
2456 MOP_OUT;
2457 continue;
2458 break;
2459
2460 case OP_STATE_CHECK: MOP_IN(OP_STATE_CHECK);
2461 GET_STATE_CHECK_NUM_INC(mem, p);
2462 STATE_CHECK_VAL(scv, mem);
2463 if (scv) goto fail;
2464
2465 STACK_PUSH_STATE_CHECK(s, mem);
2466 MOP_OUT;
2467 continue;
2468 break;
2469 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
2470
2471 case OP_POP: MOP_IN(OP_POP);
2472 STACK_POP_ONE;
2473 MOP_OUT;
2474 continue;
2475 break;
2476
2477 case OP_PUSH_OR_JUMP_EXACT1: MOP_IN(OP_PUSH_OR_JUMP_EXACT1);
2478 GET_RELADDR_INC(addr, p);
2479 if (*p == *s && DATA_ENSURE_CHECK1) {
2480 p++;
2481 STACK_PUSH_ALT(p + addr, s, sprev);
2482 MOP_OUT;
2483 continue;
2484 }
2485 p += (addr + 1);
2486 MOP_OUT;
2487 continue;
2488 break;
2489
2490 case OP_PUSH_IF_PEEK_NEXT: MOP_IN(OP_PUSH_IF_PEEK_NEXT);
2491 GET_RELADDR_INC(addr, p);
2492 if (*p == *s) {
2493 p++;
2494 STACK_PUSH_ALT(p + addr, s, sprev);
2495 MOP_OUT;
2496 continue;
2497 }
2498 p++;
2499 MOP_OUT;
2500 continue;
2501 break;
2502
2503 case OP_REPEAT: MOP_IN(OP_REPEAT);
2504 {
2505 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2506 GET_RELADDR_INC(addr, p);
2507
2508 STACK_ENSURE(1);
2509 repeat_stk[mem] = GET_STACK_INDEX(stk);
2510 STACK_PUSH_REPEAT(mem, p);
2511
2512 if (reg->repeat_range[mem].lower == 0) {
2513 STACK_PUSH_ALT(p + addr, s, sprev);
2514 }
2515 }
2516 MOP_OUT;
2517 continue;
2518 break;
2519
2520 case OP_REPEAT_NG: MOP_IN(OP_REPEAT_NG);
2521 {
2522 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2523 GET_RELADDR_INC(addr, p);
2524
2525 STACK_ENSURE(1);
2526 repeat_stk[mem] = GET_STACK_INDEX(stk);
2527 STACK_PUSH_REPEAT(mem, p);
2528
2529 if (reg->repeat_range[mem].lower == 0) {
2530 STACK_PUSH_ALT(p, s, sprev);
2531 p += addr;
2532 }
2533 }
2534 MOP_OUT;
2535 continue;
2536 break;
2537
2538 case OP_REPEAT_INC: MOP_IN(OP_REPEAT_INC);
2539 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2540 si = repeat_stk[mem];
2541 stkp = STACK_AT(si);
2542
2543 repeat_inc:
2544 stkp->u.repeat.count++;
2545 if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) {
2546 /* end of repeat. Nothing to do. */
2547 }
2548 else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
2549 STACK_PUSH_ALT(p, s, sprev);
2550 p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */
2551 }
2552 else {
2553 p = stkp->u.repeat.pcode;
2554 }
2555 STACK_PUSH_REPEAT_INC(si);
2556 MOP_OUT;
2557 CHECK_INTERRUPT_IN_MATCH_AT;
2558 continue;
2559 break;
2560
2561 case OP_REPEAT_INC_SG: MOP_IN(OP_REPEAT_INC_SG);
2562 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2563 STACK_GET_REPEAT(mem, stkp);
2564 si = GET_STACK_INDEX(stkp);
2565 goto repeat_inc;
2566 break;
2567
2568 case OP_REPEAT_INC_NG: MOP_IN(OP_REPEAT_INC_NG);
2569 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2570 si = repeat_stk[mem];
2571 stkp = STACK_AT(si);
2572
2573 repeat_inc_ng:
2574 stkp->u.repeat.count++;
2575 if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
2576 if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
2577 UChar* pcode = stkp->u.repeat.pcode;
2578
2579 STACK_PUSH_REPEAT_INC(si);
2580 STACK_PUSH_ALT(pcode, s, sprev);
2581 }
2582 else {
2583 p = stkp->u.repeat.pcode;
2584 STACK_PUSH_REPEAT_INC(si);
2585 }
2586 }
2587 else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
2588 STACK_PUSH_REPEAT_INC(si);
2589 }
2590 MOP_OUT;
2591 CHECK_INTERRUPT_IN_MATCH_AT;
2592 continue;
2593 break;
2594
2595 case OP_REPEAT_INC_NG_SG: MOP_IN(OP_REPEAT_INC_NG_SG);
2596 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2597 STACK_GET_REPEAT(mem, stkp);
2598 si = GET_STACK_INDEX(stkp);
2599 goto repeat_inc_ng;
2600 break;
2601
2602 case OP_PUSH_POS: MOP_IN(OP_PUSH_POS);
2603 STACK_PUSH_POS(s, sprev);
2604 MOP_OUT;
2605 continue;
2606 break;
2607
2608 case OP_POP_POS: MOP_IN(OP_POP_POS);
2609 {
2610 STACK_POS_END(stkp);
2611 s = stkp->u.state.pstr;
2612 sprev = stkp->u.state.pstr_prev;
2613 }
2614 MOP_OUT;
2615 continue;
2616 break;
2617
2618 case OP_PUSH_POS_NOT: MOP_IN(OP_PUSH_POS_NOT);
2619 GET_RELADDR_INC(addr, p);
2620 STACK_PUSH_POS_NOT(p + addr, s, sprev);
2621 MOP_OUT;
2622 continue;
2623 break;
2624
2625 case OP_FAIL_POS: MOP_IN(OP_FAIL_POS);
2626 STACK_POP_TIL_POS_NOT;
2627 goto fail;
2628 break;
2629
2630 case OP_PUSH_STOP_BT: MOP_IN(OP_PUSH_STOP_BT);
2631 STACK_PUSH_STOP_BT;
2632 MOP_OUT;
2633 continue;
2634 break;
2635
2636 case OP_POP_STOP_BT: MOP_IN(OP_POP_STOP_BT);
2637 STACK_STOP_BT_END;
2638 MOP_OUT;
2639 continue;
2640 break;
2641
2642 case OP_LOOK_BEHIND: MOP_IN(OP_LOOK_BEHIND);
2643 GET_LENGTH_INC(tlen, p);
2644 s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
2645 if (IS_NULL(s)) goto fail;
2646 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
2647 MOP_OUT;
2648 continue;
2649 break;
2650
2651 case OP_PUSH_LOOK_BEHIND_NOT: MOP_IN(OP_PUSH_LOOK_BEHIND_NOT);
2652 GET_RELADDR_INC(addr, p);
2653 GET_LENGTH_INC(tlen, p);
2654 q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
2655 if (IS_NULL(q)) {
2656 /* too short case -> success. ex. /(?<!XXX)a/.match("a")
2657 If you want to change to fail, replace following line. */
2658 p += addr;
2659 /* goto fail; */
2660 }
2661 else {
2662 STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev);
2663 s = q;
2664 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
2665 }
2666 MOP_OUT;
2667 continue;
2668 break;
2669
2670 case OP_FAIL_LOOK_BEHIND_NOT: MOP_IN(OP_FAIL_LOOK_BEHIND_NOT);
2671 STACK_POP_TIL_LOOK_BEHIND_NOT;
2672 goto fail;
2673 break;
2674
2675 #ifdef USE_SUBEXP_CALL
2676 case OP_CALL: MOP_IN(OP_CALL);
2677 GET_ABSADDR_INC(addr, p);
2678 STACK_PUSH_CALL_FRAME(p);
2679 p = reg->p + addr;
2680 MOP_OUT;
2681 continue;
2682 break;
2683
2684 case OP_RETURN: MOP_IN(OP_RETURN);
2685 STACK_RETURN(p);
2686 STACK_PUSH_RETURN;
2687 MOP_OUT;
2688 continue;
2689 break;
2690 #endif
2691
2692 case OP_FINISH:
2693 goto finish;
2694 break;
2695
2696 fail:
2697 MOP_OUT;
2698 /* fall */
2699 case OP_FAIL: MOP_IN(OP_FAIL);
2700 STACK_POP;
2701 p = stk->u.state.pcode;
2702 s = stk->u.state.pstr;
2703 sprev = stk->u.state.pstr_prev;
2704
2705 #ifdef USE_COMBINATION_EXPLOSION_CHECK
2706 if (stk->u.state.state_check != 0) {
2707 stk->type = STK_STATE_CHECK_MARK;
2708 stk++;
2709 }
2710 #endif
2711
2712 MOP_OUT;
2713 continue;
2714 break;
2715
2716 default:
2717 goto bytecode_error;
2718
2719 } /* end of switch */
2720 sprev = sbegin;
2721 } /* end of while(1) */
2722
2723 finish:
2724 STACK_SAVE;
2725 xfree(alloca_base);
2726 return best_len;
2727
2728 #ifdef ONIG_DEBUG
2729 stack_error:
2730 STACK_SAVE;
2731 xfree(alloca_base);
2732 return ONIGERR_STACK_BUG;
2733 #endif
2734
2735 bytecode_error:
2736 STACK_SAVE;
2737 xfree(alloca_base);
2738 return ONIGERR_UNDEFINED_BYTECODE;
2739
2740 unexpected_bytecode_error:
2741 STACK_SAVE;
2742 xfree(alloca_base);
2743 return ONIGERR_UNEXPECTED_BYTECODE;
2744 }
2745
2746
2747 static UChar*
2748 slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
2749 const UChar* text, const UChar* text_end, UChar* text_range)
2750 {
2751 UChar *t, *p, *s, *end;
2752
2753 end = (UChar* )text_end;
2754 end -= target_end - target - 1;
2755 if (end > text_range)
2756 end = text_range;
2757
2758 s = (UChar* )text;
2759
2760 while (s < end) {
2761 if (*s == *target) {
2762 p = s + 1;
2763 t = target + 1;
2764 while (t < target_end) {
2765 if (*t != *p++)
2766 break;
2767 t++;
2768 }
2769 if (t == target_end)
2770 return s;
2771 }
2772 s += enclen(enc, s);
2773 }
2774
2775 return (UChar* )NULL;
2776 }
2777
2778 static int
2779 str_lower_case_match(OnigEncoding enc, int case_fold_flag,
2780 const UChar* t, const UChar* tend,
2781 const UChar* p, const UChar* end)
2782 {
2783 int lowlen;
2784 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
2785
2786 while (t < tend) {
2787 lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf);
2788 q = lowbuf;
2789 while (lowlen > 0) {
2790 if (*t++ != *q++) return 0;
2791 lowlen--;
2792 }
2793 }
2794
2795 return 1;
2796 }
2797
2798 static UChar*
2799 slow_search_ic(OnigEncoding enc, int case_fold_flag,
2800 UChar* target, UChar* target_end,
2801 const UChar* text, const UChar* text_end, UChar* text_range)
2802 {
2803 UChar *s, *end;
2804
2805 end = (UChar* )text_end;
2806 end -= target_end - target - 1;
2807 if (end > text_range)
2808 end = text_range;
2809
2810 s = (UChar* )text;
2811
2812 while (s < end) {
2813 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
2814 s, text_end))
2815 return s;
2816
2817 s += enclen(enc, s);
2818 }
2819
2820 return (UChar* )NULL;
2821 }
2822
2823 static UChar*
2824 slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
2825 const UChar* text, const UChar* adjust_text,
2826 const UChar* text_end, const UChar* text_start)
2827 {
2828 UChar *t, *p, *s;
2829
2830 s = (UChar* )text_end;
2831 s -= (target_end - target);
2832 if (s > text_start)
2833 s = (UChar* )text_start;
2834 else
2835 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
2836
2837 while (s >= text) {
2838 if (*s == *target) {
2839 p = s + 1;
2840 t = target + 1;
2841 while (t < target_end) {
2842 if (*t != *p++)
2843 break;
2844 t++;
2845 }
2846 if (t == target_end)
2847 return s;
2848 }
2849 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s);
2850 }
2851
2852 return (UChar* )NULL;
2853 }
2854
2855 static UChar*
2856 slow_search_backward_ic(OnigEncoding enc, int case_fold_flag,
2857 UChar* target, UChar* target_end,
2858 const UChar* text, const UChar* adjust_text,
2859 const UChar* text_end, const UChar* text_start)
2860 {
2861 UChar *s;
2862
2863 s = (UChar* )text_end;
2864 s -= (target_end - target);
2865 if (s > text_start)
2866 s = (UChar* )text_start;
2867 else
2868 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
2869
2870 while (s >= text) {
2871 if (str_lower_case_match(enc, case_fold_flag,
2872 target, target_end, s, text_end))
2873 return s;
2874
2875 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s);
2876 }
2877
2878 return (UChar* )NULL;
2879 }
2880
2881 static UChar*
2882 bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
2883 const UChar* text, const UChar* text_end,
2884 const UChar* text_range)
2885 {
2886 const UChar *s, *se, *t, *p, *end;
2887 const UChar *tail;
2888 int skip, tlen1;
2889
2890 #ifdef ONIG_DEBUG_SEARCH
2891 fprintf(stderr, "bm_search_notrev: text: %d, text_end: %d, text_range: %d\n",
2892 (int )text, (int )text_end, (int )text_range);
2893 #endif
2894
2895 tail = target_end - 1;
2896 tlen1 = (int)(tail - target);
2897 end = text_range;
2898 if (end + tlen1 > text_end)
2899 end = text_end - tlen1;
2900
2901 s = text;
2902
2903 if (IS_NULL(reg->int_map)) {
2904 while (s < end) {
2905 p = se = s + tlen1;
2906 t = tail;
2907 while (*p == *t) {
2908 if (t == target) return (UChar* )s;
2909 p--; t--;
2910 }
2911 skip = reg->map[*se];
2912 t = s;
2913 do {
2914 s += enclen(reg->enc, s);
2915 } while ((s - t) < skip && s < end);
2916 }
2917 }
2918 else {
2919 while (s < end) {
2920 p = se = s + tlen1;
2921 t = tail;
2922 while (*p == *t) {
2923 if (t == target) return (UChar* )s;
2924 p--; t--;
2925 }
2926 skip = reg->int_map[*se];
2927 t = s;
2928 do {
2929 s += enclen(reg->enc, s);
2930 } while ((s - t) < skip && s < end);
2931 }
2932 }
2933
2934 return (UChar* )NULL;
2935 }
2936
2937 static UChar*
2938 bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
2939 const UChar* text, const UChar* text_end, const UChar* text_range)
2940 {
2941 const UChar *s, *t, *p, *end;
2942 const UChar *tail;
2943
2944 end = text_range + (target_end - target) - 1;
2945 if (end > text_end)
2946 end = text_end;
2947
2948 tail = target_end - 1;
2949 s = text + (target_end - target) - 1;
2950 if (IS_NULL(reg->int_map)) {
2951 while (s < end) {
2952 p = s;
2953 t = tail;
2954 while (*p == *t) {
2955 if (t == target) return (UChar* )p;
2956 p--; t--;
2957 }
2958 s += reg->map[*s];
2959 }
2960 }
2961 else { /* see int_map[] */
2962 while (s < end) {
2963 p = s;
2964 t = tail;
2965 while (*p == *t) {
2966 if (t == target) return (UChar* )p;
2967 p--; t--;
2968 }
2969 s += reg->int_map[*s];
2970 }
2971 }
2972 return (UChar* )NULL;
2973 }
2974
2975 static int
2976 set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
2977 int** skip)
2978
2979 {
2980 int i, len;
2981
2982 if (IS_NULL(*skip)) {
2983 *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
2984 if (IS_NULL(*skip)) return ONIGERR_MEMORY;
2985 }
2986
2987 len = (int)(end - s);
2988 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
2989 (*skip)[i] = len;
2990
2991 for (i = len - 1; i > 0; i--)
2992 (*skip)[s[i]] = i;
2993
2994 return 0;
2995 }
2996
2997 static UChar*
2998 bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end,
2999 const UChar* text, const UChar* adjust_text,
3000 const UChar* text_end, const UChar* text_start)
3001 {
3002 const UChar *s, *t, *p;
3003
3004 s = text_end - (target_end - target);
3005 if (text_start < s)
3006 s = text_start;
3007 else
3008 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s);
3009
3010 while (s >= text) {
3011 p = s;
3012 t = target;
3013 while (t < target_end && *p == *t) {
3014 p++; t++;
3015 }
3016 if (t == target_end)
3017 return (UChar* )s;
3018
3019 s -= reg->int_map_backward[*s];
3020 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s);
3021 }
3022
3023 return (UChar* )NULL;
3024 }
3025
3026 static UChar*
3027 map_search(OnigEncoding enc, UChar map[],
3028 const UChar* text, const UChar* text_range)
3029 {
3030 const UChar *s = text;
3031
3032 while (s < text_range) {
3033 if (map[*s]) return (UChar* )s;
3034
3035 s += enclen(enc, s);
3036 }
3037 return (UChar* )NULL;
3038 }
3039
3040 static UChar*
3041 map_search_backward(OnigEncoding enc, UChar map[],
3042 const UChar* text, const UChar* adjust_text,
3043 const UChar* text_start)
3044 {
3045 const UChar *s = text_start;
3046
3047 while (s >= text) {
3048 if (map[*s]) return (UChar* )s;
3049
3050 s = onigenc_get_prev_char_head(enc, adjust_text, s);
3051 }
3052 return (UChar* )NULL;
3053 }
3054
3055 extern int
3056 onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region,
3057 OnigOptionType option)
3058 {
3059 int r;
3060 UChar *prev;
3061 OnigMatchArg msa;
3062
3063 #if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
3064 start:
3065 THREAD_ATOMIC_START;
3066 if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) {
3067 ONIG_STATE_INC(reg);
3068 if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
3069 onig_chain_reduce(reg);
3070 ONIG_STATE_INC(reg);
3071 }
3072 }
3073 else {
3074 int n;
3075
3076 THREAD_ATOMIC_END;
3077 n = 0;
3078 while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) {
3079 if (++n > THREAD_PASS_LIMIT_COUNT)
3080 return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
3081 THREAD_PASS;
3082 }
3083 goto start;
3084 }
3085 THREAD_ATOMIC_END;
3086 #endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
3087
3088 MATCH_ARG_INIT(msa, option, region, at);
3089 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3090 {
3091 int offset = at - str;
3092 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
3093 }
3094 #endif
3095
3096 if (region
3097 #ifdef USE_POSIX_API_REGION_OPTION
3098 && !IS_POSIX_REGION(option)
3099 #endif
3100 ) {
3101 r = onig_region_resize_clear(region, reg->num_mem + 1);
3102 }
3103 else
3104 r = 0;
3105
3106 if (r == 0) {
3107 prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at);
3108 r = match_at(reg, str, end,
3109 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
3110 end,
3111 #endif
3112 at, prev, &msa);
3113 }
3114
3115 MATCH_ARG_FREE(msa);
3116 ONIG_STATE_DEC_THREAD(reg);
3117 return r;
3118 }
3119
3120 static int
3121 forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
3122 UChar* range, UChar** low, UChar** high, UChar** low_prev)
3123 {
3124 UChar *p, *pprev = (UChar* )NULL;
3125
3126 #ifdef ONIG_DEBUG_SEARCH
3127 fprintf(stderr, "forward_search_range: str: %d, end: %d, s: %d, range: %d\n",
3128 (int )str, (int )end, (int )s, (int )range);
3129 #endif
3130
3131 p = s;
3132 if (reg->dmin > 0) {
3133 if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {
3134 p += reg->dmin;
3135 }
3136 else {
3137 UChar *q = p + reg->dmin;
3138 while (p < q) p += enclen(reg->enc, p);
3139 }
3140 }
3141
3142 retry:
3143 switch (reg->optimize) {
3144 case ONIG_OPTIMIZE_EXACT:
3145 p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
3146 break;
3147 case ONIG_OPTIMIZE_EXACT_IC:
3148 p = slow_search_ic(reg->enc, reg->case_fold_flag,
3149 reg->exact, reg->exact_end, p, end, range);
3150 break;
3151
3152 case ONIG_OPTIMIZE_EXACT_BM:
3153 p = bm_search(reg, reg->exact, reg->exact_end, p, end, range);
3154 break;
3155
3156 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
3157 p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range);
3158 break;
3159
3160 case ONIG_OPTIMIZE_MAP:
3161 p = map_search(reg->enc, reg->map, p, range);
3162 break;
3163 }
3164
3165 if (p && p < range) {
3166 if (p - reg->dmin < s) {
3167 retry_gate:
3168 pprev = p;
3169 p += enclen(reg->enc, p);
3170 goto retry;
3171 }
3172
3173 if (reg->sub_anchor) {
3174 UChar* prev;
3175
3176 switch (reg->sub_anchor) {
3177 case ANCHOR_BEGIN_LINE:
3178 if (!ON_STR_BEGIN(p)) {
3179 prev = onigenc_get_prev_char_head(reg->enc,
3180 (pprev ? pprev : str), p);
3181 if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
3182 goto retry_gate;
3183 }
3184 break;
3185
3186 case ANCHOR_END_LINE:
3187 if (ON_STR_END(p)) {
3188 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3189 prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
3190 (pprev ? pprev : str), p);
3191 if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
3192 goto retry_gate;
3193 #endif
3194 }
3195 else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
3196 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3197 && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
3198 #endif
3199 )
3200 goto retry_gate;
3201 break;
3202 }
3203 }
3204
3205 if (reg->dmax == 0) {
3206 *low = p;
3207 if (low_prev) {
3208 if (*low > s)
3209 *low_prev = onigenc_get_prev_char_head(reg->enc, s, p);
3210 else
3211 *low_prev = onigenc_get_prev_char_head(reg->enc,
3212 (pprev ? pprev : str), p);
3213 }
3214 }
3215 else {
3216 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
3217 *low = p - reg->dmax;
3218 if (*low > s) {
3219 *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,
3220 *low, (const UChar** )low_prev);
3221 if (low_prev && IS_NULL(*low_prev))
3222 *low_prev = onigenc_get_prev_char_head(reg->enc,
3223 (pprev ? pprev : s), *low);
3224 }
3225 else {
3226 if (low_prev)
3227 *low_prev = onigenc_get_prev_char_head(reg->enc,
3228 (pprev ? pprev : str), *low);
3229 }
3230 }
3231 }
3232 /* no needs to adjust *high, *high is used as range check only */
3233 *high = p - reg->dmin;
3234
3235 #ifdef ONIG_DEBUG_SEARCH
3236 fprintf(stderr,
3237 "forward_search_range success: low: %d, high: %d, dmin: %d, dmax: %d\n",
3238 (int )(*low - str), (int )(*high - str), reg->dmin, reg->dmax);
3239 #endif
3240 return 1; /* success */
3241 }
3242
3243 return 0; /* fail */
3244 }
3245
3246 static int set_bm_backward_skip P_((UChar* s, UChar* end, OnigEncoding enc,
3247 int** skip));
3248
3249 #define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100
3250
3251 static int
3252 backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
3253 UChar* s, const UChar* range, UChar* adjrange,
3254 UChar** low, UChar** high)
3255 {
3256 int r;
3257 UChar *p;
3258
3259 range += reg->dmin;
3260 p = s;
3261
3262 retry:
3263 switch (reg->optimize) {
3264 case ONIG_OPTIMIZE_EXACT:
3265 exact_method:
3266 p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
3267 range, adjrange, end, p);
3268 break;
3269
3270 case ONIG_OPTIMIZE_EXACT_IC:
3271 p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
3272 reg->exact, reg->exact_end,
3273 range, adjrange, end, p);
3274 break;
3275
3276 case ONIG_OPTIMIZE_EXACT_BM:
3277 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
3278 if (IS_NULL(reg->int_map_backward)) {
3279 if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD)
3280 goto exact_method;
3281
3282 r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc,
3283 &(reg->int_map_backward));
3284 if (r) return r;
3285 }
3286 p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange,
3287 end, p);
3288 break;
3289
3290 case ONIG_OPTIMIZE_MAP:
3291 p = map_search_backward(reg->enc, reg->map, range, adjrange, p);
3292 break;
3293 }
3294
3295 if (p) {
3296 if (reg->sub_anchor) {
3297 UChar* prev;
3298
3299 switch (reg->sub_anchor) {
3300 case ANCHOR_BEGIN_LINE:
3301 if (!ON_STR_BEGIN(p)) {
3302 prev = onigenc_get_prev_char_head(reg->enc, str, p);
3303 if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
3304 p = prev;
3305 goto retry;
3306 }
3307 }
3308 break;
3309
3310 case ANCHOR_END_LINE:
3311 if (ON_STR_END(p)) {
3312 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3313 prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
3314 if (IS_NULL(prev)) goto fail;
3315 if (ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
3316 p = prev;
3317 goto retry;
3318 }
3319 #endif
3320 }
3321 else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
3322 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3323 && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
3324 #endif
3325 ) {
3326 p = onigenc_get_prev_char_head(reg->enc, adjrange, p);
3327 if (IS_NULL(p)) goto fail;
3328 goto retry;
3329 }
3330 break;
3331 }
3332 }
3333
3334 /* no needs to adjust *high, *high is used as range check only */
3335 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
3336 *low = p - reg->dmax;
3337 *high = p - reg->dmin;
3338 *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high);
3339 }
3340
3341 #ifdef ONIG_DEBUG_SEARCH
3342 fprintf(stderr, "backward_search_range: low: %d, high: %d\n",
3343 (int )(*low - str), (int )(*high - str));
3344 #endif
3345 return 1; /* success */
3346 }
3347
3348 fail:
3349 #ifdef ONIG_DEBUG_SEARCH
3350 fprintf(stderr, "backward_search_range: fail.\n");
3351 #endif
3352 return 0; /* fail */
3353 }
3354
3355
3356 extern int
3357 onig_search(regex_t* reg, const UChar* str, const UChar* end,
3358 const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
3359 {
3360 int r;
3361 UChar *s, *prev;
3362 OnigMatchArg msa;
3363 const UChar *orig_start = start;
3364 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
3365 const UChar *orig_range = range;
3366 #endif
3367
3368 #if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
3369 start:
3370 THREAD_ATOMIC_START;
3371 if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) {
3372 ONIG_STATE_INC(reg);
3373 if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
3374 onig_chain_reduce(reg);
3375 ONIG_STATE_INC(reg);
3376 }
3377 }
3378 else {
3379 int n;
3380
3381 THREAD_ATOMIC_END;
3382 n = 0;
3383 while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) {
3384 if (++n > THREAD_PASS_LIMIT_COUNT)
3385 return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
3386 THREAD_PASS;
3387 }
3388 goto start;
3389 }
3390 THREAD_ATOMIC_END;
3391 #endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
3392
3393 #ifdef ONIG_DEBUG_SEARCH
3394 fprintf(stderr,
3395 "onig_search (entry point): str: %d, end: %d, start: %d, range: %d\n",
3396 (int )str, (int )(end - str), (int )(start - str), (int )(range - str));
3397 #endif
3398
3399 if (region
3400 #ifdef USE_POSIX_API_REGION_OPTION
3401 && !IS_POSIX_REGION(option)
3402 #endif
3403 ) {
3404 r = onig_region_resize_clear(region, reg->num_mem + 1);
3405 if (r) goto finish_no_msa;
3406 }
3407
3408 if (start > end || start < str) goto mismatch_no_msa;
3409
3410
3411 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
3412 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
3413 #define MATCH_AND_RETURN_CHECK(upper_range) \
3414 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
3415 if (r != ONIG_MISMATCH) {\
3416 if (r >= 0) {\
3417 if (! IS_FIND_LONGEST(reg->options)) {\
3418 goto match;\
3419 }\
3420 }\
3421 else goto finish; /* error */ \
3422 }
3423 #else
3424 #define MATCH_AND_RETURN_CHECK(upper_range) \
3425 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
3426 if (r != ONIG_MISMATCH) {\
3427 if (r >= 0) {\
3428 goto match;\
3429 }\
3430 else goto finish; /* error */ \
3431 }
3432 #endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
3433 #else
3434 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
3435 #define MATCH_AND_RETURN_CHECK(none) \
3436 r = match_at(reg, str, end, s, prev, &msa);\
3437 if (r != ONIG_MISMATCH) {\
3438 if (r >= 0) {\
3439 if (! IS_FIND_LONGEST(reg->options)) {\
3440 goto match;\
3441 }\
3442 }\
3443 else goto finish; /* error */ \
3444 }
3445 #else
3446 #define MATCH_AND_RETURN_CHECK(none) \
3447 r = match_at(reg, str, end, s, prev, &msa);\
3448 if (r != ONIG_MISMATCH) {\
3449 if (r >= 0) {\
3450 goto match;\
3451 }\
3452 else goto finish; /* error */ \
3453 }
3454 #endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
3455 #endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
3456
3457
3458 /* anchor optimize: resume search range */
3459 if (reg->anchor != 0 && str < end) {
3460 UChar *min_semi_end, *max_semi_end;
3461
3462 if (reg->anchor & ANCHOR_BEGIN_POSITION) {
3463 /* search start-position only */
3464 begin_position:
3465 if (range > start)
3466 range = start + 1;
3467 else
3468 range = start;
3469 }
3470 else if (reg->anchor & ANCHOR_BEGIN_BUF) {
3471 /* search str-position only */
3472 if (range > start) {
3473 if (start != str) goto mismatch_no_msa;
3474 range = str + 1;
3475 }
3476 else {
3477 if (range <= str) {
3478 start = str;
3479 range = str;
3480 }
3481 else
3482 goto mismatch_no_msa;
3483 }
3484 }
3485 else if (reg->anchor & ANCHOR_END_BUF) {
3486 min_semi_end = max_semi_end = (UChar* )end;
3487
3488 end_buf:
3489 if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin)
3490 goto mismatch_no_msa;
3491
3492 if (range > start) {
3493 if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) {
3494 start = min_semi_end - reg->anchor_dmax;
3495 if (start < end)
3496 start = onigenc_get_right_adjust_char_head(reg->enc, str, start);
3497 else { /* match with empty at end */
3498 start = onigenc_get_prev_char_head(reg->enc, str, end);
3499 }
3500 }
3501 if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) {
3502 range = max_semi_end - reg->anchor_dmin + 1;
3503 }
3504
3505 if (start >= range) goto mismatch_no_msa;
3506 }
3507 else {
3508 if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) {
3509 range = min_semi_end - reg->anchor_dmax;
3510 }
3511 if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) {
3512 start = max_semi_end - reg->anchor_dmin;
3513 start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start);
3514 }
3515 if (range > start) goto mismatch_no_msa;
3516 }
3517 }
3518 else if (reg->anchor & ANCHOR_SEMI_END_BUF) {
3519 UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, 1);
3520
3521 max_semi_end = (UChar* )end;
3522 if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
3523 min_semi_end = pre_end;
3524
3525 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3526 pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, 1);
3527 if (IS_NOT_NULL(pre_end) &&
3528 ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
3529 min_semi_end = pre_end;
3530 }
3531 #endif
3532 if (min_semi_end > str && start <= min_semi_end) {
3533 goto end_buf;
3534 }
3535 }
3536 else {
3537 min_semi_end = (UChar* )end;
3538 goto end_buf;
3539 }
3540 }
3541 else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) {
3542 goto begin_position;
3543 }
3544 }
3545 else if (str == end) { /* empty string */
3546 static const UChar* address_for_empty_string = (UChar* )"";
3547
3548 #ifdef ONIG_DEBUG_SEARCH
3549 fprintf(stderr, "onig_search: empty string.\n");
3550 #endif
3551
3552 if (reg->threshold_len == 0) {
3553 start = end = str = address_for_empty_string;
3554 s = (UChar* )start;
3555 prev = (UChar* )NULL;
3556
3557 MATCH_ARG_INIT(msa, option, region, start);
3558 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3559 msa.state_check_buff = (void* )0;
3560 msa.state_check_buff_size = 0; /* NO NEED, for valgrind */
3561 #endif
3562 MATCH_AND_RETURN_CHECK(end);
3563 goto mismatch;
3564 }
3565 goto mismatch_no_msa;
3566 }
3567
3568 #ifdef ONIG_DEBUG_SEARCH
3569 fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
3570 (int )(end - str), (int )(start - str), (int )(range - str));
3571 #endif
3572
3573 MATCH_ARG_INIT(msa, option, region, orig_start);
3574 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3575 {
3576 int offset = (MIN(start, range) - str);
3577 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
3578 }
3579 #endif
3580
3581 s = (UChar* )start;
3582 if (range > start) { /* forward search */
3583 if (s > str)
3584 prev = onigenc_get_prev_char_head(reg->enc, str, s);
3585 else
3586 prev = (UChar* )NULL;
3587
3588 if (reg->optimize != ONIG_OPTIMIZE_NONE) {
3589 UChar *sch_range, *low, *high, *low_prev;
3590
3591 sch_range = (UChar* )range;
3592 if (reg->dmax != 0) {
3593 if (reg->dmax == ONIG_INFINITE_DISTANCE)
3594 sch_range = (UChar* )end;
3595 else {
3596 sch_range += reg->dmax;
3597 if (sch_range > end) sch_range = (UChar* )end;
3598 }
3599 }
3600
3601 if ((end - start) < reg->threshold_len)
3602 goto mismatch;
3603
3604 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
3605 do {
3606 if (! forward_search_range(reg, str, end, s, sch_range,
3607 &low, &high, &low_prev)) goto mismatch;
3608 if (s < low) {
3609 s = low;
3610 prev = low_prev;
3611 }
3612 while (s <= high) {
3613 MATCH_AND_RETURN_CHECK(orig_range);
3614 prev = s;
3615 s += enclen(reg->enc, s);
3616 }
3617 } while (s < range);
3618 goto mismatch;
3619 }
3620 else { /* check only. */
3621 if (! forward_search_range(reg, str, end, s, sch_range,
3622 &low, &high, (UChar** )NULL)) goto mismatch;
3623
3624 if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
3625 do {
3626 MATCH_AND_RETURN_CHECK(orig_range);
3627 prev = s;
3628 s += enclen(reg->enc, s);
3629
3630 while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) {
3631 prev = s;
3632 s += enclen(reg->enc, s);
3633 }
3634 } while (s < range);
3635 goto mismatch;
3636 }
3637 }
3638 }
3639
3640 do {
3641 MATCH_AND_RETURN_CHECK(orig_range);
3642 prev = s;
3643 s += enclen(reg->enc, s);
3644 } while (s < range);
3645
3646 if (s == range) { /* because empty match with /$/. */
3647 MATCH_AND_RETURN_CHECK(orig_range);
3648 }
3649 }
3650 else { /* backward search */
3651 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
3652 if (orig_start < end)
3653 orig_start += enclen(reg->enc, orig_start); /* is upper range */
3654 #endif
3655
3656 if (reg->optimize != ONIG_OPTIMIZE_NONE) {
3657 UChar *low, *high, *adjrange, *sch_start;
3658
3659 if (range < end)
3660 adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range);
3661 else
3662 adjrange = (UChar* )end;
3663
3664 if (reg->dmax != ONIG_INFINITE_DISTANCE &&
3665 (end - range) >= reg->threshold_len) {
3666 do {
3667 sch_start = s + reg->dmax;
3668 if (sch_start > end) sch_start = (UChar* )end;
3669 if (backward_search_range(reg, str, end, sch_start, range, adjrange,
3670 &low, &high) <= 0)
3671 goto mismatch;
3672
3673 if (s > high)
3674 s = high;
3675
3676 while (s >= low) {
3677 prev = onigenc_get_prev_char_head(reg->enc, str, s);
3678 MATCH_AND_RETURN_CHECK(orig_start);
3679 s = prev;
3680 }
3681 } while (s >= range);
3682 goto mismatch;
3683 }
3684 else { /* check only. */
3685 if ((end - range) < reg->threshold_len) goto mismatch;
3686
3687 sch_start = s;
3688 if (reg->dmax != 0) {
3689 if (reg->dmax == ONIG_INFINITE_DISTANCE)
3690 sch_start = (UChar* )end;
3691 else {
3692 sch_start += reg->dmax;
3693 if (sch_start > end) sch_start = (UChar* )end;
3694 else
3695 sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
3696 start, sch_start);
3697 }
3698 }
3699 if (backward_search_range(reg, str, end, sch_start, range, adjrange,
3700 &low, &high) <= 0) goto mismatch;
3701 }
3702 }
3703
3704 do {
3705 prev = onigenc_get_prev_char_head(reg->enc, str, s);
3706 MATCH_AND_RETURN_CHECK(orig_start);
3707 s = prev;
3708 } while (s >= range);
3709 }
3710
3711 mismatch:
3712 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
3713 if (IS_FIND_LONGEST(reg->options)) {
3714 if (msa.best_len >= 0) {
3715 s = msa.best_s;
3716 goto match;
3717 }
3718 }
3719 #endif
3720 r = ONIG_MISMATCH;
3721
3722 finish:
3723 MATCH_ARG_FREE(msa);
3724 ONIG_STATE_DEC_THREAD(reg);
3725
3726 /* If result is mismatch and no FIND_NOT_EMPTY option,
3727 then the region is not setted in match_at(). */
3728 if (IS_FIND_NOT_EMPTY(reg->options) && region
3729 #ifdef USE_POSIX_API_REGION_OPTION
3730 && !IS_POSIX_REGION(option)
3731 #endif
3732 ) {
3733 onig_region_clear(region);
3734 }
3735
3736 #ifdef ONIG_DEBUG
3737 if (r != ONIG_MISMATCH)
3738 fprintf(stderr, "onig_search: error %d\n", r);
3739 #endif
3740 return r;
3741
3742 mismatch_no_msa:
3743 r = ONIG_MISMATCH;
3744 finish_no_msa:
3745 ONIG_STATE_DEC_THREAD(reg);
3746 #ifdef ONIG_DEBUG
3747 if (r != ONIG_MISMATCH)
3748 fprintf(stderr, "onig_search: error %d\n", r);
3749 #endif
3750 return r;
3751
3752 match:
3753 ONIG_STATE_DEC_THREAD(reg);
3754 MATCH_ARG_FREE(msa);
3755 return (int)(s - str);
3756 }
3757
3758 extern OnigEncoding
3759 onig_get_encoding(regex_t* reg)
3760 {
3761 return reg->enc;
3762 }
3763
3764 extern OnigOptionType
3765 onig_get_options(regex_t* reg)
3766 {
3767 return reg->options;
3768 }
3769
3770 extern OnigCaseFoldType
3771 onig_get_case_fold_flag(regex_t* reg)
3772 {
3773 return reg->case_fold_flag;
3774 }
3775
3776 extern OnigSyntaxType*
3777 onig_get_syntax(regex_t* reg)
3778 {
3779 return reg->syntax;
3780 }
3781
3782 extern int
3783 onig_number_of_captures(regex_t* reg)
3784 {
3785 return reg->num_mem;
3786 }
3787
3788 extern int
3789 onig_number_of_capture_histories(regex_t* reg)
3790 {
3791 #ifdef USE_CAPTURE_HISTORY
3792 int i, n;
3793
3794 n = 0;
3795 for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
3796 if (BIT_STATUS_AT(reg->capture_history, i) != 0)
3797 n++;
3798 }
3799 return n;
3800 #else
3801 return 0;
3802 #endif
3803 }
3804
3805 extern void
3806 onig_copy_encoding(OnigEncoding to, OnigEncoding from)
3807 {
3808 *to = *from;
3809 }
3810