]> git.proxmox.com Git - mirror_edk2.git/blob - MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regexec.c
MdeModulePkg RegularExpressionDxe: Update Oniguruma from v6.9.0 to v6.9.3
[mirror_edk2.git] / MdeModulePkg / Universal / RegularExpressionDxe / Oniguruma / regexec.c
1 /**********************************************************************
2 regexec.c - Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5 * Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29 #include "regint.h"
30
/* Word test for the character at s: a mode of 0 expands to
   ONIGENC_IS_MBC_WORD, any other mode to ONIGENC_IS_MBC_WORD_ASCII. */
#define IS_MBC_WORD_ASCII_MODE(enc,s,end,mode) \
  ((mode) != 0 ? ONIGENC_IS_MBC_WORD_ASCII(enc,s,end) : ONIGENC_IS_MBC_WORD(enc,s,end))
33
#ifdef USE_CRNL_AS_LINE_TERMINATOR
/* True when the character at p decodes to code 13 and the character that
   follows it (p advanced by enclen) satisfies ONIGENC_IS_MBC_NEWLINE.
   Every argument is parenthesized so that expression arguments such as
   "s + 1" expand correctly; note that p and enc are evaluated more than once. */
#define ONIGENC_IS_MBC_CRNL(enc,p,end) \
  (ONIGENC_MBC_TO_CODE((enc),(p),(end)) == 13 && \
   ONIGENC_IS_MBC_NEWLINE((enc),((p) + enclen((enc),(p))),(end)))
#endif
39
/* Feature switch, defined with no value.
   NOTE(review): presumably enables interrupt checks inside the matcher; the
   code that tests this macro is not visible in this part of the file. */
#define CHECK_INTERRUPT_IN_MATCH
41
#ifdef USE_CALLOUT
/* Storage attached to one callout: a call-counter value plus
   ONIG_CALLOUT_DATA_SLOT_NUM (type, value) slots. */
typedef struct {
  /* NOTE(review): presumably compared against
     OnigMatchParamStruct.match_at_call_counter -- confirm in the matcher. */
  int last_match_at_call_counter;
  struct {
    OnigType  type;
    OnigValue val;
  } slot[ONIG_CALLOUT_DATA_SLOT_NUM];
} CalloutData;
#endif
51
/* Per-match parameters. Defaults are filled in by
   onig_initialize_match_param(); individual fields are changed through the
   onig_set_*_of_match_param() setters in this file. */
struct OnigMatchParamStruct {
  unsigned int    match_stack_limit;      /* copied into MatchArg by MATCH_ARG_INIT */
  unsigned long   retry_limit_in_match;   /* copied into MatchArg by MATCH_ARG_INIT */
#ifdef USE_CALLOUT
  OnigCalloutFunc progress_callout_of_contents;
  OnigCalloutFunc retraction_callout_of_contents;
  int             match_at_call_counter;  /* set to 0 on initialization */
  void*           callout_user_data;
  CalloutData*    callout_data;           /* freed by onig_free_match_param_content() */
  int             callout_data_alloc_num;
#endif
};
64
65 extern int
66 onig_set_match_stack_limit_size_of_match_param(OnigMatchParam* param,
67 unsigned int limit)
68 {
69 param->match_stack_limit = limit;
70 return ONIG_NORMAL;
71 }
72
73 extern int
74 onig_set_retry_limit_in_match_of_match_param(OnigMatchParam* param,
75 unsigned long limit)
76 {
77 param->retry_limit_in_match = limit;
78 return ONIG_NORMAL;
79 }
80
81 extern int
82 onig_set_progress_callout_of_match_param(OnigMatchParam* param, OnigCalloutFunc f)
83 {
84 #ifdef USE_CALLOUT
85 param->progress_callout_of_contents = f;
86 return ONIG_NORMAL;
87 #else
88 return ONIG_NO_SUPPORT_CONFIG;
89 #endif
90 }
91
92 extern int
93 onig_set_retraction_callout_of_match_param(OnigMatchParam* param, OnigCalloutFunc f)
94 {
95 #ifdef USE_CALLOUT
96 param->retraction_callout_of_contents = f;
97 return ONIG_NORMAL;
98 #else
99 return ONIG_NO_SUPPORT_CONFIG;
100 #endif
101 }
102
103 extern int
104 onig_set_callout_user_data_of_match_param(OnigMatchParam* param, void* user_data)
105 {
106 #ifdef USE_CALLOUT
107 param->callout_user_data = user_data;
108 return ONIG_NORMAL;
109 #else
110 return ONIG_NO_SUPPORT_CONFIG;
111 #endif
112 }
113
114
/* Working state for one match/search call; set up by MATCH_ARG_INIT and
   released by MATCH_ARG_FREE. */
typedef struct {
  void*           stack_p;   /* saved stack buffer (see STACK_SAVE), or 0 */
  int             stack_n;   /* number of StackType entries in stack_p (set by STACK_SAVE) */
  OnigOptionType  options;
  OnigRegion*     region;
  int             ptr_num;   /* num_repeat + (num_mem + 1) * 2, see MATCH_ARG_INIT */
  const UChar*    start;   /* search start position (for \G: BEGIN_POSITION) */
  unsigned int    match_stack_limit;     /* copied from the OnigMatchParam */
  unsigned long   retry_limit_in_match;  /* copied from the OnigMatchParam */
  OnigMatchParam* mp;
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
  int    best_len;      /* for ONIG_OPTION_FIND_LONGEST */
  UChar* best_s;
#endif
} MatchArg;
130
131
132 #ifdef ONIG_DEBUG
133
134 /* arguments type */
/* Kinds of argument an operation can carry (debug builds only).
   NOTE(review): not referenced in the visible part of this file. */
typedef enum {
  ARG_SPECIAL = -1,
  ARG_NON     =  0,
  ARG_RELADDR =  1,
  ARG_ABSADDR =  2,
  ARG_LENGTH  =  3,
  ARG_MEMNUM  =  4,
  ARG_OPTION  =  5,
  ARG_MODE    =  6
} OpArgType;
145
/* One row of the opcode -> printable name table (OpInfo). */
typedef struct {
  short int opcode;  /* opcode value; a negative value terminates the table */
  char*     name;    /* name printed by op2name() */
} OpInfoType;
150
151 static OpInfoType OpInfo[] = {
152 { OP_FINISH, "finish" },
153 { OP_END, "end" },
154 { OP_EXACT1, "exact1" },
155 { OP_EXACT2, "exact2" },
156 { OP_EXACT3, "exact3" },
157 { OP_EXACT4, "exact4" },
158 { OP_EXACT5, "exact5" },
159 { OP_EXACTN, "exactn" },
160 { OP_EXACTMB2N1, "exactmb2-n1" },
161 { OP_EXACTMB2N2, "exactmb2-n2" },
162 { OP_EXACTMB2N3, "exactmb2-n3" },
163 { OP_EXACTMB2N, "exactmb2-n" },
164 { OP_EXACTMB3N, "exactmb3n" },
165 { OP_EXACTMBN, "exactmbn" },
166 { OP_EXACT1_IC, "exact1-ic" },
167 { OP_EXACTN_IC, "exactn-ic" },
168 { OP_CCLASS, "cclass" },
169 { OP_CCLASS_MB, "cclass-mb" },
170 { OP_CCLASS_MIX, "cclass-mix" },
171 { OP_CCLASS_NOT, "cclass-not" },
172 { OP_CCLASS_MB_NOT, "cclass-mb-not" },
173 { OP_CCLASS_MIX_NOT, "cclass-mix-not" },
174 { OP_ANYCHAR, "anychar" },
175 { OP_ANYCHAR_ML, "anychar-ml" },
176 { OP_ANYCHAR_STAR, "anychar*" },
177 { OP_ANYCHAR_ML_STAR, "anychar-ml*" },
178 { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next" },
179 { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next" },
180 { OP_WORD, "word" },
181 { OP_WORD_ASCII, "word-ascii" },
182 { OP_NO_WORD, "not-word" },
183 { OP_NO_WORD_ASCII, "not-word-ascii" },
184 { OP_WORD_BOUNDARY, "word-boundary" },
185 { OP_NO_WORD_BOUNDARY, "not-word-boundary" },
186 { OP_WORD_BEGIN, "word-begin" },
187 { OP_WORD_END, "word-end" },
188 { OP_TEXT_SEGMENT_BOUNDARY, "text-segment-boundary" },
189 { OP_BEGIN_BUF, "begin-buf" },
190 { OP_END_BUF, "end-buf" },
191 { OP_BEGIN_LINE, "begin-line" },
192 { OP_END_LINE, "end-line" },
193 { OP_SEMI_END_BUF, "semi-end-buf" },
194 { OP_BEGIN_POSITION, "begin-position" },
195 { OP_BACKREF1, "backref1" },
196 { OP_BACKREF2, "backref2" },
197 { OP_BACKREF_N, "backref-n" },
198 { OP_BACKREF_N_IC, "backref-n-ic" },
199 { OP_BACKREF_MULTI, "backref_multi" },
200 { OP_BACKREF_MULTI_IC, "backref_multi-ic" },
201 { OP_BACKREF_WITH_LEVEL, "backref_with_level" },
202 { OP_BACKREF_WITH_LEVEL_IC, "backref_with_level-c" },
203 { OP_BACKREF_CHECK, "backref_check" },
204 { OP_BACKREF_CHECK_WITH_LEVEL, "backref_check_with_level" },
205 { OP_MEMORY_START_PUSH, "mem-start-push" },
206 { OP_MEMORY_START, "mem-start" },
207 { OP_MEMORY_END_PUSH, "mem-end-push" },
208 { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec" },
209 { OP_MEMORY_END, "mem-end" },
210 { OP_MEMORY_END_REC, "mem-end-rec" },
211 { OP_FAIL, "fail" },
212 { OP_JUMP, "jump" },
213 { OP_PUSH, "push" },
214 { OP_PUSH_SUPER, "push-super" },
215 { OP_POP_OUT, "pop-out" },
216 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
217 { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1" },
218 #endif
219 { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next" },
220 { OP_REPEAT, "repeat" },
221 { OP_REPEAT_NG, "repeat-ng" },
222 { OP_REPEAT_INC, "repeat-inc" },
223 { OP_REPEAT_INC_NG, "repeat-inc-ng" },
224 { OP_REPEAT_INC_SG, "repeat-inc-sg" },
225 { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg" },
226 { OP_EMPTY_CHECK_START, "empty-check-start" },
227 { OP_EMPTY_CHECK_END, "empty-check-end" },
228 { OP_EMPTY_CHECK_END_MEMST, "empty-check-end-memst" },
229 { OP_EMPTY_CHECK_END_MEMST_PUSH,"empty-check-end-memst-push" },
230 { OP_PREC_READ_START, "push-pos" },
231 { OP_PREC_READ_END, "pop-pos" },
232 { OP_PREC_READ_NOT_START, "prec-read-not-start" },
233 { OP_PREC_READ_NOT_END, "prec-read-not-end" },
234 { OP_ATOMIC_START, "atomic-start" },
235 { OP_ATOMIC_END, "atomic-end" },
236 { OP_LOOK_BEHIND, "look-behind" },
237 { OP_LOOK_BEHIND_NOT_START, "look-behind-not-start" },
238 { OP_LOOK_BEHIND_NOT_END, "look-behind-not-end" },
239 { OP_CALL, "call" },
240 { OP_RETURN, "return" },
241 { OP_PUSH_SAVE_VAL, "push-save-val" },
242 { OP_UPDATE_VAR, "update-var" },
243 #ifdef USE_CALLOUT
244 { OP_CALLOUT_CONTENTS, "callout-contents" },
245 { OP_CALLOUT_NAME, "callout-name" },
246 #endif
247 { -1, "" }
248 };
249
250 static char*
251 op2name(int opcode)
252 {
253 int i;
254
255 for (i = 0; OpInfo[i].opcode >= 0; i++) {
256 if (opcode == OpInfo[i].opcode) return OpInfo[i].name;
257 }
258
259 return "";
260 }
261
262 static void
263 p_string(FILE* f, int len, UChar* s)
264 {
265 fputs(":", f);
266 while (len-- > 0) { fputc(*s++, f); }
267 }
268
269 static void
270 p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
271 {
272 int x = len * mb_len;
273
274 fprintf(f, ":%d:", len);
275 while (x-- > 0) { fputc(*s++, f); }
276 }
277
278 static void
279 p_rel_addr(FILE* f, RelAddrType rel_addr, Operation* p, Operation* start)
280 {
281 RelAddrType curr = (RelAddrType )(p - start);
282
283 fprintf(f, "{%d/%d}", rel_addr, curr + rel_addr);
284 }
285
286 static int
287 bitset_on_num(BitSetRef bs)
288 {
289 int i, n;
290
291 n = 0;
292 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
293 if (BITSET_AT(bs, i)) n++;
294 }
295
296 return n;
297 }
298
299 static void
300 print_compiled_byte_code(FILE* f, regex_t* reg, int index,
301 Operation* start, OnigEncoding enc)
302 {
303 int i, n;
304 RelAddrType addr;
305 LengthType len;
306 MemNumType mem;
307 OnigCodePoint code;
308 ModeType mode;
309 UChar *q;
310 Operation* p;
311 enum OpCode opcode;
312
313 p = reg->ops + index;
314
315 #ifdef USE_DIRECT_THREADED_CODE
316 opcode = reg->ocs[index];
317 #else
318 opcode = p->opcode;
319 #endif
320
321 fprintf(f, "%s", op2name(opcode));
322 switch (opcode) {
323 case OP_EXACT1:
324 p_string(f, 1, p->exact.s); break;
325 case OP_EXACT2:
326 p_string(f, 2, p->exact.s); break;
327 case OP_EXACT3:
328 p_string(f, 3, p->exact.s); break;
329 case OP_EXACT4:
330 p_string(f, 4, p->exact.s); break;
331 case OP_EXACT5:
332 p_string(f, 5, p->exact.s); break;
333 case OP_EXACTN:
334 len = p->exact_n.n;
335 p_string(f, len, p->exact_n.s); break;
336 case OP_EXACTMB2N1:
337 p_string(f, 2, p->exact.s); break;
338 case OP_EXACTMB2N2:
339 p_string(f, 4, p->exact.s); break;
340 case OP_EXACTMB2N3:
341 p_string(f, 3, p->exact.s); break;
342 case OP_EXACTMB2N:
343 len = p->exact_n.n;
344 p_len_string(f, len, 2, p->exact_n.s); break;
345 case OP_EXACTMB3N:
346 len = p->exact_n.n;
347 p_len_string(f, len, 3, p->exact_n.s); break;
348 case OP_EXACTMBN:
349 {
350 int mb_len;
351
352 mb_len = p->exact_len_n.len;
353 len = p->exact_len_n.n;
354 q = p->exact_len_n.s;
355 fprintf(f, ":%d:%d:", mb_len, len);
356 n = len * mb_len;
357 while (n-- > 0) { fputc(*q++, f); }
358 }
359 break;
360 case OP_EXACT1_IC:
361 len = enclen(enc, p->exact.s);
362 p_string(f, len, p->exact.s);
363 break;
364 case OP_EXACTN_IC:
365 len = p->exact_n.n;
366 p_len_string(f, len, 1, p->exact_n.s);
367 break;
368
369 case OP_CCLASS:
370 case OP_CCLASS_NOT:
371 n = bitset_on_num(p->cclass.bsp);
372 fprintf(f, ":%d", n);
373 break;
374 case OP_CCLASS_MB:
375 case OP_CCLASS_MB_NOT:
376 {
377 OnigCodePoint ncode;
378 OnigCodePoint* codes;
379
380 codes = (OnigCodePoint* )p->cclass_mb.mb;
381 GET_CODE_POINT(ncode, codes);
382 codes++;
383 GET_CODE_POINT(code, codes);
384 fprintf(f, ":%u:%u", code, ncode);
385 }
386 break;
387 case OP_CCLASS_MIX:
388 case OP_CCLASS_MIX_NOT:
389 {
390 OnigCodePoint ncode;
391 OnigCodePoint* codes;
392
393 codes = (OnigCodePoint* )p->cclass_mix.mb;
394 n = bitset_on_num(p->cclass_mix.bsp);
395
396 GET_CODE_POINT(ncode, codes);
397 codes++;
398 GET_CODE_POINT(code, codes);
399 fprintf(f, ":%d:%u:%u", n, code, ncode);
400 }
401 break;
402
403 case OP_ANYCHAR_STAR_PEEK_NEXT:
404 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
405 p_string(f, 1, &(p->anychar_star_peek_next.c));
406 break;
407
408 case OP_WORD_BOUNDARY:
409 case OP_NO_WORD_BOUNDARY:
410 case OP_WORD_BEGIN:
411 case OP_WORD_END:
412 mode = p->word_boundary.mode;
413 fprintf(f, ":%d", mode);
414 break;
415
416 case OP_BACKREF_N:
417 case OP_BACKREF_N_IC:
418 mem = p->backref_n.n1;
419 fprintf(f, ":%d", mem);
420 break;
421 case OP_BACKREF_MULTI_IC:
422 case OP_BACKREF_MULTI:
423 case OP_BACKREF_CHECK:
424 fputs(" ", f);
425 n = p->backref_general.num;
426 for (i = 0; i < n; i++) {
427 mem = (n == 1) ? p->backref_general.n1 : p->backref_general.ns[i];
428 if (i > 0) fputs(", ", f);
429 fprintf(f, "%d", mem);
430 }
431 break;
432 case OP_BACKREF_WITH_LEVEL:
433 case OP_BACKREF_WITH_LEVEL_IC:
434 case OP_BACKREF_CHECK_WITH_LEVEL:
435 {
436 LengthType level;
437
438 level = p->backref_general.nest_level;
439 fprintf(f, ":%d", level);
440 fputs(" ", f);
441 n = p->backref_general.num;
442 for (i = 0; i < n; i++) {
443 mem = (n == 1) ? p->backref_general.n1 : p->backref_general.ns[i];
444 if (i > 0) fputs(", ", f);
445 fprintf(f, "%d", mem);
446 }
447 }
448 break;
449
450 case OP_MEMORY_START:
451 case OP_MEMORY_START_PUSH:
452 mem = p->memory_start.num;
453 fprintf(f, ":%d", mem);
454 break;
455 case OP_MEMORY_END_PUSH:
456 case OP_MEMORY_END_PUSH_REC:
457 case OP_MEMORY_END:
458 case OP_MEMORY_END_REC:
459 mem = p->memory_end.num;
460 fprintf(f, ":%d", mem);
461 break;
462
463 case OP_JUMP:
464 addr = p->jump.addr;
465 fputc(':', f);
466 p_rel_addr(f, addr, p, start);
467 break;
468
469 case OP_PUSH:
470 case OP_PUSH_SUPER:
471 addr = p->push.addr;
472 fputc(':', f);
473 p_rel_addr(f, addr, p, start);
474 break;
475
476 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
477 case OP_PUSH_OR_JUMP_EXACT1:
478 addr = p->push_or_jump_exact1.addr;
479 fputc(':', f);
480 p_rel_addr(f, addr, p, start);
481 p_string(f, 1, &(p->push_or_jump_exact1.c));
482 break;
483 #endif
484
485 case OP_PUSH_IF_PEEK_NEXT:
486 addr = p->push_if_peek_next.addr;
487 fputc(':', f);
488 p_rel_addr(f, addr, p, start);
489 p_string(f, 1, &(p->push_if_peek_next.c));
490 break;
491
492 case OP_REPEAT:
493 case OP_REPEAT_NG:
494 mem = p->repeat.id;
495 addr = p->repeat.addr;
496 fprintf(f, ":%d:", mem);
497 p_rel_addr(f, addr, p, start);
498 break;
499
500 case OP_REPEAT_INC:
501 case OP_REPEAT_INC_NG:
502 case OP_REPEAT_INC_SG:
503 case OP_REPEAT_INC_NG_SG:
504 mem = p->repeat.id;
505 fprintf(f, ":%d", mem);
506 break;
507
508 case OP_EMPTY_CHECK_START:
509 mem = p->empty_check_start.mem;
510 fprintf(f, ":%d", mem);
511 break;
512 case OP_EMPTY_CHECK_END:
513 case OP_EMPTY_CHECK_END_MEMST:
514 case OP_EMPTY_CHECK_END_MEMST_PUSH:
515 mem = p->empty_check_end.mem;
516 fprintf(f, ":%d", mem);
517 break;
518
519 case OP_PREC_READ_NOT_START:
520 addr = p->prec_read_not_start.addr;
521 fputc(':', f);
522 p_rel_addr(f, addr, p, start);
523 break;
524
525 case OP_LOOK_BEHIND:
526 len = p->look_behind.len;
527 fprintf(f, ":%d", len);
528 break;
529
530 case OP_LOOK_BEHIND_NOT_START:
531 addr = p->look_behind_not_start.addr;
532 len = p->look_behind_not_start.len;
533 fprintf(f, ":%d:", len);
534 p_rel_addr(f, addr, p, start);
535 break;
536
537 case OP_CALL:
538 addr = p->call.addr;
539 fprintf(f, ":{/%d}", addr);
540 break;
541
542 case OP_PUSH_SAVE_VAL:
543 {
544 SaveType type;
545
546 type = p->push_save_val.type;
547 mem = p->push_save_val.id;
548 fprintf(f, ":%d:%d", type, mem);
549 }
550 break;
551
552 case OP_UPDATE_VAR:
553 {
554 UpdateVarType type;
555
556 type = p->update_var.type;
557 mem = p->update_var.id;
558 fprintf(f, ":%d:%d", type, mem);
559 }
560 break;
561
562 #ifdef USE_CALLOUT
563 case OP_CALLOUT_CONTENTS:
564 mem = p->callout_contents.num;
565 fprintf(f, ":%d", mem);
566 break;
567
568 case OP_CALLOUT_NAME:
569 {
570 int id;
571
572 id = p->callout_name.id;
573 mem = p->callout_name.num;
574 fprintf(f, ":%d:%d", id, mem);
575 }
576 break;
577 #endif
578
579 case OP_TEXT_SEGMENT_BOUNDARY:
580 if (p->text_segment_boundary.not != 0)
581 fprintf(f, ":not");
582 break;
583
584 case OP_FINISH:
585 case OP_END:
586 case OP_ANYCHAR:
587 case OP_ANYCHAR_ML:
588 case OP_ANYCHAR_STAR:
589 case OP_ANYCHAR_ML_STAR:
590 case OP_WORD:
591 case OP_WORD_ASCII:
592 case OP_NO_WORD:
593 case OP_NO_WORD_ASCII:
594 case OP_BEGIN_BUF:
595 case OP_END_BUF:
596 case OP_BEGIN_LINE:
597 case OP_END_LINE:
598 case OP_SEMI_END_BUF:
599 case OP_BEGIN_POSITION:
600 case OP_BACKREF1:
601 case OP_BACKREF2:
602 case OP_FAIL:
603 case OP_POP_OUT:
604 case OP_PREC_READ_START:
605 case OP_PREC_READ_END:
606 case OP_PREC_READ_NOT_END:
607 case OP_ATOMIC_START:
608 case OP_ATOMIC_END:
609 case OP_LOOK_BEHIND_NOT_END:
610 case OP_RETURN:
611 break;
612
613 default:
614 fprintf(stderr, "print_compiled_byte_code: undefined code %d\n", opcode);
615 break;
616 }
617 }
618 #endif /* ONIG_DEBUG */
619
620 #ifdef ONIG_DEBUG_COMPILE
621 extern void
622 onig_print_compiled_byte_code_list(FILE* f, regex_t* reg)
623 {
624 Operation* bp;
625 Operation* start = reg->ops;
626 Operation* end = reg->ops + reg->ops_used;
627
628 fprintf(f, "bt_mem_start: 0x%x, bt_mem_end: 0x%x\n",
629 reg->bt_mem_start, reg->bt_mem_end);
630 fprintf(f, "code-length: %d\n", reg->ops_used);
631
632 bp = start;
633 while (bp < end) {
634 int pos = bp - start;
635
636 fprintf(f, "%4d: ", pos);
637 print_compiled_byte_code(f, reg, pos, start, reg->enc);
638 fprintf(f, "\n");
639 bp++;
640 }
641 fprintf(f, "\n");
642 }
643 #endif
644
645
646 #ifdef USE_CAPTURE_HISTORY
647 static void history_tree_free(OnigCaptureTreeNode* node);
648
649 static void
650 history_tree_clear(OnigCaptureTreeNode* node)
651 {
652 int i;
653
654 if (IS_NULL(node)) return ;
655
656 for (i = 0; i < node->num_childs; i++) {
657 if (IS_NOT_NULL(node->childs[i])) {
658 history_tree_free(node->childs[i]);
659 }
660 }
661 for (i = 0; i < node->allocated; i++) {
662 node->childs[i] = (OnigCaptureTreeNode* )0;
663 }
664 node->num_childs = 0;
665 node->beg = ONIG_REGION_NOTPOS;
666 node->end = ONIG_REGION_NOTPOS;
667 node->group = -1;
668 }
669
670 static void
671 history_tree_free(OnigCaptureTreeNode* node)
672 {
673 history_tree_clear(node);
674 if (IS_NOT_NULL(node->childs)) xfree(node->childs);
675
676 xfree(node);
677 }
678
679 static void
680 history_root_free(OnigRegion* r)
681 {
682 if (IS_NULL(r->history_root)) return ;
683
684 history_tree_free(r->history_root);
685 r->history_root = (OnigCaptureTreeNode* )0;
686 }
687
688 static OnigCaptureTreeNode*
689 history_node_new(void)
690 {
691 OnigCaptureTreeNode* node;
692
693 node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode));
694 CHECK_NULL_RETURN(node);
695
696 node->childs = (OnigCaptureTreeNode** )0;
697 node->allocated = 0;
698 node->num_childs = 0;
699 node->group = -1;
700 node->beg = ONIG_REGION_NOTPOS;
701 node->end = ONIG_REGION_NOTPOS;
702
703 return node;
704 }
705
706 static int
707 history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child)
708 {
709 #define HISTORY_TREE_INIT_ALLOC_SIZE 8
710
711 if (parent->num_childs >= parent->allocated) {
712 int n, i;
713
714 if (IS_NULL(parent->childs)) {
715 n = HISTORY_TREE_INIT_ALLOC_SIZE;
716 parent->childs =
717 (OnigCaptureTreeNode** )xmalloc(sizeof(parent->childs[0]) * n);
718 }
719 else {
720 n = parent->allocated * 2;
721 parent->childs =
722 (OnigCaptureTreeNode** )xrealloc(parent->childs,
723 sizeof(parent->childs[0]) * n,
724 sizeof(parent->childs[0]) * parent->allocated);
725 }
726 CHECK_NULL_RETURN_MEMERR(parent->childs);
727 for (i = parent->allocated; i < n; i++) {
728 parent->childs[i] = (OnigCaptureTreeNode* )0;
729 }
730 parent->allocated = n;
731 }
732
733 parent->childs[parent->num_childs] = child;
734 parent->num_childs++;
735 return 0;
736 }
737
738 static OnigCaptureTreeNode*
739 history_tree_clone(OnigCaptureTreeNode* node)
740 {
741 int i;
742 OnigCaptureTreeNode *clone, *child;
743
744 clone = history_node_new();
745 CHECK_NULL_RETURN(clone);
746
747 clone->beg = node->beg;
748 clone->end = node->end;
749 for (i = 0; i < node->num_childs; i++) {
750 child = history_tree_clone(node->childs[i]);
751 if (IS_NULL(child)) {
752 history_tree_free(clone);
753 return (OnigCaptureTreeNode* )0;
754 }
755 history_tree_add_child(clone, child);
756 }
757
758 return clone;
759 }
760
761 extern OnigCaptureTreeNode*
762 onig_get_capture_tree(OnigRegion* region)
763 {
764 return region->history_root;
765 }
766 #endif /* USE_CAPTURE_HISTORY */
767
768 extern void
769 onig_region_clear(OnigRegion* region)
770 {
771 int i;
772
773 for (i = 0; i < region->num_regs; i++) {
774 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
775 }
776 #ifdef USE_CAPTURE_HISTORY
777 history_root_free(region);
778 #endif
779 }
780
781 extern int
782 onig_region_resize(OnigRegion* region, int n)
783 {
784 region->num_regs = n;
785
786 if (n < ONIG_NREGION)
787 n = ONIG_NREGION;
788
789 if (region->allocated == 0) {
790 region->beg = (int* )xmalloc(n * sizeof(int));
791 region->end = (int* )xmalloc(n * sizeof(int));
792
793 if (region->beg == 0 || region->end == 0)
794 return ONIGERR_MEMORY;
795
796 region->allocated = n;
797 }
798 else if (region->allocated < n) {
799 region->beg = (int* )xrealloc(region->beg, n * sizeof(int), region->allocated * sizeof(int));
800 region->end = (int* )xrealloc(region->end, n * sizeof(int), region->allocated * sizeof(int));
801
802 if (region->beg == 0 || region->end == 0)
803 return ONIGERR_MEMORY;
804
805 region->allocated = n;
806 }
807
808 return 0;
809 }
810
811 static int
812 onig_region_resize_clear(OnigRegion* region, int n)
813 {
814 int r;
815
816 r = onig_region_resize(region, n);
817 if (r != 0) return r;
818 onig_region_clear(region);
819 return 0;
820 }
821
822 extern int
823 onig_region_set(OnigRegion* region, int at, int beg, int end)
824 {
825 if (at < 0) return ONIGERR_INVALID_ARGUMENT;
826
827 if (at >= region->allocated) {
828 int r = onig_region_resize(region, at + 1);
829 if (r < 0) return r;
830 }
831
832 region->beg[at] = beg;
833 region->end[at] = end;
834 return 0;
835 }
836
837 extern void
838 onig_region_init(OnigRegion* region)
839 {
840 region->num_regs = 0;
841 region->allocated = 0;
842 region->beg = (int* )0;
843 region->end = (int* )0;
844 region->history_root = (OnigCaptureTreeNode* )0;
845 }
846
847 extern OnigRegion*
848 onig_region_new(void)
849 {
850 OnigRegion* r;
851
852 r = (OnigRegion* )xmalloc(sizeof(OnigRegion));
853 CHECK_NULL_RETURN(r);
854 onig_region_init(r);
855 return r;
856 }
857
858 extern void
859 onig_region_free(OnigRegion* r, int free_self)
860 {
861 if (r != 0) {
862 if (r->allocated > 0) {
863 if (r->beg) xfree(r->beg);
864 if (r->end) xfree(r->end);
865 r->allocated = 0;
866 }
867 #ifdef USE_CAPTURE_HISTORY
868 history_root_free(r);
869 #endif
870 if (free_self) xfree(r);
871 }
872 }
873
874 extern void
875 onig_region_copy(OnigRegion* to, OnigRegion* from)
876 {
877 #define RREGC_SIZE (sizeof(int) * from->num_regs)
878 int i;
879
880 if (to == from) return;
881
882 if (to->allocated == 0) {
883 if (from->num_regs > 0) {
884 to->beg = (int* )xmalloc(RREGC_SIZE);
885 if (IS_NULL(to->beg)) return;
886 to->end = (int* )xmalloc(RREGC_SIZE);
887 if (IS_NULL(to->end)) return;
888 to->allocated = from->num_regs;
889 }
890 }
891 else if (to->allocated < from->num_regs) {
892 to->beg = (int* )xrealloc(to->beg, RREGC_SIZE, sizeof(int) * to->allocated);
893 if (IS_NULL(to->beg)) return;
894 to->end = (int* )xrealloc(to->end, RREGC_SIZE, sizeof(int) * to->allocated);
895 if (IS_NULL(to->end)) return;
896 to->allocated = from->num_regs;
897 }
898
899 for (i = 0; i < from->num_regs; i++) {
900 to->beg[i] = from->beg[i];
901 to->end[i] = from->end[i];
902 }
903 to->num_regs = from->num_regs;
904
905 #ifdef USE_CAPTURE_HISTORY
906 history_root_free(to);
907
908 if (IS_NOT_NULL(from->history_root)) {
909 to->history_root = history_tree_clone(from->history_root);
910 }
911 #endif
912 }
913
#ifdef USE_CALLOUT
/* Fill the OnigCalloutArgs object `args` and invoke `func(&args, user)`,
   storing its return value in `result`.
   Besides its parameters the macro reads these names from the scope where it
   is expanded: reg, str, end, sstart, right_range, s, retry_in_match_counter,
   msa, stk_base, stk, mem_start_stk and mem_end_stk. */
#define CALLOUT_BODY(func, ain, aname_id, anum, user, args, result) do { \
  args.in            = (ain);\
  args.name_id       = (aname_id);\
  args.num           = anum;\
  args.regex         = reg;\
  args.string        = str;\
  args.string_end    = end;\
  args.start         = sstart;\
  args.right_range   = right_range;\
  args.current       = s;\
  args.retry_in_match_counter = retry_in_match_counter;\
  args.msa           = msa;\
  args.stk_base      = stk_base;\
  args.stk           = stk;\
  args.mem_start_stk = mem_start_stk;\
  args.mem_end_stk   = mem_end_stk;\
  result = (func)(&args, user);\
} while (0)

/* Run `func` as a retraction callout (ONIG_CALLOUT_IN_RETRACTION).
   ONIG_CALLOUT_FAIL and ONIG_CALLOUT_SUCCESS are accepted and execution
   continues. Any other result is treated as an error: a positive value is
   replaced by ONIGERR_INVALID_ARGUMENT, the value is stored in the
   surrounding `best_len`, and control jumps to the surrounding `finish`
   label. */
#define RETRACTION_CALLOUT(func, aname_id, anum, user) do {\
  int result;\
  OnigCalloutArgs args;\
  CALLOUT_BODY(func, ONIG_CALLOUT_IN_RETRACTION, aname_id, anum, user, args, result);\
  switch (result) {\
  case ONIG_CALLOUT_FAIL:\
  case ONIG_CALLOUT_SUCCESS:\
    break;\
  default:\
    if (result > 0) {\
      result = ONIGERR_INVALID_ARGUMENT;\
    }\
    best_len = result;\
    goto finish;\
    break;\
  }\
} while(0)
#endif
952
953
/** stack **/
/* Sentinel for "no stack entry" in StackIndex values. */
#define INVALID_STACK_INDEX   -1

/* Bit shared by every STK_*ALT* type below. */
#define STK_ALT_FLAG               0x0001

/* stack type */
/* used by normal-POP */
#define STK_SUPER_ALT             STK_ALT_FLAG
#define STK_ALT                   (0x0002 | STK_ALT_FLAG)
#define STK_ALT_PREC_READ_NOT     (0x0004 | STK_ALT_FLAG)
#define STK_ALT_LOOK_BEHIND_NOT   (0x0006 | STK_ALT_FLAG)

/* handled by normal-POP */
/* every value in this group has bit 0x0010 (STK_MASK_POP_HANDLED) set */
#define STK_MEM_START              0x0010
#define STK_MEM_END                0x8030
#define STK_REPEAT_INC             0x0050
#ifdef USE_CALLOUT
#define STK_CALLOUT                0x0070
#endif

/* avoided by normal-POP */
#define STK_VOID                   0x0000  /* for fill a blank */
#define STK_EMPTY_CHECK_START      0x3000
#define STK_EMPTY_CHECK_END        0x5000  /* for recursive call */
#define STK_MEM_END_MARK           0x8100
#define STK_TO_VOID_START          0x1200  /* mark for "(?>...)" */
#define STK_REPEAT                 0x0300
#define STK_CALL_FRAME             0x0400
#define STK_RETURN                 0x0500
#define STK_SAVE_VAL               0x0600
#define STK_PREC_READ_START        0x0700
#define STK_PREC_READ_END          0x0800

/* stack type check mask */
#define STK_MASK_POP_USED          STK_ALT_FLAG
#define STK_MASK_POP_HANDLED       0x0010
#define STK_MASK_POP_HANDLED_TIL   (STK_MASK_POP_HANDLED | 0x0004)
#define STK_MASK_TO_VOID_TARGET    0x100e
#define STK_MASK_MEM_END_OR_MARK   0x8000  /* MEM_END or MEM_END_MARK */
993
/* Index of an entry in the match stack (INVALID_STACK_INDEX when none). */
typedef intptr_t StackIndex;

/* One entry of the match stack. `type` holds one of the STK_* values and
   selects which member of the union `u` is meaningful. */
typedef struct _StackType {
  unsigned int type;
  int zid;
  union {
    struct {
      Operation* pcode;     /* byte code position */
      UChar*     pstr;      /* string position */
      UChar*     pstr_prev; /* previous char position of pstr */
    } state;
    struct {
      int        count;  /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */
      Operation* pcode;  /* byte code position (head of repeated target) */
    } repeat;
    struct {
      StackIndex si;     /* index of stack */
    } repeat_inc;
    struct {
      UChar *pstr;       /* start/end position */
      /* Following information is set, if this stack type is MEM-START */
      StackIndex prev_start;  /* prev. info (for backtrack  "(...)*" ) */
      StackIndex prev_end;    /* prev. info (for backtrack  "(...)*" ) */
    } mem;
    struct {
      UChar *pstr;       /* start position */
    } empty_check;
#ifdef USE_CALL
    struct {
      Operation *ret_addr; /* byte code position */
      UChar *pstr;         /* string position */
    } call_frame;
#endif
    struct {
      enum SaveType type;
      UChar* v;
      UChar* v2;
    } val;
#ifdef USE_CALLOUT
    struct {
      int num;
      OnigCalloutFunc func;
    } callout;
#endif
  } u;
} StackType;
1040
#ifdef USE_CALLOUT

/* Argument block handed to callout functions; filled by CALLOUT_BODY. */
struct OnigCalloutArgsStruct {
  OnigCalloutIn    in;
  int              name_id;   /* name id or ONIG_NON_NAME_ID */
  int              num;
  OnigRegex        regex;
  const OnigUChar* string;
  const OnigUChar* string_end;
  const OnigUChar* start;
  const OnigUChar* right_range;
  const OnigUChar* current;  /* current matching position */
  unsigned long    retry_in_match_counter;

  /* invisible to users */
  MatchArg*   msa;
  StackType*  stk_base;
  StackType*  stk;
  StackIndex* mem_start_stk;
  StackIndex* mem_end_stk;
};

#endif
1064
1065
/* Initialize the MatchArg `msa` for regex `reg` from the given options,
   region, start position and match parameters `mp`. stack_p starts out
   null and ptr_num is num_repeat + (num_mem + 1) * 2. The first variant
   additionally sets best_len to ONIG_MISMATCH.
   Note that stack_n (and best_s) are not initialized here. */
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mp) do { \
  (msa).stack_p  = (void* )0;\
  (msa).options  = (arg_option);\
  (msa).region   = (arg_region);\
  (msa).start    = (arg_start);\
  (msa).match_stack_limit  = (mp)->match_stack_limit;\
  (msa).retry_limit_in_match = (mp)->retry_limit_in_match;\
  (msa).mp = mp;\
  (msa).best_len = ONIG_MISMATCH;\
  (msa).ptr_num  = (reg)->num_repeat + ((reg)->num_mem + 1) * 2; \
} while(0)
#else
#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mp) do { \
  (msa).stack_p  = (void* )0;\
  (msa).options  = (arg_option);\
  (msa).region   = (arg_region);\
  (msa).start    = (arg_start);\
  (msa).match_stack_limit  = (mp)->match_stack_limit;\
  (msa).retry_limit_in_match = (mp)->retry_limit_in_match;\
  (msa).mp = mp;\
  (msa).ptr_num  = (reg)->num_repeat + ((reg)->num_mem + 1) * 2; \
} while(0)
#endif
1090
/* Release the stack buffer saved in the MatchArg `msa`, if there is one.
   Wrapped in do { } while (0) so the macro behaves as a single statement and
   cannot capture an `else` that follows it. */
#define MATCH_ARG_FREE(msa)  do {\
  if ((msa).stack_p) xfree((msa).stack_p);\
} while (0)
1092
1093
/* Threshold on msa->ptr_num that selects between the last two branches of
   STACK_INIT. */
#define ALLOCA_PTR_NUM_LIMIT   50

/* Set up the match stack for the enclosing function. One buffer holds
   msa->ptr_num StackIndex slots followed by the StackType entries.
   Uses the surrounding locals msa, is_alloca, alloc_base, stk_base, stk and
   stk_end.
     - msa->stack_p set: reuse the saved buffer with msa->stack_n entries.
     - otherwise: allocate a new buffer with xmalloc for `stack_num` entries.
       In this file both remaining branches call xmalloc; they differ only in
       the value stored in is_alloca (0 when ptr_num exceeds
       ALLOCA_PTR_NUM_LIMIT, 1 otherwise), which STACK_SAVE inspects.
   On allocation failure CHECK_NULL_RETURN_MEMERR leaves the enclosing
   function (presumably returning ONIGERR_MEMORY -- macro defined elsewhere).
   NOTE(review): the expansion ends in "while(0);", i.e. it already contains
   a trailing semicolon. */
#define STACK_INIT(stack_num)  do {\
  if (msa->stack_p) {\
    is_alloca  = 0;\
    alloc_base = msa->stack_p;\
    stk_base   = (StackType* )(alloc_base\
                 + (sizeof(StackIndex) * msa->ptr_num));\
    stk        = stk_base;\
    stk_end    = stk_base + msa->stack_n;\
  }\
  else if (msa->ptr_num > ALLOCA_PTR_NUM_LIMIT) {\
    is_alloca  = 0;\
    alloc_base = (char* )xmalloc(sizeof(StackIndex) * msa->ptr_num\
                  + sizeof(StackType) * (stack_num));\
    CHECK_NULL_RETURN_MEMERR(alloc_base);\
    stk_base   = (StackType* )(alloc_base\
                 + (sizeof(StackIndex) * msa->ptr_num));\
    stk        = stk_base;\
    stk_end    = stk_base + (stack_num);\
  }\
  else {\
    is_alloca  = 1;\
    alloc_base = (char* )xmalloc(sizeof(StackIndex) * msa->ptr_num\
                 + sizeof(StackType) * (stack_num));\
    CHECK_NULL_RETURN_MEMERR(alloc_base);\
    stk_base   = (StackType* )(alloc_base\
                 + (sizeof(StackIndex) * msa->ptr_num));\
    stk        = stk_base;\
    stk_end    = stk_base + (stack_num);\
  }\
} while(0);
1126
1127
/* Store the current stack buffer in msa for reuse by a later STACK_INIT.
   msa->stack_n receives the current capacity (stk_end - stk_base).
   When is_alloca is non-zero a new buffer is allocated and the contents of
   alloc_base are copied into it; otherwise alloc_base itself is stored.
   On allocation failure CHECK_NULL_RETURN_MEMERR leaves the enclosing
   function. */
#define STACK_SAVE do{\
    msa->stack_n = (int )(stk_end - stk_base);\
  if (is_alloca != 0) {\
    size_t size = sizeof(StackIndex) * msa->ptr_num \
                + sizeof(StackType) * msa->stack_n;\
    msa->stack_p = xmalloc(size);\
    CHECK_NULL_RETURN_MEMERR(msa->stack_p);\
    xmemcpy(msa->stack_p, alloc_base, size);\
  }\
  else {\
    msa->stack_p = alloc_base;\
  };\
} while(0)
1141
/* Recompute the pointers into the StackIndex area at the start of
   alloc_base: reg->num_repeat repeat slots first, then num_mem + 1
   memory-start slots, then the memory-end slots.
   Uses the surrounding locals alloc_base, reg and num_mem. */
#define UPDATE_FOR_STACK_REALLOC do{\
  repeat_stk    = (StackIndex* )alloc_base;\
  mem_start_stk = (StackIndex* )(repeat_stk + reg->num_repeat);\
  mem_end_stk   = mem_start_stk + num_mem + 1;\
} while(0)
1147
/* Process-wide default match stack limit; copied into new match parameters
   by onig_initialize_match_param(). */
static unsigned int MatchStackLimit = DEFAULT_MATCH_STACK_LIMIT_SIZE;
1149
1150 extern unsigned int
1151 onig_get_match_stack_limit_size(void)
1152 {
1153 return MatchStackLimit;
1154 }
1155
1156 extern int
1157 onig_set_match_stack_limit_size(unsigned int size)
1158 {
1159 MatchStackLimit = size;
1160 return 0;
1161 }
1162
#ifdef USE_RETRY_LIMIT_IN_MATCH

/* Process-wide default retry limit; copied into new match parameters by
   onig_initialize_match_param(). */
static unsigned long RetryLimitInMatch = DEFAULT_RETRY_LIMIT_IN_MATCH;

/* Post-increment the surrounding retry_in_match_counter and jump to the
   surrounding retry_limit_in_match_over label when its value before the
   increment is greater than retry_limit_in_match. */
#define CHECK_RETRY_LIMIT_IN_MATCH  do {\
  if (retry_in_match_counter++ > retry_limit_in_match) goto retry_limit_in_match_over;\
} while (0)

#else

/* Retry limiting is compiled out: the check expands to nothing. */
#define CHECK_RETRY_LIMIT_IN_MATCH

#endif /* USE_RETRY_LIMIT_IN_MATCH */
1176
/* Return the global default retry limit, or 0 when the library is built
   without USE_RETRY_LIMIT_IN_MATCH. */
extern unsigned long
onig_get_retry_limit_in_match(void)
{
#ifndef USE_RETRY_LIMIT_IN_MATCH
  /* return ONIG_NO_SUPPORT_CONFIG; */
  return 0;
#else
  return RetryLimitInMatch;
#endif
}
1187
1188 extern int
1189 onig_set_retry_limit_in_match(unsigned long size)
1190 {
1191 #ifdef USE_RETRY_LIMIT_IN_MATCH
1192 RetryLimitInMatch = size;
1193 return 0;
1194 #else
1195 return ONIG_NO_SUPPORT_CONFIG;
1196 #endif
1197 }
1198
/* Default callout handlers; onig_initialize_match_param() copies them into
   the progress/retraction "callout of contents" fields of each match param. */
1199 #ifdef USE_CALLOUT
1200 static OnigCalloutFunc DefaultProgressCallout;
1201 static OnigCalloutFunc DefaultRetractionCallout;
1202 #endif
1203
1204 extern OnigMatchParam*
1205 onig_new_match_param(void)
1206 {
1207 OnigMatchParam* p;
1208
1209 p = (OnigMatchParam* )xmalloc(sizeof(*p));
1210 if (IS_NOT_NULL(p)) {
1211 onig_initialize_match_param(p);
1212 }
1213
1214 return p;
1215 }
1216
1217 extern void
1218 onig_free_match_param_content(OnigMatchParam* p)
1219 {
1220 #ifdef USE_CALLOUT
1221 if (IS_NOT_NULL(p->callout_data)) {
1222 xfree(p->callout_data);
1223 p->callout_data = 0;
1224 }
1225 #endif
1226 }
1227
1228 extern void
1229 onig_free_match_param(OnigMatchParam* p)
1230 {
1231 if (IS_NOT_NULL(p)) {
1232 onig_free_match_param_content(p);
1233 xfree(p);
1234 }
1235 }
1236
1237 extern int
1238 onig_initialize_match_param(OnigMatchParam* mp)
1239 {
1240 mp->match_stack_limit = MatchStackLimit;
1241 #ifdef USE_RETRY_LIMIT_IN_MATCH
1242 mp->retry_limit_in_match = RetryLimitInMatch;
1243 #endif
1244
1245 #ifdef USE_CALLOUT
1246 mp->progress_callout_of_contents = DefaultProgressCallout;
1247 mp->retraction_callout_of_contents = DefaultRetractionCallout;
1248 mp->match_at_call_counter = 0;
1249 mp->callout_user_data = 0;
1250 mp->callout_data = 0;
1251 mp->callout_data_alloc_num = 0;
1252 #endif
1253
1254 return ONIG_NORMAL;
1255 }
1256
1257 #ifdef USE_CALLOUT
1258
1259 static int
1260 adjust_match_param(regex_t* reg, OnigMatchParam* mp)
1261 {
1262 RegexExt* ext = reg->extp;
1263
1264 mp->match_at_call_counter = 0;
1265
1266 if (IS_NULL(ext) || ext->callout_num == 0) return ONIG_NORMAL;
1267
1268 if (ext->callout_num > mp->callout_data_alloc_num) {
1269 CalloutData* d;
1270 size_t n = ext->callout_num * sizeof(*d);
1271 if (IS_NOT_NULL(mp->callout_data))
1272 d = (CalloutData* )xrealloc(mp->callout_data, n, mp->callout_data_alloc_num * sizeof(*d));
1273 else
1274 d = (CalloutData* )xmalloc(n);
1275 CHECK_NULL_RETURN_MEMERR(d);
1276
1277 mp->callout_data = d;
1278 mp->callout_data_alloc_num = ext->callout_num;
1279 }
1280
1281 xmemset(mp->callout_data, 0, mp->callout_data_alloc_num * sizeof(CalloutData));
1282 return ONIG_NORMAL;
1283 }
1284
/*
 * ADJUST_MATCH_PARAM(reg, mp): run adjust_match_param() and return its result
 * from the enclosing function when it is not ONIG_NORMAL.  A variable named r
 * must be in scope.
 * NOTE(review): this expands to two bare statements (no do/while wrapper), so
 * it must not be used as the body of an unbraced if/else.
 */
1285 #define ADJUST_MATCH_PARAM(reg, mp) \
1286 r = adjust_match_param(reg, mp);\
1287 if (r != ONIG_NORMAL) return r;
1288
/* Callout numbers are 1-based: number num maps to callout_data[num - 1]. */
1289 #define CALLOUT_DATA_AT_NUM(mp, num) ((mp)->callout_data + ((num) - 1))
1290
1291 extern int
1292 onig_check_callout_data_and_clear_old_values(OnigCalloutArgs* args)
1293 {
1294 OnigMatchParam* mp;
1295 int num;
1296 CalloutData* d;
1297
1298 mp = args->msa->mp;
1299 num = args->num;
1300
1301 d = CALLOUT_DATA_AT_NUM(mp, num);
1302 if (d->last_match_at_call_counter != mp->match_at_call_counter) {
1303 xmemset(d, 0, sizeof(*d));
1304 d->last_match_at_call_counter = mp->match_at_call_counter;
1305 return d->last_match_at_call_counter;
1306 }
1307
1308 return 0;
1309 }
1310
1311 extern int
1312 onig_get_callout_data_dont_clear_old(regex_t* reg, OnigMatchParam* mp,
1313 int callout_num, int slot,
1314 OnigType* type, OnigValue* val)
1315 {
1316 OnigType t;
1317 CalloutData* d;
1318
1319 if (callout_num <= 0) return ONIGERR_INVALID_ARGUMENT;
1320
1321 d = CALLOUT_DATA_AT_NUM(mp, callout_num);
1322 t = d->slot[slot].type;
1323 if (IS_NOT_NULL(type)) *type = t;
1324 if (IS_NOT_NULL(val)) *val = d->slot[slot].val;
1325 return (t == ONIG_TYPE_VOID ? 1 : ONIG_NORMAL);
1326 }
1327
1328 extern int
1329 onig_get_callout_data_by_callout_args_self_dont_clear_old(OnigCalloutArgs* args,
1330 int slot, OnigType* type,
1331 OnigValue* val)
1332 {
1333 return onig_get_callout_data_dont_clear_old(args->regex, args->msa->mp,
1334 args->num, slot, type, val);
1335 }
1336
1337 extern int
1338 onig_get_callout_data(regex_t* reg, OnigMatchParam* mp,
1339 int callout_num, int slot,
1340 OnigType* type, OnigValue* val)
1341 {
1342 OnigType t;
1343 CalloutData* d;
1344
1345 if (callout_num <= 0) return ONIGERR_INVALID_ARGUMENT;
1346
1347 d = CALLOUT_DATA_AT_NUM(mp, callout_num);
1348 if (d->last_match_at_call_counter != mp->match_at_call_counter) {
1349 xmemset(d, 0, sizeof(*d));
1350 d->last_match_at_call_counter = mp->match_at_call_counter;
1351 }
1352
1353 t = d->slot[slot].type;
1354 if (IS_NOT_NULL(type)) *type = t;
1355 if (IS_NOT_NULL(val)) *val = d->slot[slot].val;
1356 return (t == ONIG_TYPE_VOID ? 1 : ONIG_NORMAL);
1357 }
1358
1359 extern int
1360 onig_get_callout_data_by_tag(regex_t* reg, OnigMatchParam* mp,
1361 const UChar* tag, const UChar* tag_end, int slot,
1362 OnigType* type, OnigValue* val)
1363 {
1364 int num;
1365
1366 num = onig_get_callout_num_by_tag(reg, tag, tag_end);
1367 if (num < 0) return num;
1368 if (num == 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME;
1369
1370 return onig_get_callout_data(reg, mp, num, slot, type, val);
1371 }
1372
1373 extern int
1374 onig_get_callout_data_by_callout_args(OnigCalloutArgs* args,
1375 int callout_num, int slot,
1376 OnigType* type, OnigValue* val)
1377 {
1378 return onig_get_callout_data(args->regex, args->msa->mp, callout_num, slot,
1379 type, val);
1380 }
1381
1382 extern int
1383 onig_get_callout_data_by_callout_args_self(OnigCalloutArgs* args,
1384 int slot, OnigType* type, OnigValue* val)
1385 {
1386 return onig_get_callout_data(args->regex, args->msa->mp, args->num, slot,
1387 type, val);
1388 }
1389
1390 extern int
1391 onig_set_callout_data(regex_t* reg, OnigMatchParam* mp,
1392 int callout_num, int slot,
1393 OnigType type, OnigValue* val)
1394 {
1395 CalloutData* d;
1396
1397 if (callout_num <= 0) return ONIGERR_INVALID_ARGUMENT;
1398
1399 d = CALLOUT_DATA_AT_NUM(mp, callout_num);
1400 d->slot[slot].type = type;
1401 d->slot[slot].val = *val;
1402 d->last_match_at_call_counter = mp->match_at_call_counter;
1403
1404 return ONIG_NORMAL;
1405 }
1406
1407 extern int
1408 onig_set_callout_data_by_tag(regex_t* reg, OnigMatchParam* mp,
1409 const UChar* tag, const UChar* tag_end, int slot,
1410 OnigType type, OnigValue* val)
1411 {
1412 int num;
1413
1414 num = onig_get_callout_num_by_tag(reg, tag, tag_end);
1415 if (num < 0) return num;
1416 if (num == 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME;
1417
1418 return onig_set_callout_data(reg, mp, num, slot, type, val);
1419 }
1420
1421 extern int
1422 onig_set_callout_data_by_callout_args(OnigCalloutArgs* args,
1423 int callout_num, int slot,
1424 OnigType type, OnigValue* val)
1425 {
1426 return onig_set_callout_data(args->regex, args->msa->mp, callout_num, slot,
1427 type, val);
1428 }
1429
1430 extern int
1431 onig_set_callout_data_by_callout_args_self(OnigCalloutArgs* args,
1432 int slot, OnigType type, OnigValue* val)
1433 {
1434 return onig_set_callout_data(args->regex, args->msa->mp, args->num, slot,
1435 type, val);
1436 }
1437
1438 #else
/* Callouts are compiled out: ADJUST_MATCH_PARAM expands to nothing. */
1439 #define ADJUST_MATCH_PARAM(reg, mp)
1440 #endif /* USE_CALLOUT */
1441
1442
1443 static int
1444 stack_double(int is_alloca, char** arg_alloc_base,
1445 StackType** arg_stk_base, StackType** arg_stk_end, StackType** arg_stk,
1446 MatchArg* msa)
1447 {
1448 unsigned int n;
1449 int used;
1450 size_t size;
1451 size_t new_size;
1452 char* alloc_base;
1453 char* new_alloc_base;
1454 StackType *stk_base, *stk_end, *stk;
1455
1456 alloc_base = *arg_alloc_base;
1457 stk_base = *arg_stk_base;
1458 stk_end = *arg_stk_end;
1459 stk = *arg_stk;
1460
1461 n = (unsigned int )(stk_end - stk_base);
1462 size = sizeof(StackIndex) * msa->ptr_num + sizeof(StackType) * n;
1463 n *= 2;
1464 new_size = sizeof(StackIndex) * msa->ptr_num + sizeof(StackType) * n;
1465 if (is_alloca != 0) {
1466 new_alloc_base = (char* )xmalloc(new_size);
1467 if (IS_NULL(new_alloc_base)) {
1468 STACK_SAVE;
1469 return ONIGERR_MEMORY;
1470 }
1471 xmemcpy(new_alloc_base, alloc_base, size);
1472 }
1473 else {
1474 if (msa->match_stack_limit != 0 && n > msa->match_stack_limit) {
1475 if ((unsigned int )(stk_end - stk_base) == msa->match_stack_limit)
1476 return ONIGERR_MATCH_STACK_LIMIT_OVER;
1477 else
1478 n = msa->match_stack_limit;
1479 }
1480 new_alloc_base = (char* )xrealloc(alloc_base, new_size, size);
1481 if (IS_NULL(new_alloc_base)) {
1482 STACK_SAVE;
1483 return ONIGERR_MEMORY;
1484 }
1485 }
1486
1487 alloc_base = new_alloc_base;
1488 used = (int )(stk - stk_base);
1489 *arg_alloc_base = alloc_base;
1490 *arg_stk_base = (StackType* )(alloc_base
1491 + (sizeof(StackIndex) * msa->ptr_num));
1492 *arg_stk = *arg_stk_base + used;
1493 *arg_stk_end = *arg_stk_base + n;
1494 return 0;
1495 }
1496
/*
 * STACK_ENSURE(n): make sure at least n free entries remain between stk and
 * stk_end, growing the stack with stack_double() if not.  On failure it runs
 * STACK_SAVE and returns the error code from the enclosing function; on
 * success it clears is_alloca and refreshes the StackIndex table pointers
 * through UPDATE_FOR_STACK_REALLOC.
 */
1497 #define STACK_ENSURE(n) do {\
1498 if ((int )(stk_end - stk) < (n)) {\
1499 int r = stack_double(is_alloca, &alloc_base, &stk_base, &stk_end, &stk, msa);\
1500 if (r != 0) { STACK_SAVE; return r; } \
1501 is_alloca = 0;\
1502 UPDATE_FOR_STACK_REALLOC;\
1503 }\
1504 } while(0)
1505
/* Convert between stack indices and entry pointers, relative to stk_base. */
1506 #define STACK_AT(index) (stk_base + (index))
1507 #define GET_STACK_INDEX(stk) ((stk) - stk_base)
1508
/* Push an entry that carries only a type tag. */
1509 #define STACK_PUSH_TYPE(stack_type) do {\
1510 STACK_ENSURE(1);\
1511 stk->type = (stack_type);\
1512 STACK_INC;\
1513 } while(0)
1514
/* True when the entry's type has any STK_MASK_TO_VOID_TARGET bit set. */
1515 #define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
1516
/* Push a state entry: opcode pointer, string position and previous string
   position.  Grows the stack first if needed. */
1517 #define STACK_PUSH(stack_type,pat,s,sprev) do {\
1518 STACK_ENSURE(1);\
1519 stk->type = (stack_type);\
1520 stk->u.state.pcode = (pat);\
1521 stk->u.state.pstr = (s);\
1522 stk->u.state.pstr_prev = (sprev);\
1523 STACK_INC;\
1524 } while(0)
1525
/* Push a type + opcode pointer WITHOUT calling STACK_ENSURE; the caller must
   already have guaranteed a free entry. */
1526 #define STACK_PUSH_ENSURED(stack_type,pat) do {\
1527 stk->type = (stack_type);\
1528 stk->u.state.pcode = (pat);\
1529 STACK_INC;\
1530 } while(0)
1531
/*
 * STACK_PUSH_BOTTOM(stack_type, pat): push a type + opcode pointer without
 * calling STACK_ENSURE.  The ONIG_DEBUG_MATCH build additionally records the
 * in-scope s and sprev in the entry.
 */
1532 #ifdef ONIG_DEBUG_MATCH
1533 #define STACK_PUSH_BOTTOM(stack_type,pat) do {\
1534 stk->type = (stack_type);\
1535 stk->u.state.pcode = (pat);\
1536 stk->u.state.pstr = s;\
1537 stk->u.state.pstr_prev = sprev;\
1538 STACK_INC;\
1539 } while (0)
1540 #else
1541 #define STACK_PUSH_BOTTOM(stack_type,pat) do {\
1542 stk->type = (stack_type);\
1543 stk->u.state.pcode = (pat);\
1544 STACK_INC;\
1545 } while (0)
1546 #endif
1547
/* Named shorthands for STACK_PUSH / STACK_PUSH_TYPE with a fixed entry type.
   STACK_PUSH_PREC_READ_START stores a null opcode pointer. */
1548 #define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev)
1549 #define STACK_PUSH_SUPER_ALT(pat,s,sprev) STACK_PUSH(STK_SUPER_ALT,pat,s,sprev)
1550 #define STACK_PUSH_PREC_READ_START(s,sprev) \
1551 STACK_PUSH(STK_PREC_READ_START,(Operation* )0,s,sprev)
1552 #define STACK_PUSH_ALT_PREC_READ_NOT(pat,s,sprev) \
1553 STACK_PUSH(STK_ALT_PREC_READ_NOT,pat,s,sprev)
1554 #define STACK_PUSH_TO_VOID_START STACK_PUSH_TYPE(STK_TO_VOID_START)
1555 #define STACK_PUSH_ALT_LOOK_BEHIND_NOT(pat,s,sprev) \
1556 STACK_PUSH(STK_ALT_LOOK_BEHIND_NOT,pat,s,sprev)
1557
/* Push a repeat entry for repeat id sid with its count reset to 0. */
1558 #define STACK_PUSH_REPEAT(sid, pat) do {\
1559 STACK_ENSURE(1);\
1560 stk->type = STK_REPEAT;\
1561 stk->zid = (sid);\
1562 stk->u.repeat.pcode = (pat);\
1563 stk->u.repeat.count = 0;\
1564 STACK_INC;\
1565 } while(0)
1566
/* Push a marker recording that the repeat entry at stack index sindex was
   incremented (STACK_POP decrements that count again when popping it). */
1567 #define STACK_PUSH_REPEAT_INC(sindex) do {\
1568 STACK_ENSURE(1);\
1569 stk->type = STK_REPEAT_INC;\
1570 stk->u.repeat_inc.si = (sindex);\
1571 STACK_INC;\
1572 } while(0)
1573
/* Push the start of capture group mnum at position s.  The previous
   mem_start_stk/mem_end_stk values are saved in the entry, then
   mem_start_stk[mnum] is pointed at this entry and mem_end_stk[mnum] is
   invalidated. */
1574 #define STACK_PUSH_MEM_START(mnum, s) do {\
1575 STACK_ENSURE(1);\
1576 stk->type = STK_MEM_START;\
1577 stk->zid = (mnum);\
1578 stk->u.mem.pstr = (s);\
1579 stk->u.mem.prev_start = mem_start_stk[mnum];\
1580 stk->u.mem.prev_end = mem_end_stk[mnum];\
1581 mem_start_stk[mnum] = GET_STACK_INDEX(stk);\
1582 mem_end_stk[mnum] = INVALID_STACK_INDEX;\
1583 STACK_INC;\
1584 } while(0)
1585
/* Push the end of capture group mnum at position s, saving the previous
   start/end indices and pointing mem_end_stk[mnum] at this entry. */
1586 #define STACK_PUSH_MEM_END(mnum, s) do {\
1587 STACK_ENSURE(1);\
1588 stk->type = STK_MEM_END;\
1589 stk->zid = (mnum);\
1590 stk->u.mem.pstr = (s);\
1591 stk->u.mem.prev_start = mem_start_stk[mnum];\
1592 stk->u.mem.prev_end = mem_end_stk[mnum];\
1593 mem_end_stk[mnum] = GET_STACK_INDEX(stk);\
1594 STACK_INC;\
1595 } while(0)
1596
/* Push a bare end marker for group mnum (no position, no index updates). */
1597 #define STACK_PUSH_MEM_END_MARK(mnum) do {\
1598 STACK_ENSURE(1);\
1599 stk->type = STK_MEM_END_MARK;\
1600 stk->zid = (mnum);\
1601 STACK_INC;\
1602 } while(0)
1603
/*
 * STACK_GET_MEM_START(mnum, k): walk down from stk looking for the
 * STK_MEM_START of group mnum that is not paired with an end entry / end
 * mark of the same group seen on the way (level tracks the pairing).  k is
 * left on that entry; if none is found the loop stops with k == stk_base.
 */
1604 #define STACK_GET_MEM_START(mnum, k) do {\
1605 int level = 0;\
1606 k = stk;\
1607 while (k > stk_base) {\
1608 k--;\
1609 if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
1610 && k->zid == (mnum)) {\
1611 level++;\
1612 }\
1613 else if (k->type == STK_MEM_START && k->zid == (mnum)) {\
1614 if (level == 0) break;\
1615 level--;\
1616 }\
1617 }\
1618 } while(0)
1619
/*
 * STACK_GET_MEM_RANGE(k, mnum, start, end): scan upward from entry k (k is
 * advanced in place) towards stk for the outermost start/end pair of capture
 * group mnum.  start receives the position of the first STK_MEM_START seen at
 * nesting level 0; end receives the position of the STK_MEM_END that brings
 * the level back to 0, and the scan stops there.  If no such end exists the
 * loop finishes with k == stk and end is left untouched.
 *
 * The group number is read from the entry's zid field, which is where
 * STACK_PUSH_MEM_START / STACK_PUSH_MEM_END store it.
 */
#define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\
  int level = 0;\
  while (k < stk) {\
    if (k->type == STK_MEM_START && k->zid == (mnum)) {\
      if (level == 0) (start) = k->u.mem.pstr;\
      level++;\
    }\
    else if (k->type == STK_MEM_END && k->zid == (mnum)) {\
      level--;\
      if (level == 0) {\
        (end) = k->u.mem.pstr;\
        break;\
      }\
    }\
    k++;\
  }\
} while(0)
1637
/* Push the start marker of empty-loop check cnum, remembering position s. */
1638 #define STACK_PUSH_EMPTY_CHECK_START(cnum, s) do {\
1639 STACK_ENSURE(1);\
1640 stk->type = STK_EMPTY_CHECK_START;\
1641 stk->zid = (cnum);\
1642 stk->u.empty_check.pstr = (s);\
1643 STACK_INC;\
1644 } while(0)
1645
/* Push the end marker of empty-loop check cnum. */
1646 #define STACK_PUSH_EMPTY_CHECK_END(cnum) do {\
1647 STACK_ENSURE(1);\
1648 stk->type = STK_EMPTY_CHECK_END;\
1649 stk->zid = (cnum);\
1650 STACK_INC;\
1651 } while(0)
1652
/* Push a call frame holding the opcode address to return to. */
1653 #define STACK_PUSH_CALL_FRAME(pat) do {\
1654 STACK_ENSURE(1);\
1655 stk->type = STK_CALL_FRAME;\
1656 stk->u.call_frame.ret_addr = (pat);\
1657 STACK_INC;\
1658 } while(0)
1659
/* Push a marker recording that a call has returned. */
1660 #define STACK_PUSH_RETURN do {\
1661 STACK_ENSURE(1);\
1662 stk->type = STK_RETURN;\
1663 STACK_INC;\
1664 } while(0)
1665
/* Push a saved value of kind stype under id sid; sval is stored as UChar*. */
1666 #define STACK_PUSH_SAVE_VAL(sid, stype, sval) do {\
1667 STACK_ENSURE(1);\
1668 stk->type = STK_SAVE_VAL;\
1669 stk->zid = (sid);\
1670 stk->u.val.type = (stype);\
1671 stk->u.val.v = (UChar* )(sval);\
1672 STACK_INC;\
1673 } while(0)
1674
/* Same as STACK_PUSH_SAVE_VAL, and additionally stores the in-scope sprev
   in the entry's v2 field. */
1675 #define STACK_PUSH_SAVE_VAL_WITH_SPREV(sid, stype, sval) do {\
1676 STACK_ENSURE(1);\
1677 stk->type = STK_SAVE_VAL;\
1678 stk->zid = (sid);\
1679 stk->u.val.type = (stype);\
1680 stk->u.val.v = (UChar* )(sval);\
1681 stk->u.val.v2 = sprev;\
1682 STACK_INC;\
1683 } while(0)
1684
/*
 * STACK_GET_SAVE_VAL_TYPE_LAST(stype, sval): walk down from stk and copy the
 * value of the nearest STK_SAVE_VAL entry of kind stype into sval.  sval is
 * left untouched when no such entry exists.
 */
1685 #define STACK_GET_SAVE_VAL_TYPE_LAST(stype, sval) do {\
1686 StackType *k = stk;\
1687 while (k > stk_base) {\
1688 k--;\
1689 STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST"); \
1690 if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)) {\
1691 (sval) = k->u.val.v;\
1692 break;\
1693 }\
1694 }\
1695 } while (0)
1696
/*
 * STACK_GET_SAVE_VAL_TYPE_LAST_ID(stype, sid, sval): like the macro above but
 * also requires the entry's zid to equal sid, and only accepts an entry found
 * while level is 0.  level is decremented at each STK_CALL_FRAME and
 * incremented at each STK_RETURN met on the way down.
 */
1697 #define STACK_GET_SAVE_VAL_TYPE_LAST_ID(stype, sid, sval) do { \
1698 int level = 0;\
1699 StackType *k = stk;\
1700 while (k > stk_base) {\
1701 k--;\
1702 STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID"); \
1703 if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\
1704 && k->zid == (sid)) {\
1705 if (level == 0) {\
1706 (sval) = k->u.val.v;\
1707 break;\
1708 }\
1709 }\
1710 else if (k->type == STK_CALL_FRAME)\
1711 level--;\
1712 else if (k->type == STK_RETURN)\
1713 level++;\
1714 }\
1715 } while (0)
1716
/*
 * STACK_GET_SAVE_VAL_TYPE_LAST_ID_WITH_SPREV(stype, sid, sval): walk down
 * from stk for the nearest STK_SAVE_VAL entry of kind stype and id sid found
 * while level is 0; copy its value into sval and its v2 field into the
 * in-scope variable sprev.  level is decremented at each STK_CALL_FRAME and
 * incremented at each STK_RETURN met on the way down.  Nothing is assigned
 * when no entry qualifies.
 *
 * The STACK_BASE_CHECK label names this macro, so debug output identifies
 * the right call site.
 */
#define STACK_GET_SAVE_VAL_TYPE_LAST_ID_WITH_SPREV(stype, sid, sval) do { \
  int level = 0;\
  StackType *k = stk;\
  while (k > stk_base) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID_WITH_SPREV"); \
    if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\
        && k->zid == (sid)) {\
      if (level == 0) {\
        (sval) = k->u.val.v;\
        sprev = k->u.val.v2;\
        break;\
      }\
    }\
    else if (k->type == STK_CALL_FRAME)\
      level--;\
    else if (k->type == STK_RETURN)\
      level++;\
  }\
} while (0)
1737
/*
 * STACK_GET_SAVE_VAL_TYPE_LAST_ID_FROM(stype, sid, sval, stk_from): same
 * search as STACK_GET_SAVE_VAL_TYPE_LAST_ID, but starting AT entry stk_from
 * (that entry itself is examined first) and walking down while k > stk_base,
 * so the entry at stk_base is never examined.  sval is left untouched when
 * no entry qualifies.
 *
 * The id is read from the entry's zid field, which is where
 * STACK_PUSH_SAVE_VAL stores it.
 */
#define STACK_GET_SAVE_VAL_TYPE_LAST_ID_FROM(stype, sid, sval, stk_from) do { \
  int level = 0;\
  StackType *k = (stk_from);\
  while (k > stk_base) {\
    STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID_FROM"); \
    if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\
        && k->zid == (sid)) {\
      if (level == 0) {\
        (sval) = k->u.val.v;\
        break;\
      }\
    }\
    else if (k->type == STK_CALL_FRAME)\
      level--;\
    else if (k->type == STK_RETURN)\
      level++;\
    k--;\
  }\
} while (0)
1757
/* Push a callout entry for a "contents" callout: zid is set to
   ONIG_NON_NAME_ID, with the callout number and function recorded. */
1758 #define STACK_PUSH_CALLOUT_CONTENTS(anum, func) do {\
1759 STACK_ENSURE(1);\
1760 stk->type = STK_CALLOUT;\
1761 stk->zid = ONIG_NON_NAME_ID;\
1762 stk->u.callout.num = (anum);\
1763 stk->u.callout.func = (func);\
1764 STACK_INC;\
1765 } while(0)
1766
/* Push a callout entry for a named callout: zid carries the name id aid. */
1767 #define STACK_PUSH_CALLOUT_NAME(aid, anum, func) do {\
1768 STACK_ENSURE(1);\
1769 stk->type = STK_CALLOUT;\
1770 stk->zid = (aid);\
1771 stk->u.callout.num = (anum);\
1772 stk->u.callout.func = (func);\
1773 STACK_INC;\
1774 } while(0)
1775
/*
 * STACK_BASE_CHECK(p, at): debug-only underflow guard.  In ONIG_DEBUG builds
 * it prints the call-site label and jumps to stack_error when p is below
 * stk_base; otherwise it expands to nothing.
 * NOTE(review): the debug form is a bare `if` without a do/while wrapper, so
 * an `else` directly after an expansion would bind to it.
 */
1776 #ifdef ONIG_DEBUG
1777 #define STACK_BASE_CHECK(p, at) \
1778 if ((p) < stk_base) {\
1779 fprintf(stderr, "at %s\n", at);\
1780 goto stack_error;\
1781 }
1782 #else
1783 #define STACK_BASE_CHECK(p, at)
1784 #endif
1785
/* Drop exactly one entry from the top of the stack. */
1786 #define STACK_POP_ONE do {\
1787 stk--;\
1788 STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
1789 } while(0)
1790
1791
/*
 * POP_CALLOUT_CASE: an `else if` arm meant to be appended to the if/else
 * chain inside STACK_POP.  With USE_CALLOUT it invokes RETRACTION_CALLOUT for
 * a popped STK_CALLOUT entry; otherwise it expands to nothing.
 */
1792 #ifdef USE_CALLOUT
1793 #define POP_CALLOUT_CASE \
1794 else if (stk->type == STK_CALLOUT) {\
1795 RETRACTION_CALLOUT(stk->u.callout.func, stk->zid, stk->u.callout.num, msa->mp->callout_user_data);\
1796 }
1797 #else
1798 #define POP_CALLOUT_CASE
1799 #endif
1800
/*
 * STACK_POP: pop entries until one whose type has a STK_MASK_POP_USED bit is
 * on top (stk is left pointing at it).  What is undone for the entries
 * skipped on the way depends on the in-scope pop_level:
 *   STACK_POP_LEVEL_FREE      - nothing;
 *   STACK_POP_LEVEL_MEM_START - STK_MEM_START entries restore
 *                               mem_start_stk / mem_end_stk;
 *   anything else             - additionally STK_REPEAT_INC decrements the
 *                               referenced repeat count, STK_MEM_END restores
 *                               the capture indices, and POP_CALLOUT_CASE
 *                               handles STK_CALLOUT.
 */
1801 #define STACK_POP do {\
1802 switch (pop_level) {\
1803 case STACK_POP_LEVEL_FREE:\
1804 while (1) {\
1805 stk--;\
1806 STACK_BASE_CHECK(stk, "STACK_POP"); \
1807 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
1808 }\
1809 break;\
1810 case STACK_POP_LEVEL_MEM_START:\
1811 while (1) {\
1812 stk--;\
1813 STACK_BASE_CHECK(stk, "STACK_POP 2"); \
1814 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
1815 else if (stk->type == STK_MEM_START) {\
1816 mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
1817 mem_end_stk[stk->zid] = stk->u.mem.prev_end;\
1818 }\
1819 }\
1820 break;\
1821 default:\
1822 while (1) {\
1823 stk--;\
1824 STACK_BASE_CHECK(stk, "STACK_POP 3"); \
1825 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
1826 else if ((stk->type & STK_MASK_POP_HANDLED) != 0) {\
1827 if (stk->type == STK_MEM_START) {\
1828 mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
1829 mem_end_stk[stk->zid] = stk->u.mem.prev_end;\
1830 }\
1831 else if (stk->type == STK_REPEAT_INC) {\
1832 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
1833 }\
1834 else if (stk->type == STK_MEM_END) {\
1835 mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
1836 mem_end_stk[stk->zid] = stk->u.mem.prev_end;\
1837 }\
1838 POP_CALLOUT_CASE\
1839 }\
1840 }\
1841 break;\
1842 }\
1843 } while(0)
1844
/*
 * POP_TIL_BODY(aname, til_type): pop entries until one of type til_type is
 * on top.  Entries with a STK_MASK_POP_HANDLED_TIL bit that are skipped get
 * their effect undone (capture indices restored, repeat count decremented);
 * callouts are deliberately not invoked here.  aname is the label used by
 * STACK_BASE_CHECK.
 */
1845 #define POP_TIL_BODY(aname, til_type) do {\
1846 while (1) {\
1847 stk--;\
1848 STACK_BASE_CHECK(stk, (aname));\
1849 if ((stk->type & STK_MASK_POP_HANDLED_TIL) != 0) {\
1850 if (stk->type == (til_type)) break;\
1851 else {\
1852 if (stk->type == STK_MEM_START) {\
1853 mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
1854 mem_end_stk[stk->zid] = stk->u.mem.prev_end;\
1855 }\
1856 else if (stk->type == STK_REPEAT_INC) {\
1857 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
1858 }\
1859 else if (stk->type == STK_MEM_END) {\
1860 mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
1861 mem_end_stk[stk->zid] = stk->u.mem.prev_end;\
1862 }\
1863 /* Don't call callout here because negation of total success by (?!..) (?<!..) */\
1864 }\
1865 }\
1866 }\
1867 } while(0)
1868
/* Pop down to the nearest STK_ALT_PREC_READ_NOT entry. */
1869 #define STACK_POP_TIL_ALT_PREC_READ_NOT do {\
1870 POP_TIL_BODY("STACK_POP_TIL_ALT_PREC_READ_NOT", STK_ALT_PREC_READ_NOT);\
1871 } while(0)
1872
/* Pop down to the nearest STK_ALT_LOOK_BEHIND_NOT entry. */
1873 #define STACK_POP_TIL_ALT_LOOK_BEHIND_NOT do {\
1874 POP_TIL_BODY("STACK_POP_TIL_ALT_LOOK_BEHIND_NOT", STK_ALT_LOOK_BEHIND_NOT);\
1875 } while(0)
1876
1877
/*
 * STACK_EXEC_TO_VOID(k): walk down from stk turning every "to-void target"
 * entry into STK_VOID, stopping after the nearest STK_TO_VOID_START (which is
 * voided as well).  k is left on that entry.  The stack is not shrunk.
 */
1878 #define STACK_EXEC_TO_VOID(k) do {\
1879 k = stk;\
1880 while (1) {\
1881 k--;\
1882 STACK_BASE_CHECK(k, "STACK_EXEC_TO_VOID"); \
1883 if (IS_TO_VOID_TARGET(k)) {\
1884 if (k->type == STK_TO_VOID_START) {\
1885 k->type = STK_VOID;\
1886 break;\
1887 }\
1888 k->type = STK_VOID;\
1889 }\
1890 }\
1891 } while(0)
1892
/*
 * STACK_GET_PREC_READ_START(k): walk down from stk to the STK_PREC_READ_START
 * that is not paired with a STK_PREC_READ_END seen on the way (level tracks
 * the pairing), voiding every to-void target entry passed.  k is left on the
 * start entry.
 */
1893 #define STACK_GET_PREC_READ_START(k) do {\
1894 int level = 0;\
1895 k = stk;\
1896 while (1) {\
1897 k--;\
1898 STACK_BASE_CHECK(k, "STACK_GET_PREC_READ_START");\
1899 if (IS_TO_VOID_TARGET(k)) {\
1900 k->type = STK_VOID;\
1901 }\
1902 else if (k->type == STK_PREC_READ_START) {\
1903 if (level == 0) {\
1904 break;\
1905 }\
1906 level--;\
1907 }\
1908 else if (k->type == STK_PREC_READ_END) {\
1909 level++;\
1910 }\
1911 }\
1912 } while(0)
1913
/*
 * STACK_EMPTY_CHECK(isnull, sid, s): find the nearest STK_EMPTY_CHECK_START
 * with id sid below stk and set isnull to 1 when the position recorded there
 * equals s (no input consumed since the start marker), otherwise 0.
 * The loop has no lower bound of its own; it relies on such an entry existing.
 */
1914 #define STACK_EMPTY_CHECK(isnull,sid,s) do {\
1915 StackType* k = stk;\
1916 while (1) {\
1917 k--;\
1918 STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK"); \
1919 if (k->type == STK_EMPTY_CHECK_START) {\
1920 if (k->zid == (sid)) {\
1921 (isnull) = (k->u.empty_check.pstr == (s));\
1922 break;\
1923 }\
1924 }\
1925 }\
1926 } while(0)
1927
/*
 * STACK_MEM_START_GET_PREV_END_ADDR(k, reg, addr): for a STK_MEM_START entry
 * k, compute the end address that was current before it was pushed.
 * addr is 0 when prev_end is INVALID_STACK_INDEX.  Otherwise, if the group's
 * bit is set in reg->bt_mem_end, prev_end is a stack index and the address is
 * read from that entry; if not, prev_end itself is reinterpreted as the
 * address.
 */
1928 #define STACK_MEM_START_GET_PREV_END_ADDR(k /* STK_MEM_START*/, reg, addr) do {\
1929 if (k->u.mem.prev_end == INVALID_STACK_INDEX) {\
1930 (addr) = 0;\
1931 }\
1932 else {\
1933 if (MEM_STATUS_AT((reg)->bt_mem_end, k->zid))\
1934 (addr) = STACK_AT(k->u.mem.prev_end)->u.mem.pstr;\
1935 else\
1936 (addr) = (UChar* )k->u.mem.prev_end;\
1937 }\
1938 } while (0)
1939
1940 #ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT
/*
 * STACK_EMPTY_CHECK_MEM(isnull, sid, s, reg): empty-loop check that also
 * looks at captures.  Finds the nearest STK_EMPTY_CHECK_START with id sid.
 * If its recorded position differs from s, isnull = 0.  Otherwise isnull
 * starts at 1 and every STK_MEM_START between that marker and stk that lies
 * outside a look-ahead (level == 0) is inspected:
 *   - no previous end address             -> isnull = 0, stop;
 *   - previous start position != that end -> isnull = 0, stop;
 *   - previous end address != s           -> isnull = -1 and keep scanning.
 */
1941 #define STACK_EMPTY_CHECK_MEM(isnull,sid,s,reg) do {\
1942 StackType* k = stk;\
1943 while (1) {\
1944 k--;\
1945 STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEM"); \
1946 if (k->type == STK_EMPTY_CHECK_START) {\
1947 if (k->zid == (sid)) {\
1948 if (k->u.empty_check.pstr != (s)) {\
1949 (isnull) = 0;\
1950 break;\
1951 }\
1952 else {\
1953 UChar* endp;\
1954 int level = 0;\
1955 (isnull) = 1;\
1956 while (k < stk) {\
1957 if (k->type == STK_MEM_START && level == 0) {\
1958 STACK_MEM_START_GET_PREV_END_ADDR(k, reg, endp);\
1959 if (endp == 0) {\
1960 (isnull) = 0; break;\
1961 }\
1962 else if (STACK_AT(k->u.mem.prev_start)->u.mem.pstr != endp) {\
1963 (isnull) = 0; break;\
1964 }\
1965 else if (endp != s) {\
1966 (isnull) = -1; /* empty, but position changed */ \
1967 }\
1968 }\
1969 else if (k->type == STK_PREC_READ_START) {\
1970 level++;\
1971 }\
1972 else if (k->type == STK_PREC_READ_END) {\
1973 level--;\
1974 }\
1975 k++;\
1976 }\
1977 break;\
1978 }\
1979 }\
1980 }\
1981 }\
1982 } while(0)
1983
/*
 * STACK_EMPTY_CHECK_MEM_REC(isnull, sid, s, reg): variant of
 * STACK_EMPTY_CHECK_MEM that copes with nested start/end markers of the same
 * id.  On the way down, an STK_EMPTY_CHECK_END with id sid raises level and a
 * matching STK_EMPTY_CHECK_START met at level != 0 lowers it; only a start
 * marker met at level 0 is used.  The forward scan then applies the same
 * capture tests as STACK_EMPTY_CHECK_MEM, skipping STK_MEM_START entries that
 * lie inside a nested check of the same id (level != 0) or inside a
 * look-ahead (prec_level != 0).
 */
1984 #define STACK_EMPTY_CHECK_MEM_REC(isnull,sid,s,reg) do {\
1985 int level = 0;\
1986 StackType* k = stk;\
1987 while (1) {\
1988 k--;\
1989 STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEM_REC");\
1990 if (k->type == STK_EMPTY_CHECK_START) {\
1991 if (k->zid == (sid)) {\
1992 if (level == 0) {\
1993 if (k->u.empty_check.pstr != (s)) {\
1994 (isnull) = 0;\
1995 break;\
1996 }\
1997 else {\
1998 UChar* endp;\
1999 int prec_level = 0;\
2000 (isnull) = 1;\
2001 while (k < stk) {\
2002 if (k->type == STK_MEM_START) {\
2003 if (level == 0 && prec_level == 0) {\
2004 STACK_MEM_START_GET_PREV_END_ADDR(k, reg, endp);\
2005 if (endp == 0) {\
2006 (isnull) = 0; break;\
2007 }\
2008 else if (STACK_AT(k->u.mem.prev_start)->u.mem.pstr != endp) { \
2009 (isnull) = 0; break;\
2010 }\
2011 else if (endp != s) {\
2012 (isnull) = -1; /* empty, but position changed */\
2013 }\
2014 }\
2015 }\
2016 else if (k->type == STK_EMPTY_CHECK_START) {\
2017 if (k->zid == (sid)) level++;\
2018 }\
2019 else if (k->type == STK_EMPTY_CHECK_END) {\
2020 if (k->zid == (sid)) level--;\
2021 }\
2022 else if (k->type == STK_PREC_READ_START) {\
2023 prec_level++;\
2024 }\
2025 else if (k->type == STK_PREC_READ_END) {\
2026 prec_level--;\
2027 }\
2028 k++;\
2029 }\
2030 break;\
2031 }\
2032 }\
2033 else {\
2034 level--;\
2035 }\
2036 }\
2037 }\
2038 else if (k->type == STK_EMPTY_CHECK_END) {\
2039 if (k->zid == (sid)) level++;\
2040 }\
2041 }\
2042 } while(0)
2043 #else
/*
 * STACK_EMPTY_CHECK_REC(isnull, id, s): empty-loop check for bodies that may
 * recurse, used when USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT is off.
 * Walks down from stk; each STK_EMPTY_CHECK_END raises level and each
 * STK_EMPTY_CHECK_START that is not the answer lowers it.  The first start
 * marker with id `id` met at level 0 decides: isnull = 1 when its recorded
 * position equals s, otherwise 0.
 *
 * The id is read from the entry's zid field, which is where
 * STACK_PUSH_EMPTY_CHECK_START stores it.
 *
 * NOTE(review): level is adjusted for start/end markers of every id here,
 * whereas STACK_EMPTY_CHECK_MEM_REC only counts markers whose id matches.
 * Confirm whether counting foreign ids is intended.
 */
#define STACK_EMPTY_CHECK_REC(isnull,id,s) do {\
  int level = 0;\
  StackType* k = stk;\
  while (1) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_REC"); \
    if (k->type == STK_EMPTY_CHECK_START) {\
      if (k->zid == (id)) {\
        if (level == 0) {\
          (isnull) = (k->u.empty_check.pstr == (s));\
          break;\
        }\
      }\
      level--;\
    }\
    else if (k->type == STK_EMPTY_CHECK_END) {\
      level++;\
    }\
  }\
} while(0)
2064 #endif /* USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT */
2065
/*
 * STACK_GET_REPEAT(sid, k): walk down from stk to the nearest STK_REPEAT
 * entry with id sid that is met while level is 0.  level is decremented at
 * each STK_CALL_FRAME and incremented at each STK_RETURN passed.  k is left
 * on the entry found.
 */
2066 #define STACK_GET_REPEAT(sid, k) do {\
2067 int level = 0;\
2068 k = stk;\
2069 while (1) {\
2070 k--;\
2071 STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
2072 if (k->type == STK_REPEAT) {\
2073 if (level == 0) {\
2074 if (k->zid == (sid)) {\
2075 break;\
2076 }\
2077 }\
2078 }\
2079 else if (k->type == STK_CALL_FRAME) level--;\
2080 else if (k->type == STK_RETURN) level++;\
2081 }\
2082 } while(0)
2083
/*
 * STACK_RETURN(addr): find the call frame that the current return belongs to
 * and copy its return address into addr.  Each STK_RETURN passed on the way
 * down raises level; a STK_CALL_FRAME met at level != 0 lowers it and is
 * skipped.
 */
2084 #define STACK_RETURN(addr) do {\
2085 int level = 0;\
2086 StackType* k = stk;\
2087 while (1) {\
2088 k--;\
2089 STACK_BASE_CHECK(k, "STACK_RETURN"); \
2090 if (k->type == STK_CALL_FRAME) {\
2091 if (level == 0) {\
2092 (addr) = k->u.call_frame.ret_addr;\
2093 break;\
2094 }\
2095 else level--;\
2096 }\
2097 else if (k->type == STK_RETURN)\
2098 level++;\
2099 }\
2100 } while(0)
2101
2102
/*
 * STRING_CMP(s1, s2, len): compare len bytes, jumping to the `fail` label on
 * the first difference.  All three arguments are modified: s1 and s2 are
 * advanced past the bytes examined and len is counted down.
 */
2103 #define STRING_CMP(s1,s2,len) do {\
2104 while (len-- > 0) {\
2105 if (*s1++ != *s2++) goto fail;\
2106 }\
2107 } while(0)
2108
/*
 * STRING_CMP_IC(case_fold_flag, s1, ps2, len): case-insensitive comparison
 * through string_cmp_ic() using the in-scope `encode`; jumps to `fail` when
 * the strings differ.  On success *ps2 has been advanced by string_cmp_ic().
 */
2109 #define STRING_CMP_IC(case_fold_flag,s1,ps2,len) do {\
2110 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) \
2111 goto fail; \
2112 } while(0)
2113
2114 static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
2115 UChar* s1, UChar** ps2, int mblen)
2116 {
2117 UChar buf1[ONIGENC_MBC_CASE_FOLD_MAXLEN];
2118 UChar buf2[ONIGENC_MBC_CASE_FOLD_MAXLEN];
2119 UChar *p1, *p2, *end1, *s2, *end2;
2120 int len1, len2;
2121
2122 s2 = *ps2;
2123 end1 = s1 + mblen;
2124 end2 = s2 + mblen;
2125 while (s1 < end1) {
2126 len1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, end1, buf1);
2127 len2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s2, end2, buf2);
2128 if (len1 != len2) return 0;
2129 p1 = buf1;
2130 p2 = buf2;
2131 while (len1-- > 0) {
2132 if (*p1 != *p2) return 0;
2133 p1++;
2134 p2++;
2135 }
2136 }
2137
2138 *ps2 = s2;
2139 return 1;
2140 }
2141
/*
 * STRING_CMP_VALUE(s1, s2, len, is_fail): like STRING_CMP but reports the
 * outcome in is_fail (1 on mismatch, 0 on equality) instead of jumping.
 * s1, s2 and len are modified.
 */
2142 #define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
2143 is_fail = 0;\
2144 while (len-- > 0) {\
2145 if (*s1++ != *s2++) {\
2146 is_fail = 1; break;\
2147 }\
2148 }\
2149 } while(0)
2150
/* Case-insensitive counterpart of STRING_CMP_VALUE, using string_cmp_ic()
   with the in-scope `encode`. */
2151 #define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,is_fail) do {\
2152 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) \
2153 is_fail = 1; \
2154 else \
2155 is_fail = 0; \
2156 } while(0)
2157
2158
/* Position predicates; they read str, end, s and right_range from the
   expansion site.  DATA_ENSURE jumps to `fail` when fewer than n bytes remain
   before right_range.
   NOTE(review): DATA_ENSURE is a bare `if` statement, so an `else` following
   an expansion would bind to it. */
2159 #define IS_EMPTY_STR (str == end)
2160 #define ON_STR_BEGIN(s) ((s) == str)
2161 #define ON_STR_END(s) ((s) == end)
2162 #define DATA_ENSURE_CHECK1 (s < right_range)
2163 #define DATA_ENSURE_CHECK(n) (s + (n) <= right_range)
2164 #define DATA_ENSURE(n) if (s + (n) > right_range) goto fail
2165
/* Reset right_range from the in-scope in_right_range. */
2166 #define INIT_RIGHT_RANGE right_range = (UChar* )in_right_range
2167
2168 #ifdef USE_CAPTURE_HISTORY
2169 static int
2170 make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp,
2171 StackType* stk_top, UChar* str, regex_t* reg)
2172 {
2173 int n, r;
2174 OnigCaptureTreeNode* child;
2175 StackType* k = *kp;
2176
2177 while (k < stk_top) {
2178 if (k->type == STK_MEM_START) {
2179 n = k->zid;
2180 if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP &&
2181 MEM_STATUS_AT(reg->capture_history, n) != 0) {
2182 child = history_node_new();
2183 CHECK_NULL_RETURN_MEMERR(child);
2184 child->group = n;
2185 child->beg = (int )(k->u.mem.pstr - str);
2186 r = history_tree_add_child(node, child);
2187 if (r != 0) return r;
2188 *kp = (k + 1);
2189 r = make_capture_history_tree(child, kp, stk_top, str, reg);
2190 if (r != 0) return r;
2191
2192 k = *kp;
2193 child->end = (int )(k->u.mem.pstr - str);
2194 }
2195 }
2196 else if (k->type == STK_MEM_END) {
2197 if (k->zid == node->group) {
2198 node->end = (int )(k->u.mem.pstr - str);
2199 *kp = k;
2200 return 0;
2201 }
2202 }
2203 k++;
2204 }
2205
2206 return 1; /* 1: root node ending. */
2207 }
2208 #endif
2209
2210 #ifdef USE_BACKREF_WITH_LEVEL
2211 static int mem_is_in_memp(int mem, int num, MemNumType* memp)
2212 {
2213 int i;
2214
2215 for (i = 0; i < num; i++) {
2216 if (mem == (int )memp[i]) return 1;
2217 }
2218 return 0;
2219 }
2220
2221 static int
2222 backref_match_at_nested_level(regex_t* reg,
2223 StackType* top, StackType* stk_base,
2224 int ignore_case, int case_fold_flag,
2225 int nest, int mem_num, MemNumType* memp,
2226 UChar** s, const UChar* send)
2227 {
2228 UChar *ss, *p, *pstart, *pend = NULL_UCHARP;
2229 int level;
2230 StackType* k;
2231
2232 level = 0;
2233 k = top;
2234 k--;
2235 while (k >= stk_base) {
2236 if (k->type == STK_CALL_FRAME) {
2237 level--;
2238 }
2239 else if (k->type == STK_RETURN) {
2240 level++;
2241 }
2242 else if (level == nest) {
2243 if (k->type == STK_MEM_START) {
2244 if (mem_is_in_memp(k->zid, mem_num, memp)) {
2245 pstart = k->u.mem.pstr;
2246 if (IS_NOT_NULL(pend)) {
2247 if (pend - pstart > send - *s) return 0; /* or goto next_mem; */
2248 p = pstart;
2249 ss = *s;
2250
2251 if (ignore_case != 0) {
2252 if (string_cmp_ic(reg->enc, case_fold_flag,
2253 pstart, &ss, (int )(pend - pstart)) == 0)
2254 return 0; /* or goto next_mem; */
2255 }
2256 else {
2257 while (p < pend) {
2258 if (*p++ != *ss++) return 0; /* or goto next_mem; */
2259 }
2260 }
2261
2262 *s = ss;
2263 return 1;
2264 }
2265 }
2266 }
2267 else if (k->type == STK_MEM_END) {
2268 if (mem_is_in_memp(k->zid, mem_num, memp)) {
2269 pend = k->u.mem.pstr;
2270 }
2271 }
2272 }
2273 k--;
2274 }
2275
2276 return 0;
2277 }
2278
2279 static int
2280 backref_check_at_nested_level(regex_t* reg,
2281 StackType* top, StackType* stk_base,
2282 int nest, int mem_num, MemNumType* memp)
2283 {
2284 int level;
2285 StackType* k;
2286
2287 level = 0;
2288 k = top;
2289 k--;
2290 while (k >= stk_base) {
2291 if (k->type == STK_CALL_FRAME) {
2292 level--;
2293 }
2294 else if (k->type == STK_RETURN) {
2295 level++;
2296 }
2297 else if (level == nest) {
2298 if (k->type == STK_MEM_END) {
2299 if (mem_is_in_memp(k->zid, mem_num, memp)) {
2300 return 1;
2301 }
2302 }
2303 }
2304 k--;
2305 }
2306
2307 return 0;
2308 }
2309 #endif /* USE_BACKREF_WITH_LEVEL */
2310
2311
2312 #ifdef ONIG_DEBUG_STATISTICS
2313
/*
 * Timing back end for the statistics build.  USE_TIMEOFDAY is defined
 * unconditionally right here, so the gettimeofday() branch is the one that is
 * compiled; there TIMEDIFF yields microseconds.  The times() branch is kept
 * as an alternative and yields the difference of tms_utime.
 */
2314 #define USE_TIMEOFDAY
2315
2316 #ifdef USE_TIMEOFDAY
2317 #ifdef HAVE_SYS_TIME_H
2318 #include <sys/time.h>
2319 #endif
2320 #ifdef HAVE_UNISTD_H
2321 #include <unistd.h>
2322 #endif
2323 static struct timeval ts, te;
2324 #define GETTIME(t) gettimeofday(&(t), (struct timezone* )0)
2325 #define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \
2326 (((te).tv_sec - (ts).tv_sec)*1000000))
2327 #else
2328 #ifdef HAVE_SYS_TIMES_H
2329 #include <sys/times.h>
2330 #endif
2331 static struct tms ts, te;
2332 #define GETTIME(t) times(&(t))
2333 #define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime)
2334 #endif
2335
/* Per-opcode statistics, indexed by opcode: execution count, how often the
   opcode directly preceded OpPrevTarget, and accumulated time.  OpCurr is the
   opcode currently being timed; MaxStackDepth is updated by STACK_INC. */
2336 static int OpCounter[256];
2337 static int OpPrevCounter[256];
2338 static unsigned long OpTime[256];
2339 static int OpCurr = OP_FINISH;
2340 static int OpPrevTarget = OP_FAIL;
2341 static int MaxStackDepth = 0;
2342
/* SOP_IN(opcode): called on entry to an opcode handler.  If this opcode is
   OpPrevTarget, credits the previous opcode in OpPrevCounter; then makes this
   opcode current, counts it and records the start time in ts. */
2343 #define SOP_IN(opcode) do {\
2344 if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\
2345 OpCurr = opcode;\
2346 OpCounter[opcode]++;\
2347 GETTIME(ts);\
2348 } while(0)
2349
/* SOP_OUT: called on leaving a handler; adds the elapsed time to OpTime for
   the current opcode. */
2350 #define SOP_OUT do {\
2351 GETTIME(te);\
2352 OpTime[OpCurr] += TIMEDIFF(te, ts);\
2353 } while(0)
2354
2355 extern void
2356 onig_statistics_init(void)
2357 {
2358 int i;
2359 for (i = 0; i < 256; i++) {
2360 OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0;
2361 }
2362 MaxStackDepth = 0;
2363 }
2364
2365 extern int
2366 onig_print_statistics(FILE* f)
2367 {
2368 int r;
2369 int i;
2370
2371 r = fprintf(f, " count prev time\n");
2372 if (r < 0) return -1;
2373
2374 for (i = 0; OpInfo[i].opcode >= 0; i++) {
2375 r = fprintf(f, "%8d: %8d: %10ld: %s\n",
2376 OpCounter[i], OpPrevCounter[i], OpTime[i], OpInfo[i].name);
2377 if (r < 0) return -1;
2378 }
2379 r = fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth);
2380 if (r < 0) return -1;
2381
2382 return 0;
2383 }
2384
/*
 * Statistics build: advance the stack pointer by one entry and remember the
 * largest distance from stk_base seen so far in MaxStackDepth.
 * Expects `stk`, `stk_base` and `MaxStackDepth` to be in scope at the use site.
 */
#define STACK_INC do {\
  ++stk;\
  if (MaxStackDepth < stk - stk_base) {\
    MaxStackDepth = stk - stk_base;\
  }\
} while(0)
2390
2391 #else
/* Statistics disabled: the profiling hooks expand to nothing ... */
#define SOP_IN(opcode)
#define SOP_OUT

/* ... and pushing a stack entry is a plain pointer increment. */
#define STACK_INC     stk++
2396 #endif
2397
2398
/*
 * Match region layout used by the POSIX API.
 *
 * When the POSIX-region option is set, match_at() treats the caller's region
 * as an array of these records and stores, per group, the start (rm_so) and
 * end (rm_eo) of the match as offsets from the beginning of the subject
 * string; groups that did not take part get ONIG_REGION_NOTPOS in both.
 */
typedef int regoff_t;

typedef struct {
  regoff_t rm_so, rm_eo;
} posix_regmatch_t;
2406
2407
2408
/*
 * Dispatch macros for the bytecode interpreter loop in match_at().
 * Two interchangeable implementations are provided; the opcode handlers
 * are written once in terms of CASE_OP / NEXT_OP / JUMP_OP / BREAK_OP.
 *
 * Threaded variant: every handler is a label L_<name> and control moves
 * from one handler to the next with a computed goto, either through the
 * address stored in the operation (p->opaddr) or through the
 * opcode_to_label[] table indexed by p->opcode.
 */
2409 #ifdef USE_THREADED_CODE
2410
2411 #define BYTECODE_INTERPRETER_START GOTO_OP;
2412 #define BYTECODE_INTERPRETER_END
/* Handler entry: profiling hook, remember where s stood, emit debug trace. */
2413 #define CASE_OP(x) L_##x: SOP_IN(OP_##x); sbegin = s; MATCH_DEBUG_OUT(0)
2414 #define DEFAULT_OP /* L_DEFAULT: */
/* NEXT_OP sets sprev to the position at handler entry before dispatching; */
/* JUMP_OP dispatches without touching sprev. */
2415 #define NEXT_OP sprev = sbegin; JUMP_OP
2416 #define JUMP_OP GOTO_OP
2417 #ifdef USE_DIRECT_THREADED_CODE
2418 #define GOTO_OP goto *(p->opaddr)
2419 #else
2420 #define GOTO_OP goto *opcode_to_label[p->opcode]
2421 #endif
2422 #define BREAK_OP /* Nothing */
2423
2424 #else
2425
/*
 * Switch variant: an endless loop that records sbegin = s and switches on
 * p->opcode. Leaving the switch with `break` (NEXT_OP) reaches the
 * `sprev = sbegin` in BYTECODE_INTERPRETER_END; `continue` (JUMP_OP / GOTO_OP)
 * restarts the loop and therefore skips that update. The `break` that
 * follows `continue` in GOTO_OP is never executed.
 */
2426 #define BYTECODE_INTERPRETER_START \
2427 while (1) {\
2428 MATCH_DEBUG_OUT(0)\
2429 sbegin = s;\
2430 switch (p->opcode) {
2431 #define BYTECODE_INTERPRETER_END } sprev = sbegin; }
2432 #define CASE_OP(x) case OP_##x: SOP_IN(OP_##x);
2433 #define DEFAULT_OP default:
2434 #define NEXT_OP break
2435 #define JUMP_OP GOTO_OP
2436 #define GOTO_OP continue; break
2437 #define BREAK_OP break
2438
2439 #endif /* USE_THREADED_CODE */
2440
/* Advance to the following operation. */
2441 #define INC_OP p++
/* The *_OUT forms run the SOP_OUT profiling hook before leaving a handler. */
2442 #define NEXT_OUT SOP_OUT; NEXT_OP
2443 #define JUMP_OUT SOP_OUT; JUMP_OP
2444 #define BREAK_OUT SOP_OUT; BREAK_OP
/* Same as JUMP_OUT, with CHECK_INTERRUPT_IN_MATCH evaluated before the jump. */
2445 #define CHECK_INTERRUPT_JUMP_OUT SOP_OUT; CHECK_INTERRUPT_IN_MATCH; JUMP_OP
2446
2447
#ifdef ONIG_DEBUG_MATCH
/*
 * Trace one interpreter step to stderr: step counter, stack index, offset of
 * `s` in the subject (-1 when s is NULL), a quoted preview of up to 7
 * characters starting at `s`, and the operation at p - (offset) (or
 * "finish" for the FinishCode sentinel).
 *
 * The macro relies on these names being in scope at the use site:
 * p, s, str, end, stk, i, counter, encode, reg, FinishCode.
 *
 * Preview copy is bounded in two ways:
 *  - a character whose encoded length (enclen) runs past `end` is clipped
 *    to the bytes that actually exist, so the subject is never over-read;
 *  - copying stops while buf still has room for the closing "...\"" and
 *    the terminating NUL, so buf is never overrun.
 *
 * The expansion deliberately ends in ';': CASE_OP and
 * BYTECODE_INTERPRETER_START invoke MATCH_DEBUG_OUT(0) without one.
 */
#define MATCH_DEBUG_OUT(offset) do {\
  Operation *xp;\
  UChar *q, *bp, buf[50];\
  int len, spos;\
  spos = IS_NOT_NULL(s) ? (int )(s - str) : -1;\
  xp = p - (offset);\
  fprintf(stderr, "%7u: %7ld: %4d> \"",\
          counter, GET_STACK_INDEX(stk), spos);\
  counter++;\
  bp = buf;\
  if (IS_NOT_NULL(s)) {\
    for (i = 0, q = s; i < 7 && q < end; i++) {\
      len = enclen(encode, q);\
      if (len > (int )(end - q)) len = (int )(end - q);\
      if (len > (int )sizeof(buf) - 5 - (int )(bp - buf)) break;\
      while (len-- > 0) *bp++ = *q++;\
    }\
    if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; }\
    else         { xmemcpy(bp, "\"",    1); bp += 1; }\
  }\
  else {\
    xmemcpy(bp, "\"", 1); bp += 1;\
  }\
  *bp = 0;\
  fputs((char* )buf, stderr);\
  for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr);\
  if (xp == FinishCode)\
    fprintf(stderr, "----: finish");\
  else {\
    fprintf(stderr, "%4d: ", (int )(xp - reg->ops));\
    print_compiled_byte_code(stderr, reg, (int )(xp - reg->ops), reg->ops, encode);\
  }\
  fprintf(stderr, "\n");\
} while(0);
#else
/* Tracing disabled: expands to nothing. */
#define MATCH_DEBUG_OUT(offset)
#endif
2484
2485
2486 /* match data(str - end) from position (sstart). */
2487 /* if sstart == str then set sprev to NULL. */
2488 static int
2489 match_at(regex_t* reg, const UChar* str, const UChar* end,
2490 const UChar* in_right_range, const UChar* sstart, UChar* sprev,
2491 MatchArg* msa)
2492 {
2493
2494 #if defined(USE_DIRECT_THREADED_CODE)
2495 static Operation FinishCode[] = { { .opaddr=&&L_FINISH } };
2496 #else
2497 static Operation FinishCode[] = { { OP_FINISH } };
2498 #endif
2499
2500 #ifdef USE_THREADED_CODE
2501 static const void *opcode_to_label[] = {
2502 &&L_FINISH,
2503 &&L_END,
2504 &&L_EXACT1,
2505 &&L_EXACT2,
2506 &&L_EXACT3,
2507 &&L_EXACT4,
2508 &&L_EXACT5,
2509 &&L_EXACTN,
2510 &&L_EXACTMB2N1,
2511 &&L_EXACTMB2N2,
2512 &&L_EXACTMB2N3,
2513 &&L_EXACTMB2N,
2514 &&L_EXACTMB3N,
2515 &&L_EXACTMBN,
2516 &&L_EXACT1_IC,
2517 &&L_EXACTN_IC,
2518 &&L_CCLASS,
2519 &&L_CCLASS_MB,
2520 &&L_CCLASS_MIX,
2521 &&L_CCLASS_NOT,
2522 &&L_CCLASS_MB_NOT,
2523 &&L_CCLASS_MIX_NOT,
2524 &&L_ANYCHAR,
2525 &&L_ANYCHAR_ML,
2526 &&L_ANYCHAR_STAR,
2527 &&L_ANYCHAR_ML_STAR,
2528 &&L_ANYCHAR_STAR_PEEK_NEXT,
2529 &&L_ANYCHAR_ML_STAR_PEEK_NEXT,
2530 &&L_WORD,
2531 &&L_WORD_ASCII,
2532 &&L_NO_WORD,
2533 &&L_NO_WORD_ASCII,
2534 &&L_WORD_BOUNDARY,
2535 &&L_NO_WORD_BOUNDARY,
2536 &&L_WORD_BEGIN,
2537 &&L_WORD_END,
2538 &&L_TEXT_SEGMENT_BOUNDARY,
2539 &&L_BEGIN_BUF,
2540 &&L_END_BUF,
2541 &&L_BEGIN_LINE,
2542 &&L_END_LINE,
2543 &&L_SEMI_END_BUF,
2544 &&L_BEGIN_POSITION,
2545 &&L_BACKREF1,
2546 &&L_BACKREF2,
2547 &&L_BACKREF_N,
2548 &&L_BACKREF_N_IC,
2549 &&L_BACKREF_MULTI,
2550 &&L_BACKREF_MULTI_IC,
2551 &&L_BACKREF_WITH_LEVEL,
2552 &&L_BACKREF_WITH_LEVEL_IC,
2553 &&L_BACKREF_CHECK,
2554 &&L_BACKREF_CHECK_WITH_LEVEL,
2555 &&L_MEMORY_START,
2556 &&L_MEMORY_START_PUSH,
2557 &&L_MEMORY_END_PUSH,
2558 &&L_MEMORY_END_PUSH_REC,
2559 &&L_MEMORY_END,
2560 &&L_MEMORY_END_REC,
2561 &&L_FAIL,
2562 &&L_JUMP,
2563 &&L_PUSH,
2564 &&L_PUSH_SUPER,
2565 &&L_POP_OUT,
2566 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
2567 &&L_PUSH_OR_JUMP_EXACT1,
2568 #endif
2569 &&L_PUSH_IF_PEEK_NEXT,
2570 &&L_REPEAT,
2571 &&L_REPEAT_NG,
2572 &&L_REPEAT_INC,
2573 &&L_REPEAT_INC_NG,
2574 &&L_REPEAT_INC_SG,
2575 &&L_REPEAT_INC_NG_SG,
2576 &&L_EMPTY_CHECK_START,
2577 &&L_EMPTY_CHECK_END,
2578 &&L_EMPTY_CHECK_END_MEMST,
2579 &&L_EMPTY_CHECK_END_MEMST_PUSH,
2580 &&L_PREC_READ_START,
2581 &&L_PREC_READ_END,
2582 &&L_PREC_READ_NOT_START,
2583 &&L_PREC_READ_NOT_END,
2584 &&L_ATOMIC_START,
2585 &&L_ATOMIC_END,
2586 &&L_LOOK_BEHIND,
2587 &&L_LOOK_BEHIND_NOT_START,
2588 &&L_LOOK_BEHIND_NOT_END,
2589 &&L_CALL,
2590 &&L_RETURN,
2591 &&L_PUSH_SAVE_VAL,
2592 &&L_UPDATE_VAR,
2593 #ifdef USE_CALLOUT
2594 &&L_CALLOUT_CONTENTS,
2595 &&L_CALLOUT_NAME,
2596 #endif
2597 };
2598 #endif
2599
2600 int i, n, num_mem, best_len, pop_level;
2601 LengthType tlen, tlen2;
2602 MemNumType mem;
2603 RelAddrType addr;
2604 UChar *s, *q, *ps, *sbegin;
2605 UChar *right_range;
2606 int is_alloca;
2607 char *alloc_base;
2608 StackType *stk_base, *stk, *stk_end;
2609 StackType *stkp; /* used as any purpose. */
2610 StackIndex si;
2611 StackIndex *repeat_stk;
2612 StackIndex *mem_start_stk, *mem_end_stk;
2613 UChar* keep;
2614 #ifdef USE_RETRY_LIMIT_IN_MATCH
2615 unsigned long retry_limit_in_match;
2616 unsigned long retry_in_match_counter;
2617 #endif
2618
2619 #ifdef USE_CALLOUT
2620 int of;
2621 #endif
2622
2623 Operation* p = reg->ops;
2624 OnigOptionType option = reg->options;
2625 OnigEncoding encode = reg->enc;
2626 OnigCaseFoldType case_fold_flag = reg->case_fold_flag;
2627
2628 #ifdef ONIG_DEBUG_MATCH
2629 static unsigned int counter = 1;
2630 #endif
2631
2632 #ifdef USE_DIRECT_THREADED_CODE
2633 if (IS_NULL(msa)) {
2634 for (i = 0; i < reg->ops_used; i++) {
2635 const void* addr;
2636 addr = opcode_to_label[reg->ocs[i]];
2637 p->opaddr = addr;
2638 p++;
2639 }
2640 return ONIG_NORMAL;
2641 }
2642 #endif
2643
2644 #ifdef USE_CALLOUT
2645 msa->mp->match_at_call_counter++;
2646 #endif
2647
2648 #ifdef USE_RETRY_LIMIT_IN_MATCH
2649 retry_limit_in_match = msa->retry_limit_in_match;
2650 #endif
2651
2652 pop_level = reg->stack_pop_level;
2653 num_mem = reg->num_mem;
2654 STACK_INIT(INIT_MATCH_STACK_SIZE);
2655 UPDATE_FOR_STACK_REALLOC;
2656 for (i = 1; i <= num_mem; i++) {
2657 mem_start_stk[i] = mem_end_stk[i] = INVALID_STACK_INDEX;
2658 }
2659
2660 #ifdef ONIG_DEBUG_MATCH
2661 fprintf(stderr, "match_at: str: %p, end: %p, start: %p, sprev: %p\n",
2662 str, end, sstart, sprev);
2663 fprintf(stderr, "size: %d, start offset: %d\n",
2664 (int )(end - str), (int )(sstart - str));
2665 #endif
2666
2667 best_len = ONIG_MISMATCH;
2668 keep = s = (UChar* )sstart;
2669 STACK_PUSH_BOTTOM(STK_ALT, FinishCode); /* bottom stack */
2670 INIT_RIGHT_RANGE;
2671
2672 #ifdef USE_RETRY_LIMIT_IN_MATCH
2673 retry_in_match_counter = 0;
2674 #endif
2675
2676 BYTECODE_INTERPRETER_START {
2677 CASE_OP(END)
2678 n = (int )(s - sstart);
2679 if (n > best_len) {
2680 OnigRegion* region;
2681 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
2682 if (IS_FIND_LONGEST(option)) {
2683 if (n > msa->best_len) {
2684 msa->best_len = n;
2685 msa->best_s = (UChar* )sstart;
2686 }
2687 else
2688 goto end_best_len;
2689 }
2690 #endif
2691 best_len = n;
2692 region = msa->region;
2693 if (region) {
2694 if (keep > s) keep = s;
2695
2696 #ifdef USE_POSIX_API_REGION_OPTION
2697 if (IS_POSIX_REGION(msa->options)) {
2698 posix_regmatch_t* rmt = (posix_regmatch_t* )region;
2699
2700 rmt[0].rm_so = (regoff_t )(keep - str);
2701 rmt[0].rm_eo = (regoff_t )(s - str);
2702 for (i = 1; i <= num_mem; i++) {
2703 if (mem_end_stk[i] != INVALID_STACK_INDEX) {
2704 if (MEM_STATUS_AT(reg->bt_mem_start, i))
2705 rmt[i].rm_so = (regoff_t )(STACK_AT(mem_start_stk[i])->u.mem.pstr - str);
2706 else
2707 rmt[i].rm_so = (regoff_t )((UChar* )((void* )(mem_start_stk[i])) - str);
2708
2709 rmt[i].rm_eo = (regoff_t )((MEM_STATUS_AT(reg->bt_mem_end, i)
2710 ? STACK_AT(mem_end_stk[i])->u.mem.pstr
2711 : (UChar* )((void* )mem_end_stk[i]))
2712 - str);
2713 }
2714 else {
2715 rmt[i].rm_so = rmt[i].rm_eo = ONIG_REGION_NOTPOS;
2716 }
2717 }
2718 }
2719 else {
2720 #endif /* USE_POSIX_API_REGION_OPTION */
2721 region->beg[0] = (int )(keep - str);
2722 region->end[0] = (int )(s - str);
2723 for (i = 1; i <= num_mem; i++) {
2724 if (mem_end_stk[i] != INVALID_STACK_INDEX) {
2725 if (MEM_STATUS_AT(reg->bt_mem_start, i))
2726 region->beg[i] = (int )(STACK_AT(mem_start_stk[i])->u.mem.pstr - str);
2727 else
2728 region->beg[i] = (int )((UChar* )((void* )mem_start_stk[i]) - str);
2729
2730 region->end[i] = (int )((MEM_STATUS_AT(reg->bt_mem_end, i)
2731 ? STACK_AT(mem_end_stk[i])->u.mem.pstr
2732 : (UChar* )((void* )mem_end_stk[i])) - str);
2733 }
2734 else {
2735 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
2736 }
2737 }
2738
2739 #ifdef USE_CAPTURE_HISTORY
2740 if (reg->capture_history != 0) {
2741 int r;
2742 OnigCaptureTreeNode* node;
2743
2744 if (IS_NULL(region->history_root)) {
2745 region->history_root = node = history_node_new();
2746 CHECK_NULL_RETURN_MEMERR(node);
2747 }
2748 else {
2749 node = region->history_root;
2750 history_tree_clear(node);
2751 }
2752
2753 node->group = 0;
2754 node->beg = (int )(keep - str);
2755 node->end = (int )(s - str);
2756
2757 stkp = stk_base;
2758 r = make_capture_history_tree(region->history_root, &stkp,
2759 stk, (UChar* )str, reg);
2760 if (r < 0) {
2761 best_len = r; /* error code */
2762 goto finish;
2763 }
2764 }
2765 #endif /* USE_CAPTURE_HISTORY */
2766 #ifdef USE_POSIX_API_REGION_OPTION
2767 } /* else IS_POSIX_REGION() */
2768 #endif
2769 } /* if (region) */
2770 } /* n > best_len */
2771
2772 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
2773 end_best_len:
2774 #endif
2775 SOP_OUT;
2776
2777 if (IS_FIND_CONDITION(option)) {
2778 if (IS_FIND_NOT_EMPTY(option) && s == sstart) {
2779 best_len = ONIG_MISMATCH;
2780 goto fail; /* for retry */
2781 }
2782 if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) {
2783 goto fail; /* for retry */
2784 }
2785 }
2786
2787 /* default behavior: return first-matching result. */
2788 goto finish;
2789
2790 CASE_OP(EXACT1)
2791 DATA_ENSURE(1);
2792 ps = p->exact.s;
2793 if (*ps != *s) goto fail;
2794 s++;
2795 INC_OP;
2796 NEXT_OUT;
2797
2798 CASE_OP(EXACT1_IC)
2799 {
2800 int len;
2801 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
2802
2803 DATA_ENSURE(1);
2804 len = ONIGENC_MBC_CASE_FOLD(encode,
2805 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
2806 case_fold_flag,
2807 &s, end, lowbuf);
2808 DATA_ENSURE(0);
2809 q = lowbuf;
2810 ps = p->exact.s;
2811 while (len-- > 0) {
2812 if (*ps != *q) goto fail;
2813 ps++; q++;
2814 }
2815 }
2816 INC_OP;
2817 NEXT_OUT;
2818
2819 CASE_OP(EXACT2)
2820 DATA_ENSURE(2);
2821 ps = p->exact.s;
2822 if (*ps != *s) goto fail;
2823 ps++; s++;
2824 if (*ps != *s) goto fail;
2825 sprev = s;
2826 s++;
2827 INC_OP;
2828 JUMP_OUT;
2829
2830 CASE_OP(EXACT3)
2831 DATA_ENSURE(3);
2832 ps = p->exact.s;
2833 if (*ps != *s) goto fail;
2834 ps++; s++;
2835 if (*ps != *s) goto fail;
2836 ps++; s++;
2837 if (*ps != *s) goto fail;
2838 sprev = s;
2839 s++;
2840 INC_OP;
2841 JUMP_OUT;
2842
2843 CASE_OP(EXACT4)
2844 DATA_ENSURE(4);
2845 ps = p->exact.s;
2846 if (*ps != *s) goto fail;
2847 ps++; s++;
2848 if (*ps != *s) goto fail;
2849 ps++; s++;
2850 if (*ps != *s) goto fail;
2851 ps++; s++;
2852 if (*ps != *s) goto fail;
2853 sprev = s;
2854 s++;
2855 INC_OP;
2856 JUMP_OUT;
2857
2858 CASE_OP(EXACT5)
2859 DATA_ENSURE(5);
2860 ps = p->exact.s;
2861 if (*ps != *s) goto fail;
2862 ps++; s++;
2863 if (*ps != *s) goto fail;
2864 ps++; s++;
2865 if (*ps != *s) goto fail;
2866 ps++; s++;
2867 if (*ps != *s) goto fail;
2868 ps++; s++;
2869 if (*ps != *s) goto fail;
2870 sprev = s;
2871 s++;
2872 INC_OP;
2873 JUMP_OUT;
2874
2875 CASE_OP(EXACTN)
2876 tlen = p->exact_n.n;
2877 DATA_ENSURE(tlen);
2878 ps = p->exact_n.s;
2879 while (tlen-- > 0) {
2880 if (*ps++ != *s++) goto fail;
2881 }
2882 sprev = s - 1;
2883 INC_OP;
2884 JUMP_OUT;
2885
2886 CASE_OP(EXACTN_IC)
2887 {
2888 int len;
2889 UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
2890
2891 tlen = p->exact_n.n;
2892 ps = p->exact_n.s;
2893 endp = ps + tlen;
2894 while (ps < endp) {
2895 sprev = s;
2896 DATA_ENSURE(1);
2897 len = ONIGENC_MBC_CASE_FOLD(encode,
2898 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
2899 case_fold_flag,
2900 &s, end, lowbuf);
2901 DATA_ENSURE(0);
2902 q = lowbuf;
2903 while (len-- > 0) {
2904 if (*ps != *q) goto fail;
2905 ps++; q++;
2906 }
2907 }
2908 }
2909
2910 INC_OP;
2911 JUMP_OUT;
2912
2913 CASE_OP(EXACTMB2N1)
2914 DATA_ENSURE(2);
2915 ps = p->exact.s;
2916 if (*ps != *s) goto fail;
2917 ps++; s++;
2918 if (*ps != *s) goto fail;
2919 s++;
2920 INC_OP;
2921 NEXT_OUT;
2922
2923 CASE_OP(EXACTMB2N2)
2924 DATA_ENSURE(4);
2925 ps = p->exact.s;
2926 if (*ps != *s) goto fail;
2927 ps++; s++;
2928 if (*ps != *s) goto fail;
2929 ps++; s++;
2930 sprev = s;
2931 if (*ps != *s) goto fail;
2932 ps++; s++;
2933 if (*ps != *s) goto fail;
2934 s++;
2935 INC_OP;
2936 JUMP_OUT;
2937
2938 CASE_OP(EXACTMB2N3)
2939 DATA_ENSURE(6);
2940 ps = p->exact.s;
2941 if (*ps != *s) goto fail;
2942 ps++; s++;
2943 if (*ps != *s) goto fail;
2944 ps++; s++;
2945 if (*ps != *s) goto fail;
2946 ps++; s++;
2947 if (*ps != *s) goto fail;
2948 ps++; s++;
2949 sprev = s;
2950 if (*ps != *s) goto fail;
2951 ps++; s++;
2952 if (*ps != *s) goto fail;
2953 ps++; s++;
2954 INC_OP;
2955 JUMP_OUT;
2956
2957 CASE_OP(EXACTMB2N)
2958 tlen = p->exact_n.n;
2959 DATA_ENSURE(tlen * 2);
2960 ps = p->exact_n.s;
2961 while (tlen-- > 0) {
2962 if (*ps != *s) goto fail;
2963 ps++; s++;
2964 if (*ps != *s) goto fail;
2965 ps++; s++;
2966 }
2967 sprev = s - 2;
2968 INC_OP;
2969 JUMP_OUT;
2970
2971 CASE_OP(EXACTMB3N)
2972 tlen = p->exact_n.n;
2973 DATA_ENSURE(tlen * 3);
2974 ps = p->exact_n.s;
2975 while (tlen-- > 0) {
2976 if (*ps != *s) goto fail;
2977 ps++; s++;
2978 if (*ps != *s) goto fail;
2979 ps++; s++;
2980 if (*ps != *s) goto fail;
2981 ps++; s++;
2982 }
2983 sprev = s - 3;
2984 INC_OP;
2985 JUMP_OUT;
2986
2987 CASE_OP(EXACTMBN)
2988 tlen = p->exact_len_n.len; /* mb byte len */
2989 tlen2 = p->exact_len_n.n; /* number of chars */
2990 tlen2 *= tlen;
2991 DATA_ENSURE(tlen2);
2992 ps = p->exact_len_n.s;
2993 while (tlen2-- > 0) {
2994 if (*ps != *s) goto fail;
2995 ps++; s++;
2996 }
2997 sprev = s - tlen;
2998 INC_OP;
2999 JUMP_OUT;
3000
3001 CASE_OP(CCLASS)
3002 DATA_ENSURE(1);
3003 if (BITSET_AT(p->cclass.bsp, *s) == 0) goto fail;
3004 s++;
3005 INC_OP;
3006 NEXT_OUT;
3007
3008 CASE_OP(CCLASS_MB)
3009 DATA_ENSURE(1);
3010 if (! ONIGENC_IS_MBC_HEAD(encode, s)) goto fail;
3011
3012 cclass_mb:
3013 {
3014 OnigCodePoint code;
3015 UChar *ss;
3016 int mb_len;
3017
3018 DATA_ENSURE(1);
3019 mb_len = enclen(encode, s);
3020 DATA_ENSURE(mb_len);
3021 ss = s;
3022 s += mb_len;
3023 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
3024 if (! onig_is_in_code_range(p->cclass_mb.mb, code)) goto fail;
3025 }
3026 INC_OP;
3027 NEXT_OUT;
3028
3029 CASE_OP(CCLASS_MIX)
3030 DATA_ENSURE(1);
3031 if (ONIGENC_IS_MBC_HEAD(encode, s)) {
3032 goto cclass_mb;
3033 }
3034 else {
3035 if (BITSET_AT(p->cclass_mix.bsp, *s) == 0)
3036 goto fail;
3037
3038 s++;
3039 }
3040 INC_OP;
3041 NEXT_OUT;
3042
3043 CASE_OP(CCLASS_NOT)
3044 DATA_ENSURE(1);
3045 if (BITSET_AT(p->cclass.bsp, *s) != 0) goto fail;
3046 s += enclen(encode, s);
3047 INC_OP;
3048 NEXT_OUT;
3049
3050 CASE_OP(CCLASS_MB_NOT)
3051 DATA_ENSURE(1);
3052 if (! ONIGENC_IS_MBC_HEAD(encode, s)) {
3053 s++;
3054 goto cc_mb_not_success;
3055 }
3056
3057 cclass_mb_not:
3058 {
3059 OnigCodePoint code;
3060 UChar *ss;
3061 int mb_len = enclen(encode, s);
3062
3063 if (! DATA_ENSURE_CHECK(mb_len)) {
3064 DATA_ENSURE(1);
3065 s = (UChar* )end;
3066 goto cc_mb_not_success;
3067 }
3068
3069 ss = s;
3070 s += mb_len;
3071 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
3072 if (onig_is_in_code_range(p->cclass_mb.mb, code)) goto fail;
3073 }
3074
3075 cc_mb_not_success:
3076 INC_OP;
3077 NEXT_OUT;
3078
3079 CASE_OP(CCLASS_MIX_NOT)
3080 DATA_ENSURE(1);
3081 if (ONIGENC_IS_MBC_HEAD(encode, s)) {
3082 goto cclass_mb_not;
3083 }
3084 else {
3085 if (BITSET_AT(p->cclass_mix.bsp, *s) != 0)
3086 goto fail;
3087
3088 s++;
3089 }
3090 INC_OP;
3091 NEXT_OUT;
3092
3093 CASE_OP(ANYCHAR)
3094 DATA_ENSURE(1);
3095 n = enclen(encode, s);
3096 DATA_ENSURE(n);
3097 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
3098 s += n;
3099 INC_OP;
3100 NEXT_OUT;
3101
3102 CASE_OP(ANYCHAR_ML)
3103 DATA_ENSURE(1);
3104 n = enclen(encode, s);
3105 DATA_ENSURE(n);
3106 s += n;
3107 INC_OP;
3108 NEXT_OUT;
3109
3110 CASE_OP(ANYCHAR_STAR)
3111 INC_OP;
3112 while (DATA_ENSURE_CHECK1) {
3113 STACK_PUSH_ALT(p, s, sprev);
3114 n = enclen(encode, s);
3115 DATA_ENSURE(n);
3116 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
3117 sprev = s;
3118 s += n;
3119 }
3120 JUMP_OUT;
3121
3122 CASE_OP(ANYCHAR_ML_STAR)
3123 INC_OP;
3124 while (DATA_ENSURE_CHECK1) {
3125 STACK_PUSH_ALT(p, s, sprev);
3126 n = enclen(encode, s);
3127 if (n > 1) {
3128 DATA_ENSURE(n);
3129 sprev = s;
3130 s += n;
3131 }
3132 else {
3133 sprev = s;
3134 s++;
3135 }
3136 }
3137 JUMP_OUT;
3138
3139 CASE_OP(ANYCHAR_STAR_PEEK_NEXT)
3140 {
3141 UChar c;
3142
3143 c = p->anychar_star_peek_next.c;
3144 INC_OP;
3145 while (DATA_ENSURE_CHECK1) {
3146 if (c == *s) {
3147 STACK_PUSH_ALT(p, s, sprev);
3148 }
3149 n = enclen(encode, s);
3150 DATA_ENSURE(n);
3151 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
3152 sprev = s;
3153 s += n;
3154 }
3155 }
3156 NEXT_OUT;
3157
3158 CASE_OP(ANYCHAR_ML_STAR_PEEK_NEXT)
3159 {
3160 UChar c;
3161
3162 c = p->anychar_star_peek_next.c;
3163 INC_OP;
3164 while (DATA_ENSURE_CHECK1) {
3165 if (c == *s) {
3166 STACK_PUSH_ALT(p, s, sprev);
3167 }
3168 n = enclen(encode, s);
3169 if (n > 1) {
3170 DATA_ENSURE(n);
3171 sprev = s;
3172 s += n;
3173 }
3174 else {
3175 sprev = s;
3176 s++;
3177 }
3178 }
3179 }
3180 NEXT_OUT;
3181
3182 CASE_OP(WORD)
3183 DATA_ENSURE(1);
3184 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
3185 goto fail;
3186
3187 s += enclen(encode, s);
3188 INC_OP;
3189 NEXT_OUT;
3190
3191 CASE_OP(WORD_ASCII)
3192 DATA_ENSURE(1);
3193 if (! ONIGENC_IS_MBC_WORD_ASCII(encode, s, end))
3194 goto fail;
3195
3196 s += enclen(encode, s);
3197 INC_OP;
3198 NEXT_OUT;
3199
3200 CASE_OP(NO_WORD)
3201 DATA_ENSURE(1);
3202 if (ONIGENC_IS_MBC_WORD(encode, s, end))
3203 goto fail;
3204
3205 s += enclen(encode, s);
3206 INC_OP;
3207 NEXT_OUT;
3208
3209 CASE_OP(NO_WORD_ASCII)
3210 DATA_ENSURE(1);
3211 if (ONIGENC_IS_MBC_WORD_ASCII(encode, s, end))
3212 goto fail;
3213
3214 s += enclen(encode, s);
3215 INC_OP;
3216 NEXT_OUT;
3217
3218 CASE_OP(WORD_BOUNDARY)
3219 {
3220 ModeType mode;
3221
3222 mode = p->word_boundary.mode;
3223 if (ON_STR_BEGIN(s)) {
3224 DATA_ENSURE(1);
3225 if (! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode))
3226 goto fail;
3227 }
3228 else if (ON_STR_END(s)) {
3229 if (! IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
3230 goto fail;
3231 }
3232 else {
3233 if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)
3234 == IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
3235 goto fail;
3236 }
3237 }
3238 INC_OP;
3239 JUMP_OUT;
3240
3241 CASE_OP(NO_WORD_BOUNDARY)
3242 {
3243 ModeType mode;
3244
3245 mode = p->word_boundary.mode;
3246 if (ON_STR_BEGIN(s)) {
3247 if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode))
3248 goto fail;
3249 }
3250 else if (ON_STR_END(s)) {
3251 if (IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
3252 goto fail;
3253 }
3254 else {
3255 if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)
3256 != IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
3257 goto fail;
3258 }
3259 }
3260 INC_OP;
3261 JUMP_OUT;
3262
3263 #ifdef USE_WORD_BEGIN_END
3264 CASE_OP(WORD_BEGIN)
3265 {
3266 ModeType mode;
3267
3268 mode = p->word_boundary.mode;
3269 if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) {
3270 if (ON_STR_BEGIN(s) || !IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) {
3271 INC_OP;
3272 JUMP_OUT;
3273 }
3274 }
3275 }
3276 goto fail;
3277
3278 CASE_OP(WORD_END)
3279 {
3280 ModeType mode;
3281
3282 mode = p->word_boundary.mode;
3283 if (!ON_STR_BEGIN(s) && IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) {
3284 if (ON_STR_END(s) || ! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) {
3285 INC_OP;
3286 JUMP_OUT;
3287 }
3288 }
3289 }
3290 goto fail;
3291 #endif
3292
3293 CASE_OP(TEXT_SEGMENT_BOUNDARY)
3294 {
3295 int is_break;
3296
3297 switch (p->text_segment_boundary.type) {
3298 case EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
3299 is_break = onigenc_egcb_is_break_position(encode, s, sprev, str, end);
3300 break;
3301 #ifdef USE_UNICODE_WORD_BREAK
3302 case WORD_BOUNDARY:
3303 is_break = onigenc_wb_is_break_position(encode, s, sprev, str, end);
3304 break;
3305 #endif
3306 default:
3307 goto bytecode_error;
3308 break;
3309 }
3310
3311 if (p->text_segment_boundary.not != 0)
3312 is_break = ! is_break;
3313
3314 if (is_break != 0) {
3315 INC_OP;
3316 JUMP_OUT;
3317 }
3318 else {
3319 goto fail;
3320 }
3321 }
3322
3323 CASE_OP(BEGIN_BUF)
3324 if (! ON_STR_BEGIN(s)) goto fail;
3325
3326 INC_OP;
3327 JUMP_OUT;
3328
3329 CASE_OP(END_BUF)
3330 if (! ON_STR_END(s)) goto fail;
3331
3332 INC_OP;
3333 JUMP_OUT;
3334
3335 CASE_OP(BEGIN_LINE)
3336 if (ON_STR_BEGIN(s)) {
3337 if (IS_NOTBOL(msa->options)) goto fail;
3338 INC_OP;
3339 JUMP_OUT;
3340 }
3341 else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) {
3342 INC_OP;
3343 JUMP_OUT;
3344 }
3345 goto fail;
3346
3347 CASE_OP(END_LINE)
3348 if (ON_STR_END(s)) {
3349 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3350 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
3351 #endif
3352 if (IS_NOTEOL(msa->options)) goto fail;
3353 INC_OP;
3354 JUMP_OUT;
3355 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3356 }
3357 #endif
3358 }
3359 else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) {
3360 INC_OP;
3361 JUMP_OUT;
3362 }
3363 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3364 else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
3365 INC_OP;
3366 JUMP_OUT;
3367 }
3368 #endif
3369 goto fail;
3370
3371 CASE_OP(SEMI_END_BUF)
3372 if (ON_STR_END(s)) {
3373 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3374 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
3375 #endif
3376 if (IS_NOTEOL(msa->options)) goto fail;
3377 INC_OP;
3378 JUMP_OUT;
3379 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3380 }
3381 #endif
3382 }
3383 else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) &&
3384 ON_STR_END(s + enclen(encode, s))) {
3385 INC_OP;
3386 JUMP_OUT;
3387 }
3388 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3389 else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
3390 UChar* ss = s + enclen(encode, s);
3391 ss += enclen(encode, ss);
3392 if (ON_STR_END(ss)) {
3393 INC_OP;
3394 JUMP_OUT;
3395 }
3396 }
3397 #endif
3398 goto fail;
3399
3400 CASE_OP(BEGIN_POSITION)
3401 if (s != msa->start)
3402 goto fail;
3403
3404 INC_OP;
3405 JUMP_OUT;
3406
3407 CASE_OP(MEMORY_START_PUSH)
3408 mem = p->memory_start.num;
3409 STACK_PUSH_MEM_START(mem, s);
3410 INC_OP;
3411 JUMP_OUT;
3412
3413 CASE_OP(MEMORY_START)
3414 mem = p->memory_start.num;
3415 mem_start_stk[mem] = (StackIndex )((void* )s);
3416 INC_OP;
3417 JUMP_OUT;
3418
3419 CASE_OP(MEMORY_END_PUSH)
3420 mem = p->memory_end.num;
3421 STACK_PUSH_MEM_END(mem, s);
3422 INC_OP;
3423 JUMP_OUT;
3424
3425 CASE_OP(MEMORY_END)
3426 mem = p->memory_end.num;
3427 mem_end_stk[mem] = (StackIndex )((void* )s);
3428 INC_OP;
3429 JUMP_OUT;
3430
3431 #ifdef USE_CALL
3432 CASE_OP(MEMORY_END_PUSH_REC)
3433 mem = p->memory_end.num;
3434 STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
3435 si = GET_STACK_INDEX(stkp);
3436 STACK_PUSH_MEM_END(mem, s);
3437 mem_start_stk[mem] = si;
3438 INC_OP;
3439 JUMP_OUT;
3440
3441 CASE_OP(MEMORY_END_REC)
3442 mem = p->memory_end.num;
3443 mem_end_stk[mem] = (StackIndex )((void* )s);
3444 STACK_GET_MEM_START(mem, stkp);
3445
3446 if (MEM_STATUS_AT(reg->bt_mem_start, mem))
3447 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
3448 else
3449 mem_start_stk[mem] = (StackIndex )((void* )stkp->u.mem.pstr);
3450
3451 STACK_PUSH_MEM_END_MARK(mem);
3452 INC_OP;
3453 JUMP_OUT;
3454 #endif
3455
3456 CASE_OP(BACKREF1)
3457 mem = 1;
3458 goto backref;
3459
3460 CASE_OP(BACKREF2)
3461 mem = 2;
3462 goto backref;
3463
3464 CASE_OP(BACKREF_N)
3465 mem = p->backref_n.n1;
3466 backref:
3467 {
3468 int len;
3469 UChar *pstart, *pend;
3470
3471 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
3472 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
3473
3474 if (MEM_STATUS_AT(reg->bt_mem_start, mem))
3475 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3476 else
3477 pstart = (UChar* )((void* )mem_start_stk[mem]);
3478
3479 pend = (MEM_STATUS_AT(reg->bt_mem_end, mem)
3480 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3481 : (UChar* )((void* )mem_end_stk[mem]));
3482 n = (int )(pend - pstart);
3483 if (n != 0) {
3484 DATA_ENSURE(n);
3485 sprev = s;
3486 STRING_CMP(s, pstart, n);
3487 while (sprev + (len = enclen(encode, sprev)) < s)
3488 sprev += len;
3489 }
3490 }
3491 INC_OP;
3492 JUMP_OUT;
3493
3494 CASE_OP(BACKREF_N_IC)
3495 mem = p->backref_n.n1;
3496 {
3497 int len;
3498 UChar *pstart, *pend;
3499
3500 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
3501 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
3502
3503 if (MEM_STATUS_AT(reg->bt_mem_start, mem))
3504 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3505 else
3506 pstart = (UChar* )((void* )mem_start_stk[mem]);
3507
3508 pend = (MEM_STATUS_AT(reg->bt_mem_end, mem)
3509 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3510 : (UChar* )((void* )mem_end_stk[mem]));
3511 n = (int )(pend - pstart);
3512 if (n != 0) {
3513 DATA_ENSURE(n);
3514 sprev = s;
3515 STRING_CMP_IC(case_fold_flag, pstart, &s, n);
3516 while (sprev + (len = enclen(encode, sprev)) < s)
3517 sprev += len;
3518 }
3519 }
3520 INC_OP;
3521 JUMP_OUT;
3522
3523 CASE_OP(BACKREF_MULTI)
3524 {
3525 int len, is_fail;
3526 UChar *pstart, *pend, *swork;
3527
3528 tlen = p->backref_general.num;
3529 for (i = 0; i < tlen; i++) {
3530 mem = tlen == 1 ? p->backref_general.n1 : p->backref_general.ns[i];
3531
3532 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
3533 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
3534
3535 if (MEM_STATUS_AT(reg->bt_mem_start, mem))
3536 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3537 else
3538 pstart = (UChar* )((void* )mem_start_stk[mem]);
3539
3540 pend = (MEM_STATUS_AT(reg->bt_mem_end, mem)
3541 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3542 : (UChar* )((void* )mem_end_stk[mem]));
3543 n = (int )(pend - pstart);
3544 if (n != 0) {
3545 DATA_ENSURE(n);
3546 sprev = s;
3547 swork = s;
3548 STRING_CMP_VALUE(swork, pstart, n, is_fail);
3549 if (is_fail) continue;
3550 s = swork;
3551 while (sprev + (len = enclen(encode, sprev)) < s)
3552 sprev += len;
3553 }
3554 break; /* success */
3555 }
3556 if (i == tlen) goto fail;
3557 }
3558 INC_OP;
3559 JUMP_OUT;
3560
3561 CASE_OP(BACKREF_MULTI_IC)
3562 {
3563 int len, is_fail;
3564 UChar *pstart, *pend, *swork;
3565
3566 tlen = p->backref_general.num;
3567 for (i = 0; i < tlen; i++) {
3568 mem = tlen == 1 ? p->backref_general.n1 : p->backref_general.ns[i];
3569
3570 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
3571 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
3572
3573 if (MEM_STATUS_AT(reg->bt_mem_start, mem))
3574 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3575 else
3576 pstart = (UChar* )((void* )mem_start_stk[mem]);
3577
3578 pend = (MEM_STATUS_AT(reg->bt_mem_end, mem)
3579 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3580 : (UChar* )((void* )mem_end_stk[mem]));
3581 n = (int )(pend - pstart);
3582 if (n != 0) {
3583 DATA_ENSURE(n);
3584 sprev = s;
3585 swork = s;
3586 STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail);
3587 if (is_fail) continue;
3588 s = swork;
3589 while (sprev + (len = enclen(encode, sprev)) < s)
3590 sprev += len;
3591 }
3592 break; /* success */
3593 }
3594 if (i == tlen) goto fail;
3595 }
3596 INC_OP;
3597 JUMP_OUT;
3598
3599 #ifdef USE_BACKREF_WITH_LEVEL
3600 CASE_OP(BACKREF_WITH_LEVEL_IC)
3601 n = 1; /* ignore case */
3602 goto backref_with_level;
3603 CASE_OP(BACKREF_WITH_LEVEL)
3604 {
3605 int len;
3606 int level;
3607 MemNumType* mems;
3608 UChar* ssave;
3609
3610 n = 0;
3611 backref_with_level:
3612 level = p->backref_general.nest_level;
3613 tlen = p->backref_general.num;
3614 mems = tlen == 1 ? &(p->backref_general.n1) : p->backref_general.ns;
3615
3616 ssave = s;
3617 if (backref_match_at_nested_level(reg, stk, stk_base, n,
3618 case_fold_flag, level, (int )tlen, mems, &s, end)) {
3619 if (ssave != s) {
3620 sprev = ssave;
3621 while (sprev + (len = enclen(encode, sprev)) < s)
3622 sprev += len;
3623 }
3624 }
3625 else
3626 goto fail;
3627 }
3628 INC_OP;
3629 JUMP_OUT;
3630 #endif
3631
3632 CASE_OP(BACKREF_CHECK)
3633 {
3634 MemNumType* mems;
3635
3636 tlen = p->backref_general.num;
3637 mems = tlen == 1 ? &(p->backref_general.n1) : p->backref_general.ns;
3638
3639 for (i = 0; i < tlen; i++) {
3640 mem = mems[i];
3641 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
3642 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
3643 break; /* success */
3644 }
3645 if (i == tlen) goto fail;
3646 }
3647 INC_OP;
3648 JUMP_OUT;
3649
3650 #ifdef USE_BACKREF_WITH_LEVEL
3651 CASE_OP(BACKREF_CHECK_WITH_LEVEL)
3652 {
3653 LengthType level;
3654 MemNumType* mems;
3655
3656 level = p->backref_general.nest_level;
3657 tlen = p->backref_general.num;
3658 mems = tlen == 1 ? &(p->backref_general.n1) : p->backref_general.ns;
3659
3660 if (backref_check_at_nested_level(reg, stk, stk_base,
3661 (int )level, (int )tlen, mems) == 0)
3662 goto fail;
3663 }
3664 INC_OP;
3665 JUMP_OUT;
3666 #endif
3667
3668 CASE_OP(EMPTY_CHECK_START)
3669 mem = p->empty_check_start.mem; /* mem: null check id */
3670 STACK_PUSH_EMPTY_CHECK_START(mem, s);
3671 INC_OP;
3672 JUMP_OUT;
3673
3674 CASE_OP(EMPTY_CHECK_END)
3675 {
3676 int is_empty;
3677
3678 mem = p->empty_check_end.mem; /* mem: null check id */
3679 STACK_EMPTY_CHECK(is_empty, mem, s);
3680 INC_OP;
3681 if (is_empty) {
3682 #ifdef ONIG_DEBUG_MATCH
3683 fprintf(stderr, "EMPTY_CHECK_END: skip id:%d, s:%p\n", (int )mem, s);
3684 #endif
3685 empty_check_found:
3686 /* empty loop found, skip next instruction */
3687 #if defined(ONIG_DEBUG) && !defined(USE_DIRECT_THREADED_CODE)
3688 switch (p->opcode) {
3689 case OP_JUMP:
3690 case OP_PUSH:
3691 case OP_REPEAT_INC:
3692 case OP_REPEAT_INC_NG:
3693 case OP_REPEAT_INC_SG:
3694 case OP_REPEAT_INC_NG_SG:
3695 INC_OP;
3696 break;
3697 default:
3698 goto unexpected_bytecode_error;
3699 break;
3700 }
3701 #else
3702 INC_OP;
3703 #endif
3704 }
3705 }
3706 JUMP_OUT;
3707
3708 #ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT
3709 CASE_OP(EMPTY_CHECK_END_MEMST)
3710 {
3711 int is_empty;
3712
3713 mem = p->empty_check_end.mem; /* mem: null check id */
3714 STACK_EMPTY_CHECK_MEM(is_empty, mem, s, reg);
3715 INC_OP;
3716 if (is_empty) {
3717 #ifdef ONIG_DEBUG_MATCH
3718 fprintf(stderr, "EMPTY_CHECK_END_MEM: skip id:%d, s:%p\n", (int)mem, s);
3719 #endif
3720 if (is_empty == -1) goto fail;
3721 goto empty_check_found;
3722 }
3723 }
3724 JUMP_OUT;
3725 #endif
3726
3727 #ifdef USE_CALL
3728 CASE_OP(EMPTY_CHECK_END_MEMST_PUSH)
3729 {
3730 int is_empty;
3731
3732 mem = p->empty_check_end.mem; /* mem: null check id */
3733 #ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT
3734 STACK_EMPTY_CHECK_MEM_REC(is_empty, mem, s, reg);
3735 #else
3736 STACK_EMPTY_CHECK_REC(is_empty, mem, s);
3737 #endif
3738 INC_OP;
3739 if (is_empty) {
3740 #ifdef ONIG_DEBUG_MATCH
3741 fprintf(stderr, "EMPTY_CHECK_END_MEM_PUSH: skip id:%d, s:%p\n",
3742 (int )mem, s);
3743 #endif
3744 if (is_empty == -1) goto fail;
3745 goto empty_check_found;
3746 }
3747 else {
3748 STACK_PUSH_EMPTY_CHECK_END(mem);
3749 }
3750 }
3751 JUMP_OUT;
3752 #endif
3753
3754 CASE_OP(JUMP)
3755 addr = p->jump.addr;
3756 p += addr;
3757 CHECK_INTERRUPT_JUMP_OUT;
3758
3759 CASE_OP(PUSH)
3760 addr = p->push.addr;
3761 STACK_PUSH_ALT(p + addr, s, sprev);
3762 INC_OP;
3763 JUMP_OUT;
3764
3765 CASE_OP(PUSH_SUPER)
3766 addr = p->push.addr;
3767 STACK_PUSH_SUPER_ALT(p + addr, s, sprev);
3768 INC_OP;
3769 JUMP_OUT;
3770
3771 CASE_OP(POP_OUT)
3772 STACK_POP_ONE;
3773 /* for stop backtrack */
3774 /* CHECK_RETRY_LIMIT_IN_MATCH; */
3775 INC_OP;
3776 JUMP_OUT;
3777
3778 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
3779 CASE_OP(PUSH_OR_JUMP_EXACT1)
3780 {
3781 UChar c;
3782
3783 addr = p->push_or_jump_exact1.addr;
3784 c = p->push_or_jump_exact1.c;
3785 if (DATA_ENSURE_CHECK1 && c == *s) {
3786 STACK_PUSH_ALT(p + addr, s, sprev);
3787 INC_OP;
3788 JUMP_OUT;
3789 }
3790 }
3791 p += addr;
3792 JUMP_OUT;
3793 #endif
3794
3795 CASE_OP(PUSH_IF_PEEK_NEXT)
3796 {
3797 UChar c;
3798
3799 addr = p->push_if_peek_next.addr;
3800 c = p->push_if_peek_next.c;
3801 if (c == *s) {
3802 STACK_PUSH_ALT(p + addr, s, sprev);
3803 INC_OP;
3804 JUMP_OUT;
3805 }
3806 }
3807 INC_OP;
3808 JUMP_OUT;
3809
3810 CASE_OP(REPEAT)
3811 mem = p->repeat.id; /* mem: OP_REPEAT ID */
3812 addr = p->repeat.addr;
3813
3814 STACK_ENSURE(1);
3815 repeat_stk[mem] = GET_STACK_INDEX(stk);
3816 STACK_PUSH_REPEAT(mem, p + 1);
3817
3818 if (reg->repeat_range[mem].lower == 0) {
3819 STACK_PUSH_ALT(p + addr, s, sprev);
3820 }
3821 INC_OP;
3822 JUMP_OUT;
3823
3824 CASE_OP(REPEAT_NG)
3825 mem = p->repeat.id; /* mem: OP_REPEAT ID */
3826 addr = p->repeat.addr;
3827
3828 STACK_ENSURE(1);
3829 repeat_stk[mem] = GET_STACK_INDEX(stk);
3830 STACK_PUSH_REPEAT(mem, p + 1);
3831
3832 if (reg->repeat_range[mem].lower == 0) {
3833 STACK_PUSH_ALT(p + 1, s, sprev);
3834 p += addr;
3835 }
3836 else
3837 INC_OP;
3838 JUMP_OUT;
3839
3840 CASE_OP(REPEAT_INC)
3841 mem = p->repeat_inc.id; /* mem: OP_REPEAT ID */
3842 si = repeat_stk[mem];
3843 stkp = STACK_AT(si);
3844
3845 repeat_inc:
3846 stkp->u.repeat.count++;
3847 if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) {
3848 /* end of repeat. Nothing to do. */
3849 INC_OP;
3850 }
3851 else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
3852 INC_OP;
3853 STACK_PUSH_ALT(p, s, sprev);
3854 p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */
3855 }
3856 else {
3857 p = stkp->u.repeat.pcode;
3858 }
3859 STACK_PUSH_REPEAT_INC(si);
3860 CHECK_INTERRUPT_JUMP_OUT;
3861
3862 CASE_OP(REPEAT_INC_SG)
3863 mem = p->repeat_inc.id; /* mem: OP_REPEAT ID */
3864 STACK_GET_REPEAT(mem, stkp);
3865 si = GET_STACK_INDEX(stkp);
3866 goto repeat_inc;
3867
3868 CASE_OP(REPEAT_INC_NG)
3869 mem = p->repeat_inc.id; /* mem: OP_REPEAT ID */
3870 si = repeat_stk[mem];
3871 stkp = STACK_AT(si);
3872
3873 repeat_inc_ng:
3874 stkp->u.repeat.count++;
3875 if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
3876 if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
3877 Operation* pcode = stkp->u.repeat.pcode;
3878
3879 STACK_PUSH_REPEAT_INC(si);
3880 STACK_PUSH_ALT(pcode, s, sprev);
3881 INC_OP;
3882 }
3883 else {
3884 p = stkp->u.repeat.pcode;
3885 STACK_PUSH_REPEAT_INC(si);
3886 }
3887 }
3888 else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
3889 STACK_PUSH_REPEAT_INC(si);
3890 INC_OP;
3891 }
3892 CHECK_INTERRUPT_JUMP_OUT;
3893
3894 CASE_OP(REPEAT_INC_NG_SG)
3895 mem = p->repeat_inc.id; /* mem: OP_REPEAT ID */
3896 STACK_GET_REPEAT(mem, stkp);
3897 si = GET_STACK_INDEX(stkp);
3898 goto repeat_inc_ng;
3899
3900 CASE_OP(PREC_READ_START)
3901 STACK_PUSH_PREC_READ_START(s, sprev);
3902 INC_OP;
3903 JUMP_OUT;
3904
3905 CASE_OP(PREC_READ_END)
3906 STACK_GET_PREC_READ_START(stkp);
3907 s = stkp->u.state.pstr;
3908 sprev = stkp->u.state.pstr_prev;
3909 STACK_PUSH(STK_PREC_READ_END,0,0,0);
3910 INC_OP;
3911 JUMP_OUT;
3912
3913 CASE_OP(PREC_READ_NOT_START)
3914 addr = p->prec_read_not_start.addr;
3915 STACK_PUSH_ALT_PREC_READ_NOT(p + addr, s, sprev);
3916 INC_OP;
3917 JUMP_OUT;
3918
3919 CASE_OP(PREC_READ_NOT_END)
3920 STACK_POP_TIL_ALT_PREC_READ_NOT;
3921 goto fail;
3922
3923 CASE_OP(ATOMIC_START)
3924 STACK_PUSH_TO_VOID_START;
3925 INC_OP;
3926 JUMP_OUT;
3927
3928 CASE_OP(ATOMIC_END)
3929 STACK_EXEC_TO_VOID(stkp);
3930 INC_OP;
3931 JUMP_OUT;
3932
3933 CASE_OP(LOOK_BEHIND)
3934 tlen = p->look_behind.len;
3935 s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
3936 if (IS_NULL(s)) goto fail;
3937 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
3938 INC_OP;
3939 JUMP_OUT;
3940
3941 CASE_OP(LOOK_BEHIND_NOT_START)
3942 addr = p->look_behind_not_start.addr;
3943 tlen = p->look_behind_not_start.len;
3944 q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
3945 if (IS_NULL(q)) {
3946 /* too short case -> success. ex. /(?<!XXX)a/.match("a")
3947 If you want to change to fail, replace following line. */
3948 p += addr;
3949 /* goto fail; */
3950 }
3951 else {
3952 STACK_PUSH_ALT_LOOK_BEHIND_NOT(p + addr, s, sprev);
3953 s = q;
3954 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
3955 INC_OP;
3956 }
3957 JUMP_OUT;
3958
3959 CASE_OP(LOOK_BEHIND_NOT_END)
3960 STACK_POP_TIL_ALT_LOOK_BEHIND_NOT;
3961 INC_OP;
3962 goto fail;
3963
3964 #ifdef USE_CALL
3965 CASE_OP(CALL)
3966 addr = p->call.addr;
3967 INC_OP; STACK_PUSH_CALL_FRAME(p);
3968 p = reg->ops + addr;
3969 JUMP_OUT;
3970
3971 CASE_OP(RETURN)
3972 STACK_RETURN(p);
3973 STACK_PUSH_RETURN;
3974 JUMP_OUT;
3975 #endif
3976
3977 CASE_OP(PUSH_SAVE_VAL)
3978 {
3979 SaveType type;
3980
3981 type = p->push_save_val.type;
3982 mem = p->push_save_val.id; /* mem: save id */
3983 switch ((enum SaveType )type) {
3984 case SAVE_KEEP:
3985 STACK_PUSH_SAVE_VAL(mem, type, s);
3986 break;
3987
3988 case SAVE_S:
3989 STACK_PUSH_SAVE_VAL_WITH_SPREV(mem, type, s);
3990 break;
3991
3992 case SAVE_RIGHT_RANGE:
3993 STACK_PUSH_SAVE_VAL(mem, SAVE_RIGHT_RANGE, right_range);
3994 break;
3995 }
3996 }
3997 INC_OP;
3998 JUMP_OUT;
3999
4000 CASE_OP(UPDATE_VAR)
4001 {
4002 UpdateVarType type;
4003 enum SaveType save_type;
4004
4005 type = p->update_var.type;
4006 mem = p->update_var.id; /* mem: save id */
4007
4008 switch ((enum UpdateVarType )type) {
4009 case UPDATE_VAR_KEEP_FROM_STACK_LAST:
4010 STACK_GET_SAVE_VAL_TYPE_LAST(SAVE_KEEP, keep);
4011 break;
4012 case UPDATE_VAR_S_FROM_STACK:
4013 STACK_GET_SAVE_VAL_TYPE_LAST_ID_WITH_SPREV(SAVE_S, mem, s);
4014 break;
4015 case UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK:
4016 save_type = SAVE_S;
4017 goto get_save_val_type_last_id;
4018 break;
4019 case UPDATE_VAR_RIGHT_RANGE_FROM_STACK:
4020 save_type = SAVE_RIGHT_RANGE;
4021 get_save_val_type_last_id:
4022 STACK_GET_SAVE_VAL_TYPE_LAST_ID(save_type, mem, right_range);
4023 break;
4024 case UPDATE_VAR_RIGHT_RANGE_INIT:
4025 INIT_RIGHT_RANGE;
4026 break;
4027 }
4028 }
4029 INC_OP;
4030 JUMP_OUT;
4031
4032 #ifdef USE_CALLOUT
4033 CASE_OP(CALLOUT_CONTENTS)
4034 of = ONIG_CALLOUT_OF_CONTENTS;
4035 mem = p->callout_contents.num;
4036 goto callout_common_entry;
4037 BREAK_OUT;
4038
4039 CASE_OP(CALLOUT_NAME)
4040 {
4041 int call_result;
4042 int name_id;
4043 int in;
4044 CalloutListEntry* e;
4045 OnigCalloutFunc func;
4046 OnigCalloutArgs args;
4047
4048 of = ONIG_CALLOUT_OF_NAME;
4049 name_id = p->callout_name.id;
4050 mem = p->callout_name.num;
4051
4052 callout_common_entry:
4053 e = onig_reg_callout_list_at(reg, mem);
4054 in = e->in;
4055 if (of == ONIG_CALLOUT_OF_NAME) {
4056 func = onig_get_callout_start_func(reg, mem);
4057 }
4058 else {
4059 name_id = ONIG_NON_NAME_ID;
4060 func = msa->mp->progress_callout_of_contents;
4061 }
4062
4063 if (IS_NOT_NULL(func) && (in & ONIG_CALLOUT_IN_PROGRESS) != 0) {
4064 CALLOUT_BODY(func, ONIG_CALLOUT_IN_PROGRESS, name_id,
4065 (int )mem, msa->mp->callout_user_data, args, call_result);
4066 switch (call_result) {
4067 case ONIG_CALLOUT_FAIL:
4068 goto fail;
4069 break;
4070 case ONIG_CALLOUT_SUCCESS:
4071 goto retraction_callout2;
4072 break;
4073 default: /* error code */
4074 if (call_result > 0) {
4075 call_result = ONIGERR_INVALID_ARGUMENT;
4076 }
4077 best_len = call_result;
4078 goto finish;
4079 break;
4080 }
4081 }
4082 else {
4083 retraction_callout2:
4084 if ((in & ONIG_CALLOUT_IN_RETRACTION) != 0) {
4085 if (of == ONIG_CALLOUT_OF_NAME) {
4086 if (IS_NOT_NULL(func)) {
4087 STACK_PUSH_CALLOUT_NAME(name_id, mem, func);
4088 }
4089 }
4090 else {
4091 func = msa->mp->retraction_callout_of_contents;
4092 if (IS_NOT_NULL(func)) {
4093 STACK_PUSH_CALLOUT_CONTENTS(mem, func);
4094 }
4095 }
4096 }
4097 }
4098 }
4099 INC_OP;
4100 JUMP_OUT;
4101 #endif
4102
4103 CASE_OP(FINISH)
4104 goto finish;
4105
4106 #ifdef ONIG_DEBUG_STATISTICS
4107 fail:
4108 SOP_OUT;
4109 goto fail2;
4110 #endif
4111 CASE_OP(FAIL)
4112 #ifdef ONIG_DEBUG_STATISTICS
4113 fail2:
4114 #else
4115 fail:
4116 #endif
4117 STACK_POP;
4118 p = stk->u.state.pcode;
4119 s = stk->u.state.pstr;
4120 sprev = stk->u.state.pstr_prev;
4121 CHECK_RETRY_LIMIT_IN_MATCH;
4122 JUMP_OUT;
4123
4124 DEFAULT_OP
4125 goto bytecode_error;
4126
4127 } BYTECODE_INTERPRETER_END;
4128
4129 finish:
4130 STACK_SAVE;
4131 return best_len;
4132
4133 #ifdef ONIG_DEBUG
4134 stack_error:
4135 STACK_SAVE;
4136 return ONIGERR_STACK_BUG;
4137 #endif
4138
4139 bytecode_error:
4140 STACK_SAVE;
4141 return ONIGERR_UNDEFINED_BYTECODE;
4142
4143 #if defined(ONIG_DEBUG) && !defined(USE_DIRECT_THREADED_CODE)
4144 unexpected_bytecode_error:
4145 STACK_SAVE;
4146 return ONIGERR_UNEXPECTED_BYTECODE;
4147 #endif
4148
4149 #ifdef USE_RETRY_LIMIT_IN_MATCH
4150 retry_limit_in_match_over:
4151 STACK_SAVE;
4152 return ONIGERR_RETRY_LIMIT_IN_MATCH_OVER;
4153 #endif
4154 }
4155
4156
4157 static UChar*
4158 slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
4159 const UChar* text, const UChar* text_end, UChar* text_range)
4160 {
4161 UChar *t, *p, *s, *end;
4162
4163 end = (UChar* )text_end;
4164 end -= target_end - target - 1;
4165 if (end > text_range)
4166 end = text_range;
4167
4168 s = (UChar* )text;
4169
4170 while (s < end) {
4171 if (*s == *target) {
4172 p = s + 1;
4173 t = target + 1;
4174 while (t < target_end) {
4175 if (*t != *p++)
4176 break;
4177 t++;
4178 }
4179 if (t == target_end)
4180 return s;
4181 }
4182 s += enclen(enc, s);
4183 }
4184
4185 return (UChar* )NULL;
4186 }
4187
4188 static int
4189 str_lower_case_match(OnigEncoding enc, int case_fold_flag,
4190 const UChar* t, const UChar* tend,
4191 const UChar* p, const UChar* end)
4192 {
4193 int lowlen;
4194 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
4195
4196 while (t < tend) {
4197 lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf);
4198 q = lowbuf;
4199 while (lowlen > 0) {
4200 if (*t++ != *q++) return 0;
4201 lowlen--;
4202 }
4203 }
4204
4205 return 1;
4206 }
4207
4208 static UChar*
4209 slow_search_ic(OnigEncoding enc, int case_fold_flag,
4210 UChar* target, UChar* target_end,
4211 const UChar* text, const UChar* text_end, UChar* text_range)
4212 {
4213 UChar *s, *end;
4214
4215 end = (UChar* )text_end;
4216 end -= target_end - target - 1;
4217 if (end > text_range)
4218 end = text_range;
4219
4220 s = (UChar* )text;
4221
4222 while (s < end) {
4223 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4224 s, text_end))
4225 return s;
4226
4227 s += enclen(enc, s);
4228 }
4229
4230 return (UChar* )NULL;
4231 }
4232
4233 static UChar*
4234 slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
4235 const UChar* text, const UChar* adjust_text,
4236 const UChar* text_end, const UChar* text_start)
4237 {
4238 UChar *t, *p, *s;
4239
4240 s = (UChar* )text_end;
4241 s -= (target_end - target);
4242 if (s > text_start)
4243 s = (UChar* )text_start;
4244 else
4245 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
4246
4247 while (s >= text) {
4248 //if text is not null,the logic is correct.
4249 //this function is only invoked by backward_search_range,parameter text come
4250 //from range, which is checked by "if (range == 0) goto fail" in line 4512
4251 //so the check is just for passing static analysis.
4252 if(IS_NULL(s))break;
4253 if (*s == *target) {
4254 p = s + 1;
4255 t = target + 1;
4256 while (t < target_end) {
4257 if (*t != *p++)
4258 break;
4259 t++;
4260 }
4261 if (t == target_end)
4262 return s;
4263 }
4264 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s);
4265 }
4266
4267 return (UChar* )NULL;
4268 }
4269
4270 static UChar*
4271 slow_search_backward_ic(OnigEncoding enc, int case_fold_flag,
4272 UChar* target, UChar* target_end,
4273 const UChar* text, const UChar* adjust_text,
4274 const UChar* text_end, const UChar* text_start)
4275 {
4276 UChar *s;
4277
4278 s = (UChar* )text_end;
4279 s -= (target_end - target);
4280 if (s > text_start)
4281 s = (UChar* )text_start;
4282 else
4283 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
4284
4285 while (s >= text) {
4286 if (str_lower_case_match(enc, case_fold_flag,
4287 target, target_end, s, text_end))
4288 return s;
4289
4290 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s);
4291 }
4292
4293 return (UChar* )NULL;
4294 }
4295
4296
4297 static UChar*
4298 sunday_quick_search_step_forward(regex_t* reg,
4299 const UChar* target, const UChar* target_end,
4300 const UChar* text, const UChar* text_end,
4301 const UChar* text_range)
4302 {
4303 const UChar *s, *se, *t, *p, *end;
4304 const UChar *tail;
4305 int skip, tlen1;
4306 int map_offset;
4307 OnigEncoding enc;
4308
4309 #ifdef ONIG_DEBUG_SEARCH
4310 fprintf(stderr,
4311 "sunday_quick_search_step_forward: text: %p, text_end: %p, text_range: %p\n", text, text_end, text_range);
4312 #endif
4313
4314 enc = reg->enc;
4315
4316 tail = target_end - 1;
4317 tlen1 = (int )(tail - target);
4318 end = text_range;
4319 if (end + tlen1 > text_end)
4320 end = text_end - tlen1;
4321
4322 map_offset = reg->map_offset;
4323 s = text;
4324
4325 while (s < end) {
4326 p = se = s + tlen1;
4327 t = tail;
4328 while (*p == *t) {
4329 if (t == target) return (UChar* )s;
4330 p--; t--;
4331 }
4332 if (se + map_offset >= text_end) break;
4333 skip = reg->map[*(se + map_offset)];
4334 #if 0
4335 t = s;
4336 do {
4337 s += enclen(enc, s);
4338 } while ((s - t) < skip && s < end);
4339 #else
4340 s += skip;
4341 if (s < end)
4342 s = onigenc_get_right_adjust_char_head(enc, text, s);
4343 #endif
4344 }
4345
4346 return (UChar* )NULL;
4347 }
4348
4349 static UChar*
4350 sunday_quick_search(regex_t* reg, const UChar* target, const UChar* target_end,
4351 const UChar* text, const UChar* text_end,
4352 const UChar* text_range)
4353 {
4354 const UChar *s, *t, *p, *end;
4355 const UChar *tail;
4356 int map_offset;
4357
4358 end = text_range + (target_end - target);
4359 if (end > text_end)
4360 end = text_end;
4361
4362 map_offset = reg->map_offset;
4363 tail = target_end - 1;
4364 s = text + (tail - target);
4365
4366 while (s < end) {
4367 p = s;
4368 t = tail;
4369 while (*p == *t) {
4370 if (t == target) return (UChar* )p;
4371 p--; t--;
4372 }
4373 if (s + map_offset >= text_end) break;
4374 s += reg->map[*(s + map_offset)];
4375 }
4376
4377 return (UChar* )NULL;
4378 }
4379
4380 static UChar*
4381 sunday_quick_search_case_fold(regex_t* reg,
4382 const UChar* target, const UChar* target_end,
4383 const UChar* text, const UChar* text_end,
4384 const UChar* text_range)
4385 {
4386 const UChar *s, *se, *end;
4387 const UChar *tail;
4388 int skip, tlen1;
4389 int map_offset;
4390 int case_fold_flag;
4391 OnigEncoding enc;
4392
4393 #ifdef ONIG_DEBUG_SEARCH
4394 fprintf(stderr,
4395 "sunday_quick_search_case_fold: text: %p, text_end: %p, text_range: %p\n", text, text_end, text_range);
4396 #endif
4397
4398 enc = reg->enc;
4399 case_fold_flag = reg->case_fold_flag;
4400
4401 tail = target_end - 1;
4402 tlen1 = (int )(tail - target);
4403 end = text_range;
4404 if (end + tlen1 > text_end)
4405 end = text_end - tlen1;
4406
4407 map_offset = reg->map_offset;
4408 s = text;
4409
4410 while (s < end) {
4411 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4412 s, text_end))
4413 return (UChar* )s;
4414
4415 se = s + tlen1;
4416 if (se + map_offset >= text_end) break;
4417 skip = reg->map[*(se + map_offset)];
4418 #if 0
4419 p = s;
4420 do {
4421 s += enclen(enc, s);
4422 } while ((s - p) < skip && s < end);
4423 #else
4424 /* This is faster than prev code for long text. ex: /(?i)Twain/ */
4425 s += skip;
4426 if (s < end)
4427 s = onigenc_get_right_adjust_char_head(enc, text, s);
4428 #endif
4429 }
4430
4431 return (UChar* )NULL;
4432 }
4433
4434 static UChar*
4435 map_search(OnigEncoding enc, UChar map[],
4436 const UChar* text, const UChar* text_range)
4437 {
4438 const UChar *s = text;
4439
4440 while (s < text_range) {
4441 if (map[*s]) return (UChar* )s;
4442
4443 s += enclen(enc, s);
4444 }
4445 return (UChar* )NULL;
4446 }
4447
4448 static UChar*
4449 map_search_backward(OnigEncoding enc, UChar map[],
4450 const UChar* text, const UChar* adjust_text,
4451 const UChar* text_start)
4452 {
4453 const UChar *s = text_start;
4454
4455 while (s >= text) {
4456 //if text is not null,the logic is correct.
4457 //this function is only invoked by backward_search_range,parameter text come
4458 //from range, which is checked by "if (range == 0) goto fail" in line 4512
4459 //so the check is just for passing static analysis.
4460 if(IS_NULL(s))break;
4461 if (map[*s]) return (UChar* )s;
4462
4463 s = onigenc_get_prev_char_head(enc, adjust_text, s);
4464 }
4465 return (UChar* )NULL;
4466 }
4467 extern int
4468 onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at,
4469 OnigRegion* region, OnigOptionType option)
4470 {
4471 int r;
4472 OnigMatchParam mp;
4473
4474 onig_initialize_match_param(&mp);
4475 r = onig_match_with_param(reg, str, end, at, region, option, &mp);
4476 onig_free_match_param_content(&mp);
4477 return r;
4478 }
4479
4480 extern int
4481 onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end,
4482 const UChar* at, OnigRegion* region, OnigOptionType option,
4483 OnigMatchParam* mp)
4484 {
4485 int r;
4486 UChar *prev;
4487 MatchArg msa;
4488
4489 ADJUST_MATCH_PARAM(reg, mp);
4490 MATCH_ARG_INIT(msa, reg, option, region, at, mp);
4491 if (region
4492 #ifdef USE_POSIX_API_REGION_OPTION
4493 && !IS_POSIX_REGION(option)
4494 #endif
4495 ) {
4496 r = onig_region_resize_clear(region, reg->num_mem + 1);
4497 }
4498 else
4499 r = 0;
4500
4501 if (r == 0) {
4502 if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) {
4503 if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end)) {
4504 r = ONIGERR_INVALID_WIDE_CHAR_VALUE;
4505 goto end;
4506 }
4507 }
4508
4509 prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at);
4510 r = match_at(reg, str, end, end, at, prev, &msa);
4511 }
4512
4513 end:
4514 MATCH_ARG_FREE(msa);
4515 return r;
4516 }
4517
/*
 * Find the next candidate region for a forward search using the regex's
 * optimization data (reg->optimize selects the search routine).
 *
 * The scan starts at s advanced by reg->dmin and only accepts a hit that
 * lies before range.  On success returns 1 and sets *high to
 * (hit - reg->dmin); *low (and *low_prev, when low_prev is not NULL) is set
 * only when reg->dmax is not INFINITE_LEN.  Returns 0 on failure.
 */
4518 static int
4519 forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
4520 UChar* range, UChar** low, UChar** high, UChar** low_prev)
4521 {
4522 UChar *p, *pprev = (UChar* )NULL;
4523
4524 #ifdef ONIG_DEBUG_SEARCH
4525 fprintf(stderr, "forward_search_range: str: %p, end: %p, s: %p, range: %p\n",
4526 str, end, s, range);
4527 #endif
4528
/* Skip reg->dmin bytes before scanning; for multibyte encodings advance by
   whole characters, and fail if that reaches end. */
4529 p = s;
4530 if (reg->dmin > 0) {
4531 if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {
4532 p += reg->dmin;
4533 }
4534 else {
4535 UChar *q = p + reg->dmin;
4536
4537 if (q >= end) return 0; /* fail */
4538 while (p < q) p += enclen(reg->enc, p);
4539 }
4540 }
4541
/* Run the search routine selected by reg->optimize, starting at p. */
4542 retry:
4543 switch (reg->optimize) {
4544 case OPTIMIZE_STR:
4545 p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
4546 break;
4547 case OPTIMIZE_STR_CASE_FOLD:
4548 p = slow_search_ic(reg->enc, reg->case_fold_flag,
4549 reg->exact, reg->exact_end, p, end, range);
4550 break;
4551
4552 case OPTIMIZE_STR_CASE_FOLD_FAST:
4553 p = sunday_quick_search_case_fold(reg, reg->exact, reg->exact_end, p, end,
4554 range);
4555 break;
4556
4557 case OPTIMIZE_STR_FAST:
4558 p = sunday_quick_search(reg, reg->exact, reg->exact_end, p, end, range);
4559 break;
4560
4561 case OPTIMIZE_STR_FAST_STEP_FORWARD:
4562 p = sunday_quick_search_step_forward(reg, reg->exact, reg->exact_end,
4563 p, end, range);
4564 break;
4565
4566 case OPTIMIZE_MAP:
4567 p = map_search(reg->enc, reg->map, p, range);
4568 break;
4569 }
4570
4571 if (p && p < range) {
/* (p - dmin) would lie before s: step one character past this hit,
   remembering it in pprev, and search again. */
4572 if (p - reg->dmin < s) {
4573 retry_gate:
4574 pprev = p;
4575 p += enclen(reg->enc, p);
4576 goto retry;
4577 }
4578
/* Line-anchor filter: a hit that does not satisfy the anchor is rejected
   via retry_gate. */
4579 if (reg->sub_anchor) {
4580 UChar* prev;
4581
4582 switch (reg->sub_anchor) {
/* Hit must be at the string start or directly after a newline. */
4583 case ANCR_BEGIN_LINE:
4584 if (!ON_STR_BEGIN(p)) {
4585 prev = onigenc_get_prev_char_head(reg->enc,
4586 (pprev ? pprev : str), p);
4587 if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
4588 goto retry_gate;
4589 }
4590 break;
4591
/* Hit must be at the string end or on a newline (or CRNL when enabled). */
4592 case ANCR_END_LINE:
4593 if (ON_STR_END(p)) {
4594 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
4595 prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
4596 (pprev ? pprev : str), p);
4597 if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
4598 goto retry_gate;
4599 #endif
4600 }
4601 else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
4602 #ifdef USE_CRNL_AS_LINE_TERMINATOR
4603 && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
4604 #endif
4605 )
4606 goto retry_gate;
4607 break;
4608 }
4609 }
4610
/* Compute *low (and *low_prev): the hit itself when dmax is 0, otherwise
   the hit moved back by dmax, clamped to str and adjusted to a character
   head.  Nothing is stored when dmax is INFINITE_LEN. */
4611 if (reg->dmax == 0) {
4612 *low = p;
4613 if (low_prev) {
4614 if (*low > s)
4615 *low_prev = onigenc_get_prev_char_head(reg->enc, s, p);
4616 else
4617 *low_prev = onigenc_get_prev_char_head(reg->enc,
4618 (pprev ? pprev : str), p);
4619 }
4620 }
4621 else {
4622 if (reg->dmax != INFINITE_LEN) {
4623 if (p - str < reg->dmax) {
4624 *low = (UChar* )str;
4625 if (low_prev)
4626 *low_prev = onigenc_get_prev_char_head(reg->enc, str, *low);
4627 }
4628 else {
4629 *low = p - reg->dmax;
4630 if (*low > s) {
4631 *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,
4632 *low, (const UChar** )low_prev);
4633 if (low_prev && IS_NULL(*low_prev))
4634 *low_prev = onigenc_get_prev_char_head(reg->enc,
4635 (pprev ? pprev : s), *low);
4636 }
4637 else {
4638 if (low_prev)
4639 *low_prev = onigenc_get_prev_char_head(reg->enc,
4640 (pprev ? pprev : str), *low);
4641 }
4642 }
4643 }
4644 }
4645 /* no needs to adjust *high, *high is used as range check only */
4646 *high = p - reg->dmin;
4647
4648 #ifdef ONIG_DEBUG_SEARCH
4649 fprintf(stderr,
4650 "forward_search_range success: low: %d, high: %d, dmin: %d, dmax: %d\n",
4651 (int )(*low - str), (int )(*high - str), reg->dmin, reg->dmax);
4652 #endif
4653 return 1; /* success */
4654 }
4655
4656 return 0; /* fail */
4657 }
4658
4659
/*
 * Find the next candidate region for a backward search using the regex's
 * optimization data (reg->optimize selects the search routine).
 *
 * The scan runs backwards from s and stops at (range + reg->dmin).
 * Returns 1 on success and 0 on failure; a null range fails immediately.
 * *low and *high are stored only when reg->dmax is not INFINITE_LEN.
 */
4660 static int
4661 backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
4662 UChar* s, const UChar* range, UChar* adjrange,
4663 UChar** low, UChar** high)
4664 {
4665 UChar *p;
4666
4667 if (range == 0) goto fail;
4668
4669 range += reg->dmin;
4670 p = s;
4671
/* Run the backward search selected by reg->optimize, starting at p.  The
   fast string variants share the plain backward string search, and both
   case-fold variants share the case-insensitive one. */
4672 retry:
4673 switch (reg->optimize) {
4674 case OPTIMIZE_STR:
4675 exact_method:
4676 p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
4677 range, adjrange, end, p);
4678 break;
4679
4680 case OPTIMIZE_STR_CASE_FOLD:
4681 case OPTIMIZE_STR_CASE_FOLD_FAST:
4682 p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
4683 reg->exact, reg->exact_end,
4684 range, adjrange, end, p);
4685 break;
4686
4687 case OPTIMIZE_STR_FAST:
4688 case OPTIMIZE_STR_FAST_STEP_FORWARD:
4689 goto exact_method;
4690 break;
4691
4692 case OPTIMIZE_MAP:
4693 p = map_search_backward(reg->enc, reg->map, range, adjrange, p);
4694 break;
4695 }
4696
4697 if (p) {
/* Line-anchor filter: a hit that does not satisfy the anchor makes the
   search resume from the previous character. */
4698 if (reg->sub_anchor) {
4699 UChar* prev;
4700
4701 switch (reg->sub_anchor) {
/* Hit must be at the string start or directly after a newline. */
4702 case ANCR_BEGIN_LINE:
4703 if (!ON_STR_BEGIN(p)) {
4704 prev = onigenc_get_prev_char_head(reg->enc, str, p);
4705 if (IS_NOT_NULL(prev) && !ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
4706 p = prev;
4707 goto retry;
4708 }
4709 }
4710 break;
4711
/* Hit must be at the string end or on a newline (or CRNL when enabled);
   the search fails when there is no previous character to resume from. */
4712 case ANCR_END_LINE:
4713 if (ON_STR_END(p)) {
4714 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
4715 prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
4716 if (IS_NULL(prev)) goto fail;
4717 if (ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
4718 p = prev;
4719 goto retry;
4720 }
4721 #endif
4722 }
4723 else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
4724 #ifdef USE_CRNL_AS_LINE_TERMINATOR
4725 && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
4726 #endif
4727 ) {
4728 p = onigenc_get_prev_char_head(reg->enc, adjrange, p);
4729 if (IS_NULL(p)) goto fail;
4730 goto retry;
4731 }
4732 break;
4733 }
4734 }
4735
4736 /* no needs to adjust *high, *high is used as range check only */
4737 if (reg->dmax != INFINITE_LEN) {
4738 *low = p - reg->dmax;
4739 *high = p - reg->dmin;
4740 *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high);
4741 }
4742
4743 #ifdef ONIG_DEBUG_SEARCH
4744 fprintf(stderr, "backward_search_range: low: %d, high: %d\n",
4745 (int )(*low - str), (int )(*high - str));
4746 #endif
4747 return 1; /* success */
4748 }
4749
4750 fail:
4751 #ifdef ONIG_DEBUG_SEARCH
4752 fprintf(stderr, "backward_search_range: fail.\n");
4753 #endif
4754 return 0; /* fail */
4755 }
4756
4757
4758 extern int
4759 onig_search(regex_t* reg, const UChar* str, const UChar* end,
4760 const UChar* start, const UChar* range, OnigRegion* region,
4761 OnigOptionType option)
4762 {
4763 int r;
4764 OnigMatchParam mp;
4765
4766 onig_initialize_match_param(&mp);
4767 r = onig_search_with_param(reg, str, end, start, range, region, option, &mp);
4768 onig_free_match_param_content(&mp);
4769 return r;
4770
4771 }
4772
4773 extern int
4774 onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,
4775 const UChar* start, const UChar* range, OnigRegion* region,
4776 OnigOptionType option, OnigMatchParam* mp)
4777 {
4778 int r;
4779 UChar *s, *prev;
4780 MatchArg msa;
4781 const UChar *orig_start = start;
4782 const UChar *orig_range = range;
4783
4784 #ifdef ONIG_DEBUG_SEARCH
4785 fprintf(stderr,
4786 "onig_search (entry point): str: %p, end: %d, start: %d, range: %d\n",
4787 str, (int )(end - str), (int )(start - str), (int )(range - str));
4788 #endif
4789
4790 ADJUST_MATCH_PARAM(reg, mp);
4791
4792 if (region
4793 #ifdef USE_POSIX_API_REGION_OPTION
4794 && !IS_POSIX_REGION(option)
4795 #endif
4796 ) {
4797 r = onig_region_resize_clear(region, reg->num_mem + 1);
4798 if (r != 0) goto finish_no_msa;
4799 }
4800
4801 if (start > end || start < str) goto mismatch_no_msa;
4802
4803 if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) {
4804 if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end)) {
4805 r = ONIGERR_INVALID_WIDE_CHAR_VALUE;
4806 goto finish_no_msa;
4807 }
4808 }
4809
4810
4811 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
4812 #define MATCH_AND_RETURN_CHECK(upper_range) \
4813 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
4814 if (r != ONIG_MISMATCH) {\
4815 if (r >= 0) {\
4816 if (! IS_FIND_LONGEST(reg->options)) {\
4817 goto match;\
4818 }\
4819 }\
4820 else goto finish; /* error */ \
4821 }
4822 #else
4823 #define MATCH_AND_RETURN_CHECK(upper_range) \
4824 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
4825 if (r != ONIG_MISMATCH) {\
4826 if (r >= 0) {\
4827 goto match;\
4828 }\
4829 else goto finish; /* error */ \
4830 }
4831 #endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
4832
4833
4834 /* anchor optimize: resume search range */
4835 if (reg->anchor != 0 && str < end) {
4836 UChar *min_semi_end, *max_semi_end;
4837
4838 if (reg->anchor & ANCR_BEGIN_POSITION) {
4839 /* search start-position only */
4840 begin_position:
4841 if (range > start)
4842 range = start + 1;
4843 else
4844 range = start;
4845 }
4846 else if (reg->anchor & ANCR_BEGIN_BUF) {
4847 /* search str-position only */
4848 if (range > start) {
4849 if (start != str) goto mismatch_no_msa;
4850 range = str + 1;
4851 }
4852 else {
4853 if (range <= str) {
4854 start = str;
4855 range = str;
4856 }
4857 else
4858 goto mismatch_no_msa;
4859 }
4860 }
4861 else if (reg->anchor & ANCR_END_BUF) {
4862 min_semi_end = max_semi_end = (UChar* )end;
4863
4864 end_buf:
4865 if ((OnigLen )(max_semi_end - str) < reg->anchor_dmin)
4866 goto mismatch_no_msa;
4867
4868 if (range > start) {
4869 if ((OnigLen )(min_semi_end - start) > reg->anchor_dmax) {
4870 start = min_semi_end - reg->anchor_dmax;
4871 if (start < end)
4872 start = onigenc_get_right_adjust_char_head(reg->enc, str, start);
4873 }
4874 if ((OnigLen )(max_semi_end - (range - 1)) < reg->anchor_dmin) {
4875 range = max_semi_end - reg->anchor_dmin + 1;
4876 }
4877
4878 if (start > range) goto mismatch_no_msa;
4879 /* If start == range, match with empty at end.
4880 Backward search is used. */
4881 }
4882 else {
4883 if ((OnigLen )(min_semi_end - range) > reg->anchor_dmax) {
4884 range = min_semi_end - reg->anchor_dmax;
4885 }
4886 if ((OnigLen )(max_semi_end - start) < reg->anchor_dmin) {
4887 start = max_semi_end - reg->anchor_dmin;
4888 start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start);
4889 }
4890 if (range > start) goto mismatch_no_msa;
4891 }
4892 }
4893 else if (reg->anchor & ANCR_SEMI_END_BUF) {
4894 UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, 1);
4895
4896 max_semi_end = (UChar* )end;
4897 // pre_end will be null only when str > end.
4898 // The earlier check "if (start > end || start < str) goto mismatch_no_msa"
4899 // guarantees that str is always less than end,
4900 // so pre_end won't be null; this check is just for passing static analysis.
4901 if (IS_NOT_NULL(pre_end) && ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
4902 min_semi_end = pre_end;
4903
4904 #ifdef USE_CRNL_AS_LINE_TERMINATOR
4905 pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, 1);
4906 if (IS_NOT_NULL(pre_end) &&
4907 ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
4908 min_semi_end = pre_end;
4909 }
4910 #endif
4911 if (min_semi_end > str && start <= min_semi_end) {
4912 goto end_buf;
4913 }
4914 }
4915 else {
4916 min_semi_end = (UChar* )end;
4917 goto end_buf;
4918 }
4919 }
4920 else if ((reg->anchor & ANCR_ANYCHAR_INF_ML)) {
4921 goto begin_position;
4922 }
4923 }
4924 else if (str == end) { /* empty string */
4925 static const UChar* address_for_empty_string = (UChar* )"";
4926
4927 #ifdef ONIG_DEBUG_SEARCH
4928 fprintf(stderr, "onig_search: empty string.\n");
4929 #endif
4930
4931 if (reg->threshold_len == 0) {
4932 start = end = str = address_for_empty_string;
4933 s = (UChar* )start;
4934 prev = (UChar* )NULL;
4935
4936 MATCH_ARG_INIT(msa, reg, option, region, start, mp);
4937 MATCH_AND_RETURN_CHECK(end);
4938 goto mismatch;
4939 }
4940 goto mismatch_no_msa;
4941 }
4942
4943 #ifdef ONIG_DEBUG_SEARCH
4944 fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
4945 (int )(end - str), (int )(start - str), (int )(range - str));
4946 #endif
4947
4948 MATCH_ARG_INIT(msa, reg, option, region, orig_start, mp);
4949
4950 s = (UChar* )start;
4951 if (range > start) { /* forward search */
4952 if (s > str)
4953 prev = onigenc_get_prev_char_head(reg->enc, str, s);
4954 else
4955 prev = (UChar* )NULL;
4956
4957 if (reg->optimize != OPTIMIZE_NONE) {
4958 UChar *sch_range, *low, *high, *low_prev;
4959
4960 sch_range = (UChar* )range;
4961 if (reg->dmax != 0) {
4962 if (reg->dmax == INFINITE_LEN)
4963 sch_range = (UChar* )end;
4964 else {
4965 sch_range += reg->dmax;
4966 if (sch_range > end) sch_range = (UChar* )end;
4967 }
4968 }
4969
4970 if ((end - start) < reg->threshold_len)
4971 goto mismatch;
4972
4973 if (reg->dmax != INFINITE_LEN) {
4974 do {
4975 if (! forward_search_range(reg, str, end, s, sch_range,
4976 &low, &high, &low_prev)) goto mismatch;
4977 if (s < low) {
4978 s = low;
4979 prev = low_prev;
4980 }
4981 while (s <= high) {
4982 MATCH_AND_RETURN_CHECK(orig_range);
4983 prev = s;
4984 s += enclen(reg->enc, s);
4985 }
4986 } while (s < range);
4987 goto mismatch;
4988 }
4989 else { /* check only. */
4990 if (! forward_search_range(reg, str, end, s, sch_range,
4991 &low, &high, (UChar** )NULL)) goto mismatch;
4992
4993 if ((reg->anchor & ANCR_ANYCHAR_INF) != 0) {
4994 do {
4995 MATCH_AND_RETURN_CHECK(orig_range);
4996 prev = s;
4997 s += enclen(reg->enc, s);
4998
4999 if ((reg->anchor & (ANCR_LOOK_BEHIND | ANCR_PREC_READ_NOT)) == 0) {
5000 while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) {
5001 prev = s;
5002 s += enclen(reg->enc, s);
5003 }
5004 }
5005 } while (s < range);
5006 goto mismatch;
5007 }
5008 }
5009 }
5010
5011 do {
5012 MATCH_AND_RETURN_CHECK(orig_range);
5013 prev = s;
5014 s += enclen(reg->enc, s);
5015 } while (s < range);
5016
5017 if (s == range) { /* because empty match with /$/. */
5018 MATCH_AND_RETURN_CHECK(orig_range);
5019 }
5020 }
5021 else { /* backward search */
5022 if (range < str) goto mismatch;
5023
5024 if (orig_start < end)
5025 orig_start += enclen(reg->enc, orig_start); /* is upper range */
5026
5027 if (reg->optimize != OPTIMIZE_NONE) {
5028 UChar *low, *high, *adjrange, *sch_start;
5029
5030 if (range < end)
5031 adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range);
5032 else
5033 adjrange = (UChar* )end;
5034
5035 if (reg->dmax != INFINITE_LEN &&
5036 (end - range) >= reg->threshold_len) {
5037 do {
5038 sch_start = s + reg->dmax;
5039 if (sch_start > end) sch_start = (UChar* )end;
5040 if (backward_search_range(reg, str, end, sch_start, range, adjrange,
5041 &low, &high) <= 0)
5042 goto mismatch;
5043
5044 if (s > high)
5045 s = high;
5046
5047 while (s >= low) {
5048 prev = onigenc_get_prev_char_head(reg->enc, str, s);
5049 MATCH_AND_RETURN_CHECK(orig_start);
5050 s = prev;
5051 }
5052 // if range is not null,the check is not necessary.
5053 // the range is actually the pointer of the end of the matched string
5054 // or assigned by "range = str" in line 4708. In RegularExpressionMatch
5055 // protocol, the matched string is the parameter String. And str in
5056 // line 4708 is the String,too. and the range is calculated from
5057 // "Start + onigenc_str_bytelen_null (CHAR16_ENCODING, Start)" in
5058 // line 146 in RegularExpressionDxe.c. RegularExpressionMatch ensure
5059 // the String is not null,So in both situation, the range can not be NULL.
5060 // This check is just for passing static analysis.
5061 if(IS_NULL(s))break;
5062 } while (s >= range);
5063 goto mismatch;
5064 }
5065 else { /* check only. */
5066 if ((end - range) < reg->threshold_len) goto mismatch;
5067
5068 sch_start = s;
5069 if (reg->dmax != 0) {
5070 if (reg->dmax == INFINITE_LEN)
5071 sch_start = (UChar* )end;
5072 else {
5073 sch_start += reg->dmax;
5074 if (sch_start > end) sch_start = (UChar* )end;
5075 else
5076 sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
5077 start, sch_start);
5078 }
5079 }
5080 if (backward_search_range(reg, str, end, sch_start, range, adjrange,
5081 &low, &high) <= 0) goto mismatch;
5082 }
5083 }
5084
5085 do {
5086 prev = onigenc_get_prev_char_head(reg->enc, str, s);
5087 MATCH_AND_RETURN_CHECK(orig_start);
5088 s = prev;
5089 } while (s >= range);
5090 }
5091
5092 mismatch:
5093 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
5094 if (IS_FIND_LONGEST(reg->options)) {
5095 if (msa.best_len >= 0) {
5096 s = msa.best_s;
5097 goto match;
5098 }
5099 }
5100 #endif
5101 r = ONIG_MISMATCH;
5102
5103 finish:
5104 MATCH_ARG_FREE(msa);
5105
5106 /* If result is mismatch and no FIND_NOT_EMPTY option,
5107 then the region is not set in match_at(). */
5108 if (IS_FIND_NOT_EMPTY(reg->options) && region
5109 #ifdef USE_POSIX_API_REGION_OPTION
5110 && !IS_POSIX_REGION(option)
5111 #endif
5112 ) {
5113 onig_region_clear(region);
5114 }
5115
5116 #ifdef ONIG_DEBUG
5117 if (r != ONIG_MISMATCH)
5118 fprintf(stderr, "onig_search: error %d\n", r);
5119 #endif
5120 return r;
5121
5122 mismatch_no_msa:
5123 r = ONIG_MISMATCH;
5124 finish_no_msa:
5125 #ifdef ONIG_DEBUG
5126 if (r != ONIG_MISMATCH)
5127 fprintf(stderr, "onig_search: error %d\n", r);
5128 #endif
5129 return r;
5130
5131 match:
5132 MATCH_ARG_FREE(msa);
5133 return (int )(s - str);
5134 }
5135
5136 extern int
5137 onig_scan(regex_t* reg, const UChar* str, const UChar* end,
5138 OnigRegion* region, OnigOptionType option,
5139 int (*scan_callback)(int, int, OnigRegion*, void*),
5140 void* callback_arg)
5141 {
5142 int r;
5143 int n;
5144 int rs;
5145 const UChar* start;
5146
5147 if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) {
5148 if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end))
5149 return ONIGERR_INVALID_WIDE_CHAR_VALUE;
5150
5151 ONIG_OPTION_OFF(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING);
5152 }
5153
5154 n = 0;
5155 start = str;
5156 while (1) {
5157 r = onig_search(reg, str, end, start, end, region, option);
5158 if (r >= 0) {
5159 rs = scan_callback(n, r, region, callback_arg);
5160 n++;
5161 if (rs != 0)
5162 return rs;
5163
5164 if (region->end[0] == start - str) {
5165 if (start >= end) break;
5166 start += enclen(reg->enc, start);
5167 }
5168 else
5169 start = str + region->end[0];
5170
5171 if (start > end)
5172 break;
5173 }
5174 else if (r == ONIG_MISMATCH) {
5175 break;
5176 }
5177 else { /* error */
5178 return r;
5179 }
5180 }
5181
5182 return n;
5183 }
5184
5185 extern OnigEncoding
5186 onig_get_encoding(regex_t* reg)
5187 {
5188 return reg->enc;
5189 }
5190
5191 extern OnigOptionType
5192 onig_get_options(regex_t* reg)
5193 {
5194 return reg->options;
5195 }
5196
5197 extern OnigCaseFoldType
5198 onig_get_case_fold_flag(regex_t* reg)
5199 {
5200 return reg->case_fold_flag;
5201 }
5202
5203 extern OnigSyntaxType*
5204 onig_get_syntax(regex_t* reg)
5205 {
5206 return reg->syntax;
5207 }
5208
5209 extern int
5210 onig_number_of_captures(regex_t* reg)
5211 {
5212 return reg->num_mem;
5213 }
5214
5215 extern int
5216 onig_number_of_capture_histories(regex_t* reg)
5217 {
5218 #ifdef USE_CAPTURE_HISTORY
5219 int i, n;
5220
5221 n = 0;
5222 for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
5223 if (MEM_STATUS_AT(reg->capture_history, i) != 0)
5224 n++;
5225 }
5226 return n;
5227 #else
5228 return 0;
5229 #endif
5230 }
5231
5232 extern void
5233 onig_copy_encoding(OnigEncoding to, OnigEncoding from)
5234 {
5235 *to = *from;
5236 }
5237
5238 #ifdef USE_DIRECT_THREADED_CODE
5239 extern int
5240 onig_init_for_match_at(regex_t* reg)
5241 {
5242 return match_at(reg, (const UChar* )NULL, (const UChar* )NULL,
5243 (const UChar* )NULL, (const UChar* )NULL, (UChar* )NULL,
5244 (MatchArg* )NULL);
5245 }
5246 #endif
5247
5248
5249 /* for callout functions */
5250
5251 #ifdef USE_CALLOUT
5252
5253 extern OnigCalloutFunc
5254 onig_get_progress_callout(void)
5255 {
5256 return DefaultProgressCallout;
5257 }
5258
5259 extern int
5260 onig_set_progress_callout(OnigCalloutFunc f)
5261 {
5262 DefaultProgressCallout = f;
5263 return ONIG_NORMAL;
5264 }
5265
5266 extern OnigCalloutFunc
5267 onig_get_retraction_callout(void)
5268 {
5269 return DefaultRetractionCallout;
5270 }
5271
5272 extern int
5273 onig_set_retraction_callout(OnigCalloutFunc f)
5274 {
5275 DefaultRetractionCallout = f;
5276 return ONIG_NORMAL;
5277 }
5278
5279 extern int
5280 onig_get_callout_num_by_callout_args(OnigCalloutArgs* args)
5281 {
5282 return args->num;
5283 }
5284
5285 extern OnigCalloutIn
5286 onig_get_callout_in_by_callout_args(OnigCalloutArgs* args)
5287 {
5288 return args->in;
5289 }
5290
5291 extern int
5292 onig_get_name_id_by_callout_args(OnigCalloutArgs* args)
5293 {
5294 return args->name_id;
5295 }
5296
5297 extern const UChar*
5298 onig_get_contents_by_callout_args(OnigCalloutArgs* args)
5299 {
5300 int num;
5301 CalloutListEntry* e;
5302
5303 num = args->num;
5304 e = onig_reg_callout_list_at(args->regex, num);
5305 if (IS_NULL(e)) return 0;
5306 if (e->of == ONIG_CALLOUT_OF_CONTENTS) {
5307 return e->u.content.start;
5308 }
5309
5310 return 0;
5311 }
5312
5313 extern const UChar*
5314 onig_get_contents_end_by_callout_args(OnigCalloutArgs* args)
5315 {
5316 int num;
5317 CalloutListEntry* e;
5318
5319 num = args->num;
5320 e = onig_reg_callout_list_at(args->regex, num);
5321 if (IS_NULL(e)) return 0;
5322 if (e->of == ONIG_CALLOUT_OF_CONTENTS) {
5323 return e->u.content.end;
5324 }
5325
5326 return 0;
5327 }
5328
5329 extern int
5330 onig_get_args_num_by_callout_args(OnigCalloutArgs* args)
5331 {
5332 int num;
5333 CalloutListEntry* e;
5334
5335 num = args->num;
5336 e = onig_reg_callout_list_at(args->regex, num);
5337 if (IS_NULL(e)) return ONIGERR_INVALID_ARGUMENT;
5338 if (e->of == ONIG_CALLOUT_OF_NAME) {
5339 return e->u.arg.num;
5340 }
5341
5342 return ONIGERR_INVALID_ARGUMENT;
5343 }
5344
5345 extern int
5346 onig_get_passed_args_num_by_callout_args(OnigCalloutArgs* args)
5347 {
5348 int num;
5349 CalloutListEntry* e;
5350
5351 num = args->num;
5352 e = onig_reg_callout_list_at(args->regex, num);
5353 if (IS_NULL(e)) return ONIGERR_INVALID_ARGUMENT;
5354 if (e->of == ONIG_CALLOUT_OF_NAME) {
5355 return e->u.arg.passed_num;
5356 }
5357
5358 return ONIGERR_INVALID_ARGUMENT;
5359 }
5360
5361 extern int
5362 onig_get_arg_by_callout_args(OnigCalloutArgs* args, int index,
5363 OnigType* type, OnigValue* val)
5364 {
5365 int num;
5366 CalloutListEntry* e;
5367
5368 num = args->num;
5369 e = onig_reg_callout_list_at(args->regex, num);
5370 if (IS_NULL(e)) return ONIGERR_INVALID_ARGUMENT;
5371 if (e->of == ONIG_CALLOUT_OF_NAME) {
5372 if (IS_NOT_NULL(type)) *type = e->u.arg.types[index];
5373 if (IS_NOT_NULL(val)) *val = e->u.arg.vals[index];
5374 return ONIG_NORMAL;
5375 }
5376
5377 return ONIGERR_INVALID_ARGUMENT;
5378 }
5379
5380 extern const UChar*
5381 onig_get_string_by_callout_args(OnigCalloutArgs* args)
5382 {
5383 return args->string;
5384 }
5385
5386 extern const UChar*
5387 onig_get_string_end_by_callout_args(OnigCalloutArgs* args)
5388 {
5389 return args->string_end;
5390 }
5391
5392 extern const UChar*
5393 onig_get_start_by_callout_args(OnigCalloutArgs* args)
5394 {
5395 return args->start;
5396 }
5397
5398 extern const UChar*
5399 onig_get_right_range_by_callout_args(OnigCalloutArgs* args)
5400 {
5401 return args->right_range;
5402 }
5403
5404 extern const UChar*
5405 onig_get_current_by_callout_args(OnigCalloutArgs* args)
5406 {
5407 return args->current;
5408 }
5409
5410 extern OnigRegex
5411 onig_get_regex_by_callout_args(OnigCalloutArgs* args)
5412 {
5413 return args->regex;
5414 }
5415
5416 extern unsigned long
5417 onig_get_retry_counter_by_callout_args(OnigCalloutArgs* args)
5418 {
5419 return args->retry_in_match_counter;
5420 }
5421
5422
5423 extern int
5424 onig_get_capture_range_in_callout(OnigCalloutArgs* a, int mem_num, int* begin, int* end)
5425 {
5426 OnigRegex reg;
5427 const UChar* str;
5428 StackType* stk_base;
5429 int i;
5430
5431 i = mem_num;
5432 reg = a->regex;
5433 str = a->string;
5434 stk_base = a->stk_base;
5435
5436 if (i > 0) {
5437 if (a->mem_end_stk[i] != INVALID_STACK_INDEX) {
5438 if (MEM_STATUS_AT(reg->bt_mem_start, i))
5439 *begin = (int )(STACK_AT(a->mem_start_stk[i])->u.mem.pstr - str);
5440 else
5441 *begin = (int )((UChar* )((void* )a->mem_start_stk[i]) - str);
5442
5443 *end = (int )((MEM_STATUS_AT(reg->bt_mem_end, i)
5444 ? STACK_AT(a->mem_end_stk[i])->u.mem.pstr
5445 : (UChar* )((void* )a->mem_end_stk[i])) - str);
5446 }
5447 else {
5448 *begin = *end = ONIG_REGION_NOTPOS;
5449 }
5450 }
5451 else if (i == 0) {
5452 #if 0
5453 *begin = a->start - str;
5454 *end = a->current - str;
5455 #else
5456 return ONIGERR_INVALID_ARGUMENT;
5457 #endif
5458 }
5459 else
5460 return ONIGERR_INVALID_ARGUMENT;
5461
5462 return ONIG_NORMAL;
5463 }
5464
5465 extern int
5466 onig_get_used_stack_size_in_callout(OnigCalloutArgs* a, int* used_num, int* used_bytes)
5467 {
5468 int n;
5469
5470 n = (int )(a->stk - a->stk_base);
5471
5472 if (used_num != 0)
5473 *used_num = n;
5474
5475 if (used_bytes != 0)
5476 *used_bytes = n * sizeof(StackType);
5477
5478 return ONIG_NORMAL;
5479 }
5480
5481
5482 /* builtin callout functions */
5483
5484 extern int
5485 onig_builtin_fail(OnigCalloutArgs* args ARG_UNUSED, void* user_data ARG_UNUSED)
5486 {
5487 return ONIG_CALLOUT_FAIL;
5488 }
5489
5490 extern int
5491 onig_builtin_mismatch(OnigCalloutArgs* args ARG_UNUSED, void* user_data ARG_UNUSED)
5492 {
5493 return ONIG_MISMATCH;
5494 }
5495
5496 #if 0
5497 extern int
5498 onig_builtin_success(OnigCalloutArgs* args ARG_UNUSED, void* user_data ARG_UNUSED)
5499 {
5500 return ONIG_CALLOUT_SUCCESS;
5501 }
5502 #endif
5503
5504 extern int
5505 onig_builtin_error(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
5506 {
5507 int r;
5508 int n;
5509 OnigValue val;
5510
5511 r = onig_get_arg_by_callout_args(args, 0, 0, &val);
5512 if (r != ONIG_NORMAL) return r;
5513
5514 n = (int )val.l;
5515 if (n >= 0) {
5516 n = ONIGERR_INVALID_CALLOUT_BODY;
5517 }
5518 else if (onig_is_error_code_needs_param(n)) {
5519 n = ONIGERR_INVALID_CALLOUT_BODY;
5520 }
5521
5522 return n;
5523 }
5524
5525 extern int
5526 onig_builtin_count(OnigCalloutArgs* args, void* user_data)
5527 {
5528 (void )onig_check_callout_data_and_clear_old_values(args);
5529
5530 return onig_builtin_total_count(args, user_data);
5531 }
5532
5533 extern int
5534 onig_builtin_total_count(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
5535 {
5536 int r;
5537 int slot;
5538 OnigType type;
5539 OnigValue val;
5540 OnigValue aval;
5541 OnigCodePoint count_type;
5542
5543 r = onig_get_arg_by_callout_args(args, 0, &type, &aval);
5544 if (r != ONIG_NORMAL) return r;
5545
5546 count_type = aval.c;
5547 if (count_type != '>' && count_type != 'X' && count_type != '<')
5548 return ONIGERR_INVALID_CALLOUT_ARG;
5549
5550 r = onig_get_callout_data_by_callout_args_self_dont_clear_old(args, 0,
5551 &type, &val);
5552 if (r < ONIG_NORMAL)
5553 return r;
5554 else if (r > ONIG_NORMAL) {
5555 /* type == void: initial state */
5556 val.l = 0;
5557 }
5558
5559 if (args->in == ONIG_CALLOUT_IN_RETRACTION) {
5560 slot = 2;
5561 if (count_type == '<')
5562 val.l++;
5563 else if (count_type == 'X')
5564 val.l--;
5565 }
5566 else {
5567 slot = 1;
5568 if (count_type != '<')
5569 val.l++;
5570 }
5571
5572 r = onig_set_callout_data_by_callout_args_self(args, 0, ONIG_TYPE_LONG, &val);
5573 if (r != ONIG_NORMAL) return r;
5574
5575 /* slot 1: in progress counter, slot 2: in retraction counter */
5576 r = onig_get_callout_data_by_callout_args_self_dont_clear_old(args, slot,
5577 &type, &val);
5578 if (r < ONIG_NORMAL)
5579 return r;
5580 else if (r > ONIG_NORMAL) {
5581 val.l = 0;
5582 }
5583
5584 val.l++;
5585 r = onig_set_callout_data_by_callout_args_self(args, slot, ONIG_TYPE_LONG, &val);
5586 if (r != ONIG_NORMAL) return r;
5587
5588 return ONIG_CALLOUT_SUCCESS;
5589 }
5590
5591 extern int
5592 onig_builtin_max(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
5593 {
5594 int r;
5595 int slot;
5596 long max_val;
5597 OnigCodePoint count_type;
5598 OnigType type;
5599 OnigValue val;
5600 OnigValue aval;
5601
5602 (void )onig_check_callout_data_and_clear_old_values(args);
5603
5604 slot = 0;
5605 r = onig_get_callout_data_by_callout_args_self(args, slot, &type, &val);
5606 if (r < ONIG_NORMAL)
5607 return r;
5608 else if (r > ONIG_NORMAL) {
5609 /* type == void: initial state */
5610 type = ONIG_TYPE_LONG;
5611 val.l = 0;
5612 }
5613
5614 r = onig_get_arg_by_callout_args(args, 0, &type, &aval);
5615 if (r != ONIG_NORMAL) return r;
5616 if (type == ONIG_TYPE_TAG) {
5617 r = onig_get_callout_data_by_callout_args(args, aval.tag, 0, &type, &aval);
5618 if (r < ONIG_NORMAL) return r;
5619 else if (r > ONIG_NORMAL)
5620 max_val = 0L;
5621 else
5622 max_val = aval.l;
5623 }
5624 else { /* LONG */
5625 max_val = aval.l;
5626 }
5627
5628 r = onig_get_arg_by_callout_args(args, 1, &type, &aval);
5629 if (r != ONIG_NORMAL) return r;
5630
5631 count_type = aval.c;
5632 if (count_type != '>' && count_type != 'X' && count_type != '<')
5633 return ONIGERR_INVALID_CALLOUT_ARG;
5634
5635 if (args->in == ONIG_CALLOUT_IN_RETRACTION) {
5636 if (count_type == '<') {
5637 if (val.l >= max_val) return ONIG_CALLOUT_FAIL;
5638 val.l++;
5639 }
5640 else if (count_type == 'X')
5641 val.l--;
5642 }
5643 else {
5644 if (count_type != '<') {
5645 if (val.l >= max_val) return ONIG_CALLOUT_FAIL;
5646 val.l++;
5647 }
5648 }
5649
5650 r = onig_set_callout_data_by_callout_args_self(args, slot, ONIG_TYPE_LONG, &val);
5651 if (r != ONIG_NORMAL) return r;
5652
5653 return ONIG_CALLOUT_SUCCESS;
5654 }
5655
/*
 * Comparison operators understood by onig_builtin_cmp().  The numeric value
 * is what that callout caches in its data slot, so the values are spelled
 * out explicitly.
 */
enum OP_CMP {
  OP_EQ = 0,  /* "==" */
  OP_NE = 1,  /* "!=" */
  OP_LT = 2,  /* "<"  */
  OP_GT = 3,  /* ">"  */
  OP_LE = 4,  /* "<=" */
  OP_GE = 5   /* ">=" */
};
5664
5665 extern int
5666 onig_builtin_cmp(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
5667 {
5668 int r;
5669 int slot;
5670 long lv;
5671 long rv;
5672 OnigType type;
5673 OnigValue val;
5674 regex_t* reg;
5675 enum OP_CMP op;
5676
5677 reg = args->regex;
5678
5679 r = onig_get_arg_by_callout_args(args, 0, &type, &val);
5680 if (r != ONIG_NORMAL) return r;
5681
5682 if (type == ONIG_TYPE_TAG) {
5683 r = onig_get_callout_data_by_callout_args(args, val.tag, 0, &type, &val);
5684 if (r < ONIG_NORMAL) return r;
5685 else if (r > ONIG_NORMAL)
5686 lv = 0L;
5687 else
5688 lv = val.l;
5689 }
5690 else { /* ONIG_TYPE_LONG */
5691 lv = val.l;
5692 }
5693
5694 r = onig_get_arg_by_callout_args(args, 2, &type, &val);
5695 if (r != ONIG_NORMAL) return r;
5696
5697 if (type == ONIG_TYPE_TAG) {
5698 r = onig_get_callout_data_by_callout_args(args, val.tag, 0, &type, &val);
5699 if (r < ONIG_NORMAL) return r;
5700 else if (r > ONIG_NORMAL)
5701 rv = 0L;
5702 else
5703 rv = val.l;
5704 }
5705 else { /* ONIG_TYPE_LONG */
5706 rv = val.l;
5707 }
5708
5709 slot = 0;
5710 r = onig_get_callout_data_by_callout_args_self(args, slot, &type, &val);
5711 if (r < ONIG_NORMAL)
5712 return r;
5713 else if (r > ONIG_NORMAL) {
5714 /* type == void: initial state */
5715 OnigCodePoint c1, c2;
5716 UChar* p;
5717
5718 r = onig_get_arg_by_callout_args(args, 1, &type, &val);
5719 if (r != ONIG_NORMAL) return r;
5720
5721 p = val.s.start;
5722 c1 = ONIGENC_MBC_TO_CODE(reg->enc, p, val.s.end);
5723 p += ONIGENC_MBC_ENC_LEN(reg->enc, p);
5724 if (p < val.s.end) {
5725 c2 = ONIGENC_MBC_TO_CODE(reg->enc, p, val.s.end);
5726 p += ONIGENC_MBC_ENC_LEN(reg->enc, p);
5727 if (p != val.s.end) return ONIGERR_INVALID_CALLOUT_ARG;
5728 }
5729 else
5730 c2 = 0;
5731
5732 switch (c1) {
5733 case '=':
5734 if (c2 != '=') return ONIGERR_INVALID_CALLOUT_ARG;
5735 op = OP_EQ;
5736 break;
5737 case '!':
5738 if (c2 != '=') return ONIGERR_INVALID_CALLOUT_ARG;
5739 op = OP_NE;
5740 break;
5741 case '<':
5742 if (c2 == '=') op = OP_LE;
5743 else if (c2 == 0) op = OP_LT;
5744 else return ONIGERR_INVALID_CALLOUT_ARG;
5745 break;
5746 case '>':
5747 if (c2 == '=') op = OP_GE;
5748 else if (c2 == 0) op = OP_GT;
5749 else return ONIGERR_INVALID_CALLOUT_ARG;
5750 break;
5751 default:
5752 return ONIGERR_INVALID_CALLOUT_ARG;
5753 break;
5754 }
5755 val.l = (long )op;
5756 r = onig_set_callout_data_by_callout_args_self(args, slot, ONIG_TYPE_LONG, &val);
5757 if (r != ONIG_NORMAL) return r;
5758 }
5759 else {
5760 op = (enum OP_CMP )val.l;
5761 }
5762
5763 switch (op) {
5764 case OP_EQ: r = (lv == rv); break;
5765 case OP_NE: r = (lv != rv); break;
5766 case OP_LT: r = (lv < rv); break;
5767 case OP_GT: r = (lv > rv); break;
5768 case OP_LE: r = (lv <= rv); break;
5769 case OP_GE: r = (lv >= rv); break;
5770 }
5771
5772 return r == 0 ? ONIG_CALLOUT_FAIL : ONIG_CALLOUT_SUCCESS;
5773 }
5774
5775
5776 //#include <stdio.h>
5777
/* Output stream selected by
   onig_setup_builtin_monitors_by_ascii_encoded_name(); null until then. */
static FILE* OutFp = NULL;
5779
5780 /* name start with "onig_" for macros. */
5781 static int
5782 onig_builtin_monitor(OnigCalloutArgs* args, void* user_data)
5783 {
5784 int r;
5785 int num;
5786 size_t tag_len;
5787 // const UChar* start;
5788 // const UChar* right;
5789 // const UChar* current;
5790 // const UChar* string;
5791 // const UChar* strend;
5792 const UChar* tag_start;
5793 const UChar* tag_end;
5794 regex_t* reg;
5795 OnigCalloutIn in;
5796 OnigType type;
5797 OnigValue val;
5798 char buf[20];
5799 // FILE* fp;
5800
5801 // fp = OutFp;
5802
5803 r = onig_get_arg_by_callout_args(args, 0, &type, &val);
5804 if (r != ONIG_NORMAL) return r;
5805
5806 in = onig_get_callout_in_by_callout_args(args);
5807 if (in == ONIG_CALLOUT_IN_PROGRESS) {
5808 if (val.c == '<')
5809 return ONIG_CALLOUT_SUCCESS;
5810 }
5811 else {
5812 if (val.c != 'X' && val.c != '<')
5813 return ONIG_CALLOUT_SUCCESS;
5814 }
5815
5816 num = onig_get_callout_num_by_callout_args(args);
5817 // start = onig_get_start_by_callout_args(args);
5818 // right = onig_get_right_range_by_callout_args(args);
5819 // current = onig_get_current_by_callout_args(args);
5820 // string = onig_get_string_by_callout_args(args);
5821 // strend = onig_get_string_end_by_callout_args(args);
5822 reg = onig_get_regex_by_callout_args(args);
5823 tag_start = onig_get_callout_tag_start(reg, num);
5824 tag_end = onig_get_callout_tag_end(reg, num);
5825
5826 if (tag_start == 0)
5827 sprintf_s(buf, sizeof(buf), "#%d", num);
5828 else {
5829 /* CAUTION: tag string is not terminated with NULL. */
5830 int i;
5831
5832 tag_len = tag_end - tag_start;
5833 if (tag_len >= sizeof(buf)) tag_len = sizeof(buf) - 1;
5834 for (i = 0; i < tag_len; i++) buf[i] = tag_start[i];
5835 buf[tag_len] = '\0';
5836 }
5837 /*
5838 fprintf(fp, "ONIG-MONITOR: %-4s %s at: %d [%d - %d] len: %d\n",
5839 buf,
5840 in == ONIG_CALLOUT_IN_PROGRESS ? "=>" : "<=",
5841 (int )(current - string),
5842 (int )(start - string),
5843 (int )(right - string),
5844 (int )(strend - string));
5845 //fflush(fp);
5846 */
5847 return ONIG_CALLOUT_SUCCESS;
5848 }
5849
5850 extern int
5851 onig_setup_builtin_monitors_by_ascii_encoded_name(void* fp /* FILE* */)
5852 {
5853 int id;
5854 char* name;
5855 OnigEncoding enc;
5856 unsigned int ts[4];
5857 OnigValue opts[4];
5858
5859 if (IS_NOT_NULL(fp))
5860 OutFp = (FILE* )fp;
5861 else
5862 OutFp = stdout;
5863
5864 enc = ONIG_ENCODING_ASCII;
5865
5866 name = "MON";
5867 ts[0] = ONIG_TYPE_CHAR;
5868 opts[0].c = '>';
5869 BC_B_O(name, monitor, 1, ts, 1, opts);
5870
5871 return ONIG_NORMAL;
5872 }
5873
5874 #endif /* USE_CALLOUT */