]> git.proxmox.com Git - mirror_edk2.git/blob - MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regexec.c
6a55045d64f5884674fff526f409c779c907962b
[mirror_edk2.git] / MdeModulePkg / Universal / RegularExpressionDxe / Oniguruma / regexec.c
1 /**********************************************************************
2 regexec.c - Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5 * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29 #include "regint.h"
30
/* Word-character test at s: mode 0 uses the encoding's ONIGENC_IS_MBC_WORD
   test, any other mode uses the ASCII-only ONIGENC_IS_MBC_WORD_ASCII test. */
#define IS_MBC_WORD_ASCII_MODE(enc,s,end,mode) \
  ((mode) == 0 ? ONIGENC_IS_MBC_WORD(enc,s,end) : ONIGENC_IS_MBC_WORD_ASCII(enc,s,end))

#ifdef USE_CRNL_AS_LINE_TERMINATOR
/* True when the code point at p is 13 (CR) and the character following it
   is a newline for the encoding.
   NOTE(review): p is expanded without parentheses; pass a simple pointer
   expression only. */
#define ONIGENC_IS_MBC_CRNL(enc,p,end) \
  (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
   ONIGENC_IS_MBC_NEWLINE(enc,(p+enclen(enc,p)),end))
#endif

/* Feature switch, defined empty; presumably enables interrupt checks inside
   the matcher -- the code that tests it is not in this part of the file. */
#define CHECK_INTERRUPT_IN_MATCH
41
#ifdef USE_CALLOUT
/* Per-callout data record: a fixed number of typed value slots plus a
   counter field.  OnigMatchParamStruct keeps an array of these in
   callout_data. */
typedef struct {
  /* presumably the match_at_call_counter value at the last update -- TODO confirm */
  int last_match_at_call_counter;
  struct {
    OnigType  type;   /* type tag of val */
    OnigValue val;    /* stored value */
  } slot[ONIG_CALLOUT_DATA_SLOT_NUM];
} CalloutData;
#endif
51
/* Per-match parameters.  The limits are seeded from the file-level defaults
   by onig_initialize_match_param() and copied into MatchArg by
   MATCH_ARG_INIT.  The callout members exist only when USE_CALLOUT is
   defined. */
struct OnigMatchParamStruct {
  unsigned int    match_stack_limit;      /* set by onig_set_match_stack_limit_size_of_match_param() */
  unsigned long   retry_limit_in_match;   /* set by onig_set_retry_limit_in_match_of_match_param() */
#ifdef USE_CALLOUT
  OnigCalloutFunc progress_callout_of_contents;
  OnigCalloutFunc retraction_callout_of_contents;
  int             match_at_call_counter;  /* reset to 0 by adjust_match_param() */
  void*           callout_user_data;      /* opaque pointer supplied by the user */
  CalloutData*    callout_data;           /* heap array, released by onig_free_match_param_content() */
  int             callout_data_alloc_num; /* number of elements allocated in callout_data */
#endif
};
64
65 extern int
66 onig_set_match_stack_limit_size_of_match_param(OnigMatchParam* param,
67 unsigned int limit)
68 {
69 param->match_stack_limit = limit;
70 return ONIG_NORMAL;
71 }
72
73 extern int
74 onig_set_retry_limit_in_match_of_match_param(OnigMatchParam* param,
75 unsigned long limit)
76 {
77 param->retry_limit_in_match = limit;
78 return ONIG_NORMAL;
79 }
80
81 extern int
82 onig_set_progress_callout_of_match_param(OnigMatchParam* param, OnigCalloutFunc f)
83 {
84 #ifdef USE_CALLOUT
85 param->progress_callout_of_contents = f;
86 return ONIG_NORMAL;
87 #else
88 return ONIG_NO_SUPPORT_CONFIG;
89 #endif
90 }
91
92 extern int
93 onig_set_retraction_callout_of_match_param(OnigMatchParam* param, OnigCalloutFunc f)
94 {
95 #ifdef USE_CALLOUT
96 param->retraction_callout_of_contents = f;
97 return ONIG_NORMAL;
98 #else
99 return ONIG_NO_SUPPORT_CONFIG;
100 #endif
101 }
102
103 extern int
104 onig_set_callout_user_data_of_match_param(OnigMatchParam* param, void* user_data)
105 {
106 #ifdef USE_CALLOUT
107 param->callout_user_data = user_data;
108 return ONIG_NORMAL;
109 #else
110 return ONIG_NO_SUPPORT_CONFIG;
111 #endif
112 }
113
114
115
/* State for one match/search call, filled in by MATCH_ARG_INIT.  stack_p and
   stack_n carry the matcher stack between runs (see STACK_INIT and
   STACK_SAVE). */
typedef struct {
  void* stack_p;          /* saved stack buffer, or NULL when none has been saved */
  int   stack_n;          /* number of StackType entries in the saved buffer */
  OnigOptionType options;
  OnigRegion*    region;
  int            ptr_num; /* num_repeat + (num_mem + 1) * 2; StackIndex slots placed before the stack */
  const UChar*   start;   /* search start position (for \G: BEGIN_POSITION) */
  unsigned int   match_stack_limit;    /* copied from the OnigMatchParam */
  unsigned long  retry_limit_in_match; /* copied from the OnigMatchParam */
  OnigMatchParam* mp;
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
  int    best_len;        /* for ONIG_OPTION_FIND_LONGEST */
  UChar* best_s;
#endif
} MatchArg;
131
132
133 #ifdef ONIG_DEBUG
134
135 /* arguments type */
/* Operand layout of an opcode, as used by onig_print_compiled_byte_code().
   ARG_SPECIAL marks opcodes whose operands are decoded by a dedicated case
   rather than by the generic per-type printer. */
typedef enum {
  ARG_SPECIAL = -1,
  ARG_NON     =  0,   /* no operand */
  ARG_RELADDR =  1,   /* one relative address */
  ARG_ABSADDR =  2,   /* one absolute address */
  ARG_LENGTH  =  3,   /* one length value */
  ARG_MEMNUM  =  4,   /* one MemNumType value */
  ARG_OPTION  =  5,   /* one OnigOptionType value */
  ARG_MODE    =  6    /* one ModeType value */
} OpArgType;
146
/* One row of the OpInfo table: opcode value, printable name and operand
   layout. */
typedef struct {
  short int opcode;     /* OP_xxx value; -1 terminates the table */
  char*     name;       /* name printed for the opcode */
  OpArgType arg_type;   /* how the operands are decoded for printing */
} OpInfoType;
152
/* Opcode description table searched linearly by op2name() and
   op2arg_type().  The final row with opcode -1 is the terminator those
   loops stop at. */
static OpInfoType OpInfo[] = {
  { OP_FINISH, "finish", ARG_NON },
  { OP_END, "end", ARG_NON },
  { OP_EXACT1, "exact1", ARG_SPECIAL },
  { OP_EXACT2, "exact2", ARG_SPECIAL },
  { OP_EXACT3, "exact3", ARG_SPECIAL },
  { OP_EXACT4, "exact4", ARG_SPECIAL },
  { OP_EXACT5, "exact5", ARG_SPECIAL },
  { OP_EXACTN, "exactn", ARG_SPECIAL },
  { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL },
  { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL },
  { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL },
  { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL },
  { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL },
  { OP_EXACTMBN, "exactmbn", ARG_SPECIAL },
  { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL },
  { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL },
  { OP_CCLASS, "cclass", ARG_SPECIAL },
  { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL },
  { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL },
  { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL },
  { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL },
  { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL },
#ifdef USE_OP_CCLASS_NODE
  { OP_CCLASS_NODE, "cclass-node", ARG_SPECIAL },
#endif
  { OP_ANYCHAR, "anychar", ARG_NON },
  { OP_ANYCHAR_ML, "anychar-ml", ARG_NON },
  { OP_ANYCHAR_STAR, "anychar*", ARG_NON },
  { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON },
  { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
  { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
  { OP_WORD, "word", ARG_NON },
  { OP_WORD_ASCII, "word-ascii", ARG_NON },
  { OP_NO_WORD, "not-word", ARG_NON },
  { OP_NO_WORD_ASCII, "not-word-ascii", ARG_NON },
  { OP_WORD_BOUNDARY, "word-boundary", ARG_MODE },
  { OP_NO_WORD_BOUNDARY, "not-word-boundary", ARG_MODE },
  { OP_WORD_BEGIN, "word-begin", ARG_MODE },
  { OP_WORD_END, "word-end", ARG_MODE },
  { OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY, "extended-grapheme-cluster-boundary", ARG_NON },
  { OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY, "no-extended-grapheme-cluster-boundary", ARG_NON },
  { OP_BEGIN_BUF, "begin-buf", ARG_NON },
  { OP_END_BUF, "end-buf", ARG_NON },
  { OP_BEGIN_LINE, "begin-line", ARG_NON },
  { OP_END_LINE, "end-line", ARG_NON },
  { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON },
  { OP_BEGIN_POSITION, "begin-position", ARG_NON },
  { OP_BACKREF1, "backref1", ARG_NON },
  { OP_BACKREF2, "backref2", ARG_NON },
  { OP_BACKREF_N, "backref-n", ARG_MEMNUM },
  { OP_BACKREF_N_IC, "backref-n-ic", ARG_SPECIAL },
  { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },
  { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL },
  { OP_BACKREF_WITH_LEVEL, "backref_with_level", ARG_SPECIAL },
  { OP_BACKREF_CHECK, "backref_check", ARG_SPECIAL },
  { OP_BACKREF_CHECK_WITH_LEVEL, "backref_check_with_level", ARG_SPECIAL },
  { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM },
  { OP_MEMORY_START, "mem-start", ARG_MEMNUM },
  { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM },
  { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM },
  { OP_MEMORY_END, "mem-end", ARG_MEMNUM },
  { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM },
  { OP_FAIL, "fail", ARG_NON },
  { OP_JUMP, "jump", ARG_RELADDR },
  { OP_PUSH, "push", ARG_RELADDR },
  { OP_PUSH_SUPER, "push-super", ARG_RELADDR },
  { OP_POP_OUT, "pop-out", ARG_NON },
  { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL },
  { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL },
  { OP_REPEAT, "repeat", ARG_SPECIAL },
  { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL },
  { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM },
  { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM },
  { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM },
  { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM },
  { OP_EMPTY_CHECK_START, "empty-check-start", ARG_MEMNUM },
  { OP_EMPTY_CHECK_END, "empty-check-end", ARG_MEMNUM },
  { OP_EMPTY_CHECK_END_MEMST,"empty-check-end-memst", ARG_MEMNUM },
  { OP_EMPTY_CHECK_END_MEMST_PUSH,"empty-check-end-memst-push", ARG_MEMNUM },
  { OP_PREC_READ_START, "push-pos", ARG_NON },
  { OP_PREC_READ_END, "pop-pos", ARG_NON },
  { OP_PREC_READ_NOT_START, "prec-read-not-start", ARG_RELADDR },
  { OP_PREC_READ_NOT_END, "prec-read-not-end", ARG_NON },
  { OP_ATOMIC_START, "atomic-start", ARG_NON },
  { OP_ATOMIC_END, "atomic-end", ARG_NON },
  { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL },
  { OP_LOOK_BEHIND_NOT_START, "look-behind-not-start", ARG_SPECIAL },
  { OP_LOOK_BEHIND_NOT_END, "look-behind-not-end", ARG_NON },
  { OP_CALL, "call", ARG_ABSADDR },
  { OP_RETURN, "return", ARG_NON },
  { OP_PUSH_SAVE_VAL, "push-save-val", ARG_SPECIAL },
  { OP_UPDATE_VAR, "update-var", ARG_SPECIAL },
#ifdef USE_CALLOUT
  { OP_CALLOUT_CONTENTS, "callout-contents", ARG_SPECIAL },
  { OP_CALLOUT_NAME, "callout-name", ARG_SPECIAL },
#endif
  /* terminator: opcode < 0 ends the search loops */
  { -1, "", ARG_NON }
};
252
253 static char*
254 op2name(int opcode)
255 {
256 int i;
257
258 for (i = 0; OpInfo[i].opcode >= 0; i++) {
259 if (opcode == OpInfo[i].opcode)
260 return OpInfo[i].name;
261 }
262 return "";
263 }
264
265 static int
266 op2arg_type(int opcode)
267 {
268 int i;
269
270 for (i = 0; OpInfo[i].opcode >= 0; i++) {
271 if (opcode == OpInfo[i].opcode)
272 return OpInfo[i].arg_type;
273 }
274 return ARG_SPECIAL;
275 }
276
277 static void
278 p_string(FILE* f, int len, UChar* s)
279 {
280 fputs(":", f);
281 while (len-- > 0) { fputc(*s++, f); }
282 }
283
284 static void
285 p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
286 {
287 int x = len * mb_len;
288
289 fprintf(f, ":%d:", len);
290 while (x-- > 0) { fputc(*s++, f); }
291 }
292
293 static void
294 p_rel_addr(FILE* f, RelAddrType rel_addr, UChar* p, UChar* start)
295 {
296 RelAddrType curr = (RelAddrType )(p - start);
297
298 fprintf(f, "{%d/%d}", rel_addr, curr + rel_addr);
299 }
300
301 static int
302 bitset_on_num(BitSetRef bs)
303 {
304 int i, n;
305
306 n = 0;
307 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
308 if (BITSET_AT(bs, i)) n++;
309 }
310 return n;
311 }
312
313 extern void
314 onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start,
315 OnigEncoding enc)
316 {
317 int i, n;
318 OpArgType arg_type;
319 RelAddrType addr;
320 LengthType len;
321 MemNumType mem;
322 OnigCodePoint code;
323 OnigOptionType option;
324 ModeType mode;
325 UChar *q;
326
327 fprintf(f, "%s", op2name(*bp));
328 arg_type = op2arg_type(*bp);
329 if (arg_type != ARG_SPECIAL) {
330 bp++;
331 switch (arg_type) {
332 case ARG_NON:
333 break;
334 case ARG_RELADDR:
335 GET_RELADDR_INC(addr, bp);
336 fputc(':', f);
337 p_rel_addr(f, addr, bp, start);
338 break;
339 case ARG_ABSADDR:
340 GET_ABSADDR_INC(addr, bp);
341 fprintf(f, ":{/%d}", addr);
342 break;
343 case ARG_LENGTH:
344 GET_LENGTH_INC(len, bp);
345 fprintf(f, ":%d", len);
346 break;
347 case ARG_MEMNUM:
348 mem = *((MemNumType* )bp);
349 bp += SIZE_MEMNUM;
350 fprintf(f, ":%d", mem);
351 break;
352 case ARG_OPTION:
353 {
354 OnigOptionType option = *((OnigOptionType* )bp);
355 bp += SIZE_OPTION;
356 fprintf(f, ":%d", option);
357 }
358 break;
359 case ARG_MODE:
360 mode = *((ModeType* )bp);
361 bp += SIZE_MODE;
362 fprintf(f, ":%d", mode);
363 break;
364 default:
365 break;
366 }
367 }
368 else {
369 switch (*bp++) {
370 case OP_EXACT1:
371 case OP_ANYCHAR_STAR_PEEK_NEXT:
372 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
373 p_string(f, 1, bp++); break;
374 case OP_EXACT2:
375 p_string(f, 2, bp); bp += 2; break;
376 case OP_EXACT3:
377 p_string(f, 3, bp); bp += 3; break;
378 case OP_EXACT4:
379 p_string(f, 4, bp); bp += 4; break;
380 case OP_EXACT5:
381 p_string(f, 5, bp); bp += 5; break;
382 case OP_EXACTN:
383 GET_LENGTH_INC(len, bp);
384 p_len_string(f, len, 1, bp);
385 bp += len;
386 break;
387
388 case OP_EXACTMB2N1:
389 p_string(f, 2, bp); bp += 2; break;
390 case OP_EXACTMB2N2:
391 p_string(f, 4, bp); bp += 4; break;
392 case OP_EXACTMB2N3:
393 p_string(f, 6, bp); bp += 6; break;
394 case OP_EXACTMB2N:
395 GET_LENGTH_INC(len, bp);
396 p_len_string(f, len, 2, bp);
397 bp += len * 2;
398 break;
399 case OP_EXACTMB3N:
400 GET_LENGTH_INC(len, bp);
401 p_len_string(f, len, 3, bp);
402 bp += len * 3;
403 break;
404 case OP_EXACTMBN:
405 {
406 int mb_len;
407
408 GET_LENGTH_INC(mb_len, bp);
409 GET_LENGTH_INC(len, bp);
410 fprintf(f, ":%d:%d:", mb_len, len);
411 n = len * mb_len;
412 while (n-- > 0) { fputc(*bp++, f); }
413 }
414 break;
415
416 case OP_EXACT1_IC:
417 len = enclen(enc, bp);
418 p_string(f, len, bp);
419 bp += len;
420 break;
421 case OP_EXACTN_IC:
422 GET_LENGTH_INC(len, bp);
423 p_len_string(f, len, 1, bp);
424 bp += len;
425 break;
426
427 case OP_CCLASS:
428 n = bitset_on_num((BitSetRef )bp);
429 bp += SIZE_BITSET;
430 fprintf(f, ":%d", n);
431 break;
432
433 case OP_CCLASS_NOT:
434 n = bitset_on_num((BitSetRef )bp);
435 bp += SIZE_BITSET;
436 fprintf(f, ":%d", n);
437 break;
438
439 case OP_CCLASS_MB:
440 case OP_CCLASS_MB_NOT:
441 GET_LENGTH_INC(len, bp);
442 q = bp;
443 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
444 ALIGNMENT_RIGHT(q);
445 #endif
446 GET_CODE_POINT(code, q);
447 bp += len;
448 fprintf(f, ":%d:%d", (int )code, len);
449 break;
450
451 case OP_CCLASS_MIX:
452 case OP_CCLASS_MIX_NOT:
453 n = bitset_on_num((BitSetRef )bp);
454 bp += SIZE_BITSET;
455 GET_LENGTH_INC(len, bp);
456 q = bp;
457 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
458 ALIGNMENT_RIGHT(q);
459 #endif
460 GET_CODE_POINT(code, q);
461 bp += len;
462 fprintf(f, ":%d:%d:%d", n, (int )code, len);
463 break;
464
465 #ifdef USE_OP_CCLASS_NODE
466 case OP_CCLASS_NODE:
467 {
468 CClassNode *cc;
469
470 GET_POINTER_INC(cc, bp);
471 n = bitset_on_num(cc->bs);
472 fprintf(f, ":%p:%d", cc, n);
473 }
474 break;
475 #endif
476
477 case OP_BACKREF_N_IC:
478 mem = *((MemNumType* )bp);
479 bp += SIZE_MEMNUM;
480 fprintf(f, ":%d", mem);
481 break;
482
483 case OP_BACKREF_MULTI_IC:
484 case OP_BACKREF_MULTI:
485 case OP_BACKREF_CHECK:
486 fputs(" ", f);
487 GET_LENGTH_INC(len, bp);
488 for (i = 0; i < len; i++) {
489 GET_MEMNUM_INC(mem, bp);
490 if (i > 0) fputs(", ", f);
491 fprintf(f, "%d", mem);
492 }
493 break;
494
495 case OP_BACKREF_WITH_LEVEL:
496 GET_OPTION_INC(option, bp);
497 fprintf(f, ":%d", option);
498 /* fall */
499 case OP_BACKREF_CHECK_WITH_LEVEL:
500 {
501 LengthType level;
502
503 GET_LENGTH_INC(level, bp);
504 fprintf(f, ":%d", level);
505
506 fputs(" ", f);
507 GET_LENGTH_INC(len, bp);
508 for (i = 0; i < len; i++) {
509 GET_MEMNUM_INC(mem, bp);
510 if (i > 0) fputs(", ", f);
511 fprintf(f, "%d", mem);
512 }
513 }
514 break;
515
516 case OP_REPEAT:
517 case OP_REPEAT_NG:
518 {
519 mem = *((MemNumType* )bp);
520 bp += SIZE_MEMNUM;
521 addr = *((RelAddrType* )bp);
522 bp += SIZE_RELADDR;
523 fprintf(f, ":%d:%d", mem, addr);
524 }
525 break;
526
527 case OP_PUSH_OR_JUMP_EXACT1:
528 case OP_PUSH_IF_PEEK_NEXT:
529 addr = *((RelAddrType* )bp);
530 bp += SIZE_RELADDR;
531 fputc(':', f);
532 p_rel_addr(f, addr, bp, start);
533 p_string(f, 1, bp);
534 bp += 1;
535 break;
536
537 case OP_LOOK_BEHIND:
538 GET_LENGTH_INC(len, bp);
539 fprintf(f, ":%d", len);
540 break;
541
542 case OP_LOOK_BEHIND_NOT_START:
543 GET_RELADDR_INC(addr, bp);
544 GET_LENGTH_INC(len, bp);
545 fprintf(f, ":%d:", len);
546 p_rel_addr(f, addr, bp, start);
547 break;
548
549 case OP_PUSH_SAVE_VAL:
550 {
551 SaveType type;
552 GET_SAVE_TYPE_INC(type, bp);
553 GET_MEMNUM_INC(mem, bp);
554 fprintf(f, ":%d:%d", type, mem);
555 }
556 break;
557
558 case OP_UPDATE_VAR:
559 {
560 UpdateVarType type;
561 GET_UPDATE_VAR_TYPE_INC(type, bp);
562 GET_MEMNUM_INC(mem, bp);
563 fprintf(f, ":%d:%d", type, mem);
564 }
565 break;
566
567 #ifdef USE_CALLOUT
568 case OP_CALLOUT_CONTENTS:
569 {
570 GET_MEMNUM_INC(mem, bp); /* number */
571 fprintf(f, ":%d", mem);
572 }
573 break;
574
575 case OP_CALLOUT_NAME:
576 {
577 int id;
578
579 GET_MEMNUM_INC(id, bp); /* id */
580 GET_MEMNUM_INC(mem, bp); /* number */
581
582 fprintf(f, ":%d:%d", id, mem);
583 }
584 break;
585 #endif
586
587 default:
588 fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n", *--bp);
589 }
590 }
591 if (nextp) *nextp = bp;
592 }
593 #endif /* ONIG_DEBUG */
594
595 #ifdef ONIG_DEBUG_COMPILE
596 extern void
597 onig_print_compiled_byte_code_list(FILE* f, regex_t* reg)
598 {
599 UChar* bp;
600 UChar* start = reg->p;
601 UChar* end = reg->p + reg->used;
602
603 fprintf(f, "bt_mem_start: 0x%x, bt_mem_end: 0x%x\n",
604 reg->bt_mem_start, reg->bt_mem_end);
605 fprintf(f, "code-length: %d\n", reg->used);
606
607 bp = start;
608 while (bp < end) {
609 int pos = bp - start;
610
611 fprintf(f, "%4d: ", pos);
612 onig_print_compiled_byte_code(f, bp, &bp, start, reg->enc);
613 fprintf(f, "\n");
614 }
615 fprintf(f, "\n");
616 }
617 #endif
618
619
620 #ifdef USE_CAPTURE_HISTORY
621 static void history_tree_free(OnigCaptureTreeNode* node);
622
623 static void
624 history_tree_clear(OnigCaptureTreeNode* node)
625 {
626 int i;
627
628 if (IS_NOT_NULL(node)) {
629 for (i = 0; i < node->num_childs; i++) {
630 if (IS_NOT_NULL(node->childs[i])) {
631 history_tree_free(node->childs[i]);
632 }
633 }
634 for (i = 0; i < node->allocated; i++) {
635 node->childs[i] = (OnigCaptureTreeNode* )0;
636 }
637 node->num_childs = 0;
638 node->beg = ONIG_REGION_NOTPOS;
639 node->end = ONIG_REGION_NOTPOS;
640 node->group = -1;
641 }
642 }
643
644 static void
645 history_tree_free(OnigCaptureTreeNode* node)
646 {
647 history_tree_clear(node);
648 xfree(node);
649 }
650
651 static void
652 history_root_free(OnigRegion* r)
653 {
654 if (IS_NOT_NULL(r->history_root)) {
655 history_tree_free(r->history_root);
656 r->history_root = (OnigCaptureTreeNode* )0;
657 }
658 }
659
660 static OnigCaptureTreeNode*
661 history_node_new(void)
662 {
663 OnigCaptureTreeNode* node;
664
665 node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode));
666 CHECK_NULL_RETURN(node);
667 node->childs = (OnigCaptureTreeNode** )0;
668 node->allocated = 0;
669 node->num_childs = 0;
670 node->group = -1;
671 node->beg = ONIG_REGION_NOTPOS;
672 node->end = ONIG_REGION_NOTPOS;
673
674 return node;
675 }
676
677 static int
678 history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child)
679 {
680 #define HISTORY_TREE_INIT_ALLOC_SIZE 8
681
682 if (parent->num_childs >= parent->allocated) {
683 int n, i;
684
685 if (IS_NULL(parent->childs)) {
686 n = HISTORY_TREE_INIT_ALLOC_SIZE;
687 parent->childs =
688 (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n);
689 }
690 else {
691 n = parent->allocated * 2;
692 parent->childs =
693 (OnigCaptureTreeNode** )xrealloc(parent->childs,
694 sizeof(OnigCaptureTreeNode*) * n,
695 sizeof(OnigCaptureTreeNode*) * parent->allocated);
696 }
697 CHECK_NULL_RETURN_MEMERR(parent->childs);
698 for (i = parent->allocated; i < n; i++) {
699 parent->childs[i] = (OnigCaptureTreeNode* )0;
700 }
701 parent->allocated = n;
702 }
703
704 parent->childs[parent->num_childs] = child;
705 parent->num_childs++;
706 return 0;
707 }
708
709 static OnigCaptureTreeNode*
710 history_tree_clone(OnigCaptureTreeNode* node)
711 {
712 int i;
713 OnigCaptureTreeNode *clone, *child;
714
715 clone = history_node_new();
716 CHECK_NULL_RETURN(clone);
717
718 clone->beg = node->beg;
719 clone->end = node->end;
720 for (i = 0; i < node->num_childs; i++) {
721 child = history_tree_clone(node->childs[i]);
722 if (IS_NULL(child)) {
723 history_tree_free(clone);
724 return (OnigCaptureTreeNode* )0;
725 }
726 history_tree_add_child(clone, child);
727 }
728
729 return clone;
730 }
731
732 extern OnigCaptureTreeNode*
733 onig_get_capture_tree(OnigRegion* region)
734 {
735 return region->history_root;
736 }
737 #endif /* USE_CAPTURE_HISTORY */
738
739 extern void
740 onig_region_clear(OnigRegion* region)
741 {
742 int i;
743
744 for (i = 0; i < region->num_regs; i++) {
745 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
746 }
747 #ifdef USE_CAPTURE_HISTORY
748 history_root_free(region);
749 #endif
750 }
751
752 extern int
753 onig_region_resize(OnigRegion* region, int n)
754 {
755 region->num_regs = n;
756
757 if (n < ONIG_NREGION)
758 n = ONIG_NREGION;
759
760 if (region->allocated == 0) {
761 region->beg = (int* )xmalloc(n * sizeof(int));
762 region->end = (int* )xmalloc(n * sizeof(int));
763
764 if (region->beg == 0 || region->end == 0)
765 return ONIGERR_MEMORY;
766
767 region->allocated = n;
768 }
769 else if (region->allocated < n) {
770 region->beg = (int* )xrealloc(region->beg, n * sizeof(int), region->allocated * sizeof(int));
771 region->end = (int* )xrealloc(region->end, n * sizeof(int), region->allocated * sizeof(int));
772
773 if (region->beg == 0 || region->end == 0)
774 return ONIGERR_MEMORY;
775
776 region->allocated = n;
777 }
778
779 return 0;
780 }
781
782 static int
783 onig_region_resize_clear(OnigRegion* region, int n)
784 {
785 int r;
786
787 r = onig_region_resize(region, n);
788 if (r != 0) return r;
789 onig_region_clear(region);
790 return 0;
791 }
792
793 extern int
794 onig_region_set(OnigRegion* region, int at, int beg, int end)
795 {
796 if (at < 0) return ONIGERR_INVALID_ARGUMENT;
797
798 if (at >= region->allocated) {
799 int r = onig_region_resize(region, at + 1);
800 if (r < 0) return r;
801 }
802
803 region->beg[at] = beg;
804 region->end[at] = end;
805 return 0;
806 }
807
808 extern void
809 onig_region_init(OnigRegion* region)
810 {
811 region->num_regs = 0;
812 region->allocated = 0;
813 region->beg = (int* )0;
814 region->end = (int* )0;
815 region->history_root = (OnigCaptureTreeNode* )0;
816 }
817
818 extern OnigRegion*
819 onig_region_new(void)
820 {
821 OnigRegion* r;
822
823 r = (OnigRegion* )xmalloc(sizeof(OnigRegion));
824 CHECK_NULL_RETURN(r);
825 onig_region_init(r);
826 return r;
827 }
828
829 extern void
830 onig_region_free(OnigRegion* r, int free_self)
831 {
832 if (r != 0) {
833 if (r->allocated > 0) {
834 if (r->beg) xfree(r->beg);
835 if (r->end) xfree(r->end);
836 r->allocated = 0;
837 }
838 #ifdef USE_CAPTURE_HISTORY
839 history_root_free(r);
840 #endif
841 if (free_self) xfree(r);
842 }
843 }
844
845 extern void
846 onig_region_copy(OnigRegion* to, OnigRegion* from)
847 {
848 #define RREGC_SIZE (sizeof(int) * from->num_regs)
849 int i;
850
851 if (to == from) return;
852
853 if (to->allocated == 0) {
854 if (from->num_regs > 0) {
855 to->beg = (int* )xmalloc(RREGC_SIZE);
856 if (IS_NULL(to->beg)) return;
857 to->end = (int* )xmalloc(RREGC_SIZE);
858 if (IS_NULL(to->end)) return;
859 to->allocated = from->num_regs;
860 }
861 }
862 else if (to->allocated < from->num_regs) {
863 to->beg = (int* )xrealloc(to->beg, RREGC_SIZE, sizeof(int) * to->allocated);
864 if (IS_NULL(to->beg)) return;
865 to->end = (int* )xrealloc(to->end, RREGC_SIZE, sizeof(int) * to->allocated);
866 if (IS_NULL(to->end)) return;
867 to->allocated = from->num_regs;
868 }
869
870 for (i = 0; i < from->num_regs; i++) {
871 to->beg[i] = from->beg[i];
872 to->end[i] = from->end[i];
873 }
874 to->num_regs = from->num_regs;
875
876 #ifdef USE_CAPTURE_HISTORY
877 history_root_free(to);
878
879 if (IS_NOT_NULL(from->history_root)) {
880 to->history_root = history_tree_clone(from->history_root);
881 }
882 #endif
883 }
884
#ifdef USE_CALLOUT
/* Fill the OnigCalloutArgs variable args and call func(&args, user),
   storing the return value in result.  Besides its parameters the macro
   reads these names from the expansion site: reg, str, end, sstart,
   right_range, s, retry_in_match_counter, msa, stk_base, stk,
   mem_start_stk and mem_end_stk. */
#define CALLOUT_BODY(func, ain, aname_id, anum, user, args, result) do { \
  args.in            = (ain);\
  args.name_id       = (aname_id);\
  args.num           = anum;\
  args.regex         = reg;\
  args.string        = str;\
  args.string_end    = end;\
  args.start         = sstart;\
  args.right_range   = right_range;\
  args.current       = s;\
  args.retry_in_match_counter = retry_in_match_counter;\
  args.msa           = msa;\
  args.stk_base      = stk_base;\
  args.stk           = stk;\
  args.mem_start_stk = mem_start_stk;\
  args.mem_end_stk   = mem_end_stk;\
  result = (func)(&args, user);\
} while (0)

/* Run func as a retraction callout (ONIG_CALLOUT_IN_RETRACTION).
   ONIG_CALLOUT_FAIL and ONIG_CALLOUT_SUCCESS let execution continue.  Any
   other result is stored in best_len, with positive values first replaced
   by ONIGERR_INVALID_ARGUMENT, and control jumps to the finish label.
   best_len and finish must exist at the expansion site. */
#define RETRACTION_CALLOUT(func, aname_id, anum, user) do {\
  int result;\
  OnigCalloutArgs args;\
  CALLOUT_BODY(func, ONIG_CALLOUT_IN_RETRACTION, aname_id, anum, user, args, result);\
  switch (result) {\
  case ONIG_CALLOUT_FAIL:\
  case ONIG_CALLOUT_SUCCESS:\
    break;\
  default:\
    if (result > 0) {\
      result = ONIGERR_INVALID_ARGUMENT;\
    }\
    best_len = result;\
    goto finish;\
    break;\
  }\
} while(0)
#endif
923
924
/** stack **/
/* value meaning "no stack entry" for a stack index */
#define INVALID_STACK_INDEX   -1

/* bit 0 is set in every STK_*ALT* type below */
#define STK_ALT_FLAG               0x0001

/* stack type */
/* used by normal-POP */
#define STK_SUPER_ALT             STK_ALT_FLAG
#define STK_ALT                   (0x0002 | STK_ALT_FLAG)
#define STK_ALT_PREC_READ_NOT     (0x0004 | STK_ALT_FLAG)
#define STK_ALT_LOOK_BEHIND_NOT   (0x0006 | STK_ALT_FLAG)

/* handled by normal-POP */
/* each of these has the STK_MASK_POP_HANDLED bit (0x0010) set */
#define STK_MEM_START              0x0010
#define STK_MEM_END                0x8030
#define STK_REPEAT_INC             0x0050
#ifdef USE_CALLOUT
#define STK_CALLOUT                0x0070
#endif

/* avoided by normal-POP */
#define STK_VOID                   0x0000  /* filler for a blank entry */
#define STK_EMPTY_CHECK_START      0x3000
#define STK_EMPTY_CHECK_END        0x5000  /* for recursive call */
#define STK_MEM_END_MARK           0x8100
#define STK_TO_VOID_START          0x1200  /* mark for "(?>...)" */
#define STK_REPEAT                 0x0300
#define STK_CALL_FRAME             0x0400
#define STK_RETURN                 0x0500
#define STK_SAVE_VAL               0x0600

/* stack type check mask */
#define STK_MASK_POP_USED          STK_ALT_FLAG
#define STK_MASK_POP_HANDLED       0x0010
#define STK_MASK_POP_HANDLED_TIL   (STK_MASK_POP_HANDLED | 0x0004)
#define STK_MASK_TO_VOID_TARGET    0x100e
/* bit 0x8000 is set in both STK_MEM_END and STK_MEM_END_MARK */
#define STK_MASK_MEM_END_OR_MARK   0x8000  /* MEM_END or MEM_END_MARK */
962
/* Index of an entry in the matcher stack; INVALID_STACK_INDEX means none. */
typedef intptr_t StackIndex;

/* One matcher stack entry.  type holds one of the STK_* constants and
   selects which member of the union u is meaningful. */
typedef struct _StackType {
  unsigned int type;   /* STK_* constant */
  int zid;             /* presumably the memory / repeat / callout id the entry belongs to -- TODO confirm */
  union {
    struct {
      UChar *pcode;      /* byte code position */
      UChar *pstr;       /* string position */
      UChar *pstr_prev;  /* previous char position of pstr */
    } state;
    struct {
      int   count;       /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */
      UChar *pcode;      /* byte code position (head of repeated target) */
    } repeat;
    struct {
      StackIndex si;     /* index of stack */
    } repeat_inc;
    struct {
      UChar *pstr;       /* start/end position */
      /* Following information is set, if this stack type is MEM-START */
      StackIndex prev_start;  /* prev. info (for backtrack  "(...)*" ) */
      StackIndex prev_end;    /* prev. info (for backtrack  "(...)*" ) */
    } mem;
    struct {
      UChar *pstr;       /* start position */
    } empty_check;
#ifdef USE_CALL
    struct {
      UChar *ret_addr;   /* byte code position */
      UChar *pstr;       /* string position */
    } call_frame;
#endif
    struct {
      enum SaveType type;  /* kind of value saved in v / v2 */
      UChar* v;
      UChar* v2;
    } val;
#ifdef USE_CALLOUT
    struct {
      int num;               /* callout number */
      OnigCalloutFunc func;  /* callout function stored with the entry */
    } callout;
#endif
  } u;
} StackType;
1009
#ifdef USE_CALLOUT

/* Argument block handed to a callout function.  CALLOUT_BODY fills every
   member from the matcher's local state just before the call. */
struct OnigCalloutArgsStruct {
  OnigCalloutIn    in;            /* callout direction, e.g. ONIG_CALLOUT_IN_RETRACTION */
  int              name_id;   /* name id or ONIG_NON_NAME_ID */
  int              num;           /* callout number */
  OnigRegex        regex;
  const OnigUChar* string;        /* subject string */
  const OnigUChar* string_end;    /* end of the subject string */
  const OnigUChar* start;
  const OnigUChar* right_range;
  const OnigUChar* current;  /* current matching position */
  unsigned long    retry_in_match_counter;

  /* invisible to users */
  MatchArg*   msa;
  StackType*  stk_base;
  StackType*  stk;
  StackIndex* mem_start_stk;
  StackIndex* mem_end_stk;
};

#endif
1033
1034
/* Initialise a MatchArg (msa, passed as an lvalue rather than a pointer)
   for one match: no saved stack, the given options / region / start, the
   limits copied from the match parameter mp, and ptr_num computed from the
   regex's repeat and memory counts.  With
   USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE best_len additionally starts at
   ONIG_MISMATCH.  stack_n and best_s are not initialised here. */
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mp) do { \
  (msa).stack_p  = (void* )0;\
  (msa).options  = (arg_option);\
  (msa).region   = (arg_region);\
  (msa).start    = (arg_start);\
  (msa).match_stack_limit  = (mp)->match_stack_limit;\
  (msa).retry_limit_in_match = (mp)->retry_limit_in_match;\
  (msa).mp = mp;\
  (msa).best_len = ONIG_MISMATCH;\
  (msa).ptr_num  = (reg)->num_repeat + ((reg)->num_mem + 1) * 2; \
} while(0)
#else
#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mp) do { \
  (msa).stack_p  = (void* )0;\
  (msa).options  = (arg_option);\
  (msa).region   = (arg_region);\
  (msa).start    = (arg_start);\
  (msa).match_stack_limit  = (mp)->match_stack_limit;\
  (msa).retry_limit_in_match = (mp)->retry_limit_in_match;\
  (msa).mp = mp;\
  (msa).ptr_num  = (reg)->num_repeat + ((reg)->num_mem + 1) * 2; \
} while(0)
#endif
1059
/* Free the stack buffer saved in msa, if any.
   NOTE(review): expands to a bare "if" statement, so using it directly
   before an "else" would capture that else. */
#define MATCH_ARG_FREE(msa)  if ((msa).stack_p) xfree((msa).stack_p)


/* STACK_INIT sets is_alloca = 1 only while msa->ptr_num does not exceed
   this limit. */
#define ALLOCA_PTR_NUM_LIMIT   50
1064
/* Set up the matcher stack.  The buffer at alloc_base starts with
   msa->ptr_num StackIndex slots, followed by the StackType entries
   (stk_base .. stk_end).
   - If msa already holds a saved buffer (msa->stack_p), it is reused with
     its recorded size msa->stack_n.
   - Otherwise a new buffer for stack_num entries is obtained with xmalloc.
     The two allocating branches differ only in the is_alloca flag
     (0 when ptr_num > ALLOCA_PTR_NUM_LIMIT, 1 otherwise), which STACK_SAVE
     consults later.
   On allocation failure the enclosing function returns through
   CHECK_NULL_RETURN_MEMERR.  The macro uses the names msa, is_alloca,
   alloc_base, stk_base, stk and stk_end from the expansion site.
   NOTE(review): the definition ends in "while(0);" -- the trailing
   semicolon is already part of the macro. */
#define STACK_INIT(stack_num)  do {\
  if (msa->stack_p) {\
    is_alloca  = 0;\
    alloc_base = msa->stack_p;\
    stk_base   = (StackType* )(alloc_base\
                 + (sizeof(StackIndex) * msa->ptr_num));\
    stk        = stk_base;\
    stk_end    = stk_base + msa->stack_n;\
  }\
  else if (msa->ptr_num > ALLOCA_PTR_NUM_LIMIT) {\
    is_alloca  = 0;\
    alloc_base = (char* )xmalloc(sizeof(StackIndex) * msa->ptr_num\
                  + sizeof(StackType) * (stack_num));\
    CHECK_NULL_RETURN_MEMERR(alloc_base);\
    stk_base   = (StackType* )(alloc_base\
                 + (sizeof(StackIndex) * msa->ptr_num));\
    stk        = stk_base;\
    stk_end    = stk_base + (stack_num);\
  }\
  else {\
    is_alloca  = 1;\
    alloc_base = (char* )xmalloc(sizeof(StackIndex) * msa->ptr_num\
                 + sizeof(StackType) * (stack_num));\
    CHECK_NULL_RETURN_MEMERR(alloc_base);\
    stk_base   = (StackType* )(alloc_base\
                 + (sizeof(StackIndex) * msa->ptr_num));\
    stk        = stk_base;\
    stk_end    = stk_base + (stack_num);\
  }\
} while(0);
1095
1096
/* Record the current stack in msa so that a later STACK_INIT can reuse it.
   msa->stack_n becomes the current capacity in entries.  When is_alloca is
   set, the whole buffer (StackIndex slots plus stack entries) is copied
   into a fresh xmalloc block, and the enclosing function returns through
   CHECK_NULL_RETURN_MEMERR if that allocation fails; otherwise alloc_base
   itself is handed over to msa->stack_p. */
#define STACK_SAVE do{\
    msa->stack_n = (int )(stk_end - stk_base);\
  if (is_alloca != 0) {\
    size_t size = sizeof(StackIndex) * msa->ptr_num \
                + sizeof(StackType) * msa->stack_n;\
    msa->stack_p = xmalloc(size);\
    CHECK_NULL_RETURN_MEMERR(msa->stack_p);\
    xmemcpy(msa->stack_p, alloc_base, size);\
  }\
  else {\
    msa->stack_p = alloc_base;\
  };\
} while(0)
1110
/* Re-derive the three StackIndex arrays that sit at the front of the
   buffer at alloc_base: repeat_stk (reg->num_repeat slots), then
   mem_start_stk, then mem_end_stk, which starts num_mem + 1 slots after
   mem_start_stk.  Presumably invoked after alloc_base has changed; the
   call sites are not in this part of the file. */
#define UPDATE_FOR_STACK_REALLOC do{\
  repeat_stk    = (StackIndex* )alloc_base;\
  mem_start_stk = (StackIndex* )(repeat_stk + reg->num_repeat);\
  mem_end_stk   = mem_start_stk + num_mem + 1;\
} while(0)
1116
/* Library-wide default for match_stack_limit; copied into each match
   parameter by onig_initialize_match_param(). */
static unsigned int MatchStackLimit = DEFAULT_MATCH_STACK_LIMIT_SIZE;
1118
1119 extern unsigned int
1120 onig_get_match_stack_limit_size(void)
1121 {
1122 return MatchStackLimit;
1123 }
1124
1125 extern int
1126 onig_set_match_stack_limit_size(unsigned int size)
1127 {
1128 MatchStackLimit = size;
1129 return 0;
1130 }
1131
#ifdef USE_RETRY_LIMIT_IN_MATCH

/* Library-wide default for retry_limit_in_match; copied into each match
   parameter by onig_initialize_match_param(). */
static unsigned long RetryLimitInMatch = DEFAULT_RETRY_LIMIT_IN_MATCH;

/* Count one retry: jump to the retry_limit_in_match_over label when the
   counter's value before the increment already exceeds
   retry_limit_in_match.  Both variables and the label must exist at the
   expansion site. */
#define CHECK_RETRY_LIMIT_IN_MATCH  do {\
  if (retry_in_match_counter++ > retry_limit_in_match) goto retry_limit_in_match_over;\
} while (0)

#else

/* Retry limiting is compiled out: the check expands to nothing. */
#define CHECK_RETRY_LIMIT_IN_MATCH

#endif /* USE_RETRY_LIMIT_IN_MATCH */
1145
/*
 * Return the library-wide default retry limit, or 0 when the library is
 * built without USE_RETRY_LIMIT_IN_MATCH.
 */
extern unsigned long
onig_get_retry_limit_in_match(void)
{
#ifndef USE_RETRY_LIMIT_IN_MATCH
  /* return ONIG_NO_SUPPORT_CONFIG; */
  return 0;
#else
  return RetryLimitInMatch;
#endif
}
1156
1157 extern int
1158 onig_set_retry_limit_in_match(unsigned long size)
1159 {
1160 #ifdef USE_RETRY_LIMIT_IN_MATCH
1161 RetryLimitInMatch = size;
1162 return 0;
1163 #else
1164 return ONIG_NO_SUPPORT_CONFIG;
1165 #endif
1166 }
1167
1168 static OnigCalloutFunc DefaultProgressCallout;
1169 static OnigCalloutFunc DefaultRetractionCallout;
1170
1171 extern OnigMatchParam*
1172 onig_new_match_param(void)
1173 {
1174 OnigMatchParam* p;
1175
1176 p = (OnigMatchParam* )xmalloc(sizeof(*p));
1177 if (IS_NOT_NULL(p)) {
1178 onig_initialize_match_param(p);
1179 }
1180
1181 return p;
1182 }
1183
1184 extern void
1185 onig_free_match_param_content(OnigMatchParam* p)
1186 {
1187 #ifdef USE_CALLOUT
1188 if (IS_NOT_NULL(p->callout_data)) {
1189 xfree(p->callout_data);
1190 p->callout_data = 0;
1191 }
1192 #endif
1193 }
1194
1195 extern void
1196 onig_free_match_param(OnigMatchParam* p)
1197 {
1198 if (IS_NOT_NULL(p)) {
1199 onig_free_match_param_content(p);
1200 xfree(p);
1201 }
1202 }
1203
1204 extern int
1205 onig_initialize_match_param(OnigMatchParam* mp)
1206 {
1207 mp->match_stack_limit = MatchStackLimit;
1208 #ifdef USE_RETRY_LIMIT_IN_MATCH
1209 mp->retry_limit_in_match = RetryLimitInMatch;
1210 #endif
1211 mp->progress_callout_of_contents = DefaultProgressCallout;
1212 mp->retraction_callout_of_contents = DefaultRetractionCallout;
1213
1214 #ifdef USE_CALLOUT
1215 mp->match_at_call_counter = 0;
1216 mp->callout_user_data = 0;
1217 mp->callout_data = 0;
1218 mp->callout_data_alloc_num = 0;
1219 #endif
1220
1221 return ONIG_NORMAL;
1222 }
1223
1224 #ifdef USE_CALLOUT
1225
1226 static int
1227 adjust_match_param(regex_t* reg, OnigMatchParam* mp)
1228 {
1229 RegexExt* ext = REG_EXTP(reg);
1230
1231 mp->match_at_call_counter = 0;
1232
1233 if (IS_NULL(ext) || ext->callout_num == 0) return ONIG_NORMAL;
1234
1235 if (ext->callout_num > mp->callout_data_alloc_num) {
1236 CalloutData* d;
1237 size_t n = ext->callout_num * sizeof(*d);
1238 if (IS_NOT_NULL(mp->callout_data))
1239 d = (CalloutData* )xrealloc(mp->callout_data, n, mp->callout_data_alloc_num * sizeof(*d));
1240 else
1241 d = (CalloutData* )xmalloc(n);
1242 CHECK_NULL_RETURN_MEMERR(d);
1243
1244 mp->callout_data = d;
1245 mp->callout_data_alloc_num = ext->callout_num;
1246 }
1247
1248 xmemset(mp->callout_data, 0, mp->callout_data_alloc_num * sizeof(CalloutData));
1249 return ONIG_NORMAL;
1250 }
1251
1252 #define ADJUST_MATCH_PARAM(reg, mp) \
1253 r = adjust_match_param(reg, mp);\
1254 if (r != ONIG_NORMAL) return r;
1255
1256 #define CALLOUT_DATA_AT_NUM(mp, num) ((mp)->callout_data + ((num) - 1))
1257
1258 extern int
1259 onig_check_callout_data_and_clear_old_values(OnigCalloutArgs* args)
1260 {
1261 OnigMatchParam* mp;
1262 int num;
1263 CalloutData* d;
1264
1265 mp = args->msa->mp;
1266 num = args->num;
1267
1268 d = CALLOUT_DATA_AT_NUM(mp, num);
1269 if (d->last_match_at_call_counter != mp->match_at_call_counter) {
1270 xmemset(d, 0, sizeof(*d));
1271 d->last_match_at_call_counter = mp->match_at_call_counter;
1272 return d->last_match_at_call_counter;
1273 }
1274
1275 return 0;
1276 }
1277
1278 extern int
1279 onig_get_callout_data_dont_clear_old(regex_t* reg, OnigMatchParam* mp,
1280 int callout_num, int slot,
1281 OnigType* type, OnigValue* val)
1282 {
1283 OnigType t;
1284 CalloutData* d;
1285
1286 if (callout_num <= 0) return ONIGERR_INVALID_ARGUMENT;
1287
1288 d = CALLOUT_DATA_AT_NUM(mp, callout_num);
1289 t = d->slot[slot].type;
1290 if (IS_NOT_NULL(type)) *type = t;
1291 if (IS_NOT_NULL(val)) *val = d->slot[slot].val;
1292 return (t == ONIG_TYPE_VOID ? 1 : ONIG_NORMAL);
1293 }
1294
1295 extern int
1296 onig_get_callout_data_by_callout_args_self_dont_clear_old(OnigCalloutArgs* args,
1297 int slot, OnigType* type,
1298 OnigValue* val)
1299 {
1300 return onig_get_callout_data_dont_clear_old(args->regex, args->msa->mp,
1301 args->num, slot, type, val);
1302 }
1303
1304 extern int
1305 onig_get_callout_data(regex_t* reg, OnigMatchParam* mp,
1306 int callout_num, int slot,
1307 OnigType* type, OnigValue* val)
1308 {
1309 OnigType t;
1310 CalloutData* d;
1311
1312 if (callout_num <= 0) return ONIGERR_INVALID_ARGUMENT;
1313
1314 d = CALLOUT_DATA_AT_NUM(mp, callout_num);
1315 if (d->last_match_at_call_counter != mp->match_at_call_counter) {
1316 xmemset(d, 0, sizeof(*d));
1317 d->last_match_at_call_counter = mp->match_at_call_counter;
1318 }
1319
1320 t = d->slot[slot].type;
1321 if (IS_NOT_NULL(type)) *type = t;
1322 if (IS_NOT_NULL(val)) *val = d->slot[slot].val;
1323 return (t == ONIG_TYPE_VOID ? 1 : ONIG_NORMAL);
1324 }
1325
1326 extern int
1327 onig_get_callout_data_by_tag(regex_t* reg, OnigMatchParam* mp,
1328 const UChar* tag, const UChar* tag_end, int slot,
1329 OnigType* type, OnigValue* val)
1330 {
1331 int num;
1332
1333 num = onig_get_callout_num_by_tag(reg, tag, tag_end);
1334 if (num < 0) return num;
1335 if (num == 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME;
1336
1337 return onig_get_callout_data(reg, mp, num, slot, type, val);
1338 }
1339
1340 extern int
1341 onig_get_callout_data_by_callout_args(OnigCalloutArgs* args,
1342 int callout_num, int slot,
1343 OnigType* type, OnigValue* val)
1344 {
1345 return onig_get_callout_data(args->regex, args->msa->mp, callout_num, slot,
1346 type, val);
1347 }
1348
1349 extern int
1350 onig_get_callout_data_by_callout_args_self(OnigCalloutArgs* args,
1351 int slot, OnigType* type, OnigValue* val)
1352 {
1353 return onig_get_callout_data(args->regex, args->msa->mp, args->num, slot,
1354 type, val);
1355 }
1356
1357 extern int
1358 onig_set_callout_data(regex_t* reg, OnigMatchParam* mp,
1359 int callout_num, int slot,
1360 OnigType type, OnigValue* val)
1361 {
1362 CalloutData* d;
1363
1364 if (callout_num <= 0) return ONIGERR_INVALID_ARGUMENT;
1365
1366 d = CALLOUT_DATA_AT_NUM(mp, callout_num);
1367 d->slot[slot].type = type;
1368 d->slot[slot].val = *val;
1369 d->last_match_at_call_counter = mp->match_at_call_counter;
1370
1371 return ONIG_NORMAL;
1372 }
1373
1374 extern int
1375 onig_set_callout_data_by_tag(regex_t* reg, OnigMatchParam* mp,
1376 const UChar* tag, const UChar* tag_end, int slot,
1377 OnigType type, OnigValue* val)
1378 {
1379 int num;
1380
1381 num = onig_get_callout_num_by_tag(reg, tag, tag_end);
1382 if (num < 0) return num;
1383 if (num == 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME;
1384
1385 return onig_set_callout_data(reg, mp, num, slot, type, val);
1386 }
1387
1388 extern int
1389 onig_set_callout_data_by_callout_args(OnigCalloutArgs* args,
1390 int callout_num, int slot,
1391 OnigType type, OnigValue* val)
1392 {
1393 return onig_set_callout_data(args->regex, args->msa->mp, callout_num, slot,
1394 type, val);
1395 }
1396
1397 extern int
1398 onig_set_callout_data_by_callout_args_self(OnigCalloutArgs* args,
1399 int slot, OnigType type, OnigValue* val)
1400 {
1401 return onig_set_callout_data(args->regex, args->msa->mp, args->num, slot,
1402 type, val);
1403 }
1404
1405 #else
1406 #define ADJUST_MATCH_PARAM(reg, mp)
1407 #endif /* USE_CALLOUT */
1408
1409
1410 static int
1411 stack_double(int is_alloca, char** arg_alloc_base,
1412 StackType** arg_stk_base, StackType** arg_stk_end, StackType** arg_stk,
1413 MatchArg* msa)
1414 {
1415 unsigned int n;
1416 int used;
1417 size_t size;
1418 size_t new_size;
1419 char* alloc_base;
1420 char* new_alloc_base;
1421 StackType *stk_base, *stk_end, *stk;
1422
1423 alloc_base = *arg_alloc_base;
1424 stk_base = *arg_stk_base;
1425 stk_end = *arg_stk_end;
1426 stk = *arg_stk;
1427
1428 n = (unsigned int )(stk_end - stk_base);
1429 size = sizeof(StackIndex) * msa->ptr_num + sizeof(StackType) * n;
1430 n *= 2;
1431 new_size = sizeof(StackIndex) * msa->ptr_num + sizeof(StackType) * n;
1432 if (is_alloca != 0) {
1433 new_alloc_base = (char* )xmalloc(new_size);
1434 if (IS_NULL(new_alloc_base)) {
1435 STACK_SAVE;
1436 return ONIGERR_MEMORY;
1437 }
1438 xmemcpy(new_alloc_base, alloc_base, size);
1439 }
1440 else {
1441 if (msa->match_stack_limit != 0 && n > msa->match_stack_limit) {
1442 if ((unsigned int )(stk_end - stk_base) == msa->match_stack_limit)
1443 return ONIGERR_MATCH_STACK_LIMIT_OVER;
1444 else
1445 n = msa->match_stack_limit;
1446 }
1447 new_alloc_base = (char* )xrealloc(alloc_base, new_size, size);
1448 if (IS_NULL(new_alloc_base)) {
1449 STACK_SAVE;
1450 return ONIGERR_MEMORY;
1451 }
1452 }
1453
1454 alloc_base = new_alloc_base;
1455 used = (int )(stk - stk_base);
1456 *arg_alloc_base = alloc_base;
1457 *arg_stk_base = (StackType* )(alloc_base
1458 + (sizeof(StackIndex) * msa->ptr_num));
1459 *arg_stk = *arg_stk_base + used;
1460 *arg_stk_end = *arg_stk_base + n;
1461 return 0;
1462 }
1463
/*
 * Make sure at least n free slots remain above stk, growing the stack with
 * stack_double() when they do not.  On growth failure the stack is saved
 * into msa and the enclosing function returns the error code.
 */
#define STACK_ENSURE(n) \
  do { \
    if ((int )(stk_end - stk) < (n)) { \
      int grow_rc = stack_double(is_alloca, &alloc_base, &stk_base, &stk_end, &stk, msa); \
      if (grow_rc != 0) { \
        STACK_SAVE; \
        return grow_rc; \
      } \
      is_alloca = 0; \
      UPDATE_FOR_STACK_REALLOC; \
    } \
  } while (0)

/* Convert between a stack slot pointer and its index from stk_base. */
#define STACK_AT(index)        (stk_base + (index))
#define GET_STACK_INDEX(stk)   ((stk) - stk_base)
1475
/* Push an entry that carries nothing but its type. */
#define STACK_PUSH_TYPE(stack_type) \
  do { \
    STACK_ENSURE(1); \
    stk->type = (stack_type); \
    STACK_INC; \
  } while (0)

/* True when the entry's type has a STK_MASK_TO_VOID_TARGET bit set. */
#define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)

/* Push a state entry: opcode position, string position and previous position. */
#define STACK_PUSH(stack_type,pat,s,sprev) \
  do { \
    STACK_ENSURE(1); \
    stk->type = (stack_type); \
    stk->u.state.pcode     = (pat); \
    stk->u.state.pstr      = (s); \
    stk->u.state.pstr_prev = (sprev); \
    STACK_INC; \
  } while (0)

/* Push type + opcode position only; does not call STACK_ENSURE. */
#define STACK_PUSH_ENSURED(stack_type,pat) \
  do { \
    stk->type = (stack_type); \
    stk->u.state.pcode = (pat); \
    STACK_INC; \
  } while (0)

/*
 * Push the bottom entry; does not call STACK_ENSURE.  The ONIG_DEBUG_MATCH
 * variant also records the surrounding s and sprev.
 */
#ifdef ONIG_DEBUG_MATCH
#define STACK_PUSH_BOTTOM(stack_type,pat) \
  do { \
    stk->type = (stack_type); \
    stk->u.state.pcode = (pat); \
    stk->u.state.pstr      = s; \
    stk->u.state.pstr_prev = sprev; \
    STACK_INC; \
  } while (0)
#else
#define STACK_PUSH_BOTTOM(stack_type,pat) \
  do { \
    stk->type = (stack_type); \
    stk->u.state.pcode = (pat); \
    STACK_INC; \
  } while (0)
#endif

/* Typed shorthands for the generic push macros above. */
#define STACK_PUSH_ALT(pat,s,sprev) \
  STACK_PUSH(STK_ALT,pat,s,sprev)
#define STACK_PUSH_SUPER_ALT(pat,s,sprev) \
  STACK_PUSH(STK_SUPER_ALT,pat,s,sprev)
#define STACK_PUSH_POS(s,sprev) \
  STACK_PUSH(STK_TO_VOID_START,NULL_UCHARP,s,sprev)
#define STACK_PUSH_ALT_PREC_READ_NOT(pat,s,sprev) \
  STACK_PUSH(STK_ALT_PREC_READ_NOT,pat,s,sprev)
#define STACK_PUSH_TO_VOID_START \
  STACK_PUSH_TYPE(STK_TO_VOID_START)
#define STACK_PUSH_ALT_LOOK_BEHIND_NOT(pat,s,sprev) \
  STACK_PUSH(STK_ALT_LOOK_BEHIND_NOT,pat,s,sprev)
1524
/* Push a repeat entry for repeat id sid with its count starting at 0. */
#define STACK_PUSH_REPEAT(sid, pat) \
  do { \
    STACK_ENSURE(1); \
    stk->type = STK_REPEAT; \
    stk->zid  = (sid); \
    stk->u.repeat.pcode = (pat); \
    stk->u.repeat.count = 0; \
    STACK_INC; \
  } while (0)

/* Push a marker that refers to the repeat entry at stack index sindex. */
#define STACK_PUSH_REPEAT_INC(sindex) \
  do { \
    STACK_ENSURE(1); \
    stk->type = STK_REPEAT_INC; \
    stk->u.repeat_inc.si = (sindex); \
    STACK_INC; \
  } while (0)

/*
 * Push the start of capture group mnum at string position s.  The previous
 * start/end indices are kept in the entry; the group's start index then
 * points at this entry and its end index becomes invalid.
 */
#define STACK_PUSH_MEM_START(mnum, s) \
  do { \
    STACK_ENSURE(1); \
    stk->type = STK_MEM_START; \
    stk->zid  = (mnum); \
    stk->u.mem.pstr       = (s); \
    stk->u.mem.prev_start = mem_start_stk[mnum]; \
    stk->u.mem.prev_end   = mem_end_stk[mnum]; \
    mem_start_stk[mnum]   = GET_STACK_INDEX(stk); \
    mem_end_stk[mnum]     = INVALID_STACK_INDEX; \
    STACK_INC; \
  } while (0)

/*
 * Push the end of capture group mnum at string position s.  The previous
 * start/end indices are kept in the entry; the group's end index then
 * points at this entry.
 */
#define STACK_PUSH_MEM_END(mnum, s) \
  do { \
    STACK_ENSURE(1); \
    stk->type = STK_MEM_END; \
    stk->zid  = (mnum); \
    stk->u.mem.pstr       = (s); \
    stk->u.mem.prev_start = mem_start_stk[mnum]; \
    stk->u.mem.prev_end   = mem_end_stk[mnum]; \
    mem_end_stk[mnum]     = GET_STACK_INDEX(stk); \
    STACK_INC; \
  } while (0)

/* Push an end marker for group mnum that carries no string position. */
#define STACK_PUSH_MEM_END_MARK(mnum) \
  do { \
    STACK_ENSURE(1); \
    stk->type = STK_MEM_END_MARK; \
    stk->zid  = (mnum); \
    STACK_INC; \
  } while (0)
1570
/*
 * Scan down from the stack top for the STK_MEM_START entry of group mnum
 * that is not closed by a later end entry or end mark of the same group.
 * On exit k points at that entry, or at stk_base if the scan ran out.
 */
#define STACK_GET_MEM_START(mnum, k) \
  do { \
    int level = 0; \
    k = stk; \
    while (k > stk_base) { \
      k--; \
      if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
          && k->zid == (mnum)) { \
        level++; \
      } \
      else if (k->type == STK_MEM_START && k->zid == (mnum)) { \
        if (level == 0) break; \
        level--; \
      } \
    } \
  } while (0)

/*
 * Scan upward from k for the outermost start/end pair of group mnum and
 * store the two string positions in start and end.
 *
 * The group number is read from k->zid, the field the STACK_PUSH_MEM_*
 * macros write it to; the old u.mem.num member is never set by them.
 */
#define STACK_GET_MEM_RANGE(k, mnum, start, end) \
  do { \
    int level = 0; \
    while (k < stk) { \
      if (k->type == STK_MEM_START && k->zid == (mnum)) { \
        if (level == 0) (start) = k->u.mem.pstr; \
        level++; \
      } \
      else if (k->type == STK_MEM_END && k->zid == (mnum)) { \
        level--; \
        if (level == 0) { \
          (end) = k->u.mem.pstr; \
          break; \
        } \
      } \
      k++; \
    } \
  } while (0)
1604
/* Push the start of empty-check cnum, remembering string position s. */
#define STACK_PUSH_EMPTY_CHECK_START(cnum, s) \
  do { \
    STACK_ENSURE(1); \
    stk->type = STK_EMPTY_CHECK_START; \
    stk->zid  = (cnum); \
    stk->u.empty_check.pstr = (s); \
    STACK_INC; \
  } while (0)

/* Push the end marker of empty-check cnum. */
#define STACK_PUSH_EMPTY_CHECK_END(cnum) \
  do { \
    STACK_ENSURE(1); \
    stk->type = STK_EMPTY_CHECK_END; \
    stk->zid  = (cnum); \
    STACK_INC; \
  } while (0)

/* Push a call frame whose return address is pat. */
#define STACK_PUSH_CALL_FRAME(pat) \
  do { \
    STACK_ENSURE(1); \
    stk->type = STK_CALL_FRAME; \
    stk->u.call_frame.ret_addr = (pat); \
    STACK_INC; \
  } while (0)

/* Push a marker recording that a call has returned. */
#define STACK_PUSH_RETURN \
  do { \
    STACK_ENSURE(1); \
    stk->type = STK_RETURN; \
    STACK_INC; \
  } while (0)

/* Push saved value sval of kind stype under id sid. */
#define STACK_PUSH_SAVE_VAL(sid, stype, sval) \
  do { \
    STACK_ENSURE(1); \
    stk->type = STK_SAVE_VAL; \
    stk->zid  = (sid); \
    stk->u.val.type = (stype); \
    stk->u.val.v    = (UChar* )(sval); \
    STACK_INC; \
  } while (0)

/* As STACK_PUSH_SAVE_VAL, additionally storing the surrounding sprev in v2. */
#define STACK_PUSH_SAVE_VAL_WITH_SPREV(sid, stype, sval) \
  do { \
    STACK_ENSURE(1); \
    stk->type = STK_SAVE_VAL; \
    stk->zid  = (sid); \
    stk->u.val.type = (stype); \
    stk->u.val.v    = (UChar* )(sval); \
    stk->u.val.v2   = sprev; \
    STACK_INC; \
  } while (0)
1651
/*
 * Fetch into sval the most recently pushed saved value of kind stype,
 * regardless of id.  sval is left untouched when none is found.
 */
#define STACK_GET_SAVE_VAL_TYPE_LAST(stype, sval) \
  do { \
    StackType *k = stk; \
    while (k > stk_base) { \
      k--; \
      STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST"); \
      if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)) { \
        (sval) = k->u.val.v; \
        break; \
      } \
    } \
  } while (0)

/*
 * Fetch into sval the most recent saved value of kind stype and id sid.
 * `level` is raised by STK_RETURN and lowered by STK_CALL_FRAME entries
 * passed on the way down; a match is accepted only at level 0.
 */
#define STACK_GET_SAVE_VAL_TYPE_LAST_ID(stype, sid, sval) \
  do { \
    int level = 0; \
    StackType *k = stk; \
    while (k > stk_base) { \
      k--; \
      STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID"); \
      if (k->type == STK_SAVE_VAL && k->u.val.type == (stype) \
          && k->zid == (sid)) { \
        if (level == 0) { \
          (sval) = k->u.val.v; \
          break; \
        } \
      } \
      else if (k->type == STK_CALL_FRAME) \
        level--; \
      else if (k->type == STK_RETURN) \
        level++; \
    } \
  } while (0)

/*
 * As STACK_GET_SAVE_VAL_TYPE_LAST_ID, and additionally restores the
 * surrounding sprev from the entry's v2 field.  The debug label now names
 * this macro instead of its sibling.
 */
#define STACK_GET_SAVE_VAL_TYPE_LAST_ID_WITH_SPREV(stype, sid, sval) \
  do { \
    int level = 0; \
    StackType *k = stk; \
    while (k > stk_base) { \
      k--; \
      STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID_WITH_SPREV"); \
      if (k->type == STK_SAVE_VAL && k->u.val.type == (stype) \
          && k->zid == (sid)) { \
        if (level == 0) { \
          (sval) = k->u.val.v; \
          sprev  = k->u.val.v2; \
          break; \
        } \
      } \
      else if (k->type == STK_CALL_FRAME) \
        level--; \
      else if (k->type == STK_RETURN) \
        level++; \
    } \
  } while (0)

/*
 * As STACK_GET_SAVE_VAL_TYPE_LAST_ID, but the scan starts at stk_from
 * (inclusive) instead of just below the stack top.
 *
 * The id is read from k->zid, the field STACK_PUSH_SAVE_VAL writes it to;
 * the old u.val.id member is never set by the push macros.
 */
#define STACK_GET_SAVE_VAL_TYPE_LAST_ID_FROM(stype, sid, sval, stk_from) \
  do { \
    int level = 0; \
    StackType *k = (stk_from); \
    while (k > stk_base) { \
      STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID_FROM"); \
      if (k->type == STK_SAVE_VAL && k->u.val.type == (stype) \
          && k->zid == (sid)) { \
        if (level == 0) { \
          (sval) = k->u.val.v; \
          break; \
        } \
      } \
      else if (k->type == STK_CALL_FRAME) \
        level--; \
      else if (k->type == STK_RETURN) \
        level++; \
      k--; \
    } \
  } while (0)
1724
/* Push a callout entry for a callout of contents; its id is ONIG_NON_NAME_ID. */
#define STACK_PUSH_CALLOUT_CONTENTS(anum, func) \
  do { \
    STACK_ENSURE(1); \
    stk->type = STK_CALLOUT; \
    stk->zid  = ONIG_NON_NAME_ID; \
    stk->u.callout.num  = (anum); \
    stk->u.callout.func = (func); \
    STACK_INC; \
  } while (0)

/* Push a callout entry for a callout of name with name id aid. */
#define STACK_PUSH_CALLOUT_NAME(aid, anum, func) \
  do { \
    STACK_ENSURE(1); \
    stk->type = STK_CALLOUT; \
    stk->zid  = (aid); \
    stk->u.callout.num  = (anum); \
    stk->u.callout.func = (func); \
    STACK_INC; \
  } while (0)
1742
/*
 * Debug-only guard: when p has dropped below stk_base, print where it
 * happened and jump to stack_error.  Expands to nothing without ONIG_DEBUG.
 */
#ifdef ONIG_DEBUG
#define STACK_BASE_CHECK(p, at) \
  if ((p) < stk_base) { \
    fprintf(stderr, "at %s\n", at); \
    goto stack_error; \
  }
#else
#define STACK_BASE_CHECK(p, at)
#endif

/* Drop exactly one entry from the stack. */
#define STACK_POP_ONE \
  do { \
    stk--; \
    STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
  } while (0)


/*
 * Extra `else if` arm appended to the entry-handling chain in STACK_POP:
 * runs the retraction callout for a popped STK_CALLOUT entry.
 * Expands to nothing without USE_CALLOUT.
 */
#ifdef USE_CALLOUT
#define POP_CALLOUT_CASE \
  else if (stk->type == STK_CALLOUT) { \
    RETRACTION_CALLOUT(stk->u.callout.func, stk->zid, stk->u.callout.num, msa->mp->callout_user_data); \
  }
#else
#define POP_CALLOUT_CASE
#endif
1767
/*
 * Pop entries until one with a STK_MASK_POP_USED bit is on top.  How much
 * bookkeeping is undone on the way depends on pop_level:
 *   STACK_POP_LEVEL_FREE       nothing is restored;
 *   STACK_POP_LEVEL_MEM_START  capture indices saved in STK_MEM_START
 *                              entries are restored;
 *   otherwise                  capture indices (start and end entries) are
 *                              restored, repeat counts are decremented and
 *                              callout entries are handled.
 */
#define STACK_POP \
  do { \
    switch (pop_level) { \
    case STACK_POP_LEVEL_FREE: \
      do { \
        stk--; \
        STACK_BASE_CHECK(stk, "STACK_POP"); \
      } while ((stk->type & STK_MASK_POP_USED) == 0); \
      break; \
    case STACK_POP_LEVEL_MEM_START: \
      for (;;) { \
        stk--; \
        STACK_BASE_CHECK(stk, "STACK_POP 2"); \
        if ((stk->type & STK_MASK_POP_USED) != 0) break; \
        if (stk->type == STK_MEM_START) { \
          mem_start_stk[stk->zid] = stk->u.mem.prev_start; \
          mem_end_stk[stk->zid]   = stk->u.mem.prev_end; \
        } \
      } \
      break; \
    default: \
      for (;;) { \
        stk--; \
        STACK_BASE_CHECK(stk, "STACK_POP 3"); \
        if ((stk->type & STK_MASK_POP_USED) != 0) break; \
        if ((stk->type & STK_MASK_POP_HANDLED) != 0) { \
          if (stk->type == STK_MEM_START || stk->type == STK_MEM_END) { \
            mem_start_stk[stk->zid] = stk->u.mem.prev_start; \
            mem_end_stk[stk->zid]   = stk->u.mem.prev_end; \
          } \
          else if (stk->type == STK_REPEAT_INC) { \
            STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--; \
          } \
          POP_CALLOUT_CASE \
        } \
      } \
      break; \
    } \
  } while (0)
1811
/*
 * Pop entries until one of type til_type is on top.  Capture indices and
 * repeat counts recorded in the popped entries are undone on the way;
 * aname is only used as the debug label.
 */
#define POP_TIL_BODY(aname, til_type) \
  do { \
    for (;;) { \
      stk--; \
      STACK_BASE_CHECK(stk, (aname)); \
      if ((stk->type & STK_MASK_POP_HANDLED_TIL) != 0) { \
        if (stk->type == (til_type)) break; \
        if (stk->type == STK_MEM_START || stk->type == STK_MEM_END) { \
          mem_start_stk[stk->zid] = stk->u.mem.prev_start; \
          mem_end_stk[stk->zid]   = stk->u.mem.prev_end; \
        } \
        else if (stk->type == STK_REPEAT_INC) { \
          STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--; \
        } \
        /* Don't call callout here because negation of total success by (?!..) (?<!..) */ \
      } \
    } \
  } while (0)

/* Pop back to the STK_ALT_PREC_READ_NOT entry. */
#define STACK_POP_TIL_ALT_PREC_READ_NOT \
  do { \
    POP_TIL_BODY("STACK_POP_TIL_ALT_PREC_READ_NOT", STK_ALT_PREC_READ_NOT); \
  } while (0)

/* Pop back to the STK_ALT_LOOK_BEHIND_NOT entry. */
#define STACK_POP_TIL_ALT_LOOK_BEHIND_NOT \
  do { \
    POP_TIL_BODY("STACK_POP_TIL_ALT_LOOK_BEHIND_NOT", STK_ALT_LOOK_BEHIND_NOT); \
  } while (0)
1843
1844
/*
 * Walking down from the stack top, turn every to-void target entry into
 * STK_VOID, stopping after the STK_TO_VOID_START entry has been converted.
 * k is left pointing at that entry.
 */
#define STACK_EXEC_TO_VOID(k) \
  do { \
    k = stk; \
    for (;;) { \
      k--; \
      STACK_BASE_CHECK(k, "STACK_EXEC_TO_VOID"); \
      if (IS_TO_VOID_TARGET(k)) { \
        int was_void_start = (k->type == STK_TO_VOID_START); \
        k->type = STK_VOID; \
        if (was_void_start) break; \
      } \
    } \
  } while (0)

/*
 * Find the nearest STK_EMPTY_CHECK_START entry with id sid and set isnull
 * to whether the string position is still the one recorded there.
 */
#define STACK_EMPTY_CHECK(isnull,sid,s) \
  do { \
    StackType* k = stk; \
    for (;;) { \
      k--; \
      STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK"); \
      if (k->type == STK_EMPTY_CHECK_START && k->zid == (sid)) { \
        (isnull) = (k->u.empty_check.pstr == (s)); \
        break; \
      } \
    } \
  } while (0)

/*
 * For a STK_MEM_START entry k, compute the end address the group had before
 * this entry was pushed: 0 when there was none, the pstr of the referenced
 * end entry when the group's bit is set in reg->bt_mem_end, otherwise
 * prev_end itself reinterpreted as an address.
 */
#define STACK_MEM_START_GET_PREV_END_ADDR(k /* STK_MEM_START*/, reg, addr) \
  do { \
    if (k->u.mem.prev_end == INVALID_STACK_INDEX) { \
      (addr) = 0; \
    } \
    else if (MEM_STATUS_AT((reg)->bt_mem_end, k->zid)) { \
      (addr) = STACK_AT(k->u.mem.prev_end)->u.mem.pstr; \
    } \
    else { \
      (addr) = (UChar* )k->u.mem.prev_end; \
    } \
  } while (0)
1885
1886 #ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT
/*
 * Empty-loop check that also looks at captures.  Finds the nearest
 * STK_EMPTY_CHECK_START entry with id sid:
 *   isnull = 0   the string position moved, or a STK_MEM_START entry pushed
 *                since then has no previous end or a previous start that
 *                differs from its previous end;
 *   isnull = 1   position unchanged and no such entry was found;
 *   isnull = -1  as 1, but a previous end differs from s.
 */
#define STACK_EMPTY_CHECK_MEM(isnull,sid,s,reg) \
  do { \
    StackType* k = stk; \
    for (;;) { \
      k--; \
      STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEM"); \
      if (k->type == STK_EMPTY_CHECK_START && k->zid == (sid)) { \
        if (k->u.empty_check.pstr != (s)) { \
          (isnull) = 0; \
          break; \
        } \
        else { \
          UChar* endp; \
          (isnull) = 1; \
          /* Re-walk upward over everything pushed since the check started. */ \
          while (k < stk) { \
            if (k->type == STK_MEM_START) { \
              STACK_MEM_START_GET_PREV_END_ADDR(k, reg, endp); \
              if (endp == 0) { \
                (isnull) = 0; break; \
              } \
              else if (STACK_AT(k->u.mem.prev_start)->u.mem.pstr != endp) { \
                (isnull) = 0; break; \
              } \
              else if (endp != s) { \
                (isnull) = -1; /* empty, but position changed */ \
              } \
            } \
            k++; \
          } \
          break; \
        } \
      } \
    } \
  } while (0)
1922
/*
 * Variant of STACK_EMPTY_CHECK_MEM that tracks nesting of empty-checks with
 * the same id.  On the way down, end entries with id sid raise `level` and
 * start entries with id sid lower it; only a start entry met at level 0 is
 * used.  During the upward re-walk, STK_MEM_START entries are examined only
 * while `level` is 0.  isnull receives 0, 1 or -1 as in STACK_EMPTY_CHECK_MEM.
 */
#define STACK_EMPTY_CHECK_MEM_REC(isnull,sid,s,reg) \
  do { \
    int level = 0; \
    StackType* k = stk; \
    for (;;) { \
      k--; \
      STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEM_REC"); \
      if (k->type == STK_EMPTY_CHECK_START) { \
        if (k->zid == (sid)) { \
          if (level == 0) { \
            if (k->u.empty_check.pstr != (s)) { \
              (isnull) = 0; \
              break; \
            } \
            else { \
              UChar* endp; \
              (isnull) = 1; \
              while (k < stk) { \
                if (k->type == STK_MEM_START) { \
                  if (level == 0) { \
                    STACK_MEM_START_GET_PREV_END_ADDR(k, reg, endp); \
                    if (endp == 0) { \
                      (isnull) = 0; break; \
                    } \
                    else if (STACK_AT(k->u.mem.prev_start)->u.mem.pstr != endp) { \
                      (isnull) = 0; break; \
                    } \
                    else if (endp != s) { \
                      (isnull) = -1; /* empty, but position changed */ \
                    } \
                  } \
                } \
                else if (k->type == STK_EMPTY_CHECK_START) { \
                  if (k->zid == (sid)) level++; \
                } \
                else if (k->type == STK_EMPTY_CHECK_END) { \
                  if (k->zid == (sid)) level--; \
                } \
                k++; \
              } \
              break; \
            } \
          } \
          else { \
            level--; \
          } \
        } \
      } \
      else if (k->type == STK_EMPTY_CHECK_END) { \
        if (k->zid == (sid)) level++; \
      } \
    } \
  } while (0)
1975 #else
/*
 * Nesting-aware empty-loop check.  Walking down from the stack top, every
 * STK_EMPTY_CHECK_END raises `level`, and every STK_EMPTY_CHECK_START lowers
 * it unless it is the entry we want: the one with the given id met at
 * level 0.  isnull is set to whether the string position is still the one
 * recorded in that entry.
 *
 * The id is read from k->zid, the field STACK_PUSH_EMPTY_CHECK_START writes
 * it to; the old u.empty_check.num member is never set by the push macros.
 */
#define STACK_EMPTY_CHECK_REC(isnull,id,s) \
  do { \
    int level = 0; \
    StackType* k = stk; \
    while (1) { \
      k--; \
      STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_REC"); \
      if (k->type == STK_EMPTY_CHECK_START) { \
        if (k->zid == (id)) { \
          if (level == 0) { \
            (isnull) = (k->u.empty_check.pstr == (s)); \
            break; \
          } \
        } \
        level--; \
      } \
      else if (k->type == STK_EMPTY_CHECK_END) { \
        level++; \
      } \
    } \
  } while (0)
1996 #endif /* USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT */
1997
/*
 * Point k at the nearest STK_REPEAT entry with id sid that is met at call
 * level 0 (STK_RETURN raises the level, STK_CALL_FRAME lowers it).
 */
#define STACK_GET_REPEAT(sid, k) \
  do { \
    int level = 0; \
    k = stk; \
    for (;;) { \
      k--; \
      STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
      if (k->type == STK_REPEAT) { \
        if (level == 0 && k->zid == (sid)) break; \
      } \
      else if (k->type == STK_CALL_FRAME) level--; \
      else if (k->type == STK_RETURN)     level++; \
    } \
  } while (0)

/*
 * Store in addr the return address of the innermost call frame that has not
 * yet been matched by a STK_RETURN entry above it.
 */
#define STACK_RETURN(addr) \
  do { \
    int level = 0; \
    StackType* k = stk; \
    for (;;) { \
      k--; \
      STACK_BASE_CHECK(k, "STACK_RETURN"); \
      if (k->type == STK_CALL_FRAME) { \
        if (level == 0) { \
          (addr) = k->u.call_frame.ret_addr; \
          break; \
        } \
        level--; \
      } \
      else if (k->type == STK_RETURN) { \
        level++; \
      } \
    } \
  } while (0)
2033
2034
/*
 * Compare len bytes at s1 and s2, advancing both pointers and counting len
 * down; jumps to `fail` at the first difference.
 */
#define STRING_CMP(s1,s2,len) \
  do { \
    while (len-- > 0) { \
      if (*s1++ != *s2++) goto fail; \
    } \
  } while (0)

/*
 * Case-insensitive comparison via string_cmp_ic() using the surrounding
 * `encode`; jumps to `fail` on mismatch.
 */
#define STRING_CMP_IC(case_fold_flag,s1,ps2,len) \
  do { \
    if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) \
      goto fail; \
  } while (0)
2045
2046 static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
2047 UChar* s1, UChar** ps2, int mblen)
2048 {
2049 UChar buf1[ONIGENC_MBC_CASE_FOLD_MAXLEN];
2050 UChar buf2[ONIGENC_MBC_CASE_FOLD_MAXLEN];
2051 UChar *p1, *p2, *end1, *s2, *end2;
2052 int len1, len2;
2053
2054 s2 = *ps2;
2055 end1 = s1 + mblen;
2056 end2 = s2 + mblen;
2057 while (s1 < end1) {
2058 len1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, end1, buf1);
2059 len2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s2, end2, buf2);
2060 if (len1 != len2) return 0;
2061 p1 = buf1;
2062 p2 = buf2;
2063 while (len1-- > 0) {
2064 if (*p1 != *p2) return 0;
2065 p1++;
2066 p2++;
2067 }
2068 }
2069
2070 *ps2 = s2;
2071 return 1;
2072 }
2073
/*
 * Like STRING_CMP, but reports the outcome in is_fail (1 on the first
 * difference, else 0) instead of jumping.
 */
#define STRING_CMP_VALUE(s1,s2,len,is_fail) \
  do { \
    is_fail = 0; \
    while (len-- > 0) { \
      if (*s1++ != *s2++) { \
        is_fail = 1; \
        break; \
      } \
    } \
  } while (0)

/*
 * Like STRING_CMP_IC, but reports the outcome in is_fail (1 when
 * string_cmp_ic() returns 0, else 0) instead of jumping.
 */
#define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,is_fail) \
  do { \
    if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) != 0) \
      is_fail = 0; \
    else \
      is_fail = 1; \
  } while (0)
2089
2090
/* Position tests against the surrounding str / end pointers. */
#define IS_EMPTY_STR           (str == end)
#define ON_STR_BEGIN(s)        ((s) == str)
#define ON_STR_END(s)          ((s) == end)

/*
 * Range tests against right_range for the surrounding cursor `s`.
 * The n-byte forms compare the remaining distance (right_range - s) with n
 * instead of computing s + n: that sum can point past the end of the buffer,
 * which is undefined behaviour and may wrap around.
 */
#define DATA_ENSURE_CHECK1     (s < right_range)
#define DATA_ENSURE_CHECK(n)   (right_range - s >= (n))
/* Jump to `fail` unless at least n bytes remain before right_range. */
#define DATA_ENSURE(n)         if (right_range - s < (n)) goto fail

/* Reset right_range from the in_right_range argument. */
#define INIT_RIGHT_RANGE    right_range = (UChar* )in_right_range
2099
2100 #ifdef USE_CAPTURE_HISTORY
2101 static int
2102 make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp,
2103 StackType* stk_top, UChar* str, regex_t* reg)
2104 {
2105 int n, r;
2106 OnigCaptureTreeNode* child;
2107 StackType* k = *kp;
2108
2109 while (k < stk_top) {
2110 if (k->type == STK_MEM_START) {
2111 n = k->zid;
2112 if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP &&
2113 MEM_STATUS_AT(reg->capture_history, n) != 0) {
2114 child = history_node_new();
2115 CHECK_NULL_RETURN_MEMERR(child);
2116 child->group = n;
2117 child->beg = (int )(k->u.mem.pstr - str);
2118 r = history_tree_add_child(node, child);
2119 if (r != 0) return r;
2120 *kp = (k + 1);
2121 r = make_capture_history_tree(child, kp, stk_top, str, reg);
2122 if (r != 0) return r;
2123
2124 k = *kp;
2125 child->end = (int )(k->u.mem.pstr - str);
2126 }
2127 }
2128 else if (k->type == STK_MEM_END) {
2129 if (k->zid == node->group) {
2130 node->end = (int )(k->u.mem.pstr - str);
2131 *kp = k;
2132 return 0;
2133 }
2134 }
2135 k++;
2136 }
2137
2138 return 1; /* 1: root node ending. */
2139 }
2140 #endif
2141
2142 #ifdef USE_BACKREF_WITH_LEVEL
2143 static int mem_is_in_memp(int mem, int num, UChar* memp)
2144 {
2145 int i;
2146 MemNumType m;
2147
2148 for (i = 0; i < num; i++) {
2149 GET_MEMNUM_INC(m, memp);
2150 if (mem == (int )m) return 1;
2151 }
2152 return 0;
2153 }
2154
2155 static int
2156 backref_match_at_nested_level(regex_t* reg,
2157 StackType* top, StackType* stk_base,
2158 int ignore_case, int case_fold_flag,
2159 int nest, int mem_num, UChar* memp,
2160 UChar** s, const UChar* send)
2161 {
2162 UChar *ss, *p, *pstart, *pend = NULL_UCHARP;
2163 int level;
2164 StackType* k;
2165
2166 level = 0;
2167 k = top;
2168 k--;
2169 while (k >= stk_base) {
2170 if (k->type == STK_CALL_FRAME) {
2171 level--;
2172 }
2173 else if (k->type == STK_RETURN) {
2174 level++;
2175 }
2176 else if (level == nest) {
2177 if (k->type == STK_MEM_START) {
2178 if (mem_is_in_memp(k->zid, mem_num, memp)) {
2179 pstart = k->u.mem.pstr;
2180 if (IS_NOT_NULL(pend)) {
2181 if (pend - pstart > send - *s) return 0; /* or goto next_mem; */
2182 p = pstart;
2183 ss = *s;
2184
2185 if (ignore_case != 0) {
2186 if (string_cmp_ic(reg->enc, case_fold_flag,
2187 pstart, &ss, (int )(pend - pstart)) == 0)
2188 return 0; /* or goto next_mem; */
2189 }
2190 else {
2191 while (p < pend) {
2192 if (*p++ != *ss++) return 0; /* or goto next_mem; */
2193 }
2194 }
2195
2196 *s = ss;
2197 return 1;
2198 }
2199 }
2200 }
2201 else if (k->type == STK_MEM_END) {
2202 if (mem_is_in_memp(k->zid, mem_num, memp)) {
2203 pend = k->u.mem.pstr;
2204 }
2205 }
2206 }
2207 k--;
2208 }
2209
2210 return 0;
2211 }
2212
2213 static int
2214 backref_check_at_nested_level(regex_t* reg,
2215 StackType* top, StackType* stk_base,
2216 int nest, int mem_num, UChar* memp)
2217 {
2218 int level;
2219 StackType* k;
2220
2221 level = 0;
2222 k = top;
2223 k--;
2224 while (k >= stk_base) {
2225 if (k->type == STK_CALL_FRAME) {
2226 level--;
2227 }
2228 else if (k->type == STK_RETURN) {
2229 level++;
2230 }
2231 else if (level == nest) {
2232 if (k->type == STK_MEM_END) {
2233 if (mem_is_in_memp(k->zid, mem_num, memp)) {
2234 return 1;
2235 }
2236 }
2237 }
2238 k--;
2239 }
2240
2241 return 0;
2242 }
2243 #endif /* USE_BACKREF_WITH_LEVEL */
2244
2245
2246 #ifdef ONIG_DEBUG_STATISTICS
2247
2248 #define USE_TIMEOFDAY
2249
2250 #ifdef USE_TIMEOFDAY
2251 #ifdef HAVE_SYS_TIME_H
2252 #include <sys/time.h>
2253 #endif
2254 #ifdef HAVE_UNISTD_H
2255 #include <unistd.h>
2256 #endif
2257 static struct timeval ts, te;
2258 #define GETTIME(t) gettimeofday(&(t), (struct timezone* )0)
2259 #define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \
2260 (((te).tv_sec - (ts).tv_sec)*1000000))
2261 #else
2262 #ifdef HAVE_SYS_TIMES_H
2263 #include <sys/times.h>
2264 #endif
2265 static struct tms ts, te;
2266 #define GETTIME(t) times(&(t))
2267 #define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime)
2268 #endif
2269
2270 static int OpCounter[256];
2271 static int OpPrevCounter[256];
2272 static unsigned long OpTime[256];
2273 static int OpCurr = OP_FINISH;
2274 static int OpPrevTarget = OP_FAIL;
2275 static int MaxStackDepth = 0;
2276
2277 #define SOP_IN(opcode) do {\
2278 if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\
2279 OpCurr = opcode;\
2280 OpCounter[opcode]++;\
2281 GETTIME(ts);\
2282 } while(0)
2283
2284 #define SOP_OUT do {\
2285 GETTIME(te);\
2286 OpTime[OpCurr] += TIMEDIFF(te, ts);\
2287 } while(0)
2288
2289 extern void
2290 onig_statistics_init(void)
2291 {
2292 int i;
2293 for (i = 0; i < 256; i++) {
2294 OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0;
2295 }
2296 MaxStackDepth = 0;
2297 }
2298
2299 extern int
2300 onig_print_statistics(FILE* f)
2301 {
2302 int r;
2303 int i;
2304
2305 r = fprintf(f, " count prev time\n");
2306 if (r < 0) return -1;
2307
2308 for (i = 0; OpInfo[i].opcode >= 0; i++) {
2309 r = fprintf(f, "%8d: %8d: %10ld: %s\n",
2310 OpCounter[i], OpPrevCounter[i], OpTime[i], OpInfo[i].name);
2311 if (r < 0) return -1;
2312 }
2313 r = fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth);
2314 if (r < 0) return -1;
2315
2316 return 0;
2317 }
2318
2319 #define STACK_INC do {\
2320 stk++;\
2321 if (stk - stk_base > MaxStackDepth) \
2322 MaxStackDepth = stk - stk_base;\
2323 } while(0)
2324
2325 #else
2326 #define STACK_INC stk++
2327
2328 #define SOP_IN(opcode)
2329 #define SOP_OUT
2330 #endif
2331
2332
2333 /* matching region of POSIX API */
/* Offset type used by the POSIX-style match region below. */
typedef int regoff_t;

/* One match region of the POSIX API: a start offset and an end offset. */
typedef struct {
  regoff_t rm_so, rm_eo;
} posix_regmatch_t;
2340
2341 /* match data(str - end) from position (sstart). */
2342 /* if sstart == str then set sprev to NULL. */
2343 static int
2344 match_at(regex_t* reg, const UChar* str, const UChar* end,
2345 const UChar* in_right_range, const UChar* sstart, UChar* sprev,
2346 MatchArg* msa)
2347 {
2348 static UChar FinishCode[] = { OP_FINISH };
2349
2350 int i, n, num_mem, best_len, pop_level;
2351 LengthType tlen, tlen2;
2352 MemNumType mem;
2353 RelAddrType addr;
2354 UChar *s, *q, *sbegin;
2355 UChar *right_range;
2356 int is_alloca;
2357 char *alloc_base;
2358 StackType *stk_base, *stk, *stk_end;
2359 StackType *stkp; /* used as any purpose. */
2360 StackIndex si;
2361 StackIndex *repeat_stk;
2362 StackIndex *mem_start_stk, *mem_end_stk;
2363 UChar* keep;
2364 #ifdef USE_RETRY_LIMIT_IN_MATCH
2365 unsigned long retry_limit_in_match;
2366 unsigned long retry_in_match_counter;
2367 #endif
2368
2369 #ifdef USE_CALLOUT
2370 int of;
2371 #endif
2372
2373 UChar *p = reg->p;
2374 OnigOptionType option = reg->options;
2375 OnigEncoding encode = reg->enc;
2376 OnigCaseFoldType case_fold_flag = reg->case_fold_flag;
2377
2378 #ifdef USE_CALLOUT
2379 msa->mp->match_at_call_counter++;
2380 #endif
2381
2382 #ifdef USE_RETRY_LIMIT_IN_MATCH
2383 retry_limit_in_match = msa->retry_limit_in_match;
2384 #endif
2385
2386 pop_level = reg->stack_pop_level;
2387 num_mem = reg->num_mem;
2388 STACK_INIT(INIT_MATCH_STACK_SIZE);
2389 UPDATE_FOR_STACK_REALLOC;
2390 for (i = 1; i <= num_mem; i++) {
2391 mem_start_stk[i] = mem_end_stk[i] = INVALID_STACK_INDEX;
2392 }
2393
2394 #ifdef ONIG_DEBUG_MATCH
2395 fprintf(stderr, "match_at: str: %p, end: %p, start: %p, sprev: %p\n",
2396 str, end, sstart, sprev);
2397 fprintf(stderr, "size: %d, start offset: %d\n",
2398 (int )(end - str), (int )(sstart - str));
2399 #endif
2400
2401 best_len = ONIG_MISMATCH;
2402 keep = s = (UChar* )sstart;
2403 STACK_PUSH_BOTTOM(STK_ALT, FinishCode); /* bottom stack */
2404 INIT_RIGHT_RANGE;
2405
2406 #ifdef USE_RETRY_LIMIT_IN_MATCH
2407 retry_in_match_counter = 0;
2408 #endif
2409
2410 while (1) {
2411 #ifdef ONIG_DEBUG_MATCH
2412 {
2413 static unsigned int counter = 1;
2414
2415 UChar *q, *bp, buf[50];
2416 int len;
2417 fprintf(stderr, "%7u: %7ld: %4d> \"",
2418 counter, GET_STACK_INDEX(stk), (int )(s - str));
2419 counter++;
2420
2421 bp = buf;
2422 for (i = 0, q = s; i < 7 && q < end; i++) {
2423 len = enclen(encode, q);
2424 while (len-- > 0) *bp++ = *q++;
2425 }
2426 if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; }
2427 else { xmemcpy(bp, "\"", 1); bp += 1; }
2428 *bp = 0;
2429 fputs((char* )buf, stderr);
2430
2431 for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr);
2432 if (p == FinishCode)
2433 fprintf(stderr, "----: ");
2434 else
2435 fprintf(stderr, "%4d: ", (int )(p - reg->p));
2436 onig_print_compiled_byte_code(stderr, p, NULL, reg->p, encode);
2437 fprintf(stderr, "\n");
2438 }
2439 #endif
2440
2441 sbegin = s;
2442 switch (*p++) {
2443 case OP_END: SOP_IN(OP_END);
2444 n = (int )(s - sstart);
2445 if (n > best_len) {
2446 OnigRegion* region;
2447 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
2448 if (IS_FIND_LONGEST(option)) {
2449 if (n > msa->best_len) {
2450 msa->best_len = n;
2451 msa->best_s = (UChar* )sstart;
2452 }
2453 else
2454 goto end_best_len;
2455 }
2456 #endif
2457 best_len = n;
2458 region = msa->region;
2459 if (region) {
2460 if (keep > s) keep = s;
2461
2462 #ifdef USE_POSIX_API_REGION_OPTION
2463 if (IS_POSIX_REGION(msa->options)) {
2464 posix_regmatch_t* rmt = (posix_regmatch_t* )region;
2465
2466 rmt[0].rm_so = (regoff_t )(keep - str);
2467 rmt[0].rm_eo = (regoff_t )(s - str);
2468 for (i = 1; i <= num_mem; i++) {
2469 if (mem_end_stk[i] != INVALID_STACK_INDEX) {
2470 if (MEM_STATUS_AT(reg->bt_mem_start, i))
2471 rmt[i].rm_so = (regoff_t )(STACK_AT(mem_start_stk[i])->u.mem.pstr - str);
2472 else
2473 rmt[i].rm_so = (regoff_t )((UChar* )((void* )(mem_start_stk[i])) - str);
2474
2475 rmt[i].rm_eo = (regoff_t )((MEM_STATUS_AT(reg->bt_mem_end, i)
2476 ? STACK_AT(mem_end_stk[i])->u.mem.pstr
2477 : (UChar* )((void* )mem_end_stk[i]))
2478 - str);
2479 }
2480 else {
2481 rmt[i].rm_so = rmt[i].rm_eo = ONIG_REGION_NOTPOS;
2482 }
2483 }
2484 }
2485 else {
2486 #endif /* USE_POSIX_API_REGION_OPTION */
2487 region->beg[0] = (int )(keep - str);
2488 region->end[0] = (int )(s - str);
2489 for (i = 1; i <= num_mem; i++) {
2490 if (mem_end_stk[i] != INVALID_STACK_INDEX) {
2491 if (MEM_STATUS_AT(reg->bt_mem_start, i))
2492 region->beg[i] = (int )(STACK_AT(mem_start_stk[i])->u.mem.pstr - str);
2493 else
2494 region->beg[i] = (int )((UChar* )((void* )mem_start_stk[i]) - str);
2495
2496 region->end[i] = (int )((MEM_STATUS_AT(reg->bt_mem_end, i)
2497 ? STACK_AT(mem_end_stk[i])->u.mem.pstr
2498 : (UChar* )((void* )mem_end_stk[i])) - str);
2499 }
2500 else {
2501 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
2502 }
2503 }
2504
2505 #ifdef USE_CAPTURE_HISTORY
2506 if (reg->capture_history != 0) {
2507 int r;
2508 OnigCaptureTreeNode* node;
2509
2510 if (IS_NULL(region->history_root)) {
2511 region->history_root = node = history_node_new();
2512 CHECK_NULL_RETURN_MEMERR(node);
2513 }
2514 else {
2515 node = region->history_root;
2516 history_tree_clear(node);
2517 }
2518
2519 node->group = 0;
2520 node->beg = (int )(keep - str);
2521 node->end = (int )(s - str);
2522
2523 stkp = stk_base;
2524 r = make_capture_history_tree(region->history_root, &stkp,
2525 stk, (UChar* )str, reg);
2526 if (r < 0) {
2527 best_len = r; /* error code */
2528 goto finish;
2529 }
2530 }
2531 #endif /* USE_CAPTURE_HISTORY */
2532 #ifdef USE_POSIX_API_REGION_OPTION
2533 } /* else IS_POSIX_REGION() */
2534 #endif
2535 } /* if (region) */
2536 } /* n > best_len */
2537
2538 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
2539 end_best_len:
2540 #endif
2541 SOP_OUT;
2542
2543 if (IS_FIND_CONDITION(option)) {
2544 if (IS_FIND_NOT_EMPTY(option) && s == sstart) {
2545 best_len = ONIG_MISMATCH;
2546 goto fail; /* for retry */
2547 }
2548 if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) {
2549 goto fail; /* for retry */
2550 }
2551 }
2552
2553 /* default behavior: return first-matching result. */
2554 goto finish;
2555 break;
2556
2557 case OP_EXACT1: SOP_IN(OP_EXACT1);
2558 DATA_ENSURE(1);
2559 if (*p != *s) goto fail;
2560 p++; s++;
2561 SOP_OUT;
2562 break;
2563
2564 case OP_EXACT1_IC: SOP_IN(OP_EXACT1_IC);
2565 {
2566 int len;
2567 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
2568
2569 DATA_ENSURE(1);
2570 len = ONIGENC_MBC_CASE_FOLD(encode,
2571 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
2572 case_fold_flag,
2573 &s, end, lowbuf);
2574 DATA_ENSURE(0);
2575 q = lowbuf;
2576 while (len-- > 0) {
2577 if (*p != *q) {
2578 goto fail;
2579 }
2580 p++; q++;
2581 }
2582 }
2583 SOP_OUT;
2584 break;
2585
2586 case OP_EXACT2: SOP_IN(OP_EXACT2);
2587 DATA_ENSURE(2);
2588 if (*p != *s) goto fail;
2589 p++; s++;
2590 if (*p != *s) goto fail;
2591 sprev = s;
2592 p++; s++;
2593 SOP_OUT;
2594 continue;
2595 break;
2596
2597 case OP_EXACT3: SOP_IN(OP_EXACT3);
2598 DATA_ENSURE(3);
2599 if (*p != *s) goto fail;
2600 p++; s++;
2601 if (*p != *s) goto fail;
2602 p++; s++;
2603 if (*p != *s) goto fail;
2604 sprev = s;
2605 p++; s++;
2606 SOP_OUT;
2607 continue;
2608 break;
2609
2610 case OP_EXACT4: SOP_IN(OP_EXACT4);
2611 DATA_ENSURE(4);
2612 if (*p != *s) goto fail;
2613 p++; s++;
2614 if (*p != *s) goto fail;
2615 p++; s++;
2616 if (*p != *s) goto fail;
2617 p++; s++;
2618 if (*p != *s) goto fail;
2619 sprev = s;
2620 p++; s++;
2621 SOP_OUT;
2622 continue;
2623 break;
2624
2625 case OP_EXACT5: SOP_IN(OP_EXACT5);
2626 DATA_ENSURE(5);
2627 if (*p != *s) goto fail;
2628 p++; s++;
2629 if (*p != *s) goto fail;
2630 p++; s++;
2631 if (*p != *s) goto fail;
2632 p++; s++;
2633 if (*p != *s) goto fail;
2634 p++; s++;
2635 if (*p != *s) goto fail;
2636 sprev = s;
2637 p++; s++;
2638 SOP_OUT;
2639 continue;
2640 break;
2641
2642 case OP_EXACTN: SOP_IN(OP_EXACTN);
2643 GET_LENGTH_INC(tlen, p);
2644 DATA_ENSURE(tlen);
2645 while (tlen-- > 0) {
2646 if (*p++ != *s++) goto fail;
2647 }
2648 sprev = s - 1;
2649 SOP_OUT;
2650 continue;
2651 break;
2652
2653 case OP_EXACTN_IC: SOP_IN(OP_EXACTN_IC);
2654 {
2655 int len;
2656 UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
2657
2658 GET_LENGTH_INC(tlen, p);
2659 endp = p + tlen;
2660
2661 while (p < endp) {
2662 sprev = s;
2663 DATA_ENSURE(1);
2664 len = ONIGENC_MBC_CASE_FOLD(encode,
2665 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
2666 case_fold_flag,
2667 &s, end, lowbuf);
2668 DATA_ENSURE(0);
2669 q = lowbuf;
2670 while (len-- > 0) {
2671 if (*p != *q) goto fail;
2672 p++; q++;
2673 }
2674 }
2675 }
2676
2677 SOP_OUT;
2678 continue;
2679 break;
2680
2681 case OP_EXACTMB2N1: SOP_IN(OP_EXACTMB2N1);
2682 DATA_ENSURE(2);
2683 if (*p != *s) goto fail;
2684 p++; s++;
2685 if (*p != *s) goto fail;
2686 p++; s++;
2687 SOP_OUT;
2688 break;
2689
2690 case OP_EXACTMB2N2: SOP_IN(OP_EXACTMB2N2);
2691 DATA_ENSURE(4);
2692 if (*p != *s) goto fail;
2693 p++; s++;
2694 if (*p != *s) goto fail;
2695 p++; s++;
2696 sprev = s;
2697 if (*p != *s) goto fail;
2698 p++; s++;
2699 if (*p != *s) goto fail;
2700 p++; s++;
2701 SOP_OUT;
2702 continue;
2703 break;
2704
2705 case OP_EXACTMB2N3: SOP_IN(OP_EXACTMB2N3);
2706 DATA_ENSURE(6);
2707 if (*p != *s) goto fail;
2708 p++; s++;
2709 if (*p != *s) goto fail;
2710 p++; s++;
2711 if (*p != *s) goto fail;
2712 p++; s++;
2713 if (*p != *s) goto fail;
2714 p++; s++;
2715 sprev = s;
2716 if (*p != *s) goto fail;
2717 p++; s++;
2718 if (*p != *s) goto fail;
2719 p++; s++;
2720 SOP_OUT;
2721 continue;
2722 break;
2723
2724 case OP_EXACTMB2N: SOP_IN(OP_EXACTMB2N);
2725 GET_LENGTH_INC(tlen, p);
2726 DATA_ENSURE(tlen * 2);
2727 while (tlen-- > 0) {
2728 if (*p != *s) goto fail;
2729 p++; s++;
2730 if (*p != *s) goto fail;
2731 p++; s++;
2732 }
2733 sprev = s - 2;
2734 SOP_OUT;
2735 continue;
2736 break;
2737
2738 case OP_EXACTMB3N: SOP_IN(OP_EXACTMB3N);
2739 GET_LENGTH_INC(tlen, p);
2740 DATA_ENSURE(tlen * 3);
2741 while (tlen-- > 0) {
2742 if (*p != *s) goto fail;
2743 p++; s++;
2744 if (*p != *s) goto fail;
2745 p++; s++;
2746 if (*p != *s) goto fail;
2747 p++; s++;
2748 }
2749 sprev = s - 3;
2750 SOP_OUT;
2751 continue;
2752 break;
2753
2754 case OP_EXACTMBN: SOP_IN(OP_EXACTMBN);
2755 GET_LENGTH_INC(tlen, p); /* mb-len */
2756 GET_LENGTH_INC(tlen2, p); /* string len */
2757 tlen2 *= tlen;
2758 DATA_ENSURE(tlen2);
2759 while (tlen2-- > 0) {
2760 if (*p != *s) goto fail;
2761 p++; s++;
2762 }
2763 sprev = s - tlen;
2764 SOP_OUT;
2765 continue;
2766 break;
2767
2768 case OP_CCLASS: SOP_IN(OP_CCLASS);
2769 DATA_ENSURE(1);
2770 if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail;
2771 p += SIZE_BITSET;
2772 s += enclen(encode, s); /* OP_CCLASS can match mb-code. \D, \S */
2773 SOP_OUT;
2774 break;
2775
2776 case OP_CCLASS_MB: SOP_IN(OP_CCLASS_MB);
2777 if (! ONIGENC_IS_MBC_HEAD(encode, s)) goto fail;
2778
2779 cclass_mb:
2780 GET_LENGTH_INC(tlen, p);
2781 {
2782 OnigCodePoint code;
2783 UChar *ss;
2784 int mb_len;
2785
2786 DATA_ENSURE(1);
2787 mb_len = enclen(encode, s);
2788 DATA_ENSURE(mb_len);
2789 ss = s;
2790 s += mb_len;
2791 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
2792
2793 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
2794 if (! onig_is_in_code_range(p, code)) goto fail;
2795 #else
2796 q = p;
2797 ALIGNMENT_RIGHT(q);
2798 if (! onig_is_in_code_range(q, code)) goto fail;
2799 #endif
2800 }
2801 p += tlen;
2802 SOP_OUT;
2803 break;
2804
2805 case OP_CCLASS_MIX: SOP_IN(OP_CCLASS_MIX);
2806 DATA_ENSURE(1);
2807 if (ONIGENC_IS_MBC_HEAD(encode, s)) {
2808 p += SIZE_BITSET;
2809 goto cclass_mb;
2810 }
2811 else {
2812 if (BITSET_AT(((BitSetRef )p), *s) == 0)
2813 goto fail;
2814
2815 p += SIZE_BITSET;
2816 GET_LENGTH_INC(tlen, p);
2817 p += tlen;
2818 s++;
2819 }
2820 SOP_OUT;
2821 break;
2822
2823 case OP_CCLASS_NOT: SOP_IN(OP_CCLASS_NOT);
2824 DATA_ENSURE(1);
2825 if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail;
2826 p += SIZE_BITSET;
2827 s += enclen(encode, s);
2828 SOP_OUT;
2829 break;
2830
2831 case OP_CCLASS_MB_NOT: SOP_IN(OP_CCLASS_MB_NOT);
2832 DATA_ENSURE(1);
2833 if (! ONIGENC_IS_MBC_HEAD(encode, s)) {
2834 s++;
2835 GET_LENGTH_INC(tlen, p);
2836 p += tlen;
2837 goto cc_mb_not_success;
2838 }
2839
2840 cclass_mb_not:
2841 GET_LENGTH_INC(tlen, p);
2842 {
2843 OnigCodePoint code;
2844 UChar *ss;
2845 int mb_len = enclen(encode, s);
2846
2847 if (! DATA_ENSURE_CHECK(mb_len)) {
2848 DATA_ENSURE(1);
2849 s = (UChar* )end;
2850 p += tlen;
2851 goto cc_mb_not_success;
2852 }
2853
2854 ss = s;
2855 s += mb_len;
2856 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
2857
2858 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
2859 if (onig_is_in_code_range(p, code)) goto fail;
2860 #else
2861 q = p;
2862 ALIGNMENT_RIGHT(q);
2863 if (onig_is_in_code_range(q, code)) goto fail;
2864 #endif
2865 }
2866 p += tlen;
2867
2868 cc_mb_not_success:
2869 SOP_OUT;
2870 break;
2871
2872 case OP_CCLASS_MIX_NOT: SOP_IN(OP_CCLASS_MIX_NOT);
2873 DATA_ENSURE(1);
2874 if (ONIGENC_IS_MBC_HEAD(encode, s)) {
2875 p += SIZE_BITSET;
2876 goto cclass_mb_not;
2877 }
2878 else {
2879 if (BITSET_AT(((BitSetRef )p), *s) != 0)
2880 goto fail;
2881
2882 p += SIZE_BITSET;
2883 GET_LENGTH_INC(tlen, p);
2884 p += tlen;
2885 s++;
2886 }
2887 SOP_OUT;
2888 break;
2889
2890 #ifdef USE_OP_CCLASS_NODE
2891 case OP_CCLASS_NODE: SOP_IN(OP_CCLASS_NODE);
2892 {
2893 OnigCodePoint code;
2894 void *node;
2895 int mb_len;
2896 UChar *ss;
2897
2898 DATA_ENSURE(1);
2899 GET_POINTER_INC(node, p);
2900 mb_len = enclen(encode, s);
2901 ss = s;
2902 s += mb_len;
2903 DATA_ENSURE(0);
2904 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
2905 if (onig_is_code_in_cc_len(mb_len, code, node) == 0) goto fail;
2906 }
2907 SOP_OUT;
2908 break;
2909 #endif
2910
2911 case OP_ANYCHAR: SOP_IN(OP_ANYCHAR);
2912 DATA_ENSURE(1);
2913 n = enclen(encode, s);
2914 DATA_ENSURE(n);
2915 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
2916 s += n;
2917 SOP_OUT;
2918 break;
2919
2920 case OP_ANYCHAR_ML: SOP_IN(OP_ANYCHAR_ML);
2921 DATA_ENSURE(1);
2922 n = enclen(encode, s);
2923 DATA_ENSURE(n);
2924 s += n;
2925 SOP_OUT;
2926 break;
2927
2928 case OP_ANYCHAR_STAR: SOP_IN(OP_ANYCHAR_STAR);
2929 while (DATA_ENSURE_CHECK1) {
2930 STACK_PUSH_ALT(p, s, sprev);
2931 n = enclen(encode, s);
2932 DATA_ENSURE(n);
2933 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
2934 sprev = s;
2935 s += n;
2936 }
2937 SOP_OUT;
2938 continue;
2939 break;
2940
2941 case OP_ANYCHAR_ML_STAR: SOP_IN(OP_ANYCHAR_ML_STAR);
2942 while (DATA_ENSURE_CHECK1) {
2943 STACK_PUSH_ALT(p, s, sprev);
2944 n = enclen(encode, s);
2945 if (n > 1) {
2946 DATA_ENSURE(n);
2947 sprev = s;
2948 s += n;
2949 }
2950 else {
2951 sprev = s;
2952 s++;
2953 }
2954 }
2955 SOP_OUT;
2956 continue;
2957 break;
2958
2959 case OP_ANYCHAR_STAR_PEEK_NEXT: SOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT);
2960 while (DATA_ENSURE_CHECK1) {
2961 if (*p == *s) {
2962 STACK_PUSH_ALT(p + 1, s, sprev);
2963 }
2964 n = enclen(encode, s);
2965 DATA_ENSURE(n);
2966 if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
2967 sprev = s;
2968 s += n;
2969 }
2970 p++;
2971 SOP_OUT;
2972 break;
2973
2974 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:SOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);
2975 while (DATA_ENSURE_CHECK1) {
2976 if (*p == *s) {
2977 STACK_PUSH_ALT(p + 1, s, sprev);
2978 }
2979 n = enclen(encode, s);
2980 if (n > 1) {
2981 DATA_ENSURE(n);
2982 sprev = s;
2983 s += n;
2984 }
2985 else {
2986 sprev = s;
2987 s++;
2988 }
2989 }
2990 p++;
2991 SOP_OUT;
2992 break;
2993
2994 case OP_WORD: SOP_IN(OP_WORD);
2995 DATA_ENSURE(1);
2996 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
2997 goto fail;
2998
2999 s += enclen(encode, s);
3000 SOP_OUT;
3001 break;
3002
3003 case OP_WORD_ASCII: SOP_IN(OP_WORD_ASCII);
3004 DATA_ENSURE(1);
3005 if (! ONIGENC_IS_MBC_WORD_ASCII(encode, s, end))
3006 goto fail;
3007
3008 s += enclen(encode, s);
3009 SOP_OUT;
3010 break;
3011
3012 case OP_NO_WORD: SOP_IN(OP_NO_WORD);
3013 DATA_ENSURE(1);
3014 if (ONIGENC_IS_MBC_WORD(encode, s, end))
3015 goto fail;
3016
3017 s += enclen(encode, s);
3018 SOP_OUT;
3019 break;
3020
3021 case OP_NO_WORD_ASCII: SOP_IN(OP_NO_WORD_ASCII);
3022 DATA_ENSURE(1);
3023 if (ONIGENC_IS_MBC_WORD_ASCII(encode, s, end))
3024 goto fail;
3025
3026 s += enclen(encode, s);
3027 SOP_OUT;
3028 break;
3029
3030 case OP_WORD_BOUNDARY: SOP_IN(OP_WORD_BOUNDARY);
3031 {
3032 ModeType mode;
3033 GET_MODE_INC(mode, p); /* ascii_mode */
3034
3035 if (ON_STR_BEGIN(s)) {
3036 DATA_ENSURE(1);
3037 if (! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode))
3038 goto fail;
3039 }
3040 else if (ON_STR_END(s)) {
3041 if (! IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
3042 goto fail;
3043 }
3044 else {
3045 if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)
3046 == IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
3047 goto fail;
3048 }
3049 }
3050 SOP_OUT;
3051 continue;
3052 break;
3053
3054 case OP_NO_WORD_BOUNDARY: SOP_IN(OP_NO_WORD_BOUNDARY);
3055 {
3056 ModeType mode;
3057 GET_MODE_INC(mode, p); /* ascii_mode */
3058
3059 if (ON_STR_BEGIN(s)) {
3060 if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode))
3061 goto fail;
3062 }
3063 else if (ON_STR_END(s)) {
3064 if (IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
3065 goto fail;
3066 }
3067 else {
3068 if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)
3069 != IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
3070 goto fail;
3071 }
3072 }
3073 SOP_OUT;
3074 continue;
3075 break;
3076
3077 #ifdef USE_WORD_BEGIN_END
3078 case OP_WORD_BEGIN: SOP_IN(OP_WORD_BEGIN);
3079 {
3080 ModeType mode;
3081 GET_MODE_INC(mode, p); /* ascii_mode */
3082
3083 if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) {
3084 if (ON_STR_BEGIN(s) || !IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) {
3085 SOP_OUT;
3086 continue;
3087 }
3088 }
3089 }
3090 goto fail;
3091 break;
3092
3093 case OP_WORD_END: SOP_IN(OP_WORD_END);
3094 {
3095 ModeType mode;
3096 GET_MODE_INC(mode, p); /* ascii_mode */
3097
3098 if (!ON_STR_BEGIN(s) && IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) {
3099 if (ON_STR_END(s) || ! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) {
3100 SOP_OUT;
3101 continue;
3102 }
3103 }
3104 }
3105 goto fail;
3106 break;
3107 #endif
3108
3109 case OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
3110 SOP_IN(OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY);
3111 if (onigenc_egcb_is_break_position(encode, s, sprev, str, end)) {
3112 SOP_OUT;
3113 continue;
3114 }
3115 goto fail;
3116 break;
3117
3118 case OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
3119 SOP_IN(OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY);
3120 if (onigenc_egcb_is_break_position(encode, s, sprev, str, end))
3121 goto fail;
3122
3123 SOP_OUT;
3124 continue;
3125 break;
3126
3127 case OP_BEGIN_BUF: SOP_IN(OP_BEGIN_BUF);
3128 if (! ON_STR_BEGIN(s)) goto fail;
3129
3130 SOP_OUT;
3131 continue;
3132 break;
3133
3134 case OP_END_BUF: SOP_IN(OP_END_BUF);
3135 if (! ON_STR_END(s)) goto fail;
3136
3137 SOP_OUT;
3138 continue;
3139 break;
3140
3141 case OP_BEGIN_LINE: SOP_IN(OP_BEGIN_LINE);
3142 if (ON_STR_BEGIN(s)) {
3143 if (IS_NOTBOL(msa->options)) goto fail;
3144 SOP_OUT;
3145 continue;
3146 }
3147 else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) {
3148 SOP_OUT;
3149 continue;
3150 }
3151 goto fail;
3152 break;
3153
3154 case OP_END_LINE: SOP_IN(OP_END_LINE);
3155 if (ON_STR_END(s)) {
3156 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3157 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
3158 #endif
3159 if (IS_NOTEOL(msa->options)) goto fail;
3160 SOP_OUT;
3161 continue;
3162 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3163 }
3164 #endif
3165 }
3166 else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) {
3167 SOP_OUT;
3168 continue;
3169 }
3170 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3171 else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
3172 SOP_OUT;
3173 continue;
3174 }
3175 #endif
3176 goto fail;
3177 break;
3178
3179 case OP_SEMI_END_BUF: SOP_IN(OP_SEMI_END_BUF);
3180 if (ON_STR_END(s)) {
3181 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3182 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
3183 #endif
3184 if (IS_NOTEOL(msa->options)) goto fail;
3185 SOP_OUT;
3186 continue;
3187 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3188 }
3189 #endif
3190 }
3191 else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) &&
3192 ON_STR_END(s + enclen(encode, s))) {
3193 SOP_OUT;
3194 continue;
3195 }
3196 #ifdef USE_CRNL_AS_LINE_TERMINATOR
3197 else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
3198 UChar* ss = s + enclen(encode, s);
3199 ss += enclen(encode, ss);
3200 if (ON_STR_END(ss)) {
3201 SOP_OUT;
3202 continue;
3203 }
3204 }
3205 #endif
3206 goto fail;
3207 break;
3208
3209 case OP_BEGIN_POSITION: SOP_IN(OP_BEGIN_POSITION);
3210 if (s != msa->start)
3211 goto fail;
3212
3213 SOP_OUT;
3214 continue;
3215 break;
3216
3217 case OP_MEMORY_START_PUSH: SOP_IN(OP_MEMORY_START_PUSH);
3218 GET_MEMNUM_INC(mem, p);
3219 STACK_PUSH_MEM_START(mem, s);
3220 SOP_OUT;
3221 continue;
3222 break;
3223
3224 case OP_MEMORY_START: SOP_IN(OP_MEMORY_START);
3225 GET_MEMNUM_INC(mem, p);
3226 mem_start_stk[mem] = (StackIndex )((void* )s);
3227 SOP_OUT;
3228 continue;
3229 break;
3230
3231 case OP_MEMORY_END_PUSH: SOP_IN(OP_MEMORY_END_PUSH);
3232 GET_MEMNUM_INC(mem, p);
3233 STACK_PUSH_MEM_END(mem, s);
3234 SOP_OUT;
3235 continue;
3236 break;
3237
3238 case OP_MEMORY_END: SOP_IN(OP_MEMORY_END);
3239 GET_MEMNUM_INC(mem, p);
3240 mem_end_stk[mem] = (StackIndex )((void* )s);
3241 SOP_OUT;
3242 continue;
3243 break;
3244
3245 #ifdef USE_CALL
3246 case OP_MEMORY_END_PUSH_REC: SOP_IN(OP_MEMORY_END_PUSH_REC);
3247 GET_MEMNUM_INC(mem, p);
3248 STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
3249 STACK_PUSH_MEM_END(mem, s);
3250 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
3251 SOP_OUT;
3252 continue;
3253 break;
3254
3255 case OP_MEMORY_END_REC: SOP_IN(OP_MEMORY_END_REC);
3256 GET_MEMNUM_INC(mem, p);
3257 mem_end_stk[mem] = (StackIndex )((void* )s);
3258 STACK_GET_MEM_START(mem, stkp);
3259
3260 if (MEM_STATUS_AT(reg->bt_mem_start, mem))
3261 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
3262 else
3263 mem_start_stk[mem] = (StackIndex )((void* )stkp->u.mem.pstr);
3264
3265 STACK_PUSH_MEM_END_MARK(mem);
3266 SOP_OUT;
3267 continue;
3268 break;
3269 #endif
3270
3271 case OP_BACKREF1: SOP_IN(OP_BACKREF1);
3272 mem = 1;
3273 goto backref;
3274 break;
3275
3276 case OP_BACKREF2: SOP_IN(OP_BACKREF2);
3277 mem = 2;
3278 goto backref;
3279 break;
3280
3281 case OP_BACKREF_N: SOP_IN(OP_BACKREF_N);
3282 GET_MEMNUM_INC(mem, p);
3283 backref:
3284 {
3285 int len;
3286 UChar *pstart, *pend;
3287
3288 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
3289 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
3290
3291 if (MEM_STATUS_AT(reg->bt_mem_start, mem))
3292 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3293 else
3294 pstart = (UChar* )((void* )mem_start_stk[mem]);
3295
3296 pend = (MEM_STATUS_AT(reg->bt_mem_end, mem)
3297 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3298 : (UChar* )((void* )mem_end_stk[mem]));
3299 n = (int )(pend - pstart);
3300 DATA_ENSURE(n);
3301 sprev = s;
3302 STRING_CMP(pstart, s, n);
3303 while (sprev + (len = enclen(encode, sprev)) < s)
3304 sprev += len;
3305
3306 SOP_OUT;
3307 continue;
3308 }
3309 break;
3310
3311 case OP_BACKREF_N_IC: SOP_IN(OP_BACKREF_N_IC);
3312 GET_MEMNUM_INC(mem, p);
3313 {
3314 int len;
3315 UChar *pstart, *pend;
3316
3317 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
3318 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
3319
3320 if (MEM_STATUS_AT(reg->bt_mem_start, mem))
3321 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3322 else
3323 pstart = (UChar* )((void* )mem_start_stk[mem]);
3324
3325 pend = (MEM_STATUS_AT(reg->bt_mem_end, mem)
3326 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3327 : (UChar* )((void* )mem_end_stk[mem]));
3328 n = (int )(pend - pstart);
3329 DATA_ENSURE(n);
3330 sprev = s;
3331 STRING_CMP_IC(case_fold_flag, pstart, &s, n);
3332 while (sprev + (len = enclen(encode, sprev)) < s)
3333 sprev += len;
3334
3335 SOP_OUT;
3336 continue;
3337 }
3338 break;
3339
3340 case OP_BACKREF_MULTI: SOP_IN(OP_BACKREF_MULTI);
3341 {
3342 int len, is_fail;
3343 UChar *pstart, *pend, *swork;
3344
3345 GET_LENGTH_INC(tlen, p);
3346 for (i = 0; i < tlen; i++) {
3347 GET_MEMNUM_INC(mem, p);
3348
3349 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
3350 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
3351
3352 if (MEM_STATUS_AT(reg->bt_mem_start, mem))
3353 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3354 else
3355 pstart = (UChar* )((void* )mem_start_stk[mem]);
3356
3357 pend = (MEM_STATUS_AT(reg->bt_mem_end, mem)
3358 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3359 : (UChar* )((void* )mem_end_stk[mem]));
3360 n = (int )(pend - pstart);
3361 DATA_ENSURE(n);
3362 sprev = s;
3363 swork = s;
3364 STRING_CMP_VALUE(pstart, swork, n, is_fail);
3365 if (is_fail) continue;
3366 s = swork;
3367 while (sprev + (len = enclen(encode, sprev)) < s)
3368 sprev += len;
3369
3370 p += (SIZE_MEMNUM * (tlen - i - 1));
3371 break; /* success */
3372 }
3373 if (i == tlen) goto fail;
3374 SOP_OUT;
3375 continue;
3376 }
3377 break;
3378
3379 case OP_BACKREF_MULTI_IC: SOP_IN(OP_BACKREF_MULTI_IC);
3380 {
3381 int len, is_fail;
3382 UChar *pstart, *pend, *swork;
3383
3384 GET_LENGTH_INC(tlen, p);
3385 for (i = 0; i < tlen; i++) {
3386 GET_MEMNUM_INC(mem, p);
3387
3388 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
3389 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
3390
3391 if (MEM_STATUS_AT(reg->bt_mem_start, mem))
3392 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3393 else
3394 pstart = (UChar* )((void* )mem_start_stk[mem]);
3395
3396 pend = (MEM_STATUS_AT(reg->bt_mem_end, mem)
3397 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3398 : (UChar* )((void* )mem_end_stk[mem]));
3399 n = (int )(pend - pstart);
3400 DATA_ENSURE(n);
3401 sprev = s;
3402 swork = s;
3403 STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail);
3404 if (is_fail) continue;
3405 s = swork;
3406 while (sprev + (len = enclen(encode, sprev)) < s)
3407 sprev += len;
3408
3409 p += (SIZE_MEMNUM * (tlen - i - 1));
3410 break; /* success */
3411 }
3412 if (i == tlen) goto fail;
3413 SOP_OUT;
3414 continue;
3415 }
3416 break;
3417
3418 #ifdef USE_BACKREF_WITH_LEVEL
3419 case OP_BACKREF_WITH_LEVEL:
3420 {
3421 int len;
3422 OnigOptionType ic;
3423 LengthType level;
3424
3425 GET_OPTION_INC(ic, p);
3426 GET_LENGTH_INC(level, p);
3427 GET_LENGTH_INC(tlen, p);
3428
3429 sprev = s;
3430 if (backref_match_at_nested_level(reg, stk, stk_base, ic
3431 , case_fold_flag, (int )level, (int )tlen, p, &s, end)) {
3432 if (sprev < end) {
3433 while (sprev + (len = enclen(encode, sprev)) < s)
3434 sprev += len;
3435 }
3436 p += (SIZE_MEMNUM * tlen);
3437 }
3438 else
3439 goto fail;
3440
3441 SOP_OUT;
3442 continue;
3443 }
3444 break;
3445 #endif
3446
3447 case OP_BACKREF_CHECK: SOP_IN(OP_BACKREF_CHECK);
3448 {
3449 GET_LENGTH_INC(tlen, p);
3450 for (i = 0; i < tlen; i++) {
3451 GET_MEMNUM_INC(mem, p);
3452
3453 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
3454 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
3455
3456 p += (SIZE_MEMNUM * (tlen - i - 1));
3457 break; /* success */
3458 }
3459 if (i == tlen) goto fail;
3460 SOP_OUT;
3461 continue;
3462 }
3463 break;
3464
3465 #ifdef USE_BACKREF_WITH_LEVEL
3466 case OP_BACKREF_CHECK_WITH_LEVEL:
3467 {
3468 LengthType level;
3469
3470 GET_LENGTH_INC(level, p);
3471 GET_LENGTH_INC(tlen, p);
3472
3473 if (backref_check_at_nested_level(reg, stk, stk_base,
3474 (int )level, (int )tlen, p) != 0) {
3475 p += (SIZE_MEMNUM * tlen);
3476 }
3477 else
3478 goto fail;
3479
3480 SOP_OUT;
3481 continue;
3482 }
3483 break;
3484 #endif
3485
3486 case OP_EMPTY_CHECK_START: SOP_IN(OP_EMPTY_CHECK_START);
3487 GET_MEMNUM_INC(mem, p); /* mem: null check id */
3488 STACK_PUSH_EMPTY_CHECK_START(mem, s);
3489 SOP_OUT;
3490 continue;
3491 break;
3492
3493 case OP_EMPTY_CHECK_END: SOP_IN(OP_EMPTY_CHECK_END);
3494 {
3495 int is_empty;
3496
3497 GET_MEMNUM_INC(mem, p); /* mem: null check id */
3498 STACK_EMPTY_CHECK(is_empty, mem, s);
3499 if (is_empty) {
3500 #ifdef ONIG_DEBUG_MATCH
3501 fprintf(stderr, "EMPTY_CHECK_END: skip id:%d, s:%p\n", (int )mem, s);
3502 #endif
3503 empty_check_found:
3504 /* empty loop founded, skip next instruction */
3505 switch (*p++) {
3506 case OP_JUMP:
3507 case OP_PUSH:
3508 p += SIZE_RELADDR;
3509 break;
3510 case OP_REPEAT_INC:
3511 case OP_REPEAT_INC_NG:
3512 case OP_REPEAT_INC_SG:
3513 case OP_REPEAT_INC_NG_SG:
3514 p += SIZE_MEMNUM;
3515 break;
3516 default:
3517 goto unexpected_bytecode_error;
3518 break;
3519 }
3520 }
3521 }
3522 SOP_OUT;
3523 continue;
3524 break;
3525
3526 #ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT
3527 case OP_EMPTY_CHECK_END_MEMST: SOP_IN(OP_EMPTY_CHECK_END_MEMST);
3528 {
3529 int is_empty;
3530
3531 GET_MEMNUM_INC(mem, p); /* mem: null check id */
3532 STACK_EMPTY_CHECK_MEM(is_empty, mem, s, reg);
3533 if (is_empty) {
3534 #ifdef ONIG_DEBUG_MATCH
3535 fprintf(stderr, "EMPTY_CHECK_END_MEM: skip id:%d, s:%p\n", (int)mem, s);
3536 #endif
3537 if (is_empty == -1) goto fail;
3538 goto empty_check_found;
3539 }
3540 }
3541 SOP_OUT;
3542 continue;
3543 break;
3544 #endif
3545
3546 #ifdef USE_CALL
3547 case OP_EMPTY_CHECK_END_MEMST_PUSH:
3548 SOP_IN(OP_EMPTY_CHECK_END_MEMST_PUSH);
3549 {
3550 int is_empty;
3551
3552 GET_MEMNUM_INC(mem, p); /* mem: null check id */
3553 #ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT
3554 STACK_EMPTY_CHECK_MEM_REC(is_empty, mem, s, reg);
3555 #else
3556 STACK_EMPTY_CHECK_REC(is_empty, mem, s);
3557 #endif
3558 if (is_empty) {
3559 #ifdef ONIG_DEBUG_MATCH
3560 fprintf(stderr, "EMPTY_CHECK_END_MEM_PUSH: skip id:%d, s:%p\n",
3561 (int )mem, s);
3562 #endif
3563 if (is_empty == -1) goto fail;
3564 goto empty_check_found;
3565 }
3566 else {
3567 STACK_PUSH_EMPTY_CHECK_END(mem);
3568 }
3569 }
3570 SOP_OUT;
3571 continue;
3572 break;
3573 #endif
3574
3575 case OP_JUMP: SOP_IN(OP_JUMP);
3576 GET_RELADDR_INC(addr, p);
3577 p += addr;
3578 SOP_OUT;
3579 CHECK_INTERRUPT_IN_MATCH;
3580 continue;
3581 break;
3582
3583 case OP_PUSH: SOP_IN(OP_PUSH);
3584 GET_RELADDR_INC(addr, p);
3585 STACK_PUSH_ALT(p + addr, s, sprev);
3586 SOP_OUT;
3587 continue;
3588 break;
3589
3590 case OP_PUSH_SUPER: SOP_IN(OP_PUSH_SUPER);
3591 GET_RELADDR_INC(addr, p);
3592 STACK_PUSH_SUPER_ALT(p + addr, s, sprev);
3593 SOP_OUT;
3594 continue;
3595 break;
3596
3597 case OP_POP_OUT: SOP_IN(OP_POP_OUT);
3598 STACK_POP_ONE;
3599 /* for stop backtrack */
3600 /* CHECK_RETRY_LIMIT_IN_MATCH; */
3601 SOP_OUT;
3602 continue;
3603 break;
3604
3605 case OP_PUSH_OR_JUMP_EXACT1: SOP_IN(OP_PUSH_OR_JUMP_EXACT1);
3606 GET_RELADDR_INC(addr, p);
3607 if (*p == *s && DATA_ENSURE_CHECK1) {
3608 p++;
3609 STACK_PUSH_ALT(p + addr, s, sprev);
3610 SOP_OUT;
3611 continue;
3612 }
3613 p += (addr + 1);
3614 SOP_OUT;
3615 continue;
3616 break;
3617
3618 case OP_PUSH_IF_PEEK_NEXT: SOP_IN(OP_PUSH_IF_PEEK_NEXT);
3619 GET_RELADDR_INC(addr, p);
3620 if (*p == *s) {
3621 p++;
3622 STACK_PUSH_ALT(p + addr, s, sprev);
3623 SOP_OUT;
3624 continue;
3625 }
3626 p++;
3627 SOP_OUT;
3628 continue;
3629 break;
3630
3631 case OP_REPEAT: SOP_IN(OP_REPEAT);
3632 {
3633 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3634 GET_RELADDR_INC(addr, p);
3635
3636 STACK_ENSURE(1);
3637 repeat_stk[mem] = GET_STACK_INDEX(stk);
3638 STACK_PUSH_REPEAT(mem, p);
3639
3640 if (reg->repeat_range[mem].lower == 0) {
3641 STACK_PUSH_ALT(p + addr, s, sprev);
3642 }
3643 }
3644 SOP_OUT;
3645 continue;
3646 break;
3647
3648 case OP_REPEAT_NG: SOP_IN(OP_REPEAT_NG);
3649 {
3650 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3651 GET_RELADDR_INC(addr, p);
3652
3653 STACK_ENSURE(1);
3654 repeat_stk[mem] = GET_STACK_INDEX(stk);
3655 STACK_PUSH_REPEAT(mem, p);
3656
3657 if (reg->repeat_range[mem].lower == 0) {
3658 STACK_PUSH_ALT(p, s, sprev);
3659 p += addr;
3660 }
3661 }
3662 SOP_OUT;
3663 continue;
3664 break;
3665
3666 case OP_REPEAT_INC: SOP_IN(OP_REPEAT_INC);
3667 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3668 si = repeat_stk[mem];
3669 stkp = STACK_AT(si);
3670
3671 repeat_inc:
3672 stkp->u.repeat.count++;
3673 if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) {
3674 /* end of repeat. Nothing to do. */
3675 }
3676 else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
3677 STACK_PUSH_ALT(p, s, sprev);
3678 p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */
3679 }
3680 else {
3681 p = stkp->u.repeat.pcode;
3682 }
3683 STACK_PUSH_REPEAT_INC(si);
3684 SOP_OUT;
3685 CHECK_INTERRUPT_IN_MATCH;
3686 continue;
3687 break;
3688
3689 case OP_REPEAT_INC_SG: SOP_IN(OP_REPEAT_INC_SG);
3690 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3691 STACK_GET_REPEAT(mem, stkp);
3692 si = GET_STACK_INDEX(stkp);
3693 goto repeat_inc;
3694 break;
3695
3696 case OP_REPEAT_INC_NG: SOP_IN(OP_REPEAT_INC_NG);
3697 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3698 si = repeat_stk[mem];
3699 stkp = STACK_AT(si);
3700
3701 repeat_inc_ng:
3702 stkp->u.repeat.count++;
3703 if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
3704 if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
3705 UChar* pcode = stkp->u.repeat.pcode;
3706
3707 STACK_PUSH_REPEAT_INC(si);
3708 STACK_PUSH_ALT(pcode, s, sprev);
3709 }
3710 else {
3711 p = stkp->u.repeat.pcode;
3712 STACK_PUSH_REPEAT_INC(si);
3713 }
3714 }
3715 else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
3716 STACK_PUSH_REPEAT_INC(si);
3717 }
3718 SOP_OUT;
3719 CHECK_INTERRUPT_IN_MATCH;
3720 continue;
3721 break;
3722
3723 case OP_REPEAT_INC_NG_SG: SOP_IN(OP_REPEAT_INC_NG_SG);
3724 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3725 STACK_GET_REPEAT(mem, stkp);
3726 si = GET_STACK_INDEX(stkp);
3727 goto repeat_inc_ng;
3728 break;
3729
3730 case OP_PREC_READ_START: SOP_IN(OP_PREC_READ_START);
3731 STACK_PUSH_POS(s, sprev);
3732 SOP_OUT;
3733 continue;
3734 break;
3735
3736 case OP_PREC_READ_END: SOP_IN(OP_PREC_READ_END);
3737 {
3738 STACK_EXEC_TO_VOID(stkp);
3739 s = stkp->u.state.pstr;
3740 sprev = stkp->u.state.pstr_prev;
3741 }
3742 SOP_OUT;
3743 continue;
3744 break;
3745
3746 case OP_PREC_READ_NOT_START: SOP_IN(OP_PREC_READ_NOT_START);
3747 GET_RELADDR_INC(addr, p);
3748 STACK_PUSH_ALT_PREC_READ_NOT(p + addr, s, sprev);
3749 SOP_OUT;
3750 continue;
3751 break;
3752
3753 case OP_PREC_READ_NOT_END: SOP_IN(OP_PREC_READ_NOT_END);
3754 STACK_POP_TIL_ALT_PREC_READ_NOT;
3755 goto fail;
3756 break;
3757
3758 case OP_ATOMIC_START: SOP_IN(OP_ATOMIC_START);
3759 STACK_PUSH_TO_VOID_START;
3760 SOP_OUT;
3761 continue;
3762 break;
3763
3764 case OP_ATOMIC_END: SOP_IN(OP_ATOMIC_END);
3765 STACK_EXEC_TO_VOID(stkp);
3766 SOP_OUT;
3767 continue;
3768 break;
3769
3770 case OP_LOOK_BEHIND: SOP_IN(OP_LOOK_BEHIND);
3771 GET_LENGTH_INC(tlen, p);
3772 s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
3773 if (IS_NULL(s)) goto fail;
3774 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
3775 SOP_OUT;
3776 continue;
3777 break;
3778
3779 case OP_LOOK_BEHIND_NOT_START: SOP_IN(OP_LOOK_BEHIND_NOT_START);
3780 GET_RELADDR_INC(addr, p);
3781 GET_LENGTH_INC(tlen, p);
3782 q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
3783 if (IS_NULL(q)) {
3784 /* too short case -> success. ex. /(?<!XXX)a/.match("a")
3785 If you want to change to fail, replace following line. */
3786 p += addr;
3787 /* goto fail; */
3788 }
3789 else {
3790 STACK_PUSH_ALT_LOOK_BEHIND_NOT(p + addr, s, sprev);
3791 s = q;
3792 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
3793 }
3794 SOP_OUT;
3795 continue;
3796 break;
3797
3798 case OP_LOOK_BEHIND_NOT_END: SOP_IN(OP_LOOK_BEHIND_NOT_END);
3799 STACK_POP_TIL_ALT_LOOK_BEHIND_NOT;
3800 goto fail;
3801 break;
3802
3803 #ifdef USE_CALL
3804 case OP_CALL: SOP_IN(OP_CALL);
3805 GET_ABSADDR_INC(addr, p);
3806 STACK_PUSH_CALL_FRAME(p);
3807 p = reg->p + addr;
3808 SOP_OUT;
3809 continue;
3810 break;
3811
3812 case OP_RETURN: SOP_IN(OP_RETURN);
3813 STACK_RETURN(p);
3814 STACK_PUSH_RETURN;
3815 SOP_OUT;
3816 continue;
3817 break;
3818 #endif
3819
3820 case OP_PUSH_SAVE_VAL: SOP_IN(OP_PUSH_SAVE_VAL);
3821 {
3822 SaveType type;
3823 GET_SAVE_TYPE_INC(type, p);
3824 GET_MEMNUM_INC(mem, p); /* mem: save id */
3825 switch ((enum SaveType )type) {
3826 case SAVE_KEEP:
3827 STACK_PUSH_SAVE_VAL(mem, type, s);
3828 break;
3829
3830 case SAVE_S:
3831 STACK_PUSH_SAVE_VAL_WITH_SPREV(mem, type, s);
3832 break;
3833
3834 case SAVE_RIGHT_RANGE:
3835 STACK_PUSH_SAVE_VAL(mem, SAVE_RIGHT_RANGE, right_range);
3836 break;
3837 }
3838 }
3839 SOP_OUT;
3840 continue;
3841 break;
3842
3843 case OP_UPDATE_VAR: SOP_IN(OP_UPDATE_VAR);
3844 {
3845 UpdateVarType type;
3846 enum SaveType save_type;
3847
3848 GET_UPDATE_VAR_TYPE_INC(type, p);
3849 GET_MEMNUM_INC(mem, p); /* mem: save id */
3850 switch ((enum UpdateVarType )type) {
3851 case UPDATE_VAR_KEEP_FROM_STACK_LAST:
3852 STACK_GET_SAVE_VAL_TYPE_LAST(SAVE_KEEP, keep);
3853 break;
3854 case UPDATE_VAR_S_FROM_STACK:
3855 STACK_GET_SAVE_VAL_TYPE_LAST_ID_WITH_SPREV(SAVE_S, mem, s);
3856 break;
3857 case UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK:
3858 save_type = SAVE_S;
3859 goto get_save_val_type_last_id;
3860 break;
3861 case UPDATE_VAR_RIGHT_RANGE_FROM_STACK:
3862 save_type = SAVE_RIGHT_RANGE;
3863 get_save_val_type_last_id:
3864 STACK_GET_SAVE_VAL_TYPE_LAST_ID(save_type, mem, right_range);
3865 break;
3866 case UPDATE_VAR_RIGHT_RANGE_INIT:
3867 INIT_RIGHT_RANGE;
3868 break;
3869 }
3870 }
3871 SOP_OUT;
3872 continue;
3873 break;
3874
3875 #ifdef USE_CALLOUT
3876 case OP_CALLOUT_CONTENTS: SOP_IN(OP_CALLOUT_CONTENTS);
3877 of = ONIG_CALLOUT_OF_CONTENTS;
3878 goto callout_common_entry;
3879
3880 SOP_OUT;
3881 continue;
3882 break;
3883
3884 case OP_CALLOUT_NAME: SOP_IN(OP_CALLOUT_NAME);
3885 {
3886 int call_result;
3887 int name_id;
3888 int num;
3889 int in;
3890 CalloutListEntry* e;
3891 OnigCalloutFunc func;
3892 OnigCalloutArgs args;
3893
3894 of = ONIG_CALLOUT_OF_NAME;
3895 GET_MEMNUM_INC(name_id, p);
3896
3897 callout_common_entry:
3898 GET_MEMNUM_INC(num, p);
3899 e = onig_reg_callout_list_at(reg, num);
3900 in = e->in;
3901 if (of == ONIG_CALLOUT_OF_NAME) {
3902 func = onig_get_callout_start_func(reg, num);
3903 }
3904 else {
3905 name_id = ONIG_NON_NAME_ID;
3906 func = msa->mp->progress_callout_of_contents;
3907 }
3908
3909 if (IS_NOT_NULL(func) && (in & ONIG_CALLOUT_IN_PROGRESS) != 0) {
3910 CALLOUT_BODY(func, ONIG_CALLOUT_IN_PROGRESS, name_id,
3911 num, msa->mp->callout_user_data, args, call_result);
3912 switch (call_result) {
3913 case ONIG_CALLOUT_FAIL:
3914 goto fail;
3915 break;
3916 case ONIG_CALLOUT_SUCCESS:
3917 goto retraction_callout2;
3918 break;
3919 default: /* error code */
3920 if (call_result > 0) {
3921 call_result = ONIGERR_INVALID_ARGUMENT;
3922 }
3923 best_len = call_result;
3924 goto finish;
3925 break;
3926 }
3927 }
3928 else {
3929 retraction_callout2:
3930 if ((in & ONIG_CALLOUT_IN_RETRACTION) != 0) {
3931 if (of == ONIG_CALLOUT_OF_NAME) {
3932 if (IS_NOT_NULL(func)) {
3933 STACK_PUSH_CALLOUT_NAME(name_id, num, func);
3934 }
3935 }
3936 else {
3937 func = msa->mp->retraction_callout_of_contents;
3938 if (IS_NOT_NULL(func)) {
3939 STACK_PUSH_CALLOUT_CONTENTS(num, func);
3940 }
3941 }
3942 }
3943 }
3944 }
3945 SOP_OUT;
3946 continue;
3947 break;
3948 #endif
3949
3950 case OP_FINISH:
3951 goto finish;
3952 break;
3953
3954 fail:
3955 SOP_OUT;
3956 /* fall */
3957 case OP_FAIL: SOP_IN(OP_FAIL);
3958 STACK_POP;
3959 p = stk->u.state.pcode;
3960 s = stk->u.state.pstr;
3961 sprev = stk->u.state.pstr_prev;
3962 CHECK_RETRY_LIMIT_IN_MATCH;
3963 SOP_OUT;
3964 continue;
3965 break;
3966
3967 default:
3968 goto bytecode_error;
3969
3970 } /* end of switch */
3971 sprev = sbegin;
3972 } /* end of while(1) */
3973
3974 finish:
3975 STACK_SAVE;
3976 return best_len;
3977
3978 #ifdef ONIG_DEBUG
3979 stack_error:
3980 STACK_SAVE;
3981 return ONIGERR_STACK_BUG;
3982 #endif
3983
3984 bytecode_error:
3985 STACK_SAVE;
3986 return ONIGERR_UNDEFINED_BYTECODE;
3987
3988 unexpected_bytecode_error:
3989 STACK_SAVE;
3990 return ONIGERR_UNEXPECTED_BYTECODE;
3991
3992 #ifdef USE_RETRY_LIMIT_IN_MATCH
3993 retry_limit_in_match_over:
3994 STACK_SAVE;
3995 return ONIGERR_RETRY_LIMIT_IN_MATCH_OVER;
3996 #endif
3997 }
3998
3999
4000 static UChar*
4001 slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
4002 const UChar* text, const UChar* text_end, UChar* text_range)
4003 {
4004 UChar *t, *p, *s, *end;
4005
4006 end = (UChar* )text_end;
4007 end -= target_end - target - 1;
4008 if (end > text_range)
4009 end = text_range;
4010
4011 s = (UChar* )text;
4012
4013 while (s < end) {
4014 if (*s == *target) {
4015 p = s + 1;
4016 t = target + 1;
4017 while (t < target_end) {
4018 if (*t != *p++)
4019 break;
4020 t++;
4021 }
4022 if (t == target_end)
4023 return s;
4024 }
4025 s += enclen(enc, s);
4026 }
4027
4028 return (UChar* )NULL;
4029 }
4030
4031 static int
4032 str_lower_case_match(OnigEncoding enc, int case_fold_flag,
4033 const UChar* t, const UChar* tend,
4034 const UChar* p, const UChar* end)
4035 {
4036 int lowlen;
4037 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
4038
4039 while (t < tend) {
4040 lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf);
4041 q = lowbuf;
4042 while (lowlen > 0) {
4043 if (*t++ != *q++) return 0;
4044 lowlen--;
4045 }
4046 }
4047
4048 return 1;
4049 }
4050
4051 static UChar*
4052 slow_search_ic(OnigEncoding enc, int case_fold_flag,
4053 UChar* target, UChar* target_end,
4054 const UChar* text, const UChar* text_end, UChar* text_range)
4055 {
4056 UChar *s, *end;
4057
4058 end = (UChar* )text_end;
4059 end -= target_end - target - 1;
4060 if (end > text_range)
4061 end = text_range;
4062
4063 s = (UChar* )text;
4064
4065 while (s < end) {
4066 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4067 s, text_end))
4068 return s;
4069
4070 s += enclen(enc, s);
4071 }
4072
4073 return (UChar* )NULL;
4074 }
4075
4076 static UChar*
4077 slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
4078 const UChar* text, const UChar* adjust_text,
4079 const UChar* text_end, const UChar* text_start)
4080 {
4081 UChar *t, *p, *s;
4082
4083 s = (UChar* )text_end;
4084 s -= (target_end - target);
4085 if (s > text_start)
4086 s = (UChar* )text_start;
4087 else
4088 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
4089
4090 while (s >= text) {
4091 //if text is not null,the logic is correct.
4092 //this function is only invoked by backward_search_range,parameter text come
4093 //from range, which is checked by "if (range == 0) goto fail" in line 4512
4094 //so the check is just for passing static analysis.
4095 if(IS_NULL(s))break;
4096 if (*s == *target) {
4097 p = s + 1;
4098 t = target + 1;
4099 while (t < target_end) {
4100 if (*t != *p++)
4101 break;
4102 t++;
4103 }
4104 if (t == target_end)
4105 return s;
4106 }
4107 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s);
4108 }
4109
4110 return (UChar* )NULL;
4111 }
4112
4113 static UChar*
4114 slow_search_backward_ic(OnigEncoding enc, int case_fold_flag,
4115 UChar* target, UChar* target_end,
4116 const UChar* text, const UChar* adjust_text,
4117 const UChar* text_end, const UChar* text_start)
4118 {
4119 UChar *s;
4120
4121 s = (UChar* )text_end;
4122 s -= (target_end - target);
4123 if (s > text_start)
4124 s = (UChar* )text_start;
4125 else
4126 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
4127
4128 while (s >= text) {
4129 if (str_lower_case_match(enc, case_fold_flag,
4130 target, target_end, s, text_end))
4131 return s;
4132
4133 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s);
4134 }
4135
4136 return (UChar* )NULL;
4137 }
4138
4139 static UChar*
4140 bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
4141 const UChar* text, const UChar* text_end,
4142 const UChar* text_range)
4143 {
4144 const UChar *s, *se, *t, *p, *end;
4145 const UChar *tail;
4146 int skip, tlen1;
4147
4148 #ifdef ONIG_DEBUG_SEARCH
4149 fprintf(stderr, "bm_search_notrev: text: %p, text_end: %p, text_range: %p\n",
4150 text, text_end, text_range);
4151 #endif
4152
4153 tail = target_end - 1;
4154 tlen1 = (int )(tail - target);
4155 end = text_range;
4156 if (end + tlen1 > text_end)
4157 end = text_end - tlen1;
4158
4159 s = text;
4160
4161 if (IS_NULL(reg->int_map)) {
4162 while (s < end) {
4163 p = se = s + tlen1;
4164 t = tail;
4165 while (*p == *t) {
4166 if (t == target) return (UChar* )s;
4167 p--; t--;
4168 }
4169 skip = reg->map[*se];
4170 t = s;
4171 do {
4172 s += enclen(reg->enc, s);
4173 } while ((s - t) < skip && s < end);
4174 }
4175 }
4176 else {
4177 while (s < end) {
4178 p = se = s + tlen1;
4179 t = tail;
4180 while (*p == *t) {
4181 if (t == target) return (UChar* )s;
4182 p--; t--;
4183 }
4184 skip = reg->int_map[*se];
4185 t = s;
4186 do {
4187 s += enclen(reg->enc, s);
4188 } while ((s - t) < skip && s < end);
4189 }
4190 }
4191
4192 return (UChar* )NULL;
4193 }
4194
4195 static UChar*
4196 bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
4197 const UChar* text, const UChar* text_end, const UChar* text_range)
4198 {
4199 const UChar *s, *t, *p, *end;
4200 const UChar *tail;
4201
4202 end = text_range + (target_end - target) - 1;
4203 if (end > text_end)
4204 end = text_end;
4205
4206 tail = target_end - 1;
4207 s = text + (target_end - target) - 1;
4208 if (IS_NULL(reg->int_map)) {
4209 while (s < end) {
4210 p = s;
4211 t = tail;
4212 while (*p == *t) {
4213 if (t == target) return (UChar* )p;
4214 p--; t--;
4215 }
4216 s += reg->map[*s];
4217 }
4218 }
4219 else { /* see int_map[] */
4220 while (s < end) {
4221 p = s;
4222 t = tail;
4223 while (*p == *t) {
4224 if (t == target) return (UChar* )p;
4225 p--; t--;
4226 }
4227 s += reg->int_map[*s];
4228 }
4229 }
4230 return (UChar* )NULL;
4231 }
4232
4233 #ifdef USE_INT_MAP_BACKWARD
4234 static int
4235 set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, int** skip)
4236 {
4237 int i, len;
4238
4239 if (IS_NULL(*skip)) {
4240 *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
4241 if (IS_NULL(*skip)) return ONIGERR_MEMORY;
4242 }
4243
4244 len = end - s;
4245 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
4246 (*skip)[i] = len;
4247
4248 for (i = len - 1; i > 0; i--)
4249 (*skip)[s[i]] = i;
4250
4251 return 0;
4252 }
4253
4254 static UChar*
4255 bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end,
4256 const UChar* text, const UChar* adjust_text,
4257 const UChar* text_end, const UChar* text_start)
4258 {
4259 const UChar *s, *t, *p;
4260
4261 s = text_end - (target_end - target);
4262 if (text_start < s)
4263 s = text_start;
4264 else
4265 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s);
4266
4267 while (s >= text) {
4268 p = s;
4269 t = target;
4270 while (t < target_end && *p == *t) {
4271 p++; t++;
4272 }
4273 if (t == target_end)
4274 return (UChar* )s;
4275
4276 s -= reg->int_map_backward[*s];
4277 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s);
4278 }
4279
4280 return (UChar* )NULL;
4281 }
4282 #endif
4283
4284 static UChar*
4285 map_search(OnigEncoding enc, UChar map[],
4286 const UChar* text, const UChar* text_range)
4287 {
4288 const UChar *s = text;
4289
4290 while (s < text_range) {
4291 if (map[*s]) return (UChar* )s;
4292
4293 s += enclen(enc, s);
4294 }
4295 return (UChar* )NULL;
4296 }
4297
4298 static UChar*
4299 map_search_backward(OnigEncoding enc, UChar map[],
4300 const UChar* text, const UChar* adjust_text,
4301 const UChar* text_start)
4302 {
4303 const UChar *s = text_start;
4304
4305 while (s >= text) {
4306 //if text is not null,the logic is correct.
4307 //this function is only invoked by backward_search_range,parameter text come
4308 //from range, which is checked by "if (range == 0) goto fail" in line 4512
4309 //so the check is just for passing static analysis.
4310 if(IS_NULL(s))break;
4311 if (map[*s]) return (UChar* )s;
4312
4313 s = onigenc_get_prev_char_head(enc, adjust_text, s);
4314 }
4315 return (UChar* )NULL;
4316 }
4317 extern int
4318 onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at,
4319 OnigRegion* region, OnigOptionType option)
4320 {
4321 int r;
4322 OnigMatchParam mp;
4323
4324 onig_initialize_match_param(&mp);
4325 r = onig_match_with_param(reg, str, end, at, region, option, &mp);
4326 onig_free_match_param_content(&mp);
4327 return r;
4328 }
4329
4330 extern int
4331 onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end,
4332 const UChar* at, OnigRegion* region, OnigOptionType option,
4333 OnigMatchParam* mp)
4334 {
4335 int r;
4336 UChar *prev;
4337 MatchArg msa;
4338
4339 ADJUST_MATCH_PARAM(reg, mp);
4340 MATCH_ARG_INIT(msa, reg, option, region, at, mp);
4341 if (region
4342 #ifdef USE_POSIX_API_REGION_OPTION
4343 && !IS_POSIX_REGION(option)
4344 #endif
4345 ) {
4346 r = onig_region_resize_clear(region, reg->num_mem + 1);
4347 }
4348 else
4349 r = 0;
4350
4351 if (r == 0) {
4352 if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) {
4353 if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end)) {
4354 r = ONIGERR_INVALID_WIDE_CHAR_VALUE;
4355 goto end;
4356 }
4357 }
4358
4359 prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at);
4360 r = match_at(reg, str, end, end, at, prev, &msa);
4361 }
4362
4363 end:
4364 MATCH_ARG_FREE(msa);
4365 return r;
4366 }
4367
4368 static int
4369 forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
4370 UChar* range, UChar** low, UChar** high, UChar** low_prev)
4371 {
4372 UChar *p, *pprev = (UChar* )NULL;
4373
4374 #ifdef ONIG_DEBUG_SEARCH
4375 fprintf(stderr, "forward_search_range: str: %p, end: %p, s: %p, range: %p\n",
4376 str, end, s, range);
4377 #endif
4378
4379 p = s;
4380 if (reg->dmin > 0) {
4381 if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {
4382 p += reg->dmin;
4383 }
4384 else {
4385 UChar *q = p + reg->dmin;
4386
4387 if (q >= end) return 0; /* fail */
4388 while (p < q) p += enclen(reg->enc, p);
4389 }
4390 }
4391
4392 retry:
4393 switch (reg->optimize) {
4394 case OPTIMIZE_EXACT:
4395 p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
4396 break;
4397 case OPTIMIZE_EXACT_IC:
4398 p = slow_search_ic(reg->enc, reg->case_fold_flag,
4399 reg->exact, reg->exact_end, p, end, range);
4400 break;
4401
4402 case OPTIMIZE_EXACT_BM:
4403 p = bm_search(reg, reg->exact, reg->exact_end, p, end, range);
4404 break;
4405
4406 case OPTIMIZE_EXACT_BM_NO_REV:
4407 p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range);
4408 break;
4409
4410 case OPTIMIZE_MAP:
4411 p = map_search(reg->enc, reg->map, p, range);
4412 break;
4413 }
4414
4415 if (p && p < range) {
4416 if (p - reg->dmin < s) {
4417 retry_gate:
4418 pprev = p;
4419 p += enclen(reg->enc, p);
4420 goto retry;
4421 }
4422
4423 if (reg->sub_anchor) {
4424 UChar* prev;
4425
4426 switch (reg->sub_anchor) {
4427 case ANCHOR_BEGIN_LINE:
4428 if (!ON_STR_BEGIN(p)) {
4429 prev = onigenc_get_prev_char_head(reg->enc,
4430 (pprev ? pprev : str), p);
4431 if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
4432 goto retry_gate;
4433 }
4434 break;
4435
4436 case ANCHOR_END_LINE:
4437 if (ON_STR_END(p)) {
4438 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
4439 prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
4440 (pprev ? pprev : str), p);
4441 if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
4442 goto retry_gate;
4443 #endif
4444 }
4445 else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
4446 #ifdef USE_CRNL_AS_LINE_TERMINATOR
4447 && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
4448 #endif
4449 )
4450 goto retry_gate;
4451 break;
4452 }
4453 }
4454
4455 if (reg->dmax == 0) {
4456 *low = p;
4457 if (low_prev) {
4458 if (*low > s)
4459 *low_prev = onigenc_get_prev_char_head(reg->enc, s, p);
4460 else
4461 *low_prev = onigenc_get_prev_char_head(reg->enc,
4462 (pprev ? pprev : str), p);
4463 }
4464 }
4465 else {
4466 if (reg->dmax != INFINITE_LEN) {
4467 if (p - str < reg->dmax) {
4468 *low = (UChar* )str;
4469 if (low_prev)
4470 *low_prev = onigenc_get_prev_char_head(reg->enc, str, *low);
4471 }
4472 else {
4473 *low = p - reg->dmax;
4474 if (*low > s) {
4475 *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,
4476 *low, (const UChar** )low_prev);
4477 if (low_prev && IS_NULL(*low_prev))
4478 *low_prev = onigenc_get_prev_char_head(reg->enc,
4479 (pprev ? pprev : s), *low);
4480 }
4481 else {
4482 if (low_prev)
4483 *low_prev = onigenc_get_prev_char_head(reg->enc,
4484 (pprev ? pprev : str), *low);
4485 }
4486 }
4487 }
4488 }
4489 /* no needs to adjust *high, *high is used as range check only */
4490 *high = p - reg->dmin;
4491
4492 #ifdef ONIG_DEBUG_SEARCH
4493 fprintf(stderr,
4494 "forward_search_range success: low: %d, high: %d, dmin: %d, dmax: %d\n",
4495 (int )(*low - str), (int )(*high - str), reg->dmin, reg->dmax);
4496 #endif
4497 return 1; /* success */
4498 }
4499
4500 return 0; /* fail */
4501 }
4502
4503
4504 #define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100
4505
4506 static int
4507 backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
4508 UChar* s, const UChar* range, UChar* adjrange,
4509 UChar** low, UChar** high)
4510 {
4511 UChar *p;
4512 if (range == 0) goto fail;
4513 range += reg->dmin;
4514 p = s;
4515
4516 retry:
4517 switch (reg->optimize) {
4518 case OPTIMIZE_EXACT:
4519 exact_method:
4520 p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
4521 range, adjrange, end, p);
4522 break;
4523
4524 case OPTIMIZE_EXACT_IC:
4525 p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
4526 reg->exact, reg->exact_end,
4527 range, adjrange, end, p);
4528 break;
4529
4530 case OPTIMIZE_EXACT_BM:
4531 case OPTIMIZE_EXACT_BM_NO_REV:
4532 #ifdef USE_INT_MAP_BACKWARD
4533 if (IS_NULL(reg->int_map_backward)) {
4534 int r;
4535
4536 if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD)
4537 goto exact_method;
4538
4539 r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc,
4540 &(reg->int_map_backward));
4541 if (r != 0) return r;
4542 }
4543 p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange,
4544 end, p);
4545 #else
4546 goto exact_method;
4547 #endif
4548 break;
4549
4550 case OPTIMIZE_MAP:
4551 p = map_search_backward(reg->enc, reg->map, range, adjrange, p);
4552 break;
4553 }
4554
4555 if (p) {
4556 if (reg->sub_anchor) {
4557 UChar* prev;
4558
4559 switch (reg->sub_anchor) {
4560 case ANCHOR_BEGIN_LINE:
4561 if (!ON_STR_BEGIN(p)) {
4562 prev = onigenc_get_prev_char_head(reg->enc, str, p);
4563 if (IS_NOT_NULL(prev) && !ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
4564 p = prev;
4565 goto retry;
4566 }
4567 }
4568 break;
4569
4570 case ANCHOR_END_LINE:
4571 if (ON_STR_END(p)) {
4572 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
4573 prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
4574 if (IS_NULL(prev)) goto fail;
4575 if (ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
4576 p = prev;
4577 goto retry;
4578 }
4579 #endif
4580 }
4581 else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
4582 #ifdef USE_CRNL_AS_LINE_TERMINATOR
4583 && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
4584 #endif
4585 ) {
4586 p = onigenc_get_prev_char_head(reg->enc, adjrange, p);
4587 if (IS_NULL(p)) goto fail;
4588 goto retry;
4589 }
4590 break;
4591 }
4592 }
4593
4594 /* no needs to adjust *high, *high is used as range check only */
4595 if (reg->dmax != INFINITE_LEN) {
4596 *low = p - reg->dmax;
4597 *high = p - reg->dmin;
4598 *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high);
4599 }
4600
4601 #ifdef ONIG_DEBUG_SEARCH
4602 fprintf(stderr, "backward_search_range: low: %d, high: %d\n",
4603 (int )(*low - str), (int )(*high - str));
4604 #endif
4605 return 1; /* success */
4606 }
4607
4608 fail:
4609 #ifdef ONIG_DEBUG_SEARCH
4610 fprintf(stderr, "backward_search_range: fail.\n");
4611 #endif
4612 return 0; /* fail */
4613 }
4614
4615
4616 extern int
4617 onig_search(regex_t* reg, const UChar* str, const UChar* end,
4618 const UChar* start, const UChar* range, OnigRegion* region,
4619 OnigOptionType option)
4620 {
4621 int r;
4622 OnigMatchParam mp;
4623
4624 onig_initialize_match_param(&mp);
4625 r = onig_search_with_param(reg, str, end, start, range, region, option, &mp);
4626 onig_free_match_param_content(&mp);
4627 return r;
4628
4629 }
4630
4631 extern int
4632 onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,
4633 const UChar* start, const UChar* range, OnigRegion* region,
4634 OnigOptionType option, OnigMatchParam* mp)
4635 {
4636 int r;
4637 UChar *s, *prev;
4638 MatchArg msa;
4639 const UChar *orig_start = start;
4640 const UChar *orig_range = range;
4641
4642 #ifdef ONIG_DEBUG_SEARCH
4643 fprintf(stderr,
4644 "onig_search (entry point): str: %p, end: %d, start: %d, range: %d\n",
4645 str, (int )(end - str), (int )(start - str), (int )(range - str));
4646 #endif
4647
4648 ADJUST_MATCH_PARAM(reg, mp);
4649
4650 if (region
4651 #ifdef USE_POSIX_API_REGION_OPTION
4652 && !IS_POSIX_REGION(option)
4653 #endif
4654 ) {
4655 r = onig_region_resize_clear(region, reg->num_mem + 1);
4656 if (r != 0) goto finish_no_msa;
4657 }
4658
4659 if (start > end || start < str) goto mismatch_no_msa;
4660
4661 if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) {
4662 if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end)) {
4663 r = ONIGERR_INVALID_WIDE_CHAR_VALUE;
4664 goto finish_no_msa;
4665 }
4666 }
4667
4668
4669 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
4670 #define MATCH_AND_RETURN_CHECK(upper_range) \
4671 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
4672 if (r != ONIG_MISMATCH) {\
4673 if (r >= 0) {\
4674 if (! IS_FIND_LONGEST(reg->options)) {\
4675 goto match;\
4676 }\
4677 }\
4678 else goto finish; /* error */ \
4679 }
4680 #else
4681 #define MATCH_AND_RETURN_CHECK(upper_range) \
4682 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
4683 if (r != ONIG_MISMATCH) {\
4684 if (r >= 0) {\
4685 goto match;\
4686 }\
4687 else goto finish; /* error */ \
4688 }
4689 #endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
4690
4691
4692 /* anchor optimize: resume search range */
4693 if (reg->anchor != 0 && str < end) {
4694 UChar *min_semi_end, *max_semi_end;
4695
4696 if (reg->anchor & ANCHOR_BEGIN_POSITION) {
4697 /* search start-position only */
4698 begin_position:
4699 if (range > start)
4700 range = start + 1;
4701 else
4702 range = start;
4703 }
4704 else if (reg->anchor & ANCHOR_BEGIN_BUF) {
4705 /* search str-position only */
4706 if (range > start) {
4707 if (start != str) goto mismatch_no_msa;
4708 range = str + 1;
4709 }
4710 else {
4711 if (range <= str) {
4712 start = str;
4713 range = str;
4714 }
4715 else
4716 goto mismatch_no_msa;
4717 }
4718 }
4719 else if (reg->anchor & ANCHOR_END_BUF) {
4720 min_semi_end = max_semi_end = (UChar* )end;
4721
4722 end_buf:
4723 if ((OnigLen )(max_semi_end - str) < reg->anchor_dmin)
4724 goto mismatch_no_msa;
4725
4726 if (range > start) {
4727 if ((OnigLen )(min_semi_end - start) > reg->anchor_dmax) {
4728 start = min_semi_end - reg->anchor_dmax;
4729 if (start < end)
4730 start = onigenc_get_right_adjust_char_head(reg->enc, str, start);
4731 }
4732 if ((OnigLen )(max_semi_end - (range - 1)) < reg->anchor_dmin) {
4733 range = max_semi_end - reg->anchor_dmin + 1;
4734 }
4735
4736 if (start > range) goto mismatch_no_msa;
4737 /* If start == range, match with empty at end.
4738 Backward search is used. */
4739 }
4740 else {
4741 if ((OnigLen )(min_semi_end - range) > reg->anchor_dmax) {
4742 range = min_semi_end - reg->anchor_dmax;
4743 }
4744 if ((OnigLen )(max_semi_end - start) < reg->anchor_dmin) {
4745 start = max_semi_end - reg->anchor_dmin;
4746 start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start);
4747 }
4748 if (range > start) goto mismatch_no_msa;
4749 }
4750 }
4751 else if (reg->anchor & ANCHOR_SEMI_END_BUF) {
4752
4753 UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, 1);
4754
4755 max_semi_end = (UChar* )end;
4756 // only when str > end, pre_end will be null
4757 // line 4659 "if (start > end || start < str) goto mismatch_no_msa"
4758 // will guarantee str alwayls less than end
4759 // so pre_end won't be null,this check is just for passing staic analysis
4760 if (IS_NOT_NULL(pre_end) && ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
4761 min_semi_end = pre_end;
4762
4763 #ifdef USE_CRNL_AS_LINE_TERMINATOR
4764 pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, 1);
4765 if (IS_NOT_NULL(pre_end) &&
4766 ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
4767 min_semi_end = pre_end;
4768 }
4769 #endif
4770 if (min_semi_end > str && start <= min_semi_end) {
4771 goto end_buf;
4772 }
4773 }
4774 else {
4775 min_semi_end = (UChar* )end;
4776 goto end_buf;
4777 }
4778 }
4779 else if ((reg->anchor & ANCHOR_ANYCHAR_INF_ML)) {
4780 goto begin_position;
4781 }
4782 }
4783 else if (str == end) { /* empty string */
4784 static const UChar* address_for_empty_string = (UChar* )"";
4785
4786 #ifdef ONIG_DEBUG_SEARCH
4787 fprintf(stderr, "onig_search: empty string.\n");
4788 #endif
4789
4790 if (reg->threshold_len == 0) {
4791 start = end = str = address_for_empty_string;
4792 s = (UChar* )start;
4793 prev = (UChar* )NULL;
4794
4795 MATCH_ARG_INIT(msa, reg, option, region, start, mp);
4796 MATCH_AND_RETURN_CHECK(end);
4797 goto mismatch;
4798 }
4799 goto mismatch_no_msa;
4800 }
4801
4802 #ifdef ONIG_DEBUG_SEARCH
4803 fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
4804 (int )(end - str), (int )(start - str), (int )(range - str));
4805 #endif
4806
4807 MATCH_ARG_INIT(msa, reg, option, region, orig_start, mp);
4808
4809 s = (UChar* )start;
4810 if (range > start) { /* forward search */
4811 if (s > str)
4812 prev = onigenc_get_prev_char_head(reg->enc, str, s);
4813 else
4814 prev = (UChar* )NULL;
4815
4816 if (reg->optimize != OPTIMIZE_NONE) {
4817 UChar *sch_range, *low, *high, *low_prev;
4818
4819 sch_range = (UChar* )range;
4820 if (reg->dmax != 0) {
4821 if (reg->dmax == INFINITE_LEN)
4822 sch_range = (UChar* )end;
4823 else {
4824 sch_range += reg->dmax;
4825 if (sch_range > end) sch_range = (UChar* )end;
4826 }
4827 }
4828
4829 if ((end - start) < reg->threshold_len)
4830 goto mismatch;
4831
4832 if (reg->dmax != INFINITE_LEN) {
4833 do {
4834 if (! forward_search_range(reg, str, end, s, sch_range,
4835 &low, &high, &low_prev)) goto mismatch;
4836 if (s < low) {
4837 s = low;
4838 prev = low_prev;
4839 }
4840 while (s <= high) {
4841 MATCH_AND_RETURN_CHECK(orig_range);
4842 prev = s;
4843 s += enclen(reg->enc, s);
4844 }
4845 } while (s < range);
4846 goto mismatch;
4847 }
4848 else { /* check only. */
4849 if (! forward_search_range(reg, str, end, s, sch_range,
4850 &low, &high, (UChar** )NULL)) goto mismatch;
4851
4852 if ((reg->anchor & ANCHOR_ANYCHAR_INF) != 0) {
4853 do {
4854 MATCH_AND_RETURN_CHECK(orig_range);
4855 prev = s;
4856 s += enclen(reg->enc, s);
4857
4858 if ((reg->anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) == 0) {
4859 while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) {
4860 prev = s;
4861 s += enclen(reg->enc, s);
4862 }
4863 }
4864 } while (s < range);
4865 goto mismatch;
4866 }
4867 }
4868 }
4869
4870 do {
4871 MATCH_AND_RETURN_CHECK(orig_range);
4872 prev = s;
4873 s += enclen(reg->enc, s);
4874 } while (s < range);
4875
4876 if (s == range) { /* because empty match with /$/. */
4877 MATCH_AND_RETURN_CHECK(orig_range);
4878 }
4879 }
4880 else { /* backward search */
4881 if (orig_start < end)
4882 orig_start += enclen(reg->enc, orig_start); /* is upper range */
4883
4884 if (reg->optimize != OPTIMIZE_NONE) {
4885 UChar *low, *high, *adjrange, *sch_start;
4886
4887 if (range < end)
4888 adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range);
4889 else
4890 adjrange = (UChar* )end;
4891
4892 if (reg->dmax != INFINITE_LEN &&
4893 (end - range) >= reg->threshold_len) {
4894 do {
4895 sch_start = s + reg->dmax;
4896 if (sch_start > end) sch_start = (UChar* )end;
4897 if (backward_search_range(reg, str, end, sch_start, range, adjrange,
4898 &low, &high) <= 0)
4899 goto mismatch;
4900
4901 if (s > high)
4902 s = high;
4903
4904 while (s >= low) {
4905 prev = onigenc_get_prev_char_head(reg->enc, str, s);
4906 MATCH_AND_RETURN_CHECK(orig_start);
4907 s = prev;
4908 }
4909 // if range is not null,the check is not necessary.
4910 // the range is actually the pointer of the end of the matched string
4911 // or assigned by "range = str" in line 4708. In RegularExpressionMatch
4912 // protocol, the matched string is the parameter String. And str in
4913 // line 4708 is the String,too. and the range is calculated from
4914 // "Start + onigenc_str_bytelen_null (CHAR16_ENCODING, Start)" in
4915 // line 146 in RegularExpressionDxe.c. RegularExpressionMatch ensure
4916 // the String is not null,So in both situation, the range can not be NULL.
4917 // This check is just for passing static analysis.
4918 if(IS_NULL(s))break;
4919 } while (s >= range);
4920 goto mismatch;
4921 }
4922 else { /* check only. */
4923 if ((end - range) < reg->threshold_len) goto mismatch;
4924
4925 sch_start = s;
4926 if (reg->dmax != 0) {
4927 if (reg->dmax == INFINITE_LEN)
4928 sch_start = (UChar* )end;
4929 else {
4930 sch_start += reg->dmax;
4931 if (sch_start > end) sch_start = (UChar* )end;
4932 else
4933 sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
4934 start, sch_start);
4935 }
4936 }
4937 if (backward_search_range(reg, str, end, sch_start, range, adjrange,
4938 &low, &high) <= 0) goto mismatch;
4939 }
4940 }
4941
4942 do {
4943 prev = onigenc_get_prev_char_head(reg->enc, str, s);
4944 MATCH_AND_RETURN_CHECK(orig_start);
4945 s = prev;
4946 } while (s >= range);
4947 }
4948
4949 mismatch:
4950 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
4951 if (IS_FIND_LONGEST(reg->options)) {
4952 if (msa.best_len >= 0) {
4953 s = msa.best_s;
4954 goto match;
4955 }
4956 }
4957 #endif
4958 r = ONIG_MISMATCH;
4959
4960 finish:
4961 MATCH_ARG_FREE(msa);
4962
4963 /* If result is mismatch and no FIND_NOT_EMPTY option,
4964 then the region is not set in match_at(). */
4965 if (IS_FIND_NOT_EMPTY(reg->options) && region
4966 #ifdef USE_POSIX_API_REGION_OPTION
4967 && !IS_POSIX_REGION(option)
4968 #endif
4969 ) {
4970 onig_region_clear(region);
4971 }
4972
4973 #ifdef ONIG_DEBUG
4974 if (r != ONIG_MISMATCH)
4975 fprintf(stderr, "onig_search: error %d\n", r);
4976 #endif
4977 return r;
4978
4979 mismatch_no_msa:
4980 r = ONIG_MISMATCH;
4981 finish_no_msa:
4982 #ifdef ONIG_DEBUG
4983 if (r != ONIG_MISMATCH)
4984 fprintf(stderr, "onig_search: error %d\n", r);
4985 #endif
4986 return r;
4987
4988 match:
4989 MATCH_ARG_FREE(msa);
4990 return (int )(s - str);
4991 }
4992
4993 extern int
4994 onig_scan(regex_t* reg, const UChar* str, const UChar* end,
4995 OnigRegion* region, OnigOptionType option,
4996 int (*scan_callback)(int, int, OnigRegion*, void*),
4997 void* callback_arg)
4998 {
4999 int r;
5000 int n;
5001 int rs;
5002 const UChar* start;
5003
5004 if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) {
5005 if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end))
5006 return ONIGERR_INVALID_WIDE_CHAR_VALUE;
5007
5008 ONIG_OPTION_OFF(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING);
5009 }
5010
5011 n = 0;
5012 start = str;
5013 while (1) {
5014 r = onig_search(reg, str, end, start, end, region, option);
5015 if (r >= 0) {
5016 rs = scan_callback(n, r, region, callback_arg);
5017 n++;
5018 if (rs != 0)
5019 return rs;
5020
5021 if (region->end[0] == start - str) {
5022 if (start >= end) break;
5023 start += enclen(reg->enc, start);
5024 }
5025 else
5026 start = str + region->end[0];
5027
5028 if (start > end)
5029 break;
5030 }
5031 else if (r == ONIG_MISMATCH) {
5032 break;
5033 }
5034 else { /* error */
5035 return r;
5036 }
5037 }
5038
5039 return n;
5040 }
5041
5042 extern OnigEncoding
5043 onig_get_encoding(regex_t* reg)
5044 {
5045 return reg->enc;
5046 }
5047
5048 extern OnigOptionType
5049 onig_get_options(regex_t* reg)
5050 {
5051 return reg->options;
5052 }
5053
5054 extern OnigCaseFoldType
5055 onig_get_case_fold_flag(regex_t* reg)
5056 {
5057 return reg->case_fold_flag;
5058 }
5059
5060 extern OnigSyntaxType*
5061 onig_get_syntax(regex_t* reg)
5062 {
5063 return reg->syntax;
5064 }
5065
5066 extern int
5067 onig_number_of_captures(regex_t* reg)
5068 {
5069 return reg->num_mem;
5070 }
5071
5072 extern int
5073 onig_number_of_capture_histories(regex_t* reg)
5074 {
5075 #ifdef USE_CAPTURE_HISTORY
5076 int i, n;
5077
5078 n = 0;
5079 for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
5080 if (MEM_STATUS_AT(reg->capture_history, i) != 0)
5081 n++;
5082 }
5083 return n;
5084 #else
5085 return 0;
5086 #endif
5087 }
5088
5089 extern void
5090 onig_copy_encoding(OnigEncoding to, OnigEncoding from)
5091 {
5092 *to = *from;
5093 }
5094
5095
5096 /* for callout functions */
5097
5098 #ifdef USE_CALLOUT
5099
5100 extern OnigCalloutFunc
5101 onig_get_progress_callout(void)
5102 {
5103 return DefaultProgressCallout;
5104 }
5105
5106 extern int
5107 onig_set_progress_callout(OnigCalloutFunc f)
5108 {
5109 DefaultProgressCallout = f;
5110 return ONIG_NORMAL;
5111 }
5112
5113 extern OnigCalloutFunc
5114 onig_get_retraction_callout(void)
5115 {
5116 return DefaultRetractionCallout;
5117 }
5118
5119 extern int
5120 onig_set_retraction_callout(OnigCalloutFunc f)
5121 {
5122 DefaultRetractionCallout = f;
5123 return ONIG_NORMAL;
5124 }
5125
5126 extern int
5127 onig_get_callout_num_by_callout_args(OnigCalloutArgs* args)
5128 {
5129 return args->num;
5130 }
5131
5132 extern OnigCalloutIn
5133 onig_get_callout_in_by_callout_args(OnigCalloutArgs* args)
5134 {
5135 return args->in;
5136 }
5137
5138 extern int
5139 onig_get_name_id_by_callout_args(OnigCalloutArgs* args)
5140 {
5141 return args->name_id;
5142 }
5143
5144 extern const UChar*
5145 onig_get_contents_by_callout_args(OnigCalloutArgs* args)
5146 {
5147 int num;
5148 CalloutListEntry* e;
5149
5150 num = args->num;
5151 e = onig_reg_callout_list_at(args->regex, num);
5152 if (IS_NULL(e)) return 0;
5153 if (e->of == ONIG_CALLOUT_OF_CONTENTS) {
5154 return e->u.content.start;
5155 }
5156
5157 return 0;
5158 }
5159
5160 extern const UChar*
5161 onig_get_contents_end_by_callout_args(OnigCalloutArgs* args)
5162 {
5163 int num;
5164 CalloutListEntry* e;
5165
5166 num = args->num;
5167 e = onig_reg_callout_list_at(args->regex, num);
5168 if (IS_NULL(e)) return 0;
5169 if (e->of == ONIG_CALLOUT_OF_CONTENTS) {
5170 return e->u.content.end;
5171 }
5172
5173 return 0;
5174 }
5175
5176 extern int
5177 onig_get_args_num_by_callout_args(OnigCalloutArgs* args)
5178 {
5179 int num;
5180 CalloutListEntry* e;
5181
5182 num = args->num;
5183 e = onig_reg_callout_list_at(args->regex, num);
5184 if (IS_NULL(e)) return ONIGERR_INVALID_ARGUMENT;
5185 if (e->of == ONIG_CALLOUT_OF_NAME) {
5186 return e->u.arg.num;
5187 }
5188
5189 return ONIGERR_INVALID_ARGUMENT;
5190 }
5191
5192 extern int
5193 onig_get_passed_args_num_by_callout_args(OnigCalloutArgs* args)
5194 {
5195 int num;
5196 CalloutListEntry* e;
5197
5198 num = args->num;
5199 e = onig_reg_callout_list_at(args->regex, num);
5200 if (IS_NULL(e)) return ONIGERR_INVALID_ARGUMENT;
5201 if (e->of == ONIG_CALLOUT_OF_NAME) {
5202 return e->u.arg.passed_num;
5203 }
5204
5205 return ONIGERR_INVALID_ARGUMENT;
5206 }
5207
5208 extern int
5209 onig_get_arg_by_callout_args(OnigCalloutArgs* args, int index,
5210 OnigType* type, OnigValue* val)
5211 {
5212 int num;
5213 CalloutListEntry* e;
5214
5215 num = args->num;
5216 e = onig_reg_callout_list_at(args->regex, num);
5217 if (IS_NULL(e)) return ONIGERR_INVALID_ARGUMENT;
5218 if (e->of == ONIG_CALLOUT_OF_NAME) {
5219 if (IS_NOT_NULL(type)) *type = e->u.arg.types[index];
5220 if (IS_NOT_NULL(val)) *val = e->u.arg.vals[index];
5221 return ONIG_NORMAL;
5222 }
5223
5224 return ONIGERR_INVALID_ARGUMENT;
5225 }
5226
5227 extern const UChar*
5228 onig_get_string_by_callout_args(OnigCalloutArgs* args)
5229 {
5230 return args->string;
5231 }
5232
5233 extern const UChar*
5234 onig_get_string_end_by_callout_args(OnigCalloutArgs* args)
5235 {
5236 return args->string_end;
5237 }
5238
5239 extern const UChar*
5240 onig_get_start_by_callout_args(OnigCalloutArgs* args)
5241 {
5242 return args->start;
5243 }
5244
5245 extern const UChar*
5246 onig_get_right_range_by_callout_args(OnigCalloutArgs* args)
5247 {
5248 return args->right_range;
5249 }
5250
5251 extern const UChar*
5252 onig_get_current_by_callout_args(OnigCalloutArgs* args)
5253 {
5254 return args->current;
5255 }
5256
5257 extern OnigRegex
5258 onig_get_regex_by_callout_args(OnigCalloutArgs* args)
5259 {
5260 return args->regex;
5261 }
5262
5263 extern unsigned long
5264 onig_get_retry_counter_by_callout_args(OnigCalloutArgs* args)
5265 {
5266 return args->retry_in_match_counter;
5267 }
5268
5269
5270 extern int
5271 onig_get_capture_range_in_callout(OnigCalloutArgs* a, int mem_num, int* begin, int* end)
5272 {
5273 OnigRegex reg;
5274 const UChar* str;
5275 StackType* stk_base;
5276 int i;
5277
5278 i = mem_num;
5279 reg = a->regex;
5280 str = a->string;
5281 stk_base = a->stk_base;
5282
5283 if (i > 0) {
5284 if (a->mem_end_stk[i] != INVALID_STACK_INDEX) {
5285 if (MEM_STATUS_AT(reg->bt_mem_start, i))
5286 *begin = (int )(STACK_AT(a->mem_start_stk[i])->u.mem.pstr - str);
5287 else
5288 *begin = (int )((UChar* )((void* )a->mem_start_stk[i]) - str);
5289
5290 *end = (int )((MEM_STATUS_AT(reg->bt_mem_end, i)
5291 ? STACK_AT(a->mem_end_stk[i])->u.mem.pstr
5292 : (UChar* )((void* )a->mem_end_stk[i])) - str);
5293 }
5294 else {
5295 *begin = *end = ONIG_REGION_NOTPOS;
5296 }
5297 }
5298 else if (i == 0) {
5299 #if 0
5300 *begin = a->start - str;
5301 *end = a->current - str;
5302 #else
5303 return ONIGERR_INVALID_ARGUMENT;
5304 #endif
5305 }
5306 else
5307 return ONIGERR_INVALID_ARGUMENT;
5308
5309 return ONIG_NORMAL;
5310 }
5311
5312 extern int
5313 onig_get_used_stack_size_in_callout(OnigCalloutArgs* a, int* used_num, int* used_bytes)
5314 {
5315 int n;
5316
5317 n = (int )(a->stk - a->stk_base);
5318
5319 if (used_num != 0)
5320 *used_num = n;
5321
5322 if (used_bytes != 0)
5323 *used_bytes = n * sizeof(StackType);
5324
5325 return ONIG_NORMAL;
5326 }
5327
5328
5329 /* builtin callout functions */
5330
5331 extern int
5332 onig_builtin_fail(OnigCalloutArgs* args ARG_UNUSED, void* user_data ARG_UNUSED)
5333 {
5334 return ONIG_CALLOUT_FAIL;
5335 }
5336
5337 extern int
5338 onig_builtin_mismatch(OnigCalloutArgs* args ARG_UNUSED, void* user_data ARG_UNUSED)
5339 {
5340 return ONIG_MISMATCH;
5341 }
5342
#if 0
/* Disabled: builtin callout that would always return ONIG_CALLOUT_SUCCESS. */
extern int
onig_builtin_success(OnigCalloutArgs* args ARG_UNUSED, void* user_data ARG_UNUSED)
{
  int r = ONIG_CALLOUT_SUCCESS;

  return r;
}
#endif
5350
5351 extern int
5352 onig_builtin_error(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
5353 {
5354 int r;
5355 int n;
5356 OnigValue val;
5357
5358 r = onig_get_arg_by_callout_args(args, 0, 0, &val);
5359 if (r != ONIG_NORMAL) return r;
5360
5361 n = (int )val.l;
5362 if (n >= 0) {
5363 n = ONIGERR_INVALID_CALLOUT_BODY;
5364 }
5365
5366 return n;
5367 }
5368
5369 extern int
5370 onig_builtin_count(OnigCalloutArgs* args, void* user_data)
5371 {
5372 (void )onig_check_callout_data_and_clear_old_values(args);
5373
5374 return onig_builtin_total_count(args, user_data);
5375 }
5376
5377 extern int
5378 onig_builtin_total_count(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
5379 {
5380 int r;
5381 int slot;
5382 OnigType type;
5383 OnigValue val;
5384 OnigValue aval;
5385 OnigCodePoint count_type;
5386
5387 r = onig_get_arg_by_callout_args(args, 0, &type, &aval);
5388 if (r != ONIG_NORMAL) return r;
5389
5390 count_type = aval.c;
5391 if (count_type != '>' && count_type != 'X' && count_type != '<')
5392 return ONIGERR_INVALID_CALLOUT_ARG;
5393
5394 r = onig_get_callout_data_by_callout_args_self_dont_clear_old(args, 0,
5395 &type, &val);
5396 if (r < ONIG_NORMAL)
5397 return r;
5398 else if (r > ONIG_NORMAL) {
5399 /* type == void: initial state */
5400 val.l = 0;
5401 }
5402
5403 if (args->in == ONIG_CALLOUT_IN_RETRACTION) {
5404 slot = 2;
5405 if (count_type == '<')
5406 val.l++;
5407 else if (count_type == 'X')
5408 val.l--;
5409 }
5410 else {
5411 slot = 1;
5412 if (count_type != '<')
5413 val.l++;
5414 }
5415
5416 r = onig_set_callout_data_by_callout_args_self(args, 0, ONIG_TYPE_LONG, &val);
5417 if (r != ONIG_NORMAL) return r;
5418
5419 /* slot 1: in progress counter, slot 2: in retraction counter */
5420 r = onig_get_callout_data_by_callout_args_self_dont_clear_old(args, slot,
5421 &type, &val);
5422 if (r < ONIG_NORMAL)
5423 return r;
5424 else if (r > ONIG_NORMAL) {
5425 val.l = 0;
5426 }
5427
5428 val.l++;
5429 r = onig_set_callout_data_by_callout_args_self(args, slot, ONIG_TYPE_LONG, &val);
5430 if (r != ONIG_NORMAL) return r;
5431
5432 return ONIG_CALLOUT_SUCCESS;
5433 }
5434
5435 extern int
5436 onig_builtin_max(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
5437 {
5438 int r;
5439 int slot;
5440 long max_val;
5441 OnigCodePoint count_type;
5442 OnigType type;
5443 OnigValue val;
5444 OnigValue aval;
5445
5446 (void )onig_check_callout_data_and_clear_old_values(args);
5447
5448 slot = 0;
5449 r = onig_get_callout_data_by_callout_args_self(args, slot, &type, &val);
5450 if (r < ONIG_NORMAL)
5451 return r;
5452 else if (r > ONIG_NORMAL) {
5453 /* type == void: initial state */
5454 type = ONIG_TYPE_LONG;
5455 val.l = 0;
5456 }
5457
5458 r = onig_get_arg_by_callout_args(args, 0, &type, &aval);
5459 if (r != ONIG_NORMAL) return r;
5460 if (type == ONIG_TYPE_TAG) {
5461 r = onig_get_callout_data_by_callout_args(args, aval.tag, 0, &type, &aval);
5462 if (r < ONIG_NORMAL) return r;
5463 else if (r > ONIG_NORMAL)
5464 max_val = 0L;
5465 else
5466 max_val = aval.l;
5467 }
5468 else { /* LONG */
5469 max_val = aval.l;
5470 }
5471
5472 r = onig_get_arg_by_callout_args(args, 1, &type, &aval);
5473 if (r != ONIG_NORMAL) return r;
5474
5475 count_type = aval.c;
5476 if (count_type != '>' && count_type != 'X' && count_type != '<')
5477 return ONIGERR_INVALID_CALLOUT_ARG;
5478
5479 if (args->in == ONIG_CALLOUT_IN_RETRACTION) {
5480 if (count_type == '<') {
5481 if (val.l >= max_val) return ONIG_CALLOUT_FAIL;
5482 val.l++;
5483 }
5484 else if (count_type == 'X')
5485 val.l--;
5486 }
5487 else {
5488 if (count_type != '<') {
5489 if (val.l >= max_val) return ONIG_CALLOUT_FAIL;
5490 val.l++;
5491 }
5492 }
5493
5494 r = onig_set_callout_data_by_callout_args_self(args, slot, ONIG_TYPE_LONG, &val);
5495 if (r != ONIG_NORMAL) return r;
5496
5497 return ONIG_CALLOUT_SUCCESS;
5498 }
5499
5500 enum OP_CMP {
5501 OP_EQ,
5502 OP_NE,
5503 OP_LT,
5504 OP_GT,
5505 OP_LE,
5506 OP_GE
5507 };
5508
5509 extern int
5510 onig_builtin_cmp(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
5511 {
5512 int r;
5513 int slot;
5514 long lv;
5515 long rv;
5516 OnigType type;
5517 OnigValue val;
5518 regex_t* reg;
5519 enum OP_CMP op;
5520
5521 reg = args->regex;
5522
5523 r = onig_get_arg_by_callout_args(args, 0, &type, &val);
5524 if (r != ONIG_NORMAL) return r;
5525
5526 if (type == ONIG_TYPE_TAG) {
5527 r = onig_get_callout_data_by_callout_args(args, val.tag, 0, &type, &val);
5528 if (r < ONIG_NORMAL) return r;
5529 else if (r > ONIG_NORMAL)
5530 lv = 0L;
5531 else
5532 lv = val.l;
5533 }
5534 else { /* ONIG_TYPE_LONG */
5535 lv = val.l;
5536 }
5537
5538 r = onig_get_arg_by_callout_args(args, 2, &type, &val);
5539 if (r != ONIG_NORMAL) return r;
5540
5541 if (type == ONIG_TYPE_TAG) {
5542 r = onig_get_callout_data_by_callout_args(args, val.tag, 0, &type, &val);
5543 if (r < ONIG_NORMAL) return r;
5544 else if (r > ONIG_NORMAL)
5545 rv = 0L;
5546 else
5547 rv = val.l;
5548 }
5549 else { /* ONIG_TYPE_LONG */
5550 rv = val.l;
5551 }
5552
5553 slot = 0;
5554 r = onig_get_callout_data_by_callout_args_self(args, slot, &type, &val);
5555 if (r < ONIG_NORMAL)
5556 return r;
5557 else if (r > ONIG_NORMAL) {
5558 /* type == void: initial state */
5559 OnigCodePoint c1, c2;
5560 UChar* p;
5561
5562 r = onig_get_arg_by_callout_args(args, 1, &type, &val);
5563 if (r != ONIG_NORMAL) return r;
5564
5565 p = val.s.start;
5566 c1 = ONIGENC_MBC_TO_CODE(reg->enc, p, val.s.end);
5567 p += ONIGENC_MBC_ENC_LEN(reg->enc, p);
5568 if (p < val.s.end) {
5569 c2 = ONIGENC_MBC_TO_CODE(reg->enc, p, val.s.end);
5570 p += ONIGENC_MBC_ENC_LEN(reg->enc, p);
5571 if (p != val.s.end) return ONIGERR_INVALID_CALLOUT_ARG;
5572 }
5573 else
5574 c2 = 0;
5575
5576 switch (c1) {
5577 case '=':
5578 if (c2 != '=') return ONIGERR_INVALID_CALLOUT_ARG;
5579 op = OP_EQ;
5580 break;
5581 case '!':
5582 if (c2 != '=') return ONIGERR_INVALID_CALLOUT_ARG;
5583 op = OP_NE;
5584 break;
5585 case '<':
5586 if (c2 == '=') op = OP_LE;
5587 else if (c2 == 0) op = OP_LT;
5588 else return ONIGERR_INVALID_CALLOUT_ARG;
5589 break;
5590 case '>':
5591 if (c2 == '=') op = OP_GE;
5592 else if (c2 == 0) op = OP_GT;
5593 else return ONIGERR_INVALID_CALLOUT_ARG;
5594 break;
5595 default:
5596 return ONIGERR_INVALID_CALLOUT_ARG;
5597 break;
5598 }
5599 val.l = (long )op;
5600 r = onig_set_callout_data_by_callout_args_self(args, slot, ONIG_TYPE_LONG, &val);
5601 if (r != ONIG_NORMAL) return r;
5602 }
5603 else {
5604 op = (enum OP_CMP )val.l;
5605 }
5606
5607 switch (op) {
5608 case OP_EQ: r = (lv == rv); break;
5609 case OP_NE: r = (lv != rv); break;
5610 case OP_LT: r = (lv < rv); break;
5611 case OP_GT: r = (lv > rv); break;
5612 case OP_LE: r = (lv <= rv); break;
5613 case OP_GE: r = (lv >= rv); break;
5614 }
5615
5616 return r == 0 ? ONIG_CALLOUT_FAIL : ONIG_CALLOUT_SUCCESS;
5617 }
5618
5619
5620 //#include <stdio.h>
5621
5622 static FILE* OutFp;
5623
5624 /* name start with "onig_" for macros. */
5625 static int
5626 onig_builtin_monitor(OnigCalloutArgs* args, void* user_data)
5627 {
5628 int r;
5629 int num;
5630 size_t tag_len;
5631 // const UChar* start;
5632 // const UChar* right;
5633 // const UChar* current;
5634 // const UChar* string;
5635 // const UChar* strend;
5636 const UChar* tag_start;
5637 const UChar* tag_end;
5638 regex_t* reg;
5639 OnigCalloutIn in;
5640 OnigType type;
5641 OnigValue val;
5642 char buf[20];
5643 // FILE* fp;
5644
5645 // fp = OutFp;
5646
5647 r = onig_get_arg_by_callout_args(args, 0, &type, &val);
5648 if (r != ONIG_NORMAL) return r;
5649
5650 in = onig_get_callout_in_by_callout_args(args);
5651 if (in == ONIG_CALLOUT_IN_PROGRESS) {
5652 if (val.c == '<')
5653 return ONIG_CALLOUT_SUCCESS;
5654 }
5655 else {
5656 if (val.c != 'X' && val.c != '<')
5657 return ONIG_CALLOUT_SUCCESS;
5658 }
5659
5660 num = onig_get_callout_num_by_callout_args(args);
5661 // start = onig_get_start_by_callout_args(args);
5662 // right = onig_get_right_range_by_callout_args(args);
5663 // current = onig_get_current_by_callout_args(args);
5664 // string = onig_get_string_by_callout_args(args);
5665 // strend = onig_get_string_end_by_callout_args(args);
5666 reg = onig_get_regex_by_callout_args(args);
5667 tag_start = onig_get_callout_tag_start(reg, num);
5668 tag_end = onig_get_callout_tag_end(reg, num);
5669
5670 if (tag_start == 0)
5671 sprintf_s(buf, sizeof(buf), "#%d", num);
5672 else {
5673 /* CAUTION: tag string is not terminated with NULL. */
5674 int i;
5675
5676 tag_len = tag_end - tag_start;
5677 if (tag_len >= sizeof(buf)) tag_len = sizeof(buf) - 1;
5678 for (i = 0; i < tag_len; i++) buf[i] = tag_start[i];
5679 buf[tag_len] = '\0';
5680 }
5681 /*
5682 fprintf(fp, "ONIG-MONITOR: %-4s %s at: %d [%d - %d] len: %d\n",
5683 buf,
5684 in == ONIG_CALLOUT_IN_PROGRESS ? "=>" : "<=",
5685 (int )(current - string),
5686 (int )(start - string),
5687 (int )(right - string),
5688 (int )(strend - string));
5689 //fflush(fp);
5690 */
5691 return ONIG_CALLOUT_SUCCESS;
5692 }
5693
5694 extern int
5695 onig_setup_builtin_monitors_by_ascii_encoded_name(void* fp /* FILE* */)
5696 {
5697 int id;
5698 char* name;
5699 OnigEncoding enc;
5700 unsigned int ts[4];
5701 OnigValue opts[4];
5702
5703 if (IS_NOT_NULL(fp))
5704 OutFp = (FILE* )fp;
5705 else
5706 OutFp = stdout;
5707
5708 enc = ONIG_ENCODING_ASCII;
5709
5710 name = "MON";
5711 ts[0] = ONIG_TYPE_CHAR;
5712 opts[0].c = '>';
5713 BC_B_O(name, monitor, 1, ts, 1, opts);
5714
5715 return ONIG_NORMAL;
5716 }
5717
5718 #endif /* USE_CALLOUT */