]> git.proxmox.com Git - mirror_edk2.git/blob - MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regcomp.c
MdeModulePkg RegularExpressionDxe: Update Oniguruma to 6.9.0
[mirror_edk2.git] / MdeModulePkg / Universal / RegularExpressionDxe / Oniguruma / regcomp.c
1 /**********************************************************************
2 regcomp.c - Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5 * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 #include "regparse.h"
31
32 OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
33
34 #if 0
35 typedef struct {
36 int n;
37 int alloc;
38 int* v;
39 } int_stack;
40
41 static int
42 make_int_stack(int_stack** rs, int init_size)
43 {
44 int_stack* s;
45 int* v;
46
47 *rs = 0;
48
49 s = xmalloc(sizeof(*s));
50 if (IS_NULL(s)) return ONIGERR_MEMORY;
51
52 v = (int* )xmalloc(sizeof(int) * init_size);
53 if (IS_NULL(v)) {
54 xfree(s);
55 return ONIGERR_MEMORY;
56 }
57
58 s->n = 0;
59 s->alloc = init_size;
60 s->v = v;
61
62 *rs = s;
63 return ONIG_NORMAL;
64 }
65
66 static void
67 free_int_stack(int_stack* s)
68 {
69 if (IS_NOT_NULL(s)) {
70 if (IS_NOT_NULL(s->v))
71 xfree(s->v);
72 xfree(s);
73 }
74 }
75
76 static int
77 int_stack_push(int_stack* s, int v)
78 {
79 if (s->n >= s->alloc) {
80 int new_size = s->alloc * 2;
81 int* nv = (int* )xrealloc(s->v, sizeof(int) * new_size);
82 if (IS_NULL(nv)) return ONIGERR_MEMORY;
83
84 s->alloc = new_size;
85 s->v = nv;
86 }
87
88 s->v[s->n] = v;
89 s->n++;
90 return ONIG_NORMAL;
91 }
92
93 static int
94 int_stack_pop(int_stack* s)
95 {
96 int v;
97
98 #ifdef ONIG_DEBUG
99 if (s->n <= 0) {
100 fprintf(stderr, "int_stack_pop: fail empty. %p\n", s);
101 return 0;
102 }
103 #endif
104
105 v = s->v[s->n];
106 s->n--;
107 return v;
108 }
109 #endif
110
111 extern OnigCaseFoldType
112 onig_get_default_case_fold_flag(void)
113 {
114 return OnigDefaultCaseFoldFlag;
115 }
116
117 extern int
118 onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
119 {
120 OnigDefaultCaseFoldFlag = case_fold_flag;
121 return 0;
122 }
123
124 static int
125 int_multiply_cmp(int x, int y, int v)
126 {
127 if (x == 0 || y == 0) return -1;
128
129 if (x < INT_MAX / y) {
130 int xy = x * y;
131 if (xy > v) return 1;
132 else {
133 if (xy == v) return 0;
134 else return -1;
135 }
136 }
137 else
138 return 1;
139 }
140
141
142 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
143 static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
144 #endif
145
146 static void
147 swap_node(Node* a, Node* b)
148 {
149 Node c;
150
151 c = *a; *a = *b; *b = c;
152
153 if (NODE_TYPE(a) == NODE_STRING) {
154 StrNode* sn = STR_(a);
155 if (sn->capa == 0) {
156 int len = (int )(sn->end - sn->s);
157 sn->s = sn->buf;
158 sn->end = sn->s + len;
159 }
160 }
161
162 if (NODE_TYPE(b) == NODE_STRING) {
163 StrNode* sn = STR_(b);
164 if (sn->capa == 0) {
165 int len = (int )(sn->end - sn->s);
166 sn->s = sn->buf;
167 sn->end = sn->s + len;
168 }
169 }
170 }
171
172 static OnigLen
173 distance_add(OnigLen d1, OnigLen d2)
174 {
175 if (d1 == INFINITE_LEN || d2 == INFINITE_LEN)
176 return INFINITE_LEN;
177 else {
178 if (d1 <= INFINITE_LEN - d2) return d1 + d2;
179 else return INFINITE_LEN;
180 }
181 }
182
183 static OnigLen
184 distance_multiply(OnigLen d, int m)
185 {
186 if (m == 0) return 0;
187
188 if (d < INFINITE_LEN / m)
189 return d * m;
190 else
191 return INFINITE_LEN;
192 }
193
194 static int
195 bitset_is_empty(BitSetRef bs)
196 {
197 int i;
198
199 for (i = 0; i < (int )BITSET_SIZE; i++) {
200 if (bs[i] != 0) return 0;
201 }
202 return 1;
203 }
204
205 extern int
206 onig_bbuf_init(BBuf* buf, int size)
207 {
208 if (size <= 0) {
209 size = 0;
210 buf->p = NULL;
211 }
212 else {
213 buf->p = (UChar* )xmalloc(size);
214 if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
215 }
216
217 buf->alloc = size;
218 buf->used = 0;
219 return 0;
220 }
221
222
223 #ifdef USE_CALL
224
225 static int
226 unset_addr_list_init(UnsetAddrList* list, int size)
227 {
228 UnsetAddr* p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
229 CHECK_NULL_RETURN_MEMERR(p);
230
231 list->num = 0;
232 list->alloc = size;
233 list->us = p;
234 return 0;
235 }
236
237 static void
238 unset_addr_list_end(UnsetAddrList* list)
239 {
240 if (IS_NOT_NULL(list->us))
241 xfree(list->us);
242 }
243
244 static int
245 unset_addr_list_add(UnsetAddrList* list, int offset, struct _Node* node)
246 {
247 UnsetAddr* p;
248 int size;
249
250 if (list->num >= list->alloc) {
251 size = list->alloc * 2;
252 p = (UnsetAddr* )xrealloc(list->us, sizeof(UnsetAddr) * size, sizeof(UnsetAddr)* list->alloc);
253 CHECK_NULL_RETURN_MEMERR(p);
254 list->alloc = size;
255 list->us = p;
256 }
257
258 list->us[list->num].offset = offset;
259 list->us[list->num].target = node;
260 list->num++;
261 return 0;
262 }
263 #endif /* USE_CALL */
264
265
266 static int
267 add_opcode(regex_t* reg, int opcode)
268 {
269 BB_ADD1(reg, opcode);
270 return 0;
271 }
272
273 static int
274 add_rel_addr(regex_t* reg, int addr)
275 {
276 RelAddrType ra = (RelAddrType )addr;
277
278 BB_ADD(reg, &ra, SIZE_RELADDR);
279 return 0;
280 }
281
282 static int
283 add_abs_addr(regex_t* reg, int addr)
284 {
285 AbsAddrType ra = (AbsAddrType )addr;
286
287 BB_ADD(reg, &ra, SIZE_ABSADDR);
288 return 0;
289 }
290
291 static int
292 add_length(regex_t* reg, int len)
293 {
294 LengthType l = (LengthType )len;
295
296 BB_ADD(reg, &l, SIZE_LENGTH);
297 return 0;
298 }
299
300 static int
301 add_mem_num(regex_t* reg, int num)
302 {
303 MemNumType n = (MemNumType )num;
304
305 BB_ADD(reg, &n, SIZE_MEMNUM);
306 return 0;
307 }
308
309 #if 0
310 static int
311 add_pointer(regex_t* reg, void* addr)
312 {
313 PointerType ptr = (PointerType )addr;
314
315 BB_ADD(reg, &ptr, SIZE_POINTER);
316 return 0;
317 }
318 #endif
319
320 static int
321 add_option(regex_t* reg, OnigOptionType option)
322 {
323 BB_ADD(reg, &option, SIZE_OPTION);
324 return 0;
325 }
326
327 static int
328 add_save_type(regex_t* reg, enum SaveType type)
329 {
330 SaveType t = (SaveType )type;
331
332 BB_ADD(reg, &t, SIZE_SAVE_TYPE);
333 return 0;
334 }
335
336 static int
337 add_update_var_type(regex_t* reg, enum UpdateVarType type)
338 {
339 UpdateVarType t = (UpdateVarType )type;
340
341 BB_ADD(reg, &t, SIZE_UPDATE_VAR_TYPE);
342 return 0;
343 }
344
345 static int
346 add_mode(regex_t* reg, ModeType mode)
347 {
348 BB_ADD(reg, &mode, SIZE_MODE);
349 return 0;
350 }
351
352 static int
353 add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
354 {
355 int r;
356
357 r = add_opcode(reg, opcode);
358 if (r != 0) return r;
359 r = add_rel_addr(reg, addr);
360 return r;
361 }
362
363 static int
364 add_bytes(regex_t* reg, UChar* bytes, int len)
365 {
366 BB_ADD(reg, bytes, len);
367 return 0;
368 }
369
370 static int
371 add_bitset(regex_t* reg, BitSetRef bs)
372 {
373 BB_ADD(reg, bs, SIZE_BITSET);
374 return 0;
375 }
376
377 static int compile_length_tree(Node* node, regex_t* reg);
378 static int compile_tree(Node* node, regex_t* reg, ScanEnv* env);
379
380
381 #define IS_NEED_STR_LEN_OP_EXACT(op) \
382 ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
383 (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
384
385 static int
386 select_str_opcode(int mb_len, int str_len, int ignore_case)
387 {
388 int op;
389
390 if (ignore_case) {
391 switch (str_len) {
392 case 1: op = OP_EXACT1_IC; break;
393 default: op = OP_EXACTN_IC; break;
394 }
395 }
396 else {
397 switch (mb_len) {
398 case 1:
399 switch (str_len) {
400 case 1: op = OP_EXACT1; break;
401 case 2: op = OP_EXACT2; break;
402 case 3: op = OP_EXACT3; break;
403 case 4: op = OP_EXACT4; break;
404 case 5: op = OP_EXACT5; break;
405 default: op = OP_EXACTN; break;
406 }
407 break;
408
409 case 2:
410 switch (str_len) {
411 case 1: op = OP_EXACTMB2N1; break;
412 case 2: op = OP_EXACTMB2N2; break;
413 case 3: op = OP_EXACTMB2N3; break;
414 default: op = OP_EXACTMB2N; break;
415 }
416 break;
417
418 case 3:
419 op = OP_EXACTMB3N;
420 break;
421
422 default:
423 op = OP_EXACTMBN;
424 break;
425 }
426 }
427 return op;
428 }
429
430 static int
431 compile_tree_empty_check(Node* node, regex_t* reg, int empty_info, ScanEnv* env)
432 {
433 int r;
434 int saved_num_null_check = reg->num_null_check;
435
436 if (empty_info != QUANT_BODY_IS_NOT_EMPTY) {
437 r = add_opcode(reg, OP_EMPTY_CHECK_START);
438 if (r != 0) return r;
439 r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
440 if (r != 0) return r;
441 reg->num_null_check++;
442 }
443
444 r = compile_tree(node, reg, env);
445 if (r != 0) return r;
446
447 if (empty_info != QUANT_BODY_IS_NOT_EMPTY) {
448 if (empty_info == QUANT_BODY_IS_EMPTY)
449 r = add_opcode(reg, OP_EMPTY_CHECK_END);
450 else if (empty_info == QUANT_BODY_IS_EMPTY_MEM)
451 r = add_opcode(reg, OP_EMPTY_CHECK_END_MEMST);
452 else if (empty_info == QUANT_BODY_IS_EMPTY_REC)
453 r = add_opcode(reg, OP_EMPTY_CHECK_END_MEMST_PUSH);
454
455 if (r != 0) return r;
456 r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
457 }
458 return r;
459 }
460
461 #ifdef USE_CALL
462 static int
463 compile_call(CallNode* node, regex_t* reg, ScanEnv* env)
464 {
465 int r;
466
467 r = add_opcode(reg, OP_CALL);
468 if (r != 0) return r;
469 r = unset_addr_list_add(env->unset_addr_list, BB_GET_OFFSET_POS(reg),
470 NODE_CALL_BODY(node));
471 if (r != 0) return r;
472 r = add_abs_addr(reg, 0 /*dummy addr.*/);
473 return r;
474 }
475 #endif
476
477 static int
478 compile_tree_n_times(Node* node, int n, regex_t* reg, ScanEnv* env)
479 {
480 int i, r;
481
482 for (i = 0; i < n; i++) {
483 r = compile_tree(node, reg, env);
484 if (r != 0) return r;
485 }
486 return 0;
487 }
488
489 static int
490 add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, int str_len,
491 regex_t* reg ARG_UNUSED, int ignore_case)
492 {
493 int len;
494 int op = select_str_opcode(mb_len, str_len, ignore_case);
495
496 len = SIZE_OPCODE;
497
498 if (op == OP_EXACTMBN) len += SIZE_LENGTH;
499 if (IS_NEED_STR_LEN_OP_EXACT(op))
500 len += SIZE_LENGTH;
501
502 len += mb_len * str_len;
503 return len;
504 }
505
506 static int
507 add_compile_string(UChar* s, int mb_len, int str_len,
508 regex_t* reg, int ignore_case)
509 {
510 int op = select_str_opcode(mb_len, str_len, ignore_case);
511 add_opcode(reg, op);
512
513 if (op == OP_EXACTMBN)
514 add_length(reg, mb_len);
515
516 if (IS_NEED_STR_LEN_OP_EXACT(op)) {
517 if (op == OP_EXACTN_IC)
518 add_length(reg, mb_len * str_len);
519 else
520 add_length(reg, str_len);
521 }
522
523 add_bytes(reg, s, mb_len * str_len);
524 return 0;
525 }
526
527
528 static int
529 compile_length_string_node(Node* node, regex_t* reg)
530 {
531 int rlen, r, len, prev_len, slen, ambig;
532 UChar *p, *prev;
533 StrNode* sn;
534 OnigEncoding enc = reg->enc;
535
536 sn = STR_(node);
537 if (sn->end <= sn->s)
538 return 0;
539
540 ambig = NODE_STRING_IS_AMBIG(node);
541
542 p = prev = sn->s;
543 prev_len = enclen(enc, p);
544 p += prev_len;
545 slen = 1;
546 rlen = 0;
547
548 for (; p < sn->end; ) {
549 len = enclen(enc, p);
550 if (len == prev_len) {
551 slen++;
552 }
553 else {
554 r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
555 rlen += r;
556 prev = p;
557 slen = 1;
558 prev_len = len;
559 }
560 p += len;
561 }
562
563 r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
564 rlen += r;
565 return rlen;
566 }
567
568 static int
569 compile_length_string_raw_node(StrNode* sn, regex_t* reg)
570 {
571 if (sn->end <= sn->s)
572 return 0;
573
574 return add_compile_string_length(sn->s, 1 /* sb */, (int )(sn->end - sn->s),
575 reg, 0);
576 }
577
578 static int
579 compile_string_node(Node* node, regex_t* reg)
580 {
581 int r, len, prev_len, slen, ambig;
582 UChar *p, *prev, *end;
583 StrNode* sn;
584 OnigEncoding enc = reg->enc;
585
586 sn = STR_(node);
587 if (sn->end <= sn->s)
588 return 0;
589
590 end = sn->end;
591 ambig = NODE_STRING_IS_AMBIG(node);
592
593 p = prev = sn->s;
594 prev_len = enclen(enc, p);
595 p += prev_len;
596 slen = 1;
597
598 for (; p < end; ) {
599 len = enclen(enc, p);
600 if (len == prev_len) {
601 slen++;
602 }
603 else {
604 r = add_compile_string(prev, prev_len, slen, reg, ambig);
605 if (r != 0) return r;
606
607 prev = p;
608 slen = 1;
609 prev_len = len;
610 }
611
612 p += len;
613 }
614
615 return add_compile_string(prev, prev_len, slen, reg, ambig);
616 }
617
618 static int
619 compile_string_raw_node(StrNode* sn, regex_t* reg)
620 {
621 if (sn->end <= sn->s)
622 return 0;
623
624 return add_compile_string(sn->s, 1 /* sb */, (int )(sn->end - sn->s), reg, 0);
625 }
626
627 static int
628 add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
629 {
630 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
631 add_length(reg, mbuf->used);
632 return add_bytes(reg, mbuf->p, mbuf->used);
633 #else
634 int r, pad_size;
635 UChar* p = BB_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
636
637 GET_ALIGNMENT_PAD_SIZE(p, pad_size);
638 add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
639 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
640
641 r = add_bytes(reg, mbuf->p, mbuf->used);
642
643 /* padding for return value from compile_length_cclass_node() to be fix. */
644 pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
645 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
646 return r;
647 #endif
648 }
649
650 static int
651 compile_length_cclass_node(CClassNode* cc, regex_t* reg)
652 {
653 int len;
654
655 if (IS_NULL(cc->mbuf)) {
656 len = SIZE_OPCODE + SIZE_BITSET;
657 }
658 else {
659 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
660 len = SIZE_OPCODE;
661 }
662 else {
663 len = SIZE_OPCODE + SIZE_BITSET;
664 }
665 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
666 len += SIZE_LENGTH + cc->mbuf->used;
667 #else
668 len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
669 #endif
670 }
671
672 return len;
673 }
674
675 static int
676 compile_cclass_node(CClassNode* cc, regex_t* reg)
677 {
678 int r;
679
680 if (IS_NULL(cc->mbuf)) {
681 if (IS_NCCLASS_NOT(cc))
682 add_opcode(reg, OP_CCLASS_NOT);
683 else
684 add_opcode(reg, OP_CCLASS);
685
686 r = add_bitset(reg, cc->bs);
687 }
688 else {
689 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
690 if (IS_NCCLASS_NOT(cc))
691 add_opcode(reg, OP_CCLASS_MB_NOT);
692 else
693 add_opcode(reg, OP_CCLASS_MB);
694
695 r = add_multi_byte_cclass(cc->mbuf, reg);
696 }
697 else {
698 if (IS_NCCLASS_NOT(cc))
699 add_opcode(reg, OP_CCLASS_MIX_NOT);
700 else
701 add_opcode(reg, OP_CCLASS_MIX);
702
703 r = add_bitset(reg, cc->bs);
704 if (r != 0) return r;
705 r = add_multi_byte_cclass(cc->mbuf, reg);
706 }
707 }
708
709 return r;
710 }
711
712 static int
713 entry_repeat_range(regex_t* reg, int id, int lower, int upper)
714 {
715 #define REPEAT_RANGE_ALLOC 4
716
717 OnigRepeatRange* p;
718
719 if (reg->repeat_range_alloc == 0) {
720 p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
721 CHECK_NULL_RETURN_MEMERR(p);
722 reg->repeat_range = p;
723 reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
724 }
725 else if (reg->repeat_range_alloc <= id) {
726 int n;
727 n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
728 p = (OnigRepeatRange* )xrealloc(reg->repeat_range,
729 sizeof(OnigRepeatRange) * n,
730 sizeof(OnigRepeatRange) * reg->repeat_range_alloc);
731 CHECK_NULL_RETURN_MEMERR(p);
732 reg->repeat_range = p;
733 reg->repeat_range_alloc = n;
734 }
735 else {
736 p = reg->repeat_range;
737 }
738
739 p[id].lower = lower;
740 p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
741 return 0;
742 }
743
744 static int
745 compile_range_repeat_node(QuantNode* qn, int target_len, int empty_info,
746 regex_t* reg, ScanEnv* env)
747 {
748 int r;
749 int num_repeat = reg->num_repeat;
750
751 r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
752 if (r != 0) return r;
753 r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
754 reg->num_repeat++;
755 if (r != 0) return r;
756 r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
757 if (r != 0) return r;
758
759 r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
760 if (r != 0) return r;
761
762 r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
763 if (r != 0) return r;
764
765 if (
766 #ifdef USE_CALL
767 NODE_IS_IN_MULTI_ENTRY(qn) ||
768 #endif
769 NODE_IS_IN_REAL_REPEAT(qn)) {
770 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
771 }
772 else {
773 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
774 }
775 if (r != 0) return r;
776 r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
777 return r;
778 }
779
780 static int
781 is_anychar_infinite_greedy(QuantNode* qn)
782 {
783 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
784 NODE_IS_ANYCHAR(NODE_QUANT_BODY(qn)))
785 return 1;
786 else
787 return 0;
788 }
789
790 #define QUANTIFIER_EXPAND_LIMIT_SIZE 50
791 #define CKN_ON (ckn > 0)
792
793 static int
794 compile_length_quantifier_node(QuantNode* qn, regex_t* reg)
795 {
796 int len, mod_tlen;
797 int infinite = IS_REPEAT_INFINITE(qn->upper);
798 enum QuantBodyEmpty empty_info = qn->body_empty_info;
799 int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);
800
801 if (tlen < 0) return tlen;
802 if (tlen == 0) return 0;
803
804 /* anychar repeat */
805 if (is_anychar_infinite_greedy(qn)) {
806 if (qn->lower <= 1 ||
807 int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0) {
808 if (IS_NOT_NULL(qn->next_head_exact))
809 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
810 else
811 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
812 }
813 }
814
815 if (empty_info == QUANT_BODY_IS_NOT_EMPTY)
816 mod_tlen = tlen;
817 else
818 mod_tlen = tlen + (SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END);
819
820 if (infinite &&
821 (qn->lower <= 1 ||
822 int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
823 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
824 len = SIZE_OP_JUMP;
825 }
826 else {
827 len = tlen * qn->lower;
828 }
829
830 if (qn->greedy) {
831 if (IS_NOT_NULL(qn->head_exact))
832 len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
833 else if (IS_NOT_NULL(qn->next_head_exact))
834 len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
835 else
836 len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
837 }
838 else
839 len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
840 }
841 else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
842 len = SIZE_OP_JUMP + tlen;
843 }
844 else if (!infinite && qn->greedy &&
845 (qn->upper == 1 ||
846 int_multiply_cmp(tlen + SIZE_OP_PUSH, qn->upper,
847 QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
848 len = tlen * qn->lower;
849 len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
850 }
851 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
852 len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
853 }
854 else {
855 len = SIZE_OP_REPEAT_INC
856 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
857 }
858
859 return len;
860 }
861
862 static int
863 compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
864 {
865 int i, r, mod_tlen;
866 int infinite = IS_REPEAT_INFINITE(qn->upper);
867 enum QuantBodyEmpty empty_info = qn->body_empty_info;
868 int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);
869
870 if (tlen < 0) return tlen;
871 if (tlen == 0) return 0;
872
873 if (is_anychar_infinite_greedy(qn) &&
874 (qn->lower <= 1 ||
875 int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
876 r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);
877 if (r != 0) return r;
878 if (IS_NOT_NULL(qn->next_head_exact)) {
879 if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg)))
880 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
881 else
882 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
883 if (r != 0) return r;
884 return add_bytes(reg, STR_(qn->next_head_exact)->s, 1);
885 }
886 else {
887 if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg)))
888 return add_opcode(reg, OP_ANYCHAR_ML_STAR);
889 else
890 return add_opcode(reg, OP_ANYCHAR_STAR);
891 }
892 }
893
894 if (empty_info == QUANT_BODY_IS_NOT_EMPTY)
895 mod_tlen = tlen;
896 else
897 mod_tlen = tlen + (SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END);
898
899 if (infinite &&
900 (qn->lower <= 1 ||
901 int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
902 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
903 if (qn->greedy) {
904 if (IS_NOT_NULL(qn->head_exact))
905 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
906 else if (IS_NOT_NULL(qn->next_head_exact))
907 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
908 else
909 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
910 }
911 else {
912 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
913 }
914 if (r != 0) return r;
915 }
916 else {
917 r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);
918 if (r != 0) return r;
919 }
920
921 if (qn->greedy) {
922 if (IS_NOT_NULL(qn->head_exact)) {
923 r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
924 mod_tlen + SIZE_OP_JUMP);
925 if (r != 0) return r;
926 add_bytes(reg, STR_(qn->head_exact)->s, 1);
927 r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
928 if (r != 0) return r;
929 r = add_opcode_rel_addr(reg, OP_JUMP,
930 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
931 }
932 else if (IS_NOT_NULL(qn->next_head_exact)) {
933 r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
934 mod_tlen + SIZE_OP_JUMP);
935 if (r != 0) return r;
936 add_bytes(reg, STR_(qn->next_head_exact)->s, 1);
937 r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
938 if (r != 0) return r;
939 r = add_opcode_rel_addr(reg, OP_JUMP,
940 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
941 }
942 else {
943 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
944 if (r != 0) return r;
945 r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
946 if (r != 0) return r;
947 r = add_opcode_rel_addr(reg, OP_JUMP,
948 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
949 }
950 }
951 else {
952 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
953 if (r != 0) return r;
954 r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
955 if (r != 0) return r;
956 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
957 }
958 }
959 else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
960 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
961 if (r != 0) return r;
962 r = compile_tree(NODE_QUANT_BODY(qn), reg, env);
963 }
964 else if (! infinite && qn->greedy &&
965 (qn->upper == 1 ||
966 int_multiply_cmp(tlen + SIZE_OP_PUSH, qn->upper,
967 QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
968 int n = qn->upper - qn->lower;
969
970 r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);
971 if (r != 0) return r;
972
973 for (i = 0; i < n; i++) {
974 r = add_opcode_rel_addr(reg, OP_PUSH,
975 (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
976 if (r != 0) return r;
977 r = compile_tree(NODE_QUANT_BODY(qn), reg, env);
978 if (r != 0) return r;
979 }
980 }
981 else if (! qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
982 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
983 if (r != 0) return r;
984 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
985 if (r != 0) return r;
986 r = compile_tree(NODE_QUANT_BODY(qn), reg, env);
987 }
988 else {
989 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg, env);
990 }
991 return r;
992 }
993
994 static int
995 compile_length_option_node(EnclosureNode* node, regex_t* reg)
996 {
997 int tlen;
998 OnigOptionType prev = reg->options;
999
1000 reg->options = node->o.options;
1001 tlen = compile_length_tree(NODE_ENCLOSURE_BODY(node), reg);
1002 reg->options = prev;
1003
1004 return tlen;
1005 }
1006
1007 static int
1008 compile_option_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)
1009 {
1010 int r;
1011 OnigOptionType prev = reg->options;
1012
1013 reg->options = node->o.options;
1014 r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env);
1015 reg->options = prev;
1016
1017 return r;
1018 }
1019
1020 static int
1021 compile_length_enclosure_node(EnclosureNode* node, regex_t* reg)
1022 {
1023 int len;
1024 int tlen;
1025
1026 if (node->type == ENCLOSURE_OPTION)
1027 return compile_length_option_node(node, reg);
1028
1029 if (NODE_ENCLOSURE_BODY(node)) {
1030 tlen = compile_length_tree(NODE_ENCLOSURE_BODY(node), reg);
1031 if (tlen < 0) return tlen;
1032 }
1033 else
1034 tlen = 0;
1035
1036 switch (node->type) {
1037 case ENCLOSURE_MEMORY:
1038 #ifdef USE_CALL
1039
1040 if (node->m.regnum == 0 && NODE_IS_CALLED(node)) {
1041 len = tlen + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
1042 return len;
1043 }
1044
1045 if (NODE_IS_CALLED(node)) {
1046 len = SIZE_OP_MEMORY_START_PUSH + tlen
1047 + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
1048 if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))
1049 len += (NODE_IS_RECURSION(node)
1050 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1051 else
1052 len += (NODE_IS_RECURSION(node)
1053 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1054 }
1055 else if (NODE_IS_RECURSION(node)) {
1056 len = SIZE_OP_MEMORY_START_PUSH;
1057 len += tlen + (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum)
1058 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_REC);
1059 }
1060 else
1061 #endif
1062 {
1063 if (MEM_STATUS_AT0(reg->bt_mem_start, node->m.regnum))
1064 len = SIZE_OP_MEMORY_START_PUSH;
1065 else
1066 len = SIZE_OP_MEMORY_START;
1067
1068 len += tlen + (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum)
1069 ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
1070 }
1071 break;
1072
1073 case ENCLOSURE_STOP_BACKTRACK:
1074 if (NODE_IS_STOP_BT_SIMPLE_REPEAT(node)) {
1075 QuantNode* qn = QUANT_(NODE_ENCLOSURE_BODY(node));
1076 tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);
1077 if (tlen < 0) return tlen;
1078
1079 len = tlen * qn->lower
1080 + SIZE_OP_PUSH + tlen + SIZE_OP_POP_OUT + SIZE_OP_JUMP;
1081 }
1082 else {
1083 len = SIZE_OP_ATOMIC_START + tlen + SIZE_OP_ATOMIC_END;
1084 }
1085 break;
1086
1087 case ENCLOSURE_IF_ELSE:
1088 {
1089 Node* cond = NODE_ENCLOSURE_BODY(node);
1090 Node* Then = node->te.Then;
1091 Node* Else = node->te.Else;
1092
1093 len = compile_length_tree(cond, reg);
1094 if (len < 0) return len;
1095 len += SIZE_OP_PUSH;
1096 len += SIZE_OP_ATOMIC_START + SIZE_OP_ATOMIC_END;
1097
1098 if (IS_NOT_NULL(Then)) {
1099 tlen = compile_length_tree(Then, reg);
1100 if (tlen < 0) return tlen;
1101 len += tlen;
1102 }
1103
1104 if (IS_NOT_NULL(Else)) {
1105 len += SIZE_OP_JUMP;
1106 tlen = compile_length_tree(Else, reg);
1107 if (tlen < 0) return tlen;
1108 len += tlen;
1109 }
1110 }
1111 break;
1112
1113 default:
1114 return ONIGERR_TYPE_BUG;
1115 break;
1116 }
1117
1118 return len;
1119 }
1120
1121 static int get_char_length_tree(Node* node, regex_t* reg, int* len);
1122
1123 static int
1124 compile_enclosure_memory_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)
1125 {
1126 int r;
1127 int len;
1128
1129 #ifdef USE_CALL
1130 if (node->m.regnum == 0 && NODE_IS_CALLED(node)) {
1131 r = add_opcode(reg, OP_CALL);
1132 if (r != 0) return r;
1133 node->m.called_addr = BB_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
1134 NODE_STATUS_ADD(node, ADDR_FIXED);
1135 r = add_abs_addr(reg, (int )node->m.called_addr);
1136 if (r != 0) return r;
1137 len = compile_length_tree(NODE_ENCLOSURE_BODY(node), reg);
1138 len += SIZE_OP_RETURN;
1139 r = add_opcode_rel_addr(reg, OP_JUMP, len);
1140 if (r != 0) return r;
1141
1142 r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env);
1143 if (r != 0) return r;
1144 r = add_opcode(reg, OP_RETURN);
1145 return r;
1146 }
1147
1148 if (NODE_IS_CALLED(node)) {
1149 r = add_opcode(reg, OP_CALL);
1150 if (r != 0) return r;
1151 node->m.called_addr = BB_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
1152 NODE_STATUS_ADD(node, ADDR_FIXED);
1153 r = add_abs_addr(reg, (int )node->m.called_addr);
1154 if (r != 0) return r;
1155 len = compile_length_tree(NODE_ENCLOSURE_BODY(node), reg);
1156 len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
1157 if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))
1158 len += (NODE_IS_RECURSION(node)
1159 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1160 else
1161 len += (NODE_IS_RECURSION(node)
1162 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1163
1164 r = add_opcode_rel_addr(reg, OP_JUMP, len);
1165 if (r != 0) return r;
1166 }
1167 #endif
1168
1169 if (MEM_STATUS_AT0(reg->bt_mem_start, node->m.regnum))
1170 r = add_opcode(reg, OP_MEMORY_START_PUSH);
1171 else
1172 r = add_opcode(reg, OP_MEMORY_START);
1173 if (r != 0) return r;
1174 r = add_mem_num(reg, node->m.regnum);
1175 if (r != 0) return r;
1176 r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env);
1177 if (r != 0) return r;
1178
1179 #ifdef USE_CALL
1180 if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))
1181 r = add_opcode(reg, (NODE_IS_RECURSION(node)
1182 ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
1183 else
1184 r = add_opcode(reg, (NODE_IS_RECURSION(node)
1185 ? OP_MEMORY_END_REC : OP_MEMORY_END));
1186 if (r != 0) return r;
1187 r = add_mem_num(reg, node->m.regnum);
1188 if (NODE_IS_CALLED(node)) {
1189 if (r != 0) return r;
1190 r = add_opcode(reg, OP_RETURN);
1191 }
1192 #else
1193 if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))
1194 r = add_opcode(reg, OP_MEMORY_END_PUSH);
1195 else
1196 r = add_opcode(reg, OP_MEMORY_END);
1197 if (r != 0) return r;
1198 r = add_mem_num(reg, node->m.regnum);
1199 #endif
1200
1201 return r;
1202 }
1203
1204 static int
1205 compile_enclosure_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)
1206 {
1207 int r, len;
1208
1209 switch (node->type) {
1210 case ENCLOSURE_MEMORY:
1211 r = compile_enclosure_memory_node(node, reg, env);
1212 break;
1213
1214 case ENCLOSURE_OPTION:
1215 r = compile_option_node(node, reg, env);
1216 break;
1217
1218 case ENCLOSURE_STOP_BACKTRACK:
1219 if (NODE_IS_STOP_BT_SIMPLE_REPEAT(node)) {
1220 QuantNode* qn = QUANT_(NODE_ENCLOSURE_BODY(node));
1221 r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);
1222 if (r != 0) return r;
1223
1224 len = compile_length_tree(NODE_QUANT_BODY(qn), reg);
1225 if (len < 0) return len;
1226
1227 r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP_OUT + SIZE_OP_JUMP);
1228 if (r != 0) return r;
1229 r = compile_tree(NODE_QUANT_BODY(qn), reg, env);
1230 if (r != 0) return r;
1231 r = add_opcode(reg, OP_POP_OUT);
1232 if (r != 0) return r;
1233 r = add_opcode_rel_addr(reg, OP_JUMP,
1234 -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP_OUT + (int )SIZE_OP_JUMP));
1235 }
1236 else {
1237 r = add_opcode(reg, OP_ATOMIC_START);
1238 if (r != 0) return r;
1239 r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env);
1240 if (r != 0) return r;
1241 r = add_opcode(reg, OP_ATOMIC_END);
1242 }
1243 break;
1244
1245 case ENCLOSURE_IF_ELSE:
1246 {
1247 int cond_len, then_len, jump_len;
1248 Node* cond = NODE_ENCLOSURE_BODY(node);
1249 Node* Then = node->te.Then;
1250 Node* Else = node->te.Else;
1251
1252 r = add_opcode(reg, OP_ATOMIC_START);
1253 if (r != 0) return r;
1254
1255 cond_len = compile_length_tree(cond, reg);
1256 if (cond_len < 0) return cond_len;
1257 if (IS_NOT_NULL(Then)) {
1258 then_len = compile_length_tree(Then, reg);
1259 if (then_len < 0) return then_len;
1260 }
1261 else
1262 then_len = 0;
1263
1264 jump_len = cond_len + then_len + SIZE_OP_ATOMIC_END;
1265 if (IS_NOT_NULL(Else)) jump_len += SIZE_OP_JUMP;
1266
1267 r = add_opcode_rel_addr(reg, OP_PUSH, jump_len);
1268 if (r != 0) return r;
1269 r = compile_tree(cond, reg, env);
1270 if (r != 0) return r;
1271 r = add_opcode(reg, OP_ATOMIC_END);
1272 if (r != 0) return r;
1273
1274 if (IS_NOT_NULL(Then)) {
1275 r = compile_tree(Then, reg, env);
1276 if (r != 0) return r;
1277 }
1278
1279 if (IS_NOT_NULL(Else)) {
1280 int else_len = compile_length_tree(Else, reg);
1281 r = add_opcode_rel_addr(reg, OP_JUMP, else_len);
1282 if (r != 0) return r;
1283 r = compile_tree(Else, reg, env);
1284 }
1285 }
1286 break;
1287
1288 default:
1289 return ONIGERR_TYPE_BUG;
1290 break;
1291 }
1292
1293 return r;
1294 }
1295
1296 static int
1297 compile_length_anchor_node(AnchorNode* node, regex_t* reg)
1298 {
1299 int len;
1300 int tlen = 0;
1301
1302 if (IS_NOT_NULL(NODE_ANCHOR_BODY(node))) {
1303 tlen = compile_length_tree(NODE_ANCHOR_BODY(node), reg);
1304 if (tlen < 0) return tlen;
1305 }
1306
1307 switch (node->type) {
1308 case ANCHOR_PREC_READ:
1309 len = SIZE_OP_PREC_READ_START + tlen + SIZE_OP_PREC_READ_END;
1310 break;
1311 case ANCHOR_PREC_READ_NOT:
1312 len = SIZE_OP_PREC_READ_NOT_START + tlen + SIZE_OP_PREC_READ_NOT_END;
1313 break;
1314 case ANCHOR_LOOK_BEHIND:
1315 len = SIZE_OP_LOOK_BEHIND + tlen;
1316 break;
1317 case ANCHOR_LOOK_BEHIND_NOT:
1318 len = SIZE_OP_LOOK_BEHIND_NOT_START + tlen + SIZE_OP_LOOK_BEHIND_NOT_END;
1319 break;
1320
1321 case ANCHOR_WORD_BOUNDARY:
1322 case ANCHOR_NO_WORD_BOUNDARY:
1323 #ifdef USE_WORD_BEGIN_END
1324 case ANCHOR_WORD_BEGIN:
1325 case ANCHOR_WORD_END:
1326 #endif
1327 len = SIZE_OP_WORD_BOUNDARY;
1328 break;
1329
1330 case ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
1331 case ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
1332 len = SIZE_OPCODE;
1333 break;
1334
1335 default:
1336 len = SIZE_OPCODE;
1337 break;
1338 }
1339
1340 return len;
1341 }
1342
1343 static int
1344 compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env)
1345 {
1346 int r, len;
1347 enum OpCode op;
1348
1349 switch (node->type) {
1350 case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break;
1351 case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break;
1352 case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break;
1353 case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break;
1354 case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break;
1355 case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
1356
1357 case ANCHOR_WORD_BOUNDARY:
1358 op = OP_WORD_BOUNDARY;
1359 word:
1360 r = add_opcode(reg, op);
1361 if (r != 0) return r;
1362 r = add_mode(reg, (ModeType )node->ascii_mode);
1363 break;
1364
1365 case ANCHOR_NO_WORD_BOUNDARY:
1366 op = OP_NO_WORD_BOUNDARY; goto word;
1367 break;
1368 #ifdef USE_WORD_BEGIN_END
1369 case ANCHOR_WORD_BEGIN:
1370 op = OP_WORD_BEGIN; goto word;
1371 break;
1372 case ANCHOR_WORD_END:
1373 op = OP_WORD_END; goto word;
1374 break;
1375 #endif
1376
1377 case ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
1378 r = add_opcode(reg, OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY);
1379 break;
1380
1381 case ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
1382 r = add_opcode(reg, OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY);
1383 break;
1384
1385 case ANCHOR_PREC_READ:
1386 r = add_opcode(reg, OP_PREC_READ_START);
1387 if (r != 0) return r;
1388 r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);
1389 if (r != 0) return r;
1390 r = add_opcode(reg, OP_PREC_READ_END);
1391 break;
1392
1393 case ANCHOR_PREC_READ_NOT:
1394 len = compile_length_tree(NODE_ANCHOR_BODY(node), reg);
1395 if (len < 0) return len;
1396 r = add_opcode_rel_addr(reg, OP_PREC_READ_NOT_START, len + SIZE_OP_PREC_READ_NOT_END);
1397 if (r != 0) return r;
1398 r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);
1399 if (r != 0) return r;
1400 r = add_opcode(reg, OP_PREC_READ_NOT_END);
1401 break;
1402
1403 case ANCHOR_LOOK_BEHIND:
1404 {
1405 int n;
1406 r = add_opcode(reg, OP_LOOK_BEHIND);
1407 if (r != 0) return r;
1408 if (node->char_len < 0) {
1409 r = get_char_length_tree(NODE_ANCHOR_BODY(node), reg, &n);
1410 if (r != 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1411 }
1412 else
1413 n = node->char_len;
1414
1415 r = add_length(reg, n);
1416 if (r != 0) return r;
1417 r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);
1418 }
1419 break;
1420
1421 case ANCHOR_LOOK_BEHIND_NOT:
1422 {
1423 int n;
1424
1425 len = compile_length_tree(NODE_ANCHOR_BODY(node), reg);
1426 r = add_opcode_rel_addr(reg, OP_LOOK_BEHIND_NOT_START,
1427 len + SIZE_OP_LOOK_BEHIND_NOT_END);
1428 if (r != 0) return r;
1429 if (node->char_len < 0) {
1430 r = get_char_length_tree(NODE_ANCHOR_BODY(node), reg, &n);
1431 if (r != 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1432 }
1433 else
1434 n = node->char_len;
1435 r = add_length(reg, n);
1436 if (r != 0) return r;
1437 r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);
1438 if (r != 0) return r;
1439 r = add_opcode(reg, OP_LOOK_BEHIND_NOT_END);
1440 }
1441 break;
1442
1443 default:
1444 return ONIGERR_TYPE_BUG;
1445 break;
1446 }
1447
1448 return r;
1449 }
1450
1451 static int
1452 compile_gimmick_node(GimmickNode* node, regex_t* reg)
1453 {
1454 int r;
1455
1456 switch (node->type) {
1457 case GIMMICK_FAIL:
1458 r = add_opcode(reg, OP_FAIL);
1459 break;
1460
1461 case GIMMICK_KEEP:
1462 r = add_opcode(reg, OP_PUSH_SAVE_VAL);
1463 if (r != 0) return r;
1464 r = add_save_type(reg, SAVE_KEEP);
1465 if (r != 0) return r;
1466 r = add_mem_num(reg, node->id);
1467 break;
1468
1469 case GIMMICK_SAVE:
1470 r = add_opcode(reg, OP_PUSH_SAVE_VAL);
1471 if (r != 0) return r;
1472 r = add_save_type(reg, node->detail_type);
1473 if (r != 0) return r;
1474 r = add_mem_num(reg, node->id);
1475 break;
1476
1477 case GIMMICK_UPDATE_VAR:
1478 r = add_opcode(reg, OP_UPDATE_VAR);
1479 if (r != 0) return r;
1480 r = add_update_var_type(reg, node->detail_type);
1481 if (r != 0) return r;
1482 r = add_mem_num(reg, node->id);
1483 break;
1484
1485 #ifdef USE_CALLOUT
1486 case GIMMICK_CALLOUT:
1487 switch (node->detail_type) {
1488 case ONIG_CALLOUT_OF_CONTENTS:
1489 case ONIG_CALLOUT_OF_NAME:
1490 {
1491 r = add_opcode(reg, (node->detail_type == ONIG_CALLOUT_OF_CONTENTS) ?
1492 OP_CALLOUT_CONTENTS : OP_CALLOUT_NAME);
1493 if (r != 0) return r;
1494 if (node->detail_type == ONIG_CALLOUT_OF_NAME) {
1495 r = add_mem_num(reg, node->id);
1496 if (r != 0) return r;
1497 }
1498 r = add_mem_num(reg, node->num);
1499 if (r != 0) return r;
1500 }
1501 break;
1502
1503 default:
1504 r = ONIGERR_TYPE_BUG;
1505 break;
1506 }
1507 #endif
1508 }
1509
1510 return r;
1511 }
1512
1513 static int
1514 compile_length_gimmick_node(GimmickNode* node, regex_t* reg)
1515 {
1516 int len;
1517
1518 switch (node->type) {
1519 case GIMMICK_FAIL:
1520 len = SIZE_OP_FAIL;
1521 break;
1522
1523 case GIMMICK_KEEP:
1524 case GIMMICK_SAVE:
1525 len = SIZE_OP_PUSH_SAVE_VAL;
1526 break;
1527
1528 case GIMMICK_UPDATE_VAR:
1529 len = SIZE_OP_UPDATE_VAR;
1530 break;
1531
1532 #ifdef USE_CALLOUT
1533 case GIMMICK_CALLOUT:
1534 switch (node->detail_type) {
1535 case ONIG_CALLOUT_OF_CONTENTS:
1536 len = SIZE_OP_CALLOUT_CONTENTS;
1537 break;
1538 case ONIG_CALLOUT_OF_NAME:
1539 len = SIZE_OP_CALLOUT_NAME;
1540 break;
1541
1542 default:
1543 len = ONIGERR_TYPE_BUG;
1544 break;
1545 }
1546 break;
1547 #endif
1548 }
1549
1550 return len;
1551 }
1552
1553 static int
1554 compile_length_tree(Node* node, regex_t* reg)
1555 {
1556 int len, r;
1557
1558 switch (NODE_TYPE(node)) {
1559 case NODE_LIST:
1560 len = 0;
1561 do {
1562 r = compile_length_tree(NODE_CAR(node), reg);
1563 if (r < 0) return r;
1564 len += r;
1565 } while (IS_NOT_NULL(node = NODE_CDR(node)));
1566 r = len;
1567 break;
1568
1569 case NODE_ALT:
1570 {
1571 int n;
1572
1573 n = r = 0;
1574 do {
1575 r += compile_length_tree(NODE_CAR(node), reg);
1576 n++;
1577 } while (IS_NOT_NULL(node = NODE_CDR(node)));
1578 r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
1579 }
1580 break;
1581
1582 case NODE_STRING:
1583 if (NODE_STRING_IS_RAW(node))
1584 r = compile_length_string_raw_node(STR_(node), reg);
1585 else
1586 r = compile_length_string_node(node, reg);
1587 break;
1588
1589 case NODE_CCLASS:
1590 r = compile_length_cclass_node(CCLASS_(node), reg);
1591 break;
1592
1593 case NODE_CTYPE:
1594 r = SIZE_OPCODE;
1595 break;
1596
1597 case NODE_BACKREF:
1598 {
1599 BackRefNode* br = BACKREF_(node);
1600
1601 if (NODE_IS_CHECKER(node)) {
1602 #ifdef USE_BACKREF_WITH_LEVEL
1603 if (NODE_IS_NEST_LEVEL(node)) {
1604 r = SIZE_OPCODE + SIZE_LENGTH + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1605 }
1606 else
1607 #endif
1608 r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1609 }
1610 else {
1611 #ifdef USE_BACKREF_WITH_LEVEL
1612 if (NODE_IS_NEST_LEVEL(node)) {
1613 r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
1614 SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1615 }
1616 else
1617 #endif
1618 if (br->back_num == 1) {
1619 r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
1620 ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
1621 }
1622 else {
1623 r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1624 }
1625 }
1626 }
1627 break;
1628
1629 #ifdef USE_CALL
1630 case NODE_CALL:
1631 r = SIZE_OP_CALL;
1632 break;
1633 #endif
1634
1635 case NODE_QUANT:
1636 r = compile_length_quantifier_node(QUANT_(node), reg);
1637 break;
1638
1639 case NODE_ENCLOSURE:
1640 r = compile_length_enclosure_node(ENCLOSURE_(node), reg);
1641 break;
1642
1643 case NODE_ANCHOR:
1644 r = compile_length_anchor_node(ANCHOR_(node), reg);
1645 break;
1646
1647 case NODE_GIMMICK:
1648 r = compile_length_gimmick_node(GIMMICK_(node), reg);
1649 break;
1650
1651 default:
1652 return ONIGERR_TYPE_BUG;
1653 break;
1654 }
1655
1656 return r;
1657 }
1658
1659 static int
1660 compile_tree(Node* node, regex_t* reg, ScanEnv* env)
1661 {
1662 int n, len, pos, r = 0;
1663
1664 switch (NODE_TYPE(node)) {
1665 case NODE_LIST:
1666 do {
1667 r = compile_tree(NODE_CAR(node), reg, env);
1668 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
1669 break;
1670
1671 case NODE_ALT:
1672 {
1673 Node* x = node;
1674 len = 0;
1675 do {
1676 len += compile_length_tree(NODE_CAR(x), reg);
1677 if (IS_NOT_NULL(NODE_CDR(x))) {
1678 len += SIZE_OP_PUSH + SIZE_OP_JUMP;
1679 }
1680 } while (IS_NOT_NULL(x = NODE_CDR(x)));
1681 pos = reg->used + len; /* goal position */
1682
1683 do {
1684 len = compile_length_tree(NODE_CAR(node), reg);
1685 if (IS_NOT_NULL(NODE_CDR(node))) {
1686 enum OpCode push = NODE_IS_SUPER(node) ? OP_PUSH_SUPER : OP_PUSH;
1687 r = add_opcode_rel_addr(reg, push, len + SIZE_OP_JUMP);
1688 if (r != 0) break;
1689 }
1690 r = compile_tree(NODE_CAR(node), reg, env);
1691 if (r != 0) break;
1692 if (IS_NOT_NULL(NODE_CDR(node))) {
1693 len = pos - (reg->used + SIZE_OP_JUMP);
1694 r = add_opcode_rel_addr(reg, OP_JUMP, len);
1695 if (r != 0) break;
1696 }
1697 } while (IS_NOT_NULL(node = NODE_CDR(node)));
1698 }
1699 break;
1700
1701 case NODE_STRING:
1702 if (NODE_STRING_IS_RAW(node))
1703 r = compile_string_raw_node(STR_(node), reg);
1704 else
1705 r = compile_string_node(node, reg);
1706 break;
1707
1708 case NODE_CCLASS:
1709 r = compile_cclass_node(CCLASS_(node), reg);
1710 break;
1711
1712 case NODE_CTYPE:
1713 {
1714 int op;
1715
1716 switch (CTYPE_(node)->ctype) {
1717 case CTYPE_ANYCHAR:
1718 if (IS_MULTILINE(CTYPE_OPTION(node, reg)))
1719 r = add_opcode(reg, OP_ANYCHAR_ML);
1720 else
1721 r = add_opcode(reg, OP_ANYCHAR);
1722 break;
1723
1724 case ONIGENC_CTYPE_WORD:
1725 if (CTYPE_(node)->ascii_mode == 0) {
1726 op = CTYPE_(node)->not != 0 ? OP_NO_WORD : OP_WORD;
1727 }
1728 else {
1729 op = CTYPE_(node)->not != 0 ? OP_NO_WORD_ASCII : OP_WORD_ASCII;
1730 }
1731 r = add_opcode(reg, op);
1732 break;
1733
1734 default:
1735 return ONIGERR_TYPE_BUG;
1736 break;
1737 }
1738 }
1739 break;
1740
1741 case NODE_BACKREF:
1742 {
1743 BackRefNode* br = BACKREF_(node);
1744
1745 if (NODE_IS_CHECKER(node)) {
1746 #ifdef USE_BACKREF_WITH_LEVEL
1747 if (NODE_IS_NEST_LEVEL(node)) {
1748 r = add_opcode(reg, OP_BACKREF_CHECK_WITH_LEVEL);
1749 if (r != 0) return r;
1750 r = add_length(reg, br->nest_level);
1751 if (r != 0) return r;
1752 }
1753 else
1754 #endif
1755 {
1756 r = add_opcode(reg, OP_BACKREF_CHECK);
1757 if (r != 0) return r;
1758 }
1759
1760 goto add_bacref_mems;
1761 }
1762 else {
1763 #ifdef USE_BACKREF_WITH_LEVEL
1764 if (NODE_IS_NEST_LEVEL(node)) {
1765 r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
1766 if (r != 0) return r;
1767 r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
1768 if (r != 0) return r;
1769 r = add_length(reg, br->nest_level);
1770 if (r != 0) return r;
1771
1772 goto add_bacref_mems;
1773 }
1774 else
1775 #endif
1776 if (br->back_num == 1) {
1777 n = br->back_static[0];
1778 if (IS_IGNORECASE(reg->options)) {
1779 r = add_opcode(reg, OP_BACKREF_N_IC);
1780 if (r != 0) return r;
1781 r = add_mem_num(reg, n);
1782 }
1783 else {
1784 switch (n) {
1785 case 1: r = add_opcode(reg, OP_BACKREF1); break;
1786 case 2: r = add_opcode(reg, OP_BACKREF2); break;
1787 default:
1788 r = add_opcode(reg, OP_BACKREF_N);
1789 if (r != 0) return r;
1790 r = add_mem_num(reg, n);
1791 break;
1792 }
1793 }
1794 }
1795 else {
1796 int i;
1797 int* p;
1798
1799 if (IS_IGNORECASE(reg->options)) {
1800 r = add_opcode(reg, OP_BACKREF_MULTI_IC);
1801 }
1802 else {
1803 r = add_opcode(reg, OP_BACKREF_MULTI);
1804 }
1805 if (r != 0) return r;
1806
1807 add_bacref_mems:
1808 r = add_length(reg, br->back_num);
1809 if (r != 0) return r;
1810 p = BACKREFS_P(br);
1811 for (i = br->back_num - 1; i >= 0; i--) {
1812 r = add_mem_num(reg, p[i]);
1813 if (r != 0) return r;
1814 }
1815 }
1816 }
1817 }
1818 break;
1819
1820 #ifdef USE_CALL
1821 case NODE_CALL:
1822 r = compile_call(CALL_(node), reg, env);
1823 break;
1824 #endif
1825
1826 case NODE_QUANT:
1827 r = compile_quantifier_node(QUANT_(node), reg, env);
1828 break;
1829
1830 case NODE_ENCLOSURE:
1831 r = compile_enclosure_node(ENCLOSURE_(node), reg, env);
1832 break;
1833
1834 case NODE_ANCHOR:
1835 r = compile_anchor_node(ANCHOR_(node), reg, env);
1836 break;
1837
1838 case NODE_GIMMICK:
1839 r = compile_gimmick_node(GIMMICK_(node), reg);
1840 break;
1841
1842 default:
1843 #ifdef ONIG_DEBUG
1844 fprintf(stderr, "compile_tree: undefined node type %d\n", NODE_TYPE(node));
1845 #endif
1846 break;
1847 }
1848
1849 return r;
1850 }
1851
1852 static int
1853 noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
1854 {
1855 int r = 0;
1856 Node* node = *plink;
1857
1858 switch (NODE_TYPE(node)) {
1859 case NODE_LIST:
1860 case NODE_ALT:
1861 do {
1862 r = noname_disable_map(&(NODE_CAR(node)), map, counter);
1863 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
1864 break;
1865
1866 case NODE_QUANT:
1867 {
1868 Node** ptarget = &(NODE_BODY(node));
1869 Node* old = *ptarget;
1870 r = noname_disable_map(ptarget, map, counter);
1871 if (*ptarget != old && NODE_TYPE(*ptarget) == NODE_QUANT) {
1872 onig_reduce_nested_quantifier(node, *ptarget);
1873 }
1874 }
1875 break;
1876
1877 case NODE_ENCLOSURE:
1878 {
1879 EnclosureNode* en = ENCLOSURE_(node);
1880 if (en->type == ENCLOSURE_MEMORY) {
1881 if (NODE_IS_NAMED_GROUP(node)) {
1882 (*counter)++;
1883 map[en->m.regnum].new_val = *counter;
1884 en->m.regnum = *counter;
1885 r = noname_disable_map(&(NODE_BODY(node)), map, counter);
1886 }
1887 else {
1888 *plink = NODE_BODY(node);
1889 NODE_BODY(node) = NULL_NODE;
1890 onig_node_free(node);
1891 r = noname_disable_map(plink, map, counter);
1892 }
1893 }
1894 else if (en->type == ENCLOSURE_IF_ELSE) {
1895 r = noname_disable_map(&(NODE_ENCLOSURE_BODY(en)), map, counter);
1896 if (r != 0) return r;
1897 if (IS_NOT_NULL(en->te.Then)) {
1898 r = noname_disable_map(&(en->te.Then), map, counter);
1899 if (r != 0) return r;
1900 }
1901 if (IS_NOT_NULL(en->te.Else)) {
1902 r = noname_disable_map(&(en->te.Else), map, counter);
1903 if (r != 0) return r;
1904 }
1905 }
1906 else
1907 r = noname_disable_map(&(NODE_BODY(node)), map, counter);
1908 }
1909 break;
1910
1911 case NODE_ANCHOR:
1912 if (IS_NOT_NULL(NODE_BODY(node)))
1913 r = noname_disable_map(&(NODE_BODY(node)), map, counter);
1914 break;
1915
1916 default:
1917 break;
1918 }
1919
1920 return r;
1921 }
1922
1923 static int
1924 renumber_node_backref(Node* node, GroupNumRemap* map)
1925 {
1926 int i, pos, n, old_num;
1927 int *backs;
1928 BackRefNode* bn = BACKREF_(node);
1929
1930 if (! NODE_IS_BY_NAME(node))
1931 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
1932
1933 old_num = bn->back_num;
1934 if (IS_NULL(bn->back_dynamic))
1935 backs = bn->back_static;
1936 else
1937 backs = bn->back_dynamic;
1938
1939 for (i = 0, pos = 0; i < old_num; i++) {
1940 n = map[backs[i]].new_val;
1941 if (n > 0) {
1942 backs[pos] = n;
1943 pos++;
1944 }
1945 }
1946
1947 bn->back_num = pos;
1948 return 0;
1949 }
1950
1951 static int
1952 renumber_by_map(Node* node, GroupNumRemap* map)
1953 {
1954 int r = 0;
1955
1956 switch (NODE_TYPE(node)) {
1957 case NODE_LIST:
1958 case NODE_ALT:
1959 do {
1960 r = renumber_by_map(NODE_CAR(node), map);
1961 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
1962 break;
1963
1964 case NODE_QUANT:
1965 r = renumber_by_map(NODE_BODY(node), map);
1966 break;
1967
1968 case NODE_ENCLOSURE:
1969 {
1970 EnclosureNode* en = ENCLOSURE_(node);
1971
1972 r = renumber_by_map(NODE_BODY(node), map);
1973 if (r != 0) return r;
1974
1975 if (en->type == ENCLOSURE_IF_ELSE) {
1976 if (IS_NOT_NULL(en->te.Then)) {
1977 r = renumber_by_map(en->te.Then, map);
1978 if (r != 0) return r;
1979 }
1980 if (IS_NOT_NULL(en->te.Else)) {
1981 r = renumber_by_map(en->te.Else, map);
1982 if (r != 0) return r;
1983 }
1984 }
1985 }
1986 break;
1987
1988 case NODE_BACKREF:
1989 r = renumber_node_backref(node, map);
1990 break;
1991
1992 case NODE_ANCHOR:
1993 if (IS_NOT_NULL(NODE_BODY(node)))
1994 r = renumber_by_map(NODE_BODY(node), map);
1995 break;
1996
1997 default:
1998 break;
1999 }
2000
2001 return r;
2002 }
2003
2004 static int
2005 numbered_ref_check(Node* node)
2006 {
2007 int r = 0;
2008
2009 switch (NODE_TYPE(node)) {
2010 case NODE_LIST:
2011 case NODE_ALT:
2012 do {
2013 r = numbered_ref_check(NODE_CAR(node));
2014 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
2015 break;
2016
2017 case NODE_ANCHOR:
2018 if (IS_NULL(NODE_BODY(node)))
2019 break;
2020 /* fall */
2021 case NODE_QUANT:
2022 r = numbered_ref_check(NODE_BODY(node));
2023 break;
2024
2025 case NODE_ENCLOSURE:
2026 {
2027 EnclosureNode* en = ENCLOSURE_(node);
2028
2029 r = numbered_ref_check(NODE_BODY(node));
2030 if (r != 0) return r;
2031
2032 if (en->type == ENCLOSURE_IF_ELSE) {
2033 if (IS_NOT_NULL(en->te.Then)) {
2034 r = numbered_ref_check(en->te.Then);
2035 if (r != 0) return r;
2036 }
2037 if (IS_NOT_NULL(en->te.Else)) {
2038 r = numbered_ref_check(en->te.Else);
2039 if (r != 0) return r;
2040 }
2041 }
2042 }
2043
2044 break;
2045
2046 case NODE_BACKREF:
2047 if (! NODE_IS_BY_NAME(node))
2048 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
2049 break;
2050
2051 default:
2052 break;
2053 }
2054
2055 return r;
2056 }
2057
2058 static int
2059 disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
2060 {
2061 int r, i, pos, counter;
2062 int result;
2063 MemStatusType loc;
2064 GroupNumRemap* map;
2065
2066 map = (GroupNumRemap* )xmalloc(sizeof(GroupNumRemap) * (env->num_mem + 1));
2067 CHECK_NULL_RETURN_MEMERR(map);
2068 for (i = 1; i <= env->num_mem; i++) {
2069 map[i].new_val = 0;
2070 }
2071 counter = 0;
2072 r = noname_disable_map(root, map, &counter);
2073 if (r != 0) return r;
2074
2075 r = renumber_by_map(*root, map);
2076 if (r != 0) return r;
2077
2078 for (i = 1, pos = 1; i <= env->num_mem; i++) {
2079 if (map[i].new_val > 0) {
2080 SCANENV_MEMENV(env)[pos] = SCANENV_MEMENV(env)[i];
2081 pos++;
2082 }
2083 }
2084
2085 loc = env->capture_history;
2086 MEM_STATUS_CLEAR(env->capture_history);
2087 for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
2088 if (MEM_STATUS_AT(loc, i)) {
2089 MEM_STATUS_ON_SIMPLE(env->capture_history, map[i].new_val);
2090 }
2091 }
2092
2093 env->num_mem = env->num_named;
2094 reg->num_mem = env->num_named;
2095 result = onig_renumber_name_table(reg, map);
2096 xfree(map);
2097 return result;
2098 }
2099
2100 #ifdef USE_CALL
2101 static int
2102 fix_unset_addr_list(UnsetAddrList* uslist, regex_t* reg)
2103 {
2104 int i, offset;
2105 EnclosureNode* en;
2106 AbsAddrType addr;
2107
2108 for (i = 0; i < uslist->num; i++) {
2109 if (! NODE_IS_ADDR_FIXED(uslist->us[i].target))
2110 return ONIGERR_PARSER_BUG;
2111
2112 en = ENCLOSURE_(uslist->us[i].target);
2113 addr = en->m.called_addr;
2114 offset = uslist->us[i].offset;
2115
2116 BB_WRITE(reg, offset, &addr, SIZE_ABSADDR);
2117 }
2118 return 0;
2119 }
2120 #endif
2121
2122
2123 #define GET_CHAR_LEN_VARLEN -1
2124 #define GET_CHAR_LEN_TOP_ALT_VARLEN -2
2125
2126 /* fixed size pattern node only */
2127 static int
2128 get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
2129 {
2130 int tlen;
2131 int r = 0;
2132
2133 level++;
2134 *len = 0;
2135 switch (NODE_TYPE(node)) {
2136 case NODE_LIST:
2137 do {
2138 r = get_char_length_tree1(NODE_CAR(node), reg, &tlen, level);
2139 if (r == 0)
2140 *len = distance_add(*len, tlen);
2141 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
2142 break;
2143
2144 case NODE_ALT:
2145 {
2146 int tlen2;
2147 int varlen = 0;
2148
2149 r = get_char_length_tree1(NODE_CAR(node), reg, &tlen, level);
2150 while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node))) {
2151 r = get_char_length_tree1(NODE_CAR(node), reg, &tlen2, level);
2152 if (r == 0) {
2153 if (tlen != tlen2)
2154 varlen = 1;
2155 }
2156 }
2157 if (r == 0) {
2158 if (varlen != 0) {
2159 if (level == 1)
2160 r = GET_CHAR_LEN_TOP_ALT_VARLEN;
2161 else
2162 r = GET_CHAR_LEN_VARLEN;
2163 }
2164 else
2165 *len = tlen;
2166 }
2167 }
2168 break;
2169
2170 case NODE_STRING:
2171 {
2172 StrNode* sn = STR_(node);
2173 UChar *s = sn->s;
2174
2175 while (s < sn->end) {
2176 s += enclen(reg->enc, s);
2177 (*len)++;
2178 }
2179 }
2180 break;
2181
2182 case NODE_QUANT:
2183 {
2184 QuantNode* qn = QUANT_(node);
2185
2186 if (qn->lower == qn->upper) {
2187 if (qn->upper == 0) {
2188 *len = 0;
2189 }
2190 else {
2191 r = get_char_length_tree1(NODE_BODY(node), reg, &tlen, level);
2192 if (r == 0)
2193 *len = distance_multiply(tlen, qn->lower);
2194 }
2195 }
2196 else
2197 r = GET_CHAR_LEN_VARLEN;
2198 }
2199 break;
2200
2201 #ifdef USE_CALL
2202 case NODE_CALL:
2203 if (! NODE_IS_RECURSION(node))
2204 r = get_char_length_tree1(NODE_BODY(node), reg, len, level);
2205 else
2206 r = GET_CHAR_LEN_VARLEN;
2207 break;
2208 #endif
2209
2210 case NODE_CTYPE:
2211 case NODE_CCLASS:
2212 *len = 1;
2213 break;
2214
2215 case NODE_ENCLOSURE:
2216 {
2217 EnclosureNode* en = ENCLOSURE_(node);
2218
2219 switch (en->type) {
2220 case ENCLOSURE_MEMORY:
2221 #ifdef USE_CALL
2222 if (NODE_IS_CLEN_FIXED(node))
2223 *len = en->char_len;
2224 else {
2225 r = get_char_length_tree1(NODE_BODY(node), reg, len, level);
2226 if (r == 0) {
2227 en->char_len = *len;
2228 NODE_STATUS_ADD(node, CLEN_FIXED);
2229 }
2230 }
2231 break;
2232 #endif
2233 case ENCLOSURE_OPTION:
2234 case ENCLOSURE_STOP_BACKTRACK:
2235 r = get_char_length_tree1(NODE_BODY(node), reg, len, level);
2236 break;
2237 case ENCLOSURE_IF_ELSE:
2238 {
2239 int clen, elen;
2240
2241 r = get_char_length_tree1(NODE_BODY(node), reg, &clen, level);
2242 if (r == 0) {
2243 if (IS_NOT_NULL(en->te.Then)) {
2244 r = get_char_length_tree1(en->te.Then, reg, &tlen, level);
2245 if (r != 0) break;
2246 }
2247 else tlen = 0;
2248 if (IS_NOT_NULL(en->te.Else)) {
2249 r = get_char_length_tree1(en->te.Else, reg, &elen, level);
2250 if (r != 0) break;
2251 }
2252 else elen = 0;
2253
2254 if (clen + tlen != elen) {
2255 r = GET_CHAR_LEN_VARLEN;
2256 }
2257 else {
2258 *len = elen;
2259 }
2260 }
2261 }
2262 break;
2263
2264 default:
2265 break;
2266 }
2267 }
2268 break;
2269
2270 case NODE_ANCHOR:
2271 case NODE_GIMMICK:
2272 break;
2273
2274 case NODE_BACKREF:
2275 if (NODE_IS_CHECKER(node))
2276 break;
2277 /* fall */
2278 default:
2279 r = GET_CHAR_LEN_VARLEN;
2280 break;
2281 }
2282
2283 return r;
2284 }
2285
2286 static int
2287 get_char_length_tree(Node* node, regex_t* reg, int* len)
2288 {
2289 return get_char_length_tree1(node, reg, len, 0);
2290 }
2291
2292 /* x is not included y ==> 1 : 0 */
2293 static int
2294 is_exclusive(Node* x, Node* y, regex_t* reg)
2295 {
2296 int i, len;
2297 OnigCodePoint code;
2298 UChar *p;
2299 NodeType ytype;
2300
2301 retry:
2302 ytype = NODE_TYPE(y);
2303 switch (NODE_TYPE(x)) {
2304 case NODE_CTYPE:
2305 {
2306 if (CTYPE_(x)->ctype == CTYPE_ANYCHAR ||
2307 CTYPE_(y)->ctype == CTYPE_ANYCHAR)
2308 break;
2309
2310 switch (ytype) {
2311 case NODE_CTYPE:
2312 if (CTYPE_(y)->ctype == CTYPE_(x)->ctype &&
2313 CTYPE_(y)->not != CTYPE_(x)->not &&
2314 CTYPE_(y)->ascii_mode == CTYPE_(x)->ascii_mode)
2315 return 1;
2316 else
2317 return 0;
2318 break;
2319
2320 case NODE_CCLASS:
2321 swap:
2322 {
2323 Node* tmp;
2324 tmp = x; x = y; y = tmp;
2325 goto retry;
2326 }
2327 break;
2328
2329 case NODE_STRING:
2330 goto swap;
2331 break;
2332
2333 default:
2334 break;
2335 }
2336 }
2337 break;
2338
2339 case NODE_CCLASS:
2340 {
2341 int range;
2342 CClassNode* xc = CCLASS_(x);
2343
2344 switch (ytype) {
2345 case NODE_CTYPE:
2346 switch (CTYPE_(y)->ctype) {
2347 case CTYPE_ANYCHAR:
2348 return 0;
2349 break;
2350
2351 case ONIGENC_CTYPE_WORD:
2352 if (CTYPE_(y)->not == 0) {
2353 if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
2354 range = CTYPE_(y)->ascii_mode != 0 ? 128 : SINGLE_BYTE_SIZE;
2355 for (i = 0; i < range; i++) {
2356 if (BITSET_AT(xc->bs, i)) {
2357 if (ONIGENC_IS_CODE_WORD(reg->enc, i)) return 0;
2358 }
2359 }
2360 return 1;
2361 }
2362 return 0;
2363 }
2364 else {
2365 if (IS_NOT_NULL(xc->mbuf)) return 0;
2366 if (IS_NCCLASS_NOT(xc)) return 0;
2367
2368 range = CTYPE_(y)->ascii_mode != 0 ? 128 : SINGLE_BYTE_SIZE;
2369 for (i = 0; i < range; i++) {
2370 if (! ONIGENC_IS_CODE_WORD(reg->enc, i)) {
2371 if (BITSET_AT(xc->bs, i))
2372 return 0;
2373 }
2374 }
2375 for (i = range; i < SINGLE_BYTE_SIZE; i++) {
2376 if (BITSET_AT(xc->bs, i)) return 0;
2377 }
2378 return 1;
2379 }
2380 break;
2381
2382 default:
2383 break;
2384 }
2385 break;
2386
2387 case NODE_CCLASS:
2388 {
2389 int v;
2390 CClassNode* yc = CCLASS_(y);
2391
2392 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2393 v = BITSET_AT(xc->bs, i);
2394 if ((v != 0 && !IS_NCCLASS_NOT(xc)) || (v == 0 && IS_NCCLASS_NOT(xc))) {
2395 v = BITSET_AT(yc->bs, i);
2396 if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
2397 (v == 0 && IS_NCCLASS_NOT(yc)))
2398 return 0;
2399 }
2400 }
2401 if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
2402 (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
2403 return 1;
2404 return 0;
2405 }
2406 break;
2407
2408 case NODE_STRING:
2409 goto swap;
2410 break;
2411
2412 default:
2413 break;
2414 }
2415 }
2416 break;
2417
2418 case NODE_STRING:
2419 {
2420 StrNode* xs = STR_(x);
2421
2422 if (NODE_STRING_LEN(x) == 0)
2423 break;
2424
2425 switch (ytype) {
2426 case NODE_CTYPE:
2427 switch (CTYPE_(y)->ctype) {
2428 case CTYPE_ANYCHAR:
2429 break;
2430
2431 case ONIGENC_CTYPE_WORD:
2432 if (CTYPE_(y)->ascii_mode == 0) {
2433 if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
2434 return CTYPE_(y)->not;
2435 else
2436 return !(CTYPE_(y)->not);
2437 }
2438 else {
2439 if (ONIGENC_IS_MBC_WORD_ASCII(reg->enc, xs->s, xs->end))
2440 return CTYPE_(y)->not;
2441 else
2442 return !(CTYPE_(y)->not);
2443 }
2444 break;
2445 default:
2446 break;
2447 }
2448 break;
2449
2450 case NODE_CCLASS:
2451 {
2452 CClassNode* cc = CCLASS_(y);
2453
2454 code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
2455 xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
2456 return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
2457 }
2458 break;
2459
2460 case NODE_STRING:
2461 {
2462 UChar *q;
2463 StrNode* ys = STR_(y);
2464
2465 len = NODE_STRING_LEN(x);
2466 if (len > NODE_STRING_LEN(y)) len = NODE_STRING_LEN(y);
2467 if (NODE_STRING_IS_AMBIG(x) || NODE_STRING_IS_AMBIG(y)) {
2468 /* tiny version */
2469 return 0;
2470 }
2471 else {
2472 for (i = 0, p = ys->s, q = xs->s; i < len; i++, p++, q++) {
2473 if (*p != *q) return 1;
2474 }
2475 }
2476 }
2477 break;
2478
2479 default:
2480 break;
2481 }
2482 }
2483 break;
2484
2485 default:
2486 break;
2487 }
2488
2489 return 0;
2490 }
2491
2492 static Node*
2493 get_head_value_node(Node* node, int exact, regex_t* reg)
2494 {
2495 Node* n = NULL_NODE;
2496
2497 switch (NODE_TYPE(node)) {
2498 case NODE_BACKREF:
2499 case NODE_ALT:
2500 #ifdef USE_CALL
2501 case NODE_CALL:
2502 #endif
2503 break;
2504
2505 case NODE_CTYPE:
2506 if (CTYPE_(node)->ctype == CTYPE_ANYCHAR)
2507 break;
2508 /* fall */
2509 case NODE_CCLASS:
2510 if (exact == 0) {
2511 n = node;
2512 }
2513 break;
2514
2515 case NODE_LIST:
2516 n = get_head_value_node(NODE_CAR(node), exact, reg);
2517 break;
2518
2519 case NODE_STRING:
2520 {
2521 StrNode* sn = STR_(node);
2522
2523 if (sn->end <= sn->s)
2524 break;
2525
2526 if (exact != 0 &&
2527 !NODE_STRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
2528 }
2529 else {
2530 n = node;
2531 }
2532 }
2533 break;
2534
2535 case NODE_QUANT:
2536 {
2537 QuantNode* qn = QUANT_(node);
2538 if (qn->lower > 0) {
2539 if (IS_NOT_NULL(qn->head_exact))
2540 n = qn->head_exact;
2541 else
2542 n = get_head_value_node(NODE_BODY(node), exact, reg);
2543 }
2544 }
2545 break;
2546
2547 case NODE_ENCLOSURE:
2548 {
2549 EnclosureNode* en = ENCLOSURE_(node);
2550 switch (en->type) {
2551 case ENCLOSURE_OPTION:
2552 {
2553 OnigOptionType options = reg->options;
2554
2555 reg->options = ENCLOSURE_(node)->o.options;
2556 n = get_head_value_node(NODE_BODY(node), exact, reg);
2557 reg->options = options;
2558 }
2559 break;
2560
2561 case ENCLOSURE_MEMORY:
2562 case ENCLOSURE_STOP_BACKTRACK:
2563 case ENCLOSURE_IF_ELSE:
2564 n = get_head_value_node(NODE_BODY(node), exact, reg);
2565 break;
2566 }
2567 }
2568 break;
2569
2570 case NODE_ANCHOR:
2571 if (ANCHOR_(node)->type == ANCHOR_PREC_READ)
2572 n = get_head_value_node(NODE_BODY(node), exact, reg);
2573 break;
2574
2575 case NODE_GIMMICK:
2576 default:
2577 break;
2578 }
2579
2580 return n;
2581 }
2582
2583 static int
2584 check_type_tree(Node* node, int type_mask, int enclosure_mask, int anchor_mask)
2585 {
2586 NodeType type;
2587 int r = 0;
2588
2589 type = NODE_TYPE(node);
2590 if ((NODE_TYPE2BIT(type) & type_mask) == 0)
2591 return 1;
2592
2593 switch (type) {
2594 case NODE_LIST:
2595 case NODE_ALT:
2596 do {
2597 r = check_type_tree(NODE_CAR(node), type_mask, enclosure_mask,
2598 anchor_mask);
2599 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
2600 break;
2601
2602 case NODE_QUANT:
2603 r = check_type_tree(NODE_BODY(node), type_mask, enclosure_mask, anchor_mask);
2604 break;
2605
2606 case NODE_ENCLOSURE:
2607 {
2608 EnclosureNode* en = ENCLOSURE_(node);
2609 if (((1<<en->type) & enclosure_mask) == 0)
2610 return 1;
2611
2612 r = check_type_tree(NODE_BODY(node), type_mask, enclosure_mask, anchor_mask);
2613 if (r == 0 && en->type == ENCLOSURE_IF_ELSE) {
2614 if (IS_NOT_NULL(en->te.Then)) {
2615 r = check_type_tree(en->te.Then, type_mask, enclosure_mask, anchor_mask);
2616 if (r != 0) break;
2617 }
2618 if (IS_NOT_NULL(en->te.Else)) {
2619 r = check_type_tree(en->te.Else, type_mask, enclosure_mask, anchor_mask);
2620 }
2621 }
2622 }
2623 break;
2624
2625 case NODE_ANCHOR:
2626 type = ANCHOR_(node)->type;
2627 if ((type & anchor_mask) == 0)
2628 return 1;
2629
2630 if (IS_NOT_NULL(NODE_BODY(node)))
2631 r = check_type_tree(NODE_BODY(node), type_mask, enclosure_mask, anchor_mask);
2632 break;
2633
2634 case NODE_GIMMICK:
2635 default:
2636 break;
2637 }
2638 return r;
2639 }
2640
2641 static OnigLen
2642 tree_min_len(Node* node, ScanEnv* env)
2643 {
2644 OnigLen len;
2645 OnigLen tmin;
2646
2647 len = 0;
2648 switch (NODE_TYPE(node)) {
2649 case NODE_BACKREF:
2650 if (! NODE_IS_CHECKER(node)) {
2651 int i;
2652 int* backs;
2653 MemEnv* mem_env = SCANENV_MEMENV(env);
2654 BackRefNode* br = BACKREF_(node);
2655 if (NODE_IS_RECURSION(node)) break;
2656
2657 backs = BACKREFS_P(br);
2658 len = tree_min_len(mem_env[backs[0]].node, env);
2659 for (i = 1; i < br->back_num; i++) {
2660 tmin = tree_min_len(mem_env[backs[i]].node, env);
2661 if (len > tmin) len = tmin;
2662 }
2663 }
2664 break;
2665
2666 #ifdef USE_CALL
2667 case NODE_CALL:
2668 {
2669 Node* t = NODE_BODY(node);
2670 if (NODE_IS_RECURSION(node)) {
2671 if (NODE_IS_MIN_FIXED(t))
2672 len = ENCLOSURE_(t)->min_len;
2673 }
2674 else
2675 len = tree_min_len(t, env);
2676 }
2677 break;
2678 #endif
2679
2680 case NODE_LIST:
2681 do {
2682 tmin = tree_min_len(NODE_CAR(node), env);
2683 len = distance_add(len, tmin);
2684 } while (IS_NOT_NULL(node = NODE_CDR(node)));
2685 break;
2686
2687 case NODE_ALT:
2688 {
2689 Node *x, *y;
2690 y = node;
2691 do {
2692 x = NODE_CAR(y);
2693 tmin = tree_min_len(x, env);
2694 if (y == node) len = tmin;
2695 else if (len > tmin) len = tmin;
2696 } while (IS_NOT_NULL(y = NODE_CDR(y)));
2697 }
2698 break;
2699
2700 case NODE_STRING:
2701 {
2702 StrNode* sn = STR_(node);
2703 len = (int )(sn->end - sn->s);
2704 }
2705 break;
2706
2707 case NODE_CTYPE:
2708 case NODE_CCLASS:
2709 len = ONIGENC_MBC_MINLEN(env->enc);
2710 break;
2711
2712 case NODE_QUANT:
2713 {
2714 QuantNode* qn = QUANT_(node);
2715
2716 if (qn->lower > 0) {
2717 len = tree_min_len(NODE_BODY(node), env);
2718 len = distance_multiply(len, qn->lower);
2719 }
2720 }
2721 break;
2722
2723 case NODE_ENCLOSURE:
2724 {
2725 EnclosureNode* en = ENCLOSURE_(node);
2726 switch (en->type) {
2727 case ENCLOSURE_MEMORY:
2728 if (NODE_IS_MIN_FIXED(node))
2729 len = en->min_len;
2730 else {
2731 if (NODE_IS_MARK1(node))
2732 len = 0; /* recursive */
2733 else {
2734 NODE_STATUS_ADD(node, MARK1);
2735 len = tree_min_len(NODE_BODY(node), env);
2736 NODE_STATUS_REMOVE(node, MARK1);
2737
2738 en->min_len = len;
2739 NODE_STATUS_ADD(node, MIN_FIXED);
2740 }
2741 }
2742 break;
2743
2744 case ENCLOSURE_OPTION:
2745 case ENCLOSURE_STOP_BACKTRACK:
2746 len = tree_min_len(NODE_BODY(node), env);
2747 break;
2748 case ENCLOSURE_IF_ELSE:
2749 {
2750 OnigLen elen;
2751
2752 len = tree_min_len(NODE_BODY(node), env);
2753 if (IS_NOT_NULL(en->te.Then))
2754 len += tree_min_len(en->te.Then, env);
2755 if (IS_NOT_NULL(en->te.Else))
2756 elen = tree_min_len(en->te.Else, env);
2757 else elen = 0;
2758
2759 if (elen < len) len = elen;
2760 }
2761 break;
2762 }
2763 }
2764 break;
2765
2766 case NODE_GIMMICK:
2767 {
2768 GimmickNode* g = GIMMICK_(node);
2769 if (g->type == GIMMICK_FAIL) {
2770 len = INFINITE_LEN;
2771 break;
2772 }
2773 }
2774 /* fall */
2775 case NODE_ANCHOR:
2776 default:
2777 break;
2778 }
2779
2780 return len;
2781 }
2782
2783 static OnigLen
2784 tree_max_len(Node* node, ScanEnv* env)
2785 {
2786 OnigLen len;
2787 OnigLen tmax;
2788
2789 len = 0;
2790 switch (NODE_TYPE(node)) {
2791 case NODE_LIST:
2792 do {
2793 tmax = tree_max_len(NODE_CAR(node), env);
2794 len = distance_add(len, tmax);
2795 } while (IS_NOT_NULL(node = NODE_CDR(node)));
2796 break;
2797
2798 case NODE_ALT:
2799 do {
2800 tmax = tree_max_len(NODE_CAR(node), env);
2801 if (len < tmax) len = tmax;
2802 } while (IS_NOT_NULL(node = NODE_CDR(node)));
2803 break;
2804
2805 case NODE_STRING:
2806 {
2807 StrNode* sn = STR_(node);
2808 len = (OnigLen )(sn->end - sn->s);
2809 }
2810 break;
2811
2812 case NODE_CTYPE:
2813 case NODE_CCLASS:
2814 len = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2815 break;
2816
2817 case NODE_BACKREF:
2818 if (! NODE_IS_CHECKER(node)) {
2819 int i;
2820 int* backs;
2821 MemEnv* mem_env = SCANENV_MEMENV(env);
2822 BackRefNode* br = BACKREF_(node);
2823 if (NODE_IS_RECURSION(node)) {
2824 len = INFINITE_LEN;
2825 break;
2826 }
2827 backs = BACKREFS_P(br);
2828 for (i = 0; i < br->back_num; i++) {
2829 tmax = tree_max_len(mem_env[backs[i]].node, env);
2830 if (len < tmax) len = tmax;
2831 }
2832 }
2833 break;
2834
2835 #ifdef USE_CALL
2836 case NODE_CALL:
2837 if (! NODE_IS_RECURSION(node))
2838 len = tree_max_len(NODE_BODY(node), env);
2839 else
2840 len = INFINITE_LEN;
2841 break;
2842 #endif
2843
2844 case NODE_QUANT:
2845 {
2846 QuantNode* qn = QUANT_(node);
2847
2848 if (qn->upper != 0) {
2849 len = tree_max_len(NODE_BODY(node), env);
2850 if (len != 0) {
2851 if (! IS_REPEAT_INFINITE(qn->upper))
2852 len = distance_multiply(len, qn->upper);
2853 else
2854 len = INFINITE_LEN;
2855 }
2856 }
2857 }
2858 break;
2859
2860 case NODE_ENCLOSURE:
2861 {
2862 EnclosureNode* en = ENCLOSURE_(node);
2863 switch (en->type) {
2864 case ENCLOSURE_MEMORY:
2865 if (NODE_IS_MAX_FIXED(node))
2866 len = en->max_len;
2867 else {
2868 if (NODE_IS_MARK1(node))
2869 len = INFINITE_LEN;
2870 else {
2871 NODE_STATUS_ADD(node, MARK1);
2872 len = tree_max_len(NODE_BODY(node), env);
2873 NODE_STATUS_REMOVE(node, MARK1);
2874
2875 en->max_len = len;
2876 NODE_STATUS_ADD(node, MAX_FIXED);
2877 }
2878 }
2879 break;
2880
2881 case ENCLOSURE_OPTION:
2882 case ENCLOSURE_STOP_BACKTRACK:
2883 len = tree_max_len(NODE_BODY(node), env);
2884 break;
2885 case ENCLOSURE_IF_ELSE:
2886 {
2887 OnigLen tlen, elen;
2888
2889 len = tree_max_len(NODE_BODY(node), env);
2890 if (IS_NOT_NULL(en->te.Then)) {
2891 tlen = tree_max_len(en->te.Then, env);
2892 len = distance_add(len, tlen);
2893 }
2894 if (IS_NOT_NULL(en->te.Else))
2895 elen = tree_max_len(en->te.Else, env);
2896 else elen = 0;
2897
2898 if (elen > len) len = elen;
2899 }
2900 break;
2901 }
2902 }
2903 break;
2904
2905 case NODE_ANCHOR:
2906 case NODE_GIMMICK:
2907 default:
2908 break;
2909 }
2910
2911 return len;
2912 }
2913
2914 static int
2915 check_backrefs(Node* node, ScanEnv* env)
2916 {
2917 int r;
2918
2919 switch (NODE_TYPE(node)) {
2920 case NODE_LIST:
2921 case NODE_ALT:
2922 do {
2923 r = check_backrefs(NODE_CAR(node), env);
2924 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
2925 break;
2926
2927 case NODE_ANCHOR:
2928 if (! ANCHOR_HAS_BODY(ANCHOR_(node))) {
2929 r = 0;
2930 break;
2931 }
2932 /* fall */
2933 case NODE_QUANT:
2934 r = check_backrefs(NODE_BODY(node), env);
2935 break;
2936
2937 case NODE_ENCLOSURE:
2938 r = check_backrefs(NODE_BODY(node), env);
2939 {
2940 EnclosureNode* en = ENCLOSURE_(node);
2941
2942 if (en->type == ENCLOSURE_IF_ELSE) {
2943 if (r != 0) return r;
2944 if (IS_NOT_NULL(en->te.Then)) {
2945 r = check_backrefs(en->te.Then, env);
2946 if (r != 0) return r;
2947 }
2948 if (IS_NOT_NULL(en->te.Else)) {
2949 r = check_backrefs(en->te.Else, env);
2950 }
2951 }
2952 }
2953 break;
2954
2955 case NODE_BACKREF:
2956 {
2957 int i;
2958 BackRefNode* br = BACKREF_(node);
2959 int* backs = BACKREFS_P(br);
2960 MemEnv* mem_env = SCANENV_MEMENV(env);
2961
2962 for (i = 0; i < br->back_num; i++) {
2963 if (backs[i] > env->num_mem)
2964 return ONIGERR_INVALID_BACKREF;
2965
2966 NODE_STATUS_ADD(mem_env[backs[i]].node, BACKREF);
2967 }
2968 r = 0;
2969 }
2970 break;
2971
2972 default:
2973 r = 0;
2974 break;
2975 }
2976
2977 return r;
2978 }
2979
2980
2981 #ifdef USE_CALL
2982
2983 #define RECURSION_EXIST (1<<0)
2984 #define RECURSION_MUST (1<<1)
2985 #define RECURSION_INFINITE (1<<2)
2986
2987 static int
2988 infinite_recursive_call_check(Node* node, ScanEnv* env, int head)
2989 {
2990 int ret;
2991 int r = 0;
2992
2993 switch (NODE_TYPE(node)) {
2994 case NODE_LIST:
2995 {
2996 Node *x;
2997 OnigLen min;
2998
2999 x = node;
3000 do {
3001 ret = infinite_recursive_call_check(NODE_CAR(x), env, head);
3002 if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;
3003 r |= ret;
3004 if (head != 0) {
3005 min = tree_min_len(NODE_CAR(x), env);
3006 if (min != 0) head = 0;
3007 }
3008 } while (IS_NOT_NULL(x = NODE_CDR(x)));
3009 }
3010 break;
3011
3012 case NODE_ALT:
3013 {
3014 int must;
3015
3016 must = RECURSION_MUST;
3017 do {
3018 ret = infinite_recursive_call_check(NODE_CAR(node), env, head);
3019 if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;
3020
3021 r |= (ret & RECURSION_EXIST);
3022 must &= ret;
3023 } while (IS_NOT_NULL(node = NODE_CDR(node)));
3024 r |= must;
3025 }
3026 break;
3027
3028 case NODE_QUANT:
3029 r = infinite_recursive_call_check(NODE_BODY(node), env, head);
3030 if (r < 0) return r;
3031 if ((r & RECURSION_MUST) != 0) {
3032 if (QUANT_(node)->lower == 0)
3033 r &= ~RECURSION_MUST;
3034 }
3035 break;
3036
3037 case NODE_ANCHOR:
3038 if (! ANCHOR_HAS_BODY(ANCHOR_(node)))
3039 break;
3040 /* fall */
3041 case NODE_CALL:
3042 r = infinite_recursive_call_check(NODE_BODY(node), env, head);
3043 break;
3044
3045 case NODE_ENCLOSURE:
3046 {
3047 EnclosureNode* en = ENCLOSURE_(node);
3048
3049 if (en->type == ENCLOSURE_MEMORY) {
3050 if (NODE_IS_MARK2(node))
3051 return 0;
3052 else if (NODE_IS_MARK1(node))
3053 return (head == 0 ? RECURSION_EXIST | RECURSION_MUST
3054 : RECURSION_EXIST | RECURSION_MUST | RECURSION_INFINITE);
3055 else {
3056 NODE_STATUS_ADD(node, MARK2);
3057 r = infinite_recursive_call_check(NODE_BODY(node), env, head);
3058 NODE_STATUS_REMOVE(node, MARK2);
3059 }
3060 }
3061 else if (en->type == ENCLOSURE_IF_ELSE) {
3062 int eret;
3063
3064 ret = infinite_recursive_call_check(NODE_BODY(node), env, head);
3065 if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;
3066 r |= ret;
3067 if (IS_NOT_NULL(en->te.Then)) {
3068 OnigLen min;
3069 if (head != 0) {
3070 min = tree_min_len(NODE_BODY(node), env);
3071 }
3072 else min = 0;
3073
3074 ret = infinite_recursive_call_check(en->te.Then, env, min != 0 ? 0:head);
3075 if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;
3076 r |= ret;
3077 }
3078 if (IS_NOT_NULL(en->te.Else)) {
3079 eret = infinite_recursive_call_check(en->te.Else, env, head);
3080 if (eret < 0 || (eret & RECURSION_INFINITE) != 0) return eret;
3081 r |= (eret & RECURSION_EXIST);
3082 if ((eret & RECURSION_MUST) == 0)
3083 r &= ~RECURSION_MUST;
3084 }
3085 }
3086 else {
3087 r = infinite_recursive_call_check(NODE_BODY(node), env, head);
3088 }
3089 }
3090 break;
3091
3092 default:
3093 break;
3094 }
3095
3096 return r;
3097 }
3098
3099 static int
3100 infinite_recursive_call_check_trav(Node* node, ScanEnv* env)
3101 {
3102 int r;
3103
3104 switch (NODE_TYPE(node)) {
3105 case NODE_LIST:
3106 case NODE_ALT:
3107 do {
3108 r = infinite_recursive_call_check_trav(NODE_CAR(node), env);
3109 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
3110 break;
3111
3112 case NODE_ANCHOR:
3113 if (! ANCHOR_HAS_BODY(ANCHOR_(node))) {
3114 r = 0;
3115 break;
3116 }
3117 /* fall */
3118 case NODE_QUANT:
3119 r = infinite_recursive_call_check_trav(NODE_BODY(node), env);
3120 break;
3121
3122 case NODE_ENCLOSURE:
3123 {
3124 EnclosureNode* en = ENCLOSURE_(node);
3125
3126 if (en->type == ENCLOSURE_MEMORY) {
3127 if (NODE_IS_RECURSION(node) && NODE_IS_CALLED(node)) {
3128 int ret;
3129
3130 NODE_STATUS_ADD(node, MARK1);
3131
3132 ret = infinite_recursive_call_check(NODE_BODY(node), env, 1);
3133 if (ret < 0) return ret;
3134 else if ((ret & (RECURSION_MUST | RECURSION_INFINITE)) != 0)
3135 return ONIGERR_NEVER_ENDING_RECURSION;
3136
3137 NODE_STATUS_REMOVE(node, MARK1);
3138 }
3139 }
3140 else if (en->type == ENCLOSURE_IF_ELSE) {
3141 if (IS_NOT_NULL(en->te.Then)) {
3142 r = infinite_recursive_call_check_trav(en->te.Then, env);
3143 if (r != 0) return r;
3144 }
3145 if (IS_NOT_NULL(en->te.Else)) {
3146 r = infinite_recursive_call_check_trav(en->te.Else, env);
3147 if (r != 0) return r;
3148 }
3149 }
3150 }
3151
3152 r = infinite_recursive_call_check_trav(NODE_BODY(node), env);
3153 break;
3154
3155 default:
3156 r = 0;
3157 break;
3158 }
3159
3160 return r;
3161 }
3162
3163 static int
3164 recursive_call_check(Node* node)
3165 {
3166 int r;
3167
3168 switch (NODE_TYPE(node)) {
3169 case NODE_LIST:
3170 case NODE_ALT:
3171 r = 0;
3172 do {
3173 r |= recursive_call_check(NODE_CAR(node));
3174 } while (IS_NOT_NULL(node = NODE_CDR(node)));
3175 break;
3176
3177 case NODE_ANCHOR:
3178 if (! ANCHOR_HAS_BODY(ANCHOR_(node))) {
3179 r = 0;
3180 break;
3181 }
3182 /* fall */
3183 case NODE_QUANT:
3184 r = recursive_call_check(NODE_BODY(node));
3185 break;
3186
3187 case NODE_CALL:
3188 r = recursive_call_check(NODE_BODY(node));
3189 if (r != 0) {
3190 if (NODE_IS_MARK1(NODE_BODY(node)))
3191 NODE_STATUS_ADD(node, RECURSION);
3192 }
3193 break;
3194
3195 case NODE_ENCLOSURE:
3196 {
3197 EnclosureNode* en = ENCLOSURE_(node);
3198
3199 if (en->type == ENCLOSURE_MEMORY) {
3200 if (NODE_IS_MARK2(node))
3201 return 0;
3202 else if (NODE_IS_MARK1(node))
3203 return 1; /* recursion */
3204 else {
3205 NODE_STATUS_ADD(node, MARK2);
3206 r = recursive_call_check(NODE_BODY(node));
3207 NODE_STATUS_REMOVE(node, MARK2);
3208 }
3209 }
3210 else if (en->type == ENCLOSURE_IF_ELSE) {
3211 r = 0;
3212 if (IS_NOT_NULL(en->te.Then)) {
3213 r |= recursive_call_check(en->te.Then);
3214 }
3215 if (IS_NOT_NULL(en->te.Else)) {
3216 r |= recursive_call_check(en->te.Else);
3217 }
3218 r |= recursive_call_check(NODE_BODY(node));
3219 }
3220 else {
3221 r = recursive_call_check(NODE_BODY(node));
3222 }
3223 }
3224 break;
3225
3226 default:
3227 r = 0;
3228 break;
3229 }
3230
3231 return r;
3232 }
3233
3234 #define IN_RECURSION (1<<0)
3235 #define FOUND_CALLED_NODE 1
3236
3237 static int
3238 recursive_call_check_trav(Node* node, ScanEnv* env, int state)
3239 {
3240 int r = 0;
3241
3242 switch (NODE_TYPE(node)) {
3243 case NODE_LIST:
3244 case NODE_ALT:
3245 {
3246 int ret;
3247 do {
3248 ret = recursive_call_check_trav(NODE_CAR(node), env, state);
3249 if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
3250 else if (ret < 0) return ret;
3251 } while (IS_NOT_NULL(node = NODE_CDR(node)));
3252 }
3253 break;
3254
3255 case NODE_QUANT:
3256 r = recursive_call_check_trav(NODE_BODY(node), env, state);
3257 if (QUANT_(node)->upper == 0) {
3258 if (r == FOUND_CALLED_NODE)
3259 QUANT_(node)->is_refered = 1;
3260 }
3261 break;
3262
3263 case NODE_ANCHOR:
3264 {
3265 AnchorNode* an = ANCHOR_(node);
3266 if (ANCHOR_HAS_BODY(an))
3267 r = recursive_call_check_trav(NODE_ANCHOR_BODY(an), env, state);
3268 }
3269 break;
3270
3271 case NODE_ENCLOSURE:
3272 {
3273 int ret;
3274 int state1;
3275 EnclosureNode* en = ENCLOSURE_(node);
3276
3277 if (en->type == ENCLOSURE_MEMORY) {
3278 if (NODE_IS_CALLED(node) || (state & IN_RECURSION) != 0) {
3279 if (! NODE_IS_RECURSION(node)) {
3280 NODE_STATUS_ADD(node, MARK1);
3281 r = recursive_call_check(NODE_BODY(node));
3282 if (r != 0)
3283 NODE_STATUS_ADD(node, RECURSION);
3284 NODE_STATUS_REMOVE(node, MARK1);
3285 }
3286
3287 if (NODE_IS_CALLED(node))
3288 r = FOUND_CALLED_NODE;
3289 }
3290 }
3291
3292 state1 = state;
3293 if (NODE_IS_RECURSION(node))
3294 state1 |= IN_RECURSION;
3295
3296 ret = recursive_call_check_trav(NODE_BODY(node), env, state1);
3297 if (ret == FOUND_CALLED_NODE)
3298 r = FOUND_CALLED_NODE;
3299
3300 if (en->type == ENCLOSURE_IF_ELSE) {
3301 if (IS_NOT_NULL(en->te.Then)) {
3302 ret = recursive_call_check_trav(en->te.Then, env, state1);
3303 if (ret == FOUND_CALLED_NODE)
3304 r = FOUND_CALLED_NODE;
3305 }
3306 if (IS_NOT_NULL(en->te.Else)) {
3307 ret = recursive_call_check_trav(en->te.Else, env, state1);
3308 if (ret == FOUND_CALLED_NODE)
3309 r = FOUND_CALLED_NODE;
3310 }
3311 }
3312 }
3313 break;
3314
3315 default:
3316 break;
3317 }
3318
3319 return r;
3320 }
3321
3322 #endif
3323
3324 /* divide different length alternatives in look-behind.
3325 (?<=A|B) ==> (?<=A)|(?<=B)
3326 (?<!A|B) ==> (?<!A)(?<!B)
3327 */
3328 static int
3329 divide_look_behind_alternatives(Node* node)
3330 {
3331 Node *head, *np, *insert_node;
3332 AnchorNode* an = ANCHOR_(node);
3333 int anc_type = an->type;
3334
3335 head = NODE_ANCHOR_BODY(an);
3336 np = NODE_CAR(head);
3337 swap_node(node, head);
3338 NODE_CAR(node) = head;
3339 NODE_BODY(head) = np;
3340
3341 np = node;
3342 while (IS_NOT_NULL(np = NODE_CDR(np))) {
3343 insert_node = onig_node_new_anchor(anc_type, an->ascii_mode);
3344 CHECK_NULL_RETURN_MEMERR(insert_node);
3345 NODE_BODY(insert_node) = NODE_CAR(np);
3346 NODE_CAR(np) = insert_node;
3347 }
3348
3349 if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
3350 np = node;
3351 do {
3352 NODE_SET_TYPE(np, NODE_LIST); /* alt -> list */
3353 } while (IS_NOT_NULL(np = NODE_CDR(np)));
3354 }
3355 return 0;
3356 }
3357
3358 static int
3359 setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
3360 {
3361 int r, len;
3362 AnchorNode* an = ANCHOR_(node);
3363
3364 r = get_char_length_tree(NODE_ANCHOR_BODY(an), reg, &len);
3365 if (r == 0)
3366 an->char_len = len;
3367 else if (r == GET_CHAR_LEN_VARLEN)
3368 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3369 else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
3370 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
3371 r = divide_look_behind_alternatives(node);
3372 else
3373 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3374 }
3375
3376 return r;
3377 }
3378
3379 static int
3380 next_setup(Node* node, Node* next_node, regex_t* reg)
3381 {
3382 NodeType type;
3383
3384 retry:
3385 type = NODE_TYPE(node);
3386 if (type == NODE_QUANT) {
3387 QuantNode* qn = QUANT_(node);
3388 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
3389 #ifdef USE_QUANT_PEEK_NEXT
3390 Node* n = get_head_value_node(next_node, 1, reg);
3391 /* '\0': for UTF-16BE etc... */
3392 if (IS_NOT_NULL(n) && STR_(n)->s[0] != '\0') {
3393 qn->next_head_exact = n;
3394 }
3395 #endif
3396 /* automatic posseivation a*b ==> (?>a*)b */
3397 if (qn->lower <= 1) {
3398 if (NODE_IS_SIMPLE_TYPE(NODE_BODY(node))) {
3399 Node *x, *y;
3400 x = get_head_value_node(NODE_BODY(node), 0, reg);
3401 if (IS_NOT_NULL(x)) {
3402 y = get_head_value_node(next_node, 0, reg);
3403 if (IS_NOT_NULL(y) && is_exclusive(x, y, reg)) {
3404 Node* en = onig_node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);
3405 CHECK_NULL_RETURN_MEMERR(en);
3406 NODE_STATUS_ADD(en, STOP_BT_SIMPLE_REPEAT);
3407 swap_node(node, en);
3408 NODE_BODY(node) = en;
3409 }
3410 }
3411 }
3412 }
3413 }
3414 }
3415 else if (type == NODE_ENCLOSURE) {
3416 EnclosureNode* en = ENCLOSURE_(node);
3417 if (en->type == ENCLOSURE_MEMORY) {
3418 node = NODE_BODY(node);
3419 goto retry;
3420 }
3421 }
3422 return 0;
3423 }
3424
3425
3426 static int
3427 update_string_node_case_fold(regex_t* reg, Node *node)
3428 {
3429 UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
3430 UChar *sbuf, *ebuf, *sp;
3431 int r, i, len, sbuf_size;
3432 StrNode* sn = STR_(node);
3433
3434 end = sn->end;
3435 sbuf_size = (int )(end - sn->s) * 2;
3436 sbuf = (UChar* )xmalloc(sbuf_size);
3437 CHECK_NULL_RETURN_MEMERR(sbuf);
3438 ebuf = sbuf + sbuf_size;
3439
3440 sp = sbuf;
3441 p = sn->s;
3442 while (p < end) {
3443 len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
3444 for (i = 0; i < len; i++) {
3445 if (sp >= ebuf) {
3446 sbuf = (UChar* )xrealloc(sbuf, sbuf_size * 2, sbuf_size);
3447 CHECK_NULL_RETURN_MEMERR(sbuf);
3448 sp = sbuf + sbuf_size;
3449 sbuf_size *= 2;
3450 ebuf = sbuf + sbuf_size;
3451 }
3452
3453 *sp++ = buf[i];
3454 }
3455 }
3456
3457 r = onig_node_str_set(node, sbuf, sp);
3458 if (r != 0) {
3459 xfree(sbuf);
3460 return r;
3461 }
3462
3463 xfree(sbuf);
3464 return 0;
3465 }
3466
3467 static int
3468 expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end, regex_t* reg)
3469 {
3470 int r;
3471 Node *node;
3472
3473 node = onig_node_new_str(s, end);
3474 if (IS_NULL(node)) return ONIGERR_MEMORY;
3475
3476 r = update_string_node_case_fold(reg, node);
3477 if (r != 0) {
3478 onig_node_free(node);
3479 return r;
3480 }
3481
3482 NODE_STRING_SET_AMBIG(node);
3483 NODE_STRING_SET_DONT_GET_OPT_INFO(node);
3484 *rnode = node;
3485 return 0;
3486 }
3487
3488 static int
3489 expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], UChar *p,
3490 int slen, UChar *end, regex_t* reg, Node **rnode)
3491 {
3492 int r, i, j;
3493 int len;
3494 int varlen;
3495 Node *anode, *var_anode, *snode, *xnode, *an;
3496 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
3497
3498 *rnode = var_anode = NULL_NODE;
3499
3500 varlen = 0;
3501 for (i = 0; i < item_num; i++) {
3502 if (items[i].byte_len != slen) {
3503 varlen = 1;
3504 break;
3505 }
3506 }
3507
3508 if (varlen != 0) {
3509 *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3510 if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
3511
3512 xnode = onig_node_new_list(NULL, NULL);
3513 if (IS_NULL(xnode)) goto mem_err;
3514 NODE_CAR(var_anode) = xnode;
3515
3516 anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3517 if (IS_NULL(anode)) goto mem_err;
3518 NODE_CAR(xnode) = anode;
3519 }
3520 else {
3521 *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3522 if (IS_NULL(anode)) return ONIGERR_MEMORY;
3523 }
3524
3525 snode = onig_node_new_str(p, p + slen);
3526 if (IS_NULL(snode)) goto mem_err;
3527
3528 NODE_CAR(anode) = snode;
3529
3530 for (i = 0; i < item_num; i++) {
3531 snode = onig_node_new_str(NULL, NULL);
3532 if (IS_NULL(snode)) goto mem_err;
3533
3534 for (j = 0; j < items[i].code_len; j++) {
3535 len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
3536 if (len < 0) {
3537 r = len;
3538 goto mem_err2;
3539 }
3540
3541 r = onig_node_str_cat(snode, buf, buf + len);
3542 if (r != 0) goto mem_err2;
3543 }
3544
3545 an = onig_node_new_alt(NULL_NODE, NULL_NODE);
3546 if (IS_NULL(an)) {
3547 goto mem_err2;
3548 }
3549
3550 if (items[i].byte_len != slen) {
3551 Node *rem;
3552 UChar *q = p + items[i].byte_len;
3553
3554 if (q < end) {
3555 r = expand_case_fold_make_rem_string(&rem, q, end, reg);
3556 if (r != 0) {
3557 onig_node_free(an);
3558 goto mem_err2;
3559 }
3560
3561 xnode = onig_node_list_add(NULL_NODE, snode);
3562 if (IS_NULL(xnode)) {
3563 onig_node_free(an);
3564 onig_node_free(rem);
3565 goto mem_err2;
3566 }
3567 if (IS_NULL(onig_node_list_add(xnode, rem))) {
3568 onig_node_free(an);
3569 onig_node_free(xnode);
3570 onig_node_free(rem);
3571 goto mem_err;
3572 }
3573
3574 NODE_CAR(an) = xnode;
3575 }
3576 else {
3577 NODE_CAR(an) = snode;
3578 }
3579
3580 NODE_CDR(var_anode) = an;
3581 var_anode = an;
3582 }
3583 else {
3584 NODE_CAR(an) = snode;
3585 NODE_CDR(anode) = an;
3586 anode = an;
3587 }
3588 }
3589
3590 return varlen;
3591
3592 mem_err2:
3593 onig_node_free(snode);
3594
3595 mem_err:
3596 onig_node_free(*rnode);
3597
3598 return ONIGERR_MEMORY;
3599 }
3600
3601 static int
3602 expand_case_fold_string(Node* node, regex_t* reg)
3603 {
3604 #define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
3605
3606 int r, n, len, alt_num;
3607 UChar *start, *end, *p;
3608 Node *top_root, *root, *snode, *prev_node;
3609 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
3610 StrNode* sn = STR_(node);
3611
3612 if (NODE_STRING_IS_AMBIG(node)) return 0;
3613
3614 start = sn->s;
3615 end = sn->end;
3616 if (start >= end) return 0;
3617
3618 r = 0;
3619 top_root = root = prev_node = snode = NULL_NODE;
3620 alt_num = 1;
3621 p = start;
3622 while (p < end) {
3623 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag, p, end,
3624 items);
3625 if (n < 0) {
3626 r = n;
3627 goto err;
3628 }
3629
3630 len = enclen(reg->enc, p);
3631
3632 if (n == 0) {
3633 if (IS_NULL(snode)) {
3634 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3635 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3636 if (IS_NULL(root)) {
3637 onig_node_free(prev_node);
3638 goto mem_err;
3639 }
3640 }
3641
3642 prev_node = snode = onig_node_new_str(NULL, NULL);
3643 if (IS_NULL(snode)) goto mem_err;
3644 if (IS_NOT_NULL(root)) {
3645 if (IS_NULL(onig_node_list_add(root, snode))) {
3646 onig_node_free(snode);
3647 goto mem_err;
3648 }
3649 }
3650 }
3651
3652 r = onig_node_str_cat(snode, p, p + len);
3653 if (r != 0) goto err;
3654 }
3655 else {
3656 alt_num *= (n + 1);
3657 if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
3658
3659 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3660 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3661 if (IS_NULL(root)) {
3662 onig_node_free(prev_node);
3663 goto mem_err;
3664 }
3665 }
3666
3667 r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
3668 if (r < 0) goto mem_err;
3669 if (r == 1) {
3670 if (IS_NULL(root)) {
3671 top_root = prev_node;
3672 }
3673 else {
3674 if (IS_NULL(onig_node_list_add(root, prev_node))) {
3675 onig_node_free(prev_node);
3676 goto mem_err;
3677 }
3678 }
3679
3680 root = NODE_CAR(prev_node);
3681 }
3682 else { /* r == 0 */
3683 if (IS_NOT_NULL(root)) {
3684 if (IS_NULL(onig_node_list_add(root, prev_node))) {
3685 onig_node_free(prev_node);
3686 goto mem_err;
3687 }
3688 }
3689 }
3690
3691 snode = NULL_NODE;
3692 }
3693
3694 p += len;
3695 }
3696
3697 if (p < end) {
3698 Node *srem;
3699
3700 r = expand_case_fold_make_rem_string(&srem, p, end, reg);
3701 if (r != 0) goto mem_err;
3702
3703 if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
3704 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3705 if (IS_NULL(root)) {
3706 onig_node_free(srem);
3707 onig_node_free(prev_node);
3708 goto mem_err;
3709 }
3710 }
3711
3712 if (IS_NULL(root)) {
3713 prev_node = srem;
3714 }
3715 else {
3716 if (IS_NULL(onig_node_list_add(root, srem))) {
3717 onig_node_free(srem);
3718 goto mem_err;
3719 }
3720 }
3721 }
3722
3723 /* ending */
3724 top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
3725 swap_node(node, top_root);
3726 onig_node_free(top_root);
3727 return 0;
3728
3729 mem_err:
3730 r = ONIGERR_MEMORY;
3731
3732 err:
3733 onig_node_free(top_root);
3734 return r;
3735 }
3736
3737 #ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT
3738 static enum QuantBodyEmpty
3739 quantifiers_memory_node_info(Node* node)
3740 {
3741 int r = QUANT_BODY_IS_EMPTY;
3742
3743 switch (NODE_TYPE(node)) {
3744 case NODE_LIST:
3745 case NODE_ALT:
3746 {
3747 int v;
3748 do {
3749 v = quantifiers_memory_node_info(NODE_CAR(node));
3750 if (v > r) r = v;
3751 } while (IS_NOT_NULL(node = NODE_CDR(node)));
3752 }
3753 break;
3754
3755 #ifdef USE_CALL
3756 case NODE_CALL:
3757 if (NODE_IS_RECURSION(node)) {
3758 return QUANT_BODY_IS_EMPTY_REC; /* tiny version */
3759 }
3760 else
3761 r = quantifiers_memory_node_info(NODE_BODY(node));
3762 break;
3763 #endif
3764
3765 case NODE_QUANT:
3766 {
3767 QuantNode* qn = QUANT_(node);
3768 if (qn->upper != 0) {
3769 r = quantifiers_memory_node_info(NODE_BODY(node));
3770 }
3771 }
3772 break;
3773
3774 case NODE_ENCLOSURE:
3775 {
3776 EnclosureNode* en = ENCLOSURE_(node);
3777 switch (en->type) {
3778 case ENCLOSURE_MEMORY:
3779 if (NODE_IS_RECURSION(node)) {
3780 return QUANT_BODY_IS_EMPTY_REC;
3781 }
3782 return QUANT_BODY_IS_EMPTY_MEM;
3783 break;
3784
3785 case ENCLOSURE_OPTION:
3786 case ENCLOSURE_STOP_BACKTRACK:
3787 r = quantifiers_memory_node_info(NODE_BODY(node));
3788 break;
3789 case ENCLOSURE_IF_ELSE:
3790 {
3791 int v;
3792 r = quantifiers_memory_node_info(NODE_BODY(node));
3793 if (IS_NOT_NULL(en->te.Then)) {
3794 v = quantifiers_memory_node_info(en->te.Then);
3795 if (v > r) r = v;
3796 }
3797 if (IS_NOT_NULL(en->te.Else)) {
3798 v = quantifiers_memory_node_info(en->te.Else);
3799 if (v > r) r = v;
3800 }
3801 }
3802 break;
3803 default:
3804 break;
3805 }
3806 }
3807 break;
3808
3809 case NODE_BACKREF:
3810 case NODE_STRING:
3811 case NODE_CTYPE:
3812 case NODE_CCLASS:
3813 case NODE_ANCHOR:
3814 case NODE_GIMMICK:
3815 default:
3816 break;
3817 }
3818
3819 return r;
3820 }
3821 #endif /* USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT */
3822
3823
3824 #define IN_ALT (1<<0)
3825 #define IN_NOT (1<<1)
3826 #define IN_REAL_REPEAT (1<<2)
3827 #define IN_VAR_REPEAT (1<<3)
3828 #define IN_ZERO_REPEAT (1<<4)
3829 #define IN_MULTI_ENTRY (1<<5)
3830
3831 #ifdef USE_CALL
3832
3833 #ifdef __GNUC__
3834 __inline
3835 #endif
3836 static int
3837 setup_call_node_call(CallNode* cn, ScanEnv* env, int state)
3838 {
3839 MemEnv* mem_env = SCANENV_MEMENV(env);
3840
3841 if (cn->by_number != 0) {
3842 int gnum = cn->group_num;
3843
3844 if (env->num_named > 0 &&
3845 IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
3846 ! ONIG_IS_OPTION_ON(env->options, ONIG_OPTION_CAPTURE_GROUP)) {
3847 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
3848 }
3849
3850 if (gnum > env->num_mem) {
3851 onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_GROUP_REFERENCE,
3852 cn->name, cn->name_end);
3853 return ONIGERR_UNDEFINED_GROUP_REFERENCE;
3854 }
3855
3856 set_call_attr:
3857 NODE_CALL_BODY(cn) = mem_env[cn->group_num].node;
3858 if (IS_NULL(NODE_CALL_BODY(cn))) {
3859 onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,
3860 cn->name, cn->name_end);
3861 return ONIGERR_UNDEFINED_NAME_REFERENCE;
3862 }
3863 }
3864 else {
3865 int *refs;
3866
3867 int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end, &refs);
3868 if (n <= 0) {
3869 onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,
3870 cn->name, cn->name_end);
3871 return ONIGERR_UNDEFINED_NAME_REFERENCE;
3872 }
3873 else if (n > 1) {
3874 onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL,
3875 cn->name, cn->name_end);
3876 return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
3877 }
3878 else {
3879 cn->group_num = refs[0];
3880 goto set_call_attr;
3881 }
3882 }
3883
3884 return 0;
3885 }
3886
3887 static void
3888 setup_call2_call(Node* node)
3889 {
3890 switch (NODE_TYPE(node)) {
3891 case NODE_LIST:
3892 case NODE_ALT:
3893 do {
3894 setup_call2_call(NODE_CAR(node));
3895 } while (IS_NOT_NULL(node = NODE_CDR(node)));
3896 break;
3897
3898 case NODE_QUANT:
3899 setup_call2_call(NODE_BODY(node));
3900 break;
3901
3902 case NODE_ANCHOR:
3903 if (ANCHOR_HAS_BODY(ANCHOR_(node)))
3904 setup_call2_call(NODE_BODY(node));
3905 break;
3906
3907 case NODE_ENCLOSURE:
3908 {
3909 EnclosureNode* en = ENCLOSURE_(node);
3910
3911 if (en->type == ENCLOSURE_MEMORY) {
3912 if (! NODE_IS_MARK1(node)) {
3913 NODE_STATUS_ADD(node, MARK1);
3914 setup_call2_call(NODE_BODY(node));
3915 NODE_STATUS_REMOVE(node, MARK1);
3916 }
3917 }
3918 else if (en->type == ENCLOSURE_IF_ELSE) {
3919 setup_call2_call(NODE_BODY(node));
3920 if (IS_NOT_NULL(en->te.Then))
3921 setup_call2_call(en->te.Then);
3922 if (IS_NOT_NULL(en->te.Else))
3923 setup_call2_call(en->te.Else);
3924 }
3925 else {
3926 setup_call2_call(NODE_BODY(node));
3927 }
3928 }
3929 break;
3930
3931 case NODE_CALL:
3932 if (! NODE_IS_MARK1(node)) {
3933 NODE_STATUS_ADD(node, MARK1);
3934 {
3935 CallNode* cn = CALL_(node);
3936 Node* called = NODE_CALL_BODY(cn);
3937
3938 cn->entry_count++;
3939
3940 NODE_STATUS_ADD(called, CALLED);
3941 ENCLOSURE_(called)->m.entry_count++;
3942 setup_call2_call(called);
3943 }
3944 NODE_STATUS_REMOVE(node, MARK1);
3945 }
3946 break;
3947
3948 default:
3949 break;
3950 }
3951 }
3952
3953 static int
3954 setup_call(Node* node, ScanEnv* env, int state)
3955 {
3956 int r;
3957
3958 switch (NODE_TYPE(node)) {
3959 case NODE_LIST:
3960 case NODE_ALT:
3961 do {
3962 r = setup_call(NODE_CAR(node), env, state);
3963 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
3964 break;
3965
3966 case NODE_QUANT:
3967 if (QUANT_(node)->upper == 0)
3968 state |= IN_ZERO_REPEAT;
3969
3970 r = setup_call(NODE_BODY(node), env, state);
3971 break;
3972
3973 case NODE_ANCHOR:
3974 if (ANCHOR_HAS_BODY(ANCHOR_(node)))
3975 r = setup_call(NODE_BODY(node), env, state);
3976 else
3977 r = 0;
3978 break;
3979
3980 case NODE_ENCLOSURE:
3981 {
3982 EnclosureNode* en = ENCLOSURE_(node);
3983
3984 if (en->type == ENCLOSURE_MEMORY) {
3985 if ((state & IN_ZERO_REPEAT) != 0) {
3986 NODE_STATUS_ADD(node, IN_ZERO_REPEAT);
3987 ENCLOSURE_(node)->m.entry_count--;
3988 }
3989 r = setup_call(NODE_BODY(node), env, state);
3990 }
3991 else if (en->type == ENCLOSURE_IF_ELSE) {
3992 r = setup_call(NODE_BODY(node), env, state);
3993 if (r != 0) return r;
3994 if (IS_NOT_NULL(en->te.Then)) {
3995 r = setup_call(en->te.Then, env, state);
3996 if (r != 0) return r;
3997 }
3998 if (IS_NOT_NULL(en->te.Else))
3999 r = setup_call(en->te.Else, env, state);
4000 }
4001 else
4002 r = setup_call(NODE_BODY(node), env, state);
4003 }
4004 break;
4005
4006 case NODE_CALL:
4007 if ((state & IN_ZERO_REPEAT) != 0) {
4008 NODE_STATUS_ADD(node, IN_ZERO_REPEAT);
4009 CALL_(node)->entry_count--;
4010 }
4011
4012 r = setup_call_node_call(CALL_(node), env, state);
4013 break;
4014
4015 default:
4016 r = 0;
4017 break;
4018 }
4019
4020 return r;
4021 }
4022
4023 static int
4024 setup_call2(Node* node)
4025 {
4026 int r = 0;
4027
4028 switch (NODE_TYPE(node)) {
4029 case NODE_LIST:
4030 case NODE_ALT:
4031 do {
4032 r = setup_call2(NODE_CAR(node));
4033 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
4034 break;
4035
4036 case NODE_QUANT:
4037 if (QUANT_(node)->upper != 0)
4038 r = setup_call2(NODE_BODY(node));
4039 break;
4040
4041 case NODE_ANCHOR:
4042 if (ANCHOR_HAS_BODY(ANCHOR_(node)))
4043 r = setup_call2(NODE_BODY(node));
4044 break;
4045
4046 case NODE_ENCLOSURE:
4047 if (! NODE_IS_IN_ZERO_REPEAT(node))
4048 r = setup_call2(NODE_BODY(node));
4049
4050 {
4051 EnclosureNode* en = ENCLOSURE_(node);
4052
4053 if (r != 0) return r;
4054 if (en->type == ENCLOSURE_IF_ELSE) {
4055 if (IS_NOT_NULL(en->te.Then)) {
4056 r = setup_call2(en->te.Then);
4057 if (r != 0) return r;
4058 }
4059 if (IS_NOT_NULL(en->te.Else))
4060 r = setup_call2(en->te.Else);
4061 }
4062 }
4063 break;
4064
4065 case NODE_CALL:
4066 if (! NODE_IS_IN_ZERO_REPEAT(node)) {
4067 setup_call2_call(node);
4068 }
4069 break;
4070
4071 default:
4072 break;
4073 }
4074
4075 return r;
4076 }
4077
4078
4079 static void
4080 setup_called_state_call(Node* node, int state)
4081 {
4082 switch (NODE_TYPE(node)) {
4083 case NODE_ALT:
4084 state |= IN_ALT;
4085 /* fall */
4086 case NODE_LIST:
4087 do {
4088 setup_called_state_call(NODE_CAR(node), state);
4089 } while (IS_NOT_NULL(node = NODE_CDR(node)));
4090 break;
4091
4092 case NODE_QUANT:
4093 {
4094 QuantNode* qn = QUANT_(node);
4095
4096 if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 2)
4097 state |= IN_REAL_REPEAT;
4098 if (qn->lower != qn->upper)
4099 state |= IN_VAR_REPEAT;
4100
4101 setup_called_state_call(NODE_QUANT_BODY(qn), state);
4102 }
4103 break;
4104
4105 case NODE_ANCHOR:
4106 {
4107 AnchorNode* an = ANCHOR_(node);
4108
4109 switch (an->type) {
4110 case ANCHOR_PREC_READ_NOT:
4111 case ANCHOR_LOOK_BEHIND_NOT:
4112 state |= IN_NOT;
4113 /* fall */
4114 case ANCHOR_PREC_READ:
4115 case ANCHOR_LOOK_BEHIND:
4116 setup_called_state_call(NODE_ANCHOR_BODY(an), state);
4117 break;
4118 default:
4119 break;
4120 }
4121 }
4122 break;
4123
4124 case NODE_ENCLOSURE:
4125 {
4126 EnclosureNode* en = ENCLOSURE_(node);
4127
4128 if (en->type == ENCLOSURE_MEMORY) {
4129 if (NODE_IS_MARK1(node)) {
4130 if ((~en->m.called_state & state) != 0) {
4131 en->m.called_state |= state;
4132 setup_called_state_call(NODE_BODY(node), state);
4133 }
4134 }
4135 else {
4136 NODE_STATUS_ADD(node, MARK1);
4137 en->m.called_state |= state;
4138 setup_called_state_call(NODE_BODY(node), state);
4139 NODE_STATUS_REMOVE(node, MARK1);
4140 }
4141 }
4142 else if (en->type == ENCLOSURE_IF_ELSE) {
4143 if (IS_NOT_NULL(en->te.Then)) {
4144 setup_called_state_call(en->te.Then, state);
4145 }
4146 if (IS_NOT_NULL(en->te.Else))
4147 setup_called_state_call(en->te.Else, state);
4148 }
4149 else {
4150 setup_called_state_call(NODE_BODY(node), state);
4151 }
4152 }
4153 break;
4154
4155 case NODE_CALL:
4156 setup_called_state_call(NODE_BODY(node), state);
4157 break;
4158
4159 default:
4160 break;
4161 }
4162 }
4163
4164 static void
4165 setup_called_state(Node* node, int state)
4166 {
4167 switch (NODE_TYPE(node)) {
4168 case NODE_ALT:
4169 state |= IN_ALT;
4170 /* fall */
4171 case NODE_LIST:
4172 do {
4173 setup_called_state(NODE_CAR(node), state);
4174 } while (IS_NOT_NULL(node = NODE_CDR(node)));
4175 break;
4176
4177 #ifdef USE_CALL
4178 case NODE_CALL:
4179 setup_called_state_call(node, state);
4180 break;
4181 #endif
4182
4183 case NODE_ENCLOSURE:
4184 {
4185 EnclosureNode* en = ENCLOSURE_(node);
4186
4187 switch (en->type) {
4188 case ENCLOSURE_MEMORY:
4189 if (en->m.entry_count > 1)
4190 state |= IN_MULTI_ENTRY;
4191
4192 en->m.called_state |= state;
4193 /* fall */
4194 case ENCLOSURE_OPTION:
4195 case ENCLOSURE_STOP_BACKTRACK:
4196 setup_called_state(NODE_BODY(node), state);
4197 break;
4198 case ENCLOSURE_IF_ELSE:
4199 setup_called_state(NODE_BODY(node), state);
4200 if (IS_NOT_NULL(en->te.Then))
4201 setup_called_state(en->te.Then, state);
4202 if (IS_NOT_NULL(en->te.Else))
4203 setup_called_state(en->te.Else, state);
4204 break;
4205 }
4206 }
4207 break;
4208
4209 case NODE_QUANT:
4210 {
4211 QuantNode* qn = QUANT_(node);
4212
4213 if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 2)
4214 state |= IN_REAL_REPEAT;
4215 if (qn->lower != qn->upper)
4216 state |= IN_VAR_REPEAT;
4217
4218 setup_called_state(NODE_QUANT_BODY(qn), state);
4219 }
4220 break;
4221
4222 case NODE_ANCHOR:
4223 {
4224 AnchorNode* an = ANCHOR_(node);
4225
4226 switch (an->type) {
4227 case ANCHOR_PREC_READ_NOT:
4228 case ANCHOR_LOOK_BEHIND_NOT:
4229 state |= IN_NOT;
4230 /* fall */
4231 case ANCHOR_PREC_READ:
4232 case ANCHOR_LOOK_BEHIND:
4233 setup_called_state(NODE_ANCHOR_BODY(an), state);
4234 break;
4235 default:
4236 break;
4237 }
4238 }
4239 break;
4240
4241 case NODE_BACKREF:
4242 case NODE_STRING:
4243 case NODE_CTYPE:
4244 case NODE_CCLASS:
4245 case NODE_GIMMICK:
4246 default:
4247 break;
4248 }
4249 }
4250
4251 #endif /* USE_CALL */
4252
4253
4254 static int setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env);
4255
4256 #ifdef __GNUC__
4257 __inline
4258 #endif
4259 static int
4260 setup_anchor(Node* node, regex_t* reg, int state, ScanEnv* env)
4261 {
4262 /* allowed node types in look-behind */
4263 #define ALLOWED_TYPE_IN_LB \
4264 ( NODE_BIT_LIST | NODE_BIT_ALT | NODE_BIT_STRING | NODE_BIT_CCLASS \
4265 | NODE_BIT_CTYPE | NODE_BIT_ANCHOR | NODE_BIT_ENCLOSURE | NODE_BIT_QUANT \
4266 | NODE_BIT_CALL | NODE_BIT_GIMMICK)
4267
4268 #define ALLOWED_ENCLOSURE_IN_LB ( 1<<ENCLOSURE_MEMORY | 1<<ENCLOSURE_OPTION )
4269 #define ALLOWED_ENCLOSURE_IN_LB_NOT (1<<ENCLOSURE_OPTION)
4270
4271 #define ALLOWED_ANCHOR_IN_LB \
4272 ( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF \
4273 | ANCHOR_BEGIN_POSITION | ANCHOR_WORD_BOUNDARY | ANCHOR_NO_WORD_BOUNDARY \
4274 | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END \
4275 | ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY \
4276 | ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY )
4277
4278 #define ALLOWED_ANCHOR_IN_LB_NOT \
4279 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE \
4280 | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_WORD_BOUNDARY \
4281 | ANCHOR_NO_WORD_BOUNDARY | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END \
4282 | ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY \
4283 | ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY )
4284
4285 int r;
4286 AnchorNode* an = ANCHOR_(node);
4287
4288 switch (an->type) {
4289 case ANCHOR_PREC_READ:
4290 r = setup_tree(NODE_ANCHOR_BODY(an), reg, state, env);
4291 break;
4292 case ANCHOR_PREC_READ_NOT:
4293 r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state | IN_NOT), env);
4294 break;
4295
4296 case ANCHOR_LOOK_BEHIND:
4297 {
4298 r = check_type_tree(NODE_ANCHOR_BODY(an), ALLOWED_TYPE_IN_LB,
4299 ALLOWED_ENCLOSURE_IN_LB, ALLOWED_ANCHOR_IN_LB);
4300 if (r < 0) return r;
4301 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4302 r = setup_tree(NODE_ANCHOR_BODY(an), reg, state, env);
4303 if (r != 0) return r;
4304 r = setup_look_behind(node, reg, env);
4305 }
4306 break;
4307
4308 case ANCHOR_LOOK_BEHIND_NOT:
4309 {
4310 r = check_type_tree(NODE_ANCHOR_BODY(an), ALLOWED_TYPE_IN_LB,
4311 ALLOWED_ENCLOSURE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
4312 if (r < 0) return r;
4313 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4314 r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state | IN_NOT), env);
4315 if (r != 0) return r;
4316 r = setup_look_behind(node, reg, env);
4317 }
4318 break;
4319
4320 default:
4321 r = 0;
4322 break;
4323 }
4324
4325 return r;
4326 }
4327
4328 #ifdef __GNUC__
4329 __inline
4330 #endif
4331 static int
4332 setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env)
4333 {
4334 int r;
4335 OnigLen d;
4336 QuantNode* qn = QUANT_(node);
4337 Node* body = NODE_BODY(node);
4338
4339 if ((state & IN_REAL_REPEAT) != 0) {
4340 NODE_STATUS_ADD(node, IN_REAL_REPEAT);
4341 }
4342 if ((state & IN_MULTI_ENTRY) != 0) {
4343 NODE_STATUS_ADD(node, IN_MULTI_ENTRY);
4344 }
4345
4346 if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
4347 d = tree_min_len(body, env);
4348 if (d == 0) {
4349 #ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT
4350 qn->body_empty_info = quantifiers_memory_node_info(body);
4351 if (qn->body_empty_info == QUANT_BODY_IS_EMPTY_REC) {
4352 if (NODE_TYPE(body) == NODE_ENCLOSURE &&
4353 ENCLOSURE_(body)->type == ENCLOSURE_MEMORY) {
4354 MEM_STATUS_ON(env->bt_mem_end, ENCLOSURE_(body)->m.regnum);
4355 }
4356 }
4357 #else
4358 qn->body_empty_info = QUANT_BODY_IS_EMPTY;
4359 #endif
4360 }
4361 }
4362
4363 if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 2)
4364 state |= IN_REAL_REPEAT;
4365 if (qn->lower != qn->upper)
4366 state |= IN_VAR_REPEAT;
4367
4368 r = setup_tree(body, reg, state, env);
4369 if (r != 0) return r;
4370
4371 /* expand string */
4372 #define EXPAND_STRING_MAX_LENGTH 100
4373 if (NODE_TYPE(body) == NODE_STRING) {
4374 if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper &&
4375 qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) {
4376 int len = NODE_STRING_LEN(body);
4377 StrNode* sn = STR_(body);
4378
4379 if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) {
4380 int i, n = qn->lower;
4381 onig_node_conv_to_str_node(node, STR_(body)->flag);
4382 for (i = 0; i < n; i++) {
4383 r = onig_node_str_cat(node, sn->s, sn->end);
4384 if (r != 0) return r;
4385 }
4386 onig_node_free(body);
4387 return r;
4388 }
4389 }
4390 }
4391
4392 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
4393 if (qn->greedy && (qn->body_empty_info != QUANT_BODY_IS_NOT_EMPTY)) {
4394 if (NODE_TYPE(body) == NODE_QUANT) {
4395 QuantNode* tqn = QUANT_(body);
4396 if (IS_NOT_NULL(tqn->head_exact)) {
4397 qn->head_exact = tqn->head_exact;
4398 tqn->head_exact = NULL;
4399 }
4400 }
4401 else {
4402 qn->head_exact = get_head_value_node(NODE_BODY(node), 1, reg);
4403 }
4404 }
4405 #endif
4406
4407 return r;
4408 }
4409
4410 /* setup_tree does the following work.
4411 1. check empty loop. (set qn->body_empty_info)
4412 2. expand ignore-case in char class.
4413 3. set memory status bit flags. (reg->mem_stats)
4414 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
4415 5. find invalid patterns in look-behind.
4416 6. expand repeated string.
4417 */
4418 static int
4419 setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
4420 {
4421 int r = 0;
4422
4423 switch (NODE_TYPE(node)) {
4424 case NODE_LIST:
4425 {
4426 Node* prev = NULL_NODE;
4427 do {
4428 r = setup_tree(NODE_CAR(node), reg, state, env);
4429 if (IS_NOT_NULL(prev) && r == 0) {
4430 r = next_setup(prev, NODE_CAR(node), reg);
4431 }
4432 prev = NODE_CAR(node);
4433 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
4434 }
4435 break;
4436
4437 case NODE_ALT:
4438 do {
4439 r = setup_tree(NODE_CAR(node), reg, (state | IN_ALT), env);
4440 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
4441 break;
4442
4443 case NODE_STRING:
4444 if (IS_IGNORECASE(reg->options) && !NODE_STRING_IS_RAW(node)) {
4445 r = expand_case_fold_string(node, reg);
4446 }
4447 break;
4448
4449 case NODE_BACKREF:
4450 {
4451 int i;
4452 int* p;
4453 BackRefNode* br = BACKREF_(node);
4454 p = BACKREFS_P(br);
4455 for (i = 0; i < br->back_num; i++) {
4456 if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
4457 MEM_STATUS_ON(env->backrefed_mem, p[i]);
4458 MEM_STATUS_ON(env->bt_mem_start, p[i]);
4459 #ifdef USE_BACKREF_WITH_LEVEL
4460 if (NODE_IS_NEST_LEVEL(node)) {
4461 MEM_STATUS_ON(env->bt_mem_end, p[i]);
4462 }
4463 #endif
4464 }
4465 }
4466 break;
4467
4468 case NODE_ENCLOSURE:
4469 {
4470 EnclosureNode* en = ENCLOSURE_(node);
4471
4472 switch (en->type) {
4473 case ENCLOSURE_OPTION:
4474 {
4475 OnigOptionType options = reg->options;
4476 reg->options = ENCLOSURE_(node)->o.options;
4477 r = setup_tree(NODE_BODY(node), reg, state, env);
4478 reg->options = options;
4479 }
4480 break;
4481
4482 case ENCLOSURE_MEMORY:
4483 #ifdef USE_CALL
4484 state |= en->m.called_state;
4485 #endif
4486
4487 if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_MULTI_ENTRY)) != 0
4488 || NODE_IS_RECURSION(node)) {
4489 MEM_STATUS_ON(env->bt_mem_start, en->m.regnum);
4490 }
4491 r = setup_tree(NODE_BODY(node), reg, state, env);
4492 break;
4493
4494 case ENCLOSURE_STOP_BACKTRACK:
4495 {
4496 Node* target = NODE_BODY(node);
4497 r = setup_tree(target, reg, state, env);
4498 if (NODE_TYPE(target) == NODE_QUANT) {
4499 QuantNode* tqn = QUANT_(target);
4500 if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
4501 tqn->greedy != 0) { /* (?>a*), a*+ etc... */
4502 if (NODE_IS_SIMPLE_TYPE(NODE_BODY(target)))
4503 NODE_STATUS_ADD(node, STOP_BT_SIMPLE_REPEAT);
4504 }
4505 }
4506 }
4507 break;
4508
4509 case ENCLOSURE_IF_ELSE:
4510 r = setup_tree(NODE_BODY(node), reg, (state | IN_ALT), env);
4511 if (r != 0) return r;
4512 if (IS_NOT_NULL(en->te.Then)) {
4513 r = setup_tree(en->te.Then, reg, (state | IN_ALT), env);
4514 if (r != 0) return r;
4515 }
4516 if (IS_NOT_NULL(en->te.Else))
4517 r = setup_tree(en->te.Else, reg, (state | IN_ALT), env);
4518 break;
4519 }
4520 }
4521 break;
4522
4523 case NODE_QUANT:
4524 r = setup_quant(node, reg, state, env);
4525 break;
4526
4527 case NODE_ANCHOR:
4528 r = setup_anchor(node, reg, state, env);
4529 break;
4530
4531 #ifdef USE_CALL
4532 case NODE_CALL:
4533 #endif
4534 case NODE_CTYPE:
4535 case NODE_CCLASS:
4536 case NODE_GIMMICK:
4537 default:
4538 break;
4539 }
4540
4541 return r;
4542 }
4543
4544 /* set skip map for Boyer-Moore search */
4545 static int
4546 set_bm_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
4547 UChar skip[], int** int_skip)
4548 {
4549 int i, len;
4550
4551 len = (int )(end - s);
4552 if (len < ONIG_CHAR_TABLE_SIZE) {
4553 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = len;
4554
4555 for (i = 0; i < len - 1; i++)
4556 skip[s[i]] = len - 1 - i;
4557 }
4558 else {
4559 if (IS_NULL(*int_skip)) {
4560 *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
4561 if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
4562 }
4563 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = len;
4564
4565 for (i = 0; i < len - 1; i++)
4566 (*int_skip)[s[i]] = len - 1 - i;
4567 }
4568 return 0;
4569 }
4570
4571 #define OPT_EXACT_MAXLEN 24
4572
4573 typedef struct {
4574 OnigLen min; /* min byte length */
4575 OnigLen max; /* max byte length */
4576 } MinMax;
4577
4578 typedef struct {
4579 MinMax mmd;
4580 OnigEncoding enc;
4581 OnigOptionType options;
4582 OnigCaseFoldType case_fold_flag;
4583 ScanEnv* scan_env;
4584 } OptEnv;
4585
4586 typedef struct {
4587 int left;
4588 int right;
4589 } OptAnc;
4590
4591 typedef struct {
4592 MinMax mmd; /* position */
4593 OptAnc anc;
4594 int reach_end;
4595 int ignore_case;
4596 int len;
4597 UChar s[OPT_EXACT_MAXLEN];
4598 } OptExact;
4599
4600 typedef struct {
4601 MinMax mmd; /* position */
4602 OptAnc anc;
4603 int value; /* weighted value */
4604 UChar map[ONIG_CHAR_TABLE_SIZE];
4605 } OptMap;
4606
4607 typedef struct {
4608 MinMax len;
4609 OptAnc anc;
4610 OptExact exb; /* boundary */
4611 OptExact exm; /* middle */
4612 OptExact expr; /* prec read (?=...) */
4613 OptMap map; /* boundary */
4614 } NodeOpt;
4615
4616
4617 static int
4618 map_position_value(OnigEncoding enc, int i)
4619 {
4620 static const short int Vals[] = {
4621 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
4622 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4623 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
4624 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
4625 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4626 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
4627 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4628 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
4629 };
4630
4631 if (i < (int )(sizeof(Vals)/sizeof(Vals[0]))) {
4632 if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
4633 return 20;
4634 else
4635 return (int )Vals[i];
4636 }
4637 else
4638 return 4; /* Take it easy. */
4639 }
4640
4641 static int
4642 distance_value(MinMax* mm)
4643 {
4644 /* 1000 / (min-max-dist + 1) */
4645 static const short int dist_vals[] = {
4646 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
4647 91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
4648 48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
4649 32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
4650 24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
4651 20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
4652 16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
4653 14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
4654 12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
4655 11, 11, 11, 11, 11, 10, 10, 10, 10, 10
4656 };
4657
4658 OnigLen d;
4659
4660 if (mm->max == INFINITE_LEN) return 0;
4661
4662 d = mm->max - mm->min;
4663 if (d < (OnigLen )(sizeof(dist_vals)/sizeof(dist_vals[0])))
4664 /* return dist_vals[d] * 16 / (mm->min + 12); */
4665 return (int )dist_vals[d];
4666 else
4667 return 1;
4668 }
4669
4670 static int
4671 comp_distance_value(MinMax* d1, MinMax* d2, int v1, int v2)
4672 {
4673 if (v2 <= 0) return -1;
4674 if (v1 <= 0) return 1;
4675
4676 v1 *= distance_value(d1);
4677 v2 *= distance_value(d2);
4678
4679 if (v2 > v1) return 1;
4680 if (v2 < v1) return -1;
4681
4682 if (d2->min < d1->min) return 1;
4683 if (d2->min > d1->min) return -1;
4684 return 0;
4685 }
4686
4687 static int
4688 is_equal_mml(MinMax* a, MinMax* b)
4689 {
4690 return (a->min == b->min && a->max == b->max) ? 1 : 0;
4691 }
4692
4693 static void
4694 set_mml(MinMax* l, OnigLen min, OnigLen max)
4695 {
4696 l->min = min;
4697 l->max = max;
4698 }
4699
4700 static void
4701 clear_mml(MinMax* l)
4702 {
4703 l->min = l->max = 0;
4704 }
4705
4706 static void
4707 copy_mml(MinMax* to, MinMax* from)
4708 {
4709 to->min = from->min;
4710 to->max = from->max;
4711 }
4712
4713 static void
4714 add_mml(MinMax* to, MinMax* from)
4715 {
4716 to->min = distance_add(to->min, from->min);
4717 to->max = distance_add(to->max, from->max);
4718 }
4719
4720 static void
4721 alt_merge_mml(MinMax* to, MinMax* from)
4722 {
4723 if (to->min > from->min) to->min = from->min;
4724 if (to->max < from->max) to->max = from->max;
4725 }
4726
4727 static void
4728 copy_opt_env(OptEnv* to, OptEnv* from)
4729 {
4730 *to = *from;
4731 }
4732
4733 static void
4734 clear_opt_anc_info(OptAnc* a)
4735 {
4736 a->left = 0;
4737 a->right = 0;
4738 }
4739
4740 static void
4741 copy_opt_anc_info(OptAnc* to, OptAnc* from)
4742 {
4743 *to = *from;
4744 }
4745
4746 static void
4747 concat_opt_anc_info(OptAnc* to, OptAnc* left, OptAnc* right,
4748 OnigLen left_len, OnigLen right_len)
4749 {
4750 clear_opt_anc_info(to);
4751
4752 to->left = left->left;
4753 if (left_len == 0) {
4754 to->left |= right->left;
4755 }
4756
4757 to->right = right->right;
4758 if (right_len == 0) {
4759 to->right |= left->right;
4760 }
4761 else {
4762 to->right |= (left->right & ANCHOR_PREC_READ_NOT);
4763 }
4764 }
4765
4766 static int
4767 is_left(int a)
4768 {
4769 if (a == ANCHOR_END_BUF || a == ANCHOR_SEMI_END_BUF ||
4770 a == ANCHOR_END_LINE || a == ANCHOR_PREC_READ || a == ANCHOR_PREC_READ_NOT)
4771 return 0;
4772
4773 return 1;
4774 }
4775
4776 static int
4777 is_set_opt_anc_info(OptAnc* to, int anc)
4778 {
4779 if ((to->left & anc) != 0) return 1;
4780
4781 return ((to->right & anc) != 0 ? 1 : 0);
4782 }
4783
4784 static void
4785 add_opt_anc_info(OptAnc* to, int anc)
4786 {
4787 if (is_left(anc))
4788 to->left |= anc;
4789 else
4790 to->right |= anc;
4791 }
4792
4793 static void
4794 remove_opt_anc_info(OptAnc* to, int anc)
4795 {
4796 if (is_left(anc))
4797 to->left &= ~anc;
4798 else
4799 to->right &= ~anc;
4800 }
4801
4802 static void
4803 alt_merge_opt_anc_info(OptAnc* to, OptAnc* add)
4804 {
4805 to->left &= add->left;
4806 to->right &= add->right;
4807 }
4808
4809 static int
4810 is_full_opt_exact(OptExact* e)
4811 {
4812 return (e->len >= OPT_EXACT_MAXLEN ? 1 : 0);
4813 }
4814
4815 static void
4816 clear_opt_exact(OptExact* e)
4817 {
4818 clear_mml(&e->mmd);
4819 clear_opt_anc_info(&e->anc);
4820 e->reach_end = 0;
4821 e->ignore_case = 0;
4822 e->len = 0;
4823 e->s[0] = '\0';
4824 }
4825
4826 static void
4827 copy_opt_exact(OptExact* to, OptExact* from)
4828 {
4829 *to = *from;
4830 }
4831
4832 static int
4833 concat_opt_exact(OptExact* to, OptExact* add, OnigEncoding enc)
4834 {
4835 int i, j, len, r;
4836 UChar *p, *end;
4837 OptAnc tanc;
4838
4839 if (! to->ignore_case && add->ignore_case) {
4840 if (to->len >= add->len) return 0; /* avoid */
4841
4842 to->ignore_case = 1;
4843 }
4844
4845 r = 0;
4846 p = add->s;
4847 end = p + add->len;
4848 for (i = to->len; p < end; ) {
4849 len = enclen(enc, p);
4850 if (i + len > OPT_EXACT_MAXLEN) {
4851 r = 1; /* 1:full */
4852 break;
4853 }
4854 for (j = 0; j < len && p < end; j++)
4855 to->s[i++] = *p++;
4856 }
4857
4858 to->len = i;
4859 to->reach_end = (p == end ? add->reach_end : 0);
4860
4861 concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
4862 if (! to->reach_end) tanc.right = 0;
4863 copy_opt_anc_info(&to->anc, &tanc);
4864
4865 return r;
4866 }
4867
4868 static void
4869 concat_opt_exact_str(OptExact* to, UChar* s, UChar* end, OnigEncoding enc)
4870 {
4871 int i, j, len;
4872 UChar *p;
4873
4874 for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
4875 len = enclen(enc, p);
4876 if (i + len > OPT_EXACT_MAXLEN) break;
4877 for (j = 0; j < len && p < end; j++)
4878 to->s[i++] = *p++;
4879 }
4880
4881 to->len = i;
4882 }
4883
4884 static void
4885 alt_merge_opt_exact(OptExact* to, OptExact* add, OptEnv* env)
4886 {
4887 int i, j, len;
4888
4889 if (add->len == 0 || to->len == 0) {
4890 clear_opt_exact(to);
4891 return ;
4892 }
4893
4894 if (! is_equal_mml(&to->mmd, &add->mmd)) {
4895 clear_opt_exact(to);
4896 return ;
4897 }
4898
4899 for (i = 0; i < to->len && i < add->len; ) {
4900 if (to->s[i] != add->s[i]) break;
4901 len = enclen(env->enc, to->s + i);
4902
4903 for (j = 1; j < len; j++) {
4904 if (to->s[i+j] != add->s[i+j]) break;
4905 }
4906 if (j < len) break;
4907 i += len;
4908 }
4909
4910 if (! add->reach_end || i < add->len || i < to->len) {
4911 to->reach_end = 0;
4912 }
4913 to->len = i;
4914 to->ignore_case |= add->ignore_case;
4915
4916 alt_merge_opt_anc_info(&to->anc, &add->anc);
4917 if (! to->reach_end) to->anc.right = 0;
4918 }
4919
4920 static void
4921 select_opt_exact(OnigEncoding enc, OptExact* now, OptExact* alt)
4922 {
4923 int vn, va;
4924
4925 vn = now->len;
4926 va = alt->len;
4927
4928 if (va == 0) {
4929 return ;
4930 }
4931 else if (vn == 0) {
4932 copy_opt_exact(now, alt);
4933 return ;
4934 }
4935 else if (vn <= 2 && va <= 2) {
4936 /* ByteValTable[x] is big value --> low price */
4937 va = map_position_value(enc, now->s[0]);
4938 vn = map_position_value(enc, alt->s[0]);
4939
4940 if (now->len > 1) vn += 5;
4941 if (alt->len > 1) va += 5;
4942 }
4943
4944 if (now->ignore_case == 0) vn *= 2;
4945 if (alt->ignore_case == 0) va *= 2;
4946
4947 if (comp_distance_value(&now->mmd, &alt->mmd, vn, va) > 0)
4948 copy_opt_exact(now, alt);
4949 }
4950
4951 static void
4952 clear_opt_map(OptMap* map)
4953 {
4954 static const OptMap clean_info = {
4955 {0, 0}, {0, 0}, 0,
4956 {
4957 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4958 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4959 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4960 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4961 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4962 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4963 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4964 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4965 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4966 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4967 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4968 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4969 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4970 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4971 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4972 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4973 }
4974 };
4975
4976 xmemcpy(map, &clean_info, sizeof(OptMap));
4977 }
4978
4979 static void
4980 copy_opt_map(OptMap* to, OptMap* from)
4981 {
4982 xmemcpy(to,from,sizeof(OptMap));
4983 }
4984
4985 static void
4986 add_char_opt_map(OptMap* m, UChar c, OnigEncoding enc)
4987 {
4988 if (m->map[c] == 0) {
4989 m->map[c] = 1;
4990 m->value += map_position_value(enc, c);
4991 }
4992 }
4993
4994 static int
4995 add_char_amb_opt_map(OptMap* map, UChar* p, UChar* end,
4996 OnigEncoding enc, OnigCaseFoldType fold_flag)
4997 {
4998 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
4999 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
5000 int i, n;
5001
5002 add_char_opt_map(map, p[0], enc);
5003
5004 fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(fold_flag);
5005 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, fold_flag, p, end, items);
5006 if (n < 0) return n;
5007
5008 for (i = 0; i < n; i++) {
5009 ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
5010 add_char_opt_map(map, buf[0], enc);
5011 }
5012
5013 return 0;
5014 }
5015
5016 static void
5017 select_opt_map(OptMap* now, OptMap* alt)
5018 {
5019 static int z = 1<<15; /* 32768: something big value */
5020
5021 int vn, va;
5022
5023 if (alt->value == 0) return ;
5024 if (now->value == 0) {
5025 copy_opt_map(now, alt);
5026 return ;
5027 }
5028
5029 vn = z / now->value;
5030 va = z / alt->value;
5031 if (comp_distance_value(&now->mmd, &alt->mmd, vn, va) > 0)
5032 copy_opt_map(now, alt);
5033 }
5034
5035 static int
5036 comp_opt_exact_or_map(OptExact* e, OptMap* m)
5037 {
5038 #define COMP_EM_BASE 20
5039 int ae, am;
5040
5041 if (m->value <= 0) return -1;
5042
5043 ae = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2);
5044 am = COMP_EM_BASE * 5 * 2 / m->value;
5045 return comp_distance_value(&e->mmd, &m->mmd, ae, am);
5046 }
5047
5048 static void
5049 alt_merge_opt_map(OnigEncoding enc, OptMap* to, OptMap* add)
5050 {
5051 int i, val;
5052
5053 /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
5054 if (to->value == 0) return ;
5055 if (add->value == 0 || to->mmd.max < add->mmd.min) {
5056 clear_opt_map(to);
5057 return ;
5058 }
5059
5060 alt_merge_mml(&to->mmd, &add->mmd);
5061
5062 val = 0;
5063 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
5064 if (add->map[i])
5065 to->map[i] = 1;
5066
5067 if (to->map[i])
5068 val += map_position_value(enc, i);
5069 }
5070 to->value = val;
5071
5072 alt_merge_opt_anc_info(&to->anc, &add->anc);
5073 }
5074
5075 static void
5076 set_bound_node_opt_info(NodeOpt* opt, MinMax* plen)
5077 {
5078 copy_mml(&(opt->exb.mmd), plen);
5079 copy_mml(&(opt->expr.mmd), plen);
5080 copy_mml(&(opt->map.mmd), plen);
5081 }
5082
5083 static void
5084 clear_node_opt_info(NodeOpt* opt)
5085 {
5086 clear_mml(&opt->len);
5087 clear_opt_anc_info(&opt->anc);
5088 clear_opt_exact(&opt->exb);
5089 clear_opt_exact(&opt->exm);
5090 clear_opt_exact(&opt->expr);
5091 clear_opt_map(&opt->map);
5092 }
5093
5094 static void
5095 copy_node_opt_info(NodeOpt* to, NodeOpt* from)
5096 {
5097 xmemcpy(to,from,sizeof(NodeOpt));
5098 }
5099
5100 static void
5101 concat_left_node_opt_info(OnigEncoding enc, NodeOpt* to, NodeOpt* add)
5102 {
5103 int exb_reach, exm_reach;
5104 OptAnc tanc;
5105
5106 concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
5107 copy_opt_anc_info(&to->anc, &tanc);
5108
5109 if (add->exb.len > 0 && to->len.max == 0) {
5110 concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc, to->len.max, add->len.max);
5111 copy_opt_anc_info(&add->exb.anc, &tanc);
5112 }
5113
5114 if (add->map.value > 0 && to->len.max == 0) {
5115 if (add->map.mmd.max == 0)
5116 add->map.anc.left |= to->anc.left;
5117 }
5118
5119 exb_reach = to->exb.reach_end;
5120 exm_reach = to->exm.reach_end;
5121
5122 if (add->len.max != 0)
5123 to->exb.reach_end = to->exm.reach_end = 0;
5124
5125 if (add->exb.len > 0) {
5126 if (exb_reach) {
5127 concat_opt_exact(&to->exb, &add->exb, enc);
5128 clear_opt_exact(&add->exb);
5129 }
5130 else if (exm_reach) {
5131 concat_opt_exact(&to->exm, &add->exb, enc);
5132 clear_opt_exact(&add->exb);
5133 }
5134 }
5135 select_opt_exact(enc, &to->exm, &add->exb);
5136 select_opt_exact(enc, &to->exm, &add->exm);
5137
5138 if (to->expr.len > 0) {
5139 if (add->len.max > 0) {
5140 if (to->expr.len > (int )add->len.max)
5141 to->expr.len = add->len.max;
5142
5143 if (to->expr.mmd.max == 0)
5144 select_opt_exact(enc, &to->exb, &to->expr);
5145 else
5146 select_opt_exact(enc, &to->exm, &to->expr);
5147 }
5148 }
5149 else if (add->expr.len > 0) {
5150 copy_opt_exact(&to->expr, &add->expr);
5151 }
5152
5153 select_opt_map(&to->map, &add->map);
5154 add_mml(&to->len, &add->len);
5155 }
5156
5157 static void
5158 alt_merge_node_opt_info(NodeOpt* to, NodeOpt* add, OptEnv* env)
5159 {
5160 alt_merge_opt_anc_info(&to->anc, &add->anc);
5161 alt_merge_opt_exact(&to->exb, &add->exb, env);
5162 alt_merge_opt_exact(&to->exm, &add->exm, env);
5163 alt_merge_opt_exact(&to->expr, &add->expr, env);
5164 alt_merge_opt_map(env->enc, &to->map, &add->map);
5165
5166 alt_merge_mml(&to->len, &add->len);
5167 }
5168
5169
5170 #define MAX_NODE_OPT_INFO_REF_COUNT 5
5171
5172 static int
5173 optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env)
5174 {
5175 int i;
5176 int r;
5177 NodeOpt xo;
5178 OnigEncoding enc;
5179
5180 r = 0;
5181 enc = env->enc;
5182 clear_node_opt_info(opt);
5183 set_bound_node_opt_info(opt, &env->mmd);
5184
5185 switch (NODE_TYPE(node)) {
5186 case NODE_LIST:
5187 {
5188 OptEnv nenv;
5189 Node* nd = node;
5190
5191 copy_opt_env(&nenv, env);
5192 do {
5193 r = optimize_nodes(NODE_CAR(nd), &xo, &nenv);
5194 if (r == 0) {
5195 add_mml(&nenv.mmd, &xo.len);
5196 concat_left_node_opt_info(enc, opt, &xo);
5197 }
5198 } while (r == 0 && IS_NOT_NULL(nd = NODE_CDR(nd)));
5199 }
5200 break;
5201
5202 case NODE_ALT:
5203 {
5204 Node* nd = node;
5205
5206 do {
5207 r = optimize_nodes(NODE_CAR(nd), &xo, env);
5208 if (r == 0) {
5209 if (nd == node) copy_node_opt_info(opt, &xo);
5210 else alt_merge_node_opt_info(opt, &xo, env);
5211 }
5212 } while ((r == 0) && IS_NOT_NULL(nd = NODE_CDR(nd)));
5213 }
5214 break;
5215
5216 case NODE_STRING:
5217 {
5218 StrNode* sn = STR_(node);
5219 int slen = (int )(sn->end - sn->s);
5220 /* int is_raw = NODE_STRING_IS_RAW(node); */
5221
5222 if (! NODE_STRING_IS_AMBIG(node)) {
5223 concat_opt_exact_str(&opt->exb, sn->s, sn->end, enc);
5224 if (slen > 0) {
5225 add_char_opt_map(&opt->map, *(sn->s), enc);
5226 }
5227 set_mml(&opt->len, slen, slen);
5228 }
5229 else {
5230 int max;
5231
5232 if (NODE_STRING_IS_DONT_GET_OPT_INFO(node)) {
5233 int n = onigenc_strlen(enc, sn->s, sn->end);
5234 max = ONIGENC_MBC_MAXLEN_DIST(enc) * n;
5235 }
5236 else {
5237 concat_opt_exact_str(&opt->exb, sn->s, sn->end, enc);
5238 opt->exb.ignore_case = 1;
5239
5240 if (slen > 0) {
5241 r = add_char_amb_opt_map(&opt->map, sn->s, sn->end,
5242 enc, env->case_fold_flag);
5243 if (r != 0) break;
5244 }
5245
5246 max = slen;
5247 }
5248
5249 set_mml(&opt->len, slen, max);
5250 }
5251
5252 if (opt->exb.len == slen)
5253 opt->exb.reach_end = 1;
5254 }
5255 break;
5256
5257 case NODE_CCLASS:
5258 {
5259 int z;
5260 CClassNode* cc = CCLASS_(node);
5261
5262 /* no need to check ignore case. (set in setup_tree()) */
5263
5264 if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
5265 OnigLen min = ONIGENC_MBC_MINLEN(enc);
5266 OnigLen max = ONIGENC_MBC_MAXLEN_DIST(enc);
5267
5268 set_mml(&opt->len, min, max);
5269 }
5270 else {
5271 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
5272 z = BITSET_AT(cc->bs, i);
5273 if ((z && ! IS_NCCLASS_NOT(cc)) || (! z && IS_NCCLASS_NOT(cc))) {
5274 add_char_opt_map(&opt->map, (UChar )i, enc);
5275 }
5276 }
5277 set_mml(&opt->len, 1, 1);
5278 }
5279 }
5280 break;
5281
5282 case NODE_CTYPE:
5283 {
5284 int min, max;
5285 int range;
5286
5287 max = ONIGENC_MBC_MAXLEN_DIST(enc);
5288
5289 if (max == 1) {
5290 min = 1;
5291
5292 switch (CTYPE_(node)->ctype) {
5293 case CTYPE_ANYCHAR:
5294 break;
5295
5296 case ONIGENC_CTYPE_WORD:
5297 range = CTYPE_(node)->ascii_mode != 0 ? 128 : SINGLE_BYTE_SIZE;
5298 if (CTYPE_(node)->not != 0) {
5299 for (i = 0; i < range; i++) {
5300 if (! ONIGENC_IS_CODE_WORD(enc, i)) {
5301 add_char_opt_map(&opt->map, (UChar )i, enc);
5302 }
5303 }
5304 for (i = range; i < SINGLE_BYTE_SIZE; i++) {
5305 add_char_opt_map(&opt->map, (UChar )i, enc);
5306 }
5307 }
5308 else {
5309 for (i = 0; i < range; i++) {
5310 if (ONIGENC_IS_CODE_WORD(enc, i)) {
5311 add_char_opt_map(&opt->map, (UChar )i, enc);
5312 }
5313 }
5314 }
5315 break;
5316 }
5317 }
5318 else {
5319 min = ONIGENC_MBC_MINLEN(enc);
5320 }
5321 set_mml(&opt->len, min, max);
5322 }
5323 break;
5324
5325 case NODE_ANCHOR:
5326 switch (ANCHOR_(node)->type) {
5327 case ANCHOR_BEGIN_BUF:
5328 case ANCHOR_BEGIN_POSITION:
5329 case ANCHOR_BEGIN_LINE:
5330 case ANCHOR_END_BUF:
5331 case ANCHOR_SEMI_END_BUF:
5332 case ANCHOR_END_LINE:
5333 case ANCHOR_PREC_READ_NOT:
5334 case ANCHOR_LOOK_BEHIND:
5335 add_opt_anc_info(&opt->anc, ANCHOR_(node)->type);
5336 break;
5337
5338 case ANCHOR_PREC_READ:
5339 {
5340 r = optimize_nodes(NODE_BODY(node), &xo, env);
5341 if (r == 0) {
5342 if (xo.exb.len > 0)
5343 copy_opt_exact(&opt->expr, &xo.exb);
5344 else if (xo.exm.len > 0)
5345 copy_opt_exact(&opt->expr, &xo.exm);
5346
5347 opt->expr.reach_end = 0;
5348
5349 if (xo.map.value > 0)
5350 copy_opt_map(&opt->map, &xo.map);
5351 }
5352 }
5353 break;
5354
5355 case ANCHOR_LOOK_BEHIND_NOT:
5356 break;
5357 }
5358 break;
5359
5360 case NODE_BACKREF:
5361 if (! NODE_IS_CHECKER(node)) {
5362 int* backs;
5363 OnigLen min, max, tmin, tmax;
5364 MemEnv* mem_env = SCANENV_MEMENV(env->scan_env);
5365 BackRefNode* br = BACKREF_(node);
5366
5367 if (NODE_IS_RECURSION(node)) {
5368 set_mml(&opt->len, 0, INFINITE_LEN);
5369 break;
5370 }
5371 backs = BACKREFS_P(br);
5372 min = tree_min_len(mem_env[backs[0]].node, env->scan_env);
5373 max = tree_max_len(mem_env[backs[0]].node, env->scan_env);
5374 for (i = 1; i < br->back_num; i++) {
5375 tmin = tree_min_len(mem_env[backs[i]].node, env->scan_env);
5376 tmax = tree_max_len(mem_env[backs[i]].node, env->scan_env);
5377 if (min > tmin) min = tmin;
5378 if (max < tmax) max = tmax;
5379 }
5380 set_mml(&opt->len, min, max);
5381 }
5382 break;
5383
5384 #ifdef USE_CALL
5385 case NODE_CALL:
5386 if (NODE_IS_RECURSION(node))
5387 set_mml(&opt->len, 0, INFINITE_LEN);
5388 else {
5389 OnigOptionType save = env->options;
5390 env->options = ENCLOSURE_(NODE_BODY(node))->o.options;
5391 r = optimize_nodes(NODE_BODY(node), opt, env);
5392 env->options = save;
5393 }
5394 break;
5395 #endif
5396
5397 case NODE_QUANT:
5398 {
5399 OnigLen min, max;
5400 QuantNode* qn = QUANT_(node);
5401
5402 r = optimize_nodes(NODE_BODY(node), &xo, env);
5403 if (r != 0) break;
5404
5405 if (qn->lower > 0) {
5406 copy_node_opt_info(opt, &xo);
5407 if (xo.exb.len > 0) {
5408 if (xo.exb.reach_end) {
5409 for (i = 2; i <= qn->lower && ! is_full_opt_exact(&opt->exb); i++) {
5410 int rc = concat_opt_exact(&opt->exb, &xo.exb, enc);
5411 if (rc > 0) break;
5412 }
5413 if (i < qn->lower) opt->exb.reach_end = 0;
5414 }
5415 }
5416
5417 if (qn->lower != qn->upper) {
5418 opt->exb.reach_end = 0;
5419 opt->exm.reach_end = 0;
5420 }
5421 if (qn->lower > 1)
5422 opt->exm.reach_end = 0;
5423 }
5424
5425 if (IS_REPEAT_INFINITE(qn->upper)) {
5426 if (env->mmd.max == 0 &&
5427 NODE_IS_ANYCHAR(NODE_BODY(node)) && qn->greedy != 0) {
5428 if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), env)))
5429 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF_ML);
5430 else
5431 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF);
5432 }
5433
5434 max = (xo.len.max > 0 ? INFINITE_LEN : 0);
5435 }
5436 else {
5437 max = distance_multiply(xo.len.max, qn->upper);
5438 }
5439
5440 min = distance_multiply(xo.len.min, qn->lower);
5441 set_mml(&opt->len, min, max);
5442 }
5443 break;
5444
5445 case NODE_ENCLOSURE:
5446 {
5447 EnclosureNode* en = ENCLOSURE_(node);
5448
5449 switch (en->type) {
5450 case ENCLOSURE_OPTION:
5451 {
5452 OnigOptionType save = env->options;
5453
5454 env->options = en->o.options;
5455 r = optimize_nodes(NODE_BODY(node), opt, env);
5456 env->options = save;
5457 }
5458 break;
5459
5460 case ENCLOSURE_MEMORY:
5461 #ifdef USE_CALL
5462 en->opt_count++;
5463 if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
5464 OnigLen min, max;
5465
5466 min = 0;
5467 max = INFINITE_LEN;
5468 if (NODE_IS_MIN_FIXED(node)) min = en->min_len;
5469 if (NODE_IS_MAX_FIXED(node)) max = en->max_len;
5470 set_mml(&opt->len, min, max);
5471 }
5472 else
5473 #endif
5474 {
5475 r = optimize_nodes(NODE_BODY(node), opt, env);
5476 if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF_MASK)) {
5477 if (MEM_STATUS_AT0(env->scan_env->backrefed_mem, en->m.regnum))
5478 remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF_MASK);
5479 }
5480 }
5481 break;
5482
5483 case ENCLOSURE_STOP_BACKTRACK:
5484 r = optimize_nodes(NODE_BODY(node), opt, env);
5485 break;
5486
5487 case ENCLOSURE_IF_ELSE:
5488 {
5489 OptEnv nenv;
5490
5491 copy_opt_env(&nenv, env);
5492 r = optimize_nodes(NODE_ENCLOSURE_BODY(en), &xo, &nenv);
5493 if (r == 0) {
5494 add_mml(&nenv.mmd, &xo.len);
5495 concat_left_node_opt_info(enc, opt, &xo);
5496 if (IS_NOT_NULL(en->te.Then)) {
5497 r = optimize_nodes(en->te.Then, &xo, &nenv);
5498 if (r == 0) {
5499 concat_left_node_opt_info(enc, opt, &xo);
5500 }
5501 }
5502
5503 if (IS_NOT_NULL(en->te.Else)) {
5504 r = optimize_nodes(en->te.Else, &xo, env);
5505 if (r == 0)
5506 alt_merge_node_opt_info(opt, &xo, env);
5507 }
5508 }
5509 }
5510 break;
5511 }
5512 }
5513 break;
5514
5515 case NODE_GIMMICK:
5516 break;
5517
5518 default:
5519 #ifdef ONIG_DEBUG
5520 fprintf(stderr, "optimize_nodes: undefined node type %d\n", NODE_TYPE(node));
5521 #endif
5522 r = ONIGERR_TYPE_BUG;
5523 break;
5524 }
5525
5526 return r;
5527 }
5528
5529 static int
5530 set_optimize_exact(regex_t* reg, OptExact* e)
5531 {
5532 int r;
5533
5534 if (e->len == 0) return 0;
5535
5536 if (e->ignore_case) {
5537 reg->exact = (UChar* )xmalloc(e->len);
5538 CHECK_NULL_RETURN_MEMERR(reg->exact);
5539 xmemcpy(reg->exact, e->s, e->len);
5540 reg->exact_end = reg->exact + e->len;
5541 reg->optimize = OPTIMIZE_EXACT_IC;
5542 }
5543 else {
5544 int allow_reverse;
5545
5546 reg->exact = onigenc_strdup(reg->enc, e->s, e->s + e->len);
5547 CHECK_NULL_RETURN_MEMERR(reg->exact);
5548 reg->exact_end = reg->exact + e->len;
5549
5550 allow_reverse =
5551 ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
5552
5553 if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5554 r = set_bm_skip(reg->exact, reg->exact_end, reg->enc,
5555 reg->map, &(reg->int_map));
5556 if (r != 0) return r;
5557
5558 reg->optimize = (allow_reverse != 0
5559 ? OPTIMIZE_EXACT_BM : OPTIMIZE_EXACT_BM_NO_REV);
5560 }
5561 else {
5562 reg->optimize = OPTIMIZE_EXACT;
5563 }
5564 }
5565
5566 reg->dmin = e->mmd.min;
5567 reg->dmax = e->mmd.max;
5568
5569 if (reg->dmin != INFINITE_LEN) {
5570 reg->threshold_len = reg->dmin + (int )(reg->exact_end - reg->exact);
5571 }
5572
5573 return 0;
5574 }
5575
5576 static void
5577 set_optimize_map(regex_t* reg, OptMap* m)
5578 {
5579 int i;
5580
5581 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5582 reg->map[i] = m->map[i];
5583
5584 reg->optimize = OPTIMIZE_MAP;
5585 reg->dmin = m->mmd.min;
5586 reg->dmax = m->mmd.max;
5587
5588 if (reg->dmin != INFINITE_LEN) {
5589 reg->threshold_len = reg->dmin + 1;
5590 }
5591 }
5592
5593 static void
5594 set_sub_anchor(regex_t* reg, OptAnc* anc)
5595 {
5596 reg->sub_anchor |= anc->left & ANCHOR_BEGIN_LINE;
5597 reg->sub_anchor |= anc->right & ANCHOR_END_LINE;
5598 }
5599
5600 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5601 static void print_optimize_info(FILE* f, regex_t* reg);
5602 #endif
5603
5604 static int
5605 set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
5606 {
5607 int r;
5608 NodeOpt opt;
5609 OptEnv env;
5610
5611 env.enc = reg->enc;
5612 env.options = reg->options;
5613 env.case_fold_flag = reg->case_fold_flag;
5614 env.scan_env = scan_env;
5615 clear_mml(&env.mmd);
5616
5617 r = optimize_nodes(node, &opt, &env);
5618 if (r != 0) return r;
5619
5620 reg->anchor = opt.anc.left & (ANCHOR_BEGIN_BUF |
5621 ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_INF | ANCHOR_ANYCHAR_INF_ML |
5622 ANCHOR_LOOK_BEHIND);
5623
5624 if ((opt.anc.left & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) != 0)
5625 reg->anchor &= ~ANCHOR_ANYCHAR_INF_ML;
5626
5627 reg->anchor |= opt.anc.right & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF |
5628 ANCHOR_PREC_READ_NOT);
5629
5630 if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
5631 reg->anchor_dmin = opt.len.min;
5632 reg->anchor_dmax = opt.len.max;
5633 }
5634
5635 if (opt.exb.len > 0 || opt.exm.len > 0) {
5636 select_opt_exact(reg->enc, &opt.exb, &opt.exm);
5637 if (opt.map.value > 0 && comp_opt_exact_or_map(&opt.exb, &opt.map) > 0) {
5638 goto set_map;
5639 }
5640 else {
5641 r = set_optimize_exact(reg, &opt.exb);
5642 set_sub_anchor(reg, &opt.exb.anc);
5643 }
5644 }
5645 else if (opt.map.value > 0) {
5646 set_map:
5647 set_optimize_map(reg, &opt.map);
5648 set_sub_anchor(reg, &opt.map.anc);
5649 }
5650 else {
5651 reg->sub_anchor |= opt.anc.left & ANCHOR_BEGIN_LINE;
5652 if (opt.len.max == 0)
5653 reg->sub_anchor |= opt.anc.right & ANCHOR_END_LINE;
5654 }
5655
5656 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5657 print_optimize_info(stderr, reg);
5658 #endif
5659 return r;
5660 }
5661
5662 static void
5663 clear_optimize_info(regex_t* reg)
5664 {
5665 reg->optimize = OPTIMIZE_NONE;
5666 reg->anchor = 0;
5667 reg->anchor_dmin = 0;
5668 reg->anchor_dmax = 0;
5669 reg->sub_anchor = 0;
5670 reg->exact_end = (UChar* )NULL;
5671 reg->threshold_len = 0;
5672 if (IS_NOT_NULL(reg->exact)) {
5673 xfree(reg->exact);
5674 reg->exact = (UChar* )NULL;
5675 }
5676 }
5677
5678 #ifdef ONIG_DEBUG
5679
5680 static void print_enc_string(FILE* fp, OnigEncoding enc,
5681 const UChar *s, const UChar *end)
5682 {
5683 fprintf(fp, "\nPATTERN: /");
5684
5685 if (ONIGENC_MBC_MINLEN(enc) > 1) {
5686 const UChar *p;
5687 OnigCodePoint code;
5688
5689 p = s;
5690 while (p < end) {
5691 code = ONIGENC_MBC_TO_CODE(enc, p, end);
5692 if (code >= 0x80) {
5693 fprintf(fp, " 0x%04x ", (int )code);
5694 }
5695 else {
5696 fputc((int )code, fp);
5697 }
5698
5699 p += enclen(enc, p);
5700 }
5701 }
5702 else {
5703 while (s < end) {
5704 fputc((int )*s, fp);
5705 s++;
5706 }
5707 }
5708
5709 fprintf(fp, "/\n");
5710 }
5711
5712 #endif /* ONIG_DEBUG */
5713
5714 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5715
5716 static void
5717 print_distance_range(FILE* f, OnigLen a, OnigLen b)
5718 {
5719 if (a == INFINITE_LEN)
5720 fputs("inf", f);
5721 else
5722 fprintf(f, "(%u)", a);
5723
5724 fputs("-", f);
5725
5726 if (b == INFINITE_LEN)
5727 fputs("inf", f);
5728 else
5729 fprintf(f, "(%u)", b);
5730 }
5731
5732 static void
5733 print_anchor(FILE* f, int anchor)
5734 {
5735 int q = 0;
5736
5737 fprintf(f, "[");
5738
5739 if (anchor & ANCHOR_BEGIN_BUF) {
5740 fprintf(f, "begin-buf");
5741 q = 1;
5742 }
5743 if (anchor & ANCHOR_BEGIN_LINE) {
5744 if (q) fprintf(f, ", ");
5745 q = 1;
5746 fprintf(f, "begin-line");
5747 }
5748 if (anchor & ANCHOR_BEGIN_POSITION) {
5749 if (q) fprintf(f, ", ");
5750 q = 1;
5751 fprintf(f, "begin-pos");
5752 }
5753 if (anchor & ANCHOR_END_BUF) {
5754 if (q) fprintf(f, ", ");
5755 q = 1;
5756 fprintf(f, "end-buf");
5757 }
5758 if (anchor & ANCHOR_SEMI_END_BUF) {
5759 if (q) fprintf(f, ", ");
5760 q = 1;
5761 fprintf(f, "semi-end-buf");
5762 }
5763 if (anchor & ANCHOR_END_LINE) {
5764 if (q) fprintf(f, ", ");
5765 q = 1;
5766 fprintf(f, "end-line");
5767 }
5768 if (anchor & ANCHOR_ANYCHAR_INF) {
5769 if (q) fprintf(f, ", ");
5770 q = 1;
5771 fprintf(f, "anychar-inf");
5772 }
5773 if (anchor & ANCHOR_ANYCHAR_INF_ML) {
5774 if (q) fprintf(f, ", ");
5775 fprintf(f, "anychar-inf-ml");
5776 }
5777
5778 fprintf(f, "]");
5779 }
5780
5781 static void
5782 print_optimize_info(FILE* f, regex_t* reg)
5783 {
5784 static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
5785 "EXACT_IC", "MAP" };
5786
5787 fprintf(f, "optimize: %s\n", on[reg->optimize]);
5788 fprintf(f, " anchor: "); print_anchor(f, reg->anchor);
5789 if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
5790 print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
5791 fprintf(f, "\n");
5792
5793 if (reg->optimize) {
5794 fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor);
5795 fprintf(f, "\n");
5796 }
5797 fprintf(f, "\n");
5798
5799 if (reg->exact) {
5800 UChar *p;
5801 fprintf(f, "exact: [");
5802 for (p = reg->exact; p < reg->exact_end; p++) {
5803 fputc(*p, f);
5804 }
5805 fprintf(f, "]: length: %ld\n", (reg->exact_end - reg->exact));
5806 }
5807 else if (reg->optimize & OPTIMIZE_MAP) {
5808 int c, i, n = 0;
5809
5810 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5811 if (reg->map[i]) n++;
5812
5813 fprintf(f, "map: n=%d\n", n);
5814 if (n > 0) {
5815 c = 0;
5816 fputc('[', f);
5817 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
5818 if (reg->map[i] != 0) {
5819 if (c > 0) fputs(", ", f);
5820 c++;
5821 if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
5822 ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
5823 fputc(i, f);
5824 else
5825 fprintf(f, "%d", i);
5826 }
5827 }
5828 fprintf(f, "]\n");
5829 }
5830 }
5831 }
5832 #endif
5833
5834
5835 extern RegexExt*
5836 onig_get_regex_ext(regex_t* reg)
5837 {
5838 if (IS_NULL(REG_EXTP(reg))) {
5839 RegexExt* ext = (RegexExt* )xmalloc(sizeof(*ext));
5840 if (IS_NULL(ext)) return 0;
5841
5842 ext->pattern = 0;
5843 ext->pattern_end = 0;
5844 #ifdef USE_CALLOUT
5845 ext->tag_table = 0;
5846 ext->callout_num = 0;
5847 ext->callout_list_alloc = 0;
5848 ext->callout_list = 0;
5849 #endif
5850
5851 REG_EXTPL(reg) = (void* )ext;
5852 }
5853
5854 return REG_EXTP(reg);
5855 }
5856
5857 static void
5858 free_regex_ext(RegexExt* ext)
5859 {
5860 if (IS_NOT_NULL(ext)) {
5861 if (IS_NOT_NULL(ext->pattern))
5862 xfree((void* )ext->pattern);
5863
5864 #ifdef USE_CALLOUT
5865 if (IS_NOT_NULL(ext->tag_table))
5866 onig_callout_tag_table_free(ext->tag_table);
5867
5868 if (IS_NOT_NULL(ext->callout_list))
5869 onig_free_reg_callout_list(ext->callout_num, ext->callout_list);
5870 #endif
5871
5872 xfree(ext);
5873 }
5874 }
5875
5876 extern int
5877 onig_ext_set_pattern(regex_t* reg, const UChar* pattern, const UChar* pattern_end)
5878 {
5879 RegexExt* ext;
5880 UChar* s;
5881
5882 ext = onig_get_regex_ext(reg);
5883 CHECK_NULL_RETURN_MEMERR(ext);
5884
5885 s = onigenc_strdup(reg->enc, pattern, pattern_end);
5886 CHECK_NULL_RETURN_MEMERR(s);
5887
5888 ext->pattern = s;
5889 ext->pattern_end = s + (pattern_end - pattern);
5890
5891 return ONIG_NORMAL;
5892 }
5893
5894
5895 extern void
5896 onig_free_body(regex_t* reg)
5897 {
5898 if (IS_NOT_NULL(reg)) {
5899 if (IS_NOT_NULL(reg->p)) xfree(reg->p);
5900 if (IS_NOT_NULL(reg->exact)) xfree(reg->exact);
5901 if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map);
5902 if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward);
5903 if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range);
5904 if (IS_NOT_NULL(REG_EXTP(reg))) {
5905 free_regex_ext(REG_EXTP(reg));
5906 REG_EXTPL(reg) = 0;
5907 }
5908
5909 onig_names_free(reg);
5910 }
5911 }
5912
5913 extern void
5914 onig_free(regex_t* reg)
5915 {
5916 if (IS_NOT_NULL(reg)) {
5917 onig_free_body(reg);
5918 xfree(reg);
5919 }
5920 }
5921
5922 #define REGEX_TRANSFER(to,from) do {\
5923 onig_free_body(to);\
5924 xmemcpy(to, from, sizeof(regex_t));\
5925 xfree(from);\
5926 } while (0)
5927
5928 extern void
5929 onig_transfer(regex_t* to, regex_t* from)
5930 {
5931 REGEX_TRANSFER(to, from);
5932 }
5933
5934
5935 #ifdef ONIG_DEBUG_PARSE
5936 static void print_tree P_((FILE* f, Node* node));
5937 #endif
5938
5939 extern int
5940 onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5941 OnigErrorInfo* einfo)
5942 {
5943 #define COMPILE_INIT_SIZE 20
5944
5945 int r, init_size;
5946 Node* root;
5947 ScanEnv scan_env;
5948 #ifdef USE_CALL
5949 UnsetAddrList uslist;
5950 #endif
5951
5952 root = 0;
5953 if (IS_NOT_NULL(einfo)) {
5954 einfo->enc = reg->enc;
5955 einfo->par = (UChar* )NULL;
5956 }
5957
5958 #ifdef ONIG_DEBUG
5959 print_enc_string(stderr, reg->enc, pattern, pattern_end);
5960 #endif
5961
5962 if (reg->alloc == 0) {
5963 init_size = (int )(pattern_end - pattern) * 2;
5964 if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
5965 r = BB_INIT(reg, init_size);
5966 if (r != 0) goto end;
5967 }
5968 else
5969 reg->used = 0;
5970
5971 reg->num_mem = 0;
5972 reg->num_repeat = 0;
5973 reg->num_null_check = 0;
5974 reg->repeat_range_alloc = 0;
5975 reg->repeat_range = (OnigRepeatRange* )NULL;
5976
5977 r = onig_parse_tree(&root, pattern, pattern_end, reg, &scan_env);
5978 if (r != 0) goto err;
5979
5980 /* mixed use named group and no-named group */
5981 if (scan_env.num_named > 0 &&
5982 IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
5983 ! ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
5984 if (scan_env.num_named != scan_env.num_mem)
5985 r = disable_noname_group_capture(&root, reg, &scan_env);
5986 else
5987 r = numbered_ref_check(root);
5988
5989 if (r != 0) goto err;
5990 }
5991
5992 r = check_backrefs(root, &scan_env);
5993 if (r != 0) goto err;
5994
5995 #ifdef USE_CALL
5996 if (scan_env.num_call > 0) {
5997 r = unset_addr_list_init(&uslist, scan_env.num_call);
5998 if (r != 0) goto err;
5999 scan_env.unset_addr_list = &uslist;
6000 r = setup_call(root, &scan_env, 0);
6001 if (r != 0) goto err_unset;
6002 r = setup_call2(root);
6003 if (r != 0) goto err_unset;
6004 r = recursive_call_check_trav(root, &scan_env, 0);
6005 if (r < 0) goto err_unset;
6006 r = infinite_recursive_call_check_trav(root, &scan_env);
6007 if (r != 0) goto err_unset;
6008
6009 setup_called_state(root, 0);
6010 }
6011
6012 reg->num_call = scan_env.num_call;
6013 #endif
6014
6015 r = setup_tree(root, reg, 0, &scan_env);
6016 if (r != 0) goto err_unset;
6017
6018 #ifdef ONIG_DEBUG_PARSE
6019 print_tree(stderr, root);
6020 #endif
6021
6022 reg->capture_history = scan_env.capture_history;
6023 reg->bt_mem_start = scan_env.bt_mem_start;
6024 reg->bt_mem_start |= reg->capture_history;
6025 if (IS_FIND_CONDITION(reg->options))
6026 MEM_STATUS_ON_ALL(reg->bt_mem_end);
6027 else {
6028 reg->bt_mem_end = scan_env.bt_mem_end;
6029 reg->bt_mem_end |= reg->capture_history;
6030 }
6031 reg->bt_mem_start |= reg->bt_mem_end;
6032
6033 clear_optimize_info(reg);
6034 #ifndef ONIG_DONT_OPTIMIZE
6035 r = set_optimize_info_from_tree(root, reg, &scan_env);
6036 if (r != 0) goto err_unset;
6037 #endif
6038
6039 if (IS_NOT_NULL(scan_env.mem_env_dynamic)) {
6040 xfree(scan_env.mem_env_dynamic);
6041 scan_env.mem_env_dynamic = (MemEnv* )NULL;
6042 }
6043
6044 r = compile_tree(root, reg, &scan_env);
6045 if (r == 0) {
6046 if (scan_env.keep_num > 0) {
6047 r = add_opcode(reg, OP_UPDATE_VAR);
6048 if (r != 0) goto err;
6049 r = add_update_var_type(reg, UPDATE_VAR_KEEP_FROM_STACK_LAST);
6050 if (r != 0) goto err;
6051 r = add_mem_num(reg, 0 /* not used */);
6052 if (r != 0) goto err;
6053 }
6054
6055 r = add_opcode(reg, OP_END);
6056 #ifdef USE_CALL
6057 if (scan_env.num_call > 0) {
6058 r = fix_unset_addr_list(&uslist, reg);
6059 unset_addr_list_end(&uslist);
6060 if (r != 0) goto err;
6061 }
6062 #endif
6063
6064 if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0)
6065 #ifdef USE_CALLOUT
6066 || (IS_NOT_NULL(REG_EXTP(reg)) && REG_EXTP(reg)->callout_num != 0)
6067 #endif
6068 )
6069 reg->stack_pop_level = STACK_POP_LEVEL_ALL;
6070 else {
6071 if (reg->bt_mem_start != 0)
6072 reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
6073 else
6074 reg->stack_pop_level = STACK_POP_LEVEL_FREE;
6075 }
6076 }
6077 #ifdef USE_CALL
6078 else if (scan_env.num_call > 0) {
6079 unset_addr_list_end(&uslist);
6080 }
6081 #endif
6082 onig_node_free(root);
6083
6084 #ifdef ONIG_DEBUG_COMPILE
6085 onig_print_names(stderr, reg);
6086 onig_print_compiled_byte_code_list(stderr, reg);
6087 #endif
6088
6089 end:
6090 return r;
6091
6092 err_unset:
6093 #ifdef USE_CALL
6094 if (scan_env.num_call > 0) {
6095 unset_addr_list_end(&uslist);
6096 }
6097 #endif
6098 err:
6099 if (IS_NOT_NULL(scan_env.error)) {
6100 if (IS_NOT_NULL(einfo)) {
6101 einfo->par = scan_env.error;
6102 einfo->par_end = scan_env.error_end;
6103 }
6104 }
6105
6106 onig_node_free(root);
6107 if (IS_NOT_NULL(scan_env.mem_env_dynamic))
6108 xfree(scan_env.mem_env_dynamic);
6109 return r;
6110 }
6111
6112
6113 static int onig_inited = 0;
6114
6115 extern int
6116 onig_reg_init(regex_t* reg, OnigOptionType option, OnigCaseFoldType case_fold_flag,
6117 OnigEncoding enc, OnigSyntaxType* syntax)
6118 {
6119 int r;
6120
6121 xmemset(reg, 0, sizeof(*reg));
6122
6123 if (onig_inited == 0) {
6124 #if 0
6125 return ONIGERR_LIBRARY_IS_NOT_INITIALIZED;
6126 #else
6127 r = onig_initialize(&enc, 1);
6128 if (r != 0)
6129 return ONIGERR_FAIL_TO_INITIALIZE;
6130
6131 onig_warning("You didn't call onig_initialize() explicitly");
6132 #endif
6133 }
6134
6135 if (IS_NULL(reg))
6136 return ONIGERR_INVALID_ARGUMENT;
6137
6138 if (ONIGENC_IS_UNDEF(enc))
6139 return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED;
6140
6141 if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
6142 == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
6143 return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
6144 }
6145
6146 if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
6147 option |= syntax->options;
6148 option &= ~ONIG_OPTION_SINGLELINE;
6149 }
6150 else
6151 option |= syntax->options;
6152
6153 (reg)->enc = enc;
6154 (reg)->options = option;
6155 (reg)->syntax = syntax;
6156 (reg)->optimize = 0;
6157 (reg)->exact = (UChar* )NULL;
6158 (reg)->int_map = (int* )NULL;
6159 (reg)->int_map_backward = (int* )NULL;
6160 REG_EXTPL(reg) = NULL;
6161
6162 (reg)->p = (UChar* )NULL;
6163 (reg)->alloc = 0;
6164 (reg)->used = 0;
6165 (reg)->name_table = (void* )NULL;
6166
6167 (reg)->case_fold_flag = case_fold_flag;
6168 return 0;
6169 }
6170
6171 extern int
6172 onig_new_without_alloc(regex_t* reg,
6173 const UChar* pattern, const UChar* pattern_end,
6174 OnigOptionType option, OnigEncoding enc,
6175 OnigSyntaxType* syntax, OnigErrorInfo* einfo)
6176 {
6177 int r;
6178
6179 r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
6180 if (r != 0) return r;
6181
6182 r = onig_compile(reg, pattern, pattern_end, einfo);
6183 return r;
6184 }
6185
6186 extern int
6187 onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
6188 OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
6189 OnigErrorInfo* einfo)
6190 {
6191 int r;
6192
6193 *reg = (regex_t* )xmalloc(sizeof(regex_t));
6194 if (IS_NULL(*reg)) return ONIGERR_MEMORY;
6195
6196 r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
6197 if (r != 0) goto err;
6198
6199 r = onig_compile(*reg, pattern, pattern_end, einfo);
6200 if (r != 0) {
6201 err:
6202 onig_free(*reg);
6203 *reg = NULL;
6204 }
6205 return r;
6206 }
6207
6208 extern int
6209 onig_initialize(OnigEncoding encodings[], int n)
6210 {
6211 int i;
6212 int r;
6213
6214 if (onig_inited != 0)
6215 return 0;
6216
6217 onigenc_init();
6218
6219 onig_inited = 1;
6220
6221 for (i = 0; i < n; i++) {
6222 OnigEncoding enc = encodings[i];
6223 r = onig_initialize_encoding(enc);
6224 if (r != 0)
6225 return r;
6226 }
6227
6228 return ONIG_NORMAL;
6229 }
6230
6231 typedef struct EndCallListItem {
6232 struct EndCallListItem* next;
6233 void (*func)(void);
6234 } EndCallListItemType;
6235
6236 static EndCallListItemType* EndCallTop;
6237
6238 extern void onig_add_end_call(void (*func)(void))
6239 {
6240 EndCallListItemType* item;
6241
6242 item = (EndCallListItemType* )xmalloc(sizeof(*item));
6243 if (item == 0) return ;
6244
6245 item->next = EndCallTop;
6246 item->func = func;
6247
6248 EndCallTop = item;
6249 }
6250
6251 static void
6252 exec_end_call_list(void)
6253 {
6254 EndCallListItemType* prev;
6255 void (*func)(void);
6256
6257 while (EndCallTop != 0) {
6258 func = EndCallTop->func;
6259 (*func)();
6260
6261 prev = EndCallTop;
6262 EndCallTop = EndCallTop->next;
6263 xfree(prev);
6264 }
6265 }
6266
6267 extern int
6268 onig_end(void)
6269 {
6270 exec_end_call_list();
6271
6272 #ifdef USE_CALLOUT
6273 onig_global_callout_names_free();
6274 #endif
6275
6276 onigenc_end();
6277
6278 onig_inited = 0;
6279
6280 return 0;
6281 }
6282
6283 extern int
6284 onig_is_in_code_range(const UChar* p, OnigCodePoint code)
6285 {
6286 OnigCodePoint n, *data;
6287 OnigCodePoint low, high, x;
6288
6289 GET_CODE_POINT(n, p);
6290 data = (OnigCodePoint* )p;
6291 data++;
6292
6293 for (low = 0, high = n; low < high; ) {
6294 x = (low + high) >> 1;
6295 if (code > data[x * 2 + 1])
6296 low = x + 1;
6297 else
6298 high = x;
6299 }
6300
6301 return ((low < n && code >= data[low * 2]) ? 1 : 0);
6302 }
6303
6304 extern int
6305 onig_is_code_in_cc_len(int elen, OnigCodePoint code, /* CClassNode* */ void* cc_arg)
6306 {
6307 int found;
6308 CClassNode* cc = (CClassNode* )cc_arg;
6309
6310 if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
6311 if (IS_NULL(cc->mbuf)) {
6312 found = 0;
6313 }
6314 else {
6315 found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
6316 }
6317 }
6318 else {
6319 found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
6320 }
6321
6322 if (IS_NCCLASS_NOT(cc))
6323 return !found;
6324 else
6325 return found;
6326 }
6327
6328 extern int
6329 onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
6330 {
6331 int len;
6332
6333 if (ONIGENC_MBC_MINLEN(enc) > 1) {
6334 len = 2;
6335 }
6336 else {
6337 len = ONIGENC_CODE_TO_MBCLEN(enc, code);
6338 }
6339 return onig_is_code_in_cc_len(len, code, cc);
6340 }
6341
6342
6343 #ifdef ONIG_DEBUG_PARSE
6344
6345 static void
6346 p_string(FILE* f, int len, UChar* s)
6347 {
6348 fputs(":", f);
6349 while (len-- > 0) { fputc(*s++, f); }
6350 }
6351
6352 static void
6353 Indent(FILE* f, int indent)
6354 {
6355 int i;
6356 for (i = 0; i < indent; i++) putc(' ', f);
6357 }
6358
6359 static void
6360 print_indent_tree(FILE* f, Node* node, int indent)
6361 {
6362 int i;
6363 NodeType type;
6364 UChar* p;
6365 int add = 3;
6366
6367 Indent(f, indent);
6368 if (IS_NULL(node)) {
6369 fprintf(f, "ERROR: null node!!!\n");
6370 exit (0);
6371 }
6372
6373 type = NODE_TYPE(node);
6374 switch (type) {
6375 case NODE_LIST:
6376 case NODE_ALT:
6377 if (type == NODE_LIST)
6378 fprintf(f, "<list:%p>\n", node);
6379 else
6380 fprintf(f, "<alt:%p>\n", node);
6381
6382 print_indent_tree(f, NODE_CAR(node), indent + add);
6383 while (IS_NOT_NULL(node = NODE_CDR(node))) {
6384 if (NODE_TYPE(node) != type) {
6385 fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NODE_TYPE(node));
6386 exit(0);
6387 }
6388 print_indent_tree(f, NODE_CAR(node), indent + add);
6389 }
6390 break;
6391
6392 case NODE_STRING:
6393 fprintf(f, "<string%s:%p>", (NODE_STRING_IS_RAW(node) ? "-raw" : ""), node);
6394 for (p = STR_(node)->s; p < STR_(node)->end; p++) {
6395 if (*p >= 0x20 && *p < 0x7f)
6396 fputc(*p, f);
6397 else {
6398 fprintf(f, " 0x%02x", *p);
6399 }
6400 }
6401 break;
6402
6403 case NODE_CCLASS:
6404 fprintf(f, "<cclass:%p>", node);
6405 if (IS_NCCLASS_NOT(CCLASS_(node))) fputs(" not", f);
6406 if (CCLASS_(node)->mbuf) {
6407 BBuf* bbuf = CCLASS_(node)->mbuf;
6408 for (i = 0; i < bbuf->used; i++) {
6409 if (i > 0) fprintf(f, ",");
6410 fprintf(f, "%0x", bbuf->p[i]);
6411 }
6412 }
6413 break;
6414
6415 case NODE_CTYPE:
6416 fprintf(f, "<ctype:%p> ", node);
6417 switch (CTYPE_(node)->ctype) {
6418 case CTYPE_ANYCHAR:
6419 fprintf(f, "<anychar:%p>", node);
6420 break;
6421
6422 case ONIGENC_CTYPE_WORD:
6423 if (CTYPE_(node)->not != 0)
6424 fputs("not word", f);
6425 else
6426 fputs("word", f);
6427
6428 if (CTYPE_(node)->ascii_mode != 0)
6429 fputs(" (ascii)", f);
6430
6431 break;
6432
6433 default:
6434 fprintf(f, "ERROR: undefined ctype.\n");
6435 exit(0);
6436 }
6437 break;
6438
6439 case NODE_ANCHOR:
6440 fprintf(f, "<anchor:%p> ", node);
6441 switch (ANCHOR_(node)->type) {
6442 case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break;
6443 case ANCHOR_END_BUF: fputs("end buf", f); break;
6444 case ANCHOR_BEGIN_LINE: fputs("begin line", f); break;
6445 case ANCHOR_END_LINE: fputs("end line", f); break;
6446 case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break;
6447 case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
6448
6449 case ANCHOR_WORD_BOUNDARY: fputs("word boundary", f); break;
6450 case ANCHOR_NO_WORD_BOUNDARY: fputs("not word boundary", f); break;
6451 #ifdef USE_WORD_BEGIN_END
6452 case ANCHOR_WORD_BEGIN: fputs("word begin", f); break;
6453 case ANCHOR_WORD_END: fputs("word end", f); break;
6454 #endif
6455 case ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
6456 fputs("extended-grapheme-cluster boundary", f); break;
6457 case ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
6458 fputs("no-extended-grapheme-cluster boundary", f); break;
6459 case ANCHOR_PREC_READ:
6460 fprintf(f, "prec read\n");
6461 print_indent_tree(f, NODE_BODY(node), indent + add);
6462 break;
6463 case ANCHOR_PREC_READ_NOT:
6464 fprintf(f, "prec read not\n");
6465 print_indent_tree(f, NODE_BODY(node), indent + add);
6466 break;
6467 case ANCHOR_LOOK_BEHIND:
6468 fprintf(f, "look behind\n");
6469 print_indent_tree(f, NODE_BODY(node), indent + add);
6470 break;
6471 case ANCHOR_LOOK_BEHIND_NOT:
6472 fprintf(f, "look behind not\n");
6473 print_indent_tree(f, NODE_BODY(node), indent + add);
6474 break;
6475
6476 default:
6477 fprintf(f, "ERROR: undefined anchor type.\n");
6478 break;
6479 }
6480 break;
6481
6482 case NODE_BACKREF:
6483 {
6484 int* p;
6485 BackRefNode* br = BACKREF_(node);
6486 p = BACKREFS_P(br);
6487 fprintf(f, "<backref%s:%p>", NODE_IS_CHECKER(node) ? "-checker" : "", node);
6488 for (i = 0; i < br->back_num; i++) {
6489 if (i > 0) fputs(", ", f);
6490 fprintf(f, "%d", p[i]);
6491 }
6492 }
6493 break;
6494
6495 #ifdef USE_CALL
6496 case NODE_CALL:
6497 {
6498 CallNode* cn = CALL_(node);
6499 fprintf(f, "<call:%p>", node);
6500 p_string(f, cn->name_end - cn->name, cn->name);
6501 }
6502 break;
6503 #endif
6504
6505 case NODE_QUANT:
6506 fprintf(f, "<quantifier:%p>{%d,%d}%s\n", node,
6507 QUANT_(node)->lower, QUANT_(node)->upper,
6508 (QUANT_(node)->greedy ? "" : "?"));
6509 print_indent_tree(f, NODE_BODY(node), indent + add);
6510 break;
6511
6512 case NODE_ENCLOSURE:
6513 fprintf(f, "<enclosure:%p> ", node);
6514 switch (ENCLOSURE_(node)->type) {
6515 case ENCLOSURE_OPTION:
6516 fprintf(f, "option:%d", ENCLOSURE_(node)->o.options);
6517 break;
6518 case ENCLOSURE_MEMORY:
6519 fprintf(f, "memory:%d", ENCLOSURE_(node)->m.regnum);
6520 break;
6521 case ENCLOSURE_STOP_BACKTRACK:
6522 fprintf(f, "stop-bt");
6523 break;
6524
6525 default:
6526 break;
6527 }
6528 fprintf(f, "\n");
6529 print_indent_tree(f, NODE_BODY(node), indent + add);
6530 break;
6531
6532 case NODE_GIMMICK:
6533 fprintf(f, "<gimmick:%p> ", node);
6534 switch (GIMMICK_(node)->type) {
6535 case GIMMICK_FAIL:
6536 fprintf(f, "fail");
6537 break;
6538 case GIMMICK_KEEP:
6539 fprintf(f, "keep:%d", GIMMICK_(node)->id);
6540 break;
6541 case GIMMICK_SAVE:
6542 fprintf(f, "save:%d:%d", GIMMICK_(node)->detail_type, GIMMICK_(node)->id);
6543 break;
6544 case GIMMICK_UPDATE_VAR:
6545 fprintf(f, "update_var:%d:%d", GIMMICK_(node)->detail_type, GIMMICK_(node)->id);
6546 break;
6547 #ifdef USE_CALLOUT
6548 case GIMMICK_CALLOUT:
6549 switch (GIMMICK_(node)->detail_type) {
6550 case ONIG_CALLOUT_OF_CONTENTS:
6551 fprintf(f, "callout:contents:%d", GIMMICK_(node)->num);
6552 break;
6553 case ONIG_CALLOUT_OF_NAME:
6554 fprintf(f, "callout:name:%d:%d", GIMMICK_(node)->id, GIMMICK_(node)->num);
6555 break;
6556 }
6557 #endif
6558 }
6559 break;
6560
6561 default:
6562 fprintf(f, "print_indent_tree: undefined node type %d\n", NODE_TYPE(node));
6563 break;
6564 }
6565
6566 if (type != NODE_LIST && type != NODE_ALT && type != NODE_QUANT &&
6567 type != NODE_ENCLOSURE)
6568 fprintf(f, "\n");
6569 fflush(f);
6570 }
6571
6572 static void
6573 print_tree(FILE* f, Node* node)
6574 {
6575 print_indent_tree(f, node, 0);
6576 }
6577 #endif