]> git.proxmox.com Git - mirror_edk2.git/blob - MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regcomp.c
MdeModulePkg/HiiDB: Reorganize codes of exporting HII settings
[mirror_edk2.git] / MdeModulePkg / Universal / RegularExpressionDxe / Oniguruma / regcomp.c
1 /**********************************************************************
2 regcomp.c - Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5 * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 #include "regparse.h"
31
32 OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
33
34 #if 0
35 typedef struct {
36 int n;
37 int alloc;
38 int* v;
39 } int_stack;
40
41 static int
42 make_int_stack(int_stack** rs, int init_size)
43 {
44 int_stack* s;
45 int* v;
46
47 *rs = 0;
48
49 s = xmalloc(sizeof(*s));
50 if (IS_NULL(s)) return ONIGERR_MEMORY;
51
52 v = (int* )xmalloc(sizeof(int) * init_size);
53 if (IS_NULL(v)) {
54 xfree(s);
55 return ONIGERR_MEMORY;
56 }
57
58 s->n = 0;
59 s->alloc = init_size;
60 s->v = v;
61
62 *rs = s;
63 return ONIG_NORMAL;
64 }
65
66 static void
67 free_int_stack(int_stack* s)
68 {
69 if (IS_NOT_NULL(s)) {
70 if (IS_NOT_NULL(s->v))
71 xfree(s->v);
72 xfree(s);
73 }
74 }
75
76 static int
77 int_stack_push(int_stack* s, int v)
78 {
79 if (s->n >= s->alloc) {
80 int new_size = s->alloc * 2;
81 int* nv = (int* )xrealloc(s->v, sizeof(int) * new_size);
82 if (IS_NULL(nv)) return ONIGERR_MEMORY;
83
84 s->alloc = new_size;
85 s->v = nv;
86 }
87
88 s->v[s->n] = v;
89 s->n++;
90 return ONIG_NORMAL;
91 }
92
93 static int
94 int_stack_pop(int_stack* s)
95 {
96 int v;
97
98 #ifdef ONIG_DEBUG
99 if (s->n <= 0) {
100 fprintf(stderr, "int_stack_pop: fail empty. %p\n", s);
101 return 0;
102 }
103 #endif
104
105 v = s->v[s->n];
106 s->n--;
107 return v;
108 }
109 #endif
110
111 extern OnigCaseFoldType
112 onig_get_default_case_fold_flag(void)
113 {
114 return OnigDefaultCaseFoldFlag;
115 }
116
117 extern int
118 onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
119 {
120 OnigDefaultCaseFoldFlag = case_fold_flag;
121 return 0;
122 }
123
124 static int
125 int_multiply_cmp(int x, int y, int v)
126 {
127 if (x == 0 || y == 0) return -1;
128
129 if (x < INT_MAX / y) {
130 int xy = x * y;
131 if (xy > v) return 1;
132 else {
133 if (xy == v) return 0;
134 else return -1;
135 }
136 }
137 else
138 return 1;
139 }
140
141
142 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
143 static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
144 #endif
145
146 static void
147 swap_node(Node* a, Node* b)
148 {
149 Node c;
150
151 c = *a; *a = *b; *b = c;
152
153 if (NODE_TYPE(a) == NODE_STRING) {
154 StrNode* sn = STR_(a);
155 if (sn->capa == 0) {
156 int len = (int )(sn->end - sn->s);
157 sn->s = sn->buf;
158 sn->end = sn->s + len;
159 }
160 }
161
162 if (NODE_TYPE(b) == NODE_STRING) {
163 StrNode* sn = STR_(b);
164 if (sn->capa == 0) {
165 int len = (int )(sn->end - sn->s);
166 sn->s = sn->buf;
167 sn->end = sn->s + len;
168 }
169 }
170 }
171
172 static OnigLen
173 distance_add(OnigLen d1, OnigLen d2)
174 {
175 if (d1 == INFINITE_LEN || d2 == INFINITE_LEN)
176 return INFINITE_LEN;
177 else {
178 if (d1 <= INFINITE_LEN - d2) return d1 + d2;
179 else return INFINITE_LEN;
180 }
181 }
182
183 static OnigLen
184 distance_multiply(OnigLen d, int m)
185 {
186 if (m == 0) return 0;
187
188 if (d < INFINITE_LEN / m)
189 return d * m;
190 else
191 return INFINITE_LEN;
192 }
193
194 static int
195 bitset_is_empty(BitSetRef bs)
196 {
197 int i;
198
199 for (i = 0; i < (int )BITSET_SIZE; i++) {
200 if (bs[i] != 0) return 0;
201 }
202 return 1;
203 }
204
205 extern int
206 onig_bbuf_init(BBuf* buf, int size)
207 {
208 if (size <= 0) {
209 size = 0;
210 buf->p = NULL;
211 }
212 else {
213 buf->p = (UChar* )xmalloc(size);
214 if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
215 }
216
217 buf->alloc = size;
218 buf->used = 0;
219 return 0;
220 }
221
222
223 #ifdef USE_CALL
224
225 static int
226 unset_addr_list_init(UnsetAddrList* list, int size)
227 {
228 UnsetAddr* p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
229 CHECK_NULL_RETURN_MEMERR(p);
230
231 list->num = 0;
232 list->alloc = size;
233 list->us = p;
234 return 0;
235 }
236
237 static void
238 unset_addr_list_end(UnsetAddrList* list)
239 {
240 if (IS_NOT_NULL(list->us))
241 xfree(list->us);
242 }
243
244 static int
245 unset_addr_list_add(UnsetAddrList* list, int offset, struct _Node* node)
246 {
247 UnsetAddr* p;
248 int size;
249
250 if (list->num >= list->alloc) {
251 size = list->alloc * 2;
252 p = (UnsetAddr* )xrealloc(list->us, sizeof(UnsetAddr) * size, sizeof(UnsetAddr)* list->alloc);
253 CHECK_NULL_RETURN_MEMERR(p);
254 list->alloc = size;
255 list->us = p;
256 }
257
258 list->us[list->num].offset = offset;
259 list->us[list->num].target = node;
260 list->num++;
261 return 0;
262 }
263 #endif /* USE_CALL */
264
265
266 static int
267 add_opcode(regex_t* reg, int opcode)
268 {
269 BB_ADD1(reg, opcode);
270 return 0;
271 }
272
273 static int
274 add_rel_addr(regex_t* reg, int addr)
275 {
276 RelAddrType ra = (RelAddrType )addr;
277
278 BB_ADD(reg, &ra, SIZE_RELADDR);
279 return 0;
280 }
281
282 static int
283 add_abs_addr(regex_t* reg, int addr)
284 {
285 AbsAddrType ra = (AbsAddrType )addr;
286
287 BB_ADD(reg, &ra, SIZE_ABSADDR);
288 return 0;
289 }
290
291 static int
292 add_length(regex_t* reg, int len)
293 {
294 LengthType l = (LengthType )len;
295
296 BB_ADD(reg, &l, SIZE_LENGTH);
297 return 0;
298 }
299
300 static int
301 add_mem_num(regex_t* reg, int num)
302 {
303 MemNumType n = (MemNumType )num;
304
305 BB_ADD(reg, &n, SIZE_MEMNUM);
306 return 0;
307 }
308
309 #if 0
310 static int
311 add_pointer(regex_t* reg, void* addr)
312 {
313 PointerType ptr = (PointerType )addr;
314
315 BB_ADD(reg, &ptr, SIZE_POINTER);
316 return 0;
317 }
318 #endif
319
320 static int
321 add_option(regex_t* reg, OnigOptionType option)
322 {
323 BB_ADD(reg, &option, SIZE_OPTION);
324 return 0;
325 }
326
327 static int
328 add_save_type(regex_t* reg, enum SaveType type)
329 {
330 SaveType t = (SaveType )type;
331
332 BB_ADD(reg, &t, SIZE_SAVE_TYPE);
333 return 0;
334 }
335
336 static int
337 add_update_var_type(regex_t* reg, enum UpdateVarType type)
338 {
339 UpdateVarType t = (UpdateVarType )type;
340
341 BB_ADD(reg, &t, SIZE_UPDATE_VAR_TYPE);
342 return 0;
343 }
344
345 static int
346 add_mode(regex_t* reg, ModeType mode)
347 {
348 BB_ADD(reg, &mode, SIZE_MODE);
349 return 0;
350 }
351
352 static int
353 add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
354 {
355 int r;
356
357 r = add_opcode(reg, opcode);
358 if (r != 0) return r;
359 r = add_rel_addr(reg, addr);
360 return r;
361 }
362
363 static int
364 add_bytes(regex_t* reg, UChar* bytes, int len)
365 {
366 BB_ADD(reg, bytes, len);
367 return 0;
368 }
369
370 static int
371 add_bitset(regex_t* reg, BitSetRef bs)
372 {
373 BB_ADD(reg, bs, SIZE_BITSET);
374 return 0;
375 }
376
377 static int compile_length_tree(Node* node, regex_t* reg);
378 static int compile_tree(Node* node, regex_t* reg, ScanEnv* env);
379
380
381 #define IS_NEED_STR_LEN_OP_EXACT(op) \
382 ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
383 (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
384
385 static int
386 select_str_opcode(int mb_len, int str_len, int ignore_case)
387 {
388 int op;
389
390 if (ignore_case) {
391 switch (str_len) {
392 case 1: op = OP_EXACT1_IC; break;
393 default: op = OP_EXACTN_IC; break;
394 }
395 }
396 else {
397 switch (mb_len) {
398 case 1:
399 switch (str_len) {
400 case 1: op = OP_EXACT1; break;
401 case 2: op = OP_EXACT2; break;
402 case 3: op = OP_EXACT3; break;
403 case 4: op = OP_EXACT4; break;
404 case 5: op = OP_EXACT5; break;
405 default: op = OP_EXACTN; break;
406 }
407 break;
408
409 case 2:
410 switch (str_len) {
411 case 1: op = OP_EXACTMB2N1; break;
412 case 2: op = OP_EXACTMB2N2; break;
413 case 3: op = OP_EXACTMB2N3; break;
414 default: op = OP_EXACTMB2N; break;
415 }
416 break;
417
418 case 3:
419 op = OP_EXACTMB3N;
420 break;
421
422 default:
423 op = OP_EXACTMBN;
424 break;
425 }
426 }
427 return op;
428 }
429
430 static int
431 compile_tree_empty_check(Node* node, regex_t* reg, int empty_info, ScanEnv* env)
432 {
433 int r;
434 int saved_num_null_check = reg->num_null_check;
435
436 if (empty_info != QUANT_BODY_IS_NOT_EMPTY) {
437 r = add_opcode(reg, OP_EMPTY_CHECK_START);
438 if (r != 0) return r;
439 r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
440 if (r != 0) return r;
441 reg->num_null_check++;
442 }
443
444 r = compile_tree(node, reg, env);
445 if (r != 0) return r;
446
447 if (empty_info != QUANT_BODY_IS_NOT_EMPTY) {
448 if (empty_info == QUANT_BODY_IS_EMPTY)
449 r = add_opcode(reg, OP_EMPTY_CHECK_END);
450 else if (empty_info == QUANT_BODY_IS_EMPTY_MEM)
451 r = add_opcode(reg, OP_EMPTY_CHECK_END_MEMST);
452 else if (empty_info == QUANT_BODY_IS_EMPTY_REC)
453 r = add_opcode(reg, OP_EMPTY_CHECK_END_MEMST_PUSH);
454
455 if (r != 0) return r;
456 r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
457 }
458 return r;
459 }
460
461 #ifdef USE_CALL
462 static int
463 compile_call(CallNode* node, regex_t* reg, ScanEnv* env)
464 {
465 int r;
466
467 r = add_opcode(reg, OP_CALL);
468 if (r != 0) return r;
469 r = unset_addr_list_add(env->unset_addr_list, BB_GET_OFFSET_POS(reg),
470 NODE_CALL_BODY(node));
471 if (r != 0) return r;
472 r = add_abs_addr(reg, 0 /*dummy addr.*/);
473 return r;
474 }
475 #endif
476
477 static int
478 compile_tree_n_times(Node* node, int n, regex_t* reg, ScanEnv* env)
479 {
480 int i, r;
481
482 for (i = 0; i < n; i++) {
483 r = compile_tree(node, reg, env);
484 if (r != 0) return r;
485 }
486 return 0;
487 }
488
489 static int
490 add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, int str_len,
491 regex_t* reg ARG_UNUSED, int ignore_case)
492 {
493 int len;
494 int op = select_str_opcode(mb_len, str_len, ignore_case);
495
496 len = SIZE_OPCODE;
497
498 if (op == OP_EXACTMBN) len += SIZE_LENGTH;
499 if (IS_NEED_STR_LEN_OP_EXACT(op))
500 len += SIZE_LENGTH;
501
502 len += mb_len * str_len;
503 return len;
504 }
505
506 static int
507 add_compile_string(UChar* s, int mb_len, int str_len,
508 regex_t* reg, int ignore_case)
509 {
510 int op = select_str_opcode(mb_len, str_len, ignore_case);
511 add_opcode(reg, op);
512
513 if (op == OP_EXACTMBN)
514 add_length(reg, mb_len);
515
516 if (IS_NEED_STR_LEN_OP_EXACT(op)) {
517 if (op == OP_EXACTN_IC)
518 add_length(reg, mb_len * str_len);
519 else
520 add_length(reg, str_len);
521 }
522
523 add_bytes(reg, s, mb_len * str_len);
524 return 0;
525 }
526
527
528 static int
529 compile_length_string_node(Node* node, regex_t* reg)
530 {
531 int rlen, r, len, prev_len, slen, ambig;
532 UChar *p, *prev;
533 StrNode* sn;
534 OnigEncoding enc = reg->enc;
535
536 sn = STR_(node);
537 if (sn->end <= sn->s)
538 return 0;
539
540 ambig = NODE_STRING_IS_AMBIG(node);
541
542 p = prev = sn->s;
543 prev_len = enclen(enc, p);
544 p += prev_len;
545 slen = 1;
546 rlen = 0;
547
548 for (; p < sn->end; ) {
549 len = enclen(enc, p);
550 if (len == prev_len) {
551 slen++;
552 }
553 else {
554 r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
555 rlen += r;
556 prev = p;
557 slen = 1;
558 prev_len = len;
559 }
560 p += len;
561 }
562
563 r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
564 rlen += r;
565 return rlen;
566 }
567
568 static int
569 compile_length_string_raw_node(StrNode* sn, regex_t* reg)
570 {
571 if (sn->end <= sn->s)
572 return 0;
573
574 return add_compile_string_length(sn->s, 1 /* sb */, (int )(sn->end - sn->s),
575 reg, 0);
576 }
577
578 static int
579 compile_string_node(Node* node, regex_t* reg)
580 {
581 int r, len, prev_len, slen, ambig;
582 UChar *p, *prev, *end;
583 StrNode* sn;
584 OnigEncoding enc = reg->enc;
585
586 sn = STR_(node);
587 if (sn->end <= sn->s)
588 return 0;
589
590 end = sn->end;
591 ambig = NODE_STRING_IS_AMBIG(node);
592
593 p = prev = sn->s;
594 prev_len = enclen(enc, p);
595 p += prev_len;
596 slen = 1;
597
598 for (; p < end; ) {
599 len = enclen(enc, p);
600 if (len == prev_len) {
601 slen++;
602 }
603 else {
604 r = add_compile_string(prev, prev_len, slen, reg, ambig);
605 if (r != 0) return r;
606
607 prev = p;
608 slen = 1;
609 prev_len = len;
610 }
611
612 p += len;
613 }
614
615 return add_compile_string(prev, prev_len, slen, reg, ambig);
616 }
617
618 static int
619 compile_string_raw_node(StrNode* sn, regex_t* reg)
620 {
621 if (sn->end <= sn->s)
622 return 0;
623
624 return add_compile_string(sn->s, 1 /* sb */, (int )(sn->end - sn->s), reg, 0);
625 }
626
627 static int
628 add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
629 {
630 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
631 add_length(reg, mbuf->used);
632 return add_bytes(reg, mbuf->p, mbuf->used);
633 #else
634 int r, pad_size;
635 UChar* p = BB_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
636
637 GET_ALIGNMENT_PAD_SIZE(p, pad_size);
638 add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
639 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
640
641 r = add_bytes(reg, mbuf->p, mbuf->used);
642
643 /* padding for return value from compile_length_cclass_node() to be fix. */
644 pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
645 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
646 return r;
647 #endif
648 }
649
650 static int
651 compile_length_cclass_node(CClassNode* cc, regex_t* reg)
652 {
653 int len;
654
655 if (IS_NULL(cc->mbuf)) {
656 len = SIZE_OPCODE + SIZE_BITSET;
657 }
658 else {
659 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
660 len = SIZE_OPCODE;
661 }
662 else {
663 len = SIZE_OPCODE + SIZE_BITSET;
664 }
665 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
666 len += SIZE_LENGTH + cc->mbuf->used;
667 #else
668 len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
669 #endif
670 }
671
672 return len;
673 }
674
675 static int
676 compile_cclass_node(CClassNode* cc, regex_t* reg)
677 {
678 int r;
679
680 if (IS_NULL(cc->mbuf)) {
681 if (IS_NCCLASS_NOT(cc))
682 add_opcode(reg, OP_CCLASS_NOT);
683 else
684 add_opcode(reg, OP_CCLASS);
685
686 r = add_bitset(reg, cc->bs);
687 }
688 else {
689 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
690 if (IS_NCCLASS_NOT(cc))
691 add_opcode(reg, OP_CCLASS_MB_NOT);
692 else
693 add_opcode(reg, OP_CCLASS_MB);
694
695 r = add_multi_byte_cclass(cc->mbuf, reg);
696 }
697 else {
698 if (IS_NCCLASS_NOT(cc))
699 add_opcode(reg, OP_CCLASS_MIX_NOT);
700 else
701 add_opcode(reg, OP_CCLASS_MIX);
702
703 r = add_bitset(reg, cc->bs);
704 if (r != 0) return r;
705 r = add_multi_byte_cclass(cc->mbuf, reg);
706 }
707 }
708
709 return r;
710 }
711
712 static int
713 entry_repeat_range(regex_t* reg, int id, int lower, int upper)
714 {
715 #define REPEAT_RANGE_ALLOC 4
716
717 OnigRepeatRange* p;
718
719 if (reg->repeat_range_alloc == 0) {
720 p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
721 CHECK_NULL_RETURN_MEMERR(p);
722 reg->repeat_range = p;
723 reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
724 }
725 else if (reg->repeat_range_alloc <= id) {
726 int n;
727 n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
728 p = (OnigRepeatRange* )xrealloc(reg->repeat_range,
729 sizeof(OnigRepeatRange) * n,
730 sizeof(OnigRepeatRange) * reg->repeat_range_alloc);
731 CHECK_NULL_RETURN_MEMERR(p);
732 reg->repeat_range = p;
733 reg->repeat_range_alloc = n;
734 }
735 else {
736 p = reg->repeat_range;
737 }
738
739 p[id].lower = lower;
740 p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
741 return 0;
742 }
743
744 static int
745 compile_range_repeat_node(QuantNode* qn, int target_len, int empty_info,
746 regex_t* reg, ScanEnv* env)
747 {
748 int r;
749 int num_repeat = reg->num_repeat;
750
751 r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
752 if (r != 0) return r;
753 r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
754 reg->num_repeat++;
755 if (r != 0) return r;
756 r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
757 if (r != 0) return r;
758
759 r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
760 if (r != 0) return r;
761
762 r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
763 if (r != 0) return r;
764
765 if (
766 #ifdef USE_CALL
767 NODE_IS_IN_MULTI_ENTRY(qn) ||
768 #endif
769 NODE_IS_IN_REAL_REPEAT(qn)) {
770 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
771 }
772 else {
773 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
774 }
775 if (r != 0) return r;
776 r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
777 return r;
778 }
779
780 static int
781 is_anychar_infinite_greedy(QuantNode* qn)
782 {
783 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
784 NODE_IS_ANYCHAR(NODE_QUANT_BODY(qn)))
785 return 1;
786 else
787 return 0;
788 }
789
790 #define QUANTIFIER_EXPAND_LIMIT_SIZE 50
791 #define CKN_ON (ckn > 0)
792
793 static int
794 compile_length_quantifier_node(QuantNode* qn, regex_t* reg)
795 {
796 int len, mod_tlen;
797 int infinite = IS_REPEAT_INFINITE(qn->upper);
798 enum QuantBodyEmpty empty_info = qn->body_empty_info;
799 int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);
800
801 if (tlen < 0) return tlen;
802 if (tlen == 0) return 0;
803
804 /* anychar repeat */
805 if (is_anychar_infinite_greedy(qn)) {
806 if (qn->lower <= 1 ||
807 int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0) {
808 if (IS_NOT_NULL(qn->next_head_exact))
809 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
810 else
811 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
812 }
813 }
814
815 if (empty_info == QUANT_BODY_IS_NOT_EMPTY)
816 mod_tlen = tlen;
817 else
818 mod_tlen = tlen + (SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END);
819
820 if (infinite &&
821 (qn->lower <= 1 ||
822 int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
823 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
824 len = SIZE_OP_JUMP;
825 }
826 else {
827 len = tlen * qn->lower;
828 }
829
830 if (qn->greedy) {
831 if (IS_NOT_NULL(qn->head_exact))
832 len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
833 else if (IS_NOT_NULL(qn->next_head_exact))
834 len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
835 else
836 len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
837 }
838 else
839 len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
840 }
841 else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
842 len = SIZE_OP_JUMP + tlen;
843 }
844 else if (!infinite && qn->greedy &&
845 (qn->upper == 1 ||
846 int_multiply_cmp(tlen + SIZE_OP_PUSH, qn->upper,
847 QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
848 len = tlen * qn->lower;
849 len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
850 }
851 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
852 len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
853 }
854 else {
855 len = SIZE_OP_REPEAT_INC
856 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
857 }
858
859 return len;
860 }
861
862 static int
863 compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
864 {
865 int i, r, mod_tlen;
866 int infinite = IS_REPEAT_INFINITE(qn->upper);
867 enum QuantBodyEmpty empty_info = qn->body_empty_info;
868 int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);
869
870 if (tlen < 0) return tlen;
871 if (tlen == 0) return 0;
872
873 if (is_anychar_infinite_greedy(qn) &&
874 (qn->lower <= 1 ||
875 int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
876 r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);
877 if (r != 0) return r;
878 if (IS_NOT_NULL(qn->next_head_exact)) {
879 if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg)))
880 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
881 else
882 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
883 if (r != 0) return r;
884 return add_bytes(reg, STR_(qn->next_head_exact)->s, 1);
885 }
886 else {
887 if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg)))
888 return add_opcode(reg, OP_ANYCHAR_ML_STAR);
889 else
890 return add_opcode(reg, OP_ANYCHAR_STAR);
891 }
892 }
893
894 if (empty_info == QUANT_BODY_IS_NOT_EMPTY)
895 mod_tlen = tlen;
896 else
897 mod_tlen = tlen + (SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END);
898
899 if (infinite &&
900 (qn->lower <= 1 ||
901 int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
902 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
903 if (qn->greedy) {
904 if (IS_NOT_NULL(qn->head_exact))
905 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
906 else if (IS_NOT_NULL(qn->next_head_exact))
907 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
908 else
909 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
910 }
911 else {
912 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
913 }
914 if (r != 0) return r;
915 }
916 else {
917 r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);
918 if (r != 0) return r;
919 }
920
921 if (qn->greedy) {
922 if (IS_NOT_NULL(qn->head_exact)) {
923 r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
924 mod_tlen + SIZE_OP_JUMP);
925 if (r != 0) return r;
926 add_bytes(reg, STR_(qn->head_exact)->s, 1);
927 r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
928 if (r != 0) return r;
929 r = add_opcode_rel_addr(reg, OP_JUMP,
930 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
931 }
932 else if (IS_NOT_NULL(qn->next_head_exact)) {
933 r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
934 mod_tlen + SIZE_OP_JUMP);
935 if (r != 0) return r;
936 add_bytes(reg, STR_(qn->next_head_exact)->s, 1);
937 r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
938 if (r != 0) return r;
939 r = add_opcode_rel_addr(reg, OP_JUMP,
940 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
941 }
942 else {
943 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
944 if (r != 0) return r;
945 r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
946 if (r != 0) return r;
947 r = add_opcode_rel_addr(reg, OP_JUMP,
948 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
949 }
950 }
951 else {
952 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
953 if (r != 0) return r;
954 r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
955 if (r != 0) return r;
956 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
957 }
958 }
959 else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
960 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
961 if (r != 0) return r;
962 r = compile_tree(NODE_QUANT_BODY(qn), reg, env);
963 }
964 else if (! infinite && qn->greedy &&
965 (qn->upper == 1 ||
966 int_multiply_cmp(tlen + SIZE_OP_PUSH, qn->upper,
967 QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
968 int n = qn->upper - qn->lower;
969
970 r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);
971 if (r != 0) return r;
972
973 for (i = 0; i < n; i++) {
974 r = add_opcode_rel_addr(reg, OP_PUSH,
975 (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
976 if (r != 0) return r;
977 r = compile_tree(NODE_QUANT_BODY(qn), reg, env);
978 if (r != 0) return r;
979 }
980 }
981 else if (! qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
982 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
983 if (r != 0) return r;
984 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
985 if (r != 0) return r;
986 r = compile_tree(NODE_QUANT_BODY(qn), reg, env);
987 }
988 else {
989 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg, env);
990 }
991 return r;
992 }
993
994 static int
995 compile_length_option_node(EnclosureNode* node, regex_t* reg)
996 {
997 int tlen;
998 OnigOptionType prev = reg->options;
999
1000 reg->options = node->o.options;
1001 tlen = compile_length_tree(NODE_ENCLOSURE_BODY(node), reg);
1002 reg->options = prev;
1003
1004 return tlen;
1005 }
1006
1007 static int
1008 compile_option_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)
1009 {
1010 int r;
1011 OnigOptionType prev = reg->options;
1012
1013 reg->options = node->o.options;
1014 r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env);
1015 reg->options = prev;
1016
1017 return r;
1018 }
1019
1020 static int
1021 compile_length_enclosure_node(EnclosureNode* node, regex_t* reg)
1022 {
1023 int len;
1024 int tlen;
1025
1026 if (node->type == ENCLOSURE_OPTION)
1027 return compile_length_option_node(node, reg);
1028
1029 if (NODE_ENCLOSURE_BODY(node)) {
1030 tlen = compile_length_tree(NODE_ENCLOSURE_BODY(node), reg);
1031 if (tlen < 0) return tlen;
1032 }
1033 else
1034 tlen = 0;
1035
1036 switch (node->type) {
1037 case ENCLOSURE_MEMORY:
1038 #ifdef USE_CALL
1039
1040 if (node->m.regnum == 0 && NODE_IS_CALLED(node)) {
1041 len = tlen + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
1042 return len;
1043 }
1044
1045 if (NODE_IS_CALLED(node)) {
1046 len = SIZE_OP_MEMORY_START_PUSH + tlen
1047 + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
1048 if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))
1049 len += (NODE_IS_RECURSION(node)
1050 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1051 else
1052 len += (NODE_IS_RECURSION(node)
1053 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1054 }
1055 else if (NODE_IS_RECURSION(node)) {
1056 len = SIZE_OP_MEMORY_START_PUSH;
1057 len += tlen + (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum)
1058 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_REC);
1059 }
1060 else
1061 #endif
1062 {
1063 if (MEM_STATUS_AT0(reg->bt_mem_start, node->m.regnum))
1064 len = SIZE_OP_MEMORY_START_PUSH;
1065 else
1066 len = SIZE_OP_MEMORY_START;
1067
1068 len += tlen + (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum)
1069 ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
1070 }
1071 break;
1072
1073 case ENCLOSURE_STOP_BACKTRACK:
1074 if (NODE_IS_STOP_BT_SIMPLE_REPEAT(node)) {
1075 QuantNode* qn = QUANT_(NODE_ENCLOSURE_BODY(node));
1076 tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);
1077 if (tlen < 0) return tlen;
1078
1079 len = tlen * qn->lower
1080 + SIZE_OP_PUSH + tlen + SIZE_OP_POP_OUT + SIZE_OP_JUMP;
1081 }
1082 else {
1083 len = SIZE_OP_ATOMIC_START + tlen + SIZE_OP_ATOMIC_END;
1084 }
1085 break;
1086
1087 case ENCLOSURE_IF_ELSE:
1088 {
1089 Node* cond = NODE_ENCLOSURE_BODY(node);
1090 Node* Then = node->te.Then;
1091 Node* Else = node->te.Else;
1092
1093 len = compile_length_tree(cond, reg);
1094 if (len < 0) return len;
1095 len += SIZE_OP_PUSH;
1096 len += SIZE_OP_ATOMIC_START + SIZE_OP_ATOMIC_END;
1097
1098 if (IS_NOT_NULL(Then)) {
1099 tlen = compile_length_tree(Then, reg);
1100 if (tlen < 0) return tlen;
1101 len += tlen;
1102 }
1103
1104 if (IS_NOT_NULL(Else)) {
1105 len += SIZE_OP_JUMP;
1106 tlen = compile_length_tree(Else, reg);
1107 if (tlen < 0) return tlen;
1108 len += tlen;
1109 }
1110 }
1111 break;
1112
1113 default:
1114 return ONIGERR_TYPE_BUG;
1115 break;
1116 }
1117
1118 return len;
1119 }
1120
1121 static int get_char_length_tree(Node* node, regex_t* reg, int* len);
1122
1123 static int
1124 compile_enclosure_memory_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)
1125 {
1126 int r;
1127 int len;
1128
1129 #ifdef USE_CALL
1130 if (node->m.regnum == 0 && NODE_IS_CALLED(node)) {
1131 r = add_opcode(reg, OP_CALL);
1132 if (r != 0) return r;
1133 node->m.called_addr = BB_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
1134 NODE_STATUS_ADD(node, ADDR_FIXED);
1135 r = add_abs_addr(reg, (int )node->m.called_addr);
1136 if (r != 0) return r;
1137 len = compile_length_tree(NODE_ENCLOSURE_BODY(node), reg);
1138 len += SIZE_OP_RETURN;
1139 r = add_opcode_rel_addr(reg, OP_JUMP, len);
1140 if (r != 0) return r;
1141
1142 r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env);
1143 if (r != 0) return r;
1144 r = add_opcode(reg, OP_RETURN);
1145 return r;
1146 }
1147
1148 if (NODE_IS_CALLED(node)) {
1149 r = add_opcode(reg, OP_CALL);
1150 if (r != 0) return r;
1151 node->m.called_addr = BB_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
1152 NODE_STATUS_ADD(node, ADDR_FIXED);
1153 r = add_abs_addr(reg, (int )node->m.called_addr);
1154 if (r != 0) return r;
1155 len = compile_length_tree(NODE_ENCLOSURE_BODY(node), reg);
1156 len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
1157 if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))
1158 len += (NODE_IS_RECURSION(node)
1159 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1160 else
1161 len += (NODE_IS_RECURSION(node)
1162 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1163
1164 r = add_opcode_rel_addr(reg, OP_JUMP, len);
1165 if (r != 0) return r;
1166 }
1167 #endif
1168
1169 if (MEM_STATUS_AT0(reg->bt_mem_start, node->m.regnum))
1170 r = add_opcode(reg, OP_MEMORY_START_PUSH);
1171 else
1172 r = add_opcode(reg, OP_MEMORY_START);
1173 if (r != 0) return r;
1174 r = add_mem_num(reg, node->m.regnum);
1175 if (r != 0) return r;
1176 r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env);
1177 if (r != 0) return r;
1178
1179 #ifdef USE_CALL
1180 if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))
1181 r = add_opcode(reg, (NODE_IS_RECURSION(node)
1182 ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
1183 else
1184 r = add_opcode(reg, (NODE_IS_RECURSION(node)
1185 ? OP_MEMORY_END_REC : OP_MEMORY_END));
1186 if (r != 0) return r;
1187 r = add_mem_num(reg, node->m.regnum);
1188 if (NODE_IS_CALLED(node)) {
1189 if (r != 0) return r;
1190 r = add_opcode(reg, OP_RETURN);
1191 }
1192 #else
1193 if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))
1194 r = add_opcode(reg, OP_MEMORY_END_PUSH);
1195 else
1196 r = add_opcode(reg, OP_MEMORY_END);
1197 if (r != 0) return r;
1198 r = add_mem_num(reg, node->m.regnum);
1199 #endif
1200
1201 return r;
1202 }
1203
1204 static int
1205 compile_enclosure_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)
1206 {
1207 int r, len;
1208
1209 switch (node->type) {
1210 case ENCLOSURE_MEMORY:
1211 r = compile_enclosure_memory_node(node, reg, env);
1212 break;
1213
1214 case ENCLOSURE_OPTION:
1215 r = compile_option_node(node, reg, env);
1216 break;
1217
1218 case ENCLOSURE_STOP_BACKTRACK:
1219 if (NODE_IS_STOP_BT_SIMPLE_REPEAT(node)) {
1220 QuantNode* qn = QUANT_(NODE_ENCLOSURE_BODY(node));
1221 r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);
1222 if (r != 0) return r;
1223
1224 len = compile_length_tree(NODE_QUANT_BODY(qn), reg);
1225 if (len < 0) return len;
1226
1227 r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP_OUT + SIZE_OP_JUMP);
1228 if (r != 0) return r;
1229 r = compile_tree(NODE_QUANT_BODY(qn), reg, env);
1230 if (r != 0) return r;
1231 r = add_opcode(reg, OP_POP_OUT);
1232 if (r != 0) return r;
1233 r = add_opcode_rel_addr(reg, OP_JUMP,
1234 -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP_OUT + (int )SIZE_OP_JUMP));
1235 }
1236 else {
1237 r = add_opcode(reg, OP_ATOMIC_START);
1238 if (r != 0) return r;
1239 r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env);
1240 if (r != 0) return r;
1241 r = add_opcode(reg, OP_ATOMIC_END);
1242 }
1243 break;
1244
1245 case ENCLOSURE_IF_ELSE:
1246 {
1247 int cond_len, then_len, jump_len;
1248 Node* cond = NODE_ENCLOSURE_BODY(node);
1249 Node* Then = node->te.Then;
1250 Node* Else = node->te.Else;
1251
1252 r = add_opcode(reg, OP_ATOMIC_START);
1253 if (r != 0) return r;
1254
1255 cond_len = compile_length_tree(cond, reg);
1256 if (cond_len < 0) return cond_len;
1257 if (IS_NOT_NULL(Then)) {
1258 then_len = compile_length_tree(Then, reg);
1259 if (then_len < 0) return then_len;
1260 }
1261 else
1262 then_len = 0;
1263
1264 jump_len = cond_len + then_len + SIZE_OP_ATOMIC_END;
1265 if (IS_NOT_NULL(Else)) jump_len += SIZE_OP_JUMP;
1266
1267 r = add_opcode_rel_addr(reg, OP_PUSH, jump_len);
1268 if (r != 0) return r;
1269 r = compile_tree(cond, reg, env);
1270 if (r != 0) return r;
1271 r = add_opcode(reg, OP_ATOMIC_END);
1272 if (r != 0) return r;
1273
1274 if (IS_NOT_NULL(Then)) {
1275 r = compile_tree(Then, reg, env);
1276 if (r != 0) return r;
1277 }
1278
1279 if (IS_NOT_NULL(Else)) {
1280 int else_len = compile_length_tree(Else, reg);
1281 r = add_opcode_rel_addr(reg, OP_JUMP, else_len);
1282 if (r != 0) return r;
1283 r = compile_tree(Else, reg, env);
1284 }
1285 }
1286 break;
1287
1288 default:
1289 return ONIGERR_TYPE_BUG;
1290 break;
1291 }
1292
1293 return r;
1294 }
1295
1296 static int
1297 compile_length_anchor_node(AnchorNode* node, regex_t* reg)
1298 {
1299 int len;
1300 int tlen = 0;
1301
1302 if (IS_NOT_NULL(NODE_ANCHOR_BODY(node))) {
1303 tlen = compile_length_tree(NODE_ANCHOR_BODY(node), reg);
1304 if (tlen < 0) return tlen;
1305 }
1306
1307 switch (node->type) {
1308 case ANCHOR_PREC_READ:
1309 len = SIZE_OP_PREC_READ_START + tlen + SIZE_OP_PREC_READ_END;
1310 break;
1311 case ANCHOR_PREC_READ_NOT:
1312 len = SIZE_OP_PREC_READ_NOT_START + tlen + SIZE_OP_PREC_READ_NOT_END;
1313 break;
1314 case ANCHOR_LOOK_BEHIND:
1315 len = SIZE_OP_LOOK_BEHIND + tlen;
1316 break;
1317 case ANCHOR_LOOK_BEHIND_NOT:
1318 len = SIZE_OP_LOOK_BEHIND_NOT_START + tlen + SIZE_OP_LOOK_BEHIND_NOT_END;
1319 break;
1320
1321 case ANCHOR_WORD_BOUNDARY:
1322 case ANCHOR_NO_WORD_BOUNDARY:
1323 #ifdef USE_WORD_BEGIN_END
1324 case ANCHOR_WORD_BEGIN:
1325 case ANCHOR_WORD_END:
1326 #endif
1327 len = SIZE_OP_WORD_BOUNDARY;
1328 break;
1329
1330 case ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
1331 case ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
1332 len = SIZE_OPCODE;
1333 break;
1334
1335 default:
1336 len = SIZE_OPCODE;
1337 break;
1338 }
1339
1340 return len;
1341 }
1342
1343 static int
1344 compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env)
1345 {
1346 int r, len;
1347 enum OpCode op;
1348
1349 switch (node->type) {
1350 case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break;
1351 case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break;
1352 case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break;
1353 case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break;
1354 case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break;
1355 case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
1356
1357 case ANCHOR_WORD_BOUNDARY:
1358 op = OP_WORD_BOUNDARY;
1359 word:
1360 r = add_opcode(reg, op);
1361 if (r != 0) return r;
1362 r = add_mode(reg, (ModeType )node->ascii_mode);
1363 break;
1364
1365 case ANCHOR_NO_WORD_BOUNDARY:
1366 op = OP_NO_WORD_BOUNDARY; goto word;
1367 break;
1368 #ifdef USE_WORD_BEGIN_END
1369 case ANCHOR_WORD_BEGIN:
1370 op = OP_WORD_BEGIN; goto word;
1371 break;
1372 case ANCHOR_WORD_END:
1373 op = OP_WORD_END; goto word;
1374 break;
1375 #endif
1376
1377 case ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
1378 r = add_opcode(reg, OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY);
1379 break;
1380
1381 case ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
1382 r = add_opcode(reg, OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY);
1383 break;
1384
1385 case ANCHOR_PREC_READ:
1386 r = add_opcode(reg, OP_PREC_READ_START);
1387 if (r != 0) return r;
1388 r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);
1389 if (r != 0) return r;
1390 r = add_opcode(reg, OP_PREC_READ_END);
1391 break;
1392
1393 case ANCHOR_PREC_READ_NOT:
1394 len = compile_length_tree(NODE_ANCHOR_BODY(node), reg);
1395 if (len < 0) return len;
1396 r = add_opcode_rel_addr(reg, OP_PREC_READ_NOT_START, len + SIZE_OP_PREC_READ_NOT_END);
1397 if (r != 0) return r;
1398 r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);
1399 if (r != 0) return r;
1400 r = add_opcode(reg, OP_PREC_READ_NOT_END);
1401 break;
1402
1403 case ANCHOR_LOOK_BEHIND:
1404 {
1405 int n;
1406 r = add_opcode(reg, OP_LOOK_BEHIND);
1407 if (r != 0) return r;
1408 if (node->char_len < 0) {
1409 r = get_char_length_tree(NODE_ANCHOR_BODY(node), reg, &n);
1410 if (r != 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1411 }
1412 else
1413 n = node->char_len;
1414
1415 r = add_length(reg, n);
1416 if (r != 0) return r;
1417 r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);
1418 }
1419 break;
1420
1421 case ANCHOR_LOOK_BEHIND_NOT:
1422 {
1423 int n;
1424
1425 len = compile_length_tree(NODE_ANCHOR_BODY(node), reg);
1426 r = add_opcode_rel_addr(reg, OP_LOOK_BEHIND_NOT_START,
1427 len + SIZE_OP_LOOK_BEHIND_NOT_END);
1428 if (r != 0) return r;
1429 if (node->char_len < 0) {
1430 r = get_char_length_tree(NODE_ANCHOR_BODY(node), reg, &n);
1431 if (r != 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1432 }
1433 else
1434 n = node->char_len;
1435 r = add_length(reg, n);
1436 if (r != 0) return r;
1437 r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);
1438 if (r != 0) return r;
1439 r = add_opcode(reg, OP_LOOK_BEHIND_NOT_END);
1440 }
1441 break;
1442
1443 default:
1444 return ONIGERR_TYPE_BUG;
1445 break;
1446 }
1447
1448 return r;
1449 }
1450
1451 static int
1452 compile_gimmick_node(GimmickNode* node, regex_t* reg)
1453 {
1454 int r;
1455
1456 switch (node->type) {
1457 case GIMMICK_FAIL:
1458 r = add_opcode(reg, OP_FAIL);
1459 break;
1460
1461 case GIMMICK_KEEP:
1462 r = add_opcode(reg, OP_PUSH_SAVE_VAL);
1463 if (r != 0) return r;
1464 r = add_save_type(reg, SAVE_KEEP);
1465 if (r != 0) return r;
1466 r = add_mem_num(reg, node->id);
1467 break;
1468
1469 case GIMMICK_SAVE:
1470 r = add_opcode(reg, OP_PUSH_SAVE_VAL);
1471 if (r != 0) return r;
1472 r = add_save_type(reg, node->detail_type);
1473 if (r != 0) return r;
1474 r = add_mem_num(reg, node->id);
1475 break;
1476
1477 case GIMMICK_UPDATE_VAR:
1478 r = add_opcode(reg, OP_UPDATE_VAR);
1479 if (r != 0) return r;
1480 r = add_update_var_type(reg, node->detail_type);
1481 if (r != 0) return r;
1482 r = add_mem_num(reg, node->id);
1483 break;
1484
1485 #ifdef USE_CALLOUT
1486 case GIMMICK_CALLOUT:
1487 switch (node->detail_type) {
1488 case ONIG_CALLOUT_OF_CONTENTS:
1489 case ONIG_CALLOUT_OF_NAME:
1490 {
1491 r = add_opcode(reg, (node->detail_type == ONIG_CALLOUT_OF_CONTENTS) ?
1492 OP_CALLOUT_CONTENTS : OP_CALLOUT_NAME);
1493 if (r != 0) return r;
1494 if (node->detail_type == ONIG_CALLOUT_OF_NAME) {
1495 r = add_mem_num(reg, node->id);
1496 if (r != 0) return r;
1497 }
1498 r = add_mem_num(reg, node->num);
1499 if (r != 0) return r;
1500 }
1501 break;
1502
1503 default:
1504 r = ONIGERR_TYPE_BUG;
1505 break;
1506 }
1507 #endif
1508 }
1509
1510 return r;
1511 }
1512
1513 static int
1514 compile_length_gimmick_node(GimmickNode* node, regex_t* reg)
1515 {
1516 int len;
1517
1518 switch (node->type) {
1519 case GIMMICK_FAIL:
1520 len = SIZE_OP_FAIL;
1521 break;
1522
1523 case GIMMICK_KEEP:
1524 case GIMMICK_SAVE:
1525 len = SIZE_OP_PUSH_SAVE_VAL;
1526 break;
1527
1528 case GIMMICK_UPDATE_VAR:
1529 len = SIZE_OP_UPDATE_VAR;
1530 break;
1531
1532 #ifdef USE_CALLOUT
1533 case GIMMICK_CALLOUT:
1534 switch (node->detail_type) {
1535 case ONIG_CALLOUT_OF_CONTENTS:
1536 len = SIZE_OP_CALLOUT_CONTENTS;
1537 break;
1538 case ONIG_CALLOUT_OF_NAME:
1539 len = SIZE_OP_CALLOUT_NAME;
1540 break;
1541
1542 default:
1543 len = ONIGERR_TYPE_BUG;
1544 break;
1545 }
1546 break;
1547 #endif
1548 }
1549
1550 return len;
1551 }
1552
1553 static int
1554 compile_length_tree(Node* node, regex_t* reg)
1555 {
1556 int len, r;
1557
1558 switch (NODE_TYPE(node)) {
1559 case NODE_LIST:
1560 len = 0;
1561 do {
1562 r = compile_length_tree(NODE_CAR(node), reg);
1563 if (r < 0) return r;
1564 len += r;
1565 } while (IS_NOT_NULL(node = NODE_CDR(node)));
1566 r = len;
1567 break;
1568
1569 case NODE_ALT:
1570 {
1571 int n;
1572
1573 n = r = 0;
1574 do {
1575 r += compile_length_tree(NODE_CAR(node), reg);
1576 n++;
1577 } while (IS_NOT_NULL(node = NODE_CDR(node)));
1578 r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
1579 }
1580 break;
1581
1582 case NODE_STRING:
1583 if (NODE_STRING_IS_RAW(node))
1584 r = compile_length_string_raw_node(STR_(node), reg);
1585 else
1586 r = compile_length_string_node(node, reg);
1587 break;
1588
1589 case NODE_CCLASS:
1590 r = compile_length_cclass_node(CCLASS_(node), reg);
1591 break;
1592
1593 case NODE_CTYPE:
1594 r = SIZE_OPCODE;
1595 break;
1596
1597 case NODE_BACKREF:
1598 {
1599 BackRefNode* br = BACKREF_(node);
1600
1601 if (NODE_IS_CHECKER(node)) {
1602 #ifdef USE_BACKREF_WITH_LEVEL
1603 if (NODE_IS_NEST_LEVEL(node)) {
1604 r = SIZE_OPCODE + SIZE_LENGTH + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1605 }
1606 else
1607 #endif
1608 r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1609 }
1610 else {
1611 #ifdef USE_BACKREF_WITH_LEVEL
1612 if (NODE_IS_NEST_LEVEL(node)) {
1613 r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
1614 SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1615 }
1616 else
1617 #endif
1618 if (br->back_num == 1) {
1619 r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
1620 ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
1621 }
1622 else {
1623 r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1624 }
1625 }
1626 }
1627 break;
1628
1629 #ifdef USE_CALL
1630 case NODE_CALL:
1631 r = SIZE_OP_CALL;
1632 break;
1633 #endif
1634
1635 case NODE_QUANT:
1636 r = compile_length_quantifier_node(QUANT_(node), reg);
1637 break;
1638
1639 case NODE_ENCLOSURE:
1640 r = compile_length_enclosure_node(ENCLOSURE_(node), reg);
1641 break;
1642
1643 case NODE_ANCHOR:
1644 r = compile_length_anchor_node(ANCHOR_(node), reg);
1645 break;
1646
1647 case NODE_GIMMICK:
1648 r = compile_length_gimmick_node(GIMMICK_(node), reg);
1649 break;
1650
1651 default:
1652 return ONIGERR_TYPE_BUG;
1653 break;
1654 }
1655
1656 return r;
1657 }
1658
1659 static int
1660 compile_tree(Node* node, regex_t* reg, ScanEnv* env)
1661 {
1662 int n, len, pos, r = 0;
1663
1664 switch (NODE_TYPE(node)) {
1665 case NODE_LIST:
1666 do {
1667 r = compile_tree(NODE_CAR(node), reg, env);
1668 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
1669 break;
1670
1671 case NODE_ALT:
1672 {
1673 Node* x = node;
1674 len = 0;
1675 do {
1676 len += compile_length_tree(NODE_CAR(x), reg);
1677 if (IS_NOT_NULL(NODE_CDR(x))) {
1678 len += SIZE_OP_PUSH + SIZE_OP_JUMP;
1679 }
1680 } while (IS_NOT_NULL(x = NODE_CDR(x)));
1681 pos = reg->used + len; /* goal position */
1682
1683 do {
1684 len = compile_length_tree(NODE_CAR(node), reg);
1685 if (IS_NOT_NULL(NODE_CDR(node))) {
1686 enum OpCode push = NODE_IS_SUPER(node) ? OP_PUSH_SUPER : OP_PUSH;
1687 r = add_opcode_rel_addr(reg, push, len + SIZE_OP_JUMP);
1688 if (r != 0) break;
1689 }
1690 r = compile_tree(NODE_CAR(node), reg, env);
1691 if (r != 0) break;
1692 if (IS_NOT_NULL(NODE_CDR(node))) {
1693 len = pos - (reg->used + SIZE_OP_JUMP);
1694 r = add_opcode_rel_addr(reg, OP_JUMP, len);
1695 if (r != 0) break;
1696 }
1697 } while (IS_NOT_NULL(node = NODE_CDR(node)));
1698 }
1699 break;
1700
1701 case NODE_STRING:
1702 if (NODE_STRING_IS_RAW(node))
1703 r = compile_string_raw_node(STR_(node), reg);
1704 else
1705 r = compile_string_node(node, reg);
1706 break;
1707
1708 case NODE_CCLASS:
1709 r = compile_cclass_node(CCLASS_(node), reg);
1710 break;
1711
1712 case NODE_CTYPE:
1713 {
1714 int op;
1715
1716 switch (CTYPE_(node)->ctype) {
1717 case CTYPE_ANYCHAR:
1718 if (IS_MULTILINE(CTYPE_OPTION(node, reg)))
1719 r = add_opcode(reg, OP_ANYCHAR_ML);
1720 else
1721 r = add_opcode(reg, OP_ANYCHAR);
1722 break;
1723
1724 case ONIGENC_CTYPE_WORD:
1725 if (CTYPE_(node)->ascii_mode == 0) {
1726 op = CTYPE_(node)->not != 0 ? OP_NO_WORD : OP_WORD;
1727 }
1728 else {
1729 op = CTYPE_(node)->not != 0 ? OP_NO_WORD_ASCII : OP_WORD_ASCII;
1730 }
1731 r = add_opcode(reg, op);
1732 break;
1733
1734 default:
1735 return ONIGERR_TYPE_BUG;
1736 break;
1737 }
1738 }
1739 break;
1740
1741 case NODE_BACKREF:
1742 {
1743 BackRefNode* br = BACKREF_(node);
1744
1745 if (NODE_IS_CHECKER(node)) {
1746 #ifdef USE_BACKREF_WITH_LEVEL
1747 if (NODE_IS_NEST_LEVEL(node)) {
1748 r = add_opcode(reg, OP_BACKREF_CHECK_WITH_LEVEL);
1749 if (r != 0) return r;
1750 r = add_length(reg, br->nest_level);
1751 if (r != 0) return r;
1752 }
1753 else
1754 #endif
1755 {
1756 r = add_opcode(reg, OP_BACKREF_CHECK);
1757 if (r != 0) return r;
1758 }
1759
1760 goto add_bacref_mems;
1761 }
1762 else {
1763 #ifdef USE_BACKREF_WITH_LEVEL
1764 if (NODE_IS_NEST_LEVEL(node)) {
1765 r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
1766 if (r != 0) return r;
1767 r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
1768 if (r != 0) return r;
1769 r = add_length(reg, br->nest_level);
1770 if (r != 0) return r;
1771
1772 goto add_bacref_mems;
1773 }
1774 else
1775 #endif
1776 if (br->back_num == 1) {
1777 n = br->back_static[0];
1778 if (IS_IGNORECASE(reg->options)) {
1779 r = add_opcode(reg, OP_BACKREF_N_IC);
1780 if (r != 0) return r;
1781 r = add_mem_num(reg, n);
1782 }
1783 else {
1784 switch (n) {
1785 case 1: r = add_opcode(reg, OP_BACKREF1); break;
1786 case 2: r = add_opcode(reg, OP_BACKREF2); break;
1787 default:
1788 r = add_opcode(reg, OP_BACKREF_N);
1789 if (r != 0) return r;
1790 r = add_mem_num(reg, n);
1791 break;
1792 }
1793 }
1794 }
1795 else {
1796 int i;
1797 int* p;
1798
1799 if (IS_IGNORECASE(reg->options)) {
1800 r = add_opcode(reg, OP_BACKREF_MULTI_IC);
1801 }
1802 else {
1803 r = add_opcode(reg, OP_BACKREF_MULTI);
1804 }
1805 if (r != 0) return r;
1806
1807 add_bacref_mems:
1808 r = add_length(reg, br->back_num);
1809 if (r != 0) return r;
1810 p = BACKREFS_P(br);
1811 for (i = br->back_num - 1; i >= 0; i--) {
1812 r = add_mem_num(reg, p[i]);
1813 if (r != 0) return r;
1814 }
1815 }
1816 }
1817 }
1818 break;
1819
1820 #ifdef USE_CALL
1821 case NODE_CALL:
1822 r = compile_call(CALL_(node), reg, env);
1823 break;
1824 #endif
1825
1826 case NODE_QUANT:
1827 r = compile_quantifier_node(QUANT_(node), reg, env);
1828 break;
1829
1830 case NODE_ENCLOSURE:
1831 r = compile_enclosure_node(ENCLOSURE_(node), reg, env);
1832 break;
1833
1834 case NODE_ANCHOR:
1835 r = compile_anchor_node(ANCHOR_(node), reg, env);
1836 break;
1837
1838 case NODE_GIMMICK:
1839 r = compile_gimmick_node(GIMMICK_(node), reg);
1840 break;
1841
1842 default:
1843 #ifdef ONIG_DEBUG
1844 fprintf(stderr, "compile_tree: undefined node type %d\n", NODE_TYPE(node));
1845 #endif
1846 break;
1847 }
1848
1849 return r;
1850 }
1851
1852 static int
1853 noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
1854 {
1855 int r = 0;
1856 Node* node = *plink;
1857
1858 switch (NODE_TYPE(node)) {
1859 case NODE_LIST:
1860 case NODE_ALT:
1861 do {
1862 r = noname_disable_map(&(NODE_CAR(node)), map, counter);
1863 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
1864 break;
1865
1866 case NODE_QUANT:
1867 {
1868 Node** ptarget = &(NODE_BODY(node));
1869 Node* old = *ptarget;
1870 r = noname_disable_map(ptarget, map, counter);
1871 if (*ptarget != old && NODE_TYPE(*ptarget) == NODE_QUANT) {
1872 onig_reduce_nested_quantifier(node, *ptarget);
1873 }
1874 }
1875 break;
1876
1877 case NODE_ENCLOSURE:
1878 {
1879 EnclosureNode* en = ENCLOSURE_(node);
1880 if (en->type == ENCLOSURE_MEMORY) {
1881 if (NODE_IS_NAMED_GROUP(node)) {
1882 (*counter)++;
1883 map[en->m.regnum].new_val = *counter;
1884 en->m.regnum = *counter;
1885 r = noname_disable_map(&(NODE_BODY(node)), map, counter);
1886 }
1887 else {
1888 *plink = NODE_BODY(node);
1889 NODE_BODY(node) = NULL_NODE;
1890 onig_node_free(node);
1891 r = noname_disable_map(plink, map, counter);
1892 }
1893 }
1894 else if (en->type == ENCLOSURE_IF_ELSE) {
1895 r = noname_disable_map(&(NODE_ENCLOSURE_BODY(en)), map, counter);
1896 if (r != 0) return r;
1897 if (IS_NOT_NULL(en->te.Then)) {
1898 r = noname_disable_map(&(en->te.Then), map, counter);
1899 if (r != 0) return r;
1900 }
1901 if (IS_NOT_NULL(en->te.Else)) {
1902 r = noname_disable_map(&(en->te.Else), map, counter);
1903 if (r != 0) return r;
1904 }
1905 }
1906 else
1907 r = noname_disable_map(&(NODE_BODY(node)), map, counter);
1908 }
1909 break;
1910
1911 case NODE_ANCHOR:
1912 if (IS_NOT_NULL(NODE_BODY(node)))
1913 r = noname_disable_map(&(NODE_BODY(node)), map, counter);
1914 break;
1915
1916 default:
1917 break;
1918 }
1919
1920 return r;
1921 }
1922
1923 static int
1924 renumber_node_backref(Node* node, GroupNumRemap* map)
1925 {
1926 int i, pos, n, old_num;
1927 int *backs;
1928 BackRefNode* bn = BACKREF_(node);
1929
1930 if (! NODE_IS_BY_NAME(node))
1931 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
1932
1933 old_num = bn->back_num;
1934 if (IS_NULL(bn->back_dynamic))
1935 backs = bn->back_static;
1936 else
1937 backs = bn->back_dynamic;
1938
1939 for (i = 0, pos = 0; i < old_num; i++) {
1940 n = map[backs[i]].new_val;
1941 if (n > 0) {
1942 backs[pos] = n;
1943 pos++;
1944 }
1945 }
1946
1947 bn->back_num = pos;
1948 return 0;
1949 }
1950
1951 static int
1952 renumber_by_map(Node* node, GroupNumRemap* map)
1953 {
1954 int r = 0;
1955
1956 switch (NODE_TYPE(node)) {
1957 case NODE_LIST:
1958 case NODE_ALT:
1959 do {
1960 r = renumber_by_map(NODE_CAR(node), map);
1961 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
1962 break;
1963
1964 case NODE_QUANT:
1965 r = renumber_by_map(NODE_BODY(node), map);
1966 break;
1967
1968 case NODE_ENCLOSURE:
1969 {
1970 EnclosureNode* en = ENCLOSURE_(node);
1971
1972 r = renumber_by_map(NODE_BODY(node), map);
1973 if (r != 0) return r;
1974
1975 if (en->type == ENCLOSURE_IF_ELSE) {
1976 if (IS_NOT_NULL(en->te.Then)) {
1977 r = renumber_by_map(en->te.Then, map);
1978 if (r != 0) return r;
1979 }
1980 if (IS_NOT_NULL(en->te.Else)) {
1981 r = renumber_by_map(en->te.Else, map);
1982 if (r != 0) return r;
1983 }
1984 }
1985 }
1986 break;
1987
1988 case NODE_BACKREF:
1989 r = renumber_node_backref(node, map);
1990 break;
1991
1992 case NODE_ANCHOR:
1993 if (IS_NOT_NULL(NODE_BODY(node)))
1994 r = renumber_by_map(NODE_BODY(node), map);
1995 break;
1996
1997 default:
1998 break;
1999 }
2000
2001 return r;
2002 }
2003
2004 static int
2005 numbered_ref_check(Node* node)
2006 {
2007 int r = 0;
2008
2009 switch (NODE_TYPE(node)) {
2010 case NODE_LIST:
2011 case NODE_ALT:
2012 do {
2013 r = numbered_ref_check(NODE_CAR(node));
2014 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
2015 break;
2016
2017 case NODE_ANCHOR:
2018 if (IS_NULL(NODE_BODY(node)))
2019 break;
2020 /* fall */
2021 case NODE_QUANT:
2022 r = numbered_ref_check(NODE_BODY(node));
2023 break;
2024
2025 case NODE_ENCLOSURE:
2026 {
2027 EnclosureNode* en = ENCLOSURE_(node);
2028
2029 r = numbered_ref_check(NODE_BODY(node));
2030 if (r != 0) return r;
2031
2032 if (en->type == ENCLOSURE_IF_ELSE) {
2033 if (IS_NOT_NULL(en->te.Then)) {
2034 r = numbered_ref_check(en->te.Then);
2035 if (r != 0) return r;
2036 }
2037 if (IS_NOT_NULL(en->te.Else)) {
2038 r = numbered_ref_check(en->te.Else);
2039 if (r != 0) return r;
2040 }
2041 }
2042 }
2043
2044 break;
2045
2046 case NODE_BACKREF:
2047 if (! NODE_IS_BY_NAME(node))
2048 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
2049 break;
2050
2051 default:
2052 break;
2053 }
2054
2055 return r;
2056 }
2057
2058 static int
2059 disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
2060 {
2061 int r, i, pos, counter;
2062 int result;
2063 MemStatusType loc;
2064 GroupNumRemap* map;
2065
2066 map = (GroupNumRemap* )xmalloc(sizeof(GroupNumRemap) * (env->num_mem + 1));
2067 CHECK_NULL_RETURN_MEMERR(map);
2068 for (i = 1; i <= env->num_mem; i++) {
2069 map[i].new_val = 0;
2070 }
2071 counter = 0;
2072 r = noname_disable_map(root, map, &counter);
2073 if (r != 0) return r;
2074
2075 r = renumber_by_map(*root, map);
2076 if (r != 0) return r;
2077
2078 for (i = 1, pos = 1; i <= env->num_mem; i++) {
2079 if (map[i].new_val > 0) {
2080 SCANENV_MEMENV(env)[pos] = SCANENV_MEMENV(env)[i];
2081 pos++;
2082 }
2083 }
2084
2085 loc = env->capture_history;
2086 MEM_STATUS_CLEAR(env->capture_history);
2087 for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
2088 if (MEM_STATUS_AT(loc, i)) {
2089 MEM_STATUS_ON_SIMPLE(env->capture_history, map[i].new_val);
2090 }
2091 }
2092
2093 env->num_mem = env->num_named;
2094 reg->num_mem = env->num_named;
2095 result = onig_renumber_name_table(reg, map);
2096 xfree(map);
2097 return result;
2098 }
2099
2100 #ifdef USE_CALL
2101 static int
2102 fix_unset_addr_list(UnsetAddrList* uslist, regex_t* reg)
2103 {
2104 int i, offset;
2105 EnclosureNode* en;
2106 AbsAddrType addr;
2107
2108 for (i = 0; i < uslist->num; i++) {
2109 if (! NODE_IS_ADDR_FIXED(uslist->us[i].target))
2110 return ONIGERR_PARSER_BUG;
2111
2112 en = ENCLOSURE_(uslist->us[i].target);
2113 addr = en->m.called_addr;
2114 offset = uslist->us[i].offset;
2115
2116 BB_WRITE(reg, offset, &addr, SIZE_ABSADDR);
2117 }
2118 return 0;
2119 }
2120 #endif
2121
2122
2123 #define GET_CHAR_LEN_VARLEN -1
2124 #define GET_CHAR_LEN_TOP_ALT_VARLEN -2
2125
2126 /* fixed size pattern node only */
2127 static int
2128 get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
2129 {
2130 int tlen;
2131 int r = 0;
2132
2133 level++;
2134 *len = 0;
2135 switch (NODE_TYPE(node)) {
2136 case NODE_LIST:
2137 do {
2138 r = get_char_length_tree1(NODE_CAR(node), reg, &tlen, level);
2139 if (r == 0)
2140 *len = distance_add(*len, tlen);
2141 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
2142 break;
2143
2144 case NODE_ALT:
2145 {
2146 int tlen2;
2147 int varlen = 0;
2148
2149 r = get_char_length_tree1(NODE_CAR(node), reg, &tlen, level);
2150 while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node))) {
2151 r = get_char_length_tree1(NODE_CAR(node), reg, &tlen2, level);
2152 if (r == 0) {
2153 if (tlen != tlen2)
2154 varlen = 1;
2155 }
2156 }
2157 if (r == 0) {
2158 if (varlen != 0) {
2159 if (level == 1)
2160 r = GET_CHAR_LEN_TOP_ALT_VARLEN;
2161 else
2162 r = GET_CHAR_LEN_VARLEN;
2163 }
2164 else
2165 *len = tlen;
2166 }
2167 }
2168 break;
2169
2170 case NODE_STRING:
2171 {
2172 StrNode* sn = STR_(node);
2173 UChar *s = sn->s;
2174
2175 while (s < sn->end) {
2176 s += enclen(reg->enc, s);
2177 (*len)++;
2178 }
2179 }
2180 break;
2181
2182 case NODE_QUANT:
2183 {
2184 QuantNode* qn = QUANT_(node);
2185
2186 if (qn->lower == qn->upper) {
2187 if (qn->upper == 0) {
2188 *len = 0;
2189 }
2190 else {
2191 r = get_char_length_tree1(NODE_BODY(node), reg, &tlen, level);
2192 if (r == 0)
2193 *len = distance_multiply(tlen, qn->lower);
2194 }
2195 }
2196 else
2197 r = GET_CHAR_LEN_VARLEN;
2198 }
2199 break;
2200
2201 #ifdef USE_CALL
2202 case NODE_CALL:
2203 if (! NODE_IS_RECURSION(node))
2204 r = get_char_length_tree1(NODE_BODY(node), reg, len, level);
2205 else
2206 r = GET_CHAR_LEN_VARLEN;
2207 break;
2208 #endif
2209
2210 case NODE_CTYPE:
2211 case NODE_CCLASS:
2212 *len = 1;
2213 break;
2214
2215 case NODE_ENCLOSURE:
2216 {
2217 EnclosureNode* en = ENCLOSURE_(node);
2218
2219 switch (en->type) {
2220 case ENCLOSURE_MEMORY:
2221 #ifdef USE_CALL
2222 if (NODE_IS_CLEN_FIXED(node))
2223 *len = en->char_len;
2224 else {
2225 r = get_char_length_tree1(NODE_BODY(node), reg, len, level);
2226 if (r == 0) {
2227 en->char_len = *len;
2228 NODE_STATUS_ADD(node, CLEN_FIXED);
2229 }
2230 }
2231 break;
2232 #endif
2233 case ENCLOSURE_OPTION:
2234 case ENCLOSURE_STOP_BACKTRACK:
2235 r = get_char_length_tree1(NODE_BODY(node), reg, len, level);
2236 break;
2237 case ENCLOSURE_IF_ELSE:
2238 {
2239 int clen, elen;
2240
2241 r = get_char_length_tree1(NODE_BODY(node), reg, &clen, level);
2242 if (r == 0) {
2243 if (IS_NOT_NULL(en->te.Then)) {
2244 r = get_char_length_tree1(en->te.Then, reg, &tlen, level);
2245 if (r != 0) break;
2246 }
2247 else tlen = 0;
2248 if (IS_NOT_NULL(en->te.Else)) {
2249 r = get_char_length_tree1(en->te.Else, reg, &elen, level);
2250 if (r != 0) break;
2251 }
2252 else elen = 0;
2253
2254 if (clen + tlen != elen) {
2255 r = GET_CHAR_LEN_VARLEN;
2256 }
2257 else {
2258 *len = elen;
2259 }
2260 }
2261 }
2262 break;
2263
2264 default:
2265 break;
2266 }
2267 }
2268 break;
2269
2270 case NODE_ANCHOR:
2271 case NODE_GIMMICK:
2272 break;
2273
2274 case NODE_BACKREF:
2275 if (NODE_IS_CHECKER(node))
2276 break;
2277 /* fall */
2278 default:
2279 r = GET_CHAR_LEN_VARLEN;
2280 break;
2281 }
2282
2283 return r;
2284 }
2285
2286 static int
2287 get_char_length_tree(Node* node, regex_t* reg, int* len)
2288 {
2289 return get_char_length_tree1(node, reg, len, 0);
2290 }
2291
2292 /* x is not included y ==> 1 : 0 */
2293 static int
2294 is_exclusive(Node* x, Node* y, regex_t* reg)
2295 {
2296 int i, len;
2297 OnigCodePoint code;
2298 UChar *p;
2299 NodeType ytype;
2300
2301 retry:
2302 ytype = NODE_TYPE(y);
2303 switch (NODE_TYPE(x)) {
2304 case NODE_CTYPE:
2305 {
2306 if (CTYPE_(x)->ctype == CTYPE_ANYCHAR ||
2307 CTYPE_(y)->ctype == CTYPE_ANYCHAR)
2308 break;
2309
2310 switch (ytype) {
2311 case NODE_CTYPE:
2312 if (CTYPE_(y)->ctype == CTYPE_(x)->ctype &&
2313 CTYPE_(y)->not != CTYPE_(x)->not &&
2314 CTYPE_(y)->ascii_mode == CTYPE_(x)->ascii_mode)
2315 return 1;
2316 else
2317 return 0;
2318 break;
2319
2320 case NODE_CCLASS:
2321 swap:
2322 {
2323 Node* tmp;
2324 tmp = x; x = y; y = tmp;
2325 goto retry;
2326 }
2327 break;
2328
2329 case NODE_STRING:
2330 goto swap;
2331 break;
2332
2333 default:
2334 break;
2335 }
2336 }
2337 break;
2338
2339 case NODE_CCLASS:
2340 {
2341 int range;
2342 CClassNode* xc = CCLASS_(x);
2343
2344 switch (ytype) {
2345 case NODE_CTYPE:
2346 switch (CTYPE_(y)->ctype) {
2347 case CTYPE_ANYCHAR:
2348 return 0;
2349 break;
2350
2351 case ONIGENC_CTYPE_WORD:
2352 if (CTYPE_(y)->not == 0) {
2353 if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
2354 range = CTYPE_(y)->ascii_mode != 0 ? 128 : SINGLE_BYTE_SIZE;
2355 for (i = 0; i < range; i++) {
2356 if (BITSET_AT(xc->bs, i)) {
2357 if (ONIGENC_IS_CODE_WORD(reg->enc, i)) return 0;
2358 }
2359 }
2360 return 1;
2361 }
2362 return 0;
2363 }
2364 else {
2365 if (IS_NOT_NULL(xc->mbuf)) return 0;
2366 if (IS_NCCLASS_NOT(xc)) return 0;
2367
2368 range = CTYPE_(y)->ascii_mode != 0 ? 128 : SINGLE_BYTE_SIZE;
2369 for (i = 0; i < range; i++) {
2370 if (! ONIGENC_IS_CODE_WORD(reg->enc, i)) {
2371 if (BITSET_AT(xc->bs, i))
2372 return 0;
2373 }
2374 }
2375 for (i = range; i < SINGLE_BYTE_SIZE; i++) {
2376 if (BITSET_AT(xc->bs, i)) return 0;
2377 }
2378 return 1;
2379 }
2380 break;
2381
2382 default:
2383 break;
2384 }
2385 break;
2386
2387 case NODE_CCLASS:
2388 {
2389 int v;
2390 CClassNode* yc = CCLASS_(y);
2391
2392 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2393 v = BITSET_AT(xc->bs, i);
2394 if ((v != 0 && !IS_NCCLASS_NOT(xc)) || (v == 0 && IS_NCCLASS_NOT(xc))) {
2395 v = BITSET_AT(yc->bs, i);
2396 if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
2397 (v == 0 && IS_NCCLASS_NOT(yc)))
2398 return 0;
2399 }
2400 }
2401 if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
2402 (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
2403 return 1;
2404 return 0;
2405 }
2406 break;
2407
2408 case NODE_STRING:
2409 goto swap;
2410 break;
2411
2412 default:
2413 break;
2414 }
2415 }
2416 break;
2417
2418 case NODE_STRING:
2419 {
2420 StrNode* xs = STR_(x);
2421
2422 if (NODE_STRING_LEN(x) == 0)
2423 break;
2424
2425 switch (ytype) {
2426 case NODE_CTYPE:
2427 switch (CTYPE_(y)->ctype) {
2428 case CTYPE_ANYCHAR:
2429 break;
2430
2431 case ONIGENC_CTYPE_WORD:
2432 if (CTYPE_(y)->ascii_mode == 0) {
2433 if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
2434 return CTYPE_(y)->not;
2435 else
2436 return !(CTYPE_(y)->not);
2437 }
2438 else {
2439 if (ONIGENC_IS_MBC_WORD_ASCII(reg->enc, xs->s, xs->end))
2440 return CTYPE_(y)->not;
2441 else
2442 return !(CTYPE_(y)->not);
2443 }
2444 break;
2445 default:
2446 break;
2447 }
2448 break;
2449
2450 case NODE_CCLASS:
2451 {
2452 CClassNode* cc = CCLASS_(y);
2453
2454 code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
2455 xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
2456 return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
2457 }
2458 break;
2459
2460 case NODE_STRING:
2461 {
2462 UChar *q;
2463 StrNode* ys = STR_(y);
2464
2465 len = NODE_STRING_LEN(x);
2466 if (len > NODE_STRING_LEN(y)) len = NODE_STRING_LEN(y);
2467 if (NODE_STRING_IS_AMBIG(x) || NODE_STRING_IS_AMBIG(y)) {
2468 /* tiny version */
2469 return 0;
2470 }
2471 else {
2472 for (i = 0, p = ys->s, q = xs->s; i < len; i++, p++, q++) {
2473 if (*p != *q) return 1;
2474 }
2475 }
2476 }
2477 break;
2478
2479 default:
2480 break;
2481 }
2482 }
2483 break;
2484
2485 default:
2486 break;
2487 }
2488
2489 return 0;
2490 }
2491
2492 static Node*
2493 get_head_value_node(Node* node, int exact, regex_t* reg)
2494 {
2495 Node* n = NULL_NODE;
2496
2497 switch (NODE_TYPE(node)) {
2498 case NODE_BACKREF:
2499 case NODE_ALT:
2500 #ifdef USE_CALL
2501 case NODE_CALL:
2502 #endif
2503 break;
2504
2505 case NODE_CTYPE:
2506 if (CTYPE_(node)->ctype == CTYPE_ANYCHAR)
2507 break;
2508 /* fall */
2509 case NODE_CCLASS:
2510 if (exact == 0) {
2511 n = node;
2512 }
2513 break;
2514
2515 case NODE_LIST:
2516 n = get_head_value_node(NODE_CAR(node), exact, reg);
2517 break;
2518
2519 case NODE_STRING:
2520 {
2521 StrNode* sn = STR_(node);
2522
2523 if (sn->end <= sn->s)
2524 break;
2525
2526 if (exact != 0 &&
2527 !NODE_STRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
2528 }
2529 else {
2530 n = node;
2531 }
2532 }
2533 break;
2534
2535 case NODE_QUANT:
2536 {
2537 QuantNode* qn = QUANT_(node);
2538 if (qn->lower > 0) {
2539 if (IS_NOT_NULL(qn->head_exact))
2540 n = qn->head_exact;
2541 else
2542 n = get_head_value_node(NODE_BODY(node), exact, reg);
2543 }
2544 }
2545 break;
2546
2547 case NODE_ENCLOSURE:
2548 {
2549 EnclosureNode* en = ENCLOSURE_(node);
2550 switch (en->type) {
2551 case ENCLOSURE_OPTION:
2552 {
2553 OnigOptionType options = reg->options;
2554
2555 reg->options = ENCLOSURE_(node)->o.options;
2556 n = get_head_value_node(NODE_BODY(node), exact, reg);
2557 reg->options = options;
2558 }
2559 break;
2560
2561 case ENCLOSURE_MEMORY:
2562 case ENCLOSURE_STOP_BACKTRACK:
2563 case ENCLOSURE_IF_ELSE:
2564 n = get_head_value_node(NODE_BODY(node), exact, reg);
2565 break;
2566 }
2567 }
2568 break;
2569
2570 case NODE_ANCHOR:
2571 if (ANCHOR_(node)->type == ANCHOR_PREC_READ)
2572 n = get_head_value_node(NODE_BODY(node), exact, reg);
2573 break;
2574
2575 case NODE_GIMMICK:
2576 default:
2577 break;
2578 }
2579
2580 return n;
2581 }
2582
2583 static int
2584 check_type_tree(Node* node, int type_mask, int enclosure_mask, int anchor_mask)
2585 {
2586 NodeType type;
2587 int r = 0;
2588
2589 type = NODE_TYPE(node);
2590 if ((NODE_TYPE2BIT(type) & type_mask) == 0)
2591 return 1;
2592
2593 switch (type) {
2594 case NODE_LIST:
2595 case NODE_ALT:
2596 do {
2597 r = check_type_tree(NODE_CAR(node), type_mask, enclosure_mask,
2598 anchor_mask);
2599 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
2600 break;
2601
2602 case NODE_QUANT:
2603 r = check_type_tree(NODE_BODY(node), type_mask, enclosure_mask, anchor_mask);
2604 break;
2605
2606 case NODE_ENCLOSURE:
2607 {
2608 EnclosureNode* en = ENCLOSURE_(node);
2609 if (((1<<en->type) & enclosure_mask) == 0)
2610 return 1;
2611
2612 r = check_type_tree(NODE_BODY(node), type_mask, enclosure_mask, anchor_mask);
2613 if (r == 0 && en->type == ENCLOSURE_IF_ELSE) {
2614 if (IS_NOT_NULL(en->te.Then)) {
2615 r = check_type_tree(en->te.Then, type_mask, enclosure_mask, anchor_mask);
2616 if (r != 0) break;
2617 }
2618 if (IS_NOT_NULL(en->te.Else)) {
2619 r = check_type_tree(en->te.Else, type_mask, enclosure_mask, anchor_mask);
2620 }
2621 }
2622 }
2623 break;
2624
2625 case NODE_ANCHOR:
2626 type = ANCHOR_(node)->type;
2627 if ((type & anchor_mask) == 0)
2628 return 1;
2629
2630 if (IS_NOT_NULL(NODE_BODY(node)))
2631 r = check_type_tree(NODE_BODY(node), type_mask, enclosure_mask, anchor_mask);
2632 break;
2633
2634 case NODE_GIMMICK:
2635 default:
2636 break;
2637 }
2638 return r;
2639 }
2640
2641 static OnigLen
2642 tree_min_len(Node* node, ScanEnv* env)
2643 {
2644 OnigLen len;
2645 OnigLen tmin;
2646
2647 len = 0;
2648 switch (NODE_TYPE(node)) {
2649 case NODE_BACKREF:
2650 if (! NODE_IS_CHECKER(node)) {
2651 int i;
2652 int* backs;
2653 MemEnv* mem_env = SCANENV_MEMENV(env);
2654 BackRefNode* br = BACKREF_(node);
2655 if (NODE_IS_RECURSION(node)) break;
2656
2657 backs = BACKREFS_P(br);
2658 len = tree_min_len(mem_env[backs[0]].node, env);
2659 for (i = 1; i < br->back_num; i++) {
2660 tmin = tree_min_len(mem_env[backs[i]].node, env);
2661 if (len > tmin) len = tmin;
2662 }
2663 }
2664 break;
2665
2666 #ifdef USE_CALL
2667 case NODE_CALL:
2668 {
2669 Node* t = NODE_BODY(node);
2670 if (NODE_IS_RECURSION(node)) {
2671 if (NODE_IS_MIN_FIXED(t))
2672 len = ENCLOSURE_(t)->min_len;
2673 }
2674 else
2675 len = tree_min_len(t, env);
2676 }
2677 break;
2678 #endif
2679
2680 case NODE_LIST:
2681 do {
2682 tmin = tree_min_len(NODE_CAR(node), env);
2683 len = distance_add(len, tmin);
2684 } while (IS_NOT_NULL(node = NODE_CDR(node)));
2685 break;
2686
2687 case NODE_ALT:
2688 {
2689 Node *x, *y;
2690 y = node;
2691 do {
2692 x = NODE_CAR(y);
2693 tmin = tree_min_len(x, env);
2694 if (y == node) len = tmin;
2695 else if (len > tmin) len = tmin;
2696 } while (IS_NOT_NULL(y = NODE_CDR(y)));
2697 }
2698 break;
2699
2700 case NODE_STRING:
2701 {
2702 StrNode* sn = STR_(node);
2703 len = (int )(sn->end - sn->s);
2704 }
2705 break;
2706
2707 case NODE_CTYPE:
2708 case NODE_CCLASS:
2709 len = ONIGENC_MBC_MINLEN(env->enc);
2710 break;
2711
2712 case NODE_QUANT:
2713 {
2714 QuantNode* qn = QUANT_(node);
2715
2716 if (qn->lower > 0) {
2717 len = tree_min_len(NODE_BODY(node), env);
2718 len = distance_multiply(len, qn->lower);
2719 }
2720 }
2721 break;
2722
2723 case NODE_ENCLOSURE:
2724 {
2725 EnclosureNode* en = ENCLOSURE_(node);
2726 switch (en->type) {
2727 case ENCLOSURE_MEMORY:
2728 if (NODE_IS_MIN_FIXED(node))
2729 len = en->min_len;
2730 else {
2731 if (NODE_IS_MARK1(node))
2732 len = 0; /* recursive */
2733 else {
2734 NODE_STATUS_ADD(node, MARK1);
2735 len = tree_min_len(NODE_BODY(node), env);
2736 NODE_STATUS_REMOVE(node, MARK1);
2737
2738 en->min_len = len;
2739 NODE_STATUS_ADD(node, MIN_FIXED);
2740 }
2741 }
2742 break;
2743
2744 case ENCLOSURE_OPTION:
2745 case ENCLOSURE_STOP_BACKTRACK:
2746 len = tree_min_len(NODE_BODY(node), env);
2747 break;
2748 case ENCLOSURE_IF_ELSE:
2749 {
2750 OnigLen elen;
2751
2752 len = tree_min_len(NODE_BODY(node), env);
2753 if (IS_NOT_NULL(en->te.Then))
2754 len += tree_min_len(en->te.Then, env);
2755 if (IS_NOT_NULL(en->te.Else))
2756 elen = tree_min_len(en->te.Else, env);
2757 else elen = 0;
2758
2759 if (elen < len) len = elen;
2760 }
2761 break;
2762 }
2763 }
2764 break;
2765
2766 case NODE_GIMMICK:
2767 {
2768 GimmickNode* g = GIMMICK_(node);
2769 if (g->type == GIMMICK_FAIL) {
2770 len = INFINITE_LEN;
2771 break;
2772 }
2773 }
2774 /* fall */
2775 case NODE_ANCHOR:
2776 default:
2777 break;
2778 }
2779
2780 return len;
2781 }
2782
2783 static OnigLen
2784 tree_max_len(Node* node, ScanEnv* env)
2785 {
2786 OnigLen len;
2787 OnigLen tmax;
2788
2789 len = 0;
2790 switch (NODE_TYPE(node)) {
2791 case NODE_LIST:
2792 do {
2793 tmax = tree_max_len(NODE_CAR(node), env);
2794 len = distance_add(len, tmax);
2795 } while (IS_NOT_NULL(node = NODE_CDR(node)));
2796 break;
2797
2798 case NODE_ALT:
2799 do {
2800 tmax = tree_max_len(NODE_CAR(node), env);
2801 if (len < tmax) len = tmax;
2802 } while (IS_NOT_NULL(node = NODE_CDR(node)));
2803 break;
2804
2805 case NODE_STRING:
2806 {
2807 StrNode* sn = STR_(node);
2808 len = (OnigLen )(sn->end - sn->s);
2809 }
2810 break;
2811
2812 case NODE_CTYPE:
2813 case NODE_CCLASS:
2814 len = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2815 break;
2816
2817 case NODE_BACKREF:
2818 if (! NODE_IS_CHECKER(node)) {
2819 int i;
2820 int* backs;
2821 MemEnv* mem_env = SCANENV_MEMENV(env);
2822 BackRefNode* br = BACKREF_(node);
2823 if (NODE_IS_RECURSION(node)) {
2824 len = INFINITE_LEN;
2825 break;
2826 }
2827 backs = BACKREFS_P(br);
2828 for (i = 0; i < br->back_num; i++) {
2829 tmax = tree_max_len(mem_env[backs[i]].node, env);
2830 if (len < tmax) len = tmax;
2831 }
2832 }
2833 break;
2834
2835 #ifdef USE_CALL
2836 case NODE_CALL:
2837 if (! NODE_IS_RECURSION(node))
2838 len = tree_max_len(NODE_BODY(node), env);
2839 else
2840 len = INFINITE_LEN;
2841 break;
2842 #endif
2843
2844 case NODE_QUANT:
2845 {
2846 QuantNode* qn = QUANT_(node);
2847
2848 if (qn->upper != 0) {
2849 len = tree_max_len(NODE_BODY(node), env);
2850 if (len != 0) {
2851 if (! IS_REPEAT_INFINITE(qn->upper))
2852 len = distance_multiply(len, qn->upper);
2853 else
2854 len = INFINITE_LEN;
2855 }
2856 }
2857 }
2858 break;
2859
2860 case NODE_ENCLOSURE:
2861 {
2862 EnclosureNode* en = ENCLOSURE_(node);
2863 switch (en->type) {
2864 case ENCLOSURE_MEMORY:
2865 if (NODE_IS_MAX_FIXED(node))
2866 len = en->max_len;
2867 else {
2868 if (NODE_IS_MARK1(node))
2869 len = INFINITE_LEN;
2870 else {
2871 NODE_STATUS_ADD(node, MARK1);
2872 len = tree_max_len(NODE_BODY(node), env);
2873 NODE_STATUS_REMOVE(node, MARK1);
2874
2875 en->max_len = len;
2876 NODE_STATUS_ADD(node, MAX_FIXED);
2877 }
2878 }
2879 break;
2880
2881 case ENCLOSURE_OPTION:
2882 case ENCLOSURE_STOP_BACKTRACK:
2883 len = tree_max_len(NODE_BODY(node), env);
2884 break;
2885 case ENCLOSURE_IF_ELSE:
2886 {
2887 OnigLen tlen, elen;
2888
2889 len = tree_max_len(NODE_BODY(node), env);
2890 if (IS_NOT_NULL(en->te.Then)) {
2891 tlen = tree_max_len(en->te.Then, env);
2892 len = distance_add(len, tlen);
2893 }
2894 if (IS_NOT_NULL(en->te.Else))
2895 elen = tree_max_len(en->te.Else, env);
2896 else elen = 0;
2897
2898 if (elen > len) len = elen;
2899 }
2900 break;
2901 }
2902 }
2903 break;
2904
2905 case NODE_ANCHOR:
2906 case NODE_GIMMICK:
2907 default:
2908 break;
2909 }
2910
2911 return len;
2912 }
2913
2914 static int
2915 check_backrefs(Node* node, ScanEnv* env)
2916 {
2917 int r;
2918
2919 switch (NODE_TYPE(node)) {
2920 case NODE_LIST:
2921 case NODE_ALT:
2922 do {
2923 r = check_backrefs(NODE_CAR(node), env);
2924 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
2925 break;
2926
2927 case NODE_ANCHOR:
2928 if (! ANCHOR_HAS_BODY(ANCHOR_(node))) {
2929 r = 0;
2930 break;
2931 }
2932 /* fall */
2933 case NODE_QUANT:
2934 r = check_backrefs(NODE_BODY(node), env);
2935 break;
2936
2937 case NODE_ENCLOSURE:
2938 r = check_backrefs(NODE_BODY(node), env);
2939 {
2940 EnclosureNode* en = ENCLOSURE_(node);
2941
2942 if (en->type == ENCLOSURE_IF_ELSE) {
2943 if (r != 0) return r;
2944 if (IS_NOT_NULL(en->te.Then)) {
2945 r = check_backrefs(en->te.Then, env);
2946 if (r != 0) return r;
2947 }
2948 if (IS_NOT_NULL(en->te.Else)) {
2949 r = check_backrefs(en->te.Else, env);
2950 }
2951 }
2952 }
2953 break;
2954
2955 case NODE_BACKREF:
2956 {
2957 int i;
2958 BackRefNode* br = BACKREF_(node);
2959 int* backs = BACKREFS_P(br);
2960 MemEnv* mem_env = SCANENV_MEMENV(env);
2961
2962 for (i = 0; i < br->back_num; i++) {
2963 if (backs[i] > env->num_mem)
2964 return ONIGERR_INVALID_BACKREF;
2965
2966 NODE_STATUS_ADD(mem_env[backs[i]].node, BACKREF);
2967 }
2968 r = 0;
2969 }
2970 break;
2971
2972 default:
2973 r = 0;
2974 break;
2975 }
2976
2977 return r;
2978 }
2979
2980
2981 #ifdef USE_CALL
2982
2983 #define RECURSION_EXIST (1<<0)
2984 #define RECURSION_MUST (1<<1)
2985 #define RECURSION_INFINITE (1<<2)
2986
2987 static int
2988 infinite_recursive_call_check(Node* node, ScanEnv* env, int head)
2989 {
2990 int ret;
2991 int r = 0;
2992
2993 switch (NODE_TYPE(node)) {
2994 case NODE_LIST:
2995 {
2996 Node *x;
2997 OnigLen min;
2998
2999 x = node;
3000 do {
3001 ret = infinite_recursive_call_check(NODE_CAR(x), env, head);
3002 if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;
3003 r |= ret;
3004 if (head != 0) {
3005 min = tree_min_len(NODE_CAR(x), env);
3006 if (min != 0) head = 0;
3007 }
3008 } while (IS_NOT_NULL(x = NODE_CDR(x)));
3009 }
3010 break;
3011
3012 case NODE_ALT:
3013 {
3014 int must;
3015
3016 must = RECURSION_MUST;
3017 do {
3018 ret = infinite_recursive_call_check(NODE_CAR(node), env, head);
3019 if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;
3020
3021 r |= (ret & RECURSION_EXIST);
3022 must &= ret;
3023 } while (IS_NOT_NULL(node = NODE_CDR(node)));
3024 r |= must;
3025 }
3026 break;
3027
3028 case NODE_QUANT:
3029 r = infinite_recursive_call_check(NODE_BODY(node), env, head);
3030 if (r < 0) return r;
3031 if ((r & RECURSION_MUST) != 0) {
3032 if (QUANT_(node)->lower == 0)
3033 r &= ~RECURSION_MUST;
3034 }
3035 break;
3036
3037 case NODE_ANCHOR:
3038 if (! ANCHOR_HAS_BODY(ANCHOR_(node)))
3039 break;
3040 /* fall */
3041 case NODE_CALL:
3042 r = infinite_recursive_call_check(NODE_BODY(node), env, head);
3043 break;
3044
3045 case NODE_ENCLOSURE:
3046 {
3047 EnclosureNode* en = ENCLOSURE_(node);
3048
3049 if (en->type == ENCLOSURE_MEMORY) {
3050 if (NODE_IS_MARK2(node))
3051 return 0;
3052 else if (NODE_IS_MARK1(node))
3053 return (head == 0 ? RECURSION_EXIST | RECURSION_MUST
3054 : RECURSION_EXIST | RECURSION_MUST | RECURSION_INFINITE);
3055 else {
3056 NODE_STATUS_ADD(node, MARK2);
3057 r = infinite_recursive_call_check(NODE_BODY(node), env, head);
3058 NODE_STATUS_REMOVE(node, MARK2);
3059 }
3060 }
3061 else if (en->type == ENCLOSURE_IF_ELSE) {
3062 int eret;
3063
3064 ret = infinite_recursive_call_check(NODE_BODY(node), env, head);
3065 if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;
3066 r |= ret;
3067 if (IS_NOT_NULL(en->te.Then)) {
3068 OnigLen min;
3069 if (head != 0) {
3070 min = tree_min_len(NODE_BODY(node), env);
3071 }
3072 else min = 0;
3073
3074 ret = infinite_recursive_call_check(en->te.Then, env, min != 0 ? 0:head);
3075 if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;
3076 r |= ret;
3077 }
3078 if (IS_NOT_NULL(en->te.Else)) {
3079 eret = infinite_recursive_call_check(en->te.Else, env, head);
3080 if (eret < 0 || (eret & RECURSION_INFINITE) != 0) return eret;
3081 r |= (eret & RECURSION_EXIST);
3082 if ((eret & RECURSION_MUST) == 0)
3083 r &= ~RECURSION_MUST;
3084 }
3085 }
3086 else {
3087 r = infinite_recursive_call_check(NODE_BODY(node), env, head);
3088 }
3089 }
3090 break;
3091
3092 default:
3093 break;
3094 }
3095
3096 return r;
3097 }
3098
3099 static int
3100 infinite_recursive_call_check_trav(Node* node, ScanEnv* env)
3101 {
3102 int r;
3103
3104 switch (NODE_TYPE(node)) {
3105 case NODE_LIST:
3106 case NODE_ALT:
3107 do {
3108 r = infinite_recursive_call_check_trav(NODE_CAR(node), env);
3109 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
3110 break;
3111
3112 case NODE_ANCHOR:
3113 if (! ANCHOR_HAS_BODY(ANCHOR_(node))) {
3114 r = 0;
3115 break;
3116 }
3117 /* fall */
3118 case NODE_QUANT:
3119 r = infinite_recursive_call_check_trav(NODE_BODY(node), env);
3120 break;
3121
3122 case NODE_ENCLOSURE:
3123 {
3124 EnclosureNode* en = ENCLOSURE_(node);
3125
3126 if (en->type == ENCLOSURE_MEMORY) {
3127 if (NODE_IS_RECURSION(node) && NODE_IS_CALLED(node)) {
3128 int ret;
3129
3130 NODE_STATUS_ADD(node, MARK1);
3131
3132 ret = infinite_recursive_call_check(NODE_BODY(node), env, 1);
3133 if (ret < 0) return ret;
3134 else if ((ret & (RECURSION_MUST | RECURSION_INFINITE)) != 0)
3135 return ONIGERR_NEVER_ENDING_RECURSION;
3136
3137 NODE_STATUS_REMOVE(node, MARK1);
3138 }
3139 }
3140 else if (en->type == ENCLOSURE_IF_ELSE) {
3141 if (IS_NOT_NULL(en->te.Then)) {
3142 r = infinite_recursive_call_check_trav(en->te.Then, env);
3143 if (r != 0) return r;
3144 }
3145 if (IS_NOT_NULL(en->te.Else)) {
3146 r = infinite_recursive_call_check_trav(en->te.Else, env);
3147 if (r != 0) return r;
3148 }
3149 }
3150 }
3151
3152 r = infinite_recursive_call_check_trav(NODE_BODY(node), env);
3153 break;
3154
3155 default:
3156 r = 0;
3157 break;
3158 }
3159
3160 return r;
3161 }
3162
3163 static int
3164 recursive_call_check(Node* node)
3165 {
3166 int r;
3167
3168 switch (NODE_TYPE(node)) {
3169 case NODE_LIST:
3170 case NODE_ALT:
3171 r = 0;
3172 do {
3173 r |= recursive_call_check(NODE_CAR(node));
3174 } while (IS_NOT_NULL(node = NODE_CDR(node)));
3175 break;
3176
3177 case NODE_ANCHOR:
3178 if (! ANCHOR_HAS_BODY(ANCHOR_(node))) {
3179 r = 0;
3180 break;
3181 }
3182 /* fall */
3183 case NODE_QUANT:
3184 r = recursive_call_check(NODE_BODY(node));
3185 break;
3186
3187 case NODE_CALL:
3188 r = recursive_call_check(NODE_BODY(node));
3189 if (r != 0) {
3190 if (NODE_IS_MARK1(NODE_BODY(node)))
3191 NODE_STATUS_ADD(node, RECURSION);
3192 }
3193 break;
3194
3195 case NODE_ENCLOSURE:
3196 {
3197 EnclosureNode* en = ENCLOSURE_(node);
3198
3199 if (en->type == ENCLOSURE_MEMORY) {
3200 if (NODE_IS_MARK2(node))
3201 return 0;
3202 else if (NODE_IS_MARK1(node))
3203 return 1; /* recursion */
3204 else {
3205 NODE_STATUS_ADD(node, MARK2);
3206 r = recursive_call_check(NODE_BODY(node));
3207 NODE_STATUS_REMOVE(node, MARK2);
3208 }
3209 }
3210 else if (en->type == ENCLOSURE_IF_ELSE) {
3211 r = 0;
3212 if (IS_NOT_NULL(en->te.Then)) {
3213 r |= recursive_call_check(en->te.Then);
3214 }
3215 if (IS_NOT_NULL(en->te.Else)) {
3216 r |= recursive_call_check(en->te.Else);
3217 }
3218 r |= recursive_call_check(NODE_BODY(node));
3219 }
3220 else {
3221 r = recursive_call_check(NODE_BODY(node));
3222 }
3223 }
3224 break;
3225
3226 default:
3227 r = 0;
3228 break;
3229 }
3230
3231 return r;
3232 }
3233
3234 #define IN_RECURSION (1<<0)
3235 #define FOUND_CALLED_NODE 1
3236
3237 static int
3238 recursive_call_check_trav(Node* node, ScanEnv* env, int state)
3239 {
3240 int r = 0;
3241
3242 switch (NODE_TYPE(node)) {
3243 case NODE_LIST:
3244 case NODE_ALT:
3245 {
3246 int ret;
3247 do {
3248 ret = recursive_call_check_trav(NODE_CAR(node), env, state);
3249 if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
3250 else if (ret < 0) return ret;
3251 } while (IS_NOT_NULL(node = NODE_CDR(node)));
3252 }
3253 break;
3254
3255 case NODE_QUANT:
3256 r = recursive_call_check_trav(NODE_BODY(node), env, state);
3257 if (QUANT_(node)->upper == 0) {
3258 if (r == FOUND_CALLED_NODE)
3259 QUANT_(node)->is_refered = 1;
3260 }
3261 break;
3262
3263 case NODE_ANCHOR:
3264 {
3265 AnchorNode* an = ANCHOR_(node);
3266 if (ANCHOR_HAS_BODY(an))
3267 r = recursive_call_check_trav(NODE_ANCHOR_BODY(an), env, state);
3268 }
3269 break;
3270
3271 case NODE_ENCLOSURE:
3272 {
3273 int ret;
3274 int state1;
3275 EnclosureNode* en = ENCLOSURE_(node);
3276
3277 if (en->type == ENCLOSURE_MEMORY) {
3278 if (NODE_IS_CALLED(node) || (state & IN_RECURSION) != 0) {
3279 if (! NODE_IS_RECURSION(node)) {
3280 NODE_STATUS_ADD(node, MARK1);
3281 r = recursive_call_check(NODE_BODY(node));
3282 if (r != 0)
3283 NODE_STATUS_ADD(node, RECURSION);
3284 NODE_STATUS_REMOVE(node, MARK1);
3285 }
3286
3287 if (NODE_IS_CALLED(node))
3288 r = FOUND_CALLED_NODE;
3289 }
3290 }
3291
3292 state1 = state;
3293 if (NODE_IS_RECURSION(node))
3294 state1 |= IN_RECURSION;
3295
3296 ret = recursive_call_check_trav(NODE_BODY(node), env, state1);
3297 if (ret == FOUND_CALLED_NODE)
3298 r = FOUND_CALLED_NODE;
3299
3300 if (en->type == ENCLOSURE_IF_ELSE) {
3301 if (IS_NOT_NULL(en->te.Then)) {
3302 ret = recursive_call_check_trav(en->te.Then, env, state1);
3303 if (ret == FOUND_CALLED_NODE)
3304 r = FOUND_CALLED_NODE;
3305 }
3306 if (IS_NOT_NULL(en->te.Else)) {
3307 ret = recursive_call_check_trav(en->te.Else, env, state1);
3308 if (ret == FOUND_CALLED_NODE)
3309 r = FOUND_CALLED_NODE;
3310 }
3311 }
3312 }
3313 break;
3314
3315 default:
3316 break;
3317 }
3318
3319 return r;
3320 }
3321
3322 #endif
3323
3324 /* divide different length alternatives in look-behind.
3325 (?<=A|B) ==> (?<=A)|(?<=B)
3326 (?<!A|B) ==> (?<!A)(?<!B)
3327 */
3328 static int
3329 divide_look_behind_alternatives(Node* node)
3330 {
3331 Node *head, *np, *insert_node;
3332 AnchorNode* an = ANCHOR_(node);
3333 int anc_type = an->type;
3334
3335 head = NODE_ANCHOR_BODY(an);
3336 np = NODE_CAR(head);
3337 swap_node(node, head);
3338 NODE_CAR(node) = head;
3339 NODE_BODY(head) = np;
3340
3341 np = node;
3342 while (IS_NOT_NULL(np = NODE_CDR(np))) {
3343 insert_node = onig_node_new_anchor(anc_type, an->ascii_mode);
3344 CHECK_NULL_RETURN_MEMERR(insert_node);
3345 NODE_BODY(insert_node) = NODE_CAR(np);
3346 NODE_CAR(np) = insert_node;
3347 }
3348
3349 if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
3350 np = node;
3351 do {
3352 NODE_SET_TYPE(np, NODE_LIST); /* alt -> list */
3353 } while (IS_NOT_NULL(np = NODE_CDR(np)));
3354 }
3355 return 0;
3356 }
3357
3358 static int
3359 setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
3360 {
3361 int r, len;
3362 AnchorNode* an = ANCHOR_(node);
3363
3364 r = get_char_length_tree(NODE_ANCHOR_BODY(an), reg, &len);
3365 if (r == 0)
3366 an->char_len = len;
3367 else if (r == GET_CHAR_LEN_VARLEN)
3368 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3369 else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
3370 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
3371 r = divide_look_behind_alternatives(node);
3372 else
3373 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3374 }
3375
3376 return r;
3377 }
3378
3379 static int
3380 next_setup(Node* node, Node* next_node, regex_t* reg)
3381 {
3382 NodeType type;
3383
3384 retry:
3385 type = NODE_TYPE(node);
3386 if (type == NODE_QUANT) {
3387 QuantNode* qn = QUANT_(node);
3388 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
3389 #ifdef USE_QUANT_PEEK_NEXT
3390 Node* n = get_head_value_node(next_node, 1, reg);
3391 /* '\0': for UTF-16BE etc... */
3392 if (IS_NOT_NULL(n) && STR_(n)->s[0] != '\0') {
3393 qn->next_head_exact = n;
3394 }
3395 #endif
3396 /* automatic posseivation a*b ==> (?>a*)b */
3397 if (qn->lower <= 1) {
3398 if (NODE_IS_SIMPLE_TYPE(NODE_BODY(node))) {
3399 Node *x, *y;
3400 x = get_head_value_node(NODE_BODY(node), 0, reg);
3401 if (IS_NOT_NULL(x)) {
3402 y = get_head_value_node(next_node, 0, reg);
3403 if (IS_NOT_NULL(y) && is_exclusive(x, y, reg)) {
3404 Node* en = onig_node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);
3405 CHECK_NULL_RETURN_MEMERR(en);
3406 NODE_STATUS_ADD(en, STOP_BT_SIMPLE_REPEAT);
3407 swap_node(node, en);
3408 NODE_BODY(node) = en;
3409 }
3410 }
3411 }
3412 }
3413 }
3414 }
3415 else if (type == NODE_ENCLOSURE) {
3416 EnclosureNode* en = ENCLOSURE_(node);
3417 if (en->type == ENCLOSURE_MEMORY) {
3418 node = NODE_BODY(node);
3419 goto retry;
3420 }
3421 }
3422 return 0;
3423 }
3424
3425
3426 static int
3427 update_string_node_case_fold(regex_t* reg, Node *node)
3428 {
3429 UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
3430 UChar *sbuf, *ebuf, *sp;
3431 int r, i, len, sbuf_size;
3432 StrNode* sn = STR_(node);
3433
3434 end = sn->end;
3435 sbuf_size = (int )(end - sn->s) * 2;
3436 sbuf = (UChar* )xmalloc(sbuf_size);
3437 CHECK_NULL_RETURN_MEMERR(sbuf);
3438 ebuf = sbuf + sbuf_size;
3439
3440 sp = sbuf;
3441 p = sn->s;
3442 while (p < end) {
3443 len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
3444 for (i = 0; i < len; i++) {
3445 if (sp >= ebuf) {
3446 sbuf = (UChar* )xrealloc(sbuf, sbuf_size * 2, sbuf_size);
3447 CHECK_NULL_RETURN_MEMERR(sbuf);
3448 sp = sbuf + sbuf_size;
3449 sbuf_size *= 2;
3450 ebuf = sbuf + sbuf_size;
3451 }
3452
3453 *sp++ = buf[i];
3454 }
3455 }
3456
3457 r = onig_node_str_set(node, sbuf, sp);
3458 if (r != 0) {
3459 xfree(sbuf);
3460 return r;
3461 }
3462
3463 xfree(sbuf);
3464 return 0;
3465 }
3466
3467 static int
3468 expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end, regex_t* reg)
3469 {
3470 int r;
3471 Node *node;
3472
3473 node = onig_node_new_str(s, end);
3474 if (IS_NULL(node)) return ONIGERR_MEMORY;
3475
3476 r = update_string_node_case_fold(reg, node);
3477 if (r != 0) {
3478 onig_node_free(node);
3479 return r;
3480 }
3481
3482 NODE_STRING_SET_AMBIG(node);
3483 NODE_STRING_SET_DONT_GET_OPT_INFO(node);
3484 *rnode = node;
3485 return 0;
3486 }
3487
3488 static int
3489 expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], UChar *p,
3490 int slen, UChar *end, regex_t* reg, Node **rnode)
3491 {
3492 int r, i, j;
3493 int len;
3494 int varlen;
3495 Node *anode, *var_anode, *snode, *xnode, *an;
3496 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
3497
3498 *rnode = var_anode = NULL_NODE;
3499
3500 varlen = 0;
3501 for (i = 0; i < item_num; i++) {
3502 if (items[i].byte_len != slen) {
3503 varlen = 1;
3504 break;
3505 }
3506 }
3507
3508 if (varlen != 0) {
3509 *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3510 if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
3511
3512 xnode = onig_node_new_list(NULL, NULL);
3513 if (IS_NULL(xnode)) goto mem_err;
3514 NODE_CAR(var_anode) = xnode;
3515
3516 anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3517 if (IS_NULL(anode)) goto mem_err;
3518 NODE_CAR(xnode) = anode;
3519 }
3520 else {
3521 *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3522 if (IS_NULL(anode)) return ONIGERR_MEMORY;
3523 }
3524
3525 snode = onig_node_new_str(p, p + slen);
3526 if (IS_NULL(snode)) goto mem_err;
3527
3528 NODE_CAR(anode) = snode;
3529
3530 for (i = 0; i < item_num; i++) {
3531 snode = onig_node_new_str(NULL, NULL);
3532 if (IS_NULL(snode)) goto mem_err;
3533
3534 for (j = 0; j < items[i].code_len; j++) {
3535 len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
3536 if (len < 0) {
3537 r = len;
3538 goto mem_err2;
3539 }
3540
3541 r = onig_node_str_cat(snode, buf, buf + len);
3542 if (r != 0) goto mem_err2;
3543 }
3544
3545 an = onig_node_new_alt(NULL_NODE, NULL_NODE);
3546 if (IS_NULL(an)) {
3547 goto mem_err2;
3548 }
3549 //The NULL pointer check is not necessary. It is added just for pass static
3550 //analysis. When condition "items[i].byte_len != slen" is true, "varlen = 1"
3551 //in line 3503 will be reached ,so that "if (IS_NULL(var_anode)) return ONIGERR_MEMORY"
3552 //in line 3510 will be executed, so the null pointer has been checked before
3553 //deferenced in line 3584.
3554 if (items[i].byte_len != slen && IS_NOT_NULL(var_anode)) {
3555 Node *rem;
3556 UChar *q = p + items[i].byte_len;
3557
3558 if (q < end) {
3559 r = expand_case_fold_make_rem_string(&rem, q, end, reg);
3560 if (r != 0) {
3561 onig_node_free(an);
3562 goto mem_err2;
3563 }
3564
3565 xnode = onig_node_list_add(NULL_NODE, snode);
3566 if (IS_NULL(xnode)) {
3567 onig_node_free(an);
3568 onig_node_free(rem);
3569 goto mem_err2;
3570 }
3571 if (IS_NULL(onig_node_list_add(xnode, rem))) {
3572 onig_node_free(an);
3573 onig_node_free(xnode);
3574 onig_node_free(rem);
3575 goto mem_err;
3576 }
3577
3578 NODE_CAR(an) = xnode;
3579 }
3580 else {
3581 NODE_CAR(an) = snode;
3582 }
3583
3584 NODE_CDR(var_anode) = an;
3585 var_anode = an;
3586 }
3587 else {
3588 NODE_CAR(an) = snode;
3589 NODE_CDR(anode) = an;
3590 anode = an;
3591 }
3592 }
3593
3594 return varlen;
3595
3596 mem_err2:
3597 onig_node_free(snode);
3598
3599 mem_err:
3600 onig_node_free(*rnode);
3601
3602 return ONIGERR_MEMORY;
3603 }
3604
3605 static int
3606 expand_case_fold_string(Node* node, regex_t* reg)
3607 {
3608 #define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
3609
3610 int r, n, len, alt_num;
3611 UChar *start, *end, *p;
3612 Node *top_root, *root, *snode, *prev_node;
3613 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
3614 StrNode* sn = STR_(node);
3615
3616 if (NODE_STRING_IS_AMBIG(node)) return 0;
3617
3618 start = sn->s;
3619 end = sn->end;
3620 if (start >= end) return 0;
3621
3622 r = 0;
3623 top_root = root = prev_node = snode = NULL_NODE;
3624 alt_num = 1;
3625 p = start;
3626 while (p < end) {
3627 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag, p, end,
3628 items);
3629 if (n < 0) {
3630 r = n;
3631 goto err;
3632 }
3633
3634 len = enclen(reg->enc, p);
3635
3636 if (n == 0) {
3637 if (IS_NULL(snode)) {
3638 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3639 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3640 if (IS_NULL(root)) {
3641 onig_node_free(prev_node);
3642 goto mem_err;
3643 }
3644 }
3645
3646 prev_node = snode = onig_node_new_str(NULL, NULL);
3647 if (IS_NULL(snode)) goto mem_err;
3648 if (IS_NOT_NULL(root)) {
3649 if (IS_NULL(onig_node_list_add(root, snode))) {
3650 onig_node_free(snode);
3651 goto mem_err;
3652 }
3653 }
3654 }
3655
3656 r = onig_node_str_cat(snode, p, p + len);
3657 if (r != 0) goto err;
3658 }
3659 else {
3660 alt_num *= (n + 1);
3661 if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
3662
3663 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3664 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3665 if (IS_NULL(root)) {
3666 onig_node_free(prev_node);
3667 goto mem_err;
3668 }
3669 }
3670
3671 r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
3672 if (r < 0) goto mem_err;
3673 if (r == 1) {
3674 if (IS_NULL(root)) {
3675 top_root = prev_node;
3676 }
3677 else {
3678 if (IS_NULL(onig_node_list_add(root, prev_node))) {
3679 onig_node_free(prev_node);
3680 goto mem_err;
3681 }
3682 }
3683
3684 root = NODE_CAR(prev_node);
3685 }
3686 else { /* r == 0 */
3687 if (IS_NOT_NULL(root)) {
3688 if (IS_NULL(onig_node_list_add(root, prev_node))) {
3689 onig_node_free(prev_node);
3690 goto mem_err;
3691 }
3692 }
3693 }
3694
3695 snode = NULL_NODE;
3696 }
3697
3698 p += len;
3699 }
3700
3701 if (p < end) {
3702 Node *srem;
3703
3704 r = expand_case_fold_make_rem_string(&srem, p, end, reg);
3705 if (r != 0) goto mem_err;
3706
3707 if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
3708 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3709 if (IS_NULL(root)) {
3710 onig_node_free(srem);
3711 onig_node_free(prev_node);
3712 goto mem_err;
3713 }
3714 }
3715
3716 if (IS_NULL(root)) {
3717 prev_node = srem;
3718 }
3719 else {
3720 if (IS_NULL(onig_node_list_add(root, srem))) {
3721 onig_node_free(srem);
3722 goto mem_err;
3723 }
3724 }
3725 }
3726
3727 /* ending */
3728 top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
3729 swap_node(node, top_root);
3730 onig_node_free(top_root);
3731 return 0;
3732
3733 mem_err:
3734 r = ONIGERR_MEMORY;
3735
3736 err:
3737 onig_node_free(top_root);
3738 return r;
3739 }
3740
3741 #ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT
3742 static enum QuantBodyEmpty
3743 quantifiers_memory_node_info(Node* node)
3744 {
3745 int r = QUANT_BODY_IS_EMPTY;
3746
3747 switch (NODE_TYPE(node)) {
3748 case NODE_LIST:
3749 case NODE_ALT:
3750 {
3751 int v;
3752 do {
3753 v = quantifiers_memory_node_info(NODE_CAR(node));
3754 if (v > r) r = v;
3755 } while (IS_NOT_NULL(node = NODE_CDR(node)));
3756 }
3757 break;
3758
3759 #ifdef USE_CALL
3760 case NODE_CALL:
3761 if (NODE_IS_RECURSION(node)) {
3762 return QUANT_BODY_IS_EMPTY_REC; /* tiny version */
3763 }
3764 else
3765 r = quantifiers_memory_node_info(NODE_BODY(node));
3766 break;
3767 #endif
3768
3769 case NODE_QUANT:
3770 {
3771 QuantNode* qn = QUANT_(node);
3772 if (qn->upper != 0) {
3773 r = quantifiers_memory_node_info(NODE_BODY(node));
3774 }
3775 }
3776 break;
3777
3778 case NODE_ENCLOSURE:
3779 {
3780 EnclosureNode* en = ENCLOSURE_(node);
3781 switch (en->type) {
3782 case ENCLOSURE_MEMORY:
3783 if (NODE_IS_RECURSION(node)) {
3784 return QUANT_BODY_IS_EMPTY_REC;
3785 }
3786 return QUANT_BODY_IS_EMPTY_MEM;
3787 break;
3788
3789 case ENCLOSURE_OPTION:
3790 case ENCLOSURE_STOP_BACKTRACK:
3791 r = quantifiers_memory_node_info(NODE_BODY(node));
3792 break;
3793 case ENCLOSURE_IF_ELSE:
3794 {
3795 int v;
3796 r = quantifiers_memory_node_info(NODE_BODY(node));
3797 if (IS_NOT_NULL(en->te.Then)) {
3798 v = quantifiers_memory_node_info(en->te.Then);
3799 if (v > r) r = v;
3800 }
3801 if (IS_NOT_NULL(en->te.Else)) {
3802 v = quantifiers_memory_node_info(en->te.Else);
3803 if (v > r) r = v;
3804 }
3805 }
3806 break;
3807 default:
3808 break;
3809 }
3810 }
3811 break;
3812
3813 case NODE_BACKREF:
3814 case NODE_STRING:
3815 case NODE_CTYPE:
3816 case NODE_CCLASS:
3817 case NODE_ANCHOR:
3818 case NODE_GIMMICK:
3819 default:
3820 break;
3821 }
3822
3823 return r;
3824 }
3825 #endif /* USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT */
3826
3827
3828 #define IN_ALT (1<<0)
3829 #define IN_NOT (1<<1)
3830 #define IN_REAL_REPEAT (1<<2)
3831 #define IN_VAR_REPEAT (1<<3)
3832 #define IN_ZERO_REPEAT (1<<4)
3833 #define IN_MULTI_ENTRY (1<<5)
3834
3835 #ifdef USE_CALL
3836
3837 #ifdef __GNUC__
3838 __inline
3839 #endif
3840 static int
3841 setup_call_node_call(CallNode* cn, ScanEnv* env, int state)
3842 {
3843 MemEnv* mem_env = SCANENV_MEMENV(env);
3844
3845 if (cn->by_number != 0) {
3846 int gnum = cn->group_num;
3847
3848 if (env->num_named > 0 &&
3849 IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
3850 ! ONIG_IS_OPTION_ON(env->options, ONIG_OPTION_CAPTURE_GROUP)) {
3851 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
3852 }
3853
3854 if (gnum > env->num_mem) {
3855 onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_GROUP_REFERENCE,
3856 cn->name, cn->name_end);
3857 return ONIGERR_UNDEFINED_GROUP_REFERENCE;
3858 }
3859
3860 set_call_attr:
3861 NODE_CALL_BODY(cn) = mem_env[cn->group_num].node;
3862 if (IS_NULL(NODE_CALL_BODY(cn))) {
3863 onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,
3864 cn->name, cn->name_end);
3865 return ONIGERR_UNDEFINED_NAME_REFERENCE;
3866 }
3867 }
3868 else {
3869 int *refs;
3870
3871 int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end, &refs);
3872 if (n <= 0) {
3873 onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,
3874 cn->name, cn->name_end);
3875 return ONIGERR_UNDEFINED_NAME_REFERENCE;
3876 }
3877 else if (n > 1) {
3878 onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL,
3879 cn->name, cn->name_end);
3880 return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
3881 }
3882 else {
3883 cn->group_num = refs[0];
3884 goto set_call_attr;
3885 }
3886 }
3887
3888 return 0;
3889 }
3890
3891 static void
3892 setup_call2_call(Node* node)
3893 {
3894 switch (NODE_TYPE(node)) {
3895 case NODE_LIST:
3896 case NODE_ALT:
3897 do {
3898 setup_call2_call(NODE_CAR(node));
3899 } while (IS_NOT_NULL(node = NODE_CDR(node)));
3900 break;
3901
3902 case NODE_QUANT:
3903 setup_call2_call(NODE_BODY(node));
3904 break;
3905
3906 case NODE_ANCHOR:
3907 if (ANCHOR_HAS_BODY(ANCHOR_(node)))
3908 setup_call2_call(NODE_BODY(node));
3909 break;
3910
3911 case NODE_ENCLOSURE:
3912 {
3913 EnclosureNode* en = ENCLOSURE_(node);
3914
3915 if (en->type == ENCLOSURE_MEMORY) {
3916 if (! NODE_IS_MARK1(node)) {
3917 NODE_STATUS_ADD(node, MARK1);
3918 setup_call2_call(NODE_BODY(node));
3919 NODE_STATUS_REMOVE(node, MARK1);
3920 }
3921 }
3922 else if (en->type == ENCLOSURE_IF_ELSE) {
3923 setup_call2_call(NODE_BODY(node));
3924 if (IS_NOT_NULL(en->te.Then))
3925 setup_call2_call(en->te.Then);
3926 if (IS_NOT_NULL(en->te.Else))
3927 setup_call2_call(en->te.Else);
3928 }
3929 else {
3930 setup_call2_call(NODE_BODY(node));
3931 }
3932 }
3933 break;
3934
3935 case NODE_CALL:
3936 if (! NODE_IS_MARK1(node)) {
3937 NODE_STATUS_ADD(node, MARK1);
3938 {
3939 CallNode* cn = CALL_(node);
3940 Node* called = NODE_CALL_BODY(cn);
3941
3942 cn->entry_count++;
3943
3944 NODE_STATUS_ADD(called, CALLED);
3945 ENCLOSURE_(called)->m.entry_count++;
3946 setup_call2_call(called);
3947 }
3948 NODE_STATUS_REMOVE(node, MARK1);
3949 }
3950 break;
3951
3952 default:
3953 break;
3954 }
3955 }
3956
3957 static int
3958 setup_call(Node* node, ScanEnv* env, int state)
3959 {
3960 int r;
3961
3962 switch (NODE_TYPE(node)) {
3963 case NODE_LIST:
3964 case NODE_ALT:
3965 do {
3966 r = setup_call(NODE_CAR(node), env, state);
3967 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
3968 break;
3969
3970 case NODE_QUANT:
3971 if (QUANT_(node)->upper == 0)
3972 state |= IN_ZERO_REPEAT;
3973
3974 r = setup_call(NODE_BODY(node), env, state);
3975 break;
3976
3977 case NODE_ANCHOR:
3978 if (ANCHOR_HAS_BODY(ANCHOR_(node)))
3979 r = setup_call(NODE_BODY(node), env, state);
3980 else
3981 r = 0;
3982 break;
3983
3984 case NODE_ENCLOSURE:
3985 {
3986 EnclosureNode* en = ENCLOSURE_(node);
3987
3988 if (en->type == ENCLOSURE_MEMORY) {
3989 if ((state & IN_ZERO_REPEAT) != 0) {
3990 NODE_STATUS_ADD(node, IN_ZERO_REPEAT);
3991 ENCLOSURE_(node)->m.entry_count--;
3992 }
3993 r = setup_call(NODE_BODY(node), env, state);
3994 }
3995 else if (en->type == ENCLOSURE_IF_ELSE) {
3996 r = setup_call(NODE_BODY(node), env, state);
3997 if (r != 0) return r;
3998 if (IS_NOT_NULL(en->te.Then)) {
3999 r = setup_call(en->te.Then, env, state);
4000 if (r != 0) return r;
4001 }
4002 if (IS_NOT_NULL(en->te.Else))
4003 r = setup_call(en->te.Else, env, state);
4004 }
4005 else
4006 r = setup_call(NODE_BODY(node), env, state);
4007 }
4008 break;
4009
4010 case NODE_CALL:
4011 if ((state & IN_ZERO_REPEAT) != 0) {
4012 NODE_STATUS_ADD(node, IN_ZERO_REPEAT);
4013 CALL_(node)->entry_count--;
4014 }
4015
4016 r = setup_call_node_call(CALL_(node), env, state);
4017 break;
4018
4019 default:
4020 r = 0;
4021 break;
4022 }
4023
4024 return r;
4025 }
4026
4027 static int
4028 setup_call2(Node* node)
4029 {
4030 int r = 0;
4031
4032 switch (NODE_TYPE(node)) {
4033 case NODE_LIST:
4034 case NODE_ALT:
4035 do {
4036 r = setup_call2(NODE_CAR(node));
4037 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
4038 break;
4039
4040 case NODE_QUANT:
4041 if (QUANT_(node)->upper != 0)
4042 r = setup_call2(NODE_BODY(node));
4043 break;
4044
4045 case NODE_ANCHOR:
4046 if (ANCHOR_HAS_BODY(ANCHOR_(node)))
4047 r = setup_call2(NODE_BODY(node));
4048 break;
4049
4050 case NODE_ENCLOSURE:
4051 if (! NODE_IS_IN_ZERO_REPEAT(node))
4052 r = setup_call2(NODE_BODY(node));
4053
4054 {
4055 EnclosureNode* en = ENCLOSURE_(node);
4056
4057 if (r != 0) return r;
4058 if (en->type == ENCLOSURE_IF_ELSE) {
4059 if (IS_NOT_NULL(en->te.Then)) {
4060 r = setup_call2(en->te.Then);
4061 if (r != 0) return r;
4062 }
4063 if (IS_NOT_NULL(en->te.Else))
4064 r = setup_call2(en->te.Else);
4065 }
4066 }
4067 break;
4068
4069 case NODE_CALL:
4070 if (! NODE_IS_IN_ZERO_REPEAT(node)) {
4071 setup_call2_call(node);
4072 }
4073 break;
4074
4075 default:
4076 break;
4077 }
4078
4079 return r;
4080 }
4081
4082
4083 static void
4084 setup_called_state_call(Node* node, int state)
4085 {
4086 switch (NODE_TYPE(node)) {
4087 case NODE_ALT:
4088 state |= IN_ALT;
4089 /* fall */
4090 case NODE_LIST:
4091 do {
4092 setup_called_state_call(NODE_CAR(node), state);
4093 } while (IS_NOT_NULL(node = NODE_CDR(node)));
4094 break;
4095
4096 case NODE_QUANT:
4097 {
4098 QuantNode* qn = QUANT_(node);
4099
4100 if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 2)
4101 state |= IN_REAL_REPEAT;
4102 if (qn->lower != qn->upper)
4103 state |= IN_VAR_REPEAT;
4104
4105 setup_called_state_call(NODE_QUANT_BODY(qn), state);
4106 }
4107 break;
4108
4109 case NODE_ANCHOR:
4110 {
4111 AnchorNode* an = ANCHOR_(node);
4112
4113 switch (an->type) {
4114 case ANCHOR_PREC_READ_NOT:
4115 case ANCHOR_LOOK_BEHIND_NOT:
4116 state |= IN_NOT;
4117 /* fall */
4118 case ANCHOR_PREC_READ:
4119 case ANCHOR_LOOK_BEHIND:
4120 setup_called_state_call(NODE_ANCHOR_BODY(an), state);
4121 break;
4122 default:
4123 break;
4124 }
4125 }
4126 break;
4127
4128 case NODE_ENCLOSURE:
4129 {
4130 EnclosureNode* en = ENCLOSURE_(node);
4131
4132 if (en->type == ENCLOSURE_MEMORY) {
4133 if (NODE_IS_MARK1(node)) {
4134 if ((~en->m.called_state & state) != 0) {
4135 en->m.called_state |= state;
4136 setup_called_state_call(NODE_BODY(node), state);
4137 }
4138 }
4139 else {
4140 NODE_STATUS_ADD(node, MARK1);
4141 en->m.called_state |= state;
4142 setup_called_state_call(NODE_BODY(node), state);
4143 NODE_STATUS_REMOVE(node, MARK1);
4144 }
4145 }
4146 else if (en->type == ENCLOSURE_IF_ELSE) {
4147 if (IS_NOT_NULL(en->te.Then)) {
4148 setup_called_state_call(en->te.Then, state);
4149 }
4150 if (IS_NOT_NULL(en->te.Else))
4151 setup_called_state_call(en->te.Else, state);
4152 }
4153 else {
4154 setup_called_state_call(NODE_BODY(node), state);
4155 }
4156 }
4157 break;
4158
4159 case NODE_CALL:
4160 setup_called_state_call(NODE_BODY(node), state);
4161 break;
4162
4163 default:
4164 break;
4165 }
4166 }
4167
4168 static void
4169 setup_called_state(Node* node, int state)
4170 {
4171 switch (NODE_TYPE(node)) {
4172 case NODE_ALT:
4173 state |= IN_ALT;
4174 /* fall */
4175 case NODE_LIST:
4176 do {
4177 setup_called_state(NODE_CAR(node), state);
4178 } while (IS_NOT_NULL(node = NODE_CDR(node)));
4179 break;
4180
4181 #ifdef USE_CALL
4182 case NODE_CALL:
4183 setup_called_state_call(node, state);
4184 break;
4185 #endif
4186
4187 case NODE_ENCLOSURE:
4188 {
4189 EnclosureNode* en = ENCLOSURE_(node);
4190
4191 switch (en->type) {
4192 case ENCLOSURE_MEMORY:
4193 if (en->m.entry_count > 1)
4194 state |= IN_MULTI_ENTRY;
4195
4196 en->m.called_state |= state;
4197 /* fall */
4198 case ENCLOSURE_OPTION:
4199 case ENCLOSURE_STOP_BACKTRACK:
4200 setup_called_state(NODE_BODY(node), state);
4201 break;
4202 case ENCLOSURE_IF_ELSE:
4203 setup_called_state(NODE_BODY(node), state);
4204 if (IS_NOT_NULL(en->te.Then))
4205 setup_called_state(en->te.Then, state);
4206 if (IS_NOT_NULL(en->te.Else))
4207 setup_called_state(en->te.Else, state);
4208 break;
4209 }
4210 }
4211 break;
4212
4213 case NODE_QUANT:
4214 {
4215 QuantNode* qn = QUANT_(node);
4216
4217 if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 2)
4218 state |= IN_REAL_REPEAT;
4219 if (qn->lower != qn->upper)
4220 state |= IN_VAR_REPEAT;
4221
4222 setup_called_state(NODE_QUANT_BODY(qn), state);
4223 }
4224 break;
4225
4226 case NODE_ANCHOR:
4227 {
4228 AnchorNode* an = ANCHOR_(node);
4229
4230 switch (an->type) {
4231 case ANCHOR_PREC_READ_NOT:
4232 case ANCHOR_LOOK_BEHIND_NOT:
4233 state |= IN_NOT;
4234 /* fall */
4235 case ANCHOR_PREC_READ:
4236 case ANCHOR_LOOK_BEHIND:
4237 setup_called_state(NODE_ANCHOR_BODY(an), state);
4238 break;
4239 default:
4240 break;
4241 }
4242 }
4243 break;
4244
4245 case NODE_BACKREF:
4246 case NODE_STRING:
4247 case NODE_CTYPE:
4248 case NODE_CCLASS:
4249 case NODE_GIMMICK:
4250 default:
4251 break;
4252 }
4253 }
4254
4255 #endif /* USE_CALL */
4256
4257
4258 static int setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env);
4259
4260 #ifdef __GNUC__
4261 __inline
4262 #endif
4263 static int
4264 setup_anchor(Node* node, regex_t* reg, int state, ScanEnv* env)
4265 {
4266 /* allowed node types in look-behind */
4267 #define ALLOWED_TYPE_IN_LB \
4268 ( NODE_BIT_LIST | NODE_BIT_ALT | NODE_BIT_STRING | NODE_BIT_CCLASS \
4269 | NODE_BIT_CTYPE | NODE_BIT_ANCHOR | NODE_BIT_ENCLOSURE | NODE_BIT_QUANT \
4270 | NODE_BIT_CALL | NODE_BIT_GIMMICK)
4271
4272 #define ALLOWED_ENCLOSURE_IN_LB ( 1<<ENCLOSURE_MEMORY | 1<<ENCLOSURE_OPTION )
4273 #define ALLOWED_ENCLOSURE_IN_LB_NOT (1<<ENCLOSURE_OPTION)
4274
4275 #define ALLOWED_ANCHOR_IN_LB \
4276 ( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF \
4277 | ANCHOR_BEGIN_POSITION | ANCHOR_WORD_BOUNDARY | ANCHOR_NO_WORD_BOUNDARY \
4278 | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END \
4279 | ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY \
4280 | ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY )
4281
4282 #define ALLOWED_ANCHOR_IN_LB_NOT \
4283 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE \
4284 | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_WORD_BOUNDARY \
4285 | ANCHOR_NO_WORD_BOUNDARY | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END \
4286 | ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY \
4287 | ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY )
4288
4289 int r;
4290 AnchorNode* an = ANCHOR_(node);
4291
4292 switch (an->type) {
4293 case ANCHOR_PREC_READ:
4294 r = setup_tree(NODE_ANCHOR_BODY(an), reg, state, env);
4295 break;
4296 case ANCHOR_PREC_READ_NOT:
4297 r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state | IN_NOT), env);
4298 break;
4299
4300 case ANCHOR_LOOK_BEHIND:
4301 {
4302 r = check_type_tree(NODE_ANCHOR_BODY(an), ALLOWED_TYPE_IN_LB,
4303 ALLOWED_ENCLOSURE_IN_LB, ALLOWED_ANCHOR_IN_LB);
4304 if (r < 0) return r;
4305 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4306 r = setup_tree(NODE_ANCHOR_BODY(an), reg, state, env);
4307 if (r != 0) return r;
4308 r = setup_look_behind(node, reg, env);
4309 }
4310 break;
4311
4312 case ANCHOR_LOOK_BEHIND_NOT:
4313 {
4314 r = check_type_tree(NODE_ANCHOR_BODY(an), ALLOWED_TYPE_IN_LB,
4315 ALLOWED_ENCLOSURE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
4316 if (r < 0) return r;
4317 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4318 r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state | IN_NOT), env);
4319 if (r != 0) return r;
4320 r = setup_look_behind(node, reg, env);
4321 }
4322 break;
4323
4324 default:
4325 r = 0;
4326 break;
4327 }
4328
4329 return r;
4330 }
4331
4332 #ifdef __GNUC__
4333 __inline
4334 #endif
4335 static int
4336 setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env)
4337 {
4338 int r;
4339 OnigLen d;
4340 QuantNode* qn = QUANT_(node);
4341 Node* body = NODE_BODY(node);
4342
4343 if ((state & IN_REAL_REPEAT) != 0) {
4344 NODE_STATUS_ADD(node, IN_REAL_REPEAT);
4345 }
4346 if ((state & IN_MULTI_ENTRY) != 0) {
4347 NODE_STATUS_ADD(node, IN_MULTI_ENTRY);
4348 }
4349
4350 if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
4351 d = tree_min_len(body, env);
4352 if (d == 0) {
4353 #ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT
4354 qn->body_empty_info = quantifiers_memory_node_info(body);
4355 if (qn->body_empty_info == QUANT_BODY_IS_EMPTY_REC) {
4356 if (NODE_TYPE(body) == NODE_ENCLOSURE &&
4357 ENCLOSURE_(body)->type == ENCLOSURE_MEMORY) {
4358 MEM_STATUS_ON(env->bt_mem_end, ENCLOSURE_(body)->m.regnum);
4359 }
4360 }
4361 #else
4362 qn->body_empty_info = QUANT_BODY_IS_EMPTY;
4363 #endif
4364 }
4365 }
4366
4367 if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 2)
4368 state |= IN_REAL_REPEAT;
4369 if (qn->lower != qn->upper)
4370 state |= IN_VAR_REPEAT;
4371
4372 r = setup_tree(body, reg, state, env);
4373 if (r != 0) return r;
4374
4375 /* expand string */
4376 #define EXPAND_STRING_MAX_LENGTH 100
4377 if (NODE_TYPE(body) == NODE_STRING) {
4378 if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper &&
4379 qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) {
4380 int len = NODE_STRING_LEN(body);
4381 StrNode* sn = STR_(body);
4382
4383 if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) {
4384 int i, n = qn->lower;
4385 onig_node_conv_to_str_node(node, STR_(body)->flag);
4386 for (i = 0; i < n; i++) {
4387 r = onig_node_str_cat(node, sn->s, sn->end);
4388 if (r != 0) return r;
4389 }
4390 onig_node_free(body);
4391 return r;
4392 }
4393 }
4394 }
4395
4396 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
4397 if (qn->greedy && (qn->body_empty_info != QUANT_BODY_IS_NOT_EMPTY)) {
4398 if (NODE_TYPE(body) == NODE_QUANT) {
4399 QuantNode* tqn = QUANT_(body);
4400 if (IS_NOT_NULL(tqn->head_exact)) {
4401 qn->head_exact = tqn->head_exact;
4402 tqn->head_exact = NULL;
4403 }
4404 }
4405 else {
4406 qn->head_exact = get_head_value_node(NODE_BODY(node), 1, reg);
4407 }
4408 }
4409 #endif
4410
4411 return r;
4412 }
4413
4414 /* setup_tree does the following work.
4415 1. check empty loop. (set qn->body_empty_info)
4416 2. expand ignore-case in char class.
4417 3. set memory status bit flags. (reg->mem_stats)
4418 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
4419 5. find invalid patterns in look-behind.
4420 6. expand repeated string.
4421 */
4422 static int
4423 setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
4424 {
4425 int r = 0;
4426
4427 switch (NODE_TYPE(node)) {
4428 case NODE_LIST:
4429 {
4430 Node* prev = NULL_NODE;
4431 do {
4432 r = setup_tree(NODE_CAR(node), reg, state, env);
4433 if (IS_NOT_NULL(prev) && r == 0) {
4434 r = next_setup(prev, NODE_CAR(node), reg);
4435 }
4436 prev = NODE_CAR(node);
4437 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
4438 }
4439 break;
4440
4441 case NODE_ALT:
4442 do {
4443 r = setup_tree(NODE_CAR(node), reg, (state | IN_ALT), env);
4444 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
4445 break;
4446
4447 case NODE_STRING:
4448 if (IS_IGNORECASE(reg->options) && !NODE_STRING_IS_RAW(node)) {
4449 r = expand_case_fold_string(node, reg);
4450 }
4451 break;
4452
4453 case NODE_BACKREF:
4454 {
4455 int i;
4456 int* p;
4457 BackRefNode* br = BACKREF_(node);
4458 p = BACKREFS_P(br);
4459 for (i = 0; i < br->back_num; i++) {
4460 if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
4461 MEM_STATUS_ON(env->backrefed_mem, p[i]);
4462 MEM_STATUS_ON(env->bt_mem_start, p[i]);
4463 #ifdef USE_BACKREF_WITH_LEVEL
4464 if (NODE_IS_NEST_LEVEL(node)) {
4465 MEM_STATUS_ON(env->bt_mem_end, p[i]);
4466 }
4467 #endif
4468 }
4469 }
4470 break;
4471
4472 case NODE_ENCLOSURE:
4473 {
4474 EnclosureNode* en = ENCLOSURE_(node);
4475
4476 switch (en->type) {
4477 case ENCLOSURE_OPTION:
4478 {
4479 OnigOptionType options = reg->options;
4480 reg->options = ENCLOSURE_(node)->o.options;
4481 r = setup_tree(NODE_BODY(node), reg, state, env);
4482 reg->options = options;
4483 }
4484 break;
4485
4486 case ENCLOSURE_MEMORY:
4487 #ifdef USE_CALL
4488 state |= en->m.called_state;
4489 #endif
4490
4491 if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_MULTI_ENTRY)) != 0
4492 || NODE_IS_RECURSION(node)) {
4493 MEM_STATUS_ON(env->bt_mem_start, en->m.regnum);
4494 }
4495 r = setup_tree(NODE_BODY(node), reg, state, env);
4496 break;
4497
4498 case ENCLOSURE_STOP_BACKTRACK:
4499 {
4500 Node* target = NODE_BODY(node);
4501 r = setup_tree(target, reg, state, env);
4502 if (NODE_TYPE(target) == NODE_QUANT) {
4503 QuantNode* tqn = QUANT_(target);
4504 if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
4505 tqn->greedy != 0) { /* (?>a*), a*+ etc... */
4506 if (NODE_IS_SIMPLE_TYPE(NODE_BODY(target)))
4507 NODE_STATUS_ADD(node, STOP_BT_SIMPLE_REPEAT);
4508 }
4509 }
4510 }
4511 break;
4512
4513 case ENCLOSURE_IF_ELSE:
4514 r = setup_tree(NODE_BODY(node), reg, (state | IN_ALT), env);
4515 if (r != 0) return r;
4516 if (IS_NOT_NULL(en->te.Then)) {
4517 r = setup_tree(en->te.Then, reg, (state | IN_ALT), env);
4518 if (r != 0) return r;
4519 }
4520 if (IS_NOT_NULL(en->te.Else))
4521 r = setup_tree(en->te.Else, reg, (state | IN_ALT), env);
4522 break;
4523 }
4524 }
4525 break;
4526
4527 case NODE_QUANT:
4528 r = setup_quant(node, reg, state, env);
4529 break;
4530
4531 case NODE_ANCHOR:
4532 r = setup_anchor(node, reg, state, env);
4533 break;
4534
4535 #ifdef USE_CALL
4536 case NODE_CALL:
4537 #endif
4538 case NODE_CTYPE:
4539 case NODE_CCLASS:
4540 case NODE_GIMMICK:
4541 default:
4542 break;
4543 }
4544
4545 return r;
4546 }
4547
4548 /* set skip map for Boyer-Moore search */
4549 static int
4550 set_bm_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
4551 UChar skip[], int** int_skip)
4552 {
4553 int i, len;
4554
4555 len = (int )(end - s);
4556 if (len < ONIG_CHAR_TABLE_SIZE) {
4557 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = len;
4558
4559 for (i = 0; i < len - 1; i++)
4560 skip[s[i]] = len - 1 - i;
4561 }
4562 else {
4563 if (IS_NULL(*int_skip)) {
4564 *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
4565 if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
4566 }
4567 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = len;
4568
4569 for (i = 0; i < len - 1; i++)
4570 (*int_skip)[s[i]] = len - 1 - i;
4571 }
4572 return 0;
4573 }
4574
4575 #define OPT_EXACT_MAXLEN 24
4576
4577 typedef struct {
4578 OnigLen min; /* min byte length */
4579 OnigLen max; /* max byte length */
4580 } MinMax;
4581
4582 typedef struct {
4583 MinMax mmd;
4584 OnigEncoding enc;
4585 OnigOptionType options;
4586 OnigCaseFoldType case_fold_flag;
4587 ScanEnv* scan_env;
4588 } OptEnv;
4589
4590 typedef struct {
4591 int left;
4592 int right;
4593 } OptAnc;
4594
4595 typedef struct {
4596 MinMax mmd; /* position */
4597 OptAnc anc;
4598 int reach_end;
4599 int ignore_case;
4600 int len;
4601 UChar s[OPT_EXACT_MAXLEN];
4602 } OptExact;
4603
4604 typedef struct {
4605 MinMax mmd; /* position */
4606 OptAnc anc;
4607 int value; /* weighted value */
4608 UChar map[ONIG_CHAR_TABLE_SIZE];
4609 } OptMap;
4610
4611 typedef struct {
4612 MinMax len;
4613 OptAnc anc;
4614 OptExact exb; /* boundary */
4615 OptExact exm; /* middle */
4616 OptExact expr; /* prec read (?=...) */
4617 OptMap map; /* boundary */
4618 } NodeOpt;
4619
4620
4621 static int
4622 map_position_value(OnigEncoding enc, int i)
4623 {
4624 static const short int Vals[] = {
4625 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
4626 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4627 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
4628 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
4629 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4630 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
4631 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4632 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
4633 };
4634
4635 if (i < (int )(sizeof(Vals)/sizeof(Vals[0]))) {
4636 if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
4637 return 20;
4638 else
4639 return (int )Vals[i];
4640 }
4641 else
4642 return 4; /* Take it easy. */
4643 }
4644
4645 static int
4646 distance_value(MinMax* mm)
4647 {
4648 /* 1000 / (min-max-dist + 1) */
4649 static const short int dist_vals[] = {
4650 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
4651 91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
4652 48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
4653 32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
4654 24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
4655 20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
4656 16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
4657 14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
4658 12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
4659 11, 11, 11, 11, 11, 10, 10, 10, 10, 10
4660 };
4661
4662 OnigLen d;
4663
4664 if (mm->max == INFINITE_LEN) return 0;
4665
4666 d = mm->max - mm->min;
4667 if (d < (OnigLen )(sizeof(dist_vals)/sizeof(dist_vals[0])))
4668 /* return dist_vals[d] * 16 / (mm->min + 12); */
4669 return (int )dist_vals[d];
4670 else
4671 return 1;
4672 }
4673
4674 static int
4675 comp_distance_value(MinMax* d1, MinMax* d2, int v1, int v2)
4676 {
4677 if (v2 <= 0) return -1;
4678 if (v1 <= 0) return 1;
4679
4680 v1 *= distance_value(d1);
4681 v2 *= distance_value(d2);
4682
4683 if (v2 > v1) return 1;
4684 if (v2 < v1) return -1;
4685
4686 if (d2->min < d1->min) return 1;
4687 if (d2->min > d1->min) return -1;
4688 return 0;
4689 }
4690
4691 static int
4692 is_equal_mml(MinMax* a, MinMax* b)
4693 {
4694 return (a->min == b->min && a->max == b->max) ? 1 : 0;
4695 }
4696
4697 static void
4698 set_mml(MinMax* l, OnigLen min, OnigLen max)
4699 {
4700 l->min = min;
4701 l->max = max;
4702 }
4703
4704 static void
4705 clear_mml(MinMax* l)
4706 {
4707 l->min = l->max = 0;
4708 }
4709
4710 static void
4711 copy_mml(MinMax* to, MinMax* from)
4712 {
4713 to->min = from->min;
4714 to->max = from->max;
4715 }
4716
4717 static void
4718 add_mml(MinMax* to, MinMax* from)
4719 {
4720 to->min = distance_add(to->min, from->min);
4721 to->max = distance_add(to->max, from->max);
4722 }
4723
4724 static void
4725 alt_merge_mml(MinMax* to, MinMax* from)
4726 {
4727 if (to->min > from->min) to->min = from->min;
4728 if (to->max < from->max) to->max = from->max;
4729 }
4730
4731 static void
4732 copy_opt_env(OptEnv* to, OptEnv* from)
4733 {
4734 *to = *from;
4735 }
4736
4737 static void
4738 clear_opt_anc_info(OptAnc* a)
4739 {
4740 a->left = 0;
4741 a->right = 0;
4742 }
4743
4744 static void
4745 copy_opt_anc_info(OptAnc* to, OptAnc* from)
4746 {
4747 *to = *from;
4748 }
4749
4750 static void
4751 concat_opt_anc_info(OptAnc* to, OptAnc* left, OptAnc* right,
4752 OnigLen left_len, OnigLen right_len)
4753 {
4754 clear_opt_anc_info(to);
4755
4756 to->left = left->left;
4757 if (left_len == 0) {
4758 to->left |= right->left;
4759 }
4760
4761 to->right = right->right;
4762 if (right_len == 0) {
4763 to->right |= left->right;
4764 }
4765 else {
4766 to->right |= (left->right & ANCHOR_PREC_READ_NOT);
4767 }
4768 }
4769
4770 static int
4771 is_left(int a)
4772 {
4773 if (a == ANCHOR_END_BUF || a == ANCHOR_SEMI_END_BUF ||
4774 a == ANCHOR_END_LINE || a == ANCHOR_PREC_READ || a == ANCHOR_PREC_READ_NOT)
4775 return 0;
4776
4777 return 1;
4778 }
4779
4780 static int
4781 is_set_opt_anc_info(OptAnc* to, int anc)
4782 {
4783 if ((to->left & anc) != 0) return 1;
4784
4785 return ((to->right & anc) != 0 ? 1 : 0);
4786 }
4787
4788 static void
4789 add_opt_anc_info(OptAnc* to, int anc)
4790 {
4791 if (is_left(anc))
4792 to->left |= anc;
4793 else
4794 to->right |= anc;
4795 }
4796
4797 static void
4798 remove_opt_anc_info(OptAnc* to, int anc)
4799 {
4800 if (is_left(anc))
4801 to->left &= ~anc;
4802 else
4803 to->right &= ~anc;
4804 }
4805
4806 static void
4807 alt_merge_opt_anc_info(OptAnc* to, OptAnc* add)
4808 {
4809 to->left &= add->left;
4810 to->right &= add->right;
4811 }
4812
4813 static int
4814 is_full_opt_exact(OptExact* e)
4815 {
4816 return (e->len >= OPT_EXACT_MAXLEN ? 1 : 0);
4817 }
4818
4819 static void
4820 clear_opt_exact(OptExact* e)
4821 {
4822 clear_mml(&e->mmd);
4823 clear_opt_anc_info(&e->anc);
4824 e->reach_end = 0;
4825 e->ignore_case = 0;
4826 e->len = 0;
4827 e->s[0] = '\0';
4828 }
4829
4830 static void
4831 copy_opt_exact(OptExact* to, OptExact* from)
4832 {
4833 *to = *from;
4834 }
4835
4836 static int
4837 concat_opt_exact(OptExact* to, OptExact* add, OnigEncoding enc)
4838 {
4839 int i, j, len, r;
4840 UChar *p, *end;
4841 OptAnc tanc;
4842
4843 if (! to->ignore_case && add->ignore_case) {
4844 if (to->len >= add->len) return 0; /* avoid */
4845
4846 to->ignore_case = 1;
4847 }
4848
4849 r = 0;
4850 p = add->s;
4851 end = p + add->len;
4852 for (i = to->len; p < end; ) {
4853 len = enclen(enc, p);
4854 if (i + len > OPT_EXACT_MAXLEN) {
4855 r = 1; /* 1:full */
4856 break;
4857 }
4858 for (j = 0; j < len && p < end; j++)
4859 to->s[i++] = *p++;
4860 }
4861
4862 to->len = i;
4863 to->reach_end = (p == end ? add->reach_end : 0);
4864
4865 concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
4866 if (! to->reach_end) tanc.right = 0;
4867 copy_opt_anc_info(&to->anc, &tanc);
4868
4869 return r;
4870 }
4871
4872 static void
4873 concat_opt_exact_str(OptExact* to, UChar* s, UChar* end, OnigEncoding enc)
4874 {
4875 int i, j, len;
4876 UChar *p;
4877
4878 for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
4879 len = enclen(enc, p);
4880 if (i + len > OPT_EXACT_MAXLEN) break;
4881 for (j = 0; j < len && p < end; j++)
4882 to->s[i++] = *p++;
4883 }
4884
4885 to->len = i;
4886 }
4887
4888 static void
4889 alt_merge_opt_exact(OptExact* to, OptExact* add, OptEnv* env)
4890 {
4891 int i, j, len;
4892
4893 if (add->len == 0 || to->len == 0) {
4894 clear_opt_exact(to);
4895 return ;
4896 }
4897
4898 if (! is_equal_mml(&to->mmd, &add->mmd)) {
4899 clear_opt_exact(to);
4900 return ;
4901 }
4902
4903 for (i = 0; i < to->len && i < add->len; ) {
4904 if (to->s[i] != add->s[i]) break;
4905 len = enclen(env->enc, to->s + i);
4906
4907 for (j = 1; j < len; j++) {
4908 if (to->s[i+j] != add->s[i+j]) break;
4909 }
4910 if (j < len) break;
4911 i += len;
4912 }
4913
4914 if (! add->reach_end || i < add->len || i < to->len) {
4915 to->reach_end = 0;
4916 }
4917 to->len = i;
4918 to->ignore_case |= add->ignore_case;
4919
4920 alt_merge_opt_anc_info(&to->anc, &add->anc);
4921 if (! to->reach_end) to->anc.right = 0;
4922 }
4923
4924 static void
4925 select_opt_exact(OnigEncoding enc, OptExact* now, OptExact* alt)
4926 {
4927 int vn, va;
4928
4929 vn = now->len;
4930 va = alt->len;
4931
4932 if (va == 0) {
4933 return ;
4934 }
4935 else if (vn == 0) {
4936 copy_opt_exact(now, alt);
4937 return ;
4938 }
4939 else if (vn <= 2 && va <= 2) {
4940 /* ByteValTable[x] is big value --> low price */
4941 va = map_position_value(enc, now->s[0]);
4942 vn = map_position_value(enc, alt->s[0]);
4943
4944 if (now->len > 1) vn += 5;
4945 if (alt->len > 1) va += 5;
4946 }
4947
4948 if (now->ignore_case == 0) vn *= 2;
4949 if (alt->ignore_case == 0) va *= 2;
4950
4951 if (comp_distance_value(&now->mmd, &alt->mmd, vn, va) > 0)
4952 copy_opt_exact(now, alt);
4953 }
4954
4955 static void
4956 clear_opt_map(OptMap* map)
4957 {
4958 static const OptMap clean_info = {
4959 {0, 0}, {0, 0}, 0,
4960 {
4961 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4962 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4963 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4964 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4965 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4966 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4967 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4968 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4969 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4970 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4971 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4972 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4973 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4974 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4975 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4976 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4977 }
4978 };
4979
4980 xmemcpy(map, &clean_info, sizeof(OptMap));
4981 }
4982
4983 static void
4984 copy_opt_map(OptMap* to, OptMap* from)
4985 {
4986 xmemcpy(to,from,sizeof(OptMap));
4987 }
4988
4989 static void
4990 add_char_opt_map(OptMap* m, UChar c, OnigEncoding enc)
4991 {
4992 if (m->map[c] == 0) {
4993 m->map[c] = 1;
4994 m->value += map_position_value(enc, c);
4995 }
4996 }
4997
4998 static int
4999 add_char_amb_opt_map(OptMap* map, UChar* p, UChar* end,
5000 OnigEncoding enc, OnigCaseFoldType fold_flag)
5001 {
5002 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
5003 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
5004 int i, n;
5005
5006 add_char_opt_map(map, p[0], enc);
5007
5008 fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(fold_flag);
5009 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, fold_flag, p, end, items);
5010 if (n < 0) return n;
5011
5012 for (i = 0; i < n; i++) {
5013 ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
5014 add_char_opt_map(map, buf[0], enc);
5015 }
5016
5017 return 0;
5018 }
5019
5020 static void
5021 select_opt_map(OptMap* now, OptMap* alt)
5022 {
5023 static int z = 1<<15; /* 32768: something big value */
5024
5025 int vn, va;
5026
5027 if (alt->value == 0) return ;
5028 if (now->value == 0) {
5029 copy_opt_map(now, alt);
5030 return ;
5031 }
5032
5033 vn = z / now->value;
5034 va = z / alt->value;
5035 if (comp_distance_value(&now->mmd, &alt->mmd, vn, va) > 0)
5036 copy_opt_map(now, alt);
5037 }
5038
5039 static int
5040 comp_opt_exact_or_map(OptExact* e, OptMap* m)
5041 {
5042 #define COMP_EM_BASE 20
5043 int ae, am;
5044
5045 if (m->value <= 0) return -1;
5046
5047 ae = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2);
5048 am = COMP_EM_BASE * 5 * 2 / m->value;
5049 return comp_distance_value(&e->mmd, &m->mmd, ae, am);
5050 }
5051
5052 static void
5053 alt_merge_opt_map(OnigEncoding enc, OptMap* to, OptMap* add)
5054 {
5055 int i, val;
5056
5057 /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
5058 if (to->value == 0) return ;
5059 if (add->value == 0 || to->mmd.max < add->mmd.min) {
5060 clear_opt_map(to);
5061 return ;
5062 }
5063
5064 alt_merge_mml(&to->mmd, &add->mmd);
5065
5066 val = 0;
5067 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
5068 if (add->map[i])
5069 to->map[i] = 1;
5070
5071 if (to->map[i])
5072 val += map_position_value(enc, i);
5073 }
5074 to->value = val;
5075
5076 alt_merge_opt_anc_info(&to->anc, &add->anc);
5077 }
5078
5079 static void
5080 set_bound_node_opt_info(NodeOpt* opt, MinMax* plen)
5081 {
5082 copy_mml(&(opt->exb.mmd), plen);
5083 copy_mml(&(opt->expr.mmd), plen);
5084 copy_mml(&(opt->map.mmd), plen);
5085 }
5086
5087 static void
5088 clear_node_opt_info(NodeOpt* opt)
5089 {
5090 clear_mml(&opt->len);
5091 clear_opt_anc_info(&opt->anc);
5092 clear_opt_exact(&opt->exb);
5093 clear_opt_exact(&opt->exm);
5094 clear_opt_exact(&opt->expr);
5095 clear_opt_map(&opt->map);
5096 }
5097
5098 static void
5099 copy_node_opt_info(NodeOpt* to, NodeOpt* from)
5100 {
5101 xmemcpy(to,from,sizeof(NodeOpt));
5102 }
5103
5104 static void
5105 concat_left_node_opt_info(OnigEncoding enc, NodeOpt* to, NodeOpt* add)
5106 {
5107 int exb_reach, exm_reach;
5108 OptAnc tanc;
5109
5110 concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
5111 copy_opt_anc_info(&to->anc, &tanc);
5112
5113 if (add->exb.len > 0 && to->len.max == 0) {
5114 concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc, to->len.max, add->len.max);
5115 copy_opt_anc_info(&add->exb.anc, &tanc);
5116 }
5117
5118 if (add->map.value > 0 && to->len.max == 0) {
5119 if (add->map.mmd.max == 0)
5120 add->map.anc.left |= to->anc.left;
5121 }
5122
5123 exb_reach = to->exb.reach_end;
5124 exm_reach = to->exm.reach_end;
5125
5126 if (add->len.max != 0)
5127 to->exb.reach_end = to->exm.reach_end = 0;
5128
5129 if (add->exb.len > 0) {
5130 if (exb_reach) {
5131 concat_opt_exact(&to->exb, &add->exb, enc);
5132 clear_opt_exact(&add->exb);
5133 }
5134 else if (exm_reach) {
5135 concat_opt_exact(&to->exm, &add->exb, enc);
5136 clear_opt_exact(&add->exb);
5137 }
5138 }
5139 select_opt_exact(enc, &to->exm, &add->exb);
5140 select_opt_exact(enc, &to->exm, &add->exm);
5141
5142 if (to->expr.len > 0) {
5143 if (add->len.max > 0) {
5144 if (to->expr.len > (int )add->len.max)
5145 to->expr.len = add->len.max;
5146
5147 if (to->expr.mmd.max == 0)
5148 select_opt_exact(enc, &to->exb, &to->expr);
5149 else
5150 select_opt_exact(enc, &to->exm, &to->expr);
5151 }
5152 }
5153 else if (add->expr.len > 0) {
5154 copy_opt_exact(&to->expr, &add->expr);
5155 }
5156
5157 select_opt_map(&to->map, &add->map);
5158 add_mml(&to->len, &add->len);
5159 }
5160
5161 static void
5162 alt_merge_node_opt_info(NodeOpt* to, NodeOpt* add, OptEnv* env)
5163 {
5164 alt_merge_opt_anc_info(&to->anc, &add->anc);
5165 alt_merge_opt_exact(&to->exb, &add->exb, env);
5166 alt_merge_opt_exact(&to->exm, &add->exm, env);
5167 alt_merge_opt_exact(&to->expr, &add->expr, env);
5168 alt_merge_opt_map(env->enc, &to->map, &add->map);
5169
5170 alt_merge_mml(&to->len, &add->len);
5171 }
5172
5173
5174 #define MAX_NODE_OPT_INFO_REF_COUNT 5
5175
5176 static int
5177 optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env)
5178 {
5179 int i;
5180 int r;
5181 NodeOpt xo;
5182 OnigEncoding enc;
5183
5184 r = 0;
5185 enc = env->enc;
5186 clear_node_opt_info(opt);
5187 set_bound_node_opt_info(opt, &env->mmd);
5188
5189 switch (NODE_TYPE(node)) {
5190 case NODE_LIST:
5191 {
5192 OptEnv nenv;
5193 Node* nd = node;
5194
5195 copy_opt_env(&nenv, env);
5196 do {
5197 r = optimize_nodes(NODE_CAR(nd), &xo, &nenv);
5198 if (r == 0) {
5199 add_mml(&nenv.mmd, &xo.len);
5200 concat_left_node_opt_info(enc, opt, &xo);
5201 }
5202 } while (r == 0 && IS_NOT_NULL(nd = NODE_CDR(nd)));
5203 }
5204 break;
5205
5206 case NODE_ALT:
5207 {
5208 Node* nd = node;
5209
5210 do {
5211 r = optimize_nodes(NODE_CAR(nd), &xo, env);
5212 if (r == 0) {
5213 if (nd == node) copy_node_opt_info(opt, &xo);
5214 else alt_merge_node_opt_info(opt, &xo, env);
5215 }
5216 } while ((r == 0) && IS_NOT_NULL(nd = NODE_CDR(nd)));
5217 }
5218 break;
5219
5220 case NODE_STRING:
5221 {
5222 StrNode* sn = STR_(node);
5223 int slen = (int )(sn->end - sn->s);
5224 /* int is_raw = NODE_STRING_IS_RAW(node); */
5225
5226 if (! NODE_STRING_IS_AMBIG(node)) {
5227 concat_opt_exact_str(&opt->exb, sn->s, sn->end, enc);
5228 if (slen > 0) {
5229 add_char_opt_map(&opt->map, *(sn->s), enc);
5230 }
5231 set_mml(&opt->len, slen, slen);
5232 }
5233 else {
5234 int max;
5235
5236 if (NODE_STRING_IS_DONT_GET_OPT_INFO(node)) {
5237 int n = onigenc_strlen(enc, sn->s, sn->end);
5238 max = ONIGENC_MBC_MAXLEN_DIST(enc) * n;
5239 }
5240 else {
5241 concat_opt_exact_str(&opt->exb, sn->s, sn->end, enc);
5242 opt->exb.ignore_case = 1;
5243
5244 if (slen > 0) {
5245 r = add_char_amb_opt_map(&opt->map, sn->s, sn->end,
5246 enc, env->case_fold_flag);
5247 if (r != 0) break;
5248 }
5249
5250 max = slen;
5251 }
5252
5253 set_mml(&opt->len, slen, max);
5254 }
5255
5256 if (opt->exb.len == slen)
5257 opt->exb.reach_end = 1;
5258 }
5259 break;
5260
5261 case NODE_CCLASS:
5262 {
5263 int z;
5264 CClassNode* cc = CCLASS_(node);
5265
5266 /* no need to check ignore case. (set in setup_tree()) */
5267
5268 if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
5269 OnigLen min = ONIGENC_MBC_MINLEN(enc);
5270 OnigLen max = ONIGENC_MBC_MAXLEN_DIST(enc);
5271
5272 set_mml(&opt->len, min, max);
5273 }
5274 else {
5275 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
5276 z = BITSET_AT(cc->bs, i);
5277 if ((z && ! IS_NCCLASS_NOT(cc)) || (! z && IS_NCCLASS_NOT(cc))) {
5278 add_char_opt_map(&opt->map, (UChar )i, enc);
5279 }
5280 }
5281 set_mml(&opt->len, 1, 1);
5282 }
5283 }
5284 break;
5285
5286 case NODE_CTYPE:
5287 {
5288 int min, max;
5289 int range;
5290
5291 max = ONIGENC_MBC_MAXLEN_DIST(enc);
5292
5293 if (max == 1) {
5294 min = 1;
5295
5296 switch (CTYPE_(node)->ctype) {
5297 case CTYPE_ANYCHAR:
5298 break;
5299
5300 case ONIGENC_CTYPE_WORD:
5301 range = CTYPE_(node)->ascii_mode != 0 ? 128 : SINGLE_BYTE_SIZE;
5302 if (CTYPE_(node)->not != 0) {
5303 for (i = 0; i < range; i++) {
5304 if (! ONIGENC_IS_CODE_WORD(enc, i)) {
5305 add_char_opt_map(&opt->map, (UChar )i, enc);
5306 }
5307 }
5308 for (i = range; i < SINGLE_BYTE_SIZE; i++) {
5309 add_char_opt_map(&opt->map, (UChar )i, enc);
5310 }
5311 }
5312 else {
5313 for (i = 0; i < range; i++) {
5314 if (ONIGENC_IS_CODE_WORD(enc, i)) {
5315 add_char_opt_map(&opt->map, (UChar )i, enc);
5316 }
5317 }
5318 }
5319 break;
5320 }
5321 }
5322 else {
5323 min = ONIGENC_MBC_MINLEN(enc);
5324 }
5325 set_mml(&opt->len, min, max);
5326 }
5327 break;
5328
5329 case NODE_ANCHOR:
5330 switch (ANCHOR_(node)->type) {
5331 case ANCHOR_BEGIN_BUF:
5332 case ANCHOR_BEGIN_POSITION:
5333 case ANCHOR_BEGIN_LINE:
5334 case ANCHOR_END_BUF:
5335 case ANCHOR_SEMI_END_BUF:
5336 case ANCHOR_END_LINE:
5337 case ANCHOR_PREC_READ_NOT:
5338 case ANCHOR_LOOK_BEHIND:
5339 add_opt_anc_info(&opt->anc, ANCHOR_(node)->type);
5340 break;
5341
5342 case ANCHOR_PREC_READ:
5343 {
5344 r = optimize_nodes(NODE_BODY(node), &xo, env);
5345 if (r == 0) {
5346 if (xo.exb.len > 0)
5347 copy_opt_exact(&opt->expr, &xo.exb);
5348 else if (xo.exm.len > 0)
5349 copy_opt_exact(&opt->expr, &xo.exm);
5350
5351 opt->expr.reach_end = 0;
5352
5353 if (xo.map.value > 0)
5354 copy_opt_map(&opt->map, &xo.map);
5355 }
5356 }
5357 break;
5358
5359 case ANCHOR_LOOK_BEHIND_NOT:
5360 break;
5361 }
5362 break;
5363
5364 case NODE_BACKREF:
5365 if (! NODE_IS_CHECKER(node)) {
5366 int* backs;
5367 OnigLen min, max, tmin, tmax;
5368 MemEnv* mem_env = SCANENV_MEMENV(env->scan_env);
5369 BackRefNode* br = BACKREF_(node);
5370
5371 if (NODE_IS_RECURSION(node)) {
5372 set_mml(&opt->len, 0, INFINITE_LEN);
5373 break;
5374 }
5375 backs = BACKREFS_P(br);
5376 min = tree_min_len(mem_env[backs[0]].node, env->scan_env);
5377 max = tree_max_len(mem_env[backs[0]].node, env->scan_env);
5378 for (i = 1; i < br->back_num; i++) {
5379 tmin = tree_min_len(mem_env[backs[i]].node, env->scan_env);
5380 tmax = tree_max_len(mem_env[backs[i]].node, env->scan_env);
5381 if (min > tmin) min = tmin;
5382 if (max < tmax) max = tmax;
5383 }
5384 set_mml(&opt->len, min, max);
5385 }
5386 break;
5387
5388 #ifdef USE_CALL
5389 case NODE_CALL:
5390 if (NODE_IS_RECURSION(node))
5391 set_mml(&opt->len, 0, INFINITE_LEN);
5392 else {
5393 OnigOptionType save = env->options;
5394 env->options = ENCLOSURE_(NODE_BODY(node))->o.options;
5395 r = optimize_nodes(NODE_BODY(node), opt, env);
5396 env->options = save;
5397 }
5398 break;
5399 #endif
5400
5401 case NODE_QUANT:
5402 {
5403 OnigLen min, max;
5404 QuantNode* qn = QUANT_(node);
5405
5406 r = optimize_nodes(NODE_BODY(node), &xo, env);
5407 if (r != 0) break;
5408
5409 if (qn->lower > 0) {
5410 copy_node_opt_info(opt, &xo);
5411 if (xo.exb.len > 0) {
5412 if (xo.exb.reach_end) {
5413 for (i = 2; i <= qn->lower && ! is_full_opt_exact(&opt->exb); i++) {
5414 int rc = concat_opt_exact(&opt->exb, &xo.exb, enc);
5415 if (rc > 0) break;
5416 }
5417 if (i < qn->lower) opt->exb.reach_end = 0;
5418 }
5419 }
5420
5421 if (qn->lower != qn->upper) {
5422 opt->exb.reach_end = 0;
5423 opt->exm.reach_end = 0;
5424 }
5425 if (qn->lower > 1)
5426 opt->exm.reach_end = 0;
5427 }
5428
5429 if (IS_REPEAT_INFINITE(qn->upper)) {
5430 if (env->mmd.max == 0 &&
5431 NODE_IS_ANYCHAR(NODE_BODY(node)) && qn->greedy != 0) {
5432 if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), env)))
5433 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF_ML);
5434 else
5435 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF);
5436 }
5437
5438 max = (xo.len.max > 0 ? INFINITE_LEN : 0);
5439 }
5440 else {
5441 max = distance_multiply(xo.len.max, qn->upper);
5442 }
5443
5444 min = distance_multiply(xo.len.min, qn->lower);
5445 set_mml(&opt->len, min, max);
5446 }
5447 break;
5448
5449 case NODE_ENCLOSURE:
5450 {
5451 EnclosureNode* en = ENCLOSURE_(node);
5452
5453 switch (en->type) {
5454 case ENCLOSURE_OPTION:
5455 {
5456 OnigOptionType save = env->options;
5457
5458 env->options = en->o.options;
5459 r = optimize_nodes(NODE_BODY(node), opt, env);
5460 env->options = save;
5461 }
5462 break;
5463
5464 case ENCLOSURE_MEMORY:
5465 #ifdef USE_CALL
5466 en->opt_count++;
5467 if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
5468 OnigLen min, max;
5469
5470 min = 0;
5471 max = INFINITE_LEN;
5472 if (NODE_IS_MIN_FIXED(node)) min = en->min_len;
5473 if (NODE_IS_MAX_FIXED(node)) max = en->max_len;
5474 set_mml(&opt->len, min, max);
5475 }
5476 else
5477 #endif
5478 {
5479 r = optimize_nodes(NODE_BODY(node), opt, env);
5480 if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF_MASK)) {
5481 if (MEM_STATUS_AT0(env->scan_env->backrefed_mem, en->m.regnum))
5482 remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF_MASK);
5483 }
5484 }
5485 break;
5486
5487 case ENCLOSURE_STOP_BACKTRACK:
5488 r = optimize_nodes(NODE_BODY(node), opt, env);
5489 break;
5490
5491 case ENCLOSURE_IF_ELSE:
5492 {
5493 OptEnv nenv;
5494
5495 copy_opt_env(&nenv, env);
5496 r = optimize_nodes(NODE_ENCLOSURE_BODY(en), &xo, &nenv);
5497 if (r == 0) {
5498 add_mml(&nenv.mmd, &xo.len);
5499 concat_left_node_opt_info(enc, opt, &xo);
5500 if (IS_NOT_NULL(en->te.Then)) {
5501 r = optimize_nodes(en->te.Then, &xo, &nenv);
5502 if (r == 0) {
5503 concat_left_node_opt_info(enc, opt, &xo);
5504 }
5505 }
5506
5507 if (IS_NOT_NULL(en->te.Else)) {
5508 r = optimize_nodes(en->te.Else, &xo, env);
5509 if (r == 0)
5510 alt_merge_node_opt_info(opt, &xo, env);
5511 }
5512 }
5513 }
5514 break;
5515 }
5516 }
5517 break;
5518
5519 case NODE_GIMMICK:
5520 break;
5521
5522 default:
5523 #ifdef ONIG_DEBUG
5524 fprintf(stderr, "optimize_nodes: undefined node type %d\n", NODE_TYPE(node));
5525 #endif
5526 r = ONIGERR_TYPE_BUG;
5527 break;
5528 }
5529
5530 return r;
5531 }
5532
5533 static int
5534 set_optimize_exact(regex_t* reg, OptExact* e)
5535 {
5536 int r;
5537
5538 if (e->len == 0) return 0;
5539
5540 if (e->ignore_case) {
5541 reg->exact = (UChar* )xmalloc(e->len);
5542 CHECK_NULL_RETURN_MEMERR(reg->exact);
5543 xmemcpy(reg->exact, e->s, e->len);
5544 reg->exact_end = reg->exact + e->len;
5545 reg->optimize = OPTIMIZE_EXACT_IC;
5546 }
5547 else {
5548 int allow_reverse;
5549
5550 reg->exact = onigenc_strdup(reg->enc, e->s, e->s + e->len);
5551 CHECK_NULL_RETURN_MEMERR(reg->exact);
5552 reg->exact_end = reg->exact + e->len;
5553
5554 allow_reverse =
5555 ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
5556
5557 if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5558 r = set_bm_skip(reg->exact, reg->exact_end, reg->enc,
5559 reg->map, &(reg->int_map));
5560 if (r != 0) return r;
5561
5562 reg->optimize = (allow_reverse != 0
5563 ? OPTIMIZE_EXACT_BM : OPTIMIZE_EXACT_BM_NO_REV);
5564 }
5565 else {
5566 reg->optimize = OPTIMIZE_EXACT;
5567 }
5568 }
5569
5570 reg->dmin = e->mmd.min;
5571 reg->dmax = e->mmd.max;
5572
5573 if (reg->dmin != INFINITE_LEN) {
5574 reg->threshold_len = reg->dmin + (int )(reg->exact_end - reg->exact);
5575 }
5576
5577 return 0;
5578 }
5579
5580 static void
5581 set_optimize_map(regex_t* reg, OptMap* m)
5582 {
5583 int i;
5584
5585 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5586 reg->map[i] = m->map[i];
5587
5588 reg->optimize = OPTIMIZE_MAP;
5589 reg->dmin = m->mmd.min;
5590 reg->dmax = m->mmd.max;
5591
5592 if (reg->dmin != INFINITE_LEN) {
5593 reg->threshold_len = reg->dmin + 1;
5594 }
5595 }
5596
5597 static void
5598 set_sub_anchor(regex_t* reg, OptAnc* anc)
5599 {
5600 reg->sub_anchor |= anc->left & ANCHOR_BEGIN_LINE;
5601 reg->sub_anchor |= anc->right & ANCHOR_END_LINE;
5602 }
5603
5604 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5605 static void print_optimize_info(FILE* f, regex_t* reg);
5606 #endif
5607
5608 static int
5609 set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
5610 {
5611 int r;
5612 NodeOpt opt;
5613 OptEnv env;
5614
5615 env.enc = reg->enc;
5616 env.options = reg->options;
5617 env.case_fold_flag = reg->case_fold_flag;
5618 env.scan_env = scan_env;
5619 clear_mml(&env.mmd);
5620
5621 r = optimize_nodes(node, &opt, &env);
5622 if (r != 0) return r;
5623
5624 reg->anchor = opt.anc.left & (ANCHOR_BEGIN_BUF |
5625 ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_INF | ANCHOR_ANYCHAR_INF_ML |
5626 ANCHOR_LOOK_BEHIND);
5627
5628 if ((opt.anc.left & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) != 0)
5629 reg->anchor &= ~ANCHOR_ANYCHAR_INF_ML;
5630
5631 reg->anchor |= opt.anc.right & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF |
5632 ANCHOR_PREC_READ_NOT);
5633
5634 if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
5635 reg->anchor_dmin = opt.len.min;
5636 reg->anchor_dmax = opt.len.max;
5637 }
5638
5639 if (opt.exb.len > 0 || opt.exm.len > 0) {
5640 select_opt_exact(reg->enc, &opt.exb, &opt.exm);
5641 if (opt.map.value > 0 && comp_opt_exact_or_map(&opt.exb, &opt.map) > 0) {
5642 goto set_map;
5643 }
5644 else {
5645 r = set_optimize_exact(reg, &opt.exb);
5646 set_sub_anchor(reg, &opt.exb.anc);
5647 }
5648 }
5649 else if (opt.map.value > 0) {
5650 set_map:
5651 set_optimize_map(reg, &opt.map);
5652 set_sub_anchor(reg, &opt.map.anc);
5653 }
5654 else {
5655 reg->sub_anchor |= opt.anc.left & ANCHOR_BEGIN_LINE;
5656 if (opt.len.max == 0)
5657 reg->sub_anchor |= opt.anc.right & ANCHOR_END_LINE;
5658 }
5659
5660 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5661 print_optimize_info(stderr, reg);
5662 #endif
5663 return r;
5664 }
5665
5666 static void
5667 clear_optimize_info(regex_t* reg)
5668 {
5669 reg->optimize = OPTIMIZE_NONE;
5670 reg->anchor = 0;
5671 reg->anchor_dmin = 0;
5672 reg->anchor_dmax = 0;
5673 reg->sub_anchor = 0;
5674 reg->exact_end = (UChar* )NULL;
5675 reg->threshold_len = 0;
5676 if (IS_NOT_NULL(reg->exact)) {
5677 xfree(reg->exact);
5678 reg->exact = (UChar* )NULL;
5679 }
5680 }
5681
5682 #ifdef ONIG_DEBUG
5683
5684 static void print_enc_string(FILE* fp, OnigEncoding enc,
5685 const UChar *s, const UChar *end)
5686 {
5687 fprintf(fp, "\nPATTERN: /");
5688
5689 if (ONIGENC_MBC_MINLEN(enc) > 1) {
5690 const UChar *p;
5691 OnigCodePoint code;
5692
5693 p = s;
5694 while (p < end) {
5695 code = ONIGENC_MBC_TO_CODE(enc, p, end);
5696 if (code >= 0x80) {
5697 fprintf(fp, " 0x%04x ", (int )code);
5698 }
5699 else {
5700 fputc((int )code, fp);
5701 }
5702
5703 p += enclen(enc, p);
5704 }
5705 }
5706 else {
5707 while (s < end) {
5708 fputc((int )*s, fp);
5709 s++;
5710 }
5711 }
5712
5713 fprintf(fp, "/\n");
5714 }
5715
5716 #endif /* ONIG_DEBUG */
5717
5718 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5719
5720 static void
5721 print_distance_range(FILE* f, OnigLen a, OnigLen b)
5722 {
5723 if (a == INFINITE_LEN)
5724 fputs("inf", f);
5725 else
5726 fprintf(f, "(%u)", a);
5727
5728 fputs("-", f);
5729
5730 if (b == INFINITE_LEN)
5731 fputs("inf", f);
5732 else
5733 fprintf(f, "(%u)", b);
5734 }
5735
5736 static void
5737 print_anchor(FILE* f, int anchor)
5738 {
5739 int q = 0;
5740
5741 fprintf(f, "[");
5742
5743 if (anchor & ANCHOR_BEGIN_BUF) {
5744 fprintf(f, "begin-buf");
5745 q = 1;
5746 }
5747 if (anchor & ANCHOR_BEGIN_LINE) {
5748 if (q) fprintf(f, ", ");
5749 q = 1;
5750 fprintf(f, "begin-line");
5751 }
5752 if (anchor & ANCHOR_BEGIN_POSITION) {
5753 if (q) fprintf(f, ", ");
5754 q = 1;
5755 fprintf(f, "begin-pos");
5756 }
5757 if (anchor & ANCHOR_END_BUF) {
5758 if (q) fprintf(f, ", ");
5759 q = 1;
5760 fprintf(f, "end-buf");
5761 }
5762 if (anchor & ANCHOR_SEMI_END_BUF) {
5763 if (q) fprintf(f, ", ");
5764 q = 1;
5765 fprintf(f, "semi-end-buf");
5766 }
5767 if (anchor & ANCHOR_END_LINE) {
5768 if (q) fprintf(f, ", ");
5769 q = 1;
5770 fprintf(f, "end-line");
5771 }
5772 if (anchor & ANCHOR_ANYCHAR_INF) {
5773 if (q) fprintf(f, ", ");
5774 q = 1;
5775 fprintf(f, "anychar-inf");
5776 }
5777 if (anchor & ANCHOR_ANYCHAR_INF_ML) {
5778 if (q) fprintf(f, ", ");
5779 fprintf(f, "anychar-inf-ml");
5780 }
5781
5782 fprintf(f, "]");
5783 }
5784
5785 static void
5786 print_optimize_info(FILE* f, regex_t* reg)
5787 {
5788 static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
5789 "EXACT_IC", "MAP" };
5790
5791 fprintf(f, "optimize: %s\n", on[reg->optimize]);
5792 fprintf(f, " anchor: "); print_anchor(f, reg->anchor);
5793 if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
5794 print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
5795 fprintf(f, "\n");
5796
5797 if (reg->optimize) {
5798 fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor);
5799 fprintf(f, "\n");
5800 }
5801 fprintf(f, "\n");
5802
5803 if (reg->exact) {
5804 UChar *p;
5805 fprintf(f, "exact: [");
5806 for (p = reg->exact; p < reg->exact_end; p++) {
5807 fputc(*p, f);
5808 }
5809 fprintf(f, "]: length: %ld\n", (reg->exact_end - reg->exact));
5810 }
5811 else if (reg->optimize & OPTIMIZE_MAP) {
5812 int c, i, n = 0;
5813
5814 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5815 if (reg->map[i]) n++;
5816
5817 fprintf(f, "map: n=%d\n", n);
5818 if (n > 0) {
5819 c = 0;
5820 fputc('[', f);
5821 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
5822 if (reg->map[i] != 0) {
5823 if (c > 0) fputs(", ", f);
5824 c++;
5825 if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
5826 ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
5827 fputc(i, f);
5828 else
5829 fprintf(f, "%d", i);
5830 }
5831 }
5832 fprintf(f, "]\n");
5833 }
5834 }
5835 }
5836 #endif
5837
5838
5839 extern RegexExt*
5840 onig_get_regex_ext(regex_t* reg)
5841 {
5842 if (IS_NULL(REG_EXTP(reg))) {
5843 RegexExt* ext = (RegexExt* )xmalloc(sizeof(*ext));
5844 if (IS_NULL(ext)) return 0;
5845
5846 ext->pattern = 0;
5847 ext->pattern_end = 0;
5848 #ifdef USE_CALLOUT
5849 ext->tag_table = 0;
5850 ext->callout_num = 0;
5851 ext->callout_list_alloc = 0;
5852 ext->callout_list = 0;
5853 #endif
5854
5855 REG_EXTPL(reg) = (void* )ext;
5856 }
5857
5858 return REG_EXTP(reg);
5859 }
5860
5861 static void
5862 free_regex_ext(RegexExt* ext)
5863 {
5864 if (IS_NOT_NULL(ext)) {
5865 if (IS_NOT_NULL(ext->pattern))
5866 xfree((void* )ext->pattern);
5867
5868 #ifdef USE_CALLOUT
5869 if (IS_NOT_NULL(ext->tag_table))
5870 onig_callout_tag_table_free(ext->tag_table);
5871
5872 if (IS_NOT_NULL(ext->callout_list))
5873 onig_free_reg_callout_list(ext->callout_num, ext->callout_list);
5874 #endif
5875
5876 xfree(ext);
5877 }
5878 }
5879
5880 extern int
5881 onig_ext_set_pattern(regex_t* reg, const UChar* pattern, const UChar* pattern_end)
5882 {
5883 RegexExt* ext;
5884 UChar* s;
5885
5886 ext = onig_get_regex_ext(reg);
5887 CHECK_NULL_RETURN_MEMERR(ext);
5888
5889 s = onigenc_strdup(reg->enc, pattern, pattern_end);
5890 CHECK_NULL_RETURN_MEMERR(s);
5891
5892 ext->pattern = s;
5893 ext->pattern_end = s + (pattern_end - pattern);
5894
5895 return ONIG_NORMAL;
5896 }
5897
5898
5899 extern void
5900 onig_free_body(regex_t* reg)
5901 {
5902 if (IS_NOT_NULL(reg)) {
5903 if (IS_NOT_NULL(reg->p)) xfree(reg->p);
5904 if (IS_NOT_NULL(reg->exact)) xfree(reg->exact);
5905 if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map);
5906 if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward);
5907 if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range);
5908 if (IS_NOT_NULL(REG_EXTP(reg))) {
5909 free_regex_ext(REG_EXTP(reg));
5910 REG_EXTPL(reg) = 0;
5911 }
5912
5913 onig_names_free(reg);
5914 }
5915 }
5916
5917 extern void
5918 onig_free(regex_t* reg)
5919 {
5920 if (IS_NOT_NULL(reg)) {
5921 onig_free_body(reg);
5922 xfree(reg);
5923 }
5924 }
5925
5926 #define REGEX_TRANSFER(to,from) do {\
5927 onig_free_body(to);\
5928 xmemcpy(to, from, sizeof(regex_t));\
5929 xfree(from);\
5930 } while (0)
5931
5932 extern void
5933 onig_transfer(regex_t* to, regex_t* from)
5934 {
5935 REGEX_TRANSFER(to, from);
5936 }
5937
5938
5939 #ifdef ONIG_DEBUG_PARSE
5940 static void print_tree P_((FILE* f, Node* node));
5941 #endif
5942
5943 extern int
5944 onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5945 OnigErrorInfo* einfo)
5946 {
5947 #define COMPILE_INIT_SIZE 20
5948
5949 int r, init_size;
5950 Node* root;
5951 ScanEnv scan_env;
5952 #ifdef USE_CALL
5953 UnsetAddrList uslist;
5954 #endif
5955
5956 root = 0;
5957 if (IS_NOT_NULL(einfo)) {
5958 einfo->enc = reg->enc;
5959 einfo->par = (UChar* )NULL;
5960 }
5961
5962 #ifdef ONIG_DEBUG
5963 print_enc_string(stderr, reg->enc, pattern, pattern_end);
5964 #endif
5965
5966 if (reg->alloc == 0) {
5967 init_size = (int )(pattern_end - pattern) * 2;
5968 if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
5969 r = BB_INIT(reg, init_size);
5970 if (r != 0) goto end;
5971 }
5972 else
5973 reg->used = 0;
5974
5975 reg->num_mem = 0;
5976 reg->num_repeat = 0;
5977 reg->num_null_check = 0;
5978 reg->repeat_range_alloc = 0;
5979 reg->repeat_range = (OnigRepeatRange* )NULL;
5980
5981 r = onig_parse_tree(&root, pattern, pattern_end, reg, &scan_env);
5982 if (r != 0) goto err;
5983
5984 /* mixed use named group and no-named group */
5985 if (scan_env.num_named > 0 &&
5986 IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
5987 ! ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
5988 if (scan_env.num_named != scan_env.num_mem)
5989 r = disable_noname_group_capture(&root, reg, &scan_env);
5990 else
5991 r = numbered_ref_check(root);
5992
5993 if (r != 0) goto err;
5994 }
5995
5996 r = check_backrefs(root, &scan_env);
5997 if (r != 0) goto err;
5998
5999 #ifdef USE_CALL
6000 if (scan_env.num_call > 0) {
6001 r = unset_addr_list_init(&uslist, scan_env.num_call);
6002 if (r != 0) goto err;
6003 scan_env.unset_addr_list = &uslist;
6004 r = setup_call(root, &scan_env, 0);
6005 if (r != 0) goto err_unset;
6006 r = setup_call2(root);
6007 if (r != 0) goto err_unset;
6008 r = recursive_call_check_trav(root, &scan_env, 0);
6009 if (r < 0) goto err_unset;
6010 r = infinite_recursive_call_check_trav(root, &scan_env);
6011 if (r != 0) goto err_unset;
6012
6013 setup_called_state(root, 0);
6014 }
6015
6016 reg->num_call = scan_env.num_call;
6017 #endif
6018
6019 r = setup_tree(root, reg, 0, &scan_env);
6020 if (r != 0) goto err_unset;
6021
6022 #ifdef ONIG_DEBUG_PARSE
6023 print_tree(stderr, root);
6024 #endif
6025
6026 reg->capture_history = scan_env.capture_history;
6027 reg->bt_mem_start = scan_env.bt_mem_start;
6028 reg->bt_mem_start |= reg->capture_history;
6029 if (IS_FIND_CONDITION(reg->options))
6030 MEM_STATUS_ON_ALL(reg->bt_mem_end);
6031 else {
6032 reg->bt_mem_end = scan_env.bt_mem_end;
6033 reg->bt_mem_end |= reg->capture_history;
6034 }
6035 reg->bt_mem_start |= reg->bt_mem_end;
6036
6037 clear_optimize_info(reg);
6038 #ifndef ONIG_DONT_OPTIMIZE
6039 r = set_optimize_info_from_tree(root, reg, &scan_env);
6040 if (r != 0) goto err_unset;
6041 #endif
6042
6043 if (IS_NOT_NULL(scan_env.mem_env_dynamic)) {
6044 xfree(scan_env.mem_env_dynamic);
6045 scan_env.mem_env_dynamic = (MemEnv* )NULL;
6046 }
6047
6048 r = compile_tree(root, reg, &scan_env);
6049 if (r == 0) {
6050 if (scan_env.keep_num > 0) {
6051 r = add_opcode(reg, OP_UPDATE_VAR);
6052 if (r != 0) goto err;
6053 r = add_update_var_type(reg, UPDATE_VAR_KEEP_FROM_STACK_LAST);
6054 if (r != 0) goto err;
6055 r = add_mem_num(reg, 0 /* not used */);
6056 if (r != 0) goto err;
6057 }
6058
6059 r = add_opcode(reg, OP_END);
6060 #ifdef USE_CALL
6061 if (scan_env.num_call > 0) {
6062 r = fix_unset_addr_list(&uslist, reg);
6063 unset_addr_list_end(&uslist);
6064 if (r != 0) goto err;
6065 }
6066 #endif
6067
6068 if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0)
6069 #ifdef USE_CALLOUT
6070 || (IS_NOT_NULL(REG_EXTP(reg)) && REG_EXTP(reg)->callout_num != 0)
6071 #endif
6072 )
6073 reg->stack_pop_level = STACK_POP_LEVEL_ALL;
6074 else {
6075 if (reg->bt_mem_start != 0)
6076 reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
6077 else
6078 reg->stack_pop_level = STACK_POP_LEVEL_FREE;
6079 }
6080 }
6081 #ifdef USE_CALL
6082 else if (scan_env.num_call > 0) {
6083 unset_addr_list_end(&uslist);
6084 }
6085 #endif
6086 onig_node_free(root);
6087
6088 #ifdef ONIG_DEBUG_COMPILE
6089 onig_print_names(stderr, reg);
6090 onig_print_compiled_byte_code_list(stderr, reg);
6091 #endif
6092
6093 end:
6094 return r;
6095
6096 err_unset:
6097 #ifdef USE_CALL
6098 if (scan_env.num_call > 0) {
6099 unset_addr_list_end(&uslist);
6100 }
6101 #endif
6102 err:
6103 if (IS_NOT_NULL(scan_env.error)) {
6104 if (IS_NOT_NULL(einfo)) {
6105 einfo->par = scan_env.error;
6106 einfo->par_end = scan_env.error_end;
6107 }
6108 }
6109
6110 onig_node_free(root);
6111 if (IS_NOT_NULL(scan_env.mem_env_dynamic))
6112 xfree(scan_env.mem_env_dynamic);
6113 return r;
6114 }
6115
6116
6117 static int onig_inited = 0;
6118
6119 extern int
6120 onig_reg_init(regex_t* reg, OnigOptionType option, OnigCaseFoldType case_fold_flag,
6121 OnigEncoding enc, OnigSyntaxType* syntax)
6122 {
6123 int r;
6124
6125 xmemset(reg, 0, sizeof(*reg));
6126
6127 if (onig_inited == 0) {
6128 #if 0
6129 return ONIGERR_LIBRARY_IS_NOT_INITIALIZED;
6130 #else
6131 r = onig_initialize(&enc, 1);
6132 if (r != 0)
6133 return ONIGERR_FAIL_TO_INITIALIZE;
6134
6135 onig_warning("You didn't call onig_initialize() explicitly");
6136 #endif
6137 }
6138
6139 if (IS_NULL(reg))
6140 return ONIGERR_INVALID_ARGUMENT;
6141
6142 if (ONIGENC_IS_UNDEF(enc))
6143 return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED;
6144
6145 if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
6146 == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
6147 return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
6148 }
6149
6150 if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
6151 option |= syntax->options;
6152 option &= ~ONIG_OPTION_SINGLELINE;
6153 }
6154 else
6155 option |= syntax->options;
6156
6157 (reg)->enc = enc;
6158 (reg)->options = option;
6159 (reg)->syntax = syntax;
6160 (reg)->optimize = 0;
6161 (reg)->exact = (UChar* )NULL;
6162 (reg)->int_map = (int* )NULL;
6163 (reg)->int_map_backward = (int* )NULL;
6164 REG_EXTPL(reg) = NULL;
6165
6166 (reg)->p = (UChar* )NULL;
6167 (reg)->alloc = 0;
6168 (reg)->used = 0;
6169 (reg)->name_table = (void* )NULL;
6170
6171 (reg)->case_fold_flag = case_fold_flag;
6172 return 0;
6173 }
6174
6175 extern int
6176 onig_new_without_alloc(regex_t* reg,
6177 const UChar* pattern, const UChar* pattern_end,
6178 OnigOptionType option, OnigEncoding enc,
6179 OnigSyntaxType* syntax, OnigErrorInfo* einfo)
6180 {
6181 int r;
6182
6183 r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
6184 if (r != 0) return r;
6185
6186 r = onig_compile(reg, pattern, pattern_end, einfo);
6187 return r;
6188 }
6189
6190 extern int
6191 onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
6192 OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
6193 OnigErrorInfo* einfo)
6194 {
6195 int r;
6196
6197 *reg = (regex_t* )xmalloc(sizeof(regex_t));
6198 if (IS_NULL(*reg)) return ONIGERR_MEMORY;
6199
6200 r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
6201 if (r != 0) goto err;
6202
6203 r = onig_compile(*reg, pattern, pattern_end, einfo);
6204 if (r != 0) {
6205 err:
6206 onig_free(*reg);
6207 *reg = NULL;
6208 }
6209 return r;
6210 }
6211
6212 extern int
6213 onig_initialize(OnigEncoding encodings[], int n)
6214 {
6215 int i;
6216 int r;
6217
6218 if (onig_inited != 0)
6219 return 0;
6220
6221 onigenc_init();
6222
6223 onig_inited = 1;
6224
6225 for (i = 0; i < n; i++) {
6226 OnigEncoding enc = encodings[i];
6227 r = onig_initialize_encoding(enc);
6228 if (r != 0)
6229 return r;
6230 }
6231
6232 return ONIG_NORMAL;
6233 }
6234
6235 typedef struct EndCallListItem {
6236 struct EndCallListItem* next;
6237 void (*func)(void);
6238 } EndCallListItemType;
6239
6240 static EndCallListItemType* EndCallTop;
6241
6242 extern void onig_add_end_call(void (*func)(void))
6243 {
6244 EndCallListItemType* item;
6245
6246 item = (EndCallListItemType* )xmalloc(sizeof(*item));
6247 if (item == 0) return ;
6248
6249 item->next = EndCallTop;
6250 item->func = func;
6251
6252 EndCallTop = item;
6253 }
6254
6255 static void
6256 exec_end_call_list(void)
6257 {
6258 EndCallListItemType* prev;
6259 void (*func)(void);
6260
6261 while (EndCallTop != 0) {
6262 func = EndCallTop->func;
6263 (*func)();
6264
6265 prev = EndCallTop;
6266 EndCallTop = EndCallTop->next;
6267 xfree(prev);
6268 }
6269 }
6270
6271 extern int
6272 onig_end(void)
6273 {
6274 exec_end_call_list();
6275
6276 #ifdef USE_CALLOUT
6277 onig_global_callout_names_free();
6278 #endif
6279
6280 onigenc_end();
6281
6282 onig_inited = 0;
6283
6284 return 0;
6285 }
6286
6287 extern int
6288 onig_is_in_code_range(const UChar* p, OnigCodePoint code)
6289 {
6290 OnigCodePoint n, *data;
6291 OnigCodePoint low, high, x;
6292
6293 GET_CODE_POINT(n, p);
6294 data = (OnigCodePoint* )p;
6295 data++;
6296
6297 for (low = 0, high = n; low < high; ) {
6298 x = (low + high) >> 1;
6299 if (code > data[x * 2 + 1])
6300 low = x + 1;
6301 else
6302 high = x;
6303 }
6304
6305 return ((low < n && code >= data[low * 2]) ? 1 : 0);
6306 }
6307
6308 extern int
6309 onig_is_code_in_cc_len(int elen, OnigCodePoint code, /* CClassNode* */ void* cc_arg)
6310 {
6311 int found;
6312 CClassNode* cc = (CClassNode* )cc_arg;
6313
6314 if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
6315 if (IS_NULL(cc->mbuf)) {
6316 found = 0;
6317 }
6318 else {
6319 found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
6320 }
6321 }
6322 else {
6323 found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
6324 }
6325
6326 if (IS_NCCLASS_NOT(cc))
6327 return !found;
6328 else
6329 return found;
6330 }
6331
6332 extern int
6333 onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
6334 {
6335 int len;
6336
6337 if (ONIGENC_MBC_MINLEN(enc) > 1) {
6338 len = 2;
6339 }
6340 else {
6341 len = ONIGENC_CODE_TO_MBCLEN(enc, code);
6342 }
6343 return onig_is_code_in_cc_len(len, code, cc);
6344 }
6345
6346
6347 #ifdef ONIG_DEBUG_PARSE
6348
6349 static void
6350 p_string(FILE* f, int len, UChar* s)
6351 {
6352 fputs(":", f);
6353 while (len-- > 0) { fputc(*s++, f); }
6354 }
6355
6356 static void
6357 Indent(FILE* f, int indent)
6358 {
6359 int i;
6360 for (i = 0; i < indent; i++) putc(' ', f);
6361 }
6362
6363 static void
6364 print_indent_tree(FILE* f, Node* node, int indent)
6365 {
6366 int i;
6367 NodeType type;
6368 UChar* p;
6369 int add = 3;
6370
6371 Indent(f, indent);
6372 if (IS_NULL(node)) {
6373 fprintf(f, "ERROR: null node!!!\n");
6374 exit (0);
6375 }
6376
6377 type = NODE_TYPE(node);
6378 switch (type) {
6379 case NODE_LIST:
6380 case NODE_ALT:
6381 if (type == NODE_LIST)
6382 fprintf(f, "<list:%p>\n", node);
6383 else
6384 fprintf(f, "<alt:%p>\n", node);
6385
6386 print_indent_tree(f, NODE_CAR(node), indent + add);
6387 while (IS_NOT_NULL(node = NODE_CDR(node))) {
6388 if (NODE_TYPE(node) != type) {
6389 fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NODE_TYPE(node));
6390 exit(0);
6391 }
6392 print_indent_tree(f, NODE_CAR(node), indent + add);
6393 }
6394 break;
6395
6396 case NODE_STRING:
6397 fprintf(f, "<string%s:%p>", (NODE_STRING_IS_RAW(node) ? "-raw" : ""), node);
6398 for (p = STR_(node)->s; p < STR_(node)->end; p++) {
6399 if (*p >= 0x20 && *p < 0x7f)
6400 fputc(*p, f);
6401 else {
6402 fprintf(f, " 0x%02x", *p);
6403 }
6404 }
6405 break;
6406
6407 case NODE_CCLASS:
6408 fprintf(f, "<cclass:%p>", node);
6409 if (IS_NCCLASS_NOT(CCLASS_(node))) fputs(" not", f);
6410 if (CCLASS_(node)->mbuf) {
6411 BBuf* bbuf = CCLASS_(node)->mbuf;
6412 for (i = 0; i < bbuf->used; i++) {
6413 if (i > 0) fprintf(f, ",");
6414 fprintf(f, "%0x", bbuf->p[i]);
6415 }
6416 }
6417 break;
6418
6419 case NODE_CTYPE:
6420 fprintf(f, "<ctype:%p> ", node);
6421 switch (CTYPE_(node)->ctype) {
6422 case CTYPE_ANYCHAR:
6423 fprintf(f, "<anychar:%p>", node);
6424 break;
6425
6426 case ONIGENC_CTYPE_WORD:
6427 if (CTYPE_(node)->not != 0)
6428 fputs("not word", f);
6429 else
6430 fputs("word", f);
6431
6432 if (CTYPE_(node)->ascii_mode != 0)
6433 fputs(" (ascii)", f);
6434
6435 break;
6436
6437 default:
6438 fprintf(f, "ERROR: undefined ctype.\n");
6439 exit(0);
6440 }
6441 break;
6442
6443 case NODE_ANCHOR:
6444 fprintf(f, "<anchor:%p> ", node);
6445 switch (ANCHOR_(node)->type) {
6446 case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break;
6447 case ANCHOR_END_BUF: fputs("end buf", f); break;
6448 case ANCHOR_BEGIN_LINE: fputs("begin line", f); break;
6449 case ANCHOR_END_LINE: fputs("end line", f); break;
6450 case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break;
6451 case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
6452
6453 case ANCHOR_WORD_BOUNDARY: fputs("word boundary", f); break;
6454 case ANCHOR_NO_WORD_BOUNDARY: fputs("not word boundary", f); break;
6455 #ifdef USE_WORD_BEGIN_END
6456 case ANCHOR_WORD_BEGIN: fputs("word begin", f); break;
6457 case ANCHOR_WORD_END: fputs("word end", f); break;
6458 #endif
6459 case ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
6460 fputs("extended-grapheme-cluster boundary", f); break;
6461 case ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
6462 fputs("no-extended-grapheme-cluster boundary", f); break;
6463 case ANCHOR_PREC_READ:
6464 fprintf(f, "prec read\n");
6465 print_indent_tree(f, NODE_BODY(node), indent + add);
6466 break;
6467 case ANCHOR_PREC_READ_NOT:
6468 fprintf(f, "prec read not\n");
6469 print_indent_tree(f, NODE_BODY(node), indent + add);
6470 break;
6471 case ANCHOR_LOOK_BEHIND:
6472 fprintf(f, "look behind\n");
6473 print_indent_tree(f, NODE_BODY(node), indent + add);
6474 break;
6475 case ANCHOR_LOOK_BEHIND_NOT:
6476 fprintf(f, "look behind not\n");
6477 print_indent_tree(f, NODE_BODY(node), indent + add);
6478 break;
6479
6480 default:
6481 fprintf(f, "ERROR: undefined anchor type.\n");
6482 break;
6483 }
6484 break;
6485
6486 case NODE_BACKREF:
6487 {
6488 int* p;
6489 BackRefNode* br = BACKREF_(node);
6490 p = BACKREFS_P(br);
6491 fprintf(f, "<backref%s:%p>", NODE_IS_CHECKER(node) ? "-checker" : "", node);
6492 for (i = 0; i < br->back_num; i++) {
6493 if (i > 0) fputs(", ", f);
6494 fprintf(f, "%d", p[i]);
6495 }
6496 }
6497 break;
6498
6499 #ifdef USE_CALL
6500 case NODE_CALL:
6501 {
6502 CallNode* cn = CALL_(node);
6503 fprintf(f, "<call:%p>", node);
6504 p_string(f, cn->name_end - cn->name, cn->name);
6505 }
6506 break;
6507 #endif
6508
6509 case NODE_QUANT:
6510 fprintf(f, "<quantifier:%p>{%d,%d}%s\n", node,
6511 QUANT_(node)->lower, QUANT_(node)->upper,
6512 (QUANT_(node)->greedy ? "" : "?"));
6513 print_indent_tree(f, NODE_BODY(node), indent + add);
6514 break;
6515
6516 case NODE_ENCLOSURE:
6517 fprintf(f, "<enclosure:%p> ", node);
6518 switch (ENCLOSURE_(node)->type) {
6519 case ENCLOSURE_OPTION:
6520 fprintf(f, "option:%d", ENCLOSURE_(node)->o.options);
6521 break;
6522 case ENCLOSURE_MEMORY:
6523 fprintf(f, "memory:%d", ENCLOSURE_(node)->m.regnum);
6524 break;
6525 case ENCLOSURE_STOP_BACKTRACK:
6526 fprintf(f, "stop-bt");
6527 break;
6528
6529 default:
6530 break;
6531 }
6532 fprintf(f, "\n");
6533 print_indent_tree(f, NODE_BODY(node), indent + add);
6534 break;
6535
6536 case NODE_GIMMICK:
6537 fprintf(f, "<gimmick:%p> ", node);
6538 switch (GIMMICK_(node)->type) {
6539 case GIMMICK_FAIL:
6540 fprintf(f, "fail");
6541 break;
6542 case GIMMICK_KEEP:
6543 fprintf(f, "keep:%d", GIMMICK_(node)->id);
6544 break;
6545 case GIMMICK_SAVE:
6546 fprintf(f, "save:%d:%d", GIMMICK_(node)->detail_type, GIMMICK_(node)->id);
6547 break;
6548 case GIMMICK_UPDATE_VAR:
6549 fprintf(f, "update_var:%d:%d", GIMMICK_(node)->detail_type, GIMMICK_(node)->id);
6550 break;
6551 #ifdef USE_CALLOUT
6552 case GIMMICK_CALLOUT:
6553 switch (GIMMICK_(node)->detail_type) {
6554 case ONIG_CALLOUT_OF_CONTENTS:
6555 fprintf(f, "callout:contents:%d", GIMMICK_(node)->num);
6556 break;
6557 case ONIG_CALLOUT_OF_NAME:
6558 fprintf(f, "callout:name:%d:%d", GIMMICK_(node)->id, GIMMICK_(node)->num);
6559 break;
6560 }
6561 #endif
6562 }
6563 break;
6564
6565 default:
6566 fprintf(f, "print_indent_tree: undefined node type %d\n", NODE_TYPE(node));
6567 break;
6568 }
6569
6570 if (type != NODE_LIST && type != NODE_ALT && type != NODE_QUANT &&
6571 type != NODE_ENCLOSURE)
6572 fprintf(f, "\n");
6573 fflush(f);
6574 }
6575
6576 static void
6577 print_tree(FILE* f, Node* node)
6578 {
6579 print_indent_tree(f, node, 0);
6580 }
6581 #endif