]> git.proxmox.com Git - mirror_edk2.git/blame - MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regcomp.c
MdeModulePkg: Delete useless case code
[mirror_edk2.git] / MdeModulePkg / Universal / RegularExpressionDxe / Oniguruma / regcomp.c
CommitLineData
14b0e578
CS
1/**********************************************************************\r
2 regcomp.c - Oniguruma (regular expression library)\r
3**********************************************************************/\r
4/*-\r
5 * Copyright (c) 2002-2013 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>\r
6 * All rights reserved.\r
7 *\r
0af8e57c 8 * (C) Copyright 2015 Hewlett Packard Enterprise Development LP<BR>\r
14b0e578
CS
9 *\r
10 * Redistribution and use in source and binary forms, with or without\r
11 * modification, are permitted provided that the following conditions\r
12 * are met:\r
13 * 1. Redistributions of source code must retain the above copyright\r
14 * notice, this list of conditions and the following disclaimer.\r
15 * 2. Redistributions in binary form must reproduce the above copyright\r
16 * notice, this list of conditions and the following disclaimer in the\r
17 * documentation and/or other materials provided with the distribution.\r
18 *\r
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND\r
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\r
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\r
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE\r
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\r
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS\r
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)\r
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT\r
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY\r
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF\r
29 * SUCH DAMAGE.\r
30 */\r
31\r
32#include "regparse.h"\r
33\r
34OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;\r
35\r
36extern OnigCaseFoldType\r
37onig_get_default_case_fold_flag(void)\r
38{\r
39 return OnigDefaultCaseFoldFlag;\r
40}\r
41\r
42extern int\r
43onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)\r
44{\r
45 OnigDefaultCaseFoldFlag = case_fold_flag;\r
46 return 0;\r
47}\r
48\r
49\r
50#ifndef PLATFORM_UNALIGNED_WORD_ACCESS\r
51static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];\r
52#endif\r
53\r
54static UChar*\r
55str_dup(UChar* s, UChar* end)\r
56{\r
57 int len = (int)(end - s);\r
58\r
59 if (len > 0) {\r
60 UChar* r = (UChar* )xmalloc(len + 1);\r
61 CHECK_NULL_RETURN(r);\r
62 xmemcpy(r, s, len);\r
63 r[len] = (UChar )0;\r
64 return r;\r
65 }\r
66 else return NULL;\r
67}\r
68\r
69static void\r
70swap_node(Node* a, Node* b)\r
71{\r
72 Node c;\r
b2e35426
SEHM
73 CopyMem (&c, a, sizeof (Node));\r
74 CopyMem (a, b, sizeof (Node));\r
75 CopyMem (b, &c, sizeof (Node));\r
14b0e578
CS
76\r
77 if (NTYPE(a) == NT_STR) {\r
78 StrNode* sn = NSTR(a);\r
79 if (sn->capa == 0) {\r
80 int len = (int)(sn->end - sn->s);\r
81 sn->s = sn->buf;\r
82 sn->end = sn->s + len;\r
83 }\r
84 }\r
85\r
86 if (NTYPE(b) == NT_STR) {\r
87 StrNode* sn = NSTR(b);\r
88 if (sn->capa == 0) {\r
89 int len = (int)(sn->end - sn->s);\r
90 sn->s = sn->buf;\r
91 sn->end = sn->s + len;\r
92 }\r
93 }\r
94}\r
95\r
96static OnigDistance\r
97distance_add(OnigDistance d1, OnigDistance d2)\r
98{\r
99 if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)\r
100 return ONIG_INFINITE_DISTANCE;\r
101 else {\r
102 if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;\r
103 else return ONIG_INFINITE_DISTANCE;\r
104 }\r
105}\r
106\r
107static OnigDistance\r
108distance_multiply(OnigDistance d, int m)\r
109{\r
110 if (m == 0) return 0;\r
111\r
112 if (d < ONIG_INFINITE_DISTANCE / m)\r
113 return d * m;\r
114 else\r
115 return ONIG_INFINITE_DISTANCE;\r
116}\r
117\r
118static int\r
119bitset_is_empty(BitSetRef bs)\r
120{\r
121 int i;\r
122 for (i = 0; i < (int )BITSET_SIZE; i++) {\r
123 if (bs[i] != 0) return 0;\r
124 }\r
125 return 1;\r
126}\r
127\r
128#ifdef ONIG_DEBUG\r
129static int\r
130bitset_on_num(BitSetRef bs)\r
131{\r
132 int i, n;\r
133\r
134 n = 0;\r
135 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {\r
136 if (BITSET_AT(bs, i)) n++;\r
137 }\r
138 return n;\r
139}\r
140#endif\r
141\r
142extern int\r
143onig_bbuf_init(BBuf* buf, int size)\r
144{\r
145 if (size <= 0) {\r
146 size = 0;\r
147 buf->p = NULL;\r
148 }\r
149 else {\r
150 buf->p = (UChar* )xmalloc(size);\r
151 if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);\r
152 }\r
153\r
154 buf->alloc = size;\r
155 buf->used = 0;\r
156 return 0;\r
157}\r
158\r
159\r
160#ifdef USE_SUBEXP_CALL\r
161\r
162static int\r
163unset_addr_list_init(UnsetAddrList* uslist, int size)\r
164{\r
165 UnsetAddr* p;\r
166\r
167 p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);\r
168 CHECK_NULL_RETURN_MEMERR(p);\r
169 uslist->num = 0;\r
170 uslist->alloc = size;\r
171 uslist->us = p;\r
172 return 0;\r
173}\r
174\r
175static void\r
176unset_addr_list_end(UnsetAddrList* uslist)\r
177{\r
178 if (IS_NOT_NULL(uslist->us))\r
179 xfree(uslist->us);\r
180}\r
181\r
182static int\r
183unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)\r
184{\r
185 UnsetAddr* p;\r
186 int size;\r
187\r
188 if (uslist->num >= uslist->alloc) {\r
189 size = uslist->alloc * 2;\r
190 p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size, sizeof(UnsetAddr) * uslist->alloc);\r
191 CHECK_NULL_RETURN_MEMERR(p);\r
192 uslist->alloc = size;\r
193 uslist->us = p;\r
194 }\r
195\r
196 uslist->us[uslist->num].offset = offset;\r
197 uslist->us[uslist->num].target = node;\r
198 uslist->num++;\r
199 return 0;\r
200}\r
201#endif /* USE_SUBEXP_CALL */\r
202\r
203\r
204static int\r
205add_opcode(regex_t* reg, int opcode)\r
206{\r
207 BBUF_ADD1(reg, ((unsigned char)opcode));\r
208 return 0;\r
209}\r
210\r
211#ifdef USE_COMBINATION_EXPLOSION_CHECK\r
212static int\r
213add_state_check_num(regex_t* reg, int num)\r
214{\r
215 StateCheckNumType n = (StateCheckNumType )num;\r
216\r
217 BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);\r
218 return 0;\r
219}\r
220#endif\r
221\r
222static int\r
223add_rel_addr(regex_t* reg, int addr)\r
224{\r
225 RelAddrType ra = (RelAddrType )addr;\r
226\r
227 BBUF_ADD(reg, &ra, SIZE_RELADDR);\r
228 return 0;\r
229}\r
230\r
231static int\r
232add_abs_addr(regex_t* reg, int addr)\r
233{\r
234 AbsAddrType ra = (AbsAddrType )addr;\r
235\r
236 BBUF_ADD(reg, &ra, SIZE_ABSADDR);\r
237 return 0;\r
238}\r
239\r
240static int\r
241add_length(regex_t* reg, int len)\r
242{\r
243 LengthType l = (LengthType )len;\r
244\r
245 BBUF_ADD(reg, &l, SIZE_LENGTH);\r
246 return 0;\r
247}\r
248\r
249static int\r
250add_mem_num(regex_t* reg, int num)\r
251{\r
252 MemNumType n = (MemNumType )num;\r
253\r
254 BBUF_ADD(reg, &n, SIZE_MEMNUM);\r
255 return 0;\r
256}\r
257\r
258static int\r
259add_pointer(regex_t* reg, void* addr)\r
260{\r
261 PointerType ptr = (PointerType )addr;\r
262\r
263 BBUF_ADD(reg, &ptr, SIZE_POINTER);\r
264 return 0;\r
265}\r
266\r
267static int\r
268add_option(regex_t* reg, OnigOptionType option)\r
269{\r
270 BBUF_ADD(reg, &option, SIZE_OPTION);\r
271 return 0;\r
272}\r
273\r
274static int\r
275add_opcode_rel_addr(regex_t* reg, int opcode, int addr)\r
276{\r
277 int r;\r
278\r
279 r = add_opcode(reg, opcode);\r
280 if (r) return r;\r
281 r = add_rel_addr(reg, addr);\r
282 return r;\r
283}\r
284\r
285static int\r
286add_bytes(regex_t* reg, UChar* bytes, int len)\r
287{\r
288 BBUF_ADD(reg, bytes, len);\r
289 return 0;\r
290}\r
291\r
292static int\r
293add_bitset(regex_t* reg, BitSetRef bs)\r
294{\r
295 BBUF_ADD(reg, bs, SIZE_BITSET);\r
296 return 0;\r
297}\r
298\r
299static int\r
300add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)\r
301{\r
302 int r;\r
303\r
304 r = add_opcode(reg, opcode);\r
305 if (r) return r;\r
306 r = add_option(reg, option);\r
307 return r;\r
308}\r
309\r
310static int compile_length_tree(Node* node, regex_t* reg);\r
311static int compile_tree(Node* node, regex_t* reg);\r
312\r
313\r
314#define IS_NEED_STR_LEN_OP_EXACT(op) \\r
315 ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\\r
316 (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)\r
317\r
318static int\r
319select_str_opcode(int mb_len, int str_len, int ignore_case)\r
320{\r
321 int op;\r
322\r
323 if (ignore_case) {\r
324 switch (str_len) {\r
325 case 1: op = OP_EXACT1_IC; break;\r
326 default: op = OP_EXACTN_IC; break;\r
327 }\r
328 }\r
329 else {\r
330 switch (mb_len) {\r
331 case 1:\r
332 switch (str_len) {\r
333 case 1: op = OP_EXACT1; break;\r
334 case 2: op = OP_EXACT2; break;\r
335 case 3: op = OP_EXACT3; break;\r
336 case 4: op = OP_EXACT4; break;\r
337 case 5: op = OP_EXACT5; break;\r
338 default: op = OP_EXACTN; break;\r
339 }\r
340 break;\r
341\r
342 case 2:\r
343 switch (str_len) {\r
344 case 1: op = OP_EXACTMB2N1; break;\r
345 case 2: op = OP_EXACTMB2N2; break;\r
346 case 3: op = OP_EXACTMB2N3; break;\r
347 default: op = OP_EXACTMB2N; break;\r
348 }\r
349 break;\r
350\r
351 case 3:\r
352 op = OP_EXACTMB3N;\r
353 break;\r
354\r
355 default:\r
356 op = OP_EXACTMBN;\r
357 break;\r
358 }\r
359 }\r
360 return op;\r
361}\r
362\r
363static int\r
364compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)\r
365{\r
366 int r;\r
367 int saved_num_null_check = reg->num_null_check;\r
368\r
369 if (empty_info != 0) {\r
370 r = add_opcode(reg, OP_NULL_CHECK_START);\r
371 if (r) return r;\r
372 r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */\r
373 if (r) return r;\r
374 reg->num_null_check++;\r
375 }\r
376\r
377 r = compile_tree(node, reg);\r
378 if (r) return r;\r
379\r
380 if (empty_info != 0) {\r
381 if (empty_info == NQ_TARGET_IS_EMPTY)\r
382 r = add_opcode(reg, OP_NULL_CHECK_END);\r
383 else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)\r
384 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);\r
385 else if (empty_info == NQ_TARGET_IS_EMPTY_REC)\r
386 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);\r
387\r
388 if (r) return r;\r
389 r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */\r
390 }\r
391 return r;\r
392}\r
393\r
394#ifdef USE_SUBEXP_CALL\r
395static int\r
396compile_call(CallNode* node, regex_t* reg)\r
397{\r
398 int r;\r
399\r
400 r = add_opcode(reg, OP_CALL);\r
401 if (r) return r;\r
402 r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),\r
403 node->target);\r
404 if (r) return r;\r
405 r = add_abs_addr(reg, 0 /*dummy addr.*/);\r
406 return r;\r
407}\r
408#endif\r
409\r
410static int\r
411compile_tree_n_times(Node* node, int n, regex_t* reg)\r
412{\r
413 int i, r;\r
414\r
415 for (i = 0; i < n; i++) {\r
416 r = compile_tree(node, reg);\r
417 if (r) return r;\r
418 }\r
419 return 0;\r
420}\r
421\r
422static int\r
423add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, int str_len,\r
424 regex_t* reg ARG_UNUSED, int ignore_case)\r
425{\r
426 int len;\r
427 int op = select_str_opcode(mb_len, str_len, ignore_case);\r
428\r
429 len = SIZE_OPCODE;\r
430\r
431 if (op == OP_EXACTMBN) len += SIZE_LENGTH;\r
432 if (IS_NEED_STR_LEN_OP_EXACT(op))\r
433 len += SIZE_LENGTH;\r
434\r
435 len += mb_len * str_len;\r
436 return len;\r
437}\r
438\r
439static int\r
440add_compile_string(UChar* s, int mb_len, int str_len,\r
441 regex_t* reg, int ignore_case)\r
442{\r
443 int op = select_str_opcode(mb_len, str_len, ignore_case);\r
444 add_opcode(reg, op);\r
445\r
446 if (op == OP_EXACTMBN)\r
447 add_length(reg, mb_len);\r
448\r
449 if (IS_NEED_STR_LEN_OP_EXACT(op)) {\r
450 if (op == OP_EXACTN_IC)\r
451 add_length(reg, mb_len * str_len);\r
452 else\r
453 add_length(reg, str_len);\r
454 }\r
455\r
456 add_bytes(reg, s, mb_len * str_len);\r
457 return 0;\r
458}\r
459\r
460\r
461static int\r
462compile_length_string_node(Node* node, regex_t* reg)\r
463{\r
464 int rlen, r, len, prev_len, slen, ambig;\r
465 OnigEncoding enc = reg->enc;\r
466 UChar *p, *prev;\r
467 StrNode* sn;\r
468\r
469 sn = NSTR(node);\r
470 if (sn->end <= sn->s)\r
471 return 0;\r
472\r
473 ambig = NSTRING_IS_AMBIG(node);\r
474\r
475 p = prev = sn->s;\r
476 prev_len = enclen(enc, p);\r
477 p += prev_len;\r
478 slen = 1;\r
479 rlen = 0;\r
480\r
481 for (; p < sn->end; ) {\r
482 len = enclen(enc, p);\r
483 if (len == prev_len) {\r
484 slen++;\r
485 }\r
486 else {\r
487 r = add_compile_string_length(prev, prev_len, slen, reg, ambig);\r
488 rlen += r;\r
489 prev = p;\r
490 slen = 1;\r
491 prev_len = len;\r
492 }\r
493 p += len;\r
494 }\r
495 r = add_compile_string_length(prev, prev_len, slen, reg, ambig);\r
496 rlen += r;\r
497 return rlen;\r
498}\r
499\r
500static int\r
501compile_length_string_raw_node(StrNode* sn, regex_t* reg)\r
502{\r
503 if (sn->end <= sn->s)\r
504 return 0;\r
505\r
506 return add_compile_string_length(sn->s, 1 /* sb */, (int)(sn->end - sn->s), reg, 0);\r
507}\r
508\r
509static int\r
510compile_string_node(Node* node, regex_t* reg)\r
511{\r
512 int r, len, prev_len, slen, ambig;\r
513 OnigEncoding enc = reg->enc;\r
514 UChar *p, *prev, *end;\r
515 StrNode* sn;\r
516\r
517 sn = NSTR(node);\r
518 if (sn->end <= sn->s)\r
519 return 0;\r
520\r
521 end = sn->end;\r
522 ambig = NSTRING_IS_AMBIG(node);\r
523\r
524 p = prev = sn->s;\r
525 prev_len = enclen(enc, p);\r
526 p += prev_len;\r
527 slen = 1;\r
528\r
529 for (; p < end; ) {\r
530 len = enclen(enc, p);\r
531 if (len == prev_len) {\r
532 slen++;\r
533 }\r
534 else {\r
535 r = add_compile_string(prev, prev_len, slen, reg, ambig);\r
536 if (r) return r;\r
537\r
538 prev = p;\r
539 slen = 1;\r
540 prev_len = len;\r
541 }\r
542\r
543 p += len;\r
544 }\r
545 return add_compile_string(prev, prev_len, slen, reg, ambig);\r
546}\r
547\r
548static int\r
549compile_string_raw_node(StrNode* sn, regex_t* reg)\r
550{\r
551 if (sn->end <= sn->s)\r
552 return 0;\r
553\r
554 return add_compile_string(sn->s, 1 /* sb */, (int)(sn->end - sn->s), reg, 0);\r
555}\r
556\r
557static int\r
558add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)\r
559{\r
560#ifdef PLATFORM_UNALIGNED_WORD_ACCESS\r
561 add_length(reg, mbuf->used);\r
562 return add_bytes(reg, mbuf->p, mbuf->used);\r
563#else\r
564 int r, pad_size;\r
565 UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;\r
566\r
567 GET_ALIGNMENT_PAD_SIZE(p, pad_size);\r
568 add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));\r
569 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);\r
570\r
571 r = add_bytes(reg, mbuf->p, mbuf->used);\r
572\r
573 /* padding for return value from compile_length_cclass_node() to be fix. */\r
574 pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;\r
575 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);\r
576 return r;\r
577#endif\r
578}\r
579\r
580static int\r
581compile_length_cclass_node(CClassNode* cc, regex_t* reg)\r
582{\r
583 int len;\r
584\r
585 if (IS_NCCLASS_SHARE(cc)) {\r
586 len = SIZE_OPCODE + SIZE_POINTER;\r
587 return len;\r
588 }\r
589\r
590 if (IS_NULL(cc->mbuf)) {\r
591 len = SIZE_OPCODE + SIZE_BITSET;\r
592 }\r
593 else {\r
594 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {\r
595 len = SIZE_OPCODE;\r
596 }\r
597 else {\r
598 len = SIZE_OPCODE + SIZE_BITSET;\r
599 }\r
600#ifdef PLATFORM_UNALIGNED_WORD_ACCESS\r
601 len += SIZE_LENGTH + cc->mbuf->used;\r
602#else\r
603 len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);\r
604#endif\r
605 }\r
606\r
607 return len;\r
608}\r
609\r
610static int\r
611compile_cclass_node(CClassNode* cc, regex_t* reg)\r
612{\r
613 int r;\r
614\r
615 if (IS_NCCLASS_SHARE(cc)) {\r
616 add_opcode(reg, OP_CCLASS_NODE);\r
617 r = add_pointer(reg, cc);\r
618 return r;\r
619 }\r
620\r
621 if (IS_NULL(cc->mbuf)) {\r
622 if (IS_NCCLASS_NOT(cc))\r
623 add_opcode(reg, OP_CCLASS_NOT);\r
624 else\r
625 add_opcode(reg, OP_CCLASS);\r
626\r
627 r = add_bitset(reg, cc->bs);\r
628 }\r
629 else {\r
630 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {\r
631 if (IS_NCCLASS_NOT(cc))\r
632 add_opcode(reg, OP_CCLASS_MB_NOT);\r
633 else\r
634 add_opcode(reg, OP_CCLASS_MB);\r
635\r
636 r = add_multi_byte_cclass(cc->mbuf, reg);\r
637 }\r
638 else {\r
639 if (IS_NCCLASS_NOT(cc))\r
640 add_opcode(reg, OP_CCLASS_MIX_NOT);\r
641 else\r
642 add_opcode(reg, OP_CCLASS_MIX);\r
643\r
644 r = add_bitset(reg, cc->bs);\r
645 if (r) return r;\r
646 r = add_multi_byte_cclass(cc->mbuf, reg);\r
647 }\r
648 }\r
649\r
650 return r;\r
651}\r
652\r
653static int\r
654entry_repeat_range(regex_t* reg, int id, int lower, int upper)\r
655{\r
656#define REPEAT_RANGE_ALLOC 4\r
657\r
658 OnigRepeatRange* p;\r
659\r
660 if (reg->repeat_range_alloc == 0) {\r
661 p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);\r
662 CHECK_NULL_RETURN_MEMERR(p);\r
663 reg->repeat_range = p;\r
664 reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;\r
665 }\r
666 else if (reg->repeat_range_alloc <= id) {\r
667 int n;\r
668 n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;\r
669 p = (OnigRepeatRange* )xrealloc(reg->repeat_range,\r
670 sizeof(OnigRepeatRange) * n,\r
671 sizeof(OnigRepeatRange) * reg->repeat_range_alloc);\r
672 CHECK_NULL_RETURN_MEMERR(p);\r
673 reg->repeat_range = p;\r
674 reg->repeat_range_alloc = n;\r
675 }\r
676 else {\r
677 p = reg->repeat_range;\r
678 }\r
679\r
680 p[id].lower = lower;\r
681 p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);\r
682 return 0;\r
683}\r
684\r
685static int\r
686compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info,\r
687 regex_t* reg)\r
688{\r
689 int r;\r
690 int num_repeat = reg->num_repeat;\r
691\r
692 r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);\r
693 if (r) return r;\r
694 r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */\r
695 reg->num_repeat++;\r
696 if (r) return r;\r
697 r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);\r
698 if (r) return r;\r
699\r
700 r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);\r
701 if (r) return r;\r
702\r
703 r = compile_tree_empty_check(qn->target, reg, empty_info);\r
704 if (r) return r;\r
705\r
706 if (\r
707#ifdef USE_SUBEXP_CALL\r
708 reg->num_call > 0 ||\r
709#endif\r
710 IS_QUANTIFIER_IN_REPEAT(qn)) {\r
711 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);\r
712 }\r
713 else {\r
714 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);\r
715 }\r
716 if (r) return r;\r
717 r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */\r
718 return r;\r
719}\r
720\r
721static int\r
722is_anychar_star_quantifier(QtfrNode* qn)\r
723{\r
724 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&\r
725 NTYPE(qn->target) == NT_CANY)\r
726 return 1;\r
727 else\r
728 return 0;\r
729}\r
730\r
731#define QUANTIFIER_EXPAND_LIMIT_SIZE 50\r
732#define CKN_ON (ckn > 0)\r
733\r
734#ifdef USE_COMBINATION_EXPLOSION_CHECK\r
735\r
736static int\r
737compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)\r
738{\r
739 int len, mod_tlen, cklen;\r
740 int ckn;\r
741 int infinite = IS_REPEAT_INFINITE(qn->upper);\r
742 int empty_info = qn->target_empty_info;\r
743 int tlen = compile_length_tree(qn->target, reg);\r
744\r
745 if (tlen < 0) return tlen;\r
746\r
747 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);\r
748\r
749 cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);\r
750\r
751 /* anychar repeat */\r
752 if (NTYPE(qn->target) == NT_CANY) {\r
753 if (qn->greedy && infinite) {\r
754 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)\r
755 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;\r
756 else\r
757 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;\r
758 }\r
759 }\r
760\r
761 if (empty_info != 0)\r
762 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);\r
763 else\r
764 mod_tlen = tlen;\r
765\r
766 if (infinite && qn->lower <= 1) {\r
767 if (qn->greedy) {\r
768 if (qn->lower == 1)\r
769 len = SIZE_OP_JUMP;\r
770 else\r
771 len = 0;\r
772\r
773 len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;\r
774 }\r
775 else {\r
776 if (qn->lower == 0)\r
777 len = SIZE_OP_JUMP;\r
778 else\r
779 len = 0;\r
780\r
781 len += mod_tlen + SIZE_OP_PUSH + cklen;\r
782 }\r
783 }\r
784 else if (qn->upper == 0) {\r
785 if (qn->is_refered != 0) /* /(?<n>..){0}/ */\r
786 len = SIZE_OP_JUMP + tlen;\r
787 else\r
788 len = 0;\r
789 }\r
790 else if (qn->upper == 1 && qn->greedy) {\r
791 if (qn->lower == 0) {\r
792 if (CKN_ON) {\r
793 len = SIZE_OP_STATE_CHECK_PUSH + tlen;\r
794 }\r
795 else {\r
796 len = SIZE_OP_PUSH + tlen;\r
797 }\r
798 }\r
799 else {\r
800 len = tlen;\r
801 }\r
802 }\r
803 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */\r
804 len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;\r
805 }\r
806 else {\r
807 len = SIZE_OP_REPEAT_INC\r
808 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;\r
809 if (CKN_ON)\r
810 len += SIZE_OP_STATE_CHECK;\r
811 }\r
812\r
813 return len;\r
814}\r
815\r
816static int\r
817compile_quantifier_node(QtfrNode* qn, regex_t* reg)\r
818{\r
819 int r, mod_tlen;\r
820 int ckn;\r
821 int infinite = IS_REPEAT_INFINITE(qn->upper);\r
822 int empty_info = qn->target_empty_info;\r
823 int tlen = compile_length_tree(qn->target, reg);\r
824\r
825 if (tlen < 0) return tlen;\r
826\r
827 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);\r
828\r
829 if (is_anychar_star_quantifier(qn)) {\r
830 r = compile_tree_n_times(qn->target, qn->lower, reg);\r
831 if (r) return r;\r
832 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {\r
833 if (IS_MULTILINE(reg->options))\r
834 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);\r
835 else\r
836 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);\r
837 if (r) return r;\r
838 if (CKN_ON) {\r
839 r = add_state_check_num(reg, ckn);\r
840 if (r) return r;\r
841 }\r
842\r
843 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);\r
844 }\r
845 else {\r
846 if (IS_MULTILINE(reg->options)) {\r
847 r = add_opcode(reg, (CKN_ON ?\r
848 OP_STATE_CHECK_ANYCHAR_ML_STAR\r
849 : OP_ANYCHAR_ML_STAR));\r
850 }\r
851 else {\r
852 r = add_opcode(reg, (CKN_ON ?\r
853 OP_STATE_CHECK_ANYCHAR_STAR\r
854 : OP_ANYCHAR_STAR));\r
855 }\r
856 if (r) return r;\r
857 if (CKN_ON)\r
858 r = add_state_check_num(reg, ckn);\r
859\r
860 return r;\r
861 }\r
862 }\r
863\r
864 if (empty_info != 0)\r
865 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);\r
866 else\r
867 mod_tlen = tlen;\r
868\r
869 if (infinite && qn->lower <= 1) {\r
870 if (qn->greedy) {\r
871 if (qn->lower == 1) {\r
872 r = add_opcode_rel_addr(reg, OP_JUMP,\r
873 (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));\r
874 if (r) return r;\r
875 }\r
876\r
877 if (CKN_ON) {\r
878 r = add_opcode(reg, OP_STATE_CHECK_PUSH);\r
879 if (r) return r;\r
880 r = add_state_check_num(reg, ckn);\r
881 if (r) return r;\r
882 r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);\r
883 }\r
884 else {\r
885 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);\r
886 }\r
887 if (r) return r;\r
888 r = compile_tree_empty_check(qn->target, reg, empty_info);\r
889 if (r) return r;\r
890 r = add_opcode_rel_addr(reg, OP_JUMP,\r
891 -(mod_tlen + (int )SIZE_OP_JUMP\r
892 + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));\r
893 }\r
894 else {\r
895 if (qn->lower == 0) {\r
896 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);\r
897 if (r) return r;\r
898 }\r
899 r = compile_tree_empty_check(qn->target, reg, empty_info);\r
900 if (r) return r;\r
901 if (CKN_ON) {\r
902 r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);\r
903 if (r) return r;\r
904 r = add_state_check_num(reg, ckn);\r
905 if (r) return r;\r
906 r = add_rel_addr(reg,\r
907 -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));\r
908 }\r
909 else\r
910 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));\r
911 }\r
912 }\r
913 else if (qn->upper == 0) {\r
914 if (qn->is_refered != 0) { /* /(?<n>..){0}/ */\r
915 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);\r
916 if (r) return r;\r
917 r = compile_tree(qn->target, reg);\r
918 }\r
919 else\r
920 r = 0;\r
921 }\r
922 else if (qn->upper == 1 && qn->greedy) {\r
923 if (qn->lower == 0) {\r
924 if (CKN_ON) {\r
925 r = add_opcode(reg, OP_STATE_CHECK_PUSH);\r
926 if (r) return r;\r
927 r = add_state_check_num(reg, ckn);\r
928 if (r) return r;\r
929 r = add_rel_addr(reg, tlen);\r
930 }\r
931 else {\r
932 r = add_opcode_rel_addr(reg, OP_PUSH, tlen);\r
933 }\r
934 if (r) return r;\r
935 }\r
936\r
937 r = compile_tree(qn->target, reg);\r
938 }\r
939 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */\r
940 if (CKN_ON) {\r
941 r = add_opcode(reg, OP_STATE_CHECK_PUSH);\r
942 if (r) return r;\r
943 r = add_state_check_num(reg, ckn);\r
944 if (r) return r;\r
945 r = add_rel_addr(reg, SIZE_OP_JUMP);\r
946 }\r
947 else {\r
948 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);\r
949 }\r
950\r
951 if (r) return r;\r
952 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);\r
953 if (r) return r;\r
954 r = compile_tree(qn->target, reg);\r
955 }\r
956 else {\r
957 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);\r
958 if (CKN_ON) {\r
959 if (r) return r;\r
960 r = add_opcode(reg, OP_STATE_CHECK);\r
961 if (r) return r;\r
962 r = add_state_check_num(reg, ckn);\r
963 }\r
964 }\r
965 return r;\r
966}\r
967\r
968#else /* USE_COMBINATION_EXPLOSION_CHECK */\r
969\r
970static int\r
971compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)\r
972{\r
973 int len, mod_tlen;\r
974 int infinite = IS_REPEAT_INFINITE(qn->upper);\r
975 int empty_info = qn->target_empty_info;\r
976 int tlen = compile_length_tree(qn->target, reg);\r
977\r
978 if (tlen < 0) return tlen;\r
979\r
980 /* anychar repeat */\r
981 if (NTYPE(qn->target) == NT_CANY) {\r
982 if (qn->greedy && infinite) {\r
983 if (IS_NOT_NULL(qn->next_head_exact))\r
984 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;\r
985 else\r
986 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;\r
987 }\r
988 }\r
989\r
990 if (empty_info != 0)\r
991 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);\r
992 else\r
993 mod_tlen = tlen;\r
994\r
995 if (infinite &&\r
996 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {\r
997 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {\r
998 len = SIZE_OP_JUMP;\r
999 }\r
1000 else {\r
1001 len = tlen * qn->lower;\r
1002 }\r
1003\r
1004 if (qn->greedy) {\r
1005 if (IS_NOT_NULL(qn->head_exact))\r
1006 len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;\r
1007 else if (IS_NOT_NULL(qn->next_head_exact))\r
1008 len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;\r
1009 else\r
1010 len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;\r
1011 }\r
1012 else\r
1013 len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;\r
1014 }\r
1015 else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */\r
1016 len = SIZE_OP_JUMP + tlen;\r
1017 }\r
1018 else if (!infinite && qn->greedy &&\r
1019 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper\r
1020 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {\r
1021 len = tlen * qn->lower;\r
1022 len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);\r
1023 }\r
1024 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */\r
1025 len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;\r
1026 }\r
1027 else {\r
1028 len = SIZE_OP_REPEAT_INC\r
1029 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;\r
1030 }\r
1031\r
1032 return len;\r
1033}\r
1034\r
1035static int\r
1036compile_quantifier_node(QtfrNode* qn, regex_t* reg)\r
1037{\r
1038 int i, r, mod_tlen;\r
1039 int infinite = IS_REPEAT_INFINITE(qn->upper);\r
1040 int empty_info = qn->target_empty_info;\r
1041 int tlen = compile_length_tree(qn->target, reg);\r
1042\r
1043 if (tlen < 0) return tlen;\r
1044\r
1045 if (is_anychar_star_quantifier(qn)) {\r
1046 r = compile_tree_n_times(qn->target, qn->lower, reg);\r
1047 if (r) return r;\r
1048 if (IS_NOT_NULL(qn->next_head_exact)) {\r
1049 if (IS_MULTILINE(reg->options))\r
1050 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);\r
1051 else\r
1052 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);\r
1053 if (r) return r;\r
1054 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);\r
1055 }\r
1056 else {\r
1057 if (IS_MULTILINE(reg->options))\r
1058 return add_opcode(reg, OP_ANYCHAR_ML_STAR);\r
1059 else\r
1060 return add_opcode(reg, OP_ANYCHAR_STAR);\r
1061 }\r
1062 }\r
1063\r
1064 if (empty_info != 0)\r
1065 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);\r
1066 else\r
1067 mod_tlen = tlen;\r
1068\r
1069 if (infinite &&\r
1070 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {\r
1071 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {\r
1072 if (qn->greedy) {\r
1073 if (IS_NOT_NULL(qn->head_exact))\r
1074 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);\r
1075 else if (IS_NOT_NULL(qn->next_head_exact))\r
1076 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);\r
1077 else\r
1078 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);\r
1079 }\r
1080 else {\r
1081 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);\r
1082 }\r
1083 if (r) return r;\r
1084 }\r
1085 else {\r
1086 r = compile_tree_n_times(qn->target, qn->lower, reg);\r
1087 if (r) return r;\r
1088 }\r
1089\r
1090 if (qn->greedy) {\r
1091 if (IS_NOT_NULL(qn->head_exact)) {\r
1092 r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,\r
1093 mod_tlen + SIZE_OP_JUMP);\r
1094 if (r) return r;\r
1095 add_bytes(reg, NSTR(qn->head_exact)->s, 1);\r
1096 r = compile_tree_empty_check(qn->target, reg, empty_info);\r
1097 if (r) return r;\r
1098 r = add_opcode_rel_addr(reg, OP_JUMP,\r
1099 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));\r
1100 }\r
1101 else if (IS_NOT_NULL(qn->next_head_exact)) {\r
1102 r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,\r
1103 mod_tlen + SIZE_OP_JUMP);\r
1104 if (r) return r;\r
1105 add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);\r
1106 r = compile_tree_empty_check(qn->target, reg, empty_info);\r
1107 if (r) return r;\r
1108 r = add_opcode_rel_addr(reg, OP_JUMP,\r
1109 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));\r
1110 }\r
1111 else {\r
1112 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);\r
1113 if (r) return r;\r
1114 r = compile_tree_empty_check(qn->target, reg, empty_info);\r
1115 if (r) return r;\r
1116 r = add_opcode_rel_addr(reg, OP_JUMP,\r
1117 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));\r
1118 }\r
1119 }\r
1120 else {\r
1121 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);\r
1122 if (r) return r;\r
1123 r = compile_tree_empty_check(qn->target, reg, empty_info);\r
1124 if (r) return r;\r
1125 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));\r
1126 }\r
1127 }\r
1128 else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */\r
1129 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);\r
1130 if (r) return r;\r
1131 r = compile_tree(qn->target, reg);\r
1132 }\r
1133 else if (!infinite && qn->greedy &&\r
1134 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper\r
1135 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {\r
1136 int n = qn->upper - qn->lower;\r
1137\r
1138 r = compile_tree_n_times(qn->target, qn->lower, reg);\r
1139 if (r) return r;\r
1140\r
1141 for (i = 0; i < n; i++) {\r
1142 r = add_opcode_rel_addr(reg, OP_PUSH,\r
1143 (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);\r
1144 if (r) return r;\r
1145 r = compile_tree(qn->target, reg);\r
1146 if (r) return r;\r
1147 }\r
1148 }\r
1149 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */\r
1150 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);\r
1151 if (r) return r;\r
1152 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);\r
1153 if (r) return r;\r
1154 r = compile_tree(qn->target, reg);\r
1155 }\r
1156 else {\r
1157 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);\r
1158 }\r
1159 return r;\r
1160}\r
1161#endif /* USE_COMBINATION_EXPLOSION_CHECK */\r
1162\r
1163static int\r
1164compile_length_option_node(EncloseNode* node, regex_t* reg)\r
1165{\r
1166 int tlen;\r
1167 OnigOptionType prev = reg->options;\r
1168\r
1169 reg->options = node->option;\r
1170 tlen = compile_length_tree(node->target, reg);\r
1171 reg->options = prev;\r
1172\r
1173 if (tlen < 0) return tlen;\r
1174\r
1175 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {\r
1176 return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL\r
1177 + tlen + SIZE_OP_SET_OPTION;\r
1178 }\r
1179 else\r
1180 return tlen;\r
1181}\r
1182\r
1183static int\r
1184compile_option_node(EncloseNode* node, regex_t* reg)\r
1185{\r
1186 int r;\r
1187 OnigOptionType prev = reg->options;\r
1188\r
1189 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {\r
1190 r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);\r
1191 if (r) return r;\r
1192 r = add_opcode_option(reg, OP_SET_OPTION, prev);\r
1193 if (r) return r;\r
1194 r = add_opcode(reg, OP_FAIL);\r
1195 if (r) return r;\r
1196 }\r
1197\r
1198 reg->options = node->option;\r
1199 r = compile_tree(node->target, reg);\r
1200 reg->options = prev;\r
1201\r
1202 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {\r
1203 if (r) return r;\r
1204 r = add_opcode_option(reg, OP_SET_OPTION, prev);\r
1205 }\r
1206 return r;\r
1207}\r
1208\r
1209static int\r
1210compile_length_enclose_node(EncloseNode* node, regex_t* reg)\r
1211{\r
1212 int len;\r
1213 int tlen;\r
fbaab715
QS
1214 QtfrNode* qn;\r
1215\r
1216 qn = NULL;\r
14b0e578
CS
1217\r
1218 if (node->type == ENCLOSE_OPTION)\r
1219 return compile_length_option_node(node, reg);\r
1220\r
1221 if (node->target) {\r
1222 tlen = compile_length_tree(node->target, reg);\r
1223 if (tlen < 0) return tlen;\r
1224 }\r
1225 else\r
1226 tlen = 0;\r
1227\r
1228 switch (node->type) {\r
1229 case ENCLOSE_MEMORY:\r
1230#ifdef USE_SUBEXP_CALL\r
1231 if (IS_ENCLOSE_CALLED(node)) {\r
1232 len = SIZE_OP_MEMORY_START_PUSH + tlen\r
1233 + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;\r
1234 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))\r
1235 len += (IS_ENCLOSE_RECURSION(node)\r
1236 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);\r
1237 else\r
1238 len += (IS_ENCLOSE_RECURSION(node)\r
1239 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);\r
1240 }\r
1241 else\r
1242#endif\r
1243 {\r
1244 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))\r
1245 len = SIZE_OP_MEMORY_START_PUSH;\r
1246 else\r
1247 len = SIZE_OP_MEMORY_START;\r
1248\r
1249 len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)\r
1250 ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);\r
1251 }\r
1252 break;\r
1253\r
1254 case ENCLOSE_STOP_BACKTRACK:\r
1255 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {\r
b0c2b797
QS
1256 if (node->target == NULL) {\r
1257 CHECK_NULL_RETURN_MEMERR(node->target);\r
1258 }\r
fbaab715 1259 qn = NQTFR(node->target);\r
14b0e578
CS
1260 tlen = compile_length_tree(qn->target, reg);\r
1261 if (tlen < 0) return tlen;\r
1262\r
1263 len = tlen * qn->lower\r
1264 + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;\r
1265 }\r
1266 else {\r
1267 len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;\r
1268 }\r
1269 break;\r
1270\r
1271 default:\r
1272 return ONIGERR_TYPE_BUG;\r
1273 break;\r
1274 }\r
1275\r
1276 return len;\r
1277}\r
1278\r
1279static int get_char_length_tree(Node* node, regex_t* reg, int* len);\r
1280\r
1281static int\r
1282compile_enclose_node(EncloseNode* node, regex_t* reg)\r
1283{\r
1284 int r, len;\r
1285\r
1286 if (node->type == ENCLOSE_OPTION)\r
1287 return compile_option_node(node, reg);\r
1288\r
1289 switch (node->type) {\r
1290 case ENCLOSE_MEMORY:\r
1291#ifdef USE_SUBEXP_CALL\r
1292 if (IS_ENCLOSE_CALLED(node)) {\r
1293 r = add_opcode(reg, OP_CALL);\r
1294 if (r) return r;\r
1295 node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;\r
1296 node->state |= NST_ADDR_FIXED;\r
1297 r = add_abs_addr(reg, (int )node->call_addr);\r
1298 if (r) return r;\r
1299 len = compile_length_tree(node->target, reg);\r
1300 len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);\r
1301 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))\r
1302 len += (IS_ENCLOSE_RECURSION(node)\r
1303 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);\r
1304 else\r
1305 len += (IS_ENCLOSE_RECURSION(node)\r
1306 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);\r
1307\r
1308 r = add_opcode_rel_addr(reg, OP_JUMP, len);\r
1309 if (r) return r;\r
1310 }\r
1311#endif\r
1312 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))\r
1313 r = add_opcode(reg, OP_MEMORY_START_PUSH);\r
1314 else\r
1315 r = add_opcode(reg, OP_MEMORY_START);\r
1316 if (r) return r;\r
1317 r = add_mem_num(reg, node->regnum);\r
1318 if (r) return r;\r
1319 r = compile_tree(node->target, reg);\r
1320 if (r) return r;\r
1321#ifdef USE_SUBEXP_CALL\r
1322 if (IS_ENCLOSE_CALLED(node)) {\r
1323 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))\r
1324 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)\r
1325 ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));\r
1326 else\r
1327 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)\r
1328 ? OP_MEMORY_END_REC : OP_MEMORY_END));\r
1329\r
1330 if (r) return r;\r
1331 r = add_mem_num(reg, node->regnum);\r
1332 if (r) return r;\r
1333 r = add_opcode(reg, OP_RETURN);\r
1334 }\r
1335 else\r
1336#endif\r
1337 {\r
1338 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))\r
1339 r = add_opcode(reg, OP_MEMORY_END_PUSH);\r
1340 else\r
1341 r = add_opcode(reg, OP_MEMORY_END);\r
1342 if (r) return r;\r
1343 r = add_mem_num(reg, node->regnum);\r
1344 }\r
1345 break;\r
1346\r
1347 case ENCLOSE_STOP_BACKTRACK:\r
1348 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {\r
1349 QtfrNode* qn = NQTFR(node->target);\r
1350 r = compile_tree_n_times(qn->target, qn->lower, reg);\r
1351 if (r) return r;\r
1352\r
1353 len = compile_length_tree(qn->target, reg);\r
1354 if (len < 0) return len;\r
1355\r
1356 r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);\r
1357 if (r) return r;\r
1358 r = compile_tree(qn->target, reg);\r
1359 if (r) return r;\r
1360 r = add_opcode(reg, OP_POP);\r
1361 if (r) return r;\r
1362 r = add_opcode_rel_addr(reg, OP_JUMP,\r
1363 -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));\r
1364 }\r
1365 else {\r
1366 r = add_opcode(reg, OP_PUSH_STOP_BT);\r
1367 if (r) return r;\r
1368 r = compile_tree(node->target, reg);\r
1369 if (r) return r;\r
1370 r = add_opcode(reg, OP_POP_STOP_BT);\r
1371 }\r
1372 break;\r
1373\r
1374 default:\r
1375 return ONIGERR_TYPE_BUG;\r
1376 break;\r
1377 }\r
1378\r
1379 return r;\r
1380}\r
1381\r
1382static int\r
1383compile_length_anchor_node(AnchorNode* node, regex_t* reg)\r
1384{\r
1385 int len;\r
1386 int tlen = 0;\r
1387\r
1388 if (node->target) {\r
1389 tlen = compile_length_tree(node->target, reg);\r
1390 if (tlen < 0) return tlen;\r
1391 }\r
1392\r
1393 switch (node->type) {\r
1394 case ANCHOR_PREC_READ:\r
1395 len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;\r
1396 break;\r
1397 case ANCHOR_PREC_READ_NOT:\r
1398 len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;\r
1399 break;\r
1400 case ANCHOR_LOOK_BEHIND:\r
1401 len = SIZE_OP_LOOK_BEHIND + tlen;\r
1402 break;\r
1403 case ANCHOR_LOOK_BEHIND_NOT:\r
1404 len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT;\r
1405 break;\r
1406\r
1407 default:\r
1408 len = SIZE_OPCODE;\r
1409 break;\r
1410 }\r
1411\r
1412 return len;\r
1413}\r
1414\r
1415static int\r
1416compile_anchor_node(AnchorNode* node, regex_t* reg)\r
1417{\r
1418 int r, len;\r
1419\r
1420 switch (node->type) {\r
1421 case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break;\r
1422 case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break;\r
1423 case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break;\r
1424 case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break;\r
1425 case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break;\r
1426 case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;\r
1427\r
1428 case ANCHOR_WORD_BOUND: r = add_opcode(reg, OP_WORD_BOUND); break;\r
1429 case ANCHOR_NOT_WORD_BOUND: r = add_opcode(reg, OP_NOT_WORD_BOUND); break;\r
1430#ifdef USE_WORD_BEGIN_END\r
1431 case ANCHOR_WORD_BEGIN: r = add_opcode(reg, OP_WORD_BEGIN); break;\r
1432 case ANCHOR_WORD_END: r = add_opcode(reg, OP_WORD_END); break;\r
1433#endif\r
1434\r
1435 case ANCHOR_PREC_READ:\r
1436 r = add_opcode(reg, OP_PUSH_POS);\r
1437 if (r) return r;\r
1438 r = compile_tree(node->target, reg);\r
1439 if (r) return r;\r
1440 r = add_opcode(reg, OP_POP_POS);\r
1441 break;\r
1442\r
1443 case ANCHOR_PREC_READ_NOT:\r
1444 len = compile_length_tree(node->target, reg);\r
1445 if (len < 0) return len;\r
1446 r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS);\r
1447 if (r) return r;\r
1448 r = compile_tree(node->target, reg);\r
1449 if (r) return r;\r
1450 r = add_opcode(reg, OP_FAIL_POS);\r
1451 break;\r
1452\r
1453 case ANCHOR_LOOK_BEHIND:\r
1454 {\r
1455 int n;\r
1456 r = add_opcode(reg, OP_LOOK_BEHIND);\r
1457 if (r) return r;\r
1458 if (node->char_len < 0) {\r
1459 r = get_char_length_tree(node->target, reg, &n);\r
1460 if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;\r
1461 }\r
1462 else\r
1463 n = node->char_len;\r
1464 r = add_length(reg, n);\r
1465 if (r) return r;\r
1466 r = compile_tree(node->target, reg);\r
1467 }\r
1468 break;\r
1469\r
1470 case ANCHOR_LOOK_BEHIND_NOT:\r
1471 {\r
1472 int n;\r
1473 len = compile_length_tree(node->target, reg);\r
1474 r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,\r
1475 len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);\r
1476 if (r) return r;\r
1477 if (node->char_len < 0) {\r
1478 r = get_char_length_tree(node->target, reg, &n);\r
1479 if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;\r
1480 }\r
1481 else\r
1482 n = node->char_len;\r
1483 r = add_length(reg, n);\r
1484 if (r) return r;\r
1485 r = compile_tree(node->target, reg);\r
1486 if (r) return r;\r
1487 r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);\r
1488 }\r
1489 break;\r
1490\r
1491 default:\r
1492 return ONIGERR_TYPE_BUG;\r
1493 break;\r
1494 }\r
1495\r
1496 return r;\r
1497}\r
1498\r
1499static int\r
1500compile_length_tree(Node* node, regex_t* reg)\r
1501{\r
1502 int len, type, r;\r
1503\r
1504 type = NTYPE(node);\r
1505 switch (type) {\r
1506 case NT_LIST:\r
1507 len = 0;\r
1508 do {\r
1509 r = compile_length_tree(NCAR(node), reg);\r
1510 if (r < 0) return r;\r
1511 len += r;\r
1512 } while (IS_NOT_NULL(node = NCDR(node)));\r
1513 r = len;\r
1514 break;\r
1515\r
1516 case NT_ALT:\r
1517 {\r
1518 int n;\r
1519\r
1520 n = r = 0;\r
1521 do {\r
1522 r += compile_length_tree(NCAR(node), reg);\r
1523 n++;\r
1524 } while (IS_NOT_NULL(node = NCDR(node)));\r
1525 r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);\r
1526 }\r
1527 break;\r
1528\r
1529 case NT_STR:\r
1530 if (NSTRING_IS_RAW(node))\r
1531 r = compile_length_string_raw_node(NSTR(node), reg);\r
1532 else\r
1533 r = compile_length_string_node(node, reg);\r
1534 break;\r
1535\r
1536 case NT_CCLASS:\r
1537 r = compile_length_cclass_node(NCCLASS(node), reg);\r
1538 break;\r
1539\r
1540 case NT_CTYPE:\r
1541 case NT_CANY:\r
1542 r = SIZE_OPCODE;\r
1543 break;\r
1544\r
1545 case NT_BREF:\r
1546 {\r
1547 BRefNode* br = NBREF(node);\r
1548\r
1549#ifdef USE_BACKREF_WITH_LEVEL\r
1550 if (IS_BACKREF_NEST_LEVEL(br)) {\r
1551 r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +\r
1552 SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);\r
1553 }\r
1554 else\r
1555#endif\r
1556 if (br->back_num == 1) {\r
1557 r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)\r
1558 ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));\r
1559 }\r
1560 else {\r
1561 r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);\r
1562 }\r
1563 }\r
1564 break;\r
1565\r
1566#ifdef USE_SUBEXP_CALL\r
1567 case NT_CALL:\r
1568 r = SIZE_OP_CALL;\r
1569 break;\r
1570#endif\r
1571\r
1572 case NT_QTFR:\r
1573 r = compile_length_quantifier_node(NQTFR(node), reg);\r
1574 break;\r
1575\r
1576 case NT_ENCLOSE:\r
1577 r = compile_length_enclose_node(NENCLOSE(node), reg);\r
1578 break;\r
1579\r
1580 case NT_ANCHOR:\r
1581 r = compile_length_anchor_node(NANCHOR(node), reg);\r
1582 break;\r
1583\r
1584 default:\r
1585 return ONIGERR_TYPE_BUG;\r
1586 break;\r
1587 }\r
1588\r
1589 return r;\r
1590}\r
1591\r
1592static int\r
1593compile_tree(Node* node, regex_t* reg)\r
1594{\r
1595 int n, type, len, pos, r = 0;\r
1596\r
1597 type = NTYPE(node);\r
1598 switch (type) {\r
1599 case NT_LIST:\r
1600 do {\r
1601 r = compile_tree(NCAR(node), reg);\r
1602 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));\r
1603 break;\r
1604\r
1605 case NT_ALT:\r
1606 {\r
1607 Node* x = node;\r
1608 len = 0;\r
1609 do {\r
1610 len += compile_length_tree(NCAR(x), reg);\r
1611 if (NCDR(x) != NULL) {\r
1612 len += SIZE_OP_PUSH + SIZE_OP_JUMP;\r
1613 }\r
1614 } while (IS_NOT_NULL(x = NCDR(x)));\r
1615 pos = reg->used + len; /* goal position */\r
1616\r
1617 do {\r
1618 len = compile_length_tree(NCAR(node), reg);\r
1619 if (IS_NOT_NULL(NCDR(node))) {\r
1620 r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);\r
1621 if (r) break;\r
1622 }\r
1623 r = compile_tree(NCAR(node), reg);\r
1624 if (r) break;\r
1625 if (IS_NOT_NULL(NCDR(node))) {\r
1626 len = pos - (reg->used + SIZE_OP_JUMP);\r
1627 r = add_opcode_rel_addr(reg, OP_JUMP, len);\r
1628 if (r) break;\r
1629 }\r
1630 } while (IS_NOT_NULL(node = NCDR(node)));\r
1631 }\r
1632 break;\r
1633\r
1634 case NT_STR:\r
1635 if (NSTRING_IS_RAW(node))\r
1636 r = compile_string_raw_node(NSTR(node), reg);\r
1637 else\r
1638 r = compile_string_node(node, reg);\r
1639 break;\r
1640\r
1641 case NT_CCLASS:\r
1642 r = compile_cclass_node(NCCLASS(node), reg);\r
1643 break;\r
1644\r
1645 case NT_CTYPE:\r
1646 {\r
1647 int op;\r
1648\r
1649 switch (NCTYPE(node)->ctype) {\r
1650 case ONIGENC_CTYPE_WORD:\r
1651 if (NCTYPE(node)->not != 0) op = OP_NOT_WORD;\r
1652 else op = OP_WORD;\r
1653 break;\r
1654 default:\r
1655 return ONIGERR_TYPE_BUG;\r
1656 break;\r
1657 }\r
1658 r = add_opcode(reg, op);\r
1659 }\r
1660 break;\r
1661\r
1662 case NT_CANY:\r
1663 if (IS_MULTILINE(reg->options))\r
1664 r = add_opcode(reg, OP_ANYCHAR_ML);\r
1665 else\r
1666 r = add_opcode(reg, OP_ANYCHAR);\r
1667 break;\r
1668\r
1669 case NT_BREF:\r
1670 {\r
1671 BRefNode* br = NBREF(node);\r
1672\r
1673#ifdef USE_BACKREF_WITH_LEVEL\r
1674 if (IS_BACKREF_NEST_LEVEL(br)) {\r
1675 r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);\r
1676 if (r) return r;\r
1677 r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));\r
1678 if (r) return r;\r
1679 r = add_length(reg, br->nest_level);\r
1680 if (r) return r;\r
1681\r
1682 goto add_bacref_mems;\r
1683 }\r
1684 else\r
1685#endif\r
1686 if (br->back_num == 1) {\r
1687 n = br->back_static[0];\r
1688 if (IS_IGNORECASE(reg->options)) {\r
1689 r = add_opcode(reg, OP_BACKREFN_IC);\r
1690 if (r) return r;\r
1691 r = add_mem_num(reg, n);\r
1692 }\r
1693 else {\r
1694 switch (n) {\r
1695 case 1: r = add_opcode(reg, OP_BACKREF1); break;\r
1696 case 2: r = add_opcode(reg, OP_BACKREF2); break;\r
1697 default:\r
1698 r = add_opcode(reg, OP_BACKREFN);\r
1699 if (r) return r;\r
1700 r = add_mem_num(reg, n);\r
1701 break;\r
1702 }\r
1703 }\r
1704 }\r
1705 else {\r
1706 int i;\r
1707 int* p;\r
1708\r
1709 if (IS_IGNORECASE(reg->options)) {\r
1710 r = add_opcode(reg, OP_BACKREF_MULTI_IC);\r
1711 }\r
1712 else {\r
1713 r = add_opcode(reg, OP_BACKREF_MULTI);\r
1714 }\r
1715 if (r) return r;\r
1716\r
1717#ifdef USE_BACKREF_WITH_LEVEL\r
1718 add_bacref_mems:\r
1719#endif\r
1720 r = add_length(reg, br->back_num);\r
1721 if (r) return r;\r
1722 p = BACKREFS_P(br);\r
1723 for (i = br->back_num - 1; i >= 0; i--) {\r
1724 r = add_mem_num(reg, p[i]);\r
1725 if (r) return r;\r
1726 }\r
1727 }\r
1728 }\r
1729 break;\r
1730\r
1731#ifdef USE_SUBEXP_CALL\r
1732 case NT_CALL:\r
1733 r = compile_call(NCALL(node), reg);\r
1734 break;\r
1735#endif\r
1736\r
1737 case NT_QTFR:\r
1738 r = compile_quantifier_node(NQTFR(node), reg);\r
1739 break;\r
1740\r
1741 case NT_ENCLOSE:\r
1742 r = compile_enclose_node(NENCLOSE(node), reg);\r
1743 break;\r
1744\r
1745 case NT_ANCHOR:\r
1746 r = compile_anchor_node(NANCHOR(node), reg);\r
1747 break;\r
1748\r
1749 default:\r
1750#ifdef ONIG_DEBUG\r
1751 fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));\r
1752#endif\r
1753 break;\r
1754 }\r
1755\r
1756 return r;\r
1757}\r
1758\r
1759#ifdef USE_NAMED_GROUP\r
1760\r
1761static int\r
1762noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)\r
1763{\r
1764 int r = 0;\r
1765 Node* node = *plink;\r
1766\r
1767 switch (NTYPE(node)) {\r
1768 case NT_LIST:\r
1769 case NT_ALT:\r
1770 do {\r
1771 r = noname_disable_map(&(NCAR(node)), map, counter);\r
1772 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));\r
1773 break;\r
1774\r
1775 case NT_QTFR:\r
1776 {\r
1777 Node** ptarget = &(NQTFR(node)->target);\r
1778 Node* old = *ptarget;\r
1779 r = noname_disable_map(ptarget, map, counter);\r
1780 if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {\r
1781 onig_reduce_nested_quantifier(node, *ptarget);\r
1782 }\r
1783 }\r
1784 break;\r
1785\r
1786 case NT_ENCLOSE:\r
1787 {\r
1788 EncloseNode* en = NENCLOSE(node);\r
1789 if (en->type == ENCLOSE_MEMORY) {\r
1790 if (IS_ENCLOSE_NAMED_GROUP(en)) {\r
1791 (*counter)++;\r
1792 map[en->regnum].new_val = *counter;\r
1793 en->regnum = *counter;\r
1794 r = noname_disable_map(&(en->target), map, counter);\r
1795 }\r
1796 else {\r
1797 *plink = en->target;\r
1798 en->target = NULL_NODE;\r
1799 onig_node_free(node);\r
1800 r = noname_disable_map(plink, map, counter);\r
1801 }\r
1802 }\r
1803 else\r
1804 r = noname_disable_map(&(en->target), map, counter);\r
1805 }\r
1806 break;\r
1807\r
1808 default:\r
1809 break;\r
1810 }\r
1811\r
1812 return r;\r
1813}\r
1814\r
1815static int\r
1816renumber_node_backref(Node* node, GroupNumRemap* map)\r
1817{\r
1818 int i, pos, n, old_num;\r
1819 int *backs;\r
1820 BRefNode* bn = NBREF(node);\r
1821\r
1822 if (! IS_BACKREF_NAME_REF(bn))\r
1823 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;\r
1824\r
1825 old_num = bn->back_num;\r
1826 if (IS_NULL(bn->back_dynamic))\r
1827 backs = bn->back_static;\r
1828 else\r
1829 backs = bn->back_dynamic;\r
1830\r
1831 for (i = 0, pos = 0; i < old_num; i++) {\r
1832 n = map[backs[i]].new_val;\r
1833 if (n > 0) {\r
1834 backs[pos] = n;\r
1835 pos++;\r
1836 }\r
1837 }\r
1838\r
1839 bn->back_num = pos;\r
1840 return 0;\r
1841}\r
1842\r
1843static int\r
1844renumber_by_map(Node* node, GroupNumRemap* map)\r
1845{\r
1846 int r = 0;\r
1847\r
1848 switch (NTYPE(node)) {\r
1849 case NT_LIST:\r
1850 case NT_ALT:\r
1851 do {\r
1852 r = renumber_by_map(NCAR(node), map);\r
1853 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));\r
1854 break;\r
1855 case NT_QTFR:\r
1856 r = renumber_by_map(NQTFR(node)->target, map);\r
1857 break;\r
1858 case NT_ENCLOSE:\r
1859 r = renumber_by_map(NENCLOSE(node)->target, map);\r
1860 break;\r
1861\r
1862 case NT_BREF:\r
1863 r = renumber_node_backref(node, map);\r
1864 break;\r
1865\r
1866 default:\r
1867 break;\r
1868 }\r
1869\r
1870 return r;\r
1871}\r
1872\r
1873static int\r
1874numbered_ref_check(Node* node)\r
1875{\r
1876 int r = 0;\r
1877\r
1878 switch (NTYPE(node)) {\r
1879 case NT_LIST:\r
1880 case NT_ALT:\r
1881 do {\r
1882 r = numbered_ref_check(NCAR(node));\r
1883 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));\r
1884 break;\r
1885 case NT_QTFR:\r
1886 r = numbered_ref_check(NQTFR(node)->target);\r
1887 break;\r
1888 case NT_ENCLOSE:\r
1889 r = numbered_ref_check(NENCLOSE(node)->target);\r
1890 break;\r
1891\r
1892 case NT_BREF:\r
1893 if (! IS_BACKREF_NAME_REF(NBREF(node)))\r
1894 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;\r
1895 break;\r
1896\r
1897 default:\r
1898 break;\r
1899 }\r
1900\r
1901 return r;\r
1902}\r
1903\r
1904static int\r
1905disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)\r
1906{\r
1907 int r, i, pos, counter;\r
1908 int Result;\r
1909 BitStatusType loc;\r
1910 GroupNumRemap* map;\r
1911\r
1912 map = (GroupNumRemap* )xmalloc(sizeof(GroupNumRemap) * (env->num_mem + 1));\r
1913 CHECK_NULL_RETURN_MEMERR(map);\r
1914 for (i = 1; i <= env->num_mem; i++) {\r
1915 map[i].new_val = 0;\r
1916 }\r
1917 counter = 0;\r
1918 r = noname_disable_map(root, map, &counter);\r
1919 if (r != 0) return r;\r
1920\r
1921 r = renumber_by_map(*root, map);\r
1922 if (r != 0) return r;\r
1923\r
1924 for (i = 1, pos = 1; i <= env->num_mem; i++) {\r
1925 if (map[i].new_val > 0) {\r
1926 SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];\r
1927 pos++;\r
1928 }\r
1929 }\r
1930\r
1931 loc = env->capture_history;\r
1932 BIT_STATUS_CLEAR(env->capture_history);\r
1933 for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {\r
1934 if (BIT_STATUS_AT(loc, i)) {\r
1935 BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);\r
1936 }\r
1937 }\r
1938\r
1939 env->num_mem = env->num_named;\r
1940 reg->num_mem = env->num_named;\r
1941\r
1942 Result = onig_renumber_name_table(reg, map);\r
1943 xfree(map);\r
1944 return Result;\r
1945}\r
1946#endif /* USE_NAMED_GROUP */\r
1947\r
1948#ifdef USE_SUBEXP_CALL\r
1949static int\r
1950unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)\r
1951{\r
1952 int i, offset;\r
1953 EncloseNode* en;\r
1954 AbsAddrType addr;\r
1955\r
1956 for (i = 0; i < uslist->num; i++) {\r
1957 en = NENCLOSE(uslist->us[i].target);\r
1958 if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;\r
1959 addr = en->call_addr;\r
1960 offset = uslist->us[i].offset;\r
1961\r
1962 BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);\r
1963 }\r
1964 return 0;\r
1965}\r
1966#endif\r
1967\r
1968#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT\r
1969static int\r
1970quantifiers_memory_node_info(Node* node)\r
1971{\r
1972 int r = 0;\r
1973\r
1974 switch (NTYPE(node)) {\r
1975 case NT_LIST:\r
1976 case NT_ALT:\r
1977 {\r
1978 int v;\r
1979 do {\r
1980 v = quantifiers_memory_node_info(NCAR(node));\r
1981 if (v > r) r = v;\r
1982 } while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));\r
1983 }\r
1984 break;\r
1985\r
1986#ifdef USE_SUBEXP_CALL\r
1987 case NT_CALL:\r
1988 if (IS_CALL_RECURSION(NCALL(node))) {\r
1989 return NQ_TARGET_IS_EMPTY_REC; /* tiny version */\r
1990 }\r
1991 else\r
1992 r = quantifiers_memory_node_info(NCALL(node)->target);\r
1993 break;\r
1994#endif\r
1995\r
1996 case NT_QTFR:\r
1997 {\r
1998 QtfrNode* qn = NQTFR(node);\r
1999 if (qn->upper != 0) {\r
2000 r = quantifiers_memory_node_info(qn->target);\r
2001 }\r
2002 }\r
2003 break;\r
2004\r
2005 case NT_ENCLOSE:\r
2006 {\r
2007 EncloseNode* en = NENCLOSE(node);\r
2008 switch (en->type) {\r
2009 case ENCLOSE_MEMORY:\r
2010 return NQ_TARGET_IS_EMPTY_MEM;\r
2011 break;\r
2012\r
2013 case ENCLOSE_OPTION:\r
2014 case ENCLOSE_STOP_BACKTRACK:\r
2015 r = quantifiers_memory_node_info(en->target);\r
2016 break;\r
2017 default:\r
2018 break;\r
2019 }\r
2020 }\r
2021 break;\r
2022\r
2023 case NT_BREF:\r
2024 case NT_STR:\r
2025 case NT_CTYPE:\r
2026 case NT_CCLASS:\r
2027 case NT_CANY:\r
2028 case NT_ANCHOR:\r
2029 default:\r
2030 break;\r
2031 }\r
2032\r
2033 return r;\r
2034}\r
2035#endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */\r
2036\r
2037static int\r
2038get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)\r
2039{\r
2040 OnigDistance tmin;\r
2041 int r = 0;\r
2042\r
2043 *min = 0;\r
2044 switch (NTYPE(node)) {\r
2045 case NT_BREF:\r
2046 {\r
2047 int i;\r
2048 int* backs;\r
2049 Node** nodes = SCANENV_MEM_NODES(env);\r
2050 BRefNode* br = NBREF(node);\r
2051 if (br->state & NST_RECURSION) break;\r
2052\r
2053 backs = BACKREFS_P(br);\r
2054 if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF;\r
2055 r = get_min_match_length(nodes[backs[0]], min, env);\r
2056 if (r != 0) break;\r
2057 for (i = 1; i < br->back_num; i++) {\r
2058 if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;\r
2059 r = get_min_match_length(nodes[backs[i]], &tmin, env);\r
2060 if (r != 0) break;\r
2061 if (*min > tmin) *min = tmin;\r
2062 }\r
2063 }\r
2064 break;\r
2065\r
2066#ifdef USE_SUBEXP_CALL\r
2067 case NT_CALL:\r
2068 if (IS_CALL_RECURSION(NCALL(node))) {\r
2069 EncloseNode* en = NENCLOSE(NCALL(node)->target);\r
2070 if (IS_ENCLOSE_MIN_FIXED(en))\r
2071 *min = en->min_len;\r
2072 }\r
2073 else\r
2074 r = get_min_match_length(NCALL(node)->target, min, env);\r
2075 break;\r
2076#endif\r
2077\r
2078 case NT_LIST:\r
2079 do {\r
2080 r = get_min_match_length(NCAR(node), &tmin, env);\r
2081 if (r == 0) *min += tmin;\r
2082 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));\r
2083 break;\r
2084\r
2085 case NT_ALT:\r
2086 {\r
2087 Node *x, *y;\r
2088 y = node;\r
2089 do {\r
2090 x = NCAR(y);\r
2091 r = get_min_match_length(x, &tmin, env);\r
2092 if (r != 0) break;\r
2093 if (y == node) *min = tmin;\r
2094 else if (*min > tmin) *min = tmin;\r
2095 } while (r == 0 && IS_NOT_NULL(y = NCDR(y)));\r
2096 }\r
2097 break;\r
2098\r
2099 case NT_STR:\r
2100 {\r
2101 StrNode* sn = NSTR(node);\r
2102 *min = (OnigDistance)(sn->end - sn->s);\r
2103 }\r
2104 break;\r
2105\r
2106 case NT_CTYPE:\r
2107 *min = 1;\r
2108 break;\r
2109\r
2110 case NT_CCLASS:\r
2111 case NT_CANY:\r
2112 *min = 1;\r
2113 break;\r
2114\r
2115 case NT_QTFR:\r
2116 {\r
2117 QtfrNode* qn = NQTFR(node);\r
2118\r
2119 if (qn->lower > 0) {\r
2120 r = get_min_match_length(qn->target, min, env);\r
2121 if (r == 0)\r
2122 *min = distance_multiply(*min, qn->lower);\r
2123 }\r
2124 }\r
2125 break;\r
2126\r
2127 case NT_ENCLOSE:\r
2128 {\r
2129 EncloseNode* en = NENCLOSE(node);\r
2130 switch (en->type) {\r
2131 case ENCLOSE_MEMORY:\r
2132#ifdef USE_SUBEXP_CALL\r
2133 if (IS_ENCLOSE_MIN_FIXED(en))\r
2134 *min = en->min_len;\r
2135 else {\r
2136 r = get_min_match_length(en->target, min, env);\r
2137 if (r == 0) {\r
2138 en->min_len = *min;\r
2139 SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);\r
2140 }\r
2141 }\r
2142 break;\r
2143#endif\r
2144 case ENCLOSE_OPTION:\r
2145 case ENCLOSE_STOP_BACKTRACK:\r
2146 r = get_min_match_length(en->target, min, env);\r
2147 break;\r
2148 }\r
2149 }\r
2150 break;\r
2151\r
2152 case NT_ANCHOR:\r
2153 default:\r
2154 break;\r
2155 }\r
2156\r
2157 return r;\r
2158}\r
2159\r
2160static int\r
2161get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)\r
2162{\r
2163 OnigDistance tmax;\r
2164 int r = 0;\r
2165\r
2166 *max = 0;\r
2167 switch (NTYPE(node)) {\r
2168 case NT_LIST:\r
2169 do {\r
2170 r = get_max_match_length(NCAR(node), &tmax, env);\r
2171 if (r == 0)\r
2172 *max = distance_add(*max, tmax);\r
2173 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));\r
2174 break;\r
2175\r
2176 case NT_ALT:\r
2177 do {\r
2178 r = get_max_match_length(NCAR(node), &tmax, env);\r
2179 if (r == 0 && *max < tmax) *max = tmax;\r
2180 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));\r
2181 break;\r
2182\r
2183 case NT_STR:\r
2184 {\r
2185 StrNode* sn = NSTR(node);\r
2186 *max = (OnigDistance)(sn->end - sn->s);\r
2187 }\r
2188 break;\r
2189\r
2190 case NT_CTYPE:\r
2191 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);\r
2192 break;\r
2193\r
2194 case NT_CCLASS:\r
2195 case NT_CANY:\r
2196 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);\r
2197 break;\r
2198\r
2199 case NT_BREF:\r
2200 {\r
2201 int i;\r
2202 int* backs;\r
2203 Node** nodes = SCANENV_MEM_NODES(env);\r
2204 BRefNode* br = NBREF(node);\r
2205 if (br->state & NST_RECURSION) {\r
2206 *max = ONIG_INFINITE_DISTANCE;\r
2207 break;\r
2208 }\r
2209 backs = BACKREFS_P(br);\r
2210 for (i = 0; i < br->back_num; i++) {\r
2211 if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;\r
2212 r = get_max_match_length(nodes[backs[i]], &tmax, env);\r
2213 if (r != 0) break;\r
2214 if (*max < tmax) *max = tmax;\r
2215 }\r
2216 }\r
2217 break;\r
2218\r
2219#ifdef USE_SUBEXP_CALL\r
2220 case NT_CALL:\r
2221 if (! IS_CALL_RECURSION(NCALL(node)))\r
2222 r = get_max_match_length(NCALL(node)->target, max, env);\r
2223 else\r
2224 *max = ONIG_INFINITE_DISTANCE;\r
2225 break;\r
2226#endif\r
2227\r
2228 case NT_QTFR:\r
2229 {\r
2230 QtfrNode* qn = NQTFR(node);\r
2231\r
2232 if (qn->upper != 0) {\r
2233 r = get_max_match_length(qn->target, max, env);\r
2234 if (r == 0 && *max != 0) {\r
2235 if (! IS_REPEAT_INFINITE(qn->upper))\r
2236 *max = distance_multiply(*max, qn->upper);\r
2237 else\r
2238 *max = ONIG_INFINITE_DISTANCE;\r
2239 }\r
2240 }\r
2241 }\r
2242 break;\r
2243\r
2244 case NT_ENCLOSE:\r
2245 {\r
2246 EncloseNode* en = NENCLOSE(node);\r
2247 switch (en->type) {\r
2248 case ENCLOSE_MEMORY:\r
2249#ifdef USE_SUBEXP_CALL\r
2250 if (IS_ENCLOSE_MAX_FIXED(en))\r
2251 *max = en->max_len;\r
2252 else {\r
2253 r = get_max_match_length(en->target, max, env);\r
2254 if (r == 0) {\r
2255 en->max_len = *max;\r
2256 SET_ENCLOSE_STATUS(node, NST_MAX_FIXED);\r
2257 }\r
2258 }\r
2259 break;\r
2260#endif\r
2261 case ENCLOSE_OPTION:\r
2262 case ENCLOSE_STOP_BACKTRACK:\r
2263 r = get_max_match_length(en->target, max, env);\r
2264 break;\r
2265 }\r
2266 }\r
2267 break;\r
2268\r
2269 case NT_ANCHOR:\r
2270 default:\r
2271 break;\r
2272 }\r
2273\r
2274 return r;\r
2275}\r
2276\r
2277#define GET_CHAR_LEN_VARLEN -1\r
2278#define GET_CHAR_LEN_TOP_ALT_VARLEN -2\r
2279\r
2280/* fixed size pattern node only */\r
2281static int\r
2282get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)\r
2283{\r
2284 int tlen;\r
2285 int r = 0;\r
2286\r
2287 level++;\r
2288 *len = 0;\r
2289 switch (NTYPE(node)) {\r
2290 case NT_LIST:\r
2291 do {\r
2292 r = get_char_length_tree1(NCAR(node), reg, &tlen, level);\r
2293 if (r == 0)\r
2294 *len = distance_add(*len, tlen);\r
2295 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));\r
2296 break;\r
2297\r
2298 case NT_ALT:\r
2299 {\r
2300 int tlen2;\r
2301 int varlen = 0;\r
2302\r
2303 r = get_char_length_tree1(NCAR(node), reg, &tlen, level);\r
2304 while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {\r
2305 r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);\r
2306 if (r == 0) {\r
2307 if (tlen != tlen2)\r
2308 varlen = 1;\r
2309 }\r
2310 }\r
2311 if (r == 0) {\r
2312 if (varlen != 0) {\r
2313 if (level == 1)\r
2314 r = GET_CHAR_LEN_TOP_ALT_VARLEN;\r
2315 else\r
2316 r = GET_CHAR_LEN_VARLEN;\r
2317 }\r
2318 else\r
2319 *len = tlen;\r
2320 }\r
2321 }\r
2322 break;\r
2323\r
2324 case NT_STR:\r
2325 {\r
2326 StrNode* sn = NSTR(node);\r
2327 UChar *s = sn->s;\r
2328 while (s < sn->end) {\r
2329 s += enclen(reg->enc, s);\r
2330 (*len)++;\r
2331 }\r
2332 }\r
2333 break;\r
2334\r
2335 case NT_QTFR:\r
2336 {\r
2337 QtfrNode* qn = NQTFR(node);\r
2338 if (qn->lower == qn->upper) {\r
2339 r = get_char_length_tree1(qn->target, reg, &tlen, level);\r
2340 if (r == 0)\r
2341 *len = distance_multiply(tlen, qn->lower);\r
2342 }\r
2343 else\r
2344 r = GET_CHAR_LEN_VARLEN;\r
2345 }\r
2346 break;\r
2347\r
2348#ifdef USE_SUBEXP_CALL\r
2349 case NT_CALL:\r
2350 if (! IS_CALL_RECURSION(NCALL(node)))\r
2351 r = get_char_length_tree1(NCALL(node)->target, reg, len, level);\r
2352 else\r
2353 r = GET_CHAR_LEN_VARLEN;\r
2354 break;\r
2355#endif\r
2356\r
2357 case NT_CTYPE:\r
2358 *len = 1;\r
2359 break;\r
2360\r
2361 case NT_CCLASS:\r
2362 case NT_CANY:\r
2363 *len = 1;\r
2364 break;\r
2365\r
2366 case NT_ENCLOSE:\r
2367 {\r
2368 EncloseNode* en = NENCLOSE(node);\r
2369 switch (en->type) {\r
2370 case ENCLOSE_MEMORY:\r
2371#ifdef USE_SUBEXP_CALL\r
2372 if (IS_ENCLOSE_CLEN_FIXED(en))\r
2373 *len = en->char_len;\r
2374 else {\r
2375 r = get_char_length_tree1(en->target, reg, len, level);\r
2376 if (r == 0) {\r
2377 en->char_len = *len;\r
2378 SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED);\r
2379 }\r
2380 }\r
2381 break;\r
2382#endif\r
2383 case ENCLOSE_OPTION:\r
2384 case ENCLOSE_STOP_BACKTRACK:\r
2385 r = get_char_length_tree1(en->target, reg, len, level);\r
2386 break;\r
2387 default:\r
2388 break;\r
2389 }\r
2390 }\r
2391 break;\r
2392\r
2393 case NT_ANCHOR:\r
2394 break;\r
2395\r
2396 default:\r
2397 r = GET_CHAR_LEN_VARLEN;\r
2398 break;\r
2399 }\r
2400\r
2401 return r;\r
2402}\r
2403\r
2404static int\r
2405get_char_length_tree(Node* node, regex_t* reg, int* len)\r
2406{\r
2407 return get_char_length_tree1(node, reg, len, 0);\r
2408}\r
2409\r
2410/* x is not included y ==> 1 : 0 */\r
2411static int\r
2412is_not_included(Node* x, Node* y, regex_t* reg)\r
2413{\r
2414 int i, len;\r
2415 OnigCodePoint code;\r
2416 UChar *p;\r
2417 int ytype;\r
2418\r
2419 retry:\r
2420 ytype = NTYPE(y);\r
2421 switch (NTYPE(x)) {\r
2422 case NT_CTYPE:\r
2423 {\r
2424 switch (ytype) {\r
2425 case NT_CTYPE:\r
2426 if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&\r
2427 NCTYPE(y)->not != NCTYPE(x)->not)\r
2428 return 1;\r
2429 else\r
2430 return 0;\r
2431 break;\r
2432\r
2433 case NT_CCLASS:\r
2434 swap:\r
2435 {\r
2436 Node* tmp;\r
2437 tmp = x; x = y; y = tmp;\r
2438 goto retry;\r
2439 }\r
2440 break;\r
2441\r
2442 case NT_STR:\r
2443 goto swap;\r
2444 break;\r
2445\r
2446 default:\r
2447 break;\r
2448 }\r
2449 }\r
2450 break;\r
2451\r
2452 case NT_CCLASS:\r
2453 {\r
2454 CClassNode* xc = NCCLASS(x);\r
2455 switch (ytype) {\r
2456 case NT_CTYPE:\r
2457 switch (NCTYPE(y)->ctype) {\r
2458 case ONIGENC_CTYPE_WORD:\r
2459 if (NCTYPE(y)->not == 0) {\r
2460 if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {\r
2461 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {\r
2462 if (BITSET_AT(xc->bs, i)) {\r
2463 if (IS_CODE_SB_WORD(reg->enc, i)) return 0;\r
2464 }\r
2465 }\r
2466 return 1;\r
2467 }\r
2468 return 0;\r
2469 }\r
2470 else {\r
2471 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {\r
2472 if (! IS_CODE_SB_WORD(reg->enc, i)) {\r
2473 if (!IS_NCCLASS_NOT(xc)) {\r
2474 if (BITSET_AT(xc->bs, i))\r
2475 return 0;\r
2476 }\r
2477 else {\r
2478 if (! BITSET_AT(xc->bs, i))\r
2479 return 0;\r
2480 }\r
2481 }\r
2482 }\r
2483 return 1;\r
2484 }\r
2485 break;\r
2486\r
2487 default:\r
2488 break;\r
2489 }\r
2490 break;\r
2491\r
2492 case NT_CCLASS:\r
2493 {\r
2494 int v;\r
2495 CClassNode* yc = NCCLASS(y);\r
2496\r
2497 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {\r
2498 v = BITSET_AT(xc->bs, i);\r
2499 if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||\r
2500 (v == 0 && IS_NCCLASS_NOT(xc))) {\r
2501 v = BITSET_AT(yc->bs, i);\r
2502 if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||\r
2503 (v == 0 && IS_NCCLASS_NOT(yc)))\r
2504 return 0;\r
2505 }\r
2506 }\r
2507 if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||\r
2508 (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))\r
2509 return 1;\r
2510 return 0;\r
2511 }\r
2512 break;\r
2513\r
2514 case NT_STR:\r
2515 goto swap;\r
2516 break;\r
2517\r
2518 default:\r
2519 break;\r
2520 }\r
2521 }\r
2522 break;\r
2523\r
2524 case NT_STR:\r
2525 {\r
2526 StrNode* xs = NSTR(x);\r
2527 if (NSTRING_LEN(x) == 0)\r
2528 break;\r
2529\r
2530 //c = *(xs->s);\r
2531 switch (ytype) {\r
2532 case NT_CTYPE:\r
2533 switch (NCTYPE(y)->ctype) {\r
2534 case ONIGENC_CTYPE_WORD:\r
2535 if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))\r
2536 return NCTYPE(y)->not;\r
2537 else\r
2538 return !(NCTYPE(y)->not);\r
2539 break;\r
2540 default:\r
2541 break;\r
2542 }\r
2543 break;\r
2544\r
2545 case NT_CCLASS:\r
2546 {\r
2547 CClassNode* cc = NCCLASS(y);\r
2548\r
2549 code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,\r
2550 xs->s + ONIGENC_MBC_MAXLEN(reg->enc));\r
2551 return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);\r
2552 }\r
2553 break;\r
2554\r
2555 case NT_STR:\r
2556 {\r
2557 UChar *q;\r
2558 StrNode* ys = NSTR(y);\r
2559 len = NSTRING_LEN(x);\r
2560 if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);\r
2561 if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {\r
2562 /* tiny version */\r
2563 return 0;\r
2564 }\r
2565 else {\r
2566 for (i = 0, p = ys->s, q = xs->s; i < len; i++, p++, q++) {\r
2567 if (*p != *q) return 1;\r
2568 }\r
2569 }\r
2570 }\r
2571 break;\r
2572 \r
2573 default:\r
2574 break;\r
2575 }\r
2576 }\r
2577 break;\r
2578\r
2579 default:\r
2580 break;\r
2581 }\r
2582\r
2583 return 0;\r
2584}\r
2585\r
2586static Node*\r
2587get_head_value_node(Node* node, int exact, regex_t* reg)\r
2588{\r
2589 Node* n = NULL_NODE;\r
2590\r
2591 switch (NTYPE(node)) {\r
2592 case NT_BREF:\r
2593 case NT_ALT:\r
2594 case NT_CANY:\r
2595#ifdef USE_SUBEXP_CALL\r
2596 case NT_CALL:\r
2597#endif\r
2598 break;\r
2599\r
2600 case NT_CTYPE:\r
2601 case NT_CCLASS:\r
2602 if (exact == 0) {\r
2603 n = node;\r
2604 }\r
2605 break;\r
2606\r
2607 case NT_LIST:\r
2608 n = get_head_value_node(NCAR(node), exact, reg);\r
2609 break;\r
2610\r
2611 case NT_STR:\r
2612 {\r
2613 StrNode* sn = NSTR(node);\r
2614\r
2615 if (sn->end <= sn->s)\r
2616 break;\r
2617\r
2618 if (exact != 0 &&\r
2619 !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {\r
2620 }\r
2621 else {\r
2622 n = node;\r
2623 }\r
2624 }\r
2625 break;\r
2626\r
2627 case NT_QTFR:\r
2628 {\r
2629 QtfrNode* qn = NQTFR(node);\r
2630 if (qn->lower > 0) {\r
2631 if (IS_NOT_NULL(qn->head_exact))\r
2632 n = qn->head_exact;\r
2633 else\r
2634 n = get_head_value_node(qn->target, exact, reg);\r
2635 }\r
2636 }\r
2637 break;\r
2638\r
2639 case NT_ENCLOSE:\r
2640 {\r
2641 EncloseNode* en = NENCLOSE(node);\r
2642 switch (en->type) {\r
2643 case ENCLOSE_OPTION:\r
2644 {\r
2645 OnigOptionType options = reg->options;\r
2646\r
2647 reg->options = NENCLOSE(node)->option;\r
2648 n = get_head_value_node(NENCLOSE(node)->target, exact, reg);\r
2649 reg->options = options;\r
2650 }\r
2651 break;\r
2652\r
2653 case ENCLOSE_MEMORY:\r
2654 case ENCLOSE_STOP_BACKTRACK:\r
2655 n = get_head_value_node(en->target, exact, reg);\r
2656 break;\r
2657 }\r
2658 }\r
2659 break;\r
2660\r
2661 case NT_ANCHOR:\r
2662 if (NANCHOR(node)->type == ANCHOR_PREC_READ)\r
2663 n = get_head_value_node(NANCHOR(node)->target, exact, reg);\r
2664 break;\r
2665\r
2666 default:\r
2667 break;\r
2668 }\r
2669\r
2670 return n;\r
2671}\r
2672\r
2673static int\r
2674check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)\r
2675{\r
2676 int type, r = 0;\r
2677\r
2678 type = NTYPE(node);\r
2679 if ((NTYPE2BIT(type) & type_mask) == 0)\r
2680 return 1;\r
2681\r
2682 switch (type) {\r
2683 case NT_LIST:\r
2684 case NT_ALT:\r
2685 do {\r
2686 r = check_type_tree(NCAR(node), type_mask, enclose_mask,\r
2687 anchor_mask);\r
2688 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));\r
2689 break;\r
2690\r
2691 case NT_QTFR:\r
2692 r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,\r
2693 anchor_mask);\r
2694 break;\r
2695\r
2696 case NT_ENCLOSE:\r
2697 {\r
2698 EncloseNode* en = NENCLOSE(node);\r
2699 if ((en->type & enclose_mask) == 0)\r
2700 return 1;\r
2701\r
2702 r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);\r
2703 }\r
2704 break;\r
2705\r
2706 case NT_ANCHOR:\r
2707 type = NANCHOR(node)->type;\r
2708 if ((type & anchor_mask) == 0)\r
2709 return 1;\r
2710\r
2711 if (NANCHOR(node)->target)\r
2712 r = check_type_tree(NANCHOR(node)->target,\r
2713 type_mask, enclose_mask, anchor_mask);\r
2714 break;\r
2715\r
2716 default:\r
2717 break;\r
2718 }\r
2719 return r;\r
2720}\r
2721\r
2722#ifdef USE_SUBEXP_CALL\r
2723\r
2724#define RECURSION_EXIST 1\r
2725#define RECURSION_INFINITE 2\r
2726\r
2727static int\r
2728subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)\r
2729{\r
2730 int type;\r
2731 int r = 0;\r
2732\r
2733 type = NTYPE(node);\r
2734 switch (type) {\r
2735 case NT_LIST:\r
2736 {\r
2737 Node *x;\r
2738 OnigDistance min;\r
2739 int ret;\r
2740\r
2741 x = node;\r
2742 do {\r
2743 ret = subexp_inf_recursive_check(NCAR(x), env, head);\r
2744 if (ret < 0 || ret == RECURSION_INFINITE) return ret;\r
2745 r |= ret;\r
2746 if (head) {\r
2747 ret = get_min_match_length(NCAR(x), &min, env);\r
2748 if (ret != 0) return ret;\r
2749 if (min != 0) head = 0;\r
2750 }\r
2751 } while (IS_NOT_NULL(x = NCDR(x)));\r
2752 }\r
2753 break;\r
2754\r
2755 case NT_ALT:\r
2756 {\r
2757 int ret;\r
2758 r = RECURSION_EXIST;\r
2759 do {\r
2760 ret = subexp_inf_recursive_check(NCAR(node), env, head);\r
2761 if (ret < 0 || ret == RECURSION_INFINITE) return ret;\r
2762 r &= ret;\r
2763 } while (IS_NOT_NULL(node = NCDR(node)));\r
2764 }\r
2765 break;\r
2766\r
2767 case NT_QTFR:\r
2768 r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);\r
2769 if (r == RECURSION_EXIST) {\r
2770 if (NQTFR(node)->lower == 0) r = 0;\r
2771 }\r
2772 break;\r
2773\r
2774 case NT_ANCHOR:\r
2775 {\r
2776 AnchorNode* an = NANCHOR(node);\r
2777 switch (an->type) {\r
2778 case ANCHOR_PREC_READ:\r
2779 case ANCHOR_PREC_READ_NOT:\r
2780 case ANCHOR_LOOK_BEHIND:\r
2781 case ANCHOR_LOOK_BEHIND_NOT:\r
2782 r = subexp_inf_recursive_check(an->target, env, head);\r
2783 break;\r
2784 }\r
2785 }\r
2786 break;\r
2787\r
2788 case NT_CALL:\r
2789 r = subexp_inf_recursive_check(NCALL(node)->target, env, head);\r
2790 break;\r
2791\r
2792 case NT_ENCLOSE:\r
2793 if (IS_ENCLOSE_MARK2(NENCLOSE(node)))\r
2794 return 0;\r
2795 else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))\r
2796 return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);\r
2797 else {\r
2798 SET_ENCLOSE_STATUS(node, NST_MARK2);\r
2799 r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);\r
2800 CLEAR_ENCLOSE_STATUS(node, NST_MARK2);\r
2801 }\r
2802 break;\r
2803\r
2804 default:\r
2805 break;\r
2806 }\r
2807\r
2808 return r;\r
2809}\r
2810\r
2811static int\r
2812subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)\r
2813{\r
2814 int type;\r
2815 int r = 0;\r
2816\r
2817 type = NTYPE(node);\r
2818 switch (type) {\r
2819 case NT_LIST:\r
2820 case NT_ALT:\r
2821 do {\r
2822 r = subexp_inf_recursive_check_trav(NCAR(node), env);\r
2823 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));\r
2824 break;\r
2825\r
2826 case NT_QTFR:\r
2827 r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);\r
2828 break;\r
2829\r
2830 case NT_ANCHOR:\r
2831 {\r
2832 AnchorNode* an = NANCHOR(node);\r
2833 switch (an->type) {\r
2834 case ANCHOR_PREC_READ:\r
2835 case ANCHOR_PREC_READ_NOT:\r
2836 case ANCHOR_LOOK_BEHIND:\r
2837 case ANCHOR_LOOK_BEHIND_NOT:\r
2838 r = subexp_inf_recursive_check_trav(an->target, env);\r
2839 break;\r
2840 }\r
2841 }\r
2842 break;\r
2843\r
2844 case NT_ENCLOSE:\r
2845 {\r
2846 EncloseNode* en = NENCLOSE(node);\r
2847\r
2848 if (IS_ENCLOSE_RECURSION(en)) {\r
2849 SET_ENCLOSE_STATUS(node, NST_MARK1);\r
2850 r = subexp_inf_recursive_check(en->target, env, 1);\r
2851 if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;\r
2852 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);\r
2853 }\r
2854 r = subexp_inf_recursive_check_trav(en->target, env);\r
2855 }\r
2856\r
2857 break;\r
2858\r
2859 default:\r
2860 break;\r
2861 }\r
2862\r
2863 return r;\r
2864}\r
2865\r
2866static int\r
2867subexp_recursive_check(Node* node)\r
2868{\r
2869 int r = 0;\r
2870\r
2871 switch (NTYPE(node)) {\r
2872 case NT_LIST:\r
2873 case NT_ALT:\r
2874 do {\r
2875 r |= subexp_recursive_check(NCAR(node));\r
2876 } while (IS_NOT_NULL(node = NCDR(node)));\r
2877 break;\r
2878\r
2879 case NT_QTFR:\r
2880 r = subexp_recursive_check(NQTFR(node)->target);\r
2881 break;\r
2882\r
2883 case NT_ANCHOR:\r
2884 {\r
2885 AnchorNode* an = NANCHOR(node);\r
2886 switch (an->type) {\r
2887 case ANCHOR_PREC_READ:\r
2888 case ANCHOR_PREC_READ_NOT:\r
2889 case ANCHOR_LOOK_BEHIND:\r
2890 case ANCHOR_LOOK_BEHIND_NOT:\r
2891 r = subexp_recursive_check(an->target);\r
2892 break;\r
2893 }\r
2894 }\r
2895 break;\r
2896\r
2897 case NT_CALL:\r
2898 r = subexp_recursive_check(NCALL(node)->target);\r
2899 if (r != 0) SET_CALL_RECURSION(node);\r
2900 break;\r
2901\r
2902 case NT_ENCLOSE:\r
2903 if (IS_ENCLOSE_MARK2(NENCLOSE(node)))\r
2904 return 0;\r
2905 else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))\r
2906 return 1; /* recursion */\r
2907 else {\r
2908 SET_ENCLOSE_STATUS(node, NST_MARK2);\r
2909 r = subexp_recursive_check(NENCLOSE(node)->target);\r
2910 CLEAR_ENCLOSE_STATUS(node, NST_MARK2);\r
2911 }\r
2912 break;\r
2913\r
2914 default:\r
2915 break;\r
2916 }\r
2917\r
2918 return r;\r
2919}\r
2920\r
2921\r
2922static int\r
2923subexp_recursive_check_trav(Node* node, ScanEnv* env)\r
2924{\r
2925#define FOUND_CALLED_NODE 1\r
2926\r
2927 int type;\r
2928 int r = 0;\r
2929\r
2930 type = NTYPE(node);\r
2931 switch (type) {\r
2932 case NT_LIST:\r
2933 case NT_ALT:\r
2934 {\r
2935 int ret;\r
2936 do {\r
2937 ret = subexp_recursive_check_trav(NCAR(node), env);\r
2938 if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;\r
2939 else if (ret < 0) return ret;\r
2940 } while (IS_NOT_NULL(node = NCDR(node)));\r
2941 }\r
2942 break;\r
2943\r
2944 case NT_QTFR:\r
2945 r = subexp_recursive_check_trav(NQTFR(node)->target, env);\r
2946 if (NQTFR(node)->upper == 0) {\r
2947 if (r == FOUND_CALLED_NODE)\r
2948 NQTFR(node)->is_refered = 1;\r
2949 }\r
2950 break;\r
2951\r
2952 case NT_ANCHOR:\r
2953 {\r
2954 AnchorNode* an = NANCHOR(node);\r
2955 switch (an->type) {\r
2956 case ANCHOR_PREC_READ:\r
2957 case ANCHOR_PREC_READ_NOT:\r
2958 case ANCHOR_LOOK_BEHIND:\r
2959 case ANCHOR_LOOK_BEHIND_NOT:\r
2960 r = subexp_recursive_check_trav(an->target, env);\r
2961 break;\r
2962 }\r
2963 }\r
2964 break;\r
2965\r
2966 case NT_ENCLOSE:\r
2967 {\r
2968 EncloseNode* en = NENCLOSE(node);\r
2969\r
2970 if (! IS_ENCLOSE_RECURSION(en)) {\r
2971 if (IS_ENCLOSE_CALLED(en)) {\r
2972 SET_ENCLOSE_STATUS(node, NST_MARK1);\r
2973 r = subexp_recursive_check(en->target);\r
2974 if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);\r
2975 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);\r
2976 }\r
2977 }\r
2978 r = subexp_recursive_check_trav(en->target, env);\r
2979 if (IS_ENCLOSE_CALLED(en))\r
2980 r |= FOUND_CALLED_NODE;\r
2981 }\r
2982 break;\r
2983\r
2984 default:\r
2985 break;\r
2986 }\r
2987\r
2988 return r;\r
2989}\r
2990\r
2991static int\r
2992setup_subexp_call(Node* node, ScanEnv* env)\r
2993{\r
2994 int type;\r
2995 int r = 0;\r
2996\r
2997 type = NTYPE(node);\r
2998 switch (type) {\r
2999 case NT_LIST:\r
3000 do {\r
3001 r = setup_subexp_call(NCAR(node), env);\r
3002 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));\r
3003 break;\r
3004\r
3005 case NT_ALT:\r
3006 do {\r
3007 r = setup_subexp_call(NCAR(node), env);\r
3008 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));\r
3009 break;\r
3010\r
3011 case NT_QTFR:\r
3012 r = setup_subexp_call(NQTFR(node)->target, env);\r
3013 break;\r
3014 case NT_ENCLOSE:\r
3015 r = setup_subexp_call(NENCLOSE(node)->target, env);\r
3016 break;\r
3017\r
3018 case NT_CALL:\r
3019 {\r
3020 CallNode* cn = NCALL(node);\r
3021 Node** nodes = SCANENV_MEM_NODES(env);\r
3022\r
3023 if (cn->group_num != 0) {\r
3024 int gnum = cn->group_num;\r
3025\r
3026#ifdef USE_NAMED_GROUP\r
3027 if (env->num_named > 0 &&\r
3028 IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&\r
3029 !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {\r
3030 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;\r
3031 }\r
3032#endif\r
3033 if (gnum > env->num_mem) {\r
3034 onig_scan_env_set_error_string(env,\r
3035 ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);\r
3036 return ONIGERR_UNDEFINED_GROUP_REFERENCE;\r
3037 }\r
3038\r
3039#ifdef USE_NAMED_GROUP\r
3040 set_call_attr:\r
3041#endif\r
3042 cn->target = nodes[cn->group_num];\r
3043 if (IS_NULL(cn->target)) {\r
3044 onig_scan_env_set_error_string(env,\r
3045 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);\r
3046 return ONIGERR_UNDEFINED_NAME_REFERENCE;\r
3047 }\r
3048 SET_ENCLOSE_STATUS(cn->target, NST_CALLED);\r
3049 BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);\r
3050 cn->unset_addr_list = env->unset_addr_list;\r
3051 }\r
3052#ifdef USE_NAMED_GROUP\r
3053 else {\r
3054 int *refs;\r
3055\r
3056 int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,\r
3057 &refs);\r
3058 if (n <= 0) {\r
3059 onig_scan_env_set_error_string(env,\r
3060 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);\r
3061 return ONIGERR_UNDEFINED_NAME_REFERENCE;\r
3062 }\r
3063 else if (n > 1) {\r
3064 onig_scan_env_set_error_string(env,\r
3065 ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);\r
3066 return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;\r
3067 }\r
3068 else {\r
3069 cn->group_num = refs[0];\r
3070 goto set_call_attr;\r
3071 }\r
3072 }\r
3073#endif\r
3074 }\r
3075 break;\r
3076\r
3077 case NT_ANCHOR:\r
3078 {\r
3079 AnchorNode* an = NANCHOR(node);\r
3080\r
3081 switch (an->type) {\r
3082 case ANCHOR_PREC_READ:\r
3083 case ANCHOR_PREC_READ_NOT:\r
3084 case ANCHOR_LOOK_BEHIND:\r
3085 case ANCHOR_LOOK_BEHIND_NOT:\r
3086 r = setup_subexp_call(an->target, env);\r
3087 break;\r
3088 }\r
3089 }\r
3090 break;\r
3091\r
3092 default:\r
3093 break;\r
3094 }\r
3095\r
3096 return r;\r
3097}\r
3098#endif\r
3099\r
3100/* divide different length alternatives in look-behind.\r
3101 (?<=A|B) ==> (?<=A)|(?<=B)\r
3102 (?<!A|B) ==> (?<!A)(?<!B)\r
3103*/\r
3104static int\r
3105divide_look_behind_alternatives(Node* node)\r
3106{\r
3107 Node *head, *np, *insert_node;\r
3108 AnchorNode* an = NANCHOR(node);\r
3109 int anc_type = an->type;\r
3110\r
3111 head = an->target;\r
3112 np = NCAR(head);\r
3113 swap_node(node, head);\r
3114 NCAR(node) = head;\r
3115 NANCHOR(head)->target = np;\r
3116\r
3117 np = node;\r
3118 while ((np = NCDR(np)) != NULL_NODE) {\r
3119 insert_node = onig_node_new_anchor(anc_type);\r
3120 CHECK_NULL_RETURN_MEMERR(insert_node);\r
3121 NANCHOR(insert_node)->target = NCAR(np);\r
3122 NCAR(np) = insert_node;\r
3123 }\r
3124\r
3125 if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {\r
3126 np = node;\r
3127 do {\r
3128 SET_NTYPE(np, NT_LIST); /* alt -> list */\r
3129 } while ((np = NCDR(np)) != NULL_NODE);\r
3130 }\r
3131 return 0;\r
3132}\r
3133\r
3134static int\r
3135setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)\r
3136{\r
3137 int r, len;\r
3138 AnchorNode* an = NANCHOR(node);\r
3139\r
3140 r = get_char_length_tree(an->target, reg, &len);\r
3141 if (r == 0)\r
3142 an->char_len = len;\r
3143 else if (r == GET_CHAR_LEN_VARLEN)\r
3144 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;\r
3145 else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {\r
3146 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))\r
3147 r = divide_look_behind_alternatives(node);\r
3148 else\r
3149 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;\r
3150 }\r
3151\r
3152 return r;\r
3153}\r
3154\r
3155static int\r
3156next_setup(Node* node, Node* next_node, regex_t* reg)\r
3157{\r
3158 int type;\r
3159\r
3160 retry:\r
3161 type = NTYPE(node);\r
3162 if (type == NT_QTFR) {\r
3163 QtfrNode* qn = NQTFR(node);\r
3164 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {\r
3165#ifdef USE_QTFR_PEEK_NEXT\r
3166 Node* n = get_head_value_node(next_node, 1, reg);\r
3167 /* '\0': for UTF-16BE etc... */\r
3168 if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') {\r
3169 qn->next_head_exact = n;\r
3170 }\r
3171#endif\r
3172 /* automatic posseivation a*b ==> (?>a*)b */\r
3173 if (qn->lower <= 1) {\r
3174 int ttype = NTYPE(qn->target);\r
3175 if (IS_NODE_TYPE_SIMPLE(ttype)) {\r
3176 Node *x, *y;\r
3177 x = get_head_value_node(qn->target, 0, reg);\r
3178 if (IS_NOT_NULL(x)) {\r
3179 y = get_head_value_node(next_node, 0, reg);\r
3180 if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {\r
3181 Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK);\r
3182 CHECK_NULL_RETURN_MEMERR(en);\r
3183 SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);\r
3184 swap_node(node, en);\r
3185 NENCLOSE(node)->target = en;\r
3186 }\r
3187 }\r
3188 }\r
3189 }\r
3190 }\r
3191 }\r
3192 else if (type == NT_ENCLOSE) {\r
3193 EncloseNode* en = NENCLOSE(node);\r
3194 if (en->type == ENCLOSE_MEMORY) {\r
3195 node = en->target;\r
3196 goto retry;\r
3197 }\r
3198 }\r
3199 return 0;\r
3200}\r
3201\r
3202\r
3203static int\r
3204update_string_node_case_fold(regex_t* reg, Node *node)\r
3205{\r
3206 UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];\r
3207 UChar *sbuf, *ebuf, *sp;\r
3208 int r, i, len, sbuf_size;\r
3209 StrNode* sn = NSTR(node);\r
3210\r
3211 end = sn->end;\r
3212 sbuf_size = (int)(end - sn->s) * 2;\r
3213 sbuf = (UChar* )xmalloc(sbuf_size);\r
3214 CHECK_NULL_RETURN_MEMERR(sbuf);\r
3215 ebuf = sbuf + sbuf_size;\r
3216\r
3217 sp = sbuf;\r
3218 p = sn->s;\r
3219 while (p < end) {\r
3220 len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);\r
3221 for (i = 0; i < len; i++) {\r
3222 if (sp >= ebuf) {\r
3223 sbuf = (UChar* )xrealloc(sbuf, sbuf_size * 2, sbuf_size);\r
3224 CHECK_NULL_RETURN_MEMERR(sbuf);\r
3225 sp = sbuf + sbuf_size;\r
3226 sbuf_size *= 2;\r
3227 ebuf = sbuf + sbuf_size;\r
3228 }\r
3229\r
3230 *sp++ = buf[i];\r
3231 }\r
3232 }\r
3233\r
3234 r = onig_node_str_set(node, sbuf, sp);\r
3235 if (r != 0) {\r
3236 xfree(sbuf);\r
3237 return r;\r
3238 }\r
3239\r
3240 xfree(sbuf);\r
3241 return 0;\r
3242}\r
3243\r
3244static int\r
3245expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end,\r
3246 regex_t* reg)\r
3247{\r
3248 int r;\r
3249 Node *node;\r
3250\r
3251 node = onig_node_new_str(s, end);\r
3252 if (IS_NULL(node)) return ONIGERR_MEMORY;\r
3253\r
3254 r = update_string_node_case_fold(reg, node);\r
3255 if (r != 0) {\r
3256 onig_node_free(node);\r
3257 return r;\r
3258 }\r
3259\r
3260 NSTRING_SET_AMBIG(node);\r
3261 NSTRING_SET_DONT_GET_OPT_INFO(node);\r
3262 *rnode = node;\r
3263 return 0;\r
3264}\r
3265\r
3266static int\r
3267expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],\r
3268 UChar *p, int slen, UChar *end,\r
3269 regex_t* reg, Node **rnode)\r
3270{\r
3271 int r, i, j, len, varlen;\r
3272 Node *anode, *var_anode, *snode, *xnode, *an;\r
3273 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];\r
b0c2b797 3274 xnode = NULL_NODE;\r
14b0e578
CS
3275\r
3276 *rnode = var_anode = NULL_NODE;\r
3277\r
3278 varlen = 0;\r
3279 for (i = 0; i < item_num; i++) {\r
3280 if (items[i].byte_len != slen) {\r
3281 varlen = 1;\r
3282 break;\r
3283 }\r
3284 }\r
3285\r
3286 if (varlen != 0) {\r
3287 *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);\r
3288 if (IS_NULL(var_anode)) return ONIGERR_MEMORY;\r
3289\r
3290 xnode = onig_node_new_list(NULL, NULL);\r
3291 if (IS_NULL(xnode)) goto mem_err;\r
3292 NCAR(var_anode) = xnode;\r
3293\r
3294 anode = onig_node_new_alt(NULL_NODE, NULL_NODE);\r
3295 if (IS_NULL(anode)) goto mem_err;\r
3296 NCAR(xnode) = anode;\r
3297 }\r
3298 else {\r
3299 *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);\r
3300 if (IS_NULL(anode)) return ONIGERR_MEMORY;\r
3301 }\r
3302\r
3303 snode = onig_node_new_str(p, p + slen);\r
3304 if (IS_NULL(snode)) goto mem_err;\r
3305\r
3306 NCAR(anode) = snode;\r
3307\r
3308 for (i = 0; i < item_num; i++) {\r
3309 snode = onig_node_new_str(NULL, NULL);\r
3310 if (IS_NULL(snode)) goto mem_err;\r
3311 \r
3312 for (j = 0; j < items[i].code_len; j++) {\r
3313 len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);\r
3314 if (len < 0) {\r
3315 r = len;\r
3316 goto mem_err2;\r
3317 }\r
3318\r
3319 r = onig_node_str_cat(snode, buf, buf + len);\r
3320 if (r != 0) goto mem_err2;\r
3321 }\r
3322\r
3323 an = onig_node_new_alt(NULL_NODE, NULL_NODE);\r
3324 if (IS_NULL(an)) {\r
3325 goto mem_err2;\r
3326 }\r
3327\r
3328 if (items[i].byte_len != slen) {\r
b0c2b797 3329 Node *rem = NULL_NODE;\r
14b0e578
CS
3330 UChar *q = p + items[i].byte_len;\r
3331\r
3332 if (q < end) {\r
3333 r = expand_case_fold_make_rem_string(&rem, q, end, reg);\r
3334 if (r != 0) {\r
3335 onig_node_free(an);\r
3336 goto mem_err2;\r
3337 }\r
3338\r
3339 xnode = onig_node_list_add(NULL_NODE, snode);\r
3340 if (IS_NULL(xnode)) {\r
3341 onig_node_free(an);\r
3342 onig_node_free(rem);\r
3343 goto mem_err2;\r
3344 }\r
3345 if (IS_NULL(onig_node_list_add(xnode, rem))) {\r
3346 onig_node_free(an);\r
3347 onig_node_free(xnode);\r
3348 onig_node_free(rem);\r
3349 goto mem_err;\r
3350 }\r
3351\r
3352 NCAR(an) = xnode;\r
3353 }\r
3354 else {\r
3355 NCAR(an) = snode;\r
3356 }\r
3357\r
b0c2b797
QS
3358 if (var_anode == NULL) {\r
3359 onig_node_free(an);\r
3360 onig_node_free(xnode);\r
3361 onig_node_free(rem);\r
3362 goto mem_err2;\r
3363 }\r
14b0e578
CS
3364 NCDR(var_anode) = an;\r
3365 var_anode = an;\r
3366 }\r
3367 else {\r
3368 NCAR(an) = snode;\r
3369 NCDR(anode) = an;\r
3370 anode = an;\r
3371 }\r
3372 }\r
3373\r
3374 return varlen;\r
3375\r
3376 mem_err2:\r
3377 onig_node_free(snode);\r
3378\r
3379 mem_err:\r
3380 onig_node_free(*rnode);\r
3381\r
3382 return ONIGERR_MEMORY;\r
3383}\r
3384\r
3385static int\r
3386expand_case_fold_string(Node* node, regex_t* reg)\r
3387{\r
3388#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8\r
3389\r
3390 int r, n, len, alt_num;\r
3391 UChar *start, *end, *p;\r
3392 Node *top_root, *root, *snode, *prev_node;\r
3393 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];\r
3394 StrNode* sn = NSTR(node);\r
3395\r
3396 if (NSTRING_IS_AMBIG(node)) return 0;\r
3397\r
3398 start = sn->s;\r
3399 end = sn->end;\r
3400 if (start >= end) return 0;\r
3401\r
3402 r = 0;\r
3403 top_root = root = prev_node = snode = NULL_NODE;\r
3404 alt_num = 1;\r
3405 p = start;\r
3406 while (p < end) {\r
3407 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,\r
3408 p, end, items);\r
3409 if (n < 0) {\r
3410 r = n;\r
3411 goto err;\r
3412 }\r
3413\r
3414 len = enclen(reg->enc, p);\r
3415\r
3416 if (n == 0) {\r
3417 if (IS_NULL(snode)) {\r
3418 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {\r
3419 top_root = root = onig_node_list_add(NULL_NODE, prev_node);\r
3420 if (IS_NULL(root)) {\r
3421 onig_node_free(prev_node);\r
3422 goto mem_err;\r
3423 }\r
3424 }\r
3425\r
3426 prev_node = snode = onig_node_new_str(NULL, NULL);\r
3427 if (IS_NULL(snode)) goto mem_err;\r
3428 if (IS_NOT_NULL(root)) {\r
3429 if (IS_NULL(onig_node_list_add(root, snode))) {\r
3430 onig_node_free(snode);\r
3431 goto mem_err;\r
3432 }\r
3433 }\r
3434 }\r
3435\r
3436 r = onig_node_str_cat(snode, p, p + len);\r
3437 if (r != 0) goto err;\r
3438 }\r
3439 else {\r
3440 alt_num *= (n + 1);\r
3441 if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;\r
3442\r
3443 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {\r
3444 top_root = root = onig_node_list_add(NULL_NODE, prev_node);\r
3445 if (IS_NULL(root)) {\r
3446 onig_node_free(prev_node);\r
3447 goto mem_err;\r
3448 }\r
3449 }\r
3450\r
3451 r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);\r
3452 if (r < 0) goto mem_err;\r
3453 if (r == 1) {\r
3454 if (IS_NULL(root)) {\r
3455 top_root = prev_node;\r
3456 }\r
3457 else {\r
3458 if (IS_NULL(onig_node_list_add(root, prev_node))) {\r
3459 onig_node_free(prev_node);\r
3460 goto mem_err;\r
3461 }\r
3462 }\r
3463\r
3464 root = NCAR(prev_node);\r
3465 }\r
3466 else { /* r == 0 */\r
3467 if (IS_NOT_NULL(root)) {\r
3468 if (IS_NULL(onig_node_list_add(root, prev_node))) {\r
3469 onig_node_free(prev_node);\r
3470 goto mem_err;\r
3471 }\r
3472 }\r
3473 }\r
3474\r
3475 snode = NULL_NODE;\r
3476 }\r
3477\r
3478 p += len;\r
3479 }\r
3480\r
3481 if (p < end) {\r
3482 Node *srem;\r
3483\r
3484 r = expand_case_fold_make_rem_string(&srem, p, end, reg);\r
3485 if (r != 0) goto mem_err;\r
3486\r
3487 if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {\r
3488 top_root = root = onig_node_list_add(NULL_NODE, prev_node);\r
3489 if (IS_NULL(root)) {\r
3490 onig_node_free(srem);\r
3491 onig_node_free(prev_node);\r
3492 goto mem_err;\r
3493 }\r
3494 }\r
3495\r
3496 if (IS_NULL(root)) {\r
3497 prev_node = srem;\r
3498 }\r
3499 else {\r
3500 if (IS_NULL(onig_node_list_add(root, srem))) {\r
3501 onig_node_free(srem);\r
3502 goto mem_err;\r
3503 }\r
3504 }\r
3505 }\r
3506\r
3507 /* ending */\r
3508 top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);\r
3509 swap_node(node, top_root);\r
3510 onig_node_free(top_root);\r
3511 return 0;\r
3512\r
3513 mem_err:\r
3514 r = ONIGERR_MEMORY;\r
3515\r
3516 err:\r
3517 onig_node_free(top_root);\r
3518 return r;\r
3519}\r
3520\r
3521\r
3522#ifdef USE_COMBINATION_EXPLOSION_CHECK\r
3523\r
3524#define CEC_THRES_NUM_BIG_REPEAT 512\r
3525#define CEC_INFINITE_NUM 0x7fffffff\r
3526\r
3527#define CEC_IN_INFINITE_REPEAT (1<<0)\r
3528#define CEC_IN_FINITE_REPEAT (1<<1)\r
3529#define CEC_CONT_BIG_REPEAT (1<<2)\r
3530\r
3531static int\r
3532setup_comb_exp_check(Node* node, int state, ScanEnv* env)\r
3533{\r
3534 int type;\r
3535 int r = state;\r
3536\r
3537 type = NTYPE(node);\r
3538 switch (type) {\r
3539 case NT_LIST:\r
3540 {\r
3541 Node* prev = NULL_NODE;\r
3542 do {\r
3543 r = setup_comb_exp_check(NCAR(node), r, env);\r
3544 prev = NCAR(node);\r
3545 } while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));\r
3546 }\r
3547 break;\r
3548\r
3549 case NT_ALT:\r
3550 {\r
3551 int ret;\r
3552 do {\r
3553 ret = setup_comb_exp_check(NCAR(node), state, env);\r
3554 r |= ret;\r
3555 } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));\r
3556 }\r
3557 break;\r
3558\r
3559 case NT_QTFR:\r
3560 {\r
3561 int child_state = state;\r
3562 int add_state = 0;\r
3563 QtfrNode* qn = NQTFR(node);\r
3564 Node* target = qn->target;\r
3565 int var_num;\r
3566\r
3567 if (! IS_REPEAT_INFINITE(qn->upper)) {\r
3568 if (qn->upper > 1) {\r
3569 /* {0,1}, {1,1} are allowed */\r
3570 child_state |= CEC_IN_FINITE_REPEAT;\r
3571\r
3572 /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */\r
3573 if (env->backrefed_mem == 0) {\r
3574 if (NTYPE(qn->target) == NT_ENCLOSE) {\r
3575 EncloseNode* en = NENCLOSE(qn->target);\r
3576 if (en->type == ENCLOSE_MEMORY) {\r
3577 if (NTYPE(en->target) == NT_QTFR) {\r
3578 QtfrNode* q = NQTFR(en->target);\r
3579 if (IS_REPEAT_INFINITE(q->upper)\r
3580 && q->greedy == qn->greedy) {\r
3581 qn->upper = (qn->lower == 0 ? 1 : qn->lower);\r
3582 if (qn->upper == 1)\r
3583 child_state = state;\r
3584 }\r
3585 }\r
3586 }\r
3587 }\r
3588 }\r
3589 }\r
3590 }\r
3591\r
3592 if (state & CEC_IN_FINITE_REPEAT) {\r
3593 qn->comb_exp_check_num = -1;\r
3594 }\r
3595 else {\r
3596 if (IS_REPEAT_INFINITE(qn->upper)) {\r
3597 var_num = CEC_INFINITE_NUM;\r
3598 child_state |= CEC_IN_INFINITE_REPEAT;\r
3599 }\r
3600 else {\r
3601 var_num = qn->upper - qn->lower;\r
3602 }\r
3603\r
3604 if (var_num >= CEC_THRES_NUM_BIG_REPEAT)\r
3605 add_state |= CEC_CONT_BIG_REPEAT;\r
3606\r
3607 if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||\r
3608 ((state & CEC_CONT_BIG_REPEAT) != 0 &&\r
3609 var_num >= CEC_THRES_NUM_BIG_REPEAT)) {\r
3610 if (qn->comb_exp_check_num == 0) {\r
3611 env->num_comb_exp_check++;\r
3612 qn->comb_exp_check_num = env->num_comb_exp_check;\r
3613 if (env->curr_max_regnum > env->comb_exp_max_regnum)\r
3614 env->comb_exp_max_regnum = env->curr_max_regnum;\r
3615 }\r
3616 }\r
3617 }\r
3618\r
3619 r = setup_comb_exp_check(target, child_state, env);\r
3620 r |= add_state;\r
3621 }\r
3622 break;\r
3623\r
3624 case NT_ENCLOSE:\r
3625 {\r
3626 EncloseNode* en = NENCLOSE(node);\r
3627\r
3628 switch (en->type) {\r
3629 case ENCLOSE_MEMORY:\r
3630 {\r
3631 if (env->curr_max_regnum < en->regnum)\r
3632 env->curr_max_regnum = en->regnum;\r
3633\r
3634 r = setup_comb_exp_check(en->target, state, env);\r
3635 }\r
3636 break;\r
3637\r
3638 default:\r
3639 r = setup_comb_exp_check(en->target, state, env);\r
3640 break;\r
3641 }\r
3642 }\r
3643 break;\r
3644\r
3645#ifdef USE_SUBEXP_CALL\r
3646 case NT_CALL:\r
3647 if (IS_CALL_RECURSION(NCALL(node)))\r
3648 env->has_recursion = 1;\r
3649 else\r
3650 r = setup_comb_exp_check(NCALL(node)->target, state, env);\r
3651 break;\r
3652#endif\r
3653\r
3654 default:\r
3655 break;\r
3656 }\r
3657\r
3658 return r;\r
3659}\r
3660#endif\r
3661\r
3662#define IN_ALT (1<<0)\r
3663#define IN_NOT (1<<1)\r
3664#define IN_REPEAT (1<<2)\r
3665#define IN_VAR_REPEAT (1<<3)\r
3666\r
3667/* setup_tree does the following work.\r
3668 1. check empty loop. (set qn->target_empty_info)\r
3669 2. expand ignore-case in char class.\r
3670 3. set memory status bit flags. (reg->mem_stats)\r
3671 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].\r
3672 5. find invalid patterns in look-behind.\r
3673 6. expand repeated string.\r
3674 */\r
3675static int\r
3676setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)\r
3677{\r
3678 int type;\r
3679 int r = 0;\r
3680\r
3681 type = NTYPE(node);\r
3682 switch (type) {\r
3683 case NT_LIST:\r
3684 {\r
3685 Node* prev = NULL_NODE;\r
3686 do {\r
3687 r = setup_tree(NCAR(node), reg, state, env);\r
3688 if (IS_NOT_NULL(prev) && r == 0) {\r
3689 r = next_setup(prev, NCAR(node), reg);\r
3690 }\r
3691 prev = NCAR(node);\r
3692 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));\r
3693 }\r
3694 break;\r
3695\r
3696 case NT_ALT:\r
3697 do {\r
3698 r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);\r
3699 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));\r
3700 break;\r
3701\r
3702 case NT_CCLASS:\r
3703 break;\r
3704\r
3705 case NT_STR:\r
3706 if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {\r
3707 r = expand_case_fold_string(node, reg);\r
3708 }\r
3709 break;\r
3710\r
3711 case NT_CTYPE:\r
3712 case NT_CANY:\r
3713 break;\r
3714\r
3715#ifdef USE_SUBEXP_CALL\r
3716 case NT_CALL:\r
3717 break;\r
3718#endif\r
3719\r
3720 case NT_BREF:\r
3721 {\r
3722 int i;\r
3723 int* p;\r
3724 Node** nodes = SCANENV_MEM_NODES(env);\r
3725 BRefNode* br = NBREF(node);\r
3726 p = BACKREFS_P(br);\r
3727 for (i = 0; i < br->back_num; i++) {\r
3728 if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;\r
3729 BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);\r
3730 BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);\r
3731#ifdef USE_BACKREF_WITH_LEVEL\r
3732 if (IS_BACKREF_NEST_LEVEL(br)) {\r
3733 BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);\r
3734 }\r
3735#endif\r
3736 SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);\r
3737 }\r
3738 }\r
3739 break;\r
3740\r
3741 case NT_QTFR:\r
3742 {\r
3743 OnigDistance d;\r
3744 QtfrNode* qn = NQTFR(node);\r
3745 Node* target = qn->target;\r
3746\r
3747 if ((state & IN_REPEAT) != 0) {\r
3748 qn->state |= NST_IN_REPEAT;\r
3749 }\r
3750\r
3751 if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {\r
3752 r = get_min_match_length(target, &d, env);\r
3753 if (r) break;\r
3754 if (d == 0) {\r
3755 qn->target_empty_info = NQ_TARGET_IS_EMPTY;\r
3756#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT\r
3757 r = quantifiers_memory_node_info(target);\r
3758 if (r < 0) break;\r
3759 if (r > 0) {\r
3760 qn->target_empty_info = r;\r
3761 }\r
3762#endif\r
3763#if 0\r
3764 r = get_max_match_length(target, &d, env);\r
3765 if (r == 0 && d == 0) {\r
3766 /* ()* ==> ()?, ()+ ==> () */\r
3767 qn->upper = 1;\r
3768 if (qn->lower > 1) qn->lower = 1;\r
3769 if (NTYPE(target) == NT_STR) {\r
3770 qn->upper = qn->lower = 0; /* /(?:)+/ ==> // */\r
3771 }\r
3772 }\r
3773#endif\r
3774 }\r
3775 }\r
3776\r
3777 state |= IN_REPEAT;\r
3778 if (qn->lower != qn->upper)\r
3779 state |= IN_VAR_REPEAT;\r
3780 r = setup_tree(target, reg, state, env);\r
3781 if (r) break;\r
3782\r
3783 /* expand string */\r
3784#define EXPAND_STRING_MAX_LENGTH 100\r
3785 if (NTYPE(target) == NT_STR) {\r
3786 if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper &&\r
3787 qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) {\r
3788 int len = NSTRING_LEN(target);\r
3789 StrNode* sn = NSTR(target);\r
3790\r
3791 if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) {\r
3792 int i, n = qn->lower;\r
3793 onig_node_conv_to_str_node(node, NSTR(target)->flag);\r
3794 for (i = 0; i < n; i++) {\r
3795 r = onig_node_str_cat(node, sn->s, sn->end);\r
3796 if (r) break;\r
3797 }\r
3798 onig_node_free(target);\r
3799 break; /* break case NT_QTFR: */\r
3800 }\r
3801 }\r
3802 }\r
3803\r
3804#ifdef USE_OP_PUSH_OR_JUMP_EXACT\r
3805 if (qn->greedy && (qn->target_empty_info != 0)) {\r
3806 if (NTYPE(target) == NT_QTFR) {\r
3807 QtfrNode* tqn = NQTFR(target);\r
3808 if (IS_NOT_NULL(tqn->head_exact)) {\r
3809 qn->head_exact = tqn->head_exact;\r
3810 tqn->head_exact = NULL;\r
3811 }\r
3812 }\r
3813 else {\r
3814 qn->head_exact = get_head_value_node(qn->target, 1, reg);\r
3815 }\r
3816 }\r
3817#endif\r
3818 }\r
3819 break;\r
3820\r
3821 case NT_ENCLOSE:\r
3822 {\r
3823 EncloseNode* en = NENCLOSE(node);\r
3824\r
3825 switch (en->type) {\r
3826 case ENCLOSE_OPTION:\r
3827 {\r
3828 OnigOptionType options = reg->options;\r
3829 reg->options = NENCLOSE(node)->option;\r
3830 r = setup_tree(NENCLOSE(node)->target, reg, state, env);\r
3831 reg->options = options;\r
3832 }\r
3833 break;\r
3834\r
3835 case ENCLOSE_MEMORY:\r
3836 if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) {\r
3837 BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);\r
3838 /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */\r
3839 }\r
3840 r = setup_tree(en->target, reg, state, env);\r
3841 break;\r
3842\r
3843 case ENCLOSE_STOP_BACKTRACK:\r
3844 {\r
3845 Node* target = en->target;\r
3846 r = setup_tree(target, reg, state, env);\r
3847 if (NTYPE(target) == NT_QTFR) {\r
3848 QtfrNode* tqn = NQTFR(target);\r
3849 if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&\r
3850 tqn->greedy != 0) { /* (?>a*), a*+ etc... */\r
3851 int qtype = NTYPE(tqn->target);\r
3852 if (IS_NODE_TYPE_SIMPLE(qtype))\r
3853 SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);\r
3854 }\r
3855 }\r
3856 }\r
3857 break;\r
3858 }\r
3859 }\r
3860 break;\r
3861\r
3862 case NT_ANCHOR:\r
3863 {\r
3864 AnchorNode* an = NANCHOR(node);\r
3865\r
3866 switch (an->type) {\r
3867 case ANCHOR_PREC_READ:\r
3868 r = setup_tree(an->target, reg, state, env);\r
3869 break;\r
3870 case ANCHOR_PREC_READ_NOT:\r
3871 r = setup_tree(an->target, reg, (state | IN_NOT), env);\r
3872 break;\r
3873\r
3874/* allowed node types in look-behind */\r
3875#define ALLOWED_TYPE_IN_LB \\r
3876 ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \\r
3877 BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )\r
3878\r
3879#define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY )\r
3880#define ALLOWED_ENCLOSE_IN_LB_NOT 0\r
3881\r
3882#define ALLOWED_ANCHOR_IN_LB \\r
3883( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )\r
3884#define ALLOWED_ANCHOR_IN_LB_NOT \\r
3885( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )\r
3886\r
3887 case ANCHOR_LOOK_BEHIND:\r
3888 {\r
3889 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,\r
3890 ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB);\r
3891 if (r < 0) return r;\r
3892 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;\r
3893 r = setup_look_behind(node, reg, env);\r
3894 if (r != 0) return r;\r
3895 r = setup_tree(an->target, reg, state, env);\r
3896 }\r
3897 break;\r
3898\r
3899 case ANCHOR_LOOK_BEHIND_NOT:\r
3900 {\r
3901 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,\r
3902 ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);\r
3903 if (r < 0) return r;\r
3904 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;\r
3905 r = setup_look_behind(node, reg, env);\r
3906 if (r != 0) return r;\r
3907 r = setup_tree(an->target, reg, (state | IN_NOT), env);\r
3908 }\r
3909 break;\r
3910 }\r
3911 }\r
3912 break;\r
3913\r
3914 default:\r
3915 break;\r
3916 }\r
3917\r
3918 return r;\r
3919}\r
3920\r
3921/* set skip map for Boyer-Moor search */\r
3922static int\r
3923set_bm_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,\r
3924 UChar skip[], int** int_skip)\r
3925{\r
3926 int i, len;\r
3927\r
3928 len = (int)(end - s);\r
3929 if (len < ONIG_CHAR_TABLE_SIZE) {\r
3930 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar)len;\r
3931\r
3932 for (i = 0; i < len - 1; i++)\r
3933 skip[s[i]] = (UChar)(len - 1 - i);\r
3934 }\r
3935 else {\r
3936 if (IS_NULL(*int_skip)) {\r
3937 *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);\r
3938 if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;\r
3939 }\r
3940 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = len;\r
3941\r
3942 for (i = 0; i < len - 1; i++)\r
3943 (*int_skip)[s[i]] = len - 1 - i;\r
3944 }\r
3945 return 0;\r
3946}\r
3947\r
3948#define OPT_EXACT_MAXLEN 24\r
3949\r
3950typedef struct {\r
3951 OnigDistance min; /* min byte length */\r
3952 OnigDistance max; /* max byte length */\r
3953} MinMaxLen;\r
3954\r
3955typedef struct {\r
3956 MinMaxLen mmd;\r
3957 OnigEncoding enc;\r
3958 OnigOptionType options;\r
3959 OnigCaseFoldType case_fold_flag;\r
3960 ScanEnv* scan_env;\r
3961} OptEnv;\r
3962\r
3963typedef struct {\r
3964 int left_anchor;\r
3965 int right_anchor;\r
3966} OptAncInfo;\r
3967\r
3968typedef struct {\r
3969 MinMaxLen mmd; /* info position */\r
3970 OptAncInfo anc;\r
3971\r
3972 int reach_end;\r
3973 int ignore_case;\r
3974 int len;\r
3975 UChar s[OPT_EXACT_MAXLEN];\r
3976} OptExactInfo;\r
3977\r
3978typedef struct {\r
3979 MinMaxLen mmd; /* info position */\r
3980 OptAncInfo anc;\r
3981\r
3982 int value; /* weighted value */\r
3983 UChar map[ONIG_CHAR_TABLE_SIZE];\r
3984} OptMapInfo;\r
3985\r
3986typedef struct {\r
3987 MinMaxLen len;\r
3988\r
3989 OptAncInfo anc;\r
3990 OptExactInfo exb; /* boundary */\r
3991 OptExactInfo exm; /* middle */\r
3992 OptExactInfo expr; /* prec read (?=...) */\r
3993\r
3994 OptMapInfo map; /* boundary */\r
3995} NodeOptInfo;\r
3996\r
3997\r
3998static int\r
3999map_position_value(OnigEncoding enc, int i)\r
4000{\r
4001 static const short int ByteValTable[] = {\r
4002 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,\r
4003 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\r
4004 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,\r
4005 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,\r
4006 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,\r
4007 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,\r
4008 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,\r
4009 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1\r
4010 };\r
4011\r
4012 if (i < (int )(sizeof(ByteValTable)/sizeof(ByteValTable[0]))) {\r
4013 if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)\r
4014 return 20;\r
4015 else\r
4016 return (int )ByteValTable[i];\r
4017 }\r
4018 else\r
4019 return 4; /* Take it easy. */\r
4020}\r
4021\r
4022static int\r
4023distance_value(MinMaxLen* mm)\r
4024{\r
4025 /* 1000 / (min-max-dist + 1) */\r
4026 static const short int dist_vals[] = {\r
4027 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100, \r
4028 91, 83, 77, 71, 67, 63, 59, 56, 53, 50, \r
4029 48, 45, 43, 42, 40, 38, 37, 36, 34, 33, \r
4030 32, 31, 30, 29, 29, 28, 27, 26, 26, 25, \r
4031 24, 24, 23, 23, 22, 22, 21, 21, 20, 20, \r
4032 20, 19, 19, 19, 18, 18, 18, 17, 17, 17, \r
4033 16, 16, 16, 16, 15, 15, 15, 15, 14, 14, \r
4034 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, \r
4035 12, 12, 12, 12, 12, 12, 11, 11, 11, 11, \r
4036 11, 11, 11, 11, 11, 10, 10, 10, 10, 10\r
4037 };\r
4038\r
4039 int d;\r
4040\r
4041 if (mm->max == ONIG_INFINITE_DISTANCE) return 0;\r
4042\r
4043 d = mm->max - mm->min;\r
4044 if (d < (int )(sizeof(dist_vals)/sizeof(dist_vals[0])))\r
4045 /* return dist_vals[d] * 16 / (mm->min + 12); */\r
4046 return (int )dist_vals[d];\r
4047 else\r
4048 return 1;\r
4049}\r
4050\r
4051static int\r
4052comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2)\r
4053{\r
4054 if (v2 <= 0) return -1;\r
4055 if (v1 <= 0) return 1;\r
4056\r
4057 v1 *= distance_value(d1);\r
4058 v2 *= distance_value(d2);\r
4059\r
4060 if (v2 > v1) return 1;\r
4061 if (v2 < v1) return -1;\r
4062\r
4063 if (d2->min < d1->min) return 1;\r
4064 if (d2->min > d1->min) return -1;\r
4065 return 0;\r
4066}\r
4067\r
4068static int\r
4069is_equal_mml(MinMaxLen* a, MinMaxLen* b)\r
4070{\r
4071 return (a->min == b->min && a->max == b->max) ? 1 : 0;\r
4072}\r
4073\r
4074\r
4075static void\r
4076set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max)\r
4077{\r
4078 mml->min = min;\r
4079 mml->max = max;\r
4080}\r
4081\r
4082static void\r
4083clear_mml(MinMaxLen* mml)\r
4084{\r
4085 mml->min = mml->max = 0;\r
4086}\r
4087\r
4088static void\r
4089copy_mml(MinMaxLen* to, MinMaxLen* from)\r
4090{\r
4091 to->min = from->min;\r
4092 to->max = from->max;\r
4093}\r
4094\r
4095static void\r
4096add_mml(MinMaxLen* to, MinMaxLen* from)\r
4097{\r
4098 to->min = distance_add(to->min, from->min);\r
4099 to->max = distance_add(to->max, from->max);\r
4100}\r
4101\r
4102#if 0\r
4103static void\r
4104add_len_mml(MinMaxLen* to, OnigDistance len)\r
4105{\r
4106 to->min = distance_add(to->min, len);\r
4107 to->max = distance_add(to->max, len);\r
4108}\r
4109#endif\r
4110\r
4111static void\r
4112alt_merge_mml(MinMaxLen* to, MinMaxLen* from)\r
4113{\r
4114 if (to->min > from->min) to->min = from->min;\r
4115 if (to->max < from->max) to->max = from->max;\r
4116}\r
4117\r
4118static void\r
4119copy_opt_env(OptEnv* to, OptEnv* from)\r
4120{\r
b2e35426 4121 CopyMem (to, from, sizeof (OptEnv));\r
14b0e578
CS
4122}\r
4123\r
4124static void\r
4125clear_opt_anc_info(OptAncInfo* anc)\r
4126{\r
4127 anc->left_anchor = 0;\r
4128 anc->right_anchor = 0;\r
4129}\r
4130\r
4131static void\r
4132copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from)\r
4133{\r
b2e35426 4134 CopyMem (to, from, sizeof (OptAncInfo));\r
14b0e578
CS
4135}\r
4136\r
4137static void\r
4138concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,\r
4139 OnigDistance left_len, OnigDistance right_len)\r
4140{\r
4141 clear_opt_anc_info(to);\r
4142\r
4143 to->left_anchor = left->left_anchor;\r
4144 if (left_len == 0) {\r
4145 to->left_anchor |= right->left_anchor;\r
4146 }\r
4147\r
4148 to->right_anchor = right->right_anchor;\r
4149 if (right_len == 0) {\r
4150 to->right_anchor |= left->right_anchor;\r
4151 }\r
4152}\r
4153\r
4154static int\r
4155is_left_anchor(int anc)\r
4156{\r
4157 if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||\r
4158 anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||\r
4159 anc == ANCHOR_PREC_READ_NOT)\r
4160 return 0;\r
4161\r
4162 return 1;\r
4163}\r
4164\r
4165static int\r
4166is_set_opt_anc_info(OptAncInfo* to, int anc)\r
4167{\r
4168 if ((to->left_anchor & anc) != 0) return 1;\r
4169\r
4170 return ((to->right_anchor & anc) != 0 ? 1 : 0);\r
4171}\r
4172\r
4173static void\r
4174add_opt_anc_info(OptAncInfo* to, int anc)\r
4175{\r
4176 if (is_left_anchor(anc))\r
4177 to->left_anchor |= anc;\r
4178 else\r
4179 to->right_anchor |= anc;\r
4180}\r
4181\r
4182static void\r
4183remove_opt_anc_info(OptAncInfo* to, int anc)\r
4184{\r
4185 if (is_left_anchor(anc))\r
4186 to->left_anchor &= ~anc;\r
4187 else\r
4188 to->right_anchor &= ~anc;\r
4189}\r
4190\r
4191static void\r
4192alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add)\r
4193{\r
4194 to->left_anchor &= add->left_anchor;\r
4195 to->right_anchor &= add->right_anchor;\r
4196}\r
4197\r
4198static int\r
4199is_full_opt_exact_info(OptExactInfo* ex)\r
4200{\r
4201 return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);\r
4202}\r
4203\r
4204static void\r
4205clear_opt_exact_info(OptExactInfo* ex)\r
4206{\r
4207 clear_mml(&ex->mmd);\r
4208 clear_opt_anc_info(&ex->anc);\r
4209 ex->reach_end = 0;\r
4210 ex->ignore_case = 0;\r
4211 ex->len = 0;\r
4212 ex->s[0] = '\0';\r
4213}\r
4214\r
4215static void\r
4216copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from)\r
4217{\r
b2e35426 4218 CopyMem (to, from, sizeof (OptExactInfo));\r
14b0e578
CS
4219}\r
4220\r
4221static void\r
4222concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)\r
4223{\r
4224 int i, j, len;\r
4225 UChar *p, *end;\r
4226 OptAncInfo tanc;\r
4227\r
4228 if (! to->ignore_case && add->ignore_case) {\r
4229 if (to->len >= add->len) return ; /* avoid */\r
4230\r
4231 to->ignore_case = 1;\r
4232 }\r
4233\r
4234 p = add->s;\r
4235 end = p + add->len;\r
4236 for (i = to->len; p < end; ) {\r
4237 len = enclen(enc, p);\r
4238 if (i + len > OPT_EXACT_MAXLEN) break;\r
4239 for (j = 0; j < len && p < end; j++)\r
4240 to->s[i++] = *p++;\r
4241 }\r
4242\r
4243 to->len = i;\r
4244 to->reach_end = (p == end ? add->reach_end : 0);\r
4245\r
4246 concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);\r
4247 if (! to->reach_end) tanc.right_anchor = 0;\r
4248 copy_opt_anc_info(&to->anc, &tanc);\r
4249}\r
4250\r
4251static void\r
4252concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end,\r
4253 int raw ARG_UNUSED, OnigEncoding enc)\r
4254{\r
4255 int i, j, len;\r
4256 UChar *p;\r
4257\r
4258 for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {\r
4259 len = enclen(enc, p);\r
4260 if (i + len > OPT_EXACT_MAXLEN) break;\r
4261 for (j = 0; j < len && p < end; j++)\r
4262 to->s[i++] = *p++;\r
4263 }\r
4264\r
4265 to->len = i;\r
4266}\r
4267\r
4268static void\r
4269alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)\r
4270{\r
4271 int i, j, len;\r
4272\r
4273 if (add->len == 0 || to->len == 0) {\r
4274 clear_opt_exact_info(to);\r
4275 return ;\r
4276 }\r
4277\r
4278 if (! is_equal_mml(&to->mmd, &add->mmd)) {\r
4279 clear_opt_exact_info(to);\r
4280 return ;\r
4281 }\r
4282\r
4283 for (i = 0; i < to->len && i < add->len; ) {\r
4284 if (to->s[i] != add->s[i]) break;\r
4285 len = enclen(env->enc, to->s + i);\r
4286\r
4287 for (j = 1; j < len; j++) {\r
4288 if (to->s[i+j] != add->s[i+j]) break;\r
4289 }\r
4290 if (j < len) break;\r
4291 i += len;\r
4292 }\r
4293\r
4294 if (! add->reach_end || i < add->len || i < to->len) {\r
4295 to->reach_end = 0;\r
4296 }\r
4297 to->len = i;\r
4298 to->ignore_case |= add->ignore_case;\r
4299\r
4300 alt_merge_opt_anc_info(&to->anc, &add->anc);\r
4301 if (! to->reach_end) to->anc.right_anchor = 0;\r
4302}\r
4303\r
4304static void\r
4305select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)\r
4306{\r
4307 int v1, v2;\r
4308\r
4309 v1 = now->len;\r
4310 v2 = alt->len;\r
4311\r
4312 if (v2 == 0) {\r
4313 return ;\r
4314 }\r
4315 else if (v1 == 0) {\r
4316 copy_opt_exact_info(now, alt);\r
4317 return ;\r
4318 }\r
4319 else if (v1 <= 2 && v2 <= 2) {\r
4320 /* ByteValTable[x] is big value --> low price */\r
4321 v2 = map_position_value(enc, now->s[0]);\r
4322 v1 = map_position_value(enc, alt->s[0]);\r
4323\r
4324 if (now->len > 1) v1 += 5;\r
4325 if (alt->len > 1) v2 += 5;\r
4326 }\r
4327\r
4328 if (now->ignore_case == 0) v1 *= 2;\r
4329 if (alt->ignore_case == 0) v2 *= 2;\r
4330\r
4331 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)\r
4332 copy_opt_exact_info(now, alt);\r
4333}\r
4334\r
4335static void\r
4336clear_opt_map_info(OptMapInfo* map)\r
4337{\r
4338 static const OptMapInfo clean_info = {\r
4339 {0, 0}, {0, 0}, 0,\r
4340 {\r
4341 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4342 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4343 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4344 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4345 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4346 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4347 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4348 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4349 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4350 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4351 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4352 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4353 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4354 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4355 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4356 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0\r
4357 }\r
4358 };\r
4359\r
4360 xmemcpy(map, &clean_info, sizeof(OptMapInfo));\r
4361}\r
4362\r
4363static void\r
4364copy_opt_map_info(OptMapInfo* to, OptMapInfo* from)\r
4365{\r
b2e35426 4366 CopyMem (to, from, sizeof (OptMapInfo));\r
14b0e578
CS
4367}\r
4368\r
4369static void\r
4370add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc)\r
4371{\r
4372 if (map->map[c] == 0) {\r
4373 map->map[c] = 1;\r
4374 map->value += map_position_value(enc, c);\r
4375 }\r
4376}\r
4377\r
4378static int\r
4379add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,\r
4380 OnigEncoding enc, OnigCaseFoldType case_fold_flag)\r
4381{\r
4382 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];\r
4383 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];\r
4384 int i, n;\r
4385\r
4386 add_char_opt_map_info(map, p[0], enc);\r
4387\r
4388 case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);\r
4389 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);\r
4390 if (n < 0) return n;\r
4391\r
4392 for (i = 0; i < n; i++) {\r
4393 ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);\r
4394 add_char_opt_map_info(map, buf[0], enc);\r
4395 }\r
4396\r
4397 return 0;\r
4398}\r
4399\r
4400static void\r
4401select_opt_map_info(OptMapInfo* now, OptMapInfo* alt)\r
4402{\r
4403 static int z = 1<<15; /* 32768: something big value */\r
4404\r
4405 int v1, v2;\r
4406\r
4407 if (alt->value == 0) return ;\r
4408 if (now->value == 0) {\r
4409 copy_opt_map_info(now, alt);\r
4410 return ;\r
4411 }\r
4412\r
4413 v1 = z / now->value;\r
4414 v2 = z / alt->value;\r
4415 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)\r
4416 copy_opt_map_info(now, alt);\r
4417}\r
4418\r
4419static int\r
4420comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m)\r
4421{\r
4422#define COMP_EM_BASE 20\r
4423 int ve, vm;\r
4424\r
4425 if (m->value <= 0) return -1;\r
4426\r
4427 ve = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2);\r
4428 vm = COMP_EM_BASE * 5 * 2 / m->value;\r
4429 return comp_distance_value(&e->mmd, &m->mmd, ve, vm);\r
4430}\r
4431\r
4432static void\r
4433alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add)\r
4434{\r
4435 int i, val;\r
4436\r
4437 /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */\r
4438 if (to->value == 0) return ;\r
4439 if (add->value == 0 || to->mmd.max < add->mmd.min) {\r
4440 clear_opt_map_info(to);\r
4441 return ;\r
4442 }\r
4443\r
4444 alt_merge_mml(&to->mmd, &add->mmd);\r
4445\r
4446 val = 0;\r
4447 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {\r
4448 if (add->map[i])\r
4449 to->map[i] = 1;\r
4450\r
4451 if (to->map[i])\r
4452 val += map_position_value(enc, i);\r
4453 }\r
4454 to->value = val;\r
4455\r
4456 alt_merge_opt_anc_info(&to->anc, &add->anc);\r
4457}\r
4458\r
4459static void\r
4460set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd)\r
4461{\r
4462 copy_mml(&(opt->exb.mmd), mmd);\r
4463 copy_mml(&(opt->expr.mmd), mmd);\r
4464 copy_mml(&(opt->map.mmd), mmd);\r
4465}\r
4466\r
4467static void\r
4468clear_node_opt_info(NodeOptInfo* opt)\r
4469{\r
4470 clear_mml(&opt->len);\r
4471 clear_opt_anc_info(&opt->anc);\r
4472 clear_opt_exact_info(&opt->exb);\r
4473 clear_opt_exact_info(&opt->exm);\r
4474 clear_opt_exact_info(&opt->expr);\r
4475 clear_opt_map_info(&opt->map);\r
4476}\r
4477\r
4478static void\r
4479copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from)\r
4480{\r
b2e35426 4481 CopyMem (to, from, sizeof (NodeOptInfo));\r
14b0e578
CS
4482}\r
4483\r
4484static void\r
4485concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)\r
4486{\r
4487 int exb_reach, exm_reach;\r
4488 OptAncInfo tanc;\r
4489\r
4490 concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);\r
4491 copy_opt_anc_info(&to->anc, &tanc);\r
4492\r
4493 if (add->exb.len > 0 && to->len.max == 0) {\r
4494 concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,\r
4495 to->len.max, add->len.max);\r
4496 copy_opt_anc_info(&add->exb.anc, &tanc);\r
4497 }\r
4498\r
4499 if (add->map.value > 0 && to->len.max == 0) {\r
4500 if (add->map.mmd.max == 0)\r
4501 add->map.anc.left_anchor |= to->anc.left_anchor;\r
4502 }\r
4503\r
4504 exb_reach = to->exb.reach_end;\r
4505 exm_reach = to->exm.reach_end;\r
4506\r
4507 if (add->len.max != 0)\r
4508 to->exb.reach_end = to->exm.reach_end = 0;\r
4509\r
4510 if (add->exb.len > 0) {\r
4511 if (exb_reach) {\r
4512 concat_opt_exact_info(&to->exb, &add->exb, enc);\r
4513 clear_opt_exact_info(&add->exb);\r
4514 }\r
4515 else if (exm_reach) {\r
4516 concat_opt_exact_info(&to->exm, &add->exb, enc);\r
4517 clear_opt_exact_info(&add->exb);\r
4518 }\r
4519 }\r
4520 select_opt_exact_info(enc, &to->exm, &add->exb);\r
4521 select_opt_exact_info(enc, &to->exm, &add->exm);\r
4522\r
4523 if (to->expr.len > 0) {\r
4524 if (add->len.max > 0) {\r
4525 if (to->expr.len > (int )add->len.max)\r
4526 to->expr.len = add->len.max;\r
4527\r
4528 if (to->expr.mmd.max == 0)\r
4529 select_opt_exact_info(enc, &to->exb, &to->expr);\r
4530 else\r
4531 select_opt_exact_info(enc, &to->exm, &to->expr);\r
4532 }\r
4533 }\r
4534 else if (add->expr.len > 0) {\r
4535 copy_opt_exact_info(&to->expr, &add->expr);\r
4536 }\r
4537\r
4538 select_opt_map_info(&to->map, &add->map);\r
4539\r
4540 add_mml(&to->len, &add->len);\r
4541}\r
4542\r
4543static void\r
4544alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env)\r
4545{\r
4546 alt_merge_opt_anc_info (&to->anc, &add->anc);\r
4547 alt_merge_opt_exact_info(&to->exb, &add->exb, env);\r
4548 alt_merge_opt_exact_info(&to->exm, &add->exm, env);\r
4549 alt_merge_opt_exact_info(&to->expr, &add->expr, env);\r
4550 alt_merge_opt_map_info(env->enc, &to->map, &add->map);\r
4551\r
4552 alt_merge_mml(&to->len, &add->len);\r
4553}\r
4554\r
4555\r
4556#define MAX_NODE_OPT_INFO_REF_COUNT 5\r
4557\r
4558static int\r
4559optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)\r
4560{\r
4561 int type;\r
4562 int r = 0;\r
4563\r
4564 clear_node_opt_info(opt);\r
4565 set_bound_node_opt_info(opt, &env->mmd);\r
4566\r
4567 type = NTYPE(node);\r
4568 switch (type) {\r
4569 case NT_LIST:\r
4570 {\r
4571 OptEnv nenv;\r
4572 NodeOptInfo nopt;\r
4573 Node* nd = node;\r
4574\r
4575 copy_opt_env(&nenv, env);\r
4576 do {\r
4577 r = optimize_node_left(NCAR(nd), &nopt, &nenv);\r
4578 if (r == 0) {\r
4579 add_mml(&nenv.mmd, &nopt.len);\r
4580 concat_left_node_opt_info(env->enc, opt, &nopt);\r
4581 }\r
4582 } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));\r
4583 }\r
4584 break;\r
4585\r
4586 case NT_ALT:\r
4587 {\r
4588 NodeOptInfo nopt;\r
4589 Node* nd = node;\r
4590\r
4591 do {\r
4592 r = optimize_node_left(NCAR(nd), &nopt, env);\r
4593 if (r == 0) {\r
4594 if (nd == node) copy_node_opt_info(opt, &nopt);\r
4595 else alt_merge_node_opt_info(opt, &nopt, env);\r
4596 }\r
4597 } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));\r
4598 }\r
4599 break;\r
4600\r
4601 case NT_STR:\r
4602 {\r
4603 StrNode* sn = NSTR(node);\r
4604 int slen = (int)(sn->end - sn->s);\r
4605 int is_raw = NSTRING_IS_RAW(node);\r
4606\r
4607 if (! NSTRING_IS_AMBIG(node)) {\r
4608 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,\r
4609 NSTRING_IS_RAW(node), env->enc);\r
4610 if (slen > 0) {\r
4611 add_char_opt_map_info(&opt->map, *(sn->s), env->enc);\r
4612 }\r
4613 set_mml(&opt->len, slen, slen);\r
4614 }\r
4615 else {\r
4616 int max;\r
4617\r
4618 if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {\r
4619 int n = onigenc_strlen(env->enc, sn->s, sn->end);\r
4620 max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;\r
4621 }\r
4622 else {\r
4623 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,\r
4624 is_raw, env->enc);\r
4625 opt->exb.ignore_case = 1;\r
4626\r
4627 if (slen > 0) {\r
4628 r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,\r
4629 env->enc, env->case_fold_flag);\r
4630 if (r != 0) break;\r
4631 }\r
4632\r
4633 max = slen;\r
4634 }\r
4635\r
4636 set_mml(&opt->len, slen, max);\r
4637 }\r
4638\r
4639 if (opt->exb.len == slen)\r
4640 opt->exb.reach_end = 1;\r
4641 }\r
4642 break;\r
4643\r
4644 case NT_CCLASS:\r
4645 {\r
4646 int i, z;\r
4647 CClassNode* cc = NCCLASS(node);\r
4648\r
4649 /* no need to check ignore case. (setted in setup_tree()) */\r
4650\r
4651 if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {\r
4652 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);\r
4653 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);\r
4654\r
4655 set_mml(&opt->len, min, max);\r
4656 }\r
4657 else {\r
4658 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {\r
4659 z = BITSET_AT(cc->bs, i);\r
4660 if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {\r
4661 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);\r
4662 }\r
4663 }\r
4664 set_mml(&opt->len, 1, 1);\r
4665 }\r
4666 }\r
4667 break;\r
4668\r
4669 case NT_CTYPE:\r
4670 {\r
4671 int i, min, max;\r
4672\r
4673 max = ONIGENC_MBC_MAXLEN_DIST(env->enc);\r
4674\r
4675 if (max == 1) {\r
4676 min = 1;\r
4677\r
4678 switch (NCTYPE(node)->ctype) {\r
4679 case ONIGENC_CTYPE_WORD:\r
4680 if (NCTYPE(node)->not != 0) {\r
4681 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {\r
4682 if (! ONIGENC_IS_CODE_WORD(env->enc, i)) {\r
4683 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);\r
4684 }\r
4685 }\r
4686 }\r
4687 else {\r
4688 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {\r
4689 if (ONIGENC_IS_CODE_WORD(env->enc, i)) {\r
4690 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);\r
4691 }\r
4692 }\r
4693 }\r
4694 break;\r
4695 }\r
4696 }\r
4697 else {\r
4698 min = ONIGENC_MBC_MINLEN(env->enc);\r
4699 }\r
4700 set_mml(&opt->len, min, max);\r
4701 }\r
4702 break;\r
4703\r
4704 case NT_CANY:\r
4705 {\r
4706 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);\r
4707 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);\r
4708 set_mml(&opt->len, min, max);\r
4709 }\r
4710 break;\r
4711\r
4712 case NT_ANCHOR:\r
4713 switch (NANCHOR(node)->type) {\r
4714 case ANCHOR_BEGIN_BUF:\r
4715 case ANCHOR_BEGIN_POSITION:\r
4716 case ANCHOR_BEGIN_LINE:\r
4717 case ANCHOR_END_BUF:\r
4718 case ANCHOR_SEMI_END_BUF:\r
4719 case ANCHOR_END_LINE:\r
4720 add_opt_anc_info(&opt->anc, NANCHOR(node)->type);\r
4721 break;\r
4722\r
4723 case ANCHOR_PREC_READ:\r
4724 {\r
4725 NodeOptInfo nopt;\r
4726\r
4727 r = optimize_node_left(NANCHOR(node)->target, &nopt, env);\r
4728 if (r == 0) {\r
4729 if (nopt.exb.len > 0)\r
4730 copy_opt_exact_info(&opt->expr, &nopt.exb);\r
4731 else if (nopt.exm.len > 0)\r
4732 copy_opt_exact_info(&opt->expr, &nopt.exm);\r
4733\r
4734 opt->expr.reach_end = 0;\r
4735\r
4736 if (nopt.map.value > 0)\r
4737 copy_opt_map_info(&opt->map, &nopt.map);\r
4738 }\r
4739 }\r
4740 break;\r
4741\r
4742 case ANCHOR_PREC_READ_NOT:\r
4743 case ANCHOR_LOOK_BEHIND: /* Sorry, I can't make use of it. */\r
4744 case ANCHOR_LOOK_BEHIND_NOT:\r
4745 break;\r
4746 }\r
4747 break;\r
4748\r
4749 case NT_BREF:\r
4750 {\r
4751 int i;\r
4752 int* backs;\r
4753 OnigDistance min, max, tmin, tmax;\r
4754 Node** nodes = SCANENV_MEM_NODES(env->scan_env);\r
4755 BRefNode* br = NBREF(node);\r
4756\r
4757 if (br->state & NST_RECURSION) {\r
4758 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);\r
4759 break;\r
4760 }\r
4761 backs = BACKREFS_P(br);\r
4762 r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);\r
4763 if (r != 0) break;\r
4764 r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);\r
4765 if (r != 0) break;\r
4766 for (i = 1; i < br->back_num; i++) {\r
4767 r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);\r
4768 if (r != 0) break;\r
4769 r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);\r
4770 if (r != 0) break;\r
4771 if (min > tmin) min = tmin;\r
4772 if (max < tmax) max = tmax;\r
4773 }\r
4774 if (r == 0) set_mml(&opt->len, min, max);\r
4775 }\r
4776 break;\r
4777\r
4778#ifdef USE_SUBEXP_CALL\r
4779 case NT_CALL:\r
4780 if (IS_CALL_RECURSION(NCALL(node)))\r
4781 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);\r
4782 else {\r
4783 OnigOptionType save = env->options;\r
4784 env->options = NENCLOSE(NCALL(node)->target)->option;\r
4785 r = optimize_node_left(NCALL(node)->target, opt, env);\r
4786 env->options = save;\r
4787 }\r
4788 break;\r
4789#endif\r
4790\r
4791 case NT_QTFR:\r
4792 {\r
4793 int i;\r
4794 OnigDistance min, max;\r
4795 NodeOptInfo nopt;\r
4796 QtfrNode* qn = NQTFR(node);\r
4797\r
4798 r = optimize_node_left(qn->target, &nopt, env);\r
4799 if (r) break;\r
4800\r
4801 if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) {\r
4802 if (env->mmd.max == 0 &&\r
4803 NTYPE(qn->target) == NT_CANY && qn->greedy) {\r
4804 if (IS_MULTILINE(env->options))\r
4805 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);\r
4806 else\r
4807 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);\r
4808 }\r
4809 }\r
4810 else {\r
4811 if (qn->lower > 0) {\r
4812 copy_node_opt_info(opt, &nopt);\r
4813 if (nopt.exb.len > 0) {\r
4814 if (nopt.exb.reach_end) {\r
4815 for (i = 2; i <= qn->lower &&\r
4816 ! is_full_opt_exact_info(&opt->exb); i++) {\r
4817 concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);\r
4818 }\r
4819 if (i < qn->lower) {\r
4820 opt->exb.reach_end = 0;\r
4821 }\r
4822 }\r
4823 }\r
4824\r
4825 if (qn->lower != qn->upper) {\r
4826 opt->exb.reach_end = 0;\r
4827 opt->exm.reach_end = 0;\r
4828 }\r
4829 if (qn->lower > 1)\r
4830 opt->exm.reach_end = 0;\r
4831 }\r
4832 }\r
4833\r
4834 min = distance_multiply(nopt.len.min, qn->lower);\r
4835 if (IS_REPEAT_INFINITE(qn->upper))\r
4836 max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);\r
4837 else\r
4838 max = distance_multiply(nopt.len.max, qn->upper);\r
4839\r
4840 set_mml(&opt->len, min, max);\r
4841 }\r
4842 break;\r
4843\r
4844 case NT_ENCLOSE:\r
4845 {\r
4846 EncloseNode* en = NENCLOSE(node);\r
4847\r
4848 switch (en->type) {\r
4849 case ENCLOSE_OPTION:\r
4850 {\r
4851 OnigOptionType save = env->options;\r
4852\r
4853 env->options = en->option;\r
4854 r = optimize_node_left(en->target, opt, env);\r
4855 env->options = save;\r
4856 }\r
4857 break;\r
4858\r
4859 case ENCLOSE_MEMORY:\r
4860#ifdef USE_SUBEXP_CALL\r
4861 en->opt_count++;\r
4862 if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {\r
4863 OnigDistance min, max;\r
4864\r
4865 min = 0;\r
4866 max = ONIG_INFINITE_DISTANCE;\r
4867 if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;\r
4868 if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;\r
4869 set_mml(&opt->len, min, max);\r
4870 }\r
4871 else\r
4872#endif\r
4873 {\r
4874 r = optimize_node_left(en->target, opt, env);\r
4875\r
4876 if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {\r
4877 if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))\r
4878 remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);\r
4879 }\r
4880 }\r
4881 break;\r
4882\r
4883 case ENCLOSE_STOP_BACKTRACK:\r
4884 r = optimize_node_left(en->target, opt, env);\r
4885 break;\r
4886 }\r
4887 }\r
4888 break;\r
4889\r
4890 default:\r
4891#ifdef ONIG_DEBUG\r
4892 fprintf(stderr, "optimize_node_left: undefined node type %d\n",\r
4893 NTYPE(node));\r
4894#endif\r
4895 r = ONIGERR_TYPE_BUG;\r
4896 break;\r
4897 }\r
4898\r
4899 return r;\r
4900}\r
4901\r
4902static int\r
4903set_optimize_exact_info(regex_t* reg, OptExactInfo* e)\r
4904{\r
4905 int r;\r
4906\r
4907 if (e->len == 0) return 0;\r
4908\r
4909 if (e->ignore_case) {\r
4910 reg->exact = (UChar* )xmalloc(e->len);\r
4911 CHECK_NULL_RETURN_MEMERR(reg->exact);\r
4912 xmemcpy(reg->exact, e->s, e->len);\r
4913 reg->exact_end = reg->exact + e->len;\r
4914 reg->optimize = ONIG_OPTIMIZE_EXACT_IC;\r
4915 }\r
4916 else {\r
4917 int allow_reverse;\r
4918\r
4919 reg->exact = str_dup(e->s, e->s + e->len);\r
4920 CHECK_NULL_RETURN_MEMERR(reg->exact);\r
4921 reg->exact_end = reg->exact + e->len;\r
4922 \r
4923 allow_reverse =\r
4924 ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);\r
4925\r
4926 if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {\r
4927 r = set_bm_skip(reg->exact, reg->exact_end, reg->enc,\r
4928 reg->map, &(reg->int_map));\r
4929 if (r) return r;\r
4930\r
4931 reg->optimize = (allow_reverse != 0\r
4932 ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);\r
4933 }\r
4934 else {\r
4935 reg->optimize = ONIG_OPTIMIZE_EXACT;\r
4936 }\r
4937 }\r
4938\r
4939 reg->dmin = e->mmd.min;\r
4940 reg->dmax = e->mmd.max;\r
4941\r
4942 if (reg->dmin != ONIG_INFINITE_DISTANCE) {\r
4943 reg->threshold_len = reg->dmin + (int)(reg->exact_end - reg->exact);\r
4944 }\r
4945\r
4946 return 0;\r
4947}\r
4948\r
4949static void\r
4950set_optimize_map_info(regex_t* reg, OptMapInfo* m)\r
4951{\r
4952 int i;\r
4953\r
4954 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)\r
4955 reg->map[i] = m->map[i];\r
4956\r
4957 reg->optimize = ONIG_OPTIMIZE_MAP;\r
4958 reg->dmin = m->mmd.min;\r
4959 reg->dmax = m->mmd.max;\r
4960\r
4961 if (reg->dmin != ONIG_INFINITE_DISTANCE) {\r
4962 reg->threshold_len = reg->dmin + 1;\r
4963 }\r
4964}\r
4965\r
4966static void\r
4967set_sub_anchor(regex_t* reg, OptAncInfo* anc)\r
4968{\r
4969 reg->sub_anchor |= anc->left_anchor & ANCHOR_BEGIN_LINE;\r
4970 reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;\r
4971}\r
4972\r
4973#ifdef ONIG_DEBUG\r
4974static void print_optimize_info(FILE* f, regex_t* reg);\r
4975#endif\r
4976\r
4977static int\r
4978set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)\r
4979{\r
4980\r
4981 int r;\r
4982 NodeOptInfo opt;\r
4983 OptEnv env;\r
4984\r
4985 env.enc = reg->enc;\r
4986 env.options = reg->options;\r
4987 env.case_fold_flag = reg->case_fold_flag;\r
4988 env.scan_env = scan_env;\r
4989 clear_mml(&env.mmd);\r
4990\r
4991 r = optimize_node_left(node, &opt, &env);\r
4992 if (r) return r;\r
4993\r
4994 reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |\r
4995 ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML);\r
4996\r
4997 reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF);\r
4998\r
4999 if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {\r
5000 reg->anchor_dmin = opt.len.min;\r
5001 reg->anchor_dmax = opt.len.max;\r
5002 }\r
5003\r
5004 if (opt.exb.len > 0 || opt.exm.len > 0) {\r
5005 select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);\r
5006 if (opt.map.value > 0 &&\r
5007 comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {\r
5008 goto set_map;\r
5009 }\r
5010 else {\r
5011 r = set_optimize_exact_info(reg, &opt.exb);\r
5012 set_sub_anchor(reg, &opt.exb.anc);\r
5013 }\r
5014 }\r
5015 else if (opt.map.value > 0) {\r
5016 set_map:\r
5017 set_optimize_map_info(reg, &opt.map);\r
5018 set_sub_anchor(reg, &opt.map.anc);\r
5019 }\r
5020 else {\r
5021 reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE;\r
5022 if (opt.len.max == 0)\r
5023 reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE;\r
5024 }\r
5025\r
5026#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)\r
5027 print_optimize_info(stderr, reg);\r
5028#endif\r
5029 return r;\r
5030}\r
5031\r
5032static void\r
5033clear_optimize_info(regex_t* reg)\r
5034{\r
5035 reg->optimize = ONIG_OPTIMIZE_NONE;\r
5036 reg->anchor = 0;\r
5037 reg->anchor_dmin = 0;\r
5038 reg->anchor_dmax = 0;\r
5039 reg->sub_anchor = 0;\r
5040 reg->exact_end = (UChar* )NULL;\r
5041 reg->threshold_len = 0;\r
5042 if (IS_NOT_NULL(reg->exact)) {\r
5043 xfree(reg->exact);\r
5044 reg->exact = (UChar* )NULL;\r
5045 }\r
5046}\r
5047\r
5048#ifdef ONIG_DEBUG\r
5049\r
5050static void print_enc_string(FILE* fp, OnigEncoding enc,\r
5051 const UChar *s, const UChar *end)\r
5052{\r
5053 fprintf(fp, "\nPATTERN: /");\r
5054\r
5055 if (ONIGENC_MBC_MINLEN(enc) > 1) {\r
5056 const UChar *p;\r
5057 OnigCodePoint code;\r
5058\r
5059 p = s;\r
5060 while (p < end) {\r
5061 code = ONIGENC_MBC_TO_CODE(enc, p, end);\r
5062 if (code >= 0x80) {\r
5063 fprintf(fp, " 0x%04x ", (int )code);\r
5064 }\r
5065 else {\r
5066 fputc((int )code, fp);\r
5067 }\r
5068\r
5069 p += enclen(enc, p);\r
5070 }\r
5071 }\r
5072 else {\r
5073 while (s < end) {\r
5074 fputc((int )*s, fp);\r
5075 s++;\r
5076 }\r
5077 }\r
5078\r
5079 fprintf(fp, "/\n");\r
5080}\r
5081\r
5082static void\r
5083print_distance_range(FILE* f, OnigDistance a, OnigDistance b)\r
5084{\r
5085 if (a == ONIG_INFINITE_DISTANCE)\r
5086 fputs("inf", f);\r
5087 else\r
5088 fprintf(f, "(%u)", a);\r
5089\r
5090 fputs("-", f);\r
5091\r
5092 if (b == ONIG_INFINITE_DISTANCE)\r
5093 fputs("inf", f);\r
5094 else\r
5095 fprintf(f, "(%u)", b);\r
5096}\r
5097\r
5098static void\r
5099print_anchor(FILE* f, int anchor)\r
5100{\r
5101 int q = 0;\r
5102\r
5103 fprintf(f, "[");\r
5104\r
5105 if (anchor & ANCHOR_BEGIN_BUF) {\r
5106 fprintf(f, "begin-buf");\r
5107 q = 1;\r
5108 }\r
5109 if (anchor & ANCHOR_BEGIN_LINE) {\r
5110 if (q) fprintf(f, ", ");\r
5111 q = 1;\r
5112 fprintf(f, "begin-line");\r
5113 }\r
5114 if (anchor & ANCHOR_BEGIN_POSITION) {\r
5115 if (q) fprintf(f, ", ");\r
5116 q = 1;\r
5117 fprintf(f, "begin-pos");\r
5118 }\r
5119 if (anchor & ANCHOR_END_BUF) {\r
5120 if (q) fprintf(f, ", ");\r
5121 q = 1;\r
5122 fprintf(f, "end-buf");\r
5123 }\r
5124 if (anchor & ANCHOR_SEMI_END_BUF) {\r
5125 if (q) fprintf(f, ", ");\r
5126 q = 1;\r
5127 fprintf(f, "semi-end-buf");\r
5128 }\r
5129 if (anchor & ANCHOR_END_LINE) {\r
5130 if (q) fprintf(f, ", ");\r
5131 q = 1;\r
5132 fprintf(f, "end-line");\r
5133 }\r
5134 if (anchor & ANCHOR_ANYCHAR_STAR) {\r
5135 if (q) fprintf(f, ", ");\r
5136 q = 1;\r
5137 fprintf(f, "anychar-star");\r
5138 }\r
5139 if (anchor & ANCHOR_ANYCHAR_STAR_ML) {\r
5140 if (q) fprintf(f, ", ");\r
5141 fprintf(f, "anychar-star-pl");\r
5142 }\r
5143\r
5144 fprintf(f, "]");\r
5145}\r
5146\r
5147static void\r
5148print_optimize_info(FILE* f, regex_t* reg)\r
5149{\r
5150 static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",\r
5151 "EXACT_IC", "MAP" };\r
5152\r
5153 fprintf(f, "optimize: %s\n", on[reg->optimize]);\r
5154 fprintf(f, " anchor: "); print_anchor(f, reg->anchor);\r
5155 if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)\r
5156 print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);\r
5157 fprintf(f, "\n");\r
5158\r
5159 if (reg->optimize) {\r
5160 fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor);\r
5161 fprintf(f, "\n");\r
5162 }\r
5163 fprintf(f, "\n");\r
5164\r
5165 if (reg->exact) {\r
5166 UChar *p;\r
5167 fprintf(f, "exact: [");\r
5168 for (p = reg->exact; p < reg->exact_end; p++) {\r
5169 fputc(*p, f);\r
5170 }\r
5171 fprintf(f, "]: length: %d\n", (reg->exact_end - reg->exact));\r
5172 }\r
5173 else if (reg->optimize & ONIG_OPTIMIZE_MAP) {\r
5174 int c, i, n = 0;\r
5175\r
5176 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)\r
5177 if (reg->map[i]) n++;\r
5178\r
5179 fprintf(f, "map: n=%d\n", n);\r
5180 if (n > 0) {\r
5181 c = 0;\r
5182 fputc('[', f);\r
5183 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {\r
5184 if (reg->map[i] != 0) {\r
5185 if (c > 0) fputs(", ", f);\r
5186 c++;\r
5187 if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&\r
5188 ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))\r
5189 fputc(i, f);\r
5190 else\r
5191 fprintf(f, "%d", i);\r
5192 }\r
5193 }\r
5194 fprintf(f, "]\n");\r
5195 }\r
5196 }\r
5197}\r
5198#endif /* ONIG_DEBUG */\r
5199\r
5200\r
5201extern void\r
5202onig_free_body(regex_t* reg)\r
5203{\r
5204 if (IS_NOT_NULL(reg)) {\r
5205 if (IS_NOT_NULL(reg->p)) xfree(reg->p);\r
5206 if (IS_NOT_NULL(reg->exact)) xfree(reg->exact);\r
5207 if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map);\r
5208 if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward);\r
5209 if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range);\r
5210 if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain);\r
5211\r
5212#ifdef USE_NAMED_GROUP\r
5213 onig_names_free(reg);\r
5214#endif\r
5215 }\r
5216}\r
5217\r
5218extern void\r
5219onig_free(regex_t* reg)\r
5220{\r
5221 if (IS_NOT_NULL(reg)) {\r
5222 onig_free_body(reg);\r
5223 xfree(reg);\r
5224 }\r
5225}\r
5226\r
5227#define REGEX_TRANSFER(to,from) do {\\r
5228 (to)->state = ONIG_STATE_MODIFY;\\r
5229 onig_free_body(to);\\r
5230 xmemcpy(to, from, sizeof(regex_t));\\r
5231 xfree(from);\\r
5232} while (0)\r
5233\r
5234extern void\r
5235onig_transfer(regex_t* to, regex_t* from)\r
5236{\r
5237 THREAD_ATOMIC_START;\r
5238 REGEX_TRANSFER(to, from);\r
5239 THREAD_ATOMIC_END;\r
5240}\r
5241\r
5242#define REGEX_CHAIN_HEAD(reg) do {\\r
5243 while (IS_NOT_NULL((reg)->chain)) {\\r
5244 (reg) = (reg)->chain;\\r
5245 }\\r
5246} while (0)\r
5247\r
5248extern void\r
5249onig_chain_link_add(regex_t* to, regex_t* add)\r
5250{\r
5251 THREAD_ATOMIC_START;\r
5252 REGEX_CHAIN_HEAD(to);\r
5253 to->chain = add;\r
5254 THREAD_ATOMIC_END;\r
5255}\r
5256\r
5257extern void\r
5258onig_chain_reduce(regex_t* reg)\r
5259{\r
5260 regex_t *head, *prev;\r
5261\r
5262 prev = reg;\r
5263 head = prev->chain;\r
5264 if (IS_NOT_NULL(head)) {\r
5265 reg->state = ONIG_STATE_MODIFY;\r
5266 while (IS_NOT_NULL(head->chain)) {\r
5267 prev = head;\r
5268 head = head->chain;\r
5269 }\r
5270 prev->chain = (regex_t* )NULL;\r
5271 REGEX_TRANSFER(reg, head);\r
5272 }\r
5273}\r
5274\r
5275#ifdef ONIG_DEBUG\r
5276static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg));\r
5277#endif\r
5278#ifdef ONIG_DEBUG_PARSE_TREE\r
5279static void print_tree P_((FILE* f, Node* node));\r
5280#endif\r
5281\r
5282extern int\r
5283onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,\r
5284 OnigErrorInfo* einfo)\r
5285{\r
5286#define COMPILE_INIT_SIZE 20\r
5287\r
5288 int r, init_size;\r
5289 Node* root;\r
5290 ScanEnv scan_env;\r
5291#ifdef USE_SUBEXP_CALL\r
5292 UnsetAddrList uslist;\r
5293#endif\r
5294\r
5295 if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;\r
5296\r
5297 reg->state = ONIG_STATE_COMPILING;\r
5298\r
5299#ifdef ONIG_DEBUG\r
5300 print_enc_string(stderr, reg->enc, pattern, pattern_end);\r
5301#endif\r
5302\r
5303 if (reg->alloc == 0) {\r
5304 init_size = ((int)(pattern_end - pattern)) * 2;\r
5305 if (init_size <= 0) init_size = COMPILE_INIT_SIZE;\r
5306 r = BBUF_INIT(reg, init_size);\r
5307 if (r != 0) goto end;\r
5308 }\r
5309 else\r
5310 reg->used = 0;\r
5311\r
5312 reg->num_mem = 0;\r
5313 reg->num_repeat = 0;\r
5314 reg->num_null_check = 0;\r
5315 reg->repeat_range_alloc = 0;\r
5316 reg->repeat_range = (OnigRepeatRange* )NULL;\r
5317#ifdef USE_COMBINATION_EXPLOSION_CHECK\r
5318 reg->num_comb_exp_check = 0;\r
5319#endif\r
5320\r
5321 r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);\r
b0c2b797 5322 if (r != 0 || root == NULL) goto err;\r
14b0e578
CS
5323\r
5324#ifdef USE_NAMED_GROUP\r
5325 /* mixed use named group and no-named group */\r
5326 if (scan_env.num_named > 0 &&\r
5327 IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&\r
5328 !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {\r
5329 if (scan_env.num_named != scan_env.num_mem)\r
5330 r = disable_noname_group_capture(&root, reg, &scan_env);\r
5331 else\r
5332 r = numbered_ref_check(root);\r
5333\r
5334 if (r != 0) goto err;\r
5335 }\r
5336#endif\r
5337\r
5338#ifdef USE_SUBEXP_CALL\r
5339 if (scan_env.num_call > 0) {\r
5340 r = unset_addr_list_init(&uslist, scan_env.num_call);\r
5341 if (r != 0) goto err;\r
5342 scan_env.unset_addr_list = &uslist;\r
5343 r = setup_subexp_call(root, &scan_env);\r
5344 if (r != 0) goto err_unset;\r
5345 r = subexp_recursive_check_trav(root, &scan_env);\r
5346 if (r < 0) goto err_unset;\r
5347 r = subexp_inf_recursive_check_trav(root, &scan_env);\r
5348 if (r != 0) goto err_unset;\r
5349\r
5350 reg->num_call = scan_env.num_call;\r
5351 }\r
5352 else\r
5353 reg->num_call = 0;\r
5354#endif\r
5355\r
5356 r = setup_tree(root, reg, 0, &scan_env);\r
5357 if (r != 0) goto err_unset;\r
5358\r
5359#ifdef ONIG_DEBUG_PARSE_TREE\r
5360 print_tree(stderr, root);\r
5361#endif\r
5362\r
5363 reg->capture_history = scan_env.capture_history;\r
5364 reg->bt_mem_start = scan_env.bt_mem_start;\r
5365 reg->bt_mem_start |= reg->capture_history;\r
5366 if (IS_FIND_CONDITION(reg->options))\r
5367 BIT_STATUS_ON_ALL(reg->bt_mem_end);\r
5368 else {\r
5369 reg->bt_mem_end = scan_env.bt_mem_end;\r
5370 reg->bt_mem_end |= reg->capture_history;\r
5371 }\r
5372\r
5373#ifdef USE_COMBINATION_EXPLOSION_CHECK\r
5374 if (scan_env.backrefed_mem == 0\r
5375#ifdef USE_SUBEXP_CALL\r
5376 || scan_env.num_call == 0\r
5377#endif\r
5378 ) {\r
5379 setup_comb_exp_check(root, 0, &scan_env);\r
5380#ifdef USE_SUBEXP_CALL\r
5381 if (scan_env.has_recursion != 0) {\r
5382 scan_env.num_comb_exp_check = 0;\r
5383 }\r
5384 else\r
5385#endif\r
5386 if (scan_env.comb_exp_max_regnum > 0) {\r
5387 int i;\r
5388 for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {\r
5389 if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {\r
5390 scan_env.num_comb_exp_check = 0;\r
5391 break;\r
5392 }\r
5393 }\r
5394 }\r
5395 }\r
5396\r
5397 reg->num_comb_exp_check = scan_env.num_comb_exp_check;\r
5398#endif\r
5399\r
5400 clear_optimize_info(reg);\r
5401#ifndef ONIG_DONT_OPTIMIZE\r
5402 r = set_optimize_info_from_tree(root, reg, &scan_env);\r
5403 if (r != 0) goto err_unset;\r
5404#endif\r
5405\r
5406 if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {\r
5407 xfree(scan_env.mem_nodes_dynamic);\r
5408 scan_env.mem_nodes_dynamic = (Node** )NULL;\r
5409 }\r
5410\r
5411 r = compile_tree(root, reg);\r
5412 if (r == 0) {\r
5413 r = add_opcode(reg, OP_END);\r
5414#ifdef USE_SUBEXP_CALL\r
5415 if (scan_env.num_call > 0) {\r
5416 r = unset_addr_list_fix(&uslist, reg);\r
5417 unset_addr_list_end(&uslist);\r
5418 if (r) goto err;\r
5419 }\r
5420#endif\r
5421\r
5422 if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))\r
5423 reg->stack_pop_level = STACK_POP_LEVEL_ALL;\r
5424 else {\r
5425 if (reg->bt_mem_start != 0)\r
5426 reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;\r
5427 else\r
5428 reg->stack_pop_level = STACK_POP_LEVEL_FREE;\r
5429 }\r
5430 }\r
5431#ifdef USE_SUBEXP_CALL\r
5432 else if (scan_env.num_call > 0) {\r
5433 unset_addr_list_end(&uslist);\r
5434 }\r
5435#endif\r
5436 onig_node_free(root);\r
5437\r
5438#ifdef ONIG_DEBUG_COMPILE\r
5439#ifdef USE_NAMED_GROUP\r
5440 onig_print_names(stderr, reg);\r
5441#endif\r
5442 print_compiled_byte_code_list(stderr, reg);\r
5443#endif\r
5444\r
5445 end:\r
5446 reg->state = ONIG_STATE_NORMAL;\r
5447 return r;\r
5448\r
5449 err_unset:\r
5450#ifdef USE_SUBEXP_CALL\r
5451 if (scan_env.num_call > 0) {\r
5452 unset_addr_list_end(&uslist);\r
5453 }\r
5454#endif\r
5455 err:\r
5456 if (IS_NOT_NULL(scan_env.error)) {\r
5457 if (IS_NOT_NULL(einfo)) {\r
5458 einfo->enc = scan_env.enc;\r
5459 einfo->par = scan_env.error;\r
5460 einfo->par_end = scan_env.error_end;\r
5461 }\r
5462 }\r
5463\r
5464 onig_node_free(root);\r
5465 if (IS_NOT_NULL(scan_env.mem_nodes_dynamic))\r
5466 xfree(scan_env.mem_nodes_dynamic);\r
5467 return r;\r
5468}\r
5469\r
5470#ifdef USE_RECOMPILE_API\r
5471extern int\r
5472onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,\r
5473 OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,\r
5474 OnigErrorInfo* einfo)\r
5475{\r
5476 int r;\r
5477 regex_t *new_reg;\r
5478\r
5479 r = onig_new(&new_reg, pattern, pattern_end, option, enc, syntax, einfo);\r
5480 if (r) return r;\r
5481 if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) {\r
5482 onig_transfer(reg, new_reg);\r
5483 }\r
5484 else {\r
5485 onig_chain_link_add(reg, new_reg);\r
5486 }\r
5487 return 0;\r
5488}\r
5489#endif\r
5490\r
5491static int onig_inited = 0;\r
5492\r
5493extern int\r
5494onig_reg_init(regex_t* reg, OnigOptionType option,\r
5495 OnigCaseFoldType case_fold_flag,\r
5496 OnigEncoding enc, OnigSyntaxType* syntax)\r
5497{\r
5498 if (! onig_inited)\r
5499 onig_init();\r
5500\r
5501 if (IS_NULL(reg))\r
5502 return ONIGERR_INVALID_ARGUMENT;\r
5503\r
5504 if (ONIGENC_IS_UNDEF(enc))\r
5505 return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED;\r
5506\r
5507 if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))\r
5508 == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {\r
5509 return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;\r
5510 }\r
5511\r
5512 (reg)->state = ONIG_STATE_MODIFY;\r
5513\r
5514 if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {\r
5515 option |= syntax->options;\r
5516 option &= ~ONIG_OPTION_SINGLELINE;\r
5517 }\r
5518 else\r
5519 option |= syntax->options;\r
5520\r
5521 (reg)->enc = enc;\r
5522 (reg)->options = option;\r
5523 (reg)->syntax = syntax;\r
5524 (reg)->optimize = 0;\r
5525 (reg)->exact = (UChar* )NULL;\r
5526 (reg)->int_map = (int* )NULL;\r
5527 (reg)->int_map_backward = (int* )NULL;\r
5528 (reg)->chain = (regex_t* )NULL;\r
5529\r
5530 (reg)->p = (UChar* )NULL;\r
5531 (reg)->alloc = 0;\r
5532 (reg)->used = 0;\r
5533 (reg)->name_table = (void* )NULL;\r
5534\r
5535 (reg)->case_fold_flag = case_fold_flag;\r
5536 return 0;\r
5537}\r
5538\r
5539extern int\r
5540onig_new_without_alloc(regex_t* reg, const UChar* pattern,\r
5541 const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,\r
5542 OnigSyntaxType* syntax, OnigErrorInfo* einfo)\r
5543{\r
5544 int r;\r
5545\r
5546 r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);\r
5547 if (r) return r;\r
5548\r
5549 r = onig_compile(reg, pattern, pattern_end, einfo);\r
5550 return r;\r
5551}\r
5552\r
5553extern int\r
5554onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,\r
5555 OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,\r
5556 OnigErrorInfo* einfo)\r
5557{\r
5558 int r;\r
5559\r
5560 *reg = (regex_t* )xmalloc(sizeof(regex_t));\r
5561 if (IS_NULL(*reg)) return ONIGERR_MEMORY;\r
5562\r
5563 r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);\r
5564 if (r) goto err;\r
5565\r
5566 r = onig_compile(*reg, pattern, pattern_end, einfo);\r
5567 if (r) {\r
5568 err:\r
5569 onig_free(*reg);\r
5570 *reg = NULL;\r
5571 }\r
5572 return r;\r
5573}\r
5574\r
5575\r
5576extern int\r
5577onig_init(void)\r
5578{\r
5579 if (onig_inited != 0)\r
5580 return 0;\r
5581\r
5582 THREAD_SYSTEM_INIT;\r
5583 THREAD_ATOMIC_START;\r
5584\r
5585 onig_inited = 1;\r
5586\r
5587 onigenc_init();\r
5588 /* onigenc_set_default_caseconv_table((UChar* )0); */\r
5589\r
5590#ifdef ONIG_DEBUG_STATISTICS\r
5591 onig_statistics_init();\r
5592#endif\r
5593\r
5594 THREAD_ATOMIC_END;\r
5595 return 0;\r
5596}\r
5597\r
5598\r
5599static OnigEndCallListItemType* EndCallTop;\r
5600\r
5601extern void onig_add_end_call(void (*func)(void))\r
5602{\r
5603 OnigEndCallListItemType* item;\r
5604\r
5605 item = (OnigEndCallListItemType* )xmalloc(sizeof(*item));\r
5606 if (item == 0) return ;\r
5607\r
5608 item->next = EndCallTop;\r
5609 item->func = func;\r
5610\r
5611 EndCallTop = item;\r
5612}\r
5613\r
5614static void\r
5615exec_end_call_list(void)\r
5616{\r
5617 OnigEndCallListItemType* prev;\r
5618 void (*func)(void);\r
5619\r
5620 while (EndCallTop != 0) {\r
5621 func = EndCallTop->func;\r
5622 (*func)();\r
5623\r
5624 prev = EndCallTop;\r
5625 EndCallTop = EndCallTop->next;\r
5626 xfree(prev);\r
5627 }\r
5628}\r
5629\r
5630extern int\r
5631onig_end(void)\r
5632{\r
5633 THREAD_ATOMIC_START;\r
5634\r
5635 exec_end_call_list();\r
5636\r
5637#ifdef ONIG_DEBUG_STATISTICS\r
5638 onig_print_statistics(stderr);\r
5639#endif\r
5640\r
5641#ifdef USE_SHARED_CCLASS_TABLE\r
5642 onig_free_shared_cclass_table();\r
5643#endif\r
5644\r
5645#ifdef USE_PARSE_TREE_NODE_RECYCLE\r
5646 onig_free_node_list();\r
5647#endif\r
5648\r
5649 onig_inited = 0;\r
5650\r
5651 THREAD_ATOMIC_END;\r
5652 THREAD_SYSTEM_END;\r
5653 return 0;\r
5654}\r
5655\r
5656extern int\r
5657onig_is_in_code_range(const UChar* p, OnigCodePoint code)\r
5658{\r
5659 OnigCodePoint n, *data;\r
5660 OnigCodePoint low, high, x;\r
5661\r
5662 GET_CODE_POINT(n, p);\r
5663 data = (OnigCodePoint* )p;\r
5664 data++;\r
5665\r
5666 for (low = 0, high = n; low < high; ) {\r
5667 x = (low + high) >> 1;\r
5668 if (code > data[x * 2 + 1])\r
5669 low = x + 1;\r
5670 else\r
5671 high = x;\r
5672 }\r
5673\r
5674 return ((low < n && code >= data[low * 2]) ? 1 : 0);\r
5675}\r
5676\r
5677extern int\r
5678onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc)\r
5679{\r
5680 int found;\r
5681\r
5682 if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {\r
5683 if (IS_NULL(cc->mbuf)) {\r
5684 found = 0;\r
5685 }\r
5686 else {\r
5687 found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);\r
5688 }\r
5689 }\r
5690 else {\r
5691 found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);\r
5692 }\r
5693\r
5694 if (IS_NCCLASS_NOT(cc))\r
5695 return !found;\r
5696 else\r
5697 return found;\r
5698}\r
5699\r
5700extern int\r
5701onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)\r
5702{\r
5703 int len;\r
5704\r
5705 if (ONIGENC_MBC_MINLEN(enc) > 1) {\r
5706 len = 2;\r
5707 }\r
5708 else {\r
5709 len = ONIGENC_CODE_TO_MBCLEN(enc, code);\r
5710 }\r
5711 return onig_is_code_in_cc_len(len, code, cc);\r
5712}\r
5713\r
5714\r
5715#ifdef ONIG_DEBUG\r
5716\r
5717/* arguments type */\r
5718#define ARG_SPECIAL -1\r
5719#define ARG_NON 0\r
5720#define ARG_RELADDR 1\r
5721#define ARG_ABSADDR 2\r
5722#define ARG_LENGTH 3\r
5723#define ARG_MEMNUM 4\r
5724#define ARG_OPTION 5\r
5725#define ARG_STATE_CHECK 6\r
5726\r
5727OnigOpInfoType OnigOpInfo[] = {\r
5728 { OP_FINISH, "finish", ARG_NON },\r
5729 { OP_END, "end", ARG_NON },\r
5730 { OP_EXACT1, "exact1", ARG_SPECIAL },\r
5731 { OP_EXACT2, "exact2", ARG_SPECIAL },\r
5732 { OP_EXACT3, "exact3", ARG_SPECIAL },\r
5733 { OP_EXACT4, "exact4", ARG_SPECIAL },\r
5734 { OP_EXACT5, "exact5", ARG_SPECIAL },\r
5735 { OP_EXACTN, "exactn", ARG_SPECIAL },\r
5736 { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL },\r
5737 { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL },\r
5738 { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL },\r
5739 { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL },\r
5740 { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL },\r
5741 { OP_EXACTMBN, "exactmbn", ARG_SPECIAL },\r
5742 { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL },\r
5743 { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL },\r
5744 { OP_CCLASS, "cclass", ARG_SPECIAL },\r
5745 { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL },\r
5746 { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL },\r
5747 { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL },\r
5748 { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL },\r
5749 { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL },\r
5750 { OP_CCLASS_NODE, "cclass-node", ARG_SPECIAL },\r
5751 { OP_ANYCHAR, "anychar", ARG_NON },\r
5752 { OP_ANYCHAR_ML, "anychar-ml", ARG_NON },\r
5753 { OP_ANYCHAR_STAR, "anychar*", ARG_NON },\r
5754 { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON },\r
5755 { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },\r
5756 { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },\r
5757 { OP_WORD, "word", ARG_NON },\r
5758 { OP_NOT_WORD, "not-word", ARG_NON },\r
5759 { OP_WORD_BOUND, "word-bound", ARG_NON },\r
5760 { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON },\r
5761 { OP_WORD_BEGIN, "word-begin", ARG_NON },\r
5762 { OP_WORD_END, "word-end", ARG_NON },\r
5763 { OP_BEGIN_BUF, "begin-buf", ARG_NON },\r
5764 { OP_END_BUF, "end-buf", ARG_NON },\r
5765 { OP_BEGIN_LINE, "begin-line", ARG_NON },\r
5766 { OP_END_LINE, "end-line", ARG_NON },\r
5767 { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON },\r
5768 { OP_BEGIN_POSITION, "begin-position", ARG_NON },\r
5769 { OP_BACKREF1, "backref1", ARG_NON },\r
5770 { OP_BACKREF2, "backref2", ARG_NON },\r
5771 { OP_BACKREFN, "backrefn", ARG_MEMNUM },\r
5772 { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL },\r
5773 { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },\r
5774 { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL },\r
5775 { OP_BACKREF_WITH_LEVEL, "backref_at_level", ARG_SPECIAL },\r
5776 { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM },\r
5777 { OP_MEMORY_START, "mem-start", ARG_MEMNUM },\r
5778 { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM },\r
5779 { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM },\r
5780 { OP_MEMORY_END, "mem-end", ARG_MEMNUM },\r
5781 { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM },\r
5782 { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION },\r
5783 { OP_SET_OPTION, "set-option", ARG_OPTION },\r
5784 { OP_FAIL, "fail", ARG_NON },\r
5785 { OP_JUMP, "jump", ARG_RELADDR },\r
5786 { OP_PUSH, "push", ARG_RELADDR },\r
5787 { OP_POP, "pop", ARG_NON },\r
5788 { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL },\r
5789 { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL },\r
5790 { OP_REPEAT, "repeat", ARG_SPECIAL },\r
5791 { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL },\r
5792 { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM },\r
5793 { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM },\r
5794 { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM },\r
5795 { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM },\r
5796 { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM },\r
5797 { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM },\r
5798 { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM },\r
5799 { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM },\r
5800 { OP_PUSH_POS, "push-pos", ARG_NON },\r
5801 { OP_POP_POS, "pop-pos", ARG_NON },\r
5802 { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR },\r
5803 { OP_FAIL_POS, "fail-pos", ARG_NON },\r
5804 { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON },\r
5805 { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON },\r
5806 { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL },\r
5807 { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },\r
5808 { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },\r
5809 { OP_CALL, "call", ARG_ABSADDR },\r
5810 { OP_RETURN, "return", ARG_NON },\r
5811 { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL },\r
5812 { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },\r
5813 { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK },\r
5814 { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK },\r
5815 { OP_STATE_CHECK_ANYCHAR_ML_STAR,\r
5816 "state-check-anychar-ml*", ARG_STATE_CHECK },\r
5817 { -1, "", ARG_NON }\r
5818};\r
5819\r
5820static char*\r
5821op2name(int opcode)\r
5822{\r
5823 int i;\r
5824\r
5825 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {\r
5826 if (opcode == OnigOpInfo[i].opcode)\r
5827 return OnigOpInfo[i].name;\r
5828 }\r
5829 return "";\r
5830}\r
5831\r
5832static int\r
5833op2arg_type(int opcode)\r
5834{\r
5835 int i;\r
5836\r
5837 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {\r
5838 if (opcode == OnigOpInfo[i].opcode)\r
5839 return OnigOpInfo[i].arg_type;\r
5840 }\r
5841 return ARG_SPECIAL;\r
5842}\r
5843\r
5844static void\r
5845Indent(FILE* f, int indent)\r
5846{\r
5847 int i;\r
5848 for (i = 0; i < indent; i++) putc(' ', f);\r
5849}\r
5850\r
5851static void\r
5852p_string(FILE* f, int len, UChar* s)\r
5853{\r
5854 fputs(":", f);\r
5855 while (len-- > 0) { fputc(*s++, f); }\r
5856}\r
5857\r
5858static void\r
5859p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)\r
5860{\r
5861 int x = len * mb_len;\r
5862\r
5863 fprintf(f, ":%d:", len);\r
5864 while (x-- > 0) { fputc(*s++, f); }\r
5865}\r
5866\r
5867extern void\r
5868onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp,\r
5869 OnigEncoding enc)\r
5870{\r
5871 int i, n, arg_type;\r
5872 RelAddrType addr;\r
5873 LengthType len;\r
5874 MemNumType mem;\r
5875 StateCheckNumType scn;\r
5876 OnigCodePoint code;\r
5877 UChar *q;\r
5878\r
5879 fprintf(f, "[%s", op2name(*bp));\r
5880 arg_type = op2arg_type(*bp);\r
5881 if (arg_type != ARG_SPECIAL) {\r
5882 bp++;\r
5883 switch (arg_type) {\r
5884 case ARG_NON:\r
5885 break;\r
5886 case ARG_RELADDR:\r
5887 GET_RELADDR_INC(addr, bp);\r
5888 fprintf(f, ":(%d)", addr);\r
5889 break;\r
5890 case ARG_ABSADDR:\r
5891 GET_ABSADDR_INC(addr, bp);\r
5892 fprintf(f, ":(%d)", addr);\r
5893 break;\r
5894 case ARG_LENGTH:\r
5895 GET_LENGTH_INC(len, bp);\r
5896 fprintf(f, ":%d", len);\r
5897 break;\r
5898 case ARG_MEMNUM:\r
5899 mem = *((MemNumType* )bp);\r
5900 bp += SIZE_MEMNUM;\r
5901 fprintf(f, ":%d", mem);\r
5902 break;\r
5903 case ARG_OPTION:\r
5904 {\r
5905 OnigOptionType option = *((OnigOptionType* )bp);\r
5906 bp += SIZE_OPTION;\r
5907 fprintf(f, ":%d", option);\r
5908 }\r
5909 break;\r
5910\r
5911 case ARG_STATE_CHECK:\r
5912 scn = *((StateCheckNumType* )bp);\r
5913 bp += SIZE_STATE_CHECK_NUM;\r
5914 fprintf(f, ":%d", scn);\r
5915 break;\r
5916 }\r
5917 }\r
5918 else {\r
5919 switch (*bp++) {\r
5920 case OP_EXACT1:\r
5921 case OP_ANYCHAR_STAR_PEEK_NEXT:\r
5922 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:\r
5923 p_string(f, 1, bp++); break;\r
5924 case OP_EXACT2:\r
5925 p_string(f, 2, bp); bp += 2; break;\r
5926 case OP_EXACT3:\r
5927 p_string(f, 3, bp); bp += 3; break;\r
5928 case OP_EXACT4:\r
5929 p_string(f, 4, bp); bp += 4; break;\r
5930 case OP_EXACT5:\r
5931 p_string(f, 5, bp); bp += 5; break;\r
5932 case OP_EXACTN:\r
5933 GET_LENGTH_INC(len, bp);\r
5934 p_len_string(f, len, 1, bp);\r
5935 bp += len;\r
5936 break;\r
5937 \r
5938 case OP_EXACTMB2N1:\r
5939 p_string(f, 2, bp); bp += 2; break;\r
5940 case OP_EXACTMB2N2:\r
5941 p_string(f, 4, bp); bp += 4; break;\r
5942 case OP_EXACTMB2N3:\r
5943 p_string(f, 6, bp); bp += 6; break;\r
5944 case OP_EXACTMB2N:\r
5945 GET_LENGTH_INC(len, bp);\r
5946 p_len_string(f, len, 2, bp);\r
5947 bp += len * 2;\r
5948 break;\r
5949 case OP_EXACTMB3N:\r
5950 GET_LENGTH_INC(len, bp);\r
5951 p_len_string(f, len, 3, bp);\r
5952 bp += len * 3;\r
5953 break;\r
5954 case OP_EXACTMBN:\r
5955 {\r
5956 int mb_len;\r
5957 \r
5958 GET_LENGTH_INC(mb_len, bp);\r
5959 GET_LENGTH_INC(len, bp);\r
5960 fprintf(f, ":%d:%d:", mb_len, len);\r
5961 n = len * mb_len;\r
5962 while (n-- > 0) { fputc(*bp++, f); }\r
5963 }\r
5964 break;\r
5965\r
5966 case OP_EXACT1_IC:\r
5967 len = enclen(enc, bp);\r
5968 p_string(f, len, bp);\r
5969 bp += len;\r
5970 break;\r
5971 case OP_EXACTN_IC:\r
5972 GET_LENGTH_INC(len, bp);\r
5973 p_len_string(f, len, 1, bp);\r
5974 bp += len;\r
5975 break;\r
5976\r
5977 case OP_CCLASS:\r
5978 n = bitset_on_num((BitSetRef )bp);\r
5979 bp += SIZE_BITSET;\r
5980 fprintf(f, ":%d", n);\r
5981 break;\r
5982\r
5983 case OP_CCLASS_NOT:\r
5984 n = bitset_on_num((BitSetRef )bp);\r
5985 bp += SIZE_BITSET;\r
5986 fprintf(f, ":%d", n);\r
5987 break;\r
5988\r
5989 case OP_CCLASS_MB:\r
5990 case OP_CCLASS_MB_NOT:\r
5991 GET_LENGTH_INC(len, bp);\r
5992 q = bp;\r
5993#ifndef PLATFORM_UNALIGNED_WORD_ACCESS\r
5994 ALIGNMENT_RIGHT(q);\r
5995#endif\r
5996 GET_CODE_POINT(code, q);\r
5997 bp += len;\r
5998 fprintf(f, ":%d:%d", (int )code, len);\r
5999 break;\r
6000\r
6001 case OP_CCLASS_MIX:\r
6002 case OP_CCLASS_MIX_NOT:\r
6003 n = bitset_on_num((BitSetRef )bp);\r
6004 bp += SIZE_BITSET;\r
6005 GET_LENGTH_INC(len, bp);\r
6006 q = bp;\r
6007#ifndef PLATFORM_UNALIGNED_WORD_ACCESS\r
6008 ALIGNMENT_RIGHT(q);\r
6009#endif\r
6010 GET_CODE_POINT(code, q);\r
6011 bp += len;\r
6012 fprintf(f, ":%d:%d:%d", n, (int )code, len);\r
6013 break;\r
6014\r
6015 case OP_CCLASS_NODE:\r
6016 {\r
6017 CClassNode *cc;\r
6018\r
6019 GET_POINTER_INC(cc, bp);\r
6020 n = bitset_on_num(cc->bs);\r
6021 fprintf(f, ":%u:%d", (unsigned int )cc, n);\r
6022 }\r
6023 break;\r
6024\r
6025 case OP_BACKREFN_IC:\r
6026 mem = *((MemNumType* )bp);\r
6027 bp += SIZE_MEMNUM;\r
6028 fprintf(f, ":%d", mem);\r
6029 break;\r
6030\r
6031 case OP_BACKREF_MULTI_IC:\r
6032 case OP_BACKREF_MULTI:\r
6033 fputs(" ", f);\r
6034 GET_LENGTH_INC(len, bp);\r
6035 for (i = 0; i < len; i++) {\r
6036 GET_MEMNUM_INC(mem, bp);\r
6037 if (i > 0) fputs(", ", f);\r
6038 fprintf(f, "%d", mem);\r
6039 }\r
6040 break;\r
6041\r
6042 case OP_BACKREF_WITH_LEVEL:\r
6043 {\r
6044 OnigOptionType option;\r
6045 LengthType level;\r
6046\r
6047 GET_OPTION_INC(option, bp);\r
6048 fprintf(f, ":%d", option);\r
6049 GET_LENGTH_INC(level, bp);\r
6050 fprintf(f, ":%d", level);\r
6051\r
6052 fputs(" ", f);\r
6053 GET_LENGTH_INC(len, bp);\r
6054 for (i = 0; i < len; i++) {\r
6055 GET_MEMNUM_INC(mem, bp);\r
6056 if (i > 0) fputs(", ", f);\r
6057 fprintf(f, "%d", mem);\r
6058 }\r
6059 }\r
6060 break;\r
6061\r
6062 case OP_REPEAT:\r
6063 case OP_REPEAT_NG:\r
6064 {\r
6065 mem = *((MemNumType* )bp);\r
6066 bp += SIZE_MEMNUM;\r
6067 addr = *((RelAddrType* )bp);\r
6068 bp += SIZE_RELADDR;\r
6069 fprintf(f, ":%d:%d", mem, addr);\r
6070 }\r
6071 break;\r
6072\r
6073 case OP_PUSH_OR_JUMP_EXACT1:\r
6074 case OP_PUSH_IF_PEEK_NEXT:\r
6075 addr = *((RelAddrType* )bp);\r
6076 bp += SIZE_RELADDR;\r
6077 fprintf(f, ":(%d)", addr);\r
6078 p_string(f, 1, bp);\r
6079 bp += 1;\r
6080 break;\r
6081\r
6082 case OP_LOOK_BEHIND:\r
6083 GET_LENGTH_INC(len, bp);\r
6084 fprintf(f, ":%d", len);\r
6085 break;\r
6086\r
6087 case OP_PUSH_LOOK_BEHIND_NOT:\r
6088 GET_RELADDR_INC(addr, bp);\r
6089 GET_LENGTH_INC(len, bp);\r
6090 fprintf(f, ":%d:(%d)", len, addr);\r
6091 break;\r
6092\r
6093 case OP_STATE_CHECK_PUSH:\r
6094 case OP_STATE_CHECK_PUSH_OR_JUMP:\r
6095 scn = *((StateCheckNumType* )bp);\r
6096 bp += SIZE_STATE_CHECK_NUM;\r
6097 addr = *((RelAddrType* )bp);\r
6098 bp += SIZE_RELADDR;\r
6099 fprintf(f, ":%d:(%d)", scn, addr);\r
6100 break;\r
6101\r
6102 default:\r
6103 fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",\r
6104 *--bp);\r
6105 }\r
6106 }\r
6107 fputs("]", f);\r
6108 if (nextp) *nextp = bp;\r
6109}\r
6110\r
6111static void\r
6112print_compiled_byte_code_list(FILE* f, regex_t* reg)\r
6113{\r
6114 int ncode;\r
6115 UChar* bp = reg->p;\r
6116 UChar* end = reg->p + reg->used;\r
6117\r
6118 fprintf(f, "code length: %d\n", reg->used);\r
6119\r
6120 ncode = 0;\r
6121 while (bp < end) {\r
6122 ncode++;\r
6123 if (bp > reg->p) {\r
6124 if (ncode % 5 == 0)\r
6125 fprintf(f, "\n");\r
6126 else\r
6127 fputs(" ", f);\r
6128 }\r
6129 onig_print_compiled_byte_code(f, bp, &bp, reg->enc);\r
6130 }\r
6131\r
6132 fprintf(f, "\n");\r
6133}\r
6134\r
6135static void\r
6136print_indent_tree(FILE* f, Node* node, int indent)\r
6137{\r
6138 int i, type;\r
6139 int add = 3;\r
6140 UChar* p;\r
6141\r
6142 Indent(f, indent);\r
6143 if (IS_NULL(node)) {\r
6144 fprintf(f, "ERROR: null node!!!\n");\r
6145 exit (0);\r
6146 }\r
6147\r
6148 type = NTYPE(node);\r
6149 switch (type) {\r
6150 case NT_LIST:\r
6151 case NT_ALT:\r
6152 if (NTYPE(node) == NT_LIST)\r
6153 fprintf(f, "<list:%x>\n", (int )node);\r
6154 else\r
6155 fprintf(f, "<alt:%x>\n", (int )node);\r
6156\r
6157 print_indent_tree(f, NCAR(node), indent + add);\r
6158 while (IS_NOT_NULL(node = NCDR(node))) {\r
6159 if (NTYPE(node) != type) {\r
6160 fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));\r
6161 exit(0);\r
6162 }\r
6163 print_indent_tree(f, NCAR(node), indent + add);\r
6164 }\r
6165 break;\r
6166\r
6167 case NT_STR:\r
6168 fprintf(f, "<string%s:%x>",\r
6169 (NSTRING_IS_RAW(node) ? "-raw" : ""), (int )node);\r
6170 for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {\r
6171 if (*p >= 0x20 && *p < 0x7f)\r
6172 fputc(*p, f);\r
6173 else {\r
6174 fprintf(f, " 0x%02x", *p);\r
6175 }\r
6176 }\r
6177 break;\r
6178\r
6179 case NT_CCLASS:\r
6180 fprintf(f, "<cclass:%x>", (int )node);\r
6181 if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(" not", f);\r
6182 if (NCCLASS(node)->mbuf) {\r
6183 BBuf* bbuf = NCCLASS(node)->mbuf;\r
6184 for (i = 0; i < bbuf->used; i++) {\r
6185 if (i > 0) fprintf(f, ",");\r
6186 fprintf(f, "%0x", bbuf->p[i]);\r
6187 }\r
6188 }\r
6189 break;\r
6190\r
6191 case NT_CTYPE:\r
6192 fprintf(f, "<ctype:%x> ", (int )node);\r
6193 switch (NCTYPE(node)->ctype) {\r
6194 case ONIGENC_CTYPE_WORD:\r
6195 if (NCTYPE(node)->not != 0)\r
6196 fputs("not word", f);\r
6197 else\r
6198 fputs("word", f);\r
6199 break;\r
6200\r
6201 default:\r
6202 fprintf(f, "ERROR: undefined ctype.\n");\r
6203 exit(0);\r
6204 }\r
6205 break;\r
6206\r
6207 case NT_CANY:\r
6208 fprintf(f, "<anychar:%x>", (int )node);\r
6209 break;\r
6210\r
6211 case NT_ANCHOR:\r
6212 fprintf(f, "<anchor:%x> ", (int )node);\r
6213 switch (NANCHOR(node)->type) {\r
6214 case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break;\r
6215 case ANCHOR_END_BUF: fputs("end buf", f); break;\r
6216 case ANCHOR_BEGIN_LINE: fputs("begin line", f); break;\r
6217 case ANCHOR_END_LINE: fputs("end line", f); break;\r
6218 case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break;\r
6219 case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;\r
6220\r
6221 case ANCHOR_WORD_BOUND: fputs("word bound", f); break;\r
6222 case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break;\r
6223#ifdef USE_WORD_BEGIN_END\r
6224 case ANCHOR_WORD_BEGIN: fputs("word begin", f); break;\r
6225 case ANCHOR_WORD_END: fputs("word end", f); break;\r
6226#endif\r
6227 case ANCHOR_PREC_READ: fputs("prec read", f); break;\r
6228 case ANCHOR_PREC_READ_NOT: fputs("prec read not", f); break;\r
6229 case ANCHOR_LOOK_BEHIND: fputs("look_behind", f); break;\r
6230 case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); break;\r
6231\r
6232 default:\r
6233 fprintf(f, "ERROR: undefined anchor type.\n");\r
6234 break;\r
6235 }\r
6236 break;\r
6237\r
6238 case NT_BREF:\r
6239 {\r
6240 int* p;\r
6241 BRefNode* br = NBREF(node);\r
6242 p = BACKREFS_P(br);\r
6243 fprintf(f, "<backref:%x>", (int )node);\r
6244 for (i = 0; i < br->back_num; i++) {\r
6245 if (i > 0) fputs(", ", f);\r
6246 fprintf(f, "%d", p[i]);\r
6247 }\r
6248 }\r
6249 break;\r
6250\r
6251#ifdef USE_SUBEXP_CALL\r
6252 case NT_CALL:\r
6253 {\r
6254 CallNode* cn = NCALL(node);\r
6255 fprintf(f, "<call:%x>", (int )node);\r
6256 p_string(f, cn->name_end - cn->name, cn->name);\r
6257 }\r
6258 break;\r
6259#endif\r
6260\r
6261 case NT_QTFR:\r
6262 fprintf(f, "<quantifier:%x>{%d,%d}%s\n", (int )node,\r
6263 NQTFR(node)->lower, NQTFR(node)->upper,\r
6264 (NQTFR(node)->greedy ? "" : "?"));\r
6265 print_indent_tree(f, NQTFR(node)->target, indent + add);\r
6266 break;\r
6267\r
6268 case NT_ENCLOSE:\r
6269 fprintf(f, "<enclose:%x> ", (int )node);\r
6270 switch (NENCLOSE(node)->type) {\r
6271 case ENCLOSE_OPTION:\r
6272 fprintf(f, "option:%d", NENCLOSE(node)->option);\r
6273 break;\r
6274 case ENCLOSE_MEMORY:\r
6275 fprintf(f, "memory:%d", NENCLOSE(node)->regnum);\r
6276 break;\r
6277 case ENCLOSE_STOP_BACKTRACK:\r
6278 fprintf(f, "stop-bt");\r
6279 break;\r
6280\r
6281 default:\r
6282 break;\r
6283 }\r
6284 fprintf(f, "\n");\r
6285 print_indent_tree(f, NENCLOSE(node)->target, indent + add);\r
6286 break;\r
6287\r
6288 default:\r
6289 fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node));\r
6290 break;\r
6291 }\r
6292\r
6293 if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&\r
6294 type != NT_ENCLOSE)\r
6295 fprintf(f, "\n");\r
6296 fflush(f);\r
6297}\r
6298#endif /* ONIG_DEBUG */\r
6299\r
6300#ifdef ONIG_DEBUG_PARSE_TREE\r
6301static void\r
6302print_tree(FILE* f, Node* node)\r
6303{\r
6304 print_indent_tree(f, node, 0);\r
6305}\r
6306#endif\r