]> git.proxmox.com Git - mirror_edk2.git/blame - MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regcomp.c
MdeModulePkg: Add NULL pointer check for RegularExpressionDxe.
[mirror_edk2.git] / MdeModulePkg / Universal / RegularExpressionDxe / Oniguruma / regcomp.c
CommitLineData
14b0e578
CS
1/**********************************************************************\r
2 regcomp.c - Oniguruma (regular expression library)\r
3**********************************************************************/\r
4/*-\r
5 * Copyright (c) 2002-2013 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>\r
6 * All rights reserved.\r
7 *\r
0af8e57c 8 * (C) Copyright 2015 Hewlett Packard Enterprise Development LP<BR>\r
14b0e578
CS
9 *\r
10 * Redistribution and use in source and binary forms, with or without\r
11 * modification, are permitted provided that the following conditions\r
12 * are met:\r
13 * 1. Redistributions of source code must retain the above copyright\r
14 * notice, this list of conditions and the following disclaimer.\r
15 * 2. Redistributions in binary form must reproduce the above copyright\r
16 * notice, this list of conditions and the following disclaimer in the\r
17 * documentation and/or other materials provided with the distribution.\r
18 *\r
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND\r
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\r
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\r
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE\r
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\r
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS\r
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)\r
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT\r
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY\r
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF\r
29 * SUCH DAMAGE.\r
30 */\r
31\r
32#include "regparse.h"\r
33\r
34OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;\r
35\r
36extern OnigCaseFoldType\r
37onig_get_default_case_fold_flag(void)\r
38{\r
39 return OnigDefaultCaseFoldFlag;\r
40}\r
41\r
42extern int\r
43onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)\r
44{\r
45 OnigDefaultCaseFoldFlag = case_fold_flag;\r
46 return 0;\r
47}\r
48\r
49\r
50#ifndef PLATFORM_UNALIGNED_WORD_ACCESS\r
51static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];\r
52#endif\r
53\r
54static UChar*\r
55str_dup(UChar* s, UChar* end)\r
56{\r
57 int len = (int)(end - s);\r
58\r
59 if (len > 0) {\r
60 UChar* r = (UChar* )xmalloc(len + 1);\r
61 CHECK_NULL_RETURN(r);\r
62 xmemcpy(r, s, len);\r
63 r[len] = (UChar )0;\r
64 return r;\r
65 }\r
66 else return NULL;\r
67}\r
68\r
69static void\r
70swap_node(Node* a, Node* b)\r
71{\r
72 Node c;\r
73 c = *a; *a = *b; *b = c;\r
74\r
75 if (NTYPE(a) == NT_STR) {\r
76 StrNode* sn = NSTR(a);\r
77 if (sn->capa == 0) {\r
78 int len = (int)(sn->end - sn->s);\r
79 sn->s = sn->buf;\r
80 sn->end = sn->s + len;\r
81 }\r
82 }\r
83\r
84 if (NTYPE(b) == NT_STR) {\r
85 StrNode* sn = NSTR(b);\r
86 if (sn->capa == 0) {\r
87 int len = (int)(sn->end - sn->s);\r
88 sn->s = sn->buf;\r
89 sn->end = sn->s + len;\r
90 }\r
91 }\r
92}\r
93\r
94static OnigDistance\r
95distance_add(OnigDistance d1, OnigDistance d2)\r
96{\r
97 if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)\r
98 return ONIG_INFINITE_DISTANCE;\r
99 else {\r
100 if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;\r
101 else return ONIG_INFINITE_DISTANCE;\r
102 }\r
103}\r
104\r
105static OnigDistance\r
106distance_multiply(OnigDistance d, int m)\r
107{\r
108 if (m == 0) return 0;\r
109\r
110 if (d < ONIG_INFINITE_DISTANCE / m)\r
111 return d * m;\r
112 else\r
113 return ONIG_INFINITE_DISTANCE;\r
114}\r
115\r
116static int\r
117bitset_is_empty(BitSetRef bs)\r
118{\r
119 int i;\r
120 for (i = 0; i < (int )BITSET_SIZE; i++) {\r
121 if (bs[i] != 0) return 0;\r
122 }\r
123 return 1;\r
124}\r
125\r
126#ifdef ONIG_DEBUG\r
127static int\r
128bitset_on_num(BitSetRef bs)\r
129{\r
130 int i, n;\r
131\r
132 n = 0;\r
133 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {\r
134 if (BITSET_AT(bs, i)) n++;\r
135 }\r
136 return n;\r
137}\r
138#endif\r
139\r
140extern int\r
141onig_bbuf_init(BBuf* buf, int size)\r
142{\r
143 if (size <= 0) {\r
144 size = 0;\r
145 buf->p = NULL;\r
146 }\r
147 else {\r
148 buf->p = (UChar* )xmalloc(size);\r
149 if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);\r
150 }\r
151\r
152 buf->alloc = size;\r
153 buf->used = 0;\r
154 return 0;\r
155}\r
156\r
157\r
158#ifdef USE_SUBEXP_CALL\r
159\r
160static int\r
161unset_addr_list_init(UnsetAddrList* uslist, int size)\r
162{\r
163 UnsetAddr* p;\r
164\r
165 p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);\r
166 CHECK_NULL_RETURN_MEMERR(p);\r
167 uslist->num = 0;\r
168 uslist->alloc = size;\r
169 uslist->us = p;\r
170 return 0;\r
171}\r
172\r
173static void\r
174unset_addr_list_end(UnsetAddrList* uslist)\r
175{\r
176 if (IS_NOT_NULL(uslist->us))\r
177 xfree(uslist->us);\r
178}\r
179\r
180static int\r
181unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)\r
182{\r
183 UnsetAddr* p;\r
184 int size;\r
185\r
186 if (uslist->num >= uslist->alloc) {\r
187 size = uslist->alloc * 2;\r
188 p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size, sizeof(UnsetAddr) * uslist->alloc);\r
189 CHECK_NULL_RETURN_MEMERR(p);\r
190 uslist->alloc = size;\r
191 uslist->us = p;\r
192 }\r
193\r
194 uslist->us[uslist->num].offset = offset;\r
195 uslist->us[uslist->num].target = node;\r
196 uslist->num++;\r
197 return 0;\r
198}\r
199#endif /* USE_SUBEXP_CALL */\r
200\r
201\r
202static int\r
203add_opcode(regex_t* reg, int opcode)\r
204{\r
205 BBUF_ADD1(reg, ((unsigned char)opcode));\r
206 return 0;\r
207}\r
208\r
209#ifdef USE_COMBINATION_EXPLOSION_CHECK\r
210static int\r
211add_state_check_num(regex_t* reg, int num)\r
212{\r
213 StateCheckNumType n = (StateCheckNumType )num;\r
214\r
215 BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);\r
216 return 0;\r
217}\r
218#endif\r
219\r
220static int\r
221add_rel_addr(regex_t* reg, int addr)\r
222{\r
223 RelAddrType ra = (RelAddrType )addr;\r
224\r
225 BBUF_ADD(reg, &ra, SIZE_RELADDR);\r
226 return 0;\r
227}\r
228\r
229static int\r
230add_abs_addr(regex_t* reg, int addr)\r
231{\r
232 AbsAddrType ra = (AbsAddrType )addr;\r
233\r
234 BBUF_ADD(reg, &ra, SIZE_ABSADDR);\r
235 return 0;\r
236}\r
237\r
238static int\r
239add_length(regex_t* reg, int len)\r
240{\r
241 LengthType l = (LengthType )len;\r
242\r
243 BBUF_ADD(reg, &l, SIZE_LENGTH);\r
244 return 0;\r
245}\r
246\r
247static int\r
248add_mem_num(regex_t* reg, int num)\r
249{\r
250 MemNumType n = (MemNumType )num;\r
251\r
252 BBUF_ADD(reg, &n, SIZE_MEMNUM);\r
253 return 0;\r
254}\r
255\r
256static int\r
257add_pointer(regex_t* reg, void* addr)\r
258{\r
259 PointerType ptr = (PointerType )addr;\r
260\r
261 BBUF_ADD(reg, &ptr, SIZE_POINTER);\r
262 return 0;\r
263}\r
264\r
265static int\r
266add_option(regex_t* reg, OnigOptionType option)\r
267{\r
268 BBUF_ADD(reg, &option, SIZE_OPTION);\r
269 return 0;\r
270}\r
271\r
272static int\r
273add_opcode_rel_addr(regex_t* reg, int opcode, int addr)\r
274{\r
275 int r;\r
276\r
277 r = add_opcode(reg, opcode);\r
278 if (r) return r;\r
279 r = add_rel_addr(reg, addr);\r
280 return r;\r
281}\r
282\r
283static int\r
284add_bytes(regex_t* reg, UChar* bytes, int len)\r
285{\r
286 BBUF_ADD(reg, bytes, len);\r
287 return 0;\r
288}\r
289\r
290static int\r
291add_bitset(regex_t* reg, BitSetRef bs)\r
292{\r
293 BBUF_ADD(reg, bs, SIZE_BITSET);\r
294 return 0;\r
295}\r
296\r
297static int\r
298add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)\r
299{\r
300 int r;\r
301\r
302 r = add_opcode(reg, opcode);\r
303 if (r) return r;\r
304 r = add_option(reg, option);\r
305 return r;\r
306}\r
307\r
308static int compile_length_tree(Node* node, regex_t* reg);\r
309static int compile_tree(Node* node, regex_t* reg);\r
310\r
311\r
312#define IS_NEED_STR_LEN_OP_EXACT(op) \\r
313 ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\\r
314 (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)\r
315\r
316static int\r
317select_str_opcode(int mb_len, int str_len, int ignore_case)\r
318{\r
319 int op;\r
320\r
321 if (ignore_case) {\r
322 switch (str_len) {\r
323 case 1: op = OP_EXACT1_IC; break;\r
324 default: op = OP_EXACTN_IC; break;\r
325 }\r
326 }\r
327 else {\r
328 switch (mb_len) {\r
329 case 1:\r
330 switch (str_len) {\r
331 case 1: op = OP_EXACT1; break;\r
332 case 2: op = OP_EXACT2; break;\r
333 case 3: op = OP_EXACT3; break;\r
334 case 4: op = OP_EXACT4; break;\r
335 case 5: op = OP_EXACT5; break;\r
336 default: op = OP_EXACTN; break;\r
337 }\r
338 break;\r
339\r
340 case 2:\r
341 switch (str_len) {\r
342 case 1: op = OP_EXACTMB2N1; break;\r
343 case 2: op = OP_EXACTMB2N2; break;\r
344 case 3: op = OP_EXACTMB2N3; break;\r
345 default: op = OP_EXACTMB2N; break;\r
346 }\r
347 break;\r
348\r
349 case 3:\r
350 op = OP_EXACTMB3N;\r
351 break;\r
352\r
353 default:\r
354 op = OP_EXACTMBN;\r
355 break;\r
356 }\r
357 }\r
358 return op;\r
359}\r
360\r
361static int\r
362compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)\r
363{\r
364 int r;\r
365 int saved_num_null_check = reg->num_null_check;\r
366\r
367 if (empty_info != 0) {\r
368 r = add_opcode(reg, OP_NULL_CHECK_START);\r
369 if (r) return r;\r
370 r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */\r
371 if (r) return r;\r
372 reg->num_null_check++;\r
373 }\r
374\r
375 r = compile_tree(node, reg);\r
376 if (r) return r;\r
377\r
378 if (empty_info != 0) {\r
379 if (empty_info == NQ_TARGET_IS_EMPTY)\r
380 r = add_opcode(reg, OP_NULL_CHECK_END);\r
381 else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)\r
382 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);\r
383 else if (empty_info == NQ_TARGET_IS_EMPTY_REC)\r
384 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);\r
385\r
386 if (r) return r;\r
387 r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */\r
388 }\r
389 return r;\r
390}\r
391\r
392#ifdef USE_SUBEXP_CALL\r
393static int\r
394compile_call(CallNode* node, regex_t* reg)\r
395{\r
396 int r;\r
397\r
398 r = add_opcode(reg, OP_CALL);\r
399 if (r) return r;\r
400 r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),\r
401 node->target);\r
402 if (r) return r;\r
403 r = add_abs_addr(reg, 0 /*dummy addr.*/);\r
404 return r;\r
405}\r
406#endif\r
407\r
408static int\r
409compile_tree_n_times(Node* node, int n, regex_t* reg)\r
410{\r
411 int i, r;\r
412\r
413 for (i = 0; i < n; i++) {\r
414 r = compile_tree(node, reg);\r
415 if (r) return r;\r
416 }\r
417 return 0;\r
418}\r
419\r
420static int\r
421add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, int str_len,\r
422 regex_t* reg ARG_UNUSED, int ignore_case)\r
423{\r
424 int len;\r
425 int op = select_str_opcode(mb_len, str_len, ignore_case);\r
426\r
427 len = SIZE_OPCODE;\r
428\r
429 if (op == OP_EXACTMBN) len += SIZE_LENGTH;\r
430 if (IS_NEED_STR_LEN_OP_EXACT(op))\r
431 len += SIZE_LENGTH;\r
432\r
433 len += mb_len * str_len;\r
434 return len;\r
435}\r
436\r
437static int\r
438add_compile_string(UChar* s, int mb_len, int str_len,\r
439 regex_t* reg, int ignore_case)\r
440{\r
441 int op = select_str_opcode(mb_len, str_len, ignore_case);\r
442 add_opcode(reg, op);\r
443\r
444 if (op == OP_EXACTMBN)\r
445 add_length(reg, mb_len);\r
446\r
447 if (IS_NEED_STR_LEN_OP_EXACT(op)) {\r
448 if (op == OP_EXACTN_IC)\r
449 add_length(reg, mb_len * str_len);\r
450 else\r
451 add_length(reg, str_len);\r
452 }\r
453\r
454 add_bytes(reg, s, mb_len * str_len);\r
455 return 0;\r
456}\r
457\r
458\r
459static int\r
460compile_length_string_node(Node* node, regex_t* reg)\r
461{\r
462 int rlen, r, len, prev_len, slen, ambig;\r
463 OnigEncoding enc = reg->enc;\r
464 UChar *p, *prev;\r
465 StrNode* sn;\r
466\r
467 sn = NSTR(node);\r
468 if (sn->end <= sn->s)\r
469 return 0;\r
470\r
471 ambig = NSTRING_IS_AMBIG(node);\r
472\r
473 p = prev = sn->s;\r
474 prev_len = enclen(enc, p);\r
475 p += prev_len;\r
476 slen = 1;\r
477 rlen = 0;\r
478\r
479 for (; p < sn->end; ) {\r
480 len = enclen(enc, p);\r
481 if (len == prev_len) {\r
482 slen++;\r
483 }\r
484 else {\r
485 r = add_compile_string_length(prev, prev_len, slen, reg, ambig);\r
486 rlen += r;\r
487 prev = p;\r
488 slen = 1;\r
489 prev_len = len;\r
490 }\r
491 p += len;\r
492 }\r
493 r = add_compile_string_length(prev, prev_len, slen, reg, ambig);\r
494 rlen += r;\r
495 return rlen;\r
496}\r
497\r
498static int\r
499compile_length_string_raw_node(StrNode* sn, regex_t* reg)\r
500{\r
501 if (sn->end <= sn->s)\r
502 return 0;\r
503\r
504 return add_compile_string_length(sn->s, 1 /* sb */, (int)(sn->end - sn->s), reg, 0);\r
505}\r
506\r
507static int\r
508compile_string_node(Node* node, regex_t* reg)\r
509{\r
510 int r, len, prev_len, slen, ambig;\r
511 OnigEncoding enc = reg->enc;\r
512 UChar *p, *prev, *end;\r
513 StrNode* sn;\r
514\r
515 sn = NSTR(node);\r
516 if (sn->end <= sn->s)\r
517 return 0;\r
518\r
519 end = sn->end;\r
520 ambig = NSTRING_IS_AMBIG(node);\r
521\r
522 p = prev = sn->s;\r
523 prev_len = enclen(enc, p);\r
524 p += prev_len;\r
525 slen = 1;\r
526\r
527 for (; p < end; ) {\r
528 len = enclen(enc, p);\r
529 if (len == prev_len) {\r
530 slen++;\r
531 }\r
532 else {\r
533 r = add_compile_string(prev, prev_len, slen, reg, ambig);\r
534 if (r) return r;\r
535\r
536 prev = p;\r
537 slen = 1;\r
538 prev_len = len;\r
539 }\r
540\r
541 p += len;\r
542 }\r
543 return add_compile_string(prev, prev_len, slen, reg, ambig);\r
544}\r
545\r
546static int\r
547compile_string_raw_node(StrNode* sn, regex_t* reg)\r
548{\r
549 if (sn->end <= sn->s)\r
550 return 0;\r
551\r
552 return add_compile_string(sn->s, 1 /* sb */, (int)(sn->end - sn->s), reg, 0);\r
553}\r
554\r
555static int\r
556add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)\r
557{\r
558#ifdef PLATFORM_UNALIGNED_WORD_ACCESS\r
559 add_length(reg, mbuf->used);\r
560 return add_bytes(reg, mbuf->p, mbuf->used);\r
561#else\r
562 int r, pad_size;\r
563 UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;\r
564\r
565 GET_ALIGNMENT_PAD_SIZE(p, pad_size);\r
566 add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));\r
567 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);\r
568\r
569 r = add_bytes(reg, mbuf->p, mbuf->used);\r
570\r
571 /* padding for return value from compile_length_cclass_node() to be fix. */\r
572 pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;\r
573 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);\r
574 return r;\r
575#endif\r
576}\r
577\r
578static int\r
579compile_length_cclass_node(CClassNode* cc, regex_t* reg)\r
580{\r
581 int len;\r
582\r
583 if (IS_NCCLASS_SHARE(cc)) {\r
584 len = SIZE_OPCODE + SIZE_POINTER;\r
585 return len;\r
586 }\r
587\r
588 if (IS_NULL(cc->mbuf)) {\r
589 len = SIZE_OPCODE + SIZE_BITSET;\r
590 }\r
591 else {\r
592 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {\r
593 len = SIZE_OPCODE;\r
594 }\r
595 else {\r
596 len = SIZE_OPCODE + SIZE_BITSET;\r
597 }\r
598#ifdef PLATFORM_UNALIGNED_WORD_ACCESS\r
599 len += SIZE_LENGTH + cc->mbuf->used;\r
600#else\r
601 len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);\r
602#endif\r
603 }\r
604\r
605 return len;\r
606}\r
607\r
608static int\r
609compile_cclass_node(CClassNode* cc, regex_t* reg)\r
610{\r
611 int r;\r
612\r
613 if (IS_NCCLASS_SHARE(cc)) {\r
614 add_opcode(reg, OP_CCLASS_NODE);\r
615 r = add_pointer(reg, cc);\r
616 return r;\r
617 }\r
618\r
619 if (IS_NULL(cc->mbuf)) {\r
620 if (IS_NCCLASS_NOT(cc))\r
621 add_opcode(reg, OP_CCLASS_NOT);\r
622 else\r
623 add_opcode(reg, OP_CCLASS);\r
624\r
625 r = add_bitset(reg, cc->bs);\r
626 }\r
627 else {\r
628 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {\r
629 if (IS_NCCLASS_NOT(cc))\r
630 add_opcode(reg, OP_CCLASS_MB_NOT);\r
631 else\r
632 add_opcode(reg, OP_CCLASS_MB);\r
633\r
634 r = add_multi_byte_cclass(cc->mbuf, reg);\r
635 }\r
636 else {\r
637 if (IS_NCCLASS_NOT(cc))\r
638 add_opcode(reg, OP_CCLASS_MIX_NOT);\r
639 else\r
640 add_opcode(reg, OP_CCLASS_MIX);\r
641\r
642 r = add_bitset(reg, cc->bs);\r
643 if (r) return r;\r
644 r = add_multi_byte_cclass(cc->mbuf, reg);\r
645 }\r
646 }\r
647\r
648 return r;\r
649}\r
650\r
651static int\r
652entry_repeat_range(regex_t* reg, int id, int lower, int upper)\r
653{\r
654#define REPEAT_RANGE_ALLOC 4\r
655\r
656 OnigRepeatRange* p;\r
657\r
658 if (reg->repeat_range_alloc == 0) {\r
659 p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);\r
660 CHECK_NULL_RETURN_MEMERR(p);\r
661 reg->repeat_range = p;\r
662 reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;\r
663 }\r
664 else if (reg->repeat_range_alloc <= id) {\r
665 int n;\r
666 n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;\r
667 p = (OnigRepeatRange* )xrealloc(reg->repeat_range,\r
668 sizeof(OnigRepeatRange) * n,\r
669 sizeof(OnigRepeatRange) * reg->repeat_range_alloc);\r
670 CHECK_NULL_RETURN_MEMERR(p);\r
671 reg->repeat_range = p;\r
672 reg->repeat_range_alloc = n;\r
673 }\r
674 else {\r
675 p = reg->repeat_range;\r
676 }\r
677\r
678 p[id].lower = lower;\r
679 p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);\r
680 return 0;\r
681}\r
682\r
683static int\r
684compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info,\r
685 regex_t* reg)\r
686{\r
687 int r;\r
688 int num_repeat = reg->num_repeat;\r
689\r
690 r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);\r
691 if (r) return r;\r
692 r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */\r
693 reg->num_repeat++;\r
694 if (r) return r;\r
695 r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);\r
696 if (r) return r;\r
697\r
698 r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);\r
699 if (r) return r;\r
700\r
701 r = compile_tree_empty_check(qn->target, reg, empty_info);\r
702 if (r) return r;\r
703\r
704 if (\r
705#ifdef USE_SUBEXP_CALL\r
706 reg->num_call > 0 ||\r
707#endif\r
708 IS_QUANTIFIER_IN_REPEAT(qn)) {\r
709 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);\r
710 }\r
711 else {\r
712 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);\r
713 }\r
714 if (r) return r;\r
715 r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */\r
716 return r;\r
717}\r
718\r
719static int\r
720is_anychar_star_quantifier(QtfrNode* qn)\r
721{\r
722 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&\r
723 NTYPE(qn->target) == NT_CANY)\r
724 return 1;\r
725 else\r
726 return 0;\r
727}\r
728\r
729#define QUANTIFIER_EXPAND_LIMIT_SIZE 50\r
730#define CKN_ON (ckn > 0)\r
731\r
732#ifdef USE_COMBINATION_EXPLOSION_CHECK\r
733\r
734static int\r
735compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)\r
736{\r
737 int len, mod_tlen, cklen;\r
738 int ckn;\r
739 int infinite = IS_REPEAT_INFINITE(qn->upper);\r
740 int empty_info = qn->target_empty_info;\r
741 int tlen = compile_length_tree(qn->target, reg);\r
742\r
743 if (tlen < 0) return tlen;\r
744\r
745 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);\r
746\r
747 cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);\r
748\r
749 /* anychar repeat */\r
750 if (NTYPE(qn->target) == NT_CANY) {\r
751 if (qn->greedy && infinite) {\r
752 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)\r
753 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;\r
754 else\r
755 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;\r
756 }\r
757 }\r
758\r
759 if (empty_info != 0)\r
760 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);\r
761 else\r
762 mod_tlen = tlen;\r
763\r
764 if (infinite && qn->lower <= 1) {\r
765 if (qn->greedy) {\r
766 if (qn->lower == 1)\r
767 len = SIZE_OP_JUMP;\r
768 else\r
769 len = 0;\r
770\r
771 len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;\r
772 }\r
773 else {\r
774 if (qn->lower == 0)\r
775 len = SIZE_OP_JUMP;\r
776 else\r
777 len = 0;\r
778\r
779 len += mod_tlen + SIZE_OP_PUSH + cklen;\r
780 }\r
781 }\r
782 else if (qn->upper == 0) {\r
783 if (qn->is_refered != 0) /* /(?<n>..){0}/ */\r
784 len = SIZE_OP_JUMP + tlen;\r
785 else\r
786 len = 0;\r
787 }\r
788 else if (qn->upper == 1 && qn->greedy) {\r
789 if (qn->lower == 0) {\r
790 if (CKN_ON) {\r
791 len = SIZE_OP_STATE_CHECK_PUSH + tlen;\r
792 }\r
793 else {\r
794 len = SIZE_OP_PUSH + tlen;\r
795 }\r
796 }\r
797 else {\r
798 len = tlen;\r
799 }\r
800 }\r
801 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */\r
802 len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;\r
803 }\r
804 else {\r
805 len = SIZE_OP_REPEAT_INC\r
806 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;\r
807 if (CKN_ON)\r
808 len += SIZE_OP_STATE_CHECK;\r
809 }\r
810\r
811 return len;\r
812}\r
813\r
814static int\r
815compile_quantifier_node(QtfrNode* qn, regex_t* reg)\r
816{\r
817 int r, mod_tlen;\r
818 int ckn;\r
819 int infinite = IS_REPEAT_INFINITE(qn->upper);\r
820 int empty_info = qn->target_empty_info;\r
821 int tlen = compile_length_tree(qn->target, reg);\r
822\r
823 if (tlen < 0) return tlen;\r
824\r
825 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);\r
826\r
827 if (is_anychar_star_quantifier(qn)) {\r
828 r = compile_tree_n_times(qn->target, qn->lower, reg);\r
829 if (r) return r;\r
830 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {\r
831 if (IS_MULTILINE(reg->options))\r
832 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);\r
833 else\r
834 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);\r
835 if (r) return r;\r
836 if (CKN_ON) {\r
837 r = add_state_check_num(reg, ckn);\r
838 if (r) return r;\r
839 }\r
840\r
841 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);\r
842 }\r
843 else {\r
844 if (IS_MULTILINE(reg->options)) {\r
845 r = add_opcode(reg, (CKN_ON ?\r
846 OP_STATE_CHECK_ANYCHAR_ML_STAR\r
847 : OP_ANYCHAR_ML_STAR));\r
848 }\r
849 else {\r
850 r = add_opcode(reg, (CKN_ON ?\r
851 OP_STATE_CHECK_ANYCHAR_STAR\r
852 : OP_ANYCHAR_STAR));\r
853 }\r
854 if (r) return r;\r
855 if (CKN_ON)\r
856 r = add_state_check_num(reg, ckn);\r
857\r
858 return r;\r
859 }\r
860 }\r
861\r
862 if (empty_info != 0)\r
863 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);\r
864 else\r
865 mod_tlen = tlen;\r
866\r
867 if (infinite && qn->lower <= 1) {\r
868 if (qn->greedy) {\r
869 if (qn->lower == 1) {\r
870 r = add_opcode_rel_addr(reg, OP_JUMP,\r
871 (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));\r
872 if (r) return r;\r
873 }\r
874\r
875 if (CKN_ON) {\r
876 r = add_opcode(reg, OP_STATE_CHECK_PUSH);\r
877 if (r) return r;\r
878 r = add_state_check_num(reg, ckn);\r
879 if (r) return r;\r
880 r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);\r
881 }\r
882 else {\r
883 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);\r
884 }\r
885 if (r) return r;\r
886 r = compile_tree_empty_check(qn->target, reg, empty_info);\r
887 if (r) return r;\r
888 r = add_opcode_rel_addr(reg, OP_JUMP,\r
889 -(mod_tlen + (int )SIZE_OP_JUMP\r
890 + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));\r
891 }\r
892 else {\r
893 if (qn->lower == 0) {\r
894 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);\r
895 if (r) return r;\r
896 }\r
897 r = compile_tree_empty_check(qn->target, reg, empty_info);\r
898 if (r) return r;\r
899 if (CKN_ON) {\r
900 r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);\r
901 if (r) return r;\r
902 r = add_state_check_num(reg, ckn);\r
903 if (r) return r;\r
904 r = add_rel_addr(reg,\r
905 -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));\r
906 }\r
907 else\r
908 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));\r
909 }\r
910 }\r
911 else if (qn->upper == 0) {\r
912 if (qn->is_refered != 0) { /* /(?<n>..){0}/ */\r
913 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);\r
914 if (r) return r;\r
915 r = compile_tree(qn->target, reg);\r
916 }\r
917 else\r
918 r = 0;\r
919 }\r
920 else if (qn->upper == 1 && qn->greedy) {\r
921 if (qn->lower == 0) {\r
922 if (CKN_ON) {\r
923 r = add_opcode(reg, OP_STATE_CHECK_PUSH);\r
924 if (r) return r;\r
925 r = add_state_check_num(reg, ckn);\r
926 if (r) return r;\r
927 r = add_rel_addr(reg, tlen);\r
928 }\r
929 else {\r
930 r = add_opcode_rel_addr(reg, OP_PUSH, tlen);\r
931 }\r
932 if (r) return r;\r
933 }\r
934\r
935 r = compile_tree(qn->target, reg);\r
936 }\r
937 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */\r
938 if (CKN_ON) {\r
939 r = add_opcode(reg, OP_STATE_CHECK_PUSH);\r
940 if (r) return r;\r
941 r = add_state_check_num(reg, ckn);\r
942 if (r) return r;\r
943 r = add_rel_addr(reg, SIZE_OP_JUMP);\r
944 }\r
945 else {\r
946 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);\r
947 }\r
948\r
949 if (r) return r;\r
950 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);\r
951 if (r) return r;\r
952 r = compile_tree(qn->target, reg);\r
953 }\r
954 else {\r
955 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);\r
956 if (CKN_ON) {\r
957 if (r) return r;\r
958 r = add_opcode(reg, OP_STATE_CHECK);\r
959 if (r) return r;\r
960 r = add_state_check_num(reg, ckn);\r
961 }\r
962 }\r
963 return r;\r
964}\r
965\r
966#else /* USE_COMBINATION_EXPLOSION_CHECK */\r
967\r
968static int\r
969compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)\r
970{\r
971 int len, mod_tlen;\r
972 int infinite = IS_REPEAT_INFINITE(qn->upper);\r
973 int empty_info = qn->target_empty_info;\r
974 int tlen = compile_length_tree(qn->target, reg);\r
975\r
976 if (tlen < 0) return tlen;\r
977\r
978 /* anychar repeat */\r
979 if (NTYPE(qn->target) == NT_CANY) {\r
980 if (qn->greedy && infinite) {\r
981 if (IS_NOT_NULL(qn->next_head_exact))\r
982 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;\r
983 else\r
984 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;\r
985 }\r
986 }\r
987\r
988 if (empty_info != 0)\r
989 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);\r
990 else\r
991 mod_tlen = tlen;\r
992\r
993 if (infinite &&\r
994 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {\r
995 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {\r
996 len = SIZE_OP_JUMP;\r
997 }\r
998 else {\r
999 len = tlen * qn->lower;\r
1000 }\r
1001\r
1002 if (qn->greedy) {\r
1003 if (IS_NOT_NULL(qn->head_exact))\r
1004 len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;\r
1005 else if (IS_NOT_NULL(qn->next_head_exact))\r
1006 len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;\r
1007 else\r
1008 len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;\r
1009 }\r
1010 else\r
1011 len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;\r
1012 }\r
1013 else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */\r
1014 len = SIZE_OP_JUMP + tlen;\r
1015 }\r
1016 else if (!infinite && qn->greedy &&\r
1017 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper\r
1018 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {\r
1019 len = tlen * qn->lower;\r
1020 len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);\r
1021 }\r
1022 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */\r
1023 len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;\r
1024 }\r
1025 else {\r
1026 len = SIZE_OP_REPEAT_INC\r
1027 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;\r
1028 }\r
1029\r
1030 return len;\r
1031}\r
1032\r
1033static int\r
1034compile_quantifier_node(QtfrNode* qn, regex_t* reg)\r
1035{\r
1036 int i, r, mod_tlen;\r
1037 int infinite = IS_REPEAT_INFINITE(qn->upper);\r
1038 int empty_info = qn->target_empty_info;\r
1039 int tlen = compile_length_tree(qn->target, reg);\r
1040\r
1041 if (tlen < 0) return tlen;\r
1042\r
1043 if (is_anychar_star_quantifier(qn)) {\r
1044 r = compile_tree_n_times(qn->target, qn->lower, reg);\r
1045 if (r) return r;\r
1046 if (IS_NOT_NULL(qn->next_head_exact)) {\r
1047 if (IS_MULTILINE(reg->options))\r
1048 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);\r
1049 else\r
1050 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);\r
1051 if (r) return r;\r
1052 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);\r
1053 }\r
1054 else {\r
1055 if (IS_MULTILINE(reg->options))\r
1056 return add_opcode(reg, OP_ANYCHAR_ML_STAR);\r
1057 else\r
1058 return add_opcode(reg, OP_ANYCHAR_STAR);\r
1059 }\r
1060 }\r
1061\r
1062 if (empty_info != 0)\r
1063 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);\r
1064 else\r
1065 mod_tlen = tlen;\r
1066\r
1067 if (infinite &&\r
1068 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {\r
1069 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {\r
1070 if (qn->greedy) {\r
1071 if (IS_NOT_NULL(qn->head_exact))\r
1072 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);\r
1073 else if (IS_NOT_NULL(qn->next_head_exact))\r
1074 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);\r
1075 else\r
1076 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);\r
1077 }\r
1078 else {\r
1079 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);\r
1080 }\r
1081 if (r) return r;\r
1082 }\r
1083 else {\r
1084 r = compile_tree_n_times(qn->target, qn->lower, reg);\r
1085 if (r) return r;\r
1086 }\r
1087\r
1088 if (qn->greedy) {\r
1089 if (IS_NOT_NULL(qn->head_exact)) {\r
1090 r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,\r
1091 mod_tlen + SIZE_OP_JUMP);\r
1092 if (r) return r;\r
1093 add_bytes(reg, NSTR(qn->head_exact)->s, 1);\r
1094 r = compile_tree_empty_check(qn->target, reg, empty_info);\r
1095 if (r) return r;\r
1096 r = add_opcode_rel_addr(reg, OP_JUMP,\r
1097 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));\r
1098 }\r
1099 else if (IS_NOT_NULL(qn->next_head_exact)) {\r
1100 r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,\r
1101 mod_tlen + SIZE_OP_JUMP);\r
1102 if (r) return r;\r
1103 add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);\r
1104 r = compile_tree_empty_check(qn->target, reg, empty_info);\r
1105 if (r) return r;\r
1106 r = add_opcode_rel_addr(reg, OP_JUMP,\r
1107 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));\r
1108 }\r
1109 else {\r
1110 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);\r
1111 if (r) return r;\r
1112 r = compile_tree_empty_check(qn->target, reg, empty_info);\r
1113 if (r) return r;\r
1114 r = add_opcode_rel_addr(reg, OP_JUMP,\r
1115 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));\r
1116 }\r
1117 }\r
1118 else {\r
1119 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);\r
1120 if (r) return r;\r
1121 r = compile_tree_empty_check(qn->target, reg, empty_info);\r
1122 if (r) return r;\r
1123 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));\r
1124 }\r
1125 }\r
1126 else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */\r
1127 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);\r
1128 if (r) return r;\r
1129 r = compile_tree(qn->target, reg);\r
1130 }\r
1131 else if (!infinite && qn->greedy &&\r
1132 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper\r
1133 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {\r
1134 int n = qn->upper - qn->lower;\r
1135\r
1136 r = compile_tree_n_times(qn->target, qn->lower, reg);\r
1137 if (r) return r;\r
1138\r
1139 for (i = 0; i < n; i++) {\r
1140 r = add_opcode_rel_addr(reg, OP_PUSH,\r
1141 (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);\r
1142 if (r) return r;\r
1143 r = compile_tree(qn->target, reg);\r
1144 if (r) return r;\r
1145 }\r
1146 }\r
1147 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */\r
1148 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);\r
1149 if (r) return r;\r
1150 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);\r
1151 if (r) return r;\r
1152 r = compile_tree(qn->target, reg);\r
1153 }\r
1154 else {\r
1155 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);\r
1156 }\r
1157 return r;\r
1158}\r
1159#endif /* USE_COMBINATION_EXPLOSION_CHECK */\r
1160\r
1161static int\r
1162compile_length_option_node(EncloseNode* node, regex_t* reg)\r
1163{\r
1164 int tlen;\r
1165 OnigOptionType prev = reg->options;\r
1166\r
1167 reg->options = node->option;\r
1168 tlen = compile_length_tree(node->target, reg);\r
1169 reg->options = prev;\r
1170\r
1171 if (tlen < 0) return tlen;\r
1172\r
1173 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {\r
1174 return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL\r
1175 + tlen + SIZE_OP_SET_OPTION;\r
1176 }\r
1177 else\r
1178 return tlen;\r
1179}\r
1180\r
1181static int\r
1182compile_option_node(EncloseNode* node, regex_t* reg)\r
1183{\r
1184 int r;\r
1185 OnigOptionType prev = reg->options;\r
1186\r
1187 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {\r
1188 r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);\r
1189 if (r) return r;\r
1190 r = add_opcode_option(reg, OP_SET_OPTION, prev);\r
1191 if (r) return r;\r
1192 r = add_opcode(reg, OP_FAIL);\r
1193 if (r) return r;\r
1194 }\r
1195\r
1196 reg->options = node->option;\r
1197 r = compile_tree(node->target, reg);\r
1198 reg->options = prev;\r
1199\r
1200 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {\r
1201 if (r) return r;\r
1202 r = add_opcode_option(reg, OP_SET_OPTION, prev);\r
1203 }\r
1204 return r;\r
1205}\r
1206\r
1207static int\r
1208compile_length_enclose_node(EncloseNode* node, regex_t* reg)\r
1209{\r
1210 int len;\r
1211 int tlen;\r
1212\r
1213 if (node->type == ENCLOSE_OPTION)\r
1214 return compile_length_option_node(node, reg);\r
1215\r
1216 if (node->target) {\r
1217 tlen = compile_length_tree(node->target, reg);\r
1218 if (tlen < 0) return tlen;\r
1219 }\r
1220 else\r
1221 tlen = 0;\r
1222\r
1223 switch (node->type) {\r
1224 case ENCLOSE_MEMORY:\r
1225#ifdef USE_SUBEXP_CALL\r
1226 if (IS_ENCLOSE_CALLED(node)) {\r
1227 len = SIZE_OP_MEMORY_START_PUSH + tlen\r
1228 + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;\r
1229 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))\r
1230 len += (IS_ENCLOSE_RECURSION(node)\r
1231 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);\r
1232 else\r
1233 len += (IS_ENCLOSE_RECURSION(node)\r
1234 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);\r
1235 }\r
1236 else\r
1237#endif\r
1238 {\r
1239 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))\r
1240 len = SIZE_OP_MEMORY_START_PUSH;\r
1241 else\r
1242 len = SIZE_OP_MEMORY_START;\r
1243\r
1244 len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)\r
1245 ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);\r
1246 }\r
1247 break;\r
1248\r
1249 case ENCLOSE_STOP_BACKTRACK:\r
1250 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {\r
b0c2b797
QS
1251 if (node->target == NULL) {\r
1252 CHECK_NULL_RETURN_MEMERR(node->target);\r
1253 }\r
14b0e578
CS
1254 QtfrNode* qn = NQTFR(node->target);\r
1255 tlen = compile_length_tree(qn->target, reg);\r
1256 if (tlen < 0) return tlen;\r
1257\r
1258 len = tlen * qn->lower\r
1259 + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;\r
1260 }\r
1261 else {\r
1262 len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;\r
1263 }\r
1264 break;\r
1265\r
1266 default:\r
1267 return ONIGERR_TYPE_BUG;\r
1268 break;\r
1269 }\r
1270\r
1271 return len;\r
1272}\r
1273\r
1274static int get_char_length_tree(Node* node, regex_t* reg, int* len);\r
1275\r
1276static int\r
1277compile_enclose_node(EncloseNode* node, regex_t* reg)\r
1278{\r
1279 int r, len;\r
1280\r
1281 if (node->type == ENCLOSE_OPTION)\r
1282 return compile_option_node(node, reg);\r
1283\r
1284 switch (node->type) {\r
1285 case ENCLOSE_MEMORY:\r
1286#ifdef USE_SUBEXP_CALL\r
1287 if (IS_ENCLOSE_CALLED(node)) {\r
1288 r = add_opcode(reg, OP_CALL);\r
1289 if (r) return r;\r
1290 node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;\r
1291 node->state |= NST_ADDR_FIXED;\r
1292 r = add_abs_addr(reg, (int )node->call_addr);\r
1293 if (r) return r;\r
1294 len = compile_length_tree(node->target, reg);\r
1295 len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);\r
1296 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))\r
1297 len += (IS_ENCLOSE_RECURSION(node)\r
1298 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);\r
1299 else\r
1300 len += (IS_ENCLOSE_RECURSION(node)\r
1301 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);\r
1302\r
1303 r = add_opcode_rel_addr(reg, OP_JUMP, len);\r
1304 if (r) return r;\r
1305 }\r
1306#endif\r
1307 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))\r
1308 r = add_opcode(reg, OP_MEMORY_START_PUSH);\r
1309 else\r
1310 r = add_opcode(reg, OP_MEMORY_START);\r
1311 if (r) return r;\r
1312 r = add_mem_num(reg, node->regnum);\r
1313 if (r) return r;\r
1314 r = compile_tree(node->target, reg);\r
1315 if (r) return r;\r
1316#ifdef USE_SUBEXP_CALL\r
1317 if (IS_ENCLOSE_CALLED(node)) {\r
1318 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))\r
1319 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)\r
1320 ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));\r
1321 else\r
1322 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)\r
1323 ? OP_MEMORY_END_REC : OP_MEMORY_END));\r
1324\r
1325 if (r) return r;\r
1326 r = add_mem_num(reg, node->regnum);\r
1327 if (r) return r;\r
1328 r = add_opcode(reg, OP_RETURN);\r
1329 }\r
1330 else\r
1331#endif\r
1332 {\r
1333 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))\r
1334 r = add_opcode(reg, OP_MEMORY_END_PUSH);\r
1335 else\r
1336 r = add_opcode(reg, OP_MEMORY_END);\r
1337 if (r) return r;\r
1338 r = add_mem_num(reg, node->regnum);\r
1339 }\r
1340 break;\r
1341\r
1342 case ENCLOSE_STOP_BACKTRACK:\r
1343 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {\r
1344 QtfrNode* qn = NQTFR(node->target);\r
1345 r = compile_tree_n_times(qn->target, qn->lower, reg);\r
1346 if (r) return r;\r
1347\r
1348 len = compile_length_tree(qn->target, reg);\r
1349 if (len < 0) return len;\r
1350\r
1351 r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);\r
1352 if (r) return r;\r
1353 r = compile_tree(qn->target, reg);\r
1354 if (r) return r;\r
1355 r = add_opcode(reg, OP_POP);\r
1356 if (r) return r;\r
1357 r = add_opcode_rel_addr(reg, OP_JUMP,\r
1358 -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));\r
1359 }\r
1360 else {\r
1361 r = add_opcode(reg, OP_PUSH_STOP_BT);\r
1362 if (r) return r;\r
1363 r = compile_tree(node->target, reg);\r
1364 if (r) return r;\r
1365 r = add_opcode(reg, OP_POP_STOP_BT);\r
1366 }\r
1367 break;\r
1368\r
1369 default:\r
1370 return ONIGERR_TYPE_BUG;\r
1371 break;\r
1372 }\r
1373\r
1374 return r;\r
1375}\r
1376\r
1377static int\r
1378compile_length_anchor_node(AnchorNode* node, regex_t* reg)\r
1379{\r
1380 int len;\r
1381 int tlen = 0;\r
1382\r
1383 if (node->target) {\r
1384 tlen = compile_length_tree(node->target, reg);\r
1385 if (tlen < 0) return tlen;\r
1386 }\r
1387\r
1388 switch (node->type) {\r
1389 case ANCHOR_PREC_READ:\r
1390 len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;\r
1391 break;\r
1392 case ANCHOR_PREC_READ_NOT:\r
1393 len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;\r
1394 break;\r
1395 case ANCHOR_LOOK_BEHIND:\r
1396 len = SIZE_OP_LOOK_BEHIND + tlen;\r
1397 break;\r
1398 case ANCHOR_LOOK_BEHIND_NOT:\r
1399 len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT;\r
1400 break;\r
1401\r
1402 default:\r
1403 len = SIZE_OPCODE;\r
1404 break;\r
1405 }\r
1406\r
1407 return len;\r
1408}\r
1409\r
1410static int\r
1411compile_anchor_node(AnchorNode* node, regex_t* reg)\r
1412{\r
1413 int r, len;\r
1414\r
1415 switch (node->type) {\r
1416 case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break;\r
1417 case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break;\r
1418 case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break;\r
1419 case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break;\r
1420 case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break;\r
1421 case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;\r
1422\r
1423 case ANCHOR_WORD_BOUND: r = add_opcode(reg, OP_WORD_BOUND); break;\r
1424 case ANCHOR_NOT_WORD_BOUND: r = add_opcode(reg, OP_NOT_WORD_BOUND); break;\r
1425#ifdef USE_WORD_BEGIN_END\r
1426 case ANCHOR_WORD_BEGIN: r = add_opcode(reg, OP_WORD_BEGIN); break;\r
1427 case ANCHOR_WORD_END: r = add_opcode(reg, OP_WORD_END); break;\r
1428#endif\r
1429\r
1430 case ANCHOR_PREC_READ:\r
1431 r = add_opcode(reg, OP_PUSH_POS);\r
1432 if (r) return r;\r
1433 r = compile_tree(node->target, reg);\r
1434 if (r) return r;\r
1435 r = add_opcode(reg, OP_POP_POS);\r
1436 break;\r
1437\r
1438 case ANCHOR_PREC_READ_NOT:\r
1439 len = compile_length_tree(node->target, reg);\r
1440 if (len < 0) return len;\r
1441 r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS);\r
1442 if (r) return r;\r
1443 r = compile_tree(node->target, reg);\r
1444 if (r) return r;\r
1445 r = add_opcode(reg, OP_FAIL_POS);\r
1446 break;\r
1447\r
1448 case ANCHOR_LOOK_BEHIND:\r
1449 {\r
1450 int n;\r
1451 r = add_opcode(reg, OP_LOOK_BEHIND);\r
1452 if (r) return r;\r
1453 if (node->char_len < 0) {\r
1454 r = get_char_length_tree(node->target, reg, &n);\r
1455 if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;\r
1456 }\r
1457 else\r
1458 n = node->char_len;\r
1459 r = add_length(reg, n);\r
1460 if (r) return r;\r
1461 r = compile_tree(node->target, reg);\r
1462 }\r
1463 break;\r
1464\r
1465 case ANCHOR_LOOK_BEHIND_NOT:\r
1466 {\r
1467 int n;\r
1468 len = compile_length_tree(node->target, reg);\r
1469 r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,\r
1470 len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);\r
1471 if (r) return r;\r
1472 if (node->char_len < 0) {\r
1473 r = get_char_length_tree(node->target, reg, &n);\r
1474 if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;\r
1475 }\r
1476 else\r
1477 n = node->char_len;\r
1478 r = add_length(reg, n);\r
1479 if (r) return r;\r
1480 r = compile_tree(node->target, reg);\r
1481 if (r) return r;\r
1482 r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);\r
1483 }\r
1484 break;\r
1485\r
1486 default:\r
1487 return ONIGERR_TYPE_BUG;\r
1488 break;\r
1489 }\r
1490\r
1491 return r;\r
1492}\r
1493\r
1494static int\r
1495compile_length_tree(Node* node, regex_t* reg)\r
1496{\r
1497 int len, type, r;\r
1498\r
1499 type = NTYPE(node);\r
1500 switch (type) {\r
1501 case NT_LIST:\r
1502 len = 0;\r
1503 do {\r
1504 r = compile_length_tree(NCAR(node), reg);\r
1505 if (r < 0) return r;\r
1506 len += r;\r
1507 } while (IS_NOT_NULL(node = NCDR(node)));\r
1508 r = len;\r
1509 break;\r
1510\r
1511 case NT_ALT:\r
1512 {\r
1513 int n;\r
1514\r
1515 n = r = 0;\r
1516 do {\r
1517 r += compile_length_tree(NCAR(node), reg);\r
1518 n++;\r
1519 } while (IS_NOT_NULL(node = NCDR(node)));\r
1520 r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);\r
1521 }\r
1522 break;\r
1523\r
1524 case NT_STR:\r
1525 if (NSTRING_IS_RAW(node))\r
1526 r = compile_length_string_raw_node(NSTR(node), reg);\r
1527 else\r
1528 r = compile_length_string_node(node, reg);\r
1529 break;\r
1530\r
1531 case NT_CCLASS:\r
1532 r = compile_length_cclass_node(NCCLASS(node), reg);\r
1533 break;\r
1534\r
1535 case NT_CTYPE:\r
1536 case NT_CANY:\r
1537 r = SIZE_OPCODE;\r
1538 break;\r
1539\r
1540 case NT_BREF:\r
1541 {\r
1542 BRefNode* br = NBREF(node);\r
1543\r
1544#ifdef USE_BACKREF_WITH_LEVEL\r
1545 if (IS_BACKREF_NEST_LEVEL(br)) {\r
1546 r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +\r
1547 SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);\r
1548 }\r
1549 else\r
1550#endif\r
1551 if (br->back_num == 1) {\r
1552 r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)\r
1553 ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));\r
1554 }\r
1555 else {\r
1556 r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);\r
1557 }\r
1558 }\r
1559 break;\r
1560\r
1561#ifdef USE_SUBEXP_CALL\r
1562 case NT_CALL:\r
1563 r = SIZE_OP_CALL;\r
1564 break;\r
1565#endif\r
1566\r
1567 case NT_QTFR:\r
1568 r = compile_length_quantifier_node(NQTFR(node), reg);\r
1569 break;\r
1570\r
1571 case NT_ENCLOSE:\r
1572 r = compile_length_enclose_node(NENCLOSE(node), reg);\r
1573 break;\r
1574\r
1575 case NT_ANCHOR:\r
1576 r = compile_length_anchor_node(NANCHOR(node), reg);\r
1577 break;\r
1578\r
1579 default:\r
1580 return ONIGERR_TYPE_BUG;\r
1581 break;\r
1582 }\r
1583\r
1584 return r;\r
1585}\r
1586\r
1587static int\r
1588compile_tree(Node* node, regex_t* reg)\r
1589{\r
1590 int n, type, len, pos, r = 0;\r
1591\r
1592 type = NTYPE(node);\r
1593 switch (type) {\r
1594 case NT_LIST:\r
1595 do {\r
1596 r = compile_tree(NCAR(node), reg);\r
1597 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));\r
1598 break;\r
1599\r
1600 case NT_ALT:\r
1601 {\r
1602 Node* x = node;\r
1603 len = 0;\r
1604 do {\r
1605 len += compile_length_tree(NCAR(x), reg);\r
1606 if (NCDR(x) != NULL) {\r
1607 len += SIZE_OP_PUSH + SIZE_OP_JUMP;\r
1608 }\r
1609 } while (IS_NOT_NULL(x = NCDR(x)));\r
1610 pos = reg->used + len; /* goal position */\r
1611\r
1612 do {\r
1613 len = compile_length_tree(NCAR(node), reg);\r
1614 if (IS_NOT_NULL(NCDR(node))) {\r
1615 r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);\r
1616 if (r) break;\r
1617 }\r
1618 r = compile_tree(NCAR(node), reg);\r
1619 if (r) break;\r
1620 if (IS_NOT_NULL(NCDR(node))) {\r
1621 len = pos - (reg->used + SIZE_OP_JUMP);\r
1622 r = add_opcode_rel_addr(reg, OP_JUMP, len);\r
1623 if (r) break;\r
1624 }\r
1625 } while (IS_NOT_NULL(node = NCDR(node)));\r
1626 }\r
1627 break;\r
1628\r
1629 case NT_STR:\r
1630 if (NSTRING_IS_RAW(node))\r
1631 r = compile_string_raw_node(NSTR(node), reg);\r
1632 else\r
1633 r = compile_string_node(node, reg);\r
1634 break;\r
1635\r
1636 case NT_CCLASS:\r
1637 r = compile_cclass_node(NCCLASS(node), reg);\r
1638 break;\r
1639\r
1640 case NT_CTYPE:\r
1641 {\r
1642 int op;\r
1643\r
1644 switch (NCTYPE(node)->ctype) {\r
1645 case ONIGENC_CTYPE_WORD:\r
1646 if (NCTYPE(node)->not != 0) op = OP_NOT_WORD;\r
1647 else op = OP_WORD;\r
1648 break;\r
1649 default:\r
1650 return ONIGERR_TYPE_BUG;\r
1651 break;\r
1652 }\r
1653 r = add_opcode(reg, op);\r
1654 }\r
1655 break;\r
1656\r
1657 case NT_CANY:\r
1658 if (IS_MULTILINE(reg->options))\r
1659 r = add_opcode(reg, OP_ANYCHAR_ML);\r
1660 else\r
1661 r = add_opcode(reg, OP_ANYCHAR);\r
1662 break;\r
1663\r
1664 case NT_BREF:\r
1665 {\r
1666 BRefNode* br = NBREF(node);\r
1667\r
1668#ifdef USE_BACKREF_WITH_LEVEL\r
1669 if (IS_BACKREF_NEST_LEVEL(br)) {\r
1670 r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);\r
1671 if (r) return r;\r
1672 r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));\r
1673 if (r) return r;\r
1674 r = add_length(reg, br->nest_level);\r
1675 if (r) return r;\r
1676\r
1677 goto add_bacref_mems;\r
1678 }\r
1679 else\r
1680#endif\r
1681 if (br->back_num == 1) {\r
1682 n = br->back_static[0];\r
1683 if (IS_IGNORECASE(reg->options)) {\r
1684 r = add_opcode(reg, OP_BACKREFN_IC);\r
1685 if (r) return r;\r
1686 r = add_mem_num(reg, n);\r
1687 }\r
1688 else {\r
1689 switch (n) {\r
1690 case 1: r = add_opcode(reg, OP_BACKREF1); break;\r
1691 case 2: r = add_opcode(reg, OP_BACKREF2); break;\r
1692 default:\r
1693 r = add_opcode(reg, OP_BACKREFN);\r
1694 if (r) return r;\r
1695 r = add_mem_num(reg, n);\r
1696 break;\r
1697 }\r
1698 }\r
1699 }\r
1700 else {\r
1701 int i;\r
1702 int* p;\r
1703\r
1704 if (IS_IGNORECASE(reg->options)) {\r
1705 r = add_opcode(reg, OP_BACKREF_MULTI_IC);\r
1706 }\r
1707 else {\r
1708 r = add_opcode(reg, OP_BACKREF_MULTI);\r
1709 }\r
1710 if (r) return r;\r
1711\r
1712#ifdef USE_BACKREF_WITH_LEVEL\r
1713 add_bacref_mems:\r
1714#endif\r
1715 r = add_length(reg, br->back_num);\r
1716 if (r) return r;\r
1717 p = BACKREFS_P(br);\r
1718 for (i = br->back_num - 1; i >= 0; i--) {\r
1719 r = add_mem_num(reg, p[i]);\r
1720 if (r) return r;\r
1721 }\r
1722 }\r
1723 }\r
1724 break;\r
1725\r
1726#ifdef USE_SUBEXP_CALL\r
1727 case NT_CALL:\r
1728 r = compile_call(NCALL(node), reg);\r
1729 break;\r
1730#endif\r
1731\r
1732 case NT_QTFR:\r
1733 r = compile_quantifier_node(NQTFR(node), reg);\r
1734 break;\r
1735\r
1736 case NT_ENCLOSE:\r
1737 r = compile_enclose_node(NENCLOSE(node), reg);\r
1738 break;\r
1739\r
1740 case NT_ANCHOR:\r
1741 r = compile_anchor_node(NANCHOR(node), reg);\r
1742 break;\r
1743\r
1744 default:\r
1745#ifdef ONIG_DEBUG\r
1746 fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));\r
1747#endif\r
1748 break;\r
1749 }\r
1750\r
1751 return r;\r
1752}\r
1753\r
1754#ifdef USE_NAMED_GROUP\r
1755\r
1756static int\r
1757noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)\r
1758{\r
1759 int r = 0;\r
1760 Node* node = *plink;\r
1761\r
1762 switch (NTYPE(node)) {\r
1763 case NT_LIST:\r
1764 case NT_ALT:\r
1765 do {\r
1766 r = noname_disable_map(&(NCAR(node)), map, counter);\r
1767 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));\r
1768 break;\r
1769\r
1770 case NT_QTFR:\r
1771 {\r
1772 Node** ptarget = &(NQTFR(node)->target);\r
1773 Node* old = *ptarget;\r
1774 r = noname_disable_map(ptarget, map, counter);\r
1775 if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {\r
1776 onig_reduce_nested_quantifier(node, *ptarget);\r
1777 }\r
1778 }\r
1779 break;\r
1780\r
1781 case NT_ENCLOSE:\r
1782 {\r
1783 EncloseNode* en = NENCLOSE(node);\r
1784 if (en->type == ENCLOSE_MEMORY) {\r
1785 if (IS_ENCLOSE_NAMED_GROUP(en)) {\r
1786 (*counter)++;\r
1787 map[en->regnum].new_val = *counter;\r
1788 en->regnum = *counter;\r
1789 r = noname_disable_map(&(en->target), map, counter);\r
1790 }\r
1791 else {\r
1792 *plink = en->target;\r
1793 en->target = NULL_NODE;\r
1794 onig_node_free(node);\r
1795 r = noname_disable_map(plink, map, counter);\r
1796 }\r
1797 }\r
1798 else\r
1799 r = noname_disable_map(&(en->target), map, counter);\r
1800 }\r
1801 break;\r
1802\r
1803 default:\r
1804 break;\r
1805 }\r
1806\r
1807 return r;\r
1808}\r
1809\r
1810static int\r
1811renumber_node_backref(Node* node, GroupNumRemap* map)\r
1812{\r
1813 int i, pos, n, old_num;\r
1814 int *backs;\r
1815 BRefNode* bn = NBREF(node);\r
1816\r
1817 if (! IS_BACKREF_NAME_REF(bn))\r
1818 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;\r
1819\r
1820 old_num = bn->back_num;\r
1821 if (IS_NULL(bn->back_dynamic))\r
1822 backs = bn->back_static;\r
1823 else\r
1824 backs = bn->back_dynamic;\r
1825\r
1826 for (i = 0, pos = 0; i < old_num; i++) {\r
1827 n = map[backs[i]].new_val;\r
1828 if (n > 0) {\r
1829 backs[pos] = n;\r
1830 pos++;\r
1831 }\r
1832 }\r
1833\r
1834 bn->back_num = pos;\r
1835 return 0;\r
1836}\r
1837\r
1838static int\r
1839renumber_by_map(Node* node, GroupNumRemap* map)\r
1840{\r
1841 int r = 0;\r
1842\r
1843 switch (NTYPE(node)) {\r
1844 case NT_LIST:\r
1845 case NT_ALT:\r
1846 do {\r
1847 r = renumber_by_map(NCAR(node), map);\r
1848 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));\r
1849 break;\r
1850 case NT_QTFR:\r
1851 r = renumber_by_map(NQTFR(node)->target, map);\r
1852 break;\r
1853 case NT_ENCLOSE:\r
1854 r = renumber_by_map(NENCLOSE(node)->target, map);\r
1855 break;\r
1856\r
1857 case NT_BREF:\r
1858 r = renumber_node_backref(node, map);\r
1859 break;\r
1860\r
1861 default:\r
1862 break;\r
1863 }\r
1864\r
1865 return r;\r
1866}\r
1867\r
1868static int\r
1869numbered_ref_check(Node* node)\r
1870{\r
1871 int r = 0;\r
1872\r
1873 switch (NTYPE(node)) {\r
1874 case NT_LIST:\r
1875 case NT_ALT:\r
1876 do {\r
1877 r = numbered_ref_check(NCAR(node));\r
1878 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));\r
1879 break;\r
1880 case NT_QTFR:\r
1881 r = numbered_ref_check(NQTFR(node)->target);\r
1882 break;\r
1883 case NT_ENCLOSE:\r
1884 r = numbered_ref_check(NENCLOSE(node)->target);\r
1885 break;\r
1886\r
1887 case NT_BREF:\r
1888 if (! IS_BACKREF_NAME_REF(NBREF(node)))\r
1889 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;\r
1890 break;\r
1891\r
1892 default:\r
1893 break;\r
1894 }\r
1895\r
1896 return r;\r
1897}\r
1898\r
1899static int\r
1900disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)\r
1901{\r
1902 int r, i, pos, counter;\r
1903 int Result;\r
1904 BitStatusType loc;\r
1905 GroupNumRemap* map;\r
1906\r
1907 map = (GroupNumRemap* )xmalloc(sizeof(GroupNumRemap) * (env->num_mem + 1));\r
1908 CHECK_NULL_RETURN_MEMERR(map);\r
1909 for (i = 1; i <= env->num_mem; i++) {\r
1910 map[i].new_val = 0;\r
1911 }\r
1912 counter = 0;\r
1913 r = noname_disable_map(root, map, &counter);\r
1914 if (r != 0) return r;\r
1915\r
1916 r = renumber_by_map(*root, map);\r
1917 if (r != 0) return r;\r
1918\r
1919 for (i = 1, pos = 1; i <= env->num_mem; i++) {\r
1920 if (map[i].new_val > 0) {\r
1921 SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];\r
1922 pos++;\r
1923 }\r
1924 }\r
1925\r
1926 loc = env->capture_history;\r
1927 BIT_STATUS_CLEAR(env->capture_history);\r
1928 for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {\r
1929 if (BIT_STATUS_AT(loc, i)) {\r
1930 BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);\r
1931 }\r
1932 }\r
1933\r
1934 env->num_mem = env->num_named;\r
1935 reg->num_mem = env->num_named;\r
1936\r
1937 Result = onig_renumber_name_table(reg, map);\r
1938 xfree(map);\r
1939 return Result;\r
1940}\r
1941#endif /* USE_NAMED_GROUP */\r
1942\r
1943#ifdef USE_SUBEXP_CALL\r
1944static int\r
1945unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)\r
1946{\r
1947 int i, offset;\r
1948 EncloseNode* en;\r
1949 AbsAddrType addr;\r
1950\r
1951 for (i = 0; i < uslist->num; i++) {\r
1952 en = NENCLOSE(uslist->us[i].target);\r
1953 if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;\r
1954 addr = en->call_addr;\r
1955 offset = uslist->us[i].offset;\r
1956\r
1957 BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);\r
1958 }\r
1959 return 0;\r
1960}\r
1961#endif\r
1962\r
1963#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT\r
1964static int\r
1965quantifiers_memory_node_info(Node* node)\r
1966{\r
1967 int r = 0;\r
1968\r
1969 switch (NTYPE(node)) {\r
1970 case NT_LIST:\r
1971 case NT_ALT:\r
1972 {\r
1973 int v;\r
1974 do {\r
1975 v = quantifiers_memory_node_info(NCAR(node));\r
1976 if (v > r) r = v;\r
1977 } while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));\r
1978 }\r
1979 break;\r
1980\r
1981#ifdef USE_SUBEXP_CALL\r
1982 case NT_CALL:\r
1983 if (IS_CALL_RECURSION(NCALL(node))) {\r
1984 return NQ_TARGET_IS_EMPTY_REC; /* tiny version */\r
1985 }\r
1986 else\r
1987 r = quantifiers_memory_node_info(NCALL(node)->target);\r
1988 break;\r
1989#endif\r
1990\r
1991 case NT_QTFR:\r
1992 {\r
1993 QtfrNode* qn = NQTFR(node);\r
1994 if (qn->upper != 0) {\r
1995 r = quantifiers_memory_node_info(qn->target);\r
1996 }\r
1997 }\r
1998 break;\r
1999\r
2000 case NT_ENCLOSE:\r
2001 {\r
2002 EncloseNode* en = NENCLOSE(node);\r
2003 switch (en->type) {\r
2004 case ENCLOSE_MEMORY:\r
2005 return NQ_TARGET_IS_EMPTY_MEM;\r
2006 break;\r
2007\r
2008 case ENCLOSE_OPTION:\r
2009 case ENCLOSE_STOP_BACKTRACK:\r
2010 r = quantifiers_memory_node_info(en->target);\r
2011 break;\r
2012 default:\r
2013 break;\r
2014 }\r
2015 }\r
2016 break;\r
2017\r
2018 case NT_BREF:\r
2019 case NT_STR:\r
2020 case NT_CTYPE:\r
2021 case NT_CCLASS:\r
2022 case NT_CANY:\r
2023 case NT_ANCHOR:\r
2024 default:\r
2025 break;\r
2026 }\r
2027\r
2028 return r;\r
2029}\r
2030#endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */\r
2031\r
2032static int\r
2033get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)\r
2034{\r
2035 OnigDistance tmin;\r
2036 int r = 0;\r
2037\r
2038 *min = 0;\r
2039 switch (NTYPE(node)) {\r
2040 case NT_BREF:\r
2041 {\r
2042 int i;\r
2043 int* backs;\r
2044 Node** nodes = SCANENV_MEM_NODES(env);\r
2045 BRefNode* br = NBREF(node);\r
2046 if (br->state & NST_RECURSION) break;\r
2047\r
2048 backs = BACKREFS_P(br);\r
2049 if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF;\r
2050 r = get_min_match_length(nodes[backs[0]], min, env);\r
2051 if (r != 0) break;\r
2052 for (i = 1; i < br->back_num; i++) {\r
2053 if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;\r
2054 r = get_min_match_length(nodes[backs[i]], &tmin, env);\r
2055 if (r != 0) break;\r
2056 if (*min > tmin) *min = tmin;\r
2057 }\r
2058 }\r
2059 break;\r
2060\r
2061#ifdef USE_SUBEXP_CALL\r
2062 case NT_CALL:\r
2063 if (IS_CALL_RECURSION(NCALL(node))) {\r
2064 EncloseNode* en = NENCLOSE(NCALL(node)->target);\r
2065 if (IS_ENCLOSE_MIN_FIXED(en))\r
2066 *min = en->min_len;\r
2067 }\r
2068 else\r
2069 r = get_min_match_length(NCALL(node)->target, min, env);\r
2070 break;\r
2071#endif\r
2072\r
2073 case NT_LIST:\r
2074 do {\r
2075 r = get_min_match_length(NCAR(node), &tmin, env);\r
2076 if (r == 0) *min += tmin;\r
2077 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));\r
2078 break;\r
2079\r
2080 case NT_ALT:\r
2081 {\r
2082 Node *x, *y;\r
2083 y = node;\r
2084 do {\r
2085 x = NCAR(y);\r
2086 r = get_min_match_length(x, &tmin, env);\r
2087 if (r != 0) break;\r
2088 if (y == node) *min = tmin;\r
2089 else if (*min > tmin) *min = tmin;\r
2090 } while (r == 0 && IS_NOT_NULL(y = NCDR(y)));\r
2091 }\r
2092 break;\r
2093\r
2094 case NT_STR:\r
2095 {\r
2096 StrNode* sn = NSTR(node);\r
2097 *min = (OnigDistance)(sn->end - sn->s);\r
2098 }\r
2099 break;\r
2100\r
2101 case NT_CTYPE:\r
2102 *min = 1;\r
2103 break;\r
2104\r
2105 case NT_CCLASS:\r
2106 case NT_CANY:\r
2107 *min = 1;\r
2108 break;\r
2109\r
2110 case NT_QTFR:\r
2111 {\r
2112 QtfrNode* qn = NQTFR(node);\r
2113\r
2114 if (qn->lower > 0) {\r
2115 r = get_min_match_length(qn->target, min, env);\r
2116 if (r == 0)\r
2117 *min = distance_multiply(*min, qn->lower);\r
2118 }\r
2119 }\r
2120 break;\r
2121\r
2122 case NT_ENCLOSE:\r
2123 {\r
2124 EncloseNode* en = NENCLOSE(node);\r
2125 switch (en->type) {\r
2126 case ENCLOSE_MEMORY:\r
2127#ifdef USE_SUBEXP_CALL\r
2128 if (IS_ENCLOSE_MIN_FIXED(en))\r
2129 *min = en->min_len;\r
2130 else {\r
2131 r = get_min_match_length(en->target, min, env);\r
2132 if (r == 0) {\r
2133 en->min_len = *min;\r
2134 SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);\r
2135 }\r
2136 }\r
2137 break;\r
2138#endif\r
2139 case ENCLOSE_OPTION:\r
2140 case ENCLOSE_STOP_BACKTRACK:\r
2141 r = get_min_match_length(en->target, min, env);\r
2142 break;\r
2143 }\r
2144 }\r
2145 break;\r
2146\r
2147 case NT_ANCHOR:\r
2148 default:\r
2149 break;\r
2150 }\r
2151\r
2152 return r;\r
2153}\r
2154\r
2155static int\r
2156get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)\r
2157{\r
2158 OnigDistance tmax;\r
2159 int r = 0;\r
2160\r
2161 *max = 0;\r
2162 switch (NTYPE(node)) {\r
2163 case NT_LIST:\r
2164 do {\r
2165 r = get_max_match_length(NCAR(node), &tmax, env);\r
2166 if (r == 0)\r
2167 *max = distance_add(*max, tmax);\r
2168 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));\r
2169 break;\r
2170\r
2171 case NT_ALT:\r
2172 do {\r
2173 r = get_max_match_length(NCAR(node), &tmax, env);\r
2174 if (r == 0 && *max < tmax) *max = tmax;\r
2175 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));\r
2176 break;\r
2177\r
2178 case NT_STR:\r
2179 {\r
2180 StrNode* sn = NSTR(node);\r
2181 *max = (OnigDistance)(sn->end - sn->s);\r
2182 }\r
2183 break;\r
2184\r
2185 case NT_CTYPE:\r
2186 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);\r
2187 break;\r
2188\r
2189 case NT_CCLASS:\r
2190 case NT_CANY:\r
2191 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);\r
2192 break;\r
2193\r
2194 case NT_BREF:\r
2195 {\r
2196 int i;\r
2197 int* backs;\r
2198 Node** nodes = SCANENV_MEM_NODES(env);\r
2199 BRefNode* br = NBREF(node);\r
2200 if (br->state & NST_RECURSION) {\r
2201 *max = ONIG_INFINITE_DISTANCE;\r
2202 break;\r
2203 }\r
2204 backs = BACKREFS_P(br);\r
2205 for (i = 0; i < br->back_num; i++) {\r
2206 if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;\r
2207 r = get_max_match_length(nodes[backs[i]], &tmax, env);\r
2208 if (r != 0) break;\r
2209 if (*max < tmax) *max = tmax;\r
2210 }\r
2211 }\r
2212 break;\r
2213\r
2214#ifdef USE_SUBEXP_CALL\r
2215 case NT_CALL:\r
2216 if (! IS_CALL_RECURSION(NCALL(node)))\r
2217 r = get_max_match_length(NCALL(node)->target, max, env);\r
2218 else\r
2219 *max = ONIG_INFINITE_DISTANCE;\r
2220 break;\r
2221#endif\r
2222\r
2223 case NT_QTFR:\r
2224 {\r
2225 QtfrNode* qn = NQTFR(node);\r
2226\r
2227 if (qn->upper != 0) {\r
2228 r = get_max_match_length(qn->target, max, env);\r
2229 if (r == 0 && *max != 0) {\r
2230 if (! IS_REPEAT_INFINITE(qn->upper))\r
2231 *max = distance_multiply(*max, qn->upper);\r
2232 else\r
2233 *max = ONIG_INFINITE_DISTANCE;\r
2234 }\r
2235 }\r
2236 }\r
2237 break;\r
2238\r
2239 case NT_ENCLOSE:\r
2240 {\r
2241 EncloseNode* en = NENCLOSE(node);\r
2242 switch (en->type) {\r
2243 case ENCLOSE_MEMORY:\r
2244#ifdef USE_SUBEXP_CALL\r
2245 if (IS_ENCLOSE_MAX_FIXED(en))\r
2246 *max = en->max_len;\r
2247 else {\r
2248 r = get_max_match_length(en->target, max, env);\r
2249 if (r == 0) {\r
2250 en->max_len = *max;\r
2251 SET_ENCLOSE_STATUS(node, NST_MAX_FIXED);\r
2252 }\r
2253 }\r
2254 break;\r
2255#endif\r
2256 case ENCLOSE_OPTION:\r
2257 case ENCLOSE_STOP_BACKTRACK:\r
2258 r = get_max_match_length(en->target, max, env);\r
2259 break;\r
2260 }\r
2261 }\r
2262 break;\r
2263\r
2264 case NT_ANCHOR:\r
2265 default:\r
2266 break;\r
2267 }\r
2268\r
2269 return r;\r
2270}\r
2271\r
2272#define GET_CHAR_LEN_VARLEN -1\r
2273#define GET_CHAR_LEN_TOP_ALT_VARLEN -2\r
2274\r
2275/* fixed size pattern node only */\r
2276static int\r
2277get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)\r
2278{\r
2279 int tlen;\r
2280 int r = 0;\r
2281\r
2282 level++;\r
2283 *len = 0;\r
2284 switch (NTYPE(node)) {\r
2285 case NT_LIST:\r
2286 do {\r
2287 r = get_char_length_tree1(NCAR(node), reg, &tlen, level);\r
2288 if (r == 0)\r
2289 *len = distance_add(*len, tlen);\r
2290 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));\r
2291 break;\r
2292\r
2293 case NT_ALT:\r
2294 {\r
2295 int tlen2;\r
2296 int varlen = 0;\r
2297\r
2298 r = get_char_length_tree1(NCAR(node), reg, &tlen, level);\r
2299 while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {\r
2300 r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);\r
2301 if (r == 0) {\r
2302 if (tlen != tlen2)\r
2303 varlen = 1;\r
2304 }\r
2305 }\r
2306 if (r == 0) {\r
2307 if (varlen != 0) {\r
2308 if (level == 1)\r
2309 r = GET_CHAR_LEN_TOP_ALT_VARLEN;\r
2310 else\r
2311 r = GET_CHAR_LEN_VARLEN;\r
2312 }\r
2313 else\r
2314 *len = tlen;\r
2315 }\r
2316 }\r
2317 break;\r
2318\r
2319 case NT_STR:\r
2320 {\r
2321 StrNode* sn = NSTR(node);\r
2322 UChar *s = sn->s;\r
2323 while (s < sn->end) {\r
2324 s += enclen(reg->enc, s);\r
2325 (*len)++;\r
2326 }\r
2327 }\r
2328 break;\r
2329\r
2330 case NT_QTFR:\r
2331 {\r
2332 QtfrNode* qn = NQTFR(node);\r
2333 if (qn->lower == qn->upper) {\r
2334 r = get_char_length_tree1(qn->target, reg, &tlen, level);\r
2335 if (r == 0)\r
2336 *len = distance_multiply(tlen, qn->lower);\r
2337 }\r
2338 else\r
2339 r = GET_CHAR_LEN_VARLEN;\r
2340 }\r
2341 break;\r
2342\r
2343#ifdef USE_SUBEXP_CALL\r
2344 case NT_CALL:\r
2345 if (! IS_CALL_RECURSION(NCALL(node)))\r
2346 r = get_char_length_tree1(NCALL(node)->target, reg, len, level);\r
2347 else\r
2348 r = GET_CHAR_LEN_VARLEN;\r
2349 break;\r
2350#endif\r
2351\r
2352 case NT_CTYPE:\r
2353 *len = 1;\r
2354 break;\r
2355\r
2356 case NT_CCLASS:\r
2357 case NT_CANY:\r
2358 *len = 1;\r
2359 break;\r
2360\r
2361 case NT_ENCLOSE:\r
2362 {\r
2363 EncloseNode* en = NENCLOSE(node);\r
2364 switch (en->type) {\r
2365 case ENCLOSE_MEMORY:\r
2366#ifdef USE_SUBEXP_CALL\r
2367 if (IS_ENCLOSE_CLEN_FIXED(en))\r
2368 *len = en->char_len;\r
2369 else {\r
2370 r = get_char_length_tree1(en->target, reg, len, level);\r
2371 if (r == 0) {\r
2372 en->char_len = *len;\r
2373 SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED);\r
2374 }\r
2375 }\r
2376 break;\r
2377#endif\r
2378 case ENCLOSE_OPTION:\r
2379 case ENCLOSE_STOP_BACKTRACK:\r
2380 r = get_char_length_tree1(en->target, reg, len, level);\r
2381 break;\r
2382 default:\r
2383 break;\r
2384 }\r
2385 }\r
2386 break;\r
2387\r
2388 case NT_ANCHOR:\r
2389 break;\r
2390\r
2391 default:\r
2392 r = GET_CHAR_LEN_VARLEN;\r
2393 break;\r
2394 }\r
2395\r
2396 return r;\r
2397}\r
2398\r
2399static int\r
2400get_char_length_tree(Node* node, regex_t* reg, int* len)\r
2401{\r
2402 return get_char_length_tree1(node, reg, len, 0);\r
2403}\r
2404\r
2405/* x is not included y ==> 1 : 0 */\r
2406static int\r
2407is_not_included(Node* x, Node* y, regex_t* reg)\r
2408{\r
2409 int i, len;\r
2410 OnigCodePoint code;\r
2411 UChar *p;\r
2412 int ytype;\r
2413\r
2414 retry:\r
2415 ytype = NTYPE(y);\r
2416 switch (NTYPE(x)) {\r
2417 case NT_CTYPE:\r
2418 {\r
2419 switch (ytype) {\r
2420 case NT_CTYPE:\r
2421 if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&\r
2422 NCTYPE(y)->not != NCTYPE(x)->not)\r
2423 return 1;\r
2424 else\r
2425 return 0;\r
2426 break;\r
2427\r
2428 case NT_CCLASS:\r
2429 swap:\r
2430 {\r
2431 Node* tmp;\r
2432 tmp = x; x = y; y = tmp;\r
2433 goto retry;\r
2434 }\r
2435 break;\r
2436\r
2437 case NT_STR:\r
2438 goto swap;\r
2439 break;\r
2440\r
2441 default:\r
2442 break;\r
2443 }\r
2444 }\r
2445 break;\r
2446\r
2447 case NT_CCLASS:\r
2448 {\r
2449 CClassNode* xc = NCCLASS(x);\r
2450 switch (ytype) {\r
2451 case NT_CTYPE:\r
2452 switch (NCTYPE(y)->ctype) {\r
2453 case ONIGENC_CTYPE_WORD:\r
2454 if (NCTYPE(y)->not == 0) {\r
2455 if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {\r
2456 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {\r
2457 if (BITSET_AT(xc->bs, i)) {\r
2458 if (IS_CODE_SB_WORD(reg->enc, i)) return 0;\r
2459 }\r
2460 }\r
2461 return 1;\r
2462 }\r
2463 return 0;\r
2464 }\r
2465 else {\r
2466 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {\r
2467 if (! IS_CODE_SB_WORD(reg->enc, i)) {\r
2468 if (!IS_NCCLASS_NOT(xc)) {\r
2469 if (BITSET_AT(xc->bs, i))\r
2470 return 0;\r
2471 }\r
2472 else {\r
2473 if (! BITSET_AT(xc->bs, i))\r
2474 return 0;\r
2475 }\r
2476 }\r
2477 }\r
2478 return 1;\r
2479 }\r
2480 break;\r
2481\r
2482 default:\r
2483 break;\r
2484 }\r
2485 break;\r
2486\r
2487 case NT_CCLASS:\r
2488 {\r
2489 int v;\r
2490 CClassNode* yc = NCCLASS(y);\r
2491\r
2492 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {\r
2493 v = BITSET_AT(xc->bs, i);\r
2494 if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||\r
2495 (v == 0 && IS_NCCLASS_NOT(xc))) {\r
2496 v = BITSET_AT(yc->bs, i);\r
2497 if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||\r
2498 (v == 0 && IS_NCCLASS_NOT(yc)))\r
2499 return 0;\r
2500 }\r
2501 }\r
2502 if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||\r
2503 (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))\r
2504 return 1;\r
2505 return 0;\r
2506 }\r
2507 break;\r
2508\r
2509 case NT_STR:\r
2510 goto swap;\r
2511 break;\r
2512\r
2513 default:\r
2514 break;\r
2515 }\r
2516 }\r
2517 break;\r
2518\r
2519 case NT_STR:\r
2520 {\r
2521 StrNode* xs = NSTR(x);\r
2522 if (NSTRING_LEN(x) == 0)\r
2523 break;\r
2524\r
2525 //c = *(xs->s);\r
2526 switch (ytype) {\r
2527 case NT_CTYPE:\r
2528 switch (NCTYPE(y)->ctype) {\r
2529 case ONIGENC_CTYPE_WORD:\r
2530 if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))\r
2531 return NCTYPE(y)->not;\r
2532 else\r
2533 return !(NCTYPE(y)->not);\r
2534 break;\r
2535 default:\r
2536 break;\r
2537 }\r
2538 break;\r
2539\r
2540 case NT_CCLASS:\r
2541 {\r
2542 CClassNode* cc = NCCLASS(y);\r
2543\r
2544 code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,\r
2545 xs->s + ONIGENC_MBC_MAXLEN(reg->enc));\r
2546 return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);\r
2547 }\r
2548 break;\r
2549\r
2550 case NT_STR:\r
2551 {\r
2552 UChar *q;\r
2553 StrNode* ys = NSTR(y);\r
2554 len = NSTRING_LEN(x);\r
2555 if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);\r
2556 if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {\r
2557 /* tiny version */\r
2558 return 0;\r
2559 }\r
2560 else {\r
2561 for (i = 0, p = ys->s, q = xs->s; i < len; i++, p++, q++) {\r
2562 if (*p != *q) return 1;\r
2563 }\r
2564 }\r
2565 }\r
2566 break;\r
2567 \r
2568 default:\r
2569 break;\r
2570 }\r
2571 }\r
2572 break;\r
2573\r
2574 default:\r
2575 break;\r
2576 }\r
2577\r
2578 return 0;\r
2579}\r
2580\r
2581static Node*\r
2582get_head_value_node(Node* node, int exact, regex_t* reg)\r
2583{\r
2584 Node* n = NULL_NODE;\r
2585\r
2586 switch (NTYPE(node)) {\r
2587 case NT_BREF:\r
2588 case NT_ALT:\r
2589 case NT_CANY:\r
2590#ifdef USE_SUBEXP_CALL\r
2591 case NT_CALL:\r
2592#endif\r
2593 break;\r
2594\r
2595 case NT_CTYPE:\r
2596 case NT_CCLASS:\r
2597 if (exact == 0) {\r
2598 n = node;\r
2599 }\r
2600 break;\r
2601\r
2602 case NT_LIST:\r
2603 n = get_head_value_node(NCAR(node), exact, reg);\r
2604 break;\r
2605\r
2606 case NT_STR:\r
2607 {\r
2608 StrNode* sn = NSTR(node);\r
2609\r
2610 if (sn->end <= sn->s)\r
2611 break;\r
2612\r
2613 if (exact != 0 &&\r
2614 !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {\r
2615 }\r
2616 else {\r
2617 n = node;\r
2618 }\r
2619 }\r
2620 break;\r
2621\r
2622 case NT_QTFR:\r
2623 {\r
2624 QtfrNode* qn = NQTFR(node);\r
2625 if (qn->lower > 0) {\r
2626 if (IS_NOT_NULL(qn->head_exact))\r
2627 n = qn->head_exact;\r
2628 else\r
2629 n = get_head_value_node(qn->target, exact, reg);\r
2630 }\r
2631 }\r
2632 break;\r
2633\r
2634 case NT_ENCLOSE:\r
2635 {\r
2636 EncloseNode* en = NENCLOSE(node);\r
2637 switch (en->type) {\r
2638 case ENCLOSE_OPTION:\r
2639 {\r
2640 OnigOptionType options = reg->options;\r
2641\r
2642 reg->options = NENCLOSE(node)->option;\r
2643 n = get_head_value_node(NENCLOSE(node)->target, exact, reg);\r
2644 reg->options = options;\r
2645 }\r
2646 break;\r
2647\r
2648 case ENCLOSE_MEMORY:\r
2649 case ENCLOSE_STOP_BACKTRACK:\r
2650 n = get_head_value_node(en->target, exact, reg);\r
2651 break;\r
2652 }\r
2653 }\r
2654 break;\r
2655\r
2656 case NT_ANCHOR:\r
2657 if (NANCHOR(node)->type == ANCHOR_PREC_READ)\r
2658 n = get_head_value_node(NANCHOR(node)->target, exact, reg);\r
2659 break;\r
2660\r
2661 default:\r
2662 break;\r
2663 }\r
2664\r
2665 return n;\r
2666}\r
2667\r
2668static int\r
2669check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)\r
2670{\r
2671 int type, r = 0;\r
2672\r
2673 type = NTYPE(node);\r
2674 if ((NTYPE2BIT(type) & type_mask) == 0)\r
2675 return 1;\r
2676\r
2677 switch (type) {\r
2678 case NT_LIST:\r
2679 case NT_ALT:\r
2680 do {\r
2681 r = check_type_tree(NCAR(node), type_mask, enclose_mask,\r
2682 anchor_mask);\r
2683 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));\r
2684 break;\r
2685\r
2686 case NT_QTFR:\r
2687 r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,\r
2688 anchor_mask);\r
2689 break;\r
2690\r
2691 case NT_ENCLOSE:\r
2692 {\r
2693 EncloseNode* en = NENCLOSE(node);\r
2694 if ((en->type & enclose_mask) == 0)\r
2695 return 1;\r
2696\r
2697 r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);\r
2698 }\r
2699 break;\r
2700\r
2701 case NT_ANCHOR:\r
2702 type = NANCHOR(node)->type;\r
2703 if ((type & anchor_mask) == 0)\r
2704 return 1;\r
2705\r
2706 if (NANCHOR(node)->target)\r
2707 r = check_type_tree(NANCHOR(node)->target,\r
2708 type_mask, enclose_mask, anchor_mask);\r
2709 break;\r
2710\r
2711 default:\r
2712 break;\r
2713 }\r
2714 return r;\r
2715}\r
2716\r
2717#ifdef USE_SUBEXP_CALL\r
2718\r
2719#define RECURSION_EXIST 1\r
2720#define RECURSION_INFINITE 2\r
2721\r
2722static int\r
2723subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)\r
2724{\r
2725 int type;\r
2726 int r = 0;\r
2727\r
2728 type = NTYPE(node);\r
2729 switch (type) {\r
2730 case NT_LIST:\r
2731 {\r
2732 Node *x;\r
2733 OnigDistance min;\r
2734 int ret;\r
2735\r
2736 x = node;\r
2737 do {\r
2738 ret = subexp_inf_recursive_check(NCAR(x), env, head);\r
2739 if (ret < 0 || ret == RECURSION_INFINITE) return ret;\r
2740 r |= ret;\r
2741 if (head) {\r
2742 ret = get_min_match_length(NCAR(x), &min, env);\r
2743 if (ret != 0) return ret;\r
2744 if (min != 0) head = 0;\r
2745 }\r
2746 } while (IS_NOT_NULL(x = NCDR(x)));\r
2747 }\r
2748 break;\r
2749\r
2750 case NT_ALT:\r
2751 {\r
2752 int ret;\r
2753 r = RECURSION_EXIST;\r
2754 do {\r
2755 ret = subexp_inf_recursive_check(NCAR(node), env, head);\r
2756 if (ret < 0 || ret == RECURSION_INFINITE) return ret;\r
2757 r &= ret;\r
2758 } while (IS_NOT_NULL(node = NCDR(node)));\r
2759 }\r
2760 break;\r
2761\r
2762 case NT_QTFR:\r
2763 r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);\r
2764 if (r == RECURSION_EXIST) {\r
2765 if (NQTFR(node)->lower == 0) r = 0;\r
2766 }\r
2767 break;\r
2768\r
2769 case NT_ANCHOR:\r
2770 {\r
2771 AnchorNode* an = NANCHOR(node);\r
2772 switch (an->type) {\r
2773 case ANCHOR_PREC_READ:\r
2774 case ANCHOR_PREC_READ_NOT:\r
2775 case ANCHOR_LOOK_BEHIND:\r
2776 case ANCHOR_LOOK_BEHIND_NOT:\r
2777 r = subexp_inf_recursive_check(an->target, env, head);\r
2778 break;\r
2779 }\r
2780 }\r
2781 break;\r
2782\r
2783 case NT_CALL:\r
2784 r = subexp_inf_recursive_check(NCALL(node)->target, env, head);\r
2785 break;\r
2786\r
2787 case NT_ENCLOSE:\r
2788 if (IS_ENCLOSE_MARK2(NENCLOSE(node)))\r
2789 return 0;\r
2790 else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))\r
2791 return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);\r
2792 else {\r
2793 SET_ENCLOSE_STATUS(node, NST_MARK2);\r
2794 r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);\r
2795 CLEAR_ENCLOSE_STATUS(node, NST_MARK2);\r
2796 }\r
2797 break;\r
2798\r
2799 default:\r
2800 break;\r
2801 }\r
2802\r
2803 return r;\r
2804}\r
2805\r
2806static int\r
2807subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)\r
2808{\r
2809 int type;\r
2810 int r = 0;\r
2811\r
2812 type = NTYPE(node);\r
2813 switch (type) {\r
2814 case NT_LIST:\r
2815 case NT_ALT:\r
2816 do {\r
2817 r = subexp_inf_recursive_check_trav(NCAR(node), env);\r
2818 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));\r
2819 break;\r
2820\r
2821 case NT_QTFR:\r
2822 r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);\r
2823 break;\r
2824\r
2825 case NT_ANCHOR:\r
2826 {\r
2827 AnchorNode* an = NANCHOR(node);\r
2828 switch (an->type) {\r
2829 case ANCHOR_PREC_READ:\r
2830 case ANCHOR_PREC_READ_NOT:\r
2831 case ANCHOR_LOOK_BEHIND:\r
2832 case ANCHOR_LOOK_BEHIND_NOT:\r
2833 r = subexp_inf_recursive_check_trav(an->target, env);\r
2834 break;\r
2835 }\r
2836 }\r
2837 break;\r
2838\r
2839 case NT_ENCLOSE:\r
2840 {\r
2841 EncloseNode* en = NENCLOSE(node);\r
2842\r
2843 if (IS_ENCLOSE_RECURSION(en)) {\r
2844 SET_ENCLOSE_STATUS(node, NST_MARK1);\r
2845 r = subexp_inf_recursive_check(en->target, env, 1);\r
2846 if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;\r
2847 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);\r
2848 }\r
2849 r = subexp_inf_recursive_check_trav(en->target, env);\r
2850 }\r
2851\r
2852 break;\r
2853\r
2854 default:\r
2855 break;\r
2856 }\r
2857\r
2858 return r;\r
2859}\r
2860\r
2861static int\r
2862subexp_recursive_check(Node* node)\r
2863{\r
2864 int r = 0;\r
2865\r
2866 switch (NTYPE(node)) {\r
2867 case NT_LIST:\r
2868 case NT_ALT:\r
2869 do {\r
2870 r |= subexp_recursive_check(NCAR(node));\r
2871 } while (IS_NOT_NULL(node = NCDR(node)));\r
2872 break;\r
2873\r
2874 case NT_QTFR:\r
2875 r = subexp_recursive_check(NQTFR(node)->target);\r
2876 break;\r
2877\r
2878 case NT_ANCHOR:\r
2879 {\r
2880 AnchorNode* an = NANCHOR(node);\r
2881 switch (an->type) {\r
2882 case ANCHOR_PREC_READ:\r
2883 case ANCHOR_PREC_READ_NOT:\r
2884 case ANCHOR_LOOK_BEHIND:\r
2885 case ANCHOR_LOOK_BEHIND_NOT:\r
2886 r = subexp_recursive_check(an->target);\r
2887 break;\r
2888 }\r
2889 }\r
2890 break;\r
2891\r
2892 case NT_CALL:\r
2893 r = subexp_recursive_check(NCALL(node)->target);\r
2894 if (r != 0) SET_CALL_RECURSION(node);\r
2895 break;\r
2896\r
2897 case NT_ENCLOSE:\r
2898 if (IS_ENCLOSE_MARK2(NENCLOSE(node)))\r
2899 return 0;\r
2900 else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))\r
2901 return 1; /* recursion */\r
2902 else {\r
2903 SET_ENCLOSE_STATUS(node, NST_MARK2);\r
2904 r = subexp_recursive_check(NENCLOSE(node)->target);\r
2905 CLEAR_ENCLOSE_STATUS(node, NST_MARK2);\r
2906 }\r
2907 break;\r
2908\r
2909 default:\r
2910 break;\r
2911 }\r
2912\r
2913 return r;\r
2914}\r
2915\r
2916\r
2917static int\r
2918subexp_recursive_check_trav(Node* node, ScanEnv* env)\r
2919{\r
2920#define FOUND_CALLED_NODE 1\r
2921\r
2922 int type;\r
2923 int r = 0;\r
2924\r
2925 type = NTYPE(node);\r
2926 switch (type) {\r
2927 case NT_LIST:\r
2928 case NT_ALT:\r
2929 {\r
2930 int ret;\r
2931 do {\r
2932 ret = subexp_recursive_check_trav(NCAR(node), env);\r
2933 if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;\r
2934 else if (ret < 0) return ret;\r
2935 } while (IS_NOT_NULL(node = NCDR(node)));\r
2936 }\r
2937 break;\r
2938\r
2939 case NT_QTFR:\r
2940 r = subexp_recursive_check_trav(NQTFR(node)->target, env);\r
2941 if (NQTFR(node)->upper == 0) {\r
2942 if (r == FOUND_CALLED_NODE)\r
2943 NQTFR(node)->is_refered = 1;\r
2944 }\r
2945 break;\r
2946\r
2947 case NT_ANCHOR:\r
2948 {\r
2949 AnchorNode* an = NANCHOR(node);\r
2950 switch (an->type) {\r
2951 case ANCHOR_PREC_READ:\r
2952 case ANCHOR_PREC_READ_NOT:\r
2953 case ANCHOR_LOOK_BEHIND:\r
2954 case ANCHOR_LOOK_BEHIND_NOT:\r
2955 r = subexp_recursive_check_trav(an->target, env);\r
2956 break;\r
2957 }\r
2958 }\r
2959 break;\r
2960\r
2961 case NT_ENCLOSE:\r
2962 {\r
2963 EncloseNode* en = NENCLOSE(node);\r
2964\r
2965 if (! IS_ENCLOSE_RECURSION(en)) {\r
2966 if (IS_ENCLOSE_CALLED(en)) {\r
2967 SET_ENCLOSE_STATUS(node, NST_MARK1);\r
2968 r = subexp_recursive_check(en->target);\r
2969 if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);\r
2970 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);\r
2971 }\r
2972 }\r
2973 r = subexp_recursive_check_trav(en->target, env);\r
2974 if (IS_ENCLOSE_CALLED(en))\r
2975 r |= FOUND_CALLED_NODE;\r
2976 }\r
2977 break;\r
2978\r
2979 default:\r
2980 break;\r
2981 }\r
2982\r
2983 return r;\r
2984}\r
2985\r
2986static int\r
2987setup_subexp_call(Node* node, ScanEnv* env)\r
2988{\r
2989 int type;\r
2990 int r = 0;\r
2991\r
2992 type = NTYPE(node);\r
2993 switch (type) {\r
2994 case NT_LIST:\r
2995 do {\r
2996 r = setup_subexp_call(NCAR(node), env);\r
2997 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));\r
2998 break;\r
2999\r
3000 case NT_ALT:\r
3001 do {\r
3002 r = setup_subexp_call(NCAR(node), env);\r
3003 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));\r
3004 break;\r
3005\r
3006 case NT_QTFR:\r
3007 r = setup_subexp_call(NQTFR(node)->target, env);\r
3008 break;\r
3009 case NT_ENCLOSE:\r
3010 r = setup_subexp_call(NENCLOSE(node)->target, env);\r
3011 break;\r
3012\r
3013 case NT_CALL:\r
3014 {\r
3015 CallNode* cn = NCALL(node);\r
3016 Node** nodes = SCANENV_MEM_NODES(env);\r
3017\r
3018 if (cn->group_num != 0) {\r
3019 int gnum = cn->group_num;\r
3020\r
3021#ifdef USE_NAMED_GROUP\r
3022 if (env->num_named > 0 &&\r
3023 IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&\r
3024 !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {\r
3025 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;\r
3026 }\r
3027#endif\r
3028 if (gnum > env->num_mem) {\r
3029 onig_scan_env_set_error_string(env,\r
3030 ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);\r
3031 return ONIGERR_UNDEFINED_GROUP_REFERENCE;\r
3032 }\r
3033\r
3034#ifdef USE_NAMED_GROUP\r
3035 set_call_attr:\r
3036#endif\r
3037 cn->target = nodes[cn->group_num];\r
3038 if (IS_NULL(cn->target)) {\r
3039 onig_scan_env_set_error_string(env,\r
3040 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);\r
3041 return ONIGERR_UNDEFINED_NAME_REFERENCE;\r
3042 }\r
3043 SET_ENCLOSE_STATUS(cn->target, NST_CALLED);\r
3044 BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);\r
3045 cn->unset_addr_list = env->unset_addr_list;\r
3046 }\r
3047#ifdef USE_NAMED_GROUP\r
3048 else {\r
3049 int *refs;\r
3050\r
3051 int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,\r
3052 &refs);\r
3053 if (n <= 0) {\r
3054 onig_scan_env_set_error_string(env,\r
3055 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);\r
3056 return ONIGERR_UNDEFINED_NAME_REFERENCE;\r
3057 }\r
3058 else if (n > 1) {\r
3059 onig_scan_env_set_error_string(env,\r
3060 ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);\r
3061 return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;\r
3062 }\r
3063 else {\r
3064 cn->group_num = refs[0];\r
3065 goto set_call_attr;\r
3066 }\r
3067 }\r
3068#endif\r
3069 }\r
3070 break;\r
3071\r
3072 case NT_ANCHOR:\r
3073 {\r
3074 AnchorNode* an = NANCHOR(node);\r
3075\r
3076 switch (an->type) {\r
3077 case ANCHOR_PREC_READ:\r
3078 case ANCHOR_PREC_READ_NOT:\r
3079 case ANCHOR_LOOK_BEHIND:\r
3080 case ANCHOR_LOOK_BEHIND_NOT:\r
3081 r = setup_subexp_call(an->target, env);\r
3082 break;\r
3083 }\r
3084 }\r
3085 break;\r
3086\r
3087 default:\r
3088 break;\r
3089 }\r
3090\r
3091 return r;\r
3092}\r
3093#endif\r
3094\r
3095/* divide different length alternatives in look-behind.\r
3096 (?<=A|B) ==> (?<=A)|(?<=B)\r
3097 (?<!A|B) ==> (?<!A)(?<!B)\r
3098*/\r
3099static int\r
3100divide_look_behind_alternatives(Node* node)\r
3101{\r
3102 Node *head, *np, *insert_node;\r
3103 AnchorNode* an = NANCHOR(node);\r
3104 int anc_type = an->type;\r
3105\r
3106 head = an->target;\r
3107 np = NCAR(head);\r
3108 swap_node(node, head);\r
3109 NCAR(node) = head;\r
3110 NANCHOR(head)->target = np;\r
3111\r
3112 np = node;\r
3113 while ((np = NCDR(np)) != NULL_NODE) {\r
3114 insert_node = onig_node_new_anchor(anc_type);\r
3115 CHECK_NULL_RETURN_MEMERR(insert_node);\r
3116 NANCHOR(insert_node)->target = NCAR(np);\r
3117 NCAR(np) = insert_node;\r
3118 }\r
3119\r
3120 if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {\r
3121 np = node;\r
3122 do {\r
3123 SET_NTYPE(np, NT_LIST); /* alt -> list */\r
3124 } while ((np = NCDR(np)) != NULL_NODE);\r
3125 }\r
3126 return 0;\r
3127}\r
3128\r
3129static int\r
3130setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)\r
3131{\r
3132 int r, len;\r
3133 AnchorNode* an = NANCHOR(node);\r
3134\r
3135 r = get_char_length_tree(an->target, reg, &len);\r
3136 if (r == 0)\r
3137 an->char_len = len;\r
3138 else if (r == GET_CHAR_LEN_VARLEN)\r
3139 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;\r
3140 else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {\r
3141 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))\r
3142 r = divide_look_behind_alternatives(node);\r
3143 else\r
3144 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;\r
3145 }\r
3146\r
3147 return r;\r
3148}\r
3149\r
3150static int\r
3151next_setup(Node* node, Node* next_node, regex_t* reg)\r
3152{\r
3153 int type;\r
3154\r
3155 retry:\r
3156 type = NTYPE(node);\r
3157 if (type == NT_QTFR) {\r
3158 QtfrNode* qn = NQTFR(node);\r
3159 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {\r
3160#ifdef USE_QTFR_PEEK_NEXT\r
3161 Node* n = get_head_value_node(next_node, 1, reg);\r
3162 /* '\0': for UTF-16BE etc... */\r
3163 if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') {\r
3164 qn->next_head_exact = n;\r
3165 }\r
3166#endif\r
3167 /* automatic posseivation a*b ==> (?>a*)b */\r
3168 if (qn->lower <= 1) {\r
3169 int ttype = NTYPE(qn->target);\r
3170 if (IS_NODE_TYPE_SIMPLE(ttype)) {\r
3171 Node *x, *y;\r
3172 x = get_head_value_node(qn->target, 0, reg);\r
3173 if (IS_NOT_NULL(x)) {\r
3174 y = get_head_value_node(next_node, 0, reg);\r
3175 if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {\r
3176 Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK);\r
3177 CHECK_NULL_RETURN_MEMERR(en);\r
3178 SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);\r
3179 swap_node(node, en);\r
3180 NENCLOSE(node)->target = en;\r
3181 }\r
3182 }\r
3183 }\r
3184 }\r
3185 }\r
3186 }\r
3187 else if (type == NT_ENCLOSE) {\r
3188 EncloseNode* en = NENCLOSE(node);\r
3189 if (en->type == ENCLOSE_MEMORY) {\r
3190 node = en->target;\r
3191 goto retry;\r
3192 }\r
3193 }\r
3194 return 0;\r
3195}\r
3196\r
3197\r
3198static int\r
3199update_string_node_case_fold(regex_t* reg, Node *node)\r
3200{\r
3201 UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];\r
3202 UChar *sbuf, *ebuf, *sp;\r
3203 int r, i, len, sbuf_size;\r
3204 StrNode* sn = NSTR(node);\r
3205\r
3206 end = sn->end;\r
3207 sbuf_size = (int)(end - sn->s) * 2;\r
3208 sbuf = (UChar* )xmalloc(sbuf_size);\r
3209 CHECK_NULL_RETURN_MEMERR(sbuf);\r
3210 ebuf = sbuf + sbuf_size;\r
3211\r
3212 sp = sbuf;\r
3213 p = sn->s;\r
3214 while (p < end) {\r
3215 len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);\r
3216 for (i = 0; i < len; i++) {\r
3217 if (sp >= ebuf) {\r
3218 sbuf = (UChar* )xrealloc(sbuf, sbuf_size * 2, sbuf_size);\r
3219 CHECK_NULL_RETURN_MEMERR(sbuf);\r
3220 sp = sbuf + sbuf_size;\r
3221 sbuf_size *= 2;\r
3222 ebuf = sbuf + sbuf_size;\r
3223 }\r
3224\r
3225 *sp++ = buf[i];\r
3226 }\r
3227 }\r
3228\r
3229 r = onig_node_str_set(node, sbuf, sp);\r
3230 if (r != 0) {\r
3231 xfree(sbuf);\r
3232 return r;\r
3233 }\r
3234\r
3235 xfree(sbuf);\r
3236 return 0;\r
3237}\r
3238\r
3239static int\r
3240expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end,\r
3241 regex_t* reg)\r
3242{\r
3243 int r;\r
3244 Node *node;\r
3245\r
3246 node = onig_node_new_str(s, end);\r
3247 if (IS_NULL(node)) return ONIGERR_MEMORY;\r
3248\r
3249 r = update_string_node_case_fold(reg, node);\r
3250 if (r != 0) {\r
3251 onig_node_free(node);\r
3252 return r;\r
3253 }\r
3254\r
3255 NSTRING_SET_AMBIG(node);\r
3256 NSTRING_SET_DONT_GET_OPT_INFO(node);\r
3257 *rnode = node;\r
3258 return 0;\r
3259}\r
3260\r
3261static int\r
3262expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],\r
3263 UChar *p, int slen, UChar *end,\r
3264 regex_t* reg, Node **rnode)\r
3265{\r
3266 int r, i, j, len, varlen;\r
3267 Node *anode, *var_anode, *snode, *xnode, *an;\r
3268 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];\r
b0c2b797 3269 xnode = NULL_NODE;\r
14b0e578
CS
3270\r
3271 *rnode = var_anode = NULL_NODE;\r
3272\r
3273 varlen = 0;\r
3274 for (i = 0; i < item_num; i++) {\r
3275 if (items[i].byte_len != slen) {\r
3276 varlen = 1;\r
3277 break;\r
3278 }\r
3279 }\r
3280\r
3281 if (varlen != 0) {\r
3282 *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);\r
3283 if (IS_NULL(var_anode)) return ONIGERR_MEMORY;\r
3284\r
3285 xnode = onig_node_new_list(NULL, NULL);\r
3286 if (IS_NULL(xnode)) goto mem_err;\r
3287 NCAR(var_anode) = xnode;\r
3288\r
3289 anode = onig_node_new_alt(NULL_NODE, NULL_NODE);\r
3290 if (IS_NULL(anode)) goto mem_err;\r
3291 NCAR(xnode) = anode;\r
3292 }\r
3293 else {\r
3294 *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);\r
3295 if (IS_NULL(anode)) return ONIGERR_MEMORY;\r
3296 }\r
3297\r
3298 snode = onig_node_new_str(p, p + slen);\r
3299 if (IS_NULL(snode)) goto mem_err;\r
3300\r
3301 NCAR(anode) = snode;\r
3302\r
3303 for (i = 0; i < item_num; i++) {\r
3304 snode = onig_node_new_str(NULL, NULL);\r
3305 if (IS_NULL(snode)) goto mem_err;\r
3306 \r
3307 for (j = 0; j < items[i].code_len; j++) {\r
3308 len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);\r
3309 if (len < 0) {\r
3310 r = len;\r
3311 goto mem_err2;\r
3312 }\r
3313\r
3314 r = onig_node_str_cat(snode, buf, buf + len);\r
3315 if (r != 0) goto mem_err2;\r
3316 }\r
3317\r
3318 an = onig_node_new_alt(NULL_NODE, NULL_NODE);\r
3319 if (IS_NULL(an)) {\r
3320 goto mem_err2;\r
3321 }\r
3322\r
3323 if (items[i].byte_len != slen) {\r
b0c2b797 3324 Node *rem = NULL_NODE;\r
14b0e578
CS
3325 UChar *q = p + items[i].byte_len;\r
3326\r
3327 if (q < end) {\r
3328 r = expand_case_fold_make_rem_string(&rem, q, end, reg);\r
3329 if (r != 0) {\r
3330 onig_node_free(an);\r
3331 goto mem_err2;\r
3332 }\r
3333\r
3334 xnode = onig_node_list_add(NULL_NODE, snode);\r
3335 if (IS_NULL(xnode)) {\r
3336 onig_node_free(an);\r
3337 onig_node_free(rem);\r
3338 goto mem_err2;\r
3339 }\r
3340 if (IS_NULL(onig_node_list_add(xnode, rem))) {\r
3341 onig_node_free(an);\r
3342 onig_node_free(xnode);\r
3343 onig_node_free(rem);\r
3344 goto mem_err;\r
3345 }\r
3346\r
3347 NCAR(an) = xnode;\r
3348 }\r
3349 else {\r
3350 NCAR(an) = snode;\r
3351 }\r
3352\r
b0c2b797
QS
3353 if (var_anode == NULL) {\r
3354 onig_node_free(an);\r
3355 onig_node_free(xnode);\r
3356 onig_node_free(rem);\r
3357 goto mem_err2;\r
3358 }\r
14b0e578
CS
3359 NCDR(var_anode) = an;\r
3360 var_anode = an;\r
3361 }\r
3362 else {\r
3363 NCAR(an) = snode;\r
3364 NCDR(anode) = an;\r
3365 anode = an;\r
3366 }\r
3367 }\r
3368\r
3369 return varlen;\r
3370\r
3371 mem_err2:\r
3372 onig_node_free(snode);\r
3373\r
3374 mem_err:\r
3375 onig_node_free(*rnode);\r
3376\r
3377 return ONIGERR_MEMORY;\r
3378}\r
3379\r
3380static int\r
3381expand_case_fold_string(Node* node, regex_t* reg)\r
3382{\r
3383#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8\r
3384\r
3385 int r, n, len, alt_num;\r
3386 UChar *start, *end, *p;\r
3387 Node *top_root, *root, *snode, *prev_node;\r
3388 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];\r
3389 StrNode* sn = NSTR(node);\r
3390\r
3391 if (NSTRING_IS_AMBIG(node)) return 0;\r
3392\r
3393 start = sn->s;\r
3394 end = sn->end;\r
3395 if (start >= end) return 0;\r
3396\r
3397 r = 0;\r
3398 top_root = root = prev_node = snode = NULL_NODE;\r
3399 alt_num = 1;\r
3400 p = start;\r
3401 while (p < end) {\r
3402 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,\r
3403 p, end, items);\r
3404 if (n < 0) {\r
3405 r = n;\r
3406 goto err;\r
3407 }\r
3408\r
3409 len = enclen(reg->enc, p);\r
3410\r
3411 if (n == 0) {\r
3412 if (IS_NULL(snode)) {\r
3413 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {\r
3414 top_root = root = onig_node_list_add(NULL_NODE, prev_node);\r
3415 if (IS_NULL(root)) {\r
3416 onig_node_free(prev_node);\r
3417 goto mem_err;\r
3418 }\r
3419 }\r
3420\r
3421 prev_node = snode = onig_node_new_str(NULL, NULL);\r
3422 if (IS_NULL(snode)) goto mem_err;\r
3423 if (IS_NOT_NULL(root)) {\r
3424 if (IS_NULL(onig_node_list_add(root, snode))) {\r
3425 onig_node_free(snode);\r
3426 goto mem_err;\r
3427 }\r
3428 }\r
3429 }\r
3430\r
3431 r = onig_node_str_cat(snode, p, p + len);\r
3432 if (r != 0) goto err;\r
3433 }\r
3434 else {\r
3435 alt_num *= (n + 1);\r
3436 if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;\r
3437\r
3438 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {\r
3439 top_root = root = onig_node_list_add(NULL_NODE, prev_node);\r
3440 if (IS_NULL(root)) {\r
3441 onig_node_free(prev_node);\r
3442 goto mem_err;\r
3443 }\r
3444 }\r
3445\r
3446 r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);\r
3447 if (r < 0) goto mem_err;\r
3448 if (r == 1) {\r
3449 if (IS_NULL(root)) {\r
3450 top_root = prev_node;\r
3451 }\r
3452 else {\r
3453 if (IS_NULL(onig_node_list_add(root, prev_node))) {\r
3454 onig_node_free(prev_node);\r
3455 goto mem_err;\r
3456 }\r
3457 }\r
3458\r
3459 root = NCAR(prev_node);\r
3460 }\r
3461 else { /* r == 0 */\r
3462 if (IS_NOT_NULL(root)) {\r
3463 if (IS_NULL(onig_node_list_add(root, prev_node))) {\r
3464 onig_node_free(prev_node);\r
3465 goto mem_err;\r
3466 }\r
3467 }\r
3468 }\r
3469\r
3470 snode = NULL_NODE;\r
3471 }\r
3472\r
3473 p += len;\r
3474 }\r
3475\r
3476 if (p < end) {\r
3477 Node *srem;\r
3478\r
3479 r = expand_case_fold_make_rem_string(&srem, p, end, reg);\r
3480 if (r != 0) goto mem_err;\r
3481\r
3482 if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {\r
3483 top_root = root = onig_node_list_add(NULL_NODE, prev_node);\r
3484 if (IS_NULL(root)) {\r
3485 onig_node_free(srem);\r
3486 onig_node_free(prev_node);\r
3487 goto mem_err;\r
3488 }\r
3489 }\r
3490\r
3491 if (IS_NULL(root)) {\r
3492 prev_node = srem;\r
3493 }\r
3494 else {\r
3495 if (IS_NULL(onig_node_list_add(root, srem))) {\r
3496 onig_node_free(srem);\r
3497 goto mem_err;\r
3498 }\r
3499 }\r
3500 }\r
3501\r
3502 /* ending */\r
3503 top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);\r
3504 swap_node(node, top_root);\r
3505 onig_node_free(top_root);\r
3506 return 0;\r
3507\r
3508 mem_err:\r
3509 r = ONIGERR_MEMORY;\r
3510\r
3511 err:\r
3512 onig_node_free(top_root);\r
3513 return r;\r
3514}\r
3515\r
3516\r
3517#ifdef USE_COMBINATION_EXPLOSION_CHECK\r
3518\r
3519#define CEC_THRES_NUM_BIG_REPEAT 512\r
3520#define CEC_INFINITE_NUM 0x7fffffff\r
3521\r
3522#define CEC_IN_INFINITE_REPEAT (1<<0)\r
3523#define CEC_IN_FINITE_REPEAT (1<<1)\r
3524#define CEC_CONT_BIG_REPEAT (1<<2)\r
3525\r
3526static int\r
3527setup_comb_exp_check(Node* node, int state, ScanEnv* env)\r
3528{\r
3529 int type;\r
3530 int r = state;\r
3531\r
3532 type = NTYPE(node);\r
3533 switch (type) {\r
3534 case NT_LIST:\r
3535 {\r
3536 Node* prev = NULL_NODE;\r
3537 do {\r
3538 r = setup_comb_exp_check(NCAR(node), r, env);\r
3539 prev = NCAR(node);\r
3540 } while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));\r
3541 }\r
3542 break;\r
3543\r
3544 case NT_ALT:\r
3545 {\r
3546 int ret;\r
3547 do {\r
3548 ret = setup_comb_exp_check(NCAR(node), state, env);\r
3549 r |= ret;\r
3550 } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));\r
3551 }\r
3552 break;\r
3553\r
3554 case NT_QTFR:\r
3555 {\r
3556 int child_state = state;\r
3557 int add_state = 0;\r
3558 QtfrNode* qn = NQTFR(node);\r
3559 Node* target = qn->target;\r
3560 int var_num;\r
3561\r
3562 if (! IS_REPEAT_INFINITE(qn->upper)) {\r
3563 if (qn->upper > 1) {\r
3564 /* {0,1}, {1,1} are allowed */\r
3565 child_state |= CEC_IN_FINITE_REPEAT;\r
3566\r
3567 /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */\r
3568 if (env->backrefed_mem == 0) {\r
3569 if (NTYPE(qn->target) == NT_ENCLOSE) {\r
3570 EncloseNode* en = NENCLOSE(qn->target);\r
3571 if (en->type == ENCLOSE_MEMORY) {\r
3572 if (NTYPE(en->target) == NT_QTFR) {\r
3573 QtfrNode* q = NQTFR(en->target);\r
3574 if (IS_REPEAT_INFINITE(q->upper)\r
3575 && q->greedy == qn->greedy) {\r
3576 qn->upper = (qn->lower == 0 ? 1 : qn->lower);\r
3577 if (qn->upper == 1)\r
3578 child_state = state;\r
3579 }\r
3580 }\r
3581 }\r
3582 }\r
3583 }\r
3584 }\r
3585 }\r
3586\r
3587 if (state & CEC_IN_FINITE_REPEAT) {\r
3588 qn->comb_exp_check_num = -1;\r
3589 }\r
3590 else {\r
3591 if (IS_REPEAT_INFINITE(qn->upper)) {\r
3592 var_num = CEC_INFINITE_NUM;\r
3593 child_state |= CEC_IN_INFINITE_REPEAT;\r
3594 }\r
3595 else {\r
3596 var_num = qn->upper - qn->lower;\r
3597 }\r
3598\r
3599 if (var_num >= CEC_THRES_NUM_BIG_REPEAT)\r
3600 add_state |= CEC_CONT_BIG_REPEAT;\r
3601\r
3602 if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||\r
3603 ((state & CEC_CONT_BIG_REPEAT) != 0 &&\r
3604 var_num >= CEC_THRES_NUM_BIG_REPEAT)) {\r
3605 if (qn->comb_exp_check_num == 0) {\r
3606 env->num_comb_exp_check++;\r
3607 qn->comb_exp_check_num = env->num_comb_exp_check;\r
3608 if (env->curr_max_regnum > env->comb_exp_max_regnum)\r
3609 env->comb_exp_max_regnum = env->curr_max_regnum;\r
3610 }\r
3611 }\r
3612 }\r
3613\r
3614 r = setup_comb_exp_check(target, child_state, env);\r
3615 r |= add_state;\r
3616 }\r
3617 break;\r
3618\r
3619 case NT_ENCLOSE:\r
3620 {\r
3621 EncloseNode* en = NENCLOSE(node);\r
3622\r
3623 switch (en->type) {\r
3624 case ENCLOSE_MEMORY:\r
3625 {\r
3626 if (env->curr_max_regnum < en->regnum)\r
3627 env->curr_max_regnum = en->regnum;\r
3628\r
3629 r = setup_comb_exp_check(en->target, state, env);\r
3630 }\r
3631 break;\r
3632\r
3633 default:\r
3634 r = setup_comb_exp_check(en->target, state, env);\r
3635 break;\r
3636 }\r
3637 }\r
3638 break;\r
3639\r
3640#ifdef USE_SUBEXP_CALL\r
3641 case NT_CALL:\r
3642 if (IS_CALL_RECURSION(NCALL(node)))\r
3643 env->has_recursion = 1;\r
3644 else\r
3645 r = setup_comb_exp_check(NCALL(node)->target, state, env);\r
3646 break;\r
3647#endif\r
3648\r
3649 default:\r
3650 break;\r
3651 }\r
3652\r
3653 return r;\r
3654}\r
3655#endif\r
3656\r
3657#define IN_ALT (1<<0)\r
3658#define IN_NOT (1<<1)\r
3659#define IN_REPEAT (1<<2)\r
3660#define IN_VAR_REPEAT (1<<3)\r
3661\r
3662/* setup_tree does the following work.\r
3663 1. check empty loop. (set qn->target_empty_info)\r
3664 2. expand ignore-case in char class.\r
3665 3. set memory status bit flags. (reg->mem_stats)\r
3666 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].\r
3667 5. find invalid patterns in look-behind.\r
3668 6. expand repeated string.\r
3669 */\r
3670static int\r
3671setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)\r
3672{\r
3673 int type;\r
3674 int r = 0;\r
3675\r
3676 type = NTYPE(node);\r
3677 switch (type) {\r
3678 case NT_LIST:\r
3679 {\r
3680 Node* prev = NULL_NODE;\r
3681 do {\r
3682 r = setup_tree(NCAR(node), reg, state, env);\r
3683 if (IS_NOT_NULL(prev) && r == 0) {\r
3684 r = next_setup(prev, NCAR(node), reg);\r
3685 }\r
3686 prev = NCAR(node);\r
3687 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));\r
3688 }\r
3689 break;\r
3690\r
3691 case NT_ALT:\r
3692 do {\r
3693 r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);\r
3694 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));\r
3695 break;\r
3696\r
3697 case NT_CCLASS:\r
3698 break;\r
3699\r
3700 case NT_STR:\r
3701 if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {\r
3702 r = expand_case_fold_string(node, reg);\r
3703 }\r
3704 break;\r
3705\r
3706 case NT_CTYPE:\r
3707 case NT_CANY:\r
3708 break;\r
3709\r
3710#ifdef USE_SUBEXP_CALL\r
3711 case NT_CALL:\r
3712 break;\r
3713#endif\r
3714\r
3715 case NT_BREF:\r
3716 {\r
3717 int i;\r
3718 int* p;\r
3719 Node** nodes = SCANENV_MEM_NODES(env);\r
3720 BRefNode* br = NBREF(node);\r
3721 p = BACKREFS_P(br);\r
3722 for (i = 0; i < br->back_num; i++) {\r
3723 if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;\r
3724 BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);\r
3725 BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);\r
3726#ifdef USE_BACKREF_WITH_LEVEL\r
3727 if (IS_BACKREF_NEST_LEVEL(br)) {\r
3728 BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);\r
3729 }\r
3730#endif\r
3731 SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);\r
3732 }\r
3733 }\r
3734 break;\r
3735\r
3736 case NT_QTFR:\r
3737 {\r
3738 OnigDistance d;\r
3739 QtfrNode* qn = NQTFR(node);\r
3740 Node* target = qn->target;\r
3741\r
3742 if ((state & IN_REPEAT) != 0) {\r
3743 qn->state |= NST_IN_REPEAT;\r
3744 }\r
3745\r
3746 if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {\r
3747 r = get_min_match_length(target, &d, env);\r
3748 if (r) break;\r
3749 if (d == 0) {\r
3750 qn->target_empty_info = NQ_TARGET_IS_EMPTY;\r
3751#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT\r
3752 r = quantifiers_memory_node_info(target);\r
3753 if (r < 0) break;\r
3754 if (r > 0) {\r
3755 qn->target_empty_info = r;\r
3756 }\r
3757#endif\r
3758#if 0\r
3759 r = get_max_match_length(target, &d, env);\r
3760 if (r == 0 && d == 0) {\r
3761 /* ()* ==> ()?, ()+ ==> () */\r
3762 qn->upper = 1;\r
3763 if (qn->lower > 1) qn->lower = 1;\r
3764 if (NTYPE(target) == NT_STR) {\r
3765 qn->upper = qn->lower = 0; /* /(?:)+/ ==> // */\r
3766 }\r
3767 }\r
3768#endif\r
3769 }\r
3770 }\r
3771\r
3772 state |= IN_REPEAT;\r
3773 if (qn->lower != qn->upper)\r
3774 state |= IN_VAR_REPEAT;\r
3775 r = setup_tree(target, reg, state, env);\r
3776 if (r) break;\r
3777\r
3778 /* expand string */\r
3779#define EXPAND_STRING_MAX_LENGTH 100\r
3780 if (NTYPE(target) == NT_STR) {\r
3781 if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper &&\r
3782 qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) {\r
3783 int len = NSTRING_LEN(target);\r
3784 StrNode* sn = NSTR(target);\r
3785\r
3786 if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) {\r
3787 int i, n = qn->lower;\r
3788 onig_node_conv_to_str_node(node, NSTR(target)->flag);\r
3789 for (i = 0; i < n; i++) {\r
3790 r = onig_node_str_cat(node, sn->s, sn->end);\r
3791 if (r) break;\r
3792 }\r
3793 onig_node_free(target);\r
3794 break; /* break case NT_QTFR: */\r
3795 }\r
3796 }\r
3797 }\r
3798\r
3799#ifdef USE_OP_PUSH_OR_JUMP_EXACT\r
3800 if (qn->greedy && (qn->target_empty_info != 0)) {\r
3801 if (NTYPE(target) == NT_QTFR) {\r
3802 QtfrNode* tqn = NQTFR(target);\r
3803 if (IS_NOT_NULL(tqn->head_exact)) {\r
3804 qn->head_exact = tqn->head_exact;\r
3805 tqn->head_exact = NULL;\r
3806 }\r
3807 }\r
3808 else {\r
3809 qn->head_exact = get_head_value_node(qn->target, 1, reg);\r
3810 }\r
3811 }\r
3812#endif\r
3813 }\r
3814 break;\r
3815\r
3816 case NT_ENCLOSE:\r
3817 {\r
3818 EncloseNode* en = NENCLOSE(node);\r
3819\r
3820 switch (en->type) {\r
3821 case ENCLOSE_OPTION:\r
3822 {\r
3823 OnigOptionType options = reg->options;\r
3824 reg->options = NENCLOSE(node)->option;\r
3825 r = setup_tree(NENCLOSE(node)->target, reg, state, env);\r
3826 reg->options = options;\r
3827 }\r
3828 break;\r
3829\r
3830 case ENCLOSE_MEMORY:\r
3831 if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) {\r
3832 BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);\r
3833 /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */\r
3834 }\r
3835 r = setup_tree(en->target, reg, state, env);\r
3836 break;\r
3837\r
3838 case ENCLOSE_STOP_BACKTRACK:\r
3839 {\r
3840 Node* target = en->target;\r
3841 r = setup_tree(target, reg, state, env);\r
3842 if (NTYPE(target) == NT_QTFR) {\r
3843 QtfrNode* tqn = NQTFR(target);\r
3844 if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&\r
3845 tqn->greedy != 0) { /* (?>a*), a*+ etc... */\r
3846 int qtype = NTYPE(tqn->target);\r
3847 if (IS_NODE_TYPE_SIMPLE(qtype))\r
3848 SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);\r
3849 }\r
3850 }\r
3851 }\r
3852 break;\r
3853 }\r
3854 }\r
3855 break;\r
3856\r
3857 case NT_ANCHOR:\r
3858 {\r
3859 AnchorNode* an = NANCHOR(node);\r
3860\r
3861 switch (an->type) {\r
3862 case ANCHOR_PREC_READ:\r
3863 r = setup_tree(an->target, reg, state, env);\r
3864 break;\r
3865 case ANCHOR_PREC_READ_NOT:\r
3866 r = setup_tree(an->target, reg, (state | IN_NOT), env);\r
3867 break;\r
3868\r
3869/* allowed node types in look-behind */\r
3870#define ALLOWED_TYPE_IN_LB \\r
3871 ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \\r
3872 BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )\r
3873\r
3874#define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY )\r
3875#define ALLOWED_ENCLOSE_IN_LB_NOT 0\r
3876\r
3877#define ALLOWED_ANCHOR_IN_LB \\r
3878( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )\r
3879#define ALLOWED_ANCHOR_IN_LB_NOT \\r
3880( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )\r
3881\r
3882 case ANCHOR_LOOK_BEHIND:\r
3883 {\r
3884 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,\r
3885 ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB);\r
3886 if (r < 0) return r;\r
3887 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;\r
3888 r = setup_look_behind(node, reg, env);\r
3889 if (r != 0) return r;\r
3890 r = setup_tree(an->target, reg, state, env);\r
3891 }\r
3892 break;\r
3893\r
3894 case ANCHOR_LOOK_BEHIND_NOT:\r
3895 {\r
3896 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,\r
3897 ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);\r
3898 if (r < 0) return r;\r
3899 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;\r
3900 r = setup_look_behind(node, reg, env);\r
3901 if (r != 0) return r;\r
3902 r = setup_tree(an->target, reg, (state | IN_NOT), env);\r
3903 }\r
3904 break;\r
3905 }\r
3906 }\r
3907 break;\r
3908\r
3909 default:\r
3910 break;\r
3911 }\r
3912\r
3913 return r;\r
3914}\r
3915\r
3916/* set skip map for Boyer-Moor search */\r
3917static int\r
3918set_bm_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,\r
3919 UChar skip[], int** int_skip)\r
3920{\r
3921 int i, len;\r
3922\r
3923 len = (int)(end - s);\r
3924 if (len < ONIG_CHAR_TABLE_SIZE) {\r
3925 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar)len;\r
3926\r
3927 for (i = 0; i < len - 1; i++)\r
3928 skip[s[i]] = (UChar)(len - 1 - i);\r
3929 }\r
3930 else {\r
3931 if (IS_NULL(*int_skip)) {\r
3932 *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);\r
3933 if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;\r
3934 }\r
3935 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = len;\r
3936\r
3937 for (i = 0; i < len - 1; i++)\r
3938 (*int_skip)[s[i]] = len - 1 - i;\r
3939 }\r
3940 return 0;\r
3941}\r
3942\r
3943#define OPT_EXACT_MAXLEN 24\r
3944\r
3945typedef struct {\r
3946 OnigDistance min; /* min byte length */\r
3947 OnigDistance max; /* max byte length */\r
3948} MinMaxLen;\r
3949\r
3950typedef struct {\r
3951 MinMaxLen mmd;\r
3952 OnigEncoding enc;\r
3953 OnigOptionType options;\r
3954 OnigCaseFoldType case_fold_flag;\r
3955 ScanEnv* scan_env;\r
3956} OptEnv;\r
3957\r
3958typedef struct {\r
3959 int left_anchor;\r
3960 int right_anchor;\r
3961} OptAncInfo;\r
3962\r
3963typedef struct {\r
3964 MinMaxLen mmd; /* info position */\r
3965 OptAncInfo anc;\r
3966\r
3967 int reach_end;\r
3968 int ignore_case;\r
3969 int len;\r
3970 UChar s[OPT_EXACT_MAXLEN];\r
3971} OptExactInfo;\r
3972\r
3973typedef struct {\r
3974 MinMaxLen mmd; /* info position */\r
3975 OptAncInfo anc;\r
3976\r
3977 int value; /* weighted value */\r
3978 UChar map[ONIG_CHAR_TABLE_SIZE];\r
3979} OptMapInfo;\r
3980\r
3981typedef struct {\r
3982 MinMaxLen len;\r
3983\r
3984 OptAncInfo anc;\r
3985 OptExactInfo exb; /* boundary */\r
3986 OptExactInfo exm; /* middle */\r
3987 OptExactInfo expr; /* prec read (?=...) */\r
3988\r
3989 OptMapInfo map; /* boundary */\r
3990} NodeOptInfo;\r
3991\r
3992\r
3993static int\r
3994map_position_value(OnigEncoding enc, int i)\r
3995{\r
3996 static const short int ByteValTable[] = {\r
3997 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,\r
3998 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\r
3999 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,\r
4000 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,\r
4001 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,\r
4002 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,\r
4003 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,\r
4004 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1\r
4005 };\r
4006\r
4007 if (i < (int )(sizeof(ByteValTable)/sizeof(ByteValTable[0]))) {\r
4008 if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)\r
4009 return 20;\r
4010 else\r
4011 return (int )ByteValTable[i];\r
4012 }\r
4013 else\r
4014 return 4; /* Take it easy. */\r
4015}\r
4016\r
4017static int\r
4018distance_value(MinMaxLen* mm)\r
4019{\r
4020 /* 1000 / (min-max-dist + 1) */\r
4021 static const short int dist_vals[] = {\r
4022 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100, \r
4023 91, 83, 77, 71, 67, 63, 59, 56, 53, 50, \r
4024 48, 45, 43, 42, 40, 38, 37, 36, 34, 33, \r
4025 32, 31, 30, 29, 29, 28, 27, 26, 26, 25, \r
4026 24, 24, 23, 23, 22, 22, 21, 21, 20, 20, \r
4027 20, 19, 19, 19, 18, 18, 18, 17, 17, 17, \r
4028 16, 16, 16, 16, 15, 15, 15, 15, 14, 14, \r
4029 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, \r
4030 12, 12, 12, 12, 12, 12, 11, 11, 11, 11, \r
4031 11, 11, 11, 11, 11, 10, 10, 10, 10, 10\r
4032 };\r
4033\r
4034 int d;\r
4035\r
4036 if (mm->max == ONIG_INFINITE_DISTANCE) return 0;\r
4037\r
4038 d = mm->max - mm->min;\r
4039 if (d < (int )(sizeof(dist_vals)/sizeof(dist_vals[0])))\r
4040 /* return dist_vals[d] * 16 / (mm->min + 12); */\r
4041 return (int )dist_vals[d];\r
4042 else\r
4043 return 1;\r
4044}\r
4045\r
4046static int\r
4047comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2)\r
4048{\r
4049 if (v2 <= 0) return -1;\r
4050 if (v1 <= 0) return 1;\r
4051\r
4052 v1 *= distance_value(d1);\r
4053 v2 *= distance_value(d2);\r
4054\r
4055 if (v2 > v1) return 1;\r
4056 if (v2 < v1) return -1;\r
4057\r
4058 if (d2->min < d1->min) return 1;\r
4059 if (d2->min > d1->min) return -1;\r
4060 return 0;\r
4061}\r
4062\r
4063static int\r
4064is_equal_mml(MinMaxLen* a, MinMaxLen* b)\r
4065{\r
4066 return (a->min == b->min && a->max == b->max) ? 1 : 0;\r
4067}\r
4068\r
4069\r
4070static void\r
4071set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max)\r
4072{\r
4073 mml->min = min;\r
4074 mml->max = max;\r
4075}\r
4076\r
4077static void\r
4078clear_mml(MinMaxLen* mml)\r
4079{\r
4080 mml->min = mml->max = 0;\r
4081}\r
4082\r
4083static void\r
4084copy_mml(MinMaxLen* to, MinMaxLen* from)\r
4085{\r
4086 to->min = from->min;\r
4087 to->max = from->max;\r
4088}\r
4089\r
4090static void\r
4091add_mml(MinMaxLen* to, MinMaxLen* from)\r
4092{\r
4093 to->min = distance_add(to->min, from->min);\r
4094 to->max = distance_add(to->max, from->max);\r
4095}\r
4096\r
4097#if 0\r
4098static void\r
4099add_len_mml(MinMaxLen* to, OnigDistance len)\r
4100{\r
4101 to->min = distance_add(to->min, len);\r
4102 to->max = distance_add(to->max, len);\r
4103}\r
4104#endif\r
4105\r
4106static void\r
4107alt_merge_mml(MinMaxLen* to, MinMaxLen* from)\r
4108{\r
4109 if (to->min > from->min) to->min = from->min;\r
4110 if (to->max < from->max) to->max = from->max;\r
4111}\r
4112\r
4113static void\r
4114copy_opt_env(OptEnv* to, OptEnv* from)\r
4115{\r
4116 *to = *from;\r
4117}\r
4118\r
4119static void\r
4120clear_opt_anc_info(OptAncInfo* anc)\r
4121{\r
4122 anc->left_anchor = 0;\r
4123 anc->right_anchor = 0;\r
4124}\r
4125\r
4126static void\r
4127copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from)\r
4128{\r
4129 *to = *from;\r
4130}\r
4131\r
4132static void\r
4133concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,\r
4134 OnigDistance left_len, OnigDistance right_len)\r
4135{\r
4136 clear_opt_anc_info(to);\r
4137\r
4138 to->left_anchor = left->left_anchor;\r
4139 if (left_len == 0) {\r
4140 to->left_anchor |= right->left_anchor;\r
4141 }\r
4142\r
4143 to->right_anchor = right->right_anchor;\r
4144 if (right_len == 0) {\r
4145 to->right_anchor |= left->right_anchor;\r
4146 }\r
4147}\r
4148\r
4149static int\r
4150is_left_anchor(int anc)\r
4151{\r
4152 if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||\r
4153 anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||\r
4154 anc == ANCHOR_PREC_READ_NOT)\r
4155 return 0;\r
4156\r
4157 return 1;\r
4158}\r
4159\r
4160static int\r
4161is_set_opt_anc_info(OptAncInfo* to, int anc)\r
4162{\r
4163 if ((to->left_anchor & anc) != 0) return 1;\r
4164\r
4165 return ((to->right_anchor & anc) != 0 ? 1 : 0);\r
4166}\r
4167\r
4168static void\r
4169add_opt_anc_info(OptAncInfo* to, int anc)\r
4170{\r
4171 if (is_left_anchor(anc))\r
4172 to->left_anchor |= anc;\r
4173 else\r
4174 to->right_anchor |= anc;\r
4175}\r
4176\r
4177static void\r
4178remove_opt_anc_info(OptAncInfo* to, int anc)\r
4179{\r
4180 if (is_left_anchor(anc))\r
4181 to->left_anchor &= ~anc;\r
4182 else\r
4183 to->right_anchor &= ~anc;\r
4184}\r
4185\r
4186static void\r
4187alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add)\r
4188{\r
4189 to->left_anchor &= add->left_anchor;\r
4190 to->right_anchor &= add->right_anchor;\r
4191}\r
4192\r
4193static int\r
4194is_full_opt_exact_info(OptExactInfo* ex)\r
4195{\r
4196 return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);\r
4197}\r
4198\r
4199static void\r
4200clear_opt_exact_info(OptExactInfo* ex)\r
4201{\r
4202 clear_mml(&ex->mmd);\r
4203 clear_opt_anc_info(&ex->anc);\r
4204 ex->reach_end = 0;\r
4205 ex->ignore_case = 0;\r
4206 ex->len = 0;\r
4207 ex->s[0] = '\0';\r
4208}\r
4209\r
4210static void\r
4211copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from)\r
4212{\r
4213 *to = *from;\r
4214}\r
4215\r
4216static void\r
4217concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)\r
4218{\r
4219 int i, j, len;\r
4220 UChar *p, *end;\r
4221 OptAncInfo tanc;\r
4222\r
4223 if (! to->ignore_case && add->ignore_case) {\r
4224 if (to->len >= add->len) return ; /* avoid */\r
4225\r
4226 to->ignore_case = 1;\r
4227 }\r
4228\r
4229 p = add->s;\r
4230 end = p + add->len;\r
4231 for (i = to->len; p < end; ) {\r
4232 len = enclen(enc, p);\r
4233 if (i + len > OPT_EXACT_MAXLEN) break;\r
4234 for (j = 0; j < len && p < end; j++)\r
4235 to->s[i++] = *p++;\r
4236 }\r
4237\r
4238 to->len = i;\r
4239 to->reach_end = (p == end ? add->reach_end : 0);\r
4240\r
4241 concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);\r
4242 if (! to->reach_end) tanc.right_anchor = 0;\r
4243 copy_opt_anc_info(&to->anc, &tanc);\r
4244}\r
4245\r
4246static void\r
4247concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end,\r
4248 int raw ARG_UNUSED, OnigEncoding enc)\r
4249{\r
4250 int i, j, len;\r
4251 UChar *p;\r
4252\r
4253 for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {\r
4254 len = enclen(enc, p);\r
4255 if (i + len > OPT_EXACT_MAXLEN) break;\r
4256 for (j = 0; j < len && p < end; j++)\r
4257 to->s[i++] = *p++;\r
4258 }\r
4259\r
4260 to->len = i;\r
4261}\r
4262\r
4263static void\r
4264alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)\r
4265{\r
4266 int i, j, len;\r
4267\r
4268 if (add->len == 0 || to->len == 0) {\r
4269 clear_opt_exact_info(to);\r
4270 return ;\r
4271 }\r
4272\r
4273 if (! is_equal_mml(&to->mmd, &add->mmd)) {\r
4274 clear_opt_exact_info(to);\r
4275 return ;\r
4276 }\r
4277\r
4278 for (i = 0; i < to->len && i < add->len; ) {\r
4279 if (to->s[i] != add->s[i]) break;\r
4280 len = enclen(env->enc, to->s + i);\r
4281\r
4282 for (j = 1; j < len; j++) {\r
4283 if (to->s[i+j] != add->s[i+j]) break;\r
4284 }\r
4285 if (j < len) break;\r
4286 i += len;\r
4287 }\r
4288\r
4289 if (! add->reach_end || i < add->len || i < to->len) {\r
4290 to->reach_end = 0;\r
4291 }\r
4292 to->len = i;\r
4293 to->ignore_case |= add->ignore_case;\r
4294\r
4295 alt_merge_opt_anc_info(&to->anc, &add->anc);\r
4296 if (! to->reach_end) to->anc.right_anchor = 0;\r
4297}\r
4298\r
4299static void\r
4300select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)\r
4301{\r
4302 int v1, v2;\r
4303\r
4304 v1 = now->len;\r
4305 v2 = alt->len;\r
4306\r
4307 if (v2 == 0) {\r
4308 return ;\r
4309 }\r
4310 else if (v1 == 0) {\r
4311 copy_opt_exact_info(now, alt);\r
4312 return ;\r
4313 }\r
4314 else if (v1 <= 2 && v2 <= 2) {\r
4315 /* ByteValTable[x] is big value --> low price */\r
4316 v2 = map_position_value(enc, now->s[0]);\r
4317 v1 = map_position_value(enc, alt->s[0]);\r
4318\r
4319 if (now->len > 1) v1 += 5;\r
4320 if (alt->len > 1) v2 += 5;\r
4321 }\r
4322\r
4323 if (now->ignore_case == 0) v1 *= 2;\r
4324 if (alt->ignore_case == 0) v2 *= 2;\r
4325\r
4326 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)\r
4327 copy_opt_exact_info(now, alt);\r
4328}\r
4329\r
4330static void\r
4331clear_opt_map_info(OptMapInfo* map)\r
4332{\r
4333 static const OptMapInfo clean_info = {\r
4334 {0, 0}, {0, 0}, 0,\r
4335 {\r
4336 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4337 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4338 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4339 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4340 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4341 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4342 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4343 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4344 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4345 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4346 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4347 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4348 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4349 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4350 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4351 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0\r
4352 }\r
4353 };\r
4354\r
4355 xmemcpy(map, &clean_info, sizeof(OptMapInfo));\r
4356}\r
4357\r
4358static void\r
4359copy_opt_map_info(OptMapInfo* to, OptMapInfo* from)\r
4360{\r
4361 *to = *from;\r
4362}\r
4363\r
4364static void\r
4365add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc)\r
4366{\r
4367 if (map->map[c] == 0) {\r
4368 map->map[c] = 1;\r
4369 map->value += map_position_value(enc, c);\r
4370 }\r
4371}\r
4372\r
4373static int\r
4374add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,\r
4375 OnigEncoding enc, OnigCaseFoldType case_fold_flag)\r
4376{\r
4377 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];\r
4378 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];\r
4379 int i, n;\r
4380\r
4381 add_char_opt_map_info(map, p[0], enc);\r
4382\r
4383 case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);\r
4384 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);\r
4385 if (n < 0) return n;\r
4386\r
4387 for (i = 0; i < n; i++) {\r
4388 ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);\r
4389 add_char_opt_map_info(map, buf[0], enc);\r
4390 }\r
4391\r
4392 return 0;\r
4393}\r
4394\r
4395static void\r
4396select_opt_map_info(OptMapInfo* now, OptMapInfo* alt)\r
4397{\r
4398 static int z = 1<<15; /* 32768: something big value */\r
4399\r
4400 int v1, v2;\r
4401\r
4402 if (alt->value == 0) return ;\r
4403 if (now->value == 0) {\r
4404 copy_opt_map_info(now, alt);\r
4405 return ;\r
4406 }\r
4407\r
4408 v1 = z / now->value;\r
4409 v2 = z / alt->value;\r
4410 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)\r
4411 copy_opt_map_info(now, alt);\r
4412}\r
4413\r
4414static int\r
4415comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m)\r
4416{\r
4417#define COMP_EM_BASE 20\r
4418 int ve, vm;\r
4419\r
4420 if (m->value <= 0) return -1;\r
4421\r
4422 ve = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2);\r
4423 vm = COMP_EM_BASE * 5 * 2 / m->value;\r
4424 return comp_distance_value(&e->mmd, &m->mmd, ve, vm);\r
4425}\r
4426\r
4427static void\r
4428alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add)\r
4429{\r
4430 int i, val;\r
4431\r
4432 /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */\r
4433 if (to->value == 0) return ;\r
4434 if (add->value == 0 || to->mmd.max < add->mmd.min) {\r
4435 clear_opt_map_info(to);\r
4436 return ;\r
4437 }\r
4438\r
4439 alt_merge_mml(&to->mmd, &add->mmd);\r
4440\r
4441 val = 0;\r
4442 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {\r
4443 if (add->map[i])\r
4444 to->map[i] = 1;\r
4445\r
4446 if (to->map[i])\r
4447 val += map_position_value(enc, i);\r
4448 }\r
4449 to->value = val;\r
4450\r
4451 alt_merge_opt_anc_info(&to->anc, &add->anc);\r
4452}\r
4453\r
4454static void\r
4455set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd)\r
4456{\r
4457 copy_mml(&(opt->exb.mmd), mmd);\r
4458 copy_mml(&(opt->expr.mmd), mmd);\r
4459 copy_mml(&(opt->map.mmd), mmd);\r
4460}\r
4461\r
4462static void\r
4463clear_node_opt_info(NodeOptInfo* opt)\r
4464{\r
4465 clear_mml(&opt->len);\r
4466 clear_opt_anc_info(&opt->anc);\r
4467 clear_opt_exact_info(&opt->exb);\r
4468 clear_opt_exact_info(&opt->exm);\r
4469 clear_opt_exact_info(&opt->expr);\r
4470 clear_opt_map_info(&opt->map);\r
4471}\r
4472\r
4473static void\r
4474copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from)\r
4475{\r
4476 *to = *from;\r
4477}\r
4478\r
4479static void\r
4480concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)\r
4481{\r
4482 int exb_reach, exm_reach;\r
4483 OptAncInfo tanc;\r
4484\r
4485 concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);\r
4486 copy_opt_anc_info(&to->anc, &tanc);\r
4487\r
4488 if (add->exb.len > 0 && to->len.max == 0) {\r
4489 concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,\r
4490 to->len.max, add->len.max);\r
4491 copy_opt_anc_info(&add->exb.anc, &tanc);\r
4492 }\r
4493\r
4494 if (add->map.value > 0 && to->len.max == 0) {\r
4495 if (add->map.mmd.max == 0)\r
4496 add->map.anc.left_anchor |= to->anc.left_anchor;\r
4497 }\r
4498\r
4499 exb_reach = to->exb.reach_end;\r
4500 exm_reach = to->exm.reach_end;\r
4501\r
4502 if (add->len.max != 0)\r
4503 to->exb.reach_end = to->exm.reach_end = 0;\r
4504\r
4505 if (add->exb.len > 0) {\r
4506 if (exb_reach) {\r
4507 concat_opt_exact_info(&to->exb, &add->exb, enc);\r
4508 clear_opt_exact_info(&add->exb);\r
4509 }\r
4510 else if (exm_reach) {\r
4511 concat_opt_exact_info(&to->exm, &add->exb, enc);\r
4512 clear_opt_exact_info(&add->exb);\r
4513 }\r
4514 }\r
4515 select_opt_exact_info(enc, &to->exm, &add->exb);\r
4516 select_opt_exact_info(enc, &to->exm, &add->exm);\r
4517\r
4518 if (to->expr.len > 0) {\r
4519 if (add->len.max > 0) {\r
4520 if (to->expr.len > (int )add->len.max)\r
4521 to->expr.len = add->len.max;\r
4522\r
4523 if (to->expr.mmd.max == 0)\r
4524 select_opt_exact_info(enc, &to->exb, &to->expr);\r
4525 else\r
4526 select_opt_exact_info(enc, &to->exm, &to->expr);\r
4527 }\r
4528 }\r
4529 else if (add->expr.len > 0) {\r
4530 copy_opt_exact_info(&to->expr, &add->expr);\r
4531 }\r
4532\r
4533 select_opt_map_info(&to->map, &add->map);\r
4534\r
4535 add_mml(&to->len, &add->len);\r
4536}\r
4537\r
4538static void\r
4539alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env)\r
4540{\r
4541 alt_merge_opt_anc_info (&to->anc, &add->anc);\r
4542 alt_merge_opt_exact_info(&to->exb, &add->exb, env);\r
4543 alt_merge_opt_exact_info(&to->exm, &add->exm, env);\r
4544 alt_merge_opt_exact_info(&to->expr, &add->expr, env);\r
4545 alt_merge_opt_map_info(env->enc, &to->map, &add->map);\r
4546\r
4547 alt_merge_mml(&to->len, &add->len);\r
4548}\r
4549\r
4550\r
4551#define MAX_NODE_OPT_INFO_REF_COUNT 5\r
4552\r
4553static int\r
4554optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)\r
4555{\r
4556 int type;\r
4557 int r = 0;\r
4558\r
4559 clear_node_opt_info(opt);\r
4560 set_bound_node_opt_info(opt, &env->mmd);\r
4561\r
4562 type = NTYPE(node);\r
4563 switch (type) {\r
4564 case NT_LIST:\r
4565 {\r
4566 OptEnv nenv;\r
4567 NodeOptInfo nopt;\r
4568 Node* nd = node;\r
4569\r
4570 copy_opt_env(&nenv, env);\r
4571 do {\r
4572 r = optimize_node_left(NCAR(nd), &nopt, &nenv);\r
4573 if (r == 0) {\r
4574 add_mml(&nenv.mmd, &nopt.len);\r
4575 concat_left_node_opt_info(env->enc, opt, &nopt);\r
4576 }\r
4577 } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));\r
4578 }\r
4579 break;\r
4580\r
4581 case NT_ALT:\r
4582 {\r
4583 NodeOptInfo nopt;\r
4584 Node* nd = node;\r
4585\r
4586 do {\r
4587 r = optimize_node_left(NCAR(nd), &nopt, env);\r
4588 if (r == 0) {\r
4589 if (nd == node) copy_node_opt_info(opt, &nopt);\r
4590 else alt_merge_node_opt_info(opt, &nopt, env);\r
4591 }\r
4592 } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));\r
4593 }\r
4594 break;\r
4595\r
4596 case NT_STR:\r
4597 {\r
4598 StrNode* sn = NSTR(node);\r
4599 int slen = (int)(sn->end - sn->s);\r
4600 int is_raw = NSTRING_IS_RAW(node);\r
4601\r
4602 if (! NSTRING_IS_AMBIG(node)) {\r
4603 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,\r
4604 NSTRING_IS_RAW(node), env->enc);\r
4605 if (slen > 0) {\r
4606 add_char_opt_map_info(&opt->map, *(sn->s), env->enc);\r
4607 }\r
4608 set_mml(&opt->len, slen, slen);\r
4609 }\r
4610 else {\r
4611 int max;\r
4612\r
4613 if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {\r
4614 int n = onigenc_strlen(env->enc, sn->s, sn->end);\r
4615 max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;\r
4616 }\r
4617 else {\r
4618 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,\r
4619 is_raw, env->enc);\r
4620 opt->exb.ignore_case = 1;\r
4621\r
4622 if (slen > 0) {\r
4623 r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,\r
4624 env->enc, env->case_fold_flag);\r
4625 if (r != 0) break;\r
4626 }\r
4627\r
4628 max = slen;\r
4629 }\r
4630\r
4631 set_mml(&opt->len, slen, max);\r
4632 }\r
4633\r
4634 if (opt->exb.len == slen)\r
4635 opt->exb.reach_end = 1;\r
4636 }\r
4637 break;\r
4638\r
4639 case NT_CCLASS:\r
4640 {\r
4641 int i, z;\r
4642 CClassNode* cc = NCCLASS(node);\r
4643\r
4644 /* no need to check ignore case. (setted in setup_tree()) */\r
4645\r
4646 if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {\r
4647 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);\r
4648 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);\r
4649\r
4650 set_mml(&opt->len, min, max);\r
4651 }\r
4652 else {\r
4653 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {\r
4654 z = BITSET_AT(cc->bs, i);\r
4655 if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {\r
4656 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);\r
4657 }\r
4658 }\r
4659 set_mml(&opt->len, 1, 1);\r
4660 }\r
4661 }\r
4662 break;\r
4663\r
4664 case NT_CTYPE:\r
4665 {\r
4666 int i, min, max;\r
4667\r
4668 max = ONIGENC_MBC_MAXLEN_DIST(env->enc);\r
4669\r
4670 if (max == 1) {\r
4671 min = 1;\r
4672\r
4673 switch (NCTYPE(node)->ctype) {\r
4674 case ONIGENC_CTYPE_WORD:\r
4675 if (NCTYPE(node)->not != 0) {\r
4676 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {\r
4677 if (! ONIGENC_IS_CODE_WORD(env->enc, i)) {\r
4678 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);\r
4679 }\r
4680 }\r
4681 }\r
4682 else {\r
4683 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {\r
4684 if (ONIGENC_IS_CODE_WORD(env->enc, i)) {\r
4685 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);\r
4686 }\r
4687 }\r
4688 }\r
4689 break;\r
4690 }\r
4691 }\r
4692 else {\r
4693 min = ONIGENC_MBC_MINLEN(env->enc);\r
4694 }\r
4695 set_mml(&opt->len, min, max);\r
4696 }\r
4697 break;\r
4698\r
4699 case NT_CANY:\r
4700 {\r
4701 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);\r
4702 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);\r
4703 set_mml(&opt->len, min, max);\r
4704 }\r
4705 break;\r
4706\r
4707 case NT_ANCHOR:\r
4708 switch (NANCHOR(node)->type) {\r
4709 case ANCHOR_BEGIN_BUF:\r
4710 case ANCHOR_BEGIN_POSITION:\r
4711 case ANCHOR_BEGIN_LINE:\r
4712 case ANCHOR_END_BUF:\r
4713 case ANCHOR_SEMI_END_BUF:\r
4714 case ANCHOR_END_LINE:\r
4715 add_opt_anc_info(&opt->anc, NANCHOR(node)->type);\r
4716 break;\r
4717\r
4718 case ANCHOR_PREC_READ:\r
4719 {\r
4720 NodeOptInfo nopt;\r
4721\r
4722 r = optimize_node_left(NANCHOR(node)->target, &nopt, env);\r
4723 if (r == 0) {\r
4724 if (nopt.exb.len > 0)\r
4725 copy_opt_exact_info(&opt->expr, &nopt.exb);\r
4726 else if (nopt.exm.len > 0)\r
4727 copy_opt_exact_info(&opt->expr, &nopt.exm);\r
4728\r
4729 opt->expr.reach_end = 0;\r
4730\r
4731 if (nopt.map.value > 0)\r
4732 copy_opt_map_info(&opt->map, &nopt.map);\r
4733 }\r
4734 }\r
4735 break;\r
4736\r
4737 case ANCHOR_PREC_READ_NOT:\r
4738 case ANCHOR_LOOK_BEHIND: /* Sorry, I can't make use of it. */\r
4739 case ANCHOR_LOOK_BEHIND_NOT:\r
4740 break;\r
4741 }\r
4742 break;\r
4743\r
4744 case NT_BREF:\r
4745 {\r
4746 int i;\r
4747 int* backs;\r
4748 OnigDistance min, max, tmin, tmax;\r
4749 Node** nodes = SCANENV_MEM_NODES(env->scan_env);\r
4750 BRefNode* br = NBREF(node);\r
4751\r
4752 if (br->state & NST_RECURSION) {\r
4753 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);\r
4754 break;\r
4755 }\r
4756 backs = BACKREFS_P(br);\r
4757 r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);\r
4758 if (r != 0) break;\r
4759 r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);\r
4760 if (r != 0) break;\r
4761 for (i = 1; i < br->back_num; i++) {\r
4762 r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);\r
4763 if (r != 0) break;\r
4764 r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);\r
4765 if (r != 0) break;\r
4766 if (min > tmin) min = tmin;\r
4767 if (max < tmax) max = tmax;\r
4768 }\r
4769 if (r == 0) set_mml(&opt->len, min, max);\r
4770 }\r
4771 break;\r
4772\r
4773#ifdef USE_SUBEXP_CALL\r
4774 case NT_CALL:\r
4775 if (IS_CALL_RECURSION(NCALL(node)))\r
4776 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);\r
4777 else {\r
4778 OnigOptionType save = env->options;\r
4779 env->options = NENCLOSE(NCALL(node)->target)->option;\r
4780 r = optimize_node_left(NCALL(node)->target, opt, env);\r
4781 env->options = save;\r
4782 }\r
4783 break;\r
4784#endif\r
4785\r
4786 case NT_QTFR:\r
4787 {\r
4788 int i;\r
4789 OnigDistance min, max;\r
4790 NodeOptInfo nopt;\r
4791 QtfrNode* qn = NQTFR(node);\r
4792\r
4793 r = optimize_node_left(qn->target, &nopt, env);\r
4794 if (r) break;\r
4795\r
4796 if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) {\r
4797 if (env->mmd.max == 0 &&\r
4798 NTYPE(qn->target) == NT_CANY && qn->greedy) {\r
4799 if (IS_MULTILINE(env->options))\r
4800 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);\r
4801 else\r
4802 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);\r
4803 }\r
4804 }\r
4805 else {\r
4806 if (qn->lower > 0) {\r
4807 copy_node_opt_info(opt, &nopt);\r
4808 if (nopt.exb.len > 0) {\r
4809 if (nopt.exb.reach_end) {\r
4810 for (i = 2; i <= qn->lower &&\r
4811 ! is_full_opt_exact_info(&opt->exb); i++) {\r
4812 concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);\r
4813 }\r
4814 if (i < qn->lower) {\r
4815 opt->exb.reach_end = 0;\r
4816 }\r
4817 }\r
4818 }\r
4819\r
4820 if (qn->lower != qn->upper) {\r
4821 opt->exb.reach_end = 0;\r
4822 opt->exm.reach_end = 0;\r
4823 }\r
4824 if (qn->lower > 1)\r
4825 opt->exm.reach_end = 0;\r
4826 }\r
4827 }\r
4828\r
4829 min = distance_multiply(nopt.len.min, qn->lower);\r
4830 if (IS_REPEAT_INFINITE(qn->upper))\r
4831 max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);\r
4832 else\r
4833 max = distance_multiply(nopt.len.max, qn->upper);\r
4834\r
4835 set_mml(&opt->len, min, max);\r
4836 }\r
4837 break;\r
4838\r
4839 case NT_ENCLOSE:\r
4840 {\r
4841 EncloseNode* en = NENCLOSE(node);\r
4842\r
4843 switch (en->type) {\r
4844 case ENCLOSE_OPTION:\r
4845 {\r
4846 OnigOptionType save = env->options;\r
4847\r
4848 env->options = en->option;\r
4849 r = optimize_node_left(en->target, opt, env);\r
4850 env->options = save;\r
4851 }\r
4852 break;\r
4853\r
4854 case ENCLOSE_MEMORY:\r
4855#ifdef USE_SUBEXP_CALL\r
4856 en->opt_count++;\r
4857 if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {\r
4858 OnigDistance min, max;\r
4859\r
4860 min = 0;\r
4861 max = ONIG_INFINITE_DISTANCE;\r
4862 if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;\r
4863 if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;\r
4864 set_mml(&opt->len, min, max);\r
4865 }\r
4866 else\r
4867#endif\r
4868 {\r
4869 r = optimize_node_left(en->target, opt, env);\r
4870\r
4871 if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {\r
4872 if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))\r
4873 remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);\r
4874 }\r
4875 }\r
4876 break;\r
4877\r
4878 case ENCLOSE_STOP_BACKTRACK:\r
4879 r = optimize_node_left(en->target, opt, env);\r
4880 break;\r
4881 }\r
4882 }\r
4883 break;\r
4884\r
4885 default:\r
4886#ifdef ONIG_DEBUG\r
4887 fprintf(stderr, "optimize_node_left: undefined node type %d\n",\r
4888 NTYPE(node));\r
4889#endif\r
4890 r = ONIGERR_TYPE_BUG;\r
4891 break;\r
4892 }\r
4893\r
4894 return r;\r
4895}\r
4896\r
4897static int\r
4898set_optimize_exact_info(regex_t* reg, OptExactInfo* e)\r
4899{\r
4900 int r;\r
4901\r
4902 if (e->len == 0) return 0;\r
4903\r
4904 if (e->ignore_case) {\r
4905 reg->exact = (UChar* )xmalloc(e->len);\r
4906 CHECK_NULL_RETURN_MEMERR(reg->exact);\r
4907 xmemcpy(reg->exact, e->s, e->len);\r
4908 reg->exact_end = reg->exact + e->len;\r
4909 reg->optimize = ONIG_OPTIMIZE_EXACT_IC;\r
4910 }\r
4911 else {\r
4912 int allow_reverse;\r
4913\r
4914 reg->exact = str_dup(e->s, e->s + e->len);\r
4915 CHECK_NULL_RETURN_MEMERR(reg->exact);\r
4916 reg->exact_end = reg->exact + e->len;\r
4917 \r
4918 allow_reverse =\r
4919 ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);\r
4920\r
4921 if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {\r
4922 r = set_bm_skip(reg->exact, reg->exact_end, reg->enc,\r
4923 reg->map, &(reg->int_map));\r
4924 if (r) return r;\r
4925\r
4926 reg->optimize = (allow_reverse != 0\r
4927 ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);\r
4928 }\r
4929 else {\r
4930 reg->optimize = ONIG_OPTIMIZE_EXACT;\r
4931 }\r
4932 }\r
4933\r
4934 reg->dmin = e->mmd.min;\r
4935 reg->dmax = e->mmd.max;\r
4936\r
4937 if (reg->dmin != ONIG_INFINITE_DISTANCE) {\r
4938 reg->threshold_len = reg->dmin + (int)(reg->exact_end - reg->exact);\r
4939 }\r
4940\r
4941 return 0;\r
4942}\r
4943\r
4944static void\r
4945set_optimize_map_info(regex_t* reg, OptMapInfo* m)\r
4946{\r
4947 int i;\r
4948\r
4949 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)\r
4950 reg->map[i] = m->map[i];\r
4951\r
4952 reg->optimize = ONIG_OPTIMIZE_MAP;\r
4953 reg->dmin = m->mmd.min;\r
4954 reg->dmax = m->mmd.max;\r
4955\r
4956 if (reg->dmin != ONIG_INFINITE_DISTANCE) {\r
4957 reg->threshold_len = reg->dmin + 1;\r
4958 }\r
4959}\r
4960\r
4961static void\r
4962set_sub_anchor(regex_t* reg, OptAncInfo* anc)\r
4963{\r
4964 reg->sub_anchor |= anc->left_anchor & ANCHOR_BEGIN_LINE;\r
4965 reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;\r
4966}\r
4967\r
4968#ifdef ONIG_DEBUG\r
4969static void print_optimize_info(FILE* f, regex_t* reg);\r
4970#endif\r
4971\r
4972static int\r
4973set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)\r
4974{\r
4975\r
4976 int r;\r
4977 NodeOptInfo opt;\r
4978 OptEnv env;\r
4979\r
4980 env.enc = reg->enc;\r
4981 env.options = reg->options;\r
4982 env.case_fold_flag = reg->case_fold_flag;\r
4983 env.scan_env = scan_env;\r
4984 clear_mml(&env.mmd);\r
4985\r
4986 r = optimize_node_left(node, &opt, &env);\r
4987 if (r) return r;\r
4988\r
4989 reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |\r
4990 ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML);\r
4991\r
4992 reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF);\r
4993\r
4994 if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {\r
4995 reg->anchor_dmin = opt.len.min;\r
4996 reg->anchor_dmax = opt.len.max;\r
4997 }\r
4998\r
4999 if (opt.exb.len > 0 || opt.exm.len > 0) {\r
5000 select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);\r
5001 if (opt.map.value > 0 &&\r
5002 comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {\r
5003 goto set_map;\r
5004 }\r
5005 else {\r
5006 r = set_optimize_exact_info(reg, &opt.exb);\r
5007 set_sub_anchor(reg, &opt.exb.anc);\r
5008 }\r
5009 }\r
5010 else if (opt.map.value > 0) {\r
5011 set_map:\r
5012 set_optimize_map_info(reg, &opt.map);\r
5013 set_sub_anchor(reg, &opt.map.anc);\r
5014 }\r
5015 else {\r
5016 reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE;\r
5017 if (opt.len.max == 0)\r
5018 reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE;\r
5019 }\r
5020\r
5021#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)\r
5022 print_optimize_info(stderr, reg);\r
5023#endif\r
5024 return r;\r
5025}\r
5026\r
5027static void\r
5028clear_optimize_info(regex_t* reg)\r
5029{\r
5030 reg->optimize = ONIG_OPTIMIZE_NONE;\r
5031 reg->anchor = 0;\r
5032 reg->anchor_dmin = 0;\r
5033 reg->anchor_dmax = 0;\r
5034 reg->sub_anchor = 0;\r
5035 reg->exact_end = (UChar* )NULL;\r
5036 reg->threshold_len = 0;\r
5037 if (IS_NOT_NULL(reg->exact)) {\r
5038 xfree(reg->exact);\r
5039 reg->exact = (UChar* )NULL;\r
5040 }\r
5041}\r
5042\r
5043#ifdef ONIG_DEBUG\r
5044\r
5045static void print_enc_string(FILE* fp, OnigEncoding enc,\r
5046 const UChar *s, const UChar *end)\r
5047{\r
5048 fprintf(fp, "\nPATTERN: /");\r
5049\r
5050 if (ONIGENC_MBC_MINLEN(enc) > 1) {\r
5051 const UChar *p;\r
5052 OnigCodePoint code;\r
5053\r
5054 p = s;\r
5055 while (p < end) {\r
5056 code = ONIGENC_MBC_TO_CODE(enc, p, end);\r
5057 if (code >= 0x80) {\r
5058 fprintf(fp, " 0x%04x ", (int )code);\r
5059 }\r
5060 else {\r
5061 fputc((int )code, fp);\r
5062 }\r
5063\r
5064 p += enclen(enc, p);\r
5065 }\r
5066 }\r
5067 else {\r
5068 while (s < end) {\r
5069 fputc((int )*s, fp);\r
5070 s++;\r
5071 }\r
5072 }\r
5073\r
5074 fprintf(fp, "/\n");\r
5075}\r
5076\r
5077static void\r
5078print_distance_range(FILE* f, OnigDistance a, OnigDistance b)\r
5079{\r
5080 if (a == ONIG_INFINITE_DISTANCE)\r
5081 fputs("inf", f);\r
5082 else\r
5083 fprintf(f, "(%u)", a);\r
5084\r
5085 fputs("-", f);\r
5086\r
5087 if (b == ONIG_INFINITE_DISTANCE)\r
5088 fputs("inf", f);\r
5089 else\r
5090 fprintf(f, "(%u)", b);\r
5091}\r
5092\r
5093static void\r
5094print_anchor(FILE* f, int anchor)\r
5095{\r
5096 int q = 0;\r
5097\r
5098 fprintf(f, "[");\r
5099\r
5100 if (anchor & ANCHOR_BEGIN_BUF) {\r
5101 fprintf(f, "begin-buf");\r
5102 q = 1;\r
5103 }\r
5104 if (anchor & ANCHOR_BEGIN_LINE) {\r
5105 if (q) fprintf(f, ", ");\r
5106 q = 1;\r
5107 fprintf(f, "begin-line");\r
5108 }\r
5109 if (anchor & ANCHOR_BEGIN_POSITION) {\r
5110 if (q) fprintf(f, ", ");\r
5111 q = 1;\r
5112 fprintf(f, "begin-pos");\r
5113 }\r
5114 if (anchor & ANCHOR_END_BUF) {\r
5115 if (q) fprintf(f, ", ");\r
5116 q = 1;\r
5117 fprintf(f, "end-buf");\r
5118 }\r
5119 if (anchor & ANCHOR_SEMI_END_BUF) {\r
5120 if (q) fprintf(f, ", ");\r
5121 q = 1;\r
5122 fprintf(f, "semi-end-buf");\r
5123 }\r
5124 if (anchor & ANCHOR_END_LINE) {\r
5125 if (q) fprintf(f, ", ");\r
5126 q = 1;\r
5127 fprintf(f, "end-line");\r
5128 }\r
5129 if (anchor & ANCHOR_ANYCHAR_STAR) {\r
5130 if (q) fprintf(f, ", ");\r
5131 q = 1;\r
5132 fprintf(f, "anychar-star");\r
5133 }\r
5134 if (anchor & ANCHOR_ANYCHAR_STAR_ML) {\r
5135 if (q) fprintf(f, ", ");\r
5136 fprintf(f, "anychar-star-pl");\r
5137 }\r
5138\r
5139 fprintf(f, "]");\r
5140}\r
5141\r
5142static void\r
5143print_optimize_info(FILE* f, regex_t* reg)\r
5144{\r
5145 static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",\r
5146 "EXACT_IC", "MAP" };\r
5147\r
5148 fprintf(f, "optimize: %s\n", on[reg->optimize]);\r
5149 fprintf(f, " anchor: "); print_anchor(f, reg->anchor);\r
5150 if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)\r
5151 print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);\r
5152 fprintf(f, "\n");\r
5153\r
5154 if (reg->optimize) {\r
5155 fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor);\r
5156 fprintf(f, "\n");\r
5157 }\r
5158 fprintf(f, "\n");\r
5159\r
5160 if (reg->exact) {\r
5161 UChar *p;\r
5162 fprintf(f, "exact: [");\r
5163 for (p = reg->exact; p < reg->exact_end; p++) {\r
5164 fputc(*p, f);\r
5165 }\r
5166 fprintf(f, "]: length: %d\n", (reg->exact_end - reg->exact));\r
5167 }\r
5168 else if (reg->optimize & ONIG_OPTIMIZE_MAP) {\r
5169 int c, i, n = 0;\r
5170\r
5171 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)\r
5172 if (reg->map[i]) n++;\r
5173\r
5174 fprintf(f, "map: n=%d\n", n);\r
5175 if (n > 0) {\r
5176 c = 0;\r
5177 fputc('[', f);\r
5178 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {\r
5179 if (reg->map[i] != 0) {\r
5180 if (c > 0) fputs(", ", f);\r
5181 c++;\r
5182 if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&\r
5183 ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))\r
5184 fputc(i, f);\r
5185 else\r
5186 fprintf(f, "%d", i);\r
5187 }\r
5188 }\r
5189 fprintf(f, "]\n");\r
5190 }\r
5191 }\r
5192}\r
5193#endif /* ONIG_DEBUG */\r
5194\r
5195\r
5196extern void\r
5197onig_free_body(regex_t* reg)\r
5198{\r
5199 if (IS_NOT_NULL(reg)) {\r
5200 if (IS_NOT_NULL(reg->p)) xfree(reg->p);\r
5201 if (IS_NOT_NULL(reg->exact)) xfree(reg->exact);\r
5202 if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map);\r
5203 if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward);\r
5204 if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range);\r
5205 if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain);\r
5206\r
5207#ifdef USE_NAMED_GROUP\r
5208 onig_names_free(reg);\r
5209#endif\r
5210 }\r
5211}\r
5212\r
5213extern void\r
5214onig_free(regex_t* reg)\r
5215{\r
5216 if (IS_NOT_NULL(reg)) {\r
5217 onig_free_body(reg);\r
5218 xfree(reg);\r
5219 }\r
5220}\r
5221\r
5222#define REGEX_TRANSFER(to,from) do {\\r
5223 (to)->state = ONIG_STATE_MODIFY;\\r
5224 onig_free_body(to);\\r
5225 xmemcpy(to, from, sizeof(regex_t));\\r
5226 xfree(from);\\r
5227} while (0)\r
5228\r
5229extern void\r
5230onig_transfer(regex_t* to, regex_t* from)\r
5231{\r
5232 THREAD_ATOMIC_START;\r
5233 REGEX_TRANSFER(to, from);\r
5234 THREAD_ATOMIC_END;\r
5235}\r
5236\r
5237#define REGEX_CHAIN_HEAD(reg) do {\\r
5238 while (IS_NOT_NULL((reg)->chain)) {\\r
5239 (reg) = (reg)->chain;\\r
5240 }\\r
5241} while (0)\r
5242\r
5243extern void\r
5244onig_chain_link_add(regex_t* to, regex_t* add)\r
5245{\r
5246 THREAD_ATOMIC_START;\r
5247 REGEX_CHAIN_HEAD(to);\r
5248 to->chain = add;\r
5249 THREAD_ATOMIC_END;\r
5250}\r
5251\r
5252extern void\r
5253onig_chain_reduce(regex_t* reg)\r
5254{\r
5255 regex_t *head, *prev;\r
5256\r
5257 prev = reg;\r
5258 head = prev->chain;\r
5259 if (IS_NOT_NULL(head)) {\r
5260 reg->state = ONIG_STATE_MODIFY;\r
5261 while (IS_NOT_NULL(head->chain)) {\r
5262 prev = head;\r
5263 head = head->chain;\r
5264 }\r
5265 prev->chain = (regex_t* )NULL;\r
5266 REGEX_TRANSFER(reg, head);\r
5267 }\r
5268}\r
5269\r
5270#ifdef ONIG_DEBUG\r
5271static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg));\r
5272#endif\r
5273#ifdef ONIG_DEBUG_PARSE_TREE\r
5274static void print_tree P_((FILE* f, Node* node));\r
5275#endif\r
5276\r
5277extern int\r
5278onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,\r
5279 OnigErrorInfo* einfo)\r
5280{\r
5281#define COMPILE_INIT_SIZE 20\r
5282\r
5283 int r, init_size;\r
5284 Node* root;\r
5285 ScanEnv scan_env;\r
5286#ifdef USE_SUBEXP_CALL\r
5287 UnsetAddrList uslist;\r
5288#endif\r
5289\r
5290 if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;\r
5291\r
5292 reg->state = ONIG_STATE_COMPILING;\r
5293\r
5294#ifdef ONIG_DEBUG\r
5295 print_enc_string(stderr, reg->enc, pattern, pattern_end);\r
5296#endif\r
5297\r
5298 if (reg->alloc == 0) {\r
5299 init_size = ((int)(pattern_end - pattern)) * 2;\r
5300 if (init_size <= 0) init_size = COMPILE_INIT_SIZE;\r
5301 r = BBUF_INIT(reg, init_size);\r
5302 if (r != 0) goto end;\r
5303 }\r
5304 else\r
5305 reg->used = 0;\r
5306\r
5307 reg->num_mem = 0;\r
5308 reg->num_repeat = 0;\r
5309 reg->num_null_check = 0;\r
5310 reg->repeat_range_alloc = 0;\r
5311 reg->repeat_range = (OnigRepeatRange* )NULL;\r
5312#ifdef USE_COMBINATION_EXPLOSION_CHECK\r
5313 reg->num_comb_exp_check = 0;\r
5314#endif\r
5315\r
5316 r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);\r
b0c2b797 5317 if (r != 0 || root == NULL) goto err;\r
14b0e578
CS
5318\r
5319#ifdef USE_NAMED_GROUP\r
5320 /* mixed use named group and no-named group */\r
5321 if (scan_env.num_named > 0 &&\r
5322 IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&\r
5323 !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {\r
5324 if (scan_env.num_named != scan_env.num_mem)\r
5325 r = disable_noname_group_capture(&root, reg, &scan_env);\r
5326 else\r
5327 r = numbered_ref_check(root);\r
5328\r
5329 if (r != 0) goto err;\r
5330 }\r
5331#endif\r
5332\r
5333#ifdef USE_SUBEXP_CALL\r
5334 if (scan_env.num_call > 0) {\r
5335 r = unset_addr_list_init(&uslist, scan_env.num_call);\r
5336 if (r != 0) goto err;\r
5337 scan_env.unset_addr_list = &uslist;\r
5338 r = setup_subexp_call(root, &scan_env);\r
5339 if (r != 0) goto err_unset;\r
5340 r = subexp_recursive_check_trav(root, &scan_env);\r
5341 if (r < 0) goto err_unset;\r
5342 r = subexp_inf_recursive_check_trav(root, &scan_env);\r
5343 if (r != 0) goto err_unset;\r
5344\r
5345 reg->num_call = scan_env.num_call;\r
5346 }\r
5347 else\r
5348 reg->num_call = 0;\r
5349#endif\r
5350\r
5351 r = setup_tree(root, reg, 0, &scan_env);\r
5352 if (r != 0) goto err_unset;\r
5353\r
5354#ifdef ONIG_DEBUG_PARSE_TREE\r
5355 print_tree(stderr, root);\r
5356#endif\r
5357\r
5358 reg->capture_history = scan_env.capture_history;\r
5359 reg->bt_mem_start = scan_env.bt_mem_start;\r
5360 reg->bt_mem_start |= reg->capture_history;\r
5361 if (IS_FIND_CONDITION(reg->options))\r
5362 BIT_STATUS_ON_ALL(reg->bt_mem_end);\r
5363 else {\r
5364 reg->bt_mem_end = scan_env.bt_mem_end;\r
5365 reg->bt_mem_end |= reg->capture_history;\r
5366 }\r
5367\r
5368#ifdef USE_COMBINATION_EXPLOSION_CHECK\r
5369 if (scan_env.backrefed_mem == 0\r
5370#ifdef USE_SUBEXP_CALL\r
5371 || scan_env.num_call == 0\r
5372#endif\r
5373 ) {\r
5374 setup_comb_exp_check(root, 0, &scan_env);\r
5375#ifdef USE_SUBEXP_CALL\r
5376 if (scan_env.has_recursion != 0) {\r
5377 scan_env.num_comb_exp_check = 0;\r
5378 }\r
5379 else\r
5380#endif\r
5381 if (scan_env.comb_exp_max_regnum > 0) {\r
5382 int i;\r
5383 for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {\r
5384 if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {\r
5385 scan_env.num_comb_exp_check = 0;\r
5386 break;\r
5387 }\r
5388 }\r
5389 }\r
5390 }\r
5391\r
5392 reg->num_comb_exp_check = scan_env.num_comb_exp_check;\r
5393#endif\r
5394\r
5395 clear_optimize_info(reg);\r
5396#ifndef ONIG_DONT_OPTIMIZE\r
5397 r = set_optimize_info_from_tree(root, reg, &scan_env);\r
5398 if (r != 0) goto err_unset;\r
5399#endif\r
5400\r
5401 if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {\r
5402 xfree(scan_env.mem_nodes_dynamic);\r
5403 scan_env.mem_nodes_dynamic = (Node** )NULL;\r
5404 }\r
5405\r
5406 r = compile_tree(root, reg);\r
5407 if (r == 0) {\r
5408 r = add_opcode(reg, OP_END);\r
5409#ifdef USE_SUBEXP_CALL\r
5410 if (scan_env.num_call > 0) {\r
5411 r = unset_addr_list_fix(&uslist, reg);\r
5412 unset_addr_list_end(&uslist);\r
5413 if (r) goto err;\r
5414 }\r
5415#endif\r
5416\r
5417 if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))\r
5418 reg->stack_pop_level = STACK_POP_LEVEL_ALL;\r
5419 else {\r
5420 if (reg->bt_mem_start != 0)\r
5421 reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;\r
5422 else\r
5423 reg->stack_pop_level = STACK_POP_LEVEL_FREE;\r
5424 }\r
5425 }\r
5426#ifdef USE_SUBEXP_CALL\r
5427 else if (scan_env.num_call > 0) {\r
5428 unset_addr_list_end(&uslist);\r
5429 }\r
5430#endif\r
5431 onig_node_free(root);\r
5432\r
5433#ifdef ONIG_DEBUG_COMPILE\r
5434#ifdef USE_NAMED_GROUP\r
5435 onig_print_names(stderr, reg);\r
5436#endif\r
5437 print_compiled_byte_code_list(stderr, reg);\r
5438#endif\r
5439\r
5440 end:\r
5441 reg->state = ONIG_STATE_NORMAL;\r
5442 return r;\r
5443\r
5444 err_unset:\r
5445#ifdef USE_SUBEXP_CALL\r
5446 if (scan_env.num_call > 0) {\r
5447 unset_addr_list_end(&uslist);\r
5448 }\r
5449#endif\r
5450 err:\r
5451 if (IS_NOT_NULL(scan_env.error)) {\r
5452 if (IS_NOT_NULL(einfo)) {\r
5453 einfo->enc = scan_env.enc;\r
5454 einfo->par = scan_env.error;\r
5455 einfo->par_end = scan_env.error_end;\r
5456 }\r
5457 }\r
5458\r
5459 onig_node_free(root);\r
5460 if (IS_NOT_NULL(scan_env.mem_nodes_dynamic))\r
5461 xfree(scan_env.mem_nodes_dynamic);\r
5462 return r;\r
5463}\r
5464\r
5465#ifdef USE_RECOMPILE_API\r
5466extern int\r
5467onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,\r
5468 OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,\r
5469 OnigErrorInfo* einfo)\r
5470{\r
5471 int r;\r
5472 regex_t *new_reg;\r
5473\r
5474 r = onig_new(&new_reg, pattern, pattern_end, option, enc, syntax, einfo);\r
5475 if (r) return r;\r
5476 if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) {\r
5477 onig_transfer(reg, new_reg);\r
5478 }\r
5479 else {\r
5480 onig_chain_link_add(reg, new_reg);\r
5481 }\r
5482 return 0;\r
5483}\r
5484#endif\r
5485\r
5486static int onig_inited = 0;\r
5487\r
5488extern int\r
5489onig_reg_init(regex_t* reg, OnigOptionType option,\r
5490 OnigCaseFoldType case_fold_flag,\r
5491 OnigEncoding enc, OnigSyntaxType* syntax)\r
5492{\r
5493 if (! onig_inited)\r
5494 onig_init();\r
5495\r
5496 if (IS_NULL(reg))\r
5497 return ONIGERR_INVALID_ARGUMENT;\r
5498\r
5499 if (ONIGENC_IS_UNDEF(enc))\r
5500 return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED;\r
5501\r
5502 if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))\r
5503 == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {\r
5504 return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;\r
5505 }\r
5506\r
5507 (reg)->state = ONIG_STATE_MODIFY;\r
5508\r
5509 if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {\r
5510 option |= syntax->options;\r
5511 option &= ~ONIG_OPTION_SINGLELINE;\r
5512 }\r
5513 else\r
5514 option |= syntax->options;\r
5515\r
5516 (reg)->enc = enc;\r
5517 (reg)->options = option;\r
5518 (reg)->syntax = syntax;\r
5519 (reg)->optimize = 0;\r
5520 (reg)->exact = (UChar* )NULL;\r
5521 (reg)->int_map = (int* )NULL;\r
5522 (reg)->int_map_backward = (int* )NULL;\r
5523 (reg)->chain = (regex_t* )NULL;\r
5524\r
5525 (reg)->p = (UChar* )NULL;\r
5526 (reg)->alloc = 0;\r
5527 (reg)->used = 0;\r
5528 (reg)->name_table = (void* )NULL;\r
5529\r
5530 (reg)->case_fold_flag = case_fold_flag;\r
5531 return 0;\r
5532}\r
5533\r
5534extern int\r
5535onig_new_without_alloc(regex_t* reg, const UChar* pattern,\r
5536 const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,\r
5537 OnigSyntaxType* syntax, OnigErrorInfo* einfo)\r
5538{\r
5539 int r;\r
5540\r
5541 r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);\r
5542 if (r) return r;\r
5543\r
5544 r = onig_compile(reg, pattern, pattern_end, einfo);\r
5545 return r;\r
5546}\r
5547\r
5548extern int\r
5549onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,\r
5550 OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,\r
5551 OnigErrorInfo* einfo)\r
5552{\r
5553 int r;\r
5554\r
5555 *reg = (regex_t* )xmalloc(sizeof(regex_t));\r
5556 if (IS_NULL(*reg)) return ONIGERR_MEMORY;\r
5557\r
5558 r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);\r
5559 if (r) goto err;\r
5560\r
5561 r = onig_compile(*reg, pattern, pattern_end, einfo);\r
5562 if (r) {\r
5563 err:\r
5564 onig_free(*reg);\r
5565 *reg = NULL;\r
5566 }\r
5567 return r;\r
5568}\r
5569\r
5570\r
5571extern int\r
5572onig_init(void)\r
5573{\r
5574 if (onig_inited != 0)\r
5575 return 0;\r
5576\r
5577 THREAD_SYSTEM_INIT;\r
5578 THREAD_ATOMIC_START;\r
5579\r
5580 onig_inited = 1;\r
5581\r
5582 onigenc_init();\r
5583 /* onigenc_set_default_caseconv_table((UChar* )0); */\r
5584\r
5585#ifdef ONIG_DEBUG_STATISTICS\r
5586 onig_statistics_init();\r
5587#endif\r
5588\r
5589 THREAD_ATOMIC_END;\r
5590 return 0;\r
5591}\r
5592\r
5593\r
5594static OnigEndCallListItemType* EndCallTop;\r
5595\r
5596extern void onig_add_end_call(void (*func)(void))\r
5597{\r
5598 OnigEndCallListItemType* item;\r
5599\r
5600 item = (OnigEndCallListItemType* )xmalloc(sizeof(*item));\r
5601 if (item == 0) return ;\r
5602\r
5603 item->next = EndCallTop;\r
5604 item->func = func;\r
5605\r
5606 EndCallTop = item;\r
5607}\r
5608\r
5609static void\r
5610exec_end_call_list(void)\r
5611{\r
5612 OnigEndCallListItemType* prev;\r
5613 void (*func)(void);\r
5614\r
5615 while (EndCallTop != 0) {\r
5616 func = EndCallTop->func;\r
5617 (*func)();\r
5618\r
5619 prev = EndCallTop;\r
5620 EndCallTop = EndCallTop->next;\r
5621 xfree(prev);\r
5622 }\r
5623}\r
5624\r
5625extern int\r
5626onig_end(void)\r
5627{\r
5628 THREAD_ATOMIC_START;\r
5629\r
5630 exec_end_call_list();\r
5631\r
5632#ifdef ONIG_DEBUG_STATISTICS\r
5633 onig_print_statistics(stderr);\r
5634#endif\r
5635\r
5636#ifdef USE_SHARED_CCLASS_TABLE\r
5637 onig_free_shared_cclass_table();\r
5638#endif\r
5639\r
5640#ifdef USE_PARSE_TREE_NODE_RECYCLE\r
5641 onig_free_node_list();\r
5642#endif\r
5643\r
5644 onig_inited = 0;\r
5645\r
5646 THREAD_ATOMIC_END;\r
5647 THREAD_SYSTEM_END;\r
5648 return 0;\r
5649}\r
5650\r
5651extern int\r
5652onig_is_in_code_range(const UChar* p, OnigCodePoint code)\r
5653{\r
5654 OnigCodePoint n, *data;\r
5655 OnigCodePoint low, high, x;\r
5656\r
5657 GET_CODE_POINT(n, p);\r
5658 data = (OnigCodePoint* )p;\r
5659 data++;\r
5660\r
5661 for (low = 0, high = n; low < high; ) {\r
5662 x = (low + high) >> 1;\r
5663 if (code > data[x * 2 + 1])\r
5664 low = x + 1;\r
5665 else\r
5666 high = x;\r
5667 }\r
5668\r
5669 return ((low < n && code >= data[low * 2]) ? 1 : 0);\r
5670}\r
5671\r
5672extern int\r
5673onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc)\r
5674{\r
5675 int found;\r
5676\r
5677 if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {\r
5678 if (IS_NULL(cc->mbuf)) {\r
5679 found = 0;\r
5680 }\r
5681 else {\r
5682 found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);\r
5683 }\r
5684 }\r
5685 else {\r
5686 found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);\r
5687 }\r
5688\r
5689 if (IS_NCCLASS_NOT(cc))\r
5690 return !found;\r
5691 else\r
5692 return found;\r
5693}\r
5694\r
5695extern int\r
5696onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)\r
5697{\r
5698 int len;\r
5699\r
5700 if (ONIGENC_MBC_MINLEN(enc) > 1) {\r
5701 len = 2;\r
5702 }\r
5703 else {\r
5704 len = ONIGENC_CODE_TO_MBCLEN(enc, code);\r
5705 }\r
5706 return onig_is_code_in_cc_len(len, code, cc);\r
5707}\r
5708\r
5709\r
5710#ifdef ONIG_DEBUG\r
5711\r
5712/* arguments type */\r
5713#define ARG_SPECIAL -1\r
5714#define ARG_NON 0\r
5715#define ARG_RELADDR 1\r
5716#define ARG_ABSADDR 2\r
5717#define ARG_LENGTH 3\r
5718#define ARG_MEMNUM 4\r
5719#define ARG_OPTION 5\r
5720#define ARG_STATE_CHECK 6\r
5721\r
5722OnigOpInfoType OnigOpInfo[] = {\r
5723 { OP_FINISH, "finish", ARG_NON },\r
5724 { OP_END, "end", ARG_NON },\r
5725 { OP_EXACT1, "exact1", ARG_SPECIAL },\r
5726 { OP_EXACT2, "exact2", ARG_SPECIAL },\r
5727 { OP_EXACT3, "exact3", ARG_SPECIAL },\r
5728 { OP_EXACT4, "exact4", ARG_SPECIAL },\r
5729 { OP_EXACT5, "exact5", ARG_SPECIAL },\r
5730 { OP_EXACTN, "exactn", ARG_SPECIAL },\r
5731 { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL },\r
5732 { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL },\r
5733 { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL },\r
5734 { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL },\r
5735 { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL },\r
5736 { OP_EXACTMBN, "exactmbn", ARG_SPECIAL },\r
5737 { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL },\r
5738 { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL },\r
5739 { OP_CCLASS, "cclass", ARG_SPECIAL },\r
5740 { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL },\r
5741 { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL },\r
5742 { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL },\r
5743 { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL },\r
5744 { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL },\r
5745 { OP_CCLASS_NODE, "cclass-node", ARG_SPECIAL },\r
5746 { OP_ANYCHAR, "anychar", ARG_NON },\r
5747 { OP_ANYCHAR_ML, "anychar-ml", ARG_NON },\r
5748 { OP_ANYCHAR_STAR, "anychar*", ARG_NON },\r
5749 { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON },\r
5750 { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },\r
5751 { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },\r
5752 { OP_WORD, "word", ARG_NON },\r
5753 { OP_NOT_WORD, "not-word", ARG_NON },\r
5754 { OP_WORD_BOUND, "word-bound", ARG_NON },\r
5755 { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON },\r
5756 { OP_WORD_BEGIN, "word-begin", ARG_NON },\r
5757 { OP_WORD_END, "word-end", ARG_NON },\r
5758 { OP_BEGIN_BUF, "begin-buf", ARG_NON },\r
5759 { OP_END_BUF, "end-buf", ARG_NON },\r
5760 { OP_BEGIN_LINE, "begin-line", ARG_NON },\r
5761 { OP_END_LINE, "end-line", ARG_NON },\r
5762 { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON },\r
5763 { OP_BEGIN_POSITION, "begin-position", ARG_NON },\r
5764 { OP_BACKREF1, "backref1", ARG_NON },\r
5765 { OP_BACKREF2, "backref2", ARG_NON },\r
5766 { OP_BACKREFN, "backrefn", ARG_MEMNUM },\r
5767 { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL },\r
5768 { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },\r
5769 { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL },\r
5770 { OP_BACKREF_WITH_LEVEL, "backref_at_level", ARG_SPECIAL },\r
5771 { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM },\r
5772 { OP_MEMORY_START, "mem-start", ARG_MEMNUM },\r
5773 { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM },\r
5774 { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM },\r
5775 { OP_MEMORY_END, "mem-end", ARG_MEMNUM },\r
5776 { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM },\r
5777 { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION },\r
5778 { OP_SET_OPTION, "set-option", ARG_OPTION },\r
5779 { OP_FAIL, "fail", ARG_NON },\r
5780 { OP_JUMP, "jump", ARG_RELADDR },\r
5781 { OP_PUSH, "push", ARG_RELADDR },\r
5782 { OP_POP, "pop", ARG_NON },\r
5783 { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL },\r
5784 { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL },\r
5785 { OP_REPEAT, "repeat", ARG_SPECIAL },\r
5786 { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL },\r
5787 { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM },\r
5788 { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM },\r
5789 { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM },\r
5790 { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM },\r
5791 { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM },\r
5792 { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM },\r
5793 { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM },\r
5794 { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM },\r
5795 { OP_PUSH_POS, "push-pos", ARG_NON },\r
5796 { OP_POP_POS, "pop-pos", ARG_NON },\r
5797 { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR },\r
5798 { OP_FAIL_POS, "fail-pos", ARG_NON },\r
5799 { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON },\r
5800 { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON },\r
5801 { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL },\r
5802 { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },\r
5803 { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },\r
5804 { OP_CALL, "call", ARG_ABSADDR },\r
5805 { OP_RETURN, "return", ARG_NON },\r
5806 { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL },\r
5807 { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },\r
5808 { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK },\r
5809 { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK },\r
5810 { OP_STATE_CHECK_ANYCHAR_ML_STAR,\r
5811 "state-check-anychar-ml*", ARG_STATE_CHECK },\r
5812 { -1, "", ARG_NON }\r
5813};\r
5814\r
5815static char*\r
5816op2name(int opcode)\r
5817{\r
5818 int i;\r
5819\r
5820 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {\r
5821 if (opcode == OnigOpInfo[i].opcode)\r
5822 return OnigOpInfo[i].name;\r
5823 }\r
5824 return "";\r
5825}\r
5826\r
5827static int\r
5828op2arg_type(int opcode)\r
5829{\r
5830 int i;\r
5831\r
5832 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {\r
5833 if (opcode == OnigOpInfo[i].opcode)\r
5834 return OnigOpInfo[i].arg_type;\r
5835 }\r
5836 return ARG_SPECIAL;\r
5837}\r
5838\r
5839static void\r
5840Indent(FILE* f, int indent)\r
5841{\r
5842 int i;\r
5843 for (i = 0; i < indent; i++) putc(' ', f);\r
5844}\r
5845\r
5846static void\r
5847p_string(FILE* f, int len, UChar* s)\r
5848{\r
5849 fputs(":", f);\r
5850 while (len-- > 0) { fputc(*s++, f); }\r
5851}\r
5852\r
5853static void\r
5854p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)\r
5855{\r
5856 int x = len * mb_len;\r
5857\r
5858 fprintf(f, ":%d:", len);\r
5859 while (x-- > 0) { fputc(*s++, f); }\r
5860}\r
5861\r
5862extern void\r
5863onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp,\r
5864 OnigEncoding enc)\r
5865{\r
5866 int i, n, arg_type;\r
5867 RelAddrType addr;\r
5868 LengthType len;\r
5869 MemNumType mem;\r
5870 StateCheckNumType scn;\r
5871 OnigCodePoint code;\r
5872 UChar *q;\r
5873\r
5874 fprintf(f, "[%s", op2name(*bp));\r
5875 arg_type = op2arg_type(*bp);\r
5876 if (arg_type != ARG_SPECIAL) {\r
5877 bp++;\r
5878 switch (arg_type) {\r
5879 case ARG_NON:\r
5880 break;\r
5881 case ARG_RELADDR:\r
5882 GET_RELADDR_INC(addr, bp);\r
5883 fprintf(f, ":(%d)", addr);\r
5884 break;\r
5885 case ARG_ABSADDR:\r
5886 GET_ABSADDR_INC(addr, bp);\r
5887 fprintf(f, ":(%d)", addr);\r
5888 break;\r
5889 case ARG_LENGTH:\r
5890 GET_LENGTH_INC(len, bp);\r
5891 fprintf(f, ":%d", len);\r
5892 break;\r
5893 case ARG_MEMNUM:\r
5894 mem = *((MemNumType* )bp);\r
5895 bp += SIZE_MEMNUM;\r
5896 fprintf(f, ":%d", mem);\r
5897 break;\r
5898 case ARG_OPTION:\r
5899 {\r
5900 OnigOptionType option = *((OnigOptionType* )bp);\r
5901 bp += SIZE_OPTION;\r
5902 fprintf(f, ":%d", option);\r
5903 }\r
5904 break;\r
5905\r
5906 case ARG_STATE_CHECK:\r
5907 scn = *((StateCheckNumType* )bp);\r
5908 bp += SIZE_STATE_CHECK_NUM;\r
5909 fprintf(f, ":%d", scn);\r
5910 break;\r
5911 }\r
5912 }\r
5913 else {\r
5914 switch (*bp++) {\r
5915 case OP_EXACT1:\r
5916 case OP_ANYCHAR_STAR_PEEK_NEXT:\r
5917 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:\r
5918 p_string(f, 1, bp++); break;\r
5919 case OP_EXACT2:\r
5920 p_string(f, 2, bp); bp += 2; break;\r
5921 case OP_EXACT3:\r
5922 p_string(f, 3, bp); bp += 3; break;\r
5923 case OP_EXACT4:\r
5924 p_string(f, 4, bp); bp += 4; break;\r
5925 case OP_EXACT5:\r
5926 p_string(f, 5, bp); bp += 5; break;\r
5927 case OP_EXACTN:\r
5928 GET_LENGTH_INC(len, bp);\r
5929 p_len_string(f, len, 1, bp);\r
5930 bp += len;\r
5931 break;\r
5932 \r
5933 case OP_EXACTMB2N1:\r
5934 p_string(f, 2, bp); bp += 2; break;\r
5935 case OP_EXACTMB2N2:\r
5936 p_string(f, 4, bp); bp += 4; break;\r
5937 case OP_EXACTMB2N3:\r
5938 p_string(f, 6, bp); bp += 6; break;\r
5939 case OP_EXACTMB2N:\r
5940 GET_LENGTH_INC(len, bp);\r
5941 p_len_string(f, len, 2, bp);\r
5942 bp += len * 2;\r
5943 break;\r
5944 case OP_EXACTMB3N:\r
5945 GET_LENGTH_INC(len, bp);\r
5946 p_len_string(f, len, 3, bp);\r
5947 bp += len * 3;\r
5948 break;\r
5949 case OP_EXACTMBN:\r
5950 {\r
5951 int mb_len;\r
5952 \r
5953 GET_LENGTH_INC(mb_len, bp);\r
5954 GET_LENGTH_INC(len, bp);\r
5955 fprintf(f, ":%d:%d:", mb_len, len);\r
5956 n = len * mb_len;\r
5957 while (n-- > 0) { fputc(*bp++, f); }\r
5958 }\r
5959 break;\r
5960\r
5961 case OP_EXACT1_IC:\r
5962 len = enclen(enc, bp);\r
5963 p_string(f, len, bp);\r
5964 bp += len;\r
5965 break;\r
5966 case OP_EXACTN_IC:\r
5967 GET_LENGTH_INC(len, bp);\r
5968 p_len_string(f, len, 1, bp);\r
5969 bp += len;\r
5970 break;\r
5971\r
5972 case OP_CCLASS:\r
5973 n = bitset_on_num((BitSetRef )bp);\r
5974 bp += SIZE_BITSET;\r
5975 fprintf(f, ":%d", n);\r
5976 break;\r
5977\r
5978 case OP_CCLASS_NOT:\r
5979 n = bitset_on_num((BitSetRef )bp);\r
5980 bp += SIZE_BITSET;\r
5981 fprintf(f, ":%d", n);\r
5982 break;\r
5983\r
5984 case OP_CCLASS_MB:\r
5985 case OP_CCLASS_MB_NOT:\r
5986 GET_LENGTH_INC(len, bp);\r
5987 q = bp;\r
5988#ifndef PLATFORM_UNALIGNED_WORD_ACCESS\r
5989 ALIGNMENT_RIGHT(q);\r
5990#endif\r
5991 GET_CODE_POINT(code, q);\r
5992 bp += len;\r
5993 fprintf(f, ":%d:%d", (int )code, len);\r
5994 break;\r
5995\r
5996 case OP_CCLASS_MIX:\r
5997 case OP_CCLASS_MIX_NOT:\r
5998 n = bitset_on_num((BitSetRef )bp);\r
5999 bp += SIZE_BITSET;\r
6000 GET_LENGTH_INC(len, bp);\r
6001 q = bp;\r
6002#ifndef PLATFORM_UNALIGNED_WORD_ACCESS\r
6003 ALIGNMENT_RIGHT(q);\r
6004#endif\r
6005 GET_CODE_POINT(code, q);\r
6006 bp += len;\r
6007 fprintf(f, ":%d:%d:%d", n, (int )code, len);\r
6008 break;\r
6009\r
6010 case OP_CCLASS_NODE:\r
6011 {\r
6012 CClassNode *cc;\r
6013\r
6014 GET_POINTER_INC(cc, bp);\r
6015 n = bitset_on_num(cc->bs);\r
6016 fprintf(f, ":%u:%d", (unsigned int )cc, n);\r
6017 }\r
6018 break;\r
6019\r
6020 case OP_BACKREFN_IC:\r
6021 mem = *((MemNumType* )bp);\r
6022 bp += SIZE_MEMNUM;\r
6023 fprintf(f, ":%d", mem);\r
6024 break;\r
6025\r
6026 case OP_BACKREF_MULTI_IC:\r
6027 case OP_BACKREF_MULTI:\r
6028 fputs(" ", f);\r
6029 GET_LENGTH_INC(len, bp);\r
6030 for (i = 0; i < len; i++) {\r
6031 GET_MEMNUM_INC(mem, bp);\r
6032 if (i > 0) fputs(", ", f);\r
6033 fprintf(f, "%d", mem);\r
6034 }\r
6035 break;\r
6036\r
6037 case OP_BACKREF_WITH_LEVEL:\r
6038 {\r
6039 OnigOptionType option;\r
6040 LengthType level;\r
6041\r
6042 GET_OPTION_INC(option, bp);\r
6043 fprintf(f, ":%d", option);\r
6044 GET_LENGTH_INC(level, bp);\r
6045 fprintf(f, ":%d", level);\r
6046\r
6047 fputs(" ", f);\r
6048 GET_LENGTH_INC(len, bp);\r
6049 for (i = 0; i < len; i++) {\r
6050 GET_MEMNUM_INC(mem, bp);\r
6051 if (i > 0) fputs(", ", f);\r
6052 fprintf(f, "%d", mem);\r
6053 }\r
6054 }\r
6055 break;\r
6056\r
6057 case OP_REPEAT:\r
6058 case OP_REPEAT_NG:\r
6059 {\r
6060 mem = *((MemNumType* )bp);\r
6061 bp += SIZE_MEMNUM;\r
6062 addr = *((RelAddrType* )bp);\r
6063 bp += SIZE_RELADDR;\r
6064 fprintf(f, ":%d:%d", mem, addr);\r
6065 }\r
6066 break;\r
6067\r
6068 case OP_PUSH_OR_JUMP_EXACT1:\r
6069 case OP_PUSH_IF_PEEK_NEXT:\r
6070 addr = *((RelAddrType* )bp);\r
6071 bp += SIZE_RELADDR;\r
6072 fprintf(f, ":(%d)", addr);\r
6073 p_string(f, 1, bp);\r
6074 bp += 1;\r
6075 break;\r
6076\r
6077 case OP_LOOK_BEHIND:\r
6078 GET_LENGTH_INC(len, bp);\r
6079 fprintf(f, ":%d", len);\r
6080 break;\r
6081\r
6082 case OP_PUSH_LOOK_BEHIND_NOT:\r
6083 GET_RELADDR_INC(addr, bp);\r
6084 GET_LENGTH_INC(len, bp);\r
6085 fprintf(f, ":%d:(%d)", len, addr);\r
6086 break;\r
6087\r
6088 case OP_STATE_CHECK_PUSH:\r
6089 case OP_STATE_CHECK_PUSH_OR_JUMP:\r
6090 scn = *((StateCheckNumType* )bp);\r
6091 bp += SIZE_STATE_CHECK_NUM;\r
6092 addr = *((RelAddrType* )bp);\r
6093 bp += SIZE_RELADDR;\r
6094 fprintf(f, ":%d:(%d)", scn, addr);\r
6095 break;\r
6096\r
6097 default:\r
6098 fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",\r
6099 *--bp);\r
6100 }\r
6101 }\r
6102 fputs("]", f);\r
6103 if (nextp) *nextp = bp;\r
6104}\r
6105\r
6106static void\r
6107print_compiled_byte_code_list(FILE* f, regex_t* reg)\r
6108{\r
6109 int ncode;\r
6110 UChar* bp = reg->p;\r
6111 UChar* end = reg->p + reg->used;\r
6112\r
6113 fprintf(f, "code length: %d\n", reg->used);\r
6114\r
6115 ncode = 0;\r
6116 while (bp < end) {\r
6117 ncode++;\r
6118 if (bp > reg->p) {\r
6119 if (ncode % 5 == 0)\r
6120 fprintf(f, "\n");\r
6121 else\r
6122 fputs(" ", f);\r
6123 }\r
6124 onig_print_compiled_byte_code(f, bp, &bp, reg->enc);\r
6125 }\r
6126\r
6127 fprintf(f, "\n");\r
6128}\r
6129\r
6130static void\r
6131print_indent_tree(FILE* f, Node* node, int indent)\r
6132{\r
6133 int i, type;\r
6134 int add = 3;\r
6135 UChar* p;\r
6136\r
6137 Indent(f, indent);\r
6138 if (IS_NULL(node)) {\r
6139 fprintf(f, "ERROR: null node!!!\n");\r
6140 exit (0);\r
6141 }\r
6142\r
6143 type = NTYPE(node);\r
6144 switch (type) {\r
6145 case NT_LIST:\r
6146 case NT_ALT:\r
6147 if (NTYPE(node) == NT_LIST)\r
6148 fprintf(f, "<list:%x>\n", (int )node);\r
6149 else\r
6150 fprintf(f, "<alt:%x>\n", (int )node);\r
6151\r
6152 print_indent_tree(f, NCAR(node), indent + add);\r
6153 while (IS_NOT_NULL(node = NCDR(node))) {\r
6154 if (NTYPE(node) != type) {\r
6155 fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));\r
6156 exit(0);\r
6157 }\r
6158 print_indent_tree(f, NCAR(node), indent + add);\r
6159 }\r
6160 break;\r
6161\r
6162 case NT_STR:\r
6163 fprintf(f, "<string%s:%x>",\r
6164 (NSTRING_IS_RAW(node) ? "-raw" : ""), (int )node);\r
6165 for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {\r
6166 if (*p >= 0x20 && *p < 0x7f)\r
6167 fputc(*p, f);\r
6168 else {\r
6169 fprintf(f, " 0x%02x", *p);\r
6170 }\r
6171 }\r
6172 break;\r
6173\r
6174 case NT_CCLASS:\r
6175 fprintf(f, "<cclass:%x>", (int )node);\r
6176 if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(" not", f);\r
6177 if (NCCLASS(node)->mbuf) {\r
6178 BBuf* bbuf = NCCLASS(node)->mbuf;\r
6179 for (i = 0; i < bbuf->used; i++) {\r
6180 if (i > 0) fprintf(f, ",");\r
6181 fprintf(f, "%0x", bbuf->p[i]);\r
6182 }\r
6183 }\r
6184 break;\r
6185\r
6186 case NT_CTYPE:\r
6187 fprintf(f, "<ctype:%x> ", (int )node);\r
6188 switch (NCTYPE(node)->ctype) {\r
6189 case ONIGENC_CTYPE_WORD:\r
6190 if (NCTYPE(node)->not != 0)\r
6191 fputs("not word", f);\r
6192 else\r
6193 fputs("word", f);\r
6194 break;\r
6195\r
6196 default:\r
6197 fprintf(f, "ERROR: undefined ctype.\n");\r
6198 exit(0);\r
6199 }\r
6200 break;\r
6201\r
6202 case NT_CANY:\r
6203 fprintf(f, "<anychar:%x>", (int )node);\r
6204 break;\r
6205\r
6206 case NT_ANCHOR:\r
6207 fprintf(f, "<anchor:%x> ", (int )node);\r
6208 switch (NANCHOR(node)->type) {\r
6209 case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break;\r
6210 case ANCHOR_END_BUF: fputs("end buf", f); break;\r
6211 case ANCHOR_BEGIN_LINE: fputs("begin line", f); break;\r
6212 case ANCHOR_END_LINE: fputs("end line", f); break;\r
6213 case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break;\r
6214 case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;\r
6215\r
6216 case ANCHOR_WORD_BOUND: fputs("word bound", f); break;\r
6217 case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break;\r
6218#ifdef USE_WORD_BEGIN_END\r
6219 case ANCHOR_WORD_BEGIN: fputs("word begin", f); break;\r
6220 case ANCHOR_WORD_END: fputs("word end", f); break;\r
6221#endif\r
6222 case ANCHOR_PREC_READ: fputs("prec read", f); break;\r
6223 case ANCHOR_PREC_READ_NOT: fputs("prec read not", f); break;\r
6224 case ANCHOR_LOOK_BEHIND: fputs("look_behind", f); break;\r
6225 case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); break;\r
6226\r
6227 default:\r
6228 fprintf(f, "ERROR: undefined anchor type.\n");\r
6229 break;\r
6230 }\r
6231 break;\r
6232\r
6233 case NT_BREF:\r
6234 {\r
6235 int* p;\r
6236 BRefNode* br = NBREF(node);\r
6237 p = BACKREFS_P(br);\r
6238 fprintf(f, "<backref:%x>", (int )node);\r
6239 for (i = 0; i < br->back_num; i++) {\r
6240 if (i > 0) fputs(", ", f);\r
6241 fprintf(f, "%d", p[i]);\r
6242 }\r
6243 }\r
6244 break;\r
6245\r
6246#ifdef USE_SUBEXP_CALL\r
6247 case NT_CALL:\r
6248 {\r
6249 CallNode* cn = NCALL(node);\r
6250 fprintf(f, "<call:%x>", (int )node);\r
6251 p_string(f, cn->name_end - cn->name, cn->name);\r
6252 }\r
6253 break;\r
6254#endif\r
6255\r
6256 case NT_QTFR:\r
6257 fprintf(f, "<quantifier:%x>{%d,%d}%s\n", (int )node,\r
6258 NQTFR(node)->lower, NQTFR(node)->upper,\r
6259 (NQTFR(node)->greedy ? "" : "?"));\r
6260 print_indent_tree(f, NQTFR(node)->target, indent + add);\r
6261 break;\r
6262\r
6263 case NT_ENCLOSE:\r
6264 fprintf(f, "<enclose:%x> ", (int )node);\r
6265 switch (NENCLOSE(node)->type) {\r
6266 case ENCLOSE_OPTION:\r
6267 fprintf(f, "option:%d", NENCLOSE(node)->option);\r
6268 break;\r
6269 case ENCLOSE_MEMORY:\r
6270 fprintf(f, "memory:%d", NENCLOSE(node)->regnum);\r
6271 break;\r
6272 case ENCLOSE_STOP_BACKTRACK:\r
6273 fprintf(f, "stop-bt");\r
6274 break;\r
6275\r
6276 default:\r
6277 break;\r
6278 }\r
6279 fprintf(f, "\n");\r
6280 print_indent_tree(f, NENCLOSE(node)->target, indent + add);\r
6281 break;\r
6282\r
6283 default:\r
6284 fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node));\r
6285 break;\r
6286 }\r
6287\r
6288 if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&\r
6289 type != NT_ENCLOSE)\r
6290 fprintf(f, "\n");\r
6291 fflush(f);\r
6292}\r
6293#endif /* ONIG_DEBUG */\r
6294\r
6295#ifdef ONIG_DEBUG_PARSE_TREE\r
6296static void\r
6297print_tree(FILE* f, Node* node)\r
6298{\r
6299 print_indent_tree(f, node, 0);\r
6300}\r
6301#endif\r