]> git.proxmox.com Git - mirror_edk2.git/blame - MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regcomp.c
MdeModulePkg: Delete Tcp4Dxe in MdeModulePkg.
[mirror_edk2.git] / MdeModulePkg / Universal / RegularExpressionDxe / Oniguruma / regcomp.c
CommitLineData
14b0e578
CS
1/**********************************************************************\r
2 regcomp.c - Oniguruma (regular expression library)\r
3**********************************************************************/\r
4/*-\r
b602265d 5 * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>\r
14b0e578
CS
6 * All rights reserved.\r
7 *\r
14b0e578
CS
8 * Redistribution and use in source and binary forms, with or without\r
9 * modification, are permitted provided that the following conditions\r
10 * are met:\r
11 * 1. Redistributions of source code must retain the above copyright\r
12 * notice, this list of conditions and the following disclaimer.\r
13 * 2. Redistributions in binary form must reproduce the above copyright\r
14 * notice, this list of conditions and the following disclaimer in the\r
15 * documentation and/or other materials provided with the distribution.\r
16 *\r
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND\r
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\r
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\r
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE\r
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\r
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS\r
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)\r
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT\r
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY\r
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF\r
27 * SUCH DAMAGE.\r
28 */\r
29\r
30#include "regparse.h"\r
31\r
32OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;\r
33\r
b602265d
DG
34#if 0\r
35typedef struct {\r
36 int n;\r
37 int alloc;\r
38 int* v;\r
39} int_stack;\r
40\r
41static int\r
42make_int_stack(int_stack** rs, int init_size)\r
43{\r
44 int_stack* s;\r
45 int* v;\r
46\r
47 *rs = 0;\r
48\r
49 s = xmalloc(sizeof(*s));\r
50 if (IS_NULL(s)) return ONIGERR_MEMORY;\r
51\r
52 v = (int* )xmalloc(sizeof(int) * init_size);\r
53 if (IS_NULL(v)) {\r
54 xfree(s);\r
55 return ONIGERR_MEMORY;\r
56 }\r
57\r
58 s->n = 0;\r
59 s->alloc = init_size;\r
60 s->v = v;\r
61\r
62 *rs = s;\r
63 return ONIG_NORMAL;\r
64}\r
65\r
66static void\r
67free_int_stack(int_stack* s)\r
68{\r
69 if (IS_NOT_NULL(s)) {\r
70 if (IS_NOT_NULL(s->v))\r
71 xfree(s->v);\r
72 xfree(s);\r
73 }\r
74}\r
75\r
76static int\r
77int_stack_push(int_stack* s, int v)\r
78{\r
79 if (s->n >= s->alloc) {\r
80 int new_size = s->alloc * 2;\r
81 int* nv = (int* )xrealloc(s->v, sizeof(int) * new_size);\r
82 if (IS_NULL(nv)) return ONIGERR_MEMORY;\r
83\r
84 s->alloc = new_size;\r
85 s->v = nv;\r
86 }\r
87\r
88 s->v[s->n] = v;\r
89 s->n++;\r
90 return ONIG_NORMAL;\r
91}\r
92\r
93static int\r
94int_stack_pop(int_stack* s)\r
95{\r
96 int v;\r
97\r
98#ifdef ONIG_DEBUG\r
99 if (s->n <= 0) {\r
100 fprintf(stderr, "int_stack_pop: fail empty. %p\n", s);\r
101 return 0;\r
102 }\r
103#endif\r
104\r
105 v = s->v[s->n];\r
106 s->n--;\r
107 return v;\r
108}\r
109#endif\r
110\r
14b0e578
CS
111extern OnigCaseFoldType\r
112onig_get_default_case_fold_flag(void)\r
113{\r
114 return OnigDefaultCaseFoldFlag;\r
115}\r
116\r
117extern int\r
118onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)\r
119{\r
120 OnigDefaultCaseFoldFlag = case_fold_flag;\r
121 return 0;\r
122}\r
123\r
b602265d
DG
124static int\r
125int_multiply_cmp(int x, int y, int v)\r
14b0e578 126{\r
b602265d 127 if (x == 0 || y == 0) return -1;\r
14b0e578 128\r
b602265d
DG
129 if (x < INT_MAX / y) {\r
130 int xy = x * y;\r
131 if (xy > v) return 1;\r
132 else {\r
133 if (xy == v) return 0;\r
134 else return -1;\r
135 }\r
14b0e578 136 }\r
b602265d
DG
137 else\r
138 return 1;\r
14b0e578
CS
139}\r
140\r
b602265d
DG
141\r
142#ifndef PLATFORM_UNALIGNED_WORD_ACCESS\r
143static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];\r
144#endif\r
145\r
14b0e578
CS
146static void\r
147swap_node(Node* a, Node* b)\r
148{\r
149 Node c;\r
14b0e578 150\r
b602265d
DG
151 c = *a; *a = *b; *b = c;\r
152\r
153 if (NODE_TYPE(a) == NODE_STRING) {\r
154 StrNode* sn = STR_(a);\r
14b0e578 155 if (sn->capa == 0) {\r
b602265d 156 int len = (int )(sn->end - sn->s);\r
14b0e578
CS
157 sn->s = sn->buf;\r
158 sn->end = sn->s + len;\r
159 }\r
160 }\r
161\r
b602265d
DG
162 if (NODE_TYPE(b) == NODE_STRING) {\r
163 StrNode* sn = STR_(b);\r
14b0e578 164 if (sn->capa == 0) {\r
b602265d 165 int len = (int )(sn->end - sn->s);\r
14b0e578
CS
166 sn->s = sn->buf;\r
167 sn->end = sn->s + len;\r
168 }\r
169 }\r
170}\r
171\r
b602265d
DG
172static OnigLen\r
173distance_add(OnigLen d1, OnigLen d2)\r
14b0e578 174{\r
b602265d
DG
175 if (d1 == INFINITE_LEN || d2 == INFINITE_LEN)\r
176 return INFINITE_LEN;\r
14b0e578 177 else {\r
b602265d
DG
178 if (d1 <= INFINITE_LEN - d2) return d1 + d2;\r
179 else return INFINITE_LEN;\r
14b0e578
CS
180 }\r
181}\r
182\r
b602265d
DG
183static OnigLen\r
184distance_multiply(OnigLen d, int m)\r
14b0e578
CS
185{\r
186 if (m == 0) return 0;\r
187\r
b602265d 188 if (d < INFINITE_LEN / m)\r
14b0e578
CS
189 return d * m;\r
190 else\r
b602265d 191 return INFINITE_LEN;\r
14b0e578
CS
192}\r
193\r
194static int\r
195bitset_is_empty(BitSetRef bs)\r
196{\r
197 int i;\r
b602265d 198\r
14b0e578
CS
199 for (i = 0; i < (int )BITSET_SIZE; i++) {\r
200 if (bs[i] != 0) return 0;\r
201 }\r
202 return 1;\r
203}\r
204\r
14b0e578
CS
205extern int\r
206onig_bbuf_init(BBuf* buf, int size)\r
207{\r
208 if (size <= 0) {\r
209 size = 0;\r
210 buf->p = NULL;\r
211 }\r
212 else {\r
213 buf->p = (UChar* )xmalloc(size);\r
214 if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);\r
215 }\r
216\r
217 buf->alloc = size;\r
218 buf->used = 0;\r
219 return 0;\r
220}\r
221\r
222\r
b602265d 223#ifdef USE_CALL\r
14b0e578
CS
224\r
225static int\r
b602265d 226unset_addr_list_init(UnsetAddrList* list, int size)\r
14b0e578 227{\r
b602265d 228 UnsetAddr* p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);\r
14b0e578 229 CHECK_NULL_RETURN_MEMERR(p);\r
b602265d
DG
230\r
231 list->num = 0;\r
232 list->alloc = size;\r
233 list->us = p;\r
14b0e578
CS
234 return 0;\r
235}\r
236\r
237static void\r
b602265d 238unset_addr_list_end(UnsetAddrList* list)\r
14b0e578 239{\r
b602265d
DG
240 if (IS_NOT_NULL(list->us))\r
241 xfree(list->us);\r
14b0e578
CS
242}\r
243\r
244static int\r
b602265d 245unset_addr_list_add(UnsetAddrList* list, int offset, struct _Node* node)\r
14b0e578
CS
246{\r
247 UnsetAddr* p;\r
248 int size;\r
249\r
b602265d
DG
250 if (list->num >= list->alloc) {\r
251 size = list->alloc * 2;\r
252 p = (UnsetAddr* )xrealloc(list->us, sizeof(UnsetAddr) * size, sizeof(UnsetAddr)* list->alloc);\r
14b0e578 253 CHECK_NULL_RETURN_MEMERR(p);\r
b602265d
DG
254 list->alloc = size;\r
255 list->us = p;\r
14b0e578
CS
256 }\r
257\r
b602265d
DG
258 list->us[list->num].offset = offset;\r
259 list->us[list->num].target = node;\r
260 list->num++;\r
14b0e578
CS
261 return 0;\r
262}\r
b602265d 263#endif /* USE_CALL */\r
14b0e578
CS
264\r
265\r
266static int\r
267add_opcode(regex_t* reg, int opcode)\r
268{\r
b602265d 269 BB_ADD1(reg, opcode);\r
14b0e578
CS
270 return 0;\r
271}\r
14b0e578
CS
272\r
273static int\r
274add_rel_addr(regex_t* reg, int addr)\r
275{\r
276 RelAddrType ra = (RelAddrType )addr;\r
277\r
b602265d 278 BB_ADD(reg, &ra, SIZE_RELADDR);\r
14b0e578
CS
279 return 0;\r
280}\r
281\r
282static int\r
283add_abs_addr(regex_t* reg, int addr)\r
284{\r
285 AbsAddrType ra = (AbsAddrType )addr;\r
286\r
b602265d 287 BB_ADD(reg, &ra, SIZE_ABSADDR);\r
14b0e578
CS
288 return 0;\r
289}\r
290\r
291static int\r
292add_length(regex_t* reg, int len)\r
293{\r
294 LengthType l = (LengthType )len;\r
295\r
b602265d 296 BB_ADD(reg, &l, SIZE_LENGTH);\r
14b0e578
CS
297 return 0;\r
298}\r
299\r
300static int\r
301add_mem_num(regex_t* reg, int num)\r
302{\r
303 MemNumType n = (MemNumType )num;\r
304\r
b602265d 305 BB_ADD(reg, &n, SIZE_MEMNUM);\r
14b0e578
CS
306 return 0;\r
307}\r
308\r
b602265d 309#if 0\r
14b0e578
CS
310static int\r
311add_pointer(regex_t* reg, void* addr)\r
312{\r
313 PointerType ptr = (PointerType )addr;\r
314\r
b602265d 315 BB_ADD(reg, &ptr, SIZE_POINTER);\r
14b0e578
CS
316 return 0;\r
317}\r
b602265d 318#endif\r
14b0e578
CS
319\r
320static int\r
321add_option(regex_t* reg, OnigOptionType option)\r
322{\r
b602265d 323 BB_ADD(reg, &option, SIZE_OPTION);\r
14b0e578
CS
324 return 0;\r
325}\r
326\r
327static int\r
b602265d 328add_save_type(regex_t* reg, enum SaveType type)\r
14b0e578 329{\r
b602265d 330 SaveType t = (SaveType )type;\r
14b0e578 331\r
b602265d
DG
332 BB_ADD(reg, &t, SIZE_SAVE_TYPE);\r
333 return 0;\r
14b0e578
CS
334}\r
335\r
336static int\r
b602265d 337add_update_var_type(regex_t* reg, enum UpdateVarType type)\r
14b0e578 338{\r
b602265d
DG
339 UpdateVarType t = (UpdateVarType )type;\r
340\r
341 BB_ADD(reg, &t, SIZE_UPDATE_VAR_TYPE);\r
14b0e578
CS
342 return 0;\r
343}\r
344\r
345static int\r
b602265d 346add_mode(regex_t* reg, ModeType mode)\r
14b0e578 347{\r
b602265d 348 BB_ADD(reg, &mode, SIZE_MODE);\r
14b0e578
CS
349 return 0;\r
350}\r
351\r
352static int\r
b602265d 353add_opcode_rel_addr(regex_t* reg, int opcode, int addr)\r
14b0e578
CS
354{\r
355 int r;\r
356\r
357 r = add_opcode(reg, opcode);\r
b602265d
DG
358 if (r != 0) return r;\r
359 r = add_rel_addr(reg, addr);\r
14b0e578
CS
360 return r;\r
361}\r
362\r
b602265d
DG
363static int\r
364add_bytes(regex_t* reg, UChar* bytes, int len)\r
365{\r
366 BB_ADD(reg, bytes, len);\r
367 return 0;\r
368}\r
369\r
370static int\r
371add_bitset(regex_t* reg, BitSetRef bs)\r
372{\r
373 BB_ADD(reg, bs, SIZE_BITSET);\r
374 return 0;\r
375}\r
376\r
14b0e578 377static int compile_length_tree(Node* node, regex_t* reg);\r
b602265d 378static int compile_tree(Node* node, regex_t* reg, ScanEnv* env);\r
14b0e578
CS
379\r
380\r
381#define IS_NEED_STR_LEN_OP_EXACT(op) \\r
382 ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\\r
383 (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)\r
384\r
385static int\r
386select_str_opcode(int mb_len, int str_len, int ignore_case)\r
387{\r
388 int op;\r
389\r
390 if (ignore_case) {\r
391 switch (str_len) {\r
392 case 1: op = OP_EXACT1_IC; break;\r
393 default: op = OP_EXACTN_IC; break;\r
394 }\r
395 }\r
396 else {\r
397 switch (mb_len) {\r
398 case 1:\r
399 switch (str_len) {\r
400 case 1: op = OP_EXACT1; break;\r
401 case 2: op = OP_EXACT2; break;\r
402 case 3: op = OP_EXACT3; break;\r
403 case 4: op = OP_EXACT4; break;\r
404 case 5: op = OP_EXACT5; break;\r
405 default: op = OP_EXACTN; break;\r
406 }\r
407 break;\r
408\r
409 case 2:\r
410 switch (str_len) {\r
411 case 1: op = OP_EXACTMB2N1; break;\r
412 case 2: op = OP_EXACTMB2N2; break;\r
413 case 3: op = OP_EXACTMB2N3; break;\r
414 default: op = OP_EXACTMB2N; break;\r
415 }\r
416 break;\r
417\r
418 case 3:\r
419 op = OP_EXACTMB3N;\r
420 break;\r
421\r
422 default:\r
423 op = OP_EXACTMBN;\r
424 break;\r
425 }\r
426 }\r
427 return op;\r
428}\r
429\r
430static int\r
b602265d 431compile_tree_empty_check(Node* node, regex_t* reg, int empty_info, ScanEnv* env)\r
14b0e578
CS
432{\r
433 int r;\r
434 int saved_num_null_check = reg->num_null_check;\r
435\r
b602265d
DG
436 if (empty_info != QUANT_BODY_IS_NOT_EMPTY) {\r
437 r = add_opcode(reg, OP_EMPTY_CHECK_START);\r
438 if (r != 0) return r;\r
14b0e578 439 r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */\r
b602265d 440 if (r != 0) return r;\r
14b0e578
CS
441 reg->num_null_check++;\r
442 }\r
443\r
b602265d
DG
444 r = compile_tree(node, reg, env);\r
445 if (r != 0) return r;\r
14b0e578 446\r
b602265d
DG
447 if (empty_info != QUANT_BODY_IS_NOT_EMPTY) {\r
448 if (empty_info == QUANT_BODY_IS_EMPTY)\r
449 r = add_opcode(reg, OP_EMPTY_CHECK_END);\r
450 else if (empty_info == QUANT_BODY_IS_EMPTY_MEM)\r
451 r = add_opcode(reg, OP_EMPTY_CHECK_END_MEMST);\r
452 else if (empty_info == QUANT_BODY_IS_EMPTY_REC)\r
453 r = add_opcode(reg, OP_EMPTY_CHECK_END_MEMST_PUSH);\r
14b0e578 454\r
b602265d 455 if (r != 0) return r;\r
14b0e578
CS
456 r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */\r
457 }\r
458 return r;\r
459}\r
460\r
b602265d 461#ifdef USE_CALL\r
14b0e578 462static int\r
b602265d 463compile_call(CallNode* node, regex_t* reg, ScanEnv* env)\r
14b0e578
CS
464{\r
465 int r;\r
466\r
467 r = add_opcode(reg, OP_CALL);\r
b602265d
DG
468 if (r != 0) return r;\r
469 r = unset_addr_list_add(env->unset_addr_list, BB_GET_OFFSET_POS(reg),\r
470 NODE_CALL_BODY(node));\r
471 if (r != 0) return r;\r
14b0e578
CS
472 r = add_abs_addr(reg, 0 /*dummy addr.*/);\r
473 return r;\r
474}\r
475#endif\r
476\r
477static int\r
b602265d 478compile_tree_n_times(Node* node, int n, regex_t* reg, ScanEnv* env)\r
14b0e578
CS
479{\r
480 int i, r;\r
481\r
482 for (i = 0; i < n; i++) {\r
b602265d
DG
483 r = compile_tree(node, reg, env);\r
484 if (r != 0) return r;\r
14b0e578
CS
485 }\r
486 return 0;\r
487}\r
488\r
489static int\r
490add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, int str_len,\r
491 regex_t* reg ARG_UNUSED, int ignore_case)\r
492{\r
493 int len;\r
494 int op = select_str_opcode(mb_len, str_len, ignore_case);\r
495\r
496 len = SIZE_OPCODE;\r
497\r
498 if (op == OP_EXACTMBN) len += SIZE_LENGTH;\r
499 if (IS_NEED_STR_LEN_OP_EXACT(op))\r
500 len += SIZE_LENGTH;\r
501\r
502 len += mb_len * str_len;\r
503 return len;\r
504}\r
505\r
506static int\r
507add_compile_string(UChar* s, int mb_len, int str_len,\r
508 regex_t* reg, int ignore_case)\r
509{\r
510 int op = select_str_opcode(mb_len, str_len, ignore_case);\r
511 add_opcode(reg, op);\r
512\r
513 if (op == OP_EXACTMBN)\r
514 add_length(reg, mb_len);\r
515\r
516 if (IS_NEED_STR_LEN_OP_EXACT(op)) {\r
517 if (op == OP_EXACTN_IC)\r
518 add_length(reg, mb_len * str_len);\r
519 else\r
520 add_length(reg, str_len);\r
521 }\r
522\r
523 add_bytes(reg, s, mb_len * str_len);\r
524 return 0;\r
525}\r
526\r
527\r
528static int\r
529compile_length_string_node(Node* node, regex_t* reg)\r
530{\r
531 int rlen, r, len, prev_len, slen, ambig;\r
14b0e578
CS
532 UChar *p, *prev;\r
533 StrNode* sn;\r
b602265d 534 OnigEncoding enc = reg->enc;\r
14b0e578 535\r
b602265d 536 sn = STR_(node);\r
14b0e578
CS
537 if (sn->end <= sn->s)\r
538 return 0;\r
539\r
b602265d 540 ambig = NODE_STRING_IS_AMBIG(node);\r
14b0e578
CS
541\r
542 p = prev = sn->s;\r
543 prev_len = enclen(enc, p);\r
544 p += prev_len;\r
545 slen = 1;\r
546 rlen = 0;\r
547\r
548 for (; p < sn->end; ) {\r
549 len = enclen(enc, p);\r
550 if (len == prev_len) {\r
551 slen++;\r
552 }\r
553 else {\r
554 r = add_compile_string_length(prev, prev_len, slen, reg, ambig);\r
555 rlen += r;\r
556 prev = p;\r
557 slen = 1;\r
558 prev_len = len;\r
559 }\r
560 p += len;\r
561 }\r
b602265d 562\r
14b0e578
CS
563 r = add_compile_string_length(prev, prev_len, slen, reg, ambig);\r
564 rlen += r;\r
565 return rlen;\r
566}\r
567\r
568static int\r
569compile_length_string_raw_node(StrNode* sn, regex_t* reg)\r
570{\r
571 if (sn->end <= sn->s)\r
572 return 0;\r
573\r
b602265d
DG
574 return add_compile_string_length(sn->s, 1 /* sb */, (int )(sn->end - sn->s),\r
575 reg, 0);\r
14b0e578
CS
576}\r
577\r
578static int\r
579compile_string_node(Node* node, regex_t* reg)\r
580{\r
581 int r, len, prev_len, slen, ambig;\r
14b0e578
CS
582 UChar *p, *prev, *end;\r
583 StrNode* sn;\r
b602265d 584 OnigEncoding enc = reg->enc;\r
14b0e578 585\r
b602265d 586 sn = STR_(node);\r
14b0e578
CS
587 if (sn->end <= sn->s)\r
588 return 0;\r
589\r
590 end = sn->end;\r
b602265d 591 ambig = NODE_STRING_IS_AMBIG(node);\r
14b0e578
CS
592\r
593 p = prev = sn->s;\r
594 prev_len = enclen(enc, p);\r
595 p += prev_len;\r
596 slen = 1;\r
597\r
598 for (; p < end; ) {\r
599 len = enclen(enc, p);\r
600 if (len == prev_len) {\r
601 slen++;\r
602 }\r
603 else {\r
604 r = add_compile_string(prev, prev_len, slen, reg, ambig);\r
b602265d 605 if (r != 0) return r;\r
14b0e578
CS
606\r
607 prev = p;\r
608 slen = 1;\r
609 prev_len = len;\r
610 }\r
611\r
612 p += len;\r
613 }\r
b602265d 614\r
14b0e578
CS
615 return add_compile_string(prev, prev_len, slen, reg, ambig);\r
616}\r
617\r
618static int\r
619compile_string_raw_node(StrNode* sn, regex_t* reg)\r
620{\r
621 if (sn->end <= sn->s)\r
622 return 0;\r
623\r
b602265d 624 return add_compile_string(sn->s, 1 /* sb */, (int )(sn->end - sn->s), reg, 0);\r
14b0e578
CS
625}\r
626\r
627static int\r
628add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)\r
629{\r
630#ifdef PLATFORM_UNALIGNED_WORD_ACCESS\r
631 add_length(reg, mbuf->used);\r
632 return add_bytes(reg, mbuf->p, mbuf->used);\r
633#else\r
634 int r, pad_size;\r
b602265d 635 UChar* p = BB_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;\r
14b0e578
CS
636\r
637 GET_ALIGNMENT_PAD_SIZE(p, pad_size);\r
638 add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));\r
639 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);\r
640\r
641 r = add_bytes(reg, mbuf->p, mbuf->used);\r
642\r
643 /* padding for return value from compile_length_cclass_node() to be fix. */\r
644 pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;\r
645 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);\r
646 return r;\r
647#endif\r
648}\r
649\r
650static int\r
651compile_length_cclass_node(CClassNode* cc, regex_t* reg)\r
652{\r
653 int len;\r
654\r
14b0e578
CS
655 if (IS_NULL(cc->mbuf)) {\r
656 len = SIZE_OPCODE + SIZE_BITSET;\r
657 }\r
658 else {\r
659 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {\r
660 len = SIZE_OPCODE;\r
661 }\r
662 else {\r
663 len = SIZE_OPCODE + SIZE_BITSET;\r
664 }\r
665#ifdef PLATFORM_UNALIGNED_WORD_ACCESS\r
666 len += SIZE_LENGTH + cc->mbuf->used;\r
667#else\r
668 len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);\r
669#endif\r
670 }\r
671\r
672 return len;\r
673}\r
674\r
675static int\r
676compile_cclass_node(CClassNode* cc, regex_t* reg)\r
677{\r
678 int r;\r
679\r
14b0e578
CS
680 if (IS_NULL(cc->mbuf)) {\r
681 if (IS_NCCLASS_NOT(cc))\r
682 add_opcode(reg, OP_CCLASS_NOT);\r
683 else\r
684 add_opcode(reg, OP_CCLASS);\r
685\r
686 r = add_bitset(reg, cc->bs);\r
687 }\r
688 else {\r
689 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {\r
690 if (IS_NCCLASS_NOT(cc))\r
691 add_opcode(reg, OP_CCLASS_MB_NOT);\r
692 else\r
693 add_opcode(reg, OP_CCLASS_MB);\r
694\r
695 r = add_multi_byte_cclass(cc->mbuf, reg);\r
696 }\r
697 else {\r
698 if (IS_NCCLASS_NOT(cc))\r
699 add_opcode(reg, OP_CCLASS_MIX_NOT);\r
700 else\r
701 add_opcode(reg, OP_CCLASS_MIX);\r
702\r
703 r = add_bitset(reg, cc->bs);\r
b602265d 704 if (r != 0) return r;\r
14b0e578
CS
705 r = add_multi_byte_cclass(cc->mbuf, reg);\r
706 }\r
707 }\r
708\r
709 return r;\r
710}\r
711\r
712static int\r
713entry_repeat_range(regex_t* reg, int id, int lower, int upper)\r
714{\r
715#define REPEAT_RANGE_ALLOC 4\r
716\r
717 OnigRepeatRange* p;\r
718\r
719 if (reg->repeat_range_alloc == 0) {\r
720 p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);\r
721 CHECK_NULL_RETURN_MEMERR(p);\r
722 reg->repeat_range = p;\r
723 reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;\r
724 }\r
725 else if (reg->repeat_range_alloc <= id) {\r
726 int n;\r
727 n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;\r
728 p = (OnigRepeatRange* )xrealloc(reg->repeat_range,\r
729 sizeof(OnigRepeatRange) * n,\r
730 sizeof(OnigRepeatRange) * reg->repeat_range_alloc);\r
731 CHECK_NULL_RETURN_MEMERR(p);\r
732 reg->repeat_range = p;\r
733 reg->repeat_range_alloc = n;\r
734 }\r
735 else {\r
736 p = reg->repeat_range;\r
737 }\r
738\r
739 p[id].lower = lower;\r
740 p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);\r
741 return 0;\r
742}\r
743\r
744static int\r
b602265d
DG
745compile_range_repeat_node(QuantNode* qn, int target_len, int empty_info,\r
746 regex_t* reg, ScanEnv* env)\r
14b0e578
CS
747{\r
748 int r;\r
749 int num_repeat = reg->num_repeat;\r
750\r
751 r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);\r
b602265d 752 if (r != 0) return r;\r
14b0e578
CS
753 r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */\r
754 reg->num_repeat++;\r
b602265d 755 if (r != 0) return r;\r
14b0e578 756 r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);\r
b602265d 757 if (r != 0) return r;\r
14b0e578
CS
758\r
759 r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);\r
b602265d 760 if (r != 0) return r;\r
14b0e578 761\r
b602265d
DG
762 r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);\r
763 if (r != 0) return r;\r
14b0e578
CS
764\r
765 if (\r
b602265d
DG
766#ifdef USE_CALL\r
767 NODE_IS_IN_MULTI_ENTRY(qn) ||\r
14b0e578 768#endif\r
b602265d 769 NODE_IS_IN_REAL_REPEAT(qn)) {\r
14b0e578
CS
770 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);\r
771 }\r
772 else {\r
773 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);\r
774 }\r
b602265d 775 if (r != 0) return r;\r
14b0e578
CS
776 r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */\r
777 return r;\r
778}\r
779\r
780static int\r
b602265d 781is_anychar_infinite_greedy(QuantNode* qn)\r
14b0e578
CS
782{\r
783 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&\r
b602265d 784 NODE_IS_ANYCHAR(NODE_QUANT_BODY(qn)))\r
14b0e578
CS
785 return 1;\r
786 else\r
787 return 0;\r
788}\r
789\r
790#define QUANTIFIER_EXPAND_LIMIT_SIZE 50\r
791#define CKN_ON (ckn > 0)\r
792\r
14b0e578 793static int\r
b602265d 794compile_length_quantifier_node(QuantNode* qn, regex_t* reg)\r
14b0e578
CS
795{\r
796 int len, mod_tlen;\r
797 int infinite = IS_REPEAT_INFINITE(qn->upper);\r
b602265d
DG
798 enum QuantBodyEmpty empty_info = qn->body_empty_info;\r
799 int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);\r
14b0e578
CS
800\r
801 if (tlen < 0) return tlen;\r
b602265d 802 if (tlen == 0) return 0;\r
14b0e578
CS
803\r
804 /* anychar repeat */\r
b602265d
DG
805 if (is_anychar_infinite_greedy(qn)) {\r
806 if (qn->lower <= 1 ||\r
807 int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0) {\r
14b0e578
CS
808 if (IS_NOT_NULL(qn->next_head_exact))\r
809 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;\r
810 else\r
811 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;\r
812 }\r
813 }\r
814\r
b602265d 815 if (empty_info == QUANT_BODY_IS_NOT_EMPTY)\r
14b0e578 816 mod_tlen = tlen;\r
b602265d
DG
817 else\r
818 mod_tlen = tlen + (SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END);\r
14b0e578
CS
819\r
820 if (infinite &&\r
b602265d
DG
821 (qn->lower <= 1 ||\r
822 int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {\r
14b0e578
CS
823 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {\r
824 len = SIZE_OP_JUMP;\r
825 }\r
826 else {\r
827 len = tlen * qn->lower;\r
828 }\r
829\r
830 if (qn->greedy) {\r
831 if (IS_NOT_NULL(qn->head_exact))\r
b602265d 832 len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;\r
14b0e578 833 else if (IS_NOT_NULL(qn->next_head_exact))\r
b602265d 834 len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;\r
14b0e578 835 else\r
b602265d 836 len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;\r
14b0e578
CS
837 }\r
838 else\r
839 len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;\r
840 }\r
841 else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */\r
842 len = SIZE_OP_JUMP + tlen;\r
843 }\r
844 else if (!infinite && qn->greedy &&\r
b602265d
DG
845 (qn->upper == 1 ||\r
846 int_multiply_cmp(tlen + SIZE_OP_PUSH, qn->upper,\r
847 QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {\r
14b0e578
CS
848 len = tlen * qn->lower;\r
849 len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);\r
850 }\r
851 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */\r
852 len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;\r
853 }\r
854 else {\r
855 len = SIZE_OP_REPEAT_INC\r
856 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;\r
857 }\r
858\r
859 return len;\r
860}\r
861\r
862static int\r
b602265d 863compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)\r
14b0e578
CS
864{\r
865 int i, r, mod_tlen;\r
866 int infinite = IS_REPEAT_INFINITE(qn->upper);\r
b602265d
DG
867 enum QuantBodyEmpty empty_info = qn->body_empty_info;\r
868 int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);\r
14b0e578
CS
869\r
870 if (tlen < 0) return tlen;\r
b602265d 871 if (tlen == 0) return 0;\r
14b0e578 872\r
b602265d
DG
873 if (is_anychar_infinite_greedy(qn) &&\r
874 (qn->lower <= 1 ||\r
875 int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {\r
876 r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);\r
877 if (r != 0) return r;\r
14b0e578 878 if (IS_NOT_NULL(qn->next_head_exact)) {\r
b602265d
DG
879 if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg)))\r
880 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);\r
14b0e578 881 else\r
b602265d
DG
882 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);\r
883 if (r != 0) return r;\r
884 return add_bytes(reg, STR_(qn->next_head_exact)->s, 1);\r
14b0e578
CS
885 }\r
886 else {\r
b602265d
DG
887 if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg)))\r
888 return add_opcode(reg, OP_ANYCHAR_ML_STAR);\r
14b0e578 889 else\r
b602265d 890 return add_opcode(reg, OP_ANYCHAR_STAR);\r
14b0e578
CS
891 }\r
892 }\r
893\r
b602265d 894 if (empty_info == QUANT_BODY_IS_NOT_EMPTY)\r
14b0e578 895 mod_tlen = tlen;\r
b602265d
DG
896 else\r
897 mod_tlen = tlen + (SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END);\r
14b0e578
CS
898\r
899 if (infinite &&\r
b602265d
DG
900 (qn->lower <= 1 ||\r
901 int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {\r
14b0e578
CS
902 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {\r
903 if (qn->greedy) {\r
b602265d
DG
904 if (IS_NOT_NULL(qn->head_exact))\r
905 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);\r
906 else if (IS_NOT_NULL(qn->next_head_exact))\r
907 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);\r
908 else\r
909 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);\r
14b0e578
CS
910 }\r
911 else {\r
b602265d 912 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);\r
14b0e578 913 }\r
b602265d 914 if (r != 0) return r;\r
14b0e578
CS
915 }\r
916 else {\r
b602265d
DG
917 r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);\r
918 if (r != 0) return r;\r
14b0e578
CS
919 }\r
920\r
921 if (qn->greedy) {\r
922 if (IS_NOT_NULL(qn->head_exact)) {\r
b602265d
DG
923 r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,\r
924 mod_tlen + SIZE_OP_JUMP);\r
925 if (r != 0) return r;\r
926 add_bytes(reg, STR_(qn->head_exact)->s, 1);\r
927 r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);\r
928 if (r != 0) return r;\r
929 r = add_opcode_rel_addr(reg, OP_JUMP,\r
930 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));\r
14b0e578
CS
931 }\r
932 else if (IS_NOT_NULL(qn->next_head_exact)) {\r
b602265d
DG
933 r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,\r
934 mod_tlen + SIZE_OP_JUMP);\r
935 if (r != 0) return r;\r
936 add_bytes(reg, STR_(qn->next_head_exact)->s, 1);\r
937 r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);\r
938 if (r != 0) return r;\r
939 r = add_opcode_rel_addr(reg, OP_JUMP,\r
940 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));\r
14b0e578
CS
941 }\r
942 else {\r
b602265d
DG
943 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);\r
944 if (r != 0) return r;\r
945 r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);\r
946 if (r != 0) return r;\r
947 r = add_opcode_rel_addr(reg, OP_JUMP,\r
948 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));\r
14b0e578
CS
949 }\r
950 }\r
951 else {\r
952 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);\r
b602265d
DG
953 if (r != 0) return r;\r
954 r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);\r
955 if (r != 0) return r;\r
14b0e578
CS
956 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));\r
957 }\r
958 }\r
959 else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */\r
960 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);\r
b602265d
DG
961 if (r != 0) return r;\r
962 r = compile_tree(NODE_QUANT_BODY(qn), reg, env);\r
14b0e578 963 }\r
b602265d
DG
964 else if (! infinite && qn->greedy &&\r
965 (qn->upper == 1 ||\r
966 int_multiply_cmp(tlen + SIZE_OP_PUSH, qn->upper,\r
967 QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {\r
14b0e578
CS
968 int n = qn->upper - qn->lower;\r
969\r
b602265d
DG
970 r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);\r
971 if (r != 0) return r;\r
14b0e578
CS
972\r
973 for (i = 0; i < n; i++) {\r
974 r = add_opcode_rel_addr(reg, OP_PUSH,\r
b602265d
DG
975 (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);\r
976 if (r != 0) return r;\r
977 r = compile_tree(NODE_QUANT_BODY(qn), reg, env);\r
978 if (r != 0) return r;\r
14b0e578
CS
979 }\r
980 }\r
b602265d 981 else if (! qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */\r
14b0e578 982 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);\r
b602265d 983 if (r != 0) return r;\r
14b0e578 984 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);\r
b602265d
DG
985 if (r != 0) return r;\r
986 r = compile_tree(NODE_QUANT_BODY(qn), reg, env);\r
14b0e578
CS
987 }\r
988 else {\r
b602265d 989 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg, env);\r
14b0e578
CS
990 }\r
991 return r;\r
992}\r
14b0e578
CS
993\r
994static int\r
b602265d 995compile_length_option_node(EnclosureNode* node, regex_t* reg)\r
14b0e578
CS
996{\r
997 int tlen;\r
998 OnigOptionType prev = reg->options;\r
999\r
b602265d
DG
1000 reg->options = node->o.options;\r
1001 tlen = compile_length_tree(NODE_ENCLOSURE_BODY(node), reg);\r
14b0e578
CS
1002 reg->options = prev;\r
1003\r
b602265d 1004 return tlen;\r
14b0e578
CS
1005}\r
1006\r
1007static int\r
b602265d 1008compile_option_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)\r
14b0e578
CS
1009{\r
1010 int r;\r
1011 OnigOptionType prev = reg->options;\r
1012\r
b602265d
DG
1013 reg->options = node->o.options;\r
1014 r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env);\r
14b0e578
CS
1015 reg->options = prev;\r
1016\r
14b0e578
CS
1017 return r;\r
1018}\r
1019\r
1020static int\r
b602265d 1021compile_length_enclosure_node(EnclosureNode* node, regex_t* reg)\r
14b0e578
CS
1022{\r
1023 int len;\r
1024 int tlen;\r
fbaab715 1025\r
b602265d 1026 if (node->type == ENCLOSURE_OPTION)\r
14b0e578
CS
1027 return compile_length_option_node(node, reg);\r
1028\r
b602265d
DG
1029 if (NODE_ENCLOSURE_BODY(node)) {\r
1030 tlen = compile_length_tree(NODE_ENCLOSURE_BODY(node), reg);\r
14b0e578
CS
1031 if (tlen < 0) return tlen;\r
1032 }\r
1033 else\r
1034 tlen = 0;\r
1035\r
1036 switch (node->type) {\r
b602265d
DG
1037 case ENCLOSURE_MEMORY:\r
1038#ifdef USE_CALL\r
1039\r
1040 if (node->m.regnum == 0 && NODE_IS_CALLED(node)) {\r
1041 len = tlen + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;\r
1042 return len;\r
1043 }\r
1044\r
1045 if (NODE_IS_CALLED(node)) {\r
14b0e578 1046 len = SIZE_OP_MEMORY_START_PUSH + tlen\r
b602265d
DG
1047 + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;\r
1048 if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))\r
1049 len += (NODE_IS_RECURSION(node)\r
1050 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);\r
14b0e578 1051 else\r
b602265d
DG
1052 len += (NODE_IS_RECURSION(node)\r
1053 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);\r
1054 }\r
1055 else if (NODE_IS_RECURSION(node)) {\r
1056 len = SIZE_OP_MEMORY_START_PUSH;\r
1057 len += tlen + (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum)\r
1058 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_REC);\r
14b0e578
CS
1059 }\r
1060 else\r
1061#endif\r
1062 {\r
b602265d
DG
1063 if (MEM_STATUS_AT0(reg->bt_mem_start, node->m.regnum))\r
1064 len = SIZE_OP_MEMORY_START_PUSH;\r
14b0e578 1065 else\r
b602265d 1066 len = SIZE_OP_MEMORY_START;\r
14b0e578 1067\r
b602265d
DG
1068 len += tlen + (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum)\r
1069 ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);\r
14b0e578
CS
1070 }\r
1071 break;\r
1072\r
b602265d
DG
1073 case ENCLOSURE_STOP_BACKTRACK:\r
1074 if (NODE_IS_STOP_BT_SIMPLE_REPEAT(node)) {\r
1075 QuantNode* qn = QUANT_(NODE_ENCLOSURE_BODY(node));\r
1076 tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);\r
14b0e578
CS
1077 if (tlen < 0) return tlen;\r
1078\r
1079 len = tlen * qn->lower\r
b602265d 1080 + SIZE_OP_PUSH + tlen + SIZE_OP_POP_OUT + SIZE_OP_JUMP;\r
14b0e578
CS
1081 }\r
1082 else {\r
b602265d 1083 len = SIZE_OP_ATOMIC_START + tlen + SIZE_OP_ATOMIC_END;\r
14b0e578
CS
1084 }\r
1085 break;\r
1086\r
b602265d
DG
1087 case ENCLOSURE_IF_ELSE:\r
1088 {\r
1089 Node* cond = NODE_ENCLOSURE_BODY(node);\r
1090 Node* Then = node->te.Then;\r
1091 Node* Else = node->te.Else;\r
14b0e578 1092\r
b602265d
DG
1093 len = compile_length_tree(cond, reg);\r
1094 if (len < 0) return len;\r
1095 len += SIZE_OP_PUSH;\r
1096 len += SIZE_OP_ATOMIC_START + SIZE_OP_ATOMIC_END;\r
1097\r
1098 if (IS_NOT_NULL(Then)) {\r
1099 tlen = compile_length_tree(Then, reg);\r
1100 if (tlen < 0) return tlen;\r
1101 len += tlen;\r
1102 }\r
1103\r
1104 if (IS_NOT_NULL(Else)) {\r
1105 len += SIZE_OP_JUMP;\r
1106 tlen = compile_length_tree(Else, reg);\r
1107 if (tlen < 0) return tlen;\r
1108 len += tlen;\r
1109 }\r
1110 }\r
1111 break;\r
1112\r
1113 default:\r
1114 return ONIGERR_TYPE_BUG;\r
1115 break;\r
1116 }\r
1117\r
1118 return len;\r
1119}\r
14b0e578
CS
1120\r
1121static int get_char_length_tree(Node* node, regex_t* reg, int* len);\r
1122\r
1123static int\r
b602265d 1124compile_enclosure_memory_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)\r
14b0e578 1125{\r
b602265d
DG
1126 int r;\r
1127 int len;\r
14b0e578 1128\r
b602265d
DG
1129#ifdef USE_CALL\r
1130 if (node->m.regnum == 0 && NODE_IS_CALLED(node)) {\r
1131 r = add_opcode(reg, OP_CALL);\r
1132 if (r != 0) return r;\r
1133 node->m.called_addr = BB_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;\r
1134 NODE_STATUS_ADD(node, ADDR_FIXED);\r
1135 r = add_abs_addr(reg, (int )node->m.called_addr);\r
1136 if (r != 0) return r;\r
1137 len = compile_length_tree(NODE_ENCLOSURE_BODY(node), reg);\r
1138 len += SIZE_OP_RETURN;\r
1139 r = add_opcode_rel_addr(reg, OP_JUMP, len);\r
1140 if (r != 0) return r;\r
1141\r
1142 r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env);\r
1143 if (r != 0) return r;\r
1144 r = add_opcode(reg, OP_RETURN);\r
1145 return r;\r
1146 }\r
14b0e578 1147\r
b602265d
DG
1148 if (NODE_IS_CALLED(node)) {\r
1149 r = add_opcode(reg, OP_CALL);\r
1150 if (r != 0) return r;\r
1151 node->m.called_addr = BB_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;\r
1152 NODE_STATUS_ADD(node, ADDR_FIXED);\r
1153 r = add_abs_addr(reg, (int )node->m.called_addr);\r
1154 if (r != 0) return r;\r
1155 len = compile_length_tree(NODE_ENCLOSURE_BODY(node), reg);\r
1156 len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);\r
1157 if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))\r
1158 len += (NODE_IS_RECURSION(node)\r
1159 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);\r
1160 else\r
1161 len += (NODE_IS_RECURSION(node)\r
1162 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);\r
14b0e578 1163\r
b602265d
DG
1164 r = add_opcode_rel_addr(reg, OP_JUMP, len);\r
1165 if (r != 0) return r;\r
1166 }\r
14b0e578 1167#endif\r
14b0e578 1168\r
b602265d
DG
1169 if (MEM_STATUS_AT0(reg->bt_mem_start, node->m.regnum))\r
1170 r = add_opcode(reg, OP_MEMORY_START_PUSH);\r
1171 else\r
1172 r = add_opcode(reg, OP_MEMORY_START);\r
1173 if (r != 0) return r;\r
1174 r = add_mem_num(reg, node->m.regnum);\r
1175 if (r != 0) return r;\r
1176 r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env);\r
1177 if (r != 0) return r;\r
1178\r
1179#ifdef USE_CALL\r
1180 if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))\r
1181 r = add_opcode(reg, (NODE_IS_RECURSION(node)\r
1182 ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));\r
1183 else\r
1184 r = add_opcode(reg, (NODE_IS_RECURSION(node)\r
1185 ? OP_MEMORY_END_REC : OP_MEMORY_END));\r
1186 if (r != 0) return r;\r
1187 r = add_mem_num(reg, node->m.regnum);\r
1188 if (NODE_IS_CALLED(node)) {\r
1189 if (r != 0) return r;\r
1190 r = add_opcode(reg, OP_RETURN);\r
1191 }\r
1192#else\r
1193 if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))\r
1194 r = add_opcode(reg, OP_MEMORY_END_PUSH);\r
1195 else\r
1196 r = add_opcode(reg, OP_MEMORY_END);\r
1197 if (r != 0) return r;\r
1198 r = add_mem_num(reg, node->m.regnum);\r
14b0e578 1199#endif\r
b602265d
DG
1200\r
1201 return r;\r
1202}\r
1203\r
1204static int\r
1205compile_enclosure_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)\r
1206{\r
1207 int r, len;\r
1208\r
1209 switch (node->type) {\r
1210 case ENCLOSURE_MEMORY:\r
1211 r = compile_enclosure_memory_node(node, reg, env);\r
1212 break;\r
1213\r
1214 case ENCLOSURE_OPTION:\r
1215 r = compile_option_node(node, reg, env);\r
14b0e578
CS
1216 break;\r
1217\r
b602265d
DG
1218 case ENCLOSURE_STOP_BACKTRACK:\r
1219 if (NODE_IS_STOP_BT_SIMPLE_REPEAT(node)) {\r
1220 QuantNode* qn = QUANT_(NODE_ENCLOSURE_BODY(node));\r
1221 r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);\r
1222 if (r != 0) return r;\r
14b0e578 1223\r
b602265d 1224 len = compile_length_tree(NODE_QUANT_BODY(qn), reg);\r
14b0e578
CS
1225 if (len < 0) return len;\r
1226\r
b602265d
DG
1227 r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP_OUT + SIZE_OP_JUMP);\r
1228 if (r != 0) return r;\r
1229 r = compile_tree(NODE_QUANT_BODY(qn), reg, env);\r
1230 if (r != 0) return r;\r
1231 r = add_opcode(reg, OP_POP_OUT);\r
1232 if (r != 0) return r;\r
14b0e578 1233 r = add_opcode_rel_addr(reg, OP_JUMP,\r
b602265d 1234 -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP_OUT + (int )SIZE_OP_JUMP));\r
14b0e578
CS
1235 }\r
1236 else {\r
b602265d
DG
1237 r = add_opcode(reg, OP_ATOMIC_START);\r
1238 if (r != 0) return r;\r
1239 r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env);\r
1240 if (r != 0) return r;\r
1241 r = add_opcode(reg, OP_ATOMIC_END);\r
1242 }\r
1243 break;\r
1244\r
1245 case ENCLOSURE_IF_ELSE:\r
1246 {\r
1247 int cond_len, then_len, jump_len;\r
1248 Node* cond = NODE_ENCLOSURE_BODY(node);\r
1249 Node* Then = node->te.Then;\r
1250 Node* Else = node->te.Else;\r
1251\r
1252 r = add_opcode(reg, OP_ATOMIC_START);\r
1253 if (r != 0) return r;\r
1254\r
1255 cond_len = compile_length_tree(cond, reg);\r
1256 if (cond_len < 0) return cond_len;\r
1257 if (IS_NOT_NULL(Then)) {\r
1258 then_len = compile_length_tree(Then, reg);\r
1259 if (then_len < 0) return then_len;\r
1260 }\r
1261 else\r
1262 then_len = 0;\r
1263\r
1264 jump_len = cond_len + then_len + SIZE_OP_ATOMIC_END;\r
1265 if (IS_NOT_NULL(Else)) jump_len += SIZE_OP_JUMP;\r
1266\r
1267 r = add_opcode_rel_addr(reg, OP_PUSH, jump_len);\r
1268 if (r != 0) return r;\r
1269 r = compile_tree(cond, reg, env);\r
1270 if (r != 0) return r;\r
1271 r = add_opcode(reg, OP_ATOMIC_END);\r
1272 if (r != 0) return r;\r
1273\r
1274 if (IS_NOT_NULL(Then)) {\r
1275 r = compile_tree(Then, reg, env);\r
1276 if (r != 0) return r;\r
1277 }\r
1278\r
1279 if (IS_NOT_NULL(Else)) {\r
1280 int else_len = compile_length_tree(Else, reg);\r
1281 r = add_opcode_rel_addr(reg, OP_JUMP, else_len);\r
1282 if (r != 0) return r;\r
1283 r = compile_tree(Else, reg, env);\r
1284 }\r
14b0e578
CS
1285 }\r
1286 break;\r
1287\r
1288 default:\r
1289 return ONIGERR_TYPE_BUG;\r
1290 break;\r
1291 }\r
1292\r
1293 return r;\r
1294}\r
1295\r
1296static int\r
1297compile_length_anchor_node(AnchorNode* node, regex_t* reg)\r
1298{\r
1299 int len;\r
1300 int tlen = 0;\r
1301\r
b602265d
DG
1302 if (IS_NOT_NULL(NODE_ANCHOR_BODY(node))) {\r
1303 tlen = compile_length_tree(NODE_ANCHOR_BODY(node), reg);\r
14b0e578
CS
1304 if (tlen < 0) return tlen;\r
1305 }\r
1306\r
1307 switch (node->type) {\r
1308 case ANCHOR_PREC_READ:\r
b602265d 1309 len = SIZE_OP_PREC_READ_START + tlen + SIZE_OP_PREC_READ_END;\r
14b0e578
CS
1310 break;\r
1311 case ANCHOR_PREC_READ_NOT:\r
b602265d 1312 len = SIZE_OP_PREC_READ_NOT_START + tlen + SIZE_OP_PREC_READ_NOT_END;\r
14b0e578
CS
1313 break;\r
1314 case ANCHOR_LOOK_BEHIND:\r
1315 len = SIZE_OP_LOOK_BEHIND + tlen;\r
1316 break;\r
1317 case ANCHOR_LOOK_BEHIND_NOT:\r
b602265d
DG
1318 len = SIZE_OP_LOOK_BEHIND_NOT_START + tlen + SIZE_OP_LOOK_BEHIND_NOT_END;\r
1319 break;\r
1320\r
1321 case ANCHOR_WORD_BOUNDARY:\r
1322 case ANCHOR_NO_WORD_BOUNDARY:\r
1323#ifdef USE_WORD_BEGIN_END\r
1324 case ANCHOR_WORD_BEGIN:\r
1325 case ANCHOR_WORD_END:\r
1326#endif\r
1327 len = SIZE_OP_WORD_BOUNDARY;\r
1328 break;\r
1329\r
1330 case ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:\r
1331 case ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:\r
1332 len = SIZE_OPCODE;\r
14b0e578
CS
1333 break;\r
1334\r
1335 default:\r
1336 len = SIZE_OPCODE;\r
1337 break;\r
1338 }\r
1339\r
1340 return len;\r
1341}\r
1342\r
1343static int\r
b602265d 1344compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env)\r
14b0e578
CS
1345{\r
1346 int r, len;\r
b602265d 1347 enum OpCode op;\r
14b0e578
CS
1348\r
1349 switch (node->type) {\r
1350 case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break;\r
1351 case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break;\r
1352 case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break;\r
1353 case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break;\r
1354 case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break;\r
1355 case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;\r
1356\r
b602265d
DG
1357 case ANCHOR_WORD_BOUNDARY:\r
1358 op = OP_WORD_BOUNDARY;\r
1359 word:\r
1360 r = add_opcode(reg, op);\r
1361 if (r != 0) return r;\r
1362 r = add_mode(reg, (ModeType )node->ascii_mode);\r
1363 break;\r
1364\r
1365 case ANCHOR_NO_WORD_BOUNDARY:\r
1366 op = OP_NO_WORD_BOUNDARY; goto word;\r
1367 break;\r
14b0e578 1368#ifdef USE_WORD_BEGIN_END\r
b602265d
DG
1369 case ANCHOR_WORD_BEGIN:\r
1370 op = OP_WORD_BEGIN; goto word;\r
1371 break;\r
1372 case ANCHOR_WORD_END:\r
1373 op = OP_WORD_END; goto word;\r
1374 break;\r
14b0e578
CS
1375#endif\r
1376\r
b602265d
DG
1377 case ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:\r
1378 r = add_opcode(reg, OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY);\r
1379 break;\r
1380\r
1381 case ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:\r
1382 r = add_opcode(reg, OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY);\r
1383 break;\r
1384\r
14b0e578 1385 case ANCHOR_PREC_READ:\r
b602265d
DG
1386 r = add_opcode(reg, OP_PREC_READ_START);\r
1387 if (r != 0) return r;\r
1388 r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);\r
1389 if (r != 0) return r;\r
1390 r = add_opcode(reg, OP_PREC_READ_END);\r
14b0e578
CS
1391 break;\r
1392\r
1393 case ANCHOR_PREC_READ_NOT:\r
b602265d 1394 len = compile_length_tree(NODE_ANCHOR_BODY(node), reg);\r
14b0e578 1395 if (len < 0) return len;\r
b602265d
DG
1396 r = add_opcode_rel_addr(reg, OP_PREC_READ_NOT_START, len + SIZE_OP_PREC_READ_NOT_END);\r
1397 if (r != 0) return r;\r
1398 r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);\r
1399 if (r != 0) return r;\r
1400 r = add_opcode(reg, OP_PREC_READ_NOT_END);\r
14b0e578
CS
1401 break;\r
1402\r
1403 case ANCHOR_LOOK_BEHIND:\r
1404 {\r
1405 int n;\r
1406 r = add_opcode(reg, OP_LOOK_BEHIND);\r
b602265d 1407 if (r != 0) return r;\r
14b0e578 1408 if (node->char_len < 0) {\r
b602265d
DG
1409 r = get_char_length_tree(NODE_ANCHOR_BODY(node), reg, &n);\r
1410 if (r != 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;\r
14b0e578
CS
1411 }\r
1412 else\r
b602265d
DG
1413 n = node->char_len;\r
1414\r
14b0e578 1415 r = add_length(reg, n);\r
b602265d
DG
1416 if (r != 0) return r;\r
1417 r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);\r
14b0e578
CS
1418 }\r
1419 break;\r
1420\r
1421 case ANCHOR_LOOK_BEHIND_NOT:\r
1422 {\r
1423 int n;\r
b602265d
DG
1424\r
1425 len = compile_length_tree(NODE_ANCHOR_BODY(node), reg);\r
1426 r = add_opcode_rel_addr(reg, OP_LOOK_BEHIND_NOT_START,\r
1427 len + SIZE_OP_LOOK_BEHIND_NOT_END);\r
1428 if (r != 0) return r;\r
14b0e578 1429 if (node->char_len < 0) {\r
b602265d
DG
1430 r = get_char_length_tree(NODE_ANCHOR_BODY(node), reg, &n);\r
1431 if (r != 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;\r
14b0e578
CS
1432 }\r
1433 else\r
b602265d 1434 n = node->char_len;\r
14b0e578 1435 r = add_length(reg, n);\r
b602265d
DG
1436 if (r != 0) return r;\r
1437 r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);\r
1438 if (r != 0) return r;\r
1439 r = add_opcode(reg, OP_LOOK_BEHIND_NOT_END);\r
14b0e578
CS
1440 }\r
1441 break;\r
1442\r
1443 default:\r
1444 return ONIGERR_TYPE_BUG;\r
1445 break;\r
1446 }\r
1447\r
1448 return r;\r
1449}\r
1450\r
b602265d
DG
1451static int\r
1452compile_gimmick_node(GimmickNode* node, regex_t* reg)\r
1453{\r
1454 int r;\r
1455\r
1456 switch (node->type) {\r
1457 case GIMMICK_FAIL:\r
1458 r = add_opcode(reg, OP_FAIL);\r
1459 break;\r
1460\r
1461 case GIMMICK_KEEP:\r
1462 r = add_opcode(reg, OP_PUSH_SAVE_VAL);\r
1463 if (r != 0) return r;\r
1464 r = add_save_type(reg, SAVE_KEEP);\r
1465 if (r != 0) return r;\r
1466 r = add_mem_num(reg, node->id);\r
1467 break;\r
1468\r
1469 case GIMMICK_SAVE:\r
1470 r = add_opcode(reg, OP_PUSH_SAVE_VAL);\r
1471 if (r != 0) return r;\r
1472 r = add_save_type(reg, node->detail_type);\r
1473 if (r != 0) return r;\r
1474 r = add_mem_num(reg, node->id);\r
1475 break;\r
1476\r
1477 case GIMMICK_UPDATE_VAR:\r
1478 r = add_opcode(reg, OP_UPDATE_VAR);\r
1479 if (r != 0) return r;\r
1480 r = add_update_var_type(reg, node->detail_type);\r
1481 if (r != 0) return r;\r
1482 r = add_mem_num(reg, node->id);\r
1483 break;\r
1484\r
1485#ifdef USE_CALLOUT\r
1486 case GIMMICK_CALLOUT:\r
1487 switch (node->detail_type) {\r
1488 case ONIG_CALLOUT_OF_CONTENTS:\r
1489 case ONIG_CALLOUT_OF_NAME:\r
1490 {\r
1491 r = add_opcode(reg, (node->detail_type == ONIG_CALLOUT_OF_CONTENTS) ?\r
1492 OP_CALLOUT_CONTENTS : OP_CALLOUT_NAME);\r
1493 if (r != 0) return r;\r
1494 if (node->detail_type == ONIG_CALLOUT_OF_NAME) {\r
1495 r = add_mem_num(reg, node->id);\r
1496 if (r != 0) return r;\r
1497 }\r
1498 r = add_mem_num(reg, node->num);\r
1499 if (r != 0) return r;\r
1500 }\r
1501 break;\r
1502\r
1503 default:\r
1504 r = ONIGERR_TYPE_BUG;\r
1505 break;\r
1506 }\r
1507#endif\r
1508 }\r
1509\r
1510 return r;\r
1511}\r
1512\r
1513static int\r
1514compile_length_gimmick_node(GimmickNode* node, regex_t* reg)\r
1515{\r
1516 int len;\r
1517\r
1518 switch (node->type) {\r
1519 case GIMMICK_FAIL:\r
1520 len = SIZE_OP_FAIL;\r
1521 break;\r
1522\r
1523 case GIMMICK_KEEP:\r
1524 case GIMMICK_SAVE:\r
1525 len = SIZE_OP_PUSH_SAVE_VAL;\r
1526 break;\r
1527\r
1528 case GIMMICK_UPDATE_VAR:\r
1529 len = SIZE_OP_UPDATE_VAR;\r
1530 break;\r
1531\r
1532#ifdef USE_CALLOUT\r
1533 case GIMMICK_CALLOUT:\r
1534 switch (node->detail_type) {\r
1535 case ONIG_CALLOUT_OF_CONTENTS:\r
1536 len = SIZE_OP_CALLOUT_CONTENTS;\r
1537 break;\r
1538 case ONIG_CALLOUT_OF_NAME:\r
1539 len = SIZE_OP_CALLOUT_NAME;\r
1540 break;\r
1541\r
1542 default:\r
1543 len = ONIGERR_TYPE_BUG;\r
1544 break;\r
1545 }\r
1546 break;\r
1547#endif\r
1548 }\r
1549\r
1550 return len;\r
1551}\r
1552\r
14b0e578
CS
1553static int\r
1554compile_length_tree(Node* node, regex_t* reg)\r
1555{\r
b602265d 1556 int len, r;\r
14b0e578 1557\r
b602265d
DG
1558 switch (NODE_TYPE(node)) {\r
1559 case NODE_LIST:\r
14b0e578
CS
1560 len = 0;\r
1561 do {\r
b602265d 1562 r = compile_length_tree(NODE_CAR(node), reg);\r
14b0e578
CS
1563 if (r < 0) return r;\r
1564 len += r;\r
b602265d 1565 } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
14b0e578
CS
1566 r = len;\r
1567 break;\r
1568\r
b602265d 1569 case NODE_ALT:\r
14b0e578
CS
1570 {\r
1571 int n;\r
1572\r
1573 n = r = 0;\r
1574 do {\r
b602265d
DG
1575 r += compile_length_tree(NODE_CAR(node), reg);\r
1576 n++;\r
1577 } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
14b0e578
CS
1578 r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);\r
1579 }\r
1580 break;\r
1581\r
b602265d
DG
1582 case NODE_STRING:\r
1583 if (NODE_STRING_IS_RAW(node))\r
1584 r = compile_length_string_raw_node(STR_(node), reg);\r
14b0e578
CS
1585 else\r
1586 r = compile_length_string_node(node, reg);\r
1587 break;\r
1588\r
b602265d
DG
1589 case NODE_CCLASS:\r
1590 r = compile_length_cclass_node(CCLASS_(node), reg);\r
14b0e578
CS
1591 break;\r
1592\r
b602265d 1593 case NODE_CTYPE:\r
14b0e578
CS
1594 r = SIZE_OPCODE;\r
1595 break;\r
1596\r
b602265d 1597 case NODE_BACKREF:\r
14b0e578 1598 {\r
b602265d 1599 BackRefNode* br = BACKREF_(node);\r
14b0e578 1600\r
b602265d 1601 if (NODE_IS_CHECKER(node)) {\r
14b0e578 1602#ifdef USE_BACKREF_WITH_LEVEL\r
b602265d
DG
1603 if (NODE_IS_NEST_LEVEL(node)) {\r
1604 r = SIZE_OPCODE + SIZE_LENGTH + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);\r
1605 }\r
1606 else\r
14b0e578 1607#endif\r
b602265d 1608 r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);\r
14b0e578
CS
1609 }\r
1610 else {\r
b602265d
DG
1611#ifdef USE_BACKREF_WITH_LEVEL\r
1612 if (NODE_IS_NEST_LEVEL(node)) {\r
1613 r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +\r
1614 SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);\r
1615 }\r
1616 else\r
1617#endif\r
1618 if (br->back_num == 1) {\r
1619 r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)\r
1620 ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));\r
1621 }\r
1622 else {\r
1623 r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);\r
1624 }\r
14b0e578
CS
1625 }\r
1626 }\r
1627 break;\r
1628\r
b602265d
DG
1629#ifdef USE_CALL\r
1630 case NODE_CALL:\r
14b0e578
CS
1631 r = SIZE_OP_CALL;\r
1632 break;\r
1633#endif\r
1634\r
b602265d
DG
1635 case NODE_QUANT:\r
1636 r = compile_length_quantifier_node(QUANT_(node), reg);\r
1637 break;\r
1638\r
1639 case NODE_ENCLOSURE:\r
1640 r = compile_length_enclosure_node(ENCLOSURE_(node), reg);\r
14b0e578
CS
1641 break;\r
1642\r
b602265d
DG
1643 case NODE_ANCHOR:\r
1644 r = compile_length_anchor_node(ANCHOR_(node), reg);\r
14b0e578
CS
1645 break;\r
1646\r
b602265d
DG
1647 case NODE_GIMMICK:\r
1648 r = compile_length_gimmick_node(GIMMICK_(node), reg);\r
14b0e578
CS
1649 break;\r
1650\r
1651 default:\r
1652 return ONIGERR_TYPE_BUG;\r
1653 break;\r
1654 }\r
1655\r
1656 return r;\r
1657}\r
1658\r
1659static int\r
b602265d 1660compile_tree(Node* node, regex_t* reg, ScanEnv* env)\r
14b0e578 1661{\r
b602265d 1662 int n, len, pos, r = 0;\r
14b0e578 1663\r
b602265d
DG
1664 switch (NODE_TYPE(node)) {\r
1665 case NODE_LIST:\r
14b0e578 1666 do {\r
b602265d
DG
1667 r = compile_tree(NODE_CAR(node), reg, env);\r
1668 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));\r
14b0e578
CS
1669 break;\r
1670\r
b602265d 1671 case NODE_ALT:\r
14b0e578
CS
1672 {\r
1673 Node* x = node;\r
1674 len = 0;\r
1675 do {\r
b602265d
DG
1676 len += compile_length_tree(NODE_CAR(x), reg);\r
1677 if (IS_NOT_NULL(NODE_CDR(x))) {\r
1678 len += SIZE_OP_PUSH + SIZE_OP_JUMP;\r
1679 }\r
1680 } while (IS_NOT_NULL(x = NODE_CDR(x)));\r
14b0e578
CS
1681 pos = reg->used + len; /* goal position */\r
1682\r
1683 do {\r
b602265d
DG
1684 len = compile_length_tree(NODE_CAR(node), reg);\r
1685 if (IS_NOT_NULL(NODE_CDR(node))) {\r
1686 enum OpCode push = NODE_IS_SUPER(node) ? OP_PUSH_SUPER : OP_PUSH;\r
1687 r = add_opcode_rel_addr(reg, push, len + SIZE_OP_JUMP);\r
1688 if (r != 0) break;\r
1689 }\r
1690 r = compile_tree(NODE_CAR(node), reg, env);\r
1691 if (r != 0) break;\r
1692 if (IS_NOT_NULL(NODE_CDR(node))) {\r
1693 len = pos - (reg->used + SIZE_OP_JUMP);\r
1694 r = add_opcode_rel_addr(reg, OP_JUMP, len);\r
1695 if (r != 0) break;\r
1696 }\r
1697 } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
1698 }\r
1699 break;\r
1700\r
1701 case NODE_STRING:\r
1702 if (NODE_STRING_IS_RAW(node))\r
1703 r = compile_string_raw_node(STR_(node), reg);\r
14b0e578
CS
1704 else\r
1705 r = compile_string_node(node, reg);\r
1706 break;\r
1707\r
b602265d
DG
1708 case NODE_CCLASS:\r
1709 r = compile_cclass_node(CCLASS_(node), reg);\r
14b0e578
CS
1710 break;\r
1711\r
b602265d 1712 case NODE_CTYPE:\r
14b0e578
CS
1713 {\r
1714 int op;\r
1715\r
b602265d
DG
1716 switch (CTYPE_(node)->ctype) {\r
1717 case CTYPE_ANYCHAR:\r
1718 if (IS_MULTILINE(CTYPE_OPTION(node, reg)))\r
1719 r = add_opcode(reg, OP_ANYCHAR_ML);\r
1720 else\r
1721 r = add_opcode(reg, OP_ANYCHAR);\r
1722 break;\r
1723\r
14b0e578 1724 case ONIGENC_CTYPE_WORD:\r
b602265d
DG
1725 if (CTYPE_(node)->ascii_mode == 0) {\r
1726 op = CTYPE_(node)->not != 0 ? OP_NO_WORD : OP_WORD;\r
1727 }\r
1728 else {\r
1729 op = CTYPE_(node)->not != 0 ? OP_NO_WORD_ASCII : OP_WORD_ASCII;\r
1730 }\r
1731 r = add_opcode(reg, op);\r
1732 break;\r
1733\r
14b0e578 1734 default:\r
b602265d
DG
1735 return ONIGERR_TYPE_BUG;\r
1736 break;\r
14b0e578 1737 }\r
14b0e578
CS
1738 }\r
1739 break;\r
1740\r
b602265d 1741 case NODE_BACKREF:\r
14b0e578 1742 {\r
b602265d 1743 BackRefNode* br = BACKREF_(node);\r
14b0e578 1744\r
b602265d 1745 if (NODE_IS_CHECKER(node)) {\r
14b0e578 1746#ifdef USE_BACKREF_WITH_LEVEL\r
b602265d
DG
1747 if (NODE_IS_NEST_LEVEL(node)) {\r
1748 r = add_opcode(reg, OP_BACKREF_CHECK_WITH_LEVEL);\r
1749 if (r != 0) return r;\r
1750 r = add_length(reg, br->nest_level);\r
1751 if (r != 0) return r;\r
1752 }\r
1753 else\r
14b0e578 1754#endif\r
b602265d
DG
1755 {\r
1756 r = add_opcode(reg, OP_BACKREF_CHECK);\r
1757 if (r != 0) return r;\r
1758 }\r
1759\r
1760 goto add_bacref_mems;\r
14b0e578
CS
1761 }\r
1762 else {\r
b602265d
DG
1763#ifdef USE_BACKREF_WITH_LEVEL\r
1764 if (NODE_IS_NEST_LEVEL(node)) {\r
1765 r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);\r
1766 if (r != 0) return r;\r
1767 r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));\r
1768 if (r != 0) return r;\r
1769 r = add_length(reg, br->nest_level);\r
1770 if (r != 0) return r;\r
1771\r
1772 goto add_bacref_mems;\r
14b0e578 1773 }\r
b602265d
DG
1774 else\r
1775#endif\r
1776 if (br->back_num == 1) {\r
1777 n = br->back_static[0];\r
1778 if (IS_IGNORECASE(reg->options)) {\r
1779 r = add_opcode(reg, OP_BACKREF_N_IC);\r
1780 if (r != 0) return r;\r
1781 r = add_mem_num(reg, n);\r
1782 }\r
1783 else {\r
1784 switch (n) {\r
1785 case 1: r = add_opcode(reg, OP_BACKREF1); break;\r
1786 case 2: r = add_opcode(reg, OP_BACKREF2); break;\r
1787 default:\r
1788 r = add_opcode(reg, OP_BACKREF_N);\r
1789 if (r != 0) return r;\r
1790 r = add_mem_num(reg, n);\r
1791 break;\r
1792 }\r
1793 }\r
14b0e578 1794 }\r
b602265d
DG
1795 else {\r
1796 int i;\r
1797 int* p;\r
14b0e578 1798\r
b602265d
DG
1799 if (IS_IGNORECASE(reg->options)) {\r
1800 r = add_opcode(reg, OP_BACKREF_MULTI_IC);\r
1801 }\r
1802 else {\r
1803 r = add_opcode(reg, OP_BACKREF_MULTI);\r
1804 }\r
1805 if (r != 0) return r;\r
1806\r
1807 add_bacref_mems:\r
1808 r = add_length(reg, br->back_num);\r
1809 if (r != 0) return r;\r
1810 p = BACKREFS_P(br);\r
1811 for (i = br->back_num - 1; i >= 0; i--) {\r
1812 r = add_mem_num(reg, p[i]);\r
1813 if (r != 0) return r;\r
1814 }\r
1815 }\r
14b0e578
CS
1816 }\r
1817 }\r
1818 break;\r
1819\r
b602265d
DG
1820#ifdef USE_CALL\r
1821 case NODE_CALL:\r
1822 r = compile_call(CALL_(node), reg, env);\r
14b0e578
CS
1823 break;\r
1824#endif\r
1825\r
b602265d
DG
1826 case NODE_QUANT:\r
1827 r = compile_quantifier_node(QUANT_(node), reg, env);\r
14b0e578
CS
1828 break;\r
1829\r
b602265d
DG
1830 case NODE_ENCLOSURE:\r
1831 r = compile_enclosure_node(ENCLOSURE_(node), reg, env);\r
14b0e578
CS
1832 break;\r
1833\r
b602265d
DG
1834 case NODE_ANCHOR:\r
1835 r = compile_anchor_node(ANCHOR_(node), reg, env);\r
1836 break;\r
1837\r
1838 case NODE_GIMMICK:\r
1839 r = compile_gimmick_node(GIMMICK_(node), reg);\r
14b0e578
CS
1840 break;\r
1841\r
1842 default:\r
1843#ifdef ONIG_DEBUG\r
b602265d 1844 fprintf(stderr, "compile_tree: undefined node type %d\n", NODE_TYPE(node));\r
14b0e578
CS
1845#endif\r
1846 break;\r
1847 }\r
1848\r
1849 return r;\r
1850}\r
1851\r
14b0e578
CS
1852static int\r
1853noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)\r
1854{\r
1855 int r = 0;\r
1856 Node* node = *plink;\r
1857\r
b602265d
DG
1858 switch (NODE_TYPE(node)) {\r
1859 case NODE_LIST:\r
1860 case NODE_ALT:\r
14b0e578 1861 do {\r
b602265d
DG
1862 r = noname_disable_map(&(NODE_CAR(node)), map, counter);\r
1863 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));\r
14b0e578
CS
1864 break;\r
1865\r
b602265d 1866 case NODE_QUANT:\r
14b0e578 1867 {\r
b602265d 1868 Node** ptarget = &(NODE_BODY(node));\r
14b0e578
CS
1869 Node* old = *ptarget;\r
1870 r = noname_disable_map(ptarget, map, counter);\r
b602265d
DG
1871 if (*ptarget != old && NODE_TYPE(*ptarget) == NODE_QUANT) {\r
1872 onig_reduce_nested_quantifier(node, *ptarget);\r
14b0e578
CS
1873 }\r
1874 }\r
1875 break;\r
1876\r
b602265d 1877 case NODE_ENCLOSURE:\r
14b0e578 1878 {\r
b602265d
DG
1879 EnclosureNode* en = ENCLOSURE_(node);\r
1880 if (en->type == ENCLOSURE_MEMORY) {\r
1881 if (NODE_IS_NAMED_GROUP(node)) {\r
1882 (*counter)++;\r
1883 map[en->m.regnum].new_val = *counter;\r
1884 en->m.regnum = *counter;\r
1885 r = noname_disable_map(&(NODE_BODY(node)), map, counter);\r
1886 }\r
1887 else {\r
1888 *plink = NODE_BODY(node);\r
1889 NODE_BODY(node) = NULL_NODE;\r
1890 onig_node_free(node);\r
1891 r = noname_disable_map(plink, map, counter);\r
1892 }\r
1893 }\r
1894 else if (en->type == ENCLOSURE_IF_ELSE) {\r
1895 r = noname_disable_map(&(NODE_ENCLOSURE_BODY(en)), map, counter);\r
1896 if (r != 0) return r;\r
1897 if (IS_NOT_NULL(en->te.Then)) {\r
1898 r = noname_disable_map(&(en->te.Then), map, counter);\r
1899 if (r != 0) return r;\r
1900 }\r
1901 if (IS_NOT_NULL(en->te.Else)) {\r
1902 r = noname_disable_map(&(en->te.Else), map, counter);\r
1903 if (r != 0) return r;\r
1904 }\r
14b0e578
CS
1905 }\r
1906 else\r
b602265d 1907 r = noname_disable_map(&(NODE_BODY(node)), map, counter);\r
14b0e578
CS
1908 }\r
1909 break;\r
1910\r
b602265d
DG
1911 case NODE_ANCHOR:\r
1912 if (IS_NOT_NULL(NODE_BODY(node)))\r
1913 r = noname_disable_map(&(NODE_BODY(node)), map, counter);\r
1914 break;\r
1915\r
14b0e578
CS
1916 default:\r
1917 break;\r
1918 }\r
1919\r
1920 return r;\r
1921}\r
1922\r
1923static int\r
1924renumber_node_backref(Node* node, GroupNumRemap* map)\r
1925{\r
1926 int i, pos, n, old_num;\r
1927 int *backs;\r
b602265d 1928 BackRefNode* bn = BACKREF_(node);\r
14b0e578 1929\r
b602265d 1930 if (! NODE_IS_BY_NAME(node))\r
14b0e578
CS
1931 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;\r
1932\r
1933 old_num = bn->back_num;\r
1934 if (IS_NULL(bn->back_dynamic))\r
1935 backs = bn->back_static;\r
1936 else\r
1937 backs = bn->back_dynamic;\r
1938\r
1939 for (i = 0, pos = 0; i < old_num; i++) {\r
1940 n = map[backs[i]].new_val;\r
1941 if (n > 0) {\r
1942 backs[pos] = n;\r
1943 pos++;\r
1944 }\r
1945 }\r
1946\r
1947 bn->back_num = pos;\r
1948 return 0;\r
1949}\r
1950\r
1951static int\r
1952renumber_by_map(Node* node, GroupNumRemap* map)\r
1953{\r
1954 int r = 0;\r
1955\r
b602265d
DG
1956 switch (NODE_TYPE(node)) {\r
1957 case NODE_LIST:\r
1958 case NODE_ALT:\r
14b0e578 1959 do {\r
b602265d
DG
1960 r = renumber_by_map(NODE_CAR(node), map);\r
1961 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));\r
14b0e578 1962 break;\r
b602265d
DG
1963\r
1964 case NODE_QUANT:\r
1965 r = renumber_by_map(NODE_BODY(node), map);\r
14b0e578 1966 break;\r
b602265d
DG
1967\r
1968 case NODE_ENCLOSURE:\r
1969 {\r
1970 EnclosureNode* en = ENCLOSURE_(node);\r
1971\r
1972 r = renumber_by_map(NODE_BODY(node), map);\r
1973 if (r != 0) return r;\r
1974\r
1975 if (en->type == ENCLOSURE_IF_ELSE) {\r
1976 if (IS_NOT_NULL(en->te.Then)) {\r
1977 r = renumber_by_map(en->te.Then, map);\r
1978 if (r != 0) return r;\r
1979 }\r
1980 if (IS_NOT_NULL(en->te.Else)) {\r
1981 r = renumber_by_map(en->te.Else, map);\r
1982 if (r != 0) return r;\r
1983 }\r
1984 }\r
1985 }\r
14b0e578
CS
1986 break;\r
1987\r
b602265d 1988 case NODE_BACKREF:\r
14b0e578
CS
1989 r = renumber_node_backref(node, map);\r
1990 break;\r
1991\r
b602265d
DG
1992 case NODE_ANCHOR:\r
1993 if (IS_NOT_NULL(NODE_BODY(node)))\r
1994 r = renumber_by_map(NODE_BODY(node), map);\r
1995 break;\r
1996\r
14b0e578
CS
1997 default:\r
1998 break;\r
1999 }\r
2000\r
2001 return r;\r
2002}\r
2003\r
2004static int\r
2005numbered_ref_check(Node* node)\r
2006{\r
2007 int r = 0;\r
2008\r
b602265d
DG
2009 switch (NODE_TYPE(node)) {\r
2010 case NODE_LIST:\r
2011 case NODE_ALT:\r
14b0e578 2012 do {\r
b602265d
DG
2013 r = numbered_ref_check(NODE_CAR(node));\r
2014 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));\r
14b0e578 2015 break;\r
b602265d
DG
2016\r
2017 case NODE_ANCHOR:\r
2018 if (IS_NULL(NODE_BODY(node)))\r
2019 break;\r
2020 /* fall */\r
2021 case NODE_QUANT:\r
2022 r = numbered_ref_check(NODE_BODY(node));\r
14b0e578 2023 break;\r
b602265d
DG
2024\r
2025 case NODE_ENCLOSURE:\r
2026 {\r
2027 EnclosureNode* en = ENCLOSURE_(node);\r
2028\r
2029 r = numbered_ref_check(NODE_BODY(node));\r
2030 if (r != 0) return r;\r
2031\r
2032 if (en->type == ENCLOSURE_IF_ELSE) {\r
2033 if (IS_NOT_NULL(en->te.Then)) {\r
2034 r = numbered_ref_check(en->te.Then);\r
2035 if (r != 0) return r;\r
2036 }\r
2037 if (IS_NOT_NULL(en->te.Else)) {\r
2038 r = numbered_ref_check(en->te.Else);\r
2039 if (r != 0) return r;\r
2040 }\r
2041 }\r
2042 }\r
2043\r
14b0e578
CS
2044 break;\r
2045\r
b602265d
DG
2046 case NODE_BACKREF:\r
2047 if (! NODE_IS_BY_NAME(node))\r
14b0e578
CS
2048 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;\r
2049 break;\r
2050\r
2051 default:\r
2052 break;\r
2053 }\r
2054\r
2055 return r;\r
2056}\r
2057\r
2058static int\r
2059disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)\r
2060{\r
2061 int r, i, pos, counter;\r
b602265d
DG
2062 int result;\r
2063 MemStatusType loc;\r
14b0e578
CS
2064 GroupNumRemap* map;\r
2065\r
2066 map = (GroupNumRemap* )xmalloc(sizeof(GroupNumRemap) * (env->num_mem + 1));\r
2067 CHECK_NULL_RETURN_MEMERR(map);\r
2068 for (i = 1; i <= env->num_mem; i++) {\r
2069 map[i].new_val = 0;\r
2070 }\r
2071 counter = 0;\r
2072 r = noname_disable_map(root, map, &counter);\r
2073 if (r != 0) return r;\r
2074\r
2075 r = renumber_by_map(*root, map);\r
2076 if (r != 0) return r;\r
2077\r
2078 for (i = 1, pos = 1; i <= env->num_mem; i++) {\r
2079 if (map[i].new_val > 0) {\r
b602265d 2080 SCANENV_MEMENV(env)[pos] = SCANENV_MEMENV(env)[i];\r
14b0e578
CS
2081 pos++;\r
2082 }\r
2083 }\r
2084\r
2085 loc = env->capture_history;\r
b602265d 2086 MEM_STATUS_CLEAR(env->capture_history);\r
14b0e578 2087 for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {\r
b602265d
DG
2088 if (MEM_STATUS_AT(loc, i)) {\r
2089 MEM_STATUS_ON_SIMPLE(env->capture_history, map[i].new_val);\r
14b0e578
CS
2090 }\r
2091 }\r
2092\r
2093 env->num_mem = env->num_named;\r
2094 reg->num_mem = env->num_named;\r
b602265d 2095 result = onig_renumber_name_table(reg, map);\r
14b0e578 2096 xfree(map);\r
b602265d 2097 return result;\r
14b0e578 2098}\r
14b0e578 2099\r
b602265d 2100#ifdef USE_CALL\r
14b0e578 2101static int\r
b602265d 2102fix_unset_addr_list(UnsetAddrList* uslist, regex_t* reg)\r
14b0e578
CS
2103{\r
2104 int i, offset;\r
b602265d 2105 EnclosureNode* en;\r
14b0e578
CS
2106 AbsAddrType addr;\r
2107\r
2108 for (i = 0; i < uslist->num; i++) {\r
b602265d
DG
2109 if (! NODE_IS_ADDR_FIXED(uslist->us[i].target))\r
2110 return ONIGERR_PARSER_BUG;\r
2111\r
2112 en = ENCLOSURE_(uslist->us[i].target);\r
2113 addr = en->m.called_addr;\r
14b0e578
CS
2114 offset = uslist->us[i].offset;\r
2115\r
b602265d 2116 BB_WRITE(reg, offset, &addr, SIZE_ABSADDR);\r
14b0e578
CS
2117 }\r
2118 return 0;\r
2119}\r
2120#endif\r
2121\r
b602265d
DG
2122\r
2123#define GET_CHAR_LEN_VARLEN -1\r
2124#define GET_CHAR_LEN_TOP_ALT_VARLEN -2\r
2125\r
2126/* fixed size pattern node only */\r
14b0e578 2127static int\r
b602265d 2128get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)\r
14b0e578 2129{\r
b602265d 2130 int tlen;\r
14b0e578
CS
2131 int r = 0;\r
2132\r
b602265d
DG
2133 level++;\r
2134 *len = 0;\r
2135 switch (NODE_TYPE(node)) {\r
2136 case NODE_LIST:\r
2137 do {\r
2138 r = get_char_length_tree1(NODE_CAR(node), reg, &tlen, level);\r
2139 if (r == 0)\r
2140 *len = distance_add(*len, tlen);\r
2141 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));\r
14b0e578
CS
2142 break;\r
2143\r
b602265d
DG
2144 case NODE_ALT:\r
2145 {\r
2146 int tlen2;\r
2147 int varlen = 0;\r
2148\r
2149 r = get_char_length_tree1(NODE_CAR(node), reg, &tlen, level);\r
2150 while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node))) {\r
2151 r = get_char_length_tree1(NODE_CAR(node), reg, &tlen2, level);\r
2152 if (r == 0) {\r
2153 if (tlen != tlen2)\r
2154 varlen = 1;\r
2155 }\r
2156 }\r
2157 if (r == 0) {\r
2158 if (varlen != 0) {\r
2159 if (level == 1)\r
2160 r = GET_CHAR_LEN_TOP_ALT_VARLEN;\r
2161 else\r
2162 r = GET_CHAR_LEN_VARLEN;\r
2163 }\r
2164 else\r
2165 *len = tlen;\r
2166 }\r
14b0e578 2167 }\r
14b0e578 2168 break;\r
14b0e578 2169\r
b602265d 2170 case NODE_STRING:\r
14b0e578 2171 {\r
b602265d
DG
2172 StrNode* sn = STR_(node);\r
2173 UChar *s = sn->s;\r
2174\r
2175 while (s < sn->end) {\r
2176 s += enclen(reg->enc, s);\r
2177 (*len)++;\r
14b0e578
CS
2178 }\r
2179 }\r
2180 break;\r
2181\r
b602265d 2182 case NODE_QUANT:\r
14b0e578 2183 {\r
b602265d 2184 QuantNode* qn = QUANT_(node);\r
14b0e578 2185\r
14b0e578 2186 if (qn->lower == qn->upper) {\r
b602265d
DG
2187 if (qn->upper == 0) {\r
2188 *len = 0;\r
2189 }\r
2190 else {\r
2191 r = get_char_length_tree1(NODE_BODY(node), reg, &tlen, level);\r
2192 if (r == 0)\r
2193 *len = distance_multiply(tlen, qn->lower);\r
2194 }\r
14b0e578
CS
2195 }\r
2196 else\r
b602265d 2197 r = GET_CHAR_LEN_VARLEN;\r
14b0e578
CS
2198 }\r
2199 break;\r
2200\r
b602265d
DG
2201#ifdef USE_CALL\r
2202 case NODE_CALL:\r
2203 if (! NODE_IS_RECURSION(node))\r
2204 r = get_char_length_tree1(NODE_BODY(node), reg, len, level);\r
14b0e578
CS
2205 else\r
2206 r = GET_CHAR_LEN_VARLEN;\r
2207 break;\r
2208#endif\r
2209\r
b602265d
DG
2210 case NODE_CTYPE:\r
2211 case NODE_CCLASS:\r
14b0e578
CS
2212 *len = 1;\r
2213 break;\r
2214\r
b602265d 2215 case NODE_ENCLOSURE:\r
14b0e578 2216 {\r
b602265d
DG
2217 EnclosureNode* en = ENCLOSURE_(node);\r
2218\r
14b0e578 2219 switch (en->type) {\r
b602265d
DG
2220 case ENCLOSURE_MEMORY:\r
2221#ifdef USE_CALL\r
2222 if (NODE_IS_CLEN_FIXED(node))\r
2223 *len = en->char_len;\r
2224 else {\r
2225 r = get_char_length_tree1(NODE_BODY(node), reg, len, level);\r
2226 if (r == 0) {\r
2227 en->char_len = *len;\r
2228 NODE_STATUS_ADD(node, CLEN_FIXED);\r
2229 }\r
2230 }\r
2231 break;\r
14b0e578 2232#endif\r
b602265d
DG
2233 case ENCLOSURE_OPTION:\r
2234 case ENCLOSURE_STOP_BACKTRACK:\r
2235 r = get_char_length_tree1(NODE_BODY(node), reg, len, level);\r
2236 break;\r
2237 case ENCLOSURE_IF_ELSE:\r
2238 {\r
2239 int clen, elen;\r
2240\r
2241 r = get_char_length_tree1(NODE_BODY(node), reg, &clen, level);\r
2242 if (r == 0) {\r
2243 if (IS_NOT_NULL(en->te.Then)) {\r
2244 r = get_char_length_tree1(en->te.Then, reg, &tlen, level);\r
2245 if (r != 0) break;\r
2246 }\r
2247 else tlen = 0;\r
2248 if (IS_NOT_NULL(en->te.Else)) {\r
2249 r = get_char_length_tree1(en->te.Else, reg, &elen, level);\r
2250 if (r != 0) break;\r
2251 }\r
2252 else elen = 0;\r
2253\r
2254 if (clen + tlen != elen) {\r
2255 r = GET_CHAR_LEN_VARLEN;\r
2256 }\r
2257 else {\r
2258 *len = elen;\r
2259 }\r
2260 }\r
2261 }\r
2262 break;\r
2263\r
14b0e578 2264 default:\r
b602265d 2265 break;\r
14b0e578
CS
2266 }\r
2267 }\r
2268 break;\r
2269\r
b602265d
DG
2270 case NODE_ANCHOR:\r
2271 case NODE_GIMMICK:\r
14b0e578
CS
2272 break;\r
2273\r
b602265d
DG
2274 case NODE_BACKREF:\r
2275 if (NODE_IS_CHECKER(node))\r
2276 break;\r
2277 /* fall */\r
14b0e578
CS
2278 default:\r
2279 r = GET_CHAR_LEN_VARLEN;\r
2280 break;\r
2281 }\r
2282\r
2283 return r;\r
2284}\r
2285\r
2286static int\r
2287get_char_length_tree(Node* node, regex_t* reg, int* len)\r
2288{\r
2289 return get_char_length_tree1(node, reg, len, 0);\r
2290}\r
2291\r
2292/* x is not included y ==> 1 : 0 */\r
2293static int\r
b602265d 2294is_exclusive(Node* x, Node* y, regex_t* reg)\r
14b0e578
CS
2295{\r
2296 int i, len;\r
2297 OnigCodePoint code;\r
2298 UChar *p;\r
b602265d 2299 NodeType ytype;\r
14b0e578
CS
2300\r
2301 retry:\r
b602265d
DG
2302 ytype = NODE_TYPE(y);\r
2303 switch (NODE_TYPE(x)) {\r
2304 case NODE_CTYPE:\r
14b0e578 2305 {\r
b602265d
DG
2306 if (CTYPE_(x)->ctype == CTYPE_ANYCHAR ||\r
2307 CTYPE_(y)->ctype == CTYPE_ANYCHAR)\r
2308 break;\r
2309\r
14b0e578 2310 switch (ytype) {\r
b602265d
DG
2311 case NODE_CTYPE:\r
2312 if (CTYPE_(y)->ctype == CTYPE_(x)->ctype &&\r
2313 CTYPE_(y)->not != CTYPE_(x)->not &&\r
2314 CTYPE_(y)->ascii_mode == CTYPE_(x)->ascii_mode)\r
2315 return 1;\r
2316 else\r
2317 return 0;\r
2318 break;\r
2319\r
2320 case NODE_CCLASS:\r
14b0e578 2321 swap:\r
b602265d
DG
2322 {\r
2323 Node* tmp;\r
2324 tmp = x; x = y; y = tmp;\r
2325 goto retry;\r
2326 }\r
2327 break;\r
14b0e578 2328\r
b602265d
DG
2329 case NODE_STRING:\r
2330 goto swap;\r
2331 break;\r
14b0e578
CS
2332\r
2333 default:\r
b602265d 2334 break;\r
14b0e578
CS
2335 }\r
2336 }\r
2337 break;\r
2338\r
b602265d 2339 case NODE_CCLASS:\r
14b0e578 2340 {\r
b602265d
DG
2341 int range;\r
2342 CClassNode* xc = CCLASS_(x);\r
2343\r
14b0e578 2344 switch (ytype) {\r
b602265d
DG
2345 case NODE_CTYPE:\r
2346 switch (CTYPE_(y)->ctype) {\r
2347 case CTYPE_ANYCHAR:\r
2348 return 0;\r
2349 break;\r
2350\r
2351 case ONIGENC_CTYPE_WORD:\r
2352 if (CTYPE_(y)->not == 0) {\r
2353 if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {\r
2354 range = CTYPE_(y)->ascii_mode != 0 ? 128 : SINGLE_BYTE_SIZE;\r
2355 for (i = 0; i < range; i++) {\r
2356 if (BITSET_AT(xc->bs, i)) {\r
2357 if (ONIGENC_IS_CODE_WORD(reg->enc, i)) return 0;\r
2358 }\r
2359 }\r
2360 return 1;\r
2361 }\r
2362 return 0;\r
2363 }\r
2364 else {\r
2365 if (IS_NOT_NULL(xc->mbuf)) return 0;\r
2366 if (IS_NCCLASS_NOT(xc)) return 0;\r
2367\r
2368 range = CTYPE_(y)->ascii_mode != 0 ? 128 : SINGLE_BYTE_SIZE;\r
2369 for (i = 0; i < range; i++) {\r
2370 if (! ONIGENC_IS_CODE_WORD(reg->enc, i)) {\r
2371 if (BITSET_AT(xc->bs, i))\r
2372 return 0;\r
2373 }\r
2374 }\r
2375 for (i = range; i < SINGLE_BYTE_SIZE; i++) {\r
2376 if (BITSET_AT(xc->bs, i)) return 0;\r
2377 }\r
2378 return 1;\r
2379 }\r
2380 break;\r
2381\r
2382 default:\r
2383 break;\r
2384 }\r
2385 break;\r
2386\r
2387 case NODE_CCLASS:\r
2388 {\r
2389 int v;\r
2390 CClassNode* yc = CCLASS_(y);\r
2391\r
2392 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {\r
2393 v = BITSET_AT(xc->bs, i);\r
2394 if ((v != 0 && !IS_NCCLASS_NOT(xc)) || (v == 0 && IS_NCCLASS_NOT(xc))) {\r
2395 v = BITSET_AT(yc->bs, i);\r
2396 if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||\r
14b0e578 2397 (v == 0 && IS_NCCLASS_NOT(yc)))\r
b602265d
DG
2398 return 0;\r
2399 }\r
2400 }\r
2401 if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||\r
2402 (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))\r
2403 return 1;\r
2404 return 0;\r
2405 }\r
2406 break;\r
2407\r
2408 case NODE_STRING:\r
2409 goto swap;\r
2410 break;\r
14b0e578
CS
2411\r
2412 default:\r
b602265d 2413 break;\r
14b0e578
CS
2414 }\r
2415 }\r
2416 break;\r
2417\r
b602265d 2418 case NODE_STRING:\r
14b0e578 2419 {\r
b602265d
DG
2420 StrNode* xs = STR_(x);\r
2421\r
2422 if (NODE_STRING_LEN(x) == 0)\r
2423 break;\r
14b0e578 2424\r
14b0e578 2425 switch (ytype) {\r
b602265d
DG
2426 case NODE_CTYPE:\r
2427 switch (CTYPE_(y)->ctype) {\r
2428 case CTYPE_ANYCHAR:\r
2429 break;\r
2430\r
14b0e578 2431 case ONIGENC_CTYPE_WORD:\r
b602265d
DG
2432 if (CTYPE_(y)->ascii_mode == 0) {\r
2433 if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))\r
2434 return CTYPE_(y)->not;\r
2435 else\r
2436 return !(CTYPE_(y)->not);\r
2437 }\r
2438 else {\r
2439 if (ONIGENC_IS_MBC_WORD_ASCII(reg->enc, xs->s, xs->end))\r
2440 return CTYPE_(y)->not;\r
2441 else\r
2442 return !(CTYPE_(y)->not);\r
2443 }\r
14b0e578
CS
2444 break;\r
2445 default:\r
2446 break;\r
2447 }\r
2448 break;\r
2449\r
b602265d 2450 case NODE_CCLASS:\r
14b0e578 2451 {\r
b602265d 2452 CClassNode* cc = CCLASS_(y);\r
14b0e578
CS
2453\r
2454 code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,\r
2455 xs->s + ONIGENC_MBC_MAXLEN(reg->enc));\r
2456 return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);\r
2457 }\r
2458 break;\r
2459\r
b602265d 2460 case NODE_STRING:\r
14b0e578
CS
2461 {\r
2462 UChar *q;\r
b602265d
DG
2463 StrNode* ys = STR_(y);\r
2464\r
2465 len = NODE_STRING_LEN(x);\r
2466 if (len > NODE_STRING_LEN(y)) len = NODE_STRING_LEN(y);\r
2467 if (NODE_STRING_IS_AMBIG(x) || NODE_STRING_IS_AMBIG(y)) {\r
14b0e578
CS
2468 /* tiny version */\r
2469 return 0;\r
2470 }\r
2471 else {\r
2472 for (i = 0, p = ys->s, q = xs->s; i < len; i++, p++, q++) {\r
2473 if (*p != *q) return 1;\r
2474 }\r
2475 }\r
2476 }\r
2477 break;\r
b602265d 2478\r
14b0e578
CS
2479 default:\r
2480 break;\r
2481 }\r
2482 }\r
2483 break;\r
2484\r
2485 default:\r
2486 break;\r
2487 }\r
2488\r
2489 return 0;\r
2490}\r
2491\r
2492static Node*\r
2493get_head_value_node(Node* node, int exact, regex_t* reg)\r
2494{\r
2495 Node* n = NULL_NODE;\r
2496\r
b602265d
DG
2497 switch (NODE_TYPE(node)) {\r
2498 case NODE_BACKREF:\r
2499 case NODE_ALT:\r
2500#ifdef USE_CALL\r
2501 case NODE_CALL:\r
14b0e578
CS
2502#endif\r
2503 break;\r
2504\r
b602265d
DG
2505 case NODE_CTYPE:\r
2506 if (CTYPE_(node)->ctype == CTYPE_ANYCHAR)\r
2507 break;\r
2508 /* fall */\r
2509 case NODE_CCLASS:\r
14b0e578
CS
2510 if (exact == 0) {\r
2511 n = node;\r
2512 }\r
2513 break;\r
2514\r
b602265d
DG
2515 case NODE_LIST:\r
2516 n = get_head_value_node(NODE_CAR(node), exact, reg);\r
14b0e578
CS
2517 break;\r
2518\r
b602265d 2519 case NODE_STRING:\r
14b0e578 2520 {\r
b602265d 2521 StrNode* sn = STR_(node);\r
14b0e578
CS
2522\r
2523 if (sn->end <= sn->s)\r
b602265d 2524 break;\r
14b0e578
CS
2525\r
2526 if (exact != 0 &&\r
b602265d 2527 !NODE_STRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {\r
14b0e578
CS
2528 }\r
2529 else {\r
b602265d 2530 n = node;\r
14b0e578
CS
2531 }\r
2532 }\r
2533 break;\r
2534\r
b602265d 2535 case NODE_QUANT:\r
14b0e578 2536 {\r
b602265d 2537 QuantNode* qn = QUANT_(node);\r
14b0e578 2538 if (qn->lower > 0) {\r
b602265d
DG
2539 if (IS_NOT_NULL(qn->head_exact))\r
2540 n = qn->head_exact;\r
2541 else\r
2542 n = get_head_value_node(NODE_BODY(node), exact, reg);\r
14b0e578
CS
2543 }\r
2544 }\r
2545 break;\r
2546\r
b602265d 2547 case NODE_ENCLOSURE:\r
14b0e578 2548 {\r
b602265d 2549 EnclosureNode* en = ENCLOSURE_(node);\r
14b0e578 2550 switch (en->type) {\r
b602265d
DG
2551 case ENCLOSURE_OPTION:\r
2552 {\r
2553 OnigOptionType options = reg->options;\r
2554\r
2555 reg->options = ENCLOSURE_(node)->o.options;\r
2556 n = get_head_value_node(NODE_BODY(node), exact, reg);\r
2557 reg->options = options;\r
2558 }\r
2559 break;\r
2560\r
2561 case ENCLOSURE_MEMORY:\r
2562 case ENCLOSURE_STOP_BACKTRACK:\r
2563 case ENCLOSURE_IF_ELSE:\r
2564 n = get_head_value_node(NODE_BODY(node), exact, reg);\r
2565 break;\r
14b0e578
CS
2566 }\r
2567 }\r
2568 break;\r
2569\r
b602265d
DG
2570 case NODE_ANCHOR:\r
2571 if (ANCHOR_(node)->type == ANCHOR_PREC_READ)\r
2572 n = get_head_value_node(NODE_BODY(node), exact, reg);\r
14b0e578
CS
2573 break;\r
2574\r
b602265d 2575 case NODE_GIMMICK:\r
14b0e578
CS
2576 default:\r
2577 break;\r
2578 }\r
2579\r
2580 return n;\r
2581}\r
2582\r
2583static int\r
b602265d 2584check_type_tree(Node* node, int type_mask, int enclosure_mask, int anchor_mask)\r
14b0e578 2585{\r
b602265d
DG
2586 NodeType type;\r
2587 int r = 0;\r
14b0e578 2588\r
b602265d
DG
2589 type = NODE_TYPE(node);\r
2590 if ((NODE_TYPE2BIT(type) & type_mask) == 0)\r
14b0e578
CS
2591 return 1;\r
2592\r
2593 switch (type) {\r
b602265d
DG
2594 case NODE_LIST:\r
2595 case NODE_ALT:\r
14b0e578 2596 do {\r
b602265d
DG
2597 r = check_type_tree(NODE_CAR(node), type_mask, enclosure_mask,\r
2598 anchor_mask);\r
2599 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));\r
14b0e578
CS
2600 break;\r
2601\r
b602265d
DG
2602 case NODE_QUANT:\r
2603 r = check_type_tree(NODE_BODY(node), type_mask, enclosure_mask, anchor_mask);\r
14b0e578
CS
2604 break;\r
2605\r
b602265d 2606 case NODE_ENCLOSURE:\r
14b0e578 2607 {\r
b602265d
DG
2608 EnclosureNode* en = ENCLOSURE_(node);\r
2609 if (((1<<en->type) & enclosure_mask) == 0)\r
2610 return 1;\r
2611\r
2612 r = check_type_tree(NODE_BODY(node), type_mask, enclosure_mask, anchor_mask);\r
2613 if (r == 0 && en->type == ENCLOSURE_IF_ELSE) {\r
2614 if (IS_NOT_NULL(en->te.Then)) {\r
2615 r = check_type_tree(en->te.Then, type_mask, enclosure_mask, anchor_mask);\r
2616 if (r != 0) break;\r
2617 }\r
2618 if (IS_NOT_NULL(en->te.Else)) {\r
2619 r = check_type_tree(en->te.Else, type_mask, enclosure_mask, anchor_mask);\r
2620 }\r
2621 }\r
14b0e578
CS
2622 }\r
2623 break;\r
2624\r
b602265d
DG
2625 case NODE_ANCHOR:\r
2626 type = ANCHOR_(node)->type;\r
14b0e578
CS
2627 if ((type & anchor_mask) == 0)\r
2628 return 1;\r
2629\r
b602265d
DG
2630 if (IS_NOT_NULL(NODE_BODY(node)))\r
2631 r = check_type_tree(NODE_BODY(node), type_mask, enclosure_mask, anchor_mask);\r
14b0e578
CS
2632 break;\r
2633\r
b602265d 2634 case NODE_GIMMICK:\r
14b0e578
CS
2635 default:\r
2636 break;\r
2637 }\r
2638 return r;\r
2639}\r
2640\r
b602265d
DG
2641static OnigLen\r
2642tree_min_len(Node* node, ScanEnv* env)\r
14b0e578 2643{\r
b602265d
DG
2644 OnigLen len;\r
2645 OnigLen tmin;\r
14b0e578 2646\r
b602265d
DG
2647 len = 0;\r
2648 switch (NODE_TYPE(node)) {\r
2649 case NODE_BACKREF:\r
2650 if (! NODE_IS_CHECKER(node)) {\r
2651 int i;\r
2652 int* backs;\r
2653 MemEnv* mem_env = SCANENV_MEMENV(env);\r
2654 BackRefNode* br = BACKREF_(node);\r
2655 if (NODE_IS_RECURSION(node)) break;\r
14b0e578 2656\r
b602265d
DG
2657 backs = BACKREFS_P(br);\r
2658 len = tree_min_len(mem_env[backs[0]].node, env);\r
2659 for (i = 1; i < br->back_num; i++) {\r
2660 tmin = tree_min_len(mem_env[backs[i]].node, env);\r
2661 if (len > tmin) len = tmin;\r
2662 }\r
14b0e578
CS
2663 }\r
2664 break;\r
2665\r
b602265d
DG
2666#ifdef USE_CALL\r
2667 case NODE_CALL:\r
14b0e578 2668 {\r
b602265d
DG
2669 Node* t = NODE_BODY(node);\r
2670 if (NODE_IS_RECURSION(node)) {\r
2671 if (NODE_IS_MIN_FIXED(t))\r
2672 len = ENCLOSURE_(t)->min_len;\r
2673 }\r
2674 else\r
2675 len = tree_min_len(t, env);\r
2676 }\r
2677 break;\r
2678#endif\r
2679\r
2680 case NODE_LIST:\r
2681 do {\r
2682 tmin = tree_min_len(NODE_CAR(node), env);\r
2683 len = distance_add(len, tmin);\r
2684 } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
2685 break;\r
2686\r
2687 case NODE_ALT:\r
2688 {\r
2689 Node *x, *y;\r
2690 y = node;\r
14b0e578 2691 do {\r
b602265d
DG
2692 x = NODE_CAR(y);\r
2693 tmin = tree_min_len(x, env);\r
2694 if (y == node) len = tmin;\r
2695 else if (len > tmin) len = tmin;\r
2696 } while (IS_NOT_NULL(y = NODE_CDR(y)));\r
14b0e578
CS
2697 }\r
2698 break;\r
2699\r
b602265d
DG
2700 case NODE_STRING:\r
2701 {\r
2702 StrNode* sn = STR_(node);\r
2703 len = (int )(sn->end - sn->s);\r
14b0e578
CS
2704 }\r
2705 break;\r
2706\r
b602265d
DG
2707 case NODE_CTYPE:\r
2708 case NODE_CCLASS:\r
2709 len = ONIGENC_MBC_MINLEN(env->enc);\r
2710 break;\r
2711\r
2712 case NODE_QUANT:\r
14b0e578 2713 {\r
b602265d
DG
2714 QuantNode* qn = QUANT_(node);\r
2715\r
2716 if (qn->lower > 0) {\r
2717 len = tree_min_len(NODE_BODY(node), env);\r
2718 len = distance_multiply(len, qn->lower);\r
14b0e578
CS
2719 }\r
2720 }\r
2721 break;\r
2722\r
b602265d
DG
2723 case NODE_ENCLOSURE:\r
2724 {\r
2725 EnclosureNode* en = ENCLOSURE_(node);\r
2726 switch (en->type) {\r
2727 case ENCLOSURE_MEMORY:\r
2728 if (NODE_IS_MIN_FIXED(node))\r
2729 len = en->min_len;\r
2730 else {\r
2731 if (NODE_IS_MARK1(node))\r
2732 len = 0; /* recursive */\r
2733 else {\r
2734 NODE_STATUS_ADD(node, MARK1);\r
2735 len = tree_min_len(NODE_BODY(node), env);\r
2736 NODE_STATUS_REMOVE(node, MARK1);\r
14b0e578 2737\r
b602265d
DG
2738 en->min_len = len;\r
2739 NODE_STATUS_ADD(node, MIN_FIXED);\r
2740 }\r
2741 }\r
2742 break;\r
2743\r
2744 case ENCLOSURE_OPTION:\r
2745 case ENCLOSURE_STOP_BACKTRACK:\r
2746 len = tree_min_len(NODE_BODY(node), env);\r
2747 break;\r
2748 case ENCLOSURE_IF_ELSE:\r
2749 {\r
2750 OnigLen elen;\r
2751\r
2752 len = tree_min_len(NODE_BODY(node), env);\r
2753 if (IS_NOT_NULL(en->te.Then))\r
2754 len += tree_min_len(en->te.Then, env);\r
2755 if (IS_NOT_NULL(en->te.Else))\r
2756 elen = tree_min_len(en->te.Else, env);\r
2757 else elen = 0;\r
2758\r
2759 if (elen < len) len = elen;\r
2760 }\r
2761 break;\r
2762 }\r
14b0e578
CS
2763 }\r
2764 break;\r
2765\r
b602265d
DG
2766 case NODE_GIMMICK:\r
2767 {\r
2768 GimmickNode* g = GIMMICK_(node);\r
2769 if (g->type == GIMMICK_FAIL) {\r
2770 len = INFINITE_LEN;\r
2771 break;\r
2772 }\r
2773 }\r
2774 /* fall */\r
2775 case NODE_ANCHOR:\r
14b0e578
CS
2776 default:\r
2777 break;\r
2778 }\r
2779\r
b602265d 2780 return len;\r
14b0e578
CS
2781}\r
2782\r
b602265d
DG
2783static OnigLen\r
2784tree_max_len(Node* node, ScanEnv* env)\r
14b0e578 2785{\r
b602265d
DG
2786 OnigLen len;\r
2787 OnigLen tmax;\r
14b0e578 2788\r
b602265d
DG
2789 len = 0;\r
2790 switch (NODE_TYPE(node)) {\r
2791 case NODE_LIST:\r
14b0e578 2792 do {\r
b602265d
DG
2793 tmax = tree_max_len(NODE_CAR(node), env);\r
2794 len = distance_add(len, tmax);\r
2795 } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
14b0e578
CS
2796 break;\r
2797\r
b602265d
DG
2798 case NODE_ALT:\r
2799 do {\r
2800 tmax = tree_max_len(NODE_CAR(node), env);\r
2801 if (len < tmax) len = tmax;\r
2802 } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
14b0e578
CS
2803 break;\r
2804\r
b602265d 2805 case NODE_STRING:\r
14b0e578 2806 {\r
b602265d
DG
2807 StrNode* sn = STR_(node);\r
2808 len = (OnigLen )(sn->end - sn->s);\r
2809 }\r
2810 break;\r
2811\r
2812 case NODE_CTYPE:\r
2813 case NODE_CCLASS:\r
2814 len = ONIGENC_MBC_MAXLEN_DIST(env->enc);\r
2815 break;\r
2816\r
2817 case NODE_BACKREF:\r
2818 if (! NODE_IS_CHECKER(node)) {\r
2819 int i;\r
2820 int* backs;\r
2821 MemEnv* mem_env = SCANENV_MEMENV(env);\r
2822 BackRefNode* br = BACKREF_(node);\r
2823 if (NODE_IS_RECURSION(node)) {\r
2824 len = INFINITE_LEN;\r
2825 break;\r
2826 }\r
2827 backs = BACKREFS_P(br);\r
2828 for (i = 0; i < br->back_num; i++) {\r
2829 tmax = tree_max_len(mem_env[backs[i]].node, env);\r
2830 if (len < tmax) len = tmax;\r
14b0e578
CS
2831 }\r
2832 }\r
2833 break;\r
2834\r
b602265d
DG
2835#ifdef USE_CALL\r
2836 case NODE_CALL:\r
2837 if (! NODE_IS_RECURSION(node))\r
2838 len = tree_max_len(NODE_BODY(node), env);\r
2839 else\r
2840 len = INFINITE_LEN;\r
2841 break;\r
2842#endif\r
2843\r
2844 case NODE_QUANT:\r
14b0e578 2845 {\r
b602265d 2846 QuantNode* qn = QUANT_(node);\r
14b0e578 2847\r
b602265d
DG
2848 if (qn->upper != 0) {\r
2849 len = tree_max_len(NODE_BODY(node), env);\r
2850 if (len != 0) {\r
2851 if (! IS_REPEAT_INFINITE(qn->upper))\r
2852 len = distance_multiply(len, qn->upper);\r
2853 else\r
2854 len = INFINITE_LEN;\r
2855 }\r
14b0e578 2856 }\r
14b0e578 2857 }\r
b602265d
DG
2858 break;\r
2859\r
2860 case NODE_ENCLOSURE:\r
2861 {\r
2862 EnclosureNode* en = ENCLOSURE_(node);\r
2863 switch (en->type) {\r
2864 case ENCLOSURE_MEMORY:\r
2865 if (NODE_IS_MAX_FIXED(node))\r
2866 len = en->max_len;\r
2867 else {\r
2868 if (NODE_IS_MARK1(node))\r
2869 len = INFINITE_LEN;\r
2870 else {\r
2871 NODE_STATUS_ADD(node, MARK1);\r
2872 len = tree_max_len(NODE_BODY(node), env);\r
2873 NODE_STATUS_REMOVE(node, MARK1);\r
2874\r
2875 en->max_len = len;\r
2876 NODE_STATUS_ADD(node, MAX_FIXED);\r
2877 }\r
2878 }\r
2879 break;\r
2880\r
2881 case ENCLOSURE_OPTION:\r
2882 case ENCLOSURE_STOP_BACKTRACK:\r
2883 len = tree_max_len(NODE_BODY(node), env);\r
2884 break;\r
2885 case ENCLOSURE_IF_ELSE:\r
2886 {\r
2887 OnigLen tlen, elen;\r
2888\r
2889 len = tree_max_len(NODE_BODY(node), env);\r
2890 if (IS_NOT_NULL(en->te.Then)) {\r
2891 tlen = tree_max_len(en->te.Then, env);\r
2892 len = distance_add(len, tlen);\r
2893 }\r
2894 if (IS_NOT_NULL(en->te.Else))\r
2895 elen = tree_max_len(en->te.Else, env);\r
2896 else elen = 0;\r
14b0e578 2897\r
b602265d
DG
2898 if (elen > len) len = elen;\r
2899 }\r
2900 break;\r
2901 }\r
2902 }\r
14b0e578
CS
2903 break;\r
2904\r
b602265d
DG
2905 case NODE_ANCHOR:\r
2906 case NODE_GIMMICK:\r
14b0e578
CS
2907 default:\r
2908 break;\r
2909 }\r
2910\r
b602265d 2911 return len;\r
14b0e578
CS
2912}\r
2913\r
2914static int\r
b602265d 2915check_backrefs(Node* node, ScanEnv* env)\r
14b0e578 2916{\r
b602265d 2917 int r;\r
14b0e578 2918\r
b602265d
DG
2919 switch (NODE_TYPE(node)) {\r
2920 case NODE_LIST:\r
2921 case NODE_ALT:\r
14b0e578 2922 do {\r
b602265d
DG
2923 r = check_backrefs(NODE_CAR(node), env);\r
2924 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));\r
14b0e578
CS
2925 break;\r
2926\r
b602265d
DG
2927 case NODE_ANCHOR:\r
2928 if (! ANCHOR_HAS_BODY(ANCHOR_(node))) {\r
2929 r = 0;\r
2930 break;\r
2931 }\r
2932 /* fall */\r
2933 case NODE_QUANT:\r
2934 r = check_backrefs(NODE_BODY(node), env);\r
14b0e578
CS
2935 break;\r
2936\r
b602265d
DG
2937 case NODE_ENCLOSURE:\r
2938 r = check_backrefs(NODE_BODY(node), env);\r
14b0e578 2939 {\r
b602265d
DG
2940 EnclosureNode* en = ENCLOSURE_(node);\r
2941\r
2942 if (en->type == ENCLOSURE_IF_ELSE) {\r
2943 if (r != 0) return r;\r
2944 if (IS_NOT_NULL(en->te.Then)) {\r
2945 r = check_backrefs(en->te.Then, env);\r
2946 if (r != 0) return r;\r
2947 }\r
2948 if (IS_NOT_NULL(en->te.Else)) {\r
2949 r = check_backrefs(en->te.Else, env);\r
2950 }\r
14b0e578
CS
2951 }\r
2952 }\r
2953 break;\r
2954\r
b602265d
DG
2955 case NODE_BACKREF:\r
2956 {\r
2957 int i;\r
2958 BackRefNode* br = BACKREF_(node);\r
2959 int* backs = BACKREFS_P(br);\r
2960 MemEnv* mem_env = SCANENV_MEMENV(env);\r
14b0e578 2961\r
b602265d
DG
2962 for (i = 0; i < br->back_num; i++) {\r
2963 if (backs[i] > env->num_mem)\r
2964 return ONIGERR_INVALID_BACKREF;\r
2965\r
2966 NODE_STATUS_ADD(mem_env[backs[i]].node, BACKREF);\r
2967 }\r
2968 r = 0;\r
14b0e578
CS
2969 }\r
2970 break;\r
2971\r
2972 default:\r
b602265d 2973 r = 0;\r
14b0e578
CS
2974 break;\r
2975 }\r
2976\r
2977 return r;\r
2978}\r
2979\r
2980\r
b602265d
DG
2981#ifdef USE_CALL\r
2982\r
2983#define RECURSION_EXIST (1<<0)\r
2984#define RECURSION_MUST (1<<1)\r
2985#define RECURSION_INFINITE (1<<2)\r
2986\r
14b0e578 2987static int\r
b602265d 2988infinite_recursive_call_check(Node* node, ScanEnv* env, int head)\r
14b0e578 2989{\r
b602265d 2990 int ret;\r
14b0e578
CS
2991 int r = 0;\r
2992\r
b602265d
DG
2993 switch (NODE_TYPE(node)) {\r
2994 case NODE_LIST:\r
14b0e578 2995 {\r
b602265d
DG
2996 Node *x;\r
2997 OnigLen min;\r
2998\r
2999 x = node;\r
14b0e578 3000 do {\r
b602265d
DG
3001 ret = infinite_recursive_call_check(NODE_CAR(x), env, head);\r
3002 if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;\r
3003 r |= ret;\r
3004 if (head != 0) {\r
3005 min = tree_min_len(NODE_CAR(x), env);\r
3006 if (min != 0) head = 0;\r
3007 }\r
3008 } while (IS_NOT_NULL(x = NODE_CDR(x)));\r
14b0e578
CS
3009 }\r
3010 break;\r
3011\r
b602265d
DG
3012 case NODE_ALT:\r
3013 {\r
3014 int must;\r
3015\r
3016 must = RECURSION_MUST;\r
3017 do {\r
3018 ret = infinite_recursive_call_check(NODE_CAR(node), env, head);\r
3019 if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;\r
3020\r
3021 r |= (ret & RECURSION_EXIST);\r
3022 must &= ret;\r
3023 } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
3024 r |= must;\r
14b0e578
CS
3025 }\r
3026 break;\r
3027\r
b602265d
DG
3028 case NODE_QUANT:\r
3029 r = infinite_recursive_call_check(NODE_BODY(node), env, head);\r
3030 if (r < 0) return r;\r
3031 if ((r & RECURSION_MUST) != 0) {\r
3032 if (QUANT_(node)->lower == 0)\r
3033 r &= ~RECURSION_MUST;\r
14b0e578
CS
3034 }\r
3035 break;\r
3036\r
b602265d
DG
3037 case NODE_ANCHOR:\r
3038 if (! ANCHOR_HAS_BODY(ANCHOR_(node)))\r
3039 break;\r
3040 /* fall */\r
3041 case NODE_CALL:\r
3042 r = infinite_recursive_call_check(NODE_BODY(node), env, head);\r
3043 break;\r
3044\r
3045 case NODE_ENCLOSURE:\r
14b0e578 3046 {\r
b602265d
DG
3047 EnclosureNode* en = ENCLOSURE_(node);\r
3048\r
3049 if (en->type == ENCLOSURE_MEMORY) {\r
3050 if (NODE_IS_MARK2(node))\r
3051 return 0;\r
3052 else if (NODE_IS_MARK1(node))\r
3053 return (head == 0 ? RECURSION_EXIST | RECURSION_MUST\r
3054 : RECURSION_EXIST | RECURSION_MUST | RECURSION_INFINITE);\r
3055 else {\r
3056 NODE_STATUS_ADD(node, MARK2);\r
3057 r = infinite_recursive_call_check(NODE_BODY(node), env, head);\r
3058 NODE_STATUS_REMOVE(node, MARK2);\r
3059 }\r
3060 }\r
3061 else if (en->type == ENCLOSURE_IF_ELSE) {\r
3062 int eret;\r
3063\r
3064 ret = infinite_recursive_call_check(NODE_BODY(node), env, head);\r
3065 if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;\r
3066 r |= ret;\r
3067 if (IS_NOT_NULL(en->te.Then)) {\r
3068 OnigLen min;\r
3069 if (head != 0) {\r
3070 min = tree_min_len(NODE_BODY(node), env);\r
3071 }\r
3072 else min = 0;\r
3073\r
3074 ret = infinite_recursive_call_check(en->te.Then, env, min != 0 ? 0:head);\r
3075 if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;\r
3076 r |= ret;\r
3077 }\r
3078 if (IS_NOT_NULL(en->te.Else)) {\r
3079 eret = infinite_recursive_call_check(en->te.Else, env, head);\r
3080 if (eret < 0 || (eret & RECURSION_INFINITE) != 0) return eret;\r
3081 r |= (eret & RECURSION_EXIST);\r
3082 if ((eret & RECURSION_MUST) == 0)\r
3083 r &= ~RECURSION_MUST;\r
3084 }\r
3085 }\r
3086 else {\r
3087 r = infinite_recursive_call_check(NODE_BODY(node), env, head);\r
14b0e578 3088 }\r
14b0e578
CS
3089 }\r
3090 break;\r
3091\r
3092 default:\r
3093 break;\r
3094 }\r
3095\r
3096 return r;\r
3097}\r
3098\r
3099static int\r
b602265d 3100infinite_recursive_call_check_trav(Node* node, ScanEnv* env)\r
14b0e578 3101{\r
b602265d 3102 int r;\r
14b0e578 3103\r
b602265d
DG
3104 switch (NODE_TYPE(node)) {\r
3105 case NODE_LIST:\r
3106 case NODE_ALT:\r
14b0e578 3107 do {\r
b602265d
DG
3108 r = infinite_recursive_call_check_trav(NODE_CAR(node), env);\r
3109 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));\r
3110 break;\r
3111\r
3112 case NODE_ANCHOR:\r
3113 if (! ANCHOR_HAS_BODY(ANCHOR_(node))) {\r
3114 r = 0;\r
3115 break;\r
3116 }\r
3117 /* fall */\r
3118 case NODE_QUANT:\r
3119 r = infinite_recursive_call_check_trav(NODE_BODY(node), env);\r
3120 break;\r
3121\r
3122 case NODE_ENCLOSURE:\r
3123 {\r
3124 EnclosureNode* en = ENCLOSURE_(node);\r
3125\r
3126 if (en->type == ENCLOSURE_MEMORY) {\r
3127 if (NODE_IS_RECURSION(node) && NODE_IS_CALLED(node)) {\r
3128 int ret;\r
3129\r
3130 NODE_STATUS_ADD(node, MARK1);\r
3131\r
3132 ret = infinite_recursive_call_check(NODE_BODY(node), env, 1);\r
3133 if (ret < 0) return ret;\r
3134 else if ((ret & (RECURSION_MUST | RECURSION_INFINITE)) != 0)\r
3135 return ONIGERR_NEVER_ENDING_RECURSION;\r
3136\r
3137 NODE_STATUS_REMOVE(node, MARK1);\r
3138 }\r
3139 }\r
3140 else if (en->type == ENCLOSURE_IF_ELSE) {\r
3141 if (IS_NOT_NULL(en->te.Then)) {\r
3142 r = infinite_recursive_call_check_trav(en->te.Then, env);\r
3143 if (r != 0) return r;\r
3144 }\r
3145 if (IS_NOT_NULL(en->te.Else)) {\r
3146 r = infinite_recursive_call_check_trav(en->te.Else, env);\r
3147 if (r != 0) return r;\r
3148 }\r
3149 }\r
3150 }\r
3151\r
3152 r = infinite_recursive_call_check_trav(NODE_BODY(node), env);\r
3153 break;\r
3154\r
3155 default:\r
3156 r = 0;\r
14b0e578 3157 break;\r
b602265d
DG
3158 }\r
3159\r
3160 return r;\r
3161}\r
3162\r
3163static int\r
3164recursive_call_check(Node* node)\r
3165{\r
3166 int r;\r
14b0e578 3167\r
b602265d
DG
3168 switch (NODE_TYPE(node)) {\r
3169 case NODE_LIST:\r
3170 case NODE_ALT:\r
3171 r = 0;\r
14b0e578 3172 do {\r
b602265d
DG
3173 r |= recursive_call_check(NODE_CAR(node));\r
3174 } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
14b0e578
CS
3175 break;\r
3176\r
b602265d
DG
3177 case NODE_ANCHOR:\r
3178 if (! ANCHOR_HAS_BODY(ANCHOR_(node))) {\r
3179 r = 0;\r
3180 break;\r
3181 }\r
3182 /* fall */\r
3183 case NODE_QUANT:\r
3184 r = recursive_call_check(NODE_BODY(node));\r
14b0e578 3185 break;\r
b602265d
DG
3186\r
3187 case NODE_CALL:\r
3188 r = recursive_call_check(NODE_BODY(node));\r
3189 if (r != 0) {\r
3190 if (NODE_IS_MARK1(NODE_BODY(node)))\r
3191 NODE_STATUS_ADD(node, RECURSION);\r
3192 }\r
14b0e578
CS
3193 break;\r
3194\r
b602265d 3195 case NODE_ENCLOSURE:\r
14b0e578 3196 {\r
b602265d
DG
3197 EnclosureNode* en = ENCLOSURE_(node);\r
3198\r
3199 if (en->type == ENCLOSURE_MEMORY) {\r
3200 if (NODE_IS_MARK2(node))\r
3201 return 0;\r
3202 else if (NODE_IS_MARK1(node))\r
3203 return 1; /* recursion */\r
3204 else {\r
3205 NODE_STATUS_ADD(node, MARK2);\r
3206 r = recursive_call_check(NODE_BODY(node));\r
3207 NODE_STATUS_REMOVE(node, MARK2);\r
3208 }\r
3209 }\r
3210 else if (en->type == ENCLOSURE_IF_ELSE) {\r
3211 r = 0;\r
3212 if (IS_NOT_NULL(en->te.Then)) {\r
3213 r |= recursive_call_check(en->te.Then);\r
3214 }\r
3215 if (IS_NOT_NULL(en->te.Else)) {\r
3216 r |= recursive_call_check(en->te.Else);\r
3217 }\r
3218 r |= recursive_call_check(NODE_BODY(node));\r
14b0e578 3219 }\r
14b0e578 3220 else {\r
b602265d 3221 r = recursive_call_check(NODE_BODY(node));\r
14b0e578 3222 }\r
14b0e578
CS
3223 }\r
3224 break;\r
3225\r
b602265d
DG
3226 default:\r
3227 r = 0;\r
3228 break;\r
3229 }\r
3230\r
3231 return r;\r
3232}\r
3233\r
3234#define IN_RECURSION (1<<0)\r
3235#define FOUND_CALLED_NODE 1\r
3236\r
3237static int\r
3238recursive_call_check_trav(Node* node, ScanEnv* env, int state)\r
3239{\r
3240 int r = 0;\r
3241\r
3242 switch (NODE_TYPE(node)) {\r
3243 case NODE_LIST:\r
3244 case NODE_ALT:\r
14b0e578 3245 {\r
b602265d
DG
3246 int ret;\r
3247 do {\r
3248 ret = recursive_call_check_trav(NODE_CAR(node), env, state);\r
3249 if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;\r
3250 else if (ret < 0) return ret;\r
3251 } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
3252 }\r
3253 break;\r
14b0e578 3254\r
b602265d
DG
3255 case NODE_QUANT:\r
3256 r = recursive_call_check_trav(NODE_BODY(node), env, state);\r
3257 if (QUANT_(node)->upper == 0) {\r
3258 if (r == FOUND_CALLED_NODE)\r
3259 QUANT_(node)->is_refered = 1;\r
3260 }\r
3261 break;\r
3262\r
3263 case NODE_ANCHOR:\r
3264 {\r
3265 AnchorNode* an = ANCHOR_(node);\r
3266 if (ANCHOR_HAS_BODY(an))\r
3267 r = recursive_call_check_trav(NODE_ANCHOR_BODY(an), env, state);\r
3268 }\r
3269 break;\r
3270\r
3271 case NODE_ENCLOSURE:\r
3272 {\r
3273 int ret;\r
3274 int state1;\r
3275 EnclosureNode* en = ENCLOSURE_(node);\r
3276\r
3277 if (en->type == ENCLOSURE_MEMORY) {\r
3278 if (NODE_IS_CALLED(node) || (state & IN_RECURSION) != 0) {\r
3279 if (! NODE_IS_RECURSION(node)) {\r
3280 NODE_STATUS_ADD(node, MARK1);\r
3281 r = recursive_call_check(NODE_BODY(node));\r
3282 if (r != 0)\r
3283 NODE_STATUS_ADD(node, RECURSION);\r
3284 NODE_STATUS_REMOVE(node, MARK1);\r
3285 }\r
3286\r
3287 if (NODE_IS_CALLED(node))\r
3288 r = FOUND_CALLED_NODE;\r
3289 }\r
3290 }\r
3291\r
3292 state1 = state;\r
3293 if (NODE_IS_RECURSION(node))\r
3294 state1 |= IN_RECURSION;\r
3295\r
3296 ret = recursive_call_check_trav(NODE_BODY(node), env, state1);\r
3297 if (ret == FOUND_CALLED_NODE)\r
3298 r = FOUND_CALLED_NODE;\r
3299\r
3300 if (en->type == ENCLOSURE_IF_ELSE) {\r
3301 if (IS_NOT_NULL(en->te.Then)) {\r
3302 ret = recursive_call_check_trav(en->te.Then, env, state1);\r
3303 if (ret == FOUND_CALLED_NODE)\r
3304 r = FOUND_CALLED_NODE;\r
3305 }\r
3306 if (IS_NOT_NULL(en->te.Else)) {\r
3307 ret = recursive_call_check_trav(en->te.Else, env, state1);\r
3308 if (ret == FOUND_CALLED_NODE)\r
3309 r = FOUND_CALLED_NODE;\r
3310 }\r
14b0e578
CS
3311 }\r
3312 }\r
3313 break;\r
3314\r
3315 default:\r
3316 break;\r
3317 }\r
3318\r
3319 return r;\r
3320}\r
b602265d 3321\r
14b0e578
CS
3322#endif\r
3323\r
3324/* divide different length alternatives in look-behind.\r
3325 (?<=A|B) ==> (?<=A)|(?<=B)\r
3326 (?<!A|B) ==> (?<!A)(?<!B)\r
3327*/\r
3328static int\r
3329divide_look_behind_alternatives(Node* node)\r
3330{\r
3331 Node *head, *np, *insert_node;\r
b602265d 3332 AnchorNode* an = ANCHOR_(node);\r
14b0e578
CS
3333 int anc_type = an->type;\r
3334\r
b602265d
DG
3335 head = NODE_ANCHOR_BODY(an);\r
3336 np = NODE_CAR(head);\r
14b0e578 3337 swap_node(node, head);\r
b602265d
DG
3338 NODE_CAR(node) = head;\r
3339 NODE_BODY(head) = np;\r
14b0e578
CS
3340\r
3341 np = node;\r
b602265d
DG
3342 while (IS_NOT_NULL(np = NODE_CDR(np))) {\r
3343 insert_node = onig_node_new_anchor(anc_type, an->ascii_mode);\r
14b0e578 3344 CHECK_NULL_RETURN_MEMERR(insert_node);\r
b602265d
DG
3345 NODE_BODY(insert_node) = NODE_CAR(np);\r
3346 NODE_CAR(np) = insert_node;\r
14b0e578
CS
3347 }\r
3348\r
3349 if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {\r
3350 np = node;\r
3351 do {\r
b602265d
DG
3352 NODE_SET_TYPE(np, NODE_LIST); /* alt -> list */\r
3353 } while (IS_NOT_NULL(np = NODE_CDR(np)));\r
14b0e578
CS
3354 }\r
3355 return 0;\r
3356}\r
3357\r
3358static int\r
3359setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)\r
3360{\r
3361 int r, len;\r
b602265d 3362 AnchorNode* an = ANCHOR_(node);\r
14b0e578 3363\r
b602265d 3364 r = get_char_length_tree(NODE_ANCHOR_BODY(an), reg, &len);\r
14b0e578
CS
3365 if (r == 0)\r
3366 an->char_len = len;\r
3367 else if (r == GET_CHAR_LEN_VARLEN)\r
3368 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;\r
3369 else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {\r
3370 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))\r
3371 r = divide_look_behind_alternatives(node);\r
3372 else\r
3373 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;\r
3374 }\r
3375\r
3376 return r;\r
3377}\r
3378\r
3379static int\r
3380next_setup(Node* node, Node* next_node, regex_t* reg)\r
3381{\r
b602265d 3382 NodeType type;\r
14b0e578
CS
3383\r
3384 retry:\r
b602265d
DG
3385 type = NODE_TYPE(node);\r
3386 if (type == NODE_QUANT) {\r
3387 QuantNode* qn = QUANT_(node);\r
14b0e578 3388 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {\r
b602265d 3389#ifdef USE_QUANT_PEEK_NEXT\r
14b0e578
CS
3390 Node* n = get_head_value_node(next_node, 1, reg);\r
3391 /* '\0': for UTF-16BE etc... */\r
b602265d
DG
3392 if (IS_NOT_NULL(n) && STR_(n)->s[0] != '\0') {\r
3393 qn->next_head_exact = n;\r
14b0e578
CS
3394 }\r
3395#endif\r
3396 /* automatic posseivation a*b ==> (?>a*)b */\r
3397 if (qn->lower <= 1) {\r
b602265d
DG
3398 if (NODE_IS_SIMPLE_TYPE(NODE_BODY(node))) {\r
3399 Node *x, *y;\r
3400 x = get_head_value_node(NODE_BODY(node), 0, reg);\r
3401 if (IS_NOT_NULL(x)) {\r
3402 y = get_head_value_node(next_node, 0, reg);\r
3403 if (IS_NOT_NULL(y) && is_exclusive(x, y, reg)) {\r
3404 Node* en = onig_node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);\r
3405 CHECK_NULL_RETURN_MEMERR(en);\r
3406 NODE_STATUS_ADD(en, STOP_BT_SIMPLE_REPEAT);\r
3407 swap_node(node, en);\r
3408 NODE_BODY(node) = en;\r
3409 }\r
3410 }\r
3411 }\r
14b0e578
CS
3412 }\r
3413 }\r
3414 }\r
b602265d
DG
3415 else if (type == NODE_ENCLOSURE) {\r
3416 EnclosureNode* en = ENCLOSURE_(node);\r
3417 if (en->type == ENCLOSURE_MEMORY) {\r
3418 node = NODE_BODY(node);\r
14b0e578
CS
3419 goto retry;\r
3420 }\r
3421 }\r
3422 return 0;\r
3423}\r
3424\r
3425\r
3426static int\r
3427update_string_node_case_fold(regex_t* reg, Node *node)\r
3428{\r
3429 UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];\r
3430 UChar *sbuf, *ebuf, *sp;\r
3431 int r, i, len, sbuf_size;\r
b602265d 3432 StrNode* sn = STR_(node);\r
14b0e578
CS
3433\r
3434 end = sn->end;\r
b602265d 3435 sbuf_size = (int )(end - sn->s) * 2;\r
14b0e578
CS
3436 sbuf = (UChar* )xmalloc(sbuf_size);\r
3437 CHECK_NULL_RETURN_MEMERR(sbuf);\r
3438 ebuf = sbuf + sbuf_size;\r
3439\r
3440 sp = sbuf;\r
3441 p = sn->s;\r
3442 while (p < end) {\r
3443 len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);\r
3444 for (i = 0; i < len; i++) {\r
3445 if (sp >= ebuf) {\r
3446 sbuf = (UChar* )xrealloc(sbuf, sbuf_size * 2, sbuf_size);\r
3447 CHECK_NULL_RETURN_MEMERR(sbuf);\r
3448 sp = sbuf + sbuf_size;\r
3449 sbuf_size *= 2;\r
3450 ebuf = sbuf + sbuf_size;\r
3451 }\r
3452\r
3453 *sp++ = buf[i];\r
3454 }\r
3455 }\r
3456\r
3457 r = onig_node_str_set(node, sbuf, sp);\r
3458 if (r != 0) {\r
3459 xfree(sbuf);\r
3460 return r;\r
3461 }\r
3462\r
3463 xfree(sbuf);\r
3464 return 0;\r
3465}\r
3466\r
3467static int\r
b602265d 3468expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end, regex_t* reg)\r
14b0e578
CS
3469{\r
3470 int r;\r
3471 Node *node;\r
3472\r
3473 node = onig_node_new_str(s, end);\r
3474 if (IS_NULL(node)) return ONIGERR_MEMORY;\r
3475\r
3476 r = update_string_node_case_fold(reg, node);\r
3477 if (r != 0) {\r
3478 onig_node_free(node);\r
3479 return r;\r
3480 }\r
3481\r
b602265d
DG
3482 NODE_STRING_SET_AMBIG(node);\r
3483 NODE_STRING_SET_DONT_GET_OPT_INFO(node);\r
14b0e578
CS
3484 *rnode = node;\r
3485 return 0;\r
3486}\r
3487\r
3488static int\r
b602265d
DG
3489expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], UChar *p,\r
3490 int slen, UChar *end, regex_t* reg, Node **rnode)\r
14b0e578 3491{\r
b602265d
DG
3492 int r, i, j;\r
3493 int len;\r
3494 int varlen;\r
14b0e578
CS
3495 Node *anode, *var_anode, *snode, *xnode, *an;\r
3496 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];\r
3497\r
3498 *rnode = var_anode = NULL_NODE;\r
3499\r
3500 varlen = 0;\r
3501 for (i = 0; i < item_num; i++) {\r
3502 if (items[i].byte_len != slen) {\r
3503 varlen = 1;\r
3504 break;\r
3505 }\r
3506 }\r
3507\r
3508 if (varlen != 0) {\r
3509 *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);\r
3510 if (IS_NULL(var_anode)) return ONIGERR_MEMORY;\r
3511\r
3512 xnode = onig_node_new_list(NULL, NULL);\r
3513 if (IS_NULL(xnode)) goto mem_err;\r
b602265d 3514 NODE_CAR(var_anode) = xnode;\r
14b0e578
CS
3515\r
3516 anode = onig_node_new_alt(NULL_NODE, NULL_NODE);\r
3517 if (IS_NULL(anode)) goto mem_err;\r
b602265d 3518 NODE_CAR(xnode) = anode;\r
14b0e578
CS
3519 }\r
3520 else {\r
3521 *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);\r
3522 if (IS_NULL(anode)) return ONIGERR_MEMORY;\r
3523 }\r
3524\r
3525 snode = onig_node_new_str(p, p + slen);\r
3526 if (IS_NULL(snode)) goto mem_err;\r
3527\r
b602265d 3528 NODE_CAR(anode) = snode;\r
14b0e578
CS
3529\r
3530 for (i = 0; i < item_num; i++) {\r
3531 snode = onig_node_new_str(NULL, NULL);\r
3532 if (IS_NULL(snode)) goto mem_err;\r
3533 \r
3534 for (j = 0; j < items[i].code_len; j++) {\r
3535 len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);\r
3536 if (len < 0) {\r
b602265d
DG
3537 r = len;\r
3538 goto mem_err2;\r
14b0e578
CS
3539 }\r
3540\r
3541 r = onig_node_str_cat(snode, buf, buf + len);\r
3542 if (r != 0) goto mem_err2;\r
3543 }\r
3544\r
3545 an = onig_node_new_alt(NULL_NODE, NULL_NODE);\r
3546 if (IS_NULL(an)) {\r
3547 goto mem_err2;\r
3548 }\r
3948c510
DG
3549 //The NULL pointer check is not necessary. It is added just for pass static\r
3550 //analysis. When condition "items[i].byte_len != slen" is true, "varlen = 1"\r
3551 //in line 3503 will be reached ,so that "if (IS_NULL(var_anode)) return ONIGERR_MEMORY"\r
3552 //in line 3510 will be executed, so the null pointer has been checked before\r
3553 //deferenced in line 3584.\r
3554 if (items[i].byte_len != slen && IS_NOT_NULL(var_anode)) {\r
b602265d 3555 Node *rem;\r
14b0e578
CS
3556 UChar *q = p + items[i].byte_len;\r
3557\r
3558 if (q < end) {\r
3559 r = expand_case_fold_make_rem_string(&rem, q, end, reg);\r
3560 if (r != 0) {\r
3561 onig_node_free(an);\r
3562 goto mem_err2;\r
3563 }\r
3564\r
3565 xnode = onig_node_list_add(NULL_NODE, snode);\r
3566 if (IS_NULL(xnode)) {\r
3567 onig_node_free(an);\r
3568 onig_node_free(rem);\r
3569 goto mem_err2;\r
3570 }\r
3571 if (IS_NULL(onig_node_list_add(xnode, rem))) {\r
3572 onig_node_free(an);\r
3573 onig_node_free(xnode);\r
3574 onig_node_free(rem);\r
3575 goto mem_err;\r
3576 }\r
3577\r
b602265d 3578 NODE_CAR(an) = xnode;\r
14b0e578
CS
3579 }\r
3580 else {\r
b602265d 3581 NODE_CAR(an) = snode;\r
14b0e578
CS
3582 }\r
3583\r
b602265d 3584 NODE_CDR(var_anode) = an;\r
14b0e578
CS
3585 var_anode = an;\r
3586 }\r
3587 else {\r
b602265d
DG
3588 NODE_CAR(an) = snode;\r
3589 NODE_CDR(anode) = an;\r
14b0e578
CS
3590 anode = an;\r
3591 }\r
3592 }\r
3593\r
3594 return varlen;\r
3595\r
3596 mem_err2:\r
3597 onig_node_free(snode);\r
3598\r
3599 mem_err:\r
3600 onig_node_free(*rnode);\r
3601\r
3602 return ONIGERR_MEMORY;\r
3603}\r
3604\r
3605static int\r
3606expand_case_fold_string(Node* node, regex_t* reg)\r
3607{\r
3608#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8\r
3609\r
3610 int r, n, len, alt_num;\r
3611 UChar *start, *end, *p;\r
3612 Node *top_root, *root, *snode, *prev_node;\r
3613 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];\r
b602265d 3614 StrNode* sn = STR_(node);\r
14b0e578 3615\r
b602265d 3616 if (NODE_STRING_IS_AMBIG(node)) return 0;\r
14b0e578
CS
3617\r
3618 start = sn->s;\r
3619 end = sn->end;\r
3620 if (start >= end) return 0;\r
3621\r
3622 r = 0;\r
3623 top_root = root = prev_node = snode = NULL_NODE;\r
3624 alt_num = 1;\r
3625 p = start;\r
3626 while (p < end) {\r
b602265d
DG
3627 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag, p, end,\r
3628 items);\r
14b0e578
CS
3629 if (n < 0) {\r
3630 r = n;\r
3631 goto err;\r
3632 }\r
3633\r
3634 len = enclen(reg->enc, p);\r
3635\r
3636 if (n == 0) {\r
3637 if (IS_NULL(snode)) {\r
b602265d
DG
3638 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {\r
3639 top_root = root = onig_node_list_add(NULL_NODE, prev_node);\r
3640 if (IS_NULL(root)) {\r
3641 onig_node_free(prev_node);\r
3642 goto mem_err;\r
3643 }\r
3644 }\r
3645\r
3646 prev_node = snode = onig_node_new_str(NULL, NULL);\r
3647 if (IS_NULL(snode)) goto mem_err;\r
3648 if (IS_NOT_NULL(root)) {\r
3649 if (IS_NULL(onig_node_list_add(root, snode))) {\r
3650 onig_node_free(snode);\r
3651 goto mem_err;\r
3652 }\r
3653 }\r
14b0e578
CS
3654 }\r
3655\r
3656 r = onig_node_str_cat(snode, p, p + len);\r
3657 if (r != 0) goto err;\r
3658 }\r
3659 else {\r
3660 alt_num *= (n + 1);\r
3661 if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;\r
3662\r
3663 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {\r
b602265d
DG
3664 top_root = root = onig_node_list_add(NULL_NODE, prev_node);\r
3665 if (IS_NULL(root)) {\r
3666 onig_node_free(prev_node);\r
3667 goto mem_err;\r
3668 }\r
14b0e578
CS
3669 }\r
3670\r
3671 r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);\r
3672 if (r < 0) goto mem_err;\r
3673 if (r == 1) {\r
b602265d
DG
3674 if (IS_NULL(root)) {\r
3675 top_root = prev_node;\r
3676 }\r
3677 else {\r
3678 if (IS_NULL(onig_node_list_add(root, prev_node))) {\r
3679 onig_node_free(prev_node);\r
3680 goto mem_err;\r
3681 }\r
3682 }\r
3683\r
3684 root = NODE_CAR(prev_node);\r
3685 }\r
3686 else { /* r == 0 */\r
3687 if (IS_NOT_NULL(root)) {\r
3688 if (IS_NULL(onig_node_list_add(root, prev_node))) {\r
3689 onig_node_free(prev_node);\r
3690 goto mem_err;\r
3691 }\r
3692 }\r
3693 }\r
3694\r
14b0e578
CS
3695 snode = NULL_NODE;\r
3696 }\r
3697\r
3698 p += len;\r
3699 }\r
3700\r
3701 if (p < end) {\r
3702 Node *srem;\r
3703\r
3704 r = expand_case_fold_make_rem_string(&srem, p, end, reg);\r
3705 if (r != 0) goto mem_err;\r
3706\r
3707 if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {\r
3708 top_root = root = onig_node_list_add(NULL_NODE, prev_node);\r
3709 if (IS_NULL(root)) {\r
b602265d
DG
3710 onig_node_free(srem);\r
3711 onig_node_free(prev_node);\r
3712 goto mem_err;\r
3713 }\r
3714 }\r
3715\r
3716 if (IS_NULL(root)) {\r
3717 prev_node = srem;\r
3718 }\r
3719 else {\r
3720 if (IS_NULL(onig_node_list_add(root, srem))) {\r
3721 onig_node_free(srem);\r
3722 goto mem_err;\r
3723 }\r
3724 }\r
3725 }\r
3726\r
3727 /* ending */\r
3728 top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);\r
3729 swap_node(node, top_root);\r
3730 onig_node_free(top_root);\r
3731 return 0;\r
3732\r
3733 mem_err:\r
3734 r = ONIGERR_MEMORY;\r
3735\r
3736 err:\r
3737 onig_node_free(top_root);\r
3738 return r;\r
3739}\r
3740\r
3741#ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT\r
3742static enum QuantBodyEmpty\r
3743quantifiers_memory_node_info(Node* node)\r
3744{\r
3745 int r = QUANT_BODY_IS_EMPTY;\r
3746\r
3747 switch (NODE_TYPE(node)) {\r
3748 case NODE_LIST:\r
3749 case NODE_ALT:\r
3750 {\r
3751 int v;\r
3752 do {\r
3753 v = quantifiers_memory_node_info(NODE_CAR(node));\r
3754 if (v > r) r = v;\r
3755 } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
3756 }\r
3757 break;\r
3758\r
3759#ifdef USE_CALL\r
3760 case NODE_CALL:\r
3761 if (NODE_IS_RECURSION(node)) {\r
3762 return QUANT_BODY_IS_EMPTY_REC; /* tiny version */\r
3763 }\r
3764 else\r
3765 r = quantifiers_memory_node_info(NODE_BODY(node));\r
3766 break;\r
3767#endif\r
3768\r
3769 case NODE_QUANT:\r
3770 {\r
3771 QuantNode* qn = QUANT_(node);\r
3772 if (qn->upper != 0) {\r
3773 r = quantifiers_memory_node_info(NODE_BODY(node));\r
3774 }\r
3775 }\r
3776 break;\r
3777\r
3778 case NODE_ENCLOSURE:\r
3779 {\r
3780 EnclosureNode* en = ENCLOSURE_(node);\r
3781 switch (en->type) {\r
3782 case ENCLOSURE_MEMORY:\r
3783 if (NODE_IS_RECURSION(node)) {\r
3784 return QUANT_BODY_IS_EMPTY_REC;\r
3785 }\r
3786 return QUANT_BODY_IS_EMPTY_MEM;\r
3787 break;\r
3788\r
3789 case ENCLOSURE_OPTION:\r
3790 case ENCLOSURE_STOP_BACKTRACK:\r
3791 r = quantifiers_memory_node_info(NODE_BODY(node));\r
3792 break;\r
3793 case ENCLOSURE_IF_ELSE:\r
3794 {\r
3795 int v;\r
3796 r = quantifiers_memory_node_info(NODE_BODY(node));\r
3797 if (IS_NOT_NULL(en->te.Then)) {\r
3798 v = quantifiers_memory_node_info(en->te.Then);\r
3799 if (v > r) r = v;\r
3800 }\r
3801 if (IS_NOT_NULL(en->te.Else)) {\r
3802 v = quantifiers_memory_node_info(en->te.Else);\r
3803 if (v > r) r = v;\r
3804 }\r
3805 }\r
3806 break;\r
3807 default:\r
3808 break;\r
3809 }\r
3810 }\r
3811 break;\r
3812\r
3813 case NODE_BACKREF:\r
3814 case NODE_STRING:\r
3815 case NODE_CTYPE:\r
3816 case NODE_CCLASS:\r
3817 case NODE_ANCHOR:\r
3818 case NODE_GIMMICK:\r
3819 default:\r
3820 break;\r
3821 }\r
3822\r
3823 return r;\r
3824}\r
3825#endif /* USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT */\r
3826\r
3827\r
3828#define IN_ALT (1<<0)\r
3829#define IN_NOT (1<<1)\r
3830#define IN_REAL_REPEAT (1<<2)\r
3831#define IN_VAR_REPEAT (1<<3)\r
3832#define IN_ZERO_REPEAT (1<<4)\r
3833#define IN_MULTI_ENTRY (1<<5)\r
3834\r
3835#ifdef USE_CALL\r
3836\r
3837#ifdef __GNUC__\r
3838__inline\r
3839#endif\r
3840static int\r
3841setup_call_node_call(CallNode* cn, ScanEnv* env, int state)\r
3842{\r
3843 MemEnv* mem_env = SCANENV_MEMENV(env);\r
3844\r
3845 if (cn->by_number != 0) {\r
3846 int gnum = cn->group_num;\r
3847\r
3848 if (env->num_named > 0 &&\r
3849 IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&\r
3850 ! ONIG_IS_OPTION_ON(env->options, ONIG_OPTION_CAPTURE_GROUP)) {\r
3851 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;\r
3852 }\r
3853\r
3854 if (gnum > env->num_mem) {\r
3855 onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_GROUP_REFERENCE,\r
3856 cn->name, cn->name_end);\r
3857 return ONIGERR_UNDEFINED_GROUP_REFERENCE;\r
3858 }\r
3859\r
3860 set_call_attr:\r
3861 NODE_CALL_BODY(cn) = mem_env[cn->group_num].node;\r
3862 if (IS_NULL(NODE_CALL_BODY(cn))) {\r
3863 onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,\r
3864 cn->name, cn->name_end);\r
3865 return ONIGERR_UNDEFINED_NAME_REFERENCE;\r
3866 }\r
3867 }\r
3868 else {\r
3869 int *refs;\r
3870\r
3871 int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end, &refs);\r
3872 if (n <= 0) {\r
3873 onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,\r
3874 cn->name, cn->name_end);\r
3875 return ONIGERR_UNDEFINED_NAME_REFERENCE;\r
3876 }\r
3877 else if (n > 1) {\r
3878 onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL,\r
3879 cn->name, cn->name_end);\r
3880 return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;\r
3881 }\r
3882 else {\r
3883 cn->group_num = refs[0];\r
3884 goto set_call_attr;\r
3885 }\r
3886 }\r
3887\r
3888 return 0;\r
3889}\r
3890\r
3891static void\r
3892setup_call2_call(Node* node)\r
3893{\r
3894 switch (NODE_TYPE(node)) {\r
3895 case NODE_LIST:\r
3896 case NODE_ALT:\r
3897 do {\r
3898 setup_call2_call(NODE_CAR(node));\r
3899 } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
3900 break;\r
3901\r
3902 case NODE_QUANT:\r
3903 setup_call2_call(NODE_BODY(node));\r
3904 break;\r
3905\r
3906 case NODE_ANCHOR:\r
3907 if (ANCHOR_HAS_BODY(ANCHOR_(node)))\r
3908 setup_call2_call(NODE_BODY(node));\r
3909 break;\r
3910\r
3911 case NODE_ENCLOSURE:\r
3912 {\r
3913 EnclosureNode* en = ENCLOSURE_(node);\r
3914\r
3915 if (en->type == ENCLOSURE_MEMORY) {\r
3916 if (! NODE_IS_MARK1(node)) {\r
3917 NODE_STATUS_ADD(node, MARK1);\r
3918 setup_call2_call(NODE_BODY(node));\r
3919 NODE_STATUS_REMOVE(node, MARK1);\r
3920 }\r
3921 }\r
3922 else if (en->type == ENCLOSURE_IF_ELSE) {\r
3923 setup_call2_call(NODE_BODY(node));\r
3924 if (IS_NOT_NULL(en->te.Then))\r
3925 setup_call2_call(en->te.Then);\r
3926 if (IS_NOT_NULL(en->te.Else))\r
3927 setup_call2_call(en->te.Else);\r
3928 }\r
3929 else {\r
3930 setup_call2_call(NODE_BODY(node));\r
3931 }\r
3932 }\r
3933 break;\r
3934\r
3935 case NODE_CALL:\r
3936 if (! NODE_IS_MARK1(node)) {\r
3937 NODE_STATUS_ADD(node, MARK1);\r
3938 {\r
3939 CallNode* cn = CALL_(node);\r
3940 Node* called = NODE_CALL_BODY(cn);\r
3941\r
3942 cn->entry_count++;\r
3943\r
3944 NODE_STATUS_ADD(called, CALLED);\r
3945 ENCLOSURE_(called)->m.entry_count++;\r
3946 setup_call2_call(called);\r
3947 }\r
3948 NODE_STATUS_REMOVE(node, MARK1);\r
3949 }\r
3950 break;\r
3951\r
3952 default:\r
3953 break;\r
3954 }\r
3955}\r
3956\r
3957static int\r
3958setup_call(Node* node, ScanEnv* env, int state)\r
3959{\r
3960 int r;\r
3961\r
3962 switch (NODE_TYPE(node)) {\r
3963 case NODE_LIST:\r
3964 case NODE_ALT:\r
3965 do {\r
3966 r = setup_call(NODE_CAR(node), env, state);\r
3967 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));\r
3968 break;\r
3969\r
3970 case NODE_QUANT:\r
3971 if (QUANT_(node)->upper == 0)\r
3972 state |= IN_ZERO_REPEAT;\r
3973\r
3974 r = setup_call(NODE_BODY(node), env, state);\r
3975 break;\r
3976\r
3977 case NODE_ANCHOR:\r
3978 if (ANCHOR_HAS_BODY(ANCHOR_(node)))\r
3979 r = setup_call(NODE_BODY(node), env, state);\r
3980 else\r
3981 r = 0;\r
3982 break;\r
3983\r
3984 case NODE_ENCLOSURE:\r
3985 {\r
3986 EnclosureNode* en = ENCLOSURE_(node);\r
3987\r
3988 if (en->type == ENCLOSURE_MEMORY) {\r
3989 if ((state & IN_ZERO_REPEAT) != 0) {\r
3990 NODE_STATUS_ADD(node, IN_ZERO_REPEAT);\r
3991 ENCLOSURE_(node)->m.entry_count--;\r
3992 }\r
3993 r = setup_call(NODE_BODY(node), env, state);\r
3994 }\r
3995 else if (en->type == ENCLOSURE_IF_ELSE) {\r
3996 r = setup_call(NODE_BODY(node), env, state);\r
3997 if (r != 0) return r;\r
3998 if (IS_NOT_NULL(en->te.Then)) {\r
3999 r = setup_call(en->te.Then, env, state);\r
4000 if (r != 0) return r;\r
4001 }\r
4002 if (IS_NOT_NULL(en->te.Else))\r
4003 r = setup_call(en->te.Else, env, state);\r
4004 }\r
4005 else\r
4006 r = setup_call(NODE_BODY(node), env, state);\r
4007 }\r
4008 break;\r
4009\r
4010 case NODE_CALL:\r
4011 if ((state & IN_ZERO_REPEAT) != 0) {\r
4012 NODE_STATUS_ADD(node, IN_ZERO_REPEAT);\r
4013 CALL_(node)->entry_count--;\r
4014 }\r
4015\r
4016 r = setup_call_node_call(CALL_(node), env, state);\r
4017 break;\r
4018\r
4019 default:\r
4020 r = 0;\r
4021 break;\r
4022 }\r
4023\r
4024 return r;\r
4025}\r
4026\r
4027static int\r
4028setup_call2(Node* node)\r
4029{\r
4030 int r = 0;\r
4031\r
4032 switch (NODE_TYPE(node)) {\r
4033 case NODE_LIST:\r
4034 case NODE_ALT:\r
4035 do {\r
4036 r = setup_call2(NODE_CAR(node));\r
4037 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));\r
4038 break;\r
4039\r
4040 case NODE_QUANT:\r
4041 if (QUANT_(node)->upper != 0)\r
4042 r = setup_call2(NODE_BODY(node));\r
4043 break;\r
4044\r
4045 case NODE_ANCHOR:\r
4046 if (ANCHOR_HAS_BODY(ANCHOR_(node)))\r
4047 r = setup_call2(NODE_BODY(node));\r
4048 break;\r
4049\r
4050 case NODE_ENCLOSURE:\r
4051 if (! NODE_IS_IN_ZERO_REPEAT(node))\r
4052 r = setup_call2(NODE_BODY(node));\r
4053\r
4054 {\r
4055 EnclosureNode* en = ENCLOSURE_(node);\r
4056\r
4057 if (r != 0) return r;\r
4058 if (en->type == ENCLOSURE_IF_ELSE) {\r
4059 if (IS_NOT_NULL(en->te.Then)) {\r
4060 r = setup_call2(en->te.Then);\r
4061 if (r != 0) return r;\r
4062 }\r
4063 if (IS_NOT_NULL(en->te.Else))\r
4064 r = setup_call2(en->te.Else);\r
4065 }\r
4066 }\r
4067 break;\r
4068\r
4069 case NODE_CALL:\r
4070 if (! NODE_IS_IN_ZERO_REPEAT(node)) {\r
4071 setup_call2_call(node);\r
4072 }\r
4073 break;\r
4074\r
4075 default:\r
4076 break;\r
4077 }\r
4078\r
4079 return r;\r
4080}\r
4081\r
4082\r
4083static void\r
4084setup_called_state_call(Node* node, int state)\r
4085{\r
4086 switch (NODE_TYPE(node)) {\r
4087 case NODE_ALT:\r
4088 state |= IN_ALT;\r
4089 /* fall */\r
4090 case NODE_LIST:\r
4091 do {\r
4092 setup_called_state_call(NODE_CAR(node), state);\r
4093 } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
4094 break;\r
4095\r
4096 case NODE_QUANT:\r
4097 {\r
4098 QuantNode* qn = QUANT_(node);\r
4099\r
4100 if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 2)\r
4101 state |= IN_REAL_REPEAT;\r
4102 if (qn->lower != qn->upper)\r
4103 state |= IN_VAR_REPEAT;\r
4104\r
4105 setup_called_state_call(NODE_QUANT_BODY(qn), state);\r
4106 }\r
4107 break;\r
4108\r
4109 case NODE_ANCHOR:\r
4110 {\r
4111 AnchorNode* an = ANCHOR_(node);\r
4112\r
4113 switch (an->type) {\r
4114 case ANCHOR_PREC_READ_NOT:\r
4115 case ANCHOR_LOOK_BEHIND_NOT:\r
4116 state |= IN_NOT;\r
4117 /* fall */\r
4118 case ANCHOR_PREC_READ:\r
4119 case ANCHOR_LOOK_BEHIND:\r
4120 setup_called_state_call(NODE_ANCHOR_BODY(an), state);\r
4121 break;\r
4122 default:\r
4123 break;\r
4124 }\r
4125 }\r
4126 break;\r
4127\r
4128 case NODE_ENCLOSURE:\r
4129 {\r
4130 EnclosureNode* en = ENCLOSURE_(node);\r
4131\r
4132 if (en->type == ENCLOSURE_MEMORY) {\r
4133 if (NODE_IS_MARK1(node)) {\r
4134 if ((~en->m.called_state & state) != 0) {\r
4135 en->m.called_state |= state;\r
4136 setup_called_state_call(NODE_BODY(node), state);\r
4137 }\r
4138 }\r
4139 else {\r
4140 NODE_STATUS_ADD(node, MARK1);\r
4141 en->m.called_state |= state;\r
4142 setup_called_state_call(NODE_BODY(node), state);\r
4143 NODE_STATUS_REMOVE(node, MARK1);\r
4144 }\r
4145 }\r
4146 else if (en->type == ENCLOSURE_IF_ELSE) {\r
4147 if (IS_NOT_NULL(en->te.Then)) {\r
4148 setup_called_state_call(en->te.Then, state);\r
4149 }\r
4150 if (IS_NOT_NULL(en->te.Else))\r
4151 setup_called_state_call(en->te.Else, state);\r
4152 }\r
4153 else {\r
4154 setup_called_state_call(NODE_BODY(node), state);\r
4155 }\r
4156 }\r
4157 break;\r
4158\r
4159 case NODE_CALL:\r
4160 setup_called_state_call(NODE_BODY(node), state);\r
4161 break;\r
4162\r
4163 default:\r
4164 break;\r
4165 }\r
4166}\r
4167\r
4168static void\r
4169setup_called_state(Node* node, int state)\r
4170{\r
4171 switch (NODE_TYPE(node)) {\r
4172 case NODE_ALT:\r
4173 state |= IN_ALT;\r
4174 /* fall */\r
4175 case NODE_LIST:\r
4176 do {\r
4177 setup_called_state(NODE_CAR(node), state);\r
4178 } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
4179 break;\r
4180\r
4181#ifdef USE_CALL\r
4182 case NODE_CALL:\r
4183 setup_called_state_call(node, state);\r
4184 break;\r
4185#endif\r
4186\r
4187 case NODE_ENCLOSURE:\r
4188 {\r
4189 EnclosureNode* en = ENCLOSURE_(node);\r
4190\r
4191 switch (en->type) {\r
4192 case ENCLOSURE_MEMORY:\r
4193 if (en->m.entry_count > 1)\r
4194 state |= IN_MULTI_ENTRY;\r
4195\r
4196 en->m.called_state |= state;\r
4197 /* fall */\r
4198 case ENCLOSURE_OPTION:\r
4199 case ENCLOSURE_STOP_BACKTRACK:\r
4200 setup_called_state(NODE_BODY(node), state);\r
4201 break;\r
4202 case ENCLOSURE_IF_ELSE:\r
4203 setup_called_state(NODE_BODY(node), state);\r
4204 if (IS_NOT_NULL(en->te.Then))\r
4205 setup_called_state(en->te.Then, state);\r
4206 if (IS_NOT_NULL(en->te.Else))\r
4207 setup_called_state(en->te.Else, state);\r
4208 break;\r
4209 }\r
4210 }\r
4211 break;\r
4212\r
4213 case NODE_QUANT:\r
4214 {\r
4215 QuantNode* qn = QUANT_(node);\r
4216\r
4217 if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 2)\r
4218 state |= IN_REAL_REPEAT;\r
4219 if (qn->lower != qn->upper)\r
4220 state |= IN_VAR_REPEAT;\r
4221\r
4222 setup_called_state(NODE_QUANT_BODY(qn), state);\r
4223 }\r
4224 break;\r
4225\r
4226 case NODE_ANCHOR:\r
4227 {\r
4228 AnchorNode* an = ANCHOR_(node);\r
4229\r
4230 switch (an->type) {\r
4231 case ANCHOR_PREC_READ_NOT:\r
4232 case ANCHOR_LOOK_BEHIND_NOT:\r
4233 state |= IN_NOT;\r
4234 /* fall */\r
4235 case ANCHOR_PREC_READ:\r
4236 case ANCHOR_LOOK_BEHIND:\r
4237 setup_called_state(NODE_ANCHOR_BODY(an), state);\r
4238 break;\r
4239 default:\r
4240 break;\r
14b0e578
CS
4241 }\r
4242 }\r
b602265d 4243 break;\r
14b0e578 4244\r
b602265d
DG
4245 case NODE_BACKREF:\r
4246 case NODE_STRING:\r
4247 case NODE_CTYPE:\r
4248 case NODE_CCLASS:\r
4249 case NODE_GIMMICK:\r
4250 default:\r
4251 break;\r
14b0e578 4252 }\r
b602265d 4253}\r
14b0e578 4254\r
b602265d 4255#endif /* USE_CALL */\r
14b0e578 4256\r
14b0e578 4257\r
b602265d
DG
4258static int setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env);\r
4259\r
4260#ifdef __GNUC__\r
4261__inline\r
4262#endif\r
4263static int\r
4264setup_anchor(Node* node, regex_t* reg, int state, ScanEnv* env)\r
4265{\r
4266/* allowed node types in look-behind */\r
4267#define ALLOWED_TYPE_IN_LB \\r
4268 ( NODE_BIT_LIST | NODE_BIT_ALT | NODE_BIT_STRING | NODE_BIT_CCLASS \\r
4269 | NODE_BIT_CTYPE | NODE_BIT_ANCHOR | NODE_BIT_ENCLOSURE | NODE_BIT_QUANT \\r
4270 | NODE_BIT_CALL | NODE_BIT_GIMMICK)\r
14b0e578 4271\r
b602265d
DG
4272#define ALLOWED_ENCLOSURE_IN_LB ( 1<<ENCLOSURE_MEMORY | 1<<ENCLOSURE_OPTION )\r
4273#define ALLOWED_ENCLOSURE_IN_LB_NOT (1<<ENCLOSURE_OPTION)\r
14b0e578 4274\r
b602265d
DG
4275#define ALLOWED_ANCHOR_IN_LB \\r
4276 ( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF \\r
4277 | ANCHOR_BEGIN_POSITION | ANCHOR_WORD_BOUNDARY | ANCHOR_NO_WORD_BOUNDARY \\r
4278 | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END \\r
4279 | ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY \\r
4280 | ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY )\r
14b0e578 4281\r
b602265d
DG
4282#define ALLOWED_ANCHOR_IN_LB_NOT \\r
4283 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE \\r
4284 | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_WORD_BOUNDARY \\r
4285 | ANCHOR_NO_WORD_BOUNDARY | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END \\r
4286 | ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY \\r
4287 | ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY )\r
14b0e578 4288\r
b602265d
DG
4289 int r;\r
4290 AnchorNode* an = ANCHOR_(node);\r
14b0e578 4291\r
b602265d
DG
4292 switch (an->type) {\r
4293 case ANCHOR_PREC_READ:\r
4294 r = setup_tree(NODE_ANCHOR_BODY(an), reg, state, env);\r
4295 break;\r
4296 case ANCHOR_PREC_READ_NOT:\r
4297 r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state | IN_NOT), env);\r
4298 break;\r
14b0e578 4299\r
b602265d 4300 case ANCHOR_LOOK_BEHIND:\r
14b0e578 4301 {\r
b602265d
DG
4302 r = check_type_tree(NODE_ANCHOR_BODY(an), ALLOWED_TYPE_IN_LB,\r
4303 ALLOWED_ENCLOSURE_IN_LB, ALLOWED_ANCHOR_IN_LB);\r
4304 if (r < 0) return r;\r
4305 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;\r
4306 r = setup_tree(NODE_ANCHOR_BODY(an), reg, state, env);\r
4307 if (r != 0) return r;\r
4308 r = setup_look_behind(node, reg, env);\r
14b0e578
CS
4309 }\r
4310 break;\r
4311\r
b602265d 4312 case ANCHOR_LOOK_BEHIND_NOT:\r
14b0e578 4313 {\r
b602265d
DG
4314 r = check_type_tree(NODE_ANCHOR_BODY(an), ALLOWED_TYPE_IN_LB,\r
4315 ALLOWED_ENCLOSURE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);\r
4316 if (r < 0) return r;\r
4317 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;\r
4318 r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state | IN_NOT), env);\r
4319 if (r != 0) return r;\r
4320 r = setup_look_behind(node, reg, env);\r
14b0e578
CS
4321 }\r
4322 break;\r
4323\r
b602265d
DG
4324 default:\r
4325 r = 0;\r
4326 break;\r
4327 }\r
14b0e578 4328\r
b602265d
DG
4329 return r;\r
4330}\r
14b0e578 4331\r
b602265d
DG
4332#ifdef __GNUC__\r
4333__inline\r
4334#endif\r
4335static int\r
4336setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env)\r
4337{\r
4338 int r;\r
4339 OnigLen d;\r
4340 QuantNode* qn = QUANT_(node);\r
4341 Node* body = NODE_BODY(node);\r
4342\r
4343 if ((state & IN_REAL_REPEAT) != 0) {\r
4344 NODE_STATUS_ADD(node, IN_REAL_REPEAT);\r
4345 }\r
4346 if ((state & IN_MULTI_ENTRY) != 0) {\r
4347 NODE_STATUS_ADD(node, IN_MULTI_ENTRY);\r
4348 }\r
4349\r
4350 if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {\r
4351 d = tree_min_len(body, env);\r
4352 if (d == 0) {\r
4353#ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT\r
4354 qn->body_empty_info = quantifiers_memory_node_info(body);\r
4355 if (qn->body_empty_info == QUANT_BODY_IS_EMPTY_REC) {\r
4356 if (NODE_TYPE(body) == NODE_ENCLOSURE &&\r
4357 ENCLOSURE_(body)->type == ENCLOSURE_MEMORY) {\r
4358 MEM_STATUS_ON(env->bt_mem_end, ENCLOSURE_(body)->m.regnum);\r
4359 }\r
4360 }\r
4361#else\r
4362 qn->body_empty_info = QUANT_BODY_IS_EMPTY;\r
4363#endif\r
14b0e578 4364 }\r
b602265d 4365 }\r
14b0e578 4366\r
b602265d
DG
4367 if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 2)\r
4368 state |= IN_REAL_REPEAT;\r
4369 if (qn->lower != qn->upper)\r
4370 state |= IN_VAR_REPEAT;\r
14b0e578 4371\r
b602265d
DG
4372 r = setup_tree(body, reg, state, env);\r
4373 if (r != 0) return r;\r
14b0e578 4374\r
b602265d
DG
4375 /* expand string */\r
4376#define EXPAND_STRING_MAX_LENGTH 100\r
4377 if (NODE_TYPE(body) == NODE_STRING) {\r
4378 if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper &&\r
4379 qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) {\r
4380 int len = NODE_STRING_LEN(body);\r
4381 StrNode* sn = STR_(body);\r
4382\r
4383 if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) {\r
4384 int i, n = qn->lower;\r
4385 onig_node_conv_to_str_node(node, STR_(body)->flag);\r
4386 for (i = 0; i < n; i++) {\r
4387 r = onig_node_str_cat(node, sn->s, sn->end);\r
4388 if (r != 0) return r;\r
4389 }\r
4390 onig_node_free(body);\r
4391 return r;\r
14b0e578
CS
4392 }\r
4393 }\r
b602265d 4394 }\r
14b0e578 4395\r
b602265d
DG
4396#ifdef USE_OP_PUSH_OR_JUMP_EXACT\r
4397 if (qn->greedy && (qn->body_empty_info != QUANT_BODY_IS_NOT_EMPTY)) {\r
4398 if (NODE_TYPE(body) == NODE_QUANT) {\r
4399 QuantNode* tqn = QUANT_(body);\r
4400 if (IS_NOT_NULL(tqn->head_exact)) {\r
4401 qn->head_exact = tqn->head_exact;\r
4402 tqn->head_exact = NULL;\r
4403 }\r
4404 }\r
4405 else {\r
4406 qn->head_exact = get_head_value_node(NODE_BODY(node), 1, reg);\r
4407 }\r
14b0e578 4408 }\r
b602265d 4409#endif\r
14b0e578
CS
4410\r
4411 return r;\r
4412}\r
14b0e578
CS
4413\r
4414/* setup_tree does the following work.\r
b602265d 4415 1. check empty loop. (set qn->body_empty_info)\r
14b0e578
CS
4416 2. expand ignore-case in char class.\r
4417 3. set memory status bit flags. (reg->mem_stats)\r
4418 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].\r
4419 5. find invalid patterns in look-behind.\r
4420 6. expand repeated string.\r
4421 */\r
4422static int\r
4423setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)\r
4424{\r
14b0e578
CS
4425 int r = 0;\r
4426\r
b602265d
DG
4427 switch (NODE_TYPE(node)) {\r
4428 case NODE_LIST:\r
14b0e578
CS
4429 {\r
4430 Node* prev = NULL_NODE;\r
4431 do {\r
b602265d
DG
4432 r = setup_tree(NODE_CAR(node), reg, state, env);\r
4433 if (IS_NOT_NULL(prev) && r == 0) {\r
4434 r = next_setup(prev, NODE_CAR(node), reg);\r
4435 }\r
4436 prev = NODE_CAR(node);\r
4437 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));\r
14b0e578
CS
4438 }\r
4439 break;\r
4440\r
b602265d 4441 case NODE_ALT:\r
14b0e578 4442 do {\r
b602265d
DG
4443 r = setup_tree(NODE_CAR(node), reg, (state | IN_ALT), env);\r
4444 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));\r
14b0e578
CS
4445 break;\r
4446\r
b602265d
DG
4447 case NODE_STRING:\r
4448 if (IS_IGNORECASE(reg->options) && !NODE_STRING_IS_RAW(node)) {\r
14b0e578
CS
4449 r = expand_case_fold_string(node, reg);\r
4450 }\r
4451 break;\r
4452\r
b602265d 4453 case NODE_BACKREF:\r
14b0e578
CS
4454 {\r
4455 int i;\r
4456 int* p;\r
b602265d 4457 BackRefNode* br = BACKREF_(node);\r
14b0e578
CS
4458 p = BACKREFS_P(br);\r
4459 for (i = 0; i < br->back_num; i++) {\r
b602265d
DG
4460 if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;\r
4461 MEM_STATUS_ON(env->backrefed_mem, p[i]);\r
4462 MEM_STATUS_ON(env->bt_mem_start, p[i]);\r
14b0e578 4463#ifdef USE_BACKREF_WITH_LEVEL\r
b602265d
DG
4464 if (NODE_IS_NEST_LEVEL(node)) {\r
4465 MEM_STATUS_ON(env->bt_mem_end, p[i]);\r
4466 }\r
14b0e578 4467#endif\r
14b0e578
CS
4468 }\r
4469 }\r
4470 break;\r
4471\r
b602265d 4472 case NODE_ENCLOSURE:\r
14b0e578 4473 {\r
b602265d 4474 EnclosureNode* en = ENCLOSURE_(node);\r
14b0e578 4475\r
b602265d
DG
4476 switch (en->type) {\r
4477 case ENCLOSURE_OPTION:\r
4478 {\r
4479 OnigOptionType options = reg->options;\r
4480 reg->options = ENCLOSURE_(node)->o.options;\r
4481 r = setup_tree(NODE_BODY(node), reg, state, env);\r
4482 reg->options = options;\r
4483 }\r
4484 break;\r
14b0e578 4485\r
b602265d
DG
4486 case ENCLOSURE_MEMORY:\r
4487#ifdef USE_CALL\r
4488 state |= en->m.called_state;\r
14b0e578 4489#endif\r
14b0e578 4490\r
b602265d
DG
4491 if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_MULTI_ENTRY)) != 0\r
4492 || NODE_IS_RECURSION(node)) {\r
4493 MEM_STATUS_ON(env->bt_mem_start, en->m.regnum);\r
4494 }\r
4495 r = setup_tree(NODE_BODY(node), reg, state, env);\r
4496 break;\r
14b0e578 4497\r
b602265d
DG
4498 case ENCLOSURE_STOP_BACKTRACK:\r
4499 {\r
4500 Node* target = NODE_BODY(node);\r
4501 r = setup_tree(target, reg, state, env);\r
4502 if (NODE_TYPE(target) == NODE_QUANT) {\r
4503 QuantNode* tqn = QUANT_(target);\r
4504 if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&\r
4505 tqn->greedy != 0) { /* (?>a*), a*+ etc... */\r
4506 if (NODE_IS_SIMPLE_TYPE(NODE_BODY(target)))\r
4507 NODE_STATUS_ADD(node, STOP_BT_SIMPLE_REPEAT);\r
4508 }\r
4509 }\r
4510 }\r
14b0e578
CS
4511 break;\r
4512\r
b602265d
DG
4513 case ENCLOSURE_IF_ELSE:\r
4514 r = setup_tree(NODE_BODY(node), reg, (state | IN_ALT), env);\r
4515 if (r != 0) return r;\r
4516 if (IS_NOT_NULL(en->te.Then)) {\r
4517 r = setup_tree(en->te.Then, reg, (state | IN_ALT), env);\r
4518 if (r != 0) return r;\r
4519 }\r
4520 if (IS_NOT_NULL(en->te.Else))\r
4521 r = setup_tree(en->te.Else, reg, (state | IN_ALT), env);\r
4522 break;\r
14b0e578
CS
4523 }\r
4524 }\r
4525 break;\r
4526\r
b602265d
DG
4527 case NODE_QUANT:\r
4528 r = setup_quant(node, reg, state, env);\r
4529 break;\r
14b0e578 4530\r
b602265d
DG
4531 case NODE_ANCHOR:\r
4532 r = setup_anchor(node, reg, state, env);\r
14b0e578
CS
4533 break;\r
4534\r
b602265d
DG
4535#ifdef USE_CALL\r
4536 case NODE_CALL:\r
4537#endif\r
4538 case NODE_CTYPE:\r
4539 case NODE_CCLASS:\r
4540 case NODE_GIMMICK:\r
14b0e578
CS
4541 default:\r
4542 break;\r
4543 }\r
4544\r
4545 return r;\r
4546}\r
4547\r
b602265d 4548/* set skip map for Boyer-Moore search */\r
14b0e578
CS
4549static int\r
4550set_bm_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,\r
b602265d 4551 UChar skip[], int** int_skip)\r
14b0e578
CS
4552{\r
4553 int i, len;\r
4554\r
b602265d 4555 len = (int )(end - s);\r
14b0e578 4556 if (len < ONIG_CHAR_TABLE_SIZE) {\r
b602265d 4557 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = len;\r
14b0e578
CS
4558\r
4559 for (i = 0; i < len - 1; i++)\r
b602265d 4560 skip[s[i]] = len - 1 - i;\r
14b0e578
CS
4561 }\r
4562 else {\r
4563 if (IS_NULL(*int_skip)) {\r
4564 *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);\r
4565 if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;\r
4566 }\r
4567 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = len;\r
4568\r
4569 for (i = 0; i < len - 1; i++)\r
4570 (*int_skip)[s[i]] = len - 1 - i;\r
4571 }\r
4572 return 0;\r
4573}\r
4574\r
4575#define OPT_EXACT_MAXLEN 24\r
4576\r
4577typedef struct {\r
b602265d
DG
4578 OnigLen min; /* min byte length */\r
4579 OnigLen max; /* max byte length */\r
4580} MinMax;\r
14b0e578
CS
4581\r
4582typedef struct {\r
b602265d 4583 MinMax mmd;\r
14b0e578
CS
4584 OnigEncoding enc;\r
4585 OnigOptionType options;\r
4586 OnigCaseFoldType case_fold_flag;\r
4587 ScanEnv* scan_env;\r
4588} OptEnv;\r
4589\r
4590typedef struct {\r
b602265d
DG
4591 int left;\r
4592 int right;\r
4593} OptAnc;\r
14b0e578
CS
4594\r
4595typedef struct {\r
b602265d
DG
4596 MinMax mmd; /* position */\r
4597 OptAnc anc;\r
4598 int reach_end;\r
4599 int ignore_case;\r
4600 int len;\r
4601 UChar s[OPT_EXACT_MAXLEN];\r
4602} OptExact;\r
14b0e578
CS
4603\r
4604typedef struct {\r
b602265d
DG
4605 MinMax mmd; /* position */\r
4606 OptAnc anc;\r
4607 int value; /* weighted value */\r
4608 UChar map[ONIG_CHAR_TABLE_SIZE];\r
4609} OptMap;\r
14b0e578
CS
4610\r
4611typedef struct {\r
b602265d
DG
4612 MinMax len;\r
4613 OptAnc anc;\r
4614 OptExact exb; /* boundary */\r
4615 OptExact exm; /* middle */\r
4616 OptExact expr; /* prec read (?=...) */\r
4617 OptMap map; /* boundary */\r
4618} NodeOpt;\r
14b0e578
CS
4619\r
4620\r
4621static int\r
4622map_position_value(OnigEncoding enc, int i)\r
4623{\r
b602265d 4624 static const short int Vals[] = {\r
14b0e578
CS
4625 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,\r
4626 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\r
4627 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,\r
4628 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,\r
4629 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,\r
4630 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,\r
4631 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,\r
4632 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1\r
4633 };\r
4634\r
b602265d 4635 if (i < (int )(sizeof(Vals)/sizeof(Vals[0]))) {\r
14b0e578
CS
4636 if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)\r
4637 return 20;\r
4638 else\r
b602265d 4639 return (int )Vals[i];\r
14b0e578
CS
4640 }\r
4641 else\r
4642 return 4; /* Take it easy. */\r
4643}\r
4644\r
4645static int\r
b602265d 4646distance_value(MinMax* mm)\r
14b0e578
CS
4647{\r
4648 /* 1000 / (min-max-dist + 1) */\r
4649 static const short int dist_vals[] = {\r
4650 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100, \r
4651 91, 83, 77, 71, 67, 63, 59, 56, 53, 50, \r
4652 48, 45, 43, 42, 40, 38, 37, 36, 34, 33, \r
4653 32, 31, 30, 29, 29, 28, 27, 26, 26, 25, \r
4654 24, 24, 23, 23, 22, 22, 21, 21, 20, 20, \r
4655 20, 19, 19, 19, 18, 18, 18, 17, 17, 17, \r
4656 16, 16, 16, 16, 15, 15, 15, 15, 14, 14, \r
4657 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, \r
4658 12, 12, 12, 12, 12, 12, 11, 11, 11, 11, \r
4659 11, 11, 11, 11, 11, 10, 10, 10, 10, 10\r
4660 };\r
4661\r
b602265d 4662 OnigLen d;\r
14b0e578 4663\r
b602265d 4664 if (mm->max == INFINITE_LEN) return 0;\r
14b0e578
CS
4665\r
4666 d = mm->max - mm->min;\r
b602265d 4667 if (d < (OnigLen )(sizeof(dist_vals)/sizeof(dist_vals[0])))\r
14b0e578
CS
4668 /* return dist_vals[d] * 16 / (mm->min + 12); */\r
4669 return (int )dist_vals[d];\r
4670 else\r
4671 return 1;\r
4672}\r
4673\r
4674static int\r
b602265d 4675comp_distance_value(MinMax* d1, MinMax* d2, int v1, int v2)\r
14b0e578
CS
4676{\r
4677 if (v2 <= 0) return -1;\r
4678 if (v1 <= 0) return 1;\r
4679\r
4680 v1 *= distance_value(d1);\r
4681 v2 *= distance_value(d2);\r
4682\r
4683 if (v2 > v1) return 1;\r
4684 if (v2 < v1) return -1;\r
4685\r
4686 if (d2->min < d1->min) return 1;\r
4687 if (d2->min > d1->min) return -1;\r
4688 return 0;\r
4689}\r
4690\r
4691static int\r
b602265d 4692is_equal_mml(MinMax* a, MinMax* b)\r
14b0e578
CS
4693{\r
4694 return (a->min == b->min && a->max == b->max) ? 1 : 0;\r
4695}\r
4696\r
14b0e578 4697static void\r
b602265d 4698set_mml(MinMax* l, OnigLen min, OnigLen max)\r
14b0e578 4699{\r
b602265d
DG
4700 l->min = min;\r
4701 l->max = max;\r
14b0e578
CS
4702}\r
4703\r
4704static void\r
b602265d 4705clear_mml(MinMax* l)\r
14b0e578 4706{\r
b602265d 4707 l->min = l->max = 0;\r
14b0e578
CS
4708}\r
4709\r
4710static void\r
b602265d 4711copy_mml(MinMax* to, MinMax* from)\r
14b0e578
CS
4712{\r
4713 to->min = from->min;\r
4714 to->max = from->max;\r
4715}\r
4716\r
4717static void\r
b602265d 4718add_mml(MinMax* to, MinMax* from)\r
14b0e578
CS
4719{\r
4720 to->min = distance_add(to->min, from->min);\r
4721 to->max = distance_add(to->max, from->max);\r
4722}\r
4723\r
14b0e578 4724static void\r
b602265d 4725alt_merge_mml(MinMax* to, MinMax* from)\r
14b0e578
CS
4726{\r
4727 if (to->min > from->min) to->min = from->min;\r
4728 if (to->max < from->max) to->max = from->max;\r
4729}\r
4730\r
4731static void\r
4732copy_opt_env(OptEnv* to, OptEnv* from)\r
4733{\r
b602265d 4734 *to = *from;\r
14b0e578
CS
4735}\r
4736\r
4737static void\r
b602265d 4738clear_opt_anc_info(OptAnc* a)\r
14b0e578 4739{\r
b602265d
DG
4740 a->left = 0;\r
4741 a->right = 0;\r
14b0e578
CS
4742}\r
4743\r
4744static void\r
b602265d 4745copy_opt_anc_info(OptAnc* to, OptAnc* from)\r
14b0e578 4746{\r
b602265d 4747 *to = *from;\r
14b0e578
CS
4748}\r
4749\r
4750static void\r
b602265d
DG
4751concat_opt_anc_info(OptAnc* to, OptAnc* left, OptAnc* right,\r
4752 OnigLen left_len, OnigLen right_len)\r
14b0e578
CS
4753{\r
4754 clear_opt_anc_info(to);\r
4755\r
b602265d 4756 to->left = left->left;\r
14b0e578 4757 if (left_len == 0) {\r
b602265d 4758 to->left |= right->left;\r
14b0e578
CS
4759 }\r
4760\r
b602265d 4761 to->right = right->right;\r
14b0e578 4762 if (right_len == 0) {\r
b602265d
DG
4763 to->right |= left->right;\r
4764 }\r
4765 else {\r
4766 to->right |= (left->right & ANCHOR_PREC_READ_NOT);\r
14b0e578
CS
4767 }\r
4768}\r
4769\r
4770static int\r
b602265d 4771is_left(int a)\r
14b0e578 4772{\r
b602265d
DG
4773 if (a == ANCHOR_END_BUF || a == ANCHOR_SEMI_END_BUF ||\r
4774 a == ANCHOR_END_LINE || a == ANCHOR_PREC_READ || a == ANCHOR_PREC_READ_NOT)\r
14b0e578
CS
4775 return 0;\r
4776\r
4777 return 1;\r
4778}\r
4779\r
4780static int\r
b602265d 4781is_set_opt_anc_info(OptAnc* to, int anc)\r
14b0e578 4782{\r
b602265d 4783 if ((to->left & anc) != 0) return 1;\r
14b0e578 4784\r
b602265d 4785 return ((to->right & anc) != 0 ? 1 : 0);\r
14b0e578
CS
4786}\r
4787\r
4788static void\r
b602265d 4789add_opt_anc_info(OptAnc* to, int anc)\r
14b0e578 4790{\r
b602265d
DG
4791 if (is_left(anc))\r
4792 to->left |= anc;\r
14b0e578 4793 else\r
b602265d 4794 to->right |= anc;\r
14b0e578
CS
4795}\r
4796\r
4797static void\r
b602265d 4798remove_opt_anc_info(OptAnc* to, int anc)\r
14b0e578 4799{\r
b602265d
DG
4800 if (is_left(anc))\r
4801 to->left &= ~anc;\r
14b0e578 4802 else\r
b602265d 4803 to->right &= ~anc;\r
14b0e578
CS
4804}\r
4805\r
4806static void\r
b602265d 4807alt_merge_opt_anc_info(OptAnc* to, OptAnc* add)\r
14b0e578 4808{\r
b602265d
DG
4809 to->left &= add->left;\r
4810 to->right &= add->right;\r
14b0e578
CS
4811}\r
4812\r
4813static int\r
b602265d 4814is_full_opt_exact(OptExact* e)\r
14b0e578 4815{\r
b602265d 4816 return (e->len >= OPT_EXACT_MAXLEN ? 1 : 0);\r
14b0e578
CS
4817}\r
4818\r
4819static void\r
b602265d 4820clear_opt_exact(OptExact* e)\r
14b0e578 4821{\r
b602265d
DG
4822 clear_mml(&e->mmd);\r
4823 clear_opt_anc_info(&e->anc);\r
4824 e->reach_end = 0;\r
4825 e->ignore_case = 0;\r
4826 e->len = 0;\r
4827 e->s[0] = '\0';\r
14b0e578
CS
4828}\r
4829\r
4830static void\r
b602265d 4831copy_opt_exact(OptExact* to, OptExact* from)\r
14b0e578 4832{\r
b602265d 4833 *to = *from;\r
14b0e578
CS
4834}\r
4835\r
b602265d
DG
4836static int\r
4837concat_opt_exact(OptExact* to, OptExact* add, OnigEncoding enc)\r
14b0e578 4838{\r
b602265d 4839 int i, j, len, r;\r
14b0e578 4840 UChar *p, *end;\r
b602265d 4841 OptAnc tanc;\r
14b0e578
CS
4842\r
4843 if (! to->ignore_case && add->ignore_case) {\r
b602265d 4844 if (to->len >= add->len) return 0; /* avoid */\r
14b0e578
CS
4845\r
4846 to->ignore_case = 1;\r
4847 }\r
4848\r
b602265d 4849 r = 0;\r
14b0e578
CS
4850 p = add->s;\r
4851 end = p + add->len;\r
4852 for (i = to->len; p < end; ) {\r
4853 len = enclen(enc, p);\r
b602265d
DG
4854 if (i + len > OPT_EXACT_MAXLEN) {\r
4855 r = 1; /* 1:full */\r
4856 break;\r
4857 }\r
14b0e578
CS
4858 for (j = 0; j < len && p < end; j++)\r
4859 to->s[i++] = *p++;\r
4860 }\r
4861\r
4862 to->len = i;\r
4863 to->reach_end = (p == end ? add->reach_end : 0);\r
4864\r
4865 concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);\r
b602265d 4866 if (! to->reach_end) tanc.right = 0;\r
14b0e578 4867 copy_opt_anc_info(&to->anc, &tanc);\r
b602265d
DG
4868\r
4869 return r;\r
14b0e578
CS
4870}\r
4871\r
4872static void\r
b602265d 4873concat_opt_exact_str(OptExact* to, UChar* s, UChar* end, OnigEncoding enc)\r
14b0e578
CS
4874{\r
4875 int i, j, len;\r
4876 UChar *p;\r
4877\r
4878 for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {\r
4879 len = enclen(enc, p);\r
4880 if (i + len > OPT_EXACT_MAXLEN) break;\r
4881 for (j = 0; j < len && p < end; j++)\r
4882 to->s[i++] = *p++;\r
4883 }\r
4884\r
4885 to->len = i;\r
4886}\r
4887\r
4888static void\r
b602265d 4889alt_merge_opt_exact(OptExact* to, OptExact* add, OptEnv* env)\r
14b0e578
CS
4890{\r
4891 int i, j, len;\r
4892\r
4893 if (add->len == 0 || to->len == 0) {\r
b602265d 4894 clear_opt_exact(to);\r
14b0e578
CS
4895 return ;\r
4896 }\r
4897\r
4898 if (! is_equal_mml(&to->mmd, &add->mmd)) {\r
b602265d 4899 clear_opt_exact(to);\r
14b0e578
CS
4900 return ;\r
4901 }\r
4902\r
4903 for (i = 0; i < to->len && i < add->len; ) {\r
4904 if (to->s[i] != add->s[i]) break;\r
4905 len = enclen(env->enc, to->s + i);\r
4906\r
4907 for (j = 1; j < len; j++) {\r
4908 if (to->s[i+j] != add->s[i+j]) break;\r
4909 }\r
4910 if (j < len) break;\r
4911 i += len;\r
4912 }\r
4913\r
4914 if (! add->reach_end || i < add->len || i < to->len) {\r
4915 to->reach_end = 0;\r
4916 }\r
4917 to->len = i;\r
4918 to->ignore_case |= add->ignore_case;\r
4919\r
4920 alt_merge_opt_anc_info(&to->anc, &add->anc);\r
b602265d 4921 if (! to->reach_end) to->anc.right = 0;\r
14b0e578
CS
4922}\r
4923\r
4924static void\r
b602265d 4925select_opt_exact(OnigEncoding enc, OptExact* now, OptExact* alt)\r
14b0e578 4926{\r
b602265d 4927 int vn, va;\r
14b0e578 4928\r
b602265d
DG
4929 vn = now->len;\r
4930 va = alt->len;\r
14b0e578 4931\r
b602265d 4932 if (va == 0) {\r
14b0e578
CS
4933 return ;\r
4934 }\r
b602265d
DG
4935 else if (vn == 0) {\r
4936 copy_opt_exact(now, alt);\r
14b0e578
CS
4937 return ;\r
4938 }\r
b602265d 4939 else if (vn <= 2 && va <= 2) {\r
14b0e578 4940 /* ByteValTable[x] is big value --> low price */\r
b602265d
DG
4941 va = map_position_value(enc, now->s[0]);\r
4942 vn = map_position_value(enc, alt->s[0]);\r
14b0e578 4943\r
b602265d
DG
4944 if (now->len > 1) vn += 5;\r
4945 if (alt->len > 1) va += 5;\r
14b0e578
CS
4946 }\r
4947\r
b602265d
DG
4948 if (now->ignore_case == 0) vn *= 2;\r
4949 if (alt->ignore_case == 0) va *= 2;\r
14b0e578 4950\r
b602265d
DG
4951 if (comp_distance_value(&now->mmd, &alt->mmd, vn, va) > 0)\r
4952 copy_opt_exact(now, alt);\r
14b0e578
CS
4953}\r
4954\r
4955static void\r
b602265d 4956clear_opt_map(OptMap* map)\r
14b0e578 4957{\r
b602265d 4958 static const OptMap clean_info = {\r
14b0e578
CS
4959 {0, 0}, {0, 0}, 0,\r
4960 {\r
4961 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4962 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4963 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4964 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4965 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4966 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4967 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4968 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4969 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4970 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4971 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4972 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4973 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4974 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4975 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4976 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0\r
4977 }\r
4978 };\r
4979\r
b602265d 4980 xmemcpy(map, &clean_info, sizeof(OptMap));\r
14b0e578
CS
4981}\r
4982\r
4983static void\r
b602265d 4984copy_opt_map(OptMap* to, OptMap* from)\r
14b0e578 4985{\r
b602265d 4986 xmemcpy(to,from,sizeof(OptMap));\r
14b0e578
CS
4987}\r
4988\r
4989static void\r
b602265d 4990add_char_opt_map(OptMap* m, UChar c, OnigEncoding enc)\r
14b0e578 4991{\r
b602265d
DG
4992 if (m->map[c] == 0) {\r
4993 m->map[c] = 1;\r
4994 m->value += map_position_value(enc, c);\r
14b0e578
CS
4995 }\r
4996}\r
4997\r
4998static int\r
b602265d
DG
4999add_char_amb_opt_map(OptMap* map, UChar* p, UChar* end,\r
5000 OnigEncoding enc, OnigCaseFoldType fold_flag)\r
14b0e578
CS
5001{\r
5002 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];\r
5003 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];\r
5004 int i, n;\r
5005\r
b602265d 5006 add_char_opt_map(map, p[0], enc);\r
14b0e578 5007\r
b602265d
DG
5008 fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(fold_flag);\r
5009 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, fold_flag, p, end, items);\r
14b0e578
CS
5010 if (n < 0) return n;\r
5011\r
5012 for (i = 0; i < n; i++) {\r
5013 ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);\r
b602265d 5014 add_char_opt_map(map, buf[0], enc);\r
14b0e578
CS
5015 }\r
5016\r
5017 return 0;\r
5018}\r
5019\r
5020static void\r
b602265d 5021select_opt_map(OptMap* now, OptMap* alt)\r
14b0e578
CS
5022{\r
5023 static int z = 1<<15; /* 32768: something big value */\r
5024\r
b602265d 5025 int vn, va;\r
14b0e578
CS
5026\r
5027 if (alt->value == 0) return ;\r
5028 if (now->value == 0) {\r
b602265d 5029 copy_opt_map(now, alt);\r
14b0e578
CS
5030 return ;\r
5031 }\r
5032\r
b602265d
DG
5033 vn = z / now->value;\r
5034 va = z / alt->value;\r
5035 if (comp_distance_value(&now->mmd, &alt->mmd, vn, va) > 0)\r
5036 copy_opt_map(now, alt);\r
14b0e578
CS
5037}\r
5038\r
5039static int\r
b602265d 5040comp_opt_exact_or_map(OptExact* e, OptMap* m)\r
14b0e578
CS
5041{\r
5042#define COMP_EM_BASE 20\r
b602265d 5043 int ae, am;\r
14b0e578
CS
5044\r
5045 if (m->value <= 0) return -1;\r
5046\r
b602265d
DG
5047 ae = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2);\r
5048 am = COMP_EM_BASE * 5 * 2 / m->value;\r
5049 return comp_distance_value(&e->mmd, &m->mmd, ae, am);\r
14b0e578
CS
5050}\r
5051\r
5052static void\r
b602265d 5053alt_merge_opt_map(OnigEncoding enc, OptMap* to, OptMap* add)\r
14b0e578
CS
5054{\r
5055 int i, val;\r
5056\r
5057 /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */\r
5058 if (to->value == 0) return ;\r
5059 if (add->value == 0 || to->mmd.max < add->mmd.min) {\r
b602265d 5060 clear_opt_map(to);\r
14b0e578
CS
5061 return ;\r
5062 }\r
5063\r
5064 alt_merge_mml(&to->mmd, &add->mmd);\r
5065\r
5066 val = 0;\r
5067 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {\r
5068 if (add->map[i])\r
5069 to->map[i] = 1;\r
5070\r
5071 if (to->map[i])\r
5072 val += map_position_value(enc, i);\r
5073 }\r
5074 to->value = val;\r
5075\r
5076 alt_merge_opt_anc_info(&to->anc, &add->anc);\r
5077}\r
5078\r
5079static void\r
b602265d 5080set_bound_node_opt_info(NodeOpt* opt, MinMax* plen)\r
14b0e578 5081{\r
b602265d
DG
5082 copy_mml(&(opt->exb.mmd), plen);\r
5083 copy_mml(&(opt->expr.mmd), plen);\r
5084 copy_mml(&(opt->map.mmd), plen);\r
14b0e578
CS
5085}\r
5086\r
5087static void\r
b602265d 5088clear_node_opt_info(NodeOpt* opt)\r
14b0e578
CS
5089{\r
5090 clear_mml(&opt->len);\r
5091 clear_opt_anc_info(&opt->anc);\r
b602265d
DG
5092 clear_opt_exact(&opt->exb);\r
5093 clear_opt_exact(&opt->exm);\r
5094 clear_opt_exact(&opt->expr);\r
5095 clear_opt_map(&opt->map);\r
14b0e578
CS
5096}\r
5097\r
5098static void\r
b602265d 5099copy_node_opt_info(NodeOpt* to, NodeOpt* from)\r
14b0e578 5100{\r
b602265d 5101 xmemcpy(to,from,sizeof(NodeOpt));\r
14b0e578
CS
5102}\r
5103\r
5104static void\r
b602265d 5105concat_left_node_opt_info(OnigEncoding enc, NodeOpt* to, NodeOpt* add)\r
14b0e578
CS
5106{\r
5107 int exb_reach, exm_reach;\r
b602265d 5108 OptAnc tanc;\r
14b0e578
CS
5109\r
5110 concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);\r
5111 copy_opt_anc_info(&to->anc, &tanc);\r
5112\r
5113 if (add->exb.len > 0 && to->len.max == 0) {\r
b602265d 5114 concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc, to->len.max, add->len.max);\r
14b0e578
CS
5115 copy_opt_anc_info(&add->exb.anc, &tanc);\r
5116 }\r
5117\r
5118 if (add->map.value > 0 && to->len.max == 0) {\r
5119 if (add->map.mmd.max == 0)\r
b602265d 5120 add->map.anc.left |= to->anc.left;\r
14b0e578
CS
5121 }\r
5122\r
5123 exb_reach = to->exb.reach_end;\r
5124 exm_reach = to->exm.reach_end;\r
5125\r
5126 if (add->len.max != 0)\r
5127 to->exb.reach_end = to->exm.reach_end = 0;\r
5128\r
5129 if (add->exb.len > 0) {\r
5130 if (exb_reach) {\r
b602265d
DG
5131 concat_opt_exact(&to->exb, &add->exb, enc);\r
5132 clear_opt_exact(&add->exb);\r
14b0e578
CS
5133 }\r
5134 else if (exm_reach) {\r
b602265d
DG
5135 concat_opt_exact(&to->exm, &add->exb, enc);\r
5136 clear_opt_exact(&add->exb);\r
14b0e578
CS
5137 }\r
5138 }\r
b602265d
DG
5139 select_opt_exact(enc, &to->exm, &add->exb);\r
5140 select_opt_exact(enc, &to->exm, &add->exm);\r
14b0e578
CS
5141\r
5142 if (to->expr.len > 0) {\r
5143 if (add->len.max > 0) {\r
5144 if (to->expr.len > (int )add->len.max)\r
b602265d 5145 to->expr.len = add->len.max;\r
14b0e578
CS
5146\r
5147 if (to->expr.mmd.max == 0)\r
b602265d 5148 select_opt_exact(enc, &to->exb, &to->expr);\r
14b0e578 5149 else\r
b602265d 5150 select_opt_exact(enc, &to->exm, &to->expr);\r
14b0e578
CS
5151 }\r
5152 }\r
5153 else if (add->expr.len > 0) {\r
b602265d 5154 copy_opt_exact(&to->expr, &add->expr);\r
14b0e578
CS
5155 }\r
5156\r
b602265d 5157 select_opt_map(&to->map, &add->map);\r
14b0e578
CS
5158 add_mml(&to->len, &add->len);\r
5159}\r
5160\r
5161static void\r
b602265d 5162alt_merge_node_opt_info(NodeOpt* to, NodeOpt* add, OptEnv* env)\r
14b0e578 5163{\r
b602265d
DG
5164 alt_merge_opt_anc_info(&to->anc, &add->anc);\r
5165 alt_merge_opt_exact(&to->exb, &add->exb, env);\r
5166 alt_merge_opt_exact(&to->exm, &add->exm, env);\r
5167 alt_merge_opt_exact(&to->expr, &add->expr, env);\r
5168 alt_merge_opt_map(env->enc, &to->map, &add->map);\r
14b0e578
CS
5169\r
5170 alt_merge_mml(&to->len, &add->len);\r
5171}\r
5172\r
5173\r
5174#define MAX_NODE_OPT_INFO_REF_COUNT 5\r
5175\r
5176static int\r
b602265d 5177optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env)\r
14b0e578 5178{\r
b602265d
DG
5179 int i;\r
5180 int r;\r
5181 NodeOpt xo;\r
5182 OnigEncoding enc;\r
14b0e578 5183\r
b602265d
DG
5184 r = 0;\r
5185 enc = env->enc;\r
14b0e578
CS
5186 clear_node_opt_info(opt);\r
5187 set_bound_node_opt_info(opt, &env->mmd);\r
5188\r
b602265d
DG
5189 switch (NODE_TYPE(node)) {\r
5190 case NODE_LIST:\r
14b0e578
CS
5191 {\r
5192 OptEnv nenv;\r
14b0e578
CS
5193 Node* nd = node;\r
5194\r
5195 copy_opt_env(&nenv, env);\r
5196 do {\r
b602265d
DG
5197 r = optimize_nodes(NODE_CAR(nd), &xo, &nenv);\r
5198 if (r == 0) {\r
5199 add_mml(&nenv.mmd, &xo.len);\r
5200 concat_left_node_opt_info(enc, opt, &xo);\r
5201 }\r
5202 } while (r == 0 && IS_NOT_NULL(nd = NODE_CDR(nd)));\r
14b0e578
CS
5203 }\r
5204 break;\r
5205\r
b602265d 5206 case NODE_ALT:\r
14b0e578 5207 {\r
14b0e578
CS
5208 Node* nd = node;\r
5209\r
5210 do {\r
b602265d
DG
5211 r = optimize_nodes(NODE_CAR(nd), &xo, env);\r
5212 if (r == 0) {\r
5213 if (nd == node) copy_node_opt_info(opt, &xo);\r
5214 else alt_merge_node_opt_info(opt, &xo, env);\r
5215 }\r
5216 } while ((r == 0) && IS_NOT_NULL(nd = NODE_CDR(nd)));\r
14b0e578
CS
5217 }\r
5218 break;\r
5219\r
b602265d 5220 case NODE_STRING:\r
14b0e578 5221 {\r
b602265d
DG
5222 StrNode* sn = STR_(node);\r
5223 int slen = (int )(sn->end - sn->s);\r
5224 /* int is_raw = NODE_STRING_IS_RAW(node); */\r
5225\r
5226 if (! NODE_STRING_IS_AMBIG(node)) {\r
5227 concat_opt_exact_str(&opt->exb, sn->s, sn->end, enc);\r
5228 if (slen > 0) {\r
5229 add_char_opt_map(&opt->map, *(sn->s), enc);\r
5230 }\r
14b0e578
CS
5231 set_mml(&opt->len, slen, slen);\r
5232 }\r
5233 else {\r
5234 int max;\r
5235\r
b602265d
DG
5236 if (NODE_STRING_IS_DONT_GET_OPT_INFO(node)) {\r
5237 int n = onigenc_strlen(enc, sn->s, sn->end);\r
5238 max = ONIGENC_MBC_MAXLEN_DIST(enc) * n;\r
5239 }\r
5240 else {\r
5241 concat_opt_exact_str(&opt->exb, sn->s, sn->end, enc);\r
5242 opt->exb.ignore_case = 1;\r
14b0e578 5243\r
b602265d
DG
5244 if (slen > 0) {\r
5245 r = add_char_amb_opt_map(&opt->map, sn->s, sn->end,\r
5246 enc, env->case_fold_flag);\r
5247 if (r != 0) break;\r
5248 }\r
14b0e578 5249\r
b602265d
DG
5250 max = slen;\r
5251 }\r
14b0e578
CS
5252\r
5253 set_mml(&opt->len, slen, max);\r
5254 }\r
5255\r
5256 if (opt->exb.len == slen)\r
b602265d 5257 opt->exb.reach_end = 1;\r
14b0e578
CS
5258 }\r
5259 break;\r
5260\r
b602265d 5261 case NODE_CCLASS:\r
14b0e578 5262 {\r
b602265d
DG
5263 int z;\r
5264 CClassNode* cc = CCLASS_(node);\r
14b0e578 5265\r
b602265d 5266 /* no need to check ignore case. (set in setup_tree()) */\r
14b0e578
CS
5267\r
5268 if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {\r
b602265d
DG
5269 OnigLen min = ONIGENC_MBC_MINLEN(enc);\r
5270 OnigLen max = ONIGENC_MBC_MAXLEN_DIST(enc);\r
14b0e578 5271\r
b602265d 5272 set_mml(&opt->len, min, max);\r
14b0e578
CS
5273 }\r
5274 else {\r
5275 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {\r
5276 z = BITSET_AT(cc->bs, i);\r
b602265d
DG
5277 if ((z && ! IS_NCCLASS_NOT(cc)) || (! z && IS_NCCLASS_NOT(cc))) {\r
5278 add_char_opt_map(&opt->map, (UChar )i, enc);\r
14b0e578
CS
5279 }\r
5280 }\r
b602265d 5281 set_mml(&opt->len, 1, 1);\r
14b0e578
CS
5282 }\r
5283 }\r
5284 break;\r
5285\r
b602265d 5286 case NODE_CTYPE:\r
14b0e578 5287 {\r
b602265d
DG
5288 int min, max;\r
5289 int range;\r
14b0e578 5290\r
b602265d 5291 max = ONIGENC_MBC_MAXLEN_DIST(enc);\r
14b0e578
CS
5292\r
5293 if (max == 1) {\r
5294 min = 1;\r
5295\r
b602265d
DG
5296 switch (CTYPE_(node)->ctype) {\r
5297 case CTYPE_ANYCHAR:\r
5298 break;\r
5299\r
5300 case ONIGENC_CTYPE_WORD:\r
5301 range = CTYPE_(node)->ascii_mode != 0 ? 128 : SINGLE_BYTE_SIZE;\r
5302 if (CTYPE_(node)->not != 0) {\r
5303 for (i = 0; i < range; i++) {\r
5304 if (! ONIGENC_IS_CODE_WORD(enc, i)) {\r
5305 add_char_opt_map(&opt->map, (UChar )i, enc);\r
5306 }\r
5307 }\r
5308 for (i = range; i < SINGLE_BYTE_SIZE; i++) {\r
5309 add_char_opt_map(&opt->map, (UChar )i, enc);\r
5310 }\r
5311 }\r
5312 else {\r
5313 for (i = 0; i < range; i++) {\r
5314 if (ONIGENC_IS_CODE_WORD(enc, i)) {\r
5315 add_char_opt_map(&opt->map, (UChar )i, enc);\r
5316 }\r
5317 }\r
5318 }\r
5319 break;\r
5320 }\r
14b0e578
CS
5321 }\r
5322 else {\r
b602265d 5323 min = ONIGENC_MBC_MINLEN(enc);\r
14b0e578
CS
5324 }\r
5325 set_mml(&opt->len, min, max);\r
5326 }\r
5327 break;\r
5328\r
b602265d
DG
5329 case NODE_ANCHOR:\r
5330 switch (ANCHOR_(node)->type) {\r
14b0e578
CS
5331 case ANCHOR_BEGIN_BUF:\r
5332 case ANCHOR_BEGIN_POSITION:\r
5333 case ANCHOR_BEGIN_LINE:\r
5334 case ANCHOR_END_BUF:\r
5335 case ANCHOR_SEMI_END_BUF:\r
5336 case ANCHOR_END_LINE:\r
b602265d
DG
5337 case ANCHOR_PREC_READ_NOT:\r
5338 case ANCHOR_LOOK_BEHIND:\r
5339 add_opt_anc_info(&opt->anc, ANCHOR_(node)->type);\r
14b0e578
CS
5340 break;\r
5341\r
5342 case ANCHOR_PREC_READ:\r
5343 {\r
b602265d
DG
5344 r = optimize_nodes(NODE_BODY(node), &xo, env);\r
5345 if (r == 0) {\r
5346 if (xo.exb.len > 0)\r
5347 copy_opt_exact(&opt->expr, &xo.exb);\r
5348 else if (xo.exm.len > 0)\r
5349 copy_opt_exact(&opt->expr, &xo.exm);\r
14b0e578 5350\r
b602265d 5351 opt->expr.reach_end = 0;\r
14b0e578 5352\r
b602265d
DG
5353 if (xo.map.value > 0)\r
5354 copy_opt_map(&opt->map, &xo.map);\r
5355 }\r
14b0e578
CS
5356 }\r
5357 break;\r
5358\r
14b0e578
CS
5359 case ANCHOR_LOOK_BEHIND_NOT:\r
5360 break;\r
5361 }\r
5362 break;\r
5363\r
b602265d
DG
5364 case NODE_BACKREF:\r
5365 if (! NODE_IS_CHECKER(node)) {\r
14b0e578 5366 int* backs;\r
b602265d
DG
5367 OnigLen min, max, tmin, tmax;\r
5368 MemEnv* mem_env = SCANENV_MEMENV(env->scan_env);\r
5369 BackRefNode* br = BACKREF_(node);\r
14b0e578 5370\r
b602265d
DG
5371 if (NODE_IS_RECURSION(node)) {\r
5372 set_mml(&opt->len, 0, INFINITE_LEN);\r
5373 break;\r
14b0e578
CS
5374 }\r
5375 backs = BACKREFS_P(br);\r
b602265d
DG
5376 min = tree_min_len(mem_env[backs[0]].node, env->scan_env);\r
5377 max = tree_max_len(mem_env[backs[0]].node, env->scan_env);\r
14b0e578 5378 for (i = 1; i < br->back_num; i++) {\r
b602265d
DG
5379 tmin = tree_min_len(mem_env[backs[i]].node, env->scan_env);\r
5380 tmax = tree_max_len(mem_env[backs[i]].node, env->scan_env);\r
5381 if (min > tmin) min = tmin;\r
5382 if (max < tmax) max = tmax;\r
14b0e578 5383 }\r
b602265d 5384 set_mml(&opt->len, min, max);\r
14b0e578
CS
5385 }\r
5386 break;\r
5387\r
b602265d
DG
5388#ifdef USE_CALL\r
5389 case NODE_CALL:\r
5390 if (NODE_IS_RECURSION(node))\r
5391 set_mml(&opt->len, 0, INFINITE_LEN);\r
14b0e578
CS
5392 else {\r
5393 OnigOptionType save = env->options;\r
b602265d
DG
5394 env->options = ENCLOSURE_(NODE_BODY(node))->o.options;\r
5395 r = optimize_nodes(NODE_BODY(node), opt, env);\r
14b0e578
CS
5396 env->options = save;\r
5397 }\r
5398 break;\r
5399#endif\r
5400\r
b602265d 5401 case NODE_QUANT:\r
14b0e578 5402 {\r
b602265d
DG
5403 OnigLen min, max;\r
5404 QuantNode* qn = QUANT_(node);\r
5405\r
5406 r = optimize_nodes(NODE_BODY(node), &xo, env);\r
5407 if (r != 0) break;\r
5408\r
5409 if (qn->lower > 0) {\r
5410 copy_node_opt_info(opt, &xo);\r
5411 if (xo.exb.len > 0) {\r
5412 if (xo.exb.reach_end) {\r
5413 for (i = 2; i <= qn->lower && ! is_full_opt_exact(&opt->exb); i++) {\r
5414 int rc = concat_opt_exact(&opt->exb, &xo.exb, enc);\r
5415 if (rc > 0) break;\r
5416 }\r
5417 if (i < qn->lower) opt->exb.reach_end = 0;\r
5418 }\r
5419 }\r
5420\r
5421 if (qn->lower != qn->upper) {\r
5422 opt->exb.reach_end = 0;\r
5423 opt->exm.reach_end = 0;\r
5424 }\r
5425 if (qn->lower > 1)\r
5426 opt->exm.reach_end = 0;\r
5427 }\r
5428\r
5429 if (IS_REPEAT_INFINITE(qn->upper)) {\r
5430 if (env->mmd.max == 0 &&\r
5431 NODE_IS_ANYCHAR(NODE_BODY(node)) && qn->greedy != 0) {\r
5432 if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), env)))\r
5433 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF_ML);\r
5434 else\r
5435 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF);\r
5436 }\r
5437\r
5438 max = (xo.len.max > 0 ? INFINITE_LEN : 0);\r
14b0e578
CS
5439 }\r
5440 else {\r
b602265d 5441 max = distance_multiply(xo.len.max, qn->upper);\r
14b0e578
CS
5442 }\r
5443\r
b602265d 5444 min = distance_multiply(xo.len.min, qn->lower);\r
14b0e578
CS
5445 set_mml(&opt->len, min, max);\r
5446 }\r
5447 break;\r
5448\r
b602265d
DG
5449 case NODE_ENCLOSURE:\r
5450 {\r
5451 EnclosureNode* en = ENCLOSURE_(node);\r
5452\r
5453 switch (en->type) {\r
5454 case ENCLOSURE_OPTION:\r
5455 {\r
5456 OnigOptionType save = env->options;\r
5457\r
5458 env->options = en->o.options;\r
5459 r = optimize_nodes(NODE_BODY(node), opt, env);\r
5460 env->options = save;\r
5461 }\r
5462 break;\r
5463\r
5464 case ENCLOSURE_MEMORY:\r
5465#ifdef USE_CALL\r
5466 en->opt_count++;\r
5467 if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {\r
5468 OnigLen min, max;\r
5469\r
5470 min = 0;\r
5471 max = INFINITE_LEN;\r
5472 if (NODE_IS_MIN_FIXED(node)) min = en->min_len;\r
5473 if (NODE_IS_MAX_FIXED(node)) max = en->max_len;\r
5474 set_mml(&opt->len, min, max);\r
5475 }\r
5476 else\r
5477#endif\r
5478 {\r
5479 r = optimize_nodes(NODE_BODY(node), opt, env);\r
5480 if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF_MASK)) {\r
5481 if (MEM_STATUS_AT0(env->scan_env->backrefed_mem, en->m.regnum))\r
5482 remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF_MASK);\r
5483 }\r
5484 }\r
5485 break;\r
5486\r
5487 case ENCLOSURE_STOP_BACKTRACK:\r
5488 r = optimize_nodes(NODE_BODY(node), opt, env);\r
5489 break;\r
5490\r
5491 case ENCLOSURE_IF_ELSE:\r
5492 {\r
5493 OptEnv nenv;\r
5494\r
5495 copy_opt_env(&nenv, env);\r
5496 r = optimize_nodes(NODE_ENCLOSURE_BODY(en), &xo, &nenv);\r
5497 if (r == 0) {\r
5498 add_mml(&nenv.mmd, &xo.len);\r
5499 concat_left_node_opt_info(enc, opt, &xo);\r
5500 if (IS_NOT_NULL(en->te.Then)) {\r
5501 r = optimize_nodes(en->te.Then, &xo, &nenv);\r
5502 if (r == 0) {\r
5503 concat_left_node_opt_info(enc, opt, &xo);\r
5504 }\r
5505 }\r
5506\r
5507 if (IS_NOT_NULL(en->te.Else)) {\r
5508 r = optimize_nodes(en->te.Else, &xo, env);\r
5509 if (r == 0)\r
5510 alt_merge_node_opt_info(opt, &xo, env);\r
5511 }\r
5512 }\r
5513 }\r
5514 break;\r
5515 }\r
5516 }\r
5517 break;\r
5518\r
5519 case NODE_GIMMICK:\r
14b0e578
CS
5520 break;\r
5521\r
5522 default:\r
5523#ifdef ONIG_DEBUG\r
b602265d 5524 fprintf(stderr, "optimize_nodes: undefined node type %d\n", NODE_TYPE(node));\r
14b0e578
CS
5525#endif\r
5526 r = ONIGERR_TYPE_BUG;\r
5527 break;\r
5528 }\r
5529\r
5530 return r;\r
5531}\r
5532\r
5533static int\r
b602265d 5534set_optimize_exact(regex_t* reg, OptExact* e)\r
14b0e578
CS
5535{\r
5536 int r;\r
5537\r
5538 if (e->len == 0) return 0;\r
5539\r
5540 if (e->ignore_case) {\r
5541 reg->exact = (UChar* )xmalloc(e->len);\r
5542 CHECK_NULL_RETURN_MEMERR(reg->exact);\r
5543 xmemcpy(reg->exact, e->s, e->len);\r
5544 reg->exact_end = reg->exact + e->len;\r
b602265d 5545 reg->optimize = OPTIMIZE_EXACT_IC;\r
14b0e578
CS
5546 }\r
5547 else {\r
5548 int allow_reverse;\r
5549\r
b602265d 5550 reg->exact = onigenc_strdup(reg->enc, e->s, e->s + e->len);\r
14b0e578
CS
5551 CHECK_NULL_RETURN_MEMERR(reg->exact);\r
5552 reg->exact_end = reg->exact + e->len;\r
5553 \r
5554 allow_reverse =\r
b602265d 5555 ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);\r
14b0e578
CS
5556\r
5557 if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {\r
5558 r = set_bm_skip(reg->exact, reg->exact_end, reg->enc,\r
b602265d
DG
5559 reg->map, &(reg->int_map));\r
5560 if (r != 0) return r;\r
14b0e578
CS
5561\r
5562 reg->optimize = (allow_reverse != 0\r
b602265d 5563 ? OPTIMIZE_EXACT_BM : OPTIMIZE_EXACT_BM_NO_REV);\r
14b0e578
CS
5564 }\r
5565 else {\r
b602265d 5566 reg->optimize = OPTIMIZE_EXACT;\r
14b0e578
CS
5567 }\r
5568 }\r
5569\r
5570 reg->dmin = e->mmd.min;\r
5571 reg->dmax = e->mmd.max;\r
5572\r
b602265d
DG
5573 if (reg->dmin != INFINITE_LEN) {\r
5574 reg->threshold_len = reg->dmin + (int )(reg->exact_end - reg->exact);\r
14b0e578
CS
5575 }\r
5576\r
5577 return 0;\r
5578}\r
5579\r
5580static void\r
b602265d 5581set_optimize_map(regex_t* reg, OptMap* m)\r
14b0e578
CS
5582{\r
5583 int i;\r
5584\r
5585 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)\r
5586 reg->map[i] = m->map[i];\r
5587\r
b602265d 5588 reg->optimize = OPTIMIZE_MAP;\r
14b0e578
CS
5589 reg->dmin = m->mmd.min;\r
5590 reg->dmax = m->mmd.max;\r
5591\r
b602265d 5592 if (reg->dmin != INFINITE_LEN) {\r
14b0e578
CS
5593 reg->threshold_len = reg->dmin + 1;\r
5594 }\r
5595}\r
5596\r
5597static void\r
b602265d 5598set_sub_anchor(regex_t* reg, OptAnc* anc)\r
14b0e578 5599{\r
b602265d
DG
5600 reg->sub_anchor |= anc->left & ANCHOR_BEGIN_LINE;\r
5601 reg->sub_anchor |= anc->right & ANCHOR_END_LINE;\r
14b0e578
CS
5602}\r
5603\r
b602265d 5604#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)\r
14b0e578
CS
5605static void print_optimize_info(FILE* f, regex_t* reg);\r
5606#endif\r
5607\r
5608static int\r
5609set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)\r
5610{\r
14b0e578 5611 int r;\r
b602265d 5612 NodeOpt opt;\r
14b0e578
CS
5613 OptEnv env;\r
5614\r
5615 env.enc = reg->enc;\r
5616 env.options = reg->options;\r
5617 env.case_fold_flag = reg->case_fold_flag;\r
b602265d 5618 env.scan_env = scan_env;\r
14b0e578
CS
5619 clear_mml(&env.mmd);\r
5620\r
b602265d
DG
5621 r = optimize_nodes(node, &opt, &env);\r
5622 if (r != 0) return r;\r
5623\r
5624 reg->anchor = opt.anc.left & (ANCHOR_BEGIN_BUF |\r
5625 ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_INF | ANCHOR_ANYCHAR_INF_ML |\r
5626 ANCHOR_LOOK_BEHIND);\r
14b0e578 5627\r
b602265d
DG
5628 if ((opt.anc.left & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) != 0)\r
5629 reg->anchor &= ~ANCHOR_ANYCHAR_INF_ML;\r
14b0e578 5630\r
b602265d
DG
5631 reg->anchor |= opt.anc.right & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF |\r
5632 ANCHOR_PREC_READ_NOT);\r
14b0e578
CS
5633\r
5634 if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {\r
5635 reg->anchor_dmin = opt.len.min;\r
5636 reg->anchor_dmax = opt.len.max;\r
5637 }\r
5638\r
5639 if (opt.exb.len > 0 || opt.exm.len > 0) {\r
b602265d
DG
5640 select_opt_exact(reg->enc, &opt.exb, &opt.exm);\r
5641 if (opt.map.value > 0 && comp_opt_exact_or_map(&opt.exb, &opt.map) > 0) {\r
14b0e578
CS
5642 goto set_map;\r
5643 }\r
5644 else {\r
b602265d 5645 r = set_optimize_exact(reg, &opt.exb);\r
14b0e578
CS
5646 set_sub_anchor(reg, &opt.exb.anc);\r
5647 }\r
5648 }\r
5649 else if (opt.map.value > 0) {\r
5650 set_map:\r
b602265d 5651 set_optimize_map(reg, &opt.map);\r
14b0e578
CS
5652 set_sub_anchor(reg, &opt.map.anc);\r
5653 }\r
5654 else {\r
b602265d 5655 reg->sub_anchor |= opt.anc.left & ANCHOR_BEGIN_LINE;\r
14b0e578 5656 if (opt.len.max == 0)\r
b602265d 5657 reg->sub_anchor |= opt.anc.right & ANCHOR_END_LINE;\r
14b0e578
CS
5658 }\r
5659\r
5660#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)\r
5661 print_optimize_info(stderr, reg);\r
5662#endif\r
5663 return r;\r
5664}\r
5665\r
5666static void\r
5667clear_optimize_info(regex_t* reg)\r
5668{\r
b602265d 5669 reg->optimize = OPTIMIZE_NONE;\r
14b0e578
CS
5670 reg->anchor = 0;\r
5671 reg->anchor_dmin = 0;\r
5672 reg->anchor_dmax = 0;\r
5673 reg->sub_anchor = 0;\r
5674 reg->exact_end = (UChar* )NULL;\r
5675 reg->threshold_len = 0;\r
5676 if (IS_NOT_NULL(reg->exact)) {\r
5677 xfree(reg->exact);\r
5678 reg->exact = (UChar* )NULL;\r
5679 }\r
5680}\r
5681\r
5682#ifdef ONIG_DEBUG\r
5683\r
5684static void print_enc_string(FILE* fp, OnigEncoding enc,\r
b602265d 5685 const UChar *s, const UChar *end)\r
14b0e578
CS
5686{\r
5687 fprintf(fp, "\nPATTERN: /");\r
5688\r
5689 if (ONIGENC_MBC_MINLEN(enc) > 1) {\r
5690 const UChar *p;\r
5691 OnigCodePoint code;\r
5692\r
5693 p = s;\r
5694 while (p < end) {\r
5695 code = ONIGENC_MBC_TO_CODE(enc, p, end);\r
5696 if (code >= 0x80) {\r
b602265d 5697 fprintf(fp, " 0x%04x ", (int )code);\r
14b0e578
CS
5698 }\r
5699 else {\r
b602265d 5700 fputc((int )code, fp);\r
14b0e578
CS
5701 }\r
5702\r
5703 p += enclen(enc, p);\r
5704 }\r
5705 }\r
5706 else {\r
5707 while (s < end) {\r
5708 fputc((int )*s, fp);\r
5709 s++;\r
5710 }\r
5711 }\r
5712\r
5713 fprintf(fp, "/\n");\r
5714}\r
5715\r
b602265d
DG
5716#endif /* ONIG_DEBUG */\r
5717\r
5718#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)\r
5719\r
14b0e578 5720static void\r
b602265d 5721print_distance_range(FILE* f, OnigLen a, OnigLen b)\r
14b0e578 5722{\r
b602265d 5723 if (a == INFINITE_LEN)\r
14b0e578
CS
5724 fputs("inf", f);\r
5725 else\r
5726 fprintf(f, "(%u)", a);\r
5727\r
5728 fputs("-", f);\r
5729\r
b602265d 5730 if (b == INFINITE_LEN)\r
14b0e578
CS
5731 fputs("inf", f);\r
5732 else\r
5733 fprintf(f, "(%u)", b);\r
5734}\r
5735\r
5736static void\r
5737print_anchor(FILE* f, int anchor)\r
5738{\r
5739 int q = 0;\r
5740\r
5741 fprintf(f, "[");\r
5742\r
5743 if (anchor & ANCHOR_BEGIN_BUF) {\r
5744 fprintf(f, "begin-buf");\r
5745 q = 1;\r
5746 }\r
5747 if (anchor & ANCHOR_BEGIN_LINE) {\r
5748 if (q) fprintf(f, ", ");\r
5749 q = 1;\r
5750 fprintf(f, "begin-line");\r
5751 }\r
5752 if (anchor & ANCHOR_BEGIN_POSITION) {\r
5753 if (q) fprintf(f, ", ");\r
5754 q = 1;\r
5755 fprintf(f, "begin-pos");\r
5756 }\r
5757 if (anchor & ANCHOR_END_BUF) {\r
5758 if (q) fprintf(f, ", ");\r
5759 q = 1;\r
5760 fprintf(f, "end-buf");\r
5761 }\r
5762 if (anchor & ANCHOR_SEMI_END_BUF) {\r
5763 if (q) fprintf(f, ", ");\r
5764 q = 1;\r
5765 fprintf(f, "semi-end-buf");\r
5766 }\r
5767 if (anchor & ANCHOR_END_LINE) {\r
5768 if (q) fprintf(f, ", ");\r
5769 q = 1;\r
5770 fprintf(f, "end-line");\r
5771 }\r
b602265d 5772 if (anchor & ANCHOR_ANYCHAR_INF) {\r
14b0e578
CS
5773 if (q) fprintf(f, ", ");\r
5774 q = 1;\r
b602265d 5775 fprintf(f, "anychar-inf");\r
14b0e578 5776 }\r
b602265d 5777 if (anchor & ANCHOR_ANYCHAR_INF_ML) {\r
14b0e578 5778 if (q) fprintf(f, ", ");\r
b602265d 5779 fprintf(f, "anychar-inf-ml");\r
14b0e578
CS
5780 }\r
5781\r
5782 fprintf(f, "]");\r
5783}\r
5784\r
5785static void\r
5786print_optimize_info(FILE* f, regex_t* reg)\r
5787{\r
5788 static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",\r
5789 "EXACT_IC", "MAP" };\r
5790\r
5791 fprintf(f, "optimize: %s\n", on[reg->optimize]);\r
5792 fprintf(f, " anchor: "); print_anchor(f, reg->anchor);\r
5793 if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)\r
5794 print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);\r
5795 fprintf(f, "\n");\r
5796\r
5797 if (reg->optimize) {\r
5798 fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor);\r
5799 fprintf(f, "\n");\r
5800 }\r
5801 fprintf(f, "\n");\r
5802\r
5803 if (reg->exact) {\r
5804 UChar *p;\r
5805 fprintf(f, "exact: [");\r
5806 for (p = reg->exact; p < reg->exact_end; p++) {\r
5807 fputc(*p, f);\r
5808 }\r
b602265d 5809 fprintf(f, "]: length: %ld\n", (reg->exact_end - reg->exact));\r
14b0e578 5810 }\r
b602265d 5811 else if (reg->optimize & OPTIMIZE_MAP) {\r
14b0e578
CS
5812 int c, i, n = 0;\r
5813\r
5814 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)\r
5815 if (reg->map[i]) n++;\r
5816\r
5817 fprintf(f, "map: n=%d\n", n);\r
5818 if (n > 0) {\r
5819 c = 0;\r
5820 fputc('[', f);\r
5821 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {\r
b602265d 5822 if (reg->map[i] != 0) {\r
14b0e578
CS
5823 if (c > 0) fputs(", ", f);\r
5824 c++;\r
5825 if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&\r
5826 ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))\r
5827 fputc(i, f);\r
5828 else\r
5829 fprintf(f, "%d", i);\r
5830 }\r
5831 }\r
5832 fprintf(f, "]\n");\r
5833 }\r
5834 }\r
5835}\r
b602265d
DG
5836#endif\r
5837\r
5838\r
5839extern RegexExt*\r
5840onig_get_regex_ext(regex_t* reg)\r
5841{\r
5842 if (IS_NULL(REG_EXTP(reg))) {\r
5843 RegexExt* ext = (RegexExt* )xmalloc(sizeof(*ext));\r
5844 if (IS_NULL(ext)) return 0;\r
5845\r
5846 ext->pattern = 0;\r
5847 ext->pattern_end = 0;\r
5848#ifdef USE_CALLOUT\r
5849 ext->tag_table = 0;\r
5850 ext->callout_num = 0;\r
5851 ext->callout_list_alloc = 0;\r
5852 ext->callout_list = 0;\r
5853#endif\r
5854\r
5855 REG_EXTPL(reg) = (void* )ext;\r
5856 }\r
5857\r
5858 return REG_EXTP(reg);\r
5859}\r
5860\r
5861static void\r
5862free_regex_ext(RegexExt* ext)\r
5863{\r
5864 if (IS_NOT_NULL(ext)) {\r
5865 if (IS_NOT_NULL(ext->pattern))\r
5866 xfree((void* )ext->pattern);\r
5867\r
5868#ifdef USE_CALLOUT\r
5869 if (IS_NOT_NULL(ext->tag_table))\r
5870 onig_callout_tag_table_free(ext->tag_table);\r
5871\r
5872 if (IS_NOT_NULL(ext->callout_list))\r
5873 onig_free_reg_callout_list(ext->callout_num, ext->callout_list);\r
5874#endif\r
5875\r
5876 xfree(ext);\r
5877 }\r
5878}\r
5879\r
5880extern int\r
5881onig_ext_set_pattern(regex_t* reg, const UChar* pattern, const UChar* pattern_end)\r
5882{\r
5883 RegexExt* ext;\r
5884 UChar* s;\r
5885\r
5886 ext = onig_get_regex_ext(reg);\r
5887 CHECK_NULL_RETURN_MEMERR(ext);\r
5888\r
5889 s = onigenc_strdup(reg->enc, pattern, pattern_end);\r
5890 CHECK_NULL_RETURN_MEMERR(s);\r
5891\r
5892 ext->pattern = s;\r
5893 ext->pattern_end = s + (pattern_end - pattern);\r
5894\r
5895 return ONIG_NORMAL;\r
5896}\r
14b0e578
CS
5897\r
5898\r
5899extern void\r
5900onig_free_body(regex_t* reg)\r
5901{\r
5902 if (IS_NOT_NULL(reg)) {\r
5903 if (IS_NOT_NULL(reg->p)) xfree(reg->p);\r
5904 if (IS_NOT_NULL(reg->exact)) xfree(reg->exact);\r
5905 if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map);\r
5906 if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward);\r
5907 if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range);\r
b602265d
DG
5908 if (IS_NOT_NULL(REG_EXTP(reg))) {\r
5909 free_regex_ext(REG_EXTP(reg));\r
5910 REG_EXTPL(reg) = 0;\r
5911 }\r
14b0e578 5912\r
14b0e578 5913 onig_names_free(reg);\r
14b0e578
CS
5914 }\r
5915}\r
5916\r
5917extern void\r
5918onig_free(regex_t* reg)\r
5919{\r
5920 if (IS_NOT_NULL(reg)) {\r
5921 onig_free_body(reg);\r
5922 xfree(reg);\r
5923 }\r
5924}\r
5925\r
5926#define REGEX_TRANSFER(to,from) do {\\r
14b0e578
CS
5927 onig_free_body(to);\\r
5928 xmemcpy(to, from, sizeof(regex_t));\\r
5929 xfree(from);\\r
5930} while (0)\r
5931\r
5932extern void\r
5933onig_transfer(regex_t* to, regex_t* from)\r
5934{\r
14b0e578 5935 REGEX_TRANSFER(to, from);\r
14b0e578
CS
5936}\r
5937\r
14b0e578 5938\r
b602265d 5939#ifdef ONIG_DEBUG_PARSE\r
14b0e578
CS
5940static void print_tree P_((FILE* f, Node* node));\r
5941#endif\r
5942\r
5943extern int\r
5944onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,\r
b602265d 5945 OnigErrorInfo* einfo)\r
14b0e578
CS
5946{\r
5947#define COMPILE_INIT_SIZE 20\r
5948\r
5949 int r, init_size;\r
5950 Node* root;\r
5951 ScanEnv scan_env;\r
b602265d 5952#ifdef USE_CALL\r
14b0e578
CS
5953 UnsetAddrList uslist;\r
5954#endif\r
5955\r
b602265d
DG
5956 root = 0;\r
5957 if (IS_NOT_NULL(einfo)) {\r
5958 einfo->enc = reg->enc;\r
5959 einfo->par = (UChar* )NULL;\r
5960 }\r
14b0e578
CS
5961\r
5962#ifdef ONIG_DEBUG\r
5963 print_enc_string(stderr, reg->enc, pattern, pattern_end);\r
5964#endif\r
5965\r
5966 if (reg->alloc == 0) {\r
b602265d 5967 init_size = (int )(pattern_end - pattern) * 2;\r
14b0e578 5968 if (init_size <= 0) init_size = COMPILE_INIT_SIZE;\r
b602265d 5969 r = BB_INIT(reg, init_size);\r
14b0e578
CS
5970 if (r != 0) goto end;\r
5971 }\r
5972 else\r
5973 reg->used = 0;\r
5974\r
5975 reg->num_mem = 0;\r
5976 reg->num_repeat = 0;\r
5977 reg->num_null_check = 0;\r
5978 reg->repeat_range_alloc = 0;\r
5979 reg->repeat_range = (OnigRepeatRange* )NULL;\r
14b0e578 5980\r
b602265d
DG
5981 r = onig_parse_tree(&root, pattern, pattern_end, reg, &scan_env);\r
5982 if (r != 0) goto err;\r
14b0e578 5983\r
14b0e578
CS
5984 /* mixed use named group and no-named group */\r
5985 if (scan_env.num_named > 0 &&\r
5986 IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&\r
b602265d 5987 ! ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {\r
14b0e578
CS
5988 if (scan_env.num_named != scan_env.num_mem)\r
5989 r = disable_noname_group_capture(&root, reg, &scan_env);\r
5990 else\r
5991 r = numbered_ref_check(root);\r
5992\r
5993 if (r != 0) goto err;\r
5994 }\r
14b0e578 5995\r
b602265d
DG
5996 r = check_backrefs(root, &scan_env);\r
5997 if (r != 0) goto err;\r
5998\r
5999#ifdef USE_CALL\r
14b0e578
CS
6000 if (scan_env.num_call > 0) {\r
6001 r = unset_addr_list_init(&uslist, scan_env.num_call);\r
6002 if (r != 0) goto err;\r
6003 scan_env.unset_addr_list = &uslist;\r
b602265d
DG
6004 r = setup_call(root, &scan_env, 0);\r
6005 if (r != 0) goto err_unset;\r
6006 r = setup_call2(root);\r
14b0e578 6007 if (r != 0) goto err_unset;\r
b602265d 6008 r = recursive_call_check_trav(root, &scan_env, 0);\r
14b0e578 6009 if (r < 0) goto err_unset;\r
b602265d 6010 r = infinite_recursive_call_check_trav(root, &scan_env);\r
14b0e578
CS
6011 if (r != 0) goto err_unset;\r
6012\r
b602265d 6013 setup_called_state(root, 0);\r
14b0e578 6014 }\r
b602265d
DG
6015\r
6016 reg->num_call = scan_env.num_call;\r
14b0e578
CS
6017#endif\r
6018\r
6019 r = setup_tree(root, reg, 0, &scan_env);\r
6020 if (r != 0) goto err_unset;\r
6021\r
b602265d 6022#ifdef ONIG_DEBUG_PARSE\r
14b0e578
CS
6023 print_tree(stderr, root);\r
6024#endif\r
6025\r
6026 reg->capture_history = scan_env.capture_history;\r
6027 reg->bt_mem_start = scan_env.bt_mem_start;\r
6028 reg->bt_mem_start |= reg->capture_history;\r
6029 if (IS_FIND_CONDITION(reg->options))\r
b602265d 6030 MEM_STATUS_ON_ALL(reg->bt_mem_end);\r
14b0e578
CS
6031 else {\r
6032 reg->bt_mem_end = scan_env.bt_mem_end;\r
6033 reg->bt_mem_end |= reg->capture_history;\r
6034 }\r
b602265d 6035 reg->bt_mem_start |= reg->bt_mem_end;\r
14b0e578
CS
6036\r
6037 clear_optimize_info(reg);\r
6038#ifndef ONIG_DONT_OPTIMIZE\r
6039 r = set_optimize_info_from_tree(root, reg, &scan_env);\r
6040 if (r != 0) goto err_unset;\r
6041#endif\r
6042\r
b602265d
DG
6043 if (IS_NOT_NULL(scan_env.mem_env_dynamic)) {\r
6044 xfree(scan_env.mem_env_dynamic);\r
6045 scan_env.mem_env_dynamic = (MemEnv* )NULL;\r
14b0e578
CS
6046 }\r
6047\r
b602265d 6048 r = compile_tree(root, reg, &scan_env);\r
14b0e578 6049 if (r == 0) {\r
b602265d
DG
6050 if (scan_env.keep_num > 0) {\r
6051 r = add_opcode(reg, OP_UPDATE_VAR);\r
6052 if (r != 0) goto err;\r
6053 r = add_update_var_type(reg, UPDATE_VAR_KEEP_FROM_STACK_LAST);\r
6054 if (r != 0) goto err;\r
6055 r = add_mem_num(reg, 0 /* not used */);\r
6056 if (r != 0) goto err;\r
6057 }\r
6058\r
14b0e578 6059 r = add_opcode(reg, OP_END);\r
b602265d 6060#ifdef USE_CALL\r
14b0e578 6061 if (scan_env.num_call > 0) {\r
b602265d 6062 r = fix_unset_addr_list(&uslist, reg);\r
14b0e578 6063 unset_addr_list_end(&uslist);\r
b602265d 6064 if (r != 0) goto err;\r
14b0e578
CS
6065 }\r
6066#endif\r
6067\r
b602265d
DG
6068 if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0)\r
6069#ifdef USE_CALLOUT\r
6070 || (IS_NOT_NULL(REG_EXTP(reg)) && REG_EXTP(reg)->callout_num != 0)\r
6071#endif\r
6072 )\r
14b0e578
CS
6073 reg->stack_pop_level = STACK_POP_LEVEL_ALL;\r
6074 else {\r
6075 if (reg->bt_mem_start != 0)\r
b602265d 6076 reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;\r
14b0e578 6077 else\r
b602265d 6078 reg->stack_pop_level = STACK_POP_LEVEL_FREE;\r
14b0e578
CS
6079 }\r
6080 }\r
b602265d 6081#ifdef USE_CALL\r
14b0e578
CS
6082 else if (scan_env.num_call > 0) {\r
6083 unset_addr_list_end(&uslist);\r
6084 }\r
6085#endif\r
6086 onig_node_free(root);\r
6087\r
6088#ifdef ONIG_DEBUG_COMPILE\r
14b0e578 6089 onig_print_names(stderr, reg);\r
b602265d 6090 onig_print_compiled_byte_code_list(stderr, reg);\r
14b0e578
CS
6091#endif\r
6092\r
6093 end:\r
14b0e578
CS
6094 return r;\r
6095\r
6096 err_unset:\r
b602265d 6097#ifdef USE_CALL\r
14b0e578
CS
6098 if (scan_env.num_call > 0) {\r
6099 unset_addr_list_end(&uslist);\r
6100 }\r
6101#endif\r
6102 err:\r
6103 if (IS_NOT_NULL(scan_env.error)) {\r
6104 if (IS_NOT_NULL(einfo)) {\r
14b0e578
CS
6105 einfo->par = scan_env.error;\r
6106 einfo->par_end = scan_env.error_end;\r
6107 }\r
6108 }\r
6109\r
6110 onig_node_free(root);\r
b602265d
DG
6111 if (IS_NOT_NULL(scan_env.mem_env_dynamic))\r
6112 xfree(scan_env.mem_env_dynamic);\r
14b0e578
CS
6113 return r;\r
6114}\r
6115\r
b602265d
DG
6116\r
6117static int onig_inited = 0;\r
6118\r
14b0e578 6119extern int\r
b602265d
DG
6120onig_reg_init(regex_t* reg, OnigOptionType option, OnigCaseFoldType case_fold_flag,\r
6121 OnigEncoding enc, OnigSyntaxType* syntax)\r
14b0e578
CS
6122{\r
6123 int r;\r
14b0e578 6124\r
b602265d 6125 xmemset(reg, 0, sizeof(*reg));\r
14b0e578 6126\r
b602265d
DG
6127 if (onig_inited == 0) {\r
6128#if 0\r
6129 return ONIGERR_LIBRARY_IS_NOT_INITIALIZED;\r
6130#else\r
6131 r = onig_initialize(&enc, 1);\r
6132 if (r != 0)\r
6133 return ONIGERR_FAIL_TO_INITIALIZE;\r
14b0e578 6134\r
b602265d
DG
6135 onig_warning("You didn't call onig_initialize() explicitly");\r
6136#endif\r
6137 }\r
14b0e578
CS
6138\r
6139 if (IS_NULL(reg))\r
6140 return ONIGERR_INVALID_ARGUMENT;\r
6141\r
6142 if (ONIGENC_IS_UNDEF(enc))\r
6143 return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED;\r
6144\r
6145 if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))\r
6146 == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {\r
6147 return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;\r
6148 }\r
6149\r
14b0e578
CS
6150 if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {\r
6151 option |= syntax->options;\r
6152 option &= ~ONIG_OPTION_SINGLELINE;\r
6153 }\r
6154 else\r
6155 option |= syntax->options;\r
6156\r
6157 (reg)->enc = enc;\r
6158 (reg)->options = option;\r
6159 (reg)->syntax = syntax;\r
6160 (reg)->optimize = 0;\r
6161 (reg)->exact = (UChar* )NULL;\r
6162 (reg)->int_map = (int* )NULL;\r
6163 (reg)->int_map_backward = (int* )NULL;\r
b602265d 6164 REG_EXTPL(reg) = NULL;\r
14b0e578
CS
6165\r
6166 (reg)->p = (UChar* )NULL;\r
6167 (reg)->alloc = 0;\r
6168 (reg)->used = 0;\r
6169 (reg)->name_table = (void* )NULL;\r
6170\r
6171 (reg)->case_fold_flag = case_fold_flag;\r
6172 return 0;\r
6173}\r
6174\r
6175extern int\r
b602265d
DG
6176onig_new_without_alloc(regex_t* reg,\r
6177 const UChar* pattern, const UChar* pattern_end,\r
6178 OnigOptionType option, OnigEncoding enc,\r
6179 OnigSyntaxType* syntax, OnigErrorInfo* einfo)\r
14b0e578
CS
6180{\r
6181 int r;\r
6182\r
6183 r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);\r
b602265d 6184 if (r != 0) return r;\r
14b0e578
CS
6185\r
6186 r = onig_compile(reg, pattern, pattern_end, einfo);\r
6187 return r;\r
6188}\r
6189\r
6190extern int\r
6191onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,\r
b602265d
DG
6192 OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,\r
6193 OnigErrorInfo* einfo)\r
14b0e578
CS
6194{\r
6195 int r;\r
6196\r
6197 *reg = (regex_t* )xmalloc(sizeof(regex_t));\r
6198 if (IS_NULL(*reg)) return ONIGERR_MEMORY;\r
6199\r
6200 r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);\r
b602265d 6201 if (r != 0) goto err;\r
14b0e578
CS
6202\r
6203 r = onig_compile(*reg, pattern, pattern_end, einfo);\r
b602265d 6204 if (r != 0) {\r
14b0e578
CS
6205 err:\r
6206 onig_free(*reg);\r
6207 *reg = NULL;\r
6208 }\r
6209 return r;\r
6210}\r
6211\r
14b0e578 6212extern int\r
b602265d 6213onig_initialize(OnigEncoding encodings[], int n)\r
14b0e578 6214{\r
b602265d
DG
6215 int i;\r
6216 int r;\r
6217\r
14b0e578
CS
6218 if (onig_inited != 0)\r
6219 return 0;\r
6220\r
b602265d 6221 onigenc_init();\r
14b0e578
CS
6222\r
6223 onig_inited = 1;\r
6224\r
b602265d
DG
6225 for (i = 0; i < n; i++) {\r
6226 OnigEncoding enc = encodings[i];\r
6227 r = onig_initialize_encoding(enc);\r
6228 if (r != 0)\r
6229 return r;\r
6230 }\r
14b0e578 6231\r
b602265d 6232 return ONIG_NORMAL;\r
14b0e578
CS
6233}\r
6234\r
b602265d
DG
6235typedef struct EndCallListItem {\r
6236 struct EndCallListItem* next;\r
6237 void (*func)(void);\r
6238} EndCallListItemType;\r
14b0e578 6239\r
b602265d 6240static EndCallListItemType* EndCallTop;\r
14b0e578
CS
6241\r
6242extern void onig_add_end_call(void (*func)(void))\r
6243{\r
b602265d 6244 EndCallListItemType* item;\r
14b0e578 6245\r
b602265d 6246 item = (EndCallListItemType* )xmalloc(sizeof(*item));\r
14b0e578
CS
6247 if (item == 0) return ;\r
6248\r
6249 item->next = EndCallTop;\r
6250 item->func = func;\r
6251\r
6252 EndCallTop = item;\r
6253}\r
6254\r
6255static void\r
6256exec_end_call_list(void)\r
6257{\r
b602265d 6258 EndCallListItemType* prev;\r
14b0e578
CS
6259 void (*func)(void);\r
6260\r
6261 while (EndCallTop != 0) {\r
6262 func = EndCallTop->func;\r
6263 (*func)();\r
6264\r
6265 prev = EndCallTop;\r
6266 EndCallTop = EndCallTop->next;\r
6267 xfree(prev);\r
6268 }\r
6269}\r
6270\r
6271extern int\r
6272onig_end(void)\r
6273{\r
14b0e578
CS
6274 exec_end_call_list();\r
6275\r
b602265d
DG
6276#ifdef USE_CALLOUT\r
6277 onig_global_callout_names_free();\r
14b0e578
CS
6278#endif\r
6279\r
b602265d 6280 onigenc_end();\r
14b0e578
CS
6281\r
6282 onig_inited = 0;\r
6283\r
14b0e578
CS
6284 return 0;\r
6285}\r
6286\r
6287extern int\r
6288onig_is_in_code_range(const UChar* p, OnigCodePoint code)\r
6289{\r
6290 OnigCodePoint n, *data;\r
6291 OnigCodePoint low, high, x;\r
6292\r
6293 GET_CODE_POINT(n, p);\r
6294 data = (OnigCodePoint* )p;\r
6295 data++;\r
6296\r
6297 for (low = 0, high = n; low < high; ) {\r
6298 x = (low + high) >> 1;\r
6299 if (code > data[x * 2 + 1])\r
6300 low = x + 1;\r
6301 else\r
6302 high = x;\r
6303 }\r
6304\r
6305 return ((low < n && code >= data[low * 2]) ? 1 : 0);\r
6306}\r
6307\r
6308extern int\r
b602265d 6309onig_is_code_in_cc_len(int elen, OnigCodePoint code, /* CClassNode* */ void* cc_arg)\r
14b0e578
CS
6310{\r
6311 int found;\r
b602265d 6312 CClassNode* cc = (CClassNode* )cc_arg;\r
14b0e578
CS
6313\r
6314 if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {\r
6315 if (IS_NULL(cc->mbuf)) {\r
6316 found = 0;\r
6317 }\r
6318 else {\r
6319 found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);\r
6320 }\r
6321 }\r
6322 else {\r
6323 found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);\r
6324 }\r
6325\r
6326 if (IS_NCCLASS_NOT(cc))\r
6327 return !found;\r
6328 else\r
6329 return found;\r
6330}\r
6331\r
6332extern int\r
6333onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)\r
6334{\r
6335 int len;\r
6336\r
6337 if (ONIGENC_MBC_MINLEN(enc) > 1) {\r
6338 len = 2;\r
6339 }\r
6340 else {\r
6341 len = ONIGENC_CODE_TO_MBCLEN(enc, code);\r
6342 }\r
6343 return onig_is_code_in_cc_len(len, code, cc);\r
6344}\r
6345\r
6346\r
b602265d 6347#ifdef ONIG_DEBUG_PARSE\r
14b0e578
CS
6348\r
6349static void\r
6350p_string(FILE* f, int len, UChar* s)\r
6351{\r
6352 fputs(":", f);\r
6353 while (len-- > 0) { fputc(*s++, f); }\r
6354}\r
6355\r
6356static void\r
b602265d 6357Indent(FILE* f, int indent)\r
14b0e578 6358{\r
b602265d
DG
6359 int i;\r
6360 for (i = 0; i < indent; i++) putc(' ', f);\r
14b0e578
CS
6361}\r
6362\r
6363static void\r
6364print_indent_tree(FILE* f, Node* node, int indent)\r
6365{\r
b602265d
DG
6366 int i;\r
6367 NodeType type;\r
14b0e578 6368 UChar* p;\r
b602265d 6369 int add = 3;\r
14b0e578
CS
6370\r
6371 Indent(f, indent);\r
6372 if (IS_NULL(node)) {\r
6373 fprintf(f, "ERROR: null node!!!\n");\r
6374 exit (0);\r
6375 }\r
6376\r
b602265d 6377 type = NODE_TYPE(node);\r
14b0e578 6378 switch (type) {\r
b602265d
DG
6379 case NODE_LIST:\r
6380 case NODE_ALT:\r
6381 if (type == NODE_LIST)\r
6382 fprintf(f, "<list:%p>\n", node);\r
14b0e578 6383 else\r
b602265d 6384 fprintf(f, "<alt:%p>\n", node);\r
14b0e578 6385\r
b602265d
DG
6386 print_indent_tree(f, NODE_CAR(node), indent + add);\r
6387 while (IS_NOT_NULL(node = NODE_CDR(node))) {\r
6388 if (NODE_TYPE(node) != type) {\r
6389 fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NODE_TYPE(node));\r
6390 exit(0);\r
14b0e578 6391 }\r
b602265d 6392 print_indent_tree(f, NODE_CAR(node), indent + add);\r
14b0e578
CS
6393 }\r
6394 break;\r
6395\r
b602265d
DG
6396 case NODE_STRING:\r
6397 fprintf(f, "<string%s:%p>", (NODE_STRING_IS_RAW(node) ? "-raw" : ""), node);\r
6398 for (p = STR_(node)->s; p < STR_(node)->end; p++) {\r
14b0e578 6399 if (*p >= 0x20 && *p < 0x7f)\r
b602265d 6400 fputc(*p, f);\r
14b0e578 6401 else {\r
b602265d 6402 fprintf(f, " 0x%02x", *p);\r
14b0e578
CS
6403 }\r
6404 }\r
6405 break;\r
6406\r
b602265d
DG
6407 case NODE_CCLASS:\r
6408 fprintf(f, "<cclass:%p>", node);\r
6409 if (IS_NCCLASS_NOT(CCLASS_(node))) fputs(" not", f);\r
6410 if (CCLASS_(node)->mbuf) {\r
6411 BBuf* bbuf = CCLASS_(node)->mbuf;\r
14b0e578 6412 for (i = 0; i < bbuf->used; i++) {\r
b602265d
DG
6413 if (i > 0) fprintf(f, ",");\r
6414 fprintf(f, "%0x", bbuf->p[i]);\r
14b0e578
CS
6415 }\r
6416 }\r
6417 break;\r
6418\r
b602265d
DG
6419 case NODE_CTYPE:\r
6420 fprintf(f, "<ctype:%p> ", node);\r
6421 switch (CTYPE_(node)->ctype) {\r
6422 case CTYPE_ANYCHAR:\r
6423 fprintf(f, "<anychar:%p>", node);\r
6424 break;\r
6425\r
14b0e578 6426 case ONIGENC_CTYPE_WORD:\r
b602265d
DG
6427 if (CTYPE_(node)->not != 0)\r
6428 fputs("not word", f);\r
14b0e578 6429 else\r
b602265d
DG
6430 fputs("word", f);\r
6431\r
6432 if (CTYPE_(node)->ascii_mode != 0)\r
6433 fputs(" (ascii)", f);\r
6434\r
14b0e578
CS
6435 break;\r
6436\r
6437 default:\r
6438 fprintf(f, "ERROR: undefined ctype.\n");\r
6439 exit(0);\r
6440 }\r
6441 break;\r
6442\r
b602265d
DG
6443 case NODE_ANCHOR:\r
6444 fprintf(f, "<anchor:%p> ", node);\r
6445 switch (ANCHOR_(node)->type) {\r
6446 case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break;\r
6447 case ANCHOR_END_BUF: fputs("end buf", f); break;\r
6448 case ANCHOR_BEGIN_LINE: fputs("begin line", f); break;\r
6449 case ANCHOR_END_LINE: fputs("end line", f); break;\r
6450 case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break;\r
6451 case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;\r
14b0e578 6452\r
b602265d
DG
6453 case ANCHOR_WORD_BOUNDARY: fputs("word boundary", f); break;\r
6454 case ANCHOR_NO_WORD_BOUNDARY: fputs("not word boundary", f); break;\r
14b0e578 6455#ifdef USE_WORD_BEGIN_END\r
b602265d
DG
6456 case ANCHOR_WORD_BEGIN: fputs("word begin", f); break;\r
6457 case ANCHOR_WORD_END: fputs("word end", f); break;\r
14b0e578 6458#endif\r
b602265d
DG
6459 case ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:\r
6460 fputs("extended-grapheme-cluster boundary", f); break;\r
6461 case ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:\r
6462 fputs("no-extended-grapheme-cluster boundary", f); break;\r
6463 case ANCHOR_PREC_READ:\r
6464 fprintf(f, "prec read\n");\r
6465 print_indent_tree(f, NODE_BODY(node), indent + add);\r
6466 break;\r
6467 case ANCHOR_PREC_READ_NOT:\r
6468 fprintf(f, "prec read not\n");\r
6469 print_indent_tree(f, NODE_BODY(node), indent + add);\r
6470 break;\r
6471 case ANCHOR_LOOK_BEHIND:\r
6472 fprintf(f, "look behind\n");\r
6473 print_indent_tree(f, NODE_BODY(node), indent + add);\r
6474 break;\r
6475 case ANCHOR_LOOK_BEHIND_NOT:\r
6476 fprintf(f, "look behind not\n");\r
6477 print_indent_tree(f, NODE_BODY(node), indent + add);\r
6478 break;\r
14b0e578
CS
6479\r
6480 default:\r
6481 fprintf(f, "ERROR: undefined anchor type.\n");\r
6482 break;\r
6483 }\r
6484 break;\r
6485\r
b602265d 6486 case NODE_BACKREF:\r
14b0e578
CS
6487 {\r
6488 int* p;\r
b602265d 6489 BackRefNode* br = BACKREF_(node);\r
14b0e578 6490 p = BACKREFS_P(br);\r
b602265d 6491 fprintf(f, "<backref%s:%p>", NODE_IS_CHECKER(node) ? "-checker" : "", node);\r
14b0e578 6492 for (i = 0; i < br->back_num; i++) {\r
b602265d
DG
6493 if (i > 0) fputs(", ", f);\r
6494 fprintf(f, "%d", p[i]);\r
14b0e578
CS
6495 }\r
6496 }\r
6497 break;\r
6498\r
b602265d
DG
6499#ifdef USE_CALL\r
6500 case NODE_CALL:\r
14b0e578 6501 {\r
b602265d
DG
6502 CallNode* cn = CALL_(node);\r
6503 fprintf(f, "<call:%p>", node);\r
14b0e578
CS
6504 p_string(f, cn->name_end - cn->name, cn->name);\r
6505 }\r
6506 break;\r
6507#endif\r
6508\r
b602265d
DG
6509 case NODE_QUANT:\r
6510 fprintf(f, "<quantifier:%p>{%d,%d}%s\n", node,\r
6511 QUANT_(node)->lower, QUANT_(node)->upper,\r
6512 (QUANT_(node)->greedy ? "" : "?"));\r
6513 print_indent_tree(f, NODE_BODY(node), indent + add);\r
14b0e578
CS
6514 break;\r
6515\r
b602265d
DG
6516 case NODE_ENCLOSURE:\r
6517 fprintf(f, "<enclosure:%p> ", node);\r
6518 switch (ENCLOSURE_(node)->type) {\r
6519 case ENCLOSURE_OPTION:\r
6520 fprintf(f, "option:%d", ENCLOSURE_(node)->o.options);\r
14b0e578 6521 break;\r
b602265d
DG
6522 case ENCLOSURE_MEMORY:\r
6523 fprintf(f, "memory:%d", ENCLOSURE_(node)->m.regnum);\r
14b0e578 6524 break;\r
b602265d 6525 case ENCLOSURE_STOP_BACKTRACK:\r
14b0e578
CS
6526 fprintf(f, "stop-bt");\r
6527 break;\r
6528\r
6529 default:\r
6530 break;\r
6531 }\r
6532 fprintf(f, "\n");\r
b602265d
DG
6533 print_indent_tree(f, NODE_BODY(node), indent + add);\r
6534 break;\r
6535\r
6536 case NODE_GIMMICK:\r
6537 fprintf(f, "<gimmick:%p> ", node);\r
6538 switch (GIMMICK_(node)->type) {\r
6539 case GIMMICK_FAIL:\r
6540 fprintf(f, "fail");\r
6541 break;\r
6542 case GIMMICK_KEEP:\r
6543 fprintf(f, "keep:%d", GIMMICK_(node)->id);\r
6544 break;\r
6545 case GIMMICK_SAVE:\r
6546 fprintf(f, "save:%d:%d", GIMMICK_(node)->detail_type, GIMMICK_(node)->id);\r
6547 break;\r
6548 case GIMMICK_UPDATE_VAR:\r
6549 fprintf(f, "update_var:%d:%d", GIMMICK_(node)->detail_type, GIMMICK_(node)->id);\r
6550 break;\r
6551#ifdef USE_CALLOUT\r
6552 case GIMMICK_CALLOUT:\r
6553 switch (GIMMICK_(node)->detail_type) {\r
6554 case ONIG_CALLOUT_OF_CONTENTS:\r
6555 fprintf(f, "callout:contents:%d", GIMMICK_(node)->num);\r
6556 break;\r
6557 case ONIG_CALLOUT_OF_NAME:\r
6558 fprintf(f, "callout:name:%d:%d", GIMMICK_(node)->id, GIMMICK_(node)->num);\r
6559 break;\r
6560 }\r
6561#endif\r
6562 }\r
14b0e578
CS
6563 break;\r
6564\r
6565 default:\r
b602265d 6566 fprintf(f, "print_indent_tree: undefined node type %d\n", NODE_TYPE(node));\r
14b0e578
CS
6567 break;\r
6568 }\r
6569\r
b602265d
DG
6570 if (type != NODE_LIST && type != NODE_ALT && type != NODE_QUANT &&\r
6571 type != NODE_ENCLOSURE)\r
14b0e578
CS
6572 fprintf(f, "\n");\r
6573 fflush(f);\r
6574}\r
14b0e578 6575\r
14b0e578
CS
6576static void\r
6577print_tree(FILE* f, Node* node)\r
6578{\r
6579 print_indent_tree(f, node, 0);\r
6580}\r
6581#endif\r