]> git.proxmox.com Git - mirror_edk2.git/blame - MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regcomp.c
MdeModulePkg RegularExpressionDxe: Update Oniguruma to 6.9.0
[mirror_edk2.git] / MdeModulePkg / Universal / RegularExpressionDxe / Oniguruma / regcomp.c
CommitLineData
14b0e578
CS
1/**********************************************************************\r
2 regcomp.c - Oniguruma (regular expression library)\r
3**********************************************************************/\r
4/*-\r
b602265d 5 * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>\r
14b0e578
CS
6 * All rights reserved.\r
7 *\r
14b0e578
CS
8 * Redistribution and use in source and binary forms, with or without\r
9 * modification, are permitted provided that the following conditions\r
10 * are met:\r
11 * 1. Redistributions of source code must retain the above copyright\r
12 * notice, this list of conditions and the following disclaimer.\r
13 * 2. Redistributions in binary form must reproduce the above copyright\r
14 * notice, this list of conditions and the following disclaimer in the\r
15 * documentation and/or other materials provided with the distribution.\r
16 *\r
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND\r
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\r
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\r
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE\r
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\r
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS\r
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)\r
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT\r
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY\r
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF\r
27 * SUCH DAMAGE.\r
28 */\r
29\r
30#include "regparse.h"\r
31\r
32OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;\r
33\r
b602265d
DG
34#if 0\r
35typedef struct {\r
36 int n;\r
37 int alloc;\r
38 int* v;\r
39} int_stack;\r
40\r
41static int\r
42make_int_stack(int_stack** rs, int init_size)\r
43{\r
44 int_stack* s;\r
45 int* v;\r
46\r
47 *rs = 0;\r
48\r
49 s = xmalloc(sizeof(*s));\r
50 if (IS_NULL(s)) return ONIGERR_MEMORY;\r
51\r
52 v = (int* )xmalloc(sizeof(int) * init_size);\r
53 if (IS_NULL(v)) {\r
54 xfree(s);\r
55 return ONIGERR_MEMORY;\r
56 }\r
57\r
58 s->n = 0;\r
59 s->alloc = init_size;\r
60 s->v = v;\r
61\r
62 *rs = s;\r
63 return ONIG_NORMAL;\r
64}\r
65\r
66static void\r
67free_int_stack(int_stack* s)\r
68{\r
69 if (IS_NOT_NULL(s)) {\r
70 if (IS_NOT_NULL(s->v))\r
71 xfree(s->v);\r
72 xfree(s);\r
73 }\r
74}\r
75\r
76static int\r
77int_stack_push(int_stack* s, int v)\r
78{\r
79 if (s->n >= s->alloc) {\r
80 int new_size = s->alloc * 2;\r
81 int* nv = (int* )xrealloc(s->v, sizeof(int) * new_size);\r
82 if (IS_NULL(nv)) return ONIGERR_MEMORY;\r
83\r
84 s->alloc = new_size;\r
85 s->v = nv;\r
86 }\r
87\r
88 s->v[s->n] = v;\r
89 s->n++;\r
90 return ONIG_NORMAL;\r
91}\r
92\r
93static int\r
94int_stack_pop(int_stack* s)\r
95{\r
96 int v;\r
97\r
98#ifdef ONIG_DEBUG\r
99 if (s->n <= 0) {\r
100 fprintf(stderr, "int_stack_pop: fail empty. %p\n", s);\r
101 return 0;\r
102 }\r
103#endif\r
104\r
105 v = s->v[s->n];\r
106 s->n--;\r
107 return v;\r
108}\r
109#endif\r
110\r
14b0e578
CS
111extern OnigCaseFoldType\r
112onig_get_default_case_fold_flag(void)\r
113{\r
114 return OnigDefaultCaseFoldFlag;\r
115}\r
116\r
117extern int\r
118onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)\r
119{\r
120 OnigDefaultCaseFoldFlag = case_fold_flag;\r
121 return 0;\r
122}\r
123\r
b602265d
DG
124static int\r
125int_multiply_cmp(int x, int y, int v)\r
14b0e578 126{\r
b602265d 127 if (x == 0 || y == 0) return -1;\r
14b0e578 128\r
b602265d
DG
129 if (x < INT_MAX / y) {\r
130 int xy = x * y;\r
131 if (xy > v) return 1;\r
132 else {\r
133 if (xy == v) return 0;\r
134 else return -1;\r
135 }\r
14b0e578 136 }\r
b602265d
DG
137 else\r
138 return 1;\r
14b0e578
CS
139}\r
140\r
b602265d
DG
141\r
142#ifndef PLATFORM_UNALIGNED_WORD_ACCESS\r
143static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];\r
144#endif\r
145\r
14b0e578
CS
146static void\r
147swap_node(Node* a, Node* b)\r
148{\r
149 Node c;\r
14b0e578 150\r
b602265d
DG
151 c = *a; *a = *b; *b = c;\r
152\r
153 if (NODE_TYPE(a) == NODE_STRING) {\r
154 StrNode* sn = STR_(a);\r
14b0e578 155 if (sn->capa == 0) {\r
b602265d 156 int len = (int )(sn->end - sn->s);\r
14b0e578
CS
157 sn->s = sn->buf;\r
158 sn->end = sn->s + len;\r
159 }\r
160 }\r
161\r
b602265d
DG
162 if (NODE_TYPE(b) == NODE_STRING) {\r
163 StrNode* sn = STR_(b);\r
14b0e578 164 if (sn->capa == 0) {\r
b602265d 165 int len = (int )(sn->end - sn->s);\r
14b0e578
CS
166 sn->s = sn->buf;\r
167 sn->end = sn->s + len;\r
168 }\r
169 }\r
170}\r
171\r
b602265d
DG
172static OnigLen\r
173distance_add(OnigLen d1, OnigLen d2)\r
14b0e578 174{\r
b602265d
DG
175 if (d1 == INFINITE_LEN || d2 == INFINITE_LEN)\r
176 return INFINITE_LEN;\r
14b0e578 177 else {\r
b602265d
DG
178 if (d1 <= INFINITE_LEN - d2) return d1 + d2;\r
179 else return INFINITE_LEN;\r
14b0e578
CS
180 }\r
181}\r
182\r
b602265d
DG
183static OnigLen\r
184distance_multiply(OnigLen d, int m)\r
14b0e578
CS
185{\r
186 if (m == 0) return 0;\r
187\r
b602265d 188 if (d < INFINITE_LEN / m)\r
14b0e578
CS
189 return d * m;\r
190 else\r
b602265d 191 return INFINITE_LEN;\r
14b0e578
CS
192}\r
193\r
194static int\r
195bitset_is_empty(BitSetRef bs)\r
196{\r
197 int i;\r
b602265d 198\r
14b0e578
CS
199 for (i = 0; i < (int )BITSET_SIZE; i++) {\r
200 if (bs[i] != 0) return 0;\r
201 }\r
202 return 1;\r
203}\r
204\r
14b0e578
CS
205extern int\r
206onig_bbuf_init(BBuf* buf, int size)\r
207{\r
208 if (size <= 0) {\r
209 size = 0;\r
210 buf->p = NULL;\r
211 }\r
212 else {\r
213 buf->p = (UChar* )xmalloc(size);\r
214 if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);\r
215 }\r
216\r
217 buf->alloc = size;\r
218 buf->used = 0;\r
219 return 0;\r
220}\r
221\r
222\r
b602265d 223#ifdef USE_CALL\r
14b0e578
CS
224\r
225static int\r
b602265d 226unset_addr_list_init(UnsetAddrList* list, int size)\r
14b0e578 227{\r
b602265d 228 UnsetAddr* p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);\r
14b0e578 229 CHECK_NULL_RETURN_MEMERR(p);\r
b602265d
DG
230\r
231 list->num = 0;\r
232 list->alloc = size;\r
233 list->us = p;\r
14b0e578
CS
234 return 0;\r
235}\r
236\r
237static void\r
b602265d 238unset_addr_list_end(UnsetAddrList* list)\r
14b0e578 239{\r
b602265d
DG
240 if (IS_NOT_NULL(list->us))\r
241 xfree(list->us);\r
14b0e578
CS
242}\r
243\r
244static int\r
b602265d 245unset_addr_list_add(UnsetAddrList* list, int offset, struct _Node* node)\r
14b0e578
CS
246{\r
247 UnsetAddr* p;\r
248 int size;\r
249\r
b602265d
DG
250 if (list->num >= list->alloc) {\r
251 size = list->alloc * 2;\r
252 p = (UnsetAddr* )xrealloc(list->us, sizeof(UnsetAddr) * size, sizeof(UnsetAddr)* list->alloc);\r
14b0e578 253 CHECK_NULL_RETURN_MEMERR(p);\r
b602265d
DG
254 list->alloc = size;\r
255 list->us = p;\r
14b0e578
CS
256 }\r
257\r
b602265d
DG
258 list->us[list->num].offset = offset;\r
259 list->us[list->num].target = node;\r
260 list->num++;\r
14b0e578
CS
261 return 0;\r
262}\r
b602265d 263#endif /* USE_CALL */\r
14b0e578
CS
264\r
265\r
266static int\r
267add_opcode(regex_t* reg, int opcode)\r
268{\r
b602265d 269 BB_ADD1(reg, opcode);\r
14b0e578
CS
270 return 0;\r
271}\r
14b0e578
CS
272\r
273static int\r
274add_rel_addr(regex_t* reg, int addr)\r
275{\r
276 RelAddrType ra = (RelAddrType )addr;\r
277\r
b602265d 278 BB_ADD(reg, &ra, SIZE_RELADDR);\r
14b0e578
CS
279 return 0;\r
280}\r
281\r
282static int\r
283add_abs_addr(regex_t* reg, int addr)\r
284{\r
285 AbsAddrType ra = (AbsAddrType )addr;\r
286\r
b602265d 287 BB_ADD(reg, &ra, SIZE_ABSADDR);\r
14b0e578
CS
288 return 0;\r
289}\r
290\r
291static int\r
292add_length(regex_t* reg, int len)\r
293{\r
294 LengthType l = (LengthType )len;\r
295\r
b602265d 296 BB_ADD(reg, &l, SIZE_LENGTH);\r
14b0e578
CS
297 return 0;\r
298}\r
299\r
300static int\r
301add_mem_num(regex_t* reg, int num)\r
302{\r
303 MemNumType n = (MemNumType )num;\r
304\r
b602265d 305 BB_ADD(reg, &n, SIZE_MEMNUM);\r
14b0e578
CS
306 return 0;\r
307}\r
308\r
b602265d 309#if 0\r
14b0e578
CS
310static int\r
311add_pointer(regex_t* reg, void* addr)\r
312{\r
313 PointerType ptr = (PointerType )addr;\r
314\r
b602265d 315 BB_ADD(reg, &ptr, SIZE_POINTER);\r
14b0e578
CS
316 return 0;\r
317}\r
b602265d 318#endif\r
14b0e578
CS
319\r
320static int\r
321add_option(regex_t* reg, OnigOptionType option)\r
322{\r
b602265d 323 BB_ADD(reg, &option, SIZE_OPTION);\r
14b0e578
CS
324 return 0;\r
325}\r
326\r
327static int\r
b602265d 328add_save_type(regex_t* reg, enum SaveType type)\r
14b0e578 329{\r
b602265d 330 SaveType t = (SaveType )type;\r
14b0e578 331\r
b602265d
DG
332 BB_ADD(reg, &t, SIZE_SAVE_TYPE);\r
333 return 0;\r
14b0e578
CS
334}\r
335\r
336static int\r
b602265d 337add_update_var_type(regex_t* reg, enum UpdateVarType type)\r
14b0e578 338{\r
b602265d
DG
339 UpdateVarType t = (UpdateVarType )type;\r
340\r
341 BB_ADD(reg, &t, SIZE_UPDATE_VAR_TYPE);\r
14b0e578
CS
342 return 0;\r
343}\r
344\r
345static int\r
b602265d 346add_mode(regex_t* reg, ModeType mode)\r
14b0e578 347{\r
b602265d 348 BB_ADD(reg, &mode, SIZE_MODE);\r
14b0e578
CS
349 return 0;\r
350}\r
351\r
352static int\r
b602265d 353add_opcode_rel_addr(regex_t* reg, int opcode, int addr)\r
14b0e578
CS
354{\r
355 int r;\r
356\r
357 r = add_opcode(reg, opcode);\r
b602265d
DG
358 if (r != 0) return r;\r
359 r = add_rel_addr(reg, addr);\r
14b0e578
CS
360 return r;\r
361}\r
362\r
b602265d
DG
363static int\r
364add_bytes(regex_t* reg, UChar* bytes, int len)\r
365{\r
366 BB_ADD(reg, bytes, len);\r
367 return 0;\r
368}\r
369\r
370static int\r
371add_bitset(regex_t* reg, BitSetRef bs)\r
372{\r
373 BB_ADD(reg, bs, SIZE_BITSET);\r
374 return 0;\r
375}\r
376\r
14b0e578 377static int compile_length_tree(Node* node, regex_t* reg);\r
b602265d 378static int compile_tree(Node* node, regex_t* reg, ScanEnv* env);\r
14b0e578
CS
379\r
380\r
381#define IS_NEED_STR_LEN_OP_EXACT(op) \\r
382 ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\\r
383 (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)\r
384\r
385static int\r
386select_str_opcode(int mb_len, int str_len, int ignore_case)\r
387{\r
388 int op;\r
389\r
390 if (ignore_case) {\r
391 switch (str_len) {\r
392 case 1: op = OP_EXACT1_IC; break;\r
393 default: op = OP_EXACTN_IC; break;\r
394 }\r
395 }\r
396 else {\r
397 switch (mb_len) {\r
398 case 1:\r
399 switch (str_len) {\r
400 case 1: op = OP_EXACT1; break;\r
401 case 2: op = OP_EXACT2; break;\r
402 case 3: op = OP_EXACT3; break;\r
403 case 4: op = OP_EXACT4; break;\r
404 case 5: op = OP_EXACT5; break;\r
405 default: op = OP_EXACTN; break;\r
406 }\r
407 break;\r
408\r
409 case 2:\r
410 switch (str_len) {\r
411 case 1: op = OP_EXACTMB2N1; break;\r
412 case 2: op = OP_EXACTMB2N2; break;\r
413 case 3: op = OP_EXACTMB2N3; break;\r
414 default: op = OP_EXACTMB2N; break;\r
415 }\r
416 break;\r
417\r
418 case 3:\r
419 op = OP_EXACTMB3N;\r
420 break;\r
421\r
422 default:\r
423 op = OP_EXACTMBN;\r
424 break;\r
425 }\r
426 }\r
427 return op;\r
428}\r
429\r
430static int\r
b602265d 431compile_tree_empty_check(Node* node, regex_t* reg, int empty_info, ScanEnv* env)\r
14b0e578
CS
432{\r
433 int r;\r
434 int saved_num_null_check = reg->num_null_check;\r
435\r
b602265d
DG
436 if (empty_info != QUANT_BODY_IS_NOT_EMPTY) {\r
437 r = add_opcode(reg, OP_EMPTY_CHECK_START);\r
438 if (r != 0) return r;\r
14b0e578 439 r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */\r
b602265d 440 if (r != 0) return r;\r
14b0e578
CS
441 reg->num_null_check++;\r
442 }\r
443\r
b602265d
DG
444 r = compile_tree(node, reg, env);\r
445 if (r != 0) return r;\r
14b0e578 446\r
b602265d
DG
447 if (empty_info != QUANT_BODY_IS_NOT_EMPTY) {\r
448 if (empty_info == QUANT_BODY_IS_EMPTY)\r
449 r = add_opcode(reg, OP_EMPTY_CHECK_END);\r
450 else if (empty_info == QUANT_BODY_IS_EMPTY_MEM)\r
451 r = add_opcode(reg, OP_EMPTY_CHECK_END_MEMST);\r
452 else if (empty_info == QUANT_BODY_IS_EMPTY_REC)\r
453 r = add_opcode(reg, OP_EMPTY_CHECK_END_MEMST_PUSH);\r
14b0e578 454\r
b602265d 455 if (r != 0) return r;\r
14b0e578
CS
456 r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */\r
457 }\r
458 return r;\r
459}\r
460\r
b602265d 461#ifdef USE_CALL\r
14b0e578 462static int\r
b602265d 463compile_call(CallNode* node, regex_t* reg, ScanEnv* env)\r
14b0e578
CS
464{\r
465 int r;\r
466\r
467 r = add_opcode(reg, OP_CALL);\r
b602265d
DG
468 if (r != 0) return r;\r
469 r = unset_addr_list_add(env->unset_addr_list, BB_GET_OFFSET_POS(reg),\r
470 NODE_CALL_BODY(node));\r
471 if (r != 0) return r;\r
14b0e578
CS
472 r = add_abs_addr(reg, 0 /*dummy addr.*/);\r
473 return r;\r
474}\r
475#endif\r
476\r
477static int\r
b602265d 478compile_tree_n_times(Node* node, int n, regex_t* reg, ScanEnv* env)\r
14b0e578
CS
479{\r
480 int i, r;\r
481\r
482 for (i = 0; i < n; i++) {\r
b602265d
DG
483 r = compile_tree(node, reg, env);\r
484 if (r != 0) return r;\r
14b0e578
CS
485 }\r
486 return 0;\r
487}\r
488\r
489static int\r
490add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, int str_len,\r
491 regex_t* reg ARG_UNUSED, int ignore_case)\r
492{\r
493 int len;\r
494 int op = select_str_opcode(mb_len, str_len, ignore_case);\r
495\r
496 len = SIZE_OPCODE;\r
497\r
498 if (op == OP_EXACTMBN) len += SIZE_LENGTH;\r
499 if (IS_NEED_STR_LEN_OP_EXACT(op))\r
500 len += SIZE_LENGTH;\r
501\r
502 len += mb_len * str_len;\r
503 return len;\r
504}\r
505\r
506static int\r
507add_compile_string(UChar* s, int mb_len, int str_len,\r
508 regex_t* reg, int ignore_case)\r
509{\r
510 int op = select_str_opcode(mb_len, str_len, ignore_case);\r
511 add_opcode(reg, op);\r
512\r
513 if (op == OP_EXACTMBN)\r
514 add_length(reg, mb_len);\r
515\r
516 if (IS_NEED_STR_LEN_OP_EXACT(op)) {\r
517 if (op == OP_EXACTN_IC)\r
518 add_length(reg, mb_len * str_len);\r
519 else\r
520 add_length(reg, str_len);\r
521 }\r
522\r
523 add_bytes(reg, s, mb_len * str_len);\r
524 return 0;\r
525}\r
526\r
527\r
528static int\r
529compile_length_string_node(Node* node, regex_t* reg)\r
530{\r
531 int rlen, r, len, prev_len, slen, ambig;\r
14b0e578
CS
532 UChar *p, *prev;\r
533 StrNode* sn;\r
b602265d 534 OnigEncoding enc = reg->enc;\r
14b0e578 535\r
b602265d 536 sn = STR_(node);\r
14b0e578
CS
537 if (sn->end <= sn->s)\r
538 return 0;\r
539\r
b602265d 540 ambig = NODE_STRING_IS_AMBIG(node);\r
14b0e578
CS
541\r
542 p = prev = sn->s;\r
543 prev_len = enclen(enc, p);\r
544 p += prev_len;\r
545 slen = 1;\r
546 rlen = 0;\r
547\r
548 for (; p < sn->end; ) {\r
549 len = enclen(enc, p);\r
550 if (len == prev_len) {\r
551 slen++;\r
552 }\r
553 else {\r
554 r = add_compile_string_length(prev, prev_len, slen, reg, ambig);\r
555 rlen += r;\r
556 prev = p;\r
557 slen = 1;\r
558 prev_len = len;\r
559 }\r
560 p += len;\r
561 }\r
b602265d 562\r
14b0e578
CS
563 r = add_compile_string_length(prev, prev_len, slen, reg, ambig);\r
564 rlen += r;\r
565 return rlen;\r
566}\r
567\r
568static int\r
569compile_length_string_raw_node(StrNode* sn, regex_t* reg)\r
570{\r
571 if (sn->end <= sn->s)\r
572 return 0;\r
573\r
b602265d
DG
574 return add_compile_string_length(sn->s, 1 /* sb */, (int )(sn->end - sn->s),\r
575 reg, 0);\r
14b0e578
CS
576}\r
577\r
578static int\r
579compile_string_node(Node* node, regex_t* reg)\r
580{\r
581 int r, len, prev_len, slen, ambig;\r
14b0e578
CS
582 UChar *p, *prev, *end;\r
583 StrNode* sn;\r
b602265d 584 OnigEncoding enc = reg->enc;\r
14b0e578 585\r
b602265d 586 sn = STR_(node);\r
14b0e578
CS
587 if (sn->end <= sn->s)\r
588 return 0;\r
589\r
590 end = sn->end;\r
b602265d 591 ambig = NODE_STRING_IS_AMBIG(node);\r
14b0e578
CS
592\r
593 p = prev = sn->s;\r
594 prev_len = enclen(enc, p);\r
595 p += prev_len;\r
596 slen = 1;\r
597\r
598 for (; p < end; ) {\r
599 len = enclen(enc, p);\r
600 if (len == prev_len) {\r
601 slen++;\r
602 }\r
603 else {\r
604 r = add_compile_string(prev, prev_len, slen, reg, ambig);\r
b602265d 605 if (r != 0) return r;\r
14b0e578
CS
606\r
607 prev = p;\r
608 slen = 1;\r
609 prev_len = len;\r
610 }\r
611\r
612 p += len;\r
613 }\r
b602265d 614\r
14b0e578
CS
615 return add_compile_string(prev, prev_len, slen, reg, ambig);\r
616}\r
617\r
618static int\r
619compile_string_raw_node(StrNode* sn, regex_t* reg)\r
620{\r
621 if (sn->end <= sn->s)\r
622 return 0;\r
623\r
b602265d 624 return add_compile_string(sn->s, 1 /* sb */, (int )(sn->end - sn->s), reg, 0);\r
14b0e578
CS
625}\r
626\r
627static int\r
628add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)\r
629{\r
630#ifdef PLATFORM_UNALIGNED_WORD_ACCESS\r
631 add_length(reg, mbuf->used);\r
632 return add_bytes(reg, mbuf->p, mbuf->used);\r
633#else\r
634 int r, pad_size;\r
b602265d 635 UChar* p = BB_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;\r
14b0e578
CS
636\r
637 GET_ALIGNMENT_PAD_SIZE(p, pad_size);\r
638 add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));\r
639 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);\r
640\r
641 r = add_bytes(reg, mbuf->p, mbuf->used);\r
642\r
643 /* padding for return value from compile_length_cclass_node() to be fix. */\r
644 pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;\r
645 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);\r
646 return r;\r
647#endif\r
648}\r
649\r
650static int\r
651compile_length_cclass_node(CClassNode* cc, regex_t* reg)\r
652{\r
653 int len;\r
654\r
14b0e578
CS
655 if (IS_NULL(cc->mbuf)) {\r
656 len = SIZE_OPCODE + SIZE_BITSET;\r
657 }\r
658 else {\r
659 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {\r
660 len = SIZE_OPCODE;\r
661 }\r
662 else {\r
663 len = SIZE_OPCODE + SIZE_BITSET;\r
664 }\r
665#ifdef PLATFORM_UNALIGNED_WORD_ACCESS\r
666 len += SIZE_LENGTH + cc->mbuf->used;\r
667#else\r
668 len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);\r
669#endif\r
670 }\r
671\r
672 return len;\r
673}\r
674\r
675static int\r
676compile_cclass_node(CClassNode* cc, regex_t* reg)\r
677{\r
678 int r;\r
679\r
14b0e578
CS
680 if (IS_NULL(cc->mbuf)) {\r
681 if (IS_NCCLASS_NOT(cc))\r
682 add_opcode(reg, OP_CCLASS_NOT);\r
683 else\r
684 add_opcode(reg, OP_CCLASS);\r
685\r
686 r = add_bitset(reg, cc->bs);\r
687 }\r
688 else {\r
689 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {\r
690 if (IS_NCCLASS_NOT(cc))\r
691 add_opcode(reg, OP_CCLASS_MB_NOT);\r
692 else\r
693 add_opcode(reg, OP_CCLASS_MB);\r
694\r
695 r = add_multi_byte_cclass(cc->mbuf, reg);\r
696 }\r
697 else {\r
698 if (IS_NCCLASS_NOT(cc))\r
699 add_opcode(reg, OP_CCLASS_MIX_NOT);\r
700 else\r
701 add_opcode(reg, OP_CCLASS_MIX);\r
702\r
703 r = add_bitset(reg, cc->bs);\r
b602265d 704 if (r != 0) return r;\r
14b0e578
CS
705 r = add_multi_byte_cclass(cc->mbuf, reg);\r
706 }\r
707 }\r
708\r
709 return r;\r
710}\r
711\r
712static int\r
713entry_repeat_range(regex_t* reg, int id, int lower, int upper)\r
714{\r
715#define REPEAT_RANGE_ALLOC 4\r
716\r
717 OnigRepeatRange* p;\r
718\r
719 if (reg->repeat_range_alloc == 0) {\r
720 p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);\r
721 CHECK_NULL_RETURN_MEMERR(p);\r
722 reg->repeat_range = p;\r
723 reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;\r
724 }\r
725 else if (reg->repeat_range_alloc <= id) {\r
726 int n;\r
727 n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;\r
728 p = (OnigRepeatRange* )xrealloc(reg->repeat_range,\r
729 sizeof(OnigRepeatRange) * n,\r
730 sizeof(OnigRepeatRange) * reg->repeat_range_alloc);\r
731 CHECK_NULL_RETURN_MEMERR(p);\r
732 reg->repeat_range = p;\r
733 reg->repeat_range_alloc = n;\r
734 }\r
735 else {\r
736 p = reg->repeat_range;\r
737 }\r
738\r
739 p[id].lower = lower;\r
740 p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);\r
741 return 0;\r
742}\r
743\r
744static int\r
b602265d
DG
745compile_range_repeat_node(QuantNode* qn, int target_len, int empty_info,\r
746 regex_t* reg, ScanEnv* env)\r
14b0e578
CS
747{\r
748 int r;\r
749 int num_repeat = reg->num_repeat;\r
750\r
751 r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);\r
b602265d 752 if (r != 0) return r;\r
14b0e578
CS
753 r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */\r
754 reg->num_repeat++;\r
b602265d 755 if (r != 0) return r;\r
14b0e578 756 r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);\r
b602265d 757 if (r != 0) return r;\r
14b0e578
CS
758\r
759 r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);\r
b602265d 760 if (r != 0) return r;\r
14b0e578 761\r
b602265d
DG
762 r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);\r
763 if (r != 0) return r;\r
14b0e578
CS
764\r
765 if (\r
b602265d
DG
766#ifdef USE_CALL\r
767 NODE_IS_IN_MULTI_ENTRY(qn) ||\r
14b0e578 768#endif\r
b602265d 769 NODE_IS_IN_REAL_REPEAT(qn)) {\r
14b0e578
CS
770 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);\r
771 }\r
772 else {\r
773 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);\r
774 }\r
b602265d 775 if (r != 0) return r;\r
14b0e578
CS
776 r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */\r
777 return r;\r
778}\r
779\r
780static int\r
b602265d 781is_anychar_infinite_greedy(QuantNode* qn)\r
14b0e578
CS
782{\r
783 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&\r
b602265d 784 NODE_IS_ANYCHAR(NODE_QUANT_BODY(qn)))\r
14b0e578
CS
785 return 1;\r
786 else\r
787 return 0;\r
788}\r
789\r
790#define QUANTIFIER_EXPAND_LIMIT_SIZE 50\r
791#define CKN_ON (ckn > 0)\r
792\r
14b0e578 793static int\r
b602265d 794compile_length_quantifier_node(QuantNode* qn, regex_t* reg)\r
14b0e578
CS
795{\r
796 int len, mod_tlen;\r
797 int infinite = IS_REPEAT_INFINITE(qn->upper);\r
b602265d
DG
798 enum QuantBodyEmpty empty_info = qn->body_empty_info;\r
799 int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);\r
14b0e578
CS
800\r
801 if (tlen < 0) return tlen;\r
b602265d 802 if (tlen == 0) return 0;\r
14b0e578
CS
803\r
804 /* anychar repeat */\r
b602265d
DG
805 if (is_anychar_infinite_greedy(qn)) {\r
806 if (qn->lower <= 1 ||\r
807 int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0) {\r
14b0e578
CS
808 if (IS_NOT_NULL(qn->next_head_exact))\r
809 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;\r
810 else\r
811 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;\r
812 }\r
813 }\r
814\r
b602265d 815 if (empty_info == QUANT_BODY_IS_NOT_EMPTY)\r
14b0e578 816 mod_tlen = tlen;\r
b602265d
DG
817 else\r
818 mod_tlen = tlen + (SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END);\r
14b0e578
CS
819\r
820 if (infinite &&\r
b602265d
DG
821 (qn->lower <= 1 ||\r
822 int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {\r
14b0e578
CS
823 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {\r
824 len = SIZE_OP_JUMP;\r
825 }\r
826 else {\r
827 len = tlen * qn->lower;\r
828 }\r
829\r
830 if (qn->greedy) {\r
831 if (IS_NOT_NULL(qn->head_exact))\r
b602265d 832 len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;\r
14b0e578 833 else if (IS_NOT_NULL(qn->next_head_exact))\r
b602265d 834 len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;\r
14b0e578 835 else\r
b602265d 836 len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;\r
14b0e578
CS
837 }\r
838 else\r
839 len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;\r
840 }\r
841 else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */\r
842 len = SIZE_OP_JUMP + tlen;\r
843 }\r
844 else if (!infinite && qn->greedy &&\r
b602265d
DG
845 (qn->upper == 1 ||\r
846 int_multiply_cmp(tlen + SIZE_OP_PUSH, qn->upper,\r
847 QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {\r
14b0e578
CS
848 len = tlen * qn->lower;\r
849 len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);\r
850 }\r
851 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */\r
852 len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;\r
853 }\r
854 else {\r
855 len = SIZE_OP_REPEAT_INC\r
856 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;\r
857 }\r
858\r
859 return len;\r
860}\r
861\r
862static int\r
b602265d 863compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)\r
14b0e578
CS
864{\r
865 int i, r, mod_tlen;\r
866 int infinite = IS_REPEAT_INFINITE(qn->upper);\r
b602265d
DG
867 enum QuantBodyEmpty empty_info = qn->body_empty_info;\r
868 int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);\r
14b0e578
CS
869\r
870 if (tlen < 0) return tlen;\r
b602265d 871 if (tlen == 0) return 0;\r
14b0e578 872\r
b602265d
DG
873 if (is_anychar_infinite_greedy(qn) &&\r
874 (qn->lower <= 1 ||\r
875 int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {\r
876 r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);\r
877 if (r != 0) return r;\r
14b0e578 878 if (IS_NOT_NULL(qn->next_head_exact)) {\r
b602265d
DG
879 if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg)))\r
880 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);\r
14b0e578 881 else\r
b602265d
DG
882 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);\r
883 if (r != 0) return r;\r
884 return add_bytes(reg, STR_(qn->next_head_exact)->s, 1);\r
14b0e578
CS
885 }\r
886 else {\r
b602265d
DG
887 if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg)))\r
888 return add_opcode(reg, OP_ANYCHAR_ML_STAR);\r
14b0e578 889 else\r
b602265d 890 return add_opcode(reg, OP_ANYCHAR_STAR);\r
14b0e578
CS
891 }\r
892 }\r
893\r
b602265d 894 if (empty_info == QUANT_BODY_IS_NOT_EMPTY)\r
14b0e578 895 mod_tlen = tlen;\r
b602265d
DG
896 else\r
897 mod_tlen = tlen + (SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END);\r
14b0e578
CS
898\r
899 if (infinite &&\r
b602265d
DG
900 (qn->lower <= 1 ||\r
901 int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {\r
14b0e578
CS
902 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {\r
903 if (qn->greedy) {\r
b602265d
DG
904 if (IS_NOT_NULL(qn->head_exact))\r
905 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);\r
906 else if (IS_NOT_NULL(qn->next_head_exact))\r
907 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);\r
908 else\r
909 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);\r
14b0e578
CS
910 }\r
911 else {\r
b602265d 912 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);\r
14b0e578 913 }\r
b602265d 914 if (r != 0) return r;\r
14b0e578
CS
915 }\r
916 else {\r
b602265d
DG
917 r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);\r
918 if (r != 0) return r;\r
14b0e578
CS
919 }\r
920\r
921 if (qn->greedy) {\r
922 if (IS_NOT_NULL(qn->head_exact)) {\r
b602265d
DG
923 r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,\r
924 mod_tlen + SIZE_OP_JUMP);\r
925 if (r != 0) return r;\r
926 add_bytes(reg, STR_(qn->head_exact)->s, 1);\r
927 r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);\r
928 if (r != 0) return r;\r
929 r = add_opcode_rel_addr(reg, OP_JUMP,\r
930 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));\r
14b0e578
CS
931 }\r
932 else if (IS_NOT_NULL(qn->next_head_exact)) {\r
b602265d
DG
933 r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,\r
934 mod_tlen + SIZE_OP_JUMP);\r
935 if (r != 0) return r;\r
936 add_bytes(reg, STR_(qn->next_head_exact)->s, 1);\r
937 r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);\r
938 if (r != 0) return r;\r
939 r = add_opcode_rel_addr(reg, OP_JUMP,\r
940 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));\r
14b0e578
CS
941 }\r
942 else {\r
b602265d
DG
943 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);\r
944 if (r != 0) return r;\r
945 r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);\r
946 if (r != 0) return r;\r
947 r = add_opcode_rel_addr(reg, OP_JUMP,\r
948 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));\r
14b0e578
CS
949 }\r
950 }\r
951 else {\r
952 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);\r
b602265d
DG
953 if (r != 0) return r;\r
954 r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);\r
955 if (r != 0) return r;\r
14b0e578
CS
956 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));\r
957 }\r
958 }\r
959 else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */\r
960 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);\r
b602265d
DG
961 if (r != 0) return r;\r
962 r = compile_tree(NODE_QUANT_BODY(qn), reg, env);\r
14b0e578 963 }\r
b602265d
DG
964 else if (! infinite && qn->greedy &&\r
965 (qn->upper == 1 ||\r
966 int_multiply_cmp(tlen + SIZE_OP_PUSH, qn->upper,\r
967 QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {\r
14b0e578
CS
968 int n = qn->upper - qn->lower;\r
969\r
b602265d
DG
970 r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);\r
971 if (r != 0) return r;\r
14b0e578
CS
972\r
973 for (i = 0; i < n; i++) {\r
974 r = add_opcode_rel_addr(reg, OP_PUSH,\r
b602265d
DG
975 (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);\r
976 if (r != 0) return r;\r
977 r = compile_tree(NODE_QUANT_BODY(qn), reg, env);\r
978 if (r != 0) return r;\r
14b0e578
CS
979 }\r
980 }\r
b602265d 981 else if (! qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */\r
14b0e578 982 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);\r
b602265d 983 if (r != 0) return r;\r
14b0e578 984 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);\r
b602265d
DG
985 if (r != 0) return r;\r
986 r = compile_tree(NODE_QUANT_BODY(qn), reg, env);\r
14b0e578
CS
987 }\r
988 else {\r
b602265d 989 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg, env);\r
14b0e578
CS
990 }\r
991 return r;\r
992}\r
14b0e578
CS
993\r
994static int\r
b602265d 995compile_length_option_node(EnclosureNode* node, regex_t* reg)\r
14b0e578
CS
996{\r
997 int tlen;\r
998 OnigOptionType prev = reg->options;\r
999\r
b602265d
DG
1000 reg->options = node->o.options;\r
1001 tlen = compile_length_tree(NODE_ENCLOSURE_BODY(node), reg);\r
14b0e578
CS
1002 reg->options = prev;\r
1003\r
b602265d 1004 return tlen;\r
14b0e578
CS
1005}\r
1006\r
1007static int\r
b602265d 1008compile_option_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)\r
14b0e578
CS
1009{\r
1010 int r;\r
1011 OnigOptionType prev = reg->options;\r
1012\r
b602265d
DG
1013 reg->options = node->o.options;\r
1014 r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env);\r
14b0e578
CS
1015 reg->options = prev;\r
1016\r
14b0e578
CS
1017 return r;\r
1018}\r
1019\r
1020static int\r
b602265d 1021compile_length_enclosure_node(EnclosureNode* node, regex_t* reg)\r
14b0e578
CS
1022{\r
1023 int len;\r
1024 int tlen;\r
fbaab715 1025\r
b602265d 1026 if (node->type == ENCLOSURE_OPTION)\r
14b0e578
CS
1027 return compile_length_option_node(node, reg);\r
1028\r
b602265d
DG
1029 if (NODE_ENCLOSURE_BODY(node)) {\r
1030 tlen = compile_length_tree(NODE_ENCLOSURE_BODY(node), reg);\r
14b0e578
CS
1031 if (tlen < 0) return tlen;\r
1032 }\r
1033 else\r
1034 tlen = 0;\r
1035\r
1036 switch (node->type) {\r
b602265d
DG
1037 case ENCLOSURE_MEMORY:\r
1038#ifdef USE_CALL\r
1039\r
1040 if (node->m.regnum == 0 && NODE_IS_CALLED(node)) {\r
1041 len = tlen + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;\r
1042 return len;\r
1043 }\r
1044\r
1045 if (NODE_IS_CALLED(node)) {\r
14b0e578 1046 len = SIZE_OP_MEMORY_START_PUSH + tlen\r
b602265d
DG
1047 + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;\r
1048 if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))\r
1049 len += (NODE_IS_RECURSION(node)\r
1050 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);\r
14b0e578 1051 else\r
b602265d
DG
1052 len += (NODE_IS_RECURSION(node)\r
1053 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);\r
1054 }\r
1055 else if (NODE_IS_RECURSION(node)) {\r
1056 len = SIZE_OP_MEMORY_START_PUSH;\r
1057 len += tlen + (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum)\r
1058 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_REC);\r
14b0e578
CS
1059 }\r
1060 else\r
1061#endif\r
1062 {\r
b602265d
DG
1063 if (MEM_STATUS_AT0(reg->bt_mem_start, node->m.regnum))\r
1064 len = SIZE_OP_MEMORY_START_PUSH;\r
14b0e578 1065 else\r
b602265d 1066 len = SIZE_OP_MEMORY_START;\r
14b0e578 1067\r
b602265d
DG
1068 len += tlen + (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum)\r
1069 ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);\r
14b0e578
CS
1070 }\r
1071 break;\r
1072\r
b602265d
DG
1073 case ENCLOSURE_STOP_BACKTRACK:\r
1074 if (NODE_IS_STOP_BT_SIMPLE_REPEAT(node)) {\r
1075 QuantNode* qn = QUANT_(NODE_ENCLOSURE_BODY(node));\r
1076 tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);\r
14b0e578
CS
1077 if (tlen < 0) return tlen;\r
1078\r
1079 len = tlen * qn->lower\r
b602265d 1080 + SIZE_OP_PUSH + tlen + SIZE_OP_POP_OUT + SIZE_OP_JUMP;\r
14b0e578
CS
1081 }\r
1082 else {\r
b602265d 1083 len = SIZE_OP_ATOMIC_START + tlen + SIZE_OP_ATOMIC_END;\r
14b0e578
CS
1084 }\r
1085 break;\r
1086\r
b602265d
DG
1087 case ENCLOSURE_IF_ELSE:\r
1088 {\r
1089 Node* cond = NODE_ENCLOSURE_BODY(node);\r
1090 Node* Then = node->te.Then;\r
1091 Node* Else = node->te.Else;\r
14b0e578 1092\r
b602265d
DG
1093 len = compile_length_tree(cond, reg);\r
1094 if (len < 0) return len;\r
1095 len += SIZE_OP_PUSH;\r
1096 len += SIZE_OP_ATOMIC_START + SIZE_OP_ATOMIC_END;\r
1097\r
1098 if (IS_NOT_NULL(Then)) {\r
1099 tlen = compile_length_tree(Then, reg);\r
1100 if (tlen < 0) return tlen;\r
1101 len += tlen;\r
1102 }\r
1103\r
1104 if (IS_NOT_NULL(Else)) {\r
1105 len += SIZE_OP_JUMP;\r
1106 tlen = compile_length_tree(Else, reg);\r
1107 if (tlen < 0) return tlen;\r
1108 len += tlen;\r
1109 }\r
1110 }\r
1111 break;\r
1112\r
1113 default:\r
1114 return ONIGERR_TYPE_BUG;\r
1115 break;\r
1116 }\r
1117\r
1118 return len;\r
1119}\r
14b0e578
CS
1120\r
1121static int get_char_length_tree(Node* node, regex_t* reg, int* len);\r
1122\r
1123static int\r
b602265d 1124compile_enclosure_memory_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)\r
14b0e578 1125{\r
b602265d
DG
1126 int r;\r
1127 int len;\r
14b0e578 1128\r
b602265d
DG
1129#ifdef USE_CALL\r
1130 if (node->m.regnum == 0 && NODE_IS_CALLED(node)) {\r
1131 r = add_opcode(reg, OP_CALL);\r
1132 if (r != 0) return r;\r
1133 node->m.called_addr = BB_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;\r
1134 NODE_STATUS_ADD(node, ADDR_FIXED);\r
1135 r = add_abs_addr(reg, (int )node->m.called_addr);\r
1136 if (r != 0) return r;\r
1137 len = compile_length_tree(NODE_ENCLOSURE_BODY(node), reg);\r
1138 len += SIZE_OP_RETURN;\r
1139 r = add_opcode_rel_addr(reg, OP_JUMP, len);\r
1140 if (r != 0) return r;\r
1141\r
1142 r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env);\r
1143 if (r != 0) return r;\r
1144 r = add_opcode(reg, OP_RETURN);\r
1145 return r;\r
1146 }\r
14b0e578 1147\r
b602265d
DG
1148 if (NODE_IS_CALLED(node)) {\r
1149 r = add_opcode(reg, OP_CALL);\r
1150 if (r != 0) return r;\r
1151 node->m.called_addr = BB_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;\r
1152 NODE_STATUS_ADD(node, ADDR_FIXED);\r
1153 r = add_abs_addr(reg, (int )node->m.called_addr);\r
1154 if (r != 0) return r;\r
1155 len = compile_length_tree(NODE_ENCLOSURE_BODY(node), reg);\r
1156 len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);\r
1157 if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))\r
1158 len += (NODE_IS_RECURSION(node)\r
1159 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);\r
1160 else\r
1161 len += (NODE_IS_RECURSION(node)\r
1162 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);\r
14b0e578 1163\r
b602265d
DG
1164 r = add_opcode_rel_addr(reg, OP_JUMP, len);\r
1165 if (r != 0) return r;\r
1166 }\r
14b0e578 1167#endif\r
14b0e578 1168\r
b602265d
DG
1169 if (MEM_STATUS_AT0(reg->bt_mem_start, node->m.regnum))\r
1170 r = add_opcode(reg, OP_MEMORY_START_PUSH);\r
1171 else\r
1172 r = add_opcode(reg, OP_MEMORY_START);\r
1173 if (r != 0) return r;\r
1174 r = add_mem_num(reg, node->m.regnum);\r
1175 if (r != 0) return r;\r
1176 r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env);\r
1177 if (r != 0) return r;\r
1178\r
1179#ifdef USE_CALL\r
1180 if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))\r
1181 r = add_opcode(reg, (NODE_IS_RECURSION(node)\r
1182 ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));\r
1183 else\r
1184 r = add_opcode(reg, (NODE_IS_RECURSION(node)\r
1185 ? OP_MEMORY_END_REC : OP_MEMORY_END));\r
1186 if (r != 0) return r;\r
1187 r = add_mem_num(reg, node->m.regnum);\r
1188 if (NODE_IS_CALLED(node)) {\r
1189 if (r != 0) return r;\r
1190 r = add_opcode(reg, OP_RETURN);\r
1191 }\r
1192#else\r
1193 if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))\r
1194 r = add_opcode(reg, OP_MEMORY_END_PUSH);\r
1195 else\r
1196 r = add_opcode(reg, OP_MEMORY_END);\r
1197 if (r != 0) return r;\r
1198 r = add_mem_num(reg, node->m.regnum);\r
14b0e578 1199#endif\r
b602265d
DG
1200\r
1201 return r;\r
1202}\r
1203\r
1204static int\r
1205compile_enclosure_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)\r
1206{\r
1207 int r, len;\r
1208\r
1209 switch (node->type) {\r
1210 case ENCLOSURE_MEMORY:\r
1211 r = compile_enclosure_memory_node(node, reg, env);\r
1212 break;\r
1213\r
1214 case ENCLOSURE_OPTION:\r
1215 r = compile_option_node(node, reg, env);\r
14b0e578
CS
1216 break;\r
1217\r
b602265d
DG
1218 case ENCLOSURE_STOP_BACKTRACK:\r
1219 if (NODE_IS_STOP_BT_SIMPLE_REPEAT(node)) {\r
1220 QuantNode* qn = QUANT_(NODE_ENCLOSURE_BODY(node));\r
1221 r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);\r
1222 if (r != 0) return r;\r
14b0e578 1223\r
b602265d 1224 len = compile_length_tree(NODE_QUANT_BODY(qn), reg);\r
14b0e578
CS
1225 if (len < 0) return len;\r
1226\r
b602265d
DG
1227 r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP_OUT + SIZE_OP_JUMP);\r
1228 if (r != 0) return r;\r
1229 r = compile_tree(NODE_QUANT_BODY(qn), reg, env);\r
1230 if (r != 0) return r;\r
1231 r = add_opcode(reg, OP_POP_OUT);\r
1232 if (r != 0) return r;\r
14b0e578 1233 r = add_opcode_rel_addr(reg, OP_JUMP,\r
b602265d 1234 -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP_OUT + (int )SIZE_OP_JUMP));\r
14b0e578
CS
1235 }\r
1236 else {\r
b602265d
DG
1237 r = add_opcode(reg, OP_ATOMIC_START);\r
1238 if (r != 0) return r;\r
1239 r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env);\r
1240 if (r != 0) return r;\r
1241 r = add_opcode(reg, OP_ATOMIC_END);\r
1242 }\r
1243 break;\r
1244\r
1245 case ENCLOSURE_IF_ELSE:\r
1246 {\r
1247 int cond_len, then_len, jump_len;\r
1248 Node* cond = NODE_ENCLOSURE_BODY(node);\r
1249 Node* Then = node->te.Then;\r
1250 Node* Else = node->te.Else;\r
1251\r
1252 r = add_opcode(reg, OP_ATOMIC_START);\r
1253 if (r != 0) return r;\r
1254\r
1255 cond_len = compile_length_tree(cond, reg);\r
1256 if (cond_len < 0) return cond_len;\r
1257 if (IS_NOT_NULL(Then)) {\r
1258 then_len = compile_length_tree(Then, reg);\r
1259 if (then_len < 0) return then_len;\r
1260 }\r
1261 else\r
1262 then_len = 0;\r
1263\r
1264 jump_len = cond_len + then_len + SIZE_OP_ATOMIC_END;\r
1265 if (IS_NOT_NULL(Else)) jump_len += SIZE_OP_JUMP;\r
1266\r
1267 r = add_opcode_rel_addr(reg, OP_PUSH, jump_len);\r
1268 if (r != 0) return r;\r
1269 r = compile_tree(cond, reg, env);\r
1270 if (r != 0) return r;\r
1271 r = add_opcode(reg, OP_ATOMIC_END);\r
1272 if (r != 0) return r;\r
1273\r
1274 if (IS_NOT_NULL(Then)) {\r
1275 r = compile_tree(Then, reg, env);\r
1276 if (r != 0) return r;\r
1277 }\r
1278\r
1279 if (IS_NOT_NULL(Else)) {\r
1280 int else_len = compile_length_tree(Else, reg);\r
1281 r = add_opcode_rel_addr(reg, OP_JUMP, else_len);\r
1282 if (r != 0) return r;\r
1283 r = compile_tree(Else, reg, env);\r
1284 }\r
14b0e578
CS
1285 }\r
1286 break;\r
1287\r
1288 default:\r
1289 return ONIGERR_TYPE_BUG;\r
1290 break;\r
1291 }\r
1292\r
1293 return r;\r
1294}\r
1295\r
1296static int\r
1297compile_length_anchor_node(AnchorNode* node, regex_t* reg)\r
1298{\r
1299 int len;\r
1300 int tlen = 0;\r
1301\r
b602265d
DG
1302 if (IS_NOT_NULL(NODE_ANCHOR_BODY(node))) {\r
1303 tlen = compile_length_tree(NODE_ANCHOR_BODY(node), reg);\r
14b0e578
CS
1304 if (tlen < 0) return tlen;\r
1305 }\r
1306\r
1307 switch (node->type) {\r
1308 case ANCHOR_PREC_READ:\r
b602265d 1309 len = SIZE_OP_PREC_READ_START + tlen + SIZE_OP_PREC_READ_END;\r
14b0e578
CS
1310 break;\r
1311 case ANCHOR_PREC_READ_NOT:\r
b602265d 1312 len = SIZE_OP_PREC_READ_NOT_START + tlen + SIZE_OP_PREC_READ_NOT_END;\r
14b0e578
CS
1313 break;\r
1314 case ANCHOR_LOOK_BEHIND:\r
1315 len = SIZE_OP_LOOK_BEHIND + tlen;\r
1316 break;\r
1317 case ANCHOR_LOOK_BEHIND_NOT:\r
b602265d
DG
1318 len = SIZE_OP_LOOK_BEHIND_NOT_START + tlen + SIZE_OP_LOOK_BEHIND_NOT_END;\r
1319 break;\r
1320\r
1321 case ANCHOR_WORD_BOUNDARY:\r
1322 case ANCHOR_NO_WORD_BOUNDARY:\r
1323#ifdef USE_WORD_BEGIN_END\r
1324 case ANCHOR_WORD_BEGIN:\r
1325 case ANCHOR_WORD_END:\r
1326#endif\r
1327 len = SIZE_OP_WORD_BOUNDARY;\r
1328 break;\r
1329\r
1330 case ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:\r
1331 case ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:\r
1332 len = SIZE_OPCODE;\r
14b0e578
CS
1333 break;\r
1334\r
1335 default:\r
1336 len = SIZE_OPCODE;\r
1337 break;\r
1338 }\r
1339\r
1340 return len;\r
1341}\r
1342\r
1343static int\r
b602265d 1344compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env)\r
14b0e578
CS
1345{\r
1346 int r, len;\r
b602265d 1347 enum OpCode op;\r
14b0e578
CS
1348\r
1349 switch (node->type) {\r
1350 case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break;\r
1351 case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break;\r
1352 case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break;\r
1353 case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break;\r
1354 case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break;\r
1355 case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;\r
1356\r
b602265d
DG
1357 case ANCHOR_WORD_BOUNDARY:\r
1358 op = OP_WORD_BOUNDARY;\r
1359 word:\r
1360 r = add_opcode(reg, op);\r
1361 if (r != 0) return r;\r
1362 r = add_mode(reg, (ModeType )node->ascii_mode);\r
1363 break;\r
1364\r
1365 case ANCHOR_NO_WORD_BOUNDARY:\r
1366 op = OP_NO_WORD_BOUNDARY; goto word;\r
1367 break;\r
14b0e578 1368#ifdef USE_WORD_BEGIN_END\r
b602265d
DG
1369 case ANCHOR_WORD_BEGIN:\r
1370 op = OP_WORD_BEGIN; goto word;\r
1371 break;\r
1372 case ANCHOR_WORD_END:\r
1373 op = OP_WORD_END; goto word;\r
1374 break;\r
14b0e578
CS
1375#endif\r
1376\r
b602265d
DG
1377 case ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:\r
1378 r = add_opcode(reg, OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY);\r
1379 break;\r
1380\r
1381 case ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:\r
1382 r = add_opcode(reg, OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY);\r
1383 break;\r
1384\r
14b0e578 1385 case ANCHOR_PREC_READ:\r
b602265d
DG
1386 r = add_opcode(reg, OP_PREC_READ_START);\r
1387 if (r != 0) return r;\r
1388 r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);\r
1389 if (r != 0) return r;\r
1390 r = add_opcode(reg, OP_PREC_READ_END);\r
14b0e578
CS
1391 break;\r
1392\r
1393 case ANCHOR_PREC_READ_NOT:\r
b602265d 1394 len = compile_length_tree(NODE_ANCHOR_BODY(node), reg);\r
14b0e578 1395 if (len < 0) return len;\r
b602265d
DG
1396 r = add_opcode_rel_addr(reg, OP_PREC_READ_NOT_START, len + SIZE_OP_PREC_READ_NOT_END);\r
1397 if (r != 0) return r;\r
1398 r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);\r
1399 if (r != 0) return r;\r
1400 r = add_opcode(reg, OP_PREC_READ_NOT_END);\r
14b0e578
CS
1401 break;\r
1402\r
1403 case ANCHOR_LOOK_BEHIND:\r
1404 {\r
1405 int n;\r
1406 r = add_opcode(reg, OP_LOOK_BEHIND);\r
b602265d 1407 if (r != 0) return r;\r
14b0e578 1408 if (node->char_len < 0) {\r
b602265d
DG
1409 r = get_char_length_tree(NODE_ANCHOR_BODY(node), reg, &n);\r
1410 if (r != 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;\r
14b0e578
CS
1411 }\r
1412 else\r
b602265d
DG
1413 n = node->char_len;\r
1414\r
14b0e578 1415 r = add_length(reg, n);\r
b602265d
DG
1416 if (r != 0) return r;\r
1417 r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);\r
14b0e578
CS
1418 }\r
1419 break;\r
1420\r
1421 case ANCHOR_LOOK_BEHIND_NOT:\r
1422 {\r
1423 int n;\r
b602265d
DG
1424\r
1425 len = compile_length_tree(NODE_ANCHOR_BODY(node), reg);\r
1426 r = add_opcode_rel_addr(reg, OP_LOOK_BEHIND_NOT_START,\r
1427 len + SIZE_OP_LOOK_BEHIND_NOT_END);\r
1428 if (r != 0) return r;\r
14b0e578 1429 if (node->char_len < 0) {\r
b602265d
DG
1430 r = get_char_length_tree(NODE_ANCHOR_BODY(node), reg, &n);\r
1431 if (r != 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;\r
14b0e578
CS
1432 }\r
1433 else\r
b602265d 1434 n = node->char_len;\r
14b0e578 1435 r = add_length(reg, n);\r
b602265d
DG
1436 if (r != 0) return r;\r
1437 r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);\r
1438 if (r != 0) return r;\r
1439 r = add_opcode(reg, OP_LOOK_BEHIND_NOT_END);\r
14b0e578
CS
1440 }\r
1441 break;\r
1442\r
1443 default:\r
1444 return ONIGERR_TYPE_BUG;\r
1445 break;\r
1446 }\r
1447\r
1448 return r;\r
1449}\r
1450\r
b602265d
DG
1451static int\r
1452compile_gimmick_node(GimmickNode* node, regex_t* reg)\r
1453{\r
1454 int r;\r
1455\r
1456 switch (node->type) {\r
1457 case GIMMICK_FAIL:\r
1458 r = add_opcode(reg, OP_FAIL);\r
1459 break;\r
1460\r
1461 case GIMMICK_KEEP:\r
1462 r = add_opcode(reg, OP_PUSH_SAVE_VAL);\r
1463 if (r != 0) return r;\r
1464 r = add_save_type(reg, SAVE_KEEP);\r
1465 if (r != 0) return r;\r
1466 r = add_mem_num(reg, node->id);\r
1467 break;\r
1468\r
1469 case GIMMICK_SAVE:\r
1470 r = add_opcode(reg, OP_PUSH_SAVE_VAL);\r
1471 if (r != 0) return r;\r
1472 r = add_save_type(reg, node->detail_type);\r
1473 if (r != 0) return r;\r
1474 r = add_mem_num(reg, node->id);\r
1475 break;\r
1476\r
1477 case GIMMICK_UPDATE_VAR:\r
1478 r = add_opcode(reg, OP_UPDATE_VAR);\r
1479 if (r != 0) return r;\r
1480 r = add_update_var_type(reg, node->detail_type);\r
1481 if (r != 0) return r;\r
1482 r = add_mem_num(reg, node->id);\r
1483 break;\r
1484\r
1485#ifdef USE_CALLOUT\r
1486 case GIMMICK_CALLOUT:\r
1487 switch (node->detail_type) {\r
1488 case ONIG_CALLOUT_OF_CONTENTS:\r
1489 case ONIG_CALLOUT_OF_NAME:\r
1490 {\r
1491 r = add_opcode(reg, (node->detail_type == ONIG_CALLOUT_OF_CONTENTS) ?\r
1492 OP_CALLOUT_CONTENTS : OP_CALLOUT_NAME);\r
1493 if (r != 0) return r;\r
1494 if (node->detail_type == ONIG_CALLOUT_OF_NAME) {\r
1495 r = add_mem_num(reg, node->id);\r
1496 if (r != 0) return r;\r
1497 }\r
1498 r = add_mem_num(reg, node->num);\r
1499 if (r != 0) return r;\r
1500 }\r
1501 break;\r
1502\r
1503 default:\r
1504 r = ONIGERR_TYPE_BUG;\r
1505 break;\r
1506 }\r
1507#endif\r
1508 }\r
1509\r
1510 return r;\r
1511}\r
1512\r
1513static int\r
1514compile_length_gimmick_node(GimmickNode* node, regex_t* reg)\r
1515{\r
1516 int len;\r
1517\r
1518 switch (node->type) {\r
1519 case GIMMICK_FAIL:\r
1520 len = SIZE_OP_FAIL;\r
1521 break;\r
1522\r
1523 case GIMMICK_KEEP:\r
1524 case GIMMICK_SAVE:\r
1525 len = SIZE_OP_PUSH_SAVE_VAL;\r
1526 break;\r
1527\r
1528 case GIMMICK_UPDATE_VAR:\r
1529 len = SIZE_OP_UPDATE_VAR;\r
1530 break;\r
1531\r
1532#ifdef USE_CALLOUT\r
1533 case GIMMICK_CALLOUT:\r
1534 switch (node->detail_type) {\r
1535 case ONIG_CALLOUT_OF_CONTENTS:\r
1536 len = SIZE_OP_CALLOUT_CONTENTS;\r
1537 break;\r
1538 case ONIG_CALLOUT_OF_NAME:\r
1539 len = SIZE_OP_CALLOUT_NAME;\r
1540 break;\r
1541\r
1542 default:\r
1543 len = ONIGERR_TYPE_BUG;\r
1544 break;\r
1545 }\r
1546 break;\r
1547#endif\r
1548 }\r
1549\r
1550 return len;\r
1551}\r
1552\r
14b0e578
CS
1553static int\r
1554compile_length_tree(Node* node, regex_t* reg)\r
1555{\r
b602265d 1556 int len, r;\r
14b0e578 1557\r
b602265d
DG
1558 switch (NODE_TYPE(node)) {\r
1559 case NODE_LIST:\r
14b0e578
CS
1560 len = 0;\r
1561 do {\r
b602265d 1562 r = compile_length_tree(NODE_CAR(node), reg);\r
14b0e578
CS
1563 if (r < 0) return r;\r
1564 len += r;\r
b602265d 1565 } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
14b0e578
CS
1566 r = len;\r
1567 break;\r
1568\r
b602265d 1569 case NODE_ALT:\r
14b0e578
CS
1570 {\r
1571 int n;\r
1572\r
1573 n = r = 0;\r
1574 do {\r
b602265d
DG
1575 r += compile_length_tree(NODE_CAR(node), reg);\r
1576 n++;\r
1577 } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
14b0e578
CS
1578 r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);\r
1579 }\r
1580 break;\r
1581\r
b602265d
DG
1582 case NODE_STRING:\r
1583 if (NODE_STRING_IS_RAW(node))\r
1584 r = compile_length_string_raw_node(STR_(node), reg);\r
14b0e578
CS
1585 else\r
1586 r = compile_length_string_node(node, reg);\r
1587 break;\r
1588\r
b602265d
DG
1589 case NODE_CCLASS:\r
1590 r = compile_length_cclass_node(CCLASS_(node), reg);\r
14b0e578
CS
1591 break;\r
1592\r
b602265d 1593 case NODE_CTYPE:\r
14b0e578
CS
1594 r = SIZE_OPCODE;\r
1595 break;\r
1596\r
b602265d 1597 case NODE_BACKREF:\r
14b0e578 1598 {\r
b602265d 1599 BackRefNode* br = BACKREF_(node);\r
14b0e578 1600\r
b602265d 1601 if (NODE_IS_CHECKER(node)) {\r
14b0e578 1602#ifdef USE_BACKREF_WITH_LEVEL\r
b602265d
DG
1603 if (NODE_IS_NEST_LEVEL(node)) {\r
1604 r = SIZE_OPCODE + SIZE_LENGTH + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);\r
1605 }\r
1606 else\r
14b0e578 1607#endif\r
b602265d 1608 r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);\r
14b0e578
CS
1609 }\r
1610 else {\r
b602265d
DG
1611#ifdef USE_BACKREF_WITH_LEVEL\r
1612 if (NODE_IS_NEST_LEVEL(node)) {\r
1613 r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +\r
1614 SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);\r
1615 }\r
1616 else\r
1617#endif\r
1618 if (br->back_num == 1) {\r
1619 r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)\r
1620 ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));\r
1621 }\r
1622 else {\r
1623 r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);\r
1624 }\r
14b0e578
CS
1625 }\r
1626 }\r
1627 break;\r
1628\r
b602265d
DG
1629#ifdef USE_CALL\r
1630 case NODE_CALL:\r
14b0e578
CS
1631 r = SIZE_OP_CALL;\r
1632 break;\r
1633#endif\r
1634\r
b602265d
DG
1635 case NODE_QUANT:\r
1636 r = compile_length_quantifier_node(QUANT_(node), reg);\r
1637 break;\r
1638\r
1639 case NODE_ENCLOSURE:\r
1640 r = compile_length_enclosure_node(ENCLOSURE_(node), reg);\r
14b0e578
CS
1641 break;\r
1642\r
b602265d
DG
1643 case NODE_ANCHOR:\r
1644 r = compile_length_anchor_node(ANCHOR_(node), reg);\r
14b0e578
CS
1645 break;\r
1646\r
b602265d
DG
1647 case NODE_GIMMICK:\r
1648 r = compile_length_gimmick_node(GIMMICK_(node), reg);\r
14b0e578
CS
1649 break;\r
1650\r
1651 default:\r
1652 return ONIGERR_TYPE_BUG;\r
1653 break;\r
1654 }\r
1655\r
1656 return r;\r
1657}\r
1658\r
1659static int\r
b602265d 1660compile_tree(Node* node, regex_t* reg, ScanEnv* env)\r
14b0e578 1661{\r
b602265d 1662 int n, len, pos, r = 0;\r
14b0e578 1663\r
b602265d
DG
1664 switch (NODE_TYPE(node)) {\r
1665 case NODE_LIST:\r
14b0e578 1666 do {\r
b602265d
DG
1667 r = compile_tree(NODE_CAR(node), reg, env);\r
1668 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));\r
14b0e578
CS
1669 break;\r
1670\r
b602265d 1671 case NODE_ALT:\r
14b0e578
CS
1672 {\r
1673 Node* x = node;\r
1674 len = 0;\r
1675 do {\r
b602265d
DG
1676 len += compile_length_tree(NODE_CAR(x), reg);\r
1677 if (IS_NOT_NULL(NODE_CDR(x))) {\r
1678 len += SIZE_OP_PUSH + SIZE_OP_JUMP;\r
1679 }\r
1680 } while (IS_NOT_NULL(x = NODE_CDR(x)));\r
14b0e578
CS
1681 pos = reg->used + len; /* goal position */\r
1682\r
1683 do {\r
b602265d
DG
1684 len = compile_length_tree(NODE_CAR(node), reg);\r
1685 if (IS_NOT_NULL(NODE_CDR(node))) {\r
1686 enum OpCode push = NODE_IS_SUPER(node) ? OP_PUSH_SUPER : OP_PUSH;\r
1687 r = add_opcode_rel_addr(reg, push, len + SIZE_OP_JUMP);\r
1688 if (r != 0) break;\r
1689 }\r
1690 r = compile_tree(NODE_CAR(node), reg, env);\r
1691 if (r != 0) break;\r
1692 if (IS_NOT_NULL(NODE_CDR(node))) {\r
1693 len = pos - (reg->used + SIZE_OP_JUMP);\r
1694 r = add_opcode_rel_addr(reg, OP_JUMP, len);\r
1695 if (r != 0) break;\r
1696 }\r
1697 } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
1698 }\r
1699 break;\r
1700\r
1701 case NODE_STRING:\r
1702 if (NODE_STRING_IS_RAW(node))\r
1703 r = compile_string_raw_node(STR_(node), reg);\r
14b0e578
CS
1704 else\r
1705 r = compile_string_node(node, reg);\r
1706 break;\r
1707\r
b602265d
DG
1708 case NODE_CCLASS:\r
1709 r = compile_cclass_node(CCLASS_(node), reg);\r
14b0e578
CS
1710 break;\r
1711\r
b602265d 1712 case NODE_CTYPE:\r
14b0e578
CS
1713 {\r
1714 int op;\r
1715\r
b602265d
DG
1716 switch (CTYPE_(node)->ctype) {\r
1717 case CTYPE_ANYCHAR:\r
1718 if (IS_MULTILINE(CTYPE_OPTION(node, reg)))\r
1719 r = add_opcode(reg, OP_ANYCHAR_ML);\r
1720 else\r
1721 r = add_opcode(reg, OP_ANYCHAR);\r
1722 break;\r
1723\r
14b0e578 1724 case ONIGENC_CTYPE_WORD:\r
b602265d
DG
1725 if (CTYPE_(node)->ascii_mode == 0) {\r
1726 op = CTYPE_(node)->not != 0 ? OP_NO_WORD : OP_WORD;\r
1727 }\r
1728 else {\r
1729 op = CTYPE_(node)->not != 0 ? OP_NO_WORD_ASCII : OP_WORD_ASCII;\r
1730 }\r
1731 r = add_opcode(reg, op);\r
1732 break;\r
1733\r
14b0e578 1734 default:\r
b602265d
DG
1735 return ONIGERR_TYPE_BUG;\r
1736 break;\r
14b0e578 1737 }\r
14b0e578
CS
1738 }\r
1739 break;\r
1740\r
b602265d 1741 case NODE_BACKREF:\r
14b0e578 1742 {\r
b602265d 1743 BackRefNode* br = BACKREF_(node);\r
14b0e578 1744\r
b602265d 1745 if (NODE_IS_CHECKER(node)) {\r
14b0e578 1746#ifdef USE_BACKREF_WITH_LEVEL\r
b602265d
DG
1747 if (NODE_IS_NEST_LEVEL(node)) {\r
1748 r = add_opcode(reg, OP_BACKREF_CHECK_WITH_LEVEL);\r
1749 if (r != 0) return r;\r
1750 r = add_length(reg, br->nest_level);\r
1751 if (r != 0) return r;\r
1752 }\r
1753 else\r
14b0e578 1754#endif\r
b602265d
DG
1755 {\r
1756 r = add_opcode(reg, OP_BACKREF_CHECK);\r
1757 if (r != 0) return r;\r
1758 }\r
1759\r
1760 goto add_bacref_mems;\r
14b0e578
CS
1761 }\r
1762 else {\r
b602265d
DG
1763#ifdef USE_BACKREF_WITH_LEVEL\r
1764 if (NODE_IS_NEST_LEVEL(node)) {\r
1765 r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);\r
1766 if (r != 0) return r;\r
1767 r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));\r
1768 if (r != 0) return r;\r
1769 r = add_length(reg, br->nest_level);\r
1770 if (r != 0) return r;\r
1771\r
1772 goto add_bacref_mems;\r
14b0e578 1773 }\r
b602265d
DG
1774 else\r
1775#endif\r
1776 if (br->back_num == 1) {\r
1777 n = br->back_static[0];\r
1778 if (IS_IGNORECASE(reg->options)) {\r
1779 r = add_opcode(reg, OP_BACKREF_N_IC);\r
1780 if (r != 0) return r;\r
1781 r = add_mem_num(reg, n);\r
1782 }\r
1783 else {\r
1784 switch (n) {\r
1785 case 1: r = add_opcode(reg, OP_BACKREF1); break;\r
1786 case 2: r = add_opcode(reg, OP_BACKREF2); break;\r
1787 default:\r
1788 r = add_opcode(reg, OP_BACKREF_N);\r
1789 if (r != 0) return r;\r
1790 r = add_mem_num(reg, n);\r
1791 break;\r
1792 }\r
1793 }\r
14b0e578 1794 }\r
b602265d
DG
1795 else {\r
1796 int i;\r
1797 int* p;\r
14b0e578 1798\r
b602265d
DG
1799 if (IS_IGNORECASE(reg->options)) {\r
1800 r = add_opcode(reg, OP_BACKREF_MULTI_IC);\r
1801 }\r
1802 else {\r
1803 r = add_opcode(reg, OP_BACKREF_MULTI);\r
1804 }\r
1805 if (r != 0) return r;\r
1806\r
1807 add_bacref_mems:\r
1808 r = add_length(reg, br->back_num);\r
1809 if (r != 0) return r;\r
1810 p = BACKREFS_P(br);\r
1811 for (i = br->back_num - 1; i >= 0; i--) {\r
1812 r = add_mem_num(reg, p[i]);\r
1813 if (r != 0) return r;\r
1814 }\r
1815 }\r
14b0e578
CS
1816 }\r
1817 }\r
1818 break;\r
1819\r
b602265d
DG
1820#ifdef USE_CALL\r
1821 case NODE_CALL:\r
1822 r = compile_call(CALL_(node), reg, env);\r
14b0e578
CS
1823 break;\r
1824#endif\r
1825\r
b602265d
DG
1826 case NODE_QUANT:\r
1827 r = compile_quantifier_node(QUANT_(node), reg, env);\r
14b0e578
CS
1828 break;\r
1829\r
b602265d
DG
1830 case NODE_ENCLOSURE:\r
1831 r = compile_enclosure_node(ENCLOSURE_(node), reg, env);\r
14b0e578
CS
1832 break;\r
1833\r
b602265d
DG
1834 case NODE_ANCHOR:\r
1835 r = compile_anchor_node(ANCHOR_(node), reg, env);\r
1836 break;\r
1837\r
1838 case NODE_GIMMICK:\r
1839 r = compile_gimmick_node(GIMMICK_(node), reg);\r
14b0e578
CS
1840 break;\r
1841\r
1842 default:\r
1843#ifdef ONIG_DEBUG\r
b602265d 1844 fprintf(stderr, "compile_tree: undefined node type %d\n", NODE_TYPE(node));\r
14b0e578
CS
1845#endif\r
1846 break;\r
1847 }\r
1848\r
1849 return r;\r
1850}\r
1851\r
14b0e578
CS
1852static int\r
1853noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)\r
1854{\r
1855 int r = 0;\r
1856 Node* node = *plink;\r
1857\r
b602265d
DG
1858 switch (NODE_TYPE(node)) {\r
1859 case NODE_LIST:\r
1860 case NODE_ALT:\r
14b0e578 1861 do {\r
b602265d
DG
1862 r = noname_disable_map(&(NODE_CAR(node)), map, counter);\r
1863 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));\r
14b0e578
CS
1864 break;\r
1865\r
b602265d 1866 case NODE_QUANT:\r
14b0e578 1867 {\r
b602265d 1868 Node** ptarget = &(NODE_BODY(node));\r
14b0e578
CS
1869 Node* old = *ptarget;\r
1870 r = noname_disable_map(ptarget, map, counter);\r
b602265d
DG
1871 if (*ptarget != old && NODE_TYPE(*ptarget) == NODE_QUANT) {\r
1872 onig_reduce_nested_quantifier(node, *ptarget);\r
14b0e578
CS
1873 }\r
1874 }\r
1875 break;\r
1876\r
b602265d 1877 case NODE_ENCLOSURE:\r
14b0e578 1878 {\r
b602265d
DG
1879 EnclosureNode* en = ENCLOSURE_(node);\r
1880 if (en->type == ENCLOSURE_MEMORY) {\r
1881 if (NODE_IS_NAMED_GROUP(node)) {\r
1882 (*counter)++;\r
1883 map[en->m.regnum].new_val = *counter;\r
1884 en->m.regnum = *counter;\r
1885 r = noname_disable_map(&(NODE_BODY(node)), map, counter);\r
1886 }\r
1887 else {\r
1888 *plink = NODE_BODY(node);\r
1889 NODE_BODY(node) = NULL_NODE;\r
1890 onig_node_free(node);\r
1891 r = noname_disable_map(plink, map, counter);\r
1892 }\r
1893 }\r
1894 else if (en->type == ENCLOSURE_IF_ELSE) {\r
1895 r = noname_disable_map(&(NODE_ENCLOSURE_BODY(en)), map, counter);\r
1896 if (r != 0) return r;\r
1897 if (IS_NOT_NULL(en->te.Then)) {\r
1898 r = noname_disable_map(&(en->te.Then), map, counter);\r
1899 if (r != 0) return r;\r
1900 }\r
1901 if (IS_NOT_NULL(en->te.Else)) {\r
1902 r = noname_disable_map(&(en->te.Else), map, counter);\r
1903 if (r != 0) return r;\r
1904 }\r
14b0e578
CS
1905 }\r
1906 else\r
b602265d 1907 r = noname_disable_map(&(NODE_BODY(node)), map, counter);\r
14b0e578
CS
1908 }\r
1909 break;\r
1910\r
b602265d
DG
1911 case NODE_ANCHOR:\r
1912 if (IS_NOT_NULL(NODE_BODY(node)))\r
1913 r = noname_disable_map(&(NODE_BODY(node)), map, counter);\r
1914 break;\r
1915\r
14b0e578
CS
1916 default:\r
1917 break;\r
1918 }\r
1919\r
1920 return r;\r
1921}\r
1922\r
1923static int\r
1924renumber_node_backref(Node* node, GroupNumRemap* map)\r
1925{\r
1926 int i, pos, n, old_num;\r
1927 int *backs;\r
b602265d 1928 BackRefNode* bn = BACKREF_(node);\r
14b0e578 1929\r
b602265d 1930 if (! NODE_IS_BY_NAME(node))\r
14b0e578
CS
1931 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;\r
1932\r
1933 old_num = bn->back_num;\r
1934 if (IS_NULL(bn->back_dynamic))\r
1935 backs = bn->back_static;\r
1936 else\r
1937 backs = bn->back_dynamic;\r
1938\r
1939 for (i = 0, pos = 0; i < old_num; i++) {\r
1940 n = map[backs[i]].new_val;\r
1941 if (n > 0) {\r
1942 backs[pos] = n;\r
1943 pos++;\r
1944 }\r
1945 }\r
1946\r
1947 bn->back_num = pos;\r
1948 return 0;\r
1949}\r
1950\r
1951static int\r
1952renumber_by_map(Node* node, GroupNumRemap* map)\r
1953{\r
1954 int r = 0;\r
1955\r
b602265d
DG
1956 switch (NODE_TYPE(node)) {\r
1957 case NODE_LIST:\r
1958 case NODE_ALT:\r
14b0e578 1959 do {\r
b602265d
DG
1960 r = renumber_by_map(NODE_CAR(node), map);\r
1961 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));\r
14b0e578 1962 break;\r
b602265d
DG
1963\r
1964 case NODE_QUANT:\r
1965 r = renumber_by_map(NODE_BODY(node), map);\r
14b0e578 1966 break;\r
b602265d
DG
1967\r
1968 case NODE_ENCLOSURE:\r
1969 {\r
1970 EnclosureNode* en = ENCLOSURE_(node);\r
1971\r
1972 r = renumber_by_map(NODE_BODY(node), map);\r
1973 if (r != 0) return r;\r
1974\r
1975 if (en->type == ENCLOSURE_IF_ELSE) {\r
1976 if (IS_NOT_NULL(en->te.Then)) {\r
1977 r = renumber_by_map(en->te.Then, map);\r
1978 if (r != 0) return r;\r
1979 }\r
1980 if (IS_NOT_NULL(en->te.Else)) {\r
1981 r = renumber_by_map(en->te.Else, map);\r
1982 if (r != 0) return r;\r
1983 }\r
1984 }\r
1985 }\r
14b0e578
CS
1986 break;\r
1987\r
b602265d 1988 case NODE_BACKREF:\r
14b0e578
CS
1989 r = renumber_node_backref(node, map);\r
1990 break;\r
1991\r
b602265d
DG
1992 case NODE_ANCHOR:\r
1993 if (IS_NOT_NULL(NODE_BODY(node)))\r
1994 r = renumber_by_map(NODE_BODY(node), map);\r
1995 break;\r
1996\r
14b0e578
CS
1997 default:\r
1998 break;\r
1999 }\r
2000\r
2001 return r;\r
2002}\r
2003\r
2004static int\r
2005numbered_ref_check(Node* node)\r
2006{\r
2007 int r = 0;\r
2008\r
b602265d
DG
2009 switch (NODE_TYPE(node)) {\r
2010 case NODE_LIST:\r
2011 case NODE_ALT:\r
14b0e578 2012 do {\r
b602265d
DG
2013 r = numbered_ref_check(NODE_CAR(node));\r
2014 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));\r
14b0e578 2015 break;\r
b602265d
DG
2016\r
2017 case NODE_ANCHOR:\r
2018 if (IS_NULL(NODE_BODY(node)))\r
2019 break;\r
2020 /* fall */\r
2021 case NODE_QUANT:\r
2022 r = numbered_ref_check(NODE_BODY(node));\r
14b0e578 2023 break;\r
b602265d
DG
2024\r
2025 case NODE_ENCLOSURE:\r
2026 {\r
2027 EnclosureNode* en = ENCLOSURE_(node);\r
2028\r
2029 r = numbered_ref_check(NODE_BODY(node));\r
2030 if (r != 0) return r;\r
2031\r
2032 if (en->type == ENCLOSURE_IF_ELSE) {\r
2033 if (IS_NOT_NULL(en->te.Then)) {\r
2034 r = numbered_ref_check(en->te.Then);\r
2035 if (r != 0) return r;\r
2036 }\r
2037 if (IS_NOT_NULL(en->te.Else)) {\r
2038 r = numbered_ref_check(en->te.Else);\r
2039 if (r != 0) return r;\r
2040 }\r
2041 }\r
2042 }\r
2043\r
14b0e578
CS
2044 break;\r
2045\r
b602265d
DG
2046 case NODE_BACKREF:\r
2047 if (! NODE_IS_BY_NAME(node))\r
14b0e578
CS
2048 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;\r
2049 break;\r
2050\r
2051 default:\r
2052 break;\r
2053 }\r
2054\r
2055 return r;\r
2056}\r
2057\r
2058static int\r
2059disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)\r
2060{\r
2061 int r, i, pos, counter;\r
b602265d
DG
2062 int result;\r
2063 MemStatusType loc;\r
14b0e578
CS
2064 GroupNumRemap* map;\r
2065\r
2066 map = (GroupNumRemap* )xmalloc(sizeof(GroupNumRemap) * (env->num_mem + 1));\r
2067 CHECK_NULL_RETURN_MEMERR(map);\r
2068 for (i = 1; i <= env->num_mem; i++) {\r
2069 map[i].new_val = 0;\r
2070 }\r
2071 counter = 0;\r
2072 r = noname_disable_map(root, map, &counter);\r
2073 if (r != 0) return r;\r
2074\r
2075 r = renumber_by_map(*root, map);\r
2076 if (r != 0) return r;\r
2077\r
2078 for (i = 1, pos = 1; i <= env->num_mem; i++) {\r
2079 if (map[i].new_val > 0) {\r
b602265d 2080 SCANENV_MEMENV(env)[pos] = SCANENV_MEMENV(env)[i];\r
14b0e578
CS
2081 pos++;\r
2082 }\r
2083 }\r
2084\r
2085 loc = env->capture_history;\r
b602265d 2086 MEM_STATUS_CLEAR(env->capture_history);\r
14b0e578 2087 for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {\r
b602265d
DG
2088 if (MEM_STATUS_AT(loc, i)) {\r
2089 MEM_STATUS_ON_SIMPLE(env->capture_history, map[i].new_val);\r
14b0e578
CS
2090 }\r
2091 }\r
2092\r
2093 env->num_mem = env->num_named;\r
2094 reg->num_mem = env->num_named;\r
b602265d 2095 result = onig_renumber_name_table(reg, map);\r
14b0e578 2096 xfree(map);\r
b602265d 2097 return result;\r
14b0e578 2098}\r
14b0e578 2099\r
b602265d 2100#ifdef USE_CALL\r
14b0e578 2101static int\r
b602265d 2102fix_unset_addr_list(UnsetAddrList* uslist, regex_t* reg)\r
14b0e578
CS
2103{\r
2104 int i, offset;\r
b602265d 2105 EnclosureNode* en;\r
14b0e578
CS
2106 AbsAddrType addr;\r
2107\r
2108 for (i = 0; i < uslist->num; i++) {\r
b602265d
DG
2109 if (! NODE_IS_ADDR_FIXED(uslist->us[i].target))\r
2110 return ONIGERR_PARSER_BUG;\r
2111\r
2112 en = ENCLOSURE_(uslist->us[i].target);\r
2113 addr = en->m.called_addr;\r
14b0e578
CS
2114 offset = uslist->us[i].offset;\r
2115\r
b602265d 2116 BB_WRITE(reg, offset, &addr, SIZE_ABSADDR);\r
14b0e578
CS
2117 }\r
2118 return 0;\r
2119}\r
2120#endif\r
2121\r
b602265d
DG
2122\r
2123#define GET_CHAR_LEN_VARLEN -1\r
2124#define GET_CHAR_LEN_TOP_ALT_VARLEN -2\r
2125\r
2126/* fixed size pattern node only */\r
14b0e578 2127static int\r
b602265d 2128get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)\r
14b0e578 2129{\r
b602265d 2130 int tlen;\r
14b0e578
CS
2131 int r = 0;\r
2132\r
b602265d
DG
2133 level++;\r
2134 *len = 0;\r
2135 switch (NODE_TYPE(node)) {\r
2136 case NODE_LIST:\r
2137 do {\r
2138 r = get_char_length_tree1(NODE_CAR(node), reg, &tlen, level);\r
2139 if (r == 0)\r
2140 *len = distance_add(*len, tlen);\r
2141 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));\r
14b0e578
CS
2142 break;\r
2143\r
b602265d
DG
2144 case NODE_ALT:\r
2145 {\r
2146 int tlen2;\r
2147 int varlen = 0;\r
2148\r
2149 r = get_char_length_tree1(NODE_CAR(node), reg, &tlen, level);\r
2150 while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node))) {\r
2151 r = get_char_length_tree1(NODE_CAR(node), reg, &tlen2, level);\r
2152 if (r == 0) {\r
2153 if (tlen != tlen2)\r
2154 varlen = 1;\r
2155 }\r
2156 }\r
2157 if (r == 0) {\r
2158 if (varlen != 0) {\r
2159 if (level == 1)\r
2160 r = GET_CHAR_LEN_TOP_ALT_VARLEN;\r
2161 else\r
2162 r = GET_CHAR_LEN_VARLEN;\r
2163 }\r
2164 else\r
2165 *len = tlen;\r
2166 }\r
14b0e578 2167 }\r
14b0e578 2168 break;\r
14b0e578 2169\r
b602265d 2170 case NODE_STRING:\r
14b0e578 2171 {\r
b602265d
DG
2172 StrNode* sn = STR_(node);\r
2173 UChar *s = sn->s;\r
2174\r
2175 while (s < sn->end) {\r
2176 s += enclen(reg->enc, s);\r
2177 (*len)++;\r
14b0e578
CS
2178 }\r
2179 }\r
2180 break;\r
2181\r
b602265d 2182 case NODE_QUANT:\r
14b0e578 2183 {\r
b602265d 2184 QuantNode* qn = QUANT_(node);\r
14b0e578 2185\r
14b0e578 2186 if (qn->lower == qn->upper) {\r
b602265d
DG
2187 if (qn->upper == 0) {\r
2188 *len = 0;\r
2189 }\r
2190 else {\r
2191 r = get_char_length_tree1(NODE_BODY(node), reg, &tlen, level);\r
2192 if (r == 0)\r
2193 *len = distance_multiply(tlen, qn->lower);\r
2194 }\r
14b0e578
CS
2195 }\r
2196 else\r
b602265d 2197 r = GET_CHAR_LEN_VARLEN;\r
14b0e578
CS
2198 }\r
2199 break;\r
2200\r
b602265d
DG
2201#ifdef USE_CALL\r
2202 case NODE_CALL:\r
2203 if (! NODE_IS_RECURSION(node))\r
2204 r = get_char_length_tree1(NODE_BODY(node), reg, len, level);\r
14b0e578
CS
2205 else\r
2206 r = GET_CHAR_LEN_VARLEN;\r
2207 break;\r
2208#endif\r
2209\r
b602265d
DG
2210 case NODE_CTYPE:\r
2211 case NODE_CCLASS:\r
14b0e578
CS
2212 *len = 1;\r
2213 break;\r
2214\r
b602265d 2215 case NODE_ENCLOSURE:\r
14b0e578 2216 {\r
b602265d
DG
2217 EnclosureNode* en = ENCLOSURE_(node);\r
2218\r
14b0e578 2219 switch (en->type) {\r
b602265d
DG
2220 case ENCLOSURE_MEMORY:\r
2221#ifdef USE_CALL\r
2222 if (NODE_IS_CLEN_FIXED(node))\r
2223 *len = en->char_len;\r
2224 else {\r
2225 r = get_char_length_tree1(NODE_BODY(node), reg, len, level);\r
2226 if (r == 0) {\r
2227 en->char_len = *len;\r
2228 NODE_STATUS_ADD(node, CLEN_FIXED);\r
2229 }\r
2230 }\r
2231 break;\r
14b0e578 2232#endif\r
b602265d
DG
2233 case ENCLOSURE_OPTION:\r
2234 case ENCLOSURE_STOP_BACKTRACK:\r
2235 r = get_char_length_tree1(NODE_BODY(node), reg, len, level);\r
2236 break;\r
2237 case ENCLOSURE_IF_ELSE:\r
2238 {\r
2239 int clen, elen;\r
2240\r
2241 r = get_char_length_tree1(NODE_BODY(node), reg, &clen, level);\r
2242 if (r == 0) {\r
2243 if (IS_NOT_NULL(en->te.Then)) {\r
2244 r = get_char_length_tree1(en->te.Then, reg, &tlen, level);\r
2245 if (r != 0) break;\r
2246 }\r
2247 else tlen = 0;\r
2248 if (IS_NOT_NULL(en->te.Else)) {\r
2249 r = get_char_length_tree1(en->te.Else, reg, &elen, level);\r
2250 if (r != 0) break;\r
2251 }\r
2252 else elen = 0;\r
2253\r
2254 if (clen + tlen != elen) {\r
2255 r = GET_CHAR_LEN_VARLEN;\r
2256 }\r
2257 else {\r
2258 *len = elen;\r
2259 }\r
2260 }\r
2261 }\r
2262 break;\r
2263\r
14b0e578 2264 default:\r
b602265d 2265 break;\r
14b0e578
CS
2266 }\r
2267 }\r
2268 break;\r
2269\r
b602265d
DG
2270 case NODE_ANCHOR:\r
2271 case NODE_GIMMICK:\r
14b0e578
CS
2272 break;\r
2273\r
b602265d
DG
2274 case NODE_BACKREF:\r
2275 if (NODE_IS_CHECKER(node))\r
2276 break;\r
2277 /* fall */\r
14b0e578
CS
2278 default:\r
2279 r = GET_CHAR_LEN_VARLEN;\r
2280 break;\r
2281 }\r
2282\r
2283 return r;\r
2284}\r
2285\r
2286static int\r
2287get_char_length_tree(Node* node, regex_t* reg, int* len)\r
2288{\r
2289 return get_char_length_tree1(node, reg, len, 0);\r
2290}\r
2291\r
2292/* x is not included y ==> 1 : 0 */\r
2293static int\r
b602265d 2294is_exclusive(Node* x, Node* y, regex_t* reg)\r
14b0e578
CS
2295{\r
2296 int i, len;\r
2297 OnigCodePoint code;\r
2298 UChar *p;\r
b602265d 2299 NodeType ytype;\r
14b0e578
CS
2300\r
2301 retry:\r
b602265d
DG
2302 ytype = NODE_TYPE(y);\r
2303 switch (NODE_TYPE(x)) {\r
2304 case NODE_CTYPE:\r
14b0e578 2305 {\r
b602265d
DG
2306 if (CTYPE_(x)->ctype == CTYPE_ANYCHAR ||\r
2307 CTYPE_(y)->ctype == CTYPE_ANYCHAR)\r
2308 break;\r
2309\r
14b0e578 2310 switch (ytype) {\r
b602265d
DG
2311 case NODE_CTYPE:\r
2312 if (CTYPE_(y)->ctype == CTYPE_(x)->ctype &&\r
2313 CTYPE_(y)->not != CTYPE_(x)->not &&\r
2314 CTYPE_(y)->ascii_mode == CTYPE_(x)->ascii_mode)\r
2315 return 1;\r
2316 else\r
2317 return 0;\r
2318 break;\r
2319\r
2320 case NODE_CCLASS:\r
14b0e578 2321 swap:\r
b602265d
DG
2322 {\r
2323 Node* tmp;\r
2324 tmp = x; x = y; y = tmp;\r
2325 goto retry;\r
2326 }\r
2327 break;\r
14b0e578 2328\r
b602265d
DG
2329 case NODE_STRING:\r
2330 goto swap;\r
2331 break;\r
14b0e578
CS
2332\r
2333 default:\r
b602265d 2334 break;\r
14b0e578
CS
2335 }\r
2336 }\r
2337 break;\r
2338\r
b602265d 2339 case NODE_CCLASS:\r
14b0e578 2340 {\r
b602265d
DG
2341 int range;\r
2342 CClassNode* xc = CCLASS_(x);\r
2343\r
14b0e578 2344 switch (ytype) {\r
b602265d
DG
2345 case NODE_CTYPE:\r
2346 switch (CTYPE_(y)->ctype) {\r
2347 case CTYPE_ANYCHAR:\r
2348 return 0;\r
2349 break;\r
2350\r
2351 case ONIGENC_CTYPE_WORD:\r
2352 if (CTYPE_(y)->not == 0) {\r
2353 if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {\r
2354 range = CTYPE_(y)->ascii_mode != 0 ? 128 : SINGLE_BYTE_SIZE;\r
2355 for (i = 0; i < range; i++) {\r
2356 if (BITSET_AT(xc->bs, i)) {\r
2357 if (ONIGENC_IS_CODE_WORD(reg->enc, i)) return 0;\r
2358 }\r
2359 }\r
2360 return 1;\r
2361 }\r
2362 return 0;\r
2363 }\r
2364 else {\r
2365 if (IS_NOT_NULL(xc->mbuf)) return 0;\r
2366 if (IS_NCCLASS_NOT(xc)) return 0;\r
2367\r
2368 range = CTYPE_(y)->ascii_mode != 0 ? 128 : SINGLE_BYTE_SIZE;\r
2369 for (i = 0; i < range; i++) {\r
2370 if (! ONIGENC_IS_CODE_WORD(reg->enc, i)) {\r
2371 if (BITSET_AT(xc->bs, i))\r
2372 return 0;\r
2373 }\r
2374 }\r
2375 for (i = range; i < SINGLE_BYTE_SIZE; i++) {\r
2376 if (BITSET_AT(xc->bs, i)) return 0;\r
2377 }\r
2378 return 1;\r
2379 }\r
2380 break;\r
2381\r
2382 default:\r
2383 break;\r
2384 }\r
2385 break;\r
2386\r
2387 case NODE_CCLASS:\r
2388 {\r
2389 int v;\r
2390 CClassNode* yc = CCLASS_(y);\r
2391\r
2392 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {\r
2393 v = BITSET_AT(xc->bs, i);\r
2394 if ((v != 0 && !IS_NCCLASS_NOT(xc)) || (v == 0 && IS_NCCLASS_NOT(xc))) {\r
2395 v = BITSET_AT(yc->bs, i);\r
2396 if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||\r
14b0e578 2397 (v == 0 && IS_NCCLASS_NOT(yc)))\r
b602265d
DG
2398 return 0;\r
2399 }\r
2400 }\r
2401 if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||\r
2402 (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))\r
2403 return 1;\r
2404 return 0;\r
2405 }\r
2406 break;\r
2407\r
2408 case NODE_STRING:\r
2409 goto swap;\r
2410 break;\r
14b0e578
CS
2411\r
2412 default:\r
b602265d 2413 break;\r
14b0e578
CS
2414 }\r
2415 }\r
2416 break;\r
2417\r
b602265d 2418 case NODE_STRING:\r
14b0e578 2419 {\r
b602265d
DG
2420 StrNode* xs = STR_(x);\r
2421\r
2422 if (NODE_STRING_LEN(x) == 0)\r
2423 break;\r
14b0e578 2424\r
14b0e578 2425 switch (ytype) {\r
b602265d
DG
2426 case NODE_CTYPE:\r
2427 switch (CTYPE_(y)->ctype) {\r
2428 case CTYPE_ANYCHAR:\r
2429 break;\r
2430\r
14b0e578 2431 case ONIGENC_CTYPE_WORD:\r
b602265d
DG
2432 if (CTYPE_(y)->ascii_mode == 0) {\r
2433 if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))\r
2434 return CTYPE_(y)->not;\r
2435 else\r
2436 return !(CTYPE_(y)->not);\r
2437 }\r
2438 else {\r
2439 if (ONIGENC_IS_MBC_WORD_ASCII(reg->enc, xs->s, xs->end))\r
2440 return CTYPE_(y)->not;\r
2441 else\r
2442 return !(CTYPE_(y)->not);\r
2443 }\r
14b0e578
CS
2444 break;\r
2445 default:\r
2446 break;\r
2447 }\r
2448 break;\r
2449\r
b602265d 2450 case NODE_CCLASS:\r
14b0e578 2451 {\r
b602265d 2452 CClassNode* cc = CCLASS_(y);\r
14b0e578
CS
2453\r
2454 code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,\r
2455 xs->s + ONIGENC_MBC_MAXLEN(reg->enc));\r
2456 return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);\r
2457 }\r
2458 break;\r
2459\r
b602265d 2460 case NODE_STRING:\r
14b0e578
CS
2461 {\r
2462 UChar *q;\r
b602265d
DG
2463 StrNode* ys = STR_(y);\r
2464\r
2465 len = NODE_STRING_LEN(x);\r
2466 if (len > NODE_STRING_LEN(y)) len = NODE_STRING_LEN(y);\r
2467 if (NODE_STRING_IS_AMBIG(x) || NODE_STRING_IS_AMBIG(y)) {\r
14b0e578
CS
2468 /* tiny version */\r
2469 return 0;\r
2470 }\r
2471 else {\r
2472 for (i = 0, p = ys->s, q = xs->s; i < len; i++, p++, q++) {\r
2473 if (*p != *q) return 1;\r
2474 }\r
2475 }\r
2476 }\r
2477 break;\r
b602265d 2478\r
14b0e578
CS
2479 default:\r
2480 break;\r
2481 }\r
2482 }\r
2483 break;\r
2484\r
2485 default:\r
2486 break;\r
2487 }\r
2488\r
2489 return 0;\r
2490}\r
2491\r
2492static Node*\r
2493get_head_value_node(Node* node, int exact, regex_t* reg)\r
2494{\r
2495 Node* n = NULL_NODE;\r
2496\r
b602265d
DG
2497 switch (NODE_TYPE(node)) {\r
2498 case NODE_BACKREF:\r
2499 case NODE_ALT:\r
2500#ifdef USE_CALL\r
2501 case NODE_CALL:\r
14b0e578
CS
2502#endif\r
2503 break;\r
2504\r
b602265d
DG
2505 case NODE_CTYPE:\r
2506 if (CTYPE_(node)->ctype == CTYPE_ANYCHAR)\r
2507 break;\r
2508 /* fall */\r
2509 case NODE_CCLASS:\r
14b0e578
CS
2510 if (exact == 0) {\r
2511 n = node;\r
2512 }\r
2513 break;\r
2514\r
b602265d
DG
2515 case NODE_LIST:\r
2516 n = get_head_value_node(NODE_CAR(node), exact, reg);\r
14b0e578
CS
2517 break;\r
2518\r
b602265d 2519 case NODE_STRING:\r
14b0e578 2520 {\r
b602265d 2521 StrNode* sn = STR_(node);\r
14b0e578
CS
2522\r
2523 if (sn->end <= sn->s)\r
b602265d 2524 break;\r
14b0e578
CS
2525\r
2526 if (exact != 0 &&\r
b602265d 2527 !NODE_STRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {\r
14b0e578
CS
2528 }\r
2529 else {\r
b602265d 2530 n = node;\r
14b0e578
CS
2531 }\r
2532 }\r
2533 break;\r
2534\r
b602265d 2535 case NODE_QUANT:\r
14b0e578 2536 {\r
b602265d 2537 QuantNode* qn = QUANT_(node);\r
14b0e578 2538 if (qn->lower > 0) {\r
b602265d
DG
2539 if (IS_NOT_NULL(qn->head_exact))\r
2540 n = qn->head_exact;\r
2541 else\r
2542 n = get_head_value_node(NODE_BODY(node), exact, reg);\r
14b0e578
CS
2543 }\r
2544 }\r
2545 break;\r
2546\r
b602265d 2547 case NODE_ENCLOSURE:\r
14b0e578 2548 {\r
b602265d 2549 EnclosureNode* en = ENCLOSURE_(node);\r
14b0e578 2550 switch (en->type) {\r
b602265d
DG
2551 case ENCLOSURE_OPTION:\r
2552 {\r
2553 OnigOptionType options = reg->options;\r
2554\r
2555 reg->options = ENCLOSURE_(node)->o.options;\r
2556 n = get_head_value_node(NODE_BODY(node), exact, reg);\r
2557 reg->options = options;\r
2558 }\r
2559 break;\r
2560\r
2561 case ENCLOSURE_MEMORY:\r
2562 case ENCLOSURE_STOP_BACKTRACK:\r
2563 case ENCLOSURE_IF_ELSE:\r
2564 n = get_head_value_node(NODE_BODY(node), exact, reg);\r
2565 break;\r
14b0e578
CS
2566 }\r
2567 }\r
2568 break;\r
2569\r
b602265d
DG
2570 case NODE_ANCHOR:\r
2571 if (ANCHOR_(node)->type == ANCHOR_PREC_READ)\r
2572 n = get_head_value_node(NODE_BODY(node), exact, reg);\r
14b0e578
CS
2573 break;\r
2574\r
b602265d 2575 case NODE_GIMMICK:\r
14b0e578
CS
2576 default:\r
2577 break;\r
2578 }\r
2579\r
2580 return n;\r
2581}\r
2582\r
2583static int\r
b602265d 2584check_type_tree(Node* node, int type_mask, int enclosure_mask, int anchor_mask)\r
14b0e578 2585{\r
b602265d
DG
2586 NodeType type;\r
2587 int r = 0;\r
14b0e578 2588\r
b602265d
DG
2589 type = NODE_TYPE(node);\r
2590 if ((NODE_TYPE2BIT(type) & type_mask) == 0)\r
14b0e578
CS
2591 return 1;\r
2592\r
2593 switch (type) {\r
b602265d
DG
2594 case NODE_LIST:\r
2595 case NODE_ALT:\r
14b0e578 2596 do {\r
b602265d
DG
2597 r = check_type_tree(NODE_CAR(node), type_mask, enclosure_mask,\r
2598 anchor_mask);\r
2599 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));\r
14b0e578
CS
2600 break;\r
2601\r
b602265d
DG
2602 case NODE_QUANT:\r
2603 r = check_type_tree(NODE_BODY(node), type_mask, enclosure_mask, anchor_mask);\r
14b0e578
CS
2604 break;\r
2605\r
b602265d 2606 case NODE_ENCLOSURE:\r
14b0e578 2607 {\r
b602265d
DG
2608 EnclosureNode* en = ENCLOSURE_(node);\r
2609 if (((1<<en->type) & enclosure_mask) == 0)\r
2610 return 1;\r
2611\r
2612 r = check_type_tree(NODE_BODY(node), type_mask, enclosure_mask, anchor_mask);\r
2613 if (r == 0 && en->type == ENCLOSURE_IF_ELSE) {\r
2614 if (IS_NOT_NULL(en->te.Then)) {\r
2615 r = check_type_tree(en->te.Then, type_mask, enclosure_mask, anchor_mask);\r
2616 if (r != 0) break;\r
2617 }\r
2618 if (IS_NOT_NULL(en->te.Else)) {\r
2619 r = check_type_tree(en->te.Else, type_mask, enclosure_mask, anchor_mask);\r
2620 }\r
2621 }\r
14b0e578
CS
2622 }\r
2623 break;\r
2624\r
b602265d
DG
2625 case NODE_ANCHOR:\r
2626 type = ANCHOR_(node)->type;\r
14b0e578
CS
2627 if ((type & anchor_mask) == 0)\r
2628 return 1;\r
2629\r
b602265d
DG
2630 if (IS_NOT_NULL(NODE_BODY(node)))\r
2631 r = check_type_tree(NODE_BODY(node), type_mask, enclosure_mask, anchor_mask);\r
14b0e578
CS
2632 break;\r
2633\r
b602265d 2634 case NODE_GIMMICK:\r
14b0e578
CS
2635 default:\r
2636 break;\r
2637 }\r
2638 return r;\r
2639}\r
2640\r
b602265d
DG
2641static OnigLen\r
2642tree_min_len(Node* node, ScanEnv* env)\r
14b0e578 2643{\r
b602265d
DG
2644 OnigLen len;\r
2645 OnigLen tmin;\r
14b0e578 2646\r
b602265d
DG
2647 len = 0;\r
2648 switch (NODE_TYPE(node)) {\r
2649 case NODE_BACKREF:\r
2650 if (! NODE_IS_CHECKER(node)) {\r
2651 int i;\r
2652 int* backs;\r
2653 MemEnv* mem_env = SCANENV_MEMENV(env);\r
2654 BackRefNode* br = BACKREF_(node);\r
2655 if (NODE_IS_RECURSION(node)) break;\r
14b0e578 2656\r
b602265d
DG
2657 backs = BACKREFS_P(br);\r
2658 len = tree_min_len(mem_env[backs[0]].node, env);\r
2659 for (i = 1; i < br->back_num; i++) {\r
2660 tmin = tree_min_len(mem_env[backs[i]].node, env);\r
2661 if (len > tmin) len = tmin;\r
2662 }\r
14b0e578
CS
2663 }\r
2664 break;\r
2665\r
b602265d
DG
2666#ifdef USE_CALL\r
2667 case NODE_CALL:\r
14b0e578 2668 {\r
b602265d
DG
2669 Node* t = NODE_BODY(node);\r
2670 if (NODE_IS_RECURSION(node)) {\r
2671 if (NODE_IS_MIN_FIXED(t))\r
2672 len = ENCLOSURE_(t)->min_len;\r
2673 }\r
2674 else\r
2675 len = tree_min_len(t, env);\r
2676 }\r
2677 break;\r
2678#endif\r
2679\r
2680 case NODE_LIST:\r
2681 do {\r
2682 tmin = tree_min_len(NODE_CAR(node), env);\r
2683 len = distance_add(len, tmin);\r
2684 } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
2685 break;\r
2686\r
2687 case NODE_ALT:\r
2688 {\r
2689 Node *x, *y;\r
2690 y = node;\r
14b0e578 2691 do {\r
b602265d
DG
2692 x = NODE_CAR(y);\r
2693 tmin = tree_min_len(x, env);\r
2694 if (y == node) len = tmin;\r
2695 else if (len > tmin) len = tmin;\r
2696 } while (IS_NOT_NULL(y = NODE_CDR(y)));\r
14b0e578
CS
2697 }\r
2698 break;\r
2699\r
b602265d
DG
2700 case NODE_STRING:\r
2701 {\r
2702 StrNode* sn = STR_(node);\r
2703 len = (int )(sn->end - sn->s);\r
14b0e578
CS
2704 }\r
2705 break;\r
2706\r
b602265d
DG
2707 case NODE_CTYPE:\r
2708 case NODE_CCLASS:\r
2709 len = ONIGENC_MBC_MINLEN(env->enc);\r
2710 break;\r
2711\r
2712 case NODE_QUANT:\r
14b0e578 2713 {\r
b602265d
DG
2714 QuantNode* qn = QUANT_(node);\r
2715\r
2716 if (qn->lower > 0) {\r
2717 len = tree_min_len(NODE_BODY(node), env);\r
2718 len = distance_multiply(len, qn->lower);\r
14b0e578
CS
2719 }\r
2720 }\r
2721 break;\r
2722\r
b602265d
DG
2723 case NODE_ENCLOSURE:\r
2724 {\r
2725 EnclosureNode* en = ENCLOSURE_(node);\r
2726 switch (en->type) {\r
2727 case ENCLOSURE_MEMORY:\r
2728 if (NODE_IS_MIN_FIXED(node))\r
2729 len = en->min_len;\r
2730 else {\r
2731 if (NODE_IS_MARK1(node))\r
2732 len = 0; /* recursive */\r
2733 else {\r
2734 NODE_STATUS_ADD(node, MARK1);\r
2735 len = tree_min_len(NODE_BODY(node), env);\r
2736 NODE_STATUS_REMOVE(node, MARK1);\r
14b0e578 2737\r
b602265d
DG
2738 en->min_len = len;\r
2739 NODE_STATUS_ADD(node, MIN_FIXED);\r
2740 }\r
2741 }\r
2742 break;\r
2743\r
2744 case ENCLOSURE_OPTION:\r
2745 case ENCLOSURE_STOP_BACKTRACK:\r
2746 len = tree_min_len(NODE_BODY(node), env);\r
2747 break;\r
2748 case ENCLOSURE_IF_ELSE:\r
2749 {\r
2750 OnigLen elen;\r
2751\r
2752 len = tree_min_len(NODE_BODY(node), env);\r
2753 if (IS_NOT_NULL(en->te.Then))\r
2754 len += tree_min_len(en->te.Then, env);\r
2755 if (IS_NOT_NULL(en->te.Else))\r
2756 elen = tree_min_len(en->te.Else, env);\r
2757 else elen = 0;\r
2758\r
2759 if (elen < len) len = elen;\r
2760 }\r
2761 break;\r
2762 }\r
14b0e578
CS
2763 }\r
2764 break;\r
2765\r
b602265d
DG
2766 case NODE_GIMMICK:\r
2767 {\r
2768 GimmickNode* g = GIMMICK_(node);\r
2769 if (g->type == GIMMICK_FAIL) {\r
2770 len = INFINITE_LEN;\r
2771 break;\r
2772 }\r
2773 }\r
2774 /* fall */\r
2775 case NODE_ANCHOR:\r
14b0e578
CS
2776 default:\r
2777 break;\r
2778 }\r
2779\r
b602265d 2780 return len;\r
14b0e578
CS
2781}\r
2782\r
b602265d
DG
2783static OnigLen\r
2784tree_max_len(Node* node, ScanEnv* env)\r
14b0e578 2785{\r
b602265d
DG
2786 OnigLen len;\r
2787 OnigLen tmax;\r
14b0e578 2788\r
b602265d
DG
2789 len = 0;\r
2790 switch (NODE_TYPE(node)) {\r
2791 case NODE_LIST:\r
14b0e578 2792 do {\r
b602265d
DG
2793 tmax = tree_max_len(NODE_CAR(node), env);\r
2794 len = distance_add(len, tmax);\r
2795 } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
14b0e578
CS
2796 break;\r
2797\r
b602265d
DG
2798 case NODE_ALT:\r
2799 do {\r
2800 tmax = tree_max_len(NODE_CAR(node), env);\r
2801 if (len < tmax) len = tmax;\r
2802 } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
14b0e578
CS
2803 break;\r
2804\r
b602265d 2805 case NODE_STRING:\r
14b0e578 2806 {\r
b602265d
DG
2807 StrNode* sn = STR_(node);\r
2808 len = (OnigLen )(sn->end - sn->s);\r
2809 }\r
2810 break;\r
2811\r
2812 case NODE_CTYPE:\r
2813 case NODE_CCLASS:\r
2814 len = ONIGENC_MBC_MAXLEN_DIST(env->enc);\r
2815 break;\r
2816\r
2817 case NODE_BACKREF:\r
2818 if (! NODE_IS_CHECKER(node)) {\r
2819 int i;\r
2820 int* backs;\r
2821 MemEnv* mem_env = SCANENV_MEMENV(env);\r
2822 BackRefNode* br = BACKREF_(node);\r
2823 if (NODE_IS_RECURSION(node)) {\r
2824 len = INFINITE_LEN;\r
2825 break;\r
2826 }\r
2827 backs = BACKREFS_P(br);\r
2828 for (i = 0; i < br->back_num; i++) {\r
2829 tmax = tree_max_len(mem_env[backs[i]].node, env);\r
2830 if (len < tmax) len = tmax;\r
14b0e578
CS
2831 }\r
2832 }\r
2833 break;\r
2834\r
b602265d
DG
2835#ifdef USE_CALL\r
2836 case NODE_CALL:\r
2837 if (! NODE_IS_RECURSION(node))\r
2838 len = tree_max_len(NODE_BODY(node), env);\r
2839 else\r
2840 len = INFINITE_LEN;\r
2841 break;\r
2842#endif\r
2843\r
2844 case NODE_QUANT:\r
14b0e578 2845 {\r
b602265d 2846 QuantNode* qn = QUANT_(node);\r
14b0e578 2847\r
b602265d
DG
2848 if (qn->upper != 0) {\r
2849 len = tree_max_len(NODE_BODY(node), env);\r
2850 if (len != 0) {\r
2851 if (! IS_REPEAT_INFINITE(qn->upper))\r
2852 len = distance_multiply(len, qn->upper);\r
2853 else\r
2854 len = INFINITE_LEN;\r
2855 }\r
14b0e578 2856 }\r
14b0e578 2857 }\r
b602265d
DG
2858 break;\r
2859\r
2860 case NODE_ENCLOSURE:\r
2861 {\r
2862 EnclosureNode* en = ENCLOSURE_(node);\r
2863 switch (en->type) {\r
2864 case ENCLOSURE_MEMORY:\r
2865 if (NODE_IS_MAX_FIXED(node))\r
2866 len = en->max_len;\r
2867 else {\r
2868 if (NODE_IS_MARK1(node))\r
2869 len = INFINITE_LEN;\r
2870 else {\r
2871 NODE_STATUS_ADD(node, MARK1);\r
2872 len = tree_max_len(NODE_BODY(node), env);\r
2873 NODE_STATUS_REMOVE(node, MARK1);\r
2874\r
2875 en->max_len = len;\r
2876 NODE_STATUS_ADD(node, MAX_FIXED);\r
2877 }\r
2878 }\r
2879 break;\r
2880\r
2881 case ENCLOSURE_OPTION:\r
2882 case ENCLOSURE_STOP_BACKTRACK:\r
2883 len = tree_max_len(NODE_BODY(node), env);\r
2884 break;\r
2885 case ENCLOSURE_IF_ELSE:\r
2886 {\r
2887 OnigLen tlen, elen;\r
2888\r
2889 len = tree_max_len(NODE_BODY(node), env);\r
2890 if (IS_NOT_NULL(en->te.Then)) {\r
2891 tlen = tree_max_len(en->te.Then, env);\r
2892 len = distance_add(len, tlen);\r
2893 }\r
2894 if (IS_NOT_NULL(en->te.Else))\r
2895 elen = tree_max_len(en->te.Else, env);\r
2896 else elen = 0;\r
14b0e578 2897\r
b602265d
DG
2898 if (elen > len) len = elen;\r
2899 }\r
2900 break;\r
2901 }\r
2902 }\r
14b0e578
CS
2903 break;\r
2904\r
b602265d
DG
2905 case NODE_ANCHOR:\r
2906 case NODE_GIMMICK:\r
14b0e578
CS
2907 default:\r
2908 break;\r
2909 }\r
2910\r
b602265d 2911 return len;\r
14b0e578
CS
2912}\r
2913\r
2914static int\r
b602265d 2915check_backrefs(Node* node, ScanEnv* env)\r
14b0e578 2916{\r
b602265d 2917 int r;\r
14b0e578 2918\r
b602265d
DG
2919 switch (NODE_TYPE(node)) {\r
2920 case NODE_LIST:\r
2921 case NODE_ALT:\r
14b0e578 2922 do {\r
b602265d
DG
2923 r = check_backrefs(NODE_CAR(node), env);\r
2924 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));\r
14b0e578
CS
2925 break;\r
2926\r
b602265d
DG
2927 case NODE_ANCHOR:\r
2928 if (! ANCHOR_HAS_BODY(ANCHOR_(node))) {\r
2929 r = 0;\r
2930 break;\r
2931 }\r
2932 /* fall */\r
2933 case NODE_QUANT:\r
2934 r = check_backrefs(NODE_BODY(node), env);\r
14b0e578
CS
2935 break;\r
2936\r
b602265d
DG
2937 case NODE_ENCLOSURE:\r
2938 r = check_backrefs(NODE_BODY(node), env);\r
14b0e578 2939 {\r
b602265d
DG
2940 EnclosureNode* en = ENCLOSURE_(node);\r
2941\r
2942 if (en->type == ENCLOSURE_IF_ELSE) {\r
2943 if (r != 0) return r;\r
2944 if (IS_NOT_NULL(en->te.Then)) {\r
2945 r = check_backrefs(en->te.Then, env);\r
2946 if (r != 0) return r;\r
2947 }\r
2948 if (IS_NOT_NULL(en->te.Else)) {\r
2949 r = check_backrefs(en->te.Else, env);\r
2950 }\r
14b0e578
CS
2951 }\r
2952 }\r
2953 break;\r
2954\r
b602265d
DG
2955 case NODE_BACKREF:\r
2956 {\r
2957 int i;\r
2958 BackRefNode* br = BACKREF_(node);\r
2959 int* backs = BACKREFS_P(br);\r
2960 MemEnv* mem_env = SCANENV_MEMENV(env);\r
14b0e578 2961\r
b602265d
DG
2962 for (i = 0; i < br->back_num; i++) {\r
2963 if (backs[i] > env->num_mem)\r
2964 return ONIGERR_INVALID_BACKREF;\r
2965\r
2966 NODE_STATUS_ADD(mem_env[backs[i]].node, BACKREF);\r
2967 }\r
2968 r = 0;\r
14b0e578
CS
2969 }\r
2970 break;\r
2971\r
2972 default:\r
b602265d 2973 r = 0;\r
14b0e578
CS
2974 break;\r
2975 }\r
2976\r
2977 return r;\r
2978}\r
2979\r
2980\r
b602265d
DG
2981#ifdef USE_CALL\r
2982\r
2983#define RECURSION_EXIST (1<<0)\r
2984#define RECURSION_MUST (1<<1)\r
2985#define RECURSION_INFINITE (1<<2)\r
2986\r
14b0e578 2987static int\r
b602265d 2988infinite_recursive_call_check(Node* node, ScanEnv* env, int head)\r
14b0e578 2989{\r
b602265d 2990 int ret;\r
14b0e578
CS
2991 int r = 0;\r
2992\r
b602265d
DG
2993 switch (NODE_TYPE(node)) {\r
2994 case NODE_LIST:\r
14b0e578 2995 {\r
b602265d
DG
2996 Node *x;\r
2997 OnigLen min;\r
2998\r
2999 x = node;\r
14b0e578 3000 do {\r
b602265d
DG
3001 ret = infinite_recursive_call_check(NODE_CAR(x), env, head);\r
3002 if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;\r
3003 r |= ret;\r
3004 if (head != 0) {\r
3005 min = tree_min_len(NODE_CAR(x), env);\r
3006 if (min != 0) head = 0;\r
3007 }\r
3008 } while (IS_NOT_NULL(x = NODE_CDR(x)));\r
14b0e578
CS
3009 }\r
3010 break;\r
3011\r
b602265d
DG
3012 case NODE_ALT:\r
3013 {\r
3014 int must;\r
3015\r
3016 must = RECURSION_MUST;\r
3017 do {\r
3018 ret = infinite_recursive_call_check(NODE_CAR(node), env, head);\r
3019 if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;\r
3020\r
3021 r |= (ret & RECURSION_EXIST);\r
3022 must &= ret;\r
3023 } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
3024 r |= must;\r
14b0e578
CS
3025 }\r
3026 break;\r
3027\r
b602265d
DG
3028 case NODE_QUANT:\r
3029 r = infinite_recursive_call_check(NODE_BODY(node), env, head);\r
3030 if (r < 0) return r;\r
3031 if ((r & RECURSION_MUST) != 0) {\r
3032 if (QUANT_(node)->lower == 0)\r
3033 r &= ~RECURSION_MUST;\r
14b0e578
CS
3034 }\r
3035 break;\r
3036\r
b602265d
DG
3037 case NODE_ANCHOR:\r
3038 if (! ANCHOR_HAS_BODY(ANCHOR_(node)))\r
3039 break;\r
3040 /* fall */\r
3041 case NODE_CALL:\r
3042 r = infinite_recursive_call_check(NODE_BODY(node), env, head);\r
3043 break;\r
3044\r
3045 case NODE_ENCLOSURE:\r
14b0e578 3046 {\r
b602265d
DG
3047 EnclosureNode* en = ENCLOSURE_(node);\r
3048\r
3049 if (en->type == ENCLOSURE_MEMORY) {\r
3050 if (NODE_IS_MARK2(node))\r
3051 return 0;\r
3052 else if (NODE_IS_MARK1(node))\r
3053 return (head == 0 ? RECURSION_EXIST | RECURSION_MUST\r
3054 : RECURSION_EXIST | RECURSION_MUST | RECURSION_INFINITE);\r
3055 else {\r
3056 NODE_STATUS_ADD(node, MARK2);\r
3057 r = infinite_recursive_call_check(NODE_BODY(node), env, head);\r
3058 NODE_STATUS_REMOVE(node, MARK2);\r
3059 }\r
3060 }\r
3061 else if (en->type == ENCLOSURE_IF_ELSE) {\r
3062 int eret;\r
3063\r
3064 ret = infinite_recursive_call_check(NODE_BODY(node), env, head);\r
3065 if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;\r
3066 r |= ret;\r
3067 if (IS_NOT_NULL(en->te.Then)) {\r
3068 OnigLen min;\r
3069 if (head != 0) {\r
3070 min = tree_min_len(NODE_BODY(node), env);\r
3071 }\r
3072 else min = 0;\r
3073\r
3074 ret = infinite_recursive_call_check(en->te.Then, env, min != 0 ? 0:head);\r
3075 if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;\r
3076 r |= ret;\r
3077 }\r
3078 if (IS_NOT_NULL(en->te.Else)) {\r
3079 eret = infinite_recursive_call_check(en->te.Else, env, head);\r
3080 if (eret < 0 || (eret & RECURSION_INFINITE) != 0) return eret;\r
3081 r |= (eret & RECURSION_EXIST);\r
3082 if ((eret & RECURSION_MUST) == 0)\r
3083 r &= ~RECURSION_MUST;\r
3084 }\r
3085 }\r
3086 else {\r
3087 r = infinite_recursive_call_check(NODE_BODY(node), env, head);\r
14b0e578 3088 }\r
14b0e578
CS
3089 }\r
3090 break;\r
3091\r
3092 default:\r
3093 break;\r
3094 }\r
3095\r
3096 return r;\r
3097}\r
3098\r
3099static int\r
b602265d 3100infinite_recursive_call_check_trav(Node* node, ScanEnv* env)\r
14b0e578 3101{\r
b602265d 3102 int r;\r
14b0e578 3103\r
b602265d
DG
3104 switch (NODE_TYPE(node)) {\r
3105 case NODE_LIST:\r
3106 case NODE_ALT:\r
14b0e578 3107 do {\r
b602265d
DG
3108 r = infinite_recursive_call_check_trav(NODE_CAR(node), env);\r
3109 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));\r
3110 break;\r
3111\r
3112 case NODE_ANCHOR:\r
3113 if (! ANCHOR_HAS_BODY(ANCHOR_(node))) {\r
3114 r = 0;\r
3115 break;\r
3116 }\r
3117 /* fall */\r
3118 case NODE_QUANT:\r
3119 r = infinite_recursive_call_check_trav(NODE_BODY(node), env);\r
3120 break;\r
3121\r
3122 case NODE_ENCLOSURE:\r
3123 {\r
3124 EnclosureNode* en = ENCLOSURE_(node);\r
3125\r
3126 if (en->type == ENCLOSURE_MEMORY) {\r
3127 if (NODE_IS_RECURSION(node) && NODE_IS_CALLED(node)) {\r
3128 int ret;\r
3129\r
3130 NODE_STATUS_ADD(node, MARK1);\r
3131\r
3132 ret = infinite_recursive_call_check(NODE_BODY(node), env, 1);\r
3133 if (ret < 0) return ret;\r
3134 else if ((ret & (RECURSION_MUST | RECURSION_INFINITE)) != 0)\r
3135 return ONIGERR_NEVER_ENDING_RECURSION;\r
3136\r
3137 NODE_STATUS_REMOVE(node, MARK1);\r
3138 }\r
3139 }\r
3140 else if (en->type == ENCLOSURE_IF_ELSE) {\r
3141 if (IS_NOT_NULL(en->te.Then)) {\r
3142 r = infinite_recursive_call_check_trav(en->te.Then, env);\r
3143 if (r != 0) return r;\r
3144 }\r
3145 if (IS_NOT_NULL(en->te.Else)) {\r
3146 r = infinite_recursive_call_check_trav(en->te.Else, env);\r
3147 if (r != 0) return r;\r
3148 }\r
3149 }\r
3150 }\r
3151\r
3152 r = infinite_recursive_call_check_trav(NODE_BODY(node), env);\r
3153 break;\r
3154\r
3155 default:\r
3156 r = 0;\r
14b0e578 3157 break;\r
b602265d
DG
3158 }\r
3159\r
3160 return r;\r
3161}\r
3162\r
3163static int\r
3164recursive_call_check(Node* node)\r
3165{\r
3166 int r;\r
14b0e578 3167\r
b602265d
DG
3168 switch (NODE_TYPE(node)) {\r
3169 case NODE_LIST:\r
3170 case NODE_ALT:\r
3171 r = 0;\r
14b0e578 3172 do {\r
b602265d
DG
3173 r |= recursive_call_check(NODE_CAR(node));\r
3174 } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
14b0e578
CS
3175 break;\r
3176\r
b602265d
DG
3177 case NODE_ANCHOR:\r
3178 if (! ANCHOR_HAS_BODY(ANCHOR_(node))) {\r
3179 r = 0;\r
3180 break;\r
3181 }\r
3182 /* fall */\r
3183 case NODE_QUANT:\r
3184 r = recursive_call_check(NODE_BODY(node));\r
14b0e578 3185 break;\r
b602265d
DG
3186\r
3187 case NODE_CALL:\r
3188 r = recursive_call_check(NODE_BODY(node));\r
3189 if (r != 0) {\r
3190 if (NODE_IS_MARK1(NODE_BODY(node)))\r
3191 NODE_STATUS_ADD(node, RECURSION);\r
3192 }\r
14b0e578
CS
3193 break;\r
3194\r
b602265d 3195 case NODE_ENCLOSURE:\r
14b0e578 3196 {\r
b602265d
DG
3197 EnclosureNode* en = ENCLOSURE_(node);\r
3198\r
3199 if (en->type == ENCLOSURE_MEMORY) {\r
3200 if (NODE_IS_MARK2(node))\r
3201 return 0;\r
3202 else if (NODE_IS_MARK1(node))\r
3203 return 1; /* recursion */\r
3204 else {\r
3205 NODE_STATUS_ADD(node, MARK2);\r
3206 r = recursive_call_check(NODE_BODY(node));\r
3207 NODE_STATUS_REMOVE(node, MARK2);\r
3208 }\r
3209 }\r
3210 else if (en->type == ENCLOSURE_IF_ELSE) {\r
3211 r = 0;\r
3212 if (IS_NOT_NULL(en->te.Then)) {\r
3213 r |= recursive_call_check(en->te.Then);\r
3214 }\r
3215 if (IS_NOT_NULL(en->te.Else)) {\r
3216 r |= recursive_call_check(en->te.Else);\r
3217 }\r
3218 r |= recursive_call_check(NODE_BODY(node));\r
14b0e578 3219 }\r
14b0e578 3220 else {\r
b602265d 3221 r = recursive_call_check(NODE_BODY(node));\r
14b0e578 3222 }\r
14b0e578
CS
3223 }\r
3224 break;\r
3225\r
b602265d
DG
3226 default:\r
3227 r = 0;\r
3228 break;\r
3229 }\r
3230\r
3231 return r;\r
3232}\r
3233\r
3234#define IN_RECURSION (1<<0)\r
3235#define FOUND_CALLED_NODE 1\r
3236\r
3237static int\r
3238recursive_call_check_trav(Node* node, ScanEnv* env, int state)\r
3239{\r
3240 int r = 0;\r
3241\r
3242 switch (NODE_TYPE(node)) {\r
3243 case NODE_LIST:\r
3244 case NODE_ALT:\r
14b0e578 3245 {\r
b602265d
DG
3246 int ret;\r
3247 do {\r
3248 ret = recursive_call_check_trav(NODE_CAR(node), env, state);\r
3249 if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;\r
3250 else if (ret < 0) return ret;\r
3251 } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
3252 }\r
3253 break;\r
14b0e578 3254\r
b602265d
DG
3255 case NODE_QUANT:\r
3256 r = recursive_call_check_trav(NODE_BODY(node), env, state);\r
3257 if (QUANT_(node)->upper == 0) {\r
3258 if (r == FOUND_CALLED_NODE)\r
3259 QUANT_(node)->is_refered = 1;\r
3260 }\r
3261 break;\r
3262\r
3263 case NODE_ANCHOR:\r
3264 {\r
3265 AnchorNode* an = ANCHOR_(node);\r
3266 if (ANCHOR_HAS_BODY(an))\r
3267 r = recursive_call_check_trav(NODE_ANCHOR_BODY(an), env, state);\r
3268 }\r
3269 break;\r
3270\r
3271 case NODE_ENCLOSURE:\r
3272 {\r
3273 int ret;\r
3274 int state1;\r
3275 EnclosureNode* en = ENCLOSURE_(node);\r
3276\r
3277 if (en->type == ENCLOSURE_MEMORY) {\r
3278 if (NODE_IS_CALLED(node) || (state & IN_RECURSION) != 0) {\r
3279 if (! NODE_IS_RECURSION(node)) {\r
3280 NODE_STATUS_ADD(node, MARK1);\r
3281 r = recursive_call_check(NODE_BODY(node));\r
3282 if (r != 0)\r
3283 NODE_STATUS_ADD(node, RECURSION);\r
3284 NODE_STATUS_REMOVE(node, MARK1);\r
3285 }\r
3286\r
3287 if (NODE_IS_CALLED(node))\r
3288 r = FOUND_CALLED_NODE;\r
3289 }\r
3290 }\r
3291\r
3292 state1 = state;\r
3293 if (NODE_IS_RECURSION(node))\r
3294 state1 |= IN_RECURSION;\r
3295\r
3296 ret = recursive_call_check_trav(NODE_BODY(node), env, state1);\r
3297 if (ret == FOUND_CALLED_NODE)\r
3298 r = FOUND_CALLED_NODE;\r
3299\r
3300 if (en->type == ENCLOSURE_IF_ELSE) {\r
3301 if (IS_NOT_NULL(en->te.Then)) {\r
3302 ret = recursive_call_check_trav(en->te.Then, env, state1);\r
3303 if (ret == FOUND_CALLED_NODE)\r
3304 r = FOUND_CALLED_NODE;\r
3305 }\r
3306 if (IS_NOT_NULL(en->te.Else)) {\r
3307 ret = recursive_call_check_trav(en->te.Else, env, state1);\r
3308 if (ret == FOUND_CALLED_NODE)\r
3309 r = FOUND_CALLED_NODE;\r
3310 }\r
14b0e578
CS
3311 }\r
3312 }\r
3313 break;\r
3314\r
3315 default:\r
3316 break;\r
3317 }\r
3318\r
3319 return r;\r
3320}\r
b602265d 3321\r
14b0e578
CS
3322#endif\r
3323\r
3324/* divide different length alternatives in look-behind.\r
3325 (?<=A|B) ==> (?<=A)|(?<=B)\r
3326 (?<!A|B) ==> (?<!A)(?<!B)\r
3327*/\r
3328static int\r
3329divide_look_behind_alternatives(Node* node)\r
3330{\r
3331 Node *head, *np, *insert_node;\r
b602265d 3332 AnchorNode* an = ANCHOR_(node);\r
14b0e578
CS
3333 int anc_type = an->type;\r
3334\r
b602265d
DG
3335 head = NODE_ANCHOR_BODY(an);\r
3336 np = NODE_CAR(head);\r
14b0e578 3337 swap_node(node, head);\r
b602265d
DG
3338 NODE_CAR(node) = head;\r
3339 NODE_BODY(head) = np;\r
14b0e578
CS
3340\r
3341 np = node;\r
b602265d
DG
3342 while (IS_NOT_NULL(np = NODE_CDR(np))) {\r
3343 insert_node = onig_node_new_anchor(anc_type, an->ascii_mode);\r
14b0e578 3344 CHECK_NULL_RETURN_MEMERR(insert_node);\r
b602265d
DG
3345 NODE_BODY(insert_node) = NODE_CAR(np);\r
3346 NODE_CAR(np) = insert_node;\r
14b0e578
CS
3347 }\r
3348\r
3349 if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {\r
3350 np = node;\r
3351 do {\r
b602265d
DG
3352 NODE_SET_TYPE(np, NODE_LIST); /* alt -> list */\r
3353 } while (IS_NOT_NULL(np = NODE_CDR(np)));\r
14b0e578
CS
3354 }\r
3355 return 0;\r
3356}\r
3357\r
3358static int\r
3359setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)\r
3360{\r
3361 int r, len;\r
b602265d 3362 AnchorNode* an = ANCHOR_(node);\r
14b0e578 3363\r
b602265d 3364 r = get_char_length_tree(NODE_ANCHOR_BODY(an), reg, &len);\r
14b0e578
CS
3365 if (r == 0)\r
3366 an->char_len = len;\r
3367 else if (r == GET_CHAR_LEN_VARLEN)\r
3368 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;\r
3369 else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {\r
3370 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))\r
3371 r = divide_look_behind_alternatives(node);\r
3372 else\r
3373 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;\r
3374 }\r
3375\r
3376 return r;\r
3377}\r
3378\r
3379static int\r
3380next_setup(Node* node, Node* next_node, regex_t* reg)\r
3381{\r
b602265d 3382 NodeType type;\r
14b0e578
CS
3383\r
3384 retry:\r
b602265d
DG
3385 type = NODE_TYPE(node);\r
3386 if (type == NODE_QUANT) {\r
3387 QuantNode* qn = QUANT_(node);\r
14b0e578 3388 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {\r
b602265d 3389#ifdef USE_QUANT_PEEK_NEXT\r
14b0e578
CS
3390 Node* n = get_head_value_node(next_node, 1, reg);\r
3391 /* '\0': for UTF-16BE etc... */\r
b602265d
DG
3392 if (IS_NOT_NULL(n) && STR_(n)->s[0] != '\0') {\r
3393 qn->next_head_exact = n;\r
14b0e578
CS
3394 }\r
3395#endif\r
3396 /* automatic posseivation a*b ==> (?>a*)b */\r
3397 if (qn->lower <= 1) {\r
b602265d
DG
3398 if (NODE_IS_SIMPLE_TYPE(NODE_BODY(node))) {\r
3399 Node *x, *y;\r
3400 x = get_head_value_node(NODE_BODY(node), 0, reg);\r
3401 if (IS_NOT_NULL(x)) {\r
3402 y = get_head_value_node(next_node, 0, reg);\r
3403 if (IS_NOT_NULL(y) && is_exclusive(x, y, reg)) {\r
3404 Node* en = onig_node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);\r
3405 CHECK_NULL_RETURN_MEMERR(en);\r
3406 NODE_STATUS_ADD(en, STOP_BT_SIMPLE_REPEAT);\r
3407 swap_node(node, en);\r
3408 NODE_BODY(node) = en;\r
3409 }\r
3410 }\r
3411 }\r
14b0e578
CS
3412 }\r
3413 }\r
3414 }\r
b602265d
DG
3415 else if (type == NODE_ENCLOSURE) {\r
3416 EnclosureNode* en = ENCLOSURE_(node);\r
3417 if (en->type == ENCLOSURE_MEMORY) {\r
3418 node = NODE_BODY(node);\r
14b0e578
CS
3419 goto retry;\r
3420 }\r
3421 }\r
3422 return 0;\r
3423}\r
3424\r
3425\r
3426static int\r
3427update_string_node_case_fold(regex_t* reg, Node *node)\r
3428{\r
3429 UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];\r
3430 UChar *sbuf, *ebuf, *sp;\r
3431 int r, i, len, sbuf_size;\r
b602265d 3432 StrNode* sn = STR_(node);\r
14b0e578
CS
3433\r
3434 end = sn->end;\r
b602265d 3435 sbuf_size = (int )(end - sn->s) * 2;\r
14b0e578
CS
3436 sbuf = (UChar* )xmalloc(sbuf_size);\r
3437 CHECK_NULL_RETURN_MEMERR(sbuf);\r
3438 ebuf = sbuf + sbuf_size;\r
3439\r
3440 sp = sbuf;\r
3441 p = sn->s;\r
3442 while (p < end) {\r
3443 len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);\r
3444 for (i = 0; i < len; i++) {\r
3445 if (sp >= ebuf) {\r
3446 sbuf = (UChar* )xrealloc(sbuf, sbuf_size * 2, sbuf_size);\r
3447 CHECK_NULL_RETURN_MEMERR(sbuf);\r
3448 sp = sbuf + sbuf_size;\r
3449 sbuf_size *= 2;\r
3450 ebuf = sbuf + sbuf_size;\r
3451 }\r
3452\r
3453 *sp++ = buf[i];\r
3454 }\r
3455 }\r
3456\r
3457 r = onig_node_str_set(node, sbuf, sp);\r
3458 if (r != 0) {\r
3459 xfree(sbuf);\r
3460 return r;\r
3461 }\r
3462\r
3463 xfree(sbuf);\r
3464 return 0;\r
3465}\r
3466\r
3467static int\r
b602265d 3468expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end, regex_t* reg)\r
14b0e578
CS
3469{\r
3470 int r;\r
3471 Node *node;\r
3472\r
3473 node = onig_node_new_str(s, end);\r
3474 if (IS_NULL(node)) return ONIGERR_MEMORY;\r
3475\r
3476 r = update_string_node_case_fold(reg, node);\r
3477 if (r != 0) {\r
3478 onig_node_free(node);\r
3479 return r;\r
3480 }\r
3481\r
b602265d
DG
3482 NODE_STRING_SET_AMBIG(node);\r
3483 NODE_STRING_SET_DONT_GET_OPT_INFO(node);\r
14b0e578
CS
3484 *rnode = node;\r
3485 return 0;\r
3486}\r
3487\r
3488static int\r
b602265d
DG
3489expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], UChar *p,\r
3490 int slen, UChar *end, regex_t* reg, Node **rnode)\r
14b0e578 3491{\r
b602265d
DG
3492 int r, i, j;\r
3493 int len;\r
3494 int varlen;\r
14b0e578
CS
3495 Node *anode, *var_anode, *snode, *xnode, *an;\r
3496 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];\r
3497\r
3498 *rnode = var_anode = NULL_NODE;\r
3499\r
3500 varlen = 0;\r
3501 for (i = 0; i < item_num; i++) {\r
3502 if (items[i].byte_len != slen) {\r
3503 varlen = 1;\r
3504 break;\r
3505 }\r
3506 }\r
3507\r
3508 if (varlen != 0) {\r
3509 *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);\r
3510 if (IS_NULL(var_anode)) return ONIGERR_MEMORY;\r
3511\r
3512 xnode = onig_node_new_list(NULL, NULL);\r
3513 if (IS_NULL(xnode)) goto mem_err;\r
b602265d 3514 NODE_CAR(var_anode) = xnode;\r
14b0e578
CS
3515\r
3516 anode = onig_node_new_alt(NULL_NODE, NULL_NODE);\r
3517 if (IS_NULL(anode)) goto mem_err;\r
b602265d 3518 NODE_CAR(xnode) = anode;\r
14b0e578
CS
3519 }\r
3520 else {\r
3521 *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);\r
3522 if (IS_NULL(anode)) return ONIGERR_MEMORY;\r
3523 }\r
3524\r
3525 snode = onig_node_new_str(p, p + slen);\r
3526 if (IS_NULL(snode)) goto mem_err;\r
3527\r
b602265d 3528 NODE_CAR(anode) = snode;\r
14b0e578
CS
3529\r
3530 for (i = 0; i < item_num; i++) {\r
3531 snode = onig_node_new_str(NULL, NULL);\r
3532 if (IS_NULL(snode)) goto mem_err;\r
3533 \r
3534 for (j = 0; j < items[i].code_len; j++) {\r
3535 len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);\r
3536 if (len < 0) {\r
b602265d
DG
3537 r = len;\r
3538 goto mem_err2;\r
14b0e578
CS
3539 }\r
3540\r
3541 r = onig_node_str_cat(snode, buf, buf + len);\r
3542 if (r != 0) goto mem_err2;\r
3543 }\r
3544\r
3545 an = onig_node_new_alt(NULL_NODE, NULL_NODE);\r
3546 if (IS_NULL(an)) {\r
3547 goto mem_err2;\r
3548 }\r
3549\r
3550 if (items[i].byte_len != slen) {\r
b602265d 3551 Node *rem;\r
14b0e578
CS
3552 UChar *q = p + items[i].byte_len;\r
3553\r
3554 if (q < end) {\r
3555 r = expand_case_fold_make_rem_string(&rem, q, end, reg);\r
3556 if (r != 0) {\r
3557 onig_node_free(an);\r
3558 goto mem_err2;\r
3559 }\r
3560\r
3561 xnode = onig_node_list_add(NULL_NODE, snode);\r
3562 if (IS_NULL(xnode)) {\r
3563 onig_node_free(an);\r
3564 onig_node_free(rem);\r
3565 goto mem_err2;\r
3566 }\r
3567 if (IS_NULL(onig_node_list_add(xnode, rem))) {\r
3568 onig_node_free(an);\r
3569 onig_node_free(xnode);\r
3570 onig_node_free(rem);\r
3571 goto mem_err;\r
3572 }\r
3573\r
b602265d 3574 NODE_CAR(an) = xnode;\r
14b0e578
CS
3575 }\r
3576 else {\r
b602265d 3577 NODE_CAR(an) = snode;\r
14b0e578
CS
3578 }\r
3579\r
b602265d 3580 NODE_CDR(var_anode) = an;\r
14b0e578
CS
3581 var_anode = an;\r
3582 }\r
3583 else {\r
b602265d
DG
3584 NODE_CAR(an) = snode;\r
3585 NODE_CDR(anode) = an;\r
14b0e578
CS
3586 anode = an;\r
3587 }\r
3588 }\r
3589\r
3590 return varlen;\r
3591\r
3592 mem_err2:\r
3593 onig_node_free(snode);\r
3594\r
3595 mem_err:\r
3596 onig_node_free(*rnode);\r
3597\r
3598 return ONIGERR_MEMORY;\r
3599}\r
3600\r
3601static int\r
3602expand_case_fold_string(Node* node, regex_t* reg)\r
3603{\r
3604#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8\r
3605\r
3606 int r, n, len, alt_num;\r
3607 UChar *start, *end, *p;\r
3608 Node *top_root, *root, *snode, *prev_node;\r
3609 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];\r
b602265d 3610 StrNode* sn = STR_(node);\r
14b0e578 3611\r
b602265d 3612 if (NODE_STRING_IS_AMBIG(node)) return 0;\r
14b0e578
CS
3613\r
3614 start = sn->s;\r
3615 end = sn->end;\r
3616 if (start >= end) return 0;\r
3617\r
3618 r = 0;\r
3619 top_root = root = prev_node = snode = NULL_NODE;\r
3620 alt_num = 1;\r
3621 p = start;\r
3622 while (p < end) {\r
b602265d
DG
3623 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag, p, end,\r
3624 items);\r
14b0e578
CS
3625 if (n < 0) {\r
3626 r = n;\r
3627 goto err;\r
3628 }\r
3629\r
3630 len = enclen(reg->enc, p);\r
3631\r
3632 if (n == 0) {\r
3633 if (IS_NULL(snode)) {\r
b602265d
DG
3634 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {\r
3635 top_root = root = onig_node_list_add(NULL_NODE, prev_node);\r
3636 if (IS_NULL(root)) {\r
3637 onig_node_free(prev_node);\r
3638 goto mem_err;\r
3639 }\r
3640 }\r
3641\r
3642 prev_node = snode = onig_node_new_str(NULL, NULL);\r
3643 if (IS_NULL(snode)) goto mem_err;\r
3644 if (IS_NOT_NULL(root)) {\r
3645 if (IS_NULL(onig_node_list_add(root, snode))) {\r
3646 onig_node_free(snode);\r
3647 goto mem_err;\r
3648 }\r
3649 }\r
14b0e578
CS
3650 }\r
3651\r
3652 r = onig_node_str_cat(snode, p, p + len);\r
3653 if (r != 0) goto err;\r
3654 }\r
3655 else {\r
3656 alt_num *= (n + 1);\r
3657 if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;\r
3658\r
3659 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {\r
b602265d
DG
3660 top_root = root = onig_node_list_add(NULL_NODE, prev_node);\r
3661 if (IS_NULL(root)) {\r
3662 onig_node_free(prev_node);\r
3663 goto mem_err;\r
3664 }\r
14b0e578
CS
3665 }\r
3666\r
3667 r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);\r
3668 if (r < 0) goto mem_err;\r
3669 if (r == 1) {\r
b602265d
DG
3670 if (IS_NULL(root)) {\r
3671 top_root = prev_node;\r
3672 }\r
3673 else {\r
3674 if (IS_NULL(onig_node_list_add(root, prev_node))) {\r
3675 onig_node_free(prev_node);\r
3676 goto mem_err;\r
3677 }\r
3678 }\r
3679\r
3680 root = NODE_CAR(prev_node);\r
3681 }\r
3682 else { /* r == 0 */\r
3683 if (IS_NOT_NULL(root)) {\r
3684 if (IS_NULL(onig_node_list_add(root, prev_node))) {\r
3685 onig_node_free(prev_node);\r
3686 goto mem_err;\r
3687 }\r
3688 }\r
3689 }\r
3690\r
14b0e578
CS
3691 snode = NULL_NODE;\r
3692 }\r
3693\r
3694 p += len;\r
3695 }\r
3696\r
3697 if (p < end) {\r
3698 Node *srem;\r
3699\r
3700 r = expand_case_fold_make_rem_string(&srem, p, end, reg);\r
3701 if (r != 0) goto mem_err;\r
3702\r
3703 if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {\r
3704 top_root = root = onig_node_list_add(NULL_NODE, prev_node);\r
3705 if (IS_NULL(root)) {\r
b602265d
DG
3706 onig_node_free(srem);\r
3707 onig_node_free(prev_node);\r
3708 goto mem_err;\r
3709 }\r
3710 }\r
3711\r
3712 if (IS_NULL(root)) {\r
3713 prev_node = srem;\r
3714 }\r
3715 else {\r
3716 if (IS_NULL(onig_node_list_add(root, srem))) {\r
3717 onig_node_free(srem);\r
3718 goto mem_err;\r
3719 }\r
3720 }\r
3721 }\r
3722\r
3723 /* ending */\r
3724 top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);\r
3725 swap_node(node, top_root);\r
3726 onig_node_free(top_root);\r
3727 return 0;\r
3728\r
3729 mem_err:\r
3730 r = ONIGERR_MEMORY;\r
3731\r
3732 err:\r
3733 onig_node_free(top_root);\r
3734 return r;\r
3735}\r
3736\r
3737#ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT\r
3738static enum QuantBodyEmpty\r
3739quantifiers_memory_node_info(Node* node)\r
3740{\r
3741 int r = QUANT_BODY_IS_EMPTY;\r
3742\r
3743 switch (NODE_TYPE(node)) {\r
3744 case NODE_LIST:\r
3745 case NODE_ALT:\r
3746 {\r
3747 int v;\r
3748 do {\r
3749 v = quantifiers_memory_node_info(NODE_CAR(node));\r
3750 if (v > r) r = v;\r
3751 } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
3752 }\r
3753 break;\r
3754\r
3755#ifdef USE_CALL\r
3756 case NODE_CALL:\r
3757 if (NODE_IS_RECURSION(node)) {\r
3758 return QUANT_BODY_IS_EMPTY_REC; /* tiny version */\r
3759 }\r
3760 else\r
3761 r = quantifiers_memory_node_info(NODE_BODY(node));\r
3762 break;\r
3763#endif\r
3764\r
3765 case NODE_QUANT:\r
3766 {\r
3767 QuantNode* qn = QUANT_(node);\r
3768 if (qn->upper != 0) {\r
3769 r = quantifiers_memory_node_info(NODE_BODY(node));\r
3770 }\r
3771 }\r
3772 break;\r
3773\r
3774 case NODE_ENCLOSURE:\r
3775 {\r
3776 EnclosureNode* en = ENCLOSURE_(node);\r
3777 switch (en->type) {\r
3778 case ENCLOSURE_MEMORY:\r
3779 if (NODE_IS_RECURSION(node)) {\r
3780 return QUANT_BODY_IS_EMPTY_REC;\r
3781 }\r
3782 return QUANT_BODY_IS_EMPTY_MEM;\r
3783 break;\r
3784\r
3785 case ENCLOSURE_OPTION:\r
3786 case ENCLOSURE_STOP_BACKTRACK:\r
3787 r = quantifiers_memory_node_info(NODE_BODY(node));\r
3788 break;\r
3789 case ENCLOSURE_IF_ELSE:\r
3790 {\r
3791 int v;\r
3792 r = quantifiers_memory_node_info(NODE_BODY(node));\r
3793 if (IS_NOT_NULL(en->te.Then)) {\r
3794 v = quantifiers_memory_node_info(en->te.Then);\r
3795 if (v > r) r = v;\r
3796 }\r
3797 if (IS_NOT_NULL(en->te.Else)) {\r
3798 v = quantifiers_memory_node_info(en->te.Else);\r
3799 if (v > r) r = v;\r
3800 }\r
3801 }\r
3802 break;\r
3803 default:\r
3804 break;\r
3805 }\r
3806 }\r
3807 break;\r
3808\r
3809 case NODE_BACKREF:\r
3810 case NODE_STRING:\r
3811 case NODE_CTYPE:\r
3812 case NODE_CCLASS:\r
3813 case NODE_ANCHOR:\r
3814 case NODE_GIMMICK:\r
3815 default:\r
3816 break;\r
3817 }\r
3818\r
3819 return r;\r
3820}\r
3821#endif /* USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT */\r
3822\r
3823\r
3824#define IN_ALT (1<<0)\r
3825#define IN_NOT (1<<1)\r
3826#define IN_REAL_REPEAT (1<<2)\r
3827#define IN_VAR_REPEAT (1<<3)\r
3828#define IN_ZERO_REPEAT (1<<4)\r
3829#define IN_MULTI_ENTRY (1<<5)\r
3830\r
3831#ifdef USE_CALL\r
3832\r
3833#ifdef __GNUC__\r
3834__inline\r
3835#endif\r
3836static int\r
3837setup_call_node_call(CallNode* cn, ScanEnv* env, int state)\r
3838{\r
3839 MemEnv* mem_env = SCANENV_MEMENV(env);\r
3840\r
3841 if (cn->by_number != 0) {\r
3842 int gnum = cn->group_num;\r
3843\r
3844 if (env->num_named > 0 &&\r
3845 IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&\r
3846 ! ONIG_IS_OPTION_ON(env->options, ONIG_OPTION_CAPTURE_GROUP)) {\r
3847 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;\r
3848 }\r
3849\r
3850 if (gnum > env->num_mem) {\r
3851 onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_GROUP_REFERENCE,\r
3852 cn->name, cn->name_end);\r
3853 return ONIGERR_UNDEFINED_GROUP_REFERENCE;\r
3854 }\r
3855\r
3856 set_call_attr:\r
3857 NODE_CALL_BODY(cn) = mem_env[cn->group_num].node;\r
3858 if (IS_NULL(NODE_CALL_BODY(cn))) {\r
3859 onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,\r
3860 cn->name, cn->name_end);\r
3861 return ONIGERR_UNDEFINED_NAME_REFERENCE;\r
3862 }\r
3863 }\r
3864 else {\r
3865 int *refs;\r
3866\r
3867 int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end, &refs);\r
3868 if (n <= 0) {\r
3869 onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,\r
3870 cn->name, cn->name_end);\r
3871 return ONIGERR_UNDEFINED_NAME_REFERENCE;\r
3872 }\r
3873 else if (n > 1) {\r
3874 onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL,\r
3875 cn->name, cn->name_end);\r
3876 return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;\r
3877 }\r
3878 else {\r
3879 cn->group_num = refs[0];\r
3880 goto set_call_attr;\r
3881 }\r
3882 }\r
3883\r
3884 return 0;\r
3885}\r
3886\r
3887static void\r
3888setup_call2_call(Node* node)\r
3889{\r
3890 switch (NODE_TYPE(node)) {\r
3891 case NODE_LIST:\r
3892 case NODE_ALT:\r
3893 do {\r
3894 setup_call2_call(NODE_CAR(node));\r
3895 } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
3896 break;\r
3897\r
3898 case NODE_QUANT:\r
3899 setup_call2_call(NODE_BODY(node));\r
3900 break;\r
3901\r
3902 case NODE_ANCHOR:\r
3903 if (ANCHOR_HAS_BODY(ANCHOR_(node)))\r
3904 setup_call2_call(NODE_BODY(node));\r
3905 break;\r
3906\r
3907 case NODE_ENCLOSURE:\r
3908 {\r
3909 EnclosureNode* en = ENCLOSURE_(node);\r
3910\r
3911 if (en->type == ENCLOSURE_MEMORY) {\r
3912 if (! NODE_IS_MARK1(node)) {\r
3913 NODE_STATUS_ADD(node, MARK1);\r
3914 setup_call2_call(NODE_BODY(node));\r
3915 NODE_STATUS_REMOVE(node, MARK1);\r
3916 }\r
3917 }\r
3918 else if (en->type == ENCLOSURE_IF_ELSE) {\r
3919 setup_call2_call(NODE_BODY(node));\r
3920 if (IS_NOT_NULL(en->te.Then))\r
3921 setup_call2_call(en->te.Then);\r
3922 if (IS_NOT_NULL(en->te.Else))\r
3923 setup_call2_call(en->te.Else);\r
3924 }\r
3925 else {\r
3926 setup_call2_call(NODE_BODY(node));\r
3927 }\r
3928 }\r
3929 break;\r
3930\r
3931 case NODE_CALL:\r
3932 if (! NODE_IS_MARK1(node)) {\r
3933 NODE_STATUS_ADD(node, MARK1);\r
3934 {\r
3935 CallNode* cn = CALL_(node);\r
3936 Node* called = NODE_CALL_BODY(cn);\r
3937\r
3938 cn->entry_count++;\r
3939\r
3940 NODE_STATUS_ADD(called, CALLED);\r
3941 ENCLOSURE_(called)->m.entry_count++;\r
3942 setup_call2_call(called);\r
3943 }\r
3944 NODE_STATUS_REMOVE(node, MARK1);\r
3945 }\r
3946 break;\r
3947\r
3948 default:\r
3949 break;\r
3950 }\r
3951}\r
3952\r
3953static int\r
3954setup_call(Node* node, ScanEnv* env, int state)\r
3955{\r
3956 int r;\r
3957\r
3958 switch (NODE_TYPE(node)) {\r
3959 case NODE_LIST:\r
3960 case NODE_ALT:\r
3961 do {\r
3962 r = setup_call(NODE_CAR(node), env, state);\r
3963 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));\r
3964 break;\r
3965\r
3966 case NODE_QUANT:\r
3967 if (QUANT_(node)->upper == 0)\r
3968 state |= IN_ZERO_REPEAT;\r
3969\r
3970 r = setup_call(NODE_BODY(node), env, state);\r
3971 break;\r
3972\r
3973 case NODE_ANCHOR:\r
3974 if (ANCHOR_HAS_BODY(ANCHOR_(node)))\r
3975 r = setup_call(NODE_BODY(node), env, state);\r
3976 else\r
3977 r = 0;\r
3978 break;\r
3979\r
3980 case NODE_ENCLOSURE:\r
3981 {\r
3982 EnclosureNode* en = ENCLOSURE_(node);\r
3983\r
3984 if (en->type == ENCLOSURE_MEMORY) {\r
3985 if ((state & IN_ZERO_REPEAT) != 0) {\r
3986 NODE_STATUS_ADD(node, IN_ZERO_REPEAT);\r
3987 ENCLOSURE_(node)->m.entry_count--;\r
3988 }\r
3989 r = setup_call(NODE_BODY(node), env, state);\r
3990 }\r
3991 else if (en->type == ENCLOSURE_IF_ELSE) {\r
3992 r = setup_call(NODE_BODY(node), env, state);\r
3993 if (r != 0) return r;\r
3994 if (IS_NOT_NULL(en->te.Then)) {\r
3995 r = setup_call(en->te.Then, env, state);\r
3996 if (r != 0) return r;\r
3997 }\r
3998 if (IS_NOT_NULL(en->te.Else))\r
3999 r = setup_call(en->te.Else, env, state);\r
4000 }\r
4001 else\r
4002 r = setup_call(NODE_BODY(node), env, state);\r
4003 }\r
4004 break;\r
4005\r
4006 case NODE_CALL:\r
4007 if ((state & IN_ZERO_REPEAT) != 0) {\r
4008 NODE_STATUS_ADD(node, IN_ZERO_REPEAT);\r
4009 CALL_(node)->entry_count--;\r
4010 }\r
4011\r
4012 r = setup_call_node_call(CALL_(node), env, state);\r
4013 break;\r
4014\r
4015 default:\r
4016 r = 0;\r
4017 break;\r
4018 }\r
4019\r
4020 return r;\r
4021}\r
4022\r
4023static int\r
4024setup_call2(Node* node)\r
4025{\r
4026 int r = 0;\r
4027\r
4028 switch (NODE_TYPE(node)) {\r
4029 case NODE_LIST:\r
4030 case NODE_ALT:\r
4031 do {\r
4032 r = setup_call2(NODE_CAR(node));\r
4033 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));\r
4034 break;\r
4035\r
4036 case NODE_QUANT:\r
4037 if (QUANT_(node)->upper != 0)\r
4038 r = setup_call2(NODE_BODY(node));\r
4039 break;\r
4040\r
4041 case NODE_ANCHOR:\r
4042 if (ANCHOR_HAS_BODY(ANCHOR_(node)))\r
4043 r = setup_call2(NODE_BODY(node));\r
4044 break;\r
4045\r
4046 case NODE_ENCLOSURE:\r
4047 if (! NODE_IS_IN_ZERO_REPEAT(node))\r
4048 r = setup_call2(NODE_BODY(node));\r
4049\r
4050 {\r
4051 EnclosureNode* en = ENCLOSURE_(node);\r
4052\r
4053 if (r != 0) return r;\r
4054 if (en->type == ENCLOSURE_IF_ELSE) {\r
4055 if (IS_NOT_NULL(en->te.Then)) {\r
4056 r = setup_call2(en->te.Then);\r
4057 if (r != 0) return r;\r
4058 }\r
4059 if (IS_NOT_NULL(en->te.Else))\r
4060 r = setup_call2(en->te.Else);\r
4061 }\r
4062 }\r
4063 break;\r
4064\r
4065 case NODE_CALL:\r
4066 if (! NODE_IS_IN_ZERO_REPEAT(node)) {\r
4067 setup_call2_call(node);\r
4068 }\r
4069 break;\r
4070\r
4071 default:\r
4072 break;\r
4073 }\r
4074\r
4075 return r;\r
4076}\r
4077\r
4078\r
4079static void\r
4080setup_called_state_call(Node* node, int state)\r
4081{\r
4082 switch (NODE_TYPE(node)) {\r
4083 case NODE_ALT:\r
4084 state |= IN_ALT;\r
4085 /* fall */\r
4086 case NODE_LIST:\r
4087 do {\r
4088 setup_called_state_call(NODE_CAR(node), state);\r
4089 } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
4090 break;\r
4091\r
4092 case NODE_QUANT:\r
4093 {\r
4094 QuantNode* qn = QUANT_(node);\r
4095\r
4096 if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 2)\r
4097 state |= IN_REAL_REPEAT;\r
4098 if (qn->lower != qn->upper)\r
4099 state |= IN_VAR_REPEAT;\r
4100\r
4101 setup_called_state_call(NODE_QUANT_BODY(qn), state);\r
4102 }\r
4103 break;\r
4104\r
4105 case NODE_ANCHOR:\r
4106 {\r
4107 AnchorNode* an = ANCHOR_(node);\r
4108\r
4109 switch (an->type) {\r
4110 case ANCHOR_PREC_READ_NOT:\r
4111 case ANCHOR_LOOK_BEHIND_NOT:\r
4112 state |= IN_NOT;\r
4113 /* fall */\r
4114 case ANCHOR_PREC_READ:\r
4115 case ANCHOR_LOOK_BEHIND:\r
4116 setup_called_state_call(NODE_ANCHOR_BODY(an), state);\r
4117 break;\r
4118 default:\r
4119 break;\r
4120 }\r
4121 }\r
4122 break;\r
4123\r
4124 case NODE_ENCLOSURE:\r
4125 {\r
4126 EnclosureNode* en = ENCLOSURE_(node);\r
4127\r
4128 if (en->type == ENCLOSURE_MEMORY) {\r
4129 if (NODE_IS_MARK1(node)) {\r
4130 if ((~en->m.called_state & state) != 0) {\r
4131 en->m.called_state |= state;\r
4132 setup_called_state_call(NODE_BODY(node), state);\r
4133 }\r
4134 }\r
4135 else {\r
4136 NODE_STATUS_ADD(node, MARK1);\r
4137 en->m.called_state |= state;\r
4138 setup_called_state_call(NODE_BODY(node), state);\r
4139 NODE_STATUS_REMOVE(node, MARK1);\r
4140 }\r
4141 }\r
4142 else if (en->type == ENCLOSURE_IF_ELSE) {\r
4143 if (IS_NOT_NULL(en->te.Then)) {\r
4144 setup_called_state_call(en->te.Then, state);\r
4145 }\r
4146 if (IS_NOT_NULL(en->te.Else))\r
4147 setup_called_state_call(en->te.Else, state);\r
4148 }\r
4149 else {\r
4150 setup_called_state_call(NODE_BODY(node), state);\r
4151 }\r
4152 }\r
4153 break;\r
4154\r
4155 case NODE_CALL:\r
4156 setup_called_state_call(NODE_BODY(node), state);\r
4157 break;\r
4158\r
4159 default:\r
4160 break;\r
4161 }\r
4162}\r
4163\r
4164static void\r
4165setup_called_state(Node* node, int state)\r
4166{\r
4167 switch (NODE_TYPE(node)) {\r
4168 case NODE_ALT:\r
4169 state |= IN_ALT;\r
4170 /* fall */\r
4171 case NODE_LIST:\r
4172 do {\r
4173 setup_called_state(NODE_CAR(node), state);\r
4174 } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
4175 break;\r
4176\r
4177#ifdef USE_CALL\r
4178 case NODE_CALL:\r
4179 setup_called_state_call(node, state);\r
4180 break;\r
4181#endif\r
4182\r
4183 case NODE_ENCLOSURE:\r
4184 {\r
4185 EnclosureNode* en = ENCLOSURE_(node);\r
4186\r
4187 switch (en->type) {\r
4188 case ENCLOSURE_MEMORY:\r
4189 if (en->m.entry_count > 1)\r
4190 state |= IN_MULTI_ENTRY;\r
4191\r
4192 en->m.called_state |= state;\r
4193 /* fall */\r
4194 case ENCLOSURE_OPTION:\r
4195 case ENCLOSURE_STOP_BACKTRACK:\r
4196 setup_called_state(NODE_BODY(node), state);\r
4197 break;\r
4198 case ENCLOSURE_IF_ELSE:\r
4199 setup_called_state(NODE_BODY(node), state);\r
4200 if (IS_NOT_NULL(en->te.Then))\r
4201 setup_called_state(en->te.Then, state);\r
4202 if (IS_NOT_NULL(en->te.Else))\r
4203 setup_called_state(en->te.Else, state);\r
4204 break;\r
4205 }\r
4206 }\r
4207 break;\r
4208\r
4209 case NODE_QUANT:\r
4210 {\r
4211 QuantNode* qn = QUANT_(node);\r
4212\r
4213 if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 2)\r
4214 state |= IN_REAL_REPEAT;\r
4215 if (qn->lower != qn->upper)\r
4216 state |= IN_VAR_REPEAT;\r
4217\r
4218 setup_called_state(NODE_QUANT_BODY(qn), state);\r
4219 }\r
4220 break;\r
4221\r
4222 case NODE_ANCHOR:\r
4223 {\r
4224 AnchorNode* an = ANCHOR_(node);\r
4225\r
4226 switch (an->type) {\r
4227 case ANCHOR_PREC_READ_NOT:\r
4228 case ANCHOR_LOOK_BEHIND_NOT:\r
4229 state |= IN_NOT;\r
4230 /* fall */\r
4231 case ANCHOR_PREC_READ:\r
4232 case ANCHOR_LOOK_BEHIND:\r
4233 setup_called_state(NODE_ANCHOR_BODY(an), state);\r
4234 break;\r
4235 default:\r
4236 break;\r
14b0e578
CS
4237 }\r
4238 }\r
b602265d 4239 break;\r
14b0e578 4240\r
b602265d
DG
4241 case NODE_BACKREF:\r
4242 case NODE_STRING:\r
4243 case NODE_CTYPE:\r
4244 case NODE_CCLASS:\r
4245 case NODE_GIMMICK:\r
4246 default:\r
4247 break;\r
14b0e578 4248 }\r
b602265d 4249}\r
14b0e578 4250\r
b602265d 4251#endif /* USE_CALL */\r
14b0e578 4252\r
14b0e578 4253\r
b602265d
DG
4254static int setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env);\r
4255\r
4256#ifdef __GNUC__\r
4257__inline\r
4258#endif\r
4259static int\r
4260setup_anchor(Node* node, regex_t* reg, int state, ScanEnv* env)\r
4261{\r
4262/* allowed node types in look-behind */\r
4263#define ALLOWED_TYPE_IN_LB \\r
4264 ( NODE_BIT_LIST | NODE_BIT_ALT | NODE_BIT_STRING | NODE_BIT_CCLASS \\r
4265 | NODE_BIT_CTYPE | NODE_BIT_ANCHOR | NODE_BIT_ENCLOSURE | NODE_BIT_QUANT \\r
4266 | NODE_BIT_CALL | NODE_BIT_GIMMICK)\r
14b0e578 4267\r
b602265d
DG
4268#define ALLOWED_ENCLOSURE_IN_LB ( 1<<ENCLOSURE_MEMORY | 1<<ENCLOSURE_OPTION )\r
4269#define ALLOWED_ENCLOSURE_IN_LB_NOT (1<<ENCLOSURE_OPTION)\r
14b0e578 4270\r
b602265d
DG
4271#define ALLOWED_ANCHOR_IN_LB \\r
4272 ( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF \\r
4273 | ANCHOR_BEGIN_POSITION | ANCHOR_WORD_BOUNDARY | ANCHOR_NO_WORD_BOUNDARY \\r
4274 | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END \\r
4275 | ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY \\r
4276 | ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY )\r
14b0e578 4277\r
b602265d
DG
4278#define ALLOWED_ANCHOR_IN_LB_NOT \\r
4279 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE \\r
4280 | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_WORD_BOUNDARY \\r
4281 | ANCHOR_NO_WORD_BOUNDARY | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END \\r
4282 | ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY \\r
4283 | ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY )\r
14b0e578 4284\r
b602265d
DG
4285 int r;\r
4286 AnchorNode* an = ANCHOR_(node);\r
14b0e578 4287\r
b602265d
DG
4288 switch (an->type) {\r
4289 case ANCHOR_PREC_READ:\r
4290 r = setup_tree(NODE_ANCHOR_BODY(an), reg, state, env);\r
4291 break;\r
4292 case ANCHOR_PREC_READ_NOT:\r
4293 r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state | IN_NOT), env);\r
4294 break;\r
14b0e578 4295\r
b602265d 4296 case ANCHOR_LOOK_BEHIND:\r
14b0e578 4297 {\r
b602265d
DG
4298 r = check_type_tree(NODE_ANCHOR_BODY(an), ALLOWED_TYPE_IN_LB,\r
4299 ALLOWED_ENCLOSURE_IN_LB, ALLOWED_ANCHOR_IN_LB);\r
4300 if (r < 0) return r;\r
4301 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;\r
4302 r = setup_tree(NODE_ANCHOR_BODY(an), reg, state, env);\r
4303 if (r != 0) return r;\r
4304 r = setup_look_behind(node, reg, env);\r
14b0e578
CS
4305 }\r
4306 break;\r
4307\r
b602265d 4308 case ANCHOR_LOOK_BEHIND_NOT:\r
14b0e578 4309 {\r
b602265d
DG
4310 r = check_type_tree(NODE_ANCHOR_BODY(an), ALLOWED_TYPE_IN_LB,\r
4311 ALLOWED_ENCLOSURE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);\r
4312 if (r < 0) return r;\r
4313 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;\r
4314 r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state | IN_NOT), env);\r
4315 if (r != 0) return r;\r
4316 r = setup_look_behind(node, reg, env);\r
14b0e578
CS
4317 }\r
4318 break;\r
4319\r
b602265d
DG
4320 default:\r
4321 r = 0;\r
4322 break;\r
4323 }\r
14b0e578 4324\r
b602265d
DG
4325 return r;\r
4326}\r
14b0e578 4327\r
b602265d
DG
4328#ifdef __GNUC__\r
4329__inline\r
4330#endif\r
4331static int\r
4332setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env)\r
4333{\r
4334 int r;\r
4335 OnigLen d;\r
4336 QuantNode* qn = QUANT_(node);\r
4337 Node* body = NODE_BODY(node);\r
4338\r
4339 if ((state & IN_REAL_REPEAT) != 0) {\r
4340 NODE_STATUS_ADD(node, IN_REAL_REPEAT);\r
4341 }\r
4342 if ((state & IN_MULTI_ENTRY) != 0) {\r
4343 NODE_STATUS_ADD(node, IN_MULTI_ENTRY);\r
4344 }\r
4345\r
4346 if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {\r
4347 d = tree_min_len(body, env);\r
4348 if (d == 0) {\r
4349#ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT\r
4350 qn->body_empty_info = quantifiers_memory_node_info(body);\r
4351 if (qn->body_empty_info == QUANT_BODY_IS_EMPTY_REC) {\r
4352 if (NODE_TYPE(body) == NODE_ENCLOSURE &&\r
4353 ENCLOSURE_(body)->type == ENCLOSURE_MEMORY) {\r
4354 MEM_STATUS_ON(env->bt_mem_end, ENCLOSURE_(body)->m.regnum);\r
4355 }\r
4356 }\r
4357#else\r
4358 qn->body_empty_info = QUANT_BODY_IS_EMPTY;\r
4359#endif\r
14b0e578 4360 }\r
b602265d 4361 }\r
14b0e578 4362\r
b602265d
DG
4363 if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 2)\r
4364 state |= IN_REAL_REPEAT;\r
4365 if (qn->lower != qn->upper)\r
4366 state |= IN_VAR_REPEAT;\r
14b0e578 4367\r
b602265d
DG
4368 r = setup_tree(body, reg, state, env);\r
4369 if (r != 0) return r;\r
14b0e578 4370\r
b602265d
DG
4371 /* expand string */\r
4372#define EXPAND_STRING_MAX_LENGTH 100\r
4373 if (NODE_TYPE(body) == NODE_STRING) {\r
4374 if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper &&\r
4375 qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) {\r
4376 int len = NODE_STRING_LEN(body);\r
4377 StrNode* sn = STR_(body);\r
4378\r
4379 if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) {\r
4380 int i, n = qn->lower;\r
4381 onig_node_conv_to_str_node(node, STR_(body)->flag);\r
4382 for (i = 0; i < n; i++) {\r
4383 r = onig_node_str_cat(node, sn->s, sn->end);\r
4384 if (r != 0) return r;\r
4385 }\r
4386 onig_node_free(body);\r
4387 return r;\r
14b0e578
CS
4388 }\r
4389 }\r
b602265d 4390 }\r
14b0e578 4391\r
b602265d
DG
4392#ifdef USE_OP_PUSH_OR_JUMP_EXACT\r
4393 if (qn->greedy && (qn->body_empty_info != QUANT_BODY_IS_NOT_EMPTY)) {\r
4394 if (NODE_TYPE(body) == NODE_QUANT) {\r
4395 QuantNode* tqn = QUANT_(body);\r
4396 if (IS_NOT_NULL(tqn->head_exact)) {\r
4397 qn->head_exact = tqn->head_exact;\r
4398 tqn->head_exact = NULL;\r
4399 }\r
4400 }\r
4401 else {\r
4402 qn->head_exact = get_head_value_node(NODE_BODY(node), 1, reg);\r
4403 }\r
14b0e578 4404 }\r
b602265d 4405#endif\r
14b0e578
CS
4406\r
4407 return r;\r
4408}\r
14b0e578
CS
4409\r
4410/* setup_tree does the following work.\r
b602265d 4411 1. check empty loop. (set qn->body_empty_info)\r
14b0e578
CS
4412 2. expand ignore-case in char class.\r
4413 3. set memory status bit flags. (reg->mem_stats)\r
4414 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].\r
4415 5. find invalid patterns in look-behind.\r
4416 6. expand repeated string.\r
4417 */\r
4418static int\r
4419setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)\r
4420{\r
14b0e578
CS
4421 int r = 0;\r
4422\r
b602265d
DG
4423 switch (NODE_TYPE(node)) {\r
4424 case NODE_LIST:\r
14b0e578
CS
4425 {\r
4426 Node* prev = NULL_NODE;\r
4427 do {\r
b602265d
DG
4428 r = setup_tree(NODE_CAR(node), reg, state, env);\r
4429 if (IS_NOT_NULL(prev) && r == 0) {\r
4430 r = next_setup(prev, NODE_CAR(node), reg);\r
4431 }\r
4432 prev = NODE_CAR(node);\r
4433 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));\r
14b0e578
CS
4434 }\r
4435 break;\r
4436\r
b602265d 4437 case NODE_ALT:\r
14b0e578 4438 do {\r
b602265d
DG
4439 r = setup_tree(NODE_CAR(node), reg, (state | IN_ALT), env);\r
4440 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));\r
14b0e578
CS
4441 break;\r
4442\r
b602265d
DG
4443 case NODE_STRING:\r
4444 if (IS_IGNORECASE(reg->options) && !NODE_STRING_IS_RAW(node)) {\r
14b0e578
CS
4445 r = expand_case_fold_string(node, reg);\r
4446 }\r
4447 break;\r
4448\r
b602265d 4449 case NODE_BACKREF:\r
14b0e578
CS
4450 {\r
4451 int i;\r
4452 int* p;\r
b602265d 4453 BackRefNode* br = BACKREF_(node);\r
14b0e578
CS
4454 p = BACKREFS_P(br);\r
4455 for (i = 0; i < br->back_num; i++) {\r
b602265d
DG
4456 if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;\r
4457 MEM_STATUS_ON(env->backrefed_mem, p[i]);\r
4458 MEM_STATUS_ON(env->bt_mem_start, p[i]);\r
14b0e578 4459#ifdef USE_BACKREF_WITH_LEVEL\r
b602265d
DG
4460 if (NODE_IS_NEST_LEVEL(node)) {\r
4461 MEM_STATUS_ON(env->bt_mem_end, p[i]);\r
4462 }\r
14b0e578 4463#endif\r
14b0e578
CS
4464 }\r
4465 }\r
4466 break;\r
4467\r
b602265d 4468 case NODE_ENCLOSURE:\r
14b0e578 4469 {\r
b602265d 4470 EnclosureNode* en = ENCLOSURE_(node);\r
14b0e578 4471\r
b602265d
DG
4472 switch (en->type) {\r
4473 case ENCLOSURE_OPTION:\r
4474 {\r
4475 OnigOptionType options = reg->options;\r
4476 reg->options = ENCLOSURE_(node)->o.options;\r
4477 r = setup_tree(NODE_BODY(node), reg, state, env);\r
4478 reg->options = options;\r
4479 }\r
4480 break;\r
14b0e578 4481\r
b602265d
DG
4482 case ENCLOSURE_MEMORY:\r
4483#ifdef USE_CALL\r
4484 state |= en->m.called_state;\r
14b0e578 4485#endif\r
14b0e578 4486\r
b602265d
DG
4487 if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_MULTI_ENTRY)) != 0\r
4488 || NODE_IS_RECURSION(node)) {\r
4489 MEM_STATUS_ON(env->bt_mem_start, en->m.regnum);\r
4490 }\r
4491 r = setup_tree(NODE_BODY(node), reg, state, env);\r
4492 break;\r
14b0e578 4493\r
b602265d
DG
4494 case ENCLOSURE_STOP_BACKTRACK:\r
4495 {\r
4496 Node* target = NODE_BODY(node);\r
4497 r = setup_tree(target, reg, state, env);\r
4498 if (NODE_TYPE(target) == NODE_QUANT) {\r
4499 QuantNode* tqn = QUANT_(target);\r
4500 if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&\r
4501 tqn->greedy != 0) { /* (?>a*), a*+ etc... */\r
4502 if (NODE_IS_SIMPLE_TYPE(NODE_BODY(target)))\r
4503 NODE_STATUS_ADD(node, STOP_BT_SIMPLE_REPEAT);\r
4504 }\r
4505 }\r
4506 }\r
14b0e578
CS
4507 break;\r
4508\r
b602265d
DG
4509 case ENCLOSURE_IF_ELSE:\r
4510 r = setup_tree(NODE_BODY(node), reg, (state | IN_ALT), env);\r
4511 if (r != 0) return r;\r
4512 if (IS_NOT_NULL(en->te.Then)) {\r
4513 r = setup_tree(en->te.Then, reg, (state | IN_ALT), env);\r
4514 if (r != 0) return r;\r
4515 }\r
4516 if (IS_NOT_NULL(en->te.Else))\r
4517 r = setup_tree(en->te.Else, reg, (state | IN_ALT), env);\r
4518 break;\r
14b0e578
CS
4519 }\r
4520 }\r
4521 break;\r
4522\r
b602265d
DG
4523 case NODE_QUANT:\r
4524 r = setup_quant(node, reg, state, env);\r
4525 break;\r
14b0e578 4526\r
b602265d
DG
4527 case NODE_ANCHOR:\r
4528 r = setup_anchor(node, reg, state, env);\r
14b0e578
CS
4529 break;\r
4530\r
b602265d
DG
4531#ifdef USE_CALL\r
4532 case NODE_CALL:\r
4533#endif\r
4534 case NODE_CTYPE:\r
4535 case NODE_CCLASS:\r
4536 case NODE_GIMMICK:\r
14b0e578
CS
4537 default:\r
4538 break;\r
4539 }\r
4540\r
4541 return r;\r
4542}\r
4543\r
b602265d 4544/* set skip map for Boyer-Moore search */\r
14b0e578
CS
4545static int\r
4546set_bm_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,\r
b602265d 4547 UChar skip[], int** int_skip)\r
14b0e578
CS
4548{\r
4549 int i, len;\r
4550\r
b602265d 4551 len = (int )(end - s);\r
14b0e578 4552 if (len < ONIG_CHAR_TABLE_SIZE) {\r
b602265d 4553 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = len;\r
14b0e578
CS
4554\r
4555 for (i = 0; i < len - 1; i++)\r
b602265d 4556 skip[s[i]] = len - 1 - i;\r
14b0e578
CS
4557 }\r
4558 else {\r
4559 if (IS_NULL(*int_skip)) {\r
4560 *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);\r
4561 if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;\r
4562 }\r
4563 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = len;\r
4564\r
4565 for (i = 0; i < len - 1; i++)\r
4566 (*int_skip)[s[i]] = len - 1 - i;\r
4567 }\r
4568 return 0;\r
4569}\r
4570\r
4571#define OPT_EXACT_MAXLEN 24\r
4572\r
4573typedef struct {\r
b602265d
DG
4574 OnigLen min; /* min byte length */\r
4575 OnigLen max; /* max byte length */\r
4576} MinMax;\r
14b0e578
CS
4577\r
4578typedef struct {\r
b602265d 4579 MinMax mmd;\r
14b0e578
CS
4580 OnigEncoding enc;\r
4581 OnigOptionType options;\r
4582 OnigCaseFoldType case_fold_flag;\r
4583 ScanEnv* scan_env;\r
4584} OptEnv;\r
4585\r
4586typedef struct {\r
b602265d
DG
4587 int left;\r
4588 int right;\r
4589} OptAnc;\r
14b0e578
CS
4590\r
4591typedef struct {\r
b602265d
DG
4592 MinMax mmd; /* position */\r
4593 OptAnc anc;\r
4594 int reach_end;\r
4595 int ignore_case;\r
4596 int len;\r
4597 UChar s[OPT_EXACT_MAXLEN];\r
4598} OptExact;\r
14b0e578
CS
4599\r
4600typedef struct {\r
b602265d
DG
4601 MinMax mmd; /* position */\r
4602 OptAnc anc;\r
4603 int value; /* weighted value */\r
4604 UChar map[ONIG_CHAR_TABLE_SIZE];\r
4605} OptMap;\r
14b0e578
CS
4606\r
4607typedef struct {\r
b602265d
DG
4608 MinMax len;\r
4609 OptAnc anc;\r
4610 OptExact exb; /* boundary */\r
4611 OptExact exm; /* middle */\r
4612 OptExact expr; /* prec read (?=...) */\r
4613 OptMap map; /* boundary */\r
4614} NodeOpt;\r
14b0e578
CS
4615\r
4616\r
4617static int\r
4618map_position_value(OnigEncoding enc, int i)\r
4619{\r
b602265d 4620 static const short int Vals[] = {\r
14b0e578
CS
4621 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,\r
4622 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\r
4623 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,\r
4624 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,\r
4625 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,\r
4626 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,\r
4627 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,\r
4628 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1\r
4629 };\r
4630\r
b602265d 4631 if (i < (int )(sizeof(Vals)/sizeof(Vals[0]))) {\r
14b0e578
CS
4632 if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)\r
4633 return 20;\r
4634 else\r
b602265d 4635 return (int )Vals[i];\r
14b0e578
CS
4636 }\r
4637 else\r
4638 return 4; /* Take it easy. */\r
4639}\r
4640\r
4641static int\r
b602265d 4642distance_value(MinMax* mm)\r
14b0e578
CS
4643{\r
4644 /* 1000 / (min-max-dist + 1) */\r
4645 static const short int dist_vals[] = {\r
4646 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100, \r
4647 91, 83, 77, 71, 67, 63, 59, 56, 53, 50, \r
4648 48, 45, 43, 42, 40, 38, 37, 36, 34, 33, \r
4649 32, 31, 30, 29, 29, 28, 27, 26, 26, 25, \r
4650 24, 24, 23, 23, 22, 22, 21, 21, 20, 20, \r
4651 20, 19, 19, 19, 18, 18, 18, 17, 17, 17, \r
4652 16, 16, 16, 16, 15, 15, 15, 15, 14, 14, \r
4653 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, \r
4654 12, 12, 12, 12, 12, 12, 11, 11, 11, 11, \r
4655 11, 11, 11, 11, 11, 10, 10, 10, 10, 10\r
4656 };\r
4657\r
b602265d 4658 OnigLen d;\r
14b0e578 4659\r
b602265d 4660 if (mm->max == INFINITE_LEN) return 0;\r
14b0e578
CS
4661\r
4662 d = mm->max - mm->min;\r
b602265d 4663 if (d < (OnigLen )(sizeof(dist_vals)/sizeof(dist_vals[0])))\r
14b0e578
CS
4664 /* return dist_vals[d] * 16 / (mm->min + 12); */\r
4665 return (int )dist_vals[d];\r
4666 else\r
4667 return 1;\r
4668}\r
4669\r
4670static int\r
b602265d 4671comp_distance_value(MinMax* d1, MinMax* d2, int v1, int v2)\r
14b0e578
CS
4672{\r
4673 if (v2 <= 0) return -1;\r
4674 if (v1 <= 0) return 1;\r
4675\r
4676 v1 *= distance_value(d1);\r
4677 v2 *= distance_value(d2);\r
4678\r
4679 if (v2 > v1) return 1;\r
4680 if (v2 < v1) return -1;\r
4681\r
4682 if (d2->min < d1->min) return 1;\r
4683 if (d2->min > d1->min) return -1;\r
4684 return 0;\r
4685}\r
4686\r
4687static int\r
b602265d 4688is_equal_mml(MinMax* a, MinMax* b)\r
14b0e578
CS
4689{\r
4690 return (a->min == b->min && a->max == b->max) ? 1 : 0;\r
4691}\r
4692\r
14b0e578 4693static void\r
b602265d 4694set_mml(MinMax* l, OnigLen min, OnigLen max)\r
14b0e578 4695{\r
b602265d
DG
4696 l->min = min;\r
4697 l->max = max;\r
14b0e578
CS
4698}\r
4699\r
4700static void\r
b602265d 4701clear_mml(MinMax* l)\r
14b0e578 4702{\r
b602265d 4703 l->min = l->max = 0;\r
14b0e578
CS
4704}\r
4705\r
4706static void\r
b602265d 4707copy_mml(MinMax* to, MinMax* from)\r
14b0e578
CS
4708{\r
4709 to->min = from->min;\r
4710 to->max = from->max;\r
4711}\r
4712\r
4713static void\r
b602265d 4714add_mml(MinMax* to, MinMax* from)\r
14b0e578
CS
4715{\r
4716 to->min = distance_add(to->min, from->min);\r
4717 to->max = distance_add(to->max, from->max);\r
4718}\r
4719\r
14b0e578 4720static void\r
b602265d 4721alt_merge_mml(MinMax* to, MinMax* from)\r
14b0e578
CS
4722{\r
4723 if (to->min > from->min) to->min = from->min;\r
4724 if (to->max < from->max) to->max = from->max;\r
4725}\r
4726\r
4727static void\r
4728copy_opt_env(OptEnv* to, OptEnv* from)\r
4729{\r
b602265d 4730 *to = *from;\r
14b0e578
CS
4731}\r
4732\r
4733static void\r
b602265d 4734clear_opt_anc_info(OptAnc* a)\r
14b0e578 4735{\r
b602265d
DG
4736 a->left = 0;\r
4737 a->right = 0;\r
14b0e578
CS
4738}\r
4739\r
4740static void\r
b602265d 4741copy_opt_anc_info(OptAnc* to, OptAnc* from)\r
14b0e578 4742{\r
b602265d 4743 *to = *from;\r
14b0e578
CS
4744}\r
4745\r
4746static void\r
b602265d
DG
4747concat_opt_anc_info(OptAnc* to, OptAnc* left, OptAnc* right,\r
4748 OnigLen left_len, OnigLen right_len)\r
14b0e578
CS
4749{\r
4750 clear_opt_anc_info(to);\r
4751\r
b602265d 4752 to->left = left->left;\r
14b0e578 4753 if (left_len == 0) {\r
b602265d 4754 to->left |= right->left;\r
14b0e578
CS
4755 }\r
4756\r
b602265d 4757 to->right = right->right;\r
14b0e578 4758 if (right_len == 0) {\r
b602265d
DG
4759 to->right |= left->right;\r
4760 }\r
4761 else {\r
4762 to->right |= (left->right & ANCHOR_PREC_READ_NOT);\r
14b0e578
CS
4763 }\r
4764}\r
4765\r
4766static int\r
b602265d 4767is_left(int a)\r
14b0e578 4768{\r
b602265d
DG
4769 if (a == ANCHOR_END_BUF || a == ANCHOR_SEMI_END_BUF ||\r
4770 a == ANCHOR_END_LINE || a == ANCHOR_PREC_READ || a == ANCHOR_PREC_READ_NOT)\r
14b0e578
CS
4771 return 0;\r
4772\r
4773 return 1;\r
4774}\r
4775\r
4776static int\r
b602265d 4777is_set_opt_anc_info(OptAnc* to, int anc)\r
14b0e578 4778{\r
b602265d 4779 if ((to->left & anc) != 0) return 1;\r
14b0e578 4780\r
b602265d 4781 return ((to->right & anc) != 0 ? 1 : 0);\r
14b0e578
CS
4782}\r
4783\r
4784static void\r
b602265d 4785add_opt_anc_info(OptAnc* to, int anc)\r
14b0e578 4786{\r
b602265d
DG
4787 if (is_left(anc))\r
4788 to->left |= anc;\r
14b0e578 4789 else\r
b602265d 4790 to->right |= anc;\r
14b0e578
CS
4791}\r
4792\r
4793static void\r
b602265d 4794remove_opt_anc_info(OptAnc* to, int anc)\r
14b0e578 4795{\r
b602265d
DG
4796 if (is_left(anc))\r
4797 to->left &= ~anc;\r
14b0e578 4798 else\r
b602265d 4799 to->right &= ~anc;\r
14b0e578
CS
4800}\r
4801\r
4802static void\r
b602265d 4803alt_merge_opt_anc_info(OptAnc* to, OptAnc* add)\r
14b0e578 4804{\r
b602265d
DG
4805 to->left &= add->left;\r
4806 to->right &= add->right;\r
14b0e578
CS
4807}\r
4808\r
4809static int\r
b602265d 4810is_full_opt_exact(OptExact* e)\r
14b0e578 4811{\r
b602265d 4812 return (e->len >= OPT_EXACT_MAXLEN ? 1 : 0);\r
14b0e578
CS
4813}\r
4814\r
4815static void\r
b602265d 4816clear_opt_exact(OptExact* e)\r
14b0e578 4817{\r
b602265d
DG
4818 clear_mml(&e->mmd);\r
4819 clear_opt_anc_info(&e->anc);\r
4820 e->reach_end = 0;\r
4821 e->ignore_case = 0;\r
4822 e->len = 0;\r
4823 e->s[0] = '\0';\r
14b0e578
CS
4824}\r
4825\r
4826static void\r
b602265d 4827copy_opt_exact(OptExact* to, OptExact* from)\r
14b0e578 4828{\r
b602265d 4829 *to = *from;\r
14b0e578
CS
4830}\r
4831\r
b602265d
DG
4832static int\r
4833concat_opt_exact(OptExact* to, OptExact* add, OnigEncoding enc)\r
14b0e578 4834{\r
b602265d 4835 int i, j, len, r;\r
14b0e578 4836 UChar *p, *end;\r
b602265d 4837 OptAnc tanc;\r
14b0e578
CS
4838\r
4839 if (! to->ignore_case && add->ignore_case) {\r
b602265d 4840 if (to->len >= add->len) return 0; /* avoid */\r
14b0e578
CS
4841\r
4842 to->ignore_case = 1;\r
4843 }\r
4844\r
b602265d 4845 r = 0;\r
14b0e578
CS
4846 p = add->s;\r
4847 end = p + add->len;\r
4848 for (i = to->len; p < end; ) {\r
4849 len = enclen(enc, p);\r
b602265d
DG
4850 if (i + len > OPT_EXACT_MAXLEN) {\r
4851 r = 1; /* 1:full */\r
4852 break;\r
4853 }\r
14b0e578
CS
4854 for (j = 0; j < len && p < end; j++)\r
4855 to->s[i++] = *p++;\r
4856 }\r
4857\r
4858 to->len = i;\r
4859 to->reach_end = (p == end ? add->reach_end : 0);\r
4860\r
4861 concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);\r
b602265d 4862 if (! to->reach_end) tanc.right = 0;\r
14b0e578 4863 copy_opt_anc_info(&to->anc, &tanc);\r
b602265d
DG
4864\r
4865 return r;\r
14b0e578
CS
4866}\r
4867\r
4868static void\r
b602265d 4869concat_opt_exact_str(OptExact* to, UChar* s, UChar* end, OnigEncoding enc)\r
14b0e578
CS
4870{\r
4871 int i, j, len;\r
4872 UChar *p;\r
4873\r
4874 for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {\r
4875 len = enclen(enc, p);\r
4876 if (i + len > OPT_EXACT_MAXLEN) break;\r
4877 for (j = 0; j < len && p < end; j++)\r
4878 to->s[i++] = *p++;\r
4879 }\r
4880\r
4881 to->len = i;\r
4882}\r
4883\r
4884static void\r
b602265d 4885alt_merge_opt_exact(OptExact* to, OptExact* add, OptEnv* env)\r
14b0e578
CS
4886{\r
4887 int i, j, len;\r
4888\r
4889 if (add->len == 0 || to->len == 0) {\r
b602265d 4890 clear_opt_exact(to);\r
14b0e578
CS
4891 return ;\r
4892 }\r
4893\r
4894 if (! is_equal_mml(&to->mmd, &add->mmd)) {\r
b602265d 4895 clear_opt_exact(to);\r
14b0e578
CS
4896 return ;\r
4897 }\r
4898\r
4899 for (i = 0; i < to->len && i < add->len; ) {\r
4900 if (to->s[i] != add->s[i]) break;\r
4901 len = enclen(env->enc, to->s + i);\r
4902\r
4903 for (j = 1; j < len; j++) {\r
4904 if (to->s[i+j] != add->s[i+j]) break;\r
4905 }\r
4906 if (j < len) break;\r
4907 i += len;\r
4908 }\r
4909\r
4910 if (! add->reach_end || i < add->len || i < to->len) {\r
4911 to->reach_end = 0;\r
4912 }\r
4913 to->len = i;\r
4914 to->ignore_case |= add->ignore_case;\r
4915\r
4916 alt_merge_opt_anc_info(&to->anc, &add->anc);\r
b602265d 4917 if (! to->reach_end) to->anc.right = 0;\r
14b0e578
CS
4918}\r
4919\r
4920static void\r
b602265d 4921select_opt_exact(OnigEncoding enc, OptExact* now, OptExact* alt)\r
14b0e578 4922{\r
b602265d 4923 int vn, va;\r
14b0e578 4924\r
b602265d
DG
4925 vn = now->len;\r
4926 va = alt->len;\r
14b0e578 4927\r
b602265d 4928 if (va == 0) {\r
14b0e578
CS
4929 return ;\r
4930 }\r
b602265d
DG
4931 else if (vn == 0) {\r
4932 copy_opt_exact(now, alt);\r
14b0e578
CS
4933 return ;\r
4934 }\r
b602265d 4935 else if (vn <= 2 && va <= 2) {\r
14b0e578 4936 /* ByteValTable[x] is big value --> low price */\r
b602265d
DG
4937 va = map_position_value(enc, now->s[0]);\r
4938 vn = map_position_value(enc, alt->s[0]);\r
14b0e578 4939\r
b602265d
DG
4940 if (now->len > 1) vn += 5;\r
4941 if (alt->len > 1) va += 5;\r
14b0e578
CS
4942 }\r
4943\r
b602265d
DG
4944 if (now->ignore_case == 0) vn *= 2;\r
4945 if (alt->ignore_case == 0) va *= 2;\r
14b0e578 4946\r
b602265d
DG
4947 if (comp_distance_value(&now->mmd, &alt->mmd, vn, va) > 0)\r
4948 copy_opt_exact(now, alt);\r
14b0e578
CS
4949}\r
4950\r
4951static void\r
b602265d 4952clear_opt_map(OptMap* map)\r
14b0e578 4953{\r
b602265d 4954 static const OptMap clean_info = {\r
14b0e578
CS
4955 {0, 0}, {0, 0}, 0,\r
4956 {\r
4957 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4958 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4959 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4960 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4961 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4962 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4963 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4964 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4965 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4966 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4967 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4968 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4969 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4970 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4971 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
4972 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0\r
4973 }\r
4974 };\r
4975\r
b602265d 4976 xmemcpy(map, &clean_info, sizeof(OptMap));\r
14b0e578
CS
4977}\r
4978\r
4979static void\r
b602265d 4980copy_opt_map(OptMap* to, OptMap* from)\r
14b0e578 4981{\r
b602265d 4982 xmemcpy(to,from,sizeof(OptMap));\r
14b0e578
CS
4983}\r
4984\r
4985static void\r
b602265d 4986add_char_opt_map(OptMap* m, UChar c, OnigEncoding enc)\r
14b0e578 4987{\r
b602265d
DG
4988 if (m->map[c] == 0) {\r
4989 m->map[c] = 1;\r
4990 m->value += map_position_value(enc, c);\r
14b0e578
CS
4991 }\r
4992}\r
4993\r
4994static int\r
b602265d
DG
4995add_char_amb_opt_map(OptMap* map, UChar* p, UChar* end,\r
4996 OnigEncoding enc, OnigCaseFoldType fold_flag)\r
14b0e578
CS
4997{\r
4998 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];\r
4999 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];\r
5000 int i, n;\r
5001\r
b602265d 5002 add_char_opt_map(map, p[0], enc);\r
14b0e578 5003\r
b602265d
DG
5004 fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(fold_flag);\r
5005 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, fold_flag, p, end, items);\r
14b0e578
CS
5006 if (n < 0) return n;\r
5007\r
5008 for (i = 0; i < n; i++) {\r
5009 ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);\r
b602265d 5010 add_char_opt_map(map, buf[0], enc);\r
14b0e578
CS
5011 }\r
5012\r
5013 return 0;\r
5014}\r
5015\r
5016static void\r
b602265d 5017select_opt_map(OptMap* now, OptMap* alt)\r
14b0e578
CS
5018{\r
5019 static int z = 1<<15; /* 32768: something big value */\r
5020\r
b602265d 5021 int vn, va;\r
14b0e578
CS
5022\r
5023 if (alt->value == 0) return ;\r
5024 if (now->value == 0) {\r
b602265d 5025 copy_opt_map(now, alt);\r
14b0e578
CS
5026 return ;\r
5027 }\r
5028\r
b602265d
DG
5029 vn = z / now->value;\r
5030 va = z / alt->value;\r
5031 if (comp_distance_value(&now->mmd, &alt->mmd, vn, va) > 0)\r
5032 copy_opt_map(now, alt);\r
14b0e578
CS
5033}\r
5034\r
5035static int\r
b602265d 5036comp_opt_exact_or_map(OptExact* e, OptMap* m)\r
14b0e578
CS
5037{\r
5038#define COMP_EM_BASE 20\r
b602265d 5039 int ae, am;\r
14b0e578
CS
5040\r
5041 if (m->value <= 0) return -1;\r
5042\r
b602265d
DG
5043 ae = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2);\r
5044 am = COMP_EM_BASE * 5 * 2 / m->value;\r
5045 return comp_distance_value(&e->mmd, &m->mmd, ae, am);\r
14b0e578
CS
5046}\r
5047\r
5048static void\r
b602265d 5049alt_merge_opt_map(OnigEncoding enc, OptMap* to, OptMap* add)\r
14b0e578
CS
5050{\r
5051 int i, val;\r
5052\r
5053 /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */\r
5054 if (to->value == 0) return ;\r
5055 if (add->value == 0 || to->mmd.max < add->mmd.min) {\r
b602265d 5056 clear_opt_map(to);\r
14b0e578
CS
5057 return ;\r
5058 }\r
5059\r
5060 alt_merge_mml(&to->mmd, &add->mmd);\r
5061\r
5062 val = 0;\r
5063 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {\r
5064 if (add->map[i])\r
5065 to->map[i] = 1;\r
5066\r
5067 if (to->map[i])\r
5068 val += map_position_value(enc, i);\r
5069 }\r
5070 to->value = val;\r
5071\r
5072 alt_merge_opt_anc_info(&to->anc, &add->anc);\r
5073}\r
5074\r
5075static void\r
b602265d 5076set_bound_node_opt_info(NodeOpt* opt, MinMax* plen)\r
14b0e578 5077{\r
b602265d
DG
5078 copy_mml(&(opt->exb.mmd), plen);\r
5079 copy_mml(&(opt->expr.mmd), plen);\r
5080 copy_mml(&(opt->map.mmd), plen);\r
14b0e578
CS
5081}\r
5082\r
5083static void\r
b602265d 5084clear_node_opt_info(NodeOpt* opt)\r
14b0e578
CS
5085{\r
5086 clear_mml(&opt->len);\r
5087 clear_opt_anc_info(&opt->anc);\r
b602265d
DG
5088 clear_opt_exact(&opt->exb);\r
5089 clear_opt_exact(&opt->exm);\r
5090 clear_opt_exact(&opt->expr);\r
5091 clear_opt_map(&opt->map);\r
14b0e578
CS
5092}\r
5093\r
5094static void\r
b602265d 5095copy_node_opt_info(NodeOpt* to, NodeOpt* from)\r
14b0e578 5096{\r
b602265d 5097 xmemcpy(to,from,sizeof(NodeOpt));\r
14b0e578
CS
5098}\r
5099\r
5100static void\r
b602265d 5101concat_left_node_opt_info(OnigEncoding enc, NodeOpt* to, NodeOpt* add)\r
14b0e578
CS
5102{\r
5103 int exb_reach, exm_reach;\r
b602265d 5104 OptAnc tanc;\r
14b0e578
CS
5105\r
5106 concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);\r
5107 copy_opt_anc_info(&to->anc, &tanc);\r
5108\r
5109 if (add->exb.len > 0 && to->len.max == 0) {\r
b602265d 5110 concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc, to->len.max, add->len.max);\r
14b0e578
CS
5111 copy_opt_anc_info(&add->exb.anc, &tanc);\r
5112 }\r
5113\r
5114 if (add->map.value > 0 && to->len.max == 0) {\r
5115 if (add->map.mmd.max == 0)\r
b602265d 5116 add->map.anc.left |= to->anc.left;\r
14b0e578
CS
5117 }\r
5118\r
5119 exb_reach = to->exb.reach_end;\r
5120 exm_reach = to->exm.reach_end;\r
5121\r
5122 if (add->len.max != 0)\r
5123 to->exb.reach_end = to->exm.reach_end = 0;\r
5124\r
5125 if (add->exb.len > 0) {\r
5126 if (exb_reach) {\r
b602265d
DG
5127 concat_opt_exact(&to->exb, &add->exb, enc);\r
5128 clear_opt_exact(&add->exb);\r
14b0e578
CS
5129 }\r
5130 else if (exm_reach) {\r
b602265d
DG
5131 concat_opt_exact(&to->exm, &add->exb, enc);\r
5132 clear_opt_exact(&add->exb);\r
14b0e578
CS
5133 }\r
5134 }\r
b602265d
DG
5135 select_opt_exact(enc, &to->exm, &add->exb);\r
5136 select_opt_exact(enc, &to->exm, &add->exm);\r
14b0e578
CS
5137\r
5138 if (to->expr.len > 0) {\r
5139 if (add->len.max > 0) {\r
5140 if (to->expr.len > (int )add->len.max)\r
b602265d 5141 to->expr.len = add->len.max;\r
14b0e578
CS
5142\r
5143 if (to->expr.mmd.max == 0)\r
b602265d 5144 select_opt_exact(enc, &to->exb, &to->expr);\r
14b0e578 5145 else\r
b602265d 5146 select_opt_exact(enc, &to->exm, &to->expr);\r
14b0e578
CS
5147 }\r
5148 }\r
5149 else if (add->expr.len > 0) {\r
b602265d 5150 copy_opt_exact(&to->expr, &add->expr);\r
14b0e578
CS
5151 }\r
5152\r
b602265d 5153 select_opt_map(&to->map, &add->map);\r
14b0e578
CS
5154 add_mml(&to->len, &add->len);\r
5155}\r
5156\r
5157static void\r
b602265d 5158alt_merge_node_opt_info(NodeOpt* to, NodeOpt* add, OptEnv* env)\r
14b0e578 5159{\r
b602265d
DG
5160 alt_merge_opt_anc_info(&to->anc, &add->anc);\r
5161 alt_merge_opt_exact(&to->exb, &add->exb, env);\r
5162 alt_merge_opt_exact(&to->exm, &add->exm, env);\r
5163 alt_merge_opt_exact(&to->expr, &add->expr, env);\r
5164 alt_merge_opt_map(env->enc, &to->map, &add->map);\r
14b0e578
CS
5165\r
5166 alt_merge_mml(&to->len, &add->len);\r
5167}\r
5168\r
5169\r
5170#define MAX_NODE_OPT_INFO_REF_COUNT 5\r
5171\r
5172static int\r
b602265d 5173optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env)\r
14b0e578 5174{\r
b602265d
DG
5175 int i;\r
5176 int r;\r
5177 NodeOpt xo;\r
5178 OnigEncoding enc;\r
14b0e578 5179\r
b602265d
DG
5180 r = 0;\r
5181 enc = env->enc;\r
14b0e578
CS
5182 clear_node_opt_info(opt);\r
5183 set_bound_node_opt_info(opt, &env->mmd);\r
5184\r
b602265d
DG
5185 switch (NODE_TYPE(node)) {\r
5186 case NODE_LIST:\r
14b0e578
CS
5187 {\r
5188 OptEnv nenv;\r
14b0e578
CS
5189 Node* nd = node;\r
5190\r
5191 copy_opt_env(&nenv, env);\r
5192 do {\r
b602265d
DG
5193 r = optimize_nodes(NODE_CAR(nd), &xo, &nenv);\r
5194 if (r == 0) {\r
5195 add_mml(&nenv.mmd, &xo.len);\r
5196 concat_left_node_opt_info(enc, opt, &xo);\r
5197 }\r
5198 } while (r == 0 && IS_NOT_NULL(nd = NODE_CDR(nd)));\r
14b0e578
CS
5199 }\r
5200 break;\r
5201\r
b602265d 5202 case NODE_ALT:\r
14b0e578 5203 {\r
14b0e578
CS
5204 Node* nd = node;\r
5205\r
5206 do {\r
b602265d
DG
5207 r = optimize_nodes(NODE_CAR(nd), &xo, env);\r
5208 if (r == 0) {\r
5209 if (nd == node) copy_node_opt_info(opt, &xo);\r
5210 else alt_merge_node_opt_info(opt, &xo, env);\r
5211 }\r
5212 } while ((r == 0) && IS_NOT_NULL(nd = NODE_CDR(nd)));\r
14b0e578
CS
5213 }\r
5214 break;\r
5215\r
b602265d 5216 case NODE_STRING:\r
14b0e578 5217 {\r
b602265d
DG
5218 StrNode* sn = STR_(node);\r
5219 int slen = (int )(sn->end - sn->s);\r
5220 /* int is_raw = NODE_STRING_IS_RAW(node); */\r
5221\r
5222 if (! NODE_STRING_IS_AMBIG(node)) {\r
5223 concat_opt_exact_str(&opt->exb, sn->s, sn->end, enc);\r
5224 if (slen > 0) {\r
5225 add_char_opt_map(&opt->map, *(sn->s), enc);\r
5226 }\r
14b0e578
CS
5227 set_mml(&opt->len, slen, slen);\r
5228 }\r
5229 else {\r
5230 int max;\r
5231\r
b602265d
DG
5232 if (NODE_STRING_IS_DONT_GET_OPT_INFO(node)) {\r
5233 int n = onigenc_strlen(enc, sn->s, sn->end);\r
5234 max = ONIGENC_MBC_MAXLEN_DIST(enc) * n;\r
5235 }\r
5236 else {\r
5237 concat_opt_exact_str(&opt->exb, sn->s, sn->end, enc);\r
5238 opt->exb.ignore_case = 1;\r
14b0e578 5239\r
b602265d
DG
5240 if (slen > 0) {\r
5241 r = add_char_amb_opt_map(&opt->map, sn->s, sn->end,\r
5242 enc, env->case_fold_flag);\r
5243 if (r != 0) break;\r
5244 }\r
14b0e578 5245\r
b602265d
DG
5246 max = slen;\r
5247 }\r
14b0e578
CS
5248\r
5249 set_mml(&opt->len, slen, max);\r
5250 }\r
5251\r
5252 if (opt->exb.len == slen)\r
b602265d 5253 opt->exb.reach_end = 1;\r
14b0e578
CS
5254 }\r
5255 break;\r
5256\r
b602265d 5257 case NODE_CCLASS:\r
14b0e578 5258 {\r
b602265d
DG
5259 int z;\r
5260 CClassNode* cc = CCLASS_(node);\r
14b0e578 5261\r
b602265d 5262 /* no need to check ignore case. (set in setup_tree()) */\r
14b0e578
CS
5263\r
5264 if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {\r
b602265d
DG
5265 OnigLen min = ONIGENC_MBC_MINLEN(enc);\r
5266 OnigLen max = ONIGENC_MBC_MAXLEN_DIST(enc);\r
14b0e578 5267\r
b602265d 5268 set_mml(&opt->len, min, max);\r
14b0e578
CS
5269 }\r
5270 else {\r
5271 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {\r
5272 z = BITSET_AT(cc->bs, i);\r
b602265d
DG
5273 if ((z && ! IS_NCCLASS_NOT(cc)) || (! z && IS_NCCLASS_NOT(cc))) {\r
5274 add_char_opt_map(&opt->map, (UChar )i, enc);\r
14b0e578
CS
5275 }\r
5276 }\r
b602265d 5277 set_mml(&opt->len, 1, 1);\r
14b0e578
CS
5278 }\r
5279 }\r
5280 break;\r
5281\r
b602265d 5282 case NODE_CTYPE:\r
14b0e578 5283 {\r
b602265d
DG
5284 int min, max;\r
5285 int range;\r
14b0e578 5286\r
b602265d 5287 max = ONIGENC_MBC_MAXLEN_DIST(enc);\r
14b0e578
CS
5288\r
5289 if (max == 1) {\r
5290 min = 1;\r
5291\r
b602265d
DG
5292 switch (CTYPE_(node)->ctype) {\r
5293 case CTYPE_ANYCHAR:\r
5294 break;\r
5295\r
5296 case ONIGENC_CTYPE_WORD:\r
5297 range = CTYPE_(node)->ascii_mode != 0 ? 128 : SINGLE_BYTE_SIZE;\r
5298 if (CTYPE_(node)->not != 0) {\r
5299 for (i = 0; i < range; i++) {\r
5300 if (! ONIGENC_IS_CODE_WORD(enc, i)) {\r
5301 add_char_opt_map(&opt->map, (UChar )i, enc);\r
5302 }\r
5303 }\r
5304 for (i = range; i < SINGLE_BYTE_SIZE; i++) {\r
5305 add_char_opt_map(&opt->map, (UChar )i, enc);\r
5306 }\r
5307 }\r
5308 else {\r
5309 for (i = 0; i < range; i++) {\r
5310 if (ONIGENC_IS_CODE_WORD(enc, i)) {\r
5311 add_char_opt_map(&opt->map, (UChar )i, enc);\r
5312 }\r
5313 }\r
5314 }\r
5315 break;\r
5316 }\r
14b0e578
CS
5317 }\r
5318 else {\r
b602265d 5319 min = ONIGENC_MBC_MINLEN(enc);\r
14b0e578
CS
5320 }\r
5321 set_mml(&opt->len, min, max);\r
5322 }\r
5323 break;\r
5324\r
b602265d
DG
5325 case NODE_ANCHOR:\r
5326 switch (ANCHOR_(node)->type) {\r
14b0e578
CS
5327 case ANCHOR_BEGIN_BUF:\r
5328 case ANCHOR_BEGIN_POSITION:\r
5329 case ANCHOR_BEGIN_LINE:\r
5330 case ANCHOR_END_BUF:\r
5331 case ANCHOR_SEMI_END_BUF:\r
5332 case ANCHOR_END_LINE:\r
b602265d
DG
5333 case ANCHOR_PREC_READ_NOT:\r
5334 case ANCHOR_LOOK_BEHIND:\r
5335 add_opt_anc_info(&opt->anc, ANCHOR_(node)->type);\r
14b0e578
CS
5336 break;\r
5337\r
5338 case ANCHOR_PREC_READ:\r
5339 {\r
b602265d
DG
5340 r = optimize_nodes(NODE_BODY(node), &xo, env);\r
5341 if (r == 0) {\r
5342 if (xo.exb.len > 0)\r
5343 copy_opt_exact(&opt->expr, &xo.exb);\r
5344 else if (xo.exm.len > 0)\r
5345 copy_opt_exact(&opt->expr, &xo.exm);\r
14b0e578 5346\r
b602265d 5347 opt->expr.reach_end = 0;\r
14b0e578 5348\r
b602265d
DG
5349 if (xo.map.value > 0)\r
5350 copy_opt_map(&opt->map, &xo.map);\r
5351 }\r
14b0e578
CS
5352 }\r
5353 break;\r
5354\r
14b0e578
CS
5355 case ANCHOR_LOOK_BEHIND_NOT:\r
5356 break;\r
5357 }\r
5358 break;\r
5359\r
b602265d
DG
5360 case NODE_BACKREF:\r
5361 if (! NODE_IS_CHECKER(node)) {\r
14b0e578 5362 int* backs;\r
b602265d
DG
5363 OnigLen min, max, tmin, tmax;\r
5364 MemEnv* mem_env = SCANENV_MEMENV(env->scan_env);\r
5365 BackRefNode* br = BACKREF_(node);\r
14b0e578 5366\r
b602265d
DG
5367 if (NODE_IS_RECURSION(node)) {\r
5368 set_mml(&opt->len, 0, INFINITE_LEN);\r
5369 break;\r
14b0e578
CS
5370 }\r
5371 backs = BACKREFS_P(br);\r
b602265d
DG
5372 min = tree_min_len(mem_env[backs[0]].node, env->scan_env);\r
5373 max = tree_max_len(mem_env[backs[0]].node, env->scan_env);\r
14b0e578 5374 for (i = 1; i < br->back_num; i++) {\r
b602265d
DG
5375 tmin = tree_min_len(mem_env[backs[i]].node, env->scan_env);\r
5376 tmax = tree_max_len(mem_env[backs[i]].node, env->scan_env);\r
5377 if (min > tmin) min = tmin;\r
5378 if (max < tmax) max = tmax;\r
14b0e578 5379 }\r
b602265d 5380 set_mml(&opt->len, min, max);\r
14b0e578
CS
5381 }\r
5382 break;\r
5383\r
b602265d
DG
5384#ifdef USE_CALL\r
5385 case NODE_CALL:\r
5386 if (NODE_IS_RECURSION(node))\r
5387 set_mml(&opt->len, 0, INFINITE_LEN);\r
14b0e578
CS
5388 else {\r
5389 OnigOptionType save = env->options;\r
b602265d
DG
5390 env->options = ENCLOSURE_(NODE_BODY(node))->o.options;\r
5391 r = optimize_nodes(NODE_BODY(node), opt, env);\r
14b0e578
CS
5392 env->options = save;\r
5393 }\r
5394 break;\r
5395#endif\r
5396\r
b602265d 5397 case NODE_QUANT:\r
14b0e578 5398 {\r
b602265d
DG
5399 OnigLen min, max;\r
5400 QuantNode* qn = QUANT_(node);\r
5401\r
5402 r = optimize_nodes(NODE_BODY(node), &xo, env);\r
5403 if (r != 0) break;\r
5404\r
5405 if (qn->lower > 0) {\r
5406 copy_node_opt_info(opt, &xo);\r
5407 if (xo.exb.len > 0) {\r
5408 if (xo.exb.reach_end) {\r
5409 for (i = 2; i <= qn->lower && ! is_full_opt_exact(&opt->exb); i++) {\r
5410 int rc = concat_opt_exact(&opt->exb, &xo.exb, enc);\r
5411 if (rc > 0) break;\r
5412 }\r
5413 if (i < qn->lower) opt->exb.reach_end = 0;\r
5414 }\r
5415 }\r
5416\r
5417 if (qn->lower != qn->upper) {\r
5418 opt->exb.reach_end = 0;\r
5419 opt->exm.reach_end = 0;\r
5420 }\r
5421 if (qn->lower > 1)\r
5422 opt->exm.reach_end = 0;\r
5423 }\r
5424\r
5425 if (IS_REPEAT_INFINITE(qn->upper)) {\r
5426 if (env->mmd.max == 0 &&\r
5427 NODE_IS_ANYCHAR(NODE_BODY(node)) && qn->greedy != 0) {\r
5428 if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), env)))\r
5429 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF_ML);\r
5430 else\r
5431 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF);\r
5432 }\r
5433\r
5434 max = (xo.len.max > 0 ? INFINITE_LEN : 0);\r
14b0e578
CS
5435 }\r
5436 else {\r
b602265d 5437 max = distance_multiply(xo.len.max, qn->upper);\r
14b0e578
CS
5438 }\r
5439\r
b602265d 5440 min = distance_multiply(xo.len.min, qn->lower);\r
14b0e578
CS
5441 set_mml(&opt->len, min, max);\r
5442 }\r
5443 break;\r
5444\r
b602265d
DG
5445 case NODE_ENCLOSURE:\r
5446 {\r
5447 EnclosureNode* en = ENCLOSURE_(node);\r
5448\r
5449 switch (en->type) {\r
5450 case ENCLOSURE_OPTION:\r
5451 {\r
5452 OnigOptionType save = env->options;\r
5453\r
5454 env->options = en->o.options;\r
5455 r = optimize_nodes(NODE_BODY(node), opt, env);\r
5456 env->options = save;\r
5457 }\r
5458 break;\r
5459\r
5460 case ENCLOSURE_MEMORY:\r
5461#ifdef USE_CALL\r
5462 en->opt_count++;\r
5463 if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {\r
5464 OnigLen min, max;\r
5465\r
5466 min = 0;\r
5467 max = INFINITE_LEN;\r
5468 if (NODE_IS_MIN_FIXED(node)) min = en->min_len;\r
5469 if (NODE_IS_MAX_FIXED(node)) max = en->max_len;\r
5470 set_mml(&opt->len, min, max);\r
5471 }\r
5472 else\r
5473#endif\r
5474 {\r
5475 r = optimize_nodes(NODE_BODY(node), opt, env);\r
5476 if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF_MASK)) {\r
5477 if (MEM_STATUS_AT0(env->scan_env->backrefed_mem, en->m.regnum))\r
5478 remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF_MASK);\r
5479 }\r
5480 }\r
5481 break;\r
5482\r
5483 case ENCLOSURE_STOP_BACKTRACK:\r
5484 r = optimize_nodes(NODE_BODY(node), opt, env);\r
5485 break;\r
5486\r
5487 case ENCLOSURE_IF_ELSE:\r
5488 {\r
5489 OptEnv nenv;\r
5490\r
5491 copy_opt_env(&nenv, env);\r
5492 r = optimize_nodes(NODE_ENCLOSURE_BODY(en), &xo, &nenv);\r
5493 if (r == 0) {\r
5494 add_mml(&nenv.mmd, &xo.len);\r
5495 concat_left_node_opt_info(enc, opt, &xo);\r
5496 if (IS_NOT_NULL(en->te.Then)) {\r
5497 r = optimize_nodes(en->te.Then, &xo, &nenv);\r
5498 if (r == 0) {\r
5499 concat_left_node_opt_info(enc, opt, &xo);\r
5500 }\r
5501 }\r
5502\r
5503 if (IS_NOT_NULL(en->te.Else)) {\r
5504 r = optimize_nodes(en->te.Else, &xo, env);\r
5505 if (r == 0)\r
5506 alt_merge_node_opt_info(opt, &xo, env);\r
5507 }\r
5508 }\r
5509 }\r
5510 break;\r
5511 }\r
5512 }\r
5513 break;\r
5514\r
5515 case NODE_GIMMICK:\r
14b0e578
CS
5516 break;\r
5517\r
5518 default:\r
5519#ifdef ONIG_DEBUG\r
b602265d 5520 fprintf(stderr, "optimize_nodes: undefined node type %d\n", NODE_TYPE(node));\r
14b0e578
CS
5521#endif\r
5522 r = ONIGERR_TYPE_BUG;\r
5523 break;\r
5524 }\r
5525\r
5526 return r;\r
5527}\r
5528\r
5529static int\r
b602265d 5530set_optimize_exact(regex_t* reg, OptExact* e)\r
14b0e578
CS
5531{\r
5532 int r;\r
5533\r
5534 if (e->len == 0) return 0;\r
5535\r
5536 if (e->ignore_case) {\r
5537 reg->exact = (UChar* )xmalloc(e->len);\r
5538 CHECK_NULL_RETURN_MEMERR(reg->exact);\r
5539 xmemcpy(reg->exact, e->s, e->len);\r
5540 reg->exact_end = reg->exact + e->len;\r
b602265d 5541 reg->optimize = OPTIMIZE_EXACT_IC;\r
14b0e578
CS
5542 }\r
5543 else {\r
5544 int allow_reverse;\r
5545\r
b602265d 5546 reg->exact = onigenc_strdup(reg->enc, e->s, e->s + e->len);\r
14b0e578
CS
5547 CHECK_NULL_RETURN_MEMERR(reg->exact);\r
5548 reg->exact_end = reg->exact + e->len;\r
5549 \r
5550 allow_reverse =\r
b602265d 5551 ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);\r
14b0e578
CS
5552\r
5553 if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {\r
5554 r = set_bm_skip(reg->exact, reg->exact_end, reg->enc,\r
b602265d
DG
5555 reg->map, &(reg->int_map));\r
5556 if (r != 0) return r;\r
14b0e578
CS
5557\r
5558 reg->optimize = (allow_reverse != 0\r
b602265d 5559 ? OPTIMIZE_EXACT_BM : OPTIMIZE_EXACT_BM_NO_REV);\r
14b0e578
CS
5560 }\r
5561 else {\r
b602265d 5562 reg->optimize = OPTIMIZE_EXACT;\r
14b0e578
CS
5563 }\r
5564 }\r
5565\r
5566 reg->dmin = e->mmd.min;\r
5567 reg->dmax = e->mmd.max;\r
5568\r
b602265d
DG
5569 if (reg->dmin != INFINITE_LEN) {\r
5570 reg->threshold_len = reg->dmin + (int )(reg->exact_end - reg->exact);\r
14b0e578
CS
5571 }\r
5572\r
5573 return 0;\r
5574}\r
5575\r
5576static void\r
b602265d 5577set_optimize_map(regex_t* reg, OptMap* m)\r
14b0e578
CS
5578{\r
5579 int i;\r
5580\r
5581 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)\r
5582 reg->map[i] = m->map[i];\r
5583\r
b602265d 5584 reg->optimize = OPTIMIZE_MAP;\r
14b0e578
CS
5585 reg->dmin = m->mmd.min;\r
5586 reg->dmax = m->mmd.max;\r
5587\r
b602265d 5588 if (reg->dmin != INFINITE_LEN) {\r
14b0e578
CS
5589 reg->threshold_len = reg->dmin + 1;\r
5590 }\r
5591}\r
5592\r
5593static void\r
b602265d 5594set_sub_anchor(regex_t* reg, OptAnc* anc)\r
14b0e578 5595{\r
b602265d
DG
5596 reg->sub_anchor |= anc->left & ANCHOR_BEGIN_LINE;\r
5597 reg->sub_anchor |= anc->right & ANCHOR_END_LINE;\r
14b0e578
CS
5598}\r
5599\r
b602265d 5600#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)\r
14b0e578
CS
5601static void print_optimize_info(FILE* f, regex_t* reg);\r
5602#endif\r
5603\r
5604static int\r
5605set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)\r
5606{\r
14b0e578 5607 int r;\r
b602265d 5608 NodeOpt opt;\r
14b0e578
CS
5609 OptEnv env;\r
5610\r
5611 env.enc = reg->enc;\r
5612 env.options = reg->options;\r
5613 env.case_fold_flag = reg->case_fold_flag;\r
b602265d 5614 env.scan_env = scan_env;\r
14b0e578
CS
5615 clear_mml(&env.mmd);\r
5616\r
b602265d
DG
5617 r = optimize_nodes(node, &opt, &env);\r
5618 if (r != 0) return r;\r
5619\r
5620 reg->anchor = opt.anc.left & (ANCHOR_BEGIN_BUF |\r
5621 ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_INF | ANCHOR_ANYCHAR_INF_ML |\r
5622 ANCHOR_LOOK_BEHIND);\r
14b0e578 5623\r
b602265d
DG
5624 if ((opt.anc.left & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) != 0)\r
5625 reg->anchor &= ~ANCHOR_ANYCHAR_INF_ML;\r
14b0e578 5626\r
b602265d
DG
5627 reg->anchor |= opt.anc.right & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF |\r
5628 ANCHOR_PREC_READ_NOT);\r
14b0e578
CS
5629\r
5630 if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {\r
5631 reg->anchor_dmin = opt.len.min;\r
5632 reg->anchor_dmax = opt.len.max;\r
5633 }\r
5634\r
5635 if (opt.exb.len > 0 || opt.exm.len > 0) {\r
b602265d
DG
5636 select_opt_exact(reg->enc, &opt.exb, &opt.exm);\r
5637 if (opt.map.value > 0 && comp_opt_exact_or_map(&opt.exb, &opt.map) > 0) {\r
14b0e578
CS
5638 goto set_map;\r
5639 }\r
5640 else {\r
b602265d 5641 r = set_optimize_exact(reg, &opt.exb);\r
14b0e578
CS
5642 set_sub_anchor(reg, &opt.exb.anc);\r
5643 }\r
5644 }\r
5645 else if (opt.map.value > 0) {\r
5646 set_map:\r
b602265d 5647 set_optimize_map(reg, &opt.map);\r
14b0e578
CS
5648 set_sub_anchor(reg, &opt.map.anc);\r
5649 }\r
5650 else {\r
b602265d 5651 reg->sub_anchor |= opt.anc.left & ANCHOR_BEGIN_LINE;\r
14b0e578 5652 if (opt.len.max == 0)\r
b602265d 5653 reg->sub_anchor |= opt.anc.right & ANCHOR_END_LINE;\r
14b0e578
CS
5654 }\r
5655\r
5656#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)\r
5657 print_optimize_info(stderr, reg);\r
5658#endif\r
5659 return r;\r
5660}\r
5661\r
5662static void\r
5663clear_optimize_info(regex_t* reg)\r
5664{\r
b602265d 5665 reg->optimize = OPTIMIZE_NONE;\r
14b0e578
CS
5666 reg->anchor = 0;\r
5667 reg->anchor_dmin = 0;\r
5668 reg->anchor_dmax = 0;\r
5669 reg->sub_anchor = 0;\r
5670 reg->exact_end = (UChar* )NULL;\r
5671 reg->threshold_len = 0;\r
5672 if (IS_NOT_NULL(reg->exact)) {\r
5673 xfree(reg->exact);\r
5674 reg->exact = (UChar* )NULL;\r
5675 }\r
5676}\r
5677\r
5678#ifdef ONIG_DEBUG\r
5679\r
5680static void print_enc_string(FILE* fp, OnigEncoding enc,\r
b602265d 5681 const UChar *s, const UChar *end)\r
14b0e578
CS
5682{\r
5683 fprintf(fp, "\nPATTERN: /");\r
5684\r
5685 if (ONIGENC_MBC_MINLEN(enc) > 1) {\r
5686 const UChar *p;\r
5687 OnigCodePoint code;\r
5688\r
5689 p = s;\r
5690 while (p < end) {\r
5691 code = ONIGENC_MBC_TO_CODE(enc, p, end);\r
5692 if (code >= 0x80) {\r
b602265d 5693 fprintf(fp, " 0x%04x ", (int )code);\r
14b0e578
CS
5694 }\r
5695 else {\r
b602265d 5696 fputc((int )code, fp);\r
14b0e578
CS
5697 }\r
5698\r
5699 p += enclen(enc, p);\r
5700 }\r
5701 }\r
5702 else {\r
5703 while (s < end) {\r
5704 fputc((int )*s, fp);\r
5705 s++;\r
5706 }\r
5707 }\r
5708\r
5709 fprintf(fp, "/\n");\r
5710}\r
5711\r
b602265d
DG
5712#endif /* ONIG_DEBUG */\r
5713\r
5714#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)\r
5715\r
14b0e578 5716static void\r
b602265d 5717print_distance_range(FILE* f, OnigLen a, OnigLen b)\r
14b0e578 5718{\r
b602265d 5719 if (a == INFINITE_LEN)\r
14b0e578
CS
5720 fputs("inf", f);\r
5721 else\r
5722 fprintf(f, "(%u)", a);\r
5723\r
5724 fputs("-", f);\r
5725\r
b602265d 5726 if (b == INFINITE_LEN)\r
14b0e578
CS
5727 fputs("inf", f);\r
5728 else\r
5729 fprintf(f, "(%u)", b);\r
5730}\r
5731\r
5732static void\r
5733print_anchor(FILE* f, int anchor)\r
5734{\r
5735 int q = 0;\r
5736\r
5737 fprintf(f, "[");\r
5738\r
5739 if (anchor & ANCHOR_BEGIN_BUF) {\r
5740 fprintf(f, "begin-buf");\r
5741 q = 1;\r
5742 }\r
5743 if (anchor & ANCHOR_BEGIN_LINE) {\r
5744 if (q) fprintf(f, ", ");\r
5745 q = 1;\r
5746 fprintf(f, "begin-line");\r
5747 }\r
5748 if (anchor & ANCHOR_BEGIN_POSITION) {\r
5749 if (q) fprintf(f, ", ");\r
5750 q = 1;\r
5751 fprintf(f, "begin-pos");\r
5752 }\r
5753 if (anchor & ANCHOR_END_BUF) {\r
5754 if (q) fprintf(f, ", ");\r
5755 q = 1;\r
5756 fprintf(f, "end-buf");\r
5757 }\r
5758 if (anchor & ANCHOR_SEMI_END_BUF) {\r
5759 if (q) fprintf(f, ", ");\r
5760 q = 1;\r
5761 fprintf(f, "semi-end-buf");\r
5762 }\r
5763 if (anchor & ANCHOR_END_LINE) {\r
5764 if (q) fprintf(f, ", ");\r
5765 q = 1;\r
5766 fprintf(f, "end-line");\r
5767 }\r
b602265d 5768 if (anchor & ANCHOR_ANYCHAR_INF) {\r
14b0e578
CS
5769 if (q) fprintf(f, ", ");\r
5770 q = 1;\r
b602265d 5771 fprintf(f, "anychar-inf");\r
14b0e578 5772 }\r
b602265d 5773 if (anchor & ANCHOR_ANYCHAR_INF_ML) {\r
14b0e578 5774 if (q) fprintf(f, ", ");\r
b602265d 5775 fprintf(f, "anychar-inf-ml");\r
14b0e578
CS
5776 }\r
5777\r
5778 fprintf(f, "]");\r
5779}\r
5780\r
5781static void\r
5782print_optimize_info(FILE* f, regex_t* reg)\r
5783{\r
5784 static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",\r
5785 "EXACT_IC", "MAP" };\r
5786\r
5787 fprintf(f, "optimize: %s\n", on[reg->optimize]);\r
5788 fprintf(f, " anchor: "); print_anchor(f, reg->anchor);\r
5789 if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)\r
5790 print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);\r
5791 fprintf(f, "\n");\r
5792\r
5793 if (reg->optimize) {\r
5794 fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor);\r
5795 fprintf(f, "\n");\r
5796 }\r
5797 fprintf(f, "\n");\r
5798\r
5799 if (reg->exact) {\r
5800 UChar *p;\r
5801 fprintf(f, "exact: [");\r
5802 for (p = reg->exact; p < reg->exact_end; p++) {\r
5803 fputc(*p, f);\r
5804 }\r
b602265d 5805 fprintf(f, "]: length: %ld\n", (reg->exact_end - reg->exact));\r
14b0e578 5806 }\r
b602265d 5807 else if (reg->optimize & OPTIMIZE_MAP) {\r
14b0e578
CS
5808 int c, i, n = 0;\r
5809\r
5810 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)\r
5811 if (reg->map[i]) n++;\r
5812\r
5813 fprintf(f, "map: n=%d\n", n);\r
5814 if (n > 0) {\r
5815 c = 0;\r
5816 fputc('[', f);\r
5817 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {\r
b602265d 5818 if (reg->map[i] != 0) {\r
14b0e578
CS
5819 if (c > 0) fputs(", ", f);\r
5820 c++;\r
5821 if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&\r
5822 ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))\r
5823 fputc(i, f);\r
5824 else\r
5825 fprintf(f, "%d", i);\r
5826 }\r
5827 }\r
5828 fprintf(f, "]\n");\r
5829 }\r
5830 }\r
5831}\r
b602265d
DG
5832#endif\r
5833\r
5834\r
5835extern RegexExt*\r
5836onig_get_regex_ext(regex_t* reg)\r
5837{\r
5838 if (IS_NULL(REG_EXTP(reg))) {\r
5839 RegexExt* ext = (RegexExt* )xmalloc(sizeof(*ext));\r
5840 if (IS_NULL(ext)) return 0;\r
5841\r
5842 ext->pattern = 0;\r
5843 ext->pattern_end = 0;\r
5844#ifdef USE_CALLOUT\r
5845 ext->tag_table = 0;\r
5846 ext->callout_num = 0;\r
5847 ext->callout_list_alloc = 0;\r
5848 ext->callout_list = 0;\r
5849#endif\r
5850\r
5851 REG_EXTPL(reg) = (void* )ext;\r
5852 }\r
5853\r
5854 return REG_EXTP(reg);\r
5855}\r
5856\r
5857static void\r
5858free_regex_ext(RegexExt* ext)\r
5859{\r
5860 if (IS_NOT_NULL(ext)) {\r
5861 if (IS_NOT_NULL(ext->pattern))\r
5862 xfree((void* )ext->pattern);\r
5863\r
5864#ifdef USE_CALLOUT\r
5865 if (IS_NOT_NULL(ext->tag_table))\r
5866 onig_callout_tag_table_free(ext->tag_table);\r
5867\r
5868 if (IS_NOT_NULL(ext->callout_list))\r
5869 onig_free_reg_callout_list(ext->callout_num, ext->callout_list);\r
5870#endif\r
5871\r
5872 xfree(ext);\r
5873 }\r
5874}\r
5875\r
5876extern int\r
5877onig_ext_set_pattern(regex_t* reg, const UChar* pattern, const UChar* pattern_end)\r
5878{\r
5879 RegexExt* ext;\r
5880 UChar* s;\r
5881\r
5882 ext = onig_get_regex_ext(reg);\r
5883 CHECK_NULL_RETURN_MEMERR(ext);\r
5884\r
5885 s = onigenc_strdup(reg->enc, pattern, pattern_end);\r
5886 CHECK_NULL_RETURN_MEMERR(s);\r
5887\r
5888 ext->pattern = s;\r
5889 ext->pattern_end = s + (pattern_end - pattern);\r
5890\r
5891 return ONIG_NORMAL;\r
5892}\r
14b0e578
CS
5893\r
5894\r
5895extern void\r
5896onig_free_body(regex_t* reg)\r
5897{\r
5898 if (IS_NOT_NULL(reg)) {\r
5899 if (IS_NOT_NULL(reg->p)) xfree(reg->p);\r
5900 if (IS_NOT_NULL(reg->exact)) xfree(reg->exact);\r
5901 if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map);\r
5902 if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward);\r
5903 if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range);\r
b602265d
DG
5904 if (IS_NOT_NULL(REG_EXTP(reg))) {\r
5905 free_regex_ext(REG_EXTP(reg));\r
5906 REG_EXTPL(reg) = 0;\r
5907 }\r
14b0e578 5908\r
14b0e578 5909 onig_names_free(reg);\r
14b0e578
CS
5910 }\r
5911}\r
5912\r
5913extern void\r
5914onig_free(regex_t* reg)\r
5915{\r
5916 if (IS_NOT_NULL(reg)) {\r
5917 onig_free_body(reg);\r
5918 xfree(reg);\r
5919 }\r
5920}\r
5921\r
5922#define REGEX_TRANSFER(to,from) do {\\r
14b0e578
CS
5923 onig_free_body(to);\\r
5924 xmemcpy(to, from, sizeof(regex_t));\\r
5925 xfree(from);\\r
5926} while (0)\r
5927\r
5928extern void\r
5929onig_transfer(regex_t* to, regex_t* from)\r
5930{\r
14b0e578 5931 REGEX_TRANSFER(to, from);\r
14b0e578
CS
5932}\r
5933\r
14b0e578 5934\r
b602265d 5935#ifdef ONIG_DEBUG_PARSE\r
14b0e578
CS
5936static void print_tree P_((FILE* f, Node* node));\r
5937#endif\r
5938\r
5939extern int\r
5940onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,\r
b602265d 5941 OnigErrorInfo* einfo)\r
14b0e578
CS
5942{\r
5943#define COMPILE_INIT_SIZE 20\r
5944\r
5945 int r, init_size;\r
5946 Node* root;\r
5947 ScanEnv scan_env;\r
b602265d 5948#ifdef USE_CALL\r
14b0e578
CS
5949 UnsetAddrList uslist;\r
5950#endif\r
5951\r
b602265d
DG
5952 root = 0;\r
5953 if (IS_NOT_NULL(einfo)) {\r
5954 einfo->enc = reg->enc;\r
5955 einfo->par = (UChar* )NULL;\r
5956 }\r
14b0e578
CS
5957\r
5958#ifdef ONIG_DEBUG\r
5959 print_enc_string(stderr, reg->enc, pattern, pattern_end);\r
5960#endif\r
5961\r
5962 if (reg->alloc == 0) {\r
b602265d 5963 init_size = (int )(pattern_end - pattern) * 2;\r
14b0e578 5964 if (init_size <= 0) init_size = COMPILE_INIT_SIZE;\r
b602265d 5965 r = BB_INIT(reg, init_size);\r
14b0e578
CS
5966 if (r != 0) goto end;\r
5967 }\r
5968 else\r
5969 reg->used = 0;\r
5970\r
5971 reg->num_mem = 0;\r
5972 reg->num_repeat = 0;\r
5973 reg->num_null_check = 0;\r
5974 reg->repeat_range_alloc = 0;\r
5975 reg->repeat_range = (OnigRepeatRange* )NULL;\r
14b0e578 5976\r
b602265d
DG
5977 r = onig_parse_tree(&root, pattern, pattern_end, reg, &scan_env);\r
5978 if (r != 0) goto err;\r
14b0e578 5979\r
14b0e578
CS
5980 /* mixed use named group and no-named group */\r
5981 if (scan_env.num_named > 0 &&\r
5982 IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&\r
b602265d 5983 ! ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {\r
14b0e578
CS
5984 if (scan_env.num_named != scan_env.num_mem)\r
5985 r = disable_noname_group_capture(&root, reg, &scan_env);\r
5986 else\r
5987 r = numbered_ref_check(root);\r
5988\r
5989 if (r != 0) goto err;\r
5990 }\r
14b0e578 5991\r
b602265d
DG
5992 r = check_backrefs(root, &scan_env);\r
5993 if (r != 0) goto err;\r
5994\r
5995#ifdef USE_CALL\r
14b0e578
CS
5996 if (scan_env.num_call > 0) {\r
5997 r = unset_addr_list_init(&uslist, scan_env.num_call);\r
5998 if (r != 0) goto err;\r
5999 scan_env.unset_addr_list = &uslist;\r
b602265d
DG
6000 r = setup_call(root, &scan_env, 0);\r
6001 if (r != 0) goto err_unset;\r
6002 r = setup_call2(root);\r
14b0e578 6003 if (r != 0) goto err_unset;\r
b602265d 6004 r = recursive_call_check_trav(root, &scan_env, 0);\r
14b0e578 6005 if (r < 0) goto err_unset;\r
b602265d 6006 r = infinite_recursive_call_check_trav(root, &scan_env);\r
14b0e578
CS
6007 if (r != 0) goto err_unset;\r
6008\r
b602265d 6009 setup_called_state(root, 0);\r
14b0e578 6010 }\r
b602265d
DG
6011\r
6012 reg->num_call = scan_env.num_call;\r
14b0e578
CS
6013#endif\r
6014\r
6015 r = setup_tree(root, reg, 0, &scan_env);\r
6016 if (r != 0) goto err_unset;\r
6017\r
b602265d 6018#ifdef ONIG_DEBUG_PARSE\r
14b0e578
CS
6019 print_tree(stderr, root);\r
6020#endif\r
6021\r
6022 reg->capture_history = scan_env.capture_history;\r
6023 reg->bt_mem_start = scan_env.bt_mem_start;\r
6024 reg->bt_mem_start |= reg->capture_history;\r
6025 if (IS_FIND_CONDITION(reg->options))\r
b602265d 6026 MEM_STATUS_ON_ALL(reg->bt_mem_end);\r
14b0e578
CS
6027 else {\r
6028 reg->bt_mem_end = scan_env.bt_mem_end;\r
6029 reg->bt_mem_end |= reg->capture_history;\r
6030 }\r
b602265d 6031 reg->bt_mem_start |= reg->bt_mem_end;\r
14b0e578
CS
6032\r
6033 clear_optimize_info(reg);\r
6034#ifndef ONIG_DONT_OPTIMIZE\r
6035 r = set_optimize_info_from_tree(root, reg, &scan_env);\r
6036 if (r != 0) goto err_unset;\r
6037#endif\r
6038\r
b602265d
DG
6039 if (IS_NOT_NULL(scan_env.mem_env_dynamic)) {\r
6040 xfree(scan_env.mem_env_dynamic);\r
6041 scan_env.mem_env_dynamic = (MemEnv* )NULL;\r
14b0e578
CS
6042 }\r
6043\r
b602265d 6044 r = compile_tree(root, reg, &scan_env);\r
14b0e578 6045 if (r == 0) {\r
b602265d
DG
6046 if (scan_env.keep_num > 0) {\r
6047 r = add_opcode(reg, OP_UPDATE_VAR);\r
6048 if (r != 0) goto err;\r
6049 r = add_update_var_type(reg, UPDATE_VAR_KEEP_FROM_STACK_LAST);\r
6050 if (r != 0) goto err;\r
6051 r = add_mem_num(reg, 0 /* not used */);\r
6052 if (r != 0) goto err;\r
6053 }\r
6054\r
14b0e578 6055 r = add_opcode(reg, OP_END);\r
b602265d 6056#ifdef USE_CALL\r
14b0e578 6057 if (scan_env.num_call > 0) {\r
b602265d 6058 r = fix_unset_addr_list(&uslist, reg);\r
14b0e578 6059 unset_addr_list_end(&uslist);\r
b602265d 6060 if (r != 0) goto err;\r
14b0e578
CS
6061 }\r
6062#endif\r
6063\r
b602265d
DG
6064 if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0)\r
6065#ifdef USE_CALLOUT\r
6066 || (IS_NOT_NULL(REG_EXTP(reg)) && REG_EXTP(reg)->callout_num != 0)\r
6067#endif\r
6068 )\r
14b0e578
CS
6069 reg->stack_pop_level = STACK_POP_LEVEL_ALL;\r
6070 else {\r
6071 if (reg->bt_mem_start != 0)\r
b602265d 6072 reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;\r
14b0e578 6073 else\r
b602265d 6074 reg->stack_pop_level = STACK_POP_LEVEL_FREE;\r
14b0e578
CS
6075 }\r
6076 }\r
b602265d 6077#ifdef USE_CALL\r
14b0e578
CS
6078 else if (scan_env.num_call > 0) {\r
6079 unset_addr_list_end(&uslist);\r
6080 }\r
6081#endif\r
6082 onig_node_free(root);\r
6083\r
6084#ifdef ONIG_DEBUG_COMPILE\r
14b0e578 6085 onig_print_names(stderr, reg);\r
b602265d 6086 onig_print_compiled_byte_code_list(stderr, reg);\r
14b0e578
CS
6087#endif\r
6088\r
6089 end:\r
14b0e578
CS
6090 return r;\r
6091\r
6092 err_unset:\r
b602265d 6093#ifdef USE_CALL\r
14b0e578
CS
6094 if (scan_env.num_call > 0) {\r
6095 unset_addr_list_end(&uslist);\r
6096 }\r
6097#endif\r
6098 err:\r
6099 if (IS_NOT_NULL(scan_env.error)) {\r
6100 if (IS_NOT_NULL(einfo)) {\r
14b0e578
CS
6101 einfo->par = scan_env.error;\r
6102 einfo->par_end = scan_env.error_end;\r
6103 }\r
6104 }\r
6105\r
6106 onig_node_free(root);\r
b602265d
DG
6107 if (IS_NOT_NULL(scan_env.mem_env_dynamic))\r
6108 xfree(scan_env.mem_env_dynamic);\r
14b0e578
CS
6109 return r;\r
6110}\r
6111\r
b602265d
DG
6112\r
6113static int onig_inited = 0;\r
6114\r
14b0e578 6115extern int\r
b602265d
DG
6116onig_reg_init(regex_t* reg, OnigOptionType option, OnigCaseFoldType case_fold_flag,\r
6117 OnigEncoding enc, OnigSyntaxType* syntax)\r
14b0e578
CS
6118{\r
6119 int r;\r
14b0e578 6120\r
b602265d 6121 xmemset(reg, 0, sizeof(*reg));\r
14b0e578 6122\r
b602265d
DG
6123 if (onig_inited == 0) {\r
6124#if 0\r
6125 return ONIGERR_LIBRARY_IS_NOT_INITIALIZED;\r
6126#else\r
6127 r = onig_initialize(&enc, 1);\r
6128 if (r != 0)\r
6129 return ONIGERR_FAIL_TO_INITIALIZE;\r
14b0e578 6130\r
b602265d
DG
6131 onig_warning("You didn't call onig_initialize() explicitly");\r
6132#endif\r
6133 }\r
14b0e578
CS
6134\r
6135 if (IS_NULL(reg))\r
6136 return ONIGERR_INVALID_ARGUMENT;\r
6137\r
6138 if (ONIGENC_IS_UNDEF(enc))\r
6139 return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED;\r
6140\r
6141 if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))\r
6142 == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {\r
6143 return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;\r
6144 }\r
6145\r
14b0e578
CS
6146 if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {\r
6147 option |= syntax->options;\r
6148 option &= ~ONIG_OPTION_SINGLELINE;\r
6149 }\r
6150 else\r
6151 option |= syntax->options;\r
6152\r
6153 (reg)->enc = enc;\r
6154 (reg)->options = option;\r
6155 (reg)->syntax = syntax;\r
6156 (reg)->optimize = 0;\r
6157 (reg)->exact = (UChar* )NULL;\r
6158 (reg)->int_map = (int* )NULL;\r
6159 (reg)->int_map_backward = (int* )NULL;\r
b602265d 6160 REG_EXTPL(reg) = NULL;\r
14b0e578
CS
6161\r
6162 (reg)->p = (UChar* )NULL;\r
6163 (reg)->alloc = 0;\r
6164 (reg)->used = 0;\r
6165 (reg)->name_table = (void* )NULL;\r
6166\r
6167 (reg)->case_fold_flag = case_fold_flag;\r
6168 return 0;\r
6169}\r
6170\r
6171extern int\r
b602265d
DG
6172onig_new_without_alloc(regex_t* reg,\r
6173 const UChar* pattern, const UChar* pattern_end,\r
6174 OnigOptionType option, OnigEncoding enc,\r
6175 OnigSyntaxType* syntax, OnigErrorInfo* einfo)\r
14b0e578
CS
6176{\r
6177 int r;\r
6178\r
6179 r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);\r
b602265d 6180 if (r != 0) return r;\r
14b0e578
CS
6181\r
6182 r = onig_compile(reg, pattern, pattern_end, einfo);\r
6183 return r;\r
6184}\r
6185\r
6186extern int\r
6187onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,\r
b602265d
DG
6188 OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,\r
6189 OnigErrorInfo* einfo)\r
14b0e578
CS
6190{\r
6191 int r;\r
6192\r
6193 *reg = (regex_t* )xmalloc(sizeof(regex_t));\r
6194 if (IS_NULL(*reg)) return ONIGERR_MEMORY;\r
6195\r
6196 r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);\r
b602265d 6197 if (r != 0) goto err;\r
14b0e578
CS
6198\r
6199 r = onig_compile(*reg, pattern, pattern_end, einfo);\r
b602265d 6200 if (r != 0) {\r
14b0e578
CS
6201 err:\r
6202 onig_free(*reg);\r
6203 *reg = NULL;\r
6204 }\r
6205 return r;\r
6206}\r
6207\r
14b0e578 6208extern int\r
b602265d 6209onig_initialize(OnigEncoding encodings[], int n)\r
14b0e578 6210{\r
b602265d
DG
6211 int i;\r
6212 int r;\r
6213\r
14b0e578
CS
6214 if (onig_inited != 0)\r
6215 return 0;\r
6216\r
b602265d 6217 onigenc_init();\r
14b0e578
CS
6218\r
6219 onig_inited = 1;\r
6220\r
b602265d
DG
6221 for (i = 0; i < n; i++) {\r
6222 OnigEncoding enc = encodings[i];\r
6223 r = onig_initialize_encoding(enc);\r
6224 if (r != 0)\r
6225 return r;\r
6226 }\r
14b0e578 6227\r
b602265d 6228 return ONIG_NORMAL;\r
14b0e578
CS
6229}\r
6230\r
b602265d
DG
6231typedef struct EndCallListItem {\r
6232 struct EndCallListItem* next;\r
6233 void (*func)(void);\r
6234} EndCallListItemType;\r
14b0e578 6235\r
b602265d 6236static EndCallListItemType* EndCallTop;\r
14b0e578
CS
6237\r
6238extern void onig_add_end_call(void (*func)(void))\r
6239{\r
b602265d 6240 EndCallListItemType* item;\r
14b0e578 6241\r
b602265d 6242 item = (EndCallListItemType* )xmalloc(sizeof(*item));\r
14b0e578
CS
6243 if (item == 0) return ;\r
6244\r
6245 item->next = EndCallTop;\r
6246 item->func = func;\r
6247\r
6248 EndCallTop = item;\r
6249}\r
6250\r
6251static void\r
6252exec_end_call_list(void)\r
6253{\r
b602265d 6254 EndCallListItemType* prev;\r
14b0e578
CS
6255 void (*func)(void);\r
6256\r
6257 while (EndCallTop != 0) {\r
6258 func = EndCallTop->func;\r
6259 (*func)();\r
6260\r
6261 prev = EndCallTop;\r
6262 EndCallTop = EndCallTop->next;\r
6263 xfree(prev);\r
6264 }\r
6265}\r
6266\r
6267extern int\r
6268onig_end(void)\r
6269{\r
14b0e578
CS
6270 exec_end_call_list();\r
6271\r
b602265d
DG
6272#ifdef USE_CALLOUT\r
6273 onig_global_callout_names_free();\r
14b0e578
CS
6274#endif\r
6275\r
b602265d 6276 onigenc_end();\r
14b0e578
CS
6277\r
6278 onig_inited = 0;\r
6279\r
14b0e578
CS
6280 return 0;\r
6281}\r
6282\r
6283extern int\r
6284onig_is_in_code_range(const UChar* p, OnigCodePoint code)\r
6285{\r
6286 OnigCodePoint n, *data;\r
6287 OnigCodePoint low, high, x;\r
6288\r
6289 GET_CODE_POINT(n, p);\r
6290 data = (OnigCodePoint* )p;\r
6291 data++;\r
6292\r
6293 for (low = 0, high = n; low < high; ) {\r
6294 x = (low + high) >> 1;\r
6295 if (code > data[x * 2 + 1])\r
6296 low = x + 1;\r
6297 else\r
6298 high = x;\r
6299 }\r
6300\r
6301 return ((low < n && code >= data[low * 2]) ? 1 : 0);\r
6302}\r
6303\r
6304extern int\r
b602265d 6305onig_is_code_in_cc_len(int elen, OnigCodePoint code, /* CClassNode* */ void* cc_arg)\r
14b0e578
CS
6306{\r
6307 int found;\r
b602265d 6308 CClassNode* cc = (CClassNode* )cc_arg;\r
14b0e578
CS
6309\r
6310 if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {\r
6311 if (IS_NULL(cc->mbuf)) {\r
6312 found = 0;\r
6313 }\r
6314 else {\r
6315 found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);\r
6316 }\r
6317 }\r
6318 else {\r
6319 found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);\r
6320 }\r
6321\r
6322 if (IS_NCCLASS_NOT(cc))\r
6323 return !found;\r
6324 else\r
6325 return found;\r
6326}\r
6327\r
6328extern int\r
6329onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)\r
6330{\r
6331 int len;\r
6332\r
6333 if (ONIGENC_MBC_MINLEN(enc) > 1) {\r
6334 len = 2;\r
6335 }\r
6336 else {\r
6337 len = ONIGENC_CODE_TO_MBCLEN(enc, code);\r
6338 }\r
6339 return onig_is_code_in_cc_len(len, code, cc);\r
6340}\r
6341\r
6342\r
b602265d 6343#ifdef ONIG_DEBUG_PARSE\r
14b0e578
CS
6344\r
6345static void\r
6346p_string(FILE* f, int len, UChar* s)\r
6347{\r
6348 fputs(":", f);\r
6349 while (len-- > 0) { fputc(*s++, f); }\r
6350}\r
6351\r
6352static void\r
b602265d 6353Indent(FILE* f, int indent)\r
14b0e578 6354{\r
b602265d
DG
6355 int i;\r
6356 for (i = 0; i < indent; i++) putc(' ', f);\r
14b0e578
CS
6357}\r
6358\r
6359static void\r
6360print_indent_tree(FILE* f, Node* node, int indent)\r
6361{\r
b602265d
DG
6362 int i;\r
6363 NodeType type;\r
14b0e578 6364 UChar* p;\r
b602265d 6365 int add = 3;\r
14b0e578
CS
6366\r
6367 Indent(f, indent);\r
6368 if (IS_NULL(node)) {\r
6369 fprintf(f, "ERROR: null node!!!\n");\r
6370 exit (0);\r
6371 }\r
6372\r
b602265d 6373 type = NODE_TYPE(node);\r
14b0e578 6374 switch (type) {\r
b602265d
DG
6375 case NODE_LIST:\r
6376 case NODE_ALT:\r
6377 if (type == NODE_LIST)\r
6378 fprintf(f, "<list:%p>\n", node);\r
14b0e578 6379 else\r
b602265d 6380 fprintf(f, "<alt:%p>\n", node);\r
14b0e578 6381\r
b602265d
DG
6382 print_indent_tree(f, NODE_CAR(node), indent + add);\r
6383 while (IS_NOT_NULL(node = NODE_CDR(node))) {\r
6384 if (NODE_TYPE(node) != type) {\r
6385 fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NODE_TYPE(node));\r
6386 exit(0);\r
14b0e578 6387 }\r
b602265d 6388 print_indent_tree(f, NODE_CAR(node), indent + add);\r
14b0e578
CS
6389 }\r
6390 break;\r
6391\r
b602265d
DG
6392 case NODE_STRING:\r
6393 fprintf(f, "<string%s:%p>", (NODE_STRING_IS_RAW(node) ? "-raw" : ""), node);\r
6394 for (p = STR_(node)->s; p < STR_(node)->end; p++) {\r
14b0e578 6395 if (*p >= 0x20 && *p < 0x7f)\r
b602265d 6396 fputc(*p, f);\r
14b0e578 6397 else {\r
b602265d 6398 fprintf(f, " 0x%02x", *p);\r
14b0e578
CS
6399 }\r
6400 }\r
6401 break;\r
6402\r
b602265d
DG
6403 case NODE_CCLASS:\r
6404 fprintf(f, "<cclass:%p>", node);\r
6405 if (IS_NCCLASS_NOT(CCLASS_(node))) fputs(" not", f);\r
6406 if (CCLASS_(node)->mbuf) {\r
6407 BBuf* bbuf = CCLASS_(node)->mbuf;\r
14b0e578 6408 for (i = 0; i < bbuf->used; i++) {\r
b602265d
DG
6409 if (i > 0) fprintf(f, ",");\r
6410 fprintf(f, "%0x", bbuf->p[i]);\r
14b0e578
CS
6411 }\r
6412 }\r
6413 break;\r
6414\r
b602265d
DG
6415 case NODE_CTYPE:\r
6416 fprintf(f, "<ctype:%p> ", node);\r
6417 switch (CTYPE_(node)->ctype) {\r
6418 case CTYPE_ANYCHAR:\r
6419 fprintf(f, "<anychar:%p>", node);\r
6420 break;\r
6421\r
14b0e578 6422 case ONIGENC_CTYPE_WORD:\r
b602265d
DG
6423 if (CTYPE_(node)->not != 0)\r
6424 fputs("not word", f);\r
14b0e578 6425 else\r
b602265d
DG
6426 fputs("word", f);\r
6427\r
6428 if (CTYPE_(node)->ascii_mode != 0)\r
6429 fputs(" (ascii)", f);\r
6430\r
14b0e578
CS
6431 break;\r
6432\r
6433 default:\r
6434 fprintf(f, "ERROR: undefined ctype.\n");\r
6435 exit(0);\r
6436 }\r
6437 break;\r
6438\r
b602265d
DG
6439 case NODE_ANCHOR:\r
6440 fprintf(f, "<anchor:%p> ", node);\r
6441 switch (ANCHOR_(node)->type) {\r
6442 case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break;\r
6443 case ANCHOR_END_BUF: fputs("end buf", f); break;\r
6444 case ANCHOR_BEGIN_LINE: fputs("begin line", f); break;\r
6445 case ANCHOR_END_LINE: fputs("end line", f); break;\r
6446 case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break;\r
6447 case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;\r
14b0e578 6448\r
b602265d
DG
6449 case ANCHOR_WORD_BOUNDARY: fputs("word boundary", f); break;\r
6450 case ANCHOR_NO_WORD_BOUNDARY: fputs("not word boundary", f); break;\r
14b0e578 6451#ifdef USE_WORD_BEGIN_END\r
b602265d
DG
6452 case ANCHOR_WORD_BEGIN: fputs("word begin", f); break;\r
6453 case ANCHOR_WORD_END: fputs("word end", f); break;\r
14b0e578 6454#endif\r
b602265d
DG
6455 case ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:\r
6456 fputs("extended-grapheme-cluster boundary", f); break;\r
6457 case ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:\r
6458 fputs("no-extended-grapheme-cluster boundary", f); break;\r
6459 case ANCHOR_PREC_READ:\r
6460 fprintf(f, "prec read\n");\r
6461 print_indent_tree(f, NODE_BODY(node), indent + add);\r
6462 break;\r
6463 case ANCHOR_PREC_READ_NOT:\r
6464 fprintf(f, "prec read not\n");\r
6465 print_indent_tree(f, NODE_BODY(node), indent + add);\r
6466 break;\r
6467 case ANCHOR_LOOK_BEHIND:\r
6468 fprintf(f, "look behind\n");\r
6469 print_indent_tree(f, NODE_BODY(node), indent + add);\r
6470 break;\r
6471 case ANCHOR_LOOK_BEHIND_NOT:\r
6472 fprintf(f, "look behind not\n");\r
6473 print_indent_tree(f, NODE_BODY(node), indent + add);\r
6474 break;\r
14b0e578
CS
6475\r
6476 default:\r
6477 fprintf(f, "ERROR: undefined anchor type.\n");\r
6478 break;\r
6479 }\r
6480 break;\r
6481\r
b602265d 6482 case NODE_BACKREF:\r
14b0e578
CS
6483 {\r
6484 int* p;\r
b602265d 6485 BackRefNode* br = BACKREF_(node);\r
14b0e578 6486 p = BACKREFS_P(br);\r
b602265d 6487 fprintf(f, "<backref%s:%p>", NODE_IS_CHECKER(node) ? "-checker" : "", node);\r
14b0e578 6488 for (i = 0; i < br->back_num; i++) {\r
b602265d
DG
6489 if (i > 0) fputs(", ", f);\r
6490 fprintf(f, "%d", p[i]);\r
14b0e578
CS
6491 }\r
6492 }\r
6493 break;\r
6494\r
b602265d
DG
6495#ifdef USE_CALL\r
6496 case NODE_CALL:\r
14b0e578 6497 {\r
b602265d
DG
6498 CallNode* cn = CALL_(node);\r
6499 fprintf(f, "<call:%p>", node);\r
14b0e578
CS
6500 p_string(f, cn->name_end - cn->name, cn->name);\r
6501 }\r
6502 break;\r
6503#endif\r
6504\r
b602265d
DG
6505 case NODE_QUANT:\r
6506 fprintf(f, "<quantifier:%p>{%d,%d}%s\n", node,\r
6507 QUANT_(node)->lower, QUANT_(node)->upper,\r
6508 (QUANT_(node)->greedy ? "" : "?"));\r
6509 print_indent_tree(f, NODE_BODY(node), indent + add);\r
14b0e578
CS
6510 break;\r
6511\r
b602265d
DG
6512 case NODE_ENCLOSURE:\r
6513 fprintf(f, "<enclosure:%p> ", node);\r
6514 switch (ENCLOSURE_(node)->type) {\r
6515 case ENCLOSURE_OPTION:\r
6516 fprintf(f, "option:%d", ENCLOSURE_(node)->o.options);\r
14b0e578 6517 break;\r
b602265d
DG
6518 case ENCLOSURE_MEMORY:\r
6519 fprintf(f, "memory:%d", ENCLOSURE_(node)->m.regnum);\r
14b0e578 6520 break;\r
b602265d 6521 case ENCLOSURE_STOP_BACKTRACK:\r
14b0e578
CS
6522 fprintf(f, "stop-bt");\r
6523 break;\r
6524\r
6525 default:\r
6526 break;\r
6527 }\r
6528 fprintf(f, "\n");\r
b602265d
DG
6529 print_indent_tree(f, NODE_BODY(node), indent + add);\r
6530 break;\r
6531\r
6532 case NODE_GIMMICK:\r
6533 fprintf(f, "<gimmick:%p> ", node);\r
6534 switch (GIMMICK_(node)->type) {\r
6535 case GIMMICK_FAIL:\r
6536 fprintf(f, "fail");\r
6537 break;\r
6538 case GIMMICK_KEEP:\r
6539 fprintf(f, "keep:%d", GIMMICK_(node)->id);\r
6540 break;\r
6541 case GIMMICK_SAVE:\r
6542 fprintf(f, "save:%d:%d", GIMMICK_(node)->detail_type, GIMMICK_(node)->id);\r
6543 break;\r
6544 case GIMMICK_UPDATE_VAR:\r
6545 fprintf(f, "update_var:%d:%d", GIMMICK_(node)->detail_type, GIMMICK_(node)->id);\r
6546 break;\r
6547#ifdef USE_CALLOUT\r
6548 case GIMMICK_CALLOUT:\r
6549 switch (GIMMICK_(node)->detail_type) {\r
6550 case ONIG_CALLOUT_OF_CONTENTS:\r
6551 fprintf(f, "callout:contents:%d", GIMMICK_(node)->num);\r
6552 break;\r
6553 case ONIG_CALLOUT_OF_NAME:\r
6554 fprintf(f, "callout:name:%d:%d", GIMMICK_(node)->id, GIMMICK_(node)->num);\r
6555 break;\r
6556 }\r
6557#endif\r
6558 }\r
14b0e578
CS
6559 break;\r
6560\r
6561 default:\r
b602265d 6562 fprintf(f, "print_indent_tree: undefined node type %d\n", NODE_TYPE(node));\r
14b0e578
CS
6563 break;\r
6564 }\r
6565\r
b602265d
DG
6566 if (type != NODE_LIST && type != NODE_ALT && type != NODE_QUANT &&\r
6567 type != NODE_ENCLOSURE)\r
14b0e578
CS
6568 fprintf(f, "\n");\r
6569 fflush(f);\r
6570}\r
14b0e578 6571\r
14b0e578
CS
6572static void\r
6573print_tree(FILE* f, Node* node)\r
6574{\r
6575 print_indent_tree(f, node, 0);\r
6576}\r
6577#endif\r