]> git.proxmox.com Git - mirror_edk2.git/blame - MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regcomp.c
MdeModulePkg RegularExpressionDxe: Update Oniguruma from v6.9.0 to v6.9.3
[mirror_edk2.git] / MdeModulePkg / Universal / RegularExpressionDxe / Oniguruma / regcomp.c
CommitLineData
14b0e578
CS
1/**********************************************************************\r
2 regcomp.c - Oniguruma (regular expression library)\r
3**********************************************************************/\r
4/*-\r
b26691c4 5 * Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>\r
14b0e578
CS
6 * All rights reserved.\r
7 *\r
14b0e578
CS
8 * Redistribution and use in source and binary forms, with or without\r
9 * modification, are permitted provided that the following conditions\r
10 * are met:\r
11 * 1. Redistributions of source code must retain the above copyright\r
12 * notice, this list of conditions and the following disclaimer.\r
13 * 2. Redistributions in binary form must reproduce the above copyright\r
14 * notice, this list of conditions and the following disclaimer in the\r
15 * documentation and/or other materials provided with the distribution.\r
16 *\r
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND\r
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\r
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\r
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE\r
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\r
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS\r
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)\r
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT\r
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY\r
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF\r
27 * SUCH DAMAGE.\r
28 */\r
29\r
30#include "regparse.h"\r
31\r
b26691c4
LG
32#define OPS_INIT_SIZE 8\r
33\r
14b0e578
CS
34OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;\r
35\r
b602265d
DG
#if 0
/*
 * Growable stack of ints.  This whole section is compiled out by "#if 0".
 */
typedef struct {
  int  n;      /* number of values currently stored */
  int  alloc;  /* capacity of v, counted in elements */
  int* v;      /* heap buffer that holds the values */
} int_stack;

/*
 * Allocate a stack with room for init_size values and store it in *rs.
 * *rs is cleared first, so it is 0 when ONIGERR_MEMORY is returned.
 * Returns ONIG_NORMAL on success.
 */
static int
make_int_stack(int_stack** rs, int init_size)
{
  int_stack* s;
  int* v;

  *rs = 0;

  s = xmalloc(sizeof(*s));
  if (IS_NULL(s)) return ONIGERR_MEMORY;

  v = (int* )xmalloc(sizeof(int) * init_size);
  if (IS_NULL(v)) {
    xfree(s);
    return ONIGERR_MEMORY;
  }

  s->n     = 0;
  s->alloc = init_size;
  s->v     = v;

  *rs = s;
  return ONIG_NORMAL;
}

/* Release the value buffer and the stack object itself; s may be null. */
static void
free_int_stack(int_stack* s)
{
  if (IS_NOT_NULL(s)) {
    if (IS_NOT_NULL(s->v))
      xfree(s->v);
    xfree(s);
  }
}

/*
 * Append v to the stack, doubling the buffer when it is full.
 * Returns ONIG_NORMAL, or ONIGERR_MEMORY when the buffer cannot grow
 * (the stack is left unchanged in that case).
 */
static int
int_stack_push(int_stack* s, int v)
{
  if (s->n >= s->alloc) {
    int new_size = s->alloc * 2;
    int* nv = (int* )xrealloc(s->v, sizeof(int) * new_size, sizeof(int) * s->alloc);
    if (IS_NULL(nv)) return ONIGERR_MEMORY;

    s->alloc = new_size;
    s->v = nv;
  }

  s->v[s->n] = v;
  s->n++;
  return ONIG_NORMAL;
}

/*
 * Remove and return the most recently pushed value.
 * The emptiness check is only compiled in with ONIG_DEBUG; otherwise the
 * caller must not pop an empty stack.
 */
static int
int_stack_pop(int_stack* s)
{
  int v;

#ifdef ONIG_DEBUG
  if (s->n <= 0) {
    fprintf(stderr, "int_stack_pop: fail empty. %p\n", (void* )s);
    return 0;
  }
#endif

  /* push stores at v[n] and then increments n, so the top element is
     v[n - 1]: decrement first, then read. */
  s->n--;
  v = s->v[s->n];
  return v;
}
#endif
112\r
b26691c4
LG
113static int\r
114ops_init(regex_t* reg, int init_alloc_size)\r
115{\r
116 Operation* p;\r
117 size_t size;\r
118\r
119 if (init_alloc_size > 0) {\r
120 size = sizeof(Operation) * init_alloc_size;\r
121 p = (Operation* )xmalloc(size);\r
122 CHECK_NULL_RETURN_MEMERR(p);\r
123#ifdef USE_DIRECT_THREADED_CODE\r
124 {\r
125 enum OpCode* cp;\r
126 size = sizeof(enum OpCode) * init_alloc_size;\r
127 cp = (enum OpCode* )xmalloc(size);\r
128 CHECK_NULL_RETURN_MEMERR(cp);\r
129 reg->ocs = cp;\r
130 }\r
131#endif\r
132 }\r
133 else {\r
134 p = (Operation* )0;\r
135#ifdef USE_DIRECT_THREADED_CODE\r
136 reg->ocs = (enum OpCode* )0;\r
137#endif\r
138 }\r
139\r
140 reg->ops = p;\r
141 reg->ops_curr = 0; /* !!! not yet done ops_new() */\r
142 reg->ops_alloc = init_alloc_size;\r
143 reg->ops_used = 0;\r
144\r
145 return ONIG_NORMAL;\r
146}\r
147\r
148static int\r
149ops_expand(regex_t* reg, int n)\r
150{\r
151#define MIN_OPS_EXPAND_SIZE 4\r
152\r
153#ifdef USE_DIRECT_THREADED_CODE\r
154 enum OpCode* cp;\r
155#endif\r
156 Operation* p;\r
157 size_t size;\r
158\r
159 if (n <= 0) n = MIN_OPS_EXPAND_SIZE;\r
160\r
161 n += reg->ops_alloc;\r
162\r
163 size = sizeof(Operation) * n;\r
164 p = (Operation* )xrealloc(reg->ops, size, sizeof(Operation) * reg->ops_alloc);\r
165 CHECK_NULL_RETURN_MEMERR(p);\r
166\r
167#ifdef USE_DIRECT_THREADED_CODE\r
168 size = sizeof(enum OpCode) * n;\r
169 cp = (enum OpCode* )xrealloc(reg->ocs, size, sizeof(enum OpCode) * reg->ops_alloc);\r
170 CHECK_NULL_RETURN_MEMERR(cp);\r
171 reg->ocs = cp;\r
172#endif\r
173\r
174 reg->ops = p;\r
175 reg->ops_alloc = n;\r
176 if (reg->ops_used == 0)\r
177 reg->ops_curr = 0;\r
178 else\r
179 reg->ops_curr = reg->ops + (reg->ops_used - 1);\r
180\r
181 return ONIG_NORMAL;\r
182}\r
183\r
184static int\r
185ops_new(regex_t* reg)\r
186{\r
187 int r;\r
188\r
189 if (reg->ops_used >= reg->ops_alloc) {\r
190 r = ops_expand(reg, reg->ops_alloc);\r
191 if (r != ONIG_NORMAL) return r;\r
192 }\r
193\r
194 reg->ops_curr = reg->ops + reg->ops_used;\r
195 reg->ops_used++;\r
196\r
197 xmemset(reg->ops_curr, 0, sizeof(Operation));\r
198 return ONIG_NORMAL;\r
199}\r
200\r
201static int\r
202is_in_string_pool(regex_t* reg, UChar* s)\r
203{\r
204 return (s >= reg->string_pool && s < reg->string_pool_end);\r
205}\r
206\r
207static void\r
208ops_free(regex_t* reg)\r
209{\r
210 int i;\r
211\r
212 if (IS_NULL(reg->ops)) return ;\r
213\r
214 for (i = 0; i < (int )reg->ops_used; i++) {\r
215 enum OpCode opcode;\r
216 Operation* op;\r
217\r
218 op = reg->ops + i;\r
219\r
220#ifdef USE_DIRECT_THREADED_CODE\r
221 opcode = *(reg->ocs + i);\r
222#else\r
223 opcode = op->opcode;\r
224#endif\r
225\r
226 switch (opcode) {\r
227 case OP_EXACTMBN:\r
228 if (! is_in_string_pool(reg, op->exact_len_n.s))\r
229 xfree(op->exact_len_n.s);\r
230 break;\r
231 case OP_EXACTN: case OP_EXACTMB2N: case OP_EXACTMB3N: case OP_EXACTN_IC:\r
232 if (! is_in_string_pool(reg, op->exact_n.s))\r
233 xfree(op->exact_n.s);\r
234 break;\r
235 case OP_EXACT1: case OP_EXACT2: case OP_EXACT3: case OP_EXACT4:\r
236 case OP_EXACT5: case OP_EXACTMB2N1: case OP_EXACTMB2N2:\r
237 case OP_EXACTMB2N3: case OP_EXACT1_IC:\r
238 break;\r
239\r
240 case OP_CCLASS_NOT: case OP_CCLASS:\r
241 xfree(op->cclass.bsp);\r
242 break;\r
243\r
244 case OP_CCLASS_MB_NOT: case OP_CCLASS_MB:\r
245 xfree(op->cclass_mb.mb);\r
246 break;\r
247 case OP_CCLASS_MIX_NOT: case OP_CCLASS_MIX:\r
248 xfree(op->cclass_mix.mb);\r
249 xfree(op->cclass_mix.bsp);\r
250 break;\r
251\r
252 case OP_BACKREF1: case OP_BACKREF2: case OP_BACKREF_N: case OP_BACKREF_N_IC:\r
253 break;\r
254 case OP_BACKREF_MULTI: case OP_BACKREF_MULTI_IC:\r
255 case OP_BACKREF_WITH_LEVEL:\r
256 case OP_BACKREF_WITH_LEVEL_IC:\r
257 case OP_BACKREF_CHECK:\r
258 case OP_BACKREF_CHECK_WITH_LEVEL:\r
259 if (op->backref_general.num != 1)\r
260 xfree(op->backref_general.ns);\r
261 break;\r
262\r
263 default:\r
264 break;\r
265 }\r
266 }\r
267\r
268 xfree(reg->ops);\r
269#ifdef USE_DIRECT_THREADED_CODE\r
270 xfree(reg->ocs);\r
271 reg->ocs = 0;\r
272#endif\r
273\r
274 reg->ops = 0;\r
275 reg->ops_curr = 0;\r
276 reg->ops_alloc = 0;\r
277 reg->ops_used = 0;\r
278}\r
279\r
280static int\r
281ops_calc_size_of_string_pool(regex_t* reg)\r
282{\r
283 int i;\r
284 int total;\r
285\r
286 if (IS_NULL(reg->ops)) return 0;\r
287\r
288 total = 0;\r
289 for (i = 0; i < (int )reg->ops_used; i++) {\r
290 enum OpCode opcode;\r
291 Operation* op;\r
292\r
293 op = reg->ops + i;\r
294#ifdef USE_DIRECT_THREADED_CODE\r
295 opcode = *(reg->ocs + i);\r
296#else\r
297 opcode = op->opcode;\r
298#endif\r
299\r
300 switch (opcode) {\r
301 case OP_EXACTMBN:\r
302 total += op->exact_len_n.len * op->exact_len_n.n;\r
303 break;\r
304 case OP_EXACTN:\r
305 case OP_EXACTN_IC:\r
306 total += op->exact_n.n;\r
307 break;\r
308 case OP_EXACTMB2N:\r
309 total += op->exact_n.n * 2;\r
310 break;\r
311 case OP_EXACTMB3N:\r
312 total += op->exact_n.n * 3;\r
313 break;\r
314\r
315 default:\r
316 break;\r
317 }\r
318 }\r
319\r
320 return total;\r
321}\r
322\r
323static int\r
324ops_make_string_pool(regex_t* reg)\r
325{\r
326 int i;\r
327 int len;\r
328 int size;\r
329 UChar* pool;\r
330 UChar* curr;\r
331\r
332 size = ops_calc_size_of_string_pool(reg);\r
333 if (size <= 0) {\r
334 return 0;\r
335 }\r
336\r
337 curr = pool = (UChar* )xmalloc((size_t )size);\r
338 CHECK_NULL_RETURN_MEMERR(pool);\r
339\r
340 for (i = 0; i < (int )reg->ops_used; i++) {\r
341 enum OpCode opcode;\r
342 Operation* op;\r
343\r
344 op = reg->ops + i;\r
345#ifdef USE_DIRECT_THREADED_CODE\r
346 opcode = *(reg->ocs + i);\r
347#else\r
348 opcode = op->opcode;\r
349#endif\r
350\r
351 switch (opcode) {\r
352 case OP_EXACTMBN:\r
353 len = op->exact_len_n.len * op->exact_len_n.n;\r
354 xmemcpy(curr, op->exact_len_n.s, len);\r
355 xfree(op->exact_len_n.s);\r
356 op->exact_len_n.s = curr;\r
357 curr += len;\r
358 break;\r
359 case OP_EXACTN:\r
360 case OP_EXACTN_IC:\r
361 len = op->exact_n.n;\r
362 copy:\r
363 xmemcpy(curr, op->exact_n.s, len);\r
364 xfree(op->exact_n.s);\r
365 op->exact_n.s = curr;\r
366 curr += len;\r
367 break;\r
368 case OP_EXACTMB2N:\r
369 len = op->exact_n.n * 2;\r
370 goto copy;\r
371 break;\r
372 case OP_EXACTMB3N:\r
373 len = op->exact_n.n * 3;\r
374 goto copy;\r
375 break;\r
376\r
377 default:\r
378 break;\r
379 }\r
380 }\r
381\r
382 reg->string_pool = pool;\r
383 reg->string_pool_end = pool + size;\r
384 return 0;\r
385}\r
386\r
14b0e578
CS
387extern OnigCaseFoldType\r
388onig_get_default_case_fold_flag(void)\r
389{\r
390 return OnigDefaultCaseFoldFlag;\r
391}\r
392\r
393extern int\r
394onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)\r
395{\r
396 OnigDefaultCaseFoldFlag = case_fold_flag;\r
397 return 0;\r
398}\r
399\r
b602265d
DG
/*
 * Compare the product x * y with v without overflowing int.
 *
 * Returns 1 when x * y > v, 0 when x * y == v and -1 when x * y < v.
 * A product that does not fit in an int counts as greater than v.
 * x and y are expected to be non-negative.
 */
static int
int_multiply_cmp(int x, int y, int v)
{
  int xy;

  if (x == 0 || y == 0) {
    /* the product is exactly 0; the division below must be skipped */
    xy = 0;
  }
  else if (x <= INT_MAX / y) {
    /* x * y <= INT_MAX, so the multiplication cannot overflow */
    xy = x * y;
  }
  else {
    return 1;
  }

  if (xy > v) return 1;
  if (xy == v) return 0;
  return -1;
}
416\r
b26691c4
LG
/*
 * Multiply two non-negative ints.
 *
 * Returns x * y when the product fits in an int (0 when either factor
 * is 0), or -1 when it would exceed INT_MAX.
 */
extern int
onig_positive_int_multiply(int x, int y)
{
  if (x == 0 || y == 0) return 0;

  /* "<=" so that a product of exactly INT_MAX / y * y is still
     accepted: it is representable and must not be reported as overflow */
  if (x <= INT_MAX / y)
    return x * y;
  else
    return -1;
}
b602265d 427\r
b602265d 428\r
14b0e578
CS
429static void\r
430swap_node(Node* a, Node* b)\r
431{\r
432 Node c;\r
14b0e578 433\r
b602265d
DG
434 c = *a; *a = *b; *b = c;\r
435\r
436 if (NODE_TYPE(a) == NODE_STRING) {\r
437 StrNode* sn = STR_(a);\r
b26691c4 438 if (sn->capacity == 0) {\r
b602265d 439 int len = (int )(sn->end - sn->s);\r
14b0e578
CS
440 sn->s = sn->buf;\r
441 sn->end = sn->s + len;\r
442 }\r
443 }\r
444\r
b602265d
DG
445 if (NODE_TYPE(b) == NODE_STRING) {\r
446 StrNode* sn = STR_(b);\r
b26691c4 447 if (sn->capacity == 0) {\r
b602265d 448 int len = (int )(sn->end - sn->s);\r
14b0e578
CS
449 sn->s = sn->buf;\r
450 sn->end = sn->s + len;\r
451 }\r
452 }\r
453}\r
454\r
b602265d
DG
455static OnigLen\r
456distance_add(OnigLen d1, OnigLen d2)\r
14b0e578 457{\r
b602265d
DG
458 if (d1 == INFINITE_LEN || d2 == INFINITE_LEN)\r
459 return INFINITE_LEN;\r
14b0e578 460 else {\r
b602265d
DG
461 if (d1 <= INFINITE_LEN - d2) return d1 + d2;\r
462 else return INFINITE_LEN;\r
14b0e578
CS
463 }\r
464}\r
465\r
b602265d
DG
466static OnigLen\r
467distance_multiply(OnigLen d, int m)\r
14b0e578
CS
468{\r
469 if (m == 0) return 0;\r
470\r
b602265d 471 if (d < INFINITE_LEN / m)\r
14b0e578
CS
472 return d * m;\r
473 else\r
b602265d 474 return INFINITE_LEN;\r
14b0e578
CS
475}\r
476\r
477static int\r
478bitset_is_empty(BitSetRef bs)\r
479{\r
480 int i;\r
b602265d 481\r
14b0e578
CS
482 for (i = 0; i < (int )BITSET_SIZE; i++) {\r
483 if (bs[i] != 0) return 0;\r
484 }\r
485 return 1;\r
486}\r
487\r
b602265d 488#ifdef USE_CALL\r
14b0e578
CS
489\r
490static int\r
b602265d 491unset_addr_list_init(UnsetAddrList* list, int size)\r
14b0e578 492{\r
b602265d 493 UnsetAddr* p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);\r
14b0e578 494 CHECK_NULL_RETURN_MEMERR(p);\r
b602265d
DG
495\r
496 list->num = 0;\r
497 list->alloc = size;\r
498 list->us = p;\r
14b0e578
CS
499 return 0;\r
500}\r
501\r
502static void\r
b602265d 503unset_addr_list_end(UnsetAddrList* list)\r
14b0e578 504{\r
b602265d
DG
505 if (IS_NOT_NULL(list->us))\r
506 xfree(list->us);\r
14b0e578
CS
507}\r
508\r
509static int\r
b602265d 510unset_addr_list_add(UnsetAddrList* list, int offset, struct _Node* node)\r
14b0e578
CS
511{\r
512 UnsetAddr* p;\r
513 int size;\r
514\r
b602265d
DG
515 if (list->num >= list->alloc) {\r
516 size = list->alloc * 2;\r
517 p = (UnsetAddr* )xrealloc(list->us, sizeof(UnsetAddr) * size, sizeof(UnsetAddr)* list->alloc);\r
14b0e578 518 CHECK_NULL_RETURN_MEMERR(p);\r
b602265d
DG
519 list->alloc = size;\r
520 list->us = p;\r
14b0e578
CS
521 }\r
522\r
b602265d
DG
523 list->us[list->num].offset = offset;\r
524 list->us[list->num].target = node;\r
525 list->num++;\r
14b0e578
CS
526 return 0;\r
527}\r
b602265d 528#endif /* USE_CALL */\r
14b0e578
CS
529\r
530\r
531static int\r
b26691c4 532add_op(regex_t* reg, int opcode)\r
14b0e578
CS
533{\r
534 int r;\r
535\r
b26691c4
LG
536 r = ops_new(reg);\r
537 if (r != ONIG_NORMAL) return r;\r
14b0e578 538\r
b26691c4
LG
539#ifdef USE_DIRECT_THREADED_CODE\r
540 *(reg->ocs + (reg->ops_curr - reg->ops)) = opcode;\r
541#else\r
542 reg->ops_curr->opcode = opcode;\r
543#endif\r
b602265d 544\r
b602265d
DG
545 return 0;\r
546}\r
547\r
14b0e578 548static int compile_length_tree(Node* node, regex_t* reg);\r
b602265d 549static int compile_tree(Node* node, regex_t* reg, ScanEnv* env);\r
14b0e578
CS
550\r
551\r
552#define IS_NEED_STR_LEN_OP_EXACT(op) \\r
553 ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\\r
554 (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)\r
555\r
556static int\r
557select_str_opcode(int mb_len, int str_len, int ignore_case)\r
558{\r
559 int op;\r
560\r
561 if (ignore_case) {\r
562 switch (str_len) {\r
563 case 1: op = OP_EXACT1_IC; break;\r
564 default: op = OP_EXACTN_IC; break;\r
565 }\r
566 }\r
567 else {\r
568 switch (mb_len) {\r
569 case 1:\r
570 switch (str_len) {\r
571 case 1: op = OP_EXACT1; break;\r
572 case 2: op = OP_EXACT2; break;\r
573 case 3: op = OP_EXACT3; break;\r
574 case 4: op = OP_EXACT4; break;\r
575 case 5: op = OP_EXACT5; break;\r
576 default: op = OP_EXACTN; break;\r
577 }\r
578 break;\r
579\r
580 case 2:\r
581 switch (str_len) {\r
582 case 1: op = OP_EXACTMB2N1; break;\r
583 case 2: op = OP_EXACTMB2N2; break;\r
584 case 3: op = OP_EXACTMB2N3; break;\r
585 default: op = OP_EXACTMB2N; break;\r
586 }\r
587 break;\r
588\r
589 case 3:\r
590 op = OP_EXACTMB3N;\r
591 break;\r
592\r
593 default:\r
594 op = OP_EXACTMBN;\r
595 break;\r
596 }\r
597 }\r
598 return op;\r
599}\r
600\r
601static int\r
b26691c4
LG
602is_strict_real_node(Node* node)\r
603{\r
604 switch (NODE_TYPE(node)) {\r
605 case NODE_STRING:\r
606 {\r
607 StrNode* sn = STR_(node);\r
608 return (sn->end != sn->s);\r
609 }\r
610 break;\r
611\r
612 case NODE_CCLASS:\r
613 case NODE_CTYPE:\r
614 return 1;\r
615 break;\r
616\r
617 default:\r
618 return 0;\r
619 break;\r
620 }\r
621}\r
622\r
623static int\r
624compile_tree_empty_check(Node* node, regex_t* reg, int emptiness, ScanEnv* env)\r
14b0e578
CS
625{\r
626 int r;\r
627 int saved_num_null_check = reg->num_null_check;\r
628\r
b26691c4
LG
629 if (emptiness != BODY_IS_NOT_EMPTY) {\r
630 r = add_op(reg, OP_EMPTY_CHECK_START);\r
b602265d 631 if (r != 0) return r;\r
b26691c4 632 COP(reg)->empty_check_start.mem = reg->num_null_check; /* NULL CHECK ID */\r
14b0e578
CS
633 reg->num_null_check++;\r
634 }\r
635\r
b602265d
DG
636 r = compile_tree(node, reg, env);\r
637 if (r != 0) return r;\r
14b0e578 638\r
b26691c4
LG
639 if (emptiness != BODY_IS_NOT_EMPTY) {\r
640 if (emptiness == BODY_IS_EMPTY_POSSIBILITY)\r
641 r = add_op(reg, OP_EMPTY_CHECK_END);\r
642 else if (emptiness == BODY_IS_EMPTY_POSSIBILITY_MEM)\r
643 r = add_op(reg, OP_EMPTY_CHECK_END_MEMST);\r
644 else if (emptiness == BODY_IS_EMPTY_POSSIBILITY_REC)\r
645 r = add_op(reg, OP_EMPTY_CHECK_END_MEMST_PUSH);\r
14b0e578 646\r
b602265d 647 if (r != 0) return r;\r
b26691c4 648 COP(reg)->empty_check_end.mem = saved_num_null_check; /* NULL CHECK ID */\r
14b0e578
CS
649 }\r
650 return r;\r
651}\r
652\r
b602265d 653#ifdef USE_CALL\r
14b0e578 654static int\r
b602265d 655compile_call(CallNode* node, regex_t* reg, ScanEnv* env)\r
14b0e578
CS
656{\r
657 int r;\r
b26691c4 658 int offset;\r
14b0e578 659\r
b26691c4 660 r = add_op(reg, OP_CALL);\r
b602265d 661 if (r != 0) return r;\r
b26691c4
LG
662\r
663 COP(reg)->call.addr = 0; /* dummy addr. */\r
664\r
665 offset = COP_CURR_OFFSET_BYTES(reg, call.addr);\r
666 r = unset_addr_list_add(env->unset_addr_list, offset, NODE_CALL_BODY(node));\r
14b0e578
CS
667 return r;\r
668}\r
669#endif\r
670\r
671static int\r
b602265d 672compile_tree_n_times(Node* node, int n, regex_t* reg, ScanEnv* env)\r
14b0e578
CS
673{\r
674 int i, r;\r
675\r
676 for (i = 0; i < n; i++) {\r
b602265d
DG
677 r = compile_tree(node, reg, env);\r
678 if (r != 0) return r;\r
14b0e578
CS
679 }\r
680 return 0;\r
681}\r
682\r
683static int\r
684add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, int str_len,\r
685 regex_t* reg ARG_UNUSED, int ignore_case)\r
686{\r
b26691c4 687 return 1;\r
14b0e578
CS
688}\r
689\r
690static int\r
691add_compile_string(UChar* s, int mb_len, int str_len,\r
692 regex_t* reg, int ignore_case)\r
693{\r
b26691c4
LG
694 int op;\r
695 int r;\r
696 int byte_len;\r
697 UChar* p;\r
698 UChar* end;\r
699\r
700 op = select_str_opcode(mb_len, str_len, ignore_case);\r
701 r = add_op(reg, op);\r
702 if (r != 0) return r;\r
703\r
704 byte_len = mb_len * str_len;\r
705 end = s + byte_len;\r
14b0e578 706\r
b26691c4
LG
707 if (op == OP_EXACTMBN) {\r
708 p = onigenc_strdup(reg->enc, s, end);\r
709 CHECK_NULL_RETURN_MEMERR(p);\r
710\r
711 COP(reg)->exact_len_n.len = mb_len;\r
712 COP(reg)->exact_len_n.n = str_len;\r
713 COP(reg)->exact_len_n.s = p;\r
714 }\r
715 else if (IS_NEED_STR_LEN_OP_EXACT(op)) {\r
716 p = onigenc_strdup(reg->enc, s, end);\r
717 CHECK_NULL_RETURN_MEMERR(p);\r
14b0e578 718\r
14b0e578 719 if (op == OP_EXACTN_IC)\r
b26691c4 720 COP(reg)->exact_n.n = byte_len;\r
14b0e578 721 else\r
b26691c4
LG
722 COP(reg)->exact_n.n = str_len;\r
723\r
724 COP(reg)->exact_n.s = p;\r
725 }\r
726 else {\r
727 xmemcpy(COP(reg)->exact.s, s, (size_t )byte_len);\r
728 COP(reg)->exact.s[byte_len] = '\0';\r
14b0e578
CS
729 }\r
730\r
14b0e578
CS
731 return 0;\r
732}\r
733\r
14b0e578
CS
734static int\r
735compile_length_string_node(Node* node, regex_t* reg)\r
736{\r
737 int rlen, r, len, prev_len, slen, ambig;\r
14b0e578
CS
738 UChar *p, *prev;\r
739 StrNode* sn;\r
b602265d 740 OnigEncoding enc = reg->enc;\r
14b0e578 741\r
b602265d 742 sn = STR_(node);\r
14b0e578
CS
743 if (sn->end <= sn->s)\r
744 return 0;\r
745\r
b602265d 746 ambig = NODE_STRING_IS_AMBIG(node);\r
14b0e578
CS
747\r
748 p = prev = sn->s;\r
749 prev_len = enclen(enc, p);\r
750 p += prev_len;\r
751 slen = 1;\r
752 rlen = 0;\r
753\r
754 for (; p < sn->end; ) {\r
755 len = enclen(enc, p);\r
756 if (len == prev_len) {\r
757 slen++;\r
758 }\r
759 else {\r
760 r = add_compile_string_length(prev, prev_len, slen, reg, ambig);\r
761 rlen += r;\r
762 prev = p;\r
763 slen = 1;\r
764 prev_len = len;\r
765 }\r
766 p += len;\r
767 }\r
b602265d 768\r
14b0e578
CS
769 r = add_compile_string_length(prev, prev_len, slen, reg, ambig);\r
770 rlen += r;\r
771 return rlen;\r
772}\r
773\r
774static int\r
775compile_length_string_raw_node(StrNode* sn, regex_t* reg)\r
776{\r
777 if (sn->end <= sn->s)\r
778 return 0;\r
779\r
b602265d
DG
780 return add_compile_string_length(sn->s, 1 /* sb */, (int )(sn->end - sn->s),\r
781 reg, 0);\r
14b0e578
CS
782}\r
783\r
784static int\r
785compile_string_node(Node* node, regex_t* reg)\r
786{\r
787 int r, len, prev_len, slen, ambig;\r
14b0e578
CS
788 UChar *p, *prev, *end;\r
789 StrNode* sn;\r
b602265d 790 OnigEncoding enc = reg->enc;\r
14b0e578 791\r
b602265d 792 sn = STR_(node);\r
14b0e578
CS
793 if (sn->end <= sn->s)\r
794 return 0;\r
795\r
796 end = sn->end;\r
b602265d 797 ambig = NODE_STRING_IS_AMBIG(node);\r
14b0e578
CS
798\r
799 p = prev = sn->s;\r
800 prev_len = enclen(enc, p);\r
801 p += prev_len;\r
802 slen = 1;\r
803\r
804 for (; p < end; ) {\r
805 len = enclen(enc, p);\r
806 if (len == prev_len) {\r
807 slen++;\r
808 }\r
809 else {\r
810 r = add_compile_string(prev, prev_len, slen, reg, ambig);\r
b602265d 811 if (r != 0) return r;\r
14b0e578
CS
812\r
813 prev = p;\r
814 slen = 1;\r
815 prev_len = len;\r
816 }\r
817\r
818 p += len;\r
819 }\r
b602265d 820\r
14b0e578
CS
821 return add_compile_string(prev, prev_len, slen, reg, ambig);\r
822}\r
823\r
824static int\r
825compile_string_raw_node(StrNode* sn, regex_t* reg)\r
826{\r
827 if (sn->end <= sn->s)\r
828 return 0;\r
829\r
b602265d 830 return add_compile_string(sn->s, 1 /* sb */, (int )(sn->end - sn->s), reg, 0);\r
14b0e578
CS
831}\r
832\r
b26691c4
LG
833static void*\r
834set_multi_byte_cclass(BBuf* mbuf, regex_t* reg)\r
14b0e578 835{\r
b26691c4
LG
836 size_t len;\r
837 void* p;\r
14b0e578 838\r
b26691c4
LG
839 len = (size_t )mbuf->used;\r
840 p = xmalloc(len);\r
841 if (IS_NULL(p)) return NULL;\r
14b0e578 842\r
b26691c4
LG
843 xmemcpy(p, mbuf->p, len);\r
844 return p;\r
14b0e578
CS
845}\r
846\r
847static int\r
848compile_length_cclass_node(CClassNode* cc, regex_t* reg)\r
849{\r
b26691c4 850 return 1;\r
14b0e578
CS
851}\r
852\r
853static int\r
854compile_cclass_node(CClassNode* cc, regex_t* reg)\r
855{\r
856 int r;\r
857\r
14b0e578 858 if (IS_NULL(cc->mbuf)) {\r
b26691c4
LG
859 r = add_op(reg, IS_NCCLASS_NOT(cc) ? OP_CCLASS_NOT : OP_CCLASS);\r
860 if (r != 0) return r;\r
14b0e578 861\r
b26691c4
LG
862 COP(reg)->cclass.bsp = xmalloc(SIZE_BITSET);\r
863 CHECK_NULL_RETURN_MEMERR(COP(reg)->cclass.bsp);\r
864 xmemcpy(COP(reg)->cclass.bsp, cc->bs, SIZE_BITSET);\r
14b0e578
CS
865 }\r
866 else {\r
b26691c4
LG
867 void* p;\r
868\r
14b0e578 869 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {\r
b26691c4
LG
870 r = add_op(reg, IS_NCCLASS_NOT(cc) ? OP_CCLASS_MB_NOT : OP_CCLASS_MB);\r
871 if (r != 0) return r;\r
14b0e578 872\r
b26691c4
LG
873 p = set_multi_byte_cclass(cc->mbuf, reg);\r
874 CHECK_NULL_RETURN_MEMERR(p);\r
875 COP(reg)->cclass_mb.mb = p;\r
14b0e578
CS
876 }\r
877 else {\r
b26691c4 878 r = add_op(reg, IS_NCCLASS_NOT(cc) ? OP_CCLASS_MIX_NOT : OP_CCLASS_MIX);\r
b602265d 879 if (r != 0) return r;\r
b26691c4
LG
880\r
881 COP(reg)->cclass_mix.bsp = xmalloc(SIZE_BITSET);\r
882 CHECK_NULL_RETURN_MEMERR(COP(reg)->cclass_mix.bsp);\r
883 xmemcpy(COP(reg)->cclass_mix.bsp, cc->bs, SIZE_BITSET);\r
884\r
885 p = set_multi_byte_cclass(cc->mbuf, reg);\r
886 CHECK_NULL_RETURN_MEMERR(p);\r
887 COP(reg)->cclass_mix.mb = p;\r
14b0e578
CS
888 }\r
889 }\r
890\r
b26691c4 891 return 0;\r
14b0e578
CS
892}\r
893\r
894static int\r
895entry_repeat_range(regex_t* reg, int id, int lower, int upper)\r
896{\r
897#define REPEAT_RANGE_ALLOC 4\r
898\r
899 OnigRepeatRange* p;\r
900\r
901 if (reg->repeat_range_alloc == 0) {\r
902 p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);\r
903 CHECK_NULL_RETURN_MEMERR(p);\r
904 reg->repeat_range = p;\r
905 reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;\r
906 }\r
907 else if (reg->repeat_range_alloc <= id) {\r
908 int n;\r
909 n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;\r
910 p = (OnigRepeatRange* )xrealloc(reg->repeat_range,\r
911 sizeof(OnigRepeatRange) * n,\r
912 sizeof(OnigRepeatRange) * reg->repeat_range_alloc);\r
913 CHECK_NULL_RETURN_MEMERR(p);\r
914 reg->repeat_range = p;\r
915 reg->repeat_range_alloc = n;\r
916 }\r
917 else {\r
918 p = reg->repeat_range;\r
919 }\r
920\r
921 p[id].lower = lower;\r
b26691c4 922 p[id].upper = (IS_INFINITE_REPEAT(upper) ? 0x7fffffff : upper);\r
14b0e578
CS
923 return 0;\r
924}\r
925\r
926static int\r
b26691c4 927compile_range_repeat_node(QuantNode* qn, int target_len, int emptiness,\r
b602265d 928 regex_t* reg, ScanEnv* env)\r
14b0e578
CS
929{\r
930 int r;\r
b26691c4 931 int num_repeat = reg->num_repeat++;\r
14b0e578 932\r
b26691c4 933 r = add_op(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);\r
b602265d 934 if (r != 0) return r;\r
14b0e578 935\r
b26691c4
LG
936 COP(reg)->repeat.id = num_repeat;\r
937 COP(reg)->repeat.addr = SIZE_INC_OP + target_len + SIZE_OP_REPEAT_INC;\r
938\r
14b0e578 939 r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);\r
b602265d 940 if (r != 0) return r;\r
14b0e578 941\r
b26691c4 942 r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, emptiness, env);\r
b602265d 943 if (r != 0) return r;\r
14b0e578
CS
944\r
945 if (\r
b602265d
DG
946#ifdef USE_CALL\r
947 NODE_IS_IN_MULTI_ENTRY(qn) ||\r
14b0e578 948#endif\r
b602265d 949 NODE_IS_IN_REAL_REPEAT(qn)) {\r
b26691c4 950 r = add_op(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);\r
14b0e578
CS
951 }\r
952 else {\r
b26691c4 953 r = add_op(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);\r
14b0e578 954 }\r
b602265d 955 if (r != 0) return r;\r
b26691c4
LG
956\r
957 COP(reg)->repeat_inc.id = num_repeat;\r
14b0e578
CS
958 return r;\r
959}\r
960\r
961static int\r
b602265d 962is_anychar_infinite_greedy(QuantNode* qn)\r
14b0e578 963{\r
b26691c4 964 if (qn->greedy && IS_INFINITE_REPEAT(qn->upper) &&\r
b602265d 965 NODE_IS_ANYCHAR(NODE_QUANT_BODY(qn)))\r
14b0e578
CS
966 return 1;\r
967 else\r
968 return 0;\r
969}\r
970\r
b26691c4 971#define QUANTIFIER_EXPAND_LIMIT_SIZE 10\r
14b0e578
CS
972#define CKN_ON (ckn > 0)\r
973\r
14b0e578 974static int\r
b602265d 975compile_length_quantifier_node(QuantNode* qn, regex_t* reg)\r
14b0e578
CS
976{\r
977 int len, mod_tlen;\r
b26691c4
LG
978 int infinite = IS_INFINITE_REPEAT(qn->upper);\r
979 enum BodyEmptyType emptiness = qn->emptiness;\r
b602265d 980 int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);\r
14b0e578
CS
981\r
982 if (tlen < 0) return tlen;\r
b602265d 983 if (tlen == 0) return 0;\r
14b0e578
CS
984\r
985 /* anychar repeat */\r
b602265d
DG
986 if (is_anychar_infinite_greedy(qn)) {\r
987 if (qn->lower <= 1 ||\r
988 int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0) {\r
14b0e578
CS
989 if (IS_NOT_NULL(qn->next_head_exact))\r
990 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;\r
991 else\r
992 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;\r
993 }\r
994 }\r
995\r
b26691c4
LG
996 mod_tlen = tlen;\r
997 if (emptiness != BODY_IS_NOT_EMPTY)\r
998 mod_tlen += SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END;\r
14b0e578
CS
999\r
1000 if (infinite &&\r
b602265d
DG
1001 (qn->lower <= 1 ||\r
1002 int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {\r
14b0e578
CS
1003 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {\r
1004 len = SIZE_OP_JUMP;\r
1005 }\r
1006 else {\r
1007 len = tlen * qn->lower;\r
1008 }\r
1009\r
1010 if (qn->greedy) {\r
b26691c4 1011#ifdef USE_OP_PUSH_OR_JUMP_EXACT\r
14b0e578 1012 if (IS_NOT_NULL(qn->head_exact))\r
b602265d 1013 len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;\r
b26691c4
LG
1014 else\r
1015#endif\r
1016 if (IS_NOT_NULL(qn->next_head_exact))\r
b602265d 1017 len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;\r
14b0e578 1018 else\r
b602265d 1019 len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;\r
14b0e578
CS
1020 }\r
1021 else\r
1022 len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;\r
1023 }\r
b26691c4
LG
1024 else if (qn->upper == 0) {\r
1025 if (qn->is_refered != 0) { /* /(?<n>..){0}/ */\r
1026 len = SIZE_OP_JUMP + tlen;\r
1027 }\r
1028 else\r
1029 len = 0;\r
14b0e578
CS
1030 }\r
1031 else if (!infinite && qn->greedy &&\r
b602265d
DG
1032 (qn->upper == 1 ||\r
1033 int_multiply_cmp(tlen + SIZE_OP_PUSH, qn->upper,\r
1034 QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {\r
14b0e578
CS
1035 len = tlen * qn->lower;\r
1036 len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);\r
1037 }\r
1038 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */\r
1039 len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;\r
1040 }\r
1041 else {\r
b26691c4 1042 len = SIZE_OP_REPEAT_INC + mod_tlen + SIZE_OP_REPEAT;\r
14b0e578
CS
1043 }\r
1044\r
1045 return len;\r
1046}\r
1047\r
1048static int\r
b602265d 1049compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)\r
14b0e578
CS
1050{\r
1051 int i, r, mod_tlen;\r
b26691c4
LG
1052 int infinite = IS_INFINITE_REPEAT(qn->upper);\r
1053 enum BodyEmptyType emptiness = qn->emptiness;\r
b602265d 1054 int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);\r
14b0e578
CS
1055\r
1056 if (tlen < 0) return tlen;\r
b602265d 1057 if (tlen == 0) return 0;\r
14b0e578 1058\r
b602265d
DG
1059 if (is_anychar_infinite_greedy(qn) &&\r
1060 (qn->lower <= 1 ||\r
1061 int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {\r
1062 r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);\r
1063 if (r != 0) return r;\r
14b0e578 1064 if (IS_NOT_NULL(qn->next_head_exact)) {\r
b26691c4
LG
1065 r = add_op(reg,\r
1066 IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg)) ?\r
1067 OP_ANYCHAR_ML_STAR_PEEK_NEXT : OP_ANYCHAR_STAR_PEEK_NEXT);\r
b602265d 1068 if (r != 0) return r;\r
b26691c4
LG
1069\r
1070 COP(reg)->anychar_star_peek_next.c = STR_(qn->next_head_exact)->s[0];\r
1071 return 0;\r
14b0e578
CS
1072 }\r
1073 else {\r
b26691c4
LG
1074 r = add_op(reg,\r
1075 IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg)) ?\r
1076 OP_ANYCHAR_ML_STAR : OP_ANYCHAR_STAR);\r
1077 return r;\r
14b0e578
CS
1078 }\r
1079 }\r
1080\r
b26691c4
LG
1081 mod_tlen = tlen;\r
1082 if (emptiness != BODY_IS_NOT_EMPTY)\r
1083 mod_tlen += SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END;\r
14b0e578
CS
1084\r
1085 if (infinite &&\r
b602265d
DG
1086 (qn->lower <= 1 ||\r
1087 int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {\r
b26691c4
LG
1088 int addr;\r
1089\r
14b0e578 1090 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {\r
b26691c4
LG
1091 r = add_op(reg, OP_JUMP);\r
1092 if (r != 0) return r;\r
14b0e578 1093 if (qn->greedy) {\r
b26691c4 1094#ifdef USE_OP_PUSH_OR_JUMP_EXACT\r
b602265d 1095 if (IS_NOT_NULL(qn->head_exact))\r
b26691c4
LG
1096 COP(reg)->jump.addr = SIZE_OP_PUSH_OR_JUMP_EXACT1 + SIZE_INC_OP;\r
1097 else\r
1098#endif\r
1099 if (IS_NOT_NULL(qn->next_head_exact))\r
1100 COP(reg)->jump.addr = SIZE_OP_PUSH_IF_PEEK_NEXT + SIZE_INC_OP;\r
b602265d 1101 else\r
b26691c4 1102 COP(reg)->jump.addr = SIZE_OP_PUSH + SIZE_INC_OP;\r
14b0e578
CS
1103 }\r
1104 else {\r
b26691c4 1105 COP(reg)->jump.addr = SIZE_OP_JUMP + SIZE_INC_OP;\r
14b0e578 1106 }\r
14b0e578
CS
1107 }\r
1108 else {\r
b602265d
DG
1109 r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);\r
1110 if (r != 0) return r;\r
14b0e578
CS
1111 }\r
1112\r
1113 if (qn->greedy) {\r
b26691c4 1114#ifdef USE_OP_PUSH_OR_JUMP_EXACT\r
14b0e578 1115 if (IS_NOT_NULL(qn->head_exact)) {\r
b26691c4 1116 r = add_op(reg, OP_PUSH_OR_JUMP_EXACT1);\r
b602265d 1117 if (r != 0) return r;\r
b26691c4
LG
1118 COP(reg)->push_or_jump_exact1.addr = SIZE_INC_OP + mod_tlen + SIZE_OP_JUMP;\r
1119 COP(reg)->push_or_jump_exact1.c = STR_(qn->head_exact)->s[0];\r
1120\r
1121 r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, emptiness, env);\r
b602265d 1122 if (r != 0) return r;\r
b26691c4
LG
1123\r
1124 addr = -(mod_tlen + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1);\r
14b0e578 1125 }\r
b26691c4
LG
1126 else\r
1127#endif\r
1128 if (IS_NOT_NULL(qn->next_head_exact)) {\r
1129 r = add_op(reg, OP_PUSH_IF_PEEK_NEXT);\r
b602265d 1130 if (r != 0) return r;\r
b26691c4
LG
1131 COP(reg)->push_if_peek_next.addr = SIZE_INC_OP + mod_tlen + SIZE_OP_JUMP;\r
1132 COP(reg)->push_if_peek_next.c = STR_(qn->next_head_exact)->s[0];\r
1133\r
1134 r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, emptiness, env);\r
b602265d 1135 if (r != 0) return r;\r
b26691c4
LG
1136\r
1137 addr = -(mod_tlen + (int )SIZE_OP_PUSH_IF_PEEK_NEXT);\r
14b0e578
CS
1138 }\r
1139 else {\r
b26691c4 1140 r = add_op(reg, OP_PUSH);\r
b602265d 1141 if (r != 0) return r;\r
b26691c4
LG
1142 COP(reg)->push.addr = SIZE_INC_OP + mod_tlen + SIZE_OP_JUMP;\r
1143\r
1144 r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, emptiness, env);\r
b602265d 1145 if (r != 0) return r;\r
b26691c4
LG
1146\r
1147 addr = -(mod_tlen + (int )SIZE_OP_PUSH);\r
14b0e578 1148 }\r
b26691c4
LG
1149\r
1150 r = add_op(reg, OP_JUMP);\r
1151 if (r != 0) return r;\r
1152 COP(reg)->jump.addr = addr;\r
14b0e578
CS
1153 }\r
1154 else {\r
b26691c4 1155 r = add_op(reg, OP_JUMP);\r
b602265d 1156 if (r != 0) return r;\r
b26691c4
LG
1157 COP(reg)->jump.addr = mod_tlen + SIZE_INC_OP;\r
1158\r
1159 r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, emptiness, env);\r
1160 if (r != 0) return r;\r
1161\r
1162 r = add_op(reg, OP_PUSH);\r
1163 if (r != 0) return r;\r
1164 COP(reg)->push.addr = -mod_tlen;\r
1165 }\r
1166 }\r
1167 else if (qn->upper == 0) {\r
1168 if (qn->is_refered != 0) { /* /(?<n>..){0}/ */\r
1169 r = add_op(reg, OP_JUMP);\r
b602265d 1170 if (r != 0) return r;\r
b26691c4
LG
1171 COP(reg)->jump.addr = tlen + SIZE_INC_OP;\r
1172\r
1173 r = compile_tree(NODE_QUANT_BODY(qn), reg, env);\r
1174 }\r
1175 else {\r
1176 /* Nothing output */\r
1177 r = 0;\r
14b0e578
CS
1178 }\r
1179 }\r
b602265d
DG
1180 else if (! infinite && qn->greedy &&\r
1181 (qn->upper == 1 ||\r
1182 int_multiply_cmp(tlen + SIZE_OP_PUSH, qn->upper,\r
1183 QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {\r
14b0e578
CS
1184 int n = qn->upper - qn->lower;\r
1185\r
b602265d
DG
1186 r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);\r
1187 if (r != 0) return r;\r
14b0e578
CS
1188\r
1189 for (i = 0; i < n; i++) {\r
b26691c4
LG
1190 int v = onig_positive_int_multiply(n - i, tlen + SIZE_OP_PUSH);\r
1191 if (v < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;\r
1192\r
1193 r = add_op(reg, OP_PUSH);\r
b602265d 1194 if (r != 0) return r;\r
b26691c4
LG
1195 COP(reg)->push.addr = v;\r
1196\r
b602265d
DG
1197 r = compile_tree(NODE_QUANT_BODY(qn), reg, env);\r
1198 if (r != 0) return r;\r
14b0e578
CS
1199 }\r
1200 }\r
b602265d 1201 else if (! qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */\r
b26691c4 1202 r = add_op(reg, OP_PUSH);\r
b602265d 1203 if (r != 0) return r;\r
b26691c4
LG
1204 COP(reg)->push.addr = SIZE_INC_OP + SIZE_OP_JUMP;\r
1205\r
1206 r = add_op(reg, OP_JUMP);\r
b602265d 1207 if (r != 0) return r;\r
b26691c4
LG
1208 COP(reg)->jump.addr = tlen + SIZE_INC_OP;\r
1209\r
b602265d 1210 r = compile_tree(NODE_QUANT_BODY(qn), reg, env);\r
14b0e578
CS
1211 }\r
1212 else {\r
b26691c4 1213 r = compile_range_repeat_node(qn, mod_tlen, emptiness, reg, env);\r
14b0e578
CS
1214 }\r
1215 return r;\r
1216}\r
14b0e578
CS
1217\r
1218static int\r
b26691c4 1219compile_length_option_node(BagNode* node, regex_t* reg)\r
14b0e578
CS
1220{\r
1221 int tlen;\r
1222 OnigOptionType prev = reg->options;\r
1223\r
b602265d 1224 reg->options = node->o.options;\r
b26691c4 1225 tlen = compile_length_tree(NODE_BAG_BODY(node), reg);\r
14b0e578
CS
1226 reg->options = prev;\r
1227\r
b602265d 1228 return tlen;\r
14b0e578
CS
1229}\r
1230\r
1231static int\r
b26691c4 1232compile_option_node(BagNode* node, regex_t* reg, ScanEnv* env)\r
14b0e578
CS
1233{\r
1234 int r;\r
1235 OnigOptionType prev = reg->options;\r
1236\r
b602265d 1237 reg->options = node->o.options;\r
b26691c4 1238 r = compile_tree(NODE_BAG_BODY(node), reg, env);\r
14b0e578
CS
1239 reg->options = prev;\r
1240\r
14b0e578
CS
1241 return r;\r
1242}\r
1243\r
1244static int\r
b26691c4 1245compile_length_bag_node(BagNode* node, regex_t* reg)\r
14b0e578
CS
1246{\r
1247 int len;\r
1248 int tlen;\r
fbaab715 1249\r
b26691c4 1250 if (node->type == BAG_OPTION)\r
14b0e578
CS
1251 return compile_length_option_node(node, reg);\r
1252\r
b26691c4
LG
1253 if (NODE_BAG_BODY(node)) {\r
1254 tlen = compile_length_tree(NODE_BAG_BODY(node), reg);\r
14b0e578
CS
1255 if (tlen < 0) return tlen;\r
1256 }\r
1257 else\r
1258 tlen = 0;\r
1259\r
1260 switch (node->type) {\r
b26691c4 1261 case BAG_MEMORY:\r
b602265d
DG
1262#ifdef USE_CALL\r
1263\r
1264 if (node->m.regnum == 0 && NODE_IS_CALLED(node)) {\r
1265 len = tlen + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;\r
1266 return len;\r
1267 }\r
1268\r
1269 if (NODE_IS_CALLED(node)) {\r
14b0e578 1270 len = SIZE_OP_MEMORY_START_PUSH + tlen\r
b602265d
DG
1271 + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;\r
1272 if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))\r
1273 len += (NODE_IS_RECURSION(node)\r
1274 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);\r
14b0e578 1275 else\r
b602265d
DG
1276 len += (NODE_IS_RECURSION(node)\r
1277 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);\r
1278 }\r
1279 else if (NODE_IS_RECURSION(node)) {\r
1280 len = SIZE_OP_MEMORY_START_PUSH;\r
1281 len += tlen + (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum)\r
1282 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_REC);\r
14b0e578
CS
1283 }\r
1284 else\r
1285#endif\r
1286 {\r
b602265d
DG
1287 if (MEM_STATUS_AT0(reg->bt_mem_start, node->m.regnum))\r
1288 len = SIZE_OP_MEMORY_START_PUSH;\r
14b0e578 1289 else\r
b602265d 1290 len = SIZE_OP_MEMORY_START;\r
14b0e578 1291\r
b602265d
DG
1292 len += tlen + (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum)\r
1293 ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);\r
14b0e578
CS
1294 }\r
1295 break;\r
1296\r
b26691c4
LG
1297 case BAG_STOP_BACKTRACK:\r
1298 if (NODE_IS_STRICT_REAL_REPEAT(node)) {\r
1299 int v;\r
1300 QuantNode* qn;\r
1301\r
1302 qn = QUANT_(NODE_BAG_BODY(node));\r
b602265d 1303 tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);\r
14b0e578
CS
1304 if (tlen < 0) return tlen;\r
1305\r
b26691c4
LG
1306 v = onig_positive_int_multiply(qn->lower, tlen);\r
1307 if (v < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;\r
1308 len = v + SIZE_OP_PUSH + tlen + SIZE_OP_POP_OUT + SIZE_OP_JUMP;\r
14b0e578
CS
1309 }\r
1310 else {\r
b602265d 1311 len = SIZE_OP_ATOMIC_START + tlen + SIZE_OP_ATOMIC_END;\r
14b0e578
CS
1312 }\r
1313 break;\r
1314\r
b26691c4 1315 case BAG_IF_ELSE:\r
b602265d 1316 {\r
b26691c4 1317 Node* cond = NODE_BAG_BODY(node);\r
b602265d
DG
1318 Node* Then = node->te.Then;\r
1319 Node* Else = node->te.Else;\r
14b0e578 1320\r
b602265d
DG
1321 len = compile_length_tree(cond, reg);\r
1322 if (len < 0) return len;\r
1323 len += SIZE_OP_PUSH;\r
1324 len += SIZE_OP_ATOMIC_START + SIZE_OP_ATOMIC_END;\r
1325\r
1326 if (IS_NOT_NULL(Then)) {\r
1327 tlen = compile_length_tree(Then, reg);\r
1328 if (tlen < 0) return tlen;\r
1329 len += tlen;\r
1330 }\r
1331\r
b26691c4
LG
1332 len += SIZE_OP_JUMP + SIZE_OP_ATOMIC_END;\r
1333\r
b602265d 1334 if (IS_NOT_NULL(Else)) {\r
b602265d
DG
1335 tlen = compile_length_tree(Else, reg);\r
1336 if (tlen < 0) return tlen;\r
1337 len += tlen;\r
1338 }\r
1339 }\r
1340 break;\r
1341\r
b26691c4
LG
1342 case BAG_OPTION:\r
1343 /* never come here, but set for escape warning */\r
1344 len = 0;\r
b602265d
DG
1345 break;\r
1346 }\r
1347\r
1348 return len;\r
1349}\r
14b0e578 1350\r
b26691c4 1351static int get_char_len_node(Node* node, regex_t* reg, int* len);\r
14b0e578
CS
1352\r
1353static int\r
b26691c4 1354compile_bag_memory_node(BagNode* node, regex_t* reg, ScanEnv* env)\r
14b0e578 1355{\r
b602265d
DG
1356 int r;\r
1357 int len;\r
14b0e578 1358\r
b602265d 1359#ifdef USE_CALL\r
b26691c4
LG
1360 if (NODE_IS_CALLED(node)) {\r
1361 r = add_op(reg, OP_CALL);\r
b602265d 1362 if (r != 0) return r;\r
b26691c4
LG
1363\r
1364 node->m.called_addr = COP_CURR_OFFSET(reg) + 1 + SIZE_OP_JUMP;\r
b602265d 1365 NODE_STATUS_ADD(node, ADDR_FIXED);\r
b26691c4 1366 COP(reg)->call.addr = (int )node->m.called_addr;\r
b602265d 1367\r
b26691c4
LG
1368 if (node->m.regnum == 0) {\r
1369 len = compile_length_tree(NODE_BAG_BODY(node), reg);\r
1370 len += SIZE_OP_RETURN;\r
14b0e578 1371\r
b26691c4
LG
1372 r = add_op(reg, OP_JUMP);\r
1373 if (r != 0) return r;\r
1374 COP(reg)->jump.addr = len + SIZE_INC_OP;\r
14b0e578 1375\r
b26691c4
LG
1376 r = compile_tree(NODE_BAG_BODY(node), reg, env);\r
1377 if (r != 0) return r;\r
1378\r
1379 r = add_op(reg, OP_RETURN);\r
1380 return r;\r
1381 }\r
1382 else {\r
1383 len = compile_length_tree(NODE_BAG_BODY(node), reg);\r
1384 len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);\r
1385 if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))\r
1386 len += (NODE_IS_RECURSION(node)\r
1387 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);\r
1388 else\r
1389 len += (NODE_IS_RECURSION(node)\r
1390 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);\r
1391\r
1392 r = add_op(reg, OP_JUMP);\r
1393 if (r != 0) return r;\r
1394 COP(reg)->jump.addr = len + SIZE_INC_OP;\r
1395 }\r
b602265d 1396 }\r
14b0e578 1397#endif\r
14b0e578 1398\r
b602265d 1399 if (MEM_STATUS_AT0(reg->bt_mem_start, node->m.regnum))\r
b26691c4 1400 r = add_op(reg, OP_MEMORY_START_PUSH);\r
b602265d 1401 else\r
b26691c4 1402 r = add_op(reg, OP_MEMORY_START);\r
b602265d 1403 if (r != 0) return r;\r
b26691c4
LG
1404 COP(reg)->memory_start.num = node->m.regnum;\r
1405\r
1406 r = compile_tree(NODE_BAG_BODY(node), reg, env);\r
b602265d
DG
1407 if (r != 0) return r;\r
1408\r
1409#ifdef USE_CALL\r
1410 if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))\r
b26691c4
LG
1411 r = add_op(reg, (NODE_IS_RECURSION(node)\r
1412 ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));\r
b602265d 1413 else\r
b26691c4 1414 r = add_op(reg, (NODE_IS_RECURSION(node) ? OP_MEMORY_END_REC : OP_MEMORY_END));\r
b602265d 1415 if (r != 0) return r;\r
b26691c4
LG
1416 COP(reg)->memory_end.num = node->m.regnum;\r
1417\r
b602265d
DG
1418 if (NODE_IS_CALLED(node)) {\r
1419 if (r != 0) return r;\r
b26691c4 1420 r = add_op(reg, OP_RETURN);\r
b602265d
DG
1421 }\r
1422#else\r
1423 if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))\r
b26691c4 1424 r = add_op(reg, OP_MEMORY_END_PUSH);\r
b602265d 1425 else\r
b26691c4 1426 r = add_op(reg, OP_MEMORY_END);\r
b602265d 1427 if (r != 0) return r;\r
b26691c4 1428 COP(reg)->memory_end.num = node->m.regnum;\r
14b0e578 1429#endif\r
b602265d
DG
1430\r
1431 return r;\r
1432}\r
1433\r
1434static int\r
b26691c4 1435compile_bag_node(BagNode* node, regex_t* reg, ScanEnv* env)\r
b602265d
DG
1436{\r
1437 int r, len;\r
1438\r
1439 switch (node->type) {\r
b26691c4
LG
1440 case BAG_MEMORY:\r
1441 r = compile_bag_memory_node(node, reg, env);\r
b602265d
DG
1442 break;\r
1443\r
b26691c4 1444 case BAG_OPTION:\r
b602265d 1445 r = compile_option_node(node, reg, env);\r
14b0e578
CS
1446 break;\r
1447\r
b26691c4
LG
1448 case BAG_STOP_BACKTRACK:\r
1449 if (NODE_IS_STRICT_REAL_REPEAT(node)) {\r
1450 QuantNode* qn = QUANT_(NODE_BAG_BODY(node));\r
b602265d
DG
1451 r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);\r
1452 if (r != 0) return r;\r
14b0e578 1453\r
b602265d 1454 len = compile_length_tree(NODE_QUANT_BODY(qn), reg);\r
14b0e578
CS
1455 if (len < 0) return len;\r
1456\r
b26691c4 1457 r = add_op(reg, OP_PUSH);\r
b602265d 1458 if (r != 0) return r;\r
b26691c4
LG
1459 COP(reg)->push.addr = SIZE_INC_OP + len + SIZE_OP_POP_OUT + SIZE_OP_JUMP;\r
1460\r
b602265d
DG
1461 r = compile_tree(NODE_QUANT_BODY(qn), reg, env);\r
1462 if (r != 0) return r;\r
b26691c4
LG
1463 r = add_op(reg, OP_POP_OUT);\r
1464 if (r != 0) return r;\r
1465\r
1466 r = add_op(reg, OP_JUMP);\r
b602265d 1467 if (r != 0) return r;\r
b26691c4 1468 COP(reg)->jump.addr = -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP_OUT);\r
14b0e578
CS
1469 }\r
1470 else {\r
b26691c4 1471 r = add_op(reg, OP_ATOMIC_START);\r
b602265d 1472 if (r != 0) return r;\r
b26691c4 1473 r = compile_tree(NODE_BAG_BODY(node), reg, env);\r
b602265d 1474 if (r != 0) return r;\r
b26691c4 1475 r = add_op(reg, OP_ATOMIC_END);\r
b602265d
DG
1476 }\r
1477 break;\r
1478\r
b26691c4 1479 case BAG_IF_ELSE:\r
b602265d 1480 {\r
b26691c4
LG
1481 int cond_len, then_len, else_len, jump_len;\r
1482 Node* cond = NODE_BAG_BODY(node);\r
b602265d
DG
1483 Node* Then = node->te.Then;\r
1484 Node* Else = node->te.Else;\r
1485\r
b26691c4 1486 r = add_op(reg, OP_ATOMIC_START);\r
b602265d
DG
1487 if (r != 0) return r;\r
1488\r
1489 cond_len = compile_length_tree(cond, reg);\r
1490 if (cond_len < 0) return cond_len;\r
1491 if (IS_NOT_NULL(Then)) {\r
1492 then_len = compile_length_tree(Then, reg);\r
1493 if (then_len < 0) return then_len;\r
1494 }\r
1495 else\r
1496 then_len = 0;\r
1497\r
b26691c4 1498 jump_len = cond_len + then_len + SIZE_OP_ATOMIC_END + SIZE_OP_JUMP;\r
b602265d 1499\r
b26691c4 1500 r = add_op(reg, OP_PUSH);\r
b602265d 1501 if (r != 0) return r;\r
b26691c4
LG
1502 COP(reg)->push.addr = SIZE_INC_OP + jump_len;\r
1503\r
b602265d
DG
1504 r = compile_tree(cond, reg, env);\r
1505 if (r != 0) return r;\r
b26691c4 1506 r = add_op(reg, OP_ATOMIC_END);\r
b602265d
DG
1507 if (r != 0) return r;\r
1508\r
1509 if (IS_NOT_NULL(Then)) {\r
1510 r = compile_tree(Then, reg, env);\r
1511 if (r != 0) return r;\r
1512 }\r
1513\r
1514 if (IS_NOT_NULL(Else)) {\r
b26691c4
LG
1515 else_len = compile_length_tree(Else, reg);\r
1516 if (else_len < 0) return else_len;\r
1517 }\r
1518 else\r
1519 else_len = 0;\r
1520\r
1521 r = add_op(reg, OP_JUMP);\r
1522 if (r != 0) return r;\r
1523 COP(reg)->jump.addr = SIZE_OP_ATOMIC_END + else_len + SIZE_INC_OP;\r
1524\r
1525 r = add_op(reg, OP_ATOMIC_END);\r
1526 if (r != 0) return r;\r
1527\r
1528 if (IS_NOT_NULL(Else)) {\r
b602265d
DG
1529 r = compile_tree(Else, reg, env);\r
1530 }\r
14b0e578
CS
1531 }\r
1532 break;\r
14b0e578
CS
1533 }\r
1534\r
1535 return r;\r
1536}\r
1537\r
1538static int\r
1539compile_length_anchor_node(AnchorNode* node, regex_t* reg)\r
1540{\r
1541 int len;\r
1542 int tlen = 0;\r
1543\r
b602265d
DG
1544 if (IS_NOT_NULL(NODE_ANCHOR_BODY(node))) {\r
1545 tlen = compile_length_tree(NODE_ANCHOR_BODY(node), reg);\r
14b0e578
CS
1546 if (tlen < 0) return tlen;\r
1547 }\r
1548\r
1549 switch (node->type) {\r
b26691c4 1550 case ANCR_PREC_READ:\r
b602265d 1551 len = SIZE_OP_PREC_READ_START + tlen + SIZE_OP_PREC_READ_END;\r
14b0e578 1552 break;\r
b26691c4 1553 case ANCR_PREC_READ_NOT:\r
b602265d 1554 len = SIZE_OP_PREC_READ_NOT_START + tlen + SIZE_OP_PREC_READ_NOT_END;\r
14b0e578 1555 break;\r
b26691c4 1556 case ANCR_LOOK_BEHIND:\r
14b0e578
CS
1557 len = SIZE_OP_LOOK_BEHIND + tlen;\r
1558 break;\r
b26691c4 1559 case ANCR_LOOK_BEHIND_NOT:\r
b602265d
DG
1560 len = SIZE_OP_LOOK_BEHIND_NOT_START + tlen + SIZE_OP_LOOK_BEHIND_NOT_END;\r
1561 break;\r
1562\r
b26691c4
LG
1563 case ANCR_WORD_BOUNDARY:\r
1564 case ANCR_NO_WORD_BOUNDARY:\r
b602265d 1565#ifdef USE_WORD_BEGIN_END\r
b26691c4
LG
1566 case ANCR_WORD_BEGIN:\r
1567 case ANCR_WORD_END:\r
b602265d
DG
1568#endif\r
1569 len = SIZE_OP_WORD_BOUNDARY;\r
1570 break;\r
1571\r
b26691c4
LG
1572 case ANCR_TEXT_SEGMENT_BOUNDARY:\r
1573 case ANCR_NO_TEXT_SEGMENT_BOUNDARY:\r
b602265d 1574 len = SIZE_OPCODE;\r
14b0e578
CS
1575 break;\r
1576\r
1577 default:\r
1578 len = SIZE_OPCODE;\r
1579 break;\r
1580 }\r
1581\r
1582 return len;\r
1583}\r
1584\r
1585static int\r
b602265d 1586compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env)\r
14b0e578
CS
1587{\r
1588 int r, len;\r
b602265d 1589 enum OpCode op;\r
14b0e578
CS
1590\r
1591 switch (node->type) {\r
b26691c4
LG
1592 case ANCR_BEGIN_BUF: r = add_op(reg, OP_BEGIN_BUF); break;\r
1593 case ANCR_END_BUF: r = add_op(reg, OP_END_BUF); break;\r
1594 case ANCR_BEGIN_LINE: r = add_op(reg, OP_BEGIN_LINE); break;\r
1595 case ANCR_END_LINE: r = add_op(reg, OP_END_LINE); break;\r
1596 case ANCR_SEMI_END_BUF: r = add_op(reg, OP_SEMI_END_BUF); break;\r
1597 case ANCR_BEGIN_POSITION: r = add_op(reg, OP_BEGIN_POSITION); break;\r
1598\r
1599 case ANCR_WORD_BOUNDARY:\r
b602265d
DG
1600 op = OP_WORD_BOUNDARY;\r
1601 word:\r
b26691c4 1602 r = add_op(reg, op);\r
b602265d 1603 if (r != 0) return r;\r
b26691c4 1604 COP(reg)->word_boundary.mode = (ModeType )node->ascii_mode;\r
b602265d
DG
1605 break;\r
1606\r
b26691c4 1607 case ANCR_NO_WORD_BOUNDARY:\r
b602265d
DG
1608 op = OP_NO_WORD_BOUNDARY; goto word;\r
1609 break;\r
14b0e578 1610#ifdef USE_WORD_BEGIN_END\r
b26691c4 1611 case ANCR_WORD_BEGIN:\r
b602265d
DG
1612 op = OP_WORD_BEGIN; goto word;\r
1613 break;\r
b26691c4 1614 case ANCR_WORD_END:\r
b602265d
DG
1615 op = OP_WORD_END; goto word;\r
1616 break;\r
14b0e578
CS
1617#endif\r
1618\r
b26691c4
LG
1619 case ANCR_TEXT_SEGMENT_BOUNDARY:\r
1620 case ANCR_NO_TEXT_SEGMENT_BOUNDARY:\r
1621 {\r
1622 enum TextSegmentBoundaryType type;\r
b602265d 1623\r
b26691c4
LG
1624 r = add_op(reg, OP_TEXT_SEGMENT_BOUNDARY);\r
1625 if (r != 0) return r;\r
1626\r
1627 type = EXTENDED_GRAPHEME_CLUSTER_BOUNDARY;\r
1628#ifdef USE_UNICODE_WORD_BREAK\r
1629 if (ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_TEXT_SEGMENT_WORD))\r
1630 type = WORD_BOUNDARY;\r
1631#endif\r
1632\r
1633 COP(reg)->text_segment_boundary.type = type;\r
1634 COP(reg)->text_segment_boundary.not =\r
1635 (node->type == ANCR_NO_TEXT_SEGMENT_BOUNDARY ? 1 : 0);\r
1636 }\r
b602265d
DG
1637 break;\r
1638\r
b26691c4
LG
1639 case ANCR_PREC_READ:\r
1640 r = add_op(reg, OP_PREC_READ_START);\r
b602265d
DG
1641 if (r != 0) return r;\r
1642 r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);\r
1643 if (r != 0) return r;\r
b26691c4 1644 r = add_op(reg, OP_PREC_READ_END);\r
14b0e578
CS
1645 break;\r
1646\r
b26691c4 1647 case ANCR_PREC_READ_NOT:\r
b602265d 1648 len = compile_length_tree(NODE_ANCHOR_BODY(node), reg);\r
14b0e578 1649 if (len < 0) return len;\r
b26691c4
LG
1650\r
1651 r = add_op(reg, OP_PREC_READ_NOT_START);\r
b602265d 1652 if (r != 0) return r;\r
b26691c4 1653 COP(reg)->prec_read_not_start.addr = SIZE_INC_OP + len + SIZE_OP_PREC_READ_NOT_END;\r
b602265d
DG
1654 r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);\r
1655 if (r != 0) return r;\r
b26691c4 1656 r = add_op(reg, OP_PREC_READ_NOT_END);\r
14b0e578
CS
1657 break;\r
1658\r
b26691c4 1659 case ANCR_LOOK_BEHIND:\r
14b0e578
CS
1660 {\r
1661 int n;\r
b26691c4 1662 r = add_op(reg, OP_LOOK_BEHIND);\r
b602265d 1663 if (r != 0) return r;\r
14b0e578 1664 if (node->char_len < 0) {\r
b26691c4 1665 r = get_char_len_node(NODE_ANCHOR_BODY(node), reg, &n);\r
b602265d 1666 if (r != 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;\r
14b0e578
CS
1667 }\r
1668 else\r
b602265d
DG
1669 n = node->char_len;\r
1670\r
b26691c4 1671 COP(reg)->look_behind.len = n;\r
b602265d 1672 r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);\r
14b0e578
CS
1673 }\r
1674 break;\r
1675\r
b26691c4 1676 case ANCR_LOOK_BEHIND_NOT:\r
14b0e578
CS
1677 {\r
1678 int n;\r
b602265d
DG
1679\r
1680 len = compile_length_tree(NODE_ANCHOR_BODY(node), reg);\r
b26691c4 1681 r = add_op(reg, OP_LOOK_BEHIND_NOT_START);\r
b602265d 1682 if (r != 0) return r;\r
b26691c4
LG
1683 COP(reg)->look_behind_not_start.addr = SIZE_INC_OP + len + SIZE_OP_LOOK_BEHIND_NOT_END;\r
1684\r
14b0e578 1685 if (node->char_len < 0) {\r
b26691c4 1686 r = get_char_len_node(NODE_ANCHOR_BODY(node), reg, &n);\r
b602265d 1687 if (r != 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;\r
14b0e578
CS
1688 }\r
1689 else\r
b602265d 1690 n = node->char_len;\r
b26691c4
LG
1691\r
1692 COP(reg)->look_behind_not_start.len = n;\r
1693\r
b602265d
DG
1694 r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);\r
1695 if (r != 0) return r;\r
b26691c4 1696 r = add_op(reg, OP_LOOK_BEHIND_NOT_END);\r
14b0e578
CS
1697 }\r
1698 break;\r
1699\r
1700 default:\r
1701 return ONIGERR_TYPE_BUG;\r
1702 break;\r
1703 }\r
1704\r
1705 return r;\r
1706}\r
1707\r
b602265d
DG
1708static int\r
1709compile_gimmick_node(GimmickNode* node, regex_t* reg)\r
1710{\r
1711 int r;\r
1712\r
1713 switch (node->type) {\r
1714 case GIMMICK_FAIL:\r
b26691c4 1715 r = add_op(reg, OP_FAIL);\r
b602265d
DG
1716 break;\r
1717\r
1718 case GIMMICK_SAVE:\r
b26691c4 1719 r = add_op(reg, OP_PUSH_SAVE_VAL);\r
b602265d 1720 if (r != 0) return r;\r
b26691c4
LG
1721 COP(reg)->push_save_val.type = node->detail_type;\r
1722 COP(reg)->push_save_val.id = node->id;\r
b602265d
DG
1723 break;\r
1724\r
1725 case GIMMICK_UPDATE_VAR:\r
b26691c4 1726 r = add_op(reg, OP_UPDATE_VAR);\r
b602265d 1727 if (r != 0) return r;\r
b26691c4
LG
1728 COP(reg)->update_var.type = node->detail_type;\r
1729 COP(reg)->update_var.id = node->id;\r
b602265d
DG
1730 break;\r
1731\r
1732#ifdef USE_CALLOUT\r
1733 case GIMMICK_CALLOUT:\r
1734 switch (node->detail_type) {\r
1735 case ONIG_CALLOUT_OF_CONTENTS:\r
1736 case ONIG_CALLOUT_OF_NAME:\r
1737 {\r
b602265d 1738 if (node->detail_type == ONIG_CALLOUT_OF_NAME) {\r
b26691c4 1739 r = add_op(reg, OP_CALLOUT_NAME);\r
b602265d 1740 if (r != 0) return r;\r
b26691c4
LG
1741 COP(reg)->callout_name.id = node->id;\r
1742 COP(reg)->callout_name.num = node->num;\r
1743 }\r
1744 else {\r
1745 r = add_op(reg, OP_CALLOUT_CONTENTS);\r
1746 if (r != 0) return r;\r
1747 COP(reg)->callout_contents.num = node->num;\r
b602265d 1748 }\r
b602265d
DG
1749 }\r
1750 break;\r
1751\r
1752 default:\r
1753 r = ONIGERR_TYPE_BUG;\r
1754 break;\r
1755 }\r
1756#endif\r
1757 }\r
1758\r
1759 return r;\r
1760}\r
1761\r
1762static int\r
1763compile_length_gimmick_node(GimmickNode* node, regex_t* reg)\r
1764{\r
1765 int len;\r
1766\r
1767 switch (node->type) {\r
1768 case GIMMICK_FAIL:\r
1769 len = SIZE_OP_FAIL;\r
1770 break;\r
1771\r
b602265d
DG
1772 case GIMMICK_SAVE:\r
1773 len = SIZE_OP_PUSH_SAVE_VAL;\r
1774 break;\r
1775\r
1776 case GIMMICK_UPDATE_VAR:\r
1777 len = SIZE_OP_UPDATE_VAR;\r
1778 break;\r
1779\r
1780#ifdef USE_CALLOUT\r
1781 case GIMMICK_CALLOUT:\r
1782 switch (node->detail_type) {\r
1783 case ONIG_CALLOUT_OF_CONTENTS:\r
1784 len = SIZE_OP_CALLOUT_CONTENTS;\r
1785 break;\r
1786 case ONIG_CALLOUT_OF_NAME:\r
1787 len = SIZE_OP_CALLOUT_NAME;\r
1788 break;\r
1789\r
1790 default:\r
1791 len = ONIGERR_TYPE_BUG;\r
1792 break;\r
1793 }\r
1794 break;\r
1795#endif\r
1796 }\r
1797\r
1798 return len;\r
1799}\r
1800\r
14b0e578
CS
1801static int\r
1802compile_length_tree(Node* node, regex_t* reg)\r
1803{\r
b602265d 1804 int len, r;\r
14b0e578 1805\r
b602265d
DG
1806 switch (NODE_TYPE(node)) {\r
1807 case NODE_LIST:\r
14b0e578
CS
1808 len = 0;\r
1809 do {\r
b602265d 1810 r = compile_length_tree(NODE_CAR(node), reg);\r
14b0e578
CS
1811 if (r < 0) return r;\r
1812 len += r;\r
b602265d 1813 } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
14b0e578
CS
1814 r = len;\r
1815 break;\r
1816\r
b602265d 1817 case NODE_ALT:\r
14b0e578
CS
1818 {\r
1819 int n;\r
1820\r
1821 n = r = 0;\r
1822 do {\r
b602265d
DG
1823 r += compile_length_tree(NODE_CAR(node), reg);\r
1824 n++;\r
1825 } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
14b0e578
CS
1826 r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);\r
1827 }\r
1828 break;\r
1829\r
b602265d
DG
1830 case NODE_STRING:\r
1831 if (NODE_STRING_IS_RAW(node))\r
1832 r = compile_length_string_raw_node(STR_(node), reg);\r
14b0e578
CS
1833 else\r
1834 r = compile_length_string_node(node, reg);\r
1835 break;\r
1836\r
b602265d
DG
1837 case NODE_CCLASS:\r
1838 r = compile_length_cclass_node(CCLASS_(node), reg);\r
14b0e578
CS
1839 break;\r
1840\r
b602265d 1841 case NODE_CTYPE:\r
14b0e578
CS
1842 r = SIZE_OPCODE;\r
1843 break;\r
1844\r
b602265d 1845 case NODE_BACKREF:\r
b26691c4 1846 r = SIZE_OP_BACKREF;\r
14b0e578
CS
1847 break;\r
1848\r
b602265d
DG
1849#ifdef USE_CALL\r
1850 case NODE_CALL:\r
14b0e578
CS
1851 r = SIZE_OP_CALL;\r
1852 break;\r
1853#endif\r
1854\r
b602265d
DG
1855 case NODE_QUANT:\r
1856 r = compile_length_quantifier_node(QUANT_(node), reg);\r
1857 break;\r
1858\r
b26691c4
LG
1859 case NODE_BAG:\r
1860 r = compile_length_bag_node(BAG_(node), reg);\r
14b0e578
CS
1861 break;\r
1862\r
b602265d
DG
1863 case NODE_ANCHOR:\r
1864 r = compile_length_anchor_node(ANCHOR_(node), reg);\r
14b0e578
CS
1865 break;\r
1866\r
b602265d
DG
1867 case NODE_GIMMICK:\r
1868 r = compile_length_gimmick_node(GIMMICK_(node), reg);\r
14b0e578
CS
1869 break;\r
1870\r
1871 default:\r
1872 return ONIGERR_TYPE_BUG;\r
1873 break;\r
1874 }\r
1875\r
1876 return r;\r
1877}\r
1878\r
1879static int\r
b602265d 1880compile_tree(Node* node, regex_t* reg, ScanEnv* env)\r
14b0e578 1881{\r
b602265d 1882 int n, len, pos, r = 0;\r
14b0e578 1883\r
b602265d
DG
1884 switch (NODE_TYPE(node)) {\r
1885 case NODE_LIST:\r
14b0e578 1886 do {\r
b602265d
DG
1887 r = compile_tree(NODE_CAR(node), reg, env);\r
1888 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));\r
14b0e578
CS
1889 break;\r
1890\r
b602265d 1891 case NODE_ALT:\r
14b0e578
CS
1892 {\r
1893 Node* x = node;\r
1894 len = 0;\r
1895 do {\r
b602265d
DG
1896 len += compile_length_tree(NODE_CAR(x), reg);\r
1897 if (IS_NOT_NULL(NODE_CDR(x))) {\r
1898 len += SIZE_OP_PUSH + SIZE_OP_JUMP;\r
1899 }\r
1900 } while (IS_NOT_NULL(x = NODE_CDR(x)));\r
b26691c4 1901 pos = COP_CURR_OFFSET(reg) + 1 + len; /* goal position */\r
14b0e578
CS
1902\r
1903 do {\r
b602265d
DG
1904 len = compile_length_tree(NODE_CAR(node), reg);\r
1905 if (IS_NOT_NULL(NODE_CDR(node))) {\r
1906 enum OpCode push = NODE_IS_SUPER(node) ? OP_PUSH_SUPER : OP_PUSH;\r
b26691c4 1907 r = add_op(reg, push);\r
b602265d 1908 if (r != 0) break;\r
b26691c4 1909 COP(reg)->push.addr = SIZE_INC_OP + len + SIZE_OP_JUMP;\r
b602265d
DG
1910 }\r
1911 r = compile_tree(NODE_CAR(node), reg, env);\r
1912 if (r != 0) break;\r
1913 if (IS_NOT_NULL(NODE_CDR(node))) {\r
b26691c4
LG
1914 len = pos - (COP_CURR_OFFSET(reg) + 1);\r
1915 r = add_op(reg, OP_JUMP);\r
b602265d 1916 if (r != 0) break;\r
b26691c4 1917 COP(reg)->jump.addr = len;\r
b602265d
DG
1918 }\r
1919 } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
1920 }\r
1921 break;\r
1922\r
1923 case NODE_STRING:\r
1924 if (NODE_STRING_IS_RAW(node))\r
1925 r = compile_string_raw_node(STR_(node), reg);\r
14b0e578
CS
1926 else\r
1927 r = compile_string_node(node, reg);\r
1928 break;\r
1929\r
b602265d
DG
1930 case NODE_CCLASS:\r
1931 r = compile_cclass_node(CCLASS_(node), reg);\r
14b0e578
CS
1932 break;\r
1933\r
b602265d 1934 case NODE_CTYPE:\r
14b0e578
CS
1935 {\r
1936 int op;\r
1937\r
b602265d
DG
1938 switch (CTYPE_(node)->ctype) {\r
1939 case CTYPE_ANYCHAR:\r
b26691c4
LG
1940 r = add_op(reg, IS_MULTILINE(CTYPE_OPTION(node, reg)) ?\r
1941 OP_ANYCHAR_ML : OP_ANYCHAR);\r
b602265d
DG
1942 break;\r
1943\r
14b0e578 1944 case ONIGENC_CTYPE_WORD:\r
b602265d
DG
1945 if (CTYPE_(node)->ascii_mode == 0) {\r
1946 op = CTYPE_(node)->not != 0 ? OP_NO_WORD : OP_WORD;\r
1947 }\r
1948 else {\r
1949 op = CTYPE_(node)->not != 0 ? OP_NO_WORD_ASCII : OP_WORD_ASCII;\r
1950 }\r
b26691c4 1951 r = add_op(reg, op);\r
b602265d
DG
1952 break;\r
1953\r
14b0e578 1954 default:\r
b602265d
DG
1955 return ONIGERR_TYPE_BUG;\r
1956 break;\r
14b0e578 1957 }\r
14b0e578
CS
1958 }\r
1959 break;\r
1960\r
b602265d 1961 case NODE_BACKREF:\r
14b0e578 1962 {\r
b602265d 1963 BackRefNode* br = BACKREF_(node);\r
14b0e578 1964\r
b602265d 1965 if (NODE_IS_CHECKER(node)) {\r
14b0e578 1966#ifdef USE_BACKREF_WITH_LEVEL\r
b602265d 1967 if (NODE_IS_NEST_LEVEL(node)) {\r
b26691c4 1968 r = add_op(reg, OP_BACKREF_CHECK_WITH_LEVEL);\r
b602265d 1969 if (r != 0) return r;\r
b26691c4 1970 COP(reg)->backref_general.nest_level = br->nest_level;\r
b602265d
DG
1971 }\r
1972 else\r
14b0e578 1973#endif\r
b602265d 1974 {\r
b26691c4 1975 r = add_op(reg, OP_BACKREF_CHECK);\r
b602265d
DG
1976 if (r != 0) return r;\r
1977 }\r
b602265d 1978 goto add_bacref_mems;\r
14b0e578
CS
1979 }\r
1980 else {\r
b602265d
DG
1981#ifdef USE_BACKREF_WITH_LEVEL\r
1982 if (NODE_IS_NEST_LEVEL(node)) {\r
b26691c4
LG
1983 if ((reg->options & ONIG_OPTION_IGNORECASE) != 0)\r
1984 r = add_op(reg, OP_BACKREF_WITH_LEVEL_IC);\r
1985 else\r
1986 r = add_op(reg, OP_BACKREF_WITH_LEVEL);\r
b602265d 1987\r
b26691c4
LG
1988 if (r != 0) return r;\r
1989 COP(reg)->backref_general.nest_level = br->nest_level;\r
b602265d 1990 goto add_bacref_mems;\r
14b0e578 1991 }\r
b602265d
DG
1992 else\r
1993#endif\r
1994 if (br->back_num == 1) {\r
1995 n = br->back_static[0];\r
1996 if (IS_IGNORECASE(reg->options)) {\r
b26691c4 1997 r = add_op(reg, OP_BACKREF_N_IC);\r
b602265d 1998 if (r != 0) return r;\r
b26691c4 1999 COP(reg)->backref_n.n1 = n;\r
b602265d
DG
2000 }\r
2001 else {\r
2002 switch (n) {\r
b26691c4
LG
2003 case 1: r = add_op(reg, OP_BACKREF1); break;\r
2004 case 2: r = add_op(reg, OP_BACKREF2); break;\r
b602265d 2005 default:\r
b26691c4 2006 r = add_op(reg, OP_BACKREF_N);\r
b602265d 2007 if (r != 0) return r;\r
b26691c4 2008 COP(reg)->backref_n.n1 = n;\r
b602265d
DG
2009 break;\r
2010 }\r
2011 }\r
14b0e578 2012 }\r
b602265d 2013 else {\r
b26691c4 2014 int num;\r
b602265d 2015 int* p;\r
14b0e578 2016\r
b26691c4
LG
2017 r = add_op(reg, IS_IGNORECASE(reg->options) ?\r
2018 OP_BACKREF_MULTI_IC : OP_BACKREF_MULTI);\r
b602265d
DG
2019 if (r != 0) return r;\r
2020\r
2021 add_bacref_mems:\r
b26691c4
LG
2022 num = br->back_num;\r
2023 COP(reg)->backref_general.num = num;\r
2024 if (num == 1) {\r
2025 COP(reg)->backref_general.n1 = br->back_static[0];\r
2026 }\r
2027 else {\r
2028 int i, j;\r
2029 MemNumType* ns;\r
2030\r
2031 ns = xmalloc(sizeof(MemNumType) * num);\r
2032 CHECK_NULL_RETURN_MEMERR(ns);\r
2033 COP(reg)->backref_general.ns = ns;\r
2034 p = BACKREFS_P(br);\r
2035 for (i = num - 1, j = 0; i >= 0; i--, j++) {\r
2036 ns[j] = p[i];\r
2037 }\r
b602265d
DG
2038 }\r
2039 }\r
14b0e578
CS
2040 }\r
2041 }\r
2042 break;\r
2043\r
b602265d
DG
2044#ifdef USE_CALL\r
2045 case NODE_CALL:\r
2046 r = compile_call(CALL_(node), reg, env);\r
14b0e578
CS
2047 break;\r
2048#endif\r
2049\r
b602265d
DG
2050 case NODE_QUANT:\r
2051 r = compile_quantifier_node(QUANT_(node), reg, env);\r
14b0e578
CS
2052 break;\r
2053\r
b26691c4
LG
2054 case NODE_BAG:\r
2055 r = compile_bag_node(BAG_(node), reg, env);\r
14b0e578
CS
2056 break;\r
2057\r
b602265d
DG
2058 case NODE_ANCHOR:\r
2059 r = compile_anchor_node(ANCHOR_(node), reg, env);\r
2060 break;\r
2061\r
2062 case NODE_GIMMICK:\r
2063 r = compile_gimmick_node(GIMMICK_(node), reg);\r
14b0e578
CS
2064 break;\r
2065\r
2066 default:\r
2067#ifdef ONIG_DEBUG\r
b602265d 2068 fprintf(stderr, "compile_tree: undefined node type %d\n", NODE_TYPE(node));\r
14b0e578
CS
2069#endif\r
2070 break;\r
2071 }\r
2072\r
2073 return r;\r
2074}\r
2075\r
14b0e578
CS
2076static int\r
2077noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)\r
2078{\r
2079 int r = 0;\r
2080 Node* node = *plink;\r
2081\r
b602265d
DG
2082 switch (NODE_TYPE(node)) {\r
2083 case NODE_LIST:\r
2084 case NODE_ALT:\r
14b0e578 2085 do {\r
b602265d
DG
2086 r = noname_disable_map(&(NODE_CAR(node)), map, counter);\r
2087 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));\r
14b0e578
CS
2088 break;\r
2089\r
b602265d 2090 case NODE_QUANT:\r
14b0e578 2091 {\r
b602265d 2092 Node** ptarget = &(NODE_BODY(node));\r
14b0e578
CS
2093 Node* old = *ptarget;\r
2094 r = noname_disable_map(ptarget, map, counter);\r
b602265d
DG
2095 if (*ptarget != old && NODE_TYPE(*ptarget) == NODE_QUANT) {\r
2096 onig_reduce_nested_quantifier(node, *ptarget);\r
14b0e578
CS
2097 }\r
2098 }\r
2099 break;\r
2100\r
b26691c4 2101 case NODE_BAG:\r
14b0e578 2102 {\r
b26691c4
LG
2103 BagNode* en = BAG_(node);\r
2104 if (en->type == BAG_MEMORY) {\r
b602265d
DG
2105 if (NODE_IS_NAMED_GROUP(node)) {\r
2106 (*counter)++;\r
2107 map[en->m.regnum].new_val = *counter;\r
2108 en->m.regnum = *counter;\r
2109 r = noname_disable_map(&(NODE_BODY(node)), map, counter);\r
2110 }\r
2111 else {\r
2112 *plink = NODE_BODY(node);\r
2113 NODE_BODY(node) = NULL_NODE;\r
2114 onig_node_free(node);\r
2115 r = noname_disable_map(plink, map, counter);\r
2116 }\r
2117 }\r
b26691c4
LG
2118 else if (en->type == BAG_IF_ELSE) {\r
2119 r = noname_disable_map(&(NODE_BAG_BODY(en)), map, counter);\r
b602265d
DG
2120 if (r != 0) return r;\r
2121 if (IS_NOT_NULL(en->te.Then)) {\r
2122 r = noname_disable_map(&(en->te.Then), map, counter);\r
2123 if (r != 0) return r;\r
2124 }\r
2125 if (IS_NOT_NULL(en->te.Else)) {\r
2126 r = noname_disable_map(&(en->te.Else), map, counter);\r
2127 if (r != 0) return r;\r
2128 }\r
14b0e578
CS
2129 }\r
2130 else\r
b602265d 2131 r = noname_disable_map(&(NODE_BODY(node)), map, counter);\r
14b0e578
CS
2132 }\r
2133 break;\r
2134\r
b602265d
DG
2135 case NODE_ANCHOR:\r
2136 if (IS_NOT_NULL(NODE_BODY(node)))\r
2137 r = noname_disable_map(&(NODE_BODY(node)), map, counter);\r
2138 break;\r
2139\r
14b0e578
CS
2140 default:\r
2141 break;\r
2142 }\r
2143\r
2144 return r;\r
2145}\r
2146\r
2147static int\r
2148renumber_node_backref(Node* node, GroupNumRemap* map)\r
2149{\r
2150 int i, pos, n, old_num;\r
2151 int *backs;\r
b602265d 2152 BackRefNode* bn = BACKREF_(node);\r
14b0e578 2153\r
b602265d 2154 if (! NODE_IS_BY_NAME(node))\r
14b0e578
CS
2155 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;\r
2156\r
2157 old_num = bn->back_num;\r
2158 if (IS_NULL(bn->back_dynamic))\r
2159 backs = bn->back_static;\r
2160 else\r
2161 backs = bn->back_dynamic;\r
2162\r
2163 for (i = 0, pos = 0; i < old_num; i++) {\r
2164 n = map[backs[i]].new_val;\r
2165 if (n > 0) {\r
2166 backs[pos] = n;\r
2167 pos++;\r
2168 }\r
2169 }\r
2170\r
2171 bn->back_num = pos;\r
2172 return 0;\r
2173}\r
2174\r
2175static int\r
2176renumber_by_map(Node* node, GroupNumRemap* map)\r
2177{\r
2178 int r = 0;\r
2179\r
b602265d
DG
2180 switch (NODE_TYPE(node)) {\r
2181 case NODE_LIST:\r
2182 case NODE_ALT:\r
14b0e578 2183 do {\r
b602265d
DG
2184 r = renumber_by_map(NODE_CAR(node), map);\r
2185 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));\r
14b0e578 2186 break;\r
b602265d
DG
2187\r
2188 case NODE_QUANT:\r
2189 r = renumber_by_map(NODE_BODY(node), map);\r
14b0e578 2190 break;\r
b602265d 2191\r
b26691c4 2192 case NODE_BAG:\r
b602265d 2193 {\r
b26691c4 2194 BagNode* en = BAG_(node);\r
b602265d
DG
2195\r
2196 r = renumber_by_map(NODE_BODY(node), map);\r
2197 if (r != 0) return r;\r
2198\r
b26691c4 2199 if (en->type == BAG_IF_ELSE) {\r
b602265d
DG
2200 if (IS_NOT_NULL(en->te.Then)) {\r
2201 r = renumber_by_map(en->te.Then, map);\r
2202 if (r != 0) return r;\r
2203 }\r
2204 if (IS_NOT_NULL(en->te.Else)) {\r
2205 r = renumber_by_map(en->te.Else, map);\r
2206 if (r != 0) return r;\r
2207 }\r
2208 }\r
2209 }\r
14b0e578
CS
2210 break;\r
2211\r
b602265d 2212 case NODE_BACKREF:\r
14b0e578
CS
2213 r = renumber_node_backref(node, map);\r
2214 break;\r
2215\r
b602265d
DG
2216 case NODE_ANCHOR:\r
2217 if (IS_NOT_NULL(NODE_BODY(node)))\r
2218 r = renumber_by_map(NODE_BODY(node), map);\r
2219 break;\r
2220\r
14b0e578
CS
2221 default:\r
2222 break;\r
2223 }\r
2224\r
2225 return r;\r
2226}\r
2227\r
2228static int\r
2229numbered_ref_check(Node* node)\r
2230{\r
2231 int r = 0;\r
2232\r
b602265d
DG
2233 switch (NODE_TYPE(node)) {\r
2234 case NODE_LIST:\r
2235 case NODE_ALT:\r
14b0e578 2236 do {\r
b602265d
DG
2237 r = numbered_ref_check(NODE_CAR(node));\r
2238 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));\r
14b0e578 2239 break;\r
b602265d
DG
2240\r
2241 case NODE_ANCHOR:\r
2242 if (IS_NULL(NODE_BODY(node)))\r
2243 break;\r
2244 /* fall */\r
2245 case NODE_QUANT:\r
2246 r = numbered_ref_check(NODE_BODY(node));\r
14b0e578 2247 break;\r
b602265d 2248\r
b26691c4 2249 case NODE_BAG:\r
b602265d 2250 {\r
b26691c4 2251 BagNode* en = BAG_(node);\r
b602265d
DG
2252\r
2253 r = numbered_ref_check(NODE_BODY(node));\r
2254 if (r != 0) return r;\r
2255\r
b26691c4 2256 if (en->type == BAG_IF_ELSE) {\r
b602265d
DG
2257 if (IS_NOT_NULL(en->te.Then)) {\r
2258 r = numbered_ref_check(en->te.Then);\r
2259 if (r != 0) return r;\r
2260 }\r
2261 if (IS_NOT_NULL(en->te.Else)) {\r
2262 r = numbered_ref_check(en->te.Else);\r
2263 if (r != 0) return r;\r
2264 }\r
2265 }\r
2266 }\r
2267\r
14b0e578
CS
2268 break;\r
2269\r
b602265d
DG
2270 case NODE_BACKREF:\r
2271 if (! NODE_IS_BY_NAME(node))\r
14b0e578
CS
2272 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;\r
2273 break;\r
2274\r
2275 default:\r
2276 break;\r
2277 }\r
2278\r
2279 return r;\r
2280}\r
2281\r
2282static int\r
2283disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)\r
2284{\r
2285 int r, i, pos, counter;\r
b602265d
DG
2286 int result;\r
2287 MemStatusType loc;\r
14b0e578
CS
2288 GroupNumRemap* map;\r
2289\r
2290 map = (GroupNumRemap* )xmalloc(sizeof(GroupNumRemap) * (env->num_mem + 1));\r
2291 CHECK_NULL_RETURN_MEMERR(map);\r
2292 for (i = 1; i <= env->num_mem; i++) {\r
2293 map[i].new_val = 0;\r
2294 }\r
2295 counter = 0;\r
2296 r = noname_disable_map(root, map, &counter);\r
2297 if (r != 0) return r;\r
2298\r
2299 r = renumber_by_map(*root, map);\r
2300 if (r != 0) return r;\r
2301\r
2302 for (i = 1, pos = 1; i <= env->num_mem; i++) {\r
2303 if (map[i].new_val > 0) {\r
b602265d 2304 SCANENV_MEMENV(env)[pos] = SCANENV_MEMENV(env)[i];\r
14b0e578
CS
2305 pos++;\r
2306 }\r
2307 }\r
2308\r
2309 loc = env->capture_history;\r
b602265d 2310 MEM_STATUS_CLEAR(env->capture_history);\r
14b0e578 2311 for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {\r
b602265d
DG
2312 if (MEM_STATUS_AT(loc, i)) {\r
2313 MEM_STATUS_ON_SIMPLE(env->capture_history, map[i].new_val);\r
14b0e578
CS
2314 }\r
2315 }\r
2316\r
2317 env->num_mem = env->num_named;\r
2318 reg->num_mem = env->num_named;\r
b602265d 2319 result = onig_renumber_name_table(reg, map);\r
14b0e578 2320 xfree(map);\r
b602265d 2321 return result;\r
14b0e578 2322}\r
14b0e578 2323\r
b602265d 2324#ifdef USE_CALL\r
14b0e578 2325static int\r
b602265d 2326fix_unset_addr_list(UnsetAddrList* uslist, regex_t* reg)\r
14b0e578
CS
2327{\r
2328 int i, offset;\r
b26691c4 2329 BagNode* en;\r
14b0e578 2330 AbsAddrType addr;\r
b26691c4 2331 AbsAddrType* paddr;\r
14b0e578
CS
2332\r
2333 for (i = 0; i < uslist->num; i++) {\r
b602265d
DG
2334 if (! NODE_IS_ADDR_FIXED(uslist->us[i].target))\r
2335 return ONIGERR_PARSER_BUG;\r
2336\r
b26691c4 2337 en = BAG_(uslist->us[i].target);\r
b602265d 2338 addr = en->m.called_addr;\r
14b0e578
CS
2339 offset = uslist->us[i].offset;\r
2340\r
b26691c4
LG
2341 paddr = (AbsAddrType* )((char* )reg->ops + offset);\r
2342 *paddr = addr;\r
14b0e578
CS
2343 }\r
2344 return 0;\r
2345}\r
2346#endif\r
2347\r
b602265d
DG
2348\r
2349#define GET_CHAR_LEN_VARLEN -1\r
2350#define GET_CHAR_LEN_TOP_ALT_VARLEN -2\r
2351\r
2352/* fixed size pattern node only */\r
14b0e578 2353static int\r
b26691c4 2354get_char_len_node1(Node* node, regex_t* reg, int* len, int level)\r
14b0e578 2355{\r
b602265d 2356 int tlen;\r
14b0e578
CS
2357 int r = 0;\r
2358\r
b602265d
DG
2359 level++;\r
2360 *len = 0;\r
2361 switch (NODE_TYPE(node)) {\r
2362 case NODE_LIST:\r
2363 do {\r
b26691c4 2364 r = get_char_len_node1(NODE_CAR(node), reg, &tlen, level);\r
b602265d
DG
2365 if (r == 0)\r
2366 *len = distance_add(*len, tlen);\r
2367 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));\r
14b0e578
CS
2368 break;\r
2369\r
b602265d
DG
2370 case NODE_ALT:\r
2371 {\r
2372 int tlen2;\r
2373 int varlen = 0;\r
2374\r
b26691c4 2375 r = get_char_len_node1(NODE_CAR(node), reg, &tlen, level);\r
b602265d 2376 while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node))) {\r
b26691c4 2377 r = get_char_len_node1(NODE_CAR(node), reg, &tlen2, level);\r
b602265d
DG
2378 if (r == 0) {\r
2379 if (tlen != tlen2)\r
2380 varlen = 1;\r
2381 }\r
2382 }\r
2383 if (r == 0) {\r
2384 if (varlen != 0) {\r
2385 if (level == 1)\r
2386 r = GET_CHAR_LEN_TOP_ALT_VARLEN;\r
2387 else\r
2388 r = GET_CHAR_LEN_VARLEN;\r
2389 }\r
2390 else\r
2391 *len = tlen;\r
2392 }\r
14b0e578 2393 }\r
14b0e578 2394 break;\r
14b0e578 2395\r
b602265d 2396 case NODE_STRING:\r
14b0e578 2397 {\r
b602265d
DG
2398 StrNode* sn = STR_(node);\r
2399 UChar *s = sn->s;\r
2400\r
2401 while (s < sn->end) {\r
2402 s += enclen(reg->enc, s);\r
2403 (*len)++;\r
14b0e578
CS
2404 }\r
2405 }\r
2406 break;\r
2407\r
b602265d 2408 case NODE_QUANT:\r
14b0e578 2409 {\r
b602265d 2410 QuantNode* qn = QUANT_(node);\r
14b0e578 2411\r
14b0e578 2412 if (qn->lower == qn->upper) {\r
b602265d
DG
2413 if (qn->upper == 0) {\r
2414 *len = 0;\r
2415 }\r
2416 else {\r
b26691c4 2417 r = get_char_len_node1(NODE_BODY(node), reg, &tlen, level);\r
b602265d
DG
2418 if (r == 0)\r
2419 *len = distance_multiply(tlen, qn->lower);\r
2420 }\r
14b0e578
CS
2421 }\r
2422 else\r
b602265d 2423 r = GET_CHAR_LEN_VARLEN;\r
14b0e578
CS
2424 }\r
2425 break;\r
2426\r
b602265d
DG
2427#ifdef USE_CALL\r
2428 case NODE_CALL:\r
2429 if (! NODE_IS_RECURSION(node))\r
b26691c4 2430 r = get_char_len_node1(NODE_BODY(node), reg, len, level);\r
14b0e578
CS
2431 else\r
2432 r = GET_CHAR_LEN_VARLEN;\r
2433 break;\r
2434#endif\r
2435\r
b602265d
DG
2436 case NODE_CTYPE:\r
2437 case NODE_CCLASS:\r
14b0e578
CS
2438 *len = 1;\r
2439 break;\r
2440\r
b26691c4 2441 case NODE_BAG:\r
14b0e578 2442 {\r
b26691c4 2443 BagNode* en = BAG_(node);\r
b602265d 2444\r
14b0e578 2445 switch (en->type) {\r
b26691c4 2446 case BAG_MEMORY:\r
b602265d
DG
2447#ifdef USE_CALL\r
2448 if (NODE_IS_CLEN_FIXED(node))\r
2449 *len = en->char_len;\r
2450 else {\r
b26691c4 2451 r = get_char_len_node1(NODE_BODY(node), reg, len, level);\r
b602265d
DG
2452 if (r == 0) {\r
2453 en->char_len = *len;\r
2454 NODE_STATUS_ADD(node, CLEN_FIXED);\r
2455 }\r
2456 }\r
2457 break;\r
14b0e578 2458#endif\r
b26691c4
LG
2459 case BAG_OPTION:\r
2460 case BAG_STOP_BACKTRACK:\r
2461 r = get_char_len_node1(NODE_BODY(node), reg, len, level);\r
b602265d 2462 break;\r
b26691c4 2463 case BAG_IF_ELSE:\r
b602265d
DG
2464 {\r
2465 int clen, elen;\r
2466\r
b26691c4 2467 r = get_char_len_node1(NODE_BODY(node), reg, &clen, level);\r
b602265d
DG
2468 if (r == 0) {\r
2469 if (IS_NOT_NULL(en->te.Then)) {\r
b26691c4 2470 r = get_char_len_node1(en->te.Then, reg, &tlen, level);\r
b602265d
DG
2471 if (r != 0) break;\r
2472 }\r
2473 else tlen = 0;\r
2474 if (IS_NOT_NULL(en->te.Else)) {\r
b26691c4 2475 r = get_char_len_node1(en->te.Else, reg, &elen, level);\r
b602265d
DG
2476 if (r != 0) break;\r
2477 }\r
2478 else elen = 0;\r
2479\r
2480 if (clen + tlen != elen) {\r
2481 r = GET_CHAR_LEN_VARLEN;\r
2482 }\r
2483 else {\r
2484 *len = elen;\r
2485 }\r
2486 }\r
2487 }\r
2488 break;\r
14b0e578
CS
2489 }\r
2490 }\r
2491 break;\r
2492\r
b602265d
DG
2493 case NODE_ANCHOR:\r
2494 case NODE_GIMMICK:\r
14b0e578
CS
2495 break;\r
2496\r
b602265d
DG
2497 case NODE_BACKREF:\r
2498 if (NODE_IS_CHECKER(node))\r
2499 break;\r
2500 /* fall */\r
14b0e578
CS
2501 default:\r
2502 r = GET_CHAR_LEN_VARLEN;\r
2503 break;\r
2504 }\r
2505\r
2506 return r;\r
2507}\r
2508\r
2509static int\r
b26691c4 2510get_char_len_node(Node* node, regex_t* reg, int* len)\r
14b0e578 2511{\r
b26691c4 2512 return get_char_len_node1(node, reg, len, 0);\r
14b0e578
CS
2513}\r
2514\r
2515/* x is not included y ==> 1 : 0 */\r
2516static int\r
b602265d 2517is_exclusive(Node* x, Node* y, regex_t* reg)\r
14b0e578
CS
2518{\r
2519 int i, len;\r
2520 OnigCodePoint code;\r
2521 UChar *p;\r
b602265d 2522 NodeType ytype;\r
14b0e578
CS
2523\r
2524 retry:\r
b602265d
DG
2525 ytype = NODE_TYPE(y);\r
2526 switch (NODE_TYPE(x)) {\r
2527 case NODE_CTYPE:\r
14b0e578 2528 {\r
b602265d
DG
2529 if (CTYPE_(x)->ctype == CTYPE_ANYCHAR ||\r
2530 CTYPE_(y)->ctype == CTYPE_ANYCHAR)\r
2531 break;\r
2532\r
14b0e578 2533 switch (ytype) {\r
b602265d
DG
2534 case NODE_CTYPE:\r
2535 if (CTYPE_(y)->ctype == CTYPE_(x)->ctype &&\r
2536 CTYPE_(y)->not != CTYPE_(x)->not &&\r
2537 CTYPE_(y)->ascii_mode == CTYPE_(x)->ascii_mode)\r
2538 return 1;\r
2539 else\r
2540 return 0;\r
2541 break;\r
2542\r
2543 case NODE_CCLASS:\r
14b0e578 2544 swap:\r
b602265d
DG
2545 {\r
2546 Node* tmp;\r
2547 tmp = x; x = y; y = tmp;\r
2548 goto retry;\r
2549 }\r
2550 break;\r
14b0e578 2551\r
b602265d
DG
2552 case NODE_STRING:\r
2553 goto swap;\r
2554 break;\r
14b0e578
CS
2555\r
2556 default:\r
b602265d 2557 break;\r
14b0e578
CS
2558 }\r
2559 }\r
2560 break;\r
2561\r
b602265d 2562 case NODE_CCLASS:\r
14b0e578 2563 {\r
b602265d
DG
2564 int range;\r
2565 CClassNode* xc = CCLASS_(x);\r
2566\r
14b0e578 2567 switch (ytype) {\r
b602265d
DG
2568 case NODE_CTYPE:\r
2569 switch (CTYPE_(y)->ctype) {\r
2570 case CTYPE_ANYCHAR:\r
2571 return 0;\r
2572 break;\r
2573\r
2574 case ONIGENC_CTYPE_WORD:\r
2575 if (CTYPE_(y)->not == 0) {\r
2576 if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {\r
2577 range = CTYPE_(y)->ascii_mode != 0 ? 128 : SINGLE_BYTE_SIZE;\r
2578 for (i = 0; i < range; i++) {\r
2579 if (BITSET_AT(xc->bs, i)) {\r
2580 if (ONIGENC_IS_CODE_WORD(reg->enc, i)) return 0;\r
2581 }\r
2582 }\r
2583 return 1;\r
2584 }\r
2585 return 0;\r
2586 }\r
2587 else {\r
2588 if (IS_NOT_NULL(xc->mbuf)) return 0;\r
2589 if (IS_NCCLASS_NOT(xc)) return 0;\r
2590\r
2591 range = CTYPE_(y)->ascii_mode != 0 ? 128 : SINGLE_BYTE_SIZE;\r
2592 for (i = 0; i < range; i++) {\r
2593 if (! ONIGENC_IS_CODE_WORD(reg->enc, i)) {\r
2594 if (BITSET_AT(xc->bs, i))\r
2595 return 0;\r
2596 }\r
2597 }\r
2598 for (i = range; i < SINGLE_BYTE_SIZE; i++) {\r
2599 if (BITSET_AT(xc->bs, i)) return 0;\r
2600 }\r
2601 return 1;\r
2602 }\r
2603 break;\r
2604\r
2605 default:\r
2606 break;\r
2607 }\r
2608 break;\r
2609\r
2610 case NODE_CCLASS:\r
2611 {\r
2612 int v;\r
2613 CClassNode* yc = CCLASS_(y);\r
2614\r
2615 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {\r
2616 v = BITSET_AT(xc->bs, i);\r
2617 if ((v != 0 && !IS_NCCLASS_NOT(xc)) || (v == 0 && IS_NCCLASS_NOT(xc))) {\r
2618 v = BITSET_AT(yc->bs, i);\r
2619 if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||\r
14b0e578 2620 (v == 0 && IS_NCCLASS_NOT(yc)))\r
b602265d
DG
2621 return 0;\r
2622 }\r
2623 }\r
2624 if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||\r
2625 (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))\r
2626 return 1;\r
2627 return 0;\r
2628 }\r
2629 break;\r
2630\r
2631 case NODE_STRING:\r
2632 goto swap;\r
2633 break;\r
14b0e578
CS
2634\r
2635 default:\r
b602265d 2636 break;\r
14b0e578
CS
2637 }\r
2638 }\r
2639 break;\r
2640\r
b602265d 2641 case NODE_STRING:\r
14b0e578 2642 {\r
b602265d
DG
2643 StrNode* xs = STR_(x);\r
2644\r
2645 if (NODE_STRING_LEN(x) == 0)\r
2646 break;\r
14b0e578 2647\r
14b0e578 2648 switch (ytype) {\r
b602265d
DG
2649 case NODE_CTYPE:\r
2650 switch (CTYPE_(y)->ctype) {\r
2651 case CTYPE_ANYCHAR:\r
2652 break;\r
2653\r
14b0e578 2654 case ONIGENC_CTYPE_WORD:\r
b602265d
DG
2655 if (CTYPE_(y)->ascii_mode == 0) {\r
2656 if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))\r
2657 return CTYPE_(y)->not;\r
2658 else\r
2659 return !(CTYPE_(y)->not);\r
2660 }\r
2661 else {\r
2662 if (ONIGENC_IS_MBC_WORD_ASCII(reg->enc, xs->s, xs->end))\r
2663 return CTYPE_(y)->not;\r
2664 else\r
2665 return !(CTYPE_(y)->not);\r
2666 }\r
14b0e578
CS
2667 break;\r
2668 default:\r
2669 break;\r
2670 }\r
2671 break;\r
2672\r
b602265d 2673 case NODE_CCLASS:\r
14b0e578 2674 {\r
b602265d 2675 CClassNode* cc = CCLASS_(y);\r
14b0e578
CS
2676\r
2677 code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,\r
2678 xs->s + ONIGENC_MBC_MAXLEN(reg->enc));\r
b26691c4 2679 return onig_is_code_in_cc(reg->enc, code, cc) == 0;\r
14b0e578
CS
2680 }\r
2681 break;\r
2682\r
b602265d 2683 case NODE_STRING:\r
14b0e578
CS
2684 {\r
2685 UChar *q;\r
b602265d
DG
2686 StrNode* ys = STR_(y);\r
2687\r
2688 len = NODE_STRING_LEN(x);\r
2689 if (len > NODE_STRING_LEN(y)) len = NODE_STRING_LEN(y);\r
2690 if (NODE_STRING_IS_AMBIG(x) || NODE_STRING_IS_AMBIG(y)) {\r
14b0e578
CS
2691 /* tiny version */\r
2692 return 0;\r
2693 }\r
2694 else {\r
2695 for (i = 0, p = ys->s, q = xs->s; i < len; i++, p++, q++) {\r
2696 if (*p != *q) return 1;\r
2697 }\r
2698 }\r
2699 }\r
2700 break;\r
b602265d 2701\r
14b0e578
CS
2702 default:\r
2703 break;\r
2704 }\r
2705 }\r
2706 break;\r
2707\r
2708 default:\r
2709 break;\r
2710 }\r
2711\r
2712 return 0;\r
2713}\r
2714\r
2715static Node*\r
2716get_head_value_node(Node* node, int exact, regex_t* reg)\r
2717{\r
2718 Node* n = NULL_NODE;\r
2719\r
b602265d
DG
2720 switch (NODE_TYPE(node)) {\r
2721 case NODE_BACKREF:\r
2722 case NODE_ALT:\r
2723#ifdef USE_CALL\r
2724 case NODE_CALL:\r
14b0e578
CS
2725#endif\r
2726 break;\r
2727\r
b602265d
DG
2728 case NODE_CTYPE:\r
2729 if (CTYPE_(node)->ctype == CTYPE_ANYCHAR)\r
2730 break;\r
2731 /* fall */\r
2732 case NODE_CCLASS:\r
14b0e578
CS
2733 if (exact == 0) {\r
2734 n = node;\r
2735 }\r
2736 break;\r
2737\r
b602265d
DG
2738 case NODE_LIST:\r
2739 n = get_head_value_node(NODE_CAR(node), exact, reg);\r
14b0e578
CS
2740 break;\r
2741\r
b602265d 2742 case NODE_STRING:\r
14b0e578 2743 {\r
b602265d 2744 StrNode* sn = STR_(node);\r
14b0e578
CS
2745\r
2746 if (sn->end <= sn->s)\r
b602265d 2747 break;\r
14b0e578 2748\r
b26691c4
LG
2749 if (exact == 0 ||\r
2750 ! IS_IGNORECASE(reg->options) || NODE_STRING_IS_RAW(node)) {\r
b602265d 2751 n = node;\r
14b0e578
CS
2752 }\r
2753 }\r
2754 break;\r
2755\r
b602265d 2756 case NODE_QUANT:\r
14b0e578 2757 {\r
b602265d 2758 QuantNode* qn = QUANT_(node);\r
14b0e578 2759 if (qn->lower > 0) {\r
b602265d
DG
2760 if (IS_NOT_NULL(qn->head_exact))\r
2761 n = qn->head_exact;\r
2762 else\r
2763 n = get_head_value_node(NODE_BODY(node), exact, reg);\r
14b0e578
CS
2764 }\r
2765 }\r
2766 break;\r
2767\r
b26691c4 2768 case NODE_BAG:\r
14b0e578 2769 {\r
b26691c4 2770 BagNode* en = BAG_(node);\r
14b0e578 2771 switch (en->type) {\r
b26691c4 2772 case BAG_OPTION:\r
b602265d
DG
2773 {\r
2774 OnigOptionType options = reg->options;\r
2775\r
b26691c4 2776 reg->options = BAG_(node)->o.options;\r
b602265d
DG
2777 n = get_head_value_node(NODE_BODY(node), exact, reg);\r
2778 reg->options = options;\r
2779 }\r
2780 break;\r
2781\r
b26691c4
LG
2782 case BAG_MEMORY:\r
2783 case BAG_STOP_BACKTRACK:\r
2784 case BAG_IF_ELSE:\r
b602265d
DG
2785 n = get_head_value_node(NODE_BODY(node), exact, reg);\r
2786 break;\r
14b0e578
CS
2787 }\r
2788 }\r
2789 break;\r
2790\r
b602265d 2791 case NODE_ANCHOR:\r
b26691c4 2792 if (ANCHOR_(node)->type == ANCR_PREC_READ)\r
b602265d 2793 n = get_head_value_node(NODE_BODY(node), exact, reg);\r
14b0e578
CS
2794 break;\r
2795\r
b602265d 2796 case NODE_GIMMICK:\r
14b0e578
CS
2797 default:\r
2798 break;\r
2799 }\r
2800\r
2801 return n;\r
2802}\r
2803\r
2804static int\r
b26691c4 2805check_type_tree(Node* node, int type_mask, int bag_mask, int anchor_mask)\r
14b0e578 2806{\r
b602265d
DG
2807 NodeType type;\r
2808 int r = 0;\r
14b0e578 2809\r
b602265d
DG
2810 type = NODE_TYPE(node);\r
2811 if ((NODE_TYPE2BIT(type) & type_mask) == 0)\r
14b0e578
CS
2812 return 1;\r
2813\r
2814 switch (type) {\r
b602265d
DG
2815 case NODE_LIST:\r
2816 case NODE_ALT:\r
14b0e578 2817 do {\r
b26691c4 2818 r = check_type_tree(NODE_CAR(node), type_mask, bag_mask, anchor_mask);\r
b602265d 2819 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));\r
14b0e578
CS
2820 break;\r
2821\r
b602265d 2822 case NODE_QUANT:\r
b26691c4 2823 r = check_type_tree(NODE_BODY(node), type_mask, bag_mask, anchor_mask);\r
14b0e578
CS
2824 break;\r
2825\r
b26691c4 2826 case NODE_BAG:\r
14b0e578 2827 {\r
b26691c4
LG
2828 BagNode* en = BAG_(node);\r
2829 if (((1<<en->type) & bag_mask) == 0)\r
b602265d
DG
2830 return 1;\r
2831\r
b26691c4
LG
2832 r = check_type_tree(NODE_BODY(node), type_mask, bag_mask, anchor_mask);\r
2833 if (r == 0 && en->type == BAG_IF_ELSE) {\r
b602265d 2834 if (IS_NOT_NULL(en->te.Then)) {\r
b26691c4 2835 r = check_type_tree(en->te.Then, type_mask, bag_mask, anchor_mask);\r
b602265d
DG
2836 if (r != 0) break;\r
2837 }\r
2838 if (IS_NOT_NULL(en->te.Else)) {\r
b26691c4 2839 r = check_type_tree(en->te.Else, type_mask, bag_mask, anchor_mask);\r
b602265d
DG
2840 }\r
2841 }\r
14b0e578
CS
2842 }\r
2843 break;\r
2844\r
b602265d
DG
2845 case NODE_ANCHOR:\r
2846 type = ANCHOR_(node)->type;\r
14b0e578
CS
2847 if ((type & anchor_mask) == 0)\r
2848 return 1;\r
2849\r
b602265d 2850 if (IS_NOT_NULL(NODE_BODY(node)))\r
b26691c4 2851 r = check_type_tree(NODE_BODY(node), type_mask, bag_mask, anchor_mask);\r
14b0e578
CS
2852 break;\r
2853\r
b602265d 2854 case NODE_GIMMICK:\r
14b0e578
CS
2855 default:\r
2856 break;\r
2857 }\r
2858 return r;\r
2859}\r
2860\r
b602265d
DG
2861static OnigLen\r
2862tree_min_len(Node* node, ScanEnv* env)\r
14b0e578 2863{\r
b602265d
DG
2864 OnigLen len;\r
2865 OnigLen tmin;\r
14b0e578 2866\r
b602265d
DG
2867 len = 0;\r
2868 switch (NODE_TYPE(node)) {\r
2869 case NODE_BACKREF:\r
2870 if (! NODE_IS_CHECKER(node)) {\r
2871 int i;\r
2872 int* backs;\r
2873 MemEnv* mem_env = SCANENV_MEMENV(env);\r
2874 BackRefNode* br = BACKREF_(node);\r
2875 if (NODE_IS_RECURSION(node)) break;\r
14b0e578 2876\r
b602265d
DG
2877 backs = BACKREFS_P(br);\r
2878 len = tree_min_len(mem_env[backs[0]].node, env);\r
2879 for (i = 1; i < br->back_num; i++) {\r
2880 tmin = tree_min_len(mem_env[backs[i]].node, env);\r
2881 if (len > tmin) len = tmin;\r
2882 }\r
14b0e578
CS
2883 }\r
2884 break;\r
2885\r
b602265d
DG
2886#ifdef USE_CALL\r
2887 case NODE_CALL:\r
14b0e578 2888 {\r
b602265d
DG
2889 Node* t = NODE_BODY(node);\r
2890 if (NODE_IS_RECURSION(node)) {\r
2891 if (NODE_IS_MIN_FIXED(t))\r
b26691c4 2892 len = BAG_(t)->min_len;\r
b602265d
DG
2893 }\r
2894 else\r
2895 len = tree_min_len(t, env);\r
2896 }\r
2897 break;\r
2898#endif\r
2899\r
2900 case NODE_LIST:\r
2901 do {\r
2902 tmin = tree_min_len(NODE_CAR(node), env);\r
2903 len = distance_add(len, tmin);\r
2904 } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
2905 break;\r
2906\r
2907 case NODE_ALT:\r
2908 {\r
2909 Node *x, *y;\r
2910 y = node;\r
14b0e578 2911 do {\r
b602265d
DG
2912 x = NODE_CAR(y);\r
2913 tmin = tree_min_len(x, env);\r
2914 if (y == node) len = tmin;\r
2915 else if (len > tmin) len = tmin;\r
2916 } while (IS_NOT_NULL(y = NODE_CDR(y)));\r
14b0e578
CS
2917 }\r
2918 break;\r
2919\r
b602265d
DG
2920 case NODE_STRING:\r
2921 {\r
2922 StrNode* sn = STR_(node);\r
2923 len = (int )(sn->end - sn->s);\r
14b0e578
CS
2924 }\r
2925 break;\r
2926\r
b602265d
DG
2927 case NODE_CTYPE:\r
2928 case NODE_CCLASS:\r
2929 len = ONIGENC_MBC_MINLEN(env->enc);\r
2930 break;\r
2931\r
2932 case NODE_QUANT:\r
14b0e578 2933 {\r
b602265d
DG
2934 QuantNode* qn = QUANT_(node);\r
2935\r
2936 if (qn->lower > 0) {\r
2937 len = tree_min_len(NODE_BODY(node), env);\r
2938 len = distance_multiply(len, qn->lower);\r
14b0e578
CS
2939 }\r
2940 }\r
2941 break;\r
2942\r
b26691c4 2943 case NODE_BAG:\r
b602265d 2944 {\r
b26691c4 2945 BagNode* en = BAG_(node);\r
b602265d 2946 switch (en->type) {\r
b26691c4 2947 case BAG_MEMORY:\r
b602265d
DG
2948 if (NODE_IS_MIN_FIXED(node))\r
2949 len = en->min_len;\r
2950 else {\r
2951 if (NODE_IS_MARK1(node))\r
2952 len = 0; /* recursive */\r
2953 else {\r
2954 NODE_STATUS_ADD(node, MARK1);\r
2955 len = tree_min_len(NODE_BODY(node), env);\r
2956 NODE_STATUS_REMOVE(node, MARK1);\r
14b0e578 2957\r
b602265d
DG
2958 en->min_len = len;\r
2959 NODE_STATUS_ADD(node, MIN_FIXED);\r
2960 }\r
2961 }\r
2962 break;\r
2963\r
b26691c4
LG
2964 case BAG_OPTION:\r
2965 case BAG_STOP_BACKTRACK:\r
b602265d
DG
2966 len = tree_min_len(NODE_BODY(node), env);\r
2967 break;\r
b26691c4 2968 case BAG_IF_ELSE:\r
b602265d
DG
2969 {\r
2970 OnigLen elen;\r
2971\r
2972 len = tree_min_len(NODE_BODY(node), env);\r
2973 if (IS_NOT_NULL(en->te.Then))\r
2974 len += tree_min_len(en->te.Then, env);\r
2975 if (IS_NOT_NULL(en->te.Else))\r
2976 elen = tree_min_len(en->te.Else, env);\r
2977 else elen = 0;\r
2978\r
2979 if (elen < len) len = elen;\r
2980 }\r
2981 break;\r
2982 }\r
14b0e578
CS
2983 }\r
2984 break;\r
2985\r
b602265d
DG
2986 case NODE_GIMMICK:\r
2987 {\r
2988 GimmickNode* g = GIMMICK_(node);\r
2989 if (g->type == GIMMICK_FAIL) {\r
2990 len = INFINITE_LEN;\r
2991 break;\r
2992 }\r
2993 }\r
2994 /* fall */\r
2995 case NODE_ANCHOR:\r
14b0e578
CS
2996 default:\r
2997 break;\r
2998 }\r
2999\r
b602265d 3000 return len;\r
14b0e578
CS
3001}\r
3002\r
b602265d
DG
3003static OnigLen\r
3004tree_max_len(Node* node, ScanEnv* env)\r
14b0e578 3005{\r
b602265d
DG
3006 OnigLen len;\r
3007 OnigLen tmax;\r
14b0e578 3008\r
b602265d
DG
3009 len = 0;\r
3010 switch (NODE_TYPE(node)) {\r
3011 case NODE_LIST:\r
14b0e578 3012 do {\r
b602265d
DG
3013 tmax = tree_max_len(NODE_CAR(node), env);\r
3014 len = distance_add(len, tmax);\r
3015 } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
14b0e578
CS
3016 break;\r
3017\r
b602265d
DG
3018 case NODE_ALT:\r
3019 do {\r
3020 tmax = tree_max_len(NODE_CAR(node), env);\r
3021 if (len < tmax) len = tmax;\r
3022 } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
14b0e578
CS
3023 break;\r
3024\r
b602265d 3025 case NODE_STRING:\r
14b0e578 3026 {\r
b602265d
DG
3027 StrNode* sn = STR_(node);\r
3028 len = (OnigLen )(sn->end - sn->s);\r
3029 }\r
3030 break;\r
3031\r
3032 case NODE_CTYPE:\r
3033 case NODE_CCLASS:\r
3034 len = ONIGENC_MBC_MAXLEN_DIST(env->enc);\r
3035 break;\r
3036\r
3037 case NODE_BACKREF:\r
3038 if (! NODE_IS_CHECKER(node)) {\r
3039 int i;\r
3040 int* backs;\r
3041 MemEnv* mem_env = SCANENV_MEMENV(env);\r
3042 BackRefNode* br = BACKREF_(node);\r
3043 if (NODE_IS_RECURSION(node)) {\r
3044 len = INFINITE_LEN;\r
3045 break;\r
3046 }\r
3047 backs = BACKREFS_P(br);\r
3048 for (i = 0; i < br->back_num; i++) {\r
3049 tmax = tree_max_len(mem_env[backs[i]].node, env);\r
3050 if (len < tmax) len = tmax;\r
14b0e578
CS
3051 }\r
3052 }\r
3053 break;\r
3054\r
b602265d
DG
3055#ifdef USE_CALL\r
3056 case NODE_CALL:\r
3057 if (! NODE_IS_RECURSION(node))\r
3058 len = tree_max_len(NODE_BODY(node), env);\r
3059 else\r
3060 len = INFINITE_LEN;\r
3061 break;\r
3062#endif\r
3063\r
3064 case NODE_QUANT:\r
14b0e578 3065 {\r
b602265d 3066 QuantNode* qn = QUANT_(node);\r
14b0e578 3067\r
b602265d
DG
3068 if (qn->upper != 0) {\r
3069 len = tree_max_len(NODE_BODY(node), env);\r
3070 if (len != 0) {\r
b26691c4 3071 if (! IS_INFINITE_REPEAT(qn->upper))\r
b602265d
DG
3072 len = distance_multiply(len, qn->upper);\r
3073 else\r
3074 len = INFINITE_LEN;\r
3075 }\r
14b0e578 3076 }\r
14b0e578 3077 }\r
b602265d
DG
3078 break;\r
3079\r
b26691c4 3080 case NODE_BAG:\r
b602265d 3081 {\r
b26691c4 3082 BagNode* en = BAG_(node);\r
b602265d 3083 switch (en->type) {\r
b26691c4 3084 case BAG_MEMORY:\r
b602265d
DG
3085 if (NODE_IS_MAX_FIXED(node))\r
3086 len = en->max_len;\r
3087 else {\r
3088 if (NODE_IS_MARK1(node))\r
3089 len = INFINITE_LEN;\r
3090 else {\r
3091 NODE_STATUS_ADD(node, MARK1);\r
3092 len = tree_max_len(NODE_BODY(node), env);\r
3093 NODE_STATUS_REMOVE(node, MARK1);\r
3094\r
3095 en->max_len = len;\r
3096 NODE_STATUS_ADD(node, MAX_FIXED);\r
3097 }\r
3098 }\r
3099 break;\r
3100\r
b26691c4
LG
3101 case BAG_OPTION:\r
3102 case BAG_STOP_BACKTRACK:\r
b602265d
DG
3103 len = tree_max_len(NODE_BODY(node), env);\r
3104 break;\r
b26691c4 3105 case BAG_IF_ELSE:\r
b602265d
DG
3106 {\r
3107 OnigLen tlen, elen;\r
3108\r
3109 len = tree_max_len(NODE_BODY(node), env);\r
3110 if (IS_NOT_NULL(en->te.Then)) {\r
3111 tlen = tree_max_len(en->te.Then, env);\r
3112 len = distance_add(len, tlen);\r
3113 }\r
3114 if (IS_NOT_NULL(en->te.Else))\r
3115 elen = tree_max_len(en->te.Else, env);\r
3116 else elen = 0;\r
14b0e578 3117\r
b602265d
DG
3118 if (elen > len) len = elen;\r
3119 }\r
3120 break;\r
3121 }\r
3122 }\r
14b0e578
CS
3123 break;\r
3124\r
b602265d
DG
3125 case NODE_ANCHOR:\r
3126 case NODE_GIMMICK:\r
14b0e578
CS
3127 default:\r
3128 break;\r
3129 }\r
3130\r
b602265d 3131 return len;\r
14b0e578
CS
3132}\r
3133\r
3134static int\r
b602265d 3135check_backrefs(Node* node, ScanEnv* env)\r
14b0e578 3136{\r
b602265d 3137 int r;\r
14b0e578 3138\r
b602265d
DG
3139 switch (NODE_TYPE(node)) {\r
3140 case NODE_LIST:\r
3141 case NODE_ALT:\r
14b0e578 3142 do {\r
b602265d
DG
3143 r = check_backrefs(NODE_CAR(node), env);\r
3144 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));\r
14b0e578
CS
3145 break;\r
3146\r
b602265d
DG
3147 case NODE_ANCHOR:\r
3148 if (! ANCHOR_HAS_BODY(ANCHOR_(node))) {\r
3149 r = 0;\r
3150 break;\r
3151 }\r
3152 /* fall */\r
3153 case NODE_QUANT:\r
3154 r = check_backrefs(NODE_BODY(node), env);\r
14b0e578
CS
3155 break;\r
3156\r
b26691c4 3157 case NODE_BAG:\r
b602265d 3158 r = check_backrefs(NODE_BODY(node), env);\r
14b0e578 3159 {\r
b26691c4 3160 BagNode* en = BAG_(node);\r
b602265d 3161\r
b26691c4 3162 if (en->type == BAG_IF_ELSE) {\r
b602265d
DG
3163 if (r != 0) return r;\r
3164 if (IS_NOT_NULL(en->te.Then)) {\r
3165 r = check_backrefs(en->te.Then, env);\r
3166 if (r != 0) return r;\r
3167 }\r
3168 if (IS_NOT_NULL(en->te.Else)) {\r
3169 r = check_backrefs(en->te.Else, env);\r
3170 }\r
14b0e578
CS
3171 }\r
3172 }\r
3173 break;\r
3174\r
b602265d
DG
3175 case NODE_BACKREF:\r
3176 {\r
3177 int i;\r
3178 BackRefNode* br = BACKREF_(node);\r
3179 int* backs = BACKREFS_P(br);\r
3180 MemEnv* mem_env = SCANENV_MEMENV(env);\r
14b0e578 3181\r
b602265d
DG
3182 for (i = 0; i < br->back_num; i++) {\r
3183 if (backs[i] > env->num_mem)\r
3184 return ONIGERR_INVALID_BACKREF;\r
3185\r
3186 NODE_STATUS_ADD(mem_env[backs[i]].node, BACKREF);\r
3187 }\r
3188 r = 0;\r
14b0e578
CS
3189 }\r
3190 break;\r
3191\r
3192 default:\r
b602265d 3193 r = 0;\r
14b0e578
CS
3194 break;\r
3195 }\r
3196\r
3197 return r;\r
3198}\r
3199\r
3200\r
b602265d
DG
3201#ifdef USE_CALL\r
3202\r
3203#define RECURSION_EXIST (1<<0)\r
3204#define RECURSION_MUST (1<<1)\r
3205#define RECURSION_INFINITE (1<<2)\r
3206\r
14b0e578 3207static int\r
b602265d 3208infinite_recursive_call_check(Node* node, ScanEnv* env, int head)\r
14b0e578 3209{\r
b602265d 3210 int ret;\r
14b0e578
CS
3211 int r = 0;\r
3212\r
b602265d
DG
3213 switch (NODE_TYPE(node)) {\r
3214 case NODE_LIST:\r
14b0e578 3215 {\r
b602265d
DG
3216 Node *x;\r
3217 OnigLen min;\r
3218\r
3219 x = node;\r
14b0e578 3220 do {\r
b602265d
DG
3221 ret = infinite_recursive_call_check(NODE_CAR(x), env, head);\r
3222 if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;\r
3223 r |= ret;\r
3224 if (head != 0) {\r
3225 min = tree_min_len(NODE_CAR(x), env);\r
3226 if (min != 0) head = 0;\r
3227 }\r
3228 } while (IS_NOT_NULL(x = NODE_CDR(x)));\r
14b0e578
CS
3229 }\r
3230 break;\r
3231\r
b602265d
DG
3232 case NODE_ALT:\r
3233 {\r
3234 int must;\r
3235\r
3236 must = RECURSION_MUST;\r
3237 do {\r
3238 ret = infinite_recursive_call_check(NODE_CAR(node), env, head);\r
3239 if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;\r
3240\r
3241 r |= (ret & RECURSION_EXIST);\r
3242 must &= ret;\r
3243 } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
3244 r |= must;\r
14b0e578
CS
3245 }\r
3246 break;\r
3247\r
b602265d
DG
3248 case NODE_QUANT:\r
3249 r = infinite_recursive_call_check(NODE_BODY(node), env, head);\r
3250 if (r < 0) return r;\r
3251 if ((r & RECURSION_MUST) != 0) {\r
3252 if (QUANT_(node)->lower == 0)\r
3253 r &= ~RECURSION_MUST;\r
14b0e578
CS
3254 }\r
3255 break;\r
3256\r
b602265d
DG
3257 case NODE_ANCHOR:\r
3258 if (! ANCHOR_HAS_BODY(ANCHOR_(node)))\r
3259 break;\r
3260 /* fall */\r
3261 case NODE_CALL:\r
3262 r = infinite_recursive_call_check(NODE_BODY(node), env, head);\r
3263 break;\r
3264\r
b26691c4 3265 case NODE_BAG:\r
14b0e578 3266 {\r
b26691c4 3267 BagNode* en = BAG_(node);\r
b602265d 3268\r
b26691c4 3269 if (en->type == BAG_MEMORY) {\r
b602265d
DG
3270 if (NODE_IS_MARK2(node))\r
3271 return 0;\r
3272 else if (NODE_IS_MARK1(node))\r
3273 return (head == 0 ? RECURSION_EXIST | RECURSION_MUST\r
3274 : RECURSION_EXIST | RECURSION_MUST | RECURSION_INFINITE);\r
3275 else {\r
3276 NODE_STATUS_ADD(node, MARK2);\r
3277 r = infinite_recursive_call_check(NODE_BODY(node), env, head);\r
3278 NODE_STATUS_REMOVE(node, MARK2);\r
3279 }\r
3280 }\r
b26691c4 3281 else if (en->type == BAG_IF_ELSE) {\r
b602265d
DG
3282 int eret;\r
3283\r
3284 ret = infinite_recursive_call_check(NODE_BODY(node), env, head);\r
3285 if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;\r
3286 r |= ret;\r
3287 if (IS_NOT_NULL(en->te.Then)) {\r
3288 OnigLen min;\r
3289 if (head != 0) {\r
3290 min = tree_min_len(NODE_BODY(node), env);\r
3291 }\r
3292 else min = 0;\r
3293\r
3294 ret = infinite_recursive_call_check(en->te.Then, env, min != 0 ? 0:head);\r
3295 if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;\r
3296 r |= ret;\r
3297 }\r
3298 if (IS_NOT_NULL(en->te.Else)) {\r
3299 eret = infinite_recursive_call_check(en->te.Else, env, head);\r
3300 if (eret < 0 || (eret & RECURSION_INFINITE) != 0) return eret;\r
3301 r |= (eret & RECURSION_EXIST);\r
3302 if ((eret & RECURSION_MUST) == 0)\r
3303 r &= ~RECURSION_MUST;\r
3304 }\r
3305 }\r
3306 else {\r
3307 r = infinite_recursive_call_check(NODE_BODY(node), env, head);\r
14b0e578 3308 }\r
14b0e578
CS
3309 }\r
3310 break;\r
3311\r
3312 default:\r
3313 break;\r
3314 }\r
3315\r
3316 return r;\r
3317}\r
3318\r
3319static int\r
b602265d 3320infinite_recursive_call_check_trav(Node* node, ScanEnv* env)\r
14b0e578 3321{\r
b602265d 3322 int r;\r
14b0e578 3323\r
b602265d
DG
3324 switch (NODE_TYPE(node)) {\r
3325 case NODE_LIST:\r
3326 case NODE_ALT:\r
14b0e578 3327 do {\r
b602265d
DG
3328 r = infinite_recursive_call_check_trav(NODE_CAR(node), env);\r
3329 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));\r
3330 break;\r
3331\r
3332 case NODE_ANCHOR:\r
3333 if (! ANCHOR_HAS_BODY(ANCHOR_(node))) {\r
3334 r = 0;\r
3335 break;\r
3336 }\r
3337 /* fall */\r
3338 case NODE_QUANT:\r
3339 r = infinite_recursive_call_check_trav(NODE_BODY(node), env);\r
3340 break;\r
3341\r
b26691c4 3342 case NODE_BAG:\r
b602265d 3343 {\r
b26691c4 3344 BagNode* en = BAG_(node);\r
b602265d 3345\r
b26691c4 3346 if (en->type == BAG_MEMORY) {\r
b602265d
DG
3347 if (NODE_IS_RECURSION(node) && NODE_IS_CALLED(node)) {\r
3348 int ret;\r
3349\r
3350 NODE_STATUS_ADD(node, MARK1);\r
3351\r
3352 ret = infinite_recursive_call_check(NODE_BODY(node), env, 1);\r
3353 if (ret < 0) return ret;\r
3354 else if ((ret & (RECURSION_MUST | RECURSION_INFINITE)) != 0)\r
3355 return ONIGERR_NEVER_ENDING_RECURSION;\r
3356\r
3357 NODE_STATUS_REMOVE(node, MARK1);\r
3358 }\r
3359 }\r
b26691c4 3360 else if (en->type == BAG_IF_ELSE) {\r
b602265d
DG
3361 if (IS_NOT_NULL(en->te.Then)) {\r
3362 r = infinite_recursive_call_check_trav(en->te.Then, env);\r
3363 if (r != 0) return r;\r
3364 }\r
3365 if (IS_NOT_NULL(en->te.Else)) {\r
3366 r = infinite_recursive_call_check_trav(en->te.Else, env);\r
3367 if (r != 0) return r;\r
3368 }\r
3369 }\r
3370 }\r
3371\r
3372 r = infinite_recursive_call_check_trav(NODE_BODY(node), env);\r
3373 break;\r
3374\r
3375 default:\r
3376 r = 0;\r
14b0e578 3377 break;\r
b602265d
DG
3378 }\r
3379\r
3380 return r;\r
3381}\r
3382\r
3383static int\r
3384recursive_call_check(Node* node)\r
3385{\r
3386 int r;\r
14b0e578 3387\r
b602265d
DG
3388 switch (NODE_TYPE(node)) {\r
3389 case NODE_LIST:\r
3390 case NODE_ALT:\r
3391 r = 0;\r
14b0e578 3392 do {\r
b602265d
DG
3393 r |= recursive_call_check(NODE_CAR(node));\r
3394 } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
14b0e578
CS
3395 break;\r
3396\r
b602265d
DG
3397 case NODE_ANCHOR:\r
3398 if (! ANCHOR_HAS_BODY(ANCHOR_(node))) {\r
3399 r = 0;\r
3400 break;\r
3401 }\r
3402 /* fall */\r
3403 case NODE_QUANT:\r
3404 r = recursive_call_check(NODE_BODY(node));\r
14b0e578 3405 break;\r
b602265d
DG
3406\r
3407 case NODE_CALL:\r
3408 r = recursive_call_check(NODE_BODY(node));\r
3409 if (r != 0) {\r
3410 if (NODE_IS_MARK1(NODE_BODY(node)))\r
3411 NODE_STATUS_ADD(node, RECURSION);\r
3412 }\r
14b0e578
CS
3413 break;\r
3414\r
b26691c4 3415 case NODE_BAG:\r
14b0e578 3416 {\r
b26691c4 3417 BagNode* en = BAG_(node);\r
b602265d 3418\r
b26691c4 3419 if (en->type == BAG_MEMORY) {\r
b602265d
DG
3420 if (NODE_IS_MARK2(node))\r
3421 return 0;\r
3422 else if (NODE_IS_MARK1(node))\r
3423 return 1; /* recursion */\r
3424 else {\r
3425 NODE_STATUS_ADD(node, MARK2);\r
3426 r = recursive_call_check(NODE_BODY(node));\r
3427 NODE_STATUS_REMOVE(node, MARK2);\r
3428 }\r
3429 }\r
b26691c4 3430 else if (en->type == BAG_IF_ELSE) {\r
b602265d
DG
3431 r = 0;\r
3432 if (IS_NOT_NULL(en->te.Then)) {\r
3433 r |= recursive_call_check(en->te.Then);\r
3434 }\r
3435 if (IS_NOT_NULL(en->te.Else)) {\r
3436 r |= recursive_call_check(en->te.Else);\r
3437 }\r
3438 r |= recursive_call_check(NODE_BODY(node));\r
14b0e578 3439 }\r
14b0e578 3440 else {\r
b602265d 3441 r = recursive_call_check(NODE_BODY(node));\r
14b0e578 3442 }\r
14b0e578
CS
3443 }\r
3444 break;\r
3445\r
b602265d
DG
3446 default:\r
3447 r = 0;\r
3448 break;\r
3449 }\r
3450\r
3451 return r;\r
3452}\r
3453\r
3454#define IN_RECURSION (1<<0)\r
3455#define FOUND_CALLED_NODE 1\r
3456\r
3457static int\r
3458recursive_call_check_trav(Node* node, ScanEnv* env, int state)\r
3459{\r
3460 int r = 0;\r
3461\r
3462 switch (NODE_TYPE(node)) {\r
3463 case NODE_LIST:\r
3464 case NODE_ALT:\r
14b0e578 3465 {\r
b602265d
DG
3466 int ret;\r
3467 do {\r
3468 ret = recursive_call_check_trav(NODE_CAR(node), env, state);\r
3469 if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;\r
3470 else if (ret < 0) return ret;\r
3471 } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
3472 }\r
3473 break;\r
14b0e578 3474\r
b602265d
DG
3475 case NODE_QUANT:\r
3476 r = recursive_call_check_trav(NODE_BODY(node), env, state);\r
3477 if (QUANT_(node)->upper == 0) {\r
3478 if (r == FOUND_CALLED_NODE)\r
3479 QUANT_(node)->is_refered = 1;\r
3480 }\r
3481 break;\r
3482\r
3483 case NODE_ANCHOR:\r
3484 {\r
3485 AnchorNode* an = ANCHOR_(node);\r
3486 if (ANCHOR_HAS_BODY(an))\r
3487 r = recursive_call_check_trav(NODE_ANCHOR_BODY(an), env, state);\r
3488 }\r
3489 break;\r
3490\r
b26691c4 3491 case NODE_BAG:\r
b602265d
DG
3492 {\r
3493 int ret;\r
3494 int state1;\r
b26691c4 3495 BagNode* en = BAG_(node);\r
b602265d 3496\r
b26691c4 3497 if (en->type == BAG_MEMORY) {\r
b602265d
DG
3498 if (NODE_IS_CALLED(node) || (state & IN_RECURSION) != 0) {\r
3499 if (! NODE_IS_RECURSION(node)) {\r
3500 NODE_STATUS_ADD(node, MARK1);\r
3501 r = recursive_call_check(NODE_BODY(node));\r
3502 if (r != 0)\r
3503 NODE_STATUS_ADD(node, RECURSION);\r
3504 NODE_STATUS_REMOVE(node, MARK1);\r
3505 }\r
3506\r
3507 if (NODE_IS_CALLED(node))\r
3508 r = FOUND_CALLED_NODE;\r
3509 }\r
3510 }\r
3511\r
3512 state1 = state;\r
3513 if (NODE_IS_RECURSION(node))\r
3514 state1 |= IN_RECURSION;\r
3515\r
3516 ret = recursive_call_check_trav(NODE_BODY(node), env, state1);\r
3517 if (ret == FOUND_CALLED_NODE)\r
3518 r = FOUND_CALLED_NODE;\r
3519\r
b26691c4 3520 if (en->type == BAG_IF_ELSE) {\r
b602265d
DG
3521 if (IS_NOT_NULL(en->te.Then)) {\r
3522 ret = recursive_call_check_trav(en->te.Then, env, state1);\r
3523 if (ret == FOUND_CALLED_NODE)\r
3524 r = FOUND_CALLED_NODE;\r
3525 }\r
3526 if (IS_NOT_NULL(en->te.Else)) {\r
3527 ret = recursive_call_check_trav(en->te.Else, env, state1);\r
3528 if (ret == FOUND_CALLED_NODE)\r
3529 r = FOUND_CALLED_NODE;\r
3530 }\r
14b0e578
CS
3531 }\r
3532 }\r
3533 break;\r
3534\r
3535 default:\r
3536 break;\r
3537 }\r
3538\r
3539 return r;\r
3540}\r
b602265d 3541\r
14b0e578
CS
3542#endif\r
3543\r
b26691c4
LG
/* Bit flags carried in the "state" argument of the tree setup passes. */
#define IN_ALT          (1 << 0)  /* below a NODE_ALT */
#define IN_NOT          (1 << 1)  /* below a negative look-ahead/look-behind */
#define IN_REAL_REPEAT  (1 << 2)  /* below a quantifier with upper >= 2 or infinite */
#define IN_VAR_REPEAT   (1 << 3)  /* below a quantifier with lower != upper */
#define IN_ZERO_REPEAT  (1 << 4)  /* below a quantifier with upper == 0 */
#define IN_MULTI_ENTRY  (1 << 5)  /* below a capture group with entry_count > 1 */
#define IN_LOOK_BEHIND  (1 << 6)  /* tested by expand_case_fold_string(); presumably set below a look-behind */
3551\r
3552\r
14b0e578
CS
3553/* divide different length alternatives in look-behind.\r
3554 (?<=A|B) ==> (?<=A)|(?<=B)\r
3555 (?<!A|B) ==> (?<!A)(?<!B)\r
3556*/\r
3557static int\r
3558divide_look_behind_alternatives(Node* node)\r
3559{\r
3560 Node *head, *np, *insert_node;\r
b602265d 3561 AnchorNode* an = ANCHOR_(node);\r
14b0e578
CS
3562 int anc_type = an->type;\r
3563\r
b602265d
DG
3564 head = NODE_ANCHOR_BODY(an);\r
3565 np = NODE_CAR(head);\r
14b0e578 3566 swap_node(node, head);\r
b602265d
DG
3567 NODE_CAR(node) = head;\r
3568 NODE_BODY(head) = np;\r
14b0e578
CS
3569\r
3570 np = node;\r
b602265d
DG
3571 while (IS_NOT_NULL(np = NODE_CDR(np))) {\r
3572 insert_node = onig_node_new_anchor(anc_type, an->ascii_mode);\r
14b0e578 3573 CHECK_NULL_RETURN_MEMERR(insert_node);\r
b602265d
DG
3574 NODE_BODY(insert_node) = NODE_CAR(np);\r
3575 NODE_CAR(np) = insert_node;\r
14b0e578
CS
3576 }\r
3577\r
b26691c4 3578 if (anc_type == ANCR_LOOK_BEHIND_NOT) {\r
14b0e578
CS
3579 np = node;\r
3580 do {\r
b602265d
DG
3581 NODE_SET_TYPE(np, NODE_LIST); /* alt -> list */\r
3582 } while (IS_NOT_NULL(np = NODE_CDR(np)));\r
14b0e578
CS
3583 }\r
3584 return 0;\r
3585}\r
3586\r
3587static int\r
3588setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)\r
3589{\r
3590 int r, len;\r
b602265d 3591 AnchorNode* an = ANCHOR_(node);\r
14b0e578 3592\r
b26691c4 3593 r = get_char_len_node(NODE_ANCHOR_BODY(an), reg, &len);\r
14b0e578
CS
3594 if (r == 0)\r
3595 an->char_len = len;\r
3596 else if (r == GET_CHAR_LEN_VARLEN)\r
3597 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;\r
3598 else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {\r
3599 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))\r
3600 r = divide_look_behind_alternatives(node);\r
3601 else\r
3602 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;\r
3603 }\r
3604\r
3605 return r;\r
3606}\r
3607\r
3608static int\r
3609next_setup(Node* node, Node* next_node, regex_t* reg)\r
3610{\r
b602265d 3611 NodeType type;\r
14b0e578
CS
3612\r
3613 retry:\r
b602265d
DG
3614 type = NODE_TYPE(node);\r
3615 if (type == NODE_QUANT) {\r
3616 QuantNode* qn = QUANT_(node);\r
b26691c4 3617 if (qn->greedy && IS_INFINITE_REPEAT(qn->upper)) {\r
b602265d 3618#ifdef USE_QUANT_PEEK_NEXT\r
14b0e578
CS
3619 Node* n = get_head_value_node(next_node, 1, reg);\r
3620 /* '\0': for UTF-16BE etc... */\r
b602265d
DG
3621 if (IS_NOT_NULL(n) && STR_(n)->s[0] != '\0') {\r
3622 qn->next_head_exact = n;\r
14b0e578
CS
3623 }\r
3624#endif\r
3625 /* automatic posseivation a*b ==> (?>a*)b */\r
3626 if (qn->lower <= 1) {\r
b26691c4 3627 if (is_strict_real_node(NODE_BODY(node))) {\r
b602265d
DG
3628 Node *x, *y;\r
3629 x = get_head_value_node(NODE_BODY(node), 0, reg);\r
3630 if (IS_NOT_NULL(x)) {\r
3631 y = get_head_value_node(next_node, 0, reg);\r
3632 if (IS_NOT_NULL(y) && is_exclusive(x, y, reg)) {\r
b26691c4 3633 Node* en = onig_node_new_bag(BAG_STOP_BACKTRACK);\r
b602265d 3634 CHECK_NULL_RETURN_MEMERR(en);\r
b26691c4 3635 NODE_STATUS_ADD(en, STRICT_REAL_REPEAT);\r
b602265d
DG
3636 swap_node(node, en);\r
3637 NODE_BODY(node) = en;\r
3638 }\r
3639 }\r
3640 }\r
14b0e578
CS
3641 }\r
3642 }\r
3643 }\r
b26691c4
LG
3644 else if (type == NODE_BAG) {\r
3645 BagNode* en = BAG_(node);\r
3646 if (en->type == BAG_MEMORY) {\r
b602265d 3647 node = NODE_BODY(node);\r
14b0e578
CS
3648 goto retry;\r
3649 }\r
3650 }\r
3651 return 0;\r
3652}\r
3653\r
3654\r
3655static int\r
3656update_string_node_case_fold(regex_t* reg, Node *node)\r
3657{\r
3658 UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];\r
3659 UChar *sbuf, *ebuf, *sp;\r
3660 int r, i, len, sbuf_size;\r
b602265d 3661 StrNode* sn = STR_(node);\r
14b0e578
CS
3662\r
3663 end = sn->end;\r
b602265d 3664 sbuf_size = (int )(end - sn->s) * 2;\r
14b0e578
CS
3665 sbuf = (UChar* )xmalloc(sbuf_size);\r
3666 CHECK_NULL_RETURN_MEMERR(sbuf);\r
3667 ebuf = sbuf + sbuf_size;\r
3668\r
3669 sp = sbuf;\r
3670 p = sn->s;\r
3671 while (p < end) {\r
3672 len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);\r
3673 for (i = 0; i < len; i++) {\r
3674 if (sp >= ebuf) {\r
3675 sbuf = (UChar* )xrealloc(sbuf, sbuf_size * 2, sbuf_size);\r
3676 CHECK_NULL_RETURN_MEMERR(sbuf);\r
3677 sp = sbuf + sbuf_size;\r
3678 sbuf_size *= 2;\r
3679 ebuf = sbuf + sbuf_size;\r
3680 }\r
3681\r
3682 *sp++ = buf[i];\r
3683 }\r
3684 }\r
3685\r
3686 r = onig_node_str_set(node, sbuf, sp);\r
3687 if (r != 0) {\r
3688 xfree(sbuf);\r
3689 return r;\r
3690 }\r
3691\r
3692 xfree(sbuf);\r
3693 return 0;\r
3694}\r
3695\r
3696static int\r
b602265d 3697expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end, regex_t* reg)\r
14b0e578
CS
3698{\r
3699 int r;\r
3700 Node *node;\r
3701\r
3702 node = onig_node_new_str(s, end);\r
3703 if (IS_NULL(node)) return ONIGERR_MEMORY;\r
3704\r
3705 r = update_string_node_case_fold(reg, node);\r
3706 if (r != 0) {\r
3707 onig_node_free(node);\r
3708 return r;\r
3709 }\r
3710\r
b602265d
DG
3711 NODE_STRING_SET_AMBIG(node);\r
3712 NODE_STRING_SET_DONT_GET_OPT_INFO(node);\r
14b0e578
CS
3713 *rnode = node;\r
3714 return 0;\r
3715}\r
3716\r
3717static int\r
b602265d
DG
3718expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], UChar *p,\r
3719 int slen, UChar *end, regex_t* reg, Node **rnode)\r
14b0e578 3720{\r
b602265d
DG
3721 int r, i, j;\r
3722 int len;\r
3723 int varlen;\r
14b0e578
CS
3724 Node *anode, *var_anode, *snode, *xnode, *an;\r
3725 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];\r
3726\r
3727 *rnode = var_anode = NULL_NODE;\r
3728\r
3729 varlen = 0;\r
3730 for (i = 0; i < item_num; i++) {\r
3731 if (items[i].byte_len != slen) {\r
3732 varlen = 1;\r
3733 break;\r
3734 }\r
3735 }\r
3736\r
3737 if (varlen != 0) {\r
3738 *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);\r
3739 if (IS_NULL(var_anode)) return ONIGERR_MEMORY;\r
3740\r
3741 xnode = onig_node_new_list(NULL, NULL);\r
3742 if (IS_NULL(xnode)) goto mem_err;\r
b602265d 3743 NODE_CAR(var_anode) = xnode;\r
14b0e578
CS
3744\r
3745 anode = onig_node_new_alt(NULL_NODE, NULL_NODE);\r
3746 if (IS_NULL(anode)) goto mem_err;\r
b602265d 3747 NODE_CAR(xnode) = anode;\r
14b0e578
CS
3748 }\r
3749 else {\r
3750 *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);\r
3751 if (IS_NULL(anode)) return ONIGERR_MEMORY;\r
3752 }\r
3753\r
3754 snode = onig_node_new_str(p, p + slen);\r
3755 if (IS_NULL(snode)) goto mem_err;\r
3756\r
b602265d 3757 NODE_CAR(anode) = snode;\r
14b0e578
CS
3758\r
3759 for (i = 0; i < item_num; i++) {\r
3760 snode = onig_node_new_str(NULL, NULL);\r
3761 if (IS_NULL(snode)) goto mem_err;\r
b26691c4 3762\r
14b0e578
CS
3763 for (j = 0; j < items[i].code_len; j++) {\r
3764 len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);\r
3765 if (len < 0) {\r
b602265d
DG
3766 r = len;\r
3767 goto mem_err2;\r
14b0e578
CS
3768 }\r
3769\r
3770 r = onig_node_str_cat(snode, buf, buf + len);\r
3771 if (r != 0) goto mem_err2;\r
3772 }\r
3773\r
3774 an = onig_node_new_alt(NULL_NODE, NULL_NODE);\r
3775 if (IS_NULL(an)) {\r
3776 goto mem_err2;\r
3777 }\r
3948c510
DG
3778 //The NULL pointer check is not necessary. It is added just for pass static\r
3779 //analysis. When condition "items[i].byte_len != slen" is true, "varlen = 1"\r
3780 //in line 3503 will be reached ,so that "if (IS_NULL(var_anode)) return ONIGERR_MEMORY"\r
3781 //in line 3510 will be executed, so the null pointer has been checked before\r
3782 //deferenced in line 3584.\r
3783 if (items[i].byte_len != slen && IS_NOT_NULL(var_anode)) {\r
b602265d 3784 Node *rem;\r
14b0e578
CS
3785 UChar *q = p + items[i].byte_len;\r
3786\r
3787 if (q < end) {\r
3788 r = expand_case_fold_make_rem_string(&rem, q, end, reg);\r
3789 if (r != 0) {\r
3790 onig_node_free(an);\r
3791 goto mem_err2;\r
3792 }\r
3793\r
3794 xnode = onig_node_list_add(NULL_NODE, snode);\r
3795 if (IS_NULL(xnode)) {\r
3796 onig_node_free(an);\r
3797 onig_node_free(rem);\r
3798 goto mem_err2;\r
3799 }\r
3800 if (IS_NULL(onig_node_list_add(xnode, rem))) {\r
3801 onig_node_free(an);\r
3802 onig_node_free(xnode);\r
3803 onig_node_free(rem);\r
3804 goto mem_err;\r
3805 }\r
3806\r
b602265d 3807 NODE_CAR(an) = xnode;\r
14b0e578
CS
3808 }\r
3809 else {\r
b602265d 3810 NODE_CAR(an) = snode;\r
14b0e578
CS
3811 }\r
3812\r
b602265d 3813 NODE_CDR(var_anode) = an;\r
14b0e578
CS
3814 var_anode = an;\r
3815 }\r
3816 else {\r
b602265d
DG
3817 NODE_CAR(an) = snode;\r
3818 NODE_CDR(anode) = an;\r
14b0e578
CS
3819 anode = an;\r
3820 }\r
3821 }\r
3822\r
3823 return varlen;\r
3824\r
3825 mem_err2:\r
3826 onig_node_free(snode);\r
3827\r
3828 mem_err:\r
3829 onig_node_free(*rnode);\r
3830\r
3831 return ONIGERR_MEMORY;\r
3832}\r
3833\r
3834static int\r
b26691c4
LG
3835is_good_case_fold_items_for_search(OnigEncoding enc, int slen,\r
3836 int n, OnigCaseFoldCodeItem items[])\r
14b0e578 3837{\r
b26691c4
LG
3838 int i, len;\r
3839 UChar buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];\r
3840\r
3841 for (i = 0; i < n; i++) {\r
3842 OnigCaseFoldCodeItem* item = items + i;\r
3843\r
3844 if (item->code_len != 1) return 0;\r
3845 if (item->byte_len != slen) return 0;\r
3846 len = ONIGENC_CODE_TO_MBC(enc, item->code[0], buf);\r
3847 if (len != slen) return 0;\r
3848 }\r
3849\r
3850 return 1;\r
3851}\r
3852\r
14b0e578
CS
3853#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8\r
3854\r
b26691c4
LG
3855static int\r
3856expand_case_fold_string(Node* node, regex_t* reg, int state)\r
3857{\r
14b0e578 3858 int r, n, len, alt_num;\r
b26691c4
LG
3859 int fold_len;\r
3860 int prev_is_ambig, prev_is_good, is_good, is_in_look_behind;\r
14b0e578 3861 UChar *start, *end, *p;\r
b26691c4 3862 UChar* foldp;\r
14b0e578
CS
3863 Node *top_root, *root, *snode, *prev_node;\r
3864 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];\r
b26691c4
LG
3865 UChar buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];\r
3866 StrNode* sn;\r
14b0e578 3867\r
b602265d 3868 if (NODE_STRING_IS_AMBIG(node)) return 0;\r
14b0e578 3869\r
b26691c4
LG
3870 sn = STR_(node);\r
3871\r
14b0e578
CS
3872 start = sn->s;\r
3873 end = sn->end;\r
3874 if (start >= end) return 0;\r
3875\r
b26691c4
LG
3876 is_in_look_behind = (state & IN_LOOK_BEHIND) != 0;\r
3877\r
14b0e578
CS
3878 r = 0;\r
3879 top_root = root = prev_node = snode = NULL_NODE;\r
3880 alt_num = 1;\r
3881 p = start;\r
3882 while (p < end) {\r
b26691c4
LG
3883 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,\r
3884 p, end, items);\r
14b0e578
CS
3885 if (n < 0) {\r
3886 r = n;\r
3887 goto err;\r
3888 }\r
3889\r
3890 len = enclen(reg->enc, p);\r
b26691c4 3891 is_good = is_good_case_fold_items_for_search(reg->enc, len, n, items);\r
14b0e578 3892\r
b26691c4
LG
3893 if (is_in_look_behind ||\r
3894 (IS_NOT_NULL(snode) ||\r
3895 (is_good\r
3896 /* expand single char case: ex. /(?i:a)/ */\r
3897 && !(p == start && p + len >= end)))) {\r
14b0e578 3898 if (IS_NULL(snode)) {\r
b602265d
DG
3899 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {\r
3900 top_root = root = onig_node_list_add(NULL_NODE, prev_node);\r
3901 if (IS_NULL(root)) {\r
3902 onig_node_free(prev_node);\r
3903 goto mem_err;\r
3904 }\r
3905 }\r
3906\r
3907 prev_node = snode = onig_node_new_str(NULL, NULL);\r
3908 if (IS_NULL(snode)) goto mem_err;\r
3909 if (IS_NOT_NULL(root)) {\r
3910 if (IS_NULL(onig_node_list_add(root, snode))) {\r
3911 onig_node_free(snode);\r
3912 goto mem_err;\r
3913 }\r
3914 }\r
b26691c4
LG
3915\r
3916 prev_is_ambig = -1; /* -1: new */\r
3917 prev_is_good = 0; /* escape compiler warning */\r
3918 }\r
3919 else {\r
3920 prev_is_ambig = NODE_STRING_IS_AMBIG(snode);\r
3921 prev_is_good = NODE_STRING_IS_GOOD_AMBIG(snode);\r
14b0e578
CS
3922 }\r
3923\r
b26691c4
LG
3924 if (n != 0) {\r
3925 foldp = p;\r
3926 fold_len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag,\r
3927 &foldp, end, buf);\r
3928 foldp = buf;\r
3929 }\r
3930 else {\r
3931 foldp = p; fold_len = len;\r
3932 }\r
3933\r
3934 if ((prev_is_ambig == 0 && n != 0) ||\r
3935 (prev_is_ambig > 0 && (n == 0 || prev_is_good != is_good))) {\r
3936 if (IS_NULL(root) /* && IS_NOT_NULL(prev_node) */) {\r
3937 top_root = root = onig_node_list_add(NULL_NODE, prev_node);\r
3938 if (IS_NULL(root)) {\r
3939 onig_node_free(prev_node);\r
3940 goto mem_err;\r
3941 }\r
3942 }\r
3943\r
3944 prev_node = snode = onig_node_new_str(foldp, foldp + fold_len);\r
3945 if (IS_NULL(snode)) goto mem_err;\r
3946 if (IS_NULL(onig_node_list_add(root, snode))) {\r
3947 onig_node_free(snode);\r
3948 goto mem_err;\r
3949 }\r
3950 }\r
3951 else {\r
3952 r = onig_node_str_cat(snode, foldp, foldp + fold_len);\r
3953 if (r != 0) goto err;\r
3954 }\r
3955\r
3956 if (n != 0) NODE_STRING_SET_AMBIG(snode);\r
3957 if (is_good != 0) NODE_STRING_SET_GOOD_AMBIG(snode);\r
14b0e578
CS
3958 }\r
3959 else {\r
3960 alt_num *= (n + 1);\r
3961 if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;\r
3962\r
3963 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {\r
b602265d
DG
3964 top_root = root = onig_node_list_add(NULL_NODE, prev_node);\r
3965 if (IS_NULL(root)) {\r
3966 onig_node_free(prev_node);\r
3967 goto mem_err;\r
3968 }\r
14b0e578
CS
3969 }\r
3970\r
3971 r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);\r
3972 if (r < 0) goto mem_err;\r
3973 if (r == 1) {\r
b602265d
DG
3974 if (IS_NULL(root)) {\r
3975 top_root = prev_node;\r
3976 }\r
3977 else {\r
3978 if (IS_NULL(onig_node_list_add(root, prev_node))) {\r
3979 onig_node_free(prev_node);\r
3980 goto mem_err;\r
3981 }\r
3982 }\r
3983\r
3984 root = NODE_CAR(prev_node);\r
3985 }\r
3986 else { /* r == 0 */\r
3987 if (IS_NOT_NULL(root)) {\r
3988 if (IS_NULL(onig_node_list_add(root, prev_node))) {\r
3989 onig_node_free(prev_node);\r
3990 goto mem_err;\r
3991 }\r
3992 }\r
3993 }\r
3994\r
14b0e578
CS
3995 snode = NULL_NODE;\r
3996 }\r
3997\r
3998 p += len;\r
3999 }\r
4000\r
4001 if (p < end) {\r
4002 Node *srem;\r
4003\r
4004 r = expand_case_fold_make_rem_string(&srem, p, end, reg);\r
4005 if (r != 0) goto mem_err;\r
4006\r
4007 if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {\r
4008 top_root = root = onig_node_list_add(NULL_NODE, prev_node);\r
4009 if (IS_NULL(root)) {\r
b602265d
DG
4010 onig_node_free(srem);\r
4011 onig_node_free(prev_node);\r
4012 goto mem_err;\r
4013 }\r
4014 }\r
4015\r
4016 if (IS_NULL(root)) {\r
4017 prev_node = srem;\r
4018 }\r
4019 else {\r
4020 if (IS_NULL(onig_node_list_add(root, srem))) {\r
4021 onig_node_free(srem);\r
4022 goto mem_err;\r
4023 }\r
4024 }\r
4025 }\r
4026\r
4027 /* ending */\r
4028 top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);\r
4029 swap_node(node, top_root);\r
4030 onig_node_free(top_root);\r
4031 return 0;\r
4032\r
4033 mem_err:\r
4034 r = ONIGERR_MEMORY;\r
4035\r
4036 err:\r
4037 onig_node_free(top_root);\r
4038 return r;\r
4039}\r
4040\r
b26691c4
LG
#ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT
/*
 * Classify the body of a quantifier for the empty-repeat check.
 *
 * Starts from BODY_IS_EMPTY_POSSIBILITY and takes the largest value found
 * among list/alternative elements and if-else branches.  A capture group
 * yields BODY_IS_EMPTY_POSSIBILITY_MEM, a recursive capture group or a
 * recursive call yields BODY_IS_EMPTY_POSSIBILITY_REC; both are returned
 * immediately.  The body of a quantifier with upper == 0 is not inspected.
 */
static enum BodyEmptyType
quantifiers_memory_node_info(Node* node)
{
  int kind = BODY_IS_EMPTY_POSSIBILITY;

  switch (NODE_TYPE(node)) {
  case NODE_LIST:
  case NODE_ALT:
    for ( ; IS_NOT_NULL(node); node = NODE_CDR(node)) {
      int sub = quantifiers_memory_node_info(NODE_CAR(node));
      if (sub > kind) kind = sub;
    }
    break;

#ifdef USE_CALL
  case NODE_CALL:
    if (NODE_IS_RECURSION(node))
      return BODY_IS_EMPTY_POSSIBILITY_REC; /* tiny version */

    kind = quantifiers_memory_node_info(NODE_BODY(node));
    break;
#endif

  case NODE_QUANT:
    if (QUANT_(node)->upper != 0)
      kind = quantifiers_memory_node_info(NODE_BODY(node));
    break;

  case NODE_BAG:
    {
      BagNode* bag = BAG_(node);

      switch (bag->type) {
      case BAG_MEMORY:
        return NODE_IS_RECURSION(node) ? BODY_IS_EMPTY_POSSIBILITY_REC
                                       : BODY_IS_EMPTY_POSSIBILITY_MEM;

      case BAG_OPTION:
      case BAG_STOP_BACKTRACK:
        kind = quantifiers_memory_node_info(NODE_BODY(node));
        break;

      case BAG_IF_ELSE:
        {
          int sub;

          kind = quantifiers_memory_node_info(NODE_BODY(node));
          if (IS_NOT_NULL(bag->te.Then)) {
            sub = quantifiers_memory_node_info(bag->te.Then);
            if (sub > kind) kind = sub;
          }
          if (IS_NOT_NULL(bag->te.Else)) {
            sub = quantifiers_memory_node_info(bag->te.Else);
            if (sub > kind) kind = sub;
          }
        }
        break;
      }
    }
    break;

  case NODE_BACKREF:
  case NODE_STRING:
  case NODE_CTYPE:
  case NODE_CCLASS:
  case NODE_ANCHOR:
  case NODE_GIMMICK:
  default:
    break;
  }

  return kind;
}
#endif /* USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT */
b602265d 4124\r
b602265d
DG
4125\r
4126#ifdef USE_CALL\r
4127\r
4128#ifdef __GNUC__\r
4129__inline\r
4130#endif\r
4131static int\r
4132setup_call_node_call(CallNode* cn, ScanEnv* env, int state)\r
4133{\r
4134 MemEnv* mem_env = SCANENV_MEMENV(env);\r
4135\r
4136 if (cn->by_number != 0) {\r
4137 int gnum = cn->group_num;\r
4138\r
4139 if (env->num_named > 0 &&\r
4140 IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&\r
4141 ! ONIG_IS_OPTION_ON(env->options, ONIG_OPTION_CAPTURE_GROUP)) {\r
4142 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;\r
4143 }\r
4144\r
4145 if (gnum > env->num_mem) {\r
4146 onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_GROUP_REFERENCE,\r
4147 cn->name, cn->name_end);\r
4148 return ONIGERR_UNDEFINED_GROUP_REFERENCE;\r
4149 }\r
4150\r
4151 set_call_attr:\r
4152 NODE_CALL_BODY(cn) = mem_env[cn->group_num].node;\r
4153 if (IS_NULL(NODE_CALL_BODY(cn))) {\r
4154 onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,\r
4155 cn->name, cn->name_end);\r
4156 return ONIGERR_UNDEFINED_NAME_REFERENCE;\r
4157 }\r
4158 }\r
4159 else {\r
4160 int *refs;\r
4161\r
4162 int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end, &refs);\r
4163 if (n <= 0) {\r
4164 onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,\r
4165 cn->name, cn->name_end);\r
4166 return ONIGERR_UNDEFINED_NAME_REFERENCE;\r
4167 }\r
4168 else if (n > 1) {\r
4169 onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL,\r
4170 cn->name, cn->name_end);\r
4171 return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;\r
4172 }\r
4173 else {\r
4174 cn->group_num = refs[0];\r
4175 goto set_call_attr;\r
4176 }\r
4177 }\r
4178\r
4179 return 0;\r
4180}\r
4181\r
4182static void\r
4183setup_call2_call(Node* node)\r
4184{\r
4185 switch (NODE_TYPE(node)) {\r
4186 case NODE_LIST:\r
4187 case NODE_ALT:\r
4188 do {\r
4189 setup_call2_call(NODE_CAR(node));\r
4190 } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
4191 break;\r
4192\r
4193 case NODE_QUANT:\r
4194 setup_call2_call(NODE_BODY(node));\r
4195 break;\r
4196\r
4197 case NODE_ANCHOR:\r
4198 if (ANCHOR_HAS_BODY(ANCHOR_(node)))\r
4199 setup_call2_call(NODE_BODY(node));\r
4200 break;\r
4201\r
b26691c4 4202 case NODE_BAG:\r
b602265d 4203 {\r
b26691c4 4204 BagNode* en = BAG_(node);\r
b602265d 4205\r
b26691c4 4206 if (en->type == BAG_MEMORY) {\r
b602265d
DG
4207 if (! NODE_IS_MARK1(node)) {\r
4208 NODE_STATUS_ADD(node, MARK1);\r
4209 setup_call2_call(NODE_BODY(node));\r
4210 NODE_STATUS_REMOVE(node, MARK1);\r
4211 }\r
4212 }\r
b26691c4 4213 else if (en->type == BAG_IF_ELSE) {\r
b602265d
DG
4214 setup_call2_call(NODE_BODY(node));\r
4215 if (IS_NOT_NULL(en->te.Then))\r
4216 setup_call2_call(en->te.Then);\r
4217 if (IS_NOT_NULL(en->te.Else))\r
4218 setup_call2_call(en->te.Else);\r
4219 }\r
4220 else {\r
4221 setup_call2_call(NODE_BODY(node));\r
4222 }\r
4223 }\r
4224 break;\r
4225\r
4226 case NODE_CALL:\r
4227 if (! NODE_IS_MARK1(node)) {\r
4228 NODE_STATUS_ADD(node, MARK1);\r
4229 {\r
4230 CallNode* cn = CALL_(node);\r
4231 Node* called = NODE_CALL_BODY(cn);\r
4232\r
4233 cn->entry_count++;\r
4234\r
4235 NODE_STATUS_ADD(called, CALLED);\r
b26691c4 4236 BAG_(called)->m.entry_count++;\r
b602265d
DG
4237 setup_call2_call(called);\r
4238 }\r
4239 NODE_STATUS_REMOVE(node, MARK1);\r
4240 }\r
4241 break;\r
4242\r
4243 default:\r
4244 break;\r
4245 }\r
4246}\r
4247\r
4248static int\r
4249setup_call(Node* node, ScanEnv* env, int state)\r
4250{\r
4251 int r;\r
4252\r
4253 switch (NODE_TYPE(node)) {\r
4254 case NODE_LIST:\r
4255 case NODE_ALT:\r
4256 do {\r
4257 r = setup_call(NODE_CAR(node), env, state);\r
4258 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));\r
4259 break;\r
4260\r
4261 case NODE_QUANT:\r
4262 if (QUANT_(node)->upper == 0)\r
4263 state |= IN_ZERO_REPEAT;\r
4264\r
4265 r = setup_call(NODE_BODY(node), env, state);\r
4266 break;\r
4267\r
4268 case NODE_ANCHOR:\r
4269 if (ANCHOR_HAS_BODY(ANCHOR_(node)))\r
4270 r = setup_call(NODE_BODY(node), env, state);\r
4271 else\r
4272 r = 0;\r
4273 break;\r
4274\r
b26691c4 4275 case NODE_BAG:\r
b602265d 4276 {\r
b26691c4 4277 BagNode* en = BAG_(node);\r
b602265d 4278\r
b26691c4 4279 if (en->type == BAG_MEMORY) {\r
b602265d
DG
4280 if ((state & IN_ZERO_REPEAT) != 0) {\r
4281 NODE_STATUS_ADD(node, IN_ZERO_REPEAT);\r
b26691c4 4282 BAG_(node)->m.entry_count--;\r
b602265d
DG
4283 }\r
4284 r = setup_call(NODE_BODY(node), env, state);\r
4285 }\r
b26691c4 4286 else if (en->type == BAG_IF_ELSE) {\r
b602265d
DG
4287 r = setup_call(NODE_BODY(node), env, state);\r
4288 if (r != 0) return r;\r
4289 if (IS_NOT_NULL(en->te.Then)) {\r
4290 r = setup_call(en->te.Then, env, state);\r
4291 if (r != 0) return r;\r
4292 }\r
4293 if (IS_NOT_NULL(en->te.Else))\r
4294 r = setup_call(en->te.Else, env, state);\r
4295 }\r
4296 else\r
4297 r = setup_call(NODE_BODY(node), env, state);\r
4298 }\r
4299 break;\r
4300\r
4301 case NODE_CALL:\r
4302 if ((state & IN_ZERO_REPEAT) != 0) {\r
4303 NODE_STATUS_ADD(node, IN_ZERO_REPEAT);\r
4304 CALL_(node)->entry_count--;\r
4305 }\r
4306\r
4307 r = setup_call_node_call(CALL_(node), env, state);\r
4308 break;\r
4309\r
4310 default:\r
4311 r = 0;\r
4312 break;\r
4313 }\r
4314\r
4315 return r;\r
4316}\r
4317\r
4318static int\r
4319setup_call2(Node* node)\r
4320{\r
4321 int r = 0;\r
4322\r
4323 switch (NODE_TYPE(node)) {\r
4324 case NODE_LIST:\r
4325 case NODE_ALT:\r
4326 do {\r
4327 r = setup_call2(NODE_CAR(node));\r
4328 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));\r
4329 break;\r
4330\r
4331 case NODE_QUANT:\r
4332 if (QUANT_(node)->upper != 0)\r
4333 r = setup_call2(NODE_BODY(node));\r
4334 break;\r
4335\r
4336 case NODE_ANCHOR:\r
4337 if (ANCHOR_HAS_BODY(ANCHOR_(node)))\r
4338 r = setup_call2(NODE_BODY(node));\r
4339 break;\r
4340\r
b26691c4 4341 case NODE_BAG:\r
b602265d
DG
4342 if (! NODE_IS_IN_ZERO_REPEAT(node))\r
4343 r = setup_call2(NODE_BODY(node));\r
4344\r
4345 {\r
b26691c4 4346 BagNode* en = BAG_(node);\r
b602265d
DG
4347\r
4348 if (r != 0) return r;\r
b26691c4 4349 if (en->type == BAG_IF_ELSE) {\r
b602265d
DG
4350 if (IS_NOT_NULL(en->te.Then)) {\r
4351 r = setup_call2(en->te.Then);\r
4352 if (r != 0) return r;\r
4353 }\r
4354 if (IS_NOT_NULL(en->te.Else))\r
4355 r = setup_call2(en->te.Else);\r
4356 }\r
4357 }\r
4358 break;\r
4359\r
4360 case NODE_CALL:\r
4361 if (! NODE_IS_IN_ZERO_REPEAT(node)) {\r
4362 setup_call2_call(node);\r
4363 }\r
4364 break;\r
4365\r
4366 default:\r
4367 break;\r
4368 }\r
4369\r
4370 return r;\r
4371}\r
4372\r
4373\r
4374static void\r
4375setup_called_state_call(Node* node, int state)\r
4376{\r
4377 switch (NODE_TYPE(node)) {\r
4378 case NODE_ALT:\r
4379 state |= IN_ALT;\r
4380 /* fall */\r
4381 case NODE_LIST:\r
4382 do {\r
4383 setup_called_state_call(NODE_CAR(node), state);\r
4384 } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
4385 break;\r
4386\r
4387 case NODE_QUANT:\r
4388 {\r
4389 QuantNode* qn = QUANT_(node);\r
4390\r
b26691c4 4391 if (IS_INFINITE_REPEAT(qn->upper) || qn->upper >= 2)\r
b602265d
DG
4392 state |= IN_REAL_REPEAT;\r
4393 if (qn->lower != qn->upper)\r
4394 state |= IN_VAR_REPEAT;\r
4395\r
4396 setup_called_state_call(NODE_QUANT_BODY(qn), state);\r
4397 }\r
4398 break;\r
4399\r
4400 case NODE_ANCHOR:\r
4401 {\r
4402 AnchorNode* an = ANCHOR_(node);\r
4403\r
4404 switch (an->type) {\r
b26691c4
LG
4405 case ANCR_PREC_READ_NOT:\r
4406 case ANCR_LOOK_BEHIND_NOT:\r
b602265d
DG
4407 state |= IN_NOT;\r
4408 /* fall */\r
b26691c4
LG
4409 case ANCR_PREC_READ:\r
4410 case ANCR_LOOK_BEHIND:\r
b602265d
DG
4411 setup_called_state_call(NODE_ANCHOR_BODY(an), state);\r
4412 break;\r
4413 default:\r
4414 break;\r
4415 }\r
4416 }\r
4417 break;\r
4418\r
b26691c4 4419 case NODE_BAG:\r
b602265d 4420 {\r
b26691c4 4421 BagNode* en = BAG_(node);\r
b602265d 4422\r
b26691c4 4423 if (en->type == BAG_MEMORY) {\r
b602265d
DG
4424 if (NODE_IS_MARK1(node)) {\r
4425 if ((~en->m.called_state & state) != 0) {\r
4426 en->m.called_state |= state;\r
4427 setup_called_state_call(NODE_BODY(node), state);\r
4428 }\r
4429 }\r
4430 else {\r
4431 NODE_STATUS_ADD(node, MARK1);\r
4432 en->m.called_state |= state;\r
4433 setup_called_state_call(NODE_BODY(node), state);\r
4434 NODE_STATUS_REMOVE(node, MARK1);\r
4435 }\r
4436 }\r
b26691c4 4437 else if (en->type == BAG_IF_ELSE) {\r
b602265d
DG
4438 if (IS_NOT_NULL(en->te.Then)) {\r
4439 setup_called_state_call(en->te.Then, state);\r
4440 }\r
4441 if (IS_NOT_NULL(en->te.Else))\r
4442 setup_called_state_call(en->te.Else, state);\r
4443 }\r
4444 else {\r
4445 setup_called_state_call(NODE_BODY(node), state);\r
4446 }\r
4447 }\r
4448 break;\r
4449\r
4450 case NODE_CALL:\r
4451 setup_called_state_call(NODE_BODY(node), state);\r
4452 break;\r
4453\r
4454 default:\r
4455 break;\r
4456 }\r
4457}\r
4458\r
4459static void\r
4460setup_called_state(Node* node, int state)\r
4461{\r
4462 switch (NODE_TYPE(node)) {\r
4463 case NODE_ALT:\r
4464 state |= IN_ALT;\r
4465 /* fall */\r
4466 case NODE_LIST:\r
4467 do {\r
4468 setup_called_state(NODE_CAR(node), state);\r
4469 } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
4470 break;\r
4471\r
4472#ifdef USE_CALL\r
4473 case NODE_CALL:\r
4474 setup_called_state_call(node, state);\r
4475 break;\r
4476#endif\r
4477\r
b26691c4 4478 case NODE_BAG:\r
b602265d 4479 {\r
b26691c4 4480 BagNode* en = BAG_(node);\r
b602265d
DG
4481\r
4482 switch (en->type) {\r
b26691c4 4483 case BAG_MEMORY:\r
b602265d
DG
4484 if (en->m.entry_count > 1)\r
4485 state |= IN_MULTI_ENTRY;\r
4486\r
4487 en->m.called_state |= state;\r
4488 /* fall */\r
b26691c4
LG
4489 case BAG_OPTION:\r
4490 case BAG_STOP_BACKTRACK:\r
b602265d
DG
4491 setup_called_state(NODE_BODY(node), state);\r
4492 break;\r
b26691c4 4493 case BAG_IF_ELSE:\r
b602265d
DG
4494 setup_called_state(NODE_BODY(node), state);\r
4495 if (IS_NOT_NULL(en->te.Then))\r
4496 setup_called_state(en->te.Then, state);\r
4497 if (IS_NOT_NULL(en->te.Else))\r
4498 setup_called_state(en->te.Else, state);\r
4499 break;\r
4500 }\r
4501 }\r
4502 break;\r
4503\r
4504 case NODE_QUANT:\r
4505 {\r
4506 QuantNode* qn = QUANT_(node);\r
4507\r
b26691c4 4508 if (IS_INFINITE_REPEAT(qn->upper) || qn->upper >= 2)\r
b602265d
DG
4509 state |= IN_REAL_REPEAT;\r
4510 if (qn->lower != qn->upper)\r
4511 state |= IN_VAR_REPEAT;\r
4512\r
4513 setup_called_state(NODE_QUANT_BODY(qn), state);\r
4514 }\r
4515 break;\r
4516\r
4517 case NODE_ANCHOR:\r
4518 {\r
4519 AnchorNode* an = ANCHOR_(node);\r
4520\r
4521 switch (an->type) {\r
b26691c4
LG
4522 case ANCR_PREC_READ_NOT:\r
4523 case ANCR_LOOK_BEHIND_NOT:\r
b602265d
DG
4524 state |= IN_NOT;\r
4525 /* fall */\r
b26691c4
LG
4526 case ANCR_PREC_READ:\r
4527 case ANCR_LOOK_BEHIND:\r
b602265d
DG
4528 setup_called_state(NODE_ANCHOR_BODY(an), state);\r
4529 break;\r
4530 default:\r
4531 break;\r
14b0e578
CS
4532 }\r
4533 }\r
b602265d 4534 break;\r
14b0e578 4535\r
b602265d
DG
4536 case NODE_BACKREF:\r
4537 case NODE_STRING:\r
4538 case NODE_CTYPE:\r
4539 case NODE_CCLASS:\r
4540 case NODE_GIMMICK:\r
4541 default:\r
4542 break;\r
14b0e578 4543 }\r
b602265d 4544}\r
14b0e578 4545\r
b602265d 4546#endif /* USE_CALL */\r
14b0e578 4547\r
14b0e578 4548\r
b602265d
DG
4549static int setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env);\r
4550\r
4551#ifdef __GNUC__\r
4552__inline\r
4553#endif\r
4554static int\r
4555setup_anchor(Node* node, regex_t* reg, int state, ScanEnv* env)\r
4556{\r
4557/* allowed node types in look-behind */\r
4558#define ALLOWED_TYPE_IN_LB \\r
4559 ( NODE_BIT_LIST | NODE_BIT_ALT | NODE_BIT_STRING | NODE_BIT_CCLASS \\r
b26691c4 4560 | NODE_BIT_CTYPE | NODE_BIT_ANCHOR | NODE_BIT_BAG | NODE_BIT_QUANT \\r
b602265d 4561 | NODE_BIT_CALL | NODE_BIT_GIMMICK)\r
14b0e578 4562\r
b26691c4
LG
4563#define ALLOWED_BAG_IN_LB ( 1<<BAG_MEMORY | 1<<BAG_OPTION | 1<<BAG_IF_ELSE )\r
4564#define ALLOWED_BAG_IN_LB_NOT ( 1<<BAG_OPTION | 1<<BAG_IF_ELSE )\r
14b0e578 4565\r
b602265d 4566#define ALLOWED_ANCHOR_IN_LB \\r
b26691c4
LG
4567 ( ANCR_LOOK_BEHIND | ANCR_BEGIN_LINE | ANCR_END_LINE | ANCR_BEGIN_BUF \\r
4568 | ANCR_BEGIN_POSITION | ANCR_WORD_BOUNDARY | ANCR_NO_WORD_BOUNDARY \\r
4569 | ANCR_WORD_BEGIN | ANCR_WORD_END \\r
4570 | ANCR_TEXT_SEGMENT_BOUNDARY | ANCR_NO_TEXT_SEGMENT_BOUNDARY )\r
14b0e578 4571\r
b602265d 4572#define ALLOWED_ANCHOR_IN_LB_NOT \\r
b26691c4
LG
4573 ( ANCR_LOOK_BEHIND | ANCR_LOOK_BEHIND_NOT | ANCR_BEGIN_LINE \\r
4574 | ANCR_END_LINE | ANCR_BEGIN_BUF | ANCR_BEGIN_POSITION | ANCR_WORD_BOUNDARY \\r
4575 | ANCR_NO_WORD_BOUNDARY | ANCR_WORD_BEGIN | ANCR_WORD_END \\r
4576 | ANCR_TEXT_SEGMENT_BOUNDARY | ANCR_NO_TEXT_SEGMENT_BOUNDARY )\r
14b0e578 4577\r
b602265d
DG
4578 int r;\r
4579 AnchorNode* an = ANCHOR_(node);\r
14b0e578 4580\r
b602265d 4581 switch (an->type) {\r
b26691c4 4582 case ANCR_PREC_READ:\r
b602265d
DG
4583 r = setup_tree(NODE_ANCHOR_BODY(an), reg, state, env);\r
4584 break;\r
b26691c4 4585 case ANCR_PREC_READ_NOT:\r
b602265d
DG
4586 r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state | IN_NOT), env);\r
4587 break;\r
14b0e578 4588\r
b26691c4 4589 case ANCR_LOOK_BEHIND:\r
14b0e578 4590 {\r
b602265d 4591 r = check_type_tree(NODE_ANCHOR_BODY(an), ALLOWED_TYPE_IN_LB,\r
b26691c4 4592 ALLOWED_BAG_IN_LB, ALLOWED_ANCHOR_IN_LB);\r
b602265d
DG
4593 if (r < 0) return r;\r
4594 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;\r
b26691c4 4595 r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state|IN_LOOK_BEHIND), env);\r
b602265d
DG
4596 if (r != 0) return r;\r
4597 r = setup_look_behind(node, reg, env);\r
14b0e578
CS
4598 }\r
4599 break;\r
4600\r
b26691c4 4601 case ANCR_LOOK_BEHIND_NOT:\r
14b0e578 4602 {\r
b602265d 4603 r = check_type_tree(NODE_ANCHOR_BODY(an), ALLOWED_TYPE_IN_LB,\r
b26691c4 4604 ALLOWED_BAG_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);\r
b602265d
DG
4605 if (r < 0) return r;\r
4606 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;\r
b26691c4
LG
4607 r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state|IN_NOT|IN_LOOK_BEHIND),\r
4608 env);\r
b602265d
DG
4609 if (r != 0) return r;\r
4610 r = setup_look_behind(node, reg, env);\r
14b0e578
CS
4611 }\r
4612 break;\r
4613\r
b602265d
DG
4614 default:\r
4615 r = 0;\r
4616 break;\r
4617 }\r
14b0e578 4618\r
b602265d
DG
4619 return r;\r
4620}\r
14b0e578 4621\r
b602265d
DG
4622#ifdef __GNUC__\r
4623__inline\r
4624#endif\r
4625static int\r
4626setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env)\r
4627{\r
4628 int r;\r
4629 OnigLen d;\r
4630 QuantNode* qn = QUANT_(node);\r
4631 Node* body = NODE_BODY(node);\r
4632\r
4633 if ((state & IN_REAL_REPEAT) != 0) {\r
4634 NODE_STATUS_ADD(node, IN_REAL_REPEAT);\r
4635 }\r
4636 if ((state & IN_MULTI_ENTRY) != 0) {\r
4637 NODE_STATUS_ADD(node, IN_MULTI_ENTRY);\r
4638 }\r
4639\r
b26691c4 4640 if (IS_INFINITE_REPEAT(qn->upper) || qn->upper >= 1) {\r
b602265d
DG
4641 d = tree_min_len(body, env);\r
4642 if (d == 0) {\r
b26691c4
LG
4643#ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT\r
4644 qn->emptiness = quantifiers_memory_node_info(body);\r
4645 if (qn->emptiness == BODY_IS_EMPTY_POSSIBILITY_REC) {\r
4646 if (NODE_TYPE(body) == NODE_BAG &&\r
4647 BAG_(body)->type == BAG_MEMORY) {\r
4648 MEM_STATUS_ON(env->bt_mem_end, BAG_(body)->m.regnum);\r
b602265d
DG
4649 }\r
4650 }\r
4651#else\r
b26691c4 4652 qn->emptiness = BODY_IS_EMPTY_POSSIBILITY;\r
b602265d 4653#endif\r
14b0e578 4654 }\r
b602265d 4655 }\r
14b0e578 4656\r
b26691c4 4657 if (IS_INFINITE_REPEAT(qn->upper) || qn->upper >= 2)\r
b602265d
DG
4658 state |= IN_REAL_REPEAT;\r
4659 if (qn->lower != qn->upper)\r
4660 state |= IN_VAR_REPEAT;\r
14b0e578 4661\r
b602265d
DG
4662 r = setup_tree(body, reg, state, env);\r
4663 if (r != 0) return r;\r
14b0e578 4664\r
b602265d
DG
4665 /* expand string */\r
4666#define EXPAND_STRING_MAX_LENGTH 100\r
4667 if (NODE_TYPE(body) == NODE_STRING) {\r
b26691c4 4668 if (!IS_INFINITE_REPEAT(qn->lower) && qn->lower == qn->upper &&\r
b602265d
DG
4669 qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) {\r
4670 int len = NODE_STRING_LEN(body);\r
4671 StrNode* sn = STR_(body);\r
4672\r
4673 if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) {\r
4674 int i, n = qn->lower;\r
4675 onig_node_conv_to_str_node(node, STR_(body)->flag);\r
4676 for (i = 0; i < n; i++) {\r
4677 r = onig_node_str_cat(node, sn->s, sn->end);\r
4678 if (r != 0) return r;\r
4679 }\r
4680 onig_node_free(body);\r
4681 return r;\r
14b0e578
CS
4682 }\r
4683 }\r
b602265d 4684 }\r
14b0e578 4685\r
b26691c4 4686 if (qn->greedy && (qn->emptiness == BODY_IS_NOT_EMPTY)) {\r
b602265d
DG
4687 if (NODE_TYPE(body) == NODE_QUANT) {\r
4688 QuantNode* tqn = QUANT_(body);\r
4689 if (IS_NOT_NULL(tqn->head_exact)) {\r
4690 qn->head_exact = tqn->head_exact;\r
4691 tqn->head_exact = NULL;\r
4692 }\r
4693 }\r
4694 else {\r
4695 qn->head_exact = get_head_value_node(NODE_BODY(node), 1, reg);\r
4696 }\r
14b0e578
CS
4697 }\r
4698\r
4699 return r;\r
4700}\r
14b0e578
CS
4701\r
4702/* setup_tree does the following work.\r
b26691c4 4703 1. check empty loop. (set qn->emptiness)\r
14b0e578
CS
4704 2. expand ignore-case in char class.\r
4705 3. set memory status bit flags. (reg->mem_stats)\r
4706 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].\r
4707 5. find invalid patterns in look-behind.\r
4708 6. expand repeated string.\r
4709 */\r
4710static int\r
4711setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)\r
4712{\r
14b0e578
CS
4713 int r = 0;\r
4714\r
b602265d
DG
4715 switch (NODE_TYPE(node)) {\r
4716 case NODE_LIST:\r
14b0e578
CS
4717 {\r
4718 Node* prev = NULL_NODE;\r
4719 do {\r
b602265d
DG
4720 r = setup_tree(NODE_CAR(node), reg, state, env);\r
4721 if (IS_NOT_NULL(prev) && r == 0) {\r
4722 r = next_setup(prev, NODE_CAR(node), reg);\r
4723 }\r
4724 prev = NODE_CAR(node);\r
4725 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));\r
14b0e578
CS
4726 }\r
4727 break;\r
4728\r
b602265d 4729 case NODE_ALT:\r
14b0e578 4730 do {\r
b602265d
DG
4731 r = setup_tree(NODE_CAR(node), reg, (state | IN_ALT), env);\r
4732 } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));\r
14b0e578
CS
4733 break;\r
4734\r
b602265d
DG
4735 case NODE_STRING:\r
4736 if (IS_IGNORECASE(reg->options) && !NODE_STRING_IS_RAW(node)) {\r
b26691c4 4737 r = expand_case_fold_string(node, reg, state);\r
14b0e578
CS
4738 }\r
4739 break;\r
4740\r
b602265d 4741 case NODE_BACKREF:\r
14b0e578
CS
4742 {\r
4743 int i;\r
4744 int* p;\r
b602265d 4745 BackRefNode* br = BACKREF_(node);\r
14b0e578
CS
4746 p = BACKREFS_P(br);\r
4747 for (i = 0; i < br->back_num; i++) {\r
b602265d
DG
4748 if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;\r
4749 MEM_STATUS_ON(env->backrefed_mem, p[i]);\r
4750 MEM_STATUS_ON(env->bt_mem_start, p[i]);\r
14b0e578 4751#ifdef USE_BACKREF_WITH_LEVEL\r
b602265d
DG
4752 if (NODE_IS_NEST_LEVEL(node)) {\r
4753 MEM_STATUS_ON(env->bt_mem_end, p[i]);\r
4754 }\r
14b0e578 4755#endif\r
14b0e578
CS
4756 }\r
4757 }\r
4758 break;\r
4759\r
b26691c4 4760 case NODE_BAG:\r
14b0e578 4761 {\r
b26691c4 4762 BagNode* en = BAG_(node);\r
14b0e578 4763\r
b602265d 4764 switch (en->type) {\r
b26691c4 4765 case BAG_OPTION:\r
b602265d
DG
4766 {\r
4767 OnigOptionType options = reg->options;\r
b26691c4 4768 reg->options = BAG_(node)->o.options;\r
b602265d
DG
4769 r = setup_tree(NODE_BODY(node), reg, state, env);\r
4770 reg->options = options;\r
4771 }\r
4772 break;\r
14b0e578 4773\r
b26691c4 4774 case BAG_MEMORY:\r
b602265d
DG
4775#ifdef USE_CALL\r
4776 state |= en->m.called_state;\r
14b0e578 4777#endif\r
14b0e578 4778\r
b602265d
DG
4779 if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_MULTI_ENTRY)) != 0\r
4780 || NODE_IS_RECURSION(node)) {\r
4781 MEM_STATUS_ON(env->bt_mem_start, en->m.regnum);\r
4782 }\r
4783 r = setup_tree(NODE_BODY(node), reg, state, env);\r
4784 break;\r
14b0e578 4785\r
b26691c4 4786 case BAG_STOP_BACKTRACK:\r
b602265d
DG
4787 {\r
4788 Node* target = NODE_BODY(node);\r
4789 r = setup_tree(target, reg, state, env);\r
4790 if (NODE_TYPE(target) == NODE_QUANT) {\r
4791 QuantNode* tqn = QUANT_(target);\r
b26691c4 4792 if (IS_INFINITE_REPEAT(tqn->upper) && tqn->lower <= 1 &&\r
b602265d 4793 tqn->greedy != 0) { /* (?>a*), a*+ etc... */\r
b26691c4
LG
4794 if (is_strict_real_node(NODE_BODY(target)))\r
4795 NODE_STATUS_ADD(node, STRICT_REAL_REPEAT);\r
b602265d
DG
4796 }\r
4797 }\r
4798 }\r
14b0e578
CS
4799 break;\r
4800\r
b26691c4 4801 case BAG_IF_ELSE:\r
b602265d
DG
4802 r = setup_tree(NODE_BODY(node), reg, (state | IN_ALT), env);\r
4803 if (r != 0) return r;\r
4804 if (IS_NOT_NULL(en->te.Then)) {\r
4805 r = setup_tree(en->te.Then, reg, (state | IN_ALT), env);\r
4806 if (r != 0) return r;\r
4807 }\r
4808 if (IS_NOT_NULL(en->te.Else))\r
4809 r = setup_tree(en->te.Else, reg, (state | IN_ALT), env);\r
4810 break;\r
14b0e578
CS
4811 }\r
4812 }\r
4813 break;\r
4814\r
b602265d
DG
4815 case NODE_QUANT:\r
4816 r = setup_quant(node, reg, state, env);\r
4817 break;\r
14b0e578 4818\r
b602265d
DG
4819 case NODE_ANCHOR:\r
4820 r = setup_anchor(node, reg, state, env);\r
14b0e578
CS
4821 break;\r
4822\r
b602265d
DG
4823#ifdef USE_CALL\r
4824 case NODE_CALL:\r
4825#endif\r
4826 case NODE_CTYPE:\r
4827 case NODE_CCLASS:\r
4828 case NODE_GIMMICK:\r
14b0e578
CS
4829 default:\r
4830 break;\r
4831 }\r
4832\r
4833 return r;\r
4834}\r
4835\r
14b0e578 4836static int\r
b26691c4
LG
4837set_sunday_quick_search_or_bmh_skip_table(regex_t* reg, int case_expand,\r
4838 UChar* s, UChar* end,\r
4839 UChar skip[], int* roffset)\r
14b0e578 4840{\r
b26691c4
LG
4841 int i, j, k, len, offset;\r
4842 int n, clen;\r
4843 UChar* p;\r
4844 OnigEncoding enc;\r
4845 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];\r
4846 UChar buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];\r
4847\r
4848 enc = reg->enc;\r
4849 offset = ENC_GET_SKIP_OFFSET(enc);\r
4850 if (offset == ENC_SKIP_OFFSET_1_OR_0) {\r
4851 UChar* p = s;\r
4852 while (1) {\r
4853 len = enclen(enc, p);\r
4854 if (p + len >= end) {\r
4855 if (len == 1) offset = 1;\r
4856 else offset = 0;\r
4857 break;\r
4858 }\r
4859 p += len;\r
4860 }\r
4861 }\r
14b0e578 4862\r
b602265d 4863 len = (int )(end - s);\r
b26691c4
LG
4864 if (len + offset >= 255)\r
4865 return ONIGERR_PARSER_BUG;\r
14b0e578 4866\r
b26691c4
LG
4867 *roffset = offset;\r
4868\r
4869 for (i = 0; i < CHAR_MAP_SIZE; i++) {\r
4870 skip[i] = (UChar )(len + offset);\r
14b0e578 4871 }\r
b26691c4
LG
4872\r
4873 for (p = s; p < end; ) {\r
4874 int z;\r
4875\r
4876 clen = enclen(enc, p);\r
4877 if (p + clen > end) clen = (int )(end - p);\r
4878\r
4879 len = (int )(end - p);\r
4880 for (j = 0; j < clen; j++) {\r
4881 z = len - j + (offset - 1);\r
4882 if (z <= 0) break;\r
4883 skip[p[j]] = z;\r
4884 }\r
4885\r
4886 if (case_expand != 0) {\r
4887 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,\r
4888 p, end, items);\r
4889 for (k = 0; k < n; k++) {\r
4890 ONIGENC_CODE_TO_MBC(enc, items[k].code[0], buf);\r
4891 for (j = 0; j < clen; j++) {\r
4892 z = len - j + (offset - 1);\r
4893 if (z <= 0) break;\r
4894 if (skip[buf[j]] > z)\r
4895 skip[buf[j]] = z;\r
4896 }\r
4897 }\r
14b0e578 4898 }\r
14b0e578 4899\r
b26691c4 4900 p += clen;\r
14b0e578 4901 }\r
b26691c4 4902\r
14b0e578
CS
4903 return 0;\r
4904}\r
4905\r
b26691c4 4906\r
14b0e578
CS
4907#define OPT_EXACT_MAXLEN 24\r
4908\r
b26691c4
LG
4909#if OPT_EXACT_MAXLEN >= 255\r
4910#error Too big OPT_EXACT_MAXLEN\r
4911#endif\r
4912\r
14b0e578 4913typedef struct {\r
b602265d
DG
4914 OnigLen min; /* min byte length */\r
4915 OnigLen max; /* max byte length */\r
4916} MinMax;\r
14b0e578
CS
4917\r
4918typedef struct {\r
b602265d 4919 MinMax mmd;\r
14b0e578
CS
4920 OnigEncoding enc;\r
4921 OnigOptionType options;\r
4922 OnigCaseFoldType case_fold_flag;\r
4923 ScanEnv* scan_env;\r
4924} OptEnv;\r
4925\r
4926typedef struct {\r
b602265d
DG
4927 int left;\r
4928 int right;\r
4929} OptAnc;\r
14b0e578
CS
4930\r
4931typedef struct {\r
b602265d
DG
4932 MinMax mmd; /* position */\r
4933 OptAnc anc;\r
4934 int reach_end;\r
b26691c4
LG
4935 int case_fold;\r
4936 int good_case_fold;\r
b602265d
DG
4937 int len;\r
4938 UChar s[OPT_EXACT_MAXLEN];\r
b26691c4 4939} OptStr;\r
14b0e578
CS
4940\r
4941typedef struct {\r
b602265d
DG
4942 MinMax mmd; /* position */\r
4943 OptAnc anc;\r
4944 int value; /* weighted value */\r
b26691c4 4945 UChar map[CHAR_MAP_SIZE];\r
b602265d 4946} OptMap;\r
14b0e578
CS
4947\r
4948typedef struct {\r
b26691c4
LG
4949 MinMax len;\r
4950 OptAnc anc;\r
4951 OptStr sb; /* boundary */\r
4952 OptStr sm; /* middle */\r
4953 OptStr spr; /* prec read (?=...) */\r
4954 OptMap map; /* boundary */\r
4955} OptNode;\r
14b0e578
CS
4956\r
4957\r
4958static int\r
4959map_position_value(OnigEncoding enc, int i)\r
4960{\r
b602265d 4961 static const short int Vals[] = {\r
14b0e578
CS
4962 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,\r
4963 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\r
4964 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,\r
4965 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,\r
4966 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,\r
4967 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,\r
4968 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,\r
4969 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1\r
4970 };\r
4971\r
b602265d 4972 if (i < (int )(sizeof(Vals)/sizeof(Vals[0]))) {\r
14b0e578
CS
4973 if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)\r
4974 return 20;\r
4975 else\r
b602265d 4976 return (int )Vals[i];\r
14b0e578
CS
4977 }\r
4978 else\r
4979 return 4; /* Take it easy. */\r
4980}\r
4981\r
4982static int\r
b602265d 4983distance_value(MinMax* mm)\r
14b0e578
CS
4984{\r
4985 /* 1000 / (min-max-dist + 1) */\r
4986 static const short int dist_vals[] = {\r
b26691c4
LG
4987 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,\r
4988 91, 83, 77, 71, 67, 63, 59, 56, 53, 50,\r
4989 48, 45, 43, 42, 40, 38, 37, 36, 34, 33,\r
4990 32, 31, 30, 29, 29, 28, 27, 26, 26, 25,\r
4991 24, 24, 23, 23, 22, 22, 21, 21, 20, 20,\r
4992 20, 19, 19, 19, 18, 18, 18, 17, 17, 17,\r
4993 16, 16, 16, 16, 15, 15, 15, 15, 14, 14,\r
4994 14, 14, 14, 14, 13, 13, 13, 13, 13, 13,\r
4995 12, 12, 12, 12, 12, 12, 11, 11, 11, 11,\r
14b0e578
CS
4996 11, 11, 11, 11, 11, 10, 10, 10, 10, 10\r
4997 };\r
4998\r
b602265d 4999 OnigLen d;\r
14b0e578 5000\r
b602265d 5001 if (mm->max == INFINITE_LEN) return 0;\r
14b0e578
CS
5002\r
5003 d = mm->max - mm->min;\r
b602265d 5004 if (d < (OnigLen )(sizeof(dist_vals)/sizeof(dist_vals[0])))\r
14b0e578
CS
5005 /* return dist_vals[d] * 16 / (mm->min + 12); */\r
5006 return (int )dist_vals[d];\r
5007 else\r
5008 return 1;\r
5009}\r
5010\r
5011static int\r
b602265d 5012comp_distance_value(MinMax* d1, MinMax* d2, int v1, int v2)\r
14b0e578
CS
5013{\r
5014 if (v2 <= 0) return -1;\r
5015 if (v1 <= 0) return 1;\r
5016\r
5017 v1 *= distance_value(d1);\r
5018 v2 *= distance_value(d2);\r
5019\r
5020 if (v2 > v1) return 1;\r
5021 if (v2 < v1) return -1;\r
5022\r
5023 if (d2->min < d1->min) return 1;\r
5024 if (d2->min > d1->min) return -1;\r
5025 return 0;\r
5026}\r
5027\r
5028static int\r
b602265d 5029is_equal_mml(MinMax* a, MinMax* b)\r
14b0e578 5030{\r
b26691c4 5031 return a->min == b->min && a->max == b->max;\r
14b0e578
CS
5032}\r
5033\r
14b0e578 5034static void\r
b602265d 5035set_mml(MinMax* l, OnigLen min, OnigLen max)\r
14b0e578 5036{\r
b602265d
DG
5037 l->min = min;\r
5038 l->max = max;\r
14b0e578
CS
5039}\r
5040\r
5041static void\r
b602265d 5042clear_mml(MinMax* l)\r
14b0e578 5043{\r
b602265d 5044 l->min = l->max = 0;\r
14b0e578
CS
5045}\r
5046\r
5047static void\r
b602265d 5048copy_mml(MinMax* to, MinMax* from)\r
14b0e578
CS
5049{\r
5050 to->min = from->min;\r
5051 to->max = from->max;\r
5052}\r
5053\r
5054static void\r
b602265d 5055add_mml(MinMax* to, MinMax* from)\r
14b0e578
CS
5056{\r
5057 to->min = distance_add(to->min, from->min);\r
5058 to->max = distance_add(to->max, from->max);\r
5059}\r
5060\r
14b0e578 5061static void\r
b602265d 5062alt_merge_mml(MinMax* to, MinMax* from)\r
14b0e578
CS
5063{\r
5064 if (to->min > from->min) to->min = from->min;\r
5065 if (to->max < from->max) to->max = from->max;\r
5066}\r
5067\r
5068static void\r
5069copy_opt_env(OptEnv* to, OptEnv* from)\r
5070{\r
b602265d 5071 *to = *from;\r
14b0e578
CS
5072}\r
5073\r
5074static void\r
b602265d 5075clear_opt_anc_info(OptAnc* a)\r
14b0e578 5076{\r
b602265d
DG
5077 a->left = 0;\r
5078 a->right = 0;\r
14b0e578
CS
5079}\r
5080\r
5081static void\r
b602265d 5082copy_opt_anc_info(OptAnc* to, OptAnc* from)\r
14b0e578 5083{\r
b602265d 5084 *to = *from;\r
14b0e578
CS
5085}\r
5086\r
5087static void\r
b602265d
DG
5088concat_opt_anc_info(OptAnc* to, OptAnc* left, OptAnc* right,\r
5089 OnigLen left_len, OnigLen right_len)\r
14b0e578
CS
5090{\r
5091 clear_opt_anc_info(to);\r
5092\r
b602265d 5093 to->left = left->left;\r
14b0e578 5094 if (left_len == 0) {\r
b602265d 5095 to->left |= right->left;\r
14b0e578
CS
5096 }\r
5097\r
b602265d 5098 to->right = right->right;\r
14b0e578 5099 if (right_len == 0) {\r
b602265d
DG
5100 to->right |= left->right;\r
5101 }\r
5102 else {\r
b26691c4 5103 to->right |= (left->right & ANCR_PREC_READ_NOT);\r
14b0e578
CS
5104 }\r
5105}\r
5106\r
5107static int\r
b602265d 5108is_left(int a)\r
14b0e578 5109{\r
b26691c4
LG
5110 if (a == ANCR_END_BUF || a == ANCR_SEMI_END_BUF ||\r
5111 a == ANCR_END_LINE || a == ANCR_PREC_READ || a == ANCR_PREC_READ_NOT)\r
14b0e578
CS
5112 return 0;\r
5113\r
5114 return 1;\r
5115}\r
5116\r
5117static int\r
b602265d 5118is_set_opt_anc_info(OptAnc* to, int anc)\r
14b0e578 5119{\r
b602265d 5120 if ((to->left & anc) != 0) return 1;\r
14b0e578 5121\r
b602265d 5122 return ((to->right & anc) != 0 ? 1 : 0);\r
14b0e578
CS
5123}\r
5124\r
5125static void\r
b602265d 5126add_opt_anc_info(OptAnc* to, int anc)\r
14b0e578 5127{\r
b602265d
DG
5128 if (is_left(anc))\r
5129 to->left |= anc;\r
14b0e578 5130 else\r
b602265d 5131 to->right |= anc;\r
14b0e578
CS
5132}\r
5133\r
5134static void\r
b602265d 5135remove_opt_anc_info(OptAnc* to, int anc)\r
14b0e578 5136{\r
b602265d
DG
5137 if (is_left(anc))\r
5138 to->left &= ~anc;\r
14b0e578 5139 else\r
b602265d 5140 to->right &= ~anc;\r
14b0e578
CS
5141}\r
5142\r
5143static void\r
b602265d 5144alt_merge_opt_anc_info(OptAnc* to, OptAnc* add)\r
14b0e578 5145{\r
b602265d
DG
5146 to->left &= add->left;\r
5147 to->right &= add->right;\r
14b0e578
CS
5148}\r
5149\r
5150static int\r
b26691c4 5151is_full_opt_exact(OptStr* e)\r
14b0e578 5152{\r
b26691c4 5153 return e->len >= OPT_EXACT_MAXLEN;\r
14b0e578
CS
5154}\r
5155\r
5156static void\r
b26691c4 5157clear_opt_exact(OptStr* e)\r
14b0e578 5158{\r
b602265d
DG
5159 clear_mml(&e->mmd);\r
5160 clear_opt_anc_info(&e->anc);\r
b26691c4
LG
5161 e->reach_end = 0;\r
5162 e->case_fold = 0;\r
5163 e->good_case_fold = 0;\r
5164 e->len = 0;\r
5165 e->s[0] = '\0';\r
14b0e578
CS
5166}\r
5167\r
5168static void\r
b26691c4 5169copy_opt_exact(OptStr* to, OptStr* from)\r
14b0e578 5170{\r
b602265d 5171 *to = *from;\r
14b0e578
CS
5172}\r
5173\r
b602265d 5174static int\r
b26691c4 5175concat_opt_exact(OptStr* to, OptStr* add, OnigEncoding enc)\r
14b0e578 5176{\r
b602265d 5177 int i, j, len, r;\r
14b0e578 5178 UChar *p, *end;\r
b602265d 5179 OptAnc tanc;\r
14b0e578 5180\r
b26691c4
LG
5181 if (add->case_fold != 0) {\r
5182 if (! to->case_fold) {\r
5183 if (to->len > 1 || to->len >= add->len) return 0; /* avoid */\r
14b0e578 5184\r
b26691c4
LG
5185 to->case_fold = 1;\r
5186 }\r
5187 else {\r
5188 if (to->good_case_fold != 0) {\r
5189 if (add->good_case_fold == 0) return 0;\r
5190 }\r
5191 }\r
14b0e578
CS
5192 }\r
5193\r
b602265d 5194 r = 0;\r
14b0e578
CS
5195 p = add->s;\r
5196 end = p + add->len;\r
5197 for (i = to->len; p < end; ) {\r
5198 len = enclen(enc, p);\r
b602265d
DG
5199 if (i + len > OPT_EXACT_MAXLEN) {\r
5200 r = 1; /* 1:full */\r
5201 break;\r
5202 }\r
14b0e578
CS
5203 for (j = 0; j < len && p < end; j++)\r
5204 to->s[i++] = *p++;\r
5205 }\r
5206\r
5207 to->len = i;\r
5208 to->reach_end = (p == end ? add->reach_end : 0);\r
5209\r
5210 concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);\r
b602265d 5211 if (! to->reach_end) tanc.right = 0;\r
14b0e578 5212 copy_opt_anc_info(&to->anc, &tanc);\r
b602265d
DG
5213\r
5214 return r;\r
14b0e578
CS
5215}\r
5216\r
5217static void\r
b26691c4 5218concat_opt_exact_str(OptStr* to, UChar* s, UChar* end, OnigEncoding enc)\r
14b0e578
CS
5219{\r
5220 int i, j, len;\r
5221 UChar *p;\r
5222\r
5223 for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {\r
5224 len = enclen(enc, p);\r
5225 if (i + len > OPT_EXACT_MAXLEN) break;\r
5226 for (j = 0; j < len && p < end; j++)\r
5227 to->s[i++] = *p++;\r
5228 }\r
5229\r
5230 to->len = i;\r
b26691c4
LG
5231\r
5232 if (p >= end && to->len == (int )(end - s))\r
5233 to->reach_end = 1;\r
14b0e578
CS
5234}\r
5235\r
5236static void\r
b26691c4 5237alt_merge_opt_exact(OptStr* to, OptStr* add, OptEnv* env)\r
14b0e578
CS
5238{\r
5239 int i, j, len;\r
5240\r
5241 if (add->len == 0 || to->len == 0) {\r
b602265d 5242 clear_opt_exact(to);\r
14b0e578
CS
5243 return ;\r
5244 }\r
5245\r
5246 if (! is_equal_mml(&to->mmd, &add->mmd)) {\r
b602265d 5247 clear_opt_exact(to);\r
14b0e578
CS
5248 return ;\r
5249 }\r
5250\r
5251 for (i = 0; i < to->len && i < add->len; ) {\r
5252 if (to->s[i] != add->s[i]) break;\r
5253 len = enclen(env->enc, to->s + i);\r
5254\r
5255 for (j = 1; j < len; j++) {\r
5256 if (to->s[i+j] != add->s[i+j]) break;\r
5257 }\r
5258 if (j < len) break;\r
5259 i += len;\r
5260 }\r
5261\r
5262 if (! add->reach_end || i < add->len || i < to->len) {\r
5263 to->reach_end = 0;\r
5264 }\r
5265 to->len = i;\r
b26691c4
LG
5266 if (add->case_fold != 0)\r
5267 to->case_fold = 1;\r
5268 if (add->good_case_fold == 0)\r
5269 to->good_case_fold = 0;\r
14b0e578
CS
5270\r
5271 alt_merge_opt_anc_info(&to->anc, &add->anc);\r
b602265d 5272 if (! to->reach_end) to->anc.right = 0;\r
14b0e578
CS
5273}\r
5274\r
5275static void\r
b26691c4 5276select_opt_exact(OnigEncoding enc, OptStr* now, OptStr* alt)\r
14b0e578 5277{\r
b602265d 5278 int vn, va;\r
14b0e578 5279\r
b602265d
DG
5280 vn = now->len;\r
5281 va = alt->len;\r
14b0e578 5282\r
b602265d 5283 if (va == 0) {\r
14b0e578
CS
5284 return ;\r
5285 }\r
b602265d
DG
5286 else if (vn == 0) {\r
5287 copy_opt_exact(now, alt);\r
14b0e578
CS
5288 return ;\r
5289 }\r
b602265d 5290 else if (vn <= 2 && va <= 2) {\r
14b0e578 5291 /* ByteValTable[x] is big value --> low price */\r
b602265d
DG
5292 va = map_position_value(enc, now->s[0]);\r
5293 vn = map_position_value(enc, alt->s[0]);\r
14b0e578 5294\r
b602265d
DG
5295 if (now->len > 1) vn += 5;\r
5296 if (alt->len > 1) va += 5;\r
14b0e578
CS
5297 }\r
5298\r
b26691c4
LG
5299 if (now->case_fold == 0) vn *= 2;\r
5300 if (alt->case_fold == 0) va *= 2;\r
5301\r
5302 if (now->good_case_fold != 0) vn *= 4;\r
5303 if (alt->good_case_fold != 0) va *= 4;\r
14b0e578 5304\r
b602265d
DG
5305 if (comp_distance_value(&now->mmd, &alt->mmd, vn, va) > 0)\r
5306 copy_opt_exact(now, alt);\r
14b0e578
CS
5307}\r
5308\r
5309static void\r
b602265d 5310clear_opt_map(OptMap* map)\r
14b0e578 5311{\r
b602265d 5312 static const OptMap clean_info = {\r
14b0e578
CS
5313 {0, 0}, {0, 0}, 0,\r
5314 {\r
5315 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
5316 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
5317 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
5318 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
5319 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
5320 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
5321 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
5322 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
5323 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
5324 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
5325 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
5326 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
5327 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
5328 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
5329 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
5330 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0\r
5331 }\r
5332 };\r
5333\r
b602265d 5334 xmemcpy(map, &clean_info, sizeof(OptMap));\r
14b0e578
CS
5335}\r
5336\r
5337static void\r
b602265d 5338copy_opt_map(OptMap* to, OptMap* from)\r
14b0e578 5339{\r
b602265d 5340 xmemcpy(to,from,sizeof(OptMap));\r
14b0e578
CS
5341}\r
5342\r
5343static void\r
b602265d 5344add_char_opt_map(OptMap* m, UChar c, OnigEncoding enc)\r
14b0e578 5345{\r
b602265d
DG
5346 if (m->map[c] == 0) {\r
5347 m->map[c] = 1;\r
5348 m->value += map_position_value(enc, c);\r
14b0e578
CS
5349 }\r
5350}\r
5351\r
5352static int\r
b602265d
DG
5353add_char_amb_opt_map(OptMap* map, UChar* p, UChar* end,\r
5354 OnigEncoding enc, OnigCaseFoldType fold_flag)\r
14b0e578
CS
5355{\r
5356 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];\r
5357 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];\r
5358 int i, n;\r
5359\r
b602265d 5360 add_char_opt_map(map, p[0], enc);\r
14b0e578 5361\r
b602265d
DG
5362 fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(fold_flag);\r
5363 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, fold_flag, p, end, items);\r
14b0e578
CS
5364 if (n < 0) return n;\r
5365\r
5366 for (i = 0; i < n; i++) {\r
5367 ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);\r
b602265d 5368 add_char_opt_map(map, buf[0], enc);\r
14b0e578
CS
5369 }\r
5370\r
5371 return 0;\r
5372}\r
5373\r
5374static void\r
b602265d 5375select_opt_map(OptMap* now, OptMap* alt)\r
14b0e578
CS
5376{\r
5377 static int z = 1<<15; /* 32768: something big value */\r
5378\r
b602265d 5379 int vn, va;\r
14b0e578
CS
5380\r
5381 if (alt->value == 0) return ;\r
5382 if (now->value == 0) {\r
b602265d 5383 copy_opt_map(now, alt);\r
14b0e578
CS
5384 return ;\r
5385 }\r
5386\r
b602265d
DG
5387 vn = z / now->value;\r
5388 va = z / alt->value;\r
5389 if (comp_distance_value(&now->mmd, &alt->mmd, vn, va) > 0)\r
5390 copy_opt_map(now, alt);\r
14b0e578
CS
5391}\r
5392\r
5393static int\r
b26691c4 5394comp_opt_exact_or_map(OptStr* e, OptMap* m)\r
14b0e578
CS
5395{\r
5396#define COMP_EM_BASE 20\r
b602265d 5397 int ae, am;\r
b26691c4 5398 int case_value;\r
14b0e578
CS
5399\r
5400 if (m->value <= 0) return -1;\r
5401\r
b26691c4
LG
5402 if (e->case_fold != 0) {\r
5403 if (e->good_case_fold != 0)\r
5404 case_value = 2;\r
5405 else\r
5406 case_value = 1;\r
5407 }\r
5408 else\r
5409 case_value = 3;\r
5410\r
5411 ae = COMP_EM_BASE * e->len * case_value;\r
b602265d
DG
5412 am = COMP_EM_BASE * 5 * 2 / m->value;\r
5413 return comp_distance_value(&e->mmd, &m->mmd, ae, am);\r
14b0e578
CS
5414}\r
5415\r
5416static void\r
b602265d 5417alt_merge_opt_map(OnigEncoding enc, OptMap* to, OptMap* add)\r
14b0e578
CS
5418{\r
5419 int i, val;\r
5420\r
5421 /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */\r
5422 if (to->value == 0) return ;\r
5423 if (add->value == 0 || to->mmd.max < add->mmd.min) {\r
b602265d 5424 clear_opt_map(to);\r
14b0e578
CS
5425 return ;\r
5426 }\r
5427\r
5428 alt_merge_mml(&to->mmd, &add->mmd);\r
5429\r
5430 val = 0;\r
b26691c4 5431 for (i = 0; i < CHAR_MAP_SIZE; i++) {\r
14b0e578
CS
5432 if (add->map[i])\r
5433 to->map[i] = 1;\r
5434\r
5435 if (to->map[i])\r
5436 val += map_position_value(enc, i);\r
5437 }\r
5438 to->value = val;\r
5439\r
5440 alt_merge_opt_anc_info(&to->anc, &add->anc);\r
5441}\r
5442\r
5443static void\r
b26691c4 5444set_bound_node_opt_info(OptNode* opt, MinMax* plen)\r
14b0e578 5445{\r
b26691c4
LG
5446 copy_mml(&(opt->sb.mmd), plen);\r
5447 copy_mml(&(opt->spr.mmd), plen);\r
5448 copy_mml(&(opt->map.mmd), plen);\r
14b0e578
CS
5449}\r
5450\r
5451static void\r
b26691c4 5452clear_node_opt_info(OptNode* opt)\r
14b0e578
CS
5453{\r
5454 clear_mml(&opt->len);\r
5455 clear_opt_anc_info(&opt->anc);\r
b26691c4
LG
5456 clear_opt_exact(&opt->sb);\r
5457 clear_opt_exact(&opt->sm);\r
5458 clear_opt_exact(&opt->spr);\r
b602265d 5459 clear_opt_map(&opt->map);\r
14b0e578
CS
5460}\r
5461\r
5462static void\r
b26691c4 5463copy_node_opt_info(OptNode* to, OptNode* from)\r
14b0e578 5464{\r
b26691c4 5465 xmemcpy(to,from,sizeof(OptNode));\r
14b0e578
CS
5466}\r
5467\r
5468static void\r
b26691c4 5469concat_left_node_opt_info(OnigEncoding enc, OptNode* to, OptNode* add)\r
14b0e578 5470{\r
b26691c4 5471 int sb_reach, sm_reach;\r
b602265d 5472 OptAnc tanc;\r
14b0e578
CS
5473\r
5474 concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);\r
5475 copy_opt_anc_info(&to->anc, &tanc);\r
5476\r
b26691c4
LG
5477 if (add->sb.len > 0 && to->len.max == 0) {\r
5478 concat_opt_anc_info(&tanc, &to->anc, &add->sb.anc, to->len.max, add->len.max);\r
5479 copy_opt_anc_info(&add->sb.anc, &tanc);\r
14b0e578
CS
5480 }\r
5481\r
5482 if (add->map.value > 0 && to->len.max == 0) {\r
5483 if (add->map.mmd.max == 0)\r
b602265d 5484 add->map.anc.left |= to->anc.left;\r
14b0e578
CS
5485 }\r
5486\r
b26691c4
LG
5487 sb_reach = to->sb.reach_end;\r
5488 sm_reach = to->sm.reach_end;\r
14b0e578
CS
5489\r
5490 if (add->len.max != 0)\r
b26691c4 5491 to->sb.reach_end = to->sm.reach_end = 0;\r
14b0e578 5492\r
b26691c4
LG
5493 if (add->sb.len > 0) {\r
5494 if (sb_reach) {\r
5495 concat_opt_exact(&to->sb, &add->sb, enc);\r
5496 clear_opt_exact(&add->sb);\r
14b0e578 5497 }\r
b26691c4
LG
5498 else if (sm_reach) {\r
5499 concat_opt_exact(&to->sm, &add->sb, enc);\r
5500 clear_opt_exact(&add->sb);\r
14b0e578
CS
5501 }\r
5502 }\r
b26691c4
LG
5503 select_opt_exact(enc, &to->sm, &add->sb);\r
5504 select_opt_exact(enc, &to->sm, &add->sm);\r
14b0e578 5505\r
b26691c4 5506 if (to->spr.len > 0) {\r
14b0e578 5507 if (add->len.max > 0) {\r
b26691c4
LG
5508 if (to->spr.len > (int )add->len.max)\r
5509 to->spr.len = add->len.max;\r
14b0e578 5510\r
b26691c4
LG
5511 if (to->spr.mmd.max == 0)\r
5512 select_opt_exact(enc, &to->sb, &to->spr);\r
14b0e578 5513 else\r
b26691c4 5514 select_opt_exact(enc, &to->sm, &to->spr);\r
14b0e578
CS
5515 }\r
5516 }\r
b26691c4
LG
5517 else if (add->spr.len > 0) {\r
5518 copy_opt_exact(&to->spr, &add->spr);\r
14b0e578
CS
5519 }\r
5520\r
b602265d 5521 select_opt_map(&to->map, &add->map);\r
14b0e578
CS
5522 add_mml(&to->len, &add->len);\r
5523}\r
5524\r
5525static void\r
b26691c4 5526alt_merge_node_opt_info(OptNode* to, OptNode* add, OptEnv* env)\r
14b0e578 5527{\r
b602265d 5528 alt_merge_opt_anc_info(&to->anc, &add->anc);\r
b26691c4
LG
5529 alt_merge_opt_exact(&to->sb, &add->sb, env);\r
5530 alt_merge_opt_exact(&to->sm, &add->sm, env);\r
5531 alt_merge_opt_exact(&to->spr, &add->spr, env);\r
b602265d 5532 alt_merge_opt_map(env->enc, &to->map, &add->map);\r
14b0e578
CS
5533\r
5534 alt_merge_mml(&to->len, &add->len);\r
5535}\r
5536\r
5537\r
5538#define MAX_NODE_OPT_INFO_REF_COUNT 5\r
5539\r
5540static int\r
b26691c4 5541optimize_nodes(Node* node, OptNode* opt, OptEnv* env)\r
14b0e578 5542{\r
b602265d
DG
5543 int i;\r
5544 int r;\r
b26691c4 5545 OptNode xo;\r
b602265d 5546 OnigEncoding enc;\r
14b0e578 5547\r
b602265d
DG
5548 r = 0;\r
5549 enc = env->enc;\r
14b0e578
CS
5550 clear_node_opt_info(opt);\r
5551 set_bound_node_opt_info(opt, &env->mmd);\r
5552\r
b602265d
DG
5553 switch (NODE_TYPE(node)) {\r
5554 case NODE_LIST:\r
14b0e578
CS
5555 {\r
5556 OptEnv nenv;\r
14b0e578
CS
5557 Node* nd = node;\r
5558\r
5559 copy_opt_env(&nenv, env);\r
5560 do {\r
b602265d
DG
5561 r = optimize_nodes(NODE_CAR(nd), &xo, &nenv);\r
5562 if (r == 0) {\r
5563 add_mml(&nenv.mmd, &xo.len);\r
5564 concat_left_node_opt_info(enc, opt, &xo);\r
5565 }\r
5566 } while (r == 0 && IS_NOT_NULL(nd = NODE_CDR(nd)));\r
14b0e578
CS
5567 }\r
5568 break;\r
5569\r
b602265d 5570 case NODE_ALT:\r
14b0e578 5571 {\r
14b0e578
CS
5572 Node* nd = node;\r
5573\r
5574 do {\r
b602265d
DG
5575 r = optimize_nodes(NODE_CAR(nd), &xo, env);\r
5576 if (r == 0) {\r
5577 if (nd == node) copy_node_opt_info(opt, &xo);\r
5578 else alt_merge_node_opt_info(opt, &xo, env);\r
5579 }\r
5580 } while ((r == 0) && IS_NOT_NULL(nd = NODE_CDR(nd)));\r
14b0e578
CS
5581 }\r
5582 break;\r
5583\r
b602265d 5584 case NODE_STRING:\r
14b0e578 5585 {\r
b602265d
DG
5586 StrNode* sn = STR_(node);\r
5587 int slen = (int )(sn->end - sn->s);\r
5588 /* int is_raw = NODE_STRING_IS_RAW(node); */\r
5589\r
5590 if (! NODE_STRING_IS_AMBIG(node)) {\r
b26691c4 5591 concat_opt_exact_str(&opt->sb, sn->s, sn->end, enc);\r
b602265d
DG
5592 if (slen > 0) {\r
5593 add_char_opt_map(&opt->map, *(sn->s), enc);\r
5594 }\r
14b0e578
CS
5595 set_mml(&opt->len, slen, slen);\r
5596 }\r
5597 else {\r
5598 int max;\r
5599\r
b602265d
DG
5600 if (NODE_STRING_IS_DONT_GET_OPT_INFO(node)) {\r
5601 int n = onigenc_strlen(enc, sn->s, sn->end);\r
5602 max = ONIGENC_MBC_MAXLEN_DIST(enc) * n;\r
5603 }\r
5604 else {\r
b26691c4
LG
5605 concat_opt_exact_str(&opt->sb, sn->s, sn->end, enc);\r
5606 opt->sb.case_fold = 1;\r
5607 if (NODE_STRING_IS_GOOD_AMBIG(node))\r
5608 opt->sb.good_case_fold = 1;\r
14b0e578 5609\r
b602265d
DG
5610 if (slen > 0) {\r
5611 r = add_char_amb_opt_map(&opt->map, sn->s, sn->end,\r
5612 enc, env->case_fold_flag);\r
5613 if (r != 0) break;\r
5614 }\r
14b0e578 5615\r
b602265d
DG
5616 max = slen;\r
5617 }\r
14b0e578
CS
5618\r
5619 set_mml(&opt->len, slen, max);\r
5620 }\r
14b0e578
CS
5621 }\r
5622 break;\r
5623\r
b602265d 5624 case NODE_CCLASS:\r
14b0e578 5625 {\r
b602265d
DG
5626 int z;\r
5627 CClassNode* cc = CCLASS_(node);\r
14b0e578 5628\r
b602265d 5629 /* no need to check ignore case. (set in setup_tree()) */\r
14b0e578
CS
5630\r
5631 if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {\r
b602265d
DG
5632 OnigLen min = ONIGENC_MBC_MINLEN(enc);\r
5633 OnigLen max = ONIGENC_MBC_MAXLEN_DIST(enc);\r
14b0e578 5634\r
b602265d 5635 set_mml(&opt->len, min, max);\r
14b0e578
CS
5636 }\r
5637 else {\r
5638 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {\r
5639 z = BITSET_AT(cc->bs, i);\r
b602265d
DG
5640 if ((z && ! IS_NCCLASS_NOT(cc)) || (! z && IS_NCCLASS_NOT(cc))) {\r
5641 add_char_opt_map(&opt->map, (UChar )i, enc);\r
14b0e578
CS
5642 }\r
5643 }\r
b602265d 5644 set_mml(&opt->len, 1, 1);\r
14b0e578
CS
5645 }\r
5646 }\r
5647 break;\r
5648\r
b602265d 5649 case NODE_CTYPE:\r
14b0e578 5650 {\r
b602265d
DG
5651 int min, max;\r
5652 int range;\r
14b0e578 5653\r
b602265d 5654 max = ONIGENC_MBC_MAXLEN_DIST(enc);\r
14b0e578
CS
5655\r
5656 if (max == 1) {\r
5657 min = 1;\r
5658\r
b602265d
DG
5659 switch (CTYPE_(node)->ctype) {\r
5660 case CTYPE_ANYCHAR:\r
5661 break;\r
5662\r
5663 case ONIGENC_CTYPE_WORD:\r
5664 range = CTYPE_(node)->ascii_mode != 0 ? 128 : SINGLE_BYTE_SIZE;\r
5665 if (CTYPE_(node)->not != 0) {\r
5666 for (i = 0; i < range; i++) {\r
5667 if (! ONIGENC_IS_CODE_WORD(enc, i)) {\r
5668 add_char_opt_map(&opt->map, (UChar )i, enc);\r
5669 }\r
5670 }\r
5671 for (i = range; i < SINGLE_BYTE_SIZE; i++) {\r
5672 add_char_opt_map(&opt->map, (UChar )i, enc);\r
5673 }\r
5674 }\r
5675 else {\r
5676 for (i = 0; i < range; i++) {\r
5677 if (ONIGENC_IS_CODE_WORD(enc, i)) {\r
5678 add_char_opt_map(&opt->map, (UChar )i, enc);\r
5679 }\r
5680 }\r
5681 }\r
5682 break;\r
5683 }\r
14b0e578
CS
5684 }\r
5685 else {\r
b602265d 5686 min = ONIGENC_MBC_MINLEN(enc);\r
14b0e578
CS
5687 }\r
5688 set_mml(&opt->len, min, max);\r
5689 }\r
5690 break;\r
5691\r
b602265d
DG
5692 case NODE_ANCHOR:\r
5693 switch (ANCHOR_(node)->type) {\r
b26691c4
LG
5694 case ANCR_BEGIN_BUF:\r
5695 case ANCR_BEGIN_POSITION:\r
5696 case ANCR_BEGIN_LINE:\r
5697 case ANCR_END_BUF:\r
5698 case ANCR_SEMI_END_BUF:\r
5699 case ANCR_END_LINE:\r
5700 case ANCR_PREC_READ_NOT:\r
5701 case ANCR_LOOK_BEHIND:\r
b602265d 5702 add_opt_anc_info(&opt->anc, ANCHOR_(node)->type);\r
14b0e578
CS
5703 break;\r
5704\r
b26691c4 5705 case ANCR_PREC_READ:\r
14b0e578 5706 {\r
b602265d
DG
5707 r = optimize_nodes(NODE_BODY(node), &xo, env);\r
5708 if (r == 0) {\r
b26691c4
LG
5709 if (xo.sb.len > 0)\r
5710 copy_opt_exact(&opt->spr, &xo.sb);\r
5711 else if (xo.sm.len > 0)\r
5712 copy_opt_exact(&opt->spr, &xo.sm);\r
14b0e578 5713\r
b26691c4 5714 opt->spr.reach_end = 0;\r
14b0e578 5715\r
b602265d
DG
5716 if (xo.map.value > 0)\r
5717 copy_opt_map(&opt->map, &xo.map);\r
5718 }\r
14b0e578
CS
5719 }\r
5720 break;\r
5721\r
b26691c4 5722 case ANCR_LOOK_BEHIND_NOT:\r
14b0e578
CS
5723 break;\r
5724 }\r
5725 break;\r
5726\r
b602265d
DG
5727 case NODE_BACKREF:\r
5728 if (! NODE_IS_CHECKER(node)) {\r
14b0e578 5729 int* backs;\r
b602265d
DG
5730 OnigLen min, max, tmin, tmax;\r
5731 MemEnv* mem_env = SCANENV_MEMENV(env->scan_env);\r
5732 BackRefNode* br = BACKREF_(node);\r
14b0e578 5733\r
b602265d
DG
5734 if (NODE_IS_RECURSION(node)) {\r
5735 set_mml(&opt->len, 0, INFINITE_LEN);\r
5736 break;\r
14b0e578
CS
5737 }\r
5738 backs = BACKREFS_P(br);\r
b602265d
DG
5739 min = tree_min_len(mem_env[backs[0]].node, env->scan_env);\r
5740 max = tree_max_len(mem_env[backs[0]].node, env->scan_env);\r
14b0e578 5741 for (i = 1; i < br->back_num; i++) {\r
b602265d
DG
5742 tmin = tree_min_len(mem_env[backs[i]].node, env->scan_env);\r
5743 tmax = tree_max_len(mem_env[backs[i]].node, env->scan_env);\r
5744 if (min > tmin) min = tmin;\r
5745 if (max < tmax) max = tmax;\r
14b0e578 5746 }\r
b602265d 5747 set_mml(&opt->len, min, max);\r
14b0e578
CS
5748 }\r
5749 break;\r
5750\r
b602265d
DG
5751#ifdef USE_CALL\r
5752 case NODE_CALL:\r
5753 if (NODE_IS_RECURSION(node))\r
5754 set_mml(&opt->len, 0, INFINITE_LEN);\r
14b0e578
CS
5755 else {\r
5756 OnigOptionType save = env->options;\r
b26691c4 5757 env->options = BAG_(NODE_BODY(node))->o.options;\r
b602265d 5758 r = optimize_nodes(NODE_BODY(node), opt, env);\r
14b0e578
CS
5759 env->options = save;\r
5760 }\r
5761 break;\r
5762#endif\r
5763\r
b602265d 5764 case NODE_QUANT:\r
14b0e578 5765 {\r
b602265d
DG
5766 OnigLen min, max;\r
5767 QuantNode* qn = QUANT_(node);\r
5768\r
5769 r = optimize_nodes(NODE_BODY(node), &xo, env);\r
5770 if (r != 0) break;\r
5771\r
5772 if (qn->lower > 0) {\r
5773 copy_node_opt_info(opt, &xo);\r
b26691c4
LG
5774 if (xo.sb.len > 0) {\r
5775 if (xo.sb.reach_end) {\r
5776 for (i = 2; i <= qn->lower && ! is_full_opt_exact(&opt->sb); i++) {\r
5777 int rc = concat_opt_exact(&opt->sb, &xo.sb, enc);\r
b602265d
DG
5778 if (rc > 0) break;\r
5779 }\r
b26691c4 5780 if (i < qn->lower) opt->sb.reach_end = 0;\r
b602265d
DG
5781 }\r
5782 }\r
5783\r
5784 if (qn->lower != qn->upper) {\r
b26691c4
LG
5785 opt->sb.reach_end = 0;\r
5786 opt->sm.reach_end = 0;\r
b602265d
DG
5787 }\r
5788 if (qn->lower > 1)\r
b26691c4 5789 opt->sm.reach_end = 0;\r
b602265d
DG
5790 }\r
5791\r
b26691c4 5792 if (IS_INFINITE_REPEAT(qn->upper)) {\r
b602265d
DG
5793 if (env->mmd.max == 0 &&\r
5794 NODE_IS_ANYCHAR(NODE_BODY(node)) && qn->greedy != 0) {\r
5795 if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), env)))\r
b26691c4 5796 add_opt_anc_info(&opt->anc, ANCR_ANYCHAR_INF_ML);\r
b602265d 5797 else\r
b26691c4 5798 add_opt_anc_info(&opt->anc, ANCR_ANYCHAR_INF);\r
b602265d
DG
5799 }\r
5800\r
5801 max = (xo.len.max > 0 ? INFINITE_LEN : 0);\r
14b0e578
CS
5802 }\r
5803 else {\r
b602265d 5804 max = distance_multiply(xo.len.max, qn->upper);\r
14b0e578
CS
5805 }\r
5806\r
b602265d 5807 min = distance_multiply(xo.len.min, qn->lower);\r
14b0e578
CS
5808 set_mml(&opt->len, min, max);\r
5809 }\r
5810 break;\r
5811\r
b26691c4 5812 case NODE_BAG:\r
b602265d 5813 {\r
b26691c4 5814 BagNode* en = BAG_(node);\r
b602265d
DG
5815\r
5816 switch (en->type) {\r
b26691c4 5817 case BAG_OPTION:\r
b602265d
DG
5818 {\r
5819 OnigOptionType save = env->options;\r
5820\r
5821 env->options = en->o.options;\r
5822 r = optimize_nodes(NODE_BODY(node), opt, env);\r
5823 env->options = save;\r
5824 }\r
5825 break;\r
5826\r
b26691c4 5827 case BAG_MEMORY:\r
b602265d
DG
5828#ifdef USE_CALL\r
5829 en->opt_count++;\r
5830 if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {\r
5831 OnigLen min, max;\r
5832\r
5833 min = 0;\r
5834 max = INFINITE_LEN;\r
5835 if (NODE_IS_MIN_FIXED(node)) min = en->min_len;\r
5836 if (NODE_IS_MAX_FIXED(node)) max = en->max_len;\r
5837 set_mml(&opt->len, min, max);\r
5838 }\r
5839 else\r
5840#endif\r
5841 {\r
5842 r = optimize_nodes(NODE_BODY(node), opt, env);\r
b26691c4 5843 if (is_set_opt_anc_info(&opt->anc, ANCR_ANYCHAR_INF_MASK)) {\r
b602265d 5844 if (MEM_STATUS_AT0(env->scan_env->backrefed_mem, en->m.regnum))\r
b26691c4 5845 remove_opt_anc_info(&opt->anc, ANCR_ANYCHAR_INF_MASK);\r
b602265d
DG
5846 }\r
5847 }\r
5848 break;\r
5849\r
b26691c4 5850 case BAG_STOP_BACKTRACK:\r
b602265d
DG
5851 r = optimize_nodes(NODE_BODY(node), opt, env);\r
5852 break;\r
5853\r
b26691c4 5854 case BAG_IF_ELSE:\r
b602265d
DG
5855 {\r
5856 OptEnv nenv;\r
5857\r
5858 copy_opt_env(&nenv, env);\r
b26691c4 5859 r = optimize_nodes(NODE_BAG_BODY(en), &xo, &nenv);\r
b602265d
DG
5860 if (r == 0) {\r
5861 add_mml(&nenv.mmd, &xo.len);\r
5862 concat_left_node_opt_info(enc, opt, &xo);\r
5863 if (IS_NOT_NULL(en->te.Then)) {\r
5864 r = optimize_nodes(en->te.Then, &xo, &nenv);\r
5865 if (r == 0) {\r
5866 concat_left_node_opt_info(enc, opt, &xo);\r
5867 }\r
5868 }\r
5869\r
5870 if (IS_NOT_NULL(en->te.Else)) {\r
5871 r = optimize_nodes(en->te.Else, &xo, env);\r
5872 if (r == 0)\r
5873 alt_merge_node_opt_info(opt, &xo, env);\r
5874 }\r
5875 }\r
5876 }\r
5877 break;\r
5878 }\r
5879 }\r
5880 break;\r
5881\r
5882 case NODE_GIMMICK:\r
14b0e578
CS
5883 break;\r
5884\r
5885 default:\r
5886#ifdef ONIG_DEBUG\r
b602265d 5887 fprintf(stderr, "optimize_nodes: undefined node type %d\n", NODE_TYPE(node));\r
14b0e578
CS
5888#endif\r
5889 r = ONIGERR_TYPE_BUG;\r
5890 break;\r
5891 }\r
5892\r
5893 return r;\r
5894}\r
5895\r
5896static int\r
b26691c4 5897set_optimize_exact(regex_t* reg, OptStr* e)\r
14b0e578
CS
5898{\r
5899 int r;\r
5900\r
5901 if (e->len == 0) return 0;\r
5902\r
b26691c4
LG
5903 reg->exact = (UChar* )xmalloc(e->len);\r
5904 CHECK_NULL_RETURN_MEMERR(reg->exact);\r
5905 xmemcpy(reg->exact, e->s, e->len);\r
5906 reg->exact_end = reg->exact + e->len;\r
5907\r
5908 if (e->case_fold) {\r
5909 reg->optimize = OPTIMIZE_STR_CASE_FOLD;\r
5910 if (e->good_case_fold != 0) {\r
5911 if (e->len >= 2) {\r
5912 r = set_sunday_quick_search_or_bmh_skip_table(reg, 1,\r
5913 reg->exact, reg->exact_end,\r
5914 reg->map, &(reg->map_offset));\r
5915 if (r != 0) return r;\r
5916 reg->optimize = OPTIMIZE_STR_CASE_FOLD_FAST;\r
5917 }\r
5918 }\r
14b0e578
CS
5919 }\r
5920 else {\r
5921 int allow_reverse;\r
5922\r
14b0e578 5923 allow_reverse =\r
b602265d 5924 ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);\r
14b0e578 5925\r
b26691c4
LG
5926 if (e->len >= 2 || (e->len >= 1 && allow_reverse)) {\r
5927 r = set_sunday_quick_search_or_bmh_skip_table(reg, 0,\r
5928 reg->exact, reg->exact_end,\r
5929 reg->map, &(reg->map_offset));\r
b602265d 5930 if (r != 0) return r;\r
14b0e578
CS
5931\r
5932 reg->optimize = (allow_reverse != 0\r
b26691c4
LG
5933 ? OPTIMIZE_STR_FAST\r
5934 : OPTIMIZE_STR_FAST_STEP_FORWARD);\r
14b0e578
CS
5935 }\r
5936 else {\r
b26691c4 5937 reg->optimize = OPTIMIZE_STR;\r
14b0e578
CS
5938 }\r
5939 }\r
5940\r
5941 reg->dmin = e->mmd.min;\r
5942 reg->dmax = e->mmd.max;\r
5943\r
b602265d
DG
5944 if (reg->dmin != INFINITE_LEN) {\r
5945 reg->threshold_len = reg->dmin + (int )(reg->exact_end - reg->exact);\r
14b0e578
CS
5946 }\r
5947\r
5948 return 0;\r
5949}\r
5950\r
5951static void\r
b602265d 5952set_optimize_map(regex_t* reg, OptMap* m)\r
14b0e578
CS
5953{\r
5954 int i;\r
5955\r
b26691c4 5956 for (i = 0; i < CHAR_MAP_SIZE; i++)\r
14b0e578
CS
5957 reg->map[i] = m->map[i];\r
5958\r
b602265d 5959 reg->optimize = OPTIMIZE_MAP;\r
14b0e578
CS
5960 reg->dmin = m->mmd.min;\r
5961 reg->dmax = m->mmd.max;\r
5962\r
b602265d 5963 if (reg->dmin != INFINITE_LEN) {\r
14b0e578
CS
5964 reg->threshold_len = reg->dmin + 1;\r
5965 }\r
5966}\r
5967\r
5968static void\r
b602265d 5969set_sub_anchor(regex_t* reg, OptAnc* anc)\r
14b0e578 5970{\r
b26691c4
LG
5971 reg->sub_anchor |= anc->left & ANCR_BEGIN_LINE;\r
5972 reg->sub_anchor |= anc->right & ANCR_END_LINE;\r
14b0e578
CS
5973}\r
5974\r
b602265d 5975#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)\r
14b0e578
CS
5976static void print_optimize_info(FILE* f, regex_t* reg);\r
5977#endif\r
5978\r
5979static int\r
5980set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)\r
5981{\r
14b0e578 5982 int r;\r
b26691c4 5983 OptNode opt;\r
14b0e578
CS
5984 OptEnv env;\r
5985\r
5986 env.enc = reg->enc;\r
5987 env.options = reg->options;\r
5988 env.case_fold_flag = reg->case_fold_flag;\r
b602265d 5989 env.scan_env = scan_env;\r
14b0e578
CS
5990 clear_mml(&env.mmd);\r
5991\r
b602265d
DG
5992 r = optimize_nodes(node, &opt, &env);\r
5993 if (r != 0) return r;\r
5994\r
b26691c4
LG
5995 reg->anchor = opt.anc.left & (ANCR_BEGIN_BUF |\r
5996 ANCR_BEGIN_POSITION | ANCR_ANYCHAR_INF | ANCR_ANYCHAR_INF_ML |\r
5997 ANCR_LOOK_BEHIND);\r
14b0e578 5998\r
b26691c4
LG
5999 if ((opt.anc.left & (ANCR_LOOK_BEHIND | ANCR_PREC_READ_NOT)) != 0)\r
6000 reg->anchor &= ~ANCR_ANYCHAR_INF_ML;\r
14b0e578 6001\r
b26691c4
LG
6002 reg->anchor |= opt.anc.right & (ANCR_END_BUF | ANCR_SEMI_END_BUF |\r
6003 ANCR_PREC_READ_NOT);\r
14b0e578 6004\r
b26691c4 6005 if (reg->anchor & (ANCR_END_BUF | ANCR_SEMI_END_BUF)) {\r
14b0e578
CS
6006 reg->anchor_dmin = opt.len.min;\r
6007 reg->anchor_dmax = opt.len.max;\r
6008 }\r
6009\r
b26691c4
LG
6010 if (opt.sb.len > 0 || opt.sm.len > 0) {\r
6011 select_opt_exact(reg->enc, &opt.sb, &opt.sm);\r
6012 if (opt.map.value > 0 && comp_opt_exact_or_map(&opt.sb, &opt.map) > 0) {\r
14b0e578
CS
6013 goto set_map;\r
6014 }\r
6015 else {\r
b26691c4
LG
6016 r = set_optimize_exact(reg, &opt.sb);\r
6017 set_sub_anchor(reg, &opt.sb.anc);\r
14b0e578
CS
6018 }\r
6019 }\r
6020 else if (opt.map.value > 0) {\r
6021 set_map:\r
b602265d 6022 set_optimize_map(reg, &opt.map);\r
14b0e578
CS
6023 set_sub_anchor(reg, &opt.map.anc);\r
6024 }\r
6025 else {\r
b26691c4 6026 reg->sub_anchor |= opt.anc.left & ANCR_BEGIN_LINE;\r
14b0e578 6027 if (opt.len.max == 0)\r
b26691c4 6028 reg->sub_anchor |= opt.anc.right & ANCR_END_LINE;\r
14b0e578
CS
6029 }\r
6030\r
6031#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)\r
6032 print_optimize_info(stderr, reg);\r
6033#endif\r
6034 return r;\r
6035}\r
6036\r
6037static void\r
6038clear_optimize_info(regex_t* reg)\r
6039{\r
b602265d 6040 reg->optimize = OPTIMIZE_NONE;\r
14b0e578
CS
6041 reg->anchor = 0;\r
6042 reg->anchor_dmin = 0;\r
6043 reg->anchor_dmax = 0;\r
6044 reg->sub_anchor = 0;\r
6045 reg->exact_end = (UChar* )NULL;\r
b26691c4 6046 reg->map_offset = 0;\r
14b0e578
CS
6047 reg->threshold_len = 0;\r
6048 if (IS_NOT_NULL(reg->exact)) {\r
6049 xfree(reg->exact);\r
6050 reg->exact = (UChar* )NULL;\r
6051 }\r
6052}\r
6053\r
6054#ifdef ONIG_DEBUG\r
6055\r
6056static void print_enc_string(FILE* fp, OnigEncoding enc,\r
b602265d 6057 const UChar *s, const UChar *end)\r
14b0e578
CS
6058{\r
6059 fprintf(fp, "\nPATTERN: /");\r
6060\r
6061 if (ONIGENC_MBC_MINLEN(enc) > 1) {\r
6062 const UChar *p;\r
6063 OnigCodePoint code;\r
6064\r
6065 p = s;\r
6066 while (p < end) {\r
6067 code = ONIGENC_MBC_TO_CODE(enc, p, end);\r
6068 if (code >= 0x80) {\r
b602265d 6069 fprintf(fp, " 0x%04x ", (int )code);\r
14b0e578
CS
6070 }\r
6071 else {\r
b602265d 6072 fputc((int )code, fp);\r
14b0e578
CS
6073 }\r
6074\r
6075 p += enclen(enc, p);\r
6076 }\r
6077 }\r
6078 else {\r
6079 while (s < end) {\r
6080 fputc((int )*s, fp);\r
6081 s++;\r
6082 }\r
6083 }\r
6084\r
6085 fprintf(fp, "/\n");\r
6086}\r
6087\r
b602265d
DG
6088#endif /* ONIG_DEBUG */\r
6089\r
6090#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)\r
6091\r
14b0e578 6092static void\r
b602265d 6093print_distance_range(FILE* f, OnigLen a, OnigLen b)\r
14b0e578 6094{\r
b602265d 6095 if (a == INFINITE_LEN)\r
14b0e578
CS
6096 fputs("inf", f);\r
6097 else\r
6098 fprintf(f, "(%u)", a);\r
6099\r
6100 fputs("-", f);\r
6101\r
b602265d 6102 if (b == INFINITE_LEN)\r
14b0e578
CS
6103 fputs("inf", f);\r
6104 else\r
6105 fprintf(f, "(%u)", b);\r
6106}\r
6107\r
6108static void\r
6109print_anchor(FILE* f, int anchor)\r
6110{\r
6111 int q = 0;\r
6112\r
6113 fprintf(f, "[");\r
6114\r
b26691c4 6115 if (anchor & ANCR_BEGIN_BUF) {\r
14b0e578
CS
6116 fprintf(f, "begin-buf");\r
6117 q = 1;\r
6118 }\r
b26691c4 6119 if (anchor & ANCR_BEGIN_LINE) {\r
14b0e578
CS
6120 if (q) fprintf(f, ", ");\r
6121 q = 1;\r
6122 fprintf(f, "begin-line");\r
6123 }\r
b26691c4 6124 if (anchor & ANCR_BEGIN_POSITION) {\r
14b0e578
CS
6125 if (q) fprintf(f, ", ");\r
6126 q = 1;\r
6127 fprintf(f, "begin-pos");\r
6128 }\r
b26691c4 6129 if (anchor & ANCR_END_BUF) {\r
14b0e578
CS
6130 if (q) fprintf(f, ", ");\r
6131 q = 1;\r
6132 fprintf(f, "end-buf");\r
6133 }\r
b26691c4 6134 if (anchor & ANCR_SEMI_END_BUF) {\r
14b0e578
CS
6135 if (q) fprintf(f, ", ");\r
6136 q = 1;\r
6137 fprintf(f, "semi-end-buf");\r
6138 }\r
b26691c4 6139 if (anchor & ANCR_END_LINE) {\r
14b0e578
CS
6140 if (q) fprintf(f, ", ");\r
6141 q = 1;\r
6142 fprintf(f, "end-line");\r
6143 }\r
b26691c4 6144 if (anchor & ANCR_ANYCHAR_INF) {\r
14b0e578
CS
6145 if (q) fprintf(f, ", ");\r
6146 q = 1;\r
b602265d 6147 fprintf(f, "anychar-inf");\r
14b0e578 6148 }\r
b26691c4 6149 if (anchor & ANCR_ANYCHAR_INF_ML) {\r
14b0e578 6150 if (q) fprintf(f, ", ");\r
b602265d 6151 fprintf(f, "anychar-inf-ml");\r
14b0e578
CS
6152 }\r
6153\r
6154 fprintf(f, "]");\r
6155}\r
6156\r
6157static void\r
6158print_optimize_info(FILE* f, regex_t* reg)\r
6159{\r
b26691c4
LG
6160 static const char* on[] = { "NONE", "STR",\r
6161 "STR_FAST", "STR_FAST_STEP_FORWARD",\r
6162 "STR_CASE_FOLD_FAST", "STR_CASE_FOLD", "MAP" };\r
14b0e578
CS
6163\r
6164 fprintf(f, "optimize: %s\n", on[reg->optimize]);\r
6165 fprintf(f, " anchor: "); print_anchor(f, reg->anchor);\r
b26691c4 6166 if ((reg->anchor & ANCR_END_BUF_MASK) != 0)\r
14b0e578
CS
6167 print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);\r
6168 fprintf(f, "\n");\r
6169\r
6170 if (reg->optimize) {\r
6171 fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor);\r
6172 fprintf(f, "\n");\r
6173 }\r
6174 fprintf(f, "\n");\r
6175\r
6176 if (reg->exact) {\r
6177 UChar *p;\r
6178 fprintf(f, "exact: [");\r
6179 for (p = reg->exact; p < reg->exact_end; p++) {\r
6180 fputc(*p, f);\r
6181 }\r
b602265d 6182 fprintf(f, "]: length: %ld\n", (reg->exact_end - reg->exact));\r
14b0e578 6183 }\r
b602265d 6184 else if (reg->optimize & OPTIMIZE_MAP) {\r
14b0e578
CS
6185 int c, i, n = 0;\r
6186\r
b26691c4 6187 for (i = 0; i < CHAR_MAP_SIZE; i++)\r
14b0e578
CS
6188 if (reg->map[i]) n++;\r
6189\r
6190 fprintf(f, "map: n=%d\n", n);\r
6191 if (n > 0) {\r
6192 c = 0;\r
6193 fputc('[', f);\r
b26691c4 6194 for (i = 0; i < CHAR_MAP_SIZE; i++) {\r
b602265d 6195 if (reg->map[i] != 0) {\r
14b0e578
CS
6196 if (c > 0) fputs(", ", f);\r
6197 c++;\r
6198 if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&\r
6199 ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))\r
6200 fputc(i, f);\r
6201 else\r
6202 fprintf(f, "%d", i);\r
6203 }\r
6204 }\r
6205 fprintf(f, "]\n");\r
6206 }\r
6207 }\r
6208}\r
b602265d
DG
6209#endif\r
6210\r
6211\r
6212extern RegexExt*\r
6213onig_get_regex_ext(regex_t* reg)\r
6214{\r
b26691c4 6215 if (IS_NULL(reg->extp)) {\r
b602265d
DG
6216 RegexExt* ext = (RegexExt* )xmalloc(sizeof(*ext));\r
6217 if (IS_NULL(ext)) return 0;\r
6218\r
6219 ext->pattern = 0;\r
6220 ext->pattern_end = 0;\r
6221#ifdef USE_CALLOUT\r
6222 ext->tag_table = 0;\r
6223 ext->callout_num = 0;\r
6224 ext->callout_list_alloc = 0;\r
6225 ext->callout_list = 0;\r
6226#endif\r
6227\r
b26691c4 6228 reg->extp = ext;\r
b602265d
DG
6229 }\r
6230\r
b26691c4 6231 return reg->extp;\r
b602265d
DG
6232}\r
6233\r
6234static void\r
6235free_regex_ext(RegexExt* ext)\r
6236{\r
6237 if (IS_NOT_NULL(ext)) {\r
6238 if (IS_NOT_NULL(ext->pattern))\r
6239 xfree((void* )ext->pattern);\r
6240\r
6241#ifdef USE_CALLOUT\r
6242 if (IS_NOT_NULL(ext->tag_table))\r
6243 onig_callout_tag_table_free(ext->tag_table);\r
6244\r
6245 if (IS_NOT_NULL(ext->callout_list))\r
6246 onig_free_reg_callout_list(ext->callout_num, ext->callout_list);\r
6247#endif\r
6248\r
6249 xfree(ext);\r
6250 }\r
6251}\r
6252\r
6253extern int\r
6254onig_ext_set_pattern(regex_t* reg, const UChar* pattern, const UChar* pattern_end)\r
6255{\r
6256 RegexExt* ext;\r
6257 UChar* s;\r
6258\r
6259 ext = onig_get_regex_ext(reg);\r
6260 CHECK_NULL_RETURN_MEMERR(ext);\r
6261\r
6262 s = onigenc_strdup(reg->enc, pattern, pattern_end);\r
6263 CHECK_NULL_RETURN_MEMERR(s);\r
6264\r
6265 ext->pattern = s;\r
6266 ext->pattern_end = s + (pattern_end - pattern);\r
6267\r
6268 return ONIG_NORMAL;\r
6269}\r
14b0e578 6270\r
14b0e578
CS
6271extern void\r
6272onig_free_body(regex_t* reg)\r
6273{\r
6274 if (IS_NOT_NULL(reg)) {\r
b26691c4
LG
6275 ops_free(reg);\r
6276 if (IS_NOT_NULL(reg->string_pool)) {\r
6277 xfree(reg->string_pool);\r
6278 reg->string_pool_end = reg->string_pool = 0;\r
6279 }\r
14b0e578 6280 if (IS_NOT_NULL(reg->exact)) xfree(reg->exact);\r
14b0e578 6281 if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range);\r
b26691c4
LG
6282 if (IS_NOT_NULL(reg->extp)) {\r
6283 free_regex_ext(reg->extp);\r
6284 reg->extp = 0;\r
b602265d 6285 }\r
14b0e578 6286\r
14b0e578 6287 onig_names_free(reg);\r
14b0e578
CS
6288 }\r
6289}\r
6290\r
6291extern void\r
6292onig_free(regex_t* reg)\r
6293{\r
6294 if (IS_NOT_NULL(reg)) {\r
6295 onig_free_body(reg);\r
6296 xfree(reg);\r
6297 }\r
6298}\r
6299\r
14b0e578 6300\r
b602265d 6301#ifdef ONIG_DEBUG_PARSE\r
14b0e578
CS
6302static void print_tree P_((FILE* f, Node* node));\r
6303#endif\r
6304\r
b26691c4
LG
6305extern int onig_init_for_match_at(regex_t* reg);\r
6306\r
14b0e578
CS
6307extern int\r
6308onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,\r
b602265d 6309 OnigErrorInfo* einfo)\r
14b0e578 6310{\r
b26691c4 6311 int r;\r
14b0e578
CS
6312 Node* root;\r
6313 ScanEnv scan_env;\r
b602265d 6314#ifdef USE_CALL\r
14b0e578
CS
6315 UnsetAddrList uslist;\r
6316#endif\r
6317\r
b602265d
DG
6318 root = 0;\r
6319 if (IS_NOT_NULL(einfo)) {\r
6320 einfo->enc = reg->enc;\r
6321 einfo->par = (UChar* )NULL;\r
6322 }\r
14b0e578
CS
6323\r
6324#ifdef ONIG_DEBUG\r
6325 print_enc_string(stderr, reg->enc, pattern, pattern_end);\r
6326#endif\r
6327\r
b26691c4
LG
6328 if (reg->ops_alloc == 0) {\r
6329 r = ops_init(reg, OPS_INIT_SIZE);\r
14b0e578
CS
6330 if (r != 0) goto end;\r
6331 }\r
6332 else\r
b26691c4 6333 reg->ops_used = 0;\r
14b0e578 6334\r
b26691c4
LG
6335 reg->string_pool = 0;\r
6336 reg->string_pool_end = 0;\r
14b0e578
CS
6337 reg->num_mem = 0;\r
6338 reg->num_repeat = 0;\r
6339 reg->num_null_check = 0;\r
6340 reg->repeat_range_alloc = 0;\r
6341 reg->repeat_range = (OnigRepeatRange* )NULL;\r
14b0e578 6342\r
b602265d
DG
6343 r = onig_parse_tree(&root, pattern, pattern_end, reg, &scan_env);\r
6344 if (r != 0) goto err;\r
14b0e578 6345\r
14b0e578
CS
6346 /* mixed use named group and no-named group */\r
6347 if (scan_env.num_named > 0 &&\r
6348 IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&\r
b602265d 6349 ! ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {\r
14b0e578
CS
6350 if (scan_env.num_named != scan_env.num_mem)\r
6351 r = disable_noname_group_capture(&root, reg, &scan_env);\r
6352 else\r
6353 r = numbered_ref_check(root);\r
6354\r
6355 if (r != 0) goto err;\r
6356 }\r
14b0e578 6357\r
b602265d
DG
6358 r = check_backrefs(root, &scan_env);\r
6359 if (r != 0) goto err;\r
6360\r
6361#ifdef USE_CALL\r
14b0e578
CS
6362 if (scan_env.num_call > 0) {\r
6363 r = unset_addr_list_init(&uslist, scan_env.num_call);\r
6364 if (r != 0) goto err;\r
6365 scan_env.unset_addr_list = &uslist;\r
b602265d
DG
6366 r = setup_call(root, &scan_env, 0);\r
6367 if (r != 0) goto err_unset;\r
6368 r = setup_call2(root);\r
14b0e578 6369 if (r != 0) goto err_unset;\r
b602265d 6370 r = recursive_call_check_trav(root, &scan_env, 0);\r
14b0e578 6371 if (r < 0) goto err_unset;\r
b602265d 6372 r = infinite_recursive_call_check_trav(root, &scan_env);\r
14b0e578
CS
6373 if (r != 0) goto err_unset;\r
6374\r
b602265d 6375 setup_called_state(root, 0);\r
14b0e578 6376 }\r
b602265d
DG
6377\r
6378 reg->num_call = scan_env.num_call;\r
14b0e578
CS
6379#endif\r
6380\r
6381 r = setup_tree(root, reg, 0, &scan_env);\r
6382 if (r != 0) goto err_unset;\r
6383\r
b602265d 6384#ifdef ONIG_DEBUG_PARSE\r
14b0e578
CS
6385 print_tree(stderr, root);\r
6386#endif\r
6387\r
6388 reg->capture_history = scan_env.capture_history;\r
6389 reg->bt_mem_start = scan_env.bt_mem_start;\r
6390 reg->bt_mem_start |= reg->capture_history;\r
6391 if (IS_FIND_CONDITION(reg->options))\r
b602265d 6392 MEM_STATUS_ON_ALL(reg->bt_mem_end);\r
14b0e578
CS
6393 else {\r
6394 reg->bt_mem_end = scan_env.bt_mem_end;\r
6395 reg->bt_mem_end |= reg->capture_history;\r
6396 }\r
b602265d 6397 reg->bt_mem_start |= reg->bt_mem_end;\r
14b0e578
CS
6398\r
6399 clear_optimize_info(reg);\r
6400#ifndef ONIG_DONT_OPTIMIZE\r
6401 r = set_optimize_info_from_tree(root, reg, &scan_env);\r
6402 if (r != 0) goto err_unset;\r
6403#endif\r
6404\r
b602265d
DG
6405 if (IS_NOT_NULL(scan_env.mem_env_dynamic)) {\r
6406 xfree(scan_env.mem_env_dynamic);\r
6407 scan_env.mem_env_dynamic = (MemEnv* )NULL;\r
14b0e578
CS
6408 }\r
6409\r
b602265d 6410 r = compile_tree(root, reg, &scan_env);\r
14b0e578 6411 if (r == 0) {\r
b602265d 6412 if (scan_env.keep_num > 0) {\r
b26691c4 6413 r = add_op(reg, OP_UPDATE_VAR);\r
b602265d 6414 if (r != 0) goto err;\r
b26691c4
LG
6415\r
6416 COP(reg)->update_var.type = UPDATE_VAR_KEEP_FROM_STACK_LAST;\r
6417 COP(reg)->update_var.id = 0; /* not used */\r
b602265d
DG
6418 }\r
6419\r
b26691c4
LG
6420 r = add_op(reg, OP_END);\r
6421 if (r != 0) goto err;\r
6422\r
b602265d 6423#ifdef USE_CALL\r
14b0e578 6424 if (scan_env.num_call > 0) {\r
b602265d 6425 r = fix_unset_addr_list(&uslist, reg);\r
14b0e578 6426 unset_addr_list_end(&uslist);\r
b602265d 6427 if (r != 0) goto err;\r
14b0e578
CS
6428 }\r
6429#endif\r
6430\r
b602265d
DG
6431 if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0)\r
6432#ifdef USE_CALLOUT\r
b26691c4 6433 || (IS_NOT_NULL(reg->extp) && reg->extp->callout_num != 0)\r
b602265d
DG
6434#endif\r
6435 )\r
14b0e578
CS
6436 reg->stack_pop_level = STACK_POP_LEVEL_ALL;\r
6437 else {\r
6438 if (reg->bt_mem_start != 0)\r
b602265d 6439 reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;\r
14b0e578 6440 else\r
b602265d 6441 reg->stack_pop_level = STACK_POP_LEVEL_FREE;\r
14b0e578 6442 }\r
b26691c4
LG
6443\r
6444 r = ops_make_string_pool(reg);\r
6445 if (r != 0) goto err;\r
14b0e578 6446 }\r
b602265d 6447#ifdef USE_CALL\r
14b0e578
CS
6448 else if (scan_env.num_call > 0) {\r
6449 unset_addr_list_end(&uslist);\r
6450 }\r
6451#endif\r
6452 onig_node_free(root);\r
6453\r
6454#ifdef ONIG_DEBUG_COMPILE\r
14b0e578 6455 onig_print_names(stderr, reg);\r
b602265d 6456 onig_print_compiled_byte_code_list(stderr, reg);\r
14b0e578
CS
6457#endif\r
6458\r
b26691c4
LG
6459#ifdef USE_DIRECT_THREADED_CODE\r
6460 /* opcode -> opaddr */\r
6461 onig_init_for_match_at(reg);\r
6462#endif\r
6463\r
14b0e578 6464 end:\r
14b0e578
CS
6465 return r;\r
6466\r
6467 err_unset:\r
b602265d 6468#ifdef USE_CALL\r
14b0e578
CS
6469 if (scan_env.num_call > 0) {\r
6470 unset_addr_list_end(&uslist);\r
6471 }\r
6472#endif\r
6473 err:\r
6474 if (IS_NOT_NULL(scan_env.error)) {\r
6475 if (IS_NOT_NULL(einfo)) {\r
14b0e578
CS
6476 einfo->par = scan_env.error;\r
6477 einfo->par_end = scan_env.error_end;\r
6478 }\r
6479 }\r
6480\r
6481 onig_node_free(root);\r
b602265d
DG
6482 if (IS_NOT_NULL(scan_env.mem_env_dynamic))\r
6483 xfree(scan_env.mem_env_dynamic);\r
14b0e578
CS
6484 return r;\r
6485}\r
6486\r
b602265d
DG
6487\r
6488static int onig_inited = 0;\r
6489\r
14b0e578 6490extern int\r
b602265d
DG
6491onig_reg_init(regex_t* reg, OnigOptionType option, OnigCaseFoldType case_fold_flag,\r
6492 OnigEncoding enc, OnigSyntaxType* syntax)\r
14b0e578
CS
6493{\r
6494 int r;\r
14b0e578 6495\r
b602265d 6496 xmemset(reg, 0, sizeof(*reg));\r
14b0e578 6497\r
b602265d
DG
6498 if (onig_inited == 0) {\r
6499#if 0\r
6500 return ONIGERR_LIBRARY_IS_NOT_INITIALIZED;\r
6501#else\r
6502 r = onig_initialize(&enc, 1);\r
6503 if (r != 0)\r
6504 return ONIGERR_FAIL_TO_INITIALIZE;\r
14b0e578 6505\r
b602265d
DG
6506 onig_warning("You didn't call onig_initialize() explicitly");\r
6507#endif\r
6508 }\r
14b0e578
CS
6509\r
6510 if (IS_NULL(reg))\r
6511 return ONIGERR_INVALID_ARGUMENT;\r
6512\r
6513 if (ONIGENC_IS_UNDEF(enc))\r
6514 return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED;\r
6515\r
6516 if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))\r
6517 == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {\r
6518 return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;\r
6519 }\r
6520\r
14b0e578
CS
6521 if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {\r
6522 option |= syntax->options;\r
6523 option &= ~ONIG_OPTION_SINGLELINE;\r
6524 }\r
6525 else\r
6526 option |= syntax->options;\r
6527\r
6528 (reg)->enc = enc;\r
6529 (reg)->options = option;\r
6530 (reg)->syntax = syntax;\r
6531 (reg)->optimize = 0;\r
6532 (reg)->exact = (UChar* )NULL;\r
b26691c4 6533 (reg)->extp = (RegexExt* )NULL;\r
14b0e578 6534\r
b26691c4
LG
6535 (reg)->ops = (Operation* )NULL;\r
6536 (reg)->ops_curr = (Operation* )NULL;\r
6537 (reg)->ops_used = 0;\r
6538 (reg)->ops_alloc = 0;\r
14b0e578
CS
6539 (reg)->name_table = (void* )NULL;\r
6540\r
6541 (reg)->case_fold_flag = case_fold_flag;\r
6542 return 0;\r
6543}\r
6544\r
6545extern int\r
b602265d
DG
6546onig_new_without_alloc(regex_t* reg,\r
6547 const UChar* pattern, const UChar* pattern_end,\r
6548 OnigOptionType option, OnigEncoding enc,\r
6549 OnigSyntaxType* syntax, OnigErrorInfo* einfo)\r
14b0e578
CS
6550{\r
6551 int r;\r
6552\r
6553 r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);\r
b602265d 6554 if (r != 0) return r;\r
14b0e578
CS
6555\r
6556 r = onig_compile(reg, pattern, pattern_end, einfo);\r
6557 return r;\r
6558}\r
6559\r
6560extern int\r
6561onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,\r
b602265d
DG
6562 OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,\r
6563 OnigErrorInfo* einfo)\r
14b0e578
CS
6564{\r
6565 int r;\r
6566\r
6567 *reg = (regex_t* )xmalloc(sizeof(regex_t));\r
6568 if (IS_NULL(*reg)) return ONIGERR_MEMORY;\r
6569\r
6570 r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);\r
b602265d 6571 if (r != 0) goto err;\r
14b0e578
CS
6572\r
6573 r = onig_compile(*reg, pattern, pattern_end, einfo);\r
b602265d 6574 if (r != 0) {\r
14b0e578
CS
6575 err:\r
6576 onig_free(*reg);\r
6577 *reg = NULL;\r
6578 }\r
6579 return r;\r
6580}\r
6581\r
14b0e578 6582extern int\r
b602265d 6583onig_initialize(OnigEncoding encodings[], int n)\r
14b0e578 6584{\r
b602265d
DG
6585 int i;\r
6586 int r;\r
6587\r
14b0e578
CS
6588 if (onig_inited != 0)\r
6589 return 0;\r
6590\r
b602265d 6591 onigenc_init();\r
14b0e578
CS
6592\r
6593 onig_inited = 1;\r
6594\r
b602265d
DG
6595 for (i = 0; i < n; i++) {\r
6596 OnigEncoding enc = encodings[i];\r
6597 r = onig_initialize_encoding(enc);\r
6598 if (r != 0)\r
6599 return r;\r
6600 }\r
14b0e578 6601\r
b602265d 6602 return ONIG_NORMAL;\r
14b0e578
CS
6603}\r
6604\r
b602265d
DG
6605typedef struct EndCallListItem {\r
6606 struct EndCallListItem* next;\r
6607 void (*func)(void);\r
6608} EndCallListItemType;\r
14b0e578 6609\r
b602265d 6610static EndCallListItemType* EndCallTop;\r
14b0e578
CS
6611\r
6612extern void onig_add_end_call(void (*func)(void))\r
6613{\r
b602265d 6614 EndCallListItemType* item;\r
14b0e578 6615\r
b602265d 6616 item = (EndCallListItemType* )xmalloc(sizeof(*item));\r
14b0e578
CS
6617 if (item == 0) return ;\r
6618\r
6619 item->next = EndCallTop;\r
6620 item->func = func;\r
6621\r
6622 EndCallTop = item;\r
6623}\r
6624\r
6625static void\r
6626exec_end_call_list(void)\r
6627{\r
b602265d 6628 EndCallListItemType* prev;\r
14b0e578
CS
6629 void (*func)(void);\r
6630\r
6631 while (EndCallTop != 0) {\r
6632 func = EndCallTop->func;\r
6633 (*func)();\r
6634\r
6635 prev = EndCallTop;\r
6636 EndCallTop = EndCallTop->next;\r
6637 xfree(prev);\r
6638 }\r
6639}\r
6640\r
6641extern int\r
6642onig_end(void)\r
6643{\r
14b0e578
CS
6644 exec_end_call_list();\r
6645\r
b602265d
DG
6646#ifdef USE_CALLOUT\r
6647 onig_global_callout_names_free();\r
14b0e578
CS
6648#endif\r
6649\r
b602265d 6650 onigenc_end();\r
14b0e578
CS
6651\r
6652 onig_inited = 0;\r
6653\r
14b0e578
CS
6654 return 0;\r
6655}\r
6656\r
6657extern int\r
6658onig_is_in_code_range(const UChar* p, OnigCodePoint code)\r
6659{\r
6660 OnigCodePoint n, *data;\r
6661 OnigCodePoint low, high, x;\r
6662\r
6663 GET_CODE_POINT(n, p);\r
6664 data = (OnigCodePoint* )p;\r
6665 data++;\r
6666\r
6667 for (low = 0, high = n; low < high; ) {\r
6668 x = (low + high) >> 1;\r
6669 if (code > data[x * 2 + 1])\r
6670 low = x + 1;\r
6671 else\r
6672 high = x;\r
6673 }\r
6674\r
6675 return ((low < n && code >= data[low * 2]) ? 1 : 0);\r
6676}\r
6677\r
6678extern int\r
b602265d 6679onig_is_code_in_cc_len(int elen, OnigCodePoint code, /* CClassNode* */ void* cc_arg)\r
14b0e578
CS
6680{\r
6681 int found;\r
b602265d 6682 CClassNode* cc = (CClassNode* )cc_arg;\r
14b0e578
CS
6683\r
6684 if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {\r
6685 if (IS_NULL(cc->mbuf)) {\r
6686 found = 0;\r
6687 }\r
6688 else {\r
b26691c4 6689 found = onig_is_in_code_range(cc->mbuf->p, code) != 0;\r
14b0e578
CS
6690 }\r
6691 }\r
6692 else {\r
b26691c4 6693 found = BITSET_AT(cc->bs, code) != 0;\r
14b0e578
CS
6694 }\r
6695\r
6696 if (IS_NCCLASS_NOT(cc))\r
6697 return !found;\r
6698 else\r
6699 return found;\r
6700}\r
6701\r
6702extern int\r
6703onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)\r
6704{\r
6705 int len;\r
6706\r
6707 if (ONIGENC_MBC_MINLEN(enc) > 1) {\r
6708 len = 2;\r
6709 }\r
6710 else {\r
6711 len = ONIGENC_CODE_TO_MBCLEN(enc, code);\r
b26691c4 6712 if (len < 0) return 0;\r
14b0e578
CS
6713 }\r
6714 return onig_is_code_in_cc_len(len, code, cc);\r
6715}\r
6716\r
6717\r
b602265d 6718#ifdef ONIG_DEBUG_PARSE\r
14b0e578
CS
6719\r
6720static void\r
6721p_string(FILE* f, int len, UChar* s)\r
6722{\r
6723 fputs(":", f);\r
6724 while (len-- > 0) { fputc(*s++, f); }\r
6725}\r
6726\r
/* Write indent space characters to f; writes nothing when indent <= 0. */
static void
Indent(FILE* f, int indent)
{
  int remaining = indent;

  while (remaining-- > 0) putc(' ', f);
}
6733\r
6734static void\r
6735print_indent_tree(FILE* f, Node* node, int indent)\r
6736{\r
b602265d
DG
6737 int i;\r
6738 NodeType type;\r
14b0e578 6739 UChar* p;\r
b602265d 6740 int add = 3;\r
14b0e578
CS
6741\r
6742 Indent(f, indent);\r
6743 if (IS_NULL(node)) {\r
6744 fprintf(f, "ERROR: null node!!!\n");\r
6745 exit (0);\r
6746 }\r
6747\r
b602265d 6748 type = NODE_TYPE(node);\r
14b0e578 6749 switch (type) {\r
b602265d
DG
6750 case NODE_LIST:\r
6751 case NODE_ALT:\r
6752 if (type == NODE_LIST)\r
6753 fprintf(f, "<list:%p>\n", node);\r
14b0e578 6754 else\r
b602265d 6755 fprintf(f, "<alt:%p>\n", node);\r
14b0e578 6756\r
b602265d
DG
6757 print_indent_tree(f, NODE_CAR(node), indent + add);\r
6758 while (IS_NOT_NULL(node = NODE_CDR(node))) {\r
6759 if (NODE_TYPE(node) != type) {\r
6760 fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NODE_TYPE(node));\r
6761 exit(0);\r
14b0e578 6762 }\r
b602265d 6763 print_indent_tree(f, NODE_CAR(node), indent + add);\r
14b0e578
CS
6764 }\r
6765 break;\r
6766\r
b602265d 6767 case NODE_STRING:\r
b26691c4
LG
6768 {\r
6769 char* mode;\r
6770 char* dont;\r
6771 char* good;\r
6772\r
6773 if (NODE_STRING_IS_RAW(node))\r
6774 mode = "-raw";\r
6775 else if (NODE_STRING_IS_AMBIG(node))\r
6776 mode = "-ambig";\r
6777 else\r
6778 mode = "";\r
6779\r
6780 if (NODE_STRING_IS_GOOD_AMBIG(node))\r
6781 good = "-good";\r
6782 else\r
6783 good = "";\r
6784\r
6785 if (NODE_STRING_IS_DONT_GET_OPT_INFO(node))\r
6786 dont = " (dont-opt)";\r
6787 else\r
6788 dont = "";\r
6789\r
6790 fprintf(f, "<string%s%s%s:%p>", mode, good, dont, node);\r
6791 for (p = STR_(node)->s; p < STR_(node)->end; p++) {\r
6792 if (*p >= 0x20 && *p < 0x7f)\r
6793 fputc(*p, f);\r
6794 else {\r
6795 fprintf(f, " 0x%02x", *p);\r
6796 }\r
14b0e578
CS
6797 }\r
6798 }\r
6799 break;\r
6800\r
b602265d
DG
6801 case NODE_CCLASS:\r
6802 fprintf(f, "<cclass:%p>", node);\r
6803 if (IS_NCCLASS_NOT(CCLASS_(node))) fputs(" not", f);\r
6804 if (CCLASS_(node)->mbuf) {\r
6805 BBuf* bbuf = CCLASS_(node)->mbuf;\r
14b0e578 6806 for (i = 0; i < bbuf->used; i++) {\r
b602265d
DG
6807 if (i > 0) fprintf(f, ",");\r
6808 fprintf(f, "%0x", bbuf->p[i]);\r
14b0e578
CS
6809 }\r
6810 }\r
6811 break;\r
6812\r
b602265d
DG
6813 case NODE_CTYPE:\r
6814 fprintf(f, "<ctype:%p> ", node);\r
6815 switch (CTYPE_(node)->ctype) {\r
6816 case CTYPE_ANYCHAR:\r
6817 fprintf(f, "<anychar:%p>", node);\r
6818 break;\r
6819\r
14b0e578 6820 case ONIGENC_CTYPE_WORD:\r
b602265d
DG
6821 if (CTYPE_(node)->not != 0)\r
6822 fputs("not word", f);\r
14b0e578 6823 else\r
b602265d
DG
6824 fputs("word", f);\r
6825\r
6826 if (CTYPE_(node)->ascii_mode != 0)\r
6827 fputs(" (ascii)", f);\r
6828\r
14b0e578
CS
6829 break;\r
6830\r
6831 default:\r
6832 fprintf(f, "ERROR: undefined ctype.\n");\r
6833 exit(0);\r
6834 }\r
6835 break;\r
6836\r
b602265d
DG
6837 case NODE_ANCHOR:\r
6838 fprintf(f, "<anchor:%p> ", node);\r
6839 switch (ANCHOR_(node)->type) {\r
b26691c4
LG
6840 case ANCR_BEGIN_BUF: fputs("begin buf", f); break;\r
6841 case ANCR_END_BUF: fputs("end buf", f); break;\r
6842 case ANCR_BEGIN_LINE: fputs("begin line", f); break;\r
6843 case ANCR_END_LINE: fputs("end line", f); break;\r
6844 case ANCR_SEMI_END_BUF: fputs("semi end buf", f); break;\r
6845 case ANCR_BEGIN_POSITION: fputs("begin position", f); break;\r
6846\r
6847 case ANCR_WORD_BOUNDARY: fputs("word boundary", f); break;\r
6848 case ANCR_NO_WORD_BOUNDARY: fputs("not word boundary", f); break;\r
14b0e578 6849#ifdef USE_WORD_BEGIN_END\r
b26691c4
LG
6850 case ANCR_WORD_BEGIN: fputs("word begin", f); break;\r
6851 case ANCR_WORD_END: fputs("word end", f); break;\r
14b0e578 6852#endif\r
b26691c4
LG
6853 case ANCR_TEXT_SEGMENT_BOUNDARY:\r
6854 fputs("text-segment boundary", f); break;\r
6855 case ANCR_NO_TEXT_SEGMENT_BOUNDARY:\r
6856 fputs("no text-segment boundary", f); break;\r
6857 case ANCR_PREC_READ:\r
b602265d
DG
6858 fprintf(f, "prec read\n");\r
6859 print_indent_tree(f, NODE_BODY(node), indent + add);\r
6860 break;\r
b26691c4 6861 case ANCR_PREC_READ_NOT:\r
b602265d
DG
6862 fprintf(f, "prec read not\n");\r
6863 print_indent_tree(f, NODE_BODY(node), indent + add);\r
6864 break;\r
b26691c4 6865 case ANCR_LOOK_BEHIND:\r
b602265d
DG
6866 fprintf(f, "look behind\n");\r
6867 print_indent_tree(f, NODE_BODY(node), indent + add);\r
6868 break;\r
b26691c4 6869 case ANCR_LOOK_BEHIND_NOT:\r
b602265d
DG
6870 fprintf(f, "look behind not\n");\r
6871 print_indent_tree(f, NODE_BODY(node), indent + add);\r
6872 break;\r
14b0e578
CS
6873\r
6874 default:\r
6875 fprintf(f, "ERROR: undefined anchor type.\n");\r
6876 break;\r
6877 }\r
6878 break;\r
6879\r
b602265d 6880 case NODE_BACKREF:\r
14b0e578
CS
6881 {\r
6882 int* p;\r
b602265d 6883 BackRefNode* br = BACKREF_(node);\r
14b0e578 6884 p = BACKREFS_P(br);\r
b602265d 6885 fprintf(f, "<backref%s:%p>", NODE_IS_CHECKER(node) ? "-checker" : "", node);\r
14b0e578 6886 for (i = 0; i < br->back_num; i++) {\r
b602265d
DG
6887 if (i > 0) fputs(", ", f);\r
6888 fprintf(f, "%d", p[i]);\r
14b0e578
CS
6889 }\r
6890 }\r
6891 break;\r
6892\r
b602265d
DG
6893#ifdef USE_CALL\r
6894 case NODE_CALL:\r
14b0e578 6895 {\r
b602265d
DG
6896 CallNode* cn = CALL_(node);\r
6897 fprintf(f, "<call:%p>", node);\r
14b0e578
CS
6898 p_string(f, cn->name_end - cn->name, cn->name);\r
6899 }\r
6900 break;\r
6901#endif\r
6902\r
b602265d
DG
6903 case NODE_QUANT:\r
6904 fprintf(f, "<quantifier:%p>{%d,%d}%s\n", node,\r
6905 QUANT_(node)->lower, QUANT_(node)->upper,\r
6906 (QUANT_(node)->greedy ? "" : "?"));\r
6907 print_indent_tree(f, NODE_BODY(node), indent + add);\r
14b0e578
CS
6908 break;\r
6909\r
b26691c4
LG
6910 case NODE_BAG:\r
6911 fprintf(f, "<bag:%p> ", node);\r
6912 switch (BAG_(node)->type) {\r
6913 case BAG_OPTION:\r
6914 fprintf(f, "option:%d", BAG_(node)->o.options);\r
14b0e578 6915 break;\r
b26691c4
LG
6916 case BAG_MEMORY:\r
6917 fprintf(f, "memory:%d", BAG_(node)->m.regnum);\r
14b0e578 6918 break;\r
b26691c4 6919 case BAG_STOP_BACKTRACK:\r
14b0e578
CS
6920 fprintf(f, "stop-bt");\r
6921 break;\r
b26691c4
LG
6922 case BAG_IF_ELSE:\r
6923 fprintf(f, "if-else");\r
14b0e578
CS
6924 break;\r
6925 }\r
6926 fprintf(f, "\n");\r
b602265d
DG
6927 print_indent_tree(f, NODE_BODY(node), indent + add);\r
6928 break;\r
6929\r
6930 case NODE_GIMMICK:\r
6931 fprintf(f, "<gimmick:%p> ", node);\r
6932 switch (GIMMICK_(node)->type) {\r
6933 case GIMMICK_FAIL:\r
6934 fprintf(f, "fail");\r
6935 break;\r
b602265d
DG
6936 case GIMMICK_SAVE:\r
6937 fprintf(f, "save:%d:%d", GIMMICK_(node)->detail_type, GIMMICK_(node)->id);\r
6938 break;\r
6939 case GIMMICK_UPDATE_VAR:\r
6940 fprintf(f, "update_var:%d:%d", GIMMICK_(node)->detail_type, GIMMICK_(node)->id);\r
6941 break;\r
6942#ifdef USE_CALLOUT\r
6943 case GIMMICK_CALLOUT:\r
6944 switch (GIMMICK_(node)->detail_type) {\r
6945 case ONIG_CALLOUT_OF_CONTENTS:\r
6946 fprintf(f, "callout:contents:%d", GIMMICK_(node)->num);\r
6947 break;\r
6948 case ONIG_CALLOUT_OF_NAME:\r
6949 fprintf(f, "callout:name:%d:%d", GIMMICK_(node)->id, GIMMICK_(node)->num);\r
6950 break;\r
6951 }\r
6952#endif\r
6953 }\r
14b0e578
CS
6954 break;\r
6955\r
6956 default:\r
b602265d 6957 fprintf(f, "print_indent_tree: undefined node type %d\n", NODE_TYPE(node));\r
14b0e578
CS
6958 break;\r
6959 }\r
6960\r
b602265d 6961 if (type != NODE_LIST && type != NODE_ALT && type != NODE_QUANT &&\r
b26691c4 6962 type != NODE_BAG)\r
14b0e578
CS
6963 fprintf(f, "\n");\r
6964 fflush(f);\r
6965}\r
14b0e578 6966\r
14b0e578
CS
6967static void\r
6968print_tree(FILE* f, Node* node)\r
6969{\r
6970 print_indent_tree(f, node, 0);\r
6971}\r
6972#endif\r