+++ /dev/null
-/**********************************************************************\r
- regcomp.c - Oniguruma (regular expression library)\r
-**********************************************************************/\r
-/*-\r
- * Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>\r
- * All rights reserved.\r
- *\r
- * Redistribution and use in source and binary forms, with or without\r
- * modification, are permitted provided that the following conditions\r
- * are met:\r
- * 1. Redistributions of source code must retain the above copyright\r
- * notice, this list of conditions and the following disclaimer.\r
- * 2. Redistributions in binary form must reproduce the above copyright\r
- * notice, this list of conditions and the following disclaimer in the\r
- * documentation and/or other materials provided with the distribution.\r
- *\r
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND\r
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\r
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\r
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE\r
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\r
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS\r
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)\r
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT\r
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY\r
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF\r
- * SUCH DAMAGE.\r
- */\r
-\r
-#include "regparse.h"\r
-\r
-#define OPS_INIT_SIZE 8\r
-\r
-OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;\r
-\r
-#if 0\r
-typedef struct {\r
- int n;\r
- int alloc;\r
- int* v;\r
-} int_stack;\r
-\r
-static int\r
-make_int_stack(int_stack** rs, int init_size)\r
-{\r
- int_stack* s;\r
- int* v;\r
-\r
- *rs = 0;\r
-\r
- s = xmalloc(sizeof(*s));\r
- if (IS_NULL(s)) return ONIGERR_MEMORY;\r
-\r
- v = (int* )xmalloc(sizeof(int) * init_size);\r
- if (IS_NULL(v)) {\r
- xfree(s);\r
- return ONIGERR_MEMORY;\r
- }\r
-\r
- s->n = 0;\r
- s->alloc = init_size;\r
- s->v = v;\r
-\r
- *rs = s;\r
- return ONIG_NORMAL;\r
-}\r
-\r
-static void\r
-free_int_stack(int_stack* s)\r
-{\r
- if (IS_NOT_NULL(s)) {\r
- if (IS_NOT_NULL(s->v))\r
- xfree(s->v);\r
- xfree(s);\r
- }\r
-}\r
-\r
-static int\r
-int_stack_push(int_stack* s, int v)\r
-{\r
- if (s->n >= s->alloc) {\r
- int new_size = s->alloc * 2;\r
- int* nv = (int* )xrealloc(s->v, sizeof(int) * new_size, sizeof(int) * s->alloc);\r
- if (IS_NULL(nv)) return ONIGERR_MEMORY;\r
-\r
- s->alloc = new_size;\r
- s->v = nv;\r
- }\r
-\r
- s->v[s->n] = v;\r
- s->n++;\r
- return ONIG_NORMAL;\r
-}\r
-\r
-static int\r
-int_stack_pop(int_stack* s)\r
-{\r
- int v;\r
-\r
-#ifdef ONIG_DEBUG\r
- if (s->n <= 0) {\r
- fprintf(stderr, "int_stack_pop: fail empty. %p\n", s);\r
- return 0;\r
- }\r
-#endif\r
-\r
- v = s->v[s->n];\r
- s->n--;\r
- return v;\r
-}\r
-#endif\r
-\r
-static int\r
-ops_init(regex_t* reg, int init_alloc_size)\r
-{\r
- Operation* p;\r
- size_t size;\r
-\r
- if (init_alloc_size > 0) {\r
- size = sizeof(Operation) * init_alloc_size;\r
- p = (Operation* )xmalloc(size);\r
- CHECK_NULL_RETURN_MEMERR(p);\r
-#ifdef USE_DIRECT_THREADED_CODE\r
- {\r
- enum OpCode* cp;\r
- size = sizeof(enum OpCode) * init_alloc_size;\r
- cp = (enum OpCode* )xmalloc(size);\r
- CHECK_NULL_RETURN_MEMERR(cp);\r
- reg->ocs = cp;\r
- }\r
-#endif\r
- }\r
- else {\r
- p = (Operation* )0;\r
-#ifdef USE_DIRECT_THREADED_CODE\r
- reg->ocs = (enum OpCode* )0;\r
-#endif\r
- }\r
-\r
- reg->ops = p;\r
- reg->ops_curr = 0; /* !!! not yet done ops_new() */\r
- reg->ops_alloc = init_alloc_size;\r
- reg->ops_used = 0;\r
-\r
- return ONIG_NORMAL;\r
-}\r
-\r
-static int\r
-ops_expand(regex_t* reg, int n)\r
-{\r
-#define MIN_OPS_EXPAND_SIZE 4\r
-\r
-#ifdef USE_DIRECT_THREADED_CODE\r
- enum OpCode* cp;\r
-#endif\r
- Operation* p;\r
- size_t size;\r
-\r
- if (n <= 0) n = MIN_OPS_EXPAND_SIZE;\r
-\r
- n += reg->ops_alloc;\r
-\r
- size = sizeof(Operation) * n;\r
- p = (Operation* )xrealloc(reg->ops, size, sizeof(Operation) * reg->ops_alloc);\r
- CHECK_NULL_RETURN_MEMERR(p);\r
-\r
-#ifdef USE_DIRECT_THREADED_CODE\r
- size = sizeof(enum OpCode) * n;\r
- cp = (enum OpCode* )xrealloc(reg->ocs, size, sizeof(enum OpCode) * reg->ops_alloc);\r
- CHECK_NULL_RETURN_MEMERR(cp);\r
- reg->ocs = cp;\r
-#endif\r
-\r
- reg->ops = p;\r
- reg->ops_alloc = n;\r
- if (reg->ops_used == 0)\r
- reg->ops_curr = 0;\r
- else\r
- reg->ops_curr = reg->ops + (reg->ops_used - 1);\r
-\r
- return ONIG_NORMAL;\r
-}\r
-\r
-static int\r
-ops_new(regex_t* reg)\r
-{\r
- int r;\r
-\r
- if (reg->ops_used >= reg->ops_alloc) {\r
- r = ops_expand(reg, reg->ops_alloc);\r
- if (r != ONIG_NORMAL) return r;\r
- }\r
-\r
- reg->ops_curr = reg->ops + reg->ops_used;\r
- reg->ops_used++;\r
-\r
- xmemset(reg->ops_curr, 0, sizeof(Operation));\r
- return ONIG_NORMAL;\r
-}\r
-\r
-static int\r
-is_in_string_pool(regex_t* reg, UChar* s)\r
-{\r
- return (s >= reg->string_pool && s < reg->string_pool_end);\r
-}\r
-\r
-static void\r
-ops_free(regex_t* reg)\r
-{\r
- int i;\r
-\r
- if (IS_NULL(reg->ops)) return ;\r
-\r
- for (i = 0; i < (int )reg->ops_used; i++) {\r
- enum OpCode opcode;\r
- Operation* op;\r
-\r
- op = reg->ops + i;\r
-\r
-#ifdef USE_DIRECT_THREADED_CODE\r
- opcode = *(reg->ocs + i);\r
-#else\r
- opcode = op->opcode;\r
-#endif\r
-\r
- switch (opcode) {\r
- case OP_EXACTMBN:\r
- if (! is_in_string_pool(reg, op->exact_len_n.s))\r
- xfree(op->exact_len_n.s);\r
- break;\r
- case OP_EXACTN: case OP_EXACTMB2N: case OP_EXACTMB3N: case OP_EXACTN_IC:\r
- if (! is_in_string_pool(reg, op->exact_n.s))\r
- xfree(op->exact_n.s);\r
- break;\r
- case OP_EXACT1: case OP_EXACT2: case OP_EXACT3: case OP_EXACT4:\r
- case OP_EXACT5: case OP_EXACTMB2N1: case OP_EXACTMB2N2:\r
- case OP_EXACTMB2N3: case OP_EXACT1_IC:\r
- break;\r
-\r
- case OP_CCLASS_NOT: case OP_CCLASS:\r
- xfree(op->cclass.bsp);\r
- break;\r
-\r
- case OP_CCLASS_MB_NOT: case OP_CCLASS_MB:\r
- xfree(op->cclass_mb.mb);\r
- break;\r
- case OP_CCLASS_MIX_NOT: case OP_CCLASS_MIX:\r
- xfree(op->cclass_mix.mb);\r
- xfree(op->cclass_mix.bsp);\r
- break;\r
-\r
- case OP_BACKREF1: case OP_BACKREF2: case OP_BACKREF_N: case OP_BACKREF_N_IC:\r
- break;\r
- case OP_BACKREF_MULTI: case OP_BACKREF_MULTI_IC:\r
- case OP_BACKREF_WITH_LEVEL:\r
- case OP_BACKREF_WITH_LEVEL_IC:\r
- case OP_BACKREF_CHECK:\r
- case OP_BACKREF_CHECK_WITH_LEVEL:\r
- if (op->backref_general.num != 1)\r
- xfree(op->backref_general.ns);\r
- break;\r
-\r
- default:\r
- break;\r
- }\r
- }\r
-\r
- xfree(reg->ops);\r
-#ifdef USE_DIRECT_THREADED_CODE\r
- xfree(reg->ocs);\r
- reg->ocs = 0;\r
-#endif\r
-\r
- reg->ops = 0;\r
- reg->ops_curr = 0;\r
- reg->ops_alloc = 0;\r
- reg->ops_used = 0;\r
-}\r
-\r
-static int\r
-ops_calc_size_of_string_pool(regex_t* reg)\r
-{\r
- int i;\r
- int total;\r
-\r
- if (IS_NULL(reg->ops)) return 0;\r
-\r
- total = 0;\r
- for (i = 0; i < (int )reg->ops_used; i++) {\r
- enum OpCode opcode;\r
- Operation* op;\r
-\r
- op = reg->ops + i;\r
-#ifdef USE_DIRECT_THREADED_CODE\r
- opcode = *(reg->ocs + i);\r
-#else\r
- opcode = op->opcode;\r
-#endif\r
-\r
- switch (opcode) {\r
- case OP_EXACTMBN:\r
- total += op->exact_len_n.len * op->exact_len_n.n;\r
- break;\r
- case OP_EXACTN:\r
- case OP_EXACTN_IC:\r
- total += op->exact_n.n;\r
- break;\r
- case OP_EXACTMB2N:\r
- total += op->exact_n.n * 2;\r
- break;\r
- case OP_EXACTMB3N:\r
- total += op->exact_n.n * 3;\r
- break;\r
-\r
- default:\r
- break;\r
- }\r
- }\r
-\r
- return total;\r
-}\r
-\r
-static int\r
-ops_make_string_pool(regex_t* reg)\r
-{\r
- int i;\r
- int len;\r
- int size;\r
- UChar* pool;\r
- UChar* curr;\r
-\r
- size = ops_calc_size_of_string_pool(reg);\r
- if (size <= 0) {\r
- return 0;\r
- }\r
-\r
- curr = pool = (UChar* )xmalloc((size_t )size);\r
- CHECK_NULL_RETURN_MEMERR(pool);\r
-\r
- for (i = 0; i < (int )reg->ops_used; i++) {\r
- enum OpCode opcode;\r
- Operation* op;\r
-\r
- op = reg->ops + i;\r
-#ifdef USE_DIRECT_THREADED_CODE\r
- opcode = *(reg->ocs + i);\r
-#else\r
- opcode = op->opcode;\r
-#endif\r
-\r
- switch (opcode) {\r
- case OP_EXACTMBN:\r
- len = op->exact_len_n.len * op->exact_len_n.n;\r
- xmemcpy(curr, op->exact_len_n.s, len);\r
- xfree(op->exact_len_n.s);\r
- op->exact_len_n.s = curr;\r
- curr += len;\r
- break;\r
- case OP_EXACTN:\r
- case OP_EXACTN_IC:\r
- len = op->exact_n.n;\r
- copy:\r
- xmemcpy(curr, op->exact_n.s, len);\r
- xfree(op->exact_n.s);\r
- op->exact_n.s = curr;\r
- curr += len;\r
- break;\r
- case OP_EXACTMB2N:\r
- len = op->exact_n.n * 2;\r
- goto copy;\r
- break;\r
- case OP_EXACTMB3N:\r
- len = op->exact_n.n * 3;\r
- goto copy;\r
- break;\r
-\r
- default:\r
- break;\r
- }\r
- }\r
-\r
- reg->string_pool = pool;\r
- reg->string_pool_end = pool + size;\r
- return 0;\r
-}\r
-\r
-extern OnigCaseFoldType\r
-onig_get_default_case_fold_flag(void)\r
-{\r
- return OnigDefaultCaseFoldFlag;\r
-}\r
-\r
-extern int\r
-onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)\r
-{\r
- OnigDefaultCaseFoldFlag = case_fold_flag;\r
- return 0;\r
-}\r
-\r
-static int\r
-int_multiply_cmp(int x, int y, int v)\r
-{\r
- if (x == 0 || y == 0) return -1;\r
-\r
- if (x < INT_MAX / y) {\r
- int xy = x * y;\r
- if (xy > v) return 1;\r
- else {\r
- if (xy == v) return 0;\r
- else return -1;\r
- }\r
- }\r
- else\r
- return 1;\r
-}\r
-\r
-extern int\r
-onig_positive_int_multiply(int x, int y)\r
-{\r
- if (x == 0 || y == 0) return 0;\r
-\r
- if (x < INT_MAX / y)\r
- return x * y;\r
- else\r
- return -1;\r
-}\r
-\r
-\r
-static void\r
-swap_node(Node* a, Node* b)\r
-{\r
- Node c;\r
-\r
- c = *a; *a = *b; *b = c;\r
-\r
- if (NODE_TYPE(a) == NODE_STRING) {\r
- StrNode* sn = STR_(a);\r
- if (sn->capacity == 0) {\r
- int len = (int )(sn->end - sn->s);\r
- sn->s = sn->buf;\r
- sn->end = sn->s + len;\r
- }\r
- }\r
-\r
- if (NODE_TYPE(b) == NODE_STRING) {\r
- StrNode* sn = STR_(b);\r
- if (sn->capacity == 0) {\r
- int len = (int )(sn->end - sn->s);\r
- sn->s = sn->buf;\r
- sn->end = sn->s + len;\r
- }\r
- }\r
-}\r
-\r
-static OnigLen\r
-distance_add(OnigLen d1, OnigLen d2)\r
-{\r
- if (d1 == INFINITE_LEN || d2 == INFINITE_LEN)\r
- return INFINITE_LEN;\r
- else {\r
- if (d1 <= INFINITE_LEN - d2) return d1 + d2;\r
- else return INFINITE_LEN;\r
- }\r
-}\r
-\r
-static OnigLen\r
-distance_multiply(OnigLen d, int m)\r
-{\r
- if (m == 0) return 0;\r
-\r
- if (d < INFINITE_LEN / m)\r
- return d * m;\r
- else\r
- return INFINITE_LEN;\r
-}\r
-\r
-static int\r
-bitset_is_empty(BitSetRef bs)\r
-{\r
- int i;\r
-\r
- for (i = 0; i < (int )BITSET_SIZE; i++) {\r
- if (bs[i] != 0) return 0;\r
- }\r
- return 1;\r
-}\r
-\r
-#ifdef USE_CALL\r
-\r
-static int\r
-unset_addr_list_init(UnsetAddrList* list, int size)\r
-{\r
- UnsetAddr* p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);\r
- CHECK_NULL_RETURN_MEMERR(p);\r
-\r
- list->num = 0;\r
- list->alloc = size;\r
- list->us = p;\r
- return 0;\r
-}\r
-\r
-static void\r
-unset_addr_list_end(UnsetAddrList* list)\r
-{\r
- if (IS_NOT_NULL(list->us))\r
- xfree(list->us);\r
-}\r
-\r
-static int\r
-unset_addr_list_add(UnsetAddrList* list, int offset, struct _Node* node)\r
-{\r
- UnsetAddr* p;\r
- int size;\r
-\r
- if (list->num >= list->alloc) {\r
- size = list->alloc * 2;\r
- p = (UnsetAddr* )xrealloc(list->us, sizeof(UnsetAddr) * size, sizeof(UnsetAddr)* list->alloc);\r
- CHECK_NULL_RETURN_MEMERR(p);\r
- list->alloc = size;\r
- list->us = p;\r
- }\r
-\r
- list->us[list->num].offset = offset;\r
- list->us[list->num].target = node;\r
- list->num++;\r
- return 0;\r
-}\r
-#endif /* USE_CALL */\r
-\r
-\r
-static int\r
-add_op(regex_t* reg, int opcode)\r
-{\r
- int r;\r
-\r
- r = ops_new(reg);\r
- if (r != ONIG_NORMAL) return r;\r
-\r
-#ifdef USE_DIRECT_THREADED_CODE\r
- *(reg->ocs + (reg->ops_curr - reg->ops)) = opcode;\r
-#else\r
- reg->ops_curr->opcode = opcode;\r
-#endif\r
-\r
- return 0;\r
-}\r
-\r
-static int compile_length_tree(Node* node, regex_t* reg);\r
-static int compile_tree(Node* node, regex_t* reg, ScanEnv* env);\r
-\r
-\r
-#define IS_NEED_STR_LEN_OP_EXACT(op) \\r
- ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\\r
- (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)\r
-\r
-static int\r
-select_str_opcode(int mb_len, int str_len, int ignore_case)\r
-{\r
- int op;\r
-\r
- if (ignore_case) {\r
- switch (str_len) {\r
- case 1: op = OP_EXACT1_IC; break;\r
- default: op = OP_EXACTN_IC; break;\r
- }\r
- }\r
- else {\r
- switch (mb_len) {\r
- case 1:\r
- switch (str_len) {\r
- case 1: op = OP_EXACT1; break;\r
- case 2: op = OP_EXACT2; break;\r
- case 3: op = OP_EXACT3; break;\r
- case 4: op = OP_EXACT4; break;\r
- case 5: op = OP_EXACT5; break;\r
- default: op = OP_EXACTN; break;\r
- }\r
- break;\r
-\r
- case 2:\r
- switch (str_len) {\r
- case 1: op = OP_EXACTMB2N1; break;\r
- case 2: op = OP_EXACTMB2N2; break;\r
- case 3: op = OP_EXACTMB2N3; break;\r
- default: op = OP_EXACTMB2N; break;\r
- }\r
- break;\r
-\r
- case 3:\r
- op = OP_EXACTMB3N;\r
- break;\r
-\r
- default:\r
- op = OP_EXACTMBN;\r
- break;\r
- }\r
- }\r
- return op;\r
-}\r
-\r
-static int\r
-is_strict_real_node(Node* node)\r
-{\r
- switch (NODE_TYPE(node)) {\r
- case NODE_STRING:\r
- {\r
- StrNode* sn = STR_(node);\r
- return (sn->end != sn->s);\r
- }\r
- break;\r
-\r
- case NODE_CCLASS:\r
- case NODE_CTYPE:\r
- return 1;\r
- break;\r
-\r
- default:\r
- return 0;\r
- break;\r
- }\r
-}\r
-\r
-static int\r
-compile_tree_empty_check(Node* node, regex_t* reg, int emptiness, ScanEnv* env)\r
-{\r
- int r;\r
- int saved_num_null_check = reg->num_null_check;\r
-\r
- if (emptiness != BODY_IS_NOT_EMPTY) {\r
- r = add_op(reg, OP_EMPTY_CHECK_START);\r
- if (r != 0) return r;\r
- COP(reg)->empty_check_start.mem = reg->num_null_check; /* NULL CHECK ID */\r
- reg->num_null_check++;\r
- }\r
-\r
- r = compile_tree(node, reg, env);\r
- if (r != 0) return r;\r
-\r
- if (emptiness != BODY_IS_NOT_EMPTY) {\r
- if (emptiness == BODY_IS_EMPTY_POSSIBILITY)\r
- r = add_op(reg, OP_EMPTY_CHECK_END);\r
- else if (emptiness == BODY_IS_EMPTY_POSSIBILITY_MEM)\r
- r = add_op(reg, OP_EMPTY_CHECK_END_MEMST);\r
- else if (emptiness == BODY_IS_EMPTY_POSSIBILITY_REC)\r
- r = add_op(reg, OP_EMPTY_CHECK_END_MEMST_PUSH);\r
-\r
- if (r != 0) return r;\r
- COP(reg)->empty_check_end.mem = saved_num_null_check; /* NULL CHECK ID */\r
- }\r
- return r;\r
-}\r
-\r
-#ifdef USE_CALL\r
-static int\r
-compile_call(CallNode* node, regex_t* reg, ScanEnv* env)\r
-{\r
- int r;\r
- int offset;\r
-\r
- r = add_op(reg, OP_CALL);\r
- if (r != 0) return r;\r
-\r
- COP(reg)->call.addr = 0; /* dummy addr. */\r
-\r
- offset = COP_CURR_OFFSET_BYTES(reg, call.addr);\r
- r = unset_addr_list_add(env->unset_addr_list, offset, NODE_CALL_BODY(node));\r
- return r;\r
-}\r
-#endif\r
-\r
-static int\r
-compile_tree_n_times(Node* node, int n, regex_t* reg, ScanEnv* env)\r
-{\r
- int i, r;\r
-\r
- for (i = 0; i < n; i++) {\r
- r = compile_tree(node, reg, env);\r
- if (r != 0) return r;\r
- }\r
- return 0;\r
-}\r
-\r
-static int\r
-add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, int str_len,\r
- regex_t* reg ARG_UNUSED, int ignore_case)\r
-{\r
- return 1;\r
-}\r
-\r
-static int\r
-add_compile_string(UChar* s, int mb_len, int str_len,\r
- regex_t* reg, int ignore_case)\r
-{\r
- int op;\r
- int r;\r
- int byte_len;\r
- UChar* p;\r
- UChar* end;\r
-\r
- op = select_str_opcode(mb_len, str_len, ignore_case);\r
- r = add_op(reg, op);\r
- if (r != 0) return r;\r
-\r
- byte_len = mb_len * str_len;\r
- end = s + byte_len;\r
-\r
- if (op == OP_EXACTMBN) {\r
- p = onigenc_strdup(reg->enc, s, end);\r
- CHECK_NULL_RETURN_MEMERR(p);\r
-\r
- COP(reg)->exact_len_n.len = mb_len;\r
- COP(reg)->exact_len_n.n = str_len;\r
- COP(reg)->exact_len_n.s = p;\r
- }\r
- else if (IS_NEED_STR_LEN_OP_EXACT(op)) {\r
- p = onigenc_strdup(reg->enc, s, end);\r
- CHECK_NULL_RETURN_MEMERR(p);\r
-\r
- if (op == OP_EXACTN_IC)\r
- COP(reg)->exact_n.n = byte_len;\r
- else\r
- COP(reg)->exact_n.n = str_len;\r
-\r
- COP(reg)->exact_n.s = p;\r
- }\r
- else {\r
- xmemcpy(COP(reg)->exact.s, s, (size_t )byte_len);\r
- COP(reg)->exact.s[byte_len] = '\0';\r
- }\r
-\r
- return 0;\r
-}\r
-\r
-static int\r
-compile_length_string_node(Node* node, regex_t* reg)\r
-{\r
- int rlen, r, len, prev_len, slen, ambig;\r
- UChar *p, *prev;\r
- StrNode* sn;\r
- OnigEncoding enc = reg->enc;\r
-\r
- sn = STR_(node);\r
- if (sn->end <= sn->s)\r
- return 0;\r
-\r
- ambig = NODE_STRING_IS_AMBIG(node);\r
-\r
- p = prev = sn->s;\r
- prev_len = enclen(enc, p);\r
- p += prev_len;\r
- slen = 1;\r
- rlen = 0;\r
-\r
- for (; p < sn->end; ) {\r
- len = enclen(enc, p);\r
- if (len == prev_len) {\r
- slen++;\r
- }\r
- else {\r
- r = add_compile_string_length(prev, prev_len, slen, reg, ambig);\r
- rlen += r;\r
- prev = p;\r
- slen = 1;\r
- prev_len = len;\r
- }\r
- p += len;\r
- }\r
-\r
- r = add_compile_string_length(prev, prev_len, slen, reg, ambig);\r
- rlen += r;\r
- return rlen;\r
-}\r
-\r
-static int\r
-compile_length_string_raw_node(StrNode* sn, regex_t* reg)\r
-{\r
- if (sn->end <= sn->s)\r
- return 0;\r
-\r
- return add_compile_string_length(sn->s, 1 /* sb */, (int )(sn->end - sn->s),\r
- reg, 0);\r
-}\r
-\r
-static int\r
-compile_string_node(Node* node, regex_t* reg)\r
-{\r
- int r, len, prev_len, slen, ambig;\r
- UChar *p, *prev, *end;\r
- StrNode* sn;\r
- OnigEncoding enc = reg->enc;\r
-\r
- sn = STR_(node);\r
- if (sn->end <= sn->s)\r
- return 0;\r
-\r
- end = sn->end;\r
- ambig = NODE_STRING_IS_AMBIG(node);\r
-\r
- p = prev = sn->s;\r
- prev_len = enclen(enc, p);\r
- p += prev_len;\r
- slen = 1;\r
-\r
- for (; p < end; ) {\r
- len = enclen(enc, p);\r
- if (len == prev_len) {\r
- slen++;\r
- }\r
- else {\r
- r = add_compile_string(prev, prev_len, slen, reg, ambig);\r
- if (r != 0) return r;\r
-\r
- prev = p;\r
- slen = 1;\r
- prev_len = len;\r
- }\r
-\r
- p += len;\r
- }\r
-\r
- return add_compile_string(prev, prev_len, slen, reg, ambig);\r
-}\r
-\r
-static int\r
-compile_string_raw_node(StrNode* sn, regex_t* reg)\r
-{\r
- if (sn->end <= sn->s)\r
- return 0;\r
-\r
- return add_compile_string(sn->s, 1 /* sb */, (int )(sn->end - sn->s), reg, 0);\r
-}\r
-\r
-static void*\r
-set_multi_byte_cclass(BBuf* mbuf, regex_t* reg)\r
-{\r
- size_t len;\r
- void* p;\r
-\r
- len = (size_t )mbuf->used;\r
- p = xmalloc(len);\r
- if (IS_NULL(p)) return NULL;\r
-\r
- xmemcpy(p, mbuf->p, len);\r
- return p;\r
-}\r
-\r
-static int\r
-compile_length_cclass_node(CClassNode* cc, regex_t* reg)\r
-{\r
- return 1;\r
-}\r
-\r
-static int\r
-compile_cclass_node(CClassNode* cc, regex_t* reg)\r
-{\r
- int r;\r
-\r
- if (IS_NULL(cc->mbuf)) {\r
- r = add_op(reg, IS_NCCLASS_NOT(cc) ? OP_CCLASS_NOT : OP_CCLASS);\r
- if (r != 0) return r;\r
-\r
- COP(reg)->cclass.bsp = xmalloc(SIZE_BITSET);\r
- CHECK_NULL_RETURN_MEMERR(COP(reg)->cclass.bsp);\r
- xmemcpy(COP(reg)->cclass.bsp, cc->bs, SIZE_BITSET);\r
- }\r
- else {\r
- void* p;\r
-\r
- if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {\r
- r = add_op(reg, IS_NCCLASS_NOT(cc) ? OP_CCLASS_MB_NOT : OP_CCLASS_MB);\r
- if (r != 0) return r;\r
-\r
- p = set_multi_byte_cclass(cc->mbuf, reg);\r
- CHECK_NULL_RETURN_MEMERR(p);\r
- COP(reg)->cclass_mb.mb = p;\r
- }\r
- else {\r
- r = add_op(reg, IS_NCCLASS_NOT(cc) ? OP_CCLASS_MIX_NOT : OP_CCLASS_MIX);\r
- if (r != 0) return r;\r
-\r
- COP(reg)->cclass_mix.bsp = xmalloc(SIZE_BITSET);\r
- CHECK_NULL_RETURN_MEMERR(COP(reg)->cclass_mix.bsp);\r
- xmemcpy(COP(reg)->cclass_mix.bsp, cc->bs, SIZE_BITSET);\r
-\r
- p = set_multi_byte_cclass(cc->mbuf, reg);\r
- CHECK_NULL_RETURN_MEMERR(p);\r
- COP(reg)->cclass_mix.mb = p;\r
- }\r
- }\r
-\r
- return 0;\r
-}\r
-\r
-static int\r
-entry_repeat_range(regex_t* reg, int id, int lower, int upper)\r
-{\r
-#define REPEAT_RANGE_ALLOC 4\r
-\r
- OnigRepeatRange* p;\r
-\r
- if (reg->repeat_range_alloc == 0) {\r
- p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);\r
- CHECK_NULL_RETURN_MEMERR(p);\r
- reg->repeat_range = p;\r
- reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;\r
- }\r
- else if (reg->repeat_range_alloc <= id) {\r
- int n;\r
- n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;\r
- p = (OnigRepeatRange* )xrealloc(reg->repeat_range,\r
- sizeof(OnigRepeatRange) * n,\r
- sizeof(OnigRepeatRange) * reg->repeat_range_alloc);\r
- CHECK_NULL_RETURN_MEMERR(p);\r
- reg->repeat_range = p;\r
- reg->repeat_range_alloc = n;\r
- }\r
- else {\r
- p = reg->repeat_range;\r
- }\r
-\r
- p[id].lower = lower;\r
- p[id].upper = (IS_INFINITE_REPEAT(upper) ? 0x7fffffff : upper);\r
- return 0;\r
-}\r
-\r
-static int\r
-compile_range_repeat_node(QuantNode* qn, int target_len, int emptiness,\r
- regex_t* reg, ScanEnv* env)\r
-{\r
- int r;\r
- int num_repeat = reg->num_repeat++;\r
-\r
- r = add_op(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);\r
- if (r != 0) return r;\r
-\r
- COP(reg)->repeat.id = num_repeat;\r
- COP(reg)->repeat.addr = SIZE_INC_OP + target_len + SIZE_OP_REPEAT_INC;\r
-\r
- r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);\r
- if (r != 0) return r;\r
-\r
- r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, emptiness, env);\r
- if (r != 0) return r;\r
-\r
- if (\r
-#ifdef USE_CALL\r
- NODE_IS_IN_MULTI_ENTRY(qn) ||\r
-#endif\r
- NODE_IS_IN_REAL_REPEAT(qn)) {\r
- r = add_op(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);\r
- }\r
- else {\r
- r = add_op(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);\r
- }\r
- if (r != 0) return r;\r
-\r
- COP(reg)->repeat_inc.id = num_repeat;\r
- return r;\r
-}\r
-\r
-static int\r
-is_anychar_infinite_greedy(QuantNode* qn)\r
-{\r
- if (qn->greedy && IS_INFINITE_REPEAT(qn->upper) &&\r
- NODE_IS_ANYCHAR(NODE_QUANT_BODY(qn)))\r
- return 1;\r
- else\r
- return 0;\r
-}\r
-\r
-#define QUANTIFIER_EXPAND_LIMIT_SIZE 10\r
-#define CKN_ON (ckn > 0)\r
-\r
-static int\r
-compile_length_quantifier_node(QuantNode* qn, regex_t* reg)\r
-{\r
- int len, mod_tlen;\r
- int infinite = IS_INFINITE_REPEAT(qn->upper);\r
- enum BodyEmptyType emptiness = qn->emptiness;\r
- int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);\r
-\r
- if (tlen < 0) return tlen;\r
- if (tlen == 0) return 0;\r
-\r
- /* anychar repeat */\r
- if (is_anychar_infinite_greedy(qn)) {\r
- if (qn->lower <= 1 ||\r
- int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0) {\r
- if (IS_NOT_NULL(qn->next_head_exact))\r
- return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;\r
- else\r
- return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;\r
- }\r
- }\r
-\r
- mod_tlen = tlen;\r
- if (emptiness != BODY_IS_NOT_EMPTY)\r
- mod_tlen += SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END;\r
-\r
- if (infinite &&\r
- (qn->lower <= 1 ||\r
- int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {\r
- if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {\r
- len = SIZE_OP_JUMP;\r
- }\r
- else {\r
- len = tlen * qn->lower;\r
- }\r
-\r
- if (qn->greedy) {\r
-#ifdef USE_OP_PUSH_OR_JUMP_EXACT\r
- if (IS_NOT_NULL(qn->head_exact))\r
- len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;\r
- else\r
-#endif\r
- if (IS_NOT_NULL(qn->next_head_exact))\r
- len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;\r
- else\r
- len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;\r
- }\r
- else\r
- len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;\r
- }\r
- else if (qn->upper == 0) {\r
- if (qn->is_refered != 0) { /* /(?<n>..){0}/ */\r
- len = SIZE_OP_JUMP + tlen;\r
- }\r
- else\r
- len = 0;\r
- }\r
- else if (!infinite && qn->greedy &&\r
- (qn->upper == 1 ||\r
- int_multiply_cmp(tlen + SIZE_OP_PUSH, qn->upper,\r
- QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {\r
- len = tlen * qn->lower;\r
- len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);\r
- }\r
- else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */\r
- len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;\r
- }\r
- else {\r
- len = SIZE_OP_REPEAT_INC + mod_tlen + SIZE_OP_REPEAT;\r
- }\r
-\r
- return len;\r
-}\r
-\r
-static int\r
-compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)\r
-{\r
- int i, r, mod_tlen;\r
- int infinite = IS_INFINITE_REPEAT(qn->upper);\r
- enum BodyEmptyType emptiness = qn->emptiness;\r
- int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);\r
-\r
- if (tlen < 0) return tlen;\r
- if (tlen == 0) return 0;\r
-\r
- if (is_anychar_infinite_greedy(qn) &&\r
- (qn->lower <= 1 ||\r
- int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {\r
- r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);\r
- if (r != 0) return r;\r
- if (IS_NOT_NULL(qn->next_head_exact)) {\r
- r = add_op(reg,\r
- IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg)) ?\r
- OP_ANYCHAR_ML_STAR_PEEK_NEXT : OP_ANYCHAR_STAR_PEEK_NEXT);\r
- if (r != 0) return r;\r
-\r
- COP(reg)->anychar_star_peek_next.c = STR_(qn->next_head_exact)->s[0];\r
- return 0;\r
- }\r
- else {\r
- r = add_op(reg,\r
- IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg)) ?\r
- OP_ANYCHAR_ML_STAR : OP_ANYCHAR_STAR);\r
- return r;\r
- }\r
- }\r
-\r
- mod_tlen = tlen;\r
- if (emptiness != BODY_IS_NOT_EMPTY)\r
- mod_tlen += SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END;\r
-\r
- if (infinite &&\r
- (qn->lower <= 1 ||\r
- int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {\r
- int addr;\r
-\r
- if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {\r
- r = add_op(reg, OP_JUMP);\r
- if (r != 0) return r;\r
- if (qn->greedy) {\r
-#ifdef USE_OP_PUSH_OR_JUMP_EXACT\r
- if (IS_NOT_NULL(qn->head_exact))\r
- COP(reg)->jump.addr = SIZE_OP_PUSH_OR_JUMP_EXACT1 + SIZE_INC_OP;\r
- else\r
-#endif\r
- if (IS_NOT_NULL(qn->next_head_exact))\r
- COP(reg)->jump.addr = SIZE_OP_PUSH_IF_PEEK_NEXT + SIZE_INC_OP;\r
- else\r
- COP(reg)->jump.addr = SIZE_OP_PUSH + SIZE_INC_OP;\r
- }\r
- else {\r
- COP(reg)->jump.addr = SIZE_OP_JUMP + SIZE_INC_OP;\r
- }\r
- }\r
- else {\r
- r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);\r
- if (r != 0) return r;\r
- }\r
-\r
- if (qn->greedy) {\r
-#ifdef USE_OP_PUSH_OR_JUMP_EXACT\r
- if (IS_NOT_NULL(qn->head_exact)) {\r
- r = add_op(reg, OP_PUSH_OR_JUMP_EXACT1);\r
- if (r != 0) return r;\r
- COP(reg)->push_or_jump_exact1.addr = SIZE_INC_OP + mod_tlen + SIZE_OP_JUMP;\r
- COP(reg)->push_or_jump_exact1.c = STR_(qn->head_exact)->s[0];\r
-\r
- r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, emptiness, env);\r
- if (r != 0) return r;\r
-\r
- addr = -(mod_tlen + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1);\r
- }\r
- else\r
-#endif\r
- if (IS_NOT_NULL(qn->next_head_exact)) {\r
- r = add_op(reg, OP_PUSH_IF_PEEK_NEXT);\r
- if (r != 0) return r;\r
- COP(reg)->push_if_peek_next.addr = SIZE_INC_OP + mod_tlen + SIZE_OP_JUMP;\r
- COP(reg)->push_if_peek_next.c = STR_(qn->next_head_exact)->s[0];\r
-\r
- r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, emptiness, env);\r
- if (r != 0) return r;\r
-\r
- addr = -(mod_tlen + (int )SIZE_OP_PUSH_IF_PEEK_NEXT);\r
- }\r
- else {\r
- r = add_op(reg, OP_PUSH);\r
- if (r != 0) return r;\r
- COP(reg)->push.addr = SIZE_INC_OP + mod_tlen + SIZE_OP_JUMP;\r
-\r
- r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, emptiness, env);\r
- if (r != 0) return r;\r
-\r
- addr = -(mod_tlen + (int )SIZE_OP_PUSH);\r
- }\r
-\r
- r = add_op(reg, OP_JUMP);\r
- if (r != 0) return r;\r
- COP(reg)->jump.addr = addr;\r
- }\r
- else {\r
- r = add_op(reg, OP_JUMP);\r
- if (r != 0) return r;\r
- COP(reg)->jump.addr = mod_tlen + SIZE_INC_OP;\r
-\r
- r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, emptiness, env);\r
- if (r != 0) return r;\r
-\r
- r = add_op(reg, OP_PUSH);\r
- if (r != 0) return r;\r
- COP(reg)->push.addr = -mod_tlen;\r
- }\r
- }\r
- else if (qn->upper == 0) {\r
- if (qn->is_refered != 0) { /* /(?<n>..){0}/ */\r
- r = add_op(reg, OP_JUMP);\r
- if (r != 0) return r;\r
- COP(reg)->jump.addr = tlen + SIZE_INC_OP;\r
-\r
- r = compile_tree(NODE_QUANT_BODY(qn), reg, env);\r
- }\r
- else {\r
- /* Nothing output */\r
- r = 0;\r
- }\r
- }\r
- else if (! infinite && qn->greedy &&\r
- (qn->upper == 1 ||\r
- int_multiply_cmp(tlen + SIZE_OP_PUSH, qn->upper,\r
- QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {\r
- int n = qn->upper - qn->lower;\r
-\r
- r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);\r
- if (r != 0) return r;\r
-\r
- for (i = 0; i < n; i++) {\r
- int v = onig_positive_int_multiply(n - i, tlen + SIZE_OP_PUSH);\r
- if (v < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;\r
-\r
- r = add_op(reg, OP_PUSH);\r
- if (r != 0) return r;\r
- COP(reg)->push.addr = v;\r
-\r
- r = compile_tree(NODE_QUANT_BODY(qn), reg, env);\r
- if (r != 0) return r;\r
- }\r
- }\r
- else if (! qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */\r
- r = add_op(reg, OP_PUSH);\r
- if (r != 0) return r;\r
- COP(reg)->push.addr = SIZE_INC_OP + SIZE_OP_JUMP;\r
-\r
- r = add_op(reg, OP_JUMP);\r
- if (r != 0) return r;\r
- COP(reg)->jump.addr = tlen + SIZE_INC_OP;\r
-\r
- r = compile_tree(NODE_QUANT_BODY(qn), reg, env);\r
- }\r
- else {\r
- r = compile_range_repeat_node(qn, mod_tlen, emptiness, reg, env);\r
- }\r
- return r;\r
-}\r
-\r
-static int\r
-compile_length_option_node(BagNode* node, regex_t* reg)\r
-{\r
- int tlen;\r
- OnigOptionType prev = reg->options;\r
-\r
- reg->options = node->o.options;\r
- tlen = compile_length_tree(NODE_BAG_BODY(node), reg);\r
- reg->options = prev;\r
-\r
- return tlen;\r
-}\r
-\r
-static int\r
-compile_option_node(BagNode* node, regex_t* reg, ScanEnv* env)\r
-{\r
- int r;\r
- OnigOptionType prev = reg->options;\r
-\r
- reg->options = node->o.options;\r
- r = compile_tree(NODE_BAG_BODY(node), reg, env);\r
- reg->options = prev;\r
-\r
- return r;\r
-}\r
-\r
-static int\r
-compile_length_bag_node(BagNode* node, regex_t* reg)\r
-{\r
- int len;\r
- int tlen;\r
-\r
- if (node->type == BAG_OPTION)\r
- return compile_length_option_node(node, reg);\r
-\r
- if (NODE_BAG_BODY(node)) {\r
- tlen = compile_length_tree(NODE_BAG_BODY(node), reg);\r
- if (tlen < 0) return tlen;\r
- }\r
- else\r
- tlen = 0;\r
-\r
- switch (node->type) {\r
- case BAG_MEMORY:\r
-#ifdef USE_CALL\r
-\r
- if (node->m.regnum == 0 && NODE_IS_CALLED(node)) {\r
- len = tlen + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;\r
- return len;\r
- }\r
-\r
- if (NODE_IS_CALLED(node)) {\r
- len = SIZE_OP_MEMORY_START_PUSH + tlen\r
- + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;\r
- if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))\r
- len += (NODE_IS_RECURSION(node)\r
- ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);\r
- else\r
- len += (NODE_IS_RECURSION(node)\r
- ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);\r
- }\r
- else if (NODE_IS_RECURSION(node)) {\r
- len = SIZE_OP_MEMORY_START_PUSH;\r
- len += tlen + (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum)\r
- ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_REC);\r
- }\r
- else\r
-#endif\r
- {\r
- if (MEM_STATUS_AT0(reg->bt_mem_start, node->m.regnum))\r
- len = SIZE_OP_MEMORY_START_PUSH;\r
- else\r
- len = SIZE_OP_MEMORY_START;\r
-\r
- len += tlen + (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum)\r
- ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);\r
- }\r
- break;\r
-\r
- case BAG_STOP_BACKTRACK:\r
- if (NODE_IS_STRICT_REAL_REPEAT(node)) {\r
- int v;\r
- QuantNode* qn;\r
-\r
- qn = QUANT_(NODE_BAG_BODY(node));\r
- tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);\r
- if (tlen < 0) return tlen;\r
-\r
- v = onig_positive_int_multiply(qn->lower, tlen);\r
- if (v < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;\r
- len = v + SIZE_OP_PUSH + tlen + SIZE_OP_POP_OUT + SIZE_OP_JUMP;\r
- }\r
- else {\r
- len = SIZE_OP_ATOMIC_START + tlen + SIZE_OP_ATOMIC_END;\r
- }\r
- break;\r
-\r
- case BAG_IF_ELSE:\r
- {\r
- Node* cond = NODE_BAG_BODY(node);\r
- Node* Then = node->te.Then;\r
- Node* Else = node->te.Else;\r
-\r
- len = compile_length_tree(cond, reg);\r
- if (len < 0) return len;\r
- len += SIZE_OP_PUSH;\r
- len += SIZE_OP_ATOMIC_START + SIZE_OP_ATOMIC_END;\r
-\r
- if (IS_NOT_NULL(Then)) {\r
- tlen = compile_length_tree(Then, reg);\r
- if (tlen < 0) return tlen;\r
- len += tlen;\r
- }\r
-\r
- len += SIZE_OP_JUMP + SIZE_OP_ATOMIC_END;\r
-\r
- if (IS_NOT_NULL(Else)) {\r
- tlen = compile_length_tree(Else, reg);\r
- if (tlen < 0) return tlen;\r
- len += tlen;\r
- }\r
- }\r
- break;\r
-\r
- case BAG_OPTION:\r
- /* never come here, but set for escape warning */\r
- len = 0;\r
- break;\r
- }\r
-\r
- return len;\r
-}\r
-\r
-static int get_char_len_node(Node* node, regex_t* reg, int* len);\r
-\r
-static int\r
-compile_bag_memory_node(BagNode* node, regex_t* reg, ScanEnv* env)\r
-{\r
- int r;\r
- int len;\r
-\r
-#ifdef USE_CALL\r
- if (NODE_IS_CALLED(node)) {\r
- r = add_op(reg, OP_CALL);\r
- if (r != 0) return r;\r
-\r
- node->m.called_addr = COP_CURR_OFFSET(reg) + 1 + SIZE_OP_JUMP;\r
- NODE_STATUS_ADD(node, ADDR_FIXED);\r
- COP(reg)->call.addr = (int )node->m.called_addr;\r
-\r
- if (node->m.regnum == 0) {\r
- len = compile_length_tree(NODE_BAG_BODY(node), reg);\r
- len += SIZE_OP_RETURN;\r
-\r
- r = add_op(reg, OP_JUMP);\r
- if (r != 0) return r;\r
- COP(reg)->jump.addr = len + SIZE_INC_OP;\r
-\r
- r = compile_tree(NODE_BAG_BODY(node), reg, env);\r
- if (r != 0) return r;\r
-\r
- r = add_op(reg, OP_RETURN);\r
- return r;\r
- }\r
- else {\r
- len = compile_length_tree(NODE_BAG_BODY(node), reg);\r
- len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);\r
- if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))\r
- len += (NODE_IS_RECURSION(node)\r
- ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);\r
- else\r
- len += (NODE_IS_RECURSION(node)\r
- ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);\r
-\r
- r = add_op(reg, OP_JUMP);\r
- if (r != 0) return r;\r
- COP(reg)->jump.addr = len + SIZE_INC_OP;\r
- }\r
- }\r
-#endif\r
-\r
- if (MEM_STATUS_AT0(reg->bt_mem_start, node->m.regnum))\r
- r = add_op(reg, OP_MEMORY_START_PUSH);\r
- else\r
- r = add_op(reg, OP_MEMORY_START);\r
- if (r != 0) return r;\r
- COP(reg)->memory_start.num = node->m.regnum;\r
-\r
- r = compile_tree(NODE_BAG_BODY(node), reg, env);\r
- if (r != 0) return r;\r
-\r
-#ifdef USE_CALL\r
- if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))\r
- r = add_op(reg, (NODE_IS_RECURSION(node)\r
- ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));\r
- else\r
- r = add_op(reg, (NODE_IS_RECURSION(node) ? OP_MEMORY_END_REC : OP_MEMORY_END));\r
- if (r != 0) return r;\r
- COP(reg)->memory_end.num = node->m.regnum;\r
-\r
- if (NODE_IS_CALLED(node)) {\r
- if (r != 0) return r;\r
- r = add_op(reg, OP_RETURN);\r
- }\r
-#else\r
- if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))\r
- r = add_op(reg, OP_MEMORY_END_PUSH);\r
- else\r
- r = add_op(reg, OP_MEMORY_END);\r
- if (r != 0) return r;\r
- COP(reg)->memory_end.num = node->m.regnum;\r
-#endif\r
-\r
- return r;\r
-}\r
-\r
-static int\r
-compile_bag_node(BagNode* node, regex_t* reg, ScanEnv* env)\r
-{\r
- int r, len;\r
-\r
- switch (node->type) {\r
- case BAG_MEMORY:\r
- r = compile_bag_memory_node(node, reg, env);\r
- break;\r
-\r
- case BAG_OPTION:\r
- r = compile_option_node(node, reg, env);\r
- break;\r
-\r
- case BAG_STOP_BACKTRACK:\r
- if (NODE_IS_STRICT_REAL_REPEAT(node)) {\r
- QuantNode* qn = QUANT_(NODE_BAG_BODY(node));\r
- r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);\r
- if (r != 0) return r;\r
-\r
- len = compile_length_tree(NODE_QUANT_BODY(qn), reg);\r
- if (len < 0) return len;\r
-\r
- r = add_op(reg, OP_PUSH);\r
- if (r != 0) return r;\r
- COP(reg)->push.addr = SIZE_INC_OP + len + SIZE_OP_POP_OUT + SIZE_OP_JUMP;\r
-\r
- r = compile_tree(NODE_QUANT_BODY(qn), reg, env);\r
- if (r != 0) return r;\r
- r = add_op(reg, OP_POP_OUT);\r
- if (r != 0) return r;\r
-\r
- r = add_op(reg, OP_JUMP);\r
- if (r != 0) return r;\r
- COP(reg)->jump.addr = -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP_OUT);\r
- }\r
- else {\r
- r = add_op(reg, OP_ATOMIC_START);\r
- if (r != 0) return r;\r
- r = compile_tree(NODE_BAG_BODY(node), reg, env);\r
- if (r != 0) return r;\r
- r = add_op(reg, OP_ATOMIC_END);\r
- }\r
- break;\r
-\r
- case BAG_IF_ELSE:\r
- {\r
- int cond_len, then_len, else_len, jump_len;\r
- Node* cond = NODE_BAG_BODY(node);\r
- Node* Then = node->te.Then;\r
- Node* Else = node->te.Else;\r
-\r
- r = add_op(reg, OP_ATOMIC_START);\r
- if (r != 0) return r;\r
-\r
- cond_len = compile_length_tree(cond, reg);\r
- if (cond_len < 0) return cond_len;\r
- if (IS_NOT_NULL(Then)) {\r
- then_len = compile_length_tree(Then, reg);\r
- if (then_len < 0) return then_len;\r
- }\r
- else\r
- then_len = 0;\r
-\r
- jump_len = cond_len + then_len + SIZE_OP_ATOMIC_END + SIZE_OP_JUMP;\r
-\r
- r = add_op(reg, OP_PUSH);\r
- if (r != 0) return r;\r
- COP(reg)->push.addr = SIZE_INC_OP + jump_len;\r
-\r
- r = compile_tree(cond, reg, env);\r
- if (r != 0) return r;\r
- r = add_op(reg, OP_ATOMIC_END);\r
- if (r != 0) return r;\r
-\r
- if (IS_NOT_NULL(Then)) {\r
- r = compile_tree(Then, reg, env);\r
- if (r != 0) return r;\r
- }\r
-\r
- if (IS_NOT_NULL(Else)) {\r
- else_len = compile_length_tree(Else, reg);\r
- if (else_len < 0) return else_len;\r
- }\r
- else\r
- else_len = 0;\r
-\r
- r = add_op(reg, OP_JUMP);\r
- if (r != 0) return r;\r
- COP(reg)->jump.addr = SIZE_OP_ATOMIC_END + else_len + SIZE_INC_OP;\r
-\r
- r = add_op(reg, OP_ATOMIC_END);\r
- if (r != 0) return r;\r
-\r
- if (IS_NOT_NULL(Else)) {\r
- r = compile_tree(Else, reg, env);\r
- }\r
- }\r
- break;\r
- }\r
-\r
- return r;\r
-}\r
-\r
-static int\r
-compile_length_anchor_node(AnchorNode* node, regex_t* reg)\r
-{\r
- int len;\r
- int tlen = 0;\r
-\r
- if (IS_NOT_NULL(NODE_ANCHOR_BODY(node))) {\r
- tlen = compile_length_tree(NODE_ANCHOR_BODY(node), reg);\r
- if (tlen < 0) return tlen;\r
- }\r
-\r
- switch (node->type) {\r
- case ANCR_PREC_READ:\r
- len = SIZE_OP_PREC_READ_START + tlen + SIZE_OP_PREC_READ_END;\r
- break;\r
- case ANCR_PREC_READ_NOT:\r
- len = SIZE_OP_PREC_READ_NOT_START + tlen + SIZE_OP_PREC_READ_NOT_END;\r
- break;\r
- case ANCR_LOOK_BEHIND:\r
- len = SIZE_OP_LOOK_BEHIND + tlen;\r
- break;\r
- case ANCR_LOOK_BEHIND_NOT:\r
- len = SIZE_OP_LOOK_BEHIND_NOT_START + tlen + SIZE_OP_LOOK_BEHIND_NOT_END;\r
- break;\r
-\r
- case ANCR_WORD_BOUNDARY:\r
- case ANCR_NO_WORD_BOUNDARY:\r
-#ifdef USE_WORD_BEGIN_END\r
- case ANCR_WORD_BEGIN:\r
- case ANCR_WORD_END:\r
-#endif\r
- len = SIZE_OP_WORD_BOUNDARY;\r
- break;\r
-\r
- case ANCR_TEXT_SEGMENT_BOUNDARY:\r
- case ANCR_NO_TEXT_SEGMENT_BOUNDARY:\r
- len = SIZE_OPCODE;\r
- break;\r
-\r
- default:\r
- len = SIZE_OPCODE;\r
- break;\r
- }\r
-\r
- return len;\r
-}\r
-\r
-static int\r
-compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env)\r
-{\r
- int r, len;\r
- enum OpCode op;\r
-\r
- switch (node->type) {\r
- case ANCR_BEGIN_BUF: r = add_op(reg, OP_BEGIN_BUF); break;\r
- case ANCR_END_BUF: r = add_op(reg, OP_END_BUF); break;\r
- case ANCR_BEGIN_LINE: r = add_op(reg, OP_BEGIN_LINE); break;\r
- case ANCR_END_LINE: r = add_op(reg, OP_END_LINE); break;\r
- case ANCR_SEMI_END_BUF: r = add_op(reg, OP_SEMI_END_BUF); break;\r
- case ANCR_BEGIN_POSITION: r = add_op(reg, OP_BEGIN_POSITION); break;\r
-\r
- case ANCR_WORD_BOUNDARY:\r
- op = OP_WORD_BOUNDARY;\r
- word:\r
- r = add_op(reg, op);\r
- if (r != 0) return r;\r
- COP(reg)->word_boundary.mode = (ModeType )node->ascii_mode;\r
- break;\r
-\r
- case ANCR_NO_WORD_BOUNDARY:\r
- op = OP_NO_WORD_BOUNDARY; goto word;\r
- break;\r
-#ifdef USE_WORD_BEGIN_END\r
- case ANCR_WORD_BEGIN:\r
- op = OP_WORD_BEGIN; goto word;\r
- break;\r
- case ANCR_WORD_END:\r
- op = OP_WORD_END; goto word;\r
- break;\r
-#endif\r
-\r
- case ANCR_TEXT_SEGMENT_BOUNDARY:\r
- case ANCR_NO_TEXT_SEGMENT_BOUNDARY:\r
- {\r
- enum TextSegmentBoundaryType type;\r
-\r
- r = add_op(reg, OP_TEXT_SEGMENT_BOUNDARY);\r
- if (r != 0) return r;\r
-\r
- type = EXTENDED_GRAPHEME_CLUSTER_BOUNDARY;\r
-#ifdef USE_UNICODE_WORD_BREAK\r
- if (ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_TEXT_SEGMENT_WORD))\r
- type = WORD_BOUNDARY;\r
-#endif\r
-\r
- COP(reg)->text_segment_boundary.type = type;\r
- COP(reg)->text_segment_boundary.not =\r
- (node->type == ANCR_NO_TEXT_SEGMENT_BOUNDARY ? 1 : 0);\r
- }\r
- break;\r
-\r
- case ANCR_PREC_READ:\r
- r = add_op(reg, OP_PREC_READ_START);\r
- if (r != 0) return r;\r
- r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);\r
- if (r != 0) return r;\r
- r = add_op(reg, OP_PREC_READ_END);\r
- break;\r
-\r
- case ANCR_PREC_READ_NOT:\r
- len = compile_length_tree(NODE_ANCHOR_BODY(node), reg);\r
- if (len < 0) return len;\r
-\r
- r = add_op(reg, OP_PREC_READ_NOT_START);\r
- if (r != 0) return r;\r
- COP(reg)->prec_read_not_start.addr = SIZE_INC_OP + len + SIZE_OP_PREC_READ_NOT_END;\r
- r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);\r
- if (r != 0) return r;\r
- r = add_op(reg, OP_PREC_READ_NOT_END);\r
- break;\r
-\r
- case ANCR_LOOK_BEHIND:\r
- {\r
- int n;\r
- r = add_op(reg, OP_LOOK_BEHIND);\r
- if (r != 0) return r;\r
- if (node->char_len < 0) {\r
- r = get_char_len_node(NODE_ANCHOR_BODY(node), reg, &n);\r
- if (r != 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;\r
- }\r
- else\r
- n = node->char_len;\r
-\r
- COP(reg)->look_behind.len = n;\r
- r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);\r
- }\r
- break;\r
-\r
- case ANCR_LOOK_BEHIND_NOT:\r
- {\r
- int n;\r
-\r
- len = compile_length_tree(NODE_ANCHOR_BODY(node), reg);\r
- r = add_op(reg, OP_LOOK_BEHIND_NOT_START);\r
- if (r != 0) return r;\r
- COP(reg)->look_behind_not_start.addr = SIZE_INC_OP + len + SIZE_OP_LOOK_BEHIND_NOT_END;\r
-\r
- if (node->char_len < 0) {\r
- r = get_char_len_node(NODE_ANCHOR_BODY(node), reg, &n);\r
- if (r != 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;\r
- }\r
- else\r
- n = node->char_len;\r
-\r
- COP(reg)->look_behind_not_start.len = n;\r
-\r
- r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);\r
- if (r != 0) return r;\r
- r = add_op(reg, OP_LOOK_BEHIND_NOT_END);\r
- }\r
- break;\r
-\r
- default:\r
- return ONIGERR_TYPE_BUG;\r
- break;\r
- }\r
-\r
- return r;\r
-}\r
-\r
-static int\r
-compile_gimmick_node(GimmickNode* node, regex_t* reg)\r
-{\r
- int r;\r
-\r
- switch (node->type) {\r
- case GIMMICK_FAIL:\r
- r = add_op(reg, OP_FAIL);\r
- break;\r
-\r
- case GIMMICK_SAVE:\r
- r = add_op(reg, OP_PUSH_SAVE_VAL);\r
- if (r != 0) return r;\r
- COP(reg)->push_save_val.type = node->detail_type;\r
- COP(reg)->push_save_val.id = node->id;\r
- break;\r
-\r
- case GIMMICK_UPDATE_VAR:\r
- r = add_op(reg, OP_UPDATE_VAR);\r
- if (r != 0) return r;\r
- COP(reg)->update_var.type = node->detail_type;\r
- COP(reg)->update_var.id = node->id;\r
- break;\r
-\r
-#ifdef USE_CALLOUT\r
- case GIMMICK_CALLOUT:\r
- switch (node->detail_type) {\r
- case ONIG_CALLOUT_OF_CONTENTS:\r
- case ONIG_CALLOUT_OF_NAME:\r
- {\r
- if (node->detail_type == ONIG_CALLOUT_OF_NAME) {\r
- r = add_op(reg, OP_CALLOUT_NAME);\r
- if (r != 0) return r;\r
- COP(reg)->callout_name.id = node->id;\r
- COP(reg)->callout_name.num = node->num;\r
- }\r
- else {\r
- r = add_op(reg, OP_CALLOUT_CONTENTS);\r
- if (r != 0) return r;\r
- COP(reg)->callout_contents.num = node->num;\r
- }\r
- }\r
- break;\r
-\r
- default:\r
- r = ONIGERR_TYPE_BUG;\r
- break;\r
- }\r
-#endif\r
- }\r
-\r
- return r;\r
-}\r
-\r
-static int\r
-compile_length_gimmick_node(GimmickNode* node, regex_t* reg)\r
-{\r
- int len;\r
-\r
- switch (node->type) {\r
- case GIMMICK_FAIL:\r
- len = SIZE_OP_FAIL;\r
- break;\r
-\r
- case GIMMICK_SAVE:\r
- len = SIZE_OP_PUSH_SAVE_VAL;\r
- break;\r
-\r
- case GIMMICK_UPDATE_VAR:\r
- len = SIZE_OP_UPDATE_VAR;\r
- break;\r
-\r
-#ifdef USE_CALLOUT\r
- case GIMMICK_CALLOUT:\r
- switch (node->detail_type) {\r
- case ONIG_CALLOUT_OF_CONTENTS:\r
- len = SIZE_OP_CALLOUT_CONTENTS;\r
- break;\r
- case ONIG_CALLOUT_OF_NAME:\r
- len = SIZE_OP_CALLOUT_NAME;\r
- break;\r
-\r
- default:\r
- len = ONIGERR_TYPE_BUG;\r
- break;\r
- }\r
- break;\r
-#endif\r
- }\r
-\r
- return len;\r
-}\r
-\r
-static int\r
-compile_length_tree(Node* node, regex_t* reg)\r
-{\r
- int len, r;\r
-\r
- switch (NODE_TYPE(node)) {\r
- case NODE_LIST:\r
- len = 0;\r
- do {\r
- r = compile_length_tree(NODE_CAR(node), reg);\r
- if (r < 0) return r;\r
- len += r;\r
- } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
- r = len;\r
- break;\r
-\r
- case NODE_ALT:\r
- {\r
- int n;\r
-\r
- n = r = 0;\r
- do {\r
- r += compile_length_tree(NODE_CAR(node), reg);\r
- n++;\r
- } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
- r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);\r
- }\r
- break;\r
-\r
- case NODE_STRING:\r
- if (NODE_STRING_IS_RAW(node))\r
- r = compile_length_string_raw_node(STR_(node), reg);\r
- else\r
- r = compile_length_string_node(node, reg);\r
- break;\r
-\r
- case NODE_CCLASS:\r
- r = compile_length_cclass_node(CCLASS_(node), reg);\r
- break;\r
-\r
- case NODE_CTYPE:\r
- r = SIZE_OPCODE;\r
- break;\r
-\r
- case NODE_BACKREF:\r
- r = SIZE_OP_BACKREF;\r
- break;\r
-\r
-#ifdef USE_CALL\r
- case NODE_CALL:\r
- r = SIZE_OP_CALL;\r
- break;\r
-#endif\r
-\r
- case NODE_QUANT:\r
- r = compile_length_quantifier_node(QUANT_(node), reg);\r
- break;\r
-\r
- case NODE_BAG:\r
- r = compile_length_bag_node(BAG_(node), reg);\r
- break;\r
-\r
- case NODE_ANCHOR:\r
- r = compile_length_anchor_node(ANCHOR_(node), reg);\r
- break;\r
-\r
- case NODE_GIMMICK:\r
- r = compile_length_gimmick_node(GIMMICK_(node), reg);\r
- break;\r
-\r
- default:\r
- return ONIGERR_TYPE_BUG;\r
- break;\r
- }\r
-\r
- return r;\r
-}\r
-\r
-static int\r
-compile_tree(Node* node, regex_t* reg, ScanEnv* env)\r
-{\r
- int n, len, pos, r = 0;\r
-\r
- switch (NODE_TYPE(node)) {\r
- case NODE_LIST:\r
- do {\r
- r = compile_tree(NODE_CAR(node), reg, env);\r
- } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));\r
- break;\r
-\r
- case NODE_ALT:\r
- {\r
- Node* x = node;\r
- len = 0;\r
- do {\r
- len += compile_length_tree(NODE_CAR(x), reg);\r
- if (IS_NOT_NULL(NODE_CDR(x))) {\r
- len += SIZE_OP_PUSH + SIZE_OP_JUMP;\r
- }\r
- } while (IS_NOT_NULL(x = NODE_CDR(x)));\r
- pos = COP_CURR_OFFSET(reg) + 1 + len; /* goal position */\r
-\r
- do {\r
- len = compile_length_tree(NODE_CAR(node), reg);\r
- if (IS_NOT_NULL(NODE_CDR(node))) {\r
- enum OpCode push = NODE_IS_SUPER(node) ? OP_PUSH_SUPER : OP_PUSH;\r
- r = add_op(reg, push);\r
- if (r != 0) break;\r
- COP(reg)->push.addr = SIZE_INC_OP + len + SIZE_OP_JUMP;\r
- }\r
- r = compile_tree(NODE_CAR(node), reg, env);\r
- if (r != 0) break;\r
- if (IS_NOT_NULL(NODE_CDR(node))) {\r
- len = pos - (COP_CURR_OFFSET(reg) + 1);\r
- r = add_op(reg, OP_JUMP);\r
- if (r != 0) break;\r
- COP(reg)->jump.addr = len;\r
- }\r
- } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
- }\r
- break;\r
-\r
- case NODE_STRING:\r
- if (NODE_STRING_IS_RAW(node))\r
- r = compile_string_raw_node(STR_(node), reg);\r
- else\r
- r = compile_string_node(node, reg);\r
- break;\r
-\r
- case NODE_CCLASS:\r
- r = compile_cclass_node(CCLASS_(node), reg);\r
- break;\r
-\r
- case NODE_CTYPE:\r
- {\r
- int op;\r
-\r
- switch (CTYPE_(node)->ctype) {\r
- case CTYPE_ANYCHAR:\r
- r = add_op(reg, IS_MULTILINE(CTYPE_OPTION(node, reg)) ?\r
- OP_ANYCHAR_ML : OP_ANYCHAR);\r
- break;\r
-\r
- case ONIGENC_CTYPE_WORD:\r
- if (CTYPE_(node)->ascii_mode == 0) {\r
- op = CTYPE_(node)->not != 0 ? OP_NO_WORD : OP_WORD;\r
- }\r
- else {\r
- op = CTYPE_(node)->not != 0 ? OP_NO_WORD_ASCII : OP_WORD_ASCII;\r
- }\r
- r = add_op(reg, op);\r
- break;\r
-\r
- default:\r
- return ONIGERR_TYPE_BUG;\r
- break;\r
- }\r
- }\r
- break;\r
-\r
- case NODE_BACKREF:\r
- {\r
- BackRefNode* br = BACKREF_(node);\r
-\r
- if (NODE_IS_CHECKER(node)) {\r
-#ifdef USE_BACKREF_WITH_LEVEL\r
- if (NODE_IS_NEST_LEVEL(node)) {\r
- r = add_op(reg, OP_BACKREF_CHECK_WITH_LEVEL);\r
- if (r != 0) return r;\r
- COP(reg)->backref_general.nest_level = br->nest_level;\r
- }\r
- else\r
-#endif\r
- {\r
- r = add_op(reg, OP_BACKREF_CHECK);\r
- if (r != 0) return r;\r
- }\r
- goto add_bacref_mems;\r
- }\r
- else {\r
-#ifdef USE_BACKREF_WITH_LEVEL\r
- if (NODE_IS_NEST_LEVEL(node)) {\r
- if ((reg->options & ONIG_OPTION_IGNORECASE) != 0)\r
- r = add_op(reg, OP_BACKREF_WITH_LEVEL_IC);\r
- else\r
- r = add_op(reg, OP_BACKREF_WITH_LEVEL);\r
-\r
- if (r != 0) return r;\r
- COP(reg)->backref_general.nest_level = br->nest_level;\r
- goto add_bacref_mems;\r
- }\r
- else\r
-#endif\r
- if (br->back_num == 1) {\r
- n = br->back_static[0];\r
- if (IS_IGNORECASE(reg->options)) {\r
- r = add_op(reg, OP_BACKREF_N_IC);\r
- if (r != 0) return r;\r
- COP(reg)->backref_n.n1 = n;\r
- }\r
- else {\r
- switch (n) {\r
- case 1: r = add_op(reg, OP_BACKREF1); break;\r
- case 2: r = add_op(reg, OP_BACKREF2); break;\r
- default:\r
- r = add_op(reg, OP_BACKREF_N);\r
- if (r != 0) return r;\r
- COP(reg)->backref_n.n1 = n;\r
- break;\r
- }\r
- }\r
- }\r
- else {\r
- int num;\r
- int* p;\r
-\r
- r = add_op(reg, IS_IGNORECASE(reg->options) ?\r
- OP_BACKREF_MULTI_IC : OP_BACKREF_MULTI);\r
- if (r != 0) return r;\r
-\r
- add_bacref_mems:\r
- num = br->back_num;\r
- COP(reg)->backref_general.num = num;\r
- if (num == 1) {\r
- COP(reg)->backref_general.n1 = br->back_static[0];\r
- }\r
- else {\r
- int i, j;\r
- MemNumType* ns;\r
-\r
- ns = xmalloc(sizeof(MemNumType) * num);\r
- CHECK_NULL_RETURN_MEMERR(ns);\r
- COP(reg)->backref_general.ns = ns;\r
- p = BACKREFS_P(br);\r
- for (i = num - 1, j = 0; i >= 0; i--, j++) {\r
- ns[j] = p[i];\r
- }\r
- }\r
- }\r
- }\r
- }\r
- break;\r
-\r
-#ifdef USE_CALL\r
- case NODE_CALL:\r
- r = compile_call(CALL_(node), reg, env);\r
- break;\r
-#endif\r
-\r
- case NODE_QUANT:\r
- r = compile_quantifier_node(QUANT_(node), reg, env);\r
- break;\r
-\r
- case NODE_BAG:\r
- r = compile_bag_node(BAG_(node), reg, env);\r
- break;\r
-\r
- case NODE_ANCHOR:\r
- r = compile_anchor_node(ANCHOR_(node), reg, env);\r
- break;\r
-\r
- case NODE_GIMMICK:\r
- r = compile_gimmick_node(GIMMICK_(node), reg);\r
- break;\r
-\r
- default:\r
-#ifdef ONIG_DEBUG\r
- fprintf(stderr, "compile_tree: undefined node type %d\n", NODE_TYPE(node));\r
-#endif\r
- break;\r
- }\r
-\r
- return r;\r
-}\r
-\r
-static int\r
-noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)\r
-{\r
- int r = 0;\r
- Node* node = *plink;\r
-\r
- switch (NODE_TYPE(node)) {\r
- case NODE_LIST:\r
- case NODE_ALT:\r
- do {\r
- r = noname_disable_map(&(NODE_CAR(node)), map, counter);\r
- } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));\r
- break;\r
-\r
- case NODE_QUANT:\r
- {\r
- Node** ptarget = &(NODE_BODY(node));\r
- Node* old = *ptarget;\r
- r = noname_disable_map(ptarget, map, counter);\r
- if (*ptarget != old && NODE_TYPE(*ptarget) == NODE_QUANT) {\r
- onig_reduce_nested_quantifier(node, *ptarget);\r
- }\r
- }\r
- break;\r
-\r
- case NODE_BAG:\r
- {\r
- BagNode* en = BAG_(node);\r
- if (en->type == BAG_MEMORY) {\r
- if (NODE_IS_NAMED_GROUP(node)) {\r
- (*counter)++;\r
- map[en->m.regnum].new_val = *counter;\r
- en->m.regnum = *counter;\r
- r = noname_disable_map(&(NODE_BODY(node)), map, counter);\r
- }\r
- else {\r
- *plink = NODE_BODY(node);\r
- NODE_BODY(node) = NULL_NODE;\r
- onig_node_free(node);\r
- r = noname_disable_map(plink, map, counter);\r
- }\r
- }\r
- else if (en->type == BAG_IF_ELSE) {\r
- r = noname_disable_map(&(NODE_BAG_BODY(en)), map, counter);\r
- if (r != 0) return r;\r
- if (IS_NOT_NULL(en->te.Then)) {\r
- r = noname_disable_map(&(en->te.Then), map, counter);\r
- if (r != 0) return r;\r
- }\r
- if (IS_NOT_NULL(en->te.Else)) {\r
- r = noname_disable_map(&(en->te.Else), map, counter);\r
- if (r != 0) return r;\r
- }\r
- }\r
- else\r
- r = noname_disable_map(&(NODE_BODY(node)), map, counter);\r
- }\r
- break;\r
-\r
- case NODE_ANCHOR:\r
- if (IS_NOT_NULL(NODE_BODY(node)))\r
- r = noname_disable_map(&(NODE_BODY(node)), map, counter);\r
- break;\r
-\r
- default:\r
- break;\r
- }\r
-\r
- return r;\r
-}\r
-\r
-static int\r
-renumber_node_backref(Node* node, GroupNumRemap* map)\r
-{\r
- int i, pos, n, old_num;\r
- int *backs;\r
- BackRefNode* bn = BACKREF_(node);\r
-\r
- if (! NODE_IS_BY_NAME(node))\r
- return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;\r
-\r
- old_num = bn->back_num;\r
- if (IS_NULL(bn->back_dynamic))\r
- backs = bn->back_static;\r
- else\r
- backs = bn->back_dynamic;\r
-\r
- for (i = 0, pos = 0; i < old_num; i++) {\r
- n = map[backs[i]].new_val;\r
- if (n > 0) {\r
- backs[pos] = n;\r
- pos++;\r
- }\r
- }\r
-\r
- bn->back_num = pos;\r
- return 0;\r
-}\r
-\r
-static int\r
-renumber_by_map(Node* node, GroupNumRemap* map)\r
-{\r
- int r = 0;\r
-\r
- switch (NODE_TYPE(node)) {\r
- case NODE_LIST:\r
- case NODE_ALT:\r
- do {\r
- r = renumber_by_map(NODE_CAR(node), map);\r
- } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));\r
- break;\r
-\r
- case NODE_QUANT:\r
- r = renumber_by_map(NODE_BODY(node), map);\r
- break;\r
-\r
- case NODE_BAG:\r
- {\r
- BagNode* en = BAG_(node);\r
-\r
- r = renumber_by_map(NODE_BODY(node), map);\r
- if (r != 0) return r;\r
-\r
- if (en->type == BAG_IF_ELSE) {\r
- if (IS_NOT_NULL(en->te.Then)) {\r
- r = renumber_by_map(en->te.Then, map);\r
- if (r != 0) return r;\r
- }\r
- if (IS_NOT_NULL(en->te.Else)) {\r
- r = renumber_by_map(en->te.Else, map);\r
- if (r != 0) return r;\r
- }\r
- }\r
- }\r
- break;\r
-\r
- case NODE_BACKREF:\r
- r = renumber_node_backref(node, map);\r
- break;\r
-\r
- case NODE_ANCHOR:\r
- if (IS_NOT_NULL(NODE_BODY(node)))\r
- r = renumber_by_map(NODE_BODY(node), map);\r
- break;\r
-\r
- default:\r
- break;\r
- }\r
-\r
- return r;\r
-}\r
-\r
-static int\r
-numbered_ref_check(Node* node)\r
-{\r
- int r = 0;\r
-\r
- switch (NODE_TYPE(node)) {\r
- case NODE_LIST:\r
- case NODE_ALT:\r
- do {\r
- r = numbered_ref_check(NODE_CAR(node));\r
- } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));\r
- break;\r
-\r
- case NODE_ANCHOR:\r
- if (IS_NULL(NODE_BODY(node)))\r
- break;\r
- /* fall */\r
- case NODE_QUANT:\r
- r = numbered_ref_check(NODE_BODY(node));\r
- break;\r
-\r
- case NODE_BAG:\r
- {\r
- BagNode* en = BAG_(node);\r
-\r
- r = numbered_ref_check(NODE_BODY(node));\r
- if (r != 0) return r;\r
-\r
- if (en->type == BAG_IF_ELSE) {\r
- if (IS_NOT_NULL(en->te.Then)) {\r
- r = numbered_ref_check(en->te.Then);\r
- if (r != 0) return r;\r
- }\r
- if (IS_NOT_NULL(en->te.Else)) {\r
- r = numbered_ref_check(en->te.Else);\r
- if (r != 0) return r;\r
- }\r
- }\r
- }\r
-\r
- break;\r
-\r
- case NODE_BACKREF:\r
- if (! NODE_IS_BY_NAME(node))\r
- return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;\r
- break;\r
-\r
- default:\r
- break;\r
- }\r
-\r
- return r;\r
-}\r
-\r
-static int\r
-disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)\r
-{\r
- int r, i, pos, counter;\r
- int result;\r
- MemStatusType loc;\r
- GroupNumRemap* map;\r
-\r
- map = (GroupNumRemap* )xmalloc(sizeof(GroupNumRemap) * (env->num_mem + 1));\r
- CHECK_NULL_RETURN_MEMERR(map);\r
- for (i = 1; i <= env->num_mem; i++) {\r
- map[i].new_val = 0;\r
- }\r
- counter = 0;\r
- r = noname_disable_map(root, map, &counter);\r
- if (r != 0) return r;\r
-\r
- r = renumber_by_map(*root, map);\r
- if (r != 0) return r;\r
-\r
- for (i = 1, pos = 1; i <= env->num_mem; i++) {\r
- if (map[i].new_val > 0) {\r
- SCANENV_MEMENV(env)[pos] = SCANENV_MEMENV(env)[i];\r
- pos++;\r
- }\r
- }\r
-\r
- loc = env->capture_history;\r
- MEM_STATUS_CLEAR(env->capture_history);\r
- for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {\r
- if (MEM_STATUS_AT(loc, i)) {\r
- MEM_STATUS_ON_SIMPLE(env->capture_history, map[i].new_val);\r
- }\r
- }\r
-\r
- env->num_mem = env->num_named;\r
- reg->num_mem = env->num_named;\r
- result = onig_renumber_name_table(reg, map);\r
- xfree(map);\r
- return result;\r
-}\r
-\r
-#ifdef USE_CALL\r
-static int\r
-fix_unset_addr_list(UnsetAddrList* uslist, regex_t* reg)\r
-{\r
- int i, offset;\r
- BagNode* en;\r
- AbsAddrType addr;\r
- AbsAddrType* paddr;\r
-\r
- for (i = 0; i < uslist->num; i++) {\r
- if (! NODE_IS_ADDR_FIXED(uslist->us[i].target))\r
- return ONIGERR_PARSER_BUG;\r
-\r
- en = BAG_(uslist->us[i].target);\r
- addr = en->m.called_addr;\r
- offset = uslist->us[i].offset;\r
-\r
- paddr = (AbsAddrType* )((char* )reg->ops + offset);\r
- *paddr = addr;\r
- }\r
- return 0;\r
-}\r
-#endif\r
-\r
-\r
-#define GET_CHAR_LEN_VARLEN -1\r
-#define GET_CHAR_LEN_TOP_ALT_VARLEN -2\r
-\r
-/* fixed size pattern node only */\r
-static int\r
-get_char_len_node1(Node* node, regex_t* reg, int* len, int level)\r
-{\r
- int tlen;\r
- int r = 0;\r
-\r
- level++;\r
- *len = 0;\r
- switch (NODE_TYPE(node)) {\r
- case NODE_LIST:\r
- do {\r
- r = get_char_len_node1(NODE_CAR(node), reg, &tlen, level);\r
- if (r == 0)\r
- *len = distance_add(*len, tlen);\r
- } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));\r
- break;\r
-\r
- case NODE_ALT:\r
- {\r
- int tlen2;\r
- int varlen = 0;\r
-\r
- r = get_char_len_node1(NODE_CAR(node), reg, &tlen, level);\r
- while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node))) {\r
- r = get_char_len_node1(NODE_CAR(node), reg, &tlen2, level);\r
- if (r == 0) {\r
- if (tlen != tlen2)\r
- varlen = 1;\r
- }\r
- }\r
- if (r == 0) {\r
- if (varlen != 0) {\r
- if (level == 1)\r
- r = GET_CHAR_LEN_TOP_ALT_VARLEN;\r
- else\r
- r = GET_CHAR_LEN_VARLEN;\r
- }\r
- else\r
- *len = tlen;\r
- }\r
- }\r
- break;\r
-\r
- case NODE_STRING:\r
- {\r
- StrNode* sn = STR_(node);\r
- UChar *s = sn->s;\r
-\r
- while (s < sn->end) {\r
- s += enclen(reg->enc, s);\r
- (*len)++;\r
- }\r
- }\r
- break;\r
-\r
- case NODE_QUANT:\r
- {\r
- QuantNode* qn = QUANT_(node);\r
-\r
- if (qn->lower == qn->upper) {\r
- if (qn->upper == 0) {\r
- *len = 0;\r
- }\r
- else {\r
- r = get_char_len_node1(NODE_BODY(node), reg, &tlen, level);\r
- if (r == 0)\r
- *len = distance_multiply(tlen, qn->lower);\r
- }\r
- }\r
- else\r
- r = GET_CHAR_LEN_VARLEN;\r
- }\r
- break;\r
-\r
-#ifdef USE_CALL\r
- case NODE_CALL:\r
- if (! NODE_IS_RECURSION(node))\r
- r = get_char_len_node1(NODE_BODY(node), reg, len, level);\r
- else\r
- r = GET_CHAR_LEN_VARLEN;\r
- break;\r
-#endif\r
-\r
- case NODE_CTYPE:\r
- case NODE_CCLASS:\r
- *len = 1;\r
- break;\r
-\r
- case NODE_BAG:\r
- {\r
- BagNode* en = BAG_(node);\r
-\r
- switch (en->type) {\r
- case BAG_MEMORY:\r
-#ifdef USE_CALL\r
- if (NODE_IS_CLEN_FIXED(node))\r
- *len = en->char_len;\r
- else {\r
- r = get_char_len_node1(NODE_BODY(node), reg, len, level);\r
- if (r == 0) {\r
- en->char_len = *len;\r
- NODE_STATUS_ADD(node, CLEN_FIXED);\r
- }\r
- }\r
- break;\r
-#endif\r
- case BAG_OPTION:\r
- case BAG_STOP_BACKTRACK:\r
- r = get_char_len_node1(NODE_BODY(node), reg, len, level);\r
- break;\r
- case BAG_IF_ELSE:\r
- {\r
- int clen, elen;\r
-\r
- r = get_char_len_node1(NODE_BODY(node), reg, &clen, level);\r
- if (r == 0) {\r
- if (IS_NOT_NULL(en->te.Then)) {\r
- r = get_char_len_node1(en->te.Then, reg, &tlen, level);\r
- if (r != 0) break;\r
- }\r
- else tlen = 0;\r
- if (IS_NOT_NULL(en->te.Else)) {\r
- r = get_char_len_node1(en->te.Else, reg, &elen, level);\r
- if (r != 0) break;\r
- }\r
- else elen = 0;\r
-\r
- if (clen + tlen != elen) {\r
- r = GET_CHAR_LEN_VARLEN;\r
- }\r
- else {\r
- *len = elen;\r
- }\r
- }\r
- }\r
- break;\r
- }\r
- }\r
- break;\r
-\r
- case NODE_ANCHOR:\r
- case NODE_GIMMICK:\r
- break;\r
-\r
- case NODE_BACKREF:\r
- if (NODE_IS_CHECKER(node))\r
- break;\r
- /* fall */\r
- default:\r
- r = GET_CHAR_LEN_VARLEN;\r
- break;\r
- }\r
-\r
- return r;\r
-}\r
-\r
-static int\r
-get_char_len_node(Node* node, regex_t* reg, int* len)\r
-{\r
- return get_char_len_node1(node, reg, len, 0);\r
-}\r
-\r
-/* x is not included y ==> 1 : 0 */\r
-static int\r
-is_exclusive(Node* x, Node* y, regex_t* reg)\r
-{\r
- int i, len;\r
- OnigCodePoint code;\r
- UChar *p;\r
- NodeType ytype;\r
-\r
- retry:\r
- ytype = NODE_TYPE(y);\r
- switch (NODE_TYPE(x)) {\r
- case NODE_CTYPE:\r
- {\r
- if (CTYPE_(x)->ctype == CTYPE_ANYCHAR ||\r
- CTYPE_(y)->ctype == CTYPE_ANYCHAR)\r
- break;\r
-\r
- switch (ytype) {\r
- case NODE_CTYPE:\r
- if (CTYPE_(y)->ctype == CTYPE_(x)->ctype &&\r
- CTYPE_(y)->not != CTYPE_(x)->not &&\r
- CTYPE_(y)->ascii_mode == CTYPE_(x)->ascii_mode)\r
- return 1;\r
- else\r
- return 0;\r
- break;\r
-\r
- case NODE_CCLASS:\r
- swap:\r
- {\r
- Node* tmp;\r
- tmp = x; x = y; y = tmp;\r
- goto retry;\r
- }\r
- break;\r
-\r
- case NODE_STRING:\r
- goto swap;\r
- break;\r
-\r
- default:\r
- break;\r
- }\r
- }\r
- break;\r
-\r
- case NODE_CCLASS:\r
- {\r
- int range;\r
- CClassNode* xc = CCLASS_(x);\r
-\r
- switch (ytype) {\r
- case NODE_CTYPE:\r
- switch (CTYPE_(y)->ctype) {\r
- case CTYPE_ANYCHAR:\r
- return 0;\r
- break;\r
-\r
- case ONIGENC_CTYPE_WORD:\r
- if (CTYPE_(y)->not == 0) {\r
- if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {\r
- range = CTYPE_(y)->ascii_mode != 0 ? 128 : SINGLE_BYTE_SIZE;\r
- for (i = 0; i < range; i++) {\r
- if (BITSET_AT(xc->bs, i)) {\r
- if (ONIGENC_IS_CODE_WORD(reg->enc, i)) return 0;\r
- }\r
- }\r
- return 1;\r
- }\r
- return 0;\r
- }\r
- else {\r
- if (IS_NOT_NULL(xc->mbuf)) return 0;\r
- if (IS_NCCLASS_NOT(xc)) return 0;\r
-\r
- range = CTYPE_(y)->ascii_mode != 0 ? 128 : SINGLE_BYTE_SIZE;\r
- for (i = 0; i < range; i++) {\r
- if (! ONIGENC_IS_CODE_WORD(reg->enc, i)) {\r
- if (BITSET_AT(xc->bs, i))\r
- return 0;\r
- }\r
- }\r
- for (i = range; i < SINGLE_BYTE_SIZE; i++) {\r
- if (BITSET_AT(xc->bs, i)) return 0;\r
- }\r
- return 1;\r
- }\r
- break;\r
-\r
- default:\r
- break;\r
- }\r
- break;\r
-\r
- case NODE_CCLASS:\r
- {\r
- int v;\r
- CClassNode* yc = CCLASS_(y);\r
-\r
- for (i = 0; i < SINGLE_BYTE_SIZE; i++) {\r
- v = BITSET_AT(xc->bs, i);\r
- if ((v != 0 && !IS_NCCLASS_NOT(xc)) || (v == 0 && IS_NCCLASS_NOT(xc))) {\r
- v = BITSET_AT(yc->bs, i);\r
- if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||\r
- (v == 0 && IS_NCCLASS_NOT(yc)))\r
- return 0;\r
- }\r
- }\r
- if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||\r
- (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))\r
- return 1;\r
- return 0;\r
- }\r
- break;\r
-\r
- case NODE_STRING:\r
- goto swap;\r
- break;\r
-\r
- default:\r
- break;\r
- }\r
- }\r
- break;\r
-\r
- case NODE_STRING:\r
- {\r
- StrNode* xs = STR_(x);\r
-\r
- if (NODE_STRING_LEN(x) == 0)\r
- break;\r
-\r
- switch (ytype) {\r
- case NODE_CTYPE:\r
- switch (CTYPE_(y)->ctype) {\r
- case CTYPE_ANYCHAR:\r
- break;\r
-\r
- case ONIGENC_CTYPE_WORD:\r
- if (CTYPE_(y)->ascii_mode == 0) {\r
- if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))\r
- return CTYPE_(y)->not;\r
- else\r
- return !(CTYPE_(y)->not);\r
- }\r
- else {\r
- if (ONIGENC_IS_MBC_WORD_ASCII(reg->enc, xs->s, xs->end))\r
- return CTYPE_(y)->not;\r
- else\r
- return !(CTYPE_(y)->not);\r
- }\r
- break;\r
- default:\r
- break;\r
- }\r
- break;\r
-\r
- case NODE_CCLASS:\r
- {\r
- CClassNode* cc = CCLASS_(y);\r
-\r
- code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,\r
- xs->s + ONIGENC_MBC_MAXLEN(reg->enc));\r
- return onig_is_code_in_cc(reg->enc, code, cc) == 0;\r
- }\r
- break;\r
-\r
- case NODE_STRING:\r
- {\r
- UChar *q;\r
- StrNode* ys = STR_(y);\r
-\r
- len = NODE_STRING_LEN(x);\r
- if (len > NODE_STRING_LEN(y)) len = NODE_STRING_LEN(y);\r
- if (NODE_STRING_IS_AMBIG(x) || NODE_STRING_IS_AMBIG(y)) {\r
- /* tiny version */\r
- return 0;\r
- }\r
- else {\r
- for (i = 0, p = ys->s, q = xs->s; i < len; i++, p++, q++) {\r
- if (*p != *q) return 1;\r
- }\r
- }\r
- }\r
- break;\r
-\r
- default:\r
- break;\r
- }\r
- }\r
- break;\r
-\r
- default:\r
- break;\r
- }\r
-\r
- return 0;\r
-}\r
-\r
-static Node*\r
-get_head_value_node(Node* node, int exact, regex_t* reg)\r
-{\r
- Node* n = NULL_NODE;\r
-\r
- switch (NODE_TYPE(node)) {\r
- case NODE_BACKREF:\r
- case NODE_ALT:\r
-#ifdef USE_CALL\r
- case NODE_CALL:\r
-#endif\r
- break;\r
-\r
- case NODE_CTYPE:\r
- if (CTYPE_(node)->ctype == CTYPE_ANYCHAR)\r
- break;\r
- /* fall */\r
- case NODE_CCLASS:\r
- if (exact == 0) {\r
- n = node;\r
- }\r
- break;\r
-\r
- case NODE_LIST:\r
- n = get_head_value_node(NODE_CAR(node), exact, reg);\r
- break;\r
-\r
- case NODE_STRING:\r
- {\r
- StrNode* sn = STR_(node);\r
-\r
- if (sn->end <= sn->s)\r
- break;\r
-\r
- if (exact == 0 ||\r
- ! IS_IGNORECASE(reg->options) || NODE_STRING_IS_RAW(node)) {\r
- n = node;\r
- }\r
- }\r
- break;\r
-\r
- case NODE_QUANT:\r
- {\r
- QuantNode* qn = QUANT_(node);\r
- if (qn->lower > 0) {\r
- if (IS_NOT_NULL(qn->head_exact))\r
- n = qn->head_exact;\r
- else\r
- n = get_head_value_node(NODE_BODY(node), exact, reg);\r
- }\r
- }\r
- break;\r
-\r
- case NODE_BAG:\r
- {\r
- BagNode* en = BAG_(node);\r
- switch (en->type) {\r
- case BAG_OPTION:\r
- {\r
- OnigOptionType options = reg->options;\r
-\r
- reg->options = BAG_(node)->o.options;\r
- n = get_head_value_node(NODE_BODY(node), exact, reg);\r
- reg->options = options;\r
- }\r
- break;\r
-\r
- case BAG_MEMORY:\r
- case BAG_STOP_BACKTRACK:\r
- case BAG_IF_ELSE:\r
- n = get_head_value_node(NODE_BODY(node), exact, reg);\r
- break;\r
- }\r
- }\r
- break;\r
-\r
- case NODE_ANCHOR:\r
- if (ANCHOR_(node)->type == ANCR_PREC_READ)\r
- n = get_head_value_node(NODE_BODY(node), exact, reg);\r
- break;\r
-\r
- case NODE_GIMMICK:\r
- default:\r
- break;\r
- }\r
-\r
- return n;\r
-}\r
-\r
-static int\r
-check_type_tree(Node* node, int type_mask, int bag_mask, int anchor_mask)\r
-{\r
- NodeType type;\r
- int r = 0;\r
-\r
- type = NODE_TYPE(node);\r
- if ((NODE_TYPE2BIT(type) & type_mask) == 0)\r
- return 1;\r
-\r
- switch (type) {\r
- case NODE_LIST:\r
- case NODE_ALT:\r
- do {\r
- r = check_type_tree(NODE_CAR(node), type_mask, bag_mask, anchor_mask);\r
- } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));\r
- break;\r
-\r
- case NODE_QUANT:\r
- r = check_type_tree(NODE_BODY(node), type_mask, bag_mask, anchor_mask);\r
- break;\r
-\r
- case NODE_BAG:\r
- {\r
- BagNode* en = BAG_(node);\r
- if (((1<<en->type) & bag_mask) == 0)\r
- return 1;\r
-\r
- r = check_type_tree(NODE_BODY(node), type_mask, bag_mask, anchor_mask);\r
- if (r == 0 && en->type == BAG_IF_ELSE) {\r
- if (IS_NOT_NULL(en->te.Then)) {\r
- r = check_type_tree(en->te.Then, type_mask, bag_mask, anchor_mask);\r
- if (r != 0) break;\r
- }\r
- if (IS_NOT_NULL(en->te.Else)) {\r
- r = check_type_tree(en->te.Else, type_mask, bag_mask, anchor_mask);\r
- }\r
- }\r
- }\r
- break;\r
-\r
- case NODE_ANCHOR:\r
- type = ANCHOR_(node)->type;\r
- if ((type & anchor_mask) == 0)\r
- return 1;\r
-\r
- if (IS_NOT_NULL(NODE_BODY(node)))\r
- r = check_type_tree(NODE_BODY(node), type_mask, bag_mask, anchor_mask);\r
- break;\r
-\r
- case NODE_GIMMICK:\r
- default:\r
- break;\r
- }\r
- return r;\r
-}\r
-\r
-static OnigLen\r
-tree_min_len(Node* node, ScanEnv* env)\r
-{\r
- OnigLen len;\r
- OnigLen tmin;\r
-\r
- len = 0;\r
- switch (NODE_TYPE(node)) {\r
- case NODE_BACKREF:\r
- if (! NODE_IS_CHECKER(node)) {\r
- int i;\r
- int* backs;\r
- MemEnv* mem_env = SCANENV_MEMENV(env);\r
- BackRefNode* br = BACKREF_(node);\r
- if (NODE_IS_RECURSION(node)) break;\r
-\r
- backs = BACKREFS_P(br);\r
- len = tree_min_len(mem_env[backs[0]].node, env);\r
- for (i = 1; i < br->back_num; i++) {\r
- tmin = tree_min_len(mem_env[backs[i]].node, env);\r
- if (len > tmin) len = tmin;\r
- }\r
- }\r
- break;\r
-\r
-#ifdef USE_CALL\r
- case NODE_CALL:\r
- {\r
- Node* t = NODE_BODY(node);\r
- if (NODE_IS_RECURSION(node)) {\r
- if (NODE_IS_MIN_FIXED(t))\r
- len = BAG_(t)->min_len;\r
- }\r
- else\r
- len = tree_min_len(t, env);\r
- }\r
- break;\r
-#endif\r
-\r
- case NODE_LIST:\r
- do {\r
- tmin = tree_min_len(NODE_CAR(node), env);\r
- len = distance_add(len, tmin);\r
- } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
- break;\r
-\r
- case NODE_ALT:\r
- {\r
- Node *x, *y;\r
- y = node;\r
- do {\r
- x = NODE_CAR(y);\r
- tmin = tree_min_len(x, env);\r
- if (y == node) len = tmin;\r
- else if (len > tmin) len = tmin;\r
- } while (IS_NOT_NULL(y = NODE_CDR(y)));\r
- }\r
- break;\r
-\r
- case NODE_STRING:\r
- {\r
- StrNode* sn = STR_(node);\r
- len = (int )(sn->end - sn->s);\r
- }\r
- break;\r
-\r
- case NODE_CTYPE:\r
- case NODE_CCLASS:\r
- len = ONIGENC_MBC_MINLEN(env->enc);\r
- break;\r
-\r
- case NODE_QUANT:\r
- {\r
- QuantNode* qn = QUANT_(node);\r
-\r
- if (qn->lower > 0) {\r
- len = tree_min_len(NODE_BODY(node), env);\r
- len = distance_multiply(len, qn->lower);\r
- }\r
- }\r
- break;\r
-\r
- case NODE_BAG:\r
- {\r
- BagNode* en = BAG_(node);\r
- switch (en->type) {\r
- case BAG_MEMORY:\r
- if (NODE_IS_MIN_FIXED(node))\r
- len = en->min_len;\r
- else {\r
- if (NODE_IS_MARK1(node))\r
- len = 0; /* recursive */\r
- else {\r
- NODE_STATUS_ADD(node, MARK1);\r
- len = tree_min_len(NODE_BODY(node), env);\r
- NODE_STATUS_REMOVE(node, MARK1);\r
-\r
- en->min_len = len;\r
- NODE_STATUS_ADD(node, MIN_FIXED);\r
- }\r
- }\r
- break;\r
-\r
- case BAG_OPTION:\r
- case BAG_STOP_BACKTRACK:\r
- len = tree_min_len(NODE_BODY(node), env);\r
- break;\r
- case BAG_IF_ELSE:\r
- {\r
- OnigLen elen;\r
-\r
- len = tree_min_len(NODE_BODY(node), env);\r
- if (IS_NOT_NULL(en->te.Then))\r
- len += tree_min_len(en->te.Then, env);\r
- if (IS_NOT_NULL(en->te.Else))\r
- elen = tree_min_len(en->te.Else, env);\r
- else elen = 0;\r
-\r
- if (elen < len) len = elen;\r
- }\r
- break;\r
- }\r
- }\r
- break;\r
-\r
- case NODE_GIMMICK:\r
- {\r
- GimmickNode* g = GIMMICK_(node);\r
- if (g->type == GIMMICK_FAIL) {\r
- len = INFINITE_LEN;\r
- break;\r
- }\r
- }\r
- /* fall */\r
- case NODE_ANCHOR:\r
- default:\r
- break;\r
- }\r
-\r
- return len;\r
-}\r
-\r
-static OnigLen\r
-tree_max_len(Node* node, ScanEnv* env)\r
-{\r
- OnigLen len;\r
- OnigLen tmax;\r
-\r
- len = 0;\r
- switch (NODE_TYPE(node)) {\r
- case NODE_LIST:\r
- do {\r
- tmax = tree_max_len(NODE_CAR(node), env);\r
- len = distance_add(len, tmax);\r
- } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
- break;\r
-\r
- case NODE_ALT:\r
- do {\r
- tmax = tree_max_len(NODE_CAR(node), env);\r
- if (len < tmax) len = tmax;\r
- } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
- break;\r
-\r
- case NODE_STRING:\r
- {\r
- StrNode* sn = STR_(node);\r
- len = (OnigLen )(sn->end - sn->s);\r
- }\r
- break;\r
-\r
- case NODE_CTYPE:\r
- case NODE_CCLASS:\r
- len = ONIGENC_MBC_MAXLEN_DIST(env->enc);\r
- break;\r
-\r
- case NODE_BACKREF:\r
- if (! NODE_IS_CHECKER(node)) {\r
- int i;\r
- int* backs;\r
- MemEnv* mem_env = SCANENV_MEMENV(env);\r
- BackRefNode* br = BACKREF_(node);\r
- if (NODE_IS_RECURSION(node)) {\r
- len = INFINITE_LEN;\r
- break;\r
- }\r
- backs = BACKREFS_P(br);\r
- for (i = 0; i < br->back_num; i++) {\r
- tmax = tree_max_len(mem_env[backs[i]].node, env);\r
- if (len < tmax) len = tmax;\r
- }\r
- }\r
- break;\r
-\r
-#ifdef USE_CALL\r
- case NODE_CALL:\r
- if (! NODE_IS_RECURSION(node))\r
- len = tree_max_len(NODE_BODY(node), env);\r
- else\r
- len = INFINITE_LEN;\r
- break;\r
-#endif\r
-\r
- case NODE_QUANT:\r
- {\r
- QuantNode* qn = QUANT_(node);\r
-\r
- if (qn->upper != 0) {\r
- len = tree_max_len(NODE_BODY(node), env);\r
- if (len != 0) {\r
- if (! IS_INFINITE_REPEAT(qn->upper))\r
- len = distance_multiply(len, qn->upper);\r
- else\r
- len = INFINITE_LEN;\r
- }\r
- }\r
- }\r
- break;\r
-\r
- case NODE_BAG:\r
- {\r
- BagNode* en = BAG_(node);\r
- switch (en->type) {\r
- case BAG_MEMORY:\r
- if (NODE_IS_MAX_FIXED(node))\r
- len = en->max_len;\r
- else {\r
- if (NODE_IS_MARK1(node))\r
- len = INFINITE_LEN;\r
- else {\r
- NODE_STATUS_ADD(node, MARK1);\r
- len = tree_max_len(NODE_BODY(node), env);\r
- NODE_STATUS_REMOVE(node, MARK1);\r
-\r
- en->max_len = len;\r
- NODE_STATUS_ADD(node, MAX_FIXED);\r
- }\r
- }\r
- break;\r
-\r
- case BAG_OPTION:\r
- case BAG_STOP_BACKTRACK:\r
- len = tree_max_len(NODE_BODY(node), env);\r
- break;\r
- case BAG_IF_ELSE:\r
- {\r
- OnigLen tlen, elen;\r
-\r
- len = tree_max_len(NODE_BODY(node), env);\r
- if (IS_NOT_NULL(en->te.Then)) {\r
- tlen = tree_max_len(en->te.Then, env);\r
- len = distance_add(len, tlen);\r
- }\r
- if (IS_NOT_NULL(en->te.Else))\r
- elen = tree_max_len(en->te.Else, env);\r
- else elen = 0;\r
-\r
- if (elen > len) len = elen;\r
- }\r
- break;\r
- }\r
- }\r
- break;\r
-\r
- case NODE_ANCHOR:\r
- case NODE_GIMMICK:\r
- default:\r
- break;\r
- }\r
-\r
- return len;\r
-}\r
-\r
-static int\r
-check_backrefs(Node* node, ScanEnv* env)\r
-{\r
- int r;\r
-\r
- switch (NODE_TYPE(node)) {\r
- case NODE_LIST:\r
- case NODE_ALT:\r
- do {\r
- r = check_backrefs(NODE_CAR(node), env);\r
- } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));\r
- break;\r
-\r
- case NODE_ANCHOR:\r
- if (! ANCHOR_HAS_BODY(ANCHOR_(node))) {\r
- r = 0;\r
- break;\r
- }\r
- /* fall */\r
- case NODE_QUANT:\r
- r = check_backrefs(NODE_BODY(node), env);\r
- break;\r
-\r
- case NODE_BAG:\r
- r = check_backrefs(NODE_BODY(node), env);\r
- {\r
- BagNode* en = BAG_(node);\r
-\r
- if (en->type == BAG_IF_ELSE) {\r
- if (r != 0) return r;\r
- if (IS_NOT_NULL(en->te.Then)) {\r
- r = check_backrefs(en->te.Then, env);\r
- if (r != 0) return r;\r
- }\r
- if (IS_NOT_NULL(en->te.Else)) {\r
- r = check_backrefs(en->te.Else, env);\r
- }\r
- }\r
- }\r
- break;\r
-\r
- case NODE_BACKREF:\r
- {\r
- int i;\r
- BackRefNode* br = BACKREF_(node);\r
- int* backs = BACKREFS_P(br);\r
- MemEnv* mem_env = SCANENV_MEMENV(env);\r
-\r
- for (i = 0; i < br->back_num; i++) {\r
- if (backs[i] > env->num_mem)\r
- return ONIGERR_INVALID_BACKREF;\r
-\r
- NODE_STATUS_ADD(mem_env[backs[i]].node, BACKREF);\r
- }\r
- r = 0;\r
- }\r
- break;\r
-\r
- default:\r
- r = 0;\r
- break;\r
- }\r
-\r
- return r;\r
-}\r
-\r
-\r
-#ifdef USE_CALL\r
-\r
-#define RECURSION_EXIST (1<<0)\r
-#define RECURSION_MUST (1<<1)\r
-#define RECURSION_INFINITE (1<<2)\r
-\r
-static int\r
-infinite_recursive_call_check(Node* node, ScanEnv* env, int head)\r
-{\r
- int ret;\r
- int r = 0;\r
-\r
- switch (NODE_TYPE(node)) {\r
- case NODE_LIST:\r
- {\r
- Node *x;\r
- OnigLen min;\r
-\r
- x = node;\r
- do {\r
- ret = infinite_recursive_call_check(NODE_CAR(x), env, head);\r
- if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;\r
- r |= ret;\r
- if (head != 0) {\r
- min = tree_min_len(NODE_CAR(x), env);\r
- if (min != 0) head = 0;\r
- }\r
- } while (IS_NOT_NULL(x = NODE_CDR(x)));\r
- }\r
- break;\r
-\r
- case NODE_ALT:\r
- {\r
- int must;\r
-\r
- must = RECURSION_MUST;\r
- do {\r
- ret = infinite_recursive_call_check(NODE_CAR(node), env, head);\r
- if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;\r
-\r
- r |= (ret & RECURSION_EXIST);\r
- must &= ret;\r
- } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
- r |= must;\r
- }\r
- break;\r
-\r
- case NODE_QUANT:\r
- r = infinite_recursive_call_check(NODE_BODY(node), env, head);\r
- if (r < 0) return r;\r
- if ((r & RECURSION_MUST) != 0) {\r
- if (QUANT_(node)->lower == 0)\r
- r &= ~RECURSION_MUST;\r
- }\r
- break;\r
-\r
- case NODE_ANCHOR:\r
- if (! ANCHOR_HAS_BODY(ANCHOR_(node)))\r
- break;\r
- /* fall */\r
- case NODE_CALL:\r
- r = infinite_recursive_call_check(NODE_BODY(node), env, head);\r
- break;\r
-\r
- case NODE_BAG:\r
- {\r
- BagNode* en = BAG_(node);\r
-\r
- if (en->type == BAG_MEMORY) {\r
- if (NODE_IS_MARK2(node))\r
- return 0;\r
- else if (NODE_IS_MARK1(node))\r
- return (head == 0 ? RECURSION_EXIST | RECURSION_MUST\r
- : RECURSION_EXIST | RECURSION_MUST | RECURSION_INFINITE);\r
- else {\r
- NODE_STATUS_ADD(node, MARK2);\r
- r = infinite_recursive_call_check(NODE_BODY(node), env, head);\r
- NODE_STATUS_REMOVE(node, MARK2);\r
- }\r
- }\r
- else if (en->type == BAG_IF_ELSE) {\r
- int eret;\r
-\r
- ret = infinite_recursive_call_check(NODE_BODY(node), env, head);\r
- if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;\r
- r |= ret;\r
- if (IS_NOT_NULL(en->te.Then)) {\r
- OnigLen min;\r
- if (head != 0) {\r
- min = tree_min_len(NODE_BODY(node), env);\r
- }\r
- else min = 0;\r
-\r
- ret = infinite_recursive_call_check(en->te.Then, env, min != 0 ? 0:head);\r
- if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;\r
- r |= ret;\r
- }\r
- if (IS_NOT_NULL(en->te.Else)) {\r
- eret = infinite_recursive_call_check(en->te.Else, env, head);\r
- if (eret < 0 || (eret & RECURSION_INFINITE) != 0) return eret;\r
- r |= (eret & RECURSION_EXIST);\r
- if ((eret & RECURSION_MUST) == 0)\r
- r &= ~RECURSION_MUST;\r
- }\r
- }\r
- else {\r
- r = infinite_recursive_call_check(NODE_BODY(node), env, head);\r
- }\r
- }\r
- break;\r
-\r
- default:\r
- break;\r
- }\r
-\r
- return r;\r
-}\r
-\r
-static int\r
-infinite_recursive_call_check_trav(Node* node, ScanEnv* env)\r
-{\r
- int r;\r
-\r
- switch (NODE_TYPE(node)) {\r
- case NODE_LIST:\r
- case NODE_ALT:\r
- do {\r
- r = infinite_recursive_call_check_trav(NODE_CAR(node), env);\r
- } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));\r
- break;\r
-\r
- case NODE_ANCHOR:\r
- if (! ANCHOR_HAS_BODY(ANCHOR_(node))) {\r
- r = 0;\r
- break;\r
- }\r
- /* fall */\r
- case NODE_QUANT:\r
- r = infinite_recursive_call_check_trav(NODE_BODY(node), env);\r
- break;\r
-\r
- case NODE_BAG:\r
- {\r
- BagNode* en = BAG_(node);\r
-\r
- if (en->type == BAG_MEMORY) {\r
- if (NODE_IS_RECURSION(node) && NODE_IS_CALLED(node)) {\r
- int ret;\r
-\r
- NODE_STATUS_ADD(node, MARK1);\r
-\r
- ret = infinite_recursive_call_check(NODE_BODY(node), env, 1);\r
- if (ret < 0) return ret;\r
- else if ((ret & (RECURSION_MUST | RECURSION_INFINITE)) != 0)\r
- return ONIGERR_NEVER_ENDING_RECURSION;\r
-\r
- NODE_STATUS_REMOVE(node, MARK1);\r
- }\r
- }\r
- else if (en->type == BAG_IF_ELSE) {\r
- if (IS_NOT_NULL(en->te.Then)) {\r
- r = infinite_recursive_call_check_trav(en->te.Then, env);\r
- if (r != 0) return r;\r
- }\r
- if (IS_NOT_NULL(en->te.Else)) {\r
- r = infinite_recursive_call_check_trav(en->te.Else, env);\r
- if (r != 0) return r;\r
- }\r
- }\r
- }\r
-\r
- r = infinite_recursive_call_check_trav(NODE_BODY(node), env);\r
- break;\r
-\r
- default:\r
- r = 0;\r
- break;\r
- }\r
-\r
- return r;\r
-}\r
-\r
-static int\r
-recursive_call_check(Node* node)\r
-{\r
- int r;\r
-\r
- switch (NODE_TYPE(node)) {\r
- case NODE_LIST:\r
- case NODE_ALT:\r
- r = 0;\r
- do {\r
- r |= recursive_call_check(NODE_CAR(node));\r
- } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
- break;\r
-\r
- case NODE_ANCHOR:\r
- if (! ANCHOR_HAS_BODY(ANCHOR_(node))) {\r
- r = 0;\r
- break;\r
- }\r
- /* fall */\r
- case NODE_QUANT:\r
- r = recursive_call_check(NODE_BODY(node));\r
- break;\r
-\r
- case NODE_CALL:\r
- r = recursive_call_check(NODE_BODY(node));\r
- if (r != 0) {\r
- if (NODE_IS_MARK1(NODE_BODY(node)))\r
- NODE_STATUS_ADD(node, RECURSION);\r
- }\r
- break;\r
-\r
- case NODE_BAG:\r
- {\r
- BagNode* en = BAG_(node);\r
-\r
- if (en->type == BAG_MEMORY) {\r
- if (NODE_IS_MARK2(node))\r
- return 0;\r
- else if (NODE_IS_MARK1(node))\r
- return 1; /* recursion */\r
- else {\r
- NODE_STATUS_ADD(node, MARK2);\r
- r = recursive_call_check(NODE_BODY(node));\r
- NODE_STATUS_REMOVE(node, MARK2);\r
- }\r
- }\r
- else if (en->type == BAG_IF_ELSE) {\r
- r = 0;\r
- if (IS_NOT_NULL(en->te.Then)) {\r
- r |= recursive_call_check(en->te.Then);\r
- }\r
- if (IS_NOT_NULL(en->te.Else)) {\r
- r |= recursive_call_check(en->te.Else);\r
- }\r
- r |= recursive_call_check(NODE_BODY(node));\r
- }\r
- else {\r
- r = recursive_call_check(NODE_BODY(node));\r
- }\r
- }\r
- break;\r
-\r
- default:\r
- r = 0;\r
- break;\r
- }\r
-\r
- return r;\r
-}\r
-\r
-#define IN_RECURSION (1<<0)\r
-#define FOUND_CALLED_NODE 1\r
-\r
-static int\r
-recursive_call_check_trav(Node* node, ScanEnv* env, int state)\r
-{\r
- int r = 0;\r
-\r
- switch (NODE_TYPE(node)) {\r
- case NODE_LIST:\r
- case NODE_ALT:\r
- {\r
- int ret;\r
- do {\r
- ret = recursive_call_check_trav(NODE_CAR(node), env, state);\r
- if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;\r
- else if (ret < 0) return ret;\r
- } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
- }\r
- break;\r
-\r
- case NODE_QUANT:\r
- r = recursive_call_check_trav(NODE_BODY(node), env, state);\r
- if (QUANT_(node)->upper == 0) {\r
- if (r == FOUND_CALLED_NODE)\r
- QUANT_(node)->is_refered = 1;\r
- }\r
- break;\r
-\r
- case NODE_ANCHOR:\r
- {\r
- AnchorNode* an = ANCHOR_(node);\r
- if (ANCHOR_HAS_BODY(an))\r
- r = recursive_call_check_trav(NODE_ANCHOR_BODY(an), env, state);\r
- }\r
- break;\r
-\r
- case NODE_BAG:\r
- {\r
- int ret;\r
- int state1;\r
- BagNode* en = BAG_(node);\r
-\r
- if (en->type == BAG_MEMORY) {\r
- if (NODE_IS_CALLED(node) || (state & IN_RECURSION) != 0) {\r
- if (! NODE_IS_RECURSION(node)) {\r
- NODE_STATUS_ADD(node, MARK1);\r
- r = recursive_call_check(NODE_BODY(node));\r
- if (r != 0)\r
- NODE_STATUS_ADD(node, RECURSION);\r
- NODE_STATUS_REMOVE(node, MARK1);\r
- }\r
-\r
- if (NODE_IS_CALLED(node))\r
- r = FOUND_CALLED_NODE;\r
- }\r
- }\r
-\r
- state1 = state;\r
- if (NODE_IS_RECURSION(node))\r
- state1 |= IN_RECURSION;\r
-\r
- ret = recursive_call_check_trav(NODE_BODY(node), env, state1);\r
- if (ret == FOUND_CALLED_NODE)\r
- r = FOUND_CALLED_NODE;\r
-\r
- if (en->type == BAG_IF_ELSE) {\r
- if (IS_NOT_NULL(en->te.Then)) {\r
- ret = recursive_call_check_trav(en->te.Then, env, state1);\r
- if (ret == FOUND_CALLED_NODE)\r
- r = FOUND_CALLED_NODE;\r
- }\r
- if (IS_NOT_NULL(en->te.Else)) {\r
- ret = recursive_call_check_trav(en->te.Else, env, state1);\r
- if (ret == FOUND_CALLED_NODE)\r
- r = FOUND_CALLED_NODE;\r
- }\r
- }\r
- }\r
- break;\r
-\r
- default:\r
- break;\r
- }\r
-\r
- return r;\r
-}\r
-\r
-#endif\r
-\r
-#define IN_ALT (1<<0)\r
-#define IN_NOT (1<<1)\r
-#define IN_REAL_REPEAT (1<<2)\r
-#define IN_VAR_REPEAT (1<<3)\r
-#define IN_ZERO_REPEAT (1<<4)\r
-#define IN_MULTI_ENTRY (1<<5)\r
-#define IN_LOOK_BEHIND (1<<6)\r
-\r
-\r
-/* divide different length alternatives in look-behind.\r
- (?<=A|B) ==> (?<=A)|(?<=B)\r
- (?<!A|B) ==> (?<!A)(?<!B)\r
-*/\r
-static int\r
-divide_look_behind_alternatives(Node* node)\r
-{\r
- Node *head, *np, *insert_node;\r
- AnchorNode* an = ANCHOR_(node);\r
- int anc_type = an->type;\r
-\r
- head = NODE_ANCHOR_BODY(an);\r
- np = NODE_CAR(head);\r
- swap_node(node, head);\r
- NODE_CAR(node) = head;\r
- NODE_BODY(head) = np;\r
-\r
- np = node;\r
- while (IS_NOT_NULL(np = NODE_CDR(np))) {\r
- insert_node = onig_node_new_anchor(anc_type, an->ascii_mode);\r
- CHECK_NULL_RETURN_MEMERR(insert_node);\r
- NODE_BODY(insert_node) = NODE_CAR(np);\r
- NODE_CAR(np) = insert_node;\r
- }\r
-\r
- if (anc_type == ANCR_LOOK_BEHIND_NOT) {\r
- np = node;\r
- do {\r
- NODE_SET_TYPE(np, NODE_LIST); /* alt -> list */\r
- } while (IS_NOT_NULL(np = NODE_CDR(np)));\r
- }\r
- return 0;\r
-}\r
-\r
-static int\r
-setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)\r
-{\r
- int r, len;\r
- AnchorNode* an = ANCHOR_(node);\r
-\r
- r = get_char_len_node(NODE_ANCHOR_BODY(an), reg, &len);\r
- if (r == 0)\r
- an->char_len = len;\r
- else if (r == GET_CHAR_LEN_VARLEN)\r
- r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;\r
- else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {\r
- if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))\r
- r = divide_look_behind_alternatives(node);\r
- else\r
- r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;\r
- }\r
-\r
- return r;\r
-}\r
-\r
-static int\r
-next_setup(Node* node, Node* next_node, regex_t* reg)\r
-{\r
- NodeType type;\r
-\r
- retry:\r
- type = NODE_TYPE(node);\r
- if (type == NODE_QUANT) {\r
- QuantNode* qn = QUANT_(node);\r
- if (qn->greedy && IS_INFINITE_REPEAT(qn->upper)) {\r
-#ifdef USE_QUANT_PEEK_NEXT\r
- Node* n = get_head_value_node(next_node, 1, reg);\r
- /* '\0': for UTF-16BE etc... */\r
- if (IS_NOT_NULL(n) && STR_(n)->s[0] != '\0') {\r
- qn->next_head_exact = n;\r
- }\r
-#endif\r
- /* automatic posseivation a*b ==> (?>a*)b */\r
- if (qn->lower <= 1) {\r
- if (is_strict_real_node(NODE_BODY(node))) {\r
- Node *x, *y;\r
- x = get_head_value_node(NODE_BODY(node), 0, reg);\r
- if (IS_NOT_NULL(x)) {\r
- y = get_head_value_node(next_node, 0, reg);\r
- if (IS_NOT_NULL(y) && is_exclusive(x, y, reg)) {\r
- Node* en = onig_node_new_bag(BAG_STOP_BACKTRACK);\r
- CHECK_NULL_RETURN_MEMERR(en);\r
- NODE_STATUS_ADD(en, STRICT_REAL_REPEAT);\r
- swap_node(node, en);\r
- NODE_BODY(node) = en;\r
- }\r
- }\r
- }\r
- }\r
- }\r
- }\r
- else if (type == NODE_BAG) {\r
- BagNode* en = BAG_(node);\r
- if (en->type == BAG_MEMORY) {\r
- node = NODE_BODY(node);\r
- goto retry;\r
- }\r
- }\r
- return 0;\r
-}\r
-\r
-\r
-static int\r
-update_string_node_case_fold(regex_t* reg, Node *node)\r
-{\r
- UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];\r
- UChar *sbuf, *ebuf, *sp;\r
- int r, i, len, sbuf_size;\r
- StrNode* sn = STR_(node);\r
-\r
- end = sn->end;\r
- sbuf_size = (int )(end - sn->s) * 2;\r
- sbuf = (UChar* )xmalloc(sbuf_size);\r
- CHECK_NULL_RETURN_MEMERR(sbuf);\r
- ebuf = sbuf + sbuf_size;\r
-\r
- sp = sbuf;\r
- p = sn->s;\r
- while (p < end) {\r
- len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);\r
- for (i = 0; i < len; i++) {\r
- if (sp >= ebuf) {\r
- sbuf = (UChar* )xrealloc(sbuf, sbuf_size * 2, sbuf_size);\r
- CHECK_NULL_RETURN_MEMERR(sbuf);\r
- sp = sbuf + sbuf_size;\r
- sbuf_size *= 2;\r
- ebuf = sbuf + sbuf_size;\r
- }\r
-\r
- *sp++ = buf[i];\r
- }\r
- }\r
-\r
- r = onig_node_str_set(node, sbuf, sp);\r
- if (r != 0) {\r
- xfree(sbuf);\r
- return r;\r
- }\r
-\r
- xfree(sbuf);\r
- return 0;\r
-}\r
-\r
-static int\r
-expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end, regex_t* reg)\r
-{\r
- int r;\r
- Node *node;\r
-\r
- node = onig_node_new_str(s, end);\r
- if (IS_NULL(node)) return ONIGERR_MEMORY;\r
-\r
- r = update_string_node_case_fold(reg, node);\r
- if (r != 0) {\r
- onig_node_free(node);\r
- return r;\r
- }\r
-\r
- NODE_STRING_SET_AMBIG(node);\r
- NODE_STRING_SET_DONT_GET_OPT_INFO(node);\r
- *rnode = node;\r
- return 0;\r
-}\r
-\r
-static int\r
-expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], UChar *p,\r
- int slen, UChar *end, regex_t* reg, Node **rnode)\r
-{\r
- int r, i, j;\r
- int len;\r
- int varlen;\r
- Node *anode, *var_anode, *snode, *xnode, *an;\r
- UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];\r
-\r
- *rnode = var_anode = NULL_NODE;\r
-\r
- varlen = 0;\r
- for (i = 0; i < item_num; i++) {\r
- if (items[i].byte_len != slen) {\r
- varlen = 1;\r
- break;\r
- }\r
- }\r
-\r
- if (varlen != 0) {\r
- *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);\r
- if (IS_NULL(var_anode)) return ONIGERR_MEMORY;\r
-\r
- xnode = onig_node_new_list(NULL, NULL);\r
- if (IS_NULL(xnode)) goto mem_err;\r
- NODE_CAR(var_anode) = xnode;\r
-\r
- anode = onig_node_new_alt(NULL_NODE, NULL_NODE);\r
- if (IS_NULL(anode)) goto mem_err;\r
- NODE_CAR(xnode) = anode;\r
- }\r
- else {\r
- *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);\r
- if (IS_NULL(anode)) return ONIGERR_MEMORY;\r
- }\r
-\r
- snode = onig_node_new_str(p, p + slen);\r
- if (IS_NULL(snode)) goto mem_err;\r
-\r
- NODE_CAR(anode) = snode;\r
-\r
- for (i = 0; i < item_num; i++) {\r
- snode = onig_node_new_str(NULL, NULL);\r
- if (IS_NULL(snode)) goto mem_err;\r
-\r
- for (j = 0; j < items[i].code_len; j++) {\r
- len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);\r
- if (len < 0) {\r
- r = len;\r
- goto mem_err2;\r
- }\r
-\r
- r = onig_node_str_cat(snode, buf, buf + len);\r
- if (r != 0) goto mem_err2;\r
- }\r
-\r
- an = onig_node_new_alt(NULL_NODE, NULL_NODE);\r
- if (IS_NULL(an)) {\r
- goto mem_err2;\r
- }\r
- //The NULL pointer check is not necessary. It is added just for pass static\r
- //analysis. When condition "items[i].byte_len != slen" is true, "varlen = 1"\r
- //in line 3503 will be reached ,so that "if (IS_NULL(var_anode)) return ONIGERR_MEMORY"\r
- //in line 3510 will be executed, so the null pointer has been checked before\r
- //deferenced in line 3584.\r
- if (items[i].byte_len != slen && IS_NOT_NULL(var_anode)) {\r
- Node *rem;\r
- UChar *q = p + items[i].byte_len;\r
-\r
- if (q < end) {\r
- r = expand_case_fold_make_rem_string(&rem, q, end, reg);\r
- if (r != 0) {\r
- onig_node_free(an);\r
- goto mem_err2;\r
- }\r
-\r
- xnode = onig_node_list_add(NULL_NODE, snode);\r
- if (IS_NULL(xnode)) {\r
- onig_node_free(an);\r
- onig_node_free(rem);\r
- goto mem_err2;\r
- }\r
- if (IS_NULL(onig_node_list_add(xnode, rem))) {\r
- onig_node_free(an);\r
- onig_node_free(xnode);\r
- onig_node_free(rem);\r
- goto mem_err;\r
- }\r
-\r
- NODE_CAR(an) = xnode;\r
- }\r
- else {\r
- NODE_CAR(an) = snode;\r
- }\r
-\r
- NODE_CDR(var_anode) = an;\r
- var_anode = an;\r
- }\r
- else {\r
- NODE_CAR(an) = snode;\r
- NODE_CDR(anode) = an;\r
- anode = an;\r
- }\r
- }\r
-\r
- return varlen;\r
-\r
- mem_err2:\r
- onig_node_free(snode);\r
-\r
- mem_err:\r
- onig_node_free(*rnode);\r
-\r
- return ONIGERR_MEMORY;\r
-}\r
-\r
-static int\r
-is_good_case_fold_items_for_search(OnigEncoding enc, int slen,\r
- int n, OnigCaseFoldCodeItem items[])\r
-{\r
- int i, len;\r
- UChar buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];\r
-\r
- for (i = 0; i < n; i++) {\r
- OnigCaseFoldCodeItem* item = items + i;\r
-\r
- if (item->code_len != 1) return 0;\r
- if (item->byte_len != slen) return 0;\r
- len = ONIGENC_CODE_TO_MBC(enc, item->code[0], buf);\r
- if (len != slen) return 0;\r
- }\r
-\r
- return 1;\r
-}\r
-\r
-#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8\r
-\r
-static int\r
-expand_case_fold_string(Node* node, regex_t* reg, int state)\r
-{\r
- int r, n, len, alt_num;\r
- int fold_len;\r
- int prev_is_ambig, prev_is_good, is_good, is_in_look_behind;\r
- UChar *start, *end, *p;\r
- UChar* foldp;\r
- Node *top_root, *root, *snode, *prev_node;\r
- OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];\r
- UChar buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];\r
- StrNode* sn;\r
-\r
- if (NODE_STRING_IS_AMBIG(node)) return 0;\r
-\r
- sn = STR_(node);\r
-\r
- start = sn->s;\r
- end = sn->end;\r
- if (start >= end) return 0;\r
-\r
- is_in_look_behind = (state & IN_LOOK_BEHIND) != 0;\r
-\r
- r = 0;\r
- top_root = root = prev_node = snode = NULL_NODE;\r
- alt_num = 1;\r
- p = start;\r
- while (p < end) {\r
- n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,\r
- p, end, items);\r
- if (n < 0) {\r
- r = n;\r
- goto err;\r
- }\r
-\r
- len = enclen(reg->enc, p);\r
- is_good = is_good_case_fold_items_for_search(reg->enc, len, n, items);\r
-\r
- if (is_in_look_behind ||\r
- (IS_NOT_NULL(snode) ||\r
- (is_good\r
- /* expand single char case: ex. /(?i:a)/ */\r
- && !(p == start && p + len >= end)))) {\r
- if (IS_NULL(snode)) {\r
- if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {\r
- top_root = root = onig_node_list_add(NULL_NODE, prev_node);\r
- if (IS_NULL(root)) {\r
- onig_node_free(prev_node);\r
- goto mem_err;\r
- }\r
- }\r
-\r
- prev_node = snode = onig_node_new_str(NULL, NULL);\r
- if (IS_NULL(snode)) goto mem_err;\r
- if (IS_NOT_NULL(root)) {\r
- if (IS_NULL(onig_node_list_add(root, snode))) {\r
- onig_node_free(snode);\r
- goto mem_err;\r
- }\r
- }\r
-\r
- prev_is_ambig = -1; /* -1: new */\r
- prev_is_good = 0; /* escape compiler warning */\r
- }\r
- else {\r
- prev_is_ambig = NODE_STRING_IS_AMBIG(snode);\r
- prev_is_good = NODE_STRING_IS_GOOD_AMBIG(snode);\r
- }\r
-\r
- if (n != 0) {\r
- foldp = p;\r
- fold_len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag,\r
- &foldp, end, buf);\r
- foldp = buf;\r
- }\r
- else {\r
- foldp = p; fold_len = len;\r
- }\r
-\r
- if ((prev_is_ambig == 0 && n != 0) ||\r
- (prev_is_ambig > 0 && (n == 0 || prev_is_good != is_good))) {\r
- if (IS_NULL(root) /* && IS_NOT_NULL(prev_node) */) {\r
- top_root = root = onig_node_list_add(NULL_NODE, prev_node);\r
- if (IS_NULL(root)) {\r
- onig_node_free(prev_node);\r
- goto mem_err;\r
- }\r
- }\r
-\r
- prev_node = snode = onig_node_new_str(foldp, foldp + fold_len);\r
- if (IS_NULL(snode)) goto mem_err;\r
- if (IS_NULL(onig_node_list_add(root, snode))) {\r
- onig_node_free(snode);\r
- goto mem_err;\r
- }\r
- }\r
- else {\r
- r = onig_node_str_cat(snode, foldp, foldp + fold_len);\r
- if (r != 0) goto err;\r
- }\r
-\r
- if (n != 0) NODE_STRING_SET_AMBIG(snode);\r
- if (is_good != 0) NODE_STRING_SET_GOOD_AMBIG(snode);\r
- }\r
- else {\r
- alt_num *= (n + 1);\r
- if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;\r
-\r
- if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {\r
- top_root = root = onig_node_list_add(NULL_NODE, prev_node);\r
- if (IS_NULL(root)) {\r
- onig_node_free(prev_node);\r
- goto mem_err;\r
- }\r
- }\r
-\r
- r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);\r
- if (r < 0) goto mem_err;\r
- if (r == 1) {\r
- if (IS_NULL(root)) {\r
- top_root = prev_node;\r
- }\r
- else {\r
- if (IS_NULL(onig_node_list_add(root, prev_node))) {\r
- onig_node_free(prev_node);\r
- goto mem_err;\r
- }\r
- }\r
-\r
- root = NODE_CAR(prev_node);\r
- }\r
- else { /* r == 0 */\r
- if (IS_NOT_NULL(root)) {\r
- if (IS_NULL(onig_node_list_add(root, prev_node))) {\r
- onig_node_free(prev_node);\r
- goto mem_err;\r
- }\r
- }\r
- }\r
-\r
- snode = NULL_NODE;\r
- }\r
-\r
- p += len;\r
- }\r
-\r
- if (p < end) {\r
- Node *srem;\r
-\r
- r = expand_case_fold_make_rem_string(&srem, p, end, reg);\r
- if (r != 0) goto mem_err;\r
-\r
- if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {\r
- top_root = root = onig_node_list_add(NULL_NODE, prev_node);\r
- if (IS_NULL(root)) {\r
- onig_node_free(srem);\r
- onig_node_free(prev_node);\r
- goto mem_err;\r
- }\r
- }\r
-\r
- if (IS_NULL(root)) {\r
- prev_node = srem;\r
- }\r
- else {\r
- if (IS_NULL(onig_node_list_add(root, srem))) {\r
- onig_node_free(srem);\r
- goto mem_err;\r
- }\r
- }\r
- }\r
-\r
- /* ending */\r
- top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);\r
- swap_node(node, top_root);\r
- onig_node_free(top_root);\r
- return 0;\r
-\r
- mem_err:\r
- r = ONIGERR_MEMORY;\r
-\r
- err:\r
- onig_node_free(top_root);\r
- return r;\r
-}\r
-\r
-#ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT\r
-static enum BodyEmptyType\r
-quantifiers_memory_node_info(Node* node)\r
-{\r
- int r = BODY_IS_EMPTY_POSSIBILITY;\r
-\r
- switch (NODE_TYPE(node)) {\r
- case NODE_LIST:\r
- case NODE_ALT:\r
- {\r
- int v;\r
- do {\r
- v = quantifiers_memory_node_info(NODE_CAR(node));\r
- if (v > r) r = v;\r
- } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
- }\r
- break;\r
-\r
-#ifdef USE_CALL\r
- case NODE_CALL:\r
- if (NODE_IS_RECURSION(node)) {\r
- return BODY_IS_EMPTY_POSSIBILITY_REC; /* tiny version */\r
- }\r
- else\r
- r = quantifiers_memory_node_info(NODE_BODY(node));\r
- break;\r
-#endif\r
-\r
- case NODE_QUANT:\r
- {\r
- QuantNode* qn = QUANT_(node);\r
- if (qn->upper != 0) {\r
- r = quantifiers_memory_node_info(NODE_BODY(node));\r
- }\r
- }\r
- break;\r
-\r
- case NODE_BAG:\r
- {\r
- BagNode* en = BAG_(node);\r
- switch (en->type) {\r
- case BAG_MEMORY:\r
- if (NODE_IS_RECURSION(node)) {\r
- return BODY_IS_EMPTY_POSSIBILITY_REC;\r
- }\r
- return BODY_IS_EMPTY_POSSIBILITY_MEM;\r
- break;\r
-\r
- case BAG_OPTION:\r
- case BAG_STOP_BACKTRACK:\r
- r = quantifiers_memory_node_info(NODE_BODY(node));\r
- break;\r
- case BAG_IF_ELSE:\r
- {\r
- int v;\r
- r = quantifiers_memory_node_info(NODE_BODY(node));\r
- if (IS_NOT_NULL(en->te.Then)) {\r
- v = quantifiers_memory_node_info(en->te.Then);\r
- if (v > r) r = v;\r
- }\r
- if (IS_NOT_NULL(en->te.Else)) {\r
- v = quantifiers_memory_node_info(en->te.Else);\r
- if (v > r) r = v;\r
- }\r
- }\r
- break;\r
- }\r
- }\r
- break;\r
-\r
- case NODE_BACKREF:\r
- case NODE_STRING:\r
- case NODE_CTYPE:\r
- case NODE_CCLASS:\r
- case NODE_ANCHOR:\r
- case NODE_GIMMICK:\r
- default:\r
- break;\r
- }\r
-\r
- return r;\r
-}\r
-#endif /* USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT */\r
-\r
-\r
-#ifdef USE_CALL\r
-\r
-#ifdef __GNUC__\r
-__inline\r
-#endif\r
-static int\r
-setup_call_node_call(CallNode* cn, ScanEnv* env, int state)\r
-{\r
- MemEnv* mem_env = SCANENV_MEMENV(env);\r
-\r
- if (cn->by_number != 0) {\r
- int gnum = cn->group_num;\r
-\r
- if (env->num_named > 0 &&\r
- IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&\r
- ! ONIG_IS_OPTION_ON(env->options, ONIG_OPTION_CAPTURE_GROUP)) {\r
- return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;\r
- }\r
-\r
- if (gnum > env->num_mem) {\r
- onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_GROUP_REFERENCE,\r
- cn->name, cn->name_end);\r
- return ONIGERR_UNDEFINED_GROUP_REFERENCE;\r
- }\r
-\r
- set_call_attr:\r
- NODE_CALL_BODY(cn) = mem_env[cn->group_num].node;\r
- if (IS_NULL(NODE_CALL_BODY(cn))) {\r
- onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,\r
- cn->name, cn->name_end);\r
- return ONIGERR_UNDEFINED_NAME_REFERENCE;\r
- }\r
- }\r
- else {\r
- int *refs;\r
-\r
- int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end, &refs);\r
- if (n <= 0) {\r
- onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,\r
- cn->name, cn->name_end);\r
- return ONIGERR_UNDEFINED_NAME_REFERENCE;\r
- }\r
- else if (n > 1) {\r
- onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL,\r
- cn->name, cn->name_end);\r
- return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;\r
- }\r
- else {\r
- cn->group_num = refs[0];\r
- goto set_call_attr;\r
- }\r
- }\r
-\r
- return 0;\r
-}\r
-\r
-static void\r
-setup_call2_call(Node* node)\r
-{\r
- switch (NODE_TYPE(node)) {\r
- case NODE_LIST:\r
- case NODE_ALT:\r
- do {\r
- setup_call2_call(NODE_CAR(node));\r
- } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
- break;\r
-\r
- case NODE_QUANT:\r
- setup_call2_call(NODE_BODY(node));\r
- break;\r
-\r
- case NODE_ANCHOR:\r
- if (ANCHOR_HAS_BODY(ANCHOR_(node)))\r
- setup_call2_call(NODE_BODY(node));\r
- break;\r
-\r
- case NODE_BAG:\r
- {\r
- BagNode* en = BAG_(node);\r
-\r
- if (en->type == BAG_MEMORY) {\r
- if (! NODE_IS_MARK1(node)) {\r
- NODE_STATUS_ADD(node, MARK1);\r
- setup_call2_call(NODE_BODY(node));\r
- NODE_STATUS_REMOVE(node, MARK1);\r
- }\r
- }\r
- else if (en->type == BAG_IF_ELSE) {\r
- setup_call2_call(NODE_BODY(node));\r
- if (IS_NOT_NULL(en->te.Then))\r
- setup_call2_call(en->te.Then);\r
- if (IS_NOT_NULL(en->te.Else))\r
- setup_call2_call(en->te.Else);\r
- }\r
- else {\r
- setup_call2_call(NODE_BODY(node));\r
- }\r
- }\r
- break;\r
-\r
- case NODE_CALL:\r
- if (! NODE_IS_MARK1(node)) {\r
- NODE_STATUS_ADD(node, MARK1);\r
- {\r
- CallNode* cn = CALL_(node);\r
- Node* called = NODE_CALL_BODY(cn);\r
-\r
- cn->entry_count++;\r
-\r
- NODE_STATUS_ADD(called, CALLED);\r
- BAG_(called)->m.entry_count++;\r
- setup_call2_call(called);\r
- }\r
- NODE_STATUS_REMOVE(node, MARK1);\r
- }\r
- break;\r
-\r
- default:\r
- break;\r
- }\r
-}\r
-\r
-static int\r
-setup_call(Node* node, ScanEnv* env, int state)\r
-{\r
- int r;\r
-\r
- switch (NODE_TYPE(node)) {\r
- case NODE_LIST:\r
- case NODE_ALT:\r
- do {\r
- r = setup_call(NODE_CAR(node), env, state);\r
- } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));\r
- break;\r
-\r
- case NODE_QUANT:\r
- if (QUANT_(node)->upper == 0)\r
- state |= IN_ZERO_REPEAT;\r
-\r
- r = setup_call(NODE_BODY(node), env, state);\r
- break;\r
-\r
- case NODE_ANCHOR:\r
- if (ANCHOR_HAS_BODY(ANCHOR_(node)))\r
- r = setup_call(NODE_BODY(node), env, state);\r
- else\r
- r = 0;\r
- break;\r
-\r
- case NODE_BAG:\r
- {\r
- BagNode* en = BAG_(node);\r
-\r
- if (en->type == BAG_MEMORY) {\r
- if ((state & IN_ZERO_REPEAT) != 0) {\r
- NODE_STATUS_ADD(node, IN_ZERO_REPEAT);\r
- BAG_(node)->m.entry_count--;\r
- }\r
- r = setup_call(NODE_BODY(node), env, state);\r
- }\r
- else if (en->type == BAG_IF_ELSE) {\r
- r = setup_call(NODE_BODY(node), env, state);\r
- if (r != 0) return r;\r
- if (IS_NOT_NULL(en->te.Then)) {\r
- r = setup_call(en->te.Then, env, state);\r
- if (r != 0) return r;\r
- }\r
- if (IS_NOT_NULL(en->te.Else))\r
- r = setup_call(en->te.Else, env, state);\r
- }\r
- else\r
- r = setup_call(NODE_BODY(node), env, state);\r
- }\r
- break;\r
-\r
- case NODE_CALL:\r
- if ((state & IN_ZERO_REPEAT) != 0) {\r
- NODE_STATUS_ADD(node, IN_ZERO_REPEAT);\r
- CALL_(node)->entry_count--;\r
- }\r
-\r
- r = setup_call_node_call(CALL_(node), env, state);\r
- break;\r
-\r
- default:\r
- r = 0;\r
- break;\r
- }\r
-\r
- return r;\r
-}\r
-\r
-static int\r
-setup_call2(Node* node)\r
-{\r
- int r = 0;\r
-\r
- switch (NODE_TYPE(node)) {\r
- case NODE_LIST:\r
- case NODE_ALT:\r
- do {\r
- r = setup_call2(NODE_CAR(node));\r
- } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));\r
- break;\r
-\r
- case NODE_QUANT:\r
- if (QUANT_(node)->upper != 0)\r
- r = setup_call2(NODE_BODY(node));\r
- break;\r
-\r
- case NODE_ANCHOR:\r
- if (ANCHOR_HAS_BODY(ANCHOR_(node)))\r
- r = setup_call2(NODE_BODY(node));\r
- break;\r
-\r
- case NODE_BAG:\r
- if (! NODE_IS_IN_ZERO_REPEAT(node))\r
- r = setup_call2(NODE_BODY(node));\r
-\r
- {\r
- BagNode* en = BAG_(node);\r
-\r
- if (r != 0) return r;\r
- if (en->type == BAG_IF_ELSE) {\r
- if (IS_NOT_NULL(en->te.Then)) {\r
- r = setup_call2(en->te.Then);\r
- if (r != 0) return r;\r
- }\r
- if (IS_NOT_NULL(en->te.Else))\r
- r = setup_call2(en->te.Else);\r
- }\r
- }\r
- break;\r
-\r
- case NODE_CALL:\r
- if (! NODE_IS_IN_ZERO_REPEAT(node)) {\r
- setup_call2_call(node);\r
- }\r
- break;\r
-\r
- default:\r
- break;\r
- }\r
-\r
- return r;\r
-}\r
-\r
-\r
-static void\r
-setup_called_state_call(Node* node, int state)\r
-{\r
- switch (NODE_TYPE(node)) {\r
- case NODE_ALT:\r
- state |= IN_ALT;\r
- /* fall */\r
- case NODE_LIST:\r
- do {\r
- setup_called_state_call(NODE_CAR(node), state);\r
- } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
- break;\r
-\r
- case NODE_QUANT:\r
- {\r
- QuantNode* qn = QUANT_(node);\r
-\r
- if (IS_INFINITE_REPEAT(qn->upper) || qn->upper >= 2)\r
- state |= IN_REAL_REPEAT;\r
- if (qn->lower != qn->upper)\r
- state |= IN_VAR_REPEAT;\r
-\r
- setup_called_state_call(NODE_QUANT_BODY(qn), state);\r
- }\r
- break;\r
-\r
- case NODE_ANCHOR:\r
- {\r
- AnchorNode* an = ANCHOR_(node);\r
-\r
- switch (an->type) {\r
- case ANCR_PREC_READ_NOT:\r
- case ANCR_LOOK_BEHIND_NOT:\r
- state |= IN_NOT;\r
- /* fall */\r
- case ANCR_PREC_READ:\r
- case ANCR_LOOK_BEHIND:\r
- setup_called_state_call(NODE_ANCHOR_BODY(an), state);\r
- break;\r
- default:\r
- break;\r
- }\r
- }\r
- break;\r
-\r
- case NODE_BAG:\r
- {\r
- BagNode* en = BAG_(node);\r
-\r
- if (en->type == BAG_MEMORY) {\r
- if (NODE_IS_MARK1(node)) {\r
- if ((~en->m.called_state & state) != 0) {\r
- en->m.called_state |= state;\r
- setup_called_state_call(NODE_BODY(node), state);\r
- }\r
- }\r
- else {\r
- NODE_STATUS_ADD(node, MARK1);\r
- en->m.called_state |= state;\r
- setup_called_state_call(NODE_BODY(node), state);\r
- NODE_STATUS_REMOVE(node, MARK1);\r
- }\r
- }\r
- else if (en->type == BAG_IF_ELSE) {\r
- if (IS_NOT_NULL(en->te.Then)) {\r
- setup_called_state_call(en->te.Then, state);\r
- }\r
- if (IS_NOT_NULL(en->te.Else))\r
- setup_called_state_call(en->te.Else, state);\r
- }\r
- else {\r
- setup_called_state_call(NODE_BODY(node), state);\r
- }\r
- }\r
- break;\r
-\r
- case NODE_CALL:\r
- setup_called_state_call(NODE_BODY(node), state);\r
- break;\r
-\r
- default:\r
- break;\r
- }\r
-}\r
-\r
-static void\r
-setup_called_state(Node* node, int state)\r
-{\r
- switch (NODE_TYPE(node)) {\r
- case NODE_ALT:\r
- state |= IN_ALT;\r
- /* fall */\r
- case NODE_LIST:\r
- do {\r
- setup_called_state(NODE_CAR(node), state);\r
- } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
- break;\r
-\r
-#ifdef USE_CALL\r
- case NODE_CALL:\r
- setup_called_state_call(node, state);\r
- break;\r
-#endif\r
-\r
- case NODE_BAG:\r
- {\r
- BagNode* en = BAG_(node);\r
-\r
- switch (en->type) {\r
- case BAG_MEMORY:\r
- if (en->m.entry_count > 1)\r
- state |= IN_MULTI_ENTRY;\r
-\r
- en->m.called_state |= state;\r
- /* fall */\r
- case BAG_OPTION:\r
- case BAG_STOP_BACKTRACK:\r
- setup_called_state(NODE_BODY(node), state);\r
- break;\r
- case BAG_IF_ELSE:\r
- setup_called_state(NODE_BODY(node), state);\r
- if (IS_NOT_NULL(en->te.Then))\r
- setup_called_state(en->te.Then, state);\r
- if (IS_NOT_NULL(en->te.Else))\r
- setup_called_state(en->te.Else, state);\r
- break;\r
- }\r
- }\r
- break;\r
-\r
- case NODE_QUANT:\r
- {\r
- QuantNode* qn = QUANT_(node);\r
-\r
- if (IS_INFINITE_REPEAT(qn->upper) || qn->upper >= 2)\r
- state |= IN_REAL_REPEAT;\r
- if (qn->lower != qn->upper)\r
- state |= IN_VAR_REPEAT;\r
-\r
- setup_called_state(NODE_QUANT_BODY(qn), state);\r
- }\r
- break;\r
-\r
- case NODE_ANCHOR:\r
- {\r
- AnchorNode* an = ANCHOR_(node);\r
-\r
- switch (an->type) {\r
- case ANCR_PREC_READ_NOT:\r
- case ANCR_LOOK_BEHIND_NOT:\r
- state |= IN_NOT;\r
- /* fall */\r
- case ANCR_PREC_READ:\r
- case ANCR_LOOK_BEHIND:\r
- setup_called_state(NODE_ANCHOR_BODY(an), state);\r
- break;\r
- default:\r
- break;\r
- }\r
- }\r
- break;\r
-\r
- case NODE_BACKREF:\r
- case NODE_STRING:\r
- case NODE_CTYPE:\r
- case NODE_CCLASS:\r
- case NODE_GIMMICK:\r
- default:\r
- break;\r
- }\r
-}\r
-\r
-#endif /* USE_CALL */\r
-\r
-\r
-static int setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env);\r
-\r
-#ifdef __GNUC__\r
-__inline\r
-#endif\r
-static int\r
-setup_anchor(Node* node, regex_t* reg, int state, ScanEnv* env)\r
-{\r
-/* allowed node types in look-behind */\r
-#define ALLOWED_TYPE_IN_LB \\r
- ( NODE_BIT_LIST | NODE_BIT_ALT | NODE_BIT_STRING | NODE_BIT_CCLASS \\r
- | NODE_BIT_CTYPE | NODE_BIT_ANCHOR | NODE_BIT_BAG | NODE_BIT_QUANT \\r
- | NODE_BIT_CALL | NODE_BIT_GIMMICK)\r
-\r
-#define ALLOWED_BAG_IN_LB ( 1<<BAG_MEMORY | 1<<BAG_OPTION | 1<<BAG_IF_ELSE )\r
-#define ALLOWED_BAG_IN_LB_NOT ( 1<<BAG_OPTION | 1<<BAG_IF_ELSE )\r
-\r
-#define ALLOWED_ANCHOR_IN_LB \\r
- ( ANCR_LOOK_BEHIND | ANCR_BEGIN_LINE | ANCR_END_LINE | ANCR_BEGIN_BUF \\r
- | ANCR_BEGIN_POSITION | ANCR_WORD_BOUNDARY | ANCR_NO_WORD_BOUNDARY \\r
- | ANCR_WORD_BEGIN | ANCR_WORD_END \\r
- | ANCR_TEXT_SEGMENT_BOUNDARY | ANCR_NO_TEXT_SEGMENT_BOUNDARY )\r
-\r
-#define ALLOWED_ANCHOR_IN_LB_NOT \\r
- ( ANCR_LOOK_BEHIND | ANCR_LOOK_BEHIND_NOT | ANCR_BEGIN_LINE \\r
- | ANCR_END_LINE | ANCR_BEGIN_BUF | ANCR_BEGIN_POSITION | ANCR_WORD_BOUNDARY \\r
- | ANCR_NO_WORD_BOUNDARY | ANCR_WORD_BEGIN | ANCR_WORD_END \\r
- | ANCR_TEXT_SEGMENT_BOUNDARY | ANCR_NO_TEXT_SEGMENT_BOUNDARY )\r
-\r
- int r;\r
- AnchorNode* an = ANCHOR_(node);\r
-\r
- switch (an->type) {\r
- case ANCR_PREC_READ:\r
- r = setup_tree(NODE_ANCHOR_BODY(an), reg, state, env);\r
- break;\r
- case ANCR_PREC_READ_NOT:\r
- r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state | IN_NOT), env);\r
- break;\r
-\r
- case ANCR_LOOK_BEHIND:\r
- {\r
- r = check_type_tree(NODE_ANCHOR_BODY(an), ALLOWED_TYPE_IN_LB,\r
- ALLOWED_BAG_IN_LB, ALLOWED_ANCHOR_IN_LB);\r
- if (r < 0) return r;\r
- if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;\r
- r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state|IN_LOOK_BEHIND), env);\r
- if (r != 0) return r;\r
- r = setup_look_behind(node, reg, env);\r
- }\r
- break;\r
-\r
- case ANCR_LOOK_BEHIND_NOT:\r
- {\r
- r = check_type_tree(NODE_ANCHOR_BODY(an), ALLOWED_TYPE_IN_LB,\r
- ALLOWED_BAG_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);\r
- if (r < 0) return r;\r
- if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;\r
- r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state|IN_NOT|IN_LOOK_BEHIND),\r
- env);\r
- if (r != 0) return r;\r
- r = setup_look_behind(node, reg, env);\r
- }\r
- break;\r
-\r
- default:\r
- r = 0;\r
- break;\r
- }\r
-\r
- return r;\r
-}\r
-\r
-#ifdef __GNUC__\r
-__inline\r
-#endif\r
-static int\r
-setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env)\r
-{\r
- int r;\r
- OnigLen d;\r
- QuantNode* qn = QUANT_(node);\r
- Node* body = NODE_BODY(node);\r
-\r
- if ((state & IN_REAL_REPEAT) != 0) {\r
- NODE_STATUS_ADD(node, IN_REAL_REPEAT);\r
- }\r
- if ((state & IN_MULTI_ENTRY) != 0) {\r
- NODE_STATUS_ADD(node, IN_MULTI_ENTRY);\r
- }\r
-\r
- if (IS_INFINITE_REPEAT(qn->upper) || qn->upper >= 1) {\r
- d = tree_min_len(body, env);\r
- if (d == 0) {\r
-#ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT\r
- qn->emptiness = quantifiers_memory_node_info(body);\r
- if (qn->emptiness == BODY_IS_EMPTY_POSSIBILITY_REC) {\r
- if (NODE_TYPE(body) == NODE_BAG &&\r
- BAG_(body)->type == BAG_MEMORY) {\r
- MEM_STATUS_ON(env->bt_mem_end, BAG_(body)->m.regnum);\r
- }\r
- }\r
-#else\r
- qn->emptiness = BODY_IS_EMPTY_POSSIBILITY;\r
-#endif\r
- }\r
- }\r
-\r
- if (IS_INFINITE_REPEAT(qn->upper) || qn->upper >= 2)\r
- state |= IN_REAL_REPEAT;\r
- if (qn->lower != qn->upper)\r
- state |= IN_VAR_REPEAT;\r
-\r
- r = setup_tree(body, reg, state, env);\r
- if (r != 0) return r;\r
-\r
- /* expand string */\r
-#define EXPAND_STRING_MAX_LENGTH 100\r
- if (NODE_TYPE(body) == NODE_STRING) {\r
- if (!IS_INFINITE_REPEAT(qn->lower) && qn->lower == qn->upper &&\r
- qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) {\r
- int len = NODE_STRING_LEN(body);\r
- StrNode* sn = STR_(body);\r
-\r
- if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) {\r
- int i, n = qn->lower;\r
- onig_node_conv_to_str_node(node, STR_(body)->flag);\r
- for (i = 0; i < n; i++) {\r
- r = onig_node_str_cat(node, sn->s, sn->end);\r
- if (r != 0) return r;\r
- }\r
- onig_node_free(body);\r
- return r;\r
- }\r
- }\r
- }\r
-\r
- if (qn->greedy && (qn->emptiness == BODY_IS_NOT_EMPTY)) {\r
- if (NODE_TYPE(body) == NODE_QUANT) {\r
- QuantNode* tqn = QUANT_(body);\r
- if (IS_NOT_NULL(tqn->head_exact)) {\r
- qn->head_exact = tqn->head_exact;\r
- tqn->head_exact = NULL;\r
- }\r
- }\r
- else {\r
- qn->head_exact = get_head_value_node(NODE_BODY(node), 1, reg);\r
- }\r
- }\r
-\r
- return r;\r
-}\r
-\r
-/* setup_tree does the following work.\r
- 1. check empty loop. (set qn->emptiness)\r
- 2. expand ignore-case in char class.\r
- 3. set memory status bit flags. (reg->mem_stats)\r
- 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].\r
- 5. find invalid patterns in look-behind.\r
- 6. expand repeated string.\r
- */\r
-static int\r
-setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)\r
-{\r
- int r = 0;\r
-\r
- switch (NODE_TYPE(node)) {\r
- case NODE_LIST:\r
- {\r
- Node* prev = NULL_NODE;\r
- do {\r
- r = setup_tree(NODE_CAR(node), reg, state, env);\r
- if (IS_NOT_NULL(prev) && r == 0) {\r
- r = next_setup(prev, NODE_CAR(node), reg);\r
- }\r
- prev = NODE_CAR(node);\r
- } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));\r
- }\r
- break;\r
-\r
- case NODE_ALT:\r
- do {\r
- r = setup_tree(NODE_CAR(node), reg, (state | IN_ALT), env);\r
- } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));\r
- break;\r
-\r
- case NODE_STRING:\r
- if (IS_IGNORECASE(reg->options) && !NODE_STRING_IS_RAW(node)) {\r
- r = expand_case_fold_string(node, reg, state);\r
- }\r
- break;\r
-\r
- case NODE_BACKREF:\r
- {\r
- int i;\r
- int* p;\r
- BackRefNode* br = BACKREF_(node);\r
- p = BACKREFS_P(br);\r
- for (i = 0; i < br->back_num; i++) {\r
- if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;\r
- MEM_STATUS_ON(env->backrefed_mem, p[i]);\r
- MEM_STATUS_ON(env->bt_mem_start, p[i]);\r
-#ifdef USE_BACKREF_WITH_LEVEL\r
- if (NODE_IS_NEST_LEVEL(node)) {\r
- MEM_STATUS_ON(env->bt_mem_end, p[i]);\r
- }\r
-#endif\r
- }\r
- }\r
- break;\r
-\r
- case NODE_BAG:\r
- {\r
- BagNode* en = BAG_(node);\r
-\r
- switch (en->type) {\r
- case BAG_OPTION:\r
- {\r
- OnigOptionType options = reg->options;\r
- reg->options = BAG_(node)->o.options;\r
- r = setup_tree(NODE_BODY(node), reg, state, env);\r
- reg->options = options;\r
- }\r
- break;\r
-\r
- case BAG_MEMORY:\r
-#ifdef USE_CALL\r
- state |= en->m.called_state;\r
-#endif\r
-\r
- if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_MULTI_ENTRY)) != 0\r
- || NODE_IS_RECURSION(node)) {\r
- MEM_STATUS_ON(env->bt_mem_start, en->m.regnum);\r
- }\r
- r = setup_tree(NODE_BODY(node), reg, state, env);\r
- break;\r
-\r
- case BAG_STOP_BACKTRACK:\r
- {\r
- Node* target = NODE_BODY(node);\r
- r = setup_tree(target, reg, state, env);\r
- if (NODE_TYPE(target) == NODE_QUANT) {\r
- QuantNode* tqn = QUANT_(target);\r
- if (IS_INFINITE_REPEAT(tqn->upper) && tqn->lower <= 1 &&\r
- tqn->greedy != 0) { /* (?>a*), a*+ etc... */\r
- if (is_strict_real_node(NODE_BODY(target)))\r
- NODE_STATUS_ADD(node, STRICT_REAL_REPEAT);\r
- }\r
- }\r
- }\r
- break;\r
-\r
- case BAG_IF_ELSE:\r
- r = setup_tree(NODE_BODY(node), reg, (state | IN_ALT), env);\r
- if (r != 0) return r;\r
- if (IS_NOT_NULL(en->te.Then)) {\r
- r = setup_tree(en->te.Then, reg, (state | IN_ALT), env);\r
- if (r != 0) return r;\r
- }\r
- if (IS_NOT_NULL(en->te.Else))\r
- r = setup_tree(en->te.Else, reg, (state | IN_ALT), env);\r
- break;\r
- }\r
- }\r
- break;\r
-\r
- case NODE_QUANT:\r
- r = setup_quant(node, reg, state, env);\r
- break;\r
-\r
- case NODE_ANCHOR:\r
- r = setup_anchor(node, reg, state, env);\r
- break;\r
-\r
-#ifdef USE_CALL\r
- case NODE_CALL:\r
-#endif\r
- case NODE_CTYPE:\r
- case NODE_CCLASS:\r
- case NODE_GIMMICK:\r
- default:\r
- break;\r
- }\r
-\r
- return r;\r
-}\r
-\r
-static int\r
-set_sunday_quick_search_or_bmh_skip_table(regex_t* reg, int case_expand,\r
- UChar* s, UChar* end,\r
- UChar skip[], int* roffset)\r
-{\r
- int i, j, k, len, offset;\r
- int n, clen;\r
- UChar* p;\r
- OnigEncoding enc;\r
- OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];\r
- UChar buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];\r
-\r
- enc = reg->enc;\r
- offset = ENC_GET_SKIP_OFFSET(enc);\r
- if (offset == ENC_SKIP_OFFSET_1_OR_0) {\r
- UChar* p = s;\r
- while (1) {\r
- len = enclen(enc, p);\r
- if (p + len >= end) {\r
- if (len == 1) offset = 1;\r
- else offset = 0;\r
- break;\r
- }\r
- p += len;\r
- }\r
- }\r
-\r
- len = (int )(end - s);\r
- if (len + offset >= 255)\r
- return ONIGERR_PARSER_BUG;\r
-\r
- *roffset = offset;\r
-\r
- for (i = 0; i < CHAR_MAP_SIZE; i++) {\r
- skip[i] = (UChar )(len + offset);\r
- }\r
-\r
- for (p = s; p < end; ) {\r
- int z;\r
-\r
- clen = enclen(enc, p);\r
- if (p + clen > end) clen = (int )(end - p);\r
-\r
- len = (int )(end - p);\r
- for (j = 0; j < clen; j++) {\r
- z = len - j + (offset - 1);\r
- if (z <= 0) break;\r
- skip[p[j]] = z;\r
- }\r
-\r
- if (case_expand != 0) {\r
- n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,\r
- p, end, items);\r
- for (k = 0; k < n; k++) {\r
- ONIGENC_CODE_TO_MBC(enc, items[k].code[0], buf);\r
- for (j = 0; j < clen; j++) {\r
- z = len - j + (offset - 1);\r
- if (z <= 0) break;\r
- if (skip[buf[j]] > z)\r
- skip[buf[j]] = z;\r
- }\r
- }\r
- }\r
-\r
- p += clen;\r
- }\r
-\r
- return 0;\r
-}\r
-\r
-\r
-#define OPT_EXACT_MAXLEN 24\r
-\r
-#if OPT_EXACT_MAXLEN >= 255\r
-#error Too big OPT_EXACT_MAXLEN\r
-#endif\r
-\r
-typedef struct {\r
- OnigLen min; /* min byte length */\r
- OnigLen max; /* max byte length */\r
-} MinMax;\r
-\r
-typedef struct {\r
- MinMax mmd;\r
- OnigEncoding enc;\r
- OnigOptionType options;\r
- OnigCaseFoldType case_fold_flag;\r
- ScanEnv* scan_env;\r
-} OptEnv;\r
-\r
-typedef struct {\r
- int left;\r
- int right;\r
-} OptAnc;\r
-\r
-typedef struct {\r
- MinMax mmd; /* position */\r
- OptAnc anc;\r
- int reach_end;\r
- int case_fold;\r
- int good_case_fold;\r
- int len;\r
- UChar s[OPT_EXACT_MAXLEN];\r
-} OptStr;\r
-\r
-typedef struct {\r
- MinMax mmd; /* position */\r
- OptAnc anc;\r
- int value; /* weighted value */\r
- UChar map[CHAR_MAP_SIZE];\r
-} OptMap;\r
-\r
-typedef struct {\r
- MinMax len;\r
- OptAnc anc;\r
- OptStr sb; /* boundary */\r
- OptStr sm; /* middle */\r
- OptStr spr; /* prec read (?=...) */\r
- OptMap map; /* boundary */\r
-} OptNode;\r
-\r
-\r
-static int\r
-map_position_value(OnigEncoding enc, int i)\r
-{\r
- static const short int Vals[] = {\r
- 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,\r
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\r
- 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,\r
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,\r
- 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,\r
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,\r
- 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,\r
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1\r
- };\r
-\r
- if (i < (int )(sizeof(Vals)/sizeof(Vals[0]))) {\r
- if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)\r
- return 20;\r
- else\r
- return (int )Vals[i];\r
- }\r
- else\r
- return 4; /* Take it easy. */\r
-}\r
-\r
-static int\r
-distance_value(MinMax* mm)\r
-{\r
- /* 1000 / (min-max-dist + 1) */\r
- static const short int dist_vals[] = {\r
- 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,\r
- 91, 83, 77, 71, 67, 63, 59, 56, 53, 50,\r
- 48, 45, 43, 42, 40, 38, 37, 36, 34, 33,\r
- 32, 31, 30, 29, 29, 28, 27, 26, 26, 25,\r
- 24, 24, 23, 23, 22, 22, 21, 21, 20, 20,\r
- 20, 19, 19, 19, 18, 18, 18, 17, 17, 17,\r
- 16, 16, 16, 16, 15, 15, 15, 15, 14, 14,\r
- 14, 14, 14, 14, 13, 13, 13, 13, 13, 13,\r
- 12, 12, 12, 12, 12, 12, 11, 11, 11, 11,\r
- 11, 11, 11, 11, 11, 10, 10, 10, 10, 10\r
- };\r
-\r
- OnigLen d;\r
-\r
- if (mm->max == INFINITE_LEN) return 0;\r
-\r
- d = mm->max - mm->min;\r
- if (d < (OnigLen )(sizeof(dist_vals)/sizeof(dist_vals[0])))\r
- /* return dist_vals[d] * 16 / (mm->min + 12); */\r
- return (int )dist_vals[d];\r
- else\r
- return 1;\r
-}\r
-\r
-static int\r
-comp_distance_value(MinMax* d1, MinMax* d2, int v1, int v2)\r
-{\r
- if (v2 <= 0) return -1;\r
- if (v1 <= 0) return 1;\r
-\r
- v1 *= distance_value(d1);\r
- v2 *= distance_value(d2);\r
-\r
- if (v2 > v1) return 1;\r
- if (v2 < v1) return -1;\r
-\r
- if (d2->min < d1->min) return 1;\r
- if (d2->min > d1->min) return -1;\r
- return 0;\r
-}\r
-\r
-static int\r
-is_equal_mml(MinMax* a, MinMax* b)\r
-{\r
- return a->min == b->min && a->max == b->max;\r
-}\r
-\r
-static void\r
-set_mml(MinMax* l, OnigLen min, OnigLen max)\r
-{\r
- l->min = min;\r
- l->max = max;\r
-}\r
-\r
-static void\r
-clear_mml(MinMax* l)\r
-{\r
- l->min = l->max = 0;\r
-}\r
-\r
-static void\r
-copy_mml(MinMax* to, MinMax* from)\r
-{\r
- to->min = from->min;\r
- to->max = from->max;\r
-}\r
-\r
-static void\r
-add_mml(MinMax* to, MinMax* from)\r
-{\r
- to->min = distance_add(to->min, from->min);\r
- to->max = distance_add(to->max, from->max);\r
-}\r
-\r
-static void\r
-alt_merge_mml(MinMax* to, MinMax* from)\r
-{\r
- if (to->min > from->min) to->min = from->min;\r
- if (to->max < from->max) to->max = from->max;\r
-}\r
-\r
-static void\r
-copy_opt_env(OptEnv* to, OptEnv* from)\r
-{\r
- *to = *from;\r
-}\r
-\r
-static void\r
-clear_opt_anc_info(OptAnc* a)\r
-{\r
- a->left = 0;\r
- a->right = 0;\r
-}\r
-\r
-static void\r
-copy_opt_anc_info(OptAnc* to, OptAnc* from)\r
-{\r
- *to = *from;\r
-}\r
-\r
-static void\r
-concat_opt_anc_info(OptAnc* to, OptAnc* left, OptAnc* right,\r
- OnigLen left_len, OnigLen right_len)\r
-{\r
- clear_opt_anc_info(to);\r
-\r
- to->left = left->left;\r
- if (left_len == 0) {\r
- to->left |= right->left;\r
- }\r
-\r
- to->right = right->right;\r
- if (right_len == 0) {\r
- to->right |= left->right;\r
- }\r
- else {\r
- to->right |= (left->right & ANCR_PREC_READ_NOT);\r
- }\r
-}\r
-\r
-static int\r
-is_left(int a)\r
-{\r
- if (a == ANCR_END_BUF || a == ANCR_SEMI_END_BUF ||\r
- a == ANCR_END_LINE || a == ANCR_PREC_READ || a == ANCR_PREC_READ_NOT)\r
- return 0;\r
-\r
- return 1;\r
-}\r
-\r
-static int\r
-is_set_opt_anc_info(OptAnc* to, int anc)\r
-{\r
- if ((to->left & anc) != 0) return 1;\r
-\r
- return ((to->right & anc) != 0 ? 1 : 0);\r
-}\r
-\r
-static void\r
-add_opt_anc_info(OptAnc* to, int anc)\r
-{\r
- if (is_left(anc))\r
- to->left |= anc;\r
- else\r
- to->right |= anc;\r
-}\r
-\r
-static void\r
-remove_opt_anc_info(OptAnc* to, int anc)\r
-{\r
- if (is_left(anc))\r
- to->left &= ~anc;\r
- else\r
- to->right &= ~anc;\r
-}\r
-\r
-static void\r
-alt_merge_opt_anc_info(OptAnc* to, OptAnc* add)\r
-{\r
- to->left &= add->left;\r
- to->right &= add->right;\r
-}\r
-\r
-static int\r
-is_full_opt_exact(OptStr* e)\r
-{\r
- return e->len >= OPT_EXACT_MAXLEN;\r
-}\r
-\r
-static void\r
-clear_opt_exact(OptStr* e)\r
-{\r
- clear_mml(&e->mmd);\r
- clear_opt_anc_info(&e->anc);\r
- e->reach_end = 0;\r
- e->case_fold = 0;\r
- e->good_case_fold = 0;\r
- e->len = 0;\r
- e->s[0] = '\0';\r
-}\r
-\r
-static void\r
-copy_opt_exact(OptStr* to, OptStr* from)\r
-{\r
- *to = *from;\r
-}\r
-\r
-static int\r
-concat_opt_exact(OptStr* to, OptStr* add, OnigEncoding enc)\r
-{\r
- int i, j, len, r;\r
- UChar *p, *end;\r
- OptAnc tanc;\r
-\r
- if (add->case_fold != 0) {\r
- if (! to->case_fold) {\r
- if (to->len > 1 || to->len >= add->len) return 0; /* avoid */\r
-\r
- to->case_fold = 1;\r
- }\r
- else {\r
- if (to->good_case_fold != 0) {\r
- if (add->good_case_fold == 0) return 0;\r
- }\r
- }\r
- }\r
-\r
- r = 0;\r
- p = add->s;\r
- end = p + add->len;\r
- for (i = to->len; p < end; ) {\r
- len = enclen(enc, p);\r
- if (i + len > OPT_EXACT_MAXLEN) {\r
- r = 1; /* 1:full */\r
- break;\r
- }\r
- for (j = 0; j < len && p < end; j++)\r
- to->s[i++] = *p++;\r
- }\r
-\r
- to->len = i;\r
- to->reach_end = (p == end ? add->reach_end : 0);\r
-\r
- concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);\r
- if (! to->reach_end) tanc.right = 0;\r
- copy_opt_anc_info(&to->anc, &tanc);\r
-\r
- return r;\r
-}\r
-\r
-static void\r
-concat_opt_exact_str(OptStr* to, UChar* s, UChar* end, OnigEncoding enc)\r
-{\r
- int i, j, len;\r
- UChar *p;\r
-\r
- for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {\r
- len = enclen(enc, p);\r
- if (i + len > OPT_EXACT_MAXLEN) break;\r
- for (j = 0; j < len && p < end; j++)\r
- to->s[i++] = *p++;\r
- }\r
-\r
- to->len = i;\r
-\r
- if (p >= end && to->len == (int )(end - s))\r
- to->reach_end = 1;\r
-}\r
-\r
-static void\r
-alt_merge_opt_exact(OptStr* to, OptStr* add, OptEnv* env)\r
-{\r
- int i, j, len;\r
-\r
- if (add->len == 0 || to->len == 0) {\r
- clear_opt_exact(to);\r
- return ;\r
- }\r
-\r
- if (! is_equal_mml(&to->mmd, &add->mmd)) {\r
- clear_opt_exact(to);\r
- return ;\r
- }\r
-\r
- for (i = 0; i < to->len && i < add->len; ) {\r
- if (to->s[i] != add->s[i]) break;\r
- len = enclen(env->enc, to->s + i);\r
-\r
- for (j = 1; j < len; j++) {\r
- if (to->s[i+j] != add->s[i+j]) break;\r
- }\r
- if (j < len) break;\r
- i += len;\r
- }\r
-\r
- if (! add->reach_end || i < add->len || i < to->len) {\r
- to->reach_end = 0;\r
- }\r
- to->len = i;\r
- if (add->case_fold != 0)\r
- to->case_fold = 1;\r
- if (add->good_case_fold == 0)\r
- to->good_case_fold = 0;\r
-\r
- alt_merge_opt_anc_info(&to->anc, &add->anc);\r
- if (! to->reach_end) to->anc.right = 0;\r
-}\r
-\r
-static void\r
-select_opt_exact(OnigEncoding enc, OptStr* now, OptStr* alt)\r
-{\r
- int vn, va;\r
-\r
- vn = now->len;\r
- va = alt->len;\r
-\r
- if (va == 0) {\r
- return ;\r
- }\r
- else if (vn == 0) {\r
- copy_opt_exact(now, alt);\r
- return ;\r
- }\r
- else if (vn <= 2 && va <= 2) {\r
- /* ByteValTable[x] is big value --> low price */\r
- va = map_position_value(enc, now->s[0]);\r
- vn = map_position_value(enc, alt->s[0]);\r
-\r
- if (now->len > 1) vn += 5;\r
- if (alt->len > 1) va += 5;\r
- }\r
-\r
- if (now->case_fold == 0) vn *= 2;\r
- if (alt->case_fold == 0) va *= 2;\r
-\r
- if (now->good_case_fold != 0) vn *= 4;\r
- if (alt->good_case_fold != 0) va *= 4;\r
-\r
- if (comp_distance_value(&now->mmd, &alt->mmd, vn, va) > 0)\r
- copy_opt_exact(now, alt);\r
-}\r
-\r
-static void\r
-clear_opt_map(OptMap* map)\r
-{\r
- static const OptMap clean_info = {\r
- {0, 0}, {0, 0}, 0,\r
- {\r
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\r
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0\r
- }\r
- };\r
-\r
- xmemcpy(map, &clean_info, sizeof(OptMap));\r
-}\r
-\r
-static void\r
-copy_opt_map(OptMap* to, OptMap* from)\r
-{\r
- xmemcpy(to,from,sizeof(OptMap));\r
-}\r
-\r
-static void\r
-add_char_opt_map(OptMap* m, UChar c, OnigEncoding enc)\r
-{\r
- if (m->map[c] == 0) {\r
- m->map[c] = 1;\r
- m->value += map_position_value(enc, c);\r
- }\r
-}\r
-\r
-static int\r
-add_char_amb_opt_map(OptMap* map, UChar* p, UChar* end,\r
- OnigEncoding enc, OnigCaseFoldType fold_flag)\r
-{\r
- OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];\r
- UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];\r
- int i, n;\r
-\r
- add_char_opt_map(map, p[0], enc);\r
-\r
- fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(fold_flag);\r
- n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, fold_flag, p, end, items);\r
- if (n < 0) return n;\r
-\r
- for (i = 0; i < n; i++) {\r
- ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);\r
- add_char_opt_map(map, buf[0], enc);\r
- }\r
-\r
- return 0;\r
-}\r
-\r
-static void\r
-select_opt_map(OptMap* now, OptMap* alt)\r
-{\r
- static int z = 1<<15; /* 32768: something big value */\r
-\r
- int vn, va;\r
-\r
- if (alt->value == 0) return ;\r
- if (now->value == 0) {\r
- copy_opt_map(now, alt);\r
- return ;\r
- }\r
-\r
- vn = z / now->value;\r
- va = z / alt->value;\r
- if (comp_distance_value(&now->mmd, &alt->mmd, vn, va) > 0)\r
- copy_opt_map(now, alt);\r
-}\r
-\r
-static int\r
-comp_opt_exact_or_map(OptStr* e, OptMap* m)\r
-{\r
-#define COMP_EM_BASE 20\r
- int ae, am;\r
- int case_value;\r
-\r
- if (m->value <= 0) return -1;\r
-\r
- if (e->case_fold != 0) {\r
- if (e->good_case_fold != 0)\r
- case_value = 2;\r
- else\r
- case_value = 1;\r
- }\r
- else\r
- case_value = 3;\r
-\r
- ae = COMP_EM_BASE * e->len * case_value;\r
- am = COMP_EM_BASE * 5 * 2 / m->value;\r
- return comp_distance_value(&e->mmd, &m->mmd, ae, am);\r
-}\r
-\r
-static void\r
-alt_merge_opt_map(OnigEncoding enc, OptMap* to, OptMap* add)\r
-{\r
- int i, val;\r
-\r
- /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */\r
- if (to->value == 0) return ;\r
- if (add->value == 0 || to->mmd.max < add->mmd.min) {\r
- clear_opt_map(to);\r
- return ;\r
- }\r
-\r
- alt_merge_mml(&to->mmd, &add->mmd);\r
-\r
- val = 0;\r
- for (i = 0; i < CHAR_MAP_SIZE; i++) {\r
- if (add->map[i])\r
- to->map[i] = 1;\r
-\r
- if (to->map[i])\r
- val += map_position_value(enc, i);\r
- }\r
- to->value = val;\r
-\r
- alt_merge_opt_anc_info(&to->anc, &add->anc);\r
-}\r
-\r
-static void\r
-set_bound_node_opt_info(OptNode* opt, MinMax* plen)\r
-{\r
- copy_mml(&(opt->sb.mmd), plen);\r
- copy_mml(&(opt->spr.mmd), plen);\r
- copy_mml(&(opt->map.mmd), plen);\r
-}\r
-\r
-static void\r
-clear_node_opt_info(OptNode* opt)\r
-{\r
- clear_mml(&opt->len);\r
- clear_opt_anc_info(&opt->anc);\r
- clear_opt_exact(&opt->sb);\r
- clear_opt_exact(&opt->sm);\r
- clear_opt_exact(&opt->spr);\r
- clear_opt_map(&opt->map);\r
-}\r
-\r
-static void\r
-copy_node_opt_info(OptNode* to, OptNode* from)\r
-{\r
- xmemcpy(to,from,sizeof(OptNode));\r
-}\r
-\r
-static void\r
-concat_left_node_opt_info(OnigEncoding enc, OptNode* to, OptNode* add)\r
-{\r
- int sb_reach, sm_reach;\r
- OptAnc tanc;\r
-\r
- concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);\r
- copy_opt_anc_info(&to->anc, &tanc);\r
-\r
- if (add->sb.len > 0 && to->len.max == 0) {\r
- concat_opt_anc_info(&tanc, &to->anc, &add->sb.anc, to->len.max, add->len.max);\r
- copy_opt_anc_info(&add->sb.anc, &tanc);\r
- }\r
-\r
- if (add->map.value > 0 && to->len.max == 0) {\r
- if (add->map.mmd.max == 0)\r
- add->map.anc.left |= to->anc.left;\r
- }\r
-\r
- sb_reach = to->sb.reach_end;\r
- sm_reach = to->sm.reach_end;\r
-\r
- if (add->len.max != 0)\r
- to->sb.reach_end = to->sm.reach_end = 0;\r
-\r
- if (add->sb.len > 0) {\r
- if (sb_reach) {\r
- concat_opt_exact(&to->sb, &add->sb, enc);\r
- clear_opt_exact(&add->sb);\r
- }\r
- else if (sm_reach) {\r
- concat_opt_exact(&to->sm, &add->sb, enc);\r
- clear_opt_exact(&add->sb);\r
- }\r
- }\r
- select_opt_exact(enc, &to->sm, &add->sb);\r
- select_opt_exact(enc, &to->sm, &add->sm);\r
-\r
- if (to->spr.len > 0) {\r
- if (add->len.max > 0) {\r
- if (to->spr.len > (int )add->len.max)\r
- to->spr.len = add->len.max;\r
-\r
- if (to->spr.mmd.max == 0)\r
- select_opt_exact(enc, &to->sb, &to->spr);\r
- else\r
- select_opt_exact(enc, &to->sm, &to->spr);\r
- }\r
- }\r
- else if (add->spr.len > 0) {\r
- copy_opt_exact(&to->spr, &add->spr);\r
- }\r
-\r
- select_opt_map(&to->map, &add->map);\r
- add_mml(&to->len, &add->len);\r
-}\r
-\r
-static void\r
-alt_merge_node_opt_info(OptNode* to, OptNode* add, OptEnv* env)\r
-{\r
- alt_merge_opt_anc_info(&to->anc, &add->anc);\r
- alt_merge_opt_exact(&to->sb, &add->sb, env);\r
- alt_merge_opt_exact(&to->sm, &add->sm, env);\r
- alt_merge_opt_exact(&to->spr, &add->spr, env);\r
- alt_merge_opt_map(env->enc, &to->map, &add->map);\r
-\r
- alt_merge_mml(&to->len, &add->len);\r
-}\r
-\r
-\r
-#define MAX_NODE_OPT_INFO_REF_COUNT 5\r
-\r
-static int\r
-optimize_nodes(Node* node, OptNode* opt, OptEnv* env)\r
-{\r
- int i;\r
- int r;\r
- OptNode xo;\r
- OnigEncoding enc;\r
-\r
- r = 0;\r
- enc = env->enc;\r
- clear_node_opt_info(opt);\r
- set_bound_node_opt_info(opt, &env->mmd);\r
-\r
- switch (NODE_TYPE(node)) {\r
- case NODE_LIST:\r
- {\r
- OptEnv nenv;\r
- Node* nd = node;\r
-\r
- copy_opt_env(&nenv, env);\r
- do {\r
- r = optimize_nodes(NODE_CAR(nd), &xo, &nenv);\r
- if (r == 0) {\r
- add_mml(&nenv.mmd, &xo.len);\r
- concat_left_node_opt_info(enc, opt, &xo);\r
- }\r
- } while (r == 0 && IS_NOT_NULL(nd = NODE_CDR(nd)));\r
- }\r
- break;\r
-\r
- case NODE_ALT:\r
- {\r
- Node* nd = node;\r
-\r
- do {\r
- r = optimize_nodes(NODE_CAR(nd), &xo, env);\r
- if (r == 0) {\r
- if (nd == node) copy_node_opt_info(opt, &xo);\r
- else alt_merge_node_opt_info(opt, &xo, env);\r
- }\r
- } while ((r == 0) && IS_NOT_NULL(nd = NODE_CDR(nd)));\r
- }\r
- break;\r
-\r
- case NODE_STRING:\r
- {\r
- StrNode* sn = STR_(node);\r
- int slen = (int )(sn->end - sn->s);\r
- /* int is_raw = NODE_STRING_IS_RAW(node); */\r
-\r
- if (! NODE_STRING_IS_AMBIG(node)) {\r
- concat_opt_exact_str(&opt->sb, sn->s, sn->end, enc);\r
- if (slen > 0) {\r
- add_char_opt_map(&opt->map, *(sn->s), enc);\r
- }\r
- set_mml(&opt->len, slen, slen);\r
- }\r
- else {\r
- int max;\r
-\r
- if (NODE_STRING_IS_DONT_GET_OPT_INFO(node)) {\r
- int n = onigenc_strlen(enc, sn->s, sn->end);\r
- max = ONIGENC_MBC_MAXLEN_DIST(enc) * n;\r
- }\r
- else {\r
- concat_opt_exact_str(&opt->sb, sn->s, sn->end, enc);\r
- opt->sb.case_fold = 1;\r
- if (NODE_STRING_IS_GOOD_AMBIG(node))\r
- opt->sb.good_case_fold = 1;\r
-\r
- if (slen > 0) {\r
- r = add_char_amb_opt_map(&opt->map, sn->s, sn->end,\r
- enc, env->case_fold_flag);\r
- if (r != 0) break;\r
- }\r
-\r
- max = slen;\r
- }\r
-\r
- set_mml(&opt->len, slen, max);\r
- }\r
- }\r
- break;\r
-\r
- case NODE_CCLASS:\r
- {\r
- int z;\r
- CClassNode* cc = CCLASS_(node);\r
-\r
- /* no need to check ignore case. (set in setup_tree()) */\r
-\r
- if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {\r
- OnigLen min = ONIGENC_MBC_MINLEN(enc);\r
- OnigLen max = ONIGENC_MBC_MAXLEN_DIST(enc);\r
-\r
- set_mml(&opt->len, min, max);\r
- }\r
- else {\r
- for (i = 0; i < SINGLE_BYTE_SIZE; i++) {\r
- z = BITSET_AT(cc->bs, i);\r
- if ((z && ! IS_NCCLASS_NOT(cc)) || (! z && IS_NCCLASS_NOT(cc))) {\r
- add_char_opt_map(&opt->map, (UChar )i, enc);\r
- }\r
- }\r
- set_mml(&opt->len, 1, 1);\r
- }\r
- }\r
- break;\r
-\r
- case NODE_CTYPE:\r
- {\r
- int min, max;\r
- int range;\r
-\r
- max = ONIGENC_MBC_MAXLEN_DIST(enc);\r
-\r
- if (max == 1) {\r
- min = 1;\r
-\r
- switch (CTYPE_(node)->ctype) {\r
- case CTYPE_ANYCHAR:\r
- break;\r
-\r
- case ONIGENC_CTYPE_WORD:\r
- range = CTYPE_(node)->ascii_mode != 0 ? 128 : SINGLE_BYTE_SIZE;\r
- if (CTYPE_(node)->not != 0) {\r
- for (i = 0; i < range; i++) {\r
- if (! ONIGENC_IS_CODE_WORD(enc, i)) {\r
- add_char_opt_map(&opt->map, (UChar )i, enc);\r
- }\r
- }\r
- for (i = range; i < SINGLE_BYTE_SIZE; i++) {\r
- add_char_opt_map(&opt->map, (UChar )i, enc);\r
- }\r
- }\r
- else {\r
- for (i = 0; i < range; i++) {\r
- if (ONIGENC_IS_CODE_WORD(enc, i)) {\r
- add_char_opt_map(&opt->map, (UChar )i, enc);\r
- }\r
- }\r
- }\r
- break;\r
- }\r
- }\r
- else {\r
- min = ONIGENC_MBC_MINLEN(enc);\r
- }\r
- set_mml(&opt->len, min, max);\r
- }\r
- break;\r
-\r
- case NODE_ANCHOR:\r
- switch (ANCHOR_(node)->type) {\r
- case ANCR_BEGIN_BUF:\r
- case ANCR_BEGIN_POSITION:\r
- case ANCR_BEGIN_LINE:\r
- case ANCR_END_BUF:\r
- case ANCR_SEMI_END_BUF:\r
- case ANCR_END_LINE:\r
- case ANCR_PREC_READ_NOT:\r
- case ANCR_LOOK_BEHIND:\r
- add_opt_anc_info(&opt->anc, ANCHOR_(node)->type);\r
- break;\r
-\r
- case ANCR_PREC_READ:\r
- {\r
- r = optimize_nodes(NODE_BODY(node), &xo, env);\r
- if (r == 0) {\r
- if (xo.sb.len > 0)\r
- copy_opt_exact(&opt->spr, &xo.sb);\r
- else if (xo.sm.len > 0)\r
- copy_opt_exact(&opt->spr, &xo.sm);\r
-\r
- opt->spr.reach_end = 0;\r
-\r
- if (xo.map.value > 0)\r
- copy_opt_map(&opt->map, &xo.map);\r
- }\r
- }\r
- break;\r
-\r
- case ANCR_LOOK_BEHIND_NOT:\r
- break;\r
- }\r
- break;\r
-\r
- case NODE_BACKREF:\r
- if (! NODE_IS_CHECKER(node)) {\r
- int* backs;\r
- OnigLen min, max, tmin, tmax;\r
- MemEnv* mem_env = SCANENV_MEMENV(env->scan_env);\r
- BackRefNode* br = BACKREF_(node);\r
-\r
- if (NODE_IS_RECURSION(node)) {\r
- set_mml(&opt->len, 0, INFINITE_LEN);\r
- break;\r
- }\r
- backs = BACKREFS_P(br);\r
- min = tree_min_len(mem_env[backs[0]].node, env->scan_env);\r
- max = tree_max_len(mem_env[backs[0]].node, env->scan_env);\r
- for (i = 1; i < br->back_num; i++) {\r
- tmin = tree_min_len(mem_env[backs[i]].node, env->scan_env);\r
- tmax = tree_max_len(mem_env[backs[i]].node, env->scan_env);\r
- if (min > tmin) min = tmin;\r
- if (max < tmax) max = tmax;\r
- }\r
- set_mml(&opt->len, min, max);\r
- }\r
- break;\r
-\r
-#ifdef USE_CALL\r
- case NODE_CALL:\r
- if (NODE_IS_RECURSION(node))\r
- set_mml(&opt->len, 0, INFINITE_LEN);\r
- else {\r
- OnigOptionType save = env->options;\r
- env->options = BAG_(NODE_BODY(node))->o.options;\r
- r = optimize_nodes(NODE_BODY(node), opt, env);\r
- env->options = save;\r
- }\r
- break;\r
-#endif\r
-\r
- case NODE_QUANT:\r
- {\r
- OnigLen min, max;\r
- QuantNode* qn = QUANT_(node);\r
-\r
- r = optimize_nodes(NODE_BODY(node), &xo, env);\r
- if (r != 0) break;\r
-\r
- if (qn->lower > 0) {\r
- copy_node_opt_info(opt, &xo);\r
- if (xo.sb.len > 0) {\r
- if (xo.sb.reach_end) {\r
- for (i = 2; i <= qn->lower && ! is_full_opt_exact(&opt->sb); i++) {\r
- int rc = concat_opt_exact(&opt->sb, &xo.sb, enc);\r
- if (rc > 0) break;\r
- }\r
- if (i < qn->lower) opt->sb.reach_end = 0;\r
- }\r
- }\r
-\r
- if (qn->lower != qn->upper) {\r
- opt->sb.reach_end = 0;\r
- opt->sm.reach_end = 0;\r
- }\r
- if (qn->lower > 1)\r
- opt->sm.reach_end = 0;\r
- }\r
-\r
- if (IS_INFINITE_REPEAT(qn->upper)) {\r
- if (env->mmd.max == 0 &&\r
- NODE_IS_ANYCHAR(NODE_BODY(node)) && qn->greedy != 0) {\r
- if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), env)))\r
- add_opt_anc_info(&opt->anc, ANCR_ANYCHAR_INF_ML);\r
- else\r
- add_opt_anc_info(&opt->anc, ANCR_ANYCHAR_INF);\r
- }\r
-\r
- max = (xo.len.max > 0 ? INFINITE_LEN : 0);\r
- }\r
- else {\r
- max = distance_multiply(xo.len.max, qn->upper);\r
- }\r
-\r
- min = distance_multiply(xo.len.min, qn->lower);\r
- set_mml(&opt->len, min, max);\r
- }\r
- break;\r
-\r
- case NODE_BAG:\r
- {\r
- BagNode* en = BAG_(node);\r
-\r
- switch (en->type) {\r
- case BAG_OPTION:\r
- {\r
- OnigOptionType save = env->options;\r
-\r
- env->options = en->o.options;\r
- r = optimize_nodes(NODE_BODY(node), opt, env);\r
- env->options = save;\r
- }\r
- break;\r
-\r
- case BAG_MEMORY:\r
-#ifdef USE_CALL\r
- en->opt_count++;\r
- if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {\r
- OnigLen min, max;\r
-\r
- min = 0;\r
- max = INFINITE_LEN;\r
- if (NODE_IS_MIN_FIXED(node)) min = en->min_len;\r
- if (NODE_IS_MAX_FIXED(node)) max = en->max_len;\r
- set_mml(&opt->len, min, max);\r
- }\r
- else\r
-#endif\r
- {\r
- r = optimize_nodes(NODE_BODY(node), opt, env);\r
- if (is_set_opt_anc_info(&opt->anc, ANCR_ANYCHAR_INF_MASK)) {\r
- if (MEM_STATUS_AT0(env->scan_env->backrefed_mem, en->m.regnum))\r
- remove_opt_anc_info(&opt->anc, ANCR_ANYCHAR_INF_MASK);\r
- }\r
- }\r
- break;\r
-\r
- case BAG_STOP_BACKTRACK:\r
- r = optimize_nodes(NODE_BODY(node), opt, env);\r
- break;\r
-\r
- case BAG_IF_ELSE:\r
- {\r
- OptEnv nenv;\r
-\r
- copy_opt_env(&nenv, env);\r
- r = optimize_nodes(NODE_BAG_BODY(en), &xo, &nenv);\r
- if (r == 0) {\r
- add_mml(&nenv.mmd, &xo.len);\r
- concat_left_node_opt_info(enc, opt, &xo);\r
- if (IS_NOT_NULL(en->te.Then)) {\r
- r = optimize_nodes(en->te.Then, &xo, &nenv);\r
- if (r == 0) {\r
- concat_left_node_opt_info(enc, opt, &xo);\r
- }\r
- }\r
-\r
- if (IS_NOT_NULL(en->te.Else)) {\r
- r = optimize_nodes(en->te.Else, &xo, env);\r
- if (r == 0)\r
- alt_merge_node_opt_info(opt, &xo, env);\r
- }\r
- }\r
- }\r
- break;\r
- }\r
- }\r
- break;\r
-\r
- case NODE_GIMMICK:\r
- break;\r
-\r
- default:\r
-#ifdef ONIG_DEBUG\r
- fprintf(stderr, "optimize_nodes: undefined node type %d\n", NODE_TYPE(node));\r
-#endif\r
- r = ONIGERR_TYPE_BUG;\r
- break;\r
- }\r
-\r
- return r;\r
-}\r
-\r
-static int\r
-set_optimize_exact(regex_t* reg, OptStr* e)\r
-{\r
- int r;\r
-\r
- if (e->len == 0) return 0;\r
-\r
- reg->exact = (UChar* )xmalloc(e->len);\r
- CHECK_NULL_RETURN_MEMERR(reg->exact);\r
- xmemcpy(reg->exact, e->s, e->len);\r
- reg->exact_end = reg->exact + e->len;\r
-\r
- if (e->case_fold) {\r
- reg->optimize = OPTIMIZE_STR_CASE_FOLD;\r
- if (e->good_case_fold != 0) {\r
- if (e->len >= 2) {\r
- r = set_sunday_quick_search_or_bmh_skip_table(reg, 1,\r
- reg->exact, reg->exact_end,\r
- reg->map, &(reg->map_offset));\r
- if (r != 0) return r;\r
- reg->optimize = OPTIMIZE_STR_CASE_FOLD_FAST;\r
- }\r
- }\r
- }\r
- else {\r
- int allow_reverse;\r
-\r
- allow_reverse =\r
- ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);\r
-\r
- if (e->len >= 2 || (e->len >= 1 && allow_reverse)) {\r
- r = set_sunday_quick_search_or_bmh_skip_table(reg, 0,\r
- reg->exact, reg->exact_end,\r
- reg->map, &(reg->map_offset));\r
- if (r != 0) return r;\r
-\r
- reg->optimize = (allow_reverse != 0\r
- ? OPTIMIZE_STR_FAST\r
- : OPTIMIZE_STR_FAST_STEP_FORWARD);\r
- }\r
- else {\r
- reg->optimize = OPTIMIZE_STR;\r
- }\r
- }\r
-\r
- reg->dmin = e->mmd.min;\r
- reg->dmax = e->mmd.max;\r
-\r
- if (reg->dmin != INFINITE_LEN) {\r
- reg->threshold_len = reg->dmin + (int )(reg->exact_end - reg->exact);\r
- }\r
-\r
- return 0;\r
-}\r
-\r
-static void\r
-set_optimize_map(regex_t* reg, OptMap* m)\r
-{\r
- int i;\r
-\r
- for (i = 0; i < CHAR_MAP_SIZE; i++)\r
- reg->map[i] = m->map[i];\r
-\r
- reg->optimize = OPTIMIZE_MAP;\r
- reg->dmin = m->mmd.min;\r
- reg->dmax = m->mmd.max;\r
-\r
- if (reg->dmin != INFINITE_LEN) {\r
- reg->threshold_len = reg->dmin + 1;\r
- }\r
-}\r
-\r
-static void\r
-set_sub_anchor(regex_t* reg, OptAnc* anc)\r
-{\r
- reg->sub_anchor |= anc->left & ANCR_BEGIN_LINE;\r
- reg->sub_anchor |= anc->right & ANCR_END_LINE;\r
-}\r
-\r
-#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)\r
-static void print_optimize_info(FILE* f, regex_t* reg);\r
-#endif\r
-\r
-static int\r
-set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)\r
-{\r
- int r;\r
- OptNode opt;\r
- OptEnv env;\r
-\r
- env.enc = reg->enc;\r
- env.options = reg->options;\r
- env.case_fold_flag = reg->case_fold_flag;\r
- env.scan_env = scan_env;\r
- clear_mml(&env.mmd);\r
-\r
- r = optimize_nodes(node, &opt, &env);\r
- if (r != 0) return r;\r
-\r
- reg->anchor = opt.anc.left & (ANCR_BEGIN_BUF |\r
- ANCR_BEGIN_POSITION | ANCR_ANYCHAR_INF | ANCR_ANYCHAR_INF_ML |\r
- ANCR_LOOK_BEHIND);\r
-\r
- if ((opt.anc.left & (ANCR_LOOK_BEHIND | ANCR_PREC_READ_NOT)) != 0)\r
- reg->anchor &= ~ANCR_ANYCHAR_INF_ML;\r
-\r
- reg->anchor |= opt.anc.right & (ANCR_END_BUF | ANCR_SEMI_END_BUF |\r
- ANCR_PREC_READ_NOT);\r
-\r
- if (reg->anchor & (ANCR_END_BUF | ANCR_SEMI_END_BUF)) {\r
- reg->anchor_dmin = opt.len.min;\r
- reg->anchor_dmax = opt.len.max;\r
- }\r
-\r
- if (opt.sb.len > 0 || opt.sm.len > 0) {\r
- select_opt_exact(reg->enc, &opt.sb, &opt.sm);\r
- if (opt.map.value > 0 && comp_opt_exact_or_map(&opt.sb, &opt.map) > 0) {\r
- goto set_map;\r
- }\r
- else {\r
- r = set_optimize_exact(reg, &opt.sb);\r
- set_sub_anchor(reg, &opt.sb.anc);\r
- }\r
- }\r
- else if (opt.map.value > 0) {\r
- set_map:\r
- set_optimize_map(reg, &opt.map);\r
- set_sub_anchor(reg, &opt.map.anc);\r
- }\r
- else {\r
- reg->sub_anchor |= opt.anc.left & ANCR_BEGIN_LINE;\r
- if (opt.len.max == 0)\r
- reg->sub_anchor |= opt.anc.right & ANCR_END_LINE;\r
- }\r
-\r
-#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)\r
- print_optimize_info(stderr, reg);\r
-#endif\r
- return r;\r
-}\r
-\r
-static void\r
-clear_optimize_info(regex_t* reg)\r
-{\r
- reg->optimize = OPTIMIZE_NONE;\r
- reg->anchor = 0;\r
- reg->anchor_dmin = 0;\r
- reg->anchor_dmax = 0;\r
- reg->sub_anchor = 0;\r
- reg->exact_end = (UChar* )NULL;\r
- reg->map_offset = 0;\r
- reg->threshold_len = 0;\r
- if (IS_NOT_NULL(reg->exact)) {\r
- xfree(reg->exact);\r
- reg->exact = (UChar* )NULL;\r
- }\r
-}\r
-\r
-#ifdef ONIG_DEBUG\r
-\r
-static void print_enc_string(FILE* fp, OnigEncoding enc,\r
- const UChar *s, const UChar *end)\r
-{\r
- fprintf(fp, "\nPATTERN: /");\r
-\r
- if (ONIGENC_MBC_MINLEN(enc) > 1) {\r
- const UChar *p;\r
- OnigCodePoint code;\r
-\r
- p = s;\r
- while (p < end) {\r
- code = ONIGENC_MBC_TO_CODE(enc, p, end);\r
- if (code >= 0x80) {\r
- fprintf(fp, " 0x%04x ", (int )code);\r
- }\r
- else {\r
- fputc((int )code, fp);\r
- }\r
-\r
- p += enclen(enc, p);\r
- }\r
- }\r
- else {\r
- while (s < end) {\r
- fputc((int )*s, fp);\r
- s++;\r
- }\r
- }\r
-\r
- fprintf(fp, "/\n");\r
-}\r
-\r
-#endif /* ONIG_DEBUG */\r
-\r
-#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)\r
-\r
-static void\r
-print_distance_range(FILE* f, OnigLen a, OnigLen b)\r
-{\r
- if (a == INFINITE_LEN)\r
- fputs("inf", f);\r
- else\r
- fprintf(f, "(%u)", a);\r
-\r
- fputs("-", f);\r
-\r
- if (b == INFINITE_LEN)\r
- fputs("inf", f);\r
- else\r
- fprintf(f, "(%u)", b);\r
-}\r
-\r
-static void\r
-print_anchor(FILE* f, int anchor)\r
-{\r
- int q = 0;\r
-\r
- fprintf(f, "[");\r
-\r
- if (anchor & ANCR_BEGIN_BUF) {\r
- fprintf(f, "begin-buf");\r
- q = 1;\r
- }\r
- if (anchor & ANCR_BEGIN_LINE) {\r
- if (q) fprintf(f, ", ");\r
- q = 1;\r
- fprintf(f, "begin-line");\r
- }\r
- if (anchor & ANCR_BEGIN_POSITION) {\r
- if (q) fprintf(f, ", ");\r
- q = 1;\r
- fprintf(f, "begin-pos");\r
- }\r
- if (anchor & ANCR_END_BUF) {\r
- if (q) fprintf(f, ", ");\r
- q = 1;\r
- fprintf(f, "end-buf");\r
- }\r
- if (anchor & ANCR_SEMI_END_BUF) {\r
- if (q) fprintf(f, ", ");\r
- q = 1;\r
- fprintf(f, "semi-end-buf");\r
- }\r
- if (anchor & ANCR_END_LINE) {\r
- if (q) fprintf(f, ", ");\r
- q = 1;\r
- fprintf(f, "end-line");\r
- }\r
- if (anchor & ANCR_ANYCHAR_INF) {\r
- if (q) fprintf(f, ", ");\r
- q = 1;\r
- fprintf(f, "anychar-inf");\r
- }\r
- if (anchor & ANCR_ANYCHAR_INF_ML) {\r
- if (q) fprintf(f, ", ");\r
- fprintf(f, "anychar-inf-ml");\r
- }\r
-\r
- fprintf(f, "]");\r
-}\r
-\r
-static void\r
-print_optimize_info(FILE* f, regex_t* reg)\r
-{\r
- static const char* on[] = { "NONE", "STR",\r
- "STR_FAST", "STR_FAST_STEP_FORWARD",\r
- "STR_CASE_FOLD_FAST", "STR_CASE_FOLD", "MAP" };\r
-\r
- fprintf(f, "optimize: %s\n", on[reg->optimize]);\r
- fprintf(f, " anchor: "); print_anchor(f, reg->anchor);\r
- if ((reg->anchor & ANCR_END_BUF_MASK) != 0)\r
- print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);\r
- fprintf(f, "\n");\r
-\r
- if (reg->optimize) {\r
- fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor);\r
- fprintf(f, "\n");\r
- }\r
- fprintf(f, "\n");\r
-\r
- if (reg->exact) {\r
- UChar *p;\r
- fprintf(f, "exact: [");\r
- for (p = reg->exact; p < reg->exact_end; p++) {\r
- fputc(*p, f);\r
- }\r
- fprintf(f, "]: length: %ld\n", (reg->exact_end - reg->exact));\r
- }\r
- else if (reg->optimize & OPTIMIZE_MAP) {\r
- int c, i, n = 0;\r
-\r
- for (i = 0; i < CHAR_MAP_SIZE; i++)\r
- if (reg->map[i]) n++;\r
-\r
- fprintf(f, "map: n=%d\n", n);\r
- if (n > 0) {\r
- c = 0;\r
- fputc('[', f);\r
- for (i = 0; i < CHAR_MAP_SIZE; i++) {\r
- if (reg->map[i] != 0) {\r
- if (c > 0) fputs(", ", f);\r
- c++;\r
- if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&\r
- ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))\r
- fputc(i, f);\r
- else\r
- fprintf(f, "%d", i);\r
- }\r
- }\r
- fprintf(f, "]\n");\r
- }\r
- }\r
-}\r
-#endif\r
-\r
-\r
-extern RegexExt*\r
-onig_get_regex_ext(regex_t* reg)\r
-{\r
- if (IS_NULL(reg->extp)) {\r
- RegexExt* ext = (RegexExt* )xmalloc(sizeof(*ext));\r
- if (IS_NULL(ext)) return 0;\r
-\r
- ext->pattern = 0;\r
- ext->pattern_end = 0;\r
-#ifdef USE_CALLOUT\r
- ext->tag_table = 0;\r
- ext->callout_num = 0;\r
- ext->callout_list_alloc = 0;\r
- ext->callout_list = 0;\r
-#endif\r
-\r
- reg->extp = ext;\r
- }\r
-\r
- return reg->extp;\r
-}\r
-\r
-static void\r
-free_regex_ext(RegexExt* ext)\r
-{\r
- if (IS_NOT_NULL(ext)) {\r
- if (IS_NOT_NULL(ext->pattern))\r
- xfree((void* )ext->pattern);\r
-\r
-#ifdef USE_CALLOUT\r
- if (IS_NOT_NULL(ext->tag_table))\r
- onig_callout_tag_table_free(ext->tag_table);\r
-\r
- if (IS_NOT_NULL(ext->callout_list))\r
- onig_free_reg_callout_list(ext->callout_num, ext->callout_list);\r
-#endif\r
-\r
- xfree(ext);\r
- }\r
-}\r
-\r
-extern int\r
-onig_ext_set_pattern(regex_t* reg, const UChar* pattern, const UChar* pattern_end)\r
-{\r
- RegexExt* ext;\r
- UChar* s;\r
-\r
- ext = onig_get_regex_ext(reg);\r
- CHECK_NULL_RETURN_MEMERR(ext);\r
-\r
- s = onigenc_strdup(reg->enc, pattern, pattern_end);\r
- CHECK_NULL_RETURN_MEMERR(s);\r
-\r
- ext->pattern = s;\r
- ext->pattern_end = s + (pattern_end - pattern);\r
-\r
- return ONIG_NORMAL;\r
-}\r
-\r
-extern void\r
-onig_free_body(regex_t* reg)\r
-{\r
- if (IS_NOT_NULL(reg)) {\r
- ops_free(reg);\r
- if (IS_NOT_NULL(reg->string_pool)) {\r
- xfree(reg->string_pool);\r
- reg->string_pool_end = reg->string_pool = 0;\r
- }\r
- if (IS_NOT_NULL(reg->exact)) xfree(reg->exact);\r
- if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range);\r
- if (IS_NOT_NULL(reg->extp)) {\r
- free_regex_ext(reg->extp);\r
- reg->extp = 0;\r
- }\r
-\r
- onig_names_free(reg);\r
- }\r
-}\r
-\r
-extern void\r
-onig_free(regex_t* reg)\r
-{\r
- if (IS_NOT_NULL(reg)) {\r
- onig_free_body(reg);\r
- xfree(reg);\r
- }\r
-}\r
-\r
-\r
-#ifdef ONIG_DEBUG_PARSE\r
-static void print_tree P_((FILE* f, Node* node));\r
-#endif\r
-\r
-extern int onig_init_for_match_at(regex_t* reg);\r
-\r
-extern int\r
-onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,\r
- OnigErrorInfo* einfo)\r
-{\r
- int r;\r
- Node* root;\r
- ScanEnv scan_env;\r
-#ifdef USE_CALL\r
- UnsetAddrList uslist;\r
-#endif\r
-\r
- root = 0;\r
- if (IS_NOT_NULL(einfo)) {\r
- einfo->enc = reg->enc;\r
- einfo->par = (UChar* )NULL;\r
- }\r
-\r
-#ifdef ONIG_DEBUG\r
- print_enc_string(stderr, reg->enc, pattern, pattern_end);\r
-#endif\r
-\r
- if (reg->ops_alloc == 0) {\r
- r = ops_init(reg, OPS_INIT_SIZE);\r
- if (r != 0) goto end;\r
- }\r
- else\r
- reg->ops_used = 0;\r
-\r
- reg->string_pool = 0;\r
- reg->string_pool_end = 0;\r
- reg->num_mem = 0;\r
- reg->num_repeat = 0;\r
- reg->num_null_check = 0;\r
- reg->repeat_range_alloc = 0;\r
- reg->repeat_range = (OnigRepeatRange* )NULL;\r
-\r
- r = onig_parse_tree(&root, pattern, pattern_end, reg, &scan_env);\r
- if (r != 0) goto err;\r
-\r
- /* mixed use named group and no-named group */\r
- if (scan_env.num_named > 0 &&\r
- IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&\r
- ! ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {\r
- if (scan_env.num_named != scan_env.num_mem)\r
- r = disable_noname_group_capture(&root, reg, &scan_env);\r
- else\r
- r = numbered_ref_check(root);\r
-\r
- if (r != 0) goto err;\r
- }\r
-\r
- r = check_backrefs(root, &scan_env);\r
- if (r != 0) goto err;\r
-\r
-#ifdef USE_CALL\r
- if (scan_env.num_call > 0) {\r
- r = unset_addr_list_init(&uslist, scan_env.num_call);\r
- if (r != 0) goto err;\r
- scan_env.unset_addr_list = &uslist;\r
- r = setup_call(root, &scan_env, 0);\r
- if (r != 0) goto err_unset;\r
- r = setup_call2(root);\r
- if (r != 0) goto err_unset;\r
- r = recursive_call_check_trav(root, &scan_env, 0);\r
- if (r < 0) goto err_unset;\r
- r = infinite_recursive_call_check_trav(root, &scan_env);\r
- if (r != 0) goto err_unset;\r
-\r
- setup_called_state(root, 0);\r
- }\r
-\r
- reg->num_call = scan_env.num_call;\r
-#endif\r
-\r
- r = setup_tree(root, reg, 0, &scan_env);\r
- if (r != 0) goto err_unset;\r
-\r
-#ifdef ONIG_DEBUG_PARSE\r
- print_tree(stderr, root);\r
-#endif\r
-\r
- reg->capture_history = scan_env.capture_history;\r
- reg->bt_mem_start = scan_env.bt_mem_start;\r
- reg->bt_mem_start |= reg->capture_history;\r
- if (IS_FIND_CONDITION(reg->options))\r
- MEM_STATUS_ON_ALL(reg->bt_mem_end);\r
- else {\r
- reg->bt_mem_end = scan_env.bt_mem_end;\r
- reg->bt_mem_end |= reg->capture_history;\r
- }\r
- reg->bt_mem_start |= reg->bt_mem_end;\r
-\r
- clear_optimize_info(reg);\r
-#ifndef ONIG_DONT_OPTIMIZE\r
- r = set_optimize_info_from_tree(root, reg, &scan_env);\r
- if (r != 0) goto err_unset;\r
-#endif\r
-\r
- if (IS_NOT_NULL(scan_env.mem_env_dynamic)) {\r
- xfree(scan_env.mem_env_dynamic);\r
- scan_env.mem_env_dynamic = (MemEnv* )NULL;\r
- }\r
-\r
- r = compile_tree(root, reg, &scan_env);\r
- if (r == 0) {\r
- if (scan_env.keep_num > 0) {\r
- r = add_op(reg, OP_UPDATE_VAR);\r
- if (r != 0) goto err;\r
-\r
- COP(reg)->update_var.type = UPDATE_VAR_KEEP_FROM_STACK_LAST;\r
- COP(reg)->update_var.id = 0; /* not used */\r
- }\r
-\r
- r = add_op(reg, OP_END);\r
- if (r != 0) goto err;\r
-\r
-#ifdef USE_CALL\r
- if (scan_env.num_call > 0) {\r
- r = fix_unset_addr_list(&uslist, reg);\r
- unset_addr_list_end(&uslist);\r
- if (r != 0) goto err;\r
- }\r
-#endif\r
-\r
- if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0)\r
-#ifdef USE_CALLOUT\r
- || (IS_NOT_NULL(reg->extp) && reg->extp->callout_num != 0)\r
-#endif\r
- )\r
- reg->stack_pop_level = STACK_POP_LEVEL_ALL;\r
- else {\r
- if (reg->bt_mem_start != 0)\r
- reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;\r
- else\r
- reg->stack_pop_level = STACK_POP_LEVEL_FREE;\r
- }\r
-\r
- r = ops_make_string_pool(reg);\r
- if (r != 0) goto err;\r
- }\r
-#ifdef USE_CALL\r
- else if (scan_env.num_call > 0) {\r
- unset_addr_list_end(&uslist);\r
- }\r
-#endif\r
- onig_node_free(root);\r
-\r
-#ifdef ONIG_DEBUG_COMPILE\r
- onig_print_names(stderr, reg);\r
- onig_print_compiled_byte_code_list(stderr, reg);\r
-#endif\r
-\r
-#ifdef USE_DIRECT_THREADED_CODE\r
- /* opcode -> opaddr */\r
- onig_init_for_match_at(reg);\r
-#endif\r
-\r
- end:\r
- return r;\r
-\r
- err_unset:\r
-#ifdef USE_CALL\r
- if (scan_env.num_call > 0) {\r
- unset_addr_list_end(&uslist);\r
- }\r
-#endif\r
- err:\r
- if (IS_NOT_NULL(scan_env.error)) {\r
- if (IS_NOT_NULL(einfo)) {\r
- einfo->par = scan_env.error;\r
- einfo->par_end = scan_env.error_end;\r
- }\r
- }\r
-\r
- onig_node_free(root);\r
- if (IS_NOT_NULL(scan_env.mem_env_dynamic))\r
- xfree(scan_env.mem_env_dynamic);\r
- return r;\r
-}\r
-\r
-\r
-static int onig_inited = 0;\r
-\r
-extern int\r
-onig_reg_init(regex_t* reg, OnigOptionType option, OnigCaseFoldType case_fold_flag,\r
- OnigEncoding enc, OnigSyntaxType* syntax)\r
-{\r
- int r;\r
-\r
- xmemset(reg, 0, sizeof(*reg));\r
-\r
- if (onig_inited == 0) {\r
-#if 0\r
- return ONIGERR_LIBRARY_IS_NOT_INITIALIZED;\r
-#else\r
- r = onig_initialize(&enc, 1);\r
- if (r != 0)\r
- return ONIGERR_FAIL_TO_INITIALIZE;\r
-\r
- onig_warning("You didn't call onig_initialize() explicitly");\r
-#endif\r
- }\r
-\r
- if (IS_NULL(reg))\r
- return ONIGERR_INVALID_ARGUMENT;\r
-\r
- if (ONIGENC_IS_UNDEF(enc))\r
- return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED;\r
-\r
- if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))\r
- == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {\r
- return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;\r
- }\r
-\r
- if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {\r
- option |= syntax->options;\r
- option &= ~ONIG_OPTION_SINGLELINE;\r
- }\r
- else\r
- option |= syntax->options;\r
-\r
- (reg)->enc = enc;\r
- (reg)->options = option;\r
- (reg)->syntax = syntax;\r
- (reg)->optimize = 0;\r
- (reg)->exact = (UChar* )NULL;\r
- (reg)->extp = (RegexExt* )NULL;\r
-\r
- (reg)->ops = (Operation* )NULL;\r
- (reg)->ops_curr = (Operation* )NULL;\r
- (reg)->ops_used = 0;\r
- (reg)->ops_alloc = 0;\r
- (reg)->name_table = (void* )NULL;\r
-\r
- (reg)->case_fold_flag = case_fold_flag;\r
- return 0;\r
-}\r
-\r
-extern int\r
-onig_new_without_alloc(regex_t* reg,\r
- const UChar* pattern, const UChar* pattern_end,\r
- OnigOptionType option, OnigEncoding enc,\r
- OnigSyntaxType* syntax, OnigErrorInfo* einfo)\r
-{\r
- int r;\r
-\r
- r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);\r
- if (r != 0) return r;\r
-\r
- r = onig_compile(reg, pattern, pattern_end, einfo);\r
- return r;\r
-}\r
-\r
-extern int\r
-onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,\r
- OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,\r
- OnigErrorInfo* einfo)\r
-{\r
- int r;\r
-\r
- *reg = (regex_t* )xmalloc(sizeof(regex_t));\r
- if (IS_NULL(*reg)) return ONIGERR_MEMORY;\r
-\r
- r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);\r
- if (r != 0) goto err;\r
-\r
- r = onig_compile(*reg, pattern, pattern_end, einfo);\r
- if (r != 0) {\r
- err:\r
- onig_free(*reg);\r
- *reg = NULL;\r
- }\r
- return r;\r
-}\r
-\r
-extern int\r
-onig_initialize(OnigEncoding encodings[], int n)\r
-{\r
- int i;\r
- int r;\r
-\r
- if (onig_inited != 0)\r
- return 0;\r
-\r
- onigenc_init();\r
-\r
- onig_inited = 1;\r
-\r
- for (i = 0; i < n; i++) {\r
- OnigEncoding enc = encodings[i];\r
- r = onig_initialize_encoding(enc);\r
- if (r != 0)\r
- return r;\r
- }\r
-\r
- return ONIG_NORMAL;\r
-}\r
-\r
-typedef struct EndCallListItem {\r
- struct EndCallListItem* next;\r
- void (*func)(void);\r
-} EndCallListItemType;\r
-\r
-static EndCallListItemType* EndCallTop;\r
-\r
-extern void onig_add_end_call(void (*func)(void))\r
-{\r
- EndCallListItemType* item;\r
-\r
- item = (EndCallListItemType* )xmalloc(sizeof(*item));\r
- if (item == 0) return ;\r
-\r
- item->next = EndCallTop;\r
- item->func = func;\r
-\r
- EndCallTop = item;\r
-}\r
-\r
-static void\r
-exec_end_call_list(void)\r
-{\r
- EndCallListItemType* prev;\r
- void (*func)(void);\r
-\r
- while (EndCallTop != 0) {\r
- func = EndCallTop->func;\r
- (*func)();\r
-\r
- prev = EndCallTop;\r
- EndCallTop = EndCallTop->next;\r
- xfree(prev);\r
- }\r
-}\r
-\r
-extern int\r
-onig_end(void)\r
-{\r
- exec_end_call_list();\r
-\r
-#ifdef USE_CALLOUT\r
- onig_global_callout_names_free();\r
-#endif\r
-\r
- onigenc_end();\r
-\r
- onig_inited = 0;\r
-\r
- return 0;\r
-}\r
-\r
-extern int\r
-onig_is_in_code_range(const UChar* p, OnigCodePoint code)\r
-{\r
- OnigCodePoint n, *data;\r
- OnigCodePoint low, high, x;\r
-\r
- GET_CODE_POINT(n, p);\r
- data = (OnigCodePoint* )p;\r
- data++;\r
-\r
- for (low = 0, high = n; low < high; ) {\r
- x = (low + high) >> 1;\r
- if (code > data[x * 2 + 1])\r
- low = x + 1;\r
- else\r
- high = x;\r
- }\r
-\r
- return ((low < n && code >= data[low * 2]) ? 1 : 0);\r
-}\r
-\r
-extern int\r
-onig_is_code_in_cc_len(int elen, OnigCodePoint code, /* CClassNode* */ void* cc_arg)\r
-{\r
- int found;\r
- CClassNode* cc = (CClassNode* )cc_arg;\r
-\r
- if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {\r
- if (IS_NULL(cc->mbuf)) {\r
- found = 0;\r
- }\r
- else {\r
- found = onig_is_in_code_range(cc->mbuf->p, code) != 0;\r
- }\r
- }\r
- else {\r
- found = BITSET_AT(cc->bs, code) != 0;\r
- }\r
-\r
- if (IS_NCCLASS_NOT(cc))\r
- return !found;\r
- else\r
- return found;\r
-}\r
-\r
-extern int\r
-onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)\r
-{\r
- int len;\r
-\r
- if (ONIGENC_MBC_MINLEN(enc) > 1) {\r
- len = 2;\r
- }\r
- else {\r
- len = ONIGENC_CODE_TO_MBCLEN(enc, code);\r
- if (len < 0) return 0;\r
- }\r
- return onig_is_code_in_cc_len(len, code, cc);\r
-}\r
-\r
-\r
-#ifdef ONIG_DEBUG_PARSE\r
-\r
-static void\r
-p_string(FILE* f, int len, UChar* s)\r
-{\r
- fputs(":", f);\r
- while (len-- > 0) { fputc(*s++, f); }\r
-}\r
-\r
-static void\r
-Indent(FILE* f, int indent)\r
-{\r
- int i;\r
- for (i = 0; i < indent; i++) putc(' ', f);\r
-}\r
-\r
-static void\r
-print_indent_tree(FILE* f, Node* node, int indent)\r
-{\r
- int i;\r
- NodeType type;\r
- UChar* p;\r
- int add = 3;\r
-\r
- Indent(f, indent);\r
- if (IS_NULL(node)) {\r
- fprintf(f, "ERROR: null node!!!\n");\r
- exit (0);\r
- }\r
-\r
- type = NODE_TYPE(node);\r
- switch (type) {\r
- case NODE_LIST:\r
- case NODE_ALT:\r
- if (type == NODE_LIST)\r
- fprintf(f, "<list:%p>\n", node);\r
- else\r
- fprintf(f, "<alt:%p>\n", node);\r
-\r
- print_indent_tree(f, NODE_CAR(node), indent + add);\r
- while (IS_NOT_NULL(node = NODE_CDR(node))) {\r
- if (NODE_TYPE(node) != type) {\r
- fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NODE_TYPE(node));\r
- exit(0);\r
- }\r
- print_indent_tree(f, NODE_CAR(node), indent + add);\r
- }\r
- break;\r
-\r
- case NODE_STRING:\r
- {\r
- char* mode;\r
- char* dont;\r
- char* good;\r
-\r
- if (NODE_STRING_IS_RAW(node))\r
- mode = "-raw";\r
- else if (NODE_STRING_IS_AMBIG(node))\r
- mode = "-ambig";\r
- else\r
- mode = "";\r
-\r
- if (NODE_STRING_IS_GOOD_AMBIG(node))\r
- good = "-good";\r
- else\r
- good = "";\r
-\r
- if (NODE_STRING_IS_DONT_GET_OPT_INFO(node))\r
- dont = " (dont-opt)";\r
- else\r
- dont = "";\r
-\r
- fprintf(f, "<string%s%s%s:%p>", mode, good, dont, node);\r
- for (p = STR_(node)->s; p < STR_(node)->end; p++) {\r
- if (*p >= 0x20 && *p < 0x7f)\r
- fputc(*p, f);\r
- else {\r
- fprintf(f, " 0x%02x", *p);\r
- }\r
- }\r
- }\r
- break;\r
-\r
- case NODE_CCLASS:\r
- fprintf(f, "<cclass:%p>", node);\r
- if (IS_NCCLASS_NOT(CCLASS_(node))) fputs(" not", f);\r
- if (CCLASS_(node)->mbuf) {\r
- BBuf* bbuf = CCLASS_(node)->mbuf;\r
- for (i = 0; i < bbuf->used; i++) {\r
- if (i > 0) fprintf(f, ",");\r
- fprintf(f, "%0x", bbuf->p[i]);\r
- }\r
- }\r
- break;\r
-\r
- case NODE_CTYPE:\r
- fprintf(f, "<ctype:%p> ", node);\r
- switch (CTYPE_(node)->ctype) {\r
- case CTYPE_ANYCHAR:\r
- fprintf(f, "<anychar:%p>", node);\r
- break;\r
-\r
- case ONIGENC_CTYPE_WORD:\r
- if (CTYPE_(node)->not != 0)\r
- fputs("not word", f);\r
- else\r
- fputs("word", f);\r
-\r
- if (CTYPE_(node)->ascii_mode != 0)\r
- fputs(" (ascii)", f);\r
-\r
- break;\r
-\r
- default:\r
- fprintf(f, "ERROR: undefined ctype.\n");\r
- exit(0);\r
- }\r
- break;\r
-\r
- case NODE_ANCHOR:\r
- fprintf(f, "<anchor:%p> ", node);\r
- switch (ANCHOR_(node)->type) {\r
- case ANCR_BEGIN_BUF: fputs("begin buf", f); break;\r
- case ANCR_END_BUF: fputs("end buf", f); break;\r
- case ANCR_BEGIN_LINE: fputs("begin line", f); break;\r
- case ANCR_END_LINE: fputs("end line", f); break;\r
- case ANCR_SEMI_END_BUF: fputs("semi end buf", f); break;\r
- case ANCR_BEGIN_POSITION: fputs("begin position", f); break;\r
-\r
- case ANCR_WORD_BOUNDARY: fputs("word boundary", f); break;\r
- case ANCR_NO_WORD_BOUNDARY: fputs("not word boundary", f); break;\r
-#ifdef USE_WORD_BEGIN_END\r
- case ANCR_WORD_BEGIN: fputs("word begin", f); break;\r
- case ANCR_WORD_END: fputs("word end", f); break;\r
-#endif\r
- case ANCR_TEXT_SEGMENT_BOUNDARY:\r
- fputs("text-segment boundary", f); break;\r
- case ANCR_NO_TEXT_SEGMENT_BOUNDARY:\r
- fputs("no text-segment boundary", f); break;\r
- case ANCR_PREC_READ:\r
- fprintf(f, "prec read\n");\r
- print_indent_tree(f, NODE_BODY(node), indent + add);\r
- break;\r
- case ANCR_PREC_READ_NOT:\r
- fprintf(f, "prec read not\n");\r
- print_indent_tree(f, NODE_BODY(node), indent + add);\r
- break;\r
- case ANCR_LOOK_BEHIND:\r
- fprintf(f, "look behind\n");\r
- print_indent_tree(f, NODE_BODY(node), indent + add);\r
- break;\r
- case ANCR_LOOK_BEHIND_NOT:\r
- fprintf(f, "look behind not\n");\r
- print_indent_tree(f, NODE_BODY(node), indent + add);\r
- break;\r
-\r
- default:\r
- fprintf(f, "ERROR: undefined anchor type.\n");\r
- break;\r
- }\r
- break;\r
-\r
- case NODE_BACKREF:\r
- {\r
- int* p;\r
- BackRefNode* br = BACKREF_(node);\r
- p = BACKREFS_P(br);\r
- fprintf(f, "<backref%s:%p>", NODE_IS_CHECKER(node) ? "-checker" : "", node);\r
- for (i = 0; i < br->back_num; i++) {\r
- if (i > 0) fputs(", ", f);\r
- fprintf(f, "%d", p[i]);\r
- }\r
- }\r
- break;\r
-\r
-#ifdef USE_CALL\r
- case NODE_CALL:\r
- {\r
- CallNode* cn = CALL_(node);\r
- fprintf(f, "<call:%p>", node);\r
- p_string(f, cn->name_end - cn->name, cn->name);\r
- }\r
- break;\r
-#endif\r
-\r
- case NODE_QUANT:\r
- fprintf(f, "<quantifier:%p>{%d,%d}%s\n", node,\r
- QUANT_(node)->lower, QUANT_(node)->upper,\r
- (QUANT_(node)->greedy ? "" : "?"));\r
- print_indent_tree(f, NODE_BODY(node), indent + add);\r
- break;\r
-\r
- case NODE_BAG:\r
- fprintf(f, "<bag:%p> ", node);\r
- switch (BAG_(node)->type) {\r
- case BAG_OPTION:\r
- fprintf(f, "option:%d", BAG_(node)->o.options);\r
- break;\r
- case BAG_MEMORY:\r
- fprintf(f, "memory:%d", BAG_(node)->m.regnum);\r
- break;\r
- case BAG_STOP_BACKTRACK:\r
- fprintf(f, "stop-bt");\r
- break;\r
- case BAG_IF_ELSE:\r
- fprintf(f, "if-else");\r
- break;\r
- }\r
- fprintf(f, "\n");\r
- print_indent_tree(f, NODE_BODY(node), indent + add);\r
- break;\r
-\r
- case NODE_GIMMICK:\r
- fprintf(f, "<gimmick:%p> ", node);\r
- switch (GIMMICK_(node)->type) {\r
- case GIMMICK_FAIL:\r
- fprintf(f, "fail");\r
- break;\r
- case GIMMICK_SAVE:\r
- fprintf(f, "save:%d:%d", GIMMICK_(node)->detail_type, GIMMICK_(node)->id);\r
- break;\r
- case GIMMICK_UPDATE_VAR:\r
- fprintf(f, "update_var:%d:%d", GIMMICK_(node)->detail_type, GIMMICK_(node)->id);\r
- break;\r
-#ifdef USE_CALLOUT\r
- case GIMMICK_CALLOUT:\r
- switch (GIMMICK_(node)->detail_type) {\r
- case ONIG_CALLOUT_OF_CONTENTS:\r
- fprintf(f, "callout:contents:%d", GIMMICK_(node)->num);\r
- break;\r
- case ONIG_CALLOUT_OF_NAME:\r
- fprintf(f, "callout:name:%d:%d", GIMMICK_(node)->id, GIMMICK_(node)->num);\r
- break;\r
- }\r
-#endif\r
- }\r
- break;\r
-\r
- default:\r
- fprintf(f, "print_indent_tree: undefined node type %d\n", NODE_TYPE(node));\r
- break;\r
- }\r
-\r
- if (type != NODE_LIST && type != NODE_ALT && type != NODE_QUANT &&\r
- type != NODE_BAG)\r
- fprintf(f, "\n");\r
- fflush(f);\r
-}\r
-\r
-static void\r
-print_tree(FILE* f, Node* node)\r
-{\r
- print_indent_tree(f, node, 0);\r
-}\r
-#endif\r