regcomp.c - Oniguruma (regular expression library)\r
**********************************************************************/\r
/*-\r
- * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>\r
+ * Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>\r
* All rights reserved.\r
*\r
* Redistribution and use in source and binary forms, with or without\r
\r
#include "regparse.h"\r
\r
+#define OPS_INIT_SIZE 8\r
+\r
OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;\r
\r
#if 0\r
{\r
if (s->n >= s->alloc) {\r
int new_size = s->alloc * 2;\r
- int* nv = (int* )xrealloc(s->v, sizeof(int) * new_size);\r
+ int* nv = (int* )xrealloc(s->v, sizeof(int) * new_size, sizeof(int) * s->alloc);\r
if (IS_NULL(nv)) return ONIGERR_MEMORY;\r
\r
s->alloc = new_size;\r
}\r
#endif\r
\r
+/*\r
+ * Set up the operation array of `reg`.\r
+ *\r
+ * reg:             regex whose ops/ops_curr/ops_alloc/ops_used fields are\r
+ *                  (re)initialised; with USE_DIRECT_THREADED_CODE the parallel\r
+ *                  opcode array reg->ocs is set up as well.\r
+ * init_alloc_size: number of Operation slots to allocate; when it is <= 0\r
+ *                  nothing is allocated and the array pointers are set to 0.\r
+ *\r
+ * Returns ONIG_NORMAL on success and ONIGERR_MEMORY when an allocation\r
+ * fails.  On failure nothing allocated here is left behind.\r
+ */\r
+static int\r
+ops_init(regex_t* reg, int init_alloc_size)\r
+{\r
+  Operation* p;\r
+  size_t size;\r
+\r
+  if (init_alloc_size > 0) {\r
+    size = sizeof(Operation) * init_alloc_size;\r
+    p = (Operation* )xmalloc(size);\r
+    CHECK_NULL_RETURN_MEMERR(p);\r
+#ifdef USE_DIRECT_THREADED_CODE\r
+    {\r
+      enum OpCode* cp;\r
+      size = sizeof(enum OpCode) * init_alloc_size;\r
+      cp = (enum OpCode* )xmalloc(size);\r
+      if (IS_NULL(cp)) {\r
+        /* do not leak the Operation array allocated just above */\r
+        xfree(p);\r
+        return ONIGERR_MEMORY;\r
+      }\r
+      reg->ocs = cp;\r
+    }\r
+#endif\r
+  }\r
+  else {\r
+    p = (Operation* )0;\r
+#ifdef USE_DIRECT_THREADED_CODE\r
+    reg->ocs = (enum OpCode* )0;\r
+#endif\r
+  }\r
+\r
+  reg->ops = p;\r
+  reg->ops_curr = 0; /* !!! not yet done ops_new() */\r
+  reg->ops_alloc = init_alloc_size;\r
+  reg->ops_used = 0;\r
+\r
+  return ONIG_NORMAL;\r
+}\r
+\r
+/*\r
+ * Grow the operation array of `reg`.\r
+ *\r
+ * reg: regex whose ops array (and, with USE_DIRECT_THREADED_CODE, the\r
+ *      parallel opcode array reg->ocs) is enlarged.\r
+ * n:   number of additional slots; values <= 0 are replaced by\r
+ *      MIN_OPS_EXPAND_SIZE.\r
+ *\r
+ * Returns ONIG_NORMAL on success and ONIGERR_MEMORY when a reallocation\r
+ * fails.  reg->ops and reg->ops_curr are refreshed as soon as the Operation\r
+ * array has been reallocated, so they never refer to the previous block even\r
+ * if the opcode array reallocation fails afterwards; reg->ops_alloc is only\r
+ * raised once every array has been grown.\r
+ */\r
+static int\r
+ops_expand(regex_t* reg, int n)\r
+{\r
+#define MIN_OPS_EXPAND_SIZE 4\r
+\r
+#ifdef USE_DIRECT_THREADED_CODE\r
+  enum OpCode* cp;\r
+#endif\r
+  Operation* p;\r
+  size_t size;\r
+\r
+  if (n <= 0) n = MIN_OPS_EXPAND_SIZE;\r
+\r
+  n += reg->ops_alloc;\r
+\r
+  size = sizeof(Operation) * n;\r
+  p = (Operation* )xrealloc(reg->ops, size, sizeof(Operation) * reg->ops_alloc);\r
+  CHECK_NULL_RETURN_MEMERR(p);\r
+\r
+  /* Publish the new block immediately: the old one may have been released */\r
+  /* or moved, and a later failure below must not leave stale pointers.    */\r
+  reg->ops = p;\r
+  if (reg->ops_used == 0)\r
+    reg->ops_curr = 0;\r
+  else\r
+    reg->ops_curr = reg->ops + (reg->ops_used - 1);\r
+\r
+#ifdef USE_DIRECT_THREADED_CODE\r
+  size = sizeof(enum OpCode) * n;\r
+  cp = (enum OpCode* )xrealloc(reg->ocs, size, sizeof(enum OpCode) * reg->ops_alloc);\r
+  CHECK_NULL_RETURN_MEMERR(cp);\r
+  reg->ocs = cp;\r
+#endif\r
+\r
+  reg->ops_alloc = n;\r
+\r
+  return ONIG_NORMAL;\r
+}\r
+\r
+/*\r
+ * Reserve the next Operation slot of `reg`, growing the array through\r
+ * ops_expand() when it is full.  The slot becomes reg->ops_curr and is\r
+ * zero-filled.  Returns ONIG_NORMAL, or the error code from ops_expand().\r
+ */\r
+static int\r
+ops_new(regex_t* reg)\r
+{\r
+  if (reg->ops_used >= reg->ops_alloc) {\r
+    /* ask for as many extra slots as are currently allocated */\r
+    int status = ops_expand(reg, reg->ops_alloc);\r
+    if (status != ONIG_NORMAL) return status;\r
+  }\r
+\r
+  reg->ops_curr = reg->ops + reg->ops_used;\r
+  xmemset(reg->ops_curr, 0, sizeof(Operation));\r
+  reg->ops_used++;\r
+\r
+  return ONIG_NORMAL;\r
+}\r
+\r
+/*\r
+ * Return 1 when `s` lies inside [reg->string_pool, reg->string_pool_end),\r
+ * otherwise 0.\r
+ */\r
+static int\r
+is_in_string_pool(regex_t* reg, UChar* s)\r
+{\r
+  if (s < reg->string_pool) return 0;\r
+  if (s >= reg->string_pool_end) return 0;\r
+\r
+  return 1;\r
+}\r
+\r
+/*\r
+ * Release the operation array of `reg` together with the per-operation\r
+ * buffers it owns, then reset ops/ops_curr/ops_alloc/ops_used to zero.\r
+ * Does nothing when reg->ops is NULL.  This function does not release\r
+ * reg->string_pool itself; strings located inside the pool are skipped.\r
+ */\r
+static void\r
+ops_free(regex_t* reg)\r
+{\r
+ int i;\r
+\r
+ if (IS_NULL(reg->ops)) return ;\r
+\r
+ for (i = 0; i < (int )reg->ops_used; i++) {\r
+ enum OpCode opcode;\r
+ Operation* op;\r
+\r
+ op = reg->ops + i;\r
+\r
+ /* the opcode lives in the parallel reg->ocs array in direct threaded mode */\r
+#ifdef USE_DIRECT_THREADED_CODE\r
+ opcode = *(reg->ocs + i);\r
+#else\r
+ opcode = op->opcode;\r
+#endif\r
+\r
+ switch (opcode) {\r
+ /* exact strings: free only those not located in the shared string pool */\r
+ case OP_EXACTMBN:\r
+ if (! is_in_string_pool(reg, op->exact_len_n.s))\r
+ xfree(op->exact_len_n.s);\r
+ break;\r
+ case OP_EXACTN: case OP_EXACTMB2N: case OP_EXACTMB3N: case OP_EXACTN_IC:\r
+ if (! is_in_string_pool(reg, op->exact_n.s))\r
+ xfree(op->exact_n.s);\r
+ break;\r
+ /* nothing is freed for these exact opcodes */\r
+ case OP_EXACT1: case OP_EXACT2: case OP_EXACT3: case OP_EXACT4:\r
+ case OP_EXACT5: case OP_EXACTMB2N1: case OP_EXACTMB2N2:\r
+ case OP_EXACTMB2N3: case OP_EXACT1_IC:\r
+ break;\r
+\r
+ /* character classes own their bitset and/or multi-byte range buffer */\r
+ case OP_CCLASS_NOT: case OP_CCLASS:\r
+ xfree(op->cclass.bsp);\r
+ break;\r
+\r
+ case OP_CCLASS_MB_NOT: case OP_CCLASS_MB:\r
+ xfree(op->cclass_mb.mb);\r
+ break;\r
+ case OP_CCLASS_MIX_NOT: case OP_CCLASS_MIX:\r
+ xfree(op->cclass_mix.mb);\r
+ xfree(op->cclass_mix.bsp);\r
+ break;\r
+\r
+ case OP_BACKREF1: case OP_BACKREF2: case OP_BACKREF_N: case OP_BACKREF_N_IC:\r
+ break;\r
+ case OP_BACKREF_MULTI: case OP_BACKREF_MULTI_IC:\r
+ case OP_BACKREF_WITH_LEVEL:\r
+ case OP_BACKREF_WITH_LEVEL_IC:\r
+ case OP_BACKREF_CHECK:\r
+ case OP_BACKREF_CHECK_WITH_LEVEL:\r
+ /* NOTE(review): ns is freed only when num != 1; presumably a single */\r
+ /* group number is stored without a separate allocation - confirm.  */\r
+ if (op->backref_general.num != 1)\r
+ xfree(op->backref_general.ns);\r
+ break;\r
+\r
+ default:\r
+ break;\r
+ }\r
+ }\r
+\r
+ xfree(reg->ops);\r
+#ifdef USE_DIRECT_THREADED_CODE\r
+ xfree(reg->ocs);\r
+ reg->ocs = 0;\r
+#endif\r
+\r
+ /* leave the regex in the same empty state ops_init(reg, 0) would produce */\r
+ reg->ops = 0;\r
+ reg->ops_curr = 0;\r
+ reg->ops_alloc = 0;\r
+ reg->ops_used = 0;\r
+}\r
+\r
+/*\r
+ * Sum the lengths of the strings referenced by the variable-length exact\r
+ * operations of `reg` (OP_EXACTMBN, OP_EXACTN, OP_EXACTN_IC, OP_EXACTMB2N,\r
+ * OP_EXACTMB3N).  Returns that total, or 0 when reg->ops is NULL.\r
+ * NOTE(review): the total is accumulated in a plain int without an\r
+ * overflow check.\r
+ */\r
+static int\r
+ops_calc_size_of_string_pool(regex_t* reg)\r
+{\r
+ int i;\r
+ int total;\r
+\r
+ if (IS_NULL(reg->ops)) return 0;\r
+\r
+ total = 0;\r
+ for (i = 0; i < (int )reg->ops_used; i++) {\r
+ enum OpCode opcode;\r
+ Operation* op;\r
+\r
+ op = reg->ops + i;\r
+#ifdef USE_DIRECT_THREADED_CODE\r
+ opcode = *(reg->ocs + i);\r
+#else\r
+ opcode = op->opcode;\r
+#endif\r
+\r
+ switch (opcode) {\r
+ case OP_EXACTMBN:\r
+ /* size is len * n for this opcode */\r
+ total += op->exact_len_n.len * op->exact_len_n.n;\r
+ break;\r
+ case OP_EXACTN:\r
+ case OP_EXACTN_IC:\r
+ /* n is added unscaled */\r
+ total += op->exact_n.n;\r
+ break;\r
+ case OP_EXACTMB2N:\r
+ /* n is scaled by 2 */\r
+ total += op->exact_n.n * 2;\r
+ break;\r
+ case OP_EXACTMB3N:\r
+ /* n is scaled by 3 */\r
+ total += op->exact_n.n * 3;\r
+ break;\r
+\r
+ default:\r
+ break;\r
+ }\r
+ }\r
+\r
+ return total;\r
+}\r
+\r
+/*\r
+ * Move the strings of all variable-length exact operations of `reg` into one\r
+ * contiguous buffer.  Each string is copied into the pool, its original\r
+ * buffer is freed and the operation is re-pointed at the copy.  On success\r
+ * reg->string_pool / reg->string_pool_end delimit the new buffer.\r
+ *\r
+ * Returns 0 on success, including the case where the computed pool size is\r
+ * <= 0 and nothing is changed.  A failed allocation returns through\r
+ * CHECK_NULL_RETURN_MEMERR (presumably ONIGERR_MEMORY - macro not shown here).\r
+ */\r
+static int\r
+ops_make_string_pool(regex_t* reg)\r
+{\r
+ int i;\r
+ int len;\r
+ int size;\r
+ UChar* pool;\r
+ UChar* curr;\r
+\r
+ size = ops_calc_size_of_string_pool(reg);\r
+ if (size <= 0) {\r
+ return 0;\r
+ }\r
+\r
+ /* curr is the write cursor inside the pool */\r
+ curr = pool = (UChar* )xmalloc((size_t )size);\r
+ CHECK_NULL_RETURN_MEMERR(pool);\r
+\r
+ for (i = 0; i < (int )reg->ops_used; i++) {\r
+ enum OpCode opcode;\r
+ Operation* op;\r
+\r
+ op = reg->ops + i;\r
+#ifdef USE_DIRECT_THREADED_CODE\r
+ opcode = *(reg->ocs + i);\r
+#else\r
+ opcode = op->opcode;\r
+#endif\r
+\r
+ /* the per-opcode lengths must match ops_calc_size_of_string_pool() */\r
+ switch (opcode) {\r
+ case OP_EXACTMBN:\r
+ len = op->exact_len_n.len * op->exact_len_n.n;\r
+ xmemcpy(curr, op->exact_len_n.s, len);\r
+ xfree(op->exact_len_n.s);\r
+ op->exact_len_n.s = curr;\r
+ curr += len;\r
+ break;\r
+ case OP_EXACTN:\r
+ case OP_EXACTN_IC:\r
+ len = op->exact_n.n;\r
+ /* shared tail: the MB2N / MB3N cases jump here once len is computed */\r
+ copy:\r
+ xmemcpy(curr, op->exact_n.s, len);\r
+ xfree(op->exact_n.s);\r
+ op->exact_n.s = curr;\r
+ curr += len;\r
+ break;\r
+ case OP_EXACTMB2N:\r
+ len = op->exact_n.n * 2;\r
+ goto copy;\r
+ break;\r
+ case OP_EXACTMB3N:\r
+ len = op->exact_n.n * 3;\r
+ goto copy;\r
+ break;\r
+\r
+ default:\r
+ break;\r
+ }\r
+ }\r
+\r
+ /* ops_free() uses these bounds to skip strings that live in the pool */\r
+ reg->string_pool = pool;\r
+ reg->string_pool_end = pool + size;\r
+ return 0;\r
+}\r
+\r
extern OnigCaseFoldType\r
onig_get_default_case_fold_flag(void)\r
{\r
return 1;\r
}\r
\r
+/*\r
+ * Multiply two ints that the caller expects to be non-negative.\r
+ * Returns 0 when either operand is 0, x * y when x is strictly below\r
+ * INT_MAX / y, and -1 otherwise.\r
+ */\r
+extern int\r
+onig_positive_int_multiply(int x, int y)\r
+{\r
+  if (x == 0 || y == 0) return 0;\r
+\r
+  /* reject before multiplying so the product is never computed on overflow */\r
+  if (x >= INT_MAX / y) return -1;\r
+\r
+  return x * y;\r
+}\r
\r
-#ifndef PLATFORM_UNALIGNED_WORD_ACCESS\r
-static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];\r
-#endif\r
\r
static void\r
swap_node(Node* a, Node* b)\r
\r
if (NODE_TYPE(a) == NODE_STRING) {\r
StrNode* sn = STR_(a);\r
- if (sn->capa == 0) {\r
+ if (sn->capacity == 0) {\r
int len = (int )(sn->end - sn->s);\r
sn->s = sn->buf;\r
sn->end = sn->s + len;\r
\r
if (NODE_TYPE(b) == NODE_STRING) {\r
StrNode* sn = STR_(b);\r
- if (sn->capa == 0) {\r
+ if (sn->capacity == 0) {\r
int len = (int )(sn->end - sn->s);\r
sn->s = sn->buf;\r
sn->end = sn->s + len;\r
return 1;\r
}\r
\r
-extern int\r
-onig_bbuf_init(BBuf* buf, int size)\r
-{\r
- if (size <= 0) {\r
- size = 0;\r
- buf->p = NULL;\r
- }\r
- else {\r
- buf->p = (UChar* )xmalloc(size);\r
- if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);\r
- }\r
-\r
- buf->alloc = size;\r
- buf->used = 0;\r
- return 0;\r
-}\r
-\r
-\r
#ifdef USE_CALL\r
\r
static int\r
\r
\r
static int\r
-add_opcode(regex_t* reg, int opcode)\r
-{\r
- BB_ADD1(reg, opcode);\r
- return 0;\r
-}\r
-\r
-static int\r
-add_rel_addr(regex_t* reg, int addr)\r
-{\r
- RelAddrType ra = (RelAddrType )addr;\r
-\r
- BB_ADD(reg, &ra, SIZE_RELADDR);\r
- return 0;\r
-}\r
-\r
-static int\r
-add_abs_addr(regex_t* reg, int addr)\r
-{\r
- AbsAddrType ra = (AbsAddrType )addr;\r
-\r
- BB_ADD(reg, &ra, SIZE_ABSADDR);\r
- return 0;\r
-}\r
-\r
-static int\r
-add_length(regex_t* reg, int len)\r
-{\r
- LengthType l = (LengthType )len;\r
-\r
- BB_ADD(reg, &l, SIZE_LENGTH);\r
- return 0;\r
-}\r
-\r
-static int\r
-add_mem_num(regex_t* reg, int num)\r
-{\r
- MemNumType n = (MemNumType )num;\r
-\r
- BB_ADD(reg, &n, SIZE_MEMNUM);\r
- return 0;\r
-}\r
-\r
-#if 0\r
-static int\r
-add_pointer(regex_t* reg, void* addr)\r
-{\r
- PointerType ptr = (PointerType )addr;\r
-\r
- BB_ADD(reg, &ptr, SIZE_POINTER);\r
- return 0;\r
-}\r
-#endif\r
-\r
-static int\r
-add_option(regex_t* reg, OnigOptionType option)\r
-{\r
- BB_ADD(reg, &option, SIZE_OPTION);\r
- return 0;\r
-}\r
-\r
-static int\r
-add_save_type(regex_t* reg, enum SaveType type)\r
-{\r
- SaveType t = (SaveType )type;\r
-\r
- BB_ADD(reg, &t, SIZE_SAVE_TYPE);\r
- return 0;\r
-}\r
-\r
-static int\r
-add_update_var_type(regex_t* reg, enum UpdateVarType type)\r
-{\r
- UpdateVarType t = (UpdateVarType )type;\r
-\r
- BB_ADD(reg, &t, SIZE_UPDATE_VAR_TYPE);\r
- return 0;\r
-}\r
-\r
-static int\r
-add_mode(regex_t* reg, ModeType mode)\r
-{\r
- BB_ADD(reg, &mode, SIZE_MODE);\r
- return 0;\r
-}\r
-\r
-static int\r
-add_opcode_rel_addr(regex_t* reg, int opcode, int addr)\r
+add_op(regex_t* reg, int opcode)\r
{\r
int r;\r
\r
- r = add_opcode(reg, opcode);\r
- if (r != 0) return r;\r
- r = add_rel_addr(reg, addr);\r
- return r;\r
-}\r
+ r = ops_new(reg);\r
+ if (r != ONIG_NORMAL) return r;\r
\r
-static int\r
-add_bytes(regex_t* reg, UChar* bytes, int len)\r
-{\r
- BB_ADD(reg, bytes, len);\r
- return 0;\r
-}\r
+#ifdef USE_DIRECT_THREADED_CODE\r
+ *(reg->ocs + (reg->ops_curr - reg->ops)) = opcode;\r
+#else\r
+ reg->ops_curr->opcode = opcode;\r
+#endif\r
\r
-static int\r
-add_bitset(regex_t* reg, BitSetRef bs)\r
-{\r
- BB_ADD(reg, bs, SIZE_BITSET);\r
return 0;\r
}\r
\r
}\r
\r
static int\r
-compile_tree_empty_check(Node* node, regex_t* reg, int empty_info, ScanEnv* env)\r
+is_strict_real_node(Node* node)\r
+{\r
+ switch (NODE_TYPE(node)) {\r
+ case NODE_STRING:\r
+ {\r
+ StrNode* sn = STR_(node);\r
+ return (sn->end != sn->s);\r
+ }\r
+ break;\r
+\r
+ case NODE_CCLASS:\r
+ case NODE_CTYPE:\r
+ return 1;\r
+ break;\r
+\r
+ default:\r
+ return 0;\r
+ break;\r
+ }\r
+}\r
+\r
+static int\r
+compile_tree_empty_check(Node* node, regex_t* reg, int emptiness, ScanEnv* env)\r
{\r
int r;\r
int saved_num_null_check = reg->num_null_check;\r
\r
- if (empty_info != QUANT_BODY_IS_NOT_EMPTY) {\r
- r = add_opcode(reg, OP_EMPTY_CHECK_START);\r
- if (r != 0) return r;\r
- r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */\r
+ if (emptiness != BODY_IS_NOT_EMPTY) {\r
+ r = add_op(reg, OP_EMPTY_CHECK_START);\r
if (r != 0) return r;\r
+ COP(reg)->empty_check_start.mem = reg->num_null_check; /* NULL CHECK ID */\r
reg->num_null_check++;\r
}\r
\r
r = compile_tree(node, reg, env);\r
if (r != 0) return r;\r
\r
- if (empty_info != QUANT_BODY_IS_NOT_EMPTY) {\r
- if (empty_info == QUANT_BODY_IS_EMPTY)\r
- r = add_opcode(reg, OP_EMPTY_CHECK_END);\r
- else if (empty_info == QUANT_BODY_IS_EMPTY_MEM)\r
- r = add_opcode(reg, OP_EMPTY_CHECK_END_MEMST);\r
- else if (empty_info == QUANT_BODY_IS_EMPTY_REC)\r
- r = add_opcode(reg, OP_EMPTY_CHECK_END_MEMST_PUSH);\r
+ if (emptiness != BODY_IS_NOT_EMPTY) {\r
+ if (emptiness == BODY_IS_EMPTY_POSSIBILITY)\r
+ r = add_op(reg, OP_EMPTY_CHECK_END);\r
+ else if (emptiness == BODY_IS_EMPTY_POSSIBILITY_MEM)\r
+ r = add_op(reg, OP_EMPTY_CHECK_END_MEMST);\r
+ else if (emptiness == BODY_IS_EMPTY_POSSIBILITY_REC)\r
+ r = add_op(reg, OP_EMPTY_CHECK_END_MEMST_PUSH);\r
\r
if (r != 0) return r;\r
- r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */\r
+ COP(reg)->empty_check_end.mem = saved_num_null_check; /* NULL CHECK ID */\r
}\r
return r;\r
}\r
compile_call(CallNode* node, regex_t* reg, ScanEnv* env)\r
{\r
int r;\r
+ int offset;\r
\r
- r = add_opcode(reg, OP_CALL);\r
+ r = add_op(reg, OP_CALL);\r
if (r != 0) return r;\r
- r = unset_addr_list_add(env->unset_addr_list, BB_GET_OFFSET_POS(reg),\r
- NODE_CALL_BODY(node));\r
- if (r != 0) return r;\r
- r = add_abs_addr(reg, 0 /*dummy addr.*/);\r
+\r
+ COP(reg)->call.addr = 0; /* dummy addr. */\r
+\r
+ offset = COP_CURR_OFFSET_BYTES(reg, call.addr);\r
+ r = unset_addr_list_add(env->unset_addr_list, offset, NODE_CALL_BODY(node));\r
return r;\r
}\r
#endif\r
add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, int str_len,\r
regex_t* reg ARG_UNUSED, int ignore_case)\r
{\r
- int len;\r
- int op = select_str_opcode(mb_len, str_len, ignore_case);\r
-\r
- len = SIZE_OPCODE;\r
-\r
- if (op == OP_EXACTMBN) len += SIZE_LENGTH;\r
- if (IS_NEED_STR_LEN_OP_EXACT(op))\r
- len += SIZE_LENGTH;\r
-\r
- len += mb_len * str_len;\r
- return len;\r
+ return 1;\r
}\r
\r
static int\r
add_compile_string(UChar* s, int mb_len, int str_len,\r
regex_t* reg, int ignore_case)\r
{\r
- int op = select_str_opcode(mb_len, str_len, ignore_case);\r
- add_opcode(reg, op);\r
+ int op;\r
+ int r;\r
+ int byte_len;\r
+ UChar* p;\r
+ UChar* end;\r
+\r
+ op = select_str_opcode(mb_len, str_len, ignore_case);\r
+ r = add_op(reg, op);\r
+ if (r != 0) return r;\r
+\r
+ byte_len = mb_len * str_len;\r
+ end = s + byte_len;\r
\r
- if (op == OP_EXACTMBN)\r
- add_length(reg, mb_len);\r
+ if (op == OP_EXACTMBN) {\r
+ p = onigenc_strdup(reg->enc, s, end);\r
+ CHECK_NULL_RETURN_MEMERR(p);\r
+\r
+ COP(reg)->exact_len_n.len = mb_len;\r
+ COP(reg)->exact_len_n.n = str_len;\r
+ COP(reg)->exact_len_n.s = p;\r
+ }\r
+ else if (IS_NEED_STR_LEN_OP_EXACT(op)) {\r
+ p = onigenc_strdup(reg->enc, s, end);\r
+ CHECK_NULL_RETURN_MEMERR(p);\r
\r
- if (IS_NEED_STR_LEN_OP_EXACT(op)) {\r
if (op == OP_EXACTN_IC)\r
- add_length(reg, mb_len * str_len);\r
+ COP(reg)->exact_n.n = byte_len;\r
else\r
- add_length(reg, str_len);\r
+ COP(reg)->exact_n.n = str_len;\r
+\r
+ COP(reg)->exact_n.s = p;\r
+ }\r
+ else {\r
+ xmemcpy(COP(reg)->exact.s, s, (size_t )byte_len);\r
+ COP(reg)->exact.s[byte_len] = '\0';\r
}\r
\r
- add_bytes(reg, s, mb_len * str_len);\r
return 0;\r
}\r
\r
-\r
static int\r
compile_length_string_node(Node* node, regex_t* reg)\r
{\r
return add_compile_string(sn->s, 1 /* sb */, (int )(sn->end - sn->s), reg, 0);\r
}\r
\r
-static int\r
-add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)\r
+static void*\r
+set_multi_byte_cclass(BBuf* mbuf, regex_t* reg)\r
{\r
-#ifdef PLATFORM_UNALIGNED_WORD_ACCESS\r
- add_length(reg, mbuf->used);\r
- return add_bytes(reg, mbuf->p, mbuf->used);\r
-#else\r
- int r, pad_size;\r
- UChar* p = BB_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;\r
-\r
- GET_ALIGNMENT_PAD_SIZE(p, pad_size);\r
- add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));\r
- if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);\r
+ size_t len;\r
+ void* p;\r
\r
- r = add_bytes(reg, mbuf->p, mbuf->used);\r
+ len = (size_t )mbuf->used;\r
+ p = xmalloc(len);\r
+ if (IS_NULL(p)) return NULL;\r
\r
- /* padding for return value from compile_length_cclass_node() to be fix. */\r
- pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;\r
- if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);\r
- return r;\r
-#endif\r
+ xmemcpy(p, mbuf->p, len);\r
+ return p;\r
}\r
\r
static int\r
compile_length_cclass_node(CClassNode* cc, regex_t* reg)\r
{\r
- int len;\r
-\r
- if (IS_NULL(cc->mbuf)) {\r
- len = SIZE_OPCODE + SIZE_BITSET;\r
- }\r
- else {\r
- if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {\r
- len = SIZE_OPCODE;\r
- }\r
- else {\r
- len = SIZE_OPCODE + SIZE_BITSET;\r
- }\r
-#ifdef PLATFORM_UNALIGNED_WORD_ACCESS\r
- len += SIZE_LENGTH + cc->mbuf->used;\r
-#else\r
- len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);\r
-#endif\r
- }\r
-\r
- return len;\r
+ return 1;\r
}\r
\r
static int\r
int r;\r
\r
if (IS_NULL(cc->mbuf)) {\r
- if (IS_NCCLASS_NOT(cc))\r
- add_opcode(reg, OP_CCLASS_NOT);\r
- else\r
- add_opcode(reg, OP_CCLASS);\r
+ r = add_op(reg, IS_NCCLASS_NOT(cc) ? OP_CCLASS_NOT : OP_CCLASS);\r
+ if (r != 0) return r;\r
\r
- r = add_bitset(reg, cc->bs);\r
+ COP(reg)->cclass.bsp = xmalloc(SIZE_BITSET);\r
+ CHECK_NULL_RETURN_MEMERR(COP(reg)->cclass.bsp);\r
+ xmemcpy(COP(reg)->cclass.bsp, cc->bs, SIZE_BITSET);\r
}\r
else {\r
+ void* p;\r
+\r
if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {\r
- if (IS_NCCLASS_NOT(cc))\r
- add_opcode(reg, OP_CCLASS_MB_NOT);\r
- else\r
- add_opcode(reg, OP_CCLASS_MB);\r
+ r = add_op(reg, IS_NCCLASS_NOT(cc) ? OP_CCLASS_MB_NOT : OP_CCLASS_MB);\r
+ if (r != 0) return r;\r
\r
- r = add_multi_byte_cclass(cc->mbuf, reg);\r
+ p = set_multi_byte_cclass(cc->mbuf, reg);\r
+ CHECK_NULL_RETURN_MEMERR(p);\r
+ COP(reg)->cclass_mb.mb = p;\r
}\r
else {\r
- if (IS_NCCLASS_NOT(cc))\r
- add_opcode(reg, OP_CCLASS_MIX_NOT);\r
- else\r
- add_opcode(reg, OP_CCLASS_MIX);\r
-\r
- r = add_bitset(reg, cc->bs);\r
+ r = add_op(reg, IS_NCCLASS_NOT(cc) ? OP_CCLASS_MIX_NOT : OP_CCLASS_MIX);\r
if (r != 0) return r;\r
- r = add_multi_byte_cclass(cc->mbuf, reg);\r
+\r
+ COP(reg)->cclass_mix.bsp = xmalloc(SIZE_BITSET);\r
+ CHECK_NULL_RETURN_MEMERR(COP(reg)->cclass_mix.bsp);\r
+ xmemcpy(COP(reg)->cclass_mix.bsp, cc->bs, SIZE_BITSET);\r
+\r
+ p = set_multi_byte_cclass(cc->mbuf, reg);\r
+ CHECK_NULL_RETURN_MEMERR(p);\r
+ COP(reg)->cclass_mix.mb = p;\r
}\r
}\r
\r
- return r;\r
+ return 0;\r
}\r
\r
static int\r
}\r
\r
p[id].lower = lower;\r
- p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);\r
+ p[id].upper = (IS_INFINITE_REPEAT(upper) ? 0x7fffffff : upper);\r
return 0;\r
}\r
\r
static int\r
-compile_range_repeat_node(QuantNode* qn, int target_len, int empty_info,\r
+compile_range_repeat_node(QuantNode* qn, int target_len, int emptiness,\r
regex_t* reg, ScanEnv* env)\r
{\r
int r;\r
- int num_repeat = reg->num_repeat;\r
+ int num_repeat = reg->num_repeat++;\r
\r
- r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);\r
- if (r != 0) return r;\r
- r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */\r
- reg->num_repeat++;\r
- if (r != 0) return r;\r
- r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);\r
+ r = add_op(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);\r
if (r != 0) return r;\r
\r
+ COP(reg)->repeat.id = num_repeat;\r
+ COP(reg)->repeat.addr = SIZE_INC_OP + target_len + SIZE_OP_REPEAT_INC;\r
+\r
r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);\r
if (r != 0) return r;\r
\r
- r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);\r
+ r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, emptiness, env);\r
if (r != 0) return r;\r
\r
if (\r
NODE_IS_IN_MULTI_ENTRY(qn) ||\r
#endif\r
NODE_IS_IN_REAL_REPEAT(qn)) {\r
- r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);\r
+ r = add_op(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);\r
}\r
else {\r
- r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);\r
+ r = add_op(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);\r
}\r
if (r != 0) return r;\r
- r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */\r
+\r
+ COP(reg)->repeat_inc.id = num_repeat;\r
return r;\r
}\r
\r
static int\r
is_anychar_infinite_greedy(QuantNode* qn)\r
{\r
- if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&\r
+ if (qn->greedy && IS_INFINITE_REPEAT(qn->upper) &&\r
NODE_IS_ANYCHAR(NODE_QUANT_BODY(qn)))\r
return 1;\r
else\r
return 0;\r
}\r
\r
-#define QUANTIFIER_EXPAND_LIMIT_SIZE 50\r
+#define QUANTIFIER_EXPAND_LIMIT_SIZE 10\r
#define CKN_ON (ckn > 0)\r
\r
static int\r
compile_length_quantifier_node(QuantNode* qn, regex_t* reg)\r
{\r
int len, mod_tlen;\r
- int infinite = IS_REPEAT_INFINITE(qn->upper);\r
- enum QuantBodyEmpty empty_info = qn->body_empty_info;\r
+ int infinite = IS_INFINITE_REPEAT(qn->upper);\r
+ enum BodyEmptyType emptiness = qn->emptiness;\r
int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);\r
\r
if (tlen < 0) return tlen;\r
}\r
}\r
\r
- if (empty_info == QUANT_BODY_IS_NOT_EMPTY)\r
- mod_tlen = tlen;\r
- else\r
- mod_tlen = tlen + (SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END);\r
+ mod_tlen = tlen;\r
+ if (emptiness != BODY_IS_NOT_EMPTY)\r
+ mod_tlen += SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END;\r
\r
if (infinite &&\r
(qn->lower <= 1 ||\r
}\r
\r
if (qn->greedy) {\r
+#ifdef USE_OP_PUSH_OR_JUMP_EXACT\r
if (IS_NOT_NULL(qn->head_exact))\r
len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;\r
- else if (IS_NOT_NULL(qn->next_head_exact))\r
+ else\r
+#endif\r
+ if (IS_NOT_NULL(qn->next_head_exact))\r
len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;\r
else\r
len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;\r
else\r
len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;\r
}\r
- else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */\r
- len = SIZE_OP_JUMP + tlen;\r
+ else if (qn->upper == 0) {\r
+ if (qn->is_refered != 0) { /* /(?<n>..){0}/ */\r
+ len = SIZE_OP_JUMP + tlen;\r
+ }\r
+ else\r
+ len = 0;\r
}\r
else if (!infinite && qn->greedy &&\r
(qn->upper == 1 ||\r
len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;\r
}\r
else {\r
- len = SIZE_OP_REPEAT_INC\r
- + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;\r
+ len = SIZE_OP_REPEAT_INC + mod_tlen + SIZE_OP_REPEAT;\r
}\r
\r
return len;\r
compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)\r
{\r
int i, r, mod_tlen;\r
- int infinite = IS_REPEAT_INFINITE(qn->upper);\r
- enum QuantBodyEmpty empty_info = qn->body_empty_info;\r
+ int infinite = IS_INFINITE_REPEAT(qn->upper);\r
+ enum BodyEmptyType emptiness = qn->emptiness;\r
int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);\r
\r
if (tlen < 0) return tlen;\r
r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);\r
if (r != 0) return r;\r
if (IS_NOT_NULL(qn->next_head_exact)) {\r
- if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg)))\r
- r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);\r
- else\r
- r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);\r
+ r = add_op(reg,\r
+ IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg)) ?\r
+ OP_ANYCHAR_ML_STAR_PEEK_NEXT : OP_ANYCHAR_STAR_PEEK_NEXT);\r
if (r != 0) return r;\r
- return add_bytes(reg, STR_(qn->next_head_exact)->s, 1);\r
+\r
+ COP(reg)->anychar_star_peek_next.c = STR_(qn->next_head_exact)->s[0];\r
+ return 0;\r
}\r
else {\r
- if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg)))\r
- return add_opcode(reg, OP_ANYCHAR_ML_STAR);\r
- else\r
- return add_opcode(reg, OP_ANYCHAR_STAR);\r
+ r = add_op(reg,\r
+ IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg)) ?\r
+ OP_ANYCHAR_ML_STAR : OP_ANYCHAR_STAR);\r
+ return r;\r
}\r
}\r
\r
- if (empty_info == QUANT_BODY_IS_NOT_EMPTY)\r
- mod_tlen = tlen;\r
- else\r
- mod_tlen = tlen + (SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END);\r
+ mod_tlen = tlen;\r
+ if (emptiness != BODY_IS_NOT_EMPTY)\r
+ mod_tlen += SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END;\r
\r
if (infinite &&\r
(qn->lower <= 1 ||\r
int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {\r
+ int addr;\r
+\r
if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {\r
+ r = add_op(reg, OP_JUMP);\r
+ if (r != 0) return r;\r
if (qn->greedy) {\r
+#ifdef USE_OP_PUSH_OR_JUMP_EXACT\r
if (IS_NOT_NULL(qn->head_exact))\r
- r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);\r
- else if (IS_NOT_NULL(qn->next_head_exact))\r
- r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);\r
+ COP(reg)->jump.addr = SIZE_OP_PUSH_OR_JUMP_EXACT1 + SIZE_INC_OP;\r
+ else\r
+#endif\r
+ if (IS_NOT_NULL(qn->next_head_exact))\r
+ COP(reg)->jump.addr = SIZE_OP_PUSH_IF_PEEK_NEXT + SIZE_INC_OP;\r
else\r
- r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);\r
+ COP(reg)->jump.addr = SIZE_OP_PUSH + SIZE_INC_OP;\r
}\r
else {\r
- r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);\r
+ COP(reg)->jump.addr = SIZE_OP_JUMP + SIZE_INC_OP;\r
}\r
- if (r != 0) return r;\r
}\r
else {\r
r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);\r
}\r
\r
if (qn->greedy) {\r
+#ifdef USE_OP_PUSH_OR_JUMP_EXACT\r
if (IS_NOT_NULL(qn->head_exact)) {\r
- r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,\r
- mod_tlen + SIZE_OP_JUMP);\r
+ r = add_op(reg, OP_PUSH_OR_JUMP_EXACT1);\r
if (r != 0) return r;\r
- add_bytes(reg, STR_(qn->head_exact)->s, 1);\r
- r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);\r
+ COP(reg)->push_or_jump_exact1.addr = SIZE_INC_OP + mod_tlen + SIZE_OP_JUMP;\r
+ COP(reg)->push_or_jump_exact1.c = STR_(qn->head_exact)->s[0];\r
+\r
+ r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, emptiness, env);\r
if (r != 0) return r;\r
- r = add_opcode_rel_addr(reg, OP_JUMP,\r
- -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));\r
+\r
+ addr = -(mod_tlen + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1);\r
}\r
- else if (IS_NOT_NULL(qn->next_head_exact)) {\r
- r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,\r
- mod_tlen + SIZE_OP_JUMP);\r
+ else\r
+#endif\r
+ if (IS_NOT_NULL(qn->next_head_exact)) {\r
+ r = add_op(reg, OP_PUSH_IF_PEEK_NEXT);\r
if (r != 0) return r;\r
- add_bytes(reg, STR_(qn->next_head_exact)->s, 1);\r
- r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);\r
+ COP(reg)->push_if_peek_next.addr = SIZE_INC_OP + mod_tlen + SIZE_OP_JUMP;\r
+ COP(reg)->push_if_peek_next.c = STR_(qn->next_head_exact)->s[0];\r
+\r
+ r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, emptiness, env);\r
if (r != 0) return r;\r
- r = add_opcode_rel_addr(reg, OP_JUMP,\r
- -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));\r
+\r
+ addr = -(mod_tlen + (int )SIZE_OP_PUSH_IF_PEEK_NEXT);\r
}\r
else {\r
- r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);\r
+ r = add_op(reg, OP_PUSH);\r
if (r != 0) return r;\r
- r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);\r
+ COP(reg)->push.addr = SIZE_INC_OP + mod_tlen + SIZE_OP_JUMP;\r
+\r
+ r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, emptiness, env);\r
if (r != 0) return r;\r
- r = add_opcode_rel_addr(reg, OP_JUMP,\r
- -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));\r
+\r
+ addr = -(mod_tlen + (int )SIZE_OP_PUSH);\r
}\r
+\r
+ r = add_op(reg, OP_JUMP);\r
+ if (r != 0) return r;\r
+ COP(reg)->jump.addr = addr;\r
}\r
else {\r
- r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);\r
+ r = add_op(reg, OP_JUMP);\r
if (r != 0) return r;\r
- r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);\r
+ COP(reg)->jump.addr = mod_tlen + SIZE_INC_OP;\r
+\r
+ r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, emptiness, env);\r
+ if (r != 0) return r;\r
+\r
+ r = add_op(reg, OP_PUSH);\r
+ if (r != 0) return r;\r
+ COP(reg)->push.addr = -mod_tlen;\r
+ }\r
+ }\r
+ else if (qn->upper == 0) {\r
+ if (qn->is_refered != 0) { /* /(?<n>..){0}/ */\r
+ r = add_op(reg, OP_JUMP);\r
if (r != 0) return r;\r
- r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));\r
+ COP(reg)->jump.addr = tlen + SIZE_INC_OP;\r
+\r
+ r = compile_tree(NODE_QUANT_BODY(qn), reg, env);\r
+ }\r
+ else {\r
+ /* Nothing output */\r
+ r = 0;\r
}\r
}\r
- else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */\r
- r = add_opcode_rel_addr(reg, OP_JUMP, tlen);\r
- if (r != 0) return r;\r
- r = compile_tree(NODE_QUANT_BODY(qn), reg, env);\r
- }\r
else if (! infinite && qn->greedy &&\r
(qn->upper == 1 ||\r
int_multiply_cmp(tlen + SIZE_OP_PUSH, qn->upper,\r
if (r != 0) return r;\r
\r
for (i = 0; i < n; i++) {\r
- r = add_opcode_rel_addr(reg, OP_PUSH,\r
- (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);\r
+ int v = onig_positive_int_multiply(n - i, tlen + SIZE_OP_PUSH);\r
+ if (v < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;\r
+\r
+ r = add_op(reg, OP_PUSH);\r
if (r != 0) return r;\r
+ COP(reg)->push.addr = v;\r
+\r
r = compile_tree(NODE_QUANT_BODY(qn), reg, env);\r
if (r != 0) return r;\r
}\r
}\r
else if (! qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */\r
- r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);\r
+ r = add_op(reg, OP_PUSH);\r
if (r != 0) return r;\r
- r = add_opcode_rel_addr(reg, OP_JUMP, tlen);\r
+ COP(reg)->push.addr = SIZE_INC_OP + SIZE_OP_JUMP;\r
+\r
+ r = add_op(reg, OP_JUMP);\r
if (r != 0) return r;\r
+ COP(reg)->jump.addr = tlen + SIZE_INC_OP;\r
+\r
r = compile_tree(NODE_QUANT_BODY(qn), reg, env);\r
}\r
else {\r
- r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg, env);\r
+ r = compile_range_repeat_node(qn, mod_tlen, emptiness, reg, env);\r
}\r
return r;\r
}\r
\r
static int\r
-compile_length_option_node(EnclosureNode* node, regex_t* reg)\r
+compile_length_option_node(BagNode* node, regex_t* reg)\r
{\r
int tlen;\r
OnigOptionType prev = reg->options;\r
\r
reg->options = node->o.options;\r
- tlen = compile_length_tree(NODE_ENCLOSURE_BODY(node), reg);\r
+ tlen = compile_length_tree(NODE_BAG_BODY(node), reg);\r
reg->options = prev;\r
\r
return tlen;\r
}\r
\r
static int\r
-compile_option_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)\r
+compile_option_node(BagNode* node, regex_t* reg, ScanEnv* env)\r
{\r
int r;\r
OnigOptionType prev = reg->options;\r
\r
reg->options = node->o.options;\r
- r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env);\r
+ r = compile_tree(NODE_BAG_BODY(node), reg, env);\r
reg->options = prev;\r
\r
return r;\r
}\r
\r
static int\r
-compile_length_enclosure_node(EnclosureNode* node, regex_t* reg)\r
+compile_length_bag_node(BagNode* node, regex_t* reg)\r
{\r
int len;\r
int tlen;\r
\r
- if (node->type == ENCLOSURE_OPTION)\r
+ if (node->type == BAG_OPTION)\r
return compile_length_option_node(node, reg);\r
\r
- if (NODE_ENCLOSURE_BODY(node)) {\r
- tlen = compile_length_tree(NODE_ENCLOSURE_BODY(node), reg);\r
+ if (NODE_BAG_BODY(node)) {\r
+ tlen = compile_length_tree(NODE_BAG_BODY(node), reg);\r
if (tlen < 0) return tlen;\r
}\r
else\r
tlen = 0;\r
\r
switch (node->type) {\r
- case ENCLOSURE_MEMORY:\r
+ case BAG_MEMORY:\r
#ifdef USE_CALL\r
\r
if (node->m.regnum == 0 && NODE_IS_CALLED(node)) {\r
}\r
break;\r
\r
- case ENCLOSURE_STOP_BACKTRACK:\r
- if (NODE_IS_STOP_BT_SIMPLE_REPEAT(node)) {\r
- QuantNode* qn = QUANT_(NODE_ENCLOSURE_BODY(node));\r
+ case BAG_STOP_BACKTRACK:\r
+ if (NODE_IS_STRICT_REAL_REPEAT(node)) {\r
+ int v;\r
+ QuantNode* qn;\r
+\r
+ qn = QUANT_(NODE_BAG_BODY(node));\r
tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);\r
if (tlen < 0) return tlen;\r
\r
- len = tlen * qn->lower\r
- + SIZE_OP_PUSH + tlen + SIZE_OP_POP_OUT + SIZE_OP_JUMP;\r
+ v = onig_positive_int_multiply(qn->lower, tlen);\r
+ if (v < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;\r
+ len = v + SIZE_OP_PUSH + tlen + SIZE_OP_POP_OUT + SIZE_OP_JUMP;\r
}\r
else {\r
len = SIZE_OP_ATOMIC_START + tlen + SIZE_OP_ATOMIC_END;\r
}\r
break;\r
\r
- case ENCLOSURE_IF_ELSE:\r
+ case BAG_IF_ELSE:\r
{\r
- Node* cond = NODE_ENCLOSURE_BODY(node);\r
+ Node* cond = NODE_BAG_BODY(node);\r
Node* Then = node->te.Then;\r
Node* Else = node->te.Else;\r
\r
len += tlen;\r
}\r
\r
+ len += SIZE_OP_JUMP + SIZE_OP_ATOMIC_END;\r
+\r
if (IS_NOT_NULL(Else)) {\r
- len += SIZE_OP_JUMP;\r
tlen = compile_length_tree(Else, reg);\r
if (tlen < 0) return tlen;\r
len += tlen;\r
}\r
break;\r
\r
- default:\r
- return ONIGERR_TYPE_BUG;\r
+ case BAG_OPTION:\r
+ /* never come here, but set for escape warning */\r
+ len = 0;\r
break;\r
}\r
\r
return len;\r
}\r
\r
-static int get_char_length_tree(Node* node, regex_t* reg, int* len);\r
+static int get_char_len_node(Node* node, regex_t* reg, int* len);\r
\r
static int\r
-compile_enclosure_memory_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)\r
+compile_bag_memory_node(BagNode* node, regex_t* reg, ScanEnv* env)\r
{\r
int r;\r
int len;\r
\r
#ifdef USE_CALL\r
- if (node->m.regnum == 0 && NODE_IS_CALLED(node)) {\r
- r = add_opcode(reg, OP_CALL);\r
+ if (NODE_IS_CALLED(node)) {\r
+ r = add_op(reg, OP_CALL);\r
if (r != 0) return r;\r
- node->m.called_addr = BB_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;\r
+\r
+ node->m.called_addr = COP_CURR_OFFSET(reg) + 1 + SIZE_OP_JUMP;\r
NODE_STATUS_ADD(node, ADDR_FIXED);\r
- r = add_abs_addr(reg, (int )node->m.called_addr);\r
- if (r != 0) return r;\r
- len = compile_length_tree(NODE_ENCLOSURE_BODY(node), reg);\r
- len += SIZE_OP_RETURN;\r
- r = add_opcode_rel_addr(reg, OP_JUMP, len);\r
- if (r != 0) return r;\r
+ COP(reg)->call.addr = (int )node->m.called_addr;\r
\r
- r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env);\r
- if (r != 0) return r;\r
- r = add_opcode(reg, OP_RETURN);\r
- return r;\r
- }\r
+ if (node->m.regnum == 0) {\r
+ len = compile_length_tree(NODE_BAG_BODY(node), reg);\r
+ len += SIZE_OP_RETURN;\r
\r
- if (NODE_IS_CALLED(node)) {\r
- r = add_opcode(reg, OP_CALL);\r
- if (r != 0) return r;\r
- node->m.called_addr = BB_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;\r
- NODE_STATUS_ADD(node, ADDR_FIXED);\r
- r = add_abs_addr(reg, (int )node->m.called_addr);\r
- if (r != 0) return r;\r
- len = compile_length_tree(NODE_ENCLOSURE_BODY(node), reg);\r
- len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);\r
- if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))\r
- len += (NODE_IS_RECURSION(node)\r
- ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);\r
- else\r
- len += (NODE_IS_RECURSION(node)\r
- ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);\r
+ r = add_op(reg, OP_JUMP);\r
+ if (r != 0) return r;\r
+ COP(reg)->jump.addr = len + SIZE_INC_OP;\r
\r
- r = add_opcode_rel_addr(reg, OP_JUMP, len);\r
- if (r != 0) return r;\r
+ r = compile_tree(NODE_BAG_BODY(node), reg, env);\r
+ if (r != 0) return r;\r
+\r
+ r = add_op(reg, OP_RETURN);\r
+ return r;\r
+ }\r
+ else {\r
+ len = compile_length_tree(NODE_BAG_BODY(node), reg);\r
+ len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);\r
+ if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))\r
+ len += (NODE_IS_RECURSION(node)\r
+ ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);\r
+ else\r
+ len += (NODE_IS_RECURSION(node)\r
+ ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);\r
+\r
+ r = add_op(reg, OP_JUMP);\r
+ if (r != 0) return r;\r
+ COP(reg)->jump.addr = len + SIZE_INC_OP;\r
+ }\r
}\r
#endif\r
\r
if (MEM_STATUS_AT0(reg->bt_mem_start, node->m.regnum))\r
- r = add_opcode(reg, OP_MEMORY_START_PUSH);\r
+ r = add_op(reg, OP_MEMORY_START_PUSH);\r
else\r
- r = add_opcode(reg, OP_MEMORY_START);\r
+ r = add_op(reg, OP_MEMORY_START);\r
if (r != 0) return r;\r
- r = add_mem_num(reg, node->m.regnum);\r
- if (r != 0) return r;\r
- r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env);\r
+ COP(reg)->memory_start.num = node->m.regnum;\r
+\r
+ r = compile_tree(NODE_BAG_BODY(node), reg, env);\r
if (r != 0) return r;\r
\r
#ifdef USE_CALL\r
if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))\r
- r = add_opcode(reg, (NODE_IS_RECURSION(node)\r
- ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));\r
+ r = add_op(reg, (NODE_IS_RECURSION(node)\r
+ ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));\r
else\r
- r = add_opcode(reg, (NODE_IS_RECURSION(node)\r
- ? OP_MEMORY_END_REC : OP_MEMORY_END));\r
+ r = add_op(reg, (NODE_IS_RECURSION(node) ? OP_MEMORY_END_REC : OP_MEMORY_END));\r
if (r != 0) return r;\r
- r = add_mem_num(reg, node->m.regnum);\r
+ COP(reg)->memory_end.num = node->m.regnum;\r
+\r
if (NODE_IS_CALLED(node)) {\r
if (r != 0) return r;\r
- r = add_opcode(reg, OP_RETURN);\r
+ r = add_op(reg, OP_RETURN);\r
}\r
#else\r
if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))\r
- r = add_opcode(reg, OP_MEMORY_END_PUSH);\r
+ r = add_op(reg, OP_MEMORY_END_PUSH);\r
else\r
- r = add_opcode(reg, OP_MEMORY_END);\r
+ r = add_op(reg, OP_MEMORY_END);\r
if (r != 0) return r;\r
- r = add_mem_num(reg, node->m.regnum);\r
+ COP(reg)->memory_end.num = node->m.regnum;\r
#endif\r
\r
return r;\r
}\r
\r
static int\r
-compile_enclosure_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)\r
+compile_bag_node(BagNode* node, regex_t* reg, ScanEnv* env)\r
{\r
int r, len;\r
\r
switch (node->type) {\r
- case ENCLOSURE_MEMORY:\r
- r = compile_enclosure_memory_node(node, reg, env);\r
+ case BAG_MEMORY:\r
+ r = compile_bag_memory_node(node, reg, env);\r
break;\r
\r
- case ENCLOSURE_OPTION:\r
+ case BAG_OPTION:\r
r = compile_option_node(node, reg, env);\r
break;\r
\r
- case ENCLOSURE_STOP_BACKTRACK:\r
- if (NODE_IS_STOP_BT_SIMPLE_REPEAT(node)) {\r
- QuantNode* qn = QUANT_(NODE_ENCLOSURE_BODY(node));\r
+ case BAG_STOP_BACKTRACK:\r
+ if (NODE_IS_STRICT_REAL_REPEAT(node)) {\r
+ QuantNode* qn = QUANT_(NODE_BAG_BODY(node));\r
r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);\r
if (r != 0) return r;\r
\r
len = compile_length_tree(NODE_QUANT_BODY(qn), reg);\r
if (len < 0) return len;\r
\r
- r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP_OUT + SIZE_OP_JUMP);\r
+ r = add_op(reg, OP_PUSH);\r
if (r != 0) return r;\r
+ COP(reg)->push.addr = SIZE_INC_OP + len + SIZE_OP_POP_OUT + SIZE_OP_JUMP;\r
+\r
r = compile_tree(NODE_QUANT_BODY(qn), reg, env);\r
if (r != 0) return r;\r
- r = add_opcode(reg, OP_POP_OUT);\r
+ r = add_op(reg, OP_POP_OUT);\r
+ if (r != 0) return r;\r
+\r
+ r = add_op(reg, OP_JUMP);\r
if (r != 0) return r;\r
- r = add_opcode_rel_addr(reg, OP_JUMP,\r
- -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP_OUT + (int )SIZE_OP_JUMP));\r
+ COP(reg)->jump.addr = -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP_OUT);\r
}\r
else {\r
- r = add_opcode(reg, OP_ATOMIC_START);\r
+ r = add_op(reg, OP_ATOMIC_START);\r
if (r != 0) return r;\r
- r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env);\r
+ r = compile_tree(NODE_BAG_BODY(node), reg, env);\r
if (r != 0) return r;\r
- r = add_opcode(reg, OP_ATOMIC_END);\r
+ r = add_op(reg, OP_ATOMIC_END);\r
}\r
break;\r
\r
- case ENCLOSURE_IF_ELSE:\r
+ case BAG_IF_ELSE:\r
{\r
- int cond_len, then_len, jump_len;\r
- Node* cond = NODE_ENCLOSURE_BODY(node);\r
+ int cond_len, then_len, else_len, jump_len;\r
+ Node* cond = NODE_BAG_BODY(node);\r
Node* Then = node->te.Then;\r
Node* Else = node->te.Else;\r
\r
- r = add_opcode(reg, OP_ATOMIC_START);\r
+ r = add_op(reg, OP_ATOMIC_START);\r
if (r != 0) return r;\r
\r
cond_len = compile_length_tree(cond, reg);\r
else\r
then_len = 0;\r
\r
- jump_len = cond_len + then_len + SIZE_OP_ATOMIC_END;\r
- if (IS_NOT_NULL(Else)) jump_len += SIZE_OP_JUMP;\r
+ jump_len = cond_len + then_len + SIZE_OP_ATOMIC_END + SIZE_OP_JUMP;\r
\r
- r = add_opcode_rel_addr(reg, OP_PUSH, jump_len);\r
+ r = add_op(reg, OP_PUSH);\r
if (r != 0) return r;\r
+ COP(reg)->push.addr = SIZE_INC_OP + jump_len;\r
+\r
r = compile_tree(cond, reg, env);\r
if (r != 0) return r;\r
- r = add_opcode(reg, OP_ATOMIC_END);\r
+ r = add_op(reg, OP_ATOMIC_END);\r
if (r != 0) return r;\r
\r
if (IS_NOT_NULL(Then)) {\r
}\r
\r
if (IS_NOT_NULL(Else)) {\r
- int else_len = compile_length_tree(Else, reg);\r
- r = add_opcode_rel_addr(reg, OP_JUMP, else_len);\r
- if (r != 0) return r;\r
+ else_len = compile_length_tree(Else, reg);\r
+ if (else_len < 0) return else_len;\r
+ }\r
+ else\r
+ else_len = 0;\r
+\r
+ r = add_op(reg, OP_JUMP);\r
+ if (r != 0) return r;\r
+ COP(reg)->jump.addr = SIZE_OP_ATOMIC_END + else_len + SIZE_INC_OP;\r
+\r
+ r = add_op(reg, OP_ATOMIC_END);\r
+ if (r != 0) return r;\r
+\r
+ if (IS_NOT_NULL(Else)) {\r
r = compile_tree(Else, reg, env);\r
}\r
}\r
break;\r
-\r
- default:\r
- return ONIGERR_TYPE_BUG;\r
- break;\r
}\r
\r
return r;\r
}\r
\r
switch (node->type) {\r
- case ANCHOR_PREC_READ:\r
+ case ANCR_PREC_READ:\r
len = SIZE_OP_PREC_READ_START + tlen + SIZE_OP_PREC_READ_END;\r
break;\r
- case ANCHOR_PREC_READ_NOT:\r
+ case ANCR_PREC_READ_NOT:\r
len = SIZE_OP_PREC_READ_NOT_START + tlen + SIZE_OP_PREC_READ_NOT_END;\r
break;\r
- case ANCHOR_LOOK_BEHIND:\r
+ case ANCR_LOOK_BEHIND:\r
len = SIZE_OP_LOOK_BEHIND + tlen;\r
break;\r
- case ANCHOR_LOOK_BEHIND_NOT:\r
+ case ANCR_LOOK_BEHIND_NOT:\r
len = SIZE_OP_LOOK_BEHIND_NOT_START + tlen + SIZE_OP_LOOK_BEHIND_NOT_END;\r
break;\r
\r
- case ANCHOR_WORD_BOUNDARY:\r
- case ANCHOR_NO_WORD_BOUNDARY:\r
+ case ANCR_WORD_BOUNDARY:\r
+ case ANCR_NO_WORD_BOUNDARY:\r
#ifdef USE_WORD_BEGIN_END\r
- case ANCHOR_WORD_BEGIN:\r
- case ANCHOR_WORD_END:\r
+ case ANCR_WORD_BEGIN:\r
+ case ANCR_WORD_END:\r
#endif\r
len = SIZE_OP_WORD_BOUNDARY;\r
break;\r
\r
- case ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:\r
- case ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:\r
+ case ANCR_TEXT_SEGMENT_BOUNDARY:\r
+ case ANCR_NO_TEXT_SEGMENT_BOUNDARY:\r
len = SIZE_OPCODE;\r
break;\r
\r
enum OpCode op;\r
\r
switch (node->type) {\r
- case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break;\r
- case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break;\r
- case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break;\r
- case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break;\r
- case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break;\r
- case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;\r
-\r
- case ANCHOR_WORD_BOUNDARY:\r
+ case ANCR_BEGIN_BUF: r = add_op(reg, OP_BEGIN_BUF); break;\r
+ case ANCR_END_BUF: r = add_op(reg, OP_END_BUF); break;\r
+ case ANCR_BEGIN_LINE: r = add_op(reg, OP_BEGIN_LINE); break;\r
+ case ANCR_END_LINE: r = add_op(reg, OP_END_LINE); break;\r
+ case ANCR_SEMI_END_BUF: r = add_op(reg, OP_SEMI_END_BUF); break;\r
+ case ANCR_BEGIN_POSITION: r = add_op(reg, OP_BEGIN_POSITION); break;\r
+\r
+ case ANCR_WORD_BOUNDARY:\r
op = OP_WORD_BOUNDARY;\r
word:\r
- r = add_opcode(reg, op);\r
+ r = add_op(reg, op);\r
if (r != 0) return r;\r
- r = add_mode(reg, (ModeType )node->ascii_mode);\r
+ COP(reg)->word_boundary.mode = (ModeType )node->ascii_mode;\r
break;\r
\r
- case ANCHOR_NO_WORD_BOUNDARY:\r
+ case ANCR_NO_WORD_BOUNDARY:\r
op = OP_NO_WORD_BOUNDARY; goto word;\r
break;\r
#ifdef USE_WORD_BEGIN_END\r
- case ANCHOR_WORD_BEGIN:\r
+ case ANCR_WORD_BEGIN:\r
op = OP_WORD_BEGIN; goto word;\r
break;\r
- case ANCHOR_WORD_END:\r
+ case ANCR_WORD_END:\r
op = OP_WORD_END; goto word;\r
break;\r
#endif\r
\r
- case ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:\r
- r = add_opcode(reg, OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY);\r
- break;\r
+ case ANCR_TEXT_SEGMENT_BOUNDARY:\r
+ case ANCR_NO_TEXT_SEGMENT_BOUNDARY:\r
+ {\r
+ enum TextSegmentBoundaryType type;\r
\r
- case ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:\r
- r = add_opcode(reg, OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY);\r
+ r = add_op(reg, OP_TEXT_SEGMENT_BOUNDARY);\r
+ if (r != 0) return r;\r
+\r
+ type = EXTENDED_GRAPHEME_CLUSTER_BOUNDARY;\r
+#ifdef USE_UNICODE_WORD_BREAK\r
+ if (ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_TEXT_SEGMENT_WORD))\r
+ type = WORD_BOUNDARY;\r
+#endif\r
+\r
+ COP(reg)->text_segment_boundary.type = type;\r
+ COP(reg)->text_segment_boundary.not =\r
+ (node->type == ANCR_NO_TEXT_SEGMENT_BOUNDARY ? 1 : 0);\r
+ }\r
break;\r
\r
- case ANCHOR_PREC_READ:\r
- r = add_opcode(reg, OP_PREC_READ_START);\r
+ case ANCR_PREC_READ:\r
+ r = add_op(reg, OP_PREC_READ_START);\r
if (r != 0) return r;\r
r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);\r
if (r != 0) return r;\r
- r = add_opcode(reg, OP_PREC_READ_END);\r
+ r = add_op(reg, OP_PREC_READ_END);\r
break;\r
\r
- case ANCHOR_PREC_READ_NOT:\r
+ case ANCR_PREC_READ_NOT:\r
len = compile_length_tree(NODE_ANCHOR_BODY(node), reg);\r
if (len < 0) return len;\r
- r = add_opcode_rel_addr(reg, OP_PREC_READ_NOT_START, len + SIZE_OP_PREC_READ_NOT_END);\r
+\r
+ r = add_op(reg, OP_PREC_READ_NOT_START);\r
if (r != 0) return r;\r
+ COP(reg)->prec_read_not_start.addr = SIZE_INC_OP + len + SIZE_OP_PREC_READ_NOT_END;\r
r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);\r
if (r != 0) return r;\r
- r = add_opcode(reg, OP_PREC_READ_NOT_END);\r
+ r = add_op(reg, OP_PREC_READ_NOT_END);\r
break;\r
\r
- case ANCHOR_LOOK_BEHIND:\r
+ case ANCR_LOOK_BEHIND:\r
{\r
int n;\r
- r = add_opcode(reg, OP_LOOK_BEHIND);\r
+ r = add_op(reg, OP_LOOK_BEHIND);\r
if (r != 0) return r;\r
if (node->char_len < 0) {\r
- r = get_char_length_tree(NODE_ANCHOR_BODY(node), reg, &n);\r
+ r = get_char_len_node(NODE_ANCHOR_BODY(node), reg, &n);\r
if (r != 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;\r
}\r
else\r
n = node->char_len;\r
\r
- r = add_length(reg, n);\r
- if (r != 0) return r;\r
+ COP(reg)->look_behind.len = n;\r
r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);\r
}\r
break;\r
\r
- case ANCHOR_LOOK_BEHIND_NOT:\r
+ case ANCR_LOOK_BEHIND_NOT:\r
{\r
int n;\r
\r
len = compile_length_tree(NODE_ANCHOR_BODY(node), reg);\r
- r = add_opcode_rel_addr(reg, OP_LOOK_BEHIND_NOT_START,\r
- len + SIZE_OP_LOOK_BEHIND_NOT_END);\r
+ r = add_op(reg, OP_LOOK_BEHIND_NOT_START);\r
if (r != 0) return r;\r
+ COP(reg)->look_behind_not_start.addr = SIZE_INC_OP + len + SIZE_OP_LOOK_BEHIND_NOT_END;\r
+\r
if (node->char_len < 0) {\r
- r = get_char_length_tree(NODE_ANCHOR_BODY(node), reg, &n);\r
+ r = get_char_len_node(NODE_ANCHOR_BODY(node), reg, &n);\r
if (r != 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;\r
}\r
else\r
n = node->char_len;\r
- r = add_length(reg, n);\r
- if (r != 0) return r;\r
+\r
+ COP(reg)->look_behind_not_start.len = n;\r
+\r
r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);\r
if (r != 0) return r;\r
- r = add_opcode(reg, OP_LOOK_BEHIND_NOT_END);\r
+ r = add_op(reg, OP_LOOK_BEHIND_NOT_END);\r
}\r
break;\r
\r
\r
switch (node->type) {\r
case GIMMICK_FAIL:\r
- r = add_opcode(reg, OP_FAIL);\r
- break;\r
-\r
- case GIMMICK_KEEP:\r
- r = add_opcode(reg, OP_PUSH_SAVE_VAL);\r
- if (r != 0) return r;\r
- r = add_save_type(reg, SAVE_KEEP);\r
- if (r != 0) return r;\r
- r = add_mem_num(reg, node->id);\r
+ r = add_op(reg, OP_FAIL);\r
break;\r
\r
case GIMMICK_SAVE:\r
- r = add_opcode(reg, OP_PUSH_SAVE_VAL);\r
- if (r != 0) return r;\r
- r = add_save_type(reg, node->detail_type);\r
+ r = add_op(reg, OP_PUSH_SAVE_VAL);\r
if (r != 0) return r;\r
- r = add_mem_num(reg, node->id);\r
+ COP(reg)->push_save_val.type = node->detail_type;\r
+ COP(reg)->push_save_val.id = node->id;\r
break;\r
\r
case GIMMICK_UPDATE_VAR:\r
- r = add_opcode(reg, OP_UPDATE_VAR);\r
+ r = add_op(reg, OP_UPDATE_VAR);\r
if (r != 0) return r;\r
- r = add_update_var_type(reg, node->detail_type);\r
- if (r != 0) return r;\r
- r = add_mem_num(reg, node->id);\r
+ COP(reg)->update_var.type = node->detail_type;\r
+ COP(reg)->update_var.id = node->id;\r
break;\r
\r
#ifdef USE_CALLOUT\r
case ONIG_CALLOUT_OF_CONTENTS:\r
case ONIG_CALLOUT_OF_NAME:\r
{\r
- r = add_opcode(reg, (node->detail_type == ONIG_CALLOUT_OF_CONTENTS) ?\r
- OP_CALLOUT_CONTENTS : OP_CALLOUT_NAME);\r
- if (r != 0) return r;\r
if (node->detail_type == ONIG_CALLOUT_OF_NAME) {\r
- r = add_mem_num(reg, node->id);\r
+ r = add_op(reg, OP_CALLOUT_NAME);\r
if (r != 0) return r;\r
+ COP(reg)->callout_name.id = node->id;\r
+ COP(reg)->callout_name.num = node->num;\r
+ }\r
+ else {\r
+ r = add_op(reg, OP_CALLOUT_CONTENTS);\r
+ if (r != 0) return r;\r
+ COP(reg)->callout_contents.num = node->num;\r
}\r
- r = add_mem_num(reg, node->num);\r
- if (r != 0) return r;\r
}\r
break;\r
\r
len = SIZE_OP_FAIL;\r
break;\r
\r
- case GIMMICK_KEEP:\r
case GIMMICK_SAVE:\r
len = SIZE_OP_PUSH_SAVE_VAL;\r
break;\r
break;\r
\r
case NODE_BACKREF:\r
- {\r
- BackRefNode* br = BACKREF_(node);\r
-\r
- if (NODE_IS_CHECKER(node)) {\r
-#ifdef USE_BACKREF_WITH_LEVEL\r
- if (NODE_IS_NEST_LEVEL(node)) {\r
- r = SIZE_OPCODE + SIZE_LENGTH + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);\r
- }\r
- else\r
-#endif\r
- r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);\r
- }\r
- else {\r
-#ifdef USE_BACKREF_WITH_LEVEL\r
- if (NODE_IS_NEST_LEVEL(node)) {\r
- r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +\r
- SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);\r
- }\r
- else\r
-#endif\r
- if (br->back_num == 1) {\r
- r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)\r
- ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));\r
- }\r
- else {\r
- r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);\r
- }\r
- }\r
- }\r
+ r = SIZE_OP_BACKREF;\r
break;\r
\r
#ifdef USE_CALL\r
r = compile_length_quantifier_node(QUANT_(node), reg);\r
break;\r
\r
- case NODE_ENCLOSURE:\r
- r = compile_length_enclosure_node(ENCLOSURE_(node), reg);\r
+ case NODE_BAG:\r
+ r = compile_length_bag_node(BAG_(node), reg);\r
break;\r
\r
case NODE_ANCHOR:\r
len += SIZE_OP_PUSH + SIZE_OP_JUMP;\r
}\r
} while (IS_NOT_NULL(x = NODE_CDR(x)));\r
- pos = reg->used + len; /* goal position */\r
+ pos = COP_CURR_OFFSET(reg) + 1 + len; /* goal position */\r
\r
do {\r
len = compile_length_tree(NODE_CAR(node), reg);\r
if (IS_NOT_NULL(NODE_CDR(node))) {\r
enum OpCode push = NODE_IS_SUPER(node) ? OP_PUSH_SUPER : OP_PUSH;\r
- r = add_opcode_rel_addr(reg, push, len + SIZE_OP_JUMP);\r
+ r = add_op(reg, push);\r
if (r != 0) break;\r
+ COP(reg)->push.addr = SIZE_INC_OP + len + SIZE_OP_JUMP;\r
}\r
r = compile_tree(NODE_CAR(node), reg, env);\r
if (r != 0) break;\r
if (IS_NOT_NULL(NODE_CDR(node))) {\r
- len = pos - (reg->used + SIZE_OP_JUMP);\r
- r = add_opcode_rel_addr(reg, OP_JUMP, len);\r
+ len = pos - (COP_CURR_OFFSET(reg) + 1);\r
+ r = add_op(reg, OP_JUMP);\r
if (r != 0) break;\r
+ COP(reg)->jump.addr = len;\r
}\r
} while (IS_NOT_NULL(node = NODE_CDR(node)));\r
}\r
\r
switch (CTYPE_(node)->ctype) {\r
case CTYPE_ANYCHAR:\r
- if (IS_MULTILINE(CTYPE_OPTION(node, reg)))\r
- r = add_opcode(reg, OP_ANYCHAR_ML);\r
- else\r
- r = add_opcode(reg, OP_ANYCHAR);\r
+ r = add_op(reg, IS_MULTILINE(CTYPE_OPTION(node, reg)) ?\r
+ OP_ANYCHAR_ML : OP_ANYCHAR);\r
break;\r
\r
case ONIGENC_CTYPE_WORD:\r
else {\r
op = CTYPE_(node)->not != 0 ? OP_NO_WORD_ASCII : OP_WORD_ASCII;\r
}\r
- r = add_opcode(reg, op);\r
+ r = add_op(reg, op);\r
break;\r
\r
default:\r
if (NODE_IS_CHECKER(node)) {\r
#ifdef USE_BACKREF_WITH_LEVEL\r
if (NODE_IS_NEST_LEVEL(node)) {\r
- r = add_opcode(reg, OP_BACKREF_CHECK_WITH_LEVEL);\r
- if (r != 0) return r;\r
- r = add_length(reg, br->nest_level);\r
+ r = add_op(reg, OP_BACKREF_CHECK_WITH_LEVEL);\r
if (r != 0) return r;\r
+ COP(reg)->backref_general.nest_level = br->nest_level;\r
}\r
else\r
#endif\r
{\r
- r = add_opcode(reg, OP_BACKREF_CHECK);\r
+ r = add_op(reg, OP_BACKREF_CHECK);\r
if (r != 0) return r;\r
}\r
-\r
goto add_bacref_mems;\r
}\r
else {\r
#ifdef USE_BACKREF_WITH_LEVEL\r
if (NODE_IS_NEST_LEVEL(node)) {\r
- r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);\r
- if (r != 0) return r;\r
- r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));\r
- if (r != 0) return r;\r
- r = add_length(reg, br->nest_level);\r
- if (r != 0) return r;\r
+ if ((reg->options & ONIG_OPTION_IGNORECASE) != 0)\r
+ r = add_op(reg, OP_BACKREF_WITH_LEVEL_IC);\r
+ else\r
+ r = add_op(reg, OP_BACKREF_WITH_LEVEL);\r
\r
+ if (r != 0) return r;\r
+ COP(reg)->backref_general.nest_level = br->nest_level;\r
goto add_bacref_mems;\r
}\r
else\r
if (br->back_num == 1) {\r
n = br->back_static[0];\r
if (IS_IGNORECASE(reg->options)) {\r
- r = add_opcode(reg, OP_BACKREF_N_IC);\r
+ r = add_op(reg, OP_BACKREF_N_IC);\r
if (r != 0) return r;\r
- r = add_mem_num(reg, n);\r
+ COP(reg)->backref_n.n1 = n;\r
}\r
else {\r
switch (n) {\r
- case 1: r = add_opcode(reg, OP_BACKREF1); break;\r
- case 2: r = add_opcode(reg, OP_BACKREF2); break;\r
+ case 1: r = add_op(reg, OP_BACKREF1); break;\r
+ case 2: r = add_op(reg, OP_BACKREF2); break;\r
default:\r
- r = add_opcode(reg, OP_BACKREF_N);\r
+ r = add_op(reg, OP_BACKREF_N);\r
if (r != 0) return r;\r
- r = add_mem_num(reg, n);\r
+ COP(reg)->backref_n.n1 = n;\r
break;\r
}\r
}\r
}\r
else {\r
- int i;\r
+ int num;\r
int* p;\r
\r
- if (IS_IGNORECASE(reg->options)) {\r
- r = add_opcode(reg, OP_BACKREF_MULTI_IC);\r
- }\r
- else {\r
- r = add_opcode(reg, OP_BACKREF_MULTI);\r
- }\r
+ r = add_op(reg, IS_IGNORECASE(reg->options) ?\r
+ OP_BACKREF_MULTI_IC : OP_BACKREF_MULTI);\r
if (r != 0) return r;\r
\r
add_bacref_mems:\r
- r = add_length(reg, br->back_num);\r
- if (r != 0) return r;\r
- p = BACKREFS_P(br);\r
- for (i = br->back_num - 1; i >= 0; i--) {\r
- r = add_mem_num(reg, p[i]);\r
- if (r != 0) return r;\r
+ num = br->back_num;\r
+ COP(reg)->backref_general.num = num;\r
+ if (num == 1) {\r
+ COP(reg)->backref_general.n1 = br->back_static[0];\r
+ }\r
+ else {\r
+ int i, j;\r
+ MemNumType* ns;\r
+\r
+ ns = xmalloc(sizeof(MemNumType) * num);\r
+ CHECK_NULL_RETURN_MEMERR(ns);\r
+ COP(reg)->backref_general.ns = ns;\r
+ p = BACKREFS_P(br);\r
+ for (i = num - 1, j = 0; i >= 0; i--, j++) {\r
+ ns[j] = p[i];\r
+ }\r
}\r
}\r
}\r
r = compile_quantifier_node(QUANT_(node), reg, env);\r
break;\r
\r
- case NODE_ENCLOSURE:\r
- r = compile_enclosure_node(ENCLOSURE_(node), reg, env);\r
+ case NODE_BAG:\r
+ r = compile_bag_node(BAG_(node), reg, env);\r
break;\r
\r
case NODE_ANCHOR:\r
}\r
break;\r
\r
- case NODE_ENCLOSURE:\r
+ case NODE_BAG:\r
{\r
- EnclosureNode* en = ENCLOSURE_(node);\r
- if (en->type == ENCLOSURE_MEMORY) {\r
+ BagNode* en = BAG_(node);\r
+ if (en->type == BAG_MEMORY) {\r
if (NODE_IS_NAMED_GROUP(node)) {\r
(*counter)++;\r
map[en->m.regnum].new_val = *counter;\r
r = noname_disable_map(plink, map, counter);\r
}\r
}\r
- else if (en->type == ENCLOSURE_IF_ELSE) {\r
- r = noname_disable_map(&(NODE_ENCLOSURE_BODY(en)), map, counter);\r
+ else if (en->type == BAG_IF_ELSE) {\r
+ r = noname_disable_map(&(NODE_BAG_BODY(en)), map, counter);\r
if (r != 0) return r;\r
if (IS_NOT_NULL(en->te.Then)) {\r
r = noname_disable_map(&(en->te.Then), map, counter);\r
r = renumber_by_map(NODE_BODY(node), map);\r
break;\r
\r
- case NODE_ENCLOSURE:\r
+ case NODE_BAG:\r
{\r
- EnclosureNode* en = ENCLOSURE_(node);\r
+ BagNode* en = BAG_(node);\r
\r
r = renumber_by_map(NODE_BODY(node), map);\r
if (r != 0) return r;\r
\r
- if (en->type == ENCLOSURE_IF_ELSE) {\r
+ if (en->type == BAG_IF_ELSE) {\r
if (IS_NOT_NULL(en->te.Then)) {\r
r = renumber_by_map(en->te.Then, map);\r
if (r != 0) return r;\r
r = numbered_ref_check(NODE_BODY(node));\r
break;\r
\r
- case NODE_ENCLOSURE:\r
+ case NODE_BAG:\r
{\r
- EnclosureNode* en = ENCLOSURE_(node);\r
+ BagNode* en = BAG_(node);\r
\r
r = numbered_ref_check(NODE_BODY(node));\r
if (r != 0) return r;\r
\r
- if (en->type == ENCLOSURE_IF_ELSE) {\r
+ if (en->type == BAG_IF_ELSE) {\r
if (IS_NOT_NULL(en->te.Then)) {\r
r = numbered_ref_check(en->te.Then);\r
if (r != 0) return r;\r
fix_unset_addr_list(UnsetAddrList* uslist, regex_t* reg)\r
{\r
int i, offset;\r
- EnclosureNode* en;\r
+ BagNode* en;\r
AbsAddrType addr;\r
+ AbsAddrType* paddr;\r
\r
for (i = 0; i < uslist->num; i++) {\r
if (! NODE_IS_ADDR_FIXED(uslist->us[i].target))\r
return ONIGERR_PARSER_BUG;\r
\r
- en = ENCLOSURE_(uslist->us[i].target);\r
+ en = BAG_(uslist->us[i].target);\r
addr = en->m.called_addr;\r
offset = uslist->us[i].offset;\r
\r
- BB_WRITE(reg, offset, &addr, SIZE_ABSADDR);\r
+ paddr = (AbsAddrType* )((char* )reg->ops + offset);\r
+ *paddr = addr;\r
}\r
return 0;\r
}\r
\r
/* fixed size pattern node only */\r
static int\r
-get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)\r
+get_char_len_node1(Node* node, regex_t* reg, int* len, int level)\r
{\r
int tlen;\r
int r = 0;\r
switch (NODE_TYPE(node)) {\r
case NODE_LIST:\r
do {\r
- r = get_char_length_tree1(NODE_CAR(node), reg, &tlen, level);\r
+ r = get_char_len_node1(NODE_CAR(node), reg, &tlen, level);\r
if (r == 0)\r
*len = distance_add(*len, tlen);\r
} while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));\r
int tlen2;\r
int varlen = 0;\r
\r
- r = get_char_length_tree1(NODE_CAR(node), reg, &tlen, level);\r
+ r = get_char_len_node1(NODE_CAR(node), reg, &tlen, level);\r
while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node))) {\r
- r = get_char_length_tree1(NODE_CAR(node), reg, &tlen2, level);\r
+ r = get_char_len_node1(NODE_CAR(node), reg, &tlen2, level);\r
if (r == 0) {\r
if (tlen != tlen2)\r
varlen = 1;\r
*len = 0;\r
}\r
else {\r
- r = get_char_length_tree1(NODE_BODY(node), reg, &tlen, level);\r
+ r = get_char_len_node1(NODE_BODY(node), reg, &tlen, level);\r
if (r == 0)\r
*len = distance_multiply(tlen, qn->lower);\r
}\r
#ifdef USE_CALL\r
case NODE_CALL:\r
if (! NODE_IS_RECURSION(node))\r
- r = get_char_length_tree1(NODE_BODY(node), reg, len, level);\r
+ r = get_char_len_node1(NODE_BODY(node), reg, len, level);\r
else\r
r = GET_CHAR_LEN_VARLEN;\r
break;\r
*len = 1;\r
break;\r
\r
- case NODE_ENCLOSURE:\r
+ case NODE_BAG:\r
{\r
- EnclosureNode* en = ENCLOSURE_(node);\r
+ BagNode* en = BAG_(node);\r
\r
switch (en->type) {\r
- case ENCLOSURE_MEMORY:\r
+ case BAG_MEMORY:\r
#ifdef USE_CALL\r
if (NODE_IS_CLEN_FIXED(node))\r
*len = en->char_len;\r
else {\r
- r = get_char_length_tree1(NODE_BODY(node), reg, len, level);\r
+ r = get_char_len_node1(NODE_BODY(node), reg, len, level);\r
if (r == 0) {\r
en->char_len = *len;\r
NODE_STATUS_ADD(node, CLEN_FIXED);\r
}\r
break;\r
#endif\r
- case ENCLOSURE_OPTION:\r
- case ENCLOSURE_STOP_BACKTRACK:\r
- r = get_char_length_tree1(NODE_BODY(node), reg, len, level);\r
+ case BAG_OPTION:\r
+ case BAG_STOP_BACKTRACK:\r
+ r = get_char_len_node1(NODE_BODY(node), reg, len, level);\r
break;\r
- case ENCLOSURE_IF_ELSE:\r
+ case BAG_IF_ELSE:\r
{\r
int clen, elen;\r
\r
- r = get_char_length_tree1(NODE_BODY(node), reg, &clen, level);\r
+ r = get_char_len_node1(NODE_BODY(node), reg, &clen, level);\r
if (r == 0) {\r
if (IS_NOT_NULL(en->te.Then)) {\r
- r = get_char_length_tree1(en->te.Then, reg, &tlen, level);\r
+ r = get_char_len_node1(en->te.Then, reg, &tlen, level);\r
if (r != 0) break;\r
}\r
else tlen = 0;\r
if (IS_NOT_NULL(en->te.Else)) {\r
- r = get_char_length_tree1(en->te.Else, reg, &elen, level);\r
+ r = get_char_len_node1(en->te.Else, reg, &elen, level);\r
if (r != 0) break;\r
}\r
else elen = 0;\r
}\r
}\r
break;\r
-\r
- default:\r
- break;\r
}\r
}\r
break;\r
}\r
\r
static int\r
-get_char_length_tree(Node* node, regex_t* reg, int* len)\r
+get_char_len_node(Node* node, regex_t* reg, int* len)\r
{\r
- return get_char_length_tree1(node, reg, len, 0);\r
+ return get_char_len_node1(node, reg, len, 0);\r
}\r
\r
/* x is not included y ==> 1 : 0 */\r
\r
code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,\r
xs->s + ONIGENC_MBC_MAXLEN(reg->enc));\r
- return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);\r
+ return onig_is_code_in_cc(reg->enc, code, cc) == 0;\r
}\r
break;\r
\r
if (sn->end <= sn->s)\r
break;\r
\r
- if (exact != 0 &&\r
- !NODE_STRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {\r
- }\r
- else {\r
+ if (exact == 0 ||\r
+ ! IS_IGNORECASE(reg->options) || NODE_STRING_IS_RAW(node)) {\r
n = node;\r
}\r
}\r
}\r
break;\r
\r
- case NODE_ENCLOSURE:\r
+ case NODE_BAG:\r
{\r
- EnclosureNode* en = ENCLOSURE_(node);\r
+ BagNode* en = BAG_(node);\r
switch (en->type) {\r
- case ENCLOSURE_OPTION:\r
+ case BAG_OPTION:\r
{\r
OnigOptionType options = reg->options;\r
\r
- reg->options = ENCLOSURE_(node)->o.options;\r
+ reg->options = BAG_(node)->o.options;\r
n = get_head_value_node(NODE_BODY(node), exact, reg);\r
reg->options = options;\r
}\r
break;\r
\r
- case ENCLOSURE_MEMORY:\r
- case ENCLOSURE_STOP_BACKTRACK:\r
- case ENCLOSURE_IF_ELSE:\r
+ case BAG_MEMORY:\r
+ case BAG_STOP_BACKTRACK:\r
+ case BAG_IF_ELSE:\r
n = get_head_value_node(NODE_BODY(node), exact, reg);\r
break;\r
}\r
break;\r
\r
case NODE_ANCHOR:\r
- if (ANCHOR_(node)->type == ANCHOR_PREC_READ)\r
+ if (ANCHOR_(node)->type == ANCR_PREC_READ)\r
n = get_head_value_node(NODE_BODY(node), exact, reg);\r
break;\r
\r
}\r
\r
static int\r
-check_type_tree(Node* node, int type_mask, int enclosure_mask, int anchor_mask)\r
+check_type_tree(Node* node, int type_mask, int bag_mask, int anchor_mask)\r
{\r
NodeType type;\r
int r = 0;\r
case NODE_LIST:\r
case NODE_ALT:\r
do {\r
- r = check_type_tree(NODE_CAR(node), type_mask, enclosure_mask,\r
- anchor_mask);\r
+ r = check_type_tree(NODE_CAR(node), type_mask, bag_mask, anchor_mask);\r
} while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));\r
break;\r
\r
case NODE_QUANT:\r
- r = check_type_tree(NODE_BODY(node), type_mask, enclosure_mask, anchor_mask);\r
+ r = check_type_tree(NODE_BODY(node), type_mask, bag_mask, anchor_mask);\r
break;\r
\r
- case NODE_ENCLOSURE:\r
+ case NODE_BAG:\r
{\r
- EnclosureNode* en = ENCLOSURE_(node);\r
- if (((1<<en->type) & enclosure_mask) == 0)\r
+ BagNode* en = BAG_(node);\r
+ if (((1<<en->type) & bag_mask) == 0)\r
return 1;\r
\r
- r = check_type_tree(NODE_BODY(node), type_mask, enclosure_mask, anchor_mask);\r
- if (r == 0 && en->type == ENCLOSURE_IF_ELSE) {\r
+ r = check_type_tree(NODE_BODY(node), type_mask, bag_mask, anchor_mask);\r
+ if (r == 0 && en->type == BAG_IF_ELSE) {\r
if (IS_NOT_NULL(en->te.Then)) {\r
- r = check_type_tree(en->te.Then, type_mask, enclosure_mask, anchor_mask);\r
+ r = check_type_tree(en->te.Then, type_mask, bag_mask, anchor_mask);\r
if (r != 0) break;\r
}\r
if (IS_NOT_NULL(en->te.Else)) {\r
- r = check_type_tree(en->te.Else, type_mask, enclosure_mask, anchor_mask);\r
+ r = check_type_tree(en->te.Else, type_mask, bag_mask, anchor_mask);\r
}\r
}\r
}\r
return 1;\r
\r
if (IS_NOT_NULL(NODE_BODY(node)))\r
- r = check_type_tree(NODE_BODY(node), type_mask, enclosure_mask, anchor_mask);\r
+ r = check_type_tree(NODE_BODY(node), type_mask, bag_mask, anchor_mask);\r
break;\r
\r
case NODE_GIMMICK:\r
Node* t = NODE_BODY(node);\r
if (NODE_IS_RECURSION(node)) {\r
if (NODE_IS_MIN_FIXED(t))\r
- len = ENCLOSURE_(t)->min_len;\r
+ len = BAG_(t)->min_len;\r
}\r
else\r
len = tree_min_len(t, env);\r
}\r
break;\r
\r
- case NODE_ENCLOSURE:\r
+ case NODE_BAG:\r
{\r
- EnclosureNode* en = ENCLOSURE_(node);\r
+ BagNode* en = BAG_(node);\r
switch (en->type) {\r
- case ENCLOSURE_MEMORY:\r
+ case BAG_MEMORY:\r
if (NODE_IS_MIN_FIXED(node))\r
len = en->min_len;\r
else {\r
}\r
break;\r
\r
- case ENCLOSURE_OPTION:\r
- case ENCLOSURE_STOP_BACKTRACK:\r
+ case BAG_OPTION:\r
+ case BAG_STOP_BACKTRACK:\r
len = tree_min_len(NODE_BODY(node), env);\r
break;\r
- case ENCLOSURE_IF_ELSE:\r
+ case BAG_IF_ELSE:\r
{\r
OnigLen elen;\r
\r
if (qn->upper != 0) {\r
len = tree_max_len(NODE_BODY(node), env);\r
if (len != 0) {\r
- if (! IS_REPEAT_INFINITE(qn->upper))\r
+ if (! IS_INFINITE_REPEAT(qn->upper))\r
len = distance_multiply(len, qn->upper);\r
else\r
len = INFINITE_LEN;\r
}\r
break;\r
\r
- case NODE_ENCLOSURE:\r
+ case NODE_BAG:\r
{\r
- EnclosureNode* en = ENCLOSURE_(node);\r
+ BagNode* en = BAG_(node);\r
switch (en->type) {\r
- case ENCLOSURE_MEMORY:\r
+ case BAG_MEMORY:\r
if (NODE_IS_MAX_FIXED(node))\r
len = en->max_len;\r
else {\r
}\r
break;\r
\r
- case ENCLOSURE_OPTION:\r
- case ENCLOSURE_STOP_BACKTRACK:\r
+ case BAG_OPTION:\r
+ case BAG_STOP_BACKTRACK:\r
len = tree_max_len(NODE_BODY(node), env);\r
break;\r
- case ENCLOSURE_IF_ELSE:\r
+ case BAG_IF_ELSE:\r
{\r
OnigLen tlen, elen;\r
\r
r = check_backrefs(NODE_BODY(node), env);\r
break;\r
\r
- case NODE_ENCLOSURE:\r
+ case NODE_BAG:\r
r = check_backrefs(NODE_BODY(node), env);\r
{\r
- EnclosureNode* en = ENCLOSURE_(node);\r
+ BagNode* en = BAG_(node);\r
\r
- if (en->type == ENCLOSURE_IF_ELSE) {\r
+ if (en->type == BAG_IF_ELSE) {\r
if (r != 0) return r;\r
if (IS_NOT_NULL(en->te.Then)) {\r
r = check_backrefs(en->te.Then, env);\r
r = infinite_recursive_call_check(NODE_BODY(node), env, head);\r
break;\r
\r
- case NODE_ENCLOSURE:\r
+ case NODE_BAG:\r
{\r
- EnclosureNode* en = ENCLOSURE_(node);\r
+ BagNode* en = BAG_(node);\r
\r
- if (en->type == ENCLOSURE_MEMORY) {\r
+ if (en->type == BAG_MEMORY) {\r
if (NODE_IS_MARK2(node))\r
return 0;\r
else if (NODE_IS_MARK1(node))\r
NODE_STATUS_REMOVE(node, MARK2);\r
}\r
}\r
- else if (en->type == ENCLOSURE_IF_ELSE) {\r
+ else if (en->type == BAG_IF_ELSE) {\r
int eret;\r
\r
ret = infinite_recursive_call_check(NODE_BODY(node), env, head);\r
r = infinite_recursive_call_check_trav(NODE_BODY(node), env);\r
break;\r
\r
- case NODE_ENCLOSURE:\r
+ case NODE_BAG:\r
{\r
- EnclosureNode* en = ENCLOSURE_(node);\r
+ BagNode* en = BAG_(node);\r
\r
- if (en->type == ENCLOSURE_MEMORY) {\r
+ if (en->type == BAG_MEMORY) {\r
if (NODE_IS_RECURSION(node) && NODE_IS_CALLED(node)) {\r
int ret;\r
\r
NODE_STATUS_REMOVE(node, MARK1);\r
}\r
}\r
- else if (en->type == ENCLOSURE_IF_ELSE) {\r
+ else if (en->type == BAG_IF_ELSE) {\r
if (IS_NOT_NULL(en->te.Then)) {\r
r = infinite_recursive_call_check_trav(en->te.Then, env);\r
if (r != 0) return r;\r
}\r
break;\r
\r
- case NODE_ENCLOSURE:\r
+ case NODE_BAG:\r
{\r
- EnclosureNode* en = ENCLOSURE_(node);\r
+ BagNode* en = BAG_(node);\r
\r
- if (en->type == ENCLOSURE_MEMORY) {\r
+ if (en->type == BAG_MEMORY) {\r
if (NODE_IS_MARK2(node))\r
return 0;\r
else if (NODE_IS_MARK1(node))\r
NODE_STATUS_REMOVE(node, MARK2);\r
}\r
}\r
- else if (en->type == ENCLOSURE_IF_ELSE) {\r
+ else if (en->type == BAG_IF_ELSE) {\r
r = 0;\r
if (IS_NOT_NULL(en->te.Then)) {\r
r |= recursive_call_check(en->te.Then);\r
}\r
break;\r
\r
- case NODE_ENCLOSURE:\r
+ case NODE_BAG:\r
{\r
int ret;\r
int state1;\r
- EnclosureNode* en = ENCLOSURE_(node);\r
+ BagNode* en = BAG_(node);\r
\r
- if (en->type == ENCLOSURE_MEMORY) {\r
+ if (en->type == BAG_MEMORY) {\r
if (NODE_IS_CALLED(node) || (state & IN_RECURSION) != 0) {\r
if (! NODE_IS_RECURSION(node)) {\r
NODE_STATUS_ADD(node, MARK1);\r
if (ret == FOUND_CALLED_NODE)\r
r = FOUND_CALLED_NODE;\r
\r
- if (en->type == ENCLOSURE_IF_ELSE) {\r
+ if (en->type == BAG_IF_ELSE) {\r
if (IS_NOT_NULL(en->te.Then)) {\r
ret = recursive_call_check_trav(en->te.Then, env, state1);\r
if (ret == FOUND_CALLED_NODE)\r
\r
#endif\r
\r
/* Bit flags OR-ed into the `state` argument that the setup passes hand down\r
   the tree; each bit records a kind of enclosing context. */\r
+#define IN_ALT (1<<0) /* added by setup_tree for NODE_BODY of a BAG_IF_ELSE */\r
+#define IN_NOT (1<<1) /* below ANCR_PREC_READ_NOT / ANCR_LOOK_BEHIND_NOT */\r
+#define IN_REAL_REPEAT (1<<2) /* below a quantifier whose upper is infinite or >= 2 */\r
+#define IN_VAR_REPEAT (1<<3) /* below a quantifier with lower != upper */\r
+#define IN_ZERO_REPEAT (1<<4) /* tested by setup_call on BAG_MEMORY nodes */\r
+#define IN_MULTI_ENTRY (1<<5) /* below a BAG_MEMORY whose entry_count > 1 */\r
+#define IN_LOOK_BEHIND (1<<6) /* below ANCR_LOOK_BEHIND / ANCR_LOOK_BEHIND_NOT */\r
+\r
+\r
/* divide different length alternatives in look-behind.\r
(?<=A|B) ==> (?<=A)|(?<=B)\r
(?<!A|B) ==> (?<!A)(?<!B)\r
NODE_CAR(np) = insert_node;\r
}\r
\r
- if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {\r
+ if (anc_type == ANCR_LOOK_BEHIND_NOT) {\r
np = node;\r
do {\r
NODE_SET_TYPE(np, NODE_LIST); /* alt -> list */\r
int r, len;\r
AnchorNode* an = ANCHOR_(node);\r
\r
- r = get_char_length_tree(NODE_ANCHOR_BODY(an), reg, &len);\r
+ r = get_char_len_node(NODE_ANCHOR_BODY(an), reg, &len);\r
if (r == 0)\r
an->char_len = len;\r
else if (r == GET_CHAR_LEN_VARLEN)\r
type = NODE_TYPE(node);\r
if (type == NODE_QUANT) {\r
QuantNode* qn = QUANT_(node);\r
- if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {\r
+ if (qn->greedy && IS_INFINITE_REPEAT(qn->upper)) {\r
#ifdef USE_QUANT_PEEK_NEXT\r
Node* n = get_head_value_node(next_node, 1, reg);\r
/* '\0': for UTF-16BE etc... */\r
#endif\r
/* automatic possessivation a*b ==> (?>a*)b */\r
if (qn->lower <= 1) {\r
- if (NODE_IS_SIMPLE_TYPE(NODE_BODY(node))) {\r
+ if (is_strict_real_node(NODE_BODY(node))) {\r
Node *x, *y;\r
x = get_head_value_node(NODE_BODY(node), 0, reg);\r
if (IS_NOT_NULL(x)) {\r
y = get_head_value_node(next_node, 0, reg);\r
if (IS_NOT_NULL(y) && is_exclusive(x, y, reg)) {\r
- Node* en = onig_node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);\r
+ Node* en = onig_node_new_bag(BAG_STOP_BACKTRACK);\r
CHECK_NULL_RETURN_MEMERR(en);\r
- NODE_STATUS_ADD(en, STOP_BT_SIMPLE_REPEAT);\r
+ NODE_STATUS_ADD(en, STRICT_REAL_REPEAT);\r
swap_node(node, en);\r
NODE_BODY(node) = en;\r
}\r
}\r
}\r
}\r
- else if (type == NODE_ENCLOSURE) {\r
- EnclosureNode* en = ENCLOSURE_(node);\r
- if (en->type == ENCLOSURE_MEMORY) {\r
+ else if (type == NODE_BAG) {\r
+ BagNode* en = BAG_(node);\r
+ if (en->type == BAG_MEMORY) {\r
node = NODE_BODY(node);\r
goto retry;\r
}\r
for (i = 0; i < item_num; i++) {\r
snode = onig_node_new_str(NULL, NULL);\r
if (IS_NULL(snode)) goto mem_err;\r
- \r
+\r
for (j = 0; j < items[i].code_len; j++) {\r
len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);\r
if (len < 0) {\r
}\r
\r
/* Return 1 when every one of the n case-fold items is "good" for search,\r
   0 otherwise.  An item is good when it is a single code point\r
   (code_len == 1), its byte_len equals slen, and that code point encodes\r
   to exactly slen bytes in enc.  With n == 0 the loop is skipped and the\r
   result is 1. */\r
static int\r
-expand_case_fold_string(Node* node, regex_t* reg)\r
+is_good_case_fold_items_for_search(OnigEncoding enc, int slen,\r
+ int n, OnigCaseFoldCodeItem items[])\r
{\r
+ int i, len;\r
+ UChar buf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; /* scratch output; only the returned length is used */\r
+\r
+ for (i = 0; i < n; i++) {\r
+ OnigCaseFoldCodeItem* item = items + i;\r
+\r
+ if (item->code_len != 1) return 0; /* multi-code-point fold */\r
+ if (item->byte_len != slen) return 0; /* item byte length differs from slen */\r
+ len = ONIGENC_CODE_TO_MBC(enc, item->code[0], buf);\r
+ if (len != slen) return 0; /* variant encodes to a different byte length (or failed) */\r
+ }\r
+\r
+ return 1;\r
+}\r
+\r
#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8\r
\r
+static int\r
+expand_case_fold_string(Node* node, regex_t* reg, int state)\r
+{\r
int r, n, len, alt_num;\r
+ int fold_len;\r
+ int prev_is_ambig, prev_is_good, is_good, is_in_look_behind;\r
UChar *start, *end, *p;\r
+ UChar* foldp;\r
Node *top_root, *root, *snode, *prev_node;\r
OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];\r
- StrNode* sn = STR_(node);\r
+ UChar buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];\r
+ StrNode* sn;\r
\r
if (NODE_STRING_IS_AMBIG(node)) return 0;\r
\r
+ sn = STR_(node);\r
+\r
start = sn->s;\r
end = sn->end;\r
if (start >= end) return 0;\r
\r
+ is_in_look_behind = (state & IN_LOOK_BEHIND) != 0;\r
+\r
r = 0;\r
top_root = root = prev_node = snode = NULL_NODE;\r
alt_num = 1;\r
p = start;\r
while (p < end) {\r
- n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag, p, end,\r
- items);\r
+ n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,\r
+ p, end, items);\r
if (n < 0) {\r
r = n;\r
goto err;\r
}\r
\r
len = enclen(reg->enc, p);\r
+ is_good = is_good_case_fold_items_for_search(reg->enc, len, n, items);\r
\r
- if (n == 0) {\r
+ if (is_in_look_behind ||\r
+ (IS_NOT_NULL(snode) ||\r
+ (is_good\r
+ /* expand single char case: ex. /(?i:a)/ */\r
+ && !(p == start && p + len >= end)))) {\r
if (IS_NULL(snode)) {\r
if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {\r
top_root = root = onig_node_list_add(NULL_NODE, prev_node);\r
goto mem_err;\r
}\r
}\r
+\r
+ prev_is_ambig = -1; /* -1: new */\r
+ prev_is_good = 0; /* escape compiler warning */\r
+ }\r
+ else {\r
+ prev_is_ambig = NODE_STRING_IS_AMBIG(snode);\r
+ prev_is_good = NODE_STRING_IS_GOOD_AMBIG(snode);\r
}\r
\r
- r = onig_node_str_cat(snode, p, p + len);\r
- if (r != 0) goto err;\r
+ if (n != 0) {\r
+ foldp = p;\r
+ fold_len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag,\r
+ &foldp, end, buf);\r
+ foldp = buf;\r
+ }\r
+ else {\r
+ foldp = p; fold_len = len;\r
+ }\r
+\r
+ if ((prev_is_ambig == 0 && n != 0) ||\r
+ (prev_is_ambig > 0 && (n == 0 || prev_is_good != is_good))) {\r
+ if (IS_NULL(root) /* && IS_NOT_NULL(prev_node) */) {\r
+ top_root = root = onig_node_list_add(NULL_NODE, prev_node);\r
+ if (IS_NULL(root)) {\r
+ onig_node_free(prev_node);\r
+ goto mem_err;\r
+ }\r
+ }\r
+\r
+ prev_node = snode = onig_node_new_str(foldp, foldp + fold_len);\r
+ if (IS_NULL(snode)) goto mem_err;\r
+ if (IS_NULL(onig_node_list_add(root, snode))) {\r
+ onig_node_free(snode);\r
+ goto mem_err;\r
+ }\r
+ }\r
+ else {\r
+ r = onig_node_str_cat(snode, foldp, foldp + fold_len);\r
+ if (r != 0) goto err;\r
+ }\r
+\r
+ if (n != 0) NODE_STRING_SET_AMBIG(snode);\r
+ if (is_good != 0) NODE_STRING_SET_GOOD_AMBIG(snode);\r
}\r
else {\r
alt_num *= (n + 1);\r
return r;\r
}\r
\r
-#ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT\r
-static enum QuantBodyEmpty\r
+#ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT\r
+static enum BodyEmptyType\r
quantifiers_memory_node_info(Node* node)\r
{\r
- int r = QUANT_BODY_IS_EMPTY;\r
+ int r = BODY_IS_EMPTY_POSSIBILITY;\r
\r
switch (NODE_TYPE(node)) {\r
case NODE_LIST:\r
#ifdef USE_CALL\r
case NODE_CALL:\r
if (NODE_IS_RECURSION(node)) {\r
- return QUANT_BODY_IS_EMPTY_REC; /* tiny version */\r
+ return BODY_IS_EMPTY_POSSIBILITY_REC; /* tiny version */\r
}\r
else\r
r = quantifiers_memory_node_info(NODE_BODY(node));\r
}\r
break;\r
\r
- case NODE_ENCLOSURE:\r
+ case NODE_BAG:\r
{\r
- EnclosureNode* en = ENCLOSURE_(node);\r
+ BagNode* en = BAG_(node);\r
switch (en->type) {\r
- case ENCLOSURE_MEMORY:\r
+ case BAG_MEMORY:\r
if (NODE_IS_RECURSION(node)) {\r
- return QUANT_BODY_IS_EMPTY_REC;\r
+ return BODY_IS_EMPTY_POSSIBILITY_REC;\r
}\r
- return QUANT_BODY_IS_EMPTY_MEM;\r
+ return BODY_IS_EMPTY_POSSIBILITY_MEM;\r
break;\r
\r
- case ENCLOSURE_OPTION:\r
- case ENCLOSURE_STOP_BACKTRACK:\r
+ case BAG_OPTION:\r
+ case BAG_STOP_BACKTRACK:\r
r = quantifiers_memory_node_info(NODE_BODY(node));\r
break;\r
- case ENCLOSURE_IF_ELSE:\r
+ case BAG_IF_ELSE:\r
{\r
int v;\r
r = quantifiers_memory_node_info(NODE_BODY(node));\r
}\r
}\r
break;\r
- default:\r
- break;\r
}\r
}\r
break;\r
\r
return r;\r
}\r
-#endif /* USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT */\r
-\r
+#endif /* USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT */\r
\r
-#define IN_ALT (1<<0)\r
-#define IN_NOT (1<<1)\r
-#define IN_REAL_REPEAT (1<<2)\r
-#define IN_VAR_REPEAT (1<<3)\r
-#define IN_ZERO_REPEAT (1<<4)\r
-#define IN_MULTI_ENTRY (1<<5)\r
\r
#ifdef USE_CALL\r
\r
setup_call2_call(NODE_BODY(node));\r
break;\r
\r
- case NODE_ENCLOSURE:\r
+ case NODE_BAG:\r
{\r
- EnclosureNode* en = ENCLOSURE_(node);\r
+ BagNode* en = BAG_(node);\r
\r
- if (en->type == ENCLOSURE_MEMORY) {\r
+ if (en->type == BAG_MEMORY) {\r
if (! NODE_IS_MARK1(node)) {\r
NODE_STATUS_ADD(node, MARK1);\r
setup_call2_call(NODE_BODY(node));\r
NODE_STATUS_REMOVE(node, MARK1);\r
}\r
}\r
- else if (en->type == ENCLOSURE_IF_ELSE) {\r
+ else if (en->type == BAG_IF_ELSE) {\r
setup_call2_call(NODE_BODY(node));\r
if (IS_NOT_NULL(en->te.Then))\r
setup_call2_call(en->te.Then);\r
cn->entry_count++;\r
\r
NODE_STATUS_ADD(called, CALLED);\r
- ENCLOSURE_(called)->m.entry_count++;\r
+ BAG_(called)->m.entry_count++;\r
setup_call2_call(called);\r
}\r
NODE_STATUS_REMOVE(node, MARK1);\r
r = 0;\r
break;\r
\r
- case NODE_ENCLOSURE:\r
+ case NODE_BAG:\r
{\r
- EnclosureNode* en = ENCLOSURE_(node);\r
+ BagNode* en = BAG_(node);\r
\r
- if (en->type == ENCLOSURE_MEMORY) {\r
+ if (en->type == BAG_MEMORY) {\r
if ((state & IN_ZERO_REPEAT) != 0) {\r
NODE_STATUS_ADD(node, IN_ZERO_REPEAT);\r
- ENCLOSURE_(node)->m.entry_count--;\r
+ BAG_(node)->m.entry_count--;\r
}\r
r = setup_call(NODE_BODY(node), env, state);\r
}\r
- else if (en->type == ENCLOSURE_IF_ELSE) {\r
+ else if (en->type == BAG_IF_ELSE) {\r
r = setup_call(NODE_BODY(node), env, state);\r
if (r != 0) return r;\r
if (IS_NOT_NULL(en->te.Then)) {\r
r = setup_call2(NODE_BODY(node));\r
break;\r
\r
- case NODE_ENCLOSURE:\r
+ case NODE_BAG:\r
if (! NODE_IS_IN_ZERO_REPEAT(node))\r
r = setup_call2(NODE_BODY(node));\r
\r
{\r
- EnclosureNode* en = ENCLOSURE_(node);\r
+ BagNode* en = BAG_(node);\r
\r
if (r != 0) return r;\r
- if (en->type == ENCLOSURE_IF_ELSE) {\r
+ if (en->type == BAG_IF_ELSE) {\r
if (IS_NOT_NULL(en->te.Then)) {\r
r = setup_call2(en->te.Then);\r
if (r != 0) return r;\r
{\r
QuantNode* qn = QUANT_(node);\r
\r
- if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 2)\r
+ if (IS_INFINITE_REPEAT(qn->upper) || qn->upper >= 2)\r
state |= IN_REAL_REPEAT;\r
if (qn->lower != qn->upper)\r
state |= IN_VAR_REPEAT;\r
AnchorNode* an = ANCHOR_(node);\r
\r
switch (an->type) {\r
- case ANCHOR_PREC_READ_NOT:\r
- case ANCHOR_LOOK_BEHIND_NOT:\r
+ case ANCR_PREC_READ_NOT:\r
+ case ANCR_LOOK_BEHIND_NOT:\r
state |= IN_NOT;\r
/* fall */\r
- case ANCHOR_PREC_READ:\r
- case ANCHOR_LOOK_BEHIND:\r
+ case ANCR_PREC_READ:\r
+ case ANCR_LOOK_BEHIND:\r
setup_called_state_call(NODE_ANCHOR_BODY(an), state);\r
break;\r
default:\r
}\r
break;\r
\r
- case NODE_ENCLOSURE:\r
+ case NODE_BAG:\r
{\r
- EnclosureNode* en = ENCLOSURE_(node);\r
+ BagNode* en = BAG_(node);\r
\r
- if (en->type == ENCLOSURE_MEMORY) {\r
+ if (en->type == BAG_MEMORY) {\r
if (NODE_IS_MARK1(node)) {\r
if ((~en->m.called_state & state) != 0) {\r
en->m.called_state |= state;\r
NODE_STATUS_REMOVE(node, MARK1);\r
}\r
}\r
- else if (en->type == ENCLOSURE_IF_ELSE) {\r
+ else if (en->type == BAG_IF_ELSE) {\r
if (IS_NOT_NULL(en->te.Then)) {\r
setup_called_state_call(en->te.Then, state);\r
}\r
break;\r
#endif\r
\r
- case NODE_ENCLOSURE:\r
+ case NODE_BAG:\r
{\r
- EnclosureNode* en = ENCLOSURE_(node);\r
+ BagNode* en = BAG_(node);\r
\r
switch (en->type) {\r
- case ENCLOSURE_MEMORY:\r
+ case BAG_MEMORY:\r
if (en->m.entry_count > 1)\r
state |= IN_MULTI_ENTRY;\r
\r
en->m.called_state |= state;\r
/* fall */\r
- case ENCLOSURE_OPTION:\r
- case ENCLOSURE_STOP_BACKTRACK:\r
+ case BAG_OPTION:\r
+ case BAG_STOP_BACKTRACK:\r
setup_called_state(NODE_BODY(node), state);\r
break;\r
- case ENCLOSURE_IF_ELSE:\r
+ case BAG_IF_ELSE:\r
setup_called_state(NODE_BODY(node), state);\r
if (IS_NOT_NULL(en->te.Then))\r
setup_called_state(en->te.Then, state);\r
{\r
QuantNode* qn = QUANT_(node);\r
\r
- if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 2)\r
+ if (IS_INFINITE_REPEAT(qn->upper) || qn->upper >= 2)\r
state |= IN_REAL_REPEAT;\r
if (qn->lower != qn->upper)\r
state |= IN_VAR_REPEAT;\r
AnchorNode* an = ANCHOR_(node);\r
\r
switch (an->type) {\r
- case ANCHOR_PREC_READ_NOT:\r
- case ANCHOR_LOOK_BEHIND_NOT:\r
+ case ANCR_PREC_READ_NOT:\r
+ case ANCR_LOOK_BEHIND_NOT:\r
state |= IN_NOT;\r
/* fall */\r
- case ANCHOR_PREC_READ:\r
- case ANCHOR_LOOK_BEHIND:\r
+ case ANCR_PREC_READ:\r
+ case ANCR_LOOK_BEHIND:\r
setup_called_state(NODE_ANCHOR_BODY(an), state);\r
break;\r
default:\r
/* allowed node types in look-behind */\r
#define ALLOWED_TYPE_IN_LB \\r
( NODE_BIT_LIST | NODE_BIT_ALT | NODE_BIT_STRING | NODE_BIT_CCLASS \\r
- | NODE_BIT_CTYPE | NODE_BIT_ANCHOR | NODE_BIT_ENCLOSURE | NODE_BIT_QUANT \\r
+ | NODE_BIT_CTYPE | NODE_BIT_ANCHOR | NODE_BIT_BAG | NODE_BIT_QUANT \\r
| NODE_BIT_CALL | NODE_BIT_GIMMICK)\r
\r
/* Bag-type masks (one bit per BAG_* value) that setup_tree passes to\r
   check_type_tree(): ..._IN_LB for ANCR_LOOK_BEHIND bodies and\r
   ..._IN_LB_NOT for ANCR_LOOK_BEHIND_NOT bodies.  BAG_MEMORY is present\r
   only in the first mask. */\r
-#define ALLOWED_ENCLOSURE_IN_LB ( 1<<ENCLOSURE_MEMORY | 1<<ENCLOSURE_OPTION )\r
-#define ALLOWED_ENCLOSURE_IN_LB_NOT (1<<ENCLOSURE_OPTION)\r
+#define ALLOWED_BAG_IN_LB ( 1<<BAG_MEMORY | 1<<BAG_OPTION | 1<<BAG_IF_ELSE )\r
+#define ALLOWED_BAG_IN_LB_NOT ( 1<<BAG_OPTION | 1<<BAG_IF_ELSE )\r
\r
#define ALLOWED_ANCHOR_IN_LB \\r
- ( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF \\r
- | ANCHOR_BEGIN_POSITION | ANCHOR_WORD_BOUNDARY | ANCHOR_NO_WORD_BOUNDARY \\r
- | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END \\r
- | ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY \\r
- | ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY )\r
+ ( ANCR_LOOK_BEHIND | ANCR_BEGIN_LINE | ANCR_END_LINE | ANCR_BEGIN_BUF \\r
+ | ANCR_BEGIN_POSITION | ANCR_WORD_BOUNDARY | ANCR_NO_WORD_BOUNDARY \\r
+ | ANCR_WORD_BEGIN | ANCR_WORD_END \\r
+ | ANCR_TEXT_SEGMENT_BOUNDARY | ANCR_NO_TEXT_SEGMENT_BOUNDARY )\r
\r
#define ALLOWED_ANCHOR_IN_LB_NOT \\r
- ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE \\r
- | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_WORD_BOUNDARY \\r
- | ANCHOR_NO_WORD_BOUNDARY | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END \\r
- | ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY \\r
- | ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY )\r
+ ( ANCR_LOOK_BEHIND | ANCR_LOOK_BEHIND_NOT | ANCR_BEGIN_LINE \\r
+ | ANCR_END_LINE | ANCR_BEGIN_BUF | ANCR_BEGIN_POSITION | ANCR_WORD_BOUNDARY \\r
+ | ANCR_NO_WORD_BOUNDARY | ANCR_WORD_BEGIN | ANCR_WORD_END \\r
+ | ANCR_TEXT_SEGMENT_BOUNDARY | ANCR_NO_TEXT_SEGMENT_BOUNDARY )\r
\r
int r;\r
AnchorNode* an = ANCHOR_(node);\r
\r
switch (an->type) {\r
- case ANCHOR_PREC_READ:\r
+ case ANCR_PREC_READ:\r
r = setup_tree(NODE_ANCHOR_BODY(an), reg, state, env);\r
break;\r
- case ANCHOR_PREC_READ_NOT:\r
+ case ANCR_PREC_READ_NOT:\r
r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state | IN_NOT), env);\r
break;\r
\r
- case ANCHOR_LOOK_BEHIND:\r
+ case ANCR_LOOK_BEHIND:\r
{\r
r = check_type_tree(NODE_ANCHOR_BODY(an), ALLOWED_TYPE_IN_LB,\r
- ALLOWED_ENCLOSURE_IN_LB, ALLOWED_ANCHOR_IN_LB);\r
+ ALLOWED_BAG_IN_LB, ALLOWED_ANCHOR_IN_LB);\r
if (r < 0) return r;\r
if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;\r
- r = setup_tree(NODE_ANCHOR_BODY(an), reg, state, env);\r
+ r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state|IN_LOOK_BEHIND), env);\r
if (r != 0) return r;\r
r = setup_look_behind(node, reg, env);\r
}\r
break;\r
\r
- case ANCHOR_LOOK_BEHIND_NOT:\r
+ case ANCR_LOOK_BEHIND_NOT:\r
{\r
r = check_type_tree(NODE_ANCHOR_BODY(an), ALLOWED_TYPE_IN_LB,\r
- ALLOWED_ENCLOSURE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);\r
+ ALLOWED_BAG_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);\r
if (r < 0) return r;\r
if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;\r
- r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state | IN_NOT), env);\r
+ r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state|IN_NOT|IN_LOOK_BEHIND),\r
+ env);\r
if (r != 0) return r;\r
r = setup_look_behind(node, reg, env);\r
}\r
NODE_STATUS_ADD(node, IN_MULTI_ENTRY);\r
}\r
\r
- if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {\r
+ if (IS_INFINITE_REPEAT(qn->upper) || qn->upper >= 1) {\r
d = tree_min_len(body, env);\r
if (d == 0) {\r
-#ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT\r
- qn->body_empty_info = quantifiers_memory_node_info(body);\r
- if (qn->body_empty_info == QUANT_BODY_IS_EMPTY_REC) {\r
- if (NODE_TYPE(body) == NODE_ENCLOSURE &&\r
- ENCLOSURE_(body)->type == ENCLOSURE_MEMORY) {\r
- MEM_STATUS_ON(env->bt_mem_end, ENCLOSURE_(body)->m.regnum);\r
+#ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT\r
+ qn->emptiness = quantifiers_memory_node_info(body);\r
+ if (qn->emptiness == BODY_IS_EMPTY_POSSIBILITY_REC) {\r
+ if (NODE_TYPE(body) == NODE_BAG &&\r
+ BAG_(body)->type == BAG_MEMORY) {\r
+ MEM_STATUS_ON(env->bt_mem_end, BAG_(body)->m.regnum);\r
}\r
}\r
#else\r
- qn->body_empty_info = QUANT_BODY_IS_EMPTY;\r
+ qn->emptiness = BODY_IS_EMPTY_POSSIBILITY;\r
#endif\r
}\r
}\r
\r
- if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 2)\r
+ if (IS_INFINITE_REPEAT(qn->upper) || qn->upper >= 2)\r
state |= IN_REAL_REPEAT;\r
if (qn->lower != qn->upper)\r
state |= IN_VAR_REPEAT;\r
/* expand string */\r
#define EXPAND_STRING_MAX_LENGTH 100\r
if (NODE_TYPE(body) == NODE_STRING) {\r
- if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper &&\r
+ if (!IS_INFINITE_REPEAT(qn->lower) && qn->lower == qn->upper &&\r
qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) {\r
int len = NODE_STRING_LEN(body);\r
StrNode* sn = STR_(body);\r
}\r
}\r
\r
-#ifdef USE_OP_PUSH_OR_JUMP_EXACT\r
- if (qn->greedy && (qn->body_empty_info != QUANT_BODY_IS_NOT_EMPTY)) {\r
+ if (qn->greedy && (qn->emptiness == BODY_IS_NOT_EMPTY)) {\r
if (NODE_TYPE(body) == NODE_QUANT) {\r
QuantNode* tqn = QUANT_(body);\r
if (IS_NOT_NULL(tqn->head_exact)) {\r
qn->head_exact = get_head_value_node(NODE_BODY(node), 1, reg);\r
}\r
}\r
-#endif\r
\r
return r;\r
}\r
\r
/* setup_tree does the following work.\r
- 1. check empty loop. (set qn->body_empty_info)\r
+ 1. check empty loop. (set qn->emptiness)\r
2. expand ignore-case in char class.\r
3. set memory status bit flags. (reg->mem_stats)\r
4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].\r
\r
case NODE_STRING:\r
if (IS_IGNORECASE(reg->options) && !NODE_STRING_IS_RAW(node)) {\r
- r = expand_case_fold_string(node, reg);\r
+ r = expand_case_fold_string(node, reg, state);\r
}\r
break;\r
\r
}\r
break;\r
\r
- case NODE_ENCLOSURE:\r
+ case NODE_BAG:\r
{\r
- EnclosureNode* en = ENCLOSURE_(node);\r
+ BagNode* en = BAG_(node);\r
\r
switch (en->type) {\r
- case ENCLOSURE_OPTION:\r
+ case BAG_OPTION:\r
{\r
OnigOptionType options = reg->options;\r
- reg->options = ENCLOSURE_(node)->o.options;\r
+ reg->options = BAG_(node)->o.options;\r
r = setup_tree(NODE_BODY(node), reg, state, env);\r
reg->options = options;\r
}\r
break;\r
\r
- case ENCLOSURE_MEMORY:\r
+ case BAG_MEMORY:\r
#ifdef USE_CALL\r
state |= en->m.called_state;\r
#endif\r
r = setup_tree(NODE_BODY(node), reg, state, env);\r
break;\r
\r
- case ENCLOSURE_STOP_BACKTRACK:\r
+ case BAG_STOP_BACKTRACK:\r
{\r
Node* target = NODE_BODY(node);\r
r = setup_tree(target, reg, state, env);\r
if (NODE_TYPE(target) == NODE_QUANT) {\r
QuantNode* tqn = QUANT_(target);\r
- if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&\r
+ if (IS_INFINITE_REPEAT(tqn->upper) && tqn->lower <= 1 &&\r
tqn->greedy != 0) { /* (?>a*), a*+ etc... */\r
- if (NODE_IS_SIMPLE_TYPE(NODE_BODY(target)))\r
- NODE_STATUS_ADD(node, STOP_BT_SIMPLE_REPEAT);\r
+ if (is_strict_real_node(NODE_BODY(target)))\r
+ NODE_STATUS_ADD(node, STRICT_REAL_REPEAT);\r
}\r
}\r
}\r
break;\r
\r
- case ENCLOSURE_IF_ELSE:\r
+ case BAG_IF_ELSE:\r
r = setup_tree(NODE_BODY(node), reg, (state | IN_ALT), env);\r
if (r != 0) return r;\r
if (IS_NOT_NULL(en->te.Then)) {\r
return r;\r
}\r
\r
-/* set skip map for Boyer-Moore search */\r
+/*\r
+ * Fill the byte-indexed skip table skip[0 .. CHAR_MAP_SIZE-1] for the\r
+ * search string [s, end).\r
+ *\r
+ *   reg         supplies the encoding (reg->enc) and reg->case_fold_flag.\r
+ *   case_expand when non-zero, the bytes of every case-fold variant of each\r
+ *               character are entered as well (an entry is only lowered).\r
+ *   s, end      search string; s < end is assumed (the first character is\r
+ *               read unconditionally).\r
+ *   skip        output table; every entry starts at len + offset.\r
+ *   roffset     receives the offset that was used.\r
+ *\r
+ * The offset comes from ENC_GET_SKIP_OFFSET(enc); when that yields\r
+ * ENC_SKIP_OFFSET_1_OR_0 it becomes 1 if the last character of the string\r
+ * is a single byte and 0 otherwise.\r
+ *\r
+ * Returns 0 on success, ONIGERR_PARSER_BUG when len + offset >= 255\r
+ * (the table entries are UChar).\r
+ */\r
static int\r
-set_bm_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,\r
- UChar skip[], int** int_skip)\r
+set_sunday_quick_search_or_bmh_skip_table(regex_t* reg, int case_expand,\r
+                                          UChar* s, UChar* end,\r
+                                          UChar skip[], int* roffset)\r
{\r
- int i, len;\r
+  int i, j, k, len, offset;\r
+  int n, clen, blen;\r
+  UChar* p;\r
+  OnigEncoding enc;\r
+  OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];\r
+  UChar buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];\r
+\r
+  enc = reg->enc;\r
+  offset = ENC_GET_SKIP_OFFSET(enc);\r
+  if (offset == ENC_SKIP_OFFSET_1_OR_0) {\r
+    /* walk to the last character; its byte length selects the offset */\r
+    p = s;\r
+    while (1) {\r
+      len = enclen(enc, p);\r
+      if (p + len >= end) {\r
+        if (len == 1) offset = 1;\r
+        else          offset = 0;\r
+        break;\r
+      }\r
+      p += len;\r
+    }\r
+  }\r
\r
len = (int )(end - s);\r
- if (len < ONIG_CHAR_TABLE_SIZE) {\r
- for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = len;\r
+  if (len + offset >= 255)\r
+    return ONIGERR_PARSER_BUG;\r
\r
- for (i = 0; i < len - 1; i++)\r
- skip[s[i]] = len - 1 - i;\r
+  *roffset = offset;\r
+\r
+  /* default: a byte that does not occur in the string gets the full skip */\r
+  for (i = 0; i < CHAR_MAP_SIZE; i++) {\r
+    skip[i] = (UChar )(len + offset);\r
}\r
- else {\r
- if (IS_NULL(*int_skip)) {\r
- *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);\r
- if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;\r
+\r
+  for (p = s; p < end; ) {\r
+    int z;\r
+\r
+    clen = enclen(enc, p);\r
+    if (p + clen > end) clen = (int )(end - p); /* truncated last character */\r
+\r
+    len = (int )(end - p);\r
+    for (j = 0; j < clen; j++) {\r
+      z = len - j + (offset - 1);\r
+      if (z <= 0) break;\r
+      skip[p[j]] = (UChar )z;\r
+    }\r
+\r
+    if (case_expand != 0) {\r
+      n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,\r
+                                             p, end, items);\r
+      for (k = 0; k < n; k++) {\r
+        blen = ONIGENC_CODE_TO_MBC(enc, items[k].code[0], buf);\r
+        if (blen <= 0) continue; /* nothing valid was written to buf */\r
+        /* never read buf beyond the bytes that were actually encoded */\r
+        for (j = 0; j < clen && j < blen; j++) {\r
+          z = len - j + (offset - 1);\r
+          if (z <= 0) break;\r
+          if (skip[buf[j]] > z)\r
+            skip[buf[j]] = (UChar )z;\r
+        }\r
+      }\r
}\r
- for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = len;\r
\r
- for (i = 0; i < len - 1; i++)\r
- (*int_skip)[s[i]] = len - 1 - i;\r
+    p += clen;\r
}\r
+\r
return 0;\r
}\r
\r
+\r
#define OPT_EXACT_MAXLEN 24 /* capacity in bytes of the s[] buffer of an exact-string candidate */\r
\r
/* NOTE(review): the 255 bound presumably mirrors the UChar-sized skip-table\r
   entries (the skip-table builder rejects len + offset >= 255) - confirm. */\r
+#if OPT_EXACT_MAXLEN >= 255\r
+#error Too big OPT_EXACT_MAXLEN\r
+#endif\r
+\r
typedef struct {\r
OnigLen min; /* min byte length */\r
OnigLen max; /* max byte length */\r
MinMax mmd; /* position */\r
OptAnc anc;\r
int reach_end;\r
- int ignore_case;\r
+ int case_fold;\r
+ int good_case_fold;\r
int len;\r
UChar s[OPT_EXACT_MAXLEN];\r
-} OptExact;\r
+} OptStr;\r
\r
/* Byte-map candidate collected for search optimization. */\r
typedef struct {\r
MinMax mmd; /* position */\r
OptAnc anc;\r
int value; /* weighted value */\r
- UChar map[ONIG_CHAR_TABLE_SIZE];\r
+ UChar map[CHAR_MAP_SIZE]; /* indexed by byte value; merging sets entries to 1 */\r
} OptMap;\r
\r
/* Optimization info for one node: optimize_nodes() fills an OptNode for the\r
   node it is given.  It carries the length range, anchor info, three\r
   exact-string candidates and one byte-map candidate. */\r
typedef struct {\r
- MinMax len;\r
- OptAnc anc;\r
- OptExact exb; /* boundary */\r
- OptExact exm; /* middle */\r
- OptExact expr; /* prec read (?=...) */\r
- OptMap map; /* boundary */\r
-} NodeOpt;\r
+ MinMax len;\r
+ OptAnc anc;\r
+ OptStr sb; /* boundary */\r
+ OptStr sm; /* middle */\r
+ OptStr spr; /* prec read (?=...) */\r
+ OptMap map; /* boundary */\r
+} OptNode;\r
\r
\r
static int\r
{\r
/* 1000 / (min-max-dist + 1) */\r
static const short int dist_vals[] = {\r
- 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100, \r
- 91, 83, 77, 71, 67, 63, 59, 56, 53, 50, \r
- 48, 45, 43, 42, 40, 38, 37, 36, 34, 33, \r
- 32, 31, 30, 29, 29, 28, 27, 26, 26, 25, \r
- 24, 24, 23, 23, 22, 22, 21, 21, 20, 20, \r
- 20, 19, 19, 19, 18, 18, 18, 17, 17, 17, \r
- 16, 16, 16, 16, 15, 15, 15, 15, 14, 14, \r
- 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, \r
- 12, 12, 12, 12, 12, 12, 11, 11, 11, 11, \r
+ 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,\r
+ 91, 83, 77, 71, 67, 63, 59, 56, 53, 50,\r
+ 48, 45, 43, 42, 40, 38, 37, 36, 34, 33,\r
+ 32, 31, 30, 29, 29, 28, 27, 26, 26, 25,\r
+ 24, 24, 23, 23, 22, 22, 21, 21, 20, 20,\r
+ 20, 19, 19, 19, 18, 18, 18, 17, 17, 17,\r
+ 16, 16, 16, 16, 15, 15, 15, 15, 14, 14,\r
+ 14, 14, 14, 14, 13, 13, 13, 13, 13, 13,\r
+ 12, 12, 12, 12, 12, 12, 11, 11, 11, 11,\r
11, 11, 11, 11, 11, 10, 10, 10, 10, 10\r
};\r
\r
/* Return non-zero when the two ranges have the same min and the same max,\r
   0 otherwise. */\r
static int\r
is_equal_mml(MinMax* a, MinMax* b)\r
{\r
- return (a->min == b->min && a->max == b->max) ? 1 : 0;\r
+ return a->min == b->min && a->max == b->max;\r
}\r
\r
static void\r
to->right |= left->right;\r
}\r
else {\r
- to->right |= (left->right & ANCHOR_PREC_READ_NOT);\r
+ to->right |= (left->right & ANCR_PREC_READ_NOT);\r
}\r
}\r
\r
/* Return 0 when a is ANCR_END_BUF, ANCR_SEMI_END_BUF, ANCR_END_LINE,\r
   ANCR_PREC_READ or ANCR_PREC_READ_NOT, and 1 for every other value.\r
   NOTE(review): presumably "left" means the anchor belongs to the left\r
   (start) side of the anchor info - confirm against the callers. */\r
static int\r
is_left(int a)\r
{\r
- if (a == ANCHOR_END_BUF || a == ANCHOR_SEMI_END_BUF ||\r
- a == ANCHOR_END_LINE || a == ANCHOR_PREC_READ || a == ANCHOR_PREC_READ_NOT)\r
+ if (a == ANCR_END_BUF || a == ANCR_SEMI_END_BUF ||\r
+ a == ANCR_END_LINE || a == ANCR_PREC_READ || a == ANCR_PREC_READ_NOT)\r
return 0;\r
\r
return 1;\r
}\r
\r
/* Return non-zero when the candidate's length has reached\r
   OPT_EXACT_MAXLEN, 0 otherwise. */\r
static int\r
-is_full_opt_exact(OptExact* e)\r
+is_full_opt_exact(OptStr* e)\r
{\r
- return (e->len >= OPT_EXACT_MAXLEN ? 1 : 0);\r
+ return e->len >= OPT_EXACT_MAXLEN;\r
}\r
\r
/* Reset an exact-string candidate to the empty state: position range and\r
   anchor info cleared, all flags and the length set to 0, and the first\r
   byte of s[] set to '\0'. */\r
static void\r
-clear_opt_exact(OptExact* e)\r
+clear_opt_exact(OptStr* e)\r
{\r
clear_mml(&e->mmd);\r
clear_opt_anc_info(&e->anc);\r
- e->reach_end = 0;\r
- e->ignore_case = 0;\r
- e->len = 0;\r
- e->s[0] = '\0';\r
+ e->reach_end = 0;\r
+ e->case_fold = 0;\r
+ e->good_case_fold = 0;\r
+ e->len = 0;\r
+ e->s[0] = '\0'; /* only the first byte is reset; the rest of s[] is left as is */\r
}\r
\r
/* Copy the whole candidate *from into *to by struct assignment. */\r
static void\r
-copy_opt_exact(OptExact* to, OptExact* from)\r
+copy_opt_exact(OptStr* to, OptStr* from)\r
{\r
*to = *from;\r
}\r
\r
static int\r
-concat_opt_exact(OptExact* to, OptExact* add, OnigEncoding enc)\r
+concat_opt_exact(OptStr* to, OptStr* add, OnigEncoding enc)\r
{\r
int i, j, len, r;\r
UChar *p, *end;\r
OptAnc tanc;\r
\r
- if (! to->ignore_case && add->ignore_case) {\r
- if (to->len >= add->len) return 0; /* avoid */\r
+ if (add->case_fold != 0) {\r
+ if (! to->case_fold) {\r
+ if (to->len > 1 || to->len >= add->len) return 0; /* avoid */\r
\r
- to->ignore_case = 1;\r
+ to->case_fold = 1;\r
+ }\r
+ else {\r
+ if (to->good_case_fold != 0) {\r
+ if (add->good_case_fold == 0) return 0;\r
+ }\r
+ }\r
}\r
\r
r = 0;\r
}\r
\r
static void\r
-concat_opt_exact_str(OptExact* to, UChar* s, UChar* end, OnigEncoding enc)\r
+concat_opt_exact_str(OptStr* to, UChar* s, UChar* end, OnigEncoding enc)\r
{\r
int i, j, len;\r
UChar *p;\r
}\r
\r
to->len = i;\r
+\r
+ if (p >= end && to->len == (int )(end - s))\r
+ to->reach_end = 1;\r
}\r
\r
static void\r
-alt_merge_opt_exact(OptExact* to, OptExact* add, OptEnv* env)\r
+alt_merge_opt_exact(OptStr* to, OptStr* add, OptEnv* env)\r
{\r
int i, j, len;\r
\r
to->reach_end = 0;\r
}\r
to->len = i;\r
- to->ignore_case |= add->ignore_case;\r
+ if (add->case_fold != 0)\r
+ to->case_fold = 1;\r
+ if (add->good_case_fold == 0)\r
+ to->good_case_fold = 0;\r
\r
alt_merge_opt_anc_info(&to->anc, &add->anc);\r
if (! to->reach_end) to->anc.right = 0;\r
}\r
\r
static void\r
-select_opt_exact(OnigEncoding enc, OptExact* now, OptExact* alt)\r
+select_opt_exact(OnigEncoding enc, OptStr* now, OptStr* alt)\r
{\r
int vn, va;\r
\r
if (alt->len > 1) va += 5;\r
}\r
\r
- if (now->ignore_case == 0) vn *= 2;\r
- if (alt->ignore_case == 0) va *= 2;\r
+ if (now->case_fold == 0) vn *= 2;\r
+ if (alt->case_fold == 0) va *= 2;\r
+\r
+ if (now->good_case_fold != 0) vn *= 4;\r
+ if (alt->good_case_fold != 0) va *= 4;\r
\r
if (comp_distance_value(&now->mmd, &alt->mmd, vn, va) > 0)\r
copy_opt_exact(now, alt);\r
}\r
\r
/* Compare an exact-string candidate e with a byte-map candidate m.\r
   Returns -1 at once when m->value <= 0; otherwise returns the result of\r
   comp_distance_value() on the two position ranges and the scores ae\r
   (exact string) and am (map).\r
   NOTE(review): a positive result appears to mean "prefer the second\r
   candidate" (select_opt_exact copies alt on > 0) - confirm. */\r
static int\r
-comp_opt_exact_or_map(OptExact* e, OptMap* m)\r
+comp_opt_exact_or_map(OptStr* e, OptMap* m)\r
{\r
#define COMP_EM_BASE 20\r
int ae, am;\r
+ int case_value;\r
\r
if (m->value <= 0) return -1;\r
\r
- ae = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2);\r
+ if (e->case_fold != 0) {\r
+ if (e->good_case_fold != 0)\r
+ case_value = 2; /* case-folded, but flagged as good case fold */\r
+ else\r
+ case_value = 1; /* case-folded without the good flag: lowest weight */\r
+ }\r
+ else\r
+ case_value = 3; /* no case folding: highest weight */\r
+\r
+ ae = COMP_EM_BASE * e->len * case_value;\r
am = COMP_EM_BASE * 5 * 2 / m->value;\r
return comp_distance_value(&e->mmd, &m->mmd, ae, am);\r
}\r
alt_merge_mml(&to->mmd, &add->mmd);\r
\r
val = 0;\r
- for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {\r
+ for (i = 0; i < CHAR_MAP_SIZE; i++) {\r
if (add->map[i])\r
to->map[i] = 1;\r
\r
}\r
\r
/* Copy the range *plen into the mmd (position) field of the boundary\r
   string, the prec-read string and the map of opt.  The middle string\r
   candidate is not touched. */\r
static void\r
-set_bound_node_opt_info(NodeOpt* opt, MinMax* plen)\r
+set_bound_node_opt_info(OptNode* opt, MinMax* plen)\r
{\r
- copy_mml(&(opt->exb.mmd), plen);\r
- copy_mml(&(opt->expr.mmd), plen);\r
- copy_mml(&(opt->map.mmd), plen);\r
+ copy_mml(&(opt->sb.mmd), plen);\r
+ copy_mml(&(opt->spr.mmd), plen);\r
+ copy_mml(&(opt->map.mmd), plen);\r
}\r
\r
/* Reset every member of opt: the length range, the anchor info, the three\r
   exact-string candidates and the map. */\r
static void\r
-clear_node_opt_info(NodeOpt* opt)\r
+clear_node_opt_info(OptNode* opt)\r
{\r
clear_mml(&opt->len);\r
clear_opt_anc_info(&opt->anc);\r
- clear_opt_exact(&opt->exb);\r
- clear_opt_exact(&opt->exm);\r
- clear_opt_exact(&opt->expr);\r
+ clear_opt_exact(&opt->sb);\r
+ clear_opt_exact(&opt->sm);\r
+ clear_opt_exact(&opt->spr);\r
clear_opt_map(&opt->map);\r
}\r
\r
/* Copy the whole structure *from into *to with xmemcpy. */\r
static void\r
-copy_node_opt_info(NodeOpt* to, NodeOpt* from)\r
+copy_node_opt_info(OptNode* to, OptNode* from)\r
{\r
- xmemcpy(to,from,sizeof(NodeOpt));\r
+ xmemcpy(to,from,sizeof(OptNode));\r
}\r
\r
static void\r
-concat_left_node_opt_info(OnigEncoding enc, NodeOpt* to, NodeOpt* add)\r
+concat_left_node_opt_info(OnigEncoding enc, OptNode* to, OptNode* add)\r
{\r
- int exb_reach, exm_reach;\r
+ int sb_reach, sm_reach;\r
OptAnc tanc;\r
\r
concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);\r
copy_opt_anc_info(&to->anc, &tanc);\r
\r
- if (add->exb.len > 0 && to->len.max == 0) {\r
- concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc, to->len.max, add->len.max);\r
- copy_opt_anc_info(&add->exb.anc, &tanc);\r
+ if (add->sb.len > 0 && to->len.max == 0) {\r
+ concat_opt_anc_info(&tanc, &to->anc, &add->sb.anc, to->len.max, add->len.max);\r
+ copy_opt_anc_info(&add->sb.anc, &tanc);\r
}\r
\r
if (add->map.value > 0 && to->len.max == 0) {\r
add->map.anc.left |= to->anc.left;\r
}\r
\r
- exb_reach = to->exb.reach_end;\r
- exm_reach = to->exm.reach_end;\r
+ sb_reach = to->sb.reach_end;\r
+ sm_reach = to->sm.reach_end;\r
\r
if (add->len.max != 0)\r
- to->exb.reach_end = to->exm.reach_end = 0;\r
+ to->sb.reach_end = to->sm.reach_end = 0;\r
\r
- if (add->exb.len > 0) {\r
- if (exb_reach) {\r
- concat_opt_exact(&to->exb, &add->exb, enc);\r
- clear_opt_exact(&add->exb);\r
+ if (add->sb.len > 0) {\r
+ if (sb_reach) {\r
+ concat_opt_exact(&to->sb, &add->sb, enc);\r
+ clear_opt_exact(&add->sb);\r
}\r
- else if (exm_reach) {\r
- concat_opt_exact(&to->exm, &add->exb, enc);\r
- clear_opt_exact(&add->exb);\r
+ else if (sm_reach) {\r
+ concat_opt_exact(&to->sm, &add->sb, enc);\r
+ clear_opt_exact(&add->sb);\r
}\r
}\r
- select_opt_exact(enc, &to->exm, &add->exb);\r
- select_opt_exact(enc, &to->exm, &add->exm);\r
+ select_opt_exact(enc, &to->sm, &add->sb);\r
+ select_opt_exact(enc, &to->sm, &add->sm);\r
\r
- if (to->expr.len > 0) {\r
+ if (to->spr.len > 0) {\r
if (add->len.max > 0) {\r
- if (to->expr.len > (int )add->len.max)\r
- to->expr.len = add->len.max;\r
+ if (to->spr.len > (int )add->len.max)\r
+ to->spr.len = add->len.max;\r
\r
- if (to->expr.mmd.max == 0)\r
- select_opt_exact(enc, &to->exb, &to->expr);\r
+ if (to->spr.mmd.max == 0)\r
+ select_opt_exact(enc, &to->sb, &to->spr);\r
else\r
- select_opt_exact(enc, &to->exm, &to->expr);\r
+ select_opt_exact(enc, &to->sm, &to->spr);\r
}\r
}\r
- else if (add->expr.len > 0) {\r
- copy_opt_exact(&to->expr, &add->expr);\r
+ else if (add->spr.len > 0) {\r
+ copy_opt_exact(&to->spr, &add->spr);\r
}\r
\r
select_opt_map(&to->map, &add->map);\r
}\r
\r
static void\r
-alt_merge_node_opt_info(NodeOpt* to, NodeOpt* add, OptEnv* env)\r
+alt_merge_node_opt_info(OptNode* to, OptNode* add, OptEnv* env)\r
{\r
alt_merge_opt_anc_info(&to->anc, &add->anc);\r
- alt_merge_opt_exact(&to->exb, &add->exb, env);\r
- alt_merge_opt_exact(&to->exm, &add->exm, env);\r
- alt_merge_opt_exact(&to->expr, &add->expr, env);\r
+ alt_merge_opt_exact(&to->sb, &add->sb, env);\r
+ alt_merge_opt_exact(&to->sm, &add->sm, env);\r
+ alt_merge_opt_exact(&to->spr, &add->spr, env);\r
alt_merge_opt_map(env->enc, &to->map, &add->map);\r
\r
alt_merge_mml(&to->len, &add->len);\r
#define MAX_NODE_OPT_INFO_REF_COUNT 5\r
\r
static int\r
-optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env)\r
+optimize_nodes(Node* node, OptNode* opt, OptEnv* env)\r
{\r
int i;\r
int r;\r
- NodeOpt xo;\r
+ OptNode xo;\r
OnigEncoding enc;\r
\r
r = 0;\r
/* int is_raw = NODE_STRING_IS_RAW(node); */\r
\r
if (! NODE_STRING_IS_AMBIG(node)) {\r
- concat_opt_exact_str(&opt->exb, sn->s, sn->end, enc);\r
+ concat_opt_exact_str(&opt->sb, sn->s, sn->end, enc);\r
if (slen > 0) {\r
add_char_opt_map(&opt->map, *(sn->s), enc);\r
}\r
max = ONIGENC_MBC_MAXLEN_DIST(enc) * n;\r
}\r
else {\r
- concat_opt_exact_str(&opt->exb, sn->s, sn->end, enc);\r
- opt->exb.ignore_case = 1;\r
+ concat_opt_exact_str(&opt->sb, sn->s, sn->end, enc);\r
+ opt->sb.case_fold = 1;\r
+ if (NODE_STRING_IS_GOOD_AMBIG(node))\r
+ opt->sb.good_case_fold = 1;\r
\r
if (slen > 0) {\r
r = add_char_amb_opt_map(&opt->map, sn->s, sn->end,\r
\r
set_mml(&opt->len, slen, max);\r
}\r
-\r
- if (opt->exb.len == slen)\r
- opt->exb.reach_end = 1;\r
}\r
break;\r
\r
\r
case NODE_ANCHOR:\r
switch (ANCHOR_(node)->type) {\r
- case ANCHOR_BEGIN_BUF:\r
- case ANCHOR_BEGIN_POSITION:\r
- case ANCHOR_BEGIN_LINE:\r
- case ANCHOR_END_BUF:\r
- case ANCHOR_SEMI_END_BUF:\r
- case ANCHOR_END_LINE:\r
- case ANCHOR_PREC_READ_NOT:\r
- case ANCHOR_LOOK_BEHIND:\r
+ case ANCR_BEGIN_BUF:\r
+ case ANCR_BEGIN_POSITION:\r
+ case ANCR_BEGIN_LINE:\r
+ case ANCR_END_BUF:\r
+ case ANCR_SEMI_END_BUF:\r
+ case ANCR_END_LINE:\r
+ case ANCR_PREC_READ_NOT:\r
+ case ANCR_LOOK_BEHIND:\r
add_opt_anc_info(&opt->anc, ANCHOR_(node)->type);\r
break;\r
\r
- case ANCHOR_PREC_READ:\r
+ case ANCR_PREC_READ:\r
{\r
r = optimize_nodes(NODE_BODY(node), &xo, env);\r
if (r == 0) {\r
- if (xo.exb.len > 0)\r
- copy_opt_exact(&opt->expr, &xo.exb);\r
- else if (xo.exm.len > 0)\r
- copy_opt_exact(&opt->expr, &xo.exm);\r
+ if (xo.sb.len > 0)\r
+ copy_opt_exact(&opt->spr, &xo.sb);\r
+ else if (xo.sm.len > 0)\r
+ copy_opt_exact(&opt->spr, &xo.sm);\r
\r
- opt->expr.reach_end = 0;\r
+ opt->spr.reach_end = 0;\r
\r
if (xo.map.value > 0)\r
copy_opt_map(&opt->map, &xo.map);\r
}\r
break;\r
\r
- case ANCHOR_LOOK_BEHIND_NOT:\r
+ case ANCR_LOOK_BEHIND_NOT:\r
break;\r
}\r
break;\r
set_mml(&opt->len, 0, INFINITE_LEN);\r
else {\r
OnigOptionType save = env->options;\r
- env->options = ENCLOSURE_(NODE_BODY(node))->o.options;\r
+ env->options = BAG_(NODE_BODY(node))->o.options;\r
r = optimize_nodes(NODE_BODY(node), opt, env);\r
env->options = save;\r
}\r
\r
if (qn->lower > 0) {\r
copy_node_opt_info(opt, &xo);\r
- if (xo.exb.len > 0) {\r
- if (xo.exb.reach_end) {\r
- for (i = 2; i <= qn->lower && ! is_full_opt_exact(&opt->exb); i++) {\r
- int rc = concat_opt_exact(&opt->exb, &xo.exb, enc);\r
+ if (xo.sb.len > 0) {\r
+ if (xo.sb.reach_end) {\r
+ for (i = 2; i <= qn->lower && ! is_full_opt_exact(&opt->sb); i++) {\r
+ int rc = concat_opt_exact(&opt->sb, &xo.sb, enc);\r
if (rc > 0) break;\r
}\r
- if (i < qn->lower) opt->exb.reach_end = 0;\r
+ if (i < qn->lower) opt->sb.reach_end = 0;\r
}\r
}\r
\r
if (qn->lower != qn->upper) {\r
- opt->exb.reach_end = 0;\r
- opt->exm.reach_end = 0;\r
+ opt->sb.reach_end = 0;\r
+ opt->sm.reach_end = 0;\r
}\r
if (qn->lower > 1)\r
- opt->exm.reach_end = 0;\r
+ opt->sm.reach_end = 0;\r
}\r
\r
- if (IS_REPEAT_INFINITE(qn->upper)) {\r
+ if (IS_INFINITE_REPEAT(qn->upper)) {\r
if (env->mmd.max == 0 &&\r
NODE_IS_ANYCHAR(NODE_BODY(node)) && qn->greedy != 0) {\r
if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), env)))\r
- add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF_ML);\r
+ add_opt_anc_info(&opt->anc, ANCR_ANYCHAR_INF_ML);\r
else\r
- add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF);\r
+ add_opt_anc_info(&opt->anc, ANCR_ANYCHAR_INF);\r
}\r
\r
max = (xo.len.max > 0 ? INFINITE_LEN : 0);\r
}\r
break;\r
\r
- case NODE_ENCLOSURE:\r
+ case NODE_BAG:\r
{\r
- EnclosureNode* en = ENCLOSURE_(node);\r
+ BagNode* en = BAG_(node);\r
\r
switch (en->type) {\r
- case ENCLOSURE_OPTION:\r
+ case BAG_OPTION:\r
{\r
OnigOptionType save = env->options;\r
\r
}\r
break;\r
\r
- case ENCLOSURE_MEMORY:\r
+ case BAG_MEMORY:\r
#ifdef USE_CALL\r
en->opt_count++;\r
if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {\r
#endif\r
{\r
r = optimize_nodes(NODE_BODY(node), opt, env);\r
- if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF_MASK)) {\r
+ if (is_set_opt_anc_info(&opt->anc, ANCR_ANYCHAR_INF_MASK)) {\r
if (MEM_STATUS_AT0(env->scan_env->backrefed_mem, en->m.regnum))\r
- remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF_MASK);\r
+ remove_opt_anc_info(&opt->anc, ANCR_ANYCHAR_INF_MASK);\r
}\r
}\r
break;\r
\r
- case ENCLOSURE_STOP_BACKTRACK:\r
+ case BAG_STOP_BACKTRACK:\r
r = optimize_nodes(NODE_BODY(node), opt, env);\r
break;\r
\r
- case ENCLOSURE_IF_ELSE:\r
+ case BAG_IF_ELSE:\r
{\r
OptEnv nenv;\r
\r
copy_opt_env(&nenv, env);\r
- r = optimize_nodes(NODE_ENCLOSURE_BODY(en), &xo, &nenv);\r
+ r = optimize_nodes(NODE_BAG_BODY(en), &xo, &nenv);\r
if (r == 0) {\r
add_mml(&nenv.mmd, &xo.len);\r
concat_left_node_opt_info(enc, opt, &xo);\r
}\r
\r
static int\r
-set_optimize_exact(regex_t* reg, OptExact* e)\r
+set_optimize_exact(regex_t* reg, OptStr* e)\r
{\r
int r;\r
\r
if (e->len == 0) return 0;\r
\r
- if (e->ignore_case) {\r
- reg->exact = (UChar* )xmalloc(e->len);\r
- CHECK_NULL_RETURN_MEMERR(reg->exact);\r
- xmemcpy(reg->exact, e->s, e->len);\r
- reg->exact_end = reg->exact + e->len;\r
- reg->optimize = OPTIMIZE_EXACT_IC;\r
+ reg->exact = (UChar* )xmalloc(e->len);\r
+ CHECK_NULL_RETURN_MEMERR(reg->exact);\r
+ xmemcpy(reg->exact, e->s, e->len);\r
+ reg->exact_end = reg->exact + e->len;\r
+\r
+ if (e->case_fold) {\r
+ reg->optimize = OPTIMIZE_STR_CASE_FOLD;\r
+ if (e->good_case_fold != 0) {\r
+ if (e->len >= 2) {\r
+ r = set_sunday_quick_search_or_bmh_skip_table(reg, 1,\r
+ reg->exact, reg->exact_end,\r
+ reg->map, &(reg->map_offset));\r
+ if (r != 0) return r;\r
+ reg->optimize = OPTIMIZE_STR_CASE_FOLD_FAST;\r
+ }\r
+ }\r
}\r
else {\r
int allow_reverse;\r
\r
- reg->exact = onigenc_strdup(reg->enc, e->s, e->s + e->len);\r
- CHECK_NULL_RETURN_MEMERR(reg->exact);\r
- reg->exact_end = reg->exact + e->len;\r
- \r
allow_reverse =\r
ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);\r
\r
- if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {\r
- r = set_bm_skip(reg->exact, reg->exact_end, reg->enc,\r
- reg->map, &(reg->int_map));\r
+ if (e->len >= 2 || (e->len >= 1 && allow_reverse)) {\r
+ r = set_sunday_quick_search_or_bmh_skip_table(reg, 0,\r
+ reg->exact, reg->exact_end,\r
+ reg->map, &(reg->map_offset));\r
if (r != 0) return r;\r
\r
reg->optimize = (allow_reverse != 0\r
- ? OPTIMIZE_EXACT_BM : OPTIMIZE_EXACT_BM_NO_REV);\r
+ ? OPTIMIZE_STR_FAST\r
+ : OPTIMIZE_STR_FAST_STEP_FORWARD);\r
}\r
else {\r
- reg->optimize = OPTIMIZE_EXACT;\r
+ reg->optimize = OPTIMIZE_STR;\r
}\r
}\r
\r
{\r
int i;\r
\r
- for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)\r
+ for (i = 0; i < CHAR_MAP_SIZE; i++)\r
reg->map[i] = m->map[i];\r
\r
reg->optimize = OPTIMIZE_MAP;\r
static void\r
set_sub_anchor(regex_t* reg, OptAnc* anc)\r
{\r
- reg->sub_anchor |= anc->left & ANCHOR_BEGIN_LINE;\r
- reg->sub_anchor |= anc->right & ANCHOR_END_LINE;\r
+ reg->sub_anchor |= anc->left & ANCR_BEGIN_LINE;\r
+ reg->sub_anchor |= anc->right & ANCR_END_LINE;\r
}\r
\r
#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)\r
set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)\r
{\r
int r;\r
- NodeOpt opt;\r
+ OptNode opt;\r
OptEnv env;\r
\r
env.enc = reg->enc;\r
r = optimize_nodes(node, &opt, &env);\r
if (r != 0) return r;\r
\r
- reg->anchor = opt.anc.left & (ANCHOR_BEGIN_BUF |\r
- ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_INF | ANCHOR_ANYCHAR_INF_ML |\r
- ANCHOR_LOOK_BEHIND);\r
+ reg->anchor = opt.anc.left & (ANCR_BEGIN_BUF |\r
+ ANCR_BEGIN_POSITION | ANCR_ANYCHAR_INF | ANCR_ANYCHAR_INF_ML |\r
+ ANCR_LOOK_BEHIND);\r
\r
- if ((opt.anc.left & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) != 0)\r
- reg->anchor &= ~ANCHOR_ANYCHAR_INF_ML;\r
+ if ((opt.anc.left & (ANCR_LOOK_BEHIND | ANCR_PREC_READ_NOT)) != 0)\r
+ reg->anchor &= ~ANCR_ANYCHAR_INF_ML;\r
\r
- reg->anchor |= opt.anc.right & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF |\r
- ANCHOR_PREC_READ_NOT);\r
+ reg->anchor |= opt.anc.right & (ANCR_END_BUF | ANCR_SEMI_END_BUF |\r
+ ANCR_PREC_READ_NOT);\r
\r
- if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {\r
+ if (reg->anchor & (ANCR_END_BUF | ANCR_SEMI_END_BUF)) {\r
reg->anchor_dmin = opt.len.min;\r
reg->anchor_dmax = opt.len.max;\r
}\r
\r
- if (opt.exb.len > 0 || opt.exm.len > 0) {\r
- select_opt_exact(reg->enc, &opt.exb, &opt.exm);\r
- if (opt.map.value > 0 && comp_opt_exact_or_map(&opt.exb, &opt.map) > 0) {\r
+ if (opt.sb.len > 0 || opt.sm.len > 0) {\r
+ select_opt_exact(reg->enc, &opt.sb, &opt.sm);\r
+ if (opt.map.value > 0 && comp_opt_exact_or_map(&opt.sb, &opt.map) > 0) {\r
goto set_map;\r
}\r
else {\r
- r = set_optimize_exact(reg, &opt.exb);\r
- set_sub_anchor(reg, &opt.exb.anc);\r
+ r = set_optimize_exact(reg, &opt.sb);\r
+ set_sub_anchor(reg, &opt.sb.anc);\r
}\r
}\r
else if (opt.map.value > 0) {\r
set_sub_anchor(reg, &opt.map.anc);\r
}\r
else {\r
- reg->sub_anchor |= opt.anc.left & ANCHOR_BEGIN_LINE;\r
+ reg->sub_anchor |= opt.anc.left & ANCR_BEGIN_LINE;\r
if (opt.len.max == 0)\r
- reg->sub_anchor |= opt.anc.right & ANCHOR_END_LINE;\r
+ reg->sub_anchor |= opt.anc.right & ANCR_END_LINE;\r
}\r
\r
#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)\r
reg->anchor_dmax = 0;\r
reg->sub_anchor = 0;\r
reg->exact_end = (UChar* )NULL;\r
+ reg->map_offset = 0;\r
reg->threshold_len = 0;\r
if (IS_NOT_NULL(reg->exact)) {\r
xfree(reg->exact);\r
\r
fprintf(f, "[");\r
\r
- if (anchor & ANCHOR_BEGIN_BUF) {\r
+ if (anchor & ANCR_BEGIN_BUF) {\r
fprintf(f, "begin-buf");\r
q = 1;\r
}\r
- if (anchor & ANCHOR_BEGIN_LINE) {\r
+ if (anchor & ANCR_BEGIN_LINE) {\r
if (q) fprintf(f, ", ");\r
q = 1;\r
fprintf(f, "begin-line");\r
}\r
- if (anchor & ANCHOR_BEGIN_POSITION) {\r
+ if (anchor & ANCR_BEGIN_POSITION) {\r
if (q) fprintf(f, ", ");\r
q = 1;\r
fprintf(f, "begin-pos");\r
}\r
- if (anchor & ANCHOR_END_BUF) {\r
+ if (anchor & ANCR_END_BUF) {\r
if (q) fprintf(f, ", ");\r
q = 1;\r
fprintf(f, "end-buf");\r
}\r
- if (anchor & ANCHOR_SEMI_END_BUF) {\r
+ if (anchor & ANCR_SEMI_END_BUF) {\r
if (q) fprintf(f, ", ");\r
q = 1;\r
fprintf(f, "semi-end-buf");\r
}\r
- if (anchor & ANCHOR_END_LINE) {\r
+ if (anchor & ANCR_END_LINE) {\r
if (q) fprintf(f, ", ");\r
q = 1;\r
fprintf(f, "end-line");\r
}\r
- if (anchor & ANCHOR_ANYCHAR_INF) {\r
+ if (anchor & ANCR_ANYCHAR_INF) {\r
if (q) fprintf(f, ", ");\r
q = 1;\r
fprintf(f, "anychar-inf");\r
}\r
- if (anchor & ANCHOR_ANYCHAR_INF_ML) {\r
+ if (anchor & ANCR_ANYCHAR_INF_ML) {\r
if (q) fprintf(f, ", ");\r
fprintf(f, "anychar-inf-ml");\r
}\r
static void\r
print_optimize_info(FILE* f, regex_t* reg)\r
{\r
- static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",\r
- "EXACT_IC", "MAP" };\r
+ static const char* on[] = { "NONE", "STR",\r
+ "STR_FAST", "STR_FAST_STEP_FORWARD",\r
+ "STR_CASE_FOLD_FAST", "STR_CASE_FOLD", "MAP" };\r
\r
fprintf(f, "optimize: %s\n", on[reg->optimize]);\r
fprintf(f, " anchor: "); print_anchor(f, reg->anchor);\r
- if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)\r
+ if ((reg->anchor & ANCR_END_BUF_MASK) != 0)\r
print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);\r
fprintf(f, "\n");\r
\r
else if (reg->optimize & OPTIMIZE_MAP) {\r
int c, i, n = 0;\r
\r
- for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)\r
+ for (i = 0; i < CHAR_MAP_SIZE; i++)\r
if (reg->map[i]) n++;\r
\r
fprintf(f, "map: n=%d\n", n);\r
if (n > 0) {\r
c = 0;\r
fputc('[', f);\r
- for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {\r
+ for (i = 0; i < CHAR_MAP_SIZE; i++) {\r
if (reg->map[i] != 0) {\r
if (c > 0) fputs(", ", f);\r
c++;\r
extern RegexExt*\r
onig_get_regex_ext(regex_t* reg)\r
{\r
- if (IS_NULL(REG_EXTP(reg))) {\r
+ if (IS_NULL(reg->extp)) {\r
RegexExt* ext = (RegexExt* )xmalloc(sizeof(*ext));\r
if (IS_NULL(ext)) return 0;\r
\r
ext->callout_list = 0;\r
#endif\r
\r
- REG_EXTPL(reg) = (void* )ext;\r
+ reg->extp = ext;\r
}\r
\r
- return REG_EXTP(reg);\r
+ return reg->extp;\r
}\r
\r
static void\r
return ONIG_NORMAL;\r
}\r
\r
-\r
extern void\r
onig_free_body(regex_t* reg)\r
{\r
if (IS_NOT_NULL(reg)) {\r
- if (IS_NOT_NULL(reg->p)) xfree(reg->p);\r
+ ops_free(reg);\r
+ if (IS_NOT_NULL(reg->string_pool)) {\r
+ xfree(reg->string_pool);\r
+ reg->string_pool_end = reg->string_pool = 0;\r
+ }\r
if (IS_NOT_NULL(reg->exact)) xfree(reg->exact);\r
- if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map);\r
- if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward);\r
if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range);\r
- if (IS_NOT_NULL(REG_EXTP(reg))) {\r
- free_regex_ext(REG_EXTP(reg));\r
- REG_EXTPL(reg) = 0;\r
+ if (IS_NOT_NULL(reg->extp)) {\r
+ free_regex_ext(reg->extp);\r
+ reg->extp = 0;\r
}\r
\r
onig_names_free(reg);\r
}\r
}\r
\r
-#define REGEX_TRANSFER(to,from) do {\\r
- onig_free_body(to);\\r
- xmemcpy(to, from, sizeof(regex_t));\\r
- xfree(from);\\r
-} while (0)\r
-\r
-extern void\r
-onig_transfer(regex_t* to, regex_t* from)\r
-{\r
- REGEX_TRANSFER(to, from);\r
-}\r
-\r
\r
#ifdef ONIG_DEBUG_PARSE\r
static void print_tree P_((FILE* f, Node* node));\r
#endif\r
\r
+extern int onig_init_for_match_at(regex_t* reg);\r
+\r
extern int\r
onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,\r
OnigErrorInfo* einfo)\r
{\r
-#define COMPILE_INIT_SIZE 20\r
-\r
- int r, init_size;\r
+ int r;\r
Node* root;\r
ScanEnv scan_env;\r
#ifdef USE_CALL\r
print_enc_string(stderr, reg->enc, pattern, pattern_end);\r
#endif\r
\r
- if (reg->alloc == 0) {\r
- init_size = (int )(pattern_end - pattern) * 2;\r
- if (init_size <= 0) init_size = COMPILE_INIT_SIZE;\r
- r = BB_INIT(reg, init_size);\r
+ if (reg->ops_alloc == 0) {\r
+ r = ops_init(reg, OPS_INIT_SIZE);\r
if (r != 0) goto end;\r
}\r
else\r
- reg->used = 0;\r
+ reg->ops_used = 0;\r
\r
+ reg->string_pool = 0;\r
+ reg->string_pool_end = 0;\r
reg->num_mem = 0;\r
reg->num_repeat = 0;\r
reg->num_null_check = 0;\r
r = compile_tree(root, reg, &scan_env);\r
if (r == 0) {\r
if (scan_env.keep_num > 0) {\r
- r = add_opcode(reg, OP_UPDATE_VAR);\r
- if (r != 0) goto err;\r
- r = add_update_var_type(reg, UPDATE_VAR_KEEP_FROM_STACK_LAST);\r
- if (r != 0) goto err;\r
- r = add_mem_num(reg, 0 /* not used */);\r
+ r = add_op(reg, OP_UPDATE_VAR);\r
if (r != 0) goto err;\r
+\r
+ COP(reg)->update_var.type = UPDATE_VAR_KEEP_FROM_STACK_LAST;\r
+ COP(reg)->update_var.id = 0; /* not used */\r
}\r
\r
- r = add_opcode(reg, OP_END);\r
+ r = add_op(reg, OP_END);\r
+ if (r != 0) goto err;\r
+\r
#ifdef USE_CALL\r
if (scan_env.num_call > 0) {\r
r = fix_unset_addr_list(&uslist, reg);\r
\r
if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0)\r
#ifdef USE_CALLOUT\r
- || (IS_NOT_NULL(REG_EXTP(reg)) && REG_EXTP(reg)->callout_num != 0)\r
+ || (IS_NOT_NULL(reg->extp) && reg->extp->callout_num != 0)\r
#endif\r
)\r
reg->stack_pop_level = STACK_POP_LEVEL_ALL;\r
else\r
reg->stack_pop_level = STACK_POP_LEVEL_FREE;\r
}\r
+\r
+ r = ops_make_string_pool(reg);\r
+ if (r != 0) goto err;\r
}\r
#ifdef USE_CALL\r
else if (scan_env.num_call > 0) {\r
onig_print_compiled_byte_code_list(stderr, reg);\r
#endif\r
\r
+#ifdef USE_DIRECT_THREADED_CODE\r
+ /* opcode -> opaddr */\r
+ onig_init_for_match_at(reg);\r
+#endif\r
+\r
end:\r
return r;\r
\r
(reg)->syntax = syntax;\r
(reg)->optimize = 0;\r
(reg)->exact = (UChar* )NULL;\r
- (reg)->int_map = (int* )NULL;\r
- (reg)->int_map_backward = (int* )NULL;\r
- REG_EXTPL(reg) = NULL;\r
+ (reg)->extp = (RegexExt* )NULL;\r
\r
- (reg)->p = (UChar* )NULL;\r
- (reg)->alloc = 0;\r
- (reg)->used = 0;\r
+ (reg)->ops = (Operation* )NULL;\r
+ (reg)->ops_curr = (Operation* )NULL;\r
+ (reg)->ops_used = 0;\r
+ (reg)->ops_alloc = 0;\r
(reg)->name_table = (void* )NULL;\r
\r
(reg)->case_fold_flag = case_fold_flag;\r
found = 0;\r
}\r
else {\r
- found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);\r
+ found = onig_is_in_code_range(cc->mbuf->p, code) != 0;\r
}\r
}\r
else {\r
- found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);\r
+ found = BITSET_AT(cc->bs, code) != 0;\r
}\r
\r
if (IS_NCCLASS_NOT(cc))\r
}\r
else {\r
len = ONIGENC_CODE_TO_MBCLEN(enc, code);\r
+ if (len < 0) return 0;\r
}\r
return onig_is_code_in_cc_len(len, code, cc);\r
}\r
break;\r
\r
case NODE_STRING:\r
- fprintf(f, "<string%s:%p>", (NODE_STRING_IS_RAW(node) ? "-raw" : ""), node);\r
- for (p = STR_(node)->s; p < STR_(node)->end; p++) {\r
- if (*p >= 0x20 && *p < 0x7f)\r
- fputc(*p, f);\r
- else {\r
- fprintf(f, " 0x%02x", *p);\r
+ {\r
+ char* mode;\r
+ char* dont;\r
+ char* good;\r
+\r
+ if (NODE_STRING_IS_RAW(node))\r
+ mode = "-raw";\r
+ else if (NODE_STRING_IS_AMBIG(node))\r
+ mode = "-ambig";\r
+ else\r
+ mode = "";\r
+\r
+ if (NODE_STRING_IS_GOOD_AMBIG(node))\r
+ good = "-good";\r
+ else\r
+ good = "";\r
+\r
+ if (NODE_STRING_IS_DONT_GET_OPT_INFO(node))\r
+ dont = " (dont-opt)";\r
+ else\r
+ dont = "";\r
+\r
+ fprintf(f, "<string%s%s%s:%p>", mode, good, dont, node);\r
+ for (p = STR_(node)->s; p < STR_(node)->end; p++) {\r
+ if (*p >= 0x20 && *p < 0x7f)\r
+ fputc(*p, f);\r
+ else {\r
+ fprintf(f, " 0x%02x", *p);\r
+ }\r
}\r
}\r
break;\r
case NODE_ANCHOR:\r
fprintf(f, "<anchor:%p> ", node);\r
switch (ANCHOR_(node)->type) {\r
- case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break;\r
- case ANCHOR_END_BUF: fputs("end buf", f); break;\r
- case ANCHOR_BEGIN_LINE: fputs("begin line", f); break;\r
- case ANCHOR_END_LINE: fputs("end line", f); break;\r
- case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break;\r
- case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;\r
-\r
- case ANCHOR_WORD_BOUNDARY: fputs("word boundary", f); break;\r
- case ANCHOR_NO_WORD_BOUNDARY: fputs("not word boundary", f); break;\r
+ case ANCR_BEGIN_BUF: fputs("begin buf", f); break;\r
+ case ANCR_END_BUF: fputs("end buf", f); break;\r
+ case ANCR_BEGIN_LINE: fputs("begin line", f); break;\r
+ case ANCR_END_LINE: fputs("end line", f); break;\r
+ case ANCR_SEMI_END_BUF: fputs("semi end buf", f); break;\r
+ case ANCR_BEGIN_POSITION: fputs("begin position", f); break;\r
+\r
+ case ANCR_WORD_BOUNDARY: fputs("word boundary", f); break;\r
+ case ANCR_NO_WORD_BOUNDARY: fputs("not word boundary", f); break;\r
#ifdef USE_WORD_BEGIN_END\r
- case ANCHOR_WORD_BEGIN: fputs("word begin", f); break;\r
- case ANCHOR_WORD_END: fputs("word end", f); break;\r
+ case ANCR_WORD_BEGIN: fputs("word begin", f); break;\r
+ case ANCR_WORD_END: fputs("word end", f); break;\r
#endif\r
- case ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:\r
- fputs("extended-grapheme-cluster boundary", f); break;\r
- case ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:\r
- fputs("no-extended-grapheme-cluster boundary", f); break;\r
- case ANCHOR_PREC_READ:\r
+ case ANCR_TEXT_SEGMENT_BOUNDARY:\r
+ fputs("text-segment boundary", f); break;\r
+ case ANCR_NO_TEXT_SEGMENT_BOUNDARY:\r
+ fputs("no text-segment boundary", f); break;\r
+ case ANCR_PREC_READ:\r
fprintf(f, "prec read\n");\r
print_indent_tree(f, NODE_BODY(node), indent + add);\r
break;\r
- case ANCHOR_PREC_READ_NOT:\r
+ case ANCR_PREC_READ_NOT:\r
fprintf(f, "prec read not\n");\r
print_indent_tree(f, NODE_BODY(node), indent + add);\r
break;\r
- case ANCHOR_LOOK_BEHIND:\r
+ case ANCR_LOOK_BEHIND:\r
fprintf(f, "look behind\n");\r
print_indent_tree(f, NODE_BODY(node), indent + add);\r
break;\r
- case ANCHOR_LOOK_BEHIND_NOT:\r
+ case ANCR_LOOK_BEHIND_NOT:\r
fprintf(f, "look behind not\n");\r
print_indent_tree(f, NODE_BODY(node), indent + add);\r
break;\r
print_indent_tree(f, NODE_BODY(node), indent + add);\r
break;\r
\r
- case NODE_ENCLOSURE:\r
- fprintf(f, "<enclosure:%p> ", node);\r
- switch (ENCLOSURE_(node)->type) {\r
- case ENCLOSURE_OPTION:\r
- fprintf(f, "option:%d", ENCLOSURE_(node)->o.options);\r
+ case NODE_BAG:\r
+ fprintf(f, "<bag:%p> ", node);\r
+ switch (BAG_(node)->type) {\r
+ case BAG_OPTION:\r
+ fprintf(f, "option:%d", BAG_(node)->o.options);\r
break;\r
- case ENCLOSURE_MEMORY:\r
- fprintf(f, "memory:%d", ENCLOSURE_(node)->m.regnum);\r
+ case BAG_MEMORY:\r
+ fprintf(f, "memory:%d", BAG_(node)->m.regnum);\r
break;\r
- case ENCLOSURE_STOP_BACKTRACK:\r
+ case BAG_STOP_BACKTRACK:\r
fprintf(f, "stop-bt");\r
break;\r
-\r
- default:\r
+ case BAG_IF_ELSE:\r
+ fprintf(f, "if-else");\r
break;\r
}\r
fprintf(f, "\n");\r
case GIMMICK_FAIL:\r
fprintf(f, "fail");\r
break;\r
- case GIMMICK_KEEP:\r
- fprintf(f, "keep:%d", GIMMICK_(node)->id);\r
- break;\r
case GIMMICK_SAVE:\r
fprintf(f, "save:%d:%d", GIMMICK_(node)->detail_type, GIMMICK_(node)->id);\r
break;\r
}\r
\r
if (type != NODE_LIST && type != NODE_ALT && type != NODE_QUANT &&\r
- type != NODE_ENCLOSURE)\r
+ type != NODE_BAG)\r
fprintf(f, "\n");\r
fflush(f);\r
}\r