X-Git-Url: https://git.proxmox.com/?p=mirror_edk2.git;a=blobdiff_plain;f=MdeModulePkg%2FUniversal%2FRegularExpressionDxe%2FOniguruma%2Fregexec.c;h=7b0fda07f8966d0a43fc352b2f2fdc39a97286c1;hp=a175f3a6cc499c100d7cf44a32d15ea4e2298d26;hb=b602265d559b2f2ade4d09ba55652c23922c141f;hpb=a364928195e911c2650fcae6bd34cfd501df1f57 diff --git a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regexec.c b/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regexec.c index a175f3a6cc..7b0fda07f8 100644 --- a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regexec.c +++ b/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regexec.c @@ -2,11 +2,9 @@ regexec.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2008 K.Kosako + * Copyright (c) 2002-2018 K.Kosako * All rights reserved. * - * (C) Copyright 2015 Hewlett Packard Enterprise Development LP
- * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -28,10 +26,10 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ - #include "regint.h" -#define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE +#define IS_MBC_WORD_ASCII_MODE(enc,s,end,mode) \ + ((mode) == 0 ? ONIGENC_IS_MBC_WORD(enc,s,end) : ONIGENC_IS_MBC_WORD_ASCII(enc,s,end)) #ifdef USE_CRNL_AS_LINE_TERMINATOR #define ONIGENC_IS_MBC_CRNL(enc,p,end) \ @@ -39,6 +37,586 @@ ONIGENC_IS_MBC_NEWLINE(enc,(p+enclen(enc,p)),end)) #endif +#define CHECK_INTERRUPT_IN_MATCH + +#ifdef USE_CALLOUT +typedef struct { + int last_match_at_call_counter; + struct { + OnigType type; + OnigValue val; + } slot[ONIG_CALLOUT_DATA_SLOT_NUM]; +} CalloutData; +#endif + +struct OnigMatchParamStruct { + unsigned int match_stack_limit; + unsigned long retry_limit_in_match; +#ifdef USE_CALLOUT + OnigCalloutFunc progress_callout_of_contents; + OnigCalloutFunc retraction_callout_of_contents; + int match_at_call_counter; + void* callout_user_data; + CalloutData* callout_data; + int callout_data_alloc_num; +#endif +}; + +extern int +onig_set_match_stack_limit_size_of_match_param(OnigMatchParam* param, + unsigned int limit) +{ + param->match_stack_limit = limit; + return ONIG_NORMAL; +} + +extern int +onig_set_retry_limit_in_match_of_match_param(OnigMatchParam* param, + unsigned long limit) +{ + param->retry_limit_in_match = limit; + return ONIG_NORMAL; +} + +extern int +onig_set_progress_callout_of_match_param(OnigMatchParam* param, OnigCalloutFunc f) +{ +#ifdef USE_CALLOUT + param->progress_callout_of_contents = f; + return ONIG_NORMAL; +#else + return ONIG_NO_SUPPORT_CONFIG; +#endif +} + +extern int +onig_set_retraction_callout_of_match_param(OnigMatchParam* param, OnigCalloutFunc f) +{ +#ifdef USE_CALLOUT + param->retraction_callout_of_contents = f; + return ONIG_NORMAL; +#else + return ONIG_NO_SUPPORT_CONFIG; +#endif +} + +extern int +onig_set_callout_user_data_of_match_param(OnigMatchParam* param, void* user_data) +{ +#ifdef USE_CALLOUT + param->callout_user_data = user_data; + return ONIG_NORMAL; +#else + return ONIG_NO_SUPPORT_CONFIG; +#endif +} + + + +typedef struct { + void* stack_p; + int stack_n; + OnigOptionType options; + OnigRegion* region; + int ptr_num; + const UChar* start; /* search start position (for \G: BEGIN_POSITION) */ + unsigned int match_stack_limit; + unsigned long retry_limit_in_match; + OnigMatchParam* mp; +#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE + int best_len; /* for ONIG_OPTION_FIND_LONGEST */ + UChar* best_s; +#endif +} MatchArg; + + +#ifdef ONIG_DEBUG + +/* arguments type */ +typedef enum { + ARG_SPECIAL = -1, + ARG_NON = 0, + ARG_RELADDR = 1, + ARG_ABSADDR = 2, + ARG_LENGTH = 3, + ARG_MEMNUM = 4, + ARG_OPTION = 5, + ARG_MODE = 6 +} OpArgType; + +typedef struct { + short int opcode; + char* name; + OpArgType arg_type; +} OpInfoType; + +static OpInfoType OpInfo[] = { + { OP_FINISH, "finish", ARG_NON }, + { OP_END, "end", ARG_NON }, + { OP_EXACT1, "exact1", ARG_SPECIAL }, + { OP_EXACT2, "exact2", ARG_SPECIAL }, + { OP_EXACT3, "exact3", ARG_SPECIAL }, + { OP_EXACT4, "exact4", ARG_SPECIAL }, + { OP_EXACT5, "exact5", ARG_SPECIAL }, + { OP_EXACTN, "exactn", ARG_SPECIAL }, + { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL }, + { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL }, + { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL }, + { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL }, + { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL }, + { OP_EXACTMBN, "exactmbn", ARG_SPECIAL }, + { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL }, + { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL }, + { OP_CCLASS, "cclass", ARG_SPECIAL }, + { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL }, + { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL }, + { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL }, + { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL }, + { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL }, +#ifdef USE_OP_CCLASS_NODE + { OP_CCLASS_NODE, "cclass-node", ARG_SPECIAL }, +#endif + { OP_ANYCHAR, "anychar", ARG_NON }, + { OP_ANYCHAR_ML, "anychar-ml", ARG_NON }, + { OP_ANYCHAR_STAR, "anychar*", ARG_NON }, + { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON }, + { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL }, + { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL }, + { OP_WORD, "word", ARG_NON }, + { OP_WORD_ASCII, "word-ascii", ARG_NON }, + { OP_NO_WORD, "not-word", ARG_NON }, + { OP_NO_WORD_ASCII, "not-word-ascii", ARG_NON }, + { OP_WORD_BOUNDARY, "word-boundary", ARG_MODE }, + { OP_NO_WORD_BOUNDARY, "not-word-boundary", ARG_MODE }, + { OP_WORD_BEGIN, "word-begin", ARG_MODE }, + { OP_WORD_END, "word-end", ARG_MODE }, + { OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY, "extended-grapheme-cluster-boundary", ARG_NON }, + { OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY, "no-extended-grapheme-cluster-boundary", ARG_NON }, + { OP_BEGIN_BUF, "begin-buf", ARG_NON }, + { OP_END_BUF, "end-buf", ARG_NON }, + { OP_BEGIN_LINE, "begin-line", ARG_NON }, + { OP_END_LINE, "end-line", ARG_NON }, + { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON }, + { OP_BEGIN_POSITION, "begin-position", ARG_NON }, + { OP_BACKREF1, "backref1", ARG_NON }, + { OP_BACKREF2, "backref2", ARG_NON }, + { OP_BACKREF_N, "backref-n", ARG_MEMNUM }, + { OP_BACKREF_N_IC, "backref-n-ic", ARG_SPECIAL }, + { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL }, + { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL }, + { OP_BACKREF_WITH_LEVEL, "backref_with_level", ARG_SPECIAL }, + { OP_BACKREF_CHECK, "backref_check", ARG_SPECIAL }, + { OP_BACKREF_CHECK_WITH_LEVEL, "backref_check_with_level", ARG_SPECIAL }, + { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM }, + { OP_MEMORY_START, "mem-start", ARG_MEMNUM }, + { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM }, + { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM }, + { OP_MEMORY_END, "mem-end", ARG_MEMNUM }, + { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM }, + { OP_FAIL, "fail", ARG_NON }, + { OP_JUMP, "jump", ARG_RELADDR }, + { OP_PUSH, "push", ARG_RELADDR }, + { OP_PUSH_SUPER, "push-super", ARG_RELADDR }, + { OP_POP_OUT, "pop-out", ARG_NON }, + { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL }, + { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL }, + { OP_REPEAT, "repeat", ARG_SPECIAL }, + { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL }, + { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM }, + { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM }, + { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM }, + { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM }, + { OP_EMPTY_CHECK_START, "empty-check-start", ARG_MEMNUM }, + { OP_EMPTY_CHECK_END, "empty-check-end", ARG_MEMNUM }, + { OP_EMPTY_CHECK_END_MEMST,"empty-check-end-memst", ARG_MEMNUM }, + { OP_EMPTY_CHECK_END_MEMST_PUSH,"empty-check-end-memst-push", ARG_MEMNUM }, + { OP_PREC_READ_START, "push-pos", ARG_NON }, + { OP_PREC_READ_END, "pop-pos", ARG_NON }, + { OP_PREC_READ_NOT_START, "prec-read-not-start", ARG_RELADDR }, + { OP_PREC_READ_NOT_END, "prec-read-not-end", ARG_NON }, + { OP_ATOMIC_START, "atomic-start", ARG_NON }, + { OP_ATOMIC_END, "atomic-end", ARG_NON }, + { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL }, + { OP_LOOK_BEHIND_NOT_START, "look-behind-not-start", ARG_SPECIAL }, + { OP_LOOK_BEHIND_NOT_END, "look-behind-not-end", ARG_NON }, + { OP_CALL, "call", ARG_ABSADDR }, + { OP_RETURN, "return", ARG_NON }, + { OP_PUSH_SAVE_VAL, "push-save-val", ARG_SPECIAL }, + { OP_UPDATE_VAR, "update-var", ARG_SPECIAL }, +#ifdef USE_CALLOUT + { OP_CALLOUT_CONTENTS, "callout-contents", ARG_SPECIAL }, + { OP_CALLOUT_NAME, "callout-name", ARG_SPECIAL }, +#endif + { -1, "", ARG_NON } +}; + +static char* +op2name(int opcode) +{ + int i; + + for (i = 0; OpInfo[i].opcode >= 0; i++) { + if (opcode == OpInfo[i].opcode) + return OpInfo[i].name; + } + return ""; +} + +static int +op2arg_type(int opcode) +{ + int i; + + for (i = 0; OpInfo[i].opcode >= 0; i++) { + if (opcode == OpInfo[i].opcode) + return OpInfo[i].arg_type; + } + return ARG_SPECIAL; +} + +static void +p_string(FILE* f, int len, UChar* s) +{ + fputs(":", f); + while (len-- > 0) { fputc(*s++, f); } +} + +static void +p_len_string(FILE* f, LengthType len, int mb_len, UChar* s) +{ + int x = len * mb_len; + + fprintf(f, ":%d:", len); + while (x-- > 0) { fputc(*s++, f); } +} + +static void +p_rel_addr(FILE* f, RelAddrType rel_addr, UChar* p, UChar* start) +{ + RelAddrType curr = (RelAddrType )(p - start); + + fprintf(f, "{%d/%d}", rel_addr, curr + rel_addr); +} + +static int +bitset_on_num(BitSetRef bs) +{ + int i, n; + + n = 0; + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + if (BITSET_AT(bs, i)) n++; + } + return n; +} + +extern void +onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start, + OnigEncoding enc) +{ + int i, n; + OpArgType arg_type; + RelAddrType addr; + LengthType len; + MemNumType mem; + OnigCodePoint code; + OnigOptionType option; + ModeType mode; + UChar *q; + + fprintf(f, "%s", op2name(*bp)); + arg_type = op2arg_type(*bp); + if (arg_type != ARG_SPECIAL) { + bp++; + switch (arg_type) { + case ARG_NON: + break; + case ARG_RELADDR: + GET_RELADDR_INC(addr, bp); + fputc(':', f); + p_rel_addr(f, addr, bp, start); + break; + case ARG_ABSADDR: + GET_ABSADDR_INC(addr, bp); + fprintf(f, ":{/%d}", addr); + break; + case ARG_LENGTH: + GET_LENGTH_INC(len, bp); + fprintf(f, ":%d", len); + break; + case ARG_MEMNUM: + mem = *((MemNumType* )bp); + bp += SIZE_MEMNUM; + fprintf(f, ":%d", mem); + break; + case ARG_OPTION: + { + OnigOptionType option = *((OnigOptionType* )bp); + bp += SIZE_OPTION; + fprintf(f, ":%d", option); + } + break; + case ARG_MODE: + mode = *((ModeType* )bp); + bp += SIZE_MODE; + fprintf(f, ":%d", mode); + break; + default: + break; + } + } + else { + switch (*bp++) { + case OP_EXACT1: + case OP_ANYCHAR_STAR_PEEK_NEXT: + case OP_ANYCHAR_ML_STAR_PEEK_NEXT: + p_string(f, 1, bp++); break; + case OP_EXACT2: + p_string(f, 2, bp); bp += 2; break; + case OP_EXACT3: + p_string(f, 3, bp); bp += 3; break; + case OP_EXACT4: + p_string(f, 4, bp); bp += 4; break; + case OP_EXACT5: + p_string(f, 5, bp); bp += 5; break; + case OP_EXACTN: + GET_LENGTH_INC(len, bp); + p_len_string(f, len, 1, bp); + bp += len; + break; + + case OP_EXACTMB2N1: + p_string(f, 2, bp); bp += 2; break; + case OP_EXACTMB2N2: + p_string(f, 4, bp); bp += 4; break; + case OP_EXACTMB2N3: + p_string(f, 6, bp); bp += 6; break; + case OP_EXACTMB2N: + GET_LENGTH_INC(len, bp); + p_len_string(f, len, 2, bp); + bp += len * 2; + break; + case OP_EXACTMB3N: + GET_LENGTH_INC(len, bp); + p_len_string(f, len, 3, bp); + bp += len * 3; + break; + case OP_EXACTMBN: + { + int mb_len; + + GET_LENGTH_INC(mb_len, bp); + GET_LENGTH_INC(len, bp); + fprintf(f, ":%d:%d:", mb_len, len); + n = len * mb_len; + while (n-- > 0) { fputc(*bp++, f); } + } + break; + + case OP_EXACT1_IC: + len = enclen(enc, bp); + p_string(f, len, bp); + bp += len; + break; + case OP_EXACTN_IC: + GET_LENGTH_INC(len, bp); + p_len_string(f, len, 1, bp); + bp += len; + break; + + case OP_CCLASS: + n = bitset_on_num((BitSetRef )bp); + bp += SIZE_BITSET; + fprintf(f, ":%d", n); + break; + + case OP_CCLASS_NOT: + n = bitset_on_num((BitSetRef )bp); + bp += SIZE_BITSET; + fprintf(f, ":%d", n); + break; + + case OP_CCLASS_MB: + case OP_CCLASS_MB_NOT: + GET_LENGTH_INC(len, bp); + q = bp; +#ifndef PLATFORM_UNALIGNED_WORD_ACCESS + ALIGNMENT_RIGHT(q); +#endif + GET_CODE_POINT(code, q); + bp += len; + fprintf(f, ":%d:%d", (int )code, len); + break; + + case OP_CCLASS_MIX: + case OP_CCLASS_MIX_NOT: + n = bitset_on_num((BitSetRef )bp); + bp += SIZE_BITSET; + GET_LENGTH_INC(len, bp); + q = bp; +#ifndef PLATFORM_UNALIGNED_WORD_ACCESS + ALIGNMENT_RIGHT(q); +#endif + GET_CODE_POINT(code, q); + bp += len; + fprintf(f, ":%d:%d:%d", n, (int )code, len); + break; + +#ifdef USE_OP_CCLASS_NODE + case OP_CCLASS_NODE: + { + CClassNode *cc; + + GET_POINTER_INC(cc, bp); + n = bitset_on_num(cc->bs); + fprintf(f, ":%p:%d", cc, n); + } + break; +#endif + + case OP_BACKREF_N_IC: + mem = *((MemNumType* )bp); + bp += SIZE_MEMNUM; + fprintf(f, ":%d", mem); + break; + + case OP_BACKREF_MULTI_IC: + case OP_BACKREF_MULTI: + case OP_BACKREF_CHECK: + fputs(" ", f); + GET_LENGTH_INC(len, bp); + for (i = 0; i < len; i++) { + GET_MEMNUM_INC(mem, bp); + if (i > 0) fputs(", ", f); + fprintf(f, "%d", mem); + } + break; + + case OP_BACKREF_WITH_LEVEL: + GET_OPTION_INC(option, bp); + fprintf(f, ":%d", option); + /* fall */ + case OP_BACKREF_CHECK_WITH_LEVEL: + { + LengthType level; + + GET_LENGTH_INC(level, bp); + fprintf(f, ":%d", level); + + fputs(" ", f); + GET_LENGTH_INC(len, bp); + for (i = 0; i < len; i++) { + GET_MEMNUM_INC(mem, bp); + if (i > 0) fputs(", ", f); + fprintf(f, "%d", mem); + } + } + break; + + case OP_REPEAT: + case OP_REPEAT_NG: + { + mem = *((MemNumType* )bp); + bp += SIZE_MEMNUM; + addr = *((RelAddrType* )bp); + bp += SIZE_RELADDR; + fprintf(f, ":%d:%d", mem, addr); + } + break; + + case OP_PUSH_OR_JUMP_EXACT1: + case OP_PUSH_IF_PEEK_NEXT: + addr = *((RelAddrType* )bp); + bp += SIZE_RELADDR; + fputc(':', f); + p_rel_addr(f, addr, bp, start); + p_string(f, 1, bp); + bp += 1; + break; + + case OP_LOOK_BEHIND: + GET_LENGTH_INC(len, bp); + fprintf(f, ":%d", len); + break; + + case OP_LOOK_BEHIND_NOT_START: + GET_RELADDR_INC(addr, bp); + GET_LENGTH_INC(len, bp); + fprintf(f, ":%d:", len); + p_rel_addr(f, addr, bp, start); + break; + + case OP_PUSH_SAVE_VAL: + { + SaveType type; + GET_SAVE_TYPE_INC(type, bp); + GET_MEMNUM_INC(mem, bp); + fprintf(f, ":%d:%d", type, mem); + } + break; + + case OP_UPDATE_VAR: + { + UpdateVarType type; + GET_UPDATE_VAR_TYPE_INC(type, bp); + GET_MEMNUM_INC(mem, bp); + fprintf(f, ":%d:%d", type, mem); + } + break; + +#ifdef USE_CALLOUT + case OP_CALLOUT_CONTENTS: + { + GET_MEMNUM_INC(mem, bp); /* number */ + fprintf(f, ":%d", mem); + } + break; + + case OP_CALLOUT_NAME: + { + int id; + + GET_MEMNUM_INC(id, bp); /* id */ + GET_MEMNUM_INC(mem, bp); /* number */ + + fprintf(f, ":%d:%d", id, mem); + } + break; +#endif + + default: + fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n", *--bp); + } + } + if (nextp) *nextp = bp; +} +#endif /* ONIG_DEBUG */ + +#ifdef ONIG_DEBUG_COMPILE +extern void +onig_print_compiled_byte_code_list(FILE* f, regex_t* reg) +{ + UChar* bp; + UChar* start = reg->p; + UChar* end = reg->p + reg->used; + + fprintf(f, "bt_mem_start: 0x%x, bt_mem_end: 0x%x\n", + reg->bt_mem_start, reg->bt_mem_end); + fprintf(f, "code-length: %d\n", reg->used); + + bp = start; + while (bp < end) { + int pos = bp - start; + + fprintf(f, "%4d: ", pos); + onig_print_compiled_byte_code(f, bp, &bp, start, reg->enc); + fprintf(f, "\n"); + } + fprintf(f, "\n"); +} +#endif + + #ifdef USE_CAPTURE_HISTORY static void history_tree_free(OnigCaptureTreeNode* node); @@ -243,16 +821,15 @@ onig_region_new(void) OnigRegion* r; r = (OnigRegion* )xmalloc(sizeof(OnigRegion)); - if (r != NULL) { - onig_region_init(r); - } + CHECK_NULL_RETURN(r); + onig_region_init(r); return r; } extern void onig_region_free(OnigRegion* r, int free_self) { - if (r) { + if (r != 0) { if (r->allocated > 0) { if (r->beg) xfree(r->beg); if (r->end) xfree(r->end); @@ -276,20 +853,20 @@ onig_region_copy(OnigRegion* to, OnigRegion* from) if (to->allocated == 0) { if (from->num_regs > 0) { to->beg = (int* )xmalloc(RREGC_SIZE); + if (IS_NULL(to->beg)) return; to->end = (int* )xmalloc(RREGC_SIZE); + if (IS_NULL(to->end)) return; to->allocated = from->num_regs; } } else if (to->allocated < from->num_regs) { to->beg = (int* )xrealloc(to->beg, RREGC_SIZE, sizeof(int) * to->allocated); + if (IS_NULL(to->beg)) return; to->end = (int* )xrealloc(to->end, RREGC_SIZE, sizeof(int) * to->allocated); + if (IS_NULL(to->end)) return; to->allocated = from->num_regs; } - if (to->beg == NULL || to->end == NULL) { - return; - } - for (i = 0; i < from->num_regs; i++) { to->beg[i] = from->beg[i]; to->end[i] = from->end[i]; @@ -305,251 +882,604 @@ onig_region_copy(OnigRegion* to, OnigRegion* from) #endif } +#ifdef USE_CALLOUT +#define CALLOUT_BODY(func, ain, aname_id, anum, user, args, result) do { \ + args.in = (ain);\ + args.name_id = (aname_id);\ + args.num = anum;\ + args.regex = reg;\ + args.string = str;\ + args.string_end = end;\ + args.start = sstart;\ + args.right_range = right_range;\ + args.current = s;\ + args.retry_in_match_counter = retry_in_match_counter;\ + args.msa = msa;\ + args.stk_base = stk_base;\ + args.stk = stk;\ + args.mem_start_stk = mem_start_stk;\ + args.mem_end_stk = mem_end_stk;\ + result = (func)(&args, user);\ +} while (0) + +#define RETRACTION_CALLOUT(func, aname_id, anum, user) do {\ + int result;\ + OnigCalloutArgs args;\ + CALLOUT_BODY(func, ONIG_CALLOUT_IN_RETRACTION, aname_id, anum, user, args, result);\ + switch (result) {\ + case ONIG_CALLOUT_FAIL:\ + case ONIG_CALLOUT_SUCCESS:\ + break;\ + default:\ + if (result > 0) {\ + result = ONIGERR_INVALID_ARGUMENT;\ + }\ + best_len = result;\ + goto finish;\ + break;\ + }\ +} while(0) +#endif + /** stack **/ #define INVALID_STACK_INDEX -1 +#define STK_ALT_FLAG 0x0001 + /* stack type */ /* used by normal-POP */ -#define STK_ALT 0x0001 -#define STK_LOOK_BEHIND_NOT 0x0002 -#define STK_POS_NOT 0x0003 +#define STK_SUPER_ALT STK_ALT_FLAG +#define STK_ALT (0x0002 | STK_ALT_FLAG) +#define STK_ALT_PREC_READ_NOT (0x0004 | STK_ALT_FLAG) +#define STK_ALT_LOOK_BEHIND_NOT (0x0006 | STK_ALT_FLAG) + /* handled by normal-POP */ -#define STK_MEM_START 0x0100 -#define STK_MEM_END 0x8200 -#define STK_REPEAT_INC 0x0300 -#define STK_STATE_CHECK_MARK 0x1000 +#define STK_MEM_START 0x0010 +#define STK_MEM_END 0x8030 +#define STK_REPEAT_INC 0x0050 +#ifdef USE_CALLOUT +#define STK_CALLOUT 0x0070 +#endif + /* avoided by normal-POP */ -#define STK_NULL_CHECK_START 0x3000 -#define STK_NULL_CHECK_END 0x5000 /* for recursive call */ -#define STK_MEM_END_MARK 0x8400 -#define STK_POS 0x0500 /* used when POP-POS */ -#define STK_STOP_BT 0x0600 /* mark for "(?>...)" */ -#define STK_REPEAT 0x0700 -#define STK_CALL_FRAME 0x0800 -#define STK_RETURN 0x0900 -#define STK_VOID 0x0a00 /* for fill a blank */ +#define STK_VOID 0x0000 /* for fill a blank */ +#define STK_EMPTY_CHECK_START 0x3000 +#define STK_EMPTY_CHECK_END 0x5000 /* for recursive call */ +#define STK_MEM_END_MARK 0x8100 +#define STK_TO_VOID_START 0x1200 /* mark for "(?>...)" */ +#define STK_REPEAT 0x0300 +#define STK_CALL_FRAME 0x0400 +#define STK_RETURN 0x0500 +#define STK_SAVE_VAL 0x0600 /* stack type check mask */ -#define STK_MASK_POP_USED 0x00ff -#define STK_MASK_TO_VOID_TARGET 0x10ff +#define STK_MASK_POP_USED STK_ALT_FLAG +#define STK_MASK_POP_HANDLED 0x0010 +#define STK_MASK_POP_HANDLED_TIL (STK_MASK_POP_HANDLED | 0x0004) +#define STK_MASK_TO_VOID_TARGET 0x100e #define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */ +typedef intptr_t StackIndex; + +typedef struct _StackType { + unsigned int type; + int zid; + union { + struct { + UChar *pcode; /* byte code position */ + UChar *pstr; /* string position */ + UChar *pstr_prev; /* previous char position of pstr */ + } state; + struct { + int count; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */ + UChar *pcode; /* byte code position (head of repeated target) */ + } repeat; + struct { + StackIndex si; /* index of stack */ + } repeat_inc; + struct { + UChar *pstr; /* start/end position */ + /* Following information is set, if this stack type is MEM-START */ + StackIndex prev_start; /* prev. info (for backtrack "(...)*" ) */ + StackIndex prev_end; /* prev. info (for backtrack "(...)*" ) */ + } mem; + struct { + UChar *pstr; /* start position */ + } empty_check; +#ifdef USE_CALL + struct { + UChar *ret_addr; /* byte code position */ + UChar *pstr; /* string position */ + } call_frame; +#endif + struct { + enum SaveType type; + UChar* v; + UChar* v2; + } val; +#ifdef USE_CALLOUT + struct { + int num; + OnigCalloutFunc func; + } callout; +#endif + } u; +} StackType; + +#ifdef USE_CALLOUT + +struct OnigCalloutArgsStruct { + OnigCalloutIn in; + int name_id; /* name id or ONIG_NON_NAME_ID */ + int num; + OnigRegex regex; + const OnigUChar* string; + const OnigUChar* string_end; + const OnigUChar* start; + const OnigUChar* right_range; + const OnigUChar* current; /* current matching position */ + unsigned long retry_in_match_counter; + + /* invisible to users */ + MatchArg* msa; + StackType* stk_base; + StackType* stk; + StackIndex* mem_start_stk; + StackIndex* mem_end_stk; +}; + +#endif + + #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE -#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\ +#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mp) do { \ (msa).stack_p = (void* )0;\ (msa).options = (arg_option);\ (msa).region = (arg_region);\ (msa).start = (arg_start);\ + (msa).match_stack_limit = (mp)->match_stack_limit;\ + (msa).retry_limit_in_match = (mp)->retry_limit_in_match;\ + (msa).mp = mp;\ (msa).best_len = ONIG_MISMATCH;\ + (msa).ptr_num = (reg)->num_repeat + ((reg)->num_mem + 1) * 2; \ } while(0) #else -#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\ +#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mp) do { \ (msa).stack_p = (void* )0;\ (msa).options = (arg_option);\ (msa).region = (arg_region);\ (msa).start = (arg_start);\ + (msa).match_stack_limit = (mp)->match_stack_limit;\ + (msa).retry_limit_in_match = (mp)->retry_limit_in_match;\ + (msa).mp = mp;\ + (msa).ptr_num = (reg)->num_repeat + ((reg)->num_mem + 1) * 2; \ } while(0) #endif -#ifdef USE_COMBINATION_EXPLOSION_CHECK - -#define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16 - -#define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \ - if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\ - unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\ - offset = ((offset) * (state_num)) >> 3;\ - if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\ - if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) \ - (msa).state_check_buff = (void* )xmalloc(size);\ - else \ - (msa).state_check_buff = (void* )xalloca(size);\ - xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \ - (size_t )(size - (offset))); \ - (msa).state_check_buff_size = size;\ - }\ - else {\ - (msa).state_check_buff = (void* )0;\ - (msa).state_check_buff_size = 0;\ - }\ - }\ - else {\ - (msa).state_check_buff = (void* )0;\ - (msa).state_check_buff_size = 0;\ - }\ - } while(0) - -#define MATCH_ARG_FREE(msa) do {\ - if ((msa).stack_p) xfree((msa).stack_p);\ - if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \ - if ((msa).state_check_buff) xfree((msa).state_check_buff);\ - }\ -} while(0) -#else -#define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) #define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p) -#endif +#define ALLOCA_PTR_NUM_LIMIT 50 -#define STACK_INIT(alloc_addr, ptr_num, stack_num) do {\ +#define STACK_INIT(stack_num) do {\ if (msa->stack_p) {\ - alloc_addr = (char* )xmalloc(sizeof(char*) * (ptr_num));\ - stk_alloc = (OnigStackType* )(msa->stack_p);\ - stk_base = stk_alloc;\ + is_alloca = 0;\ + alloc_base = msa->stack_p;\ + stk_base = (StackType* )(alloc_base\ + + (sizeof(StackIndex) * msa->ptr_num));\ stk = stk_base;\ stk_end = stk_base + msa->stack_n;\ }\ + else if (msa->ptr_num > ALLOCA_PTR_NUM_LIMIT) {\ + is_alloca = 0;\ + alloc_base = (char* )xmalloc(sizeof(StackIndex) * msa->ptr_num\ + + sizeof(StackType) * (stack_num));\ + CHECK_NULL_RETURN_MEMERR(alloc_base);\ + stk_base = (StackType* )(alloc_base\ + + (sizeof(StackIndex) * msa->ptr_num));\ + stk = stk_base;\ + stk_end = stk_base + (stack_num);\ + }\ else {\ - alloc_addr = (char* )xmalloc(sizeof(char*) * (ptr_num)\ - + sizeof(OnigStackType) * (stack_num));\ - stk_alloc = (OnigStackType* )(alloc_addr + sizeof(char*) * (ptr_num));\ - stk_base = stk_alloc;\ + is_alloca = 1;\ + alloc_base = (char* )xmalloc(sizeof(StackIndex) * msa->ptr_num\ + + sizeof(StackType) * (stack_num));\ + CHECK_NULL_RETURN_MEMERR(alloc_base);\ + stk_base = (StackType* )(alloc_base\ + + (sizeof(StackIndex) * msa->ptr_num));\ stk = stk_base;\ stk_end = stk_base + (stack_num);\ }\ -} while(0) +} while(0); + #define STACK_SAVE do{\ - if (stk_base != stk_alloc) {\ - msa->stack_p = stk_base;\ - msa->stack_n = (int)(stk_end - stk_base);\ + msa->stack_n = (int )(stk_end - stk_base);\ + if (is_alloca != 0) {\ + size_t size = sizeof(StackIndex) * msa->ptr_num \ + + sizeof(StackType) * msa->stack_n;\ + msa->stack_p = xmalloc(size);\ + CHECK_NULL_RETURN_MEMERR(msa->stack_p);\ + xmemcpy(msa->stack_p, alloc_base, size);\ + }\ + else {\ + msa->stack_p = alloc_base;\ };\ } while(0) -static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE; +#define UPDATE_FOR_STACK_REALLOC do{\ + repeat_stk = (StackIndex* )alloc_base;\ + mem_start_stk = (StackIndex* )(repeat_stk + reg->num_repeat);\ + mem_end_stk = mem_start_stk + num_mem + 1;\ +} while(0) + +static unsigned int MatchStackLimit = DEFAULT_MATCH_STACK_LIMIT_SIZE; extern unsigned int onig_get_match_stack_limit_size(void) { - return MatchStackLimitSize; + return MatchStackLimit; } extern int onig_set_match_stack_limit_size(unsigned int size) { - MatchStackLimitSize = size; + MatchStackLimit = size; return 0; } -static int -stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, - OnigStackType** arg_stk, OnigStackType* stk_alloc, OnigMatchArg* msa) -{ - unsigned int n; - OnigStackType *x, *stk_base, *stk_end, *stk; +#ifdef USE_RETRY_LIMIT_IN_MATCH - stk_base = *arg_stk_base; - stk_end = *arg_stk_end; - stk = *arg_stk; +static unsigned long RetryLimitInMatch = DEFAULT_RETRY_LIMIT_IN_MATCH; - n = (unsigned int)(stk_end - stk_base); - if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) { - x = (OnigStackType* )xmalloc(sizeof(OnigStackType) * n * 2); - if (IS_NULL(x)) { - STACK_SAVE; - return ONIGERR_MEMORY; - } - xmemcpy(x, stk_base, n * sizeof(OnigStackType)); - n *= 2; - } - else { - n *= 2; - if (MatchStackLimitSize != 0 && n > MatchStackLimitSize) { - if ((unsigned int )(stk_end - stk_base) == MatchStackLimitSize) - return ONIGERR_MATCH_STACK_LIMIT_OVER; - else - n = MatchStackLimitSize; - } - x = (OnigStackType* )xrealloc(stk_base, sizeof(OnigStackType) * n, sizeof(OnigStackType) * (stk_end - stk_base)); - if (IS_NULL(x)) { - STACK_SAVE; - return ONIGERR_MEMORY; - } - } - *arg_stk = x + (stk - stk_base); - *arg_stk_base = x; - *arg_stk_end = x + n; - return 0; -} +#define CHECK_RETRY_LIMIT_IN_MATCH do {\ + if (retry_in_match_counter++ > retry_limit_in_match) goto retry_limit_in_match_over;\ +} while (0) -#define STACK_ENSURE(n) do {\ - if (stk_end - stk < (n)) {\ - int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\ - if (r != 0) { STACK_SAVE; return r; } \ - }\ -} while(0) +#else -#define STACK_AT(index) (stk_base + (index)) -#define GET_STACK_INDEX(stk) ((OnigStackIndex)((stk) - stk_base)) +#define CHECK_RETRY_LIMIT_IN_MATCH -#define STACK_PUSH_TYPE(stack_type) do {\ - STACK_ENSURE(1);\ - stk->type = (stack_type);\ - STACK_INC;\ -} while(0) +#endif /* USE_RETRY_LIMIT_IN_MATCH */ -#define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0) +extern unsigned long +onig_get_retry_limit_in_match(void) +{ +#ifdef USE_RETRY_LIMIT_IN_MATCH + return RetryLimitInMatch; +#else + /* return ONIG_NO_SUPPORT_CONFIG; */ + return 0; +#endif +} -#ifdef USE_COMBINATION_EXPLOSION_CHECK -#define STATE_CHECK_POS(s,snum) \ - (((s) - str) * num_comb_exp_check + ((snum) - 1)) -#define STATE_CHECK_VAL(v,snum) do {\ - if (state_check_buff != NULL) {\ - int x = STATE_CHECK_POS(s,snum);\ - (v) = state_check_buff[x/8] & (1<<(x%8));\ - }\ - else (v) = 0;\ -} while(0) +extern int +onig_set_retry_limit_in_match(unsigned long size) +{ +#ifdef USE_RETRY_LIMIT_IN_MATCH + RetryLimitInMatch = size; + return 0; +#else + return ONIG_NO_SUPPORT_CONFIG; +#endif +} + +static OnigCalloutFunc DefaultProgressCallout; +static OnigCalloutFunc DefaultRetractionCallout; +extern OnigMatchParam* +onig_new_match_param(void) +{ + OnigMatchParam* p; -#define ELSE_IF_STATE_CHECK_MARK(stk) \ - else if ((stk)->type == STK_STATE_CHECK_MARK) { \ - int x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\ - state_check_buff[x/8] |= (1<<(x%8)); \ + p = (OnigMatchParam* )xmalloc(sizeof(*p)); + if (IS_NOT_NULL(p)) { + onig_initialize_match_param(p); } -#define STACK_PUSH(stack_type,pat,s,sprev) do {\ - STACK_ENSURE(1);\ - stk->type = (stack_type);\ - stk->u.state.pcode = (pat);\ - stk->u.state.pstr = (s);\ - stk->u.state.pstr_prev = (sprev);\ - stk->u.state.state_check = 0;\ - STACK_INC;\ -} while(0) + return p; +} -#define STACK_PUSH_ENSURED(stack_type,pat) do {\ - stk->type = (stack_type);\ - stk->u.state.pcode = (pat);\ - stk->u.state.state_check = 0;\ - STACK_INC;\ -} while(0) +extern void +onig_free_match_param_content(OnigMatchParam* p) +{ +#ifdef USE_CALLOUT + if (IS_NOT_NULL(p->callout_data)) { + xfree(p->callout_data); + p->callout_data = 0; + } +#endif +} -#define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum) do {\ - STACK_ENSURE(1);\ - stk->type = STK_ALT;\ - stk->u.state.pcode = (pat);\ - stk->u.state.pstr = (s);\ - stk->u.state.pstr_prev = (sprev);\ - stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\ - STACK_INC;\ -} while(0) +extern void +onig_free_match_param(OnigMatchParam* p) +{ + if (IS_NOT_NULL(p)) { + onig_free_match_param_content(p); + xfree(p); + } +} + +extern int +onig_initialize_match_param(OnigMatchParam* mp) +{ + mp->match_stack_limit = MatchStackLimit; +#ifdef USE_RETRY_LIMIT_IN_MATCH + mp->retry_limit_in_match = RetryLimitInMatch; +#endif + mp->progress_callout_of_contents = DefaultProgressCallout; + mp->retraction_callout_of_contents = DefaultRetractionCallout; + +#ifdef USE_CALLOUT + mp->match_at_call_counter = 0; + mp->callout_user_data = 0; + mp->callout_data = 0; + mp->callout_data_alloc_num = 0; +#endif + + return ONIG_NORMAL; +} + +#ifdef USE_CALLOUT + +static int +adjust_match_param(regex_t* reg, OnigMatchParam* mp) +{ + RegexExt* ext = REG_EXTP(reg); + + mp->match_at_call_counter = 0; + + if (IS_NULL(ext) || ext->callout_num == 0) return ONIG_NORMAL; + + if (ext->callout_num > mp->callout_data_alloc_num) { + CalloutData* d; + size_t n = ext->callout_num * sizeof(*d); + if (IS_NOT_NULL(mp->callout_data)) + d = (CalloutData* )xrealloc(mp->callout_data, n, mp->callout_data_alloc_num * sizeof(*d)); + else + d = (CalloutData* )xmalloc(n); + CHECK_NULL_RETURN_MEMERR(d); + + mp->callout_data = d; + mp->callout_data_alloc_num = ext->callout_num; + } + + xmemset(mp->callout_data, 0, mp->callout_data_alloc_num * sizeof(CalloutData)); + return ONIG_NORMAL; +} + +#define ADJUST_MATCH_PARAM(reg, mp) \ + r = adjust_match_param(reg, mp);\ + if (r != ONIG_NORMAL) return r; + +#define CALLOUT_DATA_AT_NUM(mp, num) ((mp)->callout_data + ((num) - 1)) + +extern int +onig_check_callout_data_and_clear_old_values(OnigCalloutArgs* args) +{ + OnigMatchParam* mp; + int num; + CalloutData* d; + + mp = args->msa->mp; + num = args->num; + + d = CALLOUT_DATA_AT_NUM(mp, num); + if (d->last_match_at_call_counter != mp->match_at_call_counter) { + xmemset(d, 0, sizeof(*d)); + d->last_match_at_call_counter = mp->match_at_call_counter; + return d->last_match_at_call_counter; + } + + return 0; +} + +extern int +onig_get_callout_data_dont_clear_old(regex_t* reg, OnigMatchParam* mp, + int callout_num, int slot, + OnigType* type, OnigValue* val) +{ + OnigType t; + CalloutData* d; + + if (callout_num <= 0) return ONIGERR_INVALID_ARGUMENT; + + d = CALLOUT_DATA_AT_NUM(mp, callout_num); + t = d->slot[slot].type; + if (IS_NOT_NULL(type)) *type = t; + if (IS_NOT_NULL(val)) *val = d->slot[slot].val; + return (t == ONIG_TYPE_VOID ? 1 : ONIG_NORMAL); +} + +extern int +onig_get_callout_data_by_callout_args_self_dont_clear_old(OnigCalloutArgs* args, + int slot, OnigType* type, + OnigValue* val) +{ + return onig_get_callout_data_dont_clear_old(args->regex, args->msa->mp, + args->num, slot, type, val); +} + +extern int +onig_get_callout_data(regex_t* reg, OnigMatchParam* mp, + int callout_num, int slot, + OnigType* type, OnigValue* val) +{ + OnigType t; + CalloutData* d; + + if (callout_num <= 0) return ONIGERR_INVALID_ARGUMENT; + + d = CALLOUT_DATA_AT_NUM(mp, callout_num); + if (d->last_match_at_call_counter != mp->match_at_call_counter) { + xmemset(d, 0, sizeof(*d)); + d->last_match_at_call_counter = mp->match_at_call_counter; + } + + t = d->slot[slot].type; + if (IS_NOT_NULL(type)) *type = t; + if (IS_NOT_NULL(val)) *val = d->slot[slot].val; + return (t == ONIG_TYPE_VOID ? 1 : ONIG_NORMAL); +} + +extern int +onig_get_callout_data_by_tag(regex_t* reg, OnigMatchParam* mp, + const UChar* tag, const UChar* tag_end, int slot, + OnigType* type, OnigValue* val) +{ + int num; + + num = onig_get_callout_num_by_tag(reg, tag, tag_end); + if (num < 0) return num; + if (num == 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME; + + return onig_get_callout_data(reg, mp, num, slot, type, val); +} + +extern int +onig_get_callout_data_by_callout_args(OnigCalloutArgs* args, + int callout_num, int slot, + OnigType* type, OnigValue* val) +{ + return onig_get_callout_data(args->regex, args->msa->mp, callout_num, slot, + type, val); +} + +extern int +onig_get_callout_data_by_callout_args_self(OnigCalloutArgs* args, + int slot, OnigType* type, OnigValue* val) +{ + return onig_get_callout_data(args->regex, args->msa->mp, args->num, slot, + type, val); +} + +extern int +onig_set_callout_data(regex_t* reg, OnigMatchParam* mp, + int callout_num, int slot, + OnigType type, OnigValue* val) +{ + CalloutData* d; + + if (callout_num <= 0) return ONIGERR_INVALID_ARGUMENT; + + d = CALLOUT_DATA_AT_NUM(mp, callout_num); + d->slot[slot].type = type; + d->slot[slot].val = *val; + d->last_match_at_call_counter = mp->match_at_call_counter; + + return ONIG_NORMAL; +} + +extern int +onig_set_callout_data_by_tag(regex_t* reg, OnigMatchParam* mp, + const UChar* tag, const UChar* tag_end, int slot, + OnigType type, OnigValue* val) +{ + int num; + + num = onig_get_callout_num_by_tag(reg, tag, tag_end); + if (num < 0) return num; + if (num == 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME; + + return onig_set_callout_data(reg, mp, num, slot, type, val); +} + +extern int +onig_set_callout_data_by_callout_args(OnigCalloutArgs* args, + int callout_num, int slot, + OnigType type, OnigValue* val) +{ + return onig_set_callout_data(args->regex, args->msa->mp, callout_num, slot, + type, val); +} + +extern int +onig_set_callout_data_by_callout_args_self(OnigCalloutArgs* args, + int slot, OnigType type, OnigValue* val) +{ + return onig_set_callout_data(args->regex, args->msa->mp, args->num, slot, + type, val); +} + +#else +#define ADJUST_MATCH_PARAM(reg, mp) +#endif /* USE_CALLOUT */ + + +static int +stack_double(int is_alloca, char** arg_alloc_base, + StackType** arg_stk_base, StackType** arg_stk_end, StackType** arg_stk, + MatchArg* msa) +{ + unsigned int n; + int used; + size_t size; + size_t new_size; + char* alloc_base; + char* new_alloc_base; + StackType *stk_base, *stk_end, *stk; + + alloc_base = *arg_alloc_base; + stk_base = *arg_stk_base; + stk_end = *arg_stk_end; + stk = *arg_stk; + + n = (unsigned int )(stk_end - stk_base); + size = sizeof(StackIndex) * msa->ptr_num + sizeof(StackType) * n; + n *= 2; + new_size = sizeof(StackIndex) * msa->ptr_num + sizeof(StackType) * n; + if (is_alloca != 0) { + new_alloc_base = (char* )xmalloc(new_size); + if (IS_NULL(new_alloc_base)) { + STACK_SAVE; + return ONIGERR_MEMORY; + } + xmemcpy(new_alloc_base, alloc_base, size); + } + else { + if (msa->match_stack_limit != 0 && n > msa->match_stack_limit) { + if ((unsigned int )(stk_end - stk_base) == msa->match_stack_limit) + return ONIGERR_MATCH_STACK_LIMIT_OVER; + else + n = msa->match_stack_limit; + } + new_alloc_base = (char* )xrealloc(alloc_base, new_size, size); + if (IS_NULL(new_alloc_base)) { + STACK_SAVE; + return ONIGERR_MEMORY; + } + } + + alloc_base = new_alloc_base; + used = (int )(stk - stk_base); + *arg_alloc_base = alloc_base; + *arg_stk_base = (StackType* )(alloc_base + + (sizeof(StackIndex) * msa->ptr_num)); + *arg_stk = *arg_stk_base + used; + *arg_stk_end = *arg_stk_base + n; + return 0; +} -#define STACK_PUSH_STATE_CHECK(s,snum) do {\ - if (state_check_buff != NULL) {\ - STACK_ENSURE(1);\ - stk->type = STK_STATE_CHECK_MARK;\ - stk->u.state.pstr = (s);\ - stk->u.state.state_check = (snum);\ - STACK_INC;\ +#define STACK_ENSURE(n) do {\ + if ((int )(stk_end - stk) < (n)) {\ + int r = stack_double(is_alloca, &alloc_base, &stk_base, &stk_end, &stk, msa);\ + if (r != 0) { STACK_SAVE; return r; } \ + is_alloca = 0;\ + UPDATE_FOR_STACK_REALLOC;\ }\ } while(0) -#else /* USE_COMBINATION_EXPLOSION_CHECK */ +#define STACK_AT(index) (stk_base + (index)) +#define GET_STACK_INDEX(stk) ((stk) - stk_base) + +#define STACK_PUSH_TYPE(stack_type) do {\ + STACK_ENSURE(1);\ + stk->type = (stack_type);\ + STACK_INC;\ +} while(0) -#define ELSE_IF_STATE_CHECK_MARK(stk) +#define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0) #define STACK_PUSH(stack_type,pat,s,sprev) do {\ STACK_ENSURE(1);\ @@ -565,19 +1495,37 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, stk->u.state.pcode = (pat);\ STACK_INC;\ } while(0) -#endif /* USE_COMBINATION_EXPLOSION_CHECK */ -#define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev) -#define STACK_PUSH_POS(s,sprev) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev) -#define STACK_PUSH_POS_NOT(pat,s,sprev) STACK_PUSH(STK_POS_NOT,pat,s,sprev) -#define STACK_PUSH_STOP_BT STACK_PUSH_TYPE(STK_STOP_BT) -#define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev) \ - STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev) +#ifdef ONIG_DEBUG_MATCH +#define STACK_PUSH_BOTTOM(stack_type,pat) do {\ + stk->type = (stack_type);\ + stk->u.state.pcode = (pat);\ + stk->u.state.pstr = s;\ + stk->u.state.pstr_prev = sprev;\ + STACK_INC;\ +} while (0) +#else +#define STACK_PUSH_BOTTOM(stack_type,pat) do {\ + stk->type = (stack_type);\ + stk->u.state.pcode = (pat);\ + STACK_INC;\ +} while (0) +#endif -#define STACK_PUSH_REPEAT(id, pat) do {\ +#define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev) +#define STACK_PUSH_SUPER_ALT(pat,s,sprev) STACK_PUSH(STK_SUPER_ALT,pat,s,sprev) +#define STACK_PUSH_POS(s,sprev) \ + STACK_PUSH(STK_TO_VOID_START,NULL_UCHARP,s,sprev) +#define STACK_PUSH_ALT_PREC_READ_NOT(pat,s,sprev) \ + STACK_PUSH(STK_ALT_PREC_READ_NOT,pat,s,sprev) +#define STACK_PUSH_TO_VOID_START STACK_PUSH_TYPE(STK_TO_VOID_START) +#define STACK_PUSH_ALT_LOOK_BEHIND_NOT(pat,s,sprev) \ + STACK_PUSH(STK_ALT_LOOK_BEHIND_NOT,pat,s,sprev) + +#define STACK_PUSH_REPEAT(sid, pat) do {\ STACK_ENSURE(1);\ stk->type = STK_REPEAT;\ - stk->u.repeat.num = (id);\ + stk->zid = (sid);\ stk->u.repeat.pcode = (pat);\ stk->u.repeat.count = 0;\ STACK_INC;\ @@ -593,22 +1541,22 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, #define STACK_PUSH_MEM_START(mnum, s) do {\ STACK_ENSURE(1);\ stk->type = STK_MEM_START;\ - stk->u.mem.num = (int)(mnum);\ - stk->u.mem.pstr = (s);\ - stk->u.mem.start = mem_start_stk[mnum];\ - stk->u.mem.end = mem_end_stk[mnum];\ - mem_start_stk[mnum] = GET_STACK_INDEX(stk);\ - mem_end_stk[mnum] = INVALID_STACK_INDEX;\ + stk->zid = (mnum);\ + stk->u.mem.pstr = (s);\ + stk->u.mem.prev_start = mem_start_stk[mnum];\ + stk->u.mem.prev_end = mem_end_stk[mnum];\ + mem_start_stk[mnum] = GET_STACK_INDEX(stk);\ + mem_end_stk[mnum] = INVALID_STACK_INDEX;\ STACK_INC;\ } while(0) #define STACK_PUSH_MEM_END(mnum, s) do {\ STACK_ENSURE(1);\ stk->type = STK_MEM_END;\ - stk->u.mem.num = (mnum);\ - stk->u.mem.pstr = (s);\ - stk->u.mem.start = mem_start_stk[mnum];\ - stk->u.mem.end = mem_end_stk[mnum];\ + stk->zid = (mnum);\ + stk->u.mem.pstr = (s);\ + stk->u.mem.prev_start = mem_start_stk[mnum];\ + stk->u.mem.prev_end = mem_end_stk[mnum];\ mem_end_stk[mnum] = GET_STACK_INDEX(stk);\ STACK_INC;\ } while(0) @@ -616,7 +1564,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, #define STACK_PUSH_MEM_END_MARK(mnum) do {\ STACK_ENSURE(1);\ stk->type = STK_MEM_END_MARK;\ - stk->u.mem.num = (mnum);\ + stk->zid = (mnum);\ STACK_INC;\ } while(0) @@ -626,10 +1574,10 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, while (k > stk_base) {\ k--;\ if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \ - && k->u.mem.num == (mnum)) {\ + && k->zid == (mnum)) {\ level++;\ }\ - else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\ + else if (k->type == STK_MEM_START && k->zid == (mnum)) {\ if (level == 0) break;\ level--;\ }\ @@ -654,18 +1602,18 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, }\ } while(0) -#define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\ +#define STACK_PUSH_EMPTY_CHECK_START(cnum, s) do {\ STACK_ENSURE(1);\ - stk->type = STK_NULL_CHECK_START;\ - stk->u.null_check.num = (cnum);\ - stk->u.null_check.pstr = (s);\ + stk->type = STK_EMPTY_CHECK_START;\ + stk->zid = (cnum);\ + stk->u.empty_check.pstr = (s);\ STACK_INC;\ } while(0) -#define STACK_PUSH_NULL_CHECK_END(cnum) do {\ +#define STACK_PUSH_EMPTY_CHECK_END(cnum) do {\ STACK_ENSURE(1);\ - stk->type = STK_NULL_CHECK_END;\ - stk->u.null_check.num = (cnum);\ + stk->type = STK_EMPTY_CHECK_END;\ + stk->zid = (cnum);\ STACK_INC;\ } while(0) @@ -682,6 +1630,115 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, STACK_INC;\ } while(0) +#define STACK_PUSH_SAVE_VAL(sid, stype, sval) do {\ + STACK_ENSURE(1);\ + stk->type = STK_SAVE_VAL;\ + stk->zid = (sid);\ + stk->u.val.type = (stype);\ + stk->u.val.v = (UChar* )(sval);\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_SAVE_VAL_WITH_SPREV(sid, stype, sval) do {\ + STACK_ENSURE(1);\ + stk->type = STK_SAVE_VAL;\ + stk->zid = (sid);\ + stk->u.val.type = (stype);\ + stk->u.val.v = (UChar* )(sval);\ + stk->u.val.v2 = sprev;\ + STACK_INC;\ +} while(0) + +#define STACK_GET_SAVE_VAL_TYPE_LAST(stype, sval) do {\ + StackType *k = stk;\ + while (k > stk_base) {\ + k--;\ + STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST"); \ + if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)) {\ + (sval) = k->u.val.v;\ + break;\ + }\ + }\ +} while (0) + +#define STACK_GET_SAVE_VAL_TYPE_LAST_ID(stype, sid, sval) do { \ + int level = 0;\ + StackType *k = stk;\ + while (k > stk_base) {\ + k--;\ + STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID"); \ + if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\ + && k->zid == (sid)) {\ + if (level == 0) {\ + (sval) = k->u.val.v;\ + break;\ + }\ + }\ + else if (k->type == STK_CALL_FRAME)\ + level--;\ + else if (k->type == STK_RETURN)\ + level++;\ + }\ +} while (0) + +#define STACK_GET_SAVE_VAL_TYPE_LAST_ID_WITH_SPREV(stype, sid, sval) do { \ + int level = 0;\ + StackType *k = stk;\ + while (k > stk_base) {\ + k--;\ + STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID"); \ + if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\ + && k->zid == (sid)) {\ + if (level == 0) {\ + (sval) = k->u.val.v;\ + sprev = k->u.val.v2;\ + break;\ + }\ + }\ + else if (k->type == STK_CALL_FRAME)\ + level--;\ + else if (k->type == STK_RETURN)\ + level++;\ + }\ +} while (0) + +#define STACK_GET_SAVE_VAL_TYPE_LAST_ID_FROM(stype, sid, sval, stk_from) do { \ + int level = 0;\ + StackType *k = (stk_from);\ + while (k > stk_base) {\ + STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID_FROM"); \ + if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\ + && k->u.val.id == (sid)) {\ + if (level == 0) {\ + (sval) = k->u.val.v;\ + break;\ + }\ + }\ + else if (k->type == STK_CALL_FRAME)\ + level--;\ + else if (k->type == STK_RETURN)\ + level++;\ + k--;\ + }\ +} while (0) + +#define STACK_PUSH_CALLOUT_CONTENTS(anum, func) do {\ + STACK_ENSURE(1);\ + stk->type = STK_CALLOUT;\ + stk->zid = ONIG_NON_NAME_ID;\ + stk->u.callout.num = (anum);\ + stk->u.callout.func = (func);\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_CALLOUT_NAME(aid, anum, func) do {\ + STACK_ENSURE(1);\ + stk->type = STK_CALLOUT;\ + stk->zid = (aid);\ + stk->u.callout.num = (anum);\ + stk->u.callout.func = (func);\ + STACK_INC;\ +} while(0) #ifdef ONIG_DEBUG #define STACK_BASE_CHECK(p, at) \ @@ -698,6 +1755,16 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \ } while(0) + +#ifdef USE_CALLOUT +#define POP_CALLOUT_CASE \ + else if (stk->type == STK_CALLOUT) {\ + RETRACTION_CALLOUT(stk->u.callout.func, stk->zid, stk->u.callout.num, msa->mp->callout_user_data);\ + } +#else +#define POP_CALLOUT_CASE +#endif + #define STACK_POP do {\ switch (pop_level) {\ case STACK_POP_LEVEL_FREE:\ @@ -705,7 +1772,6 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, stk--;\ STACK_BASE_CHECK(stk, "STACK_POP"); \ if ((stk->type & STK_MASK_POP_USED) != 0) break;\ - ELSE_IF_STATE_CHECK_MARK(stk);\ }\ break;\ case STACK_POP_LEVEL_MEM_START:\ @@ -714,10 +1780,9 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, STACK_BASE_CHECK(stk, "STACK_POP 2"); \ if ((stk->type & STK_MASK_POP_USED) != 0) break;\ else if (stk->type == STK_MEM_START) {\ - mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ - mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + mem_start_stk[stk->zid] = stk->u.mem.prev_start;\ + mem_end_stk[stk->zid] = stk->u.mem.prev_end;\ }\ - ELSE_IF_STATE_CHECK_MARK(stk);\ }\ break;\ default:\ @@ -725,136 +1790,108 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, stk--;\ STACK_BASE_CHECK(stk, "STACK_POP 3"); \ if ((stk->type & STK_MASK_POP_USED) != 0) break;\ - else if (stk->type == STK_MEM_START) {\ - mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ - mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ - }\ - else if (stk->type == STK_REPEAT_INC) {\ - STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ - }\ - else if (stk->type == STK_MEM_END) {\ - mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ - mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + else if ((stk->type & STK_MASK_POP_HANDLED) != 0) {\ + if (stk->type == STK_MEM_START) {\ + mem_start_stk[stk->zid] = stk->u.mem.prev_start;\ + mem_end_stk[stk->zid] = stk->u.mem.prev_end;\ + }\ + else if (stk->type == STK_REPEAT_INC) {\ + STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ + }\ + else if (stk->type == STK_MEM_END) {\ + mem_start_stk[stk->zid] = stk->u.mem.prev_start;\ + mem_end_stk[stk->zid] = stk->u.mem.prev_end;\ + }\ + POP_CALLOUT_CASE\ }\ - ELSE_IF_STATE_CHECK_MARK(stk);\ }\ break;\ }\ } while(0) -#define STACK_POP_TIL_POS_NOT do {\ +#define POP_TIL_BODY(aname, til_type) do {\ while (1) {\ stk--;\ - STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \ - if (stk->type == STK_POS_NOT) break;\ - else if (stk->type == STK_MEM_START) {\ - mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ - mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ - }\ - else if (stk->type == STK_REPEAT_INC) {\ - STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ - }\ - else if (stk->type == STK_MEM_END) {\ - mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ - mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + STACK_BASE_CHECK(stk, (aname));\ + if ((stk->type & STK_MASK_POP_HANDLED_TIL) != 0) {\ + if (stk->type == (til_type)) break;\ + else {\ + if (stk->type == STK_MEM_START) {\ + mem_start_stk[stk->zid] = stk->u.mem.prev_start;\ + mem_end_stk[stk->zid] = stk->u.mem.prev_end;\ + }\ + else if (stk->type == STK_REPEAT_INC) {\ + STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ + }\ + else if (stk->type == STK_MEM_END) {\ + mem_start_stk[stk->zid] = stk->u.mem.prev_start;\ + mem_end_stk[stk->zid] = stk->u.mem.prev_end;\ + }\ + /* Don't call callout here because negation of total success by (?!..) (?type == STK_LOOK_BEHIND_NOT) break;\ - else if (stk->type == STK_MEM_START) {\ - mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ - mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ - }\ - else if (stk->type == STK_REPEAT_INC) {\ - STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ - }\ - else if (stk->type == STK_MEM_END) {\ - mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ - mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ - }\ - ELSE_IF_STATE_CHECK_MARK(stk);\ - }\ +#define STACK_POP_TIL_ALT_PREC_READ_NOT do {\ + POP_TIL_BODY("STACK_POP_TIL_ALT_PREC_READ_NOT", STK_ALT_PREC_READ_NOT);\ } while(0) -#define STACK_POS_END(k) do {\ - k = stk;\ - while (1) {\ - k--;\ - STACK_BASE_CHECK(k, "STACK_POS_END"); \ - if (IS_TO_VOID_TARGET(k)) {\ - k->type = STK_VOID;\ - }\ - else if (k->type == STK_POS) {\ - k->type = STK_VOID;\ - break;\ - }\ - }\ +#define STACK_POP_TIL_ALT_LOOK_BEHIND_NOT do {\ + POP_TIL_BODY("STACK_POP_TIL_ALT_LOOK_BEHIND_NOT", STK_ALT_LOOK_BEHIND_NOT);\ } while(0) -#define STACK_STOP_BT_END do {\ - OnigStackType *k = stk;\ + +#define STACK_EXEC_TO_VOID(k) do {\ + k = stk;\ while (1) {\ k--;\ - STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \ + STACK_BASE_CHECK(k, "STACK_EXEC_TO_VOID"); \ if (IS_TO_VOID_TARGET(k)) {\ + if (k->type == STK_TO_VOID_START) {\ + k->type = STK_VOID;\ + break;\ + }\ k->type = STK_VOID;\ }\ - else if (k->type == STK_STOP_BT) {\ - k->type = STK_VOID;\ - break;\ - }\ }\ } while(0) -#define STACK_NULL_CHECK(isnull,id,s) do {\ - OnigStackType* k = stk;\ +#define STACK_EMPTY_CHECK(isnull,sid,s) do {\ + StackType* k = stk;\ while (1) {\ k--;\ - STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \ - if (k->type == STK_NULL_CHECK_START) {\ - if (k->u.null_check.num == (id)) {\ - (isnull) = (k->u.null_check.pstr == (s));\ + STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK"); \ + if (k->type == STK_EMPTY_CHECK_START) {\ + if (k->zid == (sid)) {\ + (isnull) = (k->u.empty_check.pstr == (s));\ break;\ }\ }\ }\ } while(0) -#define STACK_NULL_CHECK_REC(isnull,id,s) do {\ - int level = 0;\ - OnigStackType* k = stk;\ - while (1) {\ - k--;\ - STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \ - if (k->type == STK_NULL_CHECK_START) {\ - if (k->u.null_check.num == (id)) {\ - if (level == 0) {\ - (isnull) = (k->u.null_check.pstr == (s));\ - break;\ - }\ - else level--;\ - }\ - }\ - else if (k->type == STK_NULL_CHECK_END) {\ - level++;\ - }\ +#define STACK_MEM_START_GET_PREV_END_ADDR(k /* STK_MEM_START*/, reg, addr) do {\ + if (k->u.mem.prev_end == INVALID_STACK_INDEX) {\ + (addr) = 0;\ }\ -} while(0) + else {\ + if (MEM_STATUS_AT((reg)->bt_mem_end, k->zid))\ + (addr) = STACK_AT(k->u.mem.prev_end)->u.mem.pstr;\ + else\ + (addr) = (UChar* )k->u.mem.prev_end;\ + }\ +} while (0) -#define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\ - OnigStackType* k = stk;\ +#ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT +#define STACK_EMPTY_CHECK_MEM(isnull,sid,s,reg) do {\ + StackType* k = stk;\ while (1) {\ k--;\ - STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \ - if (k->type == STK_NULL_CHECK_START) {\ - if (k->u.null_check.num == (id)) {\ - if (k->u.null_check.pstr != (s)) {\ + STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEM"); \ + if (k->type == STK_EMPTY_CHECK_START) {\ + if (k->zid == (sid)) {\ + if (k->u.empty_check.pstr != (s)) {\ (isnull) = 0;\ break;\ }\ @@ -863,14 +1900,11 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, (isnull) = 1;\ while (k < stk) {\ if (k->type == STK_MEM_START) {\ - if (k->u.mem.end == INVALID_STACK_INDEX) {\ + STACK_MEM_START_GET_PREV_END_ADDR(k, reg, endp);\ + if (endp == 0) {\ (isnull) = 0; break;\ }\ - if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\ - endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\ - else\ - endp = (UChar* )(UINTN)k->u.mem.end;\ - if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\ + else if (STACK_AT(k->u.mem.prev_start)->u.mem.pstr != endp) {\ (isnull) = 0; break;\ }\ else if (endp != s) {\ @@ -879,23 +1913,23 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, }\ k++;\ }\ - break;\ + break;\ }\ }\ }\ }\ } while(0) -#define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\ +#define STACK_EMPTY_CHECK_MEM_REC(isnull,sid,s,reg) do {\ int level = 0;\ - OnigStackType* k = stk;\ + StackType* k = stk;\ while (1) {\ k--;\ - STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \ - if (k->type == STK_NULL_CHECK_START) {\ - if (k->u.null_check.num == (id)) {\ + STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEM_REC");\ + if (k->type == STK_EMPTY_CHECK_START) {\ + if (k->zid == (sid)) {\ if (level == 0) {\ - if (k->u.null_check.pstr != (s)) {\ + if (k->u.empty_check.pstr != (s)) {\ (isnull) = 0;\ break;\ }\ @@ -904,23 +1938,28 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, (isnull) = 1;\ while (k < stk) {\ if (k->type == STK_MEM_START) {\ - if (k->u.mem.end == INVALID_STACK_INDEX) {\ - (isnull) = 0; break;\ - }\ - if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\ - endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\ - else\ - endp = (UChar* )(UINTN)k->u.mem.end;\ - if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\ - (isnull) = 0; break;\ - }\ - else if (endp != s) {\ - (isnull) = -1; /* empty, but position changed */ \ + if (level == 0) {\ + STACK_MEM_START_GET_PREV_END_ADDR(k, reg, endp);\ + if (endp == 0) {\ + (isnull) = 0; break;\ + }\ + else if (STACK_AT(k->u.mem.prev_start)->u.mem.pstr != endp) { \ + (isnull) = 0; break;\ + }\ + else if (endp != s) {\ + (isnull) = -1; /* empty, but position changed */\ + }\ }\ }\ + else if (k->type == STK_EMPTY_CHECK_START) {\ + if (k->zid == (sid)) level++;\ + }\ + else if (k->type == STK_EMPTY_CHECK_END) {\ + if (k->zid == (sid)) level--;\ + }\ k++;\ }\ - break;\ + break;\ }\ }\ else {\ @@ -928,13 +1967,35 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, }\ }\ }\ - else if (k->type == STK_NULL_CHECK_END) {\ - if (k->u.null_check.num == (id)) level++;\ + else if (k->type == STK_EMPTY_CHECK_END) {\ + if (k->zid == (sid)) level++;\ + }\ + }\ +} while(0) +#else +#define STACK_EMPTY_CHECK_REC(isnull,id,s) do {\ + int level = 0;\ + StackType* k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_REC"); \ + if (k->type == STK_EMPTY_CHECK_START) {\ + if (k->u.empty_check.num == (id)) {\ + if (level == 0) {\ + (isnull) = (k->u.empty_check.pstr == (s));\ + break;\ + }\ + }\ + level--;\ + }\ + else if (k->type == STK_EMPTY_CHECK_END) {\ + level++;\ }\ }\ } while(0) +#endif /* USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT */ -#define STACK_GET_REPEAT(id, k) do {\ +#define STACK_GET_REPEAT(sid, k) do {\ int level = 0;\ k = stk;\ while (1) {\ @@ -942,7 +2003,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \ if (k->type == STK_REPEAT) {\ if (level == 0) {\ - if (k->u.repeat.num == (id)) {\ + if (k->zid == (sid)) {\ break;\ }\ }\ @@ -954,7 +2015,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, #define STACK_RETURN(addr) do {\ int level = 0;\ - OnigStackType* k = stk;\ + StackType* k = stk;\ while (1) {\ k--;\ STACK_BASE_CHECK(k, "STACK_RETURN"); \ @@ -983,7 +2044,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, } while(0) static int string_cmp_ic(OnigEncoding enc, int case_fold_flag, - UChar* s1, UChar** ps2, int mblen) + UChar* s1, UChar** ps2, int mblen) { UChar buf1[ONIGENC_MBC_CASE_FOLD_MAXLEN]; UChar buf2[ONIGENC_MBC_CASE_FOLD_MAXLEN]; @@ -1028,33 +2089,28 @@ static int string_cmp_ic(OnigEncoding enc, int case_fold_flag, #define IS_EMPTY_STR (str == end) -#define ON_STR_BEGIN(s) ((s) == str) -#define ON_STR_END(s) ((s) == end) -#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE +#define ON_STR_BEGIN(s) ((s) == str) +#define ON_STR_END(s) ((s) == end) #define DATA_ENSURE_CHECK1 (s < right_range) #define DATA_ENSURE_CHECK(n) (s + (n) <= right_range) #define DATA_ENSURE(n) if (s + (n) > right_range) goto fail -#else -#define DATA_ENSURE_CHECK1 (s < end) -#define DATA_ENSURE_CHECK(n) (s + (n) <= end) -#define DATA_ENSURE(n) if (s + (n) > end) goto fail -#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */ +#define INIT_RIGHT_RANGE right_range = (UChar* )in_right_range #ifdef USE_CAPTURE_HISTORY static int -make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp, - OnigStackType* stk_top, UChar* str, regex_t* reg) +make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp, + StackType* stk_top, UChar* str, regex_t* reg) { int n, r; OnigCaptureTreeNode* child; - OnigStackType* k = *kp; + StackType* k = *kp; while (k < stk_top) { if (k->type == STK_MEM_START) { - n = k->u.mem.num; + n = k->zid; if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP && - BIT_STATUS_AT(reg->capture_history, n) != 0) { + MEM_STATUS_AT(reg->capture_history, n) != 0) { child = history_node_new(); CHECK_NULL_RETURN_MEMERR(child); child->group = n; @@ -1070,7 +2126,7 @@ make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp, } } else if (k->type == STK_MEM_END) { - if (k->u.mem.num == node->group) { + if (k->zid == node->group) { node->end = (int )(k->u.mem.pstr - str); *kp = k; return 0; @@ -1096,14 +2152,16 @@ static int mem_is_in_memp(int mem, int num, UChar* memp) return 0; } -static int backref_match_at_nested_level(regex_t* reg - , OnigStackType* top, OnigStackType* stk_base - , int ignore_case, int case_fold_flag - , int nest, int mem_num, UChar* memp, UChar** s, const UChar* send) +static int +backref_match_at_nested_level(regex_t* reg, + StackType* top, StackType* stk_base, + int ignore_case, int case_fold_flag, + int nest, int mem_num, UChar* memp, + UChar** s, const UChar* send) { UChar *ss, *p, *pstart, *pend = NULL_UCHARP; int level; - OnigStackType* k; + StackType* k; level = 0; k = top; @@ -1117,33 +2175,64 @@ static int backref_match_at_nested_level(regex_t* reg } else if (level == nest) { if (k->type == STK_MEM_START) { - if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) { - pstart = k->u.mem.pstr; - if (pend != NULL_UCHARP) { - if (pend - pstart > send - *s) return 0; /* or goto next_mem; */ - p = pstart; - ss = *s; - - if (ignore_case != 0) { - if (string_cmp_ic(reg->enc, case_fold_flag, - pstart, &ss, (int )(pend - pstart)) == 0) - return 0; /* or goto next_mem; */ - } - else { - while (p < pend) { - if (*p++ != *ss++) return 0; /* or goto next_mem; */ - } - } - - *s = ss; - return 1; - } - } + if (mem_is_in_memp(k->zid, mem_num, memp)) { + pstart = k->u.mem.pstr; + if (IS_NOT_NULL(pend)) { + if (pend - pstart > send - *s) return 0; /* or goto next_mem; */ + p = pstart; + ss = *s; + + if (ignore_case != 0) { + if (string_cmp_ic(reg->enc, case_fold_flag, + pstart, &ss, (int )(pend - pstart)) == 0) + return 0; /* or goto next_mem; */ + } + else { + while (p < pend) { + if (*p++ != *ss++) return 0; /* or goto next_mem; */ + } + } + + *s = ss; + return 1; + } + } } else if (k->type == STK_MEM_END) { - if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) { - pend = k->u.mem.pstr; - } + if (mem_is_in_memp(k->zid, mem_num, memp)) { + pend = k->u.mem.pstr; + } + } + } + k--; + } + + return 0; +} + +static int +backref_check_at_nested_level(regex_t* reg, + StackType* top, StackType* stk_base, + int nest, int mem_num, UChar* memp) +{ + int level; + StackType* k; + + level = 0; + k = top; + k--; + while (k >= stk_base) { + if (k->type == STK_CALL_FRAME) { + level--; + } + else if (k->type == STK_RETURN) { + level++; + } + else if (level == nest) { + if (k->type == STK_MEM_END) { + if (mem_is_in_memp(k->zid, mem_num, memp)) { + return 1; + } } } k--; @@ -1185,14 +2274,14 @@ static int OpCurr = OP_FINISH; static int OpPrevTarget = OP_FAIL; static int MaxStackDepth = 0; -#define MOP_IN(opcode) do {\ +#define SOP_IN(opcode) do {\ if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\ OpCurr = opcode;\ OpCounter[opcode]++;\ GETTIME(ts);\ } while(0) -#define MOP_OUT do {\ +#define SOP_OUT do {\ GETTIME(te);\ OpTime[OpCurr] += TIMEDIFF(te, ts);\ } while(0) @@ -1207,16 +2296,24 @@ onig_statistics_init(void) MaxStackDepth = 0; } -extern void +extern int onig_print_statistics(FILE* f) { + int r; int i; - fprintf(f, " count prev time\n"); - for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { - fprintf(f, "%8d: %8d: %10ld: %s\n", - OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name); + + r = fprintf(f, " count prev time\n"); + if (r < 0) return -1; + + for (i = 0; OpInfo[i].opcode >= 0; i++) { + r = fprintf(f, "%8d: %8d: %10ld: %s\n", + OpCounter[i], OpPrevCounter[i], OpTime[i], OpInfo[i].name); + if (r < 0) return -1; } - fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth); + r = fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth); + if (r < 0) return -1; + + return 0; } #define STACK_INC do {\ @@ -1228,8 +2325,8 @@ onig_print_statistics(FILE* f) #else #define STACK_INC stk++ -#define MOP_IN(opcode) -#define MOP_OUT +#define SOP_IN(opcode) +#define SOP_OUT #endif @@ -1245,10 +2342,8 @@ typedef struct { /* if sstart == str then set sprev to NULL. */ static int match_at(regex_t* reg, const UChar* str, const UChar* end, -#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE - const UChar* right_range, -#endif - const UChar* sstart, UChar* sprev, OnigMatchArg* msa) + const UChar* in_right_range, const UChar* sstart, UChar* sprev, + MatchArg* msa) { static UChar FinishCode[] = { OP_FINISH }; @@ -1256,133 +2351,159 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, LengthType tlen, tlen2; MemNumType mem; RelAddrType addr; + UChar *s, *q, *sbegin; + UChar *right_range; + int is_alloca; + char *alloc_base; + StackType *stk_base, *stk, *stk_end; + StackType *stkp; /* used as any purpose. */ + StackIndex si; + StackIndex *repeat_stk; + StackIndex *mem_start_stk, *mem_end_stk; + UChar* keep; +#ifdef USE_RETRY_LIMIT_IN_MATCH + unsigned long retry_limit_in_match; + unsigned long retry_in_match_counter; +#endif + +#ifdef USE_CALLOUT + int of; +#endif + + UChar *p = reg->p; OnigOptionType option = reg->options; OnigEncoding encode = reg->enc; OnigCaseFoldType case_fold_flag = reg->case_fold_flag; - UChar *s, *q, *sbegin; - UChar *p = reg->p; - char *alloca_base; - OnigStackType *stk_alloc, *stk_base, *stk, *stk_end; - OnigStackType *stkp; /* used as any purpose. */ - OnigStackIndex si; - OnigStackIndex *repeat_stk; - OnigStackIndex *mem_start_stk, *mem_end_stk; -#ifdef USE_COMBINATION_EXPLOSION_CHECK - int scv; - unsigned char* state_check_buff = msa->state_check_buff; - int num_comb_exp_check = reg->num_comb_exp_check; -#endif - n = reg->num_repeat + reg->num_mem * 2; - - STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE); + +#ifdef USE_CALLOUT + msa->mp->match_at_call_counter++; +#endif + +#ifdef USE_RETRY_LIMIT_IN_MATCH + retry_limit_in_match = msa->retry_limit_in_match; +#endif + pop_level = reg->stack_pop_level; num_mem = reg->num_mem; - repeat_stk = (OnigStackIndex* )alloca_base; - - mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat); - mem_end_stk = mem_start_stk + num_mem; - mem_start_stk--; /* for index start from 1, - mem_start_stk[1]..mem_start_stk[num_mem] */ - mem_end_stk--; /* for index start from 1, - mem_end_stk[1]..mem_end_stk[num_mem] */ + STACK_INIT(INIT_MATCH_STACK_SIZE); + UPDATE_FOR_STACK_REALLOC; for (i = 1; i <= num_mem; i++) { mem_start_stk[i] = mem_end_stk[i] = INVALID_STACK_INDEX; } #ifdef ONIG_DEBUG_MATCH - fprintf(stderr, "match_at: str: %d, end: %d, start: %d, sprev: %d\n", - (int )str, (int )end, (int )sstart, (int )sprev); + fprintf(stderr, "match_at: str: %p, end: %p, start: %p, sprev: %p\n", + str, end, sstart, sprev); fprintf(stderr, "size: %d, start offset: %d\n", - (int )(end - str), (int )(sstart - str)); + (int )(end - str), (int )(sstart - str)); #endif - STACK_PUSH_ENSURED(STK_ALT, FinishCode); /* bottom stack */ best_len = ONIG_MISMATCH; - s = (UChar* )sstart; + keep = s = (UChar* )sstart; + STACK_PUSH_BOTTOM(STK_ALT, FinishCode); /* bottom stack */ + INIT_RIGHT_RANGE; + +#ifdef USE_RETRY_LIMIT_IN_MATCH + retry_in_match_counter = 0; +#endif + while (1) { #ifdef ONIG_DEBUG_MATCH { + static unsigned int counter = 1; + UChar *q, *bp, buf[50]; int len; - fprintf(stderr, "%4d> \"", (int )(s - str)); + fprintf(stderr, "%7u: %7ld: %4d> \"", + counter, GET_STACK_INDEX(stk), (int )(s - str)); + counter++; + bp = buf; for (i = 0, q = s; i < 7 && q < end; i++) { - len = enclen(encode, q); - while (len-- > 0) *bp++ = *q++; + len = enclen(encode, q); + while (len-- > 0) *bp++ = *q++; } if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; } else { xmemcpy(bp, "\"", 1); bp += 1; } *bp = 0; fputs((char* )buf, stderr); + for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); - onig_print_compiled_byte_code(stderr, p, NULL, encode); + if (p == FinishCode) + fprintf(stderr, "----: "); + else + fprintf(stderr, "%4d: ", (int )(p - reg->p)); + onig_print_compiled_byte_code(stderr, p, NULL, reg->p, encode); fprintf(stderr, "\n"); } #endif sbegin = s; switch (*p++) { - case OP_END: MOP_IN(OP_END); - n = (int)(s - sstart); + case OP_END: SOP_IN(OP_END); + n = (int )(s - sstart); if (n > best_len) { - OnigRegion* region; + OnigRegion* region; #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE - if (IS_FIND_LONGEST(option)) { - if (n > msa->best_len) { - msa->best_len = n; - msa->best_s = (UChar* )sstart; - } - else - goto end_best_len; + if (IS_FIND_LONGEST(option)) { + if (n > msa->best_len) { + msa->best_len = n; + msa->best_s = (UChar* )sstart; + } + else + goto end_best_len; } #endif - best_len = n; - region = msa->region; - if (region) { + best_len = n; + region = msa->region; + if (region) { + if (keep > s) keep = s; + #ifdef USE_POSIX_API_REGION_OPTION - if (IS_POSIX_REGION(msa->options)) { - posix_regmatch_t* rmt = (posix_regmatch_t* )region; - - rmt[0].rm_so = (regoff_t)(sstart - str); - rmt[0].rm_eo = (regoff_t)(s - str); - for (i = 1; i <= num_mem; i++) { - if (mem_end_stk[i] != INVALID_STACK_INDEX) { - if (BIT_STATUS_AT(reg->bt_mem_start, i)) - rmt[i].rm_so = (regoff_t)(STACK_AT(mem_start_stk[i])->u.mem.pstr - str); - else - rmt[i].rm_so = (regoff_t)((UChar* )((void* )(UINTN)(mem_start_stk[i])) - str); - - rmt[i].rm_eo = (regoff_t)((BIT_STATUS_AT(reg->bt_mem_end, i) - ? STACK_AT(mem_end_stk[i])->u.mem.pstr - : (UChar* )((void* )(UINTN)mem_end_stk[i])) - str); - } - else { - rmt[i].rm_so = rmt[i].rm_eo = ONIG_REGION_NOTPOS; - } - } - } - else { + if (IS_POSIX_REGION(msa->options)) { + posix_regmatch_t* rmt = (posix_regmatch_t* )region; + + rmt[0].rm_so = (regoff_t )(keep - str); + rmt[0].rm_eo = (regoff_t )(s - str); + for (i = 1; i <= num_mem; i++) { + if (mem_end_stk[i] != INVALID_STACK_INDEX) { + if (MEM_STATUS_AT(reg->bt_mem_start, i)) + rmt[i].rm_so = (regoff_t )(STACK_AT(mem_start_stk[i])->u.mem.pstr - str); + else + rmt[i].rm_so = (regoff_t )((UChar* )((void* )(mem_start_stk[i])) - str); + + rmt[i].rm_eo = (regoff_t )((MEM_STATUS_AT(reg->bt_mem_end, i) + ? STACK_AT(mem_end_stk[i])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[i])) + - str); + } + else { + rmt[i].rm_so = rmt[i].rm_eo = ONIG_REGION_NOTPOS; + } + } + } + else { #endif /* USE_POSIX_API_REGION_OPTION */ - region->beg[0] = (int)(sstart - str); - region->end[0] = (int)(s - str); - for (i = 1; i <= num_mem; i++) { - if (mem_end_stk[i] != INVALID_STACK_INDEX) { - if (BIT_STATUS_AT(reg->bt_mem_start, i)) - region->beg[i] = (int)(STACK_AT(mem_start_stk[i])->u.mem.pstr - str); - else - region->beg[i] = (int)((UChar* )((void* )(UINTN)mem_start_stk[i]) - str); - - region->end[i] = (int)((BIT_STATUS_AT(reg->bt_mem_end, i) - ? STACK_AT(mem_end_stk[i])->u.mem.pstr - : (UChar* )((void* )(UINTN)mem_end_stk[i])) - str); - } - else { - region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS; - } - } + region->beg[0] = (int )(keep - str); + region->end[0] = (int )(s - str); + for (i = 1; i <= num_mem; i++) { + if (mem_end_stk[i] != INVALID_STACK_INDEX) { + if (MEM_STATUS_AT(reg->bt_mem_start, i)) + region->beg[i] = (int )(STACK_AT(mem_start_stk[i])->u.mem.pstr - str); + else + region->beg[i] = (int )((UChar* )((void* )mem_start_stk[i]) - str); + + region->end[i] = (int )((MEM_STATUS_AT(reg->bt_mem_end, i) + ? STACK_AT(mem_end_stk[i])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[i])) - str); + } + else { + region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS; + } + } #ifdef USE_CAPTURE_HISTORY - if (reg->capture_history != 0) { + if (reg->capture_history != 0) { int r; OnigCaptureTreeNode* node; @@ -1396,8 +2517,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } node->group = 0; - node->beg = (int)(sstart - str); - node->end = (int)(s - str); + node->beg = (int )(keep - str); + node->end = (int )(s - str); stkp = stk_base; r = make_capture_history_tree(region->history_root, &stkp, @@ -1406,79 +2527,74 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, best_len = r; /* error code */ goto finish; } - } + } #endif /* USE_CAPTURE_HISTORY */ #ifdef USE_POSIX_API_REGION_OPTION - } /* else IS_POSIX_REGION() */ + } /* else IS_POSIX_REGION() */ #endif - } /* if (region) */ + } /* if (region) */ } /* n > best_len */ #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE end_best_len: #endif - MOP_OUT; + SOP_OUT; if (IS_FIND_CONDITION(option)) { - if (IS_FIND_NOT_EMPTY(option) && s == sstart) { - best_len = ONIG_MISMATCH; - goto fail; /* for retry */ - } - if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) { - goto fail; /* for retry */ - } + if (IS_FIND_NOT_EMPTY(option) && s == sstart) { + best_len = ONIG_MISMATCH; + goto fail; /* for retry */ + } + if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) { + goto fail; /* for retry */ + } } /* default behavior: return first-matching result. */ goto finish; break; - case OP_EXACT1: MOP_IN(OP_EXACT1); -#if 0 + case OP_EXACT1: SOP_IN(OP_EXACT1); DATA_ENSURE(1); if (*p != *s) goto fail; p++; s++; -#endif - if (*p != *s++) goto fail; - DATA_ENSURE(0); - p++; - MOP_OUT; + SOP_OUT; break; - case OP_EXACT1_IC: MOP_IN(OP_EXACT1_IC); + case OP_EXACT1_IC: SOP_IN(OP_EXACT1_IC); { - int len; - UChar *q1, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; - - DATA_ENSURE(1); - len = ONIGENC_MBC_CASE_FOLD(encode, - /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */ - case_fold_flag, - &s, end, lowbuf); - DATA_ENSURE(0); - q1 = lowbuf; - while (len-- > 0) { - if (*p != *q1) { + int len; + UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; + + DATA_ENSURE(1); + len = ONIGENC_MBC_CASE_FOLD(encode, + /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */ + case_fold_flag, + &s, end, lowbuf); + DATA_ENSURE(0); + q = lowbuf; + while (len-- > 0) { + if (*p != *q) { goto fail; } - p++; q1++; - } + p++; q++; + } } - MOP_OUT; + SOP_OUT; break; - case OP_EXACT2: MOP_IN(OP_EXACT2); + case OP_EXACT2: SOP_IN(OP_EXACT2); DATA_ENSURE(2); if (*p != *s) goto fail; p++; s++; if (*p != *s) goto fail; sprev = s; p++; s++; - MOP_OUT; + SOP_OUT; continue; break; - case OP_EXACT3: MOP_IN(OP_EXACT3); + case OP_EXACT3: SOP_IN(OP_EXACT3); DATA_ENSURE(3); if (*p != *s) goto fail; p++; s++; @@ -1487,11 +2603,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (*p != *s) goto fail; sprev = s; p++; s++; - MOP_OUT; + SOP_OUT; continue; break; - case OP_EXACT4: MOP_IN(OP_EXACT4); + case OP_EXACT4: SOP_IN(OP_EXACT4); DATA_ENSURE(4); if (*p != *s) goto fail; p++; s++; @@ -1502,11 +2618,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (*p != *s) goto fail; sprev = s; p++; s++; - MOP_OUT; + SOP_OUT; continue; break; - case OP_EXACT5: MOP_IN(OP_EXACT5); + case OP_EXACT5: SOP_IN(OP_EXACT5); DATA_ENSURE(5); if (*p != *s) goto fail; p++; s++; @@ -1519,59 +2635,59 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (*p != *s) goto fail; sprev = s; p++; s++; - MOP_OUT; + SOP_OUT; continue; break; - case OP_EXACTN: MOP_IN(OP_EXACTN); + case OP_EXACTN: SOP_IN(OP_EXACTN); GET_LENGTH_INC(tlen, p); DATA_ENSURE(tlen); while (tlen-- > 0) { - if (*p++ != *s++) goto fail; + if (*p++ != *s++) goto fail; } sprev = s - 1; - MOP_OUT; + SOP_OUT; continue; break; - case OP_EXACTN_IC: MOP_IN(OP_EXACTN_IC); + case OP_EXACTN_IC: SOP_IN(OP_EXACTN_IC); { - int len; - UChar *qn, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; - - GET_LENGTH_INC(tlen, p); - endp = p + tlen; - - while (p < endp) { - sprev = s; - DATA_ENSURE(1); - len = ONIGENC_MBC_CASE_FOLD(encode, - /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */ - case_fold_flag, - &s, end, lowbuf); - DATA_ENSURE(0); - qn = lowbuf; - while (len-- > 0) { - if (*p != *qn) goto fail; - p++; qn++; - } - } - } - - MOP_OUT; + int len; + UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; + + GET_LENGTH_INC(tlen, p); + endp = p + tlen; + + while (p < endp) { + sprev = s; + DATA_ENSURE(1); + len = ONIGENC_MBC_CASE_FOLD(encode, + /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */ + case_fold_flag, + &s, end, lowbuf); + DATA_ENSURE(0); + q = lowbuf; + while (len-- > 0) { + if (*p != *q) goto fail; + p++; q++; + } + } + } + + SOP_OUT; continue; break; - case OP_EXACTMB2N1: MOP_IN(OP_EXACTMB2N1); + case OP_EXACTMB2N1: SOP_IN(OP_EXACTMB2N1); DATA_ENSURE(2); if (*p != *s) goto fail; p++; s++; if (*p != *s) goto fail; p++; s++; - MOP_OUT; + SOP_OUT; break; - case OP_EXACTMB2N2: MOP_IN(OP_EXACTMB2N2); + case OP_EXACTMB2N2: SOP_IN(OP_EXACTMB2N2); DATA_ENSURE(4); if (*p != *s) goto fail; p++; s++; @@ -1582,11 +2698,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p++; s++; if (*p != *s) goto fail; p++; s++; - MOP_OUT; + SOP_OUT; continue; break; - case OP_EXACTMB2N3: MOP_IN(OP_EXACTMB2N3); + case OP_EXACTMB2N3: SOP_IN(OP_EXACTMB2N3); DATA_ENSURE(6); if (*p != *s) goto fail; p++; s++; @@ -1601,469 +2717,488 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p++; s++; if (*p != *s) goto fail; p++; s++; - MOP_OUT; + SOP_OUT; continue; break; - case OP_EXACTMB2N: MOP_IN(OP_EXACTMB2N); + case OP_EXACTMB2N: SOP_IN(OP_EXACTMB2N); GET_LENGTH_INC(tlen, p); DATA_ENSURE(tlen * 2); while (tlen-- > 0) { - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; } sprev = s - 2; - MOP_OUT; + SOP_OUT; continue; break; - case OP_EXACTMB3N: MOP_IN(OP_EXACTMB3N); + case OP_EXACTMB3N: SOP_IN(OP_EXACTMB3N); GET_LENGTH_INC(tlen, p); DATA_ENSURE(tlen * 3); while (tlen-- > 0) { - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; } sprev = s - 3; - MOP_OUT; + SOP_OUT; continue; break; - case OP_EXACTMBN: MOP_IN(OP_EXACTMBN); + case OP_EXACTMBN: SOP_IN(OP_EXACTMBN); GET_LENGTH_INC(tlen, p); /* mb-len */ GET_LENGTH_INC(tlen2, p); /* string len */ tlen2 *= tlen; DATA_ENSURE(tlen2); while (tlen2-- > 0) { - if (*p != *s) goto fail; - p++; s++; + if (*p != *s) goto fail; + p++; s++; } sprev = s - tlen; - MOP_OUT; + SOP_OUT; continue; break; - case OP_CCLASS: MOP_IN(OP_CCLASS); + case OP_CCLASS: SOP_IN(OP_CCLASS); DATA_ENSURE(1); if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail; p += SIZE_BITSET; s += enclen(encode, s); /* OP_CCLASS can match mb-code. \D, \S */ - MOP_OUT; + SOP_OUT; break; - case OP_CCLASS_MB: MOP_IN(OP_CCLASS_MB); + case OP_CCLASS_MB: SOP_IN(OP_CCLASS_MB); if (! ONIGENC_IS_MBC_HEAD(encode, s)) goto fail; cclass_mb: GET_LENGTH_INC(tlen, p); { - OnigCodePoint code; - UChar *ss; - int mb_len; + OnigCodePoint code; + UChar *ss; + int mb_len; - DATA_ENSURE(1); - mb_len = enclen(encode, s); - DATA_ENSURE(mb_len); - ss = s; - s += mb_len; - code = ONIGENC_MBC_TO_CODE(encode, ss, s); + DATA_ENSURE(1); + mb_len = enclen(encode, s); + DATA_ENSURE(mb_len); + ss = s; + s += mb_len; + code = ONIGENC_MBC_TO_CODE(encode, ss, s); #ifdef PLATFORM_UNALIGNED_WORD_ACCESS - if (! onig_is_in_code_range(p, code)) goto fail; + if (! onig_is_in_code_range(p, code)) goto fail; #else - q = p; - ALIGNMENT_RIGHT(q); - if (! onig_is_in_code_range(q, code)) goto fail; + q = p; + ALIGNMENT_RIGHT(q); + if (! onig_is_in_code_range(q, code)) goto fail; #endif } p += tlen; - MOP_OUT; + SOP_OUT; break; - case OP_CCLASS_MIX: MOP_IN(OP_CCLASS_MIX); + case OP_CCLASS_MIX: SOP_IN(OP_CCLASS_MIX); DATA_ENSURE(1); if (ONIGENC_IS_MBC_HEAD(encode, s)) { - p += SIZE_BITSET; - goto cclass_mb; + p += SIZE_BITSET; + goto cclass_mb; } else { - if (BITSET_AT(((BitSetRef )p), *s) == 0) - goto fail; + if (BITSET_AT(((BitSetRef )p), *s) == 0) + goto fail; - p += SIZE_BITSET; - GET_LENGTH_INC(tlen, p); - p += tlen; - s++; + p += SIZE_BITSET; + GET_LENGTH_INC(tlen, p); + p += tlen; + s++; } - MOP_OUT; + SOP_OUT; break; - case OP_CCLASS_NOT: MOP_IN(OP_CCLASS_NOT); + case OP_CCLASS_NOT: SOP_IN(OP_CCLASS_NOT); DATA_ENSURE(1); if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail; p += SIZE_BITSET; s += enclen(encode, s); - MOP_OUT; + SOP_OUT; break; - case OP_CCLASS_MB_NOT: MOP_IN(OP_CCLASS_MB_NOT); + case OP_CCLASS_MB_NOT: SOP_IN(OP_CCLASS_MB_NOT); DATA_ENSURE(1); if (! ONIGENC_IS_MBC_HEAD(encode, s)) { - s++; - GET_LENGTH_INC(tlen, p); - p += tlen; - goto cc_mb_not_success; + s++; + GET_LENGTH_INC(tlen, p); + p += tlen; + goto cc_mb_not_success; } cclass_mb_not: GET_LENGTH_INC(tlen, p); { - OnigCodePoint code; - UChar *ss; - int mb_len = enclen(encode, s); + OnigCodePoint code; + UChar *ss; + int mb_len = enclen(encode, s); - if (! DATA_ENSURE_CHECK(mb_len)) { + if (! DATA_ENSURE_CHECK(mb_len)) { DATA_ENSURE(1); - s = (UChar* )end; - p += tlen; - goto cc_mb_not_success; - } + s = (UChar* )end; + p += tlen; + goto cc_mb_not_success; + } - ss = s; - s += mb_len; - code = ONIGENC_MBC_TO_CODE(encode, ss, s); + ss = s; + s += mb_len; + code = ONIGENC_MBC_TO_CODE(encode, ss, s); #ifdef PLATFORM_UNALIGNED_WORD_ACCESS - if (onig_is_in_code_range(p, code)) goto fail; + if (onig_is_in_code_range(p, code)) goto fail; #else - q = p; - ALIGNMENT_RIGHT(q); - if (onig_is_in_code_range(q, code)) goto fail; + q = p; + ALIGNMENT_RIGHT(q); + if (onig_is_in_code_range(q, code)) goto fail; #endif } p += tlen; cc_mb_not_success: - MOP_OUT; + SOP_OUT; break; - case OP_CCLASS_MIX_NOT: MOP_IN(OP_CCLASS_MIX_NOT); + case OP_CCLASS_MIX_NOT: SOP_IN(OP_CCLASS_MIX_NOT); DATA_ENSURE(1); if (ONIGENC_IS_MBC_HEAD(encode, s)) { - p += SIZE_BITSET; - goto cclass_mb_not; + p += SIZE_BITSET; + goto cclass_mb_not; } else { - if (BITSET_AT(((BitSetRef )p), *s) != 0) - goto fail; + if (BITSET_AT(((BitSetRef )p), *s) != 0) + goto fail; - p += SIZE_BITSET; - GET_LENGTH_INC(tlen, p); - p += tlen; - s++; + p += SIZE_BITSET; + GET_LENGTH_INC(tlen, p); + p += tlen; + s++; } - MOP_OUT; + SOP_OUT; break; - case OP_CCLASS_NODE: MOP_IN(OP_CCLASS_NODE); +#ifdef USE_OP_CCLASS_NODE + case OP_CCLASS_NODE: SOP_IN(OP_CCLASS_NODE); { - OnigCodePoint code; + OnigCodePoint code; void *node; int mb_len; UChar *ss; DATA_ENSURE(1); GET_POINTER_INC(node, p); - mb_len = enclen(encode, s); - ss = s; - s += mb_len; - DATA_ENSURE(0); - code = ONIGENC_MBC_TO_CODE(encode, ss, s); - if (onig_is_code_in_cc_len(mb_len, code, node) == 0) goto fail; + mb_len = enclen(encode, s); + ss = s; + s += mb_len; + DATA_ENSURE(0); + code = ONIGENC_MBC_TO_CODE(encode, ss, s); + if (onig_is_code_in_cc_len(mb_len, code, node) == 0) goto fail; } - MOP_OUT; + SOP_OUT; break; +#endif - case OP_ANYCHAR: MOP_IN(OP_ANYCHAR); + case OP_ANYCHAR: SOP_IN(OP_ANYCHAR); DATA_ENSURE(1); n = enclen(encode, s); DATA_ENSURE(n); if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; s += n; - MOP_OUT; + SOP_OUT; break; - case OP_ANYCHAR_ML: MOP_IN(OP_ANYCHAR_ML); + case OP_ANYCHAR_ML: SOP_IN(OP_ANYCHAR_ML); DATA_ENSURE(1); n = enclen(encode, s); DATA_ENSURE(n); s += n; - MOP_OUT; + SOP_OUT; break; - case OP_ANYCHAR_STAR: MOP_IN(OP_ANYCHAR_STAR); + case OP_ANYCHAR_STAR: SOP_IN(OP_ANYCHAR_STAR); while (DATA_ENSURE_CHECK1) { - STACK_PUSH_ALT(p, s, sprev); - n = enclen(encode, s); + STACK_PUSH_ALT(p, s, sprev); + n = enclen(encode, s); DATA_ENSURE(n); if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; sprev = s; s += n; } - MOP_OUT; + SOP_OUT; + continue; break; - case OP_ANYCHAR_ML_STAR: MOP_IN(OP_ANYCHAR_ML_STAR); + case OP_ANYCHAR_ML_STAR: SOP_IN(OP_ANYCHAR_ML_STAR); while (DATA_ENSURE_CHECK1) { - STACK_PUSH_ALT(p, s, sprev); - n = enclen(encode, s); - if (n > 1) { - DATA_ENSURE(n); - sprev = s; - s += n; - } - else { - sprev = s; - s++; - } - } - MOP_OUT; - break; - - case OP_ANYCHAR_STAR_PEEK_NEXT: MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT); + STACK_PUSH_ALT(p, s, sprev); + n = enclen(encode, s); + if (n > 1) { + DATA_ENSURE(n); + sprev = s; + s += n; + } + else { + sprev = s; + s++; + } + } + SOP_OUT; + continue; + break; + + case OP_ANYCHAR_STAR_PEEK_NEXT: SOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT); while (DATA_ENSURE_CHECK1) { - if (*p == *s) { - STACK_PUSH_ALT(p + 1, s, sprev); - } - n = enclen(encode, s); + if (*p == *s) { + STACK_PUSH_ALT(p + 1, s, sprev); + } + n = enclen(encode, s); DATA_ENSURE(n); if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; sprev = s; s += n; } p++; - MOP_OUT; + SOP_OUT; break; - case OP_ANYCHAR_ML_STAR_PEEK_NEXT:MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT); + case OP_ANYCHAR_ML_STAR_PEEK_NEXT:SOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT); while (DATA_ENSURE_CHECK1) { - if (*p == *s) { - STACK_PUSH_ALT(p + 1, s, sprev); - } - n = enclen(encode, s); - if (n > 1) { - DATA_ENSURE(n); - sprev = s; - s += n; - } - else { - sprev = s; - s++; - } + if (*p == *s) { + STACK_PUSH_ALT(p + 1, s, sprev); + } + n = enclen(encode, s); + if (n > 1) { + DATA_ENSURE(n); + sprev = s; + s += n; + } + else { + sprev = s; + s++; + } } p++; - MOP_OUT; + SOP_OUT; break; -#ifdef USE_COMBINATION_EXPLOSION_CHECK - case OP_STATE_CHECK_ANYCHAR_STAR: MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR); - GET_STATE_CHECK_NUM_INC(mem, p); - while (DATA_ENSURE_CHECK1) { - STATE_CHECK_VAL(scv, mem); - if (scv) goto fail; + case OP_WORD: SOP_IN(OP_WORD); + DATA_ENSURE(1); + if (! ONIGENC_IS_MBC_WORD(encode, s, end)) + goto fail; - STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem); - n = enclen(encode, s); - DATA_ENSURE(n); - if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; - sprev = s; - s += n; - } - MOP_OUT; + s += enclen(encode, s); + SOP_OUT; break; - case OP_STATE_CHECK_ANYCHAR_ML_STAR: - MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR); - - GET_STATE_CHECK_NUM_INC(mem, p); - while (DATA_ENSURE_CHECK1) { - STATE_CHECK_VAL(scv, mem); - if (scv) goto fail; - - STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem); - n = enclen(encode, s); - if (n > 1) { - DATA_ENSURE(n); - sprev = s; - s += n; - } - else { - sprev = s; - s++; - } - } - MOP_OUT; - break; -#endif /* USE_COMBINATION_EXPLOSION_CHECK */ - - case OP_WORD: MOP_IN(OP_WORD); + case OP_WORD_ASCII: SOP_IN(OP_WORD_ASCII); DATA_ENSURE(1); - if (! ONIGENC_IS_MBC_WORD(encode, s, end)) - goto fail; + if (! ONIGENC_IS_MBC_WORD_ASCII(encode, s, end)) + goto fail; s += enclen(encode, s); - MOP_OUT; + SOP_OUT; break; - case OP_NOT_WORD: MOP_IN(OP_NOT_WORD); + case OP_NO_WORD: SOP_IN(OP_NO_WORD); DATA_ENSURE(1); if (ONIGENC_IS_MBC_WORD(encode, s, end)) - goto fail; + goto fail; s += enclen(encode, s); - MOP_OUT; + SOP_OUT; break; - case OP_WORD_BOUND: MOP_IN(OP_WORD_BOUND); - if (ON_STR_BEGIN(s)) { - DATA_ENSURE(1); - if (! ONIGENC_IS_MBC_WORD(encode, s, end)) - goto fail; - } - else if (ON_STR_END(s)) { - if (! ONIGENC_IS_MBC_WORD(encode, sprev, end)) - goto fail; - } - else { - if (ONIGENC_IS_MBC_WORD(encode, s, end) - == ONIGENC_IS_MBC_WORD(encode, sprev, end)) - goto fail; + case OP_NO_WORD_ASCII: SOP_IN(OP_NO_WORD_ASCII); + DATA_ENSURE(1); + if (ONIGENC_IS_MBC_WORD_ASCII(encode, s, end)) + goto fail; + + s += enclen(encode, s); + SOP_OUT; + break; + + case OP_WORD_BOUNDARY: SOP_IN(OP_WORD_BOUNDARY); + { + ModeType mode; + GET_MODE_INC(mode, p); /* ascii_mode */ + + if (ON_STR_BEGIN(s)) { + DATA_ENSURE(1); + if (! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) + goto fail; + } + else if (ON_STR_END(s)) { + if (! IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) + goto fail; + } + else { + if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode) + == IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) + goto fail; + } } - MOP_OUT; + SOP_OUT; continue; break; - case OP_NOT_WORD_BOUND: MOP_IN(OP_NOT_WORD_BOUND); - if (ON_STR_BEGIN(s)) { - if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) - goto fail; - } - else if (ON_STR_END(s)) { - if (ONIGENC_IS_MBC_WORD(encode, sprev, end)) - goto fail; - } - else { - if (ONIGENC_IS_MBC_WORD(encode, s, end) - != ONIGENC_IS_MBC_WORD(encode, sprev, end)) - goto fail; + case OP_NO_WORD_BOUNDARY: SOP_IN(OP_NO_WORD_BOUNDARY); + { + ModeType mode; + GET_MODE_INC(mode, p); /* ascii_mode */ + + if (ON_STR_BEGIN(s)) { + if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) + goto fail; + } + else if (ON_STR_END(s)) { + if (IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) + goto fail; + } + else { + if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode) + != IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) + goto fail; + } } - MOP_OUT; + SOP_OUT; continue; break; #ifdef USE_WORD_BEGIN_END - case OP_WORD_BEGIN: MOP_IN(OP_WORD_BEGIN); - if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) { - if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) { - MOP_OUT; - continue; - } + case OP_WORD_BEGIN: SOP_IN(OP_WORD_BEGIN); + { + ModeType mode; + GET_MODE_INC(mode, p); /* ascii_mode */ + + if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) { + if (ON_STR_BEGIN(s) || !IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) { + SOP_OUT; + continue; + } + } } goto fail; break; - case OP_WORD_END: MOP_IN(OP_WORD_END); - if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) { - if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) { - MOP_OUT; - continue; - } + case OP_WORD_END: SOP_IN(OP_WORD_END); + { + ModeType mode; + GET_MODE_INC(mode, p); /* ascii_mode */ + + if (!ON_STR_BEGIN(s) && IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) { + if (ON_STR_END(s) || ! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) { + SOP_OUT; + continue; + } + } } goto fail; break; #endif - case OP_BEGIN_BUF: MOP_IN(OP_BEGIN_BUF); + case OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY: + SOP_IN(OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY); + if (onigenc_egcb_is_break_position(encode, s, sprev, str, end)) { + SOP_OUT; + continue; + } + goto fail; + break; + + case OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY: + SOP_IN(OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY); + if (onigenc_egcb_is_break_position(encode, s, sprev, str, end)) + goto fail; + + SOP_OUT; + continue; + break; + + case OP_BEGIN_BUF: SOP_IN(OP_BEGIN_BUF); if (! ON_STR_BEGIN(s)) goto fail; - MOP_OUT; + SOP_OUT; continue; break; - case OP_END_BUF: MOP_IN(OP_END_BUF); + case OP_END_BUF: SOP_IN(OP_END_BUF); if (! ON_STR_END(s)) goto fail; - MOP_OUT; + SOP_OUT; continue; break; - case OP_BEGIN_LINE: MOP_IN(OP_BEGIN_LINE); + case OP_BEGIN_LINE: SOP_IN(OP_BEGIN_LINE); if (ON_STR_BEGIN(s)) { - if (IS_NOTBOL(msa->options)) goto fail; - MOP_OUT; - continue; + if (IS_NOTBOL(msa->options)) goto fail; + SOP_OUT; + continue; } else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) { - MOP_OUT; - continue; + SOP_OUT; + continue; } goto fail; break; - case OP_END_LINE: MOP_IN(OP_END_LINE); + case OP_END_LINE: SOP_IN(OP_END_LINE); if (ON_STR_END(s)) { #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE - if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { + if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { #endif - if (IS_NOTEOL(msa->options)) goto fail; - MOP_OUT; - continue; + if (IS_NOTEOL(msa->options)) goto fail; + SOP_OUT; + continue; #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE - } + } #endif } else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) { - MOP_OUT; - continue; + SOP_OUT; + continue; } #ifdef USE_CRNL_AS_LINE_TERMINATOR else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) { - MOP_OUT; - continue; + SOP_OUT; + continue; } #endif goto fail; break; - case OP_SEMI_END_BUF: MOP_IN(OP_SEMI_END_BUF); + case OP_SEMI_END_BUF: SOP_IN(OP_SEMI_END_BUF); if (ON_STR_END(s)) { #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE - if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { + if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { #endif - if (IS_NOTEOL(msa->options)) goto fail; - MOP_OUT; - continue; + if (IS_NOTEOL(msa->options)) goto fail; + SOP_OUT; + continue; #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE - } + } #endif } else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) && - ON_STR_END(s + enclen(encode, s))) { - MOP_OUT; - continue; + ON_STR_END(s + enclen(encode, s))) { + SOP_OUT; + continue; } #ifdef USE_CRNL_AS_LINE_TERMINATOR else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) { UChar* ss = s + enclen(encode, s); - ss += enclen(encode, ss); + ss += enclen(encode, ss); if (ON_STR_END(ss)) { - MOP_OUT; + SOP_OUT; continue; } } @@ -2071,477 +3206,464 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, goto fail; break; - case OP_BEGIN_POSITION: MOP_IN(OP_BEGIN_POSITION); + case OP_BEGIN_POSITION: SOP_IN(OP_BEGIN_POSITION); if (s != msa->start) - goto fail; + goto fail; - MOP_OUT; + SOP_OUT; continue; break; - case OP_MEMORY_START_PUSH: MOP_IN(OP_MEMORY_START_PUSH); + case OP_MEMORY_START_PUSH: SOP_IN(OP_MEMORY_START_PUSH); GET_MEMNUM_INC(mem, p); STACK_PUSH_MEM_START(mem, s); - MOP_OUT; + SOP_OUT; continue; break; - case OP_MEMORY_START: MOP_IN(OP_MEMORY_START); + case OP_MEMORY_START: SOP_IN(OP_MEMORY_START); GET_MEMNUM_INC(mem, p); - mem_start_stk[mem] = (OnigStackIndex )(UINTN)((void* )s); - MOP_OUT; + mem_start_stk[mem] = (StackIndex )((void* )s); + SOP_OUT; continue; break; - case OP_MEMORY_END_PUSH: MOP_IN(OP_MEMORY_END_PUSH); + case OP_MEMORY_END_PUSH: SOP_IN(OP_MEMORY_END_PUSH); GET_MEMNUM_INC(mem, p); STACK_PUSH_MEM_END(mem, s); - MOP_OUT; + SOP_OUT; continue; break; - case OP_MEMORY_END: MOP_IN(OP_MEMORY_END); + case OP_MEMORY_END: SOP_IN(OP_MEMORY_END); GET_MEMNUM_INC(mem, p); - mem_end_stk[mem] = (OnigStackIndex )(UINTN)((void* )s); - MOP_OUT; + mem_end_stk[mem] = (StackIndex )((void* )s); + SOP_OUT; continue; break; -#ifdef USE_SUBEXP_CALL - case OP_MEMORY_END_PUSH_REC: MOP_IN(OP_MEMORY_END_PUSH_REC); +#ifdef USE_CALL + case OP_MEMORY_END_PUSH_REC: SOP_IN(OP_MEMORY_END_PUSH_REC); GET_MEMNUM_INC(mem, p); STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */ STACK_PUSH_MEM_END(mem, s); mem_start_stk[mem] = GET_STACK_INDEX(stkp); - MOP_OUT; + SOP_OUT; continue; break; - case OP_MEMORY_END_REC: MOP_IN(OP_MEMORY_END_REC); + case OP_MEMORY_END_REC: SOP_IN(OP_MEMORY_END_REC); GET_MEMNUM_INC(mem, p); - mem_end_stk[mem] = (OnigStackIndex )(UINTN)((void* )s); + mem_end_stk[mem] = (StackIndex )((void* )s); STACK_GET_MEM_START(mem, stkp); - if (BIT_STATUS_AT(reg->bt_mem_start, mem)) - mem_start_stk[mem] = GET_STACK_INDEX(stkp); + if (MEM_STATUS_AT(reg->bt_mem_start, mem)) + mem_start_stk[mem] = GET_STACK_INDEX(stkp); else - mem_start_stk[mem] = (OnigStackIndex )(UINTN)((void* )stkp->u.mem.pstr); + mem_start_stk[mem] = (StackIndex )((void* )stkp->u.mem.pstr); STACK_PUSH_MEM_END_MARK(mem); - MOP_OUT; + SOP_OUT; continue; break; #endif - case OP_BACKREF1: MOP_IN(OP_BACKREF1); + case OP_BACKREF1: SOP_IN(OP_BACKREF1); mem = 1; goto backref; break; - case OP_BACKREF2: MOP_IN(OP_BACKREF2); + case OP_BACKREF2: SOP_IN(OP_BACKREF2); mem = 2; goto backref; break; - case OP_BACKREFN: MOP_IN(OP_BACKREFN); + case OP_BACKREF_N: SOP_IN(OP_BACKREF_N); GET_MEMNUM_INC(mem, p); backref: { - int len; - UChar *pstart, *pend; + int len; + UChar *pstart, *pend; - /* if you want to remove following line, - you should check in parse and compile time. */ - if (mem > num_mem) goto fail; - if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; - if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; + if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; + if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; - if (BIT_STATUS_AT(reg->bt_mem_start, mem)) - pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; - else - pstart = (UChar* )((void* )(UINTN)mem_start_stk[mem]); + if (MEM_STATUS_AT(reg->bt_mem_start, mem)) + pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; + else + pstart = (UChar* )((void* )mem_start_stk[mem]); - pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) - ? STACK_AT(mem_end_stk[mem])->u.mem.pstr - : (UChar* )((void* )(UINTN)mem_end_stk[mem])); - n = (int)(pend - pstart); - DATA_ENSURE(n); - sprev = s; - STRING_CMP(pstart, s, n); - while (sprev + (len = enclen(encode, sprev)) < s) - sprev += len; + pend = (MEM_STATUS_AT(reg->bt_mem_end, mem) + ? STACK_AT(mem_end_stk[mem])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[mem])); + n = (int )(pend - pstart); + DATA_ENSURE(n); + sprev = s; + STRING_CMP(pstart, s, n); + while (sprev + (len = enclen(encode, sprev)) < s) + sprev += len; - MOP_OUT; - continue; + SOP_OUT; + continue; } break; - case OP_BACKREFN_IC: MOP_IN(OP_BACKREFN_IC); + case OP_BACKREF_N_IC: SOP_IN(OP_BACKREF_N_IC); GET_MEMNUM_INC(mem, p); { - int len; - UChar *pstart, *pend; + int len; + UChar *pstart, *pend; - /* if you want to remove following line, - you should check in parse and compile time. */ - if (mem > num_mem) goto fail; - if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; - if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; + if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; + if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; - if (BIT_STATUS_AT(reg->bt_mem_start, mem)) - pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; - else - pstart = (UChar* )((void* )(UINTN)mem_start_stk[mem]); + if (MEM_STATUS_AT(reg->bt_mem_start, mem)) + pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; + else + pstart = (UChar* )((void* )mem_start_stk[mem]); - pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) - ? STACK_AT(mem_end_stk[mem])->u.mem.pstr - : (UChar* )((void* )(UINTN)mem_end_stk[mem])); - n = (int)(pend - pstart); - DATA_ENSURE(n); - sprev = s; - STRING_CMP_IC(case_fold_flag, pstart, &s, n); - while (sprev + (len = enclen(encode, sprev)) < s) - sprev += len; + pend = (MEM_STATUS_AT(reg->bt_mem_end, mem) + ? STACK_AT(mem_end_stk[mem])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[mem])); + n = (int )(pend - pstart); + DATA_ENSURE(n); + sprev = s; + STRING_CMP_IC(case_fold_flag, pstart, &s, n); + while (sprev + (len = enclen(encode, sprev)) < s) + sprev += len; - MOP_OUT; - continue; + SOP_OUT; + continue; } break; - case OP_BACKREF_MULTI: MOP_IN(OP_BACKREF_MULTI); + case OP_BACKREF_MULTI: SOP_IN(OP_BACKREF_MULTI); { - int len, is_fail; - UChar *pstart, *pend, *swork; - - GET_LENGTH_INC(tlen, p); - for (i = 0; i < tlen; i++) { - GET_MEMNUM_INC(mem, p); - - if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; - if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; - - if (BIT_STATUS_AT(reg->bt_mem_start, mem)) - pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; - else - pstart = (UChar* )((void* )(UINTN)mem_start_stk[mem]); - - pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) - ? STACK_AT(mem_end_stk[mem])->u.mem.pstr - : (UChar* )((void* )(UINTN)mem_end_stk[mem])); - n = (int)(pend - pstart); - DATA_ENSURE(n); - sprev = s; - swork = s; - STRING_CMP_VALUE(pstart, swork, n, is_fail); - if (is_fail) continue; - s = swork; - while (sprev + (len = enclen(encode, sprev)) < s) - sprev += len; - - p += (SIZE_MEMNUM * (tlen - i - 1)); - break; /* success */ - } - if (i == tlen) goto fail; - MOP_OUT; - continue; - } - break; - - case OP_BACKREF_MULTI_IC: MOP_IN(OP_BACKREF_MULTI_IC); + int len, is_fail; + UChar *pstart, *pend, *swork; + + GET_LENGTH_INC(tlen, p); + for (i = 0; i < tlen; i++) { + GET_MEMNUM_INC(mem, p); + + if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; + if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; + + if (MEM_STATUS_AT(reg->bt_mem_start, mem)) + pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; + else + pstart = (UChar* )((void* )mem_start_stk[mem]); + + pend = (MEM_STATUS_AT(reg->bt_mem_end, mem) + ? STACK_AT(mem_end_stk[mem])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[mem])); + n = (int )(pend - pstart); + DATA_ENSURE(n); + sprev = s; + swork = s; + STRING_CMP_VALUE(pstart, swork, n, is_fail); + if (is_fail) continue; + s = swork; + while (sprev + (len = enclen(encode, sprev)) < s) + sprev += len; + + p += (SIZE_MEMNUM * (tlen - i - 1)); + break; /* success */ + } + if (i == tlen) goto fail; + SOP_OUT; + continue; + } + break; + + case OP_BACKREF_MULTI_IC: SOP_IN(OP_BACKREF_MULTI_IC); { - int len, is_fail; - UChar *pstart, *pend, *swork; - - GET_LENGTH_INC(tlen, p); - for (i = 0; i < tlen; i++) { - GET_MEMNUM_INC(mem, p); - - if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; - if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; - - if (BIT_STATUS_AT(reg->bt_mem_start, mem)) - pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; - else - pstart = (UChar* )((void* )(UINTN)mem_start_stk[mem]); - - pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) - ? STACK_AT(mem_end_stk[mem])->u.mem.pstr - : (UChar* )((void* )(UINTN)mem_end_stk[mem])); - n = (int)(pend - pstart); - DATA_ENSURE(n); - sprev = s; - swork = s; - STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail); - if (is_fail) continue; - s = swork; - while (sprev + (len = enclen(encode, sprev)) < s) - sprev += len; - - p += (SIZE_MEMNUM * (tlen - i - 1)); - break; /* success */ - } - if (i == tlen) goto fail; - MOP_OUT; - continue; + int len, is_fail; + UChar *pstart, *pend, *swork; + + GET_LENGTH_INC(tlen, p); + for (i = 0; i < tlen; i++) { + GET_MEMNUM_INC(mem, p); + + if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; + if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; + + if (MEM_STATUS_AT(reg->bt_mem_start, mem)) + pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; + else + pstart = (UChar* )((void* )mem_start_stk[mem]); + + pend = (MEM_STATUS_AT(reg->bt_mem_end, mem) + ? STACK_AT(mem_end_stk[mem])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[mem])); + n = (int )(pend - pstart); + DATA_ENSURE(n); + sprev = s; + swork = s; + STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail); + if (is_fail) continue; + s = swork; + while (sprev + (len = enclen(encode, sprev)) < s) + sprev += len; + + p += (SIZE_MEMNUM * (tlen - i - 1)); + break; /* success */ + } + if (i == tlen) goto fail; + SOP_OUT; + continue; } break; #ifdef USE_BACKREF_WITH_LEVEL case OP_BACKREF_WITH_LEVEL: { - int len; - OnigOptionType ic; - LengthType level; - - GET_OPTION_INC(ic, p); - GET_LENGTH_INC(level, p); - GET_LENGTH_INC(tlen, p); + int len; + OnigOptionType ic; + LengthType level; - sprev = s; - if (backref_match_at_nested_level(reg, stk, stk_base, ic - , case_fold_flag, (int )level, (int )tlen, p, &s, end)) { - while (sprev + (len = enclen(encode, sprev)) < s) - sprev += len; + GET_OPTION_INC(ic, p); + GET_LENGTH_INC(level, p); + GET_LENGTH_INC(tlen, p); - p += (SIZE_MEMNUM * tlen); - } - else - goto fail; + sprev = s; + if (backref_match_at_nested_level(reg, stk, stk_base, ic + , case_fold_flag, (int )level, (int )tlen, p, &s, end)) { + if (sprev < end) { + while (sprev + (len = enclen(encode, sprev)) < s) + sprev += len; + } + p += (SIZE_MEMNUM * tlen); + } + else + goto fail; - MOP_OUT; - continue; + SOP_OUT; + continue; } - break; #endif -#if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */ - case OP_SET_OPTION_PUSH: MOP_IN(OP_SET_OPTION_PUSH); - GET_OPTION_INC(option, p); - STACK_PUSH_ALT(p, s, sprev); - p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL; - MOP_OUT; - continue; + case OP_BACKREF_CHECK: SOP_IN(OP_BACKREF_CHECK); + { + GET_LENGTH_INC(tlen, p); + for (i = 0; i < tlen; i++) { + GET_MEMNUM_INC(mem, p); + + if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; + if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; + + p += (SIZE_MEMNUM * (tlen - i - 1)); + break; /* success */ + } + if (i == tlen) goto fail; + SOP_OUT; + continue; + } break; - case OP_SET_OPTION: MOP_IN(OP_SET_OPTION); - GET_OPTION_INC(option, p); - MOP_OUT; - continue; +#ifdef USE_BACKREF_WITH_LEVEL + case OP_BACKREF_CHECK_WITH_LEVEL: + { + LengthType level; + + GET_LENGTH_INC(level, p); + GET_LENGTH_INC(tlen, p); + + if (backref_check_at_nested_level(reg, stk, stk_base, + (int )level, (int )tlen, p) != 0) { + p += (SIZE_MEMNUM * tlen); + } + else + goto fail; + + SOP_OUT; + continue; + } break; #endif - case OP_NULL_CHECK_START: MOP_IN(OP_NULL_CHECK_START); + case OP_EMPTY_CHECK_START: SOP_IN(OP_EMPTY_CHECK_START); GET_MEMNUM_INC(mem, p); /* mem: null check id */ - STACK_PUSH_NULL_CHECK_START(mem, s); - MOP_OUT; + STACK_PUSH_EMPTY_CHECK_START(mem, s); + SOP_OUT; continue; break; - case OP_NULL_CHECK_END: MOP_IN(OP_NULL_CHECK_END); + case OP_EMPTY_CHECK_END: SOP_IN(OP_EMPTY_CHECK_END); { - int isnull; + int is_empty; - GET_MEMNUM_INC(mem, p); /* mem: null check id */ - STACK_NULL_CHECK(isnull, mem, s); - if (isnull) { + GET_MEMNUM_INC(mem, p); /* mem: null check id */ + STACK_EMPTY_CHECK(is_empty, mem, s); + if (is_empty) { #ifdef ONIG_DEBUG_MATCH - fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%d\n", - (int )mem, (int )s); -#endif - null_check_found: - /* empty loop founded, skip next instruction */ - switch (*p++) { - case OP_JUMP: - case OP_PUSH: - p += SIZE_RELADDR; - break; - case OP_REPEAT_INC: - case OP_REPEAT_INC_NG: - case OP_REPEAT_INC_SG: - case OP_REPEAT_INC_NG_SG: - p += SIZE_MEMNUM; - break; - default: - goto unexpected_bytecode_error; - break; - } - } - } - MOP_OUT; + fprintf(stderr, "EMPTY_CHECK_END: skip id:%d, s:%p\n", (int )mem, s); +#endif + empty_check_found: + /* empty loop founded, skip next instruction */ + switch (*p++) { + case OP_JUMP: + case OP_PUSH: + p += SIZE_RELADDR; + break; + case OP_REPEAT_INC: + case OP_REPEAT_INC_NG: + case OP_REPEAT_INC_SG: + case OP_REPEAT_INC_NG_SG: + p += SIZE_MEMNUM; + break; + default: + goto unexpected_bytecode_error; + break; + } + } + } + SOP_OUT; continue; break; -#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT - case OP_NULL_CHECK_END_MEMST: MOP_IN(OP_NULL_CHECK_END_MEMST); +#ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT + case OP_EMPTY_CHECK_END_MEMST: SOP_IN(OP_EMPTY_CHECK_END_MEMST); { - int isnull; + int is_empty; - GET_MEMNUM_INC(mem, p); /* mem: null check id */ - STACK_NULL_CHECK_MEMST(isnull, mem, s, reg); - if (isnull) { + GET_MEMNUM_INC(mem, p); /* mem: null check id */ + STACK_EMPTY_CHECK_MEM(is_empty, mem, s, reg); + if (is_empty) { #ifdef ONIG_DEBUG_MATCH - fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%d\n", - (int )mem, (int )s); + fprintf(stderr, "EMPTY_CHECK_END_MEM: skip id:%d, s:%p\n", (int)mem, s); #endif - if (isnull == -1) goto fail; - goto null_check_found; - } + if (is_empty == -1) goto fail; + goto empty_check_found; + } } - MOP_OUT; + SOP_OUT; continue; break; #endif -#ifdef USE_SUBEXP_CALL - case OP_NULL_CHECK_END_MEMST_PUSH: - MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH); +#ifdef USE_CALL + case OP_EMPTY_CHECK_END_MEMST_PUSH: + SOP_IN(OP_EMPTY_CHECK_END_MEMST_PUSH); { - int isnull; + int is_empty; - GET_MEMNUM_INC(mem, p); /* mem: null check id */ -#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT - STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg); + GET_MEMNUM_INC(mem, p); /* mem: null check id */ +#ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT + STACK_EMPTY_CHECK_MEM_REC(is_empty, mem, s, reg); #else - STACK_NULL_CHECK_REC(isnull, mem, s); + STACK_EMPTY_CHECK_REC(is_empty, mem, s); #endif - if (isnull) { + if (is_empty) { #ifdef ONIG_DEBUG_MATCH - fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%d\n", - (int )mem, (int )s); + fprintf(stderr, "EMPTY_CHECK_END_MEM_PUSH: skip id:%d, s:%p\n", + (int )mem, s); #endif - if (isnull == -1) goto fail; - goto null_check_found; - } - else { - STACK_PUSH_NULL_CHECK_END(mem); - } - } - MOP_OUT; - continue; + if (is_empty == -1) goto fail; + goto empty_check_found; + } + else { + STACK_PUSH_EMPTY_CHECK_END(mem); + } + } + SOP_OUT; + continue; break; #endif - case OP_JUMP: MOP_IN(OP_JUMP); + case OP_JUMP: SOP_IN(OP_JUMP); GET_RELADDR_INC(addr, p); p += addr; - MOP_OUT; - CHECK_INTERRUPT_IN_MATCH_AT; + SOP_OUT; + CHECK_INTERRUPT_IN_MATCH; continue; break; - case OP_PUSH: MOP_IN(OP_PUSH); + case OP_PUSH: SOP_IN(OP_PUSH); GET_RELADDR_INC(addr, p); STACK_PUSH_ALT(p + addr, s, sprev); - MOP_OUT; + SOP_OUT; continue; break; -#ifdef USE_COMBINATION_EXPLOSION_CHECK - case OP_STATE_CHECK_PUSH: MOP_IN(OP_STATE_CHECK_PUSH); - GET_STATE_CHECK_NUM_INC(mem, p); - STATE_CHECK_VAL(scv, mem); - if (scv) goto fail; - + case OP_PUSH_SUPER: SOP_IN(OP_PUSH_SUPER); GET_RELADDR_INC(addr, p); - STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem); - MOP_OUT; - continue; - break; - - case OP_STATE_CHECK_PUSH_OR_JUMP: MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP); - GET_STATE_CHECK_NUM_INC(mem, p); - GET_RELADDR_INC(addr, p); - STATE_CHECK_VAL(scv, mem); - if (scv) { - p += addr; - } - else { - STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem); - } - MOP_OUT; + STACK_PUSH_SUPER_ALT(p + addr, s, sprev); + SOP_OUT; continue; break; - case OP_STATE_CHECK: MOP_IN(OP_STATE_CHECK); - GET_STATE_CHECK_NUM_INC(mem, p); - STATE_CHECK_VAL(scv, mem); - if (scv) goto fail; - - STACK_PUSH_STATE_CHECK(s, mem); - MOP_OUT; - continue; - break; -#endif /* USE_COMBINATION_EXPLOSION_CHECK */ - - case OP_POP: MOP_IN(OP_POP); + case OP_POP_OUT: SOP_IN(OP_POP_OUT); STACK_POP_ONE; - MOP_OUT; + /* for stop backtrack */ + /* CHECK_RETRY_LIMIT_IN_MATCH; */ + SOP_OUT; continue; break; - case OP_PUSH_OR_JUMP_EXACT1: MOP_IN(OP_PUSH_OR_JUMP_EXACT1); + case OP_PUSH_OR_JUMP_EXACT1: SOP_IN(OP_PUSH_OR_JUMP_EXACT1); GET_RELADDR_INC(addr, p); if (*p == *s && DATA_ENSURE_CHECK1) { - p++; - STACK_PUSH_ALT(p + addr, s, sprev); - MOP_OUT; - continue; + p++; + STACK_PUSH_ALT(p + addr, s, sprev); + SOP_OUT; + continue; } p += (addr + 1); - MOP_OUT; + SOP_OUT; continue; break; - case OP_PUSH_IF_PEEK_NEXT: MOP_IN(OP_PUSH_IF_PEEK_NEXT); + case OP_PUSH_IF_PEEK_NEXT: SOP_IN(OP_PUSH_IF_PEEK_NEXT); GET_RELADDR_INC(addr, p); if (*p == *s) { - p++; - STACK_PUSH_ALT(p + addr, s, sprev); - MOP_OUT; - continue; + p++; + STACK_PUSH_ALT(p + addr, s, sprev); + SOP_OUT; + continue; } p++; - MOP_OUT; + SOP_OUT; continue; break; - case OP_REPEAT: MOP_IN(OP_REPEAT); + case OP_REPEAT: SOP_IN(OP_REPEAT); { - GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ - GET_RELADDR_INC(addr, p); + GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ + GET_RELADDR_INC(addr, p); - STACK_ENSURE(1); - repeat_stk[mem] = GET_STACK_INDEX(stk); - STACK_PUSH_REPEAT(mem, p); + STACK_ENSURE(1); + repeat_stk[mem] = GET_STACK_INDEX(stk); + STACK_PUSH_REPEAT(mem, p); - if (reg->repeat_range[mem].lower == 0) { - STACK_PUSH_ALT(p + addr, s, sprev); - } + if (reg->repeat_range[mem].lower == 0) { + STACK_PUSH_ALT(p + addr, s, sprev); + } } - MOP_OUT; + SOP_OUT; continue; break; - case OP_REPEAT_NG: MOP_IN(OP_REPEAT_NG); + case OP_REPEAT_NG: SOP_IN(OP_REPEAT_NG); { - GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ - GET_RELADDR_INC(addr, p); + GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ + GET_RELADDR_INC(addr, p); - STACK_ENSURE(1); - repeat_stk[mem] = GET_STACK_INDEX(stk); - STACK_PUSH_REPEAT(mem, p); + STACK_ENSURE(1); + repeat_stk[mem] = GET_STACK_INDEX(stk); + STACK_PUSH_REPEAT(mem, p); - if (reg->repeat_range[mem].lower == 0) { - STACK_PUSH_ALT(p, s, sprev); - p += addr; - } + if (reg->repeat_range[mem].lower == 0) { + STACK_PUSH_ALT(p, s, sprev); + p += addr; + } } - MOP_OUT; + SOP_OUT; continue; break; - case OP_REPEAT_INC: MOP_IN(OP_REPEAT_INC); + case OP_REPEAT_INC: SOP_IN(OP_REPEAT_INC); GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ si = repeat_stk[mem]; stkp = STACK_AT(si); @@ -2559,19 +3681,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p = stkp->u.repeat.pcode; } STACK_PUSH_REPEAT_INC(si); - MOP_OUT; - CHECK_INTERRUPT_IN_MATCH_AT; + SOP_OUT; + CHECK_INTERRUPT_IN_MATCH; continue; break; - case OP_REPEAT_INC_SG: MOP_IN(OP_REPEAT_INC_SG); + case OP_REPEAT_INC_SG: SOP_IN(OP_REPEAT_INC_SG); GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ STACK_GET_REPEAT(mem, stkp); si = GET_STACK_INDEX(stkp); goto repeat_inc; break; - case OP_REPEAT_INC_NG: MOP_IN(OP_REPEAT_INC_NG); + case OP_REPEAT_INC_NG: SOP_IN(OP_REPEAT_INC_NG); GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ si = repeat_stk[mem]; stkp = STACK_AT(si); @@ -2593,104 +3715,234 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) { STACK_PUSH_REPEAT_INC(si); } - MOP_OUT; - CHECK_INTERRUPT_IN_MATCH_AT; + SOP_OUT; + CHECK_INTERRUPT_IN_MATCH; continue; break; - case OP_REPEAT_INC_NG_SG: MOP_IN(OP_REPEAT_INC_NG_SG); + case OP_REPEAT_INC_NG_SG: SOP_IN(OP_REPEAT_INC_NG_SG); GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ STACK_GET_REPEAT(mem, stkp); si = GET_STACK_INDEX(stkp); goto repeat_inc_ng; break; - case OP_PUSH_POS: MOP_IN(OP_PUSH_POS); + case OP_PREC_READ_START: SOP_IN(OP_PREC_READ_START); STACK_PUSH_POS(s, sprev); - MOP_OUT; + SOP_OUT; continue; break; - case OP_POP_POS: MOP_IN(OP_POP_POS); + case OP_PREC_READ_END: SOP_IN(OP_PREC_READ_END); { - STACK_POS_END(stkp); - s = stkp->u.state.pstr; - sprev = stkp->u.state.pstr_prev; + STACK_EXEC_TO_VOID(stkp); + s = stkp->u.state.pstr; + sprev = stkp->u.state.pstr_prev; } - MOP_OUT; + SOP_OUT; continue; break; - case OP_PUSH_POS_NOT: MOP_IN(OP_PUSH_POS_NOT); + case OP_PREC_READ_NOT_START: SOP_IN(OP_PREC_READ_NOT_START); GET_RELADDR_INC(addr, p); - STACK_PUSH_POS_NOT(p + addr, s, sprev); - MOP_OUT; + STACK_PUSH_ALT_PREC_READ_NOT(p + addr, s, sprev); + SOP_OUT; continue; break; - case OP_FAIL_POS: MOP_IN(OP_FAIL_POS); - STACK_POP_TIL_POS_NOT; + case OP_PREC_READ_NOT_END: SOP_IN(OP_PREC_READ_NOT_END); + STACK_POP_TIL_ALT_PREC_READ_NOT; goto fail; break; - case OP_PUSH_STOP_BT: MOP_IN(OP_PUSH_STOP_BT); - STACK_PUSH_STOP_BT; - MOP_OUT; + case OP_ATOMIC_START: SOP_IN(OP_ATOMIC_START); + STACK_PUSH_TO_VOID_START; + SOP_OUT; continue; break; - case OP_POP_STOP_BT: MOP_IN(OP_POP_STOP_BT); - STACK_STOP_BT_END; - MOP_OUT; + case OP_ATOMIC_END: SOP_IN(OP_ATOMIC_END); + STACK_EXEC_TO_VOID(stkp); + SOP_OUT; continue; break; - case OP_LOOK_BEHIND: MOP_IN(OP_LOOK_BEHIND); + case OP_LOOK_BEHIND: SOP_IN(OP_LOOK_BEHIND); GET_LENGTH_INC(tlen, p); s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen); if (IS_NULL(s)) goto fail; sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); - MOP_OUT; + SOP_OUT; continue; break; - case OP_PUSH_LOOK_BEHIND_NOT: MOP_IN(OP_PUSH_LOOK_BEHIND_NOT); + case OP_LOOK_BEHIND_NOT_START: SOP_IN(OP_LOOK_BEHIND_NOT_START); GET_RELADDR_INC(addr, p); GET_LENGTH_INC(tlen, p); q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen); if (IS_NULL(q)) { - /* too short case -> success. ex. /(? success. ex. /(?p + addr; - MOP_OUT; + SOP_OUT; continue; break; - case OP_RETURN: MOP_IN(OP_RETURN); + case OP_RETURN: SOP_IN(OP_RETURN); STACK_RETURN(p); STACK_PUSH_RETURN; - MOP_OUT; + SOP_OUT; + continue; + break; +#endif + + case OP_PUSH_SAVE_VAL: SOP_IN(OP_PUSH_SAVE_VAL); + { + SaveType type; + GET_SAVE_TYPE_INC(type, p); + GET_MEMNUM_INC(mem, p); /* mem: save id */ + switch ((enum SaveType )type) { + case SAVE_KEEP: + STACK_PUSH_SAVE_VAL(mem, type, s); + break; + + case SAVE_S: + STACK_PUSH_SAVE_VAL_WITH_SPREV(mem, type, s); + break; + + case SAVE_RIGHT_RANGE: + STACK_PUSH_SAVE_VAL(mem, SAVE_RIGHT_RANGE, right_range); + break; + } + } + SOP_OUT; + continue; + break; + + case OP_UPDATE_VAR: SOP_IN(OP_UPDATE_VAR); + { + UpdateVarType type; + enum SaveType save_type; + + GET_UPDATE_VAR_TYPE_INC(type, p); + GET_MEMNUM_INC(mem, p); /* mem: save id */ + switch ((enum UpdateVarType )type) { + case UPDATE_VAR_KEEP_FROM_STACK_LAST: + STACK_GET_SAVE_VAL_TYPE_LAST(SAVE_KEEP, keep); + break; + case UPDATE_VAR_S_FROM_STACK: + STACK_GET_SAVE_VAL_TYPE_LAST_ID_WITH_SPREV(SAVE_S, mem, s); + break; + case UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK: + save_type = SAVE_S; + goto get_save_val_type_last_id; + break; + case UPDATE_VAR_RIGHT_RANGE_FROM_STACK: + save_type = SAVE_RIGHT_RANGE; + get_save_val_type_last_id: + STACK_GET_SAVE_VAL_TYPE_LAST_ID(save_type, mem, right_range); + break; + case UPDATE_VAR_RIGHT_RANGE_INIT: + INIT_RIGHT_RANGE; + break; + } + } + SOP_OUT; + continue; + break; + +#ifdef USE_CALLOUT + case OP_CALLOUT_CONTENTS: SOP_IN(OP_CALLOUT_CONTENTS); + of = ONIG_CALLOUT_OF_CONTENTS; + goto callout_common_entry; + + SOP_OUT; + continue; + break; + + case OP_CALLOUT_NAME: SOP_IN(OP_CALLOUT_NAME); + { + int call_result; + int name_id; + int num; + int in; + CalloutListEntry* e; + OnigCalloutFunc func; + OnigCalloutArgs args; + + of = ONIG_CALLOUT_OF_NAME; + GET_MEMNUM_INC(name_id, p); + + callout_common_entry: + GET_MEMNUM_INC(num, p); + e = onig_reg_callout_list_at(reg, num); + in = e->in; + if (of == ONIG_CALLOUT_OF_NAME) { + func = onig_get_callout_start_func(reg, num); + } + else { + name_id = ONIG_NON_NAME_ID; + func = msa->mp->progress_callout_of_contents; + } + + if (IS_NOT_NULL(func) && (in & ONIG_CALLOUT_IN_PROGRESS) != 0) { + CALLOUT_BODY(func, ONIG_CALLOUT_IN_PROGRESS, name_id, + num, msa->mp->callout_user_data, args, call_result); + switch (call_result) { + case ONIG_CALLOUT_FAIL: + goto fail; + break; + case ONIG_CALLOUT_SUCCESS: + goto retraction_callout2; + break; + default: /* error code */ + if (call_result > 0) { + call_result = ONIGERR_INVALID_ARGUMENT; + } + best_len = call_result; + goto finish; + break; + } + } + else { + retraction_callout2: + if ((in & ONIG_CALLOUT_IN_RETRACTION) != 0) { + if (of == ONIG_CALLOUT_OF_NAME) { + if (IS_NOT_NULL(func)) { + STACK_PUSH_CALLOUT_NAME(name_id, num, func); + } + } + else { + func = msa->mp->retraction_callout_of_contents; + if (IS_NOT_NULL(func)) { + STACK_PUSH_CALLOUT_CONTENTS(num, func); + } + } + } + } + } + SOP_OUT; continue; break; #endif @@ -2700,22 +3952,15 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, break; fail: - MOP_OUT; + SOP_OUT; /* fall */ - case OP_FAIL: MOP_IN(OP_FAIL); + case OP_FAIL: SOP_IN(OP_FAIL); STACK_POP; p = stk->u.state.pcode; s = stk->u.state.pstr; sprev = stk->u.state.pstr_prev; - -#ifdef USE_COMBINATION_EXPLOSION_CHECK - if (stk->u.state.state_check != 0) { - stk->type = STK_STATE_CHECK_MARK; - stk++; - } -#endif - - MOP_OUT; + CHECK_RETRY_LIMIT_IN_MATCH; + SOP_OUT; continue; break; @@ -2728,31 +3973,33 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, finish: STACK_SAVE; - xfree(alloca_base); return best_len; #ifdef ONIG_DEBUG stack_error: STACK_SAVE; - xfree(alloca_base); return ONIGERR_STACK_BUG; #endif bytecode_error: STACK_SAVE; - xfree(alloca_base); return ONIGERR_UNDEFINED_BYTECODE; unexpected_bytecode_error: STACK_SAVE; - xfree(alloca_base); return ONIGERR_UNEXPECTED_BYTECODE; + +#ifdef USE_RETRY_LIMIT_IN_MATCH + retry_limit_in_match_over: + STACK_SAVE; + return ONIGERR_RETRY_LIMIT_IN_MATCH_OVER; +#endif } static UChar* slow_search(OnigEncoding enc, UChar* target, UChar* target_end, - const UChar* text, const UChar* text_end, UChar* text_range) + const UChar* text, const UChar* text_end, UChar* text_range) { UChar *t, *p, *s, *end; @@ -2768,12 +4015,12 @@ slow_search(OnigEncoding enc, UChar* target, UChar* target_end, p = s + 1; t = target + 1; while (t < target_end) { - if (*t != *p++) - break; - t++; + if (*t != *p++) + break; + t++; } if (t == target_end) - return s; + return s; } s += enclen(enc, s); } @@ -2784,7 +4031,7 @@ slow_search(OnigEncoding enc, UChar* target, UChar* target_end, static int str_lower_case_match(OnigEncoding enc, int case_fold_flag, const UChar* t, const UChar* tend, - const UChar* p, const UChar* end) + const UChar* p, const UChar* end) { int lowlen; UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; @@ -2793,7 +4040,7 @@ str_lower_case_match(OnigEncoding enc, int case_fold_flag, lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf); q = lowbuf; while (lowlen > 0) { - if (*t++ != *q++) return 0; + if (*t++ != *q++) return 0; lowlen--; } } @@ -2803,8 +4050,8 @@ str_lower_case_match(OnigEncoding enc, int case_fold_flag, static UChar* slow_search_ic(OnigEncoding enc, int case_fold_flag, - UChar* target, UChar* target_end, - const UChar* text, const UChar* text_end, UChar* text_range) + UChar* target, UChar* target_end, + const UChar* text, const UChar* text_end, UChar* text_range) { UChar *s, *end; @@ -2817,7 +4064,7 @@ slow_search_ic(OnigEncoding enc, int case_fold_flag, while (s < end) { if (str_lower_case_match(enc, case_fold_flag, target, target_end, - s, text_end)) + s, text_end)) return s; s += enclen(enc, s); @@ -2828,8 +4075,8 @@ slow_search_ic(OnigEncoding enc, int case_fold_flag, static UChar* slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end, - const UChar* text, const UChar* adjust_text, - const UChar* text_end, const UChar* text_start) + const UChar* text, const UChar* adjust_text, + const UChar* text_end, const UChar* text_start) { UChar *t, *p, *s; @@ -2845,12 +4092,12 @@ slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end, p = s + 1; t = target + 1; while (t < target_end) { - if (*t != *p++) - break; - t++; + if (*t != *p++) + break; + t++; } if (t == target_end) - return s; + return s; } s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s); } @@ -2860,9 +4107,9 @@ slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end, static UChar* slow_search_backward_ic(OnigEncoding enc, int case_fold_flag, - UChar* target, UChar* target_end, - const UChar* text, const UChar* adjust_text, - const UChar* text_end, const UChar* text_start) + UChar* target, UChar* target_end, + const UChar* text, const UChar* adjust_text, + const UChar* text_end, const UChar* text_start) { UChar *s; @@ -2886,20 +4133,20 @@ slow_search_backward_ic(OnigEncoding enc, int case_fold_flag, static UChar* bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, - const UChar* text, const UChar* text_end, - const UChar* text_range) + const UChar* text, const UChar* text_end, + const UChar* text_range) { const UChar *s, *se, *t, *p, *end; const UChar *tail; int skip, tlen1; #ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, "bm_search_notrev: text: %d, text_end: %d, text_range: %d\n", - (int )text, (int )text_end, (int )text_range); + fprintf(stderr, "bm_search_notrev: text: %p, text_end: %p, text_range: %p\n", + text, text_end, text_range); #endif tail = target_end - 1; - tlen1 = (int)(tail - target); + tlen1 = (int )(tail - target); end = text_range; if (end + tlen1 > text_end) end = text_end - tlen1; @@ -2911,8 +4158,8 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, p = se = s + tlen1; t = tail; while (*p == *t) { - if (t == target) return (UChar* )s; - p--; t--; + if (t == target) return (UChar* )s; + p--; t--; } skip = reg->map[*se]; t = s; @@ -2926,8 +4173,8 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, p = se = s + tlen1; t = tail; while (*p == *t) { - if (t == target) return (UChar* )s; - p--; t--; + if (t == target) return (UChar* )s; + p--; t--; } skip = reg->int_map[*se]; t = s; @@ -2942,7 +4189,7 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, static UChar* bm_search(regex_t* reg, const UChar* target, const UChar* target_end, - const UChar* text, const UChar* text_end, const UChar* text_range) + const UChar* text, const UChar* text_end, const UChar* text_range) { const UChar *s, *t, *p, *end; const UChar *tail; @@ -2958,8 +4205,8 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end, p = s; t = tail; while (*p == *t) { - if (t == target) return (UChar* )p; - p--; t--; + if (t == target) return (UChar* )p; + p--; t--; } s += reg->map[*s]; } @@ -2969,8 +4216,8 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end, p = s; t = tail; while (*p == *t) { - if (t == target) return (UChar* )p; - p--; t--; + if (t == target) return (UChar* )p; + p--; t--; } s += reg->int_map[*s]; } @@ -2978,10 +4225,9 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end, return (UChar* )NULL; } +#ifdef USE_INT_MAP_BACKWARD static int -set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, - int** skip) - +set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, int** skip) { int i, len; @@ -2990,7 +4236,7 @@ set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, if (IS_NULL(*skip)) return ONIGERR_MEMORY; } - len = (int)(end - s); + len = end - s; for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*skip)[i] = len; @@ -3002,8 +4248,8 @@ set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, static UChar* bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end, - const UChar* text, const UChar* adjust_text, - const UChar* text_end, const UChar* text_start) + const UChar* text, const UChar* adjust_text, + const UChar* text_end, const UChar* text_start) { const UChar *s, *t, *p; @@ -3028,10 +4274,11 @@ bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end, return (UChar* )NULL; } +#endif static UChar* map_search(OnigEncoding enc, UChar map[], - const UChar* text, const UChar* text_range) + const UChar* text, const UChar* text_range) { const UChar *s = text; @@ -3045,8 +4292,8 @@ map_search(OnigEncoding enc, UChar map[], static UChar* map_search_backward(OnigEncoding enc, UChar map[], - const UChar* text, const UChar* adjust_text, - const UChar* text_start) + const UChar* text, const UChar* adjust_text, + const UChar* text_start) { const UChar *s = text_start; @@ -3057,48 +4304,30 @@ map_search_backward(OnigEncoding enc, UChar map[], } return (UChar* )NULL; } +extern int +onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, + OnigRegion* region, OnigOptionType option) +{ + int r; + OnigMatchParam mp; + + onig_initialize_match_param(&mp); + r = onig_match_with_param(reg, str, end, at, region, option, &mp); + onig_free_match_param_content(&mp); + return r; +} extern int -onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region, - OnigOptionType option) +onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end, + const UChar* at, OnigRegion* region, OnigOptionType option, + OnigMatchParam* mp) { int r; UChar *prev; - OnigMatchArg msa; - -#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM) - start: - THREAD_ATOMIC_START; - if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) { - ONIG_STATE_INC(reg); - if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) { - onig_chain_reduce(reg); - ONIG_STATE_INC(reg); - } - } - else { - int n; - - THREAD_ATOMIC_END; - n = 0; - while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) { - if (++n > THREAD_PASS_LIMIT_COUNT) - return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT; - THREAD_PASS; - } - goto start; - } - THREAD_ATOMIC_END; -#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */ - - MATCH_ARG_INIT(msa, option, region, at); -#ifdef USE_COMBINATION_EXPLOSION_CHECK - { - int offset = at - str; - STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check); - } -#endif + MatchArg msa; + ADJUST_MATCH_PARAM(reg, mp); + MATCH_ARG_INIT(msa, reg, option, region, at, mp); if (region #ifdef USE_POSIX_API_REGION_OPTION && !IS_POSIX_REGION(option) @@ -3110,28 +4339,31 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On r = 0; if (r == 0) { + if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) { + if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end)) { + r = ONIGERR_INVALID_WIDE_CHAR_VALUE; + goto end; + } + } + prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at); - r = match_at(reg, str, end, -#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE - end, -#endif - at, prev, &msa); + r = match_at(reg, str, end, end, at, prev, &msa); } + end: MATCH_ARG_FREE(msa); - ONIG_STATE_DEC_THREAD(reg); return r; } static int forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, - UChar* range, UChar** low, UChar** high, UChar** low_prev) + UChar* range, UChar** low, UChar** high, UChar** low_prev) { UChar *p, *pprev = (UChar* )NULL; #ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, "forward_search_range: str: %d, end: %d, s: %d, range: %d\n", - (int )str, (int )end, (int )s, (int )range); + fprintf(stderr, "forward_search_range: str: %p, end: %p, s: %p, range: %p\n", + str, end, s, range); #endif p = s; @@ -3141,29 +4373,31 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, } else { UChar *q = p + reg->dmin; + + if (q >= end) return 0; /* fail */ while (p < q) p += enclen(reg->enc, p); } } retry: switch (reg->optimize) { - case ONIG_OPTIMIZE_EXACT: + case OPTIMIZE_EXACT: p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range); break; - case ONIG_OPTIMIZE_EXACT_IC: + case OPTIMIZE_EXACT_IC: p = slow_search_ic(reg->enc, reg->case_fold_flag, reg->exact, reg->exact_end, p, end, range); break; - case ONIG_OPTIMIZE_EXACT_BM: + case OPTIMIZE_EXACT_BM: p = bm_search(reg, reg->exact, reg->exact_end, p, end, range); break; - case ONIG_OPTIMIZE_EXACT_BM_NOT_REV: + case OPTIMIZE_EXACT_BM_NO_REV: p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range); break; - case ONIG_OPTIMIZE_MAP: + case OPTIMIZE_MAP: p = map_search(reg->enc, reg->map, p, range); break; } @@ -3181,58 +4415,65 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, switch (reg->sub_anchor) { case ANCHOR_BEGIN_LINE: - if (!ON_STR_BEGIN(p)) { - prev = onigenc_get_prev_char_head(reg->enc, - (pprev ? pprev : str), p); - if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) - goto retry_gate; - } - break; + if (!ON_STR_BEGIN(p)) { + prev = onigenc_get_prev_char_head(reg->enc, + (pprev ? pprev : str), p); + if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) + goto retry_gate; + } + break; case ANCHOR_END_LINE: - if (ON_STR_END(p)) { + if (ON_STR_END(p)) { #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE - prev = (UChar* )onigenc_get_prev_char_head(reg->enc, - (pprev ? pprev : str), p); - if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) - goto retry_gate; + prev = (UChar* )onigenc_get_prev_char_head(reg->enc, + (pprev ? pprev : str), p); + if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) + goto retry_gate; #endif - } - else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end) + } + else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end) #ifdef USE_CRNL_AS_LINE_TERMINATOR - && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end) + && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end) #endif - ) - goto retry_gate; - break; + ) + goto retry_gate; + break; } } if (reg->dmax == 0) { *low = p; if (low_prev) { - if (*low > s) - *low_prev = onigenc_get_prev_char_head(reg->enc, s, p); - else - *low_prev = onigenc_get_prev_char_head(reg->enc, - (pprev ? pprev : str), p); + if (*low > s) + *low_prev = onigenc_get_prev_char_head(reg->enc, s, p); + else + *low_prev = onigenc_get_prev_char_head(reg->enc, + (pprev ? pprev : str), p); } } else { - if (reg->dmax != ONIG_INFINITE_DISTANCE) { - *low = p - reg->dmax; - if (*low > s) { - *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s, - *low, (const UChar** )low_prev); - if (low_prev && IS_NULL(*low_prev)) - *low_prev = onigenc_get_prev_char_head(reg->enc, - (pprev ? pprev : s), *low); - } - else { - if (low_prev) - *low_prev = onigenc_get_prev_char_head(reg->enc, - (pprev ? pprev : str), *low); - } + if (reg->dmax != INFINITE_LEN) { + if (p - str < reg->dmax) { + *low = (UChar* )str; + if (low_prev) + *low_prev = onigenc_get_prev_char_head(reg->enc, str, *low); + } + else { + *low = p - reg->dmax; + if (*low > s) { + *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s, + *low, (const UChar** )low_prev); + if (low_prev && IS_NULL(*low_prev)) + *low_prev = onigenc_get_prev_char_head(reg->enc, + (pprev ? pprev : s), *low); + } + else { + if (low_prev) + *low_prev = onigenc_get_prev_char_head(reg->enc, + (pprev ? pprev : str), *low); + } + } } } /* no needs to adjust *high, *high is used as range check only */ @@ -3240,8 +4481,8 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, #ifdef ONIG_DEBUG_SEARCH fprintf(stderr, - "forward_search_range success: low: %d, high: %d, dmin: %d, dmax: %d\n", - (int )(*low - str), (int )(*high - str), reg->dmin, reg->dmax); + "forward_search_range success: low: %d, high: %d, dmin: %d, dmax: %d\n", + (int )(*low - str), (int )(*high - str), reg->dmin, reg->dmax); #endif return 1; /* success */ } @@ -3249,17 +4490,14 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, return 0; /* fail */ } -static int set_bm_backward_skip P_((UChar* s, UChar* end, OnigEncoding enc, - int** skip)); #define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100 static int backward_search_range(regex_t* reg, const UChar* str, const UChar* end, - UChar* s, const UChar* range, UChar* adjrange, - UChar** low, UChar** high) + UChar* s, const UChar* range, UChar* adjrange, + UChar** low, UChar** high) { - int r; UChar *p; range += reg->dmin; @@ -3267,33 +4505,39 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end, retry: switch (reg->optimize) { - case ONIG_OPTIMIZE_EXACT: + case OPTIMIZE_EXACT: exact_method: p = slow_search_backward(reg->enc, reg->exact, reg->exact_end, - range, adjrange, end, p); + range, adjrange, end, p); break; - case ONIG_OPTIMIZE_EXACT_IC: + case OPTIMIZE_EXACT_IC: p = slow_search_backward_ic(reg->enc, reg->case_fold_flag, reg->exact, reg->exact_end, range, adjrange, end, p); break; - case ONIG_OPTIMIZE_EXACT_BM: - case ONIG_OPTIMIZE_EXACT_BM_NOT_REV: + case OPTIMIZE_EXACT_BM: + case OPTIMIZE_EXACT_BM_NO_REV: +#ifdef USE_INT_MAP_BACKWARD if (IS_NULL(reg->int_map_backward)) { + int r; + if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD) - goto exact_method; + goto exact_method; r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc, - &(reg->int_map_backward)); - if (r) return r; + &(reg->int_map_backward)); + if (r != 0) return r; } p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange, - end, p); + end, p); +#else + goto exact_method; +#endif break; - case ONIG_OPTIMIZE_MAP: + case OPTIMIZE_MAP: p = map_search_backward(reg->enc, reg->map, range, adjrange, p); break; } @@ -3304,41 +4548,41 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end, switch (reg->sub_anchor) { case ANCHOR_BEGIN_LINE: - if (!ON_STR_BEGIN(p)) { - prev = onigenc_get_prev_char_head(reg->enc, str, p); - if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) { - p = prev; - goto retry; - } - } - break; + if (!ON_STR_BEGIN(p)) { + prev = onigenc_get_prev_char_head(reg->enc, str, p); + if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) { + p = prev; + goto retry; + } + } + break; case ANCHOR_END_LINE: - if (ON_STR_END(p)) { + if (ON_STR_END(p)) { #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE - prev = onigenc_get_prev_char_head(reg->enc, adjrange, p); - if (IS_NULL(prev)) goto fail; - if (ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) { - p = prev; - goto retry; - } -#endif - } - else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end) + prev = onigenc_get_prev_char_head(reg->enc, adjrange, p); + if (IS_NULL(prev)) goto fail; + if (ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) { + p = prev; + goto retry; + } +#endif + } + else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end) #ifdef USE_CRNL_AS_LINE_TERMINATOR - && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end) + && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end) #endif - ) { - p = onigenc_get_prev_char_head(reg->enc, adjrange, p); - if (IS_NULL(p)) goto fail; - goto retry; - } - break; + ) { + p = onigenc_get_prev_char_head(reg->enc, adjrange, p); + if (IS_NULL(p)) goto fail; + goto retry; + } + break; } } /* no needs to adjust *high, *high is used as range check only */ - if (reg->dmax != ONIG_INFINITE_DISTANCE) { + if (reg->dmax != INFINITE_LEN) { *low = p - reg->dmax; *high = p - reg->dmin; *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high); @@ -3346,7 +4590,7 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end, #ifdef ONIG_DEBUG_SEARCH fprintf(stderr, "backward_search_range: low: %d, high: %d\n", - (int )(*low - str), (int )(*high - str)); + (int )(*low - str), (int )(*high - str)); #endif return 1; /* success */ } @@ -3361,60 +4605,57 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end, extern int onig_search(regex_t* reg, const UChar* str, const UChar* end, - const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option) + const UChar* start, const UChar* range, OnigRegion* region, + OnigOptionType option) +{ + int r; + OnigMatchParam mp; + + onig_initialize_match_param(&mp); + r = onig_search_with_param(reg, str, end, start, range, region, option, &mp); + onig_free_match_param_content(&mp); + return r; + +} + +extern int +onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end, + const UChar* start, const UChar* range, OnigRegion* region, + OnigOptionType option, OnigMatchParam* mp) { int r; UChar *s, *prev; - OnigMatchArg msa; + MatchArg msa; const UChar *orig_start = start; -#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE const UChar *orig_range = range; -#endif - -#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM) - start: - THREAD_ATOMIC_START; - if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) { - ONIG_STATE_INC(reg); - if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) { - onig_chain_reduce(reg); - ONIG_STATE_INC(reg); - } - } - else { - int n; - - THREAD_ATOMIC_END; - n = 0; - while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) { - if (++n > THREAD_PASS_LIMIT_COUNT) - return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT; - THREAD_PASS; - } - goto start; - } - THREAD_ATOMIC_END; -#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */ #ifdef ONIG_DEBUG_SEARCH fprintf(stderr, - "onig_search (entry point): str: %d, end: %d, start: %d, range: %d\n", - (int )str, (int )(end - str), (int )(start - str), (int )(range - str)); + "onig_search (entry point): str: %p, end: %d, start: %d, range: %d\n", + str, (int )(end - str), (int )(start - str), (int )(range - str)); #endif + ADJUST_MATCH_PARAM(reg, mp); + if (region #ifdef USE_POSIX_API_REGION_OPTION && !IS_POSIX_REGION(option) #endif ) { r = onig_region_resize_clear(region, reg->num_mem + 1); - if (r) goto finish_no_msa; + if (r != 0) goto finish_no_msa; } if (start > end || start < str) goto mismatch_no_msa; + if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) { + if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end)) { + r = ONIGERR_INVALID_WIDE_CHAR_VALUE; + goto finish_no_msa; + } + } + -#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE #define MATCH_AND_RETURN_CHECK(upper_range) \ r = match_at(reg, str, end, (upper_range), s, prev, &msa); \ @@ -3436,29 +4677,6 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, else goto finish; /* error */ \ } #endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */ -#else -#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE -#define MATCH_AND_RETURN_CHECK(none) \ - r = match_at(reg, str, end, s, prev, &msa);\ - if (r != ONIG_MISMATCH) {\ - if (r >= 0) {\ - if (! IS_FIND_LONGEST(reg->options)) {\ - goto match;\ - }\ - }\ - else goto finish; /* error */ \ - } -#else -#define MATCH_AND_RETURN_CHECK(none) \ - r = match_at(reg, str, end, s, prev, &msa);\ - if (r != ONIG_MISMATCH) {\ - if (r >= 0) {\ - goto match;\ - }\ - else goto finish; /* error */ \ - } -#endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */ -#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */ /* anchor optimize: resume search range */ @@ -3469,56 +4687,55 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, /* search start-position only */ begin_position: if (range > start) - range = start + 1; + range = start + 1; else - range = start; + range = start; } else if (reg->anchor & ANCHOR_BEGIN_BUF) { /* search str-position only */ if (range > start) { - if (start != str) goto mismatch_no_msa; - range = str + 1; + if (start != str) goto mismatch_no_msa; + range = str + 1; } else { - if (range <= str) { - start = str; - range = str; - } - else - goto mismatch_no_msa; + if (range <= str) { + start = str; + range = str; + } + else + goto mismatch_no_msa; } } else if (reg->anchor & ANCHOR_END_BUF) { min_semi_end = max_semi_end = (UChar* )end; end_buf: - if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin) - goto mismatch_no_msa; + if ((OnigLen )(max_semi_end - str) < reg->anchor_dmin) + goto mismatch_no_msa; if (range > start) { - if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) { - start = min_semi_end - reg->anchor_dmax; - if (start < end) - start = onigenc_get_right_adjust_char_head(reg->enc, str, start); - else { /* match with empty at end */ - start = onigenc_get_prev_char_head(reg->enc, str, end); - } - } - if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) { - range = max_semi_end - reg->anchor_dmin + 1; - } - - if (start >= range) goto mismatch_no_msa; + if ((OnigLen )(min_semi_end - start) > reg->anchor_dmax) { + start = min_semi_end - reg->anchor_dmax; + if (start < end) + start = onigenc_get_right_adjust_char_head(reg->enc, str, start); + } + if ((OnigLen )(max_semi_end - (range - 1)) < reg->anchor_dmin) { + range = max_semi_end - reg->anchor_dmin + 1; + } + + if (start > range) goto mismatch_no_msa; + /* If start == range, match with empty at end. + Backward search is used. */ } else { - if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) { - range = min_semi_end - reg->anchor_dmax; - } - if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) { - start = max_semi_end - reg->anchor_dmin; - start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start); - } - if (range > start) goto mismatch_no_msa; + if ((OnigLen )(min_semi_end - range) > reg->anchor_dmax) { + range = min_semi_end - reg->anchor_dmax; + } + if ((OnigLen )(max_semi_end - start) < reg->anchor_dmin) { + start = max_semi_end - reg->anchor_dmin; + start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start); + } + if (range > start) goto mismatch_no_msa; } } else if (reg->anchor & ANCHOR_SEMI_END_BUF) { @@ -3526,25 +4743,25 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, max_semi_end = (UChar* )end; if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) { - min_semi_end = pre_end; + min_semi_end = pre_end; #ifdef USE_CRNL_AS_LINE_TERMINATOR - pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, 1); - if (IS_NOT_NULL(pre_end) && - ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) { - min_semi_end = pre_end; - } + pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, 1); + if (IS_NOT_NULL(pre_end) && + ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) { + min_semi_end = pre_end; + } #endif - if (min_semi_end > str && start <= min_semi_end) { - goto end_buf; - } + if (min_semi_end > str && start <= min_semi_end) { + goto end_buf; + } } else { - min_semi_end = (UChar* )end; - goto end_buf; + min_semi_end = (UChar* )end; + goto end_buf; } } - else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) { + else if ((reg->anchor & ANCHOR_ANYCHAR_INF_ML)) { goto begin_position; } } @@ -3560,11 +4777,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, s = (UChar* )start; prev = (UChar* )NULL; - MATCH_ARG_INIT(msa, option, region, start); -#ifdef USE_COMBINATION_EXPLOSION_CHECK - msa.state_check_buff = (void* )0; - msa.state_check_buff_size = 0; /* NO NEED, for valgrind */ -#endif + MATCH_ARG_INIT(msa, reg, option, region, start, mp); MATCH_AND_RETURN_CHECK(end); goto mismatch; } @@ -3573,16 +4786,10 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, #ifdef ONIG_DEBUG_SEARCH fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n", - (int )(end - str), (int )(start - str), (int )(range - str)); + (int )(end - str), (int )(start - str), (int )(range - str)); #endif - MATCH_ARG_INIT(msa, option, region, orig_start); -#ifdef USE_COMBINATION_EXPLOSION_CHECK - { - int offset = (MIN(start, range) - str); - STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check); - } -#endif + MATCH_ARG_INIT(msa, reg, option, region, orig_start, mp); s = (UChar* )start; if (range > start) { /* forward search */ @@ -3591,51 +4798,53 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, else prev = (UChar* )NULL; - if (reg->optimize != ONIG_OPTIMIZE_NONE) { + if (reg->optimize != OPTIMIZE_NONE) { UChar *sch_range, *low, *high, *low_prev; sch_range = (UChar* )range; if (reg->dmax != 0) { - if (reg->dmax == ONIG_INFINITE_DISTANCE) - sch_range = (UChar* )end; - else { - sch_range += reg->dmax; - if (sch_range > end) sch_range = (UChar* )end; - } + if (reg->dmax == INFINITE_LEN) + sch_range = (UChar* )end; + else { + sch_range += reg->dmax; + if (sch_range > end) sch_range = (UChar* )end; + } } if ((end - start) < reg->threshold_len) goto mismatch; - if (reg->dmax != ONIG_INFINITE_DISTANCE) { - do { - if (! forward_search_range(reg, str, end, s, sch_range, - &low, &high, &low_prev)) goto mismatch; - if (s < low) { - s = low; - prev = low_prev; - } - while (s <= high) { - MATCH_AND_RETURN_CHECK(orig_range); - prev = s; - s += enclen(reg->enc, s); - } - } while (s < range); - goto mismatch; + if (reg->dmax != INFINITE_LEN) { + do { + if (! forward_search_range(reg, str, end, s, sch_range, + &low, &high, &low_prev)) goto mismatch; + if (s < low) { + s = low; + prev = low_prev; + } + while (s <= high) { + MATCH_AND_RETURN_CHECK(orig_range); + prev = s; + s += enclen(reg->enc, s); + } + } while (s < range); + goto mismatch; } else { /* check only. */ - if (! forward_search_range(reg, str, end, s, sch_range, - &low, &high, (UChar** )NULL)) goto mismatch; + if (! forward_search_range(reg, str, end, s, sch_range, + &low, &high, (UChar** )NULL)) goto mismatch; - if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) { + if ((reg->anchor & ANCHOR_ANYCHAR_INF) != 0) { do { MATCH_AND_RETURN_CHECK(orig_range); prev = s; s += enclen(reg->enc, s); - while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) { - prev = s; - s += enclen(reg->enc, s); + if ((reg->anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) == 0) { + while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) { + prev = s; + s += enclen(reg->enc, s); + } } } while (s < range); goto mismatch; @@ -3654,56 +4863,54 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, } } else { /* backward search */ -#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE if (orig_start < end) orig_start += enclen(reg->enc, orig_start); /* is upper range */ -#endif - if (reg->optimize != ONIG_OPTIMIZE_NONE) { + if (reg->optimize != OPTIMIZE_NONE) { UChar *low, *high, *adjrange, *sch_start; if (range < end) - adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range); + adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range); else - adjrange = (UChar* )end; - - if (reg->dmax != ONIG_INFINITE_DISTANCE && - (end - range) >= reg->threshold_len) { - do { - sch_start = s + reg->dmax; - if (sch_start > end) sch_start = (UChar* )end; - if (backward_search_range(reg, str, end, sch_start, range, adjrange, - &low, &high) <= 0) - goto mismatch; - - if (s > high) - s = high; - - while (s >= low) { - prev = onigenc_get_prev_char_head(reg->enc, str, s); - MATCH_AND_RETURN_CHECK(orig_start); - s = prev; - } - } while (s >= range); - goto mismatch; + adjrange = (UChar* )end; + + if (reg->dmax != INFINITE_LEN && + (end - range) >= reg->threshold_len) { + do { + sch_start = s + reg->dmax; + if (sch_start > end) sch_start = (UChar* )end; + if (backward_search_range(reg, str, end, sch_start, range, adjrange, + &low, &high) <= 0) + goto mismatch; + + if (s > high) + s = high; + + while (s >= low) { + prev = onigenc_get_prev_char_head(reg->enc, str, s); + MATCH_AND_RETURN_CHECK(orig_start); + s = prev; + } + } while (s >= range); + goto mismatch; } else { /* check only. */ - if ((end - range) < reg->threshold_len) goto mismatch; - - sch_start = s; - if (reg->dmax != 0) { - if (reg->dmax == ONIG_INFINITE_DISTANCE) - sch_start = (UChar* )end; - else { - sch_start += reg->dmax; - if (sch_start > end) sch_start = (UChar* )end; - else - sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, - start, sch_start); - } - } - if (backward_search_range(reg, str, end, sch_start, range, adjrange, - &low, &high) <= 0) goto mismatch; + if ((end - range) < reg->threshold_len) goto mismatch; + + sch_start = s; + if (reg->dmax != 0) { + if (reg->dmax == INFINITE_LEN) + sch_start = (UChar* )end; + else { + sch_start += reg->dmax; + if (sch_start > end) sch_start = (UChar* )end; + else + sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, + start, sch_start); + } + } + if (backward_search_range(reg, str, end, sch_start, range, adjrange, + &low, &high) <= 0) goto mismatch; } } @@ -3727,10 +4934,9 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, finish: MATCH_ARG_FREE(msa); - ONIG_STATE_DEC_THREAD(reg); /* If result is mismatch and no FIND_NOT_EMPTY option, - then the region is not setted in match_at(). */ + then the region is not set in match_at(). */ if (IS_FIND_NOT_EMPTY(reg->options) && region #ifdef USE_POSIX_API_REGION_OPTION && !IS_POSIX_REGION(option) @@ -3748,7 +4954,6 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, mismatch_no_msa: r = ONIG_MISMATCH; finish_no_msa: - ONIG_STATE_DEC_THREAD(reg); #ifdef ONIG_DEBUG if (r != ONIG_MISMATCH) fprintf(stderr, "onig_search: error %d\n", r); @@ -3756,9 +4961,57 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, return r; match: - ONIG_STATE_DEC_THREAD(reg); MATCH_ARG_FREE(msa); - return (int)(s - str); + return (int )(s - str); +} + +extern int +onig_scan(regex_t* reg, const UChar* str, const UChar* end, + OnigRegion* region, OnigOptionType option, + int (*scan_callback)(int, int, OnigRegion*, void*), + void* callback_arg) +{ + int r; + int n; + int rs; + const UChar* start; + + if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) { + if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end)) + return ONIGERR_INVALID_WIDE_CHAR_VALUE; + + ONIG_OPTION_OFF(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING); + } + + n = 0; + start = str; + while (1) { + r = onig_search(reg, str, end, start, end, region, option); + if (r >= 0) { + rs = scan_callback(n, r, region, callback_arg); + n++; + if (rs != 0) + return rs; + + if (region->end[0] == start - str) { + if (start >= end) break; + start += enclen(reg->enc, start); + } + else + start = str + region->end[0]; + + if (start > end) + break; + } + else if (r == ONIG_MISMATCH) { + break; + } + else { /* error */ + return r; + } + } + + return n; } extern OnigEncoding @@ -3799,7 +5052,7 @@ onig_number_of_capture_histories(regex_t* reg) n = 0; for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) { - if (BIT_STATUS_AT(reg->capture_history, i) != 0) + if (MEM_STATUS_AT(reg->capture_history, i) != 0) n++; } return n; @@ -3814,3 +5067,627 @@ onig_copy_encoding(OnigEncoding to, OnigEncoding from) *to = *from; } + +/* for callout functions */ + +#ifdef USE_CALLOUT + +extern OnigCalloutFunc +onig_get_progress_callout(void) +{ + return DefaultProgressCallout; +} + +extern int +onig_set_progress_callout(OnigCalloutFunc f) +{ + DefaultProgressCallout = f; + return ONIG_NORMAL; +} + +extern OnigCalloutFunc +onig_get_retraction_callout(void) +{ + return DefaultRetractionCallout; +} + +extern int +onig_set_retraction_callout(OnigCalloutFunc f) +{ + DefaultRetractionCallout = f; + return ONIG_NORMAL; +} + +extern int +onig_get_callout_num_by_callout_args(OnigCalloutArgs* args) +{ + return args->num; +} + +extern OnigCalloutIn +onig_get_callout_in_by_callout_args(OnigCalloutArgs* args) +{ + return args->in; +} + +extern int +onig_get_name_id_by_callout_args(OnigCalloutArgs* args) +{ + return args->name_id; +} + +extern const UChar* +onig_get_contents_by_callout_args(OnigCalloutArgs* args) +{ + int num; + CalloutListEntry* e; + + num = args->num; + e = onig_reg_callout_list_at(args->regex, num); + if (IS_NULL(e)) return 0; + if (e->of == ONIG_CALLOUT_OF_CONTENTS) { + return e->u.content.start; + } + + return 0; +} + +extern const UChar* +onig_get_contents_end_by_callout_args(OnigCalloutArgs* args) +{ + int num; + CalloutListEntry* e; + + num = args->num; + e = onig_reg_callout_list_at(args->regex, num); + if (IS_NULL(e)) return 0; + if (e->of == ONIG_CALLOUT_OF_CONTENTS) { + return e->u.content.end; + } + + return 0; +} + +extern int +onig_get_args_num_by_callout_args(OnigCalloutArgs* args) +{ + int num; + CalloutListEntry* e; + + num = args->num; + e = onig_reg_callout_list_at(args->regex, num); + if (IS_NULL(e)) return ONIGERR_INVALID_ARGUMENT; + if (e->of == ONIG_CALLOUT_OF_NAME) { + return e->u.arg.num; + } + + return ONIGERR_INVALID_ARGUMENT; +} + +extern int +onig_get_passed_args_num_by_callout_args(OnigCalloutArgs* args) +{ + int num; + CalloutListEntry* e; + + num = args->num; + e = onig_reg_callout_list_at(args->regex, num); + if (IS_NULL(e)) return ONIGERR_INVALID_ARGUMENT; + if (e->of == ONIG_CALLOUT_OF_NAME) { + return e->u.arg.passed_num; + } + + return ONIGERR_INVALID_ARGUMENT; +} + +extern int +onig_get_arg_by_callout_args(OnigCalloutArgs* args, int index, + OnigType* type, OnigValue* val) +{ + int num; + CalloutListEntry* e; + + num = args->num; + e = onig_reg_callout_list_at(args->regex, num); + if (IS_NULL(e)) return ONIGERR_INVALID_ARGUMENT; + if (e->of == ONIG_CALLOUT_OF_NAME) { + if (IS_NOT_NULL(type)) *type = e->u.arg.types[index]; + if (IS_NOT_NULL(val)) *val = e->u.arg.vals[index]; + return ONIG_NORMAL; + } + + return ONIGERR_INVALID_ARGUMENT; +} + +extern const UChar* +onig_get_string_by_callout_args(OnigCalloutArgs* args) +{ + return args->string; +} + +extern const UChar* +onig_get_string_end_by_callout_args(OnigCalloutArgs* args) +{ + return args->string_end; +} + +extern const UChar* +onig_get_start_by_callout_args(OnigCalloutArgs* args) +{ + return args->start; +} + +extern const UChar* +onig_get_right_range_by_callout_args(OnigCalloutArgs* args) +{ + return args->right_range; +} + +extern const UChar* +onig_get_current_by_callout_args(OnigCalloutArgs* args) +{ + return args->current; +} + +extern OnigRegex +onig_get_regex_by_callout_args(OnigCalloutArgs* args) +{ + return args->regex; +} + +extern unsigned long +onig_get_retry_counter_by_callout_args(OnigCalloutArgs* args) +{ + return args->retry_in_match_counter; +} + + +extern int +onig_get_capture_range_in_callout(OnigCalloutArgs* a, int mem_num, int* begin, int* end) +{ + OnigRegex reg; + const UChar* str; + StackType* stk_base; + int i; + + i = mem_num; + reg = a->regex; + str = a->string; + stk_base = a->stk_base; + + if (i > 0) { + if (a->mem_end_stk[i] != INVALID_STACK_INDEX) { + if (MEM_STATUS_AT(reg->bt_mem_start, i)) + *begin = (int )(STACK_AT(a->mem_start_stk[i])->u.mem.pstr - str); + else + *begin = (int )((UChar* )((void* )a->mem_start_stk[i]) - str); + + *end = (int )((MEM_STATUS_AT(reg->bt_mem_end, i) + ? STACK_AT(a->mem_end_stk[i])->u.mem.pstr + : (UChar* )((void* )a->mem_end_stk[i])) - str); + } + else { + *begin = *end = ONIG_REGION_NOTPOS; + } + } + else if (i == 0) { +#if 0 + *begin = a->start - str; + *end = a->current - str; +#else + return ONIGERR_INVALID_ARGUMENT; +#endif + } + else + return ONIGERR_INVALID_ARGUMENT; + + return ONIG_NORMAL; +} + +extern int +onig_get_used_stack_size_in_callout(OnigCalloutArgs* a, int* used_num, int* used_bytes) +{ + int n; + + n = (int )(a->stk - a->stk_base); + + if (used_num != 0) + *used_num = n; + + if (used_bytes != 0) + *used_bytes = n * sizeof(StackType); + + return ONIG_NORMAL; +} + + +/* builtin callout functions */ + +extern int +onig_builtin_fail(OnigCalloutArgs* args ARG_UNUSED, void* user_data ARG_UNUSED) +{ + return ONIG_CALLOUT_FAIL; +} + +extern int +onig_builtin_mismatch(OnigCalloutArgs* args ARG_UNUSED, void* user_data ARG_UNUSED) +{ + return ONIG_MISMATCH; +} + +#if 0 +extern int +onig_builtin_success(OnigCalloutArgs* args ARG_UNUSED, void* user_data ARG_UNUSED) +{ + return ONIG_CALLOUT_SUCCESS; +} +#endif + +extern int +onig_builtin_error(OnigCalloutArgs* args, void* user_data ARG_UNUSED) +{ + int r; + int n; + OnigValue val; + + r = onig_get_arg_by_callout_args(args, 0, 0, &val); + if (r != ONIG_NORMAL) return r; + + n = (int )val.l; + if (n >= 0) { + n = ONIGERR_INVALID_CALLOUT_BODY; + } + + return n; +} + +extern int +onig_builtin_count(OnigCalloutArgs* args, void* user_data) +{ + (void )onig_check_callout_data_and_clear_old_values(args); + + return onig_builtin_total_count(args, user_data); +} + +extern int +onig_builtin_total_count(OnigCalloutArgs* args, void* user_data ARG_UNUSED) +{ + int r; + int slot; + OnigType type; + OnigValue val; + OnigValue aval; + OnigCodePoint count_type; + + r = onig_get_arg_by_callout_args(args, 0, &type, &aval); + if (r != ONIG_NORMAL) return r; + + count_type = aval.c; + if (count_type != '>' && count_type != 'X' && count_type != '<') + return ONIGERR_INVALID_CALLOUT_ARG; + + r = onig_get_callout_data_by_callout_args_self_dont_clear_old(args, 0, + &type, &val); + if (r < ONIG_NORMAL) + return r; + else if (r > ONIG_NORMAL) { + /* type == void: initial state */ + val.l = 0; + } + + if (args->in == ONIG_CALLOUT_IN_RETRACTION) { + slot = 2; + if (count_type == '<') + val.l++; + else if (count_type == 'X') + val.l--; + } + else { + slot = 1; + if (count_type != '<') + val.l++; + } + + r = onig_set_callout_data_by_callout_args_self(args, 0, ONIG_TYPE_LONG, &val); + if (r != ONIG_NORMAL) return r; + + /* slot 1: in progress counter, slot 2: in retraction counter */ + r = onig_get_callout_data_by_callout_args_self_dont_clear_old(args, slot, + &type, &val); + if (r < ONIG_NORMAL) + return r; + else if (r > ONIG_NORMAL) { + val.l = 0; + } + + val.l++; + r = onig_set_callout_data_by_callout_args_self(args, slot, ONIG_TYPE_LONG, &val); + if (r != ONIG_NORMAL) return r; + + return ONIG_CALLOUT_SUCCESS; +} + +extern int +onig_builtin_max(OnigCalloutArgs* args, void* user_data ARG_UNUSED) +{ + int r; + int slot; + long max_val; + OnigCodePoint count_type; + OnigType type; + OnigValue val; + OnigValue aval; + + (void )onig_check_callout_data_and_clear_old_values(args); + + slot = 0; + r = onig_get_callout_data_by_callout_args_self(args, slot, &type, &val); + if (r < ONIG_NORMAL) + return r; + else if (r > ONIG_NORMAL) { + /* type == void: initial state */ + type = ONIG_TYPE_LONG; + val.l = 0; + } + + r = onig_get_arg_by_callout_args(args, 0, &type, &aval); + if (r != ONIG_NORMAL) return r; + if (type == ONIG_TYPE_TAG) { + r = onig_get_callout_data_by_callout_args(args, aval.tag, 0, &type, &aval); + if (r < ONIG_NORMAL) return r; + else if (r > ONIG_NORMAL) + max_val = 0L; + else + max_val = aval.l; + } + else { /* LONG */ + max_val = aval.l; + } + + r = onig_get_arg_by_callout_args(args, 1, &type, &aval); + if (r != ONIG_NORMAL) return r; + + count_type = aval.c; + if (count_type != '>' && count_type != 'X' && count_type != '<') + return ONIGERR_INVALID_CALLOUT_ARG; + + if (args->in == ONIG_CALLOUT_IN_RETRACTION) { + if (count_type == '<') { + if (val.l >= max_val) return ONIG_CALLOUT_FAIL; + val.l++; + } + else if (count_type == 'X') + val.l--; + } + else { + if (count_type != '<') { + if (val.l >= max_val) return ONIG_CALLOUT_FAIL; + val.l++; + } + } + + r = onig_set_callout_data_by_callout_args_self(args, slot, ONIG_TYPE_LONG, &val); + if (r != ONIG_NORMAL) return r; + + return ONIG_CALLOUT_SUCCESS; +} + +enum OP_CMP { + OP_EQ, + OP_NE, + OP_LT, + OP_GT, + OP_LE, + OP_GE +}; + +extern int +onig_builtin_cmp(OnigCalloutArgs* args, void* user_data ARG_UNUSED) +{ + int r; + int slot; + long lv; + long rv; + OnigType type; + OnigValue val; + regex_t* reg; + enum OP_CMP op; + + reg = args->regex; + + r = onig_get_arg_by_callout_args(args, 0, &type, &val); + if (r != ONIG_NORMAL) return r; + + if (type == ONIG_TYPE_TAG) { + r = onig_get_callout_data_by_callout_args(args, val.tag, 0, &type, &val); + if (r < ONIG_NORMAL) return r; + else if (r > ONIG_NORMAL) + lv = 0L; + else + lv = val.l; + } + else { /* ONIG_TYPE_LONG */ + lv = val.l; + } + + r = onig_get_arg_by_callout_args(args, 2, &type, &val); + if (r != ONIG_NORMAL) return r; + + if (type == ONIG_TYPE_TAG) { + r = onig_get_callout_data_by_callout_args(args, val.tag, 0, &type, &val); + if (r < ONIG_NORMAL) return r; + else if (r > ONIG_NORMAL) + rv = 0L; + else + rv = val.l; + } + else { /* ONIG_TYPE_LONG */ + rv = val.l; + } + + slot = 0; + r = onig_get_callout_data_by_callout_args_self(args, slot, &type, &val); + if (r < ONIG_NORMAL) + return r; + else if (r > ONIG_NORMAL) { + /* type == void: initial state */ + OnigCodePoint c1, c2; + UChar* p; + + r = onig_get_arg_by_callout_args(args, 1, &type, &val); + if (r != ONIG_NORMAL) return r; + + p = val.s.start; + c1 = ONIGENC_MBC_TO_CODE(reg->enc, p, val.s.end); + p += ONIGENC_MBC_ENC_LEN(reg->enc, p); + if (p < val.s.end) { + c2 = ONIGENC_MBC_TO_CODE(reg->enc, p, val.s.end); + p += ONIGENC_MBC_ENC_LEN(reg->enc, p); + if (p != val.s.end) return ONIGERR_INVALID_CALLOUT_ARG; + } + else + c2 = 0; + + switch (c1) { + case '=': + if (c2 != '=') return ONIGERR_INVALID_CALLOUT_ARG; + op = OP_EQ; + break; + case '!': + if (c2 != '=') return ONIGERR_INVALID_CALLOUT_ARG; + op = OP_NE; + break; + case '<': + if (c2 == '=') op = OP_LE; + else if (c2 == 0) op = OP_LT; + else return ONIGERR_INVALID_CALLOUT_ARG; + break; + case '>': + if (c2 == '=') op = OP_GE; + else if (c2 == 0) op = OP_GT; + else return ONIGERR_INVALID_CALLOUT_ARG; + break; + default: + return ONIGERR_INVALID_CALLOUT_ARG; + break; + } + val.l = (long )op; + r = onig_set_callout_data_by_callout_args_self(args, slot, ONIG_TYPE_LONG, &val); + if (r != ONIG_NORMAL) return r; + } + else { + op = (enum OP_CMP )val.l; + } + + switch (op) { + case OP_EQ: r = (lv == rv); break; + case OP_NE: r = (lv != rv); break; + case OP_LT: r = (lv < rv); break; + case OP_GT: r = (lv > rv); break; + case OP_LE: r = (lv <= rv); break; + case OP_GE: r = (lv >= rv); break; + } + + return r == 0 ? ONIG_CALLOUT_FAIL : ONIG_CALLOUT_SUCCESS; +} + + +//#include + +static FILE* OutFp; + +/* name start with "onig_" for macros. */ +static int +onig_builtin_monitor(OnigCalloutArgs* args, void* user_data) +{ + int r; + int num; + size_t tag_len; + const UChar* start; + const UChar* right; + const UChar* current; + const UChar* string; + const UChar* strend; + const UChar* tag_start; + const UChar* tag_end; + regex_t* reg; + OnigCalloutIn in; + OnigType type; + OnigValue val; + char buf[20]; + FILE* fp; + + fp = OutFp; + + r = onig_get_arg_by_callout_args(args, 0, &type, &val); + if (r != ONIG_NORMAL) return r; + + in = onig_get_callout_in_by_callout_args(args); + if (in == ONIG_CALLOUT_IN_PROGRESS) { + if (val.c == '<') + return ONIG_CALLOUT_SUCCESS; + } + else { + if (val.c != 'X' && val.c != '<') + return ONIG_CALLOUT_SUCCESS; + } + + num = onig_get_callout_num_by_callout_args(args); + start = onig_get_start_by_callout_args(args); + right = onig_get_right_range_by_callout_args(args); + current = onig_get_current_by_callout_args(args); + string = onig_get_string_by_callout_args(args); + strend = onig_get_string_end_by_callout_args(args); + reg = onig_get_regex_by_callout_args(args); + tag_start = onig_get_callout_tag_start(reg, num); + tag_end = onig_get_callout_tag_end(reg, num); + + if (tag_start == 0) + sprintf_s(buf, sizeof(buf), "#%d", num); + else { + /* CAUTION: tag string is not terminated with NULL. */ + int i; + + tag_len = tag_end - tag_start; + if (tag_len >= sizeof(buf)) tag_len = sizeof(buf) - 1; + for (i = 0; i < tag_len; i++) buf[i] = tag_start[i]; + buf[tag_len] = '\0'; + } + + fprintf(fp, "ONIG-MONITOR: %-4s %s at: %d [%d - %d] len: %d\n", + buf, + in == ONIG_CALLOUT_IN_PROGRESS ? "=>" : "<=", + (int )(current - string), + (int )(start - string), + (int )(right - string), + (int )(strend - string)); + //fflush(fp); + + return ONIG_CALLOUT_SUCCESS; +} + +extern int +onig_setup_builtin_monitors_by_ascii_encoded_name(void* fp /* FILE* */) +{ + int id; + char* name; + OnigEncoding enc; + unsigned int ts[4]; + OnigValue opts[4]; + + if (IS_NOT_NULL(fp)) + OutFp = (FILE* )fp; + else + OutFp = stdout; + + enc = ONIG_ENCODING_ASCII; + + name = "MON"; + ts[0] = ONIG_TYPE_CHAR; + opts[0].c = '>'; + BC_B_O(name, monitor, 1, ts, 1, opts); + + return ONIG_NORMAL; +} + +#endif /* USE_CALLOUT */