regparse.c - Oniguruma (regular expression library)\r
**********************************************************************/\r
/*-\r
- * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>\r
+ * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>\r
* All rights reserved.\r
*\r
- * Copyright (c) 2015, Hewlett Packard Enterprise Development, L.P.<BR>\r
- *\r
* Redistribution and use in source and binary forms, with or without\r
* modification, are permitted provided that the following conditions\r
* are met:\r
#include "regparse.h"\r
#include "st.h"\r
\r
+#ifdef DEBUG_NODE_FREE\r
+#include <stdio.h>\r
+#endif\r
+\r
+#define INIT_TAG_NAMES_ALLOC_NUM 5\r
+\r
#define WARN_BUFSIZE 256\r
\r
#define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS\r
\r
+/* Non-zero when code `c` may appear in a callout name: an ASCII letter,
+ * an ASCII digit or '_'.  `c` is evaluated several times, so it must not
+ * have side effects; it is parenthesized so any expression can be passed. */
+#define IS_ALLOWED_CODE_IN_CALLOUT_NAME(c) \
+  (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z') || ((c) >= '0' && (c) <= '9') || (c) == '_' /* || c == '!' */)
+/* Same test, applied to callout tag names. */
+#define IS_ALLOWED_CODE_IN_CALLOUT_TAG_NAME(c) \
+  (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z') || ((c) >= '0' && (c) <= '9') || (c) == '_')
+\r
+\r
+OnigSyntaxType OnigSyntaxOniguruma = { /* syntax table named for Oniguruma itself; initializer groups in order: ONIG_SYN_OP_* flags, ONIG_SYN_OP2_* flags, a third flag set (presumably syntax behavior flags -- field names are declared elsewhere), options, meta-char table */
+ (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
+ ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
+ ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_O_BRACE_OCTAL |
+ ONIG_SYN_OP_ESC_CONTROL_CHARS |
+ ONIG_SYN_OP_ESC_C_CONTROL )
+ & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END ) /* GNU operator set with the LTGT word begin/end flag masked out */
+ , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
+ ONIG_SYN_OP2_OPTION_RUBY |
+ ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
+ ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE |
+ ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP |
+ ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS | /* callout flags: not present in the OnigSyntaxRuby lines visible below */
+ ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME |
+ ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER |
+ ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE |
+ ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT |
+ ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP |
+ ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |
+ ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
+ ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
+ ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
+ ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |
+ ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |
+ ONIG_SYN_OP2_ESC_H_XDIGIT | ONIG_SYN_OP2_ESC_U_HEX4 )
+ , ( SYN_GNU_REGEX_BV | 
+ ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |
+ ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |
+ ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
+ ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |
+ ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |
+ ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |
+ ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )
+ , ONIG_OPTION_NONE
+ ,
+ { /* meta-character table: only the escape character is set, every other slot is ONIG_INEFFECTIVE_META_CHAR */
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ }
+};
\r
OnigSyntaxType OnigSyntaxRuby = {\r
(( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |\r
ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |\r
- ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |\r
+ ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_O_BRACE_OCTAL |\r
+ ONIG_SYN_OP_ESC_CONTROL_CHARS |\r
ONIG_SYN_OP_ESC_C_CONTROL )\r
& ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )\r
, ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |\r
ONIG_SYN_OP2_OPTION_RUBY |\r
ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |\r
+ ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE |\r
+ ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP |\r
+ ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER |\r
+ ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE |\r
+ ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP |\r
ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |\r
ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |\r
ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |\r
ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |\r
ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |\r
ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |\r
- ONIG_SYN_OP2_ESC_H_XDIGIT )\r
+ ONIG_SYN_OP2_ESC_H_XDIGIT | ONIG_SYN_OP2_ESC_U_HEX4 )\r
, ( SYN_GNU_REGEX_BV | \r
ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |\r
ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |\r
}\r
};\r
\r
-OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_RUBY;\r
+OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_ONIGURUMA;\r
\r
extern void onig_null_warn(const char* s ARG_UNUSED) { }\r
\r
onig_verb_warn = f;\r
}\r
\r
+/* Passes message `s` to the installed warning handler (onig_warn).
+ * Nothing is called while the handler is still the no-op onig_null_warn. */
+extern void
+onig_warning(const char* s)
+{
+  if (onig_warn != onig_null_warn) {
+    (*onig_warn)(s);
+  }
+}
+\r
+#define DEFAULT_MAX_CAPTURE_NUM   32767
+
+/* Current capture number limit; changed only by onig_set_capture_num_limit(). */
+static int MaxCaptureNum = DEFAULT_MAX_CAPTURE_NUM;
+
+/* Replaces the capture number limit with `num`.
+ * Returns 0 on success.  A negative `num` is rejected with -1 and the
+ * stored limit is left untouched. */
+extern int
+onig_set_capture_num_limit(int num)
+{
+  if (num >= 0) {
+    MaxCaptureNum = num;
+    return 0;
+  }
+
+  return -1;
+}
+\r
+/* Parse depth limit; starts at DEFAULT_PARSE_DEPTH_LIMIT. */
+static unsigned int ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT;
+
+/* Returns the parse depth limit currently in effect. */
+extern unsigned int
+onig_get_parse_depth_limit(void)
+{
+  return ParseDepthLimit;
+}
+
+/* Sets the parse depth limit.  A `depth` of 0 restores
+ * DEFAULT_PARSE_DEPTH_LIMIT.  Always returns 0. */
+extern int
+onig_set_parse_depth_limit(unsigned int depth)
+{
+  ParseDepthLimit = (depth != 0) ? depth : DEFAULT_PARSE_DEPTH_LIMIT;
+  return 0;
+}
+\r
+/* Multiplies two non-negative ints with an overflow guard.
+ * Returns x * y when the product fits in an int, 0 when either operand is
+ * 0, and -1 when the product would exceed INT_MAX.
+ * Both operands are expected to be >= 0; negative operands are not
+ * validated here. */
+static int
+positive_int_multiply(int x, int y)
+{
+  if (x == 0 || y == 0) return 0;
+
+  /* x <= INT_MAX / y  implies  x * y <= INT_MAX for positive x and y;
+     the bound is inclusive so exactly representable products are kept. */
+  if (x <= INT_MAX / y)
+    return x * y;
+  else
+    return -1;
+}
+\r
static void\r
bbuf_free(BBuf* bbuf)\r
{\r
\r
*rto = to = (BBuf* )xmalloc(sizeof(BBuf));\r
CHECK_NULL_RETURN_MEMERR(to);\r
- r = BBUF_INIT(to, from->alloc);\r
- if (r != 0) return r;\r
+ r = BB_INIT(to, from->alloc);\r
+ if (r != 0) {\r
+ xfree(to->p);\r
+ *rto = 0;\r
+ return r;\r
+ }\r
to->used = from->used;\r
xmemcpy(to->p, from->p, from->used);\r
return 0;\r
}\r
\r
-#define BACKREF_REL_TO_ABS(rel_no, env) \\r
- ((env)->num_mem + 1 + (rel_no))\r
+/* Converts a relative back-reference number into an absolute group number
+ * based on env->num_mem.
+ *   rel_no >  0 : env->num_mem + rel_no
+ *   rel_no <= 0 : env->num_mem + 1 + rel_no */
+static int backref_rel_to_abs(int rel_no, ScanEnv* env)
+{
+  return (rel_no > 0) ? (env->num_mem + rel_no)
+                      : (env->num_mem + 1 + rel_no);
+}
+\r
+#define OPTION_ON(v,f) ((v) |= (f))\r
+#define OPTION_OFF(v,f) ((v) &= ~(f))\r
\r
-#define ONOFF(v,f,negative) (negative) ? ((v) &= ~(f)) : ((v) |= (f))\r
+/* Clears flag(s) `f` in `v` when `negative` is non-zero, sets them otherwise.
+ * The whole expansion is parenthesized so the macro can be used inside a
+ * larger expression without the ?: operator capturing its neighbours. */
+#define OPTION_NEGATE(v,f,negative)    ((negative) ? ((v) &= ~(f)) : ((v) |= (f)))
\r
#define MBCODE_START_POS(enc) \\r
(OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80)\r
#define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\\r
if (! ONIGENC_IS_SINGLEBYTE(enc)) {\\r
r = SET_ALL_MULTI_BYTE_RANGE(enc, &(mbuf));\\r
- if (r) return r;\\r
+ if (r != 0) return r;\\r
}\\r
} while (0)\r
\r
extern void\r
onig_strcpy(UChar* dest, const UChar* src, const UChar* end)\r
{\r
- int len = (int)(end - src);\r
+ int len = (int )(end - src);\r
if (len > 0) {\r
xmemcpy(dest, src, len);\r
dest[len] = (UChar )0;\r
}\r
}\r
\r
-#ifdef USE_NAMED_GROUP\r
-static UChar*\r
-strdup_with_null(OnigEncoding enc, UChar* s, UChar* end)\r
+static int\r
+save_entry(ScanEnv* env, enum SaveType type, int* id)\r
{\r
- int slen, term_len, i;\r
- UChar *r;\r
+ int nid = env->save_num;\r
\r
- slen = (int)(end - s);\r
- term_len = ONIGENC_MBC_MINLEN(enc);\r
-\r
- r = (UChar* )xmalloc(slen + term_len);\r
- CHECK_NULL_RETURN(r);\r
- xmemcpy(r, s, slen);\r
+#if 0\r
+ if (IS_NULL(env->saves)) {\r
+ int n = 10;\r
+ env->saves = (SaveItem* )xmalloc(sizeof(SaveItem) * n);\r
+ CHECK_NULL_RETURN_MEMERR(env->saves);\r
+ env->save_alloc_num = n;\r
+ }\r
+ else if (env->save_alloc_num <= nid) {\r
+ int n = env->save_alloc_num * 2;\r
+ SaveItem* p = (SaveItem* )xrealloc(env->saves, sizeof(SaveItem) * n, sizeof(SaveItem)*env->save_alloc_num);\r
+ CHECK_NULL_RETURN_MEMERR(p);\r
+ env->saves = p;\r
+ env->save_alloc_num = n;\r
+ }\r
\r
- for (i = 0; i < term_len; i++)\r
- r[slen + i] = (UChar )0;\r
+ env->saves[nid].type = type;\r
+#endif\r
\r
- return r;\r
+ env->save_num++;\r
+ *id = nid;\r
+ return 0;\r
}\r
-#endif\r
\r
/* scan pattern methods */\r
#define PEND_VALUE 0\r
\r
static UChar*\r
strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end,\r
- int capa, int oldCapa)\r
+ int capa, int oldCapa)\r
{\r
UChar* r;\r
\r
/* dest on static area */\r
static UChar*\r
strcat_capa_from_static(UChar* dest, UChar* dest_end,\r
- const UChar* src, const UChar* src_end, int capa)\r
+ const UChar* src, const UChar* src_end, int capa)\r
{\r
UChar* r;\r
\r
\r
extern int\r
onig_st_lookup_strend(hash_table_type* table, const UChar* str_key,\r
- const UChar* end_key, hash_data_type *value)\r
+ const UChar* end_key, hash_data_type *value)\r
{\r
st_str_end_key key;\r
\r
\r
+/* Inserts `value` into `table` under the byte range [str_key, end_key).
+ * A st_str_end_key record is allocated here; it only points at the caller's
+ * string bytes, which are not copied.  Returns the onig_st_insert() result,
+ * or leaves early through CHECK_NULL_RETURN_MEMERR when the record cannot
+ * be allocated. */
extern int
onig_st_insert_strend(hash_table_type* table, const UChar* str_key,
- const UChar* end_key, hash_data_type value)
+ const UChar* end_key, hash_data_type value)
{
st_str_end_key* key;
int result;

key = (st_str_end_key* )xmalloc(sizeof(st_str_end_key));
+ CHECK_NULL_RETURN_MEMERR(key);
+
key->s = (UChar* )str_key;
key->end = (UChar* )end_key;
result = onig_st_insert(table, (st_data_t )key, value);
+  if (result) {
+    /* Same rule as st_insert_callout_name_table(): a non-zero result is
+       treated as "the table did not keep this key record", so release it
+       here instead of leaking it. */
+    xfree(key);
+  }
return result;
}
\r
-#endif /* USE_ST_LIBRARY */\r
\r
+typedef struct { /* hash key used by the callout name table (see callout_name_table_cmp / callout_name_table_hash) */
+ OnigEncoding enc; /* compared with != in callout_name_table_cmp; also mixed into the hash */
+ int type; /* callout type: single or not */
+ UChar* s; /* first byte of the name */
+ UChar* end; /* one past the last byte of the name (loops run while p < end) */
+} st_callout_name_key;
+\r
+/* Comparison callback for callout-name hash keys.
+ * Returns 0 when both keys have the same encoding, the same type and
+ * byte-identical names; non-zero otherwise (1 for an encoding, type or
+ * length mismatch, the difference of the first differing bytes else). */
+static int
+callout_name_table_cmp(st_callout_name_key* x, st_callout_name_key* y)
+{
+  UChar* a;
+  UChar* b;
+
+  if (x->enc != y->enc || x->type != y->type ||
+      (x->end - x->s) != (y->end - y->s))
+    return 1;
+
+  for (a = x->s, b = y->s; a < x->end; a++, b++) {
+    int diff = (int )*a - (int )*b;
+    if (diff != 0) return diff;
+  }
+
+  return 0;
+}
+\r
+/* Hash callback for callout-name keys.
+ * Folds the name bytes with a multiplicative hash and mixes in the low 16
+ * bits of the encoding pointer and the callout type.  The arithmetic is done
+ * in unsigned int so that long names wrap around instead of triggering
+ * signed overflow, which is undefined behaviour in C. */
+static int
+callout_name_table_hash(st_callout_name_key* x)
+{
+  UChar *p;
+  unsigned int val = 0;
+
+  for (p = x->s; p < x->end; p++) {
+    val = val * 997 + (unsigned int )*p;
+  }
+
+  val += (val >> 5);
+  /* use intptr_t for escape warning in Windows */
+  val += (unsigned int )((intptr_t )x->enc & 0xffff);
+  val += (unsigned int )x->type;
+
+  return (int )val;
+}
+\r
+/* Creates a hash table for callout names, using callout_name_table_cmp and
+ * callout_name_table_hash as its callbacks.  `size` is forwarded unchanged
+ * to onig_st_init_table_with_size(), whose result is returned. */
+extern hash_table_type*
+onig_st_init_callout_name_table_with_size(int size)
+{
+  static struct st_hash_type callout_name_hash_type = {
+    callout_name_table_cmp,
+    callout_name_table_hash,
+  };
+  hash_table_type* table;
+
+  table = (hash_table_type* )
+    onig_st_init_table_with_size(&callout_name_hash_type, size);
+  return table;
+}
+\r
+/* Looks up (enc, type, [str_key, end_key)) in a callout name table.
+ * A temporary key is built on the stack and handed to onig_st_lookup(),
+ * whose result is returned; the found value is written through `value`
+ * by that call. */
+extern int
+onig_st_lookup_callout_name_table(hash_table_type* table,
+                                  OnigEncoding enc,
+                                  int type,
+                                  const UChar* str_key,
+                                  const UChar* end_key,
+                                  hash_data_type *value)
+{
+  st_callout_name_key probe;
+
+  probe.s    = (UChar* )str_key;
+  probe.end  = (UChar* )end_key;
+  probe.enc  = enc;
+  probe.type = type;
+
+  return onig_st_lookup(table, (st_data_t )(&probe), value);
+}
+\r
+/* Inserts `value` into a callout name table under the key
+ * (enc, type, [str_key, end_key)).  The key record is allocated here and is
+ * freed again when onig_st_insert() reports a non-zero result.
+ * Returns the onig_st_insert() result, or leaves early through
+ * CHECK_NULL_RETURN_MEMERR when the key record cannot be allocated. */
+static int
+st_insert_callout_name_table(hash_table_type* table,
+                             OnigEncoding enc, int type,
+                             UChar* str_key, UChar* end_key,
+                             hash_data_type value)
+{
+  int status;
+  st_callout_name_key* entry_key;
+
+  entry_key = (st_callout_name_key* )xmalloc(sizeof(st_callout_name_key));
+  CHECK_NULL_RETURN_MEMERR(entry_key);
+
+  /* the name bytes are not duplicated: str_key is already a private copy
+     made in callout_name_entry() */
+  entry_key->s    = str_key;
+  entry_key->end  = end_key;
+  entry_key->enc  = enc;
+  entry_key->type = type;
+
+  status = onig_st_insert(table, (st_data_t )entry_key, value);
+  if (status != 0) xfree(entry_key);
+
+  return status;
+}
+\r
+#endif /* USE_ST_LIBRARY */\r
\r
-#ifdef USE_NAMED_GROUP\r
\r
#define INIT_NAME_BACKREFS_ALLOC_NUM 8\r
\r
\r
#ifdef USE_ST_LIBRARY\r
\r
+#define INIT_NAMES_ALLOC_NUM 5\r
+\r
typedef st_table NameTable;\r
typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */\r
\r
NameTable* t;\r
\r
r = names_clear(reg);\r
- if (r) return r;\r
+ if (r != 0) return r;\r
\r
t = (NameTable* )reg->name_table;\r
if (IS_NOT_NULL(t)) onig_st_free_table(t);\r
int r = (*(arg->func))(e->name,\r
e->name + e->name_len,\r
e->back_num,\r
- (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),\r
- arg->reg, arg->arg);\r
+ (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),\r
+ arg->reg, arg->arg);\r
if (r != 0) {\r
arg->ret = r;\r
return ST_STOP;\r
e = &(t->e[i]);\r
fprintf(fp, "%s: ", e->name);\r
if (e->back_num == 0) {\r
- fputs("-", fp);\r
+ fputs("-", fp);\r
}\r
else if (e->back_num == 1) {\r
- fprintf(fp, "%d", e->back_ref1);\r
+ fprintf(fp, "%d", e->back_ref1);\r
}\r
else {\r
- for (j = 0; j < e->back_num; j++) {\r
- if (j > 0) fprintf(fp, ", ");\r
- fprintf(fp, "%d", e->back_refs[j]);\r
- }\r
+ for (j = 0; j < e->back_num; j++) {\r
+ if (j > 0) fprintf(fp, ", ");\r
+ fprintf(fp, "%d", e->back_refs[j]);\r
+ }\r
}\r
fputs("\n", fp);\r
}\r
for (i = 0; i < t->num; i++) {\r
e = &(t->e[i]);\r
if (IS_NOT_NULL(e->name)) {\r
- xfree(e->name);\r
- e->name = NULL;\r
- e->name_len = 0;\r
- e->back_num = 0;\r
- e->back_alloc = 0;\r
- if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);\r
- e->back_refs = (int* )NULL;\r
+ xfree(e->name);\r
+ e->name = NULL;\r
+ e->name_len = 0;\r
+ e->back_num = 0;\r
+ e->back_alloc = 0;\r
+ if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);\r
+ e->back_refs = (int* )NULL;\r
}\r
}\r
if (IS_NOT_NULL(t->e)) {\r
NameTable* t;\r
\r
r = names_clear(reg);\r
- if (r) return r;\r
+ if (r != 0) return r;\r
\r
t = (NameTable* )reg->name_table;\r
if (IS_NOT_NULL(t)) xfree(t);\r
for (i = 0; i < t->num; i++) {\r
e = &(t->e[i]);\r
if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)\r
- return e;\r
+ return e;\r
}\r
}\r
return (NameEntry* )NULL;\r
for (i = 0; i < t->num; i++) {\r
e = &(t->e[i]);\r
r = (*func)(e->name, e->name + e->name_len, e->back_num,\r
- (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),\r
- reg, arg);\r
+ (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),\r
+ reg, arg);\r
if (r != 0) return r;\r
}\r
}\r
static int\r
name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)\r
{\r
+ int r;\r
int alloc;\r
NameEntry* e;\r
NameTable* t = (NameTable* )reg->name_table;\r
if (IS_NULL(e)) {\r
#ifdef USE_ST_LIBRARY\r
if (IS_NULL(t)) {\r
- t = onig_st_init_strend_table_with_size(5);\r
+ t = onig_st_init_strend_table_with_size(INIT_NAMES_ALLOC_NUM);\r
reg->name_table = (void* )t;\r
}\r
e = (NameEntry* )xmalloc(sizeof(NameEntry));\r
CHECK_NULL_RETURN_MEMERR(e);\r
\r
- e->name = strdup_with_null(reg->enc, name, name_end);\r
+ e->name = onigenc_strdup(reg->enc, name, name_end);\r
if (IS_NULL(e->name)) {\r
xfree(e); return ONIGERR_MEMORY;\r
}\r
- onig_st_insert_strend(t, e->name, (e->name + (name_end - name)),\r
- (HashDataType )e);\r
+ r = onig_st_insert_strend(t, e->name, (e->name + (name_end - name)),\r
+ (HashDataType )e);\r
+ if (r < 0) return r;\r
\r
- e->name_len = (int)(name_end - name);\r
+ e->name_len = (int )(name_end - name);\r
e->back_num = 0;\r
e->back_alloc = 0;\r
e->back_refs = (int* )NULL;\r
\r
t->e = (NameEntry* )xmalloc(sizeof(NameEntry) * alloc);\r
if (IS_NULL(t->e)) {\r
- xfree(t);\r
- return ONIGERR_MEMORY;\r
+ xfree(t);\r
+ return ONIGERR_MEMORY;\r
}\r
t->alloc = alloc;\r
reg->name_table = t;\r
int i;\r
\r
alloc = t->alloc * 2;\r
- t->e = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc);\r
+ t->e = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc, sizeof(NameEntry) * t->alloc);\r
CHECK_NULL_RETURN_MEMERR(t->e);\r
t->alloc = alloc;\r
\r
clear:\r
for (i = t->num; i < t->alloc; i++) {\r
- t->e[i].name = NULL;\r
- t->e[i].name_len = 0;\r
- t->e[i].back_num = 0;\r
- t->e[i].back_alloc = 0;\r
- t->e[i].back_refs = (int* )NULL;\r
+ t->e[i].name = NULL;\r
+ t->e[i].name_len = 0;\r
+ t->e[i].back_num = 0;\r
+ t->e[i].back_alloc = 0;\r
+ t->e[i].back_refs = (int* )NULL;\r
}\r
}\r
e = &(t->e[t->num]);\r
t->num++;\r
- e->name = strdup_with_null(reg->enc, name, name_end);\r
+ e->name = onigenc_strdup(reg->enc, name, name_end);\r
if (IS_NULL(e->name)) return ONIGERR_MEMORY;\r
e->name_len = name_end - name;\r
#endif\r
if (e->back_num >= 1 &&\r
! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME)) {\r
onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME,\r
- name, name_end);\r
+ name, name_end);\r
return ONIGERR_MULTIPLEX_DEFINED_NAME;\r
}\r
\r
}\r
else {\r
if (e->back_num > e->back_alloc) {\r
- alloc = e->back_alloc * 2;\r
- e->back_refs = (int* )xrealloc(e->back_refs, sizeof(int) * alloc, sizeof(int) * e->back_alloc);\r
- CHECK_NULL_RETURN_MEMERR(e->back_refs);\r
- e->back_alloc = alloc;\r
+ alloc = e->back_alloc * 2;\r
+ e->back_refs = (int* )xrealloc(e->back_refs, sizeof(int) * alloc, sizeof(int) * e->back_alloc);\r
+ CHECK_NULL_RETURN_MEMERR(e->back_refs);\r
+ e->back_alloc = alloc;\r
}\r
e->back_refs[e->back_num - 1] = backref;\r
}\r
\r
extern int\r
onig_name_to_group_numbers(regex_t* reg, const UChar* name,\r
- const UChar* name_end, int** nums)\r
+ const UChar* name_end, int** nums)\r
{\r
NameEntry* e = name_find(reg, name, name_end);\r
\r
\r
extern int\r
onig_name_to_backref_number(regex_t* reg, const UChar* name,\r
- const UChar* name_end, OnigRegion *region)\r
+ const UChar* name_end, OnigRegion *region)\r
{\r
int i, n, *nums;\r
\r
else {\r
if (IS_NOT_NULL(region)) {\r
for (i = n - 1; i >= 0; i--) {\r
- if (region->beg[nums[i]] != ONIG_REGION_NOTPOS)\r
- return nums[i];\r
+ if (region->beg[nums[i]] != ONIG_REGION_NOTPOS)\r
+ return nums[i];\r
}\r
}\r
return nums[n - 1];\r
}\r
}\r
\r
-#else /* USE_NAMED_GROUP */\r
-\r
-extern int\r
-onig_name_to_group_numbers(regex_t* reg, const UChar* name,\r
- const UChar* name_end, int** nums)\r
-{\r
- return ONIG_NO_SUPPORT_CONFIG;\r
-}\r
-\r
-extern int\r
-onig_name_to_backref_number(regex_t* reg, const UChar* name,\r
- const UChar* name_end, OnigRegion* region)\r
-{\r
- return ONIG_NO_SUPPORT_CONFIG;\r
-}\r
-\r
-extern int\r
-onig_foreach_name(regex_t* reg,\r
- int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)\r
-{\r
- return ONIG_NO_SUPPORT_CONFIG;\r
-}\r
-\r
-extern int\r
-onig_number_of_names(regex_t* reg)\r
-{\r
- return 0;\r
-}\r
-#endif /* else USE_NAMED_GROUP */\r
-\r
extern int\r
onig_noname_group_capture_is_active(regex_t* reg)\r
{\r
if (ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_DONT_CAPTURE_GROUP))\r
return 0;\r
\r
-#ifdef USE_NAMED_GROUP\r
if (onig_number_of_names(reg) > 0 &&\r
IS_SYNTAX_BV(reg->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&\r
!ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {\r
return 0;\r
}\r
-#endif\r
\r
return 1;\r
}\r
\r
+#ifdef USE_CALLOUT\r
\r
-#define INIT_SCANENV_MEMNODES_ALLOC_SIZE 16\r
-\r
-static void\r
-scan_env_clear(ScanEnv* env)\r
-{\r
- int i;\r
-\r
- BIT_STATUS_CLEAR(env->capture_history);\r
- BIT_STATUS_CLEAR(env->bt_mem_start);\r
- BIT_STATUS_CLEAR(env->bt_mem_end);\r
- BIT_STATUS_CLEAR(env->backrefed_mem);\r
- env->error = (UChar* )NULL;\r
- env->error_end = (UChar* )NULL;\r
- env->num_call = 0;\r
- env->num_mem = 0;\r
-#ifdef USE_NAMED_GROUP\r
- env->num_named = 0;\r
-#endif\r
- env->mem_alloc = 0;\r
- env->mem_nodes_dynamic = (Node** )NULL;\r
+typedef struct { /* one element of CalloutNameListType.v */
+ OnigCalloutType type;
+ int in; /* NOTE(review): presumably the callout "in" direction flags passed to onig_set_callout_of_name -- confirm */
+ OnigCalloutFunc start_func;
+ OnigCalloutFunc end_func;
+ int arg_num; /* total argument count, optional ones included */
+ int opt_arg_num; /* number of trailing arguments that are optional (free_callout_func_list walks arg_num - opt_arg_num .. arg_num) */
+ unsigned int arg_types[ONIG_CALLOUT_MAX_ARGS_NUM];
+ OnigValue opt_defaults[ONIG_CALLOUT_MAX_ARGS_NUM]; /* ONIG_TYPE_STRING defaults are released in free_callout_func_list */
+ UChar* name; /* reference to GlobalCalloutNameTable entry: e->name */
+} CalloutNameListEntry;
\r
- for (i = 0; i < SCANENV_MEMNODES_SIZE; i++)\r
- env->mem_nodes_static[i] = NULL_NODE;\r
+typedef struct { /* growable array of CalloutNameListEntry */
+ int n; /* entries in use; incremented by callout_func_list_add */
+ int alloc; /* allocated capacity of v; doubled when n reaches it */
+ CalloutNameListEntry* v; /* entry storage */
+} CalloutNameListType;
\r
-#ifdef USE_COMBINATION_EXPLOSION_CHECK\r
- env->num_comb_exp_check = 0;\r
- env->comb_exp_max_regnum = 0;\r
- env->curr_max_regnum = 0;\r
- env->has_recursion = 0;\r
-#endif\r
-}\r
+static CalloutNameListType* GlobalCalloutNameList;\r
\r
static int\r
-scan_env_add_mem_entry(ScanEnv* env)\r
+make_callout_func_list(CalloutNameListType** rs, int init_size)\r
{\r
- int i, need, alloc;\r
- Node** p;\r
+ CalloutNameListType* s;\r
+ CalloutNameListEntry* v;\r
\r
- need = env->num_mem + 1;\r
- if (need >= SCANENV_MEMNODES_SIZE) {\r
- if (env->mem_alloc <= need) {\r
- if (IS_NULL(env->mem_nodes_dynamic)) {\r
- alloc = INIT_SCANENV_MEMNODES_ALLOC_SIZE;\r
- p = (Node** )xmalloc(sizeof(Node*) * alloc);\r
- xmemcpy(p, env->mem_nodes_static,\r
- sizeof(Node*) * SCANENV_MEMNODES_SIZE);\r
- }\r
- else {\r
- alloc = env->mem_alloc * 2;\r
- p = (Node** )xrealloc(env->mem_nodes_dynamic, sizeof(Node*) * alloc, sizeof(Node*) * env->mem_alloc);\r
- }\r
- CHECK_NULL_RETURN_MEMERR(p);\r
+ *rs = 0;\r
\r
- for (i = env->num_mem + 1; i < alloc; i++)\r
- p[i] = NULL_NODE;\r
+ s = xmalloc(sizeof(*s));\r
+ if (IS_NULL(s)) return ONIGERR_MEMORY;\r
\r
- env->mem_nodes_dynamic = p;\r
- env->mem_alloc = alloc;\r
- }\r
+ v = (CalloutNameListEntry* )xmalloc(sizeof(CalloutNameListEntry) * init_size);\r
+ if (IS_NULL(v)) {\r
+ xfree(s);\r
+ return ONIGERR_MEMORY;\r
}\r
\r
- env->num_mem++;\r
- return env->num_mem;\r
-}\r
+ s->n = 0;\r
+ s->alloc = init_size;\r
+ s->v = v;\r
\r
-static int\r
-scan_env_set_mem_node(ScanEnv* env, int num, Node* node)\r
-{\r
- if (env->num_mem >= num)\r
- SCANENV_MEM_NODES(env)[num] = node;\r
- else\r
- return ONIGERR_PARSER_BUG;\r
- return 0;\r
+ *rs = s;\r
+ return ONIG_NORMAL;\r
}\r
\r
+/* Releases a callout function list: the string defaults of every entry's
+ * optional arguments, the entry array, and the list object itself.
+ * A NULL list is accepted and ignored. */
+static void
+free_callout_func_list(CalloutNameListType* s)
+{
+  int i;
+
+  if (IS_NULL(s)) return ;
+
+  if (IS_NOT_NULL(s->v)) {
+    for (i = 0; i < s->n; i++) {
+      CalloutNameListEntry* entry = &(s->v[i]);
+      int k;
+
+      /* optional arguments are the last opt_arg_num of arg_num */
+      for (k = entry->arg_num - entry->opt_arg_num; k < entry->arg_num; k++) {
+        UChar* text;
+
+        if (entry->arg_types[k] != ONIG_TYPE_STRING) continue;
+
+        text = entry->opt_defaults[k].s.start;
+        if (IS_NOT_NULL(text)) xfree(text);
+      }
+    }
+    xfree(s->v);
+  }
+
+  xfree(s);
+}
\r
-#ifdef USE_PARSE_TREE_NODE_RECYCLE\r
-typedef struct _FreeNode {\r
- struct _FreeNode* next;\r
-} FreeNode;\r
+static int\r
+callout_func_list_add(CalloutNameListType* s, int* rid)\r
+{\r
+ if (s->n >= s->alloc) {\r
+ int new_size = s->alloc * 2;\r
+ CalloutNameListEntry* nv = (CalloutNameListEntry* )\r
+ xrealloc(s->v, sizeof(CalloutNameListEntry) * new_size, sizeof(CalloutNameListEntry)*s->alloc);\r
+ if (IS_NULL(nv)) return ONIGERR_MEMORY;\r
\r
-static FreeNode* FreeNodeList = (FreeNode* )NULL;\r
-#endif\r
+ s->alloc = new_size;\r
+ s->v = nv;\r
+ }\r
\r
-extern void\r
-onig_node_free(Node* node)\r
-{\r
- start:\r
- if (IS_NULL(node)) return ;\r
+ *rid = s->n;\r
\r
- switch (NTYPE(node)) {\r
- case NT_STR:\r
- if (NSTR(node)->capa != 0 &&\r
- IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) {\r
- xfree(NSTR(node)->s);\r
- }\r
- break;\r
+ xmemset(&(s->v[s->n]), 0, sizeof(*(s->v)));\r
+ s->n++;\r
+ return ONIG_NORMAL;\r
+}\r
\r
- case NT_LIST:\r
- case NT_ALT:\r
- onig_node_free(NCAR(node));\r
- {\r
- Node* next_node = NCDR(node);\r
\r
-#ifdef USE_PARSE_TREE_NODE_RECYCLE\r
- {\r
- FreeNode* n = (FreeNode* )node;\r
+typedef struct { /* value stored in the global callout name table */
+ UChar* name; /* private copy of the name made with onigenc_strdup in callout_name_entry */
+ int name_len; /* byte length */
+ int id; /* taken from CalloutNameIDCounter when the entry is created */
+} CalloutNameEntry;
\r
- THREAD_ATOMIC_START;\r
- n->next = FreeNodeList;\r
- FreeNodeList = n;\r
- THREAD_ATOMIC_END;\r
- }\r
+#ifdef USE_ST_LIBRARY\r
+typedef st_table CalloutNameTable;\r
#else\r
- xfree(node);\r
+typedef struct {\r
+ CalloutNameEntry* e;\r
+ int num;\r
+ int alloc;\r
+} CalloutNameTable;\r
#endif\r
- node = next_node;\r
- goto start;\r
- }\r
- break;\r
\r
- case NT_CCLASS:\r
- {\r
- CClassNode* cc = NCCLASS(node);\r
+static CalloutNameTable* GlobalCalloutNameTable;\r
+static int CalloutNameIDCounter;\r
\r
- if (IS_NCCLASS_SHARE(cc)) return ;\r
- if (cc->mbuf)\r
- bbuf_free(cc->mbuf);\r
- }\r
- break;\r
+#ifdef USE_ST_LIBRARY\r
\r
- case NT_QTFR:\r
- if (NQTFR(node)->target)\r
- onig_node_free(NQTFR(node)->target);\r
- break;\r
+/* onig_st_foreach() callback: frees one table entry (its name copy, the key
+ * record and the entry itself) and returns ST_DELETE. */
+static int
+i_free_callout_name_entry(st_callout_name_key* key, CalloutNameEntry* e,
+                          void* arg ARG_UNUSED)
+{
+  /* key->s is the same pointer as e->name, so it is released only once,
+     through e->name */
+  /*xfree(key->s); */
+  xfree(e->name);
+  xfree(key);
+  xfree(e);
+
+  return ST_DELETE;
+}
\r
- case NT_ENCLOSE:\r
- if (NENCLOSE(node)->target)\r
- onig_node_free(NENCLOSE(node)->target);\r
- break;\r
+/* Frees every entry of callout name table `t` by running
+ * i_free_callout_name_entry over it.  A NULL table is accepted.
+ * Always returns 0. */
+static int
+callout_name_table_clear(CalloutNameTable* t)
+{
+  if (IS_NULL(t)) return 0;
+
+  onig_st_foreach(t, i_free_callout_name_entry, 0);
+  return 0;
+}
\r
- case NT_BREF:\r
- if (IS_NOT_NULL(NBREF(node)->back_dynamic))\r
- xfree(NBREF(node)->back_dynamic);\r
- break;\r
+static int\r
+global_callout_name_table_free(void)\r
+{\r
+ if (IS_NOT_NULL(GlobalCalloutNameTable)) {\r
+ int r = callout_name_table_clear(GlobalCalloutNameTable);\r
+ if (r != 0) return r;\r
\r
- case NT_ANCHOR:\r
- if (NANCHOR(node)->target)\r
- onig_node_free(NANCHOR(node)->target);\r
- break;\r
+ onig_st_free_table(GlobalCalloutNameTable);\r
+ GlobalCalloutNameTable = 0;\r
+ CalloutNameIDCounter = 0;\r
}\r
\r
-#ifdef USE_PARSE_TREE_NODE_RECYCLE\r
- {\r
- FreeNode* n = (FreeNode* )node;\r
+ return 0;\r
+}\r
+\r
+static CalloutNameEntry*\r
+callout_name_find(OnigEncoding enc, int is_not_single,\r
+ const UChar* name, const UChar* name_end)\r
+{\r
+ int r;\r
+ CalloutNameEntry* e;\r
+ CalloutNameTable* t = GlobalCalloutNameTable;\r
\r
- THREAD_ATOMIC_START;\r
- n->next = FreeNodeList;\r
- FreeNodeList = n;\r
- THREAD_ATOMIC_END;\r
+ e = (CalloutNameEntry* )NULL;\r
+ if (IS_NOT_NULL(t)) {\r
+ r = onig_st_lookup_callout_name_table(t, enc, is_not_single, name, name_end,\r
+ (HashDataType* )((void* )(&e)));\r
+ if (r == 0) { /* not found */\r
+ if (enc != ONIG_ENCODING_ASCII &&\r
+ ONIGENC_IS_ASCII_COMPATIBLE_ENCODING(enc)) {\r
+ enc = ONIG_ENCODING_ASCII;\r
+ onig_st_lookup_callout_name_table(t, enc, is_not_single, name, name_end,\r
+ (HashDataType* )((void* )(&e)));\r
+ }\r
+ }\r
}\r
-#else\r
- xfree(node);\r
-#endif\r
+ return e;\r
}\r
\r
-#ifdef USE_PARSE_TREE_NODE_RECYCLE\r
-extern int\r
-onig_free_node_list(void)\r
+#else\r
+\r
+static int\r
+callout_name_table_clear(CalloutNameTable* t)\r
{\r
- FreeNode* n;\r
+ int i;\r
+ CalloutNameEntry* e;\r
\r
- /* THREAD_ATOMIC_START; */\r
- while (IS_NOT_NULL(FreeNodeList)) {\r
- n = FreeNodeList;\r
- FreeNodeList = FreeNodeList->next;\r
- xfree(n);\r
+ if (IS_NOT_NULL(t)) {\r
+ for (i = 0; i < t->num; i++) {\r
+ e = &(t->e[i]);\r
+ if (IS_NOT_NULL(e->name)) {\r
+ xfree(e->name);\r
+ e->name = NULL;\r
+ e->name_len = 0;\r
+ e->id = 0;\r
+ e->func = 0;\r
+ }\r
+ }\r
+ if (IS_NOT_NULL(t->e)) {\r
+ xfree(t->e);\r
+ t->e = NULL;\r
+ }\r
+ t->num = 0;\r
}\r
- /* THREAD_ATOMIC_END; */\r
return 0;\r
}\r
-#endif\r
\r
-static Node*\r
-node_new(void)\r
+static int\r
+global_callout_name_table_free(void)\r
{\r
- Node* node;\r
+ if (IS_NOT_NULL(GlobalCalloutNameTable)) {\r
+ int r = callout_name_table_clear(GlobalCalloutNameTable);\r
+ if (r != 0) return r;\r
\r
-#ifdef USE_PARSE_TREE_NODE_RECYCLE\r
- THREAD_ATOMIC_START;\r
- if (IS_NOT_NULL(FreeNodeList)) {\r
- node = (Node* )FreeNodeList;\r
- FreeNodeList = FreeNodeList->next;\r
- THREAD_ATOMIC_END;\r
- return node;\r
+ xfree(GlobalCalloutNameTable);\r
+ GlobalCalloutNameTable = 0;\r
+ CalloutNameIDCounter = 0;\r
}\r
- THREAD_ATOMIC_END;\r
-#endif\r
-\r
- node = (Node* )xmalloc(sizeof(Node));\r
- /* xmemset(node, 0, sizeof(Node)); */\r
- return node;\r
+ return 0;\r
}\r
\r
-\r
-static void\r
-initialize_cclass(CClassNode* cc)\r
+static CalloutNameEntry*\r
+callout_name_find(UChar* name, UChar* name_end)\r
{\r
- BITSET_CLEAR(cc->bs);\r
- /* cc->base.flags = 0; */\r
- cc->flags = 0;\r
- cc->mbuf = NULL;\r
+ int i, len;\r
+ CalloutNameEntry* e;\r
+ CalloutNameTable* t = Calloutnames;\r
+\r
+ if (IS_NOT_NULL(t)) {\r
+ len = name_end - name;\r
+ for (i = 0; i < t->num; i++) {\r
+ e = &(t->e[i]);\r
+ if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)\r
+ return e;\r
+ }\r
+ }\r
+ return (CalloutNameEntry* )NULL;\r
}\r
\r
-static Node*\r
-node_new_cclass(void)\r
+#endif\r
+\r
+/* name string must be single byte char string. */
+/* Finds the global callout-name entry for (enc, is_not_single,
+ * name..name_end), creating and registering it with a fresh id when it does
+ * not exist yet.  On success the entry is stored in *rentry and its id is
+ * returned.  On failure *rentry is left 0 and an error code is returned
+ * (ONIGERR_INVALID_CALLOUT_NAME for an empty name, ONIGERR_MEMORY or the
+ * negative insert result otherwise); nothing allocated here is leaked on
+ * the st-library failure paths. */
+static int
+callout_name_entry(CalloutNameEntry** rentry, OnigEncoding enc,
+                   int is_not_single, UChar* name, UChar* name_end)
{
- Node* node = node_new();
- CHECK_NULL_RETURN(node);
+  int r;
+  CalloutNameEntry* e;
+  CalloutNameTable* t = GlobalCalloutNameTable;

- SET_NTYPE(node, NT_CCLASS);
- initialize_cclass(NCCLASS(node));
- return node;
-}
+  *rentry = 0;
+  if (name_end - name <= 0)
+    return ONIGERR_INVALID_CALLOUT_NAME;

-static Node*
-node_new_cclass_by_codepoint_range(int not, OnigCodePoint sb_out,
- const OnigCodePoint ranges[])
-{
- int n, i;
- CClassNode* cc;
- OnigCodePoint j;
+  e = callout_name_find(enc, is_not_single, name, name_end);
+  if (IS_NULL(e)) {
+#ifdef USE_ST_LIBRARY
+    if (IS_NULL(t)) {
+      t = onig_st_init_callout_name_table_with_size(INIT_NAMES_ALLOC_NUM);
+      /* table creation can fail; do not insert into a NULL table */
+      CHECK_NULL_RETURN_MEMERR(t);
+      GlobalCalloutNameTable = t;
+    }
+    e = (CalloutNameEntry* )xmalloc(sizeof(CalloutNameEntry));
+    CHECK_NULL_RETURN_MEMERR(e);

- Node* node = node_new_cclass();
- CHECK_NULL_RETURN(node);
+    e->name = onigenc_strdup(enc, name, name_end);
+    if (IS_NULL(e->name)) {
+      xfree(e); return ONIGERR_MEMORY;
+    }

- cc = NCCLASS(node);
- if (not != 0) NCCLASS_SET_NOT(cc);
+    r = st_insert_callout_name_table(t, enc, is_not_single,
+                                     e->name, (e->name + (name_end - name)),
+                                     (HashDataType )e);
+    if (r < 0) {
+      /* the entry was not stored in the table: release it and its name */
+      xfree(e->name);
+      xfree(e);
+      return r;
+    }

- BITSET_CLEAR(cc->bs);
- if (sb_out > 0 && IS_NOT_NULL(ranges)) {
- n = ONIGENC_CODE_RANGE_NUM(ranges);
- for (i = 0; i < n; i++) {
- for (j = ONIGENC_CODE_RANGE_FROM(ranges, i);
- j <= (OnigCodePoint )ONIGENC_CODE_RANGE_TO(ranges, i); j++) {
- if (j >= sb_out) goto sb_end;
+#else

- BITSET_SET_BIT(cc->bs, j);
+    int alloc;
+
+    if (IS_NULL(t)) {
+      alloc = INIT_NAMES_ALLOC_NUM;
+      t = (CalloutNameTable* )xmalloc(sizeof(CalloutNameTable));
+      CHECK_NULL_RETURN_MEMERR(t);
+      t->e = NULL;
+      t->alloc = 0;
+      t->num = 0;
+
+      t->e = (CalloutNameEntry* )xmalloc(sizeof(CalloutNameEntry) * alloc);
+      if (IS_NULL(t->e)) {
+        xfree(t);
+        return ONIGERR_MEMORY;
}
+      t->alloc = alloc;
+      GlobalCalloutNameTable = t;
+      goto clear;
}
- }
-
- sb_end:
- if (IS_NULL(ranges)) {
- is_null:
- cc->mbuf = NULL;
- }
- else {
- BBuf* bbuf;
+    else if (t->num == t->alloc) {
+      int i;

- n = ONIGENC_CODE_RANGE_NUM(ranges);
- if (n == 0) goto is_null;
+      alloc = t->alloc * 2;
+      t->e = (CalloutNameEntry* )xrealloc(t->e, sizeof(CalloutNameEntry) * alloc, sizeof(CalloutNameEntry)*t->alloc);
+      CHECK_NULL_RETURN_MEMERR(t->e);
+      t->alloc = alloc;

- bbuf = (BBuf* )xmalloc(sizeof(BBuf));
- CHECK_NULL_RETURN(bbuf);
- bbuf->alloc = n + 1;
- bbuf->used = n + 1;
- bbuf->p = (UChar* )((void* )ranges);
+    clear:
+      for (i = t->num; i < t->alloc; i++) {
+        t->e[i].name = NULL;
+        t->e[i].name_len = 0;
+        t->e[i].id = 0;
+      }
+    }
+    e = &(t->e[t->num]);
+    t->num++;
+    e->name = onigenc_strdup(enc, name, name_end);
+    if (IS_NULL(e->name)) return ONIGERR_MEMORY;
+#endif

- cc->mbuf = bbuf;
+    CalloutNameIDCounter++;
+    e->id = CalloutNameIDCounter;
+    e->name_len = (int )(name_end - name);
}

- return node;
+  *rentry = e;
+  return e->id;
}
\r
-static Node*\r
-node_new_ctype(int type, int not)\r
+/* Return 1 when [name, name_end) is an acceptable callout name, else 0.\r
+   The name must be non-empty, every code point must satisfy\r
+   IS_ALLOWED_CODE_IN_CALLOUT_NAME, and the first one must not be a digit. */\r
+static int\r
+is_allowed_callout_name(OnigEncoding enc, UChar* name, UChar* name_end)\r
{\r
- Node* node = node_new();\r
- CHECK_NULL_RETURN(node);\r
+  UChar* cur;\r
+  OnigCodePoint code;\r
\r
- SET_NTYPE(node, NT_CTYPE);\r
- NCTYPE(node)->ctype = type;\r
- NCTYPE(node)->not = not;\r
- return node;\r
-}\r
+  if (name >= name_end) return 0;  /* empty name */\r
\r
-static Node*\r
-node_new_anychar(void)\r
-{\r
- Node* node = node_new();\r
- CHECK_NULL_RETURN(node);\r
+  for (cur = name; cur < name_end; cur += ONIGENC_MBC_ENC_LEN(enc, cur)) {\r
+    code = ONIGENC_MBC_TO_CODE(enc, cur, name_end);\r
+    if (! IS_ALLOWED_CODE_IN_CALLOUT_NAME(code)) return 0;\r
\r
- SET_NTYPE(node, NT_CANY);\r
- return node;\r
-}\r
+    /* a leading digit is rejected */\r
+    if (cur == name && code >= '0' && code <= '9') return 0;\r
\r
-static Node*\r
-node_new_list(Node* left, Node* right)\r
-{\r
- Node* node = node_new();\r
- CHECK_NULL_RETURN(node);\r
+  }\r
\r
- SET_NTYPE(node, NT_LIST);\r
- NCAR(node) = left;\r
- NCDR(node) = right;\r
- return node;\r
+  return 1;\r
}\r
\r
-extern Node*\r
-onig_node_new_list(Node* left, Node* right)\r
+/* Return 1 when [name, name_end) is an acceptable callout tag name, else 0.\r
+   The tag must be non-empty, every code point must satisfy\r
+   IS_ALLOWED_CODE_IN_CALLOUT_TAG_NAME, and the first one must not be a digit. */\r
+static int\r
+is_allowed_callout_tag_name(OnigEncoding enc, UChar* name, UChar* name_end)\r
{\r
- return node_new_list(left, right);\r
-}\r
+  UChar* cur;\r
+  OnigCodePoint code;\r
\r
-extern Node*\r
-onig_node_list_add(Node* list, Node* x)\r
-{\r
- Node *n;\r
+  if (name >= name_end) return 0;  /* empty tag */\r
\r
- n = onig_node_new_list(x, NULL);\r
- if (IS_NULL(n)) return NULL_NODE;\r
+  for (cur = name; cur < name_end; cur += ONIGENC_MBC_ENC_LEN(enc, cur)) {\r
+    code = ONIGENC_MBC_TO_CODE(enc, cur, name_end);\r
+    if (! IS_ALLOWED_CODE_IN_CALLOUT_TAG_NAME(code)) return 0;\r
\r
- if (IS_NOT_NULL(list)) {\r
- while (IS_NOT_NULL(NCDR(list)))\r
- list = NCDR(list);\r
+    /* a leading digit is rejected */\r
+    if (cur == name && code >= '0' && code <= '9') return 0;\r
\r
- NCDR(list) = n;\r
}\r
\r
- return n;\r
+  return 1;\r
}\r
\r
-extern Node*\r
-onig_node_new_alt(Node* left, Node* right)\r
+/* Register (or overwrite) the global definition of a named callout.\r
+   Validates the arguments, obtains the name's id from callout_name_entry(),\r
+   grows GlobalCalloutNameList until that id is a valid index, and fills the\r
+   entry.  Returns the name id (>= 0) on success or a negative ONIGERR_* code. */\r
+extern int\r
+onig_set_callout_of_name(OnigEncoding enc, OnigCalloutType callout_type,\r
+ UChar* name, UChar* name_end, int in,\r
+ OnigCalloutFunc start_func,\r
+ OnigCalloutFunc end_func,\r
+ int arg_num, unsigned int arg_types[],\r
+ int opt_arg_num, OnigValue opt_defaults[])\r
{\r
- Node* node = node_new();\r
- CHECK_NULL_RETURN(node);\r
+ int r;\r
+ int i;\r
+ int j;\r
+ int id;\r
+ int is_not_single;\r
+ CalloutNameEntry* e;\r
+ CalloutNameListEntry* fe;\r
\r
- SET_NTYPE(node, NT_ALT);\r
- NCAR(node) = left;\r
- NCDR(node) = right;\r
- return node;\r
-}\r
+ /* only ONIG_CALLOUT_TYPE_SINGLE is accepted here */\r
+ if (callout_type != ONIG_CALLOUT_TYPE_SINGLE)\r
+ return ONIGERR_INVALID_ARGUMENT;\r
\r
-extern Node*\r
-onig_node_new_anchor(int type)\r
-{\r
- Node* node = node_new();\r
- CHECK_NULL_RETURN(node);\r
+ if (arg_num < 0 || arg_num > ONIG_CALLOUT_MAX_ARGS_NUM)\r
+ return ONIGERR_INVALID_CALLOUT_ARG;\r
\r
- SET_NTYPE(node, NT_ANCHOR);\r
- NANCHOR(node)->type = type;\r
- NANCHOR(node)->target = NULL;\r
- NANCHOR(node)->char_len = -1;\r
- return node;\r
-}\r
+ if (opt_arg_num < 0 || opt_arg_num > arg_num)\r
+ return ONIGERR_INVALID_CALLOUT_ARG;\r
\r
-static Node*\r
-node_new_backref(int back_num, int* backrefs, int by_name,\r
-#ifdef USE_BACKREF_WITH_LEVEL\r
- int exist_level, int nest_level,\r
-#endif\r
- ScanEnv* env)\r
-{\r
- int i;\r
- Node* node = node_new();\r
+ /* at least one of the two callback functions is required */\r
+ if (start_func == 0 && end_func == 0)\r
+ return ONIGERR_INVALID_CALLOUT_ARG;\r
\r
- CHECK_NULL_RETURN(node);\r
+ /* `in` must select progress and/or retraction */\r
+ if ((in & ONIG_CALLOUT_IN_PROGRESS) == 0 && (in & ONIG_CALLOUT_IN_RETRACTION) == 0)\r
+ return ONIGERR_INVALID_CALLOUT_ARG;\r
\r
- SET_NTYPE(node, NT_BREF);\r
- NBREF(node)->state = 0;\r
- NBREF(node)->back_num = back_num;\r
- NBREF(node)->back_dynamic = (int* )NULL;\r
- if (by_name != 0)\r
- NBREF(node)->state |= NST_NAME_REF;\r
+ /* Type check: the trailing opt_arg_num (optional) arguments must be exactly\r
+    one of LONG/CHAR/STRING/TAG; the leading ones may be LONG alone, or must\r
+    reduce to CHAR/STRING/TAG once the LONG bit is masked out. */\r
+ for (i = 0; i < arg_num; i++) {\r
+ unsigned int t = arg_types[i];\r
+ if (t == ONIG_TYPE_VOID)\r
+ return ONIGERR_INVALID_CALLOUT_ARG;\r
+ else {\r
+ if (i >= arg_num - opt_arg_num) {\r
+ if (t != ONIG_TYPE_LONG && t != ONIG_TYPE_CHAR && t != ONIG_TYPE_STRING &&\r
+ t != ONIG_TYPE_TAG)\r
+ return ONIGERR_INVALID_CALLOUT_ARG;\r
+ }\r
+ else {\r
+ if (t != ONIG_TYPE_LONG) {\r
+ t = t & ~ONIG_TYPE_LONG;\r
+ if (t != ONIG_TYPE_CHAR && t != ONIG_TYPE_STRING && t != ONIG_TYPE_TAG)\r
+ return ONIGERR_INVALID_CALLOUT_ARG;\r
+ }\r
+ }\r
+ }\r
+ }\r
\r
-#ifdef USE_BACKREF_WITH_LEVEL\r
- if (exist_level != 0) {\r
- NBREF(node)->state |= NST_NEST_LEVEL;\r
- NBREF(node)->nest_level = nest_level;\r
+ if (! is_allowed_callout_name(enc, name, name_end)) {\r
+ return ONIGERR_INVALID_CALLOUT_NAME;\r
}\r
-#endif\r
\r
- for (i = 0; i < back_num; i++) {\r
- if (backrefs[i] <= env->num_mem &&\r
- IS_NULL(SCANENV_MEM_NODES(env)[backrefs[i]])) {\r
- NBREF(node)->state |= NST_RECURSION; /* /...(\1).../ */\r
- break;\r
- }\r
+ /* always 0 at this point: non-SINGLE types were rejected above */\r
+ is_not_single = (callout_type != ONIG_CALLOUT_TYPE_SINGLE);\r
+ id = callout_name_entry(&e, enc, is_not_single, name, name_end);\r
+ if (id < 0) return id;\r
+\r
+ r = ONIG_NORMAL;\r
+ /* the global list is created on first use */\r
+ if (IS_NULL(GlobalCalloutNameList)) {\r
+ r = make_callout_func_list(&GlobalCalloutNameList, 10);\r
+ if (r != ONIG_NORMAL) return r;\r
}\r
\r
- if (back_num <= NODE_BACKREFS_SIZE) {\r
- for (i = 0; i < back_num; i++)\r
- NBREF(node)->back_static[i] = backrefs[i];\r
+ /* append entries until `id` is a valid index into the list */\r
+ while (id >= GlobalCalloutNameList->n) {\r
+ int rid;\r
+ r = callout_func_list_add(GlobalCalloutNameList, &rid);\r
+ if (r != ONIG_NORMAL) return r;\r
}\r
- else {\r
- int* p = (int* )xmalloc(sizeof(int) * back_num);\r
- if (IS_NULL(p)) {\r
- onig_node_free(node);\r
- return NULL;\r
+\r
+ fe = GlobalCalloutNameList->v + id;\r
+ fe->type = callout_type;\r
+ fe->in = in;\r
+ fe->start_func = start_func;\r
+ fe->end_func = end_func;\r
+ fe->arg_num = arg_num;\r
+ fe->opt_arg_num = opt_arg_num;\r
+ fe->name = e->name;\r
+\r
+ for (i = 0; i < arg_num; i++) {\r
+ fe->arg_types[i] = arg_types[i];\r
+ }\r
+ /* opt_defaults[j] is the default for argument i = arg_num - opt_arg_num + j;\r
+    STRING defaults are duplicated with onigenc_strdup(), others are copied */\r
+ for (i = arg_num - opt_arg_num, j = 0; i < arg_num; i++, j++) {\r
+ if (fe->arg_types[i] == ONIG_TYPE_STRING) {\r
+ OnigValue* val = opt_defaults + j;\r
+ UChar* ds = onigenc_strdup(enc, val->s.start, val->s.end);\r
+ CHECK_NULL_RETURN_MEMERR(ds);\r
+\r
+ fe->opt_defaults[i].s.start = ds;\r
+ fe->opt_defaults[i].s.end = ds + (val->s.end - val->s.start);\r
+ }\r
+ else {\r
+ fe->opt_defaults[i] = opt_defaults[j];\r
}\r
- NBREF(node)->back_dynamic = p;\r
- for (i = 0; i < back_num; i++)\r
- p[i] = backrefs[i];\r
}\r
- return node;\r
+\r
+ r = id;\r
+ return r;\r
}\r
\r
-#ifdef USE_SUBEXP_CALL\r
-static Node*\r
-node_new_call(UChar* name, UChar* name_end, int gnum)\r
+/* Look up a registered callout name and store its id in *rid.\r
+   Returns ONIG_NORMAL on success, ONIGERR_INVALID_CALLOUT_NAME when the name\r
+   is malformed, or ONIGERR_UNDEFINED_CALLOUT_NAME when it is not registered. */\r
+static int\r
+get_callout_name_id_by_name(OnigEncoding enc, int is_not_single,\r
+                            UChar* name, UChar* name_end, int* rid)\r
{\r
- Node* node = node_new();\r
- CHECK_NULL_RETURN(node);\r
+  CalloutNameEntry* entry;\r
\r
- SET_NTYPE(node, NT_CALL);\r
- NCALL(node)->state = 0;\r
- NCALL(node)->target = NULL_NODE;\r
- NCALL(node)->name = name;\r
- NCALL(node)->name_end = name_end;\r
- NCALL(node)->group_num = gnum; /* call by number if gnum != 0 */\r
- return node;\r
-}\r
-#endif\r
+  if (! is_allowed_callout_name(enc, name, name_end))\r
+    return ONIGERR_INVALID_CALLOUT_NAME;\r
\r
-static Node*\r
-node_new_quantifier(int lower, int upper, int by_number)\r
-{\r
- Node* node = node_new();\r
- CHECK_NULL_RETURN(node);\r
+  entry = callout_name_find(enc, is_not_single, name, name_end);\r
+  if (IS_NULL(entry))\r
+    return ONIGERR_UNDEFINED_CALLOUT_NAME;\r
\r
- SET_NTYPE(node, NT_QTFR);\r
- NQTFR(node)->state = 0;\r
- NQTFR(node)->target = NULL;\r
- NQTFR(node)->lower = lower;\r
- NQTFR(node)->upper = upper;\r
- NQTFR(node)->greedy = 1;\r
- NQTFR(node)->target_empty_info = NQ_TARGET_ISNOT_EMPTY;\r
- NQTFR(node)->head_exact = NULL_NODE;\r
- NQTFR(node)->next_head_exact = NULL_NODE;\r
- NQTFR(node)->is_refered = 0;\r
- if (by_number != 0)\r
- NQTFR(node)->state |= NST_BY_NUMBER;\r
+  *rid = entry->id;\r
\r
-#ifdef USE_COMBINATION_EXPLOSION_CHECK\r
- NQTFR(node)->comb_exp_check_num = 0;\r
-#endif\r
-\r
- return node;\r
+  return ONIG_NORMAL;\r
}\r
\r
-static Node*\r
-node_new_enclose(int type)\r
+/* Return the start function stored in the callout list entry for callout_num.\r
+   Returns 0 when no entry is found for that number. */\r
+extern OnigCalloutFunc\r
+onig_get_callout_start_func(regex_t* reg, int callout_num)\r
{\r
- Node* node = node_new();\r
- CHECK_NULL_RETURN(node);\r
+  /* If used for callouts of contents, return 0. */\r
+  CalloutListEntry* e;\r
\r
- SET_NTYPE(node, NT_ENCLOSE);\r
- NENCLOSE(node)->type = type;\r
- NENCLOSE(node)->state = 0;\r
- NENCLOSE(node)->regnum = 0;\r
- NENCLOSE(node)->option = 0;\r
- NENCLOSE(node)->target = NULL;\r
- NENCLOSE(node)->call_addr = -1;\r
- NENCLOSE(node)->opt_count = 0;\r
- return node;\r
+  e = onig_reg_callout_list_at(reg, callout_num);\r
+  /* NOTE(review): the lookup helper is defined outside this view; guard in\r
+     case it yields NULL for an unknown callout number. */\r
+  if (IS_NULL(e)) return 0;\r
+  return e->start_func;\r
}\r
\r
-extern Node*\r
-onig_node_new_enclose(int type)\r
+/* Return the tag_start pointer of the callout list entry for callout_num,\r
+   or 0 when no entry is found for that number. */\r
+extern const UChar*\r
+onig_get_callout_tag_start(regex_t* reg, int callout_num)\r
{\r
- return node_new_enclose(type);\r
+  CalloutListEntry* e = onig_reg_callout_list_at(reg, callout_num);\r
+  /* NOTE(review): the lookup helper is defined outside this view; guard in\r
+     case it yields NULL for an unknown callout number. */\r
+  if (IS_NULL(e)) return 0;\r
+  return e->tag_start;\r
}\r
\r
-static Node*\r
-node_new_enclose_memory(OnigOptionType option, int is_named)\r
+/* Return the tag_end pointer of the callout list entry for callout_num,\r
+   or 0 when no entry is found for that number. */\r
+extern const UChar*\r
+onig_get_callout_tag_end(regex_t* reg, int callout_num)\r
{\r
- Node* node = node_new_enclose(ENCLOSE_MEMORY);\r
- CHECK_NULL_RETURN(node);\r
- if (is_named != 0)\r
- SET_ENCLOSE_STATUS(node, NST_NAMED_GROUP);\r
-\r
-#ifdef USE_SUBEXP_CALL\r
- NENCLOSE(node)->option = option;\r
-#endif\r
- return node;\r
+  CalloutListEntry* e = onig_reg_callout_list_at(reg, callout_num);\r
+  /* NOTE(review): the lookup helper is defined outside this view; guard in\r
+     case it yields NULL for an unknown callout number. */\r
+  if (IS_NULL(e)) return 0;\r
+  return e->tag_end;\r
}\r
\r
-static Node*\r
-node_new_option(OnigOptionType option)\r
-{\r
- Node* node = node_new_enclose(ENCLOSE_OPTION);\r
- CHECK_NULL_RETURN(node);\r
- NENCLOSE(node)->option = option;\r
- return node;\r
-}\r
\r
-extern int\r
-onig_node_str_cat(Node* node, const UChar* s, const UChar* end)\r
+/* Return the callout type registered under name_id, or 0 when name_id is\r
+   out of range or no global callout name list exists. */\r
+extern OnigCalloutType\r
+onig_get_callout_type_by_name_id(int name_id)\r
{\r
- int addlen = (int)(end - s);\r
+  /* The list is NULL until a name is registered and again after\r
+     onig_global_callout_names_free(); never dereference it unchecked. */\r
+  if (IS_NULL(GlobalCalloutNameList) ||\r
+      name_id < 0 || name_id >= GlobalCalloutNameList->n)\r
+    return 0;\r
\r
- if (addlen > 0) {\r
- int len = (int)(NSTR(node)->end - NSTR(node)->s);\r
+  return GlobalCalloutNameList->v[name_id].type;\r
+}\r
\r
- if (NSTR(node)->capa > 0 || (len + addlen > NODE_STR_BUF_SIZE - 1)) {\r
- UChar* p;\r
- int capa = len + addlen + NODE_STR_MARGIN;\r
+/* Return the start function registered under name_id, or 0 when name_id is\r
+   out of range or no global callout name list exists. */\r
+extern OnigCalloutFunc\r
+onig_get_callout_start_func_by_name_id(int name_id)\r
+{\r
+  /* The list is NULL until a name is registered and again after\r
+     onig_global_callout_names_free(); never dereference it unchecked. */\r
+  if (IS_NULL(GlobalCalloutNameList) ||\r
+      name_id < 0 || name_id >= GlobalCalloutNameList->n)\r
+    return 0;\r
\r
- if (capa <= NSTR(node)->capa) {\r
- onig_strcpy(NSTR(node)->s + len, s, end);\r
- }\r
- else {\r
- if (NSTR(node)->s == NSTR(node)->buf)\r
- p = strcat_capa_from_static(NSTR(node)->s, NSTR(node)->end,\r
- s, end, capa);\r
- else\r
- p = strcat_capa(NSTR(node)->s, NSTR(node)->end, s, end, capa, NSTR(node)->capa);\r
+  return GlobalCalloutNameList->v[name_id].start_func;\r
+}\r
\r
- CHECK_NULL_RETURN_MEMERR(p);\r
- NSTR(node)->s = p;\r
- NSTR(node)->capa = capa;\r
- }\r
- }\r
- else {\r
- onig_strcpy(NSTR(node)->s + len, s, end);\r
- }\r
- NSTR(node)->end = NSTR(node)->s + len + addlen;\r
- }\r
+/* Return the end function registered under name_id, or 0 when name_id is\r
+   out of range or no global callout name list exists. */\r
+extern OnigCalloutFunc\r
+onig_get_callout_end_func_by_name_id(int name_id)\r
+{\r
+  /* The list is NULL until a name is registered and again after\r
+     onig_global_callout_names_free(); never dereference it unchecked. */\r
+  if (IS_NULL(GlobalCalloutNameList) ||\r
+      name_id < 0 || name_id >= GlobalCalloutNameList->n)\r
+    return 0;\r
\r
- return 0;\r
+  return GlobalCalloutNameList->v[name_id].end_func;\r
}\r
\r
extern int\r
-onig_node_str_set(Node* node, const UChar* s, const UChar* end)\r
+onig_get_callout_in_by_name_id(int name_id)\r
{\r
- onig_node_str_clear(node);\r
- return onig_node_str_cat(node, s, end);\r
+ if (name_id < 0 || name_id >= GlobalCalloutNameList->n)\r
+ return 0;\r
+\r
+ return GlobalCalloutNameList->v[name_id].in;\r
}\r
\r
static int\r
-node_str_cat_char(Node* node, UChar c)\r
+get_callout_arg_num_by_name_id(int name_id)\r
{\r
- UChar s[1];\r
-\r
- s[0] = c;\r
- return onig_node_str_cat(node, s, s + 1);\r
+ return GlobalCalloutNameList->v[name_id].arg_num;\r
}\r
\r
-extern void\r
-onig_node_conv_to_str_node(Node* node, int flag)\r
+/* Return opt_arg_num of the global callout entry at name_id (no range check). */\r
+static int\r
+get_callout_opt_arg_num_by_name_id(int name_id)\r
{\r
- SET_NTYPE(node, NT_STR);\r
- NSTR(node)->flag = flag;\r
- NSTR(node)->capa = 0;\r
- NSTR(node)->s = NSTR(node)->buf;\r
- NSTR(node)->end = NSTR(node)->buf;\r
+  CalloutNameListEntry* fe = GlobalCalloutNameList->v + name_id;\r
+\r
+  return fe->opt_arg_num;\r
}\r
\r
-extern void\r
-onig_node_str_clear(Node* node)\r
+/* Return arg_types[index] of the global callout entry at name_id\r
+   (neither name_id nor index is range-checked). */\r
+static unsigned int\r
+get_callout_arg_type_by_name_id(int name_id, int index)\r
{\r
- if (NSTR(node)->capa != 0 &&\r
- IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) {\r
- xfree(NSTR(node)->s);\r
- }\r
+  CalloutNameListEntry* fe = GlobalCalloutNameList->v + name_id;\r
+\r
+  return fe->arg_types[index];\r
+}\r
\r
- NSTR(node)->capa = 0;\r
- NSTR(node)->flag = 0;\r
- NSTR(node)->s = NSTR(node)->buf;\r
- NSTR(node)->end = NSTR(node)->buf;\r
+/* Return opt_defaults[index] of the global callout entry at name_id\r
+   (neither name_id nor index is range-checked). */\r
+static OnigValue\r
+get_callout_opt_default_by_name_id(int name_id, int index)\r
+{\r
+  CalloutNameListEntry* fe = GlobalCalloutNameList->v + name_id;\r
+\r
+  return fe->opt_defaults[index];\r
}\r
\r
-static Node*\r
-node_new_str(const UChar* s, const UChar* end)\r
+/* Return the name registered under name_id, or 0 when name_id is out of\r
+   range or no global callout name list exists. */\r
+extern UChar*\r
+onig_get_callout_name_by_name_id(int name_id)\r
{\r
- Node* node = node_new();\r
- CHECK_NULL_RETURN(node);\r
+  /* The list is NULL until a name is registered and again after\r
+     onig_global_callout_names_free(); never dereference it unchecked. */\r
+  if (IS_NULL(GlobalCalloutNameList) ||\r
+      name_id < 0 || name_id >= GlobalCalloutNameList->n)\r
+    return 0;\r
\r
- SET_NTYPE(node, NT_STR);\r
- NSTR(node)->capa = 0;\r
- NSTR(node)->flag = 0;\r
- NSTR(node)->s = NSTR(node)->buf;\r
- NSTR(node)->end = NSTR(node)->buf;\r
- if (onig_node_str_cat(node, s, end)) {\r
- onig_node_free(node);\r
- return NULL;\r
- }\r
- return node;\r
+  return GlobalCalloutNameList->v[name_id].name;\r
}\r
\r
-extern Node*\r
-onig_node_new_str(const UChar* s, const UChar* end)\r
+/* Release the global callout function list and the global callout name\r
+   table.  Always returns ONIG_NORMAL. */\r
+extern int\r
+onig_global_callout_names_free(void)\r
{\r
- return node_new_str(s, end);\r
+  /* drop the list first and clear the pointer so it is not reused */\r
+  free_callout_func_list(GlobalCalloutNameList);\r
+  GlobalCalloutNameList = NULL;\r
+\r
+  global_callout_name_table_free();\r
+\r
+  return ONIG_NORMAL;\r
}\r
\r
-static Node*\r
-node_new_str_raw(UChar* s, UChar* end)\r
+\r
+/* Hash table keyed by callout tag name (used as ext->tag_table). */\r
+typedef st_table CalloutTagTable;\r
+/* Value stored per tag; the code below casts it to int and uses it as a\r
+   callout number. */\r
+typedef intptr_t CalloutTagVal;\r
+\r
+/* Bit set in a callout list entry's `flag` when a tag refers to it. */\r
+#define CALLOUT_TAG_LIST_FLAG_TAG_EXIST (1<<0)\r
+\r
+/* onig_st_foreach() callback: mark the callout list entry that a tag points\r
+   at.  `e` is the tag's value; the entry index is e - 1.  `arg` is the\r
+   RegexExt owning the list. */\r
+static int\r
+i_callout_callout_list_set(UChar* key, CalloutTagVal e, void* arg)\r
{\r
- Node* node = node_new_str(s, end);\r
- NSTRING_SET_RAW(node);\r
- return node;\r
+  RegexExt* ext = (RegexExt* )arg;\r
+  int index = (int )e - 1;\r
+\r
+  ext->callout_list[index].flag |= CALLOUT_TAG_LIST_FLAG_TAG_EXIST;\r
+\r
+  return ST_CONTINUE;\r
}\r
\r
-static Node*\r
-node_new_empty(void)\r
+/* Finalize the callout list of a regex: flag every entry that has a tag,\r
+   then replace each ONIG_TYPE_TAG argument of a name callout (stored as a\r
+   start/end string) with the callout number of that tag.\r
+   Returns ONIG_NORMAL, or the negative code from the tag lookup. */\r
+static int\r
+setup_ext_callout_list_values(regex_t* reg)\r
{\r
- return node_new_str(NULL, NULL);\r
+  int idx, k;\r
+  RegexExt* ext = REG_EXTP(reg);\r
+\r
+  if (IS_NOT_NULL(ext->tag_table)) {\r
+    onig_st_foreach((CalloutTagTable *)ext->tag_table, i_callout_callout_list_set,\r
+                    (st_data_t )ext);\r
+  }\r
+\r
+  for (idx = 0; idx < ext->callout_num; idx++) {\r
+    CalloutListEntry* entry = ext->callout_list + idx;\r
+\r
+    if (entry->of != ONIG_CALLOUT_OF_NAME) continue;\r
+\r
+    for (k = 0; k < entry->u.arg.num; k++) {\r
+      UChar* tag_s;\r
+      UChar* tag_e;\r
+      int num;\r
+\r
+      if (entry->u.arg.types[k] != ONIG_TYPE_TAG) continue;\r
+\r
+      tag_s = entry->u.arg.vals[k].s.start;\r
+      tag_e = entry->u.arg.vals[k].s.end;\r
+      num = onig_get_callout_num_by_tag(reg, tag_s, tag_e);\r
+      if (num < 0) return num;\r
+      entry->u.arg.vals[k].tag = num;\r
+    }\r
+  }\r
+\r
+  return ONIG_NORMAL;\r
}\r
\r
-static Node*\r
-node_new_str_raw_char(UChar c)\r
+/* Return 1 when the callout numbered callout_num has a tag, 0 otherwise\r
+   (also 0 when the regex has no callout list or the number is out of range). */\r
+extern int\r
+onig_callout_tag_is_exist_at_callout_num(regex_t* reg, int callout_num)\r
{\r
- UChar p[1];\r
+  RegexExt* ext = REG_EXTP(reg);\r
\r
- p[0] = c;\r
- return node_new_str_raw(p, p + 1);\r
+  if (IS_NULL(ext) || IS_NULL(ext->callout_list)) return 0;\r
+  /* Callout numbers are 1-based: i_callout_callout_list_set() stores the\r
+     flag at callout_list[num - 1], so read the same slot here and reject\r
+     numbers below 1 instead of indexing one entry too far. */\r
+  if (callout_num <= 0 || callout_num > ext->callout_num) return 0;\r
+\r
+  return (ext->callout_list[callout_num - 1].flag &\r
+          CALLOUT_TAG_LIST_FLAG_TAG_EXIST) != 0 ? 1 : 0;\r
}\r
\r
-static Node*\r
-str_node_split_last_char(StrNode* sn, OnigEncoding enc)\r
+/* onig_st_foreach() callback: free the key of one tag table entry and ask\r
+   the table to delete the entry (ST_DELETE). */\r
+static int\r
+i_free_callout_tag_entry(UChar* key, CalloutTagVal e, void* arg ARG_UNUSED)\r
{\r
- const UChar *p;\r
- Node* n = NULL_NODE;\r
-\r
- if (sn->end > sn->s) {\r
- p = onigenc_get_prev_char_head(enc, sn->s, sn->end);\r
- if (p && p > sn->s) { /* can be splitted. */\r
- n = node_new_str(p, sn->end);\r
- if ((sn->flag & NSTR_RAW) != 0)\r
- NSTRING_SET_RAW(n);\r
- sn->end = (UChar* )p;\r
- }\r
- }\r
- return n;\r
+ xfree(key);\r
+ return ST_DELETE;\r
}\r
\r
+/* Remove every entry from tag table t, freeing each key via\r
+   i_free_callout_tag_entry().  A NULL table is ignored.  Always returns 0. */\r
static int\r
-str_node_can_be_split(StrNode* sn, OnigEncoding enc)\r
+callout_tag_table_clear(CalloutTagTable* t)\r
{\r
- if (sn->end > sn->s) {\r
- return ((enclen(enc, sn->s) < sn->end - sn->s) ? 1 : 0);\r
+ if (IS_NOT_NULL(t)) {\r
+ onig_st_foreach(t, i_free_callout_tag_entry, 0);\r
}\r
return 0;\r
}\r
\r
-#ifdef USE_PAD_TO_SHORT_BYTE_CHAR\r
-static int\r
-node_str_head_pad(StrNode* sn, int num, UChar val)\r
+/* Clear and free a callout tag table passed as an opaque pointer.\r
+   A NULL table is accepted.  Returns 0, or the non-zero result of\r
+   callout_tag_table_clear() (the table is then left unfreed). */\r
+extern int\r
+onig_callout_tag_table_free(void* table)\r
{\r
- UChar buf[NODE_STR_BUF_SIZE];\r
- int i, len;\r
+  CalloutTagTable* tbl = (CalloutTagTable* )table;\r
\r
- len = sn->end - sn->s;\r
- onig_strcpy(buf, sn->s, sn->end);\r
- onig_strcpy(&(sn->s[num]), buf, buf + len);\r
- sn->end += num;\r
+  if (IS_NOT_NULL(tbl)) {\r
+    int rc = callout_tag_table_clear(tbl);\r
+    if (rc != 0) return rc;\r
\r
- for (i = 0; i < num; i++) {\r
- sn->s[i] = val;\r
+    onig_st_free_table(tbl);\r
}\r
+\r
+  return 0;\r
}\r
-#endif\r
\r
extern int\r
-onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc)\r
+onig_get_callout_num_by_tag(regex_t* reg,\r
+ const UChar* tag, const UChar* tag_end)\r
{\r
- unsigned int num, val;\r
- OnigCodePoint c;\r
- UChar* p = *src;\r
- PFETCH_READY;\r
+ int r;\r
+ RegexExt* ext;\r
+ CalloutTagVal e;\r
\r
- num = 0;\r
- while (!PEND) {\r
- PFETCH(c);\r
- if (ONIGENC_IS_CODE_DIGIT(enc, c)) {\r
- val = (unsigned int )DIGITVAL(c);\r
- if ((INT_MAX_LIMIT - val) / 10UL < num)\r
- return -1; /* overflow */\r
+ ext = REG_EXTP(reg);\r
+ if (IS_NULL(ext) || IS_NULL(ext->tag_table))\r
+ return ONIGERR_INVALID_CALLOUT_TAG_NAME;\r
\r
- num = num * 10 + val;\r
- }\r
- else {\r
- PUNFETCH;\r
- break;\r
- }\r
+ r = onig_st_lookup_strend(ext->tag_table, tag, tag_end,\r
+ (HashDataType* )((void* )(&e)));\r
+ if (r == 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME;\r
+ return (int )e;\r
+}\r
+\r
+/* Look up [name, name_end) in tag table t and return its stored value.\r
+   Returns -1 when t is NULL; the result variable also starts at -1 so it\r
+   keeps that value if the lookup does not write to it. */\r
+static CalloutTagVal\r
+callout_tag_find(CalloutTagTable* t, const UChar* name, const UChar* name_end)\r
+{\r
+  CalloutTagVal found = -1;\r
+\r
+  if (IS_NOT_NULL(t)) {\r
+    onig_st_lookup_strend(t, name, name_end,\r
+                          (HashDataType* )((void* )(&found)));\r
}\r
- *src = p;\r
- return num;\r
+  return found;\r
}\r
\r
static int\r
-scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int maxlen,\r
- OnigEncoding enc)\r
+callout_tag_table_new(CalloutTagTable** rt)\r
{\r
- OnigCodePoint c;\r
- unsigned int num, val;\r
- UChar* p = *src;\r
- PFETCH_READY;\r
+ CalloutTagTable* t;\r
\r
- num = 0;\r
- while (!PEND && maxlen-- != 0) {\r
- PFETCH(c);\r
- if (ONIGENC_IS_CODE_XDIGIT(enc, c)) {\r
- val = (unsigned int )XDIGITVAL(enc,c);\r
- if ((INT_MAX_LIMIT - val) / 16UL < num)\r
- return -1; /* overflow */\r
+ *rt = 0;\r
+ t = onig_st_init_strend_table_with_size(INIT_TAG_NAMES_ALLOC_NUM);\r
+ CHECK_NULL_RETURN_MEMERR(t);\r
\r
- num = (num << 4) + XDIGITVAL(enc,c);\r
- }\r
- else {\r
- PUNFETCH;\r
- break;\r
- }\r
- }\r
- *src = p;\r
- return num;\r
+ *rt = t;\r
+ return ONIG_NORMAL;\r
}\r
\r
static int\r
-scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen,\r
- OnigEncoding enc)\r
+callout_tag_entry_raw(CalloutTagTable* t, UChar* name, UChar* name_end,\r
+ CalloutTagVal entry_val)\r
{\r
- OnigCodePoint c;\r
- unsigned int num, val;\r
- UChar* p = *src;\r
- PFETCH_READY;\r
+ int r;\r
+ CalloutTagVal val;\r
\r
- num = 0;\r
- while (!PEND && maxlen-- != 0) {\r
- PFETCH(c);\r
- if (ONIGENC_IS_CODE_DIGIT(enc, c) && c < '8') {\r
- val = ODIGITVAL(c);\r
- if ((INT_MAX_LIMIT - val) / 8UL < num)\r
- return -1; /* overflow */\r
+ if (name_end - name <= 0)\r
+ return ONIGERR_INVALID_CALLOUT_TAG_NAME;\r
\r
- num = (num << 3) + val;\r
- }\r
- else {\r
- PUNFETCH;\r
- break;\r
- }\r
- }\r
- *src = p;\r
- return num;\r
-}\r
+ val = callout_tag_find(t, name, name_end);\r
+ if (val >= 0)\r
+ return ONIGERR_MULTIPLEX_DEFINED_NAME;\r
\r
+ r = onig_st_insert_strend(t, name, name_end, (HashDataType )entry_val);\r
+ if (r < 0) return r;\r
\r
-#define BBUF_WRITE_CODE_POINT(bbuf,pos,code) \\r
- BBUF_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT)\r
+ return ONIG_NORMAL;\r
+}\r
\r
-/* data format:\r
- [n][from-1][to-1][from-2][to-2] ... [from-n][to-n]\r
- (all data size is OnigCodePoint)\r
- */\r
static int\r
-new_code_range(BBuf** pbuf)\r
+ext_ensure_tag_table(regex_t* reg)\r
{\r
-#define INIT_MULTI_BYTE_RANGE_SIZE (SIZE_CODE_POINT * 5)\r
int r;\r
- OnigCodePoint n;\r
- BBuf* bbuf;\r
+ RegexExt* ext;\r
+ CalloutTagTable* t;\r
\r
- bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf));\r
- CHECK_NULL_RETURN_MEMERR(*pbuf);\r
- r = BBUF_INIT(*pbuf, INIT_MULTI_BYTE_RANGE_SIZE);\r
- if (r) return r;\r
+ ext = onig_get_regex_ext(reg);\r
+ CHECK_NULL_RETURN_MEMERR(ext);\r
\r
- n = 0;\r
- BBUF_WRITE_CODE_POINT(bbuf, 0, n);\r
- return 0;\r
+ if (IS_NULL(ext->tag_table)) {\r
+ r = callout_tag_table_new(&t);\r
+ if (r != ONIG_NORMAL) return r;\r
+\r
+ ext->tag_table = t;\r
+ }\r
+\r
+ return ONIG_NORMAL;\r
}\r
\r
static int\r
-add_code_range_to_buf(BBuf** pbuf, OnigCodePoint from, OnigCodePoint to)\r
+callout_tag_entry(regex_t* reg, UChar* name, UChar* name_end,\r
+ CalloutTagVal entry_val)\r
{\r
- int r, inc_n, pos;\r
- int low, high, bound, x;\r
- OnigCodePoint n, *data;\r
- BBuf* bbuf;\r
+ int r;\r
+ RegexExt* ext;\r
+ CalloutListEntry* e;\r
\r
- if (from > to) {\r
- n = from; from = to; to = n;\r
- }\r
+ r = ext_ensure_tag_table(reg);\r
+ if (r != ONIG_NORMAL) return r;\r
\r
- if (IS_NULL(*pbuf)) {\r
- r = new_code_range(pbuf);\r
- if (r) return r;\r
- bbuf = *pbuf;\r
- n = 0;\r
- }\r
- else {\r
- bbuf = *pbuf;\r
- GET_CODE_POINT(n, bbuf->p);\r
- }\r
- data = (OnigCodePoint* )(bbuf->p);\r
- data++;\r
+ ext = onig_get_regex_ext(reg);\r
+ r = callout_tag_entry_raw(ext->tag_table, name, name_end, entry_val);\r
\r
- for (low = 0, bound = n; low < bound; ) {\r
- x = (low + bound) >> 1;\r
- if (from > data[x*2 + 1])\r
- low = x + 1;\r
- else\r
- bound = x;\r
- }\r
+ e = onig_reg_callout_list_at(reg, (int )entry_val);\r
+ e->tag_start = name;\r
+ e->tag_end = name_end;\r
\r
- for (high = low, bound = n; high < bound; ) {\r
- x = (high + bound) >> 1;\r
- if (to >= data[x*2] - 1)\r
- high = x + 1;\r
- else\r
- bound = x;\r
- }\r
+ return r;\r
+}\r
\r
- inc_n = low + 1 - high;\r
- if (n + inc_n > ONIG_MAX_MULTI_BYTE_RANGES_NUM)\r
- return ONIGERR_TOO_MANY_MULTI_BYTE_RANGES;\r
+#endif /* USE_CALLOUT */\r
\r
- if (inc_n != 1) {\r
- if (from > data[low*2])\r
- from = data[low*2];\r
- if (to < data[(high - 1)*2 + 1])\r
- to = data[(high - 1)*2 + 1];\r
- }\r
\r
- if (inc_n != 0 && (OnigCodePoint )high < n) {\r
- int from_pos = SIZE_CODE_POINT * (1 + high * 2);\r
- int to_pos = SIZE_CODE_POINT * (1 + (low + 1) * 2);\r
- int size = (n - high) * 2 * SIZE_CODE_POINT;\r
+#define INIT_SCANENV_MEMENV_ALLOC_SIZE 16\r
\r
- if (inc_n > 0) {\r
- BBUF_MOVE_RIGHT(bbuf, from_pos, to_pos, size);\r
- }\r
- else {\r
- BBUF_MOVE_LEFT_REDUCE(bbuf, from_pos, to_pos);\r
- }\r
- }\r
+/* Reset a ScanEnv to its initial state: status bit sets, error span,\r
+   counters, the memory-entry bookkeeping and the save-list fields. */\r
+static void\r
+scan_env_clear(ScanEnv* env)\r
+{\r
+ MEM_STATUS_CLEAR(env->capture_history);\r
+ MEM_STATUS_CLEAR(env->bt_mem_start);\r
+ MEM_STATUS_CLEAR(env->bt_mem_end);\r
+ MEM_STATUS_CLEAR(env->backrefed_mem);\r
+ env->error = (UChar* )NULL;\r
+ env->error_end = (UChar* )NULL;\r
+ env->num_call = 0;\r
\r
- pos = SIZE_CODE_POINT * (1 + low * 2);\r
- BBUF_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2);\r
- BBUF_WRITE_CODE_POINT(bbuf, pos, from);\r
- BBUF_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to);\r
- n += inc_n;\r
- BBUF_WRITE_CODE_POINT(bbuf, 0, n);\r
+#ifdef USE_CALL\r
+ env->unset_addr_list = NULL;\r
+ env->has_call_zero = 0;\r
+#endif\r
\r
- return 0;\r
-}\r
+ /* no memory entries yet; the dynamic array is not allocated */\r
+ env->num_mem = 0;\r
+ env->num_named = 0;\r
+ env->mem_alloc = 0;\r
+ env->mem_env_dynamic = (MemEnv* )NULL;\r
\r
-static int\r
-add_code_range(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to)\r
-{\r
- if (from > to) {\r
- if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))\r
- return 0;\r
- else\r
- return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;\r
- }\r
+ xmemset(env->mem_env_static, 0, sizeof(env->mem_env_static));\r
\r
- return add_code_range_to_buf(pbuf, from, to);\r
+ env->parse_depth = 0;\r
+ env->keep_num = 0;\r
+ env->save_num = 0;\r
+ env->save_alloc_num = 0;\r
+ env->saves = 0;\r
}\r
\r
+/* Reserve the next memory entry number in env.\r
+   Returns the new value of env->num_mem, ONIGERR_TOO_MANY_CAPTURES when a\r
+   non-zero MaxCaptureNum would be exceeded, or a memory error. */\r
static int\r
-not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf)\r
+scan_env_add_mem_entry(ScanEnv* env)\r
{\r
- int r, i, n;\r
- OnigCodePoint pre, from, *data, to = 0;\r
+ int i, need, alloc;\r
+ MemEnv* p;\r
\r
- *pbuf = (BBuf* )NULL;\r
- if (IS_NULL(bbuf)) {\r
- set_all:\r
- return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);\r
- }\r
+ need = env->num_mem + 1;\r
+ /* MaxCaptureNum == 0 disables the limit */\r
+ if (need > MaxCaptureNum && MaxCaptureNum != 0)\r
+ return ONIGERR_TOO_MANY_CAPTURES;\r
\r
- data = (OnigCodePoint* )(bbuf->p);\r
- GET_CODE_POINT(n, data);\r
- data++;\r
- if (n <= 0) goto set_all;\r
+ /* the static array no longer suffices: use (and grow) the dynamic one */\r
+ if (need >= SCANENV_MEMENV_SIZE) {\r
+ if (env->mem_alloc <= need) {\r
+ if (IS_NULL(env->mem_env_dynamic)) {\r
+ /* first spill: allocate and copy the static entries over */\r
+ alloc = INIT_SCANENV_MEMENV_ALLOC_SIZE;\r
+ p = (MemEnv* )xmalloc(sizeof(MemEnv) * alloc);\r
+ CHECK_NULL_RETURN_MEMERR(p);\r
+ xmemcpy(p, env->mem_env_static, sizeof(env->mem_env_static));\r
+ }\r
+ else {\r
+ /* later growth: double the dynamic array */\r
+ alloc = env->mem_alloc * 2;\r
+ p = (MemEnv* )xrealloc(env->mem_env_dynamic, sizeof(MemEnv) * alloc, sizeof(MemEnv)*env->mem_alloc);\r
+ CHECK_NULL_RETURN_MEMERR(p);\r
+ }\r
\r
- r = 0;\r
- pre = MBCODE_START_POS(enc);\r
- for (i = 0; i < n; i++) {\r
- from = data[i*2];\r
- to = data[i*2+1];\r
- if (pre <= from - 1) {\r
- r = add_code_range_to_buf(pbuf, pre, from - 1);\r
- if (r != 0) return r;\r
+ /* clear the node pointer of every slot from the new entry onwards */\r
+ for (i = env->num_mem + 1; i < alloc; i++) {\r
+ p[i].node = NULL_NODE;\r
+#if 0\r
+ p[i].in = 0;\r
+ p[i].recursion = 0;\r
+#endif\r
+ }\r
+\r
+ env->mem_env_dynamic = p;\r
+ env->mem_alloc = alloc;\r
}\r
- if (to == ~((OnigCodePoint )0)) break;\r
- pre = to + 1;\r
- }\r
- if (to < ~((OnigCodePoint )0)) {\r
- r = add_code_range_to_buf(pbuf, to + 1, ~((OnigCodePoint )0));\r
}\r
- return r;\r
-}\r
\r
-#define SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2) do {\\r
- BBuf *tbuf; \\r
- int tnot; \\r
- tnot = not1; not1 = not2; not2 = tnot; \\r
- tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf; \\r
-} while (0)\r
+ env->num_mem++;\r
+ return env->num_mem;\r
+}\r
\r
static int\r
-or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1,\r
- BBuf* bbuf2, int not2, BBuf** pbuf)\r
+scan_env_set_mem_node(ScanEnv* env, int num, Node* node)\r
{\r
- int r;\r
- OnigCodePoint i, n1, *data1;\r
- OnigCodePoint from, to;\r
+ if (env->num_mem >= num)\r
+ SCANENV_MEMENV(env)[num].node = node;\r
+ else\r
+ return ONIGERR_PARSER_BUG;\r
+ return 0;\r
+}\r
\r
- *pbuf = (BBuf* )NULL;\r
- if (IS_NULL(bbuf1) && IS_NULL(bbuf2)) {\r
- if (not1 != 0 || not2 != 0)\r
- return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);\r
- return 0;\r
- }\r
+/* Free node and everything it owns.  A NULL node is ignored.\r
+   Child nodes are freed recursively, except that the CDR chain of\r
+   list/alt nodes is walked with a goto loop instead of recursion. */\r
+extern void\r
+onig_node_free(Node* node)\r
+{\r
+ start:\r
+ if (IS_NULL(node)) return ;\r
\r
- r = 0;\r
- if (IS_NULL(bbuf2))\r
- SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);\r
+#ifdef DEBUG_NODE_FREE\r
+ fprintf(stderr, "onig_node_free: %p\n", node);\r
+#endif\r
\r
- if (IS_NULL(bbuf1)) {\r
- if (not1 != 0) {\r
- return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);\r
- }\r
- else {\r
- if (not2 == 0) {\r
- return bbuf_clone(pbuf, bbuf2);\r
- }\r
- else {\r
- return not_code_range_buf(enc, bbuf2, pbuf);\r
- }\r
+ switch (NODE_TYPE(node)) {\r
+ case NODE_STRING:\r
+ /* free the string buffer only when it is not the node's inline buf */\r
+ if (STR_(node)->capa != 0 &&\r
+ IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) {\r
+ xfree(STR_(node)->s);\r
}\r
- }\r
+ break;\r
\r
- if (not1 != 0)\r
- SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);\r
+ case NODE_LIST:\r
+ case NODE_ALT:\r
+ /* free the CAR subtree, then this cell, then continue with the CDR */\r
+ onig_node_free(NODE_CAR(node));\r
+ {\r
+ Node* next_node = NODE_CDR(node);\r
\r
- data1 = (OnigCodePoint* )(bbuf1->p);\r
- GET_CODE_POINT(n1, data1);\r
- data1++;\r
+ xfree(node);\r
+ node = next_node;\r
+ goto start;\r
+ }\r
+ break;\r
\r
- if (not2 == 0 && not1 == 0) { /* 1 OR 2 */\r
- r = bbuf_clone(pbuf, bbuf2);\r
- }\r
- else if (not1 == 0) { /* 1 OR (not 2) */\r
- r = not_code_range_buf(enc, bbuf2, pbuf);\r
- }\r
- if (r != 0) return r;\r
+ case NODE_CCLASS:\r
+ {\r
+ CClassNode* cc = CCLASS_(node);\r
\r
- for (i = 0; i < n1; i++) {\r
- from = data1[i*2];\r
- to = data1[i*2+1];\r
- r = add_code_range_to_buf(pbuf, from, to);\r
- if (r != 0) return r;\r
- }\r
- return 0;\r
-}\r
+ if (cc->mbuf)\r
+ bbuf_free(cc->mbuf);\r
+ }\r
+ break;\r
\r
-static int\r
-and_code_range1(BBuf** pbuf, OnigCodePoint from1, OnigCodePoint to1,\r
- OnigCodePoint* data, int n)\r
-{\r
- int i, r;\r
- OnigCodePoint from2, to2;\r
+ case NODE_BACKREF:\r
+ if (IS_NOT_NULL(BACKREF_(node)->back_dynamic))\r
+ xfree(BACKREF_(node)->back_dynamic);\r
+ break;\r
\r
- for (i = 0; i < n; i++) {\r
- from2 = data[i*2];\r
- to2 = data[i*2+1];\r
- if (from2 < from1) {\r
- if (to2 < from1) continue;\r
- else {\r
- from1 = to2 + 1;\r
- }\r
- }\r
- else if (from2 <= to1) {\r
- if (to2 < to1) {\r
- if (from1 <= from2 - 1) {\r
- r = add_code_range_to_buf(pbuf, from1, from2-1);\r
- if (r != 0) return r;\r
- }\r
- from1 = to2 + 1;\r
- }\r
- else {\r
- to1 = from2 - 1;\r
+ case NODE_ENCLOSURE:\r
+ if (NODE_BODY(node))\r
+ onig_node_free(NODE_BODY(node));\r
+\r
+ /* an if-else enclosure also owns its Then and Else subtrees */\r
+ {\r
+ EnclosureNode* en = ENCLOSURE_(node);\r
+ if (en->type == ENCLOSURE_IF_ELSE) {\r
+ onig_node_free(en->te.Then);\r
+ onig_node_free(en->te.Else);\r
}\r
}\r
- else {\r
- from1 = from2;\r
- }\r
- if (from1 > to1) break;\r
- }\r
- if (from1 <= to1) {\r
- r = add_code_range_to_buf(pbuf, from1, to1);\r
- if (r != 0) return r;\r
+ break;\r
+\r
+ case NODE_QUANT:\r
+ case NODE_ANCHOR:\r
+ if (NODE_BODY(node))\r
+ onig_node_free(NODE_BODY(node));\r
+ break;\r
+\r
+ /* these node types need no extra cleanup here */\r
+ case NODE_CTYPE:\r
+ case NODE_CALL:\r
+ case NODE_GIMMICK:\r
+ break;\r
}\r
- return 0;\r
+\r
+ xfree(node);\r
}\r
\r
-static int\r
-and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)\r
+/* Free a single list/alt cell without freeing its children: both links are\r
+   cleared first so onig_node_free() releases only the cell itself. */\r
+static void\r
+cons_node_free_alone(Node* node)\r
{\r
- int r;\r
- OnigCodePoint i, j, n1, n2, *data1, *data2;\r
- OnigCodePoint from, to, from1, to1, from2, to2;\r
+  NODE_CAR(node) = NULL_NODE;\r
+  NODE_CDR(node) = NULL_NODE;\r
+\r
+  onig_node_free(node);\r
+}\r
\r
- *pbuf = (BBuf* )NULL;\r
- if (IS_NULL(bbuf1)) {\r
- if (not1 != 0 && IS_NOT_NULL(bbuf2)) /* not1 != 0 -> not2 == 0 */\r
- return bbuf_clone(pbuf, bbuf2);\r
- return 0;\r
- }\r
- else if (IS_NULL(bbuf2)) {\r
- if (not2 != 0)\r
- return bbuf_clone(pbuf, bbuf1);\r
- return 0;\r
- }\r
+/* Allocate one zero-filled Node.  Returns NULL when allocation fails. */\r
+static Node*\r
+node_new(void)\r
+{\r
+  Node* node;\r
\r
- if (not1 != 0)\r
- SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);\r
+  node = (Node* )xmalloc(sizeof(Node));\r
+  /* Callers test the result with CHECK_NULL_RETURN(); bail out here before\r
+     xmemset() would write through a NULL pointer. */\r
+  CHECK_NULL_RETURN(node);\r
+  xmemset(node, 0, sizeof(*node));\r
\r
- data1 = (OnigCodePoint* )(bbuf1->p);\r
- data2 = (OnigCodePoint* )(bbuf2->p);\r
- GET_CODE_POINT(n1, data1);\r
- GET_CODE_POINT(n2, data2);\r
- data1++;\r
- data2++;\r
+#ifdef DEBUG_NODE_FREE\r
+  fprintf(stderr, "node_new: %p\n", node);\r
+#endif\r
+  return node;\r
+}\r
\r
- if (not2 == 0 && not1 == 0) { /* 1 AND 2 */\r
- for (i = 0; i < n1; i++) {\r
- from1 = data1[i*2];\r
- to1 = data1[i*2+1];\r
- for (j = 0; j < n2; j++) {\r
- from2 = data2[j*2];\r
- to2 = data2[j*2+1];\r
- if (from2 > to1) break;\r
- if (to2 < from1) continue;\r
- from = MAX(from1, from2);\r
- to = MIN(to1, to2);\r
- r = add_code_range_to_buf(pbuf, from, to);\r
- if (r != 0) return r;\r
- }\r
- }\r
- }\r
- else if (not1 == 0) { /* 1 AND (not 2) */\r
- for (i = 0; i < n1; i++) {\r
- from1 = data1[i*2];\r
- to1 = data1[i*2+1];\r
- r = and_code_range1(pbuf, from1, to1, data2, n2);\r
- if (r != 0) return r;\r
- }\r
- }\r
\r
- return 0;\r
+/* Reset a character-class node: empty bit set, no flags, no multi-byte\r
+   range buffer. */\r
+static void\r
+initialize_cclass(CClassNode* cc)\r
+{\r
+ BITSET_CLEAR(cc->bs);\r
+ cc->flags = 0;\r
+ cc->mbuf = NULL;\r
}\r
\r
-static int\r
-and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)\r
+/* Create an empty character-class node.  Returns NULL when allocation fails. */\r
+static Node*\r
+node_new_cclass(void)\r
{\r
- int r, not1, not2;\r
- BBuf *buf1, *buf2, *pbuf;\r
- BitSetRef bsr1, bsr2;\r
- BitSet bs1, bs2;\r
+  Node* cc_node;\r
+\r
+  cc_node = node_new();\r
+  CHECK_NULL_RETURN(cc_node);\r
\r
- not1 = IS_NCCLASS_NOT(dest);\r
- bsr1 = dest->bs;\r
- buf1 = dest->mbuf;\r
- not2 = IS_NCCLASS_NOT(cc);\r
- bsr2 = cc->bs;\r
- buf2 = cc->mbuf;\r
+  NODE_SET_TYPE(cc_node, NODE_CCLASS);\r
+  initialize_cclass(CCLASS_(cc_node));\r
+  return cc_node;\r
+}\r
\r
- if (not1 != 0) {\r
+/* Create a ctype node for character type `type`, negated when `not` is\r
+   non-zero.  `options` is stored and also used to derive ascii_mode.\r
+   Returns NULL when allocation fails. */\r
+static Node*\r
+node_new_ctype(int type, int not, OnigOptionType options)\r
+{\r
+  CtypeNode* ct;\r
+  Node* created;\r
+\r
+  created = node_new();\r
+  CHECK_NULL_RETURN(created);\r
+\r
+  NODE_SET_TYPE(created, NODE_CTYPE);\r
+  ct = CTYPE_(created);\r
+  ct->ctype = type;\r
+  ct->not = not;\r
+  ct->options = options;\r
+  ct->ascii_mode = IS_ASCII_MODE_CTYPE_OPTION(type, options);\r
+  return created;\r
+}\r
+\r
+/* Create a non-negated CTYPE_ANYCHAR node with no options.\r
+   Returns NULL when allocation fails. */\r
+static Node*\r
+node_new_anychar(void)\r
+{\r
+  return node_new_ctype(CTYPE_ANYCHAR, 0, ONIG_OPTION_NONE);\r
+}\r
+\r
+/* Create an any-char node whose options are pinned to `option` and mark it\r
+   with the FIXED_OPTION status.  Returns NULL when allocation fails. */\r
+static Node*\r
+node_new_anychar_with_fixed_option(OnigOptionType option)\r
+{\r
+  CtypeNode* ct;\r
+  Node* node;\r
+\r
+  node = node_new_anychar();\r
+  /* Callers check this function's result for NULL, so allocation failure is\r
+     expected to propagate; do not dereference a NULL node below. */\r
+  CHECK_NULL_RETURN(node);\r
+\r
+  ct = CTYPE_(node);\r
+  ct->options = option;\r
+  NODE_STATUS_ADD(node, FIXED_OPTION);\r
+  return node;\r
+}\r
+\r
+/* Store in *node an any-character node fixed to ONIG_OPTION_NONE.\r
+   Returns 0 on success.  `env' is not used. */\r
+static int\r
+node_new_no_newline(Node** node, ScanEnv* env)\r
+{\r
+  Node* any = node_new_anychar_with_fixed_option(ONIG_OPTION_NONE);\r
+\r
+  CHECK_NULL_RETURN_MEMERR(any);\r
+  *node = any;\r
+  return 0;\r
+}\r
+\r
+/* Store in *node an any-character node fixed to ONIG_OPTION_MULTILINE.\r
+   Returns 0 on success.  `env' is not used. */\r
+static int\r
+node_new_true_anychar(Node** node, ScanEnv* env)\r
+{\r
+  Node* any = node_new_anychar_with_fixed_option(ONIG_OPTION_MULTILINE);\r
+\r
+  CHECK_NULL_RETURN_MEMERR(any);\r
+  *node = any;\r
+  return 0;\r
+}\r
+\r
+/* Create a NODE_LIST cell whose car is `left' and whose cdr is `right'. */\r
+static Node*\r
+node_new_list(Node* left, Node* right)\r
+{\r
+  Node* cell;\r
+\r
+  cell = node_new();\r
+  CHECK_NULL_RETURN(cell);\r
+\r
+  NODE_SET_TYPE(cell, NODE_LIST);\r
+  NODE_CDR(cell) = right;\r
+  NODE_CAR(cell) = left;\r
+  return cell;\r
+}\r
+\r
+/* Exported wrapper around node_new_list(). */\r
+extern Node*\r
+onig_node_new_list(Node* left, Node* right)\r
+{\r
+  Node* cell = node_new_list(left, right);\r
+\r
+  return cell;\r
+}\r
+\r
+/* Wrap `x' in a new list cell and, when `list' is not NULL, link that cell\r
+   after the last cell of `list'.\r
+   Returns the new cell, or NULL_NODE if it could not be created. */\r
+extern Node*\r
+onig_node_list_add(Node* list, Node* x)\r
+{\r
+  Node* cell;\r
+  Node* tail;\r
+\r
+  cell = onig_node_new_list(x, NULL);\r
+  if (IS_NULL(cell)) return NULL_NODE;\r
+\r
+  if (IS_NULL(list)) return cell;\r
+\r
+  /* walk to the last cell of the existing list */\r
+  for (tail = list; IS_NOT_NULL(NODE_CDR(tail)); tail = NODE_CDR(tail))\r
+    ;\r
+\r
+  NODE_CDR(tail) = cell;\r
+  return cell;\r
+}\r
+\r
+/* Create a NODE_ALT cell whose car is `left' and whose cdr is `right'. */\r
+extern Node*\r
+onig_node_new_alt(Node* left, Node* right)\r
+{\r
+  Node* alt;\r
+\r
+  alt = node_new();\r
+  CHECK_NULL_RETURN(alt);\r
+\r
+  NODE_SET_TYPE(alt, NODE_ALT);\r
+  NODE_CDR(alt) = right;\r
+  NODE_CAR(alt) = left;\r
+  return alt;\r
+}\r
+\r
+/* Build a chain of `n' cells of the given type (NODE_LIST or NODE_ALT)\r
+   whose cars are ns[0] .. ns[n-1]; the last cell's cdr is NULL_NODE.\r
+   Returns NULL_NODE when n <= 0 or when a cell cannot be allocated; in\r
+   the latter case the cells created so far are freed, but the ns[]\r
+   elements themselves are not (they have not been attached yet). */\r
+static Node*\r
+make_list_or_alt(NodeType type, int n, Node* ns[])\r
+{\r
+  Node* head;\r
+  Node* rest = NULL_NODE;\r
+\r
+  if (n <= 0) return NULL_NODE;\r
+\r
+  head = node_new();\r
+  CHECK_NULL_RETURN(head);\r
+\r
+  if (n > 1) {\r
+    /* build the tail first so that head is only filled in on success */\r
+    rest = make_list_or_alt(type, n - 1, ns + 1);\r
+    if (IS_NULL(rest)) {\r
+      onig_node_free(head);\r
+      return NULL_NODE;\r
+    }\r
+  }\r
+\r
+  NODE_SET_TYPE(head, type);\r
+  NODE_CAR(head) = ns[0];\r
+  NODE_CDR(head) = rest;\r
+  return head;\r
+}\r
+\r
+/* Chain ns[0..n-1] into NODE_LIST cells (see make_list_or_alt). */\r
+static Node*\r
+make_list(int n, Node* ns[])\r
+{\r
+  Node* list = make_list_or_alt(NODE_LIST, n, ns);\r
+\r
+  return list;\r
+}\r
+\r
+/* Chain ns[0..n-1] into NODE_ALT cells (see make_list_or_alt). */\r
+static Node*\r
+make_alt(int n, Node* ns[])\r
+{\r
+  Node* alt = make_list_or_alt(NODE_ALT, n, ns);\r
+\r
+  return alt;\r
+}\r
+\r
+/* Create a NODE_ANCHOR node of the given anchor type.  char_len starts\r
+   at -1 and ascii_mode is stored as given. */\r
+extern Node*\r
+onig_node_new_anchor(int type, int ascii_mode)\r
+{\r
+  Node* anchor;\r
+\r
+  anchor = node_new();\r
+  CHECK_NULL_RETURN(anchor);\r
+\r
+  NODE_SET_TYPE(anchor, NODE_ANCHOR);\r
+  ANCHOR_(anchor)->ascii_mode = ascii_mode;\r
+  ANCHOR_(anchor)->char_len   = -1;\r
+  ANCHOR_(anchor)->type       = type;\r
+  return anchor;\r
+}\r
+/*\r
+ * Create a NODE_BACKREF node referring to the back_num group numbers in\r
+ * backrefs[].  The numbers are copied into the node: into back_static[]\r
+ * when back_num <= NODE_BACKREFS_SIZE, otherwise into an xmalloc'ed array\r
+ * stored in back_dynamic.  by_name != 0 adds the BY_NAME status; with\r
+ * USE_BACKREF_WITH_LEVEL, exist_level != 0 adds NEST_LEVEL and records\r
+ * nest_level.  Returns NULL when the dynamic array cannot be allocated\r
+ * (the node is freed first).\r
+ */\r
+static Node*\r
+node_new_backref(int back_num, int* backrefs, int by_name,\r
+#ifdef USE_BACKREF_WITH_LEVEL\r
+ int exist_level, int nest_level,\r
+#endif\r
+ ScanEnv* env)\r
+{\r
+ int i;\r
+ Node* node = node_new();\r
+\r
+ CHECK_NULL_RETURN(node);\r
+\r
+ NODE_SET_TYPE(node, NODE_BACKREF);\r
+ BACKREF_(node)->back_num = back_num;\r
+ BACKREF_(node)->back_dynamic = (int* )NULL;\r
+ if (by_name != 0)\r
+ NODE_STATUS_ADD(node, BY_NAME);\r
+\r
+#ifdef USE_BACKREF_WITH_LEVEL\r
+ if (exist_level != 0) {\r
+ NODE_STATUS_ADD(node, NEST_LEVEL);\r
+ BACKREF_(node)->nest_level = nest_level;\r
+ }\r
+#endif\r
+ /* flag RECURSION if any referenced group (<= num_mem) has no node in the mem env yet */\r
+ for (i = 0; i < back_num; i++) {\r
+ if (backrefs[i] <= env->num_mem &&\r
+ IS_NULL(SCANENV_MEMENV(env)[backrefs[i]].node)) {\r
+ NODE_STATUS_ADD(node, RECURSION); /* /...(\1).../ */\r
+ break;\r
+ }\r
+ }\r
+ /* small reference lists live inside the node, larger ones on the heap */\r
+ if (back_num <= NODE_BACKREFS_SIZE) {\r
+ for (i = 0; i < back_num; i++)\r
+ BACKREF_(node)->back_static[i] = backrefs[i];\r
+ }\r
+ else {\r
+ int* p = (int* )xmalloc(sizeof(int) * back_num);\r
+ if (IS_NULL(p)) {\r
+ onig_node_free(node);\r
+ return NULL;\r
+ }\r
+ BACKREF_(node)->back_dynamic = p;\r
+ for (i = 0; i < back_num; i++)\r
+ p[i] = backrefs[i];\r
+ }\r
+ return node;\r
+}\r
+\r
+/* Same as node_new_backref(), but the resulting node additionally gets\r
+   the CHECKER status.  A failed node_new_backref() is passed through. */\r
+static Node*\r
+node_new_backref_checker(int back_num, int* backrefs, int by_name,\r
+#ifdef USE_BACKREF_WITH_LEVEL\r
+                         int exist_level, int nest_level,\r
+#endif\r
+                         ScanEnv* env)\r
+{\r
+  Node* ref;\r
+\r
+  ref = node_new_backref(back_num, backrefs, by_name,\r
+#ifdef USE_BACKREF_WITH_LEVEL\r
+                         exist_level, nest_level,\r
+#endif\r
+                         env);\r
+  if (IS_NOT_NULL(ref)) {\r
+    NODE_STATUS_ADD(ref, CHECKER);\r
+  }\r
+  return ref;\r
+}\r
+\r
+#ifdef USE_CALL\r
+/* Create a NODE_CALL node.  The name/name_end pointers are stored as\r
+   given (the name is not copied); entry_count starts at 1. */\r
+static Node*\r
+node_new_call(UChar* name, UChar* name_end, int gnum, int by_number)\r
+{\r
+  Node* call;\r
+\r
+  call = node_new();\r
+  CHECK_NULL_RETURN(call);\r
+\r
+  NODE_SET_TYPE(call, NODE_CALL);\r
+  CALL_(call)->name        = name;\r
+  CALL_(call)->name_end    = name_end;\r
+  CALL_(call)->group_num   = gnum;\r
+  CALL_(call)->by_number   = by_number;\r
+  CALL_(call)->entry_count = 1;\r
+  return call;\r
+}\r
+#endif\r
+\r
+/* Create a greedy NODE_QUANT node repeating lower..upper times.  The body\r
+   is not set here.  by_number != 0 adds the BY_NUMBER status. */\r
+static Node*\r
+node_new_quantifier(int lower, int upper, int by_number)\r
+{\r
+  QuantNode* q;\r
+  Node* node;\r
+\r
+  node = node_new();\r
+  CHECK_NULL_RETURN(node);\r
+\r
+  NODE_SET_TYPE(node, NODE_QUANT);\r
+  q = QUANT_(node);\r
+  q->lower           = lower;\r
+  q->upper           = upper;\r
+  q->greedy          = 1;\r
+  q->body_empty_info = QUANT_BODY_IS_NOT_EMPTY;\r
+  q->head_exact      = NULL_NODE;\r
+  q->next_head_exact = NULL_NODE;\r
+  q->is_refered      = 0;\r
+\r
+  if (by_number != 0) {\r
+    NODE_STATUS_ADD(node, BY_NUMBER);\r
+  }\r
+  return node;\r
+}\r
+\r
+/* Create a NODE_ENCLOSURE node of the given enclosure type and reset the\r
+   fields that belong to that type.  opt_count is cleared for every type. */\r
+static Node*\r
+node_new_enclosure(enum EnclosureType type)\r
+{\r
+  EnclosureNode* en;\r
+  Node* node;\r
+\r
+  node = node_new();\r
+  CHECK_NULL_RETURN(node);\r
+\r
+  NODE_SET_TYPE(node, NODE_ENCLOSURE);\r
+  en = ENCLOSURE_(node);\r
+  en->type = type;\r
+\r
+  if (type == ENCLOSURE_MEMORY) {\r
+    en->m.regnum       =  0;\r
+    en->m.called_addr  = -1;\r
+    en->m.entry_count  =  1;\r
+    en->m.called_state =  0;\r
+  }\r
+  else if (type == ENCLOSURE_OPTION) {\r
+    en->o.options = 0;\r
+  }\r
+  else if (type == ENCLOSURE_IF_ELSE) {\r
+    en->te.Then = 0;\r
+    en->te.Else = 0;\r
+  }\r
+  /* ENCLOSURE_STOP_BACKTRACK has no type-specific fields to reset */\r
+\r
+  en->opt_count = 0;\r
+  return node;\r
+}\r
+\r
+/* Exported wrapper around node_new_enclosure(); `type' is an\r
+   enum EnclosureType value passed as int. */\r
+extern Node*\r
+onig_node_new_enclosure(int type)\r
+{\r
+  return node_new_enclosure((enum EnclosureType )type);\r
+}\r
+\r
+/* Create an ENCLOSURE_IF_ELSE node: `cond' becomes the body, Then/Else\r
+   are stored in the te members. */\r
+static Node*\r
+node_new_enclosure_if_else(Node* cond, Node* Then, Node* Else)\r
+{\r
+  Node* ie = node_new_enclosure(ENCLOSURE_IF_ELSE);\r
+\r
+  CHECK_NULL_RETURN(ie);\r
+\r
+  ENCLOSURE_(ie)->te.Else = Else;\r
+  ENCLOSURE_(ie)->te.Then = Then;\r
+  NODE_BODY(ie) = cond;\r
+  return ie;\r
+}\r
+\r
+/* Create an ENCLOSURE_MEMORY node; is_named != 0 adds the NAMED_GROUP\r
+   status. */\r
+static Node*\r
+node_new_memory(int is_named)\r
+{\r
+  Node* mem;\r
+\r
+  mem = node_new_enclosure(ENCLOSURE_MEMORY);\r
+  CHECK_NULL_RETURN(mem);\r
+\r
+  if (is_named != 0) {\r
+    NODE_STATUS_ADD(mem, NAMED_GROUP);\r
+  }\r
+  return mem;\r
+}\r
+\r
+/* Create an ENCLOSURE_OPTION node carrying `option'. */\r
+static Node*\r
+node_new_option(OnigOptionType option)\r
+{\r
+  Node* opt;\r
+\r
+  opt = node_new_enclosure(ENCLOSURE_OPTION);\r
+  CHECK_NULL_RETURN(opt);\r
+\r
+  ENCLOSURE_(opt)->o.options = option;\r
+  return opt;\r
+}\r
+\r
+/* Store in *node a new GIMMICK_FAIL gimmick node.  *node receives the\r
+   result of node_new() even when that result is null.  `env' is not\r
+   used.  Returns ONIG_NORMAL on success. */\r
+static int\r
+node_new_fail(Node** node, ScanEnv* env)\r
+{\r
+  Node* g = node_new();\r
+\r
+  *node = g;\r
+  CHECK_NULL_RETURN_MEMERR(g);\r
+\r
+  NODE_SET_TYPE(g, NODE_GIMMICK);\r
+  GIMMICK_(g)->type = GIMMICK_FAIL;\r
+  return ONIG_NORMAL;\r
+}\r
+\r
+/* Register a save entry of `save_type' via save_entry() and store in\r
+   *node a GIMMICK_SAVE node carrying the id obtained from it.\r
+   A non-normal result of save_entry() is returned unchanged and *node is\r
+   then not written.  Returns ONIG_NORMAL on success. */\r
+static int\r
+node_new_save_gimmick(Node** node, enum SaveType save_type, ScanEnv* env)\r
+{\r
+  int save_id;\r
+  int status;\r
+  Node* g;\r
+\r
+  status = save_entry(env, save_type, &save_id);\r
+  if (status != ONIG_NORMAL) return status;\r
+\r
+  g = node_new();\r
+  *node = g;\r
+  CHECK_NULL_RETURN_MEMERR(g);\r
+\r
+  NODE_SET_TYPE(g, NODE_GIMMICK);\r
+  GIMMICK_(g)->type        = GIMMICK_SAVE;\r
+  GIMMICK_(g)->detail_type = (int )save_type;\r
+  GIMMICK_(g)->id          = save_id;\r
+\r
+  return ONIG_NORMAL;\r
+}\r
+\r
+/* Store in *node a GIMMICK_UPDATE_VAR node of kind `update_var_type'\r
+   referring to save id `id'.  `env' is not used.\r
+   Returns ONIG_NORMAL on success. */\r
+static int\r
+node_new_update_var_gimmick(Node** node, enum UpdateVarType update_var_type,\r
+                            int id, ScanEnv* env)\r
+{\r
+  Node* g = node_new();\r
+\r
+  *node = g;\r
+  CHECK_NULL_RETURN_MEMERR(g);\r
+\r
+  NODE_SET_TYPE(g, NODE_GIMMICK);\r
+  GIMMICK_(g)->type        = GIMMICK_UPDATE_VAR;\r
+  GIMMICK_(g)->detail_type = (int )update_var_type;\r
+  GIMMICK_(g)->id          = id;\r
+\r
+  return ONIG_NORMAL;\r
+}\r
+\r
+/* Store in *node a SAVE_KEEP save gimmick and count it in env->keep_num.\r
+   The counter is only incremented when the node was created. */\r
+static int\r
+node_new_keep(Node** node, ScanEnv* env)\r
+{\r
+  int status = node_new_save_gimmick(node, SAVE_KEEP, env);\r
+\r
+  if (status != 0) return status;\r
+\r
+  env->keep_num++;\r
+  return ONIG_NORMAL;\r
+}\r
+\r
+#ifdef USE_CALLOUT\r
+\r
+/* Free a callout list of `n' entries together with the memory owned by\r
+   the entries: for callouts of name, every passed string argument; for\r
+   the other kind (callouts of contents), the content buffer.\r
+   A NULL list is ignored. */\r
+extern void\r
+onig_free_reg_callout_list(int n, CalloutListEntry* list)\r
+{\r
+  int idx;\r
+  int k;\r
+\r
+  if (IS_NULL(list)) return ;\r
+\r
+  for (idx = 0; idx < n; idx++) {\r
+    CalloutListEntry* e = list + idx;\r
+\r
+    if (e->of != ONIG_CALLOUT_OF_NAME) { /* ONIG_CALLOUT_OF_CONTENTS */\r
+      if (IS_NOT_NULL(e->u.content.start)) {\r
+        xfree((void* )e->u.content.start);\r
+      }\r
+      continue;\r
+    }\r
+\r
+    for (k = 0; k < e->u.arg.passed_num; k++) {\r
+      if (e->u.arg.types[k] != ONIG_TYPE_STRING) continue;\r
+\r
+      if (IS_NOT_NULL(e->u.arg.vals[k].s.start))\r
+        xfree(e->u.arg.vals[k].s.start);\r
+    }\r
+  }\r
+\r
+  xfree(list);\r
+}\r
+\r
+/* Return the callout list entry with 1-based number `num' of `reg', or a\r
+   null pointer when the regex has no extension data or `num' is outside\r
+   1 .. callout_num. */\r
+extern CalloutListEntry*\r
+onig_reg_callout_list_at(regex_t* reg, int num)\r
+{\r
+  RegexExt* ext;\r
+\r
+  ext = REG_EXTP(reg);\r
+  CHECK_NULL_RETURN(ext);\r
+\r
+  if (num < 1 || ext->callout_num < num)\r
+    return 0;\r
+\r
+  return ext->callout_list + (num - 1);\r
+}\r
+/*\r
+ * Reserve the next entry of the callout list kept in the regex extension\r
+ * of env->reg and store its 1-based number in *rnum.  The list is created\r
+ * with INIT_CALLOUT_LIST_NUM slots on first use and doubled whenever it is\r
+ * full.  The reserved entry is reset (flag, of, in, type, tag range,\r
+ * start/end functions, argument counts).  Returns ONIG_NORMAL on success.\r
+ */\r
+static int\r
+reg_callout_list_entry(ScanEnv* env, int* rnum)\r
+{\r
+#define INIT_CALLOUT_LIST_NUM 3\r
+\r
+ int num;\r
+ CalloutListEntry* list;\r
+ CalloutListEntry* e;\r
+ RegexExt* ext;\r
+\r
+ ext = onig_get_regex_ext(env->reg);\r
+ CHECK_NULL_RETURN_MEMERR(ext);\r
+\r
+ if (IS_NULL(ext->callout_list)) {\r
+ list = (CalloutListEntry* )xmalloc(sizeof(*list) * INIT_CALLOUT_LIST_NUM);\r
+ CHECK_NULL_RETURN_MEMERR(list);\r
+\r
+ ext->callout_list = list;\r
+ ext->callout_list_alloc = INIT_CALLOUT_LIST_NUM;\r
+ ext->callout_num = 0;\r
+ }\r
+\r
+ num = ext->callout_num + 1;\r
+ if (num > ext->callout_list_alloc) {\r
+ int alloc = ext->callout_list_alloc * 2;\r
+ list = (CalloutListEntry* )xrealloc(ext->callout_list,\r
+ sizeof(CalloutListEntry) * alloc,\r
+ sizeof(CalloutListEntry) * ext->callout_list_alloc);\r
+ CHECK_NULL_RETURN_MEMERR(list);\r
+\r
+ ext->callout_list = list;\r
+ ext->callout_list_alloc = alloc;\r
+ }\r
+\r
+ e = ext->callout_list + (num - 1); /* numbers are 1-based, slots 0-based */\r
+\r
+ e->flag = 0;\r
+ e->of = 0;\r
+ e->in = ONIG_CALLOUT_OF_CONTENTS; /* NOTE(review): an ONIG_CALLOUT_OF_* constant is stored in `in' -- confirm intended */\r
+ e->type = 0;\r
+ e->tag_start = 0;\r
+ e->tag_end = 0;\r
+ e->start_func = 0;\r
+ e->end_func = 0;\r
+ e->u.arg.num = 0;\r
+ e->u.arg.passed_num = 0;\r
+\r
+ ext->callout_num = num;\r
+ *rnum = num;\r
+ return ONIG_NORMAL;\r
+}\r
+\r
+/* Store in *node a GIMMICK_CALLOUT node for callout number `num' with\r
+   identifier `id'; callout_of is kept in detail_type.  `env' is not\r
+   used.  Returns ONIG_NORMAL on success. */\r
+static int\r
+node_new_callout(Node** node, OnigCalloutOf callout_of, int num, int id,\r
+                 ScanEnv* env)\r
+{\r
+  Node* g = node_new();\r
+\r
+  *node = g;\r
+  CHECK_NULL_RETURN_MEMERR(g);\r
+\r
+  NODE_SET_TYPE(g, NODE_GIMMICK);\r
+  GIMMICK_(g)->type        = GIMMICK_CALLOUT;\r
+  GIMMICK_(g)->detail_type = (int )callout_of;\r
+  GIMMICK_(g)->num         = num;\r
+  GIMMICK_(g)->id          = id;\r
+\r
+  return ONIG_NORMAL;\r
+}\r
+#endif\r
+/*\r
+ * Build the node tree for \X and store it in *node.  The tree is a\r
+ * stop-backtrack enclosure around list(true-anychar, quantifier{0,inf}\r
+ * over list(no-grapheme-boundary anchor, true-anychar)).\r
+ * ns[] holds the not-yet-attached pieces so that the error path can free\r
+ * them.  Returns ONIG_NORMAL on success, otherwise an error code.\r
+ */\r
+static int\r
+make_extended_grapheme_cluster(Node** node, ScanEnv* env)\r
+{\r
+ int r;\r
+ int i;\r
+ Node* x;\r
+ Node* ns[2];\r
+\r
+ /* \X == (?>\O(?:\Y\O)*) */\r
+\r
+ ns[1] = NULL_NODE;\r
+\r
+ r = ONIGERR_MEMORY;\r
+ ns[0] = onig_node_new_anchor(ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY, 0);\r
+ if (IS_NULL(ns[0])) goto err;\r
+\r
+ r = node_new_true_anychar(&ns[1], env);\r
+ if (r != 0) goto err1;\r
+\r
+ x = make_list(2, ns); /* \Y\O */\r
+ if (IS_NULL(x)) goto err;\r
+ ns[0] = x;\r
+ ns[1] = NULL_NODE;\r
+\r
+ x = node_new_quantifier(0, REPEAT_INFINITE, 1);\r
+ if (IS_NULL(x)) goto err;\r
+\r
+ NODE_BODY(x) = ns[0]; /* (?:\Y\O)* */\r
+ ns[0] = NULL_NODE;\r
+ ns[1] = x;\r
+\r
+ r = node_new_true_anychar(&ns[0], env);\r
+ if (r != 0) goto err1;\r
+\r
+ x = make_list(2, ns); /* \O(?:\Y\O)* */\r
+ if (IS_NULL(x)) goto err;\r
+\r
+ ns[0] = x;\r
+ ns[1] = NULL_NODE;\r
+\r
+ x = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);\r
+ if (IS_NULL(x)) goto err;\r
+\r
+ NODE_BODY(x) = ns[0];\r
+\r
+ *node = x;\r
+ return ONIG_NORMAL;\r
+\r
+ err: /* allocation failure: force the memory error code */\r
+ r = ONIGERR_MEMORY;\r
+ err1: /* r already holds the error; release the pending pieces */\r
+ for (i = 0; i < 2; i++) onig_node_free(ns[i]);\r
+ return r;\r
+}\r
+/*\r
+ * Build the repeating core of an absent construct and store it in *node:\r
+ *\r
+ *   alt( Q, list(update-right-range-from-stack(pre_save_right_id), fail) )\r
+ *\r
+ * where Q is a quantifier {lower,upper} over\r
+ *   alt( list(save-S, absent, update-right-range-from-S-stack, fail),\r
+ *        step_one )\r
+ * and Q is wrapped in a stop-backtrack enclosure when possessive != 0.\r
+ * The outer alt gets the SUPER status when is_range_cutter != 0.\r
+ * Returns ONIG_NORMAL on success, otherwise an error code.\r
+ *\r
+ * NOTE(review): the error path frees every ns[] slot, which at that point\r
+ * can include the caller-supplied `absent' and `step_one' nodes; callers\r
+ * that free these nodes themselves after a failure should be checked for\r
+ * double frees.\r
+ */\r
+static int\r
+make_absent_engine(Node** node, int pre_save_right_id, Node* absent,\r
+ Node* step_one, int lower, int upper, int possessive,\r
+ int is_range_cutter, ScanEnv* env)\r
+{\r
+ int r;\r
+ int i;\r
+ int id;\r
+ Node* x;\r
+ Node* ns[4];\r
+\r
+ for (i = 0; i < 4; i++) ns[i] = NULL_NODE;\r
+\r
+ ns[1] = absent;\r
+ ns[3] = step_one; /* for err */\r
+ r = node_new_save_gimmick(&ns[0], SAVE_S, env);\r
+ if (r != 0) goto err;\r
+\r
+ id = GIMMICK_(ns[0])->id;\r
+ r = node_new_update_var_gimmick(&ns[2], UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK,\r
+ id, env);\r
+ if (r != 0) goto err;\r
+\r
+ r = node_new_fail(&ns[3], env); /* overwrites the step_one slot */\r
+ if (r != 0) goto err;\r
+\r
+ x = make_list(4, ns);\r
+ if (IS_NULL(x)) goto err0;\r
+\r
+ ns[0] = x;\r
+ ns[1] = step_one;\r
+ ns[2] = ns[3] = NULL_NODE;\r
+\r
+ x = make_alt(2, ns);\r
+ if (IS_NULL(x)) goto err0;\r
+\r
+ ns[0] = x;\r
+\r
+ x = node_new_quantifier(lower, upper, 0);\r
+ if (IS_NULL(x)) goto err0;\r
+\r
+ NODE_BODY(x) = ns[0];\r
+ ns[0] = x;\r
+\r
+ if (possessive != 0) {\r
+ x = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);\r
+ if (IS_NULL(x)) goto err0;\r
+\r
+ NODE_BODY(x) = ns[0];\r
+ ns[0] = x;\r
+ }\r
+\r
+ r = node_new_update_var_gimmick(&ns[1], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,\r
+ pre_save_right_id, env);\r
+ if (r != 0) goto err;\r
+\r
+ r = node_new_fail(&ns[2], env);\r
+ if (r != 0) goto err;\r
+\r
+ x = make_list(2, ns + 1);\r
+ if (IS_NULL(x)) goto err0;\r
+\r
+ ns[1] = x; ns[2] = NULL_NODE;\r
+\r
+ x = make_alt(2, ns);\r
+ if (IS_NULL(x)) goto err0;\r
+\r
+ if (is_range_cutter != 0)\r
+ NODE_STATUS_ADD(x, SUPER);\r
+\r
+ *node = x;\r
+ return ONIG_NORMAL;\r
+\r
+ err0:\r
+ r = ONIGERR_MEMORY;\r
+ err:\r
+ for (i = 0; i < 4; i++) onig_node_free(ns[i]);\r
+ return r;\r
+}\r
+/*\r
+ * Build the two trailing pieces of an absent construct:\r
+ *   *node1 = save-right-range gimmick\r
+ *   *node2 = alt( update-right-range-from-stack(pre_save_right_id),\r
+ *                 list(update-right-range-from-stack(id of *node1), fail) )\r
+ * Both outputs are NULL_NODE on failure.  Returns ONIG_NORMAL on success,\r
+ * otherwise an error code after freeing the pieces built so far.\r
+ */\r
+static int\r
+make_absent_tail(Node** node1, Node** node2, int pre_save_right_id,\r
+ ScanEnv* env)\r
+{\r
+ int r;\r
+ int id;\r
+ Node* save;\r
+ Node* x;\r
+ Node* ns[2];\r
+\r
+ *node1 = *node2 = NULL_NODE;\r
+ save = ns[0] = ns[1] = NULL_NODE;\r
+\r
+ r = node_new_save_gimmick(&save, SAVE_RIGHT_RANGE, env);\r
+ if (r != 0) goto err;\r
+\r
+ id = GIMMICK_(save)->id;\r
+ r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,\r
+ id, env);\r
+ if (r != 0) goto err;\r
+\r
+ r = node_new_fail(&ns[1], env);\r
+ if (r != 0) goto err;\r
+\r
+ x = make_list(2, ns);\r
+ if (IS_NULL(x)) goto err0;\r
+\r
+ ns[0] = NULL_NODE; ns[1] = x;\r
+\r
+ r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,\r
+ pre_save_right_id, env);\r
+ if (r != 0) goto err;\r
+\r
+ x = make_alt(2, ns);\r
+ if (IS_NULL(x)) goto err0;\r
+\r
+ *node1 = save;\r
+ *node2 = x;\r
+ return ONIG_NORMAL;\r
+\r
+ err0:\r
+ r = ONIGERR_MEMORY;\r
+ err:\r
+ onig_node_free(save);\r
+ onig_node_free(ns[0]);\r
+ onig_node_free(ns[1]);\r
+ return r;\r
+}\r
+/*\r
+ * Build the "range clear" tree and store it in *node:\r
+ *   list( save-right-range,\r
+ *         alt( update-var(RIGHT_RANGE_INIT),\r
+ *              list(update-right-range-from-stack(saved id), fail) ) )\r
+ * The alt node gets the SUPER status.  *node is NULL_NODE on failure.\r
+ * Returns ONIG_NORMAL on success, otherwise an error code after freeing\r
+ * the pieces built so far.\r
+ */\r
+static int\r
+make_range_clear(Node** node, ScanEnv* env)\r
+{\r
+ int r;\r
+ int id;\r
+ Node* save;\r
+ Node* x;\r
+ Node* ns[2];\r
+\r
+ *node = NULL_NODE;\r
+ save = ns[0] = ns[1] = NULL_NODE;\r
+\r
+ r = node_new_save_gimmick(&save, SAVE_RIGHT_RANGE, env);\r
+ if (r != 0) goto err;\r
+\r
+ id = GIMMICK_(save)->id;\r
+ r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,\r
+ id, env);\r
+ if (r != 0) goto err;\r
+\r
+ r = node_new_fail(&ns[1], env);\r
+ if (r != 0) goto err;\r
+\r
+ x = make_list(2, ns);\r
+ if (IS_NULL(x)) goto err0;\r
+\r
+ ns[0] = NULL_NODE; ns[1] = x;\r
+\r
+ r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_INIT, 0, env);\r
+ if (r != 0) goto err;\r
+\r
+ x = make_alt(2, ns);\r
+ if (IS_NULL(x)) goto err0;\r
+\r
+ NODE_STATUS_ADD(x, SUPER);\r
+\r
+ ns[0] = save;\r
+ ns[1] = x;\r
+ save = NULL_NODE; /* now tracked through ns[0]; avoids freeing it twice in err */\r
+ x = make_list(2, ns);\r
+ if (IS_NULL(x)) goto err0;\r
+\r
+ *node = x;\r
+ return ONIG_NORMAL;\r
+\r
+ err0:\r
+ r = ONIGERR_MEMORY;\r
+ err:\r
+ onig_node_free(save);\r
+ onig_node_free(ns[0]);\r
+ onig_node_free(ns[1]);\r
+ return r;\r
+}\r
+/*\r
+ * Test whether `node' is a greedy quantifier -- optionally wrapped in a\r
+ * stop-backtrack enclosure -- whose body is a character class or a string\r
+ * of exactly one character.\r
+ *\r
+ * Returns 1 if so, and then takes the tree apart: a wrapping enclosure is\r
+ * freed, the quantifier's body link is cleared, and the quantifier and its\r
+ * body are handed back through *rquant and *rbody.  *is_possessive is 1\r
+ * when the enclosure wrapper was present.\r
+ * Returns 0 otherwise; `node' is then left untouched and *rquant/*rbody\r
+ * are 0.\r
+ */\r
+static int\r
+is_simple_one_char_repeat(Node* node, Node** rquant, Node** rbody,\r
+ int* is_possessive, ScanEnv* env)\r
+{\r
+ Node* quant;\r
+ Node* body;\r
+\r
+ *rquant = *rbody = 0;\r
+ *is_possessive = 0;\r
+\r
+ if (NODE_TYPE(node) == NODE_QUANT) {\r
+ quant = node;\r
+ }\r
+ else {\r
+ if (NODE_TYPE(node) == NODE_ENCLOSURE) {\r
+ EnclosureNode* en = ENCLOSURE_(node);\r
+ if (en->type == ENCLOSURE_STOP_BACKTRACK) {\r
+ *is_possessive = 1;\r
+ quant = NODE_ENCLOSURE_BODY(en);\r
+ if (NODE_TYPE(quant) != NODE_QUANT)\r
+ return 0;\r
+ }\r
+ else\r
+ return 0;\r
+ }\r
+ else\r
+ return 0;\r
+ }\r
+\r
+ if (QUANT_(quant)->greedy == 0)\r
+ return 0;\r
+\r
+ body = NODE_BODY(quant);\r
+ switch (NODE_TYPE(body)) {\r
+ case NODE_STRING:\r
+ {\r
+ int len;\r
+ StrNode* sn = STR_(body);\r
+ UChar *s = sn->s;\r
+\r
+ len = 0; /* number of characters (not bytes) in the string */\r
+ while (s < sn->end) {\r
+ s += enclen(env->enc, s);\r
+ len++;\r
+ }\r
+ if (len != 1)\r
+ return 0;\r
+ }\r
+ /* fall through: a one-character string is accepted like a class */\r
+ case NODE_CCLASS:\r
+ break;\r
+\r
+ default:\r
+ return 0;\r
+ break;\r
+ }\r
+\r
+ if (node != quant) {\r
+ NODE_BODY(node) = 0; /* detach quant so freeing the wrapper keeps it alive */\r
+ onig_node_free(node);\r
+ }\r
+ NODE_BODY(quant) = NULL_NODE;\r
+ *rquant = quant;\r
+ *rbody = body;\r
+ return 1;\r
+}\r
+/*\r
+ * Build the absent tree for the case where the repeated expression is a\r
+ * simple one-character repeat, and store it in *node:\r
+ *   list( save-right-range,\r
+ *         absent engine (absent, body, quant's lower/upper, possessive),\r
+ *         update-right-range-from-stack(saved id) )\r
+ * Only the lower/upper bounds of `quant' are used; `quant' itself is freed\r
+ * here on every path.  *node is NULL_NODE on failure.\r
+ * Returns ONIG_NORMAL on success, otherwise an error code.\r
+ *\r
+ * NOTE(review): on failure this frees `body' and `absent' (ns[2]/ns[3])\r
+ * unless they were already handed to make_absent_engine successfully, and\r
+ * make_absent_engine's own error path may have freed them as well; the\r
+ * caller in make_absent_tree frees `quant' and `body' again after an\r
+ * error.  Ownership on the failure paths should be confirmed.\r
+ */\r
+static int\r
+make_absent_tree_for_simple_one_char_repeat(Node** node, Node* absent, Node* quant,\r
+ Node* body, int possessive, ScanEnv* env)\r
+{\r
+ int r;\r
+ int i;\r
+ int id1;\r
+ int lower, upper;\r
+ Node* x;\r
+ Node* ns[4];\r
+\r
+ *node = NULL_NODE;\r
+ r = ONIGERR_MEMORY;\r
+ ns[0] = ns[1] = NULL_NODE;\r
+ ns[2] = body, ns[3] = absent;\r
+\r
+ lower = QUANT_(quant)->lower;\r
+ upper = QUANT_(quant)->upper;\r
+ onig_node_free(quant); /* only the bounds were needed */\r
+\r
+ r = node_new_save_gimmick(&ns[0], SAVE_RIGHT_RANGE, env);\r
+ if (r != 0) goto err;\r
+\r
+ id1 = GIMMICK_(ns[0])->id;\r
+\r
+ r = make_absent_engine(&ns[1], id1, absent, body, lower, upper, possessive,\r
+ 0, env);\r
+ if (r != 0) goto err;\r
+\r
+ ns[2] = ns[3] = NULL_NODE; /* body and absent now belong to the engine tree */\r
+\r
+ r = node_new_update_var_gimmick(&ns[2], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,\r
+ id1, env);\r
+ if (r != 0) goto err;\r
+\r
+ x = make_list(3, ns);\r
+ if (IS_NULL(x)) goto err0;\r
+\r
+ *node = x;\r
+ return ONIG_NORMAL;\r
+\r
+ err0:\r
+ r = ONIGERR_MEMORY;\r
+ err:\r
+ for (i = 0; i < 4; i++) onig_node_free(ns[i]);\r
+ return r;\r
+}\r
+/*\r
+ * Build the tree for an absent construct and store it in *node.\r
+ *\r
+ * When is_range_cutter == 0 and `expr' is NULL (a default \O* is created)\r
+ * or is a simple one-character repeat, the work is delegated to\r
+ * make_absent_tree_for_simple_one_char_repeat().\r
+ *\r
+ * Otherwise the result is a list of\r
+ *   save-right-range, save-S, absent engine (possessive, stepping over a\r
+ *   true-anychar node), update-S-from-stack\r
+ * followed, when is_range_cutter == 0, by `expr' and the two nodes\r
+ * produced by make_absent_tail().\r
+ * Returns ONIG_NORMAL on success, otherwise an error code.\r
+ *\r
+ * NOTE(review): on a failed delegation this function frees `quant' and\r
+ * `body' and, through ns[5], `absent', although the callee frees `quant'\r
+ * unconditionally and frees its pending nodes on error -- possible double\r
+ * free on the out-of-memory path; confirm.\r
+ */\r
+static int\r
+make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter,\r
+ ScanEnv* env)\r
+{\r
+ int r;\r
+ int i;\r
+ int id1, id2;\r
+ int possessive;\r
+ Node* x;\r
+ Node* ns[7];\r
+\r
+ r = ONIGERR_MEMORY;\r
+ for (i = 0; i < 7; i++) ns[i] = NULL_NODE;\r
+ ns[4] = expr; ns[5] = absent;\r
+\r
+ if (is_range_cutter == 0) {\r
+ Node* quant;\r
+ Node* body;\r
+\r
+ if (expr == NULL_NODE) {\r
+ /* default expr \O* */\r
+ quant = node_new_quantifier(0, REPEAT_INFINITE, 0);\r
+ if (IS_NULL(quant)) goto err0;\r
+\r
+ r = node_new_true_anychar(&body, env);\r
+ if (r != 0) {\r
+ onig_node_free(quant);\r
+ goto err;\r
+ }\r
+ possessive = 0;\r
+ goto simple;\r
+ }\r
+ else {\r
+ if (is_simple_one_char_repeat(expr, &quant, &body, &possessive, env)) {\r
+ simple:\r
+ r = make_absent_tree_for_simple_one_char_repeat(node, absent, quant,\r
+ body, possessive, env);\r
+ if (r != 0) {\r
+ ns[4] = NULL_NODE; /* expr was taken apart by is_simple_one_char_repeat (or was NULL) */\r
+ onig_node_free(quant);\r
+ onig_node_free(body);\r
+ goto err;\r
+ }\r
+\r
+ return ONIG_NORMAL;\r
+ }\r
+ }\r
+ }\r
+\r
+ r = node_new_save_gimmick(&ns[0], SAVE_RIGHT_RANGE, env);\r
+ if (r != 0) goto err;\r
+\r
+ id1 = GIMMICK_(ns[0])->id;\r
+\r
+ r = node_new_save_gimmick(&ns[1], SAVE_S, env);\r
+ if (r != 0) goto err;\r
+\r
+ id2 = GIMMICK_(ns[1])->id;\r
+\r
+ r = node_new_true_anychar(&ns[3], env);\r
+ if (r != 0) goto err;\r
+\r
+ possessive = 1;\r
+ r = make_absent_engine(&ns[2], id1, absent, ns[3], 0, REPEAT_INFINITE,\r
+ possessive, is_range_cutter, env);\r
+ if (r != 0) goto err;\r
+\r
+ ns[3] = NULL_NODE; /* the anychar step node is now part of the engine tree */\r
+ ns[5] = NULL_NODE; /* so is absent */\r
+\r
+ r = node_new_update_var_gimmick(&ns[3], UPDATE_VAR_S_FROM_STACK, id2, env);\r
+ if (r != 0) goto err;\r
+\r
+ if (is_range_cutter != 0) {\r
+ x = make_list(4, ns);\r
+ if (IS_NULL(x)) goto err0;\r
+ }\r
+ else {\r
+ r = make_absent_tail(&ns[5], &ns[6], id1, env);\r
+ if (r != 0) goto err;\r
+ \r
+ x = make_list(7, ns);\r
+ if (IS_NULL(x)) goto err0;\r
+ }\r
+\r
+ *node = x;\r
+ return ONIG_NORMAL;\r
+\r
+ err0:\r
+ r = ONIGERR_MEMORY;\r
+ err:\r
+ for (i = 0; i < 7; i++) onig_node_free(ns[i]);\r
+ return r; \r
+}\r
+/*\r
+ * Append the bytes [s, end) to the string node `node'.\r
+ * While the node has no heap capacity (capa == 0) and the result fits in\r
+ * NODE_STRING_BUF_SIZE - 1 bytes, the bytes are copied to s + len in place;\r
+ * otherwise the string is moved to / grown in heap storage with\r
+ * NODE_STRING_MARGIN bytes of slack.  Nothing happens when end <= s.\r
+ * Returns 0 on success.\r
+ */\r
+extern int\r
+onig_node_str_cat(Node* node, const UChar* s, const UChar* end)\r
+{\r
+ int addlen = (int )(end - s);\r
+\r
+ if (addlen > 0) {\r
+ int len = (int )(STR_(node)->end - STR_(node)->s);\r
+\r
+ if (STR_(node)->capa > 0 || (len + addlen > NODE_STRING_BUF_SIZE - 1)) {\r
+ UChar* p;\r
+ int capa = len + addlen + NODE_STRING_MARGIN;\r
+\r
+ if (capa <= STR_(node)->capa) { /* existing heap buffer is large enough */\r
+ onig_strcpy(STR_(node)->s + len, s, end);\r
+ }\r
+ else {\r
+ if (STR_(node)->s == STR_(node)->buf) /* still in the embedded buffer */\r
+ p = strcat_capa_from_static(STR_(node)->s, STR_(node)->end,\r
+ s, end, capa);\r
+ else\r
+ p = strcat_capa(STR_(node)->s, STR_(node)->end, s, end, capa, STR_(node)->capa);\r
+\r
+ CHECK_NULL_RETURN_MEMERR(p);\r
+ STR_(node)->s = p;\r
+ STR_(node)->capa = capa;\r
+ }\r
+ }\r
+ else {\r
+ onig_strcpy(STR_(node)->s + len, s, end);\r
+ }\r
+ STR_(node)->end = STR_(node)->s + len + addlen;\r
+ }\r
+\r
+ return 0;\r
+}\r
+\r
+/* Replace the contents of string node `node' with the bytes [s, end):\r
+   the node is cleared first, then the bytes are appended.\r
+   Returns the result of onig_node_str_cat(). */\r
+extern int\r
+onig_node_str_set(Node* node, const UChar* s, const UChar* end)\r
+{\r
+  int status;\r
+\r
+  onig_node_str_clear(node);\r
+  status = onig_node_str_cat(node, s, end);\r
+  return status;\r
+}\r
+\r
+/* Append the single byte `c' to string node `node'.\r
+   Returns the result of onig_node_str_cat(). */\r
+static int\r
+node_str_cat_char(Node* node, UChar c)\r
+{\r
+  UChar ch = c;\r
+\r
+  return onig_node_str_cat(node, &ch, &ch + 1);\r
+}\r
+\r
+/* Turn `node' into an empty NODE_STRING node with the given flag: no heap\r
+   capacity, s and end both pointing at the embedded buffer. */\r
+extern void\r
+onig_node_conv_to_str_node(Node* node, int flag)\r
+{\r
+  StrNode* sn;\r
+\r
+  NODE_SET_TYPE(node, NODE_STRING);\r
+  sn = STR_(node);\r
+  sn->flag = flag;\r
+  sn->capa = 0;\r
+  sn->s    = sn->buf;\r
+  sn->end  = sn->buf;\r
+}\r
+\r
+/* Reset string node `node' to the empty string.  Heap storage (capa != 0\r
+   and s not pointing at the embedded buffer) is released; flag and capa\r
+   are cleared and s/end point at the embedded buffer again. */\r
+extern void\r
+onig_node_str_clear(Node* node)\r
+{\r
+  StrNode* sn = STR_(node);\r
+\r
+  if (sn->capa != 0 && IS_NOT_NULL(sn->s) && sn->s != sn->buf) {\r
+    xfree(sn->s);\r
+  }\r
+\r
+  sn->flag = 0;\r
+  sn->capa = 0;\r
+  sn->s    = sn->buf;\r
+  sn->end  = sn->buf;\r
+}\r
+\r
+/* Create a NODE_STRING node holding a copy of the bytes [s, end).\r
+   Returns NULL when the copy fails (the node is freed first). */\r
+static Node*\r
+node_new_str(const UChar* s, const UChar* end)\r
+{\r
+  StrNode* sn;\r
+  Node* str;\r
+\r
+  str = node_new();\r
+  CHECK_NULL_RETURN(str);\r
+\r
+  NODE_SET_TYPE(str, NODE_STRING);\r
+  sn = STR_(str);\r
+  sn->flag = 0;\r
+  sn->capa = 0;\r
+  sn->s    = sn->buf;\r
+  sn->end  = sn->buf;\r
+\r
+  if (onig_node_str_cat(str, s, end) != 0) {\r
+    onig_node_free(str);\r
+    return NULL;\r
+  }\r
+  return str;\r
+}\r
+\r
+/* Exported wrapper around node_new_str(). */\r
+extern Node*\r
+onig_node_new_str(const UChar* s, const UChar* end)\r
+{\r
+  Node* str = node_new_str(s, end);\r
+\r
+  return str;\r
+}\r
+\r
+/* Create a string node holding a copy of [s, end) and mark it as raw.\r
+   Returns a null pointer when node_new_str() fails. */\r
+static Node*\r
+node_new_str_raw(UChar* s, UChar* end)\r
+{\r
+  Node* node = node_new_str(s, end);\r
+  /* node_new_str() returns NULL on allocation failure; setting the raw\r
+     flag on a null node would dereference it. */\r
+  CHECK_NULL_RETURN(node);\r
+\r
+  NODE_STRING_SET_RAW(node);\r
+  return node;\r
+}\r
+\r
+/* Create an empty string node (node_new_str with a NULL range). */\r
+static Node*\r
+node_new_empty(void)\r
+{\r
+  Node* empty = node_new_str(NULL, NULL);\r
+\r
+  return empty;\r
+}\r
+\r
+/* Create a raw string node that holds the single byte `c'. */\r
+static Node*\r
+node_new_str_raw_char(UChar c)\r
+{\r
+  UChar ch = c;\r
+\r
+  return node_new_str_raw(&ch, &ch + 1);\r
+}\r
+\r
+/* Split the last character off string node `node'.\r
+   When the string holds more than one character, a new string node with\r
+   the last character is returned (raw if `node' is raw) and `node' is\r
+   shortened accordingly.  Returns NULL_NODE when the string cannot be\r
+   split or when the new node cannot be allocated; `node' is unchanged in\r
+   both cases. */\r
+static Node*\r
+str_node_split_last_char(Node* node, OnigEncoding enc)\r
+{\r
+  const UChar *p;\r
+  Node* rn;\r
+  StrNode* sn;\r
+\r
+  sn = STR_(node);\r
+  rn = NULL_NODE;\r
+  if (sn->end > sn->s) {\r
+    p = onigenc_get_prev_char_head(enc, sn->s, sn->end);\r
+    if (p && p > sn->s) { /* can be split. */\r
+      rn = node_new_str(p, sn->end);\r
+      /* On allocation failure do not touch rn (null) and do not truncate\r
+         the source string, so no character is lost. */\r
+      CHECK_NULL_RETURN(rn);\r
+\r
+      if (NODE_STRING_IS_RAW(node))\r
+        NODE_STRING_SET_RAW(rn);\r
+\r
+      sn->end = (UChar* )p;\r
+    }\r
+  }\r
+  return rn;\r
+}\r
+\r
+/* Return 1 when string node `node' is longer than its first character\r
+   (as measured by enclen), 0 otherwise -- including for an empty string. */\r
+static int\r
+str_node_can_be_split(Node* node, OnigEncoding enc)\r
+{\r
+  StrNode* sn = STR_(node);\r
+\r
+  if (sn->end <= sn->s) return 0;\r
+\r
+  if (enclen(enc, sn->s) < sn->end - sn->s)\r
+    return 1;\r
+\r
+  return 0;\r
+}\r
+\r
+#ifdef USE_PAD_TO_SHORT_BYTE_CHAR\r
+/* Insert `num' bytes of value `val' in front of the string in `sn': the\r
+   current contents are shifted right by `num' bytes through a temporary\r
+   buffer and the gap is filled with `val'.  Returns 0.\r
+   NOTE(review): the temporary buffer holds NODE_STRING_BUF_SIZE bytes and\r
+   no capacity check is made on sn->s; callers presumably guarantee that\r
+   the padded string fits -- confirm. */\r
+static int\r
+node_str_head_pad(StrNode* sn, int num, UChar val)\r
+{\r
+  UChar buf[NODE_STRING_BUF_SIZE];\r
+  int i, len;\r
+\r
+  len = (int )(sn->end - sn->s);\r
+  onig_strcpy(buf, sn->s, sn->end);\r
+  onig_strcpy(&(sn->s[num]), buf, buf + len);\r
+  sn->end += num;\r
+\r
+  for (i = 0; i < num; i++) {\r
+    sn->s[i] = val;\r
+  }\r
+\r
+  /* The function is declared int but previously fell off its end, leaving\r
+     the result undefined for any caller that reads it. */\r
+  return 0;\r
+}\r
+#endif\r
+/*\r
+ * Scan a run of ASCII decimal digits starting at *src (up to `end').\r
+ * On success *src is set to the scan position after the digits and the\r
+ * accumulated value is returned (0 when no digit was read).\r
+ * Returns -1, leaving *src unchanged, when the value would exceed\r
+ * INT_MAX_LIMIT.\r
+ * PFETCH_READY / PEND / PFETCH / PUNFETCH are macros defined elsewhere;\r
+ * presumably they operate on the locals p and end (and enc) -- confirm.\r
+ */\r
+extern int\r
+onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc)\r
+{\r
+ unsigned int num, val;\r
+ OnigCodePoint c;\r
+ UChar* p = *src;\r
+ PFETCH_READY;\r
+\r
+ num = 0;\r
+ while (! PEND) {\r
+ PFETCH(c);\r
+ if (IS_CODE_DIGIT_ASCII(enc, c)) {\r
+ val = (unsigned int )DIGITVAL(c);\r
+ if ((INT_MAX_LIMIT - val) / 10UL < num) /* num * 10 + val would pass the limit */\r
+ return -1; /* overflow */\r
+\r
+ num = num * 10 + val;\r
+ }\r
+ else {\r
+ PUNFETCH;\r
+ break;\r
+ }\r
+ }\r
+ *src = p;\r
+ return num;\r
+}\r
+/*\r
+ * Scan at most `maxlen' ASCII hexadecimal digits starting at *src.\r
+ * On success *src is set to the scan position after the digits and the\r
+ * value is returned.  Returns ONIGERR_TOO_BIG_NUMBER when the value would\r
+ * exceed INT_MAX_LIMIT and ONIGERR_INVALID_CODE_POINT_VALUE when fewer\r
+ * than `minlen' digits were read; *src is unchanged in both cases.\r
+ */\r
+static int\r
+scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int minlen,\r
+ int maxlen, OnigEncoding enc)\r
+{\r
+ OnigCodePoint c;\r
+ unsigned int num, val;\r
+ int n;\r
+ UChar* p = *src;\r
+ PFETCH_READY;\r
+\r
+ num = 0;\r
+ n = 0; /* number of digits consumed so far */\r
+ while (! PEND && n < maxlen) {\r
+ PFETCH(c);\r
+ if (IS_CODE_XDIGIT_ASCII(enc, c)) {\r
+ n++;\r
+ val = (unsigned int )XDIGITVAL(enc,c);\r
+ if ((INT_MAX_LIMIT - val) / 16UL < num)\r
+ return ONIGERR_TOO_BIG_NUMBER; /* overflow */\r
+\r
+ num = (num << 4) + XDIGITVAL(enc,c);\r
+ }\r
+ else {\r
+ PUNFETCH;\r
+ break;\r
+ }\r
+ }\r
+\r
+ if (n < minlen)\r
+ return ONIGERR_INVALID_CODE_POINT_VALUE;\r
+\r
+ *src = p;\r
+ return num;\r
+}\r
+/*\r
+ * Scan at most `maxlen' ASCII octal digits ('0'..'7') starting at *src.\r
+ * On success *src is set to the scan position after the digits and the\r
+ * value is returned.  Returns -1, leaving *src unchanged, when the value\r
+ * would exceed INT_MAX_LIMIT.\r
+ */\r
+static int\r
+scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen,\r
+ OnigEncoding enc)\r
+{\r
+ OnigCodePoint c;\r
+ unsigned int num, val;\r
+ UChar* p = *src;\r
+ PFETCH_READY;\r
+\r
+ num = 0;\r
+ while (! PEND && maxlen-- != 0) {\r
+ PFETCH(c);\r
+ if (IS_CODE_DIGIT_ASCII(enc, c) && c < '8') {\r
+ val = ODIGITVAL(c);\r
+ if ((INT_MAX_LIMIT - val) / 8UL < num)\r
+ return -1; /* overflow */\r
+\r
+ num = (num << 3) + val;\r
+ }\r
+ else {\r
+ PUNFETCH;\r
+ break;\r
+ }\r
+ }\r
+ *src = p;\r
+ return num;\r
+}\r
+\r
+/* Write SIZE_CODE_POINT bytes taken from the address of `code' (which must\r
+   therefore be an lvalue) at byte offset `pos' of bbuf, via BB_WRITE. */\r
+#define BB_WRITE_CODE_POINT(bbuf,pos,code) \\r
+ BB_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT)\r
+\r
+/* data format:\r
+ [n][from-1][to-1][from-2][to-2] ... [from-n][to-n]\r
+ (all data size is OnigCodePoint)\r
+ */\r
+/* Allocate an empty code range buffer (range count 0) and store it in\r
+   *pbuf.  If the buffer storage cannot be initialized, the BBuf is freed,\r
+   *pbuf is set to 0 and the BB_INIT error is returned.\r
+   Returns 0 on success. */\r
+static int\r
+new_code_range(BBuf** pbuf)\r
+{\r
+#define INIT_MULTI_BYTE_RANGE_SIZE  (SIZE_CODE_POINT * 5)\r
+  int status;\r
+  OnigCodePoint count;\r
+  BBuf* buf;\r
+\r
+  buf = (BBuf* )xmalloc(sizeof(BBuf));\r
+  *pbuf = buf;\r
+  CHECK_NULL_RETURN_MEMERR(buf);\r
+\r
+  status = BB_INIT(buf, INIT_MULTI_BYTE_RANGE_SIZE);\r
+  if (status != 0) {\r
+    xfree(buf);\r
+    *pbuf = 0;\r
+    return status;\r
+  }\r
+\r
+  count = 0;\r
+  BB_WRITE_CODE_POINT(buf, 0, count);\r
+  return 0;\r
+}\r
+/*\r
+ * Insert the code point range [from, to] into the sorted range buffer\r
+ * *pbuf (created on demand when *pbuf is NULL).  A reversed pair is\r
+ * swapped first.  Existing ranges that overlap or directly adjoin the new\r
+ * one are merged into a single entry.\r
+ * Returns 0 on success, ONIGERR_TOO_MANY_MULTI_BYTE_RANGES when the range\r
+ * count would exceed ONIG_MAX_MULTI_BYTE_RANGES_NUM, or an error from the\r
+ * buffer helpers.\r
+ */\r
+static int\r
+add_code_range_to_buf(BBuf** pbuf, OnigCodePoint from, OnigCodePoint to)\r
+{\r
+ int r, inc_n, pos;\r
+ int low, high, bound, x;\r
+ OnigCodePoint n, *data;\r
+ BBuf* bbuf;\r
+\r
+ if (from > to) {\r
+ n = from; from = to; to = n;\r
+ }\r
+\r
+ if (IS_NULL(*pbuf)) {\r
+ r = new_code_range(pbuf);\r
+ if (r != 0) return r;\r
+ bbuf = *pbuf;\r
+ n = 0;\r
+ }\r
+ else {\r
+ bbuf = *pbuf;\r
+ GET_CODE_POINT(n, bbuf->p);\r
+ }\r
+ data = (OnigCodePoint* )(bbuf->p);\r
+ data++; /* skip the leading range count */\r
+ /* low: index of the first range whose upper bound is >= from */\r
+ for (low = 0, bound = n; low < bound; ) {\r
+ x = (low + bound) >> 1;\r
+ if (from > data[x*2 + 1])\r
+ low = x + 1;\r
+ else\r
+ bound = x;\r
+ }\r
+ /* high: index of the first range that starts beyond to + 1 */\r
+ high = (to == ~((OnigCodePoint )0)) ? n : low;\r
+ for (bound = n; high < bound; ) {\r
+ x = (high + bound) >> 1;\r
+ if (to + 1 >= data[x*2])\r
+ high = x + 1;\r
+ else\r
+ bound = x;\r
+ }\r
+ /* ranges low .. high-1 are replaced by one merged range */\r
+ inc_n = low + 1 - high;\r
+ if (n + inc_n > ONIG_MAX_MULTI_BYTE_RANGES_NUM)\r
+ return ONIGERR_TOO_MANY_MULTI_BYTE_RANGES;\r
+\r
+ if (inc_n != 1) { /* at least one existing range is absorbed: widen to cover it */\r
+ if (from > data[low*2])\r
+ from = data[low*2];\r
+ if (to < data[(high - 1)*2 + 1])\r
+ to = data[(high - 1)*2 + 1];\r
+ }\r
+\r
+ if (inc_n != 0 && (OnigCodePoint )high < n) {\r
+ int from_pos = SIZE_CODE_POINT * (1 + high * 2);\r
+ int to_pos = SIZE_CODE_POINT * (1 + (low + 1) * 2);\r
+ int size = (n - high) * 2 * SIZE_CODE_POINT;\r
+\r
+ if (inc_n > 0) {\r
+ BB_MOVE_RIGHT(bbuf, from_pos, to_pos, size);\r
+ }\r
+ else {\r
+ BB_MOVE_LEFT_REDUCE(bbuf, from_pos, to_pos);\r
+ }\r
+ }\r
+\r
+ pos = SIZE_CODE_POINT * (1 + low * 2);\r
+ BB_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2);\r
+ BB_WRITE_CODE_POINT(bbuf, pos, from);\r
+ BB_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to);\r
+ n += inc_n;\r
+ BB_WRITE_CODE_POINT(bbuf, 0, n);\r
+\r
+ return 0;\r
+}\r
+\r
+/* Add [from, to] to the range buffer *pbuf.  A reversed range (from > to)\r
+   is silently ignored when the syntax allows empty ranges in character\r
+   classes and rejected with ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS otherwise. */\r
+static int\r
+add_code_range(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to)\r
+{\r
+  if (from <= to)\r
+    return add_code_range_to_buf(pbuf, from, to);\r
+\r
+  if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))\r
+    return 0;\r
+\r
+  return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;\r
+}\r
+/*\r
+ * Store in *pbuf the complement of the range buffer `bbuf' within\r
+ * [MBCODE_START_POS(enc), ~0].  A NULL or empty `bbuf' yields the result\r
+ * of SET_ALL_MULTI_BYTE_RANGE.  *pbuf starts out NULL and may stay NULL\r
+ * when nothing is left to add.  Returns 0 on success.\r
+ */\r
+static int\r
+not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf)\r
+{\r
+ int r, i, n;\r
+ OnigCodePoint pre, from, *data, to = 0;\r
+\r
+ *pbuf = (BBuf* )NULL;\r
+ if (IS_NULL(bbuf)) {\r
+ set_all:\r
+ return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);\r
+ }\r
+\r
+ data = (OnigCodePoint* )(bbuf->p);\r
+ GET_CODE_POINT(n, data);\r
+ data++;\r
+ if (n <= 0) goto set_all;\r
+\r
+ r = 0;\r
+ pre = MBCODE_START_POS(enc); /* start of the gap before the current range */\r
+ for (i = 0; i < n; i++) {\r
+ from = data[i*2];\r
+ to = data[i*2+1];\r
+ if (pre <= from - 1) {\r
+ r = add_code_range_to_buf(pbuf, pre, from - 1);\r
+ if (r != 0) return r;\r
+ }\r
+ if (to == ~((OnigCodePoint )0)) break; /* to + 1 would wrap around */\r
+ pre = to + 1;\r
+ }\r
+ if (to < ~((OnigCodePoint )0)) { /* gap after the last range */\r
+ r = add_code_range_to_buf(pbuf, to + 1, ~((OnigCodePoint )0));\r
+ }\r
+ return r;\r
+}\r
+/* Exchange two (range buffer, negation flag) pairs in place.  All four\r
+   arguments must be assignable lvalues. */\r
+#define SWAP_BB_NOT(bbuf1, not1, bbuf2, not2) do {\\r
+ BBuf *tbuf; \\r
+ int tnot; \\r
+ tnot = not1; not1 = not2; not2 = tnot; \\r
+ tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf; \\r
+} while (0)\r
+/*\r
+ * Store in *pbuf the union of two range buffers, each of which may be\r
+ * flagged as negated (not1 / not2).  A NULL buffer stands for "no\r
+ * ranges".  *pbuf starts out NULL and stays NULL when both inputs are\r
+ * NULL and neither is negated.  Returns 0 on success.\r
+ * NOTE(review): when both buffers are non-NULL and both are negated,\r
+ * neither initialisation branch below runs and only bbuf1's ranges are\r
+ * added -- presumably callers never pass that combination; confirm.\r
+ */\r
+static int\r
+or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1,\r
+ BBuf* bbuf2, int not2, BBuf** pbuf)\r
+{\r
+ int r;\r
+ OnigCodePoint i, n1, *data1;\r
+ OnigCodePoint from, to;\r
+\r
+ *pbuf = (BBuf* )NULL;\r
+ if (IS_NULL(bbuf1) && IS_NULL(bbuf2)) {\r
+ if (not1 != 0 || not2 != 0)\r
+ return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);\r
+ return 0;\r
+ }\r
+\r
+ r = 0;\r
+ if (IS_NULL(bbuf2)) /* make bbuf1 the NULL one, if any */\r
+ SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);\r
+\r
+ if (IS_NULL(bbuf1)) {\r
+ if (not1 != 0) {\r
+ return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);\r
+ }\r
+ else {\r
+ if (not2 == 0) {\r
+ return bbuf_clone(pbuf, bbuf2);\r
+ }\r
+ else {\r
+ return not_code_range_buf(enc, bbuf2, pbuf);\r
+ }\r
+ }\r
+ }\r
+\r
+ if (not1 != 0) /* prefer a non-negated buffer in the first position */\r
+ SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);\r
+\r
+ data1 = (OnigCodePoint* )(bbuf1->p);\r
+ GET_CODE_POINT(n1, data1);\r
+ data1++;\r
+\r
+ if (not2 == 0 && not1 == 0) { /* 1 OR 2 */\r
+ r = bbuf_clone(pbuf, bbuf2);\r
+ }\r
+ else if (not1 == 0) { /* 1 OR (not 2) */\r
+ r = not_code_range_buf(enc, bbuf2, pbuf);\r
+ }\r
+ if (r != 0) return r;\r
+ /* merge the ranges of buffer 1 into the result */\r
+ for (i = 0; i < n1; i++) {\r
+ from = data1[i*2];\r
+ to = data1[i*2+1];\r
+ r = add_code_range_to_buf(pbuf, from, to);\r
+ if (r != 0) return r;\r
+ }\r
+ return 0;\r
+}\r
+/*\r
+ * Helper for the "1 AND (not 2)" case of and_code_range_buf(): walks the\r
+ * `n' ranges in `data' (from/to pairs) and trims or splits the range\r
+ * [from1, to1] against them, adding the surviving pieces to *pbuf.\r
+ * Returns 0 on success or the error of add_code_range_to_buf().\r
+ * NOTE(review): in the final else branch (from2 > to1) from1 is set to\r
+ * from2, which makes from1 > to1, so the loop stops and the remaining\r
+ * [from1, to1] is not added -- confirm that dropping it is intended.\r
+ */\r
+static int\r
+and_code_range1(BBuf** pbuf, OnigCodePoint from1, OnigCodePoint to1,\r
+ OnigCodePoint* data, int n)\r
+{\r
+ int i, r;\r
+ OnigCodePoint from2, to2;\r
+\r
+ for (i = 0; i < n; i++) {\r
+ from2 = data[i*2];\r
+ to2 = data[i*2+1];\r
+ if (from2 < from1) {\r
+ if (to2 < from1) continue; /* range 2 lies entirely before range 1 */\r
+ else {\r
+ from1 = to2 + 1; /* cut the covered front part */\r
+ }\r
+ }\r
+ else if (from2 <= to1) {\r
+ if (to2 < to1) { /* range 2 lies inside: emit the part before it */\r
+ if (from1 <= from2 - 1) {\r
+ r = add_code_range_to_buf(pbuf, from1, from2-1);\r
+ if (r != 0) return r;\r
+ }\r
+ from1 = to2 + 1;\r
+ }\r
+ else {\r
+ to1 = from2 - 1; /* cut the covered tail part */\r
+ }\r
+ }\r
+ else {\r
+ from1 = from2;\r
+ }\r
+ if (from1 > to1) break;\r
+ }\r
+ if (from1 <= to1) {\r
+ r = add_code_range_to_buf(pbuf, from1, to1);\r
+ if (r != 0) return r;\r
+ }\r
+ return 0;\r
+}\r
+/*\r
+ * Store in *pbuf the intersection of two range buffers, each of which may\r
+ * be flagged as negated (not1 / not2).  A NULL buffer stands for "no\r
+ * ranges".  *pbuf starts out NULL and stays NULL when the result has no\r
+ * ranges.  Only the combinations "1 AND 2" and "1 AND (not 2)" (after\r
+ * swapping a negated first operand into second place) add ranges here.\r
+ * Returns 0 on success.\r
+ */\r
+static int\r
+and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)\r
+{\r
+ int r;\r
+ OnigCodePoint i, j, n1, n2, *data1, *data2;\r
+ OnigCodePoint from, to, from1, to1, from2, to2;\r
+\r
+ *pbuf = (BBuf* )NULL;\r
+ if (IS_NULL(bbuf1)) {\r
+ if (not1 != 0 && IS_NOT_NULL(bbuf2)) /* not1 != 0 -> not2 == 0 */\r
+ return bbuf_clone(pbuf, bbuf2);\r
+ return 0;\r
+ }\r
+ else if (IS_NULL(bbuf2)) {\r
+ if (not2 != 0)\r
+ return bbuf_clone(pbuf, bbuf1);\r
+ return 0;\r
+ }\r
+\r
+ if (not1 != 0)\r
+ SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);\r
+\r
+ data1 = (OnigCodePoint* )(bbuf1->p);\r
+ data2 = (OnigCodePoint* )(bbuf2->p);\r
+ GET_CODE_POINT(n1, data1);\r
+ GET_CODE_POINT(n2, data2);\r
+ data1++;\r
+ data2++;\r
+\r
+ if (not2 == 0 && not1 == 0) { /* 1 AND 2 */\r
+ for (i = 0; i < n1; i++) {\r
+ from1 = data1[i*2];\r
+ to1 = data1[i*2+1];\r
+ for (j = 0; j < n2; j++) {\r
+ from2 = data2[j*2];\r
+ to2 = data2[j*2+1];\r
+ if (from2 > to1) break; /* no overlap with this range 2; inner scan ends */\r
+ if (to2 < from1) continue;\r
+ from = MAX(from1, from2);\r
+ to = MIN(to1, to2);\r
+ r = add_code_range_to_buf(pbuf, from, to);\r
+ if (r != 0) return r;\r
+ }\r
+ }\r
+ }\r
+ else if (not1 == 0) { /* 1 AND (not 2) */\r
+ for (i = 0; i < n1; i++) {\r
+ from1 = data1[i*2];\r
+ to1 = data1[i*2+1];\r
+ r = and_code_range1(pbuf, from1, to1, data2, n2);\r
+ if (r != 0) return r;\r
+ }\r
+ }\r
+\r
+ return 0;\r
+}\r
+\r
+static int\r
+and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)\r
+{\r
+ int r, not1, not2;\r
+ BBuf *buf1, *buf2, *pbuf;\r
+ BitSetRef bsr1, bsr2;\r
+ BitSet bs1, bs2;\r
+\r
+ not1 = IS_NCCLASS_NOT(dest);\r
+ bsr1 = dest->bs;\r
+ buf1 = dest->mbuf;\r
+ not2 = IS_NCCLASS_NOT(cc);\r
+ bsr2 = cc->bs;\r
+ buf2 = cc->mbuf;\r
+\r
+ if (not1 != 0) {\r
bitset_invert_to(bsr1, bs1);\r
bsr1 = bs1;\r
}\r
bitset_and(bsr1, bsr2);\r
if (bsr1 != dest->bs) {\r
bitset_copy(dest->bs, bsr1);\r
- bsr1 = dest->bs;\r
}\r
if (not1 != 0) {\r
bitset_invert(dest->bs);\r
else {\r
r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf);\r
if (r == 0 && not1 != 0) {\r
- BBuf *tbuf;\r
- r = not_code_range_buf(enc, pbuf, &tbuf);\r
- if (r != 0) {\r
- bbuf_free(pbuf);\r
- return r;\r
- }\r
- bbuf_free(pbuf);\r
- pbuf = tbuf;\r
+ BBuf *tbuf;\r
+ r = not_code_range_buf(enc, pbuf, &tbuf);\r
+ if (r != 0) {\r
+ bbuf_free(pbuf);\r
+ return r;\r
+ }\r
+ bbuf_free(pbuf);\r
+ pbuf = tbuf;\r
}\r
}\r
if (r != 0) return r;\r
bitset_or(bsr1, bsr2);\r
if (bsr1 != dest->bs) {\r
bitset_copy(dest->bs, bsr1);\r
- bsr1 = dest->bs;\r
}\r
if (not1 != 0) {\r
bitset_invert(dest->bs);\r
else {\r
r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf);\r
if (r == 0 && not1 != 0) {\r
- BBuf *tbuf;\r
- r = not_code_range_buf(enc, pbuf, &tbuf);\r
- if (r != 0) {\r
- bbuf_free(pbuf);\r
- return r;\r
- }\r
- bbuf_free(pbuf);\r
- pbuf = tbuf;\r
+ BBuf *tbuf;\r
+ r = not_code_range_buf(enc, pbuf, &tbuf);\r
+ if (r != 0) {\r
+ bbuf_free(pbuf);\r
+ return r;\r
+ }\r
+ bbuf_free(pbuf);\r
+ pbuf = tbuf;\r
}\r
}\r
if (r != 0) return r;\r
return 0;\r
}\r
\r
-static int\r
-conv_backslash_value(int c, ScanEnv* env)\r
+static OnigCodePoint\r
+conv_backslash_value(OnigCodePoint c, ScanEnv* env)\r
{\r
if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_CONTROL_CHARS)) {\r
switch (c) {\r
case 'e': return '\033';\r
case 'v':\r
if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_V_VTAB))\r
- return '\v';\r
+ return '\v';\r
break;\r
\r
default:\r
static int\r
is_invalid_quantifier_target(Node* node)\r
{\r
- switch (NTYPE(node)) {\r
- case NT_ANCHOR:\r
+ switch (NODE_TYPE(node)) {\r
+ case NODE_ANCHOR:\r
+ case NODE_GIMMICK:\r
return 1;\r
break;\r
\r
- case NT_ENCLOSE:\r
+ case NODE_ENCLOSURE:\r
/* allow enclosed elements */\r
- /* return is_invalid_quantifier_target(NENCLOSE(node)->target); */\r
+ /* return is_invalid_quantifier_target(NODE_BODY(node)); */\r
break;\r
\r
- case NT_LIST:\r
+ case NODE_LIST:\r
do {\r
- if (! is_invalid_quantifier_target(NCAR(node))) return 0;\r
- } while (IS_NOT_NULL(node = NCDR(node)));\r
+ if (! is_invalid_quantifier_target(NODE_CAR(node))) return 0;\r
+ } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
return 0;\r
break;\r
\r
- case NT_ALT:\r
+ case NODE_ALT:\r
do {\r
- if (is_invalid_quantifier_target(NCAR(node))) return 1;\r
- } while (IS_NOT_NULL(node = NCDR(node)));\r
+ if (is_invalid_quantifier_target(NODE_CAR(node))) return 1;\r
+ } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
break;\r
\r
default:\r
\r
/* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */\r
static int\r
-popular_quantifier_num(QtfrNode* q)\r
+quantifier_type_num(QuantNode* q)\r
{\r
if (q->greedy) {\r
if (q->lower == 0) {\r
onig_reduce_nested_quantifier(Node* pnode, Node* cnode)\r
{\r
int pnum, cnum;\r
- QtfrNode *p, *c;\r
+ QuantNode *p, *c;\r
+\r
+ p = QUANT_(pnode);\r
+ c = QUANT_(cnode);\r
+ pnum = quantifier_type_num(p);\r
+ cnum = quantifier_type_num(c);\r
+ if (pnum < 0 || cnum < 0) {\r
+ if ((p->lower == p->upper) && ! IS_REPEAT_INFINITE(p->upper)) {\r
+ if ((c->lower == c->upper) && ! IS_REPEAT_INFINITE(c->upper)) {\r
+ int n = positive_int_multiply(p->lower, c->lower);\r
+ if (n >= 0) {\r
+ p->lower = p->upper = n;\r
+ NODE_BODY(pnode) = NODE_BODY(cnode);\r
+ goto remove_cnode;\r
+ }\r
+ }\r
+ }\r
\r
- p = NQTFR(pnode);\r
- c = NQTFR(cnode);\r
- pnum = popular_quantifier_num(p);\r
- cnum = popular_quantifier_num(c);\r
- if (pnum < 0 || cnum < 0) return ;\r
+ return ;\r
+ }\r
\r
switch(ReduceTypeTable[cnum][pnum]) {\r
case RQ_DEL:\r
*pnode = *cnode;\r
break;\r
case RQ_A:\r
- p->target = c->target;\r
+ NODE_BODY(pnode) = NODE_BODY(cnode);\r
p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 1;\r
break;\r
case RQ_AQ:\r
- p->target = c->target;\r
+ NODE_BODY(pnode) = NODE_BODY(cnode);\r
p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 0;\r
break;\r
case RQ_QQ:\r
- p->target = c->target;\r
+ NODE_BODY(pnode) = NODE_BODY(cnode);\r
p->lower = 0; p->upper = 1; p->greedy = 0;\r
break;\r
case RQ_P_QQ:\r
- p->target = cnode;\r
+ NODE_BODY(pnode) = cnode;\r
p->lower = 0; p->upper = 1; p->greedy = 0;\r
c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 1;\r
return ;\r
break;\r
case RQ_PQ_Q:\r
- p->target = cnode;\r
+ NODE_BODY(pnode) = cnode;\r
p->lower = 0; p->upper = 1; p->greedy = 1;\r
c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 0;\r
return ;\r
break;\r
case RQ_ASIS:\r
- p->target = cnode;\r
+ NODE_BODY(pnode) = cnode;\r
return ;\r
break;\r
}\r
\r
- c->target = NULL_NODE;\r
+ remove_cnode:\r
+ NODE_BODY(cnode) = NULL_NODE;\r
onig_node_free(cnode);\r
}\r
\r
+static int /* Builds the node tree for a "general newline" and stores it in *node. */\r
+node_new_general_newline(Node** node, ScanEnv* env) /* Returns 0 on success, a negative ONIGENC_CODE_TO_MBC result, or ONIGERR_MEMORY from the cleanup path. */\r
+{\r
+ int r;\r
+ int dlen, alen; /* encoded lengths of 0x0d (CR) and 0x0a (LF) in env->enc */\r
+ UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN * 2]; /* holds the two encoded characters back to back */\r
+ Node* crnl; /* raw string node for the CR LF sequence */\r
+ Node* ncc; /* character class node for the single-character newlines */\r
+ Node* x; /* resulting if-else enclosure */\r
+ CClassNode* cc;\r
+ /* Encode CR, then LF directly after it; a negative length is returned to the caller unchanged. */\r
+ dlen = ONIGENC_CODE_TO_MBC(env->enc, 0x0d, buf);\r
+ if (dlen < 0) return dlen;\r
+ alen = ONIGENC_CODE_TO_MBC(env->enc, 0x0a, buf + dlen);\r
+ if (alen < 0) return alen;\r
+\r
+ crnl = node_new_str_raw(buf, buf + dlen + alen);\r
+ CHECK_NULL_RETURN_MEMERR(crnl); /* presumably returns ONIGERR_MEMORY when crnl is NULL -- confirm against the macro */\r
+\r
+ ncc = node_new_cclass();\r
+ if (IS_NULL(ncc)) goto err2; /* only crnl exists at this point */\r
+ /* Put 0x0a..0x0d into the class: bitset when CR encoded with length 1, code range buffer otherwise. */\r
+ cc = CCLASS_(ncc);\r
+ if (dlen == 1) {\r
+ bitset_set_range(cc->bs, 0x0a, 0x0d);\r
+ }\r
+ else {\r
+ r = add_code_range(&(cc->mbuf), env, 0x0a, 0x0d);\r
+ if (r != 0) {\r
+ err1: /* shared cleanup: later failures jump into this block */\r
+ onig_node_free(ncc);\r
+ err2: /* entered directly when ncc was never created */\r
+ onig_node_free(crnl);\r
+ return ONIGERR_MEMORY; /* reported for every failure routed here, whatever r held */\r
+ }\r
+ }\r
+ /* Unicode encodings additionally accept 0x85 and 0x2028..0x2029. */\r
+ if (ONIGENC_IS_UNICODE_ENCODING(env->enc)) {\r
+ r = add_code_range(&(cc->mbuf), env, 0x85, 0x85);\r
+ if (r != 0) goto err1;\r
+ r = add_code_range(&(cc->mbuf), env, 0x2028, 0x2029);\r
+ if (r != 0) goto err1;\r
+ }\r
+ /* NOTE(review): presumably crnl is the condition, 0 means no "then" part, and ncc is the "else" part -- confirm against node_new_enclosure_if_else. */\r
+ x = node_new_enclosure_if_else(crnl, 0, ncc);\r
+ if (IS_NULL(x)) goto err1; /* both child nodes are still owned here, so free both */\r
+\r
+ *node = x;\r
+ return 0;\r
+}\r
\r
enum TokenSyms {\r
TK_EOT = 0, /* end of token */\r
TK_CC_OPEN,\r
TK_QUOTE_OPEN,\r
TK_CHAR_PROPERTY, /* \p{...}, \P{...} */\r
+ TK_KEEP, /* \K */\r
+ TK_GENERAL_NEWLINE, /* \R */\r
+ TK_NO_NEWLINE, /* \N */\r
+ TK_TRUE_ANYCHAR, /* \O */\r
+ TK_EXTENDED_GRAPHEME_CLUSTER, /* \X */\r
+\r
/* in cc */\r
TK_CC_CLOSE,\r
TK_CC_RANGE,\r
UChar* name;\r
UChar* name_end;\r
int gnum;\r
+ int by_number;\r
} call;\r
struct {\r
int ctype;\r
\r
if (p == prev) {\r
if (non_low != 0)\r
- goto invalid;\r
+ goto invalid;\r
up = REPEAT_INFINITE; /* {n,} : {n,infinite} */\r
}\r
}\r
return r; /* 0: normal {n,m}, 2: fixed {n} */\r
\r
invalid:\r
- if (syn_allow)\r
+ if (syn_allow) {\r
+ /* *src = p; */ /* !!! Don't do this line !!! */\r
return 1; /* OK */\r
+ }\r
else\r
return ONIGERR_INVALID_REPEAT_RANGE_PATTERN;\r
}\r
\r
/* \M-, \C-, \c, or \... */\r
static int\r
-fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)\r
+fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env, OnigCodePoint* val)\r
{\r
int v;\r
OnigCodePoint c;\r
if (PEND) return ONIGERR_END_PATTERN_AT_META;\r
PFETCH_S(c);\r
if (c == MC_ESC(env->syntax)) {\r
- v = fetch_escaped_value(&p, end, env);\r
+ v = fetch_escaped_value(&p, end, env, &c);\r
if (v < 0) return v;\r
- c = (OnigCodePoint )v;\r
}\r
c = ((c & 0xff) | 0x80);\r
}\r
}\r
else {\r
if (c == MC_ESC(env->syntax)) {\r
- v = fetch_escaped_value(&p, end, env);\r
+ v = fetch_escaped_value(&p, end, env, &c);\r
if (v < 0) return v;\r
- c = (OnigCodePoint )v;\r
}\r
c &= 0x9f;\r
}\r
}\r
\r
*src = p;\r
- return c;\r
+ *val = c;\r
+ return 0;\r
}\r
\r
static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env);\r
get_name_end_code_point(OnigCodePoint start)\r
{\r
switch (start) {\r
- case '<': return (OnigCodePoint )'>'; break;\r
+ case '<': return (OnigCodePoint )'>'; break;\r
case '\'': return (OnigCodePoint )'\''; break;\r
+ case '(': return (OnigCodePoint )')'; break;\r
default:\r
break;\r
}\r
return (OnigCodePoint )0;\r
}\r
\r
-#ifdef USE_NAMED_GROUP\r
+enum REF_NUM { /* how the group reference parsed by fetch_name() / fetch_name_with_level() was written */\r
+ IS_NOT_NUM = 0, /* initial value: the reference is not numeric (a name) */\r
+ IS_ABS_NUM = 1, /* first character is an ASCII digit: absolute group number */\r
+ IS_REL_NUM = 2 /* first character is '-' or '+': signed number, later converted by backref_rel_to_abs() */\r
+};\r
+\r
#ifdef USE_BACKREF_WITH_LEVEL\r
/*\r
\k<name+n>, \k<name-n>\r
\k<num+n>, \k<num-n>\r
\k<-num+n>, \k<-num-n>\r
+ \k<+num+n>, \k<+num-n>\r
*/\r
static int\r
fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,\r
- UChar** rname_end, ScanEnv* env,\r
- int* rback_num, int* rlevel)\r
+ UChar** rname_end, ScanEnv* env,\r
+ int* rback_num, int* rlevel, enum REF_NUM* num_type)\r
{\r
- int r, sign, is_num, exist_level;\r
+ int r, sign, exist_level;\r
+ int digit_count;\r
OnigCodePoint end_code;\r
OnigCodePoint c = 0;\r
OnigEncoding enc = env->enc;\r
PFETCH_READY;\r
\r
*rback_num = 0;\r
- is_num = exist_level = 0;\r
+ exist_level = 0;\r
+ *num_type = IS_NOT_NUM;\r
sign = 1;\r
pnum_head = *src;\r
\r
end_code = get_name_end_code_point(start_code);\r
\r
+ digit_count = 0;\r
name_end = end;\r
r = 0;\r
if (PEND) {\r
if (c == end_code)\r
return ONIGERR_EMPTY_GROUP_NAME;\r
\r
- if (ONIGENC_IS_CODE_DIGIT(enc, c)) {\r
- is_num = 1;\r
+ if (IS_CODE_DIGIT_ASCII(enc, c)) {\r
+ *num_type = IS_ABS_NUM;\r
+ digit_count++;\r
}\r
else if (c == '-') {\r
- is_num = 2;\r
+ *num_type = IS_REL_NUM;\r
sign = -1;\r
pnum_head = p;\r
}\r
+ else if (c == '+') {\r
+ *num_type = IS_REL_NUM;\r
+ sign = 1;\r
+ pnum_head = p;\r
+ }\r
else if (!ONIGENC_IS_CODE_WORD(enc, c)) {\r
r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;\r
}\r
name_end = p;\r
PFETCH(c);\r
if (c == end_code || c == ')' || c == '+' || c == '-') {\r
- if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME;\r
+ if (*num_type != IS_NOT_NUM && digit_count == 0)\r
+ r = ONIGERR_INVALID_GROUP_NAME;\r
break;\r
}\r
\r
- if (is_num != 0) {\r
- if (ONIGENC_IS_CODE_DIGIT(enc, c)) {\r
- is_num = 1;\r
+ if (*num_type != IS_NOT_NUM) {\r
+ if (IS_CODE_DIGIT_ASCII(enc, c)) {\r
+ digit_count++;\r
}\r
else {\r
r = ONIGERR_INVALID_GROUP_NAME;\r
- is_num = 0;\r
+ *num_type = IS_NOT_NUM;\r
}\r
}\r
else if (!ONIGENC_IS_CODE_WORD(enc, c)) {\r
int level;\r
int flag = (c == '-' ? -1 : 1);\r
\r
+ if (PEND) {\r
+ r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;\r
+ goto end;\r
+ }\r
PFETCH(c);\r
- if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto err;\r
+ if (! IS_CODE_DIGIT_ASCII(enc, c)) goto err;\r
PUNFETCH;\r
level = onig_scan_unsigned_number(&p, end, enc);\r
if (level < 0) return ONIGERR_TOO_BIG_NUMBER;\r
*rlevel = (level * flag);\r
exist_level = 1;\r
\r
- PFETCH(c);\r
- if (c == end_code)\r
- goto end;\r
+ if (!PEND) {\r
+ PFETCH(c);\r
+ if (c == end_code)\r
+ goto end;\r
+ }\r
}\r
\r
err:\r
- r = ONIGERR_INVALID_GROUP_NAME;\r
name_end = end;\r
+ err2:\r
+ r = ONIGERR_INVALID_GROUP_NAME;\r
}\r
\r
end:\r
if (r == 0) {\r
- if (is_num != 0) {\r
+ if (*num_type != IS_NOT_NUM) {\r
*rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);\r
if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;\r
- else if (*rback_num == 0) goto err;\r
+ else if (*rback_num == 0) {\r
+ if (*num_type == IS_REL_NUM)\r
+ goto err2;\r
+ }\r
\r
*rback_num *= sign;\r
}\r
#endif /* USE_BACKREF_WITH_LEVEL */\r
\r
/*\r
- def: 0 -> define name (don't allow number name)\r
+ ref: 0 -> define name (don't allow number name)\r
1 -> reference name (allow number name)\r
*/\r
static int\r
fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,\r
- UChar** rname_end, ScanEnv* env, int* rback_num, int ref)\r
+ UChar** rname_end, ScanEnv* env, int* rback_num,\r
+ enum REF_NUM* num_type, int ref)\r
{\r
- int r, is_num, sign;\r
+ int r, sign;\r
+ int digit_count;\r
OnigCodePoint end_code;\r
OnigCodePoint c = 0;\r
OnigEncoding enc = env->enc;\r
\r
end_code = get_name_end_code_point(start_code);\r
\r
+ digit_count = 0;\r
name_end = end;\r
pnum_head = *src;\r
r = 0;\r
- is_num = 0;\r
+ *num_type = IS_NOT_NUM;\r
sign = 1;\r
if (PEND) {\r
return ONIGERR_EMPTY_GROUP_NAME;\r
if (c == end_code)\r
return ONIGERR_EMPTY_GROUP_NAME;\r
\r
- if (ONIGENC_IS_CODE_DIGIT(enc, c)) {\r
+ if (IS_CODE_DIGIT_ASCII(enc, c)) {\r
if (ref == 1)\r
- is_num = 1;\r
+ *num_type = IS_ABS_NUM;\r
else {\r
r = ONIGERR_INVALID_GROUP_NAME;\r
- is_num = 0;\r
}\r
+ digit_count++;\r
}\r
else if (c == '-') {\r
if (ref == 1) {\r
- is_num = 2;\r
+ *num_type = IS_REL_NUM;\r
sign = -1;\r
pnum_head = p;\r
}\r
else {\r
r = ONIGERR_INVALID_GROUP_NAME;\r
- is_num = 0;\r
}\r
}\r
- else if (!ONIGENC_IS_CODE_WORD(enc, c)) {\r
- r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;\r
- }\r
- }\r
-\r
- if (r == 0) {\r
- while (!PEND) {\r
- name_end = p;\r
- PFETCH_S(c);\r
- if (c == end_code || c == ')') {\r
- if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME;\r
- break;\r
- }\r
-\r
- if (is_num != 0) {\r
- if (ONIGENC_IS_CODE_DIGIT(enc, c)) {\r
- is_num = 1;\r
- }\r
- else {\r
- if (!ONIGENC_IS_CODE_WORD(enc, c))\r
- r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;\r
- else\r
- r = ONIGERR_INVALID_GROUP_NAME;\r
- is_num = 0;\r
- }\r
+ else if (c == '+') {\r
+ if (ref == 1) {\r
+ *num_type = IS_REL_NUM;\r
+ sign = 1;\r
+ pnum_head = p;\r
}\r
else {\r
- if (!ONIGENC_IS_CODE_WORD(enc, c)) {\r
- r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;\r
- }\r
- }\r
- }\r
-\r
- if (c != end_code) {\r
- r = ONIGERR_INVALID_GROUP_NAME;\r
- name_end = end;\r
- }\r
-\r
- if (is_num != 0) {\r
- *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);\r
- if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;\r
- else if (*rback_num == 0) {\r
r = ONIGERR_INVALID_GROUP_NAME;\r
- goto err;\r
- }\r
-\r
- *rback_num *= sign;\r
- }\r
-\r
- *rname_end = name_end;\r
- *src = p;\r
- return 0;\r
- }\r
- else {\r
- while (!PEND) {\r
- name_end = p;\r
- PFETCH_S(c);\r
- if (c == end_code || c == ')')\r
- break;\r
- }\r
- if (PEND)\r
- name_end = end;\r
-\r
- err:\r
- onig_scan_env_set_error_string(env, r, *src, name_end);\r
- return r;\r
- }\r
-}\r
-#else\r
-static int\r
-fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,\r
- UChar** rname_end, ScanEnv* env, int* rback_num, int ref)\r
-{\r
- int r, is_num, sign;\r
- OnigCodePoint end_code;\r
- OnigCodePoint c = 0;\r
- UChar *name_end;\r
- OnigEncoding enc = env->enc;\r
- UChar *pnum_head;\r
- UChar *p = *src;\r
- PFETCH_READY;\r
-\r
- *rback_num = 0;\r
-\r
- end_code = get_name_end_code_point(start_code);\r
-\r
- *rname_end = name_end = end;\r
- r = 0;\r
- pnum_head = *src;\r
- is_num = 0;\r
- sign = 1;\r
-\r
- if (PEND) {\r
- return ONIGERR_EMPTY_GROUP_NAME;\r
- }\r
- else {\r
- PFETCH(c);\r
- if (c == end_code)\r
- return ONIGERR_EMPTY_GROUP_NAME;\r
-\r
- if (ONIGENC_IS_CODE_DIGIT(enc, c)) {\r
- is_num = 1;\r
- }\r
- else if (c == '-') {\r
- is_num = 2;\r
- sign = -1;\r
- pnum_head = p;\r
- }\r
- else {\r
- r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;\r
- }\r
- }\r
-\r
- while (!PEND) {\r
- name_end = p;\r
-\r
- PFETCH(c);\r
- if (c == end_code || c == ')') break;\r
- if (! ONIGENC_IS_CODE_DIGIT(enc, c))\r
+ }\r
+ }\r
+ else if (!ONIGENC_IS_CODE_WORD(enc, c)) {\r
r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;\r
- }\r
- if (r == 0 && c != end_code) {\r
- r = ONIGERR_INVALID_GROUP_NAME;\r
- name_end = end;\r
+ }\r
}\r
\r
if (r == 0) {\r
- *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);\r
- if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;\r
- else if (*rback_num == 0) {\r
+ while (!PEND) {\r
+ name_end = p;\r
+ PFETCH_S(c);\r
+ if (c == end_code || c == ')') {\r
+ if (*num_type != IS_NOT_NUM && digit_count == 0)\r
+ r = ONIGERR_INVALID_GROUP_NAME;\r
+ break;\r
+ }\r
+\r
+ if (*num_type != IS_NOT_NUM) {\r
+ if (IS_CODE_DIGIT_ASCII(enc, c)) {\r
+ digit_count++;\r
+ }\r
+ else {\r
+ if (!ONIGENC_IS_CODE_WORD(enc, c))\r
+ r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;\r
+ else\r
+ r = ONIGERR_INVALID_GROUP_NAME;\r
+\r
+ *num_type = IS_NOT_NUM;\r
+ }\r
+ }\r
+ else {\r
+ if (!ONIGENC_IS_CODE_WORD(enc, c)) {\r
+ r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;\r
+ }\r
+ }\r
+ }\r
+\r
+ if (c != end_code) {\r
r = ONIGERR_INVALID_GROUP_NAME;\r
goto err;\r
}\r
- *rback_num *= sign;\r
+\r
+ if (*num_type != IS_NOT_NUM) {\r
+ *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);\r
+ if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;\r
+ else if (*rback_num == 0) {\r
+ if (*num_type == IS_REL_NUM) {\r
+ r = ONIGERR_INVALID_GROUP_NAME;\r
+ goto err;\r
+ }\r
+ }\r
+\r
+ *rback_num *= sign;\r
+ }\r
\r
*rname_end = name_end;\r
*src = p;\r
return 0;\r
}\r
else {\r
+ while (!PEND) {\r
+ name_end = p;\r
+ PFETCH_S(c);\r
+ if (c == end_code || c == ')')\r
+ break;\r
+ }\r
+ if (PEND)\r
+ name_end = end;\r
+\r
err:\r
onig_scan_env_set_error_string(env, r, *src, name_end);\r
return r;\r
}\r
}\r
-#endif /* USE_NAMED_GROUP */\r
\r
static void\r
CC_ESC_WARN(ScanEnv* env, UChar *c)\r
IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) {\r
UChar buf[WARN_BUFSIZE];\r
onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,\r
- env->pattern, env->pattern_end,\r
- (UChar* )"character class has '%s' without escape", c);\r
+ env->pattern, env->pattern_end,\r
+ (UChar* )"character class has '%s' without escape",\r
+ c);\r
(*onig_warn)((char* )buf);\r
}\r
}\r
if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) {\r
UChar buf[WARN_BUFSIZE];\r
onig_snprintf_with_pattern(buf, WARN_BUFSIZE, (env)->enc,\r
- (env)->pattern, (env)->pattern_end,\r
- (UChar* )"regular expression has '%s' without escape", c);\r
+ (env)->pattern, (env)->pattern_end,\r
+ (UChar* )"regular expression has '%s' without escape", c);\r
(*onig_warn)((char* )buf);\r
}\r
}\r
\r
static UChar*\r
find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to,\r
- UChar **next, OnigEncoding enc)\r
+ UChar **next, OnigEncoding enc)\r
{\r
int i;\r
OnigCodePoint x;\r
q = p + enclen(enc, p);\r
if (x == s[0]) {\r
for (i = 1; i < n && q < to; i++) {\r
- x = ONIGENC_MBC_TO_CODE(enc, q, to);\r
- if (x != s[i]) break;\r
- q += enclen(enc, q);\r
+ x = ONIGENC_MBC_TO_CODE(enc, q, to);\r
+ if (x != s[i]) break;\r
+ q += enclen(enc, q);\r
}\r
if (i >= n) {\r
- if (IS_NOT_NULL(next))\r
- *next = q;\r
- return p;\r
+ if (IS_NOT_NULL(next))\r
+ *next = q;\r
+ return p;\r
}\r
}\r
p = q;\r
\r
static int\r
str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to,\r
- OnigCodePoint bad, OnigEncoding enc, OnigSyntaxType* syn)\r
+ OnigCodePoint bad, OnigEncoding enc, OnigSyntaxType* syn)\r
{\r
int i, in_esc;\r
OnigCodePoint x;\r
x = ONIGENC_MBC_TO_CODE(enc, p, to);\r
q = p + enclen(enc, p);\r
if (x == s[0]) {\r
- for (i = 1; i < n && q < to; i++) {\r
- x = ONIGENC_MBC_TO_CODE(enc, q, to);\r
- if (x != s[i]) break;\r
- q += enclen(enc, q);\r
- }\r
- if (i >= n) return 1;\r
- p += enclen(enc, p);\r
+ for (i = 1; i < n && q < to; i++) {\r
+ x = ONIGENC_MBC_TO_CODE(enc, q, to);\r
+ if (x != s[i]) break;\r
+ q += enclen(enc, q);\r
+ }\r
+ if (i >= n) return 1;\r
+ p += enclen(enc, p);\r
}\r
else {\r
- x = ONIGENC_MBC_TO_CODE(enc, p, to);\r
- if (x == bad) return 0;\r
- else if (x == MC_ESC(syn)) in_esc = 1;\r
- p = q;\r
+ x = ONIGENC_MBC_TO_CODE(enc, p, to);\r
+ if (x == bad) return 0;\r
+ else if (x == MC_ESC(syn)) in_esc = 1;\r
+ p = q;\r
}\r
}\r
}\r
\r
case 'p':\r
case 'P':\r
+ if (PEND) break;\r
+\r
c2 = PPEEK;\r
if (c2 == '{' &&\r
- IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {\r
- PINC;\r
- tok->type = TK_CHAR_PROPERTY;\r
- tok->u.prop.not = (c == 'P' ? 1 : 0);\r
-\r
- if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {\r
- PFETCH(c2);\r
- if (c2 == '^') {\r
- tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);\r
- }\r
- else\r
- PUNFETCH;\r
- }\r
+ IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {\r
+ PINC;\r
+ tok->type = TK_CHAR_PROPERTY;\r
+ tok->u.prop.not = (c == 'P' ? 1 : 0);\r
+\r
+ if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {\r
+ PFETCH(c2);\r
+ if (c2 == '^') {\r
+ tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);\r
+ }\r
+ else\r
+ PUNFETCH;\r
+ }\r
+ }\r
+ break;\r
+\r
+ case 'o':\r
+ if (PEND) break;\r
+\r
+ prev = p;\r
+ if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_O_BRACE_OCTAL)) {\r
+ PINC;\r
+ num = scan_unsigned_octal_number(&p, end, 11, enc);\r
+ if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;\r
+ if (!PEND) {\r
+ c2 = PPEEK;\r
+ if (IS_CODE_DIGIT_ASCII(enc, c2))\r
+ return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;\r
+ }\r
+\r
+ if (p > prev + enclen(enc, prev) && !PEND && (PPEEK_IS('}'))) {\r
+ PINC;\r
+ tok->type = TK_CODE_POINT;\r
+ tok->base = 8;\r
+ tok->u.code = (OnigCodePoint )num;\r
+ }\r
+ else {\r
+ /* can't read nothing or invalid format */\r
+ p = prev;\r
+ }\r
}\r
break;\r
\r
\r
prev = p;\r
if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {\r
- PINC;\r
- num = scan_unsigned_hexadecimal_number(&p, end, 8, enc);\r
- if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;\r
- if (!PEND) {\r
+ PINC;\r
+ num = scan_unsigned_hexadecimal_number(&p, end, 0, 8, enc);\r
+ if (num < 0) {\r
+ if (num == ONIGERR_TOO_BIG_NUMBER)\r
+ return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;\r
+ else\r
+ return num;\r
+ }\r
+ if (!PEND) {\r
c2 = PPEEK;\r
- if (ONIGENC_IS_CODE_XDIGIT(enc, c2))\r
+ if (IS_CODE_XDIGIT_ASCII(enc, c2))\r
return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;\r
}\r
\r
- if (p > prev + enclen(enc, prev) && !PEND && (PPEEK_IS('}'))) {\r
- PINC;\r
- tok->type = TK_CODE_POINT;\r
- tok->base = 16;\r
- tok->u.code = (OnigCodePoint )num;\r
- }\r
- else {\r
- /* can't read nothing or invalid format */\r
- p = prev;\r
- }\r
+ if (p > prev + enclen(enc, prev) && !PEND && (PPEEK_IS('}'))) {\r
+ PINC;\r
+ tok->type = TK_CODE_POINT;\r
+ tok->base = 16;\r
+ tok->u.code = (OnigCodePoint )num;\r
+ }\r
+ else {\r
+ /* can't read nothing or invalid format */\r
+ p = prev;\r
+ }\r
}\r
else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {\r
- num = scan_unsigned_hexadecimal_number(&p, end, 2, enc);\r
- if (num < 0) return ONIGERR_TOO_BIG_NUMBER;\r
- if (p == prev) { /* can't read nothing. */\r
- num = 0; /* but, it's not error */\r
- }\r
- tok->type = TK_RAW_BYTE;\r
- tok->base = 16;\r
- tok->u.c = num;\r
+ num = scan_unsigned_hexadecimal_number(&p, end, 0, 2, enc);\r
+ if (num < 0) return num;\r
+ if (p == prev) { /* can't read nothing. */\r
+ num = 0; /* but, it's not error */\r
+ }\r
+ tok->type = TK_RAW_BYTE;\r
+ tok->base = 16;\r
+ tok->u.c = num;\r
}\r
break;\r
\r
\r
prev = p;\r
if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {\r
- num = scan_unsigned_hexadecimal_number(&p, end, 4, enc);\r
- if (num < 0) return ONIGERR_TOO_BIG_NUMBER;\r
- if (p == prev) { /* can't read nothing. */\r
- num = 0; /* but, it's not error */\r
- }\r
- tok->type = TK_CODE_POINT;\r
- tok->base = 16;\r
- tok->u.code = (OnigCodePoint )num;\r
+ num = scan_unsigned_hexadecimal_number(&p, end, 4, 4, enc);\r
+ if (num < 0) return num;\r
+ if (p == prev) { /* can't read nothing. */\r
+ num = 0; /* but, it's not error */\r
+ }\r
+ tok->type = TK_CODE_POINT;\r
+ tok->base = 16;\r
+ tok->u.code = (OnigCodePoint )num;\r
}\r
break;\r
\r
case '0':\r
case '1': case '2': case '3': case '4': case '5': case '6': case '7':\r
if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {\r
- PUNFETCH;\r
- prev = p;\r
- num = scan_unsigned_octal_number(&p, end, 3, enc);\r
- if (num < 0) return ONIGERR_TOO_BIG_NUMBER;\r
- if (p == prev) { /* can't read nothing. */\r
- num = 0; /* but, it's not error */\r
- }\r
- tok->type = TK_RAW_BYTE;\r
- tok->base = 8;\r
- tok->u.c = num;\r
+ PUNFETCH;\r
+ prev = p;\r
+ num = scan_unsigned_octal_number(&p, end, 3, enc);\r
+ if (num < 0 || num >= 256) return ONIGERR_TOO_BIG_NUMBER;\r
+ if (p == prev) { /* can't read nothing. */\r
+ num = 0; /* but, it's not error */\r
+ }\r
+ tok->type = TK_RAW_BYTE;\r
+ tok->base = 8;\r
+ tok->u.c = num;\r
}\r
break;\r
\r
default:\r
PUNFETCH;\r
- num = fetch_escaped_value(&p, end, env);\r
+ num = fetch_escaped_value(&p, end, env, &c2);\r
if (num < 0) return num;\r
- if (tok->u.c != num) {\r
- tok->u.code = (OnigCodePoint )num;\r
- tok->type = TK_CODE_POINT;\r
+ if (tok->u.c != c2) {\r
+ tok->u.code = c2;\r
+ tok->type = TK_CODE_POINT;\r
}\r
break;\r
}\r
else if (c == '[') {\r
if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_POSIX_BRACKET) && (PPEEK_IS(':'))) {\r
OnigCodePoint send[] = { (OnigCodePoint )':', (OnigCodePoint )']' };\r
- tok->backp = p; /* point at '[' is readed */\r
+ tok->backp = p; /* point at '[' is read */\r
PINC;\r
if (str_exist_check_with_esc(send, 2, p, end,\r
(OnigCodePoint )']', enc, syn)) {\r
- tok->type = TK_POSIX_BRACKET_OPEN;\r
+ tok->type = TK_POSIX_BRACKET_OPEN;\r
}\r
else {\r
- PUNFETCH;\r
- goto cc_in_cc;\r
+ PUNFETCH;\r
+ goto cc_in_cc;\r
}\r
}\r
else {\r
cc_in_cc:\r
if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP)) {\r
- tok->type = TK_CC_CC_OPEN;\r
+ tok->type = TK_CC_CC_OPEN;\r
}\r
else {\r
- CC_ESC_WARN(env, (UChar* )"[");\r
+ CC_ESC_WARN(env, (UChar* )"[");\r
}\r
}\r
}\r
else if (c == '&') {\r
if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP) &&\r
- !PEND && (PPEEK_IS('&'))) {\r
+ !PEND && (PPEEK_IS('&'))) {\r
PINC;\r
tok->type = TK_CC_AND;\r
}\r
tok->u.repeat.upper = 1;\r
greedy_check:\r
if (!PEND && PPEEK_IS('?') &&\r
- IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY)) {\r
- PFETCH(c);\r
- tok->u.repeat.greedy = 0;\r
- tok->u.repeat.possessive = 0;\r
+ IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY)) {\r
+ PFETCH(c);\r
+ tok->u.repeat.greedy = 0;\r
+ tok->u.repeat.possessive = 0;\r
}\r
else {\r
possessive_check:\r
- if (!PEND && PPEEK_IS('+') &&\r
- ((IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT) &&\r
- tok->type != TK_INTERVAL) ||\r
- (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL) &&\r
- tok->type == TK_INTERVAL))) {\r
- PFETCH(c);\r
- tok->u.repeat.greedy = 1;\r
- tok->u.repeat.possessive = 1;\r
- }\r
- else {\r
- tok->u.repeat.greedy = 1;\r
- tok->u.repeat.possessive = 0;\r
- }\r
+ if (!PEND && PPEEK_IS('+') &&\r
+ ((IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT) &&\r
+ tok->type != TK_INTERVAL) ||\r
+ (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL) &&\r
+ tok->type == TK_INTERVAL))) {\r
+ PFETCH(c);\r
+ tok->u.repeat.greedy = 1;\r
+ tok->u.repeat.possessive = 1;\r
+ }\r
+ else {\r
+ tok->u.repeat.greedy = 1;\r
+ tok->u.repeat.possessive = 0;\r
+ }\r
}\r
break;\r
\r
if (r < 0) return r; /* error */\r
if (r == 0) goto greedy_check;\r
else if (r == 2) { /* {n} */\r
- if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))\r
- goto possessive_check;\r
+ if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))\r
+ goto possessive_check;\r
\r
- goto greedy_check;\r
+ goto greedy_check;\r
}\r
/* r == 1 : normal char */\r
break;\r
case 'b':\r
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;\r
tok->type = TK_ANCHOR;\r
- tok->u.anchor = ANCHOR_WORD_BOUND;\r
+ tok->u.anchor = ANCHOR_WORD_BOUNDARY;\r
break;\r
\r
case 'B':\r
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;\r
tok->type = TK_ANCHOR;\r
- tok->u.anchor = ANCHOR_NOT_WORD_BOUND;\r
+ tok->u.anchor = ANCHOR_NO_WORD_BOUNDARY;\r
+ break;\r
+\r
+ case 'y': /* \y : extended grapheme cluster boundary anchor */\r
+ if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER)) break; /* OP2 flag, so it must be tested with IS_SYNTAX_OP2; IS_SYNTAX_OP would test an unrelated option bit */\r
+ tok->type = TK_ANCHOR;\r
+ tok->u.anchor = ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY;\r
+ break;\r
+\r
+ case 'Y': /* \Y : negated form of the \y anchor */\r
+ if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER)) break; /* same OP2 flag as \y above */\r
+ tok->type = TK_ANCHOR;\r
+ tok->u.anchor = ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY;\r
break;\r
\r
#ifdef USE_WORD_BEGIN_END\r
tok->u.prop.not = 1;\r
break;\r
\r
+ case 'K':\r
+ if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP)) break;\r
+ tok->type = TK_KEEP;\r
+ break;\r
+\r
+ case 'R':\r
+ if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE)) break;\r
+ tok->type = TK_GENERAL_NEWLINE;\r
+ break;\r
+\r
+ case 'N':\r
+ if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT)) break;\r
+ tok->type = TK_NO_NEWLINE;\r
+ break;\r
+\r
+ case 'O':\r
+ if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT)) break;\r
+ tok->type = TK_TRUE_ANYCHAR;\r
+ break;\r
+\r
+ case 'X':\r
+ if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER)) break;\r
+ tok->type = TK_EXTENDED_GRAPHEME_CLUSTER;\r
+ break;\r
+\r
case 'A':\r
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;\r
begin_buf:\r
goto end_buf;\r
break;\r
\r
+ case 'o':\r
+ if (PEND) break;\r
+\r
+ prev = p;\r
+ if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_O_BRACE_OCTAL)) {\r
+ PINC;\r
+ num = scan_unsigned_octal_number(&p, end, 11, enc);\r
+ if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;\r
+ if (!PEND) {\r
+ if (IS_CODE_DIGIT_ASCII(enc, PPEEK))\r
+ return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;\r
+ }\r
+\r
+ if ((p > prev + enclen(enc, prev)) && !PEND && PPEEK_IS('}')) {\r
+ PINC;\r
+ tok->type = TK_CODE_POINT;\r
+ tok->u.code = (OnigCodePoint )num;\r
+ }\r
+ else {\r
+ /* can't read nothing or invalid format */\r
+ p = prev;\r
+ }\r
+ }\r
+ break;\r
+\r
case 'x':\r
if (PEND) break;\r
\r
prev = p;\r
if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {\r
- PINC;\r
- num = scan_unsigned_hexadecimal_number(&p, end, 8, enc);\r
- if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;\r
- if (!PEND) {\r
- if (ONIGENC_IS_CODE_XDIGIT(enc, PPEEK))\r
+ PINC;\r
+ num = scan_unsigned_hexadecimal_number(&p, end, 0, 8, enc);\r
+ if (num < 0) {\r
+ if (num == ONIGERR_TOO_BIG_NUMBER)\r
+ return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;\r
+ else\r
+ return num;\r
+ }\r
+ if (!PEND) {\r
+ if (IS_CODE_XDIGIT_ASCII(enc, PPEEK))\r
return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;\r
}\r
\r
- if ((p > prev + enclen(enc, prev)) && !PEND && PPEEK_IS('}')) {\r
- PINC;\r
- tok->type = TK_CODE_POINT;\r
- tok->u.code = (OnigCodePoint )num;\r
- }\r
- else {\r
- /* can't read nothing or invalid format */\r
- p = prev;\r
- }\r
+ if ((p > prev + enclen(enc, prev)) && !PEND && PPEEK_IS('}')) {\r
+ PINC;\r
+ tok->type = TK_CODE_POINT;\r
+ tok->u.code = (OnigCodePoint )num;\r
+ }\r
+ else {\r
+ /* can't read nothing or invalid format */\r
+ p = prev;\r
+ }\r
}\r
else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {\r
- num = scan_unsigned_hexadecimal_number(&p, end, 2, enc);\r
- if (num < 0) return ONIGERR_TOO_BIG_NUMBER;\r
- if (p == prev) { /* can't read nothing. */\r
- num = 0; /* but, it's not error */\r
- }\r
- tok->type = TK_RAW_BYTE;\r
- tok->base = 16;\r
- tok->u.c = num;\r
+ num = scan_unsigned_hexadecimal_number(&p, end, 0, 2, enc);\r
+ if (num < 0) return num;\r
+ if (p == prev) { /* can't read nothing. */\r
+ num = 0; /* but, it's not error */\r
+ }\r
+ tok->type = TK_RAW_BYTE;\r
+ tok->base = 16;\r
+ tok->u.c = num;\r
}\r
break;\r
\r
\r
prev = p;\r
if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {\r
- num = scan_unsigned_hexadecimal_number(&p, end, 4, enc);\r
- if (num < 0) return ONIGERR_TOO_BIG_NUMBER;\r
- if (p == prev) { /* can't read nothing. */\r
- num = 0; /* but, it's not error */\r
- }\r
- tok->type = TK_CODE_POINT;\r
- tok->base = 16;\r
- tok->u.code = (OnigCodePoint )num;\r
+ num = scan_unsigned_hexadecimal_number(&p, end, 4, 4, enc);\r
+ if (num < 0) return num;\r
+ if (p == prev) { /* can't read nothing. */\r
+ num = 0; /* but, it's not error */\r
+ }\r
+ tok->type = TK_CODE_POINT;\r
+ tok->base = 16;\r
+ tok->u.code = (OnigCodePoint )num;\r
}\r
break;\r
\r
}\r
\r
if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) && \r
- (num <= env->num_mem || num <= 9)) { /* This spec. from GNU regex */\r
- if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {\r
- if (num > env->num_mem || IS_NULL(SCANENV_MEM_NODES(env)[num]))\r
- return ONIGERR_INVALID_BACKREF;\r
- }\r
-\r
- tok->type = TK_BACKREF;\r
- tok->u.backref.num = 1;\r
- tok->u.backref.ref1 = num;\r
- tok->u.backref.by_name = 0;\r
+ (num <= env->num_mem || num <= 9)) { /* This spec. from GNU regex */\r
+ if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {\r
+ if (num > env->num_mem || IS_NULL(SCANENV_MEMENV(env)[num].node))\r
+ return ONIGERR_INVALID_BACKREF;\r
+ }\r
+\r
+ tok->type = TK_BACKREF;\r
+ tok->u.backref.num = 1;\r
+ tok->u.backref.ref1 = num;\r
+ tok->u.backref.by_name = 0;\r
#ifdef USE_BACKREF_WITH_LEVEL\r
- tok->u.backref.exist_level = 0;\r
+ tok->u.backref.exist_level = 0;\r
#endif\r
- break;\r
+ break;\r
}\r
\r
skip_backref:\r
if (c == '8' || c == '9') {\r
- /* normal char */\r
- p = prev; PINC;\r
- break;\r
+ /* normal char */\r
+ p = prev; PINC;\r
+ break;\r
}\r
\r
p = prev;\r
/* fall through */\r
case '0':\r
if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {\r
- prev = p;\r
- num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), enc);\r
- if (num < 0) return ONIGERR_TOO_BIG_NUMBER;\r
- if (p == prev) { /* can't read nothing. */\r
- num = 0; /* but, it's not error */\r
- }\r
- tok->type = TK_RAW_BYTE;\r
- tok->base = 8;\r
- tok->u.c = num;\r
+ prev = p;\r
+ num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), enc);\r
+ if (num < 0 || num >= 256) return ONIGERR_TOO_BIG_NUMBER;\r
+ if (p == prev) { /* can't read nothing. */\r
+ num = 0; /* but, it's not error */\r
+ }\r
+ tok->type = TK_RAW_BYTE;\r
+ tok->base = 8;\r
+ tok->u.c = num;\r
}\r
else if (c != '0') {\r
- PINC;\r
+ PINC;\r
}\r
break;\r
\r
-#ifdef USE_NAMED_GROUP\r
case 'k':\r
- if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) {\r
- PFETCH(c);\r
- if (c == '<' || c == '\'') {\r
- UChar* name_end;\r
- int* backs;\r
- int back_num;\r
+ if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) {\r
+ PFETCH(c);\r
+ if (c == '<' || c == '\'') {\r
+ UChar* name_end;\r
+ int* backs;\r
+ int back_num;\r
+ enum REF_NUM num_type;\r
\r
- prev = p;\r
+ prev = p;\r
\r
#ifdef USE_BACKREF_WITH_LEVEL\r
- name_end = NULL_UCHARP; /* no need. escape gcc warning. */\r
- r = fetch_name_with_level((OnigCodePoint )c, &p, end, &name_end,\r
- env, &back_num, &tok->u.backref.level);\r
- if (r == 1) tok->u.backref.exist_level = 1;\r
- else tok->u.backref.exist_level = 0;\r
+ name_end = NULL_UCHARP; /* no need. escape gcc warning. */\r
+ r = fetch_name_with_level((OnigCodePoint )c, &p, end, &name_end,\r
+ env, &back_num, &tok->u.backref.level, &num_type);\r
+ if (r == 1) tok->u.backref.exist_level = 1;\r
+ else tok->u.backref.exist_level = 0;\r
#else\r
- r = fetch_name(&p, end, &name_end, env, &back_num, 1);\r
+ r = fetch_name(c, &p, end, &name_end, env, &back_num, &num_type, 1);\r
#endif\r
- if (r < 0) return r;\r
-\r
- if (back_num != 0) {\r
- if (back_num < 0) {\r
- back_num = BACKREF_REL_TO_ABS(back_num, env);\r
- if (back_num <= 0)\r
- return ONIGERR_INVALID_BACKREF;\r
- }\r
-\r
- if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {\r
- if (back_num > env->num_mem ||\r
- IS_NULL(SCANENV_MEM_NODES(env)[back_num]))\r
- return ONIGERR_INVALID_BACKREF;\r
- }\r
- tok->type = TK_BACKREF;\r
- tok->u.backref.by_name = 0;\r
- tok->u.backref.num = 1;\r
- tok->u.backref.ref1 = back_num;\r
- }\r
- else {\r
- num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);\r
- if (num <= 0) {\r
- onig_scan_env_set_error_string(env,\r
- ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);\r
- return ONIGERR_UNDEFINED_NAME_REFERENCE;\r
- }\r
- if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {\r
- int i;\r
- for (i = 0; i < num; i++) {\r
- if (backs[i] > env->num_mem ||\r
- IS_NULL(SCANENV_MEM_NODES(env)[backs[i]]))\r
- return ONIGERR_INVALID_BACKREF;\r
- }\r
- }\r
-\r
- tok->type = TK_BACKREF;\r
- tok->u.backref.by_name = 1;\r
- if (num == 1) {\r
- tok->u.backref.num = 1;\r
- tok->u.backref.ref1 = backs[0];\r
- }\r
- else {\r
- tok->u.backref.num = num;\r
- tok->u.backref.refs = backs;\r
- }\r
- }\r
- }\r
- else\r
- PUNFETCH;\r
+ if (r < 0) return r;\r
+\r
+ if (num_type != IS_NOT_NUM) {\r
+ if (num_type == IS_REL_NUM) {\r
+ back_num = backref_rel_to_abs(back_num, env);\r
+ }\r
+ if (back_num <= 0)\r
+ return ONIGERR_INVALID_BACKREF;\r
+\r
+ if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {\r
+ if (back_num > env->num_mem ||\r
+ IS_NULL(SCANENV_MEMENV(env)[back_num].node))\r
+ return ONIGERR_INVALID_BACKREF;\r
+ }\r
+ tok->type = TK_BACKREF;\r
+ tok->u.backref.by_name = 0;\r
+ tok->u.backref.num = 1;\r
+ tok->u.backref.ref1 = back_num;\r
+ }\r
+ else {\r
+ num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);\r
+ if (num <= 0) {\r
+ onig_scan_env_set_error_string(env,\r
+ ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);\r
+ return ONIGERR_UNDEFINED_NAME_REFERENCE;\r
+ }\r
+ if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {\r
+ int i;\r
+ for (i = 0; i < num; i++) {\r
+ if (backs[i] > env->num_mem ||\r
+ IS_NULL(SCANENV_MEMENV(env)[backs[i]].node))\r
+ return ONIGERR_INVALID_BACKREF;\r
+ }\r
+ }\r
+\r
+ tok->type = TK_BACKREF;\r
+ tok->u.backref.by_name = 1;\r
+ if (num == 1) {\r
+ tok->u.backref.num = 1;\r
+ tok->u.backref.ref1 = backs[0];\r
+ }\r
+ else {\r
+ tok->u.backref.num = num;\r
+ tok->u.backref.refs = backs;\r
+ }\r
+ }\r
+ }\r
+ else\r
+ PUNFETCH;\r
}\r
break;\r
-#endif\r
\r
-#ifdef USE_SUBEXP_CALL\r
+#ifdef USE_CALL\r
case 'g':\r
- if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) {\r
- PFETCH(c);\r
- if (c == '<' || c == '\'') {\r
- int gnum;\r
- UChar* name_end;\r
-\r
- prev = p;\r
- r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &gnum, 1);\r
- if (r < 0) return r;\r
-\r
- tok->type = TK_CALL;\r
- tok->u.call.name = prev;\r
- tok->u.call.name_end = name_end;\r
- tok->u.call.gnum = gnum;\r
- }\r
- else\r
- PUNFETCH;\r
+ if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) {\r
+ PFETCH(c);\r
+ if (c == '<' || c == '\'') {\r
+ int gnum;\r
+ UChar* name_end;\r
+ enum REF_NUM num_type;\r
+\r
+ prev = p;\r
+ r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env,\r
+ &gnum, &num_type, 1);\r
+ if (r < 0) return r;\r
+\r
+ if (num_type != IS_NOT_NUM) {\r
+ if (num_type == IS_REL_NUM) {\r
+ gnum = backref_rel_to_abs(gnum, env);\r
+ if (gnum < 0) {\r
+ onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,\r
+ prev, name_end);\r
+ return ONIGERR_UNDEFINED_GROUP_REFERENCE;\r
+ }\r
+ }\r
+ tok->u.call.by_number = 1;\r
+ tok->u.call.gnum = gnum;\r
+ }\r
+ else {\r
+ tok->u.call.by_number = 0;\r
+ tok->u.call.gnum = 0;\r
+ }\r
+\r
+ tok->type = TK_CALL;\r
+ tok->u.call.name = prev;\r
+ tok->u.call.name_end = name_end;\r
+ }\r
+ else\r
+ PUNFETCH;\r
}\r
break;\r
#endif\r
\r
case 'Q':\r
if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE)) {\r
- tok->type = TK_QUOTE_OPEN;\r
+ tok->type = TK_QUOTE_OPEN;\r
}\r
break;\r
\r
case 'p':\r
case 'P':\r
- if (PPEEK_IS('{') &&\r
- IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {\r
- PINC;\r
- tok->type = TK_CHAR_PROPERTY;\r
- tok->u.prop.not = (c == 'P' ? 1 : 0);\r
-\r
- if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {\r
- PFETCH(c);\r
- if (c == '^') {\r
- tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);\r
- }\r
- else\r
- PUNFETCH;\r
- }\r
+ if (!PEND && PPEEK_IS('{') &&\r
+ IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {\r
+ PINC;\r
+ tok->type = TK_CHAR_PROPERTY;\r
+ tok->u.prop.not = (c == 'P' ? 1 : 0);\r
+\r
+ if (!PEND &&\r
+ IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {\r
+ PFETCH(c);\r
+ if (c == '^') {\r
+ tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);\r
+ }\r
+ else\r
+ PUNFETCH;\r
+ }\r
}\r
break;\r
\r
default:\r
- PUNFETCH;\r
- num = fetch_escaped_value(&p, end, env);\r
- if (num < 0) return num;\r
- /* set_raw: */\r
- if (tok->u.c != num) {\r
- tok->type = TK_CODE_POINT;\r
- tok->u.code = (OnigCodePoint )num;\r
- }\r
- else { /* string */\r
- p = tok->backp + enclen(enc, tok->backp);\r
+ {\r
+ OnigCodePoint c2;\r
+\r
+ PUNFETCH;\r
+ num = fetch_escaped_value(&p, end, env, &c2);\r
+ if (num < 0) return num;\r
+ /* set_raw: */\r
+ if (tok->u.c != c2) {\r
+ tok->type = TK_CODE_POINT;\r
+ tok->u.code = c2;\r
+ }\r
+ else { /* string */\r
+ p = tok->backp + enclen(enc, tok->backp);\r
+ }\r
}\r
break;\r
}\r
\r
#ifdef USE_VARIABLE_META_CHARS\r
if ((c != ONIG_INEFFECTIVE_META_CHAR) &&\r
- IS_SYNTAX_OP(syn, ONIG_SYN_OP_VARIABLE_META_CHARACTERS)) {\r
+ IS_SYNTAX_OP(syn, ONIG_SYN_OP_VARIABLE_META_CHARACTERS)) {\r
if (c == MC_ANYCHAR(syn))\r
- goto any_char;\r
+ goto any_char;\r
else if (c == MC_ANYTIME(syn))\r
- goto anytime;\r
+ goto anytime;\r
else if (c == MC_ZERO_OR_ONE_TIME(syn))\r
- goto zero_or_one_time;\r
+ goto zero_or_one_time;\r
else if (c == MC_ONE_OR_MORE_TIME(syn))\r
- goto one_or_more_time;\r
+ goto one_or_more_time;\r
else if (c == MC_ANYCHAR_ANYTIME(syn)) {\r
- tok->type = TK_ANYCHAR_ANYTIME;\r
- goto out;\r
+ tok->type = TK_ANYCHAR_ANYTIME;\r
+ goto out;\r
}\r
}\r
#endif\r
if (r < 0) return r; /* error */\r
if (r == 0) goto greedy_check;\r
else if (r == 2) { /* {n} */\r
- if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))\r
- goto possessive_check;\r
+ if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))\r
+ goto possessive_check;\r
\r
- goto greedy_check;\r
+ goto greedy_check;\r
}\r
/* r == 1 : normal char */\r
break;\r
break;\r
\r
case '(':\r
- if (PPEEK_IS('?') &&\r
+ if (!PEND && PPEEK_IS('?') &&\r
IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {\r
PINC;\r
- if (PPEEK_IS('#')) {\r
- PFETCH(c);\r
- while (1) {\r
- if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
+ if (! PEND) {\r
+ c = PPEEK;\r
+ if (c == '#') {\r
PFETCH(c);\r
- if (c == MC_ESC(syn)) {\r
- if (!PEND) PFETCH(c);\r
+ while (1) {\r
+ if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
+ PFETCH(c);\r
+ if (c == MC_ESC(syn)) {\r
+ if (! PEND) PFETCH(c);\r
+ }\r
+ else {\r
+ if (c == ')') break;\r
+ }\r
}\r
- else {\r
- if (c == ')') break;\r
+ goto start;\r
+ }\r
+ else if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_PERL_SUBEXP_CALL)) {\r
+ int gnum;\r
+ UChar* name;\r
+ UChar* name_end;\r
+ enum REF_NUM num_type;\r
+\r
+ switch (c) {\r
+ case '&':\r
+ {\r
+ PINC;\r
+ name = p;\r
+ r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &gnum,\r
+ &num_type, 0);\r
+ if (r < 0) return r;\r
+\r
+ tok->type = TK_CALL;\r
+ tok->u.call.by_number = 0;\r
+ tok->u.call.gnum = 0;\r
+ tok->u.call.name = name;\r
+ tok->u.call.name_end = name_end;\r
+ }\r
+ break;\r
+\r
+ case 'R':\r
+ tok->type = TK_CALL;\r
+ tok->u.call.by_number = 1;\r
+ tok->u.call.gnum = 0;\r
+ tok->u.call.name = p;\r
+ PINC;\r
+ if (! PPEEK_IS(')')) return ONIGERR_INVALID_GROUP_NAME;\r
+ tok->u.call.name_end = p;\r
+ break;\r
+\r
+ case '-':\r
+ case '+':\r
+ goto lparen_qmark_num;\r
+ break;\r
+ default:\r
+ if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto lparen_qmark_end;\r
+\r
+ lparen_qmark_num:\r
+ {\r
+ name = p;\r
+ r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env,\r
+ &gnum, &num_type, 1);\r
+ if (r < 0) return r;\r
+\r
+ if (num_type == IS_NOT_NUM) {\r
+ return ONIGERR_INVALID_GROUP_NAME;\r
+ }\r
+ else {\r
+ if (num_type == IS_REL_NUM) {\r
+ gnum = backref_rel_to_abs(gnum, env);\r
+ if (gnum < 0) {\r
+ onig_scan_env_set_error_string(env,\r
+ ONIGERR_UNDEFINED_NAME_REFERENCE, name, name_end);\r
+ return ONIGERR_UNDEFINED_GROUP_REFERENCE;\r
+ }\r
+ }\r
+ tok->u.call.by_number = 1;\r
+ tok->u.call.gnum = gnum;\r
+ }\r
+\r
+ tok->type = TK_CALL;\r
+ tok->u.call.name = name;\r
+ tok->u.call.name_end = name_end;\r
+ }\r
+ break;\r
}\r
}\r
- goto start;\r
}\r
+ lparen_qmark_end:\r
PUNFETCH;\r
}\r
\r
case '^':\r
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;\r
tok->type = TK_ANCHOR;\r
- tok->u.subtype = (IS_SINGLELINE(env->option)\r
- ? ANCHOR_BEGIN_BUF : ANCHOR_BEGIN_LINE);\r
+ tok->u.subtype = (IS_SINGLELINE(env->options)\r
+ ? ANCHOR_BEGIN_BUF : ANCHOR_BEGIN_LINE);\r
break;\r
\r
case '$':\r
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;\r
tok->type = TK_ANCHOR;\r
- tok->u.subtype = (IS_SINGLELINE(env->option)\r
- ? ANCHOR_SEMI_END_BUF : ANCHOR_END_LINE);\r
+ tok->u.subtype = (IS_SINGLELINE(env->options)\r
+ ? ANCHOR_SEMI_END_BUF : ANCHOR_END_LINE);\r
break;\r
\r
case '[':\r
\r
case ']':\r
if (*src > env->pattern) /* /].../ is allowed. */\r
- CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]");\r
+ CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]");\r
break;\r
\r
case '#':\r
- if (IS_EXTEND(env->option)) {\r
- while (!PEND) {\r
- PFETCH(c);\r
- if (ONIGENC_IS_CODE_NEWLINE(enc, c))\r
- break;\r
- }\r
- goto start;\r
- break;\r
+ if (IS_EXTEND(env->options)) {\r
+ while (!PEND) {\r
+ PFETCH(c);\r
+ if (ONIGENC_IS_CODE_NEWLINE(enc, c))\r
+ break;\r
+ }\r
+ goto start;\r
+ break;\r
}\r
break;\r
\r
case ' ': case '\t': case '\n': case '\r': case '\f':\r
- if (IS_EXTEND(env->option))\r
- goto start;\r
+ if (IS_EXTEND(env->options))\r
+ goto start;\r
break;\r
\r
default:\r
\r
static int\r
add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not,\r
- OnigEncoding enc ARG_UNUSED,\r
- OnigCodePoint sb_out, const OnigCodePoint mbr[])\r
+ OnigEncoding enc ARG_UNUSED, OnigCodePoint sb_out,\r
+ const OnigCodePoint mbr[])\r
{\r
int i, r;\r
OnigCodePoint j;\r
for (i = 0; i < n; i++) {\r
for (j = ONIGENC_CODE_RANGE_FROM(mbr, i);\r
j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) {\r
- if (j >= sb_out) {\r
- if (j == ONIGENC_CODE_RANGE_TO(mbr, i)) i++;\r
- else if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) {\r
- r = add_code_range_to_buf(&(cc->mbuf), j,\r
- ONIGENC_CODE_RANGE_TO(mbr, i));\r
- if (r != 0) return r;\r
- i++;\r
- }\r
-\r
- goto sb_end;\r
- }\r
+ if (j >= sb_out) {\r
+ if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) {\r
+ r = add_code_range_to_buf(&(cc->mbuf), j,\r
+ ONIGENC_CODE_RANGE_TO(mbr, i));\r
+ if (r != 0) return r;\r
+ i++;\r
+ }\r
+\r
+ goto sb_end;\r
+ }\r
BITSET_SET_BIT(cc->bs, j);\r
}\r
}\r
OnigCodePoint prev = 0;\r
\r
for (i = 0; i < n; i++) {\r
- for (j = prev;\r
- j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) {\r
- if (j >= sb_out) {\r
- goto sb_end2;\r
- }\r
- BITSET_SET_BIT(cc->bs, j);\r
+ for (j = prev; j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) {\r
+ if (j >= sb_out) {\r
+ goto sb_end2;\r
+ }\r
+ BITSET_SET_BIT(cc->bs, j);\r
}\r
prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;\r
}\r
\r
for (i = 0; i < n; i++) {\r
if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) {\r
- r = add_code_range_to_buf(&(cc->mbuf), prev,\r
+ r = add_code_range_to_buf(&(cc->mbuf), prev,\r
ONIGENC_CODE_RANGE_FROM(mbr, i) - 1);\r
- if (r != 0) return r;\r
+ if (r != 0) return r;\r
}\r
prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;\r
+ if (prev == 0) goto end;\r
+ }\r
+\r
+ r = add_code_range_to_buf(&(cc->mbuf), prev, MAX_CODE_POINT);\r
+ if (r != 0) return r;\r
+ }\r
+\r
+ end:\r
+ return 0;\r
+}\r
+/*\r
+ * Add the code point ranges listed in mbr to cc, ignoring every code point\r
+ * greater than limit.\r
+ *\r
+ * not == 0: code points of the ranges that are below sb_out are set in the\r
+ *   bitset cc->bs; from sb_out upward the ranges are appended to cc->mbuf,\r
+ *   each clipped so that it does not extend past limit.\r
+ * not != 0: the complement is added instead.  Code points below sb_out that\r
+ *   fall outside the (clipped) ranges are set in cc->bs, the gaps between\r
+ *   ranges are appended to cc->mbuf, and the final gap is extended up to\r
+ *   MAX_CODE_POINT, so everything above limit ends up in the class.\r
+ *\r
+ * ctype and enc are not referenced by the body.\r
+ * Returns 0 on success, or the non-zero value returned by\r
+ * add_code_range_to_buf().\r
+ * NOTE(review): the loops appear to rely on mbr being sorted in ascending\r
+ * order -- confirm against the range tables passed in.\r
+ */\r
+static int\r
+add_ctype_to_cc_by_range_limit(CClassNode* cc, int ctype ARG_UNUSED, int not,\r
+ OnigEncoding enc ARG_UNUSED,\r
+ OnigCodePoint sb_out,\r
+ const OnigCodePoint mbr[], OnigCodePoint limit)\r
+{\r
+ int i, r;\r
+ OnigCodePoint j;\r
+ OnigCodePoint from;\r
+ OnigCodePoint to;\r
+\r
+ int n = ONIGENC_CODE_RANGE_NUM(mbr);\r
+\r
+ if (not == 0) {\r
+ for (i = 0; i < n; i++) {\r
+ for (j = ONIGENC_CODE_RANGE_FROM(mbr, i);\r
+ j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) {\r
+ if (j > limit) goto end; /* nothing above limit is added */\r
+ if (j >= sb_out) {\r
+ if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) { /* range straddles sb_out: put its upper part in mbuf */\r
+ to = ONIGENC_CODE_RANGE_TO(mbr, i);\r
+ if (to > limit) to = limit; /* clip the range at limit */\r
+ r = add_code_range_to_buf(&(cc->mbuf), j, to);\r
+ if (r != 0) return r;\r
+ i++;\r
+ }\r
+\r
+ goto sb_end;\r
+ }\r
+ BITSET_SET_BIT(cc->bs, j);\r
+ }\r
}\r
- if (prev < 0x7fffffff) {\r
- r = add_code_range_to_buf(&(cc->mbuf), prev, 0x7fffffff);\r
+\r
+ sb_end:\r
+ for ( ; i < n; i++) { /* remaining ranges go to mbuf, clipped at limit */\r
+ from = ONIGENC_CODE_RANGE_FROM(mbr, i);\r
+ to = ONIGENC_CODE_RANGE_TO(mbr, i);\r
+ if (from > limit) break;\r
+ if (to > limit) to = limit;\r
+ r = add_code_range_to_buf(&(cc->mbuf), from, to);\r
if (r != 0) return r;\r
}\r
}\r
+ else {\r
+ OnigCodePoint prev = 0; /* first code point not yet covered by a range */\r
+\r
+ for (i = 0; i < n; i++) {\r
+ from = ONIGENC_CODE_RANGE_FROM(mbr, i);\r
+ if (from > limit) { /* range lies wholly above limit: fill the rest of the bitset */\r
+ for (j = prev; j < sb_out; j++) {\r
+ BITSET_SET_BIT(cc->bs, j);\r
+ }\r
+ goto sb_end2;\r
+ }\r
+ for (j = prev; j < from; j++) {\r
+ if (j >= sb_out) goto sb_end2;\r
+ BITSET_SET_BIT(cc->bs, j);\r
+ }\r
+ prev = ONIGENC_CODE_RANGE_TO(mbr, i);\r
+ if (prev > limit) prev = limit;\r
+ prev++;\r
+ if (prev == 0) goto end; /* presumably guards wrap-around of prev past the largest value */\r
+ }\r
+ for (j = prev; j < sb_out; j++) {\r
+ BITSET_SET_BIT(cc->bs, j);\r
+ }\r
+\r
+ sb_end2:\r
+ prev = sb_out;\r
+\r
+ for (i = 0; i < n; i++) {\r
+ from = ONIGENC_CODE_RANGE_FROM(mbr, i);\r
+ if (from > limit) goto last;\r
+\r
+ if (prev < from) { /* gap before this range belongs to the complement */\r
+ r = add_code_range_to_buf(&(cc->mbuf), prev, from - 1);\r
+ if (r != 0) return r;\r
+ }\r
+ prev = ONIGENC_CODE_RANGE_TO(mbr, i);\r
+ if (prev > limit) prev = limit;\r
+ prev++;\r
+ if (prev == 0) goto end;\r
+ }\r
+\r
+ last:\r
+ r = add_code_range_to_buf(&(cc->mbuf), prev, MAX_CODE_POINT); /* everything from prev upward is in the complement */\r
+ if (r != 0) return r;\r
+ }\r
\r
+ end:\r
return 0;\r
}\r
\r
static int\r
add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)\r
{\r
+#define ASCII_LIMIT 127\r
+\r
int c, r;\r
+ int ascii_mode;\r
const OnigCodePoint *ranges;\r
+ OnigCodePoint limit;\r
OnigCodePoint sb_out;\r
OnigEncoding enc = env->enc;\r
\r
+ ascii_mode = IS_ASCII_MODE_CTYPE_OPTION(ctype, env->options);\r
+\r
r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges);\r
if (r == 0) {\r
- return add_ctype_to_cc_by_range(cc, ctype, not, env->enc, sb_out, ranges);\r
+ if (ascii_mode == 0)\r
+ r = add_ctype_to_cc_by_range(cc, ctype, not, env->enc, sb_out, ranges);\r
+ else\r
+ r = add_ctype_to_cc_by_range_limit(cc, ctype, not, env->enc, sb_out,\r
+ ranges, ASCII_LIMIT);\r
+ return r;\r
}\r
else if (r != ONIG_NO_SUPPORT_CONFIG) {\r
return r;\r
}\r
\r
r = 0;\r
+ limit = ascii_mode ? ASCII_LIMIT : SINGLE_BYTE_SIZE;\r
+\r
switch (ctype) {\r
case ONIGENC_CTYPE_ALPHA:\r
case ONIGENC_CTYPE_BLANK:\r
case ONIGENC_CTYPE_ASCII:\r
case ONIGENC_CTYPE_ALNUM:\r
if (not != 0) {\r
- for (c = 0; c < SINGLE_BYTE_SIZE; c++) {\r
- if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))\r
- BITSET_SET_BIT(cc->bs, c);\r
+ for (c = 0; c < (int )limit; c++) {\r
+ if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))\r
+ BITSET_SET_BIT(cc->bs, c);\r
}\r
+ for (c = limit; c < SINGLE_BYTE_SIZE; c++) {\r
+ BITSET_SET_BIT(cc->bs, c);\r
+ }\r
+\r
ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);\r
}\r
else {\r
- for (c = 0; c < SINGLE_BYTE_SIZE; c++) {\r
- if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))\r
- BITSET_SET_BIT(cc->bs, c);\r
+ for (c = 0; c < (int )limit; c++) {\r
+ if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))\r
+ BITSET_SET_BIT(cc->bs, c);\r
}\r
}\r
break;\r
\r
case ONIGENC_CTYPE_GRAPH:\r
case ONIGENC_CTYPE_PRINT:\r
+ case ONIGENC_CTYPE_WORD:\r
if (not != 0) {\r
- for (c = 0; c < SINGLE_BYTE_SIZE; c++) {\r
- if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))\r
- BITSET_SET_BIT(cc->bs, c);\r
- }\r
- }\r
- else {\r
- for (c = 0; c < SINGLE_BYTE_SIZE; c++) {\r
- if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))\r
- BITSET_SET_BIT(cc->bs, c);\r
+ for (c = 0; c < (int )limit; c++) {\r
+ if (ONIGENC_CODE_TO_MBCLEN(enc, c) > 0 /* check invalid code point */\r
+ && ! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))\r
+ BITSET_SET_BIT(cc->bs, c);\r
}\r
- ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);\r
- }\r
- break;\r
-\r
- case ONIGENC_CTYPE_WORD:\r
- if (not == 0) {\r
- for (c = 0; c < SINGLE_BYTE_SIZE; c++) {\r
- if (IS_CODE_SB_WORD(enc, c)) BITSET_SET_BIT(cc->bs, c);\r
+ for (c = limit; c < SINGLE_BYTE_SIZE; c++) {\r
+ if (ONIGENC_CODE_TO_MBCLEN(enc, c) > 0)\r
+ BITSET_SET_BIT(cc->bs, c);\r
}\r
- ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);\r
}\r
else {\r
- for (c = 0; c < SINGLE_BYTE_SIZE; c++) {\r
- if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) /* check invalid code point */\r
- && ! ONIGENC_IS_CODE_WORD(enc, c))\r
- BITSET_SET_BIT(cc->bs, c);\r
+ for (c = 0; c < (int )limit; c++) {\r
+ if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))\r
+ BITSET_SET_BIT(cc->bs, c);\r
}\r
+ if (ascii_mode == 0)\r
+ ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);\r
}\r
break;\r
\r
}\r
\r
static int\r
-parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end,\r
- ScanEnv* env)\r
+parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)\r
{\r
int r, ctype;\r
CClassNode* cc;\r
\r
*np = node_new_cclass();\r
CHECK_NULL_RETURN_MEMERR(*np);\r
- cc = NCCLASS(*np);\r
+ cc = CCLASS_(*np);\r
r = add_ctype_to_cc(cc, ctype, 0, env);\r
if (r != 0) return r;\r
if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);\r
\r
static int\r
next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type,\r
- enum CCSTATE* state, ScanEnv* env)\r
+ enum CCSTATE* state, ScanEnv* env)\r
{\r
int r;\r
\r
}\r
\r
static int\r
-next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,\r
- int* vs_israw, int v_israw,\r
- enum CCVALTYPE intype, enum CCVALTYPE* type,\r
- enum CCSTATE* state, ScanEnv* env)\r
+next_state_val(CClassNode* cc, OnigCodePoint *from, OnigCodePoint to,\r
+ int* from_israw, int to_israw,\r
+ enum CCVALTYPE intype, enum CCVALTYPE* type,\r
+ enum CCSTATE* state, ScanEnv* env)\r
{\r
int r;\r
\r
switch (*state) {\r
case CCS_VALUE:\r
- if (*type == CCV_SB)\r
- BITSET_SET_BIT(cc->bs, (int )(*vs));\r
+ if (*type == CCV_SB) {\r
+ if (*from > 0xff)\r
+ return ONIGERR_INVALID_CODE_POINT_VALUE;\r
+\r
+ BITSET_SET_BIT(cc->bs, (int )(*from));\r
+ }\r
else if (*type == CCV_CODE_POINT) {\r
- r = add_code_range(&(cc->mbuf), env, *vs, *vs);\r
+ r = add_code_range(&(cc->mbuf), env, *from, *from);\r
if (r < 0) return r;\r
}\r
break;\r
case CCS_RANGE:\r
if (intype == *type) {\r
if (intype == CCV_SB) {\r
- if (*vs > 0xff || v > 0xff)\r
+ if (*from > 0xff || to > 0xff)\r
return ONIGERR_INVALID_CODE_POINT_VALUE;\r
\r
- if (*vs > v) {\r
- if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))\r
- goto ccs_range_end;\r
- else\r
- return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;\r
- }\r
- bitset_set_range(cc->bs, (int )*vs, (int )v);\r
+ if (*from > to) {\r
+ if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))\r
+ goto ccs_range_end;\r
+ else\r
+ return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;\r
+ }\r
+ bitset_set_range(cc->bs, (int )*from, (int )to);\r
}\r
else {\r
- r = add_code_range(&(cc->mbuf), env, *vs, v);\r
- if (r < 0) return r;\r
+ r = add_code_range(&(cc->mbuf), env, *from, to);\r
+ if (r < 0) return r;\r
}\r
}\r
else {\r
-#if 0\r
- if (intype == CCV_CODE_POINT && *type == CCV_SB) {\r
-#endif\r
- if (*vs > v) {\r
- if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))\r
- goto ccs_range_end;\r
- else\r
- return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;\r
- }\r
- bitset_set_range(cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff));\r
- r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*vs, v);\r
- if (r < 0) return r;\r
-#if 0\r
+ if (*from > to) {\r
+ if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))\r
+ goto ccs_range_end;\r
+ else\r
+ return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;\r
}\r
- else\r
- return ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE;\r
-#endif\r
+ bitset_set_range(cc->bs, (int )*from, (int )(to < 0xff ? to : 0xff));\r
+ r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*from, to);\r
+ if (r < 0) return r;\r
}\r
ccs_range_end:\r
*state = CCS_COMPLETE;\r
break;\r
}\r
\r
- *vs_israw = v_israw;\r
- *vs = v;\r
- *type = intype;\r
+ *from_israw = to_israw;\r
+ *from = to;\r
+ *type = intype;\r
return 0;\r
}\r
\r
static int\r
code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,\r
- ScanEnv* env)\r
+ ScanEnv* env)\r
{\r
int in_esc;\r
OnigCodePoint code;\r
}\r
\r
static int\r
-parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,\r
- ScanEnv* env)\r
+parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)\r
{\r
int r, neg, len, fetched, and_start;\r
OnigCodePoint v, vs;\r
enum CCVALTYPE val_type, in_type;\r
int val_israw, in_israw;\r
\r
- prev_cc = (CClassNode* )NULL;\r
*np = NULL_NODE;\r
+ env->parse_depth++;\r
+ if (env->parse_depth > ParseDepthLimit)\r
+ return ONIGERR_PARSE_DEPTH_LIMIT_OVER;\r
+ prev_cc = (CClassNode* )NULL;\r
r = fetch_token_in_cc(tok, src, end, env);\r
if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) {\r
neg = 1;\r
\r
*np = node = node_new_cclass();\r
CHECK_NULL_RETURN_MEMERR(node);\r
- cc = NCCLASS(node);\r
+ cc = CCLASS_(node);\r
\r
and_start = 0;\r
state = CCS_START;\r
fetched = 0;\r
switch (r) {\r
case TK_CHAR:\r
+ any_char_in:\r
len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.c);\r
if (len > 1) {\r
- in_type = CCV_CODE_POINT;\r
+ in_type = CCV_CODE_POINT;\r
}\r
else if (len < 0) {\r
- r = len;\r
- goto err;\r
+ r = len;\r
+ goto err;\r
}\r
else {\r
- sb_char:\r
- in_type = CCV_SB;\r
+ /* sb_char: */\r
+ in_type = CCV_SB;\r
}\r
v = (OnigCodePoint )tok->u.c;\r
in_israw = 0;\r
case TK_RAW_BYTE:\r
/* tok->base != 0 : octal or hexadec. */\r
if (! ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base != 0) {\r
- UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];\r
- UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN;\r
- UChar* psave = p;\r
- int i, base = tok->base;\r
-\r
- buf[0] = (UChar)tok->u.c;\r
- for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) {\r
- r = fetch_token_in_cc(tok, &p, end, env);\r
- if (r < 0) goto err;\r
- if (r != TK_RAW_BYTE || tok->base != base) {\r
- fetched = 1;\r
- break;\r
- }\r
- buf[i] = (UChar)tok->u.c;\r
- }\r
-\r
- if (i < ONIGENC_MBC_MINLEN(env->enc)) {\r
- r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;\r
- goto err;\r
- }\r
-\r
- len = enclen(env->enc, buf);\r
- if (i < len) {\r
- r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;\r
- goto err;\r
- }\r
- else if (i > len) { /* fetch back */\r
- p = psave;\r
- for (i = 1; i < len; i++) {\r
- r = fetch_token_in_cc(tok, &p, end, env);\r
- }\r
- fetched = 0;\r
- }\r
-\r
- if (i == 1) {\r
- v = (OnigCodePoint )buf[0];\r
- goto raw_single;\r
- }\r
- else {\r
- v = ONIGENC_MBC_TO_CODE(env->enc, buf, bufe);\r
- in_type = CCV_CODE_POINT;\r
- }\r
+ UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];\r
+ UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN;\r
+ UChar* psave = p;\r
+ int i, base = tok->base;\r
+\r
+ buf[0] = tok->u.c;\r
+ for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) {\r
+ r = fetch_token_in_cc(tok, &p, end, env);\r
+ if (r < 0) goto err;\r
+ if (r != TK_RAW_BYTE || tok->base != base) {\r
+ fetched = 1;\r
+ break;\r
+ }\r
+ buf[i] = tok->u.c;\r
+ }\r
+\r
+ if (i < ONIGENC_MBC_MINLEN(env->enc)) {\r
+ r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;\r
+ goto err;\r
+ }\r
+\r
+ len = enclen(env->enc, buf);\r
+ if (i < len) {\r
+ r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;\r
+ goto err;\r
+ }\r
+ else if (i > len) { /* fetch back */\r
+ p = psave;\r
+ for (i = 1; i < len; i++) {\r
+ r = fetch_token_in_cc(tok, &p, end, env);\r
+ }\r
+ fetched = 0;\r
+ }\r
+\r
+ if (i == 1) {\r
+ v = (OnigCodePoint )buf[0];\r
+ goto raw_single;\r
+ }\r
+ else {\r
+ v = ONIGENC_MBC_TO_CODE(env->enc, buf, bufe);\r
+ in_type = CCV_CODE_POINT;\r
+ }\r
+ }\r
+ else {\r
+ v = (OnigCodePoint )tok->u.c;\r
+ raw_single:\r
+ in_type = CCV_SB;\r
+ }\r
+ in_israw = 1;\r
+ goto val_entry2;\r
+ break;\r
+\r
+ case TK_CODE_POINT:\r
+ v = tok->u.code;\r
+ in_israw = 1;\r
+ val_entry:\r
+ len = ONIGENC_CODE_TO_MBCLEN(env->enc, v);\r
+ if (len < 0) {\r
+ r = len;\r
+ goto err;\r
+ }\r
+ in_type = (len == 1 ? CCV_SB : CCV_CODE_POINT);\r
+ val_entry2:\r
+ r = next_state_val(cc, &vs, v, &val_israw, in_israw, in_type, &val_type,\r
+ &state, env);\r
+ if (r != 0) goto err;\r
+ break;\r
+\r
+ case TK_POSIX_BRACKET_OPEN:\r
+ r = parse_posix_bracket(cc, &p, end, env);\r
+ if (r < 0) goto err;\r
+ if (r == 1) { /* is not POSIX bracket */\r
+ CC_ESC_WARN(env, (UChar* )"[");\r
+ p = tok->backp;\r
+ v = (OnigCodePoint )tok->u.c;\r
+ in_israw = 0;\r
+ goto val_entry;\r
+ }\r
+ goto next_class;\r
+ break;\r
+\r
+ case TK_CHAR_TYPE:\r
+ r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not, env);\r
+ if (r != 0) goto err;\r
+\r
+ next_class:\r
+ r = next_state_class(cc, &vs, &val_type, &state, env);\r
+ if (r != 0) goto err;\r
+ break;\r
+\r
+ case TK_CHAR_PROPERTY:\r
+ {\r
+ int ctype = fetch_char_property_to_ctype(&p, end, env);\r
+ if (ctype < 0) {\r
+ r = ctype;\r
+ goto err;\r
+ }\r
+ r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, env);\r
+ if (r != 0) goto err;\r
+ goto next_class;\r
+ }\r
+ break;\r
+\r
+ case TK_CC_RANGE:\r
+ if (state == CCS_VALUE) {\r
+ r = fetch_token_in_cc(tok, &p, end, env);\r
+ if (r < 0) goto err;\r
+ fetched = 1;\r
+ if (r == TK_CC_CLOSE) { /* allow [x-] */\r
+ range_end_val:\r
+ v = (OnigCodePoint )'-';\r
+ in_israw = 0;\r
+ goto val_entry;\r
+ }\r
+ else if (r == TK_CC_AND) {\r
+ CC_ESC_WARN(env, (UChar* )"-");\r
+ goto range_end_val;\r
+ }\r
+\r
+ if (val_type == CCV_CLASS) {\r
+ r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;\r
+ goto err;\r
+ }\r
+\r
+ state = CCS_RANGE;\r
+ }\r
+ else if (state == CCS_START) {\r
+ /* [-xa] is allowed */\r
+ v = (OnigCodePoint )tok->u.c;\r
+ in_israw = 0;\r
+\r
+ r = fetch_token_in_cc(tok, &p, end, env);\r
+ if (r < 0) goto err;\r
+ fetched = 1;\r
+ /* [--x] or [a&&-x] is warned. */\r
+ if (r == TK_CC_RANGE || and_start != 0)\r
+ CC_ESC_WARN(env, (UChar* )"-");\r
+\r
+ goto val_entry;\r
+ }\r
+ else if (state == CCS_RANGE) {\r
+ CC_ESC_WARN(env, (UChar* )"-");\r
+ goto any_char_in; /* [!--x] is allowed */\r
+ }\r
+ else { /* CCS_COMPLETE */\r
+ r = fetch_token_in_cc(tok, &p, end, env);\r
+ if (r < 0) goto err;\r
+ fetched = 1;\r
+ if (r == TK_CC_CLOSE) goto range_end_val; /* allow [a-b-] */\r
+ else if (r == TK_CC_AND) {\r
+ CC_ESC_WARN(env, (UChar* )"-");\r
+ goto range_end_val;\r
+ }\r
+\r
+ if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) {\r
+ CC_ESC_WARN(env, (UChar* )"-");\r
+ goto range_end_val; /* [0-9-a] is allowed as [0-9\-a] */\r
+ }\r
+ r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;\r
+ goto err;\r
+ }\r
+ break;\r
+\r
+ case TK_CC_CC_OPEN: /* [ */\r
+ {\r
+ Node *anode;\r
+ CClassNode* acc;\r
+\r
+ r = parse_char_class(&anode, tok, &p, end, env);\r
+ if (r != 0) {\r
+ onig_node_free(anode);\r
+ goto cc_open_err;\r
+ }\r
+ acc = CCLASS_(anode);\r
+ r = or_cclass(cc, acc, env->enc);\r
+ onig_node_free(anode);\r
+\r
+ cc_open_err:\r
+ if (r != 0) goto err;\r
+ }\r
+ break;\r
+\r
+ case TK_CC_AND: /* && */\r
+ {\r
+ if (state == CCS_VALUE) {\r
+ r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,\r
+ &val_type, &state, env);\r
+ if (r != 0) goto err;\r
+ }\r
+ /* initialize local variables */\r
+ and_start = 1;\r
+ state = CCS_START;\r
+\r
+ if (IS_NOT_NULL(prev_cc)) {\r
+ r = and_cclass(prev_cc, cc, env->enc);\r
+ if (r != 0) goto err;\r
+ bbuf_free(cc->mbuf);\r
+ }\r
+ else {\r
+ prev_cc = cc;\r
+ cc = &work_cc;\r
+ }\r
+ initialize_cclass(cc);\r
+ }\r
+ break;\r
+\r
+ case TK_EOT:\r
+ r = ONIGERR_PREMATURE_END_OF_CHAR_CLASS;\r
+ goto err;\r
+ break;\r
+ default:\r
+ r = ONIGERR_PARSER_BUG;\r
+ goto err;\r
+ break;\r
+ }\r
+\r
+ if (fetched)\r
+ r = tok->type;\r
+ else {\r
+ r = fetch_token_in_cc(tok, &p, end, env);\r
+ if (r < 0) goto err;\r
+ }\r
+ }\r
+\r
+ if (state == CCS_VALUE) {\r
+ r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,\r
+ &val_type, &state, env);\r
+ if (r != 0) goto err;\r
+ }\r
+\r
+ if (IS_NOT_NULL(prev_cc)) {\r
+ r = and_cclass(prev_cc, cc, env->enc);\r
+ if (r != 0) goto err;\r
+ bbuf_free(cc->mbuf);\r
+ cc = prev_cc;\r
+ }\r
+\r
+ if (neg != 0)\r
+ NCCLASS_SET_NOT(cc);\r
+ else\r
+ NCCLASS_CLEAR_NOT(cc);\r
+ if (IS_NCCLASS_NOT(cc) &&\r
+ IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) {\r
+ int is_empty = (IS_NULL(cc->mbuf) ? 1 : 0);\r
+ if (is_empty != 0)\r
+ BITSET_IS_EMPTY(cc->bs, is_empty);\r
+\r
+ if (is_empty == 0) {\r
+#define NEWLINE_CODE 0x0a\r
+\r
+ if (ONIGENC_IS_CODE_NEWLINE(env->enc, NEWLINE_CODE)) {\r
+ if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1)\r
+ BITSET_SET_BIT(cc->bs, NEWLINE_CODE);\r
+ else\r
+ add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE);\r
+ }\r
+ }\r
+ }\r
+ *src = p;\r
+ env->parse_depth--;\r
+ return 0;\r
+\r
+ err:\r
+ if (cc != CCLASS_(*np))\r
+ bbuf_free(cc->mbuf);\r
+ return r;\r
+}\r
+\r
+static int parse_subexp(Node** top, OnigToken* tok, int term,\r
+ UChar** src, UChar* end, ScanEnv* env);\r
+\r
+#ifdef USE_CALLOUT\r
+\r
+/* (?{...}[tag][X<>]) (?{{...}}[tag][X<>])\r
+ *\r
+ * Parse the remainder of a callout-of-contents group starting at *src and\r
+ * create the callout node in *np.\r
+ *\r
+ * Any '{' found at *src are counted in brace_nest (presumably the caller has\r
+ * already consumed the first '{' -- confirm at the call site).  The contents\r
+ * run up to the first '}' that is immediately followed by brace_nest more\r
+ * '}'.  After the closing braces come an optional "[tag]", an optional\r
+ * direction character and finally the terminator cterm:\r
+ *   'X'  ONIG_CALLOUT_IN_RETRACTION is added to ONIG_CALLOUT_IN_PROGRESS\r
+ *   '<'  ONIG_CALLOUT_IN_RETRACTION only\r
+ *   '>'  ONIG_CALLOUT_IN_PROGRESS only (same as giving no character)\r
+ *\r
+ * On success a callout list entry is registered for env->reg, a non-empty\r
+ * tag is registered for it, the contents are copied with onigenc_strdup()\r
+ * and stored in the entry, *src is advanced past cterm and 0 is returned.\r
+ *\r
+ * Errors: ONIGERR_INVALID_CALLOUT_PATTERN when the pattern ends inside the\r
+ * braces/contents or the group does not end with cterm;\r
+ * ONIGERR_END_PATTERN_IN_GROUP when the pattern ends after the closing\r
+ * braces; ONIGERR_INVALID_CALLOUT_TAG_NAME for a rejected tag; otherwise\r
+ * whatever the registration/allocation helpers return.  *src is left\r
+ * untouched on every error path.\r
+ */\r
+static int\r
+parse_callout_of_contents(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* env)\r
+{\r
+ int r;\r
+ int i;\r
+ int in;\r
+ int num;\r
+ OnigCodePoint c;\r
+ UChar* code_start;\r
+ UChar* code_end;\r
+ UChar* contents;\r
+ UChar* tag_start;\r
+ UChar* tag_end;\r
+ int brace_nest;\r
+ CalloutListEntry* e;\r
+ RegexExt* ext;\r
+ OnigEncoding enc = env->enc;\r
+ UChar* p = *src;\r
+\r
+ if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;\r
+\r
+ brace_nest = 0;\r
+ while (PPEEK_IS('{')) { /* count the opening braces found at *src */\r
+ brace_nest++;\r
+ PINC_S;\r
+ if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;\r
+ }\r
+\r
+ in = ONIG_CALLOUT_IN_PROGRESS;\r
+ code_start = p;\r
+ while (1) {\r
+ if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;\r
+\r
+ code_end = p; /* candidate end of contents: position of the char about to be read */\r
+ PFETCH_S(c);\r
+ if (c == '}') {\r
+ i = brace_nest;\r
+ while (i > 0) { /* require brace_nest more '}' right after this one */\r
+ if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;\r
+ PFETCH_S(c);\r
+ if (c == '}') i--;\r
+ else break;\r
+ }\r
+ if (i == 0) break; /* full closing sequence seen */\r
+ }\r
+ }\r
+\r
+ if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
+\r
+ PFETCH_S(c);\r
+ if (c == '[') {\r
+ if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
+ tag_start = p;\r
+ while (! PEND) {\r
+ if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; /* cannot fire: the loop condition has just tested PEND */\r
+ tag_end = p;\r
+ PFETCH_S(c);\r
+ if (c == ']') break;\r
+ }\r
+ if (! is_allowed_callout_tag_name(enc, tag_start, tag_end))\r
+ return ONIGERR_INVALID_CALLOUT_TAG_NAME;\r
+\r
+ if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
+ PFETCH_S(c);\r
+ }\r
+ else {\r
+ tag_start = tag_end = 0; /* no tag given */\r
+ }\r
+\r
+ if (c == 'X') {\r
+ in |= ONIG_CALLOUT_IN_RETRACTION; /* progress and retraction */\r
+ if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
+ PFETCH_S(c);\r
+ }\r
+ else if (c == '<') {\r
+ in = ONIG_CALLOUT_IN_RETRACTION; /* retraction only */\r
+ if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
+ PFETCH_S(c);\r
+ }\r
+ else if (c == '>') { /* no needs (default) */\r
+ if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
+ PFETCH_S(c);\r
+ }\r
+\r
+ if (c != cterm)\r
+ return ONIGERR_INVALID_CALLOUT_PATTERN;\r
+\r
+ r = reg_callout_list_entry(env, &num);\r
+ if (r != 0) return r;\r
+\r
+ ext = onig_get_regex_ext(env->reg);\r
+ if (IS_NULL(ext->pattern)) { /* set the ext pattern from env->pattern if it is not set yet */\r
+ r = onig_ext_set_pattern(env->reg, env->pattern, env->pattern_end);\r
+ if (r != ONIG_NORMAL) return r;\r
+ }\r
+\r
+ if (tag_start != tag_end) { /* register only a non-empty tag */\r
+ r = callout_tag_entry(env->reg, tag_start, tag_end, num);\r
+ if (r != ONIG_NORMAL) return r;\r
+ }\r
+\r
+ contents = onigenc_strdup(enc, code_start, code_end);\r
+ CHECK_NULL_RETURN_MEMERR(contents);\r
+\r
+ r = node_new_callout(np, ONIG_CALLOUT_OF_CONTENTS, num, ONIG_NON_NAME_ID, env);\r
+ if (r != 0) {\r
+ xfree(contents); /* node creation failed: release the copy */\r
+ return r;\r
+ }\r
+\r
+ e = onig_reg_callout_list_at(env->reg, num);\r
+ e->of = ONIG_CALLOUT_OF_CONTENTS;\r
+ e->in = in;\r
+ e->name_id = ONIG_NON_NAME_ID;\r
+ e->u.content.start = contents;\r
+ e->u.content.end = contents + (code_end - code_start);\r
+\r
+ *src = p; /* the group is consumed only on success */\r
+ return 0;\r
+}\r
+\r
+static long\r
+parse_long(OnigEncoding enc, UChar* s, UChar* end, int sign_on, long max, long* rl)\r
+{\r
+ long v;\r
+ long d;\r
+ int flag;\r
+ UChar* p;\r
+ OnigCodePoint c;\r
+\r
+ if (s >= end) return ONIGERR_INVALID_CALLOUT_ARG;\r
+\r
+ flag = 1;\r
+ v = 0;\r
+ p = s;\r
+ while (p < end) {\r
+ c = ONIGENC_MBC_TO_CODE(enc, p, end);\r
+ p += ONIGENC_MBC_ENC_LEN(enc, p);\r
+ if (c >= '0' && c <= '9') {\r
+ d = (long )(c - '0');\r
+ if (v > (max - d) / 10)\r
+ return ONIGERR_INVALID_CALLOUT_ARG;\r
+\r
+ v = v * 10 + d;\r
+ }\r
+ else if (sign_on != 0 && (c == '-' || c == '+')) {\r
+ if (c == '-') flag = -1;\r
+ }\r
+ else\r
+ return ONIGERR_INVALID_CALLOUT_ARG;\r
+\r
+ sign_on = 0;\r
+ }\r
+\r
+ *rl = flag * v;\r
+ return ONIG_NORMAL;\r
+}\r
+\r
+static int\r
+parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end,\r
+ unsigned int types[], OnigValue vals[], ScanEnv* env)\r
+{\r
+#define MAX_CALLOUT_ARG_BYTE_LENGTH 128\r
+\r
+ int r;\r
+ int n;\r
+ int esc;\r
+ int cn;\r
+ UChar* s;\r
+ UChar* e;\r
+ UChar* eesc;\r
+ OnigCodePoint c;\r
+ UChar* bufend;\r
+ UChar buf[MAX_CALLOUT_ARG_BYTE_LENGTH];\r
+ OnigEncoding enc = env->enc;\r
+ UChar* p = *src;\r
+\r
+ if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;\r
+\r
+ n = 0;\r
+ while (n < ONIG_CALLOUT_MAX_ARGS_NUM) {\r
+ c = 0;\r
+ cn = 0;\r
+ esc = 0;\r
+ eesc = 0;\r
+ bufend = buf;\r
+ s = e = p;\r
+ while (1) {\r
+ if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;\r
+\r
+ e = p;\r
+ PFETCH_S(c);\r
+ if (esc != 0) {\r
+ esc = 0;\r
+ if (c == '\\' || c == cterm || c == ',') {\r
+ /* */\r
+ }\r
+ else {\r
+ e = eesc;\r
+ cn++;\r
+ }\r
+ goto add_char;\r
}\r
else {\r
- v = (OnigCodePoint )tok->u.c;\r
- raw_single:\r
- in_type = CCV_SB;\r
- }\r
- in_israw = 1;\r
- goto val_entry2;\r
- break;\r
+ if (c == '\\') {\r
+ esc = 1;\r
+ eesc = e;\r
+ }\r
+ else if (c == cterm || c == ',')\r
+ break;\r
+ else {\r
+ size_t clen;\r
\r
- case TK_CODE_POINT:\r
- v = tok->u.code;\r
- in_israw = 1;\r
- val_entry:\r
- len = ONIGENC_CODE_TO_MBCLEN(env->enc, v);\r
- if (len < 0) {\r
- r = len;\r
- goto err;\r
- }\r
- in_type = (len == 1 ? CCV_SB : CCV_CODE_POINT);\r
- val_entry2:\r
- r = next_state_val(cc, &vs, v, &val_israw, in_israw, in_type, &val_type,\r
- &state, env);\r
- if (r != 0) goto err;\r
- break;\r
+ add_char:\r
+ if (skip_mode == 0) {\r
+ clen = p - e;\r
+ if (bufend + clen > buf + MAX_CALLOUT_ARG_BYTE_LENGTH)\r
+ return ONIGERR_INVALID_CALLOUT_ARG; /* too long argument */\r
\r
- case TK_POSIX_BRACKET_OPEN:\r
- r = parse_posix_bracket(cc, &p, end, env);\r
- if (r < 0) goto err;\r
- if (r == 1) { /* is not POSIX bracket */\r
- CC_ESC_WARN(env, (UChar* )"[");\r
- p = tok->backp;\r
- v = (OnigCodePoint )tok->u.c;\r
- in_israw = 0;\r
- goto val_entry;\r
+ xmemcpy(bufend, e, clen);\r
+ bufend += clen;\r
+ }\r
+ cn++;\r
+ }\r
}\r
- goto next_class;\r
- break;\r
+ }\r
\r
- case TK_CHAR_TYPE:\r
- r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not, env);\r
- if (r != 0) return r;\r
+ if (cn != 0) {\r
+ if (skip_mode == 0) {\r
+ if ((types[n] & ONIG_TYPE_LONG) != 0) {\r
+ int fixed = 0;\r
+ if (cn > 0) {\r
+ long rl;\r
+ r = parse_long(enc, buf, bufend, 1, LONG_MAX, &rl);\r
+ if (r == ONIG_NORMAL) {\r
+ vals[n].l = rl;\r
+ fixed = 1;\r
+ types[n] = ONIG_TYPE_LONG;\r
+ }\r
+ }\r
\r
- next_class:\r
- r = next_state_class(cc, &vs, &val_type, &state, env);\r
- if (r != 0) goto err;\r
- break;\r
+ if (fixed == 0) {\r
+ types[n] = (types[n] & ~ONIG_TYPE_LONG);\r
+ if (types[n] == ONIG_TYPE_VOID)\r
+ return ONIGERR_INVALID_CALLOUT_ARG;\r
+ }\r
+ }\r
\r
- case TK_CHAR_PROPERTY:\r
- {\r
- int ctype;\r
+ switch (types[n]) {\r
+ case ONIG_TYPE_LONG:\r
+ break;\r
\r
- ctype = fetch_char_property_to_ctype(&p, end, env);\r
- if (ctype < 0) return ctype;\r
- r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, env);\r
- if (r != 0) return r;\r
- goto next_class;\r
- }\r
- break;\r
+ case ONIG_TYPE_CHAR:\r
+ if (cn != 1) return ONIGERR_INVALID_CALLOUT_ARG;\r
+ vals[n].c = ONIGENC_MBC_TO_CODE(enc, buf, bufend);\r
+ break;\r
\r
- case TK_CC_RANGE:\r
- if (state == CCS_VALUE) {\r
- r = fetch_token_in_cc(tok, &p, end, env);\r
- if (r < 0) goto err;\r
- fetched = 1;\r
- if (r == TK_CC_CLOSE) { /* allow [x-] */\r
- range_end_val:\r
- v = (OnigCodePoint )'-';\r
- in_israw = 0;\r
- goto val_entry;\r
- }\r
- else if (r == TK_CC_AND) {\r
- CC_ESC_WARN(env, (UChar* )"-");\r
- goto range_end_val;\r
- }\r
- state = CCS_RANGE;\r
- }\r
- else if (state == CCS_START) {\r
- /* [-xa] is allowed */\r
- v = (OnigCodePoint )tok->u.c;\r
- in_israw = 0;\r
+ case ONIG_TYPE_STRING:\r
+ {\r
+ UChar* rs = onigenc_strdup(enc, buf, bufend);\r
+ CHECK_NULL_RETURN_MEMERR(rs);\r
+ vals[n].s.start = rs;\r
+ vals[n].s.end = rs + (e - s);\r
+ }\r
+ break;\r
\r
- r = fetch_token_in_cc(tok, &p, end, env);\r
- if (r < 0) goto err;\r
- fetched = 1;\r
- /* [--x] or [a&&-x] is warned. */\r
- if (r == TK_CC_RANGE || and_start != 0)\r
- CC_ESC_WARN(env, (UChar* )"-");\r
+ case ONIG_TYPE_TAG:\r
+ if (eesc != 0 || ! is_allowed_callout_tag_name(enc, s, e))\r
+ return ONIGERR_INVALID_CALLOUT_TAG_NAME;\r
\r
- goto val_entry;\r
- }\r
- else if (state == CCS_RANGE) {\r
- CC_ESC_WARN(env, (UChar* )"-");\r
- goto sb_char; /* [!--x] is allowed */\r
- }\r
- else { /* CCS_COMPLETE */\r
- r = fetch_token_in_cc(tok, &p, end, env);\r
- if (r < 0) goto err;\r
- fetched = 1;\r
- if (r == TK_CC_CLOSE) goto range_end_val; /* allow [a-b-] */\r
- else if (r == TK_CC_AND) {\r
- CC_ESC_WARN(env, (UChar* )"-");\r
- goto range_end_val;\r
- }\r
- \r
- if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) {\r
- CC_ESC_WARN(env, (UChar* )"-");\r
- goto sb_char; /* [0-9-a] is allowed as [0-9\-a] */\r
- }\r
- r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;\r
- goto err;\r
+ vals[n].s.start = s;\r
+ vals[n].s.end = e;\r
+ break;\r
+\r
+ case ONIG_TYPE_VOID:\r
+ case ONIG_TYPE_POINTER:\r
+ return ONIGERR_PARSER_BUG;\r
+ break;\r
+ }\r
}\r
- break;\r
\r
- case TK_CC_CC_OPEN: /* [ */\r
- {\r
- Node *anode;\r
- CClassNode* acc;\r
+ n++;\r
+ }\r
\r
- r = parse_char_class(&anode, tok, &p, end, env);\r
- if (r != 0) goto cc_open_err;\r
- acc = NCCLASS(anode);\r
- r = or_cclass(cc, acc, env->enc);\r
+ if (c == cterm) break;\r
+ }\r
\r
- onig_node_free(anode);\r
- cc_open_err:\r
- if (r != 0) goto err;\r
- }\r
- break;\r
+ if (c != cterm) return ONIGERR_INVALID_CALLOUT_PATTERN;\r
\r
- case TK_CC_AND: /* && */\r
- {\r
- if (state == CCS_VALUE) {\r
- r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,\r
- &val_type, &state, env);\r
- if (r != 0) goto err;\r
- }\r
- /* initialize local variables */\r
- and_start = 1;\r
- state = CCS_START;\r
-\r
- if (IS_NOT_NULL(prev_cc)) {\r
- r = and_cclass(prev_cc, cc, env->enc);\r
- if (r != 0) goto err;\r
- bbuf_free(cc->mbuf);\r
- }\r
- else {\r
- prev_cc = cc;\r
- cc = &work_cc;\r
- }\r
- initialize_cclass(cc);\r
- }\r
- break;\r
+ *src = p;\r
+ return n;\r
+}\r
\r
- case TK_EOT:\r
- r = ONIGERR_PREMATURE_END_OF_CHAR_CLASS;\r
- goto err;\r
- break;\r
- default:\r
- r = ONIGERR_PARSER_BUG;\r
- goto err;\r
- break;\r
- }\r
+/* (*name[TAG]) (*name[TAG]{a,b,..}) */\r
+static int\r
+parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* env)\r
+{\r
+ int r;\r
+ int i;\r
+ int in;\r
+ int num;\r
+ int name_id;\r
+ int arg_num;\r
+ int max_arg_num;\r
+ int opt_arg_num;\r
+ int is_not_single;\r
+ OnigCodePoint c;\r
+ UChar* name_start;\r
+ UChar* name_end;\r
+ UChar* tag_start;\r
+ UChar* tag_end;\r
+ Node* node;\r
+ CalloutListEntry* e;\r
+ RegexExt* ext;\r
+ unsigned int types[ONIG_CALLOUT_MAX_ARGS_NUM];\r
+ OnigValue vals[ONIG_CALLOUT_MAX_ARGS_NUM];\r
+ OnigEncoding enc = env->enc;\r
+ UChar* p = *src;\r
\r
- if (fetched)\r
- r = tok->type;\r
- else {\r
- r = fetch_token_in_cc(tok, &p, end, env);\r
- if (r < 0) goto err;\r
+ /* PFETCH_READY; */\r
+ if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;\r
+\r
+ node = 0;\r
+ name_start = p;\r
+ while (1) {\r
+ if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
+ name_end = p;\r
+ PFETCH_S(c);\r
+ if (c == cterm || c == '[' || c == '{') break;\r
+ }\r
+\r
+ if (! is_allowed_callout_name(enc, name_start, name_end))\r
+ return ONIGERR_INVALID_CALLOUT_NAME;\r
+\r
+ if (c == '[') {\r
+ if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
+ tag_start = p;\r
+ while (! PEND) {\r
+ if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
+ tag_end = p;\r
+ PFETCH_S(c);\r
+ if (c == ']') break;\r
}\r
+ if (! is_allowed_callout_tag_name(enc, tag_start, tag_end))\r
+ return ONIGERR_INVALID_CALLOUT_TAG_NAME;\r
+\r
+ if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
+ PFETCH_S(c);\r
+ }\r
+ else {\r
+ tag_start = tag_end = 0;\r
}\r
\r
- if (state == CCS_VALUE) {\r
- r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,\r
- &val_type, &state, env);\r
- if (r != 0) goto err;\r
+ if (c == '{') {\r
+ UChar* save;\r
+\r
+ if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
+\r
+ /* read for single check only */\r
+ save = p;\r
+ arg_num = parse_callout_args(1, '}', &p, end, 0, 0, env);\r
+ if (arg_num < 0) return arg_num;\r
+\r
+ is_not_single = PPEEK_IS(cterm) ? 0 : 1;\r
+ p = save;\r
+ r = get_callout_name_id_by_name(enc, is_not_single, name_start, name_end,\r
+ &name_id);\r
+ if (r != ONIG_NORMAL) return r;\r
+\r
+ max_arg_num = get_callout_arg_num_by_name_id(name_id);\r
+ for (i = 0; i < max_arg_num; i++) {\r
+ types[i] = get_callout_arg_type_by_name_id(name_id, i);\r
+ }\r
+\r
+ arg_num = parse_callout_args(0, '}', &p, end, types, vals, env);\r
+ if (arg_num < 0) return arg_num;\r
+\r
+ if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
+ PFETCH_S(c);\r
}\r
+ else {\r
+ arg_num = 0;\r
\r
- if (IS_NOT_NULL(prev_cc)) {\r
- r = and_cclass(prev_cc, cc, env->enc);\r
- if (r != 0) goto err;\r
- bbuf_free(cc->mbuf);\r
- cc = prev_cc;\r
+ is_not_single = 0;\r
+ r = get_callout_name_id_by_name(enc, is_not_single, name_start, name_end,\r
+ &name_id);\r
+ if (r != ONIG_NORMAL) return r;\r
+\r
+ max_arg_num = get_callout_arg_num_by_name_id(name_id);\r
+ for (i = 0; i < max_arg_num; i++) {\r
+ types[i] = get_callout_arg_type_by_name_id(name_id, i);\r
+ }\r
}\r
\r
- if (neg != 0)\r
- NCCLASS_SET_NOT(cc);\r
- else\r
- NCCLASS_CLEAR_NOT(cc);\r
- if (IS_NCCLASS_NOT(cc) &&\r
- IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) {\r
- int is_empty;\r
+ in = onig_get_callout_in_by_name_id(name_id);\r
+ opt_arg_num = get_callout_opt_arg_num_by_name_id(name_id);\r
+ if (arg_num > max_arg_num || arg_num < (max_arg_num - opt_arg_num))\r
+ return ONIGERR_INVALID_CALLOUT_ARG;\r
\r
- is_empty = (IS_NULL(cc->mbuf) ? 1 : 0);\r
- if (is_empty != 0)\r
- BITSET_IS_EMPTY(cc->bs, is_empty);\r
+ if (c != cterm)\r
+ return ONIGERR_INVALID_CALLOUT_PATTERN;\r
\r
- if (is_empty == 0) {\r
-#define NEWLINE_CODE 0x0a\r
+ r = reg_callout_list_entry(env, &num);\r
+ if (r != 0) return r;\r
\r
- if (ONIGENC_IS_CODE_NEWLINE(env->enc, NEWLINE_CODE)) {\r
- if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1)\r
- BITSET_SET_BIT(cc->bs, NEWLINE_CODE);\r
- else\r
- add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE);\r
- }\r
- }\r
+ ext = onig_get_regex_ext(env->reg);\r
+ if (IS_NULL(ext->pattern)) {\r
+ r = onig_ext_set_pattern(env->reg, env->pattern, env->pattern_end);\r
+ if (r != ONIG_NORMAL) return r;\r
+ }\r
+\r
+ if (tag_start != tag_end) {\r
+ r = callout_tag_entry(env->reg, tag_start, tag_end, num);\r
+ if (r != ONIG_NORMAL) return r;\r
+ }\r
+\r
+ r = node_new_callout(&node, ONIG_CALLOUT_OF_NAME, num, name_id, env);\r
+ if (r != ONIG_NORMAL) return r;\r
+\r
+ e = onig_reg_callout_list_at(env->reg, num);\r
+ e->of = ONIG_CALLOUT_OF_NAME;\r
+ e->in = in;\r
+ e->name_id = name_id;\r
+ e->type = onig_get_callout_type_by_name_id(name_id);\r
+ e->start_func = onig_get_callout_start_func_by_name_id(name_id);\r
+ e->end_func = onig_get_callout_end_func_by_name_id(name_id);\r
+ e->u.arg.num = max_arg_num;\r
+ e->u.arg.passed_num = arg_num;\r
+ for (i = 0; i < max_arg_num; i++) {\r
+ e->u.arg.types[i] = types[i];\r
+ if (i < arg_num)\r
+ e->u.arg.vals[i] = vals[i];\r
+ else\r
+ e->u.arg.vals[i] = get_callout_opt_default_by_name_id(name_id, i);\r
}\r
+\r
+ *np = node;\r
*src = p;\r
return 0;\r
-\r
- err:\r
- if (cc != NCCLASS(*np))\r
- bbuf_free(cc->mbuf);\r
- onig_node_free(*np);\r
- return r;\r
}\r
-\r
-static int parse_subexp(Node** top, OnigToken* tok, int term,\r
- UChar** src, UChar* end, ScanEnv* env);\r
+#endif\r
\r
static int\r
-parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,\r
- ScanEnv* env)\r
+parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,\r
+ ScanEnv* env)\r
{\r
int r, num;\r
Node *target;\r
OnigOptionType option;\r
OnigCodePoint c;\r
+ int list_capture;\r
OnigEncoding enc = env->enc;\r
\r
-#ifdef USE_NAMED_GROUP\r
- int list_capture;\r
+ UChar* p = *src;\r
+ PFETCH_READY;\r
+\r
+ *np = NULL;\r
+ if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;\r
+\r
+ option = env->options;\r
+ c = PPEEK;\r
+ if (c == '?' && IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {\r
+ PINC;\r
+ if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
+\r
+ PFETCH(c);\r
+ switch (c) {\r
+ case ':': /* (?:...) grouping only */\r
+ group:\r
+ r = fetch_token(tok, &p, end, env);\r
+ if (r < 0) return r;\r
+ r = parse_subexp(np, tok, term, &p, end, env);\r
+ if (r < 0) return r;\r
+ *src = p;\r
+ return 1; /* group */\r
+ break;\r
+\r
+ case '=':\r
+ *np = onig_node_new_anchor(ANCHOR_PREC_READ, 0);\r
+ break;\r
+ case '!': /* preceding read */\r
+ *np = onig_node_new_anchor(ANCHOR_PREC_READ_NOT, 0);\r
+ break;\r
+ case '>': /* (?>...) stop backtrack */\r
+ *np = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);\r
+ break;\r
+\r
+ case '\'':\r
+ if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {\r
+ goto named_group1;\r
+ }\r
+ else\r
+ return ONIGERR_UNDEFINED_GROUP_OPTION;\r
+ break;\r
+\r
+ case '<': /* look behind (?<=...), (?<!...) */\r
+ if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;\r
+ PFETCH(c);\r
+ if (c == '=')\r
+ *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND, 0);\r
+ else if (c == '!')\r
+ *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND_NOT, 0);\r
+ else {\r
+ if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {\r
+ UChar *name;\r
+ UChar *name_end;\r
+ enum REF_NUM num_type;\r
+\r
+ PUNFETCH;\r
+ c = '<';\r
+\r
+ named_group1:\r
+ list_capture = 0;\r
+\r
+ named_group2:\r
+ name = p;\r
+ r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num,\r
+ &num_type, 0);\r
+ if (r < 0) return r;\r
+\r
+ num = scan_env_add_mem_entry(env);\r
+ if (num < 0) return num;\r
+ if (list_capture != 0 && num >= (int )MEM_STATUS_BITS_NUM)\r
+ return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;\r
+\r
+ r = name_add(env->reg, name, name_end, num, env);\r
+ if (r != 0) return r;\r
+ *np = node_new_memory(1);\r
+ CHECK_NULL_RETURN_MEMERR(*np);\r
+ ENCLOSURE_(*np)->m.regnum = num;\r
+ if (list_capture != 0)\r
+ MEM_STATUS_ON_SIMPLE(env->capture_history, num);\r
+ env->num_named++;\r
+ }\r
+ else {\r
+ return ONIGERR_UNDEFINED_GROUP_OPTION;\r
+ }\r
+ }\r
+ break;\r
+\r
+ case '~':\r
+ if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP)) {\r
+ Node* absent;\r
+ Node* expr;\r
+ int head_bar;\r
+ int is_range_cutter;\r
+\r
+ if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
+\r
+ if (PPEEK_IS('|')) { /* (?~|generator|absent) */\r
+ PINC;\r
+ if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
+\r
+ head_bar = 1;\r
+ if (PPEEK_IS(')')) { /* (?~|) : range clear */\r
+ PINC;\r
+ r = make_range_clear(np, env);\r
+ if (r != 0) return r;\r
+ goto end;\r
+ }\r
+ }\r
+ else\r
+ head_bar = 0;\r
+\r
+ r = fetch_token(tok, &p, end, env);\r
+ if (r < 0) return r;\r
+ r = parse_subexp(&absent, tok, term, &p, end, env);\r
+ if (r < 0) {\r
+ onig_node_free(absent);\r
+ return r;\r
+ }\r
+\r
+ expr = NULL_NODE;\r
+ is_range_cutter = 0;\r
+ if (head_bar != 0) {\r
+ Node* top = absent;\r
+ if (NODE_TYPE(top) != NODE_ALT || IS_NULL(NODE_CDR(top))) {\r
+ expr = NULL_NODE;\r
+ is_range_cutter = 1;\r
+ /* return ONIGERR_INVALID_ABSENT_GROUP_GENERATOR_PATTERN; */\r
+ }\r
+ else {\r
+ absent = NODE_CAR(top);\r
+ expr = NODE_CDR(top);\r
+ NODE_CAR(top) = NULL_NODE;\r
+ NODE_CDR(top) = NULL_NODE;\r
+ onig_node_free(top);\r
+ if (IS_NULL(NODE_CDR(expr))) {\r
+ top = expr;\r
+ expr = NODE_CAR(top);\r
+ NODE_CAR(top) = NULL_NODE;\r
+ onig_node_free(top);\r
+ }\r
+ }\r
+ }\r
+\r
+ r = make_absent_tree(np, absent, expr, is_range_cutter, env);\r
+ if (r != 0) {\r
+ return r;\r
+ }\r
+ goto end;\r
+ }\r
+ else {\r
+ return ONIGERR_UNDEFINED_GROUP_OPTION;\r
+ }\r
+ break;\r
+\r
+#ifdef USE_CALLOUT\r
+ case '{':\r
+ if (! IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS))\r
+ return ONIGERR_UNDEFINED_GROUP_OPTION;\r
+\r
+ r = parse_callout_of_contents(np, ')', &p, end, env);\r
+ if (r != 0) return r;\r
+\r
+ goto end;\r
+ break;\r
+#endif\r
+\r
+ case '(':\r
+ /* (?()...) */\r
+ if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE)) {\r
+ UChar *prev;\r
+ Node* condition;\r
+ int condition_is_checker;\r
+\r
+ if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
+ PFETCH(c);\r
+ if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
+\r
+ if (IS_CODE_DIGIT_ASCII(enc, c)\r
+ || c == '-' || c == '+' || c == '<' || c == '\'') {\r
+ UChar* name_end;\r
+ int back_num;\r
+ int exist_level;\r
+ int level;\r
+ enum REF_NUM num_type;\r
+ int is_enclosed;\r
+\r
+ is_enclosed = (c == '<' || c == '\'') ? 1 : 0;\r
+ if (! is_enclosed)\r
+ PUNFETCH;\r
+ prev = p;\r
+ exist_level = 0;\r
+#ifdef USE_BACKREF_WITH_LEVEL\r
+ name_end = NULL_UCHARP; /* no need. escape gcc warning. */\r
+ r = fetch_name_with_level(\r
+ (OnigCodePoint )(is_enclosed != 0 ? c : '('),\r
+ &p, end, &name_end,\r
+ env, &back_num, &level, &num_type);\r
+ if (r == 1) exist_level = 1;\r
+#else\r
+ r = fetch_name((OnigCodePoint )(is_enclosed != 0 ? c : '('),\r
+ &p, end, &name_end, env, &back_num, &num_type, 1);\r
+#endif\r
+ if (r < 0) {\r
+ if (is_enclosed == 0) {\r
+ goto any_condition;\r
+ }\r
+ else\r
+ return r;\r
+ }\r
+\r
+ condition_is_checker = 1;\r
+ if (num_type != IS_NOT_NUM) {\r
+ if (num_type == IS_REL_NUM) {\r
+ back_num = backref_rel_to_abs(back_num, env);\r
+ }\r
+ if (back_num <= 0)\r
+ return ONIGERR_INVALID_BACKREF;\r
+\r
+ if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_STRICT_CHECK_BACKREF)) {\r
+ if (back_num > env->num_mem ||\r
+ IS_NULL(SCANENV_MEMENV(env)[back_num].node))\r
+ return ONIGERR_INVALID_BACKREF;\r
+ }\r
+\r
+ condition = node_new_backref_checker(1, &back_num, 0,\r
+#ifdef USE_BACKREF_WITH_LEVEL\r
+ exist_level, level,\r
+#endif\r
+ env);\r
+ }\r
+ else {\r
+ int num;\r
+ int* backs;\r
+\r
+ num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);\r
+ if (num <= 0) {\r
+ onig_scan_env_set_error_string(env,\r
+ ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);\r
+ return ONIGERR_UNDEFINED_NAME_REFERENCE;\r
+ }\r
+ if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_STRICT_CHECK_BACKREF)) {\r
+ int i;\r
+ for (i = 0; i < num; i++) {\r
+ if (backs[i] > env->num_mem ||\r
+ IS_NULL(SCANENV_MEMENV(env)[backs[i]].node))\r
+ return ONIGERR_INVALID_BACKREF;\r
+ }\r
+ }\r
+\r
+ condition = node_new_backref_checker(num, backs, 1,\r
+#ifdef USE_BACKREF_WITH_LEVEL\r
+ exist_level, level,\r
+#endif\r
+ env);\r
+ }\r
+\r
+ if (is_enclosed != 0) {\r
+ if (PEND) goto err_if_else;\r
+ PFETCH(c);\r
+ if (c != ')') goto err_if_else;\r
+ }\r
+ }\r
+#ifdef USE_CALLOUT\r
+ else if (c == '?') {\r
+ if (IS_SYNTAX_OP2(env->syntax,\r
+ ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS)) {\r
+ if (! PEND && PPEEK_IS('{')) {\r
+ /* condition part is callouts of contents: (?(?{...})THEN|ELSE) */\r
+ condition_is_checker = 0;\r
+ PFETCH(c);\r
+ r = parse_callout_of_contents(&condition, ')', &p, end, env);\r
+ if (r != 0) return r;\r
+ goto end_condition;\r
+ }\r
+ }\r
+ goto any_condition;\r
+ }\r
+ else if (c == '*' &&\r
+ IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME)) {\r
+ condition_is_checker = 0;\r
+ r = parse_callout_of_name(&condition, ')', &p, end, env);\r
+ if (r != 0) return r;\r
+ goto end_condition;\r
+ }\r
#endif\r
+ else {\r
+ any_condition:\r
+ PUNFETCH;\r
+ condition_is_checker = 0;\r
+ r = fetch_token(tok, &p, end, env);\r
+ if (r < 0) return r;\r
+ r = parse_subexp(&condition, tok, term, &p, end, env);\r
+ if (r < 0) {\r
+ onig_node_free(condition);\r
+ return r;\r
+ }\r
+ }\r
\r
- UChar* p = *src;\r
- PFETCH_READY;\r
+ end_condition:\r
+ CHECK_NULL_RETURN_MEMERR(condition);\r
\r
- *np = NULL;\r
- if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;\r
+ if (PEND) {\r
+ err_if_else:\r
+ onig_node_free(condition);\r
+ return ONIGERR_END_PATTERN_IN_GROUP;\r
+ }\r
\r
- option = env->option;\r
- if (PPEEK_IS('?') &&\r
- IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {\r
- PINC;\r
- if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
+ if (PPEEK_IS(')')) { /* case: empty body: make backref checker */\r
+ if (condition_is_checker == 0) {\r
+ onig_node_free(condition);\r
+ return ONIGERR_INVALID_IF_ELSE_SYNTAX;\r
+ }\r
+ PFETCH(c);\r
+ *np = condition;\r
+ }\r
+ else { /* if-else */\r
+ int then_is_empty;\r
+ Node *Then, *Else;\r
\r
- PFETCH(c);\r
- switch (c) {\r
- case ':': /* (?:...) grouping only */\r
- group:\r
- r = fetch_token(tok, &p, end, env);\r
- if (r < 0) return r;\r
- r = parse_subexp(np, tok, term, &p, end, env);\r
- if (r < 0) return r;\r
- *src = p;\r
- return 1; /* group */\r
- break;\r
+ if (PPEEK_IS('|')) {\r
+ PFETCH(c);\r
+ Then = 0;\r
+ then_is_empty = 1;\r
+ }\r
+ else\r
+ then_is_empty = 0;\r
\r
- case '=':\r
- *np = onig_node_new_anchor(ANCHOR_PREC_READ);\r
- break;\r
- case '!': /* preceding read */\r
- *np = onig_node_new_anchor(ANCHOR_PREC_READ_NOT);\r
- break;\r
- case '>': /* (?>...) stop backtrack */\r
- *np = node_new_enclose(ENCLOSE_STOP_BACKTRACK);\r
- break;\r
+ r = fetch_token(tok, &p, end, env);\r
+ if (r < 0) {\r
+ onig_node_free(condition);\r
+ return r;\r
+ }\r
+ r = parse_subexp(&target, tok, term, &p, end, env);\r
+ if (r < 0) {\r
+ onig_node_free(condition);\r
+ onig_node_free(target);\r
+ return r;\r
+ }\r
\r
-#ifdef USE_NAMED_GROUP\r
- case '\'':\r
- if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {\r
- goto named_group1;\r
- }\r
- else\r
- return ONIGERR_UNDEFINED_GROUP_OPTION;\r
- break;\r
-#endif\r
+ if (then_is_empty != 0) {\r
+ Else = target;\r
+ }\r
+ else {\r
+ if (NODE_TYPE(target) == NODE_ALT) {\r
+ Then = NODE_CAR(target);\r
+ if (NODE_CDR(NODE_CDR(target)) == NULL_NODE) {\r
+ Else = NODE_CAR(NODE_CDR(target));\r
+ cons_node_free_alone(NODE_CDR(target));\r
+ }\r
+ else {\r
+ Else = NODE_CDR(target);\r
+ }\r
+ cons_node_free_alone(target);\r
+ }\r
+ else {\r
+ Then = target;\r
+ Else = 0;\r
+ }\r
+ }\r
\r
- case '<': /* look behind (?<=...), (?<!...) */\r
- PFETCH(c);\r
- if (c == '=')\r
- *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND);\r
- else if (c == '!')\r
- *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND_NOT);\r
-#ifdef USE_NAMED_GROUP\r
- else {\r
- if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {\r
- UChar *name;\r
- UChar *name_end;\r
-\r
- PUNFETCH;\r
- c = '<';\r
-\r
- named_group1:\r
- list_capture = 0;\r
-\r
- named_group2:\r
- name = p;\r
- r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num, 0);\r
- if (r < 0) return r;\r
-\r
- num = scan_env_add_mem_entry(env);\r
- if (num < 0) return num;\r
- if (list_capture != 0 && num >= (int )BIT_STATUS_BITS_NUM)\r
- return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;\r
-\r
- r = name_add(env->reg, name, name_end, num, env);\r
- if (r != 0) return r;\r
- *np = node_new_enclose_memory(env->option, 1);\r
- CHECK_NULL_RETURN_MEMERR(*np);\r
- NENCLOSE(*np)->regnum = num;\r
- if (list_capture != 0)\r
- BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);\r
- env->num_named++;\r
- }\r
- else {\r
- return ONIGERR_UNDEFINED_GROUP_OPTION;\r
- }\r
+ *np = node_new_enclosure_if_else(condition, Then, Else);\r
+ if (IS_NULL(*np)) {\r
+ onig_node_free(condition);\r
+ onig_node_free(Then);\r
+ onig_node_free(Else);\r
+ return ONIGERR_MEMORY;\r
+ }\r
+ }\r
+ goto end;\r
}\r
-#else\r
else {\r
- return ONIGERR_UNDEFINED_GROUP_OPTION;\r
+ return ONIGERR_UNDEFINED_GROUP_OPTION;\r
}\r
-#endif\r
break;\r
\r
case '@':\r
if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) {\r
-#ifdef USE_NAMED_GROUP\r
- if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {\r
- PFETCH(c);\r
- if (c == '<' || c == '\'') {\r
- list_capture = 1;\r
- goto named_group2; /* (?@<name>...) */\r
- }\r
- PUNFETCH;\r
- }\r
-#endif\r
- *np = node_new_enclose_memory(env->option, 0);\r
- CHECK_NULL_RETURN_MEMERR(*np);\r
- num = scan_env_add_mem_entry(env);\r
- if (num < 0) {\r
- onig_node_free(*np);\r
- return num;\r
- }\r
- else if (num >= (int )BIT_STATUS_BITS_NUM) {\r
- onig_node_free(*np);\r
- return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;\r
- }\r
- NENCLOSE(*np)->regnum = num;\r
- BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);\r
+ if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {\r
+ PFETCH(c);\r
+ if (c == '<' || c == '\'') {\r
+ list_capture = 1;\r
+ goto named_group2; /* (?@<name>...) */\r
+ }\r
+ PUNFETCH;\r
+ }\r
+\r
+ *np = node_new_memory(0);\r
+ CHECK_NULL_RETURN_MEMERR(*np);\r
+ num = scan_env_add_mem_entry(env);\r
+ if (num < 0) {\r
+ return num;\r
+ }\r
+ else if (num >= (int )MEM_STATUS_BITS_NUM) {\r
+ return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;\r
+ }\r
+ ENCLOSURE_(*np)->m.regnum = num;\r
+ MEM_STATUS_ON_SIMPLE(env->capture_history, num);\r
}\r
else {\r
- return ONIGERR_UNDEFINED_GROUP_OPTION;\r
+ return ONIGERR_UNDEFINED_GROUP_OPTION;\r
}\r
break;\r
\r
case 'p':\r
#endif\r
case '-': case 'i': case 'm': case 's': case 'x':\r
+ case 'W': case 'D': case 'S': case 'P':\r
{\r
- int neg = 0;\r
-\r
- while (1) {\r
- switch (c) {\r
- case ':':\r
- case ')':\r
- break;\r
-\r
- case '-': neg = 1; break;\r
- case 'x': ONOFF(option, ONIG_OPTION_EXTEND, neg); break;\r
- case 'i': ONOFF(option, ONIG_OPTION_IGNORECASE, neg); break;\r
- case 's':\r
- if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {\r
- ONOFF(option, ONIG_OPTION_MULTILINE, neg);\r
- }\r
- else\r
- return ONIGERR_UNDEFINED_GROUP_OPTION;\r
- break;\r
-\r
- case 'm':\r
- if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {\r
- ONOFF(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 1 : 0));\r
- }\r
- else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) {\r
- ONOFF(option, ONIG_OPTION_MULTILINE, neg);\r
- }\r
- else\r
- return ONIGERR_UNDEFINED_GROUP_OPTION;\r
- break;\r
+ int neg = 0;\r
+\r
+ while (1) {\r
+ switch (c) {\r
+ case ':':\r
+ case ')':\r
+ break;\r
+\r
+ case '-': neg = 1; break;\r
+ case 'x': OPTION_NEGATE(option, ONIG_OPTION_EXTEND, neg); break;\r
+ case 'i': OPTION_NEGATE(option, ONIG_OPTION_IGNORECASE, neg); break;\r
+ case 's':\r
+ if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {\r
+ OPTION_NEGATE(option, ONIG_OPTION_MULTILINE, neg);\r
+ }\r
+ else\r
+ return ONIGERR_UNDEFINED_GROUP_OPTION;\r
+ break;\r
+\r
+ case 'm':\r
+ if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {\r
+ OPTION_NEGATE(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 1 : 0));\r
+ }\r
+ else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) {\r
+ OPTION_NEGATE(option, ONIG_OPTION_MULTILINE, neg);\r
+ }\r
+ else\r
+ return ONIGERR_UNDEFINED_GROUP_OPTION;\r
+ break;\r
#ifdef USE_POSIXLINE_OPTION\r
- case 'p':\r
- ONOFF(option, ONIG_OPTION_MULTILINE|ONIG_OPTION_SINGLELINE, neg);\r
- break;\r
+ case 'p':\r
+ OPTION_NEGATE(option, ONIG_OPTION_MULTILINE|ONIG_OPTION_SINGLELINE, neg);\r
+ break;\r
#endif\r
- default:\r
- return ONIGERR_UNDEFINED_GROUP_OPTION;\r
- }\r
-\r
- if (c == ')') {\r
- *np = node_new_option(option);\r
- CHECK_NULL_RETURN_MEMERR(*np);\r
- *src = p;\r
- return 2; /* option only */\r
- }\r
- else if (c == ':') {\r
- OnigOptionType prev = env->option;\r
-\r
- env->option = option;\r
- r = fetch_token(tok, &p, end, env);\r
- if (r < 0) return r;\r
- r = parse_subexp(&target, tok, term, &p, end, env);\r
- env->option = prev;\r
- if (r < 0) return r;\r
- *np = node_new_option(option);\r
- CHECK_NULL_RETURN_MEMERR(*np);\r
- NENCLOSE(*np)->target = target;\r
- *src = p;\r
- return 0;\r
- }\r
-\r
- if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
- PFETCH(c);\r
- }\r
+ case 'W': OPTION_NEGATE(option, ONIG_OPTION_WORD_IS_ASCII, neg); break;\r
+ case 'D': OPTION_NEGATE(option, ONIG_OPTION_DIGIT_IS_ASCII, neg); break;\r
+ case 'S': OPTION_NEGATE(option, ONIG_OPTION_SPACE_IS_ASCII, neg); break;\r
+ case 'P': OPTION_NEGATE(option, ONIG_OPTION_POSIX_IS_ASCII, neg); break;\r
+\r
+ default:\r
+ return ONIGERR_UNDEFINED_GROUP_OPTION;\r
+ }\r
+\r
+ if (c == ')') {\r
+ *np = node_new_option(option);\r
+ CHECK_NULL_RETURN_MEMERR(*np);\r
+ *src = p;\r
+ return 2; /* option only */\r
+ }\r
+ else if (c == ':') {\r
+ OnigOptionType prev = env->options;\r
+\r
+ env->options = option;\r
+ r = fetch_token(tok, &p, end, env);\r
+ if (r < 0) return r;\r
+ r = parse_subexp(&target, tok, term, &p, end, env);\r
+ env->options = prev;\r
+ if (r < 0) {\r
+ onig_node_free(target);\r
+ return r;\r
+ }\r
+ *np = node_new_option(option);\r
+ CHECK_NULL_RETURN_MEMERR(*np);\r
+ NODE_BODY(*np) = target;\r
+ *src = p;\r
+ return 0;\r
+ }\r
+\r
+ if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
+ PFETCH(c);\r
+ }\r
}\r
break;\r
\r
return ONIGERR_UNDEFINED_GROUP_OPTION;\r
}\r
}\r
+#ifdef USE_CALLOUT\r
+ else if (c == '*' &&\r
+ IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME)) {\r
+ PINC;\r
+ r = parse_callout_of_name(np, ')', &p, end, env);\r
+ if (r != 0) return r;\r
+\r
+ goto end;\r
+ }\r
+#endif\r
else {\r
- if (ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_DONT_CAPTURE_GROUP))\r
+ if (ONIG_IS_OPTION_ON(env->options, ONIG_OPTION_DONT_CAPTURE_GROUP))\r
goto group;\r
\r
- *np = node_new_enclose_memory(env->option, 0);\r
+ *np = node_new_memory(0);\r
CHECK_NULL_RETURN_MEMERR(*np);\r
num = scan_env_add_mem_entry(env);\r
if (num < 0) return num;\r
- NENCLOSE(*np)->regnum = num;\r
+ ENCLOSURE_(*np)->m.regnum = num;\r
}\r
\r
CHECK_NULL_RETURN_MEMERR(*np);\r
r = fetch_token(tok, &p, end, env);\r
if (r < 0) return r;\r
r = parse_subexp(&target, tok, term, &p, end, env);\r
- if (r < 0) return r;\r
+ if (r < 0) {\r
+ onig_node_free(target);\r
+ return r;\r
+ }\r
\r
- if (NTYPE(*np) == NT_ANCHOR)\r
- NANCHOR(*np)->target = target;\r
- else {\r
- NENCLOSE(*np)->target = target;\r
- if (NENCLOSE(*np)->type == ENCLOSE_MEMORY) {\r
+ NODE_BODY(*np) = target;\r
+\r
+ if (NODE_TYPE(*np) == NODE_ENCLOSURE) {\r
+ if (ENCLOSURE_(*np)->type == ENCLOSURE_MEMORY) {\r
/* Don't move this to previous of parse_subexp() */\r
- r = scan_env_set_mem_node(env, NENCLOSE(*np)->regnum, *np);\r
+ r = scan_env_set_mem_node(env, ENCLOSURE_(*np)->m.regnum, *np);\r
if (r != 0) return r;\r
}\r
}\r
\r
+ end:\r
*src = p;\r
return 0;\r
}\r
static int\r
set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)\r
{\r
- QtfrNode* qn;\r
+ QuantNode* qn;\r
\r
- qn = NQTFR(qnode);\r
- if (qn->lower == 1 && qn->upper == 1) {\r
+ qn = QUANT_(qnode);\r
+ if (qn->lower == 1 && qn->upper == 1)\r
return 1;\r
- }\r
\r
- switch (NTYPE(target)) {\r
- case NT_STR:\r
+ switch (NODE_TYPE(target)) {\r
+ case NODE_STRING:\r
if (! group) {\r
- StrNode* sn = NSTR(target);\r
- if (str_node_can_be_split(sn, env->enc)) {\r
- Node* n = str_node_split_last_char(sn, env->enc);\r
- if (IS_NOT_NULL(n)) {\r
- qn->target = n;\r
- return 2;\r
- }\r
+ if (str_node_can_be_split(target, env->enc)) {\r
+ Node* n = str_node_split_last_char(target, env->enc);\r
+ if (IS_NOT_NULL(n)) {\r
+ NODE_BODY(qnode) = n;\r
+ return 2;\r
+ }\r
}\r
}\r
break;\r
\r
- case NT_QTFR:\r
+ case NODE_QUANT:\r
{ /* check redundant double repeat. */\r
/* verbose warn (?:.?)? etc... but not warn (.?)? etc... */\r
- QtfrNode* qnt = NQTFR(target);\r
- int nestq_num = popular_quantifier_num(qn);\r
- int targetq_num = popular_quantifier_num(qnt);\r
+ QuantNode* qnt = QUANT_(target);\r
+ int nestq_num = quantifier_type_num(qn);\r
+ int targetq_num = quantifier_type_num(qnt);\r
\r
#ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR\r
- if (!IS_QUANTIFIER_BY_NUMBER(qn) && !IS_QUANTIFIER_BY_NUMBER(qnt) &&\r
- IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {\r
+ if (targetq_num >= 0 && nestq_num >= 0 &&\r
+ IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {\r
UChar buf[WARN_BUFSIZE];\r
\r
switch(ReduceTypeTable[targetq_num][nestq_num]) {\r
case RQ_DEL:\r
if (onig_verb_warn != onig_null_warn) {\r
onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,\r
- env->pattern, env->pattern_end,\r
- (UChar* )"redundant nested repeat operator");\r
+ env->pattern, env->pattern_end,\r
+ (UChar* )"redundant nested repeat operator");\r
(*onig_verb_warn)((char* )buf);\r
}\r
goto warn_exit;\r
\r
warn_exit:\r
#endif\r
- if (targetq_num >= 0) {\r
- if (nestq_num >= 0) {\r
- onig_reduce_nested_quantifier(qnode, target);\r
- goto q_exit;\r
- }\r
- else if (targetq_num == 1 || targetq_num == 2) { /* * or + */\r
- /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */\r
- if (! IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) {\r
- qn->upper = (qn->lower == 0 ? 1 : qn->lower);\r
- }\r
- }\r
+ if (targetq_num >= 0 && nestq_num < 0) {\r
+ if (targetq_num == 1 || targetq_num == 2) { /* * or + */\r
+ /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */\r
+ if (! IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) {\r
+ qn->upper = (qn->lower == 0 ? 1 : qn->lower);\r
+ }\r
+ }\r
+ }\r
+ else {\r
+ NODE_BODY(qnode) = target;\r
+ onig_reduce_nested_quantifier(qnode, target);\r
+ goto q_exit;\r
}\r
}\r
break;\r
break;\r
}\r
\r
- qn->target = target;\r
+ NODE_BODY(qnode) = target;\r
q_exit:\r
return 0;\r
}\r
\r
\r
-#ifdef USE_SHARED_CCLASS_TABLE\r
-\r
-#define THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS 8\r
-\r
-/* for ctype node hash table */\r
-\r
-typedef struct {\r
- OnigEncoding enc;\r
- int not;\r
- int type;\r
-} type_cclass_key;\r
-\r
-static int type_cclass_cmp(type_cclass_key* x, type_cclass_key* y)\r
-{\r
- if (x->type != y->type) return 1;\r
- if (x->enc != y->enc) return 1;\r
- if (x->not != y->not) return 1;\r
- return 0;\r
-}\r
-\r
-static int type_cclass_hash(type_cclass_key* key)\r
-{\r
- int i, val;\r
- UChar *p;\r
-\r
- val = 0;\r
-\r
- p = (UChar* )&(key->enc);\r
- for (i = 0; i < (int )sizeof(key->enc); i++) {\r
- val = val * 997 + (int )*p++;\r
- }\r
-\r
- p = (UChar* )(&key->type);\r
- for (i = 0; i < (int )sizeof(key->type); i++) {\r
- val = val * 997 + (int )*p++;\r
- }\r
-\r
- val += key->not;\r
- return val + (val >> 5);\r
-}\r
-\r
-static struct st_hash_type type_type_cclass_hash = {\r
- type_cclass_cmp,\r
- type_cclass_hash,\r
-};\r
-\r
-static st_table* OnigTypeCClassTable;\r
-\r
-\r
-static int\r
-i_free_shared_class(type_cclass_key* key, Node* node, void* arg ARG_UNUSED)\r
-{\r
- if (IS_NOT_NULL(node)) {\r
- CClassNode* cc = NCCLASS(node);\r
- if (IS_NOT_NULL(cc->mbuf)) xfree(cc->mbuf);\r
- xfree(node);\r
- }\r
-\r
- if (IS_NOT_NULL(key)) xfree(key);\r
- return ST_DELETE;\r
-}\r
-\r
-extern int\r
-onig_free_shared_cclass_table(void)\r
-{\r
- if (IS_NOT_NULL(OnigTypeCClassTable)) {\r
- onig_st_foreach(OnigTypeCClassTable, i_free_shared_class, 0);\r
- onig_st_free_table(OnigTypeCClassTable);\r
- OnigTypeCClassTable = NULL;\r
- }\r
-\r
- return 0;\r
-}\r
-\r
-#endif /* USE_SHARED_CCLASS_TABLE */\r
-\r
-\r
#ifndef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS\r
static int\r
clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc)\r
} IApplyCaseFoldArg;\r
\r
static int\r
-i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[],\r
- int to_len, void* arg)\r
+i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], int to_len, void* arg)\r
{\r
IApplyCaseFoldArg* iarg;\r
ScanEnv* env;\r
int is_in = onig_is_code_in_cc(env->enc, from, cc);\r
#ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS\r
if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) ||\r
- (is_in == 0 && IS_NCCLASS_NOT(cc))) {\r
+ (is_in == 0 && IS_NCCLASS_NOT(cc))) {\r
if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {\r
- add_code_range(&(cc->mbuf), env, *to, *to);\r
+ add_code_range(&(cc->mbuf), env, *to, *to);\r
}\r
else {\r
- BITSET_SET_BIT(bs, *to);\r
+ BITSET_SET_BIT(bs, *to);\r
}\r
}\r
#else\r
if (is_in != 0) {\r
if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {\r
- if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc);\r
- add_code_range(&(cc->mbuf), env, *to, *to);\r
+ if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc);\r
+ add_code_range(&(cc->mbuf), env, *to, *to);\r
}\r
else {\r
- if (IS_NCCLASS_NOT(cc)) {\r
- BITSET_CLEAR_BIT(bs, *to);\r
- }\r
- else\r
- BITSET_SET_BIT(bs, *to);\r
+ if (IS_NCCLASS_NOT(cc)) {\r
+ BITSET_CLEAR_BIT(bs, *to);\r
+ }\r
+ else\r
+ BITSET_SET_BIT(bs, *to);\r
}\r
}\r
#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */\r
\r
if (onig_is_code_in_cc(env->enc, from, cc)\r
#ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS\r
- && !IS_NCCLASS_NOT(cc)\r
+ && !IS_NCCLASS_NOT(cc)\r
#endif\r
- ) {\r
+ ) {\r
for (i = 0; i < to_len; i++) {\r
- len = ONIGENC_CODE_TO_MBC(env->enc, to[i], buf);\r
- if (i == 0) {\r
- snode = onig_node_new_str(buf, buf + len);\r
- CHECK_NULL_RETURN_MEMERR(snode);\r
-\r
- /* char-class expanded multi-char only\r
- compare with string folded at match time. */\r
- NSTRING_SET_AMBIG(snode);\r
- }\r
- else {\r
- r = onig_node_str_cat(snode, buf, buf + len);\r
- if (r < 0) {\r
- onig_node_free(snode);\r
- return r;\r
- }\r
- }\r
+ len = ONIGENC_CODE_TO_MBC(env->enc, to[i], buf);\r
+ if (i == 0) {\r
+ snode = onig_node_new_str(buf, buf + len);\r
+ CHECK_NULL_RETURN_MEMERR(snode);\r
+\r
+ /* char-class expanded multi-char only\r
+ compare with string folded at match time. */\r
+ NODE_STRING_SET_AMBIG(snode);\r
+ }\r
+ else {\r
+ r = onig_node_str_cat(snode, buf, buf + len);\r
+ if (r < 0) {\r
+ onig_node_free(snode);\r
+ return r;\r
+ }\r
+ }\r
}\r
\r
*(iarg->ptail) = onig_node_new_alt(snode, NULL_NODE);\r
CHECK_NULL_RETURN_MEMERR(*(iarg->ptail));\r
- iarg->ptail = &(NCDR((*(iarg->ptail))));\r
+ iarg->ptail = &(NODE_CDR((*(iarg->ptail))));\r
}\r
}\r
\r
}\r
\r
static int\r
-parse_exp(Node** np, OnigToken* tok, int term,\r
- UChar** src, UChar* end, ScanEnv* env)\r
+parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,\r
+ ScanEnv* env)\r
{\r
int r, len, group = 0;\r
Node* qn;\r
break;\r
\r
case TK_SUBEXP_OPEN:\r
- r = parse_enclose(np, tok, TK_SUBEXP_CLOSE, src, end, env);\r
+ r = parse_enclosure(np, tok, TK_SUBEXP_CLOSE, src, end, env);\r
if (r < 0) return r;\r
if (r == 1) group = 1;\r
else if (r == 2) { /* option only */\r
Node* target;\r
- OnigOptionType prev = env->option;\r
+ OnigOptionType prev = env->options;\r
\r
- env->option = NENCLOSE(*np)->option;\r
+ env->options = ENCLOSURE_(*np)->o.options;\r
r = fetch_token(tok, src, end, env);\r
if (r < 0) return r;\r
r = parse_subexp(&target, tok, term, src, end, env);\r
- env->option = prev;\r
- if (r < 0) return r;\r
- NENCLOSE(*np)->target = target; \r
+ env->options = prev;\r
+ if (r < 0) {\r
+ onig_node_free(target);\r
+ return r;\r
+ }\r
+ NODE_BODY(*np) = target;\r
return tok->type;\r
}\r
break;\r
CHECK_NULL_RETURN_MEMERR(*np);\r
\r
while (1) {\r
- r = fetch_token(tok, src, end, env);\r
- if (r < 0) return r;\r
- if (r != TK_STRING) break;\r
+ r = fetch_token(tok, src, end, env);\r
+ if (r < 0) return r;\r
+ if (r != TK_STRING) break;\r
\r
- r = onig_node_str_cat(*np, tok->backp, *src);\r
- if (r < 0) return r;\r
+ r = onig_node_str_cat(*np, tok->backp, *src);\r
+ if (r < 0) return r;\r
}\r
\r
string_end:\r
CHECK_NULL_RETURN_MEMERR(*np);\r
len = 1;\r
while (1) {\r
- if (len >= ONIGENC_MBC_MINLEN(env->enc)) {\r
- if (len == enclen(env->enc, NSTR(*np)->s)) {\r
- r = fetch_token(tok, src, end, env);\r
- NSTRING_CLEAR_RAW(*np);\r
- goto string_end;\r
- }\r
- }\r
-\r
- r = fetch_token(tok, src, end, env);\r
- if (r < 0) return r;\r
- if (r != TK_RAW_BYTE) {\r
- /* Don't use this, it is wrong for little endian encodings. */\r
+ if (len >= ONIGENC_MBC_MINLEN(env->enc)) {\r
+ if (len == enclen(env->enc, STR_(*np)->s)) {/* should not enclen_end() */\r
+ r = fetch_token(tok, src, end, env);\r
+ NODE_STRING_CLEAR_RAW(*np);\r
+ goto string_end;\r
+ }\r
+ }\r
+\r
+ r = fetch_token(tok, src, end, env);\r
+ if (r < 0) return r;\r
+ if (r != TK_RAW_BYTE) {\r
+ /* Don't use this, it is wrong for little endian encodings. */\r
#ifdef USE_PAD_TO_SHORT_BYTE_CHAR\r
- int rem;\r
- if (len < ONIGENC_MBC_MINLEN(env->enc)) {\r
- rem = ONIGENC_MBC_MINLEN(env->enc) - len;\r
- (void )node_str_head_pad(NSTR(*np), rem, (UChar )0);\r
- if (len + rem == enclen(env->enc, NSTR(*np)->s)) {\r
- NSTRING_CLEAR_RAW(*np);\r
- goto string_end;\r
- }\r
- }\r
+ int rem;\r
+ if (len < ONIGENC_MBC_MINLEN(env->enc)) {\r
+ rem = ONIGENC_MBC_MINLEN(env->enc) - len;\r
+ (void )node_str_head_pad(STR_(*np), rem, (UChar )0);\r
+ if (len + rem == enclen(env->enc, STR_(*np)->s)) {\r
+ NODE_STRING_CLEAR_RAW(*np);\r
+ goto string_end;\r
+ }\r
+ }\r
#endif\r
- return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;\r
- }\r
+ return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;\r
+ }\r
\r
- r = node_str_cat_char(*np, (UChar )tok->u.c);\r
- if (r < 0) return r;\r
+ r = node_str_cat_char(*np, (UChar )tok->u.c);\r
+ if (r < 0) return r;\r
\r
- len++;\r
+ len++;\r
}\r
}\r
break;\r
qstart = *src;\r
qend = find_str_position(end_op, 2, qstart, end, &nextp, env->enc);\r
if (IS_NULL(qend)) {\r
- nextp = qend = end;\r
+ nextp = qend = end;\r
}\r
*np = node_new_str(qstart, qend);\r
CHECK_NULL_RETURN_MEMERR(*np);\r
{\r
switch (tok->u.prop.ctype) {\r
case ONIGENC_CTYPE_WORD:\r
- *np = node_new_ctype(tok->u.prop.ctype, tok->u.prop.not);\r
- CHECK_NULL_RETURN_MEMERR(*np);\r
- break;\r
+ *np = node_new_ctype(tok->u.prop.ctype, tok->u.prop.not, env->options);\r
+ CHECK_NULL_RETURN_MEMERR(*np);\r
+ break;\r
\r
case ONIGENC_CTYPE_SPACE:\r
case ONIGENC_CTYPE_DIGIT:\r
case ONIGENC_CTYPE_XDIGIT:\r
- {\r
- CClassNode* cc;\r
-\r
-#ifdef USE_SHARED_CCLASS_TABLE\r
- const OnigCodePoint *mbr;\r
- OnigCodePoint sb_out;\r
-\r
- r = ONIGENC_GET_CTYPE_CODE_RANGE(env->enc, tok->u.prop.ctype,\r
- &sb_out, &mbr);\r
- if (r == 0 &&\r
- ONIGENC_CODE_RANGE_NUM(mbr)\r
- >= THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS) {\r
- type_cclass_key key;\r
- type_cclass_key* new_key;\r
-\r
- key.enc = env->enc;\r
- key.not = tok->u.prop.not;\r
- key.type = tok->u.prop.ctype;\r
-\r
- THREAD_ATOMIC_START;\r
-\r
- if (IS_NULL(OnigTypeCClassTable)) {\r
- OnigTypeCClassTable\r
- = onig_st_init_table_with_size(&type_type_cclass_hash, 10);\r
- if (IS_NULL(OnigTypeCClassTable)) {\r
- THREAD_ATOMIC_END;\r
- return ONIGERR_MEMORY;\r
- }\r
- }\r
- else {\r
- if (onig_st_lookup(OnigTypeCClassTable, (st_data_t )&key,\r
- (st_data_t* )np)) {\r
- THREAD_ATOMIC_END;\r
- break;\r
- }\r
- }\r
-\r
- *np = node_new_cclass_by_codepoint_range(tok->u.prop.not,\r
- sb_out, mbr);\r
- if (IS_NULL(*np)) {\r
- THREAD_ATOMIC_END;\r
- return ONIGERR_MEMORY;\r
- }\r
-\r
- cc = NCCLASS(*np);\r
- NCCLASS_SET_SHARE(cc);\r
- new_key = (type_cclass_key* )xmalloc(sizeof(type_cclass_key));\r
- xmemcpy(new_key, &key, sizeof(type_cclass_key));\r
- onig_st_add_direct(OnigTypeCClassTable, (st_data_t )new_key,\r
- (st_data_t )*np);\r
- \r
- THREAD_ATOMIC_END;\r
- }\r
- else {\r
-#endif\r
- *np = node_new_cclass();\r
- CHECK_NULL_RETURN_MEMERR(*np);\r
- cc = NCCLASS(*np);\r
- add_ctype_to_cc(cc, tok->u.prop.ctype, 0, env);\r
- if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);\r
-#ifdef USE_SHARED_CCLASS_TABLE\r
- }\r
-#endif\r
- }\r
- break;\r
+ {\r
+ CClassNode* cc;\r
+\r
+ *np = node_new_cclass();\r
+ CHECK_NULL_RETURN_MEMERR(*np);\r
+ cc = CCLASS_(*np);\r
+ add_ctype_to_cc(cc, tok->u.prop.ctype, 0, env);\r
+ if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);\r
+ }\r
+ break;\r
\r
default:\r
- return ONIGERR_PARSER_BUG;\r
- break;\r
+ return ONIGERR_PARSER_BUG;\r
+ break;\r
}\r
}\r
break;\r
r = parse_char_class(np, tok, src, end, env);\r
if (r != 0) return r;\r
\r
- cc = NCCLASS(*np);\r
- if (IS_IGNORECASE(env->option)) {\r
- IApplyCaseFoldArg iarg;\r
-\r
- iarg.env = env;\r
- iarg.cc = cc;\r
- iarg.alt_root = NULL_NODE;\r
- iarg.ptail = &(iarg.alt_root);\r
-\r
- r = ONIGENC_APPLY_ALL_CASE_FOLD(env->enc, env->case_fold_flag,\r
- i_apply_case_fold, &iarg);\r
- if (r != 0) {\r
- onig_node_free(iarg.alt_root);\r
- return r;\r
- }\r
- if (IS_NOT_NULL(iarg.alt_root)) {\r
+ cc = CCLASS_(*np);\r
+ if (IS_IGNORECASE(env->options)) {\r
+ IApplyCaseFoldArg iarg;\r
+\r
+ iarg.env = env;\r
+ iarg.cc = cc;\r
+ iarg.alt_root = NULL_NODE;\r
+ iarg.ptail = &(iarg.alt_root);\r
+\r
+ r = ONIGENC_APPLY_ALL_CASE_FOLD(env->enc, env->case_fold_flag,\r
+ i_apply_case_fold, &iarg);\r
+ if (r != 0) {\r
+ onig_node_free(iarg.alt_root);\r
+ return r;\r
+ }\r
+ if (IS_NOT_NULL(iarg.alt_root)) {\r
Node* work = onig_node_new_alt(*np, iarg.alt_root);\r
if (IS_NULL(work)) {\r
onig_node_free(iarg.alt_root);\r
return ONIGERR_MEMORY;\r
}\r
*np = work;\r
- }\r
+ }\r
}\r
}\r
break;\r
CHECK_NULL_RETURN_MEMERR(*np);\r
qn = node_new_quantifier(0, REPEAT_INFINITE, 0);\r
CHECK_NULL_RETURN_MEMERR(qn);\r
- NQTFR(qn)->target = *np;\r
+ NODE_BODY(qn) = *np;\r
*np = qn;\r
break;\r
\r
case TK_BACKREF:\r
len = tok->u.backref.num;\r
*np = node_new_backref(len,\r
- (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)),\r
- tok->u.backref.by_name,\r
+ (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)),\r
+ tok->u.backref.by_name,\r
#ifdef USE_BACKREF_WITH_LEVEL\r
- tok->u.backref.exist_level,\r
- tok->u.backref.level,\r
+ tok->u.backref.exist_level,\r
+ tok->u.backref.level,\r
#endif\r
- env);\r
+ env);\r
CHECK_NULL_RETURN_MEMERR(*np);\r
break;\r
\r
-#ifdef USE_SUBEXP_CALL\r
+#ifdef USE_CALL\r
case TK_CALL:\r
{\r
int gnum = tok->u.call.gnum;\r
\r
- if (gnum < 0) {\r
- gnum = BACKREF_REL_TO_ABS(gnum, env);\r
- if (gnum <= 0)\r
- return ONIGERR_INVALID_BACKREF;\r
- }\r
- *np = node_new_call(tok->u.call.name, tok->u.call.name_end, gnum);\r
+ *np = node_new_call(tok->u.call.name, tok->u.call.name_end,\r
+ gnum, tok->u.call.by_number);\r
CHECK_NULL_RETURN_MEMERR(*np);\r
env->num_call++;\r
+ if (tok->u.call.by_number != 0 && gnum == 0) {\r
+ env->has_call_zero = 1;\r
+ }\r
}\r
break;\r
#endif\r
\r
case TK_ANCHOR:\r
- *np = onig_node_new_anchor(tok->u.anchor);\r
+ {\r
+ int ascii_mode =\r
+ IS_WORD_ASCII(env->options) && IS_WORD_ANCHOR_TYPE(tok->u.anchor) ? 1 : 0;\r
+ *np = onig_node_new_anchor(tok->u.anchor, ascii_mode);\r
+ }\r
break;\r
\r
case TK_OP_REPEAT:\r
case TK_INTERVAL:\r
if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS)) {\r
if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS))\r
- return ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED;\r
+ return ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED;\r
else\r
- *np = node_new_empty();\r
+ *np = node_new_empty();\r
}\r
else {\r
goto tk_byte;\r
}\r
break;\r
\r
+ case TK_KEEP:\r
+ r = node_new_keep(np, env);\r
+ if (r < 0) return r;\r
+ break;\r
+\r
+ case TK_GENERAL_NEWLINE:\r
+ r = node_new_general_newline(np, env);\r
+ if (r < 0) return r;\r
+ break;\r
+\r
+ case TK_NO_NEWLINE:\r
+ r = node_new_no_newline(np, env);\r
+ if (r < 0) return r;\r
+ break;\r
+\r
+ case TK_TRUE_ANYCHAR:\r
+ r = node_new_true_anychar(np, env);\r
+ if (r < 0) return r;\r
+ break;\r
+\r
+ case TK_EXTENDED_GRAPHEME_CLUSTER:\r
+ r = make_extended_grapheme_cluster(np, env);\r
+ if (r < 0) return r;\r
+ break;\r
+\r
default:\r
return ONIGERR_PARSER_BUG;\r
break;\r
repeat:\r
if (r == TK_OP_REPEAT || r == TK_INTERVAL) {\r
if (is_invalid_quantifier_target(*targetp))\r
- return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID;\r
+ return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID;\r
\r
qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper,\r
- (r == TK_INTERVAL ? 1 : 0));\r
+ (r == TK_INTERVAL ? 1 : 0));\r
CHECK_NULL_RETURN_MEMERR(qn);\r
- NQTFR(qn)->greedy = tok->u.repeat.greedy;\r
+ QUANT_(qn)->greedy = tok->u.repeat.greedy;\r
r = set_quantifier(qn, *targetp, group, env);\r
if (r < 0) {\r
- onig_node_free(qn);\r
- return r;\r
+ onig_node_free(qn);\r
+ return r;\r
}\r
\r
if (tok->u.repeat.possessive != 0) {\r
- Node* en;\r
- en = node_new_enclose(ENCLOSE_STOP_BACKTRACK);\r
- if (IS_NULL(en)) {\r
- onig_node_free(qn);\r
- return ONIGERR_MEMORY;\r
- }\r
- NENCLOSE(en)->target = qn;\r
- qn = en;\r
+ Node* en;\r
+ en = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);\r
+ if (IS_NULL(en)) {\r
+ onig_node_free(qn);\r
+ return ONIGERR_MEMORY;\r
+ }\r
+ NODE_BODY(en) = qn;\r
+ qn = en;\r
}\r
\r
if (r == 0) {\r
- *targetp = qn;\r
+ *targetp = qn;\r
}\r
else if (r == 1) {\r
- onig_node_free(qn);\r
+ onig_node_free(qn);\r
}\r
else if (r == 2) { /* split case: /abc+/ */\r
- Node *tmp;\r
-\r
- *targetp = node_new_list(*targetp, NULL);\r
- if (IS_NULL(*targetp)) {\r
- onig_node_free(qn);\r
- return ONIGERR_MEMORY;\r
- }\r
- tmp = NCDR(*targetp) = node_new_list(qn, NULL);\r
- if (IS_NULL(tmp)) {\r
- onig_node_free(qn);\r
- return ONIGERR_MEMORY;\r
- }\r
- targetp = &(NCAR(tmp));\r
+ Node *tmp;\r
+\r
+ *targetp = node_new_list(*targetp, NULL);\r
+ if (IS_NULL(*targetp)) {\r
+ onig_node_free(qn);\r
+ return ONIGERR_MEMORY;\r
+ }\r
+ tmp = NODE_CDR(*targetp) = node_new_list(qn, NULL);\r
+ if (IS_NULL(tmp)) {\r
+ onig_node_free(qn);\r
+ return ONIGERR_MEMORY;\r
+ }\r
+ targetp = &(NODE_CAR(tmp));\r
}\r
goto re_entry;\r
}\r
}\r
\r
static int\r
-parse_branch(Node** top, OnigToken* tok, int term,\r
- UChar** src, UChar* end, ScanEnv* env)\r
+parse_branch(Node** top, OnigToken* tok, int term, UChar** src, UChar* end,\r
+ ScanEnv* env)\r
{\r
int r;\r
Node *node, **headp;\r
\r
*top = NULL;\r
r = parse_exp(&node, tok, term, src, end, env);\r
- if (r < 0) return r;\r
+ if (r < 0) {\r
+ onig_node_free(node);\r
+ return r;\r
+ }\r
\r
if (r == TK_EOT || r == term || r == TK_ALT) {\r
*top = node;\r
}\r
else {\r
*top = node_new_list(node, NULL);\r
- headp = &(NCDR(*top));\r
+ headp = &(NODE_CDR(*top));\r
while (r != TK_EOT && r != term && r != TK_ALT) {\r
r = parse_exp(&node, tok, term, src, end, env);\r
- if (r < 0) return r;\r
+ if (r < 0) {\r
+ onig_node_free(node);\r
+ return r;\r
+ }\r
\r
- if (NTYPE(node) == NT_LIST) {\r
- *headp = node;\r
- while (IS_NOT_NULL(NCDR(node))) node = NCDR(node);\r
- headp = &(NCDR(node));\r
+ if (NODE_TYPE(node) == NODE_LIST) {\r
+ *headp = node;\r
+ while (IS_NOT_NULL(NODE_CDR(node))) node = NODE_CDR(node);\r
+ headp = &(NODE_CDR(node));\r
}\r
else {\r
- *headp = node_new_list(node, NULL);\r
- headp = &(NCDR(*headp));\r
+ *headp = node_new_list(node, NULL);\r
+ headp = &(NODE_CDR(*headp));\r
}\r
}\r
}\r
\r
/* term_tok: TK_EOT or TK_SUBEXP_CLOSE */\r
static int\r
-parse_subexp(Node** top, OnigToken* tok, int term,\r
- UChar** src, UChar* end, ScanEnv* env)\r
+parse_subexp(Node** top, OnigToken* tok, int term, UChar** src, UChar* end,\r
+ ScanEnv* env)\r
{\r
int r;\r
Node *node, **headp;\r
\r
*top = NULL;\r
+ env->parse_depth++;\r
+ if (env->parse_depth > ParseDepthLimit)\r
+ return ONIGERR_PARSE_DEPTH_LIMIT_OVER;\r
r = parse_branch(&node, tok, term, src, end, env);\r
if (r < 0) {\r
onig_node_free(node);\r
}\r
else if (r == TK_ALT) {\r
*top = onig_node_new_alt(node, NULL);\r
- headp = &(NCDR(*top));\r
+ headp = &(NODE_CDR(*top));\r
while (r == TK_ALT) {\r
r = fetch_token(tok, src, end, env);\r
if (r < 0) return r;\r
r = parse_branch(&node, tok, term, src, end, env);\r
- if (r < 0) return r;\r
-\r
+ if (r < 0) {\r
+ onig_node_free(node);\r
+ return r;\r
+ }\r
*headp = onig_node_new_alt(node, NULL);\r
- headp = &(NCDR(*headp));\r
+ headp = &(NODE_CDR(*headp));\r
}\r
\r
if (tok->type != (enum TokenSyms )term)\r
goto err;\r
}\r
else {\r
+ onig_node_free(node);\r
err:\r
if (term == TK_SUBEXP_CLOSE)\r
return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;\r
return ONIGERR_PARSER_BUG;\r
}\r
\r
+ env->parse_depth--;\r
return r;\r
}\r
\r
if (r < 0) return r;\r
r = parse_subexp(top, &tok, TK_EOT, src, end, env);\r
if (r < 0) return r;\r
+\r
+ return 0;\r
+}\r
+\r
+#ifdef USE_CALL\r
+/*\r
+ * Wrap an already-parsed tree in a new memory node registered as group 0.\r
+ * onig_parse_tree() calls this when env->has_call_zero is set, which\r
+ * parse_exp() does for a by-number call whose group number is 0.\r
+ *\r
+ * node:  tree that becomes the body of the new memory node.\r
+ * env:   scan environment in which the node is recorded as mem entry 0.\r
+ * rnode: receives the new wrapper node; written only on success.\r
+ *\r
+ * Returns 0 on success. If scan_env_set_mem_node() reports a non-zero\r
+ * status, the wrapper is freed and that status is returned.\r
+ */\r
+static int\r
+make_call_zero_body(Node* node, ScanEnv* env, Node** rnode)\r
+{\r
+ int r;\r
+\r
+ Node* x = node_new_memory(0 /* 0: is not named */);\r
+ /* presumably returns a memory error when x is NULL; macro is defined elsewhere */\r
+ CHECK_NULL_RETURN_MEMERR(x);\r
+\r
+ NODE_BODY(x) = node;\r
+ ENCLOSURE_(x)->m.regnum = 0;\r
+ r = scan_env_set_mem_node(env, 0, x);\r
+ if (r != 0) {\r
+ /* NOTE(review): x still has node as its body here; if onig_node_free()\r
+ also releases the body, the caller's pointer to node is left dangling.\r
+ Confirm against onig_node_free() and the caller's error path. */\r
+ onig_node_free(x);\r
+ return r;\r
+ }\r
+\r
+ *rnode = x;\r
return 0;\r
}\r
+#endif\r
\r
+/*\r
+ * Parse the pattern bytes [pattern, end) into a node tree stored in *root.\r
+ *\r
+ * Clears the name table of reg and the scan environment, then copies\r
+ * options, case_fold_flag, enc and syntax from reg into env before parsing.\r
+ * *root is set to NULL before any parsing is attempted.\r
+ *\r
+ * Returns ONIGERR_INVALID_WIDE_CHAR_VALUE when the pattern fails the\r
+ * ONIGENC_IS_VALID_MBC_STRING check; otherwise returns the status of\r
+ * parse_regexp(), make_call_zero_body() or setup_ext_callout_list_values(),\r
+ * whichever ran last.\r
+ */\r
extern int\r
-onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end,\r
- regex_t* reg, ScanEnv* env)\r
+onig_parse_tree(Node** root, const UChar* pattern, const UChar* end,\r
+ regex_t* reg, ScanEnv* env)\r
{\r
int r;\r
UChar* p;\r
+#ifdef USE_CALLOUT\r
+ RegexExt* ext;\r
+#endif\r
\r
-#ifdef USE_NAMED_GROUP\r
names_clear(reg);\r
-#endif\r
\r
scan_env_clear(env);\r
- env->option = reg->options;\r
+ env->options = reg->options;\r
env->case_fold_flag = reg->case_fold_flag;\r
env->enc = reg->enc;\r
env->syntax = reg->syntax;\r
env->reg = reg;\r
\r
*root = NULL;\r
+\r
+ /* Reject the pattern before parsing if it is not a valid string in env->enc. */\r
+ if (! ONIGENC_IS_VALID_MBC_STRING(env->enc, pattern, end))\r
+ return ONIGERR_INVALID_WIDE_CHAR_VALUE;\r
+\r
p = (UChar* )pattern;\r
r = parse_regexp(root, &p, (UChar* )end, env);\r
+\r
+#ifdef USE_CALL\r
+ /* NOTE(review): this early return exists only when USE_CALL is defined;\r
+ without it a non-zero r reaches the code below and may be overwritten\r
+ by the callout setup. Confirm that is intended. */\r
+ if (r != 0) return r;\r
+\r
+ /* The pattern calls group 0: wrap the whole tree in memory node 0. */\r
+ if (env->has_call_zero != 0) {\r
+ Node* zero_node;\r
+ r = make_call_zero_body(*root, env, &zero_node);\r
+ if (r != 0) return r;\r
+\r
+ *root = zero_node;\r
+ }\r
+#endif\r
+\r
reg->num_mem = env->num_mem;\r
+\r
+#ifdef USE_CALLOUT\r
+ /* Runs only when reg has an extension recording at least one callout. */\r
+ ext = REG_EXTP(reg);\r
+ if (IS_NOT_NULL(ext) && ext->callout_num > 0) {\r
+ r = setup_ext_callout_list_values(reg);\r
+ }\r
+#endif\r
+\r
return r;\r
}\r
\r
extern void\r
onig_scan_env_set_error_string(ScanEnv* env, int ecode ARG_UNUSED,\r
- UChar* arg, UChar* arg_end)\r
+ UChar* arg, UChar* arg_end)\r
{\r
env->error = arg;\r
env->error_end = arg_end;\r