]>
Commit | Line | Data |
---|---|---|
14b0e578 CS |
1 | /**********************************************************************\r |
2 | regparse.c - Oniguruma (regular expression library)\r | |
3 | **********************************************************************/\r | |
4 | /*-\r | |
b602265d | 5 | * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>\r |
14b0e578 CS |
6 | * All rights reserved.\r |
7 | *\r | |
14b0e578 CS |
8 | * Redistribution and use in source and binary forms, with or without\r |
9 | * modification, are permitted provided that the following conditions\r | |
10 | * are met:\r | |
11 | * 1. Redistributions of source code must retain the above copyright\r | |
12 | * notice, this list of conditions and the following disclaimer.\r | |
13 | * 2. Redistributions in binary form must reproduce the above copyright\r | |
14 | * notice, this list of conditions and the following disclaimer in the\r | |
15 | * documentation and/or other materials provided with the distribution.\r | |
16 | *\r | |
17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND\r | |
18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\r | |
19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\r | |
20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE\r | |
21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\r | |
22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS\r | |
23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)\r | |
24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT\r | |
25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY\r | |
26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF\r | |
27 | * SUCH DAMAGE.\r | |
28 | */\r | |
29 | \r | |
30 | #include "regparse.h"\r | |
31 | #include "st.h"\r | |
32 | \r | |
b602265d DG |
33 | #ifdef DEBUG_NODE_FREE\r |
34 | #include <stdio.h>\r | |
35 | #endif\r | |
36 | \r | |
37 | #define INIT_TAG_NAMES_ALLOC_NUM 5\r | |
38 | \r | |
14b0e578 CS |
39 | #define WARN_BUFSIZE 256\r |
40 | \r | |
41 | #define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS\r | |
42 | \r | |
b602265d DG |
/*
 * Character predicates for callout names and callout tag names.
 * Both accept ASCII letters, ASCII digits and '_'.
 *
 * The argument is parenthesized at every use so that an expression argument
 * (for example a conditional expression) is tested as a whole.  The argument
 * is still evaluated several times, so it must be free of side effects.
 */
#define IS_ALLOWED_CODE_IN_CALLOUT_NAME(c) \
  (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z') || ((c) >= '0' && (c) <= '9') || (c) == '_' /* || (c) == '!' */)
#define IS_ALLOWED_CODE_IN_CALLOUT_TAG_NAME(c) \
  (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z') || ((c) >= '0' && (c) <= '9') || (c) == '_')
47 | \r | |
48 | \r | |
49 | OnigSyntaxType OnigSyntaxOniguruma = {\r | |
50 | (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |\r | |
51 | ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |\r | |
52 | ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_O_BRACE_OCTAL |\r | |
53 | ONIG_SYN_OP_ESC_CONTROL_CHARS |\r | |
54 | ONIG_SYN_OP_ESC_C_CONTROL )\r | |
55 | & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )\r | |
56 | , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |\r | |
57 | ONIG_SYN_OP2_OPTION_RUBY |\r | |
58 | ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |\r | |
59 | ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE |\r | |
60 | ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP |\r | |
61 | ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS |\r | |
62 | ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME |\r | |
63 | ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER |\r | |
64 | ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE |\r | |
65 | ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT |\r | |
66 | ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP |\r | |
67 | ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |\r | |
68 | ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |\r | |
69 | ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |\r | |
70 | ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |\r | |
71 | ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |\r | |
72 | ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |\r | |
73 | ONIG_SYN_OP2_ESC_H_XDIGIT | ONIG_SYN_OP2_ESC_U_HEX4 )\r | |
74 | , ( SYN_GNU_REGEX_BV | \r | |
75 | ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |\r | |
76 | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |\r | |
77 | ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |\r | |
78 | ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |\r | |
79 | ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |\r | |
80 | ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |\r | |
81 | ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )\r | |
82 | , ONIG_OPTION_NONE\r | |
83 | ,\r | |
84 | {\r | |
85 | (OnigCodePoint )'\\' /* esc */\r | |
86 | , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */\r | |
87 | , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */\r | |
88 | , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */\r | |
89 | , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */\r | |
90 | , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */\r | |
91 | }\r | |
92 | };\r | |
14b0e578 CS |
93 | \r |
94 | OnigSyntaxType OnigSyntaxRuby = {\r | |
95 | (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |\r | |
96 | ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |\r | |
b602265d DG |
97 | ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_O_BRACE_OCTAL |\r |
98 | ONIG_SYN_OP_ESC_CONTROL_CHARS |\r | |
14b0e578 CS |
99 | ONIG_SYN_OP_ESC_C_CONTROL )\r |
100 | & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )\r | |
101 | , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |\r | |
102 | ONIG_SYN_OP2_OPTION_RUBY |\r | |
103 | ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |\r | |
b602265d DG |
104 | ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE |\r |
105 | ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP |\r | |
106 | ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER |\r | |
107 | ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE |\r | |
108 | ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP |\r | |
14b0e578 CS |
109 | ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |\r |
110 | ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |\r | |
111 | ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |\r | |
112 | ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |\r | |
113 | ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |\r | |
114 | ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |\r | |
b602265d | 115 | ONIG_SYN_OP2_ESC_H_XDIGIT | ONIG_SYN_OP2_ESC_U_HEX4 )\r |
14b0e578 CS |
116 | , ( SYN_GNU_REGEX_BV | \r |
117 | ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |\r | |
118 | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |\r | |
119 | ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |\r | |
120 | ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |\r | |
121 | ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |\r | |
122 | ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |\r | |
123 | ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )\r | |
124 | , ONIG_OPTION_NONE\r | |
125 | ,\r | |
126 | {\r | |
127 | (OnigCodePoint )'\\' /* esc */\r | |
128 | , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */\r | |
129 | , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */\r | |
130 | , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */\r | |
131 | , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */\r | |
132 | , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */\r | |
133 | }\r | |
134 | };\r | |
135 | \r | |
b602265d | 136 | OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_ONIGURUMA;\r |
14b0e578 CS |
137 | \r |
138 | extern void onig_null_warn(const char* s ARG_UNUSED) { }\r | |
139 | \r | |
140 | #ifdef DEFAULT_WARN_FUNCTION\r | |
141 | static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION;\r | |
142 | #else\r | |
143 | static OnigWarnFunc onig_warn = onig_null_warn;\r | |
144 | #endif\r | |
145 | \r | |
146 | #ifdef DEFAULT_VERB_WARN_FUNCTION\r | |
147 | static OnigWarnFunc onig_verb_warn = (OnigWarnFunc )DEFAULT_VERB_WARN_FUNCTION;\r | |
148 | #else\r | |
149 | static OnigWarnFunc onig_verb_warn = onig_null_warn;\r | |
150 | #endif\r | |
151 | \r | |
152 | extern void onig_set_warn_func(OnigWarnFunc f)\r | |
153 | {\r | |
154 | onig_warn = f;\r | |
155 | }\r | |
156 | \r | |
157 | extern void onig_set_verb_warn_func(OnigWarnFunc f)\r | |
158 | {\r | |
159 | onig_verb_warn = f;\r | |
160 | }\r | |
161 | \r | |
b602265d DG |
162 | extern void\r |
163 | onig_warning(const char* s)\r | |
164 | {\r | |
165 | if (onig_warn == onig_null_warn) return ;\r | |
166 | \r | |
167 | (*onig_warn)(s);\r | |
168 | }\r | |
169 | \r | |
#define DEFAULT_MAX_CAPTURE_NUM   32767

/* Upper limit on capture groups; changed via onig_set_capture_num_limit(). */
static int MaxCaptureNum = DEFAULT_MAX_CAPTURE_NUM;

/*
 * Set the capture-number limit.
 *
 * num: new limit; must not be negative (zero is accepted).
 * Returns 0 on success, or -1 (limit left unchanged) when num is negative.
 */
extern int
onig_set_capture_num_limit(int num)
{
  if (num >= 0) {
    MaxCaptureNum = num;
    return 0;
  }

  return -1;
}
182 | \r | |
183 | static unsigned int ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT;\r | |
184 | \r | |
185 | extern unsigned int\r | |
186 | onig_get_parse_depth_limit(void)\r | |
187 | {\r | |
188 | return ParseDepthLimit;\r | |
189 | }\r | |
190 | \r | |
191 | extern int\r | |
192 | onig_set_parse_depth_limit(unsigned int depth)\r | |
193 | {\r | |
194 | if (depth == 0)\r | |
195 | ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT;\r | |
196 | else\r | |
197 | ParseDepthLimit = depth;\r | |
198 | return 0;\r | |
199 | }\r | |
200 | \r | |
/*
 * Multiply two ints that are expected to be non-negative.
 *
 * Returns 0 when either factor is 0, the product when x is strictly below
 * INT_MAX / y, and -1 otherwise.  The strict comparison means a product that
 * would land exactly on the boundary is also reported as -1.
 */
static int
positive_int_multiply(int x, int y)
{
  if (x == 0 || y == 0) return 0;

  if (x >= INT_MAX / y) return -1;

  return x * y;
}
211 | \r | |
14b0e578 CS |
212 | static void\r |
213 | bbuf_free(BBuf* bbuf)\r | |
214 | {\r | |
215 | if (IS_NOT_NULL(bbuf)) {\r | |
216 | if (IS_NOT_NULL(bbuf->p)) xfree(bbuf->p);\r | |
217 | xfree(bbuf);\r | |
218 | }\r | |
219 | }\r | |
220 | \r | |
221 | static int\r | |
222 | bbuf_clone(BBuf** rto, BBuf* from)\r | |
223 | {\r | |
224 | int r;\r | |
225 | BBuf *to;\r | |
226 | \r | |
227 | *rto = to = (BBuf* )xmalloc(sizeof(BBuf));\r | |
228 | CHECK_NULL_RETURN_MEMERR(to);\r | |
b602265d DG |
229 | r = BB_INIT(to, from->alloc);\r |
230 | if (r != 0) {\r | |
231 | xfree(to->p);\r | |
232 | *rto = 0;\r | |
233 | return r;\r | |
234 | }\r | |
14b0e578 CS |
235 | to->used = from->used;\r |
236 | xmemcpy(to->p, from->p, from->used);\r | |
237 | return 0;\r | |
238 | }\r | |
239 | \r | |
b602265d DG |
240 | static int backref_rel_to_abs(int rel_no, ScanEnv* env)\r |
241 | {\r | |
242 | if (rel_no > 0) {\r | |
243 | return env->num_mem + rel_no;\r | |
244 | }\r | |
245 | else {\r | |
246 | return env->num_mem + 1 + rel_no;\r | |
247 | }\r | |
248 | }\r | |
249 | \r | |
/* Set the bits f in v. */
#define OPTION_ON(v,f)     ((v) |= (f))
/* Clear the bits f in v. */
#define OPTION_OFF(v,f)    ((v) &= ~(f))

/*
 * Clear the bits f in v when `negative` is non-zero, set them otherwise.
 * The whole expansion is parenthesized so the macro can be used as an
 * operand of a larger expression without the ?: capturing its neighbours.
 */
#define OPTION_NEGATE(v,f,negative)    ((negative) ? ((v) &= ~(f)) : ((v) |= (f)))
14b0e578 CS |
254 | \r |
/* Start of the multi-byte code range: 0 when ONIGENC_MBC_MINLEN(enc) > 1,
   otherwise 0x80. */
#define MBCODE_START_POS(enc) \
  (OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80)

/* Add the range [MBCODE_START_POS(enc), all-ones code point] to pbuf. */
#define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \
  add_code_range_to_buf(pbuf, MBCODE_START_POS(enc), ~((OnigCodePoint )0))

/*
 * For an encoding that is not single-byte, add the whole multi-byte range to
 * mbuf.  Uses the caller's local `r` and makes the ENCLOSING FUNCTION return
 * r when the add fails.
 */
#define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\
  if (! ONIGENC_IS_SINGLEBYTE(enc)) {\
    r = add_code_range_to_buf(&(mbuf), MBCODE_START_POS(enc),\
                              ~((OnigCodePoint )0));\
    if (r != 0) return r;\
  }\
} while (0)
267 | \r | |
268 | \r | |
/*
 * Store 1 in `empty` when every one of the BITSET_SIZE words of bs is zero,
 * otherwise store 0.  `empty` must be an assignable int-like variable.
 */
#define BITSET_IS_EMPTY(bs,empty) do {\
  int bitset_word_idx_ = 0;\
  empty = 1;\
  while (bitset_word_idx_ < (int )BITSET_SIZE) {\
    if ((bs)[bitset_word_idx_] != 0) {\
      empty = 0;\
      break;\
    }\
    bitset_word_idx_++;\
  }\
} while (0)
278 | \r | |
279 | static void\r | |
280 | bitset_set_range(BitSetRef bs, int from, int to)\r | |
281 | {\r | |
282 | int i;\r | |
283 | for (i = from; i <= to && i < SINGLE_BYTE_SIZE; i++) {\r | |
284 | BITSET_SET_BIT(bs, i);\r | |
285 | }\r | |
286 | }\r | |
287 | \r | |
288 | #if 0\r | |
289 | static void\r | |
290 | bitset_set_all(BitSetRef bs)\r | |
291 | {\r | |
292 | int i;\r | |
293 | for (i = 0; i < BITSET_SIZE; i++) { bs[i] = ~((Bits )0); }\r | |
294 | }\r | |
295 | #endif\r | |
296 | \r | |
297 | static void\r | |
298 | bitset_invert(BitSetRef bs)\r | |
299 | {\r | |
300 | int i;\r | |
301 | for (i = 0; i < (int )BITSET_SIZE; i++) { bs[i] = ~(bs[i]); }\r | |
302 | }\r | |
303 | \r | |
304 | static void\r | |
305 | bitset_invert_to(BitSetRef from, BitSetRef to)\r | |
306 | {\r | |
307 | int i;\r | |
308 | for (i = 0; i < (int )BITSET_SIZE; i++) { to[i] = ~(from[i]); }\r | |
309 | }\r | |
310 | \r | |
311 | static void\r | |
312 | bitset_and(BitSetRef dest, BitSetRef bs)\r | |
313 | {\r | |
314 | int i;\r | |
315 | for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] &= bs[i]; }\r | |
316 | }\r | |
317 | \r | |
318 | static void\r | |
319 | bitset_or(BitSetRef dest, BitSetRef bs)\r | |
320 | {\r | |
321 | int i;\r | |
322 | for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] |= bs[i]; }\r | |
323 | }\r | |
324 | \r | |
325 | static void\r | |
326 | bitset_copy(BitSetRef dest, BitSetRef bs)\r | |
327 | {\r | |
328 | int i;\r | |
329 | for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] = bs[i]; }\r | |
330 | }\r | |
331 | \r | |
332 | extern int\r | |
333 | onig_strncmp(const UChar* s1, const UChar* s2, int n)\r | |
334 | {\r | |
335 | int x;\r | |
336 | \r | |
337 | while (n-- > 0) {\r | |
338 | x = *s2++ - *s1++;\r | |
339 | if (x) return x;\r | |
340 | }\r | |
341 | return 0;\r | |
342 | }\r | |
343 | \r | |
344 | extern void\r | |
345 | onig_strcpy(UChar* dest, const UChar* src, const UChar* end)\r | |
346 | {\r | |
b602265d | 347 | int len = (int )(end - src);\r |
14b0e578 CS |
348 | if (len > 0) {\r |
349 | xmemcpy(dest, src, len);\r | |
350 | dest[len] = (UChar )0;\r | |
351 | }\r | |
352 | }\r | |
353 | \r | |
b602265d DG |
354 | static int\r |
355 | save_entry(ScanEnv* env, enum SaveType type, int* id)\r | |
14b0e578 | 356 | {\r |
b602265d | 357 | int nid = env->save_num;\r |
14b0e578 | 358 | \r |
b602265d DG |
359 | #if 0\r |
360 | if (IS_NULL(env->saves)) {\r | |
361 | int n = 10;\r | |
362 | env->saves = (SaveItem* )xmalloc(sizeof(SaveItem) * n);\r | |
363 | CHECK_NULL_RETURN_MEMERR(env->saves);\r | |
364 | env->save_alloc_num = n;\r | |
365 | }\r | |
366 | else if (env->save_alloc_num <= nid) {\r | |
367 | int n = env->save_alloc_num * 2;\r | |
368 | SaveItem* p = (SaveItem* )xrealloc(env->saves, sizeof(SaveItem) * n, sizeof(SaveItem)*env->save_alloc_num);\r | |
369 | CHECK_NULL_RETURN_MEMERR(p);\r | |
370 | env->saves = p;\r | |
371 | env->save_alloc_num = n;\r | |
372 | }\r | |
14b0e578 | 373 | \r |
b602265d DG |
374 | env->saves[nid].type = type;\r |
375 | #endif\r | |
14b0e578 | 376 | \r |
b602265d DG |
377 | env->save_num++;\r |
378 | *id = nid;\r | |
379 | return 0;\r | |
14b0e578 | 380 | }\r |
14b0e578 CS |
381 | \r |
/* scan pattern methods */
/*
 * These macros work on names the using function must have in scope:
 * `p` (current position), `end` (end of input) and `enc` (encoding).
 * The non-_S variants additionally record the previous position in
 * `pfetch_prev`, which PFETCH_READY declares.
 */
#define PEND_VALUE   0

/* declare the variable that PINC / PFETCH / PUNFETCH use */
#define PFETCH_READY  UChar* pfetch_prev
/* 1 when p has reached end, 0 otherwise */
#define PEND         (p >= end ? 1 : 0)
/* step back to the position saved by the last PINC / PFETCH */
#define PUNFETCH     p = pfetch_prev
/* skip one character, remembering where it started */
#define PINC  do { \
  pfetch_prev = p; \
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
} while (0)
/* read one character into c, then advance as PINC does */
#define PFETCH(c)  do { \
  c = ONIGENC_MBC_TO_CODE(enc, p, end); \
  PINC; \
} while (0)

/* as PINC, without saving the previous position */
#define PINC_S  do { \
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
} while (0)
/* as PFETCH, without saving the previous position */
#define PFETCH_S(c)  do { \
  c = ONIGENC_MBC_TO_CODE(enc, p, end); \
  PINC_S; \
} while (0)

/* code at p without advancing; PEND_VALUE when p is at end */
#define PPEEK        (p >= end ? PEND_VALUE : ONIGENC_MBC_TO_CODE(enc, p, end))
#define PPEEK_IS(c)  (PPEEK == (OnigCodePoint )c)
408 | \r | |
409 | static UChar*\r | |
410 | strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end,\r | |
b602265d | 411 | int capa, int oldCapa)\r |
14b0e578 CS |
412 | {\r |
413 | UChar* r;\r | |
414 | \r | |
415 | if (dest)\r | |
416 | r = (UChar* )xrealloc(dest, capa + 1, oldCapa);\r | |
417 | else\r | |
418 | r = (UChar* )xmalloc(capa + 1);\r | |
419 | \r | |
420 | CHECK_NULL_RETURN(r);\r | |
421 | onig_strcpy(r + (dest_end - dest), src, src_end);\r | |
422 | return r;\r | |
423 | }\r | |
424 | \r | |
425 | /* dest on static area */\r | |
426 | static UChar*\r | |
427 | strcat_capa_from_static(UChar* dest, UChar* dest_end,\r | |
b602265d | 428 | const UChar* src, const UChar* src_end, int capa)\r |
14b0e578 CS |
429 | {\r |
430 | UChar* r;\r | |
431 | \r | |
432 | r = (UChar* )xmalloc(capa + 1);\r | |
433 | CHECK_NULL_RETURN(r);\r | |
434 | onig_strcpy(r, dest, dest_end);\r | |
435 | onig_strcpy(r + (dest_end - dest), src, src_end);\r | |
436 | return r;\r | |
437 | }\r | |
438 | \r | |
439 | \r | |
440 | #ifdef USE_ST_LIBRARY\r | |
441 | \r | |
442 | typedef struct {\r | |
443 | UChar* s;\r | |
444 | UChar* end;\r | |
445 | } st_str_end_key;\r | |
446 | \r | |
447 | static int\r | |
448 | str_end_cmp(st_str_end_key* x, st_str_end_key* y)\r | |
449 | {\r | |
450 | UChar *p, *q;\r | |
451 | int c;\r | |
452 | \r | |
453 | if ((x->end - x->s) != (y->end - y->s))\r | |
454 | return 1;\r | |
455 | \r | |
456 | p = x->s;\r | |
457 | q = y->s;\r | |
458 | while (p < x->end) {\r | |
459 | c = (int )*p - (int )*q;\r | |
460 | if (c != 0) return c;\r | |
461 | \r | |
462 | p++; q++;\r | |
463 | }\r | |
464 | \r | |
465 | return 0;\r | |
466 | }\r | |
467 | \r | |
468 | static int\r | |
469 | str_end_hash(st_str_end_key* x)\r | |
470 | {\r | |
471 | UChar *p;\r | |
472 | int val = 0;\r | |
473 | \r | |
474 | p = x->s;\r | |
475 | while (p < x->end) {\r | |
476 | val = val * 997 + (int )*p++;\r | |
477 | }\r | |
478 | \r | |
479 | return val + (val >> 5);\r | |
480 | }\r | |
481 | \r | |
482 | extern hash_table_type*\r | |
483 | onig_st_init_strend_table_with_size(int size)\r | |
484 | {\r | |
485 | static struct st_hash_type hashType = {\r | |
486 | str_end_cmp,\r | |
487 | str_end_hash,\r | |
488 | };\r | |
489 | \r | |
490 | return (hash_table_type* )\r | |
491 | onig_st_init_table_with_size(&hashType, size);\r | |
492 | }\r | |
493 | \r | |
494 | extern int\r | |
495 | onig_st_lookup_strend(hash_table_type* table, const UChar* str_key,\r | |
b602265d | 496 | const UChar* end_key, hash_data_type *value)\r |
14b0e578 CS |
497 | {\r |
498 | st_str_end_key key;\r | |
499 | \r | |
500 | key.s = (UChar* )str_key;\r | |
501 | key.end = (UChar* )end_key;\r | |
502 | \r | |
b602265d | 503 | return onig_st_lookup(table, (st_data_t )(&key), value);\r |
14b0e578 CS |
504 | }\r |
505 | \r | |
506 | extern int\r | |
507 | onig_st_insert_strend(hash_table_type* table, const UChar* str_key,\r | |
b602265d | 508 | const UChar* end_key, hash_data_type value)\r |
14b0e578 CS |
509 | {\r |
510 | st_str_end_key* key;\r | |
511 | int result;\r | |
512 | \r | |
513 | key = (st_str_end_key* )xmalloc(sizeof(st_str_end_key));\r | |
b0c2b797 | 514 | CHECK_NULL_RETURN_MEMERR(key);\r |
b602265d | 515 | \r |
14b0e578 CS |
516 | key->s = (UChar* )str_key;\r |
517 | key->end = (UChar* )end_key;\r | |
b602265d | 518 | result = onig_st_insert(table, (st_data_t )key, value);\r |
14b0e578 CS |
519 | if (result) {\r |
520 | xfree(key);\r | |
521 | }\r | |
522 | return result;\r | |
523 | }\r | |
524 | \r | |
14b0e578 | 525 | \r |
b602265d DG |
526 | typedef struct {\r |
527 | OnigEncoding enc;\r | |
528 | int type; /* callout type: single or not */\r | |
529 | UChar* s;\r | |
530 | UChar* end;\r | |
531 | } st_callout_name_key;\r | |
532 | \r | |
533 | static int\r | |
534 | callout_name_table_cmp(st_callout_name_key* x, st_callout_name_key* y)\r | |
535 | {\r | |
536 | UChar *p, *q;\r | |
537 | int c;\r | |
538 | \r | |
539 | if (x->enc != y->enc) return 1;\r | |
540 | if (x->type != y->type) return 1;\r | |
541 | if ((x->end - x->s) != (y->end - y->s))\r | |
542 | return 1;\r | |
543 | \r | |
544 | p = x->s;\r | |
545 | q = y->s;\r | |
546 | while (p < x->end) {\r | |
547 | c = (int )*p - (int )*q;\r | |
548 | if (c != 0) return c;\r | |
549 | \r | |
550 | p++; q++;\r | |
551 | }\r | |
552 | \r | |
553 | return 0;\r | |
554 | }\r | |
555 | \r | |
556 | static int\r | |
557 | callout_name_table_hash(st_callout_name_key* x)\r | |
558 | {\r | |
559 | UChar *p;\r | |
560 | int val = 0;\r | |
561 | \r | |
562 | p = x->s;\r | |
563 | while (p < x->end) {\r | |
564 | val = val * 997 + (int )*p++;\r | |
565 | }\r | |
566 | \r | |
567 | /* use intptr_t for escape warning in Windows */\r | |
568 | return val + (val >> 5) + ((intptr_t )x->enc & 0xffff) + x->type;\r | |
569 | }\r | |
570 | \r | |
571 | extern hash_table_type*\r | |
572 | onig_st_init_callout_name_table_with_size(int size)\r | |
573 | {\r | |
574 | static struct st_hash_type hashType = {\r | |
575 | callout_name_table_cmp,\r | |
576 | callout_name_table_hash,\r | |
577 | };\r | |
578 | \r | |
579 | return (hash_table_type* )\r | |
580 | onig_st_init_table_with_size(&hashType, size);\r | |
581 | }\r | |
582 | \r | |
583 | extern int\r | |
584 | onig_st_lookup_callout_name_table(hash_table_type* table,\r | |
585 | OnigEncoding enc,\r | |
586 | int type,\r | |
587 | const UChar* str_key,\r | |
588 | const UChar* end_key,\r | |
589 | hash_data_type *value)\r | |
590 | {\r | |
591 | st_callout_name_key key;\r | |
592 | \r | |
593 | key.enc = enc;\r | |
594 | key.type = type;\r | |
595 | key.s = (UChar* )str_key;\r | |
596 | key.end = (UChar* )end_key;\r | |
597 | \r | |
598 | return onig_st_lookup(table, (st_data_t )(&key), value);\r | |
599 | }\r | |
600 | \r | |
601 | static int\r | |
602 | st_insert_callout_name_table(hash_table_type* table,\r | |
603 | OnigEncoding enc, int type,\r | |
604 | UChar* str_key, UChar* end_key,\r | |
605 | hash_data_type value)\r | |
606 | {\r | |
607 | st_callout_name_key* key;\r | |
608 | int result;\r | |
609 | \r | |
610 | key = (st_callout_name_key* )xmalloc(sizeof(st_callout_name_key));\r | |
611 | CHECK_NULL_RETURN_MEMERR(key);\r | |
612 | \r | |
613 | /* key->s: don't duplicate, because str_key is duped in callout_name_entry() */\r | |
614 | key->enc = enc;\r | |
615 | key->type = type;\r | |
616 | key->s = str_key;\r | |
617 | key->end = end_key;\r | |
618 | result = onig_st_insert(table, (st_data_t )key, value);\r | |
619 | if (result) {\r | |
620 | xfree(key);\r | |
621 | }\r | |
622 | return result;\r | |
623 | }\r | |
624 | \r | |
625 | #endif /* USE_ST_LIBRARY */\r | |
14b0e578 | 626 | \r |
14b0e578 CS |
627 | \r |
628 | #define INIT_NAME_BACKREFS_ALLOC_NUM 8\r | |
629 | \r | |
630 | typedef struct {\r | |
631 | UChar* name;\r | |
632 | int name_len; /* byte length */\r | |
633 | int back_num; /* number of backrefs */\r | |
634 | int back_alloc;\r | |
635 | int back_ref1;\r | |
636 | int* back_refs;\r | |
637 | } NameEntry;\r | |
638 | \r | |
639 | #ifdef USE_ST_LIBRARY\r | |
640 | \r | |
b602265d DG |
641 | #define INIT_NAMES_ALLOC_NUM 5\r |
642 | \r | |
14b0e578 CS |
643 | typedef st_table NameTable;\r |
644 | typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */\r | |
645 | \r | |
646 | #define NAMEBUF_SIZE 24\r | |
647 | #define NAMEBUF_SIZE_1 25\r | |
648 | \r | |
649 | #ifdef ONIG_DEBUG\r | |
650 | static int\r | |
651 | i_print_name_entry(UChar* key, NameEntry* e, void* arg)\r | |
652 | {\r | |
653 | int i;\r | |
654 | FILE* fp = (FILE* )arg;\r | |
655 | \r | |
656 | fprintf(fp, "%s: ", e->name);\r | |
657 | if (e->back_num == 0)\r | |
658 | fputs("-", fp);\r | |
659 | else if (e->back_num == 1)\r | |
660 | fprintf(fp, "%d", e->back_ref1);\r | |
661 | else {\r | |
662 | for (i = 0; i < e->back_num; i++) {\r | |
663 | if (i > 0) fprintf(fp, ", ");\r | |
664 | fprintf(fp, "%d", e->back_refs[i]);\r | |
665 | }\r | |
666 | }\r | |
667 | fputs("\n", fp);\r | |
668 | return ST_CONTINUE;\r | |
669 | }\r | |
670 | \r | |
671 | extern int\r | |
672 | onig_print_names(FILE* fp, regex_t* reg)\r | |
673 | {\r | |
674 | NameTable* t = (NameTable* )reg->name_table;\r | |
675 | \r | |
676 | if (IS_NOT_NULL(t)) {\r | |
677 | fprintf(fp, "name table\n");\r | |
678 | onig_st_foreach(t, i_print_name_entry, (HashDataType )fp);\r | |
679 | fputs("\n", fp);\r | |
680 | }\r | |
681 | return 0;\r | |
682 | }\r | |
683 | #endif /* ONIG_DEBUG */\r | |
684 | \r | |
685 | static int\r | |
686 | i_free_name_entry(UChar* key, NameEntry* e, void* arg ARG_UNUSED)\r | |
687 | {\r | |
688 | xfree(e->name);\r | |
689 | if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);\r | |
690 | xfree(key);\r | |
691 | xfree(e);\r | |
692 | return ST_DELETE;\r | |
693 | }\r | |
694 | \r | |
695 | static int\r | |
696 | names_clear(regex_t* reg)\r | |
697 | {\r | |
698 | NameTable* t = (NameTable* )reg->name_table;\r | |
699 | \r | |
700 | if (IS_NOT_NULL(t)) {\r | |
701 | onig_st_foreach(t, i_free_name_entry, 0);\r | |
702 | }\r | |
703 | return 0;\r | |
704 | }\r | |
705 | \r | |
706 | extern int\r | |
707 | onig_names_free(regex_t* reg)\r | |
708 | {\r | |
709 | int r;\r | |
710 | NameTable* t;\r | |
711 | \r | |
712 | r = names_clear(reg);\r | |
b602265d | 713 | if (r != 0) return r;\r |
14b0e578 CS |
714 | \r |
715 | t = (NameTable* )reg->name_table;\r | |
716 | if (IS_NOT_NULL(t)) onig_st_free_table(t);\r | |
717 | reg->name_table = (void* )NULL;\r | |
718 | return 0;\r | |
719 | }\r | |
720 | \r | |
721 | static NameEntry*\r | |
722 | name_find(regex_t* reg, const UChar* name, const UChar* name_end)\r | |
723 | {\r | |
724 | NameEntry* e;\r | |
725 | NameTable* t = (NameTable* )reg->name_table;\r | |
726 | \r | |
727 | e = (NameEntry* )NULL;\r | |
728 | if (IS_NOT_NULL(t)) {\r | |
729 | onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));\r | |
730 | }\r | |
731 | return e;\r | |
732 | }\r | |
733 | \r | |
734 | typedef struct {\r | |
735 | int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*);\r | |
736 | regex_t* reg;\r | |
737 | void* arg;\r | |
738 | int ret;\r | |
739 | OnigEncoding enc;\r | |
740 | } INamesArg;\r | |
741 | \r | |
742 | static int\r | |
743 | i_names(UChar* key ARG_UNUSED, NameEntry* e, INamesArg* arg)\r | |
744 | {\r | |
745 | int r = (*(arg->func))(e->name,\r | |
746 | e->name + e->name_len,\r | |
747 | e->back_num,\r | |
b602265d DG |
748 | (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),\r |
749 | arg->reg, arg->arg);\r | |
14b0e578 CS |
750 | if (r != 0) {\r |
751 | arg->ret = r;\r | |
752 | return ST_STOP;\r | |
753 | }\r | |
754 | return ST_CONTINUE;\r | |
755 | }\r | |
756 | \r | |
757 | extern int\r | |
758 | onig_foreach_name(regex_t* reg,\r | |
759 | int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)\r | |
760 | {\r | |
761 | INamesArg narg;\r | |
762 | NameTable* t = (NameTable* )reg->name_table;\r | |
763 | \r | |
764 | narg.ret = 0;\r | |
765 | if (IS_NOT_NULL(t)) {\r | |
766 | narg.func = func;\r | |
767 | narg.reg = reg;\r | |
768 | narg.arg = arg;\r | |
769 | narg.enc = reg->enc; /* should be pattern encoding. */\r | |
b602265d | 770 | onig_st_foreach(t, i_names, (HashDataType )&narg);\r |
14b0e578 CS |
771 | }\r |
772 | return narg.ret;\r | |
773 | }\r | |
774 | \r | |
775 | static int\r | |
776 | i_renumber_name(UChar* key ARG_UNUSED, NameEntry* e, GroupNumRemap* map)\r | |
777 | {\r | |
778 | int i;\r | |
779 | \r | |
780 | if (e->back_num > 1) {\r | |
781 | for (i = 0; i < e->back_num; i++) {\r | |
782 | e->back_refs[i] = map[e->back_refs[i]].new_val;\r | |
783 | }\r | |
784 | }\r | |
785 | else if (e->back_num == 1) {\r | |
786 | e->back_ref1 = map[e->back_ref1].new_val;\r | |
787 | }\r | |
788 | \r | |
789 | return ST_CONTINUE;\r | |
790 | }\r | |
791 | \r | |
792 | extern int\r | |
793 | onig_renumber_name_table(regex_t* reg, GroupNumRemap* map)\r | |
794 | {\r | |
795 | NameTable* t = (NameTable* )reg->name_table;\r | |
796 | \r | |
797 | if (IS_NOT_NULL(t)) {\r | |
b602265d | 798 | onig_st_foreach(t, i_renumber_name, (HashDataType )map);\r |
14b0e578 CS |
799 | }\r |
800 | return 0;\r | |
801 | }\r | |
802 | \r | |
803 | \r | |
804 | extern int\r | |
805 | onig_number_of_names(regex_t* reg)\r | |
806 | {\r | |
807 | NameTable* t = (NameTable* )reg->name_table;\r | |
808 | \r | |
809 | if (IS_NOT_NULL(t))\r | |
810 | return t->num_entries;\r | |
811 | else\r | |
812 | return 0;\r | |
813 | }\r | |
814 | \r | |
815 | #else /* USE_ST_LIBRARY */\r | |
816 | \r | |
817 | #define INIT_NAMES_ALLOC_NUM 8\r | |
818 | \r | |
819 | typedef struct {\r | |
820 | NameEntry* e;\r | |
821 | int num;\r | |
822 | int alloc;\r | |
823 | } NameTable;\r | |
824 | \r | |
825 | #ifdef ONIG_DEBUG\r | |
826 | extern int\r | |
827 | onig_print_names(FILE* fp, regex_t* reg)\r | |
828 | {\r | |
829 | int i, j;\r | |
830 | NameEntry* e;\r | |
831 | NameTable* t = (NameTable* )reg->name_table;\r | |
832 | \r | |
833 | if (IS_NOT_NULL(t) && t->num > 0) {\r | |
834 | fprintf(fp, "name table\n");\r | |
835 | for (i = 0; i < t->num; i++) {\r | |
836 | e = &(t->e[i]);\r | |
837 | fprintf(fp, "%s: ", e->name);\r | |
838 | if (e->back_num == 0) {\r | |
b602265d | 839 | fputs("-", fp);\r |
14b0e578 CS |
840 | }\r |
841 | else if (e->back_num == 1) {\r | |
b602265d | 842 | fprintf(fp, "%d", e->back_ref1);\r |
14b0e578 CS |
843 | }\r |
844 | else {\r | |
b602265d DG |
845 | for (j = 0; j < e->back_num; j++) {\r |
846 | if (j > 0) fprintf(fp, ", ");\r | |
847 | fprintf(fp, "%d", e->back_refs[j]);\r | |
848 | }\r | |
14b0e578 CS |
849 | }\r |
850 | fputs("\n", fp);\r | |
851 | }\r | |
852 | fputs("\n", fp);\r | |
853 | }\r | |
854 | return 0;\r | |
855 | }\r | |
856 | #endif\r | |
857 | \r | |
858 | static int\r | |
859 | names_clear(regex_t* reg)\r | |
860 | {\r | |
861 | int i;\r | |
862 | NameEntry* e;\r | |
863 | NameTable* t = (NameTable* )reg->name_table;\r | |
864 | \r | |
865 | if (IS_NOT_NULL(t)) {\r | |
866 | for (i = 0; i < t->num; i++) {\r | |
867 | e = &(t->e[i]);\r | |
868 | if (IS_NOT_NULL(e->name)) {\r | |
b602265d DG |
869 | xfree(e->name);\r |
870 | e->name = NULL;\r | |
871 | e->name_len = 0;\r | |
872 | e->back_num = 0;\r | |
873 | e->back_alloc = 0;\r | |
874 | if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);\r | |
875 | e->back_refs = (int* )NULL;\r | |
14b0e578 CS |
876 | }\r |
877 | }\r | |
878 | if (IS_NOT_NULL(t->e)) {\r | |
879 | xfree(t->e);\r | |
880 | t->e = NULL;\r | |
881 | }\r | |
882 | t->num = 0;\r | |
883 | }\r | |
884 | return 0;\r | |
885 | }\r | |
886 | \r | |
887 | extern int\r | |
888 | onig_names_free(regex_t* reg)\r | |
889 | {\r | |
890 | int r;\r | |
891 | NameTable* t;\r | |
892 | \r | |
893 | r = names_clear(reg);\r | |
b602265d | 894 | if (r != 0) return r;\r |
14b0e578 CS |
895 | \r |
896 | t = (NameTable* )reg->name_table;\r | |
897 | if (IS_NOT_NULL(t)) xfree(t);\r | |
898 | reg->name_table = NULL;\r | |
899 | return 0;\r | |
900 | }\r | |
901 | \r | |
902 | static NameEntry*\r | |
903 | name_find(regex_t* reg, UChar* name, UChar* name_end)\r | |
904 | {\r | |
905 | int i, len;\r | |
906 | NameEntry* e;\r | |
907 | NameTable* t = (NameTable* )reg->name_table;\r | |
908 | \r | |
909 | if (IS_NOT_NULL(t)) {\r | |
910 | len = name_end - name;\r | |
911 | for (i = 0; i < t->num; i++) {\r | |
912 | e = &(t->e[i]);\r | |
913 | if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)\r | |
b602265d | 914 | return e;\r |
14b0e578 CS |
915 | }\r |
916 | }\r | |
917 | return (NameEntry* )NULL;\r | |
918 | }\r | |
919 | \r | |
920 | extern int\r | |
921 | onig_foreach_name(regex_t* reg,\r | |
922 | int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)\r | |
923 | {\r | |
924 | int i, r;\r | |
925 | NameEntry* e;\r | |
926 | NameTable* t = (NameTable* )reg->name_table;\r | |
927 | \r | |
928 | if (IS_NOT_NULL(t)) {\r | |
929 | for (i = 0; i < t->num; i++) {\r | |
930 | e = &(t->e[i]);\r | |
931 | r = (*func)(e->name, e->name + e->name_len, e->back_num,\r | |
b602265d DG |
932 | (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),\r |
933 | reg, arg);\r | |
14b0e578 CS |
934 | if (r != 0) return r;\r |
935 | }\r | |
936 | }\r | |
937 | return 0;\r | |
938 | }\r | |
939 | \r | |
940 | extern int\r | |
941 | onig_number_of_names(regex_t* reg)\r | |
942 | {\r | |
943 | NameTable* t = (NameTable* )reg->name_table;\r | |
944 | \r | |
945 | if (IS_NOT_NULL(t))\r | |
946 | return t->num;\r | |
947 | else\r | |
948 | return 0;\r | |
949 | }\r | |
950 | \r | |
951 | #endif /* else USE_ST_LIBRARY */\r | |
952 | \r | |
953 | static int\r | |
954 | name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)\r | |
955 | {\r | |
b602265d | 956 | int r;\r |
14b0e578 CS |
957 | int alloc;\r |
958 | NameEntry* e;\r | |
959 | NameTable* t = (NameTable* )reg->name_table;\r | |
960 | \r | |
961 | if (name_end - name <= 0)\r | |
962 | return ONIGERR_EMPTY_GROUP_NAME;\r | |
963 | \r | |
964 | e = name_find(reg, name, name_end);\r | |
965 | if (IS_NULL(e)) {\r | |
966 | #ifdef USE_ST_LIBRARY\r | |
967 | if (IS_NULL(t)) {\r | |
b602265d | 968 | t = onig_st_init_strend_table_with_size(INIT_NAMES_ALLOC_NUM);\r |
a5def177 | 969 | CHECK_NULL_RETURN_MEMERR(t);\r |
14b0e578 CS |
970 | reg->name_table = (void* )t;\r |
971 | }\r | |
972 | e = (NameEntry* )xmalloc(sizeof(NameEntry));\r | |
973 | CHECK_NULL_RETURN_MEMERR(e);\r | |
974 | \r | |
b602265d | 975 | e->name = onigenc_strdup(reg->enc, name, name_end);\r |
14b0e578 CS |
976 | if (IS_NULL(e->name)) {\r |
977 | xfree(e); return ONIGERR_MEMORY;\r | |
978 | }\r | |
b602265d DG |
979 | r = onig_st_insert_strend(t, e->name, (e->name + (name_end - name)),\r |
980 | (HashDataType )e);\r | |
981 | if (r < 0) return r;\r | |
14b0e578 | 982 | \r |
b602265d | 983 | e->name_len = (int )(name_end - name);\r |
14b0e578 CS |
984 | e->back_num = 0;\r |
985 | e->back_alloc = 0;\r | |
986 | e->back_refs = (int* )NULL;\r | |
987 | \r | |
988 | #else\r | |
989 | \r | |
990 | if (IS_NULL(t)) {\r | |
991 | alloc = INIT_NAMES_ALLOC_NUM;\r | |
992 | t = (NameTable* )xmalloc(sizeof(NameTable));\r | |
993 | CHECK_NULL_RETURN_MEMERR(t);\r | |
994 | t->e = NULL;\r | |
995 | t->alloc = 0;\r | |
996 | t->num = 0;\r | |
997 | \r | |
998 | t->e = (NameEntry* )xmalloc(sizeof(NameEntry) * alloc);\r | |
999 | if (IS_NULL(t->e)) {\r | |
b602265d DG |
1000 | xfree(t);\r |
1001 | return ONIGERR_MEMORY;\r | |
14b0e578 CS |
1002 | }\r |
1003 | t->alloc = alloc;\r | |
1004 | reg->name_table = t;\r | |
1005 | goto clear;\r | |
1006 | }\r | |
1007 | else if (t->num == t->alloc) {\r | |
1008 | int i;\r | |
1009 | \r | |
1010 | alloc = t->alloc * 2;\r | |
b602265d | 1011 | t->e = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc, sizeof(NameEntry) * t->alloc);\r |
14b0e578 CS |
1012 | CHECK_NULL_RETURN_MEMERR(t->e);\r |
1013 | t->alloc = alloc;\r | |
1014 | \r | |
1015 | clear:\r | |
1016 | for (i = t->num; i < t->alloc; i++) {\r | |
b602265d DG |
1017 | t->e[i].name = NULL;\r |
1018 | t->e[i].name_len = 0;\r | |
1019 | t->e[i].back_num = 0;\r | |
1020 | t->e[i].back_alloc = 0;\r | |
1021 | t->e[i].back_refs = (int* )NULL;\r | |
14b0e578 CS |
1022 | }\r |
1023 | }\r | |
1024 | e = &(t->e[t->num]);\r | |
1025 | t->num++;\r | |
b602265d | 1026 | e->name = onigenc_strdup(reg->enc, name, name_end);\r |
14b0e578 CS |
1027 | if (IS_NULL(e->name)) return ONIGERR_MEMORY;\r |
1028 | e->name_len = name_end - name;\r | |
1029 | #endif\r | |
1030 | }\r | |
1031 | \r | |
1032 | if (e->back_num >= 1 &&\r | |
1033 | ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME)) {\r | |
1034 | onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME,\r | |
b602265d | 1035 | name, name_end);\r |
14b0e578 CS |
1036 | return ONIGERR_MULTIPLEX_DEFINED_NAME;\r |
1037 | }\r | |
1038 | \r | |
1039 | e->back_num++;\r | |
1040 | if (e->back_num == 1) {\r | |
1041 | e->back_ref1 = backref;\r | |
1042 | }\r | |
1043 | else {\r | |
1044 | if (e->back_num == 2) {\r | |
1045 | alloc = INIT_NAME_BACKREFS_ALLOC_NUM;\r | |
1046 | e->back_refs = (int* )xmalloc(sizeof(int) * alloc);\r | |
1047 | CHECK_NULL_RETURN_MEMERR(e->back_refs);\r | |
1048 | e->back_alloc = alloc;\r | |
1049 | e->back_refs[0] = e->back_ref1;\r | |
1050 | e->back_refs[1] = backref;\r | |
1051 | }\r | |
1052 | else {\r | |
1053 | if (e->back_num > e->back_alloc) {\r | |
b602265d DG |
1054 | alloc = e->back_alloc * 2;\r |
1055 | e->back_refs = (int* )xrealloc(e->back_refs, sizeof(int) * alloc, sizeof(int) * e->back_alloc);\r | |
1056 | CHECK_NULL_RETURN_MEMERR(e->back_refs);\r | |
1057 | e->back_alloc = alloc;\r | |
14b0e578 CS |
1058 | }\r |
1059 | e->back_refs[e->back_num - 1] = backref;\r | |
1060 | }\r | |
1061 | }\r | |
1062 | \r | |
1063 | return 0;\r | |
1064 | }\r | |
1065 | \r | |
1066 | extern int\r | |
1067 | onig_name_to_group_numbers(regex_t* reg, const UChar* name,\r | |
b602265d | 1068 | const UChar* name_end, int** nums)\r |
14b0e578 CS |
1069 | {\r |
1070 | NameEntry* e = name_find(reg, name, name_end);\r | |
1071 | \r | |
1072 | if (IS_NULL(e)) return ONIGERR_UNDEFINED_NAME_REFERENCE;\r | |
1073 | \r | |
1074 | switch (e->back_num) {\r | |
1075 | case 0:\r | |
1076 | break;\r | |
1077 | case 1:\r | |
1078 | *nums = &(e->back_ref1);\r | |
1079 | break;\r | |
1080 | default:\r | |
1081 | *nums = e->back_refs;\r | |
1082 | break;\r | |
1083 | }\r | |
1084 | return e->back_num;\r | |
1085 | }\r | |
1086 | \r | |
1087 | extern int\r | |
1088 | onig_name_to_backref_number(regex_t* reg, const UChar* name,\r | |
b602265d | 1089 | const UChar* name_end, OnigRegion *region)\r |
14b0e578 CS |
1090 | {\r |
1091 | int i, n, *nums;\r | |
1092 | \r | |
1093 | n = onig_name_to_group_numbers(reg, name, name_end, &nums);\r | |
1094 | if (n < 0)\r | |
1095 | return n;\r | |
1096 | else if (n == 0)\r | |
1097 | return ONIGERR_PARSER_BUG;\r | |
1098 | else if (n == 1)\r | |
1099 | return nums[0];\r | |
1100 | else {\r | |
1101 | if (IS_NOT_NULL(region)) {\r | |
1102 | for (i = n - 1; i >= 0; i--) {\r | |
b602265d DG |
1103 | if (region->beg[nums[i]] != ONIG_REGION_NOTPOS)\r |
1104 | return nums[i];\r | |
14b0e578 CS |
1105 | }\r |
1106 | }\r | |
1107 | return nums[n - 1];\r | |
1108 | }\r | |
1109 | }\r | |
1110 | \r | |
14b0e578 CS |
1111 | extern int\r |
1112 | onig_noname_group_capture_is_active(regex_t* reg)\r | |
1113 | {\r | |
1114 | if (ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_DONT_CAPTURE_GROUP))\r | |
1115 | return 0;\r | |
1116 | \r | |
14b0e578 CS |
1117 | if (onig_number_of_names(reg) > 0 &&\r |
1118 | IS_SYNTAX_BV(reg->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&\r | |
1119 | !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {\r | |
1120 | return 0;\r | |
1121 | }\r | |
14b0e578 CS |
1122 | \r |
1123 | return 1;\r | |
1124 | }\r | |
1125 | \r | |
b602265d | 1126 | #ifdef USE_CALLOUT\r |
14b0e578 | 1127 | \r |
b602265d DG |
1128 | typedef struct {\r |
1129 | OnigCalloutType type;\r | |
1130 | int in;\r | |
1131 | OnigCalloutFunc start_func;\r | |
1132 | OnigCalloutFunc end_func;\r | |
1133 | int arg_num;\r | |
1134 | int opt_arg_num;\r | |
1135 | unsigned int arg_types[ONIG_CALLOUT_MAX_ARGS_NUM];\r | |
1136 | OnigValue opt_defaults[ONIG_CALLOUT_MAX_ARGS_NUM];\r | |
1137 | UChar* name; /* reference to GlobalCalloutNameTable entry: e->name */\r | |
1138 | } CalloutNameListEntry;\r | |
14b0e578 | 1139 | \r |
b602265d DG |
1140 | typedef struct {\r |
1141 | int n;\r | |
1142 | int alloc;\r | |
1143 | CalloutNameListEntry* v;\r | |
1144 | } CalloutNameListType;\r | |
14b0e578 | 1145 | \r |
b602265d | 1146 | static CalloutNameListType* GlobalCalloutNameList;\r |
14b0e578 CS |
1147 | \r |
1148 | static int\r | |
b602265d | 1149 | make_callout_func_list(CalloutNameListType** rs, int init_size)\r |
14b0e578 | 1150 | {\r |
b602265d DG |
1151 | CalloutNameListType* s;\r |
1152 | CalloutNameListEntry* v;\r | |
14b0e578 | 1153 | \r |
b602265d | 1154 | *rs = 0;\r |
14b0e578 | 1155 | \r |
b602265d DG |
1156 | s = xmalloc(sizeof(*s));\r |
1157 | if (IS_NULL(s)) return ONIGERR_MEMORY;\r | |
14b0e578 | 1158 | \r |
b602265d DG |
1159 | v = (CalloutNameListEntry* )xmalloc(sizeof(CalloutNameListEntry) * init_size);\r |
1160 | if (IS_NULL(v)) {\r | |
1161 | xfree(s);\r | |
1162 | return ONIGERR_MEMORY;\r | |
14b0e578 CS |
1163 | }\r |
1164 | \r | |
b602265d DG |
1165 | s->n = 0;\r |
1166 | s->alloc = init_size;\r | |
1167 | s->v = v;\r | |
14b0e578 | 1168 | \r |
b602265d DG |
1169 | *rs = s;\r |
1170 | return ONIG_NORMAL;\r | |
14b0e578 CS |
1171 | }\r |
1172 | \r | |
b602265d DG |
1173 | static void\r |
1174 | free_callout_func_list(CalloutNameListType* s)\r | |
1175 | {\r | |
1176 | if (IS_NOT_NULL(s)) {\r | |
1177 | if (IS_NOT_NULL(s->v)) {\r | |
1178 | int i, j;\r | |
1179 | \r | |
1180 | for (i = 0; i < s->n; i++) {\r | |
1181 | CalloutNameListEntry* e = s->v + i;\r | |
1182 | for (j = e->arg_num - e->opt_arg_num; j < e->arg_num; j++) {\r | |
1183 | if (e->arg_types[j] == ONIG_TYPE_STRING) {\r | |
1184 | UChar* p = e->opt_defaults[j].s.start;\r | |
1185 | if (IS_NOT_NULL(p)) xfree(p);\r | |
1186 | }\r | |
1187 | }\r | |
1188 | }\r | |
1189 | xfree(s->v);\r | |
1190 | }\r | |
1191 | xfree(s);\r | |
1192 | }\r | |
1193 | }\r | |
14b0e578 | 1194 | \r |
b602265d DG |
1195 | static int\r |
1196 | callout_func_list_add(CalloutNameListType* s, int* rid)\r | |
1197 | {\r | |
1198 | if (s->n >= s->alloc) {\r | |
1199 | int new_size = s->alloc * 2;\r | |
1200 | CalloutNameListEntry* nv = (CalloutNameListEntry* )\r | |
1201 | xrealloc(s->v, sizeof(CalloutNameListEntry) * new_size, sizeof(CalloutNameListEntry)*s->alloc);\r | |
1202 | if (IS_NULL(nv)) return ONIGERR_MEMORY;\r | |
14b0e578 | 1203 | \r |
b602265d DG |
1204 | s->alloc = new_size;\r |
1205 | s->v = nv;\r | |
1206 | }\r | |
14b0e578 | 1207 | \r |
b602265d | 1208 | *rid = s->n;\r |
14b0e578 | 1209 | \r |
b602265d DG |
1210 | xmemset(&(s->v[s->n]), 0, sizeof(*(s->v)));\r |
1211 | s->n++;\r | |
1212 | return ONIG_NORMAL;\r | |
1213 | }\r | |
14b0e578 | 1214 | \r |
14b0e578 | 1215 | \r |
b602265d DG |
1216 | typedef struct {\r |
1217 | UChar* name;\r | |
1218 | int name_len; /* byte length */\r | |
1219 | int id;\r | |
1220 | } CalloutNameEntry;\r | |
14b0e578 | 1221 | \r |
b602265d DG |
1222 | #ifdef USE_ST_LIBRARY\r |
1223 | typedef st_table CalloutNameTable;\r | |
14b0e578 | 1224 | #else\r |
b602265d DG |
1225 | typedef struct {\r |
1226 | CalloutNameEntry* e;\r | |
1227 | int num;\r | |
1228 | int alloc;\r | |
1229 | } CalloutNameTable;\r | |
14b0e578 | 1230 | #endif\r |
14b0e578 | 1231 | \r |
b602265d DG |
1232 | static CalloutNameTable* GlobalCalloutNameTable;\r |
1233 | static int CalloutNameIDCounter;\r | |
14b0e578 | 1234 | \r |
b602265d | 1235 | #ifdef USE_ST_LIBRARY\r |
14b0e578 | 1236 | \r |
b602265d DG |
1237 | static int\r |
1238 | i_free_callout_name_entry(st_callout_name_key* key, CalloutNameEntry* e,\r | |
1239 | void* arg ARG_UNUSED)\r | |
1240 | {\r | |
1241 | xfree(e->name);\r | |
1242 | /*xfree(key->s); */ /* is same as e->name */\r | |
1243 | xfree(key);\r | |
1244 | xfree(e);\r | |
1245 | return ST_DELETE;\r | |
1246 | }\r | |
14b0e578 | 1247 | \r |
b602265d DG |
1248 | static int\r |
1249 | callout_name_table_clear(CalloutNameTable* t)\r | |
1250 | {\r | |
1251 | if (IS_NOT_NULL(t)) {\r | |
1252 | onig_st_foreach(t, i_free_callout_name_entry, 0);\r | |
1253 | }\r | |
1254 | return 0;\r | |
1255 | }\r | |
14b0e578 | 1256 | \r |
b602265d DG |
1257 | static int\r |
1258 | global_callout_name_table_free(void)\r | |
1259 | {\r | |
1260 | if (IS_NOT_NULL(GlobalCalloutNameTable)) {\r | |
1261 | int r = callout_name_table_clear(GlobalCalloutNameTable);\r | |
1262 | if (r != 0) return r;\r | |
14b0e578 | 1263 | \r |
b602265d DG |
1264 | onig_st_free_table(GlobalCalloutNameTable);\r |
1265 | GlobalCalloutNameTable = 0;\r | |
1266 | CalloutNameIDCounter = 0;\r | |
14b0e578 CS |
1267 | }\r |
1268 | \r | |
b602265d DG |
1269 | return 0;\r |
1270 | }\r | |
1271 | \r | |
1272 | static CalloutNameEntry*\r | |
1273 | callout_name_find(OnigEncoding enc, int is_not_single,\r | |
1274 | const UChar* name, const UChar* name_end)\r | |
1275 | {\r | |
1276 | int r;\r | |
1277 | CalloutNameEntry* e;\r | |
1278 | CalloutNameTable* t = GlobalCalloutNameTable;\r | |
14b0e578 | 1279 | \r |
b602265d DG |
1280 | e = (CalloutNameEntry* )NULL;\r |
1281 | if (IS_NOT_NULL(t)) {\r | |
1282 | r = onig_st_lookup_callout_name_table(t, enc, is_not_single, name, name_end,\r | |
1283 | (HashDataType* )((void* )(&e)));\r | |
1284 | if (r == 0) { /* not found */\r | |
1285 | if (enc != ONIG_ENCODING_ASCII &&\r | |
1286 | ONIGENC_IS_ASCII_COMPATIBLE_ENCODING(enc)) {\r | |
1287 | enc = ONIG_ENCODING_ASCII;\r | |
1288 | onig_st_lookup_callout_name_table(t, enc, is_not_single, name, name_end,\r | |
1289 | (HashDataType* )((void* )(&e)));\r | |
1290 | }\r | |
1291 | }\r | |
14b0e578 | 1292 | }\r |
b602265d DG |
1293 | return e;\r |
1294 | }\r | |
1295 | \r | |
14b0e578 | 1296 | #else\r |
b602265d DG |
1297 | \r |
1298 | static int\r | |
1299 | callout_name_table_clear(CalloutNameTable* t)\r | |
1300 | {\r | |
1301 | int i;\r | |
1302 | CalloutNameEntry* e;\r | |
1303 | \r | |
1304 | if (IS_NOT_NULL(t)) {\r | |
1305 | for (i = 0; i < t->num; i++) {\r | |
1306 | e = &(t->e[i]);\r | |
1307 | if (IS_NOT_NULL(e->name)) {\r | |
1308 | xfree(e->name);\r | |
1309 | e->name = NULL;\r | |
1310 | e->name_len = 0;\r | |
1311 | e->id = 0;\r | |
1312 | e->func = 0;\r | |
1313 | }\r | |
1314 | }\r | |
1315 | if (IS_NOT_NULL(t->e)) {\r | |
1316 | xfree(t->e);\r | |
1317 | t->e = NULL;\r | |
1318 | }\r | |
1319 | t->num = 0;\r | |
1320 | }\r | |
1321 | return 0;\r | |
14b0e578 CS |
1322 | }\r |
1323 | \r | |
b602265d DG |
1324 | static int\r |
1325 | global_callout_name_table_free(void)\r | |
14b0e578 | 1326 | {\r |
b602265d DG |
1327 | if (IS_NOT_NULL(GlobalCalloutNameTable)) {\r |
1328 | int r = callout_name_table_clear(GlobalCalloutNameTable);\r | |
1329 | if (r != 0) return r;\r | |
14b0e578 | 1330 | \r |
b602265d DG |
1331 | xfree(GlobalCalloutNameTable);\r |
1332 | GlobalCalloutNameTable = 0;\r | |
1333 | CalloutNameIDCounter = 0;\r | |
14b0e578 | 1334 | }\r |
14b0e578 CS |
1335 | return 0;\r |
1336 | }\r | |
14b0e578 | 1337 | \r |
b602265d DG |
1338 | static CalloutNameEntry*\r |
1339 | callout_name_find(UChar* name, UChar* name_end)\r | |
14b0e578 | 1340 | {\r |
b602265d DG |
1341 | int i, len;\r |
1342 | CalloutNameEntry* e;\r | |
1343 | CalloutNameTable* t = Calloutnames;\r | |
14b0e578 | 1344 | \r |
b602265d DG |
1345 | if (IS_NOT_NULL(t)) {\r |
1346 | len = name_end - name;\r | |
1347 | for (i = 0; i < t->num; i++) {\r | |
1348 | e = &(t->e[i]);\r | |
1349 | if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)\r | |
1350 | return e;\r | |
1351 | }\r | |
14b0e578 | 1352 | }\r |
b602265d DG |
1353 | return (CalloutNameEntry* )NULL;\r |
1354 | }\r | |
1355 | \r | |
14b0e578 CS |
1356 | #endif\r |
1357 | \r | |
b602265d DG |
1358 | /* name string must be single byte char string. */\r |
1359 | static int\r | |
1360 | callout_name_entry(CalloutNameEntry** rentry, OnigEncoding enc,\r | |
1361 | int is_not_single, UChar* name, UChar* name_end)\r | |
1362 | {\r | |
1363 | int r;\r | |
1364 | CalloutNameEntry* e;\r | |
1365 | CalloutNameTable* t = GlobalCalloutNameTable;\r | |
14b0e578 | 1366 | \r |
b602265d DG |
1367 | *rentry = 0;\r |
1368 | if (name_end - name <= 0)\r | |
1369 | return ONIGERR_INVALID_CALLOUT_NAME;\r | |
14b0e578 | 1370 | \r |
b602265d DG |
1371 | e = callout_name_find(enc, is_not_single, name, name_end);\r |
1372 | if (IS_NULL(e)) {\r | |
1373 | #ifdef USE_ST_LIBRARY\r | |
1374 | if (IS_NULL(t)) {\r | |
1375 | t = onig_st_init_callout_name_table_with_size(INIT_NAMES_ALLOC_NUM);\r | |
a5def177 | 1376 | CHECK_NULL_RETURN_MEMERR(t);\r |
b602265d DG |
1377 | GlobalCalloutNameTable = t;\r |
1378 | }\r | |
1379 | e = (CalloutNameEntry* )xmalloc(sizeof(CalloutNameEntry));\r | |
1380 | CHECK_NULL_RETURN_MEMERR(e);\r | |
1381 | \r | |
1382 | e->name = onigenc_strdup(enc, name, name_end);\r | |
1383 | if (IS_NULL(e->name)) {\r | |
1384 | xfree(e); return ONIGERR_MEMORY;\r | |
1385 | }\r | |
1386 | \r | |
1387 | r = st_insert_callout_name_table(t, enc, is_not_single,\r | |
1388 | e->name, (e->name + (name_end - name)),\r | |
1389 | (HashDataType )e);\r | |
1390 | if (r < 0) return r;\r | |
1391 | \r | |
1392 | #else\r | |
1393 | \r | |
1394 | int alloc;\r | |
1395 | \r | |
1396 | if (IS_NULL(t)) {\r | |
1397 | alloc = INIT_NAMES_ALLOC_NUM;\r | |
1398 | t = (CalloutNameTable* )xmalloc(sizeof(CalloutNameTable));\r | |
1399 | CHECK_NULL_RETURN_MEMERR(t);\r | |
1400 | t->e = NULL;\r | |
1401 | t->alloc = 0;\r | |
1402 | t->num = 0;\r | |
1403 | \r | |
1404 | t->e = (CalloutNameEntry* )xmalloc(sizeof(CalloutNameEntry) * alloc);\r | |
1405 | if (IS_NULL(t->e)) {\r | |
1406 | xfree(t);\r | |
1407 | return ONIGERR_MEMORY;\r | |
1408 | }\r | |
1409 | t->alloc = alloc;\r | |
1410 | GlobalCalloutNameTable = t;\r | |
1411 | goto clear;\r | |
1412 | }\r | |
1413 | else if (t->num == t->alloc) {\r | |
1414 | int i;\r | |
1415 | \r | |
1416 | alloc = t->alloc * 2;\r | |
1417 | t->e = (CalloutNameEntry* )xrealloc(t->e, sizeof(CalloutNameEntry) * alloc, sizeof(CalloutNameEntry)*t->alloc);\r | |
1418 | CHECK_NULL_RETURN_MEMERR(t->e);\r | |
1419 | t->alloc = alloc;\r | |
1420 | \r | |
1421 | clear:\r | |
1422 | for (i = t->num; i < t->alloc; i++) {\r | |
1423 | t->e[i].name = NULL;\r | |
1424 | t->e[i].name_len = 0;\r | |
1425 | t->e[i].id = 0;\r | |
1426 | }\r | |
1427 | }\r | |
1428 | e = &(t->e[t->num]);\r | |
1429 | t->num++;\r | |
1430 | e->name = onigenc_strdup(enc, name, name_end);\r | |
1431 | if (IS_NULL(e->name)) return ONIGERR_MEMORY;\r | |
1432 | #endif\r | |
1433 | \r | |
1434 | CalloutNameIDCounter++;\r | |
1435 | e->id = CalloutNameIDCounter;\r | |
1436 | e->name_len = (int )(name_end - name);\r | |
1437 | }\r | |
1438 | \r | |
1439 | *rentry = e;\r | |
1440 | return e->id;\r | |
1441 | }\r | |
1442 | \r | |
1443 | static int\r | |
1444 | is_allowed_callout_name(OnigEncoding enc, UChar* name, UChar* name_end)\r | |
14b0e578 | 1445 | {\r |
b602265d DG |
1446 | UChar* p;\r |
1447 | OnigCodePoint c;\r | |
1448 | \r | |
1449 | if (name >= name_end) return 0;\r | |
1450 | \r | |
1451 | p = name;\r | |
1452 | while (p < name_end) {\r | |
1453 | c = ONIGENC_MBC_TO_CODE(enc, p, name_end);\r | |
1454 | if (! IS_ALLOWED_CODE_IN_CALLOUT_NAME(c))\r | |
1455 | return 0;\r | |
1456 | \r | |
1457 | if (p == name) {\r | |
1458 | if (c >= '0' && c <= '9') return 0;\r | |
1459 | }\r | |
1460 | \r | |
1461 | p += ONIGENC_MBC_ENC_LEN(enc, p);\r | |
1462 | }\r | |
1463 | \r | |
1464 | return 1;\r | |
14b0e578 CS |
1465 | }\r |
1466 | \r | |
b602265d DG |
1467 | static int\r |
1468 | is_allowed_callout_tag_name(OnigEncoding enc, UChar* name, UChar* name_end)\r | |
14b0e578 | 1469 | {\r |
b602265d DG |
1470 | UChar* p;\r |
1471 | OnigCodePoint c;\r | |
14b0e578 | 1472 | \r |
b602265d DG |
1473 | if (name >= name_end) return 0;\r |
1474 | \r | |
1475 | p = name;\r | |
1476 | while (p < name_end) {\r | |
1477 | c = ONIGENC_MBC_TO_CODE(enc, p, name_end);\r | |
1478 | if (! IS_ALLOWED_CODE_IN_CALLOUT_TAG_NAME(c))\r | |
1479 | return 0;\r | |
1480 | \r | |
1481 | if (p == name) {\r | |
1482 | if (c >= '0' && c <= '9') return 0;\r | |
1483 | }\r | |
1484 | \r | |
1485 | p += ONIGENC_MBC_ENC_LEN(enc, p);\r | |
1486 | }\r | |
1487 | \r | |
1488 | return 1;\r | |
14b0e578 CS |
1489 | }\r |
1490 | \r | |
b602265d DG |
1491 | extern int\r |
1492 | onig_set_callout_of_name(OnigEncoding enc, OnigCalloutType callout_type,\r | |
1493 | UChar* name, UChar* name_end, int in,\r | |
1494 | OnigCalloutFunc start_func,\r | |
1495 | OnigCalloutFunc end_func,\r | |
1496 | int arg_num, unsigned int arg_types[],\r | |
1497 | int opt_arg_num, OnigValue opt_defaults[])\r | |
14b0e578 | 1498 | {\r |
b602265d DG |
1499 | int r;\r |
1500 | int i;\r | |
1501 | int j;\r | |
1502 | int id;\r | |
1503 | int is_not_single;\r | |
1504 | CalloutNameEntry* e;\r | |
1505 | CalloutNameListEntry* fe;\r | |
14b0e578 | 1506 | \r |
b602265d DG |
1507 | if (callout_type != ONIG_CALLOUT_TYPE_SINGLE)\r |
1508 | return ONIGERR_INVALID_ARGUMENT;\r | |
14b0e578 | 1509 | \r |
b602265d DG |
1510 | if (arg_num < 0 || arg_num > ONIG_CALLOUT_MAX_ARGS_NUM)\r |
1511 | return ONIGERR_INVALID_CALLOUT_ARG;\r | |
14b0e578 | 1512 | \r |
b602265d DG |
1513 | if (opt_arg_num < 0 || opt_arg_num > arg_num)\r |
1514 | return ONIGERR_INVALID_CALLOUT_ARG;\r | |
14b0e578 | 1515 | \r |
b602265d DG |
1516 | if (start_func == 0 && end_func == 0)\r |
1517 | return ONIGERR_INVALID_CALLOUT_ARG;\r | |
1518 | \r | |
1519 | if ((in & ONIG_CALLOUT_IN_PROGRESS) == 0 && (in & ONIG_CALLOUT_IN_RETRACTION) == 0)\r | |
1520 | return ONIGERR_INVALID_CALLOUT_ARG;\r | |
1521 | \r | |
1522 | for (i = 0; i < arg_num; i++) {\r | |
1523 | unsigned int t = arg_types[i];\r | |
1524 | if (t == ONIG_TYPE_VOID)\r | |
1525 | return ONIGERR_INVALID_CALLOUT_ARG;\r | |
1526 | else {\r | |
1527 | if (i >= arg_num - opt_arg_num) {\r | |
1528 | if (t != ONIG_TYPE_LONG && t != ONIG_TYPE_CHAR && t != ONIG_TYPE_STRING &&\r | |
1529 | t != ONIG_TYPE_TAG)\r | |
1530 | return ONIGERR_INVALID_CALLOUT_ARG;\r | |
1531 | }\r | |
1532 | else {\r | |
1533 | if (t != ONIG_TYPE_LONG) {\r | |
1534 | t = t & ~ONIG_TYPE_LONG;\r | |
1535 | if (t != ONIG_TYPE_CHAR && t != ONIG_TYPE_STRING && t != ONIG_TYPE_TAG)\r | |
1536 | return ONIGERR_INVALID_CALLOUT_ARG;\r | |
1537 | }\r | |
14b0e578 CS |
1538 | }\r |
1539 | }\r | |
1540 | }\r | |
1541 | \r | |
b602265d DG |
1542 | if (! is_allowed_callout_name(enc, name, name_end)) {\r |
1543 | return ONIGERR_INVALID_CALLOUT_NAME;\r | |
14b0e578 | 1544 | }\r |
14b0e578 | 1545 | \r |
b602265d DG |
1546 | is_not_single = (callout_type != ONIG_CALLOUT_TYPE_SINGLE);\r |
1547 | id = callout_name_entry(&e, enc, is_not_single, name, name_end);\r | |
1548 | if (id < 0) return id;\r | |
14b0e578 | 1549 | \r |
b602265d DG |
1550 | r = ONIG_NORMAL;\r |
1551 | if (IS_NULL(GlobalCalloutNameList)) {\r | |
1552 | r = make_callout_func_list(&GlobalCalloutNameList, 10);\r | |
1553 | if (r != ONIG_NORMAL) return r;\r | |
1554 | }\r | |
14b0e578 | 1555 | \r |
b602265d DG |
1556 | while (id >= GlobalCalloutNameList->n) {\r |
1557 | int rid;\r | |
1558 | r = callout_func_list_add(GlobalCalloutNameList, &rid);\r | |
1559 | if (r != ONIG_NORMAL) return r;\r | |
14b0e578 CS |
1560 | }\r |
1561 | \r | |
b602265d DG |
1562 | fe = GlobalCalloutNameList->v + id;\r |
1563 | fe->type = callout_type;\r | |
1564 | fe->in = in;\r | |
1565 | fe->start_func = start_func;\r | |
1566 | fe->end_func = end_func;\r | |
1567 | fe->arg_num = arg_num;\r | |
1568 | fe->opt_arg_num = opt_arg_num;\r | |
1569 | fe->name = e->name;\r | |
14b0e578 | 1570 | \r |
b602265d DG |
1571 | for (i = 0; i < arg_num; i++) {\r |
1572 | fe->arg_types[i] = arg_types[i];\r | |
1573 | }\r | |
1574 | for (i = arg_num - opt_arg_num, j = 0; i < arg_num; i++, j++) {\r | |
1575 | if (fe->arg_types[i] == ONIG_TYPE_STRING) {\r | |
1576 | OnigValue* val = opt_defaults + j;\r | |
1577 | UChar* ds = onigenc_strdup(enc, val->s.start, val->s.end);\r | |
1578 | CHECK_NULL_RETURN_MEMERR(ds);\r | |
14b0e578 | 1579 | \r |
b602265d DG |
1580 | fe->opt_defaults[i].s.start = ds;\r |
1581 | fe->opt_defaults[i].s.end = ds + (val->s.end - val->s.start);\r | |
1582 | }\r | |
1583 | else {\r | |
1584 | fe->opt_defaults[i] = opt_defaults[j];\r | |
1585 | }\r | |
1586 | }\r | |
1587 | \r | |
1588 | r = id;\r | |
1589 | return r;\r | |
14b0e578 CS |
1590 | }\r |
1591 | \r | |
b602265d DG |
1592 | static int\r |
1593 | get_callout_name_id_by_name(OnigEncoding enc, int is_not_single,\r | |
1594 | UChar* name, UChar* name_end, int* rid)\r | |
14b0e578 | 1595 | {\r |
b602265d DG |
1596 | int r;\r |
1597 | CalloutNameEntry* e;\r | |
14b0e578 | 1598 | \r |
b602265d DG |
1599 | if (! is_allowed_callout_name(enc, name, name_end)) {\r |
1600 | return ONIGERR_INVALID_CALLOUT_NAME;\r | |
1601 | }\r | |
1602 | \r | |
1603 | e = callout_name_find(enc, is_not_single, name, name_end);\r | |
1604 | if (IS_NULL(e)) {\r | |
1605 | return ONIGERR_UNDEFINED_CALLOUT_NAME;\r | |
1606 | }\r | |
1607 | \r | |
1608 | r = ONIG_NORMAL;\r | |
1609 | *rid = e->id;\r | |
1610 | \r | |
1611 | return r;\r | |
14b0e578 CS |
1612 | }\r |
1613 | \r | |
b602265d DG |
1614 | extern OnigCalloutFunc\r |
1615 | onig_get_callout_start_func(regex_t* reg, int callout_num)\r | |
14b0e578 | 1616 | {\r |
b602265d DG |
1617 | /* If used for callouts of contents, return 0. */\r |
1618 | CalloutListEntry* e;\r | |
14b0e578 | 1619 | \r |
b602265d | 1620 | e = onig_reg_callout_list_at(reg, callout_num);\r |
a5def177 | 1621 | CHECK_NULL_RETURN(e);\r |
b602265d | 1622 | return e->start_func;\r |
14b0e578 CS |
1623 | }\r |
1624 | \r | |
b602265d DG |
1625 | extern const UChar*\r |
1626 | onig_get_callout_tag_start(regex_t* reg, int callout_num)\r | |
14b0e578 | 1627 | {\r |
b602265d | 1628 | CalloutListEntry* e = onig_reg_callout_list_at(reg, callout_num);\r |
a5def177 | 1629 | CHECK_NULL_RETURN(e);\r |
b602265d | 1630 | return e->tag_start;\r |
14b0e578 CS |
1631 | }\r |
1632 | \r | |
b602265d DG |
1633 | extern const UChar*\r |
1634 | onig_get_callout_tag_end(regex_t* reg, int callout_num)\r | |
14b0e578 | 1635 | {\r |
b602265d | 1636 | CalloutListEntry* e = onig_reg_callout_list_at(reg, callout_num);\r |
a5def177 | 1637 | CHECK_NULL_RETURN(e);\r |
b602265d DG |
1638 | return e->tag_end;\r |
1639 | }\r | |
14b0e578 | 1640 | \r |
14b0e578 | 1641 | \r |
b602265d DG |
1642 | extern OnigCalloutType\r |
1643 | onig_get_callout_type_by_name_id(int name_id)\r | |
1644 | {\r | |
1645 | if (name_id < 0 || name_id >= GlobalCalloutNameList->n)\r | |
1646 | return 0;\r | |
14b0e578 | 1647 | \r |
b602265d | 1648 | return GlobalCalloutNameList->v[name_id].type;\r |
14b0e578 CS |
1649 | }\r |
1650 | \r | |
b602265d DG |
1651 | extern OnigCalloutFunc\r |
1652 | onig_get_callout_start_func_by_name_id(int name_id)\r | |
14b0e578 | 1653 | {\r |
b602265d DG |
1654 | if (name_id < 0 || name_id >= GlobalCalloutNameList->n)\r |
1655 | return 0;\r | |
14b0e578 | 1656 | \r |
b602265d | 1657 | return GlobalCalloutNameList->v[name_id].start_func;\r |
14b0e578 CS |
1658 | }\r |
1659 | \r | |
b602265d DG |
1660 | extern OnigCalloutFunc\r |
1661 | onig_get_callout_end_func_by_name_id(int name_id)\r | |
14b0e578 | 1662 | {\r |
b602265d DG |
1663 | if (name_id < 0 || name_id >= GlobalCalloutNameList->n)\r |
1664 | return 0;\r | |
14b0e578 | 1665 | \r |
b602265d | 1666 | return GlobalCalloutNameList->v[name_id].end_func;\r |
14b0e578 CS |
1667 | }\r |
1668 | \r | |
b602265d DG |
1669 | extern int\r |
1670 | onig_get_callout_in_by_name_id(int name_id)\r | |
14b0e578 | 1671 | {\r |
b602265d DG |
1672 | if (name_id < 0 || name_id >= GlobalCalloutNameList->n)\r |
1673 | return 0;\r | |
14b0e578 | 1674 | \r |
b602265d DG |
1675 | return GlobalCalloutNameList->v[name_id].in;\r |
1676 | }\r | |
14b0e578 | 1677 | \r |
b602265d DG |
1678 | static int\r |
1679 | get_callout_arg_num_by_name_id(int name_id)\r | |
1680 | {\r | |
1681 | return GlobalCalloutNameList->v[name_id].arg_num;\r | |
1682 | }\r | |
14b0e578 | 1683 | \r |
b602265d DG |
1684 | static int\r |
1685 | get_callout_opt_arg_num_by_name_id(int name_id)\r | |
14b0e578 | 1686 | {\r |
b602265d | 1687 | return GlobalCalloutNameList->v[name_id].opt_arg_num;\r |
14b0e578 | 1688 | }\r |
14b0e578 | 1689 | \r |
b602265d DG |
1690 | static unsigned int\r |
1691 | get_callout_arg_type_by_name_id(int name_id, int index)\r | |
14b0e578 | 1692 | {\r |
b602265d | 1693 | return GlobalCalloutNameList->v[name_id].arg_types[index];\r |
14b0e578 CS |
1694 | }\r |
1695 | \r | |
b602265d DG |
1696 | static OnigValue\r |
1697 | get_callout_opt_default_by_name_id(int name_id, int index)\r | |
14b0e578 | 1698 | {\r |
b602265d | 1699 | return GlobalCalloutNameList->v[name_id].opt_defaults[index];\r |
14b0e578 CS |
1700 | }\r |
1701 | \r | |
b602265d DG |
1702 | extern UChar*\r |
1703 | onig_get_callout_name_by_name_id(int name_id)\r | |
14b0e578 | 1704 | {\r |
b602265d DG |
1705 | if (name_id < 0 || name_id >= GlobalCalloutNameList->n)\r |
1706 | return 0;\r | |
1707 | \r | |
1708 | return GlobalCalloutNameList->v[name_id].name;\r | |
14b0e578 CS |
1709 | }\r |
1710 | \r | |
b602265d DG |
1711 | extern int\r |
1712 | onig_global_callout_names_free(void)\r | |
14b0e578 | 1713 | {\r |
b602265d DG |
1714 | free_callout_func_list(GlobalCalloutNameList);\r |
1715 | GlobalCalloutNameList = 0;\r | |
14b0e578 | 1716 | \r |
b602265d DG |
1717 | global_callout_name_table_free();\r |
1718 | return ONIG_NORMAL;\r | |
14b0e578 CS |
1719 | }\r |
1720 | \r | |
14b0e578 | 1721 | \r |
b602265d DG |
1722 | typedef st_table CalloutTagTable;\r |
1723 | typedef intptr_t CalloutTagVal;\r | |
14b0e578 | 1724 | \r |
b602265d | 1725 | #define CALLOUT_TAG_LIST_FLAG_TAG_EXIST (1<<0)\r |
14b0e578 | 1726 | \r |
b602265d DG |
1727 | static int\r |
1728 | i_callout_callout_list_set(UChar* key, CalloutTagVal e, void* arg)\r | |
1729 | {\r | |
1730 | int num;\r | |
1731 | RegexExt* ext = (RegexExt* )arg;\r | |
14b0e578 | 1732 | \r |
b602265d DG |
1733 | num = (int )e - 1;\r |
1734 | ext->callout_list[num].flag |= CALLOUT_TAG_LIST_FLAG_TAG_EXIST;\r | |
1735 | return ST_CONTINUE;\r | |
1736 | }\r | |
14b0e578 | 1737 | \r |
b602265d DG |
1738 | static int\r |
1739 | setup_ext_callout_list_values(regex_t* reg)\r | |
1740 | {\r | |
1741 | int i, j;\r | |
1742 | RegexExt* ext;\r | |
1743 | \r | |
1744 | ext = REG_EXTP(reg);\r | |
1745 | if (IS_NOT_NULL(ext->tag_table)) {\r | |
1746 | onig_st_foreach((CalloutTagTable *)ext->tag_table, i_callout_callout_list_set,\r | |
1747 | (st_data_t )ext);\r | |
1748 | }\r | |
1749 | \r | |
1750 | for (i = 0; i < ext->callout_num; i++) {\r | |
1751 | CalloutListEntry* e = ext->callout_list + i;\r | |
1752 | if (e->of == ONIG_CALLOUT_OF_NAME) {\r | |
1753 | for (j = 0; j < e->u.arg.num; j++) {\r | |
1754 | if (e->u.arg.types[j] == ONIG_TYPE_TAG) {\r | |
1755 | UChar* start;\r | |
1756 | UChar* end;\r | |
1757 | int num;\r | |
1758 | start = e->u.arg.vals[j].s.start;\r | |
1759 | end = e->u.arg.vals[j].s.end;\r | |
1760 | num = onig_get_callout_num_by_tag(reg, start, end);\r | |
1761 | if (num < 0) return num;\r | |
1762 | e->u.arg.vals[j].tag = num;\r | |
1763 | }\r | |
14b0e578 CS |
1764 | }\r |
1765 | }\r | |
14b0e578 CS |
1766 | }\r |
1767 | \r | |
b602265d | 1768 | return ONIG_NORMAL;\r |
14b0e578 CS |
1769 | }\r |
1770 | \r | |
1771 | extern int\r | |
b602265d | 1772 | onig_callout_tag_is_exist_at_callout_num(regex_t* reg, int callout_num)\r |
14b0e578 | 1773 | {\r |
b602265d | 1774 | RegexExt* ext = REG_EXTP(reg);\r |
14b0e578 | 1775 | \r |
b602265d DG |
1776 | if (IS_NULL(ext) || IS_NULL(ext->callout_list)) return 0;\r |
1777 | if (callout_num > ext->callout_num) return 0;\r | |
14b0e578 | 1778 | \r |
b602265d DG |
1779 | return (ext->callout_list[callout_num].flag &\r |
1780 | CALLOUT_TAG_LIST_FLAG_TAG_EXIST) != 0 ? 1 : 0;\r | |
14b0e578 CS |
1781 | }\r |
1782 | \r | |
b602265d DG |
1783 | static int\r |
1784 | i_free_callout_tag_entry(UChar* key, CalloutTagVal e, void* arg ARG_UNUSED)\r | |
14b0e578 | 1785 | {\r |
b602265d DG |
1786 | xfree(key);\r |
1787 | return ST_DELETE;\r | |
14b0e578 CS |
1788 | }\r |
1789 | \r | |
b602265d DG |
1790 | static int\r |
1791 | callout_tag_table_clear(CalloutTagTable* t)\r | |
14b0e578 | 1792 | {\r |
b602265d DG |
1793 | if (IS_NOT_NULL(t)) {\r |
1794 | onig_st_foreach(t, i_free_callout_tag_entry, 0);\r | |
14b0e578 | 1795 | }\r |
b602265d | 1796 | return 0;\r |
14b0e578 CS |
1797 | }\r |
1798 | \r | |
b602265d DG |
1799 | extern int\r |
1800 | onig_callout_tag_table_free(void* table)\r | |
14b0e578 | 1801 | {\r |
b602265d | 1802 | CalloutTagTable* t = (CalloutTagTable* )table;\r |
14b0e578 | 1803 | \r |
b602265d DG |
1804 | if (IS_NOT_NULL(t)) {\r |
1805 | int r = callout_tag_table_clear(t);\r | |
1806 | if (r != 0) return r;\r | |
14b0e578 | 1807 | \r |
b602265d DG |
1808 | onig_st_free_table(t);\r |
1809 | }\r | |
14b0e578 | 1810 | \r |
b602265d | 1811 | return 0;\r |
14b0e578 CS |
1812 | }\r |
1813 | \r | |
b602265d DG |
1814 | extern int\r |
1815 | onig_get_callout_num_by_tag(regex_t* reg,\r | |
1816 | const UChar* tag, const UChar* tag_end)\r | |
14b0e578 | 1817 | {\r |
b602265d DG |
1818 | int r;\r |
1819 | RegexExt* ext;\r | |
1820 | CalloutTagVal e;\r | |
14b0e578 | 1821 | \r |
b602265d DG |
1822 | ext = REG_EXTP(reg);\r |
1823 | if (IS_NULL(ext) || IS_NULL(ext->tag_table))\r | |
1824 | return ONIGERR_INVALID_CALLOUT_TAG_NAME;\r | |
14b0e578 | 1825 | \r |
b602265d DG |
1826 | r = onig_st_lookup_strend(ext->tag_table, tag, tag_end,\r |
1827 | (HashDataType* )((void* )(&e)));\r | |
1828 | if (r == 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME;\r | |
1829 | return (int )e;\r | |
14b0e578 CS |
1830 | }\r |
1831 | \r | |
b602265d DG |
1832 | static CalloutTagVal\r |
1833 | callout_tag_find(CalloutTagTable* t, const UChar* name, const UChar* name_end)\r | |
14b0e578 | 1834 | {\r |
b602265d | 1835 | CalloutTagVal e;\r |
14b0e578 | 1836 | \r |
b602265d DG |
1837 | e = -1;\r |
1838 | if (IS_NOT_NULL(t)) {\r | |
1839 | onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));\r | |
14b0e578 | 1840 | }\r |
b602265d | 1841 | return e;\r |
14b0e578 CS |
1842 | }\r |
1843 | \r | |
1844 | static int\r | |
b602265d | 1845 | callout_tag_table_new(CalloutTagTable** rt)\r |
14b0e578 | 1846 | {\r |
b602265d DG |
1847 | CalloutTagTable* t;\r |
1848 | \r | |
1849 | *rt = 0;\r | |
1850 | t = onig_st_init_strend_table_with_size(INIT_TAG_NAMES_ALLOC_NUM);\r | |
1851 | CHECK_NULL_RETURN_MEMERR(t);\r | |
1852 | \r | |
1853 | *rt = t;\r | |
1854 | return ONIG_NORMAL;\r | |
14b0e578 CS |
1855 | }\r |
1856 | \r | |
14b0e578 | 1857 | static int\r |
b602265d DG |
1858 | callout_tag_entry_raw(CalloutTagTable* t, UChar* name, UChar* name_end,\r |
1859 | CalloutTagVal entry_val)\r | |
14b0e578 | 1860 | {\r |
b602265d DG |
1861 | int r;\r |
1862 | CalloutTagVal val;\r | |
14b0e578 | 1863 | \r |
b602265d DG |
1864 | if (name_end - name <= 0)\r |
1865 | return ONIGERR_INVALID_CALLOUT_TAG_NAME;\r | |
14b0e578 | 1866 | \r |
b602265d DG |
1867 | val = callout_tag_find(t, name, name_end);\r |
1868 | if (val >= 0)\r | |
1869 | return ONIGERR_MULTIPLEX_DEFINED_NAME;\r | |
14b0e578 | 1870 | \r |
b602265d DG |
1871 | r = onig_st_insert_strend(t, name, name_end, (HashDataType )entry_val);\r |
1872 | if (r < 0) return r;\r | |
14b0e578 | 1873 | \r |
b602265d | 1874 | return ONIG_NORMAL;\r |
14b0e578 CS |
1875 | }\r |
1876 | \r | |
1877 | static int\r | |
b602265d | 1878 | ext_ensure_tag_table(regex_t* reg)\r |
14b0e578 | 1879 | {\r |
b602265d DG |
1880 | int r;\r |
1881 | RegexExt* ext;\r | |
1882 | CalloutTagTable* t;\r | |
14b0e578 | 1883 | \r |
b602265d DG |
1884 | ext = onig_get_regex_ext(reg);\r |
1885 | CHECK_NULL_RETURN_MEMERR(ext);\r | |
14b0e578 | 1886 | \r |
b602265d DG |
1887 | if (IS_NULL(ext->tag_table)) {\r |
1888 | r = callout_tag_table_new(&t);\r | |
1889 | if (r != ONIG_NORMAL) return r;\r | |
1890 | \r | |
1891 | ext->tag_table = t;\r | |
14b0e578 | 1892 | }\r |
b602265d DG |
1893 | \r |
1894 | return ONIG_NORMAL;\r | |
14b0e578 CS |
1895 | }\r |
1896 | \r | |
1897 | static int\r | |
b602265d DG |
1898 | callout_tag_entry(regex_t* reg, UChar* name, UChar* name_end,\r |
1899 | CalloutTagVal entry_val)\r | |
14b0e578 | 1900 | {\r |
b602265d DG |
1901 | int r;\r |
1902 | RegexExt* ext;\r | |
1903 | CalloutListEntry* e;\r | |
14b0e578 | 1904 | \r |
b602265d DG |
1905 | r = ext_ensure_tag_table(reg);\r |
1906 | if (r != ONIG_NORMAL) return r;\r | |
14b0e578 | 1907 | \r |
b602265d DG |
1908 | ext = onig_get_regex_ext(reg);\r |
1909 | r = callout_tag_entry_raw(ext->tag_table, name, name_end, entry_val);\r | |
14b0e578 | 1910 | \r |
b602265d | 1911 | e = onig_reg_callout_list_at(reg, (int )entry_val);\r |
a5def177 | 1912 | CHECK_NULL_RETURN_MEMERR(e);\r |
b602265d DG |
1913 | e->tag_start = name;\r |
1914 | e->tag_end = name_end;\r | |
14b0e578 | 1915 | \r |
b602265d DG |
1916 | return r;\r |
1917 | }\r | |
14b0e578 | 1918 | \r |
b602265d | 1919 | #endif /* USE_CALLOUT */\r |
14b0e578 | 1920 | \r |
14b0e578 | 1921 | \r |
b602265d | 1922 | #define INIT_SCANENV_MEMENV_ALLOC_SIZE 16\r |
14b0e578 | 1923 | \r |
b602265d DG |
1924 | static void\r |
1925 | scan_env_clear(ScanEnv* env)\r | |
14b0e578 | 1926 | {\r |
b602265d DG |
1927 | MEM_STATUS_CLEAR(env->capture_history);\r |
1928 | MEM_STATUS_CLEAR(env->bt_mem_start);\r | |
1929 | MEM_STATUS_CLEAR(env->bt_mem_end);\r | |
1930 | MEM_STATUS_CLEAR(env->backrefed_mem);\r | |
1931 | env->error = (UChar* )NULL;\r | |
1932 | env->error_end = (UChar* )NULL;\r | |
1933 | env->num_call = 0;\r | |
14b0e578 | 1934 | \r |
b602265d DG |
1935 | #ifdef USE_CALL\r |
1936 | env->unset_addr_list = NULL;\r | |
1937 | env->has_call_zero = 0;\r | |
1938 | #endif\r | |
14b0e578 | 1939 | \r |
b602265d DG |
1940 | env->num_mem = 0;\r |
1941 | env->num_named = 0;\r | |
1942 | env->mem_alloc = 0;\r | |
1943 | env->mem_env_dynamic = (MemEnv* )NULL;\r | |
14b0e578 | 1944 | \r |
b602265d | 1945 | xmemset(env->mem_env_static, 0, sizeof(env->mem_env_static));\r |
14b0e578 | 1946 | \r |
b602265d DG |
1947 | env->parse_depth = 0;\r |
1948 | env->keep_num = 0;\r | |
1949 | env->save_num = 0;\r | |
1950 | env->save_alloc_num = 0;\r | |
1951 | env->saves = 0;\r | |
1952 | }\r | |
14b0e578 | 1953 | \r |
b602265d DG |
1954 | static int\r |
1955 | scan_env_add_mem_entry(ScanEnv* env)\r | |
1956 | {\r | |
1957 | int i, need, alloc;\r | |
1958 | MemEnv* p;\r | |
14b0e578 | 1959 | \r |
b602265d DG |
1960 | need = env->num_mem + 1;\r |
1961 | if (need > MaxCaptureNum && MaxCaptureNum != 0)\r | |
1962 | return ONIGERR_TOO_MANY_CAPTURES;\r | |
14b0e578 | 1963 | \r |
b602265d DG |
1964 | if (need >= SCANENV_MEMENV_SIZE) {\r |
1965 | if (env->mem_alloc <= need) {\r | |
1966 | if (IS_NULL(env->mem_env_dynamic)) {\r | |
1967 | alloc = INIT_SCANENV_MEMENV_ALLOC_SIZE;\r | |
1968 | p = (MemEnv* )xmalloc(sizeof(MemEnv) * alloc);\r | |
1969 | CHECK_NULL_RETURN_MEMERR(p);\r | |
1970 | xmemcpy(p, env->mem_env_static, sizeof(env->mem_env_static));\r | |
1971 | }\r | |
1972 | else {\r | |
1973 | alloc = env->mem_alloc * 2;\r | |
1974 | p = (MemEnv* )xrealloc(env->mem_env_dynamic, sizeof(MemEnv) * alloc, sizeof(MemEnv)*env->mem_alloc);\r | |
1975 | CHECK_NULL_RETURN_MEMERR(p);\r | |
1976 | }\r | |
14b0e578 | 1977 | \r |
b602265d DG |
1978 | for (i = env->num_mem + 1; i < alloc; i++) {\r |
1979 | p[i].node = NULL_NODE;\r | |
1980 | #if 0\r | |
1981 | p[i].in = 0;\r | |
1982 | p[i].recursion = 0;\r | |
1983 | #endif\r | |
1984 | }\r | |
1985 | \r | |
1986 | env->mem_env_dynamic = p;\r | |
1987 | env->mem_alloc = alloc;\r | |
14b0e578 CS |
1988 | }\r |
1989 | }\r | |
1990 | \r | |
b602265d DG |
1991 | env->num_mem++;\r |
1992 | return env->num_mem;\r | |
14b0e578 CS |
1993 | }\r |
1994 | \r | |
1995 | static int\r | |
b602265d | 1996 | scan_env_set_mem_node(ScanEnv* env, int num, Node* node)\r |
14b0e578 | 1997 | {\r |
b602265d DG |
1998 | if (env->num_mem >= num)\r |
1999 | SCANENV_MEMENV(env)[num].node = node;\r | |
2000 | else\r | |
2001 | return ONIGERR_PARSER_BUG;\r | |
2002 | return 0;\r | |
14b0e578 CS |
2003 | }\r |
2004 | \r | |
b602265d DG |
2005 | extern void\r |
2006 | onig_node_free(Node* node)\r | |
14b0e578 | 2007 | {\r |
b602265d DG |
2008 | start:\r |
2009 | if (IS_NULL(node)) return ;\r | |
14b0e578 | 2010 | \r |
b602265d DG |
2011 | #ifdef DEBUG_NODE_FREE\r |
2012 | fprintf(stderr, "onig_node_free: %p\n", node);\r | |
2013 | #endif\r | |
14b0e578 | 2014 | \r |
b602265d DG |
2015 | switch (NODE_TYPE(node)) {\r |
2016 | case NODE_STRING:\r | |
2017 | if (STR_(node)->capa != 0 &&\r | |
2018 | IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) {\r | |
2019 | xfree(STR_(node)->s);\r | |
2020 | }\r | |
2021 | break;\r | |
14b0e578 | 2022 | \r |
b602265d DG |
2023 | case NODE_LIST:\r |
2024 | case NODE_ALT:\r | |
2025 | onig_node_free(NODE_CAR(node));\r | |
2026 | {\r | |
2027 | Node* next_node = NODE_CDR(node);\r | |
2028 | \r | |
2029 | xfree(node);\r | |
2030 | node = next_node;\r | |
2031 | goto start;\r | |
14b0e578 | 2032 | }\r |
b602265d | 2033 | break;\r |
14b0e578 | 2034 | \r |
b602265d DG |
2035 | case NODE_CCLASS:\r |
2036 | {\r | |
2037 | CClassNode* cc = CCLASS_(node);\r | |
14b0e578 | 2038 | \r |
b602265d DG |
2039 | if (cc->mbuf)\r |
2040 | bbuf_free(cc->mbuf);\r | |
2041 | }\r | |
2042 | break;\r | |
14b0e578 | 2043 | \r |
b602265d DG |
2044 | case NODE_BACKREF:\r |
2045 | if (IS_NOT_NULL(BACKREF_(node)->back_dynamic))\r | |
2046 | xfree(BACKREF_(node)->back_dynamic);\r | |
2047 | break;\r | |
14b0e578 | 2048 | \r |
b602265d DG |
2049 | case NODE_ENCLOSURE:\r |
2050 | if (NODE_BODY(node))\r | |
2051 | onig_node_free(NODE_BODY(node));\r | |
14b0e578 | 2052 | \r |
b602265d DG |
2053 | {\r |
2054 | EnclosureNode* en = ENCLOSURE_(node);\r | |
2055 | if (en->type == ENCLOSURE_IF_ELSE) {\r | |
2056 | onig_node_free(en->te.Then);\r | |
2057 | onig_node_free(en->te.Else);\r | |
14b0e578 CS |
2058 | }\r |
2059 | }\r | |
b602265d | 2060 | break;\r |
14b0e578 | 2061 | \r |
b602265d DG |
2062 | case NODE_QUANT:\r |
2063 | case NODE_ANCHOR:\r | |
2064 | if (NODE_BODY(node))\r | |
2065 | onig_node_free(NODE_BODY(node));\r | |
2066 | break;\r | |
14b0e578 | 2067 | \r |
b602265d DG |
2068 | case NODE_CTYPE:\r |
2069 | case NODE_CALL:\r | |
2070 | case NODE_GIMMICK:\r | |
2071 | break;\r | |
14b0e578 | 2072 | }\r |
14b0e578 | 2073 | \r |
b602265d | 2074 | xfree(node);\r |
14b0e578 CS |
2075 | }\r |
2076 | \r | |
b602265d DG |
2077 | static void\r |
2078 | cons_node_free_alone(Node* node)\r | |
14b0e578 | 2079 | {\r |
b602265d DG |
2080 | NODE_CAR(node) = 0;\r |
2081 | NODE_CDR(node) = 0;\r | |
2082 | onig_node_free(node);\r | |
14b0e578 CS |
2083 | }\r |
2084 | \r | |
b602265d DG |
2085 | static Node*\r |
2086 | node_new(void)\r | |
14b0e578 | 2087 | {\r |
b602265d | 2088 | Node* node;\r |
14b0e578 | 2089 | \r |
b602265d DG |
2090 | node = (Node* )xmalloc(sizeof(Node));\r |
2091 | xmemset(node, 0, sizeof(*node));\r | |
14b0e578 | 2092 | \r |
b602265d DG |
2093 | #ifdef DEBUG_NODE_FREE\r |
2094 | fprintf(stderr, "node_new: %p\n", node);\r | |
2095 | #endif\r | |
2096 | return node;\r | |
2097 | }\r | |
14b0e578 | 2098 | \r |
14b0e578 | 2099 | \r |
b602265d DG |
2100 | static void\r |
2101 | initialize_cclass(CClassNode* cc)\r | |
2102 | {\r | |
2103 | BITSET_CLEAR(cc->bs);\r | |
2104 | cc->flags = 0;\r | |
2105 | cc->mbuf = NULL;\r | |
2106 | }\r | |
2107 | \r | |
2108 | static Node*\r | |
2109 | node_new_cclass(void)\r | |
2110 | {\r | |
2111 | Node* node = node_new();\r | |
2112 | CHECK_NULL_RETURN(node);\r | |
2113 | \r | |
2114 | NODE_SET_TYPE(node, NODE_CCLASS);\r | |
2115 | initialize_cclass(CCLASS_(node));\r | |
2116 | return node;\r | |
2117 | }\r | |
2118 | \r | |
2119 | static Node*\r | |
2120 | node_new_ctype(int type, int not, OnigOptionType options)\r | |
2121 | {\r | |
2122 | Node* node = node_new();\r | |
2123 | CHECK_NULL_RETURN(node);\r | |
2124 | \r | |
2125 | NODE_SET_TYPE(node, NODE_CTYPE);\r | |
2126 | CTYPE_(node)->ctype = type;\r | |
2127 | CTYPE_(node)->not = not;\r | |
2128 | CTYPE_(node)->options = options;\r | |
2129 | CTYPE_(node)->ascii_mode = IS_ASCII_MODE_CTYPE_OPTION(type, options);\r | |
2130 | return node;\r | |
2131 | }\r | |
2132 | \r | |
2133 | static Node*\r | |
2134 | node_new_anychar(void)\r | |
2135 | {\r | |
2136 | Node* node = node_new_ctype(CTYPE_ANYCHAR, 0, ONIG_OPTION_NONE);\r | |
2137 | return node;\r | |
2138 | }\r | |
2139 | \r | |
2140 | static Node*\r | |
2141 | node_new_anychar_with_fixed_option(OnigOptionType option)\r | |
2142 | {\r | |
2143 | CtypeNode* ct;\r | |
2144 | Node* node;\r | |
2145 | \r | |
2146 | node = node_new_anychar();\r | |
a5def177 DG |
2147 | CHECK_NULL_RETURN(node);\r |
2148 | \r | |
b602265d DG |
2149 | ct = CTYPE_(node);\r |
2150 | ct->options = option;\r | |
2151 | NODE_STATUS_ADD(node, FIXED_OPTION);\r | |
2152 | return node;\r | |
2153 | }\r | |
2154 | \r | |
2155 | static int\r | |
2156 | node_new_no_newline(Node** node, ScanEnv* env)\r | |
2157 | {\r | |
2158 | Node* n;\r | |
2159 | \r | |
2160 | n = node_new_anychar_with_fixed_option(ONIG_OPTION_NONE);\r | |
2161 | CHECK_NULL_RETURN_MEMERR(n);\r | |
2162 | *node = n;\r | |
2163 | return 0;\r | |
2164 | }\r | |
2165 | \r | |
2166 | static int\r | |
2167 | node_new_true_anychar(Node** node, ScanEnv* env)\r | |
2168 | {\r | |
2169 | Node* n;\r | |
2170 | \r | |
2171 | n = node_new_anychar_with_fixed_option(ONIG_OPTION_MULTILINE);\r | |
2172 | CHECK_NULL_RETURN_MEMERR(n);\r | |
2173 | *node = n;\r | |
2174 | return 0;\r | |
2175 | }\r | |
2176 | \r | |
2177 | static Node*\r | |
2178 | node_new_list(Node* left, Node* right)\r | |
2179 | {\r | |
2180 | Node* node = node_new();\r | |
2181 | CHECK_NULL_RETURN(node);\r | |
2182 | \r | |
2183 | NODE_SET_TYPE(node, NODE_LIST);\r | |
2184 | NODE_CAR(node) = left;\r | |
2185 | NODE_CDR(node) = right;\r | |
2186 | return node;\r | |
2187 | }\r | |
2188 | \r | |
2189 | extern Node*\r | |
2190 | onig_node_new_list(Node* left, Node* right)\r | |
2191 | {\r | |
2192 | return node_new_list(left, right);\r | |
2193 | }\r | |
2194 | \r | |
2195 | extern Node*\r | |
2196 | onig_node_list_add(Node* list, Node* x)\r | |
2197 | {\r | |
2198 | Node *n;\r | |
2199 | \r | |
2200 | n = onig_node_new_list(x, NULL);\r | |
2201 | if (IS_NULL(n)) return NULL_NODE;\r | |
2202 | \r | |
2203 | if (IS_NOT_NULL(list)) {\r | |
2204 | while (IS_NOT_NULL(NODE_CDR(list)))\r | |
2205 | list = NODE_CDR(list);\r | |
2206 | \r | |
2207 | NODE_CDR(list) = n;\r | |
2208 | }\r | |
2209 | \r | |
2210 | return n;\r | |
2211 | }\r | |
2212 | \r | |
2213 | extern Node*\r | |
2214 | onig_node_new_alt(Node* left, Node* right)\r | |
2215 | {\r | |
2216 | Node* node = node_new();\r | |
2217 | CHECK_NULL_RETURN(node);\r | |
2218 | \r | |
2219 | NODE_SET_TYPE(node, NODE_ALT);\r | |
2220 | NODE_CAR(node) = left;\r | |
2221 | NODE_CDR(node) = right;\r | |
2222 | return node;\r | |
2223 | }\r | |
2224 | \r | |
2225 | static Node*\r | |
2226 | make_list_or_alt(NodeType type, int n, Node* ns[])\r | |
2227 | {\r | |
2228 | Node* r;\r | |
2229 | \r | |
2230 | if (n <= 0) return NULL_NODE;\r | |
2231 | \r | |
2232 | if (n == 1) {\r | |
2233 | r = node_new();\r | |
2234 | CHECK_NULL_RETURN(r);\r | |
2235 | NODE_SET_TYPE(r, type);\r | |
2236 | NODE_CAR(r) = ns[0];\r | |
2237 | NODE_CDR(r) = NULL_NODE;\r | |
2238 | }\r | |
2239 | else {\r | |
2240 | Node* right;\r | |
2241 | \r | |
2242 | r = node_new();\r | |
2243 | CHECK_NULL_RETURN(r);\r | |
2244 | \r | |
2245 | right = make_list_or_alt(type, n - 1, ns + 1);\r | |
2246 | if (IS_NULL(right)) {\r | |
2247 | onig_node_free(r);\r | |
2248 | return NULL_NODE;\r | |
2249 | }\r | |
2250 | \r | |
2251 | NODE_SET_TYPE(r, type);\r | |
2252 | NODE_CAR(r) = ns[0];\r | |
2253 | NODE_CDR(r) = right;\r | |
2254 | }\r | |
2255 | \r | |
2256 | return r;\r | |
2257 | }\r | |
2258 | \r | |
2259 | static Node*\r | |
2260 | make_list(int n, Node* ns[])\r | |
2261 | {\r | |
2262 | return make_list_or_alt(NODE_LIST, n, ns);\r | |
2263 | }\r | |
2264 | \r | |
2265 | static Node*\r | |
2266 | make_alt(int n, Node* ns[])\r | |
2267 | {\r | |
2268 | return make_list_or_alt(NODE_ALT, n, ns);\r | |
2269 | }\r | |
2270 | \r | |
2271 | extern Node*\r | |
2272 | onig_node_new_anchor(int type, int ascii_mode)\r | |
2273 | {\r | |
2274 | Node* node = node_new();\r | |
2275 | CHECK_NULL_RETURN(node);\r | |
2276 | \r | |
2277 | NODE_SET_TYPE(node, NODE_ANCHOR);\r | |
2278 | ANCHOR_(node)->type = type;\r | |
2279 | ANCHOR_(node)->char_len = -1;\r | |
2280 | ANCHOR_(node)->ascii_mode = ascii_mode;\r | |
2281 | return node;\r | |
2282 | }\r | |
2283 | \r | |
2284 | static Node*\r | |
2285 | node_new_backref(int back_num, int* backrefs, int by_name,\r | |
2286 | #ifdef USE_BACKREF_WITH_LEVEL\r | |
2287 | int exist_level, int nest_level,\r | |
2288 | #endif\r | |
2289 | ScanEnv* env)\r | |
2290 | {\r | |
2291 | int i;\r | |
2292 | Node* node = node_new();\r | |
2293 | \r | |
2294 | CHECK_NULL_RETURN(node);\r | |
2295 | \r | |
2296 | NODE_SET_TYPE(node, NODE_BACKREF);\r | |
2297 | BACKREF_(node)->back_num = back_num;\r | |
2298 | BACKREF_(node)->back_dynamic = (int* )NULL;\r | |
2299 | if (by_name != 0)\r | |
2300 | NODE_STATUS_ADD(node, BY_NAME);\r | |
2301 | \r | |
2302 | #ifdef USE_BACKREF_WITH_LEVEL\r | |
2303 | if (exist_level != 0) {\r | |
2304 | NODE_STATUS_ADD(node, NEST_LEVEL);\r | |
2305 | BACKREF_(node)->nest_level = nest_level;\r | |
2306 | }\r | |
2307 | #endif\r | |
2308 | \r | |
2309 | for (i = 0; i < back_num; i++) {\r | |
2310 | if (backrefs[i] <= env->num_mem &&\r | |
2311 | IS_NULL(SCANENV_MEMENV(env)[backrefs[i]].node)) {\r | |
2312 | NODE_STATUS_ADD(node, RECURSION); /* /...(\1).../ */\r | |
2313 | break;\r | |
2314 | }\r | |
2315 | }\r | |
2316 | \r | |
2317 | if (back_num <= NODE_BACKREFS_SIZE) {\r | |
2318 | for (i = 0; i < back_num; i++)\r | |
2319 | BACKREF_(node)->back_static[i] = backrefs[i];\r | |
2320 | }\r | |
2321 | else {\r | |
2322 | int* p = (int* )xmalloc(sizeof(int) * back_num);\r | |
2323 | if (IS_NULL(p)) {\r | |
2324 | onig_node_free(node);\r | |
2325 | return NULL;\r | |
2326 | }\r | |
2327 | BACKREF_(node)->back_dynamic = p;\r | |
2328 | for (i = 0; i < back_num; i++)\r | |
2329 | p[i] = backrefs[i];\r | |
2330 | }\r | |
2331 | return node;\r | |
2332 | }\r | |
2333 | \r | |
2334 | static Node*\r | |
2335 | node_new_backref_checker(int back_num, int* backrefs, int by_name,\r | |
2336 | #ifdef USE_BACKREF_WITH_LEVEL\r | |
2337 | int exist_level, int nest_level,\r | |
2338 | #endif\r | |
2339 | ScanEnv* env)\r | |
2340 | {\r | |
2341 | Node* node;\r | |
2342 | \r | |
2343 | node = node_new_backref(back_num, backrefs, by_name,\r | |
2344 | #ifdef USE_BACKREF_WITH_LEVEL\r | |
2345 | exist_level, nest_level,\r | |
2346 | #endif\r | |
2347 | env);\r | |
2348 | CHECK_NULL_RETURN(node);\r | |
2349 | \r | |
2350 | NODE_STATUS_ADD(node, CHECKER);\r | |
2351 | return node;\r | |
2352 | }\r | |
2353 | \r | |
2354 | #ifdef USE_CALL\r | |
2355 | static Node*\r | |
2356 | node_new_call(UChar* name, UChar* name_end, int gnum, int by_number)\r | |
2357 | {\r | |
2358 | Node* node = node_new();\r | |
2359 | CHECK_NULL_RETURN(node);\r | |
2360 | \r | |
2361 | NODE_SET_TYPE(node, NODE_CALL);\r | |
2362 | CALL_(node)->by_number = by_number;\r | |
2363 | CALL_(node)->name = name;\r | |
2364 | CALL_(node)->name_end = name_end;\r | |
2365 | CALL_(node)->group_num = gnum;\r | |
2366 | CALL_(node)->entry_count = 1;\r | |
2367 | return node;\r | |
2368 | }\r | |
2369 | #endif\r | |
2370 | \r | |
2371 | static Node*\r | |
2372 | node_new_quantifier(int lower, int upper, int by_number)\r | |
2373 | {\r | |
2374 | Node* node = node_new();\r | |
2375 | CHECK_NULL_RETURN(node);\r | |
2376 | \r | |
2377 | NODE_SET_TYPE(node, NODE_QUANT);\r | |
2378 | QUANT_(node)->lower = lower;\r | |
2379 | QUANT_(node)->upper = upper;\r | |
2380 | QUANT_(node)->greedy = 1;\r | |
2381 | QUANT_(node)->body_empty_info = QUANT_BODY_IS_NOT_EMPTY;\r | |
2382 | QUANT_(node)->head_exact = NULL_NODE;\r | |
2383 | QUANT_(node)->next_head_exact = NULL_NODE;\r | |
2384 | QUANT_(node)->is_refered = 0;\r | |
2385 | if (by_number != 0)\r | |
2386 | NODE_STATUS_ADD(node, BY_NUMBER);\r | |
2387 | \r | |
2388 | return node;\r | |
2389 | }\r | |
2390 | \r | |
2391 | static Node*\r | |
2392 | node_new_enclosure(enum EnclosureType type)\r | |
2393 | {\r | |
2394 | Node* node = node_new();\r | |
2395 | CHECK_NULL_RETURN(node);\r | |
2396 | \r | |
2397 | NODE_SET_TYPE(node, NODE_ENCLOSURE);\r | |
2398 | ENCLOSURE_(node)->type = type;\r | |
2399 | \r | |
2400 | switch (type) {\r | |
2401 | case ENCLOSURE_MEMORY:\r | |
2402 | ENCLOSURE_(node)->m.regnum = 0;\r | |
2403 | ENCLOSURE_(node)->m.called_addr = -1;\r | |
2404 | ENCLOSURE_(node)->m.entry_count = 1;\r | |
2405 | ENCLOSURE_(node)->m.called_state = 0;\r | |
2406 | break;\r | |
2407 | \r | |
2408 | case ENCLOSURE_OPTION:\r | |
2409 | ENCLOSURE_(node)->o.options = 0;\r | |
2410 | break;\r | |
2411 | \r | |
2412 | case ENCLOSURE_STOP_BACKTRACK:\r | |
2413 | break;\r | |
2414 | \r | |
2415 | case ENCLOSURE_IF_ELSE:\r | |
2416 | ENCLOSURE_(node)->te.Then = 0;\r | |
2417 | ENCLOSURE_(node)->te.Else = 0;\r | |
2418 | break;\r | |
2419 | }\r | |
2420 | \r | |
2421 | ENCLOSURE_(node)->opt_count = 0;\r | |
2422 | return node;\r | |
2423 | }\r | |
2424 | \r | |
2425 | extern Node*\r | |
2426 | onig_node_new_enclosure(int type)\r | |
2427 | {\r | |
2428 | return node_new_enclosure(type);\r | |
2429 | }\r | |
2430 | \r | |
2431 | static Node*\r | |
2432 | node_new_enclosure_if_else(Node* cond, Node* Then, Node* Else)\r | |
2433 | {\r | |
2434 | Node* n;\r | |
2435 | n = node_new_enclosure(ENCLOSURE_IF_ELSE);\r | |
2436 | CHECK_NULL_RETURN(n);\r | |
2437 | \r | |
2438 | NODE_BODY(n) = cond;\r | |
2439 | ENCLOSURE_(n)->te.Then = Then;\r | |
2440 | ENCLOSURE_(n)->te.Else = Else;\r | |
2441 | return n;\r | |
2442 | }\r | |
2443 | \r | |
2444 | static Node*\r | |
2445 | node_new_memory(int is_named)\r | |
2446 | {\r | |
2447 | Node* node = node_new_enclosure(ENCLOSURE_MEMORY);\r | |
2448 | CHECK_NULL_RETURN(node);\r | |
2449 | if (is_named != 0)\r | |
2450 | NODE_STATUS_ADD(node, NAMED_GROUP);\r | |
2451 | \r | |
2452 | return node;\r | |
2453 | }\r | |
2454 | \r | |
2455 | static Node*\r | |
2456 | node_new_option(OnigOptionType option)\r | |
2457 | {\r | |
2458 | Node* node = node_new_enclosure(ENCLOSURE_OPTION);\r | |
2459 | CHECK_NULL_RETURN(node);\r | |
2460 | ENCLOSURE_(node)->o.options = option;\r | |
2461 | return node;\r | |
2462 | }\r | |
2463 | \r | |
2464 | static int\r | |
2465 | node_new_fail(Node** node, ScanEnv* env)\r | |
2466 | {\r | |
2467 | *node = node_new();\r | |
2468 | CHECK_NULL_RETURN_MEMERR(*node);\r | |
2469 | \r | |
2470 | NODE_SET_TYPE(*node, NODE_GIMMICK);\r | |
2471 | GIMMICK_(*node)->type = GIMMICK_FAIL;\r | |
2472 | return ONIG_NORMAL;\r | |
2473 | }\r | |
2474 | \r | |
2475 | static int\r | |
2476 | node_new_save_gimmick(Node** node, enum SaveType save_type, ScanEnv* env)\r | |
2477 | {\r | |
2478 | int id;\r | |
2479 | int r;\r | |
2480 | \r | |
2481 | r = save_entry(env, save_type, &id);\r | |
2482 | if (r != ONIG_NORMAL) return r;\r | |
2483 | \r | |
2484 | *node = node_new();\r | |
2485 | CHECK_NULL_RETURN_MEMERR(*node);\r | |
2486 | \r | |
2487 | NODE_SET_TYPE(*node, NODE_GIMMICK);\r | |
2488 | GIMMICK_(*node)->id = id;\r | |
2489 | GIMMICK_(*node)->type = GIMMICK_SAVE;\r | |
2490 | GIMMICK_(*node)->detail_type = (int )save_type;\r | |
2491 | \r | |
2492 | return ONIG_NORMAL;\r | |
2493 | }\r | |
2494 | \r | |
2495 | static int\r | |
2496 | node_new_update_var_gimmick(Node** node, enum UpdateVarType update_var_type,\r | |
2497 | int id, ScanEnv* env)\r | |
2498 | {\r | |
2499 | *node = node_new();\r | |
2500 | CHECK_NULL_RETURN_MEMERR(*node);\r | |
2501 | \r | |
2502 | NODE_SET_TYPE(*node, NODE_GIMMICK);\r | |
2503 | GIMMICK_(*node)->id = id;\r | |
2504 | GIMMICK_(*node)->type = GIMMICK_UPDATE_VAR;\r | |
2505 | GIMMICK_(*node)->detail_type = (int )update_var_type;\r | |
2506 | \r | |
2507 | return ONIG_NORMAL;\r | |
2508 | }\r | |
2509 | \r | |
2510 | static int\r | |
2511 | node_new_keep(Node** node, ScanEnv* env)\r | |
2512 | {\r | |
2513 | int r;\r | |
2514 | \r | |
2515 | r = node_new_save_gimmick(node, SAVE_KEEP, env);\r | |
2516 | if (r != 0) return r;\r | |
2517 | \r | |
2518 | env->keep_num++;\r | |
2519 | return ONIG_NORMAL;\r | |
2520 | }\r | |
2521 | \r | |
2522 | #ifdef USE_CALLOUT\r | |
2523 | \r | |
2524 | extern void\r | |
2525 | onig_free_reg_callout_list(int n, CalloutListEntry* list)\r | |
2526 | {\r | |
2527 | int i;\r | |
2528 | int j;\r | |
2529 | \r | |
2530 | if (IS_NULL(list)) return ;\r | |
2531 | \r | |
2532 | for (i = 0; i < n; i++) {\r | |
2533 | if (list[i].of == ONIG_CALLOUT_OF_NAME) {\r | |
2534 | for (j = 0; j < list[i].u.arg.passed_num; j++) {\r | |
2535 | if (list[i].u.arg.types[j] == ONIG_TYPE_STRING) {\r | |
2536 | if (IS_NOT_NULL(list[i].u.arg.vals[j].s.start))\r | |
2537 | xfree(list[i].u.arg.vals[j].s.start);\r | |
2538 | }\r | |
2539 | }\r | |
2540 | }\r | |
2541 | else { /* ONIG_CALLOUT_OF_CONTENTS */\r | |
2542 | if (IS_NOT_NULL(list[i].u.content.start)) {\r | |
2543 | xfree((void* )list[i].u.content.start);\r | |
2544 | }\r | |
2545 | }\r | |
2546 | }\r | |
2547 | \r | |
2548 | xfree(list);\r | |
2549 | }\r | |
2550 | \r | |
2551 | extern CalloutListEntry*\r | |
2552 | onig_reg_callout_list_at(regex_t* reg, int num)\r | |
2553 | {\r | |
2554 | RegexExt* ext = REG_EXTP(reg);\r | |
2555 | CHECK_NULL_RETURN(ext);\r | |
2556 | \r | |
2557 | if (num <= 0 || num > ext->callout_num)\r | |
2558 | return 0;\r | |
2559 | \r | |
2560 | num--;\r | |
2561 | return ext->callout_list + num;\r | |
2562 | }\r | |
2563 | \r | |
2564 | static int\r | |
2565 | reg_callout_list_entry(ScanEnv* env, int* rnum)\r | |
2566 | {\r | |
2567 | #define INIT_CALLOUT_LIST_NUM 3\r | |
2568 | \r | |
2569 | int num;\r | |
2570 | CalloutListEntry* list;\r | |
2571 | CalloutListEntry* e;\r | |
2572 | RegexExt* ext;\r | |
2573 | \r | |
2574 | ext = onig_get_regex_ext(env->reg);\r | |
2575 | CHECK_NULL_RETURN_MEMERR(ext);\r | |
2576 | \r | |
2577 | if (IS_NULL(ext->callout_list)) {\r | |
2578 | list = (CalloutListEntry* )xmalloc(sizeof(*list) * INIT_CALLOUT_LIST_NUM);\r | |
2579 | CHECK_NULL_RETURN_MEMERR(list);\r | |
2580 | \r | |
2581 | ext->callout_list = list;\r | |
2582 | ext->callout_list_alloc = INIT_CALLOUT_LIST_NUM;\r | |
2583 | ext->callout_num = 0;\r | |
2584 | }\r | |
2585 | \r | |
2586 | num = ext->callout_num + 1;\r | |
2587 | if (num > ext->callout_list_alloc) {\r | |
2588 | int alloc = ext->callout_list_alloc * 2;\r | |
2589 | list = (CalloutListEntry* )xrealloc(ext->callout_list,\r | |
2590 | sizeof(CalloutListEntry) * alloc,\r | |
2591 | sizeof(CalloutListEntry) * ext->callout_list_alloc);\r | |
2592 | CHECK_NULL_RETURN_MEMERR(list);\r | |
2593 | \r | |
2594 | ext->callout_list = list;\r | |
2595 | ext->callout_list_alloc = alloc;\r | |
2596 | }\r | |
2597 | \r | |
2598 | e = ext->callout_list + (num - 1);\r | |
2599 | \r | |
2600 | e->flag = 0;\r | |
2601 | e->of = 0;\r | |
2602 | e->in = ONIG_CALLOUT_OF_CONTENTS;\r | |
2603 | e->type = 0;\r | |
2604 | e->tag_start = 0;\r | |
2605 | e->tag_end = 0;\r | |
2606 | e->start_func = 0;\r | |
2607 | e->end_func = 0;\r | |
2608 | e->u.arg.num = 0;\r | |
2609 | e->u.arg.passed_num = 0;\r | |
2610 | \r | |
2611 | ext->callout_num = num;\r | |
2612 | *rnum = num;\r | |
2613 | return ONIG_NORMAL;\r | |
2614 | }\r | |
2615 | \r | |
2616 | static int\r | |
2617 | node_new_callout(Node** node, OnigCalloutOf callout_of, int num, int id,\r | |
2618 | ScanEnv* env)\r | |
2619 | {\r | |
2620 | *node = node_new();\r | |
2621 | CHECK_NULL_RETURN_MEMERR(*node);\r | |
2622 | \r | |
2623 | NODE_SET_TYPE(*node, NODE_GIMMICK);\r | |
2624 | GIMMICK_(*node)->id = id;\r | |
2625 | GIMMICK_(*node)->num = num;\r | |
2626 | GIMMICK_(*node)->type = GIMMICK_CALLOUT;\r | |
2627 | GIMMICK_(*node)->detail_type = (int )callout_of;\r | |
2628 | \r | |
2629 | return ONIG_NORMAL;\r | |
2630 | }\r | |
2631 | #endif\r | |
2632 | \r | |
2633 | static int\r | |
2634 | make_extended_grapheme_cluster(Node** node, ScanEnv* env)\r | |
2635 | {\r | |
2636 | int r;\r | |
2637 | int i;\r | |
2638 | Node* x;\r | |
2639 | Node* ns[2];\r | |
2640 | \r | |
2641 | /* \X == (?>\O(?:\Y\O)*) */\r | |
2642 | \r | |
2643 | ns[1] = NULL_NODE;\r | |
2644 | \r | |
2645 | r = ONIGERR_MEMORY;\r | |
2646 | ns[0] = onig_node_new_anchor(ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY, 0);\r | |
2647 | if (IS_NULL(ns[0])) goto err;\r | |
2648 | \r | |
2649 | r = node_new_true_anychar(&ns[1], env);\r | |
2650 | if (r != 0) goto err1;\r | |
2651 | \r | |
2652 | x = make_list(2, ns);\r | |
2653 | if (IS_NULL(x)) goto err;\r | |
2654 | ns[0] = x;\r | |
2655 | ns[1] = NULL_NODE;\r | |
2656 | \r | |
2657 | x = node_new_quantifier(0, REPEAT_INFINITE, 1);\r | |
2658 | if (IS_NULL(x)) goto err;\r | |
2659 | \r | |
2660 | NODE_BODY(x) = ns[0];\r | |
2661 | ns[0] = NULL_NODE;\r | |
2662 | ns[1] = x;\r | |
2663 | \r | |
2664 | r = node_new_true_anychar(&ns[0], env);\r | |
2665 | if (r != 0) goto err1;\r | |
2666 | \r | |
2667 | x = make_list(2, ns);\r | |
2668 | if (IS_NULL(x)) goto err;\r | |
2669 | \r | |
2670 | ns[0] = x;\r | |
2671 | ns[1] = NULL_NODE;\r | |
2672 | \r | |
2673 | x = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);\r | |
2674 | if (IS_NULL(x)) goto err;\r | |
2675 | \r | |
2676 | NODE_BODY(x) = ns[0];\r | |
2677 | \r | |
2678 | *node = x;\r | |
2679 | return ONIG_NORMAL;\r | |
2680 | \r | |
2681 | err:\r | |
2682 | r = ONIGERR_MEMORY;\r | |
2683 | err1:\r | |
2684 | for (i = 0; i < 2; i++) onig_node_free(ns[i]);\r | |
2685 | return r;\r | |
2686 | }\r | |
2687 | \r | |
2688 | static int\r | |
2689 | make_absent_engine(Node** node, int pre_save_right_id, Node* absent,\r | |
2690 | Node* step_one, int lower, int upper, int possessive,\r | |
2691 | int is_range_cutter, ScanEnv* env)\r | |
2692 | {\r | |
2693 | int r;\r | |
2694 | int i;\r | |
2695 | int id;\r | |
2696 | Node* x;\r | |
2697 | Node* ns[4];\r | |
2698 | \r | |
2699 | for (i = 0; i < 4; i++) ns[i] = NULL_NODE;\r | |
2700 | \r | |
2701 | ns[1] = absent;\r | |
2702 | ns[3] = step_one; /* for err */\r | |
2703 | r = node_new_save_gimmick(&ns[0], SAVE_S, env);\r | |
2704 | if (r != 0) goto err;\r | |
2705 | \r | |
2706 | id = GIMMICK_(ns[0])->id;\r | |
2707 | r = node_new_update_var_gimmick(&ns[2], UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK,\r | |
2708 | id, env);\r | |
2709 | if (r != 0) goto err;\r | |
2710 | \r | |
2711 | r = node_new_fail(&ns[3], env);\r | |
2712 | if (r != 0) goto err;\r | |
2713 | \r | |
2714 | x = make_list(4, ns);\r | |
2715 | if (IS_NULL(x)) goto err0;\r | |
2716 | \r | |
2717 | ns[0] = x;\r | |
2718 | ns[1] = step_one;\r | |
2719 | ns[2] = ns[3] = NULL_NODE;\r | |
2720 | \r | |
2721 | x = make_alt(2, ns);\r | |
2722 | if (IS_NULL(x)) goto err0;\r | |
2723 | \r | |
2724 | ns[0] = x;\r | |
2725 | \r | |
2726 | x = node_new_quantifier(lower, upper, 0);\r | |
2727 | if (IS_NULL(x)) goto err0;\r | |
2728 | \r | |
2729 | NODE_BODY(x) = ns[0];\r | |
2730 | ns[0] = x;\r | |
2731 | \r | |
2732 | if (possessive != 0) {\r | |
2733 | x = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);\r | |
2734 | if (IS_NULL(x)) goto err0;\r | |
2735 | \r | |
2736 | NODE_BODY(x) = ns[0];\r | |
2737 | ns[0] = x;\r | |
2738 | }\r | |
2739 | \r | |
2740 | r = node_new_update_var_gimmick(&ns[1], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,\r | |
2741 | pre_save_right_id, env);\r | |
2742 | if (r != 0) goto err;\r | |
2743 | \r | |
2744 | r = node_new_fail(&ns[2], env);\r | |
2745 | if (r != 0) goto err;\r | |
2746 | \r | |
2747 | x = make_list(2, ns + 1);\r | |
2748 | if (IS_NULL(x)) goto err0;\r | |
2749 | \r | |
2750 | ns[1] = x; ns[2] = NULL_NODE;\r | |
2751 | \r | |
2752 | x = make_alt(2, ns);\r | |
2753 | if (IS_NULL(x)) goto err0;\r | |
2754 | \r | |
2755 | if (is_range_cutter != 0)\r | |
2756 | NODE_STATUS_ADD(x, SUPER);\r | |
2757 | \r | |
2758 | *node = x;\r | |
2759 | return ONIG_NORMAL;\r | |
2760 | \r | |
2761 | err0:\r | |
2762 | r = ONIGERR_MEMORY;\r | |
2763 | err:\r | |
2764 | for (i = 0; i < 4; i++) onig_node_free(ns[i]);\r | |
2765 | return r;\r | |
2766 | }\r | |
2767 | \r | |
2768 | static int\r | |
2769 | make_absent_tail(Node** node1, Node** node2, int pre_save_right_id,\r | |
2770 | ScanEnv* env)\r | |
2771 | {\r | |
2772 | int r;\r | |
2773 | int id;\r | |
2774 | Node* save;\r | |
2775 | Node* x;\r | |
2776 | Node* ns[2];\r | |
2777 | \r | |
2778 | *node1 = *node2 = NULL_NODE;\r | |
2779 | save = ns[0] = ns[1] = NULL_NODE;\r | |
2780 | \r | |
2781 | r = node_new_save_gimmick(&save, SAVE_RIGHT_RANGE, env);\r | |
2782 | if (r != 0) goto err;\r | |
2783 | \r | |
2784 | id = GIMMICK_(save)->id;\r | |
2785 | r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,\r | |
2786 | id, env);\r | |
2787 | if (r != 0) goto err;\r | |
2788 | \r | |
2789 | r = node_new_fail(&ns[1], env);\r | |
2790 | if (r != 0) goto err;\r | |
2791 | \r | |
2792 | x = make_list(2, ns);\r | |
2793 | if (IS_NULL(x)) goto err0;\r | |
2794 | \r | |
2795 | ns[0] = NULL_NODE; ns[1] = x;\r | |
2796 | \r | |
2797 | r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,\r | |
2798 | pre_save_right_id, env);\r | |
2799 | if (r != 0) goto err;\r | |
2800 | \r | |
2801 | x = make_alt(2, ns);\r | |
2802 | if (IS_NULL(x)) goto err0;\r | |
2803 | \r | |
2804 | *node1 = save;\r | |
2805 | *node2 = x;\r | |
2806 | return ONIG_NORMAL;\r | |
2807 | \r | |
2808 | err0:\r | |
2809 | r = ONIGERR_MEMORY;\r | |
2810 | err:\r | |
2811 | onig_node_free(save);\r | |
2812 | onig_node_free(ns[0]);\r | |
2813 | onig_node_free(ns[1]);\r | |
2814 | return r;\r | |
2815 | }\r | |
2816 | \r | |
2817 | static int\r | |
2818 | make_range_clear(Node** node, ScanEnv* env)\r | |
2819 | {\r | |
2820 | int r;\r | |
2821 | int id;\r | |
2822 | Node* save;\r | |
2823 | Node* x;\r | |
2824 | Node* ns[2];\r | |
2825 | \r | |
2826 | *node = NULL_NODE;\r | |
2827 | save = ns[0] = ns[1] = NULL_NODE;\r | |
2828 | \r | |
2829 | r = node_new_save_gimmick(&save, SAVE_RIGHT_RANGE, env);\r | |
2830 | if (r != 0) goto err;\r | |
2831 | \r | |
2832 | id = GIMMICK_(save)->id;\r | |
2833 | r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,\r | |
2834 | id, env);\r | |
2835 | if (r != 0) goto err;\r | |
2836 | \r | |
2837 | r = node_new_fail(&ns[1], env);\r | |
2838 | if (r != 0) goto err;\r | |
2839 | \r | |
2840 | x = make_list(2, ns);\r | |
2841 | if (IS_NULL(x)) goto err0;\r | |
2842 | \r | |
2843 | ns[0] = NULL_NODE; ns[1] = x;\r | |
2844 | \r | |
2845 | r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_INIT, 0, env);\r | |
2846 | if (r != 0) goto err;\r | |
2847 | \r | |
2848 | x = make_alt(2, ns);\r | |
2849 | if (IS_NULL(x)) goto err0;\r | |
2850 | \r | |
2851 | NODE_STATUS_ADD(x, SUPER);\r | |
2852 | \r | |
2853 | ns[0] = save;\r | |
2854 | ns[1] = x;\r | |
2855 | save = NULL_NODE;\r | |
2856 | x = make_list(2, ns);\r | |
2857 | if (IS_NULL(x)) goto err0;\r | |
2858 | \r | |
2859 | *node = x;\r | |
2860 | return ONIG_NORMAL;\r | |
2861 | \r | |
2862 | err0:\r | |
2863 | r = ONIGERR_MEMORY;\r | |
2864 | err:\r | |
2865 | onig_node_free(save);\r | |
2866 | onig_node_free(ns[0]);\r | |
2867 | onig_node_free(ns[1]);\r | |
2868 | return r;\r | |
2869 | }\r | |
2870 | \r | |
2871 | static int\r | |
2872 | is_simple_one_char_repeat(Node* node, Node** rquant, Node** rbody,\r | |
2873 | int* is_possessive, ScanEnv* env)\r | |
2874 | {\r | |
2875 | Node* quant;\r | |
2876 | Node* body;\r | |
2877 | \r | |
2878 | *rquant = *rbody = 0;\r | |
2879 | *is_possessive = 0;\r | |
2880 | \r | |
2881 | if (NODE_TYPE(node) == NODE_QUANT) {\r | |
2882 | quant = node;\r | |
2883 | }\r | |
2884 | else {\r | |
2885 | if (NODE_TYPE(node) == NODE_ENCLOSURE) {\r | |
2886 | EnclosureNode* en = ENCLOSURE_(node);\r | |
2887 | if (en->type == ENCLOSURE_STOP_BACKTRACK) {\r | |
2888 | *is_possessive = 1;\r | |
2889 | quant = NODE_ENCLOSURE_BODY(en);\r | |
2890 | if (NODE_TYPE(quant) != NODE_QUANT)\r | |
2891 | return 0;\r | |
2892 | }\r | |
2893 | else\r | |
2894 | return 0;\r | |
2895 | }\r | |
2896 | else\r | |
2897 | return 0;\r | |
2898 | }\r | |
2899 | \r | |
2900 | if (QUANT_(quant)->greedy == 0)\r | |
2901 | return 0;\r | |
2902 | \r | |
2903 | body = NODE_BODY(quant);\r | |
2904 | switch (NODE_TYPE(body)) {\r | |
2905 | case NODE_STRING:\r | |
2906 | {\r | |
2907 | int len;\r | |
2908 | StrNode* sn = STR_(body);\r | |
2909 | UChar *s = sn->s;\r | |
2910 | \r | |
2911 | len = 0;\r | |
2912 | while (s < sn->end) {\r | |
2913 | s += enclen(env->enc, s);\r | |
2914 | len++;\r | |
2915 | }\r | |
2916 | if (len != 1)\r | |
2917 | return 0;\r | |
2918 | }\r | |
2919 | \r | |
2920 | case NODE_CCLASS:\r | |
2921 | break;\r | |
2922 | \r | |
2923 | default:\r | |
2924 | return 0;\r | |
2925 | break;\r | |
2926 | }\r | |
2927 | \r | |
2928 | if (node != quant) {\r | |
2929 | NODE_BODY(node) = 0;\r | |
2930 | onig_node_free(node);\r | |
2931 | }\r | |
2932 | NODE_BODY(quant) = NULL_NODE;\r | |
2933 | *rquant = quant;\r | |
2934 | *rbody = body;\r | |
2935 | return 1;\r | |
2936 | }\r | |
2937 | \r | |
2938 | static int\r | |
2939 | make_absent_tree_for_simple_one_char_repeat(Node** node, Node* absent, Node* quant,\r | |
2940 | Node* body, int possessive, ScanEnv* env)\r | |
2941 | {\r | |
2942 | int r;\r | |
2943 | int i;\r | |
2944 | int id1;\r | |
2945 | int lower, upper;\r | |
2946 | Node* x;\r | |
2947 | Node* ns[4];\r | |
2948 | \r | |
2949 | *node = NULL_NODE;\r | |
2950 | r = ONIGERR_MEMORY;\r | |
2951 | ns[0] = ns[1] = NULL_NODE;\r | |
2952 | ns[2] = body, ns[3] = absent;\r | |
2953 | \r | |
2954 | lower = QUANT_(quant)->lower;\r | |
2955 | upper = QUANT_(quant)->upper;\r | |
2956 | onig_node_free(quant);\r | |
2957 | \r | |
2958 | r = node_new_save_gimmick(&ns[0], SAVE_RIGHT_RANGE, env);\r | |
2959 | if (r != 0) goto err;\r | |
2960 | \r | |
2961 | id1 = GIMMICK_(ns[0])->id;\r | |
2962 | \r | |
2963 | r = make_absent_engine(&ns[1], id1, absent, body, lower, upper, possessive,\r | |
2964 | 0, env);\r | |
2965 | if (r != 0) goto err;\r | |
2966 | \r | |
2967 | ns[2] = ns[3] = NULL_NODE;\r | |
2968 | \r | |
2969 | r = node_new_update_var_gimmick(&ns[2], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,\r | |
2970 | id1, env);\r | |
2971 | if (r != 0) goto err;\r | |
2972 | \r | |
2973 | x = make_list(3, ns);\r | |
2974 | if (IS_NULL(x)) goto err0;\r | |
2975 | \r | |
2976 | *node = x;\r | |
2977 | return ONIG_NORMAL;\r | |
2978 | \r | |
2979 | err0:\r | |
2980 | r = ONIGERR_MEMORY;\r | |
2981 | err:\r | |
2982 | for (i = 0; i < 4; i++) onig_node_free(ns[i]);\r | |
2983 | return r;\r | |
2984 | }\r | |
2985 | \r | |
2986 | static int\r | |
2987 | make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter,\r | |
2988 | ScanEnv* env)\r | |
2989 | {\r | |
2990 | int r;\r | |
2991 | int i;\r | |
2992 | int id1, id2;\r | |
2993 | int possessive;\r | |
2994 | Node* x;\r | |
2995 | Node* ns[7];\r | |
2996 | \r | |
2997 | r = ONIGERR_MEMORY;\r | |
2998 | for (i = 0; i < 7; i++) ns[i] = NULL_NODE;\r | |
2999 | ns[4] = expr; ns[5] = absent;\r | |
3000 | \r | |
3001 | if (is_range_cutter == 0) {\r | |
3002 | Node* quant;\r | |
3003 | Node* body;\r | |
3004 | \r | |
3005 | if (expr == NULL_NODE) {\r | |
3006 | /* default expr \O* */\r | |
3007 | quant = node_new_quantifier(0, REPEAT_INFINITE, 0);\r | |
3008 | if (IS_NULL(quant)) goto err0;\r | |
3009 | \r | |
3010 | r = node_new_true_anychar(&body, env);\r | |
3011 | if (r != 0) {\r | |
3012 | onig_node_free(quant);\r | |
3013 | goto err;\r | |
3014 | }\r | |
3015 | possessive = 0;\r | |
3016 | goto simple;\r | |
3017 | }\r | |
3018 | else {\r | |
3019 | if (is_simple_one_char_repeat(expr, &quant, &body, &possessive, env)) {\r | |
3020 | simple:\r | |
3021 | r = make_absent_tree_for_simple_one_char_repeat(node, absent, quant,\r | |
3022 | body, possessive, env);\r | |
3023 | if (r != 0) {\r | |
3024 | ns[4] = NULL_NODE;\r | |
3025 | onig_node_free(quant);\r | |
3026 | onig_node_free(body);\r | |
3027 | goto err;\r | |
3028 | }\r | |
3029 | \r | |
3030 | return ONIG_NORMAL;\r | |
3031 | }\r | |
3032 | }\r | |
3033 | }\r | |
3034 | \r | |
3035 | r = node_new_save_gimmick(&ns[0], SAVE_RIGHT_RANGE, env);\r | |
3036 | if (r != 0) goto err;\r | |
3037 | \r | |
3038 | id1 = GIMMICK_(ns[0])->id;\r | |
3039 | \r | |
3040 | r = node_new_save_gimmick(&ns[1], SAVE_S, env);\r | |
3041 | if (r != 0) goto err;\r | |
3042 | \r | |
3043 | id2 = GIMMICK_(ns[1])->id;\r | |
3044 | \r | |
3045 | r = node_new_true_anychar(&ns[3], env);\r | |
3046 | if (r != 0) goto err;\r | |
3047 | \r | |
3048 | possessive = 1;\r | |
3049 | r = make_absent_engine(&ns[2], id1, absent, ns[3], 0, REPEAT_INFINITE,\r | |
3050 | possessive, is_range_cutter, env);\r | |
3051 | if (r != 0) goto err;\r | |
3052 | \r | |
3053 | ns[3] = NULL_NODE;\r | |
3054 | ns[5] = NULL_NODE;\r | |
3055 | \r | |
3056 | r = node_new_update_var_gimmick(&ns[3], UPDATE_VAR_S_FROM_STACK, id2, env);\r | |
3057 | if (r != 0) goto err;\r | |
3058 | \r | |
3059 | if (is_range_cutter != 0) {\r | |
3060 | x = make_list(4, ns);\r | |
3061 | if (IS_NULL(x)) goto err0;\r | |
3062 | }\r | |
3063 | else {\r | |
3064 | r = make_absent_tail(&ns[5], &ns[6], id1, env);\r | |
3065 | if (r != 0) goto err;\r | |
3066 | \r | |
3067 | x = make_list(7, ns);\r | |
3068 | if (IS_NULL(x)) goto err0;\r | |
3069 | }\r | |
3070 | \r | |
3071 | *node = x;\r | |
3072 | return ONIG_NORMAL;\r | |
3073 | \r | |
3074 | err0:\r | |
3075 | r = ONIGERR_MEMORY;\r | |
3076 | err:\r | |
3077 | for (i = 0; i < 7; i++) onig_node_free(ns[i]);\r | |
3078 | return r; \r | |
3079 | }\r | |
3080 | \r | |
3081 | extern int\r | |
3082 | onig_node_str_cat(Node* node, const UChar* s, const UChar* end)\r | |
3083 | {\r | |
3084 | int addlen = (int )(end - s);\r | |
3085 | \r | |
3086 | if (addlen > 0) {\r | |
3087 | int len = (int )(STR_(node)->end - STR_(node)->s);\r | |
3088 | \r | |
3089 | if (STR_(node)->capa > 0 || (len + addlen > NODE_STRING_BUF_SIZE - 1)) {\r | |
3090 | UChar* p;\r | |
3091 | int capa = len + addlen + NODE_STRING_MARGIN;\r | |
3092 | \r | |
3093 | if (capa <= STR_(node)->capa) {\r | |
3094 | onig_strcpy(STR_(node)->s + len, s, end);\r | |
3095 | }\r | |
3096 | else {\r | |
3097 | if (STR_(node)->s == STR_(node)->buf)\r | |
3098 | p = strcat_capa_from_static(STR_(node)->s, STR_(node)->end,\r | |
3099 | s, end, capa);\r | |
3100 | else\r | |
3101 | p = strcat_capa(STR_(node)->s, STR_(node)->end, s, end, capa, STR_(node)->capa);\r | |
3102 | \r | |
3103 | CHECK_NULL_RETURN_MEMERR(p);\r | |
3104 | STR_(node)->s = p;\r | |
3105 | STR_(node)->capa = capa;\r | |
3106 | }\r | |
3107 | }\r | |
3108 | else {\r | |
3109 | onig_strcpy(STR_(node)->s + len, s, end);\r | |
3110 | }\r | |
3111 | STR_(node)->end = STR_(node)->s + len + addlen;\r | |
3112 | }\r | |
3113 | \r | |
3114 | return 0;\r | |
3115 | }\r | |
3116 | \r | |
3117 | extern int\r | |
3118 | onig_node_str_set(Node* node, const UChar* s, const UChar* end)\r | |
3119 | {\r | |
3120 | onig_node_str_clear(node);\r | |
3121 | return onig_node_str_cat(node, s, end);\r | |
3122 | }\r | |
3123 | \r | |
3124 | static int\r | |
3125 | node_str_cat_char(Node* node, UChar c)\r | |
3126 | {\r | |
3127 | UChar s[1];\r | |
3128 | \r | |
3129 | s[0] = c;\r | |
3130 | return onig_node_str_cat(node, s, s + 1);\r | |
3131 | }\r | |
3132 | \r | |
3133 | extern void\r | |
3134 | onig_node_conv_to_str_node(Node* node, int flag)\r | |
3135 | {\r | |
3136 | NODE_SET_TYPE(node, NODE_STRING);\r | |
3137 | STR_(node)->flag = flag;\r | |
3138 | STR_(node)->capa = 0;\r | |
3139 | STR_(node)->s = STR_(node)->buf;\r | |
3140 | STR_(node)->end = STR_(node)->buf;\r | |
3141 | }\r | |
3142 | \r | |
3143 | extern void\r | |
3144 | onig_node_str_clear(Node* node)\r | |
3145 | {\r | |
3146 | if (STR_(node)->capa != 0 &&\r | |
3147 | IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) {\r | |
3148 | xfree(STR_(node)->s);\r | |
3149 | }\r | |
3150 | \r | |
3151 | STR_(node)->capa = 0;\r | |
3152 | STR_(node)->flag = 0;\r | |
3153 | STR_(node)->s = STR_(node)->buf;\r | |
3154 | STR_(node)->end = STR_(node)->buf;\r | |
3155 | }\r | |
3156 | \r | |
3157 | static Node*\r | |
3158 | node_new_str(const UChar* s, const UChar* end)\r | |
3159 | {\r | |
3160 | Node* node = node_new();\r | |
3161 | CHECK_NULL_RETURN(node);\r | |
3162 | \r | |
3163 | NODE_SET_TYPE(node, NODE_STRING);\r | |
3164 | STR_(node)->capa = 0;\r | |
3165 | STR_(node)->flag = 0;\r | |
3166 | STR_(node)->s = STR_(node)->buf;\r | |
3167 | STR_(node)->end = STR_(node)->buf;\r | |
3168 | if (onig_node_str_cat(node, s, end)) {\r | |
3169 | onig_node_free(node);\r | |
3170 | return NULL;\r | |
3171 | }\r | |
3172 | return node;\r | |
3173 | }\r | |
3174 | \r | |
3175 | extern Node*\r | |
3176 | onig_node_new_str(const UChar* s, const UChar* end)\r | |
3177 | {\r | |
3178 | return node_new_str(s, end);\r | |
3179 | }\r | |
3180 | \r | |
3181 | static Node*\r | |
3182 | node_new_str_raw(UChar* s, UChar* end)\r | |
3183 | {\r | |
3184 | Node* node = node_new_str(s, end);\r | |
a5def177 | 3185 | CHECK_NULL_RETURN(node);\r |
b602265d DG |
3186 | NODE_STRING_SET_RAW(node);\r |
3187 | return node;\r | |
3188 | }\r | |
3189 | \r | |
3190 | static Node*\r | |
3191 | node_new_empty(void)\r | |
3192 | {\r | |
3193 | return node_new_str(NULL, NULL);\r | |
3194 | }\r | |
3195 | \r | |
3196 | static Node*\r | |
3197 | node_new_str_raw_char(UChar c)\r | |
3198 | {\r | |
3199 | UChar p[1];\r | |
3200 | \r | |
3201 | p[0] = c;\r | |
3202 | return node_new_str_raw(p, p + 1);\r | |
3203 | }\r | |
3204 | \r | |
3205 | static Node*\r | |
3206 | str_node_split_last_char(Node* node, OnigEncoding enc)\r | |
3207 | {\r | |
3208 | const UChar *p;\r | |
3209 | Node* rn;\r | |
3210 | StrNode* sn;\r | |
3211 | \r | |
3212 | sn = STR_(node);\r | |
3213 | rn = NULL_NODE;\r | |
3214 | if (sn->end > sn->s) {\r | |
3215 | p = onigenc_get_prev_char_head(enc, sn->s, sn->end);\r | |
3216 | if (p && p > sn->s) { /* can be split. */\r | |
3217 | rn = node_new_str(p, sn->end);\r | |
a5def177 | 3218 | CHECK_NULL_RETURN(rn);\r |
b602265d DG |
3219 | if (NODE_STRING_IS_RAW(node))\r |
3220 | NODE_STRING_SET_RAW(rn);\r | |
3221 | \r | |
3222 | sn->end = (UChar* )p;\r | |
3223 | }\r | |
3224 | }\r | |
3225 | return rn;\r | |
3226 | }\r | |
3227 | \r | |
3228 | static int\r | |
3229 | str_node_can_be_split(Node* node, OnigEncoding enc)\r | |
3230 | {\r | |
3231 | StrNode* sn = STR_(node);\r | |
3232 | if (sn->end > sn->s) {\r | |
3233 | return ((enclen(enc, sn->s) < sn->end - sn->s) ? 1 : 0);\r | |
3234 | }\r | |
3235 | return 0;\r | |
3236 | }\r | |
3237 | \r | |
#ifdef USE_PAD_TO_SHORT_BYTE_CHAR
/*
 * Insert `num` bytes of value `val` in front of the string held by `sn`:
 * the current content is shifted right by `num` bytes (via a temporary
 * copy), `end` is advanced by `num`, and the first `num` bytes are filled
 * with `val`.
 *
 * Always returns 0.  The function is declared to return int but previously
 * fell off its end without a return statement, which is undefined
 * behaviour if a caller reads the result.
 *
 * NOTE(review): nothing here checks that the current content fits into the
 * NODE_STRING_BUF_SIZE temporary, or that the node's storage has room for
 * `num` extra bytes -- presumably callers guarantee this; confirm.
 */
static int
node_str_head_pad(StrNode* sn, int num, UChar val)
{
  UChar buf[NODE_STRING_BUF_SIZE];
  int i, len;

  len = (int )(sn->end - sn->s);
  onig_strcpy(buf, sn->s, sn->end);
  onig_strcpy(&(sn->s[num]), buf, buf + len);
  sn->end += num;

  for (i = 0; i < num; i++) {
    sn->s[i] = val;
  }

  return 0;
}
#endif
3255 | \r | |
3256 | extern int\r | |
3257 | onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc)\r | |
3258 | {\r | |
3259 | unsigned int num, val;\r | |
3260 | OnigCodePoint c;\r | |
3261 | UChar* p = *src;\r | |
3262 | PFETCH_READY;\r | |
3263 | \r | |
3264 | num = 0;\r | |
3265 | while (! PEND) {\r | |
3266 | PFETCH(c);\r | |
3267 | if (IS_CODE_DIGIT_ASCII(enc, c)) {\r | |
3268 | val = (unsigned int )DIGITVAL(c);\r | |
3269 | if ((INT_MAX_LIMIT - val) / 10UL < num)\r | |
3270 | return -1; /* overflow */\r | |
3271 | \r | |
3272 | num = num * 10 + val;\r | |
3273 | }\r | |
3274 | else {\r | |
3275 | PUNFETCH;\r | |
3276 | break;\r | |
3277 | }\r | |
3278 | }\r | |
3279 | *src = p;\r | |
3280 | return num;\r | |
3281 | }\r | |
3282 | \r | |
3283 | static int\r | |
3284 | scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int minlen,\r | |
3285 | int maxlen, OnigEncoding enc)\r | |
3286 | {\r | |
3287 | OnigCodePoint c;\r | |
3288 | unsigned int num, val;\r | |
3289 | int n;\r | |
3290 | UChar* p = *src;\r | |
3291 | PFETCH_READY;\r | |
3292 | \r | |
3293 | num = 0;\r | |
3294 | n = 0;\r | |
3295 | while (! PEND && n < maxlen) {\r | |
3296 | PFETCH(c);\r | |
3297 | if (IS_CODE_XDIGIT_ASCII(enc, c)) {\r | |
3298 | n++;\r | |
3299 | val = (unsigned int )XDIGITVAL(enc,c);\r | |
3300 | if ((INT_MAX_LIMIT - val) / 16UL < num)\r | |
3301 | return ONIGERR_TOO_BIG_NUMBER; /* overflow */\r | |
3302 | \r | |
3303 | num = (num << 4) + XDIGITVAL(enc,c);\r | |
3304 | }\r | |
3305 | else {\r | |
3306 | PUNFETCH;\r | |
3307 | break;\r | |
3308 | }\r | |
3309 | }\r | |
3310 | \r | |
3311 | if (n < minlen)\r | |
3312 | return ONIGERR_INVALID_CODE_POINT_VALUE;\r | |
3313 | \r | |
3314 | *src = p;\r | |
3315 | return num;\r | |
3316 | }\r | |
3317 | \r | |
3318 | static int\r | |
3319 | scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen,\r | |
3320 | OnigEncoding enc)\r | |
3321 | {\r | |
3322 | OnigCodePoint c;\r | |
3323 | unsigned int num, val;\r | |
3324 | UChar* p = *src;\r | |
3325 | PFETCH_READY;\r | |
3326 | \r | |
3327 | num = 0;\r | |
3328 | while (! PEND && maxlen-- != 0) {\r | |
3329 | PFETCH(c);\r | |
3330 | if (IS_CODE_DIGIT_ASCII(enc, c) && c < '8') {\r | |
3331 | val = ODIGITVAL(c);\r | |
3332 | if ((INT_MAX_LIMIT - val) / 8UL < num)\r | |
3333 | return -1; /* overflow */\r | |
3334 | \r | |
3335 | num = (num << 3) + val;\r | |
3336 | }\r | |
3337 | else {\r | |
3338 | PUNFETCH;\r | |
3339 | break;\r | |
3340 | }\r | |
3341 | }\r | |
3342 | *src = p;\r | |
3343 | return num;\r | |
3344 | }\r | |
3345 | \r | |
3346 | \r | |
/* Write one code point (SIZE_CODE_POINT bytes, taken from the lvalue
   `code`) into buffer `buf` at byte offset `pos`. */
#define BB_WRITE_CODE_POINT(buf,pos,code) \
  BB_WRITE((buf), (pos), &(code), SIZE_CODE_POINT)
3349 | \r | |
3350 | /* data format:\r | |
3351 | [n][from-1][to-1][from-2][to-2] ... [from-n][to-n]\r | |
3352 | (all data size is OnigCodePoint)\r | |
3353 | */\r | |
3354 | static int\r | |
3355 | new_code_range(BBuf** pbuf)\r | |
3356 | {\r | |
3357 | #define INIT_MULTI_BYTE_RANGE_SIZE (SIZE_CODE_POINT * 5)\r | |
3358 | int r;\r | |
3359 | OnigCodePoint n;\r | |
3360 | BBuf* bbuf;\r | |
3361 | \r | |
3362 | bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf));\r | |
3363 | CHECK_NULL_RETURN_MEMERR(bbuf);\r | |
3364 | r = BB_INIT(bbuf, INIT_MULTI_BYTE_RANGE_SIZE);\r | |
3365 | if (r != 0) {\r | |
3366 | xfree(bbuf);\r | |
3367 | *pbuf = 0;\r | |
3368 | return r;\r | |
3369 | }\r | |
3370 | \r | |
3371 | n = 0;\r | |
3372 | BB_WRITE_CODE_POINT(bbuf, 0, n);\r | |
3373 | return 0;\r | |
3374 | }\r | |
3375 | \r | |
3376 | static int\r | |
3377 | add_code_range_to_buf(BBuf** pbuf, OnigCodePoint from, OnigCodePoint to)\r | |
3378 | {\r | |
3379 | int r, inc_n, pos;\r | |
3380 | int low, high, bound, x;\r | |
3381 | OnigCodePoint n, *data;\r | |
3382 | BBuf* bbuf;\r | |
3383 | \r | |
3384 | if (from > to) {\r | |
3385 | n = from; from = to; to = n;\r | |
3386 | }\r | |
3387 | \r | |
3388 | if (IS_NULL(*pbuf)) {\r | |
3389 | r = new_code_range(pbuf);\r | |
3390 | if (r != 0) return r;\r | |
3391 | bbuf = *pbuf;\r | |
3392 | n = 0;\r | |
3393 | }\r | |
3394 | else {\r | |
3395 | bbuf = *pbuf;\r | |
3396 | GET_CODE_POINT(n, bbuf->p);\r | |
3397 | }\r | |
3398 | data = (OnigCodePoint* )(bbuf->p);\r | |
3399 | data++;\r | |
3400 | \r | |
3401 | for (low = 0, bound = n; low < bound; ) {\r | |
3402 | x = (low + bound) >> 1;\r | |
3403 | if (from > data[x*2 + 1])\r | |
3404 | low = x + 1;\r | |
3405 | else\r | |
3406 | bound = x;\r | |
3407 | }\r | |
3408 | \r | |
3409 | high = (to == ~((OnigCodePoint )0)) ? n : low;\r | |
3410 | for (bound = n; high < bound; ) {\r | |
3411 | x = (high + bound) >> 1;\r | |
3412 | if (to + 1 >= data[x*2])\r | |
3413 | high = x + 1;\r | |
3414 | else\r | |
3415 | bound = x;\r | |
3416 | }\r | |
3417 | \r | |
3418 | inc_n = low + 1 - high;\r | |
3419 | if (n + inc_n > ONIG_MAX_MULTI_BYTE_RANGES_NUM)\r | |
3420 | return ONIGERR_TOO_MANY_MULTI_BYTE_RANGES;\r | |
3421 | \r | |
3422 | if (inc_n != 1) {\r | |
3423 | if (from > data[low*2])\r | |
3424 | from = data[low*2];\r | |
3425 | if (to < data[(high - 1)*2 + 1])\r | |
3426 | to = data[(high - 1)*2 + 1];\r | |
3427 | }\r | |
3428 | \r | |
3429 | if (inc_n != 0 && (OnigCodePoint )high < n) {\r | |
3430 | int from_pos = SIZE_CODE_POINT * (1 + high * 2);\r | |
3431 | int to_pos = SIZE_CODE_POINT * (1 + (low + 1) * 2);\r | |
3432 | int size = (n - high) * 2 * SIZE_CODE_POINT;\r | |
3433 | \r | |
3434 | if (inc_n > 0) {\r | |
3435 | BB_MOVE_RIGHT(bbuf, from_pos, to_pos, size);\r | |
3436 | }\r | |
3437 | else {\r | |
3438 | BB_MOVE_LEFT_REDUCE(bbuf, from_pos, to_pos);\r | |
3439 | }\r | |
3440 | }\r | |
3441 | \r | |
3442 | pos = SIZE_CODE_POINT * (1 + low * 2);\r | |
3443 | BB_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2);\r | |
3444 | BB_WRITE_CODE_POINT(bbuf, pos, from);\r | |
3445 | BB_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to);\r | |
3446 | n += inc_n;\r | |
3447 | BB_WRITE_CODE_POINT(bbuf, 0, n);\r | |
3448 | \r | |
3449 | return 0;\r | |
3450 | }\r | |
3451 | \r | |
3452 | static int\r | |
3453 | add_code_range(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to)\r | |
3454 | {\r | |
3455 | if (from > to) {\r | |
3456 | if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))\r | |
3457 | return 0;\r | |
3458 | else\r | |
3459 | return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;\r | |
3460 | }\r | |
3461 | \r | |
3462 | return add_code_range_to_buf(pbuf, from, to);\r | |
3463 | }\r | |
3464 | \r | |
3465 | static int\r | |
3466 | not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf)\r | |
3467 | {\r | |
3468 | int r, i, n;\r | |
3469 | OnigCodePoint pre, from, *data, to = 0;\r | |
3470 | \r | |
3471 | *pbuf = (BBuf* )NULL;\r | |
3472 | if (IS_NULL(bbuf)) {\r | |
3473 | set_all:\r | |
3474 | return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);\r | |
3475 | }\r | |
3476 | \r | |
3477 | data = (OnigCodePoint* )(bbuf->p);\r | |
3478 | GET_CODE_POINT(n, data);\r | |
3479 | data++;\r | |
3480 | if (n <= 0) goto set_all;\r | |
3481 | \r | |
3482 | r = 0;\r | |
3483 | pre = MBCODE_START_POS(enc);\r | |
3484 | for (i = 0; i < n; i++) {\r | |
3485 | from = data[i*2];\r | |
3486 | to = data[i*2+1];\r | |
3487 | if (pre <= from - 1) {\r | |
3488 | r = add_code_range_to_buf(pbuf, pre, from - 1);\r | |
3489 | if (r != 0) return r;\r | |
3490 | }\r | |
3491 | if (to == ~((OnigCodePoint )0)) break;\r | |
3492 | pre = to + 1;\r | |
3493 | }\r | |
3494 | if (to < ~((OnigCodePoint )0)) {\r | |
3495 | r = add_code_range_to_buf(pbuf, to + 1, ~((OnigCodePoint )0));\r | |
3496 | }\r | |
3497 | return r;\r | |
3498 | }\r | |
3499 | \r | |
/* Exchange two (buffer, not-flag) pairs.  All four arguments must be
   lvalues. */
#define SWAP_BB_NOT(bbuf1, not1, bbuf2, not2) do {\
  int   swap_not_ = not1;\
  BBuf* swap_buf_ = bbuf1;\
  not1  = not2;  not2  = swap_not_;\
  bbuf1 = bbuf2; bbuf2 = swap_buf_;\
} while (0)
3506 | \r | |
3507 | static int\r | |
3508 | or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1,\r | |
3509 | BBuf* bbuf2, int not2, BBuf** pbuf)\r | |
3510 | {\r | |
3511 | int r;\r | |
3512 | OnigCodePoint i, n1, *data1;\r | |
3513 | OnigCodePoint from, to;\r | |
3514 | \r | |
3515 | *pbuf = (BBuf* )NULL;\r | |
3516 | if (IS_NULL(bbuf1) && IS_NULL(bbuf2)) {\r | |
3517 | if (not1 != 0 || not2 != 0)\r | |
3518 | return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);\r | |
3519 | return 0;\r | |
3520 | }\r | |
3521 | \r | |
3522 | r = 0;\r | |
3523 | if (IS_NULL(bbuf2))\r | |
3524 | SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);\r | |
3525 | \r | |
3526 | if (IS_NULL(bbuf1)) {\r | |
3527 | if (not1 != 0) {\r | |
3528 | return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);\r | |
3529 | }\r | |
3530 | else {\r | |
3531 | if (not2 == 0) {\r | |
3532 | return bbuf_clone(pbuf, bbuf2);\r | |
3533 | }\r | |
3534 | else {\r | |
3535 | return not_code_range_buf(enc, bbuf2, pbuf);\r | |
3536 | }\r | |
3537 | }\r | |
3538 | }\r | |
3539 | \r | |
3540 | if (not1 != 0)\r | |
3541 | SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);\r | |
3542 | \r | |
3543 | data1 = (OnigCodePoint* )(bbuf1->p);\r | |
3544 | GET_CODE_POINT(n1, data1);\r | |
3545 | data1++;\r | |
3546 | \r | |
3547 | if (not2 == 0 && not1 == 0) { /* 1 OR 2 */\r | |
3548 | r = bbuf_clone(pbuf, bbuf2);\r | |
3549 | }\r | |
3550 | else if (not1 == 0) { /* 1 OR (not 2) */\r | |
3551 | r = not_code_range_buf(enc, bbuf2, pbuf);\r | |
3552 | }\r | |
3553 | if (r != 0) return r;\r | |
3554 | \r | |
3555 | for (i = 0; i < n1; i++) {\r | |
3556 | from = data1[i*2];\r | |
3557 | to = data1[i*2+1];\r | |
3558 | r = add_code_range_to_buf(pbuf, from, to);\r | |
3559 | if (r != 0) return r;\r | |
3560 | }\r | |
3561 | return 0;\r | |
3562 | }\r | |
3563 | \r | |
3564 | static int\r | |
3565 | and_code_range1(BBuf** pbuf, OnigCodePoint from1, OnigCodePoint to1,\r | |
3566 | OnigCodePoint* data, int n)\r | |
3567 | {\r | |
3568 | int i, r;\r | |
3569 | OnigCodePoint from2, to2;\r | |
3570 | \r | |
3571 | for (i = 0; i < n; i++) {\r | |
3572 | from2 = data[i*2];\r | |
3573 | to2 = data[i*2+1];\r | |
3574 | if (from2 < from1) {\r | |
3575 | if (to2 < from1) continue;\r | |
3576 | else {\r | |
3577 | from1 = to2 + 1;\r | |
3578 | }\r | |
3579 | }\r | |
3580 | else if (from2 <= to1) {\r | |
3581 | if (to2 < to1) {\r | |
3582 | if (from1 <= from2 - 1) {\r | |
3583 | r = add_code_range_to_buf(pbuf, from1, from2-1);\r | |
3584 | if (r != 0) return r;\r | |
3585 | }\r | |
3586 | from1 = to2 + 1;\r | |
3587 | }\r | |
3588 | else {\r | |
3589 | to1 = from2 - 1;\r | |
3590 | }\r | |
3591 | }\r | |
3592 | else {\r | |
3593 | from1 = from2;\r | |
3594 | }\r | |
3595 | if (from1 > to1) break;\r | |
3596 | }\r | |
3597 | if (from1 <= to1) {\r | |
3598 | r = add_code_range_to_buf(pbuf, from1, to1);\r | |
3599 | if (r != 0) return r;\r | |
3600 | }\r | |
3601 | return 0;\r | |
3602 | }\r | |
3603 | \r | |
3604 | static int\r | |
3605 | and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)\r | |
3606 | {\r | |
3607 | int r;\r | |
3608 | OnigCodePoint i, j, n1, n2, *data1, *data2;\r | |
3609 | OnigCodePoint from, to, from1, to1, from2, to2;\r | |
3610 | \r | |
3611 | *pbuf = (BBuf* )NULL;\r | |
3612 | if (IS_NULL(bbuf1)) {\r | |
3613 | if (not1 != 0 && IS_NOT_NULL(bbuf2)) /* not1 != 0 -> not2 == 0 */\r | |
3614 | return bbuf_clone(pbuf, bbuf2);\r | |
3615 | return 0;\r | |
3616 | }\r | |
3617 | else if (IS_NULL(bbuf2)) {\r | |
3618 | if (not2 != 0)\r | |
3619 | return bbuf_clone(pbuf, bbuf1);\r | |
3620 | return 0;\r | |
3621 | }\r | |
3622 | \r | |
3623 | if (not1 != 0)\r | |
3624 | SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);\r | |
3625 | \r | |
3626 | data1 = (OnigCodePoint* )(bbuf1->p);\r | |
3627 | data2 = (OnigCodePoint* )(bbuf2->p);\r | |
3628 | GET_CODE_POINT(n1, data1);\r | |
3629 | GET_CODE_POINT(n2, data2);\r | |
3630 | data1++;\r | |
3631 | data2++;\r | |
3632 | \r | |
3633 | if (not2 == 0 && not1 == 0) { /* 1 AND 2 */\r | |
3634 | for (i = 0; i < n1; i++) {\r | |
3635 | from1 = data1[i*2];\r | |
3636 | to1 = data1[i*2+1];\r | |
3637 | for (j = 0; j < n2; j++) {\r | |
3638 | from2 = data2[j*2];\r | |
3639 | to2 = data2[j*2+1];\r | |
3640 | if (from2 > to1) break;\r | |
3641 | if (to2 < from1) continue;\r | |
3642 | from = MAX(from1, from2);\r | |
3643 | to = MIN(to1, to2);\r | |
3644 | r = add_code_range_to_buf(pbuf, from, to);\r | |
3645 | if (r != 0) return r;\r | |
3646 | }\r | |
3647 | }\r | |
3648 | }\r | |
3649 | else if (not1 == 0) { /* 1 AND (not 2) */\r | |
3650 | for (i = 0; i < n1; i++) {\r | |
3651 | from1 = data1[i*2];\r | |
3652 | to1 = data1[i*2+1];\r | |
14b0e578 CS |
3653 | r = and_code_range1(pbuf, from1, to1, data2, n2);\r |
3654 | if (r != 0) return r;\r | |
3655 | }\r | |
3656 | }\r | |
3657 | \r | |
3658 | return 0;\r | |
3659 | }\r | |
3660 | \r | |
3661 | static int\r | |
3662 | and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)\r | |
3663 | {\r | |
3664 | int r, not1, not2;\r | |
3665 | BBuf *buf1, *buf2, *pbuf;\r | |
3666 | BitSetRef bsr1, bsr2;\r | |
3667 | BitSet bs1, bs2;\r | |
3668 | \r | |
3669 | not1 = IS_NCCLASS_NOT(dest);\r | |
3670 | bsr1 = dest->bs;\r | |
3671 | buf1 = dest->mbuf;\r | |
3672 | not2 = IS_NCCLASS_NOT(cc);\r | |
3673 | bsr2 = cc->bs;\r | |
3674 | buf2 = cc->mbuf;\r | |
3675 | \r | |
3676 | if (not1 != 0) {\r | |
3677 | bitset_invert_to(bsr1, bs1);\r | |
3678 | bsr1 = bs1;\r | |
3679 | }\r | |
3680 | if (not2 != 0) {\r | |
3681 | bitset_invert_to(bsr2, bs2);\r | |
3682 | bsr2 = bs2;\r | |
3683 | }\r | |
3684 | bitset_and(bsr1, bsr2);\r | |
3685 | if (bsr1 != dest->bs) {\r | |
3686 | bitset_copy(dest->bs, bsr1);\r | |
14b0e578 CS |
3687 | }\r |
3688 | if (not1 != 0) {\r | |
3689 | bitset_invert(dest->bs);\r | |
3690 | }\r | |
3691 | \r | |
3692 | if (! ONIGENC_IS_SINGLEBYTE(enc)) {\r | |
3693 | if (not1 != 0 && not2 != 0) {\r | |
3694 | r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf);\r | |
3695 | }\r | |
3696 | else {\r | |
3697 | r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf);\r | |
3698 | if (r == 0 && not1 != 0) {\r | |
b602265d DG |
3699 | BBuf *tbuf;\r |
3700 | r = not_code_range_buf(enc, pbuf, &tbuf);\r | |
3701 | if (r != 0) {\r | |
3702 | bbuf_free(pbuf);\r | |
3703 | return r;\r | |
3704 | }\r | |
3705 | bbuf_free(pbuf);\r | |
3706 | pbuf = tbuf;\r | |
14b0e578 CS |
3707 | }\r |
3708 | }\r | |
3709 | if (r != 0) return r;\r | |
3710 | \r | |
3711 | dest->mbuf = pbuf;\r | |
3712 | bbuf_free(buf1);\r | |
3713 | return r;\r | |
3714 | }\r | |
3715 | return 0;\r | |
3716 | }\r | |
3717 | \r | |
3718 | static int\r | |
3719 | or_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)\r | |
3720 | {\r | |
3721 | int r, not1, not2;\r | |
3722 | BBuf *buf1, *buf2, *pbuf;\r | |
3723 | BitSetRef bsr1, bsr2;\r | |
3724 | BitSet bs1, bs2;\r | |
3725 | \r | |
3726 | not1 = IS_NCCLASS_NOT(dest);\r | |
3727 | bsr1 = dest->bs;\r | |
3728 | buf1 = dest->mbuf;\r | |
3729 | not2 = IS_NCCLASS_NOT(cc);\r | |
3730 | bsr2 = cc->bs;\r | |
3731 | buf2 = cc->mbuf;\r | |
3732 | \r | |
3733 | if (not1 != 0) {\r | |
3734 | bitset_invert_to(bsr1, bs1);\r | |
3735 | bsr1 = bs1;\r | |
3736 | }\r | |
3737 | if (not2 != 0) {\r | |
3738 | bitset_invert_to(bsr2, bs2);\r | |
3739 | bsr2 = bs2;\r | |
3740 | }\r | |
3741 | bitset_or(bsr1, bsr2);\r | |
3742 | if (bsr1 != dest->bs) {\r | |
3743 | bitset_copy(dest->bs, bsr1);\r | |
14b0e578 CS |
3744 | }\r |
3745 | if (not1 != 0) {\r | |
3746 | bitset_invert(dest->bs);\r | |
3747 | }\r | |
3748 | \r | |
3749 | if (! ONIGENC_IS_SINGLEBYTE(enc)) {\r | |
3750 | if (not1 != 0 && not2 != 0) {\r | |
3751 | r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf);\r | |
3752 | }\r | |
3753 | else {\r | |
3754 | r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf);\r | |
3755 | if (r == 0 && not1 != 0) {\r | |
b602265d DG |
3756 | BBuf *tbuf;\r |
3757 | r = not_code_range_buf(enc, pbuf, &tbuf);\r | |
3758 | if (r != 0) {\r | |
3759 | bbuf_free(pbuf);\r | |
3760 | return r;\r | |
3761 | }\r | |
3762 | bbuf_free(pbuf);\r | |
3763 | pbuf = tbuf;\r | |
14b0e578 CS |
3764 | }\r |
3765 | }\r | |
3766 | if (r != 0) return r;\r | |
3767 | \r | |
3768 | dest->mbuf = pbuf;\r | |
3769 | bbuf_free(buf1);\r | |
3770 | return r;\r | |
3771 | }\r | |
3772 | else\r | |
3773 | return 0;\r | |
3774 | }\r | |
3775 | \r | |
b602265d DG |
3776 | static OnigCodePoint\r |
3777 | conv_backslash_value(OnigCodePoint c, ScanEnv* env)\r | |
14b0e578 CS |
3778 | {\r |
3779 | if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_CONTROL_CHARS)) {\r | |
3780 | switch (c) {\r | |
3781 | case 'n': return '\n';\r | |
3782 | case 't': return '\t';\r | |
3783 | case 'r': return '\r';\r | |
3784 | case 'f': return '\f';\r | |
3785 | case 'a': return '\007';\r | |
3786 | case 'b': return '\010';\r | |
3787 | case 'e': return '\033';\r | |
3788 | case 'v':\r | |
3789 | if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_V_VTAB))\r | |
b602265d | 3790 | return '\v';\r |
14b0e578 CS |
3791 | break;\r |
3792 | \r | |
3793 | default:\r | |
3794 | break;\r | |
3795 | }\r | |
3796 | }\r | |
3797 | return c;\r | |
3798 | }\r | |
3799 | \r | |
3800 | static int\r | |
3801 | is_invalid_quantifier_target(Node* node)\r | |
3802 | {\r | |
b602265d DG |
3803 | switch (NODE_TYPE(node)) {\r |
3804 | case NODE_ANCHOR:\r | |
3805 | case NODE_GIMMICK:\r | |
14b0e578 CS |
3806 | return 1;\r |
3807 | break;\r | |
3808 | \r | |
b602265d | 3809 | case NODE_ENCLOSURE:\r |
14b0e578 | 3810 | /* allow enclosed elements */\r |
b602265d | 3811 | /* return is_invalid_quantifier_target(NODE_BODY(node)); */\r |
14b0e578 CS |
3812 | break;\r |
3813 | \r | |
b602265d | 3814 | case NODE_LIST:\r |
14b0e578 | 3815 | do {\r |
b602265d DG |
3816 | if (! is_invalid_quantifier_target(NODE_CAR(node))) return 0;\r |
3817 | } while (IS_NOT_NULL(node = NODE_CDR(node)));\r | |
14b0e578 CS |
3818 | return 0;\r |
3819 | break;\r | |
3820 | \r | |
b602265d | 3821 | case NODE_ALT:\r |
14b0e578 | 3822 | do {\r |
b602265d DG |
3823 | if (is_invalid_quantifier_target(NODE_CAR(node))) return 1;\r |
3824 | } while (IS_NOT_NULL(node = NODE_CDR(node)));\r | |
14b0e578 CS |
3825 | break;\r |
3826 | \r | |
3827 | default:\r | |
3828 | break;\r | |
3829 | }\r | |
3830 | return 0;\r | |
3831 | }\r | |
3832 | \r | |
3833 | /* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */\r | |
3834 | static int\r | |
b602265d | 3835 | quantifier_type_num(QuantNode* q)\r |
14b0e578 CS |
3836 | {\r |
3837 | if (q->greedy) {\r | |
3838 | if (q->lower == 0) {\r | |
3839 | if (q->upper == 1) return 0;\r | |
3840 | else if (IS_REPEAT_INFINITE(q->upper)) return 1;\r | |
3841 | }\r | |
3842 | else if (q->lower == 1) {\r | |
3843 | if (IS_REPEAT_INFINITE(q->upper)) return 2;\r | |
3844 | }\r | |
3845 | }\r | |
3846 | else {\r | |
3847 | if (q->lower == 0) {\r | |
3848 | if (q->upper == 1) return 3;\r | |
3849 | else if (IS_REPEAT_INFINITE(q->upper)) return 4;\r | |
3850 | }\r | |
3851 | else if (q->lower == 1) {\r | |
3852 | if (IS_REPEAT_INFINITE(q->upper)) return 5;\r | |
3853 | }\r | |
3854 | }\r | |
3855 | return -1;\r | |
3856 | }\r | |
3857 | \r | |
3858 | \r | |
/* Actions for collapsing a quantifier applied directly to a quantifier. */
enum ReduceType {
  RQ_ASIS = 0,  /* as is */
  RQ_DEL  = 1,  /* delete parent */
  RQ_A    = 2,  /* to '*' */
  RQ_AQ   = 3,  /* to '*?' */
  RQ_QQ   = 4,  /* to '??' */
  RQ_P_QQ = 5,  /* to '+)??' */
  RQ_PQ_Q = 6   /* to '+?)?' */
};
3868 | \r | |
3869 | static enum ReduceType ReduceTypeTable[6][6] = {\r | |
3870 | {RQ_DEL, RQ_A, RQ_A, RQ_QQ, RQ_AQ, RQ_ASIS}, /* '?' */\r | |
3871 | {RQ_DEL, RQ_DEL, RQ_DEL, RQ_P_QQ, RQ_P_QQ, RQ_DEL}, /* '*' */\r | |
3872 | {RQ_A, RQ_A, RQ_DEL, RQ_ASIS, RQ_P_QQ, RQ_DEL}, /* '+' */\r | |
3873 | {RQ_DEL, RQ_AQ, RQ_AQ, RQ_DEL, RQ_AQ, RQ_AQ}, /* '??' */\r | |
3874 | {RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL}, /* '*?' */\r | |
3875 | {RQ_ASIS, RQ_PQ_Q, RQ_DEL, RQ_AQ, RQ_AQ, RQ_DEL} /* '+?' */\r | |
3876 | };\r | |
3877 | \r | |
3878 | extern void\r | |
3879 | onig_reduce_nested_quantifier(Node* pnode, Node* cnode)\r | |
3880 | {\r | |
3881 | int pnum, cnum;\r | |
b602265d DG |
3882 | QuantNode *p, *c;\r |
3883 | \r | |
3884 | p = QUANT_(pnode);\r | |
3885 | c = QUANT_(cnode);\r | |
3886 | pnum = quantifier_type_num(p);\r | |
3887 | cnum = quantifier_type_num(c);\r | |
3888 | if (pnum < 0 || cnum < 0) {\r | |
3889 | if ((p->lower == p->upper) && ! IS_REPEAT_INFINITE(p->upper)) {\r | |
3890 | if ((c->lower == c->upper) && ! IS_REPEAT_INFINITE(c->upper)) {\r | |
3891 | int n = positive_int_multiply(p->lower, c->lower);\r | |
3892 | if (n >= 0) {\r | |
3893 | p->lower = p->upper = n;\r | |
3894 | NODE_BODY(pnode) = NODE_BODY(cnode);\r | |
3895 | goto remove_cnode;\r | |
3896 | }\r | |
3897 | }\r | |
3898 | }\r | |
14b0e578 | 3899 | \r |
b602265d DG |
3900 | return ;\r |
3901 | }\r | |
14b0e578 CS |
3902 | \r |
3903 | switch(ReduceTypeTable[cnum][pnum]) {\r | |
3904 | case RQ_DEL:\r | |
b602265d | 3905 | *pnode = *cnode;\r |
14b0e578 CS |
3906 | break;\r |
3907 | case RQ_A:\r | |
b602265d | 3908 | NODE_BODY(pnode) = NODE_BODY(cnode);\r |
14b0e578 CS |
3909 | p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 1;\r |
3910 | break;\r | |
3911 | case RQ_AQ:\r | |
b602265d | 3912 | NODE_BODY(pnode) = NODE_BODY(cnode);\r |
14b0e578 CS |
3913 | p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 0;\r |
3914 | break;\r | |
3915 | case RQ_QQ:\r | |
b602265d | 3916 | NODE_BODY(pnode) = NODE_BODY(cnode);\r |
14b0e578 CS |
3917 | p->lower = 0; p->upper = 1; p->greedy = 0;\r |
3918 | break;\r | |
3919 | case RQ_P_QQ:\r | |
b602265d | 3920 | NODE_BODY(pnode) = cnode;\r |
14b0e578 CS |
3921 | p->lower = 0; p->upper = 1; p->greedy = 0;\r |
3922 | c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 1;\r | |
3923 | return ;\r | |
3924 | break;\r | |
3925 | case RQ_PQ_Q:\r | |
b602265d | 3926 | NODE_BODY(pnode) = cnode;\r |
14b0e578 CS |
3927 | p->lower = 0; p->upper = 1; p->greedy = 1;\r |
3928 | c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 0;\r | |
3929 | return ;\r | |
3930 | break;\r | |
3931 | case RQ_ASIS:\r | |
b602265d | 3932 | NODE_BODY(pnode) = cnode;\r |
14b0e578 CS |
3933 | return ;\r |
3934 | break;\r | |
3935 | }\r | |
3936 | \r | |
b602265d DG |
3937 | remove_cnode:\r |
3938 | NODE_BODY(cnode) = NULL_NODE;\r | |
14b0e578 CS |
3939 | onig_node_free(cnode);\r |
3940 | }\r | |
3941 | \r | |
b602265d DG |
3942 | static int\r |
3943 | node_new_general_newline(Node** node, ScanEnv* env)\r | |
3944 | {\r | |
3945 | int r;\r | |
3946 | int dlen, alen;\r | |
3947 | UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN * 2];\r | |
3948 | Node* crnl;\r | |
3949 | Node* ncc;\r | |
3950 | Node* x;\r | |
3951 | CClassNode* cc;\r | |
3952 | \r | |
3953 | dlen = ONIGENC_CODE_TO_MBC(env->enc, 0x0d, buf);\r | |
3954 | if (dlen < 0) return dlen;\r | |
3955 | alen = ONIGENC_CODE_TO_MBC(env->enc, 0x0a, buf + dlen);\r | |
3956 | if (alen < 0) return alen;\r | |
3957 | \r | |
3958 | crnl = node_new_str_raw(buf, buf + dlen + alen);\r | |
3959 | CHECK_NULL_RETURN_MEMERR(crnl);\r | |
3960 | \r | |
3961 | ncc = node_new_cclass();\r | |
3962 | if (IS_NULL(ncc)) goto err2;\r | |
3963 | \r | |
3964 | cc = CCLASS_(ncc);\r | |
3965 | if (dlen == 1) {\r | |
3966 | bitset_set_range(cc->bs, 0x0a, 0x0d);\r | |
3967 | }\r | |
3968 | else {\r | |
3969 | r = add_code_range(&(cc->mbuf), env, 0x0a, 0x0d);\r | |
3970 | if (r != 0) {\r | |
3971 | err1:\r | |
3972 | onig_node_free(ncc);\r | |
3973 | err2:\r | |
3974 | onig_node_free(crnl);\r | |
3975 | return ONIGERR_MEMORY;\r | |
3976 | }\r | |
3977 | }\r | |
3978 | \r | |
3979 | if (ONIGENC_IS_UNICODE_ENCODING(env->enc)) {\r | |
3980 | r = add_code_range(&(cc->mbuf), env, 0x85, 0x85);\r | |
3981 | if (r != 0) goto err1;\r | |
3982 | r = add_code_range(&(cc->mbuf), env, 0x2028, 0x2029);\r | |
3983 | if (r != 0) goto err1;\r | |
3984 | }\r | |
3985 | \r | |
3986 | x = node_new_enclosure_if_else(crnl, 0, ncc);\r | |
3987 | if (IS_NULL(x)) goto err1;\r | |
3988 | \r | |
3989 | *node = x;\r | |
3990 | return 0;\r | |
3991 | }\r | |
14b0e578 CS |
3992 | \r |
/* Token kinds stored in OnigToken.type. */
enum TokenSyms {
  TK_EOT      = 0,              /* end of token */
  TK_RAW_BYTE = 1,
  TK_CHAR,
  TK_STRING,
  TK_CODE_POINT,
  TK_ANYCHAR,
  TK_CHAR_TYPE,
  TK_BACKREF,
  TK_CALL,
  TK_ANCHOR,
  TK_OP_REPEAT,
  TK_INTERVAL,
  TK_ANYCHAR_ANYTIME,           /* SQL '%' == .* */
  TK_ALT,
  TK_SUBEXP_OPEN,
  TK_SUBEXP_CLOSE,
  TK_CC_OPEN,
  TK_QUOTE_OPEN,
  TK_CHAR_PROPERTY,             /* \p{...}, \P{...} */
  TK_KEEP,                      /* \K */
  TK_GENERAL_NEWLINE,           /* \R */
  TK_NO_NEWLINE,                /* \N */
  TK_TRUE_ANYCHAR,              /* \O */
  TK_EXTENDED_GRAPHEME_CLUSTER, /* \X */

  /* ---- tokens that occur inside a character class ---- */
  TK_CC_CLOSE,
  TK_CC_RANGE,
  TK_POSIX_BRACKET_OPEN,
  TK_CC_AND,                    /* && */
  TK_CC_CC_OPEN                 /* [ */
};
4026 | \r | |
4027 | typedef struct {\r | |
4028 | enum TokenSyms type;\r | |
4029 | int escaped;\r | |
4030 | int base; /* is number: 8, 16 (used in [....]) */\r | |
4031 | UChar* backp;\r | |
4032 | union {\r | |
4033 | UChar* s;\r | |
4034 | int c;\r | |
4035 | OnigCodePoint code;\r | |
4036 | int anchor;\r | |
4037 | int subtype;\r | |
4038 | struct {\r | |
4039 | int lower;\r | |
4040 | int upper;\r | |
4041 | int greedy;\r | |
4042 | int possessive;\r | |
4043 | } repeat;\r | |
4044 | struct {\r | |
4045 | int num;\r | |
4046 | int ref1;\r | |
4047 | int* refs;\r | |
4048 | int by_name;\r | |
4049 | #ifdef USE_BACKREF_WITH_LEVEL\r | |
4050 | int exist_level;\r | |
4051 | int level; /* \k<name+n> */\r | |
4052 | #endif\r | |
4053 | } backref;\r | |
4054 | struct {\r | |
4055 | UChar* name;\r | |
4056 | UChar* name_end;\r | |
4057 | int gnum;\r | |
b602265d | 4058 | int by_number;\r |
14b0e578 CS |
4059 | } call;\r |
4060 | struct {\r | |
4061 | int ctype;\r | |
4062 | int not;\r | |
4063 | } prop;\r | |
4064 | } u;\r | |
4065 | } OnigToken;\r | |
4066 | \r | |
4067 | \r | |
4068 | static int\r | |
4069 | fetch_range_quantifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env)\r | |
4070 | {\r | |
4071 | int low, up, syn_allow, non_low = 0;\r | |
4072 | int r = 0;\r | |
4073 | OnigCodePoint c;\r | |
4074 | OnigEncoding enc = env->enc;\r | |
4075 | UChar* p = *src;\r | |
4076 | PFETCH_READY;\r | |
4077 | \r | |
4078 | syn_allow = IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INVALID_INTERVAL);\r | |
4079 | \r | |
4080 | if (PEND) {\r | |
4081 | if (syn_allow)\r | |
4082 | return 1; /* "....{" : OK! */\r | |
4083 | else\r | |
4084 | return ONIGERR_END_PATTERN_AT_LEFT_BRACE; /* "....{" syntax error */\r | |
4085 | }\r | |
4086 | \r | |
4087 | if (! syn_allow) {\r | |
4088 | c = PPEEK;\r | |
4089 | if (c == ')' || c == '(' || c == '|') {\r | |
4090 | return ONIGERR_END_PATTERN_AT_LEFT_BRACE;\r | |
4091 | }\r | |
4092 | }\r | |
4093 | \r | |
4094 | low = onig_scan_unsigned_number(&p, end, env->enc);\r | |
4095 | if (low < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;\r | |
4096 | if (low > ONIG_MAX_REPEAT_NUM)\r | |
4097 | return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;\r | |
4098 | \r | |
4099 | if (p == *src) { /* can't read low */\r | |
4100 | if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV)) {\r | |
4101 | /* allow {,n} as {0,n} */\r | |
4102 | low = 0;\r | |
4103 | non_low = 1;\r | |
4104 | }\r | |
4105 | else\r | |
4106 | goto invalid;\r | |
4107 | }\r | |
4108 | \r | |
4109 | if (PEND) goto invalid;\r | |
4110 | PFETCH(c);\r | |
4111 | if (c == ',') {\r | |
4112 | UChar* prev = p;\r | |
4113 | up = onig_scan_unsigned_number(&p, end, env->enc);\r | |
4114 | if (up < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;\r | |
4115 | if (up > ONIG_MAX_REPEAT_NUM)\r | |
4116 | return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;\r | |
4117 | \r | |
4118 | if (p == prev) {\r | |
4119 | if (non_low != 0)\r | |
b602265d | 4120 | goto invalid;\r |
14b0e578 CS |
4121 | up = REPEAT_INFINITE; /* {n,} : {n,infinite} */\r |
4122 | }\r | |
4123 | }\r | |
4124 | else {\r | |
4125 | if (non_low != 0)\r | |
4126 | goto invalid;\r | |
4127 | \r | |
4128 | PUNFETCH;\r | |
4129 | up = low; /* {n} : exact n times */\r | |
4130 | r = 2; /* fixed */\r | |
4131 | }\r | |
4132 | \r | |
4133 | if (PEND) goto invalid;\r | |
4134 | PFETCH(c);\r | |
4135 | if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) {\r | |
4136 | if (c != MC_ESC(env->syntax)) goto invalid;\r | |
4137 | PFETCH(c);\r | |
4138 | }\r | |
4139 | if (c != '}') goto invalid;\r | |
4140 | \r | |
4141 | if (!IS_REPEAT_INFINITE(up) && low > up) {\r | |
4142 | return ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE;\r | |
4143 | }\r | |
4144 | \r | |
4145 | tok->type = TK_INTERVAL;\r | |
4146 | tok->u.repeat.lower = low;\r | |
4147 | tok->u.repeat.upper = up;\r | |
4148 | *src = p;\r | |
4149 | return r; /* 0: normal {n,m}, 2: fixed {n} */\r | |
4150 | \r | |
4151 | invalid:\r | |
b602265d DG |
4152 | if (syn_allow) {\r |
4153 | /* *src = p; */ /* !!! Don't do this line !!! */\r | |
14b0e578 | 4154 | return 1; /* OK */\r |
b602265d | 4155 | }\r |
14b0e578 CS |
4156 | else\r |
4157 | return ONIGERR_INVALID_REPEAT_RANGE_PATTERN;\r | |
4158 | }\r | |
4159 | \r | |
4160 | /* \M-, \C-, \c, or \... */\r | |
4161 | static int\r | |
b602265d | 4162 | fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env, OnigCodePoint* val)\r |
14b0e578 CS |
4163 | {\r |
4164 | int v;\r | |
4165 | OnigCodePoint c;\r | |
4166 | OnigEncoding enc = env->enc;\r | |
4167 | UChar* p = *src;\r | |
4168 | \r | |
4169 | if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;\r | |
4170 | \r | |
4171 | PFETCH_S(c);\r | |
4172 | switch (c) {\r | |
4173 | case 'M':\r | |
4174 | if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META)) {\r | |
4175 | if (PEND) return ONIGERR_END_PATTERN_AT_META;\r | |
4176 | PFETCH_S(c);\r | |
4177 | if (c != '-') return ONIGERR_META_CODE_SYNTAX;\r | |
4178 | if (PEND) return ONIGERR_END_PATTERN_AT_META;\r | |
4179 | PFETCH_S(c);\r | |
4180 | if (c == MC_ESC(env->syntax)) {\r | |
b602265d | 4181 | v = fetch_escaped_value(&p, end, env, &c);\r |
14b0e578 | 4182 | if (v < 0) return v;\r |
14b0e578 CS |
4183 | }\r |
4184 | c = ((c & 0xff) | 0x80);\r | |
4185 | }\r | |
4186 | else\r | |
4187 | goto backslash;\r | |
4188 | break;\r | |
4189 | \r | |
4190 | case 'C':\r | |
4191 | if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL)) {\r | |
4192 | if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;\r | |
4193 | PFETCH_S(c);\r | |
4194 | if (c != '-') return ONIGERR_CONTROL_CODE_SYNTAX;\r | |
4195 | goto control;\r | |
4196 | }\r | |
4197 | else\r | |
4198 | goto backslash;\r | |
4199 | \r | |
4200 | case 'c':\r | |
4201 | if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_C_CONTROL)) {\r | |
4202 | control:\r | |
4203 | if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;\r | |
4204 | PFETCH_S(c);\r | |
4205 | if (c == '?') {\r | |
4206 | c = 0177;\r | |
4207 | }\r | |
4208 | else {\r | |
4209 | if (c == MC_ESC(env->syntax)) {\r | |
b602265d | 4210 | v = fetch_escaped_value(&p, end, env, &c);\r |
14b0e578 | 4211 | if (v < 0) return v;\r |
14b0e578 CS |
4212 | }\r |
4213 | c &= 0x9f;\r | |
4214 | }\r | |
4215 | break;\r | |
4216 | }\r | |
4217 | /* fall through */\r | |
4218 | \r | |
4219 | default:\r | |
4220 | {\r | |
4221 | backslash:\r | |
4222 | c = conv_backslash_value(c, env);\r | |
4223 | }\r | |
4224 | break;\r | |
4225 | }\r | |
4226 | \r | |
4227 | *src = p;\r | |
b602265d DG |
4228 | *val = c;\r |
4229 | return 0;\r | |
14b0e578 CS |
4230 | }\r |
4231 | \r | |
4232 | static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env);\r | |
4233 | \r | |
4234 | static OnigCodePoint\r | |
4235 | get_name_end_code_point(OnigCodePoint start)\r | |
4236 | {\r | |
4237 | switch (start) {\r | |
b602265d | 4238 | case '<': return (OnigCodePoint )'>'; break;\r |
14b0e578 | 4239 | case '\'': return (OnigCodePoint )'\''; break;\r |
b602265d | 4240 | case '(': return (OnigCodePoint )')'; break;\r |
14b0e578 CS |
4241 | default:\r |
4242 | break;\r | |
4243 | }\r | |
4244 | \r | |
4245 | return (OnigCodePoint )0;\r | |
4246 | }\r | |
4247 | \r | |
b602265d DG |
/* How a group reference is written, as reported by the name fetchers. */
enum REF_NUM {
  IS_NOT_NUM = 0,  /* not a number (a name) */
  IS_ABS_NUM = 1,  /* starts with a digit */
  IS_REL_NUM = 2   /* starts with '+' or '-' */
};
4253 | \r | |
14b0e578 CS |
#ifdef USE_BACKREF_WITH_LEVEL
/*
 * Read a back-reference name or number that may carry a level suffix:
 *
 *    \k<name+n>, \k<name-n>
 *    \k<num+n>,  \k<num-n>
 *    \k<-num+n>, \k<-num-n>
 *    \k<+num+n>, \k<+num-n>
 *
 * start_code  opening delimiter; the closing one is derived from it.
 * src         in/out read position; advanced only on success.
 * rname_end   out: end of the name/number part (before any level suffix).
 * rback_num   out: signed number when the reference is numeric, else 0.
 * rlevel      out: signed level; written only when a level is present.
 * num_type    out: IS_NOT_NUM, IS_ABS_NUM or IS_REL_NUM.
 *
 * Returns 1 if a level was given, 0 if not, or a negative ONIGERR_* code.
 */
static int
fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
                      UChar** rname_end, ScanEnv* env,
                      int* rback_num, int* rlevel, enum REF_NUM* num_type)
{
  int r = 0;
  int sign = 1;
  int exist_level = 0;
  int digit_count = 0;
  OnigCodePoint end_code;
  OnigCodePoint c = 0;
  OnigEncoding enc = env->enc;
  UChar *name_end = end;
  UChar *pnum_head = *src;
  UChar *p = *src;
  PFETCH_READY;

  *rback_num = 0;
  *num_type = IS_NOT_NUM;

  end_code = get_name_end_code_point(start_code);

  if (PEND) return ONIGERR_EMPTY_GROUP_NAME;

  /* the first character decides between name, absolute and relative number */
  PFETCH(c);
  if (c == end_code)
    return ONIGERR_EMPTY_GROUP_NAME;

  if (IS_CODE_DIGIT_ASCII(enc, c)) {
    *num_type = IS_ABS_NUM;
    digit_count++;
  }
  else if (c == '-' || c == '+') {
    *num_type = IS_REL_NUM;
    sign = (c == '-') ? -1 : 1;
    pnum_head = p;  /* the digits start after the sign */
  }
  else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
    r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
  }

  /* scan up to the terminator or the start of a level suffix */
  while (!PEND) {
    name_end = p;
    PFETCH(c);
    if (c == end_code || c == ')' || c == '+' || c == '-') {
      if (*num_type != IS_NOT_NUM && digit_count == 0)
        r = ONIGERR_INVALID_GROUP_NAME;
      break;
    }

    if (*num_type != IS_NOT_NUM) {
      if (IS_CODE_DIGIT_ASCII(enc, c)) {
        digit_count++;
      }
      else {
        r = ONIGERR_INVALID_GROUP_NAME;
        *num_type = IS_NOT_NUM;
      }
    }
    else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
      r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
    }
  }

  if (r == 0 && c != end_code) {
    if (c == '+' || c == '-') {
      int level;
      int flag = (c == '-' ? -1 : 1);

      if (PEND) {
        r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
        goto end;
      }
      PFETCH(c);
      if (! IS_CODE_DIGIT_ASCII(enc, c)) goto err;
      PUNFETCH;
      level = onig_scan_unsigned_number(&p, end, enc);
      if (level < 0) return ONIGERR_TOO_BIG_NUMBER;
      *rlevel = (level * flag);
      exist_level = 1;

      if (!PEND) {
        PFETCH(c);
        if (c == end_code)
          goto end;
      }
    }

  err:
    name_end = end;
  err2:
    r = ONIGERR_INVALID_GROUP_NAME;
  }

 end:
  if (r != 0) {
    onig_scan_env_set_error_string(env, r, *src, name_end);
    return r;
  }

  if (*num_type != IS_NOT_NUM) {
    *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
    if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
    /* a relative reference of 0 is rejected */
    if (*rback_num == 0 && *num_type == IS_REL_NUM)
      goto err2;

    *rback_num *= sign;
  }

  *rname_end = name_end;
  *src = p;
  return (exist_level ? 1 : 0);
}
#endif /* USE_BACKREF_WITH_LEVEL */
4390 | \r | |
4391 | /*\r | |
b602265d | 4392 | ref: 0 -> define name (don't allow number name)\r |
14b0e578 CS |
4393 | 1 -> reference name (allow number name)\r |
4394 | */\r | |
4395 | static int\r | |
4396 | fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,\r | |
b602265d DG |
4397 | UChar** rname_end, ScanEnv* env, int* rback_num,\r |
4398 | enum REF_NUM* num_type, int ref)\r | |
14b0e578 | 4399 | {\r |
b602265d DG |
4400 | int r, sign;\r |
4401 | int digit_count;\r | |
14b0e578 CS |
4402 | OnigCodePoint end_code;\r |
4403 | OnigCodePoint c = 0;\r | |
4404 | OnigEncoding enc = env->enc;\r | |
4405 | UChar *name_end;\r | |
4406 | UChar *pnum_head;\r | |
4407 | UChar *p = *src;\r | |
4408 | \r | |
4409 | *rback_num = 0;\r | |
4410 | \r | |
4411 | end_code = get_name_end_code_point(start_code);\r | |
4412 | \r | |
b602265d | 4413 | digit_count = 0;\r |
14b0e578 CS |
4414 | name_end = end;\r |
4415 | pnum_head = *src;\r | |
4416 | r = 0;\r | |
b602265d | 4417 | *num_type = IS_NOT_NUM;\r |
14b0e578 CS |
4418 | sign = 1;\r |
4419 | if (PEND) {\r | |
4420 | return ONIGERR_EMPTY_GROUP_NAME;\r | |
4421 | }\r | |
4422 | else {\r | |
4423 | PFETCH_S(c);\r | |
4424 | if (c == end_code)\r | |
4425 | return ONIGERR_EMPTY_GROUP_NAME;\r | |
4426 | \r | |
b602265d | 4427 | if (IS_CODE_DIGIT_ASCII(enc, c)) {\r |
14b0e578 | 4428 | if (ref == 1)\r |
b602265d | 4429 | *num_type = IS_ABS_NUM;\r |
14b0e578 CS |
4430 | else {\r |
4431 | r = ONIGERR_INVALID_GROUP_NAME;\r | |
14b0e578 | 4432 | }\r |
b602265d | 4433 | digit_count++;\r |
14b0e578 CS |
4434 | }\r |
4435 | else if (c == '-') {\r | |
4436 | if (ref == 1) {\r | |
b602265d | 4437 | *num_type = IS_REL_NUM;\r |
14b0e578 CS |
4438 | sign = -1;\r |
4439 | pnum_head = p;\r | |
4440 | }\r | |
4441 | else {\r | |
4442 | r = ONIGERR_INVALID_GROUP_NAME;\r | |
14b0e578 CS |
4443 | }\r |
4444 | }\r | |
b602265d DG |
4445 | else if (c == '+') {\r |
4446 | if (ref == 1) {\r | |
4447 | *num_type = IS_REL_NUM;\r | |
4448 | sign = 1;\r | |
4449 | pnum_head = p;\r | |
14b0e578 CS |
4450 | }\r |
4451 | else {\r | |
14b0e578 | 4452 | r = ONIGERR_INVALID_GROUP_NAME;\r |
14b0e578 | 4453 | }\r |
14b0e578 | 4454 | }\r |
b602265d | 4455 | else if (!ONIGENC_IS_CODE_WORD(enc, c)) {\r |
14b0e578 | 4456 | r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;\r |
b602265d | 4457 | }\r |
14b0e578 CS |
4458 | }\r |
4459 | \r | |
4460 | if (r == 0) {\r | |
b602265d DG |
4461 | while (!PEND) {\r |
4462 | name_end = p;\r | |
4463 | PFETCH_S(c);\r | |
4464 | if (c == end_code || c == ')') {\r | |
4465 | if (*num_type != IS_NOT_NUM && digit_count == 0)\r | |
4466 | r = ONIGERR_INVALID_GROUP_NAME;\r | |
4467 | break;\r | |
4468 | }\r | |
4469 | \r | |
4470 | if (*num_type != IS_NOT_NUM) {\r | |
4471 | if (IS_CODE_DIGIT_ASCII(enc, c)) {\r | |
4472 | digit_count++;\r | |
4473 | }\r | |
4474 | else {\r | |
4475 | if (!ONIGENC_IS_CODE_WORD(enc, c))\r | |
4476 | r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;\r | |
4477 | else\r | |
4478 | r = ONIGERR_INVALID_GROUP_NAME;\r | |
4479 | \r | |
4480 | *num_type = IS_NOT_NUM;\r | |
4481 | }\r | |
4482 | }\r | |
4483 | else {\r | |
4484 | if (!ONIGENC_IS_CODE_WORD(enc, c)) {\r | |
4485 | r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;\r | |
4486 | }\r | |
4487 | }\r | |
4488 | }\r | |
4489 | \r | |
4490 | if (c != end_code) {\r | |
14b0e578 CS |
4491 | r = ONIGERR_INVALID_GROUP_NAME;\r |
4492 | goto err;\r | |
4493 | }\r | |
b602265d DG |
4494 | \r |
4495 | if (*num_type != IS_NOT_NUM) {\r | |
4496 | *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);\r | |
4497 | if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;\r | |
4498 | else if (*rback_num == 0) {\r | |
4499 | if (*num_type == IS_REL_NUM) {\r | |
4500 | r = ONIGERR_INVALID_GROUP_NAME;\r | |
4501 | goto err;\r | |
4502 | }\r | |
4503 | }\r | |
4504 | \r | |
4505 | *rback_num *= sign;\r | |
4506 | }\r | |
14b0e578 CS |
4507 | \r |
4508 | *rname_end = name_end;\r | |
4509 | *src = p;\r | |
4510 | return 0;\r | |
4511 | }\r | |
4512 | else {\r | |
b602265d DG |
4513 | while (!PEND) {\r |
4514 | name_end = p;\r | |
4515 | PFETCH_S(c);\r | |
4516 | if (c == end_code || c == ')')\r | |
4517 | break;\r | |
4518 | }\r | |
4519 | if (PEND)\r | |
4520 | name_end = end;\r | |
4521 | \r | |
14b0e578 CS |
4522 | err:\r |
4523 | onig_scan_env_set_error_string(env, r, *src, name_end);\r | |
4524 | return r;\r | |
4525 | }\r | |
4526 | }\r | |
14b0e578 CS |
4527 | \r |
4528 | static void\r | |
4529 | CC_ESC_WARN(ScanEnv* env, UChar *c)\r | |
4530 | {\r | |
4531 | if (onig_warn == onig_null_warn) return ;\r | |
4532 | \r | |
4533 | if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED) &&\r | |
4534 | IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) {\r | |
4535 | UChar buf[WARN_BUFSIZE];\r | |
4536 | onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,\r | |
b602265d DG |
4537 | env->pattern, env->pattern_end,\r |
4538 | (UChar* )"character class has '%s' without escape",\r | |
4539 | c);\r | |
14b0e578 CS |
4540 | (*onig_warn)((char* )buf);\r |
4541 | }\r | |
4542 | }\r | |
4543 | \r | |
4544 | static void\r | |
4545 | CLOSE_BRACKET_WITHOUT_ESC_WARN(ScanEnv* env, UChar* c)\r | |
4546 | {\r | |
4547 | if (onig_warn == onig_null_warn) return ;\r | |
4548 | \r | |
4549 | if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) {\r | |
4550 | UChar buf[WARN_BUFSIZE];\r | |
4551 | onig_snprintf_with_pattern(buf, WARN_BUFSIZE, (env)->enc,\r | |
b602265d DG |
4552 | (env)->pattern, (env)->pattern_end,\r |
4553 | (UChar* )"regular expression has '%s' without escape", c);\r | |
14b0e578 CS |
4554 | (*onig_warn)((char* )buf);\r |
4555 | }\r | |
4556 | }\r | |
4557 | \r | |
4558 | static UChar*\r | |
4559 | find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to,\r | |
b602265d | 4560 | UChar **next, OnigEncoding enc)\r |
14b0e578 CS |
4561 | {\r |
4562 | int i;\r | |
4563 | OnigCodePoint x;\r | |
4564 | UChar *q;\r | |
4565 | UChar *p = from;\r | |
4566 | \r | |
4567 | while (p < to) {\r | |
4568 | x = ONIGENC_MBC_TO_CODE(enc, p, to);\r | |
4569 | q = p + enclen(enc, p);\r | |
4570 | if (x == s[0]) {\r | |
4571 | for (i = 1; i < n && q < to; i++) {\r | |
b602265d DG |
4572 | x = ONIGENC_MBC_TO_CODE(enc, q, to);\r |
4573 | if (x != s[i]) break;\r | |
4574 | q += enclen(enc, q);\r | |
14b0e578 CS |
4575 | }\r |
4576 | if (i >= n) {\r | |
b602265d DG |
4577 | if (IS_NOT_NULL(next))\r |
4578 | *next = q;\r | |
4579 | return p;\r | |
14b0e578 CS |
4580 | }\r |
4581 | }\r | |
4582 | p = q;\r | |
4583 | }\r | |
4584 | return NULL_UCHARP;\r | |
4585 | }\r | |
4586 | \r | |
4587 | static int\r | |
4588 | str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to,\r | |
b602265d | 4589 | OnigCodePoint bad, OnigEncoding enc, OnigSyntaxType* syn)\r |
14b0e578 CS |
4590 | {\r |
4591 | int i, in_esc;\r | |
4592 | OnigCodePoint x;\r | |
4593 | UChar *q;\r | |
4594 | UChar *p = from;\r | |
4595 | \r | |
4596 | in_esc = 0;\r | |
4597 | while (p < to) {\r | |
4598 | if (in_esc) {\r | |
4599 | in_esc = 0;\r | |
4600 | p += enclen(enc, p);\r | |
4601 | }\r | |
4602 | else {\r | |
4603 | x = ONIGENC_MBC_TO_CODE(enc, p, to);\r | |
4604 | q = p + enclen(enc, p);\r | |
4605 | if (x == s[0]) {\r | |
b602265d DG |
4606 | for (i = 1; i < n && q < to; i++) {\r |
4607 | x = ONIGENC_MBC_TO_CODE(enc, q, to);\r | |
4608 | if (x != s[i]) break;\r | |
4609 | q += enclen(enc, q);\r | |
4610 | }\r | |
4611 | if (i >= n) return 1;\r | |
4612 | p += enclen(enc, p);\r | |
14b0e578 CS |
4613 | }\r |
4614 | else {\r | |
b602265d DG |
4615 | x = ONIGENC_MBC_TO_CODE(enc, p, to);\r |
4616 | if (x == bad) return 0;\r | |
4617 | else if (x == MC_ESC(syn)) in_esc = 1;\r | |
4618 | p = q;\r | |
14b0e578 CS |
4619 | }\r |
4620 | }\r | |
4621 | }\r | |
4622 | return 0;\r | |
4623 | }\r | |
4624 | \r | |
4625 | static int\r | |
4626 | fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)\r | |
4627 | {\r | |
4628 | int num;\r | |
4629 | OnigCodePoint c, c2;\r | |
4630 | OnigSyntaxType* syn = env->syntax;\r | |
4631 | OnigEncoding enc = env->enc;\r | |
4632 | UChar* prev;\r | |
4633 | UChar* p = *src;\r | |
4634 | PFETCH_READY;\r | |
4635 | \r | |
4636 | if (PEND) {\r | |
4637 | tok->type = TK_EOT;\r | |
4638 | return tok->type;\r | |
4639 | }\r | |
4640 | \r | |
4641 | PFETCH(c);\r | |
4642 | tok->type = TK_CHAR;\r | |
4643 | tok->base = 0;\r | |
4644 | tok->u.c = c;\r | |
4645 | tok->escaped = 0;\r | |
4646 | \r | |
4647 | if (c == ']') {\r | |
4648 | tok->type = TK_CC_CLOSE;\r | |
4649 | }\r | |
4650 | else if (c == '-') {\r | |
4651 | tok->type = TK_CC_RANGE;\r | |
4652 | }\r | |
4653 | else if (c == MC_ESC(syn)) {\r | |
4654 | if (! IS_SYNTAX_BV(syn, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC))\r | |
4655 | goto end;\r | |
4656 | \r | |
4657 | if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;\r | |
4658 | \r | |
4659 | PFETCH(c);\r | |
4660 | tok->escaped = 1;\r | |
4661 | tok->u.c = c;\r | |
4662 | switch (c) {\r | |
4663 | case 'w':\r | |
4664 | tok->type = TK_CHAR_TYPE;\r | |
4665 | tok->u.prop.ctype = ONIGENC_CTYPE_WORD;\r | |
4666 | tok->u.prop.not = 0;\r | |
4667 | break;\r | |
4668 | case 'W':\r | |
4669 | tok->type = TK_CHAR_TYPE;\r | |
4670 | tok->u.prop.ctype = ONIGENC_CTYPE_WORD;\r | |
4671 | tok->u.prop.not = 1;\r | |
4672 | break;\r | |
4673 | case 'd':\r | |
4674 | tok->type = TK_CHAR_TYPE;\r | |
4675 | tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;\r | |
4676 | tok->u.prop.not = 0;\r | |
4677 | break;\r | |
4678 | case 'D':\r | |
4679 | tok->type = TK_CHAR_TYPE;\r | |
4680 | tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;\r | |
4681 | tok->u.prop.not = 1;\r | |
4682 | break;\r | |
4683 | case 's':\r | |
4684 | tok->type = TK_CHAR_TYPE;\r | |
4685 | tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;\r | |
4686 | tok->u.prop.not = 0;\r | |
4687 | break;\r | |
4688 | case 'S':\r | |
4689 | tok->type = TK_CHAR_TYPE;\r | |
4690 | tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;\r | |
4691 | tok->u.prop.not = 1;\r | |
4692 | break;\r | |
4693 | case 'h':\r | |
4694 | if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;\r | |
4695 | tok->type = TK_CHAR_TYPE;\r | |
4696 | tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;\r | |
4697 | tok->u.prop.not = 0;\r | |
4698 | break;\r | |
4699 | case 'H':\r | |
4700 | if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;\r | |
4701 | tok->type = TK_CHAR_TYPE;\r | |
4702 | tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;\r | |
4703 | tok->u.prop.not = 1;\r | |
4704 | break;\r | |
4705 | \r | |
4706 | case 'p':\r | |
4707 | case 'P':\r | |
b602265d DG |
4708 | if (PEND) break;\r |
4709 | \r | |
14b0e578 CS |
4710 | c2 = PPEEK;\r |
4711 | if (c2 == '{' &&\r | |
b602265d DG |
4712 | IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {\r |
4713 | PINC;\r | |
4714 | tok->type = TK_CHAR_PROPERTY;\r | |
4715 | tok->u.prop.not = (c == 'P' ? 1 : 0);\r | |
4716 | \r | |
4717 | if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {\r | |
4718 | PFETCH(c2);\r | |
4719 | if (c2 == '^') {\r | |
4720 | tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);\r | |
4721 | }\r | |
4722 | else\r | |
4723 | PUNFETCH;\r | |
4724 | }\r | |
4725 | }\r | |
4726 | break;\r | |
4727 | \r | |
4728 | case 'o':\r | |
4729 | if (PEND) break;\r | |
4730 | \r | |
4731 | prev = p;\r | |
4732 | if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_O_BRACE_OCTAL)) {\r | |
4733 | PINC;\r | |
4734 | num = scan_unsigned_octal_number(&p, end, 11, enc);\r | |
4735 | if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;\r | |
4736 | if (!PEND) {\r | |
4737 | c2 = PPEEK;\r | |
4738 | if (IS_CODE_DIGIT_ASCII(enc, c2))\r | |
4739 | return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;\r | |
4740 | }\r | |
4741 | \r | |
4742 | if (p > prev + enclen(enc, prev) && !PEND && (PPEEK_IS('}'))) {\r | |
4743 | PINC;\r | |
4744 | tok->type = TK_CODE_POINT;\r | |
4745 | tok->base = 8;\r | |
4746 | tok->u.code = (OnigCodePoint )num;\r | |
4747 | }\r | |
4748 | else {\r | |
4749 | /* can't read nothing or invalid format */\r | |
4750 | p = prev;\r | |
4751 | }\r | |
14b0e578 CS |
4752 | }\r |
4753 | break;\r | |
4754 | \r | |
4755 | case 'x':\r | |
4756 | if (PEND) break;\r | |
4757 | \r | |
4758 | prev = p;\r | |
4759 | if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {\r | |
b602265d DG |
4760 | PINC;\r |
4761 | num = scan_unsigned_hexadecimal_number(&p, end, 0, 8, enc);\r | |
4762 | if (num < 0) {\r | |
4763 | if (num == ONIGERR_TOO_BIG_NUMBER)\r | |
4764 | return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;\r | |
4765 | else\r | |
4766 | return num;\r | |
4767 | }\r | |
4768 | if (!PEND) {\r | |
14b0e578 | 4769 | c2 = PPEEK;\r |
b602265d | 4770 | if (IS_CODE_XDIGIT_ASCII(enc, c2))\r |
14b0e578 CS |
4771 | return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;\r |
4772 | }\r | |
4773 | \r | |
b602265d DG |
4774 | if (p > prev + enclen(enc, prev) && !PEND && (PPEEK_IS('}'))) {\r |
4775 | PINC;\r | |
4776 | tok->type = TK_CODE_POINT;\r | |
4777 | tok->base = 16;\r | |
4778 | tok->u.code = (OnigCodePoint )num;\r | |
4779 | }\r | |
4780 | else {\r | |
4781 | /* can't read nothing or invalid format */\r | |
4782 | p = prev;\r | |
4783 | }\r | |
14b0e578 CS |
4784 | }\r |
4785 | else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {\r | |
b602265d DG |
4786 | num = scan_unsigned_hexadecimal_number(&p, end, 0, 2, enc);\r |
4787 | if (num < 0) return num;\r | |
4788 | if (p == prev) { /* can't read nothing. */\r | |
4789 | num = 0; /* but, it's not error */\r | |
4790 | }\r | |
4791 | tok->type = TK_RAW_BYTE;\r | |
4792 | tok->base = 16;\r | |
4793 | tok->u.c = num;\r | |
14b0e578 CS |
4794 | }\r |
4795 | break;\r | |
4796 | \r | |
4797 | case 'u':\r | |
4798 | if (PEND) break;\r | |
4799 | \r | |
4800 | prev = p;\r | |
4801 | if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {\r | |
b602265d DG |
4802 | num = scan_unsigned_hexadecimal_number(&p, end, 4, 4, enc);\r |
4803 | if (num < 0) return num;\r | |
4804 | if (p == prev) { /* can't read nothing. */\r | |
4805 | num = 0; /* but, it's not error */\r | |
4806 | }\r | |
4807 | tok->type = TK_CODE_POINT;\r | |
4808 | tok->base = 16;\r | |
4809 | tok->u.code = (OnigCodePoint )num;\r | |
14b0e578 CS |
4810 | }\r |
4811 | break;\r | |
4812 | \r | |
4813 | case '0':\r | |
4814 | case '1': case '2': case '3': case '4': case '5': case '6': case '7':\r | |
4815 | if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {\r | |
b602265d DG |
4816 | PUNFETCH;\r |
4817 | prev = p;\r | |
4818 | num = scan_unsigned_octal_number(&p, end, 3, enc);\r | |
4819 | if (num < 0 || num >= 256) return ONIGERR_TOO_BIG_NUMBER;\r | |
4820 | if (p == prev) { /* can't read nothing. */\r | |
4821 | num = 0; /* but, it's not error */\r | |
4822 | }\r | |
4823 | tok->type = TK_RAW_BYTE;\r | |
4824 | tok->base = 8;\r | |
4825 | tok->u.c = num;\r | |
14b0e578 CS |
4826 | }\r |
4827 | break;\r | |
4828 | \r | |
4829 | default:\r | |
4830 | PUNFETCH;\r | |
b602265d | 4831 | num = fetch_escaped_value(&p, end, env, &c2);\r |
14b0e578 | 4832 | if (num < 0) return num;\r |
b602265d DG |
4833 | if (tok->u.c != c2) {\r |
4834 | tok->u.code = c2;\r | |
4835 | tok->type = TK_CODE_POINT;\r | |
14b0e578 CS |
4836 | }\r |
4837 | break;\r | |
4838 | }\r | |
4839 | }\r | |
4840 | else if (c == '[') {\r | |
4841 | if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_POSIX_BRACKET) && (PPEEK_IS(':'))) {\r | |
4842 | OnigCodePoint send[] = { (OnigCodePoint )':', (OnigCodePoint )']' };\r | |
b602265d | 4843 | tok->backp = p; /* point at '[' is read */\r |
14b0e578 CS |
4844 | PINC;\r |
4845 | if (str_exist_check_with_esc(send, 2, p, end,\r | |
4846 | (OnigCodePoint )']', enc, syn)) {\r | |
b602265d | 4847 | tok->type = TK_POSIX_BRACKET_OPEN;\r |
14b0e578 CS |
4848 | }\r |
4849 | else {\r | |
b602265d DG |
4850 | PUNFETCH;\r |
4851 | goto cc_in_cc;\r | |
14b0e578 CS |
4852 | }\r |
4853 | }\r | |
4854 | else {\r | |
4855 | cc_in_cc:\r | |
4856 | if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP)) {\r | |
b602265d | 4857 | tok->type = TK_CC_CC_OPEN;\r |
14b0e578 CS |
4858 | }\r |
4859 | else {\r | |
b602265d | 4860 | CC_ESC_WARN(env, (UChar* )"[");\r |
14b0e578 CS |
4861 | }\r |
4862 | }\r | |
4863 | }\r | |
4864 | else if (c == '&') {\r | |
4865 | if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP) &&\r | |
b602265d | 4866 | !PEND && (PPEEK_IS('&'))) {\r |
14b0e578 CS |
4867 | PINC;\r |
4868 | tok->type = TK_CC_AND;\r | |
4869 | }\r | |
4870 | }\r | |
4871 | \r | |
4872 | end:\r | |
4873 | *src = p;\r | |
4874 | return tok->type;\r | |
4875 | }\r | |
4876 | \r | |
4877 | static int\r | |
4878 | fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)\r | |
4879 | {\r | |
4880 | int r, num;\r | |
4881 | OnigCodePoint c;\r | |
4882 | OnigEncoding enc = env->enc;\r | |
4883 | OnigSyntaxType* syn = env->syntax;\r | |
4884 | UChar* prev;\r | |
4885 | UChar* p = *src;\r | |
4886 | PFETCH_READY;\r | |
4887 | \r | |
4888 | start:\r | |
4889 | if (PEND) {\r | |
4890 | tok->type = TK_EOT;\r | |
4891 | return tok->type;\r | |
4892 | }\r | |
4893 | \r | |
4894 | tok->type = TK_STRING;\r | |
4895 | tok->base = 0;\r | |
4896 | tok->backp = p;\r | |
4897 | \r | |
4898 | PFETCH(c);\r | |
4899 | if (IS_MC_ESC_CODE(c, syn)) {\r | |
4900 | if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;\r | |
4901 | \r | |
4902 | tok->backp = p;\r | |
4903 | PFETCH(c);\r | |
4904 | \r | |
4905 | tok->u.c = c;\r | |
4906 | tok->escaped = 1;\r | |
4907 | switch (c) {\r | |
4908 | case '*':\r | |
4909 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF)) break;\r | |
4910 | tok->type = TK_OP_REPEAT;\r | |
4911 | tok->u.repeat.lower = 0;\r | |
4912 | tok->u.repeat.upper = REPEAT_INFINITE;\r | |
4913 | goto greedy_check;\r | |
4914 | break;\r | |
4915 | \r | |
4916 | case '+':\r | |
4917 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_PLUS_ONE_INF)) break;\r | |
4918 | tok->type = TK_OP_REPEAT;\r | |
4919 | tok->u.repeat.lower = 1;\r | |
4920 | tok->u.repeat.upper = REPEAT_INFINITE;\r | |
4921 | goto greedy_check;\r | |
4922 | break;\r | |
4923 | \r | |
4924 | case '?':\r | |
4925 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_QMARK_ZERO_ONE)) break;\r | |
4926 | tok->type = TK_OP_REPEAT;\r | |
4927 | tok->u.repeat.lower = 0;\r | |
4928 | tok->u.repeat.upper = 1;\r | |
4929 | greedy_check:\r | |
4930 | if (!PEND && PPEEK_IS('?') &&\r | |
b602265d DG |
4931 | IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY)) {\r |
4932 | PFETCH(c);\r | |
4933 | tok->u.repeat.greedy = 0;\r | |
4934 | tok->u.repeat.possessive = 0;\r | |
14b0e578 CS |
4935 | }\r |
4936 | else {\r | |
4937 | possessive_check:\r | |
b602265d DG |
4938 | if (!PEND && PPEEK_IS('+') &&\r |
4939 | ((IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT) &&\r | |
4940 | tok->type != TK_INTERVAL) ||\r | |
4941 | (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL) &&\r | |
4942 | tok->type == TK_INTERVAL))) {\r | |
4943 | PFETCH(c);\r | |
4944 | tok->u.repeat.greedy = 1;\r | |
4945 | tok->u.repeat.possessive = 1;\r | |
4946 | }\r | |
4947 | else {\r | |
4948 | tok->u.repeat.greedy = 1;\r | |
4949 | tok->u.repeat.possessive = 0;\r | |
4950 | }\r | |
14b0e578 CS |
4951 | }\r |
4952 | break;\r | |
4953 | \r | |
4954 | case '{':\r | |
4955 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break;\r | |
4956 | r = fetch_range_quantifier(&p, end, tok, env);\r | |
4957 | if (r < 0) return r; /* error */\r | |
4958 | if (r == 0) goto greedy_check;\r | |
4959 | else if (r == 2) { /* {n} */\r | |
b602265d DG |
4960 | if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))\r |
4961 | goto possessive_check;\r | |
14b0e578 | 4962 | \r |
b602265d | 4963 | goto greedy_check;\r |
14b0e578 CS |
4964 | }\r |
4965 | /* r == 1 : normal char */\r | |
4966 | break;\r | |
4967 | \r | |
4968 | case '|':\r | |
4969 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_VBAR_ALT)) break;\r | |
4970 | tok->type = TK_ALT;\r | |
4971 | break;\r | |
4972 | \r | |
4973 | case '(':\r | |
4974 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;\r | |
4975 | tok->type = TK_SUBEXP_OPEN;\r | |
4976 | break;\r | |
4977 | \r | |
4978 | case ')':\r | |
4979 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;\r | |
4980 | tok->type = TK_SUBEXP_CLOSE;\r | |
4981 | break;\r | |
4982 | \r | |
4983 | case 'w':\r | |
4984 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;\r | |
4985 | tok->type = TK_CHAR_TYPE;\r | |
4986 | tok->u.prop.ctype = ONIGENC_CTYPE_WORD;\r | |
4987 | tok->u.prop.not = 0;\r | |
4988 | break;\r | |
4989 | \r | |
4990 | case 'W':\r | |
4991 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;\r | |
4992 | tok->type = TK_CHAR_TYPE;\r | |
4993 | tok->u.prop.ctype = ONIGENC_CTYPE_WORD;\r | |
4994 | tok->u.prop.not = 1;\r | |
4995 | break;\r | |
4996 | \r | |
4997 | case 'b':\r | |
4998 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;\r | |
4999 | tok->type = TK_ANCHOR;\r | |
b602265d | 5000 | tok->u.anchor = ANCHOR_WORD_BOUNDARY;\r |
14b0e578 CS |
5001 | break;\r |
5002 | \r | |
5003 | case 'B':\r | |
5004 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;\r | |
5005 | tok->type = TK_ANCHOR;\r | |
b602265d DG |
5006 | tok->u.anchor = ANCHOR_NO_WORD_BOUNDARY;\r |
5007 | break;\r | |
5008 | \r | |
5009 | case 'y':\r | |
5010 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER)) break;\r | |
5011 | tok->type = TK_ANCHOR;\r | |
5012 | tok->u.anchor = ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY;\r | |
5013 | break;\r | |
5014 | \r | |
5015 | case 'Y':\r | |
5016 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER)) break;\r | |
5017 | tok->type = TK_ANCHOR;\r | |
5018 | tok->u.anchor = ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY;\r | |
14b0e578 CS |
5019 | break;\r |
5020 | \r | |
5021 | #ifdef USE_WORD_BEGIN_END\r | |
5022 | case '<':\r | |
5023 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;\r | |
5024 | tok->type = TK_ANCHOR;\r | |
5025 | tok->u.anchor = ANCHOR_WORD_BEGIN;\r | |
5026 | break;\r | |
5027 | \r | |
5028 | case '>':\r | |
5029 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;\r | |
5030 | tok->type = TK_ANCHOR;\r | |
5031 | tok->u.anchor = ANCHOR_WORD_END;\r | |
5032 | break;\r | |
5033 | #endif\r | |
5034 | \r | |
5035 | case 's':\r | |
5036 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;\r | |
5037 | tok->type = TK_CHAR_TYPE;\r | |
5038 | tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;\r | |
5039 | tok->u.prop.not = 0;\r | |
5040 | break;\r | |
5041 | \r | |
5042 | case 'S':\r | |
5043 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;\r | |
5044 | tok->type = TK_CHAR_TYPE;\r | |
5045 | tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;\r | |
5046 | tok->u.prop.not = 1;\r | |
5047 | break;\r | |
5048 | \r | |
5049 | case 'd':\r | |
5050 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;\r | |
5051 | tok->type = TK_CHAR_TYPE;\r | |
5052 | tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;\r | |
5053 | tok->u.prop.not = 0;\r | |
5054 | break;\r | |
5055 | \r | |
5056 | case 'D':\r | |
5057 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;\r | |
5058 | tok->type = TK_CHAR_TYPE;\r | |
5059 | tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;\r | |
5060 | tok->u.prop.not = 1;\r | |
5061 | break;\r | |
5062 | \r | |
5063 | case 'h':\r | |
5064 | if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;\r | |
5065 | tok->type = TK_CHAR_TYPE;\r | |
5066 | tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;\r | |
5067 | tok->u.prop.not = 0;\r | |
5068 | break;\r | |
5069 | \r | |
5070 | case 'H':\r | |
5071 | if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;\r | |
5072 | tok->type = TK_CHAR_TYPE;\r | |
5073 | tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;\r | |
5074 | tok->u.prop.not = 1;\r | |
5075 | break;\r | |
5076 | \r | |
b602265d DG |
5077 | case 'K':\r |
5078 | if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP)) break;\r | |
5079 | tok->type = TK_KEEP;\r | |
5080 | break;\r | |
5081 | \r | |
5082 | case 'R':\r | |
5083 | if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE)) break;\r | |
5084 | tok->type = TK_GENERAL_NEWLINE;\r | |
5085 | break;\r | |
5086 | \r | |
5087 | case 'N':\r | |
5088 | if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT)) break;\r | |
5089 | tok->type = TK_NO_NEWLINE;\r | |
5090 | break;\r | |
5091 | \r | |
5092 | case 'O':\r | |
5093 | if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT)) break;\r | |
5094 | tok->type = TK_TRUE_ANYCHAR;\r | |
5095 | break;\r | |
5096 | \r | |
5097 | case 'X':\r | |
5098 | if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER)) break;\r | |
5099 | tok->type = TK_EXTENDED_GRAPHEME_CLUSTER;\r | |
5100 | break;\r | |
5101 | \r | |
14b0e578 CS |
5102 | case 'A':\r |
5103 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;\r | |
5104 | begin_buf:\r | |
5105 | tok->type = TK_ANCHOR;\r | |
5106 | tok->u.subtype = ANCHOR_BEGIN_BUF;\r | |
5107 | break;\r | |
5108 | \r | |
5109 | case 'Z':\r | |
5110 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;\r | |
5111 | tok->type = TK_ANCHOR;\r | |
5112 | tok->u.subtype = ANCHOR_SEMI_END_BUF;\r | |
5113 | break;\r | |
5114 | \r | |
5115 | case 'z':\r | |
5116 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;\r | |
5117 | end_buf:\r | |
5118 | tok->type = TK_ANCHOR;\r | |
5119 | tok->u.subtype = ANCHOR_END_BUF;\r | |
5120 | break;\r | |
5121 | \r | |
5122 | case 'G':\r | |
5123 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR)) break;\r | |
5124 | tok->type = TK_ANCHOR;\r | |
5125 | tok->u.subtype = ANCHOR_BEGIN_POSITION;\r | |
5126 | break;\r | |
5127 | \r | |
5128 | case '`':\r | |
5129 | if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;\r | |
5130 | goto begin_buf;\r | |
5131 | break;\r | |
5132 | \r | |
5133 | case '\'':\r | |
5134 | if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;\r | |
5135 | goto end_buf;\r | |
5136 | break;\r | |
5137 | \r | |
b602265d DG |
5138 | case 'o':\r |
5139 | if (PEND) break;\r | |
5140 | \r | |
5141 | prev = p;\r | |
5142 | if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_O_BRACE_OCTAL)) {\r | |
5143 | PINC;\r | |
5144 | num = scan_unsigned_octal_number(&p, end, 11, enc);\r | |
5145 | if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;\r | |
5146 | if (!PEND) {\r | |
5147 | if (IS_CODE_DIGIT_ASCII(enc, PPEEK))\r | |
5148 | return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;\r | |
5149 | }\r | |
5150 | \r | |
5151 | if ((p > prev + enclen(enc, prev)) && !PEND && PPEEK_IS('}')) {\r | |
5152 | PINC;\r | |
5153 | tok->type = TK_CODE_POINT;\r | |
5154 | tok->u.code = (OnigCodePoint )num;\r | |
5155 | }\r | |
5156 | else {\r | |
5157 | /* can't read nothing or invalid format */\r | |
5158 | p = prev;\r | |
5159 | }\r | |
5160 | }\r | |
5161 | break;\r | |
5162 | \r | |
14b0e578 CS |
5163 | case 'x':\r |
5164 | if (PEND) break;\r | |
5165 | \r | |
5166 | prev = p;\r | |
5167 | if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {\r | |
b602265d DG |
5168 | PINC;\r |
5169 | num = scan_unsigned_hexadecimal_number(&p, end, 0, 8, enc);\r | |
5170 | if (num < 0) {\r | |
5171 | if (num == ONIGERR_TOO_BIG_NUMBER)\r | |
5172 | return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;\r | |
5173 | else\r | |
5174 | return num;\r | |
5175 | }\r | |
5176 | if (!PEND) {\r | |
5177 | if (IS_CODE_XDIGIT_ASCII(enc, PPEEK))\r | |
14b0e578 CS |
5178 | return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;\r |
5179 | }\r | |
5180 | \r | |
b602265d DG |
5181 | if ((p > prev + enclen(enc, prev)) && !PEND && PPEEK_IS('}')) {\r |
5182 | PINC;\r | |
5183 | tok->type = TK_CODE_POINT;\r | |
5184 | tok->u.code = (OnigCodePoint )num;\r | |
5185 | }\r | |
5186 | else {\r | |
5187 | /* can't read nothing or invalid format */\r | |
5188 | p = prev;\r | |
5189 | }\r | |
14b0e578 CS |
5190 | }\r |
5191 | else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {\r | |
b602265d DG |
5192 | num = scan_unsigned_hexadecimal_number(&p, end, 0, 2, enc);\r |
5193 | if (num < 0) return num;\r | |
5194 | if (p == prev) { /* can't read nothing. */\r | |
5195 | num = 0; /* but, it's not error */\r | |
5196 | }\r | |
5197 | tok->type = TK_RAW_BYTE;\r | |
5198 | tok->base = 16;\r | |
5199 | tok->u.c = num;\r | |
14b0e578 CS |
5200 | }\r |
5201 | break;\r | |
5202 | \r | |
5203 | case 'u':\r | |
5204 | if (PEND) break;\r | |
5205 | \r | |
5206 | prev = p;\r | |
5207 | if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {\r | |
b602265d DG |
5208 | num = scan_unsigned_hexadecimal_number(&p, end, 4, 4, enc);\r |
5209 | if (num < 0) return num;\r | |
5210 | if (p == prev) { /* can't read nothing. */\r | |
5211 | num = 0; /* but, it's not error */\r | |
5212 | }\r | |
5213 | tok->type = TK_CODE_POINT;\r | |
5214 | tok->base = 16;\r | |
5215 | tok->u.code = (OnigCodePoint )num;\r | |
14b0e578 CS |
5216 | }\r |
5217 | break;\r | |
5218 | \r | |
5219 | case '1': case '2': case '3': case '4':\r | |
5220 | case '5': case '6': case '7': case '8': case '9':\r | |
5221 | PUNFETCH;\r | |
5222 | prev = p;\r | |
5223 | num = onig_scan_unsigned_number(&p, end, enc);\r | |
5224 | if (num < 0 || num > ONIG_MAX_BACKREF_NUM) {\r | |
5225 | goto skip_backref;\r | |
5226 | }\r | |
5227 | \r | |
5228 | if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) && \r | |
b602265d DG |
5229 | (num <= env->num_mem || num <= 9)) { /* This spec. from GNU regex */\r |
5230 | if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {\r | |
5231 | if (num > env->num_mem || IS_NULL(SCANENV_MEMENV(env)[num].node))\r | |
5232 | return ONIGERR_INVALID_BACKREF;\r | |
5233 | }\r | |
5234 | \r | |
5235 | tok->type = TK_BACKREF;\r | |
5236 | tok->u.backref.num = 1;\r | |
5237 | tok->u.backref.ref1 = num;\r | |
5238 | tok->u.backref.by_name = 0;\r | |
14b0e578 | 5239 | #ifdef USE_BACKREF_WITH_LEVEL\r |
b602265d | 5240 | tok->u.backref.exist_level = 0;\r |
14b0e578 | 5241 | #endif\r |
b602265d | 5242 | break;\r |
14b0e578 CS |
5243 | }\r |
5244 | \r | |
5245 | skip_backref:\r | |
5246 | if (c == '8' || c == '9') {\r | |
b602265d DG |
5247 | /* normal char */\r |
5248 | p = prev; PINC;\r | |
5249 | break;\r | |
14b0e578 CS |
5250 | }\r |
5251 | \r | |
5252 | p = prev;\r | |
5253 | /* fall through */\r | |
5254 | case '0':\r | |
5255 | if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {\r | |
b602265d DG |
5256 | prev = p;\r |
5257 | num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), enc);\r | |
5258 | if (num < 0 || num >= 256) return ONIGERR_TOO_BIG_NUMBER;\r | |
5259 | if (p == prev) { /* can't read nothing. */\r | |
5260 | num = 0; /* but, it's not error */\r | |
5261 | }\r | |
5262 | tok->type = TK_RAW_BYTE;\r | |
5263 | tok->base = 8;\r | |
5264 | tok->u.c = num;\r | |
14b0e578 CS |
5265 | }\r |
5266 | else if (c != '0') {\r | |
b602265d | 5267 | PINC;\r |
14b0e578 CS |
5268 | }\r |
5269 | break;\r | |
5270 | \r | |
14b0e578 | 5271 | case 'k':\r |
b602265d DG |
5272 | if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) {\r |
5273 | PFETCH(c);\r | |
5274 | if (c == '<' || c == '\'') {\r | |
5275 | UChar* name_end;\r | |
5276 | int* backs;\r | |
5277 | int back_num;\r | |
5278 | enum REF_NUM num_type;\r | |
14b0e578 | 5279 | \r |
b602265d | 5280 | prev = p;\r |
14b0e578 CS |
5281 | \r |
5282 | #ifdef USE_BACKREF_WITH_LEVEL\r | |
b602265d DG |
5283 | name_end = NULL_UCHARP; /* no need. escape gcc warning. */\r |
5284 | r = fetch_name_with_level((OnigCodePoint )c, &p, end, &name_end,\r | |
5285 | env, &back_num, &tok->u.backref.level, &num_type);\r | |
5286 | if (r == 1) tok->u.backref.exist_level = 1;\r | |
5287 | else tok->u.backref.exist_level = 0;\r | |
14b0e578 | 5288 | #else\r |
b602265d | 5289 | r = fetch_name(c, &p, end, &name_end, env, &back_num, &num_type, 1);\r |
14b0e578 | 5290 | #endif\r |
b602265d DG |
5291 | if (r < 0) return r;\r |
5292 | \r | |
5293 | if (num_type != IS_NOT_NUM) {\r | |
5294 | if (num_type == IS_REL_NUM) {\r | |
5295 | back_num = backref_rel_to_abs(back_num, env);\r | |
5296 | }\r | |
5297 | if (back_num <= 0)\r | |
5298 | return ONIGERR_INVALID_BACKREF;\r | |
5299 | \r | |
5300 | if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {\r | |
5301 | if (back_num > env->num_mem ||\r | |
5302 | IS_NULL(SCANENV_MEMENV(env)[back_num].node))\r | |
5303 | return ONIGERR_INVALID_BACKREF;\r | |
5304 | }\r | |
5305 | tok->type = TK_BACKREF;\r | |
5306 | tok->u.backref.by_name = 0;\r | |
5307 | tok->u.backref.num = 1;\r | |
5308 | tok->u.backref.ref1 = back_num;\r | |
5309 | }\r | |
5310 | else {\r | |
5311 | num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);\r | |
5312 | if (num <= 0) {\r | |
5313 | onig_scan_env_set_error_string(env,\r | |
5314 | ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);\r | |
5315 | return ONIGERR_UNDEFINED_NAME_REFERENCE;\r | |
5316 | }\r | |
5317 | if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {\r | |
5318 | int i;\r | |
5319 | for (i = 0; i < num; i++) {\r | |
5320 | if (backs[i] > env->num_mem ||\r | |
5321 | IS_NULL(SCANENV_MEMENV(env)[backs[i]].node))\r | |
5322 | return ONIGERR_INVALID_BACKREF;\r | |
5323 | }\r | |
5324 | }\r | |
5325 | \r | |
5326 | tok->type = TK_BACKREF;\r | |
5327 | tok->u.backref.by_name = 1;\r | |
5328 | if (num == 1) {\r | |
5329 | tok->u.backref.num = 1;\r | |
5330 | tok->u.backref.ref1 = backs[0];\r | |
5331 | }\r | |
5332 | else {\r | |
5333 | tok->u.backref.num = num;\r | |
5334 | tok->u.backref.refs = backs;\r | |
5335 | }\r | |
5336 | }\r | |
5337 | }\r | |
5338 | else\r | |
5339 | PUNFETCH;\r | |
14b0e578 CS |
5340 | }\r |
5341 | break;\r | |
14b0e578 | 5342 | \r |
b602265d | 5343 | #ifdef USE_CALL\r |
14b0e578 | 5344 | case 'g':\r |
b602265d DG |
5345 | if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) {\r |
5346 | PFETCH(c);\r | |
5347 | if (c == '<' || c == '\'') {\r | |
5348 | int gnum;\r | |
5349 | UChar* name_end;\r | |
5350 | enum REF_NUM num_type;\r | |
5351 | \r | |
5352 | prev = p;\r | |
5353 | r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env,\r | |
5354 | &gnum, &num_type, 1);\r | |
5355 | if (r < 0) return r;\r | |
5356 | \r | |
5357 | if (num_type != IS_NOT_NUM) {\r | |
5358 | if (num_type == IS_REL_NUM) {\r | |
5359 | gnum = backref_rel_to_abs(gnum, env);\r | |
5360 | if (gnum < 0) {\r | |
5361 | onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,\r | |
5362 | prev, name_end);\r | |
5363 | return ONIGERR_UNDEFINED_GROUP_REFERENCE;\r | |
5364 | }\r | |
5365 | }\r | |
5366 | tok->u.call.by_number = 1;\r | |
5367 | tok->u.call.gnum = gnum;\r | |
5368 | }\r | |
5369 | else {\r | |
5370 | tok->u.call.by_number = 0;\r | |
5371 | tok->u.call.gnum = 0;\r | |
5372 | }\r | |
5373 | \r | |
5374 | tok->type = TK_CALL;\r | |
5375 | tok->u.call.name = prev;\r | |
5376 | tok->u.call.name_end = name_end;\r | |
5377 | }\r | |
5378 | else\r | |
5379 | PUNFETCH;\r | |
14b0e578 CS |
5380 | }\r |
5381 | break;\r | |
5382 | #endif\r | |
5383 | \r | |
5384 | case 'Q':\r | |
5385 | if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE)) {\r | |
b602265d | 5386 | tok->type = TK_QUOTE_OPEN;\r |
14b0e578 CS |
5387 | }\r |
5388 | break;\r | |
5389 | \r | |
5390 | case 'p':\r | |
5391 | case 'P':\r | |
b602265d DG |
5392 | if (!PEND && PPEEK_IS('{') &&\r |
5393 | IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {\r | |
5394 | PINC;\r | |
5395 | tok->type = TK_CHAR_PROPERTY;\r | |
5396 | tok->u.prop.not = (c == 'P' ? 1 : 0);\r | |
5397 | \r | |
5398 | if (!PEND &&\r | |
5399 | IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {\r | |
5400 | PFETCH(c);\r | |
5401 | if (c == '^') {\r | |
5402 | tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);\r | |
5403 | }\r | |
5404 | else\r | |
5405 | PUNFETCH;\r | |
5406 | }\r | |
14b0e578 CS |
5407 | }\r |
5408 | break;\r | |
5409 | \r | |
5410 | default:\r | |
b602265d DG |
5411 | {\r |
5412 | OnigCodePoint c2;\r | |
5413 | \r | |
5414 | PUNFETCH;\r | |
5415 | num = fetch_escaped_value(&p, end, env, &c2);\r | |
5416 | if (num < 0) return num;\r | |
5417 | /* set_raw: */\r | |
5418 | if (tok->u.c != c2) {\r | |
5419 | tok->type = TK_CODE_POINT;\r | |
5420 | tok->u.code = c2;\r | |
5421 | }\r | |
5422 | else { /* string */\r | |
5423 | p = tok->backp + enclen(enc, tok->backp);\r | |
5424 | }\r | |
14b0e578 CS |
5425 | }\r |
5426 | break;\r | |
5427 | }\r | |
5428 | }\r | |
5429 | else {\r | |
5430 | tok->u.c = c;\r | |
5431 | tok->escaped = 0;\r | |
5432 | \r | |
5433 | #ifdef USE_VARIABLE_META_CHARS\r | |
5434 | if ((c != ONIG_INEFFECTIVE_META_CHAR) &&\r | |
b602265d | 5435 | IS_SYNTAX_OP(syn, ONIG_SYN_OP_VARIABLE_META_CHARACTERS)) {\r |
14b0e578 | 5436 | if (c == MC_ANYCHAR(syn))\r |
b602265d | 5437 | goto any_char;\r |
14b0e578 | 5438 | else if (c == MC_ANYTIME(syn))\r |
b602265d | 5439 | goto anytime;\r |
14b0e578 | 5440 | else if (c == MC_ZERO_OR_ONE_TIME(syn))\r |
b602265d | 5441 | goto zero_or_one_time;\r |
14b0e578 | 5442 | else if (c == MC_ONE_OR_MORE_TIME(syn))\r |
b602265d | 5443 | goto one_or_more_time;\r |
14b0e578 | 5444 | else if (c == MC_ANYCHAR_ANYTIME(syn)) {\r |
b602265d DG |
5445 | tok->type = TK_ANYCHAR_ANYTIME;\r |
5446 | goto out;\r | |
14b0e578 CS |
5447 | }\r |
5448 | }\r | |
5449 | #endif\r | |
5450 | \r | |
5451 | switch (c) {\r | |
5452 | case '.':\r | |
5453 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_DOT_ANYCHAR)) break;\r | |
5454 | #ifdef USE_VARIABLE_META_CHARS\r | |
5455 | any_char:\r | |
5456 | #endif\r | |
5457 | tok->type = TK_ANYCHAR;\r | |
5458 | break;\r | |
5459 | \r | |
5460 | case '*':\r | |
5461 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ASTERISK_ZERO_INF)) break;\r | |
5462 | #ifdef USE_VARIABLE_META_CHARS\r | |
5463 | anytime:\r | |
5464 | #endif\r | |
5465 | tok->type = TK_OP_REPEAT;\r | |
5466 | tok->u.repeat.lower = 0;\r | |
5467 | tok->u.repeat.upper = REPEAT_INFINITE;\r | |
5468 | goto greedy_check;\r | |
5469 | break;\r | |
5470 | \r | |
5471 | case '+':\r | |
5472 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_PLUS_ONE_INF)) break;\r | |
5473 | #ifdef USE_VARIABLE_META_CHARS\r | |
5474 | one_or_more_time:\r | |
5475 | #endif\r | |
5476 | tok->type = TK_OP_REPEAT;\r | |
5477 | tok->u.repeat.lower = 1;\r | |
5478 | tok->u.repeat.upper = REPEAT_INFINITE;\r | |
5479 | goto greedy_check;\r | |
5480 | break;\r | |
5481 | \r | |
5482 | case '?':\r | |
5483 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_ZERO_ONE)) break;\r | |
5484 | #ifdef USE_VARIABLE_META_CHARS\r | |
5485 | zero_or_one_time:\r | |
5486 | #endif\r | |
5487 | tok->type = TK_OP_REPEAT;\r | |
5488 | tok->u.repeat.lower = 0;\r | |
5489 | tok->u.repeat.upper = 1;\r | |
5490 | goto greedy_check;\r | |
5491 | break;\r | |
5492 | \r | |
5493 | case '{':\r | |
5494 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break;\r | |
5495 | r = fetch_range_quantifier(&p, end, tok, env);\r | |
5496 | if (r < 0) return r; /* error */\r | |
5497 | if (r == 0) goto greedy_check;\r | |
5498 | else if (r == 2) { /* {n} */\r | |
b602265d DG |
5499 | if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))\r |
5500 | goto possessive_check;\r | |
14b0e578 | 5501 | \r |
b602265d | 5502 | goto greedy_check;\r |
14b0e578 CS |
5503 | }\r |
5504 | /* r == 1 : normal char */\r | |
5505 | break;\r | |
5506 | \r | |
5507 | case '|':\r | |
5508 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_VBAR_ALT)) break;\r | |
5509 | tok->type = TK_ALT;\r | |
5510 | break;\r | |
5511 | \r | |
5512 | case '(':\r | |
b602265d | 5513 | if (!PEND && PPEEK_IS('?') &&\r |
14b0e578 CS |
5514 | IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {\r |
5515 | PINC;\r | |
b602265d DG |
5516 | if (! PEND) {\r |
5517 | c = PPEEK;\r | |
5518 | if (c == '#') {\r | |
14b0e578 | 5519 | PFETCH(c);\r |
b602265d DG |
5520 | while (1) {\r |
5521 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
5522 | PFETCH(c);\r | |
5523 | if (c == MC_ESC(syn)) {\r | |
5524 | if (! PEND) PFETCH(c);\r | |
5525 | }\r | |
5526 | else {\r | |
5527 | if (c == ')') break;\r | |
5528 | }\r | |
14b0e578 | 5529 | }\r |
b602265d DG |
5530 | goto start;\r |
5531 | }\r | |
5532 | else if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_PERL_SUBEXP_CALL)) {\r | |
5533 | int gnum;\r | |
5534 | UChar* name;\r | |
5535 | UChar* name_end;\r | |
5536 | enum REF_NUM num_type;\r | |
5537 | \r | |
5538 | switch (c) {\r | |
5539 | case '&':\r | |
5540 | {\r | |
5541 | PINC;\r | |
5542 | name = p;\r | |
5543 | r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &gnum,\r | |
5544 | &num_type, 0);\r | |
5545 | if (r < 0) return r;\r | |
5546 | \r | |
5547 | tok->type = TK_CALL;\r | |
5548 | tok->u.call.by_number = 0;\r | |
5549 | tok->u.call.gnum = 0;\r | |
5550 | tok->u.call.name = name;\r | |
5551 | tok->u.call.name_end = name_end;\r | |
5552 | }\r | |
5553 | break;\r | |
5554 | \r | |
5555 | case 'R':\r | |
5556 | tok->type = TK_CALL;\r | |
5557 | tok->u.call.by_number = 1;\r | |
5558 | tok->u.call.gnum = 0;\r | |
5559 | tok->u.call.name = p;\r | |
5560 | PINC;\r | |
5561 | if (! PPEEK_IS(')')) return ONIGERR_INVALID_GROUP_NAME;\r | |
5562 | tok->u.call.name_end = p;\r | |
5563 | break;\r | |
5564 | \r | |
5565 | case '-':\r | |
5566 | case '+':\r | |
5567 | goto lparen_qmark_num;\r | |
5568 | break;\r | |
5569 | default:\r | |
5570 | if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto lparen_qmark_end;\r | |
5571 | \r | |
5572 | lparen_qmark_num:\r | |
5573 | {\r | |
5574 | name = p;\r | |
5575 | r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env,\r | |
5576 | &gnum, &num_type, 1);\r | |
5577 | if (r < 0) return r;\r | |
5578 | \r | |
5579 | if (num_type == IS_NOT_NUM) {\r | |
5580 | return ONIGERR_INVALID_GROUP_NAME;\r | |
5581 | }\r | |
5582 | else {\r | |
5583 | if (num_type == IS_REL_NUM) {\r | |
5584 | gnum = backref_rel_to_abs(gnum, env);\r | |
5585 | if (gnum < 0) {\r | |
5586 | onig_scan_env_set_error_string(env,\r | |
5587 | ONIGERR_UNDEFINED_NAME_REFERENCE, name, name_end);\r | |
5588 | return ONIGERR_UNDEFINED_GROUP_REFERENCE;\r | |
5589 | }\r | |
5590 | }\r | |
5591 | tok->u.call.by_number = 1;\r | |
5592 | tok->u.call.gnum = gnum;\r | |
5593 | }\r | |
5594 | \r | |
5595 | tok->type = TK_CALL;\r | |
5596 | tok->u.call.name = name;\r | |
5597 | tok->u.call.name_end = name_end;\r | |
5598 | }\r | |
5599 | break;\r | |
14b0e578 CS |
5600 | }\r |
5601 | }\r | |
14b0e578 | 5602 | }\r |
b602265d | 5603 | lparen_qmark_end:\r |
14b0e578 CS |
5604 | PUNFETCH;\r |
5605 | }\r | |
5606 | \r | |
5607 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;\r | |
5608 | tok->type = TK_SUBEXP_OPEN;\r | |
5609 | break;\r | |
5610 | \r | |
5611 | case ')':\r | |
5612 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;\r | |
5613 | tok->type = TK_SUBEXP_CLOSE;\r | |
5614 | break;\r | |
5615 | \r | |
5616 | case '^':\r | |
5617 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;\r | |
5618 | tok->type = TK_ANCHOR;\r | |
b602265d DG |
5619 | tok->u.subtype = (IS_SINGLELINE(env->options)\r |
5620 | ? ANCHOR_BEGIN_BUF : ANCHOR_BEGIN_LINE);\r | |
14b0e578 CS |
5621 | break;\r |
5622 | \r | |
5623 | case '$':\r | |
5624 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;\r | |
5625 | tok->type = TK_ANCHOR;\r | |
b602265d DG |
5626 | tok->u.subtype = (IS_SINGLELINE(env->options)\r |
5627 | ? ANCHOR_SEMI_END_BUF : ANCHOR_END_LINE);\r | |
14b0e578 CS |
5628 | break;\r |
5629 | \r | |
5630 | case '[':\r | |
5631 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACKET_CC)) break;\r | |
5632 | tok->type = TK_CC_OPEN;\r | |
5633 | break;\r | |
5634 | \r | |
5635 | case ']':\r | |
5636 | if (*src > env->pattern) /* /].../ is allowed. */\r | |
b602265d | 5637 | CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]");\r |
14b0e578 CS |
5638 | break;\r |
5639 | \r | |
5640 | case '#':\r | |
b602265d DG |
5641 | if (IS_EXTEND(env->options)) {\r |
5642 | while (!PEND) {\r | |
5643 | PFETCH(c);\r | |
5644 | if (ONIGENC_IS_CODE_NEWLINE(enc, c))\r | |
5645 | break;\r | |
5646 | }\r | |
5647 | goto start;\r | |
5648 | break;\r | |
14b0e578 CS |
5649 | }\r |
5650 | break;\r | |
5651 | \r | |
5652 | case ' ': case '\t': case '\n': case '\r': case '\f':\r | |
b602265d DG |
5653 | if (IS_EXTEND(env->options))\r |
5654 | goto start;\r | |
14b0e578 CS |
5655 | break;\r |
5656 | \r | |
5657 | default:\r | |
5658 | /* string */\r | |
5659 | break;\r | |
5660 | }\r | |
5661 | }\r | |
5662 | \r | |
5663 | #ifdef USE_VARIABLE_META_CHARS\r | |
5664 | out:\r | |
5665 | #endif\r | |
5666 | *src = p;\r | |
5667 | return tok->type;\r | |
5668 | }\r | |
5669 | \r | |
5670 | static int\r | |
5671 | add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not,\r | |
b602265d DG |
5672 | OnigEncoding enc ARG_UNUSED, OnigCodePoint sb_out,\r |
5673 | const OnigCodePoint mbr[])\r | |
14b0e578 CS |
5674 | {\r |
5675 | int i, r;\r | |
5676 | OnigCodePoint j;\r | |
5677 | \r | |
5678 | int n = ONIGENC_CODE_RANGE_NUM(mbr);\r | |
5679 | \r | |
5680 | if (not == 0) {\r | |
5681 | for (i = 0; i < n; i++) {\r | |
5682 | for (j = ONIGENC_CODE_RANGE_FROM(mbr, i);\r | |
5683 | j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) {\r | |
b602265d DG |
5684 | if (j >= sb_out) {\r |
5685 | if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) {\r | |
5686 | r = add_code_range_to_buf(&(cc->mbuf), j,\r | |
5687 | ONIGENC_CODE_RANGE_TO(mbr, i));\r | |
5688 | if (r != 0) return r;\r | |
5689 | i++;\r | |
5690 | }\r | |
5691 | \r | |
5692 | goto sb_end;\r | |
5693 | }\r | |
14b0e578 CS |
5694 | BITSET_SET_BIT(cc->bs, j);\r |
5695 | }\r | |
5696 | }\r | |
5697 | \r | |
5698 | sb_end:\r | |
5699 | for ( ; i < n; i++) {\r | |
5700 | r = add_code_range_to_buf(&(cc->mbuf),\r | |
5701 | ONIGENC_CODE_RANGE_FROM(mbr, i),\r | |
5702 | ONIGENC_CODE_RANGE_TO(mbr, i));\r | |
5703 | if (r != 0) return r;\r | |
5704 | }\r | |
5705 | }\r | |
5706 | else {\r | |
5707 | OnigCodePoint prev = 0;\r | |
5708 | \r | |
5709 | for (i = 0; i < n; i++) {\r | |
b602265d DG |
5710 | for (j = prev; j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) {\r |
5711 | if (j >= sb_out) {\r | |
5712 | goto sb_end2;\r | |
5713 | }\r | |
5714 | BITSET_SET_BIT(cc->bs, j);\r | |
14b0e578 CS |
5715 | }\r |
5716 | prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;\r | |
5717 | }\r | |
5718 | for (j = prev; j < sb_out; j++) {\r | |
5719 | BITSET_SET_BIT(cc->bs, j);\r | |
5720 | }\r | |
5721 | \r | |
5722 | sb_end2:\r | |
5723 | prev = sb_out;\r | |
5724 | \r | |
5725 | for (i = 0; i < n; i++) {\r | |
5726 | if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) {\r | |
b602265d | 5727 | r = add_code_range_to_buf(&(cc->mbuf), prev,\r |
14b0e578 | 5728 | ONIGENC_CODE_RANGE_FROM(mbr, i) - 1);\r |
b602265d | 5729 | if (r != 0) return r;\r |
14b0e578 CS |
5730 | }\r |
5731 | prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;\r | |
b602265d DG |
5732 | if (prev == 0) goto end;\r |
5733 | }\r | |
5734 | \r | |
5735 | r = add_code_range_to_buf(&(cc->mbuf), prev, MAX_CODE_POINT);\r | |
5736 | if (r != 0) return r;\r | |
5737 | }\r | |
5738 | \r | |
5739 | end:\r | |
5740 | return 0;\r | |
5741 | }\r | |
5742 | \r | |
5743 | static int\r | |
5744 | add_ctype_to_cc_by_range_limit(CClassNode* cc, int ctype ARG_UNUSED, int not,\r | |
5745 | OnigEncoding enc ARG_UNUSED,\r | |
5746 | OnigCodePoint sb_out,\r | |
5747 | const OnigCodePoint mbr[], OnigCodePoint limit)\r | |
5748 | {\r | |
5749 | int i, r;\r | |
5750 | OnigCodePoint j;\r | |
5751 | OnigCodePoint from;\r | |
5752 | OnigCodePoint to;\r | |
5753 | \r | |
5754 | int n = ONIGENC_CODE_RANGE_NUM(mbr);\r | |
5755 | \r | |
5756 | if (not == 0) {\r | |
5757 | for (i = 0; i < n; i++) {\r | |
5758 | for (j = ONIGENC_CODE_RANGE_FROM(mbr, i);\r | |
5759 | j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) {\r | |
5760 | if (j > limit) goto end;\r | |
5761 | if (j >= sb_out) {\r | |
5762 | if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) {\r | |
5763 | to = ONIGENC_CODE_RANGE_TO(mbr, i);\r | |
5764 | if (to > limit) to = limit;\r | |
5765 | r = add_code_range_to_buf(&(cc->mbuf), j, to);\r | |
5766 | if (r != 0) return r;\r | |
5767 | i++;\r | |
5768 | }\r | |
5769 | \r | |
5770 | goto sb_end;\r | |
5771 | }\r | |
5772 | BITSET_SET_BIT(cc->bs, j);\r | |
5773 | }\r | |
14b0e578 | 5774 | }\r |
b602265d DG |
5775 | \r |
5776 | sb_end:\r | |
5777 | for ( ; i < n; i++) {\r | |
5778 | from = ONIGENC_CODE_RANGE_FROM(mbr, i);\r | |
5779 | to = ONIGENC_CODE_RANGE_TO(mbr, i);\r | |
5780 | if (from > limit) break;\r | |
5781 | if (to > limit) to = limit;\r | |
5782 | r = add_code_range_to_buf(&(cc->mbuf), from, to);\r | |
14b0e578 CS |
5783 | if (r != 0) return r;\r |
5784 | }\r | |
5785 | }\r | |
b602265d DG |
5786 | else {\r |
5787 | OnigCodePoint prev = 0;\r | |
5788 | \r | |
5789 | for (i = 0; i < n; i++) {\r | |
5790 | from = ONIGENC_CODE_RANGE_FROM(mbr, i);\r | |
5791 | if (from > limit) {\r | |
5792 | for (j = prev; j < sb_out; j++) {\r | |
5793 | BITSET_SET_BIT(cc->bs, j);\r | |
5794 | }\r | |
5795 | goto sb_end2;\r | |
5796 | }\r | |
5797 | for (j = prev; j < from; j++) {\r | |
5798 | if (j >= sb_out) goto sb_end2;\r | |
5799 | BITSET_SET_BIT(cc->bs, j);\r | |
5800 | }\r | |
5801 | prev = ONIGENC_CODE_RANGE_TO(mbr, i);\r | |
5802 | if (prev > limit) prev = limit;\r | |
5803 | prev++;\r | |
5804 | if (prev == 0) goto end;\r | |
5805 | }\r | |
5806 | for (j = prev; j < sb_out; j++) {\r | |
5807 | BITSET_SET_BIT(cc->bs, j);\r | |
5808 | }\r | |
5809 | \r | |
5810 | sb_end2:\r | |
5811 | prev = sb_out;\r | |
5812 | \r | |
5813 | for (i = 0; i < n; i++) {\r | |
5814 | from = ONIGENC_CODE_RANGE_FROM(mbr, i);\r | |
5815 | if (from > limit) goto last;\r | |
5816 | \r | |
5817 | if (prev < from) {\r | |
5818 | r = add_code_range_to_buf(&(cc->mbuf), prev, from - 1);\r | |
5819 | if (r != 0) return r;\r | |
5820 | }\r | |
5821 | prev = ONIGENC_CODE_RANGE_TO(mbr, i);\r | |
5822 | if (prev > limit) prev = limit;\r | |
5823 | prev++;\r | |
5824 | if (prev == 0) goto end;\r | |
5825 | }\r | |
5826 | \r | |
5827 | last:\r | |
5828 | r = add_code_range_to_buf(&(cc->mbuf), prev, MAX_CODE_POINT);\r | |
5829 | if (r != 0) return r;\r | |
5830 | }\r | |
14b0e578 | 5831 | \r |
b602265d | 5832 | end:\r |
14b0e578 CS |
5833 | return 0;\r |
5834 | }\r | |
5835 | \r | |
5836 | static int\r | |
5837 | add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)\r | |
5838 | {\r | |
b602265d DG |
5839 | #define ASCII_LIMIT 127\r |
5840 | \r | |
14b0e578 | 5841 | int c, r;\r |
b602265d | 5842 | int ascii_mode;\r |
14b0e578 | 5843 | const OnigCodePoint *ranges;\r |
b602265d | 5844 | OnigCodePoint limit;\r |
14b0e578 CS |
5845 | OnigCodePoint sb_out;\r |
5846 | OnigEncoding enc = env->enc;\r | |
5847 | \r | |
b602265d DG |
5848 | ascii_mode = IS_ASCII_MODE_CTYPE_OPTION(ctype, env->options);\r |
5849 | \r | |
14b0e578 CS |
5850 | r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges);\r |
5851 | if (r == 0) {\r | |
b602265d DG |
5852 | if (ascii_mode == 0)\r |
5853 | r = add_ctype_to_cc_by_range(cc, ctype, not, env->enc, sb_out, ranges);\r | |
5854 | else\r | |
5855 | r = add_ctype_to_cc_by_range_limit(cc, ctype, not, env->enc, sb_out,\r | |
5856 | ranges, ASCII_LIMIT);\r | |
5857 | return r;\r | |
14b0e578 CS |
5858 | }\r |
5859 | else if (r != ONIG_NO_SUPPORT_CONFIG) {\r | |
5860 | return r;\r | |
5861 | }\r | |
5862 | \r | |
5863 | r = 0;\r | |
b602265d DG |
5864 | limit = ascii_mode ? ASCII_LIMIT : SINGLE_BYTE_SIZE;\r |
5865 | \r | |
14b0e578 CS |
5866 | switch (ctype) {\r |
5867 | case ONIGENC_CTYPE_ALPHA:\r | |
5868 | case ONIGENC_CTYPE_BLANK:\r | |
5869 | case ONIGENC_CTYPE_CNTRL:\r | |
5870 | case ONIGENC_CTYPE_DIGIT:\r | |
5871 | case ONIGENC_CTYPE_LOWER:\r | |
5872 | case ONIGENC_CTYPE_PUNCT:\r | |
5873 | case ONIGENC_CTYPE_SPACE:\r | |
5874 | case ONIGENC_CTYPE_UPPER:\r | |
5875 | case ONIGENC_CTYPE_XDIGIT:\r | |
5876 | case ONIGENC_CTYPE_ASCII:\r | |
5877 | case ONIGENC_CTYPE_ALNUM:\r | |
5878 | if (not != 0) {\r | |
b602265d DG |
5879 | for (c = 0; c < (int )limit; c++) {\r |
5880 | if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))\r | |
5881 | BITSET_SET_BIT(cc->bs, c);\r | |
5882 | }\r | |
5883 | for (c = limit; c < SINGLE_BYTE_SIZE; c++) {\r | |
5884 | BITSET_SET_BIT(cc->bs, c);\r | |
14b0e578 | 5885 | }\r |
b602265d | 5886 | \r |
14b0e578 CS |
5887 | ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);\r |
5888 | }\r | |
5889 | else {\r | |
b602265d DG |
5890 | for (c = 0; c < (int )limit; c++) {\r |
5891 | if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))\r | |
5892 | BITSET_SET_BIT(cc->bs, c);\r | |
14b0e578 CS |
5893 | }\r |
5894 | }\r | |
5895 | break;\r | |
5896 | \r | |
5897 | case ONIGENC_CTYPE_GRAPH:\r | |
5898 | case ONIGENC_CTYPE_PRINT:\r | |
b602265d | 5899 | case ONIGENC_CTYPE_WORD:\r |
14b0e578 | 5900 | if (not != 0) {\r |
b602265d DG |
5901 | for (c = 0; c < (int )limit; c++) {\r |
5902 | if (ONIGENC_CODE_TO_MBCLEN(enc, c) > 0 /* check invalid code point */\r | |
5903 | && ! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))\r | |
5904 | BITSET_SET_BIT(cc->bs, c);\r | |
14b0e578 | 5905 | }\r |
b602265d DG |
5906 | for (c = limit; c < SINGLE_BYTE_SIZE; c++) {\r |
5907 | if (ONIGENC_CODE_TO_MBCLEN(enc, c) > 0)\r | |
5908 | BITSET_SET_BIT(cc->bs, c);\r | |
14b0e578 | 5909 | }\r |
14b0e578 CS |
5910 | }\r |
5911 | else {\r | |
b602265d DG |
5912 | for (c = 0; c < (int )limit; c++) {\r |
5913 | if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))\r | |
5914 | BITSET_SET_BIT(cc->bs, c);\r | |
14b0e578 | 5915 | }\r |
b602265d DG |
5916 | if (ascii_mode == 0)\r |
5917 | ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);\r | |
14b0e578 CS |
5918 | }\r |
5919 | break;\r | |
5920 | \r | |
5921 | default:\r | |
5922 | return ONIGERR_PARSER_BUG;\r | |
5923 | break;\r | |
5924 | }\r | |
5925 | \r | |
5926 | return r;\r | |
5927 | }\r | |
5928 | \r | |
5929 | static int\r | |
5930 | parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)\r | |
5931 | {\r | |
5932 | #define POSIX_BRACKET_CHECK_LIMIT_LENGTH 20\r | |
5933 | #define POSIX_BRACKET_NAME_MIN_LEN 4\r | |
5934 | \r | |
5935 | static PosixBracketEntryType PBS[] = {\r | |
5936 | { (UChar* )"alnum", ONIGENC_CTYPE_ALNUM, 5 },\r | |
5937 | { (UChar* )"alpha", ONIGENC_CTYPE_ALPHA, 5 },\r | |
5938 | { (UChar* )"blank", ONIGENC_CTYPE_BLANK, 5 },\r | |
5939 | { (UChar* )"cntrl", ONIGENC_CTYPE_CNTRL, 5 },\r | |
5940 | { (UChar* )"digit", ONIGENC_CTYPE_DIGIT, 5 },\r | |
5941 | { (UChar* )"graph", ONIGENC_CTYPE_GRAPH, 5 },\r | |
5942 | { (UChar* )"lower", ONIGENC_CTYPE_LOWER, 5 },\r | |
5943 | { (UChar* )"print", ONIGENC_CTYPE_PRINT, 5 },\r | |
5944 | { (UChar* )"punct", ONIGENC_CTYPE_PUNCT, 5 },\r | |
5945 | { (UChar* )"space", ONIGENC_CTYPE_SPACE, 5 },\r | |
5946 | { (UChar* )"upper", ONIGENC_CTYPE_UPPER, 5 },\r | |
5947 | { (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 },\r | |
5948 | { (UChar* )"ascii", ONIGENC_CTYPE_ASCII, 5 },\r | |
5949 | { (UChar* )"word", ONIGENC_CTYPE_WORD, 4 },\r | |
5950 | { (UChar* )NULL, -1, 0 }\r | |
5951 | };\r | |
5952 | \r | |
5953 | PosixBracketEntryType *pb;\r | |
5954 | int not, i, r;\r | |
5955 | OnigCodePoint c;\r | |
5956 | OnigEncoding enc = env->enc;\r | |
5957 | UChar *p = *src;\r | |
5958 | \r | |
5959 | if (PPEEK_IS('^')) {\r | |
5960 | PINC_S;\r | |
5961 | not = 1;\r | |
5962 | }\r | |
5963 | else\r | |
5964 | not = 0;\r | |
5965 | \r | |
5966 | if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MIN_LEN + 3)\r | |
5967 | goto not_posix_bracket;\r | |
5968 | \r | |
5969 | for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {\r | |
5970 | if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) {\r | |
5971 | p = (UChar* )onigenc_step(enc, p, end, pb->len);\r | |
5972 | if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0)\r | |
5973 | return ONIGERR_INVALID_POSIX_BRACKET_TYPE;\r | |
5974 | \r | |
5975 | r = add_ctype_to_cc(cc, pb->ctype, not, env);\r | |
5976 | if (r != 0) return r;\r | |
5977 | \r | |
5978 | PINC_S; PINC_S;\r | |
5979 | *src = p;\r | |
5980 | return 0;\r | |
5981 | }\r | |
5982 | }\r | |
5983 | \r | |
5984 | not_posix_bracket:\r | |
5985 | c = 0;\r | |
5986 | i = 0;\r | |
5987 | while (!PEND && ((c = PPEEK) != ':') && c != ']') {\r | |
5988 | PINC_S;\r | |
5989 | if (++i > POSIX_BRACKET_CHECK_LIMIT_LENGTH) break;\r | |
5990 | }\r | |
5991 | if (c == ':' && ! PEND) {\r | |
5992 | PINC_S;\r | |
5993 | if (! PEND) {\r | |
5994 | PFETCH_S(c);\r | |
5995 | if (c == ']')\r | |
5996 | return ONIGERR_INVALID_POSIX_BRACKET_TYPE;\r | |
5997 | }\r | |
5998 | }\r | |
5999 | \r | |
6000 | return 1; /* 1: is not POSIX bracket, but no error. */\r | |
6001 | }\r | |
6002 | \r | |
6003 | static int\r | |
6004 | fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env)\r | |
6005 | {\r | |
6006 | int r;\r | |
6007 | OnigCodePoint c;\r | |
6008 | OnigEncoding enc = env->enc;\r | |
6009 | UChar *prev, *start, *p = *src;\r | |
6010 | \r | |
6011 | r = 0;\r | |
6012 | start = prev = p;\r | |
6013 | \r | |
6014 | while (!PEND) {\r | |
6015 | prev = p;\r | |
6016 | PFETCH_S(c);\r | |
6017 | if (c == '}') {\r | |
6018 | r = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, start, prev);\r | |
6019 | if (r < 0) break;\r | |
6020 | \r | |
6021 | *src = p;\r | |
6022 | return r;\r | |
6023 | }\r | |
6024 | else if (c == '(' || c == ')' || c == '{' || c == '|') {\r | |
6025 | r = ONIGERR_INVALID_CHAR_PROPERTY_NAME;\r | |
6026 | break;\r | |
6027 | }\r | |
6028 | }\r | |
6029 | \r | |
6030 | onig_scan_env_set_error_string(env, r, *src, prev);\r | |
6031 | return r;\r | |
6032 | }\r | |
6033 | \r | |
6034 | static int\r | |
b602265d | 6035 | parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)\r |
14b0e578 CS |
6036 | {\r |
6037 | int r, ctype;\r | |
6038 | CClassNode* cc;\r | |
6039 | \r | |
6040 | ctype = fetch_char_property_to_ctype(src, end, env);\r | |
6041 | if (ctype < 0) return ctype;\r | |
6042 | \r | |
6043 | *np = node_new_cclass();\r | |
6044 | CHECK_NULL_RETURN_MEMERR(*np);\r | |
b602265d | 6045 | cc = CCLASS_(*np);\r |
14b0e578 CS |
6046 | r = add_ctype_to_cc(cc, ctype, 0, env);\r |
6047 | if (r != 0) return r;\r | |
6048 | if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);\r | |
6049 | \r | |
6050 | return 0;\r | |
6051 | }\r | |
6052 | \r | |
6053 | \r | |
/* Parser state while scanning the items of a character class. */
enum CCSTATE {
  CCS_VALUE    = 0,  /* a single value is pending                */
  CCS_RANGE    = 1,  /* a range operator followed the value      */
  CCS_COMPLETE = 2,  /* a range has just been completed          */
  CCS_START    = 3   /* nothing read yet (also set after "&&")   */
};
6060 | \r | |
/* Kind of the value most recently read inside a character class. */
enum CCVALTYPE {
  CCV_SB         = 0,  /* value stored in the single-byte bitset   */
  CCV_CODE_POINT = 1,  /* value stored in the code range buffer    */
  CCV_CLASS      = 2   /* a whole class (ctype, property, bracket) */
};
6066 | \r | |
6067 | static int\r | |
6068 | next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type,\r | |
b602265d | 6069 | enum CCSTATE* state, ScanEnv* env)\r |
14b0e578 CS |
6070 | {\r |
6071 | int r;\r | |
6072 | \r | |
6073 | if (*state == CCS_RANGE)\r | |
6074 | return ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE;\r | |
6075 | \r | |
6076 | if (*state == CCS_VALUE && *type != CCV_CLASS) {\r | |
6077 | if (*type == CCV_SB)\r | |
6078 | BITSET_SET_BIT(cc->bs, (int )(*vs));\r | |
6079 | else if (*type == CCV_CODE_POINT) {\r | |
6080 | r = add_code_range(&(cc->mbuf), env, *vs, *vs);\r | |
6081 | if (r < 0) return r;\r | |
6082 | }\r | |
6083 | }\r | |
6084 | \r | |
6085 | *state = CCS_VALUE;\r | |
6086 | *type = CCV_CLASS;\r | |
6087 | return 0;\r | |
6088 | }\r | |
6089 | \r | |
6090 | static int\r | |
b602265d DG |
6091 | next_state_val(CClassNode* cc, OnigCodePoint *from, OnigCodePoint to,\r |
6092 | int* from_israw, int to_israw,\r | |
6093 | enum CCVALTYPE intype, enum CCVALTYPE* type,\r | |
6094 | enum CCSTATE* state, ScanEnv* env)\r | |
14b0e578 CS |
6095 | {\r |
6096 | int r;\r | |
6097 | \r | |
6098 | switch (*state) {\r | |
6099 | case CCS_VALUE:\r | |
b602265d DG |
6100 | if (*type == CCV_SB) {\r |
6101 | if (*from > 0xff)\r | |
6102 | return ONIGERR_INVALID_CODE_POINT_VALUE;\r | |
6103 | \r | |
6104 | BITSET_SET_BIT(cc->bs, (int )(*from));\r | |
6105 | }\r | |
14b0e578 | 6106 | else if (*type == CCV_CODE_POINT) {\r |
b602265d | 6107 | r = add_code_range(&(cc->mbuf), env, *from, *from);\r |
14b0e578 CS |
6108 | if (r < 0) return r;\r |
6109 | }\r | |
6110 | break;\r | |
6111 | \r | |
6112 | case CCS_RANGE:\r | |
6113 | if (intype == *type) {\r | |
6114 | if (intype == CCV_SB) {\r | |
b602265d | 6115 | if (*from > 0xff || to > 0xff)\r |
14b0e578 CS |
6116 | return ONIGERR_INVALID_CODE_POINT_VALUE;\r |
6117 | \r | |
b602265d DG |
6118 | if (*from > to) {\r |
6119 | if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))\r | |
6120 | goto ccs_range_end;\r | |
6121 | else\r | |
6122 | return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;\r | |
6123 | }\r | |
6124 | bitset_set_range(cc->bs, (int )*from, (int )to);\r | |
14b0e578 CS |
6125 | }\r |
6126 | else {\r | |
b602265d DG |
6127 | r = add_code_range(&(cc->mbuf), env, *from, to);\r |
6128 | if (r < 0) return r;\r | |
14b0e578 CS |
6129 | }\r |
6130 | }\r | |
6131 | else {\r | |
b602265d DG |
6132 | if (*from > to) {\r |
6133 | if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))\r | |
6134 | goto ccs_range_end;\r | |
6135 | else\r | |
6136 | return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;\r | |
14b0e578 | 6137 | }\r |
b602265d DG |
6138 | bitset_set_range(cc->bs, (int )*from, (int )(to < 0xff ? to : 0xff));\r |
6139 | r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*from, to);\r | |
6140 | if (r < 0) return r;\r | |
14b0e578 CS |
6141 | }\r |
6142 | ccs_range_end:\r | |
6143 | *state = CCS_COMPLETE;\r | |
6144 | break;\r | |
6145 | \r | |
6146 | case CCS_COMPLETE:\r | |
6147 | case CCS_START:\r | |
6148 | *state = CCS_VALUE;\r | |
6149 | break;\r | |
6150 | \r | |
6151 | default:\r | |
6152 | break;\r | |
6153 | }\r | |
6154 | \r | |
b602265d DG |
6155 | *from_israw = to_israw;\r |
6156 | *from = to;\r | |
6157 | *type = intype;\r | |
14b0e578 CS |
6158 | return 0;\r |
6159 | }\r | |
6160 | \r | |
6161 | static int\r | |
6162 | code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,\r | |
b602265d | 6163 | ScanEnv* env)\r |
14b0e578 CS |
6164 | {\r |
6165 | int in_esc;\r | |
6166 | OnigCodePoint code;\r | |
6167 | OnigEncoding enc = env->enc;\r | |
6168 | UChar* p = from;\r | |
6169 | \r | |
6170 | in_esc = 0;\r | |
6171 | while (! PEND) {\r | |
6172 | if (ignore_escaped && in_esc) {\r | |
6173 | in_esc = 0;\r | |
6174 | }\r | |
6175 | else {\r | |
6176 | PFETCH_S(code);\r | |
6177 | if (code == c) return 1;\r | |
6178 | if (code == MC_ESC(env->syntax)) in_esc = 1;\r | |
6179 | }\r | |
6180 | }\r | |
6181 | return 0;\r | |
6182 | }\r | |
6183 | \r | |
6184 | static int\r | |
b602265d | 6185 | parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)\r |
14b0e578 CS |
6186 | {\r |
6187 | int r, neg, len, fetched, and_start;\r | |
6188 | OnigCodePoint v, vs;\r | |
6189 | UChar *p;\r | |
6190 | Node* node;\r | |
6191 | CClassNode *cc, *prev_cc;\r | |
6192 | CClassNode work_cc;\r | |
6193 | \r | |
6194 | enum CCSTATE state;\r | |
6195 | enum CCVALTYPE val_type, in_type;\r | |
6196 | int val_israw, in_israw;\r | |
6197 | \r | |
14b0e578 | 6198 | *np = NULL_NODE;\r |
b602265d DG |
6199 | env->parse_depth++;\r |
6200 | if (env->parse_depth > ParseDepthLimit)\r | |
6201 | return ONIGERR_PARSE_DEPTH_LIMIT_OVER;\r | |
6202 | prev_cc = (CClassNode* )NULL;\r | |
14b0e578 CS |
6203 | r = fetch_token_in_cc(tok, src, end, env);\r |
6204 | if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) {\r | |
6205 | neg = 1;\r | |
6206 | r = fetch_token_in_cc(tok, src, end, env);\r | |
6207 | }\r | |
6208 | else {\r | |
6209 | neg = 0;\r | |
6210 | }\r | |
6211 | \r | |
6212 | if (r < 0) return r;\r | |
6213 | if (r == TK_CC_CLOSE) {\r | |
6214 | if (! code_exist_check((OnigCodePoint )']',\r | |
6215 | *src, env->pattern_end, 1, env))\r | |
6216 | return ONIGERR_EMPTY_CHAR_CLASS;\r | |
6217 | \r | |
6218 | CC_ESC_WARN(env, (UChar* )"]");\r | |
6219 | r = tok->type = TK_CHAR; /* allow []...] */\r | |
6220 | }\r | |
6221 | \r | |
6222 | *np = node = node_new_cclass();\r | |
6223 | CHECK_NULL_RETURN_MEMERR(node);\r | |
b602265d | 6224 | cc = CCLASS_(node);\r |
14b0e578 CS |
6225 | \r |
6226 | and_start = 0;\r | |
6227 | state = CCS_START;\r | |
6228 | p = *src;\r | |
6229 | while (r != TK_CC_CLOSE) {\r | |
6230 | fetched = 0;\r | |
6231 | switch (r) {\r | |
6232 | case TK_CHAR:\r | |
b602265d | 6233 | any_char_in:\r |
14b0e578 CS |
6234 | len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.c);\r |
6235 | if (len > 1) {\r | |
b602265d | 6236 | in_type = CCV_CODE_POINT;\r |
14b0e578 CS |
6237 | }\r |
6238 | else if (len < 0) {\r | |
b602265d DG |
6239 | r = len;\r |
6240 | goto err;\r | |
14b0e578 CS |
6241 | }\r |
6242 | else {\r | |
b602265d DG |
6243 | /* sb_char: */\r |
6244 | in_type = CCV_SB;\r | |
14b0e578 CS |
6245 | }\r |
6246 | v = (OnigCodePoint )tok->u.c;\r | |
6247 | in_israw = 0;\r | |
6248 | goto val_entry2;\r | |
6249 | break;\r | |
6250 | \r | |
6251 | case TK_RAW_BYTE:\r | |
6252 | /* tok->base != 0 : octal or hexadec. */\r | |
6253 | if (! ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base != 0) {\r | |
b602265d DG |
6254 | UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];\r |
6255 | UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN;\r | |
6256 | UChar* psave = p;\r | |
6257 | int i, base = tok->base;\r | |
6258 | \r | |
6259 | buf[0] = tok->u.c;\r | |
6260 | for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) {\r | |
6261 | r = fetch_token_in_cc(tok, &p, end, env);\r | |
6262 | if (r < 0) goto err;\r | |
6263 | if (r != TK_RAW_BYTE || tok->base != base) {\r | |
6264 | fetched = 1;\r | |
6265 | break;\r | |
6266 | }\r | |
6267 | buf[i] = tok->u.c;\r | |
6268 | }\r | |
6269 | \r | |
6270 | if (i < ONIGENC_MBC_MINLEN(env->enc)) {\r | |
6271 | r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;\r | |
6272 | goto err;\r | |
6273 | }\r | |
6274 | \r | |
6275 | len = enclen(env->enc, buf);\r | |
6276 | if (i < len) {\r | |
6277 | r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;\r | |
6278 | goto err;\r | |
6279 | }\r | |
6280 | else if (i > len) { /* fetch back */\r | |
6281 | p = psave;\r | |
6282 | for (i = 1; i < len; i++) {\r | |
6283 | r = fetch_token_in_cc(tok, &p, end, env);\r | |
6284 | }\r | |
6285 | fetched = 0;\r | |
6286 | }\r | |
6287 | \r | |
6288 | if (i == 1) {\r | |
6289 | v = (OnigCodePoint )buf[0];\r | |
6290 | goto raw_single;\r | |
6291 | }\r | |
6292 | else {\r | |
6293 | v = ONIGENC_MBC_TO_CODE(env->enc, buf, bufe);\r | |
6294 | in_type = CCV_CODE_POINT;\r | |
6295 | }\r | |
6296 | }\r | |
6297 | else {\r | |
6298 | v = (OnigCodePoint )tok->u.c;\r | |
6299 | raw_single:\r | |
6300 | in_type = CCV_SB;\r | |
6301 | }\r | |
6302 | in_israw = 1;\r | |
6303 | goto val_entry2;\r | |
6304 | break;\r | |
6305 | \r | |
6306 | case TK_CODE_POINT:\r | |
6307 | v = tok->u.code;\r | |
6308 | in_israw = 1;\r | |
6309 | val_entry:\r | |
6310 | len = ONIGENC_CODE_TO_MBCLEN(env->enc, v);\r | |
6311 | if (len < 0) {\r | |
6312 | r = len;\r | |
6313 | goto err;\r | |
6314 | }\r | |
6315 | in_type = (len == 1 ? CCV_SB : CCV_CODE_POINT);\r | |
6316 | val_entry2:\r | |
6317 | r = next_state_val(cc, &vs, v, &val_israw, in_israw, in_type, &val_type,\r | |
6318 | &state, env);\r | |
6319 | if (r != 0) goto err;\r | |
6320 | break;\r | |
6321 | \r | |
6322 | case TK_POSIX_BRACKET_OPEN:\r | |
6323 | r = parse_posix_bracket(cc, &p, end, env);\r | |
6324 | if (r < 0) goto err;\r | |
6325 | if (r == 1) { /* is not POSIX bracket */\r | |
6326 | CC_ESC_WARN(env, (UChar* )"[");\r | |
6327 | p = tok->backp;\r | |
6328 | v = (OnigCodePoint )tok->u.c;\r | |
6329 | in_israw = 0;\r | |
6330 | goto val_entry;\r | |
6331 | }\r | |
6332 | goto next_class;\r | |
6333 | break;\r | |
6334 | \r | |
6335 | case TK_CHAR_TYPE:\r | |
6336 | r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not, env);\r | |
6337 | if (r != 0) goto err;\r | |
6338 | \r | |
6339 | next_class:\r | |
6340 | r = next_state_class(cc, &vs, &val_type, &state, env);\r | |
6341 | if (r != 0) goto err;\r | |
6342 | break;\r | |
6343 | \r | |
6344 | case TK_CHAR_PROPERTY:\r | |
6345 | {\r | |
6346 | int ctype = fetch_char_property_to_ctype(&p, end, env);\r | |
6347 | if (ctype < 0) {\r | |
6348 | r = ctype;\r | |
6349 | goto err;\r | |
6350 | }\r | |
6351 | r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, env);\r | |
6352 | if (r != 0) goto err;\r | |
6353 | goto next_class;\r | |
6354 | }\r | |
6355 | break;\r | |
6356 | \r | |
6357 | case TK_CC_RANGE:\r | |
6358 | if (state == CCS_VALUE) {\r | |
6359 | r = fetch_token_in_cc(tok, &p, end, env);\r | |
6360 | if (r < 0) goto err;\r | |
6361 | fetched = 1;\r | |
6362 | if (r == TK_CC_CLOSE) { /* allow [x-] */\r | |
6363 | range_end_val:\r | |
6364 | v = (OnigCodePoint )'-';\r | |
6365 | in_israw = 0;\r | |
6366 | goto val_entry;\r | |
6367 | }\r | |
6368 | else if (r == TK_CC_AND) {\r | |
6369 | CC_ESC_WARN(env, (UChar* )"-");\r | |
6370 | goto range_end_val;\r | |
6371 | }\r | |
6372 | \r | |
6373 | if (val_type == CCV_CLASS) {\r | |
6374 | r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;\r | |
6375 | goto err;\r | |
6376 | }\r | |
6377 | \r | |
6378 | state = CCS_RANGE;\r | |
6379 | }\r | |
6380 | else if (state == CCS_START) {\r | |
6381 | /* [-xa] is allowed */\r | |
6382 | v = (OnigCodePoint )tok->u.c;\r | |
6383 | in_israw = 0;\r | |
6384 | \r | |
6385 | r = fetch_token_in_cc(tok, &p, end, env);\r | |
6386 | if (r < 0) goto err;\r | |
6387 | fetched = 1;\r | |
6388 | /* [--x] or [a&&-x] is warned. */\r | |
6389 | if (r == TK_CC_RANGE || and_start != 0)\r | |
6390 | CC_ESC_WARN(env, (UChar* )"-");\r | |
6391 | \r | |
6392 | goto val_entry;\r | |
6393 | }\r | |
6394 | else if (state == CCS_RANGE) {\r | |
6395 | CC_ESC_WARN(env, (UChar* )"-");\r | |
6396 | goto any_char_in; /* [!--x] is allowed */\r | |
6397 | }\r | |
6398 | else { /* CCS_COMPLETE */\r | |
6399 | r = fetch_token_in_cc(tok, &p, end, env);\r | |
6400 | if (r < 0) goto err;\r | |
6401 | fetched = 1;\r | |
6402 | if (r == TK_CC_CLOSE) goto range_end_val; /* allow [a-b-] */\r | |
6403 | else if (r == TK_CC_AND) {\r | |
6404 | CC_ESC_WARN(env, (UChar* )"-");\r | |
6405 | goto range_end_val;\r | |
6406 | }\r | |
6407 | \r | |
6408 | if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) {\r | |
6409 | CC_ESC_WARN(env, (UChar* )"-");\r | |
6410 | goto range_end_val; /* [0-9-a] is allowed as [0-9\-a] */\r | |
6411 | }\r | |
6412 | r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;\r | |
6413 | goto err;\r | |
6414 | }\r | |
6415 | break;\r | |
6416 | \r | |
6417 | case TK_CC_CC_OPEN: /* [ */\r | |
6418 | {\r | |
6419 | Node *anode;\r | |
6420 | CClassNode* acc;\r | |
6421 | \r | |
6422 | r = parse_char_class(&anode, tok, &p, end, env);\r | |
6423 | if (r != 0) {\r | |
6424 | onig_node_free(anode);\r | |
6425 | goto cc_open_err;\r | |
6426 | }\r | |
6427 | acc = CCLASS_(anode);\r | |
6428 | r = or_cclass(cc, acc, env->enc);\r | |
6429 | onig_node_free(anode);\r | |
6430 | \r | |
6431 | cc_open_err:\r | |
6432 | if (r != 0) goto err;\r | |
6433 | }\r | |
6434 | break;\r | |
6435 | \r | |
6436 | case TK_CC_AND: /* && */\r | |
6437 | {\r | |
6438 | if (state == CCS_VALUE) {\r | |
6439 | r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,\r | |
6440 | &val_type, &state, env);\r | |
6441 | if (r != 0) goto err;\r | |
6442 | }\r | |
6443 | /* initialize local variables */\r | |
6444 | and_start = 1;\r | |
6445 | state = CCS_START;\r | |
6446 | \r | |
6447 | if (IS_NOT_NULL(prev_cc)) {\r | |
6448 | r = and_cclass(prev_cc, cc, env->enc);\r | |
6449 | if (r != 0) goto err;\r | |
6450 | bbuf_free(cc->mbuf);\r | |
6451 | }\r | |
6452 | else {\r | |
6453 | prev_cc = cc;\r | |
6454 | cc = &work_cc;\r | |
6455 | }\r | |
6456 | initialize_cclass(cc);\r | |
6457 | }\r | |
6458 | break;\r | |
6459 | \r | |
6460 | case TK_EOT:\r | |
6461 | r = ONIGERR_PREMATURE_END_OF_CHAR_CLASS;\r | |
6462 | goto err;\r | |
6463 | break;\r | |
6464 | default:\r | |
6465 | r = ONIGERR_PARSER_BUG;\r | |
6466 | goto err;\r | |
6467 | break;\r | |
6468 | }\r | |
6469 | \r | |
6470 | if (fetched)\r | |
6471 | r = tok->type;\r | |
6472 | else {\r | |
6473 | r = fetch_token_in_cc(tok, &p, end, env);\r | |
6474 | if (r < 0) goto err;\r | |
6475 | }\r | |
6476 | }\r | |
6477 | \r | |
6478 | if (state == CCS_VALUE) {\r | |
6479 | r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,\r | |
6480 | &val_type, &state, env);\r | |
6481 | if (r != 0) goto err;\r | |
6482 | }\r | |
6483 | \r | |
6484 | if (IS_NOT_NULL(prev_cc)) {\r | |
6485 | r = and_cclass(prev_cc, cc, env->enc);\r | |
6486 | if (r != 0) goto err;\r | |
6487 | bbuf_free(cc->mbuf);\r | |
6488 | cc = prev_cc;\r | |
6489 | }\r | |
6490 | \r | |
6491 | if (neg != 0)\r | |
6492 | NCCLASS_SET_NOT(cc);\r | |
6493 | else\r | |
6494 | NCCLASS_CLEAR_NOT(cc);\r | |
6495 | if (IS_NCCLASS_NOT(cc) &&\r | |
6496 | IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) {\r | |
6497 | int is_empty = (IS_NULL(cc->mbuf) ? 1 : 0);\r | |
6498 | if (is_empty != 0)\r | |
6499 | BITSET_IS_EMPTY(cc->bs, is_empty);\r | |
6500 | \r | |
6501 | if (is_empty == 0) {\r | |
6502 | #define NEWLINE_CODE 0x0a\r | |
6503 | \r | |
6504 | if (ONIGENC_IS_CODE_NEWLINE(env->enc, NEWLINE_CODE)) {\r | |
6505 | if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1)\r | |
6506 | BITSET_SET_BIT(cc->bs, NEWLINE_CODE);\r | |
6507 | else\r | |
6508 | add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE);\r | |
6509 | }\r | |
6510 | }\r | |
6511 | }\r | |
6512 | *src = p;\r | |
6513 | env->parse_depth--;\r | |
6514 | return 0;\r | |
6515 | \r | |
6516 | err:\r | |
6517 | if (cc != CCLASS_(*np))\r | |
6518 | bbuf_free(cc->mbuf);\r | |
6519 | return r;\r | |
6520 | }\r | |
6521 | \r | |
6522 | static int parse_subexp(Node** top, OnigToken* tok, int term,\r | |
6523 | UChar** src, UChar* end, ScanEnv* env);\r | |
6524 | \r | |
6525 | #ifdef USE_CALLOUT\r | |
6526 | \r | |
6527 | /* (?{...}[tag][+-]) (?{{...}}[tag][+-]) */\r | |
6528 | static int\r | |
6529 | parse_callout_of_contents(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* env)\r | |
6530 | {\r | |
6531 | int r;\r | |
6532 | int i;\r | |
6533 | int in;\r | |
6534 | int num;\r | |
6535 | OnigCodePoint c;\r | |
6536 | UChar* code_start;\r | |
6537 | UChar* code_end;\r | |
6538 | UChar* contents;\r | |
6539 | UChar* tag_start;\r | |
6540 | UChar* tag_end;\r | |
6541 | int brace_nest;\r | |
6542 | CalloutListEntry* e;\r | |
6543 | RegexExt* ext;\r | |
6544 | OnigEncoding enc = env->enc;\r | |
6545 | UChar* p = *src;\r | |
6546 | \r | |
6547 | if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;\r | |
6548 | \r | |
6549 | brace_nest = 0;\r | |
6550 | while (PPEEK_IS('{')) {\r | |
6551 | brace_nest++;\r | |
6552 | PINC_S;\r | |
6553 | if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;\r | |
6554 | }\r | |
6555 | \r | |
6556 | in = ONIG_CALLOUT_IN_PROGRESS;\r | |
6557 | code_start = p;\r | |
6558 | while (1) {\r | |
6559 | if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;\r | |
6560 | \r | |
6561 | code_end = p;\r | |
6562 | PFETCH_S(c);\r | |
6563 | if (c == '}') {\r | |
6564 | i = brace_nest;\r | |
6565 | while (i > 0) {\r | |
6566 | if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;\r | |
6567 | PFETCH_S(c);\r | |
6568 | if (c == '}') i--;\r | |
6569 | else break;\r | |
6570 | }\r | |
6571 | if (i == 0) break;\r | |
6572 | }\r | |
6573 | }\r | |
6574 | \r | |
6575 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
6576 | \r | |
6577 | PFETCH_S(c);\r | |
6578 | if (c == '[') {\r | |
6579 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
6580 | tag_start = p;\r | |
6581 | while (! PEND) {\r | |
6582 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
6583 | tag_end = p;\r | |
6584 | PFETCH_S(c);\r | |
6585 | if (c == ']') break;\r | |
6586 | }\r | |
6587 | if (! is_allowed_callout_tag_name(enc, tag_start, tag_end))\r | |
6588 | return ONIGERR_INVALID_CALLOUT_TAG_NAME;\r | |
6589 | \r | |
6590 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
6591 | PFETCH_S(c);\r | |
6592 | }\r | |
6593 | else {\r | |
6594 | tag_start = tag_end = 0;\r | |
6595 | }\r | |
6596 | \r | |
6597 | if (c == 'X') {\r | |
6598 | in |= ONIG_CALLOUT_IN_RETRACTION;\r | |
6599 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
6600 | PFETCH_S(c);\r | |
6601 | }\r | |
6602 | else if (c == '<') {\r | |
6603 | in = ONIG_CALLOUT_IN_RETRACTION;\r | |
6604 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
6605 | PFETCH_S(c);\r | |
6606 | }\r | |
6607 | else if (c == '>') { /* no needs (default) */\r | |
6608 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
6609 | PFETCH_S(c);\r | |
6610 | }\r | |
6611 | \r | |
6612 | if (c != cterm)\r | |
6613 | return ONIGERR_INVALID_CALLOUT_PATTERN;\r | |
6614 | \r | |
6615 | r = reg_callout_list_entry(env, &num);\r | |
6616 | if (r != 0) return r;\r | |
6617 | \r | |
6618 | ext = onig_get_regex_ext(env->reg);\r | |
6619 | if (IS_NULL(ext->pattern)) {\r | |
6620 | r = onig_ext_set_pattern(env->reg, env->pattern, env->pattern_end);\r | |
6621 | if (r != ONIG_NORMAL) return r;\r | |
6622 | }\r | |
6623 | \r | |
6624 | if (tag_start != tag_end) {\r | |
6625 | r = callout_tag_entry(env->reg, tag_start, tag_end, num);\r | |
6626 | if (r != ONIG_NORMAL) return r;\r | |
6627 | }\r | |
6628 | \r | |
6629 | contents = onigenc_strdup(enc, code_start, code_end);\r | |
6630 | CHECK_NULL_RETURN_MEMERR(contents);\r | |
6631 | \r | |
6632 | r = node_new_callout(np, ONIG_CALLOUT_OF_CONTENTS, num, ONIG_NON_NAME_ID, env);\r | |
6633 | if (r != 0) {\r | |
6634 | xfree(contents);\r | |
6635 | return r;\r | |
6636 | }\r | |
6637 | \r | |
6638 | e = onig_reg_callout_list_at(env->reg, num);\r | |
a5def177 DG |
6639 | if (IS_NULL(e)) {\r |
6640 | xfree(contents);\r | |
6641 | return ONIGERR_MEMORY;\r | |
6642 | }\r | |
6643 | \r | |
b602265d DG |
6644 | e->of = ONIG_CALLOUT_OF_CONTENTS;\r |
6645 | e->in = in;\r | |
6646 | e->name_id = ONIG_NON_NAME_ID;\r | |
6647 | e->u.content.start = contents;\r | |
6648 | e->u.content.end = contents + (code_end - code_start);\r | |
6649 | \r | |
6650 | *src = p;\r | |
6651 | return 0;\r | |
6652 | }\r | |
6653 | \r | |
6654 | static long\r | |
6655 | parse_long(OnigEncoding enc, UChar* s, UChar* end, int sign_on, long max, long* rl)\r | |
6656 | {\r | |
6657 | long v;\r | |
6658 | long d;\r | |
6659 | int flag;\r | |
6660 | UChar* p;\r | |
6661 | OnigCodePoint c;\r | |
6662 | \r | |
6663 | if (s >= end) return ONIGERR_INVALID_CALLOUT_ARG;\r | |
6664 | \r | |
6665 | flag = 1;\r | |
6666 | v = 0;\r | |
6667 | p = s;\r | |
6668 | while (p < end) {\r | |
6669 | c = ONIGENC_MBC_TO_CODE(enc, p, end);\r | |
6670 | p += ONIGENC_MBC_ENC_LEN(enc, p);\r | |
6671 | if (c >= '0' && c <= '9') {\r | |
6672 | d = (long )(c - '0');\r | |
6673 | if (v > (max - d) / 10)\r | |
6674 | return ONIGERR_INVALID_CALLOUT_ARG;\r | |
6675 | \r | |
6676 | v = v * 10 + d;\r | |
6677 | }\r | |
6678 | else if (sign_on != 0 && (c == '-' || c == '+')) {\r | |
6679 | if (c == '-') flag = -1;\r | |
6680 | }\r | |
6681 | else\r | |
6682 | return ONIGERR_INVALID_CALLOUT_ARG;\r | |
6683 | \r | |
6684 | sign_on = 0;\r | |
6685 | }\r | |
6686 | \r | |
6687 | *rl = flag * v;\r | |
6688 | return ONIG_NORMAL;\r | |
6689 | }\r | |
6690 | \r | |
6691 | static int\r | |
6692 | parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end,\r | |
6693 | unsigned int types[], OnigValue vals[], ScanEnv* env)\r | |
6694 | {\r | |
6695 | #define MAX_CALLOUT_ARG_BYTE_LENGTH 128\r | |
6696 | \r | |
6697 | int r;\r | |
6698 | int n;\r | |
6699 | int esc;\r | |
6700 | int cn;\r | |
6701 | UChar* s;\r | |
6702 | UChar* e;\r | |
6703 | UChar* eesc;\r | |
6704 | OnigCodePoint c;\r | |
6705 | UChar* bufend;\r | |
6706 | UChar buf[MAX_CALLOUT_ARG_BYTE_LENGTH];\r | |
6707 | OnigEncoding enc = env->enc;\r | |
6708 | UChar* p = *src;\r | |
6709 | \r | |
6710 | if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;\r | |
6711 | \r | |
6712 | n = 0;\r | |
6713 | while (n < ONIG_CALLOUT_MAX_ARGS_NUM) {\r | |
6714 | c = 0;\r | |
6715 | cn = 0;\r | |
6716 | esc = 0;\r | |
6717 | eesc = 0;\r | |
6718 | bufend = buf;\r | |
6719 | s = e = p;\r | |
6720 | while (1) {\r | |
6721 | if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;\r | |
6722 | \r | |
6723 | e = p;\r | |
6724 | PFETCH_S(c);\r | |
6725 | if (esc != 0) {\r | |
6726 | esc = 0;\r | |
6727 | if (c == '\\' || c == cterm || c == ',') {\r | |
6728 | /* */\r | |
6729 | }\r | |
6730 | else {\r | |
6731 | e = eesc;\r | |
6732 | cn++;\r | |
6733 | }\r | |
6734 | goto add_char;\r | |
14b0e578 CS |
6735 | }\r |
6736 | else {\r | |
b602265d DG |
6737 | if (c == '\\') {\r |
6738 | esc = 1;\r | |
6739 | eesc = e;\r | |
6740 | }\r | |
6741 | else if (c == cterm || c == ',')\r | |
6742 | break;\r | |
6743 | else {\r | |
6744 | size_t clen;\r | |
14b0e578 | 6745 | \r |
b602265d DG |
6746 | add_char:\r |
6747 | if (skip_mode == 0) {\r | |
6748 | clen = p - e;\r | |
6749 | if (bufend + clen > buf + MAX_CALLOUT_ARG_BYTE_LENGTH)\r | |
6750 | return ONIGERR_INVALID_CALLOUT_ARG; /* too long argument */\r | |
14b0e578 | 6751 | \r |
b602265d DG |
6752 | xmemcpy(bufend, e, clen);\r |
6753 | bufend += clen;\r | |
6754 | }\r | |
6755 | cn++;\r | |
6756 | }\r | |
14b0e578 | 6757 | }\r |
b602265d | 6758 | }\r |
14b0e578 | 6759 | \r |
b602265d DG |
6760 | if (cn != 0) {\r |
6761 | if (skip_mode == 0) {\r | |
6762 | if ((types[n] & ONIG_TYPE_LONG) != 0) {\r | |
6763 | int fixed = 0;\r | |
6764 | if (cn > 0) {\r | |
6765 | long rl;\r | |
6766 | r = parse_long(enc, buf, bufend, 1, LONG_MAX, &rl);\r | |
6767 | if (r == ONIG_NORMAL) {\r | |
6768 | vals[n].l = rl;\r | |
6769 | fixed = 1;\r | |
6770 | types[n] = ONIG_TYPE_LONG;\r | |
6771 | }\r | |
6772 | }\r | |
14b0e578 | 6773 | \r |
b602265d DG |
6774 | if (fixed == 0) {\r |
6775 | types[n] = (types[n] & ~ONIG_TYPE_LONG);\r | |
6776 | if (types[n] == ONIG_TYPE_VOID)\r | |
6777 | return ONIGERR_INVALID_CALLOUT_ARG;\r | |
6778 | }\r | |
6779 | }\r | |
14b0e578 | 6780 | \r |
b602265d DG |
6781 | switch (types[n]) {\r |
6782 | case ONIG_TYPE_LONG:\r | |
6783 | break;\r | |
14b0e578 | 6784 | \r |
b602265d DG |
6785 | case ONIG_TYPE_CHAR:\r |
6786 | if (cn != 1) return ONIGERR_INVALID_CALLOUT_ARG;\r | |
6787 | vals[n].c = ONIGENC_MBC_TO_CODE(enc, buf, bufend);\r | |
6788 | break;\r | |
14b0e578 | 6789 | \r |
b602265d DG |
6790 | case ONIG_TYPE_STRING:\r |
6791 | {\r | |
6792 | UChar* rs = onigenc_strdup(enc, buf, bufend);\r | |
6793 | CHECK_NULL_RETURN_MEMERR(rs);\r | |
6794 | vals[n].s.start = rs;\r | |
6795 | vals[n].s.end = rs + (e - s);\r | |
6796 | }\r | |
6797 | break;\r | |
14b0e578 | 6798 | \r |
b602265d DG |
6799 | case ONIG_TYPE_TAG:\r |
6800 | if (eesc != 0 || ! is_allowed_callout_tag_name(enc, s, e))\r | |
6801 | return ONIGERR_INVALID_CALLOUT_TAG_NAME;\r | |
14b0e578 | 6802 | \r |
b602265d DG |
6803 | vals[n].s.start = s;\r |
6804 | vals[n].s.end = e;\r | |
6805 | break;\r | |
6806 | \r | |
6807 | case ONIG_TYPE_VOID:\r | |
6808 | case ONIG_TYPE_POINTER:\r | |
6809 | return ONIGERR_PARSER_BUG;\r | |
6810 | break;\r | |
6811 | }\r | |
14b0e578 | 6812 | }\r |
14b0e578 | 6813 | \r |
b602265d DG |
6814 | n++;\r |
6815 | }\r | |
14b0e578 | 6816 | \r |
b602265d DG |
6817 | if (c == cterm) break;\r |
6818 | }\r | |
14b0e578 | 6819 | \r |
b602265d | 6820 | if (c != cterm) return ONIGERR_INVALID_CALLOUT_PATTERN;\r |
14b0e578 | 6821 | \r |
b602265d DG |
6822 | *src = p;\r |
6823 | return n;\r | |
6824 | }\r | |
14b0e578 | 6825 | \r |
b602265d DG |
6826 | /* (*name[TAG]) (*name[TAG]{a,b,..}) */\r |
6827 | static int\r | |
6828 | parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* env)\r | |
6829 | {\r | |
6830 | int r;\r | |
6831 | int i;\r | |
6832 | int in;\r | |
6833 | int num;\r | |
6834 | int name_id;\r | |
6835 | int arg_num;\r | |
6836 | int max_arg_num;\r | |
6837 | int opt_arg_num;\r | |
6838 | int is_not_single;\r | |
6839 | OnigCodePoint c;\r | |
6840 | UChar* name_start;\r | |
6841 | UChar* name_end;\r | |
6842 | UChar* tag_start;\r | |
6843 | UChar* tag_end;\r | |
6844 | Node* node;\r | |
6845 | CalloutListEntry* e;\r | |
6846 | RegexExt* ext;\r | |
6847 | unsigned int types[ONIG_CALLOUT_MAX_ARGS_NUM];\r | |
6848 | OnigValue vals[ONIG_CALLOUT_MAX_ARGS_NUM];\r | |
6849 | OnigEncoding enc = env->enc;\r | |
6850 | UChar* p = *src;\r | |
14b0e578 | 6851 | \r |
b602265d DG |
6852 | /* PFETCH_READY; */\r |
6853 | if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;\r | |
6854 | \r | |
6855 | node = 0;\r | |
6856 | name_start = p;\r | |
6857 | while (1) {\r | |
6858 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
6859 | name_end = p;\r | |
6860 | PFETCH_S(c);\r | |
6861 | if (c == cterm || c == '[' || c == '{') break;\r | |
6862 | }\r | |
6863 | \r | |
6864 | if (! is_allowed_callout_name(enc, name_start, name_end))\r | |
6865 | return ONIGERR_INVALID_CALLOUT_NAME;\r | |
6866 | \r | |
6867 | if (c == '[') {\r | |
6868 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
6869 | tag_start = p;\r | |
6870 | while (! PEND) {\r | |
6871 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
6872 | tag_end = p;\r | |
6873 | PFETCH_S(c);\r | |
6874 | if (c == ']') break;\r | |
14b0e578 | 6875 | }\r |
b602265d DG |
6876 | if (! is_allowed_callout_tag_name(enc, tag_start, tag_end))\r |
6877 | return ONIGERR_INVALID_CALLOUT_TAG_NAME;\r | |
6878 | \r | |
6879 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
6880 | PFETCH_S(c);\r | |
6881 | }\r | |
6882 | else {\r | |
6883 | tag_start = tag_end = 0;\r | |
14b0e578 CS |
6884 | }\r |
6885 | \r | |
b602265d DG |
6886 | if (c == '{') {\r |
6887 | UChar* save;\r | |
6888 | \r | |
6889 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
6890 | \r | |
6891 | /* read for single check only */\r | |
6892 | save = p;\r | |
6893 | arg_num = parse_callout_args(1, '}', &p, end, 0, 0, env);\r | |
6894 | if (arg_num < 0) return arg_num;\r | |
6895 | \r | |
6896 | is_not_single = PPEEK_IS(cterm) ? 0 : 1;\r | |
6897 | p = save;\r | |
6898 | r = get_callout_name_id_by_name(enc, is_not_single, name_start, name_end,\r | |
6899 | &name_id);\r | |
6900 | if (r != ONIG_NORMAL) return r;\r | |
6901 | \r | |
6902 | max_arg_num = get_callout_arg_num_by_name_id(name_id);\r | |
6903 | for (i = 0; i < max_arg_num; i++) {\r | |
6904 | types[i] = get_callout_arg_type_by_name_id(name_id, i);\r | |
6905 | }\r | |
6906 | \r | |
6907 | arg_num = parse_callout_args(0, '}', &p, end, types, vals, env);\r | |
6908 | if (arg_num < 0) return arg_num;\r | |
6909 | \r | |
6910 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
6911 | PFETCH_S(c);\r | |
14b0e578 | 6912 | }\r |
b602265d DG |
6913 | else {\r |
6914 | arg_num = 0;\r | |
14b0e578 | 6915 | \r |
b602265d DG |
6916 | is_not_single = 0;\r |
6917 | r = get_callout_name_id_by_name(enc, is_not_single, name_start, name_end,\r | |
6918 | &name_id);\r | |
6919 | if (r != ONIG_NORMAL) return r;\r | |
6920 | \r | |
6921 | max_arg_num = get_callout_arg_num_by_name_id(name_id);\r | |
6922 | for (i = 0; i < max_arg_num; i++) {\r | |
6923 | types[i] = get_callout_arg_type_by_name_id(name_id, i);\r | |
6924 | }\r | |
14b0e578 CS |
6925 | }\r |
6926 | \r | |
b602265d DG |
6927 | in = onig_get_callout_in_by_name_id(name_id);\r |
6928 | opt_arg_num = get_callout_opt_arg_num_by_name_id(name_id);\r | |
6929 | if (arg_num > max_arg_num || arg_num < (max_arg_num - opt_arg_num))\r | |
6930 | return ONIGERR_INVALID_CALLOUT_ARG;\r | |
14b0e578 | 6931 | \r |
b602265d DG |
6932 | if (c != cterm)\r |
6933 | return ONIGERR_INVALID_CALLOUT_PATTERN;\r | |
14b0e578 | 6934 | \r |
b602265d DG |
6935 | r = reg_callout_list_entry(env, &num);\r |
6936 | if (r != 0) return r;\r | |
14b0e578 | 6937 | \r |
b602265d DG |
6938 | ext = onig_get_regex_ext(env->reg);\r |
6939 | if (IS_NULL(ext->pattern)) {\r | |
6940 | r = onig_ext_set_pattern(env->reg, env->pattern, env->pattern_end);\r | |
6941 | if (r != ONIG_NORMAL) return r;\r | |
6942 | }\r | |
6943 | \r | |
6944 | if (tag_start != tag_end) {\r | |
6945 | r = callout_tag_entry(env->reg, tag_start, tag_end, num);\r | |
6946 | if (r != ONIG_NORMAL) return r;\r | |
6947 | }\r | |
6948 | \r | |
6949 | r = node_new_callout(&node, ONIG_CALLOUT_OF_NAME, num, name_id, env);\r | |
6950 | if (r != ONIG_NORMAL) return r;\r | |
6951 | \r | |
6952 | e = onig_reg_callout_list_at(env->reg, num);\r | |
a5def177 DG |
6953 | CHECK_NULL_RETURN_MEMERR(e);\r |
6954 | \r | |
b602265d DG |
6955 | e->of = ONIG_CALLOUT_OF_NAME;\r |
6956 | e->in = in;\r | |
6957 | e->name_id = name_id;\r | |
6958 | e->type = onig_get_callout_type_by_name_id(name_id);\r | |
6959 | e->start_func = onig_get_callout_start_func_by_name_id(name_id);\r | |
6960 | e->end_func = onig_get_callout_end_func_by_name_id(name_id);\r | |
6961 | e->u.arg.num = max_arg_num;\r | |
6962 | e->u.arg.passed_num = arg_num;\r | |
6963 | for (i = 0; i < max_arg_num; i++) {\r | |
6964 | e->u.arg.types[i] = types[i];\r | |
6965 | if (i < arg_num)\r | |
6966 | e->u.arg.vals[i] = vals[i];\r | |
6967 | else\r | |
6968 | e->u.arg.vals[i] = get_callout_opt_default_by_name_id(name_id, i);\r | |
14b0e578 | 6969 | }\r |
b602265d DG |
6970 | \r |
6971 | *np = node;\r | |
14b0e578 CS |
6972 | *src = p;\r |
6973 | return 0;\r | |
14b0e578 | 6974 | }\r |
b602265d | 6975 | #endif\r |
14b0e578 CS |
6976 | \r |
6977 | static int\r | |
b602265d DG |
6978 | parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,\r |
6979 | ScanEnv* env)\r | |
14b0e578 CS |
6980 | {\r |
6981 | int r, num;\r | |
6982 | Node *target;\r | |
6983 | OnigOptionType option;\r | |
6984 | OnigCodePoint c;\r | |
b602265d | 6985 | int list_capture;\r |
14b0e578 CS |
6986 | OnigEncoding enc = env->enc;\r |
6987 | \r | |
b602265d DG |
6988 | UChar* p = *src;\r |
6989 | PFETCH_READY;\r | |
6990 | \r | |
6991 | *np = NULL;\r | |
6992 | if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;\r | |
6993 | \r | |
6994 | option = env->options;\r | |
6995 | c = PPEEK;\r | |
6996 | if (c == '?' && IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {\r | |
6997 | PINC;\r | |
6998 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
6999 | \r | |
7000 | PFETCH(c);\r | |
7001 | switch (c) {\r | |
7002 | case ':': /* (?:...) grouping only */\r | |
7003 | group:\r | |
7004 | r = fetch_token(tok, &p, end, env);\r | |
7005 | if (r < 0) return r;\r | |
7006 | r = parse_subexp(np, tok, term, &p, end, env);\r | |
7007 | if (r < 0) return r;\r | |
7008 | *src = p;\r | |
7009 | return 1; /* group */\r | |
7010 | break;\r | |
7011 | \r | |
7012 | case '=':\r | |
7013 | *np = onig_node_new_anchor(ANCHOR_PREC_READ, 0);\r | |
7014 | break;\r | |
7015 | case '!': /* preceding read */\r | |
7016 | *np = onig_node_new_anchor(ANCHOR_PREC_READ_NOT, 0);\r | |
7017 | break;\r | |
7018 | case '>': /* (?>...) stop backtrack */\r | |
7019 | *np = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);\r | |
7020 | break;\r | |
7021 | \r | |
7022 | case '\'':\r | |
7023 | if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {\r | |
7024 | goto named_group1;\r | |
7025 | }\r | |
7026 | else\r | |
7027 | return ONIGERR_UNDEFINED_GROUP_OPTION;\r | |
7028 | break;\r | |
7029 | \r | |
7030 | case '<': /* look behind (?<=...), (?<!...) */\r | |
7031 | if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;\r | |
7032 | PFETCH(c);\r | |
7033 | if (c == '=')\r | |
7034 | *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND, 0);\r | |
7035 | else if (c == '!')\r | |
7036 | *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND_NOT, 0);\r | |
7037 | else {\r | |
7038 | if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {\r | |
7039 | UChar *name;\r | |
7040 | UChar *name_end;\r | |
7041 | enum REF_NUM num_type;\r | |
7042 | \r | |
7043 | PUNFETCH;\r | |
7044 | c = '<';\r | |
7045 | \r | |
7046 | named_group1:\r | |
7047 | list_capture = 0;\r | |
7048 | \r | |
7049 | named_group2:\r | |
7050 | name = p;\r | |
7051 | r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num,\r | |
7052 | &num_type, 0);\r | |
7053 | if (r < 0) return r;\r | |
7054 | \r | |
7055 | num = scan_env_add_mem_entry(env);\r | |
7056 | if (num < 0) return num;\r | |
7057 | if (list_capture != 0 && num >= (int )MEM_STATUS_BITS_NUM)\r | |
7058 | return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;\r | |
7059 | \r | |
7060 | r = name_add(env->reg, name, name_end, num, env);\r | |
7061 | if (r != 0) return r;\r | |
7062 | *np = node_new_memory(1);\r | |
7063 | CHECK_NULL_RETURN_MEMERR(*np);\r | |
7064 | ENCLOSURE_(*np)->m.regnum = num;\r | |
7065 | if (list_capture != 0)\r | |
7066 | MEM_STATUS_ON_SIMPLE(env->capture_history, num);\r | |
7067 | env->num_named++;\r | |
7068 | }\r | |
7069 | else {\r | |
7070 | return ONIGERR_UNDEFINED_GROUP_OPTION;\r | |
7071 | }\r | |
7072 | }\r | |
7073 | break;\r | |
7074 | \r | |
7075 | case '~':\r | |
7076 | if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP)) {\r | |
7077 | Node* absent;\r | |
7078 | Node* expr;\r | |
7079 | int head_bar;\r | |
7080 | int is_range_cutter;\r | |
7081 | \r | |
7082 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
7083 | \r | |
7084 | if (PPEEK_IS('|')) { /* (?~|generator|absent) */\r | |
7085 | PINC;\r | |
7086 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
7087 | \r | |
7088 | head_bar = 1;\r | |
7089 | if (PPEEK_IS(')')) { /* (?~|) : range clear */\r | |
7090 | PINC;\r | |
7091 | r = make_range_clear(np, env);\r | |
7092 | if (r != 0) return r;\r | |
7093 | goto end;\r | |
7094 | }\r | |
7095 | }\r | |
7096 | else\r | |
7097 | head_bar = 0;\r | |
7098 | \r | |
7099 | r = fetch_token(tok, &p, end, env);\r | |
7100 | if (r < 0) return r;\r | |
7101 | r = parse_subexp(&absent, tok, term, &p, end, env);\r | |
7102 | if (r < 0) {\r | |
7103 | onig_node_free(absent);\r | |
7104 | return r;\r | |
7105 | }\r | |
7106 | \r | |
7107 | expr = NULL_NODE;\r | |
7108 | is_range_cutter = 0;\r | |
7109 | if (head_bar != 0) {\r | |
7110 | Node* top = absent;\r | |
7111 | if (NODE_TYPE(top) != NODE_ALT || IS_NULL(NODE_CDR(top))) {\r | |
7112 | expr = NULL_NODE;\r | |
7113 | is_range_cutter = 1;\r | |
7114 | /* return ONIGERR_INVALID_ABSENT_GROUP_GENERATOR_PATTERN; */\r | |
7115 | }\r | |
7116 | else {\r | |
7117 | absent = NODE_CAR(top);\r | |
7118 | expr = NODE_CDR(top);\r | |
7119 | NODE_CAR(top) = NULL_NODE;\r | |
7120 | NODE_CDR(top) = NULL_NODE;\r | |
7121 | onig_node_free(top);\r | |
7122 | if (IS_NULL(NODE_CDR(expr))) {\r | |
7123 | top = expr;\r | |
7124 | expr = NODE_CAR(top);\r | |
7125 | NODE_CAR(top) = NULL_NODE;\r | |
7126 | onig_node_free(top);\r | |
7127 | }\r | |
7128 | }\r | |
7129 | }\r | |
7130 | \r | |
7131 | r = make_absent_tree(np, absent, expr, is_range_cutter, env);\r | |
7132 | if (r != 0) {\r | |
7133 | return r;\r | |
7134 | }\r | |
7135 | goto end;\r | |
7136 | }\r | |
7137 | else {\r | |
7138 | return ONIGERR_UNDEFINED_GROUP_OPTION;\r | |
7139 | }\r | |
7140 | break;\r | |
7141 | \r | |
7142 | #ifdef USE_CALLOUT\r | |
7143 | case '{':\r | |
7144 | if (! IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS))\r | |
7145 | return ONIGERR_UNDEFINED_GROUP_OPTION;\r | |
7146 | \r | |
7147 | r = parse_callout_of_contents(np, ')', &p, end, env);\r | |
7148 | if (r != 0) return r;\r | |
7149 | \r | |
7150 | goto end;\r | |
7151 | break;\r | |
7152 | #endif\r | |
7153 | \r | |
7154 | case '(':\r | |
7155 | /* (?()...) */\r | |
7156 | if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE)) {\r | |
7157 | UChar *prev;\r | |
7158 | Node* condition;\r | |
7159 | int condition_is_checker;\r | |
7160 | \r | |
7161 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
7162 | PFETCH(c);\r | |
7163 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
7164 | \r | |
7165 | if (IS_CODE_DIGIT_ASCII(enc, c)\r | |
7166 | || c == '-' || c == '+' || c == '<' || c == '\'') {\r | |
7167 | UChar* name_end;\r | |
7168 | int back_num;\r | |
7169 | int exist_level;\r | |
7170 | int level;\r | |
7171 | enum REF_NUM num_type;\r | |
7172 | int is_enclosed;\r | |
7173 | \r | |
7174 | is_enclosed = (c == '<' || c == '\'') ? 1 : 0;\r | |
7175 | if (! is_enclosed)\r | |
7176 | PUNFETCH;\r | |
7177 | prev = p;\r | |
7178 | exist_level = 0;\r | |
7179 | #ifdef USE_BACKREF_WITH_LEVEL\r | |
7180 | name_end = NULL_UCHARP; /* no need. escape gcc warning. */\r | |
7181 | r = fetch_name_with_level(\r | |
7182 | (OnigCodePoint )(is_enclosed != 0 ? c : '('),\r | |
7183 | &p, end, &name_end,\r | |
7184 | env, &back_num, &level, &num_type);\r | |
7185 | if (r == 1) exist_level = 1;\r | |
7186 | #else\r | |
7187 | r = fetch_name((OnigCodePoint )(is_enclosed != 0 ? c : '('),\r | |
7188 | &p, end, &name_end, env, &back_num, &num_type, 1);\r | |
7189 | #endif\r | |
7190 | if (r < 0) {\r | |
7191 | if (is_enclosed == 0) {\r | |
7192 | goto any_condition;\r | |
7193 | }\r | |
7194 | else\r | |
7195 | return r;\r | |
7196 | }\r | |
7197 | \r | |
7198 | condition_is_checker = 1;\r | |
7199 | if (num_type != IS_NOT_NUM) {\r | |
7200 | if (num_type == IS_REL_NUM) {\r | |
7201 | back_num = backref_rel_to_abs(back_num, env);\r | |
7202 | }\r | |
7203 | if (back_num <= 0)\r | |
7204 | return ONIGERR_INVALID_BACKREF;\r | |
7205 | \r | |
7206 | if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_STRICT_CHECK_BACKREF)) {\r | |
7207 | if (back_num > env->num_mem ||\r | |
7208 | IS_NULL(SCANENV_MEMENV(env)[back_num].node))\r | |
7209 | return ONIGERR_INVALID_BACKREF;\r | |
7210 | }\r | |
7211 | \r | |
7212 | condition = node_new_backref_checker(1, &back_num, 0,\r | |
7213 | #ifdef USE_BACKREF_WITH_LEVEL\r | |
7214 | exist_level, level,\r | |
7215 | #endif\r | |
7216 | env);\r | |
7217 | }\r | |
7218 | else {\r | |
7219 | int num;\r | |
7220 | int* backs;\r | |
7221 | \r | |
7222 | num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);\r | |
7223 | if (num <= 0) {\r | |
7224 | onig_scan_env_set_error_string(env,\r | |
7225 | ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);\r | |
7226 | return ONIGERR_UNDEFINED_NAME_REFERENCE;\r | |
7227 | }\r | |
7228 | if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_STRICT_CHECK_BACKREF)) {\r | |
7229 | int i;\r | |
7230 | for (i = 0; i < num; i++) {\r | |
7231 | if (backs[i] > env->num_mem ||\r | |
7232 | IS_NULL(SCANENV_MEMENV(env)[backs[i]].node))\r | |
7233 | return ONIGERR_INVALID_BACKREF;\r | |
7234 | }\r | |
7235 | }\r | |
7236 | \r | |
7237 | condition = node_new_backref_checker(num, backs, 1,\r | |
7238 | #ifdef USE_BACKREF_WITH_LEVEL\r | |
7239 | exist_level, level,\r | |
7240 | #endif\r | |
7241 | env);\r | |
7242 | }\r | |
7243 | \r | |
7244 | if (is_enclosed != 0) {\r | |
7245 | if (PEND) goto err_if_else;\r | |
7246 | PFETCH(c);\r | |
7247 | if (c != ')') goto err_if_else;\r | |
7248 | }\r | |
7249 | }\r | |
7250 | #ifdef USE_CALLOUT\r | |
7251 | else if (c == '?') {\r | |
7252 | if (IS_SYNTAX_OP2(env->syntax,\r | |
7253 | ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS)) {\r | |
7254 | if (! PEND && PPEEK_IS('{')) {\r | |
7255 | /* condition part is callouts of contents: (?(?{...})THEN|ELSE) */\r | |
7256 | condition_is_checker = 0;\r | |
7257 | PFETCH(c);\r | |
7258 | r = parse_callout_of_contents(&condition, ')', &p, end, env);\r | |
7259 | if (r != 0) return r;\r | |
7260 | goto end_condition;\r | |
7261 | }\r | |
7262 | }\r | |
7263 | goto any_condition;\r | |
7264 | }\r | |
7265 | else if (c == '*' &&\r | |
7266 | IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME)) {\r | |
7267 | condition_is_checker = 0;\r | |
7268 | r = parse_callout_of_name(&condition, ')', &p, end, env);\r | |
7269 | if (r != 0) return r;\r | |
7270 | goto end_condition;\r | |
7271 | }\r | |
14b0e578 | 7272 | #endif\r |
b602265d DG |
7273 | else {\r |
7274 | any_condition:\r | |
7275 | PUNFETCH;\r | |
7276 | condition_is_checker = 0;\r | |
7277 | r = fetch_token(tok, &p, end, env);\r | |
7278 | if (r < 0) return r;\r | |
7279 | r = parse_subexp(&condition, tok, term, &p, end, env);\r | |
7280 | if (r < 0) {\r | |
7281 | onig_node_free(condition);\r | |
7282 | return r;\r | |
7283 | }\r | |
7284 | }\r | |
14b0e578 | 7285 | \r |
b602265d DG |
7286 | end_condition:\r |
7287 | CHECK_NULL_RETURN_MEMERR(condition);\r | |
14b0e578 | 7288 | \r |
b602265d DG |
7289 | if (PEND) {\r |
7290 | err_if_else:\r | |
7291 | onig_node_free(condition);\r | |
7292 | return ONIGERR_END_PATTERN_IN_GROUP;\r | |
7293 | }\r | |
14b0e578 | 7294 | \r |
b602265d DG |
7295 | if (PPEEK_IS(')')) { /* case: empty body: make backref checker */\r |
7296 | if (condition_is_checker == 0) {\r | |
7297 | onig_node_free(condition);\r | |
7298 | return ONIGERR_INVALID_IF_ELSE_SYNTAX;\r | |
7299 | }\r | |
7300 | PFETCH(c);\r | |
7301 | *np = condition;\r | |
7302 | }\r | |
7303 | else { /* if-else */\r | |
7304 | int then_is_empty;\r | |
7305 | Node *Then, *Else;\r | |
14b0e578 | 7306 | \r |
b602265d DG |
7307 | if (PPEEK_IS('|')) {\r |
7308 | PFETCH(c);\r | |
7309 | Then = 0;\r | |
7310 | then_is_empty = 1;\r | |
7311 | }\r | |
7312 | else\r | |
7313 | then_is_empty = 0;\r | |
14b0e578 | 7314 | \r |
b602265d DG |
7315 | r = fetch_token(tok, &p, end, env);\r |
7316 | if (r < 0) {\r | |
7317 | onig_node_free(condition);\r | |
7318 | return r;\r | |
7319 | }\r | |
7320 | r = parse_subexp(&target, tok, term, &p, end, env);\r | |
7321 | if (r < 0) {\r | |
7322 | onig_node_free(condition);\r | |
7323 | onig_node_free(target);\r | |
7324 | return r;\r | |
7325 | }\r | |
14b0e578 | 7326 | \r |
b602265d DG |
7327 | if (then_is_empty != 0) {\r |
7328 | Else = target;\r | |
7329 | }\r | |
7330 | else {\r | |
7331 | if (NODE_TYPE(target) == NODE_ALT) {\r | |
7332 | Then = NODE_CAR(target);\r | |
7333 | if (NODE_CDR(NODE_CDR(target)) == NULL_NODE) {\r | |
7334 | Else = NODE_CAR(NODE_CDR(target));\r | |
7335 | cons_node_free_alone(NODE_CDR(target));\r | |
7336 | }\r | |
7337 | else {\r | |
7338 | Else = NODE_CDR(target);\r | |
7339 | }\r | |
7340 | cons_node_free_alone(target);\r | |
7341 | }\r | |
7342 | else {\r | |
7343 | Then = target;\r | |
7344 | Else = 0;\r | |
7345 | }\r | |
7346 | }\r | |
14b0e578 | 7347 | \r |
b602265d DG |
7348 | *np = node_new_enclosure_if_else(condition, Then, Else);\r |
7349 | if (IS_NULL(*np)) {\r | |
7350 | onig_node_free(condition);\r | |
7351 | onig_node_free(Then);\r | |
7352 | onig_node_free(Else);\r | |
7353 | return ONIGERR_MEMORY;\r | |
7354 | }\r | |
7355 | }\r | |
7356 | goto end;\r | |
14b0e578 | 7357 | }\r |
14b0e578 | 7358 | else {\r |
b602265d | 7359 | return ONIGERR_UNDEFINED_GROUP_OPTION;\r |
14b0e578 | 7360 | }\r |
14b0e578 CS |
7361 | break;\r |
7362 | \r | |
7363 | case '@':\r | |
7364 | if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) {\r | |
b602265d DG |
7365 | if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {\r |
7366 | PFETCH(c);\r | |
7367 | if (c == '<' || c == '\'') {\r | |
7368 | list_capture = 1;\r | |
7369 | goto named_group2; /* (?@<name>...) */\r | |
7370 | }\r | |
7371 | PUNFETCH;\r | |
7372 | }\r | |
7373 | \r | |
7374 | *np = node_new_memory(0);\r | |
7375 | CHECK_NULL_RETURN_MEMERR(*np);\r | |
7376 | num = scan_env_add_mem_entry(env);\r | |
7377 | if (num < 0) {\r | |
7378 | return num;\r | |
7379 | }\r | |
7380 | else if (num >= (int )MEM_STATUS_BITS_NUM) {\r | |
7381 | return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;\r | |
7382 | }\r | |
7383 | ENCLOSURE_(*np)->m.regnum = num;\r | |
7384 | MEM_STATUS_ON_SIMPLE(env->capture_history, num);\r | |
14b0e578 CS |
7385 | }\r |
7386 | else {\r | |
b602265d | 7387 | return ONIGERR_UNDEFINED_GROUP_OPTION;\r |
14b0e578 CS |
7388 | }\r |
7389 | break;\r | |
7390 | \r | |
7391 | #ifdef USE_POSIXLINE_OPTION\r | |
7392 | case 'p':\r | |
7393 | #endif\r | |
7394 | case '-': case 'i': case 'm': case 's': case 'x':\r | |
b602265d | 7395 | case 'W': case 'D': case 'S': case 'P':\r |
14b0e578 | 7396 | {\r |
b602265d DG |
7397 | int neg = 0;\r |
7398 | \r | |
7399 | while (1) {\r | |
7400 | switch (c) {\r | |
7401 | case ':':\r | |
7402 | case ')':\r | |
7403 | break;\r | |
7404 | \r | |
7405 | case '-': neg = 1; break;\r | |
7406 | case 'x': OPTION_NEGATE(option, ONIG_OPTION_EXTEND, neg); break;\r | |
7407 | case 'i': OPTION_NEGATE(option, ONIG_OPTION_IGNORECASE, neg); break;\r | |
7408 | case 's':\r | |
7409 | if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {\r | |
7410 | OPTION_NEGATE(option, ONIG_OPTION_MULTILINE, neg);\r | |
7411 | }\r | |
7412 | else\r | |
7413 | return ONIGERR_UNDEFINED_GROUP_OPTION;\r | |
7414 | break;\r | |
7415 | \r | |
7416 | case 'm':\r | |
7417 | if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {\r | |
7418 | OPTION_NEGATE(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 1 : 0));\r | |
7419 | }\r | |
7420 | else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) {\r | |
7421 | OPTION_NEGATE(option, ONIG_OPTION_MULTILINE, neg);\r | |
7422 | }\r | |
7423 | else\r | |
7424 | return ONIGERR_UNDEFINED_GROUP_OPTION;\r | |
7425 | break;\r | |
14b0e578 | 7426 | #ifdef USE_POSIXLINE_OPTION\r |
b602265d DG |
7427 | case 'p':\r |
7428 | OPTION_NEGATE(option, ONIG_OPTION_MULTILINE|ONIG_OPTION_SINGLELINE, neg);\r | |
7429 | break;\r | |
14b0e578 | 7430 | #endif\r |
b602265d DG |
7431 | case 'W': OPTION_NEGATE(option, ONIG_OPTION_WORD_IS_ASCII, neg); break;\r |
7432 | case 'D': OPTION_NEGATE(option, ONIG_OPTION_DIGIT_IS_ASCII, neg); break;\r | |
7433 | case 'S': OPTION_NEGATE(option, ONIG_OPTION_SPACE_IS_ASCII, neg); break;\r | |
7434 | case 'P': OPTION_NEGATE(option, ONIG_OPTION_POSIX_IS_ASCII, neg); break;\r | |
7435 | \r | |
7436 | default:\r | |
7437 | return ONIGERR_UNDEFINED_GROUP_OPTION;\r | |
7438 | }\r | |
7439 | \r | |
7440 | if (c == ')') {\r | |
7441 | *np = node_new_option(option);\r | |
7442 | CHECK_NULL_RETURN_MEMERR(*np);\r | |
7443 | *src = p;\r | |
7444 | return 2; /* option only */\r | |
7445 | }\r | |
7446 | else if (c == ':') {\r | |
7447 | OnigOptionType prev = env->options;\r | |
7448 | \r | |
7449 | env->options = option;\r | |
7450 | r = fetch_token(tok, &p, end, env);\r | |
7451 | if (r < 0) return r;\r | |
7452 | r = parse_subexp(&target, tok, term, &p, end, env);\r | |
7453 | env->options = prev;\r | |
7454 | if (r < 0) {\r | |
7455 | onig_node_free(target);\r | |
7456 | return r;\r | |
7457 | }\r | |
7458 | *np = node_new_option(option);\r | |
7459 | CHECK_NULL_RETURN_MEMERR(*np);\r | |
7460 | NODE_BODY(*np) = target;\r | |
7461 | *src = p;\r | |
7462 | return 0;\r | |
7463 | }\r | |
7464 | \r | |
7465 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
7466 | PFETCH(c);\r | |
7467 | }\r | |
14b0e578 CS |
7468 | }\r |
7469 | break;\r | |
7470 | \r | |
7471 | default:\r | |
7472 | return ONIGERR_UNDEFINED_GROUP_OPTION;\r | |
7473 | }\r | |
7474 | }\r | |
b602265d DG |
7475 | #ifdef USE_CALLOUT\r |
7476 | else if (c == '*' &&\r | |
7477 | IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME)) {\r | |
7478 | PINC;\r | |
7479 | r = parse_callout_of_name(np, ')', &p, end, env);\r | |
7480 | if (r != 0) return r;\r | |
7481 | \r | |
7482 | goto end;\r | |
7483 | }\r | |
7484 | #endif\r | |
14b0e578 | 7485 | else {\r |
b602265d | 7486 | if (ONIG_IS_OPTION_ON(env->options, ONIG_OPTION_DONT_CAPTURE_GROUP))\r |
14b0e578 CS |
7487 | goto group;\r |
7488 | \r | |
b602265d | 7489 | *np = node_new_memory(0);\r |
14b0e578 CS |
7490 | CHECK_NULL_RETURN_MEMERR(*np);\r |
7491 | num = scan_env_add_mem_entry(env);\r | |
7492 | if (num < 0) return num;\r | |
b602265d | 7493 | ENCLOSURE_(*np)->m.regnum = num;\r |
14b0e578 CS |
7494 | }\r |
7495 | \r | |
7496 | CHECK_NULL_RETURN_MEMERR(*np);\r | |
7497 | r = fetch_token(tok, &p, end, env);\r | |
7498 | if (r < 0) return r;\r | |
7499 | r = parse_subexp(&target, tok, term, &p, end, env);\r | |
b602265d DG |
7500 | if (r < 0) {\r |
7501 | onig_node_free(target);\r | |
7502 | return r;\r | |
7503 | }\r | |
14b0e578 | 7504 | \r |
b602265d DG |
7505 | NODE_BODY(*np) = target;\r |
7506 | \r | |
7507 | if (NODE_TYPE(*np) == NODE_ENCLOSURE) {\r | |
7508 | if (ENCLOSURE_(*np)->type == ENCLOSURE_MEMORY) {\r | |
14b0e578 | 7509 | /* Don't move this to previous of parse_subexp() */\r |
b602265d | 7510 | r = scan_env_set_mem_node(env, ENCLOSURE_(*np)->m.regnum, *np);\r |
14b0e578 CS |
7511 | if (r != 0) return r;\r |
7512 | }\r | |
7513 | }\r | |
7514 | \r | |
b602265d | 7515 | end:\r |
14b0e578 CS |
7516 | *src = p;\r |
7517 | return 0;\r | |
7518 | }\r | |
7519 | \r | |
/* Printable quantifier spellings used in the nested-repeat warning;
   indexed by the value quantifier_type_num() returns. */
static const char* PopularQStr[] = {
  "?",
  "*",
  "+",
  "??",
  "*?",
  "+?"
};
7523 | \r | |
/* Replacement text shown in the nested-repeat warning; indexed by the
   ReduceTypeTable entry for the (target, nested) quantifier pair. */
static const char* ReduceQStr[] = {
  "",
  "",
  "*",
  "*?",
  "??",
  "+ and ??",
  "+? and ?"
};
7527 | \r | |
7528 | static int\r | |
7529 | set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)\r | |
7530 | {\r | |
b602265d | 7531 | QuantNode* qn;\r |
14b0e578 | 7532 | \r |
b602265d DG |
7533 | qn = QUANT_(qnode);\r |
7534 | if (qn->lower == 1 && qn->upper == 1)\r | |
14b0e578 | 7535 | return 1;\r |
14b0e578 | 7536 | \r |
b602265d DG |
7537 | switch (NODE_TYPE(target)) {\r |
7538 | case NODE_STRING:\r | |
14b0e578 | 7539 | if (! group) {\r |
b602265d DG |
7540 | if (str_node_can_be_split(target, env->enc)) {\r |
7541 | Node* n = str_node_split_last_char(target, env->enc);\r | |
7542 | if (IS_NOT_NULL(n)) {\r | |
7543 | NODE_BODY(qnode) = n;\r | |
7544 | return 2;\r | |
7545 | }\r | |
14b0e578 CS |
7546 | }\r |
7547 | }\r | |
7548 | break;\r | |
7549 | \r | |
b602265d | 7550 | case NODE_QUANT:\r |
14b0e578 CS |
7551 | { /* check redundant double repeat. */\r |
7552 | /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */\r | |
b602265d DG |
7553 | QuantNode* qnt = QUANT_(target);\r |
7554 | int nestq_num = quantifier_type_num(qn);\r | |
7555 | int targetq_num = quantifier_type_num(qnt);\r | |
14b0e578 CS |
7556 | \r |
7557 | #ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR\r | |
b602265d DG |
7558 | if (targetq_num >= 0 && nestq_num >= 0 &&\r |
7559 | IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {\r | |
14b0e578 CS |
7560 | UChar buf[WARN_BUFSIZE];\r |
7561 | \r | |
7562 | switch(ReduceTypeTable[targetq_num][nestq_num]) {\r | |
7563 | case RQ_ASIS:\r | |
7564 | break;\r | |
7565 | \r | |
7566 | case RQ_DEL:\r | |
7567 | if (onig_verb_warn != onig_null_warn) {\r | |
7568 | onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,\r | |
b602265d DG |
7569 | env->pattern, env->pattern_end,\r |
7570 | (UChar* )"redundant nested repeat operator");\r | |
14b0e578 CS |
7571 | (*onig_verb_warn)((char* )buf);\r |
7572 | }\r | |
7573 | goto warn_exit;\r | |
7574 | break;\r | |
7575 | \r | |
7576 | default:\r | |
7577 | if (onig_verb_warn != onig_null_warn) {\r | |
7578 | onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,\r | |
7579 | env->pattern, env->pattern_end,\r | |
7580 | (UChar* )"nested repeat operator %s and %s was replaced with '%s'",\r | |
7581 | PopularQStr[targetq_num], PopularQStr[nestq_num],\r | |
7582 | ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]);\r | |
7583 | (*onig_verb_warn)((char* )buf);\r | |
7584 | }\r | |
7585 | goto warn_exit;\r | |
7586 | break;\r | |
7587 | }\r | |
7588 | }\r | |
7589 | \r | |
7590 | warn_exit:\r | |
7591 | #endif\r | |
b602265d DG |
7592 | if (targetq_num >= 0 && nestq_num < 0) {\r |
7593 | if (targetq_num == 1 || targetq_num == 2) { /* * or + */\r | |
7594 | /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */\r | |
7595 | if (! IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) {\r | |
7596 | qn->upper = (qn->lower == 0 ? 1 : qn->lower);\r | |
7597 | }\r | |
7598 | }\r | |
7599 | }\r | |
7600 | else {\r | |
7601 | NODE_BODY(qnode) = target;\r | |
7602 | onig_reduce_nested_quantifier(qnode, target);\r | |
7603 | goto q_exit;\r | |
14b0e578 CS |
7604 | }\r |
7605 | }\r | |
7606 | break;\r | |
7607 | \r | |
7608 | default:\r | |
7609 | break;\r | |
7610 | }\r | |
7611 | \r | |
b602265d | 7612 | NODE_BODY(qnode) = target;\r |
14b0e578 CS |
7613 | q_exit:\r |
7614 | return 0;\r | |
7615 | }\r | |
7616 | \r | |
7617 | \r | |
14b0e578 CS |
7618 | #ifndef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS\r |
7619 | static int\r | |
7620 | clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc)\r | |
7621 | {\r | |
7622 | BBuf *tbuf;\r | |
7623 | int r;\r | |
7624 | \r | |
7625 | if (IS_NCCLASS_NOT(cc)) {\r | |
7626 | bitset_invert(cc->bs);\r | |
7627 | \r | |
7628 | if (! ONIGENC_IS_SINGLEBYTE(enc)) {\r | |
7629 | r = not_code_range_buf(enc, cc->mbuf, &tbuf);\r | |
7630 | if (r != 0) return r;\r | |
7631 | \r | |
7632 | bbuf_free(cc->mbuf);\r | |
7633 | cc->mbuf = tbuf;\r | |
7634 | }\r | |
7635 | \r | |
7636 | NCCLASS_CLEAR_NOT(cc);\r | |
7637 | }\r | |
7638 | \r | |
7639 | return 0;\r | |
7640 | }\r | |
7641 | #endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */\r | |
7642 | \r | |
7643 | typedef struct {\r | |
7644 | ScanEnv* env;\r | |
7645 | CClassNode* cc;\r | |
7646 | Node* alt_root;\r | |
7647 | Node** ptail;\r | |
7648 | } IApplyCaseFoldArg;\r | |
7649 | \r | |
7650 | static int\r | |
b602265d | 7651 | i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], int to_len, void* arg)\r |
14b0e578 CS |
7652 | {\r |
7653 | IApplyCaseFoldArg* iarg;\r | |
7654 | ScanEnv* env;\r | |
7655 | CClassNode* cc;\r | |
7656 | BitSetRef bs;\r | |
7657 | \r | |
7658 | iarg = (IApplyCaseFoldArg* )arg;\r | |
7659 | env = iarg->env;\r | |
7660 | cc = iarg->cc;\r | |
7661 | bs = cc->bs;\r | |
7662 | \r | |
7663 | if (to_len == 1) {\r | |
7664 | int is_in = onig_is_code_in_cc(env->enc, from, cc);\r | |
7665 | #ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS\r | |
7666 | if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) ||\r | |
b602265d | 7667 | (is_in == 0 && IS_NCCLASS_NOT(cc))) {\r |
14b0e578 | 7668 | if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {\r |
b602265d | 7669 | add_code_range(&(cc->mbuf), env, *to, *to);\r |
14b0e578 CS |
7670 | }\r |
7671 | else {\r | |
b602265d | 7672 | BITSET_SET_BIT(bs, *to);\r |
14b0e578 CS |
7673 | }\r |
7674 | }\r | |
7675 | #else\r | |
7676 | if (is_in != 0) {\r | |
7677 | if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {\r | |
b602265d DG |
7678 | if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc);\r |
7679 | add_code_range(&(cc->mbuf), env, *to, *to);\r | |
14b0e578 CS |
7680 | }\r |
7681 | else {\r | |
b602265d DG |
7682 | if (IS_NCCLASS_NOT(cc)) {\r |
7683 | BITSET_CLEAR_BIT(bs, *to);\r | |
7684 | }\r | |
7685 | else\r | |
7686 | BITSET_SET_BIT(bs, *to);\r | |
14b0e578 CS |
7687 | }\r |
7688 | }\r | |
7689 | #endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */\r | |
7690 | }\r | |
7691 | else {\r | |
7692 | int r, i, len;\r | |
7693 | UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];\r | |
7694 | Node *snode = NULL_NODE;\r | |
7695 | \r | |
7696 | if (onig_is_code_in_cc(env->enc, from, cc)\r | |
7697 | #ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS\r | |
b602265d | 7698 | && !IS_NCCLASS_NOT(cc)\r |
14b0e578 | 7699 | #endif\r |
b602265d | 7700 | ) {\r |
14b0e578 | 7701 | for (i = 0; i < to_len; i++) {\r |
b602265d DG |
7702 | len = ONIGENC_CODE_TO_MBC(env->enc, to[i], buf);\r |
7703 | if (i == 0) {\r | |
7704 | snode = onig_node_new_str(buf, buf + len);\r | |
7705 | CHECK_NULL_RETURN_MEMERR(snode);\r | |
7706 | \r | |
7707 | /* char-class expanded multi-char only\r | |
7708 | compare with string folded at match time. */\r | |
7709 | NODE_STRING_SET_AMBIG(snode);\r | |
7710 | }\r | |
7711 | else {\r | |
7712 | r = onig_node_str_cat(snode, buf, buf + len);\r | |
7713 | if (r < 0) {\r | |
7714 | onig_node_free(snode);\r | |
7715 | return r;\r | |
7716 | }\r | |
7717 | }\r | |
14b0e578 CS |
7718 | }\r |
7719 | \r | |
7720 | *(iarg->ptail) = onig_node_new_alt(snode, NULL_NODE);\r | |
7721 | CHECK_NULL_RETURN_MEMERR(*(iarg->ptail));\r | |
b602265d | 7722 | iarg->ptail = &(NODE_CDR((*(iarg->ptail))));\r |
14b0e578 CS |
7723 | }\r |
7724 | }\r | |
7725 | \r | |
7726 | return 0;\r | |
7727 | }\r | |
7728 | \r | |
7729 | static int\r | |
b602265d DG |
7730 | parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,\r |
7731 | ScanEnv* env)\r | |
14b0e578 CS |
7732 | {\r |
7733 | int r, len, group = 0;\r | |
7734 | Node* qn;\r | |
7735 | Node** targetp;\r | |
7736 | \r | |
7737 | *np = NULL;\r | |
7738 | if (tok->type == (enum TokenSyms )term)\r | |
7739 | goto end_of_token;\r | |
7740 | \r | |
7741 | switch (tok->type) {\r | |
7742 | case TK_ALT:\r | |
7743 | case TK_EOT:\r | |
7744 | end_of_token:\r | |
16bd7c35 DG |
7745 | *np = node_new_empty();\r |
7746 | CHECK_NULL_RETURN_MEMERR(*np);\r | |
7747 | return tok->type;\r | |
14b0e578 CS |
7748 | break;\r |
7749 | \r | |
7750 | case TK_SUBEXP_OPEN:\r | |
b602265d | 7751 | r = parse_enclosure(np, tok, TK_SUBEXP_CLOSE, src, end, env);\r |
14b0e578 CS |
7752 | if (r < 0) return r;\r |
7753 | if (r == 1) group = 1;\r | |
7754 | else if (r == 2) { /* option only */\r | |
7755 | Node* target;\r | |
b602265d | 7756 | OnigOptionType prev = env->options;\r |
14b0e578 | 7757 | \r |
b602265d | 7758 | env->options = ENCLOSURE_(*np)->o.options;\r |
14b0e578 CS |
7759 | r = fetch_token(tok, src, end, env);\r |
7760 | if (r < 0) return r;\r | |
7761 | r = parse_subexp(&target, tok, term, src, end, env);\r | |
b602265d DG |
7762 | env->options = prev;\r |
7763 | if (r < 0) {\r | |
7764 | onig_node_free(target);\r | |
7765 | return r;\r | |
7766 | }\r | |
7767 | NODE_BODY(*np) = target;\r | |
14b0e578 CS |
7768 | return tok->type;\r |
7769 | }\r | |
7770 | break;\r | |
7771 | \r | |
7772 | case TK_SUBEXP_CLOSE:\r | |
7773 | if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP))\r | |
7774 | return ONIGERR_UNMATCHED_CLOSE_PARENTHESIS;\r | |
7775 | \r | |
7776 | if (tok->escaped) goto tk_raw_byte;\r | |
7777 | else goto tk_byte;\r | |
7778 | break;\r | |
7779 | \r | |
7780 | case TK_STRING:\r | |
7781 | tk_byte:\r | |
7782 | {\r | |
7783 | *np = node_new_str(tok->backp, *src);\r | |
7784 | CHECK_NULL_RETURN_MEMERR(*np);\r | |
7785 | \r | |
7786 | while (1) {\r | |
b602265d DG |
7787 | r = fetch_token(tok, src, end, env);\r |
7788 | if (r < 0) return r;\r | |
7789 | if (r != TK_STRING) break;\r | |
14b0e578 | 7790 | \r |
b602265d DG |
7791 | r = onig_node_str_cat(*np, tok->backp, *src);\r |
7792 | if (r < 0) return r;\r | |
14b0e578 CS |
7793 | }\r |
7794 | \r | |
7795 | string_end:\r | |
7796 | targetp = np;\r | |
7797 | goto repeat;\r | |
7798 | }\r | |
7799 | break;\r | |
7800 | \r | |
7801 | case TK_RAW_BYTE:\r | |
7802 | tk_raw_byte:\r | |
7803 | {\r | |
7804 | *np = node_new_str_raw_char((UChar )tok->u.c);\r | |
7805 | CHECK_NULL_RETURN_MEMERR(*np);\r | |
7806 | len = 1;\r | |
7807 | while (1) {\r | |
b602265d DG |
7808 | if (len >= ONIGENC_MBC_MINLEN(env->enc)) {\r |
7809 | if (len == enclen(env->enc, STR_(*np)->s)) {/* should not enclen_end() */\r | |
7810 | r = fetch_token(tok, src, end, env);\r | |
7811 | NODE_STRING_CLEAR_RAW(*np);\r | |
7812 | goto string_end;\r | |
7813 | }\r | |
7814 | }\r | |
7815 | \r | |
7816 | r = fetch_token(tok, src, end, env);\r | |
7817 | if (r < 0) return r;\r | |
7818 | if (r != TK_RAW_BYTE) {\r | |
7819 | /* Don't use this, it is wrong for little endian encodings. */\r | |
14b0e578 | 7820 | #ifdef USE_PAD_TO_SHORT_BYTE_CHAR\r |
b602265d DG |
7821 | int rem;\r |
7822 | if (len < ONIGENC_MBC_MINLEN(env->enc)) {\r | |
7823 | rem = ONIGENC_MBC_MINLEN(env->enc) - len;\r | |
7824 | (void )node_str_head_pad(STR_(*np), rem, (UChar )0);\r | |
7825 | if (len + rem == enclen(env->enc, STR_(*np)->s)) {\r | |
7826 | NODE_STRING_CLEAR_RAW(*np);\r | |
7827 | goto string_end;\r | |
7828 | }\r | |
7829 | }\r | |
14b0e578 | 7830 | #endif\r |
b602265d DG |
7831 | return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;\r |
7832 | }\r | |
14b0e578 | 7833 | \r |
b602265d DG |
7834 | r = node_str_cat_char(*np, (UChar )tok->u.c);\r |
7835 | if (r < 0) return r;\r | |
14b0e578 | 7836 | \r |
b602265d | 7837 | len++;\r |
14b0e578 CS |
7838 | }\r |
7839 | }\r | |
7840 | break;\r | |
7841 | \r | |
7842 | case TK_CODE_POINT:\r | |
7843 | {\r | |
7844 | UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];\r | |
7845 | int num = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf);\r | |
7846 | if (num < 0) return num;\r | |
7847 | #ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG\r | |
7848 | *np = node_new_str_raw(buf, buf + num);\r | |
7849 | #else\r | |
7850 | *np = node_new_str(buf, buf + num);\r | |
7851 | #endif\r | |
7852 | CHECK_NULL_RETURN_MEMERR(*np);\r | |
7853 | }\r | |
7854 | break;\r | |
7855 | \r | |
7856 | case TK_QUOTE_OPEN:\r | |
7857 | {\r | |
7858 | OnigCodePoint end_op[2];\r | |
7859 | UChar *qstart, *qend, *nextp;\r | |
7860 | \r | |
7861 | end_op[0] = (OnigCodePoint )MC_ESC(env->syntax);\r | |
7862 | end_op[1] = (OnigCodePoint )'E';\r | |
7863 | qstart = *src;\r | |
7864 | qend = find_str_position(end_op, 2, qstart, end, &nextp, env->enc);\r | |
7865 | if (IS_NULL(qend)) {\r | |
b602265d | 7866 | nextp = qend = end;\r |
14b0e578 CS |
7867 | }\r |
7868 | *np = node_new_str(qstart, qend);\r | |
7869 | CHECK_NULL_RETURN_MEMERR(*np);\r | |
7870 | *src = nextp;\r | |
7871 | }\r | |
7872 | break;\r | |
7873 | \r | |
7874 | case TK_CHAR_TYPE:\r | |
7875 | {\r | |
7876 | switch (tok->u.prop.ctype) {\r | |
7877 | case ONIGENC_CTYPE_WORD:\r | |
b602265d DG |
7878 | *np = node_new_ctype(tok->u.prop.ctype, tok->u.prop.not, env->options);\r |
7879 | CHECK_NULL_RETURN_MEMERR(*np);\r | |
7880 | break;\r | |
14b0e578 CS |
7881 | \r |
7882 | case ONIGENC_CTYPE_SPACE:\r | |
7883 | case ONIGENC_CTYPE_DIGIT:\r | |
7884 | case ONIGENC_CTYPE_XDIGIT:\r | |
b602265d DG |
7885 | {\r |
7886 | CClassNode* cc;\r | |
7887 | \r | |
7888 | *np = node_new_cclass();\r | |
7889 | CHECK_NULL_RETURN_MEMERR(*np);\r | |
7890 | cc = CCLASS_(*np);\r | |
7891 | add_ctype_to_cc(cc, tok->u.prop.ctype, 0, env);\r | |
7892 | if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);\r | |
7893 | }\r | |
7894 | break;\r | |
14b0e578 CS |
7895 | \r |
7896 | default:\r | |
b602265d DG |
7897 | return ONIGERR_PARSER_BUG;\r |
7898 | break;\r | |
14b0e578 CS |
7899 | }\r |
7900 | }\r | |
7901 | break;\r | |
7902 | \r | |
7903 | case TK_CHAR_PROPERTY:\r | |
7904 | r = parse_char_property(np, tok, src, end, env);\r | |
7905 | if (r != 0) return r;\r | |
7906 | break;\r | |
7907 | \r | |
7908 | case TK_CC_OPEN:\r | |
7909 | {\r | |
7910 | CClassNode* cc;\r | |
7911 | \r | |
7912 | r = parse_char_class(np, tok, src, end, env);\r | |
7913 | if (r != 0) return r;\r | |
7914 | \r | |
b602265d DG |
7915 | cc = CCLASS_(*np);\r |
7916 | if (IS_IGNORECASE(env->options)) {\r | |
7917 | IApplyCaseFoldArg iarg;\r | |
7918 | \r | |
7919 | iarg.env = env;\r | |
7920 | iarg.cc = cc;\r | |
7921 | iarg.alt_root = NULL_NODE;\r | |
7922 | iarg.ptail = &(iarg.alt_root);\r | |
7923 | \r | |
7924 | r = ONIGENC_APPLY_ALL_CASE_FOLD(env->enc, env->case_fold_flag,\r | |
7925 | i_apply_case_fold, &iarg);\r | |
7926 | if (r != 0) {\r | |
7927 | onig_node_free(iarg.alt_root);\r | |
7928 | return r;\r | |
7929 | }\r | |
7930 | if (IS_NOT_NULL(iarg.alt_root)) {\r | |
14b0e578 CS |
7931 | Node* work = onig_node_new_alt(*np, iarg.alt_root);\r |
7932 | if (IS_NULL(work)) {\r | |
7933 | onig_node_free(iarg.alt_root);\r | |
7934 | return ONIGERR_MEMORY;\r | |
7935 | }\r | |
7936 | *np = work;\r | |
b602265d | 7937 | }\r |
14b0e578 CS |
7938 | }\r |
7939 | }\r | |
7940 | break;\r | |
7941 | \r | |
7942 | case TK_ANYCHAR:\r | |
7943 | *np = node_new_anychar();\r | |
7944 | CHECK_NULL_RETURN_MEMERR(*np);\r | |
7945 | break;\r | |
7946 | \r | |
7947 | case TK_ANYCHAR_ANYTIME:\r | |
7948 | *np = node_new_anychar();\r | |
7949 | CHECK_NULL_RETURN_MEMERR(*np);\r | |
7950 | qn = node_new_quantifier(0, REPEAT_INFINITE, 0);\r | |
7951 | CHECK_NULL_RETURN_MEMERR(qn);\r | |
b602265d | 7952 | NODE_BODY(qn) = *np;\r |
14b0e578 CS |
7953 | *np = qn;\r |
7954 | break;\r | |
7955 | \r | |
7956 | case TK_BACKREF:\r | |
7957 | len = tok->u.backref.num;\r | |
7958 | *np = node_new_backref(len,\r | |
b602265d DG |
7959 | (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)),\r |
7960 | tok->u.backref.by_name,\r | |
14b0e578 | 7961 | #ifdef USE_BACKREF_WITH_LEVEL\r |
b602265d DG |
7962 | tok->u.backref.exist_level,\r |
7963 | tok->u.backref.level,\r | |
14b0e578 | 7964 | #endif\r |
b602265d | 7965 | env);\r |
14b0e578 CS |
7966 | CHECK_NULL_RETURN_MEMERR(*np);\r |
7967 | break;\r | |
7968 | \r | |
b602265d | 7969 | #ifdef USE_CALL\r |
14b0e578 CS |
7970 | case TK_CALL:\r |
7971 | {\r | |
7972 | int gnum = tok->u.call.gnum;\r | |
7973 | \r | |
b602265d DG |
7974 | *np = node_new_call(tok->u.call.name, tok->u.call.name_end,\r |
7975 | gnum, tok->u.call.by_number);\r | |
14b0e578 CS |
7976 | CHECK_NULL_RETURN_MEMERR(*np);\r |
7977 | env->num_call++;\r | |
b602265d DG |
7978 | if (tok->u.call.by_number != 0 && gnum == 0) {\r |
7979 | env->has_call_zero = 1;\r | |
7980 | }\r | |
14b0e578 CS |
7981 | }\r |
7982 | break;\r | |
7983 | #endif\r | |
7984 | \r | |
7985 | case TK_ANCHOR:\r | |
b602265d DG |
7986 | {\r |
7987 | int ascii_mode =\r | |
7988 | IS_WORD_ASCII(env->options) && IS_WORD_ANCHOR_TYPE(tok->u.anchor) ? 1 : 0;\r | |
7989 | *np = onig_node_new_anchor(tok->u.anchor, ascii_mode);\r | |
7990 | }\r | |
14b0e578 CS |
7991 | break;\r |
7992 | \r | |
7993 | case TK_OP_REPEAT:\r | |
7994 | case TK_INTERVAL:\r | |
7995 | if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS)) {\r | |
7996 | if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS))\r | |
b602265d | 7997 | return ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED;\r |
16bd7c35 | 7998 | else {\r |
b602265d | 7999 | *np = node_new_empty();\r |
16bd7c35 DG |
8000 | CHECK_NULL_RETURN_MEMERR(*np);\r |
8001 | }\r | |
14b0e578 CS |
8002 | }\r |
8003 | else {\r | |
8004 | goto tk_byte;\r | |
8005 | }\r | |
8006 | break;\r | |
8007 | \r | |
b602265d DG |
8008 | case TK_KEEP:\r |
8009 | r = node_new_keep(np, env);\r | |
8010 | if (r < 0) return r;\r | |
8011 | break;\r | |
8012 | \r | |
8013 | case TK_GENERAL_NEWLINE:\r | |
8014 | r = node_new_general_newline(np, env);\r | |
8015 | if (r < 0) return r;\r | |
8016 | break;\r | |
8017 | \r | |
8018 | case TK_NO_NEWLINE:\r | |
8019 | r = node_new_no_newline(np, env);\r | |
8020 | if (r < 0) return r;\r | |
8021 | break;\r | |
8022 | \r | |
8023 | case TK_TRUE_ANYCHAR:\r | |
8024 | r = node_new_true_anychar(np, env);\r | |
8025 | if (r < 0) return r;\r | |
8026 | break;\r | |
8027 | \r | |
8028 | case TK_EXTENDED_GRAPHEME_CLUSTER:\r | |
8029 | r = make_extended_grapheme_cluster(np, env);\r | |
8030 | if (r < 0) return r;\r | |
8031 | break;\r | |
8032 | \r | |
14b0e578 CS |
8033 | default:\r |
8034 | return ONIGERR_PARSER_BUG;\r | |
8035 | break;\r | |
8036 | }\r | |
8037 | \r | |
8038 | {\r | |
8039 | targetp = np;\r | |
8040 | \r | |
8041 | re_entry:\r | |
8042 | r = fetch_token(tok, src, end, env);\r | |
8043 | if (r < 0) return r;\r | |
8044 | \r | |
8045 | repeat:\r | |
8046 | if (r == TK_OP_REPEAT || r == TK_INTERVAL) {\r | |
8047 | if (is_invalid_quantifier_target(*targetp))\r | |
b602265d | 8048 | return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID;\r |
14b0e578 CS |
8049 | \r |
8050 | qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper,\r | |
b602265d | 8051 | (r == TK_INTERVAL ? 1 : 0));\r |
14b0e578 | 8052 | CHECK_NULL_RETURN_MEMERR(qn);\r |
b602265d | 8053 | QUANT_(qn)->greedy = tok->u.repeat.greedy;\r |
14b0e578 CS |
8054 | r = set_quantifier(qn, *targetp, group, env);\r |
8055 | if (r < 0) {\r | |
b602265d DG |
8056 | onig_node_free(qn);\r |
8057 | return r;\r | |
14b0e578 CS |
8058 | }\r |
8059 | \r | |
8060 | if (tok->u.repeat.possessive != 0) {\r | |
b602265d DG |
8061 | Node* en;\r |
8062 | en = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);\r | |
8063 | if (IS_NULL(en)) {\r | |
8064 | onig_node_free(qn);\r | |
8065 | return ONIGERR_MEMORY;\r | |
8066 | }\r | |
8067 | NODE_BODY(en) = qn;\r | |
8068 | qn = en;\r | |
14b0e578 CS |
8069 | }\r |
8070 | \r | |
8071 | if (r == 0) {\r | |
b602265d | 8072 | *targetp = qn;\r |
14b0e578 CS |
8073 | }\r |
8074 | else if (r == 1) {\r | |
b602265d | 8075 | onig_node_free(qn);\r |
14b0e578 CS |
8076 | }\r |
8077 | else if (r == 2) { /* split case: /abc+/ */\r | |
b602265d DG |
8078 | Node *tmp;\r |
8079 | \r | |
8080 | *targetp = node_new_list(*targetp, NULL);\r | |
8081 | if (IS_NULL(*targetp)) {\r | |
8082 | onig_node_free(qn);\r | |
8083 | return ONIGERR_MEMORY;\r | |
8084 | }\r | |
8085 | tmp = NODE_CDR(*targetp) = node_new_list(qn, NULL);\r | |
8086 | if (IS_NULL(tmp)) {\r | |
8087 | onig_node_free(qn);\r | |
8088 | return ONIGERR_MEMORY;\r | |
8089 | }\r | |
8090 | targetp = &(NODE_CAR(tmp));\r | |
14b0e578 CS |
8091 | }\r |
8092 | goto re_entry;\r | |
8093 | }\r | |
8094 | }\r | |
8095 | \r | |
8096 | return r;\r | |
8097 | }\r | |
8098 | \r | |
8099 | static int\r | |
b602265d DG |
8100 | parse_branch(Node** top, OnigToken* tok, int term, UChar** src, UChar* end,\r |
8101 | ScanEnv* env)\r | |
14b0e578 CS |
8102 | {\r |
8103 | int r;\r | |
8104 | Node *node, **headp;\r | |
8105 | \r | |
8106 | *top = NULL;\r | |
8107 | r = parse_exp(&node, tok, term, src, end, env);\r | |
b602265d DG |
8108 | if (r < 0) {\r |
8109 | onig_node_free(node);\r | |
8110 | return r;\r | |
8111 | }\r | |
14b0e578 CS |
8112 | \r |
8113 | if (r == TK_EOT || r == term || r == TK_ALT) {\r | |
8114 | *top = node;\r | |
8115 | }\r | |
8116 | else {\r | |
8117 | *top = node_new_list(node, NULL);\r | |
a5def177 DG |
8118 | if (IS_NULL(*top)) {\r |
8119 | onig_node_free(node);\r | |
8120 | return ONIGERR_MEMORY;\r | |
8121 | }\r | |
8122 | \r | |
b602265d | 8123 | headp = &(NODE_CDR(*top));\r |
14b0e578 CS |
8124 | while (r != TK_EOT && r != term && r != TK_ALT) {\r |
8125 | r = parse_exp(&node, tok, term, src, end, env);\r | |
b602265d DG |
8126 | if (r < 0) {\r |
8127 | onig_node_free(node);\r | |
8128 | return r;\r | |
8129 | }\r | |
14b0e578 | 8130 | \r |
b602265d DG |
8131 | if (NODE_TYPE(node) == NODE_LIST) {\r |
8132 | *headp = node;\r | |
8133 | while (IS_NOT_NULL(NODE_CDR(node))) node = NODE_CDR(node);\r | |
8134 | headp = &(NODE_CDR(node));\r | |
14b0e578 CS |
8135 | }\r |
8136 | else {\r | |
b602265d DG |
8137 | *headp = node_new_list(node, NULL);\r |
8138 | headp = &(NODE_CDR(*headp));\r | |
14b0e578 CS |
8139 | }\r |
8140 | }\r | |
8141 | }\r | |
8142 | \r | |
8143 | return r;\r | |
8144 | }\r | |
8145 | \r | |
8146 | /* term_tok: TK_EOT or TK_SUBEXP_CLOSE */\r | |
8147 | static int\r | |
b602265d DG |
8148 | parse_subexp(Node** top, OnigToken* tok, int term, UChar** src, UChar* end,\r |
8149 | ScanEnv* env)\r | |
14b0e578 CS |
8150 | {\r |
8151 | int r;\r | |
8152 | Node *node, **headp;\r | |
8153 | \r | |
8154 | *top = NULL;\r | |
b602265d DG |
8155 | env->parse_depth++;\r |
8156 | if (env->parse_depth > ParseDepthLimit)\r | |
8157 | return ONIGERR_PARSE_DEPTH_LIMIT_OVER;\r | |
a5def177 | 8158 | \r |
14b0e578 CS |
8159 | r = parse_branch(&node, tok, term, src, end, env);\r |
8160 | if (r < 0) {\r | |
8161 | onig_node_free(node);\r | |
8162 | return r;\r | |
8163 | }\r | |
8164 | \r | |
8165 | if (r == term) {\r | |
8166 | *top = node;\r | |
8167 | }\r | |
8168 | else if (r == TK_ALT) {\r | |
8169 | *top = onig_node_new_alt(node, NULL);\r | |
a5def177 DG |
8170 | if (IS_NULL(*top)) {\r |
8171 | onig_node_free(node);\r | |
8172 | return ONIGERR_MEMORY;\r | |
8173 | }\r | |
8174 | \r | |
b602265d | 8175 | headp = &(NODE_CDR(*top));\r |
14b0e578 CS |
8176 | while (r == TK_ALT) {\r |
8177 | r = fetch_token(tok, src, end, env);\r | |
8178 | if (r < 0) return r;\r | |
8179 | r = parse_branch(&node, tok, term, src, end, env);\r | |
b602265d DG |
8180 | if (r < 0) {\r |
8181 | onig_node_free(node);\r | |
8182 | return r;\r | |
8183 | }\r | |
14b0e578 | 8184 | *headp = onig_node_new_alt(node, NULL);\r |
a5def177 DG |
8185 | if (IS_NULL(*headp)) {\r |
8186 | onig_node_free(node);\r | |
8187 | onig_node_free(*top);\r | |
8188 | return ONIGERR_MEMORY;\r | |
8189 | }\r | |
8190 | \r | |
b602265d | 8191 | headp = &(NODE_CDR(*headp));\r |
14b0e578 CS |
8192 | }\r |
8193 | \r | |
8194 | if (tok->type != (enum TokenSyms )term)\r | |
8195 | goto err;\r | |
8196 | }\r | |
8197 | else {\r | |
b602265d | 8198 | onig_node_free(node);\r |
14b0e578 CS |
8199 | err:\r |
8200 | if (term == TK_SUBEXP_CLOSE)\r | |
8201 | return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;\r | |
8202 | else\r | |
8203 | return ONIGERR_PARSER_BUG;\r | |
8204 | }\r | |
8205 | \r | |
b602265d | 8206 | env->parse_depth--;\r |
14b0e578 CS |
8207 | return r;\r |
8208 | }\r | |
8209 | \r | |
8210 | static int\r | |
8211 | parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)\r | |
8212 | {\r | |
8213 | int r;\r | |
8214 | OnigToken tok;\r | |
8215 | \r | |
8216 | r = fetch_token(&tok, src, end, env);\r | |
8217 | if (r < 0) return r;\r | |
8218 | r = parse_subexp(top, &tok, TK_EOT, src, end, env);\r | |
8219 | if (r < 0) return r;\r | |
b602265d DG |
8220 | \r |
8221 | return 0;\r | |
8222 | }\r | |
8223 | \r | |
#ifdef USE_CALL
/*
 * Wrap `node` in an unnamed memory enclosure with group number 0 and
 * register that enclosure as memory node 0 in `env`.
 *
 *   node  : tree to wrap; it stays owned by the caller when this function
 *           fails.
 *   rnode : receives the new enclosure on success only.
 *
 * Returns 0 on success, ONIGERR_MEMORY if the enclosure cannot be
 * allocated, or the non-zero result of scan_env_set_mem_node().
 */
static int
make_call_zero_body(Node* node, ScanEnv* env, Node** rnode)
{
  int r;

  Node* x = node_new_memory(0 /* 0: is not named */);
  CHECK_NULL_RETURN_MEMERR(x);

  NODE_BODY(x) = node;
  ENCLOSURE_(x)->m.regnum = 0;
  r = scan_env_set_mem_node(env, 0, x);
  if (r != 0) {
    /* Detach the body first: the caller still holds `node` (as *root in
       onig_parse_tree()), so only the wrapper may be released here. */
    NODE_BODY(x) = NULL_NODE;
    onig_node_free(x);
    return r;
  }

  *rnode = x;
  return 0;
}
#endif
14b0e578 CS |
8245 | \r |
8246 | extern int\r | |
b602265d DG |
8247 | onig_parse_tree(Node** root, const UChar* pattern, const UChar* end,\r |
8248 | regex_t* reg, ScanEnv* env)\r | |
14b0e578 CS |
8249 | {\r |
8250 | int r;\r | |
8251 | UChar* p;\r | |
b602265d DG |
8252 | #ifdef USE_CALLOUT\r |
8253 | RegexExt* ext;\r | |
8254 | #endif\r | |
14b0e578 | 8255 | \r |
14b0e578 | 8256 | names_clear(reg);\r |
14b0e578 CS |
8257 | \r |
8258 | scan_env_clear(env);\r | |
b602265d | 8259 | env->options = reg->options;\r |
14b0e578 CS |
8260 | env->case_fold_flag = reg->case_fold_flag;\r |
8261 | env->enc = reg->enc;\r | |
8262 | env->syntax = reg->syntax;\r | |
8263 | env->pattern = (UChar* )pattern;\r | |
8264 | env->pattern_end = (UChar* )end;\r | |
8265 | env->reg = reg;\r | |
8266 | \r | |
8267 | *root = NULL;\r | |
b602265d DG |
8268 | \r |
8269 | if (! ONIGENC_IS_VALID_MBC_STRING(env->enc, pattern, end))\r | |
8270 | return ONIGERR_INVALID_WIDE_CHAR_VALUE;\r | |
8271 | \r | |
14b0e578 CS |
8272 | p = (UChar* )pattern;\r |
8273 | r = parse_regexp(root, &p, (UChar* )end, env);\r | |
b602265d DG |
8274 | \r |
8275 | #ifdef USE_CALL\r | |
8276 | if (r != 0) return r;\r | |
8277 | \r | |
8278 | if (env->has_call_zero != 0) {\r | |
8279 | Node* zero_node;\r | |
8280 | r = make_call_zero_body(*root, env, &zero_node);\r | |
8281 | if (r != 0) return r;\r | |
8282 | \r | |
8283 | *root = zero_node;\r | |
8284 | }\r | |
8285 | #endif\r | |
8286 | \r | |
14b0e578 | 8287 | reg->num_mem = env->num_mem;\r |
b602265d DG |
8288 | \r |
8289 | #ifdef USE_CALLOUT\r | |
8290 | ext = REG_EXTP(reg);\r | |
8291 | if (IS_NOT_NULL(ext) && ext->callout_num > 0) {\r | |
8292 | r = setup_ext_callout_list_values(reg);\r | |
8293 | }\r | |
8294 | #endif\r | |
8295 | \r | |
14b0e578 CS |
8296 | return r;\r |
8297 | }\r | |
8298 | \r | |
8299 | extern void\r | |
8300 | onig_scan_env_set_error_string(ScanEnv* env, int ecode ARG_UNUSED,\r | |
b602265d | 8301 | UChar* arg, UChar* arg_end)\r |
14b0e578 CS |
8302 | {\r |
8303 | env->error = arg;\r | |
8304 | env->error_end = arg_end;\r | |
8305 | }\r |