]>
Commit | Line | Data |
---|---|---|
14b0e578 CS |
1 | /**********************************************************************\r |
2 | regparse.c - Oniguruma (regular expression library)\r | |
3 | **********************************************************************/\r | |
4 | /*-\r | |
b602265d | 5 | * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>\r |
14b0e578 CS |
6 | * All rights reserved.\r |
7 | *\r | |
14b0e578 CS |
8 | * Redistribution and use in source and binary forms, with or without\r |
9 | * modification, are permitted provided that the following conditions\r | |
10 | * are met:\r | |
11 | * 1. Redistributions of source code must retain the above copyright\r | |
12 | * notice, this list of conditions and the following disclaimer.\r | |
13 | * 2. Redistributions in binary form must reproduce the above copyright\r | |
14 | * notice, this list of conditions and the following disclaimer in the\r | |
15 | * documentation and/or other materials provided with the distribution.\r | |
16 | *\r | |
17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND\r | |
18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\r | |
19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\r | |
20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE\r | |
21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\r | |
22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS\r | |
23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)\r | |
24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT\r | |
25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY\r | |
26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF\r | |
27 | * SUCH DAMAGE.\r | |
28 | */\r | |
29 | \r | |
30 | #include "regparse.h"\r | |
31 | #include "st.h"\r | |
32 | \r | |
b602265d DG |
33 | #ifdef DEBUG_NODE_FREE\r |
34 | #include <stdio.h>\r | |
35 | #endif\r | |
36 | \r | |
37 | #define INIT_TAG_NAMES_ALLOC_NUM 5\r | |
38 | \r | |
14b0e578 CS |
39 | #define WARN_BUFSIZE 256\r |
40 | \r | |
41 | #define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS\r | |
42 | \r | |
b602265d DG |
/*
 * Character tests for callout identifiers.
 * Both macros accept ASCII letters, ASCII digits and '_'.
 * The argument is parenthesized at every use so that an expression
 * argument (e.g. `x & 0x7f`) is compared as a whole.  It is still
 * evaluated more than once, so it must not have side effects.
 */
#define IS_ALLOWED_CODE_IN_CALLOUT_NAME(c) \
  (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z') || \
   ((c) >= '0' && (c) <= '9') || (c) == '_' /* || (c) == '!' */)
#define IS_ALLOWED_CODE_IN_CALLOUT_TAG_NAME(c) \
  (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z') || \
   ((c) >= '0' && (c) <= '9') || (c) == '_')
47 | \r | |
48 | \r | |
49 | OnigSyntaxType OnigSyntaxOniguruma = {\r | |
50 | (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |\r | |
51 | ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |\r | |
52 | ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_O_BRACE_OCTAL |\r | |
53 | ONIG_SYN_OP_ESC_CONTROL_CHARS |\r | |
54 | ONIG_SYN_OP_ESC_C_CONTROL )\r | |
55 | & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )\r | |
56 | , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |\r | |
57 | ONIG_SYN_OP2_OPTION_RUBY |\r | |
58 | ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |\r | |
59 | ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE |\r | |
60 | ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP |\r | |
61 | ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS |\r | |
62 | ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME |\r | |
63 | ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER |\r | |
64 | ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE |\r | |
65 | ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT |\r | |
66 | ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP |\r | |
67 | ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |\r | |
68 | ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |\r | |
69 | ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |\r | |
70 | ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |\r | |
71 | ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |\r | |
72 | ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |\r | |
73 | ONIG_SYN_OP2_ESC_H_XDIGIT | ONIG_SYN_OP2_ESC_U_HEX4 )\r | |
74 | , ( SYN_GNU_REGEX_BV | \r | |
75 | ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |\r | |
76 | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |\r | |
77 | ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |\r | |
78 | ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |\r | |
79 | ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |\r | |
80 | ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |\r | |
81 | ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )\r | |
82 | , ONIG_OPTION_NONE\r | |
83 | ,\r | |
84 | {\r | |
85 | (OnigCodePoint )'\\' /* esc */\r | |
86 | , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */\r | |
87 | , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */\r | |
88 | , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */\r | |
89 | , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */\r | |
90 | , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */\r | |
91 | }\r | |
92 | };\r | |
14b0e578 CS |
93 | \r |
94 | OnigSyntaxType OnigSyntaxRuby = {\r | |
95 | (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |\r | |
96 | ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |\r | |
b602265d DG |
97 | ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_O_BRACE_OCTAL |\r |
98 | ONIG_SYN_OP_ESC_CONTROL_CHARS |\r | |
14b0e578 CS |
99 | ONIG_SYN_OP_ESC_C_CONTROL )\r |
100 | & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )\r | |
101 | , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |\r | |
102 | ONIG_SYN_OP2_OPTION_RUBY |\r | |
103 | ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |\r | |
b602265d DG |
104 | ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE |\r |
105 | ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP |\r | |
106 | ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER |\r | |
107 | ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE |\r | |
108 | ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP |\r | |
14b0e578 CS |
109 | ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |\r |
110 | ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |\r | |
111 | ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |\r | |
112 | ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |\r | |
113 | ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |\r | |
114 | ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |\r | |
b602265d | 115 | ONIG_SYN_OP2_ESC_H_XDIGIT | ONIG_SYN_OP2_ESC_U_HEX4 )\r |
14b0e578 CS |
116 | , ( SYN_GNU_REGEX_BV | \r |
117 | ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |\r | |
118 | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |\r | |
119 | ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |\r | |
120 | ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |\r | |
121 | ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |\r | |
122 | ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |\r | |
123 | ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )\r | |
124 | , ONIG_OPTION_NONE\r | |
125 | ,\r | |
126 | {\r | |
127 | (OnigCodePoint )'\\' /* esc */\r | |
128 | , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */\r | |
129 | , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */\r | |
130 | , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */\r | |
131 | , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */\r | |
132 | , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */\r | |
133 | }\r | |
134 | };\r | |
135 | \r | |
b602265d | 136 | OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_ONIGURUMA;\r |
14b0e578 CS |
137 | \r |
138 | extern void onig_null_warn(const char* s ARG_UNUSED) { }\r | |
139 | \r | |
140 | #ifdef DEFAULT_WARN_FUNCTION\r | |
141 | static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION;\r | |
142 | #else\r | |
143 | static OnigWarnFunc onig_warn = onig_null_warn;\r | |
144 | #endif\r | |
145 | \r | |
146 | #ifdef DEFAULT_VERB_WARN_FUNCTION\r | |
147 | static OnigWarnFunc onig_verb_warn = (OnigWarnFunc )DEFAULT_VERB_WARN_FUNCTION;\r | |
148 | #else\r | |
149 | static OnigWarnFunc onig_verb_warn = onig_null_warn;\r | |
150 | #endif\r | |
151 | \r | |
152 | extern void onig_set_warn_func(OnigWarnFunc f)\r | |
153 | {\r | |
154 | onig_warn = f;\r | |
155 | }\r | |
156 | \r | |
157 | extern void onig_set_verb_warn_func(OnigWarnFunc f)\r | |
158 | {\r | |
159 | onig_verb_warn = f;\r | |
160 | }\r | |
161 | \r | |
b602265d DG |
162 | extern void\r |
163 | onig_warning(const char* s)\r | |
164 | {\r | |
165 | if (onig_warn == onig_null_warn) return ;\r | |
166 | \r | |
167 | (*onig_warn)(s);\r | |
168 | }\r | |
169 | \r | |
#define DEFAULT_MAX_CAPTURE_NUM   32767

/* Upper bound on capture groups; changed via onig_set_capture_num_limit(). */
static int MaxCaptureNum = DEFAULT_MAX_CAPTURE_NUM;

/*
 * Set the capture-number limit.
 * Returns 0 on success, or -1 (limit unchanged) when num is negative.
 */
extern int
onig_set_capture_num_limit(int num)
{
  if (num >= 0) {
    MaxCaptureNum = num;
    return 0;
  }

  return -1;
}
182 | \r | |
183 | static unsigned int ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT;\r | |
184 | \r | |
185 | extern unsigned int\r | |
186 | onig_get_parse_depth_limit(void)\r | |
187 | {\r | |
188 | return ParseDepthLimit;\r | |
189 | }\r | |
190 | \r | |
191 | extern int\r | |
192 | onig_set_parse_depth_limit(unsigned int depth)\r | |
193 | {\r | |
194 | if (depth == 0)\r | |
195 | ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT;\r | |
196 | else\r | |
197 | ParseDepthLimit = depth;\r | |
198 | return 0;\r | |
199 | }\r | |
200 | \r | |
/*
 * Multiply two non-negative ints with an overflow check.
 *
 * Returns 0 when either operand is 0, x * y when the product fits
 * in an int, and -1 when it would exceed INT_MAX.
 * Operands are expected to be non-negative (as the name says);
 * negative values are not range-checked here.
 */
static int
positive_int_multiply(int x, int y)
{
  if (x == 0 || y == 0) return 0;

  /* x <= INT_MAX / y  implies  x * y <= INT_MAX; "<=" keeps products
     that fit exactly (e.g. INT_MAX * 1) from being rejected. */
  if (x <= INT_MAX / y)
    return x * y;
  else
    return -1;
}
211 | \r | |
14b0e578 CS |
212 | static void\r |
213 | bbuf_free(BBuf* bbuf)\r | |
214 | {\r | |
215 | if (IS_NOT_NULL(bbuf)) {\r | |
216 | if (IS_NOT_NULL(bbuf->p)) xfree(bbuf->p);\r | |
217 | xfree(bbuf);\r | |
218 | }\r | |
219 | }\r | |
220 | \r | |
221 | static int\r | |
222 | bbuf_clone(BBuf** rto, BBuf* from)\r | |
223 | {\r | |
224 | int r;\r | |
225 | BBuf *to;\r | |
226 | \r | |
227 | *rto = to = (BBuf* )xmalloc(sizeof(BBuf));\r | |
228 | CHECK_NULL_RETURN_MEMERR(to);\r | |
b602265d DG |
229 | r = BB_INIT(to, from->alloc);\r |
230 | if (r != 0) {\r | |
231 | xfree(to->p);\r | |
232 | *rto = 0;\r | |
233 | return r;\r | |
234 | }\r | |
14b0e578 CS |
235 | to->used = from->used;\r |
236 | xmemcpy(to->p, from->p, from->used);\r | |
237 | return 0;\r | |
238 | }\r | |
239 | \r | |
b602265d DG |
240 | static int backref_rel_to_abs(int rel_no, ScanEnv* env)\r |
241 | {\r | |
242 | if (rel_no > 0) {\r | |
243 | return env->num_mem + rel_no;\r | |
244 | }\r | |
245 | else {\r | |
246 | return env->num_mem + 1 + rel_no;\r | |
247 | }\r | |
248 | }\r | |
249 | \r | |
/* Set / clear the bits f in option word v. */
#define OPTION_ON(v,f)     ((v) |= (f))
#define OPTION_OFF(v,f)    ((v) &= ~(f))

/*
 * Clear the bits f in v when `negative` is non-zero, set them otherwise.
 * The whole expansion is parenthesized so the macro can be used inside
 * a larger expression without the ternary absorbing its neighbours.
 */
#define OPTION_NEGATE(v,f,negative) \
  ((negative) ? ((v) &= ~(f)) : ((v) |= (f)))
14b0e578 CS |
254 | \r |
/* First code point of the multi-byte range: 0 when the encoding's
   minimum character length is above 1, 0x80 otherwise. */
#define MBCODE_START_POS(enc) \
  (OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80)

/* Add the range [MBCODE_START_POS(enc), ~0] to the code range buffer. */
#define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \
  add_code_range_to_buf(pbuf, MBCODE_START_POS(enc), ~((OnigCodePoint )0))

/*
 * For encodings that are not single-byte, add the whole multi-byte range
 * to mbuf.  Uses the caller's local `r` and returns from the calling
 * function when the add fails.
 */
#define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\
  if (! ONIGENC_IS_SINGLEBYTE(enc)) {\
    r = SET_ALL_MULTI_BYTE_RANGE(enc, &(mbuf));\
    if (r != 0) return r;\
  }\
} while (0)
267 | \r | |
268 | \r | |
/* Store 1 in `empty` when every word of bit set bs is zero, 0 otherwise. */
#define BITSET_IS_EMPTY(bs,empty) do {\
  int bitset_word_idx;\
  empty = 1;\
  for (bitset_word_idx = 0; bitset_word_idx < (int )BITSET_SIZE; bitset_word_idx++) {\
    if ((bs)[bitset_word_idx] != 0) {\
      empty = 0; break;\
    }\
  }\
} while (0)
278 | \r | |
279 | static void\r | |
280 | bitset_set_range(BitSetRef bs, int from, int to)\r | |
281 | {\r | |
282 | int i;\r | |
283 | for (i = from; i <= to && i < SINGLE_BYTE_SIZE; i++) {\r | |
284 | BITSET_SET_BIT(bs, i);\r | |
285 | }\r | |
286 | }\r | |
287 | \r | |
#if 0
/* Disabled: set every bit of bs. */
static void
bitset_set_all(BitSetRef bs)
{
  int k;

  for (k = 0; k < BITSET_SIZE; k++) {
    bs[k] = ~((Bits )0);
  }
}
#endif
296 | \r | |
297 | static void\r | |
298 | bitset_invert(BitSetRef bs)\r | |
299 | {\r | |
300 | int i;\r | |
301 | for (i = 0; i < (int )BITSET_SIZE; i++) { bs[i] = ~(bs[i]); }\r | |
302 | }\r | |
303 | \r | |
304 | static void\r | |
305 | bitset_invert_to(BitSetRef from, BitSetRef to)\r | |
306 | {\r | |
307 | int i;\r | |
308 | for (i = 0; i < (int )BITSET_SIZE; i++) { to[i] = ~(from[i]); }\r | |
309 | }\r | |
310 | \r | |
311 | static void\r | |
312 | bitset_and(BitSetRef dest, BitSetRef bs)\r | |
313 | {\r | |
314 | int i;\r | |
315 | for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] &= bs[i]; }\r | |
316 | }\r | |
317 | \r | |
318 | static void\r | |
319 | bitset_or(BitSetRef dest, BitSetRef bs)\r | |
320 | {\r | |
321 | int i;\r | |
322 | for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] |= bs[i]; }\r | |
323 | }\r | |
324 | \r | |
325 | static void\r | |
326 | bitset_copy(BitSetRef dest, BitSetRef bs)\r | |
327 | {\r | |
328 | int i;\r | |
329 | for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] = bs[i]; }\r | |
330 | }\r | |
331 | \r | |
332 | extern int\r | |
333 | onig_strncmp(const UChar* s1, const UChar* s2, int n)\r | |
334 | {\r | |
335 | int x;\r | |
336 | \r | |
337 | while (n-- > 0) {\r | |
338 | x = *s2++ - *s1++;\r | |
339 | if (x) return x;\r | |
340 | }\r | |
341 | return 0;\r | |
342 | }\r | |
343 | \r | |
344 | extern void\r | |
345 | onig_strcpy(UChar* dest, const UChar* src, const UChar* end)\r | |
346 | {\r | |
b602265d | 347 | int len = (int )(end - src);\r |
14b0e578 CS |
348 | if (len > 0) {\r |
349 | xmemcpy(dest, src, len);\r | |
350 | dest[len] = (UChar )0;\r | |
351 | }\r | |
352 | }\r | |
353 | \r | |
b602265d DG |
354 | static int\r |
355 | save_entry(ScanEnv* env, enum SaveType type, int* id)\r | |
14b0e578 | 356 | {\r |
b602265d | 357 | int nid = env->save_num;\r |
14b0e578 | 358 | \r |
b602265d DG |
359 | #if 0\r |
360 | if (IS_NULL(env->saves)) {\r | |
361 | int n = 10;\r | |
362 | env->saves = (SaveItem* )xmalloc(sizeof(SaveItem) * n);\r | |
363 | CHECK_NULL_RETURN_MEMERR(env->saves);\r | |
364 | env->save_alloc_num = n;\r | |
365 | }\r | |
366 | else if (env->save_alloc_num <= nid) {\r | |
367 | int n = env->save_alloc_num * 2;\r | |
368 | SaveItem* p = (SaveItem* )xrealloc(env->saves, sizeof(SaveItem) * n, sizeof(SaveItem)*env->save_alloc_num);\r | |
369 | CHECK_NULL_RETURN_MEMERR(p);\r | |
370 | env->saves = p;\r | |
371 | env->save_alloc_num = n;\r | |
372 | }\r | |
14b0e578 | 373 | \r |
b602265d DG |
374 | env->saves[nid].type = type;\r |
375 | #endif\r | |
14b0e578 | 376 | \r |
b602265d DG |
377 | env->save_num++;\r |
378 | *id = nid;\r | |
379 | return 0;\r | |
14b0e578 | 380 | }\r |
14b0e578 CS |
381 | \r |
/* scan pattern methods */
/*
 * These macros rely on names in the enclosing function: p, end and enc,
 * plus pfetch_prev (declared with PFETCH_READY) for the variants that
 * support PUNFETCH.  The *_S variants do not record pfetch_prev.
 */
#define PEND_VALUE   0

#define PFETCH_READY  UChar* pfetch_prev
#define PEND          (p < end ?  0 : 1)
#define PUNFETCH      p = pfetch_prev

/* Advance p by one character, remembering the old position. */
#define PINC  do { \
  pfetch_prev = p; \
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
} while (0)

/* Read the code at p into c, then advance as PINC does. */
#define PFETCH(c)  do { \
  c = ONIGENC_MBC_TO_CODE(enc, p, end); \
  pfetch_prev = p; \
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
} while (0)

/* Advance p by one character without recording the old position. */
#define PINC_S  do { \
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
} while (0)

/* Read the code at p into c and advance, without recording the old position. */
#define PFETCH_S(c)  do { \
  c = ONIGENC_MBC_TO_CODE(enc, p, end); \
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
} while (0)

/* Code at p without advancing; PEND_VALUE when p has reached end. */
#define PPEEK        (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE)
#define PPEEK_IS(c)  (PPEEK == (OnigCodePoint )c)
408 | \r | |
409 | static UChar*\r | |
410 | strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end,\r | |
b602265d | 411 | int capa, int oldCapa)\r |
14b0e578 CS |
412 | {\r |
413 | UChar* r;\r | |
414 | \r | |
415 | if (dest)\r | |
416 | r = (UChar* )xrealloc(dest, capa + 1, oldCapa);\r | |
417 | else\r | |
418 | r = (UChar* )xmalloc(capa + 1);\r | |
419 | \r | |
420 | CHECK_NULL_RETURN(r);\r | |
421 | onig_strcpy(r + (dest_end - dest), src, src_end);\r | |
422 | return r;\r | |
423 | }\r | |
424 | \r | |
425 | /* dest on static area */\r | |
426 | static UChar*\r | |
427 | strcat_capa_from_static(UChar* dest, UChar* dest_end,\r | |
b602265d | 428 | const UChar* src, const UChar* src_end, int capa)\r |
14b0e578 CS |
429 | {\r |
430 | UChar* r;\r | |
431 | \r | |
432 | r = (UChar* )xmalloc(capa + 1);\r | |
433 | CHECK_NULL_RETURN(r);\r | |
434 | onig_strcpy(r, dest, dest_end);\r | |
435 | onig_strcpy(r + (dest_end - dest), src, src_end);\r | |
436 | return r;\r | |
437 | }\r | |
438 | \r | |
439 | \r | |
440 | #ifdef USE_ST_LIBRARY\r | |
441 | \r | |
442 | typedef struct {\r | |
443 | UChar* s;\r | |
444 | UChar* end;\r | |
445 | } st_str_end_key;\r | |
446 | \r | |
447 | static int\r | |
448 | str_end_cmp(st_str_end_key* x, st_str_end_key* y)\r | |
449 | {\r | |
450 | UChar *p, *q;\r | |
451 | int c;\r | |
452 | \r | |
453 | if ((x->end - x->s) != (y->end - y->s))\r | |
454 | return 1;\r | |
455 | \r | |
456 | p = x->s;\r | |
457 | q = y->s;\r | |
458 | while (p < x->end) {\r | |
459 | c = (int )*p - (int )*q;\r | |
460 | if (c != 0) return c;\r | |
461 | \r | |
462 | p++; q++;\r | |
463 | }\r | |
464 | \r | |
465 | return 0;\r | |
466 | }\r | |
467 | \r | |
468 | static int\r | |
469 | str_end_hash(st_str_end_key* x)\r | |
470 | {\r | |
471 | UChar *p;\r | |
472 | int val = 0;\r | |
473 | \r | |
474 | p = x->s;\r | |
475 | while (p < x->end) {\r | |
476 | val = val * 997 + (int )*p++;\r | |
477 | }\r | |
478 | \r | |
479 | return val + (val >> 5);\r | |
480 | }\r | |
481 | \r | |
482 | extern hash_table_type*\r | |
483 | onig_st_init_strend_table_with_size(int size)\r | |
484 | {\r | |
485 | static struct st_hash_type hashType = {\r | |
486 | str_end_cmp,\r | |
487 | str_end_hash,\r | |
488 | };\r | |
489 | \r | |
490 | return (hash_table_type* )\r | |
491 | onig_st_init_table_with_size(&hashType, size);\r | |
492 | }\r | |
493 | \r | |
494 | extern int\r | |
495 | onig_st_lookup_strend(hash_table_type* table, const UChar* str_key,\r | |
b602265d | 496 | const UChar* end_key, hash_data_type *value)\r |
14b0e578 CS |
497 | {\r |
498 | st_str_end_key key;\r | |
499 | \r | |
500 | key.s = (UChar* )str_key;\r | |
501 | key.end = (UChar* )end_key;\r | |
502 | \r | |
b602265d | 503 | return onig_st_lookup(table, (st_data_t )(&key), value);\r |
14b0e578 CS |
504 | }\r |
505 | \r | |
506 | extern int\r | |
507 | onig_st_insert_strend(hash_table_type* table, const UChar* str_key,\r | |
b602265d | 508 | const UChar* end_key, hash_data_type value)\r |
14b0e578 CS |
509 | {\r |
510 | st_str_end_key* key;\r | |
511 | int result;\r | |
512 | \r | |
513 | key = (st_str_end_key* )xmalloc(sizeof(st_str_end_key));\r | |
b0c2b797 | 514 | CHECK_NULL_RETURN_MEMERR(key);\r |
b602265d | 515 | \r |
14b0e578 CS |
516 | key->s = (UChar* )str_key;\r |
517 | key->end = (UChar* )end_key;\r | |
b602265d | 518 | result = onig_st_insert(table, (st_data_t )key, value);\r |
14b0e578 CS |
519 | if (result) {\r |
520 | xfree(key);\r | |
521 | }\r | |
522 | return result;\r | |
523 | }\r | |
524 | \r | |
14b0e578 | 525 | \r |
b602265d DG |
526 | typedef struct {\r |
527 | OnigEncoding enc;\r | |
528 | int type; /* callout type: single or not */\r | |
529 | UChar* s;\r | |
530 | UChar* end;\r | |
531 | } st_callout_name_key;\r | |
532 | \r | |
533 | static int\r | |
534 | callout_name_table_cmp(st_callout_name_key* x, st_callout_name_key* y)\r | |
535 | {\r | |
536 | UChar *p, *q;\r | |
537 | int c;\r | |
538 | \r | |
539 | if (x->enc != y->enc) return 1;\r | |
540 | if (x->type != y->type) return 1;\r | |
541 | if ((x->end - x->s) != (y->end - y->s))\r | |
542 | return 1;\r | |
543 | \r | |
544 | p = x->s;\r | |
545 | q = y->s;\r | |
546 | while (p < x->end) {\r | |
547 | c = (int )*p - (int )*q;\r | |
548 | if (c != 0) return c;\r | |
549 | \r | |
550 | p++; q++;\r | |
551 | }\r | |
552 | \r | |
553 | return 0;\r | |
554 | }\r | |
555 | \r | |
556 | static int\r | |
557 | callout_name_table_hash(st_callout_name_key* x)\r | |
558 | {\r | |
559 | UChar *p;\r | |
560 | int val = 0;\r | |
561 | \r | |
562 | p = x->s;\r | |
563 | while (p < x->end) {\r | |
564 | val = val * 997 + (int )*p++;\r | |
565 | }\r | |
566 | \r | |
567 | /* use intptr_t for escape warning in Windows */\r | |
568 | return val + (val >> 5) + ((intptr_t )x->enc & 0xffff) + x->type;\r | |
569 | }\r | |
570 | \r | |
571 | extern hash_table_type*\r | |
572 | onig_st_init_callout_name_table_with_size(int size)\r | |
573 | {\r | |
574 | static struct st_hash_type hashType = {\r | |
575 | callout_name_table_cmp,\r | |
576 | callout_name_table_hash,\r | |
577 | };\r | |
578 | \r | |
579 | return (hash_table_type* )\r | |
580 | onig_st_init_table_with_size(&hashType, size);\r | |
581 | }\r | |
582 | \r | |
583 | extern int\r | |
584 | onig_st_lookup_callout_name_table(hash_table_type* table,\r | |
585 | OnigEncoding enc,\r | |
586 | int type,\r | |
587 | const UChar* str_key,\r | |
588 | const UChar* end_key,\r | |
589 | hash_data_type *value)\r | |
590 | {\r | |
591 | st_callout_name_key key;\r | |
592 | \r | |
593 | key.enc = enc;\r | |
594 | key.type = type;\r | |
595 | key.s = (UChar* )str_key;\r | |
596 | key.end = (UChar* )end_key;\r | |
597 | \r | |
598 | return onig_st_lookup(table, (st_data_t )(&key), value);\r | |
599 | }\r | |
600 | \r | |
601 | static int\r | |
602 | st_insert_callout_name_table(hash_table_type* table,\r | |
603 | OnigEncoding enc, int type,\r | |
604 | UChar* str_key, UChar* end_key,\r | |
605 | hash_data_type value)\r | |
606 | {\r | |
607 | st_callout_name_key* key;\r | |
608 | int result;\r | |
609 | \r | |
610 | key = (st_callout_name_key* )xmalloc(sizeof(st_callout_name_key));\r | |
611 | CHECK_NULL_RETURN_MEMERR(key);\r | |
612 | \r | |
613 | /* key->s: don't duplicate, because str_key is duped in callout_name_entry() */\r | |
614 | key->enc = enc;\r | |
615 | key->type = type;\r | |
616 | key->s = str_key;\r | |
617 | key->end = end_key;\r | |
618 | result = onig_st_insert(table, (st_data_t )key, value);\r | |
619 | if (result) {\r | |
620 | xfree(key);\r | |
621 | }\r | |
622 | return result;\r | |
623 | }\r | |
624 | \r | |
625 | #endif /* USE_ST_LIBRARY */\r | |
14b0e578 | 626 | \r |
14b0e578 CS |
627 | \r |
628 | #define INIT_NAME_BACKREFS_ALLOC_NUM 8\r | |
629 | \r | |
630 | typedef struct {\r | |
631 | UChar* name;\r | |
632 | int name_len; /* byte length */\r | |
633 | int back_num; /* number of backrefs */\r | |
634 | int back_alloc;\r | |
635 | int back_ref1;\r | |
636 | int* back_refs;\r | |
637 | } NameEntry;\r | |
638 | \r | |
639 | #ifdef USE_ST_LIBRARY\r | |
640 | \r | |
b602265d DG |
641 | #define INIT_NAMES_ALLOC_NUM 5\r |
642 | \r | |
14b0e578 CS |
643 | typedef st_table NameTable;\r |
644 | typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */\r | |
645 | \r | |
646 | #define NAMEBUF_SIZE 24\r | |
647 | #define NAMEBUF_SIZE_1 25\r | |
648 | \r | |
649 | #ifdef ONIG_DEBUG\r | |
650 | static int\r | |
651 | i_print_name_entry(UChar* key, NameEntry* e, void* arg)\r | |
652 | {\r | |
653 | int i;\r | |
654 | FILE* fp = (FILE* )arg;\r | |
655 | \r | |
656 | fprintf(fp, "%s: ", e->name);\r | |
657 | if (e->back_num == 0)\r | |
658 | fputs("-", fp);\r | |
659 | else if (e->back_num == 1)\r | |
660 | fprintf(fp, "%d", e->back_ref1);\r | |
661 | else {\r | |
662 | for (i = 0; i < e->back_num; i++) {\r | |
663 | if (i > 0) fprintf(fp, ", ");\r | |
664 | fprintf(fp, "%d", e->back_refs[i]);\r | |
665 | }\r | |
666 | }\r | |
667 | fputs("\n", fp);\r | |
668 | return ST_CONTINUE;\r | |
669 | }\r | |
670 | \r | |
671 | extern int\r | |
672 | onig_print_names(FILE* fp, regex_t* reg)\r | |
673 | {\r | |
674 | NameTable* t = (NameTable* )reg->name_table;\r | |
675 | \r | |
676 | if (IS_NOT_NULL(t)) {\r | |
677 | fprintf(fp, "name table\n");\r | |
678 | onig_st_foreach(t, i_print_name_entry, (HashDataType )fp);\r | |
679 | fputs("\n", fp);\r | |
680 | }\r | |
681 | return 0;\r | |
682 | }\r | |
683 | #endif /* ONIG_DEBUG */\r | |
684 | \r | |
685 | static int\r | |
686 | i_free_name_entry(UChar* key, NameEntry* e, void* arg ARG_UNUSED)\r | |
687 | {\r | |
688 | xfree(e->name);\r | |
689 | if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);\r | |
690 | xfree(key);\r | |
691 | xfree(e);\r | |
692 | return ST_DELETE;\r | |
693 | }\r | |
694 | \r | |
695 | static int\r | |
696 | names_clear(regex_t* reg)\r | |
697 | {\r | |
698 | NameTable* t = (NameTable* )reg->name_table;\r | |
699 | \r | |
700 | if (IS_NOT_NULL(t)) {\r | |
701 | onig_st_foreach(t, i_free_name_entry, 0);\r | |
702 | }\r | |
703 | return 0;\r | |
704 | }\r | |
705 | \r | |
706 | extern int\r | |
707 | onig_names_free(regex_t* reg)\r | |
708 | {\r | |
709 | int r;\r | |
710 | NameTable* t;\r | |
711 | \r | |
712 | r = names_clear(reg);\r | |
b602265d | 713 | if (r != 0) return r;\r |
14b0e578 CS |
714 | \r |
715 | t = (NameTable* )reg->name_table;\r | |
716 | if (IS_NOT_NULL(t)) onig_st_free_table(t);\r | |
717 | reg->name_table = (void* )NULL;\r | |
718 | return 0;\r | |
719 | }\r | |
720 | \r | |
721 | static NameEntry*\r | |
722 | name_find(regex_t* reg, const UChar* name, const UChar* name_end)\r | |
723 | {\r | |
724 | NameEntry* e;\r | |
725 | NameTable* t = (NameTable* )reg->name_table;\r | |
726 | \r | |
727 | e = (NameEntry* )NULL;\r | |
728 | if (IS_NOT_NULL(t)) {\r | |
729 | onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));\r | |
730 | }\r | |
731 | return e;\r | |
732 | }\r | |
733 | \r | |
734 | typedef struct {\r | |
735 | int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*);\r | |
736 | regex_t* reg;\r | |
737 | void* arg;\r | |
738 | int ret;\r | |
739 | OnigEncoding enc;\r | |
740 | } INamesArg;\r | |
741 | \r | |
742 | static int\r | |
743 | i_names(UChar* key ARG_UNUSED, NameEntry* e, INamesArg* arg)\r | |
744 | {\r | |
745 | int r = (*(arg->func))(e->name,\r | |
746 | e->name + e->name_len,\r | |
747 | e->back_num,\r | |
b602265d DG |
748 | (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),\r |
749 | arg->reg, arg->arg);\r | |
14b0e578 CS |
750 | if (r != 0) {\r |
751 | arg->ret = r;\r | |
752 | return ST_STOP;\r | |
753 | }\r | |
754 | return ST_CONTINUE;\r | |
755 | }\r | |
756 | \r | |
757 | extern int\r | |
758 | onig_foreach_name(regex_t* reg,\r | |
759 | int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)\r | |
760 | {\r | |
761 | INamesArg narg;\r | |
762 | NameTable* t = (NameTable* )reg->name_table;\r | |
763 | \r | |
764 | narg.ret = 0;\r | |
765 | if (IS_NOT_NULL(t)) {\r | |
766 | narg.func = func;\r | |
767 | narg.reg = reg;\r | |
768 | narg.arg = arg;\r | |
769 | narg.enc = reg->enc; /* should be pattern encoding. */\r | |
b602265d | 770 | onig_st_foreach(t, i_names, (HashDataType )&narg);\r |
14b0e578 CS |
771 | }\r |
772 | return narg.ret;\r | |
773 | }\r | |
774 | \r | |
775 | static int\r | |
776 | i_renumber_name(UChar* key ARG_UNUSED, NameEntry* e, GroupNumRemap* map)\r | |
777 | {\r | |
778 | int i;\r | |
779 | \r | |
780 | if (e->back_num > 1) {\r | |
781 | for (i = 0; i < e->back_num; i++) {\r | |
782 | e->back_refs[i] = map[e->back_refs[i]].new_val;\r | |
783 | }\r | |
784 | }\r | |
785 | else if (e->back_num == 1) {\r | |
786 | e->back_ref1 = map[e->back_ref1].new_val;\r | |
787 | }\r | |
788 | \r | |
789 | return ST_CONTINUE;\r | |
790 | }\r | |
791 | \r | |
792 | extern int\r | |
793 | onig_renumber_name_table(regex_t* reg, GroupNumRemap* map)\r | |
794 | {\r | |
795 | NameTable* t = (NameTable* )reg->name_table;\r | |
796 | \r | |
797 | if (IS_NOT_NULL(t)) {\r | |
b602265d | 798 | onig_st_foreach(t, i_renumber_name, (HashDataType )map);\r |
14b0e578 CS |
799 | }\r |
800 | return 0;\r | |
801 | }\r | |
802 | \r | |
803 | \r | |
804 | extern int\r | |
805 | onig_number_of_names(regex_t* reg)\r | |
806 | {\r | |
807 | NameTable* t = (NameTable* )reg->name_table;\r | |
808 | \r | |
809 | if (IS_NOT_NULL(t))\r | |
810 | return t->num_entries;\r | |
811 | else\r | |
812 | return 0;\r | |
813 | }\r | |
814 | \r | |
815 | #else /* USE_ST_LIBRARY */\r | |
816 | \r | |
817 | #define INIT_NAMES_ALLOC_NUM 8\r | |
818 | \r | |
819 | typedef struct {\r | |
820 | NameEntry* e;\r | |
821 | int num;\r | |
822 | int alloc;\r | |
823 | } NameTable;\r | |
824 | \r | |
825 | #ifdef ONIG_DEBUG\r | |
826 | extern int\r | |
827 | onig_print_names(FILE* fp, regex_t* reg)\r | |
828 | {\r | |
829 | int i, j;\r | |
830 | NameEntry* e;\r | |
831 | NameTable* t = (NameTable* )reg->name_table;\r | |
832 | \r | |
833 | if (IS_NOT_NULL(t) && t->num > 0) {\r | |
834 | fprintf(fp, "name table\n");\r | |
835 | for (i = 0; i < t->num; i++) {\r | |
836 | e = &(t->e[i]);\r | |
837 | fprintf(fp, "%s: ", e->name);\r | |
838 | if (e->back_num == 0) {\r | |
b602265d | 839 | fputs("-", fp);\r |
14b0e578 CS |
840 | }\r |
841 | else if (e->back_num == 1) {\r | |
b602265d | 842 | fprintf(fp, "%d", e->back_ref1);\r |
14b0e578 CS |
843 | }\r |
844 | else {\r | |
b602265d DG |
845 | for (j = 0; j < e->back_num; j++) {\r |
846 | if (j > 0) fprintf(fp, ", ");\r | |
847 | fprintf(fp, "%d", e->back_refs[j]);\r | |
848 | }\r | |
14b0e578 CS |
849 | }\r |
850 | fputs("\n", fp);\r | |
851 | }\r | |
852 | fputs("\n", fp);\r | |
853 | }\r | |
854 | return 0;\r | |
855 | }\r | |
856 | #endif\r | |
857 | \r | |
858 | static int\r | |
859 | names_clear(regex_t* reg)\r | |
860 | {\r | |
861 | int i;\r | |
862 | NameEntry* e;\r | |
863 | NameTable* t = (NameTable* )reg->name_table;\r | |
864 | \r | |
865 | if (IS_NOT_NULL(t)) {\r | |
866 | for (i = 0; i < t->num; i++) {\r | |
867 | e = &(t->e[i]);\r | |
868 | if (IS_NOT_NULL(e->name)) {\r | |
b602265d DG |
869 | xfree(e->name);\r |
870 | e->name = NULL;\r | |
871 | e->name_len = 0;\r | |
872 | e->back_num = 0;\r | |
873 | e->back_alloc = 0;\r | |
874 | if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);\r | |
875 | e->back_refs = (int* )NULL;\r | |
14b0e578 CS |
876 | }\r |
877 | }\r | |
878 | if (IS_NOT_NULL(t->e)) {\r | |
879 | xfree(t->e);\r | |
880 | t->e = NULL;\r | |
881 | }\r | |
882 | t->num = 0;\r | |
883 | }\r | |
884 | return 0;\r | |
885 | }\r | |
886 | \r | |
887 | extern int\r | |
888 | onig_names_free(regex_t* reg)\r | |
889 | {\r | |
890 | int r;\r | |
891 | NameTable* t;\r | |
892 | \r | |
893 | r = names_clear(reg);\r | |
b602265d | 894 | if (r != 0) return r;\r |
14b0e578 CS |
895 | \r |
896 | t = (NameTable* )reg->name_table;\r | |
897 | if (IS_NOT_NULL(t)) xfree(t);\r | |
898 | reg->name_table = NULL;\r | |
899 | return 0;\r | |
900 | }\r | |
901 | \r | |
902 | static NameEntry*\r | |
903 | name_find(regex_t* reg, UChar* name, UChar* name_end)\r | |
904 | {\r | |
905 | int i, len;\r | |
906 | NameEntry* e;\r | |
907 | NameTable* t = (NameTable* )reg->name_table;\r | |
908 | \r | |
909 | if (IS_NOT_NULL(t)) {\r | |
910 | len = name_end - name;\r | |
911 | for (i = 0; i < t->num; i++) {\r | |
912 | e = &(t->e[i]);\r | |
913 | if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)\r | |
b602265d | 914 | return e;\r |
14b0e578 CS |
915 | }\r |
916 | }\r | |
917 | return (NameEntry* )NULL;\r | |
918 | }\r | |
919 | \r | |
920 | extern int\r | |
921 | onig_foreach_name(regex_t* reg,\r | |
922 | int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)\r | |
923 | {\r | |
924 | int i, r;\r | |
925 | NameEntry* e;\r | |
926 | NameTable* t = (NameTable* )reg->name_table;\r | |
927 | \r | |
928 | if (IS_NOT_NULL(t)) {\r | |
929 | for (i = 0; i < t->num; i++) {\r | |
930 | e = &(t->e[i]);\r | |
931 | r = (*func)(e->name, e->name + e->name_len, e->back_num,\r | |
b602265d DG |
932 | (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),\r |
933 | reg, arg);\r | |
14b0e578 CS |
934 | if (r != 0) return r;\r |
935 | }\r | |
936 | }\r | |
937 | return 0;\r | |
938 | }\r | |
939 | \r | |
940 | extern int\r | |
941 | onig_number_of_names(regex_t* reg)\r | |
942 | {\r | |
943 | NameTable* t = (NameTable* )reg->name_table;\r | |
944 | \r | |
945 | if (IS_NOT_NULL(t))\r | |
946 | return t->num;\r | |
947 | else\r | |
948 | return 0;\r | |
949 | }\r | |
950 | \r | |
951 | #endif /* else USE_ST_LIBRARY */\r | |
952 | \r | |
953 | static int\r | |
954 | name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)\r | |
955 | {\r | |
b602265d | 956 | int r;\r |
14b0e578 CS |
957 | int alloc;\r |
958 | NameEntry* e;\r | |
959 | NameTable* t = (NameTable* )reg->name_table;\r | |
960 | \r | |
961 | if (name_end - name <= 0)\r | |
962 | return ONIGERR_EMPTY_GROUP_NAME;\r | |
963 | \r | |
964 | e = name_find(reg, name, name_end);\r | |
965 | if (IS_NULL(e)) {\r | |
966 | #ifdef USE_ST_LIBRARY\r | |
967 | if (IS_NULL(t)) {\r | |
b602265d | 968 | t = onig_st_init_strend_table_with_size(INIT_NAMES_ALLOC_NUM);\r |
a5def177 | 969 | CHECK_NULL_RETURN_MEMERR(t);\r |
14b0e578 CS |
970 | reg->name_table = (void* )t;\r |
971 | }\r | |
972 | e = (NameEntry* )xmalloc(sizeof(NameEntry));\r | |
973 | CHECK_NULL_RETURN_MEMERR(e);\r | |
974 | \r | |
b602265d | 975 | e->name = onigenc_strdup(reg->enc, name, name_end);\r |
14b0e578 CS |
976 | if (IS_NULL(e->name)) {\r |
977 | xfree(e); return ONIGERR_MEMORY;\r | |
978 | }\r | |
b602265d DG |
979 | r = onig_st_insert_strend(t, e->name, (e->name + (name_end - name)),\r |
980 | (HashDataType )e);\r | |
981 | if (r < 0) return r;\r | |
14b0e578 | 982 | \r |
b602265d | 983 | e->name_len = (int )(name_end - name);\r |
14b0e578 CS |
984 | e->back_num = 0;\r |
985 | e->back_alloc = 0;\r | |
986 | e->back_refs = (int* )NULL;\r | |
987 | \r | |
988 | #else\r | |
989 | \r | |
990 | if (IS_NULL(t)) {\r | |
991 | alloc = INIT_NAMES_ALLOC_NUM;\r | |
992 | t = (NameTable* )xmalloc(sizeof(NameTable));\r | |
993 | CHECK_NULL_RETURN_MEMERR(t);\r | |
994 | t->e = NULL;\r | |
995 | t->alloc = 0;\r | |
996 | t->num = 0;\r | |
997 | \r | |
998 | t->e = (NameEntry* )xmalloc(sizeof(NameEntry) * alloc);\r | |
999 | if (IS_NULL(t->e)) {\r | |
b602265d DG |
1000 | xfree(t);\r |
1001 | return ONIGERR_MEMORY;\r | |
14b0e578 CS |
1002 | }\r |
1003 | t->alloc = alloc;\r | |
1004 | reg->name_table = t;\r | |
1005 | goto clear;\r | |
1006 | }\r | |
1007 | else if (t->num == t->alloc) {\r | |
1008 | int i;\r | |
1009 | \r | |
1010 | alloc = t->alloc * 2;\r | |
b602265d | 1011 | t->e = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc, sizeof(NameEntry) * t->alloc);\r |
14b0e578 CS |
1012 | CHECK_NULL_RETURN_MEMERR(t->e);\r |
1013 | t->alloc = alloc;\r | |
1014 | \r | |
1015 | clear:\r | |
1016 | for (i = t->num; i < t->alloc; i++) {\r | |
b602265d DG |
1017 | t->e[i].name = NULL;\r |
1018 | t->e[i].name_len = 0;\r | |
1019 | t->e[i].back_num = 0;\r | |
1020 | t->e[i].back_alloc = 0;\r | |
1021 | t->e[i].back_refs = (int* )NULL;\r | |
14b0e578 CS |
1022 | }\r |
1023 | }\r | |
1024 | e = &(t->e[t->num]);\r | |
1025 | t->num++;\r | |
b602265d | 1026 | e->name = onigenc_strdup(reg->enc, name, name_end);\r |
14b0e578 CS |
1027 | if (IS_NULL(e->name)) return ONIGERR_MEMORY;\r |
1028 | e->name_len = name_end - name;\r | |
1029 | #endif\r | |
1030 | }\r | |
1031 | \r | |
1032 | if (e->back_num >= 1 &&\r | |
1033 | ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME)) {\r | |
1034 | onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME,\r | |
b602265d | 1035 | name, name_end);\r |
14b0e578 CS |
1036 | return ONIGERR_MULTIPLEX_DEFINED_NAME;\r |
1037 | }\r | |
1038 | \r | |
1039 | e->back_num++;\r | |
1040 | if (e->back_num == 1) {\r | |
1041 | e->back_ref1 = backref;\r | |
1042 | }\r | |
1043 | else {\r | |
1044 | if (e->back_num == 2) {\r | |
1045 | alloc = INIT_NAME_BACKREFS_ALLOC_NUM;\r | |
1046 | e->back_refs = (int* )xmalloc(sizeof(int) * alloc);\r | |
1047 | CHECK_NULL_RETURN_MEMERR(e->back_refs);\r | |
1048 | e->back_alloc = alloc;\r | |
1049 | e->back_refs[0] = e->back_ref1;\r | |
1050 | e->back_refs[1] = backref;\r | |
1051 | }\r | |
1052 | else {\r | |
1053 | if (e->back_num > e->back_alloc) {\r | |
b602265d DG |
1054 | alloc = e->back_alloc * 2;\r |
1055 | e->back_refs = (int* )xrealloc(e->back_refs, sizeof(int) * alloc, sizeof(int) * e->back_alloc);\r | |
1056 | CHECK_NULL_RETURN_MEMERR(e->back_refs);\r | |
1057 | e->back_alloc = alloc;\r | |
14b0e578 CS |
1058 | }\r |
1059 | e->back_refs[e->back_num - 1] = backref;\r | |
1060 | }\r | |
1061 | }\r | |
1062 | \r | |
1063 | return 0;\r | |
1064 | }\r | |
1065 | \r | |
1066 | extern int\r | |
1067 | onig_name_to_group_numbers(regex_t* reg, const UChar* name,\r | |
b602265d | 1068 | const UChar* name_end, int** nums)\r |
14b0e578 CS |
1069 | {\r |
1070 | NameEntry* e = name_find(reg, name, name_end);\r | |
1071 | \r | |
1072 | if (IS_NULL(e)) return ONIGERR_UNDEFINED_NAME_REFERENCE;\r | |
1073 | \r | |
1074 | switch (e->back_num) {\r | |
1075 | case 0:\r | |
1076 | break;\r | |
1077 | case 1:\r | |
1078 | *nums = &(e->back_ref1);\r | |
1079 | break;\r | |
1080 | default:\r | |
1081 | *nums = e->back_refs;\r | |
1082 | break;\r | |
1083 | }\r | |
1084 | return e->back_num;\r | |
1085 | }\r | |
1086 | \r | |
1087 | extern int\r | |
1088 | onig_name_to_backref_number(regex_t* reg, const UChar* name,\r | |
b602265d | 1089 | const UChar* name_end, OnigRegion *region)\r |
14b0e578 CS |
1090 | {\r |
1091 | int i, n, *nums;\r | |
1092 | \r | |
1093 | n = onig_name_to_group_numbers(reg, name, name_end, &nums);\r | |
1094 | if (n < 0)\r | |
1095 | return n;\r | |
1096 | else if (n == 0)\r | |
1097 | return ONIGERR_PARSER_BUG;\r | |
1098 | else if (n == 1)\r | |
1099 | return nums[0];\r | |
1100 | else {\r | |
1101 | if (IS_NOT_NULL(region)) {\r | |
1102 | for (i = n - 1; i >= 0; i--) {\r | |
b602265d DG |
1103 | if (region->beg[nums[i]] != ONIG_REGION_NOTPOS)\r |
1104 | return nums[i];\r | |
14b0e578 CS |
1105 | }\r |
1106 | }\r | |
1107 | return nums[n - 1];\r | |
1108 | }\r | |
1109 | }\r | |
1110 | \r | |
14b0e578 CS |
1111 | extern int\r |
1112 | onig_noname_group_capture_is_active(regex_t* reg)\r | |
1113 | {\r | |
1114 | if (ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_DONT_CAPTURE_GROUP))\r | |
1115 | return 0;\r | |
1116 | \r | |
14b0e578 CS |
1117 | if (onig_number_of_names(reg) > 0 &&\r |
1118 | IS_SYNTAX_BV(reg->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&\r | |
1119 | !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {\r | |
1120 | return 0;\r | |
1121 | }\r | |
14b0e578 CS |
1122 | \r |
1123 | return 1;\r | |
1124 | }\r | |
1125 | \r | |
b602265d | 1126 | #ifdef USE_CALLOUT\r |
14b0e578 | 1127 | \r |
b602265d DG |
1128 | typedef struct {\r |
1129 | OnigCalloutType type;\r | |
1130 | int in;\r | |
1131 | OnigCalloutFunc start_func;\r | |
1132 | OnigCalloutFunc end_func;\r | |
1133 | int arg_num;\r | |
1134 | int opt_arg_num;\r | |
1135 | unsigned int arg_types[ONIG_CALLOUT_MAX_ARGS_NUM];\r | |
1136 | OnigValue opt_defaults[ONIG_CALLOUT_MAX_ARGS_NUM];\r | |
1137 | UChar* name; /* reference to GlobalCalloutNameTable entry: e->name */\r | |
1138 | } CalloutNameListEntry;\r | |
14b0e578 | 1139 | \r |
b602265d DG |
1140 | typedef struct {\r |
1141 | int n;\r | |
1142 | int alloc;\r | |
1143 | CalloutNameListEntry* v;\r | |
1144 | } CalloutNameListType;\r | |
14b0e578 | 1145 | \r |
b602265d | 1146 | static CalloutNameListType* GlobalCalloutNameList;\r |
14b0e578 CS |
1147 | \r |
1148 | static int\r | |
b602265d | 1149 | make_callout_func_list(CalloutNameListType** rs, int init_size)\r |
14b0e578 | 1150 | {\r |
b602265d DG |
1151 | CalloutNameListType* s;\r |
1152 | CalloutNameListEntry* v;\r | |
14b0e578 | 1153 | \r |
b602265d | 1154 | *rs = 0;\r |
14b0e578 | 1155 | \r |
b602265d DG |
1156 | s = xmalloc(sizeof(*s));\r |
1157 | if (IS_NULL(s)) return ONIGERR_MEMORY;\r | |
14b0e578 | 1158 | \r |
b602265d DG |
1159 | v = (CalloutNameListEntry* )xmalloc(sizeof(CalloutNameListEntry) * init_size);\r |
1160 | if (IS_NULL(v)) {\r | |
1161 | xfree(s);\r | |
1162 | return ONIGERR_MEMORY;\r | |
14b0e578 CS |
1163 | }\r |
1164 | \r | |
b602265d DG |
1165 | s->n = 0;\r |
1166 | s->alloc = init_size;\r | |
1167 | s->v = v;\r | |
14b0e578 | 1168 | \r |
b602265d DG |
1169 | *rs = s;\r |
1170 | return ONIG_NORMAL;\r | |
14b0e578 CS |
1171 | }\r |
1172 | \r | |
b602265d DG |
1173 | static void\r |
1174 | free_callout_func_list(CalloutNameListType* s)\r | |
1175 | {\r | |
1176 | if (IS_NOT_NULL(s)) {\r | |
1177 | if (IS_NOT_NULL(s->v)) {\r | |
1178 | int i, j;\r | |
1179 | \r | |
1180 | for (i = 0; i < s->n; i++) {\r | |
1181 | CalloutNameListEntry* e = s->v + i;\r | |
1182 | for (j = e->arg_num - e->opt_arg_num; j < e->arg_num; j++) {\r | |
1183 | if (e->arg_types[j] == ONIG_TYPE_STRING) {\r | |
1184 | UChar* p = e->opt_defaults[j].s.start;\r | |
1185 | if (IS_NOT_NULL(p)) xfree(p);\r | |
1186 | }\r | |
1187 | }\r | |
1188 | }\r | |
1189 | xfree(s->v);\r | |
1190 | }\r | |
1191 | xfree(s);\r | |
1192 | }\r | |
1193 | }\r | |
14b0e578 | 1194 | \r |
b602265d DG |
1195 | static int\r |
1196 | callout_func_list_add(CalloutNameListType* s, int* rid)\r | |
1197 | {\r | |
1198 | if (s->n >= s->alloc) {\r | |
1199 | int new_size = s->alloc * 2;\r | |
1200 | CalloutNameListEntry* nv = (CalloutNameListEntry* )\r | |
1201 | xrealloc(s->v, sizeof(CalloutNameListEntry) * new_size, sizeof(CalloutNameListEntry)*s->alloc);\r | |
1202 | if (IS_NULL(nv)) return ONIGERR_MEMORY;\r | |
14b0e578 | 1203 | \r |
b602265d DG |
1204 | s->alloc = new_size;\r |
1205 | s->v = nv;\r | |
1206 | }\r | |
14b0e578 | 1207 | \r |
b602265d | 1208 | *rid = s->n;\r |
14b0e578 | 1209 | \r |
b602265d DG |
1210 | xmemset(&(s->v[s->n]), 0, sizeof(*(s->v)));\r |
1211 | s->n++;\r | |
1212 | return ONIG_NORMAL;\r | |
1213 | }\r | |
14b0e578 | 1214 | \r |
14b0e578 | 1215 | \r |
b602265d DG |
1216 | typedef struct {\r |
1217 | UChar* name;\r | |
1218 | int name_len; /* byte length */\r | |
1219 | int id;\r | |
1220 | } CalloutNameEntry;\r | |
14b0e578 | 1221 | \r |
b602265d DG |
1222 | #ifdef USE_ST_LIBRARY\r |
1223 | typedef st_table CalloutNameTable;\r | |
14b0e578 | 1224 | #else\r |
b602265d DG |
1225 | typedef struct {\r |
1226 | CalloutNameEntry* e;\r | |
1227 | int num;\r | |
1228 | int alloc;\r | |
1229 | } CalloutNameTable;\r | |
14b0e578 | 1230 | #endif\r |
14b0e578 | 1231 | \r |
b602265d DG |
1232 | static CalloutNameTable* GlobalCalloutNameTable;\r |
1233 | static int CalloutNameIDCounter;\r | |
14b0e578 | 1234 | \r |
b602265d | 1235 | #ifdef USE_ST_LIBRARY\r |
14b0e578 | 1236 | \r |
b602265d DG |
1237 | static int\r |
1238 | i_free_callout_name_entry(st_callout_name_key* key, CalloutNameEntry* e,\r | |
1239 | void* arg ARG_UNUSED)\r | |
1240 | {\r | |
1241 | xfree(e->name);\r | |
1242 | /*xfree(key->s); */ /* is same as e->name */\r | |
1243 | xfree(key);\r | |
1244 | xfree(e);\r | |
1245 | return ST_DELETE;\r | |
1246 | }\r | |
14b0e578 | 1247 | \r |
b602265d DG |
1248 | static int\r |
1249 | callout_name_table_clear(CalloutNameTable* t)\r | |
1250 | {\r | |
1251 | if (IS_NOT_NULL(t)) {\r | |
1252 | onig_st_foreach(t, i_free_callout_name_entry, 0);\r | |
1253 | }\r | |
1254 | return 0;\r | |
1255 | }\r | |
14b0e578 | 1256 | \r |
b602265d DG |
1257 | static int\r |
1258 | global_callout_name_table_free(void)\r | |
1259 | {\r | |
1260 | if (IS_NOT_NULL(GlobalCalloutNameTable)) {\r | |
1261 | int r = callout_name_table_clear(GlobalCalloutNameTable);\r | |
1262 | if (r != 0) return r;\r | |
14b0e578 | 1263 | \r |
b602265d DG |
1264 | onig_st_free_table(GlobalCalloutNameTable);\r |
1265 | GlobalCalloutNameTable = 0;\r | |
1266 | CalloutNameIDCounter = 0;\r | |
14b0e578 CS |
1267 | }\r |
1268 | \r | |
b602265d DG |
1269 | return 0;\r |
1270 | }\r | |
1271 | \r | |
1272 | static CalloutNameEntry*\r | |
1273 | callout_name_find(OnigEncoding enc, int is_not_single,\r | |
1274 | const UChar* name, const UChar* name_end)\r | |
1275 | {\r | |
1276 | int r;\r | |
1277 | CalloutNameEntry* e;\r | |
1278 | CalloutNameTable* t = GlobalCalloutNameTable;\r | |
14b0e578 | 1279 | \r |
b602265d DG |
1280 | e = (CalloutNameEntry* )NULL;\r |
1281 | if (IS_NOT_NULL(t)) {\r | |
1282 | r = onig_st_lookup_callout_name_table(t, enc, is_not_single, name, name_end,\r | |
1283 | (HashDataType* )((void* )(&e)));\r | |
1284 | if (r == 0) { /* not found */\r | |
1285 | if (enc != ONIG_ENCODING_ASCII &&\r | |
1286 | ONIGENC_IS_ASCII_COMPATIBLE_ENCODING(enc)) {\r | |
1287 | enc = ONIG_ENCODING_ASCII;\r | |
1288 | onig_st_lookup_callout_name_table(t, enc, is_not_single, name, name_end,\r | |
1289 | (HashDataType* )((void* )(&e)));\r | |
1290 | }\r | |
1291 | }\r | |
14b0e578 | 1292 | }\r |
b602265d DG |
1293 | return e;\r |
1294 | }\r | |
1295 | \r | |
14b0e578 | 1296 | #else\r |
b602265d DG |
1297 | \r |
1298 | static int\r | |
1299 | callout_name_table_clear(CalloutNameTable* t)\r | |
1300 | {\r | |
1301 | int i;\r | |
1302 | CalloutNameEntry* e;\r | |
1303 | \r | |
1304 | if (IS_NOT_NULL(t)) {\r | |
1305 | for (i = 0; i < t->num; i++) {\r | |
1306 | e = &(t->e[i]);\r | |
1307 | if (IS_NOT_NULL(e->name)) {\r | |
1308 | xfree(e->name);\r | |
1309 | e->name = NULL;\r | |
1310 | e->name_len = 0;\r | |
1311 | e->id = 0;\r | |
1312 | e->func = 0;\r | |
1313 | }\r | |
1314 | }\r | |
1315 | if (IS_NOT_NULL(t->e)) {\r | |
1316 | xfree(t->e);\r | |
1317 | t->e = NULL;\r | |
1318 | }\r | |
1319 | t->num = 0;\r | |
1320 | }\r | |
1321 | return 0;\r | |
14b0e578 CS |
1322 | }\r |
1323 | \r | |
b602265d DG |
1324 | static int\r |
1325 | global_callout_name_table_free(void)\r | |
14b0e578 | 1326 | {\r |
b602265d DG |
1327 | if (IS_NOT_NULL(GlobalCalloutNameTable)) {\r |
1328 | int r = callout_name_table_clear(GlobalCalloutNameTable);\r | |
1329 | if (r != 0) return r;\r | |
14b0e578 | 1330 | \r |
b602265d DG |
1331 | xfree(GlobalCalloutNameTable);\r |
1332 | GlobalCalloutNameTable = 0;\r | |
1333 | CalloutNameIDCounter = 0;\r | |
14b0e578 | 1334 | }\r |
14b0e578 CS |
1335 | return 0;\r |
1336 | }\r | |
14b0e578 | 1337 | \r |
b602265d DG |
1338 | static CalloutNameEntry*\r |
1339 | callout_name_find(UChar* name, UChar* name_end)\r | |
14b0e578 | 1340 | {\r |
b602265d DG |
1341 | int i, len;\r |
1342 | CalloutNameEntry* e;\r | |
1343 | CalloutNameTable* t = Calloutnames;\r | |
14b0e578 | 1344 | \r |
b602265d DG |
1345 | if (IS_NOT_NULL(t)) {\r |
1346 | len = name_end - name;\r | |
1347 | for (i = 0; i < t->num; i++) {\r | |
1348 | e = &(t->e[i]);\r | |
1349 | if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)\r | |
1350 | return e;\r | |
1351 | }\r | |
14b0e578 | 1352 | }\r |
b602265d DG |
1353 | return (CalloutNameEntry* )NULL;\r |
1354 | }\r | |
1355 | \r | |
14b0e578 CS |
1356 | #endif\r |
1357 | \r | |
b602265d DG |
1358 | /* name string must be single byte char string. */\r |
1359 | static int\r | |
1360 | callout_name_entry(CalloutNameEntry** rentry, OnigEncoding enc,\r | |
1361 | int is_not_single, UChar* name, UChar* name_end)\r | |
1362 | {\r | |
1363 | int r;\r | |
1364 | CalloutNameEntry* e;\r | |
1365 | CalloutNameTable* t = GlobalCalloutNameTable;\r | |
14b0e578 | 1366 | \r |
b602265d DG |
1367 | *rentry = 0;\r |
1368 | if (name_end - name <= 0)\r | |
1369 | return ONIGERR_INVALID_CALLOUT_NAME;\r | |
14b0e578 | 1370 | \r |
b602265d DG |
1371 | e = callout_name_find(enc, is_not_single, name, name_end);\r |
1372 | if (IS_NULL(e)) {\r | |
1373 | #ifdef USE_ST_LIBRARY\r | |
1374 | if (IS_NULL(t)) {\r | |
1375 | t = onig_st_init_callout_name_table_with_size(INIT_NAMES_ALLOC_NUM);\r | |
a5def177 | 1376 | CHECK_NULL_RETURN_MEMERR(t);\r |
b602265d DG |
1377 | GlobalCalloutNameTable = t;\r |
1378 | }\r | |
1379 | e = (CalloutNameEntry* )xmalloc(sizeof(CalloutNameEntry));\r | |
1380 | CHECK_NULL_RETURN_MEMERR(e);\r | |
1381 | \r | |
1382 | e->name = onigenc_strdup(enc, name, name_end);\r | |
1383 | if (IS_NULL(e->name)) {\r | |
1384 | xfree(e); return ONIGERR_MEMORY;\r | |
1385 | }\r | |
1386 | \r | |
1387 | r = st_insert_callout_name_table(t, enc, is_not_single,\r | |
1388 | e->name, (e->name + (name_end - name)),\r | |
1389 | (HashDataType )e);\r | |
1390 | if (r < 0) return r;\r | |
1391 | \r | |
1392 | #else\r | |
1393 | \r | |
1394 | int alloc;\r | |
1395 | \r | |
1396 | if (IS_NULL(t)) {\r | |
1397 | alloc = INIT_NAMES_ALLOC_NUM;\r | |
1398 | t = (CalloutNameTable* )xmalloc(sizeof(CalloutNameTable));\r | |
1399 | CHECK_NULL_RETURN_MEMERR(t);\r | |
1400 | t->e = NULL;\r | |
1401 | t->alloc = 0;\r | |
1402 | t->num = 0;\r | |
1403 | \r | |
1404 | t->e = (CalloutNameEntry* )xmalloc(sizeof(CalloutNameEntry) * alloc);\r | |
1405 | if (IS_NULL(t->e)) {\r | |
1406 | xfree(t);\r | |
1407 | return ONIGERR_MEMORY;\r | |
1408 | }\r | |
1409 | t->alloc = alloc;\r | |
1410 | GlobalCalloutNameTable = t;\r | |
1411 | goto clear;\r | |
1412 | }\r | |
1413 | else if (t->num == t->alloc) {\r | |
1414 | int i;\r | |
1415 | \r | |
1416 | alloc = t->alloc * 2;\r | |
1417 | t->e = (CalloutNameEntry* )xrealloc(t->e, sizeof(CalloutNameEntry) * alloc, sizeof(CalloutNameEntry)*t->alloc);\r | |
1418 | CHECK_NULL_RETURN_MEMERR(t->e);\r | |
1419 | t->alloc = alloc;\r | |
1420 | \r | |
1421 | clear:\r | |
1422 | for (i = t->num; i < t->alloc; i++) {\r | |
1423 | t->e[i].name = NULL;\r | |
1424 | t->e[i].name_len = 0;\r | |
1425 | t->e[i].id = 0;\r | |
1426 | }\r | |
1427 | }\r | |
1428 | e = &(t->e[t->num]);\r | |
1429 | t->num++;\r | |
1430 | e->name = onigenc_strdup(enc, name, name_end);\r | |
1431 | if (IS_NULL(e->name)) return ONIGERR_MEMORY;\r | |
1432 | #endif\r | |
1433 | \r | |
1434 | CalloutNameIDCounter++;\r | |
1435 | e->id = CalloutNameIDCounter;\r | |
1436 | e->name_len = (int )(name_end - name);\r | |
1437 | }\r | |
1438 | \r | |
1439 | *rentry = e;\r | |
1440 | return e->id;\r | |
1441 | }\r | |
1442 | \r | |
1443 | static int\r | |
1444 | is_allowed_callout_name(OnigEncoding enc, UChar* name, UChar* name_end)\r | |
14b0e578 | 1445 | {\r |
b602265d DG |
1446 | UChar* p;\r |
1447 | OnigCodePoint c;\r | |
1448 | \r | |
1449 | if (name >= name_end) return 0;\r | |
1450 | \r | |
1451 | p = name;\r | |
1452 | while (p < name_end) {\r | |
1453 | c = ONIGENC_MBC_TO_CODE(enc, p, name_end);\r | |
1454 | if (! IS_ALLOWED_CODE_IN_CALLOUT_NAME(c))\r | |
1455 | return 0;\r | |
1456 | \r | |
1457 | if (p == name) {\r | |
1458 | if (c >= '0' && c <= '9') return 0;\r | |
1459 | }\r | |
1460 | \r | |
1461 | p += ONIGENC_MBC_ENC_LEN(enc, p);\r | |
1462 | }\r | |
1463 | \r | |
1464 | return 1;\r | |
14b0e578 CS |
1465 | }\r |
1466 | \r | |
b602265d DG |
1467 | static int\r |
1468 | is_allowed_callout_tag_name(OnigEncoding enc, UChar* name, UChar* name_end)\r | |
14b0e578 | 1469 | {\r |
b602265d DG |
1470 | UChar* p;\r |
1471 | OnigCodePoint c;\r | |
14b0e578 | 1472 | \r |
b602265d DG |
1473 | if (name >= name_end) return 0;\r |
1474 | \r | |
1475 | p = name;\r | |
1476 | while (p < name_end) {\r | |
1477 | c = ONIGENC_MBC_TO_CODE(enc, p, name_end);\r | |
1478 | if (! IS_ALLOWED_CODE_IN_CALLOUT_TAG_NAME(c))\r | |
1479 | return 0;\r | |
1480 | \r | |
1481 | if (p == name) {\r | |
1482 | if (c >= '0' && c <= '9') return 0;\r | |
1483 | }\r | |
1484 | \r | |
1485 | p += ONIGENC_MBC_ENC_LEN(enc, p);\r | |
1486 | }\r | |
1487 | \r | |
1488 | return 1;\r | |
14b0e578 CS |
1489 | }\r |
1490 | \r | |
b602265d DG |
1491 | extern int\r |
1492 | onig_set_callout_of_name(OnigEncoding enc, OnigCalloutType callout_type,\r | |
1493 | UChar* name, UChar* name_end, int in,\r | |
1494 | OnigCalloutFunc start_func,\r | |
1495 | OnigCalloutFunc end_func,\r | |
1496 | int arg_num, unsigned int arg_types[],\r | |
1497 | int opt_arg_num, OnigValue opt_defaults[])\r | |
14b0e578 | 1498 | {\r |
b602265d DG |
1499 | int r;\r |
1500 | int i;\r | |
1501 | int j;\r | |
1502 | int id;\r | |
1503 | int is_not_single;\r | |
1504 | CalloutNameEntry* e;\r | |
1505 | CalloutNameListEntry* fe;\r | |
14b0e578 | 1506 | \r |
b602265d DG |
1507 | if (callout_type != ONIG_CALLOUT_TYPE_SINGLE)\r |
1508 | return ONIGERR_INVALID_ARGUMENT;\r | |
14b0e578 | 1509 | \r |
b602265d DG |
1510 | if (arg_num < 0 || arg_num > ONIG_CALLOUT_MAX_ARGS_NUM)\r |
1511 | return ONIGERR_INVALID_CALLOUT_ARG;\r | |
14b0e578 | 1512 | \r |
b602265d DG |
1513 | if (opt_arg_num < 0 || opt_arg_num > arg_num)\r |
1514 | return ONIGERR_INVALID_CALLOUT_ARG;\r | |
14b0e578 | 1515 | \r |
b602265d DG |
1516 | if (start_func == 0 && end_func == 0)\r |
1517 | return ONIGERR_INVALID_CALLOUT_ARG;\r | |
1518 | \r | |
1519 | if ((in & ONIG_CALLOUT_IN_PROGRESS) == 0 && (in & ONIG_CALLOUT_IN_RETRACTION) == 0)\r | |
1520 | return ONIGERR_INVALID_CALLOUT_ARG;\r | |
1521 | \r | |
1522 | for (i = 0; i < arg_num; i++) {\r | |
1523 | unsigned int t = arg_types[i];\r | |
1524 | if (t == ONIG_TYPE_VOID)\r | |
1525 | return ONIGERR_INVALID_CALLOUT_ARG;\r | |
1526 | else {\r | |
1527 | if (i >= arg_num - opt_arg_num) {\r | |
1528 | if (t != ONIG_TYPE_LONG && t != ONIG_TYPE_CHAR && t != ONIG_TYPE_STRING &&\r | |
1529 | t != ONIG_TYPE_TAG)\r | |
1530 | return ONIGERR_INVALID_CALLOUT_ARG;\r | |
1531 | }\r | |
1532 | else {\r | |
1533 | if (t != ONIG_TYPE_LONG) {\r | |
1534 | t = t & ~ONIG_TYPE_LONG;\r | |
1535 | if (t != ONIG_TYPE_CHAR && t != ONIG_TYPE_STRING && t != ONIG_TYPE_TAG)\r | |
1536 | return ONIGERR_INVALID_CALLOUT_ARG;\r | |
1537 | }\r | |
14b0e578 CS |
1538 | }\r |
1539 | }\r | |
1540 | }\r | |
1541 | \r | |
b602265d DG |
1542 | if (! is_allowed_callout_name(enc, name, name_end)) {\r |
1543 | return ONIGERR_INVALID_CALLOUT_NAME;\r | |
14b0e578 | 1544 | }\r |
14b0e578 | 1545 | \r |
b602265d DG |
1546 | is_not_single = (callout_type != ONIG_CALLOUT_TYPE_SINGLE);\r |
1547 | id = callout_name_entry(&e, enc, is_not_single, name, name_end);\r | |
1548 | if (id < 0) return id;\r | |
14b0e578 | 1549 | \r |
b602265d DG |
1550 | r = ONIG_NORMAL;\r |
1551 | if (IS_NULL(GlobalCalloutNameList)) {\r | |
1552 | r = make_callout_func_list(&GlobalCalloutNameList, 10);\r | |
1553 | if (r != ONIG_NORMAL) return r;\r | |
1554 | }\r | |
14b0e578 | 1555 | \r |
b602265d DG |
1556 | while (id >= GlobalCalloutNameList->n) {\r |
1557 | int rid;\r | |
1558 | r = callout_func_list_add(GlobalCalloutNameList, &rid);\r | |
1559 | if (r != ONIG_NORMAL) return r;\r | |
14b0e578 CS |
1560 | }\r |
1561 | \r | |
b602265d DG |
1562 | fe = GlobalCalloutNameList->v + id;\r |
1563 | fe->type = callout_type;\r | |
1564 | fe->in = in;\r | |
1565 | fe->start_func = start_func;\r | |
1566 | fe->end_func = end_func;\r | |
1567 | fe->arg_num = arg_num;\r | |
1568 | fe->opt_arg_num = opt_arg_num;\r | |
1569 | fe->name = e->name;\r | |
14b0e578 | 1570 | \r |
b602265d DG |
1571 | for (i = 0; i < arg_num; i++) {\r |
1572 | fe->arg_types[i] = arg_types[i];\r | |
1573 | }\r | |
1574 | for (i = arg_num - opt_arg_num, j = 0; i < arg_num; i++, j++) {\r | |
6d665168 | 1575 | if(IS_NULL(opt_defaults))return ONIGERR_INVALID_ARGUMENT;\r |
b602265d DG |
1576 | if (fe->arg_types[i] == ONIG_TYPE_STRING) {\r |
1577 | OnigValue* val = opt_defaults + j;\r | |
1578 | UChar* ds = onigenc_strdup(enc, val->s.start, val->s.end);\r | |
1579 | CHECK_NULL_RETURN_MEMERR(ds);\r | |
14b0e578 | 1580 | \r |
b602265d DG |
1581 | fe->opt_defaults[i].s.start = ds;\r |
1582 | fe->opt_defaults[i].s.end = ds + (val->s.end - val->s.start);\r | |
1583 | }\r | |
1584 | else {\r | |
1585 | fe->opt_defaults[i] = opt_defaults[j];\r | |
1586 | }\r | |
1587 | }\r | |
1588 | \r | |
1589 | r = id;\r | |
1590 | return r;\r | |
14b0e578 CS |
1591 | }\r |
1592 | \r | |
b602265d DG |
1593 | static int\r |
1594 | get_callout_name_id_by_name(OnigEncoding enc, int is_not_single,\r | |
1595 | UChar* name, UChar* name_end, int* rid)\r | |
14b0e578 | 1596 | {\r |
b602265d DG |
1597 | int r;\r |
1598 | CalloutNameEntry* e;\r | |
14b0e578 | 1599 | \r |
b602265d DG |
1600 | if (! is_allowed_callout_name(enc, name, name_end)) {\r |
1601 | return ONIGERR_INVALID_CALLOUT_NAME;\r | |
1602 | }\r | |
1603 | \r | |
1604 | e = callout_name_find(enc, is_not_single, name, name_end);\r | |
1605 | if (IS_NULL(e)) {\r | |
1606 | return ONIGERR_UNDEFINED_CALLOUT_NAME;\r | |
1607 | }\r | |
1608 | \r | |
1609 | r = ONIG_NORMAL;\r | |
1610 | *rid = e->id;\r | |
1611 | \r | |
1612 | return r;\r | |
14b0e578 CS |
1613 | }\r |
1614 | \r | |
b602265d DG |
1615 | extern OnigCalloutFunc\r |
1616 | onig_get_callout_start_func(regex_t* reg, int callout_num)\r | |
14b0e578 | 1617 | {\r |
b602265d DG |
1618 | /* If used for callouts of contents, return 0. */\r |
1619 | CalloutListEntry* e;\r | |
14b0e578 | 1620 | \r |
b602265d | 1621 | e = onig_reg_callout_list_at(reg, callout_num);\r |
a5def177 | 1622 | CHECK_NULL_RETURN(e);\r |
b602265d | 1623 | return e->start_func;\r |
14b0e578 CS |
1624 | }\r |
1625 | \r | |
b602265d DG |
1626 | extern const UChar*\r |
1627 | onig_get_callout_tag_start(regex_t* reg, int callout_num)\r | |
14b0e578 | 1628 | {\r |
b602265d | 1629 | CalloutListEntry* e = onig_reg_callout_list_at(reg, callout_num);\r |
a5def177 | 1630 | CHECK_NULL_RETURN(e);\r |
b602265d | 1631 | return e->tag_start;\r |
14b0e578 CS |
1632 | }\r |
1633 | \r | |
b602265d DG |
1634 | extern const UChar*\r |
1635 | onig_get_callout_tag_end(regex_t* reg, int callout_num)\r | |
14b0e578 | 1636 | {\r |
b602265d | 1637 | CalloutListEntry* e = onig_reg_callout_list_at(reg, callout_num);\r |
a5def177 | 1638 | CHECK_NULL_RETURN(e);\r |
b602265d DG |
1639 | return e->tag_end;\r |
1640 | }\r | |
14b0e578 | 1641 | \r |
14b0e578 | 1642 | \r |
b602265d DG |
1643 | extern OnigCalloutType\r |
1644 | onig_get_callout_type_by_name_id(int name_id)\r | |
1645 | {\r | |
1646 | if (name_id < 0 || name_id >= GlobalCalloutNameList->n)\r | |
1647 | return 0;\r | |
14b0e578 | 1648 | \r |
b602265d | 1649 | return GlobalCalloutNameList->v[name_id].type;\r |
14b0e578 CS |
1650 | }\r |
1651 | \r | |
b602265d DG |
1652 | extern OnigCalloutFunc\r |
1653 | onig_get_callout_start_func_by_name_id(int name_id)\r | |
14b0e578 | 1654 | {\r |
b602265d DG |
1655 | if (name_id < 0 || name_id >= GlobalCalloutNameList->n)\r |
1656 | return 0;\r | |
14b0e578 | 1657 | \r |
b602265d | 1658 | return GlobalCalloutNameList->v[name_id].start_func;\r |
14b0e578 CS |
1659 | }\r |
1660 | \r | |
b602265d DG |
1661 | extern OnigCalloutFunc\r |
1662 | onig_get_callout_end_func_by_name_id(int name_id)\r | |
14b0e578 | 1663 | {\r |
b602265d DG |
1664 | if (name_id < 0 || name_id >= GlobalCalloutNameList->n)\r |
1665 | return 0;\r | |
14b0e578 | 1666 | \r |
b602265d | 1667 | return GlobalCalloutNameList->v[name_id].end_func;\r |
14b0e578 CS |
1668 | }\r |
1669 | \r | |
b602265d DG |
1670 | extern int\r |
1671 | onig_get_callout_in_by_name_id(int name_id)\r | |
14b0e578 | 1672 | {\r |
b602265d DG |
1673 | if (name_id < 0 || name_id >= GlobalCalloutNameList->n)\r |
1674 | return 0;\r | |
14b0e578 | 1675 | \r |
b602265d DG |
1676 | return GlobalCalloutNameList->v[name_id].in;\r |
1677 | }\r | |
14b0e578 | 1678 | \r |
b602265d DG |
1679 | static int\r |
1680 | get_callout_arg_num_by_name_id(int name_id)\r | |
1681 | {\r | |
1682 | return GlobalCalloutNameList->v[name_id].arg_num;\r | |
1683 | }\r | |
14b0e578 | 1684 | \r |
b602265d DG |
1685 | static int\r |
1686 | get_callout_opt_arg_num_by_name_id(int name_id)\r | |
14b0e578 | 1687 | {\r |
b602265d | 1688 | return GlobalCalloutNameList->v[name_id].opt_arg_num;\r |
14b0e578 | 1689 | }\r |
14b0e578 | 1690 | \r |
b602265d DG |
1691 | static unsigned int\r |
1692 | get_callout_arg_type_by_name_id(int name_id, int index)\r | |
14b0e578 | 1693 | {\r |
b602265d | 1694 | return GlobalCalloutNameList->v[name_id].arg_types[index];\r |
14b0e578 CS |
1695 | }\r |
1696 | \r | |
b602265d DG |
1697 | static OnigValue\r |
1698 | get_callout_opt_default_by_name_id(int name_id, int index)\r | |
14b0e578 | 1699 | {\r |
b602265d | 1700 | return GlobalCalloutNameList->v[name_id].opt_defaults[index];\r |
14b0e578 CS |
1701 | }\r |
1702 | \r | |
b602265d DG |
1703 | extern UChar*\r |
1704 | onig_get_callout_name_by_name_id(int name_id)\r | |
14b0e578 | 1705 | {\r |
b602265d DG |
1706 | if (name_id < 0 || name_id >= GlobalCalloutNameList->n)\r |
1707 | return 0;\r | |
1708 | \r | |
1709 | return GlobalCalloutNameList->v[name_id].name;\r | |
14b0e578 CS |
1710 | }\r |
1711 | \r | |
b602265d DG |
1712 | extern int\r |
1713 | onig_global_callout_names_free(void)\r | |
14b0e578 | 1714 | {\r |
b602265d DG |
1715 | free_callout_func_list(GlobalCalloutNameList);\r |
1716 | GlobalCalloutNameList = 0;\r | |
14b0e578 | 1717 | \r |
b602265d DG |
1718 | global_callout_name_table_free();\r |
1719 | return ONIG_NORMAL;\r | |
14b0e578 CS |
1720 | }\r |
1721 | \r | |
14b0e578 | 1722 | \r |
b602265d DG |
1723 | typedef st_table CalloutTagTable;\r |
1724 | typedef intptr_t CalloutTagVal;\r | |
14b0e578 | 1725 | \r |
b602265d | 1726 | #define CALLOUT_TAG_LIST_FLAG_TAG_EXIST (1<<0)\r |
14b0e578 | 1727 | \r |
b602265d DG |
1728 | static int\r |
1729 | i_callout_callout_list_set(UChar* key, CalloutTagVal e, void* arg)\r | |
1730 | {\r | |
1731 | int num;\r | |
1732 | RegexExt* ext = (RegexExt* )arg;\r | |
14b0e578 | 1733 | \r |
b602265d DG |
1734 | num = (int )e - 1;\r |
1735 | ext->callout_list[num].flag |= CALLOUT_TAG_LIST_FLAG_TAG_EXIST;\r | |
1736 | return ST_CONTINUE;\r | |
1737 | }\r | |
14b0e578 | 1738 | \r |
b602265d DG |
1739 | static int\r |
1740 | setup_ext_callout_list_values(regex_t* reg)\r | |
1741 | {\r | |
1742 | int i, j;\r | |
1743 | RegexExt* ext;\r | |
1744 | \r | |
1745 | ext = REG_EXTP(reg);\r | |
1746 | if (IS_NOT_NULL(ext->tag_table)) {\r | |
1747 | onig_st_foreach((CalloutTagTable *)ext->tag_table, i_callout_callout_list_set,\r | |
1748 | (st_data_t )ext);\r | |
1749 | }\r | |
1750 | \r | |
1751 | for (i = 0; i < ext->callout_num; i++) {\r | |
1752 | CalloutListEntry* e = ext->callout_list + i;\r | |
1753 | if (e->of == ONIG_CALLOUT_OF_NAME) {\r | |
1754 | for (j = 0; j < e->u.arg.num; j++) {\r | |
1755 | if (e->u.arg.types[j] == ONIG_TYPE_TAG) {\r | |
1756 | UChar* start;\r | |
1757 | UChar* end;\r | |
1758 | int num;\r | |
1759 | start = e->u.arg.vals[j].s.start;\r | |
1760 | end = e->u.arg.vals[j].s.end;\r | |
1761 | num = onig_get_callout_num_by_tag(reg, start, end);\r | |
1762 | if (num < 0) return num;\r | |
1763 | e->u.arg.vals[j].tag = num;\r | |
1764 | }\r | |
14b0e578 CS |
1765 | }\r |
1766 | }\r | |
14b0e578 CS |
1767 | }\r |
1768 | \r | |
b602265d | 1769 | return ONIG_NORMAL;\r |
14b0e578 CS |
1770 | }\r |
1771 | \r | |
1772 | extern int\r | |
b602265d | 1773 | onig_callout_tag_is_exist_at_callout_num(regex_t* reg, int callout_num)\r |
14b0e578 | 1774 | {\r |
b602265d | 1775 | RegexExt* ext = REG_EXTP(reg);\r |
14b0e578 | 1776 | \r |
b602265d DG |
1777 | if (IS_NULL(ext) || IS_NULL(ext->callout_list)) return 0;\r |
1778 | if (callout_num > ext->callout_num) return 0;\r | |
14b0e578 | 1779 | \r |
b602265d DG |
1780 | return (ext->callout_list[callout_num].flag &\r |
1781 | CALLOUT_TAG_LIST_FLAG_TAG_EXIST) != 0 ? 1 : 0;\r | |
14b0e578 CS |
1782 | }\r |
1783 | \r | |
b602265d DG |
1784 | static int\r |
1785 | i_free_callout_tag_entry(UChar* key, CalloutTagVal e, void* arg ARG_UNUSED)\r | |
14b0e578 | 1786 | {\r |
b602265d DG |
1787 | xfree(key);\r |
1788 | return ST_DELETE;\r | |
14b0e578 CS |
1789 | }\r |
1790 | \r | |
b602265d DG |
1791 | static int\r |
1792 | callout_tag_table_clear(CalloutTagTable* t)\r | |
14b0e578 | 1793 | {\r |
b602265d DG |
1794 | if (IS_NOT_NULL(t)) {\r |
1795 | onig_st_foreach(t, i_free_callout_tag_entry, 0);\r | |
14b0e578 | 1796 | }\r |
b602265d | 1797 | return 0;\r |
14b0e578 CS |
1798 | }\r |
1799 | \r | |
b602265d DG |
1800 | extern int\r |
1801 | onig_callout_tag_table_free(void* table)\r | |
14b0e578 | 1802 | {\r |
b602265d | 1803 | CalloutTagTable* t = (CalloutTagTable* )table;\r |
14b0e578 | 1804 | \r |
b602265d DG |
1805 | if (IS_NOT_NULL(t)) {\r |
1806 | int r = callout_tag_table_clear(t);\r | |
1807 | if (r != 0) return r;\r | |
14b0e578 | 1808 | \r |
b602265d DG |
1809 | onig_st_free_table(t);\r |
1810 | }\r | |
14b0e578 | 1811 | \r |
b602265d | 1812 | return 0;\r |
14b0e578 CS |
1813 | }\r |
1814 | \r | |
b602265d DG |
1815 | extern int\r |
1816 | onig_get_callout_num_by_tag(regex_t* reg,\r | |
1817 | const UChar* tag, const UChar* tag_end)\r | |
14b0e578 | 1818 | {\r |
b602265d DG |
1819 | int r;\r |
1820 | RegexExt* ext;\r | |
1821 | CalloutTagVal e;\r | |
14b0e578 | 1822 | \r |
b602265d DG |
1823 | ext = REG_EXTP(reg);\r |
1824 | if (IS_NULL(ext) || IS_NULL(ext->tag_table))\r | |
1825 | return ONIGERR_INVALID_CALLOUT_TAG_NAME;\r | |
14b0e578 | 1826 | \r |
b602265d DG |
1827 | r = onig_st_lookup_strend(ext->tag_table, tag, tag_end,\r |
1828 | (HashDataType* )((void* )(&e)));\r | |
1829 | if (r == 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME;\r | |
1830 | return (int )e;\r | |
14b0e578 CS |
1831 | }\r |
1832 | \r | |
b602265d DG |
1833 | static CalloutTagVal\r |
1834 | callout_tag_find(CalloutTagTable* t, const UChar* name, const UChar* name_end)\r | |
14b0e578 | 1835 | {\r |
b602265d | 1836 | CalloutTagVal e;\r |
14b0e578 | 1837 | \r |
b602265d DG |
1838 | e = -1;\r |
1839 | if (IS_NOT_NULL(t)) {\r | |
1840 | onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));\r | |
14b0e578 | 1841 | }\r |
b602265d | 1842 | return e;\r |
14b0e578 CS |
1843 | }\r |
1844 | \r | |
1845 | static int\r | |
b602265d | 1846 | callout_tag_table_new(CalloutTagTable** rt)\r |
14b0e578 | 1847 | {\r |
b602265d DG |
1848 | CalloutTagTable* t;\r |
1849 | \r | |
1850 | *rt = 0;\r | |
1851 | t = onig_st_init_strend_table_with_size(INIT_TAG_NAMES_ALLOC_NUM);\r | |
1852 | CHECK_NULL_RETURN_MEMERR(t);\r | |
1853 | \r | |
1854 | *rt = t;\r | |
1855 | return ONIG_NORMAL;\r | |
14b0e578 CS |
1856 | }\r |
1857 | \r | |
14b0e578 | 1858 | static int\r |
b602265d DG |
1859 | callout_tag_entry_raw(CalloutTagTable* t, UChar* name, UChar* name_end,\r |
1860 | CalloutTagVal entry_val)\r | |
14b0e578 | 1861 | {\r |
b602265d DG |
1862 | int r;\r |
1863 | CalloutTagVal val;\r | |
14b0e578 | 1864 | \r |
b602265d DG |
1865 | if (name_end - name <= 0)\r |
1866 | return ONIGERR_INVALID_CALLOUT_TAG_NAME;\r | |
14b0e578 | 1867 | \r |
b602265d DG |
1868 | val = callout_tag_find(t, name, name_end);\r |
1869 | if (val >= 0)\r | |
1870 | return ONIGERR_MULTIPLEX_DEFINED_NAME;\r | |
14b0e578 | 1871 | \r |
b602265d DG |
1872 | r = onig_st_insert_strend(t, name, name_end, (HashDataType )entry_val);\r |
1873 | if (r < 0) return r;\r | |
14b0e578 | 1874 | \r |
b602265d | 1875 | return ONIG_NORMAL;\r |
14b0e578 CS |
1876 | }\r |
1877 | \r | |
1878 | static int\r | |
b602265d | 1879 | ext_ensure_tag_table(regex_t* reg)\r |
14b0e578 | 1880 | {\r |
b602265d DG |
1881 | int r;\r |
1882 | RegexExt* ext;\r | |
1883 | CalloutTagTable* t;\r | |
14b0e578 | 1884 | \r |
b602265d DG |
1885 | ext = onig_get_regex_ext(reg);\r |
1886 | CHECK_NULL_RETURN_MEMERR(ext);\r | |
14b0e578 | 1887 | \r |
b602265d DG |
1888 | if (IS_NULL(ext->tag_table)) {\r |
1889 | r = callout_tag_table_new(&t);\r | |
1890 | if (r != ONIG_NORMAL) return r;\r | |
1891 | \r | |
1892 | ext->tag_table = t;\r | |
14b0e578 | 1893 | }\r |
b602265d DG |
1894 | \r |
1895 | return ONIG_NORMAL;\r | |
14b0e578 CS |
1896 | }\r |
1897 | \r | |
1898 | static int\r | |
b602265d DG |
1899 | callout_tag_entry(regex_t* reg, UChar* name, UChar* name_end,\r |
1900 | CalloutTagVal entry_val)\r | |
14b0e578 | 1901 | {\r |
b602265d DG |
1902 | int r;\r |
1903 | RegexExt* ext;\r | |
1904 | CalloutListEntry* e;\r | |
14b0e578 | 1905 | \r |
b602265d DG |
1906 | r = ext_ensure_tag_table(reg);\r |
1907 | if (r != ONIG_NORMAL) return r;\r | |
14b0e578 | 1908 | \r |
b602265d | 1909 | ext = onig_get_regex_ext(reg);\r |
df8be9e5 | 1910 | CHECK_NULL_RETURN_MEMERR(ext);\r |
b602265d | 1911 | r = callout_tag_entry_raw(ext->tag_table, name, name_end, entry_val);\r |
14b0e578 | 1912 | \r |
b602265d | 1913 | e = onig_reg_callout_list_at(reg, (int )entry_val);\r |
a5def177 | 1914 | CHECK_NULL_RETURN_MEMERR(e);\r |
b602265d DG |
1915 | e->tag_start = name;\r |
1916 | e->tag_end = name_end;\r | |
14b0e578 | 1917 | \r |
b602265d DG |
1918 | return r;\r |
1919 | }\r | |
14b0e578 | 1920 | \r |
b602265d | 1921 | #endif /* USE_CALLOUT */\r |
14b0e578 | 1922 | \r |
14b0e578 | 1923 | \r |
b602265d | 1924 | #define INIT_SCANENV_MEMENV_ALLOC_SIZE 16\r |
14b0e578 | 1925 | \r |
b602265d DG |
1926 | static void\r |
1927 | scan_env_clear(ScanEnv* env)\r | |
14b0e578 | 1928 | {\r |
b602265d DG |
1929 | MEM_STATUS_CLEAR(env->capture_history);\r |
1930 | MEM_STATUS_CLEAR(env->bt_mem_start);\r | |
1931 | MEM_STATUS_CLEAR(env->bt_mem_end);\r | |
1932 | MEM_STATUS_CLEAR(env->backrefed_mem);\r | |
1933 | env->error = (UChar* )NULL;\r | |
1934 | env->error_end = (UChar* )NULL;\r | |
1935 | env->num_call = 0;\r | |
14b0e578 | 1936 | \r |
b602265d DG |
1937 | #ifdef USE_CALL\r |
1938 | env->unset_addr_list = NULL;\r | |
1939 | env->has_call_zero = 0;\r | |
1940 | #endif\r | |
14b0e578 | 1941 | \r |
b602265d DG |
1942 | env->num_mem = 0;\r |
1943 | env->num_named = 0;\r | |
1944 | env->mem_alloc = 0;\r | |
1945 | env->mem_env_dynamic = (MemEnv* )NULL;\r | |
14b0e578 | 1946 | \r |
b602265d | 1947 | xmemset(env->mem_env_static, 0, sizeof(env->mem_env_static));\r |
14b0e578 | 1948 | \r |
b602265d DG |
1949 | env->parse_depth = 0;\r |
1950 | env->keep_num = 0;\r | |
1951 | env->save_num = 0;\r | |
1952 | env->save_alloc_num = 0;\r | |
1953 | env->saves = 0;\r | |
1954 | }\r | |
14b0e578 | 1955 | \r |
b602265d DG |
1956 | static int\r |
1957 | scan_env_add_mem_entry(ScanEnv* env)\r | |
1958 | {\r | |
1959 | int i, need, alloc;\r | |
1960 | MemEnv* p;\r | |
14b0e578 | 1961 | \r |
b602265d DG |
1962 | need = env->num_mem + 1;\r |
1963 | if (need > MaxCaptureNum && MaxCaptureNum != 0)\r | |
1964 | return ONIGERR_TOO_MANY_CAPTURES;\r | |
14b0e578 | 1965 | \r |
b602265d DG |
1966 | if (need >= SCANENV_MEMENV_SIZE) {\r |
1967 | if (env->mem_alloc <= need) {\r | |
1968 | if (IS_NULL(env->mem_env_dynamic)) {\r | |
1969 | alloc = INIT_SCANENV_MEMENV_ALLOC_SIZE;\r | |
1970 | p = (MemEnv* )xmalloc(sizeof(MemEnv) * alloc);\r | |
1971 | CHECK_NULL_RETURN_MEMERR(p);\r | |
1972 | xmemcpy(p, env->mem_env_static, sizeof(env->mem_env_static));\r | |
1973 | }\r | |
1974 | else {\r | |
1975 | alloc = env->mem_alloc * 2;\r | |
1976 | p = (MemEnv* )xrealloc(env->mem_env_dynamic, sizeof(MemEnv) * alloc, sizeof(MemEnv)*env->mem_alloc);\r | |
1977 | CHECK_NULL_RETURN_MEMERR(p);\r | |
1978 | }\r | |
14b0e578 | 1979 | \r |
b602265d DG |
1980 | for (i = env->num_mem + 1; i < alloc; i++) {\r |
1981 | p[i].node = NULL_NODE;\r | |
1982 | #if 0\r | |
1983 | p[i].in = 0;\r | |
1984 | p[i].recursion = 0;\r | |
1985 | #endif\r | |
1986 | }\r | |
1987 | \r | |
1988 | env->mem_env_dynamic = p;\r | |
1989 | env->mem_alloc = alloc;\r | |
14b0e578 CS |
1990 | }\r |
1991 | }\r | |
1992 | \r | |
b602265d DG |
1993 | env->num_mem++;\r |
1994 | return env->num_mem;\r | |
14b0e578 CS |
1995 | }\r |
1996 | \r | |
1997 | static int\r | |
b602265d | 1998 | scan_env_set_mem_node(ScanEnv* env, int num, Node* node)\r |
14b0e578 | 1999 | {\r |
b602265d DG |
2000 | if (env->num_mem >= num)\r |
2001 | SCANENV_MEMENV(env)[num].node = node;\r | |
2002 | else\r | |
2003 | return ONIGERR_PARSER_BUG;\r | |
2004 | return 0;\r | |
14b0e578 CS |
2005 | }\r |
2006 | \r | |
b602265d DG |
2007 | extern void\r |
2008 | onig_node_free(Node* node)\r | |
14b0e578 | 2009 | {\r |
b602265d DG |
2010 | start:\r |
2011 | if (IS_NULL(node)) return ;\r | |
14b0e578 | 2012 | \r |
b602265d DG |
2013 | #ifdef DEBUG_NODE_FREE\r |
2014 | fprintf(stderr, "onig_node_free: %p\n", node);\r | |
2015 | #endif\r | |
14b0e578 | 2016 | \r |
b602265d DG |
2017 | switch (NODE_TYPE(node)) {\r |
2018 | case NODE_STRING:\r | |
2019 | if (STR_(node)->capa != 0 &&\r | |
2020 | IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) {\r | |
2021 | xfree(STR_(node)->s);\r | |
2022 | }\r | |
2023 | break;\r | |
14b0e578 | 2024 | \r |
b602265d DG |
2025 | case NODE_LIST:\r |
2026 | case NODE_ALT:\r | |
2027 | onig_node_free(NODE_CAR(node));\r | |
2028 | {\r | |
2029 | Node* next_node = NODE_CDR(node);\r | |
2030 | \r | |
2031 | xfree(node);\r | |
2032 | node = next_node;\r | |
2033 | goto start;\r | |
14b0e578 | 2034 | }\r |
b602265d | 2035 | break;\r |
14b0e578 | 2036 | \r |
b602265d DG |
2037 | case NODE_CCLASS:\r |
2038 | {\r | |
2039 | CClassNode* cc = CCLASS_(node);\r | |
14b0e578 | 2040 | \r |
b602265d DG |
2041 | if (cc->mbuf)\r |
2042 | bbuf_free(cc->mbuf);\r | |
2043 | }\r | |
2044 | break;\r | |
14b0e578 | 2045 | \r |
b602265d DG |
2046 | case NODE_BACKREF:\r |
2047 | if (IS_NOT_NULL(BACKREF_(node)->back_dynamic))\r | |
2048 | xfree(BACKREF_(node)->back_dynamic);\r | |
2049 | break;\r | |
14b0e578 | 2050 | \r |
b602265d DG |
2051 | case NODE_ENCLOSURE:\r |
2052 | if (NODE_BODY(node))\r | |
2053 | onig_node_free(NODE_BODY(node));\r | |
14b0e578 | 2054 | \r |
b602265d DG |
2055 | {\r |
2056 | EnclosureNode* en = ENCLOSURE_(node);\r | |
2057 | if (en->type == ENCLOSURE_IF_ELSE) {\r | |
2058 | onig_node_free(en->te.Then);\r | |
2059 | onig_node_free(en->te.Else);\r | |
14b0e578 CS |
2060 | }\r |
2061 | }\r | |
b602265d | 2062 | break;\r |
14b0e578 | 2063 | \r |
b602265d DG |
2064 | case NODE_QUANT:\r |
2065 | case NODE_ANCHOR:\r | |
2066 | if (NODE_BODY(node))\r | |
2067 | onig_node_free(NODE_BODY(node));\r | |
2068 | break;\r | |
14b0e578 | 2069 | \r |
b602265d DG |
2070 | case NODE_CTYPE:\r |
2071 | case NODE_CALL:\r | |
2072 | case NODE_GIMMICK:\r | |
2073 | break;\r | |
14b0e578 | 2074 | }\r |
14b0e578 | 2075 | \r |
b602265d | 2076 | xfree(node);\r |
14b0e578 CS |
2077 | }\r |
2078 | \r | |
b602265d DG |
2079 | static void\r |
2080 | cons_node_free_alone(Node* node)\r | |
14b0e578 | 2081 | {\r |
b602265d DG |
2082 | NODE_CAR(node) = 0;\r |
2083 | NODE_CDR(node) = 0;\r | |
2084 | onig_node_free(node);\r | |
14b0e578 CS |
2085 | }\r |
2086 | \r | |
b602265d DG |
2087 | static Node*\r |
2088 | node_new(void)\r | |
14b0e578 | 2089 | {\r |
b602265d | 2090 | Node* node;\r |
14b0e578 | 2091 | \r |
b602265d | 2092 | node = (Node* )xmalloc(sizeof(Node));\r |
df8be9e5 | 2093 | CHECK_NULL_RETURN(node);\r |
b602265d | 2094 | xmemset(node, 0, sizeof(*node));\r |
14b0e578 | 2095 | \r |
b602265d DG |
2096 | #ifdef DEBUG_NODE_FREE\r |
2097 | fprintf(stderr, "node_new: %p\n", node);\r | |
2098 | #endif\r | |
2099 | return node;\r | |
2100 | }\r | |
14b0e578 | 2101 | \r |
14b0e578 | 2102 | \r |
b602265d DG |
2103 | static void\r |
2104 | initialize_cclass(CClassNode* cc)\r | |
2105 | {\r | |
2106 | BITSET_CLEAR(cc->bs);\r | |
2107 | cc->flags = 0;\r | |
2108 | cc->mbuf = NULL;\r | |
2109 | }\r | |
2110 | \r | |
2111 | static Node*\r | |
2112 | node_new_cclass(void)\r | |
2113 | {\r | |
2114 | Node* node = node_new();\r | |
2115 | CHECK_NULL_RETURN(node);\r | |
2116 | \r | |
2117 | NODE_SET_TYPE(node, NODE_CCLASS);\r | |
2118 | initialize_cclass(CCLASS_(node));\r | |
2119 | return node;\r | |
2120 | }\r | |
2121 | \r | |
2122 | static Node*\r | |
2123 | node_new_ctype(int type, int not, OnigOptionType options)\r | |
2124 | {\r | |
2125 | Node* node = node_new();\r | |
2126 | CHECK_NULL_RETURN(node);\r | |
2127 | \r | |
2128 | NODE_SET_TYPE(node, NODE_CTYPE);\r | |
2129 | CTYPE_(node)->ctype = type;\r | |
2130 | CTYPE_(node)->not = not;\r | |
2131 | CTYPE_(node)->options = options;\r | |
2132 | CTYPE_(node)->ascii_mode = IS_ASCII_MODE_CTYPE_OPTION(type, options);\r | |
2133 | return node;\r | |
2134 | }\r | |
2135 | \r | |
2136 | static Node*\r | |
2137 | node_new_anychar(void)\r | |
2138 | {\r | |
2139 | Node* node = node_new_ctype(CTYPE_ANYCHAR, 0, ONIG_OPTION_NONE);\r | |
2140 | return node;\r | |
2141 | }\r | |
2142 | \r | |
2143 | static Node*\r | |
2144 | node_new_anychar_with_fixed_option(OnigOptionType option)\r | |
2145 | {\r | |
2146 | CtypeNode* ct;\r | |
2147 | Node* node;\r | |
2148 | \r | |
2149 | node = node_new_anychar();\r | |
a5def177 DG |
2150 | CHECK_NULL_RETURN(node);\r |
2151 | \r | |
b602265d DG |
2152 | ct = CTYPE_(node);\r |
2153 | ct->options = option;\r | |
2154 | NODE_STATUS_ADD(node, FIXED_OPTION);\r | |
2155 | return node;\r | |
2156 | }\r | |
2157 | \r | |
2158 | static int\r | |
2159 | node_new_no_newline(Node** node, ScanEnv* env)\r | |
2160 | {\r | |
2161 | Node* n;\r | |
2162 | \r | |
2163 | n = node_new_anychar_with_fixed_option(ONIG_OPTION_NONE);\r | |
2164 | CHECK_NULL_RETURN_MEMERR(n);\r | |
2165 | *node = n;\r | |
2166 | return 0;\r | |
2167 | }\r | |
2168 | \r | |
2169 | static int\r | |
2170 | node_new_true_anychar(Node** node, ScanEnv* env)\r | |
2171 | {\r | |
2172 | Node* n;\r | |
2173 | \r | |
2174 | n = node_new_anychar_with_fixed_option(ONIG_OPTION_MULTILINE);\r | |
2175 | CHECK_NULL_RETURN_MEMERR(n);\r | |
2176 | *node = n;\r | |
2177 | return 0;\r | |
2178 | }\r | |
2179 | \r | |
2180 | static Node*\r | |
2181 | node_new_list(Node* left, Node* right)\r | |
2182 | {\r | |
2183 | Node* node = node_new();\r | |
2184 | CHECK_NULL_RETURN(node);\r | |
2185 | \r | |
2186 | NODE_SET_TYPE(node, NODE_LIST);\r | |
2187 | NODE_CAR(node) = left;\r | |
2188 | NODE_CDR(node) = right;\r | |
2189 | return node;\r | |
2190 | }\r | |
2191 | \r | |
2192 | extern Node*\r | |
2193 | onig_node_new_list(Node* left, Node* right)\r | |
2194 | {\r | |
2195 | return node_new_list(left, right);\r | |
2196 | }\r | |
2197 | \r | |
2198 | extern Node*\r | |
2199 | onig_node_list_add(Node* list, Node* x)\r | |
2200 | {\r | |
2201 | Node *n;\r | |
2202 | \r | |
2203 | n = onig_node_new_list(x, NULL);\r | |
2204 | if (IS_NULL(n)) return NULL_NODE;\r | |
2205 | \r | |
2206 | if (IS_NOT_NULL(list)) {\r | |
2207 | while (IS_NOT_NULL(NODE_CDR(list)))\r | |
2208 | list = NODE_CDR(list);\r | |
2209 | \r | |
2210 | NODE_CDR(list) = n;\r | |
2211 | }\r | |
2212 | \r | |
2213 | return n;\r | |
2214 | }\r | |
2215 | \r | |
2216 | extern Node*\r | |
2217 | onig_node_new_alt(Node* left, Node* right)\r | |
2218 | {\r | |
2219 | Node* node = node_new();\r | |
2220 | CHECK_NULL_RETURN(node);\r | |
2221 | \r | |
2222 | NODE_SET_TYPE(node, NODE_ALT);\r | |
2223 | NODE_CAR(node) = left;\r | |
2224 | NODE_CDR(node) = right;\r | |
2225 | return node;\r | |
2226 | }\r | |
2227 | \r | |
2228 | static Node*\r | |
2229 | make_list_or_alt(NodeType type, int n, Node* ns[])\r | |
2230 | {\r | |
2231 | Node* r;\r | |
2232 | \r | |
2233 | if (n <= 0) return NULL_NODE;\r | |
2234 | \r | |
2235 | if (n == 1) {\r | |
2236 | r = node_new();\r | |
2237 | CHECK_NULL_RETURN(r);\r | |
2238 | NODE_SET_TYPE(r, type);\r | |
2239 | NODE_CAR(r) = ns[0];\r | |
2240 | NODE_CDR(r) = NULL_NODE;\r | |
2241 | }\r | |
2242 | else {\r | |
2243 | Node* right;\r | |
2244 | \r | |
2245 | r = node_new();\r | |
2246 | CHECK_NULL_RETURN(r);\r | |
2247 | \r | |
2248 | right = make_list_or_alt(type, n - 1, ns + 1);\r | |
2249 | if (IS_NULL(right)) {\r | |
2250 | onig_node_free(r);\r | |
2251 | return NULL_NODE;\r | |
2252 | }\r | |
2253 | \r | |
2254 | NODE_SET_TYPE(r, type);\r | |
2255 | NODE_CAR(r) = ns[0];\r | |
2256 | NODE_CDR(r) = right;\r | |
2257 | }\r | |
2258 | \r | |
2259 | return r;\r | |
2260 | }\r | |
2261 | \r | |
2262 | static Node*\r | |
2263 | make_list(int n, Node* ns[])\r | |
2264 | {\r | |
2265 | return make_list_or_alt(NODE_LIST, n, ns);\r | |
2266 | }\r | |
2267 | \r | |
2268 | static Node*\r | |
2269 | make_alt(int n, Node* ns[])\r | |
2270 | {\r | |
2271 | return make_list_or_alt(NODE_ALT, n, ns);\r | |
2272 | }\r | |
2273 | \r | |
2274 | extern Node*\r | |
2275 | onig_node_new_anchor(int type, int ascii_mode)\r | |
2276 | {\r | |
2277 | Node* node = node_new();\r | |
2278 | CHECK_NULL_RETURN(node);\r | |
2279 | \r | |
2280 | NODE_SET_TYPE(node, NODE_ANCHOR);\r | |
2281 | ANCHOR_(node)->type = type;\r | |
2282 | ANCHOR_(node)->char_len = -1;\r | |
2283 | ANCHOR_(node)->ascii_mode = ascii_mode;\r | |
2284 | return node;\r | |
2285 | }\r | |
2286 | \r | |
2287 | static Node*\r | |
2288 | node_new_backref(int back_num, int* backrefs, int by_name,\r | |
2289 | #ifdef USE_BACKREF_WITH_LEVEL\r | |
2290 | int exist_level, int nest_level,\r | |
2291 | #endif\r | |
2292 | ScanEnv* env)\r | |
2293 | {\r | |
2294 | int i;\r | |
2295 | Node* node = node_new();\r | |
2296 | \r | |
2297 | CHECK_NULL_RETURN(node);\r | |
2298 | \r | |
2299 | NODE_SET_TYPE(node, NODE_BACKREF);\r | |
2300 | BACKREF_(node)->back_num = back_num;\r | |
2301 | BACKREF_(node)->back_dynamic = (int* )NULL;\r | |
2302 | if (by_name != 0)\r | |
2303 | NODE_STATUS_ADD(node, BY_NAME);\r | |
2304 | \r | |
2305 | #ifdef USE_BACKREF_WITH_LEVEL\r | |
2306 | if (exist_level != 0) {\r | |
2307 | NODE_STATUS_ADD(node, NEST_LEVEL);\r | |
2308 | BACKREF_(node)->nest_level = nest_level;\r | |
2309 | }\r | |
2310 | #endif\r | |
2311 | \r | |
2312 | for (i = 0; i < back_num; i++) {\r | |
2313 | if (backrefs[i] <= env->num_mem &&\r | |
2314 | IS_NULL(SCANENV_MEMENV(env)[backrefs[i]].node)) {\r | |
2315 | NODE_STATUS_ADD(node, RECURSION); /* /...(\1).../ */\r | |
2316 | break;\r | |
2317 | }\r | |
2318 | }\r | |
2319 | \r | |
2320 | if (back_num <= NODE_BACKREFS_SIZE) {\r | |
2321 | for (i = 0; i < back_num; i++)\r | |
2322 | BACKREF_(node)->back_static[i] = backrefs[i];\r | |
2323 | }\r | |
2324 | else {\r | |
2325 | int* p = (int* )xmalloc(sizeof(int) * back_num);\r | |
2326 | if (IS_NULL(p)) {\r | |
2327 | onig_node_free(node);\r | |
2328 | return NULL;\r | |
2329 | }\r | |
2330 | BACKREF_(node)->back_dynamic = p;\r | |
2331 | for (i = 0; i < back_num; i++)\r | |
2332 | p[i] = backrefs[i];\r | |
2333 | }\r | |
2334 | return node;\r | |
2335 | }\r | |
2336 | \r | |
2337 | static Node*\r | |
2338 | node_new_backref_checker(int back_num, int* backrefs, int by_name,\r | |
2339 | #ifdef USE_BACKREF_WITH_LEVEL\r | |
2340 | int exist_level, int nest_level,\r | |
2341 | #endif\r | |
2342 | ScanEnv* env)\r | |
2343 | {\r | |
2344 | Node* node;\r | |
2345 | \r | |
2346 | node = node_new_backref(back_num, backrefs, by_name,\r | |
2347 | #ifdef USE_BACKREF_WITH_LEVEL\r | |
2348 | exist_level, nest_level,\r | |
2349 | #endif\r | |
2350 | env);\r | |
2351 | CHECK_NULL_RETURN(node);\r | |
2352 | \r | |
2353 | NODE_STATUS_ADD(node, CHECKER);\r | |
2354 | return node;\r | |
2355 | }\r | |
2356 | \r | |
2357 | #ifdef USE_CALL\r | |
2358 | static Node*\r | |
2359 | node_new_call(UChar* name, UChar* name_end, int gnum, int by_number)\r | |
2360 | {\r | |
2361 | Node* node = node_new();\r | |
2362 | CHECK_NULL_RETURN(node);\r | |
2363 | \r | |
2364 | NODE_SET_TYPE(node, NODE_CALL);\r | |
2365 | CALL_(node)->by_number = by_number;\r | |
2366 | CALL_(node)->name = name;\r | |
2367 | CALL_(node)->name_end = name_end;\r | |
2368 | CALL_(node)->group_num = gnum;\r | |
2369 | CALL_(node)->entry_count = 1;\r | |
2370 | return node;\r | |
2371 | }\r | |
2372 | #endif\r | |
2373 | \r | |
2374 | static Node*\r | |
2375 | node_new_quantifier(int lower, int upper, int by_number)\r | |
2376 | {\r | |
2377 | Node* node = node_new();\r | |
2378 | CHECK_NULL_RETURN(node);\r | |
2379 | \r | |
2380 | NODE_SET_TYPE(node, NODE_QUANT);\r | |
2381 | QUANT_(node)->lower = lower;\r | |
2382 | QUANT_(node)->upper = upper;\r | |
2383 | QUANT_(node)->greedy = 1;\r | |
2384 | QUANT_(node)->body_empty_info = QUANT_BODY_IS_NOT_EMPTY;\r | |
2385 | QUANT_(node)->head_exact = NULL_NODE;\r | |
2386 | QUANT_(node)->next_head_exact = NULL_NODE;\r | |
2387 | QUANT_(node)->is_refered = 0;\r | |
2388 | if (by_number != 0)\r | |
2389 | NODE_STATUS_ADD(node, BY_NUMBER);\r | |
2390 | \r | |
2391 | return node;\r | |
2392 | }\r | |
2393 | \r | |
2394 | static Node*\r | |
2395 | node_new_enclosure(enum EnclosureType type)\r | |
2396 | {\r | |
2397 | Node* node = node_new();\r | |
2398 | CHECK_NULL_RETURN(node);\r | |
2399 | \r | |
2400 | NODE_SET_TYPE(node, NODE_ENCLOSURE);\r | |
2401 | ENCLOSURE_(node)->type = type;\r | |
2402 | \r | |
2403 | switch (type) {\r | |
2404 | case ENCLOSURE_MEMORY:\r | |
2405 | ENCLOSURE_(node)->m.regnum = 0;\r | |
2406 | ENCLOSURE_(node)->m.called_addr = -1;\r | |
2407 | ENCLOSURE_(node)->m.entry_count = 1;\r | |
2408 | ENCLOSURE_(node)->m.called_state = 0;\r | |
2409 | break;\r | |
2410 | \r | |
2411 | case ENCLOSURE_OPTION:\r | |
2412 | ENCLOSURE_(node)->o.options = 0;\r | |
2413 | break;\r | |
2414 | \r | |
2415 | case ENCLOSURE_STOP_BACKTRACK:\r | |
2416 | break;\r | |
2417 | \r | |
2418 | case ENCLOSURE_IF_ELSE:\r | |
2419 | ENCLOSURE_(node)->te.Then = 0;\r | |
2420 | ENCLOSURE_(node)->te.Else = 0;\r | |
2421 | break;\r | |
2422 | }\r | |
2423 | \r | |
2424 | ENCLOSURE_(node)->opt_count = 0;\r | |
2425 | return node;\r | |
2426 | }\r | |
2427 | \r | |
2428 | extern Node*\r | |
2429 | onig_node_new_enclosure(int type)\r | |
2430 | {\r | |
2431 | return node_new_enclosure(type);\r | |
2432 | }\r | |
2433 | \r | |
2434 | static Node*\r | |
2435 | node_new_enclosure_if_else(Node* cond, Node* Then, Node* Else)\r | |
2436 | {\r | |
2437 | Node* n;\r | |
2438 | n = node_new_enclosure(ENCLOSURE_IF_ELSE);\r | |
2439 | CHECK_NULL_RETURN(n);\r | |
2440 | \r | |
2441 | NODE_BODY(n) = cond;\r | |
2442 | ENCLOSURE_(n)->te.Then = Then;\r | |
2443 | ENCLOSURE_(n)->te.Else = Else;\r | |
2444 | return n;\r | |
2445 | }\r | |
2446 | \r | |
2447 | static Node*\r | |
2448 | node_new_memory(int is_named)\r | |
2449 | {\r | |
2450 | Node* node = node_new_enclosure(ENCLOSURE_MEMORY);\r | |
2451 | CHECK_NULL_RETURN(node);\r | |
2452 | if (is_named != 0)\r | |
2453 | NODE_STATUS_ADD(node, NAMED_GROUP);\r | |
2454 | \r | |
2455 | return node;\r | |
2456 | }\r | |
2457 | \r | |
2458 | static Node*\r | |
2459 | node_new_option(OnigOptionType option)\r | |
2460 | {\r | |
2461 | Node* node = node_new_enclosure(ENCLOSURE_OPTION);\r | |
2462 | CHECK_NULL_RETURN(node);\r | |
2463 | ENCLOSURE_(node)->o.options = option;\r | |
2464 | return node;\r | |
2465 | }\r | |
2466 | \r | |
2467 | static int\r | |
2468 | node_new_fail(Node** node, ScanEnv* env)\r | |
2469 | {\r | |
2470 | *node = node_new();\r | |
2471 | CHECK_NULL_RETURN_MEMERR(*node);\r | |
2472 | \r | |
2473 | NODE_SET_TYPE(*node, NODE_GIMMICK);\r | |
2474 | GIMMICK_(*node)->type = GIMMICK_FAIL;\r | |
2475 | return ONIG_NORMAL;\r | |
2476 | }\r | |
2477 | \r | |
2478 | static int\r | |
2479 | node_new_save_gimmick(Node** node, enum SaveType save_type, ScanEnv* env)\r | |
2480 | {\r | |
2481 | int id;\r | |
2482 | int r;\r | |
2483 | \r | |
2484 | r = save_entry(env, save_type, &id);\r | |
2485 | if (r != ONIG_NORMAL) return r;\r | |
2486 | \r | |
2487 | *node = node_new();\r | |
2488 | CHECK_NULL_RETURN_MEMERR(*node);\r | |
2489 | \r | |
2490 | NODE_SET_TYPE(*node, NODE_GIMMICK);\r | |
2491 | GIMMICK_(*node)->id = id;\r | |
2492 | GIMMICK_(*node)->type = GIMMICK_SAVE;\r | |
2493 | GIMMICK_(*node)->detail_type = (int )save_type;\r | |
2494 | \r | |
2495 | return ONIG_NORMAL;\r | |
2496 | }\r | |
2497 | \r | |
2498 | static int\r | |
2499 | node_new_update_var_gimmick(Node** node, enum UpdateVarType update_var_type,\r | |
2500 | int id, ScanEnv* env)\r | |
2501 | {\r | |
2502 | *node = node_new();\r | |
2503 | CHECK_NULL_RETURN_MEMERR(*node);\r | |
2504 | \r | |
2505 | NODE_SET_TYPE(*node, NODE_GIMMICK);\r | |
2506 | GIMMICK_(*node)->id = id;\r | |
2507 | GIMMICK_(*node)->type = GIMMICK_UPDATE_VAR;\r | |
2508 | GIMMICK_(*node)->detail_type = (int )update_var_type;\r | |
2509 | \r | |
2510 | return ONIG_NORMAL;\r | |
2511 | }\r | |
2512 | \r | |
2513 | static int\r | |
2514 | node_new_keep(Node** node, ScanEnv* env)\r | |
2515 | {\r | |
2516 | int r;\r | |
2517 | \r | |
2518 | r = node_new_save_gimmick(node, SAVE_KEEP, env);\r | |
2519 | if (r != 0) return r;\r | |
2520 | \r | |
2521 | env->keep_num++;\r | |
2522 | return ONIG_NORMAL;\r | |
2523 | }\r | |
2524 | \r | |
2525 | #ifdef USE_CALLOUT\r | |
2526 | \r | |
2527 | extern void\r | |
2528 | onig_free_reg_callout_list(int n, CalloutListEntry* list)\r | |
2529 | {\r | |
2530 | int i;\r | |
2531 | int j;\r | |
2532 | \r | |
2533 | if (IS_NULL(list)) return ;\r | |
2534 | \r | |
2535 | for (i = 0; i < n; i++) {\r | |
2536 | if (list[i].of == ONIG_CALLOUT_OF_NAME) {\r | |
2537 | for (j = 0; j < list[i].u.arg.passed_num; j++) {\r | |
2538 | if (list[i].u.arg.types[j] == ONIG_TYPE_STRING) {\r | |
2539 | if (IS_NOT_NULL(list[i].u.arg.vals[j].s.start))\r | |
2540 | xfree(list[i].u.arg.vals[j].s.start);\r | |
2541 | }\r | |
2542 | }\r | |
2543 | }\r | |
2544 | else { /* ONIG_CALLOUT_OF_CONTENTS */\r | |
2545 | if (IS_NOT_NULL(list[i].u.content.start)) {\r | |
2546 | xfree((void* )list[i].u.content.start);\r | |
2547 | }\r | |
2548 | }\r | |
2549 | }\r | |
2550 | \r | |
2551 | xfree(list);\r | |
2552 | }\r | |
2553 | \r | |
2554 | extern CalloutListEntry*\r | |
2555 | onig_reg_callout_list_at(regex_t* reg, int num)\r | |
2556 | {\r | |
2557 | RegexExt* ext = REG_EXTP(reg);\r | |
2558 | CHECK_NULL_RETURN(ext);\r | |
2559 | \r | |
2560 | if (num <= 0 || num > ext->callout_num)\r | |
2561 | return 0;\r | |
2562 | \r | |
2563 | num--;\r | |
2564 | return ext->callout_list + num;\r | |
2565 | }\r | |
2566 | \r | |
2567 | static int\r | |
2568 | reg_callout_list_entry(ScanEnv* env, int* rnum)\r | |
2569 | {\r | |
2570 | #define INIT_CALLOUT_LIST_NUM 3\r | |
2571 | \r | |
2572 | int num;\r | |
2573 | CalloutListEntry* list;\r | |
2574 | CalloutListEntry* e;\r | |
2575 | RegexExt* ext;\r | |
2576 | \r | |
2577 | ext = onig_get_regex_ext(env->reg);\r | |
2578 | CHECK_NULL_RETURN_MEMERR(ext);\r | |
2579 | \r | |
2580 | if (IS_NULL(ext->callout_list)) {\r | |
2581 | list = (CalloutListEntry* )xmalloc(sizeof(*list) * INIT_CALLOUT_LIST_NUM);\r | |
2582 | CHECK_NULL_RETURN_MEMERR(list);\r | |
2583 | \r | |
2584 | ext->callout_list = list;\r | |
2585 | ext->callout_list_alloc = INIT_CALLOUT_LIST_NUM;\r | |
2586 | ext->callout_num = 0;\r | |
2587 | }\r | |
2588 | \r | |
2589 | num = ext->callout_num + 1;\r | |
2590 | if (num > ext->callout_list_alloc) {\r | |
2591 | int alloc = ext->callout_list_alloc * 2;\r | |
2592 | list = (CalloutListEntry* )xrealloc(ext->callout_list,\r | |
2593 | sizeof(CalloutListEntry) * alloc,\r | |
2594 | sizeof(CalloutListEntry) * ext->callout_list_alloc);\r | |
2595 | CHECK_NULL_RETURN_MEMERR(list);\r | |
2596 | \r | |
2597 | ext->callout_list = list;\r | |
2598 | ext->callout_list_alloc = alloc;\r | |
2599 | }\r | |
2600 | \r | |
2601 | e = ext->callout_list + (num - 1);\r | |
2602 | \r | |
2603 | e->flag = 0;\r | |
2604 | e->of = 0;\r | |
2605 | e->in = ONIG_CALLOUT_OF_CONTENTS;\r | |
2606 | e->type = 0;\r | |
2607 | e->tag_start = 0;\r | |
2608 | e->tag_end = 0;\r | |
2609 | e->start_func = 0;\r | |
2610 | e->end_func = 0;\r | |
2611 | e->u.arg.num = 0;\r | |
2612 | e->u.arg.passed_num = 0;\r | |
2613 | \r | |
2614 | ext->callout_num = num;\r | |
2615 | *rnum = num;\r | |
2616 | return ONIG_NORMAL;\r | |
2617 | }\r | |
2618 | \r | |
2619 | static int\r | |
2620 | node_new_callout(Node** node, OnigCalloutOf callout_of, int num, int id,\r | |
2621 | ScanEnv* env)\r | |
2622 | {\r | |
2623 | *node = node_new();\r | |
2624 | CHECK_NULL_RETURN_MEMERR(*node);\r | |
2625 | \r | |
2626 | NODE_SET_TYPE(*node, NODE_GIMMICK);\r | |
2627 | GIMMICK_(*node)->id = id;\r | |
2628 | GIMMICK_(*node)->num = num;\r | |
2629 | GIMMICK_(*node)->type = GIMMICK_CALLOUT;\r | |
2630 | GIMMICK_(*node)->detail_type = (int )callout_of;\r | |
2631 | \r | |
2632 | return ONIG_NORMAL;\r | |
2633 | }\r | |
2634 | #endif\r | |
2635 | \r | |
2636 | static int\r | |
2637 | make_extended_grapheme_cluster(Node** node, ScanEnv* env)\r | |
2638 | {\r | |
2639 | int r;\r | |
2640 | int i;\r | |
2641 | Node* x;\r | |
2642 | Node* ns[2];\r | |
2643 | \r | |
2644 | /* \X == (?>\O(?:\Y\O)*) */\r | |
2645 | \r | |
2646 | ns[1] = NULL_NODE;\r | |
2647 | \r | |
2648 | r = ONIGERR_MEMORY;\r | |
2649 | ns[0] = onig_node_new_anchor(ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY, 0);\r | |
2650 | if (IS_NULL(ns[0])) goto err;\r | |
2651 | \r | |
2652 | r = node_new_true_anychar(&ns[1], env);\r | |
2653 | if (r != 0) goto err1;\r | |
2654 | \r | |
2655 | x = make_list(2, ns);\r | |
2656 | if (IS_NULL(x)) goto err;\r | |
2657 | ns[0] = x;\r | |
2658 | ns[1] = NULL_NODE;\r | |
2659 | \r | |
2660 | x = node_new_quantifier(0, REPEAT_INFINITE, 1);\r | |
2661 | if (IS_NULL(x)) goto err;\r | |
2662 | \r | |
2663 | NODE_BODY(x) = ns[0];\r | |
2664 | ns[0] = NULL_NODE;\r | |
2665 | ns[1] = x;\r | |
2666 | \r | |
2667 | r = node_new_true_anychar(&ns[0], env);\r | |
2668 | if (r != 0) goto err1;\r | |
2669 | \r | |
2670 | x = make_list(2, ns);\r | |
2671 | if (IS_NULL(x)) goto err;\r | |
2672 | \r | |
2673 | ns[0] = x;\r | |
2674 | ns[1] = NULL_NODE;\r | |
2675 | \r | |
2676 | x = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);\r | |
2677 | if (IS_NULL(x)) goto err;\r | |
2678 | \r | |
2679 | NODE_BODY(x) = ns[0];\r | |
2680 | \r | |
2681 | *node = x;\r | |
2682 | return ONIG_NORMAL;\r | |
2683 | \r | |
2684 | err:\r | |
2685 | r = ONIGERR_MEMORY;\r | |
2686 | err1:\r | |
2687 | for (i = 0; i < 2; i++) onig_node_free(ns[i]);\r | |
2688 | return r;\r | |
2689 | }\r | |
2690 | \r | |
2691 | static int\r | |
2692 | make_absent_engine(Node** node, int pre_save_right_id, Node* absent,\r | |
2693 | Node* step_one, int lower, int upper, int possessive,\r | |
2694 | int is_range_cutter, ScanEnv* env)\r | |
2695 | {\r | |
2696 | int r;\r | |
2697 | int i;\r | |
2698 | int id;\r | |
2699 | Node* x;\r | |
2700 | Node* ns[4];\r | |
2701 | \r | |
2702 | for (i = 0; i < 4; i++) ns[i] = NULL_NODE;\r | |
2703 | \r | |
2704 | ns[1] = absent;\r | |
2705 | ns[3] = step_one; /* for err */\r | |
2706 | r = node_new_save_gimmick(&ns[0], SAVE_S, env);\r | |
2707 | if (r != 0) goto err;\r | |
2708 | \r | |
2709 | id = GIMMICK_(ns[0])->id;\r | |
2710 | r = node_new_update_var_gimmick(&ns[2], UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK,\r | |
2711 | id, env);\r | |
2712 | if (r != 0) goto err;\r | |
2713 | \r | |
2714 | r = node_new_fail(&ns[3], env);\r | |
2715 | if (r != 0) goto err;\r | |
2716 | \r | |
2717 | x = make_list(4, ns);\r | |
2718 | if (IS_NULL(x)) goto err0;\r | |
2719 | \r | |
2720 | ns[0] = x;\r | |
2721 | ns[1] = step_one;\r | |
2722 | ns[2] = ns[3] = NULL_NODE;\r | |
2723 | \r | |
2724 | x = make_alt(2, ns);\r | |
2725 | if (IS_NULL(x)) goto err0;\r | |
2726 | \r | |
2727 | ns[0] = x;\r | |
2728 | \r | |
2729 | x = node_new_quantifier(lower, upper, 0);\r | |
2730 | if (IS_NULL(x)) goto err0;\r | |
2731 | \r | |
2732 | NODE_BODY(x) = ns[0];\r | |
2733 | ns[0] = x;\r | |
2734 | \r | |
2735 | if (possessive != 0) {\r | |
2736 | x = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);\r | |
2737 | if (IS_NULL(x)) goto err0;\r | |
2738 | \r | |
2739 | NODE_BODY(x) = ns[0];\r | |
2740 | ns[0] = x;\r | |
2741 | }\r | |
2742 | \r | |
2743 | r = node_new_update_var_gimmick(&ns[1], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,\r | |
2744 | pre_save_right_id, env);\r | |
2745 | if (r != 0) goto err;\r | |
2746 | \r | |
2747 | r = node_new_fail(&ns[2], env);\r | |
2748 | if (r != 0) goto err;\r | |
2749 | \r | |
2750 | x = make_list(2, ns + 1);\r | |
2751 | if (IS_NULL(x)) goto err0;\r | |
2752 | \r | |
2753 | ns[1] = x; ns[2] = NULL_NODE;\r | |
2754 | \r | |
2755 | x = make_alt(2, ns);\r | |
2756 | if (IS_NULL(x)) goto err0;\r | |
2757 | \r | |
2758 | if (is_range_cutter != 0)\r | |
2759 | NODE_STATUS_ADD(x, SUPER);\r | |
2760 | \r | |
2761 | *node = x;\r | |
2762 | return ONIG_NORMAL;\r | |
2763 | \r | |
2764 | err0:\r | |
2765 | r = ONIGERR_MEMORY;\r | |
2766 | err:\r | |
2767 | for (i = 0; i < 4; i++) onig_node_free(ns[i]);\r | |
2768 | return r;\r | |
2769 | }\r | |
2770 | \r | |
2771 | static int\r | |
2772 | make_absent_tail(Node** node1, Node** node2, int pre_save_right_id,\r | |
2773 | ScanEnv* env)\r | |
2774 | {\r | |
2775 | int r;\r | |
2776 | int id;\r | |
2777 | Node* save;\r | |
2778 | Node* x;\r | |
2779 | Node* ns[2];\r | |
2780 | \r | |
2781 | *node1 = *node2 = NULL_NODE;\r | |
2782 | save = ns[0] = ns[1] = NULL_NODE;\r | |
2783 | \r | |
2784 | r = node_new_save_gimmick(&save, SAVE_RIGHT_RANGE, env);\r | |
2785 | if (r != 0) goto err;\r | |
2786 | \r | |
2787 | id = GIMMICK_(save)->id;\r | |
2788 | r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,\r | |
2789 | id, env);\r | |
2790 | if (r != 0) goto err;\r | |
2791 | \r | |
2792 | r = node_new_fail(&ns[1], env);\r | |
2793 | if (r != 0) goto err;\r | |
2794 | \r | |
2795 | x = make_list(2, ns);\r | |
2796 | if (IS_NULL(x)) goto err0;\r | |
2797 | \r | |
2798 | ns[0] = NULL_NODE; ns[1] = x;\r | |
2799 | \r | |
2800 | r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,\r | |
2801 | pre_save_right_id, env);\r | |
2802 | if (r != 0) goto err;\r | |
2803 | \r | |
2804 | x = make_alt(2, ns);\r | |
2805 | if (IS_NULL(x)) goto err0;\r | |
2806 | \r | |
2807 | *node1 = save;\r | |
2808 | *node2 = x;\r | |
2809 | return ONIG_NORMAL;\r | |
2810 | \r | |
2811 | err0:\r | |
2812 | r = ONIGERR_MEMORY;\r | |
2813 | err:\r | |
2814 | onig_node_free(save);\r | |
2815 | onig_node_free(ns[0]);\r | |
2816 | onig_node_free(ns[1]);\r | |
2817 | return r;\r | |
2818 | }\r | |
2819 | \r | |
2820 | static int\r | |
2821 | make_range_clear(Node** node, ScanEnv* env)\r | |
2822 | {\r | |
2823 | int r;\r | |
2824 | int id;\r | |
2825 | Node* save;\r | |
2826 | Node* x;\r | |
2827 | Node* ns[2];\r | |
2828 | \r | |
2829 | *node = NULL_NODE;\r | |
2830 | save = ns[0] = ns[1] = NULL_NODE;\r | |
2831 | \r | |
2832 | r = node_new_save_gimmick(&save, SAVE_RIGHT_RANGE, env);\r | |
2833 | if (r != 0) goto err;\r | |
2834 | \r | |
2835 | id = GIMMICK_(save)->id;\r | |
2836 | r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,\r | |
2837 | id, env);\r | |
2838 | if (r != 0) goto err;\r | |
2839 | \r | |
2840 | r = node_new_fail(&ns[1], env);\r | |
2841 | if (r != 0) goto err;\r | |
2842 | \r | |
2843 | x = make_list(2, ns);\r | |
2844 | if (IS_NULL(x)) goto err0;\r | |
2845 | \r | |
2846 | ns[0] = NULL_NODE; ns[1] = x;\r | |
2847 | \r | |
2848 | r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_INIT, 0, env);\r | |
2849 | if (r != 0) goto err;\r | |
2850 | \r | |
2851 | x = make_alt(2, ns);\r | |
2852 | if (IS_NULL(x)) goto err0;\r | |
2853 | \r | |
2854 | NODE_STATUS_ADD(x, SUPER);\r | |
2855 | \r | |
2856 | ns[0] = save;\r | |
2857 | ns[1] = x;\r | |
2858 | save = NULL_NODE;\r | |
2859 | x = make_list(2, ns);\r | |
2860 | if (IS_NULL(x)) goto err0;\r | |
2861 | \r | |
2862 | *node = x;\r | |
2863 | return ONIG_NORMAL;\r | |
2864 | \r | |
2865 | err0:\r | |
2866 | r = ONIGERR_MEMORY;\r | |
2867 | err:\r | |
2868 | onig_node_free(save);\r | |
2869 | onig_node_free(ns[0]);\r | |
2870 | onig_node_free(ns[1]);\r | |
2871 | return r;\r | |
2872 | }\r | |
2873 | \r | |
2874 | static int\r | |
2875 | is_simple_one_char_repeat(Node* node, Node** rquant, Node** rbody,\r | |
2876 | int* is_possessive, ScanEnv* env)\r | |
2877 | {\r | |
2878 | Node* quant;\r | |
2879 | Node* body;\r | |
2880 | \r | |
2881 | *rquant = *rbody = 0;\r | |
2882 | *is_possessive = 0;\r | |
2883 | \r | |
2884 | if (NODE_TYPE(node) == NODE_QUANT) {\r | |
2885 | quant = node;\r | |
2886 | }\r | |
2887 | else {\r | |
2888 | if (NODE_TYPE(node) == NODE_ENCLOSURE) {\r | |
2889 | EnclosureNode* en = ENCLOSURE_(node);\r | |
2890 | if (en->type == ENCLOSURE_STOP_BACKTRACK) {\r | |
2891 | *is_possessive = 1;\r | |
2892 | quant = NODE_ENCLOSURE_BODY(en);\r | |
2893 | if (NODE_TYPE(quant) != NODE_QUANT)\r | |
2894 | return 0;\r | |
2895 | }\r | |
2896 | else\r | |
2897 | return 0;\r | |
2898 | }\r | |
2899 | else\r | |
2900 | return 0;\r | |
2901 | }\r | |
2902 | \r | |
2903 | if (QUANT_(quant)->greedy == 0)\r | |
2904 | return 0;\r | |
2905 | \r | |
2906 | body = NODE_BODY(quant);\r | |
2907 | switch (NODE_TYPE(body)) {\r | |
2908 | case NODE_STRING:\r | |
2909 | {\r | |
2910 | int len;\r | |
2911 | StrNode* sn = STR_(body);\r | |
2912 | UChar *s = sn->s;\r | |
2913 | \r | |
2914 | len = 0;\r | |
2915 | while (s < sn->end) {\r | |
2916 | s += enclen(env->enc, s);\r | |
2917 | len++;\r | |
2918 | }\r | |
2919 | if (len != 1)\r | |
2920 | return 0;\r | |
2921 | }\r | |
2922 | \r | |
2923 | case NODE_CCLASS:\r | |
2924 | break;\r | |
2925 | \r | |
2926 | default:\r | |
2927 | return 0;\r | |
2928 | break;\r | |
2929 | }\r | |
2930 | \r | |
2931 | if (node != quant) {\r | |
2932 | NODE_BODY(node) = 0;\r | |
2933 | onig_node_free(node);\r | |
2934 | }\r | |
2935 | NODE_BODY(quant) = NULL_NODE;\r | |
2936 | *rquant = quant;\r | |
2937 | *rbody = body;\r | |
2938 | return 1;\r | |
2939 | }\r | |
2940 | \r | |
2941 | static int\r | |
2942 | make_absent_tree_for_simple_one_char_repeat(Node** node, Node* absent, Node* quant,\r | |
2943 | Node* body, int possessive, ScanEnv* env)\r | |
2944 | {\r | |
2945 | int r;\r | |
2946 | int i;\r | |
2947 | int id1;\r | |
2948 | int lower, upper;\r | |
2949 | Node* x;\r | |
2950 | Node* ns[4];\r | |
2951 | \r | |
2952 | *node = NULL_NODE;\r | |
2953 | r = ONIGERR_MEMORY;\r | |
2954 | ns[0] = ns[1] = NULL_NODE;\r | |
2955 | ns[2] = body, ns[3] = absent;\r | |
2956 | \r | |
2957 | lower = QUANT_(quant)->lower;\r | |
2958 | upper = QUANT_(quant)->upper;\r | |
2959 | onig_node_free(quant);\r | |
2960 | \r | |
2961 | r = node_new_save_gimmick(&ns[0], SAVE_RIGHT_RANGE, env);\r | |
2962 | if (r != 0) goto err;\r | |
2963 | \r | |
2964 | id1 = GIMMICK_(ns[0])->id;\r | |
2965 | \r | |
2966 | r = make_absent_engine(&ns[1], id1, absent, body, lower, upper, possessive,\r | |
2967 | 0, env);\r | |
2968 | if (r != 0) goto err;\r | |
2969 | \r | |
2970 | ns[2] = ns[3] = NULL_NODE;\r | |
2971 | \r | |
2972 | r = node_new_update_var_gimmick(&ns[2], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,\r | |
2973 | id1, env);\r | |
2974 | if (r != 0) goto err;\r | |
2975 | \r | |
2976 | x = make_list(3, ns);\r | |
2977 | if (IS_NULL(x)) goto err0;\r | |
2978 | \r | |
2979 | *node = x;\r | |
2980 | return ONIG_NORMAL;\r | |
2981 | \r | |
2982 | err0:\r | |
2983 | r = ONIGERR_MEMORY;\r | |
2984 | err:\r | |
2985 | for (i = 0; i < 4; i++) onig_node_free(ns[i]);\r | |
2986 | return r;\r | |
2987 | }\r | |
2988 | \r | |
2989 | static int\r | |
2990 | make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter,\r | |
2991 | ScanEnv* env)\r | |
2992 | {\r | |
2993 | int r;\r | |
2994 | int i;\r | |
2995 | int id1, id2;\r | |
2996 | int possessive;\r | |
2997 | Node* x;\r | |
2998 | Node* ns[7];\r | |
2999 | \r | |
3000 | r = ONIGERR_MEMORY;\r | |
3001 | for (i = 0; i < 7; i++) ns[i] = NULL_NODE;\r | |
3002 | ns[4] = expr; ns[5] = absent;\r | |
3003 | \r | |
3004 | if (is_range_cutter == 0) {\r | |
3005 | Node* quant;\r | |
3006 | Node* body;\r | |
3007 | \r | |
3008 | if (expr == NULL_NODE) {\r | |
3009 | /* default expr \O* */\r | |
3010 | quant = node_new_quantifier(0, REPEAT_INFINITE, 0);\r | |
3011 | if (IS_NULL(quant)) goto err0;\r | |
3012 | \r | |
3013 | r = node_new_true_anychar(&body, env);\r | |
3014 | if (r != 0) {\r | |
3015 | onig_node_free(quant);\r | |
3016 | goto err;\r | |
3017 | }\r | |
3018 | possessive = 0;\r | |
3019 | goto simple;\r | |
3020 | }\r | |
3021 | else {\r | |
3022 | if (is_simple_one_char_repeat(expr, &quant, &body, &possessive, env)) {\r | |
3023 | simple:\r | |
3024 | r = make_absent_tree_for_simple_one_char_repeat(node, absent, quant,\r | |
3025 | body, possessive, env);\r | |
3026 | if (r != 0) {\r | |
3027 | ns[4] = NULL_NODE;\r | |
3028 | onig_node_free(quant);\r | |
3029 | onig_node_free(body);\r | |
3030 | goto err;\r | |
3031 | }\r | |
3032 | \r | |
3033 | return ONIG_NORMAL;\r | |
3034 | }\r | |
3035 | }\r | |
3036 | }\r | |
3037 | \r | |
3038 | r = node_new_save_gimmick(&ns[0], SAVE_RIGHT_RANGE, env);\r | |
3039 | if (r != 0) goto err;\r | |
3040 | \r | |
3041 | id1 = GIMMICK_(ns[0])->id;\r | |
3042 | \r | |
3043 | r = node_new_save_gimmick(&ns[1], SAVE_S, env);\r | |
3044 | if (r != 0) goto err;\r | |
3045 | \r | |
3046 | id2 = GIMMICK_(ns[1])->id;\r | |
3047 | \r | |
3048 | r = node_new_true_anychar(&ns[3], env);\r | |
3049 | if (r != 0) goto err;\r | |
3050 | \r | |
3051 | possessive = 1;\r | |
3052 | r = make_absent_engine(&ns[2], id1, absent, ns[3], 0, REPEAT_INFINITE,\r | |
3053 | possessive, is_range_cutter, env);\r | |
3054 | if (r != 0) goto err;\r | |
3055 | \r | |
3056 | ns[3] = NULL_NODE;\r | |
3057 | ns[5] = NULL_NODE;\r | |
3058 | \r | |
3059 | r = node_new_update_var_gimmick(&ns[3], UPDATE_VAR_S_FROM_STACK, id2, env);\r | |
3060 | if (r != 0) goto err;\r | |
3061 | \r | |
3062 | if (is_range_cutter != 0) {\r | |
3063 | x = make_list(4, ns);\r | |
3064 | if (IS_NULL(x)) goto err0;\r | |
3065 | }\r | |
3066 | else {\r | |
3067 | r = make_absent_tail(&ns[5], &ns[6], id1, env);\r | |
3068 | if (r != 0) goto err;\r | |
3069 | \r | |
3070 | x = make_list(7, ns);\r | |
3071 | if (IS_NULL(x)) goto err0;\r | |
3072 | }\r | |
3073 | \r | |
3074 | *node = x;\r | |
3075 | return ONIG_NORMAL;\r | |
3076 | \r | |
3077 | err0:\r | |
3078 | r = ONIGERR_MEMORY;\r | |
3079 | err:\r | |
3080 | for (i = 0; i < 7; i++) onig_node_free(ns[i]);\r | |
3081 | return r; \r | |
3082 | }\r | |
3083 | \r | |
3084 | extern int\r | |
3085 | onig_node_str_cat(Node* node, const UChar* s, const UChar* end)\r | |
3086 | {\r | |
3087 | int addlen = (int )(end - s);\r | |
3088 | \r | |
3089 | if (addlen > 0) {\r | |
3090 | int len = (int )(STR_(node)->end - STR_(node)->s);\r | |
3091 | \r | |
3092 | if (STR_(node)->capa > 0 || (len + addlen > NODE_STRING_BUF_SIZE - 1)) {\r | |
3093 | UChar* p;\r | |
3094 | int capa = len + addlen + NODE_STRING_MARGIN;\r | |
3095 | \r | |
3096 | if (capa <= STR_(node)->capa) {\r | |
3097 | onig_strcpy(STR_(node)->s + len, s, end);\r | |
3098 | }\r | |
3099 | else {\r | |
3100 | if (STR_(node)->s == STR_(node)->buf)\r | |
3101 | p = strcat_capa_from_static(STR_(node)->s, STR_(node)->end,\r | |
3102 | s, end, capa);\r | |
3103 | else\r | |
3104 | p = strcat_capa(STR_(node)->s, STR_(node)->end, s, end, capa, STR_(node)->capa);\r | |
3105 | \r | |
3106 | CHECK_NULL_RETURN_MEMERR(p);\r | |
3107 | STR_(node)->s = p;\r | |
3108 | STR_(node)->capa = capa;\r | |
3109 | }\r | |
3110 | }\r | |
3111 | else {\r | |
3112 | onig_strcpy(STR_(node)->s + len, s, end);\r | |
3113 | }\r | |
3114 | STR_(node)->end = STR_(node)->s + len + addlen;\r | |
3115 | }\r | |
3116 | \r | |
3117 | return 0;\r | |
3118 | }\r | |
3119 | \r | |
3120 | extern int\r | |
3121 | onig_node_str_set(Node* node, const UChar* s, const UChar* end)\r | |
3122 | {\r | |
3123 | onig_node_str_clear(node);\r | |
3124 | return onig_node_str_cat(node, s, end);\r | |
3125 | }\r | |
3126 | \r | |
3127 | static int\r | |
3128 | node_str_cat_char(Node* node, UChar c)\r | |
3129 | {\r | |
3130 | UChar s[1];\r | |
3131 | \r | |
3132 | s[0] = c;\r | |
3133 | return onig_node_str_cat(node, s, s + 1);\r | |
3134 | }\r | |
3135 | \r | |
3136 | extern void\r | |
3137 | onig_node_conv_to_str_node(Node* node, int flag)\r | |
3138 | {\r | |
3139 | NODE_SET_TYPE(node, NODE_STRING);\r | |
3140 | STR_(node)->flag = flag;\r | |
3141 | STR_(node)->capa = 0;\r | |
3142 | STR_(node)->s = STR_(node)->buf;\r | |
3143 | STR_(node)->end = STR_(node)->buf;\r | |
3144 | }\r | |
3145 | \r | |
3146 | extern void\r | |
3147 | onig_node_str_clear(Node* node)\r | |
3148 | {\r | |
3149 | if (STR_(node)->capa != 0 &&\r | |
3150 | IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) {\r | |
3151 | xfree(STR_(node)->s);\r | |
3152 | }\r | |
3153 | \r | |
3154 | STR_(node)->capa = 0;\r | |
3155 | STR_(node)->flag = 0;\r | |
3156 | STR_(node)->s = STR_(node)->buf;\r | |
3157 | STR_(node)->end = STR_(node)->buf;\r | |
3158 | }\r | |
3159 | \r | |
3160 | static Node*\r | |
3161 | node_new_str(const UChar* s, const UChar* end)\r | |
3162 | {\r | |
3163 | Node* node = node_new();\r | |
3164 | CHECK_NULL_RETURN(node);\r | |
3165 | \r | |
3166 | NODE_SET_TYPE(node, NODE_STRING);\r | |
3167 | STR_(node)->capa = 0;\r | |
3168 | STR_(node)->flag = 0;\r | |
3169 | STR_(node)->s = STR_(node)->buf;\r | |
3170 | STR_(node)->end = STR_(node)->buf;\r | |
3171 | if (onig_node_str_cat(node, s, end)) {\r | |
3172 | onig_node_free(node);\r | |
3173 | return NULL;\r | |
3174 | }\r | |
3175 | return node;\r | |
3176 | }\r | |
3177 | \r | |
3178 | extern Node*\r | |
3179 | onig_node_new_str(const UChar* s, const UChar* end)\r | |
3180 | {\r | |
3181 | return node_new_str(s, end);\r | |
3182 | }\r | |
3183 | \r | |
3184 | static Node*\r | |
3185 | node_new_str_raw(UChar* s, UChar* end)\r | |
3186 | {\r | |
3187 | Node* node = node_new_str(s, end);\r | |
a5def177 | 3188 | CHECK_NULL_RETURN(node);\r |
b602265d DG |
3189 | NODE_STRING_SET_RAW(node);\r |
3190 | return node;\r | |
3191 | }\r | |
3192 | \r | |
3193 | static Node*\r | |
3194 | node_new_empty(void)\r | |
3195 | {\r | |
3196 | return node_new_str(NULL, NULL);\r | |
3197 | }\r | |
3198 | \r | |
3199 | static Node*\r | |
3200 | node_new_str_raw_char(UChar c)\r | |
3201 | {\r | |
3202 | UChar p[1];\r | |
3203 | \r | |
3204 | p[0] = c;\r | |
3205 | return node_new_str_raw(p, p + 1);\r | |
3206 | }\r | |
3207 | \r | |
3208 | static Node*\r | |
3209 | str_node_split_last_char(Node* node, OnigEncoding enc)\r | |
3210 | {\r | |
3211 | const UChar *p;\r | |
3212 | Node* rn;\r | |
3213 | StrNode* sn;\r | |
3214 | \r | |
3215 | sn = STR_(node);\r | |
3216 | rn = NULL_NODE;\r | |
3217 | if (sn->end > sn->s) {\r | |
3218 | p = onigenc_get_prev_char_head(enc, sn->s, sn->end);\r | |
3219 | if (p && p > sn->s) { /* can be split. */\r | |
3220 | rn = node_new_str(p, sn->end);\r | |
a5def177 | 3221 | CHECK_NULL_RETURN(rn);\r |
b602265d DG |
3222 | if (NODE_STRING_IS_RAW(node))\r |
3223 | NODE_STRING_SET_RAW(rn);\r | |
3224 | \r | |
3225 | sn->end = (UChar* )p;\r | |
3226 | }\r | |
3227 | }\r | |
3228 | return rn;\r | |
3229 | }\r | |
3230 | \r | |
3231 | static int\r | |
3232 | str_node_can_be_split(Node* node, OnigEncoding enc)\r | |
3233 | {\r | |
3234 | StrNode* sn = STR_(node);\r | |
3235 | if (sn->end > sn->s) {\r | |
3236 | return ((enclen(enc, sn->s) < sn->end - sn->s) ? 1 : 0);\r | |
3237 | }\r | |
3238 | return 0;\r | |
3239 | }\r | |
3240 | \r | |
#ifdef USE_PAD_TO_SHORT_BYTE_CHAR
/*
 * Insert `num` copies of `val` in front of the string held by `sn`:
 * the current content is shifted right by `num` bytes and the gap is
 * filled with `val`.
 *
 * Returns 0.  (The function is declared to return int but previously fell
 * off its end without returning a value.)
 *
 * NOTE(review): the content is staged in a NODE_STRING_BUF_SIZE byte
 * scratch buffer and written back without any capacity check, so this
 * presumably relies on the string being short and on sn->s having room for
 * `num` extra bytes -- confirm at the call sites.
 */
static int
node_str_head_pad(StrNode* sn, int num, UChar val)
{
  UChar buf[NODE_STRING_BUF_SIZE];
  int i, len;

  len = (int )(sn->end - sn->s);
  onig_strcpy(buf, sn->s, sn->end);
  onig_strcpy(&(sn->s[num]), buf, buf + len);
  sn->end += num;

  for (i = 0; i < num; i++) {
    sn->s[i] = val;
  }

  return 0;
}
#endif
3258 | \r | |
3259 | extern int\r | |
3260 | onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc)\r | |
3261 | {\r | |
3262 | unsigned int num, val;\r | |
3263 | OnigCodePoint c;\r | |
3264 | UChar* p = *src;\r | |
3265 | PFETCH_READY;\r | |
3266 | \r | |
3267 | num = 0;\r | |
3268 | while (! PEND) {\r | |
3269 | PFETCH(c);\r | |
3270 | if (IS_CODE_DIGIT_ASCII(enc, c)) {\r | |
3271 | val = (unsigned int )DIGITVAL(c);\r | |
3272 | if ((INT_MAX_LIMIT - val) / 10UL < num)\r | |
3273 | return -1; /* overflow */\r | |
3274 | \r | |
3275 | num = num * 10 + val;\r | |
3276 | }\r | |
3277 | else {\r | |
3278 | PUNFETCH;\r | |
3279 | break;\r | |
3280 | }\r | |
3281 | }\r | |
3282 | *src = p;\r | |
3283 | return num;\r | |
3284 | }\r | |
3285 | \r | |
3286 | static int\r | |
3287 | scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int minlen,\r | |
3288 | int maxlen, OnigEncoding enc)\r | |
3289 | {\r | |
3290 | OnigCodePoint c;\r | |
3291 | unsigned int num, val;\r | |
3292 | int n;\r | |
3293 | UChar* p = *src;\r | |
3294 | PFETCH_READY;\r | |
3295 | \r | |
3296 | num = 0;\r | |
3297 | n = 0;\r | |
3298 | while (! PEND && n < maxlen) {\r | |
3299 | PFETCH(c);\r | |
3300 | if (IS_CODE_XDIGIT_ASCII(enc, c)) {\r | |
3301 | n++;\r | |
3302 | val = (unsigned int )XDIGITVAL(enc,c);\r | |
3303 | if ((INT_MAX_LIMIT - val) / 16UL < num)\r | |
3304 | return ONIGERR_TOO_BIG_NUMBER; /* overflow */\r | |
3305 | \r | |
3306 | num = (num << 4) + XDIGITVAL(enc,c);\r | |
3307 | }\r | |
3308 | else {\r | |
3309 | PUNFETCH;\r | |
3310 | break;\r | |
3311 | }\r | |
3312 | }\r | |
3313 | \r | |
3314 | if (n < minlen)\r | |
3315 | return ONIGERR_INVALID_CODE_POINT_VALUE;\r | |
3316 | \r | |
3317 | *src = p;\r | |
3318 | return num;\r | |
3319 | }\r | |
3320 | \r | |
3321 | static int\r | |
3322 | scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen,\r | |
3323 | OnigEncoding enc)\r | |
3324 | {\r | |
3325 | OnigCodePoint c;\r | |
3326 | unsigned int num, val;\r | |
3327 | UChar* p = *src;\r | |
3328 | PFETCH_READY;\r | |
3329 | \r | |
3330 | num = 0;\r | |
3331 | while (! PEND && maxlen-- != 0) {\r | |
3332 | PFETCH(c);\r | |
3333 | if (IS_CODE_DIGIT_ASCII(enc, c) && c < '8') {\r | |
3334 | val = ODIGITVAL(c);\r | |
3335 | if ((INT_MAX_LIMIT - val) / 8UL < num)\r | |
3336 | return -1; /* overflow */\r | |
3337 | \r | |
3338 | num = (num << 3) + val;\r | |
3339 | }\r | |
3340 | else {\r | |
3341 | PUNFETCH;\r | |
3342 | break;\r | |
3343 | }\r | |
3344 | }\r | |
3345 | *src = p;\r | |
3346 | return num;\r | |
3347 | }\r | |
3348 | \r | |
3349 | \r | |
/* Store one OnigCodePoint (`code` must be an lvalue) at byte offset `pos`
   of `bbuf`, via BB_WRITE. */
#define BB_WRITE_CODE_POINT(bbuf,pos,code) \
    BB_WRITE((bbuf), (pos), &(code), SIZE_CODE_POINT)
3352 | \r | |
3353 | /* data format:\r | |
3354 | [n][from-1][to-1][from-2][to-2] ... [from-n][to-n]\r | |
3355 | (all data size is OnigCodePoint)\r | |
3356 | */\r | |
3357 | static int\r | |
3358 | new_code_range(BBuf** pbuf)\r | |
3359 | {\r | |
3360 | #define INIT_MULTI_BYTE_RANGE_SIZE (SIZE_CODE_POINT * 5)\r | |
3361 | int r;\r | |
3362 | OnigCodePoint n;\r | |
3363 | BBuf* bbuf;\r | |
3364 | \r | |
3365 | bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf));\r | |
3366 | CHECK_NULL_RETURN_MEMERR(bbuf);\r | |
3367 | r = BB_INIT(bbuf, INIT_MULTI_BYTE_RANGE_SIZE);\r | |
3368 | if (r != 0) {\r | |
3369 | xfree(bbuf);\r | |
3370 | *pbuf = 0;\r | |
3371 | return r;\r | |
3372 | }\r | |
3373 | \r | |
3374 | n = 0;\r | |
3375 | BB_WRITE_CODE_POINT(bbuf, 0, n);\r | |
3376 | return 0;\r | |
3377 | }\r | |
3378 | \r | |
3379 | static int\r | |
3380 | add_code_range_to_buf(BBuf** pbuf, OnigCodePoint from, OnigCodePoint to)\r | |
3381 | {\r | |
3382 | int r, inc_n, pos;\r | |
3383 | int low, high, bound, x;\r | |
3384 | OnigCodePoint n, *data;\r | |
3385 | BBuf* bbuf;\r | |
3386 | \r | |
3387 | if (from > to) {\r | |
3388 | n = from; from = to; to = n;\r | |
3389 | }\r | |
3390 | \r | |
3391 | if (IS_NULL(*pbuf)) {\r | |
3392 | r = new_code_range(pbuf);\r | |
3393 | if (r != 0) return r;\r | |
3394 | bbuf = *pbuf;\r | |
3395 | n = 0;\r | |
3396 | }\r | |
3397 | else {\r | |
3398 | bbuf = *pbuf;\r | |
3399 | GET_CODE_POINT(n, bbuf->p);\r | |
3400 | }\r | |
3401 | data = (OnigCodePoint* )(bbuf->p);\r | |
3402 | data++;\r | |
3403 | \r | |
3404 | for (low = 0, bound = n; low < bound; ) {\r | |
3405 | x = (low + bound) >> 1;\r | |
3406 | if (from > data[x*2 + 1])\r | |
3407 | low = x + 1;\r | |
3408 | else\r | |
3409 | bound = x;\r | |
3410 | }\r | |
3411 | \r | |
3412 | high = (to == ~((OnigCodePoint )0)) ? n : low;\r | |
3413 | for (bound = n; high < bound; ) {\r | |
3414 | x = (high + bound) >> 1;\r | |
3415 | if (to + 1 >= data[x*2])\r | |
3416 | high = x + 1;\r | |
3417 | else\r | |
3418 | bound = x;\r | |
3419 | }\r | |
3420 | \r | |
3421 | inc_n = low + 1 - high;\r | |
3422 | if (n + inc_n > ONIG_MAX_MULTI_BYTE_RANGES_NUM)\r | |
3423 | return ONIGERR_TOO_MANY_MULTI_BYTE_RANGES;\r | |
3424 | \r | |
3425 | if (inc_n != 1) {\r | |
3426 | if (from > data[low*2])\r | |
3427 | from = data[low*2];\r | |
3428 | if (to < data[(high - 1)*2 + 1])\r | |
3429 | to = data[(high - 1)*2 + 1];\r | |
3430 | }\r | |
3431 | \r | |
3432 | if (inc_n != 0 && (OnigCodePoint )high < n) {\r | |
3433 | int from_pos = SIZE_CODE_POINT * (1 + high * 2);\r | |
3434 | int to_pos = SIZE_CODE_POINT * (1 + (low + 1) * 2);\r | |
3435 | int size = (n - high) * 2 * SIZE_CODE_POINT;\r | |
3436 | \r | |
3437 | if (inc_n > 0) {\r | |
3438 | BB_MOVE_RIGHT(bbuf, from_pos, to_pos, size);\r | |
3439 | }\r | |
3440 | else {\r | |
3441 | BB_MOVE_LEFT_REDUCE(bbuf, from_pos, to_pos);\r | |
3442 | }\r | |
3443 | }\r | |
3444 | \r | |
3445 | pos = SIZE_CODE_POINT * (1 + low * 2);\r | |
3446 | BB_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2);\r | |
3447 | BB_WRITE_CODE_POINT(bbuf, pos, from);\r | |
3448 | BB_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to);\r | |
3449 | n += inc_n;\r | |
3450 | BB_WRITE_CODE_POINT(bbuf, 0, n);\r | |
3451 | \r | |
3452 | return 0;\r | |
3453 | }\r | |
3454 | \r | |
3455 | static int\r | |
3456 | add_code_range(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to)\r | |
3457 | {\r | |
3458 | if (from > to) {\r | |
3459 | if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))\r | |
3460 | return 0;\r | |
3461 | else\r | |
3462 | return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;\r | |
3463 | }\r | |
3464 | \r | |
3465 | return add_code_range_to_buf(pbuf, from, to);\r | |
3466 | }\r | |
3467 | \r | |
3468 | static int\r | |
3469 | not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf)\r | |
3470 | {\r | |
3471 | int r, i, n;\r | |
3472 | OnigCodePoint pre, from, *data, to = 0;\r | |
3473 | \r | |
3474 | *pbuf = (BBuf* )NULL;\r | |
3475 | if (IS_NULL(bbuf)) {\r | |
3476 | set_all:\r | |
3477 | return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);\r | |
3478 | }\r | |
3479 | \r | |
3480 | data = (OnigCodePoint* )(bbuf->p);\r | |
3481 | GET_CODE_POINT(n, data);\r | |
3482 | data++;\r | |
3483 | if (n <= 0) goto set_all;\r | |
3484 | \r | |
3485 | r = 0;\r | |
3486 | pre = MBCODE_START_POS(enc);\r | |
3487 | for (i = 0; i < n; i++) {\r | |
3488 | from = data[i*2];\r | |
3489 | to = data[i*2+1];\r | |
3490 | if (pre <= from - 1) {\r | |
3491 | r = add_code_range_to_buf(pbuf, pre, from - 1);\r | |
3492 | if (r != 0) return r;\r | |
3493 | }\r | |
3494 | if (to == ~((OnigCodePoint )0)) break;\r | |
3495 | pre = to + 1;\r | |
3496 | }\r | |
3497 | if (to < ~((OnigCodePoint )0)) {\r | |
3498 | r = add_code_range_to_buf(pbuf, to + 1, ~((OnigCodePoint )0));\r | |
3499 | }\r | |
3500 | return r;\r | |
3501 | }\r | |
3502 | \r | |
/* Exchange two (code range buffer, negation flag) operand pairs in place.
   All four arguments must be lvalues. */
#define SWAP_BB_NOT(bbuf1, not1, bbuf2, not2) do {\
  BBuf *tbuf; \
  int  tnot; \
  tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf; \
  tnot = not1;  not1  = not2;  not2  = tnot; \
} while (0)
3509 | \r | |
3510 | static int\r | |
3511 | or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1,\r | |
3512 | BBuf* bbuf2, int not2, BBuf** pbuf)\r | |
3513 | {\r | |
3514 | int r;\r | |
3515 | OnigCodePoint i, n1, *data1;\r | |
3516 | OnigCodePoint from, to;\r | |
3517 | \r | |
3518 | *pbuf = (BBuf* )NULL;\r | |
3519 | if (IS_NULL(bbuf1) && IS_NULL(bbuf2)) {\r | |
3520 | if (not1 != 0 || not2 != 0)\r | |
3521 | return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);\r | |
3522 | return 0;\r | |
3523 | }\r | |
3524 | \r | |
3525 | r = 0;\r | |
3526 | if (IS_NULL(bbuf2))\r | |
3527 | SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);\r | |
3528 | \r | |
3529 | if (IS_NULL(bbuf1)) {\r | |
3530 | if (not1 != 0) {\r | |
3531 | return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);\r | |
3532 | }\r | |
3533 | else {\r | |
3534 | if (not2 == 0) {\r | |
3535 | return bbuf_clone(pbuf, bbuf2);\r | |
3536 | }\r | |
3537 | else {\r | |
3538 | return not_code_range_buf(enc, bbuf2, pbuf);\r | |
3539 | }\r | |
3540 | }\r | |
3541 | }\r | |
3542 | \r | |
3543 | if (not1 != 0)\r | |
3544 | SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);\r | |
3545 | \r | |
3546 | data1 = (OnigCodePoint* )(bbuf1->p);\r | |
3547 | GET_CODE_POINT(n1, data1);\r | |
3548 | data1++;\r | |
3549 | \r | |
3550 | if (not2 == 0 && not1 == 0) { /* 1 OR 2 */\r | |
3551 | r = bbuf_clone(pbuf, bbuf2);\r | |
3552 | }\r | |
3553 | else if (not1 == 0) { /* 1 OR (not 2) */\r | |
3554 | r = not_code_range_buf(enc, bbuf2, pbuf);\r | |
3555 | }\r | |
3556 | if (r != 0) return r;\r | |
3557 | \r | |
3558 | for (i = 0; i < n1; i++) {\r | |
3559 | from = data1[i*2];\r | |
3560 | to = data1[i*2+1];\r | |
3561 | r = add_code_range_to_buf(pbuf, from, to);\r | |
3562 | if (r != 0) return r;\r | |
3563 | }\r | |
3564 | return 0;\r | |
3565 | }\r | |
3566 | \r | |
3567 | static int\r | |
3568 | and_code_range1(BBuf** pbuf, OnigCodePoint from1, OnigCodePoint to1,\r | |
3569 | OnigCodePoint* data, int n)\r | |
3570 | {\r | |
3571 | int i, r;\r | |
3572 | OnigCodePoint from2, to2;\r | |
3573 | \r | |
3574 | for (i = 0; i < n; i++) {\r | |
3575 | from2 = data[i*2];\r | |
3576 | to2 = data[i*2+1];\r | |
3577 | if (from2 < from1) {\r | |
3578 | if (to2 < from1) continue;\r | |
3579 | else {\r | |
3580 | from1 = to2 + 1;\r | |
3581 | }\r | |
3582 | }\r | |
3583 | else if (from2 <= to1) {\r | |
3584 | if (to2 < to1) {\r | |
3585 | if (from1 <= from2 - 1) {\r | |
3586 | r = add_code_range_to_buf(pbuf, from1, from2-1);\r | |
3587 | if (r != 0) return r;\r | |
3588 | }\r | |
3589 | from1 = to2 + 1;\r | |
3590 | }\r | |
3591 | else {\r | |
3592 | to1 = from2 - 1;\r | |
3593 | }\r | |
3594 | }\r | |
3595 | else {\r | |
3596 | from1 = from2;\r | |
3597 | }\r | |
3598 | if (from1 > to1) break;\r | |
3599 | }\r | |
3600 | if (from1 <= to1) {\r | |
3601 | r = add_code_range_to_buf(pbuf, from1, to1);\r | |
3602 | if (r != 0) return r;\r | |
3603 | }\r | |
3604 | return 0;\r | |
3605 | }\r | |
3606 | \r | |
3607 | static int\r | |
3608 | and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)\r | |
3609 | {\r | |
3610 | int r;\r | |
3611 | OnigCodePoint i, j, n1, n2, *data1, *data2;\r | |
3612 | OnigCodePoint from, to, from1, to1, from2, to2;\r | |
3613 | \r | |
3614 | *pbuf = (BBuf* )NULL;\r | |
3615 | if (IS_NULL(bbuf1)) {\r | |
3616 | if (not1 != 0 && IS_NOT_NULL(bbuf2)) /* not1 != 0 -> not2 == 0 */\r | |
3617 | return bbuf_clone(pbuf, bbuf2);\r | |
3618 | return 0;\r | |
3619 | }\r | |
3620 | else if (IS_NULL(bbuf2)) {\r | |
3621 | if (not2 != 0)\r | |
3622 | return bbuf_clone(pbuf, bbuf1);\r | |
3623 | return 0;\r | |
3624 | }\r | |
3625 | \r | |
3626 | if (not1 != 0)\r | |
3627 | SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);\r | |
3628 | \r | |
3629 | data1 = (OnigCodePoint* )(bbuf1->p);\r | |
3630 | data2 = (OnigCodePoint* )(bbuf2->p);\r | |
3631 | GET_CODE_POINT(n1, data1);\r | |
3632 | GET_CODE_POINT(n2, data2);\r | |
3633 | data1++;\r | |
3634 | data2++;\r | |
3635 | \r | |
3636 | if (not2 == 0 && not1 == 0) { /* 1 AND 2 */\r | |
3637 | for (i = 0; i < n1; i++) {\r | |
3638 | from1 = data1[i*2];\r | |
3639 | to1 = data1[i*2+1];\r | |
3640 | for (j = 0; j < n2; j++) {\r | |
3641 | from2 = data2[j*2];\r | |
3642 | to2 = data2[j*2+1];\r | |
3643 | if (from2 > to1) break;\r | |
3644 | if (to2 < from1) continue;\r | |
3645 | from = MAX(from1, from2);\r | |
3646 | to = MIN(to1, to2);\r | |
3647 | r = add_code_range_to_buf(pbuf, from, to);\r | |
3648 | if (r != 0) return r;\r | |
3649 | }\r | |
3650 | }\r | |
3651 | }\r | |
3652 | else if (not1 == 0) { /* 1 AND (not 2) */\r | |
3653 | for (i = 0; i < n1; i++) {\r | |
3654 | from1 = data1[i*2];\r | |
3655 | to1 = data1[i*2+1];\r | |
14b0e578 CS |
3656 | r = and_code_range1(pbuf, from1, to1, data2, n2);\r |
3657 | if (r != 0) return r;\r | |
3658 | }\r | |
3659 | }\r | |
3660 | \r | |
3661 | return 0;\r | |
3662 | }\r | |
3663 | \r | |
3664 | static int\r | |
3665 | and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)\r | |
3666 | {\r | |
3667 | int r, not1, not2;\r | |
3668 | BBuf *buf1, *buf2, *pbuf;\r | |
3669 | BitSetRef bsr1, bsr2;\r | |
3670 | BitSet bs1, bs2;\r | |
3671 | \r | |
3672 | not1 = IS_NCCLASS_NOT(dest);\r | |
3673 | bsr1 = dest->bs;\r | |
3674 | buf1 = dest->mbuf;\r | |
3675 | not2 = IS_NCCLASS_NOT(cc);\r | |
3676 | bsr2 = cc->bs;\r | |
3677 | buf2 = cc->mbuf;\r | |
3678 | \r | |
3679 | if (not1 != 0) {\r | |
3680 | bitset_invert_to(bsr1, bs1);\r | |
3681 | bsr1 = bs1;\r | |
3682 | }\r | |
3683 | if (not2 != 0) {\r | |
3684 | bitset_invert_to(bsr2, bs2);\r | |
3685 | bsr2 = bs2;\r | |
3686 | }\r | |
3687 | bitset_and(bsr1, bsr2);\r | |
3688 | if (bsr1 != dest->bs) {\r | |
3689 | bitset_copy(dest->bs, bsr1);\r | |
14b0e578 CS |
3690 | }\r |
3691 | if (not1 != 0) {\r | |
3692 | bitset_invert(dest->bs);\r | |
3693 | }\r | |
3694 | \r | |
3695 | if (! ONIGENC_IS_SINGLEBYTE(enc)) {\r | |
3696 | if (not1 != 0 && not2 != 0) {\r | |
3697 | r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf);\r | |
3698 | }\r | |
3699 | else {\r | |
3700 | r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf);\r | |
3701 | if (r == 0 && not1 != 0) {\r | |
b602265d DG |
3702 | BBuf *tbuf;\r |
3703 | r = not_code_range_buf(enc, pbuf, &tbuf);\r | |
3704 | if (r != 0) {\r | |
3705 | bbuf_free(pbuf);\r | |
3706 | return r;\r | |
3707 | }\r | |
3708 | bbuf_free(pbuf);\r | |
3709 | pbuf = tbuf;\r | |
14b0e578 CS |
3710 | }\r |
3711 | }\r | |
3712 | if (r != 0) return r;\r | |
3713 | \r | |
3714 | dest->mbuf = pbuf;\r | |
3715 | bbuf_free(buf1);\r | |
3716 | return r;\r | |
3717 | }\r | |
3718 | return 0;\r | |
3719 | }\r | |
3720 | \r | |
3721 | static int\r | |
3722 | or_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)\r | |
3723 | {\r | |
3724 | int r, not1, not2;\r | |
3725 | BBuf *buf1, *buf2, *pbuf;\r | |
3726 | BitSetRef bsr1, bsr2;\r | |
3727 | BitSet bs1, bs2;\r | |
3728 | \r | |
3729 | not1 = IS_NCCLASS_NOT(dest);\r | |
3730 | bsr1 = dest->bs;\r | |
3731 | buf1 = dest->mbuf;\r | |
3732 | not2 = IS_NCCLASS_NOT(cc);\r | |
3733 | bsr2 = cc->bs;\r | |
3734 | buf2 = cc->mbuf;\r | |
3735 | \r | |
3736 | if (not1 != 0) {\r | |
3737 | bitset_invert_to(bsr1, bs1);\r | |
3738 | bsr1 = bs1;\r | |
3739 | }\r | |
3740 | if (not2 != 0) {\r | |
3741 | bitset_invert_to(bsr2, bs2);\r | |
3742 | bsr2 = bs2;\r | |
3743 | }\r | |
3744 | bitset_or(bsr1, bsr2);\r | |
3745 | if (bsr1 != dest->bs) {\r | |
3746 | bitset_copy(dest->bs, bsr1);\r | |
14b0e578 CS |
3747 | }\r |
3748 | if (not1 != 0) {\r | |
3749 | bitset_invert(dest->bs);\r | |
3750 | }\r | |
3751 | \r | |
3752 | if (! ONIGENC_IS_SINGLEBYTE(enc)) {\r | |
3753 | if (not1 != 0 && not2 != 0) {\r | |
3754 | r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf);\r | |
3755 | }\r | |
3756 | else {\r | |
3757 | r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf);\r | |
3758 | if (r == 0 && not1 != 0) {\r | |
b602265d DG |
3759 | BBuf *tbuf;\r |
3760 | r = not_code_range_buf(enc, pbuf, &tbuf);\r | |
3761 | if (r != 0) {\r | |
3762 | bbuf_free(pbuf);\r | |
3763 | return r;\r | |
3764 | }\r | |
3765 | bbuf_free(pbuf);\r | |
3766 | pbuf = tbuf;\r | |
14b0e578 CS |
3767 | }\r |
3768 | }\r | |
3769 | if (r != 0) return r;\r | |
3770 | \r | |
3771 | dest->mbuf = pbuf;\r | |
3772 | bbuf_free(buf1);\r | |
3773 | return r;\r | |
3774 | }\r | |
3775 | else\r | |
3776 | return 0;\r | |
3777 | }\r | |
3778 | \r | |
b602265d DG |
3779 | static OnigCodePoint\r |
3780 | conv_backslash_value(OnigCodePoint c, ScanEnv* env)\r | |
14b0e578 CS |
3781 | {\r |
3782 | if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_CONTROL_CHARS)) {\r | |
3783 | switch (c) {\r | |
3784 | case 'n': return '\n';\r | |
3785 | case 't': return '\t';\r | |
3786 | case 'r': return '\r';\r | |
3787 | case 'f': return '\f';\r | |
3788 | case 'a': return '\007';\r | |
3789 | case 'b': return '\010';\r | |
3790 | case 'e': return '\033';\r | |
3791 | case 'v':\r | |
3792 | if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_V_VTAB))\r | |
b602265d | 3793 | return '\v';\r |
14b0e578 CS |
3794 | break;\r |
3795 | \r | |
3796 | default:\r | |
3797 | break;\r | |
3798 | }\r | |
3799 | }\r | |
3800 | return c;\r | |
3801 | }\r | |
3802 | \r | |
3803 | static int\r | |
3804 | is_invalid_quantifier_target(Node* node)\r | |
3805 | {\r | |
b602265d DG |
3806 | switch (NODE_TYPE(node)) {\r |
3807 | case NODE_ANCHOR:\r | |
3808 | case NODE_GIMMICK:\r | |
14b0e578 CS |
3809 | return 1;\r |
3810 | break;\r | |
3811 | \r | |
b602265d | 3812 | case NODE_ENCLOSURE:\r |
14b0e578 | 3813 | /* allow enclosed elements */\r |
b602265d | 3814 | /* return is_invalid_quantifier_target(NODE_BODY(node)); */\r |
14b0e578 CS |
3815 | break;\r |
3816 | \r | |
b602265d | 3817 | case NODE_LIST:\r |
14b0e578 | 3818 | do {\r |
b602265d DG |
3819 | if (! is_invalid_quantifier_target(NODE_CAR(node))) return 0;\r |
3820 | } while (IS_NOT_NULL(node = NODE_CDR(node)));\r | |
14b0e578 CS |
3821 | return 0;\r |
3822 | break;\r | |
3823 | \r | |
b602265d | 3824 | case NODE_ALT:\r |
14b0e578 | 3825 | do {\r |
b602265d DG |
3826 | if (is_invalid_quantifier_target(NODE_CAR(node))) return 1;\r |
3827 | } while (IS_NOT_NULL(node = NODE_CDR(node)));\r | |
14b0e578 CS |
3828 | break;\r |
3829 | \r | |
3830 | default:\r | |
3831 | break;\r | |
3832 | }\r | |
3833 | return 0;\r | |
3834 | }\r | |
3835 | \r | |
3836 | /* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */\r | |
3837 | static int\r | |
b602265d | 3838 | quantifier_type_num(QuantNode* q)\r |
14b0e578 CS |
3839 | {\r |
3840 | if (q->greedy) {\r | |
3841 | if (q->lower == 0) {\r | |
3842 | if (q->upper == 1) return 0;\r | |
3843 | else if (IS_REPEAT_INFINITE(q->upper)) return 1;\r | |
3844 | }\r | |
3845 | else if (q->lower == 1) {\r | |
3846 | if (IS_REPEAT_INFINITE(q->upper)) return 2;\r | |
3847 | }\r | |
3848 | }\r | |
3849 | else {\r | |
3850 | if (q->lower == 0) {\r | |
3851 | if (q->upper == 1) return 3;\r | |
3852 | else if (IS_REPEAT_INFINITE(q->upper)) return 4;\r | |
3853 | }\r | |
3854 | else if (q->lower == 1) {\r | |
3855 | if (IS_REPEAT_INFINITE(q->upper)) return 5;\r | |
3856 | }\r | |
3857 | }\r | |
3858 | return -1;\r | |
3859 | }\r | |
3860 | \r | |
3861 | \r | |
/* Action to take when a simple quantifier is applied to another one. */
enum ReduceType {
  RQ_ASIS = 0,  /* as is */
  RQ_DEL  = 1,  /* delete parent */
  RQ_A    = 2,  /* to '*' */
  RQ_AQ   = 3,  /* to '*?' */
  RQ_QQ   = 4,  /* to '??' */
  RQ_P_QQ = 5,  /* to '+)??' */
  RQ_PQ_Q = 6   /* to '+?)?' */
};
3871 | \r | |
3872 | static enum ReduceType ReduceTypeTable[6][6] = {\r | |
3873 | {RQ_DEL, RQ_A, RQ_A, RQ_QQ, RQ_AQ, RQ_ASIS}, /* '?' */\r | |
3874 | {RQ_DEL, RQ_DEL, RQ_DEL, RQ_P_QQ, RQ_P_QQ, RQ_DEL}, /* '*' */\r | |
3875 | {RQ_A, RQ_A, RQ_DEL, RQ_ASIS, RQ_P_QQ, RQ_DEL}, /* '+' */\r | |
3876 | {RQ_DEL, RQ_AQ, RQ_AQ, RQ_DEL, RQ_AQ, RQ_AQ}, /* '??' */\r | |
3877 | {RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL}, /* '*?' */\r | |
3878 | {RQ_ASIS, RQ_PQ_Q, RQ_DEL, RQ_AQ, RQ_AQ, RQ_DEL} /* '+?' */\r | |
3879 | };\r | |
3880 | \r | |
3881 | extern void\r | |
3882 | onig_reduce_nested_quantifier(Node* pnode, Node* cnode)\r | |
3883 | {\r | |
3884 | int pnum, cnum;\r | |
b602265d DG |
3885 | QuantNode *p, *c;\r |
3886 | \r | |
3887 | p = QUANT_(pnode);\r | |
3888 | c = QUANT_(cnode);\r | |
3889 | pnum = quantifier_type_num(p);\r | |
3890 | cnum = quantifier_type_num(c);\r | |
3891 | if (pnum < 0 || cnum < 0) {\r | |
3892 | if ((p->lower == p->upper) && ! IS_REPEAT_INFINITE(p->upper)) {\r | |
3893 | if ((c->lower == c->upper) && ! IS_REPEAT_INFINITE(c->upper)) {\r | |
3894 | int n = positive_int_multiply(p->lower, c->lower);\r | |
3895 | if (n >= 0) {\r | |
3896 | p->lower = p->upper = n;\r | |
3897 | NODE_BODY(pnode) = NODE_BODY(cnode);\r | |
3898 | goto remove_cnode;\r | |
3899 | }\r | |
3900 | }\r | |
3901 | }\r | |
14b0e578 | 3902 | \r |
b602265d DG |
3903 | return ;\r |
3904 | }\r | |
14b0e578 CS |
3905 | \r |
3906 | switch(ReduceTypeTable[cnum][pnum]) {\r | |
3907 | case RQ_DEL:\r | |
b602265d | 3908 | *pnode = *cnode;\r |
14b0e578 CS |
3909 | break;\r |
3910 | case RQ_A:\r | |
b602265d | 3911 | NODE_BODY(pnode) = NODE_BODY(cnode);\r |
14b0e578 CS |
3912 | p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 1;\r |
3913 | break;\r | |
3914 | case RQ_AQ:\r | |
b602265d | 3915 | NODE_BODY(pnode) = NODE_BODY(cnode);\r |
14b0e578 CS |
3916 | p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 0;\r |
3917 | break;\r | |
3918 | case RQ_QQ:\r | |
b602265d | 3919 | NODE_BODY(pnode) = NODE_BODY(cnode);\r |
14b0e578 CS |
3920 | p->lower = 0; p->upper = 1; p->greedy = 0;\r |
3921 | break;\r | |
3922 | case RQ_P_QQ:\r | |
b602265d | 3923 | NODE_BODY(pnode) = cnode;\r |
14b0e578 CS |
3924 | p->lower = 0; p->upper = 1; p->greedy = 0;\r |
3925 | c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 1;\r | |
3926 | return ;\r | |
3927 | break;\r | |
3928 | case RQ_PQ_Q:\r | |
b602265d | 3929 | NODE_BODY(pnode) = cnode;\r |
14b0e578 CS |
3930 | p->lower = 0; p->upper = 1; p->greedy = 1;\r |
3931 | c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 0;\r | |
3932 | return ;\r | |
3933 | break;\r | |
3934 | case RQ_ASIS:\r | |
b602265d | 3935 | NODE_BODY(pnode) = cnode;\r |
14b0e578 CS |
3936 | return ;\r |
3937 | break;\r | |
3938 | }\r | |
3939 | \r | |
b602265d DG |
3940 | remove_cnode:\r |
3941 | NODE_BODY(cnode) = NULL_NODE;\r | |
14b0e578 CS |
3942 | onig_node_free(cnode);\r |
3943 | }\r | |
3944 | \r | |
b602265d DG |
3945 | static int\r |
3946 | node_new_general_newline(Node** node, ScanEnv* env)\r | |
3947 | {\r | |
3948 | int r;\r | |
3949 | int dlen, alen;\r | |
3950 | UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN * 2];\r | |
3951 | Node* crnl;\r | |
3952 | Node* ncc;\r | |
3953 | Node* x;\r | |
3954 | CClassNode* cc;\r | |
3955 | \r | |
3956 | dlen = ONIGENC_CODE_TO_MBC(env->enc, 0x0d, buf);\r | |
3957 | if (dlen < 0) return dlen;\r | |
3958 | alen = ONIGENC_CODE_TO_MBC(env->enc, 0x0a, buf + dlen);\r | |
3959 | if (alen < 0) return alen;\r | |
3960 | \r | |
3961 | crnl = node_new_str_raw(buf, buf + dlen + alen);\r | |
3962 | CHECK_NULL_RETURN_MEMERR(crnl);\r | |
3963 | \r | |
3964 | ncc = node_new_cclass();\r | |
3965 | if (IS_NULL(ncc)) goto err2;\r | |
3966 | \r | |
3967 | cc = CCLASS_(ncc);\r | |
3968 | if (dlen == 1) {\r | |
3969 | bitset_set_range(cc->bs, 0x0a, 0x0d);\r | |
3970 | }\r | |
3971 | else {\r | |
3972 | r = add_code_range(&(cc->mbuf), env, 0x0a, 0x0d);\r | |
3973 | if (r != 0) {\r | |
3974 | err1:\r | |
3975 | onig_node_free(ncc);\r | |
3976 | err2:\r | |
3977 | onig_node_free(crnl);\r | |
3978 | return ONIGERR_MEMORY;\r | |
3979 | }\r | |
3980 | }\r | |
3981 | \r | |
3982 | if (ONIGENC_IS_UNICODE_ENCODING(env->enc)) {\r | |
3983 | r = add_code_range(&(cc->mbuf), env, 0x85, 0x85);\r | |
3984 | if (r != 0) goto err1;\r | |
3985 | r = add_code_range(&(cc->mbuf), env, 0x2028, 0x2029);\r | |
3986 | if (r != 0) goto err1;\r | |
3987 | }\r | |
3988 | \r | |
3989 | x = node_new_enclosure_if_else(crnl, 0, ncc);\r | |
3990 | if (IS_NULL(x)) goto err1;\r | |
3991 | \r | |
3992 | *node = x;\r | |
3993 | return 0;\r | |
3994 | }\r | |
14b0e578 CS |
3995 | \r |
/* Token kinds produced by the pattern tokenizer. */
enum TokenSyms {
  TK_EOT                       = 0,   /* end of token */
  TK_RAW_BYTE                  = 1,
  TK_CHAR                      = 2,
  TK_STRING                    = 3,
  TK_CODE_POINT                = 4,
  TK_ANYCHAR                   = 5,
  TK_CHAR_TYPE                 = 6,
  TK_BACKREF                   = 7,
  TK_CALL                      = 8,
  TK_ANCHOR                    = 9,
  TK_OP_REPEAT                 = 10,
  TK_INTERVAL                  = 11,
  TK_ANYCHAR_ANYTIME           = 12,  /* SQL '%' == .* */
  TK_ALT                       = 13,
  TK_SUBEXP_OPEN               = 14,
  TK_SUBEXP_CLOSE              = 15,
  TK_CC_OPEN                   = 16,
  TK_QUOTE_OPEN                = 17,
  TK_CHAR_PROPERTY             = 18,  /* \p{...}, \P{...} */
  TK_KEEP                      = 19,  /* \K */
  TK_GENERAL_NEWLINE           = 20,  /* \R */
  TK_NO_NEWLINE                = 21,  /* \N */
  TK_TRUE_ANYCHAR              = 22,  /* \O */
  TK_EXTENDED_GRAPHEME_CLUSTER = 23,  /* \X */

  /* in cc */
  TK_CC_CLOSE                  = 24,
  TK_CC_RANGE                  = 25,
  TK_POSIX_BRACKET_OPEN        = 26,
  TK_CC_AND                    = 27,  /* && */
  TK_CC_CC_OPEN                = 28   /* [ */
};
4029 | \r | |
4030 | typedef struct {\r | |
4031 | enum TokenSyms type;\r | |
4032 | int escaped;\r | |
4033 | int base; /* is number: 8, 16 (used in [....]) */\r | |
4034 | UChar* backp;\r | |
4035 | union {\r | |
4036 | UChar* s;\r | |
4037 | int c;\r | |
4038 | OnigCodePoint code;\r | |
4039 | int anchor;\r | |
4040 | int subtype;\r | |
4041 | struct {\r | |
4042 | int lower;\r | |
4043 | int upper;\r | |
4044 | int greedy;\r | |
4045 | int possessive;\r | |
4046 | } repeat;\r | |
4047 | struct {\r | |
4048 | int num;\r | |
4049 | int ref1;\r | |
4050 | int* refs;\r | |
4051 | int by_name;\r | |
4052 | #ifdef USE_BACKREF_WITH_LEVEL\r | |
4053 | int exist_level;\r | |
4054 | int level; /* \k<name+n> */\r | |
4055 | #endif\r | |
4056 | } backref;\r | |
4057 | struct {\r | |
4058 | UChar* name;\r | |
4059 | UChar* name_end;\r | |
4060 | int gnum;\r | |
b602265d | 4061 | int by_number;\r |
14b0e578 CS |
4062 | } call;\r |
4063 | struct {\r | |
4064 | int ctype;\r | |
4065 | int not;\r | |
4066 | } prop;\r | |
4067 | } u;\r | |
4068 | } OnigToken;\r | |
4069 | \r | |
4070 | \r | |
4071 | static int\r | |
4072 | fetch_range_quantifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env)\r | |
4073 | {\r | |
4074 | int low, up, syn_allow, non_low = 0;\r | |
4075 | int r = 0;\r | |
4076 | OnigCodePoint c;\r | |
4077 | OnigEncoding enc = env->enc;\r | |
4078 | UChar* p = *src;\r | |
4079 | PFETCH_READY;\r | |
4080 | \r | |
4081 | syn_allow = IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INVALID_INTERVAL);\r | |
4082 | \r | |
4083 | if (PEND) {\r | |
4084 | if (syn_allow)\r | |
4085 | return 1; /* "....{" : OK! */\r | |
4086 | else\r | |
4087 | return ONIGERR_END_PATTERN_AT_LEFT_BRACE; /* "....{" syntax error */\r | |
4088 | }\r | |
4089 | \r | |
4090 | if (! syn_allow) {\r | |
4091 | c = PPEEK;\r | |
4092 | if (c == ')' || c == '(' || c == '|') {\r | |
4093 | return ONIGERR_END_PATTERN_AT_LEFT_BRACE;\r | |
4094 | }\r | |
4095 | }\r | |
4096 | \r | |
4097 | low = onig_scan_unsigned_number(&p, end, env->enc);\r | |
4098 | if (low < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;\r | |
4099 | if (low > ONIG_MAX_REPEAT_NUM)\r | |
4100 | return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;\r | |
4101 | \r | |
4102 | if (p == *src) { /* can't read low */\r | |
4103 | if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV)) {\r | |
4104 | /* allow {,n} as {0,n} */\r | |
4105 | low = 0;\r | |
4106 | non_low = 1;\r | |
4107 | }\r | |
4108 | else\r | |
4109 | goto invalid;\r | |
4110 | }\r | |
4111 | \r | |
4112 | if (PEND) goto invalid;\r | |
4113 | PFETCH(c);\r | |
4114 | if (c == ',') {\r | |
4115 | UChar* prev = p;\r | |
4116 | up = onig_scan_unsigned_number(&p, end, env->enc);\r | |
4117 | if (up < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;\r | |
4118 | if (up > ONIG_MAX_REPEAT_NUM)\r | |
4119 | return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;\r | |
4120 | \r | |
4121 | if (p == prev) {\r | |
4122 | if (non_low != 0)\r | |
b602265d | 4123 | goto invalid;\r |
14b0e578 CS |
4124 | up = REPEAT_INFINITE; /* {n,} : {n,infinite} */\r |
4125 | }\r | |
4126 | }\r | |
4127 | else {\r | |
4128 | if (non_low != 0)\r | |
4129 | goto invalid;\r | |
4130 | \r | |
4131 | PUNFETCH;\r | |
4132 | up = low; /* {n} : exact n times */\r | |
4133 | r = 2; /* fixed */\r | |
4134 | }\r | |
4135 | \r | |
4136 | if (PEND) goto invalid;\r | |
4137 | PFETCH(c);\r | |
4138 | if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) {\r | |
4139 | if (c != MC_ESC(env->syntax)) goto invalid;\r | |
4140 | PFETCH(c);\r | |
4141 | }\r | |
4142 | if (c != '}') goto invalid;\r | |
4143 | \r | |
4144 | if (!IS_REPEAT_INFINITE(up) && low > up) {\r | |
4145 | return ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE;\r | |
4146 | }\r | |
4147 | \r | |
4148 | tok->type = TK_INTERVAL;\r | |
4149 | tok->u.repeat.lower = low;\r | |
4150 | tok->u.repeat.upper = up;\r | |
4151 | *src = p;\r | |
4152 | return r; /* 0: normal {n,m}, 2: fixed {n} */\r | |
4153 | \r | |
4154 | invalid:\r | |
b602265d DG |
4155 | if (syn_allow) {\r |
4156 | /* *src = p; */ /* !!! Don't do this line !!! */\r | |
14b0e578 | 4157 | return 1; /* OK */\r |
b602265d | 4158 | }\r |
14b0e578 CS |
4159 | else\r |
4160 | return ONIGERR_INVALID_REPEAT_RANGE_PATTERN;\r | |
4161 | }\r | |
4162 | \r | |
4163 | /* \M-, \C-, \c, or \... */\r | |
4164 | static int\r | |
b602265d | 4165 | fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env, OnigCodePoint* val)\r |
14b0e578 CS |
4166 | {\r |
4167 | int v;\r | |
4168 | OnigCodePoint c;\r | |
4169 | OnigEncoding enc = env->enc;\r | |
4170 | UChar* p = *src;\r | |
4171 | \r | |
4172 | if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;\r | |
4173 | \r | |
4174 | PFETCH_S(c);\r | |
4175 | switch (c) {\r | |
4176 | case 'M':\r | |
4177 | if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META)) {\r | |
4178 | if (PEND) return ONIGERR_END_PATTERN_AT_META;\r | |
4179 | PFETCH_S(c);\r | |
4180 | if (c != '-') return ONIGERR_META_CODE_SYNTAX;\r | |
4181 | if (PEND) return ONIGERR_END_PATTERN_AT_META;\r | |
4182 | PFETCH_S(c);\r | |
4183 | if (c == MC_ESC(env->syntax)) {\r | |
b602265d | 4184 | v = fetch_escaped_value(&p, end, env, &c);\r |
14b0e578 | 4185 | if (v < 0) return v;\r |
14b0e578 CS |
4186 | }\r |
4187 | c = ((c & 0xff) | 0x80);\r | |
4188 | }\r | |
4189 | else\r | |
4190 | goto backslash;\r | |
4191 | break;\r | |
4192 | \r | |
4193 | case 'C':\r | |
4194 | if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL)) {\r | |
4195 | if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;\r | |
4196 | PFETCH_S(c);\r | |
4197 | if (c != '-') return ONIGERR_CONTROL_CODE_SYNTAX;\r | |
4198 | goto control;\r | |
4199 | }\r | |
4200 | else\r | |
4201 | goto backslash;\r | |
4202 | \r | |
4203 | case 'c':\r | |
4204 | if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_C_CONTROL)) {\r | |
4205 | control:\r | |
4206 | if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;\r | |
4207 | PFETCH_S(c);\r | |
4208 | if (c == '?') {\r | |
4209 | c = 0177;\r | |
4210 | }\r | |
4211 | else {\r | |
4212 | if (c == MC_ESC(env->syntax)) {\r | |
b602265d | 4213 | v = fetch_escaped_value(&p, end, env, &c);\r |
14b0e578 | 4214 | if (v < 0) return v;\r |
14b0e578 CS |
4215 | }\r |
4216 | c &= 0x9f;\r | |
4217 | }\r | |
4218 | break;\r | |
4219 | }\r | |
4220 | /* fall through */\r | |
4221 | \r | |
4222 | default:\r | |
4223 | {\r | |
4224 | backslash:\r | |
4225 | c = conv_backslash_value(c, env);\r | |
4226 | }\r | |
4227 | break;\r | |
4228 | }\r | |
4229 | \r | |
4230 | *src = p;\r | |
b602265d DG |
4231 | *val = c;\r |
4232 | return 0;\r | |
14b0e578 CS |
4233 | }\r |
4234 | \r | |
4235 | static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env);\r | |
4236 | \r | |
4237 | static OnigCodePoint\r | |
4238 | get_name_end_code_point(OnigCodePoint start)\r | |
4239 | {\r | |
4240 | switch (start) {\r | |
b602265d | 4241 | case '<': return (OnigCodePoint )'>'; break;\r |
14b0e578 | 4242 | case '\'': return (OnigCodePoint )'\''; break;\r |
b602265d | 4243 | case '(': return (OnigCodePoint )')'; break;\r |
14b0e578 CS |
4244 | default:\r |
4245 | break;\r | |
4246 | }\r | |
4247 | \r | |
4248 | return (OnigCodePoint )0;\r | |
4249 | }\r | |
4250 | \r | |
/* How a group reference was written. */
enum REF_NUM {
  IS_NOT_NUM = 0,  /* a name */
  IS_ABS_NUM = 1,  /* unsigned number */
  IS_REL_NUM = 2   /* number with a leading '+' or '-' */
};
4256 | \r | |
#ifdef USE_BACKREF_WITH_LEVEL
/*
   \k<name+n>, \k<name-n>
   \k<num+n>,  \k<num-n>
   \k<-num+n>, \k<-num-n>
   \k<+num+n>, \k<+num-n>

   Parse a back-reference name or number with an optional nest level.
   *src points just after the opening delimiter `start_code`.

   On success *src is advanced past the closing delimiter, *rname_end
   points at the end of the name part, *num_type tells whether the
   reference is a name, an absolute number or a relative number, and
   *rback_num holds the signed number for the numeric forms.
   Returns 1 when a level was present (stored in *rlevel), 0 when not,
   or a negative ONIGERR_* code.
*/
static int
fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
                      UChar** rname_end, ScanEnv* env,
                      int* rback_num, int* rlevel, enum REF_NUM* num_type)
{
  int r = 0;
  int sign = 1;
  int exist_level = 0;
  int digit_count = 0;
  OnigCodePoint end_code;
  OnigCodePoint c = 0;
  OnigEncoding enc = env->enc;
  UChar *name_end = end;
  UChar *pnum_head = *src;
  UChar *p = *src;
  PFETCH_READY;

  *rback_num = 0;
  *num_type = IS_NOT_NUM;

  end_code = get_name_end_code_point(start_code);

  if (PEND) return ONIGERR_EMPTY_GROUP_NAME;

  /* The first character decides between name, number and signed number. */
  PFETCH(c);
  if (c == end_code)
    return ONIGERR_EMPTY_GROUP_NAME;

  if (IS_CODE_DIGIT_ASCII(enc, c)) {
    *num_type = IS_ABS_NUM;
    digit_count++;
  }
  else if (c == '-' || c == '+') {
    *num_type = IS_REL_NUM;
    sign = (c == '-') ? -1 : 1;
    pnum_head = p;    /* the digits start after the sign */
  }
  else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
    r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
  }

  /* Scan up to the closing delimiter or the start of the level part. */
  while (!PEND) {
    name_end = p;
    PFETCH(c);
    if (c == end_code || c == ')' || c == '+' || c == '-') {
      if (*num_type != IS_NOT_NUM && digit_count == 0)
        r = ONIGERR_INVALID_GROUP_NAME;
      break;
    }

    if (*num_type == IS_NOT_NUM) {
      if (!ONIGENC_IS_CODE_WORD(enc, c))
        r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
    }
    else if (IS_CODE_DIGIT_ASCII(enc, c)) {
      digit_count++;
    }
    else {
      r = ONIGERR_INVALID_GROUP_NAME;
      *num_type = IS_NOT_NUM;
    }
  }

  if (r == 0 && c != end_code) {
    if (c == '+' || c == '-') {
      int level;
      int flag = (c == '-' ? -1 : 1);

      if (PEND) {
        r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
        goto end;
      }
      PFETCH(c);
      if (! IS_CODE_DIGIT_ASCII(enc, c)) goto err;
      PUNFETCH;
      level = onig_scan_unsigned_number(&p, end, enc);
      if (level < 0) return ONIGERR_TOO_BIG_NUMBER;
      *rlevel = (level * flag);
      exist_level = 1;

      if (!PEND) {
        PFETCH(c);
        if (c == end_code)
          goto end;
      }
    }

    /* Anything but "<level><end_code>" after the name is an error. */
  err:
    name_end = end;
    r = ONIGERR_INVALID_GROUP_NAME;
  }

 end:
  if (r == 0 && *num_type != IS_NOT_NUM) {
    int num = onig_scan_unsigned_number(&pnum_head, name_end, enc);

    *rback_num = num;
    if (num < 0) return ONIGERR_TOO_BIG_NUMBER;

    if (num == 0 && *num_type == IS_REL_NUM)
      r = ONIGERR_INVALID_GROUP_NAME;   /* "+0" / "-0" refer to nothing */
    else
      *rback_num = num * sign;
  }

  if (r != 0) {
    onig_scan_env_set_error_string(env, r, *src, name_end);
    return r;
  }

  *rname_end = name_end;
  *src = p;
  return (exist_level ? 1 : 0);
}
#endif /* USE_BACKREF_WITH_LEVEL */
4393 | \r | |
4394 | /*\r | |
b602265d | 4395 | ref: 0 -> define name (don't allow number name)\r |
14b0e578 CS |
4396 | 1 -> reference name (allow number name)\r |
4397 | */\r | |
4398 | static int\r | |
4399 | fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,\r | |
b602265d DG |
4400 | UChar** rname_end, ScanEnv* env, int* rback_num,\r |
4401 | enum REF_NUM* num_type, int ref)\r | |
14b0e578 | 4402 | {\r |
b602265d DG |
4403 | int r, sign;\r |
4404 | int digit_count;\r | |
14b0e578 CS |
4405 | OnigCodePoint end_code;\r |
4406 | OnigCodePoint c = 0;\r | |
4407 | OnigEncoding enc = env->enc;\r | |
4408 | UChar *name_end;\r | |
4409 | UChar *pnum_head;\r | |
4410 | UChar *p = *src;\r | |
4411 | \r | |
4412 | *rback_num = 0;\r | |
4413 | \r | |
4414 | end_code = get_name_end_code_point(start_code);\r | |
4415 | \r | |
b602265d | 4416 | digit_count = 0;\r |
14b0e578 CS |
4417 | name_end = end;\r |
4418 | pnum_head = *src;\r | |
4419 | r = 0;\r | |
b602265d | 4420 | *num_type = IS_NOT_NUM;\r |
14b0e578 CS |
4421 | sign = 1;\r |
4422 | if (PEND) {\r | |
4423 | return ONIGERR_EMPTY_GROUP_NAME;\r | |
4424 | }\r | |
4425 | else {\r | |
4426 | PFETCH_S(c);\r | |
4427 | if (c == end_code)\r | |
4428 | return ONIGERR_EMPTY_GROUP_NAME;\r | |
4429 | \r | |
b602265d | 4430 | if (IS_CODE_DIGIT_ASCII(enc, c)) {\r |
14b0e578 | 4431 | if (ref == 1)\r |
b602265d | 4432 | *num_type = IS_ABS_NUM;\r |
14b0e578 CS |
4433 | else {\r |
4434 | r = ONIGERR_INVALID_GROUP_NAME;\r | |
14b0e578 | 4435 | }\r |
b602265d | 4436 | digit_count++;\r |
14b0e578 CS |
4437 | }\r |
4438 | else if (c == '-') {\r | |
4439 | if (ref == 1) {\r | |
b602265d | 4440 | *num_type = IS_REL_NUM;\r |
14b0e578 CS |
4441 | sign = -1;\r |
4442 | pnum_head = p;\r | |
4443 | }\r | |
4444 | else {\r | |
4445 | r = ONIGERR_INVALID_GROUP_NAME;\r | |
14b0e578 CS |
4446 | }\r |
4447 | }\r | |
b602265d DG |
4448 | else if (c == '+') {\r |
4449 | if (ref == 1) {\r | |
4450 | *num_type = IS_REL_NUM;\r | |
4451 | sign = 1;\r | |
4452 | pnum_head = p;\r | |
14b0e578 CS |
4453 | }\r |
4454 | else {\r | |
14b0e578 | 4455 | r = ONIGERR_INVALID_GROUP_NAME;\r |
14b0e578 | 4456 | }\r |
14b0e578 | 4457 | }\r |
b602265d | 4458 | else if (!ONIGENC_IS_CODE_WORD(enc, c)) {\r |
14b0e578 | 4459 | r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;\r |
b602265d | 4460 | }\r |
14b0e578 CS |
4461 | }\r |
4462 | \r | |
4463 | if (r == 0) {\r | |
b602265d DG |
4464 | while (!PEND) {\r |
4465 | name_end = p;\r | |
4466 | PFETCH_S(c);\r | |
4467 | if (c == end_code || c == ')') {\r | |
4468 | if (*num_type != IS_NOT_NUM && digit_count == 0)\r | |
4469 | r = ONIGERR_INVALID_GROUP_NAME;\r | |
4470 | break;\r | |
4471 | }\r | |
4472 | \r | |
4473 | if (*num_type != IS_NOT_NUM) {\r | |
4474 | if (IS_CODE_DIGIT_ASCII(enc, c)) {\r | |
4475 | digit_count++;\r | |
4476 | }\r | |
4477 | else {\r | |
4478 | if (!ONIGENC_IS_CODE_WORD(enc, c))\r | |
4479 | r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;\r | |
4480 | else\r | |
4481 | r = ONIGERR_INVALID_GROUP_NAME;\r | |
4482 | \r | |
4483 | *num_type = IS_NOT_NUM;\r | |
4484 | }\r | |
4485 | }\r | |
4486 | else {\r | |
4487 | if (!ONIGENC_IS_CODE_WORD(enc, c)) {\r | |
4488 | r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;\r | |
4489 | }\r | |
4490 | }\r | |
4491 | }\r | |
4492 | \r | |
4493 | if (c != end_code) {\r | |
14b0e578 CS |
4494 | r = ONIGERR_INVALID_GROUP_NAME;\r |
4495 | goto err;\r | |
4496 | }\r | |
b602265d DG |
4497 | \r |
4498 | if (*num_type != IS_NOT_NUM) {\r | |
4499 | *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);\r | |
4500 | if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;\r | |
4501 | else if (*rback_num == 0) {\r | |
4502 | if (*num_type == IS_REL_NUM) {\r | |
4503 | r = ONIGERR_INVALID_GROUP_NAME;\r | |
4504 | goto err;\r | |
4505 | }\r | |
4506 | }\r | |
4507 | \r | |
4508 | *rback_num *= sign;\r | |
4509 | }\r | |
14b0e578 CS |
4510 | \r |
4511 | *rname_end = name_end;\r | |
4512 | *src = p;\r | |
4513 | return 0;\r | |
4514 | }\r | |
4515 | else {\r | |
b602265d DG |
4516 | while (!PEND) {\r |
4517 | name_end = p;\r | |
4518 | PFETCH_S(c);\r | |
4519 | if (c == end_code || c == ')')\r | |
4520 | break;\r | |
4521 | }\r | |
4522 | if (PEND)\r | |
4523 | name_end = end;\r | |
4524 | \r | |
14b0e578 CS |
4525 | err:\r |
4526 | onig_scan_env_set_error_string(env, r, *src, name_end);\r | |
4527 | return r;\r | |
4528 | }\r | |
4529 | }\r | |
14b0e578 CS |
4530 | \r |
4531 | static void\r | |
4532 | CC_ESC_WARN(ScanEnv* env, UChar *c)\r | |
4533 | {\r | |
4534 | if (onig_warn == onig_null_warn) return ;\r | |
4535 | \r | |
4536 | if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED) &&\r | |
4537 | IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) {\r | |
4538 | UChar buf[WARN_BUFSIZE];\r | |
4539 | onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,\r | |
b602265d DG |
4540 | env->pattern, env->pattern_end,\r |
4541 | (UChar* )"character class has '%s' without escape",\r | |
4542 | c);\r | |
14b0e578 CS |
4543 | (*onig_warn)((char* )buf);\r |
4544 | }\r | |
4545 | }\r | |
4546 | \r | |
4547 | static void\r | |
4548 | CLOSE_BRACKET_WITHOUT_ESC_WARN(ScanEnv* env, UChar* c)\r | |
4549 | {\r | |
4550 | if (onig_warn == onig_null_warn) return ;\r | |
4551 | \r | |
4552 | if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) {\r | |
4553 | UChar buf[WARN_BUFSIZE];\r | |
4554 | onig_snprintf_with_pattern(buf, WARN_BUFSIZE, (env)->enc,\r | |
b602265d DG |
4555 | (env)->pattern, (env)->pattern_end,\r |
4556 | (UChar* )"regular expression has '%s' without escape", c);\r | |
14b0e578 CS |
4557 | (*onig_warn)((char* )buf);\r |
4558 | }\r | |
4559 | }\r | |
4560 | \r | |
4561 | static UChar*\r | |
4562 | find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to,\r | |
b602265d | 4563 | UChar **next, OnigEncoding enc)\r |
14b0e578 CS |
4564 | {\r |
4565 | int i;\r | |
4566 | OnigCodePoint x;\r | |
4567 | UChar *q;\r | |
4568 | UChar *p = from;\r | |
4569 | \r | |
4570 | while (p < to) {\r | |
4571 | x = ONIGENC_MBC_TO_CODE(enc, p, to);\r | |
4572 | q = p + enclen(enc, p);\r | |
4573 | if (x == s[0]) {\r | |
4574 | for (i = 1; i < n && q < to; i++) {\r | |
b602265d DG |
4575 | x = ONIGENC_MBC_TO_CODE(enc, q, to);\r |
4576 | if (x != s[i]) break;\r | |
4577 | q += enclen(enc, q);\r | |
14b0e578 CS |
4578 | }\r |
4579 | if (i >= n) {\r | |
b602265d DG |
4580 | if (IS_NOT_NULL(next))\r |
4581 | *next = q;\r | |
4582 | return p;\r | |
14b0e578 CS |
4583 | }\r |
4584 | }\r | |
4585 | p = q;\r | |
4586 | }\r | |
4587 | return NULL_UCHARP;\r | |
4588 | }\r | |
4589 | \r | |
4590 | static int\r | |
4591 | str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to,\r | |
b602265d | 4592 | OnigCodePoint bad, OnigEncoding enc, OnigSyntaxType* syn)\r |
14b0e578 CS |
4593 | {\r |
4594 | int i, in_esc;\r | |
4595 | OnigCodePoint x;\r | |
4596 | UChar *q;\r | |
4597 | UChar *p = from;\r | |
4598 | \r | |
4599 | in_esc = 0;\r | |
4600 | while (p < to) {\r | |
4601 | if (in_esc) {\r | |
4602 | in_esc = 0;\r | |
4603 | p += enclen(enc, p);\r | |
4604 | }\r | |
4605 | else {\r | |
4606 | x = ONIGENC_MBC_TO_CODE(enc, p, to);\r | |
4607 | q = p + enclen(enc, p);\r | |
4608 | if (x == s[0]) {\r | |
b602265d DG |
4609 | for (i = 1; i < n && q < to; i++) {\r |
4610 | x = ONIGENC_MBC_TO_CODE(enc, q, to);\r | |
4611 | if (x != s[i]) break;\r | |
4612 | q += enclen(enc, q);\r | |
4613 | }\r | |
4614 | if (i >= n) return 1;\r | |
4615 | p += enclen(enc, p);\r | |
14b0e578 CS |
4616 | }\r |
4617 | else {\r | |
b602265d DG |
4618 | x = ONIGENC_MBC_TO_CODE(enc, p, to);\r |
4619 | if (x == bad) return 0;\r | |
4620 | else if (x == MC_ESC(syn)) in_esc = 1;\r | |
4621 | p = q;\r | |
14b0e578 CS |
4622 | }\r |
4623 | }\r | |
4624 | }\r | |
4625 | return 0;\r | |
4626 | }\r | |
4627 | \r | |
4628 | static int\r | |
4629 | fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)\r | |
4630 | {\r | |
4631 | int num;\r | |
4632 | OnigCodePoint c, c2;\r | |
4633 | OnigSyntaxType* syn = env->syntax;\r | |
4634 | OnigEncoding enc = env->enc;\r | |
4635 | UChar* prev;\r | |
4636 | UChar* p = *src;\r | |
4637 | PFETCH_READY;\r | |
4638 | \r | |
4639 | if (PEND) {\r | |
4640 | tok->type = TK_EOT;\r | |
4641 | return tok->type;\r | |
4642 | }\r | |
4643 | \r | |
4644 | PFETCH(c);\r | |
4645 | tok->type = TK_CHAR;\r | |
4646 | tok->base = 0;\r | |
4647 | tok->u.c = c;\r | |
4648 | tok->escaped = 0;\r | |
4649 | \r | |
4650 | if (c == ']') {\r | |
4651 | tok->type = TK_CC_CLOSE;\r | |
4652 | }\r | |
4653 | else if (c == '-') {\r | |
4654 | tok->type = TK_CC_RANGE;\r | |
4655 | }\r | |
4656 | else if (c == MC_ESC(syn)) {\r | |
4657 | if (! IS_SYNTAX_BV(syn, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC))\r | |
4658 | goto end;\r | |
4659 | \r | |
4660 | if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;\r | |
4661 | \r | |
4662 | PFETCH(c);\r | |
4663 | tok->escaped = 1;\r | |
4664 | tok->u.c = c;\r | |
4665 | switch (c) {\r | |
4666 | case 'w':\r | |
4667 | tok->type = TK_CHAR_TYPE;\r | |
4668 | tok->u.prop.ctype = ONIGENC_CTYPE_WORD;\r | |
4669 | tok->u.prop.not = 0;\r | |
4670 | break;\r | |
4671 | case 'W':\r | |
4672 | tok->type = TK_CHAR_TYPE;\r | |
4673 | tok->u.prop.ctype = ONIGENC_CTYPE_WORD;\r | |
4674 | tok->u.prop.not = 1;\r | |
4675 | break;\r | |
4676 | case 'd':\r | |
4677 | tok->type = TK_CHAR_TYPE;\r | |
4678 | tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;\r | |
4679 | tok->u.prop.not = 0;\r | |
4680 | break;\r | |
4681 | case 'D':\r | |
4682 | tok->type = TK_CHAR_TYPE;\r | |
4683 | tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;\r | |
4684 | tok->u.prop.not = 1;\r | |
4685 | break;\r | |
4686 | case 's':\r | |
4687 | tok->type = TK_CHAR_TYPE;\r | |
4688 | tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;\r | |
4689 | tok->u.prop.not = 0;\r | |
4690 | break;\r | |
4691 | case 'S':\r | |
4692 | tok->type = TK_CHAR_TYPE;\r | |
4693 | tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;\r | |
4694 | tok->u.prop.not = 1;\r | |
4695 | break;\r | |
4696 | case 'h':\r | |
4697 | if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;\r | |
4698 | tok->type = TK_CHAR_TYPE;\r | |
4699 | tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;\r | |
4700 | tok->u.prop.not = 0;\r | |
4701 | break;\r | |
4702 | case 'H':\r | |
4703 | if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;\r | |
4704 | tok->type = TK_CHAR_TYPE;\r | |
4705 | tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;\r | |
4706 | tok->u.prop.not = 1;\r | |
4707 | break;\r | |
4708 | \r | |
4709 | case 'p':\r | |
4710 | case 'P':\r | |
b602265d DG |
4711 | if (PEND) break;\r |
4712 | \r | |
14b0e578 CS |
4713 | c2 = PPEEK;\r |
4714 | if (c2 == '{' &&\r | |
b602265d DG |
4715 | IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {\r |
4716 | PINC;\r | |
4717 | tok->type = TK_CHAR_PROPERTY;\r | |
4718 | tok->u.prop.not = (c == 'P' ? 1 : 0);\r | |
4719 | \r | |
4720 | if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {\r | |
4721 | PFETCH(c2);\r | |
4722 | if (c2 == '^') {\r | |
4723 | tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);\r | |
4724 | }\r | |
4725 | else\r | |
4726 | PUNFETCH;\r | |
4727 | }\r | |
4728 | }\r | |
4729 | break;\r | |
4730 | \r | |
4731 | case 'o':\r | |
4732 | if (PEND) break;\r | |
4733 | \r | |
4734 | prev = p;\r | |
4735 | if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_O_BRACE_OCTAL)) {\r | |
4736 | PINC;\r | |
4737 | num = scan_unsigned_octal_number(&p, end, 11, enc);\r | |
4738 | if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;\r | |
4739 | if (!PEND) {\r | |
4740 | c2 = PPEEK;\r | |
4741 | if (IS_CODE_DIGIT_ASCII(enc, c2))\r | |
4742 | return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;\r | |
4743 | }\r | |
4744 | \r | |
4745 | if (p > prev + enclen(enc, prev) && !PEND && (PPEEK_IS('}'))) {\r | |
4746 | PINC;\r | |
4747 | tok->type = TK_CODE_POINT;\r | |
4748 | tok->base = 8;\r | |
4749 | tok->u.code = (OnigCodePoint )num;\r | |
4750 | }\r | |
4751 | else {\r | |
4752 | /* can't read nothing or invalid format */\r | |
4753 | p = prev;\r | |
4754 | }\r | |
14b0e578 CS |
4755 | }\r |
4756 | break;\r | |
4757 | \r | |
4758 | case 'x':\r | |
4759 | if (PEND) break;\r | |
4760 | \r | |
4761 | prev = p;\r | |
4762 | if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {\r | |
b602265d DG |
4763 | PINC;\r |
4764 | num = scan_unsigned_hexadecimal_number(&p, end, 0, 8, enc);\r | |
4765 | if (num < 0) {\r | |
4766 | if (num == ONIGERR_TOO_BIG_NUMBER)\r | |
4767 | return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;\r | |
4768 | else\r | |
4769 | return num;\r | |
4770 | }\r | |
4771 | if (!PEND) {\r | |
14b0e578 | 4772 | c2 = PPEEK;\r |
b602265d | 4773 | if (IS_CODE_XDIGIT_ASCII(enc, c2))\r |
14b0e578 CS |
4774 | return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;\r |
4775 | }\r | |
4776 | \r | |
b602265d DG |
4777 | if (p > prev + enclen(enc, prev) && !PEND && (PPEEK_IS('}'))) {\r |
4778 | PINC;\r | |
4779 | tok->type = TK_CODE_POINT;\r | |
4780 | tok->base = 16;\r | |
4781 | tok->u.code = (OnigCodePoint )num;\r | |
4782 | }\r | |
4783 | else {\r | |
4784 | /* can't read nothing or invalid format */\r | |
4785 | p = prev;\r | |
4786 | }\r | |
14b0e578 CS |
4787 | }\r |
4788 | else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {\r | |
b602265d DG |
4789 | num = scan_unsigned_hexadecimal_number(&p, end, 0, 2, enc);\r |
4790 | if (num < 0) return num;\r | |
4791 | if (p == prev) { /* can't read nothing. */\r | |
4792 | num = 0; /* but, it's not error */\r | |
4793 | }\r | |
4794 | tok->type = TK_RAW_BYTE;\r | |
4795 | tok->base = 16;\r | |
4796 | tok->u.c = num;\r | |
14b0e578 CS |
4797 | }\r |
4798 | break;\r | |
4799 | \r | |
4800 | case 'u':\r | |
4801 | if (PEND) break;\r | |
4802 | \r | |
4803 | prev = p;\r | |
4804 | if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {\r | |
b602265d DG |
4805 | num = scan_unsigned_hexadecimal_number(&p, end, 4, 4, enc);\r |
4806 | if (num < 0) return num;\r | |
4807 | if (p == prev) { /* can't read nothing. */\r | |
4808 | num = 0; /* but, it's not error */\r | |
4809 | }\r | |
4810 | tok->type = TK_CODE_POINT;\r | |
4811 | tok->base = 16;\r | |
4812 | tok->u.code = (OnigCodePoint )num;\r | |
14b0e578 CS |
4813 | }\r |
4814 | break;\r | |
4815 | \r | |
4816 | case '0':\r | |
4817 | case '1': case '2': case '3': case '4': case '5': case '6': case '7':\r | |
4818 | if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {\r | |
b602265d DG |
4819 | PUNFETCH;\r |
4820 | prev = p;\r | |
4821 | num = scan_unsigned_octal_number(&p, end, 3, enc);\r | |
4822 | if (num < 0 || num >= 256) return ONIGERR_TOO_BIG_NUMBER;\r | |
4823 | if (p == prev) { /* can't read nothing. */\r | |
4824 | num = 0; /* but, it's not error */\r | |
4825 | }\r | |
4826 | tok->type = TK_RAW_BYTE;\r | |
4827 | tok->base = 8;\r | |
4828 | tok->u.c = num;\r | |
14b0e578 CS |
4829 | }\r |
4830 | break;\r | |
4831 | \r | |
4832 | default:\r | |
4833 | PUNFETCH;\r | |
b602265d | 4834 | num = fetch_escaped_value(&p, end, env, &c2);\r |
14b0e578 | 4835 | if (num < 0) return num;\r |
b602265d DG |
4836 | if (tok->u.c != c2) {\r |
4837 | tok->u.code = c2;\r | |
4838 | tok->type = TK_CODE_POINT;\r | |
14b0e578 CS |
4839 | }\r |
4840 | break;\r | |
4841 | }\r | |
4842 | }\r | |
4843 | else if (c == '[') {\r | |
4844 | if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_POSIX_BRACKET) && (PPEEK_IS(':'))) {\r | |
4845 | OnigCodePoint send[] = { (OnigCodePoint )':', (OnigCodePoint )']' };\r | |
b602265d | 4846 | tok->backp = p; /* point at '[' is read */\r |
14b0e578 CS |
4847 | PINC;\r |
4848 | if (str_exist_check_with_esc(send, 2, p, end,\r | |
4849 | (OnigCodePoint )']', enc, syn)) {\r | |
b602265d | 4850 | tok->type = TK_POSIX_BRACKET_OPEN;\r |
14b0e578 CS |
4851 | }\r |
4852 | else {\r | |
b602265d DG |
4853 | PUNFETCH;\r |
4854 | goto cc_in_cc;\r | |
14b0e578 CS |
4855 | }\r |
4856 | }\r | |
4857 | else {\r | |
4858 | cc_in_cc:\r | |
4859 | if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP)) {\r | |
b602265d | 4860 | tok->type = TK_CC_CC_OPEN;\r |
14b0e578 CS |
4861 | }\r |
4862 | else {\r | |
b602265d | 4863 | CC_ESC_WARN(env, (UChar* )"[");\r |
14b0e578 CS |
4864 | }\r |
4865 | }\r | |
4866 | }\r | |
4867 | else if (c == '&') {\r | |
4868 | if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP) &&\r | |
b602265d | 4869 | !PEND && (PPEEK_IS('&'))) {\r |
14b0e578 CS |
4870 | PINC;\r |
4871 | tok->type = TK_CC_AND;\r | |
4872 | }\r | |
4873 | }\r | |
4874 | \r | |
4875 | end:\r | |
4876 | *src = p;\r | |
4877 | return tok->type;\r | |
4878 | }\r | |
4879 | \r | |
4880 | static int\r | |
4881 | fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)\r | |
4882 | {\r | |
4883 | int r, num;\r | |
4884 | OnigCodePoint c;\r | |
4885 | OnigEncoding enc = env->enc;\r | |
4886 | OnigSyntaxType* syn = env->syntax;\r | |
4887 | UChar* prev;\r | |
4888 | UChar* p = *src;\r | |
4889 | PFETCH_READY;\r | |
4890 | \r | |
4891 | start:\r | |
4892 | if (PEND) {\r | |
4893 | tok->type = TK_EOT;\r | |
4894 | return tok->type;\r | |
4895 | }\r | |
4896 | \r | |
4897 | tok->type = TK_STRING;\r | |
4898 | tok->base = 0;\r | |
4899 | tok->backp = p;\r | |
4900 | \r | |
4901 | PFETCH(c);\r | |
4902 | if (IS_MC_ESC_CODE(c, syn)) {\r | |
4903 | if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;\r | |
4904 | \r | |
4905 | tok->backp = p;\r | |
4906 | PFETCH(c);\r | |
4907 | \r | |
4908 | tok->u.c = c;\r | |
4909 | tok->escaped = 1;\r | |
4910 | switch (c) {\r | |
4911 | case '*':\r | |
4912 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF)) break;\r | |
4913 | tok->type = TK_OP_REPEAT;\r | |
4914 | tok->u.repeat.lower = 0;\r | |
4915 | tok->u.repeat.upper = REPEAT_INFINITE;\r | |
4916 | goto greedy_check;\r | |
4917 | break;\r | |
4918 | \r | |
4919 | case '+':\r | |
4920 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_PLUS_ONE_INF)) break;\r | |
4921 | tok->type = TK_OP_REPEAT;\r | |
4922 | tok->u.repeat.lower = 1;\r | |
4923 | tok->u.repeat.upper = REPEAT_INFINITE;\r | |
4924 | goto greedy_check;\r | |
4925 | break;\r | |
4926 | \r | |
4927 | case '?':\r | |
4928 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_QMARK_ZERO_ONE)) break;\r | |
4929 | tok->type = TK_OP_REPEAT;\r | |
4930 | tok->u.repeat.lower = 0;\r | |
4931 | tok->u.repeat.upper = 1;\r | |
4932 | greedy_check:\r | |
4933 | if (!PEND && PPEEK_IS('?') &&\r | |
b602265d DG |
4934 | IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY)) {\r |
4935 | PFETCH(c);\r | |
4936 | tok->u.repeat.greedy = 0;\r | |
4937 | tok->u.repeat.possessive = 0;\r | |
14b0e578 CS |
4938 | }\r |
4939 | else {\r | |
4940 | possessive_check:\r | |
b602265d DG |
4941 | if (!PEND && PPEEK_IS('+') &&\r |
4942 | ((IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT) &&\r | |
4943 | tok->type != TK_INTERVAL) ||\r | |
4944 | (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL) &&\r | |
4945 | tok->type == TK_INTERVAL))) {\r | |
4946 | PFETCH(c);\r | |
4947 | tok->u.repeat.greedy = 1;\r | |
4948 | tok->u.repeat.possessive = 1;\r | |
4949 | }\r | |
4950 | else {\r | |
4951 | tok->u.repeat.greedy = 1;\r | |
4952 | tok->u.repeat.possessive = 0;\r | |
4953 | }\r | |
14b0e578 CS |
4954 | }\r |
4955 | break;\r | |
4956 | \r | |
4957 | case '{':\r | |
4958 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break;\r | |
4959 | r = fetch_range_quantifier(&p, end, tok, env);\r | |
4960 | if (r < 0) return r; /* error */\r | |
4961 | if (r == 0) goto greedy_check;\r | |
4962 | else if (r == 2) { /* {n} */\r | |
b602265d DG |
4963 | if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))\r |
4964 | goto possessive_check;\r | |
14b0e578 | 4965 | \r |
b602265d | 4966 | goto greedy_check;\r |
14b0e578 CS |
4967 | }\r |
4968 | /* r == 1 : normal char */\r | |
4969 | break;\r | |
4970 | \r | |
4971 | case '|':\r | |
4972 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_VBAR_ALT)) break;\r | |
4973 | tok->type = TK_ALT;\r | |
4974 | break;\r | |
4975 | \r | |
4976 | case '(':\r | |
4977 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;\r | |
4978 | tok->type = TK_SUBEXP_OPEN;\r | |
4979 | break;\r | |
4980 | \r | |
4981 | case ')':\r | |
4982 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;\r | |
4983 | tok->type = TK_SUBEXP_CLOSE;\r | |
4984 | break;\r | |
4985 | \r | |
4986 | case 'w':\r | |
4987 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;\r | |
4988 | tok->type = TK_CHAR_TYPE;\r | |
4989 | tok->u.prop.ctype = ONIGENC_CTYPE_WORD;\r | |
4990 | tok->u.prop.not = 0;\r | |
4991 | break;\r | |
4992 | \r | |
4993 | case 'W':\r | |
4994 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;\r | |
4995 | tok->type = TK_CHAR_TYPE;\r | |
4996 | tok->u.prop.ctype = ONIGENC_CTYPE_WORD;\r | |
4997 | tok->u.prop.not = 1;\r | |
4998 | break;\r | |
4999 | \r | |
5000 | case 'b':\r | |
5001 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;\r | |
5002 | tok->type = TK_ANCHOR;\r | |
b602265d | 5003 | tok->u.anchor = ANCHOR_WORD_BOUNDARY;\r |
14b0e578 CS |
5004 | break;\r |
5005 | \r | |
5006 | case 'B':\r | |
5007 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;\r | |
5008 | tok->type = TK_ANCHOR;\r | |
b602265d DG |
5009 | tok->u.anchor = ANCHOR_NO_WORD_BOUNDARY;\r |
5010 | break;\r | |
5011 | \r | |
5012 | case 'y':\r | |
5013 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER)) break;\r | |
5014 | tok->type = TK_ANCHOR;\r | |
5015 | tok->u.anchor = ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY;\r | |
5016 | break;\r | |
5017 | \r | |
5018 | case 'Y':\r | |
5019 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER)) break;\r | |
5020 | tok->type = TK_ANCHOR;\r | |
5021 | tok->u.anchor = ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY;\r | |
14b0e578 CS |
5022 | break;\r |
5023 | \r | |
5024 | #ifdef USE_WORD_BEGIN_END\r | |
5025 | case '<':\r | |
5026 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;\r | |
5027 | tok->type = TK_ANCHOR;\r | |
5028 | tok->u.anchor = ANCHOR_WORD_BEGIN;\r | |
5029 | break;\r | |
5030 | \r | |
5031 | case '>':\r | |
5032 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;\r | |
5033 | tok->type = TK_ANCHOR;\r | |
5034 | tok->u.anchor = ANCHOR_WORD_END;\r | |
5035 | break;\r | |
5036 | #endif\r | |
5037 | \r | |
5038 | case 's':\r | |
5039 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;\r | |
5040 | tok->type = TK_CHAR_TYPE;\r | |
5041 | tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;\r | |
5042 | tok->u.prop.not = 0;\r | |
5043 | break;\r | |
5044 | \r | |
5045 | case 'S':\r | |
5046 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;\r | |
5047 | tok->type = TK_CHAR_TYPE;\r | |
5048 | tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;\r | |
5049 | tok->u.prop.not = 1;\r | |
5050 | break;\r | |
5051 | \r | |
5052 | case 'd':\r | |
5053 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;\r | |
5054 | tok->type = TK_CHAR_TYPE;\r | |
5055 | tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;\r | |
5056 | tok->u.prop.not = 0;\r | |
5057 | break;\r | |
5058 | \r | |
5059 | case 'D':\r | |
5060 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;\r | |
5061 | tok->type = TK_CHAR_TYPE;\r | |
5062 | tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;\r | |
5063 | tok->u.prop.not = 1;\r | |
5064 | break;\r | |
5065 | \r | |
5066 | case 'h':\r | |
5067 | if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;\r | |
5068 | tok->type = TK_CHAR_TYPE;\r | |
5069 | tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;\r | |
5070 | tok->u.prop.not = 0;\r | |
5071 | break;\r | |
5072 | \r | |
5073 | case 'H':\r | |
5074 | if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;\r | |
5075 | tok->type = TK_CHAR_TYPE;\r | |
5076 | tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;\r | |
5077 | tok->u.prop.not = 1;\r | |
5078 | break;\r | |
5079 | \r | |
b602265d DG |
5080 | case 'K':\r |
5081 | if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP)) break;\r | |
5082 | tok->type = TK_KEEP;\r | |
5083 | break;\r | |
5084 | \r | |
5085 | case 'R':\r | |
5086 | if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE)) break;\r | |
5087 | tok->type = TK_GENERAL_NEWLINE;\r | |
5088 | break;\r | |
5089 | \r | |
5090 | case 'N':\r | |
5091 | if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT)) break;\r | |
5092 | tok->type = TK_NO_NEWLINE;\r | |
5093 | break;\r | |
5094 | \r | |
5095 | case 'O':\r | |
5096 | if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT)) break;\r | |
5097 | tok->type = TK_TRUE_ANYCHAR;\r | |
5098 | break;\r | |
5099 | \r | |
5100 | case 'X':\r | |
5101 | if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER)) break;\r | |
5102 | tok->type = TK_EXTENDED_GRAPHEME_CLUSTER;\r | |
5103 | break;\r | |
5104 | \r | |
14b0e578 CS |
5105 | case 'A':\r |
5106 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;\r | |
5107 | begin_buf:\r | |
5108 | tok->type = TK_ANCHOR;\r | |
5109 | tok->u.subtype = ANCHOR_BEGIN_BUF;\r | |
5110 | break;\r | |
5111 | \r | |
5112 | case 'Z':\r | |
5113 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;\r | |
5114 | tok->type = TK_ANCHOR;\r | |
5115 | tok->u.subtype = ANCHOR_SEMI_END_BUF;\r | |
5116 | break;\r | |
5117 | \r | |
5118 | case 'z':\r | |
5119 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;\r | |
5120 | end_buf:\r | |
5121 | tok->type = TK_ANCHOR;\r | |
5122 | tok->u.subtype = ANCHOR_END_BUF;\r | |
5123 | break;\r | |
5124 | \r | |
5125 | case 'G':\r | |
5126 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR)) break;\r | |
5127 | tok->type = TK_ANCHOR;\r | |
5128 | tok->u.subtype = ANCHOR_BEGIN_POSITION;\r | |
5129 | break;\r | |
5130 | \r | |
5131 | case '`':\r | |
5132 | if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;\r | |
5133 | goto begin_buf;\r | |
5134 | break;\r | |
5135 | \r | |
5136 | case '\'':\r | |
5137 | if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;\r | |
5138 | goto end_buf;\r | |
5139 | break;\r | |
5140 | \r | |
b602265d DG |
5141 | case 'o':\r |
5142 | if (PEND) break;\r | |
5143 | \r | |
5144 | prev = p;\r | |
5145 | if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_O_BRACE_OCTAL)) {\r | |
5146 | PINC;\r | |
5147 | num = scan_unsigned_octal_number(&p, end, 11, enc);\r | |
5148 | if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;\r | |
5149 | if (!PEND) {\r | |
5150 | if (IS_CODE_DIGIT_ASCII(enc, PPEEK))\r | |
5151 | return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;\r | |
5152 | }\r | |
5153 | \r | |
5154 | if ((p > prev + enclen(enc, prev)) && !PEND && PPEEK_IS('}')) {\r | |
5155 | PINC;\r | |
5156 | tok->type = TK_CODE_POINT;\r | |
5157 | tok->u.code = (OnigCodePoint )num;\r | |
5158 | }\r | |
5159 | else {\r | |
5160 | /* can't read nothing or invalid format */\r | |
5161 | p = prev;\r | |
5162 | }\r | |
5163 | }\r | |
5164 | break;\r | |
5165 | \r | |
14b0e578 CS |
5166 | case 'x':\r |
5167 | if (PEND) break;\r | |
5168 | \r | |
5169 | prev = p;\r | |
5170 | if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {\r | |
b602265d DG |
5171 | PINC;\r |
5172 | num = scan_unsigned_hexadecimal_number(&p, end, 0, 8, enc);\r | |
5173 | if (num < 0) {\r | |
5174 | if (num == ONIGERR_TOO_BIG_NUMBER)\r | |
5175 | return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;\r | |
5176 | else\r | |
5177 | return num;\r | |
5178 | }\r | |
5179 | if (!PEND) {\r | |
5180 | if (IS_CODE_XDIGIT_ASCII(enc, PPEEK))\r | |
14b0e578 CS |
5181 | return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;\r |
5182 | }\r | |
5183 | \r | |
b602265d DG |
5184 | if ((p > prev + enclen(enc, prev)) && !PEND && PPEEK_IS('}')) {\r |
5185 | PINC;\r | |
5186 | tok->type = TK_CODE_POINT;\r | |
5187 | tok->u.code = (OnigCodePoint )num;\r | |
5188 | }\r | |
5189 | else {\r | |
5190 | /* can't read nothing or invalid format */\r | |
5191 | p = prev;\r | |
5192 | }\r | |
14b0e578 CS |
5193 | }\r |
5194 | else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {\r | |
b602265d DG |
5195 | num = scan_unsigned_hexadecimal_number(&p, end, 0, 2, enc);\r |
5196 | if (num < 0) return num;\r | |
5197 | if (p == prev) { /* can't read nothing. */\r | |
5198 | num = 0; /* but, it's not error */\r | |
5199 | }\r | |
5200 | tok->type = TK_RAW_BYTE;\r | |
5201 | tok->base = 16;\r | |
5202 | tok->u.c = num;\r | |
14b0e578 CS |
5203 | }\r |
5204 | break;\r | |
5205 | \r | |
5206 | case 'u':\r | |
5207 | if (PEND) break;\r | |
5208 | \r | |
5209 | prev = p;\r | |
5210 | if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {\r | |
b602265d DG |
5211 | num = scan_unsigned_hexadecimal_number(&p, end, 4, 4, enc);\r |
5212 | if (num < 0) return num;\r | |
5213 | if (p == prev) { /* can't read nothing. */\r | |
5214 | num = 0; /* but, it's not error */\r | |
5215 | }\r | |
5216 | tok->type = TK_CODE_POINT;\r | |
5217 | tok->base = 16;\r | |
5218 | tok->u.code = (OnigCodePoint )num;\r | |
14b0e578 CS |
5219 | }\r |
5220 | break;\r | |
5221 | \r | |
5222 | case '1': case '2': case '3': case '4':\r | |
5223 | case '5': case '6': case '7': case '8': case '9':\r | |
5224 | PUNFETCH;\r | |
5225 | prev = p;\r | |
5226 | num = onig_scan_unsigned_number(&p, end, enc);\r | |
5227 | if (num < 0 || num > ONIG_MAX_BACKREF_NUM) {\r | |
5228 | goto skip_backref;\r | |
5229 | }\r | |
5230 | \r | |
5231 | if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) && \r | |
b602265d DG |
5232 | (num <= env->num_mem || num <= 9)) { /* This spec. from GNU regex */\r |
5233 | if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {\r | |
5234 | if (num > env->num_mem || IS_NULL(SCANENV_MEMENV(env)[num].node))\r | |
5235 | return ONIGERR_INVALID_BACKREF;\r | |
5236 | }\r | |
5237 | \r | |
5238 | tok->type = TK_BACKREF;\r | |
5239 | tok->u.backref.num = 1;\r | |
5240 | tok->u.backref.ref1 = num;\r | |
5241 | tok->u.backref.by_name = 0;\r | |
14b0e578 | 5242 | #ifdef USE_BACKREF_WITH_LEVEL\r |
b602265d | 5243 | tok->u.backref.exist_level = 0;\r |
14b0e578 | 5244 | #endif\r |
b602265d | 5245 | break;\r |
14b0e578 CS |
5246 | }\r |
5247 | \r | |
5248 | skip_backref:\r | |
5249 | if (c == '8' || c == '9') {\r | |
b602265d DG |
5250 | /* normal char */\r |
5251 | p = prev; PINC;\r | |
5252 | break;\r | |
14b0e578 CS |
5253 | }\r |
5254 | \r | |
5255 | p = prev;\r | |
5256 | /* fall through */\r | |
5257 | case '0':\r | |
5258 | if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {\r | |
b602265d DG |
5259 | prev = p;\r |
5260 | num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), enc);\r | |
5261 | if (num < 0 || num >= 256) return ONIGERR_TOO_BIG_NUMBER;\r | |
5262 | if (p == prev) { /* can't read nothing. */\r | |
5263 | num = 0; /* but, it's not error */\r | |
5264 | }\r | |
5265 | tok->type = TK_RAW_BYTE;\r | |
5266 | tok->base = 8;\r | |
5267 | tok->u.c = num;\r | |
14b0e578 CS |
5268 | }\r |
5269 | else if (c != '0') {\r | |
b602265d | 5270 | PINC;\r |
14b0e578 CS |
5271 | }\r |
5272 | break;\r | |
5273 | \r | |
14b0e578 | 5274 | case 'k':\r |
b602265d DG |
5275 | if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) {\r |
5276 | PFETCH(c);\r | |
5277 | if (c == '<' || c == '\'') {\r | |
5278 | UChar* name_end;\r | |
5279 | int* backs;\r | |
5280 | int back_num;\r | |
5281 | enum REF_NUM num_type;\r | |
14b0e578 | 5282 | \r |
b602265d | 5283 | prev = p;\r |
14b0e578 CS |
5284 | \r |
5285 | #ifdef USE_BACKREF_WITH_LEVEL\r | |
b602265d DG |
5286 | name_end = NULL_UCHARP; /* no need. escape gcc warning. */\r |
5287 | r = fetch_name_with_level((OnigCodePoint )c, &p, end, &name_end,\r | |
5288 | env, &back_num, &tok->u.backref.level, &num_type);\r | |
5289 | if (r == 1) tok->u.backref.exist_level = 1;\r | |
5290 | else tok->u.backref.exist_level = 0;\r | |
14b0e578 | 5291 | #else\r |
b602265d | 5292 | r = fetch_name(c, &p, end, &name_end, env, &back_num, &num_type, 1);\r |
14b0e578 | 5293 | #endif\r |
b602265d DG |
5294 | if (r < 0) return r;\r |
5295 | \r | |
5296 | if (num_type != IS_NOT_NUM) {\r | |
5297 | if (num_type == IS_REL_NUM) {\r | |
5298 | back_num = backref_rel_to_abs(back_num, env);\r | |
5299 | }\r | |
5300 | if (back_num <= 0)\r | |
5301 | return ONIGERR_INVALID_BACKREF;\r | |
5302 | \r | |
5303 | if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {\r | |
5304 | if (back_num > env->num_mem ||\r | |
5305 | IS_NULL(SCANENV_MEMENV(env)[back_num].node))\r | |
5306 | return ONIGERR_INVALID_BACKREF;\r | |
5307 | }\r | |
5308 | tok->type = TK_BACKREF;\r | |
5309 | tok->u.backref.by_name = 0;\r | |
5310 | tok->u.backref.num = 1;\r | |
5311 | tok->u.backref.ref1 = back_num;\r | |
5312 | }\r | |
5313 | else {\r | |
5314 | num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);\r | |
5315 | if (num <= 0) {\r | |
5316 | onig_scan_env_set_error_string(env,\r | |
5317 | ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);\r | |
5318 | return ONIGERR_UNDEFINED_NAME_REFERENCE;\r | |
5319 | }\r | |
5320 | if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {\r | |
5321 | int i;\r | |
5322 | for (i = 0; i < num; i++) {\r | |
5323 | if (backs[i] > env->num_mem ||\r | |
5324 | IS_NULL(SCANENV_MEMENV(env)[backs[i]].node))\r | |
5325 | return ONIGERR_INVALID_BACKREF;\r | |
5326 | }\r | |
5327 | }\r | |
5328 | \r | |
5329 | tok->type = TK_BACKREF;\r | |
5330 | tok->u.backref.by_name = 1;\r | |
5331 | if (num == 1) {\r | |
5332 | tok->u.backref.num = 1;\r | |
5333 | tok->u.backref.ref1 = backs[0];\r | |
5334 | }\r | |
5335 | else {\r | |
5336 | tok->u.backref.num = num;\r | |
5337 | tok->u.backref.refs = backs;\r | |
5338 | }\r | |
5339 | }\r | |
5340 | }\r | |
5341 | else\r | |
5342 | PUNFETCH;\r | |
14b0e578 CS |
5343 | }\r |
5344 | break;\r | |
14b0e578 | 5345 | \r |
b602265d | 5346 | #ifdef USE_CALL\r |
14b0e578 | 5347 | case 'g':\r |
b602265d DG |
5348 | if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) {\r |
5349 | PFETCH(c);\r | |
5350 | if (c == '<' || c == '\'') {\r | |
5351 | int gnum;\r | |
5352 | UChar* name_end;\r | |
5353 | enum REF_NUM num_type;\r | |
5354 | \r | |
5355 | prev = p;\r | |
5356 | r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env,\r | |
5357 | &gnum, &num_type, 1);\r | |
5358 | if (r < 0) return r;\r | |
5359 | \r | |
5360 | if (num_type != IS_NOT_NUM) {\r | |
5361 | if (num_type == IS_REL_NUM) {\r | |
5362 | gnum = backref_rel_to_abs(gnum, env);\r | |
5363 | if (gnum < 0) {\r | |
5364 | onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,\r | |
5365 | prev, name_end);\r | |
5366 | return ONIGERR_UNDEFINED_GROUP_REFERENCE;\r | |
5367 | }\r | |
5368 | }\r | |
5369 | tok->u.call.by_number = 1;\r | |
5370 | tok->u.call.gnum = gnum;\r | |
5371 | }\r | |
5372 | else {\r | |
5373 | tok->u.call.by_number = 0;\r | |
5374 | tok->u.call.gnum = 0;\r | |
5375 | }\r | |
5376 | \r | |
5377 | tok->type = TK_CALL;\r | |
5378 | tok->u.call.name = prev;\r | |
5379 | tok->u.call.name_end = name_end;\r | |
5380 | }\r | |
5381 | else\r | |
5382 | PUNFETCH;\r | |
14b0e578 CS |
5383 | }\r |
5384 | break;\r | |
5385 | #endif\r | |
5386 | \r | |
5387 | case 'Q':\r | |
5388 | if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE)) {\r | |
b602265d | 5389 | tok->type = TK_QUOTE_OPEN;\r |
14b0e578 CS |
5390 | }\r |
5391 | break;\r | |
5392 | \r | |
5393 | case 'p':\r | |
5394 | case 'P':\r | |
b602265d DG |
5395 | if (!PEND && PPEEK_IS('{') &&\r |
5396 | IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {\r | |
5397 | PINC;\r | |
5398 | tok->type = TK_CHAR_PROPERTY;\r | |
5399 | tok->u.prop.not = (c == 'P' ? 1 : 0);\r | |
5400 | \r | |
5401 | if (!PEND &&\r | |
5402 | IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {\r | |
5403 | PFETCH(c);\r | |
5404 | if (c == '^') {\r | |
5405 | tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);\r | |
5406 | }\r | |
5407 | else\r | |
5408 | PUNFETCH;\r | |
5409 | }\r | |
14b0e578 CS |
5410 | }\r |
5411 | break;\r | |
5412 | \r | |
5413 | default:\r | |
b602265d DG |
5414 | {\r |
5415 | OnigCodePoint c2;\r | |
5416 | \r | |
5417 | PUNFETCH;\r | |
5418 | num = fetch_escaped_value(&p, end, env, &c2);\r | |
5419 | if (num < 0) return num;\r | |
5420 | /* set_raw: */\r | |
5421 | if (tok->u.c != c2) {\r | |
5422 | tok->type = TK_CODE_POINT;\r | |
5423 | tok->u.code = c2;\r | |
5424 | }\r | |
5425 | else { /* string */\r | |
5426 | p = tok->backp + enclen(enc, tok->backp);\r | |
5427 | }\r | |
14b0e578 CS |
5428 | }\r |
5429 | break;\r | |
5430 | }\r | |
5431 | }\r | |
5432 | else {\r | |
5433 | tok->u.c = c;\r | |
5434 | tok->escaped = 0;\r | |
5435 | \r | |
5436 | #ifdef USE_VARIABLE_META_CHARS\r | |
5437 | if ((c != ONIG_INEFFECTIVE_META_CHAR) &&\r | |
b602265d | 5438 | IS_SYNTAX_OP(syn, ONIG_SYN_OP_VARIABLE_META_CHARACTERS)) {\r |
14b0e578 | 5439 | if (c == MC_ANYCHAR(syn))\r |
b602265d | 5440 | goto any_char;\r |
14b0e578 | 5441 | else if (c == MC_ANYTIME(syn))\r |
b602265d | 5442 | goto anytime;\r |
14b0e578 | 5443 | else if (c == MC_ZERO_OR_ONE_TIME(syn))\r |
b602265d | 5444 | goto zero_or_one_time;\r |
14b0e578 | 5445 | else if (c == MC_ONE_OR_MORE_TIME(syn))\r |
b602265d | 5446 | goto one_or_more_time;\r |
14b0e578 | 5447 | else if (c == MC_ANYCHAR_ANYTIME(syn)) {\r |
b602265d DG |
5448 | tok->type = TK_ANYCHAR_ANYTIME;\r |
5449 | goto out;\r | |
14b0e578 CS |
5450 | }\r |
5451 | }\r | |
5452 | #endif\r | |
5453 | \r | |
5454 | switch (c) {\r | |
5455 | case '.':\r | |
5456 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_DOT_ANYCHAR)) break;\r | |
5457 | #ifdef USE_VARIABLE_META_CHARS\r | |
5458 | any_char:\r | |
5459 | #endif\r | |
5460 | tok->type = TK_ANYCHAR;\r | |
5461 | break;\r | |
5462 | \r | |
5463 | case '*':\r | |
5464 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ASTERISK_ZERO_INF)) break;\r | |
5465 | #ifdef USE_VARIABLE_META_CHARS\r | |
5466 | anytime:\r | |
5467 | #endif\r | |
5468 | tok->type = TK_OP_REPEAT;\r | |
5469 | tok->u.repeat.lower = 0;\r | |
5470 | tok->u.repeat.upper = REPEAT_INFINITE;\r | |
5471 | goto greedy_check;\r | |
5472 | break;\r | |
5473 | \r | |
5474 | case '+':\r | |
5475 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_PLUS_ONE_INF)) break;\r | |
5476 | #ifdef USE_VARIABLE_META_CHARS\r | |
5477 | one_or_more_time:\r | |
5478 | #endif\r | |
5479 | tok->type = TK_OP_REPEAT;\r | |
5480 | tok->u.repeat.lower = 1;\r | |
5481 | tok->u.repeat.upper = REPEAT_INFINITE;\r | |
5482 | goto greedy_check;\r | |
5483 | break;\r | |
5484 | \r | |
5485 | case '?':\r | |
5486 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_ZERO_ONE)) break;\r | |
5487 | #ifdef USE_VARIABLE_META_CHARS\r | |
5488 | zero_or_one_time:\r | |
5489 | #endif\r | |
5490 | tok->type = TK_OP_REPEAT;\r | |
5491 | tok->u.repeat.lower = 0;\r | |
5492 | tok->u.repeat.upper = 1;\r | |
5493 | goto greedy_check;\r | |
5494 | break;\r | |
5495 | \r | |
5496 | case '{':\r | |
5497 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break;\r | |
5498 | r = fetch_range_quantifier(&p, end, tok, env);\r | |
5499 | if (r < 0) return r; /* error */\r | |
5500 | if (r == 0) goto greedy_check;\r | |
5501 | else if (r == 2) { /* {n} */\r | |
b602265d DG |
5502 | if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))\r |
5503 | goto possessive_check;\r | |
14b0e578 | 5504 | \r |
b602265d | 5505 | goto greedy_check;\r |
14b0e578 CS |
5506 | }\r |
5507 | /* r == 1 : normal char */\r | |
5508 | break;\r | |
5509 | \r | |
5510 | case '|':\r | |
5511 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_VBAR_ALT)) break;\r | |
5512 | tok->type = TK_ALT;\r | |
5513 | break;\r | |
5514 | \r | |
5515 | case '(':\r | |
b602265d | 5516 | if (!PEND && PPEEK_IS('?') &&\r |
14b0e578 CS |
5517 | IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {\r |
5518 | PINC;\r | |
b602265d DG |
5519 | if (! PEND) {\r |
5520 | c = PPEEK;\r | |
5521 | if (c == '#') {\r | |
14b0e578 | 5522 | PFETCH(c);\r |
b602265d DG |
5523 | while (1) {\r |
5524 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
5525 | PFETCH(c);\r | |
5526 | if (c == MC_ESC(syn)) {\r | |
5527 | if (! PEND) PFETCH(c);\r | |
5528 | }\r | |
5529 | else {\r | |
5530 | if (c == ')') break;\r | |
5531 | }\r | |
14b0e578 | 5532 | }\r |
b602265d DG |
5533 | goto start;\r |
5534 | }\r | |
5535 | else if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_PERL_SUBEXP_CALL)) {\r | |
5536 | int gnum;\r | |
5537 | UChar* name;\r | |
5538 | UChar* name_end;\r | |
5539 | enum REF_NUM num_type;\r | |
5540 | \r | |
5541 | switch (c) {\r | |
5542 | case '&':\r | |
5543 | {\r | |
5544 | PINC;\r | |
5545 | name = p;\r | |
5546 | r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &gnum,\r | |
5547 | &num_type, 0);\r | |
5548 | if (r < 0) return r;\r | |
5549 | \r | |
5550 | tok->type = TK_CALL;\r | |
5551 | tok->u.call.by_number = 0;\r | |
5552 | tok->u.call.gnum = 0;\r | |
5553 | tok->u.call.name = name;\r | |
5554 | tok->u.call.name_end = name_end;\r | |
5555 | }\r | |
5556 | break;\r | |
5557 | \r | |
5558 | case 'R':\r | |
5559 | tok->type = TK_CALL;\r | |
5560 | tok->u.call.by_number = 1;\r | |
5561 | tok->u.call.gnum = 0;\r | |
5562 | tok->u.call.name = p;\r | |
5563 | PINC;\r | |
5564 | if (! PPEEK_IS(')')) return ONIGERR_INVALID_GROUP_NAME;\r | |
5565 | tok->u.call.name_end = p;\r | |
5566 | break;\r | |
5567 | \r | |
5568 | case '-':\r | |
5569 | case '+':\r | |
5570 | goto lparen_qmark_num;\r | |
5571 | break;\r | |
5572 | default:\r | |
5573 | if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto lparen_qmark_end;\r | |
5574 | \r | |
5575 | lparen_qmark_num:\r | |
5576 | {\r | |
5577 | name = p;\r | |
5578 | r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env,\r | |
5579 | &gnum, &num_type, 1);\r | |
5580 | if (r < 0) return r;\r | |
5581 | \r | |
5582 | if (num_type == IS_NOT_NUM) {\r | |
5583 | return ONIGERR_INVALID_GROUP_NAME;\r | |
5584 | }\r | |
5585 | else {\r | |
5586 | if (num_type == IS_REL_NUM) {\r | |
5587 | gnum = backref_rel_to_abs(gnum, env);\r | |
5588 | if (gnum < 0) {\r | |
5589 | onig_scan_env_set_error_string(env,\r | |
5590 | ONIGERR_UNDEFINED_NAME_REFERENCE, name, name_end);\r | |
5591 | return ONIGERR_UNDEFINED_GROUP_REFERENCE;\r | |
5592 | }\r | |
5593 | }\r | |
5594 | tok->u.call.by_number = 1;\r | |
5595 | tok->u.call.gnum = gnum;\r | |
5596 | }\r | |
5597 | \r | |
5598 | tok->type = TK_CALL;\r | |
5599 | tok->u.call.name = name;\r | |
5600 | tok->u.call.name_end = name_end;\r | |
5601 | }\r | |
5602 | break;\r | |
14b0e578 CS |
5603 | }\r |
5604 | }\r | |
14b0e578 | 5605 | }\r |
b602265d | 5606 | lparen_qmark_end:\r |
14b0e578 CS |
5607 | PUNFETCH;\r |
5608 | }\r | |
5609 | \r | |
5610 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;\r | |
5611 | tok->type = TK_SUBEXP_OPEN;\r | |
5612 | break;\r | |
5613 | \r | |
5614 | case ')':\r | |
5615 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;\r | |
5616 | tok->type = TK_SUBEXP_CLOSE;\r | |
5617 | break;\r | |
5618 | \r | |
5619 | case '^':\r | |
5620 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;\r | |
5621 | tok->type = TK_ANCHOR;\r | |
b602265d DG |
5622 | tok->u.subtype = (IS_SINGLELINE(env->options)\r |
5623 | ? ANCHOR_BEGIN_BUF : ANCHOR_BEGIN_LINE);\r | |
14b0e578 CS |
5624 | break;\r |
5625 | \r | |
5626 | case '$':\r | |
5627 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;\r | |
5628 | tok->type = TK_ANCHOR;\r | |
b602265d DG |
5629 | tok->u.subtype = (IS_SINGLELINE(env->options)\r |
5630 | ? ANCHOR_SEMI_END_BUF : ANCHOR_END_LINE);\r | |
14b0e578 CS |
5631 | break;\r |
5632 | \r | |
5633 | case '[':\r | |
5634 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACKET_CC)) break;\r | |
5635 | tok->type = TK_CC_OPEN;\r | |
5636 | break;\r | |
5637 | \r | |
5638 | case ']':\r | |
5639 | if (*src > env->pattern) /* /].../ is allowed. */\r | |
b602265d | 5640 | CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]");\r |
14b0e578 CS |
5641 | break;\r |
5642 | \r | |
5643 | case '#':\r | |
b602265d DG |
5644 | if (IS_EXTEND(env->options)) {\r |
5645 | while (!PEND) {\r | |
5646 | PFETCH(c);\r | |
5647 | if (ONIGENC_IS_CODE_NEWLINE(enc, c))\r | |
5648 | break;\r | |
5649 | }\r | |
5650 | goto start;\r | |
5651 | break;\r | |
14b0e578 CS |
5652 | }\r |
5653 | break;\r | |
5654 | \r | |
5655 | case ' ': case '\t': case '\n': case '\r': case '\f':\r | |
b602265d DG |
5656 | if (IS_EXTEND(env->options))\r |
5657 | goto start;\r | |
14b0e578 CS |
5658 | break;\r |
5659 | \r | |
5660 | default:\r | |
5661 | /* string */\r | |
5662 | break;\r | |
5663 | }\r | |
5664 | }\r | |
5665 | \r | |
5666 | #ifdef USE_VARIABLE_META_CHARS\r | |
5667 | out:\r | |
5668 | #endif\r | |
5669 | *src = p;\r | |
5670 | return tok->type;\r | |
5671 | }\r | |
5672 | \r | |
5673 | static int\r | |
5674 | add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not,\r | |
b602265d DG |
5675 | OnigEncoding enc ARG_UNUSED, OnigCodePoint sb_out,\r |
5676 | const OnigCodePoint mbr[])\r | |
14b0e578 CS |
5677 | {\r |
5678 | int i, r;\r | |
5679 | OnigCodePoint j;\r | |
5680 | \r | |
5681 | int n = ONIGENC_CODE_RANGE_NUM(mbr);\r | |
5682 | \r | |
5683 | if (not == 0) {\r | |
5684 | for (i = 0; i < n; i++) {\r | |
5685 | for (j = ONIGENC_CODE_RANGE_FROM(mbr, i);\r | |
5686 | j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) {\r | |
b602265d DG |
5687 | if (j >= sb_out) {\r |
5688 | if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) {\r | |
5689 | r = add_code_range_to_buf(&(cc->mbuf), j,\r | |
5690 | ONIGENC_CODE_RANGE_TO(mbr, i));\r | |
5691 | if (r != 0) return r;\r | |
5692 | i++;\r | |
5693 | }\r | |
5694 | \r | |
5695 | goto sb_end;\r | |
5696 | }\r | |
14b0e578 CS |
5697 | BITSET_SET_BIT(cc->bs, j);\r |
5698 | }\r | |
5699 | }\r | |
5700 | \r | |
5701 | sb_end:\r | |
5702 | for ( ; i < n; i++) {\r | |
5703 | r = add_code_range_to_buf(&(cc->mbuf),\r | |
5704 | ONIGENC_CODE_RANGE_FROM(mbr, i),\r | |
5705 | ONIGENC_CODE_RANGE_TO(mbr, i));\r | |
5706 | if (r != 0) return r;\r | |
5707 | }\r | |
5708 | }\r | |
5709 | else {\r | |
5710 | OnigCodePoint prev = 0;\r | |
5711 | \r | |
5712 | for (i = 0; i < n; i++) {\r | |
b602265d DG |
5713 | for (j = prev; j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) {\r |
5714 | if (j >= sb_out) {\r | |
5715 | goto sb_end2;\r | |
5716 | }\r | |
5717 | BITSET_SET_BIT(cc->bs, j);\r | |
14b0e578 CS |
5718 | }\r |
5719 | prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;\r | |
5720 | }\r | |
5721 | for (j = prev; j < sb_out; j++) {\r | |
5722 | BITSET_SET_BIT(cc->bs, j);\r | |
5723 | }\r | |
5724 | \r | |
5725 | sb_end2:\r | |
5726 | prev = sb_out;\r | |
5727 | \r | |
5728 | for (i = 0; i < n; i++) {\r | |
5729 | if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) {\r | |
b602265d | 5730 | r = add_code_range_to_buf(&(cc->mbuf), prev,\r |
14b0e578 | 5731 | ONIGENC_CODE_RANGE_FROM(mbr, i) - 1);\r |
b602265d | 5732 | if (r != 0) return r;\r |
14b0e578 CS |
5733 | }\r |
5734 | prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;\r | |
b602265d DG |
5735 | if (prev == 0) goto end;\r |
5736 | }\r | |
5737 | \r | |
5738 | r = add_code_range_to_buf(&(cc->mbuf), prev, MAX_CODE_POINT);\r | |
5739 | if (r != 0) return r;\r | |
5740 | }\r | |
5741 | \r | |
5742 | end:\r | |
5743 | return 0;\r | |
5744 | }\r | |
5745 | \r | |
5746 | static int\r | |
5747 | add_ctype_to_cc_by_range_limit(CClassNode* cc, int ctype ARG_UNUSED, int not,\r | |
5748 | OnigEncoding enc ARG_UNUSED,\r | |
5749 | OnigCodePoint sb_out,\r | |
5750 | const OnigCodePoint mbr[], OnigCodePoint limit)\r | |
5751 | {\r | |
5752 | int i, r;\r | |
5753 | OnigCodePoint j;\r | |
5754 | OnigCodePoint from;\r | |
5755 | OnigCodePoint to;\r | |
5756 | \r | |
5757 | int n = ONIGENC_CODE_RANGE_NUM(mbr);\r | |
5758 | \r | |
5759 | if (not == 0) {\r | |
5760 | for (i = 0; i < n; i++) {\r | |
5761 | for (j = ONIGENC_CODE_RANGE_FROM(mbr, i);\r | |
5762 | j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) {\r | |
5763 | if (j > limit) goto end;\r | |
5764 | if (j >= sb_out) {\r | |
5765 | if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) {\r | |
5766 | to = ONIGENC_CODE_RANGE_TO(mbr, i);\r | |
5767 | if (to > limit) to = limit;\r | |
5768 | r = add_code_range_to_buf(&(cc->mbuf), j, to);\r | |
5769 | if (r != 0) return r;\r | |
5770 | i++;\r | |
5771 | }\r | |
5772 | \r | |
5773 | goto sb_end;\r | |
5774 | }\r | |
5775 | BITSET_SET_BIT(cc->bs, j);\r | |
5776 | }\r | |
14b0e578 | 5777 | }\r |
b602265d DG |
5778 | \r |
5779 | sb_end:\r | |
5780 | for ( ; i < n; i++) {\r | |
5781 | from = ONIGENC_CODE_RANGE_FROM(mbr, i);\r | |
5782 | to = ONIGENC_CODE_RANGE_TO(mbr, i);\r | |
5783 | if (from > limit) break;\r | |
5784 | if (to > limit) to = limit;\r | |
5785 | r = add_code_range_to_buf(&(cc->mbuf), from, to);\r | |
14b0e578 CS |
5786 | if (r != 0) return r;\r |
5787 | }\r | |
5788 | }\r | |
b602265d DG |
5789 | else {\r |
5790 | OnigCodePoint prev = 0;\r | |
5791 | \r | |
5792 | for (i = 0; i < n; i++) {\r | |
5793 | from = ONIGENC_CODE_RANGE_FROM(mbr, i);\r | |
5794 | if (from > limit) {\r | |
5795 | for (j = prev; j < sb_out; j++) {\r | |
5796 | BITSET_SET_BIT(cc->bs, j);\r | |
5797 | }\r | |
5798 | goto sb_end2;\r | |
5799 | }\r | |
5800 | for (j = prev; j < from; j++) {\r | |
5801 | if (j >= sb_out) goto sb_end2;\r | |
5802 | BITSET_SET_BIT(cc->bs, j);\r | |
5803 | }\r | |
5804 | prev = ONIGENC_CODE_RANGE_TO(mbr, i);\r | |
5805 | if (prev > limit) prev = limit;\r | |
5806 | prev++;\r | |
5807 | if (prev == 0) goto end;\r | |
5808 | }\r | |
5809 | for (j = prev; j < sb_out; j++) {\r | |
5810 | BITSET_SET_BIT(cc->bs, j);\r | |
5811 | }\r | |
5812 | \r | |
5813 | sb_end2:\r | |
5814 | prev = sb_out;\r | |
5815 | \r | |
5816 | for (i = 0; i < n; i++) {\r | |
5817 | from = ONIGENC_CODE_RANGE_FROM(mbr, i);\r | |
5818 | if (from > limit) goto last;\r | |
5819 | \r | |
5820 | if (prev < from) {\r | |
5821 | r = add_code_range_to_buf(&(cc->mbuf), prev, from - 1);\r | |
5822 | if (r != 0) return r;\r | |
5823 | }\r | |
5824 | prev = ONIGENC_CODE_RANGE_TO(mbr, i);\r | |
5825 | if (prev > limit) prev = limit;\r | |
5826 | prev++;\r | |
5827 | if (prev == 0) goto end;\r | |
5828 | }\r | |
5829 | \r | |
5830 | last:\r | |
5831 | r = add_code_range_to_buf(&(cc->mbuf), prev, MAX_CODE_POINT);\r | |
5832 | if (r != 0) return r;\r | |
5833 | }\r | |
14b0e578 | 5834 | \r |
b602265d | 5835 | end:\r |
14b0e578 CS |
5836 | return 0;\r |
5837 | }\r | |
5838 | \r | |
5839 | static int\r | |
5840 | add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)\r | |
5841 | {\r | |
b602265d DG |
5842 | #define ASCII_LIMIT 127\r |
5843 | \r | |
14b0e578 | 5844 | int c, r;\r |
b602265d | 5845 | int ascii_mode;\r |
14b0e578 | 5846 | const OnigCodePoint *ranges;\r |
b602265d | 5847 | OnigCodePoint limit;\r |
14b0e578 CS |
5848 | OnigCodePoint sb_out;\r |
5849 | OnigEncoding enc = env->enc;\r | |
5850 | \r | |
b602265d DG |
5851 | ascii_mode = IS_ASCII_MODE_CTYPE_OPTION(ctype, env->options);\r |
5852 | \r | |
14b0e578 CS |
5853 | r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges);\r |
5854 | if (r == 0) {\r | |
b602265d DG |
5855 | if (ascii_mode == 0)\r |
5856 | r = add_ctype_to_cc_by_range(cc, ctype, not, env->enc, sb_out, ranges);\r | |
5857 | else\r | |
5858 | r = add_ctype_to_cc_by_range_limit(cc, ctype, not, env->enc, sb_out,\r | |
5859 | ranges, ASCII_LIMIT);\r | |
5860 | return r;\r | |
14b0e578 CS |
5861 | }\r |
5862 | else if (r != ONIG_NO_SUPPORT_CONFIG) {\r | |
5863 | return r;\r | |
5864 | }\r | |
5865 | \r | |
5866 | r = 0;\r | |
b602265d DG |
5867 | limit = ascii_mode ? ASCII_LIMIT : SINGLE_BYTE_SIZE;\r |
5868 | \r | |
14b0e578 CS |
5869 | switch (ctype) {\r |
5870 | case ONIGENC_CTYPE_ALPHA:\r | |
5871 | case ONIGENC_CTYPE_BLANK:\r | |
5872 | case ONIGENC_CTYPE_CNTRL:\r | |
5873 | case ONIGENC_CTYPE_DIGIT:\r | |
5874 | case ONIGENC_CTYPE_LOWER:\r | |
5875 | case ONIGENC_CTYPE_PUNCT:\r | |
5876 | case ONIGENC_CTYPE_SPACE:\r | |
5877 | case ONIGENC_CTYPE_UPPER:\r | |
5878 | case ONIGENC_CTYPE_XDIGIT:\r | |
5879 | case ONIGENC_CTYPE_ASCII:\r | |
5880 | case ONIGENC_CTYPE_ALNUM:\r | |
5881 | if (not != 0) {\r | |
b602265d DG |
5882 | for (c = 0; c < (int )limit; c++) {\r |
5883 | if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))\r | |
5884 | BITSET_SET_BIT(cc->bs, c);\r | |
5885 | }\r | |
5886 | for (c = limit; c < SINGLE_BYTE_SIZE; c++) {\r | |
5887 | BITSET_SET_BIT(cc->bs, c);\r | |
14b0e578 | 5888 | }\r |
b602265d | 5889 | \r |
14b0e578 CS |
5890 | ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);\r |
5891 | }\r | |
5892 | else {\r | |
b602265d DG |
5893 | for (c = 0; c < (int )limit; c++) {\r |
5894 | if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))\r | |
5895 | BITSET_SET_BIT(cc->bs, c);\r | |
14b0e578 CS |
5896 | }\r |
5897 | }\r | |
5898 | break;\r | |
5899 | \r | |
5900 | case ONIGENC_CTYPE_GRAPH:\r | |
5901 | case ONIGENC_CTYPE_PRINT:\r | |
b602265d | 5902 | case ONIGENC_CTYPE_WORD:\r |
14b0e578 | 5903 | if (not != 0) {\r |
b602265d DG |
5904 | for (c = 0; c < (int )limit; c++) {\r |
5905 | if (ONIGENC_CODE_TO_MBCLEN(enc, c) > 0 /* check invalid code point */\r | |
5906 | && ! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))\r | |
5907 | BITSET_SET_BIT(cc->bs, c);\r | |
14b0e578 | 5908 | }\r |
b602265d DG |
5909 | for (c = limit; c < SINGLE_BYTE_SIZE; c++) {\r |
5910 | if (ONIGENC_CODE_TO_MBCLEN(enc, c) > 0)\r | |
5911 | BITSET_SET_BIT(cc->bs, c);\r | |
14b0e578 | 5912 | }\r |
14b0e578 CS |
5913 | }\r |
5914 | else {\r | |
b602265d DG |
5915 | for (c = 0; c < (int )limit; c++) {\r |
5916 | if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))\r | |
5917 | BITSET_SET_BIT(cc->bs, c);\r | |
14b0e578 | 5918 | }\r |
b602265d DG |
5919 | if (ascii_mode == 0)\r |
5920 | ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);\r | |
14b0e578 CS |
5921 | }\r |
5922 | break;\r | |
5923 | \r | |
5924 | default:\r | |
5925 | return ONIGERR_PARSER_BUG;\r | |
5926 | break;\r | |
5927 | }\r | |
5928 | \r | |
5929 | return r;\r | |
5930 | }\r | |
5931 | \r | |
5932 | static int\r | |
5933 | parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)\r | |
5934 | {\r | |
5935 | #define POSIX_BRACKET_CHECK_LIMIT_LENGTH 20\r | |
5936 | #define POSIX_BRACKET_NAME_MIN_LEN 4\r | |
5937 | \r | |
5938 | static PosixBracketEntryType PBS[] = {\r | |
5939 | { (UChar* )"alnum", ONIGENC_CTYPE_ALNUM, 5 },\r | |
5940 | { (UChar* )"alpha", ONIGENC_CTYPE_ALPHA, 5 },\r | |
5941 | { (UChar* )"blank", ONIGENC_CTYPE_BLANK, 5 },\r | |
5942 | { (UChar* )"cntrl", ONIGENC_CTYPE_CNTRL, 5 },\r | |
5943 | { (UChar* )"digit", ONIGENC_CTYPE_DIGIT, 5 },\r | |
5944 | { (UChar* )"graph", ONIGENC_CTYPE_GRAPH, 5 },\r | |
5945 | { (UChar* )"lower", ONIGENC_CTYPE_LOWER, 5 },\r | |
5946 | { (UChar* )"print", ONIGENC_CTYPE_PRINT, 5 },\r | |
5947 | { (UChar* )"punct", ONIGENC_CTYPE_PUNCT, 5 },\r | |
5948 | { (UChar* )"space", ONIGENC_CTYPE_SPACE, 5 },\r | |
5949 | { (UChar* )"upper", ONIGENC_CTYPE_UPPER, 5 },\r | |
5950 | { (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 },\r | |
5951 | { (UChar* )"ascii", ONIGENC_CTYPE_ASCII, 5 },\r | |
5952 | { (UChar* )"word", ONIGENC_CTYPE_WORD, 4 },\r | |
5953 | { (UChar* )NULL, -1, 0 }\r | |
5954 | };\r | |
5955 | \r | |
5956 | PosixBracketEntryType *pb;\r | |
5957 | int not, i, r;\r | |
5958 | OnigCodePoint c;\r | |
5959 | OnigEncoding enc = env->enc;\r | |
5960 | UChar *p = *src;\r | |
5961 | \r | |
5962 | if (PPEEK_IS('^')) {\r | |
5963 | PINC_S;\r | |
5964 | not = 1;\r | |
5965 | }\r | |
5966 | else\r | |
5967 | not = 0;\r | |
5968 | \r | |
5969 | if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MIN_LEN + 3)\r | |
5970 | goto not_posix_bracket;\r | |
5971 | \r | |
5972 | for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {\r | |
5973 | if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) {\r | |
5974 | p = (UChar* )onigenc_step(enc, p, end, pb->len);\r | |
5975 | if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0)\r | |
5976 | return ONIGERR_INVALID_POSIX_BRACKET_TYPE;\r | |
5977 | \r | |
5978 | r = add_ctype_to_cc(cc, pb->ctype, not, env);\r | |
5979 | if (r != 0) return r;\r | |
5980 | \r | |
5981 | PINC_S; PINC_S;\r | |
5982 | *src = p;\r | |
5983 | return 0;\r | |
5984 | }\r | |
5985 | }\r | |
5986 | \r | |
5987 | not_posix_bracket:\r | |
5988 | c = 0;\r | |
5989 | i = 0;\r | |
5990 | while (!PEND && ((c = PPEEK) != ':') && c != ']') {\r | |
5991 | PINC_S;\r | |
5992 | if (++i > POSIX_BRACKET_CHECK_LIMIT_LENGTH) break;\r | |
5993 | }\r | |
5994 | if (c == ':' && ! PEND) {\r | |
5995 | PINC_S;\r | |
5996 | if (! PEND) {\r | |
5997 | PFETCH_S(c);\r | |
5998 | if (c == ']')\r | |
5999 | return ONIGERR_INVALID_POSIX_BRACKET_TYPE;\r | |
6000 | }\r | |
6001 | }\r | |
6002 | \r | |
6003 | return 1; /* 1: is not POSIX bracket, but no error. */\r | |
6004 | }\r | |
6005 | \r | |
6006 | static int\r | |
6007 | fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env)\r | |
6008 | {\r | |
6009 | int r;\r | |
6010 | OnigCodePoint c;\r | |
6011 | OnigEncoding enc = env->enc;\r | |
6012 | UChar *prev, *start, *p = *src;\r | |
6013 | \r | |
6014 | r = 0;\r | |
6015 | start = prev = p;\r | |
6016 | \r | |
6017 | while (!PEND) {\r | |
6018 | prev = p;\r | |
6019 | PFETCH_S(c);\r | |
6020 | if (c == '}') {\r | |
6021 | r = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, start, prev);\r | |
6022 | if (r < 0) break;\r | |
6023 | \r | |
6024 | *src = p;\r | |
6025 | return r;\r | |
6026 | }\r | |
6027 | else if (c == '(' || c == ')' || c == '{' || c == '|') {\r | |
6028 | r = ONIGERR_INVALID_CHAR_PROPERTY_NAME;\r | |
6029 | break;\r | |
6030 | }\r | |
6031 | }\r | |
6032 | \r | |
6033 | onig_scan_env_set_error_string(env, r, *src, prev);\r | |
6034 | return r;\r | |
6035 | }\r | |
6036 | \r | |
6037 | static int\r | |
b602265d | 6038 | parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)\r |
14b0e578 CS |
6039 | {\r |
6040 | int r, ctype;\r | |
6041 | CClassNode* cc;\r | |
6042 | \r | |
6043 | ctype = fetch_char_property_to_ctype(src, end, env);\r | |
6044 | if (ctype < 0) return ctype;\r | |
6045 | \r | |
6046 | *np = node_new_cclass();\r | |
6047 | CHECK_NULL_RETURN_MEMERR(*np);\r | |
b602265d | 6048 | cc = CCLASS_(*np);\r |
14b0e578 CS |
6049 | r = add_ctype_to_cc(cc, ctype, 0, env);\r |
6050 | if (r != 0) return r;\r | |
6051 | if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);\r | |
6052 | \r | |
6053 | return 0;\r | |
6054 | }\r | |
6055 | \r | |
6056 | \r | |
/* Parser state while reading the items of a character class. */
enum CCSTATE {
  CCS_VALUE    = 0,  /* a single value or class was read last */
  CCS_RANGE    = 1,  /* a range operator was accepted; its end value is awaited */
  CCS_COMPLETE = 2,  /* a range has just been completed */
  CCS_START    = 3   /* nothing read yet (also set again after "&&") */
};
6063 | \r | |
/* Kind of the value most recently read inside a character class. */
enum CCVALTYPE {
  CCV_SB         = 0,  /* value recorded in the class bitset */
  CCV_CODE_POINT = 1,  /* value recorded as a code range in mbuf */
  CCV_CLASS      = 2   /* a whole class/ctype, not a single value */
};
6069 | \r | |
6070 | static int\r | |
6071 | next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type,\r | |
b602265d | 6072 | enum CCSTATE* state, ScanEnv* env)\r |
14b0e578 CS |
6073 | {\r |
6074 | int r;\r | |
6075 | \r | |
6076 | if (*state == CCS_RANGE)\r | |
6077 | return ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE;\r | |
6078 | \r | |
6079 | if (*state == CCS_VALUE && *type != CCV_CLASS) {\r | |
6080 | if (*type == CCV_SB)\r | |
6081 | BITSET_SET_BIT(cc->bs, (int )(*vs));\r | |
6082 | else if (*type == CCV_CODE_POINT) {\r | |
6083 | r = add_code_range(&(cc->mbuf), env, *vs, *vs);\r | |
6084 | if (r < 0) return r;\r | |
6085 | }\r | |
6086 | }\r | |
6087 | \r | |
6088 | *state = CCS_VALUE;\r | |
6089 | *type = CCV_CLASS;\r | |
6090 | return 0;\r | |
6091 | }\r | |
6092 | \r | |
6093 | static int\r | |
b602265d DG |
6094 | next_state_val(CClassNode* cc, OnigCodePoint *from, OnigCodePoint to,\r |
6095 | int* from_israw, int to_israw,\r | |
6096 | enum CCVALTYPE intype, enum CCVALTYPE* type,\r | |
6097 | enum CCSTATE* state, ScanEnv* env)\r | |
14b0e578 CS |
6098 | {\r |
6099 | int r;\r | |
6100 | \r | |
6101 | switch (*state) {\r | |
6102 | case CCS_VALUE:\r | |
b602265d DG |
6103 | if (*type == CCV_SB) {\r |
6104 | if (*from > 0xff)\r | |
6105 | return ONIGERR_INVALID_CODE_POINT_VALUE;\r | |
6106 | \r | |
6107 | BITSET_SET_BIT(cc->bs, (int )(*from));\r | |
6108 | }\r | |
14b0e578 | 6109 | else if (*type == CCV_CODE_POINT) {\r |
b602265d | 6110 | r = add_code_range(&(cc->mbuf), env, *from, *from);\r |
14b0e578 CS |
6111 | if (r < 0) return r;\r |
6112 | }\r | |
6113 | break;\r | |
6114 | \r | |
6115 | case CCS_RANGE:\r | |
6116 | if (intype == *type) {\r | |
6117 | if (intype == CCV_SB) {\r | |
b602265d | 6118 | if (*from > 0xff || to > 0xff)\r |
14b0e578 CS |
6119 | return ONIGERR_INVALID_CODE_POINT_VALUE;\r |
6120 | \r | |
b602265d DG |
6121 | if (*from > to) {\r |
6122 | if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))\r | |
6123 | goto ccs_range_end;\r | |
6124 | else\r | |
6125 | return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;\r | |
6126 | }\r | |
6127 | bitset_set_range(cc->bs, (int )*from, (int )to);\r | |
14b0e578 CS |
6128 | }\r |
6129 | else {\r | |
b602265d DG |
6130 | r = add_code_range(&(cc->mbuf), env, *from, to);\r |
6131 | if (r < 0) return r;\r | |
14b0e578 CS |
6132 | }\r |
6133 | }\r | |
6134 | else {\r | |
b602265d DG |
6135 | if (*from > to) {\r |
6136 | if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))\r | |
6137 | goto ccs_range_end;\r | |
6138 | else\r | |
6139 | return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;\r | |
14b0e578 | 6140 | }\r |
b602265d DG |
6141 | bitset_set_range(cc->bs, (int )*from, (int )(to < 0xff ? to : 0xff));\r |
6142 | r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*from, to);\r | |
6143 | if (r < 0) return r;\r | |
14b0e578 CS |
6144 | }\r |
6145 | ccs_range_end:\r | |
6146 | *state = CCS_COMPLETE;\r | |
6147 | break;\r | |
6148 | \r | |
6149 | case CCS_COMPLETE:\r | |
6150 | case CCS_START:\r | |
6151 | *state = CCS_VALUE;\r | |
6152 | break;\r | |
6153 | \r | |
6154 | default:\r | |
6155 | break;\r | |
6156 | }\r | |
6157 | \r | |
b602265d DG |
6158 | *from_israw = to_israw;\r |
6159 | *from = to;\r | |
6160 | *type = intype;\r | |
14b0e578 CS |
6161 | return 0;\r |
6162 | }\r | |
6163 | \r | |
6164 | static int\r | |
6165 | code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,\r | |
b602265d | 6166 | ScanEnv* env)\r |
14b0e578 CS |
6167 | {\r |
6168 | int in_esc;\r | |
6169 | OnigCodePoint code;\r | |
6170 | OnigEncoding enc = env->enc;\r | |
6171 | UChar* p = from;\r | |
6172 | \r | |
6173 | in_esc = 0;\r | |
6174 | while (! PEND) {\r | |
6175 | if (ignore_escaped && in_esc) {\r | |
6176 | in_esc = 0;\r | |
6177 | }\r | |
6178 | else {\r | |
6179 | PFETCH_S(code);\r | |
6180 | if (code == c) return 1;\r | |
6181 | if (code == MC_ESC(env->syntax)) in_esc = 1;\r | |
6182 | }\r | |
6183 | }\r | |
6184 | return 0;\r | |
6185 | }\r | |
6186 | \r | |
6187 | static int\r | |
b602265d | 6188 | parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)\r |
14b0e578 CS |
6189 | {\r |
6190 | int r, neg, len, fetched, and_start;\r | |
6191 | OnigCodePoint v, vs;\r | |
6192 | UChar *p;\r | |
6193 | Node* node;\r | |
6194 | CClassNode *cc, *prev_cc;\r | |
6195 | CClassNode work_cc;\r | |
6196 | \r | |
6197 | enum CCSTATE state;\r | |
6198 | enum CCVALTYPE val_type, in_type;\r | |
6199 | int val_israw, in_israw;\r | |
6200 | \r | |
14b0e578 | 6201 | *np = NULL_NODE;\r |
b602265d DG |
6202 | env->parse_depth++;\r |
6203 | if (env->parse_depth > ParseDepthLimit)\r | |
6204 | return ONIGERR_PARSE_DEPTH_LIMIT_OVER;\r | |
6205 | prev_cc = (CClassNode* )NULL;\r | |
14b0e578 CS |
6206 | r = fetch_token_in_cc(tok, src, end, env);\r |
6207 | if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) {\r | |
6208 | neg = 1;\r | |
6209 | r = fetch_token_in_cc(tok, src, end, env);\r | |
6210 | }\r | |
6211 | else {\r | |
6212 | neg = 0;\r | |
6213 | }\r | |
6214 | \r | |
6215 | if (r < 0) return r;\r | |
6216 | if (r == TK_CC_CLOSE) {\r | |
6217 | if (! code_exist_check((OnigCodePoint )']',\r | |
6218 | *src, env->pattern_end, 1, env))\r | |
6219 | return ONIGERR_EMPTY_CHAR_CLASS;\r | |
6220 | \r | |
6221 | CC_ESC_WARN(env, (UChar* )"]");\r | |
6222 | r = tok->type = TK_CHAR; /* allow []...] */\r | |
6223 | }\r | |
6224 | \r | |
6225 | *np = node = node_new_cclass();\r | |
6226 | CHECK_NULL_RETURN_MEMERR(node);\r | |
b602265d | 6227 | cc = CCLASS_(node);\r |
14b0e578 CS |
6228 | \r |
6229 | and_start = 0;\r | |
6230 | state = CCS_START;\r | |
6231 | p = *src;\r | |
6232 | while (r != TK_CC_CLOSE) {\r | |
6233 | fetched = 0;\r | |
6234 | switch (r) {\r | |
6235 | case TK_CHAR:\r | |
b602265d | 6236 | any_char_in:\r |
14b0e578 CS |
6237 | len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.c);\r |
6238 | if (len > 1) {\r | |
b602265d | 6239 | in_type = CCV_CODE_POINT;\r |
14b0e578 CS |
6240 | }\r |
6241 | else if (len < 0) {\r | |
b602265d DG |
6242 | r = len;\r |
6243 | goto err;\r | |
14b0e578 CS |
6244 | }\r |
6245 | else {\r | |
b602265d DG |
6246 | /* sb_char: */\r |
6247 | in_type = CCV_SB;\r | |
14b0e578 CS |
6248 | }\r |
6249 | v = (OnigCodePoint )tok->u.c;\r | |
6250 | in_israw = 0;\r | |
6251 | goto val_entry2;\r | |
6252 | break;\r | |
6253 | \r | |
6254 | case TK_RAW_BYTE:\r | |
6255 | /* tok->base != 0 : octal or hexadec. */\r | |
6256 | if (! ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base != 0) {\r | |
b602265d DG |
6257 | UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];\r |
6258 | UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN;\r | |
6259 | UChar* psave = p;\r | |
6260 | int i, base = tok->base;\r | |
6261 | \r | |
6262 | buf[0] = tok->u.c;\r | |
6263 | for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) {\r | |
6264 | r = fetch_token_in_cc(tok, &p, end, env);\r | |
6265 | if (r < 0) goto err;\r | |
6266 | if (r != TK_RAW_BYTE || tok->base != base) {\r | |
6267 | fetched = 1;\r | |
6268 | break;\r | |
6269 | }\r | |
6270 | buf[i] = tok->u.c;\r | |
6271 | }\r | |
6272 | \r | |
6273 | if (i < ONIGENC_MBC_MINLEN(env->enc)) {\r | |
6274 | r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;\r | |
6275 | goto err;\r | |
6276 | }\r | |
6277 | \r | |
6278 | len = enclen(env->enc, buf);\r | |
6279 | if (i < len) {\r | |
6280 | r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;\r | |
6281 | goto err;\r | |
6282 | }\r | |
6283 | else if (i > len) { /* fetch back */\r | |
6284 | p = psave;\r | |
6285 | for (i = 1; i < len; i++) {\r | |
6286 | r = fetch_token_in_cc(tok, &p, end, env);\r | |
6287 | }\r | |
6288 | fetched = 0;\r | |
6289 | }\r | |
6290 | \r | |
6291 | if (i == 1) {\r | |
6292 | v = (OnigCodePoint )buf[0];\r | |
6293 | goto raw_single;\r | |
6294 | }\r | |
6295 | else {\r | |
6296 | v = ONIGENC_MBC_TO_CODE(env->enc, buf, bufe);\r | |
6297 | in_type = CCV_CODE_POINT;\r | |
6298 | }\r | |
6299 | }\r | |
6300 | else {\r | |
6301 | v = (OnigCodePoint )tok->u.c;\r | |
6302 | raw_single:\r | |
6303 | in_type = CCV_SB;\r | |
6304 | }\r | |
6305 | in_israw = 1;\r | |
6306 | goto val_entry2;\r | |
6307 | break;\r | |
6308 | \r | |
6309 | case TK_CODE_POINT:\r | |
6310 | v = tok->u.code;\r | |
6311 | in_israw = 1;\r | |
6312 | val_entry:\r | |
6313 | len = ONIGENC_CODE_TO_MBCLEN(env->enc, v);\r | |
6314 | if (len < 0) {\r | |
6315 | r = len;\r | |
6316 | goto err;\r | |
6317 | }\r | |
6318 | in_type = (len == 1 ? CCV_SB : CCV_CODE_POINT);\r | |
6319 | val_entry2:\r | |
6320 | r = next_state_val(cc, &vs, v, &val_israw, in_israw, in_type, &val_type,\r | |
6321 | &state, env);\r | |
6322 | if (r != 0) goto err;\r | |
6323 | break;\r | |
6324 | \r | |
6325 | case TK_POSIX_BRACKET_OPEN:\r | |
6326 | r = parse_posix_bracket(cc, &p, end, env);\r | |
6327 | if (r < 0) goto err;\r | |
6328 | if (r == 1) { /* is not POSIX bracket */\r | |
6329 | CC_ESC_WARN(env, (UChar* )"[");\r | |
6330 | p = tok->backp;\r | |
6331 | v = (OnigCodePoint )tok->u.c;\r | |
6332 | in_israw = 0;\r | |
6333 | goto val_entry;\r | |
6334 | }\r | |
6335 | goto next_class;\r | |
6336 | break;\r | |
6337 | \r | |
6338 | case TK_CHAR_TYPE:\r | |
6339 | r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not, env);\r | |
6340 | if (r != 0) goto err;\r | |
6341 | \r | |
6342 | next_class:\r | |
6343 | r = next_state_class(cc, &vs, &val_type, &state, env);\r | |
6344 | if (r != 0) goto err;\r | |
6345 | break;\r | |
6346 | \r | |
6347 | case TK_CHAR_PROPERTY:\r | |
6348 | {\r | |
6349 | int ctype = fetch_char_property_to_ctype(&p, end, env);\r | |
6350 | if (ctype < 0) {\r | |
6351 | r = ctype;\r | |
6352 | goto err;\r | |
6353 | }\r | |
6354 | r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, env);\r | |
6355 | if (r != 0) goto err;\r | |
6356 | goto next_class;\r | |
6357 | }\r | |
6358 | break;\r | |
6359 | \r | |
6360 | case TK_CC_RANGE:\r | |
6361 | if (state == CCS_VALUE) {\r | |
6362 | r = fetch_token_in_cc(tok, &p, end, env);\r | |
6363 | if (r < 0) goto err;\r | |
6364 | fetched = 1;\r | |
6365 | if (r == TK_CC_CLOSE) { /* allow [x-] */\r | |
6366 | range_end_val:\r | |
6367 | v = (OnigCodePoint )'-';\r | |
6368 | in_israw = 0;\r | |
6369 | goto val_entry;\r | |
6370 | }\r | |
6371 | else if (r == TK_CC_AND) {\r | |
6372 | CC_ESC_WARN(env, (UChar* )"-");\r | |
6373 | goto range_end_val;\r | |
6374 | }\r | |
6375 | \r | |
6376 | if (val_type == CCV_CLASS) {\r | |
6377 | r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;\r | |
6378 | goto err;\r | |
6379 | }\r | |
6380 | \r | |
6381 | state = CCS_RANGE;\r | |
6382 | }\r | |
6383 | else if (state == CCS_START) {\r | |
6384 | /* [-xa] is allowed */\r | |
6385 | v = (OnigCodePoint )tok->u.c;\r | |
6386 | in_israw = 0;\r | |
6387 | \r | |
6388 | r = fetch_token_in_cc(tok, &p, end, env);\r | |
6389 | if (r < 0) goto err;\r | |
6390 | fetched = 1;\r | |
6391 | /* [--x] or [a&&-x] is warned. */\r | |
6392 | if (r == TK_CC_RANGE || and_start != 0)\r | |
6393 | CC_ESC_WARN(env, (UChar* )"-");\r | |
6394 | \r | |
6395 | goto val_entry;\r | |
6396 | }\r | |
6397 | else if (state == CCS_RANGE) {\r | |
6398 | CC_ESC_WARN(env, (UChar* )"-");\r | |
6399 | goto any_char_in; /* [!--x] is allowed */\r | |
6400 | }\r | |
6401 | else { /* CCS_COMPLETE */\r | |
6402 | r = fetch_token_in_cc(tok, &p, end, env);\r | |
6403 | if (r < 0) goto err;\r | |
6404 | fetched = 1;\r | |
6405 | if (r == TK_CC_CLOSE) goto range_end_val; /* allow [a-b-] */\r | |
6406 | else if (r == TK_CC_AND) {\r | |
6407 | CC_ESC_WARN(env, (UChar* )"-");\r | |
6408 | goto range_end_val;\r | |
6409 | }\r | |
6410 | \r | |
6411 | if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) {\r | |
6412 | CC_ESC_WARN(env, (UChar* )"-");\r | |
6413 | goto range_end_val; /* [0-9-a] is allowed as [0-9\-a] */\r | |
6414 | }\r | |
6415 | r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;\r | |
6416 | goto err;\r | |
6417 | }\r | |
6418 | break;\r | |
6419 | \r | |
6420 | case TK_CC_CC_OPEN: /* [ */\r | |
6421 | {\r | |
6422 | Node *anode;\r | |
6423 | CClassNode* acc;\r | |
6424 | \r | |
6425 | r = parse_char_class(&anode, tok, &p, end, env);\r | |
6426 | if (r != 0) {\r | |
6427 | onig_node_free(anode);\r | |
6428 | goto cc_open_err;\r | |
6429 | }\r | |
6430 | acc = CCLASS_(anode);\r | |
6431 | r = or_cclass(cc, acc, env->enc);\r | |
6432 | onig_node_free(anode);\r | |
6433 | \r | |
6434 | cc_open_err:\r | |
6435 | if (r != 0) goto err;\r | |
6436 | }\r | |
6437 | break;\r | |
6438 | \r | |
6439 | case TK_CC_AND: /* && */\r | |
6440 | {\r | |
6441 | if (state == CCS_VALUE) {\r | |
6442 | r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,\r | |
6443 | &val_type, &state, env);\r | |
6444 | if (r != 0) goto err;\r | |
6445 | }\r | |
6446 | /* initialize local variables */\r | |
6447 | and_start = 1;\r | |
6448 | state = CCS_START;\r | |
6449 | \r | |
6450 | if (IS_NOT_NULL(prev_cc)) {\r | |
6451 | r = and_cclass(prev_cc, cc, env->enc);\r | |
6452 | if (r != 0) goto err;\r | |
6453 | bbuf_free(cc->mbuf);\r | |
6454 | }\r | |
6455 | else {\r | |
6456 | prev_cc = cc;\r | |
6457 | cc = &work_cc;\r | |
6458 | }\r | |
6459 | initialize_cclass(cc);\r | |
6460 | }\r | |
6461 | break;\r | |
6462 | \r | |
6463 | case TK_EOT:\r | |
6464 | r = ONIGERR_PREMATURE_END_OF_CHAR_CLASS;\r | |
6465 | goto err;\r | |
6466 | break;\r | |
6467 | default:\r | |
6468 | r = ONIGERR_PARSER_BUG;\r | |
6469 | goto err;\r | |
6470 | break;\r | |
6471 | }\r | |
6472 | \r | |
6473 | if (fetched)\r | |
6474 | r = tok->type;\r | |
6475 | else {\r | |
6476 | r = fetch_token_in_cc(tok, &p, end, env);\r | |
6477 | if (r < 0) goto err;\r | |
6478 | }\r | |
6479 | }\r | |
6480 | \r | |
6481 | if (state == CCS_VALUE) {\r | |
6482 | r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,\r | |
6483 | &val_type, &state, env);\r | |
6484 | if (r != 0) goto err;\r | |
6485 | }\r | |
6486 | \r | |
6487 | if (IS_NOT_NULL(prev_cc)) {\r | |
6488 | r = and_cclass(prev_cc, cc, env->enc);\r | |
6489 | if (r != 0) goto err;\r | |
6490 | bbuf_free(cc->mbuf);\r | |
6491 | cc = prev_cc;\r | |
6492 | }\r | |
6493 | \r | |
6494 | if (neg != 0)\r | |
6495 | NCCLASS_SET_NOT(cc);\r | |
6496 | else\r | |
6497 | NCCLASS_CLEAR_NOT(cc);\r | |
6498 | if (IS_NCCLASS_NOT(cc) &&\r | |
6499 | IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) {\r | |
6500 | int is_empty = (IS_NULL(cc->mbuf) ? 1 : 0);\r | |
6501 | if (is_empty != 0)\r | |
6502 | BITSET_IS_EMPTY(cc->bs, is_empty);\r | |
6503 | \r | |
6504 | if (is_empty == 0) {\r | |
6505 | #define NEWLINE_CODE 0x0a\r | |
6506 | \r | |
6507 | if (ONIGENC_IS_CODE_NEWLINE(env->enc, NEWLINE_CODE)) {\r | |
6508 | if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1)\r | |
6509 | BITSET_SET_BIT(cc->bs, NEWLINE_CODE);\r | |
6510 | else\r | |
6511 | add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE);\r | |
6512 | }\r | |
6513 | }\r | |
6514 | }\r | |
6515 | *src = p;\r | |
6516 | env->parse_depth--;\r | |
6517 | return 0;\r | |
6518 | \r | |
6519 | err:\r | |
6520 | if (cc != CCLASS_(*np))\r | |
6521 | bbuf_free(cc->mbuf);\r | |
6522 | return r;\r | |
6523 | }\r | |
6524 | \r | |
6525 | static int parse_subexp(Node** top, OnigToken* tok, int term,\r | |
6526 | UChar** src, UChar* end, ScanEnv* env);\r | |
6527 | \r | |
6528 | #ifdef USE_CALLOUT\r | |
6529 | \r | |
6530 | /* (?{...}[tag][+-]) (?{{...}}[tag][+-]) */\r | |
6531 | static int\r | |
6532 | parse_callout_of_contents(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* env)\r | |
6533 | {\r | |
6534 | int r;\r | |
6535 | int i;\r | |
6536 | int in;\r | |
6537 | int num;\r | |
6538 | OnigCodePoint c;\r | |
6539 | UChar* code_start;\r | |
6540 | UChar* code_end;\r | |
6541 | UChar* contents;\r | |
6542 | UChar* tag_start;\r | |
6543 | UChar* tag_end;\r | |
6544 | int brace_nest;\r | |
6545 | CalloutListEntry* e;\r | |
6546 | RegexExt* ext;\r | |
6547 | OnigEncoding enc = env->enc;\r | |
6548 | UChar* p = *src;\r | |
6549 | \r | |
6550 | if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;\r | |
6551 | \r | |
6552 | brace_nest = 0;\r | |
6553 | while (PPEEK_IS('{')) {\r | |
6554 | brace_nest++;\r | |
6555 | PINC_S;\r | |
6556 | if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;\r | |
6557 | }\r | |
6558 | \r | |
6559 | in = ONIG_CALLOUT_IN_PROGRESS;\r | |
6560 | code_start = p;\r | |
6561 | while (1) {\r | |
6562 | if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;\r | |
6563 | \r | |
6564 | code_end = p;\r | |
6565 | PFETCH_S(c);\r | |
6566 | if (c == '}') {\r | |
6567 | i = brace_nest;\r | |
6568 | while (i > 0) {\r | |
6569 | if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;\r | |
6570 | PFETCH_S(c);\r | |
6571 | if (c == '}') i--;\r | |
6572 | else break;\r | |
6573 | }\r | |
6574 | if (i == 0) break;\r | |
6575 | }\r | |
6576 | }\r | |
6577 | \r | |
6578 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
6579 | \r | |
6580 | PFETCH_S(c);\r | |
6581 | if (c == '[') {\r | |
6582 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
6583 | tag_start = p;\r | |
6584 | while (! PEND) {\r | |
6585 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
6586 | tag_end = p;\r | |
6587 | PFETCH_S(c);\r | |
6588 | if (c == ']') break;\r | |
6589 | }\r | |
6590 | if (! is_allowed_callout_tag_name(enc, tag_start, tag_end))\r | |
6591 | return ONIGERR_INVALID_CALLOUT_TAG_NAME;\r | |
6592 | \r | |
6593 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
6594 | PFETCH_S(c);\r | |
6595 | }\r | |
6596 | else {\r | |
6597 | tag_start = tag_end = 0;\r | |
6598 | }\r | |
6599 | \r | |
6600 | if (c == 'X') {\r | |
6601 | in |= ONIG_CALLOUT_IN_RETRACTION;\r | |
6602 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
6603 | PFETCH_S(c);\r | |
6604 | }\r | |
6605 | else if (c == '<') {\r | |
6606 | in = ONIG_CALLOUT_IN_RETRACTION;\r | |
6607 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
6608 | PFETCH_S(c);\r | |
6609 | }\r | |
6610 | else if (c == '>') { /* no needs (default) */\r | |
6611 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
6612 | PFETCH_S(c);\r | |
6613 | }\r | |
6614 | \r | |
6615 | if (c != cterm)\r | |
6616 | return ONIGERR_INVALID_CALLOUT_PATTERN;\r | |
6617 | \r | |
6618 | r = reg_callout_list_entry(env, &num);\r | |
6619 | if (r != 0) return r;\r | |
6620 | \r | |
6621 | ext = onig_get_regex_ext(env->reg);\r | |
df8be9e5 | 6622 | CHECK_NULL_RETURN_MEMERR(ext);\r |
b602265d DG |
6623 | if (IS_NULL(ext->pattern)) {\r |
6624 | r = onig_ext_set_pattern(env->reg, env->pattern, env->pattern_end);\r | |
6625 | if (r != ONIG_NORMAL) return r;\r | |
6626 | }\r | |
6627 | \r | |
6628 | if (tag_start != tag_end) {\r | |
6629 | r = callout_tag_entry(env->reg, tag_start, tag_end, num);\r | |
6630 | if (r != ONIG_NORMAL) return r;\r | |
6631 | }\r | |
6632 | \r | |
6633 | contents = onigenc_strdup(enc, code_start, code_end);\r | |
6634 | CHECK_NULL_RETURN_MEMERR(contents);\r | |
6635 | \r | |
6636 | r = node_new_callout(np, ONIG_CALLOUT_OF_CONTENTS, num, ONIG_NON_NAME_ID, env);\r | |
6637 | if (r != 0) {\r | |
6638 | xfree(contents);\r | |
6639 | return r;\r | |
6640 | }\r | |
6641 | \r | |
6642 | e = onig_reg_callout_list_at(env->reg, num);\r | |
a5def177 DG |
6643 | if (IS_NULL(e)) {\r |
6644 | xfree(contents);\r | |
6645 | return ONIGERR_MEMORY;\r | |
6646 | }\r | |
6647 | \r | |
b602265d DG |
6648 | e->of = ONIG_CALLOUT_OF_CONTENTS;\r |
6649 | e->in = in;\r | |
6650 | e->name_id = ONIG_NON_NAME_ID;\r | |
6651 | e->u.content.start = contents;\r | |
6652 | e->u.content.end = contents + (code_end - code_start);\r | |
6653 | \r | |
6654 | *src = p;\r | |
6655 | return 0;\r | |
6656 | }\r | |
6657 | \r | |
6658 | static long\r | |
6659 | parse_long(OnigEncoding enc, UChar* s, UChar* end, int sign_on, long max, long* rl)\r | |
6660 | {\r | |
6661 | long v;\r | |
6662 | long d;\r | |
6663 | int flag;\r | |
6664 | UChar* p;\r | |
6665 | OnigCodePoint c;\r | |
6666 | \r | |
6667 | if (s >= end) return ONIGERR_INVALID_CALLOUT_ARG;\r | |
6668 | \r | |
6669 | flag = 1;\r | |
6670 | v = 0;\r | |
6671 | p = s;\r | |
6672 | while (p < end) {\r | |
6673 | c = ONIGENC_MBC_TO_CODE(enc, p, end);\r | |
6674 | p += ONIGENC_MBC_ENC_LEN(enc, p);\r | |
6675 | if (c >= '0' && c <= '9') {\r | |
6676 | d = (long )(c - '0');\r | |
6677 | if (v > (max - d) / 10)\r | |
6678 | return ONIGERR_INVALID_CALLOUT_ARG;\r | |
6679 | \r | |
6680 | v = v * 10 + d;\r | |
6681 | }\r | |
6682 | else if (sign_on != 0 && (c == '-' || c == '+')) {\r | |
6683 | if (c == '-') flag = -1;\r | |
6684 | }\r | |
6685 | else\r | |
6686 | return ONIGERR_INVALID_CALLOUT_ARG;\r | |
6687 | \r | |
6688 | sign_on = 0;\r | |
6689 | }\r | |
6690 | \r | |
6691 | *rl = flag * v;\r | |
6692 | return ONIG_NORMAL;\r | |
6693 | }\r | |
6694 | \r | |
6695 | static int\r | |
6696 | parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end,\r | |
6697 | unsigned int types[], OnigValue vals[], ScanEnv* env)\r | |
6698 | {\r | |
6699 | #define MAX_CALLOUT_ARG_BYTE_LENGTH 128\r | |
6700 | \r | |
6701 | int r;\r | |
6702 | int n;\r | |
6703 | int esc;\r | |
6704 | int cn;\r | |
6705 | UChar* s;\r | |
6706 | UChar* e;\r | |
6707 | UChar* eesc;\r | |
6708 | OnigCodePoint c;\r | |
6709 | UChar* bufend;\r | |
6710 | UChar buf[MAX_CALLOUT_ARG_BYTE_LENGTH];\r | |
6711 | OnigEncoding enc = env->enc;\r | |
6712 | UChar* p = *src;\r | |
6713 | \r | |
6714 | if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;\r | |
6715 | \r | |
6716 | n = 0;\r | |
6717 | while (n < ONIG_CALLOUT_MAX_ARGS_NUM) {\r | |
6718 | c = 0;\r | |
6719 | cn = 0;\r | |
6720 | esc = 0;\r | |
6721 | eesc = 0;\r | |
6722 | bufend = buf;\r | |
6723 | s = e = p;\r | |
6724 | while (1) {\r | |
6725 | if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;\r | |
6726 | \r | |
6727 | e = p;\r | |
6728 | PFETCH_S(c);\r | |
6729 | if (esc != 0) {\r | |
6730 | esc = 0;\r | |
6731 | if (c == '\\' || c == cterm || c == ',') {\r | |
6732 | /* */\r | |
6733 | }\r | |
6734 | else {\r | |
6735 | e = eesc;\r | |
6736 | cn++;\r | |
6737 | }\r | |
6738 | goto add_char;\r | |
14b0e578 CS |
6739 | }\r |
6740 | else {\r | |
b602265d DG |
6741 | if (c == '\\') {\r |
6742 | esc = 1;\r | |
6743 | eesc = e;\r | |
6744 | }\r | |
6745 | else if (c == cterm || c == ',')\r | |
6746 | break;\r | |
6747 | else {\r | |
6748 | size_t clen;\r | |
14b0e578 | 6749 | \r |
b602265d DG |
6750 | add_char:\r |
6751 | if (skip_mode == 0) {\r | |
6752 | clen = p - e;\r | |
6753 | if (bufend + clen > buf + MAX_CALLOUT_ARG_BYTE_LENGTH)\r | |
6754 | return ONIGERR_INVALID_CALLOUT_ARG; /* too long argument */\r | |
14b0e578 | 6755 | \r |
b602265d DG |
6756 | xmemcpy(bufend, e, clen);\r |
6757 | bufend += clen;\r | |
6758 | }\r | |
6759 | cn++;\r | |
6760 | }\r | |
14b0e578 | 6761 | }\r |
b602265d | 6762 | }\r |
14b0e578 | 6763 | \r |
b602265d DG |
6764 | if (cn != 0) {\r |
6765 | if (skip_mode == 0) {\r | |
6766 | if ((types[n] & ONIG_TYPE_LONG) != 0) {\r | |
6767 | int fixed = 0;\r | |
6768 | if (cn > 0) {\r | |
6769 | long rl;\r | |
6770 | r = parse_long(enc, buf, bufend, 1, LONG_MAX, &rl);\r | |
6771 | if (r == ONIG_NORMAL) {\r | |
6772 | vals[n].l = rl;\r | |
6773 | fixed = 1;\r | |
6774 | types[n] = ONIG_TYPE_LONG;\r | |
6775 | }\r | |
6776 | }\r | |
14b0e578 | 6777 | \r |
b602265d DG |
6778 | if (fixed == 0) {\r |
6779 | types[n] = (types[n] & ~ONIG_TYPE_LONG);\r | |
6780 | if (types[n] == ONIG_TYPE_VOID)\r | |
6781 | return ONIGERR_INVALID_CALLOUT_ARG;\r | |
6782 | }\r | |
6783 | }\r | |
14b0e578 | 6784 | \r |
b602265d DG |
6785 | switch (types[n]) {\r |
6786 | case ONIG_TYPE_LONG:\r | |
6787 | break;\r | |
14b0e578 | 6788 | \r |
b602265d DG |
6789 | case ONIG_TYPE_CHAR:\r |
6790 | if (cn != 1) return ONIGERR_INVALID_CALLOUT_ARG;\r | |
6791 | vals[n].c = ONIGENC_MBC_TO_CODE(enc, buf, bufend);\r | |
6792 | break;\r | |
14b0e578 | 6793 | \r |
b602265d DG |
6794 | case ONIG_TYPE_STRING:\r |
6795 | {\r | |
6796 | UChar* rs = onigenc_strdup(enc, buf, bufend);\r | |
6797 | CHECK_NULL_RETURN_MEMERR(rs);\r | |
6798 | vals[n].s.start = rs;\r | |
6799 | vals[n].s.end = rs + (e - s);\r | |
6800 | }\r | |
6801 | break;\r | |
14b0e578 | 6802 | \r |
b602265d DG |
6803 | case ONIG_TYPE_TAG:\r |
6804 | if (eesc != 0 || ! is_allowed_callout_tag_name(enc, s, e))\r | |
6805 | return ONIGERR_INVALID_CALLOUT_TAG_NAME;\r | |
14b0e578 | 6806 | \r |
b602265d DG |
6807 | vals[n].s.start = s;\r |
6808 | vals[n].s.end = e;\r | |
6809 | break;\r | |
6810 | \r | |
6811 | case ONIG_TYPE_VOID:\r | |
6812 | case ONIG_TYPE_POINTER:\r | |
6813 | return ONIGERR_PARSER_BUG;\r | |
6814 | break;\r | |
6815 | }\r | |
14b0e578 | 6816 | }\r |
14b0e578 | 6817 | \r |
b602265d DG |
6818 | n++;\r |
6819 | }\r | |
14b0e578 | 6820 | \r |
b602265d DG |
6821 | if (c == cterm) break;\r |
6822 | }\r | |
14b0e578 | 6823 | \r |
b602265d | 6824 | if (c != cterm) return ONIGERR_INVALID_CALLOUT_PATTERN;\r |
14b0e578 | 6825 | \r |
b602265d DG |
6826 | *src = p;\r |
6827 | return n;\r | |
6828 | }\r | |
14b0e578 | 6829 | \r |
b602265d DG |
6830 | /* (*name[TAG]) (*name[TAG]{a,b,..}) */\r |
6831 | static int\r | |
6832 | parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* env)\r | |
6833 | {\r | |
6834 | int r;\r | |
6835 | int i;\r | |
6836 | int in;\r | |
6837 | int num;\r | |
6838 | int name_id;\r | |
6839 | int arg_num;\r | |
6840 | int max_arg_num;\r | |
6841 | int opt_arg_num;\r | |
6842 | int is_not_single;\r | |
6843 | OnigCodePoint c;\r | |
6844 | UChar* name_start;\r | |
6845 | UChar* name_end;\r | |
6846 | UChar* tag_start;\r | |
6847 | UChar* tag_end;\r | |
6848 | Node* node;\r | |
6849 | CalloutListEntry* e;\r | |
6850 | RegexExt* ext;\r | |
6851 | unsigned int types[ONIG_CALLOUT_MAX_ARGS_NUM];\r | |
6852 | OnigValue vals[ONIG_CALLOUT_MAX_ARGS_NUM];\r | |
6853 | OnigEncoding enc = env->enc;\r | |
6854 | UChar* p = *src;\r | |
14b0e578 | 6855 | \r |
b602265d DG |
6856 | /* PFETCH_READY; */\r |
6857 | if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;\r | |
6858 | \r | |
6859 | node = 0;\r | |
6860 | name_start = p;\r | |
6861 | while (1) {\r | |
6862 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
6863 | name_end = p;\r | |
6864 | PFETCH_S(c);\r | |
6865 | if (c == cterm || c == '[' || c == '{') break;\r | |
6866 | }\r | |
6867 | \r | |
6868 | if (! is_allowed_callout_name(enc, name_start, name_end))\r | |
6869 | return ONIGERR_INVALID_CALLOUT_NAME;\r | |
6870 | \r | |
6871 | if (c == '[') {\r | |
6872 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
6873 | tag_start = p;\r | |
6874 | while (! PEND) {\r | |
6875 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
6876 | tag_end = p;\r | |
6877 | PFETCH_S(c);\r | |
6878 | if (c == ']') break;\r | |
14b0e578 | 6879 | }\r |
b602265d DG |
6880 | if (! is_allowed_callout_tag_name(enc, tag_start, tag_end))\r |
6881 | return ONIGERR_INVALID_CALLOUT_TAG_NAME;\r | |
6882 | \r | |
6883 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
6884 | PFETCH_S(c);\r | |
6885 | }\r | |
6886 | else {\r | |
6887 | tag_start = tag_end = 0;\r | |
14b0e578 CS |
6888 | }\r |
6889 | \r | |
b602265d DG |
6890 | if (c == '{') {\r |
6891 | UChar* save;\r | |
6892 | \r | |
6893 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
6894 | \r | |
6895 | /* read for single check only */\r | |
6896 | save = p;\r | |
6897 | arg_num = parse_callout_args(1, '}', &p, end, 0, 0, env);\r | |
6898 | if (arg_num < 0) return arg_num;\r | |
6899 | \r | |
6900 | is_not_single = PPEEK_IS(cterm) ? 0 : 1;\r | |
6901 | p = save;\r | |
6902 | r = get_callout_name_id_by_name(enc, is_not_single, name_start, name_end,\r | |
6903 | &name_id);\r | |
6904 | if (r != ONIG_NORMAL) return r;\r | |
6905 | \r | |
6906 | max_arg_num = get_callout_arg_num_by_name_id(name_id);\r | |
6907 | for (i = 0; i < max_arg_num; i++) {\r | |
6908 | types[i] = get_callout_arg_type_by_name_id(name_id, i);\r | |
6909 | }\r | |
6910 | \r | |
6911 | arg_num = parse_callout_args(0, '}', &p, end, types, vals, env);\r | |
6912 | if (arg_num < 0) return arg_num;\r | |
6913 | \r | |
6914 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
6915 | PFETCH_S(c);\r | |
14b0e578 | 6916 | }\r |
b602265d DG |
6917 | else {\r |
6918 | arg_num = 0;\r | |
14b0e578 | 6919 | \r |
b602265d DG |
6920 | is_not_single = 0;\r |
6921 | r = get_callout_name_id_by_name(enc, is_not_single, name_start, name_end,\r | |
6922 | &name_id);\r | |
6923 | if (r != ONIG_NORMAL) return r;\r | |
6924 | \r | |
6925 | max_arg_num = get_callout_arg_num_by_name_id(name_id);\r | |
6926 | for (i = 0; i < max_arg_num; i++) {\r | |
6927 | types[i] = get_callout_arg_type_by_name_id(name_id, i);\r | |
6928 | }\r | |
14b0e578 CS |
6929 | }\r |
6930 | \r | |
b602265d DG |
6931 | in = onig_get_callout_in_by_name_id(name_id);\r |
6932 | opt_arg_num = get_callout_opt_arg_num_by_name_id(name_id);\r | |
6933 | if (arg_num > max_arg_num || arg_num < (max_arg_num - opt_arg_num))\r | |
6934 | return ONIGERR_INVALID_CALLOUT_ARG;\r | |
14b0e578 | 6935 | \r |
b602265d DG |
6936 | if (c != cterm)\r |
6937 | return ONIGERR_INVALID_CALLOUT_PATTERN;\r | |
14b0e578 | 6938 | \r |
b602265d DG |
6939 | r = reg_callout_list_entry(env, &num);\r |
6940 | if (r != 0) return r;\r | |
14b0e578 | 6941 | \r |
b602265d | 6942 | ext = onig_get_regex_ext(env->reg);\r |
df8be9e5 | 6943 | CHECK_NULL_RETURN_MEMERR(ext);\r |
b602265d DG |
6944 | if (IS_NULL(ext->pattern)) {\r |
6945 | r = onig_ext_set_pattern(env->reg, env->pattern, env->pattern_end);\r | |
6946 | if (r != ONIG_NORMAL) return r;\r | |
6947 | }\r | |
6948 | \r | |
6949 | if (tag_start != tag_end) {\r | |
6950 | r = callout_tag_entry(env->reg, tag_start, tag_end, num);\r | |
6951 | if (r != ONIG_NORMAL) return r;\r | |
6952 | }\r | |
6953 | \r | |
6954 | r = node_new_callout(&node, ONIG_CALLOUT_OF_NAME, num, name_id, env);\r | |
6955 | if (r != ONIG_NORMAL) return r;\r | |
6956 | \r | |
6957 | e = onig_reg_callout_list_at(env->reg, num);\r | |
a5def177 DG |
6958 | CHECK_NULL_RETURN_MEMERR(e);\r |
6959 | \r | |
b602265d DG |
6960 | e->of = ONIG_CALLOUT_OF_NAME;\r |
6961 | e->in = in;\r | |
6962 | e->name_id = name_id;\r | |
6963 | e->type = onig_get_callout_type_by_name_id(name_id);\r | |
6964 | e->start_func = onig_get_callout_start_func_by_name_id(name_id);\r | |
6965 | e->end_func = onig_get_callout_end_func_by_name_id(name_id);\r | |
6966 | e->u.arg.num = max_arg_num;\r | |
6967 | e->u.arg.passed_num = arg_num;\r | |
6968 | for (i = 0; i < max_arg_num; i++) {\r | |
6969 | e->u.arg.types[i] = types[i];\r | |
6970 | if (i < arg_num)\r | |
6971 | e->u.arg.vals[i] = vals[i];\r | |
6972 | else\r | |
6973 | e->u.arg.vals[i] = get_callout_opt_default_by_name_id(name_id, i);\r | |
14b0e578 | 6974 | }\r |
b602265d DG |
6975 | \r |
6976 | *np = node;\r | |
14b0e578 CS |
6977 | *src = p;\r |
6978 | return 0;\r | |
14b0e578 | 6979 | }\r |
b602265d | 6980 | #endif\r |
14b0e578 CS |
6981 | \r |
6982 | static int\r | |
b602265d DG |
6983 | parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,\r |
6984 | ScanEnv* env)\r | |
14b0e578 CS |
6985 | {\r |
6986 | int r, num;\r | |
6987 | Node *target;\r | |
6988 | OnigOptionType option;\r | |
6989 | OnigCodePoint c;\r | |
b602265d | 6990 | int list_capture;\r |
14b0e578 CS |
6991 | OnigEncoding enc = env->enc;\r |
6992 | \r | |
b602265d DG |
6993 | UChar* p = *src;\r |
6994 | PFETCH_READY;\r | |
6995 | \r | |
6996 | *np = NULL;\r | |
6997 | if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;\r | |
6998 | \r | |
6999 | option = env->options;\r | |
7000 | c = PPEEK;\r | |
7001 | if (c == '?' && IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {\r | |
7002 | PINC;\r | |
7003 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
7004 | \r | |
7005 | PFETCH(c);\r | |
7006 | switch (c) {\r | |
7007 | case ':': /* (?:...) grouping only */\r | |
7008 | group:\r | |
7009 | r = fetch_token(tok, &p, end, env);\r | |
7010 | if (r < 0) return r;\r | |
7011 | r = parse_subexp(np, tok, term, &p, end, env);\r | |
7012 | if (r < 0) return r;\r | |
7013 | *src = p;\r | |
7014 | return 1; /* group */\r | |
7015 | break;\r | |
7016 | \r | |
7017 | case '=':\r | |
7018 | *np = onig_node_new_anchor(ANCHOR_PREC_READ, 0);\r | |
7019 | break;\r | |
7020 | case '!': /* preceding read */\r | |
7021 | *np = onig_node_new_anchor(ANCHOR_PREC_READ_NOT, 0);\r | |
7022 | break;\r | |
7023 | case '>': /* (?>...) stop backtrack */\r | |
7024 | *np = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);\r | |
7025 | break;\r | |
7026 | \r | |
7027 | case '\'':\r | |
7028 | if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {\r | |
7029 | goto named_group1;\r | |
7030 | }\r | |
7031 | else\r | |
7032 | return ONIGERR_UNDEFINED_GROUP_OPTION;\r | |
7033 | break;\r | |
7034 | \r | |
7035 | case '<': /* look behind (?<=...), (?<!...) */\r | |
7036 | if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;\r | |
7037 | PFETCH(c);\r | |
7038 | if (c == '=')\r | |
7039 | *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND, 0);\r | |
7040 | else if (c == '!')\r | |
7041 | *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND_NOT, 0);\r | |
7042 | else {\r | |
7043 | if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {\r | |
7044 | UChar *name;\r | |
7045 | UChar *name_end;\r | |
7046 | enum REF_NUM num_type;\r | |
7047 | \r | |
7048 | PUNFETCH;\r | |
7049 | c = '<';\r | |
7050 | \r | |
7051 | named_group1:\r | |
7052 | list_capture = 0;\r | |
7053 | \r | |
7054 | named_group2:\r | |
7055 | name = p;\r | |
7056 | r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num,\r | |
7057 | &num_type, 0);\r | |
7058 | if (r < 0) return r;\r | |
7059 | \r | |
7060 | num = scan_env_add_mem_entry(env);\r | |
7061 | if (num < 0) return num;\r | |
7062 | if (list_capture != 0 && num >= (int )MEM_STATUS_BITS_NUM)\r | |
7063 | return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;\r | |
7064 | \r | |
7065 | r = name_add(env->reg, name, name_end, num, env);\r | |
7066 | if (r != 0) return r;\r | |
7067 | *np = node_new_memory(1);\r | |
7068 | CHECK_NULL_RETURN_MEMERR(*np);\r | |
7069 | ENCLOSURE_(*np)->m.regnum = num;\r | |
7070 | if (list_capture != 0)\r | |
7071 | MEM_STATUS_ON_SIMPLE(env->capture_history, num);\r | |
7072 | env->num_named++;\r | |
7073 | }\r | |
7074 | else {\r | |
7075 | return ONIGERR_UNDEFINED_GROUP_OPTION;\r | |
7076 | }\r | |
7077 | }\r | |
7078 | break;\r | |
7079 | \r | |
7080 | case '~':\r | |
7081 | if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP)) {\r | |
7082 | Node* absent;\r | |
7083 | Node* expr;\r | |
7084 | int head_bar;\r | |
7085 | int is_range_cutter;\r | |
7086 | \r | |
7087 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
7088 | \r | |
7089 | if (PPEEK_IS('|')) { /* (?~|generator|absent) */\r | |
7090 | PINC;\r | |
7091 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
7092 | \r | |
7093 | head_bar = 1;\r | |
7094 | if (PPEEK_IS(')')) { /* (?~|) : range clear */\r | |
7095 | PINC;\r | |
7096 | r = make_range_clear(np, env);\r | |
7097 | if (r != 0) return r;\r | |
7098 | goto end;\r | |
7099 | }\r | |
7100 | }\r | |
7101 | else\r | |
7102 | head_bar = 0;\r | |
7103 | \r | |
7104 | r = fetch_token(tok, &p, end, env);\r | |
7105 | if (r < 0) return r;\r | |
7106 | r = parse_subexp(&absent, tok, term, &p, end, env);\r | |
7107 | if (r < 0) {\r | |
7108 | onig_node_free(absent);\r | |
7109 | return r;\r | |
7110 | }\r | |
7111 | \r | |
7112 | expr = NULL_NODE;\r | |
7113 | is_range_cutter = 0;\r | |
7114 | if (head_bar != 0) {\r | |
7115 | Node* top = absent;\r | |
7116 | if (NODE_TYPE(top) != NODE_ALT || IS_NULL(NODE_CDR(top))) {\r | |
7117 | expr = NULL_NODE;\r | |
7118 | is_range_cutter = 1;\r | |
7119 | /* return ONIGERR_INVALID_ABSENT_GROUP_GENERATOR_PATTERN; */\r | |
7120 | }\r | |
7121 | else {\r | |
7122 | absent = NODE_CAR(top);\r | |
7123 | expr = NODE_CDR(top);\r | |
7124 | NODE_CAR(top) = NULL_NODE;\r | |
7125 | NODE_CDR(top) = NULL_NODE;\r | |
7126 | onig_node_free(top);\r | |
7127 | if (IS_NULL(NODE_CDR(expr))) {\r | |
7128 | top = expr;\r | |
7129 | expr = NODE_CAR(top);\r | |
7130 | NODE_CAR(top) = NULL_NODE;\r | |
7131 | onig_node_free(top);\r | |
7132 | }\r | |
7133 | }\r | |
7134 | }\r | |
7135 | \r | |
7136 | r = make_absent_tree(np, absent, expr, is_range_cutter, env);\r | |
7137 | if (r != 0) {\r | |
7138 | return r;\r | |
7139 | }\r | |
7140 | goto end;\r | |
7141 | }\r | |
7142 | else {\r | |
7143 | return ONIGERR_UNDEFINED_GROUP_OPTION;\r | |
7144 | }\r | |
7145 | break;\r | |
7146 | \r | |
7147 | #ifdef USE_CALLOUT\r | |
7148 | case '{':\r | |
7149 | if (! IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS))\r | |
7150 | return ONIGERR_UNDEFINED_GROUP_OPTION;\r | |
7151 | \r | |
7152 | r = parse_callout_of_contents(np, ')', &p, end, env);\r | |
7153 | if (r != 0) return r;\r | |
7154 | \r | |
7155 | goto end;\r | |
7156 | break;\r | |
7157 | #endif\r | |
7158 | \r | |
7159 | case '(':\r | |
7160 | /* (?()...) */\r | |
7161 | if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE)) {\r | |
7162 | UChar *prev;\r | |
7163 | Node* condition;\r | |
7164 | int condition_is_checker;\r | |
7165 | \r | |
7166 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
7167 | PFETCH(c);\r | |
7168 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
7169 | \r | |
7170 | if (IS_CODE_DIGIT_ASCII(enc, c)\r | |
7171 | || c == '-' || c == '+' || c == '<' || c == '\'') {\r | |
7172 | UChar* name_end;\r | |
7173 | int back_num;\r | |
7174 | int exist_level;\r | |
7175 | int level;\r | |
7176 | enum REF_NUM num_type;\r | |
7177 | int is_enclosed;\r | |
7178 | \r | |
7179 | is_enclosed = (c == '<' || c == '\'') ? 1 : 0;\r | |
7180 | if (! is_enclosed)\r | |
7181 | PUNFETCH;\r | |
7182 | prev = p;\r | |
7183 | exist_level = 0;\r | |
7184 | #ifdef USE_BACKREF_WITH_LEVEL\r | |
7185 | name_end = NULL_UCHARP; /* no need. escape gcc warning. */\r | |
7186 | r = fetch_name_with_level(\r | |
7187 | (OnigCodePoint )(is_enclosed != 0 ? c : '('),\r | |
7188 | &p, end, &name_end,\r | |
7189 | env, &back_num, &level, &num_type);\r | |
7190 | if (r == 1) exist_level = 1;\r | |
7191 | #else\r | |
7192 | r = fetch_name((OnigCodePoint )(is_enclosed != 0 ? c : '('),\r | |
7193 | &p, end, &name_end, env, &back_num, &num_type, 1);\r | |
7194 | #endif\r | |
7195 | if (r < 0) {\r | |
7196 | if (is_enclosed == 0) {\r | |
7197 | goto any_condition;\r | |
7198 | }\r | |
7199 | else\r | |
7200 | return r;\r | |
7201 | }\r | |
7202 | \r | |
7203 | condition_is_checker = 1;\r | |
7204 | if (num_type != IS_NOT_NUM) {\r | |
7205 | if (num_type == IS_REL_NUM) {\r | |
7206 | back_num = backref_rel_to_abs(back_num, env);\r | |
7207 | }\r | |
7208 | if (back_num <= 0)\r | |
7209 | return ONIGERR_INVALID_BACKREF;\r | |
7210 | \r | |
7211 | if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_STRICT_CHECK_BACKREF)) {\r | |
7212 | if (back_num > env->num_mem ||\r | |
7213 | IS_NULL(SCANENV_MEMENV(env)[back_num].node))\r | |
7214 | return ONIGERR_INVALID_BACKREF;\r | |
7215 | }\r | |
7216 | \r | |
7217 | condition = node_new_backref_checker(1, &back_num, 0,\r | |
7218 | #ifdef USE_BACKREF_WITH_LEVEL\r | |
7219 | exist_level, level,\r | |
7220 | #endif\r | |
7221 | env);\r | |
7222 | }\r | |
7223 | else {\r | |
7224 | int num;\r | |
7225 | int* backs;\r | |
7226 | \r | |
7227 | num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);\r | |
7228 | if (num <= 0) {\r | |
7229 | onig_scan_env_set_error_string(env,\r | |
7230 | ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);\r | |
7231 | return ONIGERR_UNDEFINED_NAME_REFERENCE;\r | |
7232 | }\r | |
7233 | if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_STRICT_CHECK_BACKREF)) {\r | |
7234 | int i;\r | |
7235 | for (i = 0; i < num; i++) {\r | |
7236 | if (backs[i] > env->num_mem ||\r | |
7237 | IS_NULL(SCANENV_MEMENV(env)[backs[i]].node))\r | |
7238 | return ONIGERR_INVALID_BACKREF;\r | |
7239 | }\r | |
7240 | }\r | |
7241 | \r | |
7242 | condition = node_new_backref_checker(num, backs, 1,\r | |
7243 | #ifdef USE_BACKREF_WITH_LEVEL\r | |
7244 | exist_level, level,\r | |
7245 | #endif\r | |
7246 | env);\r | |
7247 | }\r | |
7248 | \r | |
7249 | if (is_enclosed != 0) {\r | |
7250 | if (PEND) goto err_if_else;\r | |
7251 | PFETCH(c);\r | |
7252 | if (c != ')') goto err_if_else;\r | |
7253 | }\r | |
7254 | }\r | |
7255 | #ifdef USE_CALLOUT\r | |
7256 | else if (c == '?') {\r | |
7257 | if (IS_SYNTAX_OP2(env->syntax,\r | |
7258 | ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS)) {\r | |
7259 | if (! PEND && PPEEK_IS('{')) {\r | |
7260 | /* condition part is callouts of contents: (?(?{...})THEN|ELSE) */\r | |
7261 | condition_is_checker = 0;\r | |
7262 | PFETCH(c);\r | |
7263 | r = parse_callout_of_contents(&condition, ')', &p, end, env);\r | |
7264 | if (r != 0) return r;\r | |
7265 | goto end_condition;\r | |
7266 | }\r | |
7267 | }\r | |
7268 | goto any_condition;\r | |
7269 | }\r | |
7270 | else if (c == '*' &&\r | |
7271 | IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME)) {\r | |
7272 | condition_is_checker = 0;\r | |
7273 | r = parse_callout_of_name(&condition, ')', &p, end, env);\r | |
7274 | if (r != 0) return r;\r | |
7275 | goto end_condition;\r | |
7276 | }\r | |
14b0e578 | 7277 | #endif\r |
b602265d DG |
7278 | else {\r |
7279 | any_condition:\r | |
7280 | PUNFETCH;\r | |
7281 | condition_is_checker = 0;\r | |
7282 | r = fetch_token(tok, &p, end, env);\r | |
7283 | if (r < 0) return r;\r | |
7284 | r = parse_subexp(&condition, tok, term, &p, end, env);\r | |
7285 | if (r < 0) {\r | |
7286 | onig_node_free(condition);\r | |
7287 | return r;\r | |
7288 | }\r | |
7289 | }\r | |
14b0e578 | 7290 | \r |
b602265d DG |
7291 | end_condition:\r |
7292 | CHECK_NULL_RETURN_MEMERR(condition);\r | |
14b0e578 | 7293 | \r |
b602265d DG |
7294 | if (PEND) {\r |
7295 | err_if_else:\r | |
7296 | onig_node_free(condition);\r | |
7297 | return ONIGERR_END_PATTERN_IN_GROUP;\r | |
7298 | }\r | |
14b0e578 | 7299 | \r |
b602265d DG |
7300 | if (PPEEK_IS(')')) { /* case: empty body: make backref checker */\r |
7301 | if (condition_is_checker == 0) {\r | |
7302 | onig_node_free(condition);\r | |
7303 | return ONIGERR_INVALID_IF_ELSE_SYNTAX;\r | |
7304 | }\r | |
7305 | PFETCH(c);\r | |
7306 | *np = condition;\r | |
7307 | }\r | |
7308 | else { /* if-else */\r | |
7309 | int then_is_empty;\r | |
7310 | Node *Then, *Else;\r | |
14b0e578 | 7311 | \r |
b602265d DG |
7312 | if (PPEEK_IS('|')) {\r |
7313 | PFETCH(c);\r | |
7314 | Then = 0;\r | |
7315 | then_is_empty = 1;\r | |
7316 | }\r | |
7317 | else\r | |
7318 | then_is_empty = 0;\r | |
14b0e578 | 7319 | \r |
b602265d DG |
7320 | r = fetch_token(tok, &p, end, env);\r |
7321 | if (r < 0) {\r | |
7322 | onig_node_free(condition);\r | |
7323 | return r;\r | |
7324 | }\r | |
7325 | r = parse_subexp(&target, tok, term, &p, end, env);\r | |
7326 | if (r < 0) {\r | |
7327 | onig_node_free(condition);\r | |
7328 | onig_node_free(target);\r | |
7329 | return r;\r | |
7330 | }\r | |
14b0e578 | 7331 | \r |
b602265d DG |
7332 | if (then_is_empty != 0) {\r |
7333 | Else = target;\r | |
7334 | }\r | |
7335 | else {\r | |
7336 | if (NODE_TYPE(target) == NODE_ALT) {\r | |
7337 | Then = NODE_CAR(target);\r | |
7338 | if (NODE_CDR(NODE_CDR(target)) == NULL_NODE) {\r | |
7339 | Else = NODE_CAR(NODE_CDR(target));\r | |
7340 | cons_node_free_alone(NODE_CDR(target));\r | |
7341 | }\r | |
7342 | else {\r | |
7343 | Else = NODE_CDR(target);\r | |
7344 | }\r | |
7345 | cons_node_free_alone(target);\r | |
7346 | }\r | |
7347 | else {\r | |
7348 | Then = target;\r | |
7349 | Else = 0;\r | |
7350 | }\r | |
7351 | }\r | |
14b0e578 | 7352 | \r |
b602265d DG |
7353 | *np = node_new_enclosure_if_else(condition, Then, Else);\r |
7354 | if (IS_NULL(*np)) {\r | |
7355 | onig_node_free(condition);\r | |
7356 | onig_node_free(Then);\r | |
7357 | onig_node_free(Else);\r | |
7358 | return ONIGERR_MEMORY;\r | |
7359 | }\r | |
7360 | }\r | |
7361 | goto end;\r | |
14b0e578 | 7362 | }\r |
14b0e578 | 7363 | else {\r |
b602265d | 7364 | return ONIGERR_UNDEFINED_GROUP_OPTION;\r |
14b0e578 | 7365 | }\r |
14b0e578 CS |
7366 | break;\r |
7367 | \r | |
7368 | case '@':\r | |
7369 | if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) {\r | |
b602265d DG |
7370 | if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {\r |
7371 | PFETCH(c);\r | |
7372 | if (c == '<' || c == '\'') {\r | |
7373 | list_capture = 1;\r | |
7374 | goto named_group2; /* (?@<name>...) */\r | |
7375 | }\r | |
7376 | PUNFETCH;\r | |
7377 | }\r | |
7378 | \r | |
7379 | *np = node_new_memory(0);\r | |
7380 | CHECK_NULL_RETURN_MEMERR(*np);\r | |
7381 | num = scan_env_add_mem_entry(env);\r | |
7382 | if (num < 0) {\r | |
7383 | return num;\r | |
7384 | }\r | |
7385 | else if (num >= (int )MEM_STATUS_BITS_NUM) {\r | |
7386 | return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;\r | |
7387 | }\r | |
7388 | ENCLOSURE_(*np)->m.regnum = num;\r | |
7389 | MEM_STATUS_ON_SIMPLE(env->capture_history, num);\r | |
14b0e578 CS |
7390 | }\r |
7391 | else {\r | |
b602265d | 7392 | return ONIGERR_UNDEFINED_GROUP_OPTION;\r |
14b0e578 CS |
7393 | }\r |
7394 | break;\r | |
7395 | \r | |
7396 | #ifdef USE_POSIXLINE_OPTION\r | |
7397 | case 'p':\r | |
7398 | #endif\r | |
7399 | case '-': case 'i': case 'm': case 's': case 'x':\r | |
b602265d | 7400 | case 'W': case 'D': case 'S': case 'P':\r |
14b0e578 | 7401 | {\r |
b602265d DG |
7402 | int neg = 0;\r |
7403 | \r | |
7404 | while (1) {\r | |
7405 | switch (c) {\r | |
7406 | case ':':\r | |
7407 | case ')':\r | |
7408 | break;\r | |
7409 | \r | |
7410 | case '-': neg = 1; break;\r | |
7411 | case 'x': OPTION_NEGATE(option, ONIG_OPTION_EXTEND, neg); break;\r | |
7412 | case 'i': OPTION_NEGATE(option, ONIG_OPTION_IGNORECASE, neg); break;\r | |
7413 | case 's':\r | |
7414 | if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {\r | |
7415 | OPTION_NEGATE(option, ONIG_OPTION_MULTILINE, neg);\r | |
7416 | }\r | |
7417 | else\r | |
7418 | return ONIGERR_UNDEFINED_GROUP_OPTION;\r | |
7419 | break;\r | |
7420 | \r | |
7421 | case 'm':\r | |
7422 | if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {\r | |
7423 | OPTION_NEGATE(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 1 : 0));\r | |
7424 | }\r | |
7425 | else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) {\r | |
7426 | OPTION_NEGATE(option, ONIG_OPTION_MULTILINE, neg);\r | |
7427 | }\r | |
7428 | else\r | |
7429 | return ONIGERR_UNDEFINED_GROUP_OPTION;\r | |
7430 | break;\r | |
14b0e578 | 7431 | #ifdef USE_POSIXLINE_OPTION\r |
b602265d DG |
7432 | case 'p':\r |
7433 | OPTION_NEGATE(option, ONIG_OPTION_MULTILINE|ONIG_OPTION_SINGLELINE, neg);\r | |
7434 | break;\r | |
14b0e578 | 7435 | #endif\r |
b602265d DG |
7436 | case 'W': OPTION_NEGATE(option, ONIG_OPTION_WORD_IS_ASCII, neg); break;\r |
7437 | case 'D': OPTION_NEGATE(option, ONIG_OPTION_DIGIT_IS_ASCII, neg); break;\r | |
7438 | case 'S': OPTION_NEGATE(option, ONIG_OPTION_SPACE_IS_ASCII, neg); break;\r | |
7439 | case 'P': OPTION_NEGATE(option, ONIG_OPTION_POSIX_IS_ASCII, neg); break;\r | |
7440 | \r | |
7441 | default:\r | |
7442 | return ONIGERR_UNDEFINED_GROUP_OPTION;\r | |
7443 | }\r | |
7444 | \r | |
7445 | if (c == ')') {\r | |
7446 | *np = node_new_option(option);\r | |
7447 | CHECK_NULL_RETURN_MEMERR(*np);\r | |
7448 | *src = p;\r | |
7449 | return 2; /* option only */\r | |
7450 | }\r | |
7451 | else if (c == ':') {\r | |
7452 | OnigOptionType prev = env->options;\r | |
7453 | \r | |
7454 | env->options = option;\r | |
7455 | r = fetch_token(tok, &p, end, env);\r | |
7456 | if (r < 0) return r;\r | |
7457 | r = parse_subexp(&target, tok, term, &p, end, env);\r | |
7458 | env->options = prev;\r | |
7459 | if (r < 0) {\r | |
7460 | onig_node_free(target);\r | |
7461 | return r;\r | |
7462 | }\r | |
7463 | *np = node_new_option(option);\r | |
7464 | CHECK_NULL_RETURN_MEMERR(*np);\r | |
7465 | NODE_BODY(*np) = target;\r | |
7466 | *src = p;\r | |
7467 | return 0;\r | |
7468 | }\r | |
7469 | \r | |
7470 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
7471 | PFETCH(c);\r | |
7472 | }\r | |
14b0e578 CS |
7473 | }\r |
7474 | break;\r | |
7475 | \r | |
7476 | default:\r | |
7477 | return ONIGERR_UNDEFINED_GROUP_OPTION;\r | |
7478 | }\r | |
7479 | }\r | |
b602265d DG |
7480 | #ifdef USE_CALLOUT\r |
7481 | else if (c == '*' &&\r | |
7482 | IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME)) {\r | |
7483 | PINC;\r | |
7484 | r = parse_callout_of_name(np, ')', &p, end, env);\r | |
7485 | if (r != 0) return r;\r | |
7486 | \r | |
7487 | goto end;\r | |
7488 | }\r | |
7489 | #endif\r | |
14b0e578 | 7490 | else {\r |
b602265d | 7491 | if (ONIG_IS_OPTION_ON(env->options, ONIG_OPTION_DONT_CAPTURE_GROUP))\r |
14b0e578 CS |
7492 | goto group;\r |
7493 | \r | |
b602265d | 7494 | *np = node_new_memory(0);\r |
14b0e578 CS |
7495 | CHECK_NULL_RETURN_MEMERR(*np);\r |
7496 | num = scan_env_add_mem_entry(env);\r | |
7497 | if (num < 0) return num;\r | |
b602265d | 7498 | ENCLOSURE_(*np)->m.regnum = num;\r |
14b0e578 CS |
7499 | }\r |
7500 | \r | |
7501 | CHECK_NULL_RETURN_MEMERR(*np);\r | |
7502 | r = fetch_token(tok, &p, end, env);\r | |
7503 | if (r < 0) return r;\r | |
7504 | r = parse_subexp(&target, tok, term, &p, end, env);\r | |
b602265d DG |
7505 | if (r < 0) {\r |
7506 | onig_node_free(target);\r | |
7507 | return r;\r | |
7508 | }\r | |
14b0e578 | 7509 | \r |
b602265d DG |
7510 | NODE_BODY(*np) = target;\r |
7511 | \r | |
7512 | if (NODE_TYPE(*np) == NODE_ENCLOSURE) {\r | |
7513 | if (ENCLOSURE_(*np)->type == ENCLOSURE_MEMORY) {\r | |
14b0e578 | 7514 | /* Don't move this to previous of parse_subexp() */\r |
b602265d | 7515 | r = scan_env_set_mem_node(env, ENCLOSURE_(*np)->m.regnum, *np);\r |
14b0e578 CS |
7516 | if (r != 0) return r;\r |
7517 | }\r | |
7518 | }\r | |
7519 | \r | |
b602265d | 7520 | end:\r |
14b0e578 CS |
7521 | *src = p;\r |
7522 | return 0;\r | |
7523 | }\r | |
7524 | \r | |
/* Spellings of the six common quantifiers: the three greedy forms
   followed by their lazy counterparts. */
static const char* PopularQStr[] = {
  "?",
  "*",
  "+",
  "??",
  "*?",
  "+?"
};
7528 | \r | |
/* Text fragments naming what a nested quantifier pair reduces to.
   NOTE(review): presumably indexed by the reduce type used in the
   redundant-nested-repeat warning -- confirm against set_quantifier(). */
static const char* ReduceQStr[] = {
  "",
  "",
  "*",
  "*?",
  "??",
  "+ and ??",
  "+? and ?"
};
7532 | \r | |
7533 | static int\r | |
7534 | set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)\r | |
7535 | {\r | |
b602265d | 7536 | QuantNode* qn;\r |
14b0e578 | 7537 | \r |
b602265d DG |
7538 | qn = QUANT_(qnode);\r |
7539 | if (qn->lower == 1 && qn->upper == 1)\r | |
14b0e578 | 7540 | return 1;\r |
14b0e578 | 7541 | \r |
b602265d DG |
7542 | switch (NODE_TYPE(target)) {\r |
7543 | case NODE_STRING:\r | |
14b0e578 | 7544 | if (! group) {\r |
b602265d DG |
7545 | if (str_node_can_be_split(target, env->enc)) {\r |
7546 | Node* n = str_node_split_last_char(target, env->enc);\r | |
7547 | if (IS_NOT_NULL(n)) {\r | |
7548 | NODE_BODY(qnode) = n;\r | |
7549 | return 2;\r | |
7550 | }\r | |
14b0e578 CS |
7551 | }\r |
7552 | }\r | |
7553 | break;\r | |
7554 | \r | |
b602265d | 7555 | case NODE_QUANT:\r |
14b0e578 CS |
7556 | { /* check redundant double repeat. */\r |
7557 | /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */\r | |
b602265d DG |
7558 | QuantNode* qnt = QUANT_(target);\r |
7559 | int nestq_num = quantifier_type_num(qn);\r | |
7560 | int targetq_num = quantifier_type_num(qnt);\r | |
14b0e578 CS |
7561 | \r |
7562 | #ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR\r | |
b602265d DG |
7563 | if (targetq_num >= 0 && nestq_num >= 0 &&\r |
7564 | IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {\r | |
14b0e578 CS |
7565 | UChar buf[WARN_BUFSIZE];\r |
7566 | \r | |
7567 | switch(ReduceTypeTable[targetq_num][nestq_num]) {\r | |
7568 | case RQ_ASIS:\r | |
7569 | break;\r | |
7570 | \r | |
7571 | case RQ_DEL:\r | |
7572 | if (onig_verb_warn != onig_null_warn) {\r | |
7573 | onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,\r | |
b602265d DG |
7574 | env->pattern, env->pattern_end,\r |
7575 | (UChar* )"redundant nested repeat operator");\r | |
14b0e578 CS |
7576 | (*onig_verb_warn)((char* )buf);\r |
7577 | }\r | |
7578 | goto warn_exit;\r | |
7579 | break;\r | |
7580 | \r | |
7581 | default:\r | |
7582 | if (onig_verb_warn != onig_null_warn) {\r | |
7583 | onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,\r | |
7584 | env->pattern, env->pattern_end,\r | |
7585 | (UChar* )"nested repeat operator %s and %s was replaced with '%s'",\r | |
7586 | PopularQStr[targetq_num], PopularQStr[nestq_num],\r | |
7587 | ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]);\r | |
7588 | (*onig_verb_warn)((char* )buf);\r | |
7589 | }\r | |
7590 | goto warn_exit;\r | |
7591 | break;\r | |
7592 | }\r | |
7593 | }\r | |
7594 | \r | |
7595 | warn_exit:\r | |
7596 | #endif\r | |
b602265d DG |
7597 | if (targetq_num >= 0 && nestq_num < 0) {\r |
7598 | if (targetq_num == 1 || targetq_num == 2) { /* * or + */\r | |
7599 | /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */\r | |
7600 | if (! IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) {\r | |
7601 | qn->upper = (qn->lower == 0 ? 1 : qn->lower);\r | |
7602 | }\r | |
7603 | }\r | |
7604 | }\r | |
7605 | else {\r | |
7606 | NODE_BODY(qnode) = target;\r | |
7607 | onig_reduce_nested_quantifier(qnode, target);\r | |
7608 | goto q_exit;\r | |
14b0e578 CS |
7609 | }\r |
7610 | }\r | |
7611 | break;\r | |
7612 | \r | |
7613 | default:\r | |
7614 | break;\r | |
7615 | }\r | |
7616 | \r | |
b602265d | 7617 | NODE_BODY(qnode) = target;\r |
14b0e578 CS |
7618 | q_exit:\r |
7619 | return 0;\r | |
7620 | }\r | |
7621 | \r | |
7622 | \r | |
14b0e578 CS |
7623 | #ifndef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS\r |
7624 | static int\r | |
7625 | clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc)\r | |
7626 | {\r | |
7627 | BBuf *tbuf;\r | |
7628 | int r;\r | |
7629 | \r | |
7630 | if (IS_NCCLASS_NOT(cc)) {\r | |
7631 | bitset_invert(cc->bs);\r | |
7632 | \r | |
7633 | if (! ONIGENC_IS_SINGLEBYTE(enc)) {\r | |
7634 | r = not_code_range_buf(enc, cc->mbuf, &tbuf);\r | |
7635 | if (r != 0) return r;\r | |
7636 | \r | |
7637 | bbuf_free(cc->mbuf);\r | |
7638 | cc->mbuf = tbuf;\r | |
7639 | }\r | |
7640 | \r | |
7641 | NCCLASS_CLEAR_NOT(cc);\r | |
7642 | }\r | |
7643 | \r | |
7644 | return 0;\r | |
7645 | }\r | |
7646 | #endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */\r | |
7647 | \r | |
7648 | typedef struct {\r | |
7649 | ScanEnv* env;\r | |
7650 | CClassNode* cc;\r | |
7651 | Node* alt_root;\r | |
7652 | Node** ptail;\r | |
7653 | } IApplyCaseFoldArg;\r | |
7654 | \r | |
7655 | static int\r | |
b602265d | 7656 | i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], int to_len, void* arg)\r |
14b0e578 CS |
7657 | {\r |
7658 | IApplyCaseFoldArg* iarg;\r | |
7659 | ScanEnv* env;\r | |
7660 | CClassNode* cc;\r | |
7661 | BitSetRef bs;\r | |
7662 | \r | |
7663 | iarg = (IApplyCaseFoldArg* )arg;\r | |
7664 | env = iarg->env;\r | |
7665 | cc = iarg->cc;\r | |
7666 | bs = cc->bs;\r | |
7667 | \r | |
7668 | if (to_len == 1) {\r | |
7669 | int is_in = onig_is_code_in_cc(env->enc, from, cc);\r | |
7670 | #ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS\r | |
7671 | if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) ||\r | |
b602265d | 7672 | (is_in == 0 && IS_NCCLASS_NOT(cc))) {\r |
14b0e578 | 7673 | if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {\r |
b602265d | 7674 | add_code_range(&(cc->mbuf), env, *to, *to);\r |
14b0e578 CS |
7675 | }\r |
7676 | else {\r | |
b602265d | 7677 | BITSET_SET_BIT(bs, *to);\r |
14b0e578 CS |
7678 | }\r |
7679 | }\r | |
7680 | #else\r | |
7681 | if (is_in != 0) {\r | |
7682 | if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {\r | |
b602265d DG |
7683 | if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc);\r |
7684 | add_code_range(&(cc->mbuf), env, *to, *to);\r | |
14b0e578 CS |
7685 | }\r |
7686 | else {\r | |
b602265d DG |
7687 | if (IS_NCCLASS_NOT(cc)) {\r |
7688 | BITSET_CLEAR_BIT(bs, *to);\r | |
7689 | }\r | |
7690 | else\r | |
7691 | BITSET_SET_BIT(bs, *to);\r | |
14b0e578 CS |
7692 | }\r |
7693 | }\r | |
7694 | #endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */\r | |
7695 | }\r | |
7696 | else {\r | |
7697 | int r, i, len;\r | |
7698 | UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];\r | |
7699 | Node *snode = NULL_NODE;\r | |
7700 | \r | |
7701 | if (onig_is_code_in_cc(env->enc, from, cc)\r | |
7702 | #ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS\r | |
b602265d | 7703 | && !IS_NCCLASS_NOT(cc)\r |
14b0e578 | 7704 | #endif\r |
b602265d | 7705 | ) {\r |
14b0e578 | 7706 | for (i = 0; i < to_len; i++) {\r |
b602265d DG |
7707 | len = ONIGENC_CODE_TO_MBC(env->enc, to[i], buf);\r |
7708 | if (i == 0) {\r | |
7709 | snode = onig_node_new_str(buf, buf + len);\r | |
7710 | CHECK_NULL_RETURN_MEMERR(snode);\r | |
7711 | \r | |
7712 | /* char-class expanded multi-char only\r | |
7713 | compare with string folded at match time. */\r | |
7714 | NODE_STRING_SET_AMBIG(snode);\r | |
7715 | }\r | |
7716 | else {\r | |
7717 | r = onig_node_str_cat(snode, buf, buf + len);\r | |
7718 | if (r < 0) {\r | |
7719 | onig_node_free(snode);\r | |
7720 | return r;\r | |
7721 | }\r | |
7722 | }\r | |
14b0e578 CS |
7723 | }\r |
7724 | \r | |
7725 | *(iarg->ptail) = onig_node_new_alt(snode, NULL_NODE);\r | |
7726 | CHECK_NULL_RETURN_MEMERR(*(iarg->ptail));\r | |
b602265d | 7727 | iarg->ptail = &(NODE_CDR((*(iarg->ptail))));\r |
14b0e578 CS |
7728 | }\r |
7729 | }\r | |
7730 | \r | |
7731 | return 0;\r | |
7732 | }\r | |
7733 | \r | |
7734 | static int\r | |
b602265d DG |
7735 | parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,\r |
7736 | ScanEnv* env)\r | |
14b0e578 CS |
7737 | {\r |
7738 | int r, len, group = 0;\r | |
7739 | Node* qn;\r | |
7740 | Node** targetp;\r | |
7741 | \r | |
7742 | *np = NULL;\r | |
7743 | if (tok->type == (enum TokenSyms )term)\r | |
7744 | goto end_of_token;\r | |
7745 | \r | |
7746 | switch (tok->type) {\r | |
7747 | case TK_ALT:\r | |
7748 | case TK_EOT:\r | |
7749 | end_of_token:\r | |
16bd7c35 DG |
7750 | *np = node_new_empty();\r |
7751 | CHECK_NULL_RETURN_MEMERR(*np);\r | |
7752 | return tok->type;\r | |
14b0e578 CS |
7753 | break;\r |
7754 | \r | |
7755 | case TK_SUBEXP_OPEN:\r | |
b602265d | 7756 | r = parse_enclosure(np, tok, TK_SUBEXP_CLOSE, src, end, env);\r |
14b0e578 CS |
7757 | if (r < 0) return r;\r |
7758 | if (r == 1) group = 1;\r | |
7759 | else if (r == 2) { /* option only */\r | |
7760 | Node* target;\r | |
b602265d | 7761 | OnigOptionType prev = env->options;\r |
14b0e578 | 7762 | \r |
b602265d | 7763 | env->options = ENCLOSURE_(*np)->o.options;\r |
14b0e578 CS |
7764 | r = fetch_token(tok, src, end, env);\r |
7765 | if (r < 0) return r;\r | |
7766 | r = parse_subexp(&target, tok, term, src, end, env);\r | |
b602265d DG |
7767 | env->options = prev;\r |
7768 | if (r < 0) {\r | |
7769 | onig_node_free(target);\r | |
7770 | return r;\r | |
7771 | }\r | |
7772 | NODE_BODY(*np) = target;\r | |
14b0e578 CS |
7773 | return tok->type;\r |
7774 | }\r | |
7775 | break;\r | |
7776 | \r | |
7777 | case TK_SUBEXP_CLOSE:\r | |
7778 | if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP))\r | |
7779 | return ONIGERR_UNMATCHED_CLOSE_PARENTHESIS;\r | |
7780 | \r | |
7781 | if (tok->escaped) goto tk_raw_byte;\r | |
7782 | else goto tk_byte;\r | |
7783 | break;\r | |
7784 | \r | |
7785 | case TK_STRING:\r | |
7786 | tk_byte:\r | |
7787 | {\r | |
7788 | *np = node_new_str(tok->backp, *src);\r | |
7789 | CHECK_NULL_RETURN_MEMERR(*np);\r | |
7790 | \r | |
7791 | while (1) {\r | |
b602265d DG |
7792 | r = fetch_token(tok, src, end, env);\r |
7793 | if (r < 0) return r;\r | |
7794 | if (r != TK_STRING) break;\r | |
14b0e578 | 7795 | \r |
b602265d DG |
7796 | r = onig_node_str_cat(*np, tok->backp, *src);\r |
7797 | if (r < 0) return r;\r | |
14b0e578 CS |
7798 | }\r |
7799 | \r | |
7800 | string_end:\r | |
7801 | targetp = np;\r | |
7802 | goto repeat;\r | |
7803 | }\r | |
7804 | break;\r | |
7805 | \r | |
7806 | case TK_RAW_BYTE:\r | |
7807 | tk_raw_byte:\r | |
7808 | {\r | |
7809 | *np = node_new_str_raw_char((UChar )tok->u.c);\r | |
7810 | CHECK_NULL_RETURN_MEMERR(*np);\r | |
7811 | len = 1;\r | |
7812 | while (1) {\r | |
b602265d DG |
7813 | if (len >= ONIGENC_MBC_MINLEN(env->enc)) {\r |
7814 | if (len == enclen(env->enc, STR_(*np)->s)) {/* should not enclen_end() */\r | |
7815 | r = fetch_token(tok, src, end, env);\r | |
7816 | NODE_STRING_CLEAR_RAW(*np);\r | |
7817 | goto string_end;\r | |
7818 | }\r | |
7819 | }\r | |
7820 | \r | |
7821 | r = fetch_token(tok, src, end, env);\r | |
7822 | if (r < 0) return r;\r | |
7823 | if (r != TK_RAW_BYTE) {\r | |
7824 | /* Don't use this, it is wrong for little endian encodings. */\r | |
14b0e578 | 7825 | #ifdef USE_PAD_TO_SHORT_BYTE_CHAR\r |
b602265d DG |
7826 | int rem;\r |
7827 | if (len < ONIGENC_MBC_MINLEN(env->enc)) {\r | |
7828 | rem = ONIGENC_MBC_MINLEN(env->enc) - len;\r | |
7829 | (void )node_str_head_pad(STR_(*np), rem, (UChar )0);\r | |
7830 | if (len + rem == enclen(env->enc, STR_(*np)->s)) {\r | |
7831 | NODE_STRING_CLEAR_RAW(*np);\r | |
7832 | goto string_end;\r | |
7833 | }\r | |
7834 | }\r | |
14b0e578 | 7835 | #endif\r |
b602265d DG |
7836 | return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;\r |
7837 | }\r | |
14b0e578 | 7838 | \r |
b602265d DG |
7839 | r = node_str_cat_char(*np, (UChar )tok->u.c);\r |
7840 | if (r < 0) return r;\r | |
14b0e578 | 7841 | \r |
b602265d | 7842 | len++;\r |
14b0e578 CS |
7843 | }\r |
7844 | }\r | |
7845 | break;\r | |
7846 | \r | |
7847 | case TK_CODE_POINT:\r | |
7848 | {\r | |
7849 | UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];\r | |
7850 | int num = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf);\r | |
7851 | if (num < 0) return num;\r | |
7852 | #ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG\r | |
7853 | *np = node_new_str_raw(buf, buf + num);\r | |
7854 | #else\r | |
7855 | *np = node_new_str(buf, buf + num);\r | |
7856 | #endif\r | |
7857 | CHECK_NULL_RETURN_MEMERR(*np);\r | |
7858 | }\r | |
7859 | break;\r | |
7860 | \r | |
7861 | case TK_QUOTE_OPEN:\r | |
7862 | {\r | |
7863 | OnigCodePoint end_op[2];\r | |
7864 | UChar *qstart, *qend, *nextp;\r | |
7865 | \r | |
7866 | end_op[0] = (OnigCodePoint )MC_ESC(env->syntax);\r | |
7867 | end_op[1] = (OnigCodePoint )'E';\r | |
7868 | qstart = *src;\r | |
7869 | qend = find_str_position(end_op, 2, qstart, end, &nextp, env->enc);\r | |
7870 | if (IS_NULL(qend)) {\r | |
b602265d | 7871 | nextp = qend = end;\r |
14b0e578 CS |
7872 | }\r |
7873 | *np = node_new_str(qstart, qend);\r | |
7874 | CHECK_NULL_RETURN_MEMERR(*np);\r | |
7875 | *src = nextp;\r | |
7876 | }\r | |
7877 | break;\r | |
7878 | \r | |
7879 | case TK_CHAR_TYPE:\r | |
7880 | {\r | |
7881 | switch (tok->u.prop.ctype) {\r | |
7882 | case ONIGENC_CTYPE_WORD:\r | |
b602265d DG |
7883 | *np = node_new_ctype(tok->u.prop.ctype, tok->u.prop.not, env->options);\r |
7884 | CHECK_NULL_RETURN_MEMERR(*np);\r | |
7885 | break;\r | |
14b0e578 CS |
7886 | \r |
7887 | case ONIGENC_CTYPE_SPACE:\r | |
7888 | case ONIGENC_CTYPE_DIGIT:\r | |
7889 | case ONIGENC_CTYPE_XDIGIT:\r | |
b602265d DG |
7890 | {\r |
7891 | CClassNode* cc;\r | |
7892 | \r | |
7893 | *np = node_new_cclass();\r | |
7894 | CHECK_NULL_RETURN_MEMERR(*np);\r | |
7895 | cc = CCLASS_(*np);\r | |
7896 | add_ctype_to_cc(cc, tok->u.prop.ctype, 0, env);\r | |
7897 | if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);\r | |
7898 | }\r | |
7899 | break;\r | |
14b0e578 CS |
7900 | \r |
7901 | default:\r | |
b602265d DG |
7902 | return ONIGERR_PARSER_BUG;\r |
7903 | break;\r | |
14b0e578 CS |
7904 | }\r |
7905 | }\r | |
7906 | break;\r | |
7907 | \r | |
7908 | case TK_CHAR_PROPERTY:\r | |
7909 | r = parse_char_property(np, tok, src, end, env);\r | |
7910 | if (r != 0) return r;\r | |
7911 | break;\r | |
7912 | \r | |
7913 | case TK_CC_OPEN:\r | |
7914 | {\r | |
7915 | CClassNode* cc;\r | |
7916 | \r | |
7917 | r = parse_char_class(np, tok, src, end, env);\r | |
7918 | if (r != 0) return r;\r | |
7919 | \r | |
b602265d DG |
7920 | cc = CCLASS_(*np);\r |
7921 | if (IS_IGNORECASE(env->options)) {\r | |
7922 | IApplyCaseFoldArg iarg;\r | |
7923 | \r | |
7924 | iarg.env = env;\r | |
7925 | iarg.cc = cc;\r | |
7926 | iarg.alt_root = NULL_NODE;\r | |
7927 | iarg.ptail = &(iarg.alt_root);\r | |
7928 | \r | |
7929 | r = ONIGENC_APPLY_ALL_CASE_FOLD(env->enc, env->case_fold_flag,\r | |
7930 | i_apply_case_fold, &iarg);\r | |
7931 | if (r != 0) {\r | |
7932 | onig_node_free(iarg.alt_root);\r | |
7933 | return r;\r | |
7934 | }\r | |
7935 | if (IS_NOT_NULL(iarg.alt_root)) {\r | |
14b0e578 CS |
7936 | Node* work = onig_node_new_alt(*np, iarg.alt_root);\r |
7937 | if (IS_NULL(work)) {\r | |
7938 | onig_node_free(iarg.alt_root);\r | |
7939 | return ONIGERR_MEMORY;\r | |
7940 | }\r | |
7941 | *np = work;\r | |
b602265d | 7942 | }\r |
14b0e578 CS |
7943 | }\r |
7944 | }\r | |
7945 | break;\r | |
7946 | \r | |
7947 | case TK_ANYCHAR:\r | |
7948 | *np = node_new_anychar();\r | |
7949 | CHECK_NULL_RETURN_MEMERR(*np);\r | |
7950 | break;\r | |
7951 | \r | |
7952 | case TK_ANYCHAR_ANYTIME:\r | |
7953 | *np = node_new_anychar();\r | |
7954 | CHECK_NULL_RETURN_MEMERR(*np);\r | |
7955 | qn = node_new_quantifier(0, REPEAT_INFINITE, 0);\r | |
7956 | CHECK_NULL_RETURN_MEMERR(qn);\r | |
b602265d | 7957 | NODE_BODY(qn) = *np;\r |
14b0e578 CS |
7958 | *np = qn;\r |
7959 | break;\r | |
7960 | \r | |
7961 | case TK_BACKREF:\r | |
7962 | len = tok->u.backref.num;\r | |
7963 | *np = node_new_backref(len,\r | |
b602265d DG |
7964 | (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)),\r |
7965 | tok->u.backref.by_name,\r | |
14b0e578 | 7966 | #ifdef USE_BACKREF_WITH_LEVEL\r |
b602265d DG |
7967 | tok->u.backref.exist_level,\r |
7968 | tok->u.backref.level,\r | |
14b0e578 | 7969 | #endif\r |
b602265d | 7970 | env);\r |
14b0e578 CS |
7971 | CHECK_NULL_RETURN_MEMERR(*np);\r |
7972 | break;\r | |
7973 | \r | |
b602265d | 7974 | #ifdef USE_CALL\r |
14b0e578 CS |
7975 | case TK_CALL:\r |
7976 | {\r | |
7977 | int gnum = tok->u.call.gnum;\r | |
7978 | \r | |
b602265d DG |
7979 | *np = node_new_call(tok->u.call.name, tok->u.call.name_end,\r |
7980 | gnum, tok->u.call.by_number);\r | |
14b0e578 CS |
7981 | CHECK_NULL_RETURN_MEMERR(*np);\r |
7982 | env->num_call++;\r | |
b602265d DG |
7983 | if (tok->u.call.by_number != 0 && gnum == 0) {\r |
7984 | env->has_call_zero = 1;\r | |
7985 | }\r | |
14b0e578 CS |
7986 | }\r |
7987 | break;\r | |
7988 | #endif\r | |
7989 | \r | |
7990 | case TK_ANCHOR:\r | |
b602265d DG |
7991 | {\r |
7992 | int ascii_mode =\r | |
7993 | IS_WORD_ASCII(env->options) && IS_WORD_ANCHOR_TYPE(tok->u.anchor) ? 1 : 0;\r | |
7994 | *np = onig_node_new_anchor(tok->u.anchor, ascii_mode);\r | |
df8be9e5 | 7995 | CHECK_NULL_RETURN_MEMERR(*np);\r |
b602265d | 7996 | }\r |
14b0e578 CS |
7997 | break;\r |
7998 | \r | |
7999 | case TK_OP_REPEAT:\r | |
8000 | case TK_INTERVAL:\r | |
8001 | if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS)) {\r | |
8002 | if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS))\r | |
b602265d | 8003 | return ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED;\r |
16bd7c35 | 8004 | else {\r |
b602265d | 8005 | *np = node_new_empty();\r |
16bd7c35 DG |
8006 | CHECK_NULL_RETURN_MEMERR(*np);\r |
8007 | }\r | |
14b0e578 CS |
8008 | }\r |
8009 | else {\r | |
8010 | goto tk_byte;\r | |
8011 | }\r | |
8012 | break;\r | |
8013 | \r | |
b602265d DG |
8014 | case TK_KEEP:\r |
8015 | r = node_new_keep(np, env);\r | |
8016 | if (r < 0) return r;\r | |
8017 | break;\r | |
8018 | \r | |
8019 | case TK_GENERAL_NEWLINE:\r | |
8020 | r = node_new_general_newline(np, env);\r | |
8021 | if (r < 0) return r;\r | |
8022 | break;\r | |
8023 | \r | |
8024 | case TK_NO_NEWLINE:\r | |
8025 | r = node_new_no_newline(np, env);\r | |
8026 | if (r < 0) return r;\r | |
8027 | break;\r | |
8028 | \r | |
8029 | case TK_TRUE_ANYCHAR:\r | |
8030 | r = node_new_true_anychar(np, env);\r | |
8031 | if (r < 0) return r;\r | |
8032 | break;\r | |
8033 | \r | |
8034 | case TK_EXTENDED_GRAPHEME_CLUSTER:\r | |
8035 | r = make_extended_grapheme_cluster(np, env);\r | |
8036 | if (r < 0) return r;\r | |
8037 | break;\r | |
8038 | \r | |
14b0e578 CS |
8039 | default:\r |
8040 | return ONIGERR_PARSER_BUG;\r | |
8041 | break;\r | |
8042 | }\r | |
8043 | \r | |
8044 | {\r | |
8045 | targetp = np;\r | |
8046 | \r | |
8047 | re_entry:\r | |
8048 | r = fetch_token(tok, src, end, env);\r | |
8049 | if (r < 0) return r;\r | |
8050 | \r | |
8051 | repeat:\r | |
8052 | if (r == TK_OP_REPEAT || r == TK_INTERVAL) {\r | |
8053 | if (is_invalid_quantifier_target(*targetp))\r | |
b602265d | 8054 | return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID;\r |
14b0e578 CS |
8055 | \r |
8056 | qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper,\r | |
b602265d | 8057 | (r == TK_INTERVAL ? 1 : 0));\r |
14b0e578 | 8058 | CHECK_NULL_RETURN_MEMERR(qn);\r |
b602265d | 8059 | QUANT_(qn)->greedy = tok->u.repeat.greedy;\r |
14b0e578 CS |
8060 | r = set_quantifier(qn, *targetp, group, env);\r |
8061 | if (r < 0) {\r | |
b602265d DG |
8062 | onig_node_free(qn);\r |
8063 | return r;\r | |
14b0e578 CS |
8064 | }\r |
8065 | \r | |
8066 | if (tok->u.repeat.possessive != 0) {\r | |
b602265d DG |
8067 | Node* en;\r |
8068 | en = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);\r | |
8069 | if (IS_NULL(en)) {\r | |
8070 | onig_node_free(qn);\r | |
8071 | return ONIGERR_MEMORY;\r | |
8072 | }\r | |
8073 | NODE_BODY(en) = qn;\r | |
8074 | qn = en;\r | |
14b0e578 CS |
8075 | }\r |
8076 | \r | |
8077 | if (r == 0) {\r | |
b602265d | 8078 | *targetp = qn;\r |
14b0e578 CS |
8079 | }\r |
8080 | else if (r == 1) {\r | |
b602265d | 8081 | onig_node_free(qn);\r |
14b0e578 CS |
8082 | }\r |
8083 | else if (r == 2) { /* split case: /abc+/ */\r | |
b602265d DG |
8084 | Node *tmp;\r |
8085 | \r | |
8086 | *targetp = node_new_list(*targetp, NULL);\r | |
8087 | if (IS_NULL(*targetp)) {\r | |
8088 | onig_node_free(qn);\r | |
8089 | return ONIGERR_MEMORY;\r | |
8090 | }\r | |
8091 | tmp = NODE_CDR(*targetp) = node_new_list(qn, NULL);\r | |
8092 | if (IS_NULL(tmp)) {\r | |
8093 | onig_node_free(qn);\r | |
8094 | return ONIGERR_MEMORY;\r | |
8095 | }\r | |
8096 | targetp = &(NODE_CAR(tmp));\r | |
14b0e578 CS |
8097 | }\r |
8098 | goto re_entry;\r | |
8099 | }\r | |
8100 | }\r | |
8101 | \r | |
8102 | return r;\r | |
8103 | }\r | |
8104 | \r | |
8105 | static int\r | |
b602265d DG |
8106 | parse_branch(Node** top, OnigToken* tok, int term, UChar** src, UChar* end,\r |
8107 | ScanEnv* env)\r | |
14b0e578 CS |
8108 | {\r |
8109 | int r;\r | |
8110 | Node *node, **headp;\r | |
8111 | \r | |
8112 | *top = NULL;\r | |
8113 | r = parse_exp(&node, tok, term, src, end, env);\r | |
b602265d DG |
8114 | if (r < 0) {\r |
8115 | onig_node_free(node);\r | |
8116 | return r;\r | |
8117 | }\r | |
14b0e578 CS |
8118 | \r |
8119 | if (r == TK_EOT || r == term || r == TK_ALT) {\r | |
8120 | *top = node;\r | |
8121 | }\r | |
8122 | else {\r | |
8123 | *top = node_new_list(node, NULL);\r | |
a5def177 DG |
8124 | if (IS_NULL(*top)) {\r |
8125 | onig_node_free(node);\r | |
8126 | return ONIGERR_MEMORY;\r | |
8127 | }\r | |
8128 | \r | |
b602265d | 8129 | headp = &(NODE_CDR(*top));\r |
14b0e578 CS |
8130 | while (r != TK_EOT && r != term && r != TK_ALT) {\r |
8131 | r = parse_exp(&node, tok, term, src, end, env);\r | |
b602265d DG |
8132 | if (r < 0) {\r |
8133 | onig_node_free(node);\r | |
8134 | return r;\r | |
8135 | }\r | |
14b0e578 | 8136 | \r |
b602265d DG |
8137 | if (NODE_TYPE(node) == NODE_LIST) {\r |
8138 | *headp = node;\r | |
8139 | while (IS_NOT_NULL(NODE_CDR(node))) node = NODE_CDR(node);\r | |
8140 | headp = &(NODE_CDR(node));\r | |
14b0e578 CS |
8141 | }\r |
8142 | else {\r | |
b602265d DG |
8143 | *headp = node_new_list(node, NULL);\r |
8144 | headp = &(NODE_CDR(*headp));\r | |
14b0e578 CS |
8145 | }\r |
8146 | }\r | |
8147 | }\r | |
8148 | \r | |
8149 | return r;\r | |
8150 | }\r | |
8151 | \r | |
8152 | /* term_tok: TK_EOT or TK_SUBEXP_CLOSE */\r | |
8153 | static int\r | |
b602265d DG |
8154 | parse_subexp(Node** top, OnigToken* tok, int term, UChar** src, UChar* end,\r |
8155 | ScanEnv* env)\r | |
14b0e578 CS |
8156 | {\r |
8157 | int r;\r | |
8158 | Node *node, **headp;\r | |
8159 | \r | |
8160 | *top = NULL;\r | |
b602265d DG |
8161 | env->parse_depth++;\r |
8162 | if (env->parse_depth > ParseDepthLimit)\r | |
8163 | return ONIGERR_PARSE_DEPTH_LIMIT_OVER;\r | |
a5def177 | 8164 | \r |
14b0e578 CS |
8165 | r = parse_branch(&node, tok, term, src, end, env);\r |
8166 | if (r < 0) {\r | |
8167 | onig_node_free(node);\r | |
8168 | return r;\r | |
8169 | }\r | |
8170 | \r | |
8171 | if (r == term) {\r | |
8172 | *top = node;\r | |
8173 | }\r | |
8174 | else if (r == TK_ALT) {\r | |
8175 | *top = onig_node_new_alt(node, NULL);\r | |
a5def177 DG |
8176 | if (IS_NULL(*top)) {\r |
8177 | onig_node_free(node);\r | |
8178 | return ONIGERR_MEMORY;\r | |
8179 | }\r | |
8180 | \r | |
b602265d | 8181 | headp = &(NODE_CDR(*top));\r |
14b0e578 CS |
8182 | while (r == TK_ALT) {\r |
8183 | r = fetch_token(tok, src, end, env);\r | |
8184 | if (r < 0) return r;\r | |
8185 | r = parse_branch(&node, tok, term, src, end, env);\r | |
b602265d DG |
8186 | if (r < 0) {\r |
8187 | onig_node_free(node);\r | |
8188 | return r;\r | |
8189 | }\r | |
14b0e578 | 8190 | *headp = onig_node_new_alt(node, NULL);\r |
a5def177 DG |
8191 | if (IS_NULL(*headp)) {\r |
8192 | onig_node_free(node);\r | |
8193 | onig_node_free(*top);\r | |
8194 | return ONIGERR_MEMORY;\r | |
8195 | }\r | |
8196 | \r | |
b602265d | 8197 | headp = &(NODE_CDR(*headp));\r |
14b0e578 CS |
8198 | }\r |
8199 | \r | |
8200 | if (tok->type != (enum TokenSyms )term)\r | |
8201 | goto err;\r | |
8202 | }\r | |
8203 | else {\r | |
b602265d | 8204 | onig_node_free(node);\r |
14b0e578 CS |
8205 | err:\r |
8206 | if (term == TK_SUBEXP_CLOSE)\r | |
8207 | return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;\r | |
8208 | else\r | |
8209 | return ONIGERR_PARSER_BUG;\r | |
8210 | }\r | |
8211 | \r | |
b602265d | 8212 | env->parse_depth--;\r |
14b0e578 CS |
8213 | return r;\r |
8214 | }\r | |
8215 | \r | |
8216 | static int\r | |
8217 | parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)\r | |
8218 | {\r | |
8219 | int r;\r | |
8220 | OnigToken tok;\r | |
8221 | \r | |
8222 | r = fetch_token(&tok, src, end, env);\r | |
8223 | if (r < 0) return r;\r | |
8224 | r = parse_subexp(top, &tok, TK_EOT, src, end, env);\r | |
8225 | if (r < 0) return r;\r | |
b602265d DG |
8226 | \r |
8227 | return 0;\r | |
8228 | }\r | |
8229 | \r | |
#ifdef USE_CALL
/*
 * Wrap `node` in an unnamed memory enclosure with group number 0 and
 * register that enclosure as memory node 0 in `env`.
 *
 * On success *rnode receives the new enclosure (whose body is `node`) and
 * 0 is returned.  On failure a non-zero code is returned, *rnode is left
 * untouched and `node` itself is NOT freed: onig_parse_tree() still holds it
 * in *root.
 */
static int
make_call_zero_body(Node* node, ScanEnv* env, Node** rnode)
{
  int r;

  Node* x = node_new_memory(0 /* 0: is not named */);
  CHECK_NULL_RETURN_MEMERR(x);

  NODE_BODY(x) = node;
  ENCLOSURE_(x)->m.regnum = 0;
  r = scan_env_set_mem_node(env, 0, x);
  if (r != 0) {
    /* detach the caller's tree so that only the wrapper is released */
    NODE_BODY(x) = NULL_NODE;
    onig_node_free(x);
    return r;
  }

  *rnode = x;
  return 0;
}
#endif
14b0e578 CS |
8251 | \r |
8252 | extern int\r | |
b602265d DG |
8253 | onig_parse_tree(Node** root, const UChar* pattern, const UChar* end,\r |
8254 | regex_t* reg, ScanEnv* env)\r | |
14b0e578 CS |
8255 | {\r |
8256 | int r;\r | |
8257 | UChar* p;\r | |
b602265d DG |
8258 | #ifdef USE_CALLOUT\r |
8259 | RegexExt* ext;\r | |
8260 | #endif\r | |
14b0e578 | 8261 | \r |
14b0e578 | 8262 | names_clear(reg);\r |
14b0e578 CS |
8263 | \r |
8264 | scan_env_clear(env);\r | |
b602265d | 8265 | env->options = reg->options;\r |
14b0e578 CS |
8266 | env->case_fold_flag = reg->case_fold_flag;\r |
8267 | env->enc = reg->enc;\r | |
8268 | env->syntax = reg->syntax;\r | |
8269 | env->pattern = (UChar* )pattern;\r | |
8270 | env->pattern_end = (UChar* )end;\r | |
8271 | env->reg = reg;\r | |
8272 | \r | |
8273 | *root = NULL;\r | |
b602265d DG |
8274 | \r |
8275 | if (! ONIGENC_IS_VALID_MBC_STRING(env->enc, pattern, end))\r | |
8276 | return ONIGERR_INVALID_WIDE_CHAR_VALUE;\r | |
8277 | \r | |
14b0e578 CS |
8278 | p = (UChar* )pattern;\r |
8279 | r = parse_regexp(root, &p, (UChar* )end, env);\r | |
b602265d DG |
8280 | \r |
8281 | #ifdef USE_CALL\r | |
8282 | if (r != 0) return r;\r | |
8283 | \r | |
8284 | if (env->has_call_zero != 0) {\r | |
8285 | Node* zero_node;\r | |
8286 | r = make_call_zero_body(*root, env, &zero_node);\r | |
8287 | if (r != 0) return r;\r | |
8288 | \r | |
8289 | *root = zero_node;\r | |
8290 | }\r | |
8291 | #endif\r | |
8292 | \r | |
14b0e578 | 8293 | reg->num_mem = env->num_mem;\r |
b602265d DG |
8294 | \r |
8295 | #ifdef USE_CALLOUT\r | |
8296 | ext = REG_EXTP(reg);\r | |
8297 | if (IS_NOT_NULL(ext) && ext->callout_num > 0) {\r | |
8298 | r = setup_ext_callout_list_values(reg);\r | |
8299 | }\r | |
8300 | #endif\r | |
8301 | \r | |
14b0e578 CS |
8302 | return r;\r |
8303 | }\r | |
8304 | \r | |
8305 | extern void\r | |
8306 | onig_scan_env_set_error_string(ScanEnv* env, int ecode ARG_UNUSED,\r | |
b602265d | 8307 | UChar* arg, UChar* arg_end)\r |
14b0e578 CS |
8308 | {\r |
8309 | env->error = arg;\r | |
8310 | env->error_end = arg_end;\r | |
8311 | }\r |