]>
Commit | Line | Data |
---|---|---|
14b0e578 CS |
1 | /**********************************************************************\r |
2 | regparse.c - Oniguruma (regular expression library)\r | |
3 | **********************************************************************/\r | |
4 | /*-\r | |
b602265d | 5 | * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>\r |
14b0e578 CS |
6 | * All rights reserved.\r |
7 | *\r | |
14b0e578 CS |
8 | * Redistribution and use in source and binary forms, with or without\r |
9 | * modification, are permitted provided that the following conditions\r | |
10 | * are met:\r | |
11 | * 1. Redistributions of source code must retain the above copyright\r | |
12 | * notice, this list of conditions and the following disclaimer.\r | |
13 | * 2. Redistributions in binary form must reproduce the above copyright\r | |
14 | * notice, this list of conditions and the following disclaimer in the\r | |
15 | * documentation and/or other materials provided with the distribution.\r | |
16 | *\r | |
17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND\r | |
18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\r | |
19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\r | |
20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE\r | |
21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\r | |
22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS\r | |
23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)\r | |
24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT\r | |
25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY\r | |
26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF\r | |
27 | * SUCH DAMAGE.\r | |
28 | */\r | |
29 | \r | |
30 | #include "regparse.h"\r | |
31 | #include "st.h"\r | |
32 | \r | |
b602265d DG |
33 | #ifdef DEBUG_NODE_FREE\r |
34 | #include <stdio.h>\r | |
35 | #endif\r | |
36 | \r | |
37 | #define INIT_TAG_NAMES_ALLOC_NUM 5\r | |
38 | \r | |
14b0e578 CS |
39 | #define WARN_BUFSIZE 256\r |
40 | \r | |
41 | #define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS\r | |
42 | \r | |
b602265d DG |
/* Non-zero when c is an ASCII letter, an ASCII digit or '_'.
   Every use of the argument is parenthesized so that an arbitrary
   expression can be passed; the argument is still evaluated several
   times, so it must not have side effects. */
#define IS_ALLOWED_CODE_IN_CALLOUT_NAME(c) \
  (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z') || ((c) >= '0' && (c) <= '9') || (c) == '_' /* || (c) == '!' */)
/* Same character set as above, used for callout tag names. */
#define IS_ALLOWED_CODE_IN_CALLOUT_TAG_NAME(c) \
  (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z') || ((c) >= '0' && (c) <= '9') || (c) == '_')
47 | \r | |
48 | \r | |
49 | OnigSyntaxType OnigSyntaxOniguruma = {\r | |
50 | (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |\r | |
51 | ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |\r | |
52 | ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_O_BRACE_OCTAL |\r | |
53 | ONIG_SYN_OP_ESC_CONTROL_CHARS |\r | |
54 | ONIG_SYN_OP_ESC_C_CONTROL )\r | |
55 | & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )\r | |
56 | , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |\r | |
57 | ONIG_SYN_OP2_OPTION_RUBY |\r | |
58 | ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |\r | |
59 | ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE |\r | |
60 | ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP |\r | |
61 | ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS |\r | |
62 | ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME |\r | |
63 | ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER |\r | |
64 | ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE |\r | |
65 | ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT |\r | |
66 | ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP |\r | |
67 | ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |\r | |
68 | ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |\r | |
69 | ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |\r | |
70 | ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |\r | |
71 | ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |\r | |
72 | ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |\r | |
73 | ONIG_SYN_OP2_ESC_H_XDIGIT | ONIG_SYN_OP2_ESC_U_HEX4 )\r | |
74 | , ( SYN_GNU_REGEX_BV | \r | |
75 | ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |\r | |
76 | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |\r | |
77 | ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |\r | |
78 | ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |\r | |
79 | ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |\r | |
80 | ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |\r | |
81 | ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )\r | |
82 | , ONIG_OPTION_NONE\r | |
83 | ,\r | |
84 | {\r | |
85 | (OnigCodePoint )'\\' /* esc */\r | |
86 | , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */\r | |
87 | , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */\r | |
88 | , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */\r | |
89 | , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */\r | |
90 | , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */\r | |
91 | }\r | |
92 | };\r | |
14b0e578 CS |
93 | \r |
94 | OnigSyntaxType OnigSyntaxRuby = {\r | |
95 | (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |\r | |
96 | ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |\r | |
b602265d DG |
97 | ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_O_BRACE_OCTAL |\r |
98 | ONIG_SYN_OP_ESC_CONTROL_CHARS |\r | |
14b0e578 CS |
99 | ONIG_SYN_OP_ESC_C_CONTROL )\r |
100 | & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )\r | |
101 | , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |\r | |
102 | ONIG_SYN_OP2_OPTION_RUBY |\r | |
103 | ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |\r | |
b602265d DG |
104 | ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE |\r |
105 | ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP |\r | |
106 | ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER |\r | |
107 | ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE |\r | |
108 | ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP |\r | |
14b0e578 CS |
109 | ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |\r |
110 | ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |\r | |
111 | ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |\r | |
112 | ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |\r | |
113 | ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |\r | |
114 | ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |\r | |
b602265d | 115 | ONIG_SYN_OP2_ESC_H_XDIGIT | ONIG_SYN_OP2_ESC_U_HEX4 )\r |
14b0e578 CS |
116 | , ( SYN_GNU_REGEX_BV | \r |
117 | ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |\r | |
118 | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |\r | |
119 | ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |\r | |
120 | ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |\r | |
121 | ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |\r | |
122 | ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |\r | |
123 | ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )\r | |
124 | , ONIG_OPTION_NONE\r | |
125 | ,\r | |
126 | {\r | |
127 | (OnigCodePoint )'\\' /* esc */\r | |
128 | , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */\r | |
129 | , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */\r | |
130 | , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */\r | |
131 | , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */\r | |
132 | , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */\r | |
133 | }\r | |
134 | };\r | |
135 | \r | |
b602265d | 136 | OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_ONIGURUMA;\r |
14b0e578 CS |
137 | \r |
138 | extern void onig_null_warn(const char* s ARG_UNUSED) { }\r | |
139 | \r | |
140 | #ifdef DEFAULT_WARN_FUNCTION\r | |
141 | static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION;\r | |
142 | #else\r | |
143 | static OnigWarnFunc onig_warn = onig_null_warn;\r | |
144 | #endif\r | |
145 | \r | |
146 | #ifdef DEFAULT_VERB_WARN_FUNCTION\r | |
147 | static OnigWarnFunc onig_verb_warn = (OnigWarnFunc )DEFAULT_VERB_WARN_FUNCTION;\r | |
148 | #else\r | |
149 | static OnigWarnFunc onig_verb_warn = onig_null_warn;\r | |
150 | #endif\r | |
151 | \r | |
152 | extern void onig_set_warn_func(OnigWarnFunc f)\r | |
153 | {\r | |
154 | onig_warn = f;\r | |
155 | }\r | |
156 | \r | |
157 | extern void onig_set_verb_warn_func(OnigWarnFunc f)\r | |
158 | {\r | |
159 | onig_verb_warn = f;\r | |
160 | }\r | |
161 | \r | |
b602265d DG |
162 | extern void\r |
163 | onig_warning(const char* s)\r | |
164 | {\r | |
165 | if (onig_warn == onig_null_warn) return ;\r | |
166 | \r | |
167 | (*onig_warn)(s);\r | |
168 | }\r | |
169 | \r | |
#define DEFAULT_MAX_CAPTURE_NUM   32767

static int MaxCaptureNum = DEFAULT_MAX_CAPTURE_NUM;

/* Stores num in MaxCaptureNum.
   Returns 0 on success; a negative num is rejected with -1 and the
   current value is left unchanged. */
extern int
onig_set_capture_num_limit(int num)
{
  if (num >= 0) {
    MaxCaptureNum = num;
    return 0;
  }

  return -1;
}
182 | \r | |
183 | static unsigned int ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT;\r | |
184 | \r | |
185 | extern unsigned int\r | |
186 | onig_get_parse_depth_limit(void)\r | |
187 | {\r | |
188 | return ParseDepthLimit;\r | |
189 | }\r | |
190 | \r | |
191 | extern int\r | |
192 | onig_set_parse_depth_limit(unsigned int depth)\r | |
193 | {\r | |
194 | if (depth == 0)\r | |
195 | ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT;\r | |
196 | else\r | |
197 | ParseDepthLimit = depth;\r | |
198 | return 0;\r | |
199 | }\r | |
200 | \r | |
/* Multiplies two non-negative ints with an overflow check.
   Returns x * y when the product fits in an int, 0 when either operand
   is 0, and -1 when the product would exceed INT_MAX.
   Both operands are expected to be >= 0; negative values are not
   checked here. */
static int
positive_int_multiply(int x, int y)
{
  if (x == 0 || y == 0) return 0;

  /* x <= INT_MAX / y  implies  x * y <= INT_MAX, so the boundary product
     (e.g. 1 * INT_MAX) is representable and must not be rejected. */
  if (x <= INT_MAX / y)
    return x * y;
  else
    return -1;
}
211 | \r | |
14b0e578 CS |
212 | static void\r |
213 | bbuf_free(BBuf* bbuf)\r | |
214 | {\r | |
215 | if (IS_NOT_NULL(bbuf)) {\r | |
216 | if (IS_NOT_NULL(bbuf->p)) xfree(bbuf->p);\r | |
217 | xfree(bbuf);\r | |
218 | }\r | |
219 | }\r | |
220 | \r | |
221 | static int\r | |
222 | bbuf_clone(BBuf** rto, BBuf* from)\r | |
223 | {\r | |
224 | int r;\r | |
225 | BBuf *to;\r | |
226 | \r | |
227 | *rto = to = (BBuf* )xmalloc(sizeof(BBuf));\r | |
228 | CHECK_NULL_RETURN_MEMERR(to);\r | |
b602265d DG |
229 | r = BB_INIT(to, from->alloc);\r |
230 | if (r != 0) {\r | |
231 | xfree(to->p);\r | |
232 | *rto = 0;\r | |
233 | return r;\r | |
234 | }\r | |
14b0e578 CS |
235 | to->used = from->used;\r |
236 | xmemcpy(to->p, from->p, from->used);\r | |
237 | return 0;\r | |
238 | }\r | |
239 | \r | |
b602265d DG |
240 | static int backref_rel_to_abs(int rel_no, ScanEnv* env)\r |
241 | {\r | |
242 | if (rel_no > 0) {\r | |
243 | return env->num_mem + rel_no;\r | |
244 | }\r | |
245 | else {\r | |
246 | return env->num_mem + 1 + rel_no;\r | |
247 | }\r | |
248 | }\r | |
249 | \r | |
/* Option bit helpers: set or clear the flag bits f in v. */
#define OPTION_ON(v,f)     ((v) |= (f))
#define OPTION_OFF(v,f)    ((v) &= ~(f))

/* Clears f in v when `negative` is non-zero, sets it otherwise.
   The whole conditional is parenthesized so the macro can be used
   safely as part of a larger expression. */
#define OPTION_NEGATE(v,f,negative)    ((negative) ? ((v) &= ~(f)) : ((v) |= (f)))
14b0e578 CS |
254 | \r |
255 | #define MBCODE_START_POS(enc) \\r | |
256 | (OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80)\r | |
257 | \r | |
258 | #define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \\r | |
259 | add_code_range_to_buf(pbuf, MBCODE_START_POS(enc), ~((OnigCodePoint )0))\r | |
260 | \r | |
261 | #define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\\r | |
262 | if (! ONIGENC_IS_SINGLEBYTE(enc)) {\\r | |
263 | r = SET_ALL_MULTI_BYTE_RANGE(enc, &(mbuf));\\r | |
b602265d | 264 | if (r != 0) return r;\\r |
14b0e578 CS |
265 | }\\r |
266 | } while (0)\r | |
267 | \r | |
268 | \r | |
269 | #define BITSET_IS_EMPTY(bs,empty) do {\\r | |
270 | int i;\\r | |
271 | empty = 1;\\r | |
272 | for (i = 0; i < (int )BITSET_SIZE; i++) {\\r | |
273 | if ((bs)[i] != 0) {\\r | |
274 | empty = 0; break;\\r | |
275 | }\\r | |
276 | }\\r | |
277 | } while (0)\r | |
278 | \r | |
279 | static void\r | |
280 | bitset_set_range(BitSetRef bs, int from, int to)\r | |
281 | {\r | |
282 | int i;\r | |
283 | for (i = from; i <= to && i < SINGLE_BYTE_SIZE; i++) {\r | |
284 | BITSET_SET_BIT(bs, i);\r | |
285 | }\r | |
286 | }\r | |
287 | \r | |
288 | #if 0\r | |
289 | static void\r | |
290 | bitset_set_all(BitSetRef bs)\r | |
291 | {\r | |
292 | int i;\r | |
293 | for (i = 0; i < BITSET_SIZE; i++) { bs[i] = ~((Bits )0); }\r | |
294 | }\r | |
295 | #endif\r | |
296 | \r | |
297 | static void\r | |
298 | bitset_invert(BitSetRef bs)\r | |
299 | {\r | |
300 | int i;\r | |
301 | for (i = 0; i < (int )BITSET_SIZE; i++) { bs[i] = ~(bs[i]); }\r | |
302 | }\r | |
303 | \r | |
304 | static void\r | |
305 | bitset_invert_to(BitSetRef from, BitSetRef to)\r | |
306 | {\r | |
307 | int i;\r | |
308 | for (i = 0; i < (int )BITSET_SIZE; i++) { to[i] = ~(from[i]); }\r | |
309 | }\r | |
310 | \r | |
311 | static void\r | |
312 | bitset_and(BitSetRef dest, BitSetRef bs)\r | |
313 | {\r | |
314 | int i;\r | |
315 | for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] &= bs[i]; }\r | |
316 | }\r | |
317 | \r | |
318 | static void\r | |
319 | bitset_or(BitSetRef dest, BitSetRef bs)\r | |
320 | {\r | |
321 | int i;\r | |
322 | for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] |= bs[i]; }\r | |
323 | }\r | |
324 | \r | |
325 | static void\r | |
326 | bitset_copy(BitSetRef dest, BitSetRef bs)\r | |
327 | {\r | |
328 | int i;\r | |
329 | for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] = bs[i]; }\r | |
330 | }\r | |
331 | \r | |
332 | extern int\r | |
333 | onig_strncmp(const UChar* s1, const UChar* s2, int n)\r | |
334 | {\r | |
335 | int x;\r | |
336 | \r | |
337 | while (n-- > 0) {\r | |
338 | x = *s2++ - *s1++;\r | |
339 | if (x) return x;\r | |
340 | }\r | |
341 | return 0;\r | |
342 | }\r | |
343 | \r | |
344 | extern void\r | |
345 | onig_strcpy(UChar* dest, const UChar* src, const UChar* end)\r | |
346 | {\r | |
b602265d | 347 | int len = (int )(end - src);\r |
14b0e578 CS |
348 | if (len > 0) {\r |
349 | xmemcpy(dest, src, len);\r | |
350 | dest[len] = (UChar )0;\r | |
351 | }\r | |
352 | }\r | |
353 | \r | |
b602265d DG |
/* Hands out the next save id of env.
   The current env->save_num is stored in *id and the counter is then
   incremented.  Always returns 0.
   `type` is only referenced by the disabled code below. */
static int
save_entry(ScanEnv* env, enum SaveType type, int* id)
{
  int nid = env->save_num;

  /* Disabled: bookkeeping of a growable env->saves array recording the
     type of each entry. */
#if 0
  if (IS_NULL(env->saves)) {
    int n = 10;
    env->saves = (SaveItem* )xmalloc(sizeof(SaveItem) * n);
    CHECK_NULL_RETURN_MEMERR(env->saves);
    env->save_alloc_num = n;
  }
  else if (env->save_alloc_num <= nid) {
    int n = env->save_alloc_num * 2;
    SaveItem* p = (SaveItem* )xrealloc(env->saves, sizeof(SaveItem) * n, sizeof(SaveItem)*env->save_alloc_num);
    CHECK_NULL_RETURN_MEMERR(p);
    env->saves = p;
    env->save_alloc_num = n;
  }

  env->saves[nid].type = type;
#endif

  env->save_num++;
  *id = nid;
  return 0;
}
14b0e578 CS |
381 | \r |
382 | /* scan pattern methods */\r | |
383 | #define PEND_VALUE 0\r | |
384 | \r | |
385 | #define PFETCH_READY UChar* pfetch_prev\r | |
386 | #define PEND (p < end ? 0 : 1)\r | |
387 | #define PUNFETCH p = pfetch_prev\r | |
388 | #define PINC do { \\r | |
389 | pfetch_prev = p; \\r | |
390 | p += ONIGENC_MBC_ENC_LEN(enc, p); \\r | |
391 | } while (0)\r | |
392 | #define PFETCH(c) do { \\r | |
393 | c = ONIGENC_MBC_TO_CODE(enc, p, end); \\r | |
394 | pfetch_prev = p; \\r | |
395 | p += ONIGENC_MBC_ENC_LEN(enc, p); \\r | |
396 | } while (0)\r | |
397 | \r | |
398 | #define PINC_S do { \\r | |
399 | p += ONIGENC_MBC_ENC_LEN(enc, p); \\r | |
400 | } while (0)\r | |
401 | #define PFETCH_S(c) do { \\r | |
402 | c = ONIGENC_MBC_TO_CODE(enc, p, end); \\r | |
403 | p += ONIGENC_MBC_ENC_LEN(enc, p); \\r | |
404 | } while (0)\r | |
405 | \r | |
406 | #define PPEEK (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE)\r | |
407 | #define PPEEK_IS(c) (PPEEK == (OnigCodePoint )c)\r | |
408 | \r | |
409 | static UChar*\r | |
410 | strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end,\r | |
b602265d | 411 | int capa, int oldCapa)\r |
14b0e578 CS |
412 | {\r |
413 | UChar* r;\r | |
414 | \r | |
415 | if (dest)\r | |
416 | r = (UChar* )xrealloc(dest, capa + 1, oldCapa);\r | |
417 | else\r | |
418 | r = (UChar* )xmalloc(capa + 1);\r | |
419 | \r | |
420 | CHECK_NULL_RETURN(r);\r | |
421 | onig_strcpy(r + (dest_end - dest), src, src_end);\r | |
422 | return r;\r | |
423 | }\r | |
424 | \r | |
425 | /* dest on static area */\r | |
426 | static UChar*\r | |
427 | strcat_capa_from_static(UChar* dest, UChar* dest_end,\r | |
b602265d | 428 | const UChar* src, const UChar* src_end, int capa)\r |
14b0e578 CS |
429 | {\r |
430 | UChar* r;\r | |
431 | \r | |
432 | r = (UChar* )xmalloc(capa + 1);\r | |
433 | CHECK_NULL_RETURN(r);\r | |
434 | onig_strcpy(r, dest, dest_end);\r | |
435 | onig_strcpy(r + (dest_end - dest), src, src_end);\r | |
436 | return r;\r | |
437 | }\r | |
438 | \r | |
439 | \r | |
440 | #ifdef USE_ST_LIBRARY\r | |
441 | \r | |
442 | typedef struct {\r | |
443 | UChar* s;\r | |
444 | UChar* end;\r | |
445 | } st_str_end_key;\r | |
446 | \r | |
447 | static int\r | |
448 | str_end_cmp(st_str_end_key* x, st_str_end_key* y)\r | |
449 | {\r | |
450 | UChar *p, *q;\r | |
451 | int c;\r | |
452 | \r | |
453 | if ((x->end - x->s) != (y->end - y->s))\r | |
454 | return 1;\r | |
455 | \r | |
456 | p = x->s;\r | |
457 | q = y->s;\r | |
458 | while (p < x->end) {\r | |
459 | c = (int )*p - (int )*q;\r | |
460 | if (c != 0) return c;\r | |
461 | \r | |
462 | p++; q++;\r | |
463 | }\r | |
464 | \r | |
465 | return 0;\r | |
466 | }\r | |
467 | \r | |
468 | static int\r | |
469 | str_end_hash(st_str_end_key* x)\r | |
470 | {\r | |
471 | UChar *p;\r | |
472 | int val = 0;\r | |
473 | \r | |
474 | p = x->s;\r | |
475 | while (p < x->end) {\r | |
476 | val = val * 997 + (int )*p++;\r | |
477 | }\r | |
478 | \r | |
479 | return val + (val >> 5);\r | |
480 | }\r | |
481 | \r | |
482 | extern hash_table_type*\r | |
483 | onig_st_init_strend_table_with_size(int size)\r | |
484 | {\r | |
485 | static struct st_hash_type hashType = {\r | |
486 | str_end_cmp,\r | |
487 | str_end_hash,\r | |
488 | };\r | |
489 | \r | |
490 | return (hash_table_type* )\r | |
491 | onig_st_init_table_with_size(&hashType, size);\r | |
492 | }\r | |
493 | \r | |
494 | extern int\r | |
495 | onig_st_lookup_strend(hash_table_type* table, const UChar* str_key,\r | |
b602265d | 496 | const UChar* end_key, hash_data_type *value)\r |
14b0e578 CS |
497 | {\r |
498 | st_str_end_key key;\r | |
499 | \r | |
500 | key.s = (UChar* )str_key;\r | |
501 | key.end = (UChar* )end_key;\r | |
502 | \r | |
b602265d | 503 | return onig_st_lookup(table, (st_data_t )(&key), value);\r |
14b0e578 CS |
504 | }\r |
505 | \r | |
506 | extern int\r | |
507 | onig_st_insert_strend(hash_table_type* table, const UChar* str_key,\r | |
b602265d | 508 | const UChar* end_key, hash_data_type value)\r |
14b0e578 CS |
509 | {\r |
510 | st_str_end_key* key;\r | |
511 | int result;\r | |
512 | \r | |
513 | key = (st_str_end_key* )xmalloc(sizeof(st_str_end_key));\r | |
b0c2b797 | 514 | CHECK_NULL_RETURN_MEMERR(key);\r |
b602265d | 515 | \r |
14b0e578 CS |
516 | key->s = (UChar* )str_key;\r |
517 | key->end = (UChar* )end_key;\r | |
b602265d | 518 | result = onig_st_insert(table, (st_data_t )key, value);\r |
14b0e578 CS |
519 | if (result) {\r |
520 | xfree(key);\r | |
521 | }\r | |
522 | return result;\r | |
523 | }\r | |
524 | \r | |
14b0e578 | 525 | \r |
b602265d DG |
526 | typedef struct {\r |
527 | OnigEncoding enc;\r | |
528 | int type; /* callout type: single or not */\r | |
529 | UChar* s;\r | |
530 | UChar* end;\r | |
531 | } st_callout_name_key;\r | |
532 | \r | |
533 | static int\r | |
534 | callout_name_table_cmp(st_callout_name_key* x, st_callout_name_key* y)\r | |
535 | {\r | |
536 | UChar *p, *q;\r | |
537 | int c;\r | |
538 | \r | |
539 | if (x->enc != y->enc) return 1;\r | |
540 | if (x->type != y->type) return 1;\r | |
541 | if ((x->end - x->s) != (y->end - y->s))\r | |
542 | return 1;\r | |
543 | \r | |
544 | p = x->s;\r | |
545 | q = y->s;\r | |
546 | while (p < x->end) {\r | |
547 | c = (int )*p - (int )*q;\r | |
548 | if (c != 0) return c;\r | |
549 | \r | |
550 | p++; q++;\r | |
551 | }\r | |
552 | \r | |
553 | return 0;\r | |
554 | }\r | |
555 | \r | |
556 | static int\r | |
557 | callout_name_table_hash(st_callout_name_key* x)\r | |
558 | {\r | |
559 | UChar *p;\r | |
560 | int val = 0;\r | |
561 | \r | |
562 | p = x->s;\r | |
563 | while (p < x->end) {\r | |
564 | val = val * 997 + (int )*p++;\r | |
565 | }\r | |
566 | \r | |
567 | /* use intptr_t for escape warning in Windows */\r | |
568 | return val + (val >> 5) + ((intptr_t )x->enc & 0xffff) + x->type;\r | |
569 | }\r | |
570 | \r | |
571 | extern hash_table_type*\r | |
572 | onig_st_init_callout_name_table_with_size(int size)\r | |
573 | {\r | |
574 | static struct st_hash_type hashType = {\r | |
575 | callout_name_table_cmp,\r | |
576 | callout_name_table_hash,\r | |
577 | };\r | |
578 | \r | |
579 | return (hash_table_type* )\r | |
580 | onig_st_init_table_with_size(&hashType, size);\r | |
581 | }\r | |
582 | \r | |
583 | extern int\r | |
584 | onig_st_lookup_callout_name_table(hash_table_type* table,\r | |
585 | OnigEncoding enc,\r | |
586 | int type,\r | |
587 | const UChar* str_key,\r | |
588 | const UChar* end_key,\r | |
589 | hash_data_type *value)\r | |
590 | {\r | |
591 | st_callout_name_key key;\r | |
592 | \r | |
593 | key.enc = enc;\r | |
594 | key.type = type;\r | |
595 | key.s = (UChar* )str_key;\r | |
596 | key.end = (UChar* )end_key;\r | |
597 | \r | |
598 | return onig_st_lookup(table, (st_data_t )(&key), value);\r | |
599 | }\r | |
600 | \r | |
601 | static int\r | |
602 | st_insert_callout_name_table(hash_table_type* table,\r | |
603 | OnigEncoding enc, int type,\r | |
604 | UChar* str_key, UChar* end_key,\r | |
605 | hash_data_type value)\r | |
606 | {\r | |
607 | st_callout_name_key* key;\r | |
608 | int result;\r | |
609 | \r | |
610 | key = (st_callout_name_key* )xmalloc(sizeof(st_callout_name_key));\r | |
611 | CHECK_NULL_RETURN_MEMERR(key);\r | |
612 | \r | |
613 | /* key->s: don't duplicate, because str_key is duped in callout_name_entry() */\r | |
614 | key->enc = enc;\r | |
615 | key->type = type;\r | |
616 | key->s = str_key;\r | |
617 | key->end = end_key;\r | |
618 | result = onig_st_insert(table, (st_data_t )key, value);\r | |
619 | if (result) {\r | |
620 | xfree(key);\r | |
621 | }\r | |
622 | return result;\r | |
623 | }\r | |
624 | \r | |
625 | #endif /* USE_ST_LIBRARY */\r | |
14b0e578 | 626 | \r |
14b0e578 CS |
627 | \r |
628 | #define INIT_NAME_BACKREFS_ALLOC_NUM 8\r | |
629 | \r | |
630 | typedef struct {\r | |
631 | UChar* name;\r | |
632 | int name_len; /* byte length */\r | |
633 | int back_num; /* number of backrefs */\r | |
634 | int back_alloc;\r | |
635 | int back_ref1;\r | |
636 | int* back_refs;\r | |
637 | } NameEntry;\r | |
638 | \r | |
639 | #ifdef USE_ST_LIBRARY\r | |
640 | \r | |
b602265d DG |
641 | #define INIT_NAMES_ALLOC_NUM 5\r |
642 | \r | |
14b0e578 CS |
643 | typedef st_table NameTable;\r |
644 | typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */\r | |
645 | \r | |
646 | #define NAMEBUF_SIZE 24\r | |
647 | #define NAMEBUF_SIZE_1 25\r | |
648 | \r | |
649 | #ifdef ONIG_DEBUG\r | |
650 | static int\r | |
651 | i_print_name_entry(UChar* key, NameEntry* e, void* arg)\r | |
652 | {\r | |
653 | int i;\r | |
654 | FILE* fp = (FILE* )arg;\r | |
655 | \r | |
656 | fprintf(fp, "%s: ", e->name);\r | |
657 | if (e->back_num == 0)\r | |
658 | fputs("-", fp);\r | |
659 | else if (e->back_num == 1)\r | |
660 | fprintf(fp, "%d", e->back_ref1);\r | |
661 | else {\r | |
662 | for (i = 0; i < e->back_num; i++) {\r | |
663 | if (i > 0) fprintf(fp, ", ");\r | |
664 | fprintf(fp, "%d", e->back_refs[i]);\r | |
665 | }\r | |
666 | }\r | |
667 | fputs("\n", fp);\r | |
668 | return ST_CONTINUE;\r | |
669 | }\r | |
670 | \r | |
671 | extern int\r | |
672 | onig_print_names(FILE* fp, regex_t* reg)\r | |
673 | {\r | |
674 | NameTable* t = (NameTable* )reg->name_table;\r | |
675 | \r | |
676 | if (IS_NOT_NULL(t)) {\r | |
677 | fprintf(fp, "name table\n");\r | |
678 | onig_st_foreach(t, i_print_name_entry, (HashDataType )fp);\r | |
679 | fputs("\n", fp);\r | |
680 | }\r | |
681 | return 0;\r | |
682 | }\r | |
683 | #endif /* ONIG_DEBUG */\r | |
684 | \r | |
685 | static int\r | |
686 | i_free_name_entry(UChar* key, NameEntry* e, void* arg ARG_UNUSED)\r | |
687 | {\r | |
688 | xfree(e->name);\r | |
689 | if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);\r | |
690 | xfree(key);\r | |
691 | xfree(e);\r | |
692 | return ST_DELETE;\r | |
693 | }\r | |
694 | \r | |
695 | static int\r | |
696 | names_clear(regex_t* reg)\r | |
697 | {\r | |
698 | NameTable* t = (NameTable* )reg->name_table;\r | |
699 | \r | |
700 | if (IS_NOT_NULL(t)) {\r | |
701 | onig_st_foreach(t, i_free_name_entry, 0);\r | |
702 | }\r | |
703 | return 0;\r | |
704 | }\r | |
705 | \r | |
706 | extern int\r | |
707 | onig_names_free(regex_t* reg)\r | |
708 | {\r | |
709 | int r;\r | |
710 | NameTable* t;\r | |
711 | \r | |
712 | r = names_clear(reg);\r | |
b602265d | 713 | if (r != 0) return r;\r |
14b0e578 CS |
714 | \r |
715 | t = (NameTable* )reg->name_table;\r | |
716 | if (IS_NOT_NULL(t)) onig_st_free_table(t);\r | |
717 | reg->name_table = (void* )NULL;\r | |
718 | return 0;\r | |
719 | }\r | |
720 | \r | |
721 | static NameEntry*\r | |
722 | name_find(regex_t* reg, const UChar* name, const UChar* name_end)\r | |
723 | {\r | |
724 | NameEntry* e;\r | |
725 | NameTable* t = (NameTable* )reg->name_table;\r | |
726 | \r | |
727 | e = (NameEntry* )NULL;\r | |
728 | if (IS_NOT_NULL(t)) {\r | |
729 | onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));\r | |
730 | }\r | |
731 | return e;\r | |
732 | }\r | |
733 | \r | |
734 | typedef struct {\r | |
735 | int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*);\r | |
736 | regex_t* reg;\r | |
737 | void* arg;\r | |
738 | int ret;\r | |
739 | OnigEncoding enc;\r | |
740 | } INamesArg;\r | |
741 | \r | |
742 | static int\r | |
743 | i_names(UChar* key ARG_UNUSED, NameEntry* e, INamesArg* arg)\r | |
744 | {\r | |
745 | int r = (*(arg->func))(e->name,\r | |
746 | e->name + e->name_len,\r | |
747 | e->back_num,\r | |
b602265d DG |
748 | (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),\r |
749 | arg->reg, arg->arg);\r | |
14b0e578 CS |
750 | if (r != 0) {\r |
751 | arg->ret = r;\r | |
752 | return ST_STOP;\r | |
753 | }\r | |
754 | return ST_CONTINUE;\r | |
755 | }\r | |
756 | \r | |
757 | extern int\r | |
758 | onig_foreach_name(regex_t* reg,\r | |
759 | int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)\r | |
760 | {\r | |
761 | INamesArg narg;\r | |
762 | NameTable* t = (NameTable* )reg->name_table;\r | |
763 | \r | |
764 | narg.ret = 0;\r | |
765 | if (IS_NOT_NULL(t)) {\r | |
766 | narg.func = func;\r | |
767 | narg.reg = reg;\r | |
768 | narg.arg = arg;\r | |
769 | narg.enc = reg->enc; /* should be pattern encoding. */\r | |
b602265d | 770 | onig_st_foreach(t, i_names, (HashDataType )&narg);\r |
14b0e578 CS |
771 | }\r |
772 | return narg.ret;\r | |
773 | }\r | |
774 | \r | |
775 | static int\r | |
776 | i_renumber_name(UChar* key ARG_UNUSED, NameEntry* e, GroupNumRemap* map)\r | |
777 | {\r | |
778 | int i;\r | |
779 | \r | |
780 | if (e->back_num > 1) {\r | |
781 | for (i = 0; i < e->back_num; i++) {\r | |
782 | e->back_refs[i] = map[e->back_refs[i]].new_val;\r | |
783 | }\r | |
784 | }\r | |
785 | else if (e->back_num == 1) {\r | |
786 | e->back_ref1 = map[e->back_ref1].new_val;\r | |
787 | }\r | |
788 | \r | |
789 | return ST_CONTINUE;\r | |
790 | }\r | |
791 | \r | |
792 | extern int\r | |
793 | onig_renumber_name_table(regex_t* reg, GroupNumRemap* map)\r | |
794 | {\r | |
795 | NameTable* t = (NameTable* )reg->name_table;\r | |
796 | \r | |
797 | if (IS_NOT_NULL(t)) {\r | |
b602265d | 798 | onig_st_foreach(t, i_renumber_name, (HashDataType )map);\r |
14b0e578 CS |
799 | }\r |
800 | return 0;\r | |
801 | }\r | |
802 | \r | |
803 | \r | |
804 | extern int\r | |
805 | onig_number_of_names(regex_t* reg)\r | |
806 | {\r | |
807 | NameTable* t = (NameTable* )reg->name_table;\r | |
808 | \r | |
809 | if (IS_NOT_NULL(t))\r | |
810 | return t->num_entries;\r | |
811 | else\r | |
812 | return 0;\r | |
813 | }\r | |
814 | \r | |
815 | #else /* USE_ST_LIBRARY */\r | |
816 | \r | |
817 | #define INIT_NAMES_ALLOC_NUM 8\r | |
818 | \r | |
819 | typedef struct {\r | |
820 | NameEntry* e;\r | |
821 | int num;\r | |
822 | int alloc;\r | |
823 | } NameTable;\r | |
824 | \r | |
825 | #ifdef ONIG_DEBUG\r | |
826 | extern int\r | |
827 | onig_print_names(FILE* fp, regex_t* reg)\r | |
828 | {\r | |
829 | int i, j;\r | |
830 | NameEntry* e;\r | |
831 | NameTable* t = (NameTable* )reg->name_table;\r | |
832 | \r | |
833 | if (IS_NOT_NULL(t) && t->num > 0) {\r | |
834 | fprintf(fp, "name table\n");\r | |
835 | for (i = 0; i < t->num; i++) {\r | |
836 | e = &(t->e[i]);\r | |
837 | fprintf(fp, "%s: ", e->name);\r | |
838 | if (e->back_num == 0) {\r | |
b602265d | 839 | fputs("-", fp);\r |
14b0e578 CS |
840 | }\r |
841 | else if (e->back_num == 1) {\r | |
b602265d | 842 | fprintf(fp, "%d", e->back_ref1);\r |
14b0e578 CS |
843 | }\r |
844 | else {\r | |
b602265d DG |
845 | for (j = 0; j < e->back_num; j++) {\r |
846 | if (j > 0) fprintf(fp, ", ");\r | |
847 | fprintf(fp, "%d", e->back_refs[j]);\r | |
848 | }\r | |
14b0e578 CS |
849 | }\r |
850 | fputs("\n", fp);\r | |
851 | }\r | |
852 | fputs("\n", fp);\r | |
853 | }\r | |
854 | return 0;\r | |
855 | }\r | |
856 | #endif\r | |
857 | \r | |
858 | static int\r | |
859 | names_clear(regex_t* reg)\r | |
860 | {\r | |
861 | int i;\r | |
862 | NameEntry* e;\r | |
863 | NameTable* t = (NameTable* )reg->name_table;\r | |
864 | \r | |
865 | if (IS_NOT_NULL(t)) {\r | |
866 | for (i = 0; i < t->num; i++) {\r | |
867 | e = &(t->e[i]);\r | |
868 | if (IS_NOT_NULL(e->name)) {\r | |
b602265d DG |
869 | xfree(e->name);\r |
870 | e->name = NULL;\r | |
871 | e->name_len = 0;\r | |
872 | e->back_num = 0;\r | |
873 | e->back_alloc = 0;\r | |
874 | if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);\r | |
875 | e->back_refs = (int* )NULL;\r | |
14b0e578 CS |
876 | }\r |
877 | }\r | |
878 | if (IS_NOT_NULL(t->e)) {\r | |
879 | xfree(t->e);\r | |
880 | t->e = NULL;\r | |
881 | }\r | |
882 | t->num = 0;\r | |
883 | }\r | |
884 | return 0;\r | |
885 | }\r | |
886 | \r | |
887 | extern int\r | |
888 | onig_names_free(regex_t* reg)\r | |
889 | {\r | |
890 | int r;\r | |
891 | NameTable* t;\r | |
892 | \r | |
893 | r = names_clear(reg);\r | |
b602265d | 894 | if (r != 0) return r;\r |
14b0e578 CS |
895 | \r |
896 | t = (NameTable* )reg->name_table;\r | |
897 | if (IS_NOT_NULL(t)) xfree(t);\r | |
898 | reg->name_table = NULL;\r | |
899 | return 0;\r | |
900 | }\r | |
901 | \r | |
902 | static NameEntry*\r | |
903 | name_find(regex_t* reg, UChar* name, UChar* name_end)\r | |
904 | {\r | |
905 | int i, len;\r | |
906 | NameEntry* e;\r | |
907 | NameTable* t = (NameTable* )reg->name_table;\r | |
908 | \r | |
909 | if (IS_NOT_NULL(t)) {\r | |
910 | len = name_end - name;\r | |
911 | for (i = 0; i < t->num; i++) {\r | |
912 | e = &(t->e[i]);\r | |
913 | if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)\r | |
b602265d | 914 | return e;\r |
14b0e578 CS |
915 | }\r |
916 | }\r | |
917 | return (NameEntry* )NULL;\r | |
918 | }\r | |
919 | \r | |
920 | extern int\r | |
921 | onig_foreach_name(regex_t* reg,\r | |
922 | int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)\r | |
923 | {\r | |
924 | int i, r;\r | |
925 | NameEntry* e;\r | |
926 | NameTable* t = (NameTable* )reg->name_table;\r | |
927 | \r | |
928 | if (IS_NOT_NULL(t)) {\r | |
929 | for (i = 0; i < t->num; i++) {\r | |
930 | e = &(t->e[i]);\r | |
931 | r = (*func)(e->name, e->name + e->name_len, e->back_num,\r | |
b602265d DG |
932 | (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),\r |
933 | reg, arg);\r | |
14b0e578 CS |
934 | if (r != 0) return r;\r |
935 | }\r | |
936 | }\r | |
937 | return 0;\r | |
938 | }\r | |
939 | \r | |
940 | extern int\r | |
941 | onig_number_of_names(regex_t* reg)\r | |
942 | {\r | |
943 | NameTable* t = (NameTable* )reg->name_table;\r | |
944 | \r | |
945 | if (IS_NOT_NULL(t))\r | |
946 | return t->num;\r | |
947 | else\r | |
948 | return 0;\r | |
949 | }\r | |
950 | \r | |
951 | #endif /* else USE_ST_LIBRARY */\r | |
952 | \r | |
953 | static int\r | |
954 | name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)\r | |
955 | {\r | |
b602265d | 956 | int r;\r |
14b0e578 CS |
957 | int alloc;\r |
958 | NameEntry* e;\r | |
959 | NameTable* t = (NameTable* )reg->name_table;\r | |
960 | \r | |
961 | if (name_end - name <= 0)\r | |
962 | return ONIGERR_EMPTY_GROUP_NAME;\r | |
963 | \r | |
964 | e = name_find(reg, name, name_end);\r | |
965 | if (IS_NULL(e)) {\r | |
966 | #ifdef USE_ST_LIBRARY\r | |
967 | if (IS_NULL(t)) {\r | |
b602265d | 968 | t = onig_st_init_strend_table_with_size(INIT_NAMES_ALLOC_NUM);\r |
14b0e578 CS |
969 | reg->name_table = (void* )t;\r |
970 | }\r | |
971 | e = (NameEntry* )xmalloc(sizeof(NameEntry));\r | |
972 | CHECK_NULL_RETURN_MEMERR(e);\r | |
973 | \r | |
b602265d | 974 | e->name = onigenc_strdup(reg->enc, name, name_end);\r |
14b0e578 CS |
975 | if (IS_NULL(e->name)) {\r |
976 | xfree(e); return ONIGERR_MEMORY;\r | |
977 | }\r | |
b602265d DG |
978 | r = onig_st_insert_strend(t, e->name, (e->name + (name_end - name)),\r |
979 | (HashDataType )e);\r | |
980 | if (r < 0) return r;\r | |
14b0e578 | 981 | \r |
b602265d | 982 | e->name_len = (int )(name_end - name);\r |
14b0e578 CS |
983 | e->back_num = 0;\r |
984 | e->back_alloc = 0;\r | |
985 | e->back_refs = (int* )NULL;\r | |
986 | \r | |
987 | #else\r | |
988 | \r | |
989 | if (IS_NULL(t)) {\r | |
990 | alloc = INIT_NAMES_ALLOC_NUM;\r | |
991 | t = (NameTable* )xmalloc(sizeof(NameTable));\r | |
992 | CHECK_NULL_RETURN_MEMERR(t);\r | |
993 | t->e = NULL;\r | |
994 | t->alloc = 0;\r | |
995 | t->num = 0;\r | |
996 | \r | |
997 | t->e = (NameEntry* )xmalloc(sizeof(NameEntry) * alloc);\r | |
998 | if (IS_NULL(t->e)) {\r | |
b602265d DG |
999 | xfree(t);\r |
1000 | return ONIGERR_MEMORY;\r | |
14b0e578 CS |
1001 | }\r |
1002 | t->alloc = alloc;\r | |
1003 | reg->name_table = t;\r | |
1004 | goto clear;\r | |
1005 | }\r | |
1006 | else if (t->num == t->alloc) {\r | |
1007 | int i;\r | |
1008 | \r | |
1009 | alloc = t->alloc * 2;\r | |
b602265d | 1010 | t->e = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc, sizeof(NameEntry) * t->alloc);\r |
14b0e578 CS |
1011 | CHECK_NULL_RETURN_MEMERR(t->e);\r |
1012 | t->alloc = alloc;\r | |
1013 | \r | |
1014 | clear:\r | |
1015 | for (i = t->num; i < t->alloc; i++) {\r | |
b602265d DG |
1016 | t->e[i].name = NULL;\r |
1017 | t->e[i].name_len = 0;\r | |
1018 | t->e[i].back_num = 0;\r | |
1019 | t->e[i].back_alloc = 0;\r | |
1020 | t->e[i].back_refs = (int* )NULL;\r | |
14b0e578 CS |
1021 | }\r |
1022 | }\r | |
1023 | e = &(t->e[t->num]);\r | |
1024 | t->num++;\r | |
b602265d | 1025 | e->name = onigenc_strdup(reg->enc, name, name_end);\r |
14b0e578 CS |
1026 | if (IS_NULL(e->name)) return ONIGERR_MEMORY;\r |
1027 | e->name_len = name_end - name;\r | |
1028 | #endif\r | |
1029 | }\r | |
1030 | \r | |
1031 | if (e->back_num >= 1 &&\r | |
1032 | ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME)) {\r | |
1033 | onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME,\r | |
b602265d | 1034 | name, name_end);\r |
14b0e578 CS |
1035 | return ONIGERR_MULTIPLEX_DEFINED_NAME;\r |
1036 | }\r | |
1037 | \r | |
1038 | e->back_num++;\r | |
1039 | if (e->back_num == 1) {\r | |
1040 | e->back_ref1 = backref;\r | |
1041 | }\r | |
1042 | else {\r | |
1043 | if (e->back_num == 2) {\r | |
1044 | alloc = INIT_NAME_BACKREFS_ALLOC_NUM;\r | |
1045 | e->back_refs = (int* )xmalloc(sizeof(int) * alloc);\r | |
1046 | CHECK_NULL_RETURN_MEMERR(e->back_refs);\r | |
1047 | e->back_alloc = alloc;\r | |
1048 | e->back_refs[0] = e->back_ref1;\r | |
1049 | e->back_refs[1] = backref;\r | |
1050 | }\r | |
1051 | else {\r | |
1052 | if (e->back_num > e->back_alloc) {\r | |
b602265d DG |
1053 | alloc = e->back_alloc * 2;\r |
1054 | e->back_refs = (int* )xrealloc(e->back_refs, sizeof(int) * alloc, sizeof(int) * e->back_alloc);\r | |
1055 | CHECK_NULL_RETURN_MEMERR(e->back_refs);\r | |
1056 | e->back_alloc = alloc;\r | |
14b0e578 CS |
1057 | }\r |
1058 | e->back_refs[e->back_num - 1] = backref;\r | |
1059 | }\r | |
1060 | }\r | |
1061 | \r | |
1062 | return 0;\r | |
1063 | }\r | |
1064 | \r | |
1065 | extern int\r | |
1066 | onig_name_to_group_numbers(regex_t* reg, const UChar* name,\r | |
b602265d | 1067 | const UChar* name_end, int** nums)\r |
14b0e578 CS |
1068 | {\r |
1069 | NameEntry* e = name_find(reg, name, name_end);\r | |
1070 | \r | |
1071 | if (IS_NULL(e)) return ONIGERR_UNDEFINED_NAME_REFERENCE;\r | |
1072 | \r | |
1073 | switch (e->back_num) {\r | |
1074 | case 0:\r | |
1075 | break;\r | |
1076 | case 1:\r | |
1077 | *nums = &(e->back_ref1);\r | |
1078 | break;\r | |
1079 | default:\r | |
1080 | *nums = e->back_refs;\r | |
1081 | break;\r | |
1082 | }\r | |
1083 | return e->back_num;\r | |
1084 | }\r | |
1085 | \r | |
1086 | extern int\r | |
1087 | onig_name_to_backref_number(regex_t* reg, const UChar* name,\r | |
b602265d | 1088 | const UChar* name_end, OnigRegion *region)\r |
14b0e578 CS |
1089 | {\r |
1090 | int i, n, *nums;\r | |
1091 | \r | |
1092 | n = onig_name_to_group_numbers(reg, name, name_end, &nums);\r | |
1093 | if (n < 0)\r | |
1094 | return n;\r | |
1095 | else if (n == 0)\r | |
1096 | return ONIGERR_PARSER_BUG;\r | |
1097 | else if (n == 1)\r | |
1098 | return nums[0];\r | |
1099 | else {\r | |
1100 | if (IS_NOT_NULL(region)) {\r | |
1101 | for (i = n - 1; i >= 0; i--) {\r | |
b602265d DG |
1102 | if (region->beg[nums[i]] != ONIG_REGION_NOTPOS)\r |
1103 | return nums[i];\r | |
14b0e578 CS |
1104 | }\r |
1105 | }\r | |
1106 | return nums[n - 1];\r | |
1107 | }\r | |
1108 | }\r | |
1109 | \r | |
14b0e578 CS |
1110 | extern int\r |
1111 | onig_noname_group_capture_is_active(regex_t* reg)\r | |
1112 | {\r | |
1113 | if (ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_DONT_CAPTURE_GROUP))\r | |
1114 | return 0;\r | |
1115 | \r | |
14b0e578 CS |
1116 | if (onig_number_of_names(reg) > 0 &&\r |
1117 | IS_SYNTAX_BV(reg->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&\r | |
1118 | !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {\r | |
1119 | return 0;\r | |
1120 | }\r | |
14b0e578 CS |
1121 | \r |
1122 | return 1;\r | |
1123 | }\r | |
1124 | \r | |
b602265d | 1125 | #ifdef USE_CALLOUT\r |
14b0e578 | 1126 | \r |
b602265d DG |
/* One registered callout-of-name, as filled in by onig_set_callout_of_name(). */
typedef struct {
  OnigCalloutType type;        /* callout type given at registration */
  int             in;          /* ONIG_CALLOUT_IN_* bit flags given at registration */
  OnigCalloutFunc start_func;
  OnigCalloutFunc end_func;
  int             arg_num;     /* total number of arguments */
  int             opt_arg_num; /* the last opt_arg_num arguments are optional */
  unsigned int    arg_types[ONIG_CALLOUT_MAX_ARGS_NUM];
  /* defaults are stored at the index of their optional argument;
     string defaults are heap copies released by free_callout_func_list() */
  OnigValue       opt_defaults[ONIG_CALLOUT_MAX_ARGS_NUM];
  UChar*          name; /* reference to GlobalCalloutNameTable entry: e->name */
} CalloutNameListEntry;
14b0e578 | 1138 | \r |
b602265d DG |
/* Growable array of CalloutNameListEntry, indexed by callout name id. */
typedef struct {
  int  n;      /* number of entries in use */
  int  alloc;  /* number of entries allocated in v */
  CalloutNameListEntry* v;
} CalloutNameListType;

/* Process-wide list; created on first registration, 0 until then. */
static CalloutNameListType* GlobalCalloutNameList;
14b0e578 CS |
1146 | \r |
1147 | static int\r | |
b602265d | 1148 | make_callout_func_list(CalloutNameListType** rs, int init_size)\r |
14b0e578 | 1149 | {\r |
b602265d DG |
1150 | CalloutNameListType* s;\r |
1151 | CalloutNameListEntry* v;\r | |
14b0e578 | 1152 | \r |
b602265d | 1153 | *rs = 0;\r |
14b0e578 | 1154 | \r |
b602265d DG |
1155 | s = xmalloc(sizeof(*s));\r |
1156 | if (IS_NULL(s)) return ONIGERR_MEMORY;\r | |
14b0e578 | 1157 | \r |
b602265d DG |
1158 | v = (CalloutNameListEntry* )xmalloc(sizeof(CalloutNameListEntry) * init_size);\r |
1159 | if (IS_NULL(v)) {\r | |
1160 | xfree(s);\r | |
1161 | return ONIGERR_MEMORY;\r | |
14b0e578 CS |
1162 | }\r |
1163 | \r | |
b602265d DG |
1164 | s->n = 0;\r |
1165 | s->alloc = init_size;\r | |
1166 | s->v = v;\r | |
14b0e578 | 1167 | \r |
b602265d DG |
1168 | *rs = s;\r |
1169 | return ONIG_NORMAL;\r | |
14b0e578 CS |
1170 | }\r |
1171 | \r | |
b602265d DG |
1172 | static void\r |
1173 | free_callout_func_list(CalloutNameListType* s)\r | |
1174 | {\r | |
1175 | if (IS_NOT_NULL(s)) {\r | |
1176 | if (IS_NOT_NULL(s->v)) {\r | |
1177 | int i, j;\r | |
1178 | \r | |
1179 | for (i = 0; i < s->n; i++) {\r | |
1180 | CalloutNameListEntry* e = s->v + i;\r | |
1181 | for (j = e->arg_num - e->opt_arg_num; j < e->arg_num; j++) {\r | |
1182 | if (e->arg_types[j] == ONIG_TYPE_STRING) {\r | |
1183 | UChar* p = e->opt_defaults[j].s.start;\r | |
1184 | if (IS_NOT_NULL(p)) xfree(p);\r | |
1185 | }\r | |
1186 | }\r | |
1187 | }\r | |
1188 | xfree(s->v);\r | |
1189 | }\r | |
1190 | xfree(s);\r | |
1191 | }\r | |
1192 | }\r | |
14b0e578 | 1193 | \r |
b602265d DG |
1194 | static int\r |
1195 | callout_func_list_add(CalloutNameListType* s, int* rid)\r | |
1196 | {\r | |
1197 | if (s->n >= s->alloc) {\r | |
1198 | int new_size = s->alloc * 2;\r | |
1199 | CalloutNameListEntry* nv = (CalloutNameListEntry* )\r | |
1200 | xrealloc(s->v, sizeof(CalloutNameListEntry) * new_size, sizeof(CalloutNameListEntry)*s->alloc);\r | |
1201 | if (IS_NULL(nv)) return ONIGERR_MEMORY;\r | |
14b0e578 | 1202 | \r |
b602265d DG |
1203 | s->alloc = new_size;\r |
1204 | s->v = nv;\r | |
1205 | }\r | |
14b0e578 | 1206 | \r |
b602265d | 1207 | *rid = s->n;\r |
14b0e578 | 1208 | \r |
b602265d DG |
1209 | xmemset(&(s->v[s->n]), 0, sizeof(*(s->v)));\r |
1210 | s->n++;\r | |
1211 | return ONIG_NORMAL;\r | |
1212 | }\r | |
14b0e578 | 1213 | \r |
14b0e578 | 1214 | \r |
b602265d DG |
/* Maps one callout name to its id; ids are handed out from CalloutNameIDCounter. */
typedef struct {
  UChar* name;     /* heap copy of the name (made with onigenc_strdup) */
  int    name_len; /* byte length */
  int    id;
} CalloutNameEntry;
14b0e578 | 1220 | \r |
b602265d DG |
/* Callout name table: a hash table with USE_ST_LIBRARY, else a growable array. */
#ifdef USE_ST_LIBRARY
typedef st_table  CalloutNameTable;
#else
typedef struct {
  CalloutNameEntry* e;
  int               num;    /* entries in use */
  int               alloc;  /* entries allocated in e */
} CalloutNameTable;
#endif

/* Process-wide table, created lazily by callout_name_entry(). */
static CalloutNameTable* GlobalCalloutNameTable;
/* Last id handed out; reset to 0 when the global table is freed. */
static int CalloutNameIDCounter;
14b0e578 | 1233 | \r |
b602265d | 1234 | #ifdef USE_ST_LIBRARY\r |
14b0e578 | 1235 | \r |
b602265d DG |
1236 | static int\r |
1237 | i_free_callout_name_entry(st_callout_name_key* key, CalloutNameEntry* e,\r | |
1238 | void* arg ARG_UNUSED)\r | |
1239 | {\r | |
1240 | xfree(e->name);\r | |
1241 | /*xfree(key->s); */ /* is same as e->name */\r | |
1242 | xfree(key);\r | |
1243 | xfree(e);\r | |
1244 | return ST_DELETE;\r | |
1245 | }\r | |
14b0e578 | 1246 | \r |
b602265d DG |
1247 | static int\r |
1248 | callout_name_table_clear(CalloutNameTable* t)\r | |
1249 | {\r | |
1250 | if (IS_NOT_NULL(t)) {\r | |
1251 | onig_st_foreach(t, i_free_callout_name_entry, 0);\r | |
1252 | }\r | |
1253 | return 0;\r | |
1254 | }\r | |
14b0e578 | 1255 | \r |
b602265d DG |
1256 | static int\r |
1257 | global_callout_name_table_free(void)\r | |
1258 | {\r | |
1259 | if (IS_NOT_NULL(GlobalCalloutNameTable)) {\r | |
1260 | int r = callout_name_table_clear(GlobalCalloutNameTable);\r | |
1261 | if (r != 0) return r;\r | |
14b0e578 | 1262 | \r |
b602265d DG |
1263 | onig_st_free_table(GlobalCalloutNameTable);\r |
1264 | GlobalCalloutNameTable = 0;\r | |
1265 | CalloutNameIDCounter = 0;\r | |
14b0e578 CS |
1266 | }\r |
1267 | \r | |
b602265d DG |
1268 | return 0;\r |
1269 | }\r | |
1270 | \r | |
1271 | static CalloutNameEntry*\r | |
1272 | callout_name_find(OnigEncoding enc, int is_not_single,\r | |
1273 | const UChar* name, const UChar* name_end)\r | |
1274 | {\r | |
1275 | int r;\r | |
1276 | CalloutNameEntry* e;\r | |
1277 | CalloutNameTable* t = GlobalCalloutNameTable;\r | |
14b0e578 | 1278 | \r |
b602265d DG |
1279 | e = (CalloutNameEntry* )NULL;\r |
1280 | if (IS_NOT_NULL(t)) {\r | |
1281 | r = onig_st_lookup_callout_name_table(t, enc, is_not_single, name, name_end,\r | |
1282 | (HashDataType* )((void* )(&e)));\r | |
1283 | if (r == 0) { /* not found */\r | |
1284 | if (enc != ONIG_ENCODING_ASCII &&\r | |
1285 | ONIGENC_IS_ASCII_COMPATIBLE_ENCODING(enc)) {\r | |
1286 | enc = ONIG_ENCODING_ASCII;\r | |
1287 | onig_st_lookup_callout_name_table(t, enc, is_not_single, name, name_end,\r | |
1288 | (HashDataType* )((void* )(&e)));\r | |
1289 | }\r | |
1290 | }\r | |
14b0e578 | 1291 | }\r |
b602265d DG |
1292 | return e;\r |
1293 | }\r | |
1294 | \r | |
14b0e578 | 1295 | #else\r |
b602265d DG |
1296 | \r |
1297 | static int\r | |
1298 | callout_name_table_clear(CalloutNameTable* t)\r | |
1299 | {\r | |
1300 | int i;\r | |
1301 | CalloutNameEntry* e;\r | |
1302 | \r | |
1303 | if (IS_NOT_NULL(t)) {\r | |
1304 | for (i = 0; i < t->num; i++) {\r | |
1305 | e = &(t->e[i]);\r | |
1306 | if (IS_NOT_NULL(e->name)) {\r | |
1307 | xfree(e->name);\r | |
1308 | e->name = NULL;\r | |
1309 | e->name_len = 0;\r | |
1310 | e->id = 0;\r | |
1311 | e->func = 0;\r | |
1312 | }\r | |
1313 | }\r | |
1314 | if (IS_NOT_NULL(t->e)) {\r | |
1315 | xfree(t->e);\r | |
1316 | t->e = NULL;\r | |
1317 | }\r | |
1318 | t->num = 0;\r | |
1319 | }\r | |
1320 | return 0;\r | |
14b0e578 CS |
1321 | }\r |
1322 | \r | |
b602265d DG |
1323 | static int\r |
1324 | global_callout_name_table_free(void)\r | |
14b0e578 | 1325 | {\r |
b602265d DG |
1326 | if (IS_NOT_NULL(GlobalCalloutNameTable)) {\r |
1327 | int r = callout_name_table_clear(GlobalCalloutNameTable);\r | |
1328 | if (r != 0) return r;\r | |
14b0e578 | 1329 | \r |
b602265d DG |
1330 | xfree(GlobalCalloutNameTable);\r |
1331 | GlobalCalloutNameTable = 0;\r | |
1332 | CalloutNameIDCounter = 0;\r | |
14b0e578 | 1333 | }\r |
14b0e578 CS |
1334 | return 0;\r |
1335 | }\r | |
14b0e578 | 1336 | \r |
b602265d DG |
1337 | static CalloutNameEntry*\r |
1338 | callout_name_find(UChar* name, UChar* name_end)\r | |
14b0e578 | 1339 | {\r |
b602265d DG |
1340 | int i, len;\r |
1341 | CalloutNameEntry* e;\r | |
1342 | CalloutNameTable* t = Calloutnames;\r | |
14b0e578 | 1343 | \r |
b602265d DG |
1344 | if (IS_NOT_NULL(t)) {\r |
1345 | len = name_end - name;\r | |
1346 | for (i = 0; i < t->num; i++) {\r | |
1347 | e = &(t->e[i]);\r | |
1348 | if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)\r | |
1349 | return e;\r | |
1350 | }\r | |
14b0e578 | 1351 | }\r |
b602265d DG |
1352 | return (CalloutNameEntry* )NULL;\r |
1353 | }\r | |
1354 | \r | |
14b0e578 CS |
1355 | #endif\r |
1356 | \r | |
b602265d DG |
1357 | /* name string must be single byte char string. */\r |
1358 | static int\r | |
1359 | callout_name_entry(CalloutNameEntry** rentry, OnigEncoding enc,\r | |
1360 | int is_not_single, UChar* name, UChar* name_end)\r | |
1361 | {\r | |
1362 | int r;\r | |
1363 | CalloutNameEntry* e;\r | |
1364 | CalloutNameTable* t = GlobalCalloutNameTable;\r | |
14b0e578 | 1365 | \r |
b602265d DG |
1366 | *rentry = 0;\r |
1367 | if (name_end - name <= 0)\r | |
1368 | return ONIGERR_INVALID_CALLOUT_NAME;\r | |
14b0e578 | 1369 | \r |
b602265d DG |
1370 | e = callout_name_find(enc, is_not_single, name, name_end);\r |
1371 | if (IS_NULL(e)) {\r | |
1372 | #ifdef USE_ST_LIBRARY\r | |
1373 | if (IS_NULL(t)) {\r | |
1374 | t = onig_st_init_callout_name_table_with_size(INIT_NAMES_ALLOC_NUM);\r | |
1375 | GlobalCalloutNameTable = t;\r | |
1376 | }\r | |
1377 | e = (CalloutNameEntry* )xmalloc(sizeof(CalloutNameEntry));\r | |
1378 | CHECK_NULL_RETURN_MEMERR(e);\r | |
1379 | \r | |
1380 | e->name = onigenc_strdup(enc, name, name_end);\r | |
1381 | if (IS_NULL(e->name)) {\r | |
1382 | xfree(e); return ONIGERR_MEMORY;\r | |
1383 | }\r | |
1384 | \r | |
1385 | r = st_insert_callout_name_table(t, enc, is_not_single,\r | |
1386 | e->name, (e->name + (name_end - name)),\r | |
1387 | (HashDataType )e);\r | |
1388 | if (r < 0) return r;\r | |
1389 | \r | |
1390 | #else\r | |
1391 | \r | |
1392 | int alloc;\r | |
1393 | \r | |
1394 | if (IS_NULL(t)) {\r | |
1395 | alloc = INIT_NAMES_ALLOC_NUM;\r | |
1396 | t = (CalloutNameTable* )xmalloc(sizeof(CalloutNameTable));\r | |
1397 | CHECK_NULL_RETURN_MEMERR(t);\r | |
1398 | t->e = NULL;\r | |
1399 | t->alloc = 0;\r | |
1400 | t->num = 0;\r | |
1401 | \r | |
1402 | t->e = (CalloutNameEntry* )xmalloc(sizeof(CalloutNameEntry) * alloc);\r | |
1403 | if (IS_NULL(t->e)) {\r | |
1404 | xfree(t);\r | |
1405 | return ONIGERR_MEMORY;\r | |
1406 | }\r | |
1407 | t->alloc = alloc;\r | |
1408 | GlobalCalloutNameTable = t;\r | |
1409 | goto clear;\r | |
1410 | }\r | |
1411 | else if (t->num == t->alloc) {\r | |
1412 | int i;\r | |
1413 | \r | |
1414 | alloc = t->alloc * 2;\r | |
1415 | t->e = (CalloutNameEntry* )xrealloc(t->e, sizeof(CalloutNameEntry) * alloc, sizeof(CalloutNameEntry)*t->alloc);\r | |
1416 | CHECK_NULL_RETURN_MEMERR(t->e);\r | |
1417 | t->alloc = alloc;\r | |
1418 | \r | |
1419 | clear:\r | |
1420 | for (i = t->num; i < t->alloc; i++) {\r | |
1421 | t->e[i].name = NULL;\r | |
1422 | t->e[i].name_len = 0;\r | |
1423 | t->e[i].id = 0;\r | |
1424 | }\r | |
1425 | }\r | |
1426 | e = &(t->e[t->num]);\r | |
1427 | t->num++;\r | |
1428 | e->name = onigenc_strdup(enc, name, name_end);\r | |
1429 | if (IS_NULL(e->name)) return ONIGERR_MEMORY;\r | |
1430 | #endif\r | |
1431 | \r | |
1432 | CalloutNameIDCounter++;\r | |
1433 | e->id = CalloutNameIDCounter;\r | |
1434 | e->name_len = (int )(name_end - name);\r | |
1435 | }\r | |
1436 | \r | |
1437 | *rentry = e;\r | |
1438 | return e->id;\r | |
1439 | }\r | |
1440 | \r | |
1441 | static int\r | |
1442 | is_allowed_callout_name(OnigEncoding enc, UChar* name, UChar* name_end)\r | |
14b0e578 | 1443 | {\r |
b602265d DG |
1444 | UChar* p;\r |
1445 | OnigCodePoint c;\r | |
1446 | \r | |
1447 | if (name >= name_end) return 0;\r | |
1448 | \r | |
1449 | p = name;\r | |
1450 | while (p < name_end) {\r | |
1451 | c = ONIGENC_MBC_TO_CODE(enc, p, name_end);\r | |
1452 | if (! IS_ALLOWED_CODE_IN_CALLOUT_NAME(c))\r | |
1453 | return 0;\r | |
1454 | \r | |
1455 | if (p == name) {\r | |
1456 | if (c >= '0' && c <= '9') return 0;\r | |
1457 | }\r | |
1458 | \r | |
1459 | p += ONIGENC_MBC_ENC_LEN(enc, p);\r | |
1460 | }\r | |
1461 | \r | |
1462 | return 1;\r | |
14b0e578 CS |
1463 | }\r |
1464 | \r | |
b602265d DG |
1465 | static int\r |
1466 | is_allowed_callout_tag_name(OnigEncoding enc, UChar* name, UChar* name_end)\r | |
14b0e578 | 1467 | {\r |
b602265d DG |
1468 | UChar* p;\r |
1469 | OnigCodePoint c;\r | |
14b0e578 | 1470 | \r |
b602265d DG |
1471 | if (name >= name_end) return 0;\r |
1472 | \r | |
1473 | p = name;\r | |
1474 | while (p < name_end) {\r | |
1475 | c = ONIGENC_MBC_TO_CODE(enc, p, name_end);\r | |
1476 | if (! IS_ALLOWED_CODE_IN_CALLOUT_TAG_NAME(c))\r | |
1477 | return 0;\r | |
1478 | \r | |
1479 | if (p == name) {\r | |
1480 | if (c >= '0' && c <= '9') return 0;\r | |
1481 | }\r | |
1482 | \r | |
1483 | p += ONIGENC_MBC_ENC_LEN(enc, p);\r | |
1484 | }\r | |
1485 | \r | |
1486 | return 1;\r | |
14b0e578 CS |
1487 | }\r |
1488 | \r | |
b602265d DG |
1489 | extern int\r |
1490 | onig_set_callout_of_name(OnigEncoding enc, OnigCalloutType callout_type,\r | |
1491 | UChar* name, UChar* name_end, int in,\r | |
1492 | OnigCalloutFunc start_func,\r | |
1493 | OnigCalloutFunc end_func,\r | |
1494 | int arg_num, unsigned int arg_types[],\r | |
1495 | int opt_arg_num, OnigValue opt_defaults[])\r | |
14b0e578 | 1496 | {\r |
b602265d DG |
1497 | int r;\r |
1498 | int i;\r | |
1499 | int j;\r | |
1500 | int id;\r | |
1501 | int is_not_single;\r | |
1502 | CalloutNameEntry* e;\r | |
1503 | CalloutNameListEntry* fe;\r | |
14b0e578 | 1504 | \r |
b602265d DG |
1505 | if (callout_type != ONIG_CALLOUT_TYPE_SINGLE)\r |
1506 | return ONIGERR_INVALID_ARGUMENT;\r | |
14b0e578 | 1507 | \r |
b602265d DG |
1508 | if (arg_num < 0 || arg_num > ONIG_CALLOUT_MAX_ARGS_NUM)\r |
1509 | return ONIGERR_INVALID_CALLOUT_ARG;\r | |
14b0e578 | 1510 | \r |
b602265d DG |
1511 | if (opt_arg_num < 0 || opt_arg_num > arg_num)\r |
1512 | return ONIGERR_INVALID_CALLOUT_ARG;\r | |
14b0e578 | 1513 | \r |
b602265d DG |
1514 | if (start_func == 0 && end_func == 0)\r |
1515 | return ONIGERR_INVALID_CALLOUT_ARG;\r | |
1516 | \r | |
1517 | if ((in & ONIG_CALLOUT_IN_PROGRESS) == 0 && (in & ONIG_CALLOUT_IN_RETRACTION) == 0)\r | |
1518 | return ONIGERR_INVALID_CALLOUT_ARG;\r | |
1519 | \r | |
1520 | for (i = 0; i < arg_num; i++) {\r | |
1521 | unsigned int t = arg_types[i];\r | |
1522 | if (t == ONIG_TYPE_VOID)\r | |
1523 | return ONIGERR_INVALID_CALLOUT_ARG;\r | |
1524 | else {\r | |
1525 | if (i >= arg_num - opt_arg_num) {\r | |
1526 | if (t != ONIG_TYPE_LONG && t != ONIG_TYPE_CHAR && t != ONIG_TYPE_STRING &&\r | |
1527 | t != ONIG_TYPE_TAG)\r | |
1528 | return ONIGERR_INVALID_CALLOUT_ARG;\r | |
1529 | }\r | |
1530 | else {\r | |
1531 | if (t != ONIG_TYPE_LONG) {\r | |
1532 | t = t & ~ONIG_TYPE_LONG;\r | |
1533 | if (t != ONIG_TYPE_CHAR && t != ONIG_TYPE_STRING && t != ONIG_TYPE_TAG)\r | |
1534 | return ONIGERR_INVALID_CALLOUT_ARG;\r | |
1535 | }\r | |
14b0e578 CS |
1536 | }\r |
1537 | }\r | |
1538 | }\r | |
1539 | \r | |
b602265d DG |
1540 | if (! is_allowed_callout_name(enc, name, name_end)) {\r |
1541 | return ONIGERR_INVALID_CALLOUT_NAME;\r | |
14b0e578 | 1542 | }\r |
14b0e578 | 1543 | \r |
b602265d DG |
1544 | is_not_single = (callout_type != ONIG_CALLOUT_TYPE_SINGLE);\r |
1545 | id = callout_name_entry(&e, enc, is_not_single, name, name_end);\r | |
1546 | if (id < 0) return id;\r | |
14b0e578 | 1547 | \r |
b602265d DG |
1548 | r = ONIG_NORMAL;\r |
1549 | if (IS_NULL(GlobalCalloutNameList)) {\r | |
1550 | r = make_callout_func_list(&GlobalCalloutNameList, 10);\r | |
1551 | if (r != ONIG_NORMAL) return r;\r | |
1552 | }\r | |
14b0e578 | 1553 | \r |
b602265d DG |
1554 | while (id >= GlobalCalloutNameList->n) {\r |
1555 | int rid;\r | |
1556 | r = callout_func_list_add(GlobalCalloutNameList, &rid);\r | |
1557 | if (r != ONIG_NORMAL) return r;\r | |
14b0e578 CS |
1558 | }\r |
1559 | \r | |
b602265d DG |
1560 | fe = GlobalCalloutNameList->v + id;\r |
1561 | fe->type = callout_type;\r | |
1562 | fe->in = in;\r | |
1563 | fe->start_func = start_func;\r | |
1564 | fe->end_func = end_func;\r | |
1565 | fe->arg_num = arg_num;\r | |
1566 | fe->opt_arg_num = opt_arg_num;\r | |
1567 | fe->name = e->name;\r | |
14b0e578 | 1568 | \r |
b602265d DG |
1569 | for (i = 0; i < arg_num; i++) {\r |
1570 | fe->arg_types[i] = arg_types[i];\r | |
1571 | }\r | |
1572 | for (i = arg_num - opt_arg_num, j = 0; i < arg_num; i++, j++) {\r | |
1573 | if (fe->arg_types[i] == ONIG_TYPE_STRING) {\r | |
1574 | OnigValue* val = opt_defaults + j;\r | |
1575 | UChar* ds = onigenc_strdup(enc, val->s.start, val->s.end);\r | |
1576 | CHECK_NULL_RETURN_MEMERR(ds);\r | |
14b0e578 | 1577 | \r |
b602265d DG |
1578 | fe->opt_defaults[i].s.start = ds;\r |
1579 | fe->opt_defaults[i].s.end = ds + (val->s.end - val->s.start);\r | |
1580 | }\r | |
1581 | else {\r | |
1582 | fe->opt_defaults[i] = opt_defaults[j];\r | |
1583 | }\r | |
1584 | }\r | |
1585 | \r | |
1586 | r = id;\r | |
1587 | return r;\r | |
14b0e578 CS |
1588 | }\r |
1589 | \r | |
b602265d DG |
1590 | static int\r |
1591 | get_callout_name_id_by_name(OnigEncoding enc, int is_not_single,\r | |
1592 | UChar* name, UChar* name_end, int* rid)\r | |
14b0e578 | 1593 | {\r |
b602265d DG |
1594 | int r;\r |
1595 | CalloutNameEntry* e;\r | |
14b0e578 | 1596 | \r |
b602265d DG |
1597 | if (! is_allowed_callout_name(enc, name, name_end)) {\r |
1598 | return ONIGERR_INVALID_CALLOUT_NAME;\r | |
1599 | }\r | |
1600 | \r | |
1601 | e = callout_name_find(enc, is_not_single, name, name_end);\r | |
1602 | if (IS_NULL(e)) {\r | |
1603 | return ONIGERR_UNDEFINED_CALLOUT_NAME;\r | |
1604 | }\r | |
1605 | \r | |
1606 | r = ONIG_NORMAL;\r | |
1607 | *rid = e->id;\r | |
1608 | \r | |
1609 | return r;\r | |
14b0e578 CS |
1610 | }\r |
1611 | \r | |
b602265d DG |
1612 | extern OnigCalloutFunc\r |
1613 | onig_get_callout_start_func(regex_t* reg, int callout_num)\r | |
14b0e578 | 1614 | {\r |
b602265d DG |
1615 | /* If used for callouts of contents, return 0. */\r |
1616 | CalloutListEntry* e;\r | |
14b0e578 | 1617 | \r |
b602265d DG |
1618 | e = onig_reg_callout_list_at(reg, callout_num);\r |
1619 | return e->start_func;\r | |
14b0e578 CS |
1620 | }\r |
1621 | \r | |
b602265d DG |
1622 | extern const UChar*\r |
1623 | onig_get_callout_tag_start(regex_t* reg, int callout_num)\r | |
14b0e578 | 1624 | {\r |
b602265d DG |
1625 | CalloutListEntry* e = onig_reg_callout_list_at(reg, callout_num);\r |
1626 | return e->tag_start;\r | |
14b0e578 CS |
1627 | }\r |
1628 | \r | |
b602265d DG |
1629 | extern const UChar*\r |
1630 | onig_get_callout_tag_end(regex_t* reg, int callout_num)\r | |
14b0e578 | 1631 | {\r |
b602265d DG |
1632 | CalloutListEntry* e = onig_reg_callout_list_at(reg, callout_num);\r |
1633 | return e->tag_end;\r | |
1634 | }\r | |
14b0e578 | 1635 | \r |
14b0e578 | 1636 | \r |
b602265d DG |
1637 | extern OnigCalloutType\r |
1638 | onig_get_callout_type_by_name_id(int name_id)\r | |
1639 | {\r | |
1640 | if (name_id < 0 || name_id >= GlobalCalloutNameList->n)\r | |
1641 | return 0;\r | |
14b0e578 | 1642 | \r |
b602265d | 1643 | return GlobalCalloutNameList->v[name_id].type;\r |
14b0e578 CS |
1644 | }\r |
1645 | \r | |
b602265d DG |
1646 | extern OnigCalloutFunc\r |
1647 | onig_get_callout_start_func_by_name_id(int name_id)\r | |
14b0e578 | 1648 | {\r |
b602265d DG |
1649 | if (name_id < 0 || name_id >= GlobalCalloutNameList->n)\r |
1650 | return 0;\r | |
14b0e578 | 1651 | \r |
b602265d | 1652 | return GlobalCalloutNameList->v[name_id].start_func;\r |
14b0e578 CS |
1653 | }\r |
1654 | \r | |
b602265d DG |
1655 | extern OnigCalloutFunc\r |
1656 | onig_get_callout_end_func_by_name_id(int name_id)\r | |
14b0e578 | 1657 | {\r |
b602265d DG |
1658 | if (name_id < 0 || name_id >= GlobalCalloutNameList->n)\r |
1659 | return 0;\r | |
14b0e578 | 1660 | \r |
b602265d | 1661 | return GlobalCalloutNameList->v[name_id].end_func;\r |
14b0e578 CS |
1662 | }\r |
1663 | \r | |
b602265d DG |
1664 | extern int\r |
1665 | onig_get_callout_in_by_name_id(int name_id)\r | |
14b0e578 | 1666 | {\r |
b602265d DG |
1667 | if (name_id < 0 || name_id >= GlobalCalloutNameList->n)\r |
1668 | return 0;\r | |
14b0e578 | 1669 | \r |
b602265d DG |
1670 | return GlobalCalloutNameList->v[name_id].in;\r |
1671 | }\r | |
14b0e578 | 1672 | \r |
b602265d DG |
1673 | static int\r |
1674 | get_callout_arg_num_by_name_id(int name_id)\r | |
1675 | {\r | |
1676 | return GlobalCalloutNameList->v[name_id].arg_num;\r | |
1677 | }\r | |
14b0e578 | 1678 | \r |
b602265d DG |
1679 | static int\r |
1680 | get_callout_opt_arg_num_by_name_id(int name_id)\r | |
14b0e578 | 1681 | {\r |
b602265d | 1682 | return GlobalCalloutNameList->v[name_id].opt_arg_num;\r |
14b0e578 | 1683 | }\r |
14b0e578 | 1684 | \r |
b602265d DG |
1685 | static unsigned int\r |
1686 | get_callout_arg_type_by_name_id(int name_id, int index)\r | |
14b0e578 | 1687 | {\r |
b602265d | 1688 | return GlobalCalloutNameList->v[name_id].arg_types[index];\r |
14b0e578 CS |
1689 | }\r |
1690 | \r | |
b602265d DG |
1691 | static OnigValue\r |
1692 | get_callout_opt_default_by_name_id(int name_id, int index)\r | |
14b0e578 | 1693 | {\r |
b602265d | 1694 | return GlobalCalloutNameList->v[name_id].opt_defaults[index];\r |
14b0e578 CS |
1695 | }\r |
1696 | \r | |
b602265d DG |
1697 | extern UChar*\r |
1698 | onig_get_callout_name_by_name_id(int name_id)\r | |
14b0e578 | 1699 | {\r |
b602265d DG |
1700 | if (name_id < 0 || name_id >= GlobalCalloutNameList->n)\r |
1701 | return 0;\r | |
1702 | \r | |
1703 | return GlobalCalloutNameList->v[name_id].name;\r | |
14b0e578 CS |
1704 | }\r |
1705 | \r | |
b602265d DG |
1706 | extern int\r |
1707 | onig_global_callout_names_free(void)\r | |
14b0e578 | 1708 | {\r |
b602265d DG |
1709 | free_callout_func_list(GlobalCalloutNameList);\r |
1710 | GlobalCalloutNameList = 0;\r | |
14b0e578 | 1711 | \r |
b602265d DG |
1712 | global_callout_name_table_free();\r |
1713 | return ONIG_NORMAL;\r | |
14b0e578 CS |
1714 | }\r |
1715 | \r | |
14b0e578 | 1716 | \r |
b602265d DG |
/* Per-regex tag table: hash table from tag name to callout number. */
typedef st_table   CalloutTagTable;
/* Value stored in the tag table (a callout number). */
typedef intptr_t   CalloutTagVal;

/* Bit set in a callout list entry's flag when a tag refers to that entry. */
#define CALLOUT_TAG_LIST_FLAG_TAG_EXIST     (1<<0)
14b0e578 | 1721 | \r |
b602265d DG |
1722 | static int\r |
1723 | i_callout_callout_list_set(UChar* key, CalloutTagVal e, void* arg)\r | |
1724 | {\r | |
1725 | int num;\r | |
1726 | RegexExt* ext = (RegexExt* )arg;\r | |
14b0e578 | 1727 | \r |
b602265d DG |
1728 | num = (int )e - 1;\r |
1729 | ext->callout_list[num].flag |= CALLOUT_TAG_LIST_FLAG_TAG_EXIST;\r | |
1730 | return ST_CONTINUE;\r | |
1731 | }\r | |
14b0e578 | 1732 | \r |
b602265d DG |
1733 | static int\r |
1734 | setup_ext_callout_list_values(regex_t* reg)\r | |
1735 | {\r | |
1736 | int i, j;\r | |
1737 | RegexExt* ext;\r | |
1738 | \r | |
1739 | ext = REG_EXTP(reg);\r | |
1740 | if (IS_NOT_NULL(ext->tag_table)) {\r | |
1741 | onig_st_foreach((CalloutTagTable *)ext->tag_table, i_callout_callout_list_set,\r | |
1742 | (st_data_t )ext);\r | |
1743 | }\r | |
1744 | \r | |
1745 | for (i = 0; i < ext->callout_num; i++) {\r | |
1746 | CalloutListEntry* e = ext->callout_list + i;\r | |
1747 | if (e->of == ONIG_CALLOUT_OF_NAME) {\r | |
1748 | for (j = 0; j < e->u.arg.num; j++) {\r | |
1749 | if (e->u.arg.types[j] == ONIG_TYPE_TAG) {\r | |
1750 | UChar* start;\r | |
1751 | UChar* end;\r | |
1752 | int num;\r | |
1753 | start = e->u.arg.vals[j].s.start;\r | |
1754 | end = e->u.arg.vals[j].s.end;\r | |
1755 | num = onig_get_callout_num_by_tag(reg, start, end);\r | |
1756 | if (num < 0) return num;\r | |
1757 | e->u.arg.vals[j].tag = num;\r | |
1758 | }\r | |
14b0e578 CS |
1759 | }\r |
1760 | }\r | |
14b0e578 CS |
1761 | }\r |
1762 | \r | |
b602265d | 1763 | return ONIG_NORMAL;\r |
14b0e578 CS |
1764 | }\r |
1765 | \r | |
1766 | extern int\r | |
b602265d | 1767 | onig_callout_tag_is_exist_at_callout_num(regex_t* reg, int callout_num)\r |
14b0e578 | 1768 | {\r |
b602265d | 1769 | RegexExt* ext = REG_EXTP(reg);\r |
14b0e578 | 1770 | \r |
b602265d DG |
1771 | if (IS_NULL(ext) || IS_NULL(ext->callout_list)) return 0;\r |
1772 | if (callout_num > ext->callout_num) return 0;\r | |
14b0e578 | 1773 | \r |
b602265d DG |
1774 | return (ext->callout_list[callout_num].flag &\r |
1775 | CALLOUT_TAG_LIST_FLAG_TAG_EXIST) != 0 ? 1 : 0;\r | |
14b0e578 CS |
1776 | }\r |
1777 | \r | |
b602265d DG |
1778 | static int\r |
1779 | i_free_callout_tag_entry(UChar* key, CalloutTagVal e, void* arg ARG_UNUSED)\r | |
14b0e578 | 1780 | {\r |
b602265d DG |
1781 | xfree(key);\r |
1782 | return ST_DELETE;\r | |
14b0e578 CS |
1783 | }\r |
1784 | \r | |
b602265d DG |
1785 | static int\r |
1786 | callout_tag_table_clear(CalloutTagTable* t)\r | |
14b0e578 | 1787 | {\r |
b602265d DG |
1788 | if (IS_NOT_NULL(t)) {\r |
1789 | onig_st_foreach(t, i_free_callout_tag_entry, 0);\r | |
14b0e578 | 1790 | }\r |
b602265d | 1791 | return 0;\r |
14b0e578 CS |
1792 | }\r |
1793 | \r | |
b602265d DG |
1794 | extern int\r |
1795 | onig_callout_tag_table_free(void* table)\r | |
14b0e578 | 1796 | {\r |
b602265d | 1797 | CalloutTagTable* t = (CalloutTagTable* )table;\r |
14b0e578 | 1798 | \r |
b602265d DG |
1799 | if (IS_NOT_NULL(t)) {\r |
1800 | int r = callout_tag_table_clear(t);\r | |
1801 | if (r != 0) return r;\r | |
14b0e578 | 1802 | \r |
b602265d DG |
1803 | onig_st_free_table(t);\r |
1804 | }\r | |
14b0e578 | 1805 | \r |
b602265d | 1806 | return 0;\r |
14b0e578 CS |
1807 | }\r |
1808 | \r | |
b602265d DG |
1809 | extern int\r |
1810 | onig_get_callout_num_by_tag(regex_t* reg,\r | |
1811 | const UChar* tag, const UChar* tag_end)\r | |
14b0e578 | 1812 | {\r |
b602265d DG |
1813 | int r;\r |
1814 | RegexExt* ext;\r | |
1815 | CalloutTagVal e;\r | |
14b0e578 | 1816 | \r |
b602265d DG |
1817 | ext = REG_EXTP(reg);\r |
1818 | if (IS_NULL(ext) || IS_NULL(ext->tag_table))\r | |
1819 | return ONIGERR_INVALID_CALLOUT_TAG_NAME;\r | |
14b0e578 | 1820 | \r |
b602265d DG |
1821 | r = onig_st_lookup_strend(ext->tag_table, tag, tag_end,\r |
1822 | (HashDataType* )((void* )(&e)));\r | |
1823 | if (r == 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME;\r | |
1824 | return (int )e;\r | |
14b0e578 CS |
1825 | }\r |
1826 | \r | |
b602265d DG |
1827 | static CalloutTagVal\r |
1828 | callout_tag_find(CalloutTagTable* t, const UChar* name, const UChar* name_end)\r | |
14b0e578 | 1829 | {\r |
b602265d | 1830 | CalloutTagVal e;\r |
14b0e578 | 1831 | \r |
b602265d DG |
1832 | e = -1;\r |
1833 | if (IS_NOT_NULL(t)) {\r | |
1834 | onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));\r | |
14b0e578 | 1835 | }\r |
b602265d | 1836 | return e;\r |
14b0e578 CS |
1837 | }\r |
1838 | \r | |
1839 | static int\r | |
b602265d | 1840 | callout_tag_table_new(CalloutTagTable** rt)\r |
14b0e578 | 1841 | {\r |
b602265d DG |
1842 | CalloutTagTable* t;\r |
1843 | \r | |
1844 | *rt = 0;\r | |
1845 | t = onig_st_init_strend_table_with_size(INIT_TAG_NAMES_ALLOC_NUM);\r | |
1846 | CHECK_NULL_RETURN_MEMERR(t);\r | |
1847 | \r | |
1848 | *rt = t;\r | |
1849 | return ONIG_NORMAL;\r | |
14b0e578 CS |
1850 | }\r |
1851 | \r | |
14b0e578 | 1852 | static int\r |
b602265d DG |
1853 | callout_tag_entry_raw(CalloutTagTable* t, UChar* name, UChar* name_end,\r |
1854 | CalloutTagVal entry_val)\r | |
14b0e578 | 1855 | {\r |
b602265d DG |
1856 | int r;\r |
1857 | CalloutTagVal val;\r | |
14b0e578 | 1858 | \r |
b602265d DG |
1859 | if (name_end - name <= 0)\r |
1860 | return ONIGERR_INVALID_CALLOUT_TAG_NAME;\r | |
14b0e578 | 1861 | \r |
b602265d DG |
1862 | val = callout_tag_find(t, name, name_end);\r |
1863 | if (val >= 0)\r | |
1864 | return ONIGERR_MULTIPLEX_DEFINED_NAME;\r | |
14b0e578 | 1865 | \r |
b602265d DG |
1866 | r = onig_st_insert_strend(t, name, name_end, (HashDataType )entry_val);\r |
1867 | if (r < 0) return r;\r | |
14b0e578 | 1868 | \r |
b602265d | 1869 | return ONIG_NORMAL;\r |
14b0e578 CS |
1870 | }\r |
1871 | \r | |
1872 | static int\r | |
b602265d | 1873 | ext_ensure_tag_table(regex_t* reg)\r |
14b0e578 | 1874 | {\r |
b602265d DG |
1875 | int r;\r |
1876 | RegexExt* ext;\r | |
1877 | CalloutTagTable* t;\r | |
14b0e578 | 1878 | \r |
b602265d DG |
1879 | ext = onig_get_regex_ext(reg);\r |
1880 | CHECK_NULL_RETURN_MEMERR(ext);\r | |
14b0e578 | 1881 | \r |
b602265d DG |
1882 | if (IS_NULL(ext->tag_table)) {\r |
1883 | r = callout_tag_table_new(&t);\r | |
1884 | if (r != ONIG_NORMAL) return r;\r | |
1885 | \r | |
1886 | ext->tag_table = t;\r | |
14b0e578 | 1887 | }\r |
b602265d DG |
1888 | \r |
1889 | return ONIG_NORMAL;\r | |
14b0e578 CS |
1890 | }\r |
1891 | \r | |
1892 | static int\r | |
b602265d DG |
1893 | callout_tag_entry(regex_t* reg, UChar* name, UChar* name_end,\r |
1894 | CalloutTagVal entry_val)\r | |
14b0e578 | 1895 | {\r |
b602265d DG |
1896 | int r;\r |
1897 | RegexExt* ext;\r | |
1898 | CalloutListEntry* e;\r | |
14b0e578 | 1899 | \r |
b602265d DG |
1900 | r = ext_ensure_tag_table(reg);\r |
1901 | if (r != ONIG_NORMAL) return r;\r | |
14b0e578 | 1902 | \r |
b602265d DG |
1903 | ext = onig_get_regex_ext(reg);\r |
1904 | r = callout_tag_entry_raw(ext->tag_table, name, name_end, entry_val);\r | |
14b0e578 | 1905 | \r |
b602265d DG |
1906 | e = onig_reg_callout_list_at(reg, (int )entry_val);\r |
1907 | e->tag_start = name;\r | |
1908 | e->tag_end = name_end;\r | |
14b0e578 | 1909 | \r |
b602265d DG |
1910 | return r;\r |
1911 | }\r | |
14b0e578 | 1912 | \r |
b602265d | 1913 | #endif /* USE_CALLOUT */\r |
14b0e578 | 1914 | \r |
14b0e578 | 1915 | \r |
b602265d | 1916 | #define INIT_SCANENV_MEMENV_ALLOC_SIZE 16\r |
14b0e578 | 1917 | \r |
b602265d DG |
1918 | static void\r |
1919 | scan_env_clear(ScanEnv* env)\r | |
14b0e578 | 1920 | {\r |
b602265d DG |
1921 | MEM_STATUS_CLEAR(env->capture_history);\r |
1922 | MEM_STATUS_CLEAR(env->bt_mem_start);\r | |
1923 | MEM_STATUS_CLEAR(env->bt_mem_end);\r | |
1924 | MEM_STATUS_CLEAR(env->backrefed_mem);\r | |
1925 | env->error = (UChar* )NULL;\r | |
1926 | env->error_end = (UChar* )NULL;\r | |
1927 | env->num_call = 0;\r | |
14b0e578 | 1928 | \r |
b602265d DG |
1929 | #ifdef USE_CALL\r |
1930 | env->unset_addr_list = NULL;\r | |
1931 | env->has_call_zero = 0;\r | |
1932 | #endif\r | |
14b0e578 | 1933 | \r |
b602265d DG |
1934 | env->num_mem = 0;\r |
1935 | env->num_named = 0;\r | |
1936 | env->mem_alloc = 0;\r | |
1937 | env->mem_env_dynamic = (MemEnv* )NULL;\r | |
14b0e578 | 1938 | \r |
b602265d | 1939 | xmemset(env->mem_env_static, 0, sizeof(env->mem_env_static));\r |
14b0e578 | 1940 | \r |
b602265d DG |
1941 | env->parse_depth = 0;\r |
1942 | env->keep_num = 0;\r | |
1943 | env->save_num = 0;\r | |
1944 | env->save_alloc_num = 0;\r | |
1945 | env->saves = 0;\r | |
1946 | }\r | |
14b0e578 | 1947 | \r |
b602265d DG |
1948 | static int\r |
1949 | scan_env_add_mem_entry(ScanEnv* env)\r | |
1950 | {\r | |
1951 | int i, need, alloc;\r | |
1952 | MemEnv* p;\r | |
14b0e578 | 1953 | \r |
b602265d DG |
1954 | need = env->num_mem + 1;\r |
1955 | if (need > MaxCaptureNum && MaxCaptureNum != 0)\r | |
1956 | return ONIGERR_TOO_MANY_CAPTURES;\r | |
14b0e578 | 1957 | \r |
b602265d DG |
1958 | if (need >= SCANENV_MEMENV_SIZE) {\r |
1959 | if (env->mem_alloc <= need) {\r | |
1960 | if (IS_NULL(env->mem_env_dynamic)) {\r | |
1961 | alloc = INIT_SCANENV_MEMENV_ALLOC_SIZE;\r | |
1962 | p = (MemEnv* )xmalloc(sizeof(MemEnv) * alloc);\r | |
1963 | CHECK_NULL_RETURN_MEMERR(p);\r | |
1964 | xmemcpy(p, env->mem_env_static, sizeof(env->mem_env_static));\r | |
1965 | }\r | |
1966 | else {\r | |
1967 | alloc = env->mem_alloc * 2;\r | |
1968 | p = (MemEnv* )xrealloc(env->mem_env_dynamic, sizeof(MemEnv) * alloc, sizeof(MemEnv)*env->mem_alloc);\r | |
1969 | CHECK_NULL_RETURN_MEMERR(p);\r | |
1970 | }\r | |
14b0e578 | 1971 | \r |
b602265d DG |
1972 | for (i = env->num_mem + 1; i < alloc; i++) {\r |
1973 | p[i].node = NULL_NODE;\r | |
1974 | #if 0\r | |
1975 | p[i].in = 0;\r | |
1976 | p[i].recursion = 0;\r | |
1977 | #endif\r | |
1978 | }\r | |
1979 | \r | |
1980 | env->mem_env_dynamic = p;\r | |
1981 | env->mem_alloc = alloc;\r | |
14b0e578 CS |
1982 | }\r |
1983 | }\r | |
1984 | \r | |
b602265d DG |
1985 | env->num_mem++;\r |
1986 | return env->num_mem;\r | |
14b0e578 CS |
1987 | }\r |
1988 | \r | |
1989 | static int\r | |
b602265d | 1990 | scan_env_set_mem_node(ScanEnv* env, int num, Node* node)\r |
14b0e578 | 1991 | {\r |
b602265d DG |
1992 | if (env->num_mem >= num)\r |
1993 | SCANENV_MEMENV(env)[num].node = node;\r | |
1994 | else\r | |
1995 | return ONIGERR_PARSER_BUG;\r | |
1996 | return 0;\r | |
14b0e578 CS |
1997 | }\r |
1998 | \r | |
b602265d DG |
1999 | extern void\r |
2000 | onig_node_free(Node* node)\r | |
14b0e578 | 2001 | {\r |
b602265d DG |
2002 | start:\r |
2003 | if (IS_NULL(node)) return ;\r | |
14b0e578 | 2004 | \r |
b602265d DG |
2005 | #ifdef DEBUG_NODE_FREE\r |
2006 | fprintf(stderr, "onig_node_free: %p\n", node);\r | |
2007 | #endif\r | |
14b0e578 | 2008 | \r |
b602265d DG |
2009 | switch (NODE_TYPE(node)) {\r |
2010 | case NODE_STRING:\r | |
2011 | if (STR_(node)->capa != 0 &&\r | |
2012 | IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) {\r | |
2013 | xfree(STR_(node)->s);\r | |
2014 | }\r | |
2015 | break;\r | |
14b0e578 | 2016 | \r |
b602265d DG |
2017 | case NODE_LIST:\r |
2018 | case NODE_ALT:\r | |
2019 | onig_node_free(NODE_CAR(node));\r | |
2020 | {\r | |
2021 | Node* next_node = NODE_CDR(node);\r | |
2022 | \r | |
2023 | xfree(node);\r | |
2024 | node = next_node;\r | |
2025 | goto start;\r | |
14b0e578 | 2026 | }\r |
b602265d | 2027 | break;\r |
14b0e578 | 2028 | \r |
b602265d DG |
2029 | case NODE_CCLASS:\r |
2030 | {\r | |
2031 | CClassNode* cc = CCLASS_(node);\r | |
14b0e578 | 2032 | \r |
b602265d DG |
2033 | if (cc->mbuf)\r |
2034 | bbuf_free(cc->mbuf);\r | |
2035 | }\r | |
2036 | break;\r | |
14b0e578 | 2037 | \r |
b602265d DG |
2038 | case NODE_BACKREF:\r |
2039 | if (IS_NOT_NULL(BACKREF_(node)->back_dynamic))\r | |
2040 | xfree(BACKREF_(node)->back_dynamic);\r | |
2041 | break;\r | |
14b0e578 | 2042 | \r |
b602265d DG |
2043 | case NODE_ENCLOSURE:\r |
2044 | if (NODE_BODY(node))\r | |
2045 | onig_node_free(NODE_BODY(node));\r | |
14b0e578 | 2046 | \r |
b602265d DG |
2047 | {\r |
2048 | EnclosureNode* en = ENCLOSURE_(node);\r | |
2049 | if (en->type == ENCLOSURE_IF_ELSE) {\r | |
2050 | onig_node_free(en->te.Then);\r | |
2051 | onig_node_free(en->te.Else);\r | |
14b0e578 CS |
2052 | }\r |
2053 | }\r | |
b602265d | 2054 | break;\r |
14b0e578 | 2055 | \r |
b602265d DG |
2056 | case NODE_QUANT:\r |
2057 | case NODE_ANCHOR:\r | |
2058 | if (NODE_BODY(node))\r | |
2059 | onig_node_free(NODE_BODY(node));\r | |
2060 | break;\r | |
14b0e578 | 2061 | \r |
b602265d DG |
2062 | case NODE_CTYPE:\r |
2063 | case NODE_CALL:\r | |
2064 | case NODE_GIMMICK:\r | |
2065 | break;\r | |
14b0e578 | 2066 | }\r |
14b0e578 | 2067 | \r |
b602265d | 2068 | xfree(node);\r |
14b0e578 CS |
2069 | }\r |
2070 | \r | |
b602265d DG |
2071 | static void\r |
2072 | cons_node_free_alone(Node* node)\r | |
14b0e578 | 2073 | {\r |
b602265d DG |
2074 | NODE_CAR(node) = 0;\r |
2075 | NODE_CDR(node) = 0;\r | |
2076 | onig_node_free(node);\r | |
14b0e578 CS |
2077 | }\r |
2078 | \r | |
b602265d DG |
2079 | static Node*\r |
2080 | node_new(void)\r | |
14b0e578 | 2081 | {\r |
b602265d | 2082 | Node* node;\r |
14b0e578 | 2083 | \r |
b602265d DG |
2084 | node = (Node* )xmalloc(sizeof(Node));\r |
2085 | xmemset(node, 0, sizeof(*node));\r | |
14b0e578 | 2086 | \r |
b602265d DG |
2087 | #ifdef DEBUG_NODE_FREE\r |
2088 | fprintf(stderr, "node_new: %p\n", node);\r | |
2089 | #endif\r | |
2090 | return node;\r | |
2091 | }\r | |
14b0e578 | 2092 | \r |
14b0e578 | 2093 | \r |
b602265d DG |
2094 | static void\r |
2095 | initialize_cclass(CClassNode* cc)\r | |
2096 | {\r | |
2097 | BITSET_CLEAR(cc->bs);\r | |
2098 | cc->flags = 0;\r | |
2099 | cc->mbuf = NULL;\r | |
2100 | }\r | |
2101 | \r | |
2102 | static Node*\r | |
2103 | node_new_cclass(void)\r | |
2104 | {\r | |
2105 | Node* node = node_new();\r | |
2106 | CHECK_NULL_RETURN(node);\r | |
2107 | \r | |
2108 | NODE_SET_TYPE(node, NODE_CCLASS);\r | |
2109 | initialize_cclass(CCLASS_(node));\r | |
2110 | return node;\r | |
2111 | }\r | |
2112 | \r | |
2113 | static Node*\r | |
2114 | node_new_ctype(int type, int not, OnigOptionType options)\r | |
2115 | {\r | |
2116 | Node* node = node_new();\r | |
2117 | CHECK_NULL_RETURN(node);\r | |
2118 | \r | |
2119 | NODE_SET_TYPE(node, NODE_CTYPE);\r | |
2120 | CTYPE_(node)->ctype = type;\r | |
2121 | CTYPE_(node)->not = not;\r | |
2122 | CTYPE_(node)->options = options;\r | |
2123 | CTYPE_(node)->ascii_mode = IS_ASCII_MODE_CTYPE_OPTION(type, options);\r | |
2124 | return node;\r | |
2125 | }\r | |
2126 | \r | |
2127 | static Node*\r | |
2128 | node_new_anychar(void)\r | |
2129 | {\r | |
2130 | Node* node = node_new_ctype(CTYPE_ANYCHAR, 0, ONIG_OPTION_NONE);\r | |
2131 | return node;\r | |
2132 | }\r | |
2133 | \r | |
2134 | static Node*\r | |
2135 | node_new_anychar_with_fixed_option(OnigOptionType option)\r | |
2136 | {\r | |
2137 | CtypeNode* ct;\r | |
2138 | Node* node;\r | |
2139 | \r | |
2140 | node = node_new_anychar();\r | |
2141 | ct = CTYPE_(node);\r | |
2142 | ct->options = option;\r | |
2143 | NODE_STATUS_ADD(node, FIXED_OPTION);\r | |
2144 | return node;\r | |
2145 | }\r | |
2146 | \r | |
2147 | static int\r | |
2148 | node_new_no_newline(Node** node, ScanEnv* env)\r | |
2149 | {\r | |
2150 | Node* n;\r | |
2151 | \r | |
2152 | n = node_new_anychar_with_fixed_option(ONIG_OPTION_NONE);\r | |
2153 | CHECK_NULL_RETURN_MEMERR(n);\r | |
2154 | *node = n;\r | |
2155 | return 0;\r | |
2156 | }\r | |
2157 | \r | |
2158 | static int\r | |
2159 | node_new_true_anychar(Node** node, ScanEnv* env)\r | |
2160 | {\r | |
2161 | Node* n;\r | |
2162 | \r | |
2163 | n = node_new_anychar_with_fixed_option(ONIG_OPTION_MULTILINE);\r | |
2164 | CHECK_NULL_RETURN_MEMERR(n);\r | |
2165 | *node = n;\r | |
2166 | return 0;\r | |
2167 | }\r | |
2168 | \r | |
2169 | static Node*\r | |
2170 | node_new_list(Node* left, Node* right)\r | |
2171 | {\r | |
2172 | Node* node = node_new();\r | |
2173 | CHECK_NULL_RETURN(node);\r | |
2174 | \r | |
2175 | NODE_SET_TYPE(node, NODE_LIST);\r | |
2176 | NODE_CAR(node) = left;\r | |
2177 | NODE_CDR(node) = right;\r | |
2178 | return node;\r | |
2179 | }\r | |
2180 | \r | |
2181 | extern Node*\r | |
2182 | onig_node_new_list(Node* left, Node* right)\r | |
2183 | {\r | |
2184 | return node_new_list(left, right);\r | |
2185 | }\r | |
2186 | \r | |
2187 | extern Node*\r | |
2188 | onig_node_list_add(Node* list, Node* x)\r | |
2189 | {\r | |
2190 | Node *n;\r | |
2191 | \r | |
2192 | n = onig_node_new_list(x, NULL);\r | |
2193 | if (IS_NULL(n)) return NULL_NODE;\r | |
2194 | \r | |
2195 | if (IS_NOT_NULL(list)) {\r | |
2196 | while (IS_NOT_NULL(NODE_CDR(list)))\r | |
2197 | list = NODE_CDR(list);\r | |
2198 | \r | |
2199 | NODE_CDR(list) = n;\r | |
2200 | }\r | |
2201 | \r | |
2202 | return n;\r | |
2203 | }\r | |
2204 | \r | |
2205 | extern Node*\r | |
2206 | onig_node_new_alt(Node* left, Node* right)\r | |
2207 | {\r | |
2208 | Node* node = node_new();\r | |
2209 | CHECK_NULL_RETURN(node);\r | |
2210 | \r | |
2211 | NODE_SET_TYPE(node, NODE_ALT);\r | |
2212 | NODE_CAR(node) = left;\r | |
2213 | NODE_CDR(node) = right;\r | |
2214 | return node;\r | |
2215 | }\r | |
2216 | \r | |
2217 | static Node*\r | |
2218 | make_list_or_alt(NodeType type, int n, Node* ns[])\r | |
2219 | {\r | |
2220 | Node* r;\r | |
2221 | \r | |
2222 | if (n <= 0) return NULL_NODE;\r | |
2223 | \r | |
2224 | if (n == 1) {\r | |
2225 | r = node_new();\r | |
2226 | CHECK_NULL_RETURN(r);\r | |
2227 | NODE_SET_TYPE(r, type);\r | |
2228 | NODE_CAR(r) = ns[0];\r | |
2229 | NODE_CDR(r) = NULL_NODE;\r | |
2230 | }\r | |
2231 | else {\r | |
2232 | Node* right;\r | |
2233 | \r | |
2234 | r = node_new();\r | |
2235 | CHECK_NULL_RETURN(r);\r | |
2236 | \r | |
2237 | right = make_list_or_alt(type, n - 1, ns + 1);\r | |
2238 | if (IS_NULL(right)) {\r | |
2239 | onig_node_free(r);\r | |
2240 | return NULL_NODE;\r | |
2241 | }\r | |
2242 | \r | |
2243 | NODE_SET_TYPE(r, type);\r | |
2244 | NODE_CAR(r) = ns[0];\r | |
2245 | NODE_CDR(r) = right;\r | |
2246 | }\r | |
2247 | \r | |
2248 | return r;\r | |
2249 | }\r | |
2250 | \r | |
2251 | static Node*\r | |
2252 | make_list(int n, Node* ns[])\r | |
2253 | {\r | |
2254 | return make_list_or_alt(NODE_LIST, n, ns);\r | |
2255 | }\r | |
2256 | \r | |
2257 | static Node*\r | |
2258 | make_alt(int n, Node* ns[])\r | |
2259 | {\r | |
2260 | return make_list_or_alt(NODE_ALT, n, ns);\r | |
2261 | }\r | |
2262 | \r | |
2263 | extern Node*\r | |
2264 | onig_node_new_anchor(int type, int ascii_mode)\r | |
2265 | {\r | |
2266 | Node* node = node_new();\r | |
2267 | CHECK_NULL_RETURN(node);\r | |
2268 | \r | |
2269 | NODE_SET_TYPE(node, NODE_ANCHOR);\r | |
2270 | ANCHOR_(node)->type = type;\r | |
2271 | ANCHOR_(node)->char_len = -1;\r | |
2272 | ANCHOR_(node)->ascii_mode = ascii_mode;\r | |
2273 | return node;\r | |
2274 | }\r | |
2275 | \r | |
2276 | static Node*\r | |
2277 | node_new_backref(int back_num, int* backrefs, int by_name,\r | |
2278 | #ifdef USE_BACKREF_WITH_LEVEL\r | |
2279 | int exist_level, int nest_level,\r | |
2280 | #endif\r | |
2281 | ScanEnv* env)\r | |
2282 | {\r | |
2283 | int i;\r | |
2284 | Node* node = node_new();\r | |
2285 | \r | |
2286 | CHECK_NULL_RETURN(node);\r | |
2287 | \r | |
2288 | NODE_SET_TYPE(node, NODE_BACKREF);\r | |
2289 | BACKREF_(node)->back_num = back_num;\r | |
2290 | BACKREF_(node)->back_dynamic = (int* )NULL;\r | |
2291 | if (by_name != 0)\r | |
2292 | NODE_STATUS_ADD(node, BY_NAME);\r | |
2293 | \r | |
2294 | #ifdef USE_BACKREF_WITH_LEVEL\r | |
2295 | if (exist_level != 0) {\r | |
2296 | NODE_STATUS_ADD(node, NEST_LEVEL);\r | |
2297 | BACKREF_(node)->nest_level = nest_level;\r | |
2298 | }\r | |
2299 | #endif\r | |
2300 | \r | |
2301 | for (i = 0; i < back_num; i++) {\r | |
2302 | if (backrefs[i] <= env->num_mem &&\r | |
2303 | IS_NULL(SCANENV_MEMENV(env)[backrefs[i]].node)) {\r | |
2304 | NODE_STATUS_ADD(node, RECURSION); /* /...(\1).../ */\r | |
2305 | break;\r | |
2306 | }\r | |
2307 | }\r | |
2308 | \r | |
2309 | if (back_num <= NODE_BACKREFS_SIZE) {\r | |
2310 | for (i = 0; i < back_num; i++)\r | |
2311 | BACKREF_(node)->back_static[i] = backrefs[i];\r | |
2312 | }\r | |
2313 | else {\r | |
2314 | int* p = (int* )xmalloc(sizeof(int) * back_num);\r | |
2315 | if (IS_NULL(p)) {\r | |
2316 | onig_node_free(node);\r | |
2317 | return NULL;\r | |
2318 | }\r | |
2319 | BACKREF_(node)->back_dynamic = p;\r | |
2320 | for (i = 0; i < back_num; i++)\r | |
2321 | p[i] = backrefs[i];\r | |
2322 | }\r | |
2323 | return node;\r | |
2324 | }\r | |
2325 | \r | |
2326 | static Node*\r | |
2327 | node_new_backref_checker(int back_num, int* backrefs, int by_name,\r | |
2328 | #ifdef USE_BACKREF_WITH_LEVEL\r | |
2329 | int exist_level, int nest_level,\r | |
2330 | #endif\r | |
2331 | ScanEnv* env)\r | |
2332 | {\r | |
2333 | Node* node;\r | |
2334 | \r | |
2335 | node = node_new_backref(back_num, backrefs, by_name,\r | |
2336 | #ifdef USE_BACKREF_WITH_LEVEL\r | |
2337 | exist_level, nest_level,\r | |
2338 | #endif\r | |
2339 | env);\r | |
2340 | CHECK_NULL_RETURN(node);\r | |
2341 | \r | |
2342 | NODE_STATUS_ADD(node, CHECKER);\r | |
2343 | return node;\r | |
2344 | }\r | |
2345 | \r | |
2346 | #ifdef USE_CALL\r | |
2347 | static Node*\r | |
2348 | node_new_call(UChar* name, UChar* name_end, int gnum, int by_number)\r | |
2349 | {\r | |
2350 | Node* node = node_new();\r | |
2351 | CHECK_NULL_RETURN(node);\r | |
2352 | \r | |
2353 | NODE_SET_TYPE(node, NODE_CALL);\r | |
2354 | CALL_(node)->by_number = by_number;\r | |
2355 | CALL_(node)->name = name;\r | |
2356 | CALL_(node)->name_end = name_end;\r | |
2357 | CALL_(node)->group_num = gnum;\r | |
2358 | CALL_(node)->entry_count = 1;\r | |
2359 | return node;\r | |
2360 | }\r | |
2361 | #endif\r | |
2362 | \r | |
2363 | static Node*\r | |
2364 | node_new_quantifier(int lower, int upper, int by_number)\r | |
2365 | {\r | |
2366 | Node* node = node_new();\r | |
2367 | CHECK_NULL_RETURN(node);\r | |
2368 | \r | |
2369 | NODE_SET_TYPE(node, NODE_QUANT);\r | |
2370 | QUANT_(node)->lower = lower;\r | |
2371 | QUANT_(node)->upper = upper;\r | |
2372 | QUANT_(node)->greedy = 1;\r | |
2373 | QUANT_(node)->body_empty_info = QUANT_BODY_IS_NOT_EMPTY;\r | |
2374 | QUANT_(node)->head_exact = NULL_NODE;\r | |
2375 | QUANT_(node)->next_head_exact = NULL_NODE;\r | |
2376 | QUANT_(node)->is_refered = 0;\r | |
2377 | if (by_number != 0)\r | |
2378 | NODE_STATUS_ADD(node, BY_NUMBER);\r | |
2379 | \r | |
2380 | return node;\r | |
2381 | }\r | |
2382 | \r | |
2383 | static Node*\r | |
2384 | node_new_enclosure(enum EnclosureType type)\r | |
2385 | {\r | |
2386 | Node* node = node_new();\r | |
2387 | CHECK_NULL_RETURN(node);\r | |
2388 | \r | |
2389 | NODE_SET_TYPE(node, NODE_ENCLOSURE);\r | |
2390 | ENCLOSURE_(node)->type = type;\r | |
2391 | \r | |
2392 | switch (type) {\r | |
2393 | case ENCLOSURE_MEMORY:\r | |
2394 | ENCLOSURE_(node)->m.regnum = 0;\r | |
2395 | ENCLOSURE_(node)->m.called_addr = -1;\r | |
2396 | ENCLOSURE_(node)->m.entry_count = 1;\r | |
2397 | ENCLOSURE_(node)->m.called_state = 0;\r | |
2398 | break;\r | |
2399 | \r | |
2400 | case ENCLOSURE_OPTION:\r | |
2401 | ENCLOSURE_(node)->o.options = 0;\r | |
2402 | break;\r | |
2403 | \r | |
2404 | case ENCLOSURE_STOP_BACKTRACK:\r | |
2405 | break;\r | |
2406 | \r | |
2407 | case ENCLOSURE_IF_ELSE:\r | |
2408 | ENCLOSURE_(node)->te.Then = 0;\r | |
2409 | ENCLOSURE_(node)->te.Else = 0;\r | |
2410 | break;\r | |
2411 | }\r | |
2412 | \r | |
2413 | ENCLOSURE_(node)->opt_count = 0;\r | |
2414 | return node;\r | |
2415 | }\r | |
2416 | \r | |
2417 | extern Node*\r | |
2418 | onig_node_new_enclosure(int type)\r | |
2419 | {\r | |
2420 | return node_new_enclosure(type);\r | |
2421 | }\r | |
2422 | \r | |
2423 | static Node*\r | |
2424 | node_new_enclosure_if_else(Node* cond, Node* Then, Node* Else)\r | |
2425 | {\r | |
2426 | Node* n;\r | |
2427 | n = node_new_enclosure(ENCLOSURE_IF_ELSE);\r | |
2428 | CHECK_NULL_RETURN(n);\r | |
2429 | \r | |
2430 | NODE_BODY(n) = cond;\r | |
2431 | ENCLOSURE_(n)->te.Then = Then;\r | |
2432 | ENCLOSURE_(n)->te.Else = Else;\r | |
2433 | return n;\r | |
2434 | }\r | |
2435 | \r | |
2436 | static Node*\r | |
2437 | node_new_memory(int is_named)\r | |
2438 | {\r | |
2439 | Node* node = node_new_enclosure(ENCLOSURE_MEMORY);\r | |
2440 | CHECK_NULL_RETURN(node);\r | |
2441 | if (is_named != 0)\r | |
2442 | NODE_STATUS_ADD(node, NAMED_GROUP);\r | |
2443 | \r | |
2444 | return node;\r | |
2445 | }\r | |
2446 | \r | |
2447 | static Node*\r | |
2448 | node_new_option(OnigOptionType option)\r | |
2449 | {\r | |
2450 | Node* node = node_new_enclosure(ENCLOSURE_OPTION);\r | |
2451 | CHECK_NULL_RETURN(node);\r | |
2452 | ENCLOSURE_(node)->o.options = option;\r | |
2453 | return node;\r | |
2454 | }\r | |
2455 | \r | |
2456 | static int\r | |
2457 | node_new_fail(Node** node, ScanEnv* env)\r | |
2458 | {\r | |
2459 | *node = node_new();\r | |
2460 | CHECK_NULL_RETURN_MEMERR(*node);\r | |
2461 | \r | |
2462 | NODE_SET_TYPE(*node, NODE_GIMMICK);\r | |
2463 | GIMMICK_(*node)->type = GIMMICK_FAIL;\r | |
2464 | return ONIG_NORMAL;\r | |
2465 | }\r | |
2466 | \r | |
2467 | static int\r | |
2468 | node_new_save_gimmick(Node** node, enum SaveType save_type, ScanEnv* env)\r | |
2469 | {\r | |
2470 | int id;\r | |
2471 | int r;\r | |
2472 | \r | |
2473 | r = save_entry(env, save_type, &id);\r | |
2474 | if (r != ONIG_NORMAL) return r;\r | |
2475 | \r | |
2476 | *node = node_new();\r | |
2477 | CHECK_NULL_RETURN_MEMERR(*node);\r | |
2478 | \r | |
2479 | NODE_SET_TYPE(*node, NODE_GIMMICK);\r | |
2480 | GIMMICK_(*node)->id = id;\r | |
2481 | GIMMICK_(*node)->type = GIMMICK_SAVE;\r | |
2482 | GIMMICK_(*node)->detail_type = (int )save_type;\r | |
2483 | \r | |
2484 | return ONIG_NORMAL;\r | |
2485 | }\r | |
2486 | \r | |
2487 | static int\r | |
2488 | node_new_update_var_gimmick(Node** node, enum UpdateVarType update_var_type,\r | |
2489 | int id, ScanEnv* env)\r | |
2490 | {\r | |
2491 | *node = node_new();\r | |
2492 | CHECK_NULL_RETURN_MEMERR(*node);\r | |
2493 | \r | |
2494 | NODE_SET_TYPE(*node, NODE_GIMMICK);\r | |
2495 | GIMMICK_(*node)->id = id;\r | |
2496 | GIMMICK_(*node)->type = GIMMICK_UPDATE_VAR;\r | |
2497 | GIMMICK_(*node)->detail_type = (int )update_var_type;\r | |
2498 | \r | |
2499 | return ONIG_NORMAL;\r | |
2500 | }\r | |
2501 | \r | |
2502 | static int\r | |
2503 | node_new_keep(Node** node, ScanEnv* env)\r | |
2504 | {\r | |
2505 | int r;\r | |
2506 | \r | |
2507 | r = node_new_save_gimmick(node, SAVE_KEEP, env);\r | |
2508 | if (r != 0) return r;\r | |
2509 | \r | |
2510 | env->keep_num++;\r | |
2511 | return ONIG_NORMAL;\r | |
2512 | }\r | |
2513 | \r | |
2514 | #ifdef USE_CALLOUT\r | |
2515 | \r | |
2516 | extern void\r | |
2517 | onig_free_reg_callout_list(int n, CalloutListEntry* list)\r | |
2518 | {\r | |
2519 | int i;\r | |
2520 | int j;\r | |
2521 | \r | |
2522 | if (IS_NULL(list)) return ;\r | |
2523 | \r | |
2524 | for (i = 0; i < n; i++) {\r | |
2525 | if (list[i].of == ONIG_CALLOUT_OF_NAME) {\r | |
2526 | for (j = 0; j < list[i].u.arg.passed_num; j++) {\r | |
2527 | if (list[i].u.arg.types[j] == ONIG_TYPE_STRING) {\r | |
2528 | if (IS_NOT_NULL(list[i].u.arg.vals[j].s.start))\r | |
2529 | xfree(list[i].u.arg.vals[j].s.start);\r | |
2530 | }\r | |
2531 | }\r | |
2532 | }\r | |
2533 | else { /* ONIG_CALLOUT_OF_CONTENTS */\r | |
2534 | if (IS_NOT_NULL(list[i].u.content.start)) {\r | |
2535 | xfree((void* )list[i].u.content.start);\r | |
2536 | }\r | |
2537 | }\r | |
2538 | }\r | |
2539 | \r | |
2540 | xfree(list);\r | |
2541 | }\r | |
2542 | \r | |
2543 | extern CalloutListEntry*\r | |
2544 | onig_reg_callout_list_at(regex_t* reg, int num)\r | |
2545 | {\r | |
2546 | RegexExt* ext = REG_EXTP(reg);\r | |
2547 | CHECK_NULL_RETURN(ext);\r | |
2548 | \r | |
2549 | if (num <= 0 || num > ext->callout_num)\r | |
2550 | return 0;\r | |
2551 | \r | |
2552 | num--;\r | |
2553 | return ext->callout_list + num;\r | |
2554 | }\r | |
2555 | \r | |
2556 | static int\r | |
2557 | reg_callout_list_entry(ScanEnv* env, int* rnum)\r | |
2558 | {\r | |
2559 | #define INIT_CALLOUT_LIST_NUM 3\r | |
2560 | \r | |
2561 | int num;\r | |
2562 | CalloutListEntry* list;\r | |
2563 | CalloutListEntry* e;\r | |
2564 | RegexExt* ext;\r | |
2565 | \r | |
2566 | ext = onig_get_regex_ext(env->reg);\r | |
2567 | CHECK_NULL_RETURN_MEMERR(ext);\r | |
2568 | \r | |
2569 | if (IS_NULL(ext->callout_list)) {\r | |
2570 | list = (CalloutListEntry* )xmalloc(sizeof(*list) * INIT_CALLOUT_LIST_NUM);\r | |
2571 | CHECK_NULL_RETURN_MEMERR(list);\r | |
2572 | \r | |
2573 | ext->callout_list = list;\r | |
2574 | ext->callout_list_alloc = INIT_CALLOUT_LIST_NUM;\r | |
2575 | ext->callout_num = 0;\r | |
2576 | }\r | |
2577 | \r | |
2578 | num = ext->callout_num + 1;\r | |
2579 | if (num > ext->callout_list_alloc) {\r | |
2580 | int alloc = ext->callout_list_alloc * 2;\r | |
2581 | list = (CalloutListEntry* )xrealloc(ext->callout_list,\r | |
2582 | sizeof(CalloutListEntry) * alloc,\r | |
2583 | sizeof(CalloutListEntry) * ext->callout_list_alloc);\r | |
2584 | CHECK_NULL_RETURN_MEMERR(list);\r | |
2585 | \r | |
2586 | ext->callout_list = list;\r | |
2587 | ext->callout_list_alloc = alloc;\r | |
2588 | }\r | |
2589 | \r | |
2590 | e = ext->callout_list + (num - 1);\r | |
2591 | \r | |
2592 | e->flag = 0;\r | |
2593 | e->of = 0;\r | |
2594 | e->in = ONIG_CALLOUT_OF_CONTENTS;\r | |
2595 | e->type = 0;\r | |
2596 | e->tag_start = 0;\r | |
2597 | e->tag_end = 0;\r | |
2598 | e->start_func = 0;\r | |
2599 | e->end_func = 0;\r | |
2600 | e->u.arg.num = 0;\r | |
2601 | e->u.arg.passed_num = 0;\r | |
2602 | \r | |
2603 | ext->callout_num = num;\r | |
2604 | *rnum = num;\r | |
2605 | return ONIG_NORMAL;\r | |
2606 | }\r | |
2607 | \r | |
2608 | static int\r | |
2609 | node_new_callout(Node** node, OnigCalloutOf callout_of, int num, int id,\r | |
2610 | ScanEnv* env)\r | |
2611 | {\r | |
2612 | *node = node_new();\r | |
2613 | CHECK_NULL_RETURN_MEMERR(*node);\r | |
2614 | \r | |
2615 | NODE_SET_TYPE(*node, NODE_GIMMICK);\r | |
2616 | GIMMICK_(*node)->id = id;\r | |
2617 | GIMMICK_(*node)->num = num;\r | |
2618 | GIMMICK_(*node)->type = GIMMICK_CALLOUT;\r | |
2619 | GIMMICK_(*node)->detail_type = (int )callout_of;\r | |
2620 | \r | |
2621 | return ONIG_NORMAL;\r | |
2622 | }\r | |
2623 | #endif\r | |
2624 | \r | |
2625 | static int\r | |
2626 | make_extended_grapheme_cluster(Node** node, ScanEnv* env)\r | |
2627 | {\r | |
2628 | int r;\r | |
2629 | int i;\r | |
2630 | Node* x;\r | |
2631 | Node* ns[2];\r | |
2632 | \r | |
2633 | /* \X == (?>\O(?:\Y\O)*) */\r | |
2634 | \r | |
2635 | ns[1] = NULL_NODE;\r | |
2636 | \r | |
2637 | r = ONIGERR_MEMORY;\r | |
2638 | ns[0] = onig_node_new_anchor(ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY, 0);\r | |
2639 | if (IS_NULL(ns[0])) goto err;\r | |
2640 | \r | |
2641 | r = node_new_true_anychar(&ns[1], env);\r | |
2642 | if (r != 0) goto err1;\r | |
2643 | \r | |
2644 | x = make_list(2, ns);\r | |
2645 | if (IS_NULL(x)) goto err;\r | |
2646 | ns[0] = x;\r | |
2647 | ns[1] = NULL_NODE;\r | |
2648 | \r | |
2649 | x = node_new_quantifier(0, REPEAT_INFINITE, 1);\r | |
2650 | if (IS_NULL(x)) goto err;\r | |
2651 | \r | |
2652 | NODE_BODY(x) = ns[0];\r | |
2653 | ns[0] = NULL_NODE;\r | |
2654 | ns[1] = x;\r | |
2655 | \r | |
2656 | r = node_new_true_anychar(&ns[0], env);\r | |
2657 | if (r != 0) goto err1;\r | |
2658 | \r | |
2659 | x = make_list(2, ns);\r | |
2660 | if (IS_NULL(x)) goto err;\r | |
2661 | \r | |
2662 | ns[0] = x;\r | |
2663 | ns[1] = NULL_NODE;\r | |
2664 | \r | |
2665 | x = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);\r | |
2666 | if (IS_NULL(x)) goto err;\r | |
2667 | \r | |
2668 | NODE_BODY(x) = ns[0];\r | |
2669 | \r | |
2670 | *node = x;\r | |
2671 | return ONIG_NORMAL;\r | |
2672 | \r | |
2673 | err:\r | |
2674 | r = ONIGERR_MEMORY;\r | |
2675 | err1:\r | |
2676 | for (i = 0; i < 2; i++) onig_node_free(ns[i]);\r | |
2677 | return r;\r | |
2678 | }\r | |
2679 | \r | |
2680 | static int\r | |
2681 | make_absent_engine(Node** node, int pre_save_right_id, Node* absent,\r | |
2682 | Node* step_one, int lower, int upper, int possessive,\r | |
2683 | int is_range_cutter, ScanEnv* env)\r | |
2684 | {\r | |
2685 | int r;\r | |
2686 | int i;\r | |
2687 | int id;\r | |
2688 | Node* x;\r | |
2689 | Node* ns[4];\r | |
2690 | \r | |
2691 | for (i = 0; i < 4; i++) ns[i] = NULL_NODE;\r | |
2692 | \r | |
2693 | ns[1] = absent;\r | |
2694 | ns[3] = step_one; /* for err */\r | |
2695 | r = node_new_save_gimmick(&ns[0], SAVE_S, env);\r | |
2696 | if (r != 0) goto err;\r | |
2697 | \r | |
2698 | id = GIMMICK_(ns[0])->id;\r | |
2699 | r = node_new_update_var_gimmick(&ns[2], UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK,\r | |
2700 | id, env);\r | |
2701 | if (r != 0) goto err;\r | |
2702 | \r | |
2703 | r = node_new_fail(&ns[3], env);\r | |
2704 | if (r != 0) goto err;\r | |
2705 | \r | |
2706 | x = make_list(4, ns);\r | |
2707 | if (IS_NULL(x)) goto err0;\r | |
2708 | \r | |
2709 | ns[0] = x;\r | |
2710 | ns[1] = step_one;\r | |
2711 | ns[2] = ns[3] = NULL_NODE;\r | |
2712 | \r | |
2713 | x = make_alt(2, ns);\r | |
2714 | if (IS_NULL(x)) goto err0;\r | |
2715 | \r | |
2716 | ns[0] = x;\r | |
2717 | \r | |
2718 | x = node_new_quantifier(lower, upper, 0);\r | |
2719 | if (IS_NULL(x)) goto err0;\r | |
2720 | \r | |
2721 | NODE_BODY(x) = ns[0];\r | |
2722 | ns[0] = x;\r | |
2723 | \r | |
2724 | if (possessive != 0) {\r | |
2725 | x = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);\r | |
2726 | if (IS_NULL(x)) goto err0;\r | |
2727 | \r | |
2728 | NODE_BODY(x) = ns[0];\r | |
2729 | ns[0] = x;\r | |
2730 | }\r | |
2731 | \r | |
2732 | r = node_new_update_var_gimmick(&ns[1], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,\r | |
2733 | pre_save_right_id, env);\r | |
2734 | if (r != 0) goto err;\r | |
2735 | \r | |
2736 | r = node_new_fail(&ns[2], env);\r | |
2737 | if (r != 0) goto err;\r | |
2738 | \r | |
2739 | x = make_list(2, ns + 1);\r | |
2740 | if (IS_NULL(x)) goto err0;\r | |
2741 | \r | |
2742 | ns[1] = x; ns[2] = NULL_NODE;\r | |
2743 | \r | |
2744 | x = make_alt(2, ns);\r | |
2745 | if (IS_NULL(x)) goto err0;\r | |
2746 | \r | |
2747 | if (is_range_cutter != 0)\r | |
2748 | NODE_STATUS_ADD(x, SUPER);\r | |
2749 | \r | |
2750 | *node = x;\r | |
2751 | return ONIG_NORMAL;\r | |
2752 | \r | |
2753 | err0:\r | |
2754 | r = ONIGERR_MEMORY;\r | |
2755 | err:\r | |
2756 | for (i = 0; i < 4; i++) onig_node_free(ns[i]);\r | |
2757 | return r;\r | |
2758 | }\r | |
2759 | \r | |
2760 | static int\r | |
2761 | make_absent_tail(Node** node1, Node** node2, int pre_save_right_id,\r | |
2762 | ScanEnv* env)\r | |
2763 | {\r | |
2764 | int r;\r | |
2765 | int id;\r | |
2766 | Node* save;\r | |
2767 | Node* x;\r | |
2768 | Node* ns[2];\r | |
2769 | \r | |
2770 | *node1 = *node2 = NULL_NODE;\r | |
2771 | save = ns[0] = ns[1] = NULL_NODE;\r | |
2772 | \r | |
2773 | r = node_new_save_gimmick(&save, SAVE_RIGHT_RANGE, env);\r | |
2774 | if (r != 0) goto err;\r | |
2775 | \r | |
2776 | id = GIMMICK_(save)->id;\r | |
2777 | r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,\r | |
2778 | id, env);\r | |
2779 | if (r != 0) goto err;\r | |
2780 | \r | |
2781 | r = node_new_fail(&ns[1], env);\r | |
2782 | if (r != 0) goto err;\r | |
2783 | \r | |
2784 | x = make_list(2, ns);\r | |
2785 | if (IS_NULL(x)) goto err0;\r | |
2786 | \r | |
2787 | ns[0] = NULL_NODE; ns[1] = x;\r | |
2788 | \r | |
2789 | r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,\r | |
2790 | pre_save_right_id, env);\r | |
2791 | if (r != 0) goto err;\r | |
2792 | \r | |
2793 | x = make_alt(2, ns);\r | |
2794 | if (IS_NULL(x)) goto err0;\r | |
2795 | \r | |
2796 | *node1 = save;\r | |
2797 | *node2 = x;\r | |
2798 | return ONIG_NORMAL;\r | |
2799 | \r | |
2800 | err0:\r | |
2801 | r = ONIGERR_MEMORY;\r | |
2802 | err:\r | |
2803 | onig_node_free(save);\r | |
2804 | onig_node_free(ns[0]);\r | |
2805 | onig_node_free(ns[1]);\r | |
2806 | return r;\r | |
2807 | }\r | |
2808 | \r | |
2809 | static int\r | |
2810 | make_range_clear(Node** node, ScanEnv* env)\r | |
2811 | {\r | |
2812 | int r;\r | |
2813 | int id;\r | |
2814 | Node* save;\r | |
2815 | Node* x;\r | |
2816 | Node* ns[2];\r | |
2817 | \r | |
2818 | *node = NULL_NODE;\r | |
2819 | save = ns[0] = ns[1] = NULL_NODE;\r | |
2820 | \r | |
2821 | r = node_new_save_gimmick(&save, SAVE_RIGHT_RANGE, env);\r | |
2822 | if (r != 0) goto err;\r | |
2823 | \r | |
2824 | id = GIMMICK_(save)->id;\r | |
2825 | r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,\r | |
2826 | id, env);\r | |
2827 | if (r != 0) goto err;\r | |
2828 | \r | |
2829 | r = node_new_fail(&ns[1], env);\r | |
2830 | if (r != 0) goto err;\r | |
2831 | \r | |
2832 | x = make_list(2, ns);\r | |
2833 | if (IS_NULL(x)) goto err0;\r | |
2834 | \r | |
2835 | ns[0] = NULL_NODE; ns[1] = x;\r | |
2836 | \r | |
2837 | r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_INIT, 0, env);\r | |
2838 | if (r != 0) goto err;\r | |
2839 | \r | |
2840 | x = make_alt(2, ns);\r | |
2841 | if (IS_NULL(x)) goto err0;\r | |
2842 | \r | |
2843 | NODE_STATUS_ADD(x, SUPER);\r | |
2844 | \r | |
2845 | ns[0] = save;\r | |
2846 | ns[1] = x;\r | |
2847 | save = NULL_NODE;\r | |
2848 | x = make_list(2, ns);\r | |
2849 | if (IS_NULL(x)) goto err0;\r | |
2850 | \r | |
2851 | *node = x;\r | |
2852 | return ONIG_NORMAL;\r | |
2853 | \r | |
2854 | err0:\r | |
2855 | r = ONIGERR_MEMORY;\r | |
2856 | err:\r | |
2857 | onig_node_free(save);\r | |
2858 | onig_node_free(ns[0]);\r | |
2859 | onig_node_free(ns[1]);\r | |
2860 | return r;\r | |
2861 | }\r | |
2862 | \r | |
2863 | static int\r | |
2864 | is_simple_one_char_repeat(Node* node, Node** rquant, Node** rbody,\r | |
2865 | int* is_possessive, ScanEnv* env)\r | |
2866 | {\r | |
2867 | Node* quant;\r | |
2868 | Node* body;\r | |
2869 | \r | |
2870 | *rquant = *rbody = 0;\r | |
2871 | *is_possessive = 0;\r | |
2872 | \r | |
2873 | if (NODE_TYPE(node) == NODE_QUANT) {\r | |
2874 | quant = node;\r | |
2875 | }\r | |
2876 | else {\r | |
2877 | if (NODE_TYPE(node) == NODE_ENCLOSURE) {\r | |
2878 | EnclosureNode* en = ENCLOSURE_(node);\r | |
2879 | if (en->type == ENCLOSURE_STOP_BACKTRACK) {\r | |
2880 | *is_possessive = 1;\r | |
2881 | quant = NODE_ENCLOSURE_BODY(en);\r | |
2882 | if (NODE_TYPE(quant) != NODE_QUANT)\r | |
2883 | return 0;\r | |
2884 | }\r | |
2885 | else\r | |
2886 | return 0;\r | |
2887 | }\r | |
2888 | else\r | |
2889 | return 0;\r | |
2890 | }\r | |
2891 | \r | |
2892 | if (QUANT_(quant)->greedy == 0)\r | |
2893 | return 0;\r | |
2894 | \r | |
2895 | body = NODE_BODY(quant);\r | |
2896 | switch (NODE_TYPE(body)) {\r | |
2897 | case NODE_STRING:\r | |
2898 | {\r | |
2899 | int len;\r | |
2900 | StrNode* sn = STR_(body);\r | |
2901 | UChar *s = sn->s;\r | |
2902 | \r | |
2903 | len = 0;\r | |
2904 | while (s < sn->end) {\r | |
2905 | s += enclen(env->enc, s);\r | |
2906 | len++;\r | |
2907 | }\r | |
2908 | if (len != 1)\r | |
2909 | return 0;\r | |
2910 | }\r | |
2911 | \r | |
2912 | case NODE_CCLASS:\r | |
2913 | break;\r | |
2914 | \r | |
2915 | default:\r | |
2916 | return 0;\r | |
2917 | break;\r | |
2918 | }\r | |
2919 | \r | |
2920 | if (node != quant) {\r | |
2921 | NODE_BODY(node) = 0;\r | |
2922 | onig_node_free(node);\r | |
2923 | }\r | |
2924 | NODE_BODY(quant) = NULL_NODE;\r | |
2925 | *rquant = quant;\r | |
2926 | *rbody = body;\r | |
2927 | return 1;\r | |
2928 | }\r | |
2929 | \r | |
2930 | static int\r | |
2931 | make_absent_tree_for_simple_one_char_repeat(Node** node, Node* absent, Node* quant,\r | |
2932 | Node* body, int possessive, ScanEnv* env)\r | |
2933 | {\r | |
2934 | int r;\r | |
2935 | int i;\r | |
2936 | int id1;\r | |
2937 | int lower, upper;\r | |
2938 | Node* x;\r | |
2939 | Node* ns[4];\r | |
2940 | \r | |
2941 | *node = NULL_NODE;\r | |
2942 | r = ONIGERR_MEMORY;\r | |
2943 | ns[0] = ns[1] = NULL_NODE;\r | |
2944 | ns[2] = body, ns[3] = absent;\r | |
2945 | \r | |
2946 | lower = QUANT_(quant)->lower;\r | |
2947 | upper = QUANT_(quant)->upper;\r | |
2948 | onig_node_free(quant);\r | |
2949 | \r | |
2950 | r = node_new_save_gimmick(&ns[0], SAVE_RIGHT_RANGE, env);\r | |
2951 | if (r != 0) goto err;\r | |
2952 | \r | |
2953 | id1 = GIMMICK_(ns[0])->id;\r | |
2954 | \r | |
2955 | r = make_absent_engine(&ns[1], id1, absent, body, lower, upper, possessive,\r | |
2956 | 0, env);\r | |
2957 | if (r != 0) goto err;\r | |
2958 | \r | |
2959 | ns[2] = ns[3] = NULL_NODE;\r | |
2960 | \r | |
2961 | r = node_new_update_var_gimmick(&ns[2], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,\r | |
2962 | id1, env);\r | |
2963 | if (r != 0) goto err;\r | |
2964 | \r | |
2965 | x = make_list(3, ns);\r | |
2966 | if (IS_NULL(x)) goto err0;\r | |
2967 | \r | |
2968 | *node = x;\r | |
2969 | return ONIG_NORMAL;\r | |
2970 | \r | |
2971 | err0:\r | |
2972 | r = ONIGERR_MEMORY;\r | |
2973 | err:\r | |
2974 | for (i = 0; i < 4; i++) onig_node_free(ns[i]);\r | |
2975 | return r;\r | |
2976 | }\r | |
2977 | \r | |
2978 | static int\r | |
2979 | make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter,\r | |
2980 | ScanEnv* env)\r | |
2981 | {\r | |
2982 | int r;\r | |
2983 | int i;\r | |
2984 | int id1, id2;\r | |
2985 | int possessive;\r | |
2986 | Node* x;\r | |
2987 | Node* ns[7];\r | |
2988 | \r | |
2989 | r = ONIGERR_MEMORY;\r | |
2990 | for (i = 0; i < 7; i++) ns[i] = NULL_NODE;\r | |
2991 | ns[4] = expr; ns[5] = absent;\r | |
2992 | \r | |
2993 | if (is_range_cutter == 0) {\r | |
2994 | Node* quant;\r | |
2995 | Node* body;\r | |
2996 | \r | |
2997 | if (expr == NULL_NODE) {\r | |
2998 | /* default expr \O* */\r | |
2999 | quant = node_new_quantifier(0, REPEAT_INFINITE, 0);\r | |
3000 | if (IS_NULL(quant)) goto err0;\r | |
3001 | \r | |
3002 | r = node_new_true_anychar(&body, env);\r | |
3003 | if (r != 0) {\r | |
3004 | onig_node_free(quant);\r | |
3005 | goto err;\r | |
3006 | }\r | |
3007 | possessive = 0;\r | |
3008 | goto simple;\r | |
3009 | }\r | |
3010 | else {\r | |
3011 | if (is_simple_one_char_repeat(expr, &quant, &body, &possessive, env)) {\r | |
3012 | simple:\r | |
3013 | r = make_absent_tree_for_simple_one_char_repeat(node, absent, quant,\r | |
3014 | body, possessive, env);\r | |
3015 | if (r != 0) {\r | |
3016 | ns[4] = NULL_NODE;\r | |
3017 | onig_node_free(quant);\r | |
3018 | onig_node_free(body);\r | |
3019 | goto err;\r | |
3020 | }\r | |
3021 | \r | |
3022 | return ONIG_NORMAL;\r | |
3023 | }\r | |
3024 | }\r | |
3025 | }\r | |
3026 | \r | |
3027 | r = node_new_save_gimmick(&ns[0], SAVE_RIGHT_RANGE, env);\r | |
3028 | if (r != 0) goto err;\r | |
3029 | \r | |
3030 | id1 = GIMMICK_(ns[0])->id;\r | |
3031 | \r | |
3032 | r = node_new_save_gimmick(&ns[1], SAVE_S, env);\r | |
3033 | if (r != 0) goto err;\r | |
3034 | \r | |
3035 | id2 = GIMMICK_(ns[1])->id;\r | |
3036 | \r | |
3037 | r = node_new_true_anychar(&ns[3], env);\r | |
3038 | if (r != 0) goto err;\r | |
3039 | \r | |
3040 | possessive = 1;\r | |
3041 | r = make_absent_engine(&ns[2], id1, absent, ns[3], 0, REPEAT_INFINITE,\r | |
3042 | possessive, is_range_cutter, env);\r | |
3043 | if (r != 0) goto err;\r | |
3044 | \r | |
3045 | ns[3] = NULL_NODE;\r | |
3046 | ns[5] = NULL_NODE;\r | |
3047 | \r | |
3048 | r = node_new_update_var_gimmick(&ns[3], UPDATE_VAR_S_FROM_STACK, id2, env);\r | |
3049 | if (r != 0) goto err;\r | |
3050 | \r | |
3051 | if (is_range_cutter != 0) {\r | |
3052 | x = make_list(4, ns);\r | |
3053 | if (IS_NULL(x)) goto err0;\r | |
3054 | }\r | |
3055 | else {\r | |
3056 | r = make_absent_tail(&ns[5], &ns[6], id1, env);\r | |
3057 | if (r != 0) goto err;\r | |
3058 | \r | |
3059 | x = make_list(7, ns);\r | |
3060 | if (IS_NULL(x)) goto err0;\r | |
3061 | }\r | |
3062 | \r | |
3063 | *node = x;\r | |
3064 | return ONIG_NORMAL;\r | |
3065 | \r | |
3066 | err0:\r | |
3067 | r = ONIGERR_MEMORY;\r | |
3068 | err:\r | |
3069 | for (i = 0; i < 7; i++) onig_node_free(ns[i]);\r | |
3070 | return r; \r | |
3071 | }\r | |
3072 | \r | |
3073 | extern int\r | |
3074 | onig_node_str_cat(Node* node, const UChar* s, const UChar* end)\r | |
3075 | {\r | |
3076 | int addlen = (int )(end - s);\r | |
3077 | \r | |
3078 | if (addlen > 0) {\r | |
3079 | int len = (int )(STR_(node)->end - STR_(node)->s);\r | |
3080 | \r | |
3081 | if (STR_(node)->capa > 0 || (len + addlen > NODE_STRING_BUF_SIZE - 1)) {\r | |
3082 | UChar* p;\r | |
3083 | int capa = len + addlen + NODE_STRING_MARGIN;\r | |
3084 | \r | |
3085 | if (capa <= STR_(node)->capa) {\r | |
3086 | onig_strcpy(STR_(node)->s + len, s, end);\r | |
3087 | }\r | |
3088 | else {\r | |
3089 | if (STR_(node)->s == STR_(node)->buf)\r | |
3090 | p = strcat_capa_from_static(STR_(node)->s, STR_(node)->end,\r | |
3091 | s, end, capa);\r | |
3092 | else\r | |
3093 | p = strcat_capa(STR_(node)->s, STR_(node)->end, s, end, capa, STR_(node)->capa);\r | |
3094 | \r | |
3095 | CHECK_NULL_RETURN_MEMERR(p);\r | |
3096 | STR_(node)->s = p;\r | |
3097 | STR_(node)->capa = capa;\r | |
3098 | }\r | |
3099 | }\r | |
3100 | else {\r | |
3101 | onig_strcpy(STR_(node)->s + len, s, end);\r | |
3102 | }\r | |
3103 | STR_(node)->end = STR_(node)->s + len + addlen;\r | |
3104 | }\r | |
3105 | \r | |
3106 | return 0;\r | |
3107 | }\r | |
3108 | \r | |
3109 | extern int\r | |
3110 | onig_node_str_set(Node* node, const UChar* s, const UChar* end)\r | |
3111 | {\r | |
3112 | onig_node_str_clear(node);\r | |
3113 | return onig_node_str_cat(node, s, end);\r | |
3114 | }\r | |
3115 | \r | |
3116 | static int\r | |
3117 | node_str_cat_char(Node* node, UChar c)\r | |
3118 | {\r | |
3119 | UChar s[1];\r | |
3120 | \r | |
3121 | s[0] = c;\r | |
3122 | return onig_node_str_cat(node, s, s + 1);\r | |
3123 | }\r | |
3124 | \r | |
3125 | extern void\r | |
3126 | onig_node_conv_to_str_node(Node* node, int flag)\r | |
3127 | {\r | |
3128 | NODE_SET_TYPE(node, NODE_STRING);\r | |
3129 | STR_(node)->flag = flag;\r | |
3130 | STR_(node)->capa = 0;\r | |
3131 | STR_(node)->s = STR_(node)->buf;\r | |
3132 | STR_(node)->end = STR_(node)->buf;\r | |
3133 | }\r | |
3134 | \r | |
3135 | extern void\r | |
3136 | onig_node_str_clear(Node* node)\r | |
3137 | {\r | |
3138 | if (STR_(node)->capa != 0 &&\r | |
3139 | IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) {\r | |
3140 | xfree(STR_(node)->s);\r | |
3141 | }\r | |
3142 | \r | |
3143 | STR_(node)->capa = 0;\r | |
3144 | STR_(node)->flag = 0;\r | |
3145 | STR_(node)->s = STR_(node)->buf;\r | |
3146 | STR_(node)->end = STR_(node)->buf;\r | |
3147 | }\r | |
3148 | \r | |
3149 | static Node*\r | |
3150 | node_new_str(const UChar* s, const UChar* end)\r | |
3151 | {\r | |
3152 | Node* node = node_new();\r | |
3153 | CHECK_NULL_RETURN(node);\r | |
3154 | \r | |
3155 | NODE_SET_TYPE(node, NODE_STRING);\r | |
3156 | STR_(node)->capa = 0;\r | |
3157 | STR_(node)->flag = 0;\r | |
3158 | STR_(node)->s = STR_(node)->buf;\r | |
3159 | STR_(node)->end = STR_(node)->buf;\r | |
3160 | if (onig_node_str_cat(node, s, end)) {\r | |
3161 | onig_node_free(node);\r | |
3162 | return NULL;\r | |
3163 | }\r | |
3164 | return node;\r | |
3165 | }\r | |
3166 | \r | |
3167 | extern Node*\r | |
3168 | onig_node_new_str(const UChar* s, const UChar* end)\r | |
3169 | {\r | |
3170 | return node_new_str(s, end);\r | |
3171 | }\r | |
3172 | \r | |
3173 | static Node*\r | |
3174 | node_new_str_raw(UChar* s, UChar* end)\r | |
3175 | {\r | |
3176 | Node* node = node_new_str(s, end);\r | |
3177 | NODE_STRING_SET_RAW(node);\r | |
3178 | return node;\r | |
3179 | }\r | |
3180 | \r | |
3181 | static Node*\r | |
3182 | node_new_empty(void)\r | |
3183 | {\r | |
3184 | return node_new_str(NULL, NULL);\r | |
3185 | }\r | |
3186 | \r | |
3187 | static Node*\r | |
3188 | node_new_str_raw_char(UChar c)\r | |
3189 | {\r | |
3190 | UChar p[1];\r | |
3191 | \r | |
3192 | p[0] = c;\r | |
3193 | return node_new_str_raw(p, p + 1);\r | |
3194 | }\r | |
3195 | \r | |
3196 | static Node*\r | |
3197 | str_node_split_last_char(Node* node, OnigEncoding enc)\r | |
3198 | {\r | |
3199 | const UChar *p;\r | |
3200 | Node* rn;\r | |
3201 | StrNode* sn;\r | |
3202 | \r | |
3203 | sn = STR_(node);\r | |
3204 | rn = NULL_NODE;\r | |
3205 | if (sn->end > sn->s) {\r | |
3206 | p = onigenc_get_prev_char_head(enc, sn->s, sn->end);\r | |
3207 | if (p && p > sn->s) { /* can be split. */\r | |
3208 | rn = node_new_str(p, sn->end);\r | |
3209 | if (NODE_STRING_IS_RAW(node))\r | |
3210 | NODE_STRING_SET_RAW(rn);\r | |
3211 | \r | |
3212 | sn->end = (UChar* )p;\r | |
3213 | }\r | |
3214 | }\r | |
3215 | return rn;\r | |
3216 | }\r | |
3217 | \r | |
3218 | static int\r | |
3219 | str_node_can_be_split(Node* node, OnigEncoding enc)\r | |
3220 | {\r | |
3221 | StrNode* sn = STR_(node);\r | |
3222 | if (sn->end > sn->s) {\r | |
3223 | return ((enclen(enc, sn->s) < sn->end - sn->s) ? 1 : 0);\r | |
3224 | }\r | |
3225 | return 0;\r | |
3226 | }\r | |
3227 | \r | |
#ifdef USE_PAD_TO_SHORT_BYTE_CHAR
/*
 * Insert `num` copies of the byte `val` in front of the string held by
 * `sn`, shifting the existing bytes right.
 *
 * The existing bytes are staged in a NODE_STRING_BUF_SIZE scratch buffer.
 * NOTE(review): this assumes the current string fits in that buffer and
 * that sn->s has room for `num` extra bytes -- confirm against callers.
 *
 * Always returns 0.
 */
static int
node_str_head_pad(StrNode* sn, int num, UChar val)
{
  UChar buf[NODE_STRING_BUF_SIZE];
  int i, len;

  len = (int )(sn->end - sn->s);
  onig_strcpy(buf, sn->s, sn->end);
  onig_strcpy(&(sn->s[num]), buf, buf + len);
  sn->end += num;

  for (i = 0; i < num; i++) {
    sn->s[i] = val;
  }

  /* the function is declared int: falling off the end would hand the
     caller an indeterminate value */
  return 0;
}
#endif
3245 | \r | |
3246 | extern int\r | |
3247 | onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc)\r | |
3248 | {\r | |
3249 | unsigned int num, val;\r | |
3250 | OnigCodePoint c;\r | |
3251 | UChar* p = *src;\r | |
3252 | PFETCH_READY;\r | |
3253 | \r | |
3254 | num = 0;\r | |
3255 | while (! PEND) {\r | |
3256 | PFETCH(c);\r | |
3257 | if (IS_CODE_DIGIT_ASCII(enc, c)) {\r | |
3258 | val = (unsigned int )DIGITVAL(c);\r | |
3259 | if ((INT_MAX_LIMIT - val) / 10UL < num)\r | |
3260 | return -1; /* overflow */\r | |
3261 | \r | |
3262 | num = num * 10 + val;\r | |
3263 | }\r | |
3264 | else {\r | |
3265 | PUNFETCH;\r | |
3266 | break;\r | |
3267 | }\r | |
3268 | }\r | |
3269 | *src = p;\r | |
3270 | return num;\r | |
3271 | }\r | |
3272 | \r | |
3273 | static int\r | |
3274 | scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int minlen,\r | |
3275 | int maxlen, OnigEncoding enc)\r | |
3276 | {\r | |
3277 | OnigCodePoint c;\r | |
3278 | unsigned int num, val;\r | |
3279 | int n;\r | |
3280 | UChar* p = *src;\r | |
3281 | PFETCH_READY;\r | |
3282 | \r | |
3283 | num = 0;\r | |
3284 | n = 0;\r | |
3285 | while (! PEND && n < maxlen) {\r | |
3286 | PFETCH(c);\r | |
3287 | if (IS_CODE_XDIGIT_ASCII(enc, c)) {\r | |
3288 | n++;\r | |
3289 | val = (unsigned int )XDIGITVAL(enc,c);\r | |
3290 | if ((INT_MAX_LIMIT - val) / 16UL < num)\r | |
3291 | return ONIGERR_TOO_BIG_NUMBER; /* overflow */\r | |
3292 | \r | |
3293 | num = (num << 4) + XDIGITVAL(enc,c);\r | |
3294 | }\r | |
3295 | else {\r | |
3296 | PUNFETCH;\r | |
3297 | break;\r | |
3298 | }\r | |
3299 | }\r | |
3300 | \r | |
3301 | if (n < minlen)\r | |
3302 | return ONIGERR_INVALID_CODE_POINT_VALUE;\r | |
3303 | \r | |
3304 | *src = p;\r | |
3305 | return num;\r | |
3306 | }\r | |
3307 | \r | |
3308 | static int\r | |
3309 | scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen,\r | |
3310 | OnigEncoding enc)\r | |
3311 | {\r | |
3312 | OnigCodePoint c;\r | |
3313 | unsigned int num, val;\r | |
3314 | UChar* p = *src;\r | |
3315 | PFETCH_READY;\r | |
3316 | \r | |
3317 | num = 0;\r | |
3318 | while (! PEND && maxlen-- != 0) {\r | |
3319 | PFETCH(c);\r | |
3320 | if (IS_CODE_DIGIT_ASCII(enc, c) && c < '8') {\r | |
3321 | val = ODIGITVAL(c);\r | |
3322 | if ((INT_MAX_LIMIT - val) / 8UL < num)\r | |
3323 | return -1; /* overflow */\r | |
3324 | \r | |
3325 | num = (num << 3) + val;\r | |
3326 | }\r | |
3327 | else {\r | |
3328 | PUNFETCH;\r | |
3329 | break;\r | |
3330 | }\r | |
3331 | }\r | |
3332 | *src = p;\r | |
3333 | return num;\r | |
3334 | }\r | |
3335 | \r | |
3336 | \r | |
/* Write one code point value into `bbuf` at byte offset `pos`.
   `code` must be an lvalue because its address is taken. */
#define BB_WRITE_CODE_POINT(bbuf,pos,code) \
  BB_WRITE((bbuf), (pos), &(code), SIZE_CODE_POINT)
3339 | \r | |
3340 | /* data format:\r | |
3341 | [n][from-1][to-1][from-2][to-2] ... [from-n][to-n]\r | |
3342 | (all data size is OnigCodePoint)\r | |
3343 | */\r | |
3344 | static int\r | |
3345 | new_code_range(BBuf** pbuf)\r | |
3346 | {\r | |
3347 | #define INIT_MULTI_BYTE_RANGE_SIZE (SIZE_CODE_POINT * 5)\r | |
3348 | int r;\r | |
3349 | OnigCodePoint n;\r | |
3350 | BBuf* bbuf;\r | |
3351 | \r | |
3352 | bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf));\r | |
3353 | CHECK_NULL_RETURN_MEMERR(bbuf);\r | |
3354 | r = BB_INIT(bbuf, INIT_MULTI_BYTE_RANGE_SIZE);\r | |
3355 | if (r != 0) {\r | |
3356 | xfree(bbuf);\r | |
3357 | *pbuf = 0;\r | |
3358 | return r;\r | |
3359 | }\r | |
3360 | \r | |
3361 | n = 0;\r | |
3362 | BB_WRITE_CODE_POINT(bbuf, 0, n);\r | |
3363 | return 0;\r | |
3364 | }\r | |
3365 | \r | |
3366 | static int\r | |
3367 | add_code_range_to_buf(BBuf** pbuf, OnigCodePoint from, OnigCodePoint to)\r | |
3368 | {\r | |
3369 | int r, inc_n, pos;\r | |
3370 | int low, high, bound, x;\r | |
3371 | OnigCodePoint n, *data;\r | |
3372 | BBuf* bbuf;\r | |
3373 | \r | |
3374 | if (from > to) {\r | |
3375 | n = from; from = to; to = n;\r | |
3376 | }\r | |
3377 | \r | |
3378 | if (IS_NULL(*pbuf)) {\r | |
3379 | r = new_code_range(pbuf);\r | |
3380 | if (r != 0) return r;\r | |
3381 | bbuf = *pbuf;\r | |
3382 | n = 0;\r | |
3383 | }\r | |
3384 | else {\r | |
3385 | bbuf = *pbuf;\r | |
3386 | GET_CODE_POINT(n, bbuf->p);\r | |
3387 | }\r | |
3388 | data = (OnigCodePoint* )(bbuf->p);\r | |
3389 | data++;\r | |
3390 | \r | |
3391 | for (low = 0, bound = n; low < bound; ) {\r | |
3392 | x = (low + bound) >> 1;\r | |
3393 | if (from > data[x*2 + 1])\r | |
3394 | low = x + 1;\r | |
3395 | else\r | |
3396 | bound = x;\r | |
3397 | }\r | |
3398 | \r | |
3399 | high = (to == ~((OnigCodePoint )0)) ? n : low;\r | |
3400 | for (bound = n; high < bound; ) {\r | |
3401 | x = (high + bound) >> 1;\r | |
3402 | if (to + 1 >= data[x*2])\r | |
3403 | high = x + 1;\r | |
3404 | else\r | |
3405 | bound = x;\r | |
3406 | }\r | |
3407 | \r | |
3408 | inc_n = low + 1 - high;\r | |
3409 | if (n + inc_n > ONIG_MAX_MULTI_BYTE_RANGES_NUM)\r | |
3410 | return ONIGERR_TOO_MANY_MULTI_BYTE_RANGES;\r | |
3411 | \r | |
3412 | if (inc_n != 1) {\r | |
3413 | if (from > data[low*2])\r | |
3414 | from = data[low*2];\r | |
3415 | if (to < data[(high - 1)*2 + 1])\r | |
3416 | to = data[(high - 1)*2 + 1];\r | |
3417 | }\r | |
3418 | \r | |
3419 | if (inc_n != 0 && (OnigCodePoint )high < n) {\r | |
3420 | int from_pos = SIZE_CODE_POINT * (1 + high * 2);\r | |
3421 | int to_pos = SIZE_CODE_POINT * (1 + (low + 1) * 2);\r | |
3422 | int size = (n - high) * 2 * SIZE_CODE_POINT;\r | |
3423 | \r | |
3424 | if (inc_n > 0) {\r | |
3425 | BB_MOVE_RIGHT(bbuf, from_pos, to_pos, size);\r | |
3426 | }\r | |
3427 | else {\r | |
3428 | BB_MOVE_LEFT_REDUCE(bbuf, from_pos, to_pos);\r | |
3429 | }\r | |
3430 | }\r | |
3431 | \r | |
3432 | pos = SIZE_CODE_POINT * (1 + low * 2);\r | |
3433 | BB_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2);\r | |
3434 | BB_WRITE_CODE_POINT(bbuf, pos, from);\r | |
3435 | BB_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to);\r | |
3436 | n += inc_n;\r | |
3437 | BB_WRITE_CODE_POINT(bbuf, 0, n);\r | |
3438 | \r | |
3439 | return 0;\r | |
3440 | }\r | |
3441 | \r | |
3442 | static int\r | |
3443 | add_code_range(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to)\r | |
3444 | {\r | |
3445 | if (from > to) {\r | |
3446 | if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))\r | |
3447 | return 0;\r | |
3448 | else\r | |
3449 | return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;\r | |
3450 | }\r | |
3451 | \r | |
3452 | return add_code_range_to_buf(pbuf, from, to);\r | |
3453 | }\r | |
3454 | \r | |
3455 | static int\r | |
3456 | not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf)\r | |
3457 | {\r | |
3458 | int r, i, n;\r | |
3459 | OnigCodePoint pre, from, *data, to = 0;\r | |
3460 | \r | |
3461 | *pbuf = (BBuf* )NULL;\r | |
3462 | if (IS_NULL(bbuf)) {\r | |
3463 | set_all:\r | |
3464 | return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);\r | |
3465 | }\r | |
3466 | \r | |
3467 | data = (OnigCodePoint* )(bbuf->p);\r | |
3468 | GET_CODE_POINT(n, data);\r | |
3469 | data++;\r | |
3470 | if (n <= 0) goto set_all;\r | |
3471 | \r | |
3472 | r = 0;\r | |
3473 | pre = MBCODE_START_POS(enc);\r | |
3474 | for (i = 0; i < n; i++) {\r | |
3475 | from = data[i*2];\r | |
3476 | to = data[i*2+1];\r | |
3477 | if (pre <= from - 1) {\r | |
3478 | r = add_code_range_to_buf(pbuf, pre, from - 1);\r | |
3479 | if (r != 0) return r;\r | |
3480 | }\r | |
3481 | if (to == ~((OnigCodePoint )0)) break;\r | |
3482 | pre = to + 1;\r | |
3483 | }\r | |
3484 | if (to < ~((OnigCodePoint )0)) {\r | |
3485 | r = add_code_range_to_buf(pbuf, to + 1, ~((OnigCodePoint )0));\r | |
3486 | }\r | |
3487 | return r;\r | |
3488 | }\r | |
3489 | \r | |
/* Exchange two (buffer, not-flag) pairs in place.  All four arguments
   must be modifiable lvalues. */
#define SWAP_BB_NOT(bbuf1, not1, bbuf2, not2) do {\
  BBuf *swap_buf_ = bbuf1;\
  int   swap_not_ = not1;\
  bbuf1 = bbuf2; bbuf2 = swap_buf_;\
  not1  = not2;  not2  = swap_not_;\
} while (0)
3496 | \r | |
3497 | static int\r | |
3498 | or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1,\r | |
3499 | BBuf* bbuf2, int not2, BBuf** pbuf)\r | |
3500 | {\r | |
3501 | int r;\r | |
3502 | OnigCodePoint i, n1, *data1;\r | |
3503 | OnigCodePoint from, to;\r | |
3504 | \r | |
3505 | *pbuf = (BBuf* )NULL;\r | |
3506 | if (IS_NULL(bbuf1) && IS_NULL(bbuf2)) {\r | |
3507 | if (not1 != 0 || not2 != 0)\r | |
3508 | return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);\r | |
3509 | return 0;\r | |
3510 | }\r | |
3511 | \r | |
3512 | r = 0;\r | |
3513 | if (IS_NULL(bbuf2))\r | |
3514 | SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);\r | |
3515 | \r | |
3516 | if (IS_NULL(bbuf1)) {\r | |
3517 | if (not1 != 0) {\r | |
3518 | return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);\r | |
3519 | }\r | |
3520 | else {\r | |
3521 | if (not2 == 0) {\r | |
3522 | return bbuf_clone(pbuf, bbuf2);\r | |
3523 | }\r | |
3524 | else {\r | |
3525 | return not_code_range_buf(enc, bbuf2, pbuf);\r | |
3526 | }\r | |
3527 | }\r | |
3528 | }\r | |
3529 | \r | |
3530 | if (not1 != 0)\r | |
3531 | SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);\r | |
3532 | \r | |
3533 | data1 = (OnigCodePoint* )(bbuf1->p);\r | |
3534 | GET_CODE_POINT(n1, data1);\r | |
3535 | data1++;\r | |
3536 | \r | |
3537 | if (not2 == 0 && not1 == 0) { /* 1 OR 2 */\r | |
3538 | r = bbuf_clone(pbuf, bbuf2);\r | |
3539 | }\r | |
3540 | else if (not1 == 0) { /* 1 OR (not 2) */\r | |
3541 | r = not_code_range_buf(enc, bbuf2, pbuf);\r | |
3542 | }\r | |
3543 | if (r != 0) return r;\r | |
3544 | \r | |
3545 | for (i = 0; i < n1; i++) {\r | |
3546 | from = data1[i*2];\r | |
3547 | to = data1[i*2+1];\r | |
3548 | r = add_code_range_to_buf(pbuf, from, to);\r | |
3549 | if (r != 0) return r;\r | |
3550 | }\r | |
3551 | return 0;\r | |
3552 | }\r | |
3553 | \r | |
3554 | static int\r | |
3555 | and_code_range1(BBuf** pbuf, OnigCodePoint from1, OnigCodePoint to1,\r | |
3556 | OnigCodePoint* data, int n)\r | |
3557 | {\r | |
3558 | int i, r;\r | |
3559 | OnigCodePoint from2, to2;\r | |
3560 | \r | |
3561 | for (i = 0; i < n; i++) {\r | |
3562 | from2 = data[i*2];\r | |
3563 | to2 = data[i*2+1];\r | |
3564 | if (from2 < from1) {\r | |
3565 | if (to2 < from1) continue;\r | |
3566 | else {\r | |
3567 | from1 = to2 + 1;\r | |
3568 | }\r | |
3569 | }\r | |
3570 | else if (from2 <= to1) {\r | |
3571 | if (to2 < to1) {\r | |
3572 | if (from1 <= from2 - 1) {\r | |
3573 | r = add_code_range_to_buf(pbuf, from1, from2-1);\r | |
3574 | if (r != 0) return r;\r | |
3575 | }\r | |
3576 | from1 = to2 + 1;\r | |
3577 | }\r | |
3578 | else {\r | |
3579 | to1 = from2 - 1;\r | |
3580 | }\r | |
3581 | }\r | |
3582 | else {\r | |
3583 | from1 = from2;\r | |
3584 | }\r | |
3585 | if (from1 > to1) break;\r | |
3586 | }\r | |
3587 | if (from1 <= to1) {\r | |
3588 | r = add_code_range_to_buf(pbuf, from1, to1);\r | |
3589 | if (r != 0) return r;\r | |
3590 | }\r | |
3591 | return 0;\r | |
3592 | }\r | |
3593 | \r | |
3594 | static int\r | |
3595 | and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)\r | |
3596 | {\r | |
3597 | int r;\r | |
3598 | OnigCodePoint i, j, n1, n2, *data1, *data2;\r | |
3599 | OnigCodePoint from, to, from1, to1, from2, to2;\r | |
3600 | \r | |
3601 | *pbuf = (BBuf* )NULL;\r | |
3602 | if (IS_NULL(bbuf1)) {\r | |
3603 | if (not1 != 0 && IS_NOT_NULL(bbuf2)) /* not1 != 0 -> not2 == 0 */\r | |
3604 | return bbuf_clone(pbuf, bbuf2);\r | |
3605 | return 0;\r | |
3606 | }\r | |
3607 | else if (IS_NULL(bbuf2)) {\r | |
3608 | if (not2 != 0)\r | |
3609 | return bbuf_clone(pbuf, bbuf1);\r | |
3610 | return 0;\r | |
3611 | }\r | |
3612 | \r | |
3613 | if (not1 != 0)\r | |
3614 | SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);\r | |
3615 | \r | |
3616 | data1 = (OnigCodePoint* )(bbuf1->p);\r | |
3617 | data2 = (OnigCodePoint* )(bbuf2->p);\r | |
3618 | GET_CODE_POINT(n1, data1);\r | |
3619 | GET_CODE_POINT(n2, data2);\r | |
3620 | data1++;\r | |
3621 | data2++;\r | |
3622 | \r | |
3623 | if (not2 == 0 && not1 == 0) { /* 1 AND 2 */\r | |
3624 | for (i = 0; i < n1; i++) {\r | |
3625 | from1 = data1[i*2];\r | |
3626 | to1 = data1[i*2+1];\r | |
3627 | for (j = 0; j < n2; j++) {\r | |
3628 | from2 = data2[j*2];\r | |
3629 | to2 = data2[j*2+1];\r | |
3630 | if (from2 > to1) break;\r | |
3631 | if (to2 < from1) continue;\r | |
3632 | from = MAX(from1, from2);\r | |
3633 | to = MIN(to1, to2);\r | |
3634 | r = add_code_range_to_buf(pbuf, from, to);\r | |
3635 | if (r != 0) return r;\r | |
3636 | }\r | |
3637 | }\r | |
3638 | }\r | |
3639 | else if (not1 == 0) { /* 1 AND (not 2) */\r | |
3640 | for (i = 0; i < n1; i++) {\r | |
3641 | from1 = data1[i*2];\r | |
3642 | to1 = data1[i*2+1];\r | |
14b0e578 CS |
3643 | r = and_code_range1(pbuf, from1, to1, data2, n2);\r |
3644 | if (r != 0) return r;\r | |
3645 | }\r | |
3646 | }\r | |
3647 | \r | |
3648 | return 0;\r | |
3649 | }\r | |
3650 | \r | |
3651 | static int\r | |
3652 | and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)\r | |
3653 | {\r | |
3654 | int r, not1, not2;\r | |
3655 | BBuf *buf1, *buf2, *pbuf;\r | |
3656 | BitSetRef bsr1, bsr2;\r | |
3657 | BitSet bs1, bs2;\r | |
3658 | \r | |
3659 | not1 = IS_NCCLASS_NOT(dest);\r | |
3660 | bsr1 = dest->bs;\r | |
3661 | buf1 = dest->mbuf;\r | |
3662 | not2 = IS_NCCLASS_NOT(cc);\r | |
3663 | bsr2 = cc->bs;\r | |
3664 | buf2 = cc->mbuf;\r | |
3665 | \r | |
3666 | if (not1 != 0) {\r | |
3667 | bitset_invert_to(bsr1, bs1);\r | |
3668 | bsr1 = bs1;\r | |
3669 | }\r | |
3670 | if (not2 != 0) {\r | |
3671 | bitset_invert_to(bsr2, bs2);\r | |
3672 | bsr2 = bs2;\r | |
3673 | }\r | |
3674 | bitset_and(bsr1, bsr2);\r | |
3675 | if (bsr1 != dest->bs) {\r | |
3676 | bitset_copy(dest->bs, bsr1);\r | |
14b0e578 CS |
3677 | }\r |
3678 | if (not1 != 0) {\r | |
3679 | bitset_invert(dest->bs);\r | |
3680 | }\r | |
3681 | \r | |
3682 | if (! ONIGENC_IS_SINGLEBYTE(enc)) {\r | |
3683 | if (not1 != 0 && not2 != 0) {\r | |
3684 | r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf);\r | |
3685 | }\r | |
3686 | else {\r | |
3687 | r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf);\r | |
3688 | if (r == 0 && not1 != 0) {\r | |
b602265d DG |
3689 | BBuf *tbuf;\r |
3690 | r = not_code_range_buf(enc, pbuf, &tbuf);\r | |
3691 | if (r != 0) {\r | |
3692 | bbuf_free(pbuf);\r | |
3693 | return r;\r | |
3694 | }\r | |
3695 | bbuf_free(pbuf);\r | |
3696 | pbuf = tbuf;\r | |
14b0e578 CS |
3697 | }\r |
3698 | }\r | |
3699 | if (r != 0) return r;\r | |
3700 | \r | |
3701 | dest->mbuf = pbuf;\r | |
3702 | bbuf_free(buf1);\r | |
3703 | return r;\r | |
3704 | }\r | |
3705 | return 0;\r | |
3706 | }\r | |
3707 | \r | |
3708 | static int\r | |
3709 | or_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)\r | |
3710 | {\r | |
3711 | int r, not1, not2;\r | |
3712 | BBuf *buf1, *buf2, *pbuf;\r | |
3713 | BitSetRef bsr1, bsr2;\r | |
3714 | BitSet bs1, bs2;\r | |
3715 | \r | |
3716 | not1 = IS_NCCLASS_NOT(dest);\r | |
3717 | bsr1 = dest->bs;\r | |
3718 | buf1 = dest->mbuf;\r | |
3719 | not2 = IS_NCCLASS_NOT(cc);\r | |
3720 | bsr2 = cc->bs;\r | |
3721 | buf2 = cc->mbuf;\r | |
3722 | \r | |
3723 | if (not1 != 0) {\r | |
3724 | bitset_invert_to(bsr1, bs1);\r | |
3725 | bsr1 = bs1;\r | |
3726 | }\r | |
3727 | if (not2 != 0) {\r | |
3728 | bitset_invert_to(bsr2, bs2);\r | |
3729 | bsr2 = bs2;\r | |
3730 | }\r | |
3731 | bitset_or(bsr1, bsr2);\r | |
3732 | if (bsr1 != dest->bs) {\r | |
3733 | bitset_copy(dest->bs, bsr1);\r | |
14b0e578 CS |
3734 | }\r |
3735 | if (not1 != 0) {\r | |
3736 | bitset_invert(dest->bs);\r | |
3737 | }\r | |
3738 | \r | |
3739 | if (! ONIGENC_IS_SINGLEBYTE(enc)) {\r | |
3740 | if (not1 != 0 && not2 != 0) {\r | |
3741 | r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf);\r | |
3742 | }\r | |
3743 | else {\r | |
3744 | r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf);\r | |
3745 | if (r == 0 && not1 != 0) {\r | |
b602265d DG |
3746 | BBuf *tbuf;\r |
3747 | r = not_code_range_buf(enc, pbuf, &tbuf);\r | |
3748 | if (r != 0) {\r | |
3749 | bbuf_free(pbuf);\r | |
3750 | return r;\r | |
3751 | }\r | |
3752 | bbuf_free(pbuf);\r | |
3753 | pbuf = tbuf;\r | |
14b0e578 CS |
3754 | }\r |
3755 | }\r | |
3756 | if (r != 0) return r;\r | |
3757 | \r | |
3758 | dest->mbuf = pbuf;\r | |
3759 | bbuf_free(buf1);\r | |
3760 | return r;\r | |
3761 | }\r | |
3762 | else\r | |
3763 | return 0;\r | |
3764 | }\r | |
3765 | \r | |
b602265d DG |
3766 | static OnigCodePoint\r |
3767 | conv_backslash_value(OnigCodePoint c, ScanEnv* env)\r | |
14b0e578 CS |
3768 | {\r |
3769 | if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_CONTROL_CHARS)) {\r | |
3770 | switch (c) {\r | |
3771 | case 'n': return '\n';\r | |
3772 | case 't': return '\t';\r | |
3773 | case 'r': return '\r';\r | |
3774 | case 'f': return '\f';\r | |
3775 | case 'a': return '\007';\r | |
3776 | case 'b': return '\010';\r | |
3777 | case 'e': return '\033';\r | |
3778 | case 'v':\r | |
3779 | if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_V_VTAB))\r | |
b602265d | 3780 | return '\v';\r |
14b0e578 CS |
3781 | break;\r |
3782 | \r | |
3783 | default:\r | |
3784 | break;\r | |
3785 | }\r | |
3786 | }\r | |
3787 | return c;\r | |
3788 | }\r | |
3789 | \r | |
3790 | static int\r | |
3791 | is_invalid_quantifier_target(Node* node)\r | |
3792 | {\r | |
b602265d DG |
3793 | switch (NODE_TYPE(node)) {\r |
3794 | case NODE_ANCHOR:\r | |
3795 | case NODE_GIMMICK:\r | |
14b0e578 CS |
3796 | return 1;\r |
3797 | break;\r | |
3798 | \r | |
b602265d | 3799 | case NODE_ENCLOSURE:\r |
14b0e578 | 3800 | /* allow enclosed elements */\r |
b602265d | 3801 | /* return is_invalid_quantifier_target(NODE_BODY(node)); */\r |
14b0e578 CS |
3802 | break;\r |
3803 | \r | |
b602265d | 3804 | case NODE_LIST:\r |
14b0e578 | 3805 | do {\r |
b602265d DG |
3806 | if (! is_invalid_quantifier_target(NODE_CAR(node))) return 0;\r |
3807 | } while (IS_NOT_NULL(node = NODE_CDR(node)));\r | |
14b0e578 CS |
3808 | return 0;\r |
3809 | break;\r | |
3810 | \r | |
b602265d | 3811 | case NODE_ALT:\r |
14b0e578 | 3812 | do {\r |
b602265d DG |
3813 | if (is_invalid_quantifier_target(NODE_CAR(node))) return 1;\r |
3814 | } while (IS_NOT_NULL(node = NODE_CDR(node)));\r | |
14b0e578 CS |
3815 | break;\r |
3816 | \r | |
3817 | default:\r | |
3818 | break;\r | |
3819 | }\r | |
3820 | return 0;\r | |
3821 | }\r | |
3822 | \r | |
3823 | /* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */\r | |
3824 | static int\r | |
b602265d | 3825 | quantifier_type_num(QuantNode* q)\r |
14b0e578 CS |
3826 | {\r |
3827 | if (q->greedy) {\r | |
3828 | if (q->lower == 0) {\r | |
3829 | if (q->upper == 1) return 0;\r | |
3830 | else if (IS_REPEAT_INFINITE(q->upper)) return 1;\r | |
3831 | }\r | |
3832 | else if (q->lower == 1) {\r | |
3833 | if (IS_REPEAT_INFINITE(q->upper)) return 2;\r | |
3834 | }\r | |
3835 | }\r | |
3836 | else {\r | |
3837 | if (q->lower == 0) {\r | |
3838 | if (q->upper == 1) return 3;\r | |
3839 | else if (IS_REPEAT_INFINITE(q->upper)) return 4;\r | |
3840 | }\r | |
3841 | else if (q->lower == 1) {\r | |
3842 | if (IS_REPEAT_INFINITE(q->upper)) return 5;\r | |
3843 | }\r | |
3844 | }\r | |
3845 | return -1;\r | |
3846 | }\r | |
3847 | \r | |
3848 | \r | |
/*
 * Actions used when a quantifier is applied directly to another
 * quantifier.  "parent" is the outer quantifier, "child" the inner one.
 */
enum ReduceType {
  RQ_ASIS = 0,  /* as is: keep both quantifiers             */
  RQ_DEL  = 1,  /* delete parent: only the child remains    */
  RQ_A    = 2,  /* collapse to '*'                          */
  RQ_AQ   = 3,  /* collapse to '*?'                         */
  RQ_QQ   = 4,  /* collapse to '??'                         */
  RQ_P_QQ = 5,  /* to '+)??' : child '+',  parent '??'      */
  RQ_PQ_Q = 6   /* to '+?)?' : child '+?', parent '?'       */
};
3858 | \r | |
3859 | static enum ReduceType ReduceTypeTable[6][6] = {\r | |
3860 | {RQ_DEL, RQ_A, RQ_A, RQ_QQ, RQ_AQ, RQ_ASIS}, /* '?' */\r | |
3861 | {RQ_DEL, RQ_DEL, RQ_DEL, RQ_P_QQ, RQ_P_QQ, RQ_DEL}, /* '*' */\r | |
3862 | {RQ_A, RQ_A, RQ_DEL, RQ_ASIS, RQ_P_QQ, RQ_DEL}, /* '+' */\r | |
3863 | {RQ_DEL, RQ_AQ, RQ_AQ, RQ_DEL, RQ_AQ, RQ_AQ}, /* '??' */\r | |
3864 | {RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL}, /* '*?' */\r | |
3865 | {RQ_ASIS, RQ_PQ_Q, RQ_DEL, RQ_AQ, RQ_AQ, RQ_DEL} /* '+?' */\r | |
3866 | };\r | |
3867 | \r | |
3868 | extern void\r | |
3869 | onig_reduce_nested_quantifier(Node* pnode, Node* cnode)\r | |
3870 | {\r | |
3871 | int pnum, cnum;\r | |
b602265d DG |
3872 | QuantNode *p, *c;\r |
3873 | \r | |
3874 | p = QUANT_(pnode);\r | |
3875 | c = QUANT_(cnode);\r | |
3876 | pnum = quantifier_type_num(p);\r | |
3877 | cnum = quantifier_type_num(c);\r | |
3878 | if (pnum < 0 || cnum < 0) {\r | |
3879 | if ((p->lower == p->upper) && ! IS_REPEAT_INFINITE(p->upper)) {\r | |
3880 | if ((c->lower == c->upper) && ! IS_REPEAT_INFINITE(c->upper)) {\r | |
3881 | int n = positive_int_multiply(p->lower, c->lower);\r | |
3882 | if (n >= 0) {\r | |
3883 | p->lower = p->upper = n;\r | |
3884 | NODE_BODY(pnode) = NODE_BODY(cnode);\r | |
3885 | goto remove_cnode;\r | |
3886 | }\r | |
3887 | }\r | |
3888 | }\r | |
14b0e578 | 3889 | \r |
b602265d DG |
3890 | return ;\r |
3891 | }\r | |
14b0e578 CS |
3892 | \r |
3893 | switch(ReduceTypeTable[cnum][pnum]) {\r | |
3894 | case RQ_DEL:\r | |
b602265d | 3895 | *pnode = *cnode;\r |
14b0e578 CS |
3896 | break;\r |
3897 | case RQ_A:\r | |
b602265d | 3898 | NODE_BODY(pnode) = NODE_BODY(cnode);\r |
14b0e578 CS |
3899 | p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 1;\r |
3900 | break;\r | |
3901 | case RQ_AQ:\r | |
b602265d | 3902 | NODE_BODY(pnode) = NODE_BODY(cnode);\r |
14b0e578 CS |
3903 | p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 0;\r |
3904 | break;\r | |
3905 | case RQ_QQ:\r | |
b602265d | 3906 | NODE_BODY(pnode) = NODE_BODY(cnode);\r |
14b0e578 CS |
3907 | p->lower = 0; p->upper = 1; p->greedy = 0;\r |
3908 | break;\r | |
3909 | case RQ_P_QQ:\r | |
b602265d | 3910 | NODE_BODY(pnode) = cnode;\r |
14b0e578 CS |
3911 | p->lower = 0; p->upper = 1; p->greedy = 0;\r |
3912 | c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 1;\r | |
3913 | return ;\r | |
3914 | break;\r | |
3915 | case RQ_PQ_Q:\r | |
b602265d | 3916 | NODE_BODY(pnode) = cnode;\r |
14b0e578 CS |
3917 | p->lower = 0; p->upper = 1; p->greedy = 1;\r |
3918 | c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 0;\r | |
3919 | return ;\r | |
3920 | break;\r | |
3921 | case RQ_ASIS:\r | |
b602265d | 3922 | NODE_BODY(pnode) = cnode;\r |
14b0e578 CS |
3923 | return ;\r |
3924 | break;\r | |
3925 | }\r | |
3926 | \r | |
b602265d DG |
3927 | remove_cnode:\r |
3928 | NODE_BODY(cnode) = NULL_NODE;\r | |
14b0e578 CS |
3929 | onig_node_free(cnode);\r |
3930 | }\r | |
3931 | \r | |
b602265d DG |
3932 | static int\r |
3933 | node_new_general_newline(Node** node, ScanEnv* env)\r | |
3934 | {\r | |
3935 | int r;\r | |
3936 | int dlen, alen;\r | |
3937 | UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN * 2];\r | |
3938 | Node* crnl;\r | |
3939 | Node* ncc;\r | |
3940 | Node* x;\r | |
3941 | CClassNode* cc;\r | |
3942 | \r | |
3943 | dlen = ONIGENC_CODE_TO_MBC(env->enc, 0x0d, buf);\r | |
3944 | if (dlen < 0) return dlen;\r | |
3945 | alen = ONIGENC_CODE_TO_MBC(env->enc, 0x0a, buf + dlen);\r | |
3946 | if (alen < 0) return alen;\r | |
3947 | \r | |
3948 | crnl = node_new_str_raw(buf, buf + dlen + alen);\r | |
3949 | CHECK_NULL_RETURN_MEMERR(crnl);\r | |
3950 | \r | |
3951 | ncc = node_new_cclass();\r | |
3952 | if (IS_NULL(ncc)) goto err2;\r | |
3953 | \r | |
3954 | cc = CCLASS_(ncc);\r | |
3955 | if (dlen == 1) {\r | |
3956 | bitset_set_range(cc->bs, 0x0a, 0x0d);\r | |
3957 | }\r | |
3958 | else {\r | |
3959 | r = add_code_range(&(cc->mbuf), env, 0x0a, 0x0d);\r | |
3960 | if (r != 0) {\r | |
3961 | err1:\r | |
3962 | onig_node_free(ncc);\r | |
3963 | err2:\r | |
3964 | onig_node_free(crnl);\r | |
3965 | return ONIGERR_MEMORY;\r | |
3966 | }\r | |
3967 | }\r | |
3968 | \r | |
3969 | if (ONIGENC_IS_UNICODE_ENCODING(env->enc)) {\r | |
3970 | r = add_code_range(&(cc->mbuf), env, 0x85, 0x85);\r | |
3971 | if (r != 0) goto err1;\r | |
3972 | r = add_code_range(&(cc->mbuf), env, 0x2028, 0x2029);\r | |
3973 | if (r != 0) goto err1;\r | |
3974 | }\r | |
3975 | \r | |
3976 | x = node_new_enclosure_if_else(crnl, 0, ncc);\r | |
3977 | if (IS_NULL(x)) goto err1;\r | |
3978 | \r | |
3979 | *node = x;\r | |
3980 | return 0;\r | |
3981 | }\r | |
14b0e578 CS |
3982 | \r |
/* Token kinds stored in OnigToken.type. */
enum TokenSyms {
  /* outside a character class */
  TK_EOT      = 0,              /* end of token */
  TK_RAW_BYTE = 1,
  TK_CHAR,
  TK_STRING,
  TK_CODE_POINT,
  TK_ANYCHAR,
  TK_CHAR_TYPE,
  TK_BACKREF,
  TK_CALL,
  TK_ANCHOR,
  TK_OP_REPEAT,
  TK_INTERVAL,
  TK_ANYCHAR_ANYTIME,           /* SQL '%' == .* */
  TK_ALT,
  TK_SUBEXP_OPEN,
  TK_SUBEXP_CLOSE,
  TK_CC_OPEN,
  TK_QUOTE_OPEN,
  TK_CHAR_PROPERTY,             /* \p{...}, \P{...} */
  TK_KEEP,                      /* \K */
  TK_GENERAL_NEWLINE,           /* \R */
  TK_NO_NEWLINE,                /* \N */
  TK_TRUE_ANYCHAR,              /* \O */
  TK_EXTENDED_GRAPHEME_CLUSTER, /* \X */

  /* in cc */
  TK_CC_CLOSE,
  TK_CC_RANGE,
  TK_POSIX_BRACKET_OPEN,
  TK_CC_AND,                    /* && */
  TK_CC_CC_OPEN                 /* [ */
};
4016 | \r | |
4017 | typedef struct {\r | |
4018 | enum TokenSyms type;\r | |
4019 | int escaped;\r | |
4020 | int base; /* is number: 8, 16 (used in [....]) */\r | |
4021 | UChar* backp;\r | |
4022 | union {\r | |
4023 | UChar* s;\r | |
4024 | int c;\r | |
4025 | OnigCodePoint code;\r | |
4026 | int anchor;\r | |
4027 | int subtype;\r | |
4028 | struct {\r | |
4029 | int lower;\r | |
4030 | int upper;\r | |
4031 | int greedy;\r | |
4032 | int possessive;\r | |
4033 | } repeat;\r | |
4034 | struct {\r | |
4035 | int num;\r | |
4036 | int ref1;\r | |
4037 | int* refs;\r | |
4038 | int by_name;\r | |
4039 | #ifdef USE_BACKREF_WITH_LEVEL\r | |
4040 | int exist_level;\r | |
4041 | int level; /* \k<name+n> */\r | |
4042 | #endif\r | |
4043 | } backref;\r | |
4044 | struct {\r | |
4045 | UChar* name;\r | |
4046 | UChar* name_end;\r | |
4047 | int gnum;\r | |
b602265d | 4048 | int by_number;\r |
14b0e578 CS |
4049 | } call;\r |
4050 | struct {\r | |
4051 | int ctype;\r | |
4052 | int not;\r | |
4053 | } prop;\r | |
4054 | } u;\r | |
4055 | } OnigToken;\r | |
4056 | \r | |
4057 | \r | |
4058 | static int\r | |
4059 | fetch_range_quantifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env)\r | |
4060 | {\r | |
4061 | int low, up, syn_allow, non_low = 0;\r | |
4062 | int r = 0;\r | |
4063 | OnigCodePoint c;\r | |
4064 | OnigEncoding enc = env->enc;\r | |
4065 | UChar* p = *src;\r | |
4066 | PFETCH_READY;\r | |
4067 | \r | |
4068 | syn_allow = IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INVALID_INTERVAL);\r | |
4069 | \r | |
4070 | if (PEND) {\r | |
4071 | if (syn_allow)\r | |
4072 | return 1; /* "....{" : OK! */\r | |
4073 | else\r | |
4074 | return ONIGERR_END_PATTERN_AT_LEFT_BRACE; /* "....{" syntax error */\r | |
4075 | }\r | |
4076 | \r | |
4077 | if (! syn_allow) {\r | |
4078 | c = PPEEK;\r | |
4079 | if (c == ')' || c == '(' || c == '|') {\r | |
4080 | return ONIGERR_END_PATTERN_AT_LEFT_BRACE;\r | |
4081 | }\r | |
4082 | }\r | |
4083 | \r | |
4084 | low = onig_scan_unsigned_number(&p, end, env->enc);\r | |
4085 | if (low < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;\r | |
4086 | if (low > ONIG_MAX_REPEAT_NUM)\r | |
4087 | return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;\r | |
4088 | \r | |
4089 | if (p == *src) { /* can't read low */\r | |
4090 | if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV)) {\r | |
4091 | /* allow {,n} as {0,n} */\r | |
4092 | low = 0;\r | |
4093 | non_low = 1;\r | |
4094 | }\r | |
4095 | else\r | |
4096 | goto invalid;\r | |
4097 | }\r | |
4098 | \r | |
4099 | if (PEND) goto invalid;\r | |
4100 | PFETCH(c);\r | |
4101 | if (c == ',') {\r | |
4102 | UChar* prev = p;\r | |
4103 | up = onig_scan_unsigned_number(&p, end, env->enc);\r | |
4104 | if (up < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;\r | |
4105 | if (up > ONIG_MAX_REPEAT_NUM)\r | |
4106 | return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;\r | |
4107 | \r | |
4108 | if (p == prev) {\r | |
4109 | if (non_low != 0)\r | |
b602265d | 4110 | goto invalid;\r |
14b0e578 CS |
4111 | up = REPEAT_INFINITE; /* {n,} : {n,infinite} */\r |
4112 | }\r | |
4113 | }\r | |
4114 | else {\r | |
4115 | if (non_low != 0)\r | |
4116 | goto invalid;\r | |
4117 | \r | |
4118 | PUNFETCH;\r | |
4119 | up = low; /* {n} : exact n times */\r | |
4120 | r = 2; /* fixed */\r | |
4121 | }\r | |
4122 | \r | |
4123 | if (PEND) goto invalid;\r | |
4124 | PFETCH(c);\r | |
4125 | if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) {\r | |
4126 | if (c != MC_ESC(env->syntax)) goto invalid;\r | |
4127 | PFETCH(c);\r | |
4128 | }\r | |
4129 | if (c != '}') goto invalid;\r | |
4130 | \r | |
4131 | if (!IS_REPEAT_INFINITE(up) && low > up) {\r | |
4132 | return ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE;\r | |
4133 | }\r | |
4134 | \r | |
4135 | tok->type = TK_INTERVAL;\r | |
4136 | tok->u.repeat.lower = low;\r | |
4137 | tok->u.repeat.upper = up;\r | |
4138 | *src = p;\r | |
4139 | return r; /* 0: normal {n,m}, 2: fixed {n} */\r | |
4140 | \r | |
4141 | invalid:\r | |
b602265d DG |
4142 | if (syn_allow) {\r |
4143 | /* *src = p; */ /* !!! Don't do this line !!! */\r | |
14b0e578 | 4144 | return 1; /* OK */\r |
b602265d | 4145 | }\r |
14b0e578 CS |
4146 | else\r |
4147 | return ONIGERR_INVALID_REPEAT_RANGE_PATTERN;\r | |
4148 | }\r | |
4149 | \r | |
4150 | /* \M-, \C-, \c, or \... */\r | |
4151 | static int\r | |
b602265d | 4152 | fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env, OnigCodePoint* val)\r |
14b0e578 CS |
4153 | {\r |
4154 | int v;\r | |
4155 | OnigCodePoint c;\r | |
4156 | OnigEncoding enc = env->enc;\r | |
4157 | UChar* p = *src;\r | |
4158 | \r | |
4159 | if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;\r | |
4160 | \r | |
4161 | PFETCH_S(c);\r | |
4162 | switch (c) {\r | |
4163 | case 'M':\r | |
4164 | if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META)) {\r | |
4165 | if (PEND) return ONIGERR_END_PATTERN_AT_META;\r | |
4166 | PFETCH_S(c);\r | |
4167 | if (c != '-') return ONIGERR_META_CODE_SYNTAX;\r | |
4168 | if (PEND) return ONIGERR_END_PATTERN_AT_META;\r | |
4169 | PFETCH_S(c);\r | |
4170 | if (c == MC_ESC(env->syntax)) {\r | |
b602265d | 4171 | v = fetch_escaped_value(&p, end, env, &c);\r |
14b0e578 | 4172 | if (v < 0) return v;\r |
14b0e578 CS |
4173 | }\r |
4174 | c = ((c & 0xff) | 0x80);\r | |
4175 | }\r | |
4176 | else\r | |
4177 | goto backslash;\r | |
4178 | break;\r | |
4179 | \r | |
4180 | case 'C':\r | |
4181 | if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL)) {\r | |
4182 | if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;\r | |
4183 | PFETCH_S(c);\r | |
4184 | if (c != '-') return ONIGERR_CONTROL_CODE_SYNTAX;\r | |
4185 | goto control;\r | |
4186 | }\r | |
4187 | else\r | |
4188 | goto backslash;\r | |
4189 | \r | |
4190 | case 'c':\r | |
4191 | if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_C_CONTROL)) {\r | |
4192 | control:\r | |
4193 | if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;\r | |
4194 | PFETCH_S(c);\r | |
4195 | if (c == '?') {\r | |
4196 | c = 0177;\r | |
4197 | }\r | |
4198 | else {\r | |
4199 | if (c == MC_ESC(env->syntax)) {\r | |
b602265d | 4200 | v = fetch_escaped_value(&p, end, env, &c);\r |
14b0e578 | 4201 | if (v < 0) return v;\r |
14b0e578 CS |
4202 | }\r |
4203 | c &= 0x9f;\r | |
4204 | }\r | |
4205 | break;\r | |
4206 | }\r | |
4207 | /* fall through */\r | |
4208 | \r | |
4209 | default:\r | |
4210 | {\r | |
4211 | backslash:\r | |
4212 | c = conv_backslash_value(c, env);\r | |
4213 | }\r | |
4214 | break;\r | |
4215 | }\r | |
4216 | \r | |
4217 | *src = p;\r | |
b602265d DG |
4218 | *val = c;\r |
4219 | return 0;\r | |
14b0e578 CS |
4220 | }\r |
4221 | \r | |
4222 | static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env);\r | |
4223 | \r | |
4224 | static OnigCodePoint\r | |
4225 | get_name_end_code_point(OnigCodePoint start)\r | |
4226 | {\r | |
4227 | switch (start) {\r | |
b602265d | 4228 | case '<': return (OnigCodePoint )'>'; break;\r |
14b0e578 | 4229 | case '\'': return (OnigCodePoint )'\''; break;\r |
b602265d | 4230 | case '(': return (OnigCodePoint )')'; break;\r |
14b0e578 CS |
4231 | default:\r |
4232 | break;\r | |
4233 | }\r | |
4234 | \r | |
4235 | return (OnigCodePoint )0;\r | |
4236 | }\r | |
4237 | \r | |
b602265d DG |
/* How the text between name delimiters was classified by the name scanners. */
enum REF_NUM {
  IS_NOT_NUM = 0,  /* a name, not a number            */
  IS_ABS_NUM = 1,  /* starts with a digit             */
  IS_REL_NUM = 2   /* starts with an explicit + or -  */
};
4243 | \r | |
14b0e578 CS |
4244 | #ifdef USE_BACKREF_WITH_LEVEL\r |
4245 | /*\r | |
4246 | \k<name+n>, \k<name-n>\r | |
4247 | \k<num+n>, \k<num-n>\r | |
4248 | \k<-num+n>, \k<-num-n>\r | |
b602265d | 4249 | \k<+num+n>, \k<+num-n>\r |
14b0e578 CS |
4250 | */\r |
4251 | static int\r | |
4252 | fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,\r | |
b602265d DG |
4253 | UChar** rname_end, ScanEnv* env,\r |
4254 | int* rback_num, int* rlevel, enum REF_NUM* num_type)\r | |
14b0e578 | 4255 | {\r |
b602265d DG |
4256 | int r, sign, exist_level;\r |
4257 | int digit_count;\r | |
14b0e578 CS |
4258 | OnigCodePoint end_code;\r |
4259 | OnigCodePoint c = 0;\r | |
4260 | OnigEncoding enc = env->enc;\r | |
4261 | UChar *name_end;\r | |
4262 | UChar *pnum_head;\r | |
4263 | UChar *p = *src;\r | |
4264 | PFETCH_READY;\r | |
4265 | \r | |
4266 | *rback_num = 0;\r | |
b602265d DG |
4267 | exist_level = 0;\r |
4268 | *num_type = IS_NOT_NUM;\r | |
14b0e578 CS |
4269 | sign = 1;\r |
4270 | pnum_head = *src;\r | |
4271 | \r | |
4272 | end_code = get_name_end_code_point(start_code);\r | |
4273 | \r | |
b602265d | 4274 | digit_count = 0;\r |
14b0e578 CS |
4275 | name_end = end;\r |
4276 | r = 0;\r | |
4277 | if (PEND) {\r | |
4278 | return ONIGERR_EMPTY_GROUP_NAME;\r | |
4279 | }\r | |
4280 | else {\r | |
4281 | PFETCH(c);\r | |
4282 | if (c == end_code)\r | |
4283 | return ONIGERR_EMPTY_GROUP_NAME;\r | |
4284 | \r | |
b602265d DG |
4285 | if (IS_CODE_DIGIT_ASCII(enc, c)) {\r |
4286 | *num_type = IS_ABS_NUM;\r | |
4287 | digit_count++;\r | |
14b0e578 CS |
4288 | }\r |
4289 | else if (c == '-') {\r | |
b602265d | 4290 | *num_type = IS_REL_NUM;\r |
14b0e578 CS |
4291 | sign = -1;\r |
4292 | pnum_head = p;\r | |
4293 | }\r | |
b602265d DG |
4294 | else if (c == '+') {\r |
4295 | *num_type = IS_REL_NUM;\r | |
4296 | sign = 1;\r | |
4297 | pnum_head = p;\r | |
4298 | }\r | |
14b0e578 CS |
4299 | else if (!ONIGENC_IS_CODE_WORD(enc, c)) {\r |
4300 | r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;\r | |
4301 | }\r | |
4302 | }\r | |
4303 | \r | |
4304 | while (!PEND) {\r | |
4305 | name_end = p;\r | |
4306 | PFETCH(c);\r | |
4307 | if (c == end_code || c == ')' || c == '+' || c == '-') {\r | |
b602265d DG |
4308 | if (*num_type != IS_NOT_NUM && digit_count == 0)\r |
4309 | r = ONIGERR_INVALID_GROUP_NAME;\r | |
14b0e578 CS |
4310 | break;\r |
4311 | }\r | |
4312 | \r | |
b602265d DG |
4313 | if (*num_type != IS_NOT_NUM) {\r |
4314 | if (IS_CODE_DIGIT_ASCII(enc, c)) {\r | |
4315 | digit_count++;\r | |
14b0e578 CS |
4316 | }\r |
4317 | else {\r | |
4318 | r = ONIGERR_INVALID_GROUP_NAME;\r | |
b602265d | 4319 | *num_type = IS_NOT_NUM;\r |
14b0e578 CS |
4320 | }\r |
4321 | }\r | |
4322 | else if (!ONIGENC_IS_CODE_WORD(enc, c)) {\r | |
4323 | r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;\r | |
4324 | }\r | |
4325 | }\r | |
4326 | \r | |
4327 | if (r == 0 && c != end_code) {\r | |
4328 | if (c == '+' || c == '-') {\r | |
4329 | int level;\r | |
4330 | int flag = (c == '-' ? -1 : 1);\r | |
4331 | \r | |
b602265d DG |
4332 | if (PEND) {\r |
4333 | r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;\r | |
4334 | goto end;\r | |
4335 | }\r | |
14b0e578 | 4336 | PFETCH(c);\r |
b602265d | 4337 | if (! IS_CODE_DIGIT_ASCII(enc, c)) goto err;\r |
14b0e578 CS |
4338 | PUNFETCH;\r |
4339 | level = onig_scan_unsigned_number(&p, end, enc);\r | |
4340 | if (level < 0) return ONIGERR_TOO_BIG_NUMBER;\r | |
4341 | *rlevel = (level * flag);\r | |
4342 | exist_level = 1;\r | |
4343 | \r | |
b602265d DG |
4344 | if (!PEND) {\r |
4345 | PFETCH(c);\r | |
4346 | if (c == end_code)\r | |
4347 | goto end;\r | |
4348 | }\r | |
14b0e578 CS |
4349 | }\r |
4350 | \r | |
4351 | err:\r | |
14b0e578 | 4352 | name_end = end;\r |
b602265d DG |
4353 | err2:\r |
4354 | r = ONIGERR_INVALID_GROUP_NAME;\r | |
14b0e578 CS |
4355 | }\r |
4356 | \r | |
4357 | end:\r | |
4358 | if (r == 0) {\r | |
b602265d | 4359 | if (*num_type != IS_NOT_NUM) {\r |
14b0e578 CS |
4360 | *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);\r |
4361 | if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;\r | |
b602265d DG |
4362 | else if (*rback_num == 0) {\r |
4363 | if (*num_type == IS_REL_NUM)\r | |
4364 | goto err2;\r | |
4365 | }\r | |
14b0e578 CS |
4366 | \r |
4367 | *rback_num *= sign;\r | |
4368 | }\r | |
4369 | \r | |
4370 | *rname_end = name_end;\r | |
4371 | *src = p;\r | |
4372 | return (exist_level ? 1 : 0);\r | |
4373 | }\r | |
4374 | else {\r | |
4375 | onig_scan_env_set_error_string(env, r, *src, name_end);\r | |
4376 | return r;\r | |
4377 | }\r | |
4378 | }\r | |
4379 | #endif /* USE_BACKREF_WITH_LEVEL */\r | |
4380 | \r | |
4381 | /*\r | |
b602265d | 4382 | ref: 0 -> define name (don't allow number name)\r |
14b0e578 CS |
4383 | 1 -> reference name (allow number name)\r |
4384 | */\r | |
4385 | static int\r | |
4386 | fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,\r | |
b602265d DG |
4387 | UChar** rname_end, ScanEnv* env, int* rback_num,\r |
4388 | enum REF_NUM* num_type, int ref)\r | |
14b0e578 | 4389 | {\r |
b602265d DG |
4390 | int r, sign;\r |
4391 | int digit_count;\r | |
14b0e578 CS |
4392 | OnigCodePoint end_code;\r |
4393 | OnigCodePoint c = 0;\r | |
4394 | OnigEncoding enc = env->enc;\r | |
4395 | UChar *name_end;\r | |
4396 | UChar *pnum_head;\r | |
4397 | UChar *p = *src;\r | |
4398 | \r | |
4399 | *rback_num = 0;\r | |
4400 | \r | |
4401 | end_code = get_name_end_code_point(start_code);\r | |
4402 | \r | |
b602265d | 4403 | digit_count = 0;\r |
14b0e578 CS |
4404 | name_end = end;\r |
4405 | pnum_head = *src;\r | |
4406 | r = 0;\r | |
b602265d | 4407 | *num_type = IS_NOT_NUM;\r |
14b0e578 CS |
4408 | sign = 1;\r |
4409 | if (PEND) {\r | |
4410 | return ONIGERR_EMPTY_GROUP_NAME;\r | |
4411 | }\r | |
4412 | else {\r | |
4413 | PFETCH_S(c);\r | |
4414 | if (c == end_code)\r | |
4415 | return ONIGERR_EMPTY_GROUP_NAME;\r | |
4416 | \r | |
b602265d | 4417 | if (IS_CODE_DIGIT_ASCII(enc, c)) {\r |
14b0e578 | 4418 | if (ref == 1)\r |
b602265d | 4419 | *num_type = IS_ABS_NUM;\r |
14b0e578 CS |
4420 | else {\r |
4421 | r = ONIGERR_INVALID_GROUP_NAME;\r | |
14b0e578 | 4422 | }\r |
b602265d | 4423 | digit_count++;\r |
14b0e578 CS |
4424 | }\r |
4425 | else if (c == '-') {\r | |
4426 | if (ref == 1) {\r | |
b602265d | 4427 | *num_type = IS_REL_NUM;\r |
14b0e578 CS |
4428 | sign = -1;\r |
4429 | pnum_head = p;\r | |
4430 | }\r | |
4431 | else {\r | |
4432 | r = ONIGERR_INVALID_GROUP_NAME;\r | |
14b0e578 CS |
4433 | }\r |
4434 | }\r | |
b602265d DG |
4435 | else if (c == '+') {\r |
4436 | if (ref == 1) {\r | |
4437 | *num_type = IS_REL_NUM;\r | |
4438 | sign = 1;\r | |
4439 | pnum_head = p;\r | |
14b0e578 CS |
4440 | }\r |
4441 | else {\r | |
14b0e578 | 4442 | r = ONIGERR_INVALID_GROUP_NAME;\r |
14b0e578 | 4443 | }\r |
14b0e578 | 4444 | }\r |
b602265d | 4445 | else if (!ONIGENC_IS_CODE_WORD(enc, c)) {\r |
14b0e578 | 4446 | r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;\r |
b602265d | 4447 | }\r |
14b0e578 CS |
4448 | }\r |
4449 | \r | |
4450 | if (r == 0) {\r | |
b602265d DG |
4451 | while (!PEND) {\r |
4452 | name_end = p;\r | |
4453 | PFETCH_S(c);\r | |
4454 | if (c == end_code || c == ')') {\r | |
4455 | if (*num_type != IS_NOT_NUM && digit_count == 0)\r | |
4456 | r = ONIGERR_INVALID_GROUP_NAME;\r | |
4457 | break;\r | |
4458 | }\r | |
4459 | \r | |
4460 | if (*num_type != IS_NOT_NUM) {\r | |
4461 | if (IS_CODE_DIGIT_ASCII(enc, c)) {\r | |
4462 | digit_count++;\r | |
4463 | }\r | |
4464 | else {\r | |
4465 | if (!ONIGENC_IS_CODE_WORD(enc, c))\r | |
4466 | r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;\r | |
4467 | else\r | |
4468 | r = ONIGERR_INVALID_GROUP_NAME;\r | |
4469 | \r | |
4470 | *num_type = IS_NOT_NUM;\r | |
4471 | }\r | |
4472 | }\r | |
4473 | else {\r | |
4474 | if (!ONIGENC_IS_CODE_WORD(enc, c)) {\r | |
4475 | r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;\r | |
4476 | }\r | |
4477 | }\r | |
4478 | }\r | |
4479 | \r | |
4480 | if (c != end_code) {\r | |
14b0e578 CS |
4481 | r = ONIGERR_INVALID_GROUP_NAME;\r |
4482 | goto err;\r | |
4483 | }\r | |
b602265d DG |
4484 | \r |
4485 | if (*num_type != IS_NOT_NUM) {\r | |
4486 | *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);\r | |
4487 | if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;\r | |
4488 | else if (*rback_num == 0) {\r | |
4489 | if (*num_type == IS_REL_NUM) {\r | |
4490 | r = ONIGERR_INVALID_GROUP_NAME;\r | |
4491 | goto err;\r | |
4492 | }\r | |
4493 | }\r | |
4494 | \r | |
4495 | *rback_num *= sign;\r | |
4496 | }\r | |
14b0e578 CS |
4497 | \r |
4498 | *rname_end = name_end;\r | |
4499 | *src = p;\r | |
4500 | return 0;\r | |
4501 | }\r | |
4502 | else {\r | |
b602265d DG |
4503 | while (!PEND) {\r |
4504 | name_end = p;\r | |
4505 | PFETCH_S(c);\r | |
4506 | if (c == end_code || c == ')')\r | |
4507 | break;\r | |
4508 | }\r | |
4509 | if (PEND)\r | |
4510 | name_end = end;\r | |
4511 | \r | |
14b0e578 CS |
4512 | err:\r |
4513 | onig_scan_env_set_error_string(env, r, *src, name_end);\r | |
4514 | return r;\r | |
4515 | }\r | |
4516 | }\r | |
14b0e578 CS |
4517 | \r |
4518 | static void\r | |
4519 | CC_ESC_WARN(ScanEnv* env, UChar *c)\r | |
4520 | {\r | |
4521 | if (onig_warn == onig_null_warn) return ;\r | |
4522 | \r | |
4523 | if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED) &&\r | |
4524 | IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) {\r | |
4525 | UChar buf[WARN_BUFSIZE];\r | |
4526 | onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,\r | |
b602265d DG |
4527 | env->pattern, env->pattern_end,\r |
4528 | (UChar* )"character class has '%s' without escape",\r | |
4529 | c);\r | |
14b0e578 CS |
4530 | (*onig_warn)((char* )buf);\r |
4531 | }\r | |
4532 | }\r | |
4533 | \r | |
4534 | static void\r | |
4535 | CLOSE_BRACKET_WITHOUT_ESC_WARN(ScanEnv* env, UChar* c)\r | |
4536 | {\r | |
4537 | if (onig_warn == onig_null_warn) return ;\r | |
4538 | \r | |
4539 | if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) {\r | |
4540 | UChar buf[WARN_BUFSIZE];\r | |
4541 | onig_snprintf_with_pattern(buf, WARN_BUFSIZE, (env)->enc,\r | |
b602265d DG |
4542 | (env)->pattern, (env)->pattern_end,\r |
4543 | (UChar* )"regular expression has '%s' without escape", c);\r | |
14b0e578 CS |
4544 | (*onig_warn)((char* )buf);\r |
4545 | }\r | |
4546 | }\r | |
4547 | \r | |
4548 | static UChar*\r | |
4549 | find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to,\r | |
b602265d | 4550 | UChar **next, OnigEncoding enc)\r |
14b0e578 CS |
4551 | {\r |
4552 | int i;\r | |
4553 | OnigCodePoint x;\r | |
4554 | UChar *q;\r | |
4555 | UChar *p = from;\r | |
4556 | \r | |
4557 | while (p < to) {\r | |
4558 | x = ONIGENC_MBC_TO_CODE(enc, p, to);\r | |
4559 | q = p + enclen(enc, p);\r | |
4560 | if (x == s[0]) {\r | |
4561 | for (i = 1; i < n && q < to; i++) {\r | |
b602265d DG |
4562 | x = ONIGENC_MBC_TO_CODE(enc, q, to);\r |
4563 | if (x != s[i]) break;\r | |
4564 | q += enclen(enc, q);\r | |
14b0e578 CS |
4565 | }\r |
4566 | if (i >= n) {\r | |
b602265d DG |
4567 | if (IS_NOT_NULL(next))\r |
4568 | *next = q;\r | |
4569 | return p;\r | |
14b0e578 CS |
4570 | }\r |
4571 | }\r | |
4572 | p = q;\r | |
4573 | }\r | |
4574 | return NULL_UCHARP;\r | |
4575 | }\r | |
4576 | \r | |
4577 | static int\r | |
4578 | str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to,\r | |
b602265d | 4579 | OnigCodePoint bad, OnigEncoding enc, OnigSyntaxType* syn)\r |
14b0e578 CS |
4580 | {\r |
4581 | int i, in_esc;\r | |
4582 | OnigCodePoint x;\r | |
4583 | UChar *q;\r | |
4584 | UChar *p = from;\r | |
4585 | \r | |
4586 | in_esc = 0;\r | |
4587 | while (p < to) {\r | |
4588 | if (in_esc) {\r | |
4589 | in_esc = 0;\r | |
4590 | p += enclen(enc, p);\r | |
4591 | }\r | |
4592 | else {\r | |
4593 | x = ONIGENC_MBC_TO_CODE(enc, p, to);\r | |
4594 | q = p + enclen(enc, p);\r | |
4595 | if (x == s[0]) {\r | |
b602265d DG |
4596 | for (i = 1; i < n && q < to; i++) {\r |
4597 | x = ONIGENC_MBC_TO_CODE(enc, q, to);\r | |
4598 | if (x != s[i]) break;\r | |
4599 | q += enclen(enc, q);\r | |
4600 | }\r | |
4601 | if (i >= n) return 1;\r | |
4602 | p += enclen(enc, p);\r | |
14b0e578 CS |
4603 | }\r |
4604 | else {\r | |
b602265d DG |
4605 | x = ONIGENC_MBC_TO_CODE(enc, p, to);\r |
4606 | if (x == bad) return 0;\r | |
4607 | else if (x == MC_ESC(syn)) in_esc = 1;\r | |
4608 | p = q;\r | |
14b0e578 CS |
4609 | }\r |
4610 | }\r | |
4611 | }\r | |
4612 | return 0;\r | |
4613 | }\r | |
4614 | \r | |
4615 | static int\r | |
4616 | fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)\r | |
4617 | {\r | |
4618 | int num;\r | |
4619 | OnigCodePoint c, c2;\r | |
4620 | OnigSyntaxType* syn = env->syntax;\r | |
4621 | OnigEncoding enc = env->enc;\r | |
4622 | UChar* prev;\r | |
4623 | UChar* p = *src;\r | |
4624 | PFETCH_READY;\r | |
4625 | \r | |
4626 | if (PEND) {\r | |
4627 | tok->type = TK_EOT;\r | |
4628 | return tok->type;\r | |
4629 | }\r | |
4630 | \r | |
4631 | PFETCH(c);\r | |
4632 | tok->type = TK_CHAR;\r | |
4633 | tok->base = 0;\r | |
4634 | tok->u.c = c;\r | |
4635 | tok->escaped = 0;\r | |
4636 | \r | |
4637 | if (c == ']') {\r | |
4638 | tok->type = TK_CC_CLOSE;\r | |
4639 | }\r | |
4640 | else if (c == '-') {\r | |
4641 | tok->type = TK_CC_RANGE;\r | |
4642 | }\r | |
4643 | else if (c == MC_ESC(syn)) {\r | |
4644 | if (! IS_SYNTAX_BV(syn, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC))\r | |
4645 | goto end;\r | |
4646 | \r | |
4647 | if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;\r | |
4648 | \r | |
4649 | PFETCH(c);\r | |
4650 | tok->escaped = 1;\r | |
4651 | tok->u.c = c;\r | |
4652 | switch (c) {\r | |
4653 | case 'w':\r | |
4654 | tok->type = TK_CHAR_TYPE;\r | |
4655 | tok->u.prop.ctype = ONIGENC_CTYPE_WORD;\r | |
4656 | tok->u.prop.not = 0;\r | |
4657 | break;\r | |
4658 | case 'W':\r | |
4659 | tok->type = TK_CHAR_TYPE;\r | |
4660 | tok->u.prop.ctype = ONIGENC_CTYPE_WORD;\r | |
4661 | tok->u.prop.not = 1;\r | |
4662 | break;\r | |
4663 | case 'd':\r | |
4664 | tok->type = TK_CHAR_TYPE;\r | |
4665 | tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;\r | |
4666 | tok->u.prop.not = 0;\r | |
4667 | break;\r | |
4668 | case 'D':\r | |
4669 | tok->type = TK_CHAR_TYPE;\r | |
4670 | tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;\r | |
4671 | tok->u.prop.not = 1;\r | |
4672 | break;\r | |
4673 | case 's':\r | |
4674 | tok->type = TK_CHAR_TYPE;\r | |
4675 | tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;\r | |
4676 | tok->u.prop.not = 0;\r | |
4677 | break;\r | |
4678 | case 'S':\r | |
4679 | tok->type = TK_CHAR_TYPE;\r | |
4680 | tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;\r | |
4681 | tok->u.prop.not = 1;\r | |
4682 | break;\r | |
4683 | case 'h':\r | |
4684 | if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;\r | |
4685 | tok->type = TK_CHAR_TYPE;\r | |
4686 | tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;\r | |
4687 | tok->u.prop.not = 0;\r | |
4688 | break;\r | |
4689 | case 'H':\r | |
4690 | if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;\r | |
4691 | tok->type = TK_CHAR_TYPE;\r | |
4692 | tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;\r | |
4693 | tok->u.prop.not = 1;\r | |
4694 | break;\r | |
4695 | \r | |
4696 | case 'p':\r | |
4697 | case 'P':\r | |
b602265d DG |
4698 | if (PEND) break;\r |
4699 | \r | |
14b0e578 CS |
4700 | c2 = PPEEK;\r |
4701 | if (c2 == '{' &&\r | |
b602265d DG |
4702 | IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {\r |
4703 | PINC;\r | |
4704 | tok->type = TK_CHAR_PROPERTY;\r | |
4705 | tok->u.prop.not = (c == 'P' ? 1 : 0);\r | |
4706 | \r | |
4707 | if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {\r | |
4708 | PFETCH(c2);\r | |
4709 | if (c2 == '^') {\r | |
4710 | tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);\r | |
4711 | }\r | |
4712 | else\r | |
4713 | PUNFETCH;\r | |
4714 | }\r | |
4715 | }\r | |
4716 | break;\r | |
4717 | \r | |
4718 | case 'o':\r | |
4719 | if (PEND) break;\r | |
4720 | \r | |
4721 | prev = p;\r | |
4722 | if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_O_BRACE_OCTAL)) {\r | |
4723 | PINC;\r | |
4724 | num = scan_unsigned_octal_number(&p, end, 11, enc);\r | |
4725 | if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;\r | |
4726 | if (!PEND) {\r | |
4727 | c2 = PPEEK;\r | |
4728 | if (IS_CODE_DIGIT_ASCII(enc, c2))\r | |
4729 | return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;\r | |
4730 | }\r | |
4731 | \r | |
4732 | if (p > prev + enclen(enc, prev) && !PEND && (PPEEK_IS('}'))) {\r | |
4733 | PINC;\r | |
4734 | tok->type = TK_CODE_POINT;\r | |
4735 | tok->base = 8;\r | |
4736 | tok->u.code = (OnigCodePoint )num;\r | |
4737 | }\r | |
4738 | else {\r | |
4739 | /* can't read nothing or invalid format */\r | |
4740 | p = prev;\r | |
4741 | }\r | |
14b0e578 CS |
4742 | }\r |
4743 | break;\r | |
4744 | \r | |
4745 | case 'x':\r | |
4746 | if (PEND) break;\r | |
4747 | \r | |
4748 | prev = p;\r | |
4749 | if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {\r | |
b602265d DG |
4750 | PINC;\r |
4751 | num = scan_unsigned_hexadecimal_number(&p, end, 0, 8, enc);\r | |
4752 | if (num < 0) {\r | |
4753 | if (num == ONIGERR_TOO_BIG_NUMBER)\r | |
4754 | return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;\r | |
4755 | else\r | |
4756 | return num;\r | |
4757 | }\r | |
4758 | if (!PEND) {\r | |
14b0e578 | 4759 | c2 = PPEEK;\r |
b602265d | 4760 | if (IS_CODE_XDIGIT_ASCII(enc, c2))\r |
14b0e578 CS |
4761 | return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;\r |
4762 | }\r | |
4763 | \r | |
b602265d DG |
4764 | if (p > prev + enclen(enc, prev) && !PEND && (PPEEK_IS('}'))) {\r |
4765 | PINC;\r | |
4766 | tok->type = TK_CODE_POINT;\r | |
4767 | tok->base = 16;\r | |
4768 | tok->u.code = (OnigCodePoint )num;\r | |
4769 | }\r | |
4770 | else {\r | |
4771 | /* can't read nothing or invalid format */\r | |
4772 | p = prev;\r | |
4773 | }\r | |
14b0e578 CS |
4774 | }\r |
4775 | else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {\r | |
b602265d DG |
4776 | num = scan_unsigned_hexadecimal_number(&p, end, 0, 2, enc);\r |
4777 | if (num < 0) return num;\r | |
4778 | if (p == prev) { /* can't read nothing. */\r | |
4779 | num = 0; /* but, it's not error */\r | |
4780 | }\r | |
4781 | tok->type = TK_RAW_BYTE;\r | |
4782 | tok->base = 16;\r | |
4783 | tok->u.c = num;\r | |
14b0e578 CS |
4784 | }\r |
4785 | break;\r | |
4786 | \r | |
4787 | case 'u':\r | |
4788 | if (PEND) break;\r | |
4789 | \r | |
4790 | prev = p;\r | |
4791 | if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {\r | |
b602265d DG |
4792 | num = scan_unsigned_hexadecimal_number(&p, end, 4, 4, enc);\r |
4793 | if (num < 0) return num;\r | |
4794 | if (p == prev) { /* can't read nothing. */\r | |
4795 | num = 0; /* but, it's not error */\r | |
4796 | }\r | |
4797 | tok->type = TK_CODE_POINT;\r | |
4798 | tok->base = 16;\r | |
4799 | tok->u.code = (OnigCodePoint )num;\r | |
14b0e578 CS |
4800 | }\r |
4801 | break;\r | |
4802 | \r | |
4803 | case '0':\r | |
4804 | case '1': case '2': case '3': case '4': case '5': case '6': case '7':\r | |
4805 | if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {\r | |
b602265d DG |
4806 | PUNFETCH;\r |
4807 | prev = p;\r | |
4808 | num = scan_unsigned_octal_number(&p, end, 3, enc);\r | |
4809 | if (num < 0 || num >= 256) return ONIGERR_TOO_BIG_NUMBER;\r | |
4810 | if (p == prev) { /* can't read nothing. */\r | |
4811 | num = 0; /* but, it's not error */\r | |
4812 | }\r | |
4813 | tok->type = TK_RAW_BYTE;\r | |
4814 | tok->base = 8;\r | |
4815 | tok->u.c = num;\r | |
14b0e578 CS |
4816 | }\r |
4817 | break;\r | |
4818 | \r | |
4819 | default:\r | |
4820 | PUNFETCH;\r | |
b602265d | 4821 | num = fetch_escaped_value(&p, end, env, &c2);\r |
14b0e578 | 4822 | if (num < 0) return num;\r |
b602265d DG |
4823 | if (tok->u.c != c2) {\r |
4824 | tok->u.code = c2;\r | |
4825 | tok->type = TK_CODE_POINT;\r | |
14b0e578 CS |
4826 | }\r |
4827 | break;\r | |
4828 | }\r | |
4829 | }\r | |
4830 | else if (c == '[') {\r | |
4831 | if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_POSIX_BRACKET) && (PPEEK_IS(':'))) {\r | |
4832 | OnigCodePoint send[] = { (OnigCodePoint )':', (OnigCodePoint )']' };\r | |
b602265d | 4833 | tok->backp = p; /* point at '[' is read */\r |
14b0e578 CS |
4834 | PINC;\r |
4835 | if (str_exist_check_with_esc(send, 2, p, end,\r | |
4836 | (OnigCodePoint )']', enc, syn)) {\r | |
b602265d | 4837 | tok->type = TK_POSIX_BRACKET_OPEN;\r |
14b0e578 CS |
4838 | }\r |
4839 | else {\r | |
b602265d DG |
4840 | PUNFETCH;\r |
4841 | goto cc_in_cc;\r | |
14b0e578 CS |
4842 | }\r |
4843 | }\r | |
4844 | else {\r | |
4845 | cc_in_cc:\r | |
4846 | if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP)) {\r | |
b602265d | 4847 | tok->type = TK_CC_CC_OPEN;\r |
14b0e578 CS |
4848 | }\r |
4849 | else {\r | |
b602265d | 4850 | CC_ESC_WARN(env, (UChar* )"[");\r |
14b0e578 CS |
4851 | }\r |
4852 | }\r | |
4853 | }\r | |
4854 | else if (c == '&') {\r | |
4855 | if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP) &&\r | |
b602265d | 4856 | !PEND && (PPEEK_IS('&'))) {\r |
14b0e578 CS |
4857 | PINC;\r |
4858 | tok->type = TK_CC_AND;\r | |
4859 | }\r | |
4860 | }\r | |
4861 | \r | |
4862 | end:\r | |
4863 | *src = p;\r | |
4864 | return tok->type;\r | |
4865 | }\r | |
4866 | \r | |
4867 | static int\r | |
4868 | fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)\r | |
4869 | {\r | |
4870 | int r, num;\r | |
4871 | OnigCodePoint c;\r | |
4872 | OnigEncoding enc = env->enc;\r | |
4873 | OnigSyntaxType* syn = env->syntax;\r | |
4874 | UChar* prev;\r | |
4875 | UChar* p = *src;\r | |
4876 | PFETCH_READY;\r | |
4877 | \r | |
4878 | start:\r | |
4879 | if (PEND) {\r | |
4880 | tok->type = TK_EOT;\r | |
4881 | return tok->type;\r | |
4882 | }\r | |
4883 | \r | |
4884 | tok->type = TK_STRING;\r | |
4885 | tok->base = 0;\r | |
4886 | tok->backp = p;\r | |
4887 | \r | |
4888 | PFETCH(c);\r | |
4889 | if (IS_MC_ESC_CODE(c, syn)) {\r | |
4890 | if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;\r | |
4891 | \r | |
4892 | tok->backp = p;\r | |
4893 | PFETCH(c);\r | |
4894 | \r | |
4895 | tok->u.c = c;\r | |
4896 | tok->escaped = 1;\r | |
4897 | switch (c) {\r | |
4898 | case '*':\r | |
4899 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF)) break;\r | |
4900 | tok->type = TK_OP_REPEAT;\r | |
4901 | tok->u.repeat.lower = 0;\r | |
4902 | tok->u.repeat.upper = REPEAT_INFINITE;\r | |
4903 | goto greedy_check;\r | |
4904 | break;\r | |
4905 | \r | |
4906 | case '+':\r | |
4907 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_PLUS_ONE_INF)) break;\r | |
4908 | tok->type = TK_OP_REPEAT;\r | |
4909 | tok->u.repeat.lower = 1;\r | |
4910 | tok->u.repeat.upper = REPEAT_INFINITE;\r | |
4911 | goto greedy_check;\r | |
4912 | break;\r | |
4913 | \r | |
4914 | case '?':\r | |
4915 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_QMARK_ZERO_ONE)) break;\r | |
4916 | tok->type = TK_OP_REPEAT;\r | |
4917 | tok->u.repeat.lower = 0;\r | |
4918 | tok->u.repeat.upper = 1;\r | |
4919 | greedy_check:\r | |
4920 | if (!PEND && PPEEK_IS('?') &&\r | |
b602265d DG |
4921 | IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY)) {\r |
4922 | PFETCH(c);\r | |
4923 | tok->u.repeat.greedy = 0;\r | |
4924 | tok->u.repeat.possessive = 0;\r | |
14b0e578 CS |
4925 | }\r |
4926 | else {\r | |
4927 | possessive_check:\r | |
b602265d DG |
4928 | if (!PEND && PPEEK_IS('+') &&\r |
4929 | ((IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT) &&\r | |
4930 | tok->type != TK_INTERVAL) ||\r | |
4931 | (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL) &&\r | |
4932 | tok->type == TK_INTERVAL))) {\r | |
4933 | PFETCH(c);\r | |
4934 | tok->u.repeat.greedy = 1;\r | |
4935 | tok->u.repeat.possessive = 1;\r | |
4936 | }\r | |
4937 | else {\r | |
4938 | tok->u.repeat.greedy = 1;\r | |
4939 | tok->u.repeat.possessive = 0;\r | |
4940 | }\r | |
14b0e578 CS |
4941 | }\r |
4942 | break;\r | |
4943 | \r | |
4944 | case '{':\r | |
4945 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break;\r | |
4946 | r = fetch_range_quantifier(&p, end, tok, env);\r | |
4947 | if (r < 0) return r; /* error */\r | |
4948 | if (r == 0) goto greedy_check;\r | |
4949 | else if (r == 2) { /* {n} */\r | |
b602265d DG |
4950 | if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))\r |
4951 | goto possessive_check;\r | |
14b0e578 | 4952 | \r |
b602265d | 4953 | goto greedy_check;\r |
14b0e578 CS |
4954 | }\r |
4955 | /* r == 1 : normal char */\r | |
4956 | break;\r | |
4957 | \r | |
4958 | case '|':\r | |
4959 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_VBAR_ALT)) break;\r | |
4960 | tok->type = TK_ALT;\r | |
4961 | break;\r | |
4962 | \r | |
4963 | case '(':\r | |
4964 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;\r | |
4965 | tok->type = TK_SUBEXP_OPEN;\r | |
4966 | break;\r | |
4967 | \r | |
4968 | case ')':\r | |
4969 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;\r | |
4970 | tok->type = TK_SUBEXP_CLOSE;\r | |
4971 | break;\r | |
4972 | \r | |
4973 | case 'w':\r | |
4974 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;\r | |
4975 | tok->type = TK_CHAR_TYPE;\r | |
4976 | tok->u.prop.ctype = ONIGENC_CTYPE_WORD;\r | |
4977 | tok->u.prop.not = 0;\r | |
4978 | break;\r | |
4979 | \r | |
4980 | case 'W':\r | |
4981 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;\r | |
4982 | tok->type = TK_CHAR_TYPE;\r | |
4983 | tok->u.prop.ctype = ONIGENC_CTYPE_WORD;\r | |
4984 | tok->u.prop.not = 1;\r | |
4985 | break;\r | |
4986 | \r | |
4987 | case 'b':\r | |
4988 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;\r | |
4989 | tok->type = TK_ANCHOR;\r | |
b602265d | 4990 | tok->u.anchor = ANCHOR_WORD_BOUNDARY;\r |
14b0e578 CS |
4991 | break;\r |
4992 | \r | |
4993 | case 'B':\r | |
4994 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;\r | |
4995 | tok->type = TK_ANCHOR;\r | |
b602265d DG |
4996 | tok->u.anchor = ANCHOR_NO_WORD_BOUNDARY;\r |
4997 | break;\r | |
4998 | \r | |
4999 | case 'y':\r | |
5000 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER)) break;\r | |
5001 | tok->type = TK_ANCHOR;\r | |
5002 | tok->u.anchor = ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY;\r | |
5003 | break;\r | |
5004 | \r | |
5005 | case 'Y':\r | |
5006 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER)) break;\r | |
5007 | tok->type = TK_ANCHOR;\r | |
5008 | tok->u.anchor = ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY;\r | |
14b0e578 CS |
5009 | break;\r |
5010 | \r | |
5011 | #ifdef USE_WORD_BEGIN_END\r | |
5012 | case '<':\r | |
5013 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;\r | |
5014 | tok->type = TK_ANCHOR;\r | |
5015 | tok->u.anchor = ANCHOR_WORD_BEGIN;\r | |
5016 | break;\r | |
5017 | \r | |
5018 | case '>':\r | |
5019 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;\r | |
5020 | tok->type = TK_ANCHOR;\r | |
5021 | tok->u.anchor = ANCHOR_WORD_END;\r | |
5022 | break;\r | |
5023 | #endif\r | |
5024 | \r | |
5025 | case 's':\r | |
5026 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;\r | |
5027 | tok->type = TK_CHAR_TYPE;\r | |
5028 | tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;\r | |
5029 | tok->u.prop.not = 0;\r | |
5030 | break;\r | |
5031 | \r | |
5032 | case 'S':\r | |
5033 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;\r | |
5034 | tok->type = TK_CHAR_TYPE;\r | |
5035 | tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;\r | |
5036 | tok->u.prop.not = 1;\r | |
5037 | break;\r | |
5038 | \r | |
5039 | case 'd':\r | |
5040 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;\r | |
5041 | tok->type = TK_CHAR_TYPE;\r | |
5042 | tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;\r | |
5043 | tok->u.prop.not = 0;\r | |
5044 | break;\r | |
5045 | \r | |
5046 | case 'D':\r | |
5047 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;\r | |
5048 | tok->type = TK_CHAR_TYPE;\r | |
5049 | tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;\r | |
5050 | tok->u.prop.not = 1;\r | |
5051 | break;\r | |
5052 | \r | |
5053 | case 'h':\r | |
5054 | if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;\r | |
5055 | tok->type = TK_CHAR_TYPE;\r | |
5056 | tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;\r | |
5057 | tok->u.prop.not = 0;\r | |
5058 | break;\r | |
5059 | \r | |
5060 | case 'H':\r | |
5061 | if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;\r | |
5062 | tok->type = TK_CHAR_TYPE;\r | |
5063 | tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;\r | |
5064 | tok->u.prop.not = 1;\r | |
5065 | break;\r | |
5066 | \r | |
b602265d DG |
5067 | case 'K':\r |
5068 | if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP)) break;\r | |
5069 | tok->type = TK_KEEP;\r | |
5070 | break;\r | |
5071 | \r | |
5072 | case 'R':\r | |
5073 | if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE)) break;\r | |
5074 | tok->type = TK_GENERAL_NEWLINE;\r | |
5075 | break;\r | |
5076 | \r | |
5077 | case 'N':\r | |
5078 | if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT)) break;\r | |
5079 | tok->type = TK_NO_NEWLINE;\r | |
5080 | break;\r | |
5081 | \r | |
5082 | case 'O':\r | |
5083 | if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT)) break;\r | |
5084 | tok->type = TK_TRUE_ANYCHAR;\r | |
5085 | break;\r | |
5086 | \r | |
5087 | case 'X':\r | |
5088 | if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER)) break;\r | |
5089 | tok->type = TK_EXTENDED_GRAPHEME_CLUSTER;\r | |
5090 | break;\r | |
5091 | \r | |
14b0e578 CS |
5092 | case 'A':\r |
5093 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;\r | |
5094 | begin_buf:\r | |
5095 | tok->type = TK_ANCHOR;\r | |
5096 | tok->u.subtype = ANCHOR_BEGIN_BUF;\r | |
5097 | break;\r | |
5098 | \r | |
5099 | case 'Z':\r | |
5100 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;\r | |
5101 | tok->type = TK_ANCHOR;\r | |
5102 | tok->u.subtype = ANCHOR_SEMI_END_BUF;\r | |
5103 | break;\r | |
5104 | \r | |
5105 | case 'z':\r | |
5106 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;\r | |
5107 | end_buf:\r | |
5108 | tok->type = TK_ANCHOR;\r | |
5109 | tok->u.subtype = ANCHOR_END_BUF;\r | |
5110 | break;\r | |
5111 | \r | |
5112 | case 'G':\r | |
5113 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR)) break;\r | |
5114 | tok->type = TK_ANCHOR;\r | |
5115 | tok->u.subtype = ANCHOR_BEGIN_POSITION;\r | |
5116 | break;\r | |
5117 | \r | |
5118 | case '`':\r | |
5119 | if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;\r | |
5120 | goto begin_buf;\r | |
5121 | break;\r | |
5122 | \r | |
5123 | case '\'':\r | |
5124 | if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;\r | |
5125 | goto end_buf;\r | |
5126 | break;\r | |
5127 | \r | |
b602265d DG |
5128 | case 'o':\r |
5129 | if (PEND) break;\r | |
5130 | \r | |
5131 | prev = p;\r | |
5132 | if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_O_BRACE_OCTAL)) {\r | |
5133 | PINC;\r | |
5134 | num = scan_unsigned_octal_number(&p, end, 11, enc);\r | |
5135 | if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;\r | |
5136 | if (!PEND) {\r | |
5137 | if (IS_CODE_DIGIT_ASCII(enc, PPEEK))\r | |
5138 | return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;\r | |
5139 | }\r | |
5140 | \r | |
5141 | if ((p > prev + enclen(enc, prev)) && !PEND && PPEEK_IS('}')) {\r | |
5142 | PINC;\r | |
5143 | tok->type = TK_CODE_POINT;\r | |
5144 | tok->u.code = (OnigCodePoint )num;\r | |
5145 | }\r | |
5146 | else {\r | |
5147 | /* can't read nothing or invalid format */\r | |
5148 | p = prev;\r | |
5149 | }\r | |
5150 | }\r | |
5151 | break;\r | |
5152 | \r | |
14b0e578 CS |
5153 | case 'x':\r |
5154 | if (PEND) break;\r | |
5155 | \r | |
5156 | prev = p;\r | |
5157 | if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {\r | |
b602265d DG |
5158 | PINC;\r |
5159 | num = scan_unsigned_hexadecimal_number(&p, end, 0, 8, enc);\r | |
5160 | if (num < 0) {\r | |
5161 | if (num == ONIGERR_TOO_BIG_NUMBER)\r | |
5162 | return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;\r | |
5163 | else\r | |
5164 | return num;\r | |
5165 | }\r | |
5166 | if (!PEND) {\r | |
5167 | if (IS_CODE_XDIGIT_ASCII(enc, PPEEK))\r | |
14b0e578 CS |
5168 | return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;\r |
5169 | }\r | |
5170 | \r | |
b602265d DG |
5171 | if ((p > prev + enclen(enc, prev)) && !PEND && PPEEK_IS('}')) {\r |
5172 | PINC;\r | |
5173 | tok->type = TK_CODE_POINT;\r | |
5174 | tok->u.code = (OnigCodePoint )num;\r | |
5175 | }\r | |
5176 | else {\r | |
5177 | /* can't read nothing or invalid format */\r | |
5178 | p = prev;\r | |
5179 | }\r | |
14b0e578 CS |
5180 | }\r |
5181 | else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {\r | |
b602265d DG |
5182 | num = scan_unsigned_hexadecimal_number(&p, end, 0, 2, enc);\r |
5183 | if (num < 0) return num;\r | |
5184 | if (p == prev) { /* can't read nothing. */\r | |
5185 | num = 0; /* but, it's not error */\r | |
5186 | }\r | |
5187 | tok->type = TK_RAW_BYTE;\r | |
5188 | tok->base = 16;\r | |
5189 | tok->u.c = num;\r | |
14b0e578 CS |
5190 | }\r |
5191 | break;\r | |
5192 | \r | |
5193 | case 'u':\r | |
5194 | if (PEND) break;\r | |
5195 | \r | |
5196 | prev = p;\r | |
5197 | if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {\r | |
b602265d DG |
5198 | num = scan_unsigned_hexadecimal_number(&p, end, 4, 4, enc);\r |
5199 | if (num < 0) return num;\r | |
5200 | if (p == prev) { /* can't read nothing. */\r | |
5201 | num = 0; /* but, it's not error */\r | |
5202 | }\r | |
5203 | tok->type = TK_CODE_POINT;\r | |
5204 | tok->base = 16;\r | |
5205 | tok->u.code = (OnigCodePoint )num;\r | |
14b0e578 CS |
5206 | }\r |
5207 | break;\r | |
5208 | \r | |
5209 | case '1': case '2': case '3': case '4':\r | |
5210 | case '5': case '6': case '7': case '8': case '9':\r | |
5211 | PUNFETCH;\r | |
5212 | prev = p;\r | |
5213 | num = onig_scan_unsigned_number(&p, end, enc);\r | |
5214 | if (num < 0 || num > ONIG_MAX_BACKREF_NUM) {\r | |
5215 | goto skip_backref;\r | |
5216 | }\r | |
5217 | \r | |
5218 | if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) && \r | |
b602265d DG |
5219 | (num <= env->num_mem || num <= 9)) { /* This spec. from GNU regex */\r |
5220 | if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {\r | |
5221 | if (num > env->num_mem || IS_NULL(SCANENV_MEMENV(env)[num].node))\r | |
5222 | return ONIGERR_INVALID_BACKREF;\r | |
5223 | }\r | |
5224 | \r | |
5225 | tok->type = TK_BACKREF;\r | |
5226 | tok->u.backref.num = 1;\r | |
5227 | tok->u.backref.ref1 = num;\r | |
5228 | tok->u.backref.by_name = 0;\r | |
14b0e578 | 5229 | #ifdef USE_BACKREF_WITH_LEVEL\r |
b602265d | 5230 | tok->u.backref.exist_level = 0;\r |
14b0e578 | 5231 | #endif\r |
b602265d | 5232 | break;\r |
14b0e578 CS |
5233 | }\r |
5234 | \r | |
5235 | skip_backref:\r | |
5236 | if (c == '8' || c == '9') {\r | |
b602265d DG |
5237 | /* normal char */\r |
5238 | p = prev; PINC;\r | |
5239 | break;\r | |
14b0e578 CS |
5240 | }\r |
5241 | \r | |
5242 | p = prev;\r | |
5243 | /* fall through */\r | |
5244 | case '0':\r | |
5245 | if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {\r | |
b602265d DG |
5246 | prev = p;\r |
5247 | num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), enc);\r | |
5248 | if (num < 0 || num >= 256) return ONIGERR_TOO_BIG_NUMBER;\r | |
5249 | if (p == prev) { /* can't read nothing. */\r | |
5250 | num = 0; /* but, it's not error */\r | |
5251 | }\r | |
5252 | tok->type = TK_RAW_BYTE;\r | |
5253 | tok->base = 8;\r | |
5254 | tok->u.c = num;\r | |
14b0e578 CS |
5255 | }\r |
5256 | else if (c != '0') {\r | |
b602265d | 5257 | PINC;\r |
14b0e578 CS |
5258 | }\r |
5259 | break;\r | |
5260 | \r | |
14b0e578 | 5261 | case 'k':\r |
b602265d DG |
5262 | if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) {\r |
5263 | PFETCH(c);\r | |
5264 | if (c == '<' || c == '\'') {\r | |
5265 | UChar* name_end;\r | |
5266 | int* backs;\r | |
5267 | int back_num;\r | |
5268 | enum REF_NUM num_type;\r | |
14b0e578 | 5269 | \r |
b602265d | 5270 | prev = p;\r |
14b0e578 CS |
5271 | \r |
5272 | #ifdef USE_BACKREF_WITH_LEVEL\r | |
b602265d DG |
5273 | name_end = NULL_UCHARP; /* no need. escape gcc warning. */\r |
5274 | r = fetch_name_with_level((OnigCodePoint )c, &p, end, &name_end,\r | |
5275 | env, &back_num, &tok->u.backref.level, &num_type);\r | |
5276 | if (r == 1) tok->u.backref.exist_level = 1;\r | |
5277 | else tok->u.backref.exist_level = 0;\r | |
14b0e578 | 5278 | #else\r |
b602265d | 5279 | r = fetch_name(c, &p, end, &name_end, env, &back_num, &num_type, 1);\r |
14b0e578 | 5280 | #endif\r |
b602265d DG |
5281 | if (r < 0) return r;\r |
5282 | \r | |
5283 | if (num_type != IS_NOT_NUM) {\r | |
5284 | if (num_type == IS_REL_NUM) {\r | |
5285 | back_num = backref_rel_to_abs(back_num, env);\r | |
5286 | }\r | |
5287 | if (back_num <= 0)\r | |
5288 | return ONIGERR_INVALID_BACKREF;\r | |
5289 | \r | |
5290 | if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {\r | |
5291 | if (back_num > env->num_mem ||\r | |
5292 | IS_NULL(SCANENV_MEMENV(env)[back_num].node))\r | |
5293 | return ONIGERR_INVALID_BACKREF;\r | |
5294 | }\r | |
5295 | tok->type = TK_BACKREF;\r | |
5296 | tok->u.backref.by_name = 0;\r | |
5297 | tok->u.backref.num = 1;\r | |
5298 | tok->u.backref.ref1 = back_num;\r | |
5299 | }\r | |
5300 | else {\r | |
5301 | num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);\r | |
5302 | if (num <= 0) {\r | |
5303 | onig_scan_env_set_error_string(env,\r | |
5304 | ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);\r | |
5305 | return ONIGERR_UNDEFINED_NAME_REFERENCE;\r | |
5306 | }\r | |
5307 | if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {\r | |
5308 | int i;\r | |
5309 | for (i = 0; i < num; i++) {\r | |
5310 | if (backs[i] > env->num_mem ||\r | |
5311 | IS_NULL(SCANENV_MEMENV(env)[backs[i]].node))\r | |
5312 | return ONIGERR_INVALID_BACKREF;\r | |
5313 | }\r | |
5314 | }\r | |
5315 | \r | |
5316 | tok->type = TK_BACKREF;\r | |
5317 | tok->u.backref.by_name = 1;\r | |
5318 | if (num == 1) {\r | |
5319 | tok->u.backref.num = 1;\r | |
5320 | tok->u.backref.ref1 = backs[0];\r | |
5321 | }\r | |
5322 | else {\r | |
5323 | tok->u.backref.num = num;\r | |
5324 | tok->u.backref.refs = backs;\r | |
5325 | }\r | |
5326 | }\r | |
5327 | }\r | |
5328 | else\r | |
5329 | PUNFETCH;\r | |
14b0e578 CS |
5330 | }\r |
5331 | break;\r | |
14b0e578 | 5332 | \r |
b602265d | 5333 | #ifdef USE_CALL\r |
14b0e578 | 5334 | case 'g':\r |
b602265d DG |
5335 | if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) {\r |
5336 | PFETCH(c);\r | |
5337 | if (c == '<' || c == '\'') {\r | |
5338 | int gnum;\r | |
5339 | UChar* name_end;\r | |
5340 | enum REF_NUM num_type;\r | |
5341 | \r | |
5342 | prev = p;\r | |
5343 | r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env,\r | |
5344 | &gnum, &num_type, 1);\r | |
5345 | if (r < 0) return r;\r | |
5346 | \r | |
5347 | if (num_type != IS_NOT_NUM) {\r | |
5348 | if (num_type == IS_REL_NUM) {\r | |
5349 | gnum = backref_rel_to_abs(gnum, env);\r | |
5350 | if (gnum < 0) {\r | |
5351 | onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,\r | |
5352 | prev, name_end);\r | |
5353 | return ONIGERR_UNDEFINED_GROUP_REFERENCE;\r | |
5354 | }\r | |
5355 | }\r | |
5356 | tok->u.call.by_number = 1;\r | |
5357 | tok->u.call.gnum = gnum;\r | |
5358 | }\r | |
5359 | else {\r | |
5360 | tok->u.call.by_number = 0;\r | |
5361 | tok->u.call.gnum = 0;\r | |
5362 | }\r | |
5363 | \r | |
5364 | tok->type = TK_CALL;\r | |
5365 | tok->u.call.name = prev;\r | |
5366 | tok->u.call.name_end = name_end;\r | |
5367 | }\r | |
5368 | else\r | |
5369 | PUNFETCH;\r | |
14b0e578 CS |
5370 | }\r |
5371 | break;\r | |
5372 | #endif\r | |
5373 | \r | |
5374 | case 'Q':\r | |
5375 | if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE)) {\r | |
b602265d | 5376 | tok->type = TK_QUOTE_OPEN;\r |
14b0e578 CS |
5377 | }\r |
5378 | break;\r | |
5379 | \r | |
5380 | case 'p':\r | |
5381 | case 'P':\r | |
b602265d DG |
5382 | if (!PEND && PPEEK_IS('{') &&\r |
5383 | IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {\r | |
5384 | PINC;\r | |
5385 | tok->type = TK_CHAR_PROPERTY;\r | |
5386 | tok->u.prop.not = (c == 'P' ? 1 : 0);\r | |
5387 | \r | |
5388 | if (!PEND &&\r | |
5389 | IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {\r | |
5390 | PFETCH(c);\r | |
5391 | if (c == '^') {\r | |
5392 | tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);\r | |
5393 | }\r | |
5394 | else\r | |
5395 | PUNFETCH;\r | |
5396 | }\r | |
14b0e578 CS |
5397 | }\r |
5398 | break;\r | |
5399 | \r | |
5400 | default:\r | |
b602265d DG |
5401 | {\r |
5402 | OnigCodePoint c2;\r | |
5403 | \r | |
5404 | PUNFETCH;\r | |
5405 | num = fetch_escaped_value(&p, end, env, &c2);\r | |
5406 | if (num < 0) return num;\r | |
5407 | /* set_raw: */\r | |
5408 | if (tok->u.c != c2) {\r | |
5409 | tok->type = TK_CODE_POINT;\r | |
5410 | tok->u.code = c2;\r | |
5411 | }\r | |
5412 | else { /* string */\r | |
5413 | p = tok->backp + enclen(enc, tok->backp);\r | |
5414 | }\r | |
14b0e578 CS |
5415 | }\r |
5416 | break;\r | |
5417 | }\r | |
5418 | }\r | |
5419 | else {\r | |
5420 | tok->u.c = c;\r | |
5421 | tok->escaped = 0;\r | |
5422 | \r | |
5423 | #ifdef USE_VARIABLE_META_CHARS\r | |
5424 | if ((c != ONIG_INEFFECTIVE_META_CHAR) &&\r | |
b602265d | 5425 | IS_SYNTAX_OP(syn, ONIG_SYN_OP_VARIABLE_META_CHARACTERS)) {\r |
14b0e578 | 5426 | if (c == MC_ANYCHAR(syn))\r |
b602265d | 5427 | goto any_char;\r |
14b0e578 | 5428 | else if (c == MC_ANYTIME(syn))\r |
b602265d | 5429 | goto anytime;\r |
14b0e578 | 5430 | else if (c == MC_ZERO_OR_ONE_TIME(syn))\r |
b602265d | 5431 | goto zero_or_one_time;\r |
14b0e578 | 5432 | else if (c == MC_ONE_OR_MORE_TIME(syn))\r |
b602265d | 5433 | goto one_or_more_time;\r |
14b0e578 | 5434 | else if (c == MC_ANYCHAR_ANYTIME(syn)) {\r |
b602265d DG |
5435 | tok->type = TK_ANYCHAR_ANYTIME;\r |
5436 | goto out;\r | |
14b0e578 CS |
5437 | }\r |
5438 | }\r | |
5439 | #endif\r | |
5440 | \r | |
5441 | switch (c) {\r | |
5442 | case '.':\r | |
5443 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_DOT_ANYCHAR)) break;\r | |
5444 | #ifdef USE_VARIABLE_META_CHARS\r | |
5445 | any_char:\r | |
5446 | #endif\r | |
5447 | tok->type = TK_ANYCHAR;\r | |
5448 | break;\r | |
5449 | \r | |
5450 | case '*':\r | |
5451 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ASTERISK_ZERO_INF)) break;\r | |
5452 | #ifdef USE_VARIABLE_META_CHARS\r | |
5453 | anytime:\r | |
5454 | #endif\r | |
5455 | tok->type = TK_OP_REPEAT;\r | |
5456 | tok->u.repeat.lower = 0;\r | |
5457 | tok->u.repeat.upper = REPEAT_INFINITE;\r | |
5458 | goto greedy_check;\r | |
5459 | break;\r | |
5460 | \r | |
5461 | case '+':\r | |
5462 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_PLUS_ONE_INF)) break;\r | |
5463 | #ifdef USE_VARIABLE_META_CHARS\r | |
5464 | one_or_more_time:\r | |
5465 | #endif\r | |
5466 | tok->type = TK_OP_REPEAT;\r | |
5467 | tok->u.repeat.lower = 1;\r | |
5468 | tok->u.repeat.upper = REPEAT_INFINITE;\r | |
5469 | goto greedy_check;\r | |
5470 | break;\r | |
5471 | \r | |
5472 | case '?':\r | |
5473 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_ZERO_ONE)) break;\r | |
5474 | #ifdef USE_VARIABLE_META_CHARS\r | |
5475 | zero_or_one_time:\r | |
5476 | #endif\r | |
5477 | tok->type = TK_OP_REPEAT;\r | |
5478 | tok->u.repeat.lower = 0;\r | |
5479 | tok->u.repeat.upper = 1;\r | |
5480 | goto greedy_check;\r | |
5481 | break;\r | |
5482 | \r | |
5483 | case '{':\r | |
5484 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break;\r | |
5485 | r = fetch_range_quantifier(&p, end, tok, env);\r | |
5486 | if (r < 0) return r; /* error */\r | |
5487 | if (r == 0) goto greedy_check;\r | |
5488 | else if (r == 2) { /* {n} */\r | |
b602265d DG |
5489 | if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))\r |
5490 | goto possessive_check;\r | |
14b0e578 | 5491 | \r |
b602265d | 5492 | goto greedy_check;\r |
14b0e578 CS |
5493 | }\r |
5494 | /* r == 1 : normal char */\r | |
5495 | break;\r | |
5496 | \r | |
5497 | case '|':\r | |
5498 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_VBAR_ALT)) break;\r | |
5499 | tok->type = TK_ALT;\r | |
5500 | break;\r | |
5501 | \r | |
5502 | case '(':\r | |
b602265d | 5503 | if (!PEND && PPEEK_IS('?') &&\r |
14b0e578 CS |
5504 | IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {\r |
5505 | PINC;\r | |
b602265d DG |
5506 | if (! PEND) {\r |
5507 | c = PPEEK;\r | |
5508 | if (c == '#') {\r | |
14b0e578 | 5509 | PFETCH(c);\r |
b602265d DG |
5510 | while (1) {\r |
5511 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
5512 | PFETCH(c);\r | |
5513 | if (c == MC_ESC(syn)) {\r | |
5514 | if (! PEND) PFETCH(c);\r | |
5515 | }\r | |
5516 | else {\r | |
5517 | if (c == ')') break;\r | |
5518 | }\r | |
14b0e578 | 5519 | }\r |
b602265d DG |
5520 | goto start;\r |
5521 | }\r | |
5522 | else if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_PERL_SUBEXP_CALL)) {\r | |
5523 | int gnum;\r | |
5524 | UChar* name;\r | |
5525 | UChar* name_end;\r | |
5526 | enum REF_NUM num_type;\r | |
5527 | \r | |
5528 | switch (c) {\r | |
5529 | case '&':\r | |
5530 | {\r | |
5531 | PINC;\r | |
5532 | name = p;\r | |
5533 | r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &gnum,\r | |
5534 | &num_type, 0);\r | |
5535 | if (r < 0) return r;\r | |
5536 | \r | |
5537 | tok->type = TK_CALL;\r | |
5538 | tok->u.call.by_number = 0;\r | |
5539 | tok->u.call.gnum = 0;\r | |
5540 | tok->u.call.name = name;\r | |
5541 | tok->u.call.name_end = name_end;\r | |
5542 | }\r | |
5543 | break;\r | |
5544 | \r | |
5545 | case 'R':\r | |
5546 | tok->type = TK_CALL;\r | |
5547 | tok->u.call.by_number = 1;\r | |
5548 | tok->u.call.gnum = 0;\r | |
5549 | tok->u.call.name = p;\r | |
5550 | PINC;\r | |
5551 | if (! PPEEK_IS(')')) return ONIGERR_INVALID_GROUP_NAME;\r | |
5552 | tok->u.call.name_end = p;\r | |
5553 | break;\r | |
5554 | \r | |
5555 | case '-':\r | |
5556 | case '+':\r | |
5557 | goto lparen_qmark_num;\r | |
5558 | break;\r | |
5559 | default:\r | |
5560 | if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto lparen_qmark_end;\r | |
5561 | \r | |
5562 | lparen_qmark_num:\r | |
5563 | {\r | |
5564 | name = p;\r | |
5565 | r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env,\r | |
5566 | &gnum, &num_type, 1);\r | |
5567 | if (r < 0) return r;\r | |
5568 | \r | |
5569 | if (num_type == IS_NOT_NUM) {\r | |
5570 | return ONIGERR_INVALID_GROUP_NAME;\r | |
5571 | }\r | |
5572 | else {\r | |
5573 | if (num_type == IS_REL_NUM) {\r | |
5574 | gnum = backref_rel_to_abs(gnum, env);\r | |
5575 | if (gnum < 0) {\r | |
5576 | onig_scan_env_set_error_string(env,\r | |
5577 | ONIGERR_UNDEFINED_NAME_REFERENCE, name, name_end);\r | |
5578 | return ONIGERR_UNDEFINED_GROUP_REFERENCE;\r | |
5579 | }\r | |
5580 | }\r | |
5581 | tok->u.call.by_number = 1;\r | |
5582 | tok->u.call.gnum = gnum;\r | |
5583 | }\r | |
5584 | \r | |
5585 | tok->type = TK_CALL;\r | |
5586 | tok->u.call.name = name;\r | |
5587 | tok->u.call.name_end = name_end;\r | |
5588 | }\r | |
5589 | break;\r | |
14b0e578 CS |
5590 | }\r |
5591 | }\r | |
14b0e578 | 5592 | }\r |
b602265d | 5593 | lparen_qmark_end:\r |
14b0e578 CS |
5594 | PUNFETCH;\r |
5595 | }\r | |
5596 | \r | |
5597 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;\r | |
5598 | tok->type = TK_SUBEXP_OPEN;\r | |
5599 | break;\r | |
5600 | \r | |
5601 | case ')':\r | |
5602 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;\r | |
5603 | tok->type = TK_SUBEXP_CLOSE;\r | |
5604 | break;\r | |
5605 | \r | |
5606 | case '^':\r | |
5607 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;\r | |
5608 | tok->type = TK_ANCHOR;\r | |
b602265d DG |
5609 | tok->u.subtype = (IS_SINGLELINE(env->options)\r |
5610 | ? ANCHOR_BEGIN_BUF : ANCHOR_BEGIN_LINE);\r | |
14b0e578 CS |
5611 | break;\r |
5612 | \r | |
5613 | case '$':\r | |
5614 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;\r | |
5615 | tok->type = TK_ANCHOR;\r | |
b602265d DG |
5616 | tok->u.subtype = (IS_SINGLELINE(env->options)\r |
5617 | ? ANCHOR_SEMI_END_BUF : ANCHOR_END_LINE);\r | |
14b0e578 CS |
5618 | break;\r |
5619 | \r | |
5620 | case '[':\r | |
5621 | if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACKET_CC)) break;\r | |
5622 | tok->type = TK_CC_OPEN;\r | |
5623 | break;\r | |
5624 | \r | |
5625 | case ']':\r | |
5626 | if (*src > env->pattern) /* /].../ is allowed. */\r | |
b602265d | 5627 | CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]");\r |
14b0e578 CS |
5628 | break;\r |
5629 | \r | |
5630 | case '#':\r | |
b602265d DG |
5631 | if (IS_EXTEND(env->options)) {\r |
5632 | while (!PEND) {\r | |
5633 | PFETCH(c);\r | |
5634 | if (ONIGENC_IS_CODE_NEWLINE(enc, c))\r | |
5635 | break;\r | |
5636 | }\r | |
5637 | goto start;\r | |
5638 | break;\r | |
14b0e578 CS |
5639 | }\r |
5640 | break;\r | |
5641 | \r | |
5642 | case ' ': case '\t': case '\n': case '\r': case '\f':\r | |
b602265d DG |
5643 | if (IS_EXTEND(env->options))\r |
5644 | goto start;\r | |
14b0e578 CS |
5645 | break;\r |
5646 | \r | |
5647 | default:\r | |
5648 | /* string */\r | |
5649 | break;\r | |
5650 | }\r | |
5651 | }\r | |
5652 | \r | |
5653 | #ifdef USE_VARIABLE_META_CHARS\r | |
5654 | out:\r | |
5655 | #endif\r | |
5656 | *src = p;\r | |
5657 | return tok->type;\r | |
5658 | }\r | |
5659 | \r | |
5660 | static int\r | |
5661 | add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not,\r | |
b602265d DG |
5662 | OnigEncoding enc ARG_UNUSED, OnigCodePoint sb_out,\r |
5663 | const OnigCodePoint mbr[])\r | |
14b0e578 CS |
5664 | {\r |
5665 | int i, r;\r | |
5666 | OnigCodePoint j;\r | |
5667 | \r | |
5668 | int n = ONIGENC_CODE_RANGE_NUM(mbr);\r | |
5669 | \r | |
5670 | if (not == 0) {\r | |
5671 | for (i = 0; i < n; i++) {\r | |
5672 | for (j = ONIGENC_CODE_RANGE_FROM(mbr, i);\r | |
5673 | j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) {\r | |
b602265d DG |
5674 | if (j >= sb_out) {\r |
5675 | if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) {\r | |
5676 | r = add_code_range_to_buf(&(cc->mbuf), j,\r | |
5677 | ONIGENC_CODE_RANGE_TO(mbr, i));\r | |
5678 | if (r != 0) return r;\r | |
5679 | i++;\r | |
5680 | }\r | |
5681 | \r | |
5682 | goto sb_end;\r | |
5683 | }\r | |
14b0e578 CS |
5684 | BITSET_SET_BIT(cc->bs, j);\r |
5685 | }\r | |
5686 | }\r | |
5687 | \r | |
5688 | sb_end:\r | |
5689 | for ( ; i < n; i++) {\r | |
5690 | r = add_code_range_to_buf(&(cc->mbuf),\r | |
5691 | ONIGENC_CODE_RANGE_FROM(mbr, i),\r | |
5692 | ONIGENC_CODE_RANGE_TO(mbr, i));\r | |
5693 | if (r != 0) return r;\r | |
5694 | }\r | |
5695 | }\r | |
5696 | else {\r | |
5697 | OnigCodePoint prev = 0;\r | |
5698 | \r | |
5699 | for (i = 0; i < n; i++) {\r | |
b602265d DG |
5700 | for (j = prev; j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) {\r |
5701 | if (j >= sb_out) {\r | |
5702 | goto sb_end2;\r | |
5703 | }\r | |
5704 | BITSET_SET_BIT(cc->bs, j);\r | |
14b0e578 CS |
5705 | }\r |
5706 | prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;\r | |
5707 | }\r | |
5708 | for (j = prev; j < sb_out; j++) {\r | |
5709 | BITSET_SET_BIT(cc->bs, j);\r | |
5710 | }\r | |
5711 | \r | |
5712 | sb_end2:\r | |
5713 | prev = sb_out;\r | |
5714 | \r | |
5715 | for (i = 0; i < n; i++) {\r | |
5716 | if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) {\r | |
b602265d | 5717 | r = add_code_range_to_buf(&(cc->mbuf), prev,\r |
14b0e578 | 5718 | ONIGENC_CODE_RANGE_FROM(mbr, i) - 1);\r |
b602265d | 5719 | if (r != 0) return r;\r |
14b0e578 CS |
5720 | }\r |
5721 | prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;\r | |
b602265d DG |
5722 | if (prev == 0) goto end;\r |
5723 | }\r | |
5724 | \r | |
5725 | r = add_code_range_to_buf(&(cc->mbuf), prev, MAX_CODE_POINT);\r | |
5726 | if (r != 0) return r;\r | |
5727 | }\r | |
5728 | \r | |
5729 | end:\r | |
5730 | return 0;\r | |
5731 | }\r | |
5732 | \r | |
5733 | static int\r | |
5734 | add_ctype_to_cc_by_range_limit(CClassNode* cc, int ctype ARG_UNUSED, int not,\r | |
5735 | OnigEncoding enc ARG_UNUSED,\r | |
5736 | OnigCodePoint sb_out,\r | |
5737 | const OnigCodePoint mbr[], OnigCodePoint limit)\r | |
5738 | {\r | |
5739 | int i, r;\r | |
5740 | OnigCodePoint j;\r | |
5741 | OnigCodePoint from;\r | |
5742 | OnigCodePoint to;\r | |
5743 | \r | |
5744 | int n = ONIGENC_CODE_RANGE_NUM(mbr);\r | |
5745 | \r | |
5746 | if (not == 0) {\r | |
5747 | for (i = 0; i < n; i++) {\r | |
5748 | for (j = ONIGENC_CODE_RANGE_FROM(mbr, i);\r | |
5749 | j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) {\r | |
5750 | if (j > limit) goto end;\r | |
5751 | if (j >= sb_out) {\r | |
5752 | if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) {\r | |
5753 | to = ONIGENC_CODE_RANGE_TO(mbr, i);\r | |
5754 | if (to > limit) to = limit;\r | |
5755 | r = add_code_range_to_buf(&(cc->mbuf), j, to);\r | |
5756 | if (r != 0) return r;\r | |
5757 | i++;\r | |
5758 | }\r | |
5759 | \r | |
5760 | goto sb_end;\r | |
5761 | }\r | |
5762 | BITSET_SET_BIT(cc->bs, j);\r | |
5763 | }\r | |
14b0e578 | 5764 | }\r |
b602265d DG |
5765 | \r |
5766 | sb_end:\r | |
5767 | for ( ; i < n; i++) {\r | |
5768 | from = ONIGENC_CODE_RANGE_FROM(mbr, i);\r | |
5769 | to = ONIGENC_CODE_RANGE_TO(mbr, i);\r | |
5770 | if (from > limit) break;\r | |
5771 | if (to > limit) to = limit;\r | |
5772 | r = add_code_range_to_buf(&(cc->mbuf), from, to);\r | |
14b0e578 CS |
5773 | if (r != 0) return r;\r |
5774 | }\r | |
5775 | }\r | |
b602265d DG |
5776 | else {\r |
5777 | OnigCodePoint prev = 0;\r | |
5778 | \r | |
5779 | for (i = 0; i < n; i++) {\r | |
5780 | from = ONIGENC_CODE_RANGE_FROM(mbr, i);\r | |
5781 | if (from > limit) {\r | |
5782 | for (j = prev; j < sb_out; j++) {\r | |
5783 | BITSET_SET_BIT(cc->bs, j);\r | |
5784 | }\r | |
5785 | goto sb_end2;\r | |
5786 | }\r | |
5787 | for (j = prev; j < from; j++) {\r | |
5788 | if (j >= sb_out) goto sb_end2;\r | |
5789 | BITSET_SET_BIT(cc->bs, j);\r | |
5790 | }\r | |
5791 | prev = ONIGENC_CODE_RANGE_TO(mbr, i);\r | |
5792 | if (prev > limit) prev = limit;\r | |
5793 | prev++;\r | |
5794 | if (prev == 0) goto end;\r | |
5795 | }\r | |
5796 | for (j = prev; j < sb_out; j++) {\r | |
5797 | BITSET_SET_BIT(cc->bs, j);\r | |
5798 | }\r | |
5799 | \r | |
5800 | sb_end2:\r | |
5801 | prev = sb_out;\r | |
5802 | \r | |
5803 | for (i = 0; i < n; i++) {\r | |
5804 | from = ONIGENC_CODE_RANGE_FROM(mbr, i);\r | |
5805 | if (from > limit) goto last;\r | |
5806 | \r | |
5807 | if (prev < from) {\r | |
5808 | r = add_code_range_to_buf(&(cc->mbuf), prev, from - 1);\r | |
5809 | if (r != 0) return r;\r | |
5810 | }\r | |
5811 | prev = ONIGENC_CODE_RANGE_TO(mbr, i);\r | |
5812 | if (prev > limit) prev = limit;\r | |
5813 | prev++;\r | |
5814 | if (prev == 0) goto end;\r | |
5815 | }\r | |
5816 | \r | |
5817 | last:\r | |
5818 | r = add_code_range_to_buf(&(cc->mbuf), prev, MAX_CODE_POINT);\r | |
5819 | if (r != 0) return r;\r | |
5820 | }\r | |
14b0e578 | 5821 | \r |
b602265d | 5822 | end:\r |
14b0e578 CS |
5823 | return 0;\r |
5824 | }\r | |
5825 | \r | |
5826 | static int\r | |
5827 | add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)\r | |
5828 | {\r | |
b602265d DG |
5829 | #define ASCII_LIMIT 127\r |
5830 | \r | |
14b0e578 | 5831 | int c, r;\r |
b602265d | 5832 | int ascii_mode;\r |
14b0e578 | 5833 | const OnigCodePoint *ranges;\r |
b602265d | 5834 | OnigCodePoint limit;\r |
14b0e578 CS |
5835 | OnigCodePoint sb_out;\r |
5836 | OnigEncoding enc = env->enc;\r | |
5837 | \r | |
b602265d DG |
5838 | ascii_mode = IS_ASCII_MODE_CTYPE_OPTION(ctype, env->options);\r |
5839 | \r | |
14b0e578 CS |
5840 | r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges);\r |
5841 | if (r == 0) {\r | |
b602265d DG |
5842 | if (ascii_mode == 0)\r |
5843 | r = add_ctype_to_cc_by_range(cc, ctype, not, env->enc, sb_out, ranges);\r | |
5844 | else\r | |
5845 | r = add_ctype_to_cc_by_range_limit(cc, ctype, not, env->enc, sb_out,\r | |
5846 | ranges, ASCII_LIMIT);\r | |
5847 | return r;\r | |
14b0e578 CS |
5848 | }\r |
5849 | else if (r != ONIG_NO_SUPPORT_CONFIG) {\r | |
5850 | return r;\r | |
5851 | }\r | |
5852 | \r | |
5853 | r = 0;\r | |
b602265d DG |
5854 | limit = ascii_mode ? ASCII_LIMIT : SINGLE_BYTE_SIZE;\r |
5855 | \r | |
14b0e578 CS |
5856 | switch (ctype) {\r |
5857 | case ONIGENC_CTYPE_ALPHA:\r | |
5858 | case ONIGENC_CTYPE_BLANK:\r | |
5859 | case ONIGENC_CTYPE_CNTRL:\r | |
5860 | case ONIGENC_CTYPE_DIGIT:\r | |
5861 | case ONIGENC_CTYPE_LOWER:\r | |
5862 | case ONIGENC_CTYPE_PUNCT:\r | |
5863 | case ONIGENC_CTYPE_SPACE:\r | |
5864 | case ONIGENC_CTYPE_UPPER:\r | |
5865 | case ONIGENC_CTYPE_XDIGIT:\r | |
5866 | case ONIGENC_CTYPE_ASCII:\r | |
5867 | case ONIGENC_CTYPE_ALNUM:\r | |
5868 | if (not != 0) {\r | |
b602265d DG |
5869 | for (c = 0; c < (int )limit; c++) {\r |
5870 | if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))\r | |
5871 | BITSET_SET_BIT(cc->bs, c);\r | |
5872 | }\r | |
5873 | for (c = limit; c < SINGLE_BYTE_SIZE; c++) {\r | |
5874 | BITSET_SET_BIT(cc->bs, c);\r | |
14b0e578 | 5875 | }\r |
b602265d | 5876 | \r |
14b0e578 CS |
5877 | ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);\r |
5878 | }\r | |
5879 | else {\r | |
b602265d DG |
5880 | for (c = 0; c < (int )limit; c++) {\r |
5881 | if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))\r | |
5882 | BITSET_SET_BIT(cc->bs, c);\r | |
14b0e578 CS |
5883 | }\r |
5884 | }\r | |
5885 | break;\r | |
5886 | \r | |
5887 | case ONIGENC_CTYPE_GRAPH:\r | |
5888 | case ONIGENC_CTYPE_PRINT:\r | |
b602265d | 5889 | case ONIGENC_CTYPE_WORD:\r |
14b0e578 | 5890 | if (not != 0) {\r |
b602265d DG |
5891 | for (c = 0; c < (int )limit; c++) {\r |
5892 | if (ONIGENC_CODE_TO_MBCLEN(enc, c) > 0 /* check invalid code point */\r | |
5893 | && ! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))\r | |
5894 | BITSET_SET_BIT(cc->bs, c);\r | |
14b0e578 | 5895 | }\r |
b602265d DG |
5896 | for (c = limit; c < SINGLE_BYTE_SIZE; c++) {\r |
5897 | if (ONIGENC_CODE_TO_MBCLEN(enc, c) > 0)\r | |
5898 | BITSET_SET_BIT(cc->bs, c);\r | |
14b0e578 | 5899 | }\r |
14b0e578 CS |
5900 | }\r |
5901 | else {\r | |
b602265d DG |
5902 | for (c = 0; c < (int )limit; c++) {\r |
5903 | if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))\r | |
5904 | BITSET_SET_BIT(cc->bs, c);\r | |
14b0e578 | 5905 | }\r |
b602265d DG |
5906 | if (ascii_mode == 0)\r |
5907 | ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);\r | |
14b0e578 CS |
5908 | }\r |
5909 | break;\r | |
5910 | \r | |
5911 | default:\r | |
5912 | return ONIGERR_PARSER_BUG;\r | |
5913 | break;\r | |
5914 | }\r | |
5915 | \r | |
5916 | return r;\r | |
5917 | }\r | |
5918 | \r | |
5919 | static int\r | |
5920 | parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)\r | |
5921 | {\r | |
5922 | #define POSIX_BRACKET_CHECK_LIMIT_LENGTH 20\r | |
5923 | #define POSIX_BRACKET_NAME_MIN_LEN 4\r | |
5924 | \r | |
5925 | static PosixBracketEntryType PBS[] = {\r | |
5926 | { (UChar* )"alnum", ONIGENC_CTYPE_ALNUM, 5 },\r | |
5927 | { (UChar* )"alpha", ONIGENC_CTYPE_ALPHA, 5 },\r | |
5928 | { (UChar* )"blank", ONIGENC_CTYPE_BLANK, 5 },\r | |
5929 | { (UChar* )"cntrl", ONIGENC_CTYPE_CNTRL, 5 },\r | |
5930 | { (UChar* )"digit", ONIGENC_CTYPE_DIGIT, 5 },\r | |
5931 | { (UChar* )"graph", ONIGENC_CTYPE_GRAPH, 5 },\r | |
5932 | { (UChar* )"lower", ONIGENC_CTYPE_LOWER, 5 },\r | |
5933 | { (UChar* )"print", ONIGENC_CTYPE_PRINT, 5 },\r | |
5934 | { (UChar* )"punct", ONIGENC_CTYPE_PUNCT, 5 },\r | |
5935 | { (UChar* )"space", ONIGENC_CTYPE_SPACE, 5 },\r | |
5936 | { (UChar* )"upper", ONIGENC_CTYPE_UPPER, 5 },\r | |
5937 | { (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 },\r | |
5938 | { (UChar* )"ascii", ONIGENC_CTYPE_ASCII, 5 },\r | |
5939 | { (UChar* )"word", ONIGENC_CTYPE_WORD, 4 },\r | |
5940 | { (UChar* )NULL, -1, 0 }\r | |
5941 | };\r | |
5942 | \r | |
5943 | PosixBracketEntryType *pb;\r | |
5944 | int not, i, r;\r | |
5945 | OnigCodePoint c;\r | |
5946 | OnigEncoding enc = env->enc;\r | |
5947 | UChar *p = *src;\r | |
5948 | \r | |
5949 | if (PPEEK_IS('^')) {\r | |
5950 | PINC_S;\r | |
5951 | not = 1;\r | |
5952 | }\r | |
5953 | else\r | |
5954 | not = 0;\r | |
5955 | \r | |
5956 | if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MIN_LEN + 3)\r | |
5957 | goto not_posix_bracket;\r | |
5958 | \r | |
5959 | for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {\r | |
5960 | if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) {\r | |
5961 | p = (UChar* )onigenc_step(enc, p, end, pb->len);\r | |
5962 | if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0)\r | |
5963 | return ONIGERR_INVALID_POSIX_BRACKET_TYPE;\r | |
5964 | \r | |
5965 | r = add_ctype_to_cc(cc, pb->ctype, not, env);\r | |
5966 | if (r != 0) return r;\r | |
5967 | \r | |
5968 | PINC_S; PINC_S;\r | |
5969 | *src = p;\r | |
5970 | return 0;\r | |
5971 | }\r | |
5972 | }\r | |
5973 | \r | |
5974 | not_posix_bracket:\r | |
5975 | c = 0;\r | |
5976 | i = 0;\r | |
5977 | while (!PEND && ((c = PPEEK) != ':') && c != ']') {\r | |
5978 | PINC_S;\r | |
5979 | if (++i > POSIX_BRACKET_CHECK_LIMIT_LENGTH) break;\r | |
5980 | }\r | |
5981 | if (c == ':' && ! PEND) {\r | |
5982 | PINC_S;\r | |
5983 | if (! PEND) {\r | |
5984 | PFETCH_S(c);\r | |
5985 | if (c == ']')\r | |
5986 | return ONIGERR_INVALID_POSIX_BRACKET_TYPE;\r | |
5987 | }\r | |
5988 | }\r | |
5989 | \r | |
5990 | return 1; /* 1: is not POSIX bracket, but no error. */\r | |
5991 | }\r | |
5992 | \r | |
5993 | static int\r | |
5994 | fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env)\r | |
5995 | {\r | |
5996 | int r;\r | |
5997 | OnigCodePoint c;\r | |
5998 | OnigEncoding enc = env->enc;\r | |
5999 | UChar *prev, *start, *p = *src;\r | |
6000 | \r | |
6001 | r = 0;\r | |
6002 | start = prev = p;\r | |
6003 | \r | |
6004 | while (!PEND) {\r | |
6005 | prev = p;\r | |
6006 | PFETCH_S(c);\r | |
6007 | if (c == '}') {\r | |
6008 | r = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, start, prev);\r | |
6009 | if (r < 0) break;\r | |
6010 | \r | |
6011 | *src = p;\r | |
6012 | return r;\r | |
6013 | }\r | |
6014 | else if (c == '(' || c == ')' || c == '{' || c == '|') {\r | |
6015 | r = ONIGERR_INVALID_CHAR_PROPERTY_NAME;\r | |
6016 | break;\r | |
6017 | }\r | |
6018 | }\r | |
6019 | \r | |
6020 | onig_scan_env_set_error_string(env, r, *src, prev);\r | |
6021 | return r;\r | |
6022 | }\r | |
6023 | \r | |
6024 | static int\r | |
b602265d | 6025 | parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)\r |
14b0e578 CS |
6026 | {\r |
6027 | int r, ctype;\r | |
6028 | CClassNode* cc;\r | |
6029 | \r | |
6030 | ctype = fetch_char_property_to_ctype(src, end, env);\r | |
6031 | if (ctype < 0) return ctype;\r | |
6032 | \r | |
6033 | *np = node_new_cclass();\r | |
6034 | CHECK_NULL_RETURN_MEMERR(*np);\r | |
b602265d | 6035 | cc = CCLASS_(*np);\r |
14b0e578 CS |
6036 | r = add_ctype_to_cc(cc, ctype, 0, env);\r |
6037 | if (r != 0) return r;\r | |
6038 | if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);\r | |
6039 | \r | |
6040 | return 0;\r | |
6041 | }\r | |
6042 | \r | |
6043 | \r | |
/* Parser state while reading the members of a character class. */
enum CCSTATE {
  CCS_VALUE    = 0,  /* a value or class was read; a single value is still pending */
  CCS_RANGE    = 1,  /* a range operator followed a value; the range end is expected */
  CCS_COMPLETE = 2,  /* a range has just been completed */
  CCS_START    = 3   /* nothing read yet (start of class or right after "&&") */
};
6050 | \r | |
/* Kind of the value most recently read inside a character class. */
enum CCVALTYPE {
  CCV_SB         = 0,  /* single-byte value, recorded in the bitset */
  CCV_CODE_POINT = 1,  /* code point, recorded in the code range buffer */
  CCV_CLASS      = 2   /* a whole class (ctype, POSIX bracket, property) */
};
6056 | \r | |
6057 | static int\r | |
6058 | next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type,\r | |
b602265d | 6059 | enum CCSTATE* state, ScanEnv* env)\r |
14b0e578 CS |
6060 | {\r |
6061 | int r;\r | |
6062 | \r | |
6063 | if (*state == CCS_RANGE)\r | |
6064 | return ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE;\r | |
6065 | \r | |
6066 | if (*state == CCS_VALUE && *type != CCV_CLASS) {\r | |
6067 | if (*type == CCV_SB)\r | |
6068 | BITSET_SET_BIT(cc->bs, (int )(*vs));\r | |
6069 | else if (*type == CCV_CODE_POINT) {\r | |
6070 | r = add_code_range(&(cc->mbuf), env, *vs, *vs);\r | |
6071 | if (r < 0) return r;\r | |
6072 | }\r | |
6073 | }\r | |
6074 | \r | |
6075 | *state = CCS_VALUE;\r | |
6076 | *type = CCV_CLASS;\r | |
6077 | return 0;\r | |
6078 | }\r | |
6079 | \r | |
6080 | static int\r | |
b602265d DG |
6081 | next_state_val(CClassNode* cc, OnigCodePoint *from, OnigCodePoint to,\r |
6082 | int* from_israw, int to_israw,\r | |
6083 | enum CCVALTYPE intype, enum CCVALTYPE* type,\r | |
6084 | enum CCSTATE* state, ScanEnv* env)\r | |
14b0e578 CS |
6085 | {\r |
6086 | int r;\r | |
6087 | \r | |
6088 | switch (*state) {\r | |
6089 | case CCS_VALUE:\r | |
b602265d DG |
6090 | if (*type == CCV_SB) {\r |
6091 | if (*from > 0xff)\r | |
6092 | return ONIGERR_INVALID_CODE_POINT_VALUE;\r | |
6093 | \r | |
6094 | BITSET_SET_BIT(cc->bs, (int )(*from));\r | |
6095 | }\r | |
14b0e578 | 6096 | else if (*type == CCV_CODE_POINT) {\r |
b602265d | 6097 | r = add_code_range(&(cc->mbuf), env, *from, *from);\r |
14b0e578 CS |
6098 | if (r < 0) return r;\r |
6099 | }\r | |
6100 | break;\r | |
6101 | \r | |
6102 | case CCS_RANGE:\r | |
6103 | if (intype == *type) {\r | |
6104 | if (intype == CCV_SB) {\r | |
b602265d | 6105 | if (*from > 0xff || to > 0xff)\r |
14b0e578 CS |
6106 | return ONIGERR_INVALID_CODE_POINT_VALUE;\r |
6107 | \r | |
b602265d DG |
6108 | if (*from > to) {\r |
6109 | if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))\r | |
6110 | goto ccs_range_end;\r | |
6111 | else\r | |
6112 | return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;\r | |
6113 | }\r | |
6114 | bitset_set_range(cc->bs, (int )*from, (int )to);\r | |
14b0e578 CS |
6115 | }\r |
6116 | else {\r | |
b602265d DG |
6117 | r = add_code_range(&(cc->mbuf), env, *from, to);\r |
6118 | if (r < 0) return r;\r | |
14b0e578 CS |
6119 | }\r |
6120 | }\r | |
6121 | else {\r | |
b602265d DG |
6122 | if (*from > to) {\r |
6123 | if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))\r | |
6124 | goto ccs_range_end;\r | |
6125 | else\r | |
6126 | return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;\r | |
14b0e578 | 6127 | }\r |
b602265d DG |
6128 | bitset_set_range(cc->bs, (int )*from, (int )(to < 0xff ? to : 0xff));\r |
6129 | r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*from, to);\r | |
6130 | if (r < 0) return r;\r | |
14b0e578 CS |
6131 | }\r |
6132 | ccs_range_end:\r | |
6133 | *state = CCS_COMPLETE;\r | |
6134 | break;\r | |
6135 | \r | |
6136 | case CCS_COMPLETE:\r | |
6137 | case CCS_START:\r | |
6138 | *state = CCS_VALUE;\r | |
6139 | break;\r | |
6140 | \r | |
6141 | default:\r | |
6142 | break;\r | |
6143 | }\r | |
6144 | \r | |
b602265d DG |
6145 | *from_israw = to_israw;\r |
6146 | *from = to;\r | |
6147 | *type = intype;\r | |
14b0e578 CS |
6148 | return 0;\r |
6149 | }\r | |
6150 | \r | |
6151 | static int\r | |
6152 | code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,\r | |
b602265d | 6153 | ScanEnv* env)\r |
14b0e578 CS |
6154 | {\r |
6155 | int in_esc;\r | |
6156 | OnigCodePoint code;\r | |
6157 | OnigEncoding enc = env->enc;\r | |
6158 | UChar* p = from;\r | |
6159 | \r | |
6160 | in_esc = 0;\r | |
6161 | while (! PEND) {\r | |
6162 | if (ignore_escaped && in_esc) {\r | |
6163 | in_esc = 0;\r | |
6164 | }\r | |
6165 | else {\r | |
6166 | PFETCH_S(code);\r | |
6167 | if (code == c) return 1;\r | |
6168 | if (code == MC_ESC(env->syntax)) in_esc = 1;\r | |
6169 | }\r | |
6170 | }\r | |
6171 | return 0;\r | |
6172 | }\r | |
6173 | \r | |
6174 | static int\r | |
b602265d | 6175 | parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)\r |
14b0e578 CS |
6176 | {\r |
6177 | int r, neg, len, fetched, and_start;\r | |
6178 | OnigCodePoint v, vs;\r | |
6179 | UChar *p;\r | |
6180 | Node* node;\r | |
6181 | CClassNode *cc, *prev_cc;\r | |
6182 | CClassNode work_cc;\r | |
6183 | \r | |
6184 | enum CCSTATE state;\r | |
6185 | enum CCVALTYPE val_type, in_type;\r | |
6186 | int val_israw, in_israw;\r | |
6187 | \r | |
14b0e578 | 6188 | *np = NULL_NODE;\r |
b602265d DG |
6189 | env->parse_depth++;\r |
6190 | if (env->parse_depth > ParseDepthLimit)\r | |
6191 | return ONIGERR_PARSE_DEPTH_LIMIT_OVER;\r | |
6192 | prev_cc = (CClassNode* )NULL;\r | |
14b0e578 CS |
6193 | r = fetch_token_in_cc(tok, src, end, env);\r |
6194 | if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) {\r | |
6195 | neg = 1;\r | |
6196 | r = fetch_token_in_cc(tok, src, end, env);\r | |
6197 | }\r | |
6198 | else {\r | |
6199 | neg = 0;\r | |
6200 | }\r | |
6201 | \r | |
6202 | if (r < 0) return r;\r | |
6203 | if (r == TK_CC_CLOSE) {\r | |
6204 | if (! code_exist_check((OnigCodePoint )']',\r | |
6205 | *src, env->pattern_end, 1, env))\r | |
6206 | return ONIGERR_EMPTY_CHAR_CLASS;\r | |
6207 | \r | |
6208 | CC_ESC_WARN(env, (UChar* )"]");\r | |
6209 | r = tok->type = TK_CHAR; /* allow []...] */\r | |
6210 | }\r | |
6211 | \r | |
6212 | *np = node = node_new_cclass();\r | |
6213 | CHECK_NULL_RETURN_MEMERR(node);\r | |
b602265d | 6214 | cc = CCLASS_(node);\r |
14b0e578 CS |
6215 | \r |
6216 | and_start = 0;\r | |
6217 | state = CCS_START;\r | |
6218 | p = *src;\r | |
6219 | while (r != TK_CC_CLOSE) {\r | |
6220 | fetched = 0;\r | |
6221 | switch (r) {\r | |
6222 | case TK_CHAR:\r | |
b602265d | 6223 | any_char_in:\r |
14b0e578 CS |
6224 | len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.c);\r |
6225 | if (len > 1) {\r | |
b602265d | 6226 | in_type = CCV_CODE_POINT;\r |
14b0e578 CS |
6227 | }\r |
6228 | else if (len < 0) {\r | |
b602265d DG |
6229 | r = len;\r |
6230 | goto err;\r | |
14b0e578 CS |
6231 | }\r |
6232 | else {\r | |
b602265d DG |
6233 | /* sb_char: */\r |
6234 | in_type = CCV_SB;\r | |
14b0e578 CS |
6235 | }\r |
6236 | v = (OnigCodePoint )tok->u.c;\r | |
6237 | in_israw = 0;\r | |
6238 | goto val_entry2;\r | |
6239 | break;\r | |
6240 | \r | |
6241 | case TK_RAW_BYTE:\r | |
6242 | /* tok->base != 0 : octal or hexadec. */\r | |
6243 | if (! ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base != 0) {\r | |
b602265d DG |
6244 | UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];\r |
6245 | UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN;\r | |
6246 | UChar* psave = p;\r | |
6247 | int i, base = tok->base;\r | |
6248 | \r | |
6249 | buf[0] = tok->u.c;\r | |
6250 | for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) {\r | |
6251 | r = fetch_token_in_cc(tok, &p, end, env);\r | |
6252 | if (r < 0) goto err;\r | |
6253 | if (r != TK_RAW_BYTE || tok->base != base) {\r | |
6254 | fetched = 1;\r | |
6255 | break;\r | |
6256 | }\r | |
6257 | buf[i] = tok->u.c;\r | |
6258 | }\r | |
6259 | \r | |
6260 | if (i < ONIGENC_MBC_MINLEN(env->enc)) {\r | |
6261 | r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;\r | |
6262 | goto err;\r | |
6263 | }\r | |
6264 | \r | |
6265 | len = enclen(env->enc, buf);\r | |
6266 | if (i < len) {\r | |
6267 | r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;\r | |
6268 | goto err;\r | |
6269 | }\r | |
6270 | else if (i > len) { /* fetch back */\r | |
6271 | p = psave;\r | |
6272 | for (i = 1; i < len; i++) {\r | |
6273 | r = fetch_token_in_cc(tok, &p, end, env);\r | |
6274 | }\r | |
6275 | fetched = 0;\r | |
6276 | }\r | |
6277 | \r | |
6278 | if (i == 1) {\r | |
6279 | v = (OnigCodePoint )buf[0];\r | |
6280 | goto raw_single;\r | |
6281 | }\r | |
6282 | else {\r | |
6283 | v = ONIGENC_MBC_TO_CODE(env->enc, buf, bufe);\r | |
6284 | in_type = CCV_CODE_POINT;\r | |
6285 | }\r | |
6286 | }\r | |
6287 | else {\r | |
6288 | v = (OnigCodePoint )tok->u.c;\r | |
6289 | raw_single:\r | |
6290 | in_type = CCV_SB;\r | |
6291 | }\r | |
6292 | in_israw = 1;\r | |
6293 | goto val_entry2;\r | |
6294 | break;\r | |
6295 | \r | |
6296 | case TK_CODE_POINT:\r | |
6297 | v = tok->u.code;\r | |
6298 | in_israw = 1;\r | |
6299 | val_entry:\r | |
6300 | len = ONIGENC_CODE_TO_MBCLEN(env->enc, v);\r | |
6301 | if (len < 0) {\r | |
6302 | r = len;\r | |
6303 | goto err;\r | |
6304 | }\r | |
6305 | in_type = (len == 1 ? CCV_SB : CCV_CODE_POINT);\r | |
6306 | val_entry2:\r | |
6307 | r = next_state_val(cc, &vs, v, &val_israw, in_israw, in_type, &val_type,\r | |
6308 | &state, env);\r | |
6309 | if (r != 0) goto err;\r | |
6310 | break;\r | |
6311 | \r | |
6312 | case TK_POSIX_BRACKET_OPEN:\r | |
6313 | r = parse_posix_bracket(cc, &p, end, env);\r | |
6314 | if (r < 0) goto err;\r | |
6315 | if (r == 1) { /* is not POSIX bracket */\r | |
6316 | CC_ESC_WARN(env, (UChar* )"[");\r | |
6317 | p = tok->backp;\r | |
6318 | v = (OnigCodePoint )tok->u.c;\r | |
6319 | in_israw = 0;\r | |
6320 | goto val_entry;\r | |
6321 | }\r | |
6322 | goto next_class;\r | |
6323 | break;\r | |
6324 | \r | |
6325 | case TK_CHAR_TYPE:\r | |
6326 | r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not, env);\r | |
6327 | if (r != 0) goto err;\r | |
6328 | \r | |
6329 | next_class:\r | |
6330 | r = next_state_class(cc, &vs, &val_type, &state, env);\r | |
6331 | if (r != 0) goto err;\r | |
6332 | break;\r | |
6333 | \r | |
6334 | case TK_CHAR_PROPERTY:\r | |
6335 | {\r | |
6336 | int ctype = fetch_char_property_to_ctype(&p, end, env);\r | |
6337 | if (ctype < 0) {\r | |
6338 | r = ctype;\r | |
6339 | goto err;\r | |
6340 | }\r | |
6341 | r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, env);\r | |
6342 | if (r != 0) goto err;\r | |
6343 | goto next_class;\r | |
6344 | }\r | |
6345 | break;\r | |
6346 | \r | |
6347 | case TK_CC_RANGE:\r | |
6348 | if (state == CCS_VALUE) {\r | |
6349 | r = fetch_token_in_cc(tok, &p, end, env);\r | |
6350 | if (r < 0) goto err;\r | |
6351 | fetched = 1;\r | |
6352 | if (r == TK_CC_CLOSE) { /* allow [x-] */\r | |
6353 | range_end_val:\r | |
6354 | v = (OnigCodePoint )'-';\r | |
6355 | in_israw = 0;\r | |
6356 | goto val_entry;\r | |
6357 | }\r | |
6358 | else if (r == TK_CC_AND) {\r | |
6359 | CC_ESC_WARN(env, (UChar* )"-");\r | |
6360 | goto range_end_val;\r | |
6361 | }\r | |
6362 | \r | |
6363 | if (val_type == CCV_CLASS) {\r | |
6364 | r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;\r | |
6365 | goto err;\r | |
6366 | }\r | |
6367 | \r | |
6368 | state = CCS_RANGE;\r | |
6369 | }\r | |
6370 | else if (state == CCS_START) {\r | |
6371 | /* [-xa] is allowed */\r | |
6372 | v = (OnigCodePoint )tok->u.c;\r | |
6373 | in_israw = 0;\r | |
6374 | \r | |
6375 | r = fetch_token_in_cc(tok, &p, end, env);\r | |
6376 | if (r < 0) goto err;\r | |
6377 | fetched = 1;\r | |
6378 | /* [--x] or [a&&-x] is warned. */\r | |
6379 | if (r == TK_CC_RANGE || and_start != 0)\r | |
6380 | CC_ESC_WARN(env, (UChar* )"-");\r | |
6381 | \r | |
6382 | goto val_entry;\r | |
6383 | }\r | |
6384 | else if (state == CCS_RANGE) {\r | |
6385 | CC_ESC_WARN(env, (UChar* )"-");\r | |
6386 | goto any_char_in; /* [!--x] is allowed */\r | |
6387 | }\r | |
6388 | else { /* CCS_COMPLETE */\r | |
6389 | r = fetch_token_in_cc(tok, &p, end, env);\r | |
6390 | if (r < 0) goto err;\r | |
6391 | fetched = 1;\r | |
6392 | if (r == TK_CC_CLOSE) goto range_end_val; /* allow [a-b-] */\r | |
6393 | else if (r == TK_CC_AND) {\r | |
6394 | CC_ESC_WARN(env, (UChar* )"-");\r | |
6395 | goto range_end_val;\r | |
6396 | }\r | |
6397 | \r | |
6398 | if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) {\r | |
6399 | CC_ESC_WARN(env, (UChar* )"-");\r | |
6400 | goto range_end_val; /* [0-9-a] is allowed as [0-9\-a] */\r | |
6401 | }\r | |
6402 | r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;\r | |
6403 | goto err;\r | |
6404 | }\r | |
6405 | break;\r | |
6406 | \r | |
6407 | case TK_CC_CC_OPEN: /* [ */\r | |
6408 | {\r | |
6409 | Node *anode;\r | |
6410 | CClassNode* acc;\r | |
6411 | \r | |
6412 | r = parse_char_class(&anode, tok, &p, end, env);\r | |
6413 | if (r != 0) {\r | |
6414 | onig_node_free(anode);\r | |
6415 | goto cc_open_err;\r | |
6416 | }\r | |
6417 | acc = CCLASS_(anode);\r | |
6418 | r = or_cclass(cc, acc, env->enc);\r | |
6419 | onig_node_free(anode);\r | |
6420 | \r | |
6421 | cc_open_err:\r | |
6422 | if (r != 0) goto err;\r | |
6423 | }\r | |
6424 | break;\r | |
6425 | \r | |
6426 | case TK_CC_AND: /* && */\r | |
6427 | {\r | |
6428 | if (state == CCS_VALUE) {\r | |
6429 | r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,\r | |
6430 | &val_type, &state, env);\r | |
6431 | if (r != 0) goto err;\r | |
6432 | }\r | |
6433 | /* initialize local variables */\r | |
6434 | and_start = 1;\r | |
6435 | state = CCS_START;\r | |
6436 | \r | |
6437 | if (IS_NOT_NULL(prev_cc)) {\r | |
6438 | r = and_cclass(prev_cc, cc, env->enc);\r | |
6439 | if (r != 0) goto err;\r | |
6440 | bbuf_free(cc->mbuf);\r | |
6441 | }\r | |
6442 | else {\r | |
6443 | prev_cc = cc;\r | |
6444 | cc = &work_cc;\r | |
6445 | }\r | |
6446 | initialize_cclass(cc);\r | |
6447 | }\r | |
6448 | break;\r | |
6449 | \r | |
6450 | case TK_EOT:\r | |
6451 | r = ONIGERR_PREMATURE_END_OF_CHAR_CLASS;\r | |
6452 | goto err;\r | |
6453 | break;\r | |
6454 | default:\r | |
6455 | r = ONIGERR_PARSER_BUG;\r | |
6456 | goto err;\r | |
6457 | break;\r | |
6458 | }\r | |
6459 | \r | |
6460 | if (fetched)\r | |
6461 | r = tok->type;\r | |
6462 | else {\r | |
6463 | r = fetch_token_in_cc(tok, &p, end, env);\r | |
6464 | if (r < 0) goto err;\r | |
6465 | }\r | |
6466 | }\r | |
6467 | \r | |
6468 | if (state == CCS_VALUE) {\r | |
6469 | r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,\r | |
6470 | &val_type, &state, env);\r | |
6471 | if (r != 0) goto err;\r | |
6472 | }\r | |
6473 | \r | |
6474 | if (IS_NOT_NULL(prev_cc)) {\r | |
6475 | r = and_cclass(prev_cc, cc, env->enc);\r | |
6476 | if (r != 0) goto err;\r | |
6477 | bbuf_free(cc->mbuf);\r | |
6478 | cc = prev_cc;\r | |
6479 | }\r | |
6480 | \r | |
6481 | if (neg != 0)\r | |
6482 | NCCLASS_SET_NOT(cc);\r | |
6483 | else\r | |
6484 | NCCLASS_CLEAR_NOT(cc);\r | |
6485 | if (IS_NCCLASS_NOT(cc) &&\r | |
6486 | IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) {\r | |
6487 | int is_empty = (IS_NULL(cc->mbuf) ? 1 : 0);\r | |
6488 | if (is_empty != 0)\r | |
6489 | BITSET_IS_EMPTY(cc->bs, is_empty);\r | |
6490 | \r | |
6491 | if (is_empty == 0) {\r | |
6492 | #define NEWLINE_CODE 0x0a\r | |
6493 | \r | |
6494 | if (ONIGENC_IS_CODE_NEWLINE(env->enc, NEWLINE_CODE)) {\r | |
6495 | if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1)\r | |
6496 | BITSET_SET_BIT(cc->bs, NEWLINE_CODE);\r | |
6497 | else\r | |
6498 | add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE);\r | |
6499 | }\r | |
6500 | }\r | |
6501 | }\r | |
6502 | *src = p;\r | |
6503 | env->parse_depth--;\r | |
6504 | return 0;\r | |
6505 | \r | |
6506 | err:\r | |
6507 | if (cc != CCLASS_(*np))\r | |
6508 | bbuf_free(cc->mbuf);\r | |
6509 | return r;\r | |
6510 | }\r | |
6511 | \r | |
6512 | static int parse_subexp(Node** top, OnigToken* tok, int term,\r | |
6513 | UChar** src, UChar* end, ScanEnv* env);\r | |
6514 | \r | |
6515 | #ifdef USE_CALLOUT\r | |
6516 | \r | |
6517 | /* (?{...}[tag][+-]) (?{{...}}[tag][+-]) */\r | |
6518 | static int\r | |
6519 | parse_callout_of_contents(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* env)\r | |
6520 | {\r | |
6521 | int r;\r | |
6522 | int i;\r | |
6523 | int in;\r | |
6524 | int num;\r | |
6525 | OnigCodePoint c;\r | |
6526 | UChar* code_start;\r | |
6527 | UChar* code_end;\r | |
6528 | UChar* contents;\r | |
6529 | UChar* tag_start;\r | |
6530 | UChar* tag_end;\r | |
6531 | int brace_nest;\r | |
6532 | CalloutListEntry* e;\r | |
6533 | RegexExt* ext;\r | |
6534 | OnigEncoding enc = env->enc;\r | |
6535 | UChar* p = *src;\r | |
6536 | \r | |
6537 | if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;\r | |
6538 | \r | |
6539 | brace_nest = 0;\r | |
6540 | while (PPEEK_IS('{')) {\r | |
6541 | brace_nest++;\r | |
6542 | PINC_S;\r | |
6543 | if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;\r | |
6544 | }\r | |
6545 | \r | |
6546 | in = ONIG_CALLOUT_IN_PROGRESS;\r | |
6547 | code_start = p;\r | |
6548 | while (1) {\r | |
6549 | if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;\r | |
6550 | \r | |
6551 | code_end = p;\r | |
6552 | PFETCH_S(c);\r | |
6553 | if (c == '}') {\r | |
6554 | i = brace_nest;\r | |
6555 | while (i > 0) {\r | |
6556 | if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;\r | |
6557 | PFETCH_S(c);\r | |
6558 | if (c == '}') i--;\r | |
6559 | else break;\r | |
6560 | }\r | |
6561 | if (i == 0) break;\r | |
6562 | }\r | |
6563 | }\r | |
6564 | \r | |
6565 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
6566 | \r | |
6567 | PFETCH_S(c);\r | |
6568 | if (c == '[') {\r | |
6569 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
6570 | tag_start = p;\r | |
6571 | while (! PEND) {\r | |
6572 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
6573 | tag_end = p;\r | |
6574 | PFETCH_S(c);\r | |
6575 | if (c == ']') break;\r | |
6576 | }\r | |
6577 | if (! is_allowed_callout_tag_name(enc, tag_start, tag_end))\r | |
6578 | return ONIGERR_INVALID_CALLOUT_TAG_NAME;\r | |
6579 | \r | |
6580 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
6581 | PFETCH_S(c);\r | |
6582 | }\r | |
6583 | else {\r | |
6584 | tag_start = tag_end = 0;\r | |
6585 | }\r | |
6586 | \r | |
6587 | if (c == 'X') {\r | |
6588 | in |= ONIG_CALLOUT_IN_RETRACTION;\r | |
6589 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
6590 | PFETCH_S(c);\r | |
6591 | }\r | |
6592 | else if (c == '<') {\r | |
6593 | in = ONIG_CALLOUT_IN_RETRACTION;\r | |
6594 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
6595 | PFETCH_S(c);\r | |
6596 | }\r | |
6597 | else if (c == '>') { /* no needs (default) */\r | |
6598 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
6599 | PFETCH_S(c);\r | |
6600 | }\r | |
6601 | \r | |
6602 | if (c != cterm)\r | |
6603 | return ONIGERR_INVALID_CALLOUT_PATTERN;\r | |
6604 | \r | |
6605 | r = reg_callout_list_entry(env, &num);\r | |
6606 | if (r != 0) return r;\r | |
6607 | \r | |
6608 | ext = onig_get_regex_ext(env->reg);\r | |
6609 | if (IS_NULL(ext->pattern)) {\r | |
6610 | r = onig_ext_set_pattern(env->reg, env->pattern, env->pattern_end);\r | |
6611 | if (r != ONIG_NORMAL) return r;\r | |
6612 | }\r | |
6613 | \r | |
6614 | if (tag_start != tag_end) {\r | |
6615 | r = callout_tag_entry(env->reg, tag_start, tag_end, num);\r | |
6616 | if (r != ONIG_NORMAL) return r;\r | |
6617 | }\r | |
6618 | \r | |
6619 | contents = onigenc_strdup(enc, code_start, code_end);\r | |
6620 | CHECK_NULL_RETURN_MEMERR(contents);\r | |
6621 | \r | |
6622 | r = node_new_callout(np, ONIG_CALLOUT_OF_CONTENTS, num, ONIG_NON_NAME_ID, env);\r | |
6623 | if (r != 0) {\r | |
6624 | xfree(contents);\r | |
6625 | return r;\r | |
6626 | }\r | |
6627 | \r | |
6628 | e = onig_reg_callout_list_at(env->reg, num);\r | |
6629 | e->of = ONIG_CALLOUT_OF_CONTENTS;\r | |
6630 | e->in = in;\r | |
6631 | e->name_id = ONIG_NON_NAME_ID;\r | |
6632 | e->u.content.start = contents;\r | |
6633 | e->u.content.end = contents + (code_end - code_start);\r | |
6634 | \r | |
6635 | *src = p;\r | |
6636 | return 0;\r | |
6637 | }\r | |
6638 | \r | |
6639 | static long\r | |
6640 | parse_long(OnigEncoding enc, UChar* s, UChar* end, int sign_on, long max, long* rl)\r | |
6641 | {\r | |
6642 | long v;\r | |
6643 | long d;\r | |
6644 | int flag;\r | |
6645 | UChar* p;\r | |
6646 | OnigCodePoint c;\r | |
6647 | \r | |
6648 | if (s >= end) return ONIGERR_INVALID_CALLOUT_ARG;\r | |
6649 | \r | |
6650 | flag = 1;\r | |
6651 | v = 0;\r | |
6652 | p = s;\r | |
6653 | while (p < end) {\r | |
6654 | c = ONIGENC_MBC_TO_CODE(enc, p, end);\r | |
6655 | p += ONIGENC_MBC_ENC_LEN(enc, p);\r | |
6656 | if (c >= '0' && c <= '9') {\r | |
6657 | d = (long )(c - '0');\r | |
6658 | if (v > (max - d) / 10)\r | |
6659 | return ONIGERR_INVALID_CALLOUT_ARG;\r | |
6660 | \r | |
6661 | v = v * 10 + d;\r | |
6662 | }\r | |
6663 | else if (sign_on != 0 && (c == '-' || c == '+')) {\r | |
6664 | if (c == '-') flag = -1;\r | |
6665 | }\r | |
6666 | else\r | |
6667 | return ONIGERR_INVALID_CALLOUT_ARG;\r | |
6668 | \r | |
6669 | sign_on = 0;\r | |
6670 | }\r | |
6671 | \r | |
6672 | *rl = flag * v;\r | |
6673 | return ONIG_NORMAL;\r | |
6674 | }\r | |
6675 | \r | |
6676 | static int\r | |
6677 | parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end,\r | |
6678 | unsigned int types[], OnigValue vals[], ScanEnv* env)\r | |
6679 | {\r | |
6680 | #define MAX_CALLOUT_ARG_BYTE_LENGTH 128\r | |
6681 | \r | |
6682 | int r;\r | |
6683 | int n;\r | |
6684 | int esc;\r | |
6685 | int cn;\r | |
6686 | UChar* s;\r | |
6687 | UChar* e;\r | |
6688 | UChar* eesc;\r | |
6689 | OnigCodePoint c;\r | |
6690 | UChar* bufend;\r | |
6691 | UChar buf[MAX_CALLOUT_ARG_BYTE_LENGTH];\r | |
6692 | OnigEncoding enc = env->enc;\r | |
6693 | UChar* p = *src;\r | |
6694 | \r | |
6695 | if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;\r | |
6696 | \r | |
6697 | n = 0;\r | |
6698 | while (n < ONIG_CALLOUT_MAX_ARGS_NUM) {\r | |
6699 | c = 0;\r | |
6700 | cn = 0;\r | |
6701 | esc = 0;\r | |
6702 | eesc = 0;\r | |
6703 | bufend = buf;\r | |
6704 | s = e = p;\r | |
6705 | while (1) {\r | |
6706 | if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;\r | |
6707 | \r | |
6708 | e = p;\r | |
6709 | PFETCH_S(c);\r | |
6710 | if (esc != 0) {\r | |
6711 | esc = 0;\r | |
6712 | if (c == '\\' || c == cterm || c == ',') {\r | |
6713 | /* */\r | |
6714 | }\r | |
6715 | else {\r | |
6716 | e = eesc;\r | |
6717 | cn++;\r | |
6718 | }\r | |
6719 | goto add_char;\r | |
14b0e578 CS |
6720 | }\r |
6721 | else {\r | |
b602265d DG |
6722 | if (c == '\\') {\r |
6723 | esc = 1;\r | |
6724 | eesc = e;\r | |
6725 | }\r | |
6726 | else if (c == cterm || c == ',')\r | |
6727 | break;\r | |
6728 | else {\r | |
6729 | size_t clen;\r | |
14b0e578 | 6730 | \r |
b602265d DG |
6731 | add_char:\r |
6732 | if (skip_mode == 0) {\r | |
6733 | clen = p - e;\r | |
6734 | if (bufend + clen > buf + MAX_CALLOUT_ARG_BYTE_LENGTH)\r | |
6735 | return ONIGERR_INVALID_CALLOUT_ARG; /* too long argument */\r | |
14b0e578 | 6736 | \r |
b602265d DG |
6737 | xmemcpy(bufend, e, clen);\r |
6738 | bufend += clen;\r | |
6739 | }\r | |
6740 | cn++;\r | |
6741 | }\r | |
14b0e578 | 6742 | }\r |
b602265d | 6743 | }\r |
14b0e578 | 6744 | \r |
b602265d DG |
6745 | if (cn != 0) {\r |
6746 | if (skip_mode == 0) {\r | |
6747 | if ((types[n] & ONIG_TYPE_LONG) != 0) {\r | |
6748 | int fixed = 0;\r | |
6749 | if (cn > 0) {\r | |
6750 | long rl;\r | |
6751 | r = parse_long(enc, buf, bufend, 1, LONG_MAX, &rl);\r | |
6752 | if (r == ONIG_NORMAL) {\r | |
6753 | vals[n].l = rl;\r | |
6754 | fixed = 1;\r | |
6755 | types[n] = ONIG_TYPE_LONG;\r | |
6756 | }\r | |
6757 | }\r | |
14b0e578 | 6758 | \r |
b602265d DG |
6759 | if (fixed == 0) {\r |
6760 | types[n] = (types[n] & ~ONIG_TYPE_LONG);\r | |
6761 | if (types[n] == ONIG_TYPE_VOID)\r | |
6762 | return ONIGERR_INVALID_CALLOUT_ARG;\r | |
6763 | }\r | |
6764 | }\r | |
14b0e578 | 6765 | \r |
b602265d DG |
6766 | switch (types[n]) {\r |
6767 | case ONIG_TYPE_LONG:\r | |
6768 | break;\r | |
14b0e578 | 6769 | \r |
b602265d DG |
6770 | case ONIG_TYPE_CHAR:\r |
6771 | if (cn != 1) return ONIGERR_INVALID_CALLOUT_ARG;\r | |
6772 | vals[n].c = ONIGENC_MBC_TO_CODE(enc, buf, bufend);\r | |
6773 | break;\r | |
14b0e578 | 6774 | \r |
b602265d DG |
6775 | case ONIG_TYPE_STRING:\r |
6776 | {\r | |
6777 | UChar* rs = onigenc_strdup(enc, buf, bufend);\r | |
6778 | CHECK_NULL_RETURN_MEMERR(rs);\r | |
6779 | vals[n].s.start = rs;\r | |
6780 | vals[n].s.end = rs + (e - s);\r | |
6781 | }\r | |
6782 | break;\r | |
14b0e578 | 6783 | \r |
b602265d DG |
6784 | case ONIG_TYPE_TAG:\r |
6785 | if (eesc != 0 || ! is_allowed_callout_tag_name(enc, s, e))\r | |
6786 | return ONIGERR_INVALID_CALLOUT_TAG_NAME;\r | |
14b0e578 | 6787 | \r |
b602265d DG |
6788 | vals[n].s.start = s;\r |
6789 | vals[n].s.end = e;\r | |
6790 | break;\r | |
6791 | \r | |
6792 | case ONIG_TYPE_VOID:\r | |
6793 | case ONIG_TYPE_POINTER:\r | |
6794 | return ONIGERR_PARSER_BUG;\r | |
6795 | break;\r | |
6796 | }\r | |
14b0e578 | 6797 | }\r |
14b0e578 | 6798 | \r |
b602265d DG |
6799 | n++;\r |
6800 | }\r | |
14b0e578 | 6801 | \r |
b602265d DG |
6802 | if (c == cterm) break;\r |
6803 | }\r | |
14b0e578 | 6804 | \r |
b602265d | 6805 | if (c != cterm) return ONIGERR_INVALID_CALLOUT_PATTERN;\r |
14b0e578 | 6806 | \r |
b602265d DG |
6807 | *src = p;\r |
6808 | return n;\r | |
6809 | }\r | |
14b0e578 | 6810 | \r |
b602265d DG |
6811 | /* (*name[TAG]) (*name[TAG]{a,b,..}) */\r |
6812 | static int\r | |
6813 | parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* env)\r | |
6814 | {\r | |
6815 | int r;\r | |
6816 | int i;\r | |
6817 | int in;\r | |
6818 | int num;\r | |
6819 | int name_id;\r | |
6820 | int arg_num;\r | |
6821 | int max_arg_num;\r | |
6822 | int opt_arg_num;\r | |
6823 | int is_not_single;\r | |
6824 | OnigCodePoint c;\r | |
6825 | UChar* name_start;\r | |
6826 | UChar* name_end;\r | |
6827 | UChar* tag_start;\r | |
6828 | UChar* tag_end;\r | |
6829 | Node* node;\r | |
6830 | CalloutListEntry* e;\r | |
6831 | RegexExt* ext;\r | |
6832 | unsigned int types[ONIG_CALLOUT_MAX_ARGS_NUM];\r | |
6833 | OnigValue vals[ONIG_CALLOUT_MAX_ARGS_NUM];\r | |
6834 | OnigEncoding enc = env->enc;\r | |
6835 | UChar* p = *src;\r | |
14b0e578 | 6836 | \r |
b602265d DG |
6837 | /* PFETCH_READY; */\r |
6838 | if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;\r | |
6839 | \r | |
6840 | node = 0;\r | |
6841 | name_start = p;\r | |
6842 | while (1) {\r | |
6843 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
6844 | name_end = p;\r | |
6845 | PFETCH_S(c);\r | |
6846 | if (c == cterm || c == '[' || c == '{') break;\r | |
6847 | }\r | |
6848 | \r | |
6849 | if (! is_allowed_callout_name(enc, name_start, name_end))\r | |
6850 | return ONIGERR_INVALID_CALLOUT_NAME;\r | |
6851 | \r | |
6852 | if (c == '[') {\r | |
6853 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
6854 | tag_start = p;\r | |
6855 | while (! PEND) {\r | |
6856 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
6857 | tag_end = p;\r | |
6858 | PFETCH_S(c);\r | |
6859 | if (c == ']') break;\r | |
14b0e578 | 6860 | }\r |
b602265d DG |
6861 | if (! is_allowed_callout_tag_name(enc, tag_start, tag_end))\r |
6862 | return ONIGERR_INVALID_CALLOUT_TAG_NAME;\r | |
6863 | \r | |
6864 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
6865 | PFETCH_S(c);\r | |
6866 | }\r | |
6867 | else {\r | |
6868 | tag_start = tag_end = 0;\r | |
14b0e578 CS |
6869 | }\r |
6870 | \r | |
b602265d DG |
6871 | if (c == '{') {\r |
6872 | UChar* save;\r | |
6873 | \r | |
6874 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
6875 | \r | |
6876 | /* read for single check only */\r | |
6877 | save = p;\r | |
6878 | arg_num = parse_callout_args(1, '}', &p, end, 0, 0, env);\r | |
6879 | if (arg_num < 0) return arg_num;\r | |
6880 | \r | |
6881 | is_not_single = PPEEK_IS(cterm) ? 0 : 1;\r | |
6882 | p = save;\r | |
6883 | r = get_callout_name_id_by_name(enc, is_not_single, name_start, name_end,\r | |
6884 | &name_id);\r | |
6885 | if (r != ONIG_NORMAL) return r;\r | |
6886 | \r | |
6887 | max_arg_num = get_callout_arg_num_by_name_id(name_id);\r | |
6888 | for (i = 0; i < max_arg_num; i++) {\r | |
6889 | types[i] = get_callout_arg_type_by_name_id(name_id, i);\r | |
6890 | }\r | |
6891 | \r | |
6892 | arg_num = parse_callout_args(0, '}', &p, end, types, vals, env);\r | |
6893 | if (arg_num < 0) return arg_num;\r | |
6894 | \r | |
6895 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
6896 | PFETCH_S(c);\r | |
14b0e578 | 6897 | }\r |
b602265d DG |
6898 | else {\r |
6899 | arg_num = 0;\r | |
14b0e578 | 6900 | \r |
b602265d DG |
6901 | is_not_single = 0;\r |
6902 | r = get_callout_name_id_by_name(enc, is_not_single, name_start, name_end,\r | |
6903 | &name_id);\r | |
6904 | if (r != ONIG_NORMAL) return r;\r | |
6905 | \r | |
6906 | max_arg_num = get_callout_arg_num_by_name_id(name_id);\r | |
6907 | for (i = 0; i < max_arg_num; i++) {\r | |
6908 | types[i] = get_callout_arg_type_by_name_id(name_id, i);\r | |
6909 | }\r | |
14b0e578 CS |
6910 | }\r |
6911 | \r | |
b602265d DG |
6912 | in = onig_get_callout_in_by_name_id(name_id);\r |
6913 | opt_arg_num = get_callout_opt_arg_num_by_name_id(name_id);\r | |
6914 | if (arg_num > max_arg_num || arg_num < (max_arg_num - opt_arg_num))\r | |
6915 | return ONIGERR_INVALID_CALLOUT_ARG;\r | |
14b0e578 | 6916 | \r |
b602265d DG |
6917 | if (c != cterm)\r |
6918 | return ONIGERR_INVALID_CALLOUT_PATTERN;\r | |
14b0e578 | 6919 | \r |
b602265d DG |
6920 | r = reg_callout_list_entry(env, &num);\r |
6921 | if (r != 0) return r;\r | |
14b0e578 | 6922 | \r |
b602265d DG |
6923 | ext = onig_get_regex_ext(env->reg);\r |
6924 | if (IS_NULL(ext->pattern)) {\r | |
6925 | r = onig_ext_set_pattern(env->reg, env->pattern, env->pattern_end);\r | |
6926 | if (r != ONIG_NORMAL) return r;\r | |
6927 | }\r | |
6928 | \r | |
6929 | if (tag_start != tag_end) {\r | |
6930 | r = callout_tag_entry(env->reg, tag_start, tag_end, num);\r | |
6931 | if (r != ONIG_NORMAL) return r;\r | |
6932 | }\r | |
6933 | \r | |
6934 | r = node_new_callout(&node, ONIG_CALLOUT_OF_NAME, num, name_id, env);\r | |
6935 | if (r != ONIG_NORMAL) return r;\r | |
6936 | \r | |
6937 | e = onig_reg_callout_list_at(env->reg, num);\r | |
6938 | e->of = ONIG_CALLOUT_OF_NAME;\r | |
6939 | e->in = in;\r | |
6940 | e->name_id = name_id;\r | |
6941 | e->type = onig_get_callout_type_by_name_id(name_id);\r | |
6942 | e->start_func = onig_get_callout_start_func_by_name_id(name_id);\r | |
6943 | e->end_func = onig_get_callout_end_func_by_name_id(name_id);\r | |
6944 | e->u.arg.num = max_arg_num;\r | |
6945 | e->u.arg.passed_num = arg_num;\r | |
6946 | for (i = 0; i < max_arg_num; i++) {\r | |
6947 | e->u.arg.types[i] = types[i];\r | |
6948 | if (i < arg_num)\r | |
6949 | e->u.arg.vals[i] = vals[i];\r | |
6950 | else\r | |
6951 | e->u.arg.vals[i] = get_callout_opt_default_by_name_id(name_id, i);\r | |
14b0e578 | 6952 | }\r |
b602265d DG |
6953 | \r |
6954 | *np = node;\r | |
14b0e578 CS |
6955 | *src = p;\r |
6956 | return 0;\r | |
14b0e578 | 6957 | }\r |
b602265d | 6958 | #endif\r |
14b0e578 CS |
6959 | \r |
6960 | static int\r | |
b602265d DG |
6961 | parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,\r |
6962 | ScanEnv* env)\r | |
14b0e578 CS |
6963 | {\r |
6964 | int r, num;\r | |
6965 | Node *target;\r | |
6966 | OnigOptionType option;\r | |
6967 | OnigCodePoint c;\r | |
b602265d | 6968 | int list_capture;\r |
14b0e578 CS |
6969 | OnigEncoding enc = env->enc;\r |
6970 | \r | |
b602265d DG |
6971 | UChar* p = *src;\r |
6972 | PFETCH_READY;\r | |
6973 | \r | |
6974 | *np = NULL;\r | |
6975 | if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;\r | |
6976 | \r | |
6977 | option = env->options;\r | |
6978 | c = PPEEK;\r | |
6979 | if (c == '?' && IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {\r | |
6980 | PINC;\r | |
6981 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
6982 | \r | |
6983 | PFETCH(c);\r | |
6984 | switch (c) {\r | |
6985 | case ':': /* (?:...) grouping only */\r | |
6986 | group:\r | |
6987 | r = fetch_token(tok, &p, end, env);\r | |
6988 | if (r < 0) return r;\r | |
6989 | r = parse_subexp(np, tok, term, &p, end, env);\r | |
6990 | if (r < 0) return r;\r | |
6991 | *src = p;\r | |
6992 | return 1; /* group */\r | |
6993 | break;\r | |
6994 | \r | |
6995 | case '=':\r | |
6996 | *np = onig_node_new_anchor(ANCHOR_PREC_READ, 0);\r | |
6997 | break;\r | |
6998 | case '!': /* preceding read */\r | |
6999 | *np = onig_node_new_anchor(ANCHOR_PREC_READ_NOT, 0);\r | |
7000 | break;\r | |
7001 | case '>': /* (?>...) stop backtrack */\r | |
7002 | *np = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);\r | |
7003 | break;\r | |
7004 | \r | |
7005 | case '\'':\r | |
7006 | if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {\r | |
7007 | goto named_group1;\r | |
7008 | }\r | |
7009 | else\r | |
7010 | return ONIGERR_UNDEFINED_GROUP_OPTION;\r | |
7011 | break;\r | |
7012 | \r | |
7013 | case '<': /* look behind (?<=...), (?<!...) */\r | |
7014 | if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;\r | |
7015 | PFETCH(c);\r | |
7016 | if (c == '=')\r | |
7017 | *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND, 0);\r | |
7018 | else if (c == '!')\r | |
7019 | *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND_NOT, 0);\r | |
7020 | else {\r | |
7021 | if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {\r | |
7022 | UChar *name;\r | |
7023 | UChar *name_end;\r | |
7024 | enum REF_NUM num_type;\r | |
7025 | \r | |
7026 | PUNFETCH;\r | |
7027 | c = '<';\r | |
7028 | \r | |
7029 | named_group1:\r | |
7030 | list_capture = 0;\r | |
7031 | \r | |
7032 | named_group2:\r | |
7033 | name = p;\r | |
7034 | r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num,\r | |
7035 | &num_type, 0);\r | |
7036 | if (r < 0) return r;\r | |
7037 | \r | |
7038 | num = scan_env_add_mem_entry(env);\r | |
7039 | if (num < 0) return num;\r | |
7040 | if (list_capture != 0 && num >= (int )MEM_STATUS_BITS_NUM)\r | |
7041 | return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;\r | |
7042 | \r | |
7043 | r = name_add(env->reg, name, name_end, num, env);\r | |
7044 | if (r != 0) return r;\r | |
7045 | *np = node_new_memory(1);\r | |
7046 | CHECK_NULL_RETURN_MEMERR(*np);\r | |
7047 | ENCLOSURE_(*np)->m.regnum = num;\r | |
7048 | if (list_capture != 0)\r | |
7049 | MEM_STATUS_ON_SIMPLE(env->capture_history, num);\r | |
7050 | env->num_named++;\r | |
7051 | }\r | |
7052 | else {\r | |
7053 | return ONIGERR_UNDEFINED_GROUP_OPTION;\r | |
7054 | }\r | |
7055 | }\r | |
7056 | break;\r | |
7057 | \r | |
7058 | case '~':\r | |
7059 | if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP)) {\r | |
7060 | Node* absent;\r | |
7061 | Node* expr;\r | |
7062 | int head_bar;\r | |
7063 | int is_range_cutter;\r | |
7064 | \r | |
7065 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
7066 | \r | |
7067 | if (PPEEK_IS('|')) { /* (?~|generator|absent) */\r | |
7068 | PINC;\r | |
7069 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
7070 | \r | |
7071 | head_bar = 1;\r | |
7072 | if (PPEEK_IS(')')) { /* (?~|) : range clear */\r | |
7073 | PINC;\r | |
7074 | r = make_range_clear(np, env);\r | |
7075 | if (r != 0) return r;\r | |
7076 | goto end;\r | |
7077 | }\r | |
7078 | }\r | |
7079 | else\r | |
7080 | head_bar = 0;\r | |
7081 | \r | |
7082 | r = fetch_token(tok, &p, end, env);\r | |
7083 | if (r < 0) return r;\r | |
7084 | r = parse_subexp(&absent, tok, term, &p, end, env);\r | |
7085 | if (r < 0) {\r | |
7086 | onig_node_free(absent);\r | |
7087 | return r;\r | |
7088 | }\r | |
7089 | \r | |
7090 | expr = NULL_NODE;\r | |
7091 | is_range_cutter = 0;\r | |
7092 | if (head_bar != 0) {\r | |
7093 | Node* top = absent;\r | |
7094 | if (NODE_TYPE(top) != NODE_ALT || IS_NULL(NODE_CDR(top))) {\r | |
7095 | expr = NULL_NODE;\r | |
7096 | is_range_cutter = 1;\r | |
7097 | /* return ONIGERR_INVALID_ABSENT_GROUP_GENERATOR_PATTERN; */\r | |
7098 | }\r | |
7099 | else {\r | |
7100 | absent = NODE_CAR(top);\r | |
7101 | expr = NODE_CDR(top);\r | |
7102 | NODE_CAR(top) = NULL_NODE;\r | |
7103 | NODE_CDR(top) = NULL_NODE;\r | |
7104 | onig_node_free(top);\r | |
7105 | if (IS_NULL(NODE_CDR(expr))) {\r | |
7106 | top = expr;\r | |
7107 | expr = NODE_CAR(top);\r | |
7108 | NODE_CAR(top) = NULL_NODE;\r | |
7109 | onig_node_free(top);\r | |
7110 | }\r | |
7111 | }\r | |
7112 | }\r | |
7113 | \r | |
7114 | r = make_absent_tree(np, absent, expr, is_range_cutter, env);\r | |
7115 | if (r != 0) {\r | |
7116 | return r;\r | |
7117 | }\r | |
7118 | goto end;\r | |
7119 | }\r | |
7120 | else {\r | |
7121 | return ONIGERR_UNDEFINED_GROUP_OPTION;\r | |
7122 | }\r | |
7123 | break;\r | |
7124 | \r | |
7125 | #ifdef USE_CALLOUT\r | |
7126 | case '{':\r | |
7127 | if (! IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS))\r | |
7128 | return ONIGERR_UNDEFINED_GROUP_OPTION;\r | |
7129 | \r | |
7130 | r = parse_callout_of_contents(np, ')', &p, end, env);\r | |
7131 | if (r != 0) return r;\r | |
7132 | \r | |
7133 | goto end;\r | |
7134 | break;\r | |
7135 | #endif\r | |
7136 | \r | |
7137 | case '(':\r | |
7138 | /* (?()...) */\r | |
7139 | if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE)) {\r | |
7140 | UChar *prev;\r | |
7141 | Node* condition;\r | |
7142 | int condition_is_checker;\r | |
7143 | \r | |
7144 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
7145 | PFETCH(c);\r | |
7146 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
7147 | \r | |
7148 | if (IS_CODE_DIGIT_ASCII(enc, c)\r | |
7149 | || c == '-' || c == '+' || c == '<' || c == '\'') {\r | |
7150 | UChar* name_end;\r | |
7151 | int back_num;\r | |
7152 | int exist_level;\r | |
7153 | int level;\r | |
7154 | enum REF_NUM num_type;\r | |
7155 | int is_enclosed;\r | |
7156 | \r | |
7157 | is_enclosed = (c == '<' || c == '\'') ? 1 : 0;\r | |
7158 | if (! is_enclosed)\r | |
7159 | PUNFETCH;\r | |
7160 | prev = p;\r | |
7161 | exist_level = 0;\r | |
7162 | #ifdef USE_BACKREF_WITH_LEVEL\r | |
7163 | name_end = NULL_UCHARP; /* no need. escape gcc warning. */\r | |
7164 | r = fetch_name_with_level(\r | |
7165 | (OnigCodePoint )(is_enclosed != 0 ? c : '('),\r | |
7166 | &p, end, &name_end,\r | |
7167 | env, &back_num, &level, &num_type);\r | |
7168 | if (r == 1) exist_level = 1;\r | |
7169 | #else\r | |
7170 | r = fetch_name((OnigCodePoint )(is_enclosed != 0 ? c : '('),\r | |
7171 | &p, end, &name_end, env, &back_num, &num_type, 1);\r | |
7172 | #endif\r | |
7173 | if (r < 0) {\r | |
7174 | if (is_enclosed == 0) {\r | |
7175 | goto any_condition;\r | |
7176 | }\r | |
7177 | else\r | |
7178 | return r;\r | |
7179 | }\r | |
7180 | \r | |
7181 | condition_is_checker = 1;\r | |
7182 | if (num_type != IS_NOT_NUM) {\r | |
7183 | if (num_type == IS_REL_NUM) {\r | |
7184 | back_num = backref_rel_to_abs(back_num, env);\r | |
7185 | }\r | |
7186 | if (back_num <= 0)\r | |
7187 | return ONIGERR_INVALID_BACKREF;\r | |
7188 | \r | |
7189 | if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_STRICT_CHECK_BACKREF)) {\r | |
7190 | if (back_num > env->num_mem ||\r | |
7191 | IS_NULL(SCANENV_MEMENV(env)[back_num].node))\r | |
7192 | return ONIGERR_INVALID_BACKREF;\r | |
7193 | }\r | |
7194 | \r | |
7195 | condition = node_new_backref_checker(1, &back_num, 0,\r | |
7196 | #ifdef USE_BACKREF_WITH_LEVEL\r | |
7197 | exist_level, level,\r | |
7198 | #endif\r | |
7199 | env);\r | |
7200 | }\r | |
7201 | else {\r | |
7202 | int num;\r | |
7203 | int* backs;\r | |
7204 | \r | |
7205 | num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);\r | |
7206 | if (num <= 0) {\r | |
7207 | onig_scan_env_set_error_string(env,\r | |
7208 | ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);\r | |
7209 | return ONIGERR_UNDEFINED_NAME_REFERENCE;\r | |
7210 | }\r | |
7211 | if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_STRICT_CHECK_BACKREF)) {\r | |
7212 | int i;\r | |
7213 | for (i = 0; i < num; i++) {\r | |
7214 | if (backs[i] > env->num_mem ||\r | |
7215 | IS_NULL(SCANENV_MEMENV(env)[backs[i]].node))\r | |
7216 | return ONIGERR_INVALID_BACKREF;\r | |
7217 | }\r | |
7218 | }\r | |
7219 | \r | |
7220 | condition = node_new_backref_checker(num, backs, 1,\r | |
7221 | #ifdef USE_BACKREF_WITH_LEVEL\r | |
7222 | exist_level, level,\r | |
7223 | #endif\r | |
7224 | env);\r | |
7225 | }\r | |
7226 | \r | |
7227 | if (is_enclosed != 0) {\r | |
7228 | if (PEND) goto err_if_else;\r | |
7229 | PFETCH(c);\r | |
7230 | if (c != ')') goto err_if_else;\r | |
7231 | }\r | |
7232 | }\r | |
7233 | #ifdef USE_CALLOUT\r | |
7234 | else if (c == '?') {\r | |
7235 | if (IS_SYNTAX_OP2(env->syntax,\r | |
7236 | ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS)) {\r | |
7237 | if (! PEND && PPEEK_IS('{')) {\r | |
7238 | /* condition part is callouts of contents: (?(?{...})THEN|ELSE) */\r | |
7239 | condition_is_checker = 0;\r | |
7240 | PFETCH(c);\r | |
7241 | r = parse_callout_of_contents(&condition, ')', &p, end, env);\r | |
7242 | if (r != 0) return r;\r | |
7243 | goto end_condition;\r | |
7244 | }\r | |
7245 | }\r | |
7246 | goto any_condition;\r | |
7247 | }\r | |
7248 | else if (c == '*' &&\r | |
7249 | IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME)) {\r | |
7250 | condition_is_checker = 0;\r | |
7251 | r = parse_callout_of_name(&condition, ')', &p, end, env);\r | |
7252 | if (r != 0) return r;\r | |
7253 | goto end_condition;\r | |
7254 | }\r | |
14b0e578 | 7255 | #endif\r |
b602265d DG |
7256 | else {\r |
7257 | any_condition:\r | |
7258 | PUNFETCH;\r | |
7259 | condition_is_checker = 0;\r | |
7260 | r = fetch_token(tok, &p, end, env);\r | |
7261 | if (r < 0) return r;\r | |
7262 | r = parse_subexp(&condition, tok, term, &p, end, env);\r | |
7263 | if (r < 0) {\r | |
7264 | onig_node_free(condition);\r | |
7265 | return r;\r | |
7266 | }\r | |
7267 | }\r | |
14b0e578 | 7268 | \r |
b602265d DG |
7269 | end_condition:\r |
7270 | CHECK_NULL_RETURN_MEMERR(condition);\r | |
14b0e578 | 7271 | \r |
b602265d DG |
7272 | if (PEND) {\r |
7273 | err_if_else:\r | |
7274 | onig_node_free(condition);\r | |
7275 | return ONIGERR_END_PATTERN_IN_GROUP;\r | |
7276 | }\r | |
14b0e578 | 7277 | \r |
b602265d DG |
7278 | if (PPEEK_IS(')')) { /* case: empty body: make backref checker */\r |
7279 | if (condition_is_checker == 0) {\r | |
7280 | onig_node_free(condition);\r | |
7281 | return ONIGERR_INVALID_IF_ELSE_SYNTAX;\r | |
7282 | }\r | |
7283 | PFETCH(c);\r | |
7284 | *np = condition;\r | |
7285 | }\r | |
7286 | else { /* if-else */\r | |
7287 | int then_is_empty;\r | |
7288 | Node *Then, *Else;\r | |
14b0e578 | 7289 | \r |
b602265d DG |
7290 | if (PPEEK_IS('|')) {\r |
7291 | PFETCH(c);\r | |
7292 | Then = 0;\r | |
7293 | then_is_empty = 1;\r | |
7294 | }\r | |
7295 | else\r | |
7296 | then_is_empty = 0;\r | |
14b0e578 | 7297 | \r |
b602265d DG |
7298 | r = fetch_token(tok, &p, end, env);\r |
7299 | if (r < 0) {\r | |
7300 | onig_node_free(condition);\r | |
7301 | return r;\r | |
7302 | }\r | |
7303 | r = parse_subexp(&target, tok, term, &p, end, env);\r | |
7304 | if (r < 0) {\r | |
7305 | onig_node_free(condition);\r | |
7306 | onig_node_free(target);\r | |
7307 | return r;\r | |
7308 | }\r | |
14b0e578 | 7309 | \r |
b602265d DG |
7310 | if (then_is_empty != 0) {\r |
7311 | Else = target;\r | |
7312 | }\r | |
7313 | else {\r | |
7314 | if (NODE_TYPE(target) == NODE_ALT) {\r | |
7315 | Then = NODE_CAR(target);\r | |
7316 | if (NODE_CDR(NODE_CDR(target)) == NULL_NODE) {\r | |
7317 | Else = NODE_CAR(NODE_CDR(target));\r | |
7318 | cons_node_free_alone(NODE_CDR(target));\r | |
7319 | }\r | |
7320 | else {\r | |
7321 | Else = NODE_CDR(target);\r | |
7322 | }\r | |
7323 | cons_node_free_alone(target);\r | |
7324 | }\r | |
7325 | else {\r | |
7326 | Then = target;\r | |
7327 | Else = 0;\r | |
7328 | }\r | |
7329 | }\r | |
14b0e578 | 7330 | \r |
b602265d DG |
7331 | *np = node_new_enclosure_if_else(condition, Then, Else);\r |
7332 | if (IS_NULL(*np)) {\r | |
7333 | onig_node_free(condition);\r | |
7334 | onig_node_free(Then);\r | |
7335 | onig_node_free(Else);\r | |
7336 | return ONIGERR_MEMORY;\r | |
7337 | }\r | |
7338 | }\r | |
7339 | goto end;\r | |
14b0e578 | 7340 | }\r |
14b0e578 | 7341 | else {\r |
b602265d | 7342 | return ONIGERR_UNDEFINED_GROUP_OPTION;\r |
14b0e578 | 7343 | }\r |
14b0e578 CS |
7344 | break;\r |
7345 | \r | |
7346 | case '@':\r | |
7347 | if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) {\r | |
b602265d DG |
7348 | if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {\r |
7349 | PFETCH(c);\r | |
7350 | if (c == '<' || c == '\'') {\r | |
7351 | list_capture = 1;\r | |
7352 | goto named_group2; /* (?@<name>...) */\r | |
7353 | }\r | |
7354 | PUNFETCH;\r | |
7355 | }\r | |
7356 | \r | |
7357 | *np = node_new_memory(0);\r | |
7358 | CHECK_NULL_RETURN_MEMERR(*np);\r | |
7359 | num = scan_env_add_mem_entry(env);\r | |
7360 | if (num < 0) {\r | |
7361 | return num;\r | |
7362 | }\r | |
7363 | else if (num >= (int )MEM_STATUS_BITS_NUM) {\r | |
7364 | return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;\r | |
7365 | }\r | |
7366 | ENCLOSURE_(*np)->m.regnum = num;\r | |
7367 | MEM_STATUS_ON_SIMPLE(env->capture_history, num);\r | |
14b0e578 CS |
7368 | }\r |
7369 | else {\r | |
b602265d | 7370 | return ONIGERR_UNDEFINED_GROUP_OPTION;\r |
14b0e578 CS |
7371 | }\r |
7372 | break;\r | |
7373 | \r | |
7374 | #ifdef USE_POSIXLINE_OPTION\r | |
7375 | case 'p':\r | |
7376 | #endif\r | |
7377 | case '-': case 'i': case 'm': case 's': case 'x':\r | |
b602265d | 7378 | case 'W': case 'D': case 'S': case 'P':\r |
14b0e578 | 7379 | {\r |
b602265d DG |
7380 | int neg = 0;\r |
7381 | \r | |
7382 | while (1) {\r | |
7383 | switch (c) {\r | |
7384 | case ':':\r | |
7385 | case ')':\r | |
7386 | break;\r | |
7387 | \r | |
7388 | case '-': neg = 1; break;\r | |
7389 | case 'x': OPTION_NEGATE(option, ONIG_OPTION_EXTEND, neg); break;\r | |
7390 | case 'i': OPTION_NEGATE(option, ONIG_OPTION_IGNORECASE, neg); break;\r | |
7391 | case 's':\r | |
7392 | if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {\r | |
7393 | OPTION_NEGATE(option, ONIG_OPTION_MULTILINE, neg);\r | |
7394 | }\r | |
7395 | else\r | |
7396 | return ONIGERR_UNDEFINED_GROUP_OPTION;\r | |
7397 | break;\r | |
7398 | \r | |
7399 | case 'm':\r | |
7400 | if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {\r | |
7401 | OPTION_NEGATE(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 1 : 0));\r | |
7402 | }\r | |
7403 | else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) {\r | |
7404 | OPTION_NEGATE(option, ONIG_OPTION_MULTILINE, neg);\r | |
7405 | }\r | |
7406 | else\r | |
7407 | return ONIGERR_UNDEFINED_GROUP_OPTION;\r | |
7408 | break;\r | |
14b0e578 | 7409 | #ifdef USE_POSIXLINE_OPTION\r |
b602265d DG |
7410 | case 'p':\r |
7411 | OPTION_NEGATE(option, ONIG_OPTION_MULTILINE|ONIG_OPTION_SINGLELINE, neg);\r | |
7412 | break;\r | |
14b0e578 | 7413 | #endif\r |
b602265d DG |
7414 | case 'W': OPTION_NEGATE(option, ONIG_OPTION_WORD_IS_ASCII, neg); break;\r |
7415 | case 'D': OPTION_NEGATE(option, ONIG_OPTION_DIGIT_IS_ASCII, neg); break;\r | |
7416 | case 'S': OPTION_NEGATE(option, ONIG_OPTION_SPACE_IS_ASCII, neg); break;\r | |
7417 | case 'P': OPTION_NEGATE(option, ONIG_OPTION_POSIX_IS_ASCII, neg); break;\r | |
7418 | \r | |
7419 | default:\r | |
7420 | return ONIGERR_UNDEFINED_GROUP_OPTION;\r | |
7421 | }\r | |
7422 | \r | |
7423 | if (c == ')') {\r | |
7424 | *np = node_new_option(option);\r | |
7425 | CHECK_NULL_RETURN_MEMERR(*np);\r | |
7426 | *src = p;\r | |
7427 | return 2; /* option only */\r | |
7428 | }\r | |
7429 | else if (c == ':') {\r | |
7430 | OnigOptionType prev = env->options;\r | |
7431 | \r | |
7432 | env->options = option;\r | |
7433 | r = fetch_token(tok, &p, end, env);\r | |
7434 | if (r < 0) return r;\r | |
7435 | r = parse_subexp(&target, tok, term, &p, end, env);\r | |
7436 | env->options = prev;\r | |
7437 | if (r < 0) {\r | |
7438 | onig_node_free(target);\r | |
7439 | return r;\r | |
7440 | }\r | |
7441 | *np = node_new_option(option);\r | |
7442 | CHECK_NULL_RETURN_MEMERR(*np);\r | |
7443 | NODE_BODY(*np) = target;\r | |
7444 | *src = p;\r | |
7445 | return 0;\r | |
7446 | }\r | |
7447 | \r | |
7448 | if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r | |
7449 | PFETCH(c);\r | |
7450 | }\r | |
14b0e578 CS |
7451 | }\r |
7452 | break;\r | |
7453 | \r | |
7454 | default:\r | |
7455 | return ONIGERR_UNDEFINED_GROUP_OPTION;\r | |
7456 | }\r | |
7457 | }\r | |
b602265d DG |
7458 | #ifdef USE_CALLOUT\r |
7459 | else if (c == '*' &&\r | |
7460 | IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME)) {\r | |
7461 | PINC;\r | |
7462 | r = parse_callout_of_name(np, ')', &p, end, env);\r | |
7463 | if (r != 0) return r;\r | |
7464 | \r | |
7465 | goto end;\r | |
7466 | }\r | |
7467 | #endif\r | |
14b0e578 | 7468 | else {\r |
b602265d | 7469 | if (ONIG_IS_OPTION_ON(env->options, ONIG_OPTION_DONT_CAPTURE_GROUP))\r |
14b0e578 CS |
7470 | goto group;\r |
7471 | \r | |
b602265d | 7472 | *np = node_new_memory(0);\r |
14b0e578 CS |
7473 | CHECK_NULL_RETURN_MEMERR(*np);\r |
7474 | num = scan_env_add_mem_entry(env);\r | |
7475 | if (num < 0) return num;\r | |
b602265d | 7476 | ENCLOSURE_(*np)->m.regnum = num;\r |
14b0e578 CS |
7477 | }\r |
7478 | \r | |
7479 | CHECK_NULL_RETURN_MEMERR(*np);\r | |
7480 | r = fetch_token(tok, &p, end, env);\r | |
7481 | if (r < 0) return r;\r | |
7482 | r = parse_subexp(&target, tok, term, &p, end, env);\r | |
b602265d DG |
7483 | if (r < 0) {\r |
7484 | onig_node_free(target);\r | |
7485 | return r;\r | |
7486 | }\r | |
14b0e578 | 7487 | \r |
b602265d DG |
7488 | NODE_BODY(*np) = target;\r |
7489 | \r | |
7490 | if (NODE_TYPE(*np) == NODE_ENCLOSURE) {\r | |
7491 | if (ENCLOSURE_(*np)->type == ENCLOSURE_MEMORY) {\r | |
14b0e578 | 7492 | /* Don't move this to previous of parse_subexp() */\r |
b602265d | 7493 | r = scan_env_set_mem_node(env, ENCLOSURE_(*np)->m.regnum, *np);\r |
14b0e578 CS |
7494 | if (r != 0) return r;\r |
7495 | }\r | |
7496 | }\r | |
7497 | \r | |
b602265d | 7498 | end:\r |
14b0e578 CS |
7499 | *src = p;\r |
7500 | return 0;\r | |
7501 | }\r | |
7502 | \r | |
/*
 * Spellings of the common quantifiers, used when formatting the
 * "nested repeat operator" warning.  The table is indexed with the values
 * returned by quantifier_type_num().
 */
static const char* PopularQStr[] = {
  "?",    /* 0 */
  "*",    /* 1 */
  "+",    /* 2 */
  "??",   /* 3 */
  "*?",   /* 4 */
  "+?"    /* 5 */
};
7506 | \r | |
/*
 * Text describing what a pair of nested quantifiers was replaced with,
 * used when formatting the "nested repeat operator" warning.  The table is
 * indexed with the value read from ReduceTypeTable[][].
 * NOTE(review): the two leading empty entries presumably line up with
 * RQ_ASIS and RQ_DEL, which the warning code never prints - confirm against
 * the enum definition.
 */
static const char* ReduceQStr[] = {
  "",           /* 0 */
  "",           /* 1 */
  "*",          /* 2 */
  "*?",         /* 3 */
  "??",         /* 4 */
  "+ and ??",   /* 5 */
  "+? and ?"    /* 6 */
};
7510 | \r | |
7511 | static int\r | |
7512 | set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)\r | |
7513 | {\r | |
b602265d | 7514 | QuantNode* qn;\r |
14b0e578 | 7515 | \r |
b602265d DG |
7516 | qn = QUANT_(qnode);\r |
7517 | if (qn->lower == 1 && qn->upper == 1)\r | |
14b0e578 | 7518 | return 1;\r |
14b0e578 | 7519 | \r |
b602265d DG |
7520 | switch (NODE_TYPE(target)) {\r |
7521 | case NODE_STRING:\r | |
14b0e578 | 7522 | if (! group) {\r |
b602265d DG |
7523 | if (str_node_can_be_split(target, env->enc)) {\r |
7524 | Node* n = str_node_split_last_char(target, env->enc);\r | |
7525 | if (IS_NOT_NULL(n)) {\r | |
7526 | NODE_BODY(qnode) = n;\r | |
7527 | return 2;\r | |
7528 | }\r | |
14b0e578 CS |
7529 | }\r |
7530 | }\r | |
7531 | break;\r | |
7532 | \r | |
b602265d | 7533 | case NODE_QUANT:\r |
14b0e578 CS |
7534 | { /* check redundant double repeat. */\r |
7535 | /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */\r | |
b602265d DG |
7536 | QuantNode* qnt = QUANT_(target);\r |
7537 | int nestq_num = quantifier_type_num(qn);\r | |
7538 | int targetq_num = quantifier_type_num(qnt);\r | |
14b0e578 CS |
7539 | \r |
7540 | #ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR\r | |
b602265d DG |
7541 | if (targetq_num >= 0 && nestq_num >= 0 &&\r |
7542 | IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {\r | |
14b0e578 CS |
7543 | UChar buf[WARN_BUFSIZE];\r |
7544 | \r | |
7545 | switch(ReduceTypeTable[targetq_num][nestq_num]) {\r | |
7546 | case RQ_ASIS:\r | |
7547 | break;\r | |
7548 | \r | |
7549 | case RQ_DEL:\r | |
7550 | if (onig_verb_warn != onig_null_warn) {\r | |
7551 | onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,\r | |
b602265d DG |
7552 | env->pattern, env->pattern_end,\r |
7553 | (UChar* )"redundant nested repeat operator");\r | |
14b0e578 CS |
7554 | (*onig_verb_warn)((char* )buf);\r |
7555 | }\r | |
7556 | goto warn_exit;\r | |
7557 | break;\r | |
7558 | \r | |
7559 | default:\r | |
7560 | if (onig_verb_warn != onig_null_warn) {\r | |
7561 | onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,\r | |
7562 | env->pattern, env->pattern_end,\r | |
7563 | (UChar* )"nested repeat operator %s and %s was replaced with '%s'",\r | |
7564 | PopularQStr[targetq_num], PopularQStr[nestq_num],\r | |
7565 | ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]);\r | |
7566 | (*onig_verb_warn)((char* )buf);\r | |
7567 | }\r | |
7568 | goto warn_exit;\r | |
7569 | break;\r | |
7570 | }\r | |
7571 | }\r | |
7572 | \r | |
7573 | warn_exit:\r | |
7574 | #endif\r | |
b602265d DG |
7575 | if (targetq_num >= 0 && nestq_num < 0) {\r |
7576 | if (targetq_num == 1 || targetq_num == 2) { /* * or + */\r | |
7577 | /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */\r | |
7578 | if (! IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) {\r | |
7579 | qn->upper = (qn->lower == 0 ? 1 : qn->lower);\r | |
7580 | }\r | |
7581 | }\r | |
7582 | }\r | |
7583 | else {\r | |
7584 | NODE_BODY(qnode) = target;\r | |
7585 | onig_reduce_nested_quantifier(qnode, target);\r | |
7586 | goto q_exit;\r | |
14b0e578 CS |
7587 | }\r |
7588 | }\r | |
7589 | break;\r | |
7590 | \r | |
7591 | default:\r | |
7592 | break;\r | |
7593 | }\r | |
7594 | \r | |
b602265d | 7595 | NODE_BODY(qnode) = target;\r |
14b0e578 CS |
7596 | q_exit:\r |
7597 | return 0;\r | |
7598 | }\r | |
7599 | \r | |
7600 | \r | |
14b0e578 CS |
7601 | #ifndef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS\r |
7602 | static int\r | |
7603 | clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc)\r | |
7604 | {\r | |
7605 | BBuf *tbuf;\r | |
7606 | int r;\r | |
7607 | \r | |
7608 | if (IS_NCCLASS_NOT(cc)) {\r | |
7609 | bitset_invert(cc->bs);\r | |
7610 | \r | |
7611 | if (! ONIGENC_IS_SINGLEBYTE(enc)) {\r | |
7612 | r = not_code_range_buf(enc, cc->mbuf, &tbuf);\r | |
7613 | if (r != 0) return r;\r | |
7614 | \r | |
7615 | bbuf_free(cc->mbuf);\r | |
7616 | cc->mbuf = tbuf;\r | |
7617 | }\r | |
7618 | \r | |
7619 | NCCLASS_CLEAR_NOT(cc);\r | |
7620 | }\r | |
7621 | \r | |
7622 | return 0;\r | |
7623 | }\r | |
7624 | #endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */\r | |
7625 | \r | |
/* Context passed to i_apply_case_fold() through its void* argument. */
typedef struct {
  ScanEnv* env;       /* parse environment; supplies the encoding */
  CClassNode* cc;     /* character class being extended with case-fold targets */
  Node* alt_root;     /* head of the alternation built for multi-char folds; NULL_NODE if none */
  Node** ptail;       /* slot where the next alternation node is stored */
} IApplyCaseFoldArg;
7632 | \r | |
7633 | static int\r | |
b602265d | 7634 | i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], int to_len, void* arg)\r |
14b0e578 CS |
7635 | {\r |
7636 | IApplyCaseFoldArg* iarg;\r | |
7637 | ScanEnv* env;\r | |
7638 | CClassNode* cc;\r | |
7639 | BitSetRef bs;\r | |
7640 | \r | |
7641 | iarg = (IApplyCaseFoldArg* )arg;\r | |
7642 | env = iarg->env;\r | |
7643 | cc = iarg->cc;\r | |
7644 | bs = cc->bs;\r | |
7645 | \r | |
7646 | if (to_len == 1) {\r | |
7647 | int is_in = onig_is_code_in_cc(env->enc, from, cc);\r | |
7648 | #ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS\r | |
7649 | if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) ||\r | |
b602265d | 7650 | (is_in == 0 && IS_NCCLASS_NOT(cc))) {\r |
14b0e578 | 7651 | if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {\r |
b602265d | 7652 | add_code_range(&(cc->mbuf), env, *to, *to);\r |
14b0e578 CS |
7653 | }\r |
7654 | else {\r | |
b602265d | 7655 | BITSET_SET_BIT(bs, *to);\r |
14b0e578 CS |
7656 | }\r |
7657 | }\r | |
7658 | #else\r | |
7659 | if (is_in != 0) {\r | |
7660 | if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {\r | |
b602265d DG |
7661 | if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc);\r |
7662 | add_code_range(&(cc->mbuf), env, *to, *to);\r | |
14b0e578 CS |
7663 | }\r |
7664 | else {\r | |
b602265d DG |
7665 | if (IS_NCCLASS_NOT(cc)) {\r |
7666 | BITSET_CLEAR_BIT(bs, *to);\r | |
7667 | }\r | |
7668 | else\r | |
7669 | BITSET_SET_BIT(bs, *to);\r | |
14b0e578 CS |
7670 | }\r |
7671 | }\r | |
7672 | #endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */\r | |
7673 | }\r | |
7674 | else {\r | |
7675 | int r, i, len;\r | |
7676 | UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];\r | |
7677 | Node *snode = NULL_NODE;\r | |
7678 | \r | |
7679 | if (onig_is_code_in_cc(env->enc, from, cc)\r | |
7680 | #ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS\r | |
b602265d | 7681 | && !IS_NCCLASS_NOT(cc)\r |
14b0e578 | 7682 | #endif\r |
b602265d | 7683 | ) {\r |
14b0e578 | 7684 | for (i = 0; i < to_len; i++) {\r |
b602265d DG |
7685 | len = ONIGENC_CODE_TO_MBC(env->enc, to[i], buf);\r |
7686 | if (i == 0) {\r | |
7687 | snode = onig_node_new_str(buf, buf + len);\r | |
7688 | CHECK_NULL_RETURN_MEMERR(snode);\r | |
7689 | \r | |
7690 | /* char-class expanded multi-char only\r | |
7691 | compare with string folded at match time. */\r | |
7692 | NODE_STRING_SET_AMBIG(snode);\r | |
7693 | }\r | |
7694 | else {\r | |
7695 | r = onig_node_str_cat(snode, buf, buf + len);\r | |
7696 | if (r < 0) {\r | |
7697 | onig_node_free(snode);\r | |
7698 | return r;\r | |
7699 | }\r | |
7700 | }\r | |
14b0e578 CS |
7701 | }\r |
7702 | \r | |
7703 | *(iarg->ptail) = onig_node_new_alt(snode, NULL_NODE);\r | |
7704 | CHECK_NULL_RETURN_MEMERR(*(iarg->ptail));\r | |
b602265d | 7705 | iarg->ptail = &(NODE_CDR((*(iarg->ptail))));\r |
14b0e578 CS |
7706 | }\r |
7707 | }\r | |
7708 | \r | |
7709 | return 0;\r | |
7710 | }\r | |
7711 | \r | |
7712 | static int\r | |
b602265d DG |
7713 | parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,\r |
7714 | ScanEnv* env)\r | |
14b0e578 CS |
7715 | {\r |
7716 | int r, len, group = 0;\r | |
7717 | Node* qn;\r | |
7718 | Node** targetp;\r | |
7719 | \r | |
7720 | *np = NULL;\r | |
7721 | if (tok->type == (enum TokenSyms )term)\r | |
7722 | goto end_of_token;\r | |
7723 | \r | |
7724 | switch (tok->type) {\r | |
7725 | case TK_ALT:\r | |
7726 | case TK_EOT:\r | |
7727 | end_of_token:\r | |
7728 | *np = node_new_empty();\r | |
7729 | return tok->type;\r | |
7730 | break;\r | |
7731 | \r | |
7732 | case TK_SUBEXP_OPEN:\r | |
b602265d | 7733 | r = parse_enclosure(np, tok, TK_SUBEXP_CLOSE, src, end, env);\r |
14b0e578 CS |
7734 | if (r < 0) return r;\r |
7735 | if (r == 1) group = 1;\r | |
7736 | else if (r == 2) { /* option only */\r | |
7737 | Node* target;\r | |
b602265d | 7738 | OnigOptionType prev = env->options;\r |
14b0e578 | 7739 | \r |
b602265d | 7740 | env->options = ENCLOSURE_(*np)->o.options;\r |
14b0e578 CS |
7741 | r = fetch_token(tok, src, end, env);\r |
7742 | if (r < 0) return r;\r | |
7743 | r = parse_subexp(&target, tok, term, src, end, env);\r | |
b602265d DG |
7744 | env->options = prev;\r |
7745 | if (r < 0) {\r | |
7746 | onig_node_free(target);\r | |
7747 | return r;\r | |
7748 | }\r | |
7749 | NODE_BODY(*np) = target;\r | |
14b0e578 CS |
7750 | return tok->type;\r |
7751 | }\r | |
7752 | break;\r | |
7753 | \r | |
7754 | case TK_SUBEXP_CLOSE:\r | |
7755 | if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP))\r | |
7756 | return ONIGERR_UNMATCHED_CLOSE_PARENTHESIS;\r | |
7757 | \r | |
7758 | if (tok->escaped) goto tk_raw_byte;\r | |
7759 | else goto tk_byte;\r | |
7760 | break;\r | |
7761 | \r | |
7762 | case TK_STRING:\r | |
7763 | tk_byte:\r | |
7764 | {\r | |
7765 | *np = node_new_str(tok->backp, *src);\r | |
7766 | CHECK_NULL_RETURN_MEMERR(*np);\r | |
7767 | \r | |
7768 | while (1) {\r | |
b602265d DG |
7769 | r = fetch_token(tok, src, end, env);\r |
7770 | if (r < 0) return r;\r | |
7771 | if (r != TK_STRING) break;\r | |
14b0e578 | 7772 | \r |
b602265d DG |
7773 | r = onig_node_str_cat(*np, tok->backp, *src);\r |
7774 | if (r < 0) return r;\r | |
14b0e578 CS |
7775 | }\r |
7776 | \r | |
7777 | string_end:\r | |
7778 | targetp = np;\r | |
7779 | goto repeat;\r | |
7780 | }\r | |
7781 | break;\r | |
7782 | \r | |
7783 | case TK_RAW_BYTE:\r | |
7784 | tk_raw_byte:\r | |
7785 | {\r | |
7786 | *np = node_new_str_raw_char((UChar )tok->u.c);\r | |
7787 | CHECK_NULL_RETURN_MEMERR(*np);\r | |
7788 | len = 1;\r | |
7789 | while (1) {\r | |
b602265d DG |
7790 | if (len >= ONIGENC_MBC_MINLEN(env->enc)) {\r |
7791 | if (len == enclen(env->enc, STR_(*np)->s)) {/* should not enclen_end() */\r | |
7792 | r = fetch_token(tok, src, end, env);\r | |
7793 | NODE_STRING_CLEAR_RAW(*np);\r | |
7794 | goto string_end;\r | |
7795 | }\r | |
7796 | }\r | |
7797 | \r | |
7798 | r = fetch_token(tok, src, end, env);\r | |
7799 | if (r < 0) return r;\r | |
7800 | if (r != TK_RAW_BYTE) {\r | |
7801 | /* Don't use this, it is wrong for little endian encodings. */\r | |
14b0e578 | 7802 | #ifdef USE_PAD_TO_SHORT_BYTE_CHAR\r |
b602265d DG |
7803 | int rem;\r |
7804 | if (len < ONIGENC_MBC_MINLEN(env->enc)) {\r | |
7805 | rem = ONIGENC_MBC_MINLEN(env->enc) - len;\r | |
7806 | (void )node_str_head_pad(STR_(*np), rem, (UChar )0);\r | |
7807 | if (len + rem == enclen(env->enc, STR_(*np)->s)) {\r | |
7808 | NODE_STRING_CLEAR_RAW(*np);\r | |
7809 | goto string_end;\r | |
7810 | }\r | |
7811 | }\r | |
14b0e578 | 7812 | #endif\r |
b602265d DG |
7813 | return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;\r |
7814 | }\r | |
14b0e578 | 7815 | \r |
b602265d DG |
7816 | r = node_str_cat_char(*np, (UChar )tok->u.c);\r |
7817 | if (r < 0) return r;\r | |
14b0e578 | 7818 | \r |
b602265d | 7819 | len++;\r |
14b0e578 CS |
7820 | }\r |
7821 | }\r | |
7822 | break;\r | |
7823 | \r | |
7824 | case TK_CODE_POINT:\r | |
7825 | {\r | |
7826 | UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];\r | |
7827 | int num = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf);\r | |
7828 | if (num < 0) return num;\r | |
7829 | #ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG\r | |
7830 | *np = node_new_str_raw(buf, buf + num);\r | |
7831 | #else\r | |
7832 | *np = node_new_str(buf, buf + num);\r | |
7833 | #endif\r | |
7834 | CHECK_NULL_RETURN_MEMERR(*np);\r | |
7835 | }\r | |
7836 | break;\r | |
7837 | \r | |
7838 | case TK_QUOTE_OPEN:\r | |
7839 | {\r | |
7840 | OnigCodePoint end_op[2];\r | |
7841 | UChar *qstart, *qend, *nextp;\r | |
7842 | \r | |
7843 | end_op[0] = (OnigCodePoint )MC_ESC(env->syntax);\r | |
7844 | end_op[1] = (OnigCodePoint )'E';\r | |
7845 | qstart = *src;\r | |
7846 | qend = find_str_position(end_op, 2, qstart, end, &nextp, env->enc);\r | |
7847 | if (IS_NULL(qend)) {\r | |
b602265d | 7848 | nextp = qend = end;\r |
14b0e578 CS |
7849 | }\r |
7850 | *np = node_new_str(qstart, qend);\r | |
7851 | CHECK_NULL_RETURN_MEMERR(*np);\r | |
7852 | *src = nextp;\r | |
7853 | }\r | |
7854 | break;\r | |
7855 | \r | |
7856 | case TK_CHAR_TYPE:\r | |
7857 | {\r | |
7858 | switch (tok->u.prop.ctype) {\r | |
7859 | case ONIGENC_CTYPE_WORD:\r | |
b602265d DG |
7860 | *np = node_new_ctype(tok->u.prop.ctype, tok->u.prop.not, env->options);\r |
7861 | CHECK_NULL_RETURN_MEMERR(*np);\r | |
7862 | break;\r | |
14b0e578 CS |
7863 | \r |
7864 | case ONIGENC_CTYPE_SPACE:\r | |
7865 | case ONIGENC_CTYPE_DIGIT:\r | |
7866 | case ONIGENC_CTYPE_XDIGIT:\r | |
b602265d DG |
7867 | {\r |
7868 | CClassNode* cc;\r | |
7869 | \r | |
7870 | *np = node_new_cclass();\r | |
7871 | CHECK_NULL_RETURN_MEMERR(*np);\r | |
7872 | cc = CCLASS_(*np);\r | |
7873 | add_ctype_to_cc(cc, tok->u.prop.ctype, 0, env);\r | |
7874 | if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);\r | |
7875 | }\r | |
7876 | break;\r | |
14b0e578 CS |
7877 | \r |
7878 | default:\r | |
b602265d DG |
7879 | return ONIGERR_PARSER_BUG;\r |
7880 | break;\r | |
14b0e578 CS |
7881 | }\r |
7882 | }\r | |
7883 | break;\r | |
7884 | \r | |
7885 | case TK_CHAR_PROPERTY:\r | |
7886 | r = parse_char_property(np, tok, src, end, env);\r | |
7887 | if (r != 0) return r;\r | |
7888 | break;\r | |
7889 | \r | |
7890 | case TK_CC_OPEN:\r | |
7891 | {\r | |
7892 | CClassNode* cc;\r | |
7893 | \r | |
7894 | r = parse_char_class(np, tok, src, end, env);\r | |
7895 | if (r != 0) return r;\r | |
7896 | \r | |
b602265d DG |
7897 | cc = CCLASS_(*np);\r |
7898 | if (IS_IGNORECASE(env->options)) {\r | |
7899 | IApplyCaseFoldArg iarg;\r | |
7900 | \r | |
7901 | iarg.env = env;\r | |
7902 | iarg.cc = cc;\r | |
7903 | iarg.alt_root = NULL_NODE;\r | |
7904 | iarg.ptail = &(iarg.alt_root);\r | |
7905 | \r | |
7906 | r = ONIGENC_APPLY_ALL_CASE_FOLD(env->enc, env->case_fold_flag,\r | |
7907 | i_apply_case_fold, &iarg);\r | |
7908 | if (r != 0) {\r | |
7909 | onig_node_free(iarg.alt_root);\r | |
7910 | return r;\r | |
7911 | }\r | |
7912 | if (IS_NOT_NULL(iarg.alt_root)) {\r | |
14b0e578 CS |
7913 | Node* work = onig_node_new_alt(*np, iarg.alt_root);\r |
7914 | if (IS_NULL(work)) {\r | |
7915 | onig_node_free(iarg.alt_root);\r | |
7916 | return ONIGERR_MEMORY;\r | |
7917 | }\r | |
7918 | *np = work;\r | |
b602265d | 7919 | }\r |
14b0e578 CS |
7920 | }\r |
7921 | }\r | |
7922 | break;\r | |
7923 | \r | |
7924 | case TK_ANYCHAR:\r | |
7925 | *np = node_new_anychar();\r | |
7926 | CHECK_NULL_RETURN_MEMERR(*np);\r | |
7927 | break;\r | |
7928 | \r | |
7929 | case TK_ANYCHAR_ANYTIME:\r | |
7930 | *np = node_new_anychar();\r | |
7931 | CHECK_NULL_RETURN_MEMERR(*np);\r | |
7932 | qn = node_new_quantifier(0, REPEAT_INFINITE, 0);\r | |
7933 | CHECK_NULL_RETURN_MEMERR(qn);\r | |
b602265d | 7934 | NODE_BODY(qn) = *np;\r |
14b0e578 CS |
7935 | *np = qn;\r |
7936 | break;\r | |
7937 | \r | |
7938 | case TK_BACKREF:\r | |
7939 | len = tok->u.backref.num;\r | |
7940 | *np = node_new_backref(len,\r | |
b602265d DG |
7941 | (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)),\r |
7942 | tok->u.backref.by_name,\r | |
14b0e578 | 7943 | #ifdef USE_BACKREF_WITH_LEVEL\r |
b602265d DG |
7944 | tok->u.backref.exist_level,\r |
7945 | tok->u.backref.level,\r | |
14b0e578 | 7946 | #endif\r |
b602265d | 7947 | env);\r |
14b0e578 CS |
7948 | CHECK_NULL_RETURN_MEMERR(*np);\r |
7949 | break;\r | |
7950 | \r | |
b602265d | 7951 | #ifdef USE_CALL\r |
14b0e578 CS |
7952 | case TK_CALL:\r |
7953 | {\r | |
7954 | int gnum = tok->u.call.gnum;\r | |
7955 | \r | |
b602265d DG |
7956 | *np = node_new_call(tok->u.call.name, tok->u.call.name_end,\r |
7957 | gnum, tok->u.call.by_number);\r | |
14b0e578 CS |
7958 | CHECK_NULL_RETURN_MEMERR(*np);\r |
7959 | env->num_call++;\r | |
b602265d DG |
7960 | if (tok->u.call.by_number != 0 && gnum == 0) {\r |
7961 | env->has_call_zero = 1;\r | |
7962 | }\r | |
14b0e578 CS |
7963 | }\r |
7964 | break;\r | |
7965 | #endif\r | |
7966 | \r | |
7967 | case TK_ANCHOR:\r | |
b602265d DG |
7968 | {\r |
7969 | int ascii_mode =\r | |
7970 | IS_WORD_ASCII(env->options) && IS_WORD_ANCHOR_TYPE(tok->u.anchor) ? 1 : 0;\r | |
7971 | *np = onig_node_new_anchor(tok->u.anchor, ascii_mode);\r | |
7972 | }\r | |
14b0e578 CS |
7973 | break;\r |
7974 | \r | |
7975 | case TK_OP_REPEAT:\r | |
7976 | case TK_INTERVAL:\r | |
7977 | if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS)) {\r | |
7978 | if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS))\r | |
b602265d | 7979 | return ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED;\r |
14b0e578 | 7980 | else\r |
b602265d | 7981 | *np = node_new_empty();\r |
14b0e578 CS |
7982 | }\r |
7983 | else {\r | |
7984 | goto tk_byte;\r | |
7985 | }\r | |
7986 | break;\r | |
7987 | \r | |
b602265d DG |
7988 | case TK_KEEP:\r |
7989 | r = node_new_keep(np, env);\r | |
7990 | if (r < 0) return r;\r | |
7991 | break;\r | |
7992 | \r | |
7993 | case TK_GENERAL_NEWLINE:\r | |
7994 | r = node_new_general_newline(np, env);\r | |
7995 | if (r < 0) return r;\r | |
7996 | break;\r | |
7997 | \r | |
7998 | case TK_NO_NEWLINE:\r | |
7999 | r = node_new_no_newline(np, env);\r | |
8000 | if (r < 0) return r;\r | |
8001 | break;\r | |
8002 | \r | |
8003 | case TK_TRUE_ANYCHAR:\r | |
8004 | r = node_new_true_anychar(np, env);\r | |
8005 | if (r < 0) return r;\r | |
8006 | break;\r | |
8007 | \r | |
8008 | case TK_EXTENDED_GRAPHEME_CLUSTER:\r | |
8009 | r = make_extended_grapheme_cluster(np, env);\r | |
8010 | if (r < 0) return r;\r | |
8011 | break;\r | |
8012 | \r | |
14b0e578 CS |
8013 | default:\r |
8014 | return ONIGERR_PARSER_BUG;\r | |
8015 | break;\r | |
8016 | }\r | |
8017 | \r | |
8018 | {\r | |
8019 | targetp = np;\r | |
8020 | \r | |
8021 | re_entry:\r | |
8022 | r = fetch_token(tok, src, end, env);\r | |
8023 | if (r < 0) return r;\r | |
8024 | \r | |
8025 | repeat:\r | |
8026 | if (r == TK_OP_REPEAT || r == TK_INTERVAL) {\r | |
8027 | if (is_invalid_quantifier_target(*targetp))\r | |
b602265d | 8028 | return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID;\r |
14b0e578 CS |
8029 | \r |
8030 | qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper,\r | |
b602265d | 8031 | (r == TK_INTERVAL ? 1 : 0));\r |
14b0e578 | 8032 | CHECK_NULL_RETURN_MEMERR(qn);\r |
b602265d | 8033 | QUANT_(qn)->greedy = tok->u.repeat.greedy;\r |
14b0e578 CS |
8034 | r = set_quantifier(qn, *targetp, group, env);\r |
8035 | if (r < 0) {\r | |
b602265d DG |
8036 | onig_node_free(qn);\r |
8037 | return r;\r | |
14b0e578 CS |
8038 | }\r |
8039 | \r | |
8040 | if (tok->u.repeat.possessive != 0) {\r | |
b602265d DG |
8041 | Node* en;\r |
8042 | en = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);\r | |
8043 | if (IS_NULL(en)) {\r | |
8044 | onig_node_free(qn);\r | |
8045 | return ONIGERR_MEMORY;\r | |
8046 | }\r | |
8047 | NODE_BODY(en) = qn;\r | |
8048 | qn = en;\r | |
14b0e578 CS |
8049 | }\r |
8050 | \r | |
8051 | if (r == 0) {\r | |
b602265d | 8052 | *targetp = qn;\r |
14b0e578 CS |
8053 | }\r |
8054 | else if (r == 1) {\r | |
b602265d | 8055 | onig_node_free(qn);\r |
14b0e578 CS |
8056 | }\r |
8057 | else if (r == 2) { /* split case: /abc+/ */\r | |
b602265d DG |
8058 | Node *tmp;\r |
8059 | \r | |
8060 | *targetp = node_new_list(*targetp, NULL);\r | |
8061 | if (IS_NULL(*targetp)) {\r | |
8062 | onig_node_free(qn);\r | |
8063 | return ONIGERR_MEMORY;\r | |
8064 | }\r | |
8065 | tmp = NODE_CDR(*targetp) = node_new_list(qn, NULL);\r | |
8066 | if (IS_NULL(tmp)) {\r | |
8067 | onig_node_free(qn);\r | |
8068 | return ONIGERR_MEMORY;\r | |
8069 | }\r | |
8070 | targetp = &(NODE_CAR(tmp));\r | |
14b0e578 CS |
8071 | }\r |
8072 | goto re_entry;\r | |
8073 | }\r | |
8074 | }\r | |
8075 | \r | |
8076 | return r;\r | |
8077 | }\r | |
8078 | \r | |
8079 | static int\r | |
b602265d DG |
8080 | parse_branch(Node** top, OnigToken* tok, int term, UChar** src, UChar* end,\r |
8081 | ScanEnv* env)\r | |
14b0e578 CS |
8082 | {\r |
8083 | int r;\r | |
8084 | Node *node, **headp;\r | |
8085 | \r | |
8086 | *top = NULL;\r | |
8087 | r = parse_exp(&node, tok, term, src, end, env);\r | |
b602265d DG |
8088 | if (r < 0) {\r |
8089 | onig_node_free(node);\r | |
8090 | return r;\r | |
8091 | }\r | |
14b0e578 CS |
8092 | \r |
8093 | if (r == TK_EOT || r == term || r == TK_ALT) {\r | |
8094 | *top = node;\r | |
8095 | }\r | |
8096 | else {\r | |
8097 | *top = node_new_list(node, NULL);\r | |
b602265d | 8098 | headp = &(NODE_CDR(*top));\r |
14b0e578 CS |
8099 | while (r != TK_EOT && r != term && r != TK_ALT) {\r |
8100 | r = parse_exp(&node, tok, term, src, end, env);\r | |
b602265d DG |
8101 | if (r < 0) {\r |
8102 | onig_node_free(node);\r | |
8103 | return r;\r | |
8104 | }\r | |
14b0e578 | 8105 | \r |
b602265d DG |
8106 | if (NODE_TYPE(node) == NODE_LIST) {\r |
8107 | *headp = node;\r | |
8108 | while (IS_NOT_NULL(NODE_CDR(node))) node = NODE_CDR(node);\r | |
8109 | headp = &(NODE_CDR(node));\r | |
14b0e578 CS |
8110 | }\r |
8111 | else {\r | |
b602265d DG |
8112 | *headp = node_new_list(node, NULL);\r |
8113 | headp = &(NODE_CDR(*headp));\r | |
14b0e578 CS |
8114 | }\r |
8115 | }\r | |
8116 | }\r | |
8117 | \r | |
8118 | return r;\r | |
8119 | }\r | |
8120 | \r | |
8121 | /* term_tok: TK_EOT or TK_SUBEXP_CLOSE */\r | |
8122 | static int\r | |
b602265d DG |
8123 | parse_subexp(Node** top, OnigToken* tok, int term, UChar** src, UChar* end,\r |
8124 | ScanEnv* env)\r | |
14b0e578 CS |
8125 | {\r |
8126 | int r;\r | |
8127 | Node *node, **headp;\r | |
8128 | \r | |
8129 | *top = NULL;\r | |
b602265d DG |
8130 | env->parse_depth++;\r |
8131 | if (env->parse_depth > ParseDepthLimit)\r | |
8132 | return ONIGERR_PARSE_DEPTH_LIMIT_OVER;\r | |
14b0e578 CS |
8133 | r = parse_branch(&node, tok, term, src, end, env);\r |
8134 | if (r < 0) {\r | |
8135 | onig_node_free(node);\r | |
8136 | return r;\r | |
8137 | }\r | |
8138 | \r | |
8139 | if (r == term) {\r | |
8140 | *top = node;\r | |
8141 | }\r | |
8142 | else if (r == TK_ALT) {\r | |
8143 | *top = onig_node_new_alt(node, NULL);\r | |
b602265d | 8144 | headp = &(NODE_CDR(*top));\r |
14b0e578 CS |
8145 | while (r == TK_ALT) {\r |
8146 | r = fetch_token(tok, src, end, env);\r | |
8147 | if (r < 0) return r;\r | |
8148 | r = parse_branch(&node, tok, term, src, end, env);\r | |
b602265d DG |
8149 | if (r < 0) {\r |
8150 | onig_node_free(node);\r | |
8151 | return r;\r | |
8152 | }\r | |
14b0e578 | 8153 | *headp = onig_node_new_alt(node, NULL);\r |
b602265d | 8154 | headp = &(NODE_CDR(*headp));\r |
14b0e578 CS |
8155 | }\r |
8156 | \r | |
8157 | if (tok->type != (enum TokenSyms )term)\r | |
8158 | goto err;\r | |
8159 | }\r | |
8160 | else {\r | |
b602265d | 8161 | onig_node_free(node);\r |
14b0e578 CS |
8162 | err:\r |
8163 | if (term == TK_SUBEXP_CLOSE)\r | |
8164 | return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;\r | |
8165 | else\r | |
8166 | return ONIGERR_PARSER_BUG;\r | |
8167 | }\r | |
8168 | \r | |
b602265d | 8169 | env->parse_depth--;\r |
14b0e578 CS |
8170 | return r;\r |
8171 | }\r | |
8172 | \r | |
8173 | static int\r | |
8174 | parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)\r | |
8175 | {\r | |
8176 | int r;\r | |
8177 | OnigToken tok;\r | |
8178 | \r | |
8179 | r = fetch_token(&tok, src, end, env);\r | |
8180 | if (r < 0) return r;\r | |
8181 | r = parse_subexp(top, &tok, TK_EOT, src, end, env);\r | |
8182 | if (r < 0) return r;\r | |
b602265d DG |
8183 | \r |
8184 | return 0;\r | |
8185 | }\r | |
8186 | \r | |
#ifdef USE_CALL
/*
 * Wrap `node` in an unnamed memory enclosure registered as group 0.
 * On success stores the wrapper in *rnode and returns 0; otherwise returns
 * ONIGERR_MEMORY or the error from scan_env_set_mem_node().
 */
static int
make_call_zero_body(Node* node, ScanEnv* env, Node** rnode)
{
  int status;
  Node* wrapper;

  wrapper = node_new_memory(0 /* 0: is not named */);
  CHECK_NULL_RETURN_MEMERR(wrapper);

  ENCLOSURE_(wrapper)->m.regnum = 0;
  NODE_BODY(wrapper) = node;

  status = scan_env_set_mem_node(env, 0, wrapper);
  if (status != 0) {
    /* NOTE(review): `node` is the wrapper's body at this point, and the
       caller still holds a pointer to it - confirm who owns it. */
    onig_node_free(wrapper);
    return status;
  }

  *rnode = wrapper;
  return 0;
}
#endif
14b0e578 CS |
8208 | \r |
8209 | extern int\r | |
b602265d DG |
8210 | onig_parse_tree(Node** root, const UChar* pattern, const UChar* end,\r |
8211 | regex_t* reg, ScanEnv* env)\r | |
14b0e578 CS |
8212 | {\r |
8213 | int r;\r | |
8214 | UChar* p;\r | |
b602265d DG |
8215 | #ifdef USE_CALLOUT\r |
8216 | RegexExt* ext;\r | |
8217 | #endif\r | |
14b0e578 | 8218 | \r |
14b0e578 | 8219 | names_clear(reg);\r |
14b0e578 CS |
8220 | \r |
8221 | scan_env_clear(env);\r | |
b602265d | 8222 | env->options = reg->options;\r |
14b0e578 CS |
8223 | env->case_fold_flag = reg->case_fold_flag;\r |
8224 | env->enc = reg->enc;\r | |
8225 | env->syntax = reg->syntax;\r | |
8226 | env->pattern = (UChar* )pattern;\r | |
8227 | env->pattern_end = (UChar* )end;\r | |
8228 | env->reg = reg;\r | |
8229 | \r | |
8230 | *root = NULL;\r | |
b602265d DG |
8231 | \r |
8232 | if (! ONIGENC_IS_VALID_MBC_STRING(env->enc, pattern, end))\r | |
8233 | return ONIGERR_INVALID_WIDE_CHAR_VALUE;\r | |
8234 | \r | |
14b0e578 CS |
8235 | p = (UChar* )pattern;\r |
8236 | r = parse_regexp(root, &p, (UChar* )end, env);\r | |
b602265d DG |
8237 | \r |
8238 | #ifdef USE_CALL\r | |
8239 | if (r != 0) return r;\r | |
8240 | \r | |
8241 | if (env->has_call_zero != 0) {\r | |
8242 | Node* zero_node;\r | |
8243 | r = make_call_zero_body(*root, env, &zero_node);\r | |
8244 | if (r != 0) return r;\r | |
8245 | \r | |
8246 | *root = zero_node;\r | |
8247 | }\r | |
8248 | #endif\r | |
8249 | \r | |
14b0e578 | 8250 | reg->num_mem = env->num_mem;\r |
b602265d DG |
8251 | \r |
8252 | #ifdef USE_CALLOUT\r | |
8253 | ext = REG_EXTP(reg);\r | |
8254 | if (IS_NOT_NULL(ext) && ext->callout_num > 0) {\r | |
8255 | r = setup_ext_callout_list_values(reg);\r | |
8256 | }\r | |
8257 | #endif\r | |
8258 | \r | |
14b0e578 CS |
8259 | return r;\r |
8260 | }\r | |
8261 | \r | |
8262 | extern void\r | |
8263 | onig_scan_env_set_error_string(ScanEnv* env, int ecode ARG_UNUSED,\r | |
b602265d | 8264 | UChar* arg, UChar* arg_end)\r |
14b0e578 CS |
8265 | {\r |
8266 | env->error = arg;\r | |
8267 | env->error_end = arg_end;\r | |
8268 | }\r |