]> git.proxmox.com Git - mirror_edk2.git/blame - MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regparse.c
MdeModulePkg/RegularExpressionDxe: Add null pointer check
[mirror_edk2.git] / MdeModulePkg / Universal / RegularExpressionDxe / Oniguruma / regparse.c
CommitLineData
14b0e578
CS
1/**********************************************************************\r
2 regparse.c - Oniguruma (regular expression library)\r
3**********************************************************************/\r
4/*-\r
b602265d 5 * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>\r
14b0e578
CS
6 * All rights reserved.\r
7 *\r
14b0e578
CS
8 * Redistribution and use in source and binary forms, with or without\r
9 * modification, are permitted provided that the following conditions\r
10 * are met:\r
11 * 1. Redistributions of source code must retain the above copyright\r
12 * notice, this list of conditions and the following disclaimer.\r
13 * 2. Redistributions in binary form must reproduce the above copyright\r
14 * notice, this list of conditions and the following disclaimer in the\r
15 * documentation and/or other materials provided with the distribution.\r
16 *\r
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND\r
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\r
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\r
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE\r
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\r
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS\r
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)\r
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT\r
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY\r
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF\r
27 * SUCH DAMAGE.\r
28 */\r
29\r
30#include "regparse.h"\r
31#include "st.h"\r
32\r
b602265d
DG
33#ifdef DEBUG_NODE_FREE\r
34#include <stdio.h>\r
35#endif\r
36\r
37#define INIT_TAG_NAMES_ALLOC_NUM 5\r
38\r
14b0e578
CS
39#define WARN_BUFSIZE 256\r
40\r
41#define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS\r
42\r
b602265d
DG
43#define IS_ALLOWED_CODE_IN_CALLOUT_NAME(c) \\r
44 ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '_' /* || c == '!' */)\r
45#define IS_ALLOWED_CODE_IN_CALLOUT_TAG_NAME(c) \\r
46 ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '_')\r
47\r
48\r
49OnigSyntaxType OnigSyntaxOniguruma = {\r
50 (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |\r
51 ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |\r
52 ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_O_BRACE_OCTAL |\r
53 ONIG_SYN_OP_ESC_CONTROL_CHARS |\r
54 ONIG_SYN_OP_ESC_C_CONTROL )\r
55 & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )\r
56 , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |\r
57 ONIG_SYN_OP2_OPTION_RUBY |\r
58 ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |\r
59 ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE |\r
60 ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP |\r
61 ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS |\r
62 ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME |\r
63 ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER |\r
64 ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE |\r
65 ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT |\r
66 ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP |\r
67 ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |\r
68 ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |\r
69 ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |\r
70 ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |\r
71 ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |\r
72 ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |\r
73 ONIG_SYN_OP2_ESC_H_XDIGIT | ONIG_SYN_OP2_ESC_U_HEX4 )\r
74 , ( SYN_GNU_REGEX_BV | \r
75 ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |\r
76 ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |\r
77 ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |\r
78 ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |\r
79 ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |\r
80 ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |\r
81 ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )\r
82 , ONIG_OPTION_NONE\r
83 ,\r
84 {\r
85 (OnigCodePoint )'\\' /* esc */\r
86 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */\r
87 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */\r
88 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */\r
89 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */\r
90 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */\r
91 }\r
92};\r
14b0e578
CS
93\r
94OnigSyntaxType OnigSyntaxRuby = {\r
95 (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |\r
96 ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |\r
b602265d
DG
97 ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_O_BRACE_OCTAL |\r
98 ONIG_SYN_OP_ESC_CONTROL_CHARS |\r
14b0e578
CS
99 ONIG_SYN_OP_ESC_C_CONTROL )\r
100 & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )\r
101 , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |\r
102 ONIG_SYN_OP2_OPTION_RUBY |\r
103 ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |\r
b602265d
DG
104 ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE |\r
105 ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP |\r
106 ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER |\r
107 ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE |\r
108 ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP |\r
14b0e578
CS
109 ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |\r
110 ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |\r
111 ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |\r
112 ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |\r
113 ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |\r
114 ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |\r
b602265d 115 ONIG_SYN_OP2_ESC_H_XDIGIT | ONIG_SYN_OP2_ESC_U_HEX4 )\r
14b0e578
CS
116 , ( SYN_GNU_REGEX_BV | \r
117 ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |\r
118 ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |\r
119 ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |\r
120 ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |\r
121 ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |\r
122 ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |\r
123 ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )\r
124 , ONIG_OPTION_NONE\r
125 ,\r
126 {\r
127 (OnigCodePoint )'\\' /* esc */\r
128 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */\r
129 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */\r
130 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */\r
131 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */\r
132 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */\r
133 }\r
134};\r
135\r
b602265d 136OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_ONIGURUMA;\r
14b0e578
CS
137\r
138extern void onig_null_warn(const char* s ARG_UNUSED) { }\r
139\r
140#ifdef DEFAULT_WARN_FUNCTION\r
141static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION;\r
142#else\r
143static OnigWarnFunc onig_warn = onig_null_warn;\r
144#endif\r
145\r
146#ifdef DEFAULT_VERB_WARN_FUNCTION\r
147static OnigWarnFunc onig_verb_warn = (OnigWarnFunc )DEFAULT_VERB_WARN_FUNCTION;\r
148#else\r
149static OnigWarnFunc onig_verb_warn = onig_null_warn;\r
150#endif\r
151\r
152extern void onig_set_warn_func(OnigWarnFunc f)\r
153{\r
154 onig_warn = f;\r
155}\r
156\r
157extern void onig_set_verb_warn_func(OnigWarnFunc f)\r
158{\r
159 onig_verb_warn = f;\r
160}\r
161\r
b602265d
DG
162extern void\r
163onig_warning(const char* s)\r
164{\r
165 if (onig_warn == onig_null_warn) return ;\r
166\r
167 (*onig_warn)(s);\r
168}\r
169\r
#define DEFAULT_MAX_CAPTURE_NUM   32767

/* Current capture-number limit; starts at DEFAULT_MAX_CAPTURE_NUM. */
static int MaxCaptureNum = DEFAULT_MAX_CAPTURE_NUM;

/*
 * Replace the capture-number limit.
 *
 * num: new limit; must not be negative.
 * Returns 0 after storing the value, or -1 (limit unchanged) when num < 0.
 */
extern int
onig_set_capture_num_limit(int num)
{
  if (num >= 0) {
    MaxCaptureNum = num;
    return 0;
  }

  return -1;
}
182\r
183static unsigned int ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT;\r
184\r
185extern unsigned int\r
186onig_get_parse_depth_limit(void)\r
187{\r
188 return ParseDepthLimit;\r
189}\r
190\r
191extern int\r
192onig_set_parse_depth_limit(unsigned int depth)\r
193{\r
194 if (depth == 0)\r
195 ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT;\r
196 else\r
197 ParseDepthLimit = depth;\r
198 return 0;\r
199}\r
200\r
/*
 * Multiply two non-negative ints without overflowing.
 *
 * x, y: operands; callers are expected to pass values >= 0 (negative
 *       operands are not specially handled, as before).
 * Returns x * y when the product fits in an int, 0 when either operand
 * is 0, and -1 when the product would exceed INT_MAX.
 */
static int
positive_int_multiply(int x, int y)
{
  if (x == 0 || y == 0) return 0;

  /* For positive operands, x * y <= INT_MAX exactly when x <= INT_MAX / y.
     The comparison must be inclusive, otherwise products that fit exactly
     (for example 1 * INT_MAX) are rejected as overflow. */
  if (x <= INT_MAX / y)
    return x * y;
  else
    return -1;
}
211\r
14b0e578
CS
212static void\r
213bbuf_free(BBuf* bbuf)\r
214{\r
215 if (IS_NOT_NULL(bbuf)) {\r
216 if (IS_NOT_NULL(bbuf->p)) xfree(bbuf->p);\r
217 xfree(bbuf);\r
218 }\r
219}\r
220\r
221static int\r
222bbuf_clone(BBuf** rto, BBuf* from)\r
223{\r
224 int r;\r
225 BBuf *to;\r
226\r
227 *rto = to = (BBuf* )xmalloc(sizeof(BBuf));\r
228 CHECK_NULL_RETURN_MEMERR(to);\r
b602265d
DG
229 r = BB_INIT(to, from->alloc);\r
230 if (r != 0) {\r
231 xfree(to->p);\r
232 *rto = 0;\r
233 return r;\r
234 }\r
14b0e578
CS
235 to->used = from->used;\r
236 xmemcpy(to->p, from->p, from->used);\r
237 return 0;\r
238}\r
239\r
b602265d
DG
240static int backref_rel_to_abs(int rel_no, ScanEnv* env)\r
241{\r
242 if (rel_no > 0) {\r
243 return env->num_mem + rel_no;\r
244 }\r
245 else {\r
246 return env->num_mem + 1 + rel_no;\r
247 }\r
248}\r
249\r
/* Set / clear the option bits f in the lvalue v. */
#define OPTION_ON(v,f)     ((v) |= (f))
#define OPTION_OFF(v,f)    ((v) &= ~(f))

/* Clear the bits f in v when `negative` is non-zero, otherwise set them.
   The whole expansion is parenthesized so that the macro behaves as a
   single expression when it appears inside a larger one. */
#define OPTION_NEGATE(v,f,negative)    ((negative) ? ((v) &= ~(f)) : ((v) |= (f)))
14b0e578
CS
254\r
255#define MBCODE_START_POS(enc) \\r
256 (OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80)\r
257\r
258#define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \\r
259 add_code_range_to_buf(pbuf, MBCODE_START_POS(enc), ~((OnigCodePoint )0))\r
260\r
261#define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\\r
262 if (! ONIGENC_IS_SINGLEBYTE(enc)) {\\r
263 r = SET_ALL_MULTI_BYTE_RANGE(enc, &(mbuf));\\r
b602265d 264 if (r != 0) return r;\\r
14b0e578
CS
265 }\\r
266} while (0)\r
267\r
268\r
269#define BITSET_IS_EMPTY(bs,empty) do {\\r
270 int i;\\r
271 empty = 1;\\r
272 for (i = 0; i < (int )BITSET_SIZE; i++) {\\r
273 if ((bs)[i] != 0) {\\r
274 empty = 0; break;\\r
275 }\\r
276 }\\r
277} while (0)\r
278\r
279static void\r
280bitset_set_range(BitSetRef bs, int from, int to)\r
281{\r
282 int i;\r
283 for (i = from; i <= to && i < SINGLE_BYTE_SIZE; i++) {\r
284 BITSET_SET_BIT(bs, i);\r
285 }\r
286}\r
287\r
288#if 0\r
289static void\r
290bitset_set_all(BitSetRef bs)\r
291{\r
292 int i;\r
293 for (i = 0; i < BITSET_SIZE; i++) { bs[i] = ~((Bits )0); }\r
294}\r
295#endif\r
296\r
297static void\r
298bitset_invert(BitSetRef bs)\r
299{\r
300 int i;\r
301 for (i = 0; i < (int )BITSET_SIZE; i++) { bs[i] = ~(bs[i]); }\r
302}\r
303\r
304static void\r
305bitset_invert_to(BitSetRef from, BitSetRef to)\r
306{\r
307 int i;\r
308 for (i = 0; i < (int )BITSET_SIZE; i++) { to[i] = ~(from[i]); }\r
309}\r
310\r
311static void\r
312bitset_and(BitSetRef dest, BitSetRef bs)\r
313{\r
314 int i;\r
315 for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] &= bs[i]; }\r
316}\r
317\r
318static void\r
319bitset_or(BitSetRef dest, BitSetRef bs)\r
320{\r
321 int i;\r
322 for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] |= bs[i]; }\r
323}\r
324\r
325static void\r
326bitset_copy(BitSetRef dest, BitSetRef bs)\r
327{\r
328 int i;\r
329 for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] = bs[i]; }\r
330}\r
331\r
332extern int\r
333onig_strncmp(const UChar* s1, const UChar* s2, int n)\r
334{\r
335 int x;\r
336\r
337 while (n-- > 0) {\r
338 x = *s2++ - *s1++;\r
339 if (x) return x;\r
340 }\r
341 return 0;\r
342}\r
343\r
344extern void\r
345onig_strcpy(UChar* dest, const UChar* src, const UChar* end)\r
346{\r
b602265d 347 int len = (int )(end - src);\r
14b0e578
CS
348 if (len > 0) {\r
349 xmemcpy(dest, src, len);\r
350 dest[len] = (UChar )0;\r
351 }\r
352}\r
353\r
b602265d
DG
354static int\r
355save_entry(ScanEnv* env, enum SaveType type, int* id)\r
14b0e578 356{\r
b602265d 357 int nid = env->save_num;\r
14b0e578 358\r
b602265d
DG
359#if 0\r
360 if (IS_NULL(env->saves)) {\r
361 int n = 10;\r
362 env->saves = (SaveItem* )xmalloc(sizeof(SaveItem) * n);\r
363 CHECK_NULL_RETURN_MEMERR(env->saves);\r
364 env->save_alloc_num = n;\r
365 }\r
366 else if (env->save_alloc_num <= nid) {\r
367 int n = env->save_alloc_num * 2;\r
368 SaveItem* p = (SaveItem* )xrealloc(env->saves, sizeof(SaveItem) * n, sizeof(SaveItem)*env->save_alloc_num);\r
369 CHECK_NULL_RETURN_MEMERR(p);\r
370 env->saves = p;\r
371 env->save_alloc_num = n;\r
372 }\r
14b0e578 373\r
b602265d
DG
374 env->saves[nid].type = type;\r
375#endif\r
14b0e578 376\r
b602265d
DG
377 env->save_num++;\r
378 *id = nid;\r
379 return 0;\r
14b0e578 380}\r
14b0e578
CS
381\r
382/* scan pattern methods */\r
383#define PEND_VALUE 0\r
384\r
385#define PFETCH_READY UChar* pfetch_prev\r
386#define PEND (p < end ? 0 : 1)\r
387#define PUNFETCH p = pfetch_prev\r
388#define PINC do { \\r
389 pfetch_prev = p; \\r
390 p += ONIGENC_MBC_ENC_LEN(enc, p); \\r
391} while (0)\r
392#define PFETCH(c) do { \\r
393 c = ONIGENC_MBC_TO_CODE(enc, p, end); \\r
394 pfetch_prev = p; \\r
395 p += ONIGENC_MBC_ENC_LEN(enc, p); \\r
396} while (0)\r
397\r
398#define PINC_S do { \\r
399 p += ONIGENC_MBC_ENC_LEN(enc, p); \\r
400} while (0)\r
401#define PFETCH_S(c) do { \\r
402 c = ONIGENC_MBC_TO_CODE(enc, p, end); \\r
403 p += ONIGENC_MBC_ENC_LEN(enc, p); \\r
404} while (0)\r
405\r
406#define PPEEK (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE)\r
407#define PPEEK_IS(c) (PPEEK == (OnigCodePoint )c)\r
408\r
409static UChar*\r
410strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end,\r
b602265d 411 int capa, int oldCapa)\r
14b0e578
CS
412{\r
413 UChar* r;\r
414\r
415 if (dest)\r
416 r = (UChar* )xrealloc(dest, capa + 1, oldCapa);\r
417 else\r
418 r = (UChar* )xmalloc(capa + 1);\r
419\r
420 CHECK_NULL_RETURN(r);\r
421 onig_strcpy(r + (dest_end - dest), src, src_end);\r
422 return r;\r
423}\r
424\r
425/* dest on static area */\r
426static UChar*\r
427strcat_capa_from_static(UChar* dest, UChar* dest_end,\r
b602265d 428 const UChar* src, const UChar* src_end, int capa)\r
14b0e578
CS
429{\r
430 UChar* r;\r
431\r
432 r = (UChar* )xmalloc(capa + 1);\r
433 CHECK_NULL_RETURN(r);\r
434 onig_strcpy(r, dest, dest_end);\r
435 onig_strcpy(r + (dest_end - dest), src, src_end);\r
436 return r;\r
437}\r
438\r
439\r
440#ifdef USE_ST_LIBRARY\r
441\r
442typedef struct {\r
443 UChar* s;\r
444 UChar* end;\r
445} st_str_end_key;\r
446\r
447static int\r
448str_end_cmp(st_str_end_key* x, st_str_end_key* y)\r
449{\r
450 UChar *p, *q;\r
451 int c;\r
452\r
453 if ((x->end - x->s) != (y->end - y->s))\r
454 return 1;\r
455\r
456 p = x->s;\r
457 q = y->s;\r
458 while (p < x->end) {\r
459 c = (int )*p - (int )*q;\r
460 if (c != 0) return c;\r
461\r
462 p++; q++;\r
463 }\r
464\r
465 return 0;\r
466}\r
467\r
468static int\r
469str_end_hash(st_str_end_key* x)\r
470{\r
471 UChar *p;\r
472 int val = 0;\r
473\r
474 p = x->s;\r
475 while (p < x->end) {\r
476 val = val * 997 + (int )*p++;\r
477 }\r
478\r
479 return val + (val >> 5);\r
480}\r
481\r
482extern hash_table_type*\r
483onig_st_init_strend_table_with_size(int size)\r
484{\r
485 static struct st_hash_type hashType = {\r
486 str_end_cmp,\r
487 str_end_hash,\r
488 };\r
489\r
490 return (hash_table_type* )\r
491 onig_st_init_table_with_size(&hashType, size);\r
492}\r
493\r
494extern int\r
495onig_st_lookup_strend(hash_table_type* table, const UChar* str_key,\r
b602265d 496 const UChar* end_key, hash_data_type *value)\r
14b0e578
CS
497{\r
498 st_str_end_key key;\r
499\r
500 key.s = (UChar* )str_key;\r
501 key.end = (UChar* )end_key;\r
502\r
b602265d 503 return onig_st_lookup(table, (st_data_t )(&key), value);\r
14b0e578
CS
504}\r
505\r
506extern int\r
507onig_st_insert_strend(hash_table_type* table, const UChar* str_key,\r
b602265d 508 const UChar* end_key, hash_data_type value)\r
14b0e578
CS
509{\r
510 st_str_end_key* key;\r
511 int result;\r
512\r
513 key = (st_str_end_key* )xmalloc(sizeof(st_str_end_key));\r
b0c2b797 514 CHECK_NULL_RETURN_MEMERR(key);\r
b602265d 515\r
14b0e578
CS
516 key->s = (UChar* )str_key;\r
517 key->end = (UChar* )end_key;\r
b602265d 518 result = onig_st_insert(table, (st_data_t )key, value);\r
14b0e578
CS
519 if (result) {\r
520 xfree(key);\r
521 }\r
522 return result;\r
523}\r
524\r
14b0e578 525\r
b602265d
DG
526typedef struct {\r
527 OnigEncoding enc;\r
528 int type; /* callout type: single or not */\r
529 UChar* s;\r
530 UChar* end;\r
531} st_callout_name_key;\r
532\r
533static int\r
534callout_name_table_cmp(st_callout_name_key* x, st_callout_name_key* y)\r
535{\r
536 UChar *p, *q;\r
537 int c;\r
538\r
539 if (x->enc != y->enc) return 1;\r
540 if (x->type != y->type) return 1;\r
541 if ((x->end - x->s) != (y->end - y->s))\r
542 return 1;\r
543\r
544 p = x->s;\r
545 q = y->s;\r
546 while (p < x->end) {\r
547 c = (int )*p - (int )*q;\r
548 if (c != 0) return c;\r
549\r
550 p++; q++;\r
551 }\r
552\r
553 return 0;\r
554}\r
555\r
556static int\r
557callout_name_table_hash(st_callout_name_key* x)\r
558{\r
559 UChar *p;\r
560 int val = 0;\r
561\r
562 p = x->s;\r
563 while (p < x->end) {\r
564 val = val * 997 + (int )*p++;\r
565 }\r
566\r
567 /* use intptr_t for escape warning in Windows */\r
568 return val + (val >> 5) + ((intptr_t )x->enc & 0xffff) + x->type;\r
569}\r
570\r
571extern hash_table_type*\r
572onig_st_init_callout_name_table_with_size(int size)\r
573{\r
574 static struct st_hash_type hashType = {\r
575 callout_name_table_cmp,\r
576 callout_name_table_hash,\r
577 };\r
578\r
579 return (hash_table_type* )\r
580 onig_st_init_table_with_size(&hashType, size);\r
581}\r
582\r
583extern int\r
584onig_st_lookup_callout_name_table(hash_table_type* table,\r
585 OnigEncoding enc,\r
586 int type,\r
587 const UChar* str_key,\r
588 const UChar* end_key,\r
589 hash_data_type *value)\r
590{\r
591 st_callout_name_key key;\r
592\r
593 key.enc = enc;\r
594 key.type = type;\r
595 key.s = (UChar* )str_key;\r
596 key.end = (UChar* )end_key;\r
597\r
598 return onig_st_lookup(table, (st_data_t )(&key), value);\r
599}\r
600\r
601static int\r
602st_insert_callout_name_table(hash_table_type* table,\r
603 OnigEncoding enc, int type,\r
604 UChar* str_key, UChar* end_key,\r
605 hash_data_type value)\r
606{\r
607 st_callout_name_key* key;\r
608 int result;\r
609\r
610 key = (st_callout_name_key* )xmalloc(sizeof(st_callout_name_key));\r
611 CHECK_NULL_RETURN_MEMERR(key);\r
612\r
613 /* key->s: don't duplicate, because str_key is duped in callout_name_entry() */\r
614 key->enc = enc;\r
615 key->type = type;\r
616 key->s = str_key;\r
617 key->end = end_key;\r
618 result = onig_st_insert(table, (st_data_t )key, value);\r
619 if (result) {\r
620 xfree(key);\r
621 }\r
622 return result;\r
623}\r
624\r
625#endif /* USE_ST_LIBRARY */\r
14b0e578 626\r
14b0e578
CS
627\r
628#define INIT_NAME_BACKREFS_ALLOC_NUM 8\r
629\r
630typedef struct {\r
631 UChar* name;\r
632 int name_len; /* byte length */\r
633 int back_num; /* number of backrefs */\r
634 int back_alloc;\r
635 int back_ref1;\r
636 int* back_refs;\r
637} NameEntry;\r
638\r
639#ifdef USE_ST_LIBRARY\r
640\r
b602265d
DG
641#define INIT_NAMES_ALLOC_NUM 5\r
642\r
14b0e578
CS
643typedef st_table NameTable;\r
644typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */\r
645\r
646#define NAMEBUF_SIZE 24\r
647#define NAMEBUF_SIZE_1 25\r
648\r
649#ifdef ONIG_DEBUG\r
650static int\r
651i_print_name_entry(UChar* key, NameEntry* e, void* arg)\r
652{\r
653 int i;\r
654 FILE* fp = (FILE* )arg;\r
655\r
656 fprintf(fp, "%s: ", e->name);\r
657 if (e->back_num == 0)\r
658 fputs("-", fp);\r
659 else if (e->back_num == 1)\r
660 fprintf(fp, "%d", e->back_ref1);\r
661 else {\r
662 for (i = 0; i < e->back_num; i++) {\r
663 if (i > 0) fprintf(fp, ", ");\r
664 fprintf(fp, "%d", e->back_refs[i]);\r
665 }\r
666 }\r
667 fputs("\n", fp);\r
668 return ST_CONTINUE;\r
669}\r
670\r
671extern int\r
672onig_print_names(FILE* fp, regex_t* reg)\r
673{\r
674 NameTable* t = (NameTable* )reg->name_table;\r
675\r
676 if (IS_NOT_NULL(t)) {\r
677 fprintf(fp, "name table\n");\r
678 onig_st_foreach(t, i_print_name_entry, (HashDataType )fp);\r
679 fputs("\n", fp);\r
680 }\r
681 return 0;\r
682}\r
683#endif /* ONIG_DEBUG */\r
684\r
685static int\r
686i_free_name_entry(UChar* key, NameEntry* e, void* arg ARG_UNUSED)\r
687{\r
688 xfree(e->name);\r
689 if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);\r
690 xfree(key);\r
691 xfree(e);\r
692 return ST_DELETE;\r
693}\r
694\r
695static int\r
696names_clear(regex_t* reg)\r
697{\r
698 NameTable* t = (NameTable* )reg->name_table;\r
699\r
700 if (IS_NOT_NULL(t)) {\r
701 onig_st_foreach(t, i_free_name_entry, 0);\r
702 }\r
703 return 0;\r
704}\r
705\r
706extern int\r
707onig_names_free(regex_t* reg)\r
708{\r
709 int r;\r
710 NameTable* t;\r
711\r
712 r = names_clear(reg);\r
b602265d 713 if (r != 0) return r;\r
14b0e578
CS
714\r
715 t = (NameTable* )reg->name_table;\r
716 if (IS_NOT_NULL(t)) onig_st_free_table(t);\r
717 reg->name_table = (void* )NULL;\r
718 return 0;\r
719}\r
720\r
721static NameEntry*\r
722name_find(regex_t* reg, const UChar* name, const UChar* name_end)\r
723{\r
724 NameEntry* e;\r
725 NameTable* t = (NameTable* )reg->name_table;\r
726\r
727 e = (NameEntry* )NULL;\r
728 if (IS_NOT_NULL(t)) {\r
729 onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));\r
730 }\r
731 return e;\r
732}\r
733\r
734typedef struct {\r
735 int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*);\r
736 regex_t* reg;\r
737 void* arg;\r
738 int ret;\r
739 OnigEncoding enc;\r
740} INamesArg;\r
741\r
742static int\r
743i_names(UChar* key ARG_UNUSED, NameEntry* e, INamesArg* arg)\r
744{\r
745 int r = (*(arg->func))(e->name,\r
746 e->name + e->name_len,\r
747 e->back_num,\r
b602265d
DG
748 (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),\r
749 arg->reg, arg->arg);\r
14b0e578
CS
750 if (r != 0) {\r
751 arg->ret = r;\r
752 return ST_STOP;\r
753 }\r
754 return ST_CONTINUE;\r
755}\r
756\r
757extern int\r
758onig_foreach_name(regex_t* reg,\r
759 int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)\r
760{\r
761 INamesArg narg;\r
762 NameTable* t = (NameTable* )reg->name_table;\r
763\r
764 narg.ret = 0;\r
765 if (IS_NOT_NULL(t)) {\r
766 narg.func = func;\r
767 narg.reg = reg;\r
768 narg.arg = arg;\r
769 narg.enc = reg->enc; /* should be pattern encoding. */\r
b602265d 770 onig_st_foreach(t, i_names, (HashDataType )&narg);\r
14b0e578
CS
771 }\r
772 return narg.ret;\r
773}\r
774\r
775static int\r
776i_renumber_name(UChar* key ARG_UNUSED, NameEntry* e, GroupNumRemap* map)\r
777{\r
778 int i;\r
779\r
780 if (e->back_num > 1) {\r
781 for (i = 0; i < e->back_num; i++) {\r
782 e->back_refs[i] = map[e->back_refs[i]].new_val;\r
783 }\r
784 }\r
785 else if (e->back_num == 1) {\r
786 e->back_ref1 = map[e->back_ref1].new_val;\r
787 }\r
788\r
789 return ST_CONTINUE;\r
790}\r
791\r
792extern int\r
793onig_renumber_name_table(regex_t* reg, GroupNumRemap* map)\r
794{\r
795 NameTable* t = (NameTable* )reg->name_table;\r
796\r
797 if (IS_NOT_NULL(t)) {\r
b602265d 798 onig_st_foreach(t, i_renumber_name, (HashDataType )map);\r
14b0e578
CS
799 }\r
800 return 0;\r
801}\r
802\r
803\r
804extern int\r
805onig_number_of_names(regex_t* reg)\r
806{\r
807 NameTable* t = (NameTable* )reg->name_table;\r
808\r
809 if (IS_NOT_NULL(t))\r
810 return t->num_entries;\r
811 else\r
812 return 0;\r
813}\r
814\r
815#else /* USE_ST_LIBRARY */\r
816\r
817#define INIT_NAMES_ALLOC_NUM 8\r
818\r
819typedef struct {\r
820 NameEntry* e;\r
821 int num;\r
822 int alloc;\r
823} NameTable;\r
824\r
825#ifdef ONIG_DEBUG\r
826extern int\r
827onig_print_names(FILE* fp, regex_t* reg)\r
828{\r
829 int i, j;\r
830 NameEntry* e;\r
831 NameTable* t = (NameTable* )reg->name_table;\r
832\r
833 if (IS_NOT_NULL(t) && t->num > 0) {\r
834 fprintf(fp, "name table\n");\r
835 for (i = 0; i < t->num; i++) {\r
836 e = &(t->e[i]);\r
837 fprintf(fp, "%s: ", e->name);\r
838 if (e->back_num == 0) {\r
b602265d 839 fputs("-", fp);\r
14b0e578
CS
840 }\r
841 else if (e->back_num == 1) {\r
b602265d 842 fprintf(fp, "%d", e->back_ref1);\r
14b0e578
CS
843 }\r
844 else {\r
b602265d
DG
845 for (j = 0; j < e->back_num; j++) {\r
846 if (j > 0) fprintf(fp, ", ");\r
847 fprintf(fp, "%d", e->back_refs[j]);\r
848 }\r
14b0e578
CS
849 }\r
850 fputs("\n", fp);\r
851 }\r
852 fputs("\n", fp);\r
853 }\r
854 return 0;\r
855}\r
856#endif\r
857\r
858static int\r
859names_clear(regex_t* reg)\r
860{\r
861 int i;\r
862 NameEntry* e;\r
863 NameTable* t = (NameTable* )reg->name_table;\r
864\r
865 if (IS_NOT_NULL(t)) {\r
866 for (i = 0; i < t->num; i++) {\r
867 e = &(t->e[i]);\r
868 if (IS_NOT_NULL(e->name)) {\r
b602265d
DG
869 xfree(e->name);\r
870 e->name = NULL;\r
871 e->name_len = 0;\r
872 e->back_num = 0;\r
873 e->back_alloc = 0;\r
874 if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);\r
875 e->back_refs = (int* )NULL;\r
14b0e578
CS
876 }\r
877 }\r
878 if (IS_NOT_NULL(t->e)) {\r
879 xfree(t->e);\r
880 t->e = NULL;\r
881 }\r
882 t->num = 0;\r
883 }\r
884 return 0;\r
885}\r
886\r
887extern int\r
888onig_names_free(regex_t* reg)\r
889{\r
890 int r;\r
891 NameTable* t;\r
892\r
893 r = names_clear(reg);\r
b602265d 894 if (r != 0) return r;\r
14b0e578
CS
895\r
896 t = (NameTable* )reg->name_table;\r
897 if (IS_NOT_NULL(t)) xfree(t);\r
898 reg->name_table = NULL;\r
899 return 0;\r
900}\r
901\r
902static NameEntry*\r
903name_find(regex_t* reg, UChar* name, UChar* name_end)\r
904{\r
905 int i, len;\r
906 NameEntry* e;\r
907 NameTable* t = (NameTable* )reg->name_table;\r
908\r
909 if (IS_NOT_NULL(t)) {\r
910 len = name_end - name;\r
911 for (i = 0; i < t->num; i++) {\r
912 e = &(t->e[i]);\r
913 if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)\r
b602265d 914 return e;\r
14b0e578
CS
915 }\r
916 }\r
917 return (NameEntry* )NULL;\r
918}\r
919\r
920extern int\r
921onig_foreach_name(regex_t* reg,\r
922 int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)\r
923{\r
924 int i, r;\r
925 NameEntry* e;\r
926 NameTable* t = (NameTable* )reg->name_table;\r
927\r
928 if (IS_NOT_NULL(t)) {\r
929 for (i = 0; i < t->num; i++) {\r
930 e = &(t->e[i]);\r
931 r = (*func)(e->name, e->name + e->name_len, e->back_num,\r
b602265d
DG
932 (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),\r
933 reg, arg);\r
14b0e578
CS
934 if (r != 0) return r;\r
935 }\r
936 }\r
937 return 0;\r
938}\r
939\r
940extern int\r
941onig_number_of_names(regex_t* reg)\r
942{\r
943 NameTable* t = (NameTable* )reg->name_table;\r
944\r
945 if (IS_NOT_NULL(t))\r
946 return t->num;\r
947 else\r
948 return 0;\r
949}\r
950\r
951#endif /* else USE_ST_LIBRARY */\r
952\r
953static int\r
954name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)\r
955{\r
b602265d 956 int r;\r
14b0e578
CS
957 int alloc;\r
958 NameEntry* e;\r
959 NameTable* t = (NameTable* )reg->name_table;\r
960\r
961 if (name_end - name <= 0)\r
962 return ONIGERR_EMPTY_GROUP_NAME;\r
963\r
964 e = name_find(reg, name, name_end);\r
965 if (IS_NULL(e)) {\r
966#ifdef USE_ST_LIBRARY\r
967 if (IS_NULL(t)) {\r
b602265d 968 t = onig_st_init_strend_table_with_size(INIT_NAMES_ALLOC_NUM);\r
a5def177 969 CHECK_NULL_RETURN_MEMERR(t);\r
14b0e578
CS
970 reg->name_table = (void* )t;\r
971 }\r
972 e = (NameEntry* )xmalloc(sizeof(NameEntry));\r
973 CHECK_NULL_RETURN_MEMERR(e);\r
974\r
b602265d 975 e->name = onigenc_strdup(reg->enc, name, name_end);\r
14b0e578
CS
976 if (IS_NULL(e->name)) {\r
977 xfree(e); return ONIGERR_MEMORY;\r
978 }\r
b602265d
DG
979 r = onig_st_insert_strend(t, e->name, (e->name + (name_end - name)),\r
980 (HashDataType )e);\r
981 if (r < 0) return r;\r
14b0e578 982\r
b602265d 983 e->name_len = (int )(name_end - name);\r
14b0e578
CS
984 e->back_num = 0;\r
985 e->back_alloc = 0;\r
986 e->back_refs = (int* )NULL;\r
987\r
988#else\r
989\r
990 if (IS_NULL(t)) {\r
991 alloc = INIT_NAMES_ALLOC_NUM;\r
992 t = (NameTable* )xmalloc(sizeof(NameTable));\r
993 CHECK_NULL_RETURN_MEMERR(t);\r
994 t->e = NULL;\r
995 t->alloc = 0;\r
996 t->num = 0;\r
997\r
998 t->e = (NameEntry* )xmalloc(sizeof(NameEntry) * alloc);\r
999 if (IS_NULL(t->e)) {\r
b602265d
DG
1000 xfree(t);\r
1001 return ONIGERR_MEMORY;\r
14b0e578
CS
1002 }\r
1003 t->alloc = alloc;\r
1004 reg->name_table = t;\r
1005 goto clear;\r
1006 }\r
1007 else if (t->num == t->alloc) {\r
1008 int i;\r
1009\r
1010 alloc = t->alloc * 2;\r
b602265d 1011 t->e = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc, sizeof(NameEntry) * t->alloc);\r
14b0e578
CS
1012 CHECK_NULL_RETURN_MEMERR(t->e);\r
1013 t->alloc = alloc;\r
1014\r
1015 clear:\r
1016 for (i = t->num; i < t->alloc; i++) {\r
b602265d
DG
1017 t->e[i].name = NULL;\r
1018 t->e[i].name_len = 0;\r
1019 t->e[i].back_num = 0;\r
1020 t->e[i].back_alloc = 0;\r
1021 t->e[i].back_refs = (int* )NULL;\r
14b0e578
CS
1022 }\r
1023 }\r
1024 e = &(t->e[t->num]);\r
1025 t->num++;\r
b602265d 1026 e->name = onigenc_strdup(reg->enc, name, name_end);\r
14b0e578
CS
1027 if (IS_NULL(e->name)) return ONIGERR_MEMORY;\r
1028 e->name_len = name_end - name;\r
1029#endif\r
1030 }\r
1031\r
1032 if (e->back_num >= 1 &&\r
1033 ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME)) {\r
1034 onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME,\r
b602265d 1035 name, name_end);\r
14b0e578
CS
1036 return ONIGERR_MULTIPLEX_DEFINED_NAME;\r
1037 }\r
1038\r
1039 e->back_num++;\r
1040 if (e->back_num == 1) {\r
1041 e->back_ref1 = backref;\r
1042 }\r
1043 else {\r
1044 if (e->back_num == 2) {\r
1045 alloc = INIT_NAME_BACKREFS_ALLOC_NUM;\r
1046 e->back_refs = (int* )xmalloc(sizeof(int) * alloc);\r
1047 CHECK_NULL_RETURN_MEMERR(e->back_refs);\r
1048 e->back_alloc = alloc;\r
1049 e->back_refs[0] = e->back_ref1;\r
1050 e->back_refs[1] = backref;\r
1051 }\r
1052 else {\r
1053 if (e->back_num > e->back_alloc) {\r
b602265d
DG
1054 alloc = e->back_alloc * 2;\r
1055 e->back_refs = (int* )xrealloc(e->back_refs, sizeof(int) * alloc, sizeof(int) * e->back_alloc);\r
1056 CHECK_NULL_RETURN_MEMERR(e->back_refs);\r
1057 e->back_alloc = alloc;\r
14b0e578
CS
1058 }\r
1059 e->back_refs[e->back_num - 1] = backref;\r
1060 }\r
1061 }\r
1062\r
1063 return 0;\r
1064}\r
1065\r
1066extern int\r
1067onig_name_to_group_numbers(regex_t* reg, const UChar* name,\r
b602265d 1068 const UChar* name_end, int** nums)\r
14b0e578
CS
1069{\r
1070 NameEntry* e = name_find(reg, name, name_end);\r
1071\r
1072 if (IS_NULL(e)) return ONIGERR_UNDEFINED_NAME_REFERENCE;\r
1073\r
1074 switch (e->back_num) {\r
1075 case 0:\r
1076 break;\r
1077 case 1:\r
1078 *nums = &(e->back_ref1);\r
1079 break;\r
1080 default:\r
1081 *nums = e->back_refs;\r
1082 break;\r
1083 }\r
1084 return e->back_num;\r
1085}\r
1086\r
1087extern int\r
1088onig_name_to_backref_number(regex_t* reg, const UChar* name,\r
b602265d 1089 const UChar* name_end, OnigRegion *region)\r
14b0e578
CS
1090{\r
1091 int i, n, *nums;\r
1092\r
1093 n = onig_name_to_group_numbers(reg, name, name_end, &nums);\r
1094 if (n < 0)\r
1095 return n;\r
1096 else if (n == 0)\r
1097 return ONIGERR_PARSER_BUG;\r
1098 else if (n == 1)\r
1099 return nums[0];\r
1100 else {\r
1101 if (IS_NOT_NULL(region)) {\r
1102 for (i = n - 1; i >= 0; i--) {\r
b602265d
DG
1103 if (region->beg[nums[i]] != ONIG_REGION_NOTPOS)\r
1104 return nums[i];\r
14b0e578
CS
1105 }\r
1106 }\r
1107 return nums[n - 1];\r
1108 }\r
1109}\r
1110\r
14b0e578
CS
1111extern int\r
1112onig_noname_group_capture_is_active(regex_t* reg)\r
1113{\r
1114 if (ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_DONT_CAPTURE_GROUP))\r
1115 return 0;\r
1116\r
14b0e578
CS
1117 if (onig_number_of_names(reg) > 0 &&\r
1118 IS_SYNTAX_BV(reg->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&\r
1119 !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {\r
1120 return 0;\r
1121 }\r
14b0e578
CS
1122\r
1123 return 1;\r
1124}\r
1125\r
b602265d 1126#ifdef USE_CALLOUT\r
14b0e578 1127\r
b602265d
DG
1128typedef struct {\r
1129 OnigCalloutType type;\r
1130 int in;\r
1131 OnigCalloutFunc start_func;\r
1132 OnigCalloutFunc end_func;\r
1133 int arg_num;\r
1134 int opt_arg_num;\r
1135 unsigned int arg_types[ONIG_CALLOUT_MAX_ARGS_NUM];\r
1136 OnigValue opt_defaults[ONIG_CALLOUT_MAX_ARGS_NUM];\r
1137 UChar* name; /* reference to GlobalCalloutNameTable entry: e->name */\r
1138} CalloutNameListEntry;\r
14b0e578 1139\r
b602265d
DG
1140typedef struct {\r
1141 int n;\r
1142 int alloc;\r
1143 CalloutNameListEntry* v;\r
1144} CalloutNameListType;\r
14b0e578 1145\r
b602265d 1146static CalloutNameListType* GlobalCalloutNameList;\r
14b0e578
CS
1147\r
1148static int\r
b602265d 1149make_callout_func_list(CalloutNameListType** rs, int init_size)\r
14b0e578 1150{\r
b602265d
DG
1151 CalloutNameListType* s;\r
1152 CalloutNameListEntry* v;\r
14b0e578 1153\r
b602265d 1154 *rs = 0;\r
14b0e578 1155\r
b602265d
DG
1156 s = xmalloc(sizeof(*s));\r
1157 if (IS_NULL(s)) return ONIGERR_MEMORY;\r
14b0e578 1158\r
b602265d
DG
1159 v = (CalloutNameListEntry* )xmalloc(sizeof(CalloutNameListEntry) * init_size);\r
1160 if (IS_NULL(v)) {\r
1161 xfree(s);\r
1162 return ONIGERR_MEMORY;\r
14b0e578
CS
1163 }\r
1164\r
b602265d
DG
1165 s->n = 0;\r
1166 s->alloc = init_size;\r
1167 s->v = v;\r
14b0e578 1168\r
b602265d
DG
1169 *rs = s;\r
1170 return ONIG_NORMAL;\r
14b0e578
CS
1171}\r
1172\r
b602265d
DG
1173static void\r
1174free_callout_func_list(CalloutNameListType* s)\r
1175{\r
1176 if (IS_NOT_NULL(s)) {\r
1177 if (IS_NOT_NULL(s->v)) {\r
1178 int i, j;\r
1179\r
1180 for (i = 0; i < s->n; i++) {\r
1181 CalloutNameListEntry* e = s->v + i;\r
1182 for (j = e->arg_num - e->opt_arg_num; j < e->arg_num; j++) {\r
1183 if (e->arg_types[j] == ONIG_TYPE_STRING) {\r
1184 UChar* p = e->opt_defaults[j].s.start;\r
1185 if (IS_NOT_NULL(p)) xfree(p);\r
1186 }\r
1187 }\r
1188 }\r
1189 xfree(s->v);\r
1190 }\r
1191 xfree(s);\r
1192 }\r
1193}\r
14b0e578 1194\r
b602265d
DG
1195static int\r
1196callout_func_list_add(CalloutNameListType* s, int* rid)\r
1197{\r
1198 if (s->n >= s->alloc) {\r
1199 int new_size = s->alloc * 2;\r
1200 CalloutNameListEntry* nv = (CalloutNameListEntry* )\r
1201 xrealloc(s->v, sizeof(CalloutNameListEntry) * new_size, sizeof(CalloutNameListEntry)*s->alloc);\r
1202 if (IS_NULL(nv)) return ONIGERR_MEMORY;\r
14b0e578 1203\r
b602265d
DG
1204 s->alloc = new_size;\r
1205 s->v = nv;\r
1206 }\r
14b0e578 1207\r
b602265d 1208 *rid = s->n;\r
14b0e578 1209\r
b602265d
DG
1210 xmemset(&(s->v[s->n]), 0, sizeof(*(s->v)));\r
1211 s->n++;\r
1212 return ONIG_NORMAL;\r
1213}\r
14b0e578 1214\r
14b0e578 1215\r
b602265d
DG
1216typedef struct {\r
1217 UChar* name;\r
1218 int name_len; /* byte length */\r
1219 int id;\r
1220} CalloutNameEntry;\r
14b0e578 1221\r
b602265d
DG
1222#ifdef USE_ST_LIBRARY\r
1223typedef st_table CalloutNameTable;\r
14b0e578 1224#else\r
b602265d
DG
1225typedef struct {\r
1226 CalloutNameEntry* e;\r
1227 int num;\r
1228 int alloc;\r
1229} CalloutNameTable;\r
14b0e578 1230#endif\r
14b0e578 1231\r
b602265d
DG
1232static CalloutNameTable* GlobalCalloutNameTable;\r
1233static int CalloutNameIDCounter;\r
14b0e578 1234\r
b602265d 1235#ifdef USE_ST_LIBRARY\r
14b0e578 1236\r
b602265d
DG
1237static int\r
1238i_free_callout_name_entry(st_callout_name_key* key, CalloutNameEntry* e,\r
1239 void* arg ARG_UNUSED)\r
1240{\r
1241 xfree(e->name);\r
1242 /*xfree(key->s); */ /* is same as e->name */\r
1243 xfree(key);\r
1244 xfree(e);\r
1245 return ST_DELETE;\r
1246}\r
14b0e578 1247\r
b602265d
DG
1248static int\r
1249callout_name_table_clear(CalloutNameTable* t)\r
1250{\r
1251 if (IS_NOT_NULL(t)) {\r
1252 onig_st_foreach(t, i_free_callout_name_entry, 0);\r
1253 }\r
1254 return 0;\r
1255}\r
14b0e578 1256\r
b602265d
DG
1257static int\r
1258global_callout_name_table_free(void)\r
1259{\r
1260 if (IS_NOT_NULL(GlobalCalloutNameTable)) {\r
1261 int r = callout_name_table_clear(GlobalCalloutNameTable);\r
1262 if (r != 0) return r;\r
14b0e578 1263\r
b602265d
DG
1264 onig_st_free_table(GlobalCalloutNameTable);\r
1265 GlobalCalloutNameTable = 0;\r
1266 CalloutNameIDCounter = 0;\r
14b0e578
CS
1267 }\r
1268\r
b602265d
DG
1269 return 0;\r
1270}\r
1271\r
1272static CalloutNameEntry*\r
1273callout_name_find(OnigEncoding enc, int is_not_single,\r
1274 const UChar* name, const UChar* name_end)\r
1275{\r
1276 int r;\r
1277 CalloutNameEntry* e;\r
1278 CalloutNameTable* t = GlobalCalloutNameTable;\r
14b0e578 1279\r
b602265d
DG
1280 e = (CalloutNameEntry* )NULL;\r
1281 if (IS_NOT_NULL(t)) {\r
1282 r = onig_st_lookup_callout_name_table(t, enc, is_not_single, name, name_end,\r
1283 (HashDataType* )((void* )(&e)));\r
1284 if (r == 0) { /* not found */\r
1285 if (enc != ONIG_ENCODING_ASCII &&\r
1286 ONIGENC_IS_ASCII_COMPATIBLE_ENCODING(enc)) {\r
1287 enc = ONIG_ENCODING_ASCII;\r
1288 onig_st_lookup_callout_name_table(t, enc, is_not_single, name, name_end,\r
1289 (HashDataType* )((void* )(&e)));\r
1290 }\r
1291 }\r
14b0e578 1292 }\r
b602265d
DG
1293 return e;\r
1294}\r
1295\r
14b0e578 1296#else\r
b602265d
DG
1297\r
1298static int\r
1299callout_name_table_clear(CalloutNameTable* t)\r
1300{\r
1301 int i;\r
1302 CalloutNameEntry* e;\r
1303\r
1304 if (IS_NOT_NULL(t)) {\r
1305 for (i = 0; i < t->num; i++) {\r
1306 e = &(t->e[i]);\r
1307 if (IS_NOT_NULL(e->name)) {\r
1308 xfree(e->name);\r
1309 e->name = NULL;\r
1310 e->name_len = 0;\r
1311 e->id = 0;\r
1312 e->func = 0;\r
1313 }\r
1314 }\r
1315 if (IS_NOT_NULL(t->e)) {\r
1316 xfree(t->e);\r
1317 t->e = NULL;\r
1318 }\r
1319 t->num = 0;\r
1320 }\r
1321 return 0;\r
14b0e578
CS
1322}\r
1323\r
b602265d
DG
1324static int\r
1325global_callout_name_table_free(void)\r
14b0e578 1326{\r
b602265d
DG
1327 if (IS_NOT_NULL(GlobalCalloutNameTable)) {\r
1328 int r = callout_name_table_clear(GlobalCalloutNameTable);\r
1329 if (r != 0) return r;\r
14b0e578 1330\r
b602265d
DG
1331 xfree(GlobalCalloutNameTable);\r
1332 GlobalCalloutNameTable = 0;\r
1333 CalloutNameIDCounter = 0;\r
14b0e578 1334 }\r
14b0e578
CS
1335 return 0;\r
1336}\r
14b0e578 1337\r
b602265d
DG
1338static CalloutNameEntry*\r
1339callout_name_find(UChar* name, UChar* name_end)\r
14b0e578 1340{\r
b602265d
DG
1341 int i, len;\r
1342 CalloutNameEntry* e;\r
1343 CalloutNameTable* t = Calloutnames;\r
14b0e578 1344\r
b602265d
DG
1345 if (IS_NOT_NULL(t)) {\r
1346 len = name_end - name;\r
1347 for (i = 0; i < t->num; i++) {\r
1348 e = &(t->e[i]);\r
1349 if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)\r
1350 return e;\r
1351 }\r
14b0e578 1352 }\r
b602265d
DG
1353 return (CalloutNameEntry* )NULL;\r
1354}\r
1355\r
14b0e578
CS
1356#endif\r
1357\r
b602265d
DG
1358/* name string must be single byte char string. */\r
1359static int\r
1360callout_name_entry(CalloutNameEntry** rentry, OnigEncoding enc,\r
1361 int is_not_single, UChar* name, UChar* name_end)\r
1362{\r
1363 int r;\r
1364 CalloutNameEntry* e;\r
1365 CalloutNameTable* t = GlobalCalloutNameTable;\r
14b0e578 1366\r
b602265d
DG
1367 *rentry = 0;\r
1368 if (name_end - name <= 0)\r
1369 return ONIGERR_INVALID_CALLOUT_NAME;\r
14b0e578 1370\r
b602265d
DG
1371 e = callout_name_find(enc, is_not_single, name, name_end);\r
1372 if (IS_NULL(e)) {\r
1373#ifdef USE_ST_LIBRARY\r
1374 if (IS_NULL(t)) {\r
1375 t = onig_st_init_callout_name_table_with_size(INIT_NAMES_ALLOC_NUM);\r
a5def177 1376 CHECK_NULL_RETURN_MEMERR(t);\r
b602265d
DG
1377 GlobalCalloutNameTable = t;\r
1378 }\r
1379 e = (CalloutNameEntry* )xmalloc(sizeof(CalloutNameEntry));\r
1380 CHECK_NULL_RETURN_MEMERR(e);\r
1381\r
1382 e->name = onigenc_strdup(enc, name, name_end);\r
1383 if (IS_NULL(e->name)) {\r
1384 xfree(e); return ONIGERR_MEMORY;\r
1385 }\r
1386\r
1387 r = st_insert_callout_name_table(t, enc, is_not_single,\r
1388 e->name, (e->name + (name_end - name)),\r
1389 (HashDataType )e);\r
1390 if (r < 0) return r;\r
1391\r
1392#else\r
1393\r
1394 int alloc;\r
1395\r
1396 if (IS_NULL(t)) {\r
1397 alloc = INIT_NAMES_ALLOC_NUM;\r
1398 t = (CalloutNameTable* )xmalloc(sizeof(CalloutNameTable));\r
1399 CHECK_NULL_RETURN_MEMERR(t);\r
1400 t->e = NULL;\r
1401 t->alloc = 0;\r
1402 t->num = 0;\r
1403\r
1404 t->e = (CalloutNameEntry* )xmalloc(sizeof(CalloutNameEntry) * alloc);\r
1405 if (IS_NULL(t->e)) {\r
1406 xfree(t);\r
1407 return ONIGERR_MEMORY;\r
1408 }\r
1409 t->alloc = alloc;\r
1410 GlobalCalloutNameTable = t;\r
1411 goto clear;\r
1412 }\r
1413 else if (t->num == t->alloc) {\r
1414 int i;\r
1415\r
1416 alloc = t->alloc * 2;\r
1417 t->e = (CalloutNameEntry* )xrealloc(t->e, sizeof(CalloutNameEntry) * alloc, sizeof(CalloutNameEntry)*t->alloc);\r
1418 CHECK_NULL_RETURN_MEMERR(t->e);\r
1419 t->alloc = alloc;\r
1420\r
1421 clear:\r
1422 for (i = t->num; i < t->alloc; i++) {\r
1423 t->e[i].name = NULL;\r
1424 t->e[i].name_len = 0;\r
1425 t->e[i].id = 0;\r
1426 }\r
1427 }\r
1428 e = &(t->e[t->num]);\r
1429 t->num++;\r
1430 e->name = onigenc_strdup(enc, name, name_end);\r
1431 if (IS_NULL(e->name)) return ONIGERR_MEMORY;\r
1432#endif\r
1433\r
1434 CalloutNameIDCounter++;\r
1435 e->id = CalloutNameIDCounter;\r
1436 e->name_len = (int )(name_end - name);\r
1437 }\r
1438\r
1439 *rentry = e;\r
1440 return e->id;\r
1441}\r
1442\r
1443static int\r
1444is_allowed_callout_name(OnigEncoding enc, UChar* name, UChar* name_end)\r
14b0e578 1445{\r
b602265d
DG
1446 UChar* p;\r
1447 OnigCodePoint c;\r
1448\r
1449 if (name >= name_end) return 0;\r
1450\r
1451 p = name;\r
1452 while (p < name_end) {\r
1453 c = ONIGENC_MBC_TO_CODE(enc, p, name_end);\r
1454 if (! IS_ALLOWED_CODE_IN_CALLOUT_NAME(c))\r
1455 return 0;\r
1456\r
1457 if (p == name) {\r
1458 if (c >= '0' && c <= '9') return 0;\r
1459 }\r
1460\r
1461 p += ONIGENC_MBC_ENC_LEN(enc, p);\r
1462 }\r
1463\r
1464 return 1;\r
14b0e578
CS
1465}\r
1466\r
b602265d
DG
1467static int\r
1468is_allowed_callout_tag_name(OnigEncoding enc, UChar* name, UChar* name_end)\r
14b0e578 1469{\r
b602265d
DG
1470 UChar* p;\r
1471 OnigCodePoint c;\r
14b0e578 1472\r
b602265d
DG
1473 if (name >= name_end) return 0;\r
1474\r
1475 p = name;\r
1476 while (p < name_end) {\r
1477 c = ONIGENC_MBC_TO_CODE(enc, p, name_end);\r
1478 if (! IS_ALLOWED_CODE_IN_CALLOUT_TAG_NAME(c))\r
1479 return 0;\r
1480\r
1481 if (p == name) {\r
1482 if (c >= '0' && c <= '9') return 0;\r
1483 }\r
1484\r
1485 p += ONIGENC_MBC_ENC_LEN(enc, p);\r
1486 }\r
1487\r
1488 return 1;\r
14b0e578
CS
1489}\r
1490\r
b602265d
DG
1491extern int\r
1492onig_set_callout_of_name(OnigEncoding enc, OnigCalloutType callout_type,\r
1493 UChar* name, UChar* name_end, int in,\r
1494 OnigCalloutFunc start_func,\r
1495 OnigCalloutFunc end_func,\r
1496 int arg_num, unsigned int arg_types[],\r
1497 int opt_arg_num, OnigValue opt_defaults[])\r
14b0e578 1498{\r
b602265d
DG
1499 int r;\r
1500 int i;\r
1501 int j;\r
1502 int id;\r
1503 int is_not_single;\r
1504 CalloutNameEntry* e;\r
1505 CalloutNameListEntry* fe;\r
14b0e578 1506\r
b602265d
DG
1507 if (callout_type != ONIG_CALLOUT_TYPE_SINGLE)\r
1508 return ONIGERR_INVALID_ARGUMENT;\r
14b0e578 1509\r
b602265d
DG
1510 if (arg_num < 0 || arg_num > ONIG_CALLOUT_MAX_ARGS_NUM)\r
1511 return ONIGERR_INVALID_CALLOUT_ARG;\r
14b0e578 1512\r
b602265d
DG
1513 if (opt_arg_num < 0 || opt_arg_num > arg_num)\r
1514 return ONIGERR_INVALID_CALLOUT_ARG;\r
14b0e578 1515\r
b602265d
DG
1516 if (start_func == 0 && end_func == 0)\r
1517 return ONIGERR_INVALID_CALLOUT_ARG;\r
1518\r
1519 if ((in & ONIG_CALLOUT_IN_PROGRESS) == 0 && (in & ONIG_CALLOUT_IN_RETRACTION) == 0)\r
1520 return ONIGERR_INVALID_CALLOUT_ARG;\r
1521\r
1522 for (i = 0; i < arg_num; i++) {\r
1523 unsigned int t = arg_types[i];\r
1524 if (t == ONIG_TYPE_VOID)\r
1525 return ONIGERR_INVALID_CALLOUT_ARG;\r
1526 else {\r
1527 if (i >= arg_num - opt_arg_num) {\r
1528 if (t != ONIG_TYPE_LONG && t != ONIG_TYPE_CHAR && t != ONIG_TYPE_STRING &&\r
1529 t != ONIG_TYPE_TAG)\r
1530 return ONIGERR_INVALID_CALLOUT_ARG;\r
1531 }\r
1532 else {\r
1533 if (t != ONIG_TYPE_LONG) {\r
1534 t = t & ~ONIG_TYPE_LONG;\r
1535 if (t != ONIG_TYPE_CHAR && t != ONIG_TYPE_STRING && t != ONIG_TYPE_TAG)\r
1536 return ONIGERR_INVALID_CALLOUT_ARG;\r
1537 }\r
14b0e578
CS
1538 }\r
1539 }\r
1540 }\r
1541\r
b602265d
DG
1542 if (! is_allowed_callout_name(enc, name, name_end)) {\r
1543 return ONIGERR_INVALID_CALLOUT_NAME;\r
14b0e578 1544 }\r
14b0e578 1545\r
b602265d
DG
1546 is_not_single = (callout_type != ONIG_CALLOUT_TYPE_SINGLE);\r
1547 id = callout_name_entry(&e, enc, is_not_single, name, name_end);\r
1548 if (id < 0) return id;\r
14b0e578 1549\r
b602265d
DG
1550 r = ONIG_NORMAL;\r
1551 if (IS_NULL(GlobalCalloutNameList)) {\r
1552 r = make_callout_func_list(&GlobalCalloutNameList, 10);\r
1553 if (r != ONIG_NORMAL) return r;\r
1554 }\r
14b0e578 1555\r
b602265d
DG
1556 while (id >= GlobalCalloutNameList->n) {\r
1557 int rid;\r
1558 r = callout_func_list_add(GlobalCalloutNameList, &rid);\r
1559 if (r != ONIG_NORMAL) return r;\r
14b0e578
CS
1560 }\r
1561\r
b602265d
DG
1562 fe = GlobalCalloutNameList->v + id;\r
1563 fe->type = callout_type;\r
1564 fe->in = in;\r
1565 fe->start_func = start_func;\r
1566 fe->end_func = end_func;\r
1567 fe->arg_num = arg_num;\r
1568 fe->opt_arg_num = opt_arg_num;\r
1569 fe->name = e->name;\r
14b0e578 1570\r
b602265d
DG
1571 for (i = 0; i < arg_num; i++) {\r
1572 fe->arg_types[i] = arg_types[i];\r
1573 }\r
1574 for (i = arg_num - opt_arg_num, j = 0; i < arg_num; i++, j++) {\r
6d665168 1575 if(IS_NULL(opt_defaults))return ONIGERR_INVALID_ARGUMENT;\r
b602265d
DG
1576 if (fe->arg_types[i] == ONIG_TYPE_STRING) {\r
1577 OnigValue* val = opt_defaults + j;\r
1578 UChar* ds = onigenc_strdup(enc, val->s.start, val->s.end);\r
1579 CHECK_NULL_RETURN_MEMERR(ds);\r
14b0e578 1580\r
b602265d
DG
1581 fe->opt_defaults[i].s.start = ds;\r
1582 fe->opt_defaults[i].s.end = ds + (val->s.end - val->s.start);\r
1583 }\r
1584 else {\r
1585 fe->opt_defaults[i] = opt_defaults[j];\r
1586 }\r
1587 }\r
1588\r
1589 r = id;\r
1590 return r;\r
14b0e578
CS
1591}\r
1592\r
b602265d
DG
1593static int\r
1594get_callout_name_id_by_name(OnigEncoding enc, int is_not_single,\r
1595 UChar* name, UChar* name_end, int* rid)\r
14b0e578 1596{\r
b602265d
DG
1597 int r;\r
1598 CalloutNameEntry* e;\r
14b0e578 1599\r
b602265d
DG
1600 if (! is_allowed_callout_name(enc, name, name_end)) {\r
1601 return ONIGERR_INVALID_CALLOUT_NAME;\r
1602 }\r
1603\r
1604 e = callout_name_find(enc, is_not_single, name, name_end);\r
1605 if (IS_NULL(e)) {\r
1606 return ONIGERR_UNDEFINED_CALLOUT_NAME;\r
1607 }\r
1608\r
1609 r = ONIG_NORMAL;\r
1610 *rid = e->id;\r
1611\r
1612 return r;\r
14b0e578
CS
1613}\r
1614\r
b602265d
DG
1615extern OnigCalloutFunc\r
1616onig_get_callout_start_func(regex_t* reg, int callout_num)\r
14b0e578 1617{\r
b602265d
DG
1618 /* If used for callouts of contents, return 0. */\r
1619 CalloutListEntry* e;\r
14b0e578 1620\r
b602265d 1621 e = onig_reg_callout_list_at(reg, callout_num);\r
a5def177 1622 CHECK_NULL_RETURN(e);\r
b602265d 1623 return e->start_func;\r
14b0e578
CS
1624}\r
1625\r
b602265d
DG
1626extern const UChar*\r
1627onig_get_callout_tag_start(regex_t* reg, int callout_num)\r
14b0e578 1628{\r
b602265d 1629 CalloutListEntry* e = onig_reg_callout_list_at(reg, callout_num);\r
a5def177 1630 CHECK_NULL_RETURN(e);\r
b602265d 1631 return e->tag_start;\r
14b0e578
CS
1632}\r
1633\r
b602265d
DG
1634extern const UChar*\r
1635onig_get_callout_tag_end(regex_t* reg, int callout_num)\r
14b0e578 1636{\r
b602265d 1637 CalloutListEntry* e = onig_reg_callout_list_at(reg, callout_num);\r
a5def177 1638 CHECK_NULL_RETURN(e);\r
b602265d
DG
1639 return e->tag_end;\r
1640}\r
14b0e578 1641\r
14b0e578 1642\r
b602265d
DG
1643extern OnigCalloutType\r
1644onig_get_callout_type_by_name_id(int name_id)\r
1645{\r
1646 if (name_id < 0 || name_id >= GlobalCalloutNameList->n)\r
1647 return 0;\r
14b0e578 1648\r
b602265d 1649 return GlobalCalloutNameList->v[name_id].type;\r
14b0e578
CS
1650}\r
1651\r
b602265d
DG
1652extern OnigCalloutFunc\r
1653onig_get_callout_start_func_by_name_id(int name_id)\r
14b0e578 1654{\r
b602265d
DG
1655 if (name_id < 0 || name_id >= GlobalCalloutNameList->n)\r
1656 return 0;\r
14b0e578 1657\r
b602265d 1658 return GlobalCalloutNameList->v[name_id].start_func;\r
14b0e578
CS
1659}\r
1660\r
b602265d
DG
1661extern OnigCalloutFunc\r
1662onig_get_callout_end_func_by_name_id(int name_id)\r
14b0e578 1663{\r
b602265d
DG
1664 if (name_id < 0 || name_id >= GlobalCalloutNameList->n)\r
1665 return 0;\r
14b0e578 1666\r
b602265d 1667 return GlobalCalloutNameList->v[name_id].end_func;\r
14b0e578
CS
1668}\r
1669\r
b602265d
DG
1670extern int\r
1671onig_get_callout_in_by_name_id(int name_id)\r
14b0e578 1672{\r
b602265d
DG
1673 if (name_id < 0 || name_id >= GlobalCalloutNameList->n)\r
1674 return 0;\r
14b0e578 1675\r
b602265d
DG
1676 return GlobalCalloutNameList->v[name_id].in;\r
1677}\r
14b0e578 1678\r
b602265d
DG
1679static int\r
1680get_callout_arg_num_by_name_id(int name_id)\r
1681{\r
1682 return GlobalCalloutNameList->v[name_id].arg_num;\r
1683}\r
14b0e578 1684\r
b602265d
DG
1685static int\r
1686get_callout_opt_arg_num_by_name_id(int name_id)\r
14b0e578 1687{\r
b602265d 1688 return GlobalCalloutNameList->v[name_id].opt_arg_num;\r
14b0e578 1689}\r
14b0e578 1690\r
b602265d
DG
1691static unsigned int\r
1692get_callout_arg_type_by_name_id(int name_id, int index)\r
14b0e578 1693{\r
b602265d 1694 return GlobalCalloutNameList->v[name_id].arg_types[index];\r
14b0e578
CS
1695}\r
1696\r
b602265d
DG
1697static OnigValue\r
1698get_callout_opt_default_by_name_id(int name_id, int index)\r
14b0e578 1699{\r
b602265d 1700 return GlobalCalloutNameList->v[name_id].opt_defaults[index];\r
14b0e578
CS
1701}\r
1702\r
b602265d
DG
1703extern UChar*\r
1704onig_get_callout_name_by_name_id(int name_id)\r
14b0e578 1705{\r
b602265d
DG
1706 if (name_id < 0 || name_id >= GlobalCalloutNameList->n)\r
1707 return 0;\r
1708\r
1709 return GlobalCalloutNameList->v[name_id].name;\r
14b0e578
CS
1710}\r
1711\r
b602265d
DG
1712extern int\r
1713onig_global_callout_names_free(void)\r
14b0e578 1714{\r
b602265d
DG
1715 free_callout_func_list(GlobalCalloutNameList);\r
1716 GlobalCalloutNameList = 0;\r
14b0e578 1717\r
b602265d
DG
1718 global_callout_name_table_free();\r
1719 return ONIG_NORMAL;\r
14b0e578
CS
1720}\r
1721\r
14b0e578 1722\r
b602265d
DG
1723typedef st_table CalloutTagTable;\r
1724typedef intptr_t CalloutTagVal;\r
14b0e578 1725\r
b602265d 1726#define CALLOUT_TAG_LIST_FLAG_TAG_EXIST (1<<0)\r
14b0e578 1727\r
b602265d
DG
1728static int\r
1729i_callout_callout_list_set(UChar* key, CalloutTagVal e, void* arg)\r
1730{\r
1731 int num;\r
1732 RegexExt* ext = (RegexExt* )arg;\r
14b0e578 1733\r
b602265d
DG
1734 num = (int )e - 1;\r
1735 ext->callout_list[num].flag |= CALLOUT_TAG_LIST_FLAG_TAG_EXIST;\r
1736 return ST_CONTINUE;\r
1737}\r
14b0e578 1738\r
b602265d
DG
1739static int\r
1740setup_ext_callout_list_values(regex_t* reg)\r
1741{\r
1742 int i, j;\r
1743 RegexExt* ext;\r
1744\r
1745 ext = REG_EXTP(reg);\r
1746 if (IS_NOT_NULL(ext->tag_table)) {\r
1747 onig_st_foreach((CalloutTagTable *)ext->tag_table, i_callout_callout_list_set,\r
1748 (st_data_t )ext);\r
1749 }\r
1750\r
1751 for (i = 0; i < ext->callout_num; i++) {\r
1752 CalloutListEntry* e = ext->callout_list + i;\r
1753 if (e->of == ONIG_CALLOUT_OF_NAME) {\r
1754 for (j = 0; j < e->u.arg.num; j++) {\r
1755 if (e->u.arg.types[j] == ONIG_TYPE_TAG) {\r
1756 UChar* start;\r
1757 UChar* end;\r
1758 int num;\r
1759 start = e->u.arg.vals[j].s.start;\r
1760 end = e->u.arg.vals[j].s.end;\r
1761 num = onig_get_callout_num_by_tag(reg, start, end);\r
1762 if (num < 0) return num;\r
1763 e->u.arg.vals[j].tag = num;\r
1764 }\r
14b0e578
CS
1765 }\r
1766 }\r
14b0e578
CS
1767 }\r
1768\r
b602265d 1769 return ONIG_NORMAL;\r
14b0e578
CS
1770}\r
1771\r
1772extern int\r
b602265d 1773onig_callout_tag_is_exist_at_callout_num(regex_t* reg, int callout_num)\r
14b0e578 1774{\r
b602265d 1775 RegexExt* ext = REG_EXTP(reg);\r
14b0e578 1776\r
b602265d
DG
1777 if (IS_NULL(ext) || IS_NULL(ext->callout_list)) return 0;\r
1778 if (callout_num > ext->callout_num) return 0;\r
14b0e578 1779\r
b602265d
DG
1780 return (ext->callout_list[callout_num].flag &\r
1781 CALLOUT_TAG_LIST_FLAG_TAG_EXIST) != 0 ? 1 : 0;\r
14b0e578
CS
1782}\r
1783\r
b602265d
DG
1784static int\r
1785i_free_callout_tag_entry(UChar* key, CalloutTagVal e, void* arg ARG_UNUSED)\r
14b0e578 1786{\r
b602265d
DG
1787 xfree(key);\r
1788 return ST_DELETE;\r
14b0e578
CS
1789}\r
1790\r
b602265d
DG
1791static int\r
1792callout_tag_table_clear(CalloutTagTable* t)\r
14b0e578 1793{\r
b602265d
DG
1794 if (IS_NOT_NULL(t)) {\r
1795 onig_st_foreach(t, i_free_callout_tag_entry, 0);\r
14b0e578 1796 }\r
b602265d 1797 return 0;\r
14b0e578
CS
1798}\r
1799\r
b602265d
DG
1800extern int\r
1801onig_callout_tag_table_free(void* table)\r
14b0e578 1802{\r
b602265d 1803 CalloutTagTable* t = (CalloutTagTable* )table;\r
14b0e578 1804\r
b602265d
DG
1805 if (IS_NOT_NULL(t)) {\r
1806 int r = callout_tag_table_clear(t);\r
1807 if (r != 0) return r;\r
14b0e578 1808\r
b602265d
DG
1809 onig_st_free_table(t);\r
1810 }\r
14b0e578 1811\r
b602265d 1812 return 0;\r
14b0e578
CS
1813}\r
1814\r
b602265d
DG
1815extern int\r
1816onig_get_callout_num_by_tag(regex_t* reg,\r
1817 const UChar* tag, const UChar* tag_end)\r
14b0e578 1818{\r
b602265d
DG
1819 int r;\r
1820 RegexExt* ext;\r
1821 CalloutTagVal e;\r
14b0e578 1822\r
b602265d
DG
1823 ext = REG_EXTP(reg);\r
1824 if (IS_NULL(ext) || IS_NULL(ext->tag_table))\r
1825 return ONIGERR_INVALID_CALLOUT_TAG_NAME;\r
14b0e578 1826\r
b602265d
DG
1827 r = onig_st_lookup_strend(ext->tag_table, tag, tag_end,\r
1828 (HashDataType* )((void* )(&e)));\r
1829 if (r == 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME;\r
1830 return (int )e;\r
14b0e578
CS
1831}\r
1832\r
b602265d
DG
1833static CalloutTagVal\r
1834callout_tag_find(CalloutTagTable* t, const UChar* name, const UChar* name_end)\r
14b0e578 1835{\r
b602265d 1836 CalloutTagVal e;\r
14b0e578 1837\r
b602265d
DG
1838 e = -1;\r
1839 if (IS_NOT_NULL(t)) {\r
1840 onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));\r
14b0e578 1841 }\r
b602265d 1842 return e;\r
14b0e578
CS
1843}\r
1844\r
1845static int\r
b602265d 1846callout_tag_table_new(CalloutTagTable** rt)\r
14b0e578 1847{\r
b602265d
DG
1848 CalloutTagTable* t;\r
1849\r
1850 *rt = 0;\r
1851 t = onig_st_init_strend_table_with_size(INIT_TAG_NAMES_ALLOC_NUM);\r
1852 CHECK_NULL_RETURN_MEMERR(t);\r
1853\r
1854 *rt = t;\r
1855 return ONIG_NORMAL;\r
14b0e578
CS
1856}\r
1857\r
14b0e578 1858static int\r
b602265d
DG
1859callout_tag_entry_raw(CalloutTagTable* t, UChar* name, UChar* name_end,\r
1860 CalloutTagVal entry_val)\r
14b0e578 1861{\r
b602265d
DG
1862 int r;\r
1863 CalloutTagVal val;\r
14b0e578 1864\r
b602265d
DG
1865 if (name_end - name <= 0)\r
1866 return ONIGERR_INVALID_CALLOUT_TAG_NAME;\r
14b0e578 1867\r
b602265d
DG
1868 val = callout_tag_find(t, name, name_end);\r
1869 if (val >= 0)\r
1870 return ONIGERR_MULTIPLEX_DEFINED_NAME;\r
14b0e578 1871\r
b602265d
DG
1872 r = onig_st_insert_strend(t, name, name_end, (HashDataType )entry_val);\r
1873 if (r < 0) return r;\r
14b0e578 1874\r
b602265d 1875 return ONIG_NORMAL;\r
14b0e578
CS
1876}\r
1877\r
1878static int\r
b602265d 1879ext_ensure_tag_table(regex_t* reg)\r
14b0e578 1880{\r
b602265d
DG
1881 int r;\r
1882 RegexExt* ext;\r
1883 CalloutTagTable* t;\r
14b0e578 1884\r
b602265d
DG
1885 ext = onig_get_regex_ext(reg);\r
1886 CHECK_NULL_RETURN_MEMERR(ext);\r
14b0e578 1887\r
b602265d
DG
1888 if (IS_NULL(ext->tag_table)) {\r
1889 r = callout_tag_table_new(&t);\r
1890 if (r != ONIG_NORMAL) return r;\r
1891\r
1892 ext->tag_table = t;\r
14b0e578 1893 }\r
b602265d
DG
1894\r
1895 return ONIG_NORMAL;\r
14b0e578
CS
1896}\r
1897\r
1898static int\r
b602265d
DG
1899callout_tag_entry(regex_t* reg, UChar* name, UChar* name_end,\r
1900 CalloutTagVal entry_val)\r
14b0e578 1901{\r
b602265d
DG
1902 int r;\r
1903 RegexExt* ext;\r
1904 CalloutListEntry* e;\r
14b0e578 1905\r
b602265d
DG
1906 r = ext_ensure_tag_table(reg);\r
1907 if (r != ONIG_NORMAL) return r;\r
14b0e578 1908\r
b602265d 1909 ext = onig_get_regex_ext(reg);\r
df8be9e5 1910 CHECK_NULL_RETURN_MEMERR(ext);\r
b602265d 1911 r = callout_tag_entry_raw(ext->tag_table, name, name_end, entry_val);\r
14b0e578 1912\r
b602265d 1913 e = onig_reg_callout_list_at(reg, (int )entry_val);\r
a5def177 1914 CHECK_NULL_RETURN_MEMERR(e);\r
b602265d
DG
1915 e->tag_start = name;\r
1916 e->tag_end = name_end;\r
14b0e578 1917\r
b602265d
DG
1918 return r;\r
1919}\r
14b0e578 1920\r
b602265d 1921#endif /* USE_CALLOUT */\r
14b0e578 1922\r
14b0e578 1923\r
b602265d 1924#define INIT_SCANENV_MEMENV_ALLOC_SIZE 16\r
14b0e578 1925\r
b602265d
DG
1926static void\r
1927scan_env_clear(ScanEnv* env)\r
14b0e578 1928{\r
b602265d
DG
1929 MEM_STATUS_CLEAR(env->capture_history);\r
1930 MEM_STATUS_CLEAR(env->bt_mem_start);\r
1931 MEM_STATUS_CLEAR(env->bt_mem_end);\r
1932 MEM_STATUS_CLEAR(env->backrefed_mem);\r
1933 env->error = (UChar* )NULL;\r
1934 env->error_end = (UChar* )NULL;\r
1935 env->num_call = 0;\r
14b0e578 1936\r
b602265d
DG
1937#ifdef USE_CALL\r
1938 env->unset_addr_list = NULL;\r
1939 env->has_call_zero = 0;\r
1940#endif\r
14b0e578 1941\r
b602265d
DG
1942 env->num_mem = 0;\r
1943 env->num_named = 0;\r
1944 env->mem_alloc = 0;\r
1945 env->mem_env_dynamic = (MemEnv* )NULL;\r
14b0e578 1946\r
b602265d 1947 xmemset(env->mem_env_static, 0, sizeof(env->mem_env_static));\r
14b0e578 1948\r
b602265d
DG
1949 env->parse_depth = 0;\r
1950 env->keep_num = 0;\r
1951 env->save_num = 0;\r
1952 env->save_alloc_num = 0;\r
1953 env->saves = 0;\r
1954}\r
14b0e578 1955\r
b602265d
DG
1956static int\r
1957scan_env_add_mem_entry(ScanEnv* env)\r
1958{\r
1959 int i, need, alloc;\r
1960 MemEnv* p;\r
14b0e578 1961\r
b602265d
DG
1962 need = env->num_mem + 1;\r
1963 if (need > MaxCaptureNum && MaxCaptureNum != 0)\r
1964 return ONIGERR_TOO_MANY_CAPTURES;\r
14b0e578 1965\r
b602265d
DG
1966 if (need >= SCANENV_MEMENV_SIZE) {\r
1967 if (env->mem_alloc <= need) {\r
1968 if (IS_NULL(env->mem_env_dynamic)) {\r
1969 alloc = INIT_SCANENV_MEMENV_ALLOC_SIZE;\r
1970 p = (MemEnv* )xmalloc(sizeof(MemEnv) * alloc);\r
1971 CHECK_NULL_RETURN_MEMERR(p);\r
1972 xmemcpy(p, env->mem_env_static, sizeof(env->mem_env_static));\r
1973 }\r
1974 else {\r
1975 alloc = env->mem_alloc * 2;\r
1976 p = (MemEnv* )xrealloc(env->mem_env_dynamic, sizeof(MemEnv) * alloc, sizeof(MemEnv)*env->mem_alloc);\r
1977 CHECK_NULL_RETURN_MEMERR(p);\r
1978 }\r
14b0e578 1979\r
b602265d
DG
1980 for (i = env->num_mem + 1; i < alloc; i++) {\r
1981 p[i].node = NULL_NODE;\r
1982#if 0\r
1983 p[i].in = 0;\r
1984 p[i].recursion = 0;\r
1985#endif\r
1986 }\r
1987\r
1988 env->mem_env_dynamic = p;\r
1989 env->mem_alloc = alloc;\r
14b0e578
CS
1990 }\r
1991 }\r
1992\r
b602265d
DG
1993 env->num_mem++;\r
1994 return env->num_mem;\r
14b0e578
CS
1995}\r
1996\r
1997static int\r
b602265d 1998scan_env_set_mem_node(ScanEnv* env, int num, Node* node)\r
14b0e578 1999{\r
b602265d
DG
2000 if (env->num_mem >= num)\r
2001 SCANENV_MEMENV(env)[num].node = node;\r
2002 else\r
2003 return ONIGERR_PARSER_BUG;\r
2004 return 0;\r
14b0e578
CS
2005}\r
2006\r
b602265d
DG
2007extern void\r
2008onig_node_free(Node* node)\r
14b0e578 2009{\r
b602265d
DG
2010 start:\r
2011 if (IS_NULL(node)) return ;\r
14b0e578 2012\r
b602265d
DG
2013#ifdef DEBUG_NODE_FREE\r
2014 fprintf(stderr, "onig_node_free: %p\n", node);\r
2015#endif\r
14b0e578 2016\r
b602265d
DG
2017 switch (NODE_TYPE(node)) {\r
2018 case NODE_STRING:\r
2019 if (STR_(node)->capa != 0 &&\r
2020 IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) {\r
2021 xfree(STR_(node)->s);\r
2022 }\r
2023 break;\r
14b0e578 2024\r
b602265d
DG
2025 case NODE_LIST:\r
2026 case NODE_ALT:\r
2027 onig_node_free(NODE_CAR(node));\r
2028 {\r
2029 Node* next_node = NODE_CDR(node);\r
2030\r
2031 xfree(node);\r
2032 node = next_node;\r
2033 goto start;\r
14b0e578 2034 }\r
b602265d 2035 break;\r
14b0e578 2036\r
b602265d
DG
2037 case NODE_CCLASS:\r
2038 {\r
2039 CClassNode* cc = CCLASS_(node);\r
14b0e578 2040\r
b602265d
DG
2041 if (cc->mbuf)\r
2042 bbuf_free(cc->mbuf);\r
2043 }\r
2044 break;\r
14b0e578 2045\r
b602265d
DG
2046 case NODE_BACKREF:\r
2047 if (IS_NOT_NULL(BACKREF_(node)->back_dynamic))\r
2048 xfree(BACKREF_(node)->back_dynamic);\r
2049 break;\r
14b0e578 2050\r
b602265d
DG
2051 case NODE_ENCLOSURE:\r
2052 if (NODE_BODY(node))\r
2053 onig_node_free(NODE_BODY(node));\r
14b0e578 2054\r
b602265d
DG
2055 {\r
2056 EnclosureNode* en = ENCLOSURE_(node);\r
2057 if (en->type == ENCLOSURE_IF_ELSE) {\r
2058 onig_node_free(en->te.Then);\r
2059 onig_node_free(en->te.Else);\r
14b0e578
CS
2060 }\r
2061 }\r
b602265d 2062 break;\r
14b0e578 2063\r
b602265d
DG
2064 case NODE_QUANT:\r
2065 case NODE_ANCHOR:\r
2066 if (NODE_BODY(node))\r
2067 onig_node_free(NODE_BODY(node));\r
2068 break;\r
14b0e578 2069\r
b602265d
DG
2070 case NODE_CTYPE:\r
2071 case NODE_CALL:\r
2072 case NODE_GIMMICK:\r
2073 break;\r
14b0e578 2074 }\r
14b0e578 2075\r
b602265d 2076 xfree(node);\r
14b0e578
CS
2077}\r
2078\r
b602265d
DG
2079static void\r
2080cons_node_free_alone(Node* node)\r
14b0e578 2081{\r
b602265d
DG
2082 NODE_CAR(node) = 0;\r
2083 NODE_CDR(node) = 0;\r
2084 onig_node_free(node);\r
14b0e578
CS
2085}\r
2086\r
b602265d
DG
2087static Node*\r
2088node_new(void)\r
14b0e578 2089{\r
b602265d 2090 Node* node;\r
14b0e578 2091\r
b602265d 2092 node = (Node* )xmalloc(sizeof(Node));\r
df8be9e5 2093 CHECK_NULL_RETURN(node);\r
b602265d 2094 xmemset(node, 0, sizeof(*node));\r
14b0e578 2095\r
b602265d
DG
2096#ifdef DEBUG_NODE_FREE\r
2097 fprintf(stderr, "node_new: %p\n", node);\r
2098#endif\r
2099 return node;\r
2100}\r
14b0e578 2101\r
14b0e578 2102\r
b602265d
DG
2103static void\r
2104initialize_cclass(CClassNode* cc)\r
2105{\r
2106 BITSET_CLEAR(cc->bs);\r
2107 cc->flags = 0;\r
2108 cc->mbuf = NULL;\r
2109}\r
2110\r
2111static Node*\r
2112node_new_cclass(void)\r
2113{\r
2114 Node* node = node_new();\r
2115 CHECK_NULL_RETURN(node);\r
2116\r
2117 NODE_SET_TYPE(node, NODE_CCLASS);\r
2118 initialize_cclass(CCLASS_(node));\r
2119 return node;\r
2120}\r
2121\r
2122static Node*\r
2123node_new_ctype(int type, int not, OnigOptionType options)\r
2124{\r
2125 Node* node = node_new();\r
2126 CHECK_NULL_RETURN(node);\r
2127\r
2128 NODE_SET_TYPE(node, NODE_CTYPE);\r
2129 CTYPE_(node)->ctype = type;\r
2130 CTYPE_(node)->not = not;\r
2131 CTYPE_(node)->options = options;\r
2132 CTYPE_(node)->ascii_mode = IS_ASCII_MODE_CTYPE_OPTION(type, options);\r
2133 return node;\r
2134}\r
2135\r
2136static Node*\r
2137node_new_anychar(void)\r
2138{\r
2139 Node* node = node_new_ctype(CTYPE_ANYCHAR, 0, ONIG_OPTION_NONE);\r
2140 return node;\r
2141}\r
2142\r
2143static Node*\r
2144node_new_anychar_with_fixed_option(OnigOptionType option)\r
2145{\r
2146 CtypeNode* ct;\r
2147 Node* node;\r
2148\r
2149 node = node_new_anychar();\r
a5def177
DG
2150 CHECK_NULL_RETURN(node);\r
2151\r
b602265d
DG
2152 ct = CTYPE_(node);\r
2153 ct->options = option;\r
2154 NODE_STATUS_ADD(node, FIXED_OPTION);\r
2155 return node;\r
2156}\r
2157\r
2158static int\r
2159node_new_no_newline(Node** node, ScanEnv* env)\r
2160{\r
2161 Node* n;\r
2162\r
2163 n = node_new_anychar_with_fixed_option(ONIG_OPTION_NONE);\r
2164 CHECK_NULL_RETURN_MEMERR(n);\r
2165 *node = n;\r
2166 return 0;\r
2167}\r
2168\r
2169static int\r
2170node_new_true_anychar(Node** node, ScanEnv* env)\r
2171{\r
2172 Node* n;\r
2173\r
2174 n = node_new_anychar_with_fixed_option(ONIG_OPTION_MULTILINE);\r
2175 CHECK_NULL_RETURN_MEMERR(n);\r
2176 *node = n;\r
2177 return 0;\r
2178}\r
2179\r
2180static Node*\r
2181node_new_list(Node* left, Node* right)\r
2182{\r
2183 Node* node = node_new();\r
2184 CHECK_NULL_RETURN(node);\r
2185\r
2186 NODE_SET_TYPE(node, NODE_LIST);\r
2187 NODE_CAR(node) = left;\r
2188 NODE_CDR(node) = right;\r
2189 return node;\r
2190}\r
2191\r
2192extern Node*\r
2193onig_node_new_list(Node* left, Node* right)\r
2194{\r
2195 return node_new_list(left, right);\r
2196}\r
2197\r
2198extern Node*\r
2199onig_node_list_add(Node* list, Node* x)\r
2200{\r
2201 Node *n;\r
2202\r
2203 n = onig_node_new_list(x, NULL);\r
2204 if (IS_NULL(n)) return NULL_NODE;\r
2205\r
2206 if (IS_NOT_NULL(list)) {\r
2207 while (IS_NOT_NULL(NODE_CDR(list)))\r
2208 list = NODE_CDR(list);\r
2209\r
2210 NODE_CDR(list) = n;\r
2211 }\r
2212\r
2213 return n;\r
2214}\r
2215\r
2216extern Node*\r
2217onig_node_new_alt(Node* left, Node* right)\r
2218{\r
2219 Node* node = node_new();\r
2220 CHECK_NULL_RETURN(node);\r
2221\r
2222 NODE_SET_TYPE(node, NODE_ALT);\r
2223 NODE_CAR(node) = left;\r
2224 NODE_CDR(node) = right;\r
2225 return node;\r
2226}\r
2227\r
2228static Node*\r
2229make_list_or_alt(NodeType type, int n, Node* ns[])\r
2230{\r
2231 Node* r;\r
2232\r
2233 if (n <= 0) return NULL_NODE;\r
2234\r
2235 if (n == 1) {\r
2236 r = node_new();\r
2237 CHECK_NULL_RETURN(r);\r
2238 NODE_SET_TYPE(r, type);\r
2239 NODE_CAR(r) = ns[0];\r
2240 NODE_CDR(r) = NULL_NODE;\r
2241 }\r
2242 else {\r
2243 Node* right;\r
2244\r
2245 r = node_new();\r
2246 CHECK_NULL_RETURN(r);\r
2247\r
2248 right = make_list_or_alt(type, n - 1, ns + 1);\r
2249 if (IS_NULL(right)) {\r
2250 onig_node_free(r);\r
2251 return NULL_NODE;\r
2252 }\r
2253\r
2254 NODE_SET_TYPE(r, type);\r
2255 NODE_CAR(r) = ns[0];\r
2256 NODE_CDR(r) = right;\r
2257 }\r
2258\r
2259 return r;\r
2260}\r
2261\r
2262static Node*\r
2263make_list(int n, Node* ns[])\r
2264{\r
2265 return make_list_or_alt(NODE_LIST, n, ns);\r
2266}\r
2267\r
2268static Node*\r
2269make_alt(int n, Node* ns[])\r
2270{\r
2271 return make_list_or_alt(NODE_ALT, n, ns);\r
2272}\r
2273\r
2274extern Node*\r
2275onig_node_new_anchor(int type, int ascii_mode)\r
2276{\r
2277 Node* node = node_new();\r
2278 CHECK_NULL_RETURN(node);\r
2279\r
2280 NODE_SET_TYPE(node, NODE_ANCHOR);\r
2281 ANCHOR_(node)->type = type;\r
2282 ANCHOR_(node)->char_len = -1;\r
2283 ANCHOR_(node)->ascii_mode = ascii_mode;\r
2284 return node;\r
2285}\r
2286\r
2287static Node*\r
2288node_new_backref(int back_num, int* backrefs, int by_name,\r
2289#ifdef USE_BACKREF_WITH_LEVEL\r
2290 int exist_level, int nest_level,\r
2291#endif\r
2292 ScanEnv* env)\r
2293{\r
2294 int i;\r
2295 Node* node = node_new();\r
2296\r
2297 CHECK_NULL_RETURN(node);\r
2298\r
2299 NODE_SET_TYPE(node, NODE_BACKREF);\r
2300 BACKREF_(node)->back_num = back_num;\r
2301 BACKREF_(node)->back_dynamic = (int* )NULL;\r
2302 if (by_name != 0)\r
2303 NODE_STATUS_ADD(node, BY_NAME);\r
2304\r
2305#ifdef USE_BACKREF_WITH_LEVEL\r
2306 if (exist_level != 0) {\r
2307 NODE_STATUS_ADD(node, NEST_LEVEL);\r
2308 BACKREF_(node)->nest_level = nest_level;\r
2309 }\r
2310#endif\r
2311\r
2312 for (i = 0; i < back_num; i++) {\r
2313 if (backrefs[i] <= env->num_mem &&\r
2314 IS_NULL(SCANENV_MEMENV(env)[backrefs[i]].node)) {\r
2315 NODE_STATUS_ADD(node, RECURSION); /* /...(\1).../ */\r
2316 break;\r
2317 }\r
2318 }\r
2319\r
2320 if (back_num <= NODE_BACKREFS_SIZE) {\r
2321 for (i = 0; i < back_num; i++)\r
2322 BACKREF_(node)->back_static[i] = backrefs[i];\r
2323 }\r
2324 else {\r
2325 int* p = (int* )xmalloc(sizeof(int) * back_num);\r
2326 if (IS_NULL(p)) {\r
2327 onig_node_free(node);\r
2328 return NULL;\r
2329 }\r
2330 BACKREF_(node)->back_dynamic = p;\r
2331 for (i = 0; i < back_num; i++)\r
2332 p[i] = backrefs[i];\r
2333 }\r
2334 return node;\r
2335}\r
2336\r
2337static Node*\r
2338node_new_backref_checker(int back_num, int* backrefs, int by_name,\r
2339#ifdef USE_BACKREF_WITH_LEVEL\r
2340 int exist_level, int nest_level,\r
2341#endif\r
2342 ScanEnv* env)\r
2343{\r
2344 Node* node;\r
2345\r
2346 node = node_new_backref(back_num, backrefs, by_name,\r
2347#ifdef USE_BACKREF_WITH_LEVEL\r
2348 exist_level, nest_level,\r
2349#endif\r
2350 env);\r
2351 CHECK_NULL_RETURN(node);\r
2352\r
2353 NODE_STATUS_ADD(node, CHECKER);\r
2354 return node;\r
2355}\r
2356\r
#ifdef USE_CALL
/*
 * Allocate a NODE_CALL node.  The name range [name, name_end) is stored by
 * pointer (not copied), together with the group number and the by_number
 * flag.  entry_count starts at 1.
 */
static Node*
node_new_call(UChar* name, UChar* name_end, int gnum, int by_number)
{
  Node* call = node_new();

  CHECK_NULL_RETURN(call);

  NODE_SET_TYPE(call, NODE_CALL);
  CALL_(call)->by_number   = by_number;
  CALL_(call)->name        = name;
  CALL_(call)->name_end    = name_end;
  CALL_(call)->group_num   = gnum;
  CALL_(call)->entry_count = 1;
  return call;
}
#endif
2373\r
2374static Node*\r
2375node_new_quantifier(int lower, int upper, int by_number)\r
2376{\r
2377 Node* node = node_new();\r
2378 CHECK_NULL_RETURN(node);\r
2379\r
2380 NODE_SET_TYPE(node, NODE_QUANT);\r
2381 QUANT_(node)->lower = lower;\r
2382 QUANT_(node)->upper = upper;\r
2383 QUANT_(node)->greedy = 1;\r
2384 QUANT_(node)->body_empty_info = QUANT_BODY_IS_NOT_EMPTY;\r
2385 QUANT_(node)->head_exact = NULL_NODE;\r
2386 QUANT_(node)->next_head_exact = NULL_NODE;\r
2387 QUANT_(node)->is_refered = 0;\r
2388 if (by_number != 0)\r
2389 NODE_STATUS_ADD(node, BY_NUMBER);\r
2390\r
2391 return node;\r
2392}\r
2393\r
2394static Node*\r
2395node_new_enclosure(enum EnclosureType type)\r
2396{\r
2397 Node* node = node_new();\r
2398 CHECK_NULL_RETURN(node);\r
2399\r
2400 NODE_SET_TYPE(node, NODE_ENCLOSURE);\r
2401 ENCLOSURE_(node)->type = type;\r
2402\r
2403 switch (type) {\r
2404 case ENCLOSURE_MEMORY:\r
2405 ENCLOSURE_(node)->m.regnum = 0;\r
2406 ENCLOSURE_(node)->m.called_addr = -1;\r
2407 ENCLOSURE_(node)->m.entry_count = 1;\r
2408 ENCLOSURE_(node)->m.called_state = 0;\r
2409 break;\r
2410\r
2411 case ENCLOSURE_OPTION:\r
2412 ENCLOSURE_(node)->o.options = 0;\r
2413 break;\r
2414\r
2415 case ENCLOSURE_STOP_BACKTRACK:\r
2416 break;\r
2417\r
2418 case ENCLOSURE_IF_ELSE:\r
2419 ENCLOSURE_(node)->te.Then = 0;\r
2420 ENCLOSURE_(node)->te.Else = 0;\r
2421 break;\r
2422 }\r
2423\r
2424 ENCLOSURE_(node)->opt_count = 0;\r
2425 return node;\r
2426}\r
2427\r
2428extern Node*\r
2429onig_node_new_enclosure(int type)\r
2430{\r
2431 return node_new_enclosure(type);\r
2432}\r
2433\r
2434static Node*\r
2435node_new_enclosure_if_else(Node* cond, Node* Then, Node* Else)\r
2436{\r
2437 Node* n;\r
2438 n = node_new_enclosure(ENCLOSURE_IF_ELSE);\r
2439 CHECK_NULL_RETURN(n);\r
2440\r
2441 NODE_BODY(n) = cond;\r
2442 ENCLOSURE_(n)->te.Then = Then;\r
2443 ENCLOSURE_(n)->te.Else = Else;\r
2444 return n;\r
2445}\r
2446\r
2447static Node*\r
2448node_new_memory(int is_named)\r
2449{\r
2450 Node* node = node_new_enclosure(ENCLOSURE_MEMORY);\r
2451 CHECK_NULL_RETURN(node);\r
2452 if (is_named != 0)\r
2453 NODE_STATUS_ADD(node, NAMED_GROUP);\r
2454\r
2455 return node;\r
2456}\r
2457\r
2458static Node*\r
2459node_new_option(OnigOptionType option)\r
2460{\r
2461 Node* node = node_new_enclosure(ENCLOSURE_OPTION);\r
2462 CHECK_NULL_RETURN(node);\r
2463 ENCLOSURE_(node)->o.options = option;\r
2464 return node;\r
2465}\r
2466\r
2467static int\r
2468node_new_fail(Node** node, ScanEnv* env)\r
2469{\r
2470 *node = node_new();\r
2471 CHECK_NULL_RETURN_MEMERR(*node);\r
2472\r
2473 NODE_SET_TYPE(*node, NODE_GIMMICK);\r
2474 GIMMICK_(*node)->type = GIMMICK_FAIL;\r
2475 return ONIG_NORMAL;\r
2476}\r
2477\r
2478static int\r
2479node_new_save_gimmick(Node** node, enum SaveType save_type, ScanEnv* env)\r
2480{\r
2481 int id;\r
2482 int r;\r
2483\r
2484 r = save_entry(env, save_type, &id);\r
2485 if (r != ONIG_NORMAL) return r;\r
2486\r
2487 *node = node_new();\r
2488 CHECK_NULL_RETURN_MEMERR(*node);\r
2489\r
2490 NODE_SET_TYPE(*node, NODE_GIMMICK);\r
2491 GIMMICK_(*node)->id = id;\r
2492 GIMMICK_(*node)->type = GIMMICK_SAVE;\r
2493 GIMMICK_(*node)->detail_type = (int )save_type;\r
2494\r
2495 return ONIG_NORMAL;\r
2496}\r
2497\r
2498static int\r
2499node_new_update_var_gimmick(Node** node, enum UpdateVarType update_var_type,\r
2500 int id, ScanEnv* env)\r
2501{\r
2502 *node = node_new();\r
2503 CHECK_NULL_RETURN_MEMERR(*node);\r
2504\r
2505 NODE_SET_TYPE(*node, NODE_GIMMICK);\r
2506 GIMMICK_(*node)->id = id;\r
2507 GIMMICK_(*node)->type = GIMMICK_UPDATE_VAR;\r
2508 GIMMICK_(*node)->detail_type = (int )update_var_type;\r
2509\r
2510 return ONIG_NORMAL;\r
2511}\r
2512\r
2513static int\r
2514node_new_keep(Node** node, ScanEnv* env)\r
2515{\r
2516 int r;\r
2517\r
2518 r = node_new_save_gimmick(node, SAVE_KEEP, env);\r
2519 if (r != 0) return r;\r
2520\r
2521 env->keep_num++;\r
2522 return ONIG_NORMAL;\r
2523}\r
2524\r
2525#ifdef USE_CALLOUT\r
2526\r
2527extern void\r
2528onig_free_reg_callout_list(int n, CalloutListEntry* list)\r
2529{\r
2530 int i;\r
2531 int j;\r
2532\r
2533 if (IS_NULL(list)) return ;\r
2534\r
2535 for (i = 0; i < n; i++) {\r
2536 if (list[i].of == ONIG_CALLOUT_OF_NAME) {\r
2537 for (j = 0; j < list[i].u.arg.passed_num; j++) {\r
2538 if (list[i].u.arg.types[j] == ONIG_TYPE_STRING) {\r
2539 if (IS_NOT_NULL(list[i].u.arg.vals[j].s.start))\r
2540 xfree(list[i].u.arg.vals[j].s.start);\r
2541 }\r
2542 }\r
2543 }\r
2544 else { /* ONIG_CALLOUT_OF_CONTENTS */\r
2545 if (IS_NOT_NULL(list[i].u.content.start)) {\r
2546 xfree((void* )list[i].u.content.start);\r
2547 }\r
2548 }\r
2549 }\r
2550\r
2551 xfree(list);\r
2552}\r
2553\r
2554extern CalloutListEntry*\r
2555onig_reg_callout_list_at(regex_t* reg, int num)\r
2556{\r
2557 RegexExt* ext = REG_EXTP(reg);\r
2558 CHECK_NULL_RETURN(ext);\r
2559\r
2560 if (num <= 0 || num > ext->callout_num)\r
2561 return 0;\r
2562\r
2563 num--;\r
2564 return ext->callout_list + num;\r
2565}\r
2566\r
2567static int\r
2568reg_callout_list_entry(ScanEnv* env, int* rnum)\r
2569{\r
2570#define INIT_CALLOUT_LIST_NUM 3\r
2571\r
2572 int num;\r
2573 CalloutListEntry* list;\r
2574 CalloutListEntry* e;\r
2575 RegexExt* ext;\r
2576\r
2577 ext = onig_get_regex_ext(env->reg);\r
2578 CHECK_NULL_RETURN_MEMERR(ext);\r
2579\r
2580 if (IS_NULL(ext->callout_list)) {\r
2581 list = (CalloutListEntry* )xmalloc(sizeof(*list) * INIT_CALLOUT_LIST_NUM);\r
2582 CHECK_NULL_RETURN_MEMERR(list);\r
2583\r
2584 ext->callout_list = list;\r
2585 ext->callout_list_alloc = INIT_CALLOUT_LIST_NUM;\r
2586 ext->callout_num = 0;\r
2587 }\r
2588\r
2589 num = ext->callout_num + 1;\r
2590 if (num > ext->callout_list_alloc) {\r
2591 int alloc = ext->callout_list_alloc * 2;\r
2592 list = (CalloutListEntry* )xrealloc(ext->callout_list,\r
2593 sizeof(CalloutListEntry) * alloc,\r
2594 sizeof(CalloutListEntry) * ext->callout_list_alloc);\r
2595 CHECK_NULL_RETURN_MEMERR(list);\r
2596\r
2597 ext->callout_list = list;\r
2598 ext->callout_list_alloc = alloc;\r
2599 }\r
2600\r
2601 e = ext->callout_list + (num - 1);\r
2602\r
2603 e->flag = 0;\r
2604 e->of = 0;\r
2605 e->in = ONIG_CALLOUT_OF_CONTENTS;\r
2606 e->type = 0;\r
2607 e->tag_start = 0;\r
2608 e->tag_end = 0;\r
2609 e->start_func = 0;\r
2610 e->end_func = 0;\r
2611 e->u.arg.num = 0;\r
2612 e->u.arg.passed_num = 0;\r
2613\r
2614 ext->callout_num = num;\r
2615 *rnum = num;\r
2616 return ONIG_NORMAL;\r
2617}\r
2618\r
2619static int\r
2620node_new_callout(Node** node, OnigCalloutOf callout_of, int num, int id,\r
2621 ScanEnv* env)\r
2622{\r
2623 *node = node_new();\r
2624 CHECK_NULL_RETURN_MEMERR(*node);\r
2625\r
2626 NODE_SET_TYPE(*node, NODE_GIMMICK);\r
2627 GIMMICK_(*node)->id = id;\r
2628 GIMMICK_(*node)->num = num;\r
2629 GIMMICK_(*node)->type = GIMMICK_CALLOUT;\r
2630 GIMMICK_(*node)->detail_type = (int )callout_of;\r
2631\r
2632 return ONIG_NORMAL;\r
2633}\r
2634#endif\r
2635\r
2636static int\r
2637make_extended_grapheme_cluster(Node** node, ScanEnv* env)\r
2638{\r
2639 int r;\r
2640 int i;\r
2641 Node* x;\r
2642 Node* ns[2];\r
2643\r
2644 /* \X == (?>\O(?:\Y\O)*) */\r
2645\r
2646 ns[1] = NULL_NODE;\r
2647\r
2648 r = ONIGERR_MEMORY;\r
2649 ns[0] = onig_node_new_anchor(ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY, 0);\r
2650 if (IS_NULL(ns[0])) goto err;\r
2651\r
2652 r = node_new_true_anychar(&ns[1], env);\r
2653 if (r != 0) goto err1;\r
2654\r
2655 x = make_list(2, ns);\r
2656 if (IS_NULL(x)) goto err;\r
2657 ns[0] = x;\r
2658 ns[1] = NULL_NODE;\r
2659\r
2660 x = node_new_quantifier(0, REPEAT_INFINITE, 1);\r
2661 if (IS_NULL(x)) goto err;\r
2662\r
2663 NODE_BODY(x) = ns[0];\r
2664 ns[0] = NULL_NODE;\r
2665 ns[1] = x;\r
2666\r
2667 r = node_new_true_anychar(&ns[0], env);\r
2668 if (r != 0) goto err1;\r
2669\r
2670 x = make_list(2, ns);\r
2671 if (IS_NULL(x)) goto err;\r
2672\r
2673 ns[0] = x;\r
2674 ns[1] = NULL_NODE;\r
2675\r
2676 x = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);\r
2677 if (IS_NULL(x)) goto err;\r
2678\r
2679 NODE_BODY(x) = ns[0];\r
2680\r
2681 *node = x;\r
2682 return ONIG_NORMAL;\r
2683\r
2684 err:\r
2685 r = ONIGERR_MEMORY;\r
2686 err1:\r
2687 for (i = 0; i < 2; i++) onig_node_free(ns[i]);\r
2688 return r;\r
2689}\r
2690\r
2691static int\r
2692make_absent_engine(Node** node, int pre_save_right_id, Node* absent,\r
2693 Node* step_one, int lower, int upper, int possessive,\r
2694 int is_range_cutter, ScanEnv* env)\r
2695{\r
2696 int r;\r
2697 int i;\r
2698 int id;\r
2699 Node* x;\r
2700 Node* ns[4];\r
2701\r
2702 for (i = 0; i < 4; i++) ns[i] = NULL_NODE;\r
2703\r
2704 ns[1] = absent;\r
2705 ns[3] = step_one; /* for err */\r
2706 r = node_new_save_gimmick(&ns[0], SAVE_S, env);\r
2707 if (r != 0) goto err;\r
2708\r
2709 id = GIMMICK_(ns[0])->id;\r
2710 r = node_new_update_var_gimmick(&ns[2], UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK,\r
2711 id, env);\r
2712 if (r != 0) goto err;\r
2713\r
2714 r = node_new_fail(&ns[3], env);\r
2715 if (r != 0) goto err;\r
2716\r
2717 x = make_list(4, ns);\r
2718 if (IS_NULL(x)) goto err0;\r
2719\r
2720 ns[0] = x;\r
2721 ns[1] = step_one;\r
2722 ns[2] = ns[3] = NULL_NODE;\r
2723\r
2724 x = make_alt(2, ns);\r
2725 if (IS_NULL(x)) goto err0;\r
2726\r
2727 ns[0] = x;\r
2728\r
2729 x = node_new_quantifier(lower, upper, 0);\r
2730 if (IS_NULL(x)) goto err0;\r
2731\r
2732 NODE_BODY(x) = ns[0];\r
2733 ns[0] = x;\r
2734\r
2735 if (possessive != 0) {\r
2736 x = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);\r
2737 if (IS_NULL(x)) goto err0;\r
2738\r
2739 NODE_BODY(x) = ns[0];\r
2740 ns[0] = x;\r
2741 }\r
2742\r
2743 r = node_new_update_var_gimmick(&ns[1], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,\r
2744 pre_save_right_id, env);\r
2745 if (r != 0) goto err;\r
2746\r
2747 r = node_new_fail(&ns[2], env);\r
2748 if (r != 0) goto err;\r
2749\r
2750 x = make_list(2, ns + 1);\r
2751 if (IS_NULL(x)) goto err0;\r
2752\r
2753 ns[1] = x; ns[2] = NULL_NODE;\r
2754\r
2755 x = make_alt(2, ns);\r
2756 if (IS_NULL(x)) goto err0;\r
2757\r
2758 if (is_range_cutter != 0)\r
2759 NODE_STATUS_ADD(x, SUPER);\r
2760\r
2761 *node = x;\r
2762 return ONIG_NORMAL;\r
2763\r
2764 err0:\r
2765 r = ONIGERR_MEMORY;\r
2766 err:\r
2767 for (i = 0; i < 4; i++) onig_node_free(ns[i]);\r
2768 return r;\r
2769}\r
2770\r
2771static int\r
2772make_absent_tail(Node** node1, Node** node2, int pre_save_right_id,\r
2773 ScanEnv* env)\r
2774{\r
2775 int r;\r
2776 int id;\r
2777 Node* save;\r
2778 Node* x;\r
2779 Node* ns[2];\r
2780\r
2781 *node1 = *node2 = NULL_NODE;\r
2782 save = ns[0] = ns[1] = NULL_NODE;\r
2783\r
2784 r = node_new_save_gimmick(&save, SAVE_RIGHT_RANGE, env);\r
2785 if (r != 0) goto err;\r
2786\r
2787 id = GIMMICK_(save)->id;\r
2788 r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,\r
2789 id, env);\r
2790 if (r != 0) goto err;\r
2791\r
2792 r = node_new_fail(&ns[1], env);\r
2793 if (r != 0) goto err;\r
2794\r
2795 x = make_list(2, ns);\r
2796 if (IS_NULL(x)) goto err0;\r
2797\r
2798 ns[0] = NULL_NODE; ns[1] = x;\r
2799\r
2800 r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,\r
2801 pre_save_right_id, env);\r
2802 if (r != 0) goto err;\r
2803\r
2804 x = make_alt(2, ns);\r
2805 if (IS_NULL(x)) goto err0;\r
2806\r
2807 *node1 = save;\r
2808 *node2 = x;\r
2809 return ONIG_NORMAL;\r
2810\r
2811 err0:\r
2812 r = ONIGERR_MEMORY;\r
2813 err:\r
2814 onig_node_free(save);\r
2815 onig_node_free(ns[0]);\r
2816 onig_node_free(ns[1]);\r
2817 return r;\r
2818}\r
2819\r
2820static int\r
2821make_range_clear(Node** node, ScanEnv* env)\r
2822{\r
2823 int r;\r
2824 int id;\r
2825 Node* save;\r
2826 Node* x;\r
2827 Node* ns[2];\r
2828\r
2829 *node = NULL_NODE;\r
2830 save = ns[0] = ns[1] = NULL_NODE;\r
2831\r
2832 r = node_new_save_gimmick(&save, SAVE_RIGHT_RANGE, env);\r
2833 if (r != 0) goto err;\r
2834\r
2835 id = GIMMICK_(save)->id;\r
2836 r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,\r
2837 id, env);\r
2838 if (r != 0) goto err;\r
2839\r
2840 r = node_new_fail(&ns[1], env);\r
2841 if (r != 0) goto err;\r
2842\r
2843 x = make_list(2, ns);\r
2844 if (IS_NULL(x)) goto err0;\r
2845\r
2846 ns[0] = NULL_NODE; ns[1] = x;\r
2847\r
2848 r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_INIT, 0, env);\r
2849 if (r != 0) goto err;\r
2850\r
2851 x = make_alt(2, ns);\r
2852 if (IS_NULL(x)) goto err0;\r
2853\r
2854 NODE_STATUS_ADD(x, SUPER);\r
2855\r
2856 ns[0] = save;\r
2857 ns[1] = x;\r
2858 save = NULL_NODE;\r
2859 x = make_list(2, ns);\r
2860 if (IS_NULL(x)) goto err0;\r
2861\r
2862 *node = x;\r
2863 return ONIG_NORMAL;\r
2864\r
2865 err0:\r
2866 r = ONIGERR_MEMORY;\r
2867 err:\r
2868 onig_node_free(save);\r
2869 onig_node_free(ns[0]);\r
2870 onig_node_free(ns[1]);\r
2871 return r;\r
2872}\r
2873\r
2874static int\r
2875is_simple_one_char_repeat(Node* node, Node** rquant, Node** rbody,\r
2876 int* is_possessive, ScanEnv* env)\r
2877{\r
2878 Node* quant;\r
2879 Node* body;\r
2880\r
2881 *rquant = *rbody = 0;\r
2882 *is_possessive = 0;\r
2883\r
2884 if (NODE_TYPE(node) == NODE_QUANT) {\r
2885 quant = node;\r
2886 }\r
2887 else {\r
2888 if (NODE_TYPE(node) == NODE_ENCLOSURE) {\r
2889 EnclosureNode* en = ENCLOSURE_(node);\r
2890 if (en->type == ENCLOSURE_STOP_BACKTRACK) {\r
2891 *is_possessive = 1;\r
2892 quant = NODE_ENCLOSURE_BODY(en);\r
2893 if (NODE_TYPE(quant) != NODE_QUANT)\r
2894 return 0;\r
2895 }\r
2896 else\r
2897 return 0;\r
2898 }\r
2899 else\r
2900 return 0;\r
2901 }\r
2902\r
2903 if (QUANT_(quant)->greedy == 0)\r
2904 return 0;\r
2905\r
2906 body = NODE_BODY(quant);\r
2907 switch (NODE_TYPE(body)) {\r
2908 case NODE_STRING:\r
2909 {\r
2910 int len;\r
2911 StrNode* sn = STR_(body);\r
2912 UChar *s = sn->s;\r
2913\r
2914 len = 0;\r
2915 while (s < sn->end) {\r
2916 s += enclen(env->enc, s);\r
2917 len++;\r
2918 }\r
2919 if (len != 1)\r
2920 return 0;\r
2921 }\r
2922\r
2923 case NODE_CCLASS:\r
2924 break;\r
2925\r
2926 default:\r
2927 return 0;\r
2928 break;\r
2929 }\r
2930\r
2931 if (node != quant) {\r
2932 NODE_BODY(node) = 0;\r
2933 onig_node_free(node);\r
2934 }\r
2935 NODE_BODY(quant) = NULL_NODE;\r
2936 *rquant = quant;\r
2937 *rbody = body;\r
2938 return 1;\r
2939}\r
2940\r
2941static int\r
2942make_absent_tree_for_simple_one_char_repeat(Node** node, Node* absent, Node* quant,\r
2943 Node* body, int possessive, ScanEnv* env)\r
2944{\r
2945 int r;\r
2946 int i;\r
2947 int id1;\r
2948 int lower, upper;\r
2949 Node* x;\r
2950 Node* ns[4];\r
2951\r
2952 *node = NULL_NODE;\r
2953 r = ONIGERR_MEMORY;\r
2954 ns[0] = ns[1] = NULL_NODE;\r
2955 ns[2] = body, ns[3] = absent;\r
2956\r
2957 lower = QUANT_(quant)->lower;\r
2958 upper = QUANT_(quant)->upper;\r
2959 onig_node_free(quant);\r
2960\r
2961 r = node_new_save_gimmick(&ns[0], SAVE_RIGHT_RANGE, env);\r
2962 if (r != 0) goto err;\r
2963\r
2964 id1 = GIMMICK_(ns[0])->id;\r
2965\r
2966 r = make_absent_engine(&ns[1], id1, absent, body, lower, upper, possessive,\r
2967 0, env);\r
2968 if (r != 0) goto err;\r
2969\r
2970 ns[2] = ns[3] = NULL_NODE;\r
2971\r
2972 r = node_new_update_var_gimmick(&ns[2], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,\r
2973 id1, env);\r
2974 if (r != 0) goto err;\r
2975\r
2976 x = make_list(3, ns);\r
2977 if (IS_NULL(x)) goto err0;\r
2978\r
2979 *node = x;\r
2980 return ONIG_NORMAL;\r
2981\r
2982 err0:\r
2983 r = ONIGERR_MEMORY;\r
2984 err:\r
2985 for (i = 0; i < 4; i++) onig_node_free(ns[i]);\r
2986 return r;\r
2987}\r
2988\r
2989static int\r
2990make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter,\r
2991 ScanEnv* env)\r
2992{\r
2993 int r;\r
2994 int i;\r
2995 int id1, id2;\r
2996 int possessive;\r
2997 Node* x;\r
2998 Node* ns[7];\r
2999\r
3000 r = ONIGERR_MEMORY;\r
3001 for (i = 0; i < 7; i++) ns[i] = NULL_NODE;\r
3002 ns[4] = expr; ns[5] = absent;\r
3003\r
3004 if (is_range_cutter == 0) {\r
3005 Node* quant;\r
3006 Node* body;\r
3007\r
3008 if (expr == NULL_NODE) {\r
3009 /* default expr \O* */\r
3010 quant = node_new_quantifier(0, REPEAT_INFINITE, 0);\r
3011 if (IS_NULL(quant)) goto err0;\r
3012\r
3013 r = node_new_true_anychar(&body, env);\r
3014 if (r != 0) {\r
3015 onig_node_free(quant);\r
3016 goto err;\r
3017 }\r
3018 possessive = 0;\r
3019 goto simple;\r
3020 }\r
3021 else {\r
3022 if (is_simple_one_char_repeat(expr, &quant, &body, &possessive, env)) {\r
3023 simple:\r
3024 r = make_absent_tree_for_simple_one_char_repeat(node, absent, quant,\r
3025 body, possessive, env);\r
3026 if (r != 0) {\r
3027 ns[4] = NULL_NODE;\r
3028 onig_node_free(quant);\r
3029 onig_node_free(body);\r
3030 goto err;\r
3031 }\r
3032\r
3033 return ONIG_NORMAL;\r
3034 }\r
3035 }\r
3036 }\r
3037\r
3038 r = node_new_save_gimmick(&ns[0], SAVE_RIGHT_RANGE, env);\r
3039 if (r != 0) goto err;\r
3040\r
3041 id1 = GIMMICK_(ns[0])->id;\r
3042\r
3043 r = node_new_save_gimmick(&ns[1], SAVE_S, env);\r
3044 if (r != 0) goto err;\r
3045\r
3046 id2 = GIMMICK_(ns[1])->id;\r
3047\r
3048 r = node_new_true_anychar(&ns[3], env);\r
3049 if (r != 0) goto err;\r
3050\r
3051 possessive = 1;\r
3052 r = make_absent_engine(&ns[2], id1, absent, ns[3], 0, REPEAT_INFINITE,\r
3053 possessive, is_range_cutter, env);\r
3054 if (r != 0) goto err;\r
3055\r
3056 ns[3] = NULL_NODE;\r
3057 ns[5] = NULL_NODE;\r
3058\r
3059 r = node_new_update_var_gimmick(&ns[3], UPDATE_VAR_S_FROM_STACK, id2, env);\r
3060 if (r != 0) goto err;\r
3061\r
3062 if (is_range_cutter != 0) {\r
3063 x = make_list(4, ns);\r
3064 if (IS_NULL(x)) goto err0;\r
3065 }\r
3066 else {\r
3067 r = make_absent_tail(&ns[5], &ns[6], id1, env);\r
3068 if (r != 0) goto err;\r
3069 \r
3070 x = make_list(7, ns);\r
3071 if (IS_NULL(x)) goto err0;\r
3072 }\r
3073\r
3074 *node = x;\r
3075 return ONIG_NORMAL;\r
3076\r
3077 err0:\r
3078 r = ONIGERR_MEMORY;\r
3079 err:\r
3080 for (i = 0; i < 7; i++) onig_node_free(ns[i]);\r
3081 return r; \r
3082}\r
3083\r
3084extern int\r
3085onig_node_str_cat(Node* node, const UChar* s, const UChar* end)\r
3086{\r
3087 int addlen = (int )(end - s);\r
3088\r
3089 if (addlen > 0) {\r
3090 int len = (int )(STR_(node)->end - STR_(node)->s);\r
3091\r
3092 if (STR_(node)->capa > 0 || (len + addlen > NODE_STRING_BUF_SIZE - 1)) {\r
3093 UChar* p;\r
3094 int capa = len + addlen + NODE_STRING_MARGIN;\r
3095\r
3096 if (capa <= STR_(node)->capa) {\r
3097 onig_strcpy(STR_(node)->s + len, s, end);\r
3098 }\r
3099 else {\r
3100 if (STR_(node)->s == STR_(node)->buf)\r
3101 p = strcat_capa_from_static(STR_(node)->s, STR_(node)->end,\r
3102 s, end, capa);\r
3103 else\r
3104 p = strcat_capa(STR_(node)->s, STR_(node)->end, s, end, capa, STR_(node)->capa);\r
3105\r
3106 CHECK_NULL_RETURN_MEMERR(p);\r
3107 STR_(node)->s = p;\r
3108 STR_(node)->capa = capa;\r
3109 }\r
3110 }\r
3111 else {\r
3112 onig_strcpy(STR_(node)->s + len, s, end);\r
3113 }\r
3114 STR_(node)->end = STR_(node)->s + len + addlen;\r
3115 }\r
3116\r
3117 return 0;\r
3118}\r
3119\r
3120extern int\r
3121onig_node_str_set(Node* node, const UChar* s, const UChar* end)\r
3122{\r
3123 onig_node_str_clear(node);\r
3124 return onig_node_str_cat(node, s, end);\r
3125}\r
3126\r
3127static int\r
3128node_str_cat_char(Node* node, UChar c)\r
3129{\r
3130 UChar s[1];\r
3131\r
3132 s[0] = c;\r
3133 return onig_node_str_cat(node, s, s + 1);\r
3134}\r
3135\r
3136extern void\r
3137onig_node_conv_to_str_node(Node* node, int flag)\r
3138{\r
3139 NODE_SET_TYPE(node, NODE_STRING);\r
3140 STR_(node)->flag = flag;\r
3141 STR_(node)->capa = 0;\r
3142 STR_(node)->s = STR_(node)->buf;\r
3143 STR_(node)->end = STR_(node)->buf;\r
3144}\r
3145\r
3146extern void\r
3147onig_node_str_clear(Node* node)\r
3148{\r
3149 if (STR_(node)->capa != 0 &&\r
3150 IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) {\r
3151 xfree(STR_(node)->s);\r
3152 }\r
3153\r
3154 STR_(node)->capa = 0;\r
3155 STR_(node)->flag = 0;\r
3156 STR_(node)->s = STR_(node)->buf;\r
3157 STR_(node)->end = STR_(node)->buf;\r
3158}\r
3159\r
3160static Node*\r
3161node_new_str(const UChar* s, const UChar* end)\r
3162{\r
3163 Node* node = node_new();\r
3164 CHECK_NULL_RETURN(node);\r
3165\r
3166 NODE_SET_TYPE(node, NODE_STRING);\r
3167 STR_(node)->capa = 0;\r
3168 STR_(node)->flag = 0;\r
3169 STR_(node)->s = STR_(node)->buf;\r
3170 STR_(node)->end = STR_(node)->buf;\r
3171 if (onig_node_str_cat(node, s, end)) {\r
3172 onig_node_free(node);\r
3173 return NULL;\r
3174 }\r
3175 return node;\r
3176}\r
3177\r
3178extern Node*\r
3179onig_node_new_str(const UChar* s, const UChar* end)\r
3180{\r
3181 return node_new_str(s, end);\r
3182}\r
3183\r
3184static Node*\r
3185node_new_str_raw(UChar* s, UChar* end)\r
3186{\r
3187 Node* node = node_new_str(s, end);\r
a5def177 3188 CHECK_NULL_RETURN(node);\r
b602265d
DG
3189 NODE_STRING_SET_RAW(node);\r
3190 return node;\r
3191}\r
3192\r
3193static Node*\r
3194node_new_empty(void)\r
3195{\r
3196 return node_new_str(NULL, NULL);\r
3197}\r
3198\r
3199static Node*\r
3200node_new_str_raw_char(UChar c)\r
3201{\r
3202 UChar p[1];\r
3203\r
3204 p[0] = c;\r
3205 return node_new_str_raw(p, p + 1);\r
3206}\r
3207\r
3208static Node*\r
3209str_node_split_last_char(Node* node, OnigEncoding enc)\r
3210{\r
3211 const UChar *p;\r
3212 Node* rn;\r
3213 StrNode* sn;\r
3214\r
3215 sn = STR_(node);\r
3216 rn = NULL_NODE;\r
3217 if (sn->end > sn->s) {\r
3218 p = onigenc_get_prev_char_head(enc, sn->s, sn->end);\r
3219 if (p && p > sn->s) { /* can be split. */\r
3220 rn = node_new_str(p, sn->end);\r
a5def177 3221 CHECK_NULL_RETURN(rn);\r
b602265d
DG
3222 if (NODE_STRING_IS_RAW(node))\r
3223 NODE_STRING_SET_RAW(rn);\r
3224\r
3225 sn->end = (UChar* )p;\r
3226 }\r
3227 }\r
3228 return rn;\r
3229}\r
3230\r
3231static int\r
3232str_node_can_be_split(Node* node, OnigEncoding enc)\r
3233{\r
3234 StrNode* sn = STR_(node);\r
3235 if (sn->end > sn->s) {\r
3236 return ((enclen(enc, sn->s) < sn->end - sn->s) ? 1 : 0);\r
3237 }\r
3238 return 0;\r
3239}\r
3240\r
#ifdef USE_PAD_TO_SHORT_BYTE_CHAR
/*
 * Insert `num` copies of `val` in front of the text of `sn`: the current
 * text is moved `num` bytes towards the end (via a local copy) and the
 * freed prefix is filled with `val`.  sn->end is advanced by `num`.
 *
 * Always returns 0.  The function is declared int but previously fell off
 * the end without returning a value.
 *
 * NOTE(review): neither the local copy (NODE_STRING_BUF_SIZE bytes) nor the
 * destination is bounds-checked here; presumably callers only use this on
 * short strings with enough spare room -- confirm at the call sites.
 */
static int
node_str_head_pad(StrNode* sn, int num, UChar val)
{
  UChar buf[NODE_STRING_BUF_SIZE];
  int i, len;

  len = (int )(sn->end - sn->s);
  onig_strcpy(buf, sn->s, sn->end);
  onig_strcpy(&(sn->s[num]), buf, buf + len);
  sn->end += num;

  for (i = 0; i < num; i++) {
    sn->s[i] = val;
  }

  return 0;
}
#endif
3258\r
3259extern int\r
3260onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc)\r
3261{\r
3262 unsigned int num, val;\r
3263 OnigCodePoint c;\r
3264 UChar* p = *src;\r
3265 PFETCH_READY;\r
3266\r
3267 num = 0;\r
3268 while (! PEND) {\r
3269 PFETCH(c);\r
3270 if (IS_CODE_DIGIT_ASCII(enc, c)) {\r
3271 val = (unsigned int )DIGITVAL(c);\r
3272 if ((INT_MAX_LIMIT - val) / 10UL < num)\r
3273 return -1; /* overflow */\r
3274\r
3275 num = num * 10 + val;\r
3276 }\r
3277 else {\r
3278 PUNFETCH;\r
3279 break;\r
3280 }\r
3281 }\r
3282 *src = p;\r
3283 return num;\r
3284}\r
3285\r
3286static int\r
3287scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int minlen,\r
3288 int maxlen, OnigEncoding enc)\r
3289{\r
3290 OnigCodePoint c;\r
3291 unsigned int num, val;\r
3292 int n;\r
3293 UChar* p = *src;\r
3294 PFETCH_READY;\r
3295\r
3296 num = 0;\r
3297 n = 0;\r
3298 while (! PEND && n < maxlen) {\r
3299 PFETCH(c);\r
3300 if (IS_CODE_XDIGIT_ASCII(enc, c)) {\r
3301 n++;\r
3302 val = (unsigned int )XDIGITVAL(enc,c);\r
3303 if ((INT_MAX_LIMIT - val) / 16UL < num)\r
3304 return ONIGERR_TOO_BIG_NUMBER; /* overflow */\r
3305\r
3306 num = (num << 4) + XDIGITVAL(enc,c);\r
3307 }\r
3308 else {\r
3309 PUNFETCH;\r
3310 break;\r
3311 }\r
3312 }\r
3313\r
3314 if (n < minlen)\r
3315 return ONIGERR_INVALID_CODE_POINT_VALUE;\r
3316\r
3317 *src = p;\r
3318 return num;\r
3319}\r
3320\r
3321static int\r
3322scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen,\r
3323 OnigEncoding enc)\r
3324{\r
3325 OnigCodePoint c;\r
3326 unsigned int num, val;\r
3327 UChar* p = *src;\r
3328 PFETCH_READY;\r
3329\r
3330 num = 0;\r
3331 while (! PEND && maxlen-- != 0) {\r
3332 PFETCH(c);\r
3333 if (IS_CODE_DIGIT_ASCII(enc, c) && c < '8') {\r
3334 val = ODIGITVAL(c);\r
3335 if ((INT_MAX_LIMIT - val) / 8UL < num)\r
3336 return -1; /* overflow */\r
3337\r
3338 num = (num << 3) + val;\r
3339 }\r
3340 else {\r
3341 PUNFETCH;\r
3342 break;\r
3343 }\r
3344 }\r
3345 *src = p;\r
3346 return num;\r
3347}\r
3348\r
3349\r
3350#define BB_WRITE_CODE_POINT(bbuf,pos,code) \\r
3351 BB_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT)\r
3352\r
3353/* data format:\r
3354 [n][from-1][to-1][from-2][to-2] ... [from-n][to-n]\r
3355 (all data size is OnigCodePoint)\r
3356 */\r
3357static int\r
3358new_code_range(BBuf** pbuf)\r
3359{\r
3360#define INIT_MULTI_BYTE_RANGE_SIZE (SIZE_CODE_POINT * 5)\r
3361 int r;\r
3362 OnigCodePoint n;\r
3363 BBuf* bbuf;\r
3364\r
3365 bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf));\r
3366 CHECK_NULL_RETURN_MEMERR(bbuf);\r
3367 r = BB_INIT(bbuf, INIT_MULTI_BYTE_RANGE_SIZE);\r
3368 if (r != 0) {\r
3369 xfree(bbuf);\r
3370 *pbuf = 0;\r
3371 return r;\r
3372 }\r
3373\r
3374 n = 0;\r
3375 BB_WRITE_CODE_POINT(bbuf, 0, n);\r
3376 return 0;\r
3377}\r
3378\r
3379static int\r
3380add_code_range_to_buf(BBuf** pbuf, OnigCodePoint from, OnigCodePoint to)\r
3381{\r
3382 int r, inc_n, pos;\r
3383 int low, high, bound, x;\r
3384 OnigCodePoint n, *data;\r
3385 BBuf* bbuf;\r
3386\r
3387 if (from > to) {\r
3388 n = from; from = to; to = n;\r
3389 }\r
3390\r
3391 if (IS_NULL(*pbuf)) {\r
3392 r = new_code_range(pbuf);\r
3393 if (r != 0) return r;\r
3394 bbuf = *pbuf;\r
3395 n = 0;\r
3396 }\r
3397 else {\r
3398 bbuf = *pbuf;\r
3399 GET_CODE_POINT(n, bbuf->p);\r
3400 }\r
3401 data = (OnigCodePoint* )(bbuf->p);\r
3402 data++;\r
3403\r
3404 for (low = 0, bound = n; low < bound; ) {\r
3405 x = (low + bound) >> 1;\r
3406 if (from > data[x*2 + 1])\r
3407 low = x + 1;\r
3408 else\r
3409 bound = x;\r
3410 }\r
3411\r
3412 high = (to == ~((OnigCodePoint )0)) ? n : low;\r
3413 for (bound = n; high < bound; ) {\r
3414 x = (high + bound) >> 1;\r
3415 if (to + 1 >= data[x*2])\r
3416 high = x + 1;\r
3417 else\r
3418 bound = x;\r
3419 }\r
3420\r
3421 inc_n = low + 1 - high;\r
3422 if (n + inc_n > ONIG_MAX_MULTI_BYTE_RANGES_NUM)\r
3423 return ONIGERR_TOO_MANY_MULTI_BYTE_RANGES;\r
3424\r
3425 if (inc_n != 1) {\r
3426 if (from > data[low*2])\r
3427 from = data[low*2];\r
3428 if (to < data[(high - 1)*2 + 1])\r
3429 to = data[(high - 1)*2 + 1];\r
3430 }\r
3431\r
3432 if (inc_n != 0 && (OnigCodePoint )high < n) {\r
3433 int from_pos = SIZE_CODE_POINT * (1 + high * 2);\r
3434 int to_pos = SIZE_CODE_POINT * (1 + (low + 1) * 2);\r
3435 int size = (n - high) * 2 * SIZE_CODE_POINT;\r
3436\r
3437 if (inc_n > 0) {\r
3438 BB_MOVE_RIGHT(bbuf, from_pos, to_pos, size);\r
3439 }\r
3440 else {\r
3441 BB_MOVE_LEFT_REDUCE(bbuf, from_pos, to_pos);\r
3442 }\r
3443 }\r
3444\r
3445 pos = SIZE_CODE_POINT * (1 + low * 2);\r
3446 BB_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2);\r
3447 BB_WRITE_CODE_POINT(bbuf, pos, from);\r
3448 BB_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to);\r
3449 n += inc_n;\r
3450 BB_WRITE_CODE_POINT(bbuf, 0, n);\r
3451\r
3452 return 0;\r
3453}\r
3454\r
3455static int\r
3456add_code_range(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to)\r
3457{\r
3458 if (from > to) {\r
3459 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))\r
3460 return 0;\r
3461 else\r
3462 return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;\r
3463 }\r
3464\r
3465 return add_code_range_to_buf(pbuf, from, to);\r
3466}\r
3467\r
3468static int\r
3469not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf)\r
3470{\r
3471 int r, i, n;\r
3472 OnigCodePoint pre, from, *data, to = 0;\r
3473\r
3474 *pbuf = (BBuf* )NULL;\r
3475 if (IS_NULL(bbuf)) {\r
3476 set_all:\r
3477 return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);\r
3478 }\r
3479\r
3480 data = (OnigCodePoint* )(bbuf->p);\r
3481 GET_CODE_POINT(n, data);\r
3482 data++;\r
3483 if (n <= 0) goto set_all;\r
3484\r
3485 r = 0;\r
3486 pre = MBCODE_START_POS(enc);\r
3487 for (i = 0; i < n; i++) {\r
3488 from = data[i*2];\r
3489 to = data[i*2+1];\r
3490 if (pre <= from - 1) {\r
3491 r = add_code_range_to_buf(pbuf, pre, from - 1);\r
3492 if (r != 0) return r;\r
3493 }\r
3494 if (to == ~((OnigCodePoint )0)) break;\r
3495 pre = to + 1;\r
3496 }\r
3497 if (to < ~((OnigCodePoint )0)) {\r
3498 r = add_code_range_to_buf(pbuf, to + 1, ~((OnigCodePoint )0));\r
3499 }\r
3500 return r;\r
3501}\r
3502\r
/*
 * Exchange two (BBuf pointer, negation flag) operand pairs in place.
 * All four arguments must be assignable lvalues.
 */
#define SWAP_BB_NOT(bbuf1, not1, bbuf2, not2) do {\
  int   swap_not_ = (not1); \
  BBuf *swap_buf_ = (bbuf1); \
  (not1)  = (not2);  (not2)  = swap_not_; \
  (bbuf1) = (bbuf2); (bbuf2) = swap_buf_; \
} while (0)
3509\r
3510static int\r
3511or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1,\r
3512 BBuf* bbuf2, int not2, BBuf** pbuf)\r
3513{\r
3514 int r;\r
3515 OnigCodePoint i, n1, *data1;\r
3516 OnigCodePoint from, to;\r
3517\r
3518 *pbuf = (BBuf* )NULL;\r
3519 if (IS_NULL(bbuf1) && IS_NULL(bbuf2)) {\r
3520 if (not1 != 0 || not2 != 0)\r
3521 return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);\r
3522 return 0;\r
3523 }\r
3524\r
3525 r = 0;\r
3526 if (IS_NULL(bbuf2))\r
3527 SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);\r
3528\r
3529 if (IS_NULL(bbuf1)) {\r
3530 if (not1 != 0) {\r
3531 return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);\r
3532 }\r
3533 else {\r
3534 if (not2 == 0) {\r
3535 return bbuf_clone(pbuf, bbuf2);\r
3536 }\r
3537 else {\r
3538 return not_code_range_buf(enc, bbuf2, pbuf);\r
3539 }\r
3540 }\r
3541 }\r
3542\r
3543 if (not1 != 0)\r
3544 SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);\r
3545\r
3546 data1 = (OnigCodePoint* )(bbuf1->p);\r
3547 GET_CODE_POINT(n1, data1);\r
3548 data1++;\r
3549\r
3550 if (not2 == 0 && not1 == 0) { /* 1 OR 2 */\r
3551 r = bbuf_clone(pbuf, bbuf2);\r
3552 }\r
3553 else if (not1 == 0) { /* 1 OR (not 2) */\r
3554 r = not_code_range_buf(enc, bbuf2, pbuf);\r
3555 }\r
3556 if (r != 0) return r;\r
3557\r
3558 for (i = 0; i < n1; i++) {\r
3559 from = data1[i*2];\r
3560 to = data1[i*2+1];\r
3561 r = add_code_range_to_buf(pbuf, from, to);\r
3562 if (r != 0) return r;\r
3563 }\r
3564 return 0;\r
3565}\r
3566\r
3567static int\r
3568and_code_range1(BBuf** pbuf, OnigCodePoint from1, OnigCodePoint to1,\r
3569 OnigCodePoint* data, int n)\r
3570{\r
3571 int i, r;\r
3572 OnigCodePoint from2, to2;\r
3573\r
3574 for (i = 0; i < n; i++) {\r
3575 from2 = data[i*2];\r
3576 to2 = data[i*2+1];\r
3577 if (from2 < from1) {\r
3578 if (to2 < from1) continue;\r
3579 else {\r
3580 from1 = to2 + 1;\r
3581 }\r
3582 }\r
3583 else if (from2 <= to1) {\r
3584 if (to2 < to1) {\r
3585 if (from1 <= from2 - 1) {\r
3586 r = add_code_range_to_buf(pbuf, from1, from2-1);\r
3587 if (r != 0) return r;\r
3588 }\r
3589 from1 = to2 + 1;\r
3590 }\r
3591 else {\r
3592 to1 = from2 - 1;\r
3593 }\r
3594 }\r
3595 else {\r
3596 from1 = from2;\r
3597 }\r
3598 if (from1 > to1) break;\r
3599 }\r
3600 if (from1 <= to1) {\r
3601 r = add_code_range_to_buf(pbuf, from1, to1);\r
3602 if (r != 0) return r;\r
3603 }\r
3604 return 0;\r
3605}\r
3606\r
3607static int\r
3608and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)\r
3609{\r
3610 int r;\r
3611 OnigCodePoint i, j, n1, n2, *data1, *data2;\r
3612 OnigCodePoint from, to, from1, to1, from2, to2;\r
3613\r
3614 *pbuf = (BBuf* )NULL;\r
3615 if (IS_NULL(bbuf1)) {\r
3616 if (not1 != 0 && IS_NOT_NULL(bbuf2)) /* not1 != 0 -> not2 == 0 */\r
3617 return bbuf_clone(pbuf, bbuf2);\r
3618 return 0;\r
3619 }\r
3620 else if (IS_NULL(bbuf2)) {\r
3621 if (not2 != 0)\r
3622 return bbuf_clone(pbuf, bbuf1);\r
3623 return 0;\r
3624 }\r
3625\r
3626 if (not1 != 0)\r
3627 SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);\r
3628\r
3629 data1 = (OnigCodePoint* )(bbuf1->p);\r
3630 data2 = (OnigCodePoint* )(bbuf2->p);\r
3631 GET_CODE_POINT(n1, data1);\r
3632 GET_CODE_POINT(n2, data2);\r
3633 data1++;\r
3634 data2++;\r
3635\r
3636 if (not2 == 0 && not1 == 0) { /* 1 AND 2 */\r
3637 for (i = 0; i < n1; i++) {\r
3638 from1 = data1[i*2];\r
3639 to1 = data1[i*2+1];\r
3640 for (j = 0; j < n2; j++) {\r
3641 from2 = data2[j*2];\r
3642 to2 = data2[j*2+1];\r
3643 if (from2 > to1) break;\r
3644 if (to2 < from1) continue;\r
3645 from = MAX(from1, from2);\r
3646 to = MIN(to1, to2);\r
3647 r = add_code_range_to_buf(pbuf, from, to);\r
3648 if (r != 0) return r;\r
3649 }\r
3650 }\r
3651 }\r
3652 else if (not1 == 0) { /* 1 AND (not 2) */\r
3653 for (i = 0; i < n1; i++) {\r
3654 from1 = data1[i*2];\r
3655 to1 = data1[i*2+1];\r
14b0e578
CS
3656 r = and_code_range1(pbuf, from1, to1, data2, n2);\r
3657 if (r != 0) return r;\r
3658 }\r
3659 }\r
3660\r
3661 return 0;\r
3662}\r
3663\r
3664static int\r
3665and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)\r
3666{\r
3667 int r, not1, not2;\r
3668 BBuf *buf1, *buf2, *pbuf;\r
3669 BitSetRef bsr1, bsr2;\r
3670 BitSet bs1, bs2;\r
3671\r
3672 not1 = IS_NCCLASS_NOT(dest);\r
3673 bsr1 = dest->bs;\r
3674 buf1 = dest->mbuf;\r
3675 not2 = IS_NCCLASS_NOT(cc);\r
3676 bsr2 = cc->bs;\r
3677 buf2 = cc->mbuf;\r
3678\r
3679 if (not1 != 0) {\r
3680 bitset_invert_to(bsr1, bs1);\r
3681 bsr1 = bs1;\r
3682 }\r
3683 if (not2 != 0) {\r
3684 bitset_invert_to(bsr2, bs2);\r
3685 bsr2 = bs2;\r
3686 }\r
3687 bitset_and(bsr1, bsr2);\r
3688 if (bsr1 != dest->bs) {\r
3689 bitset_copy(dest->bs, bsr1);\r
14b0e578
CS
3690 }\r
3691 if (not1 != 0) {\r
3692 bitset_invert(dest->bs);\r
3693 }\r
3694\r
3695 if (! ONIGENC_IS_SINGLEBYTE(enc)) {\r
3696 if (not1 != 0 && not2 != 0) {\r
3697 r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf);\r
3698 }\r
3699 else {\r
3700 r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf);\r
3701 if (r == 0 && not1 != 0) {\r
b602265d
DG
3702 BBuf *tbuf;\r
3703 r = not_code_range_buf(enc, pbuf, &tbuf);\r
3704 if (r != 0) {\r
3705 bbuf_free(pbuf);\r
3706 return r;\r
3707 }\r
3708 bbuf_free(pbuf);\r
3709 pbuf = tbuf;\r
14b0e578
CS
3710 }\r
3711 }\r
3712 if (r != 0) return r;\r
3713\r
3714 dest->mbuf = pbuf;\r
3715 bbuf_free(buf1);\r
3716 return r;\r
3717 }\r
3718 return 0;\r
3719}\r
3720\r
3721static int\r
3722or_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)\r
3723{\r
3724 int r, not1, not2;\r
3725 BBuf *buf1, *buf2, *pbuf;\r
3726 BitSetRef bsr1, bsr2;\r
3727 BitSet bs1, bs2;\r
3728\r
3729 not1 = IS_NCCLASS_NOT(dest);\r
3730 bsr1 = dest->bs;\r
3731 buf1 = dest->mbuf;\r
3732 not2 = IS_NCCLASS_NOT(cc);\r
3733 bsr2 = cc->bs;\r
3734 buf2 = cc->mbuf;\r
3735\r
3736 if (not1 != 0) {\r
3737 bitset_invert_to(bsr1, bs1);\r
3738 bsr1 = bs1;\r
3739 }\r
3740 if (not2 != 0) {\r
3741 bitset_invert_to(bsr2, bs2);\r
3742 bsr2 = bs2;\r
3743 }\r
3744 bitset_or(bsr1, bsr2);\r
3745 if (bsr1 != dest->bs) {\r
3746 bitset_copy(dest->bs, bsr1);\r
14b0e578
CS
3747 }\r
3748 if (not1 != 0) {\r
3749 bitset_invert(dest->bs);\r
3750 }\r
3751\r
3752 if (! ONIGENC_IS_SINGLEBYTE(enc)) {\r
3753 if (not1 != 0 && not2 != 0) {\r
3754 r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf);\r
3755 }\r
3756 else {\r
3757 r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf);\r
3758 if (r == 0 && not1 != 0) {\r
b602265d
DG
3759 BBuf *tbuf;\r
3760 r = not_code_range_buf(enc, pbuf, &tbuf);\r
3761 if (r != 0) {\r
3762 bbuf_free(pbuf);\r
3763 return r;\r
3764 }\r
3765 bbuf_free(pbuf);\r
3766 pbuf = tbuf;\r
14b0e578
CS
3767 }\r
3768 }\r
3769 if (r != 0) return r;\r
3770\r
3771 dest->mbuf = pbuf;\r
3772 bbuf_free(buf1);\r
3773 return r;\r
3774 }\r
3775 else\r
3776 return 0;\r
3777}\r
3778\r
b602265d
DG
3779static OnigCodePoint\r
3780conv_backslash_value(OnigCodePoint c, ScanEnv* env)\r
14b0e578
CS
3781{\r
3782 if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_CONTROL_CHARS)) {\r
3783 switch (c) {\r
3784 case 'n': return '\n';\r
3785 case 't': return '\t';\r
3786 case 'r': return '\r';\r
3787 case 'f': return '\f';\r
3788 case 'a': return '\007';\r
3789 case 'b': return '\010';\r
3790 case 'e': return '\033';\r
3791 case 'v':\r
3792 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_V_VTAB))\r
b602265d 3793 return '\v';\r
14b0e578
CS
3794 break;\r
3795\r
3796 default:\r
3797 break;\r
3798 }\r
3799 }\r
3800 return c;\r
3801}\r
3802\r
3803static int\r
3804is_invalid_quantifier_target(Node* node)\r
3805{\r
b602265d
DG
3806 switch (NODE_TYPE(node)) {\r
3807 case NODE_ANCHOR:\r
3808 case NODE_GIMMICK:\r
14b0e578
CS
3809 return 1;\r
3810 break;\r
3811\r
b602265d 3812 case NODE_ENCLOSURE:\r
14b0e578 3813 /* allow enclosed elements */\r
b602265d 3814 /* return is_invalid_quantifier_target(NODE_BODY(node)); */\r
14b0e578
CS
3815 break;\r
3816\r
b602265d 3817 case NODE_LIST:\r
14b0e578 3818 do {\r
b602265d
DG
3819 if (! is_invalid_quantifier_target(NODE_CAR(node))) return 0;\r
3820 } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
14b0e578
CS
3821 return 0;\r
3822 break;\r
3823\r
b602265d 3824 case NODE_ALT:\r
14b0e578 3825 do {\r
b602265d
DG
3826 if (is_invalid_quantifier_target(NODE_CAR(node))) return 1;\r
3827 } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
14b0e578
CS
3828 break;\r
3829\r
3830 default:\r
3831 break;\r
3832 }\r
3833 return 0;\r
3834}\r
3835\r
3836/* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */\r
3837static int\r
b602265d 3838quantifier_type_num(QuantNode* q)\r
14b0e578
CS
3839{\r
3840 if (q->greedy) {\r
3841 if (q->lower == 0) {\r
3842 if (q->upper == 1) return 0;\r
3843 else if (IS_REPEAT_INFINITE(q->upper)) return 1;\r
3844 }\r
3845 else if (q->lower == 1) {\r
3846 if (IS_REPEAT_INFINITE(q->upper)) return 2;\r
3847 }\r
3848 }\r
3849 else {\r
3850 if (q->lower == 0) {\r
3851 if (q->upper == 1) return 3;\r
3852 else if (IS_REPEAT_INFINITE(q->upper)) return 4;\r
3853 }\r
3854 else if (q->lower == 1) {\r
3855 if (IS_REPEAT_INFINITE(q->upper)) return 5;\r
3856 }\r
3857 }\r
3858 return -1;\r
3859}\r
3860\r
3861\r
/* Action taken when a simple quantifier is applied to another one. */
enum ReduceType {
  RQ_ASIS = 0,  /* as is */
  RQ_DEL  = 1,  /* delete parent */
  RQ_A    = 2,  /* to '*'    */
  RQ_AQ   = 3,  /* to '*?'   */
  RQ_QQ   = 4,  /* to '??'   */
  RQ_P_QQ = 5,  /* to '+)??' */
  RQ_PQ_Q = 6   /* to '+?)?' */
};

/*
 * Reduction table, indexed with the values of quantifier_type_num()
 * (?:0, *:1, +:2, ??:3, *?:4, +?:5).  onig_reduce_nested_quantifier()
 * looks it up as ReduceTypeTable[child][parent]; each row below is
 * labelled with the quantifier of its first index.
 */
static enum ReduceType ReduceTypeTable[6][6] = {
  /* '?'  */ { RQ_DEL,  RQ_A,    RQ_A,   RQ_QQ,   RQ_AQ,   RQ_ASIS },
  /* '*'  */ { RQ_DEL,  RQ_DEL,  RQ_DEL, RQ_P_QQ, RQ_P_QQ, RQ_DEL  },
  /* '+'  */ { RQ_A,    RQ_A,    RQ_DEL, RQ_ASIS, RQ_P_QQ, RQ_DEL  },
  /* '??' */ { RQ_DEL,  RQ_AQ,   RQ_AQ,  RQ_DEL,  RQ_AQ,   RQ_AQ   },
  /* '*?' */ { RQ_DEL,  RQ_DEL,  RQ_DEL, RQ_DEL,  RQ_DEL,  RQ_DEL  },
  /* '+?' */ { RQ_ASIS, RQ_PQ_Q, RQ_DEL, RQ_AQ,   RQ_AQ,   RQ_DEL  }
};
3880\r
3881extern void\r
3882onig_reduce_nested_quantifier(Node* pnode, Node* cnode)\r
3883{\r
3884 int pnum, cnum;\r
b602265d
DG
3885 QuantNode *p, *c;\r
3886\r
3887 p = QUANT_(pnode);\r
3888 c = QUANT_(cnode);\r
3889 pnum = quantifier_type_num(p);\r
3890 cnum = quantifier_type_num(c);\r
3891 if (pnum < 0 || cnum < 0) {\r
3892 if ((p->lower == p->upper) && ! IS_REPEAT_INFINITE(p->upper)) {\r
3893 if ((c->lower == c->upper) && ! IS_REPEAT_INFINITE(c->upper)) {\r
3894 int n = positive_int_multiply(p->lower, c->lower);\r
3895 if (n >= 0) {\r
3896 p->lower = p->upper = n;\r
3897 NODE_BODY(pnode) = NODE_BODY(cnode);\r
3898 goto remove_cnode;\r
3899 }\r
3900 }\r
3901 }\r
14b0e578 3902\r
b602265d
DG
3903 return ;\r
3904 }\r
14b0e578
CS
3905\r
3906 switch(ReduceTypeTable[cnum][pnum]) {\r
3907 case RQ_DEL:\r
b602265d 3908 *pnode = *cnode;\r
14b0e578
CS
3909 break;\r
3910 case RQ_A:\r
b602265d 3911 NODE_BODY(pnode) = NODE_BODY(cnode);\r
14b0e578
CS
3912 p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 1;\r
3913 break;\r
3914 case RQ_AQ:\r
b602265d 3915 NODE_BODY(pnode) = NODE_BODY(cnode);\r
14b0e578
CS
3916 p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 0;\r
3917 break;\r
3918 case RQ_QQ:\r
b602265d 3919 NODE_BODY(pnode) = NODE_BODY(cnode);\r
14b0e578
CS
3920 p->lower = 0; p->upper = 1; p->greedy = 0;\r
3921 break;\r
3922 case RQ_P_QQ:\r
b602265d 3923 NODE_BODY(pnode) = cnode;\r
14b0e578
CS
3924 p->lower = 0; p->upper = 1; p->greedy = 0;\r
3925 c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 1;\r
3926 return ;\r
3927 break;\r
3928 case RQ_PQ_Q:\r
b602265d 3929 NODE_BODY(pnode) = cnode;\r
14b0e578
CS
3930 p->lower = 0; p->upper = 1; p->greedy = 1;\r
3931 c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 0;\r
3932 return ;\r
3933 break;\r
3934 case RQ_ASIS:\r
b602265d 3935 NODE_BODY(pnode) = cnode;\r
14b0e578
CS
3936 return ;\r
3937 break;\r
3938 }\r
3939\r
b602265d
DG
3940 remove_cnode:\r
3941 NODE_BODY(cnode) = NULL_NODE;\r
14b0e578
CS
3942 onig_node_free(cnode);\r
3943}\r
3944\r
b602265d
DG
3945static int\r
3946node_new_general_newline(Node** node, ScanEnv* env)\r
3947{\r
3948 int r;\r
3949 int dlen, alen;\r
3950 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN * 2];\r
3951 Node* crnl;\r
3952 Node* ncc;\r
3953 Node* x;\r
3954 CClassNode* cc;\r
3955\r
3956 dlen = ONIGENC_CODE_TO_MBC(env->enc, 0x0d, buf);\r
3957 if (dlen < 0) return dlen;\r
3958 alen = ONIGENC_CODE_TO_MBC(env->enc, 0x0a, buf + dlen);\r
3959 if (alen < 0) return alen;\r
3960\r
3961 crnl = node_new_str_raw(buf, buf + dlen + alen);\r
3962 CHECK_NULL_RETURN_MEMERR(crnl);\r
3963\r
3964 ncc = node_new_cclass();\r
3965 if (IS_NULL(ncc)) goto err2;\r
3966\r
3967 cc = CCLASS_(ncc);\r
3968 if (dlen == 1) {\r
3969 bitset_set_range(cc->bs, 0x0a, 0x0d);\r
3970 }\r
3971 else {\r
3972 r = add_code_range(&(cc->mbuf), env, 0x0a, 0x0d);\r
3973 if (r != 0) {\r
3974 err1:\r
3975 onig_node_free(ncc);\r
3976 err2:\r
3977 onig_node_free(crnl);\r
3978 return ONIGERR_MEMORY;\r
3979 }\r
3980 }\r
3981\r
3982 if (ONIGENC_IS_UNICODE_ENCODING(env->enc)) {\r
3983 r = add_code_range(&(cc->mbuf), env, 0x85, 0x85);\r
3984 if (r != 0) goto err1;\r
3985 r = add_code_range(&(cc->mbuf), env, 0x2028, 0x2029);\r
3986 if (r != 0) goto err1;\r
3987 }\r
3988\r
3989 x = node_new_enclosure_if_else(crnl, 0, ncc);\r
3990 if (IS_NULL(x)) goto err1;\r
3991\r
3992 *node = x;\r
3993 return 0;\r
3994}\r
14b0e578
CS
3995\r
/* Kinds of token stored in OnigToken.type by the fetch_* functions. */
enum TokenSyms {
  TK_EOT      = 0,              /* end of token */
  TK_RAW_BYTE = 1,
  TK_CHAR,
  TK_STRING,
  TK_CODE_POINT,
  TK_ANYCHAR,
  TK_CHAR_TYPE,
  TK_BACKREF,
  TK_CALL,
  TK_ANCHOR,
  TK_OP_REPEAT,
  TK_INTERVAL,
  TK_ANYCHAR_ANYTIME,           /* SQL '%' == .* */
  TK_ALT,
  TK_SUBEXP_OPEN,
  TK_SUBEXP_CLOSE,
  TK_CC_OPEN,
  TK_QUOTE_OPEN,
  TK_CHAR_PROPERTY,             /* \p{...}, \P{...} */
  TK_KEEP,                      /* \K */
  TK_GENERAL_NEWLINE,           /* \R */
  TK_NO_NEWLINE,                /* \N */
  TK_TRUE_ANYCHAR,              /* \O */
  TK_EXTENDED_GRAPHEME_CLUSTER, /* \X */

  /* tokens that occur inside a character class */
  TK_CC_CLOSE,
  TK_CC_RANGE,
  TK_POSIX_BRACKET_OPEN,
  TK_CC_AND,                    /* && */
  TK_CC_CC_OPEN                 /* [ */
};
4029\r
4030typedef struct {\r
4031 enum TokenSyms type;\r
4032 int escaped;\r
4033 int base; /* is number: 8, 16 (used in [....]) */\r
4034 UChar* backp;\r
4035 union {\r
4036 UChar* s;\r
4037 int c;\r
4038 OnigCodePoint code;\r
4039 int anchor;\r
4040 int subtype;\r
4041 struct {\r
4042 int lower;\r
4043 int upper;\r
4044 int greedy;\r
4045 int possessive;\r
4046 } repeat;\r
4047 struct {\r
4048 int num;\r
4049 int ref1;\r
4050 int* refs;\r
4051 int by_name;\r
4052#ifdef USE_BACKREF_WITH_LEVEL\r
4053 int exist_level;\r
4054 int level; /* \k<name+n> */\r
4055#endif\r
4056 } backref;\r
4057 struct {\r
4058 UChar* name;\r
4059 UChar* name_end;\r
4060 int gnum;\r
b602265d 4061 int by_number;\r
14b0e578
CS
4062 } call;\r
4063 struct {\r
4064 int ctype;\r
4065 int not;\r
4066 } prop;\r
4067 } u;\r
4068} OnigToken;\r
4069\r
4070\r
4071static int\r
4072fetch_range_quantifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env)\r
4073{\r
4074 int low, up, syn_allow, non_low = 0;\r
4075 int r = 0;\r
4076 OnigCodePoint c;\r
4077 OnigEncoding enc = env->enc;\r
4078 UChar* p = *src;\r
4079 PFETCH_READY;\r
4080\r
4081 syn_allow = IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INVALID_INTERVAL);\r
4082\r
4083 if (PEND) {\r
4084 if (syn_allow)\r
4085 return 1; /* "....{" : OK! */\r
4086 else\r
4087 return ONIGERR_END_PATTERN_AT_LEFT_BRACE; /* "....{" syntax error */\r
4088 }\r
4089\r
4090 if (! syn_allow) {\r
4091 c = PPEEK;\r
4092 if (c == ')' || c == '(' || c == '|') {\r
4093 return ONIGERR_END_PATTERN_AT_LEFT_BRACE;\r
4094 }\r
4095 }\r
4096\r
4097 low = onig_scan_unsigned_number(&p, end, env->enc);\r
4098 if (low < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;\r
4099 if (low > ONIG_MAX_REPEAT_NUM)\r
4100 return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;\r
4101\r
4102 if (p == *src) { /* can't read low */\r
4103 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV)) {\r
4104 /* allow {,n} as {0,n} */\r
4105 low = 0;\r
4106 non_low = 1;\r
4107 }\r
4108 else\r
4109 goto invalid;\r
4110 }\r
4111\r
4112 if (PEND) goto invalid;\r
4113 PFETCH(c);\r
4114 if (c == ',') {\r
4115 UChar* prev = p;\r
4116 up = onig_scan_unsigned_number(&p, end, env->enc);\r
4117 if (up < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;\r
4118 if (up > ONIG_MAX_REPEAT_NUM)\r
4119 return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;\r
4120\r
4121 if (p == prev) {\r
4122 if (non_low != 0)\r
b602265d 4123 goto invalid;\r
14b0e578
CS
4124 up = REPEAT_INFINITE; /* {n,} : {n,infinite} */\r
4125 }\r
4126 }\r
4127 else {\r
4128 if (non_low != 0)\r
4129 goto invalid;\r
4130\r
4131 PUNFETCH;\r
4132 up = low; /* {n} : exact n times */\r
4133 r = 2; /* fixed */\r
4134 }\r
4135\r
4136 if (PEND) goto invalid;\r
4137 PFETCH(c);\r
4138 if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) {\r
4139 if (c != MC_ESC(env->syntax)) goto invalid;\r
4140 PFETCH(c);\r
4141 }\r
4142 if (c != '}') goto invalid;\r
4143\r
4144 if (!IS_REPEAT_INFINITE(up) && low > up) {\r
4145 return ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE;\r
4146 }\r
4147\r
4148 tok->type = TK_INTERVAL;\r
4149 tok->u.repeat.lower = low;\r
4150 tok->u.repeat.upper = up;\r
4151 *src = p;\r
4152 return r; /* 0: normal {n,m}, 2: fixed {n} */\r
4153\r
4154 invalid:\r
b602265d
DG
4155 if (syn_allow) {\r
4156 /* *src = p; */ /* !!! Don't do this line !!! */\r
14b0e578 4157 return 1; /* OK */\r
b602265d 4158 }\r
14b0e578
CS
4159 else\r
4160 return ONIGERR_INVALID_REPEAT_RANGE_PATTERN;\r
4161}\r
4162\r
4163/* \M-, \C-, \c, or \... */\r
4164static int\r
b602265d 4165fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env, OnigCodePoint* val)\r
14b0e578
CS
4166{\r
4167 int v;\r
4168 OnigCodePoint c;\r
4169 OnigEncoding enc = env->enc;\r
4170 UChar* p = *src;\r
4171\r
4172 if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;\r
4173\r
4174 PFETCH_S(c);\r
4175 switch (c) {\r
4176 case 'M':\r
4177 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META)) {\r
4178 if (PEND) return ONIGERR_END_PATTERN_AT_META;\r
4179 PFETCH_S(c);\r
4180 if (c != '-') return ONIGERR_META_CODE_SYNTAX;\r
4181 if (PEND) return ONIGERR_END_PATTERN_AT_META;\r
4182 PFETCH_S(c);\r
4183 if (c == MC_ESC(env->syntax)) {\r
b602265d 4184 v = fetch_escaped_value(&p, end, env, &c);\r
14b0e578 4185 if (v < 0) return v;\r
14b0e578
CS
4186 }\r
4187 c = ((c & 0xff) | 0x80);\r
4188 }\r
4189 else\r
4190 goto backslash;\r
4191 break;\r
4192\r
4193 case 'C':\r
4194 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL)) {\r
4195 if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;\r
4196 PFETCH_S(c);\r
4197 if (c != '-') return ONIGERR_CONTROL_CODE_SYNTAX;\r
4198 goto control;\r
4199 }\r
4200 else\r
4201 goto backslash;\r
4202\r
4203 case 'c':\r
4204 if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_C_CONTROL)) {\r
4205 control:\r
4206 if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;\r
4207 PFETCH_S(c);\r
4208 if (c == '?') {\r
4209 c = 0177;\r
4210 }\r
4211 else {\r
4212 if (c == MC_ESC(env->syntax)) {\r
b602265d 4213 v = fetch_escaped_value(&p, end, env, &c);\r
14b0e578 4214 if (v < 0) return v;\r
14b0e578
CS
4215 }\r
4216 c &= 0x9f;\r
4217 }\r
4218 break;\r
4219 }\r
4220 /* fall through */\r
4221\r
4222 default:\r
4223 {\r
4224 backslash:\r
4225 c = conv_backslash_value(c, env);\r
4226 }\r
4227 break;\r
4228 }\r
4229\r
4230 *src = p;\r
b602265d
DG
4231 *val = c;\r
4232 return 0;\r
14b0e578
CS
4233}\r
4234\r
4235static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env);\r
4236\r
4237static OnigCodePoint\r
4238get_name_end_code_point(OnigCodePoint start)\r
4239{\r
4240 switch (start) {\r
b602265d 4241 case '<': return (OnigCodePoint )'>'; break;\r
14b0e578 4242 case '\'': return (OnigCodePoint )'\''; break;\r
b602265d 4243 case '(': return (OnigCodePoint )')'; break;\r
14b0e578
CS
4244 default:\r
4245 break;\r
4246 }\r
4247\r
4248 return (OnigCodePoint )0;\r
4249}\r
4250\r
b602265d
DG
/* How a group reference was written, as reported by the fetch_name*() functions. */
enum REF_NUM {
  IS_NOT_NUM = 0,  /* a name, not a number */
  IS_ABS_NUM = 1,  /* digits only */
  IS_REL_NUM = 2   /* digits preceded by '+' or '-' */
};
4256\r
14b0e578
CS
4257#ifdef USE_BACKREF_WITH_LEVEL\r
4258/*\r
4259 \k<name+n>, \k<name-n>\r
4260 \k<num+n>, \k<num-n>\r
4261 \k<-num+n>, \k<-num-n>\r
b602265d 4262 \k<+num+n>, \k<+num-n>\r
14b0e578
CS
4263*/\r
4264static int\r
4265fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,\r
b602265d
DG
4266 UChar** rname_end, ScanEnv* env,\r
4267 int* rback_num, int* rlevel, enum REF_NUM* num_type)\r
14b0e578 4268{\r
b602265d
DG
4269 int r, sign, exist_level;\r
4270 int digit_count;\r
14b0e578
CS
4271 OnigCodePoint end_code;\r
4272 OnigCodePoint c = 0;\r
4273 OnigEncoding enc = env->enc;\r
4274 UChar *name_end;\r
4275 UChar *pnum_head;\r
4276 UChar *p = *src;\r
4277 PFETCH_READY;\r
4278\r
4279 *rback_num = 0;\r
b602265d
DG
4280 exist_level = 0;\r
4281 *num_type = IS_NOT_NUM;\r
14b0e578
CS
4282 sign = 1;\r
4283 pnum_head = *src;\r
4284\r
4285 end_code = get_name_end_code_point(start_code);\r
4286\r
b602265d 4287 digit_count = 0;\r
14b0e578
CS
4288 name_end = end;\r
4289 r = 0;\r
4290 if (PEND) {\r
4291 return ONIGERR_EMPTY_GROUP_NAME;\r
4292 }\r
4293 else {\r
4294 PFETCH(c);\r
4295 if (c == end_code)\r
4296 return ONIGERR_EMPTY_GROUP_NAME;\r
4297\r
b602265d
DG
4298 if (IS_CODE_DIGIT_ASCII(enc, c)) {\r
4299 *num_type = IS_ABS_NUM;\r
4300 digit_count++;\r
14b0e578
CS
4301 }\r
4302 else if (c == '-') {\r
b602265d 4303 *num_type = IS_REL_NUM;\r
14b0e578
CS
4304 sign = -1;\r
4305 pnum_head = p;\r
4306 }\r
b602265d
DG
4307 else if (c == '+') {\r
4308 *num_type = IS_REL_NUM;\r
4309 sign = 1;\r
4310 pnum_head = p;\r
4311 }\r
14b0e578
CS
4312 else if (!ONIGENC_IS_CODE_WORD(enc, c)) {\r
4313 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;\r
4314 }\r
4315 }\r
4316\r
4317 while (!PEND) {\r
4318 name_end = p;\r
4319 PFETCH(c);\r
4320 if (c == end_code || c == ')' || c == '+' || c == '-') {\r
b602265d
DG
4321 if (*num_type != IS_NOT_NUM && digit_count == 0)\r
4322 r = ONIGERR_INVALID_GROUP_NAME;\r
14b0e578
CS
4323 break;\r
4324 }\r
4325\r
b602265d
DG
4326 if (*num_type != IS_NOT_NUM) {\r
4327 if (IS_CODE_DIGIT_ASCII(enc, c)) {\r
4328 digit_count++;\r
14b0e578
CS
4329 }\r
4330 else {\r
4331 r = ONIGERR_INVALID_GROUP_NAME;\r
b602265d 4332 *num_type = IS_NOT_NUM;\r
14b0e578
CS
4333 }\r
4334 }\r
4335 else if (!ONIGENC_IS_CODE_WORD(enc, c)) {\r
4336 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;\r
4337 }\r
4338 }\r
4339\r
4340 if (r == 0 && c != end_code) {\r
4341 if (c == '+' || c == '-') {\r
4342 int level;\r
4343 int flag = (c == '-' ? -1 : 1);\r
4344\r
b602265d
DG
4345 if (PEND) {\r
4346 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;\r
4347 goto end;\r
4348 }\r
14b0e578 4349 PFETCH(c);\r
b602265d 4350 if (! IS_CODE_DIGIT_ASCII(enc, c)) goto err;\r
14b0e578
CS
4351 PUNFETCH;\r
4352 level = onig_scan_unsigned_number(&p, end, enc);\r
4353 if (level < 0) return ONIGERR_TOO_BIG_NUMBER;\r
4354 *rlevel = (level * flag);\r
4355 exist_level = 1;\r
4356\r
b602265d
DG
4357 if (!PEND) {\r
4358 PFETCH(c);\r
4359 if (c == end_code)\r
4360 goto end;\r
4361 }\r
14b0e578
CS
4362 }\r
4363\r
4364 err:\r
14b0e578 4365 name_end = end;\r
b602265d
DG
4366 err2:\r
4367 r = ONIGERR_INVALID_GROUP_NAME;\r
14b0e578
CS
4368 }\r
4369\r
4370 end:\r
4371 if (r == 0) {\r
b602265d 4372 if (*num_type != IS_NOT_NUM) {\r
14b0e578
CS
4373 *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);\r
4374 if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;\r
b602265d
DG
4375 else if (*rback_num == 0) {\r
4376 if (*num_type == IS_REL_NUM)\r
4377 goto err2;\r
4378 }\r
14b0e578
CS
4379\r
4380 *rback_num *= sign;\r
4381 }\r
4382\r
4383 *rname_end = name_end;\r
4384 *src = p;\r
4385 return (exist_level ? 1 : 0);\r
4386 }\r
4387 else {\r
4388 onig_scan_env_set_error_string(env, r, *src, name_end);\r
4389 return r;\r
4390 }\r
4391}\r
4392#endif /* USE_BACKREF_WITH_LEVEL */\r
4393\r
4394/*\r
b602265d 4395 ref: 0 -> define name (don't allow number name)\r
14b0e578
CS
4396 1 -> reference name (allow number name)\r
4397*/\r
4398static int\r
4399fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,\r
b602265d
DG
4400 UChar** rname_end, ScanEnv* env, int* rback_num,\r
4401 enum REF_NUM* num_type, int ref)\r
14b0e578 4402{\r
b602265d
DG
4403 int r, sign;\r
4404 int digit_count;\r
14b0e578
CS
4405 OnigCodePoint end_code;\r
4406 OnigCodePoint c = 0;\r
4407 OnigEncoding enc = env->enc;\r
4408 UChar *name_end;\r
4409 UChar *pnum_head;\r
4410 UChar *p = *src;\r
4411\r
4412 *rback_num = 0;\r
4413\r
4414 end_code = get_name_end_code_point(start_code);\r
4415\r
b602265d 4416 digit_count = 0;\r
14b0e578
CS
4417 name_end = end;\r
4418 pnum_head = *src;\r
4419 r = 0;\r
b602265d 4420 *num_type = IS_NOT_NUM;\r
14b0e578
CS
4421 sign = 1;\r
4422 if (PEND) {\r
4423 return ONIGERR_EMPTY_GROUP_NAME;\r
4424 }\r
4425 else {\r
4426 PFETCH_S(c);\r
4427 if (c == end_code)\r
4428 return ONIGERR_EMPTY_GROUP_NAME;\r
4429\r
b602265d 4430 if (IS_CODE_DIGIT_ASCII(enc, c)) {\r
14b0e578 4431 if (ref == 1)\r
b602265d 4432 *num_type = IS_ABS_NUM;\r
14b0e578
CS
4433 else {\r
4434 r = ONIGERR_INVALID_GROUP_NAME;\r
14b0e578 4435 }\r
b602265d 4436 digit_count++;\r
14b0e578
CS
4437 }\r
4438 else if (c == '-') {\r
4439 if (ref == 1) {\r
b602265d 4440 *num_type = IS_REL_NUM;\r
14b0e578
CS
4441 sign = -1;\r
4442 pnum_head = p;\r
4443 }\r
4444 else {\r
4445 r = ONIGERR_INVALID_GROUP_NAME;\r
14b0e578
CS
4446 }\r
4447 }\r
b602265d
DG
4448 else if (c == '+') {\r
4449 if (ref == 1) {\r
4450 *num_type = IS_REL_NUM;\r
4451 sign = 1;\r
4452 pnum_head = p;\r
14b0e578
CS
4453 }\r
4454 else {\r
14b0e578 4455 r = ONIGERR_INVALID_GROUP_NAME;\r
14b0e578 4456 }\r
14b0e578 4457 }\r
b602265d 4458 else if (!ONIGENC_IS_CODE_WORD(enc, c)) {\r
14b0e578 4459 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;\r
b602265d 4460 }\r
14b0e578
CS
4461 }\r
4462\r
4463 if (r == 0) {\r
b602265d
DG
4464 while (!PEND) {\r
4465 name_end = p;\r
4466 PFETCH_S(c);\r
4467 if (c == end_code || c == ')') {\r
4468 if (*num_type != IS_NOT_NUM && digit_count == 0)\r
4469 r = ONIGERR_INVALID_GROUP_NAME;\r
4470 break;\r
4471 }\r
4472\r
4473 if (*num_type != IS_NOT_NUM) {\r
4474 if (IS_CODE_DIGIT_ASCII(enc, c)) {\r
4475 digit_count++;\r
4476 }\r
4477 else {\r
4478 if (!ONIGENC_IS_CODE_WORD(enc, c))\r
4479 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;\r
4480 else\r
4481 r = ONIGERR_INVALID_GROUP_NAME;\r
4482\r
4483 *num_type = IS_NOT_NUM;\r
4484 }\r
4485 }\r
4486 else {\r
4487 if (!ONIGENC_IS_CODE_WORD(enc, c)) {\r
4488 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;\r
4489 }\r
4490 }\r
4491 }\r
4492\r
4493 if (c != end_code) {\r
14b0e578
CS
4494 r = ONIGERR_INVALID_GROUP_NAME;\r
4495 goto err;\r
4496 }\r
b602265d
DG
4497\r
4498 if (*num_type != IS_NOT_NUM) {\r
4499 *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);\r
4500 if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;\r
4501 else if (*rback_num == 0) {\r
4502 if (*num_type == IS_REL_NUM) {\r
4503 r = ONIGERR_INVALID_GROUP_NAME;\r
4504 goto err;\r
4505 }\r
4506 }\r
4507\r
4508 *rback_num *= sign;\r
4509 }\r
14b0e578
CS
4510\r
4511 *rname_end = name_end;\r
4512 *src = p;\r
4513 return 0;\r
4514 }\r
4515 else {\r
b602265d
DG
4516 while (!PEND) {\r
4517 name_end = p;\r
4518 PFETCH_S(c);\r
4519 if (c == end_code || c == ')')\r
4520 break;\r
4521 }\r
4522 if (PEND)\r
4523 name_end = end;\r
4524\r
14b0e578
CS
4525 err:\r
4526 onig_scan_env_set_error_string(env, r, *src, name_end);\r
4527 return r;\r
4528 }\r
4529}\r
14b0e578
CS
4530\r
4531static void\r
4532CC_ESC_WARN(ScanEnv* env, UChar *c)\r
4533{\r
4534 if (onig_warn == onig_null_warn) return ;\r
4535\r
4536 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED) &&\r
4537 IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) {\r
4538 UChar buf[WARN_BUFSIZE];\r
4539 onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,\r
b602265d
DG
4540 env->pattern, env->pattern_end,\r
4541 (UChar* )"character class has '%s' without escape",\r
4542 c);\r
14b0e578
CS
4543 (*onig_warn)((char* )buf);\r
4544 }\r
4545}\r
4546\r
4547static void\r
4548CLOSE_BRACKET_WITHOUT_ESC_WARN(ScanEnv* env, UChar* c)\r
4549{\r
4550 if (onig_warn == onig_null_warn) return ;\r
4551\r
4552 if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) {\r
4553 UChar buf[WARN_BUFSIZE];\r
4554 onig_snprintf_with_pattern(buf, WARN_BUFSIZE, (env)->enc,\r
b602265d
DG
4555 (env)->pattern, (env)->pattern_end,\r
4556 (UChar* )"regular expression has '%s' without escape", c);\r
14b0e578
CS
4557 (*onig_warn)((char* )buf);\r
4558 }\r
4559}\r
4560\r
4561static UChar*\r
4562find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to,\r
b602265d 4563 UChar **next, OnigEncoding enc)\r
14b0e578
CS
4564{\r
4565 int i;\r
4566 OnigCodePoint x;\r
4567 UChar *q;\r
4568 UChar *p = from;\r
4569 \r
4570 while (p < to) {\r
4571 x = ONIGENC_MBC_TO_CODE(enc, p, to);\r
4572 q = p + enclen(enc, p);\r
4573 if (x == s[0]) {\r
4574 for (i = 1; i < n && q < to; i++) {\r
b602265d
DG
4575 x = ONIGENC_MBC_TO_CODE(enc, q, to);\r
4576 if (x != s[i]) break;\r
4577 q += enclen(enc, q);\r
14b0e578
CS
4578 }\r
4579 if (i >= n) {\r
b602265d
DG
4580 if (IS_NOT_NULL(next))\r
4581 *next = q;\r
4582 return p;\r
14b0e578
CS
4583 }\r
4584 }\r
4585 p = q;\r
4586 }\r
4587 return NULL_UCHARP;\r
4588}\r
4589\r
4590static int\r
4591str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to,\r
b602265d 4592 OnigCodePoint bad, OnigEncoding enc, OnigSyntaxType* syn)\r
14b0e578
CS
4593{\r
4594 int i, in_esc;\r
4595 OnigCodePoint x;\r
4596 UChar *q;\r
4597 UChar *p = from;\r
4598\r
4599 in_esc = 0;\r
4600 while (p < to) {\r
4601 if (in_esc) {\r
4602 in_esc = 0;\r
4603 p += enclen(enc, p);\r
4604 }\r
4605 else {\r
4606 x = ONIGENC_MBC_TO_CODE(enc, p, to);\r
4607 q = p + enclen(enc, p);\r
4608 if (x == s[0]) {\r
b602265d
DG
4609 for (i = 1; i < n && q < to; i++) {\r
4610 x = ONIGENC_MBC_TO_CODE(enc, q, to);\r
4611 if (x != s[i]) break;\r
4612 q += enclen(enc, q);\r
4613 }\r
4614 if (i >= n) return 1;\r
4615 p += enclen(enc, p);\r
14b0e578
CS
4616 }\r
4617 else {\r
b602265d
DG
4618 x = ONIGENC_MBC_TO_CODE(enc, p, to);\r
4619 if (x == bad) return 0;\r
4620 else if (x == MC_ESC(syn)) in_esc = 1;\r
4621 p = q;\r
14b0e578
CS
4622 }\r
4623 }\r
4624 }\r
4625 return 0;\r
4626}\r
4627\r
4628static int\r
4629fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)\r
4630{\r
4631 int num;\r
4632 OnigCodePoint c, c2;\r
4633 OnigSyntaxType* syn = env->syntax;\r
4634 OnigEncoding enc = env->enc;\r
4635 UChar* prev;\r
4636 UChar* p = *src;\r
4637 PFETCH_READY;\r
4638\r
4639 if (PEND) {\r
4640 tok->type = TK_EOT;\r
4641 return tok->type;\r
4642 }\r
4643\r
4644 PFETCH(c);\r
4645 tok->type = TK_CHAR;\r
4646 tok->base = 0;\r
4647 tok->u.c = c;\r
4648 tok->escaped = 0;\r
4649\r
4650 if (c == ']') {\r
4651 tok->type = TK_CC_CLOSE;\r
4652 }\r
4653 else if (c == '-') {\r
4654 tok->type = TK_CC_RANGE;\r
4655 }\r
4656 else if (c == MC_ESC(syn)) {\r
4657 if (! IS_SYNTAX_BV(syn, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC))\r
4658 goto end;\r
4659\r
4660 if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;\r
4661\r
4662 PFETCH(c);\r
4663 tok->escaped = 1;\r
4664 tok->u.c = c;\r
4665 switch (c) {\r
4666 case 'w':\r
4667 tok->type = TK_CHAR_TYPE;\r
4668 tok->u.prop.ctype = ONIGENC_CTYPE_WORD;\r
4669 tok->u.prop.not = 0;\r
4670 break;\r
4671 case 'W':\r
4672 tok->type = TK_CHAR_TYPE;\r
4673 tok->u.prop.ctype = ONIGENC_CTYPE_WORD;\r
4674 tok->u.prop.not = 1;\r
4675 break;\r
4676 case 'd':\r
4677 tok->type = TK_CHAR_TYPE;\r
4678 tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;\r
4679 tok->u.prop.not = 0;\r
4680 break;\r
4681 case 'D':\r
4682 tok->type = TK_CHAR_TYPE;\r
4683 tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;\r
4684 tok->u.prop.not = 1;\r
4685 break;\r
4686 case 's':\r
4687 tok->type = TK_CHAR_TYPE;\r
4688 tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;\r
4689 tok->u.prop.not = 0;\r
4690 break;\r
4691 case 'S':\r
4692 tok->type = TK_CHAR_TYPE;\r
4693 tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;\r
4694 tok->u.prop.not = 1;\r
4695 break;\r
4696 case 'h':\r
4697 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;\r
4698 tok->type = TK_CHAR_TYPE;\r
4699 tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;\r
4700 tok->u.prop.not = 0;\r
4701 break;\r
4702 case 'H':\r
4703 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;\r
4704 tok->type = TK_CHAR_TYPE;\r
4705 tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;\r
4706 tok->u.prop.not = 1;\r
4707 break;\r
4708\r
4709 case 'p':\r
4710 case 'P':\r
b602265d
DG
4711 if (PEND) break;\r
4712\r
14b0e578
CS
4713 c2 = PPEEK;\r
4714 if (c2 == '{' &&\r
b602265d
DG
4715 IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {\r
4716 PINC;\r
4717 tok->type = TK_CHAR_PROPERTY;\r
4718 tok->u.prop.not = (c == 'P' ? 1 : 0);\r
4719\r
4720 if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {\r
4721 PFETCH(c2);\r
4722 if (c2 == '^') {\r
4723 tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);\r
4724 }\r
4725 else\r
4726 PUNFETCH;\r
4727 }\r
4728 }\r
4729 break;\r
4730\r
4731 case 'o':\r
4732 if (PEND) break;\r
4733\r
4734 prev = p;\r
4735 if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_O_BRACE_OCTAL)) {\r
4736 PINC;\r
4737 num = scan_unsigned_octal_number(&p, end, 11, enc);\r
4738 if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;\r
4739 if (!PEND) {\r
4740 c2 = PPEEK;\r
4741 if (IS_CODE_DIGIT_ASCII(enc, c2))\r
4742 return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;\r
4743 }\r
4744\r
4745 if (p > prev + enclen(enc, prev) && !PEND && (PPEEK_IS('}'))) {\r
4746 PINC;\r
4747 tok->type = TK_CODE_POINT;\r
4748 tok->base = 8;\r
4749 tok->u.code = (OnigCodePoint )num;\r
4750 }\r
4751 else {\r
4752 /* can't read nothing or invalid format */\r
4753 p = prev;\r
4754 }\r
14b0e578
CS
4755 }\r
4756 break;\r
4757\r
4758 case 'x':\r
4759 if (PEND) break;\r
4760\r
4761 prev = p;\r
4762 if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {\r
b602265d
DG
4763 PINC;\r
4764 num = scan_unsigned_hexadecimal_number(&p, end, 0, 8, enc);\r
4765 if (num < 0) {\r
4766 if (num == ONIGERR_TOO_BIG_NUMBER)\r
4767 return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;\r
4768 else\r
4769 return num;\r
4770 }\r
4771 if (!PEND) {\r
14b0e578 4772 c2 = PPEEK;\r
b602265d 4773 if (IS_CODE_XDIGIT_ASCII(enc, c2))\r
14b0e578
CS
4774 return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;\r
4775 }\r
4776\r
b602265d
DG
4777 if (p > prev + enclen(enc, prev) && !PEND && (PPEEK_IS('}'))) {\r
4778 PINC;\r
4779 tok->type = TK_CODE_POINT;\r
4780 tok->base = 16;\r
4781 tok->u.code = (OnigCodePoint )num;\r
4782 }\r
4783 else {\r
4784 /* can't read nothing or invalid format */\r
4785 p = prev;\r
4786 }\r
14b0e578
CS
4787 }\r
4788 else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {\r
b602265d
DG
4789 num = scan_unsigned_hexadecimal_number(&p, end, 0, 2, enc);\r
4790 if (num < 0) return num;\r
4791 if (p == prev) { /* can't read nothing. */\r
4792 num = 0; /* but, it's not error */\r
4793 }\r
4794 tok->type = TK_RAW_BYTE;\r
4795 tok->base = 16;\r
4796 tok->u.c = num;\r
14b0e578
CS
4797 }\r
4798 break;\r
4799\r
4800 case 'u':\r
4801 if (PEND) break;\r
4802\r
4803 prev = p;\r
4804 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {\r
b602265d
DG
4805 num = scan_unsigned_hexadecimal_number(&p, end, 4, 4, enc);\r
4806 if (num < 0) return num;\r
4807 if (p == prev) { /* can't read nothing. */\r
4808 num = 0; /* but, it's not error */\r
4809 }\r
4810 tok->type = TK_CODE_POINT;\r
4811 tok->base = 16;\r
4812 tok->u.code = (OnigCodePoint )num;\r
14b0e578
CS
4813 }\r
4814 break;\r
4815\r
4816 case '0':\r
4817 case '1': case '2': case '3': case '4': case '5': case '6': case '7':\r
4818 if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {\r
b602265d
DG
4819 PUNFETCH;\r
4820 prev = p;\r
4821 num = scan_unsigned_octal_number(&p, end, 3, enc);\r
4822 if (num < 0 || num >= 256) return ONIGERR_TOO_BIG_NUMBER;\r
4823 if (p == prev) { /* can't read nothing. */\r
4824 num = 0; /* but, it's not error */\r
4825 }\r
4826 tok->type = TK_RAW_BYTE;\r
4827 tok->base = 8;\r
4828 tok->u.c = num;\r
14b0e578
CS
4829 }\r
4830 break;\r
4831\r
4832 default:\r
4833 PUNFETCH;\r
b602265d 4834 num = fetch_escaped_value(&p, end, env, &c2);\r
14b0e578 4835 if (num < 0) return num;\r
b602265d
DG
4836 if (tok->u.c != c2) {\r
4837 tok->u.code = c2;\r
4838 tok->type = TK_CODE_POINT;\r
14b0e578
CS
4839 }\r
4840 break;\r
4841 }\r
4842 }\r
4843 else if (c == '[') {\r
4844 if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_POSIX_BRACKET) && (PPEEK_IS(':'))) {\r
4845 OnigCodePoint send[] = { (OnigCodePoint )':', (OnigCodePoint )']' };\r
b602265d 4846 tok->backp = p; /* point at '[' is read */\r
14b0e578
CS
4847 PINC;\r
4848 if (str_exist_check_with_esc(send, 2, p, end,\r
4849 (OnigCodePoint )']', enc, syn)) {\r
b602265d 4850 tok->type = TK_POSIX_BRACKET_OPEN;\r
14b0e578
CS
4851 }\r
4852 else {\r
b602265d
DG
4853 PUNFETCH;\r
4854 goto cc_in_cc;\r
14b0e578
CS
4855 }\r
4856 }\r
4857 else {\r
4858 cc_in_cc:\r
4859 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP)) {\r
b602265d 4860 tok->type = TK_CC_CC_OPEN;\r
14b0e578
CS
4861 }\r
4862 else {\r
b602265d 4863 CC_ESC_WARN(env, (UChar* )"[");\r
14b0e578
CS
4864 }\r
4865 }\r
4866 }\r
4867 else if (c == '&') {\r
4868 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP) &&\r
b602265d 4869 !PEND && (PPEEK_IS('&'))) {\r
14b0e578
CS
4870 PINC;\r
4871 tok->type = TK_CC_AND;\r
4872 }\r
4873 }\r
4874\r
4875 end:\r
4876 *src = p;\r
4877 return tok->type;\r
4878}\r
4879\r
4880static int\r
4881fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)\r
4882{\r
4883 int r, num;\r
4884 OnigCodePoint c;\r
4885 OnigEncoding enc = env->enc;\r
4886 OnigSyntaxType* syn = env->syntax;\r
4887 UChar* prev;\r
4888 UChar* p = *src;\r
4889 PFETCH_READY;\r
4890\r
4891 start:\r
4892 if (PEND) {\r
4893 tok->type = TK_EOT;\r
4894 return tok->type;\r
4895 }\r
4896\r
4897 tok->type = TK_STRING;\r
4898 tok->base = 0;\r
4899 tok->backp = p;\r
4900\r
4901 PFETCH(c);\r
4902 if (IS_MC_ESC_CODE(c, syn)) {\r
4903 if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;\r
4904\r
4905 tok->backp = p;\r
4906 PFETCH(c);\r
4907\r
4908 tok->u.c = c;\r
4909 tok->escaped = 1;\r
4910 switch (c) {\r
4911 case '*':\r
4912 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF)) break;\r
4913 tok->type = TK_OP_REPEAT;\r
4914 tok->u.repeat.lower = 0;\r
4915 tok->u.repeat.upper = REPEAT_INFINITE;\r
4916 goto greedy_check;\r
4917 break;\r
4918\r
4919 case '+':\r
4920 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_PLUS_ONE_INF)) break;\r
4921 tok->type = TK_OP_REPEAT;\r
4922 tok->u.repeat.lower = 1;\r
4923 tok->u.repeat.upper = REPEAT_INFINITE;\r
4924 goto greedy_check;\r
4925 break;\r
4926\r
4927 case '?':\r
4928 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_QMARK_ZERO_ONE)) break;\r
4929 tok->type = TK_OP_REPEAT;\r
4930 tok->u.repeat.lower = 0;\r
4931 tok->u.repeat.upper = 1;\r
4932 greedy_check:\r
4933 if (!PEND && PPEEK_IS('?') &&\r
b602265d
DG
4934 IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY)) {\r
4935 PFETCH(c);\r
4936 tok->u.repeat.greedy = 0;\r
4937 tok->u.repeat.possessive = 0;\r
14b0e578
CS
4938 }\r
4939 else {\r
4940 possessive_check:\r
b602265d
DG
4941 if (!PEND && PPEEK_IS('+') &&\r
4942 ((IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT) &&\r
4943 tok->type != TK_INTERVAL) ||\r
4944 (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL) &&\r
4945 tok->type == TK_INTERVAL))) {\r
4946 PFETCH(c);\r
4947 tok->u.repeat.greedy = 1;\r
4948 tok->u.repeat.possessive = 1;\r
4949 }\r
4950 else {\r
4951 tok->u.repeat.greedy = 1;\r
4952 tok->u.repeat.possessive = 0;\r
4953 }\r
14b0e578
CS
4954 }\r
4955 break;\r
4956\r
4957 case '{':\r
4958 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break;\r
4959 r = fetch_range_quantifier(&p, end, tok, env);\r
4960 if (r < 0) return r; /* error */\r
4961 if (r == 0) goto greedy_check;\r
4962 else if (r == 2) { /* {n} */\r
b602265d
DG
4963 if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))\r
4964 goto possessive_check;\r
14b0e578 4965\r
b602265d 4966 goto greedy_check;\r
14b0e578
CS
4967 }\r
4968 /* r == 1 : normal char */\r
4969 break;\r
4970\r
4971 case '|':\r
4972 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_VBAR_ALT)) break;\r
4973 tok->type = TK_ALT;\r
4974 break;\r
4975\r
4976 case '(':\r
4977 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;\r
4978 tok->type = TK_SUBEXP_OPEN;\r
4979 break;\r
4980\r
4981 case ')':\r
4982 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;\r
4983 tok->type = TK_SUBEXP_CLOSE;\r
4984 break;\r
4985\r
4986 case 'w':\r
4987 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;\r
4988 tok->type = TK_CHAR_TYPE;\r
4989 tok->u.prop.ctype = ONIGENC_CTYPE_WORD;\r
4990 tok->u.prop.not = 0;\r
4991 break;\r
4992\r
4993 case 'W':\r
4994 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;\r
4995 tok->type = TK_CHAR_TYPE;\r
4996 tok->u.prop.ctype = ONIGENC_CTYPE_WORD;\r
4997 tok->u.prop.not = 1;\r
4998 break;\r
4999\r
5000 case 'b':\r
5001 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;\r
5002 tok->type = TK_ANCHOR;\r
b602265d 5003 tok->u.anchor = ANCHOR_WORD_BOUNDARY;\r
14b0e578
CS
5004 break;\r
5005\r
5006 case 'B':\r
5007 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;\r
5008 tok->type = TK_ANCHOR;\r
b602265d
DG
5009 tok->u.anchor = ANCHOR_NO_WORD_BOUNDARY;\r
5010 break;\r
5011\r
5012 case 'y':\r
5013 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER)) break;\r
5014 tok->type = TK_ANCHOR;\r
5015 tok->u.anchor = ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY;\r
5016 break;\r
5017\r
5018 case 'Y':\r
5019 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER)) break;\r
5020 tok->type = TK_ANCHOR;\r
5021 tok->u.anchor = ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY;\r
14b0e578
CS
5022 break;\r
5023\r
5024#ifdef USE_WORD_BEGIN_END\r
5025 case '<':\r
5026 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;\r
5027 tok->type = TK_ANCHOR;\r
5028 tok->u.anchor = ANCHOR_WORD_BEGIN;\r
5029 break;\r
5030\r
5031 case '>':\r
5032 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;\r
5033 tok->type = TK_ANCHOR;\r
5034 tok->u.anchor = ANCHOR_WORD_END;\r
5035 break;\r
5036#endif\r
5037\r
5038 case 's':\r
5039 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;\r
5040 tok->type = TK_CHAR_TYPE;\r
5041 tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;\r
5042 tok->u.prop.not = 0;\r
5043 break;\r
5044\r
5045 case 'S':\r
5046 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;\r
5047 tok->type = TK_CHAR_TYPE;\r
5048 tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;\r
5049 tok->u.prop.not = 1;\r
5050 break;\r
5051\r
5052 case 'd':\r
5053 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;\r
5054 tok->type = TK_CHAR_TYPE;\r
5055 tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;\r
5056 tok->u.prop.not = 0;\r
5057 break;\r
5058\r
5059 case 'D':\r
5060 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;\r
5061 tok->type = TK_CHAR_TYPE;\r
5062 tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;\r
5063 tok->u.prop.not = 1;\r
5064 break;\r
5065\r
5066 case 'h':\r
5067 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;\r
5068 tok->type = TK_CHAR_TYPE;\r
5069 tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;\r
5070 tok->u.prop.not = 0;\r
5071 break;\r
5072\r
5073 case 'H':\r
5074 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;\r
5075 tok->type = TK_CHAR_TYPE;\r
5076 tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;\r
5077 tok->u.prop.not = 1;\r
5078 break;\r
5079\r
b602265d
DG
5080 case 'K':\r
5081 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP)) break;\r
5082 tok->type = TK_KEEP;\r
5083 break;\r
5084\r
5085 case 'R':\r
5086 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE)) break;\r
5087 tok->type = TK_GENERAL_NEWLINE;\r
5088 break;\r
5089\r
5090 case 'N':\r
5091 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT)) break;\r
5092 tok->type = TK_NO_NEWLINE;\r
5093 break;\r
5094\r
5095 case 'O':\r
5096 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT)) break;\r
5097 tok->type = TK_TRUE_ANYCHAR;\r
5098 break;\r
5099\r
5100 case 'X':\r
5101 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER)) break;\r
5102 tok->type = TK_EXTENDED_GRAPHEME_CLUSTER;\r
5103 break;\r
5104\r
14b0e578
CS
5105 case 'A':\r
5106 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;\r
5107 begin_buf:\r
5108 tok->type = TK_ANCHOR;\r
5109 tok->u.subtype = ANCHOR_BEGIN_BUF;\r
5110 break;\r
5111\r
5112 case 'Z':\r
5113 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;\r
5114 tok->type = TK_ANCHOR;\r
5115 tok->u.subtype = ANCHOR_SEMI_END_BUF;\r
5116 break;\r
5117\r
5118 case 'z':\r
5119 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;\r
5120 end_buf:\r
5121 tok->type = TK_ANCHOR;\r
5122 tok->u.subtype = ANCHOR_END_BUF;\r
5123 break;\r
5124\r
5125 case 'G':\r
5126 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR)) break;\r
5127 tok->type = TK_ANCHOR;\r
5128 tok->u.subtype = ANCHOR_BEGIN_POSITION;\r
5129 break;\r
5130\r
5131 case '`':\r
5132 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;\r
5133 goto begin_buf;\r
5134 break;\r
5135\r
5136 case '\'':\r
5137 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;\r
5138 goto end_buf;\r
5139 break;\r
5140\r
b602265d
DG
5141 case 'o':\r
5142 if (PEND) break;\r
5143\r
5144 prev = p;\r
5145 if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_O_BRACE_OCTAL)) {\r
5146 PINC;\r
5147 num = scan_unsigned_octal_number(&p, end, 11, enc);\r
5148 if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;\r
5149 if (!PEND) {\r
5150 if (IS_CODE_DIGIT_ASCII(enc, PPEEK))\r
5151 return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;\r
5152 }\r
5153\r
5154 if ((p > prev + enclen(enc, prev)) && !PEND && PPEEK_IS('}')) {\r
5155 PINC;\r
5156 tok->type = TK_CODE_POINT;\r
5157 tok->u.code = (OnigCodePoint )num;\r
5158 }\r
5159 else {\r
5160 /* can't read nothing or invalid format */\r
5161 p = prev;\r
5162 }\r
5163 }\r
5164 break;\r
5165\r
14b0e578
CS
5166 case 'x':\r
5167 if (PEND) break;\r
5168\r
5169 prev = p;\r
5170 if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {\r
b602265d
DG
5171 PINC;\r
5172 num = scan_unsigned_hexadecimal_number(&p, end, 0, 8, enc);\r
5173 if (num < 0) {\r
5174 if (num == ONIGERR_TOO_BIG_NUMBER)\r
5175 return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;\r
5176 else\r
5177 return num;\r
5178 }\r
5179 if (!PEND) {\r
5180 if (IS_CODE_XDIGIT_ASCII(enc, PPEEK))\r
14b0e578
CS
5181 return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;\r
5182 }\r
5183\r
b602265d
DG
5184 if ((p > prev + enclen(enc, prev)) && !PEND && PPEEK_IS('}')) {\r
5185 PINC;\r
5186 tok->type = TK_CODE_POINT;\r
5187 tok->u.code = (OnigCodePoint )num;\r
5188 }\r
5189 else {\r
5190 /* can't read nothing or invalid format */\r
5191 p = prev;\r
5192 }\r
14b0e578
CS
5193 }\r
5194 else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {\r
b602265d
DG
5195 num = scan_unsigned_hexadecimal_number(&p, end, 0, 2, enc);\r
5196 if (num < 0) return num;\r
5197 if (p == prev) { /* can't read nothing. */\r
5198 num = 0; /* but, it's not error */\r
5199 }\r
5200 tok->type = TK_RAW_BYTE;\r
5201 tok->base = 16;\r
5202 tok->u.c = num;\r
14b0e578
CS
5203 }\r
5204 break;\r
5205\r
5206 case 'u':\r
5207 if (PEND) break;\r
5208\r
5209 prev = p;\r
5210 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {\r
b602265d
DG
5211 num = scan_unsigned_hexadecimal_number(&p, end, 4, 4, enc);\r
5212 if (num < 0) return num;\r
5213 if (p == prev) { /* can't read nothing. */\r
5214 num = 0; /* but, it's not error */\r
5215 }\r
5216 tok->type = TK_CODE_POINT;\r
5217 tok->base = 16;\r
5218 tok->u.code = (OnigCodePoint )num;\r
14b0e578
CS
5219 }\r
5220 break;\r
5221\r
5222 case '1': case '2': case '3': case '4':\r
5223 case '5': case '6': case '7': case '8': case '9':\r
5224 PUNFETCH;\r
5225 prev = p;\r
5226 num = onig_scan_unsigned_number(&p, end, enc);\r
5227 if (num < 0 || num > ONIG_MAX_BACKREF_NUM) {\r
5228 goto skip_backref;\r
5229 }\r
5230\r
5231 if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) && \r
b602265d
DG
5232 (num <= env->num_mem || num <= 9)) { /* This spec. from GNU regex */\r
5233 if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {\r
5234 if (num > env->num_mem || IS_NULL(SCANENV_MEMENV(env)[num].node))\r
5235 return ONIGERR_INVALID_BACKREF;\r
5236 }\r
5237\r
5238 tok->type = TK_BACKREF;\r
5239 tok->u.backref.num = 1;\r
5240 tok->u.backref.ref1 = num;\r
5241 tok->u.backref.by_name = 0;\r
14b0e578 5242#ifdef USE_BACKREF_WITH_LEVEL\r
b602265d 5243 tok->u.backref.exist_level = 0;\r
14b0e578 5244#endif\r
b602265d 5245 break;\r
14b0e578
CS
5246 }\r
5247\r
5248 skip_backref:\r
5249 if (c == '8' || c == '9') {\r
b602265d
DG
5250 /* normal char */\r
5251 p = prev; PINC;\r
5252 break;\r
14b0e578
CS
5253 }\r
5254\r
5255 p = prev;\r
5256 /* fall through */\r
5257 case '0':\r
5258 if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {\r
b602265d
DG
5259 prev = p;\r
5260 num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), enc);\r
5261 if (num < 0 || num >= 256) return ONIGERR_TOO_BIG_NUMBER;\r
5262 if (p == prev) { /* can't read nothing. */\r
5263 num = 0; /* but, it's not error */\r
5264 }\r
5265 tok->type = TK_RAW_BYTE;\r
5266 tok->base = 8;\r
5267 tok->u.c = num;\r
14b0e578
CS
5268 }\r
5269 else if (c != '0') {\r
b602265d 5270 PINC;\r
14b0e578
CS
5271 }\r
5272 break;\r
5273\r
14b0e578 5274 case 'k':\r
b602265d
DG
5275 if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) {\r
5276 PFETCH(c);\r
5277 if (c == '<' || c == '\'') {\r
5278 UChar* name_end;\r
5279 int* backs;\r
5280 int back_num;\r
5281 enum REF_NUM num_type;\r
14b0e578 5282\r
b602265d 5283 prev = p;\r
14b0e578
CS
5284\r
5285#ifdef USE_BACKREF_WITH_LEVEL\r
b602265d
DG
5286 name_end = NULL_UCHARP; /* no need. escape gcc warning. */\r
5287 r = fetch_name_with_level((OnigCodePoint )c, &p, end, &name_end,\r
5288 env, &back_num, &tok->u.backref.level, &num_type);\r
5289 if (r == 1) tok->u.backref.exist_level = 1;\r
5290 else tok->u.backref.exist_level = 0;\r
14b0e578 5291#else\r
b602265d 5292 r = fetch_name(c, &p, end, &name_end, env, &back_num, &num_type, 1);\r
14b0e578 5293#endif\r
b602265d
DG
5294 if (r < 0) return r;\r
5295\r
5296 if (num_type != IS_NOT_NUM) {\r
5297 if (num_type == IS_REL_NUM) {\r
5298 back_num = backref_rel_to_abs(back_num, env);\r
5299 }\r
5300 if (back_num <= 0)\r
5301 return ONIGERR_INVALID_BACKREF;\r
5302\r
5303 if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {\r
5304 if (back_num > env->num_mem ||\r
5305 IS_NULL(SCANENV_MEMENV(env)[back_num].node))\r
5306 return ONIGERR_INVALID_BACKREF;\r
5307 }\r
5308 tok->type = TK_BACKREF;\r
5309 tok->u.backref.by_name = 0;\r
5310 tok->u.backref.num = 1;\r
5311 tok->u.backref.ref1 = back_num;\r
5312 }\r
5313 else {\r
5314 num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);\r
5315 if (num <= 0) {\r
5316 onig_scan_env_set_error_string(env,\r
5317 ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);\r
5318 return ONIGERR_UNDEFINED_NAME_REFERENCE;\r
5319 }\r
5320 if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {\r
5321 int i;\r
5322 for (i = 0; i < num; i++) {\r
5323 if (backs[i] > env->num_mem ||\r
5324 IS_NULL(SCANENV_MEMENV(env)[backs[i]].node))\r
5325 return ONIGERR_INVALID_BACKREF;\r
5326 }\r
5327 }\r
5328\r
5329 tok->type = TK_BACKREF;\r
5330 tok->u.backref.by_name = 1;\r
5331 if (num == 1) {\r
5332 tok->u.backref.num = 1;\r
5333 tok->u.backref.ref1 = backs[0];\r
5334 }\r
5335 else {\r
5336 tok->u.backref.num = num;\r
5337 tok->u.backref.refs = backs;\r
5338 }\r
5339 }\r
5340 }\r
5341 else\r
5342 PUNFETCH;\r
14b0e578
CS
5343 }\r
5344 break;\r
14b0e578 5345\r
b602265d 5346#ifdef USE_CALL\r
14b0e578 5347 case 'g':\r
b602265d
DG
5348 if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) {\r
5349 PFETCH(c);\r
5350 if (c == '<' || c == '\'') {\r
5351 int gnum;\r
5352 UChar* name_end;\r
5353 enum REF_NUM num_type;\r
5354\r
5355 prev = p;\r
5356 r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env,\r
5357 &gnum, &num_type, 1);\r
5358 if (r < 0) return r;\r
5359\r
5360 if (num_type != IS_NOT_NUM) {\r
5361 if (num_type == IS_REL_NUM) {\r
5362 gnum = backref_rel_to_abs(gnum, env);\r
5363 if (gnum < 0) {\r
5364 onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,\r
5365 prev, name_end);\r
5366 return ONIGERR_UNDEFINED_GROUP_REFERENCE;\r
5367 }\r
5368 }\r
5369 tok->u.call.by_number = 1;\r
5370 tok->u.call.gnum = gnum;\r
5371 }\r
5372 else {\r
5373 tok->u.call.by_number = 0;\r
5374 tok->u.call.gnum = 0;\r
5375 }\r
5376\r
5377 tok->type = TK_CALL;\r
5378 tok->u.call.name = prev;\r
5379 tok->u.call.name_end = name_end;\r
5380 }\r
5381 else\r
5382 PUNFETCH;\r
14b0e578
CS
5383 }\r
5384 break;\r
5385#endif\r
5386\r
5387 case 'Q':\r
5388 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE)) {\r
b602265d 5389 tok->type = TK_QUOTE_OPEN;\r
14b0e578
CS
5390 }\r
5391 break;\r
5392\r
5393 case 'p':\r
5394 case 'P':\r
b602265d
DG
5395 if (!PEND && PPEEK_IS('{') &&\r
5396 IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {\r
5397 PINC;\r
5398 tok->type = TK_CHAR_PROPERTY;\r
5399 tok->u.prop.not = (c == 'P' ? 1 : 0);\r
5400\r
5401 if (!PEND &&\r
5402 IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {\r
5403 PFETCH(c);\r
5404 if (c == '^') {\r
5405 tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);\r
5406 }\r
5407 else\r
5408 PUNFETCH;\r
5409 }\r
14b0e578
CS
5410 }\r
5411 break;\r
5412\r
5413 default:\r
b602265d
DG
5414 {\r
5415 OnigCodePoint c2;\r
5416\r
5417 PUNFETCH;\r
5418 num = fetch_escaped_value(&p, end, env, &c2);\r
5419 if (num < 0) return num;\r
5420 /* set_raw: */\r
5421 if (tok->u.c != c2) {\r
5422 tok->type = TK_CODE_POINT;\r
5423 tok->u.code = c2;\r
5424 }\r
5425 else { /* string */\r
5426 p = tok->backp + enclen(enc, tok->backp);\r
5427 }\r
14b0e578
CS
5428 }\r
5429 break;\r
5430 }\r
5431 }\r
5432 else {\r
5433 tok->u.c = c;\r
5434 tok->escaped = 0;\r
5435\r
5436#ifdef USE_VARIABLE_META_CHARS\r
5437 if ((c != ONIG_INEFFECTIVE_META_CHAR) &&\r
b602265d 5438 IS_SYNTAX_OP(syn, ONIG_SYN_OP_VARIABLE_META_CHARACTERS)) {\r
14b0e578 5439 if (c == MC_ANYCHAR(syn))\r
b602265d 5440 goto any_char;\r
14b0e578 5441 else if (c == MC_ANYTIME(syn))\r
b602265d 5442 goto anytime;\r
14b0e578 5443 else if (c == MC_ZERO_OR_ONE_TIME(syn))\r
b602265d 5444 goto zero_or_one_time;\r
14b0e578 5445 else if (c == MC_ONE_OR_MORE_TIME(syn))\r
b602265d 5446 goto one_or_more_time;\r
14b0e578 5447 else if (c == MC_ANYCHAR_ANYTIME(syn)) {\r
b602265d
DG
5448 tok->type = TK_ANYCHAR_ANYTIME;\r
5449 goto out;\r
14b0e578
CS
5450 }\r
5451 }\r
5452#endif\r
5453\r
5454 switch (c) {\r
5455 case '.':\r
5456 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_DOT_ANYCHAR)) break;\r
5457#ifdef USE_VARIABLE_META_CHARS\r
5458 any_char:\r
5459#endif\r
5460 tok->type = TK_ANYCHAR;\r
5461 break;\r
5462\r
5463 case '*':\r
5464 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ASTERISK_ZERO_INF)) break;\r
5465#ifdef USE_VARIABLE_META_CHARS\r
5466 anytime:\r
5467#endif\r
5468 tok->type = TK_OP_REPEAT;\r
5469 tok->u.repeat.lower = 0;\r
5470 tok->u.repeat.upper = REPEAT_INFINITE;\r
5471 goto greedy_check;\r
5472 break;\r
5473\r
5474 case '+':\r
5475 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_PLUS_ONE_INF)) break;\r
5476#ifdef USE_VARIABLE_META_CHARS\r
5477 one_or_more_time:\r
5478#endif\r
5479 tok->type = TK_OP_REPEAT;\r
5480 tok->u.repeat.lower = 1;\r
5481 tok->u.repeat.upper = REPEAT_INFINITE;\r
5482 goto greedy_check;\r
5483 break;\r
5484\r
5485 case '?':\r
5486 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_ZERO_ONE)) break;\r
5487#ifdef USE_VARIABLE_META_CHARS\r
5488 zero_or_one_time:\r
5489#endif\r
5490 tok->type = TK_OP_REPEAT;\r
5491 tok->u.repeat.lower = 0;\r
5492 tok->u.repeat.upper = 1;\r
5493 goto greedy_check;\r
5494 break;\r
5495\r
5496 case '{':\r
5497 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break;\r
5498 r = fetch_range_quantifier(&p, end, tok, env);\r
5499 if (r < 0) return r; /* error */\r
5500 if (r == 0) goto greedy_check;\r
5501 else if (r == 2) { /* {n} */\r
b602265d
DG
5502 if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))\r
5503 goto possessive_check;\r
14b0e578 5504\r
b602265d 5505 goto greedy_check;\r
14b0e578
CS
5506 }\r
5507 /* r == 1 : normal char */\r
5508 break;\r
5509\r
5510 case '|':\r
5511 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_VBAR_ALT)) break;\r
5512 tok->type = TK_ALT;\r
5513 break;\r
5514\r
5515 case '(':\r
b602265d 5516 if (!PEND && PPEEK_IS('?') &&\r
14b0e578
CS
5517 IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {\r
5518 PINC;\r
b602265d
DG
5519 if (! PEND) {\r
5520 c = PPEEK;\r
5521 if (c == '#') {\r
14b0e578 5522 PFETCH(c);\r
b602265d
DG
5523 while (1) {\r
5524 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
5525 PFETCH(c);\r
5526 if (c == MC_ESC(syn)) {\r
5527 if (! PEND) PFETCH(c);\r
5528 }\r
5529 else {\r
5530 if (c == ')') break;\r
5531 }\r
14b0e578 5532 }\r
b602265d
DG
5533 goto start;\r
5534 }\r
5535 else if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_PERL_SUBEXP_CALL)) {\r
5536 int gnum;\r
5537 UChar* name;\r
5538 UChar* name_end;\r
5539 enum REF_NUM num_type;\r
5540\r
5541 switch (c) {\r
5542 case '&':\r
5543 {\r
5544 PINC;\r
5545 name = p;\r
5546 r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &gnum,\r
5547 &num_type, 0);\r
5548 if (r < 0) return r;\r
5549\r
5550 tok->type = TK_CALL;\r
5551 tok->u.call.by_number = 0;\r
5552 tok->u.call.gnum = 0;\r
5553 tok->u.call.name = name;\r
5554 tok->u.call.name_end = name_end;\r
5555 }\r
5556 break;\r
5557\r
5558 case 'R':\r
5559 tok->type = TK_CALL;\r
5560 tok->u.call.by_number = 1;\r
5561 tok->u.call.gnum = 0;\r
5562 tok->u.call.name = p;\r
5563 PINC;\r
5564 if (! PPEEK_IS(')')) return ONIGERR_INVALID_GROUP_NAME;\r
5565 tok->u.call.name_end = p;\r
5566 break;\r
5567\r
5568 case '-':\r
5569 case '+':\r
5570 goto lparen_qmark_num;\r
5571 break;\r
5572 default:\r
5573 if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto lparen_qmark_end;\r
5574\r
5575 lparen_qmark_num:\r
5576 {\r
5577 name = p;\r
5578 r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env,\r
5579 &gnum, &num_type, 1);\r
5580 if (r < 0) return r;\r
5581\r
5582 if (num_type == IS_NOT_NUM) {\r
5583 return ONIGERR_INVALID_GROUP_NAME;\r
5584 }\r
5585 else {\r
5586 if (num_type == IS_REL_NUM) {\r
5587 gnum = backref_rel_to_abs(gnum, env);\r
5588 if (gnum < 0) {\r
5589 onig_scan_env_set_error_string(env,\r
5590 ONIGERR_UNDEFINED_NAME_REFERENCE, name, name_end);\r
5591 return ONIGERR_UNDEFINED_GROUP_REFERENCE;\r
5592 }\r
5593 }\r
5594 tok->u.call.by_number = 1;\r
5595 tok->u.call.gnum = gnum;\r
5596 }\r
5597\r
5598 tok->type = TK_CALL;\r
5599 tok->u.call.name = name;\r
5600 tok->u.call.name_end = name_end;\r
5601 }\r
5602 break;\r
14b0e578
CS
5603 }\r
5604 }\r
14b0e578 5605 }\r
b602265d 5606 lparen_qmark_end:\r
14b0e578
CS
5607 PUNFETCH;\r
5608 }\r
5609\r
5610 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;\r
5611 tok->type = TK_SUBEXP_OPEN;\r
5612 break;\r
5613\r
5614 case ')':\r
5615 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;\r
5616 tok->type = TK_SUBEXP_CLOSE;\r
5617 break;\r
5618\r
5619 case '^':\r
5620 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;\r
5621 tok->type = TK_ANCHOR;\r
b602265d
DG
5622 tok->u.subtype = (IS_SINGLELINE(env->options)\r
5623 ? ANCHOR_BEGIN_BUF : ANCHOR_BEGIN_LINE);\r
14b0e578
CS
5624 break;\r
5625\r
5626 case '$':\r
5627 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;\r
5628 tok->type = TK_ANCHOR;\r
b602265d
DG
5629 tok->u.subtype = (IS_SINGLELINE(env->options)\r
5630 ? ANCHOR_SEMI_END_BUF : ANCHOR_END_LINE);\r
14b0e578
CS
5631 break;\r
5632\r
5633 case '[':\r
5634 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACKET_CC)) break;\r
5635 tok->type = TK_CC_OPEN;\r
5636 break;\r
5637\r
5638 case ']':\r
5639 if (*src > env->pattern) /* /].../ is allowed. */\r
b602265d 5640 CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]");\r
14b0e578
CS
5641 break;\r
5642\r
5643 case '#':\r
b602265d
DG
5644 if (IS_EXTEND(env->options)) {\r
5645 while (!PEND) {\r
5646 PFETCH(c);\r
5647 if (ONIGENC_IS_CODE_NEWLINE(enc, c))\r
5648 break;\r
5649 }\r
5650 goto start;\r
5651 break;\r
14b0e578
CS
5652 }\r
5653 break;\r
5654\r
5655 case ' ': case '\t': case '\n': case '\r': case '\f':\r
b602265d
DG
5656 if (IS_EXTEND(env->options))\r
5657 goto start;\r
14b0e578
CS
5658 break;\r
5659\r
5660 default:\r
5661 /* string */\r
5662 break;\r
5663 }\r
5664 }\r
5665\r
5666#ifdef USE_VARIABLE_META_CHARS\r
5667 out:\r
5668#endif\r
5669 *src = p;\r
5670 return tok->type;\r
5671}\r
5672\r
5673static int\r
5674add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not,\r
b602265d
DG
5675 OnigEncoding enc ARG_UNUSED, OnigCodePoint sb_out,\r
5676 const OnigCodePoint mbr[])\r
14b0e578
CS
5677{\r
5678 int i, r;\r
5679 OnigCodePoint j;\r
5680\r
5681 int n = ONIGENC_CODE_RANGE_NUM(mbr);\r
5682\r
5683 if (not == 0) {\r
5684 for (i = 0; i < n; i++) {\r
5685 for (j = ONIGENC_CODE_RANGE_FROM(mbr, i);\r
5686 j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) {\r
b602265d
DG
5687 if (j >= sb_out) {\r
5688 if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) {\r
5689 r = add_code_range_to_buf(&(cc->mbuf), j,\r
5690 ONIGENC_CODE_RANGE_TO(mbr, i));\r
5691 if (r != 0) return r;\r
5692 i++;\r
5693 }\r
5694\r
5695 goto sb_end;\r
5696 }\r
14b0e578
CS
5697 BITSET_SET_BIT(cc->bs, j);\r
5698 }\r
5699 }\r
5700\r
5701 sb_end:\r
5702 for ( ; i < n; i++) {\r
5703 r = add_code_range_to_buf(&(cc->mbuf),\r
5704 ONIGENC_CODE_RANGE_FROM(mbr, i),\r
5705 ONIGENC_CODE_RANGE_TO(mbr, i));\r
5706 if (r != 0) return r;\r
5707 }\r
5708 }\r
5709 else {\r
5710 OnigCodePoint prev = 0;\r
5711\r
5712 for (i = 0; i < n; i++) {\r
b602265d
DG
5713 for (j = prev; j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) {\r
5714 if (j >= sb_out) {\r
5715 goto sb_end2;\r
5716 }\r
5717 BITSET_SET_BIT(cc->bs, j);\r
14b0e578
CS
5718 }\r
5719 prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;\r
5720 }\r
5721 for (j = prev; j < sb_out; j++) {\r
5722 BITSET_SET_BIT(cc->bs, j);\r
5723 }\r
5724\r
5725 sb_end2:\r
5726 prev = sb_out;\r
5727\r
5728 for (i = 0; i < n; i++) {\r
5729 if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) {\r
b602265d 5730 r = add_code_range_to_buf(&(cc->mbuf), prev,\r
14b0e578 5731 ONIGENC_CODE_RANGE_FROM(mbr, i) - 1);\r
b602265d 5732 if (r != 0) return r;\r
14b0e578
CS
5733 }\r
5734 prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;\r
b602265d
DG
5735 if (prev == 0) goto end;\r
5736 }\r
5737\r
5738 r = add_code_range_to_buf(&(cc->mbuf), prev, MAX_CODE_POINT);\r
5739 if (r != 0) return r;\r
5740 }\r
5741\r
5742 end:\r
5743 return 0;\r
5744}\r
5745\r
5746static int\r
5747add_ctype_to_cc_by_range_limit(CClassNode* cc, int ctype ARG_UNUSED, int not,\r
5748 OnigEncoding enc ARG_UNUSED,\r
5749 OnigCodePoint sb_out,\r
5750 const OnigCodePoint mbr[], OnigCodePoint limit)\r
5751{\r
5752 int i, r;\r
5753 OnigCodePoint j;\r
5754 OnigCodePoint from;\r
5755 OnigCodePoint to;\r
5756\r
5757 int n = ONIGENC_CODE_RANGE_NUM(mbr);\r
5758\r
5759 if (not == 0) {\r
5760 for (i = 0; i < n; i++) {\r
5761 for (j = ONIGENC_CODE_RANGE_FROM(mbr, i);\r
5762 j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) {\r
5763 if (j > limit) goto end;\r
5764 if (j >= sb_out) {\r
5765 if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) {\r
5766 to = ONIGENC_CODE_RANGE_TO(mbr, i);\r
5767 if (to > limit) to = limit;\r
5768 r = add_code_range_to_buf(&(cc->mbuf), j, to);\r
5769 if (r != 0) return r;\r
5770 i++;\r
5771 }\r
5772\r
5773 goto sb_end;\r
5774 }\r
5775 BITSET_SET_BIT(cc->bs, j);\r
5776 }\r
14b0e578 5777 }\r
b602265d
DG
5778\r
5779 sb_end:\r
5780 for ( ; i < n; i++) {\r
5781 from = ONIGENC_CODE_RANGE_FROM(mbr, i);\r
5782 to = ONIGENC_CODE_RANGE_TO(mbr, i);\r
5783 if (from > limit) break;\r
5784 if (to > limit) to = limit;\r
5785 r = add_code_range_to_buf(&(cc->mbuf), from, to);\r
14b0e578
CS
5786 if (r != 0) return r;\r
5787 }\r
5788 }\r
b602265d
DG
5789 else {\r
5790 OnigCodePoint prev = 0;\r
5791\r
5792 for (i = 0; i < n; i++) {\r
5793 from = ONIGENC_CODE_RANGE_FROM(mbr, i);\r
5794 if (from > limit) {\r
5795 for (j = prev; j < sb_out; j++) {\r
5796 BITSET_SET_BIT(cc->bs, j);\r
5797 }\r
5798 goto sb_end2;\r
5799 }\r
5800 for (j = prev; j < from; j++) {\r
5801 if (j >= sb_out) goto sb_end2;\r
5802 BITSET_SET_BIT(cc->bs, j);\r
5803 }\r
5804 prev = ONIGENC_CODE_RANGE_TO(mbr, i);\r
5805 if (prev > limit) prev = limit;\r
5806 prev++;\r
5807 if (prev == 0) goto end;\r
5808 }\r
5809 for (j = prev; j < sb_out; j++) {\r
5810 BITSET_SET_BIT(cc->bs, j);\r
5811 }\r
5812\r
5813 sb_end2:\r
5814 prev = sb_out;\r
5815\r
5816 for (i = 0; i < n; i++) {\r
5817 from = ONIGENC_CODE_RANGE_FROM(mbr, i);\r
5818 if (from > limit) goto last;\r
5819\r
5820 if (prev < from) {\r
5821 r = add_code_range_to_buf(&(cc->mbuf), prev, from - 1);\r
5822 if (r != 0) return r;\r
5823 }\r
5824 prev = ONIGENC_CODE_RANGE_TO(mbr, i);\r
5825 if (prev > limit) prev = limit;\r
5826 prev++;\r
5827 if (prev == 0) goto end;\r
5828 }\r
5829\r
5830 last:\r
5831 r = add_code_range_to_buf(&(cc->mbuf), prev, MAX_CODE_POINT);\r
5832 if (r != 0) return r;\r
5833 }\r
14b0e578 5834\r
b602265d 5835 end:\r
14b0e578
CS
5836 return 0;\r
5837}\r
5838\r
5839static int\r
5840add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)\r
5841{\r
b602265d
DG
5842#define ASCII_LIMIT 127\r
5843\r
14b0e578 5844 int c, r;\r
b602265d 5845 int ascii_mode;\r
14b0e578 5846 const OnigCodePoint *ranges;\r
b602265d 5847 OnigCodePoint limit;\r
14b0e578
CS
5848 OnigCodePoint sb_out;\r
5849 OnigEncoding enc = env->enc;\r
5850\r
b602265d
DG
5851 ascii_mode = IS_ASCII_MODE_CTYPE_OPTION(ctype, env->options);\r
5852\r
14b0e578
CS
5853 r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges);\r
5854 if (r == 0) {\r
b602265d
DG
5855 if (ascii_mode == 0)\r
5856 r = add_ctype_to_cc_by_range(cc, ctype, not, env->enc, sb_out, ranges);\r
5857 else\r
5858 r = add_ctype_to_cc_by_range_limit(cc, ctype, not, env->enc, sb_out,\r
5859 ranges, ASCII_LIMIT);\r
5860 return r;\r
14b0e578
CS
5861 }\r
5862 else if (r != ONIG_NO_SUPPORT_CONFIG) {\r
5863 return r;\r
5864 }\r
5865\r
5866 r = 0;\r
b602265d
DG
5867 limit = ascii_mode ? ASCII_LIMIT : SINGLE_BYTE_SIZE;\r
5868\r
14b0e578
CS
5869 switch (ctype) {\r
5870 case ONIGENC_CTYPE_ALPHA:\r
5871 case ONIGENC_CTYPE_BLANK:\r
5872 case ONIGENC_CTYPE_CNTRL:\r
5873 case ONIGENC_CTYPE_DIGIT:\r
5874 case ONIGENC_CTYPE_LOWER:\r
5875 case ONIGENC_CTYPE_PUNCT:\r
5876 case ONIGENC_CTYPE_SPACE:\r
5877 case ONIGENC_CTYPE_UPPER:\r
5878 case ONIGENC_CTYPE_XDIGIT:\r
5879 case ONIGENC_CTYPE_ASCII:\r
5880 case ONIGENC_CTYPE_ALNUM:\r
5881 if (not != 0) {\r
b602265d
DG
5882 for (c = 0; c < (int )limit; c++) {\r
5883 if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))\r
5884 BITSET_SET_BIT(cc->bs, c);\r
5885 }\r
5886 for (c = limit; c < SINGLE_BYTE_SIZE; c++) {\r
5887 BITSET_SET_BIT(cc->bs, c);\r
14b0e578 5888 }\r
b602265d 5889\r
14b0e578
CS
5890 ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);\r
5891 }\r
5892 else {\r
b602265d
DG
5893 for (c = 0; c < (int )limit; c++) {\r
5894 if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))\r
5895 BITSET_SET_BIT(cc->bs, c);\r
14b0e578
CS
5896 }\r
5897 }\r
5898 break;\r
5899\r
5900 case ONIGENC_CTYPE_GRAPH:\r
5901 case ONIGENC_CTYPE_PRINT:\r
b602265d 5902 case ONIGENC_CTYPE_WORD:\r
14b0e578 5903 if (not != 0) {\r
b602265d
DG
5904 for (c = 0; c < (int )limit; c++) {\r
5905 if (ONIGENC_CODE_TO_MBCLEN(enc, c) > 0 /* check invalid code point */\r
5906 && ! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))\r
5907 BITSET_SET_BIT(cc->bs, c);\r
14b0e578 5908 }\r
b602265d
DG
5909 for (c = limit; c < SINGLE_BYTE_SIZE; c++) {\r
5910 if (ONIGENC_CODE_TO_MBCLEN(enc, c) > 0)\r
5911 BITSET_SET_BIT(cc->bs, c);\r
14b0e578 5912 }\r
14b0e578
CS
5913 }\r
5914 else {\r
b602265d
DG
5915 for (c = 0; c < (int )limit; c++) {\r
5916 if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))\r
5917 BITSET_SET_BIT(cc->bs, c);\r
14b0e578 5918 }\r
b602265d
DG
5919 if (ascii_mode == 0)\r
5920 ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);\r
14b0e578
CS
5921 }\r
5922 break;\r
5923\r
5924 default:\r
5925 return ONIGERR_PARSER_BUG;\r
5926 break;\r
5927 }\r
5928\r
5929 return r;\r
5930}\r
5931\r
5932static int\r
5933parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)\r
5934{\r
5935#define POSIX_BRACKET_CHECK_LIMIT_LENGTH 20\r
5936#define POSIX_BRACKET_NAME_MIN_LEN 4\r
5937\r
5938 static PosixBracketEntryType PBS[] = {\r
5939 { (UChar* )"alnum", ONIGENC_CTYPE_ALNUM, 5 },\r
5940 { (UChar* )"alpha", ONIGENC_CTYPE_ALPHA, 5 },\r
5941 { (UChar* )"blank", ONIGENC_CTYPE_BLANK, 5 },\r
5942 { (UChar* )"cntrl", ONIGENC_CTYPE_CNTRL, 5 },\r
5943 { (UChar* )"digit", ONIGENC_CTYPE_DIGIT, 5 },\r
5944 { (UChar* )"graph", ONIGENC_CTYPE_GRAPH, 5 },\r
5945 { (UChar* )"lower", ONIGENC_CTYPE_LOWER, 5 },\r
5946 { (UChar* )"print", ONIGENC_CTYPE_PRINT, 5 },\r
5947 { (UChar* )"punct", ONIGENC_CTYPE_PUNCT, 5 },\r
5948 { (UChar* )"space", ONIGENC_CTYPE_SPACE, 5 },\r
5949 { (UChar* )"upper", ONIGENC_CTYPE_UPPER, 5 },\r
5950 { (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 },\r
5951 { (UChar* )"ascii", ONIGENC_CTYPE_ASCII, 5 },\r
5952 { (UChar* )"word", ONIGENC_CTYPE_WORD, 4 },\r
5953 { (UChar* )NULL, -1, 0 }\r
5954 };\r
5955\r
5956 PosixBracketEntryType *pb;\r
5957 int not, i, r;\r
5958 OnigCodePoint c;\r
5959 OnigEncoding enc = env->enc;\r
5960 UChar *p = *src;\r
5961\r
5962 if (PPEEK_IS('^')) {\r
5963 PINC_S;\r
5964 not = 1;\r
5965 }\r
5966 else\r
5967 not = 0;\r
5968\r
5969 if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MIN_LEN + 3)\r
5970 goto not_posix_bracket;\r
5971\r
5972 for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {\r
5973 if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) {\r
5974 p = (UChar* )onigenc_step(enc, p, end, pb->len);\r
5975 if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0)\r
5976 return ONIGERR_INVALID_POSIX_BRACKET_TYPE;\r
5977\r
5978 r = add_ctype_to_cc(cc, pb->ctype, not, env);\r
5979 if (r != 0) return r;\r
5980\r
5981 PINC_S; PINC_S;\r
5982 *src = p;\r
5983 return 0;\r
5984 }\r
5985 }\r
5986\r
5987 not_posix_bracket:\r
5988 c = 0;\r
5989 i = 0;\r
5990 while (!PEND && ((c = PPEEK) != ':') && c != ']') {\r
5991 PINC_S;\r
5992 if (++i > POSIX_BRACKET_CHECK_LIMIT_LENGTH) break;\r
5993 }\r
5994 if (c == ':' && ! PEND) {\r
5995 PINC_S;\r
5996 if (! PEND) {\r
5997 PFETCH_S(c);\r
5998 if (c == ']')\r
5999 return ONIGERR_INVALID_POSIX_BRACKET_TYPE;\r
6000 }\r
6001 }\r
6002\r
6003 return 1; /* 1: is not POSIX bracket, but no error. */\r
6004}\r
6005\r
6006static int\r
6007fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env)\r
6008{\r
6009 int r;\r
6010 OnigCodePoint c;\r
6011 OnigEncoding enc = env->enc;\r
6012 UChar *prev, *start, *p = *src;\r
6013\r
6014 r = 0;\r
6015 start = prev = p;\r
6016\r
6017 while (!PEND) {\r
6018 prev = p;\r
6019 PFETCH_S(c);\r
6020 if (c == '}') {\r
6021 r = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, start, prev);\r
6022 if (r < 0) break;\r
6023\r
6024 *src = p;\r
6025 return r;\r
6026 }\r
6027 else if (c == '(' || c == ')' || c == '{' || c == '|') {\r
6028 r = ONIGERR_INVALID_CHAR_PROPERTY_NAME;\r
6029 break;\r
6030 }\r
6031 }\r
6032\r
6033 onig_scan_env_set_error_string(env, r, *src, prev);\r
6034 return r;\r
6035}\r
6036\r
6037static int\r
b602265d 6038parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)\r
14b0e578
CS
6039{\r
6040 int r, ctype;\r
6041 CClassNode* cc;\r
6042\r
6043 ctype = fetch_char_property_to_ctype(src, end, env);\r
6044 if (ctype < 0) return ctype;\r
6045\r
6046 *np = node_new_cclass();\r
6047 CHECK_NULL_RETURN_MEMERR(*np);\r
b602265d 6048 cc = CCLASS_(*np);\r
14b0e578
CS
6049 r = add_ctype_to_cc(cc, ctype, 0, env);\r
6050 if (r != 0) return r;\r
6051 if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);\r
6052\r
6053 return 0;\r
6054}\r
6055\r
6056\r
/* Parser state while reading the contents of a character class. */
enum CCSTATE {
  CCS_VALUE    = 0,  /* a value has been read and is still pending */
  CCS_RANGE    = 1,  /* a range operator has been read after a value */
  CCS_COMPLETE = 2,  /* a range has just been completed */
  CCS_START    = 3   /* nothing read yet (also set again after "&&") */
};
6063\r
/* Kind of the value most recently read inside a character class. */
enum CCVALTYPE {
  CCV_SB         = 0,  /* value stored in the bitset */
  CCV_CODE_POINT = 1,  /* value stored as a code range in mbuf */
  CCV_CLASS      = 2   /* a character type / class, not a single value */
};
6069\r
6070static int\r
6071next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type,\r
b602265d 6072 enum CCSTATE* state, ScanEnv* env)\r
14b0e578
CS
6073{\r
6074 int r;\r
6075\r
6076 if (*state == CCS_RANGE)\r
6077 return ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE;\r
6078\r
6079 if (*state == CCS_VALUE && *type != CCV_CLASS) {\r
6080 if (*type == CCV_SB)\r
6081 BITSET_SET_BIT(cc->bs, (int )(*vs));\r
6082 else if (*type == CCV_CODE_POINT) {\r
6083 r = add_code_range(&(cc->mbuf), env, *vs, *vs);\r
6084 if (r < 0) return r;\r
6085 }\r
6086 }\r
6087\r
6088 *state = CCS_VALUE;\r
6089 *type = CCV_CLASS;\r
6090 return 0;\r
6091}\r
6092\r
6093static int\r
b602265d
DG
6094next_state_val(CClassNode* cc, OnigCodePoint *from, OnigCodePoint to,\r
6095 int* from_israw, int to_israw,\r
6096 enum CCVALTYPE intype, enum CCVALTYPE* type,\r
6097 enum CCSTATE* state, ScanEnv* env)\r
14b0e578
CS
6098{\r
6099 int r;\r
6100\r
6101 switch (*state) {\r
6102 case CCS_VALUE:\r
b602265d
DG
6103 if (*type == CCV_SB) {\r
6104 if (*from > 0xff)\r
6105 return ONIGERR_INVALID_CODE_POINT_VALUE;\r
6106\r
6107 BITSET_SET_BIT(cc->bs, (int )(*from));\r
6108 }\r
14b0e578 6109 else if (*type == CCV_CODE_POINT) {\r
b602265d 6110 r = add_code_range(&(cc->mbuf), env, *from, *from);\r
14b0e578
CS
6111 if (r < 0) return r;\r
6112 }\r
6113 break;\r
6114\r
6115 case CCS_RANGE:\r
6116 if (intype == *type) {\r
6117 if (intype == CCV_SB) {\r
b602265d 6118 if (*from > 0xff || to > 0xff)\r
14b0e578
CS
6119 return ONIGERR_INVALID_CODE_POINT_VALUE;\r
6120\r
b602265d
DG
6121 if (*from > to) {\r
6122 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))\r
6123 goto ccs_range_end;\r
6124 else\r
6125 return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;\r
6126 }\r
6127 bitset_set_range(cc->bs, (int )*from, (int )to);\r
14b0e578
CS
6128 }\r
6129 else {\r
b602265d
DG
6130 r = add_code_range(&(cc->mbuf), env, *from, to);\r
6131 if (r < 0) return r;\r
14b0e578
CS
6132 }\r
6133 }\r
6134 else {\r
b602265d
DG
6135 if (*from > to) {\r
6136 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))\r
6137 goto ccs_range_end;\r
6138 else\r
6139 return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;\r
14b0e578 6140 }\r
b602265d
DG
6141 bitset_set_range(cc->bs, (int )*from, (int )(to < 0xff ? to : 0xff));\r
6142 r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*from, to);\r
6143 if (r < 0) return r;\r
14b0e578
CS
6144 }\r
6145 ccs_range_end:\r
6146 *state = CCS_COMPLETE;\r
6147 break;\r
6148\r
6149 case CCS_COMPLETE:\r
6150 case CCS_START:\r
6151 *state = CCS_VALUE;\r
6152 break;\r
6153\r
6154 default:\r
6155 break;\r
6156 }\r
6157\r
b602265d
DG
6158 *from_israw = to_israw;\r
6159 *from = to;\r
6160 *type = intype;\r
14b0e578
CS
6161 return 0;\r
6162}\r
6163\r
6164static int\r
6165code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,\r
b602265d 6166 ScanEnv* env)\r
14b0e578
CS
6167{\r
6168 int in_esc;\r
6169 OnigCodePoint code;\r
6170 OnigEncoding enc = env->enc;\r
6171 UChar* p = from;\r
6172\r
6173 in_esc = 0;\r
6174 while (! PEND) {\r
6175 if (ignore_escaped && in_esc) {\r
6176 in_esc = 0;\r
6177 }\r
6178 else {\r
6179 PFETCH_S(code);\r
6180 if (code == c) return 1;\r
6181 if (code == MC_ESC(env->syntax)) in_esc = 1;\r
6182 }\r
6183 }\r
6184 return 0;\r
6185}\r
6186\r
6187static int\r
b602265d 6188parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)\r
14b0e578
CS
6189{\r
6190 int r, neg, len, fetched, and_start;\r
6191 OnigCodePoint v, vs;\r
6192 UChar *p;\r
6193 Node* node;\r
6194 CClassNode *cc, *prev_cc;\r
6195 CClassNode work_cc;\r
6196\r
6197 enum CCSTATE state;\r
6198 enum CCVALTYPE val_type, in_type;\r
6199 int val_israw, in_israw;\r
6200\r
14b0e578 6201 *np = NULL_NODE;\r
b602265d
DG
6202 env->parse_depth++;\r
6203 if (env->parse_depth > ParseDepthLimit)\r
6204 return ONIGERR_PARSE_DEPTH_LIMIT_OVER;\r
6205 prev_cc = (CClassNode* )NULL;\r
14b0e578
CS
6206 r = fetch_token_in_cc(tok, src, end, env);\r
6207 if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) {\r
6208 neg = 1;\r
6209 r = fetch_token_in_cc(tok, src, end, env);\r
6210 }\r
6211 else {\r
6212 neg = 0;\r
6213 }\r
6214\r
6215 if (r < 0) return r;\r
6216 if (r == TK_CC_CLOSE) {\r
6217 if (! code_exist_check((OnigCodePoint )']',\r
6218 *src, env->pattern_end, 1, env))\r
6219 return ONIGERR_EMPTY_CHAR_CLASS;\r
6220\r
6221 CC_ESC_WARN(env, (UChar* )"]");\r
6222 r = tok->type = TK_CHAR; /* allow []...] */\r
6223 }\r
6224\r
6225 *np = node = node_new_cclass();\r
6226 CHECK_NULL_RETURN_MEMERR(node);\r
b602265d 6227 cc = CCLASS_(node);\r
14b0e578
CS
6228\r
6229 and_start = 0;\r
6230 state = CCS_START;\r
6231 p = *src;\r
6232 while (r != TK_CC_CLOSE) {\r
6233 fetched = 0;\r
6234 switch (r) {\r
6235 case TK_CHAR:\r
b602265d 6236 any_char_in:\r
14b0e578
CS
6237 len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.c);\r
6238 if (len > 1) {\r
b602265d 6239 in_type = CCV_CODE_POINT;\r
14b0e578
CS
6240 }\r
6241 else if (len < 0) {\r
b602265d
DG
6242 r = len;\r
6243 goto err;\r
14b0e578
CS
6244 }\r
6245 else {\r
b602265d
DG
6246 /* sb_char: */\r
6247 in_type = CCV_SB;\r
14b0e578
CS
6248 }\r
6249 v = (OnigCodePoint )tok->u.c;\r
6250 in_israw = 0;\r
6251 goto val_entry2;\r
6252 break;\r
6253\r
6254 case TK_RAW_BYTE:\r
6255 /* tok->base != 0 : octal or hexadec. */\r
6256 if (! ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base != 0) {\r
b602265d
DG
6257 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];\r
6258 UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN;\r
6259 UChar* psave = p;\r
6260 int i, base = tok->base;\r
6261\r
6262 buf[0] = tok->u.c;\r
6263 for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) {\r
6264 r = fetch_token_in_cc(tok, &p, end, env);\r
6265 if (r < 0) goto err;\r
6266 if (r != TK_RAW_BYTE || tok->base != base) {\r
6267 fetched = 1;\r
6268 break;\r
6269 }\r
6270 buf[i] = tok->u.c;\r
6271 }\r
6272\r
6273 if (i < ONIGENC_MBC_MINLEN(env->enc)) {\r
6274 r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;\r
6275 goto err;\r
6276 }\r
6277\r
6278 len = enclen(env->enc, buf);\r
6279 if (i < len) {\r
6280 r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;\r
6281 goto err;\r
6282 }\r
6283 else if (i > len) { /* fetch back */\r
6284 p = psave;\r
6285 for (i = 1; i < len; i++) {\r
6286 r = fetch_token_in_cc(tok, &p, end, env);\r
6287 }\r
6288 fetched = 0;\r
6289 }\r
6290\r
6291 if (i == 1) {\r
6292 v = (OnigCodePoint )buf[0];\r
6293 goto raw_single;\r
6294 }\r
6295 else {\r
6296 v = ONIGENC_MBC_TO_CODE(env->enc, buf, bufe);\r
6297 in_type = CCV_CODE_POINT;\r
6298 }\r
6299 }\r
6300 else {\r
6301 v = (OnigCodePoint )tok->u.c;\r
6302 raw_single:\r
6303 in_type = CCV_SB;\r
6304 }\r
6305 in_israw = 1;\r
6306 goto val_entry2;\r
6307 break;\r
6308\r
6309 case TK_CODE_POINT:\r
6310 v = tok->u.code;\r
6311 in_israw = 1;\r
6312 val_entry:\r
6313 len = ONIGENC_CODE_TO_MBCLEN(env->enc, v);\r
6314 if (len < 0) {\r
6315 r = len;\r
6316 goto err;\r
6317 }\r
6318 in_type = (len == 1 ? CCV_SB : CCV_CODE_POINT);\r
6319 val_entry2:\r
6320 r = next_state_val(cc, &vs, v, &val_israw, in_israw, in_type, &val_type,\r
6321 &state, env);\r
6322 if (r != 0) goto err;\r
6323 break;\r
6324\r
6325 case TK_POSIX_BRACKET_OPEN:\r
6326 r = parse_posix_bracket(cc, &p, end, env);\r
6327 if (r < 0) goto err;\r
6328 if (r == 1) { /* is not POSIX bracket */\r
6329 CC_ESC_WARN(env, (UChar* )"[");\r
6330 p = tok->backp;\r
6331 v = (OnigCodePoint )tok->u.c;\r
6332 in_israw = 0;\r
6333 goto val_entry;\r
6334 }\r
6335 goto next_class;\r
6336 break;\r
6337\r
6338 case TK_CHAR_TYPE:\r
6339 r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not, env);\r
6340 if (r != 0) goto err;\r
6341\r
6342 next_class:\r
6343 r = next_state_class(cc, &vs, &val_type, &state, env);\r
6344 if (r != 0) goto err;\r
6345 break;\r
6346\r
6347 case TK_CHAR_PROPERTY:\r
6348 {\r
6349 int ctype = fetch_char_property_to_ctype(&p, end, env);\r
6350 if (ctype < 0) {\r
6351 r = ctype;\r
6352 goto err;\r
6353 }\r
6354 r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, env);\r
6355 if (r != 0) goto err;\r
6356 goto next_class;\r
6357 }\r
6358 break;\r
6359\r
6360 case TK_CC_RANGE:\r
6361 if (state == CCS_VALUE) {\r
6362 r = fetch_token_in_cc(tok, &p, end, env);\r
6363 if (r < 0) goto err;\r
6364 fetched = 1;\r
6365 if (r == TK_CC_CLOSE) { /* allow [x-] */\r
6366 range_end_val:\r
6367 v = (OnigCodePoint )'-';\r
6368 in_israw = 0;\r
6369 goto val_entry;\r
6370 }\r
6371 else if (r == TK_CC_AND) {\r
6372 CC_ESC_WARN(env, (UChar* )"-");\r
6373 goto range_end_val;\r
6374 }\r
6375\r
6376 if (val_type == CCV_CLASS) {\r
6377 r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;\r
6378 goto err;\r
6379 }\r
6380\r
6381 state = CCS_RANGE;\r
6382 }\r
6383 else if (state == CCS_START) {\r
6384 /* [-xa] is allowed */\r
6385 v = (OnigCodePoint )tok->u.c;\r
6386 in_israw = 0;\r
6387\r
6388 r = fetch_token_in_cc(tok, &p, end, env);\r
6389 if (r < 0) goto err;\r
6390 fetched = 1;\r
6391 /* [--x] or [a&&-x] is warned. */\r
6392 if (r == TK_CC_RANGE || and_start != 0)\r
6393 CC_ESC_WARN(env, (UChar* )"-");\r
6394\r
6395 goto val_entry;\r
6396 }\r
6397 else if (state == CCS_RANGE) {\r
6398 CC_ESC_WARN(env, (UChar* )"-");\r
6399 goto any_char_in; /* [!--x] is allowed */\r
6400 }\r
6401 else { /* CCS_COMPLETE */\r
6402 r = fetch_token_in_cc(tok, &p, end, env);\r
6403 if (r < 0) goto err;\r
6404 fetched = 1;\r
6405 if (r == TK_CC_CLOSE) goto range_end_val; /* allow [a-b-] */\r
6406 else if (r == TK_CC_AND) {\r
6407 CC_ESC_WARN(env, (UChar* )"-");\r
6408 goto range_end_val;\r
6409 }\r
6410\r
6411 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) {\r
6412 CC_ESC_WARN(env, (UChar* )"-");\r
6413 goto range_end_val; /* [0-9-a] is allowed as [0-9\-a] */\r
6414 }\r
6415 r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;\r
6416 goto err;\r
6417 }\r
6418 break;\r
6419\r
6420 case TK_CC_CC_OPEN: /* [ */\r
6421 {\r
6422 Node *anode;\r
6423 CClassNode* acc;\r
6424\r
6425 r = parse_char_class(&anode, tok, &p, end, env);\r
6426 if (r != 0) {\r
6427 onig_node_free(anode);\r
6428 goto cc_open_err;\r
6429 }\r
6430 acc = CCLASS_(anode);\r
6431 r = or_cclass(cc, acc, env->enc);\r
6432 onig_node_free(anode);\r
6433\r
6434 cc_open_err:\r
6435 if (r != 0) goto err;\r
6436 }\r
6437 break;\r
6438\r
6439 case TK_CC_AND: /* && */\r
6440 {\r
6441 if (state == CCS_VALUE) {\r
6442 r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,\r
6443 &val_type, &state, env);\r
6444 if (r != 0) goto err;\r
6445 }\r
6446 /* initialize local variables */\r
6447 and_start = 1;\r
6448 state = CCS_START;\r
6449\r
6450 if (IS_NOT_NULL(prev_cc)) {\r
6451 r = and_cclass(prev_cc, cc, env->enc);\r
6452 if (r != 0) goto err;\r
6453 bbuf_free(cc->mbuf);\r
6454 }\r
6455 else {\r
6456 prev_cc = cc;\r
6457 cc = &work_cc;\r
6458 }\r
6459 initialize_cclass(cc);\r
6460 }\r
6461 break;\r
6462\r
6463 case TK_EOT:\r
6464 r = ONIGERR_PREMATURE_END_OF_CHAR_CLASS;\r
6465 goto err;\r
6466 break;\r
6467 default:\r
6468 r = ONIGERR_PARSER_BUG;\r
6469 goto err;\r
6470 break;\r
6471 }\r
6472\r
6473 if (fetched)\r
6474 r = tok->type;\r
6475 else {\r
6476 r = fetch_token_in_cc(tok, &p, end, env);\r
6477 if (r < 0) goto err;\r
6478 }\r
6479 }\r
6480\r
6481 if (state == CCS_VALUE) {\r
6482 r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,\r
6483 &val_type, &state, env);\r
6484 if (r != 0) goto err;\r
6485 }\r
6486\r
6487 if (IS_NOT_NULL(prev_cc)) {\r
6488 r = and_cclass(prev_cc, cc, env->enc);\r
6489 if (r != 0) goto err;\r
6490 bbuf_free(cc->mbuf);\r
6491 cc = prev_cc;\r
6492 }\r
6493\r
6494 if (neg != 0)\r
6495 NCCLASS_SET_NOT(cc);\r
6496 else\r
6497 NCCLASS_CLEAR_NOT(cc);\r
6498 if (IS_NCCLASS_NOT(cc) &&\r
6499 IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) {\r
6500 int is_empty = (IS_NULL(cc->mbuf) ? 1 : 0);\r
6501 if (is_empty != 0)\r
6502 BITSET_IS_EMPTY(cc->bs, is_empty);\r
6503\r
6504 if (is_empty == 0) {\r
6505#define NEWLINE_CODE 0x0a\r
6506\r
6507 if (ONIGENC_IS_CODE_NEWLINE(env->enc, NEWLINE_CODE)) {\r
6508 if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1)\r
6509 BITSET_SET_BIT(cc->bs, NEWLINE_CODE);\r
6510 else\r
6511 add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE);\r
6512 }\r
6513 }\r
6514 }\r
6515 *src = p;\r
6516 env->parse_depth--;\r
6517 return 0;\r
6518\r
6519 err:\r
6520 if (cc != CCLASS_(*np))\r
6521 bbuf_free(cc->mbuf);\r
6522 return r;\r
6523}\r
6524\r
6525static int parse_subexp(Node** top, OnigToken* tok, int term,\r
6526 UChar** src, UChar* end, ScanEnv* env);\r
6527\r
6528#ifdef USE_CALLOUT\r
6529\r
6530/* (?{...}[tag][+-]) (?{{...}}[tag][+-]) */\r
6531static int\r
6532parse_callout_of_contents(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* env)\r
6533{\r
6534 int r;\r
6535 int i;\r
6536 int in;\r
6537 int num;\r
6538 OnigCodePoint c;\r
6539 UChar* code_start;\r
6540 UChar* code_end;\r
6541 UChar* contents;\r
6542 UChar* tag_start;\r
6543 UChar* tag_end;\r
6544 int brace_nest;\r
6545 CalloutListEntry* e;\r
6546 RegexExt* ext;\r
6547 OnigEncoding enc = env->enc;\r
6548 UChar* p = *src;\r
6549\r
6550 if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;\r
6551\r
6552 brace_nest = 0;\r
6553 while (PPEEK_IS('{')) {\r
6554 brace_nest++;\r
6555 PINC_S;\r
6556 if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;\r
6557 }\r
6558\r
6559 in = ONIG_CALLOUT_IN_PROGRESS;\r
6560 code_start = p;\r
6561 while (1) {\r
6562 if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;\r
6563\r
6564 code_end = p;\r
6565 PFETCH_S(c);\r
6566 if (c == '}') {\r
6567 i = brace_nest;\r
6568 while (i > 0) {\r
6569 if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;\r
6570 PFETCH_S(c);\r
6571 if (c == '}') i--;\r
6572 else break;\r
6573 }\r
6574 if (i == 0) break;\r
6575 }\r
6576 }\r
6577\r
6578 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
6579\r
6580 PFETCH_S(c);\r
6581 if (c == '[') {\r
6582 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
6583 tag_start = p;\r
6584 while (! PEND) {\r
6585 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
6586 tag_end = p;\r
6587 PFETCH_S(c);\r
6588 if (c == ']') break;\r
6589 }\r
6590 if (! is_allowed_callout_tag_name(enc, tag_start, tag_end))\r
6591 return ONIGERR_INVALID_CALLOUT_TAG_NAME;\r
6592\r
6593 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
6594 PFETCH_S(c);\r
6595 }\r
6596 else {\r
6597 tag_start = tag_end = 0;\r
6598 }\r
6599\r
6600 if (c == 'X') {\r
6601 in |= ONIG_CALLOUT_IN_RETRACTION;\r
6602 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
6603 PFETCH_S(c);\r
6604 }\r
6605 else if (c == '<') {\r
6606 in = ONIG_CALLOUT_IN_RETRACTION;\r
6607 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
6608 PFETCH_S(c);\r
6609 }\r
6610 else if (c == '>') { /* no needs (default) */\r
6611 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
6612 PFETCH_S(c);\r
6613 }\r
6614\r
6615 if (c != cterm)\r
6616 return ONIGERR_INVALID_CALLOUT_PATTERN;\r
6617\r
6618 r = reg_callout_list_entry(env, &num);\r
6619 if (r != 0) return r;\r
6620\r
6621 ext = onig_get_regex_ext(env->reg);\r
df8be9e5 6622 CHECK_NULL_RETURN_MEMERR(ext);\r
b602265d
DG
6623 if (IS_NULL(ext->pattern)) {\r
6624 r = onig_ext_set_pattern(env->reg, env->pattern, env->pattern_end);\r
6625 if (r != ONIG_NORMAL) return r;\r
6626 }\r
6627\r
6628 if (tag_start != tag_end) {\r
6629 r = callout_tag_entry(env->reg, tag_start, tag_end, num);\r
6630 if (r != ONIG_NORMAL) return r;\r
6631 }\r
6632\r
6633 contents = onigenc_strdup(enc, code_start, code_end);\r
6634 CHECK_NULL_RETURN_MEMERR(contents);\r
6635\r
6636 r = node_new_callout(np, ONIG_CALLOUT_OF_CONTENTS, num, ONIG_NON_NAME_ID, env);\r
6637 if (r != 0) {\r
6638 xfree(contents);\r
6639 return r;\r
6640 }\r
6641\r
6642 e = onig_reg_callout_list_at(env->reg, num);\r
a5def177
DG
6643 if (IS_NULL(e)) {\r
6644 xfree(contents);\r
6645 return ONIGERR_MEMORY;\r
6646 }\r
6647\r
b602265d
DG
6648 e->of = ONIG_CALLOUT_OF_CONTENTS;\r
6649 e->in = in;\r
6650 e->name_id = ONIG_NON_NAME_ID;\r
6651 e->u.content.start = contents;\r
6652 e->u.content.end = contents + (code_end - code_start);\r
6653\r
6654 *src = p;\r
6655 return 0;\r
6656}\r
6657\r
6658static long\r
6659parse_long(OnigEncoding enc, UChar* s, UChar* end, int sign_on, long max, long* rl)\r
6660{\r
6661 long v;\r
6662 long d;\r
6663 int flag;\r
6664 UChar* p;\r
6665 OnigCodePoint c;\r
6666\r
6667 if (s >= end) return ONIGERR_INVALID_CALLOUT_ARG;\r
6668\r
6669 flag = 1;\r
6670 v = 0;\r
6671 p = s;\r
6672 while (p < end) {\r
6673 c = ONIGENC_MBC_TO_CODE(enc, p, end);\r
6674 p += ONIGENC_MBC_ENC_LEN(enc, p);\r
6675 if (c >= '0' && c <= '9') {\r
6676 d = (long )(c - '0');\r
6677 if (v > (max - d) / 10)\r
6678 return ONIGERR_INVALID_CALLOUT_ARG;\r
6679\r
6680 v = v * 10 + d;\r
6681 }\r
6682 else if (sign_on != 0 && (c == '-' || c == '+')) {\r
6683 if (c == '-') flag = -1;\r
6684 }\r
6685 else\r
6686 return ONIGERR_INVALID_CALLOUT_ARG;\r
6687\r
6688 sign_on = 0;\r
6689 }\r
6690\r
6691 *rl = flag * v;\r
6692 return ONIG_NORMAL;\r
6693}\r
6694\r
6695static int\r
6696parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end,\r
6697 unsigned int types[], OnigValue vals[], ScanEnv* env)\r
6698{\r
6699#define MAX_CALLOUT_ARG_BYTE_LENGTH 128\r
6700\r
6701 int r;\r
6702 int n;\r
6703 int esc;\r
6704 int cn;\r
6705 UChar* s;\r
6706 UChar* e;\r
6707 UChar* eesc;\r
6708 OnigCodePoint c;\r
6709 UChar* bufend;\r
6710 UChar buf[MAX_CALLOUT_ARG_BYTE_LENGTH];\r
6711 OnigEncoding enc = env->enc;\r
6712 UChar* p = *src;\r
6713\r
6714 if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;\r
6715\r
6716 n = 0;\r
6717 while (n < ONIG_CALLOUT_MAX_ARGS_NUM) {\r
6718 c = 0;\r
6719 cn = 0;\r
6720 esc = 0;\r
6721 eesc = 0;\r
6722 bufend = buf;\r
6723 s = e = p;\r
6724 while (1) {\r
6725 if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;\r
6726\r
6727 e = p;\r
6728 PFETCH_S(c);\r
6729 if (esc != 0) {\r
6730 esc = 0;\r
6731 if (c == '\\' || c == cterm || c == ',') {\r
6732 /* */\r
6733 }\r
6734 else {\r
6735 e = eesc;\r
6736 cn++;\r
6737 }\r
6738 goto add_char;\r
14b0e578
CS
6739 }\r
6740 else {\r
b602265d
DG
6741 if (c == '\\') {\r
6742 esc = 1;\r
6743 eesc = e;\r
6744 }\r
6745 else if (c == cterm || c == ',')\r
6746 break;\r
6747 else {\r
6748 size_t clen;\r
14b0e578 6749\r
b602265d
DG
6750 add_char:\r
6751 if (skip_mode == 0) {\r
6752 clen = p - e;\r
6753 if (bufend + clen > buf + MAX_CALLOUT_ARG_BYTE_LENGTH)\r
6754 return ONIGERR_INVALID_CALLOUT_ARG; /* too long argument */\r
14b0e578 6755\r
b602265d
DG
6756 xmemcpy(bufend, e, clen);\r
6757 bufend += clen;\r
6758 }\r
6759 cn++;\r
6760 }\r
14b0e578 6761 }\r
b602265d 6762 }\r
14b0e578 6763\r
b602265d
DG
6764 if (cn != 0) {\r
6765 if (skip_mode == 0) {\r
6766 if ((types[n] & ONIG_TYPE_LONG) != 0) {\r
6767 int fixed = 0;\r
6768 if (cn > 0) {\r
6769 long rl;\r
6770 r = parse_long(enc, buf, bufend, 1, LONG_MAX, &rl);\r
6771 if (r == ONIG_NORMAL) {\r
6772 vals[n].l = rl;\r
6773 fixed = 1;\r
6774 types[n] = ONIG_TYPE_LONG;\r
6775 }\r
6776 }\r
14b0e578 6777\r
b602265d
DG
6778 if (fixed == 0) {\r
6779 types[n] = (types[n] & ~ONIG_TYPE_LONG);\r
6780 if (types[n] == ONIG_TYPE_VOID)\r
6781 return ONIGERR_INVALID_CALLOUT_ARG;\r
6782 }\r
6783 }\r
14b0e578 6784\r
b602265d
DG
6785 switch (types[n]) {\r
6786 case ONIG_TYPE_LONG:\r
6787 break;\r
14b0e578 6788\r
b602265d
DG
6789 case ONIG_TYPE_CHAR:\r
6790 if (cn != 1) return ONIGERR_INVALID_CALLOUT_ARG;\r
6791 vals[n].c = ONIGENC_MBC_TO_CODE(enc, buf, bufend);\r
6792 break;\r
14b0e578 6793\r
b602265d
DG
6794 case ONIG_TYPE_STRING:\r
6795 {\r
6796 UChar* rs = onigenc_strdup(enc, buf, bufend);\r
6797 CHECK_NULL_RETURN_MEMERR(rs);\r
6798 vals[n].s.start = rs;\r
6799 vals[n].s.end = rs + (e - s);\r
6800 }\r
6801 break;\r
14b0e578 6802\r
b602265d
DG
6803 case ONIG_TYPE_TAG:\r
6804 if (eesc != 0 || ! is_allowed_callout_tag_name(enc, s, e))\r
6805 return ONIGERR_INVALID_CALLOUT_TAG_NAME;\r
14b0e578 6806\r
b602265d
DG
6807 vals[n].s.start = s;\r
6808 vals[n].s.end = e;\r
6809 break;\r
6810\r
6811 case ONIG_TYPE_VOID:\r
6812 case ONIG_TYPE_POINTER:\r
6813 return ONIGERR_PARSER_BUG;\r
6814 break;\r
6815 }\r
14b0e578 6816 }\r
14b0e578 6817\r
b602265d
DG
6818 n++;\r
6819 }\r
14b0e578 6820\r
b602265d
DG
6821 if (c == cterm) break;\r
6822 }\r
14b0e578 6823\r
b602265d 6824 if (c != cterm) return ONIGERR_INVALID_CALLOUT_PATTERN;\r
14b0e578 6825\r
b602265d
DG
6826 *src = p;\r
6827 return n;\r
6828}\r
14b0e578 6829\r
b602265d
DG
6830/* (*name[TAG]) (*name[TAG]{a,b,..}) */\r
6831static int\r
6832parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* env)\r
6833{\r
6834 int r;\r
6835 int i;\r
6836 int in;\r
6837 int num;\r
6838 int name_id;\r
6839 int arg_num;\r
6840 int max_arg_num;\r
6841 int opt_arg_num;\r
6842 int is_not_single;\r
6843 OnigCodePoint c;\r
6844 UChar* name_start;\r
6845 UChar* name_end;\r
6846 UChar* tag_start;\r
6847 UChar* tag_end;\r
6848 Node* node;\r
6849 CalloutListEntry* e;\r
6850 RegexExt* ext;\r
6851 unsigned int types[ONIG_CALLOUT_MAX_ARGS_NUM];\r
6852 OnigValue vals[ONIG_CALLOUT_MAX_ARGS_NUM];\r
6853 OnigEncoding enc = env->enc;\r
6854 UChar* p = *src;\r
14b0e578 6855\r
b602265d
DG
6856 /* PFETCH_READY; */\r
6857 if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;\r
6858\r
6859 node = 0;\r
6860 name_start = p;\r
6861 while (1) {\r
6862 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
6863 name_end = p;\r
6864 PFETCH_S(c);\r
6865 if (c == cterm || c == '[' || c == '{') break;\r
6866 }\r
6867\r
6868 if (! is_allowed_callout_name(enc, name_start, name_end))\r
6869 return ONIGERR_INVALID_CALLOUT_NAME;\r
6870\r
6871 if (c == '[') {\r
6872 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
6873 tag_start = p;\r
6874 while (! PEND) {\r
6875 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
6876 tag_end = p;\r
6877 PFETCH_S(c);\r
6878 if (c == ']') break;\r
14b0e578 6879 }\r
b602265d
DG
6880 if (! is_allowed_callout_tag_name(enc, tag_start, tag_end))\r
6881 return ONIGERR_INVALID_CALLOUT_TAG_NAME;\r
6882\r
6883 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
6884 PFETCH_S(c);\r
6885 }\r
6886 else {\r
6887 tag_start = tag_end = 0;\r
14b0e578
CS
6888 }\r
6889\r
b602265d
DG
6890 if (c == '{') {\r
6891 UChar* save;\r
6892\r
6893 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
6894\r
6895 /* read for single check only */\r
6896 save = p;\r
6897 arg_num = parse_callout_args(1, '}', &p, end, 0, 0, env);\r
6898 if (arg_num < 0) return arg_num;\r
6899\r
6900 is_not_single = PPEEK_IS(cterm) ? 0 : 1;\r
6901 p = save;\r
6902 r = get_callout_name_id_by_name(enc, is_not_single, name_start, name_end,\r
6903 &name_id);\r
6904 if (r != ONIG_NORMAL) return r;\r
6905\r
6906 max_arg_num = get_callout_arg_num_by_name_id(name_id);\r
6907 for (i = 0; i < max_arg_num; i++) {\r
6908 types[i] = get_callout_arg_type_by_name_id(name_id, i);\r
6909 }\r
6910\r
6911 arg_num = parse_callout_args(0, '}', &p, end, types, vals, env);\r
6912 if (arg_num < 0) return arg_num;\r
6913\r
6914 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
6915 PFETCH_S(c);\r
14b0e578 6916 }\r
b602265d
DG
6917 else {\r
6918 arg_num = 0;\r
14b0e578 6919\r
b602265d
DG
6920 is_not_single = 0;\r
6921 r = get_callout_name_id_by_name(enc, is_not_single, name_start, name_end,\r
6922 &name_id);\r
6923 if (r != ONIG_NORMAL) return r;\r
6924\r
6925 max_arg_num = get_callout_arg_num_by_name_id(name_id);\r
6926 for (i = 0; i < max_arg_num; i++) {\r
6927 types[i] = get_callout_arg_type_by_name_id(name_id, i);\r
6928 }\r
14b0e578
CS
6929 }\r
6930\r
b602265d
DG
6931 in = onig_get_callout_in_by_name_id(name_id);\r
6932 opt_arg_num = get_callout_opt_arg_num_by_name_id(name_id);\r
6933 if (arg_num > max_arg_num || arg_num < (max_arg_num - opt_arg_num))\r
6934 return ONIGERR_INVALID_CALLOUT_ARG;\r
14b0e578 6935\r
b602265d
DG
6936 if (c != cterm)\r
6937 return ONIGERR_INVALID_CALLOUT_PATTERN;\r
14b0e578 6938\r
b602265d
DG
6939 r = reg_callout_list_entry(env, &num);\r
6940 if (r != 0) return r;\r
14b0e578 6941\r
b602265d 6942 ext = onig_get_regex_ext(env->reg);\r
df8be9e5 6943 CHECK_NULL_RETURN_MEMERR(ext);\r
b602265d
DG
6944 if (IS_NULL(ext->pattern)) {\r
6945 r = onig_ext_set_pattern(env->reg, env->pattern, env->pattern_end);\r
6946 if (r != ONIG_NORMAL) return r;\r
6947 }\r
6948\r
6949 if (tag_start != tag_end) {\r
6950 r = callout_tag_entry(env->reg, tag_start, tag_end, num);\r
6951 if (r != ONIG_NORMAL) return r;\r
6952 }\r
6953\r
6954 r = node_new_callout(&node, ONIG_CALLOUT_OF_NAME, num, name_id, env);\r
6955 if (r != ONIG_NORMAL) return r;\r
6956\r
6957 e = onig_reg_callout_list_at(env->reg, num);\r
a5def177
DG
6958 CHECK_NULL_RETURN_MEMERR(e);\r
6959\r
b602265d
DG
6960 e->of = ONIG_CALLOUT_OF_NAME;\r
6961 e->in = in;\r
6962 e->name_id = name_id;\r
6963 e->type = onig_get_callout_type_by_name_id(name_id);\r
6964 e->start_func = onig_get_callout_start_func_by_name_id(name_id);\r
6965 e->end_func = onig_get_callout_end_func_by_name_id(name_id);\r
6966 e->u.arg.num = max_arg_num;\r
6967 e->u.arg.passed_num = arg_num;\r
6968 for (i = 0; i < max_arg_num; i++) {\r
6969 e->u.arg.types[i] = types[i];\r
6970 if (i < arg_num)\r
6971 e->u.arg.vals[i] = vals[i];\r
6972 else\r
6973 e->u.arg.vals[i] = get_callout_opt_default_by_name_id(name_id, i);\r
14b0e578 6974 }\r
b602265d
DG
6975\r
6976 *np = node;\r
14b0e578
CS
6977 *src = p;\r
6978 return 0;\r
14b0e578 6979}\r
b602265d 6980#endif\r
14b0e578
CS
6981\r
6982static int\r
b602265d
DG
6983parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,\r
6984 ScanEnv* env)\r
14b0e578
CS
6985{\r
6986 int r, num;\r
6987 Node *target;\r
6988 OnigOptionType option;\r
6989 OnigCodePoint c;\r
b602265d 6990 int list_capture;\r
14b0e578
CS
6991 OnigEncoding enc = env->enc;\r
6992\r
b602265d
DG
6993 UChar* p = *src;\r
6994 PFETCH_READY;\r
6995\r
6996 *np = NULL;\r
6997 if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;\r
6998\r
6999 option = env->options;\r
7000 c = PPEEK;\r
7001 if (c == '?' && IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {\r
7002 PINC;\r
7003 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
7004\r
7005 PFETCH(c);\r
7006 switch (c) {\r
7007 case ':': /* (?:...) grouping only */\r
7008 group:\r
7009 r = fetch_token(tok, &p, end, env);\r
7010 if (r < 0) return r;\r
7011 r = parse_subexp(np, tok, term, &p, end, env);\r
7012 if (r < 0) return r;\r
7013 *src = p;\r
7014 return 1; /* group */\r
7015 break;\r
7016\r
7017 case '=':\r
7018 *np = onig_node_new_anchor(ANCHOR_PREC_READ, 0);\r
7019 break;\r
7020 case '!': /* preceding read */\r
7021 *np = onig_node_new_anchor(ANCHOR_PREC_READ_NOT, 0);\r
7022 break;\r
7023 case '>': /* (?>...) stop backtrack */\r
7024 *np = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);\r
7025 break;\r
7026\r
7027 case '\'':\r
7028 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {\r
7029 goto named_group1;\r
7030 }\r
7031 else\r
7032 return ONIGERR_UNDEFINED_GROUP_OPTION;\r
7033 break;\r
7034\r
7035 case '<': /* look behind (?<=...), (?<!...) */\r
7036 if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;\r
7037 PFETCH(c);\r
7038 if (c == '=')\r
7039 *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND, 0);\r
7040 else if (c == '!')\r
7041 *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND_NOT, 0);\r
7042 else {\r
7043 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {\r
7044 UChar *name;\r
7045 UChar *name_end;\r
7046 enum REF_NUM num_type;\r
7047\r
7048 PUNFETCH;\r
7049 c = '<';\r
7050\r
7051 named_group1:\r
7052 list_capture = 0;\r
7053\r
7054 named_group2:\r
7055 name = p;\r
7056 r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num,\r
7057 &num_type, 0);\r
7058 if (r < 0) return r;\r
7059\r
7060 num = scan_env_add_mem_entry(env);\r
7061 if (num < 0) return num;\r
7062 if (list_capture != 0 && num >= (int )MEM_STATUS_BITS_NUM)\r
7063 return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;\r
7064\r
7065 r = name_add(env->reg, name, name_end, num, env);\r
7066 if (r != 0) return r;\r
7067 *np = node_new_memory(1);\r
7068 CHECK_NULL_RETURN_MEMERR(*np);\r
7069 ENCLOSURE_(*np)->m.regnum = num;\r
7070 if (list_capture != 0)\r
7071 MEM_STATUS_ON_SIMPLE(env->capture_history, num);\r
7072 env->num_named++;\r
7073 }\r
7074 else {\r
7075 return ONIGERR_UNDEFINED_GROUP_OPTION;\r
7076 }\r
7077 }\r
7078 break;\r
7079\r
7080 case '~':\r
7081 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP)) {\r
7082 Node* absent;\r
7083 Node* expr;\r
7084 int head_bar;\r
7085 int is_range_cutter;\r
7086\r
7087 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
7088\r
7089 if (PPEEK_IS('|')) { /* (?~|generator|absent) */\r
7090 PINC;\r
7091 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
7092\r
7093 head_bar = 1;\r
7094 if (PPEEK_IS(')')) { /* (?~|) : range clear */\r
7095 PINC;\r
7096 r = make_range_clear(np, env);\r
7097 if (r != 0) return r;\r
7098 goto end;\r
7099 }\r
7100 }\r
7101 else\r
7102 head_bar = 0;\r
7103\r
7104 r = fetch_token(tok, &p, end, env);\r
7105 if (r < 0) return r;\r
7106 r = parse_subexp(&absent, tok, term, &p, end, env);\r
7107 if (r < 0) {\r
7108 onig_node_free(absent);\r
7109 return r;\r
7110 }\r
7111\r
7112 expr = NULL_NODE;\r
7113 is_range_cutter = 0;\r
7114 if (head_bar != 0) {\r
7115 Node* top = absent;\r
7116 if (NODE_TYPE(top) != NODE_ALT || IS_NULL(NODE_CDR(top))) {\r
7117 expr = NULL_NODE;\r
7118 is_range_cutter = 1;\r
7119 /* return ONIGERR_INVALID_ABSENT_GROUP_GENERATOR_PATTERN; */\r
7120 }\r
7121 else {\r
7122 absent = NODE_CAR(top);\r
7123 expr = NODE_CDR(top);\r
7124 NODE_CAR(top) = NULL_NODE;\r
7125 NODE_CDR(top) = NULL_NODE;\r
7126 onig_node_free(top);\r
7127 if (IS_NULL(NODE_CDR(expr))) {\r
7128 top = expr;\r
7129 expr = NODE_CAR(top);\r
7130 NODE_CAR(top) = NULL_NODE;\r
7131 onig_node_free(top);\r
7132 }\r
7133 }\r
7134 }\r
7135\r
7136 r = make_absent_tree(np, absent, expr, is_range_cutter, env);\r
7137 if (r != 0) {\r
7138 return r;\r
7139 }\r
7140 goto end;\r
7141 }\r
7142 else {\r
7143 return ONIGERR_UNDEFINED_GROUP_OPTION;\r
7144 }\r
7145 break;\r
7146\r
7147#ifdef USE_CALLOUT\r
7148 case '{':\r
7149 if (! IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS))\r
7150 return ONIGERR_UNDEFINED_GROUP_OPTION;\r
7151\r
7152 r = parse_callout_of_contents(np, ')', &p, end, env);\r
7153 if (r != 0) return r;\r
7154\r
7155 goto end;\r
7156 break;\r
7157#endif\r
7158\r
7159 case '(':\r
7160 /* (?()...) */\r
7161 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE)) {\r
7162 UChar *prev;\r
7163 Node* condition;\r
7164 int condition_is_checker;\r
7165\r
7166 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
7167 PFETCH(c);\r
7168 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
7169\r
7170 if (IS_CODE_DIGIT_ASCII(enc, c)\r
7171 || c == '-' || c == '+' || c == '<' || c == '\'') {\r
7172 UChar* name_end;\r
7173 int back_num;\r
7174 int exist_level;\r
7175 int level;\r
7176 enum REF_NUM num_type;\r
7177 int is_enclosed;\r
7178\r
7179 is_enclosed = (c == '<' || c == '\'') ? 1 : 0;\r
7180 if (! is_enclosed)\r
7181 PUNFETCH;\r
7182 prev = p;\r
7183 exist_level = 0;\r
7184#ifdef USE_BACKREF_WITH_LEVEL\r
7185 name_end = NULL_UCHARP; /* no need. escape gcc warning. */\r
7186 r = fetch_name_with_level(\r
7187 (OnigCodePoint )(is_enclosed != 0 ? c : '('),\r
7188 &p, end, &name_end,\r
7189 env, &back_num, &level, &num_type);\r
7190 if (r == 1) exist_level = 1;\r
7191#else\r
7192 r = fetch_name((OnigCodePoint )(is_enclosed != 0 ? c : '('),\r
7193 &p, end, &name_end, env, &back_num, &num_type, 1);\r
7194#endif\r
7195 if (r < 0) {\r
7196 if (is_enclosed == 0) {\r
7197 goto any_condition;\r
7198 }\r
7199 else\r
7200 return r;\r
7201 }\r
7202\r
7203 condition_is_checker = 1;\r
7204 if (num_type != IS_NOT_NUM) {\r
7205 if (num_type == IS_REL_NUM) {\r
7206 back_num = backref_rel_to_abs(back_num, env);\r
7207 }\r
7208 if (back_num <= 0)\r
7209 return ONIGERR_INVALID_BACKREF;\r
7210\r
7211 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_STRICT_CHECK_BACKREF)) {\r
7212 if (back_num > env->num_mem ||\r
7213 IS_NULL(SCANENV_MEMENV(env)[back_num].node))\r
7214 return ONIGERR_INVALID_BACKREF;\r
7215 }\r
7216\r
7217 condition = node_new_backref_checker(1, &back_num, 0,\r
7218#ifdef USE_BACKREF_WITH_LEVEL\r
7219 exist_level, level,\r
7220#endif\r
7221 env);\r
7222 }\r
7223 else {\r
7224 int num;\r
7225 int* backs;\r
7226\r
7227 num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);\r
7228 if (num <= 0) {\r
7229 onig_scan_env_set_error_string(env,\r
7230 ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);\r
7231 return ONIGERR_UNDEFINED_NAME_REFERENCE;\r
7232 }\r
7233 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_STRICT_CHECK_BACKREF)) {\r
7234 int i;\r
7235 for (i = 0; i < num; i++) {\r
7236 if (backs[i] > env->num_mem ||\r
7237 IS_NULL(SCANENV_MEMENV(env)[backs[i]].node))\r
7238 return ONIGERR_INVALID_BACKREF;\r
7239 }\r
7240 }\r
7241\r
7242 condition = node_new_backref_checker(num, backs, 1,\r
7243#ifdef USE_BACKREF_WITH_LEVEL\r
7244 exist_level, level,\r
7245#endif\r
7246 env);\r
7247 }\r
7248\r
7249 if (is_enclosed != 0) {\r
7250 if (PEND) goto err_if_else;\r
7251 PFETCH(c);\r
7252 if (c != ')') goto err_if_else;\r
7253 }\r
7254 }\r
7255#ifdef USE_CALLOUT\r
7256 else if (c == '?') {\r
7257 if (IS_SYNTAX_OP2(env->syntax,\r
7258 ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS)) {\r
7259 if (! PEND && PPEEK_IS('{')) {\r
7260 /* condition part is callouts of contents: (?(?{...})THEN|ELSE) */\r
7261 condition_is_checker = 0;\r
7262 PFETCH(c);\r
7263 r = parse_callout_of_contents(&condition, ')', &p, end, env);\r
7264 if (r != 0) return r;\r
7265 goto end_condition;\r
7266 }\r
7267 }\r
7268 goto any_condition;\r
7269 }\r
7270 else if (c == '*' &&\r
7271 IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME)) {\r
7272 condition_is_checker = 0;\r
7273 r = parse_callout_of_name(&condition, ')', &p, end, env);\r
7274 if (r != 0) return r;\r
7275 goto end_condition;\r
7276 }\r
14b0e578 7277#endif\r
b602265d
DG
7278 else {\r
7279 any_condition:\r
7280 PUNFETCH;\r
7281 condition_is_checker = 0;\r
7282 r = fetch_token(tok, &p, end, env);\r
7283 if (r < 0) return r;\r
7284 r = parse_subexp(&condition, tok, term, &p, end, env);\r
7285 if (r < 0) {\r
7286 onig_node_free(condition);\r
7287 return r;\r
7288 }\r
7289 }\r
14b0e578 7290\r
b602265d
DG
7291 end_condition:\r
7292 CHECK_NULL_RETURN_MEMERR(condition);\r
14b0e578 7293\r
b602265d
DG
7294 if (PEND) {\r
7295 err_if_else:\r
7296 onig_node_free(condition);\r
7297 return ONIGERR_END_PATTERN_IN_GROUP;\r
7298 }\r
14b0e578 7299\r
b602265d
DG
7300 if (PPEEK_IS(')')) { /* case: empty body: make backref checker */\r
7301 if (condition_is_checker == 0) {\r
7302 onig_node_free(condition);\r
7303 return ONIGERR_INVALID_IF_ELSE_SYNTAX;\r
7304 }\r
7305 PFETCH(c);\r
7306 *np = condition;\r
7307 }\r
7308 else { /* if-else */\r
7309 int then_is_empty;\r
7310 Node *Then, *Else;\r
14b0e578 7311\r
b602265d
DG
7312 if (PPEEK_IS('|')) {\r
7313 PFETCH(c);\r
7314 Then = 0;\r
7315 then_is_empty = 1;\r
7316 }\r
7317 else\r
7318 then_is_empty = 0;\r
14b0e578 7319\r
b602265d
DG
7320 r = fetch_token(tok, &p, end, env);\r
7321 if (r < 0) {\r
7322 onig_node_free(condition);\r
7323 return r;\r
7324 }\r
7325 r = parse_subexp(&target, tok, term, &p, end, env);\r
7326 if (r < 0) {\r
7327 onig_node_free(condition);\r
7328 onig_node_free(target);\r
7329 return r;\r
7330 }\r
14b0e578 7331\r
b602265d
DG
7332 if (then_is_empty != 0) {\r
7333 Else = target;\r
7334 }\r
7335 else {\r
7336 if (NODE_TYPE(target) == NODE_ALT) {\r
7337 Then = NODE_CAR(target);\r
7338 if (NODE_CDR(NODE_CDR(target)) == NULL_NODE) {\r
7339 Else = NODE_CAR(NODE_CDR(target));\r
7340 cons_node_free_alone(NODE_CDR(target));\r
7341 }\r
7342 else {\r
7343 Else = NODE_CDR(target);\r
7344 }\r
7345 cons_node_free_alone(target);\r
7346 }\r
7347 else {\r
7348 Then = target;\r
7349 Else = 0;\r
7350 }\r
7351 }\r
14b0e578 7352\r
b602265d
DG
7353 *np = node_new_enclosure_if_else(condition, Then, Else);\r
7354 if (IS_NULL(*np)) {\r
7355 onig_node_free(condition);\r
7356 onig_node_free(Then);\r
7357 onig_node_free(Else);\r
7358 return ONIGERR_MEMORY;\r
7359 }\r
7360 }\r
7361 goto end;\r
14b0e578 7362 }\r
14b0e578 7363 else {\r
b602265d 7364 return ONIGERR_UNDEFINED_GROUP_OPTION;\r
14b0e578 7365 }\r
14b0e578
CS
7366 break;\r
7367\r
7368 case '@':\r
7369 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) {\r
b602265d
DG
7370 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {\r
7371 PFETCH(c);\r
7372 if (c == '<' || c == '\'') {\r
7373 list_capture = 1;\r
7374 goto named_group2; /* (?@<name>...) */\r
7375 }\r
7376 PUNFETCH;\r
7377 }\r
7378\r
7379 *np = node_new_memory(0);\r
7380 CHECK_NULL_RETURN_MEMERR(*np);\r
7381 num = scan_env_add_mem_entry(env);\r
7382 if (num < 0) {\r
7383 return num;\r
7384 }\r
7385 else if (num >= (int )MEM_STATUS_BITS_NUM) {\r
7386 return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;\r
7387 }\r
7388 ENCLOSURE_(*np)->m.regnum = num;\r
7389 MEM_STATUS_ON_SIMPLE(env->capture_history, num);\r
14b0e578
CS
7390 }\r
7391 else {\r
b602265d 7392 return ONIGERR_UNDEFINED_GROUP_OPTION;\r
14b0e578
CS
7393 }\r
7394 break;\r
7395\r
7396#ifdef USE_POSIXLINE_OPTION\r
7397 case 'p':\r
7398#endif\r
7399 case '-': case 'i': case 'm': case 's': case 'x':\r
b602265d 7400 case 'W': case 'D': case 'S': case 'P':\r
14b0e578 7401 {\r
b602265d
DG
7402 int neg = 0;\r
7403\r
7404 while (1) {\r
7405 switch (c) {\r
7406 case ':':\r
7407 case ')':\r
7408 break;\r
7409\r
7410 case '-': neg = 1; break;\r
7411 case 'x': OPTION_NEGATE(option, ONIG_OPTION_EXTEND, neg); break;\r
7412 case 'i': OPTION_NEGATE(option, ONIG_OPTION_IGNORECASE, neg); break;\r
7413 case 's':\r
7414 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {\r
7415 OPTION_NEGATE(option, ONIG_OPTION_MULTILINE, neg);\r
7416 }\r
7417 else\r
7418 return ONIGERR_UNDEFINED_GROUP_OPTION;\r
7419 break;\r
7420\r
7421 case 'm':\r
7422 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {\r
7423 OPTION_NEGATE(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 1 : 0));\r
7424 }\r
7425 else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) {\r
7426 OPTION_NEGATE(option, ONIG_OPTION_MULTILINE, neg);\r
7427 }\r
7428 else\r
7429 return ONIGERR_UNDEFINED_GROUP_OPTION;\r
7430 break;\r
14b0e578 7431#ifdef USE_POSIXLINE_OPTION\r
b602265d
DG
7432 case 'p':\r
7433 OPTION_NEGATE(option, ONIG_OPTION_MULTILINE|ONIG_OPTION_SINGLELINE, neg);\r
7434 break;\r
14b0e578 7435#endif\r
b602265d
DG
7436 case 'W': OPTION_NEGATE(option, ONIG_OPTION_WORD_IS_ASCII, neg); break;\r
7437 case 'D': OPTION_NEGATE(option, ONIG_OPTION_DIGIT_IS_ASCII, neg); break;\r
7438 case 'S': OPTION_NEGATE(option, ONIG_OPTION_SPACE_IS_ASCII, neg); break;\r
7439 case 'P': OPTION_NEGATE(option, ONIG_OPTION_POSIX_IS_ASCII, neg); break;\r
7440\r
7441 default:\r
7442 return ONIGERR_UNDEFINED_GROUP_OPTION;\r
7443 }\r
7444\r
7445 if (c == ')') {\r
7446 *np = node_new_option(option);\r
7447 CHECK_NULL_RETURN_MEMERR(*np);\r
7448 *src = p;\r
7449 return 2; /* option only */\r
7450 }\r
7451 else if (c == ':') {\r
7452 OnigOptionType prev = env->options;\r
7453\r
7454 env->options = option;\r
7455 r = fetch_token(tok, &p, end, env);\r
7456 if (r < 0) return r;\r
7457 r = parse_subexp(&target, tok, term, &p, end, env);\r
7458 env->options = prev;\r
7459 if (r < 0) {\r
7460 onig_node_free(target);\r
7461 return r;\r
7462 }\r
7463 *np = node_new_option(option);\r
7464 CHECK_NULL_RETURN_MEMERR(*np);\r
7465 NODE_BODY(*np) = target;\r
7466 *src = p;\r
7467 return 0;\r
7468 }\r
7469\r
7470 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
7471 PFETCH(c);\r
7472 }\r
14b0e578
CS
7473 }\r
7474 break;\r
7475\r
7476 default:\r
7477 return ONIGERR_UNDEFINED_GROUP_OPTION;\r
7478 }\r
7479 }\r
b602265d
DG
7480#ifdef USE_CALLOUT\r
7481 else if (c == '*' &&\r
7482 IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME)) {\r
7483 PINC;\r
7484 r = parse_callout_of_name(np, ')', &p, end, env);\r
7485 if (r != 0) return r;\r
7486\r
7487 goto end;\r
7488 }\r
7489#endif\r
14b0e578 7490 else {\r
b602265d 7491 if (ONIG_IS_OPTION_ON(env->options, ONIG_OPTION_DONT_CAPTURE_GROUP))\r
14b0e578
CS
7492 goto group;\r
7493\r
b602265d 7494 *np = node_new_memory(0);\r
14b0e578
CS
7495 CHECK_NULL_RETURN_MEMERR(*np);\r
7496 num = scan_env_add_mem_entry(env);\r
7497 if (num < 0) return num;\r
b602265d 7498 ENCLOSURE_(*np)->m.regnum = num;\r
14b0e578
CS
7499 }\r
7500\r
7501 CHECK_NULL_RETURN_MEMERR(*np);\r
7502 r = fetch_token(tok, &p, end, env);\r
7503 if (r < 0) return r;\r
7504 r = parse_subexp(&target, tok, term, &p, end, env);\r
b602265d
DG
7505 if (r < 0) {\r
7506 onig_node_free(target);\r
7507 return r;\r
7508 }\r
14b0e578 7509\r
b602265d
DG
7510 NODE_BODY(*np) = target;\r
7511\r
7512 if (NODE_TYPE(*np) == NODE_ENCLOSURE) {\r
7513 if (ENCLOSURE_(*np)->type == ENCLOSURE_MEMORY) {\r
14b0e578 7514 /* Don't move this to previous of parse_subexp() */\r
b602265d 7515 r = scan_env_set_mem_node(env, ENCLOSURE_(*np)->m.regnum, *np);\r
14b0e578
CS
7516 if (r != 0) return r;\r
7517 }\r
7518 }\r
7519\r
b602265d 7520 end:\r
14b0e578
CS
7521 *src = p;\r
7522 return 0;\r
7523}\r
7524\r
/* Quantifier spellings used in the nested-repeat warning text; indexed by
   the value quantifier_type_num() yields for a quantifier. */
static const char* PopularQStr[] = {
  "?",
  "*",
  "+",
  "??",
  "*?",
  "+?"
};
7528\r
/* Replacement text shown by the nested-repeat warning; indexed by a
   ReduceTypeTable[][] entry.  The first two slots are empty strings. */
static const char* ReduceQStr[] = {
  "",
  "",
  "*",
  "*?",
  "??",
  "+ and ??",
  "+? and ?"
};
7532\r
7533static int\r
7534set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)\r
7535{\r
b602265d 7536 QuantNode* qn;\r
14b0e578 7537\r
b602265d
DG
7538 qn = QUANT_(qnode);\r
7539 if (qn->lower == 1 && qn->upper == 1)\r
14b0e578 7540 return 1;\r
14b0e578 7541\r
b602265d
DG
7542 switch (NODE_TYPE(target)) {\r
7543 case NODE_STRING:\r
14b0e578 7544 if (! group) {\r
b602265d
DG
7545 if (str_node_can_be_split(target, env->enc)) {\r
7546 Node* n = str_node_split_last_char(target, env->enc);\r
7547 if (IS_NOT_NULL(n)) {\r
7548 NODE_BODY(qnode) = n;\r
7549 return 2;\r
7550 }\r
14b0e578
CS
7551 }\r
7552 }\r
7553 break;\r
7554\r
b602265d 7555 case NODE_QUANT:\r
14b0e578
CS
7556 { /* check redundant double repeat. */\r
7557 /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */\r
b602265d
DG
7558 QuantNode* qnt = QUANT_(target);\r
7559 int nestq_num = quantifier_type_num(qn);\r
7560 int targetq_num = quantifier_type_num(qnt);\r
14b0e578
CS
7561\r
7562#ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR\r
b602265d
DG
7563 if (targetq_num >= 0 && nestq_num >= 0 &&\r
7564 IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {\r
14b0e578
CS
7565 UChar buf[WARN_BUFSIZE];\r
7566\r
7567 switch(ReduceTypeTable[targetq_num][nestq_num]) {\r
7568 case RQ_ASIS:\r
7569 break;\r
7570\r
7571 case RQ_DEL:\r
7572 if (onig_verb_warn != onig_null_warn) {\r
7573 onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,\r
b602265d
DG
7574 env->pattern, env->pattern_end,\r
7575 (UChar* )"redundant nested repeat operator");\r
14b0e578
CS
7576 (*onig_verb_warn)((char* )buf);\r
7577 }\r
7578 goto warn_exit;\r
7579 break;\r
7580\r
7581 default:\r
7582 if (onig_verb_warn != onig_null_warn) {\r
7583 onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,\r
7584 env->pattern, env->pattern_end,\r
7585 (UChar* )"nested repeat operator %s and %s was replaced with '%s'",\r
7586 PopularQStr[targetq_num], PopularQStr[nestq_num],\r
7587 ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]);\r
7588 (*onig_verb_warn)((char* )buf);\r
7589 }\r
7590 goto warn_exit;\r
7591 break;\r
7592 }\r
7593 }\r
7594\r
7595 warn_exit:\r
7596#endif\r
b602265d
DG
7597 if (targetq_num >= 0 && nestq_num < 0) {\r
7598 if (targetq_num == 1 || targetq_num == 2) { /* * or + */\r
7599 /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */\r
7600 if (! IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) {\r
7601 qn->upper = (qn->lower == 0 ? 1 : qn->lower);\r
7602 }\r
7603 }\r
7604 }\r
7605 else {\r
7606 NODE_BODY(qnode) = target;\r
7607 onig_reduce_nested_quantifier(qnode, target);\r
7608 goto q_exit;\r
14b0e578
CS
7609 }\r
7610 }\r
7611 break;\r
7612\r
7613 default:\r
7614 break;\r
7615 }\r
7616\r
b602265d 7617 NODE_BODY(qnode) = target;\r
14b0e578
CS
7618 q_exit:\r
7619 return 0;\r
7620}\r
7621\r
7622\r
14b0e578
CS
7623#ifndef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS\r
7624static int\r
7625clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc)\r
7626{\r
7627 BBuf *tbuf;\r
7628 int r;\r
7629\r
7630 if (IS_NCCLASS_NOT(cc)) {\r
7631 bitset_invert(cc->bs);\r
7632\r
7633 if (! ONIGENC_IS_SINGLEBYTE(enc)) {\r
7634 r = not_code_range_buf(enc, cc->mbuf, &tbuf);\r
7635 if (r != 0) return r;\r
7636\r
7637 bbuf_free(cc->mbuf);\r
7638 cc->mbuf = tbuf;\r
7639 }\r
7640\r
7641 NCCLASS_CLEAR_NOT(cc);\r
7642 }\r
7643\r
7644 return 0;\r
7645}\r
7646#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */\r
7647\r
/* Argument block handed to i_apply_case_fold() through its void* arg. */
typedef struct {
  ScanEnv*    env;       /* scan environment; supplies the encoding */
  CClassNode* cc;        /* character class being extended with folds */
  Node*       alt_root;  /* head of the alternatives built for multi-char folds */
  Node**      ptail;     /* slot where the next alternative node is stored */
} IApplyCaseFoldArg;
7654\r
7655static int\r
b602265d 7656i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], int to_len, void* arg)\r
14b0e578
CS
7657{\r
7658 IApplyCaseFoldArg* iarg;\r
7659 ScanEnv* env;\r
7660 CClassNode* cc;\r
7661 BitSetRef bs;\r
7662\r
7663 iarg = (IApplyCaseFoldArg* )arg;\r
7664 env = iarg->env;\r
7665 cc = iarg->cc;\r
7666 bs = cc->bs;\r
7667\r
7668 if (to_len == 1) {\r
7669 int is_in = onig_is_code_in_cc(env->enc, from, cc);\r
7670#ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS\r
7671 if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) ||\r
b602265d 7672 (is_in == 0 && IS_NCCLASS_NOT(cc))) {\r
14b0e578 7673 if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {\r
b602265d 7674 add_code_range(&(cc->mbuf), env, *to, *to);\r
14b0e578
CS
7675 }\r
7676 else {\r
b602265d 7677 BITSET_SET_BIT(bs, *to);\r
14b0e578
CS
7678 }\r
7679 }\r
7680#else\r
7681 if (is_in != 0) {\r
7682 if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {\r
b602265d
DG
7683 if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc);\r
7684 add_code_range(&(cc->mbuf), env, *to, *to);\r
14b0e578
CS
7685 }\r
7686 else {\r
b602265d
DG
7687 if (IS_NCCLASS_NOT(cc)) {\r
7688 BITSET_CLEAR_BIT(bs, *to);\r
7689 }\r
7690 else\r
7691 BITSET_SET_BIT(bs, *to);\r
14b0e578
CS
7692 }\r
7693 }\r
7694#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */\r
7695 }\r
7696 else {\r
7697 int r, i, len;\r
7698 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];\r
7699 Node *snode = NULL_NODE;\r
7700\r
7701 if (onig_is_code_in_cc(env->enc, from, cc)\r
7702#ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS\r
b602265d 7703 && !IS_NCCLASS_NOT(cc)\r
14b0e578 7704#endif\r
b602265d 7705 ) {\r
14b0e578 7706 for (i = 0; i < to_len; i++) {\r
b602265d
DG
7707 len = ONIGENC_CODE_TO_MBC(env->enc, to[i], buf);\r
7708 if (i == 0) {\r
7709 snode = onig_node_new_str(buf, buf + len);\r
7710 CHECK_NULL_RETURN_MEMERR(snode);\r
7711\r
7712 /* char-class expanded multi-char only\r
7713 compare with string folded at match time. */\r
7714 NODE_STRING_SET_AMBIG(snode);\r
7715 }\r
7716 else {\r
7717 r = onig_node_str_cat(snode, buf, buf + len);\r
7718 if (r < 0) {\r
7719 onig_node_free(snode);\r
7720 return r;\r
7721 }\r
7722 }\r
14b0e578
CS
7723 }\r
7724\r
7725 *(iarg->ptail) = onig_node_new_alt(snode, NULL_NODE);\r
7726 CHECK_NULL_RETURN_MEMERR(*(iarg->ptail));\r
b602265d 7727 iarg->ptail = &(NODE_CDR((*(iarg->ptail))));\r
14b0e578
CS
7728 }\r
7729 }\r
7730\r
7731 return 0;\r
7732}\r
7733\r
7734static int\r
b602265d
DG
7735parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,\r
7736 ScanEnv* env)\r
14b0e578
CS
7737{\r
7738 int r, len, group = 0;\r
7739 Node* qn;\r
7740 Node** targetp;\r
7741\r
7742 *np = NULL;\r
7743 if (tok->type == (enum TokenSyms )term)\r
7744 goto end_of_token;\r
7745\r
7746 switch (tok->type) {\r
7747 case TK_ALT:\r
7748 case TK_EOT:\r
7749 end_of_token:\r
16bd7c35
DG
7750 *np = node_new_empty();\r
7751 CHECK_NULL_RETURN_MEMERR(*np);\r
7752 return tok->type;\r
14b0e578
CS
7753 break;\r
7754\r
7755 case TK_SUBEXP_OPEN:\r
b602265d 7756 r = parse_enclosure(np, tok, TK_SUBEXP_CLOSE, src, end, env);\r
14b0e578
CS
7757 if (r < 0) return r;\r
7758 if (r == 1) group = 1;\r
7759 else if (r == 2) { /* option only */\r
7760 Node* target;\r
b602265d 7761 OnigOptionType prev = env->options;\r
14b0e578 7762\r
b602265d 7763 env->options = ENCLOSURE_(*np)->o.options;\r
14b0e578
CS
7764 r = fetch_token(tok, src, end, env);\r
7765 if (r < 0) return r;\r
7766 r = parse_subexp(&target, tok, term, src, end, env);\r
b602265d
DG
7767 env->options = prev;\r
7768 if (r < 0) {\r
7769 onig_node_free(target);\r
7770 return r;\r
7771 }\r
7772 NODE_BODY(*np) = target;\r
14b0e578
CS
7773 return tok->type;\r
7774 }\r
7775 break;\r
7776\r
7777 case TK_SUBEXP_CLOSE:\r
7778 if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP))\r
7779 return ONIGERR_UNMATCHED_CLOSE_PARENTHESIS;\r
7780\r
7781 if (tok->escaped) goto tk_raw_byte;\r
7782 else goto tk_byte;\r
7783 break;\r
7784\r
7785 case TK_STRING:\r
7786 tk_byte:\r
7787 {\r
7788 *np = node_new_str(tok->backp, *src);\r
7789 CHECK_NULL_RETURN_MEMERR(*np);\r
7790\r
7791 while (1) {\r
b602265d
DG
7792 r = fetch_token(tok, src, end, env);\r
7793 if (r < 0) return r;\r
7794 if (r != TK_STRING) break;\r
14b0e578 7795\r
b602265d
DG
7796 r = onig_node_str_cat(*np, tok->backp, *src);\r
7797 if (r < 0) return r;\r
14b0e578
CS
7798 }\r
7799\r
7800 string_end:\r
7801 targetp = np;\r
7802 goto repeat;\r
7803 }\r
7804 break;\r
7805\r
7806 case TK_RAW_BYTE:\r
7807 tk_raw_byte:\r
7808 {\r
7809 *np = node_new_str_raw_char((UChar )tok->u.c);\r
7810 CHECK_NULL_RETURN_MEMERR(*np);\r
7811 len = 1;\r
7812 while (1) {\r
b602265d
DG
7813 if (len >= ONIGENC_MBC_MINLEN(env->enc)) {\r
7814 if (len == enclen(env->enc, STR_(*np)->s)) {/* should not enclen_end() */\r
7815 r = fetch_token(tok, src, end, env);\r
7816 NODE_STRING_CLEAR_RAW(*np);\r
7817 goto string_end;\r
7818 }\r
7819 }\r
7820\r
7821 r = fetch_token(tok, src, end, env);\r
7822 if (r < 0) return r;\r
7823 if (r != TK_RAW_BYTE) {\r
7824 /* Don't use this, it is wrong for little endian encodings. */\r
14b0e578 7825#ifdef USE_PAD_TO_SHORT_BYTE_CHAR\r
b602265d
DG
7826 int rem;\r
7827 if (len < ONIGENC_MBC_MINLEN(env->enc)) {\r
7828 rem = ONIGENC_MBC_MINLEN(env->enc) - len;\r
7829 (void )node_str_head_pad(STR_(*np), rem, (UChar )0);\r
7830 if (len + rem == enclen(env->enc, STR_(*np)->s)) {\r
7831 NODE_STRING_CLEAR_RAW(*np);\r
7832 goto string_end;\r
7833 }\r
7834 }\r
14b0e578 7835#endif\r
b602265d
DG
7836 return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;\r
7837 }\r
14b0e578 7838\r
b602265d
DG
7839 r = node_str_cat_char(*np, (UChar )tok->u.c);\r
7840 if (r < 0) return r;\r
14b0e578 7841\r
b602265d 7842 len++;\r
14b0e578
CS
7843 }\r
7844 }\r
7845 break;\r
7846\r
7847 case TK_CODE_POINT:\r
7848 {\r
7849 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];\r
7850 int num = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf);\r
7851 if (num < 0) return num;\r
7852#ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG\r
7853 *np = node_new_str_raw(buf, buf + num);\r
7854#else\r
7855 *np = node_new_str(buf, buf + num);\r
7856#endif\r
7857 CHECK_NULL_RETURN_MEMERR(*np);\r
7858 }\r
7859 break;\r
7860\r
7861 case TK_QUOTE_OPEN:\r
7862 {\r
7863 OnigCodePoint end_op[2];\r
7864 UChar *qstart, *qend, *nextp;\r
7865\r
7866 end_op[0] = (OnigCodePoint )MC_ESC(env->syntax);\r
7867 end_op[1] = (OnigCodePoint )'E';\r
7868 qstart = *src;\r
7869 qend = find_str_position(end_op, 2, qstart, end, &nextp, env->enc);\r
7870 if (IS_NULL(qend)) {\r
b602265d 7871 nextp = qend = end;\r
14b0e578
CS
7872 }\r
7873 *np = node_new_str(qstart, qend);\r
7874 CHECK_NULL_RETURN_MEMERR(*np);\r
7875 *src = nextp;\r
7876 }\r
7877 break;\r
7878\r
7879 case TK_CHAR_TYPE:\r
7880 {\r
7881 switch (tok->u.prop.ctype) {\r
7882 case ONIGENC_CTYPE_WORD:\r
b602265d
DG
7883 *np = node_new_ctype(tok->u.prop.ctype, tok->u.prop.not, env->options);\r
7884 CHECK_NULL_RETURN_MEMERR(*np);\r
7885 break;\r
14b0e578
CS
7886\r
7887 case ONIGENC_CTYPE_SPACE:\r
7888 case ONIGENC_CTYPE_DIGIT:\r
7889 case ONIGENC_CTYPE_XDIGIT:\r
b602265d
DG
7890 {\r
7891 CClassNode* cc;\r
7892\r
7893 *np = node_new_cclass();\r
7894 CHECK_NULL_RETURN_MEMERR(*np);\r
7895 cc = CCLASS_(*np);\r
7896 add_ctype_to_cc(cc, tok->u.prop.ctype, 0, env);\r
7897 if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);\r
7898 }\r
7899 break;\r
14b0e578
CS
7900\r
7901 default:\r
b602265d
DG
7902 return ONIGERR_PARSER_BUG;\r
7903 break;\r
14b0e578
CS
7904 }\r
7905 }\r
7906 break;\r
7907\r
7908 case TK_CHAR_PROPERTY:\r
7909 r = parse_char_property(np, tok, src, end, env);\r
7910 if (r != 0) return r;\r
7911 break;\r
7912\r
7913 case TK_CC_OPEN:\r
7914 {\r
7915 CClassNode* cc;\r
7916\r
7917 r = parse_char_class(np, tok, src, end, env);\r
7918 if (r != 0) return r;\r
7919\r
b602265d
DG
7920 cc = CCLASS_(*np);\r
7921 if (IS_IGNORECASE(env->options)) {\r
7922 IApplyCaseFoldArg iarg;\r
7923\r
7924 iarg.env = env;\r
7925 iarg.cc = cc;\r
7926 iarg.alt_root = NULL_NODE;\r
7927 iarg.ptail = &(iarg.alt_root);\r
7928\r
7929 r = ONIGENC_APPLY_ALL_CASE_FOLD(env->enc, env->case_fold_flag,\r
7930 i_apply_case_fold, &iarg);\r
7931 if (r != 0) {\r
7932 onig_node_free(iarg.alt_root);\r
7933 return r;\r
7934 }\r
7935 if (IS_NOT_NULL(iarg.alt_root)) {\r
14b0e578
CS
7936 Node* work = onig_node_new_alt(*np, iarg.alt_root);\r
7937 if (IS_NULL(work)) {\r
7938 onig_node_free(iarg.alt_root);\r
7939 return ONIGERR_MEMORY;\r
7940 }\r
7941 *np = work;\r
b602265d 7942 }\r
14b0e578
CS
7943 }\r
7944 }\r
7945 break;\r
7946\r
7947 case TK_ANYCHAR:\r
7948 *np = node_new_anychar();\r
7949 CHECK_NULL_RETURN_MEMERR(*np);\r
7950 break;\r
7951\r
7952 case TK_ANYCHAR_ANYTIME:\r
7953 *np = node_new_anychar();\r
7954 CHECK_NULL_RETURN_MEMERR(*np);\r
7955 qn = node_new_quantifier(0, REPEAT_INFINITE, 0);\r
7956 CHECK_NULL_RETURN_MEMERR(qn);\r
b602265d 7957 NODE_BODY(qn) = *np;\r
14b0e578
CS
7958 *np = qn;\r
7959 break;\r
7960\r
7961 case TK_BACKREF:\r
7962 len = tok->u.backref.num;\r
7963 *np = node_new_backref(len,\r
b602265d
DG
7964 (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)),\r
7965 tok->u.backref.by_name,\r
14b0e578 7966#ifdef USE_BACKREF_WITH_LEVEL\r
b602265d
DG
7967 tok->u.backref.exist_level,\r
7968 tok->u.backref.level,\r
14b0e578 7969#endif\r
b602265d 7970 env);\r
14b0e578
CS
7971 CHECK_NULL_RETURN_MEMERR(*np);\r
7972 break;\r
7973\r
b602265d 7974#ifdef USE_CALL\r
14b0e578
CS
7975 case TK_CALL:\r
7976 {\r
7977 int gnum = tok->u.call.gnum;\r
7978\r
b602265d
DG
7979 *np = node_new_call(tok->u.call.name, tok->u.call.name_end,\r
7980 gnum, tok->u.call.by_number);\r
14b0e578
CS
7981 CHECK_NULL_RETURN_MEMERR(*np);\r
7982 env->num_call++;\r
b602265d
DG
7983 if (tok->u.call.by_number != 0 && gnum == 0) {\r
7984 env->has_call_zero = 1;\r
7985 }\r
14b0e578
CS
7986 }\r
7987 break;\r
7988#endif\r
7989\r
7990 case TK_ANCHOR:\r
b602265d
DG
7991 {\r
7992 int ascii_mode =\r
7993 IS_WORD_ASCII(env->options) && IS_WORD_ANCHOR_TYPE(tok->u.anchor) ? 1 : 0;\r
7994 *np = onig_node_new_anchor(tok->u.anchor, ascii_mode);\r
df8be9e5 7995 CHECK_NULL_RETURN_MEMERR(*np);\r
b602265d 7996 }\r
14b0e578
CS
7997 break;\r
7998\r
7999 case TK_OP_REPEAT:\r
8000 case TK_INTERVAL:\r
8001 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS)) {\r
8002 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS))\r
b602265d 8003 return ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED;\r
16bd7c35 8004 else {\r
b602265d 8005 *np = node_new_empty();\r
16bd7c35
DG
8006 CHECK_NULL_RETURN_MEMERR(*np);\r
8007 }\r
14b0e578
CS
8008 }\r
8009 else {\r
8010 goto tk_byte;\r
8011 }\r
8012 break;\r
8013\r
b602265d
DG
8014 case TK_KEEP:\r
8015 r = node_new_keep(np, env);\r
8016 if (r < 0) return r;\r
8017 break;\r
8018\r
8019 case TK_GENERAL_NEWLINE:\r
8020 r = node_new_general_newline(np, env);\r
8021 if (r < 0) return r;\r
8022 break;\r
8023\r
8024 case TK_NO_NEWLINE:\r
8025 r = node_new_no_newline(np, env);\r
8026 if (r < 0) return r;\r
8027 break;\r
8028\r
8029 case TK_TRUE_ANYCHAR:\r
8030 r = node_new_true_anychar(np, env);\r
8031 if (r < 0) return r;\r
8032 break;\r
8033\r
8034 case TK_EXTENDED_GRAPHEME_CLUSTER:\r
8035 r = make_extended_grapheme_cluster(np, env);\r
8036 if (r < 0) return r;\r
8037 break;\r
8038\r
14b0e578
CS
8039 default:\r
8040 return ONIGERR_PARSER_BUG;\r
8041 break;\r
8042 }\r
8043\r
8044 {\r
8045 targetp = np;\r
8046\r
8047 re_entry:\r
8048 r = fetch_token(tok, src, end, env);\r
8049 if (r < 0) return r;\r
8050\r
8051 repeat:\r
8052 if (r == TK_OP_REPEAT || r == TK_INTERVAL) {\r
8053 if (is_invalid_quantifier_target(*targetp))\r
b602265d 8054 return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID;\r
14b0e578
CS
8055\r
8056 qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper,\r
b602265d 8057 (r == TK_INTERVAL ? 1 : 0));\r
14b0e578 8058 CHECK_NULL_RETURN_MEMERR(qn);\r
b602265d 8059 QUANT_(qn)->greedy = tok->u.repeat.greedy;\r
14b0e578
CS
8060 r = set_quantifier(qn, *targetp, group, env);\r
8061 if (r < 0) {\r
b602265d
DG
8062 onig_node_free(qn);\r
8063 return r;\r
14b0e578
CS
8064 }\r
8065\r
8066 if (tok->u.repeat.possessive != 0) {\r
b602265d
DG
8067 Node* en;\r
8068 en = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);\r
8069 if (IS_NULL(en)) {\r
8070 onig_node_free(qn);\r
8071 return ONIGERR_MEMORY;\r
8072 }\r
8073 NODE_BODY(en) = qn;\r
8074 qn = en;\r
14b0e578
CS
8075 }\r
8076\r
8077 if (r == 0) {\r
b602265d 8078 *targetp = qn;\r
14b0e578
CS
8079 }\r
8080 else if (r == 1) {\r
b602265d 8081 onig_node_free(qn);\r
14b0e578
CS
8082 }\r
8083 else if (r == 2) { /* split case: /abc+/ */\r
b602265d
DG
8084 Node *tmp;\r
8085\r
8086 *targetp = node_new_list(*targetp, NULL);\r
8087 if (IS_NULL(*targetp)) {\r
8088 onig_node_free(qn);\r
8089 return ONIGERR_MEMORY;\r
8090 }\r
8091 tmp = NODE_CDR(*targetp) = node_new_list(qn, NULL);\r
8092 if (IS_NULL(tmp)) {\r
8093 onig_node_free(qn);\r
8094 return ONIGERR_MEMORY;\r
8095 }\r
8096 targetp = &(NODE_CAR(tmp));\r
14b0e578
CS
8097 }\r
8098 goto re_entry;\r
8099 }\r
8100 }\r
8101\r
8102 return r;\r
8103}\r
8104\r
8105static int\r
b602265d
DG
8106parse_branch(Node** top, OnigToken* tok, int term, UChar** src, UChar* end,\r
8107 ScanEnv* env)\r
14b0e578
CS
8108{\r
8109 int r;\r
8110 Node *node, **headp;\r
8111\r
8112 *top = NULL;\r
8113 r = parse_exp(&node, tok, term, src, end, env);\r
b602265d
DG
8114 if (r < 0) {\r
8115 onig_node_free(node);\r
8116 return r;\r
8117 }\r
14b0e578
CS
8118\r
8119 if (r == TK_EOT || r == term || r == TK_ALT) {\r
8120 *top = node;\r
8121 }\r
8122 else {\r
8123 *top = node_new_list(node, NULL);\r
a5def177
DG
8124 if (IS_NULL(*top)) {\r
8125 onig_node_free(node);\r
8126 return ONIGERR_MEMORY;\r
8127 }\r
8128\r
b602265d 8129 headp = &(NODE_CDR(*top));\r
14b0e578
CS
8130 while (r != TK_EOT && r != term && r != TK_ALT) {\r
8131 r = parse_exp(&node, tok, term, src, end, env);\r
b602265d
DG
8132 if (r < 0) {\r
8133 onig_node_free(node);\r
8134 return r;\r
8135 }\r
14b0e578 8136\r
b602265d
DG
8137 if (NODE_TYPE(node) == NODE_LIST) {\r
8138 *headp = node;\r
8139 while (IS_NOT_NULL(NODE_CDR(node))) node = NODE_CDR(node);\r
8140 headp = &(NODE_CDR(node));\r
14b0e578
CS
8141 }\r
8142 else {\r
b602265d
DG
8143 *headp = node_new_list(node, NULL);\r
8144 headp = &(NODE_CDR(*headp));\r
14b0e578
CS
8145 }\r
8146 }\r
8147 }\r
8148\r
8149 return r;\r
8150}\r
8151\r
8152/* term_tok: TK_EOT or TK_SUBEXP_CLOSE */\r
8153static int\r
b602265d
DG
8154parse_subexp(Node** top, OnigToken* tok, int term, UChar** src, UChar* end,\r
8155 ScanEnv* env)\r
14b0e578
CS
8156{\r
8157 int r;\r
8158 Node *node, **headp;\r
8159\r
8160 *top = NULL;\r
b602265d
DG
8161 env->parse_depth++;\r
8162 if (env->parse_depth > ParseDepthLimit)\r
8163 return ONIGERR_PARSE_DEPTH_LIMIT_OVER;\r
a5def177 8164\r
14b0e578
CS
8165 r = parse_branch(&node, tok, term, src, end, env);\r
8166 if (r < 0) {\r
8167 onig_node_free(node);\r
8168 return r;\r
8169 }\r
8170\r
8171 if (r == term) {\r
8172 *top = node;\r
8173 }\r
8174 else if (r == TK_ALT) {\r
8175 *top = onig_node_new_alt(node, NULL);\r
a5def177
DG
8176 if (IS_NULL(*top)) {\r
8177 onig_node_free(node);\r
8178 return ONIGERR_MEMORY;\r
8179 }\r
8180\r
b602265d 8181 headp = &(NODE_CDR(*top));\r
14b0e578
CS
8182 while (r == TK_ALT) {\r
8183 r = fetch_token(tok, src, end, env);\r
8184 if (r < 0) return r;\r
8185 r = parse_branch(&node, tok, term, src, end, env);\r
b602265d
DG
8186 if (r < 0) {\r
8187 onig_node_free(node);\r
8188 return r;\r
8189 }\r
14b0e578 8190 *headp = onig_node_new_alt(node, NULL);\r
a5def177
DG
8191 if (IS_NULL(*headp)) {\r
8192 onig_node_free(node);\r
8193 onig_node_free(*top);\r
8194 return ONIGERR_MEMORY;\r
8195 }\r
8196\r
b602265d 8197 headp = &(NODE_CDR(*headp));\r
14b0e578
CS
8198 }\r
8199\r
8200 if (tok->type != (enum TokenSyms )term)\r
8201 goto err;\r
8202 }\r
8203 else {\r
b602265d 8204 onig_node_free(node);\r
14b0e578
CS
8205 err:\r
8206 if (term == TK_SUBEXP_CLOSE)\r
8207 return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;\r
8208 else\r
8209 return ONIGERR_PARSER_BUG;\r
8210 }\r
8211\r
b602265d 8212 env->parse_depth--;\r
14b0e578
CS
8213 return r;\r
8214}\r
8215\r
8216static int\r
8217parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)\r
8218{\r
8219 int r;\r
8220 OnigToken tok;\r
8221\r
8222 r = fetch_token(&tok, src, end, env);\r
8223 if (r < 0) return r;\r
8224 r = parse_subexp(top, &tok, TK_EOT, src, end, env);\r
8225 if (r < 0) return r;\r
b602265d
DG
8226\r
8227 return 0;\r
8228}\r
8229\r
8230#ifdef USE_CALL\r
8231static int\r
8232make_call_zero_body(Node* node, ScanEnv* env, Node** rnode)\r
8233{\r
8234 int r;\r
8235\r
8236 Node* x = node_new_memory(0 /* 0: is not named */);\r
8237 CHECK_NULL_RETURN_MEMERR(x);\r
8238\r
8239 NODE_BODY(x) = node;\r
8240 ENCLOSURE_(x)->m.regnum = 0;\r
8241 r = scan_env_set_mem_node(env, 0, x);\r
8242 if (r != 0) {\r
8243 onig_node_free(x);\r
8244 return r;\r
8245 }\r
8246\r
8247 *rnode = x;\r
14b0e578
CS
8248 return 0;\r
8249}\r
b602265d 8250#endif\r
14b0e578
CS
8251\r
8252extern int\r
b602265d
DG
8253onig_parse_tree(Node** root, const UChar* pattern, const UChar* end,\r
8254 regex_t* reg, ScanEnv* env)\r
14b0e578
CS
8255{\r
8256 int r;\r
8257 UChar* p;\r
b602265d
DG
8258#ifdef USE_CALLOUT\r
8259 RegexExt* ext;\r
8260#endif\r
14b0e578 8261\r
14b0e578 8262 names_clear(reg);\r
14b0e578
CS
8263\r
8264 scan_env_clear(env);\r
b602265d 8265 env->options = reg->options;\r
14b0e578
CS
8266 env->case_fold_flag = reg->case_fold_flag;\r
8267 env->enc = reg->enc;\r
8268 env->syntax = reg->syntax;\r
8269 env->pattern = (UChar* )pattern;\r
8270 env->pattern_end = (UChar* )end;\r
8271 env->reg = reg;\r
8272\r
8273 *root = NULL;\r
b602265d
DG
8274\r
8275 if (! ONIGENC_IS_VALID_MBC_STRING(env->enc, pattern, end))\r
8276 return ONIGERR_INVALID_WIDE_CHAR_VALUE;\r
8277\r
14b0e578
CS
8278 p = (UChar* )pattern;\r
8279 r = parse_regexp(root, &p, (UChar* )end, env);\r
b602265d
DG
8280\r
8281#ifdef USE_CALL\r
8282 if (r != 0) return r;\r
8283\r
8284 if (env->has_call_zero != 0) {\r
8285 Node* zero_node;\r
8286 r = make_call_zero_body(*root, env, &zero_node);\r
8287 if (r != 0) return r;\r
8288\r
8289 *root = zero_node;\r
8290 }\r
8291#endif\r
8292\r
14b0e578 8293 reg->num_mem = env->num_mem;\r
b602265d
DG
8294\r
8295#ifdef USE_CALLOUT\r
8296 ext = REG_EXTP(reg);\r
8297 if (IS_NOT_NULL(ext) && ext->callout_num > 0) {\r
8298 r = setup_ext_callout_list_values(reg);\r
8299 }\r
8300#endif\r
8301\r
14b0e578
CS
8302 return r;\r
8303}\r
8304\r
8305extern void\r
8306onig_scan_env_set_error_string(ScanEnv* env, int ecode ARG_UNUSED,\r
b602265d 8307 UChar* arg, UChar* arg_end)\r
14b0e578
CS
8308{\r
8309 env->error = arg;\r
8310 env->error_end = arg_end;\r
8311}\r