]> git.proxmox.com Git - mirror_edk2.git/blame - MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regparse.c
MdeModulePkg RegularExpressionDxe: Update Oniguruma to 6.9.0
[mirror_edk2.git] / MdeModulePkg / Universal / RegularExpressionDxe / Oniguruma / regparse.c
CommitLineData
14b0e578
CS
1/**********************************************************************\r
2 regparse.c - Oniguruma (regular expression library)\r
3**********************************************************************/\r
4/*-\r
b602265d 5 * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>\r
14b0e578
CS
6 * All rights reserved.\r
7 *\r
14b0e578
CS
8 * Redistribution and use in source and binary forms, with or without\r
9 * modification, are permitted provided that the following conditions\r
10 * are met:\r
11 * 1. Redistributions of source code must retain the above copyright\r
12 * notice, this list of conditions and the following disclaimer.\r
13 * 2. Redistributions in binary form must reproduce the above copyright\r
14 * notice, this list of conditions and the following disclaimer in the\r
15 * documentation and/or other materials provided with the distribution.\r
16 *\r
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND\r
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\r
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\r
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE\r
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\r
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS\r
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)\r
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT\r
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY\r
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF\r
27 * SUCH DAMAGE.\r
28 */\r
29\r
30#include "regparse.h"\r
31#include "st.h"\r
32\r
b602265d
DG
33#ifdef DEBUG_NODE_FREE\r
34#include <stdio.h>\r
35#endif\r
36\r
37#define INIT_TAG_NAMES_ALLOC_NUM 5\r
38\r
14b0e578
CS
39#define WARN_BUFSIZE 256\r
40\r
41#define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS\r
42\r
b602265d
DG
/* Character predicates for callout names and callout tag names:
   ASCII letters, digits and '_' are accepted.
   The argument is parenthesized at every use so that an expression argument
   (for example a conditional expression) is evaluated as a unit.
   Note: `c` is still evaluated several times; avoid side effects. */
#define IS_ALLOWED_CODE_IN_CALLOUT_NAME(c) \
  (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z') || \
   ((c) >= '0' && (c) <= '9') || (c) == '_' /* || (c) == '!' */)
#define IS_ALLOWED_CODE_IN_CALLOUT_TAG_NAME(c) \
  (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z') || \
   ((c) >= '0' && (c) <= '9') || (c) == '_')
47\r
48\r
49OnigSyntaxType OnigSyntaxOniguruma = {\r
50 (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |\r
51 ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |\r
52 ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_O_BRACE_OCTAL |\r
53 ONIG_SYN_OP_ESC_CONTROL_CHARS |\r
54 ONIG_SYN_OP_ESC_C_CONTROL )\r
55 & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )\r
56 , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |\r
57 ONIG_SYN_OP2_OPTION_RUBY |\r
58 ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |\r
59 ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE |\r
60 ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP |\r
61 ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS |\r
62 ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME |\r
63 ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER |\r
64 ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE |\r
65 ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT |\r
66 ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP |\r
67 ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |\r
68 ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |\r
69 ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |\r
70 ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |\r
71 ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |\r
72 ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |\r
73 ONIG_SYN_OP2_ESC_H_XDIGIT | ONIG_SYN_OP2_ESC_U_HEX4 )\r
74 , ( SYN_GNU_REGEX_BV | \r
75 ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |\r
76 ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |\r
77 ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |\r
78 ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |\r
79 ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |\r
80 ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |\r
81 ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )\r
82 , ONIG_OPTION_NONE\r
83 ,\r
84 {\r
85 (OnigCodePoint )'\\' /* esc */\r
86 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */\r
87 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */\r
88 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */\r
89 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */\r
90 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */\r
91 }\r
92};\r
14b0e578
CS
93\r
94OnigSyntaxType OnigSyntaxRuby = {\r
95 (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |\r
96 ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |\r
b602265d
DG
97 ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_O_BRACE_OCTAL |\r
98 ONIG_SYN_OP_ESC_CONTROL_CHARS |\r
14b0e578
CS
99 ONIG_SYN_OP_ESC_C_CONTROL )\r
100 & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )\r
101 , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |\r
102 ONIG_SYN_OP2_OPTION_RUBY |\r
103 ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |\r
b602265d
DG
104 ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE |\r
105 ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP |\r
106 ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER |\r
107 ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE |\r
108 ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP |\r
14b0e578
CS
109 ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |\r
110 ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |\r
111 ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |\r
112 ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |\r
113 ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |\r
114 ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |\r
b602265d 115 ONIG_SYN_OP2_ESC_H_XDIGIT | ONIG_SYN_OP2_ESC_U_HEX4 )\r
14b0e578
CS
116 , ( SYN_GNU_REGEX_BV | \r
117 ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |\r
118 ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |\r
119 ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |\r
120 ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |\r
121 ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |\r
122 ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |\r
123 ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )\r
124 , ONIG_OPTION_NONE\r
125 ,\r
126 {\r
127 (OnigCodePoint )'\\' /* esc */\r
128 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */\r
129 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */\r
130 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */\r
131 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */\r
132 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */\r
133 }\r
134};\r
135\r
b602265d 136OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_ONIGURUMA;\r
14b0e578
CS
137\r
138extern void onig_null_warn(const char* s ARG_UNUSED) { }\r
139\r
140#ifdef DEFAULT_WARN_FUNCTION\r
141static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION;\r
142#else\r
143static OnigWarnFunc onig_warn = onig_null_warn;\r
144#endif\r
145\r
146#ifdef DEFAULT_VERB_WARN_FUNCTION\r
147static OnigWarnFunc onig_verb_warn = (OnigWarnFunc )DEFAULT_VERB_WARN_FUNCTION;\r
148#else\r
149static OnigWarnFunc onig_verb_warn = onig_null_warn;\r
150#endif\r
151\r
152extern void onig_set_warn_func(OnigWarnFunc f)\r
153{\r
154 onig_warn = f;\r
155}\r
156\r
157extern void onig_set_verb_warn_func(OnigWarnFunc f)\r
158{\r
159 onig_verb_warn = f;\r
160}\r
161\r
b602265d
DG
162extern void\r
163onig_warning(const char* s)\r
164{\r
165 if (onig_warn == onig_null_warn) return ;\r
166\r
167 (*onig_warn)(s);\r
168}\r
169\r
#define DEFAULT_MAX_CAPTURE_NUM   32767

/* Current capture-number limit. */
static int MaxCaptureNum = DEFAULT_MAX_CAPTURE_NUM;

/* Set the capture-number limit.
   Returns 0 on success; returns -1 and leaves the limit unchanged when
   `num` is negative. */
extern int
onig_set_capture_num_limit(int num)
{
  if (num >= 0) {
    MaxCaptureNum = num;
    return 0;
  }

  return -1;
}
182\r
183static unsigned int ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT;\r
184\r
185extern unsigned int\r
186onig_get_parse_depth_limit(void)\r
187{\r
188 return ParseDepthLimit;\r
189}\r
190\r
191extern int\r
192onig_set_parse_depth_limit(unsigned int depth)\r
193{\r
194 if (depth == 0)\r
195 ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT;\r
196 else\r
197 ParseDepthLimit = depth;\r
198 return 0;\r
199}\r
200\r
/* Multiply two non-negative ints with overflow detection.
   Returns x * y when the product fits in an int, 0 when either operand
   is 0, and -1 when the product would exceed INT_MAX.
   Negative operands (outside this helper's contract) also yield -1
   instead of reaching a possibly overflowing signed multiplication. */
static int
positive_int_multiply(int x, int y)
{
  if (x == 0 || y == 0) return 0;
  if (x < 0 || y < 0) return -1;

  /* x * y <= INT_MAX  <=>  x <= INT_MAX / y  (y > 0, integer division);
     "<=" keeps products that fit exactly, e.g. INT_MAX * 1. */
  if (x <= INT_MAX / y)
    return x * y;
  else
    return -1;
}
211\r
14b0e578
CS
212static void\r
213bbuf_free(BBuf* bbuf)\r
214{\r
215 if (IS_NOT_NULL(bbuf)) {\r
216 if (IS_NOT_NULL(bbuf->p)) xfree(bbuf->p);\r
217 xfree(bbuf);\r
218 }\r
219}\r
220\r
221static int\r
222bbuf_clone(BBuf** rto, BBuf* from)\r
223{\r
224 int r;\r
225 BBuf *to;\r
226\r
227 *rto = to = (BBuf* )xmalloc(sizeof(BBuf));\r
228 CHECK_NULL_RETURN_MEMERR(to);\r
b602265d
DG
229 r = BB_INIT(to, from->alloc);\r
230 if (r != 0) {\r
231 xfree(to->p);\r
232 *rto = 0;\r
233 return r;\r
234 }\r
14b0e578
CS
235 to->used = from->used;\r
236 xmemcpy(to->p, from->p, from->used);\r
237 return 0;\r
238}\r
239\r
b602265d
DG
240static int backref_rel_to_abs(int rel_no, ScanEnv* env)\r
241{\r
242 if (rel_no > 0) {\r
243 return env->num_mem + rel_no;\r
244 }\r
245 else {\r
246 return env->num_mem + 1 + rel_no;\r
247 }\r
248}\r
249\r
/* Option bit helpers operating on the lvalue `v`. */
#define OPTION_ON(v,f)     ((v) |= (f))
#define OPTION_OFF(v,f)    ((v) &= ~(f))

/* Clear `f` in `v` when `negative` is non-zero, set it otherwise.
   The whole expansion is parenthesized so the macro stays one operand
   when used inside a larger expression. */
#define OPTION_NEGATE(v,f,negative) \
  ((negative) ? ((v) &= ~(f)) : ((v) |= (f)))
14b0e578
CS
254\r
/* First code point of the multibyte range: 0 when the encoding's minimum
   character length exceeds one byte, 0x80 otherwise. */
#define MBCODE_START_POS(enc) \
  (OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80)

/* Add the range [MBCODE_START_POS(enc), ~0] to the code range buffer. */
#define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \
  add_code_range_to_buf(pbuf, MBCODE_START_POS(enc), ~((OnigCodePoint )0))

/* For non single-byte encodings, add the full multibyte range to `mbuf`.
   NOTE: assigns the caller's local `r` and executes `return r` in the
   enclosing function on failure. */
#define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\
  if (! ONIGENC_IS_SINGLEBYTE(enc)) {\
    r = SET_ALL_MULTI_BYTE_RANGE(enc, &(mbuf));\
    if (r != 0) return r;\
  }\
} while (0)
267\r
268\r
/* Store 1 in `empty` when every word of bitset `bs` is zero, else 0.
   Declares a block-local `i`, so the arguments must not refer to a
   caller variable of that name. */
#define BITSET_IS_EMPTY(bs,empty) do {\
  int i = 0;\
  empty = 1;\
  while (i < (int )BITSET_SIZE) {\
    if ((bs)[i] != 0) {\
      empty = 0; break;\
    }\
    i++;\
  }\
} while (0)
278\r
279static void\r
280bitset_set_range(BitSetRef bs, int from, int to)\r
281{\r
282 int i;\r
283 for (i = from; i <= to && i < SINGLE_BYTE_SIZE; i++) {\r
284 BITSET_SET_BIT(bs, i);\r
285 }\r
286}\r
287\r
#if 0
/* Set every bit of `bs` (currently compiled out). */
static void
bitset_set_all(BitSetRef bs)
{
  int i = 0;

  while (i < BITSET_SIZE) {
    bs[i] = ~((Bits )0);
    i++;
  }
}
#endif
296\r
297static void\r
298bitset_invert(BitSetRef bs)\r
299{\r
300 int i;\r
301 for (i = 0; i < (int )BITSET_SIZE; i++) { bs[i] = ~(bs[i]); }\r
302}\r
303\r
304static void\r
305bitset_invert_to(BitSetRef from, BitSetRef to)\r
306{\r
307 int i;\r
308 for (i = 0; i < (int )BITSET_SIZE; i++) { to[i] = ~(from[i]); }\r
309}\r
310\r
311static void\r
312bitset_and(BitSetRef dest, BitSetRef bs)\r
313{\r
314 int i;\r
315 for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] &= bs[i]; }\r
316}\r
317\r
318static void\r
319bitset_or(BitSetRef dest, BitSetRef bs)\r
320{\r
321 int i;\r
322 for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] |= bs[i]; }\r
323}\r
324\r
325static void\r
326bitset_copy(BitSetRef dest, BitSetRef bs)\r
327{\r
328 int i;\r
329 for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] = bs[i]; }\r
330}\r
331\r
332extern int\r
333onig_strncmp(const UChar* s1, const UChar* s2, int n)\r
334{\r
335 int x;\r
336\r
337 while (n-- > 0) {\r
338 x = *s2++ - *s1++;\r
339 if (x) return x;\r
340 }\r
341 return 0;\r
342}\r
343\r
344extern void\r
345onig_strcpy(UChar* dest, const UChar* src, const UChar* end)\r
346{\r
b602265d 347 int len = (int )(end - src);\r
14b0e578
CS
348 if (len > 0) {\r
349 xmemcpy(dest, src, len);\r
350 dest[len] = (UChar )0;\r
351 }\r
352}\r
353\r
b602265d
DG
354static int\r
355save_entry(ScanEnv* env, enum SaveType type, int* id)\r
14b0e578 356{\r
b602265d 357 int nid = env->save_num;\r
14b0e578 358\r
b602265d
DG
359#if 0\r
360 if (IS_NULL(env->saves)) {\r
361 int n = 10;\r
362 env->saves = (SaveItem* )xmalloc(sizeof(SaveItem) * n);\r
363 CHECK_NULL_RETURN_MEMERR(env->saves);\r
364 env->save_alloc_num = n;\r
365 }\r
366 else if (env->save_alloc_num <= nid) {\r
367 int n = env->save_alloc_num * 2;\r
368 SaveItem* p = (SaveItem* )xrealloc(env->saves, sizeof(SaveItem) * n, sizeof(SaveItem)*env->save_alloc_num);\r
369 CHECK_NULL_RETURN_MEMERR(p);\r
370 env->saves = p;\r
371 env->save_alloc_num = n;\r
372 }\r
14b0e578 373\r
b602265d
DG
374 env->saves[nid].type = type;\r
375#endif\r
14b0e578 376\r
b602265d
DG
377 env->save_num++;\r
378 *id = nid;\r
379 return 0;\r
14b0e578 380}\r
14b0e578
CS
381\r
/* scan pattern methods
 *
 * These helpers expand in place and use locals of the enclosing function:
 * `p` (cursor), `end` (end of pattern) and `enc` (encoding).
 * PINC, PFETCH and PUNFETCH also use `pfetch_prev`, the variable that
 * PFETCH_READY declares.  The _S variants do not record the previous
 * position.
 */
#define PEND_VALUE   0

#define PFETCH_READY  UChar* pfetch_prev
#define PEND          (! (p < end))
#define PUNFETCH      p = pfetch_prev
#define PINC          do { \
  pfetch_prev = p; \
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
} while (0)
#define PFETCH(c)     do { \
  c = ONIGENC_MBC_TO_CODE(enc, p, end); \
  pfetch_prev = p; \
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
} while (0)

#define PINC_S        do { \
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
} while (0)
#define PFETCH_S(c)   do { \
  c = ONIGENC_MBC_TO_CODE(enc, p, end); \
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
} while (0)
405\r
/* Code point at the cursor `p` without advancing it, or PEND_VALUE when
   `p` has reached `end`.  Uses the caller's `p`, `end` and `enc`. */
#define PPEEK        (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE)
/* Non-zero when the peeked code point equals `c`.  The argument is
   parenthesized so the cast applies to the whole expression. */
#define PPEEK_IS(c)  (PPEEK == (OnigCodePoint )(c))
408\r
409static UChar*\r
410strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end,\r
b602265d 411 int capa, int oldCapa)\r
14b0e578
CS
412{\r
413 UChar* r;\r
414\r
415 if (dest)\r
416 r = (UChar* )xrealloc(dest, capa + 1, oldCapa);\r
417 else\r
418 r = (UChar* )xmalloc(capa + 1);\r
419\r
420 CHECK_NULL_RETURN(r);\r
421 onig_strcpy(r + (dest_end - dest), src, src_end);\r
422 return r;\r
423}\r
424\r
425/* dest on static area */\r
426static UChar*\r
427strcat_capa_from_static(UChar* dest, UChar* dest_end,\r
b602265d 428 const UChar* src, const UChar* src_end, int capa)\r
14b0e578
CS
429{\r
430 UChar* r;\r
431\r
432 r = (UChar* )xmalloc(capa + 1);\r
433 CHECK_NULL_RETURN(r);\r
434 onig_strcpy(r, dest, dest_end);\r
435 onig_strcpy(r + (dest_end - dest), src, src_end);\r
436 return r;\r
437}\r
438\r
439\r
440#ifdef USE_ST_LIBRARY\r
441\r
442typedef struct {\r
443 UChar* s;\r
444 UChar* end;\r
445} st_str_end_key;\r
446\r
447static int\r
448str_end_cmp(st_str_end_key* x, st_str_end_key* y)\r
449{\r
450 UChar *p, *q;\r
451 int c;\r
452\r
453 if ((x->end - x->s) != (y->end - y->s))\r
454 return 1;\r
455\r
456 p = x->s;\r
457 q = y->s;\r
458 while (p < x->end) {\r
459 c = (int )*p - (int )*q;\r
460 if (c != 0) return c;\r
461\r
462 p++; q++;\r
463 }\r
464\r
465 return 0;\r
466}\r
467\r
468static int\r
469str_end_hash(st_str_end_key* x)\r
470{\r
471 UChar *p;\r
472 int val = 0;\r
473\r
474 p = x->s;\r
475 while (p < x->end) {\r
476 val = val * 997 + (int )*p++;\r
477 }\r
478\r
479 return val + (val >> 5);\r
480}\r
481\r
482extern hash_table_type*\r
483onig_st_init_strend_table_with_size(int size)\r
484{\r
485 static struct st_hash_type hashType = {\r
486 str_end_cmp,\r
487 str_end_hash,\r
488 };\r
489\r
490 return (hash_table_type* )\r
491 onig_st_init_table_with_size(&hashType, size);\r
492}\r
493\r
494extern int\r
495onig_st_lookup_strend(hash_table_type* table, const UChar* str_key,\r
b602265d 496 const UChar* end_key, hash_data_type *value)\r
14b0e578
CS
497{\r
498 st_str_end_key key;\r
499\r
500 key.s = (UChar* )str_key;\r
501 key.end = (UChar* )end_key;\r
502\r
b602265d 503 return onig_st_lookup(table, (st_data_t )(&key), value);\r
14b0e578
CS
504}\r
505\r
506extern int\r
507onig_st_insert_strend(hash_table_type* table, const UChar* str_key,\r
b602265d 508 const UChar* end_key, hash_data_type value)\r
14b0e578
CS
509{\r
510 st_str_end_key* key;\r
511 int result;\r
512\r
513 key = (st_str_end_key* )xmalloc(sizeof(st_str_end_key));\r
b0c2b797 514 CHECK_NULL_RETURN_MEMERR(key);\r
b602265d 515\r
14b0e578
CS
516 key->s = (UChar* )str_key;\r
517 key->end = (UChar* )end_key;\r
b602265d 518 result = onig_st_insert(table, (st_data_t )key, value);\r
14b0e578
CS
519 if (result) {\r
520 xfree(key);\r
521 }\r
522 return result;\r
523}\r
524\r
14b0e578 525\r
b602265d
DG
526typedef struct {\r
527 OnigEncoding enc;\r
528 int type; /* callout type: single or not */\r
529 UChar* s;\r
530 UChar* end;\r
531} st_callout_name_key;\r
532\r
533static int\r
534callout_name_table_cmp(st_callout_name_key* x, st_callout_name_key* y)\r
535{\r
536 UChar *p, *q;\r
537 int c;\r
538\r
539 if (x->enc != y->enc) return 1;\r
540 if (x->type != y->type) return 1;\r
541 if ((x->end - x->s) != (y->end - y->s))\r
542 return 1;\r
543\r
544 p = x->s;\r
545 q = y->s;\r
546 while (p < x->end) {\r
547 c = (int )*p - (int )*q;\r
548 if (c != 0) return c;\r
549\r
550 p++; q++;\r
551 }\r
552\r
553 return 0;\r
554}\r
555\r
556static int\r
557callout_name_table_hash(st_callout_name_key* x)\r
558{\r
559 UChar *p;\r
560 int val = 0;\r
561\r
562 p = x->s;\r
563 while (p < x->end) {\r
564 val = val * 997 + (int )*p++;\r
565 }\r
566\r
567 /* use intptr_t for escape warning in Windows */\r
568 return val + (val >> 5) + ((intptr_t )x->enc & 0xffff) + x->type;\r
569}\r
570\r
571extern hash_table_type*\r
572onig_st_init_callout_name_table_with_size(int size)\r
573{\r
574 static struct st_hash_type hashType = {\r
575 callout_name_table_cmp,\r
576 callout_name_table_hash,\r
577 };\r
578\r
579 return (hash_table_type* )\r
580 onig_st_init_table_with_size(&hashType, size);\r
581}\r
582\r
583extern int\r
584onig_st_lookup_callout_name_table(hash_table_type* table,\r
585 OnigEncoding enc,\r
586 int type,\r
587 const UChar* str_key,\r
588 const UChar* end_key,\r
589 hash_data_type *value)\r
590{\r
591 st_callout_name_key key;\r
592\r
593 key.enc = enc;\r
594 key.type = type;\r
595 key.s = (UChar* )str_key;\r
596 key.end = (UChar* )end_key;\r
597\r
598 return onig_st_lookup(table, (st_data_t )(&key), value);\r
599}\r
600\r
601static int\r
602st_insert_callout_name_table(hash_table_type* table,\r
603 OnigEncoding enc, int type,\r
604 UChar* str_key, UChar* end_key,\r
605 hash_data_type value)\r
606{\r
607 st_callout_name_key* key;\r
608 int result;\r
609\r
610 key = (st_callout_name_key* )xmalloc(sizeof(st_callout_name_key));\r
611 CHECK_NULL_RETURN_MEMERR(key);\r
612\r
613 /* key->s: don't duplicate, because str_key is duped in callout_name_entry() */\r
614 key->enc = enc;\r
615 key->type = type;\r
616 key->s = str_key;\r
617 key->end = end_key;\r
618 result = onig_st_insert(table, (st_data_t )key, value);\r
619 if (result) {\r
620 xfree(key);\r
621 }\r
622 return result;\r
623}\r
624\r
625#endif /* USE_ST_LIBRARY */\r
14b0e578 626\r
14b0e578
CS
627\r
628#define INIT_NAME_BACKREFS_ALLOC_NUM 8\r
629\r
630typedef struct {\r
631 UChar* name;\r
632 int name_len; /* byte length */\r
633 int back_num; /* number of backrefs */\r
634 int back_alloc;\r
635 int back_ref1;\r
636 int* back_refs;\r
637} NameEntry;\r
638\r
639#ifdef USE_ST_LIBRARY\r
640\r
b602265d
DG
641#define INIT_NAMES_ALLOC_NUM 5\r
642\r
14b0e578
CS
643typedef st_table NameTable;\r
644typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */\r
645\r
646#define NAMEBUF_SIZE 24\r
647#define NAMEBUF_SIZE_1 25\r
648\r
649#ifdef ONIG_DEBUG\r
650static int\r
651i_print_name_entry(UChar* key, NameEntry* e, void* arg)\r
652{\r
653 int i;\r
654 FILE* fp = (FILE* )arg;\r
655\r
656 fprintf(fp, "%s: ", e->name);\r
657 if (e->back_num == 0)\r
658 fputs("-", fp);\r
659 else if (e->back_num == 1)\r
660 fprintf(fp, "%d", e->back_ref1);\r
661 else {\r
662 for (i = 0; i < e->back_num; i++) {\r
663 if (i > 0) fprintf(fp, ", ");\r
664 fprintf(fp, "%d", e->back_refs[i]);\r
665 }\r
666 }\r
667 fputs("\n", fp);\r
668 return ST_CONTINUE;\r
669}\r
670\r
671extern int\r
672onig_print_names(FILE* fp, regex_t* reg)\r
673{\r
674 NameTable* t = (NameTable* )reg->name_table;\r
675\r
676 if (IS_NOT_NULL(t)) {\r
677 fprintf(fp, "name table\n");\r
678 onig_st_foreach(t, i_print_name_entry, (HashDataType )fp);\r
679 fputs("\n", fp);\r
680 }\r
681 return 0;\r
682}\r
683#endif /* ONIG_DEBUG */\r
684\r
685static int\r
686i_free_name_entry(UChar* key, NameEntry* e, void* arg ARG_UNUSED)\r
687{\r
688 xfree(e->name);\r
689 if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);\r
690 xfree(key);\r
691 xfree(e);\r
692 return ST_DELETE;\r
693}\r
694\r
695static int\r
696names_clear(regex_t* reg)\r
697{\r
698 NameTable* t = (NameTable* )reg->name_table;\r
699\r
700 if (IS_NOT_NULL(t)) {\r
701 onig_st_foreach(t, i_free_name_entry, 0);\r
702 }\r
703 return 0;\r
704}\r
705\r
706extern int\r
707onig_names_free(regex_t* reg)\r
708{\r
709 int r;\r
710 NameTable* t;\r
711\r
712 r = names_clear(reg);\r
b602265d 713 if (r != 0) return r;\r
14b0e578
CS
714\r
715 t = (NameTable* )reg->name_table;\r
716 if (IS_NOT_NULL(t)) onig_st_free_table(t);\r
717 reg->name_table = (void* )NULL;\r
718 return 0;\r
719}\r
720\r
721static NameEntry*\r
722name_find(regex_t* reg, const UChar* name, const UChar* name_end)\r
723{\r
724 NameEntry* e;\r
725 NameTable* t = (NameTable* )reg->name_table;\r
726\r
727 e = (NameEntry* )NULL;\r
728 if (IS_NOT_NULL(t)) {\r
729 onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));\r
730 }\r
731 return e;\r
732}\r
733\r
734typedef struct {\r
735 int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*);\r
736 regex_t* reg;\r
737 void* arg;\r
738 int ret;\r
739 OnigEncoding enc;\r
740} INamesArg;\r
741\r
742static int\r
743i_names(UChar* key ARG_UNUSED, NameEntry* e, INamesArg* arg)\r
744{\r
745 int r = (*(arg->func))(e->name,\r
746 e->name + e->name_len,\r
747 e->back_num,\r
b602265d
DG
748 (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),\r
749 arg->reg, arg->arg);\r
14b0e578
CS
750 if (r != 0) {\r
751 arg->ret = r;\r
752 return ST_STOP;\r
753 }\r
754 return ST_CONTINUE;\r
755}\r
756\r
757extern int\r
758onig_foreach_name(regex_t* reg,\r
759 int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)\r
760{\r
761 INamesArg narg;\r
762 NameTable* t = (NameTable* )reg->name_table;\r
763\r
764 narg.ret = 0;\r
765 if (IS_NOT_NULL(t)) {\r
766 narg.func = func;\r
767 narg.reg = reg;\r
768 narg.arg = arg;\r
769 narg.enc = reg->enc; /* should be pattern encoding. */\r
b602265d 770 onig_st_foreach(t, i_names, (HashDataType )&narg);\r
14b0e578
CS
771 }\r
772 return narg.ret;\r
773}\r
774\r
775static int\r
776i_renumber_name(UChar* key ARG_UNUSED, NameEntry* e, GroupNumRemap* map)\r
777{\r
778 int i;\r
779\r
780 if (e->back_num > 1) {\r
781 for (i = 0; i < e->back_num; i++) {\r
782 e->back_refs[i] = map[e->back_refs[i]].new_val;\r
783 }\r
784 }\r
785 else if (e->back_num == 1) {\r
786 e->back_ref1 = map[e->back_ref1].new_val;\r
787 }\r
788\r
789 return ST_CONTINUE;\r
790}\r
791\r
792extern int\r
793onig_renumber_name_table(regex_t* reg, GroupNumRemap* map)\r
794{\r
795 NameTable* t = (NameTable* )reg->name_table;\r
796\r
797 if (IS_NOT_NULL(t)) {\r
b602265d 798 onig_st_foreach(t, i_renumber_name, (HashDataType )map);\r
14b0e578
CS
799 }\r
800 return 0;\r
801}\r
802\r
803\r
804extern int\r
805onig_number_of_names(regex_t* reg)\r
806{\r
807 NameTable* t = (NameTable* )reg->name_table;\r
808\r
809 if (IS_NOT_NULL(t))\r
810 return t->num_entries;\r
811 else\r
812 return 0;\r
813}\r
814\r
815#else /* USE_ST_LIBRARY */\r
816\r
817#define INIT_NAMES_ALLOC_NUM 8\r
818\r
819typedef struct {\r
820 NameEntry* e;\r
821 int num;\r
822 int alloc;\r
823} NameTable;\r
824\r
825#ifdef ONIG_DEBUG\r
826extern int\r
827onig_print_names(FILE* fp, regex_t* reg)\r
828{\r
829 int i, j;\r
830 NameEntry* e;\r
831 NameTable* t = (NameTable* )reg->name_table;\r
832\r
833 if (IS_NOT_NULL(t) && t->num > 0) {\r
834 fprintf(fp, "name table\n");\r
835 for (i = 0; i < t->num; i++) {\r
836 e = &(t->e[i]);\r
837 fprintf(fp, "%s: ", e->name);\r
838 if (e->back_num == 0) {\r
b602265d 839 fputs("-", fp);\r
14b0e578
CS
840 }\r
841 else if (e->back_num == 1) {\r
b602265d 842 fprintf(fp, "%d", e->back_ref1);\r
14b0e578
CS
843 }\r
844 else {\r
b602265d
DG
845 for (j = 0; j < e->back_num; j++) {\r
846 if (j > 0) fprintf(fp, ", ");\r
847 fprintf(fp, "%d", e->back_refs[j]);\r
848 }\r
14b0e578
CS
849 }\r
850 fputs("\n", fp);\r
851 }\r
852 fputs("\n", fp);\r
853 }\r
854 return 0;\r
855}\r
856#endif\r
857\r
858static int\r
859names_clear(regex_t* reg)\r
860{\r
861 int i;\r
862 NameEntry* e;\r
863 NameTable* t = (NameTable* )reg->name_table;\r
864\r
865 if (IS_NOT_NULL(t)) {\r
866 for (i = 0; i < t->num; i++) {\r
867 e = &(t->e[i]);\r
868 if (IS_NOT_NULL(e->name)) {\r
b602265d
DG
869 xfree(e->name);\r
870 e->name = NULL;\r
871 e->name_len = 0;\r
872 e->back_num = 0;\r
873 e->back_alloc = 0;\r
874 if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);\r
875 e->back_refs = (int* )NULL;\r
14b0e578
CS
876 }\r
877 }\r
878 if (IS_NOT_NULL(t->e)) {\r
879 xfree(t->e);\r
880 t->e = NULL;\r
881 }\r
882 t->num = 0;\r
883 }\r
884 return 0;\r
885}\r
886\r
887extern int\r
888onig_names_free(regex_t* reg)\r
889{\r
890 int r;\r
891 NameTable* t;\r
892\r
893 r = names_clear(reg);\r
b602265d 894 if (r != 0) return r;\r
14b0e578
CS
895\r
896 t = (NameTable* )reg->name_table;\r
897 if (IS_NOT_NULL(t)) xfree(t);\r
898 reg->name_table = NULL;\r
899 return 0;\r
900}\r
901\r
902static NameEntry*\r
903name_find(regex_t* reg, UChar* name, UChar* name_end)\r
904{\r
905 int i, len;\r
906 NameEntry* e;\r
907 NameTable* t = (NameTable* )reg->name_table;\r
908\r
909 if (IS_NOT_NULL(t)) {\r
910 len = name_end - name;\r
911 for (i = 0; i < t->num; i++) {\r
912 e = &(t->e[i]);\r
913 if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)\r
b602265d 914 return e;\r
14b0e578
CS
915 }\r
916 }\r
917 return (NameEntry* )NULL;\r
918}\r
919\r
920extern int\r
921onig_foreach_name(regex_t* reg,\r
922 int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)\r
923{\r
924 int i, r;\r
925 NameEntry* e;\r
926 NameTable* t = (NameTable* )reg->name_table;\r
927\r
928 if (IS_NOT_NULL(t)) {\r
929 for (i = 0; i < t->num; i++) {\r
930 e = &(t->e[i]);\r
931 r = (*func)(e->name, e->name + e->name_len, e->back_num,\r
b602265d
DG
932 (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),\r
933 reg, arg);\r
14b0e578
CS
934 if (r != 0) return r;\r
935 }\r
936 }\r
937 return 0;\r
938}\r
939\r
940extern int\r
941onig_number_of_names(regex_t* reg)\r
942{\r
943 NameTable* t = (NameTable* )reg->name_table;\r
944\r
945 if (IS_NOT_NULL(t))\r
946 return t->num;\r
947 else\r
948 return 0;\r
949}\r
950\r
951#endif /* else USE_ST_LIBRARY */\r
952\r
953static int\r
954name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)\r
955{\r
b602265d 956 int r;\r
14b0e578
CS
957 int alloc;\r
958 NameEntry* e;\r
959 NameTable* t = (NameTable* )reg->name_table;\r
960\r
961 if (name_end - name <= 0)\r
962 return ONIGERR_EMPTY_GROUP_NAME;\r
963\r
964 e = name_find(reg, name, name_end);\r
965 if (IS_NULL(e)) {\r
966#ifdef USE_ST_LIBRARY\r
967 if (IS_NULL(t)) {\r
b602265d 968 t = onig_st_init_strend_table_with_size(INIT_NAMES_ALLOC_NUM);\r
14b0e578
CS
969 reg->name_table = (void* )t;\r
970 }\r
971 e = (NameEntry* )xmalloc(sizeof(NameEntry));\r
972 CHECK_NULL_RETURN_MEMERR(e);\r
973\r
b602265d 974 e->name = onigenc_strdup(reg->enc, name, name_end);\r
14b0e578
CS
975 if (IS_NULL(e->name)) {\r
976 xfree(e); return ONIGERR_MEMORY;\r
977 }\r
b602265d
DG
978 r = onig_st_insert_strend(t, e->name, (e->name + (name_end - name)),\r
979 (HashDataType )e);\r
980 if (r < 0) return r;\r
14b0e578 981\r
b602265d 982 e->name_len = (int )(name_end - name);\r
14b0e578
CS
983 e->back_num = 0;\r
984 e->back_alloc = 0;\r
985 e->back_refs = (int* )NULL;\r
986\r
987#else\r
988\r
989 if (IS_NULL(t)) {\r
990 alloc = INIT_NAMES_ALLOC_NUM;\r
991 t = (NameTable* )xmalloc(sizeof(NameTable));\r
992 CHECK_NULL_RETURN_MEMERR(t);\r
993 t->e = NULL;\r
994 t->alloc = 0;\r
995 t->num = 0;\r
996\r
997 t->e = (NameEntry* )xmalloc(sizeof(NameEntry) * alloc);\r
998 if (IS_NULL(t->e)) {\r
b602265d
DG
999 xfree(t);\r
1000 return ONIGERR_MEMORY;\r
14b0e578
CS
1001 }\r
1002 t->alloc = alloc;\r
1003 reg->name_table = t;\r
1004 goto clear;\r
1005 }\r
1006 else if (t->num == t->alloc) {\r
1007 int i;\r
1008\r
1009 alloc = t->alloc * 2;\r
b602265d 1010 t->e = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc, sizeof(NameEntry) * t->alloc);\r
14b0e578
CS
1011 CHECK_NULL_RETURN_MEMERR(t->e);\r
1012 t->alloc = alloc;\r
1013\r
1014 clear:\r
1015 for (i = t->num; i < t->alloc; i++) {\r
b602265d
DG
1016 t->e[i].name = NULL;\r
1017 t->e[i].name_len = 0;\r
1018 t->e[i].back_num = 0;\r
1019 t->e[i].back_alloc = 0;\r
1020 t->e[i].back_refs = (int* )NULL;\r
14b0e578
CS
1021 }\r
1022 }\r
1023 e = &(t->e[t->num]);\r
1024 t->num++;\r
b602265d 1025 e->name = onigenc_strdup(reg->enc, name, name_end);\r
14b0e578
CS
1026 if (IS_NULL(e->name)) return ONIGERR_MEMORY;\r
1027 e->name_len = name_end - name;\r
1028#endif\r
1029 }\r
1030\r
1031 if (e->back_num >= 1 &&\r
1032 ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME)) {\r
1033 onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME,\r
b602265d 1034 name, name_end);\r
14b0e578
CS
1035 return ONIGERR_MULTIPLEX_DEFINED_NAME;\r
1036 }\r
1037\r
1038 e->back_num++;\r
1039 if (e->back_num == 1) {\r
1040 e->back_ref1 = backref;\r
1041 }\r
1042 else {\r
1043 if (e->back_num == 2) {\r
1044 alloc = INIT_NAME_BACKREFS_ALLOC_NUM;\r
1045 e->back_refs = (int* )xmalloc(sizeof(int) * alloc);\r
1046 CHECK_NULL_RETURN_MEMERR(e->back_refs);\r
1047 e->back_alloc = alloc;\r
1048 e->back_refs[0] = e->back_ref1;\r
1049 e->back_refs[1] = backref;\r
1050 }\r
1051 else {\r
1052 if (e->back_num > e->back_alloc) {\r
b602265d
DG
1053 alloc = e->back_alloc * 2;\r
1054 e->back_refs = (int* )xrealloc(e->back_refs, sizeof(int) * alloc, sizeof(int) * e->back_alloc);\r
1055 CHECK_NULL_RETURN_MEMERR(e->back_refs);\r
1056 e->back_alloc = alloc;\r
14b0e578
CS
1057 }\r
1058 e->back_refs[e->back_num - 1] = backref;\r
1059 }\r
1060 }\r
1061\r
1062 return 0;\r
1063}\r
1064\r
1065extern int\r
1066onig_name_to_group_numbers(regex_t* reg, const UChar* name,\r
b602265d 1067 const UChar* name_end, int** nums)\r
14b0e578
CS
1068{\r
1069 NameEntry* e = name_find(reg, name, name_end);\r
1070\r
1071 if (IS_NULL(e)) return ONIGERR_UNDEFINED_NAME_REFERENCE;\r
1072\r
1073 switch (e->back_num) {\r
1074 case 0:\r
1075 break;\r
1076 case 1:\r
1077 *nums = &(e->back_ref1);\r
1078 break;\r
1079 default:\r
1080 *nums = e->back_refs;\r
1081 break;\r
1082 }\r
1083 return e->back_num;\r
1084}\r
1085\r
1086extern int\r
1087onig_name_to_backref_number(regex_t* reg, const UChar* name,\r
b602265d 1088 const UChar* name_end, OnigRegion *region)\r
14b0e578
CS
1089{\r
1090 int i, n, *nums;\r
1091\r
1092 n = onig_name_to_group_numbers(reg, name, name_end, &nums);\r
1093 if (n < 0)\r
1094 return n;\r
1095 else if (n == 0)\r
1096 return ONIGERR_PARSER_BUG;\r
1097 else if (n == 1)\r
1098 return nums[0];\r
1099 else {\r
1100 if (IS_NOT_NULL(region)) {\r
1101 for (i = n - 1; i >= 0; i--) {\r
b602265d
DG
1102 if (region->beg[nums[i]] != ONIG_REGION_NOTPOS)\r
1103 return nums[i];\r
14b0e578
CS
1104 }\r
1105 }\r
1106 return nums[n - 1];\r
1107 }\r
1108}\r
1109\r
14b0e578
CS
1110extern int\r
1111onig_noname_group_capture_is_active(regex_t* reg)\r
1112{\r
1113 if (ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_DONT_CAPTURE_GROUP))\r
1114 return 0;\r
1115\r
14b0e578
CS
1116 if (onig_number_of_names(reg) > 0 &&\r
1117 IS_SYNTAX_BV(reg->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&\r
1118 !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {\r
1119 return 0;\r
1120 }\r
14b0e578
CS
1121\r
1122 return 1;\r
1123}\r
1124\r
b602265d 1125#ifdef USE_CALLOUT\r
14b0e578 1126\r
b602265d
DG
1127typedef struct {\r
1128 OnigCalloutType type;\r
1129 int in;\r
1130 OnigCalloutFunc start_func;\r
1131 OnigCalloutFunc end_func;\r
1132 int arg_num;\r
1133 int opt_arg_num;\r
1134 unsigned int arg_types[ONIG_CALLOUT_MAX_ARGS_NUM];\r
1135 OnigValue opt_defaults[ONIG_CALLOUT_MAX_ARGS_NUM];\r
1136 UChar* name; /* reference to GlobalCalloutNameTable entry: e->name */\r
1137} CalloutNameListEntry;\r
14b0e578 1138\r
b602265d
DG
1139typedef struct {\r
1140 int n;\r
1141 int alloc;\r
1142 CalloutNameListEntry* v;\r
1143} CalloutNameListType;\r
14b0e578 1144\r
b602265d 1145static CalloutNameListType* GlobalCalloutNameList;\r
14b0e578
CS
1146\r
1147static int\r
b602265d 1148make_callout_func_list(CalloutNameListType** rs, int init_size)\r
14b0e578 1149{\r
b602265d
DG
1150 CalloutNameListType* s;\r
1151 CalloutNameListEntry* v;\r
14b0e578 1152\r
b602265d 1153 *rs = 0;\r
14b0e578 1154\r
b602265d
DG
1155 s = xmalloc(sizeof(*s));\r
1156 if (IS_NULL(s)) return ONIGERR_MEMORY;\r
14b0e578 1157\r
b602265d
DG
1158 v = (CalloutNameListEntry* )xmalloc(sizeof(CalloutNameListEntry) * init_size);\r
1159 if (IS_NULL(v)) {\r
1160 xfree(s);\r
1161 return ONIGERR_MEMORY;\r
14b0e578
CS
1162 }\r
1163\r
b602265d
DG
1164 s->n = 0;\r
1165 s->alloc = init_size;\r
1166 s->v = v;\r
14b0e578 1167\r
b602265d
DG
1168 *rs = s;\r
1169 return ONIG_NORMAL;\r
14b0e578
CS
1170}\r
1171\r
b602265d
DG
1172static void\r
1173free_callout_func_list(CalloutNameListType* s)\r
1174{\r
1175 if (IS_NOT_NULL(s)) {\r
1176 if (IS_NOT_NULL(s->v)) {\r
1177 int i, j;\r
1178\r
1179 for (i = 0; i < s->n; i++) {\r
1180 CalloutNameListEntry* e = s->v + i;\r
1181 for (j = e->arg_num - e->opt_arg_num; j < e->arg_num; j++) {\r
1182 if (e->arg_types[j] == ONIG_TYPE_STRING) {\r
1183 UChar* p = e->opt_defaults[j].s.start;\r
1184 if (IS_NOT_NULL(p)) xfree(p);\r
1185 }\r
1186 }\r
1187 }\r
1188 xfree(s->v);\r
1189 }\r
1190 xfree(s);\r
1191 }\r
1192}\r
14b0e578 1193\r
b602265d
DG
1194static int\r
1195callout_func_list_add(CalloutNameListType* s, int* rid)\r
1196{\r
1197 if (s->n >= s->alloc) {\r
1198 int new_size = s->alloc * 2;\r
1199 CalloutNameListEntry* nv = (CalloutNameListEntry* )\r
1200 xrealloc(s->v, sizeof(CalloutNameListEntry) * new_size, sizeof(CalloutNameListEntry)*s->alloc);\r
1201 if (IS_NULL(nv)) return ONIGERR_MEMORY;\r
14b0e578 1202\r
b602265d
DG
1203 s->alloc = new_size;\r
1204 s->v = nv;\r
1205 }\r
14b0e578 1206\r
b602265d 1207 *rid = s->n;\r
14b0e578 1208\r
b602265d
DG
1209 xmemset(&(s->v[s->n]), 0, sizeof(*(s->v)));\r
1210 s->n++;\r
1211 return ONIG_NORMAL;\r
1212}\r
14b0e578 1213\r
14b0e578 1214\r
b602265d
DG
1215typedef struct {\r
1216 UChar* name;\r
1217 int name_len; /* byte length */\r
1218 int id;\r
1219} CalloutNameEntry;\r
14b0e578 1220\r
b602265d
DG
1221#ifdef USE_ST_LIBRARY\r
1222typedef st_table CalloutNameTable;\r
14b0e578 1223#else\r
b602265d
DG
1224typedef struct {\r
1225 CalloutNameEntry* e;\r
1226 int num;\r
1227 int alloc;\r
1228} CalloutNameTable;\r
14b0e578 1229#endif\r
14b0e578 1230\r
b602265d
DG
1231static CalloutNameTable* GlobalCalloutNameTable;\r
1232static int CalloutNameIDCounter;\r
14b0e578 1233\r
b602265d 1234#ifdef USE_ST_LIBRARY\r
14b0e578 1235\r
b602265d
DG
1236static int\r
1237i_free_callout_name_entry(st_callout_name_key* key, CalloutNameEntry* e,\r
1238 void* arg ARG_UNUSED)\r
1239{\r
1240 xfree(e->name);\r
1241 /*xfree(key->s); */ /* is same as e->name */\r
1242 xfree(key);\r
1243 xfree(e);\r
1244 return ST_DELETE;\r
1245}\r
14b0e578 1246\r
b602265d
DG
1247static int\r
1248callout_name_table_clear(CalloutNameTable* t)\r
1249{\r
1250 if (IS_NOT_NULL(t)) {\r
1251 onig_st_foreach(t, i_free_callout_name_entry, 0);\r
1252 }\r
1253 return 0;\r
1254}\r
14b0e578 1255\r
b602265d
DG
1256static int\r
1257global_callout_name_table_free(void)\r
1258{\r
1259 if (IS_NOT_NULL(GlobalCalloutNameTable)) {\r
1260 int r = callout_name_table_clear(GlobalCalloutNameTable);\r
1261 if (r != 0) return r;\r
14b0e578 1262\r
b602265d
DG
1263 onig_st_free_table(GlobalCalloutNameTable);\r
1264 GlobalCalloutNameTable = 0;\r
1265 CalloutNameIDCounter = 0;\r
14b0e578
CS
1266 }\r
1267\r
b602265d
DG
1268 return 0;\r
1269}\r
1270\r
1271static CalloutNameEntry*\r
1272callout_name_find(OnigEncoding enc, int is_not_single,\r
1273 const UChar* name, const UChar* name_end)\r
1274{\r
1275 int r;\r
1276 CalloutNameEntry* e;\r
1277 CalloutNameTable* t = GlobalCalloutNameTable;\r
14b0e578 1278\r
b602265d
DG
1279 e = (CalloutNameEntry* )NULL;\r
1280 if (IS_NOT_NULL(t)) {\r
1281 r = onig_st_lookup_callout_name_table(t, enc, is_not_single, name, name_end,\r
1282 (HashDataType* )((void* )(&e)));\r
1283 if (r == 0) { /* not found */\r
1284 if (enc != ONIG_ENCODING_ASCII &&\r
1285 ONIGENC_IS_ASCII_COMPATIBLE_ENCODING(enc)) {\r
1286 enc = ONIG_ENCODING_ASCII;\r
1287 onig_st_lookup_callout_name_table(t, enc, is_not_single, name, name_end,\r
1288 (HashDataType* )((void* )(&e)));\r
1289 }\r
1290 }\r
14b0e578 1291 }\r
b602265d
DG
1292 return e;\r
1293}\r
1294\r
14b0e578 1295#else\r
b602265d
DG
1296\r
1297static int\r
1298callout_name_table_clear(CalloutNameTable* t)\r
1299{\r
1300 int i;\r
1301 CalloutNameEntry* e;\r
1302\r
1303 if (IS_NOT_NULL(t)) {\r
1304 for (i = 0; i < t->num; i++) {\r
1305 e = &(t->e[i]);\r
1306 if (IS_NOT_NULL(e->name)) {\r
1307 xfree(e->name);\r
1308 e->name = NULL;\r
1309 e->name_len = 0;\r
1310 e->id = 0;\r
1311 e->func = 0;\r
1312 }\r
1313 }\r
1314 if (IS_NOT_NULL(t->e)) {\r
1315 xfree(t->e);\r
1316 t->e = NULL;\r
1317 }\r
1318 t->num = 0;\r
1319 }\r
1320 return 0;\r
14b0e578
CS
1321}\r
1322\r
b602265d
DG
1323static int\r
1324global_callout_name_table_free(void)\r
14b0e578 1325{\r
b602265d
DG
1326 if (IS_NOT_NULL(GlobalCalloutNameTable)) {\r
1327 int r = callout_name_table_clear(GlobalCalloutNameTable);\r
1328 if (r != 0) return r;\r
14b0e578 1329\r
b602265d
DG
1330 xfree(GlobalCalloutNameTable);\r
1331 GlobalCalloutNameTable = 0;\r
1332 CalloutNameIDCounter = 0;\r
14b0e578 1333 }\r
14b0e578
CS
1334 return 0;\r
1335}\r
14b0e578 1336\r
b602265d
DG
1337static CalloutNameEntry*\r
1338callout_name_find(UChar* name, UChar* name_end)\r
14b0e578 1339{\r
b602265d
DG
1340 int i, len;\r
1341 CalloutNameEntry* e;\r
1342 CalloutNameTable* t = Calloutnames;\r
14b0e578 1343\r
b602265d
DG
1344 if (IS_NOT_NULL(t)) {\r
1345 len = name_end - name;\r
1346 for (i = 0; i < t->num; i++) {\r
1347 e = &(t->e[i]);\r
1348 if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)\r
1349 return e;\r
1350 }\r
14b0e578 1351 }\r
b602265d
DG
1352 return (CalloutNameEntry* )NULL;\r
1353}\r
1354\r
14b0e578
CS
1355#endif\r
1356\r
b602265d
DG
1357/* name string must be single byte char string. */\r
1358static int\r
1359callout_name_entry(CalloutNameEntry** rentry, OnigEncoding enc,\r
1360 int is_not_single, UChar* name, UChar* name_end)\r
1361{\r
1362 int r;\r
1363 CalloutNameEntry* e;\r
1364 CalloutNameTable* t = GlobalCalloutNameTable;\r
14b0e578 1365\r
b602265d
DG
1366 *rentry = 0;\r
1367 if (name_end - name <= 0)\r
1368 return ONIGERR_INVALID_CALLOUT_NAME;\r
14b0e578 1369\r
b602265d
DG
1370 e = callout_name_find(enc, is_not_single, name, name_end);\r
1371 if (IS_NULL(e)) {\r
1372#ifdef USE_ST_LIBRARY\r
1373 if (IS_NULL(t)) {\r
1374 t = onig_st_init_callout_name_table_with_size(INIT_NAMES_ALLOC_NUM);\r
1375 GlobalCalloutNameTable = t;\r
1376 }\r
1377 e = (CalloutNameEntry* )xmalloc(sizeof(CalloutNameEntry));\r
1378 CHECK_NULL_RETURN_MEMERR(e);\r
1379\r
1380 e->name = onigenc_strdup(enc, name, name_end);\r
1381 if (IS_NULL(e->name)) {\r
1382 xfree(e); return ONIGERR_MEMORY;\r
1383 }\r
1384\r
1385 r = st_insert_callout_name_table(t, enc, is_not_single,\r
1386 e->name, (e->name + (name_end - name)),\r
1387 (HashDataType )e);\r
1388 if (r < 0) return r;\r
1389\r
1390#else\r
1391\r
1392 int alloc;\r
1393\r
1394 if (IS_NULL(t)) {\r
1395 alloc = INIT_NAMES_ALLOC_NUM;\r
1396 t = (CalloutNameTable* )xmalloc(sizeof(CalloutNameTable));\r
1397 CHECK_NULL_RETURN_MEMERR(t);\r
1398 t->e = NULL;\r
1399 t->alloc = 0;\r
1400 t->num = 0;\r
1401\r
1402 t->e = (CalloutNameEntry* )xmalloc(sizeof(CalloutNameEntry) * alloc);\r
1403 if (IS_NULL(t->e)) {\r
1404 xfree(t);\r
1405 return ONIGERR_MEMORY;\r
1406 }\r
1407 t->alloc = alloc;\r
1408 GlobalCalloutNameTable = t;\r
1409 goto clear;\r
1410 }\r
1411 else if (t->num == t->alloc) {\r
1412 int i;\r
1413\r
1414 alloc = t->alloc * 2;\r
1415 t->e = (CalloutNameEntry* )xrealloc(t->e, sizeof(CalloutNameEntry) * alloc, sizeof(CalloutNameEntry)*t->alloc);\r
1416 CHECK_NULL_RETURN_MEMERR(t->e);\r
1417 t->alloc = alloc;\r
1418\r
1419 clear:\r
1420 for (i = t->num; i < t->alloc; i++) {\r
1421 t->e[i].name = NULL;\r
1422 t->e[i].name_len = 0;\r
1423 t->e[i].id = 0;\r
1424 }\r
1425 }\r
1426 e = &(t->e[t->num]);\r
1427 t->num++;\r
1428 e->name = onigenc_strdup(enc, name, name_end);\r
1429 if (IS_NULL(e->name)) return ONIGERR_MEMORY;\r
1430#endif\r
1431\r
1432 CalloutNameIDCounter++;\r
1433 e->id = CalloutNameIDCounter;\r
1434 e->name_len = (int )(name_end - name);\r
1435 }\r
1436\r
1437 *rentry = e;\r
1438 return e->id;\r
1439}\r
1440\r
1441static int\r
1442is_allowed_callout_name(OnigEncoding enc, UChar* name, UChar* name_end)\r
14b0e578 1443{\r
b602265d
DG
1444 UChar* p;\r
1445 OnigCodePoint c;\r
1446\r
1447 if (name >= name_end) return 0;\r
1448\r
1449 p = name;\r
1450 while (p < name_end) {\r
1451 c = ONIGENC_MBC_TO_CODE(enc, p, name_end);\r
1452 if (! IS_ALLOWED_CODE_IN_CALLOUT_NAME(c))\r
1453 return 0;\r
1454\r
1455 if (p == name) {\r
1456 if (c >= '0' && c <= '9') return 0;\r
1457 }\r
1458\r
1459 p += ONIGENC_MBC_ENC_LEN(enc, p);\r
1460 }\r
1461\r
1462 return 1;\r
14b0e578
CS
1463}\r
1464\r
b602265d
DG
1465static int\r
1466is_allowed_callout_tag_name(OnigEncoding enc, UChar* name, UChar* name_end)\r
14b0e578 1467{\r
b602265d
DG
1468 UChar* p;\r
1469 OnigCodePoint c;\r
14b0e578 1470\r
b602265d
DG
1471 if (name >= name_end) return 0;\r
1472\r
1473 p = name;\r
1474 while (p < name_end) {\r
1475 c = ONIGENC_MBC_TO_CODE(enc, p, name_end);\r
1476 if (! IS_ALLOWED_CODE_IN_CALLOUT_TAG_NAME(c))\r
1477 return 0;\r
1478\r
1479 if (p == name) {\r
1480 if (c >= '0' && c <= '9') return 0;\r
1481 }\r
1482\r
1483 p += ONIGENC_MBC_ENC_LEN(enc, p);\r
1484 }\r
1485\r
1486 return 1;\r
14b0e578
CS
1487}\r
1488\r
b602265d
DG
1489extern int\r
1490onig_set_callout_of_name(OnigEncoding enc, OnigCalloutType callout_type,\r
1491 UChar* name, UChar* name_end, int in,\r
1492 OnigCalloutFunc start_func,\r
1493 OnigCalloutFunc end_func,\r
1494 int arg_num, unsigned int arg_types[],\r
1495 int opt_arg_num, OnigValue opt_defaults[])\r
14b0e578 1496{\r
b602265d
DG
1497 int r;\r
1498 int i;\r
1499 int j;\r
1500 int id;\r
1501 int is_not_single;\r
1502 CalloutNameEntry* e;\r
1503 CalloutNameListEntry* fe;\r
14b0e578 1504\r
b602265d
DG
1505 if (callout_type != ONIG_CALLOUT_TYPE_SINGLE)\r
1506 return ONIGERR_INVALID_ARGUMENT;\r
14b0e578 1507\r
b602265d
DG
1508 if (arg_num < 0 || arg_num > ONIG_CALLOUT_MAX_ARGS_NUM)\r
1509 return ONIGERR_INVALID_CALLOUT_ARG;\r
14b0e578 1510\r
b602265d
DG
1511 if (opt_arg_num < 0 || opt_arg_num > arg_num)\r
1512 return ONIGERR_INVALID_CALLOUT_ARG;\r
14b0e578 1513\r
b602265d
DG
1514 if (start_func == 0 && end_func == 0)\r
1515 return ONIGERR_INVALID_CALLOUT_ARG;\r
1516\r
1517 if ((in & ONIG_CALLOUT_IN_PROGRESS) == 0 && (in & ONIG_CALLOUT_IN_RETRACTION) == 0)\r
1518 return ONIGERR_INVALID_CALLOUT_ARG;\r
1519\r
1520 for (i = 0; i < arg_num; i++) {\r
1521 unsigned int t = arg_types[i];\r
1522 if (t == ONIG_TYPE_VOID)\r
1523 return ONIGERR_INVALID_CALLOUT_ARG;\r
1524 else {\r
1525 if (i >= arg_num - opt_arg_num) {\r
1526 if (t != ONIG_TYPE_LONG && t != ONIG_TYPE_CHAR && t != ONIG_TYPE_STRING &&\r
1527 t != ONIG_TYPE_TAG)\r
1528 return ONIGERR_INVALID_CALLOUT_ARG;\r
1529 }\r
1530 else {\r
1531 if (t != ONIG_TYPE_LONG) {\r
1532 t = t & ~ONIG_TYPE_LONG;\r
1533 if (t != ONIG_TYPE_CHAR && t != ONIG_TYPE_STRING && t != ONIG_TYPE_TAG)\r
1534 return ONIGERR_INVALID_CALLOUT_ARG;\r
1535 }\r
14b0e578
CS
1536 }\r
1537 }\r
1538 }\r
1539\r
b602265d
DG
1540 if (! is_allowed_callout_name(enc, name, name_end)) {\r
1541 return ONIGERR_INVALID_CALLOUT_NAME;\r
14b0e578 1542 }\r
14b0e578 1543\r
b602265d
DG
1544 is_not_single = (callout_type != ONIG_CALLOUT_TYPE_SINGLE);\r
1545 id = callout_name_entry(&e, enc, is_not_single, name, name_end);\r
1546 if (id < 0) return id;\r
14b0e578 1547\r
b602265d
DG
1548 r = ONIG_NORMAL;\r
1549 if (IS_NULL(GlobalCalloutNameList)) {\r
1550 r = make_callout_func_list(&GlobalCalloutNameList, 10);\r
1551 if (r != ONIG_NORMAL) return r;\r
1552 }\r
14b0e578 1553\r
b602265d
DG
1554 while (id >= GlobalCalloutNameList->n) {\r
1555 int rid;\r
1556 r = callout_func_list_add(GlobalCalloutNameList, &rid);\r
1557 if (r != ONIG_NORMAL) return r;\r
14b0e578
CS
1558 }\r
1559\r
b602265d
DG
1560 fe = GlobalCalloutNameList->v + id;\r
1561 fe->type = callout_type;\r
1562 fe->in = in;\r
1563 fe->start_func = start_func;\r
1564 fe->end_func = end_func;\r
1565 fe->arg_num = arg_num;\r
1566 fe->opt_arg_num = opt_arg_num;\r
1567 fe->name = e->name;\r
14b0e578 1568\r
b602265d
DG
1569 for (i = 0; i < arg_num; i++) {\r
1570 fe->arg_types[i] = arg_types[i];\r
1571 }\r
1572 for (i = arg_num - opt_arg_num, j = 0; i < arg_num; i++, j++) {\r
1573 if (fe->arg_types[i] == ONIG_TYPE_STRING) {\r
1574 OnigValue* val = opt_defaults + j;\r
1575 UChar* ds = onigenc_strdup(enc, val->s.start, val->s.end);\r
1576 CHECK_NULL_RETURN_MEMERR(ds);\r
14b0e578 1577\r
b602265d
DG
1578 fe->opt_defaults[i].s.start = ds;\r
1579 fe->opt_defaults[i].s.end = ds + (val->s.end - val->s.start);\r
1580 }\r
1581 else {\r
1582 fe->opt_defaults[i] = opt_defaults[j];\r
1583 }\r
1584 }\r
1585\r
1586 r = id;\r
1587 return r;\r
14b0e578
CS
1588}\r
1589\r
b602265d
DG
1590static int\r
1591get_callout_name_id_by_name(OnigEncoding enc, int is_not_single,\r
1592 UChar* name, UChar* name_end, int* rid)\r
14b0e578 1593{\r
b602265d
DG
1594 int r;\r
1595 CalloutNameEntry* e;\r
14b0e578 1596\r
b602265d
DG
1597 if (! is_allowed_callout_name(enc, name, name_end)) {\r
1598 return ONIGERR_INVALID_CALLOUT_NAME;\r
1599 }\r
1600\r
1601 e = callout_name_find(enc, is_not_single, name, name_end);\r
1602 if (IS_NULL(e)) {\r
1603 return ONIGERR_UNDEFINED_CALLOUT_NAME;\r
1604 }\r
1605\r
1606 r = ONIG_NORMAL;\r
1607 *rid = e->id;\r
1608\r
1609 return r;\r
14b0e578
CS
1610}\r
1611\r
b602265d
DG
1612extern OnigCalloutFunc\r
1613onig_get_callout_start_func(regex_t* reg, int callout_num)\r
14b0e578 1614{\r
b602265d
DG
1615 /* If used for callouts of contents, return 0. */\r
1616 CalloutListEntry* e;\r
14b0e578 1617\r
b602265d
DG
1618 e = onig_reg_callout_list_at(reg, callout_num);\r
1619 return e->start_func;\r
14b0e578
CS
1620}\r
1621\r
b602265d
DG
1622extern const UChar*\r
1623onig_get_callout_tag_start(regex_t* reg, int callout_num)\r
14b0e578 1624{\r
b602265d
DG
1625 CalloutListEntry* e = onig_reg_callout_list_at(reg, callout_num);\r
1626 return e->tag_start;\r
14b0e578
CS
1627}\r
1628\r
b602265d
DG
1629extern const UChar*\r
1630onig_get_callout_tag_end(regex_t* reg, int callout_num)\r
14b0e578 1631{\r
b602265d
DG
1632 CalloutListEntry* e = onig_reg_callout_list_at(reg, callout_num);\r
1633 return e->tag_end;\r
1634}\r
14b0e578 1635\r
14b0e578 1636\r
b602265d
DG
1637extern OnigCalloutType\r
1638onig_get_callout_type_by_name_id(int name_id)\r
1639{\r
1640 if (name_id < 0 || name_id >= GlobalCalloutNameList->n)\r
1641 return 0;\r
14b0e578 1642\r
b602265d 1643 return GlobalCalloutNameList->v[name_id].type;\r
14b0e578
CS
1644}\r
1645\r
b602265d
DG
1646extern OnigCalloutFunc\r
1647onig_get_callout_start_func_by_name_id(int name_id)\r
14b0e578 1648{\r
b602265d
DG
1649 if (name_id < 0 || name_id >= GlobalCalloutNameList->n)\r
1650 return 0;\r
14b0e578 1651\r
b602265d 1652 return GlobalCalloutNameList->v[name_id].start_func;\r
14b0e578
CS
1653}\r
1654\r
b602265d
DG
1655extern OnigCalloutFunc\r
1656onig_get_callout_end_func_by_name_id(int name_id)\r
14b0e578 1657{\r
b602265d
DG
1658 if (name_id < 0 || name_id >= GlobalCalloutNameList->n)\r
1659 return 0;\r
14b0e578 1660\r
b602265d 1661 return GlobalCalloutNameList->v[name_id].end_func;\r
14b0e578
CS
1662}\r
1663\r
b602265d
DG
1664extern int\r
1665onig_get_callout_in_by_name_id(int name_id)\r
14b0e578 1666{\r
b602265d
DG
1667 if (name_id < 0 || name_id >= GlobalCalloutNameList->n)\r
1668 return 0;\r
14b0e578 1669\r
b602265d
DG
1670 return GlobalCalloutNameList->v[name_id].in;\r
1671}\r
14b0e578 1672\r
b602265d
DG
1673static int\r
1674get_callout_arg_num_by_name_id(int name_id)\r
1675{\r
1676 return GlobalCalloutNameList->v[name_id].arg_num;\r
1677}\r
14b0e578 1678\r
b602265d
DG
1679static int\r
1680get_callout_opt_arg_num_by_name_id(int name_id)\r
14b0e578 1681{\r
b602265d 1682 return GlobalCalloutNameList->v[name_id].opt_arg_num;\r
14b0e578 1683}\r
14b0e578 1684\r
b602265d
DG
1685static unsigned int\r
1686get_callout_arg_type_by_name_id(int name_id, int index)\r
14b0e578 1687{\r
b602265d 1688 return GlobalCalloutNameList->v[name_id].arg_types[index];\r
14b0e578
CS
1689}\r
1690\r
b602265d
DG
1691static OnigValue\r
1692get_callout_opt_default_by_name_id(int name_id, int index)\r
14b0e578 1693{\r
b602265d 1694 return GlobalCalloutNameList->v[name_id].opt_defaults[index];\r
14b0e578
CS
1695}\r
1696\r
b602265d
DG
1697extern UChar*\r
1698onig_get_callout_name_by_name_id(int name_id)\r
14b0e578 1699{\r
b602265d
DG
1700 if (name_id < 0 || name_id >= GlobalCalloutNameList->n)\r
1701 return 0;\r
1702\r
1703 return GlobalCalloutNameList->v[name_id].name;\r
14b0e578
CS
1704}\r
1705\r
b602265d
DG
1706extern int\r
1707onig_global_callout_names_free(void)\r
14b0e578 1708{\r
b602265d
DG
1709 free_callout_func_list(GlobalCalloutNameList);\r
1710 GlobalCalloutNameList = 0;\r
14b0e578 1711\r
b602265d
DG
1712 global_callout_name_table_free();\r
1713 return ONIG_NORMAL;\r
14b0e578
CS
1714}\r
1715\r
14b0e578 1716\r
b602265d
DG
1717typedef st_table CalloutTagTable;\r
1718typedef intptr_t CalloutTagVal;\r
14b0e578 1719\r
b602265d 1720#define CALLOUT_TAG_LIST_FLAG_TAG_EXIST (1<<0)\r
14b0e578 1721\r
b602265d
DG
1722static int\r
1723i_callout_callout_list_set(UChar* key, CalloutTagVal e, void* arg)\r
1724{\r
1725 int num;\r
1726 RegexExt* ext = (RegexExt* )arg;\r
14b0e578 1727\r
b602265d
DG
1728 num = (int )e - 1;\r
1729 ext->callout_list[num].flag |= CALLOUT_TAG_LIST_FLAG_TAG_EXIST;\r
1730 return ST_CONTINUE;\r
1731}\r
14b0e578 1732\r
b602265d
DG
1733static int\r
1734setup_ext_callout_list_values(regex_t* reg)\r
1735{\r
1736 int i, j;\r
1737 RegexExt* ext;\r
1738\r
1739 ext = REG_EXTP(reg);\r
1740 if (IS_NOT_NULL(ext->tag_table)) {\r
1741 onig_st_foreach((CalloutTagTable *)ext->tag_table, i_callout_callout_list_set,\r
1742 (st_data_t )ext);\r
1743 }\r
1744\r
1745 for (i = 0; i < ext->callout_num; i++) {\r
1746 CalloutListEntry* e = ext->callout_list + i;\r
1747 if (e->of == ONIG_CALLOUT_OF_NAME) {\r
1748 for (j = 0; j < e->u.arg.num; j++) {\r
1749 if (e->u.arg.types[j] == ONIG_TYPE_TAG) {\r
1750 UChar* start;\r
1751 UChar* end;\r
1752 int num;\r
1753 start = e->u.arg.vals[j].s.start;\r
1754 end = e->u.arg.vals[j].s.end;\r
1755 num = onig_get_callout_num_by_tag(reg, start, end);\r
1756 if (num < 0) return num;\r
1757 e->u.arg.vals[j].tag = num;\r
1758 }\r
14b0e578
CS
1759 }\r
1760 }\r
14b0e578
CS
1761 }\r
1762\r
b602265d 1763 return ONIG_NORMAL;\r
14b0e578
CS
1764}\r
1765\r
1766extern int\r
b602265d 1767onig_callout_tag_is_exist_at_callout_num(regex_t* reg, int callout_num)\r
14b0e578 1768{\r
b602265d 1769 RegexExt* ext = REG_EXTP(reg);\r
14b0e578 1770\r
b602265d
DG
1771 if (IS_NULL(ext) || IS_NULL(ext->callout_list)) return 0;\r
1772 if (callout_num > ext->callout_num) return 0;\r
14b0e578 1773\r
b602265d
DG
1774 return (ext->callout_list[callout_num].flag &\r
1775 CALLOUT_TAG_LIST_FLAG_TAG_EXIST) != 0 ? 1 : 0;\r
14b0e578
CS
1776}\r
1777\r
b602265d
DG
1778static int\r
1779i_free_callout_tag_entry(UChar* key, CalloutTagVal e, void* arg ARG_UNUSED)\r
14b0e578 1780{\r
b602265d
DG
1781 xfree(key);\r
1782 return ST_DELETE;\r
14b0e578
CS
1783}\r
1784\r
b602265d
DG
1785static int\r
1786callout_tag_table_clear(CalloutTagTable* t)\r
14b0e578 1787{\r
b602265d
DG
1788 if (IS_NOT_NULL(t)) {\r
1789 onig_st_foreach(t, i_free_callout_tag_entry, 0);\r
14b0e578 1790 }\r
b602265d 1791 return 0;\r
14b0e578
CS
1792}\r
1793\r
b602265d
DG
1794extern int\r
1795onig_callout_tag_table_free(void* table)\r
14b0e578 1796{\r
b602265d 1797 CalloutTagTable* t = (CalloutTagTable* )table;\r
14b0e578 1798\r
b602265d
DG
1799 if (IS_NOT_NULL(t)) {\r
1800 int r = callout_tag_table_clear(t);\r
1801 if (r != 0) return r;\r
14b0e578 1802\r
b602265d
DG
1803 onig_st_free_table(t);\r
1804 }\r
14b0e578 1805\r
b602265d 1806 return 0;\r
14b0e578
CS
1807}\r
1808\r
b602265d
DG
1809extern int\r
1810onig_get_callout_num_by_tag(regex_t* reg,\r
1811 const UChar* tag, const UChar* tag_end)\r
14b0e578 1812{\r
b602265d
DG
1813 int r;\r
1814 RegexExt* ext;\r
1815 CalloutTagVal e;\r
14b0e578 1816\r
b602265d
DG
1817 ext = REG_EXTP(reg);\r
1818 if (IS_NULL(ext) || IS_NULL(ext->tag_table))\r
1819 return ONIGERR_INVALID_CALLOUT_TAG_NAME;\r
14b0e578 1820\r
b602265d
DG
1821 r = onig_st_lookup_strend(ext->tag_table, tag, tag_end,\r
1822 (HashDataType* )((void* )(&e)));\r
1823 if (r == 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME;\r
1824 return (int )e;\r
14b0e578
CS
1825}\r
1826\r
b602265d
DG
1827static CalloutTagVal\r
1828callout_tag_find(CalloutTagTable* t, const UChar* name, const UChar* name_end)\r
14b0e578 1829{\r
b602265d 1830 CalloutTagVal e;\r
14b0e578 1831\r
b602265d
DG
1832 e = -1;\r
1833 if (IS_NOT_NULL(t)) {\r
1834 onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));\r
14b0e578 1835 }\r
b602265d 1836 return e;\r
14b0e578
CS
1837}\r
1838\r
1839static int\r
b602265d 1840callout_tag_table_new(CalloutTagTable** rt)\r
14b0e578 1841{\r
b602265d
DG
1842 CalloutTagTable* t;\r
1843\r
1844 *rt = 0;\r
1845 t = onig_st_init_strend_table_with_size(INIT_TAG_NAMES_ALLOC_NUM);\r
1846 CHECK_NULL_RETURN_MEMERR(t);\r
1847\r
1848 *rt = t;\r
1849 return ONIG_NORMAL;\r
14b0e578
CS
1850}\r
1851\r
14b0e578 1852static int\r
b602265d
DG
1853callout_tag_entry_raw(CalloutTagTable* t, UChar* name, UChar* name_end,\r
1854 CalloutTagVal entry_val)\r
14b0e578 1855{\r
b602265d
DG
1856 int r;\r
1857 CalloutTagVal val;\r
14b0e578 1858\r
b602265d
DG
1859 if (name_end - name <= 0)\r
1860 return ONIGERR_INVALID_CALLOUT_TAG_NAME;\r
14b0e578 1861\r
b602265d
DG
1862 val = callout_tag_find(t, name, name_end);\r
1863 if (val >= 0)\r
1864 return ONIGERR_MULTIPLEX_DEFINED_NAME;\r
14b0e578 1865\r
b602265d
DG
1866 r = onig_st_insert_strend(t, name, name_end, (HashDataType )entry_val);\r
1867 if (r < 0) return r;\r
14b0e578 1868\r
b602265d 1869 return ONIG_NORMAL;\r
14b0e578
CS
1870}\r
1871\r
1872static int\r
b602265d 1873ext_ensure_tag_table(regex_t* reg)\r
14b0e578 1874{\r
b602265d
DG
1875 int r;\r
1876 RegexExt* ext;\r
1877 CalloutTagTable* t;\r
14b0e578 1878\r
b602265d
DG
1879 ext = onig_get_regex_ext(reg);\r
1880 CHECK_NULL_RETURN_MEMERR(ext);\r
14b0e578 1881\r
b602265d
DG
1882 if (IS_NULL(ext->tag_table)) {\r
1883 r = callout_tag_table_new(&t);\r
1884 if (r != ONIG_NORMAL) return r;\r
1885\r
1886 ext->tag_table = t;\r
14b0e578 1887 }\r
b602265d
DG
1888\r
1889 return ONIG_NORMAL;\r
14b0e578
CS
1890}\r
1891\r
1892static int\r
b602265d
DG
1893callout_tag_entry(regex_t* reg, UChar* name, UChar* name_end,\r
1894 CalloutTagVal entry_val)\r
14b0e578 1895{\r
b602265d
DG
1896 int r;\r
1897 RegexExt* ext;\r
1898 CalloutListEntry* e;\r
14b0e578 1899\r
b602265d
DG
1900 r = ext_ensure_tag_table(reg);\r
1901 if (r != ONIG_NORMAL) return r;\r
14b0e578 1902\r
b602265d
DG
1903 ext = onig_get_regex_ext(reg);\r
1904 r = callout_tag_entry_raw(ext->tag_table, name, name_end, entry_val);\r
14b0e578 1905\r
b602265d
DG
1906 e = onig_reg_callout_list_at(reg, (int )entry_val);\r
1907 e->tag_start = name;\r
1908 e->tag_end = name_end;\r
14b0e578 1909\r
b602265d
DG
1910 return r;\r
1911}\r
14b0e578 1912\r
b602265d 1913#endif /* USE_CALLOUT */\r
14b0e578 1914\r
14b0e578 1915\r
b602265d 1916#define INIT_SCANENV_MEMENV_ALLOC_SIZE 16\r
14b0e578 1917\r
b602265d
DG
1918static void\r
1919scan_env_clear(ScanEnv* env)\r
14b0e578 1920{\r
b602265d
DG
1921 MEM_STATUS_CLEAR(env->capture_history);\r
1922 MEM_STATUS_CLEAR(env->bt_mem_start);\r
1923 MEM_STATUS_CLEAR(env->bt_mem_end);\r
1924 MEM_STATUS_CLEAR(env->backrefed_mem);\r
1925 env->error = (UChar* )NULL;\r
1926 env->error_end = (UChar* )NULL;\r
1927 env->num_call = 0;\r
14b0e578 1928\r
b602265d
DG
1929#ifdef USE_CALL\r
1930 env->unset_addr_list = NULL;\r
1931 env->has_call_zero = 0;\r
1932#endif\r
14b0e578 1933\r
b602265d
DG
1934 env->num_mem = 0;\r
1935 env->num_named = 0;\r
1936 env->mem_alloc = 0;\r
1937 env->mem_env_dynamic = (MemEnv* )NULL;\r
14b0e578 1938\r
b602265d 1939 xmemset(env->mem_env_static, 0, sizeof(env->mem_env_static));\r
14b0e578 1940\r
b602265d
DG
1941 env->parse_depth = 0;\r
1942 env->keep_num = 0;\r
1943 env->save_num = 0;\r
1944 env->save_alloc_num = 0;\r
1945 env->saves = 0;\r
1946}\r
14b0e578 1947\r
b602265d
DG
1948static int\r
1949scan_env_add_mem_entry(ScanEnv* env)\r
1950{\r
1951 int i, need, alloc;\r
1952 MemEnv* p;\r
14b0e578 1953\r
b602265d
DG
1954 need = env->num_mem + 1;\r
1955 if (need > MaxCaptureNum && MaxCaptureNum != 0)\r
1956 return ONIGERR_TOO_MANY_CAPTURES;\r
14b0e578 1957\r
b602265d
DG
1958 if (need >= SCANENV_MEMENV_SIZE) {\r
1959 if (env->mem_alloc <= need) {\r
1960 if (IS_NULL(env->mem_env_dynamic)) {\r
1961 alloc = INIT_SCANENV_MEMENV_ALLOC_SIZE;\r
1962 p = (MemEnv* )xmalloc(sizeof(MemEnv) * alloc);\r
1963 CHECK_NULL_RETURN_MEMERR(p);\r
1964 xmemcpy(p, env->mem_env_static, sizeof(env->mem_env_static));\r
1965 }\r
1966 else {\r
1967 alloc = env->mem_alloc * 2;\r
1968 p = (MemEnv* )xrealloc(env->mem_env_dynamic, sizeof(MemEnv) * alloc, sizeof(MemEnv)*env->mem_alloc);\r
1969 CHECK_NULL_RETURN_MEMERR(p);\r
1970 }\r
14b0e578 1971\r
b602265d
DG
1972 for (i = env->num_mem + 1; i < alloc; i++) {\r
1973 p[i].node = NULL_NODE;\r
1974#if 0\r
1975 p[i].in = 0;\r
1976 p[i].recursion = 0;\r
1977#endif\r
1978 }\r
1979\r
1980 env->mem_env_dynamic = p;\r
1981 env->mem_alloc = alloc;\r
14b0e578
CS
1982 }\r
1983 }\r
1984\r
b602265d
DG
1985 env->num_mem++;\r
1986 return env->num_mem;\r
14b0e578
CS
1987}\r
1988\r
1989static int\r
b602265d 1990scan_env_set_mem_node(ScanEnv* env, int num, Node* node)\r
14b0e578 1991{\r
b602265d
DG
1992 if (env->num_mem >= num)\r
1993 SCANENV_MEMENV(env)[num].node = node;\r
1994 else\r
1995 return ONIGERR_PARSER_BUG;\r
1996 return 0;\r
14b0e578
CS
1997}\r
1998\r
b602265d
DG
1999extern void\r
2000onig_node_free(Node* node)\r
14b0e578 2001{\r
b602265d
DG
2002 start:\r
2003 if (IS_NULL(node)) return ;\r
14b0e578 2004\r
b602265d
DG
2005#ifdef DEBUG_NODE_FREE\r
2006 fprintf(stderr, "onig_node_free: %p\n", node);\r
2007#endif\r
14b0e578 2008\r
b602265d
DG
2009 switch (NODE_TYPE(node)) {\r
2010 case NODE_STRING:\r
2011 if (STR_(node)->capa != 0 &&\r
2012 IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) {\r
2013 xfree(STR_(node)->s);\r
2014 }\r
2015 break;\r
14b0e578 2016\r
b602265d
DG
2017 case NODE_LIST:\r
2018 case NODE_ALT:\r
2019 onig_node_free(NODE_CAR(node));\r
2020 {\r
2021 Node* next_node = NODE_CDR(node);\r
2022\r
2023 xfree(node);\r
2024 node = next_node;\r
2025 goto start;\r
14b0e578 2026 }\r
b602265d 2027 break;\r
14b0e578 2028\r
b602265d
DG
2029 case NODE_CCLASS:\r
2030 {\r
2031 CClassNode* cc = CCLASS_(node);\r
14b0e578 2032\r
b602265d
DG
2033 if (cc->mbuf)\r
2034 bbuf_free(cc->mbuf);\r
2035 }\r
2036 break;\r
14b0e578 2037\r
b602265d
DG
2038 case NODE_BACKREF:\r
2039 if (IS_NOT_NULL(BACKREF_(node)->back_dynamic))\r
2040 xfree(BACKREF_(node)->back_dynamic);\r
2041 break;\r
14b0e578 2042\r
b602265d
DG
2043 case NODE_ENCLOSURE:\r
2044 if (NODE_BODY(node))\r
2045 onig_node_free(NODE_BODY(node));\r
14b0e578 2046\r
b602265d
DG
2047 {\r
2048 EnclosureNode* en = ENCLOSURE_(node);\r
2049 if (en->type == ENCLOSURE_IF_ELSE) {\r
2050 onig_node_free(en->te.Then);\r
2051 onig_node_free(en->te.Else);\r
14b0e578
CS
2052 }\r
2053 }\r
b602265d 2054 break;\r
14b0e578 2055\r
b602265d
DG
2056 case NODE_QUANT:\r
2057 case NODE_ANCHOR:\r
2058 if (NODE_BODY(node))\r
2059 onig_node_free(NODE_BODY(node));\r
2060 break;\r
14b0e578 2061\r
b602265d
DG
2062 case NODE_CTYPE:\r
2063 case NODE_CALL:\r
2064 case NODE_GIMMICK:\r
2065 break;\r
14b0e578 2066 }\r
14b0e578 2067\r
b602265d 2068 xfree(node);\r
14b0e578
CS
2069}\r
2070\r
b602265d
DG
2071static void\r
2072cons_node_free_alone(Node* node)\r
14b0e578 2073{\r
b602265d
DG
2074 NODE_CAR(node) = 0;\r
2075 NODE_CDR(node) = 0;\r
2076 onig_node_free(node);\r
14b0e578
CS
2077}\r
2078\r
b602265d
DG
2079static Node*\r
2080node_new(void)\r
14b0e578 2081{\r
b602265d 2082 Node* node;\r
14b0e578 2083\r
b602265d
DG
2084 node = (Node* )xmalloc(sizeof(Node));\r
2085 xmemset(node, 0, sizeof(*node));\r
14b0e578 2086\r
b602265d
DG
2087#ifdef DEBUG_NODE_FREE\r
2088 fprintf(stderr, "node_new: %p\n", node);\r
2089#endif\r
2090 return node;\r
2091}\r
14b0e578 2092\r
14b0e578 2093\r
b602265d
DG
2094static void\r
2095initialize_cclass(CClassNode* cc)\r
2096{\r
2097 BITSET_CLEAR(cc->bs);\r
2098 cc->flags = 0;\r
2099 cc->mbuf = NULL;\r
2100}\r
2101\r
2102static Node*\r
2103node_new_cclass(void)\r
2104{\r
2105 Node* node = node_new();\r
2106 CHECK_NULL_RETURN(node);\r
2107\r
2108 NODE_SET_TYPE(node, NODE_CCLASS);\r
2109 initialize_cclass(CCLASS_(node));\r
2110 return node;\r
2111}\r
2112\r
2113static Node*\r
2114node_new_ctype(int type, int not, OnigOptionType options)\r
2115{\r
2116 Node* node = node_new();\r
2117 CHECK_NULL_RETURN(node);\r
2118\r
2119 NODE_SET_TYPE(node, NODE_CTYPE);\r
2120 CTYPE_(node)->ctype = type;\r
2121 CTYPE_(node)->not = not;\r
2122 CTYPE_(node)->options = options;\r
2123 CTYPE_(node)->ascii_mode = IS_ASCII_MODE_CTYPE_OPTION(type, options);\r
2124 return node;\r
2125}\r
2126\r
2127static Node*\r
2128node_new_anychar(void)\r
2129{\r
2130 Node* node = node_new_ctype(CTYPE_ANYCHAR, 0, ONIG_OPTION_NONE);\r
2131 return node;\r
2132}\r
2133\r
2134static Node*\r
2135node_new_anychar_with_fixed_option(OnigOptionType option)\r
2136{\r
2137 CtypeNode* ct;\r
2138 Node* node;\r
2139\r
2140 node = node_new_anychar();\r
2141 ct = CTYPE_(node);\r
2142 ct->options = option;\r
2143 NODE_STATUS_ADD(node, FIXED_OPTION);\r
2144 return node;\r
2145}\r
2146\r
2147static int\r
2148node_new_no_newline(Node** node, ScanEnv* env)\r
2149{\r
2150 Node* n;\r
2151\r
2152 n = node_new_anychar_with_fixed_option(ONIG_OPTION_NONE);\r
2153 CHECK_NULL_RETURN_MEMERR(n);\r
2154 *node = n;\r
2155 return 0;\r
2156}\r
2157\r
2158static int\r
2159node_new_true_anychar(Node** node, ScanEnv* env)\r
2160{\r
2161 Node* n;\r
2162\r
2163 n = node_new_anychar_with_fixed_option(ONIG_OPTION_MULTILINE);\r
2164 CHECK_NULL_RETURN_MEMERR(n);\r
2165 *node = n;\r
2166 return 0;\r
2167}\r
2168\r
2169static Node*\r
2170node_new_list(Node* left, Node* right)\r
2171{\r
2172 Node* node = node_new();\r
2173 CHECK_NULL_RETURN(node);\r
2174\r
2175 NODE_SET_TYPE(node, NODE_LIST);\r
2176 NODE_CAR(node) = left;\r
2177 NODE_CDR(node) = right;\r
2178 return node;\r
2179}\r
2180\r
2181extern Node*\r
2182onig_node_new_list(Node* left, Node* right)\r
2183{\r
2184 return node_new_list(left, right);\r
2185}\r
2186\r
2187extern Node*\r
2188onig_node_list_add(Node* list, Node* x)\r
2189{\r
2190 Node *n;\r
2191\r
2192 n = onig_node_new_list(x, NULL);\r
2193 if (IS_NULL(n)) return NULL_NODE;\r
2194\r
2195 if (IS_NOT_NULL(list)) {\r
2196 while (IS_NOT_NULL(NODE_CDR(list)))\r
2197 list = NODE_CDR(list);\r
2198\r
2199 NODE_CDR(list) = n;\r
2200 }\r
2201\r
2202 return n;\r
2203}\r
2204\r
2205extern Node*\r
2206onig_node_new_alt(Node* left, Node* right)\r
2207{\r
2208 Node* node = node_new();\r
2209 CHECK_NULL_RETURN(node);\r
2210\r
2211 NODE_SET_TYPE(node, NODE_ALT);\r
2212 NODE_CAR(node) = left;\r
2213 NODE_CDR(node) = right;\r
2214 return node;\r
2215}\r
2216\r
2217static Node*\r
2218make_list_or_alt(NodeType type, int n, Node* ns[])\r
2219{\r
2220 Node* r;\r
2221\r
2222 if (n <= 0) return NULL_NODE;\r
2223\r
2224 if (n == 1) {\r
2225 r = node_new();\r
2226 CHECK_NULL_RETURN(r);\r
2227 NODE_SET_TYPE(r, type);\r
2228 NODE_CAR(r) = ns[0];\r
2229 NODE_CDR(r) = NULL_NODE;\r
2230 }\r
2231 else {\r
2232 Node* right;\r
2233\r
2234 r = node_new();\r
2235 CHECK_NULL_RETURN(r);\r
2236\r
2237 right = make_list_or_alt(type, n - 1, ns + 1);\r
2238 if (IS_NULL(right)) {\r
2239 onig_node_free(r);\r
2240 return NULL_NODE;\r
2241 }\r
2242\r
2243 NODE_SET_TYPE(r, type);\r
2244 NODE_CAR(r) = ns[0];\r
2245 NODE_CDR(r) = right;\r
2246 }\r
2247\r
2248 return r;\r
2249}\r
2250\r
2251static Node*\r
2252make_list(int n, Node* ns[])\r
2253{\r
2254 return make_list_or_alt(NODE_LIST, n, ns);\r
2255}\r
2256\r
2257static Node*\r
2258make_alt(int n, Node* ns[])\r
2259{\r
2260 return make_list_or_alt(NODE_ALT, n, ns);\r
2261}\r
2262\r
2263extern Node*\r
2264onig_node_new_anchor(int type, int ascii_mode)\r
2265{\r
2266 Node* node = node_new();\r
2267 CHECK_NULL_RETURN(node);\r
2268\r
2269 NODE_SET_TYPE(node, NODE_ANCHOR);\r
2270 ANCHOR_(node)->type = type;\r
2271 ANCHOR_(node)->char_len = -1;\r
2272 ANCHOR_(node)->ascii_mode = ascii_mode;\r
2273 return node;\r
2274}\r
2275\r
2276static Node*\r
2277node_new_backref(int back_num, int* backrefs, int by_name,\r
2278#ifdef USE_BACKREF_WITH_LEVEL\r
2279 int exist_level, int nest_level,\r
2280#endif\r
2281 ScanEnv* env)\r
2282{\r
2283 int i;\r
2284 Node* node = node_new();\r
2285\r
2286 CHECK_NULL_RETURN(node);\r
2287\r
2288 NODE_SET_TYPE(node, NODE_BACKREF);\r
2289 BACKREF_(node)->back_num = back_num;\r
2290 BACKREF_(node)->back_dynamic = (int* )NULL;\r
2291 if (by_name != 0)\r
2292 NODE_STATUS_ADD(node, BY_NAME);\r
2293\r
2294#ifdef USE_BACKREF_WITH_LEVEL\r
2295 if (exist_level != 0) {\r
2296 NODE_STATUS_ADD(node, NEST_LEVEL);\r
2297 BACKREF_(node)->nest_level = nest_level;\r
2298 }\r
2299#endif\r
2300\r
2301 for (i = 0; i < back_num; i++) {\r
2302 if (backrefs[i] <= env->num_mem &&\r
2303 IS_NULL(SCANENV_MEMENV(env)[backrefs[i]].node)) {\r
2304 NODE_STATUS_ADD(node, RECURSION); /* /...(\1).../ */\r
2305 break;\r
2306 }\r
2307 }\r
2308\r
2309 if (back_num <= NODE_BACKREFS_SIZE) {\r
2310 for (i = 0; i < back_num; i++)\r
2311 BACKREF_(node)->back_static[i] = backrefs[i];\r
2312 }\r
2313 else {\r
2314 int* p = (int* )xmalloc(sizeof(int) * back_num);\r
2315 if (IS_NULL(p)) {\r
2316 onig_node_free(node);\r
2317 return NULL;\r
2318 }\r
2319 BACKREF_(node)->back_dynamic = p;\r
2320 for (i = 0; i < back_num; i++)\r
2321 p[i] = backrefs[i];\r
2322 }\r
2323 return node;\r
2324}\r
2325\r
2326static Node*\r
2327node_new_backref_checker(int back_num, int* backrefs, int by_name,\r
2328#ifdef USE_BACKREF_WITH_LEVEL\r
2329 int exist_level, int nest_level,\r
2330#endif\r
2331 ScanEnv* env)\r
2332{\r
2333 Node* node;\r
2334\r
2335 node = node_new_backref(back_num, backrefs, by_name,\r
2336#ifdef USE_BACKREF_WITH_LEVEL\r
2337 exist_level, nest_level,\r
2338#endif\r
2339 env);\r
2340 CHECK_NULL_RETURN(node);\r
2341\r
2342 NODE_STATUS_ADD(node, CHECKER);\r
2343 return node;\r
2344}\r
2345\r
2346#ifdef USE_CALL\r
2347static Node*\r
2348node_new_call(UChar* name, UChar* name_end, int gnum, int by_number)\r
2349{\r
2350 Node* node = node_new();\r
2351 CHECK_NULL_RETURN(node);\r
2352\r
2353 NODE_SET_TYPE(node, NODE_CALL);\r
2354 CALL_(node)->by_number = by_number;\r
2355 CALL_(node)->name = name;\r
2356 CALL_(node)->name_end = name_end;\r
2357 CALL_(node)->group_num = gnum;\r
2358 CALL_(node)->entry_count = 1;\r
2359 return node;\r
2360}\r
2361#endif\r
2362\r
2363static Node*\r
2364node_new_quantifier(int lower, int upper, int by_number)\r
2365{\r
2366 Node* node = node_new();\r
2367 CHECK_NULL_RETURN(node);\r
2368\r
2369 NODE_SET_TYPE(node, NODE_QUANT);\r
2370 QUANT_(node)->lower = lower;\r
2371 QUANT_(node)->upper = upper;\r
2372 QUANT_(node)->greedy = 1;\r
2373 QUANT_(node)->body_empty_info = QUANT_BODY_IS_NOT_EMPTY;\r
2374 QUANT_(node)->head_exact = NULL_NODE;\r
2375 QUANT_(node)->next_head_exact = NULL_NODE;\r
2376 QUANT_(node)->is_refered = 0;\r
2377 if (by_number != 0)\r
2378 NODE_STATUS_ADD(node, BY_NUMBER);\r
2379\r
2380 return node;\r
2381}\r
2382\r
2383static Node*\r
2384node_new_enclosure(enum EnclosureType type)\r
2385{\r
2386 Node* node = node_new();\r
2387 CHECK_NULL_RETURN(node);\r
2388\r
2389 NODE_SET_TYPE(node, NODE_ENCLOSURE);\r
2390 ENCLOSURE_(node)->type = type;\r
2391\r
2392 switch (type) {\r
2393 case ENCLOSURE_MEMORY:\r
2394 ENCLOSURE_(node)->m.regnum = 0;\r
2395 ENCLOSURE_(node)->m.called_addr = -1;\r
2396 ENCLOSURE_(node)->m.entry_count = 1;\r
2397 ENCLOSURE_(node)->m.called_state = 0;\r
2398 break;\r
2399\r
2400 case ENCLOSURE_OPTION:\r
2401 ENCLOSURE_(node)->o.options = 0;\r
2402 break;\r
2403\r
2404 case ENCLOSURE_STOP_BACKTRACK:\r
2405 break;\r
2406\r
2407 case ENCLOSURE_IF_ELSE:\r
2408 ENCLOSURE_(node)->te.Then = 0;\r
2409 ENCLOSURE_(node)->te.Else = 0;\r
2410 break;\r
2411 }\r
2412\r
2413 ENCLOSURE_(node)->opt_count = 0;\r
2414 return node;\r
2415}\r
2416\r
2417extern Node*\r
2418onig_node_new_enclosure(int type)\r
2419{\r
2420 return node_new_enclosure(type);\r
2421}\r
2422\r
2423static Node*\r
2424node_new_enclosure_if_else(Node* cond, Node* Then, Node* Else)\r
2425{\r
2426 Node* n;\r
2427 n = node_new_enclosure(ENCLOSURE_IF_ELSE);\r
2428 CHECK_NULL_RETURN(n);\r
2429\r
2430 NODE_BODY(n) = cond;\r
2431 ENCLOSURE_(n)->te.Then = Then;\r
2432 ENCLOSURE_(n)->te.Else = Else;\r
2433 return n;\r
2434}\r
2435\r
2436static Node*\r
2437node_new_memory(int is_named)\r
2438{\r
2439 Node* node = node_new_enclosure(ENCLOSURE_MEMORY);\r
2440 CHECK_NULL_RETURN(node);\r
2441 if (is_named != 0)\r
2442 NODE_STATUS_ADD(node, NAMED_GROUP);\r
2443\r
2444 return node;\r
2445}\r
2446\r
2447static Node*\r
2448node_new_option(OnigOptionType option)\r
2449{\r
2450 Node* node = node_new_enclosure(ENCLOSURE_OPTION);\r
2451 CHECK_NULL_RETURN(node);\r
2452 ENCLOSURE_(node)->o.options = option;\r
2453 return node;\r
2454}\r
2455\r
2456static int\r
2457node_new_fail(Node** node, ScanEnv* env)\r
2458{\r
2459 *node = node_new();\r
2460 CHECK_NULL_RETURN_MEMERR(*node);\r
2461\r
2462 NODE_SET_TYPE(*node, NODE_GIMMICK);\r
2463 GIMMICK_(*node)->type = GIMMICK_FAIL;\r
2464 return ONIG_NORMAL;\r
2465}\r
2466\r
2467static int\r
2468node_new_save_gimmick(Node** node, enum SaveType save_type, ScanEnv* env)\r
2469{\r
2470 int id;\r
2471 int r;\r
2472\r
2473 r = save_entry(env, save_type, &id);\r
2474 if (r != ONIG_NORMAL) return r;\r
2475\r
2476 *node = node_new();\r
2477 CHECK_NULL_RETURN_MEMERR(*node);\r
2478\r
2479 NODE_SET_TYPE(*node, NODE_GIMMICK);\r
2480 GIMMICK_(*node)->id = id;\r
2481 GIMMICK_(*node)->type = GIMMICK_SAVE;\r
2482 GIMMICK_(*node)->detail_type = (int )save_type;\r
2483\r
2484 return ONIG_NORMAL;\r
2485}\r
2486\r
2487static int\r
2488node_new_update_var_gimmick(Node** node, enum UpdateVarType update_var_type,\r
2489 int id, ScanEnv* env)\r
2490{\r
2491 *node = node_new();\r
2492 CHECK_NULL_RETURN_MEMERR(*node);\r
2493\r
2494 NODE_SET_TYPE(*node, NODE_GIMMICK);\r
2495 GIMMICK_(*node)->id = id;\r
2496 GIMMICK_(*node)->type = GIMMICK_UPDATE_VAR;\r
2497 GIMMICK_(*node)->detail_type = (int )update_var_type;\r
2498\r
2499 return ONIG_NORMAL;\r
2500}\r
2501\r
2502static int\r
2503node_new_keep(Node** node, ScanEnv* env)\r
2504{\r
2505 int r;\r
2506\r
2507 r = node_new_save_gimmick(node, SAVE_KEEP, env);\r
2508 if (r != 0) return r;\r
2509\r
2510 env->keep_num++;\r
2511 return ONIG_NORMAL;\r
2512}\r
2513\r
2514#ifdef USE_CALLOUT\r
2515\r
2516extern void\r
2517onig_free_reg_callout_list(int n, CalloutListEntry* list)\r
2518{\r
2519 int i;\r
2520 int j;\r
2521\r
2522 if (IS_NULL(list)) return ;\r
2523\r
2524 for (i = 0; i < n; i++) {\r
2525 if (list[i].of == ONIG_CALLOUT_OF_NAME) {\r
2526 for (j = 0; j < list[i].u.arg.passed_num; j++) {\r
2527 if (list[i].u.arg.types[j] == ONIG_TYPE_STRING) {\r
2528 if (IS_NOT_NULL(list[i].u.arg.vals[j].s.start))\r
2529 xfree(list[i].u.arg.vals[j].s.start);\r
2530 }\r
2531 }\r
2532 }\r
2533 else { /* ONIG_CALLOUT_OF_CONTENTS */\r
2534 if (IS_NOT_NULL(list[i].u.content.start)) {\r
2535 xfree((void* )list[i].u.content.start);\r
2536 }\r
2537 }\r
2538 }\r
2539\r
2540 xfree(list);\r
2541}\r
2542\r
2543extern CalloutListEntry*\r
2544onig_reg_callout_list_at(regex_t* reg, int num)\r
2545{\r
2546 RegexExt* ext = REG_EXTP(reg);\r
2547 CHECK_NULL_RETURN(ext);\r
2548\r
2549 if (num <= 0 || num > ext->callout_num)\r
2550 return 0;\r
2551\r
2552 num--;\r
2553 return ext->callout_list + num;\r
2554}\r
2555\r
2556static int\r
2557reg_callout_list_entry(ScanEnv* env, int* rnum)\r
2558{\r
2559#define INIT_CALLOUT_LIST_NUM 3\r
2560\r
2561 int num;\r
2562 CalloutListEntry* list;\r
2563 CalloutListEntry* e;\r
2564 RegexExt* ext;\r
2565\r
2566 ext = onig_get_regex_ext(env->reg);\r
2567 CHECK_NULL_RETURN_MEMERR(ext);\r
2568\r
2569 if (IS_NULL(ext->callout_list)) {\r
2570 list = (CalloutListEntry* )xmalloc(sizeof(*list) * INIT_CALLOUT_LIST_NUM);\r
2571 CHECK_NULL_RETURN_MEMERR(list);\r
2572\r
2573 ext->callout_list = list;\r
2574 ext->callout_list_alloc = INIT_CALLOUT_LIST_NUM;\r
2575 ext->callout_num = 0;\r
2576 }\r
2577\r
2578 num = ext->callout_num + 1;\r
2579 if (num > ext->callout_list_alloc) {\r
2580 int alloc = ext->callout_list_alloc * 2;\r
2581 list = (CalloutListEntry* )xrealloc(ext->callout_list,\r
2582 sizeof(CalloutListEntry) * alloc,\r
2583 sizeof(CalloutListEntry) * ext->callout_list_alloc);\r
2584 CHECK_NULL_RETURN_MEMERR(list);\r
2585\r
2586 ext->callout_list = list;\r
2587 ext->callout_list_alloc = alloc;\r
2588 }\r
2589\r
2590 e = ext->callout_list + (num - 1);\r
2591\r
2592 e->flag = 0;\r
2593 e->of = 0;\r
2594 e->in = ONIG_CALLOUT_OF_CONTENTS;\r
2595 e->type = 0;\r
2596 e->tag_start = 0;\r
2597 e->tag_end = 0;\r
2598 e->start_func = 0;\r
2599 e->end_func = 0;\r
2600 e->u.arg.num = 0;\r
2601 e->u.arg.passed_num = 0;\r
2602\r
2603 ext->callout_num = num;\r
2604 *rnum = num;\r
2605 return ONIG_NORMAL;\r
2606}\r
2607\r
2608static int\r
2609node_new_callout(Node** node, OnigCalloutOf callout_of, int num, int id,\r
2610 ScanEnv* env)\r
2611{\r
2612 *node = node_new();\r
2613 CHECK_NULL_RETURN_MEMERR(*node);\r
2614\r
2615 NODE_SET_TYPE(*node, NODE_GIMMICK);\r
2616 GIMMICK_(*node)->id = id;\r
2617 GIMMICK_(*node)->num = num;\r
2618 GIMMICK_(*node)->type = GIMMICK_CALLOUT;\r
2619 GIMMICK_(*node)->detail_type = (int )callout_of;\r
2620\r
2621 return ONIG_NORMAL;\r
2622}\r
2623#endif\r
2624\r
/*
 * Build the node tree for \X and store its root in *node.
 *
 * The tree is assembled bottom-up in four steps:
 *   1. list(\Y anchor, true-anychar)
 *   2. a 0..REPEAT_INFINITE quantifier around that list
 *   3. list(true-anychar, quantifier)
 *   4. a stop-backtrack enclosure around the whole list
 *
 * ns[0] / ns[1] always hold the pieces that are not linked under a
 * parent yet; a slot is reset as soon as its node has been attached, so
 * that the error path frees only the nodes still held in ns[].
 *
 * Returns ONIG_NORMAL on success; otherwise ONIGERR_MEMORY (label err)
 * or the status of node_new_true_anychar() (label err1).  *node is not
 * written on failure.
 */
2625static int\r
2626make_extended_grapheme_cluster(Node** node, ScanEnv* env)\r
2627{\r
2628 int r;\r
2629 int i;\r
2630 Node* x;\r
2631 Node* ns[2];\r
2632\r
2633 /* \X == (?>\O(?:\Y\O)*) */\r
2634\r
2635 ns[1] = NULL_NODE;\r
2636\r
2637 r = ONIGERR_MEMORY;\r
2638 ns[0] = onig_node_new_anchor(ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY, 0);\r
2639 if (IS_NULL(ns[0])) goto err;\r
2640\r
2641 r = node_new_true_anychar(&ns[1], env);\r
2642 if (r != 0) goto err1;\r
2643\r
/* step 1: \Y\O */
2644 x = make_list(2, ns);\r
2645 if (IS_NULL(x)) goto err;\r
2646 ns[0] = x;\r
2647 ns[1] = NULL_NODE;\r
2648\r
/* step 2: (?:\Y\O)* */
2649 x = node_new_quantifier(0, REPEAT_INFINITE, 1);\r
2650 if (IS_NULL(x)) goto err;\r
2651\r
2652 NODE_BODY(x) = ns[0];\r
2653 ns[0] = NULL_NODE;\r
2654 ns[1] = x;\r
2655\r
/* step 3: \O(?:\Y\O)* */
2656 r = node_new_true_anychar(&ns[0], env);\r
2657 if (r != 0) goto err1;\r
2658\r
2659 x = make_list(2, ns);\r
2660 if (IS_NULL(x)) goto err;\r
2661\r
2662 ns[0] = x;\r
2663 ns[1] = NULL_NODE;\r
2664\r
/* step 4: (?>...) */
2665 x = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);\r
2666 if (IS_NULL(x)) goto err;\r
2667\r
2668 NODE_BODY(x) = ns[0];\r
2669\r
2670 *node = x;\r
2671 return ONIG_NORMAL;\r
2672\r
2673 err:\r
2674 r = ONIGERR_MEMORY;\r
2675 err1:\r
2676 for (i = 0; i < 2; i++) onig_node_free(ns[i]);\r
2677 return r;\r
2678}\r
2679\r
2680static int\r
2681make_absent_engine(Node** node, int pre_save_right_id, Node* absent,\r
2682 Node* step_one, int lower, int upper, int possessive,\r
2683 int is_range_cutter, ScanEnv* env)\r
2684{\r
2685 int r;\r
2686 int i;\r
2687 int id;\r
2688 Node* x;\r
2689 Node* ns[4];\r
2690\r
2691 for (i = 0; i < 4; i++) ns[i] = NULL_NODE;\r
2692\r
2693 ns[1] = absent;\r
2694 ns[3] = step_one; /* for err */\r
2695 r = node_new_save_gimmick(&ns[0], SAVE_S, env);\r
2696 if (r != 0) goto err;\r
2697\r
2698 id = GIMMICK_(ns[0])->id;\r
2699 r = node_new_update_var_gimmick(&ns[2], UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK,\r
2700 id, env);\r
2701 if (r != 0) goto err;\r
2702\r
2703 r = node_new_fail(&ns[3], env);\r
2704 if (r != 0) goto err;\r
2705\r
2706 x = make_list(4, ns);\r
2707 if (IS_NULL(x)) goto err0;\r
2708\r
2709 ns[0] = x;\r
2710 ns[1] = step_one;\r
2711 ns[2] = ns[3] = NULL_NODE;\r
2712\r
2713 x = make_alt(2, ns);\r
2714 if (IS_NULL(x)) goto err0;\r
2715\r
2716 ns[0] = x;\r
2717\r
2718 x = node_new_quantifier(lower, upper, 0);\r
2719 if (IS_NULL(x)) goto err0;\r
2720\r
2721 NODE_BODY(x) = ns[0];\r
2722 ns[0] = x;\r
2723\r
2724 if (possessive != 0) {\r
2725 x = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);\r
2726 if (IS_NULL(x)) goto err0;\r
2727\r
2728 NODE_BODY(x) = ns[0];\r
2729 ns[0] = x;\r
2730 }\r
2731\r
2732 r = node_new_update_var_gimmick(&ns[1], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,\r
2733 pre_save_right_id, env);\r
2734 if (r != 0) goto err;\r
2735\r
2736 r = node_new_fail(&ns[2], env);\r
2737 if (r != 0) goto err;\r
2738\r
2739 x = make_list(2, ns + 1);\r
2740 if (IS_NULL(x)) goto err0;\r
2741\r
2742 ns[1] = x; ns[2] = NULL_NODE;\r
2743\r
2744 x = make_alt(2, ns);\r
2745 if (IS_NULL(x)) goto err0;\r
2746\r
2747 if (is_range_cutter != 0)\r
2748 NODE_STATUS_ADD(x, SUPER);\r
2749\r
2750 *node = x;\r
2751 return ONIG_NORMAL;\r
2752\r
2753 err0:\r
2754 r = ONIGERR_MEMORY;\r
2755 err:\r
2756 for (i = 0; i < 4; i++) onig_node_free(ns[i]);\r
2757 return r;\r
2758}\r
2759\r
/*
 * Build the two trailing nodes of an absent construct.
 *
 *   *node1  a SAVE_RIGHT_RANGE save gimmick
 *   *node2  alt( update-var(RIGHT_RANGE_FROM_STACK, pre_save_right_id),
 *                list( update-var(RIGHT_RANGE_FROM_STACK, id of *node1),
 *                      fail ) )
 *
 * Both outputs are reset to NULL_NODE on entry and are only assigned on
 * success.  On failure every node created here is freed and
 * ONIGERR_MEMORY or the status of the failing constructor is returned.
 */
2760static int\r
2761make_absent_tail(Node** node1, Node** node2, int pre_save_right_id,\r
2762 ScanEnv* env)\r
2763{\r
2764 int r;\r
2765 int id;\r
2766 Node* save;\r
2767 Node* x;\r
2768 Node* ns[2];\r
2769\r
2770 *node1 = *node2 = NULL_NODE;\r
2771 save = ns[0] = ns[1] = NULL_NODE;\r
2772\r
2773 r = node_new_save_gimmick(&save, SAVE_RIGHT_RANGE, env);\r
2774 if (r != 0) goto err;\r
2775\r
2776 id = GIMMICK_(save)->id;\r
2777 r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,\r
2778 id, env);\r
2779 if (r != 0) goto err;\r
2780\r
2781 r = node_new_fail(&ns[1], env);\r
2782 if (r != 0) goto err;\r
2783\r
2784 x = make_list(2, ns);\r
2785 if (IS_NULL(x)) goto err0;\r
2786\r
/* the list becomes the second alternative; ns[0] is refilled below */
2787 ns[0] = NULL_NODE; ns[1] = x;\r
2788\r
2789 r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,\r
2790 pre_save_right_id, env);\r
2791 if (r != 0) goto err;\r
2792\r
2793 x = make_alt(2, ns);\r
2794 if (IS_NULL(x)) goto err0;\r
2795\r
2796 *node1 = save;\r
2797 *node2 = x;\r
2798 return ONIG_NORMAL;\r
2799\r
2800 err0:\r
2801 r = ONIGERR_MEMORY;\r
2802 err:\r
2803 onig_node_free(save);\r
2804 onig_node_free(ns[0]);\r
2805 onig_node_free(ns[1]);\r
2806 return r;\r
2807}\r
2808\r
/*
 * Build the node tree for a range-clear operation and store its root in
 * *node:
 *
 *   list( save(RIGHT_RANGE),
 *         alt[SUPER]( update-var(RIGHT_RANGE_INIT),
 *                     list( update-var(RIGHT_RANGE_FROM_STACK, save id),
 *                           fail ) ) )
 *
 * *node is reset to NULL_NODE on entry and only assigned on success.
 * On failure every node created here is freed and ONIGERR_MEMORY or the
 * status of the failing constructor is returned.
 */
2809static int\r
2810make_range_clear(Node** node, ScanEnv* env)\r
2811{\r
2812 int r;\r
2813 int id;\r
2814 Node* save;\r
2815 Node* x;\r
2816 Node* ns[2];\r
2817\r
2818 *node = NULL_NODE;\r
2819 save = ns[0] = ns[1] = NULL_NODE;\r
2820\r
2821 r = node_new_save_gimmick(&save, SAVE_RIGHT_RANGE, env);\r
2822 if (r != 0) goto err;\r
2823\r
2824 id = GIMMICK_(save)->id;\r
2825 r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,\r
2826 id, env);\r
2827 if (r != 0) goto err;\r
2828\r
2829 r = node_new_fail(&ns[1], env);\r
2830 if (r != 0) goto err;\r
2831\r
2832 x = make_list(2, ns);\r
2833 if (IS_NULL(x)) goto err0;\r
2834\r
2835 ns[0] = NULL_NODE; ns[1] = x;\r
2836\r
2837 r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_INIT, 0, env);\r
2838 if (r != 0) goto err;\r
2839\r
2840 x = make_alt(2, ns);\r
2841 if (IS_NULL(x)) goto err0;\r
2842\r
2843 NODE_STATUS_ADD(x, SUPER);\r
2844\r
/* from here on `save` is tracked through ns[0] only, so the error path
   does not free it twice */
2845 ns[0] = save;\r
2846 ns[1] = x;\r
2847 save = NULL_NODE;\r
2848 x = make_list(2, ns);\r
2849 if (IS_NULL(x)) goto err0;\r
2850\r
2851 *node = x;\r
2852 return ONIG_NORMAL;\r
2853\r
2854 err0:\r
2855 r = ONIGERR_MEMORY;\r
2856 err:\r
2857 onig_node_free(save);\r
2858 onig_node_free(ns[0]);\r
2859 onig_node_free(ns[1]);\r
2860 return r;\r
2861}\r
2862\r
2863static int\r
2864is_simple_one_char_repeat(Node* node, Node** rquant, Node** rbody,\r
2865 int* is_possessive, ScanEnv* env)\r
2866{\r
2867 Node* quant;\r
2868 Node* body;\r
2869\r
2870 *rquant = *rbody = 0;\r
2871 *is_possessive = 0;\r
2872\r
2873 if (NODE_TYPE(node) == NODE_QUANT) {\r
2874 quant = node;\r
2875 }\r
2876 else {\r
2877 if (NODE_TYPE(node) == NODE_ENCLOSURE) {\r
2878 EnclosureNode* en = ENCLOSURE_(node);\r
2879 if (en->type == ENCLOSURE_STOP_BACKTRACK) {\r
2880 *is_possessive = 1;\r
2881 quant = NODE_ENCLOSURE_BODY(en);\r
2882 if (NODE_TYPE(quant) != NODE_QUANT)\r
2883 return 0;\r
2884 }\r
2885 else\r
2886 return 0;\r
2887 }\r
2888 else\r
2889 return 0;\r
2890 }\r
2891\r
2892 if (QUANT_(quant)->greedy == 0)\r
2893 return 0;\r
2894\r
2895 body = NODE_BODY(quant);\r
2896 switch (NODE_TYPE(body)) {\r
2897 case NODE_STRING:\r
2898 {\r
2899 int len;\r
2900 StrNode* sn = STR_(body);\r
2901 UChar *s = sn->s;\r
2902\r
2903 len = 0;\r
2904 while (s < sn->end) {\r
2905 s += enclen(env->enc, s);\r
2906 len++;\r
2907 }\r
2908 if (len != 1)\r
2909 return 0;\r
2910 }\r
2911\r
2912 case NODE_CCLASS:\r
2913 break;\r
2914\r
2915 default:\r
2916 return 0;\r
2917 break;\r
2918 }\r
2919\r
2920 if (node != quant) {\r
2921 NODE_BODY(node) = 0;\r
2922 onig_node_free(node);\r
2923 }\r
2924 NODE_BODY(quant) = NULL_NODE;\r
2925 *rquant = quant;\r
2926 *rbody = body;\r
2927 return 1;\r
2928}\r
2929\r
2930static int\r
2931make_absent_tree_for_simple_one_char_repeat(Node** node, Node* absent, Node* quant,\r
2932 Node* body, int possessive, ScanEnv* env)\r
2933{\r
2934 int r;\r
2935 int i;\r
2936 int id1;\r
2937 int lower, upper;\r
2938 Node* x;\r
2939 Node* ns[4];\r
2940\r
2941 *node = NULL_NODE;\r
2942 r = ONIGERR_MEMORY;\r
2943 ns[0] = ns[1] = NULL_NODE;\r
2944 ns[2] = body, ns[3] = absent;\r
2945\r
2946 lower = QUANT_(quant)->lower;\r
2947 upper = QUANT_(quant)->upper;\r
2948 onig_node_free(quant);\r
2949\r
2950 r = node_new_save_gimmick(&ns[0], SAVE_RIGHT_RANGE, env);\r
2951 if (r != 0) goto err;\r
2952\r
2953 id1 = GIMMICK_(ns[0])->id;\r
2954\r
2955 r = make_absent_engine(&ns[1], id1, absent, body, lower, upper, possessive,\r
2956 0, env);\r
2957 if (r != 0) goto err;\r
2958\r
2959 ns[2] = ns[3] = NULL_NODE;\r
2960\r
2961 r = node_new_update_var_gimmick(&ns[2], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,\r
2962 id1, env);\r
2963 if (r != 0) goto err;\r
2964\r
2965 x = make_list(3, ns);\r
2966 if (IS_NULL(x)) goto err0;\r
2967\r
2968 *node = x;\r
2969 return ONIG_NORMAL;\r
2970\r
2971 err0:\r
2972 r = ONIGERR_MEMORY;\r
2973 err:\r
2974 for (i = 0; i < 4; i++) onig_node_free(ns[i]);\r
2975 return r;\r
2976}\r
2977\r
2978static int\r
2979make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter,\r
2980 ScanEnv* env)\r
2981{\r
2982 int r;\r
2983 int i;\r
2984 int id1, id2;\r
2985 int possessive;\r
2986 Node* x;\r
2987 Node* ns[7];\r
2988\r
2989 r = ONIGERR_MEMORY;\r
2990 for (i = 0; i < 7; i++) ns[i] = NULL_NODE;\r
2991 ns[4] = expr; ns[5] = absent;\r
2992\r
2993 if (is_range_cutter == 0) {\r
2994 Node* quant;\r
2995 Node* body;\r
2996\r
2997 if (expr == NULL_NODE) {\r
2998 /* default expr \O* */\r
2999 quant = node_new_quantifier(0, REPEAT_INFINITE, 0);\r
3000 if (IS_NULL(quant)) goto err0;\r
3001\r
3002 r = node_new_true_anychar(&body, env);\r
3003 if (r != 0) {\r
3004 onig_node_free(quant);\r
3005 goto err;\r
3006 }\r
3007 possessive = 0;\r
3008 goto simple;\r
3009 }\r
3010 else {\r
3011 if (is_simple_one_char_repeat(expr, &quant, &body, &possessive, env)) {\r
3012 simple:\r
3013 r = make_absent_tree_for_simple_one_char_repeat(node, absent, quant,\r
3014 body, possessive, env);\r
3015 if (r != 0) {\r
3016 ns[4] = NULL_NODE;\r
3017 onig_node_free(quant);\r
3018 onig_node_free(body);\r
3019 goto err;\r
3020 }\r
3021\r
3022 return ONIG_NORMAL;\r
3023 }\r
3024 }\r
3025 }\r
3026\r
3027 r = node_new_save_gimmick(&ns[0], SAVE_RIGHT_RANGE, env);\r
3028 if (r != 0) goto err;\r
3029\r
3030 id1 = GIMMICK_(ns[0])->id;\r
3031\r
3032 r = node_new_save_gimmick(&ns[1], SAVE_S, env);\r
3033 if (r != 0) goto err;\r
3034\r
3035 id2 = GIMMICK_(ns[1])->id;\r
3036\r
3037 r = node_new_true_anychar(&ns[3], env);\r
3038 if (r != 0) goto err;\r
3039\r
3040 possessive = 1;\r
3041 r = make_absent_engine(&ns[2], id1, absent, ns[3], 0, REPEAT_INFINITE,\r
3042 possessive, is_range_cutter, env);\r
3043 if (r != 0) goto err;\r
3044\r
3045 ns[3] = NULL_NODE;\r
3046 ns[5] = NULL_NODE;\r
3047\r
3048 r = node_new_update_var_gimmick(&ns[3], UPDATE_VAR_S_FROM_STACK, id2, env);\r
3049 if (r != 0) goto err;\r
3050\r
3051 if (is_range_cutter != 0) {\r
3052 x = make_list(4, ns);\r
3053 if (IS_NULL(x)) goto err0;\r
3054 }\r
3055 else {\r
3056 r = make_absent_tail(&ns[5], &ns[6], id1, env);\r
3057 if (r != 0) goto err;\r
3058 \r
3059 x = make_list(7, ns);\r
3060 if (IS_NULL(x)) goto err0;\r
3061 }\r
3062\r
3063 *node = x;\r
3064 return ONIG_NORMAL;\r
3065\r
3066 err0:\r
3067 r = ONIGERR_MEMORY;\r
3068 err:\r
3069 for (i = 0; i < 7; i++) onig_node_free(ns[i]);\r
3070 return r; \r
3071}\r
3072\r
3073extern int\r
3074onig_node_str_cat(Node* node, const UChar* s, const UChar* end)\r
3075{\r
3076 int addlen = (int )(end - s);\r
3077\r
3078 if (addlen > 0) {\r
3079 int len = (int )(STR_(node)->end - STR_(node)->s);\r
3080\r
3081 if (STR_(node)->capa > 0 || (len + addlen > NODE_STRING_BUF_SIZE - 1)) {\r
3082 UChar* p;\r
3083 int capa = len + addlen + NODE_STRING_MARGIN;\r
3084\r
3085 if (capa <= STR_(node)->capa) {\r
3086 onig_strcpy(STR_(node)->s + len, s, end);\r
3087 }\r
3088 else {\r
3089 if (STR_(node)->s == STR_(node)->buf)\r
3090 p = strcat_capa_from_static(STR_(node)->s, STR_(node)->end,\r
3091 s, end, capa);\r
3092 else\r
3093 p = strcat_capa(STR_(node)->s, STR_(node)->end, s, end, capa, STR_(node)->capa);\r
3094\r
3095 CHECK_NULL_RETURN_MEMERR(p);\r
3096 STR_(node)->s = p;\r
3097 STR_(node)->capa = capa;\r
3098 }\r
3099 }\r
3100 else {\r
3101 onig_strcpy(STR_(node)->s + len, s, end);\r
3102 }\r
3103 STR_(node)->end = STR_(node)->s + len + addlen;\r
3104 }\r
3105\r
3106 return 0;\r
3107}\r
3108\r
3109extern int\r
3110onig_node_str_set(Node* node, const UChar* s, const UChar* end)\r
3111{\r
3112 onig_node_str_clear(node);\r
3113 return onig_node_str_cat(node, s, end);\r
3114}\r
3115\r
3116static int\r
3117node_str_cat_char(Node* node, UChar c)\r
3118{\r
3119 UChar s[1];\r
3120\r
3121 s[0] = c;\r
3122 return onig_node_str_cat(node, s, s + 1);\r
3123}\r
3124\r
3125extern void\r
3126onig_node_conv_to_str_node(Node* node, int flag)\r
3127{\r
3128 NODE_SET_TYPE(node, NODE_STRING);\r
3129 STR_(node)->flag = flag;\r
3130 STR_(node)->capa = 0;\r
3131 STR_(node)->s = STR_(node)->buf;\r
3132 STR_(node)->end = STR_(node)->buf;\r
3133}\r
3134\r
3135extern void\r
3136onig_node_str_clear(Node* node)\r
3137{\r
3138 if (STR_(node)->capa != 0 &&\r
3139 IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) {\r
3140 xfree(STR_(node)->s);\r
3141 }\r
3142\r
3143 STR_(node)->capa = 0;\r
3144 STR_(node)->flag = 0;\r
3145 STR_(node)->s = STR_(node)->buf;\r
3146 STR_(node)->end = STR_(node)->buf;\r
3147}\r
3148\r
3149static Node*\r
3150node_new_str(const UChar* s, const UChar* end)\r
3151{\r
3152 Node* node = node_new();\r
3153 CHECK_NULL_RETURN(node);\r
3154\r
3155 NODE_SET_TYPE(node, NODE_STRING);\r
3156 STR_(node)->capa = 0;\r
3157 STR_(node)->flag = 0;\r
3158 STR_(node)->s = STR_(node)->buf;\r
3159 STR_(node)->end = STR_(node)->buf;\r
3160 if (onig_node_str_cat(node, s, end)) {\r
3161 onig_node_free(node);\r
3162 return NULL;\r
3163 }\r
3164 return node;\r
3165}\r
3166\r
3167extern Node*\r
3168onig_node_new_str(const UChar* s, const UChar* end)\r
3169{\r
3170 return node_new_str(s, end);\r
3171}\r
3172\r
3173static Node*\r
3174node_new_str_raw(UChar* s, UChar* end)\r
3175{\r
3176 Node* node = node_new_str(s, end);\r
3177 NODE_STRING_SET_RAW(node);\r
3178 return node;\r
3179}\r
3180\r
3181static Node*\r
3182node_new_empty(void)\r
3183{\r
3184 return node_new_str(NULL, NULL);\r
3185}\r
3186\r
3187static Node*\r
3188node_new_str_raw_char(UChar c)\r
3189{\r
3190 UChar p[1];\r
3191\r
3192 p[0] = c;\r
3193 return node_new_str_raw(p, p + 1);\r
3194}\r
3195\r
3196static Node*\r
3197str_node_split_last_char(Node* node, OnigEncoding enc)\r
3198{\r
3199 const UChar *p;\r
3200 Node* rn;\r
3201 StrNode* sn;\r
3202\r
3203 sn = STR_(node);\r
3204 rn = NULL_NODE;\r
3205 if (sn->end > sn->s) {\r
3206 p = onigenc_get_prev_char_head(enc, sn->s, sn->end);\r
3207 if (p && p > sn->s) { /* can be split. */\r
3208 rn = node_new_str(p, sn->end);\r
3209 if (NODE_STRING_IS_RAW(node))\r
3210 NODE_STRING_SET_RAW(rn);\r
3211\r
3212 sn->end = (UChar* )p;\r
3213 }\r
3214 }\r
3215 return rn;\r
3216}\r
3217\r
3218static int\r
3219str_node_can_be_split(Node* node, OnigEncoding enc)\r
3220{\r
3221 StrNode* sn = STR_(node);\r
3222 if (sn->end > sn->s) {\r
3223 return ((enclen(enc, sn->s) < sn->end - sn->s) ? 1 : 0);\r
3224 }\r
3225 return 0;\r
3226}\r
3227\r
3228#ifdef USE_PAD_TO_SHORT_BYTE_CHAR\r
3229static int\r
3230node_str_head_pad(StrNode* sn, int num, UChar val)\r
3231{\r
3232 UChar buf[NODE_STRING_BUF_SIZE];\r
3233 int i, len;\r
3234\r
3235 len = sn->end - sn->s;\r
3236 onig_strcpy(buf, sn->s, sn->end);\r
3237 onig_strcpy(&(sn->s[num]), buf, buf + len);\r
3238 sn->end += num;\r
3239\r
3240 for (i = 0; i < num; i++) {\r
3241 sn->s[i] = val;\r
3242 }\r
3243}\r
3244#endif\r
3245\r
3246extern int\r
3247onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc)\r
3248{\r
3249 unsigned int num, val;\r
3250 OnigCodePoint c;\r
3251 UChar* p = *src;\r
3252 PFETCH_READY;\r
3253\r
3254 num = 0;\r
3255 while (! PEND) {\r
3256 PFETCH(c);\r
3257 if (IS_CODE_DIGIT_ASCII(enc, c)) {\r
3258 val = (unsigned int )DIGITVAL(c);\r
3259 if ((INT_MAX_LIMIT - val) / 10UL < num)\r
3260 return -1; /* overflow */\r
3261\r
3262 num = num * 10 + val;\r
3263 }\r
3264 else {\r
3265 PUNFETCH;\r
3266 break;\r
3267 }\r
3268 }\r
3269 *src = p;\r
3270 return num;\r
3271}\r
3272\r
3273static int\r
3274scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int minlen,\r
3275 int maxlen, OnigEncoding enc)\r
3276{\r
3277 OnigCodePoint c;\r
3278 unsigned int num, val;\r
3279 int n;\r
3280 UChar* p = *src;\r
3281 PFETCH_READY;\r
3282\r
3283 num = 0;\r
3284 n = 0;\r
3285 while (! PEND && n < maxlen) {\r
3286 PFETCH(c);\r
3287 if (IS_CODE_XDIGIT_ASCII(enc, c)) {\r
3288 n++;\r
3289 val = (unsigned int )XDIGITVAL(enc,c);\r
3290 if ((INT_MAX_LIMIT - val) / 16UL < num)\r
3291 return ONIGERR_TOO_BIG_NUMBER; /* overflow */\r
3292\r
3293 num = (num << 4) + XDIGITVAL(enc,c);\r
3294 }\r
3295 else {\r
3296 PUNFETCH;\r
3297 break;\r
3298 }\r
3299 }\r
3300\r
3301 if (n < minlen)\r
3302 return ONIGERR_INVALID_CODE_POINT_VALUE;\r
3303\r
3304 *src = p;\r
3305 return num;\r
3306}\r
3307\r
3308static int\r
3309scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen,\r
3310 OnigEncoding enc)\r
3311{\r
3312 OnigCodePoint c;\r
3313 unsigned int num, val;\r
3314 UChar* p = *src;\r
3315 PFETCH_READY;\r
3316\r
3317 num = 0;\r
3318 while (! PEND && maxlen-- != 0) {\r
3319 PFETCH(c);\r
3320 if (IS_CODE_DIGIT_ASCII(enc, c) && c < '8') {\r
3321 val = ODIGITVAL(c);\r
3322 if ((INT_MAX_LIMIT - val) / 8UL < num)\r
3323 return -1; /* overflow */\r
3324\r
3325 num = (num << 3) + val;\r
3326 }\r
3327 else {\r
3328 PUNFETCH;\r
3329 break;\r
3330 }\r
3331 }\r
3332 *src = p;\r
3333 return num;\r
3334}\r
3335\r
3336\r
/* Write one code point into bbuf at byte offset pos.  `code` must be an
   lvalue because its address is passed to BB_WRITE(). */
3337#define BB_WRITE_CODE_POINT(bbuf,pos,code) \\r
3338 BB_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT)\r
3339\r
3340/* data format:\r
3341 [n][from-1][to-1][from-2][to-2] ... [from-n][to-n]\r
3342 (all data size is OnigCodePoint)\r
3343 */\r
3344static int\r
3345new_code_range(BBuf** pbuf)\r
3346{\r
3347#define INIT_MULTI_BYTE_RANGE_SIZE (SIZE_CODE_POINT * 5)\r
3348 int r;\r
3349 OnigCodePoint n;\r
3350 BBuf* bbuf;\r
3351\r
3352 bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf));\r
3353 CHECK_NULL_RETURN_MEMERR(bbuf);\r
3354 r = BB_INIT(bbuf, INIT_MULTI_BYTE_RANGE_SIZE);\r
3355 if (r != 0) {\r
3356 xfree(bbuf);\r
3357 *pbuf = 0;\r
3358 return r;\r
3359 }\r
3360\r
3361 n = 0;\r
3362 BB_WRITE_CODE_POINT(bbuf, 0, n);\r
3363 return 0;\r
3364}\r
3365\r
/*
 * Insert the code point range [from, to] into the range buffer *pbuf,
 * creating the buffer when *pbuf is NULL.  The bounds are swapped first
 * when from > to.
 *
 * The buffer uses the layout described above new_code_range(): a count
 * n followed by n (from, to) pairs.  Existing ranges that overlap the
 * new one, or that start at most one code point after it ends, are
 * replaced by a single merged range.
 *
 * Returns 0 on success, ONIGERR_TOO_MANY_MULTI_BYTE_RANGES when the
 * resulting count would exceed ONIG_MAX_MULTI_BYTE_RANGES_NUM, or the
 * status of new_code_range().
 * NOTE(review): the BB_* macros are defined outside this chunk and
 * presumably return early on an allocation failure -- confirm.
 */
3366static int\r
3367add_code_range_to_buf(BBuf** pbuf, OnigCodePoint from, OnigCodePoint to)\r
3368{\r
3369 int r, inc_n, pos;\r
3370 int low, high, bound, x;\r
3371 OnigCodePoint n, *data;\r
3372 BBuf* bbuf;\r
3373\r
3374 if (from > to) {\r
3375 n = from; from = to; to = n;\r
3376 }\r
3377\r
3378 if (IS_NULL(*pbuf)) {\r
3379 r = new_code_range(pbuf);\r
3380 if (r != 0) return r;\r
3381 bbuf = *pbuf;\r
3382 n = 0;\r
3383 }\r
3384 else {\r
3385 bbuf = *pbuf;\r
3386 GET_CODE_POINT(n, bbuf->p);\r
3387 }\r
/* data[] addresses the (from, to) pairs that follow the count */
3388 data = (OnigCodePoint* )(bbuf->p);\r
3389 data++;\r
3390\r
/* binary search: low = first range whose end is not below `from` */
3391 for (low = 0, bound = n; low < bound; ) {\r
3392 x = (low + bound) >> 1;\r
3393 if (from > data[x*2 + 1])\r
3394 low = x + 1;\r
3395 else\r
3396 bound = x;\r
3397 }\r
3398\r
/* binary search: high = first range starting after `to` + 1; when `to`
   is the maximum code point, to + 1 would wrap, so high is set to n */
3399 high = (to == ~((OnigCodePoint )0)) ? n : low;\r
3400 for (bound = n; high < bound; ) {\r
3401 x = (high + bound) >> 1;\r
3402 if (to + 1 >= data[x*2])\r
3403 high = x + 1;\r
3404 else\r
3405 bound = x;\r
3406 }\r
3407\r
/* ranges low..high-1 are replaced by one range: net change in count */
3408 inc_n = low + 1 - high;\r
3409 if (n + inc_n > ONIG_MAX_MULTI_BYTE_RANGES_NUM)\r
3410 return ONIGERR_TOO_MANY_MULTI_BYTE_RANGES;\r
3411\r
/* at least one existing range is absorbed: widen to cover it */
3412 if (inc_n != 1) {\r
3413 if (from > data[low*2])\r
3414 from = data[low*2];\r
3415 if (to < data[(high - 1)*2 + 1])\r
3416 to = data[(high - 1)*2 + 1];\r
3417 }\r
3418\r
/* move the ranges from index `high` on so that they follow slot `low` */
3419 if (inc_n != 0 && (OnigCodePoint )high < n) {\r
3420 int from_pos = SIZE_CODE_POINT * (1 + high * 2);\r
3421 int to_pos = SIZE_CODE_POINT * (1 + (low + 1) * 2);\r
3422 int size = (n - high) * 2 * SIZE_CODE_POINT;\r
3423\r
3424 if (inc_n > 0) {\r
3425 BB_MOVE_RIGHT(bbuf, from_pos, to_pos, size);\r
3426 }\r
3427 else {\r
3428 BB_MOVE_LEFT_REDUCE(bbuf, from_pos, to_pos);\r
3429 }\r
3430 }\r
3431\r
/* store the (merged) range in slot `low` and update the count */
3432 pos = SIZE_CODE_POINT * (1 + low * 2);\r
3433 BB_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2);\r
3434 BB_WRITE_CODE_POINT(bbuf, pos, from);\r
3435 BB_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to);\r
3436 n += inc_n;\r
3437 BB_WRITE_CODE_POINT(bbuf, 0, n);\r
3438\r
3439 return 0;\r
3440}\r
3441\r
3442static int\r
3443add_code_range(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to)\r
3444{\r
3445 if (from > to) {\r
3446 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))\r
3447 return 0;\r
3448 else\r
3449 return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;\r
3450 }\r
3451\r
3452 return add_code_range_to_buf(pbuf, from, to);\r
3453}\r
3454\r
/*
 * Build in *pbuf the complement of the range set `bbuf`, starting from
 * MBCODE_START_POS(enc) and running up to the maximum OnigCodePoint
 * value.
 *
 * *pbuf is reset to NULL first.  A NULL or empty `bbuf` yields the
 * result of SET_ALL_MULTI_BYTE_RANGE().  Otherwise the gaps before,
 * between and after the ranges of `bbuf` are added one by one.
 *
 * Returns 0 on success or the status of the failing helper.
 */
3455static int\r
3456not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf)\r
3457{\r
3458 int r, i, n;\r
3459 OnigCodePoint pre, from, *data, to = 0;\r
3460\r
3461 *pbuf = (BBuf* )NULL;\r
3462 if (IS_NULL(bbuf)) {\r
3463 set_all:\r
3464 return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);\r
3465 }\r
3466\r
3467 data = (OnigCodePoint* )(bbuf->p);\r
3468 GET_CODE_POINT(n, data);\r
3469 data++;\r
3470 if (n <= 0) goto set_all;\r
3471\r
3472 r = 0;\r
/* pre = first code point not yet covered by the complement */
3473 pre = MBCODE_START_POS(enc);\r
3474 for (i = 0; i < n; i++) {\r
3475 from = data[i*2];\r
3476 to = data[i*2+1];\r
3477 if (pre <= from - 1) {\r
3478 r = add_code_range_to_buf(pbuf, pre, from - 1);\r
3479 if (r != 0) return r;\r
3480 }\r
/* a range ending at the maximum code point leaves no gap after it */
3481 if (to == ~((OnigCodePoint )0)) break;\r
3482 pre = to + 1;\r
3483 }\r
/* gap after the last range */
3484 if (to < ~((OnigCodePoint )0)) {\r
3485 r = add_code_range_to_buf(pbuf, to + 1, ~((OnigCodePoint )0));\r
3486 }\r
3487 return r;\r
3488}\r
3489\r
/* Exchange two (range buffer, negation flag) pairs.  All four arguments
   must be assignable lvalues. */
3490#define SWAP_BB_NOT(bbuf1, not1, bbuf2, not2) do {\\r
3491 BBuf *tbuf; \\r
3492 int tnot; \\r
3493 tnot = not1; not1 = not2; not2 = tnot; \\r
3494 tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf; \\r
3495} while (0)\r
3496\r
/*
 * Combine two range sets, each with its own negation flag, into a new
 * buffer stored in *pbuf (reset to NULL first).
 *
 * Handling, in order:
 *   - both buffers NULL: the full multi-byte range when either flag is
 *     set, otherwise *pbuf stays NULL;
 *   - the operands are swapped so that bbuf2 is the non-NULL one;
 *   - bbuf1 NULL: full range when not1 is set, otherwise a clone of
 *     bbuf2 or its complement depending on not2;
 *   - both present: the operands are swapped when not1 is set, the
 *     result is seeded from bbuf2 (clone, or complement when not2 is
 *     set) as long as not1 is 0 afterwards, and every range of bbuf1 is
 *     then added to it.
 * NOTE(review): when both flags are set the result is not seeded and
 * the ranges of bbuf1 are added as they are -- confirm this is the
 * intended treatment of "(not 1) OR (not 2)".
 *
 * Returns 0 on success or the status of the failing helper.
 */
3497static int\r
3498or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1,\r
3499 BBuf* bbuf2, int not2, BBuf** pbuf)\r
3500{\r
3501 int r;\r
3502 OnigCodePoint i, n1, *data1;\r
3503 OnigCodePoint from, to;\r
3504\r
3505 *pbuf = (BBuf* )NULL;\r
3506 if (IS_NULL(bbuf1) && IS_NULL(bbuf2)) {\r
3507 if (not1 != 0 || not2 != 0)\r
3508 return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);\r
3509 return 0;\r
3510 }\r
3511\r
3512 r = 0;\r
3513 if (IS_NULL(bbuf2))\r
3514 SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);\r
3515\r
3516 if (IS_NULL(bbuf1)) {\r
3517 if (not1 != 0) {\r
3518 return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);\r
3519 }\r
3520 else {\r
3521 if (not2 == 0) {\r
3522 return bbuf_clone(pbuf, bbuf2);\r
3523 }\r
3524 else {\r
3525 return not_code_range_buf(enc, bbuf2, pbuf);\r
3526 }\r
3527 }\r
3528 }\r
3529\r
3530 if (not1 != 0)\r
3531 SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);\r
3532\r
3533 data1 = (OnigCodePoint* )(bbuf1->p);\r
3534 GET_CODE_POINT(n1, data1);\r
3535 data1++;\r
3536\r
3537 if (not2 == 0 && not1 == 0) { /* 1 OR 2 */\r
3538 r = bbuf_clone(pbuf, bbuf2);\r
3539 }\r
3540 else if (not1 == 0) { /* 1 OR (not 2) */\r
3541 r = not_code_range_buf(enc, bbuf2, pbuf);\r
3542 }\r
3543 if (r != 0) return r;\r
3544\r
3545 for (i = 0; i < n1; i++) {\r
3546 from = data1[i*2];\r
3547 to = data1[i*2+1];\r
3548 r = add_code_range_to_buf(pbuf, from, to);\r
3549 if (r != 0) return r;\r
3550 }\r
3551 return 0;\r
3552}\r
3553\r
3554static int\r
3555and_code_range1(BBuf** pbuf, OnigCodePoint from1, OnigCodePoint to1,\r
3556 OnigCodePoint* data, int n)\r
3557{\r
3558 int i, r;\r
3559 OnigCodePoint from2, to2;\r
3560\r
3561 for (i = 0; i < n; i++) {\r
3562 from2 = data[i*2];\r
3563 to2 = data[i*2+1];\r
3564 if (from2 < from1) {\r
3565 if (to2 < from1) continue;\r
3566 else {\r
3567 from1 = to2 + 1;\r
3568 }\r
3569 }\r
3570 else if (from2 <= to1) {\r
3571 if (to2 < to1) {\r
3572 if (from1 <= from2 - 1) {\r
3573 r = add_code_range_to_buf(pbuf, from1, from2-1);\r
3574 if (r != 0) return r;\r
3575 }\r
3576 from1 = to2 + 1;\r
3577 }\r
3578 else {\r
3579 to1 = from2 - 1;\r
3580 }\r
3581 }\r
3582 else {\r
3583 from1 = from2;\r
3584 }\r
3585 if (from1 > to1) break;\r
3586 }\r
3587 if (from1 <= to1) {\r
3588 r = add_code_range_to_buf(pbuf, from1, to1);\r
3589 if (r != 0) return r;\r
3590 }\r
3591 return 0;\r
3592}\r
3593\r
3594static int\r
3595and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)\r
3596{\r
3597 int r;\r
3598 OnigCodePoint i, j, n1, n2, *data1, *data2;\r
3599 OnigCodePoint from, to, from1, to1, from2, to2;\r
3600\r
3601 *pbuf = (BBuf* )NULL;\r
3602 if (IS_NULL(bbuf1)) {\r
3603 if (not1 != 0 && IS_NOT_NULL(bbuf2)) /* not1 != 0 -> not2 == 0 */\r
3604 return bbuf_clone(pbuf, bbuf2);\r
3605 return 0;\r
3606 }\r
3607 else if (IS_NULL(bbuf2)) {\r
3608 if (not2 != 0)\r
3609 return bbuf_clone(pbuf, bbuf1);\r
3610 return 0;\r
3611 }\r
3612\r
3613 if (not1 != 0)\r
3614 SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);\r
3615\r
3616 data1 = (OnigCodePoint* )(bbuf1->p);\r
3617 data2 = (OnigCodePoint* )(bbuf2->p);\r
3618 GET_CODE_POINT(n1, data1);\r
3619 GET_CODE_POINT(n2, data2);\r
3620 data1++;\r
3621 data2++;\r
3622\r
3623 if (not2 == 0 && not1 == 0) { /* 1 AND 2 */\r
3624 for (i = 0; i < n1; i++) {\r
3625 from1 = data1[i*2];\r
3626 to1 = data1[i*2+1];\r
3627 for (j = 0; j < n2; j++) {\r
3628 from2 = data2[j*2];\r
3629 to2 = data2[j*2+1];\r
3630 if (from2 > to1) break;\r
3631 if (to2 < from1) continue;\r
3632 from = MAX(from1, from2);\r
3633 to = MIN(to1, to2);\r
3634 r = add_code_range_to_buf(pbuf, from, to);\r
3635 if (r != 0) return r;\r
3636 }\r
3637 }\r
3638 }\r
3639 else if (not1 == 0) { /* 1 AND (not 2) */\r
3640 for (i = 0; i < n1; i++) {\r
3641 from1 = data1[i*2];\r
3642 to1 = data1[i*2+1];\r
14b0e578
CS
3643 r = and_code_range1(pbuf, from1, to1, data2, n2);\r
3644 if (r != 0) return r;\r
3645 }\r
3646 }\r
3647\r
3648 return 0;\r
3649}\r
3650\r
3651static int\r
3652and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)\r
3653{\r
3654 int r, not1, not2;\r
3655 BBuf *buf1, *buf2, *pbuf;\r
3656 BitSetRef bsr1, bsr2;\r
3657 BitSet bs1, bs2;\r
3658\r
3659 not1 = IS_NCCLASS_NOT(dest);\r
3660 bsr1 = dest->bs;\r
3661 buf1 = dest->mbuf;\r
3662 not2 = IS_NCCLASS_NOT(cc);\r
3663 bsr2 = cc->bs;\r
3664 buf2 = cc->mbuf;\r
3665\r
3666 if (not1 != 0) {\r
3667 bitset_invert_to(bsr1, bs1);\r
3668 bsr1 = bs1;\r
3669 }\r
3670 if (not2 != 0) {\r
3671 bitset_invert_to(bsr2, bs2);\r
3672 bsr2 = bs2;\r
3673 }\r
3674 bitset_and(bsr1, bsr2);\r
3675 if (bsr1 != dest->bs) {\r
3676 bitset_copy(dest->bs, bsr1);\r
14b0e578
CS
3677 }\r
3678 if (not1 != 0) {\r
3679 bitset_invert(dest->bs);\r
3680 }\r
3681\r
3682 if (! ONIGENC_IS_SINGLEBYTE(enc)) {\r
3683 if (not1 != 0 && not2 != 0) {\r
3684 r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf);\r
3685 }\r
3686 else {\r
3687 r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf);\r
3688 if (r == 0 && not1 != 0) {\r
b602265d
DG
3689 BBuf *tbuf;\r
3690 r = not_code_range_buf(enc, pbuf, &tbuf);\r
3691 if (r != 0) {\r
3692 bbuf_free(pbuf);\r
3693 return r;\r
3694 }\r
3695 bbuf_free(pbuf);\r
3696 pbuf = tbuf;\r
14b0e578
CS
3697 }\r
3698 }\r
3699 if (r != 0) return r;\r
3700\r
3701 dest->mbuf = pbuf;\r
3702 bbuf_free(buf1);\r
3703 return r;\r
3704 }\r
3705 return 0;\r
3706}\r
3707\r
3708static int\r
3709or_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)\r
3710{\r
3711 int r, not1, not2;\r
3712 BBuf *buf1, *buf2, *pbuf;\r
3713 BitSetRef bsr1, bsr2;\r
3714 BitSet bs1, bs2;\r
3715\r
3716 not1 = IS_NCCLASS_NOT(dest);\r
3717 bsr1 = dest->bs;\r
3718 buf1 = dest->mbuf;\r
3719 not2 = IS_NCCLASS_NOT(cc);\r
3720 bsr2 = cc->bs;\r
3721 buf2 = cc->mbuf;\r
3722\r
3723 if (not1 != 0) {\r
3724 bitset_invert_to(bsr1, bs1);\r
3725 bsr1 = bs1;\r
3726 }\r
3727 if (not2 != 0) {\r
3728 bitset_invert_to(bsr2, bs2);\r
3729 bsr2 = bs2;\r
3730 }\r
3731 bitset_or(bsr1, bsr2);\r
3732 if (bsr1 != dest->bs) {\r
3733 bitset_copy(dest->bs, bsr1);\r
14b0e578
CS
3734 }\r
3735 if (not1 != 0) {\r
3736 bitset_invert(dest->bs);\r
3737 }\r
3738\r
3739 if (! ONIGENC_IS_SINGLEBYTE(enc)) {\r
3740 if (not1 != 0 && not2 != 0) {\r
3741 r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf);\r
3742 }\r
3743 else {\r
3744 r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf);\r
3745 if (r == 0 && not1 != 0) {\r
b602265d
DG
3746 BBuf *tbuf;\r
3747 r = not_code_range_buf(enc, pbuf, &tbuf);\r
3748 if (r != 0) {\r
3749 bbuf_free(pbuf);\r
3750 return r;\r
3751 }\r
3752 bbuf_free(pbuf);\r
3753 pbuf = tbuf;\r
14b0e578
CS
3754 }\r
3755 }\r
3756 if (r != 0) return r;\r
3757\r
3758 dest->mbuf = pbuf;\r
3759 bbuf_free(buf1);\r
3760 return r;\r
3761 }\r
3762 else\r
3763 return 0;\r
3764}\r
3765\r
b602265d
DG
3766static OnigCodePoint\r
3767conv_backslash_value(OnigCodePoint c, ScanEnv* env)\r
14b0e578
CS
3768{\r
3769 if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_CONTROL_CHARS)) {\r
3770 switch (c) {\r
3771 case 'n': return '\n';\r
3772 case 't': return '\t';\r
3773 case 'r': return '\r';\r
3774 case 'f': return '\f';\r
3775 case 'a': return '\007';\r
3776 case 'b': return '\010';\r
3777 case 'e': return '\033';\r
3778 case 'v':\r
3779 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_V_VTAB))\r
b602265d 3780 return '\v';\r
14b0e578
CS
3781 break;\r
3782\r
3783 default:\r
3784 break;\r
3785 }\r
3786 }\r
3787 return c;\r
3788}\r
3789\r
3790static int\r
3791is_invalid_quantifier_target(Node* node)\r
3792{\r
b602265d
DG
3793 switch (NODE_TYPE(node)) {\r
3794 case NODE_ANCHOR:\r
3795 case NODE_GIMMICK:\r
14b0e578
CS
3796 return 1;\r
3797 break;\r
3798\r
b602265d 3799 case NODE_ENCLOSURE:\r
14b0e578 3800 /* allow enclosed elements */\r
b602265d 3801 /* return is_invalid_quantifier_target(NODE_BODY(node)); */\r
14b0e578
CS
3802 break;\r
3803\r
b602265d 3804 case NODE_LIST:\r
14b0e578 3805 do {\r
b602265d
DG
3806 if (! is_invalid_quantifier_target(NODE_CAR(node))) return 0;\r
3807 } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
14b0e578
CS
3808 return 0;\r
3809 break;\r
3810\r
b602265d 3811 case NODE_ALT:\r
14b0e578 3812 do {\r
b602265d
DG
3813 if (is_invalid_quantifier_target(NODE_CAR(node))) return 1;\r
3814 } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
14b0e578
CS
3815 break;\r
3816\r
3817 default:\r
3818 break;\r
3819 }\r
3820 return 0;\r
3821}\r
3822\r
3823/* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */\r
3824static int\r
b602265d 3825quantifier_type_num(QuantNode* q)\r
14b0e578
CS
3826{\r
3827 if (q->greedy) {\r
3828 if (q->lower == 0) {\r
3829 if (q->upper == 1) return 0;\r
3830 else if (IS_REPEAT_INFINITE(q->upper)) return 1;\r
3831 }\r
3832 else if (q->lower == 1) {\r
3833 if (IS_REPEAT_INFINITE(q->upper)) return 2;\r
3834 }\r
3835 }\r
3836 else {\r
3837 if (q->lower == 0) {\r
3838 if (q->upper == 1) return 3;\r
3839 else if (IS_REPEAT_INFINITE(q->upper)) return 4;\r
3840 }\r
3841 else if (q->lower == 1) {\r
3842 if (IS_REPEAT_INFINITE(q->upper)) return 5;\r
3843 }\r
3844 }\r
3845 return -1;\r
3846}\r
3847\r
3848\r
/* Actions for collapsing a quantifier applied directly to another
   quantifier; see ReduceTypeTable. */
enum ReduceType {
  RQ_ASIS = 0,  /* as is */
  RQ_DEL  = 1,  /* delete parent */
  RQ_A    = 2,  /* to '*'    */
  RQ_AQ   = 3,  /* to '*?'   */
  RQ_QQ   = 4,  /* to '??'   */
  RQ_P_QQ = 5,  /* to '+)??' */
  RQ_PQ_Q = 6   /* to '+?)?' */
};
3858\r
3859static enum ReduceType ReduceTypeTable[6][6] = {\r
3860 {RQ_DEL, RQ_A, RQ_A, RQ_QQ, RQ_AQ, RQ_ASIS}, /* '?' */\r
3861 {RQ_DEL, RQ_DEL, RQ_DEL, RQ_P_QQ, RQ_P_QQ, RQ_DEL}, /* '*' */\r
3862 {RQ_A, RQ_A, RQ_DEL, RQ_ASIS, RQ_P_QQ, RQ_DEL}, /* '+' */\r
3863 {RQ_DEL, RQ_AQ, RQ_AQ, RQ_DEL, RQ_AQ, RQ_AQ}, /* '??' */\r
3864 {RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL}, /* '*?' */\r
3865 {RQ_ASIS, RQ_PQ_Q, RQ_DEL, RQ_AQ, RQ_AQ, RQ_DEL} /* '+?' */\r
3866};\r
3867\r
3868extern void\r
3869onig_reduce_nested_quantifier(Node* pnode, Node* cnode)\r
3870{\r
3871 int pnum, cnum;\r
b602265d
DG
3872 QuantNode *p, *c;\r
3873\r
3874 p = QUANT_(pnode);\r
3875 c = QUANT_(cnode);\r
3876 pnum = quantifier_type_num(p);\r
3877 cnum = quantifier_type_num(c);\r
3878 if (pnum < 0 || cnum < 0) {\r
3879 if ((p->lower == p->upper) && ! IS_REPEAT_INFINITE(p->upper)) {\r
3880 if ((c->lower == c->upper) && ! IS_REPEAT_INFINITE(c->upper)) {\r
3881 int n = positive_int_multiply(p->lower, c->lower);\r
3882 if (n >= 0) {\r
3883 p->lower = p->upper = n;\r
3884 NODE_BODY(pnode) = NODE_BODY(cnode);\r
3885 goto remove_cnode;\r
3886 }\r
3887 }\r
3888 }\r
14b0e578 3889\r
b602265d
DG
3890 return ;\r
3891 }\r
14b0e578
CS
3892\r
3893 switch(ReduceTypeTable[cnum][pnum]) {\r
3894 case RQ_DEL:\r
b602265d 3895 *pnode = *cnode;\r
14b0e578
CS
3896 break;\r
3897 case RQ_A:\r
b602265d 3898 NODE_BODY(pnode) = NODE_BODY(cnode);\r
14b0e578
CS
3899 p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 1;\r
3900 break;\r
3901 case RQ_AQ:\r
b602265d 3902 NODE_BODY(pnode) = NODE_BODY(cnode);\r
14b0e578
CS
3903 p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 0;\r
3904 break;\r
3905 case RQ_QQ:\r
b602265d 3906 NODE_BODY(pnode) = NODE_BODY(cnode);\r
14b0e578
CS
3907 p->lower = 0; p->upper = 1; p->greedy = 0;\r
3908 break;\r
3909 case RQ_P_QQ:\r
b602265d 3910 NODE_BODY(pnode) = cnode;\r
14b0e578
CS
3911 p->lower = 0; p->upper = 1; p->greedy = 0;\r
3912 c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 1;\r
3913 return ;\r
3914 break;\r
3915 case RQ_PQ_Q:\r
b602265d 3916 NODE_BODY(pnode) = cnode;\r
14b0e578
CS
3917 p->lower = 0; p->upper = 1; p->greedy = 1;\r
3918 c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 0;\r
3919 return ;\r
3920 break;\r
3921 case RQ_ASIS:\r
b602265d 3922 NODE_BODY(pnode) = cnode;\r
14b0e578
CS
3923 return ;\r
3924 break;\r
3925 }\r
3926\r
b602265d
DG
3927 remove_cnode:\r
3928 NODE_BODY(cnode) = NULL_NODE;\r
14b0e578
CS
3929 onig_node_free(cnode);\r
3930}\r
3931\r
b602265d
DG
3932static int\r
3933node_new_general_newline(Node** node, ScanEnv* env)\r
3934{\r
3935 int r;\r
3936 int dlen, alen;\r
3937 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN * 2];\r
3938 Node* crnl;\r
3939 Node* ncc;\r
3940 Node* x;\r
3941 CClassNode* cc;\r
3942\r
3943 dlen = ONIGENC_CODE_TO_MBC(env->enc, 0x0d, buf);\r
3944 if (dlen < 0) return dlen;\r
3945 alen = ONIGENC_CODE_TO_MBC(env->enc, 0x0a, buf + dlen);\r
3946 if (alen < 0) return alen;\r
3947\r
3948 crnl = node_new_str_raw(buf, buf + dlen + alen);\r
3949 CHECK_NULL_RETURN_MEMERR(crnl);\r
3950\r
3951 ncc = node_new_cclass();\r
3952 if (IS_NULL(ncc)) goto err2;\r
3953\r
3954 cc = CCLASS_(ncc);\r
3955 if (dlen == 1) {\r
3956 bitset_set_range(cc->bs, 0x0a, 0x0d);\r
3957 }\r
3958 else {\r
3959 r = add_code_range(&(cc->mbuf), env, 0x0a, 0x0d);\r
3960 if (r != 0) {\r
3961 err1:\r
3962 onig_node_free(ncc);\r
3963 err2:\r
3964 onig_node_free(crnl);\r
3965 return ONIGERR_MEMORY;\r
3966 }\r
3967 }\r
3968\r
3969 if (ONIGENC_IS_UNICODE_ENCODING(env->enc)) {\r
3970 r = add_code_range(&(cc->mbuf), env, 0x85, 0x85);\r
3971 if (r != 0) goto err1;\r
3972 r = add_code_range(&(cc->mbuf), env, 0x2028, 0x2029);\r
3973 if (r != 0) goto err1;\r
3974 }\r
3975\r
3976 x = node_new_enclosure_if_else(crnl, 0, ncc);\r
3977 if (IS_NULL(x)) goto err1;\r
3978\r
3979 *node = x;\r
3980 return 0;\r
3981}\r
14b0e578
CS
3982\r
/* Token kinds stored in OnigToken.type. */
enum TokenSyms {
  TK_EOT      = 0,              /* end of token */
  TK_RAW_BYTE = 1,
  TK_CHAR,
  TK_STRING,
  TK_CODE_POINT,
  TK_ANYCHAR,
  TK_CHAR_TYPE,
  TK_BACKREF,
  TK_CALL,
  TK_ANCHOR,
  TK_OP_REPEAT,
  TK_INTERVAL,
  TK_ANYCHAR_ANYTIME,           /* SQL '%' == .* */
  TK_ALT,
  TK_SUBEXP_OPEN,
  TK_SUBEXP_CLOSE,
  TK_CC_OPEN,
  TK_QUOTE_OPEN,
  TK_CHAR_PROPERTY,             /* \p{...}, \P{...} */
  TK_KEEP,                      /* \K */
  TK_GENERAL_NEWLINE,           /* \R */
  TK_NO_NEWLINE,                /* \N */
  TK_TRUE_ANYCHAR,              /* \O */
  TK_EXTENDED_GRAPHEME_CLUSTER, /* \X */

  /* in cc */
  TK_CC_CLOSE,
  TK_CC_RANGE,
  TK_POSIX_BRACKET_OPEN,
  TK_CC_AND,                    /* && */
  TK_CC_CC_OPEN                 /* [ */
};
4016\r
4017typedef struct {\r
4018 enum TokenSyms type;\r
4019 int escaped;\r
4020 int base; /* is number: 8, 16 (used in [....]) */\r
4021 UChar* backp;\r
4022 union {\r
4023 UChar* s;\r
4024 int c;\r
4025 OnigCodePoint code;\r
4026 int anchor;\r
4027 int subtype;\r
4028 struct {\r
4029 int lower;\r
4030 int upper;\r
4031 int greedy;\r
4032 int possessive;\r
4033 } repeat;\r
4034 struct {\r
4035 int num;\r
4036 int ref1;\r
4037 int* refs;\r
4038 int by_name;\r
4039#ifdef USE_BACKREF_WITH_LEVEL\r
4040 int exist_level;\r
4041 int level; /* \k<name+n> */\r
4042#endif\r
4043 } backref;\r
4044 struct {\r
4045 UChar* name;\r
4046 UChar* name_end;\r
4047 int gnum;\r
b602265d 4048 int by_number;\r
14b0e578
CS
4049 } call;\r
4050 struct {\r
4051 int ctype;\r
4052 int not;\r
4053 } prop;\r
4054 } u;\r
4055} OnigToken;\r
4056\r
4057\r
4058static int\r
4059fetch_range_quantifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env)\r
4060{\r
4061 int low, up, syn_allow, non_low = 0;\r
4062 int r = 0;\r
4063 OnigCodePoint c;\r
4064 OnigEncoding enc = env->enc;\r
4065 UChar* p = *src;\r
4066 PFETCH_READY;\r
4067\r
4068 syn_allow = IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INVALID_INTERVAL);\r
4069\r
4070 if (PEND) {\r
4071 if (syn_allow)\r
4072 return 1; /* "....{" : OK! */\r
4073 else\r
4074 return ONIGERR_END_PATTERN_AT_LEFT_BRACE; /* "....{" syntax error */\r
4075 }\r
4076\r
4077 if (! syn_allow) {\r
4078 c = PPEEK;\r
4079 if (c == ')' || c == '(' || c == '|') {\r
4080 return ONIGERR_END_PATTERN_AT_LEFT_BRACE;\r
4081 }\r
4082 }\r
4083\r
4084 low = onig_scan_unsigned_number(&p, end, env->enc);\r
4085 if (low < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;\r
4086 if (low > ONIG_MAX_REPEAT_NUM)\r
4087 return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;\r
4088\r
4089 if (p == *src) { /* can't read low */\r
4090 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV)) {\r
4091 /* allow {,n} as {0,n} */\r
4092 low = 0;\r
4093 non_low = 1;\r
4094 }\r
4095 else\r
4096 goto invalid;\r
4097 }\r
4098\r
4099 if (PEND) goto invalid;\r
4100 PFETCH(c);\r
4101 if (c == ',') {\r
4102 UChar* prev = p;\r
4103 up = onig_scan_unsigned_number(&p, end, env->enc);\r
4104 if (up < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;\r
4105 if (up > ONIG_MAX_REPEAT_NUM)\r
4106 return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;\r
4107\r
4108 if (p == prev) {\r
4109 if (non_low != 0)\r
b602265d 4110 goto invalid;\r
14b0e578
CS
4111 up = REPEAT_INFINITE; /* {n,} : {n,infinite} */\r
4112 }\r
4113 }\r
4114 else {\r
4115 if (non_low != 0)\r
4116 goto invalid;\r
4117\r
4118 PUNFETCH;\r
4119 up = low; /* {n} : exact n times */\r
4120 r = 2; /* fixed */\r
4121 }\r
4122\r
4123 if (PEND) goto invalid;\r
4124 PFETCH(c);\r
4125 if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) {\r
4126 if (c != MC_ESC(env->syntax)) goto invalid;\r
4127 PFETCH(c);\r
4128 }\r
4129 if (c != '}') goto invalid;\r
4130\r
4131 if (!IS_REPEAT_INFINITE(up) && low > up) {\r
4132 return ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE;\r
4133 }\r
4134\r
4135 tok->type = TK_INTERVAL;\r
4136 tok->u.repeat.lower = low;\r
4137 tok->u.repeat.upper = up;\r
4138 *src = p;\r
4139 return r; /* 0: normal {n,m}, 2: fixed {n} */\r
4140\r
4141 invalid:\r
b602265d
DG
4142 if (syn_allow) {\r
4143 /* *src = p; */ /* !!! Don't do this line !!! */\r
14b0e578 4144 return 1; /* OK */\r
b602265d 4145 }\r
14b0e578
CS
4146 else\r
4147 return ONIGERR_INVALID_REPEAT_RANGE_PATTERN;\r
4148}\r
4149\r
4150/* \M-, \C-, \c, or \... */\r
4151static int\r
b602265d 4152fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env, OnigCodePoint* val)\r
14b0e578
CS
4153{\r
4154 int v;\r
4155 OnigCodePoint c;\r
4156 OnigEncoding enc = env->enc;\r
4157 UChar* p = *src;\r
4158\r
4159 if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;\r
4160\r
4161 PFETCH_S(c);\r
4162 switch (c) {\r
4163 case 'M':\r
4164 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META)) {\r
4165 if (PEND) return ONIGERR_END_PATTERN_AT_META;\r
4166 PFETCH_S(c);\r
4167 if (c != '-') return ONIGERR_META_CODE_SYNTAX;\r
4168 if (PEND) return ONIGERR_END_PATTERN_AT_META;\r
4169 PFETCH_S(c);\r
4170 if (c == MC_ESC(env->syntax)) {\r
b602265d 4171 v = fetch_escaped_value(&p, end, env, &c);\r
14b0e578 4172 if (v < 0) return v;\r
14b0e578
CS
4173 }\r
4174 c = ((c & 0xff) | 0x80);\r
4175 }\r
4176 else\r
4177 goto backslash;\r
4178 break;\r
4179\r
4180 case 'C':\r
4181 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL)) {\r
4182 if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;\r
4183 PFETCH_S(c);\r
4184 if (c != '-') return ONIGERR_CONTROL_CODE_SYNTAX;\r
4185 goto control;\r
4186 }\r
4187 else\r
4188 goto backslash;\r
4189\r
4190 case 'c':\r
4191 if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_C_CONTROL)) {\r
4192 control:\r
4193 if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;\r
4194 PFETCH_S(c);\r
4195 if (c == '?') {\r
4196 c = 0177;\r
4197 }\r
4198 else {\r
4199 if (c == MC_ESC(env->syntax)) {\r
b602265d 4200 v = fetch_escaped_value(&p, end, env, &c);\r
14b0e578 4201 if (v < 0) return v;\r
14b0e578
CS
4202 }\r
4203 c &= 0x9f;\r
4204 }\r
4205 break;\r
4206 }\r
4207 /* fall through */\r
4208\r
4209 default:\r
4210 {\r
4211 backslash:\r
4212 c = conv_backslash_value(c, env);\r
4213 }\r
4214 break;\r
4215 }\r
4216\r
4217 *src = p;\r
b602265d
DG
4218 *val = c;\r
4219 return 0;\r
14b0e578
CS
4220}\r
4221\r
4222static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env);\r
4223\r
4224static OnigCodePoint\r
4225get_name_end_code_point(OnigCodePoint start)\r
4226{\r
4227 switch (start) {\r
b602265d 4228 case '<': return (OnigCodePoint )'>'; break;\r
14b0e578 4229 case '\'': return (OnigCodePoint )'\''; break;\r
b602265d 4230 case '(': return (OnigCodePoint )')'; break;\r
14b0e578
CS
4231 default:\r
4232 break;\r
4233 }\r
4234\r
4235 return (OnigCodePoint )0;\r
4236}\r
4237\r
b602265d
DG
/* How a group reference was written, as reported by fetch_name() and
   fetch_name_with_level(). */
enum REF_NUM {
  IS_NOT_NUM = 0,  /* a name */
  IS_ABS_NUM = 1,  /* digits only */
  IS_REL_NUM = 2   /* digits preceded by '+' or '-' */
};
4243\r
14b0e578
CS
#ifdef USE_BACKREF_WITH_LEVEL
/*
   \k<name+n>, \k<name-n>
   \k<num+n>,  \k<num-n>
   \k<-num+n>, \k<-num-n>
   \k<+num+n>, \k<+num-n>

   Scan a group reference that may carry a trailing level (+n / -n); the
   opening delimiter start_code has already been consumed.

   On success:
     *rname_end  end of the name/number part (the level is not included)
     *rback_num  signed group number when the reference is numeric
     *num_type   IS_NOT_NUM, IS_ABS_NUM or IS_REL_NUM
     *rlevel     signed level, written only when a level is present
     *src        advanced past the closing delimiter
   and the return value is 1 if a level was present, 0 otherwise.

   On failure a negative error code is returned; except for the "empty
   name" and "too big number" cases the offending text is also recorded
   with onig_scan_env_set_error_string().
*/
static int
fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
                      UChar** rname_end, ScanEnv* env,
                      int* rback_num, int* rlevel, enum REF_NUM* num_type)
{
  int r, sign, exist_level;
  int digit_count;
  OnigCodePoint end_code;
  OnigCodePoint c = 0;
  OnigEncoding enc = env->enc;
  UChar *name_end;
  UChar *pnum_head;
  UChar *p = *src;
  PFETCH_READY;

  *rback_num  = 0;
  *num_type   = IS_NOT_NUM;
  exist_level = 0;
  digit_count = 0;
  sign        = 1;
  r           = 0;
  pnum_head   = *src;
  name_end    = end;
  end_code    = get_name_end_code_point(start_code);

  if (PEND) return ONIGERR_EMPTY_GROUP_NAME;

  /* the first character decides between name, number and signed number */
  PFETCH(c);
  if (c == end_code) return ONIGERR_EMPTY_GROUP_NAME;

  if (IS_CODE_DIGIT_ASCII(enc, c)) {
    *num_type = IS_ABS_NUM;
    digit_count++;
  }
  else if (c == '-' || c == '+') {
    *num_type = IS_REL_NUM;
    sign = (c == '-') ? -1 : 1;
    pnum_head = p;  /* digits start after the sign */
  }
  else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
    r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
  }

  /* scan up to the closing delimiter or the start of the level part;
     errors are remembered so that name_end still reaches the delimiter */
  while (!PEND) {
    name_end = p;
    PFETCH(c);
    if (c == end_code || c == ')' || c == '+' || c == '-') {
      if (*num_type != IS_NOT_NUM && digit_count == 0)
        r = ONIGERR_INVALID_GROUP_NAME;
      break;
    }

    if (*num_type == IS_NOT_NUM) {
      if (!ONIGENC_IS_CODE_WORD(enc, c))
        r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
    }
    else if (IS_CODE_DIGIT_ASCII(enc, c)) {
      digit_count++;
    }
    else {
      r = ONIGERR_INVALID_GROUP_NAME;
      *num_type = IS_NOT_NUM;
    }
  }

  if (r == 0 && c != end_code) {
    int closed = 0;  /* set once the closing delimiter has been consumed */

    if (c == '+' || c == '-') {
      int level;
      int flag = (c == '-' ? -1 : 1);

      if (PEND) {
        r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
        goto fail;
      }
      PFETCH(c);
      if (IS_CODE_DIGIT_ASCII(enc, c)) {
        PUNFETCH;
        level = onig_scan_unsigned_number(&p, end, enc);
        if (level < 0) return ONIGERR_TOO_BIG_NUMBER;
        *rlevel = (level * flag);
        exist_level = 1;

        if (!PEND) {
          PFETCH(c);
          if (c == end_code) closed = 1;
        }
      }
    }

    if (! closed) {
      name_end = end;
      r = ONIGERR_INVALID_GROUP_NAME;
    }
  }

  if (r != 0) goto fail;

  if (*num_type != IS_NOT_NUM) {
    *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
    if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
    if (*rback_num == 0 && *num_type == IS_REL_NUM) {
      /* a relative reference of zero is not allowed */
      r = ONIGERR_INVALID_GROUP_NAME;
      goto fail;
    }
    *rback_num *= sign;
  }

  *rname_end = name_end;
  *src = p;
  return (exist_level ? 1 : 0);

 fail:
  onig_scan_env_set_error_string(env, r, *src, name_end);
  return r;
}
#endif /* USE_BACKREF_WITH_LEVEL */
4380\r
4381/*\r
b602265d 4382 ref: 0 -> define name (don't allow number name)\r
14b0e578
CS
4383 1 -> reference name (allow number name)\r
4384*/\r
4385static int\r
4386fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,\r
b602265d
DG
4387 UChar** rname_end, ScanEnv* env, int* rback_num,\r
4388 enum REF_NUM* num_type, int ref)\r
14b0e578 4389{\r
b602265d
DG
4390 int r, sign;\r
4391 int digit_count;\r
14b0e578
CS
4392 OnigCodePoint end_code;\r
4393 OnigCodePoint c = 0;\r
4394 OnigEncoding enc = env->enc;\r
4395 UChar *name_end;\r
4396 UChar *pnum_head;\r
4397 UChar *p = *src;\r
4398\r
4399 *rback_num = 0;\r
4400\r
4401 end_code = get_name_end_code_point(start_code);\r
4402\r
b602265d 4403 digit_count = 0;\r
14b0e578
CS
4404 name_end = end;\r
4405 pnum_head = *src;\r
4406 r = 0;\r
b602265d 4407 *num_type = IS_NOT_NUM;\r
14b0e578
CS
4408 sign = 1;\r
4409 if (PEND) {\r
4410 return ONIGERR_EMPTY_GROUP_NAME;\r
4411 }\r
4412 else {\r
4413 PFETCH_S(c);\r
4414 if (c == end_code)\r
4415 return ONIGERR_EMPTY_GROUP_NAME;\r
4416\r
b602265d 4417 if (IS_CODE_DIGIT_ASCII(enc, c)) {\r
14b0e578 4418 if (ref == 1)\r
b602265d 4419 *num_type = IS_ABS_NUM;\r
14b0e578
CS
4420 else {\r
4421 r = ONIGERR_INVALID_GROUP_NAME;\r
14b0e578 4422 }\r
b602265d 4423 digit_count++;\r
14b0e578
CS
4424 }\r
4425 else if (c == '-') {\r
4426 if (ref == 1) {\r
b602265d 4427 *num_type = IS_REL_NUM;\r
14b0e578
CS
4428 sign = -1;\r
4429 pnum_head = p;\r
4430 }\r
4431 else {\r
4432 r = ONIGERR_INVALID_GROUP_NAME;\r
14b0e578
CS
4433 }\r
4434 }\r
b602265d
DG
4435 else if (c == '+') {\r
4436 if (ref == 1) {\r
4437 *num_type = IS_REL_NUM;\r
4438 sign = 1;\r
4439 pnum_head = p;\r
14b0e578
CS
4440 }\r
4441 else {\r
14b0e578 4442 r = ONIGERR_INVALID_GROUP_NAME;\r
14b0e578 4443 }\r
14b0e578 4444 }\r
b602265d 4445 else if (!ONIGENC_IS_CODE_WORD(enc, c)) {\r
14b0e578 4446 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;\r
b602265d 4447 }\r
14b0e578
CS
4448 }\r
4449\r
4450 if (r == 0) {\r
b602265d
DG
4451 while (!PEND) {\r
4452 name_end = p;\r
4453 PFETCH_S(c);\r
4454 if (c == end_code || c == ')') {\r
4455 if (*num_type != IS_NOT_NUM && digit_count == 0)\r
4456 r = ONIGERR_INVALID_GROUP_NAME;\r
4457 break;\r
4458 }\r
4459\r
4460 if (*num_type != IS_NOT_NUM) {\r
4461 if (IS_CODE_DIGIT_ASCII(enc, c)) {\r
4462 digit_count++;\r
4463 }\r
4464 else {\r
4465 if (!ONIGENC_IS_CODE_WORD(enc, c))\r
4466 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;\r
4467 else\r
4468 r = ONIGERR_INVALID_GROUP_NAME;\r
4469\r
4470 *num_type = IS_NOT_NUM;\r
4471 }\r
4472 }\r
4473 else {\r
4474 if (!ONIGENC_IS_CODE_WORD(enc, c)) {\r
4475 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;\r
4476 }\r
4477 }\r
4478 }\r
4479\r
4480 if (c != end_code) {\r
14b0e578
CS
4481 r = ONIGERR_INVALID_GROUP_NAME;\r
4482 goto err;\r
4483 }\r
b602265d
DG
4484\r
4485 if (*num_type != IS_NOT_NUM) {\r
4486 *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);\r
4487 if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;\r
4488 else if (*rback_num == 0) {\r
4489 if (*num_type == IS_REL_NUM) {\r
4490 r = ONIGERR_INVALID_GROUP_NAME;\r
4491 goto err;\r
4492 }\r
4493 }\r
4494\r
4495 *rback_num *= sign;\r
4496 }\r
14b0e578
CS
4497\r
4498 *rname_end = name_end;\r
4499 *src = p;\r
4500 return 0;\r
4501 }\r
4502 else {\r
b602265d
DG
4503 while (!PEND) {\r
4504 name_end = p;\r
4505 PFETCH_S(c);\r
4506 if (c == end_code || c == ')')\r
4507 break;\r
4508 }\r
4509 if (PEND)\r
4510 name_end = end;\r
4511\r
14b0e578
CS
4512 err:\r
4513 onig_scan_env_set_error_string(env, r, *src, name_end);\r
4514 return r;\r
4515 }\r
4516}\r
14b0e578
CS
4517\r
4518static void\r
4519CC_ESC_WARN(ScanEnv* env, UChar *c)\r
4520{\r
4521 if (onig_warn == onig_null_warn) return ;\r
4522\r
4523 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED) &&\r
4524 IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) {\r
4525 UChar buf[WARN_BUFSIZE];\r
4526 onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,\r
b602265d
DG
4527 env->pattern, env->pattern_end,\r
4528 (UChar* )"character class has '%s' without escape",\r
4529 c);\r
14b0e578
CS
4530 (*onig_warn)((char* )buf);\r
4531 }\r
4532}\r
4533\r
4534static void\r
4535CLOSE_BRACKET_WITHOUT_ESC_WARN(ScanEnv* env, UChar* c)\r
4536{\r
4537 if (onig_warn == onig_null_warn) return ;\r
4538\r
4539 if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) {\r
4540 UChar buf[WARN_BUFSIZE];\r
4541 onig_snprintf_with_pattern(buf, WARN_BUFSIZE, (env)->enc,\r
b602265d
DG
4542 (env)->pattern, (env)->pattern_end,\r
4543 (UChar* )"regular expression has '%s' without escape", c);\r
14b0e578
CS
4544 (*onig_warn)((char* )buf);\r
4545 }\r
4546}\r
4547\r
4548static UChar*\r
4549find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to,\r
b602265d 4550 UChar **next, OnigEncoding enc)\r
14b0e578
CS
4551{\r
4552 int i;\r
4553 OnigCodePoint x;\r
4554 UChar *q;\r
4555 UChar *p = from;\r
4556 \r
4557 while (p < to) {\r
4558 x = ONIGENC_MBC_TO_CODE(enc, p, to);\r
4559 q = p + enclen(enc, p);\r
4560 if (x == s[0]) {\r
4561 for (i = 1; i < n && q < to; i++) {\r
b602265d
DG
4562 x = ONIGENC_MBC_TO_CODE(enc, q, to);\r
4563 if (x != s[i]) break;\r
4564 q += enclen(enc, q);\r
14b0e578
CS
4565 }\r
4566 if (i >= n) {\r
b602265d
DG
4567 if (IS_NOT_NULL(next))\r
4568 *next = q;\r
4569 return p;\r
14b0e578
CS
4570 }\r
4571 }\r
4572 p = q;\r
4573 }\r
4574 return NULL_UCHARP;\r
4575}\r
4576\r
4577static int\r
4578str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to,\r
b602265d 4579 OnigCodePoint bad, OnigEncoding enc, OnigSyntaxType* syn)\r
14b0e578
CS
4580{\r
4581 int i, in_esc;\r
4582 OnigCodePoint x;\r
4583 UChar *q;\r
4584 UChar *p = from;\r
4585\r
4586 in_esc = 0;\r
4587 while (p < to) {\r
4588 if (in_esc) {\r
4589 in_esc = 0;\r
4590 p += enclen(enc, p);\r
4591 }\r
4592 else {\r
4593 x = ONIGENC_MBC_TO_CODE(enc, p, to);\r
4594 q = p + enclen(enc, p);\r
4595 if (x == s[0]) {\r
b602265d
DG
4596 for (i = 1; i < n && q < to; i++) {\r
4597 x = ONIGENC_MBC_TO_CODE(enc, q, to);\r
4598 if (x != s[i]) break;\r
4599 q += enclen(enc, q);\r
4600 }\r
4601 if (i >= n) return 1;\r
4602 p += enclen(enc, p);\r
14b0e578
CS
4603 }\r
4604 else {\r
b602265d
DG
4605 x = ONIGENC_MBC_TO_CODE(enc, p, to);\r
4606 if (x == bad) return 0;\r
4607 else if (x == MC_ESC(syn)) in_esc = 1;\r
4608 p = q;\r
14b0e578
CS
4609 }\r
4610 }\r
4611 }\r
4612 return 0;\r
4613}\r
4614\r
4615static int\r
4616fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)\r
4617{\r
4618 int num;\r
4619 OnigCodePoint c, c2;\r
4620 OnigSyntaxType* syn = env->syntax;\r
4621 OnigEncoding enc = env->enc;\r
4622 UChar* prev;\r
4623 UChar* p = *src;\r
4624 PFETCH_READY;\r
4625\r
4626 if (PEND) {\r
4627 tok->type = TK_EOT;\r
4628 return tok->type;\r
4629 }\r
4630\r
4631 PFETCH(c);\r
4632 tok->type = TK_CHAR;\r
4633 tok->base = 0;\r
4634 tok->u.c = c;\r
4635 tok->escaped = 0;\r
4636\r
4637 if (c == ']') {\r
4638 tok->type = TK_CC_CLOSE;\r
4639 }\r
4640 else if (c == '-') {\r
4641 tok->type = TK_CC_RANGE;\r
4642 }\r
4643 else if (c == MC_ESC(syn)) {\r
4644 if (! IS_SYNTAX_BV(syn, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC))\r
4645 goto end;\r
4646\r
4647 if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;\r
4648\r
4649 PFETCH(c);\r
4650 tok->escaped = 1;\r
4651 tok->u.c = c;\r
4652 switch (c) {\r
4653 case 'w':\r
4654 tok->type = TK_CHAR_TYPE;\r
4655 tok->u.prop.ctype = ONIGENC_CTYPE_WORD;\r
4656 tok->u.prop.not = 0;\r
4657 break;\r
4658 case 'W':\r
4659 tok->type = TK_CHAR_TYPE;\r
4660 tok->u.prop.ctype = ONIGENC_CTYPE_WORD;\r
4661 tok->u.prop.not = 1;\r
4662 break;\r
4663 case 'd':\r
4664 tok->type = TK_CHAR_TYPE;\r
4665 tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;\r
4666 tok->u.prop.not = 0;\r
4667 break;\r
4668 case 'D':\r
4669 tok->type = TK_CHAR_TYPE;\r
4670 tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;\r
4671 tok->u.prop.not = 1;\r
4672 break;\r
4673 case 's':\r
4674 tok->type = TK_CHAR_TYPE;\r
4675 tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;\r
4676 tok->u.prop.not = 0;\r
4677 break;\r
4678 case 'S':\r
4679 tok->type = TK_CHAR_TYPE;\r
4680 tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;\r
4681 tok->u.prop.not = 1;\r
4682 break;\r
4683 case 'h':\r
4684 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;\r
4685 tok->type = TK_CHAR_TYPE;\r
4686 tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;\r
4687 tok->u.prop.not = 0;\r
4688 break;\r
4689 case 'H':\r
4690 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;\r
4691 tok->type = TK_CHAR_TYPE;\r
4692 tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;\r
4693 tok->u.prop.not = 1;\r
4694 break;\r
4695\r
4696 case 'p':\r
4697 case 'P':\r
b602265d
DG
4698 if (PEND) break;\r
4699\r
14b0e578
CS
4700 c2 = PPEEK;\r
4701 if (c2 == '{' &&\r
b602265d
DG
4702 IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {\r
4703 PINC;\r
4704 tok->type = TK_CHAR_PROPERTY;\r
4705 tok->u.prop.not = (c == 'P' ? 1 : 0);\r
4706\r
4707 if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {\r
4708 PFETCH(c2);\r
4709 if (c2 == '^') {\r
4710 tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);\r
4711 }\r
4712 else\r
4713 PUNFETCH;\r
4714 }\r
4715 }\r
4716 break;\r
4717\r
4718 case 'o':\r
4719 if (PEND) break;\r
4720\r
4721 prev = p;\r
4722 if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_O_BRACE_OCTAL)) {\r
4723 PINC;\r
4724 num = scan_unsigned_octal_number(&p, end, 11, enc);\r
4725 if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;\r
4726 if (!PEND) {\r
4727 c2 = PPEEK;\r
4728 if (IS_CODE_DIGIT_ASCII(enc, c2))\r
4729 return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;\r
4730 }\r
4731\r
4732 if (p > prev + enclen(enc, prev) && !PEND && (PPEEK_IS('}'))) {\r
4733 PINC;\r
4734 tok->type = TK_CODE_POINT;\r
4735 tok->base = 8;\r
4736 tok->u.code = (OnigCodePoint )num;\r
4737 }\r
4738 else {\r
4739 /* can't read nothing or invalid format */\r
4740 p = prev;\r
4741 }\r
14b0e578
CS
4742 }\r
4743 break;\r
4744\r
4745 case 'x':\r
4746 if (PEND) break;\r
4747\r
4748 prev = p;\r
4749 if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {\r
b602265d
DG
4750 PINC;\r
4751 num = scan_unsigned_hexadecimal_number(&p, end, 0, 8, enc);\r
4752 if (num < 0) {\r
4753 if (num == ONIGERR_TOO_BIG_NUMBER)\r
4754 return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;\r
4755 else\r
4756 return num;\r
4757 }\r
4758 if (!PEND) {\r
14b0e578 4759 c2 = PPEEK;\r
b602265d 4760 if (IS_CODE_XDIGIT_ASCII(enc, c2))\r
14b0e578
CS
4761 return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;\r
4762 }\r
4763\r
b602265d
DG
4764 if (p > prev + enclen(enc, prev) && !PEND && (PPEEK_IS('}'))) {\r
4765 PINC;\r
4766 tok->type = TK_CODE_POINT;\r
4767 tok->base = 16;\r
4768 tok->u.code = (OnigCodePoint )num;\r
4769 }\r
4770 else {\r
4771 /* can't read nothing or invalid format */\r
4772 p = prev;\r
4773 }\r
14b0e578
CS
4774 }\r
4775 else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {\r
b602265d
DG
4776 num = scan_unsigned_hexadecimal_number(&p, end, 0, 2, enc);\r
4777 if (num < 0) return num;\r
4778 if (p == prev) { /* can't read nothing. */\r
4779 num = 0; /* but, it's not error */\r
4780 }\r
4781 tok->type = TK_RAW_BYTE;\r
4782 tok->base = 16;\r
4783 tok->u.c = num;\r
14b0e578
CS
4784 }\r
4785 break;\r
4786\r
4787 case 'u':\r
4788 if (PEND) break;\r
4789\r
4790 prev = p;\r
4791 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {\r
b602265d
DG
4792 num = scan_unsigned_hexadecimal_number(&p, end, 4, 4, enc);\r
4793 if (num < 0) return num;\r
4794 if (p == prev) { /* can't read nothing. */\r
4795 num = 0; /* but, it's not error */\r
4796 }\r
4797 tok->type = TK_CODE_POINT;\r
4798 tok->base = 16;\r
4799 tok->u.code = (OnigCodePoint )num;\r
14b0e578
CS
4800 }\r
4801 break;\r
4802\r
4803 case '0':\r
4804 case '1': case '2': case '3': case '4': case '5': case '6': case '7':\r
4805 if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {\r
b602265d
DG
4806 PUNFETCH;\r
4807 prev = p;\r
4808 num = scan_unsigned_octal_number(&p, end, 3, enc);\r
4809 if (num < 0 || num >= 256) return ONIGERR_TOO_BIG_NUMBER;\r
4810 if (p == prev) { /* can't read nothing. */\r
4811 num = 0; /* but, it's not error */\r
4812 }\r
4813 tok->type = TK_RAW_BYTE;\r
4814 tok->base = 8;\r
4815 tok->u.c = num;\r
14b0e578
CS
4816 }\r
4817 break;\r
4818\r
4819 default:\r
4820 PUNFETCH;\r
b602265d 4821 num = fetch_escaped_value(&p, end, env, &c2);\r
14b0e578 4822 if (num < 0) return num;\r
b602265d
DG
4823 if (tok->u.c != c2) {\r
4824 tok->u.code = c2;\r
4825 tok->type = TK_CODE_POINT;\r
14b0e578
CS
4826 }\r
4827 break;\r
4828 }\r
4829 }\r
4830 else if (c == '[') {\r
4831 if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_POSIX_BRACKET) && (PPEEK_IS(':'))) {\r
4832 OnigCodePoint send[] = { (OnigCodePoint )':', (OnigCodePoint )']' };\r
b602265d 4833 tok->backp = p; /* point at '[' is read */\r
14b0e578
CS
4834 PINC;\r
4835 if (str_exist_check_with_esc(send, 2, p, end,\r
4836 (OnigCodePoint )']', enc, syn)) {\r
b602265d 4837 tok->type = TK_POSIX_BRACKET_OPEN;\r
14b0e578
CS
4838 }\r
4839 else {\r
b602265d
DG
4840 PUNFETCH;\r
4841 goto cc_in_cc;\r
14b0e578
CS
4842 }\r
4843 }\r
4844 else {\r
4845 cc_in_cc:\r
4846 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP)) {\r
b602265d 4847 tok->type = TK_CC_CC_OPEN;\r
14b0e578
CS
4848 }\r
4849 else {\r
b602265d 4850 CC_ESC_WARN(env, (UChar* )"[");\r
14b0e578
CS
4851 }\r
4852 }\r
4853 }\r
4854 else if (c == '&') {\r
4855 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP) &&\r
b602265d 4856 !PEND && (PPEEK_IS('&'))) {\r
14b0e578
CS
4857 PINC;\r
4858 tok->type = TK_CC_AND;\r
4859 }\r
4860 }\r
4861\r
4862 end:\r
4863 *src = p;\r
4864 return tok->type;\r
4865}\r
4866\r
4867static int\r
4868fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)\r
4869{\r
4870 int r, num;\r
4871 OnigCodePoint c;\r
4872 OnigEncoding enc = env->enc;\r
4873 OnigSyntaxType* syn = env->syntax;\r
4874 UChar* prev;\r
4875 UChar* p = *src;\r
4876 PFETCH_READY;\r
4877\r
4878 start:\r
4879 if (PEND) {\r
4880 tok->type = TK_EOT;\r
4881 return tok->type;\r
4882 }\r
4883\r
4884 tok->type = TK_STRING;\r
4885 tok->base = 0;\r
4886 tok->backp = p;\r
4887\r
4888 PFETCH(c);\r
4889 if (IS_MC_ESC_CODE(c, syn)) {\r
4890 if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;\r
4891\r
4892 tok->backp = p;\r
4893 PFETCH(c);\r
4894\r
4895 tok->u.c = c;\r
4896 tok->escaped = 1;\r
4897 switch (c) {\r
4898 case '*':\r
4899 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF)) break;\r
4900 tok->type = TK_OP_REPEAT;\r
4901 tok->u.repeat.lower = 0;\r
4902 tok->u.repeat.upper = REPEAT_INFINITE;\r
4903 goto greedy_check;\r
4904 break;\r
4905\r
4906 case '+':\r
4907 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_PLUS_ONE_INF)) break;\r
4908 tok->type = TK_OP_REPEAT;\r
4909 tok->u.repeat.lower = 1;\r
4910 tok->u.repeat.upper = REPEAT_INFINITE;\r
4911 goto greedy_check;\r
4912 break;\r
4913\r
4914 case '?':\r
4915 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_QMARK_ZERO_ONE)) break;\r
4916 tok->type = TK_OP_REPEAT;\r
4917 tok->u.repeat.lower = 0;\r
4918 tok->u.repeat.upper = 1;\r
4919 greedy_check:\r
4920 if (!PEND && PPEEK_IS('?') &&\r
b602265d
DG
4921 IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY)) {\r
4922 PFETCH(c);\r
4923 tok->u.repeat.greedy = 0;\r
4924 tok->u.repeat.possessive = 0;\r
14b0e578
CS
4925 }\r
4926 else {\r
4927 possessive_check:\r
b602265d
DG
4928 if (!PEND && PPEEK_IS('+') &&\r
4929 ((IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT) &&\r
4930 tok->type != TK_INTERVAL) ||\r
4931 (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL) &&\r
4932 tok->type == TK_INTERVAL))) {\r
4933 PFETCH(c);\r
4934 tok->u.repeat.greedy = 1;\r
4935 tok->u.repeat.possessive = 1;\r
4936 }\r
4937 else {\r
4938 tok->u.repeat.greedy = 1;\r
4939 tok->u.repeat.possessive = 0;\r
4940 }\r
14b0e578
CS
4941 }\r
4942 break;\r
4943\r
4944 case '{':\r
4945 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break;\r
4946 r = fetch_range_quantifier(&p, end, tok, env);\r
4947 if (r < 0) return r; /* error */\r
4948 if (r == 0) goto greedy_check;\r
4949 else if (r == 2) { /* {n} */\r
b602265d
DG
4950 if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))\r
4951 goto possessive_check;\r
14b0e578 4952\r
b602265d 4953 goto greedy_check;\r
14b0e578
CS
4954 }\r
4955 /* r == 1 : normal char */\r
4956 break;\r
4957\r
4958 case '|':\r
4959 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_VBAR_ALT)) break;\r
4960 tok->type = TK_ALT;\r
4961 break;\r
4962\r
4963 case '(':\r
4964 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;\r
4965 tok->type = TK_SUBEXP_OPEN;\r
4966 break;\r
4967\r
4968 case ')':\r
4969 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;\r
4970 tok->type = TK_SUBEXP_CLOSE;\r
4971 break;\r
4972\r
4973 case 'w':\r
4974 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;\r
4975 tok->type = TK_CHAR_TYPE;\r
4976 tok->u.prop.ctype = ONIGENC_CTYPE_WORD;\r
4977 tok->u.prop.not = 0;\r
4978 break;\r
4979\r
4980 case 'W':\r
4981 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;\r
4982 tok->type = TK_CHAR_TYPE;\r
4983 tok->u.prop.ctype = ONIGENC_CTYPE_WORD;\r
4984 tok->u.prop.not = 1;\r
4985 break;\r
4986\r
4987 case 'b':\r
4988 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;\r
4989 tok->type = TK_ANCHOR;\r
b602265d 4990 tok->u.anchor = ANCHOR_WORD_BOUNDARY;\r
14b0e578
CS
4991 break;\r
4992\r
4993 case 'B':\r
4994 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;\r
4995 tok->type = TK_ANCHOR;\r
b602265d
DG
4996 tok->u.anchor = ANCHOR_NO_WORD_BOUNDARY;\r
4997 break;\r
4998\r
4999 case 'y':\r
5000 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER)) break;\r
5001 tok->type = TK_ANCHOR;\r
5002 tok->u.anchor = ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY;\r
5003 break;\r
5004\r
5005 case 'Y':\r
5006 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER)) break;\r
5007 tok->type = TK_ANCHOR;\r
5008 tok->u.anchor = ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY;\r
14b0e578
CS
5009 break;\r
5010\r
5011#ifdef USE_WORD_BEGIN_END\r
5012 case '<':\r
5013 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;\r
5014 tok->type = TK_ANCHOR;\r
5015 tok->u.anchor = ANCHOR_WORD_BEGIN;\r
5016 break;\r
5017\r
5018 case '>':\r
5019 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;\r
5020 tok->type = TK_ANCHOR;\r
5021 tok->u.anchor = ANCHOR_WORD_END;\r
5022 break;\r
5023#endif\r
5024\r
5025 case 's':\r
5026 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;\r
5027 tok->type = TK_CHAR_TYPE;\r
5028 tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;\r
5029 tok->u.prop.not = 0;\r
5030 break;\r
5031\r
5032 case 'S':\r
5033 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;\r
5034 tok->type = TK_CHAR_TYPE;\r
5035 tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;\r
5036 tok->u.prop.not = 1;\r
5037 break;\r
5038\r
5039 case 'd':\r
5040 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;\r
5041 tok->type = TK_CHAR_TYPE;\r
5042 tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;\r
5043 tok->u.prop.not = 0;\r
5044 break;\r
5045\r
5046 case 'D':\r
5047 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;\r
5048 tok->type = TK_CHAR_TYPE;\r
5049 tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;\r
5050 tok->u.prop.not = 1;\r
5051 break;\r
5052\r
5053 case 'h':\r
5054 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;\r
5055 tok->type = TK_CHAR_TYPE;\r
5056 tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;\r
5057 tok->u.prop.not = 0;\r
5058 break;\r
5059\r
5060 case 'H':\r
5061 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;\r
5062 tok->type = TK_CHAR_TYPE;\r
5063 tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;\r
5064 tok->u.prop.not = 1;\r
5065 break;\r
5066\r
b602265d
DG
5067 case 'K':\r
5068 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP)) break;\r
5069 tok->type = TK_KEEP;\r
5070 break;\r
5071\r
5072 case 'R':\r
5073 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE)) break;\r
5074 tok->type = TK_GENERAL_NEWLINE;\r
5075 break;\r
5076\r
5077 case 'N':\r
5078 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT)) break;\r
5079 tok->type = TK_NO_NEWLINE;\r
5080 break;\r
5081\r
5082 case 'O':\r
5083 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT)) break;\r
5084 tok->type = TK_TRUE_ANYCHAR;\r
5085 break;\r
5086\r
5087 case 'X':\r
5088 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER)) break;\r
5089 tok->type = TK_EXTENDED_GRAPHEME_CLUSTER;\r
5090 break;\r
5091\r
14b0e578
CS
5092 case 'A':\r
5093 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;\r
5094 begin_buf:\r
5095 tok->type = TK_ANCHOR;\r
5096 tok->u.subtype = ANCHOR_BEGIN_BUF;\r
5097 break;\r
5098\r
5099 case 'Z':\r
5100 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;\r
5101 tok->type = TK_ANCHOR;\r
5102 tok->u.subtype = ANCHOR_SEMI_END_BUF;\r
5103 break;\r
5104\r
5105 case 'z':\r
5106 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;\r
5107 end_buf:\r
5108 tok->type = TK_ANCHOR;\r
5109 tok->u.subtype = ANCHOR_END_BUF;\r
5110 break;\r
5111\r
5112 case 'G':\r
5113 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR)) break;\r
5114 tok->type = TK_ANCHOR;\r
5115 tok->u.subtype = ANCHOR_BEGIN_POSITION;\r
5116 break;\r
5117\r
5118 case '`':\r
5119 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;\r
5120 goto begin_buf;\r
5121 break;\r
5122\r
5123 case '\'':\r
5124 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;\r
5125 goto end_buf;\r
5126 break;\r
5127\r
b602265d
DG
5128 case 'o':\r
5129 if (PEND) break;\r
5130\r
5131 prev = p;\r
5132 if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_O_BRACE_OCTAL)) {\r
5133 PINC;\r
5134 num = scan_unsigned_octal_number(&p, end, 11, enc);\r
5135 if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;\r
5136 if (!PEND) {\r
5137 if (IS_CODE_DIGIT_ASCII(enc, PPEEK))\r
5138 return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;\r
5139 }\r
5140\r
5141 if ((p > prev + enclen(enc, prev)) && !PEND && PPEEK_IS('}')) {\r
5142 PINC;\r
5143 tok->type = TK_CODE_POINT;\r
5144 tok->u.code = (OnigCodePoint )num;\r
5145 }\r
5146 else {\r
5147 /* can't read nothing or invalid format */\r
5148 p = prev;\r
5149 }\r
5150 }\r
5151 break;\r
5152\r
14b0e578
CS
5153 case 'x':\r
5154 if (PEND) break;\r
5155\r
5156 prev = p;\r
5157 if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {\r
b602265d
DG
5158 PINC;\r
5159 num = scan_unsigned_hexadecimal_number(&p, end, 0, 8, enc);\r
5160 if (num < 0) {\r
5161 if (num == ONIGERR_TOO_BIG_NUMBER)\r
5162 return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;\r
5163 else\r
5164 return num;\r
5165 }\r
5166 if (!PEND) {\r
5167 if (IS_CODE_XDIGIT_ASCII(enc, PPEEK))\r
14b0e578
CS
5168 return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;\r
5169 }\r
5170\r
b602265d
DG
5171 if ((p > prev + enclen(enc, prev)) && !PEND && PPEEK_IS('}')) {\r
5172 PINC;\r
5173 tok->type = TK_CODE_POINT;\r
5174 tok->u.code = (OnigCodePoint )num;\r
5175 }\r
5176 else {\r
5177 /* can't read nothing or invalid format */\r
5178 p = prev;\r
5179 }\r
14b0e578
CS
5180 }\r
5181 else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {\r
b602265d
DG
5182 num = scan_unsigned_hexadecimal_number(&p, end, 0, 2, enc);\r
5183 if (num < 0) return num;\r
5184 if (p == prev) { /* can't read nothing. */\r
5185 num = 0; /* but, it's not error */\r
5186 }\r
5187 tok->type = TK_RAW_BYTE;\r
5188 tok->base = 16;\r
5189 tok->u.c = num;\r
14b0e578
CS
5190 }\r
5191 break;\r
5192\r
5193 case 'u':\r
5194 if (PEND) break;\r
5195\r
5196 prev = p;\r
5197 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {\r
b602265d
DG
5198 num = scan_unsigned_hexadecimal_number(&p, end, 4, 4, enc);\r
5199 if (num < 0) return num;\r
5200 if (p == prev) { /* can't read nothing. */\r
5201 num = 0; /* but, it's not error */\r
5202 }\r
5203 tok->type = TK_CODE_POINT;\r
5204 tok->base = 16;\r
5205 tok->u.code = (OnigCodePoint )num;\r
14b0e578
CS
5206 }\r
5207 break;\r
5208\r
5209 case '1': case '2': case '3': case '4':\r
5210 case '5': case '6': case '7': case '8': case '9':\r
5211 PUNFETCH;\r
5212 prev = p;\r
5213 num = onig_scan_unsigned_number(&p, end, enc);\r
5214 if (num < 0 || num > ONIG_MAX_BACKREF_NUM) {\r
5215 goto skip_backref;\r
5216 }\r
5217\r
5218 if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) && \r
b602265d
DG
5219 (num <= env->num_mem || num <= 9)) { /* This spec. from GNU regex */\r
5220 if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {\r
5221 if (num > env->num_mem || IS_NULL(SCANENV_MEMENV(env)[num].node))\r
5222 return ONIGERR_INVALID_BACKREF;\r
5223 }\r
5224\r
5225 tok->type = TK_BACKREF;\r
5226 tok->u.backref.num = 1;\r
5227 tok->u.backref.ref1 = num;\r
5228 tok->u.backref.by_name = 0;\r
14b0e578 5229#ifdef USE_BACKREF_WITH_LEVEL\r
b602265d 5230 tok->u.backref.exist_level = 0;\r
14b0e578 5231#endif\r
b602265d 5232 break;\r
14b0e578
CS
5233 }\r
5234\r
5235 skip_backref:\r
5236 if (c == '8' || c == '9') {\r
b602265d
DG
5237 /* normal char */\r
5238 p = prev; PINC;\r
5239 break;\r
14b0e578
CS
5240 }\r
5241\r
5242 p = prev;\r
5243 /* fall through */\r
5244 case '0':\r
5245 if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {\r
b602265d
DG
5246 prev = p;\r
5247 num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), enc);\r
5248 if (num < 0 || num >= 256) return ONIGERR_TOO_BIG_NUMBER;\r
5249 if (p == prev) { /* can't read nothing. */\r
5250 num = 0; /* but, it's not error */\r
5251 }\r
5252 tok->type = TK_RAW_BYTE;\r
5253 tok->base = 8;\r
5254 tok->u.c = num;\r
14b0e578
CS
5255 }\r
5256 else if (c != '0') {\r
b602265d 5257 PINC;\r
14b0e578
CS
5258 }\r
5259 break;\r
5260\r
14b0e578 5261 case 'k':\r
b602265d
DG
5262 if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) {\r
5263 PFETCH(c);\r
5264 if (c == '<' || c == '\'') {\r
5265 UChar* name_end;\r
5266 int* backs;\r
5267 int back_num;\r
5268 enum REF_NUM num_type;\r
14b0e578 5269\r
b602265d 5270 prev = p;\r
14b0e578
CS
5271\r
5272#ifdef USE_BACKREF_WITH_LEVEL\r
b602265d
DG
5273 name_end = NULL_UCHARP; /* no need. escape gcc warning. */\r
5274 r = fetch_name_with_level((OnigCodePoint )c, &p, end, &name_end,\r
5275 env, &back_num, &tok->u.backref.level, &num_type);\r
5276 if (r == 1) tok->u.backref.exist_level = 1;\r
5277 else tok->u.backref.exist_level = 0;\r
14b0e578 5278#else\r
b602265d 5279 r = fetch_name(c, &p, end, &name_end, env, &back_num, &num_type, 1);\r
14b0e578 5280#endif\r
b602265d
DG
5281 if (r < 0) return r;\r
5282\r
5283 if (num_type != IS_NOT_NUM) {\r
5284 if (num_type == IS_REL_NUM) {\r
5285 back_num = backref_rel_to_abs(back_num, env);\r
5286 }\r
5287 if (back_num <= 0)\r
5288 return ONIGERR_INVALID_BACKREF;\r
5289\r
5290 if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {\r
5291 if (back_num > env->num_mem ||\r
5292 IS_NULL(SCANENV_MEMENV(env)[back_num].node))\r
5293 return ONIGERR_INVALID_BACKREF;\r
5294 }\r
5295 tok->type = TK_BACKREF;\r
5296 tok->u.backref.by_name = 0;\r
5297 tok->u.backref.num = 1;\r
5298 tok->u.backref.ref1 = back_num;\r
5299 }\r
5300 else {\r
5301 num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);\r
5302 if (num <= 0) {\r
5303 onig_scan_env_set_error_string(env,\r
5304 ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);\r
5305 return ONIGERR_UNDEFINED_NAME_REFERENCE;\r
5306 }\r
5307 if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {\r
5308 int i;\r
5309 for (i = 0; i < num; i++) {\r
5310 if (backs[i] > env->num_mem ||\r
5311 IS_NULL(SCANENV_MEMENV(env)[backs[i]].node))\r
5312 return ONIGERR_INVALID_BACKREF;\r
5313 }\r
5314 }\r
5315\r
5316 tok->type = TK_BACKREF;\r
5317 tok->u.backref.by_name = 1;\r
5318 if (num == 1) {\r
5319 tok->u.backref.num = 1;\r
5320 tok->u.backref.ref1 = backs[0];\r
5321 }\r
5322 else {\r
5323 tok->u.backref.num = num;\r
5324 tok->u.backref.refs = backs;\r
5325 }\r
5326 }\r
5327 }\r
5328 else\r
5329 PUNFETCH;\r
14b0e578
CS
5330 }\r
5331 break;\r
14b0e578 5332\r
b602265d 5333#ifdef USE_CALL\r
14b0e578 5334 case 'g':\r
b602265d
DG
5335 if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) {\r
5336 PFETCH(c);\r
5337 if (c == '<' || c == '\'') {\r
5338 int gnum;\r
5339 UChar* name_end;\r
5340 enum REF_NUM num_type;\r
5341\r
5342 prev = p;\r
5343 r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env,\r
5344 &gnum, &num_type, 1);\r
5345 if (r < 0) return r;\r
5346\r
5347 if (num_type != IS_NOT_NUM) {\r
5348 if (num_type == IS_REL_NUM) {\r
5349 gnum = backref_rel_to_abs(gnum, env);\r
5350 if (gnum < 0) {\r
5351 onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,\r
5352 prev, name_end);\r
5353 return ONIGERR_UNDEFINED_GROUP_REFERENCE;\r
5354 }\r
5355 }\r
5356 tok->u.call.by_number = 1;\r
5357 tok->u.call.gnum = gnum;\r
5358 }\r
5359 else {\r
5360 tok->u.call.by_number = 0;\r
5361 tok->u.call.gnum = 0;\r
5362 }\r
5363\r
5364 tok->type = TK_CALL;\r
5365 tok->u.call.name = prev;\r
5366 tok->u.call.name_end = name_end;\r
5367 }\r
5368 else\r
5369 PUNFETCH;\r
14b0e578
CS
5370 }\r
5371 break;\r
5372#endif\r
5373\r
5374 case 'Q':\r
5375 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE)) {\r
b602265d 5376 tok->type = TK_QUOTE_OPEN;\r
14b0e578
CS
5377 }\r
5378 break;\r
5379\r
5380 case 'p':\r
5381 case 'P':\r
b602265d
DG
5382 if (!PEND && PPEEK_IS('{') &&\r
5383 IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {\r
5384 PINC;\r
5385 tok->type = TK_CHAR_PROPERTY;\r
5386 tok->u.prop.not = (c == 'P' ? 1 : 0);\r
5387\r
5388 if (!PEND &&\r
5389 IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {\r
5390 PFETCH(c);\r
5391 if (c == '^') {\r
5392 tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);\r
5393 }\r
5394 else\r
5395 PUNFETCH;\r
5396 }\r
14b0e578
CS
5397 }\r
5398 break;\r
5399\r
5400 default:\r
b602265d
DG
5401 {\r
5402 OnigCodePoint c2;\r
5403\r
5404 PUNFETCH;\r
5405 num = fetch_escaped_value(&p, end, env, &c2);\r
5406 if (num < 0) return num;\r
5407 /* set_raw: */\r
5408 if (tok->u.c != c2) {\r
5409 tok->type = TK_CODE_POINT;\r
5410 tok->u.code = c2;\r
5411 }\r
5412 else { /* string */\r
5413 p = tok->backp + enclen(enc, tok->backp);\r
5414 }\r
14b0e578
CS
5415 }\r
5416 break;\r
5417 }\r
5418 }\r
5419 else {\r
5420 tok->u.c = c;\r
5421 tok->escaped = 0;\r
5422\r
5423#ifdef USE_VARIABLE_META_CHARS\r
5424 if ((c != ONIG_INEFFECTIVE_META_CHAR) &&\r
b602265d 5425 IS_SYNTAX_OP(syn, ONIG_SYN_OP_VARIABLE_META_CHARACTERS)) {\r
14b0e578 5426 if (c == MC_ANYCHAR(syn))\r
b602265d 5427 goto any_char;\r
14b0e578 5428 else if (c == MC_ANYTIME(syn))\r
b602265d 5429 goto anytime;\r
14b0e578 5430 else if (c == MC_ZERO_OR_ONE_TIME(syn))\r
b602265d 5431 goto zero_or_one_time;\r
14b0e578 5432 else if (c == MC_ONE_OR_MORE_TIME(syn))\r
b602265d 5433 goto one_or_more_time;\r
14b0e578 5434 else if (c == MC_ANYCHAR_ANYTIME(syn)) {\r
b602265d
DG
5435 tok->type = TK_ANYCHAR_ANYTIME;\r
5436 goto out;\r
14b0e578
CS
5437 }\r
5438 }\r
5439#endif\r
5440\r
5441 switch (c) {\r
5442 case '.':\r
5443 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_DOT_ANYCHAR)) break;\r
5444#ifdef USE_VARIABLE_META_CHARS\r
5445 any_char:\r
5446#endif\r
5447 tok->type = TK_ANYCHAR;\r
5448 break;\r
5449\r
5450 case '*':\r
5451 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ASTERISK_ZERO_INF)) break;\r
5452#ifdef USE_VARIABLE_META_CHARS\r
5453 anytime:\r
5454#endif\r
5455 tok->type = TK_OP_REPEAT;\r
5456 tok->u.repeat.lower = 0;\r
5457 tok->u.repeat.upper = REPEAT_INFINITE;\r
5458 goto greedy_check;\r
5459 break;\r
5460\r
5461 case '+':\r
5462 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_PLUS_ONE_INF)) break;\r
5463#ifdef USE_VARIABLE_META_CHARS\r
5464 one_or_more_time:\r
5465#endif\r
5466 tok->type = TK_OP_REPEAT;\r
5467 tok->u.repeat.lower = 1;\r
5468 tok->u.repeat.upper = REPEAT_INFINITE;\r
5469 goto greedy_check;\r
5470 break;\r
5471\r
5472 case '?':\r
5473 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_ZERO_ONE)) break;\r
5474#ifdef USE_VARIABLE_META_CHARS\r
5475 zero_or_one_time:\r
5476#endif\r
5477 tok->type = TK_OP_REPEAT;\r
5478 tok->u.repeat.lower = 0;\r
5479 tok->u.repeat.upper = 1;\r
5480 goto greedy_check;\r
5481 break;\r
5482\r
5483 case '{':\r
5484 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break;\r
5485 r = fetch_range_quantifier(&p, end, tok, env);\r
5486 if (r < 0) return r; /* error */\r
5487 if (r == 0) goto greedy_check;\r
5488 else if (r == 2) { /* {n} */\r
b602265d
DG
5489 if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))\r
5490 goto possessive_check;\r
14b0e578 5491\r
b602265d 5492 goto greedy_check;\r
14b0e578
CS
5493 }\r
5494 /* r == 1 : normal char */\r
5495 break;\r
5496\r
5497 case '|':\r
5498 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_VBAR_ALT)) break;\r
5499 tok->type = TK_ALT;\r
5500 break;\r
5501\r
5502 case '(':\r
b602265d 5503 if (!PEND && PPEEK_IS('?') &&\r
14b0e578
CS
5504 IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {\r
5505 PINC;\r
b602265d
DG
5506 if (! PEND) {\r
5507 c = PPEEK;\r
5508 if (c == '#') {\r
14b0e578 5509 PFETCH(c);\r
b602265d
DG
5510 while (1) {\r
5511 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
5512 PFETCH(c);\r
5513 if (c == MC_ESC(syn)) {\r
5514 if (! PEND) PFETCH(c);\r
5515 }\r
5516 else {\r
5517 if (c == ')') break;\r
5518 }\r
14b0e578 5519 }\r
b602265d
DG
5520 goto start;\r
5521 }\r
5522 else if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_PERL_SUBEXP_CALL)) {\r
5523 int gnum;\r
5524 UChar* name;\r
5525 UChar* name_end;\r
5526 enum REF_NUM num_type;\r
5527\r
5528 switch (c) {\r
5529 case '&':\r
5530 {\r
5531 PINC;\r
5532 name = p;\r
5533 r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &gnum,\r
5534 &num_type, 0);\r
5535 if (r < 0) return r;\r
5536\r
5537 tok->type = TK_CALL;\r
5538 tok->u.call.by_number = 0;\r
5539 tok->u.call.gnum = 0;\r
5540 tok->u.call.name = name;\r
5541 tok->u.call.name_end = name_end;\r
5542 }\r
5543 break;\r
5544\r
5545 case 'R':\r
5546 tok->type = TK_CALL;\r
5547 tok->u.call.by_number = 1;\r
5548 tok->u.call.gnum = 0;\r
5549 tok->u.call.name = p;\r
5550 PINC;\r
5551 if (! PPEEK_IS(')')) return ONIGERR_INVALID_GROUP_NAME;\r
5552 tok->u.call.name_end = p;\r
5553 break;\r
5554\r
5555 case '-':\r
5556 case '+':\r
5557 goto lparen_qmark_num;\r
5558 break;\r
5559 default:\r
5560 if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto lparen_qmark_end;\r
5561\r
5562 lparen_qmark_num:\r
5563 {\r
5564 name = p;\r
5565 r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env,\r
5566 &gnum, &num_type, 1);\r
5567 if (r < 0) return r;\r
5568\r
5569 if (num_type == IS_NOT_NUM) {\r
5570 return ONIGERR_INVALID_GROUP_NAME;\r
5571 }\r
5572 else {\r
5573 if (num_type == IS_REL_NUM) {\r
5574 gnum = backref_rel_to_abs(gnum, env);\r
5575 if (gnum < 0) {\r
5576 onig_scan_env_set_error_string(env,\r
5577 ONIGERR_UNDEFINED_NAME_REFERENCE, name, name_end);\r
5578 return ONIGERR_UNDEFINED_GROUP_REFERENCE;\r
5579 }\r
5580 }\r
5581 tok->u.call.by_number = 1;\r
5582 tok->u.call.gnum = gnum;\r
5583 }\r
5584\r
5585 tok->type = TK_CALL;\r
5586 tok->u.call.name = name;\r
5587 tok->u.call.name_end = name_end;\r
5588 }\r
5589 break;\r
14b0e578
CS
5590 }\r
5591 }\r
14b0e578 5592 }\r
b602265d 5593 lparen_qmark_end:\r
14b0e578
CS
5594 PUNFETCH;\r
5595 }\r
5596\r
5597 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;\r
5598 tok->type = TK_SUBEXP_OPEN;\r
5599 break;\r
5600\r
5601 case ')':\r
5602 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;\r
5603 tok->type = TK_SUBEXP_CLOSE;\r
5604 break;\r
5605\r
5606 case '^':\r
5607 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;\r
5608 tok->type = TK_ANCHOR;\r
b602265d
DG
5609 tok->u.subtype = (IS_SINGLELINE(env->options)\r
5610 ? ANCHOR_BEGIN_BUF : ANCHOR_BEGIN_LINE);\r
14b0e578
CS
5611 break;\r
5612\r
5613 case '$':\r
5614 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;\r
5615 tok->type = TK_ANCHOR;\r
b602265d
DG
5616 tok->u.subtype = (IS_SINGLELINE(env->options)\r
5617 ? ANCHOR_SEMI_END_BUF : ANCHOR_END_LINE);\r
14b0e578
CS
5618 break;\r
5619\r
5620 case '[':\r
5621 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACKET_CC)) break;\r
5622 tok->type = TK_CC_OPEN;\r
5623 break;\r
5624\r
5625 case ']':\r
5626 if (*src > env->pattern) /* /].../ is allowed. */\r
b602265d 5627 CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]");\r
14b0e578
CS
5628 break;\r
5629\r
5630 case '#':\r
b602265d
DG
5631 if (IS_EXTEND(env->options)) {\r
5632 while (!PEND) {\r
5633 PFETCH(c);\r
5634 if (ONIGENC_IS_CODE_NEWLINE(enc, c))\r
5635 break;\r
5636 }\r
5637 goto start;\r
5638 break;\r
14b0e578
CS
5639 }\r
5640 break;\r
5641\r
5642 case ' ': case '\t': case '\n': case '\r': case '\f':\r
b602265d
DG
5643 if (IS_EXTEND(env->options))\r
5644 goto start;\r
14b0e578
CS
5645 break;\r
5646\r
5647 default:\r
5648 /* string */\r
5649 break;\r
5650 }\r
5651 }\r
5652\r
5653#ifdef USE_VARIABLE_META_CHARS\r
5654 out:\r
5655#endif\r
5656 *src = p;\r
5657 return tok->type;\r
5658}\r
5659\r
5660static int\r
5661add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not,\r
b602265d
DG
5662 OnigEncoding enc ARG_UNUSED, OnigCodePoint sb_out,\r
5663 const OnigCodePoint mbr[])\r
14b0e578
CS
5664{\r
5665 int i, r;\r
5666 OnigCodePoint j;\r
5667\r
5668 int n = ONIGENC_CODE_RANGE_NUM(mbr);\r
5669\r
5670 if (not == 0) {\r
5671 for (i = 0; i < n; i++) {\r
5672 for (j = ONIGENC_CODE_RANGE_FROM(mbr, i);\r
5673 j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) {\r
b602265d
DG
5674 if (j >= sb_out) {\r
5675 if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) {\r
5676 r = add_code_range_to_buf(&(cc->mbuf), j,\r
5677 ONIGENC_CODE_RANGE_TO(mbr, i));\r
5678 if (r != 0) return r;\r
5679 i++;\r
5680 }\r
5681\r
5682 goto sb_end;\r
5683 }\r
14b0e578
CS
5684 BITSET_SET_BIT(cc->bs, j);\r
5685 }\r
5686 }\r
5687\r
5688 sb_end:\r
5689 for ( ; i < n; i++) {\r
5690 r = add_code_range_to_buf(&(cc->mbuf),\r
5691 ONIGENC_CODE_RANGE_FROM(mbr, i),\r
5692 ONIGENC_CODE_RANGE_TO(mbr, i));\r
5693 if (r != 0) return r;\r
5694 }\r
5695 }\r
5696 else {\r
5697 OnigCodePoint prev = 0;\r
5698\r
5699 for (i = 0; i < n; i++) {\r
b602265d
DG
5700 for (j = prev; j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) {\r
5701 if (j >= sb_out) {\r
5702 goto sb_end2;\r
5703 }\r
5704 BITSET_SET_BIT(cc->bs, j);\r
14b0e578
CS
5705 }\r
5706 prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;\r
5707 }\r
5708 for (j = prev; j < sb_out; j++) {\r
5709 BITSET_SET_BIT(cc->bs, j);\r
5710 }\r
5711\r
5712 sb_end2:\r
5713 prev = sb_out;\r
5714\r
5715 for (i = 0; i < n; i++) {\r
5716 if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) {\r
b602265d 5717 r = add_code_range_to_buf(&(cc->mbuf), prev,\r
14b0e578 5718 ONIGENC_CODE_RANGE_FROM(mbr, i) - 1);\r
b602265d 5719 if (r != 0) return r;\r
14b0e578
CS
5720 }\r
5721 prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;\r
b602265d
DG
5722 if (prev == 0) goto end;\r
5723 }\r
5724\r
5725 r = add_code_range_to_buf(&(cc->mbuf), prev, MAX_CODE_POINT);\r
5726 if (r != 0) return r;\r
5727 }\r
5728\r
5729 end:\r
5730 return 0;\r
5731}\r
5732\r
5733static int\r
5734add_ctype_to_cc_by_range_limit(CClassNode* cc, int ctype ARG_UNUSED, int not,\r
5735 OnigEncoding enc ARG_UNUSED,\r
5736 OnigCodePoint sb_out,\r
5737 const OnigCodePoint mbr[], OnigCodePoint limit)\r
5738{\r
5739 int i, r;\r
5740 OnigCodePoint j;\r
5741 OnigCodePoint from;\r
5742 OnigCodePoint to;\r
5743\r
5744 int n = ONIGENC_CODE_RANGE_NUM(mbr);\r
5745\r
5746 if (not == 0) {\r
5747 for (i = 0; i < n; i++) {\r
5748 for (j = ONIGENC_CODE_RANGE_FROM(mbr, i);\r
5749 j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) {\r
5750 if (j > limit) goto end;\r
5751 if (j >= sb_out) {\r
5752 if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) {\r
5753 to = ONIGENC_CODE_RANGE_TO(mbr, i);\r
5754 if (to > limit) to = limit;\r
5755 r = add_code_range_to_buf(&(cc->mbuf), j, to);\r
5756 if (r != 0) return r;\r
5757 i++;\r
5758 }\r
5759\r
5760 goto sb_end;\r
5761 }\r
5762 BITSET_SET_BIT(cc->bs, j);\r
5763 }\r
14b0e578 5764 }\r
b602265d
DG
5765\r
5766 sb_end:\r
5767 for ( ; i < n; i++) {\r
5768 from = ONIGENC_CODE_RANGE_FROM(mbr, i);\r
5769 to = ONIGENC_CODE_RANGE_TO(mbr, i);\r
5770 if (from > limit) break;\r
5771 if (to > limit) to = limit;\r
5772 r = add_code_range_to_buf(&(cc->mbuf), from, to);\r
14b0e578
CS
5773 if (r != 0) return r;\r
5774 }\r
5775 }\r
b602265d
DG
5776 else {\r
5777 OnigCodePoint prev = 0;\r
5778\r
5779 for (i = 0; i < n; i++) {\r
5780 from = ONIGENC_CODE_RANGE_FROM(mbr, i);\r
5781 if (from > limit) {\r
5782 for (j = prev; j < sb_out; j++) {\r
5783 BITSET_SET_BIT(cc->bs, j);\r
5784 }\r
5785 goto sb_end2;\r
5786 }\r
5787 for (j = prev; j < from; j++) {\r
5788 if (j >= sb_out) goto sb_end2;\r
5789 BITSET_SET_BIT(cc->bs, j);\r
5790 }\r
5791 prev = ONIGENC_CODE_RANGE_TO(mbr, i);\r
5792 if (prev > limit) prev = limit;\r
5793 prev++;\r
5794 if (prev == 0) goto end;\r
5795 }\r
5796 for (j = prev; j < sb_out; j++) {\r
5797 BITSET_SET_BIT(cc->bs, j);\r
5798 }\r
5799\r
5800 sb_end2:\r
5801 prev = sb_out;\r
5802\r
5803 for (i = 0; i < n; i++) {\r
5804 from = ONIGENC_CODE_RANGE_FROM(mbr, i);\r
5805 if (from > limit) goto last;\r
5806\r
5807 if (prev < from) {\r
5808 r = add_code_range_to_buf(&(cc->mbuf), prev, from - 1);\r
5809 if (r != 0) return r;\r
5810 }\r
5811 prev = ONIGENC_CODE_RANGE_TO(mbr, i);\r
5812 if (prev > limit) prev = limit;\r
5813 prev++;\r
5814 if (prev == 0) goto end;\r
5815 }\r
5816\r
5817 last:\r
5818 r = add_code_range_to_buf(&(cc->mbuf), prev, MAX_CODE_POINT);\r
5819 if (r != 0) return r;\r
5820 }\r
14b0e578 5821\r
b602265d 5822 end:\r
14b0e578
CS
5823 return 0;\r
5824}\r
5825\r
5826static int\r
5827add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)\r
5828{\r
b602265d
DG
5829#define ASCII_LIMIT 127\r
5830\r
14b0e578 5831 int c, r;\r
b602265d 5832 int ascii_mode;\r
14b0e578 5833 const OnigCodePoint *ranges;\r
b602265d 5834 OnigCodePoint limit;\r
14b0e578
CS
5835 OnigCodePoint sb_out;\r
5836 OnigEncoding enc = env->enc;\r
5837\r
b602265d
DG
5838 ascii_mode = IS_ASCII_MODE_CTYPE_OPTION(ctype, env->options);\r
5839\r
14b0e578
CS
5840 r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges);\r
5841 if (r == 0) {\r
b602265d
DG
5842 if (ascii_mode == 0)\r
5843 r = add_ctype_to_cc_by_range(cc, ctype, not, env->enc, sb_out, ranges);\r
5844 else\r
5845 r = add_ctype_to_cc_by_range_limit(cc, ctype, not, env->enc, sb_out,\r
5846 ranges, ASCII_LIMIT);\r
5847 return r;\r
14b0e578
CS
5848 }\r
5849 else if (r != ONIG_NO_SUPPORT_CONFIG) {\r
5850 return r;\r
5851 }\r
5852\r
5853 r = 0;\r
b602265d
DG
5854 limit = ascii_mode ? ASCII_LIMIT : SINGLE_BYTE_SIZE;\r
5855\r
14b0e578
CS
5856 switch (ctype) {\r
5857 case ONIGENC_CTYPE_ALPHA:\r
5858 case ONIGENC_CTYPE_BLANK:\r
5859 case ONIGENC_CTYPE_CNTRL:\r
5860 case ONIGENC_CTYPE_DIGIT:\r
5861 case ONIGENC_CTYPE_LOWER:\r
5862 case ONIGENC_CTYPE_PUNCT:\r
5863 case ONIGENC_CTYPE_SPACE:\r
5864 case ONIGENC_CTYPE_UPPER:\r
5865 case ONIGENC_CTYPE_XDIGIT:\r
5866 case ONIGENC_CTYPE_ASCII:\r
5867 case ONIGENC_CTYPE_ALNUM:\r
5868 if (not != 0) {\r
b602265d
DG
5869 for (c = 0; c < (int )limit; c++) {\r
5870 if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))\r
5871 BITSET_SET_BIT(cc->bs, c);\r
5872 }\r
5873 for (c = limit; c < SINGLE_BYTE_SIZE; c++) {\r
5874 BITSET_SET_BIT(cc->bs, c);\r
14b0e578 5875 }\r
b602265d 5876\r
14b0e578
CS
5877 ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);\r
5878 }\r
5879 else {\r
b602265d
DG
5880 for (c = 0; c < (int )limit; c++) {\r
5881 if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))\r
5882 BITSET_SET_BIT(cc->bs, c);\r
14b0e578
CS
5883 }\r
5884 }\r
5885 break;\r
5886\r
5887 case ONIGENC_CTYPE_GRAPH:\r
5888 case ONIGENC_CTYPE_PRINT:\r
b602265d 5889 case ONIGENC_CTYPE_WORD:\r
14b0e578 5890 if (not != 0) {\r
b602265d
DG
5891 for (c = 0; c < (int )limit; c++) {\r
5892 if (ONIGENC_CODE_TO_MBCLEN(enc, c) > 0 /* check invalid code point */\r
5893 && ! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))\r
5894 BITSET_SET_BIT(cc->bs, c);\r
14b0e578 5895 }\r
b602265d
DG
5896 for (c = limit; c < SINGLE_BYTE_SIZE; c++) {\r
5897 if (ONIGENC_CODE_TO_MBCLEN(enc, c) > 0)\r
5898 BITSET_SET_BIT(cc->bs, c);\r
14b0e578 5899 }\r
14b0e578
CS
5900 }\r
5901 else {\r
b602265d
DG
5902 for (c = 0; c < (int )limit; c++) {\r
5903 if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))\r
5904 BITSET_SET_BIT(cc->bs, c);\r
14b0e578 5905 }\r
b602265d
DG
5906 if (ascii_mode == 0)\r
5907 ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);\r
14b0e578
CS
5908 }\r
5909 break;\r
5910\r
5911 default:\r
5912 return ONIGERR_PARSER_BUG;\r
5913 break;\r
5914 }\r
5915\r
5916 return r;\r
5917}\r
5918\r
5919static int\r
5920parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)\r
5921{\r
5922#define POSIX_BRACKET_CHECK_LIMIT_LENGTH 20\r
5923#define POSIX_BRACKET_NAME_MIN_LEN 4\r
5924\r
5925 static PosixBracketEntryType PBS[] = {\r
5926 { (UChar* )"alnum", ONIGENC_CTYPE_ALNUM, 5 },\r
5927 { (UChar* )"alpha", ONIGENC_CTYPE_ALPHA, 5 },\r
5928 { (UChar* )"blank", ONIGENC_CTYPE_BLANK, 5 },\r
5929 { (UChar* )"cntrl", ONIGENC_CTYPE_CNTRL, 5 },\r
5930 { (UChar* )"digit", ONIGENC_CTYPE_DIGIT, 5 },\r
5931 { (UChar* )"graph", ONIGENC_CTYPE_GRAPH, 5 },\r
5932 { (UChar* )"lower", ONIGENC_CTYPE_LOWER, 5 },\r
5933 { (UChar* )"print", ONIGENC_CTYPE_PRINT, 5 },\r
5934 { (UChar* )"punct", ONIGENC_CTYPE_PUNCT, 5 },\r
5935 { (UChar* )"space", ONIGENC_CTYPE_SPACE, 5 },\r
5936 { (UChar* )"upper", ONIGENC_CTYPE_UPPER, 5 },\r
5937 { (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 },\r
5938 { (UChar* )"ascii", ONIGENC_CTYPE_ASCII, 5 },\r
5939 { (UChar* )"word", ONIGENC_CTYPE_WORD, 4 },\r
5940 { (UChar* )NULL, -1, 0 }\r
5941 };\r
5942\r
5943 PosixBracketEntryType *pb;\r
5944 int not, i, r;\r
5945 OnigCodePoint c;\r
5946 OnigEncoding enc = env->enc;\r
5947 UChar *p = *src;\r
5948\r
5949 if (PPEEK_IS('^')) {\r
5950 PINC_S;\r
5951 not = 1;\r
5952 }\r
5953 else\r
5954 not = 0;\r
5955\r
5956 if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MIN_LEN + 3)\r
5957 goto not_posix_bracket;\r
5958\r
5959 for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {\r
5960 if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) {\r
5961 p = (UChar* )onigenc_step(enc, p, end, pb->len);\r
5962 if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0)\r
5963 return ONIGERR_INVALID_POSIX_BRACKET_TYPE;\r
5964\r
5965 r = add_ctype_to_cc(cc, pb->ctype, not, env);\r
5966 if (r != 0) return r;\r
5967\r
5968 PINC_S; PINC_S;\r
5969 *src = p;\r
5970 return 0;\r
5971 }\r
5972 }\r
5973\r
5974 not_posix_bracket:\r
5975 c = 0;\r
5976 i = 0;\r
5977 while (!PEND && ((c = PPEEK) != ':') && c != ']') {\r
5978 PINC_S;\r
5979 if (++i > POSIX_BRACKET_CHECK_LIMIT_LENGTH) break;\r
5980 }\r
5981 if (c == ':' && ! PEND) {\r
5982 PINC_S;\r
5983 if (! PEND) {\r
5984 PFETCH_S(c);\r
5985 if (c == ']')\r
5986 return ONIGERR_INVALID_POSIX_BRACKET_TYPE;\r
5987 }\r
5988 }\r
5989\r
5990 return 1; /* 1: is not POSIX bracket, but no error. */\r
5991}\r
5992\r
5993static int\r
5994fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env)\r
5995{\r
5996 int r;\r
5997 OnigCodePoint c;\r
5998 OnigEncoding enc = env->enc;\r
5999 UChar *prev, *start, *p = *src;\r
6000\r
6001 r = 0;\r
6002 start = prev = p;\r
6003\r
6004 while (!PEND) {\r
6005 prev = p;\r
6006 PFETCH_S(c);\r
6007 if (c == '}') {\r
6008 r = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, start, prev);\r
6009 if (r < 0) break;\r
6010\r
6011 *src = p;\r
6012 return r;\r
6013 }\r
6014 else if (c == '(' || c == ')' || c == '{' || c == '|') {\r
6015 r = ONIGERR_INVALID_CHAR_PROPERTY_NAME;\r
6016 break;\r
6017 }\r
6018 }\r
6019\r
6020 onig_scan_env_set_error_string(env, r, *src, prev);\r
6021 return r;\r
6022}\r
6023\r
6024static int\r
b602265d 6025parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)\r
14b0e578
CS
6026{\r
6027 int r, ctype;\r
6028 CClassNode* cc;\r
6029\r
6030 ctype = fetch_char_property_to_ctype(src, end, env);\r
6031 if (ctype < 0) return ctype;\r
6032\r
6033 *np = node_new_cclass();\r
6034 CHECK_NULL_RETURN_MEMERR(*np);\r
b602265d 6035 cc = CCLASS_(*np);\r
14b0e578
CS
6036 r = add_ctype_to_cc(cc, ctype, 0, env);\r
6037 if (r != 0) return r;\r
6038 if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);\r
6039\r
6040 return 0;\r
6041}\r
6042\r
6043\r
/* Parser state while reading the members of a character class. */
enum CCSTATE {
  CCS_VALUE    = 0,  /* a value or class has been read and is pending */
  CCS_RANGE    = 1,  /* a range operator '-' followed a value */
  CCS_COMPLETE = 2,  /* a range has just been completed */
  CCS_START    = 3   /* nothing read yet (initial state, also after &&) */
};
6050\r
/* Kind of the value most recently read inside a character class. */
enum CCVALTYPE {
  CCV_SB         = 0,  /* single byte value (stored in the bitset) */
  CCV_CODE_POINT = 1,  /* code point (stored in the multi-byte range buffer) */
  CCV_CLASS      = 2   /* a character type / property / POSIX bracket */
};
6056\r
6057static int\r
6058next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type,\r
b602265d 6059 enum CCSTATE* state, ScanEnv* env)\r
14b0e578
CS
6060{\r
6061 int r;\r
6062\r
6063 if (*state == CCS_RANGE)\r
6064 return ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE;\r
6065\r
6066 if (*state == CCS_VALUE && *type != CCV_CLASS) {\r
6067 if (*type == CCV_SB)\r
6068 BITSET_SET_BIT(cc->bs, (int )(*vs));\r
6069 else if (*type == CCV_CODE_POINT) {\r
6070 r = add_code_range(&(cc->mbuf), env, *vs, *vs);\r
6071 if (r < 0) return r;\r
6072 }\r
6073 }\r
6074\r
6075 *state = CCS_VALUE;\r
6076 *type = CCV_CLASS;\r
6077 return 0;\r
6078}\r
6079\r
6080static int\r
b602265d
DG
6081next_state_val(CClassNode* cc, OnigCodePoint *from, OnigCodePoint to,\r
6082 int* from_israw, int to_israw,\r
6083 enum CCVALTYPE intype, enum CCVALTYPE* type,\r
6084 enum CCSTATE* state, ScanEnv* env)\r
14b0e578
CS
6085{\r
6086 int r;\r
6087\r
6088 switch (*state) {\r
6089 case CCS_VALUE:\r
b602265d
DG
6090 if (*type == CCV_SB) {\r
6091 if (*from > 0xff)\r
6092 return ONIGERR_INVALID_CODE_POINT_VALUE;\r
6093\r
6094 BITSET_SET_BIT(cc->bs, (int )(*from));\r
6095 }\r
14b0e578 6096 else if (*type == CCV_CODE_POINT) {\r
b602265d 6097 r = add_code_range(&(cc->mbuf), env, *from, *from);\r
14b0e578
CS
6098 if (r < 0) return r;\r
6099 }\r
6100 break;\r
6101\r
6102 case CCS_RANGE:\r
6103 if (intype == *type) {\r
6104 if (intype == CCV_SB) {\r
b602265d 6105 if (*from > 0xff || to > 0xff)\r
14b0e578
CS
6106 return ONIGERR_INVALID_CODE_POINT_VALUE;\r
6107\r
b602265d
DG
6108 if (*from > to) {\r
6109 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))\r
6110 goto ccs_range_end;\r
6111 else\r
6112 return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;\r
6113 }\r
6114 bitset_set_range(cc->bs, (int )*from, (int )to);\r
14b0e578
CS
6115 }\r
6116 else {\r
b602265d
DG
6117 r = add_code_range(&(cc->mbuf), env, *from, to);\r
6118 if (r < 0) return r;\r
14b0e578
CS
6119 }\r
6120 }\r
6121 else {\r
b602265d
DG
6122 if (*from > to) {\r
6123 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))\r
6124 goto ccs_range_end;\r
6125 else\r
6126 return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;\r
14b0e578 6127 }\r
b602265d
DG
6128 bitset_set_range(cc->bs, (int )*from, (int )(to < 0xff ? to : 0xff));\r
6129 r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*from, to);\r
6130 if (r < 0) return r;\r
14b0e578
CS
6131 }\r
6132 ccs_range_end:\r
6133 *state = CCS_COMPLETE;\r
6134 break;\r
6135\r
6136 case CCS_COMPLETE:\r
6137 case CCS_START:\r
6138 *state = CCS_VALUE;\r
6139 break;\r
6140\r
6141 default:\r
6142 break;\r
6143 }\r
6144\r
b602265d
DG
6145 *from_israw = to_israw;\r
6146 *from = to;\r
6147 *type = intype;\r
14b0e578
CS
6148 return 0;\r
6149}\r
6150\r
6151static int\r
6152code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,\r
b602265d 6153 ScanEnv* env)\r
14b0e578
CS
6154{\r
6155 int in_esc;\r
6156 OnigCodePoint code;\r
6157 OnigEncoding enc = env->enc;\r
6158 UChar* p = from;\r
6159\r
6160 in_esc = 0;\r
6161 while (! PEND) {\r
6162 if (ignore_escaped && in_esc) {\r
6163 in_esc = 0;\r
6164 }\r
6165 else {\r
6166 PFETCH_S(code);\r
6167 if (code == c) return 1;\r
6168 if (code == MC_ESC(env->syntax)) in_esc = 1;\r
6169 }\r
6170 }\r
6171 return 0;\r
6172}\r
6173\r
6174static int\r
b602265d 6175parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)\r
14b0e578
CS
6176{\r
6177 int r, neg, len, fetched, and_start;\r
6178 OnigCodePoint v, vs;\r
6179 UChar *p;\r
6180 Node* node;\r
6181 CClassNode *cc, *prev_cc;\r
6182 CClassNode work_cc;\r
6183\r
6184 enum CCSTATE state;\r
6185 enum CCVALTYPE val_type, in_type;\r
6186 int val_israw, in_israw;\r
6187\r
14b0e578 6188 *np = NULL_NODE;\r
b602265d
DG
6189 env->parse_depth++;\r
6190 if (env->parse_depth > ParseDepthLimit)\r
6191 return ONIGERR_PARSE_DEPTH_LIMIT_OVER;\r
6192 prev_cc = (CClassNode* )NULL;\r
14b0e578
CS
6193 r = fetch_token_in_cc(tok, src, end, env);\r
6194 if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) {\r
6195 neg = 1;\r
6196 r = fetch_token_in_cc(tok, src, end, env);\r
6197 }\r
6198 else {\r
6199 neg = 0;\r
6200 }\r
6201\r
6202 if (r < 0) return r;\r
6203 if (r == TK_CC_CLOSE) {\r
6204 if (! code_exist_check((OnigCodePoint )']',\r
6205 *src, env->pattern_end, 1, env))\r
6206 return ONIGERR_EMPTY_CHAR_CLASS;\r
6207\r
6208 CC_ESC_WARN(env, (UChar* )"]");\r
6209 r = tok->type = TK_CHAR; /* allow []...] */\r
6210 }\r
6211\r
6212 *np = node = node_new_cclass();\r
6213 CHECK_NULL_RETURN_MEMERR(node);\r
b602265d 6214 cc = CCLASS_(node);\r
14b0e578
CS
6215\r
6216 and_start = 0;\r
6217 state = CCS_START;\r
6218 p = *src;\r
6219 while (r != TK_CC_CLOSE) {\r
6220 fetched = 0;\r
6221 switch (r) {\r
6222 case TK_CHAR:\r
b602265d 6223 any_char_in:\r
14b0e578
CS
6224 len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.c);\r
6225 if (len > 1) {\r
b602265d 6226 in_type = CCV_CODE_POINT;\r
14b0e578
CS
6227 }\r
6228 else if (len < 0) {\r
b602265d
DG
6229 r = len;\r
6230 goto err;\r
14b0e578
CS
6231 }\r
6232 else {\r
b602265d
DG
6233 /* sb_char: */\r
6234 in_type = CCV_SB;\r
14b0e578
CS
6235 }\r
6236 v = (OnigCodePoint )tok->u.c;\r
6237 in_israw = 0;\r
6238 goto val_entry2;\r
6239 break;\r
6240\r
6241 case TK_RAW_BYTE:\r
6242 /* tok->base != 0 : octal or hexadec. */\r
6243 if (! ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base != 0) {\r
b602265d
DG
6244 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];\r
6245 UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN;\r
6246 UChar* psave = p;\r
6247 int i, base = tok->base;\r
6248\r
6249 buf[0] = tok->u.c;\r
6250 for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) {\r
6251 r = fetch_token_in_cc(tok, &p, end, env);\r
6252 if (r < 0) goto err;\r
6253 if (r != TK_RAW_BYTE || tok->base != base) {\r
6254 fetched = 1;\r
6255 break;\r
6256 }\r
6257 buf[i] = tok->u.c;\r
6258 }\r
6259\r
6260 if (i < ONIGENC_MBC_MINLEN(env->enc)) {\r
6261 r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;\r
6262 goto err;\r
6263 }\r
6264\r
6265 len = enclen(env->enc, buf);\r
6266 if (i < len) {\r
6267 r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;\r
6268 goto err;\r
6269 }\r
6270 else if (i > len) { /* fetch back */\r
6271 p = psave;\r
6272 for (i = 1; i < len; i++) {\r
6273 r = fetch_token_in_cc(tok, &p, end, env);\r
6274 }\r
6275 fetched = 0;\r
6276 }\r
6277\r
6278 if (i == 1) {\r
6279 v = (OnigCodePoint )buf[0];\r
6280 goto raw_single;\r
6281 }\r
6282 else {\r
6283 v = ONIGENC_MBC_TO_CODE(env->enc, buf, bufe);\r
6284 in_type = CCV_CODE_POINT;\r
6285 }\r
6286 }\r
6287 else {\r
6288 v = (OnigCodePoint )tok->u.c;\r
6289 raw_single:\r
6290 in_type = CCV_SB;\r
6291 }\r
6292 in_israw = 1;\r
6293 goto val_entry2;\r
6294 break;\r
6295\r
6296 case TK_CODE_POINT:\r
6297 v = tok->u.code;\r
6298 in_israw = 1;\r
6299 val_entry:\r
6300 len = ONIGENC_CODE_TO_MBCLEN(env->enc, v);\r
6301 if (len < 0) {\r
6302 r = len;\r
6303 goto err;\r
6304 }\r
6305 in_type = (len == 1 ? CCV_SB : CCV_CODE_POINT);\r
6306 val_entry2:\r
6307 r = next_state_val(cc, &vs, v, &val_israw, in_israw, in_type, &val_type,\r
6308 &state, env);\r
6309 if (r != 0) goto err;\r
6310 break;\r
6311\r
6312 case TK_POSIX_BRACKET_OPEN:\r
6313 r = parse_posix_bracket(cc, &p, end, env);\r
6314 if (r < 0) goto err;\r
6315 if (r == 1) { /* is not POSIX bracket */\r
6316 CC_ESC_WARN(env, (UChar* )"[");\r
6317 p = tok->backp;\r
6318 v = (OnigCodePoint )tok->u.c;\r
6319 in_israw = 0;\r
6320 goto val_entry;\r
6321 }\r
6322 goto next_class;\r
6323 break;\r
6324\r
6325 case TK_CHAR_TYPE:\r
6326 r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not, env);\r
6327 if (r != 0) goto err;\r
6328\r
6329 next_class:\r
6330 r = next_state_class(cc, &vs, &val_type, &state, env);\r
6331 if (r != 0) goto err;\r
6332 break;\r
6333\r
6334 case TK_CHAR_PROPERTY:\r
6335 {\r
6336 int ctype = fetch_char_property_to_ctype(&p, end, env);\r
6337 if (ctype < 0) {\r
6338 r = ctype;\r
6339 goto err;\r
6340 }\r
6341 r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, env);\r
6342 if (r != 0) goto err;\r
6343 goto next_class;\r
6344 }\r
6345 break;\r
6346\r
6347 case TK_CC_RANGE:\r
6348 if (state == CCS_VALUE) {\r
6349 r = fetch_token_in_cc(tok, &p, end, env);\r
6350 if (r < 0) goto err;\r
6351 fetched = 1;\r
6352 if (r == TK_CC_CLOSE) { /* allow [x-] */\r
6353 range_end_val:\r
6354 v = (OnigCodePoint )'-';\r
6355 in_israw = 0;\r
6356 goto val_entry;\r
6357 }\r
6358 else if (r == TK_CC_AND) {\r
6359 CC_ESC_WARN(env, (UChar* )"-");\r
6360 goto range_end_val;\r
6361 }\r
6362\r
6363 if (val_type == CCV_CLASS) {\r
6364 r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;\r
6365 goto err;\r
6366 }\r
6367\r
6368 state = CCS_RANGE;\r
6369 }\r
6370 else if (state == CCS_START) {\r
6371 /* [-xa] is allowed */\r
6372 v = (OnigCodePoint )tok->u.c;\r
6373 in_israw = 0;\r
6374\r
6375 r = fetch_token_in_cc(tok, &p, end, env);\r
6376 if (r < 0) goto err;\r
6377 fetched = 1;\r
6378 /* [--x] or [a&&-x] is warned. */\r
6379 if (r == TK_CC_RANGE || and_start != 0)\r
6380 CC_ESC_WARN(env, (UChar* )"-");\r
6381\r
6382 goto val_entry;\r
6383 }\r
6384 else if (state == CCS_RANGE) {\r
6385 CC_ESC_WARN(env, (UChar* )"-");\r
6386 goto any_char_in; /* [!--x] is allowed */\r
6387 }\r
6388 else { /* CCS_COMPLETE */\r
6389 r = fetch_token_in_cc(tok, &p, end, env);\r
6390 if (r < 0) goto err;\r
6391 fetched = 1;\r
6392 if (r == TK_CC_CLOSE) goto range_end_val; /* allow [a-b-] */\r
6393 else if (r == TK_CC_AND) {\r
6394 CC_ESC_WARN(env, (UChar* )"-");\r
6395 goto range_end_val;\r
6396 }\r
6397\r
6398 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) {\r
6399 CC_ESC_WARN(env, (UChar* )"-");\r
6400 goto range_end_val; /* [0-9-a] is allowed as [0-9\-a] */\r
6401 }\r
6402 r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;\r
6403 goto err;\r
6404 }\r
6405 break;\r
6406\r
6407 case TK_CC_CC_OPEN: /* [ */\r
6408 {\r
6409 Node *anode;\r
6410 CClassNode* acc;\r
6411\r
6412 r = parse_char_class(&anode, tok, &p, end, env);\r
6413 if (r != 0) {\r
6414 onig_node_free(anode);\r
6415 goto cc_open_err;\r
6416 }\r
6417 acc = CCLASS_(anode);\r
6418 r = or_cclass(cc, acc, env->enc);\r
6419 onig_node_free(anode);\r
6420\r
6421 cc_open_err:\r
6422 if (r != 0) goto err;\r
6423 }\r
6424 break;\r
6425\r
6426 case TK_CC_AND: /* && */\r
6427 {\r
6428 if (state == CCS_VALUE) {\r
6429 r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,\r
6430 &val_type, &state, env);\r
6431 if (r != 0) goto err;\r
6432 }\r
6433 /* initialize local variables */\r
6434 and_start = 1;\r
6435 state = CCS_START;\r
6436\r
6437 if (IS_NOT_NULL(prev_cc)) {\r
6438 r = and_cclass(prev_cc, cc, env->enc);\r
6439 if (r != 0) goto err;\r
6440 bbuf_free(cc->mbuf);\r
6441 }\r
6442 else {\r
6443 prev_cc = cc;\r
6444 cc = &work_cc;\r
6445 }\r
6446 initialize_cclass(cc);\r
6447 }\r
6448 break;\r
6449\r
6450 case TK_EOT:\r
6451 r = ONIGERR_PREMATURE_END_OF_CHAR_CLASS;\r
6452 goto err;\r
6453 break;\r
6454 default:\r
6455 r = ONIGERR_PARSER_BUG;\r
6456 goto err;\r
6457 break;\r
6458 }\r
6459\r
6460 if (fetched)\r
6461 r = tok->type;\r
6462 else {\r
6463 r = fetch_token_in_cc(tok, &p, end, env);\r
6464 if (r < 0) goto err;\r
6465 }\r
6466 }\r
6467\r
6468 if (state == CCS_VALUE) {\r
6469 r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,\r
6470 &val_type, &state, env);\r
6471 if (r != 0) goto err;\r
6472 }\r
6473\r
6474 if (IS_NOT_NULL(prev_cc)) {\r
6475 r = and_cclass(prev_cc, cc, env->enc);\r
6476 if (r != 0) goto err;\r
6477 bbuf_free(cc->mbuf);\r
6478 cc = prev_cc;\r
6479 }\r
6480\r
6481 if (neg != 0)\r
6482 NCCLASS_SET_NOT(cc);\r
6483 else\r
6484 NCCLASS_CLEAR_NOT(cc);\r
6485 if (IS_NCCLASS_NOT(cc) &&\r
6486 IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) {\r
6487 int is_empty = (IS_NULL(cc->mbuf) ? 1 : 0);\r
6488 if (is_empty != 0)\r
6489 BITSET_IS_EMPTY(cc->bs, is_empty);\r
6490\r
6491 if (is_empty == 0) {\r
6492#define NEWLINE_CODE 0x0a\r
6493\r
6494 if (ONIGENC_IS_CODE_NEWLINE(env->enc, NEWLINE_CODE)) {\r
6495 if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1)\r
6496 BITSET_SET_BIT(cc->bs, NEWLINE_CODE);\r
6497 else\r
6498 add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE);\r
6499 }\r
6500 }\r
6501 }\r
6502 *src = p;\r
6503 env->parse_depth--;\r
6504 return 0;\r
6505\r
6506 err:\r
6507 if (cc != CCLASS_(*np))\r
6508 bbuf_free(cc->mbuf);\r
6509 return r;\r
6510}\r
6511\r
6512static int parse_subexp(Node** top, OnigToken* tok, int term,\r
6513 UChar** src, UChar* end, ScanEnv* env);\r
6514\r
6515#ifdef USE_CALLOUT\r
6516\r
6517/* (?{...}[tag][+-]) (?{{...}}[tag][+-]) */\r
6518static int\r
6519parse_callout_of_contents(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* env)\r
6520{\r
6521 int r;\r
6522 int i;\r
6523 int in;\r
6524 int num;\r
6525 OnigCodePoint c;\r
6526 UChar* code_start;\r
6527 UChar* code_end;\r
6528 UChar* contents;\r
6529 UChar* tag_start;\r
6530 UChar* tag_end;\r
6531 int brace_nest;\r
6532 CalloutListEntry* e;\r
6533 RegexExt* ext;\r
6534 OnigEncoding enc = env->enc;\r
6535 UChar* p = *src;\r
6536\r
6537 if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;\r
6538\r
6539 brace_nest = 0;\r
6540 while (PPEEK_IS('{')) {\r
6541 brace_nest++;\r
6542 PINC_S;\r
6543 if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;\r
6544 }\r
6545\r
6546 in = ONIG_CALLOUT_IN_PROGRESS;\r
6547 code_start = p;\r
6548 while (1) {\r
6549 if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;\r
6550\r
6551 code_end = p;\r
6552 PFETCH_S(c);\r
6553 if (c == '}') {\r
6554 i = brace_nest;\r
6555 while (i > 0) {\r
6556 if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;\r
6557 PFETCH_S(c);\r
6558 if (c == '}') i--;\r
6559 else break;\r
6560 }\r
6561 if (i == 0) break;\r
6562 }\r
6563 }\r
6564\r
6565 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
6566\r
6567 PFETCH_S(c);\r
6568 if (c == '[') {\r
6569 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
6570 tag_start = p;\r
6571 while (! PEND) {\r
6572 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
6573 tag_end = p;\r
6574 PFETCH_S(c);\r
6575 if (c == ']') break;\r
6576 }\r
6577 if (! is_allowed_callout_tag_name(enc, tag_start, tag_end))\r
6578 return ONIGERR_INVALID_CALLOUT_TAG_NAME;\r
6579\r
6580 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
6581 PFETCH_S(c);\r
6582 }\r
6583 else {\r
6584 tag_start = tag_end = 0;\r
6585 }\r
6586\r
6587 if (c == 'X') {\r
6588 in |= ONIG_CALLOUT_IN_RETRACTION;\r
6589 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
6590 PFETCH_S(c);\r
6591 }\r
6592 else if (c == '<') {\r
6593 in = ONIG_CALLOUT_IN_RETRACTION;\r
6594 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
6595 PFETCH_S(c);\r
6596 }\r
6597 else if (c == '>') { /* no needs (default) */\r
6598 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
6599 PFETCH_S(c);\r
6600 }\r
6601\r
6602 if (c != cterm)\r
6603 return ONIGERR_INVALID_CALLOUT_PATTERN;\r
6604\r
6605 r = reg_callout_list_entry(env, &num);\r
6606 if (r != 0) return r;\r
6607\r
6608 ext = onig_get_regex_ext(env->reg);\r
6609 if (IS_NULL(ext->pattern)) {\r
6610 r = onig_ext_set_pattern(env->reg, env->pattern, env->pattern_end);\r
6611 if (r != ONIG_NORMAL) return r;\r
6612 }\r
6613\r
6614 if (tag_start != tag_end) {\r
6615 r = callout_tag_entry(env->reg, tag_start, tag_end, num);\r
6616 if (r != ONIG_NORMAL) return r;\r
6617 }\r
6618\r
6619 contents = onigenc_strdup(enc, code_start, code_end);\r
6620 CHECK_NULL_RETURN_MEMERR(contents);\r
6621\r
6622 r = node_new_callout(np, ONIG_CALLOUT_OF_CONTENTS, num, ONIG_NON_NAME_ID, env);\r
6623 if (r != 0) {\r
6624 xfree(contents);\r
6625 return r;\r
6626 }\r
6627\r
6628 e = onig_reg_callout_list_at(env->reg, num);\r
6629 e->of = ONIG_CALLOUT_OF_CONTENTS;\r
6630 e->in = in;\r
6631 e->name_id = ONIG_NON_NAME_ID;\r
6632 e->u.content.start = contents;\r
6633 e->u.content.end = contents + (code_end - code_start);\r
6634\r
6635 *src = p;\r
6636 return 0;\r
6637}\r
6638\r
6639static long\r
6640parse_long(OnigEncoding enc, UChar* s, UChar* end, int sign_on, long max, long* rl)\r
6641{\r
6642 long v;\r
6643 long d;\r
6644 int flag;\r
6645 UChar* p;\r
6646 OnigCodePoint c;\r
6647\r
6648 if (s >= end) return ONIGERR_INVALID_CALLOUT_ARG;\r
6649\r
6650 flag = 1;\r
6651 v = 0;\r
6652 p = s;\r
6653 while (p < end) {\r
6654 c = ONIGENC_MBC_TO_CODE(enc, p, end);\r
6655 p += ONIGENC_MBC_ENC_LEN(enc, p);\r
6656 if (c >= '0' && c <= '9') {\r
6657 d = (long )(c - '0');\r
6658 if (v > (max - d) / 10)\r
6659 return ONIGERR_INVALID_CALLOUT_ARG;\r
6660\r
6661 v = v * 10 + d;\r
6662 }\r
6663 else if (sign_on != 0 && (c == '-' || c == '+')) {\r
6664 if (c == '-') flag = -1;\r
6665 }\r
6666 else\r
6667 return ONIGERR_INVALID_CALLOUT_ARG;\r
6668\r
6669 sign_on = 0;\r
6670 }\r
6671\r
6672 *rl = flag * v;\r
6673 return ONIG_NORMAL;\r
6674}\r
6675\r
6676static int\r
6677parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end,\r
6678 unsigned int types[], OnigValue vals[], ScanEnv* env)\r
6679{\r
6680#define MAX_CALLOUT_ARG_BYTE_LENGTH 128\r
6681\r
6682 int r;\r
6683 int n;\r
6684 int esc;\r
6685 int cn;\r
6686 UChar* s;\r
6687 UChar* e;\r
6688 UChar* eesc;\r
6689 OnigCodePoint c;\r
6690 UChar* bufend;\r
6691 UChar buf[MAX_CALLOUT_ARG_BYTE_LENGTH];\r
6692 OnigEncoding enc = env->enc;\r
6693 UChar* p = *src;\r
6694\r
6695 if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;\r
6696\r
6697 n = 0;\r
6698 while (n < ONIG_CALLOUT_MAX_ARGS_NUM) {\r
6699 c = 0;\r
6700 cn = 0;\r
6701 esc = 0;\r
6702 eesc = 0;\r
6703 bufend = buf;\r
6704 s = e = p;\r
6705 while (1) {\r
6706 if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;\r
6707\r
6708 e = p;\r
6709 PFETCH_S(c);\r
6710 if (esc != 0) {\r
6711 esc = 0;\r
6712 if (c == '\\' || c == cterm || c == ',') {\r
6713 /* */\r
6714 }\r
6715 else {\r
6716 e = eesc;\r
6717 cn++;\r
6718 }\r
6719 goto add_char;\r
14b0e578
CS
6720 }\r
6721 else {\r
b602265d
DG
6722 if (c == '\\') {\r
6723 esc = 1;\r
6724 eesc = e;\r
6725 }\r
6726 else if (c == cterm || c == ',')\r
6727 break;\r
6728 else {\r
6729 size_t clen;\r
14b0e578 6730\r
b602265d
DG
6731 add_char:\r
6732 if (skip_mode == 0) {\r
6733 clen = p - e;\r
6734 if (bufend + clen > buf + MAX_CALLOUT_ARG_BYTE_LENGTH)\r
6735 return ONIGERR_INVALID_CALLOUT_ARG; /* too long argument */\r
14b0e578 6736\r
b602265d
DG
6737 xmemcpy(bufend, e, clen);\r
6738 bufend += clen;\r
6739 }\r
6740 cn++;\r
6741 }\r
14b0e578 6742 }\r
b602265d 6743 }\r
14b0e578 6744\r
b602265d
DG
6745 if (cn != 0) {\r
6746 if (skip_mode == 0) {\r
6747 if ((types[n] & ONIG_TYPE_LONG) != 0) {\r
6748 int fixed = 0;\r
6749 if (cn > 0) {\r
6750 long rl;\r
6751 r = parse_long(enc, buf, bufend, 1, LONG_MAX, &rl);\r
6752 if (r == ONIG_NORMAL) {\r
6753 vals[n].l = rl;\r
6754 fixed = 1;\r
6755 types[n] = ONIG_TYPE_LONG;\r
6756 }\r
6757 }\r
14b0e578 6758\r
b602265d
DG
6759 if (fixed == 0) {\r
6760 types[n] = (types[n] & ~ONIG_TYPE_LONG);\r
6761 if (types[n] == ONIG_TYPE_VOID)\r
6762 return ONIGERR_INVALID_CALLOUT_ARG;\r
6763 }\r
6764 }\r
14b0e578 6765\r
b602265d
DG
6766 switch (types[n]) {\r
6767 case ONIG_TYPE_LONG:\r
6768 break;\r
14b0e578 6769\r
b602265d
DG
6770 case ONIG_TYPE_CHAR:\r
6771 if (cn != 1) return ONIGERR_INVALID_CALLOUT_ARG;\r
6772 vals[n].c = ONIGENC_MBC_TO_CODE(enc, buf, bufend);\r
6773 break;\r
14b0e578 6774\r
b602265d
DG
6775 case ONIG_TYPE_STRING:\r
6776 {\r
6777 UChar* rs = onigenc_strdup(enc, buf, bufend);\r
6778 CHECK_NULL_RETURN_MEMERR(rs);\r
6779 vals[n].s.start = rs;\r
6780 vals[n].s.end = rs + (e - s);\r
6781 }\r
6782 break;\r
14b0e578 6783\r
b602265d
DG
6784 case ONIG_TYPE_TAG:\r
6785 if (eesc != 0 || ! is_allowed_callout_tag_name(enc, s, e))\r
6786 return ONIGERR_INVALID_CALLOUT_TAG_NAME;\r
14b0e578 6787\r
b602265d
DG
6788 vals[n].s.start = s;\r
6789 vals[n].s.end = e;\r
6790 break;\r
6791\r
6792 case ONIG_TYPE_VOID:\r
6793 case ONIG_TYPE_POINTER:\r
6794 return ONIGERR_PARSER_BUG;\r
6795 break;\r
6796 }\r
14b0e578 6797 }\r
14b0e578 6798\r
b602265d
DG
6799 n++;\r
6800 }\r
14b0e578 6801\r
b602265d
DG
6802 if (c == cterm) break;\r
6803 }\r
14b0e578 6804\r
b602265d 6805 if (c != cterm) return ONIGERR_INVALID_CALLOUT_PATTERN;\r
14b0e578 6806\r
b602265d
DG
6807 *src = p;\r
6808 return n;\r
6809}\r
14b0e578 6810\r
b602265d
DG
6811/* (*name[TAG]) (*name[TAG]{a,b,..}) */\r
6812static int\r
6813parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* env)\r
6814{\r
6815 int r;\r
6816 int i;\r
6817 int in;\r
6818 int num;\r
6819 int name_id;\r
6820 int arg_num;\r
6821 int max_arg_num;\r
6822 int opt_arg_num;\r
6823 int is_not_single;\r
6824 OnigCodePoint c;\r
6825 UChar* name_start;\r
6826 UChar* name_end;\r
6827 UChar* tag_start;\r
6828 UChar* tag_end;\r
6829 Node* node;\r
6830 CalloutListEntry* e;\r
6831 RegexExt* ext;\r
6832 unsigned int types[ONIG_CALLOUT_MAX_ARGS_NUM];\r
6833 OnigValue vals[ONIG_CALLOUT_MAX_ARGS_NUM];\r
6834 OnigEncoding enc = env->enc;\r
6835 UChar* p = *src;\r
14b0e578 6836\r
b602265d
DG
6837 /* PFETCH_READY; */\r
6838 if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;\r
6839\r
6840 node = 0;\r
6841 name_start = p;\r
6842 while (1) {\r
6843 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
6844 name_end = p;\r
6845 PFETCH_S(c);\r
6846 if (c == cterm || c == '[' || c == '{') break;\r
6847 }\r
6848\r
6849 if (! is_allowed_callout_name(enc, name_start, name_end))\r
6850 return ONIGERR_INVALID_CALLOUT_NAME;\r
6851\r
6852 if (c == '[') {\r
6853 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
6854 tag_start = p;\r
6855 while (! PEND) {\r
6856 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
6857 tag_end = p;\r
6858 PFETCH_S(c);\r
6859 if (c == ']') break;\r
14b0e578 6860 }\r
b602265d
DG
6861 if (! is_allowed_callout_tag_name(enc, tag_start, tag_end))\r
6862 return ONIGERR_INVALID_CALLOUT_TAG_NAME;\r
6863\r
6864 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
6865 PFETCH_S(c);\r
6866 }\r
6867 else {\r
6868 tag_start = tag_end = 0;\r
14b0e578
CS
6869 }\r
6870\r
b602265d
DG
6871 if (c == '{') {\r
6872 UChar* save;\r
6873\r
6874 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
6875\r
6876 /* read for single check only */\r
6877 save = p;\r
6878 arg_num = parse_callout_args(1, '}', &p, end, 0, 0, env);\r
6879 if (arg_num < 0) return arg_num;\r
6880\r
6881 is_not_single = PPEEK_IS(cterm) ? 0 : 1;\r
6882 p = save;\r
6883 r = get_callout_name_id_by_name(enc, is_not_single, name_start, name_end,\r
6884 &name_id);\r
6885 if (r != ONIG_NORMAL) return r;\r
6886\r
6887 max_arg_num = get_callout_arg_num_by_name_id(name_id);\r
6888 for (i = 0; i < max_arg_num; i++) {\r
6889 types[i] = get_callout_arg_type_by_name_id(name_id, i);\r
6890 }\r
6891\r
6892 arg_num = parse_callout_args(0, '}', &p, end, types, vals, env);\r
6893 if (arg_num < 0) return arg_num;\r
6894\r
6895 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
6896 PFETCH_S(c);\r
14b0e578 6897 }\r
b602265d
DG
6898 else {\r
6899 arg_num = 0;\r
14b0e578 6900\r
b602265d
DG
6901 is_not_single = 0;\r
6902 r = get_callout_name_id_by_name(enc, is_not_single, name_start, name_end,\r
6903 &name_id);\r
6904 if (r != ONIG_NORMAL) return r;\r
6905\r
6906 max_arg_num = get_callout_arg_num_by_name_id(name_id);\r
6907 for (i = 0; i < max_arg_num; i++) {\r
6908 types[i] = get_callout_arg_type_by_name_id(name_id, i);\r
6909 }\r
14b0e578
CS
6910 }\r
6911\r
b602265d
DG
6912 in = onig_get_callout_in_by_name_id(name_id);\r
6913 opt_arg_num = get_callout_opt_arg_num_by_name_id(name_id);\r
6914 if (arg_num > max_arg_num || arg_num < (max_arg_num - opt_arg_num))\r
6915 return ONIGERR_INVALID_CALLOUT_ARG;\r
14b0e578 6916\r
b602265d
DG
6917 if (c != cterm)\r
6918 return ONIGERR_INVALID_CALLOUT_PATTERN;\r
14b0e578 6919\r
b602265d
DG
6920 r = reg_callout_list_entry(env, &num);\r
6921 if (r != 0) return r;\r
14b0e578 6922\r
b602265d
DG
6923 ext = onig_get_regex_ext(env->reg);\r
6924 if (IS_NULL(ext->pattern)) {\r
6925 r = onig_ext_set_pattern(env->reg, env->pattern, env->pattern_end);\r
6926 if (r != ONIG_NORMAL) return r;\r
6927 }\r
6928\r
6929 if (tag_start != tag_end) {\r
6930 r = callout_tag_entry(env->reg, tag_start, tag_end, num);\r
6931 if (r != ONIG_NORMAL) return r;\r
6932 }\r
6933\r
6934 r = node_new_callout(&node, ONIG_CALLOUT_OF_NAME, num, name_id, env);\r
6935 if (r != ONIG_NORMAL) return r;\r
6936\r
6937 e = onig_reg_callout_list_at(env->reg, num);\r
6938 e->of = ONIG_CALLOUT_OF_NAME;\r
6939 e->in = in;\r
6940 e->name_id = name_id;\r
6941 e->type = onig_get_callout_type_by_name_id(name_id);\r
6942 e->start_func = onig_get_callout_start_func_by_name_id(name_id);\r
6943 e->end_func = onig_get_callout_end_func_by_name_id(name_id);\r
6944 e->u.arg.num = max_arg_num;\r
6945 e->u.arg.passed_num = arg_num;\r
6946 for (i = 0; i < max_arg_num; i++) {\r
6947 e->u.arg.types[i] = types[i];\r
6948 if (i < arg_num)\r
6949 e->u.arg.vals[i] = vals[i];\r
6950 else\r
6951 e->u.arg.vals[i] = get_callout_opt_default_by_name_id(name_id, i);\r
14b0e578 6952 }\r
b602265d
DG
6953\r
6954 *np = node;\r
14b0e578
CS
6955 *src = p;\r
6956 return 0;\r
14b0e578 6957}\r
b602265d 6958#endif\r
14b0e578
CS
6959\r
6960static int\r
b602265d
DG
6961parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,\r
6962 ScanEnv* env)\r
14b0e578
CS
6963{\r
6964 int r, num;\r
6965 Node *target;\r
6966 OnigOptionType option;\r
6967 OnigCodePoint c;\r
b602265d 6968 int list_capture;\r
14b0e578
CS
6969 OnigEncoding enc = env->enc;\r
6970\r
b602265d
DG
6971 UChar* p = *src;\r
6972 PFETCH_READY;\r
6973\r
6974 *np = NULL;\r
6975 if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;\r
6976\r
6977 option = env->options;\r
6978 c = PPEEK;\r
6979 if (c == '?' && IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {\r
6980 PINC;\r
6981 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
6982\r
6983 PFETCH(c);\r
6984 switch (c) {\r
6985 case ':': /* (?:...) grouping only */\r
6986 group:\r
6987 r = fetch_token(tok, &p, end, env);\r
6988 if (r < 0) return r;\r
6989 r = parse_subexp(np, tok, term, &p, end, env);\r
6990 if (r < 0) return r;\r
6991 *src = p;\r
6992 return 1; /* group */\r
6993 break;\r
6994\r
6995 case '=':\r
6996 *np = onig_node_new_anchor(ANCHOR_PREC_READ, 0);\r
6997 break;\r
6998 case '!': /* preceding read */\r
6999 *np = onig_node_new_anchor(ANCHOR_PREC_READ_NOT, 0);\r
7000 break;\r
7001 case '>': /* (?>...) stop backtrack */\r
7002 *np = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);\r
7003 break;\r
7004\r
7005 case '\'':\r
7006 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {\r
7007 goto named_group1;\r
7008 }\r
7009 else\r
7010 return ONIGERR_UNDEFINED_GROUP_OPTION;\r
7011 break;\r
7012\r
7013 case '<': /* look behind (?<=...), (?<!...) */\r
7014 if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;\r
7015 PFETCH(c);\r
7016 if (c == '=')\r
7017 *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND, 0);\r
7018 else if (c == '!')\r
7019 *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND_NOT, 0);\r
7020 else {\r
7021 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {\r
7022 UChar *name;\r
7023 UChar *name_end;\r
7024 enum REF_NUM num_type;\r
7025\r
7026 PUNFETCH;\r
7027 c = '<';\r
7028\r
7029 named_group1:\r
7030 list_capture = 0;\r
7031\r
7032 named_group2:\r
7033 name = p;\r
7034 r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num,\r
7035 &num_type, 0);\r
7036 if (r < 0) return r;\r
7037\r
7038 num = scan_env_add_mem_entry(env);\r
7039 if (num < 0) return num;\r
7040 if (list_capture != 0 && num >= (int )MEM_STATUS_BITS_NUM)\r
7041 return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;\r
7042\r
7043 r = name_add(env->reg, name, name_end, num, env);\r
7044 if (r != 0) return r;\r
7045 *np = node_new_memory(1);\r
7046 CHECK_NULL_RETURN_MEMERR(*np);\r
7047 ENCLOSURE_(*np)->m.regnum = num;\r
7048 if (list_capture != 0)\r
7049 MEM_STATUS_ON_SIMPLE(env->capture_history, num);\r
7050 env->num_named++;\r
7051 }\r
7052 else {\r
7053 return ONIGERR_UNDEFINED_GROUP_OPTION;\r
7054 }\r
7055 }\r
7056 break;\r
7057\r
7058 case '~':\r
7059 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP)) {\r
7060 Node* absent;\r
7061 Node* expr;\r
7062 int head_bar;\r
7063 int is_range_cutter;\r
7064\r
7065 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
7066\r
7067 if (PPEEK_IS('|')) { /* (?~|generator|absent) */\r
7068 PINC;\r
7069 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
7070\r
7071 head_bar = 1;\r
7072 if (PPEEK_IS(')')) { /* (?~|) : range clear */\r
7073 PINC;\r
7074 r = make_range_clear(np, env);\r
7075 if (r != 0) return r;\r
7076 goto end;\r
7077 }\r
7078 }\r
7079 else\r
7080 head_bar = 0;\r
7081\r
7082 r = fetch_token(tok, &p, end, env);\r
7083 if (r < 0) return r;\r
7084 r = parse_subexp(&absent, tok, term, &p, end, env);\r
7085 if (r < 0) {\r
7086 onig_node_free(absent);\r
7087 return r;\r
7088 }\r
7089\r
7090 expr = NULL_NODE;\r
7091 is_range_cutter = 0;\r
7092 if (head_bar != 0) {\r
7093 Node* top = absent;\r
7094 if (NODE_TYPE(top) != NODE_ALT || IS_NULL(NODE_CDR(top))) {\r
7095 expr = NULL_NODE;\r
7096 is_range_cutter = 1;\r
7097 /* return ONIGERR_INVALID_ABSENT_GROUP_GENERATOR_PATTERN; */\r
7098 }\r
7099 else {\r
7100 absent = NODE_CAR(top);\r
7101 expr = NODE_CDR(top);\r
7102 NODE_CAR(top) = NULL_NODE;\r
7103 NODE_CDR(top) = NULL_NODE;\r
7104 onig_node_free(top);\r
7105 if (IS_NULL(NODE_CDR(expr))) {\r
7106 top = expr;\r
7107 expr = NODE_CAR(top);\r
7108 NODE_CAR(top) = NULL_NODE;\r
7109 onig_node_free(top);\r
7110 }\r
7111 }\r
7112 }\r
7113\r
7114 r = make_absent_tree(np, absent, expr, is_range_cutter, env);\r
7115 if (r != 0) {\r
7116 return r;\r
7117 }\r
7118 goto end;\r
7119 }\r
7120 else {\r
7121 return ONIGERR_UNDEFINED_GROUP_OPTION;\r
7122 }\r
7123 break;\r
7124\r
7125#ifdef USE_CALLOUT\r
7126 case '{':\r
7127 if (! IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS))\r
7128 return ONIGERR_UNDEFINED_GROUP_OPTION;\r
7129\r
7130 r = parse_callout_of_contents(np, ')', &p, end, env);\r
7131 if (r != 0) return r;\r
7132\r
7133 goto end;\r
7134 break;\r
7135#endif\r
7136\r
7137 case '(':\r
7138 /* (?()...) */\r
7139 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE)) {\r
7140 UChar *prev;\r
7141 Node* condition;\r
7142 int condition_is_checker;\r
7143\r
7144 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
7145 PFETCH(c);\r
7146 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
7147\r
7148 if (IS_CODE_DIGIT_ASCII(enc, c)\r
7149 || c == '-' || c == '+' || c == '<' || c == '\'') {\r
7150 UChar* name_end;\r
7151 int back_num;\r
7152 int exist_level;\r
7153 int level;\r
7154 enum REF_NUM num_type;\r
7155 int is_enclosed;\r
7156\r
7157 is_enclosed = (c == '<' || c == '\'') ? 1 : 0;\r
7158 if (! is_enclosed)\r
7159 PUNFETCH;\r
7160 prev = p;\r
7161 exist_level = 0;\r
7162#ifdef USE_BACKREF_WITH_LEVEL\r
7163 name_end = NULL_UCHARP; /* no need. escape gcc warning. */\r
7164 r = fetch_name_with_level(\r
7165 (OnigCodePoint )(is_enclosed != 0 ? c : '('),\r
7166 &p, end, &name_end,\r
7167 env, &back_num, &level, &num_type);\r
7168 if (r == 1) exist_level = 1;\r
7169#else\r
7170 r = fetch_name((OnigCodePoint )(is_enclosed != 0 ? c : '('),\r
7171 &p, end, &name_end, env, &back_num, &num_type, 1);\r
7172#endif\r
7173 if (r < 0) {\r
7174 if (is_enclosed == 0) {\r
7175 goto any_condition;\r
7176 }\r
7177 else\r
7178 return r;\r
7179 }\r
7180\r
7181 condition_is_checker = 1;\r
7182 if (num_type != IS_NOT_NUM) {\r
7183 if (num_type == IS_REL_NUM) {\r
7184 back_num = backref_rel_to_abs(back_num, env);\r
7185 }\r
7186 if (back_num <= 0)\r
7187 return ONIGERR_INVALID_BACKREF;\r
7188\r
7189 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_STRICT_CHECK_BACKREF)) {\r
7190 if (back_num > env->num_mem ||\r
7191 IS_NULL(SCANENV_MEMENV(env)[back_num].node))\r
7192 return ONIGERR_INVALID_BACKREF;\r
7193 }\r
7194\r
7195 condition = node_new_backref_checker(1, &back_num, 0,\r
7196#ifdef USE_BACKREF_WITH_LEVEL\r
7197 exist_level, level,\r
7198#endif\r
7199 env);\r
7200 }\r
7201 else {\r
7202 int num;\r
7203 int* backs;\r
7204\r
7205 num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);\r
7206 if (num <= 0) {\r
7207 onig_scan_env_set_error_string(env,\r
7208 ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);\r
7209 return ONIGERR_UNDEFINED_NAME_REFERENCE;\r
7210 }\r
7211 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_STRICT_CHECK_BACKREF)) {\r
7212 int i;\r
7213 for (i = 0; i < num; i++) {\r
7214 if (backs[i] > env->num_mem ||\r
7215 IS_NULL(SCANENV_MEMENV(env)[backs[i]].node))\r
7216 return ONIGERR_INVALID_BACKREF;\r
7217 }\r
7218 }\r
7219\r
7220 condition = node_new_backref_checker(num, backs, 1,\r
7221#ifdef USE_BACKREF_WITH_LEVEL\r
7222 exist_level, level,\r
7223#endif\r
7224 env);\r
7225 }\r
7226\r
7227 if (is_enclosed != 0) {\r
7228 if (PEND) goto err_if_else;\r
7229 PFETCH(c);\r
7230 if (c != ')') goto err_if_else;\r
7231 }\r
7232 }\r
7233#ifdef USE_CALLOUT\r
7234 else if (c == '?') {\r
7235 if (IS_SYNTAX_OP2(env->syntax,\r
7236 ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS)) {\r
7237 if (! PEND && PPEEK_IS('{')) {\r
7238 /* condition part is callouts of contents: (?(?{...})THEN|ELSE) */\r
7239 condition_is_checker = 0;\r
7240 PFETCH(c);\r
7241 r = parse_callout_of_contents(&condition, ')', &p, end, env);\r
7242 if (r != 0) return r;\r
7243 goto end_condition;\r
7244 }\r
7245 }\r
7246 goto any_condition;\r
7247 }\r
7248 else if (c == '*' &&\r
7249 IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME)) {\r
7250 condition_is_checker = 0;\r
7251 r = parse_callout_of_name(&condition, ')', &p, end, env);\r
7252 if (r != 0) return r;\r
7253 goto end_condition;\r
7254 }\r
14b0e578 7255#endif\r
b602265d
DG
7256 else {\r
7257 any_condition:\r
7258 PUNFETCH;\r
7259 condition_is_checker = 0;\r
7260 r = fetch_token(tok, &p, end, env);\r
7261 if (r < 0) return r;\r
7262 r = parse_subexp(&condition, tok, term, &p, end, env);\r
7263 if (r < 0) {\r
7264 onig_node_free(condition);\r
7265 return r;\r
7266 }\r
7267 }\r
14b0e578 7268\r
b602265d
DG
7269 end_condition:\r
7270 CHECK_NULL_RETURN_MEMERR(condition);\r
14b0e578 7271\r
b602265d
DG
7272 if (PEND) {\r
7273 err_if_else:\r
7274 onig_node_free(condition);\r
7275 return ONIGERR_END_PATTERN_IN_GROUP;\r
7276 }\r
14b0e578 7277\r
b602265d
DG
7278 if (PPEEK_IS(')')) { /* case: empty body: make backref checker */\r
7279 if (condition_is_checker == 0) {\r
7280 onig_node_free(condition);\r
7281 return ONIGERR_INVALID_IF_ELSE_SYNTAX;\r
7282 }\r
7283 PFETCH(c);\r
7284 *np = condition;\r
7285 }\r
7286 else { /* if-else */\r
7287 int then_is_empty;\r
7288 Node *Then, *Else;\r
14b0e578 7289\r
b602265d
DG
7290 if (PPEEK_IS('|')) {\r
7291 PFETCH(c);\r
7292 Then = 0;\r
7293 then_is_empty = 1;\r
7294 }\r
7295 else\r
7296 then_is_empty = 0;\r
14b0e578 7297\r
b602265d
DG
7298 r = fetch_token(tok, &p, end, env);\r
7299 if (r < 0) {\r
7300 onig_node_free(condition);\r
7301 return r;\r
7302 }\r
7303 r = parse_subexp(&target, tok, term, &p, end, env);\r
7304 if (r < 0) {\r
7305 onig_node_free(condition);\r
7306 onig_node_free(target);\r
7307 return r;\r
7308 }\r
14b0e578 7309\r
b602265d
DG
7310 if (then_is_empty != 0) {\r
7311 Else = target;\r
7312 }\r
7313 else {\r
7314 if (NODE_TYPE(target) == NODE_ALT) {\r
7315 Then = NODE_CAR(target);\r
7316 if (NODE_CDR(NODE_CDR(target)) == NULL_NODE) {\r
7317 Else = NODE_CAR(NODE_CDR(target));\r
7318 cons_node_free_alone(NODE_CDR(target));\r
7319 }\r
7320 else {\r
7321 Else = NODE_CDR(target);\r
7322 }\r
7323 cons_node_free_alone(target);\r
7324 }\r
7325 else {\r
7326 Then = target;\r
7327 Else = 0;\r
7328 }\r
7329 }\r
14b0e578 7330\r
b602265d
DG
7331 *np = node_new_enclosure_if_else(condition, Then, Else);\r
7332 if (IS_NULL(*np)) {\r
7333 onig_node_free(condition);\r
7334 onig_node_free(Then);\r
7335 onig_node_free(Else);\r
7336 return ONIGERR_MEMORY;\r
7337 }\r
7338 }\r
7339 goto end;\r
14b0e578 7340 }\r
14b0e578 7341 else {\r
b602265d 7342 return ONIGERR_UNDEFINED_GROUP_OPTION;\r
14b0e578 7343 }\r
14b0e578
CS
7344 break;\r
7345\r
7346 case '@':\r
7347 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) {\r
b602265d
DG
7348 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {\r
7349 PFETCH(c);\r
7350 if (c == '<' || c == '\'') {\r
7351 list_capture = 1;\r
7352 goto named_group2; /* (?@<name>...) */\r
7353 }\r
7354 PUNFETCH;\r
7355 }\r
7356\r
7357 *np = node_new_memory(0);\r
7358 CHECK_NULL_RETURN_MEMERR(*np);\r
7359 num = scan_env_add_mem_entry(env);\r
7360 if (num < 0) {\r
7361 return num;\r
7362 }\r
7363 else if (num >= (int )MEM_STATUS_BITS_NUM) {\r
7364 return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;\r
7365 }\r
7366 ENCLOSURE_(*np)->m.regnum = num;\r
7367 MEM_STATUS_ON_SIMPLE(env->capture_history, num);\r
14b0e578
CS
7368 }\r
7369 else {\r
b602265d 7370 return ONIGERR_UNDEFINED_GROUP_OPTION;\r
14b0e578
CS
7371 }\r
7372 break;\r
7373\r
7374#ifdef USE_POSIXLINE_OPTION\r
7375 case 'p':\r
7376#endif\r
7377 case '-': case 'i': case 'm': case 's': case 'x':\r
b602265d 7378 case 'W': case 'D': case 'S': case 'P':\r
14b0e578 7379 {\r
b602265d
DG
7380 int neg = 0;\r
7381\r
7382 while (1) {\r
7383 switch (c) {\r
7384 case ':':\r
7385 case ')':\r
7386 break;\r
7387\r
7388 case '-': neg = 1; break;\r
7389 case 'x': OPTION_NEGATE(option, ONIG_OPTION_EXTEND, neg); break;\r
7390 case 'i': OPTION_NEGATE(option, ONIG_OPTION_IGNORECASE, neg); break;\r
7391 case 's':\r
7392 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {\r
7393 OPTION_NEGATE(option, ONIG_OPTION_MULTILINE, neg);\r
7394 }\r
7395 else\r
7396 return ONIGERR_UNDEFINED_GROUP_OPTION;\r
7397 break;\r
7398\r
7399 case 'm':\r
7400 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {\r
7401 OPTION_NEGATE(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 1 : 0));\r
7402 }\r
7403 else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) {\r
7404 OPTION_NEGATE(option, ONIG_OPTION_MULTILINE, neg);\r
7405 }\r
7406 else\r
7407 return ONIGERR_UNDEFINED_GROUP_OPTION;\r
7408 break;\r
14b0e578 7409#ifdef USE_POSIXLINE_OPTION\r
b602265d
DG
7410 case 'p':\r
7411 OPTION_NEGATE(option, ONIG_OPTION_MULTILINE|ONIG_OPTION_SINGLELINE, neg);\r
7412 break;\r
14b0e578 7413#endif\r
b602265d
DG
7414 case 'W': OPTION_NEGATE(option, ONIG_OPTION_WORD_IS_ASCII, neg); break;\r
7415 case 'D': OPTION_NEGATE(option, ONIG_OPTION_DIGIT_IS_ASCII, neg); break;\r
7416 case 'S': OPTION_NEGATE(option, ONIG_OPTION_SPACE_IS_ASCII, neg); break;\r
7417 case 'P': OPTION_NEGATE(option, ONIG_OPTION_POSIX_IS_ASCII, neg); break;\r
7418\r
7419 default:\r
7420 return ONIGERR_UNDEFINED_GROUP_OPTION;\r
7421 }\r
7422\r
7423 if (c == ')') {\r
7424 *np = node_new_option(option);\r
7425 CHECK_NULL_RETURN_MEMERR(*np);\r
7426 *src = p;\r
7427 return 2; /* option only */\r
7428 }\r
7429 else if (c == ':') {\r
7430 OnigOptionType prev = env->options;\r
7431\r
7432 env->options = option;\r
7433 r = fetch_token(tok, &p, end, env);\r
7434 if (r < 0) return r;\r
7435 r = parse_subexp(&target, tok, term, &p, end, env);\r
7436 env->options = prev;\r
7437 if (r < 0) {\r
7438 onig_node_free(target);\r
7439 return r;\r
7440 }\r
7441 *np = node_new_option(option);\r
7442 CHECK_NULL_RETURN_MEMERR(*np);\r
7443 NODE_BODY(*np) = target;\r
7444 *src = p;\r
7445 return 0;\r
7446 }\r
7447\r
7448 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
7449 PFETCH(c);\r
7450 }\r
14b0e578
CS
7451 }\r
7452 break;\r
7453\r
7454 default:\r
7455 return ONIGERR_UNDEFINED_GROUP_OPTION;\r
7456 }\r
7457 }\r
b602265d
DG
7458#ifdef USE_CALLOUT\r
7459 else if (c == '*' &&\r
7460 IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME)) {\r
7461 PINC;\r
7462 r = parse_callout_of_name(np, ')', &p, end, env);\r
7463 if (r != 0) return r;\r
7464\r
7465 goto end;\r
7466 }\r
7467#endif\r
14b0e578 7468 else {\r
b602265d 7469 if (ONIG_IS_OPTION_ON(env->options, ONIG_OPTION_DONT_CAPTURE_GROUP))\r
14b0e578
CS
7470 goto group;\r
7471\r
b602265d 7472 *np = node_new_memory(0);\r
14b0e578
CS
7473 CHECK_NULL_RETURN_MEMERR(*np);\r
7474 num = scan_env_add_mem_entry(env);\r
7475 if (num < 0) return num;\r
b602265d 7476 ENCLOSURE_(*np)->m.regnum = num;\r
14b0e578
CS
7477 }\r
7478\r
7479 CHECK_NULL_RETURN_MEMERR(*np);\r
7480 r = fetch_token(tok, &p, end, env);\r
7481 if (r < 0) return r;\r
7482 r = parse_subexp(&target, tok, term, &p, end, env);\r
b602265d
DG
7483 if (r < 0) {\r
7484 onig_node_free(target);\r
7485 return r;\r
7486 }\r
14b0e578 7487\r
b602265d
DG
7488 NODE_BODY(*np) = target;\r
7489\r
7490 if (NODE_TYPE(*np) == NODE_ENCLOSURE) {\r
7491 if (ENCLOSURE_(*np)->type == ENCLOSURE_MEMORY) {\r
14b0e578 7492 /* Don't move this to previous of parse_subexp() */\r
b602265d 7493 r = scan_env_set_mem_node(env, ENCLOSURE_(*np)->m.regnum, *np);\r
14b0e578
CS
7494 if (r != 0) return r;\r
7495 }\r
7496 }\r
7497\r
b602265d 7498 end:\r
14b0e578
CS
7499 *src = p;\r
7500 return 0;\r
7501}\r
7502\r
/* Spellings of the simple quantifiers, indexed by the value that
   quantifier_type_num() yields; used when building the nested-repeat
   warning text in set_quantifier(). */
static const char* PopularQStr[] = {
  "?",
  "*",
  "+",
  "??",
  "*?",
  "+?"
};
7506\r
/* Replacement text for a reduced nested quantifier, indexed by the
   ReduceTypeTable[][] entry; used in the warning emitted by
   set_quantifier().  The first two slots are empty strings. */
static const char* ReduceQStr[] = {
  "",
  "",
  "*",
  "*?",
  "??",
  "+ and ??",
  "+? and ?"
};
7510\r
7511static int\r
7512set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)\r
7513{\r
b602265d 7514 QuantNode* qn;\r
14b0e578 7515\r
b602265d
DG
7516 qn = QUANT_(qnode);\r
7517 if (qn->lower == 1 && qn->upper == 1)\r
14b0e578 7518 return 1;\r
14b0e578 7519\r
b602265d
DG
7520 switch (NODE_TYPE(target)) {\r
7521 case NODE_STRING:\r
14b0e578 7522 if (! group) {\r
b602265d
DG
7523 if (str_node_can_be_split(target, env->enc)) {\r
7524 Node* n = str_node_split_last_char(target, env->enc);\r
7525 if (IS_NOT_NULL(n)) {\r
7526 NODE_BODY(qnode) = n;\r
7527 return 2;\r
7528 }\r
14b0e578
CS
7529 }\r
7530 }\r
7531 break;\r
7532\r
b602265d 7533 case NODE_QUANT:\r
14b0e578
CS
7534 { /* check redundant double repeat. */\r
7535 /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */\r
b602265d
DG
7536 QuantNode* qnt = QUANT_(target);\r
7537 int nestq_num = quantifier_type_num(qn);\r
7538 int targetq_num = quantifier_type_num(qnt);\r
14b0e578
CS
7539\r
7540#ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR\r
b602265d
DG
7541 if (targetq_num >= 0 && nestq_num >= 0 &&\r
7542 IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {\r
14b0e578
CS
7543 UChar buf[WARN_BUFSIZE];\r
7544\r
7545 switch(ReduceTypeTable[targetq_num][nestq_num]) {\r
7546 case RQ_ASIS:\r
7547 break;\r
7548\r
7549 case RQ_DEL:\r
7550 if (onig_verb_warn != onig_null_warn) {\r
7551 onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,\r
b602265d
DG
7552 env->pattern, env->pattern_end,\r
7553 (UChar* )"redundant nested repeat operator");\r
14b0e578
CS
7554 (*onig_verb_warn)((char* )buf);\r
7555 }\r
7556 goto warn_exit;\r
7557 break;\r
7558\r
7559 default:\r
7560 if (onig_verb_warn != onig_null_warn) {\r
7561 onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,\r
7562 env->pattern, env->pattern_end,\r
7563 (UChar* )"nested repeat operator %s and %s was replaced with '%s'",\r
7564 PopularQStr[targetq_num], PopularQStr[nestq_num],\r
7565 ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]);\r
7566 (*onig_verb_warn)((char* )buf);\r
7567 }\r
7568 goto warn_exit;\r
7569 break;\r
7570 }\r
7571 }\r
7572\r
7573 warn_exit:\r
7574#endif\r
b602265d
DG
7575 if (targetq_num >= 0 && nestq_num < 0) {\r
7576 if (targetq_num == 1 || targetq_num == 2) { /* * or + */\r
7577 /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */\r
7578 if (! IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) {\r
7579 qn->upper = (qn->lower == 0 ? 1 : qn->lower);\r
7580 }\r
7581 }\r
7582 }\r
7583 else {\r
7584 NODE_BODY(qnode) = target;\r
7585 onig_reduce_nested_quantifier(qnode, target);\r
7586 goto q_exit;\r
14b0e578
CS
7587 }\r
7588 }\r
7589 break;\r
7590\r
7591 default:\r
7592 break;\r
7593 }\r
7594\r
b602265d 7595 NODE_BODY(qnode) = target;\r
14b0e578
CS
7596 q_exit:\r
7597 return 0;\r
7598}\r
7599\r
7600\r
14b0e578
CS
7601#ifndef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS\r
7602static int\r
7603clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc)\r
7604{\r
7605 BBuf *tbuf;\r
7606 int r;\r
7607\r
7608 if (IS_NCCLASS_NOT(cc)) {\r
7609 bitset_invert(cc->bs);\r
7610\r
7611 if (! ONIGENC_IS_SINGLEBYTE(enc)) {\r
7612 r = not_code_range_buf(enc, cc->mbuf, &tbuf);\r
7613 if (r != 0) return r;\r
7614\r
7615 bbuf_free(cc->mbuf);\r
7616 cc->mbuf = tbuf;\r
7617 }\r
7618\r
7619 NCCLASS_CLEAR_NOT(cc);\r
7620 }\r
7621\r
7622 return 0;\r
7623}\r
7624#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */\r
7625\r
7626typedef struct {\r
7627 ScanEnv* env;\r
7628 CClassNode* cc;\r
7629 Node* alt_root;\r
7630 Node** ptail;\r
7631} IApplyCaseFoldArg;\r
7632\r
7633static int\r
b602265d 7634i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], int to_len, void* arg)\r
14b0e578
CS
7635{\r
7636 IApplyCaseFoldArg* iarg;\r
7637 ScanEnv* env;\r
7638 CClassNode* cc;\r
7639 BitSetRef bs;\r
7640\r
7641 iarg = (IApplyCaseFoldArg* )arg;\r
7642 env = iarg->env;\r
7643 cc = iarg->cc;\r
7644 bs = cc->bs;\r
7645\r
7646 if (to_len == 1) {\r
7647 int is_in = onig_is_code_in_cc(env->enc, from, cc);\r
7648#ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS\r
7649 if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) ||\r
b602265d 7650 (is_in == 0 && IS_NCCLASS_NOT(cc))) {\r
14b0e578 7651 if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {\r
b602265d 7652 add_code_range(&(cc->mbuf), env, *to, *to);\r
14b0e578
CS
7653 }\r
7654 else {\r
b602265d 7655 BITSET_SET_BIT(bs, *to);\r
14b0e578
CS
7656 }\r
7657 }\r
7658#else\r
7659 if (is_in != 0) {\r
7660 if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {\r
b602265d
DG
7661 if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc);\r
7662 add_code_range(&(cc->mbuf), env, *to, *to);\r
14b0e578
CS
7663 }\r
7664 else {\r
b602265d
DG
7665 if (IS_NCCLASS_NOT(cc)) {\r
7666 BITSET_CLEAR_BIT(bs, *to);\r
7667 }\r
7668 else\r
7669 BITSET_SET_BIT(bs, *to);\r
14b0e578
CS
7670 }\r
7671 }\r
7672#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */\r
7673 }\r
7674 else {\r
7675 int r, i, len;\r
7676 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];\r
7677 Node *snode = NULL_NODE;\r
7678\r
7679 if (onig_is_code_in_cc(env->enc, from, cc)\r
7680#ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS\r
b602265d 7681 && !IS_NCCLASS_NOT(cc)\r
14b0e578 7682#endif\r
b602265d 7683 ) {\r
14b0e578 7684 for (i = 0; i < to_len; i++) {\r
b602265d
DG
7685 len = ONIGENC_CODE_TO_MBC(env->enc, to[i], buf);\r
7686 if (i == 0) {\r
7687 snode = onig_node_new_str(buf, buf + len);\r
7688 CHECK_NULL_RETURN_MEMERR(snode);\r
7689\r
7690 /* char-class expanded multi-char only\r
7691 compare with string folded at match time. */\r
7692 NODE_STRING_SET_AMBIG(snode);\r
7693 }\r
7694 else {\r
7695 r = onig_node_str_cat(snode, buf, buf + len);\r
7696 if (r < 0) {\r
7697 onig_node_free(snode);\r
7698 return r;\r
7699 }\r
7700 }\r
14b0e578
CS
7701 }\r
7702\r
7703 *(iarg->ptail) = onig_node_new_alt(snode, NULL_NODE);\r
7704 CHECK_NULL_RETURN_MEMERR(*(iarg->ptail));\r
b602265d 7705 iarg->ptail = &(NODE_CDR((*(iarg->ptail))));\r
14b0e578
CS
7706 }\r
7707 }\r
7708\r
7709 return 0;\r
7710}\r
7711\r
7712static int\r
b602265d
DG
7713parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,\r
7714 ScanEnv* env)\r
14b0e578
CS
7715{\r
7716 int r, len, group = 0;\r
7717 Node* qn;\r
7718 Node** targetp;\r
7719\r
7720 *np = NULL;\r
7721 if (tok->type == (enum TokenSyms )term)\r
7722 goto end_of_token;\r
7723\r
7724 switch (tok->type) {\r
7725 case TK_ALT:\r
7726 case TK_EOT:\r
7727 end_of_token:\r
7728 *np = node_new_empty();\r
7729 return tok->type;\r
7730 break;\r
7731\r
7732 case TK_SUBEXP_OPEN:\r
b602265d 7733 r = parse_enclosure(np, tok, TK_SUBEXP_CLOSE, src, end, env);\r
14b0e578
CS
7734 if (r < 0) return r;\r
7735 if (r == 1) group = 1;\r
7736 else if (r == 2) { /* option only */\r
7737 Node* target;\r
b602265d 7738 OnigOptionType prev = env->options;\r
14b0e578 7739\r
b602265d 7740 env->options = ENCLOSURE_(*np)->o.options;\r
14b0e578
CS
7741 r = fetch_token(tok, src, end, env);\r
7742 if (r < 0) return r;\r
7743 r = parse_subexp(&target, tok, term, src, end, env);\r
b602265d
DG
7744 env->options = prev;\r
7745 if (r < 0) {\r
7746 onig_node_free(target);\r
7747 return r;\r
7748 }\r
7749 NODE_BODY(*np) = target;\r
14b0e578
CS
7750 return tok->type;\r
7751 }\r
7752 break;\r
7753\r
7754 case TK_SUBEXP_CLOSE:\r
7755 if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP))\r
7756 return ONIGERR_UNMATCHED_CLOSE_PARENTHESIS;\r
7757\r
7758 if (tok->escaped) goto tk_raw_byte;\r
7759 else goto tk_byte;\r
7760 break;\r
7761\r
7762 case TK_STRING:\r
7763 tk_byte:\r
7764 {\r
7765 *np = node_new_str(tok->backp, *src);\r
7766 CHECK_NULL_RETURN_MEMERR(*np);\r
7767\r
7768 while (1) {\r
b602265d
DG
7769 r = fetch_token(tok, src, end, env);\r
7770 if (r < 0) return r;\r
7771 if (r != TK_STRING) break;\r
14b0e578 7772\r
b602265d
DG
7773 r = onig_node_str_cat(*np, tok->backp, *src);\r
7774 if (r < 0) return r;\r
14b0e578
CS
7775 }\r
7776\r
7777 string_end:\r
7778 targetp = np;\r
7779 goto repeat;\r
7780 }\r
7781 break;\r
7782\r
7783 case TK_RAW_BYTE:\r
7784 tk_raw_byte:\r
7785 {\r
7786 *np = node_new_str_raw_char((UChar )tok->u.c);\r
7787 CHECK_NULL_RETURN_MEMERR(*np);\r
7788 len = 1;\r
7789 while (1) {\r
b602265d
DG
7790 if (len >= ONIGENC_MBC_MINLEN(env->enc)) {\r
7791 if (len == enclen(env->enc, STR_(*np)->s)) {/* should not enclen_end() */\r
7792 r = fetch_token(tok, src, end, env);\r
7793 NODE_STRING_CLEAR_RAW(*np);\r
7794 goto string_end;\r
7795 }\r
7796 }\r
7797\r
7798 r = fetch_token(tok, src, end, env);\r
7799 if (r < 0) return r;\r
7800 if (r != TK_RAW_BYTE) {\r
7801 /* Don't use this, it is wrong for little endian encodings. */\r
14b0e578 7802#ifdef USE_PAD_TO_SHORT_BYTE_CHAR\r
b602265d
DG
7803 int rem;\r
7804 if (len < ONIGENC_MBC_MINLEN(env->enc)) {\r
7805 rem = ONIGENC_MBC_MINLEN(env->enc) - len;\r
7806 (void )node_str_head_pad(STR_(*np), rem, (UChar )0);\r
7807 if (len + rem == enclen(env->enc, STR_(*np)->s)) {\r
7808 NODE_STRING_CLEAR_RAW(*np);\r
7809 goto string_end;\r
7810 }\r
7811 }\r
14b0e578 7812#endif\r
b602265d
DG
7813 return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;\r
7814 }\r
14b0e578 7815\r
b602265d
DG
7816 r = node_str_cat_char(*np, (UChar )tok->u.c);\r
7817 if (r < 0) return r;\r
14b0e578 7818\r
b602265d 7819 len++;\r
14b0e578
CS
7820 }\r
7821 }\r
7822 break;\r
7823\r
7824 case TK_CODE_POINT:\r
7825 {\r
7826 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];\r
7827 int num = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf);\r
7828 if (num < 0) return num;\r
7829#ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG\r
7830 *np = node_new_str_raw(buf, buf + num);\r
7831#else\r
7832 *np = node_new_str(buf, buf + num);\r
7833#endif\r
7834 CHECK_NULL_RETURN_MEMERR(*np);\r
7835 }\r
7836 break;\r
7837\r
7838 case TK_QUOTE_OPEN:\r
7839 {\r
7840 OnigCodePoint end_op[2];\r
7841 UChar *qstart, *qend, *nextp;\r
7842\r
7843 end_op[0] = (OnigCodePoint )MC_ESC(env->syntax);\r
7844 end_op[1] = (OnigCodePoint )'E';\r
7845 qstart = *src;\r
7846 qend = find_str_position(end_op, 2, qstart, end, &nextp, env->enc);\r
7847 if (IS_NULL(qend)) {\r
b602265d 7848 nextp = qend = end;\r
14b0e578
CS
7849 }\r
7850 *np = node_new_str(qstart, qend);\r
7851 CHECK_NULL_RETURN_MEMERR(*np);\r
7852 *src = nextp;\r
7853 }\r
7854 break;\r
7855\r
7856 case TK_CHAR_TYPE:\r
7857 {\r
7858 switch (tok->u.prop.ctype) {\r
7859 case ONIGENC_CTYPE_WORD:\r
b602265d
DG
7860 *np = node_new_ctype(tok->u.prop.ctype, tok->u.prop.not, env->options);\r
7861 CHECK_NULL_RETURN_MEMERR(*np);\r
7862 break;\r
14b0e578
CS
7863\r
7864 case ONIGENC_CTYPE_SPACE:\r
7865 case ONIGENC_CTYPE_DIGIT:\r
7866 case ONIGENC_CTYPE_XDIGIT:\r
b602265d
DG
7867 {\r
7868 CClassNode* cc;\r
7869\r
7870 *np = node_new_cclass();\r
7871 CHECK_NULL_RETURN_MEMERR(*np);\r
7872 cc = CCLASS_(*np);\r
7873 add_ctype_to_cc(cc, tok->u.prop.ctype, 0, env);\r
7874 if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);\r
7875 }\r
7876 break;\r
14b0e578
CS
7877\r
7878 default:\r
b602265d
DG
7879 return ONIGERR_PARSER_BUG;\r
7880 break;\r
14b0e578
CS
7881 }\r
7882 }\r
7883 break;\r
7884\r
7885 case TK_CHAR_PROPERTY:\r
7886 r = parse_char_property(np, tok, src, end, env);\r
7887 if (r != 0) return r;\r
7888 break;\r
7889\r
7890 case TK_CC_OPEN:\r
7891 {\r
7892 CClassNode* cc;\r
7893\r
7894 r = parse_char_class(np, tok, src, end, env);\r
7895 if (r != 0) return r;\r
7896\r
b602265d
DG
7897 cc = CCLASS_(*np);\r
7898 if (IS_IGNORECASE(env->options)) {\r
7899 IApplyCaseFoldArg iarg;\r
7900\r
7901 iarg.env = env;\r
7902 iarg.cc = cc;\r
7903 iarg.alt_root = NULL_NODE;\r
7904 iarg.ptail = &(iarg.alt_root);\r
7905\r
7906 r = ONIGENC_APPLY_ALL_CASE_FOLD(env->enc, env->case_fold_flag,\r
7907 i_apply_case_fold, &iarg);\r
7908 if (r != 0) {\r
7909 onig_node_free(iarg.alt_root);\r
7910 return r;\r
7911 }\r
7912 if (IS_NOT_NULL(iarg.alt_root)) {\r
14b0e578
CS
7913 Node* work = onig_node_new_alt(*np, iarg.alt_root);\r
7914 if (IS_NULL(work)) {\r
7915 onig_node_free(iarg.alt_root);\r
7916 return ONIGERR_MEMORY;\r
7917 }\r
7918 *np = work;\r
b602265d 7919 }\r
14b0e578
CS
7920 }\r
7921 }\r
7922 break;\r
7923\r
7924 case TK_ANYCHAR:\r
7925 *np = node_new_anychar();\r
7926 CHECK_NULL_RETURN_MEMERR(*np);\r
7927 break;\r
7928\r
7929 case TK_ANYCHAR_ANYTIME:\r
7930 *np = node_new_anychar();\r
7931 CHECK_NULL_RETURN_MEMERR(*np);\r
7932 qn = node_new_quantifier(0, REPEAT_INFINITE, 0);\r
7933 CHECK_NULL_RETURN_MEMERR(qn);\r
b602265d 7934 NODE_BODY(qn) = *np;\r
14b0e578
CS
7935 *np = qn;\r
7936 break;\r
7937\r
7938 case TK_BACKREF:\r
7939 len = tok->u.backref.num;\r
7940 *np = node_new_backref(len,\r
b602265d
DG
7941 (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)),\r
7942 tok->u.backref.by_name,\r
14b0e578 7943#ifdef USE_BACKREF_WITH_LEVEL\r
b602265d
DG
7944 tok->u.backref.exist_level,\r
7945 tok->u.backref.level,\r
14b0e578 7946#endif\r
b602265d 7947 env);\r
14b0e578
CS
7948 CHECK_NULL_RETURN_MEMERR(*np);\r
7949 break;\r
7950\r
b602265d 7951#ifdef USE_CALL\r
14b0e578
CS
7952 case TK_CALL:\r
7953 {\r
7954 int gnum = tok->u.call.gnum;\r
7955\r
b602265d
DG
7956 *np = node_new_call(tok->u.call.name, tok->u.call.name_end,\r
7957 gnum, tok->u.call.by_number);\r
14b0e578
CS
7958 CHECK_NULL_RETURN_MEMERR(*np);\r
7959 env->num_call++;\r
b602265d
DG
7960 if (tok->u.call.by_number != 0 && gnum == 0) {\r
7961 env->has_call_zero = 1;\r
7962 }\r
14b0e578
CS
7963 }\r
7964 break;\r
7965#endif\r
7966\r
7967 case TK_ANCHOR:\r
b602265d
DG
7968 {\r
7969 int ascii_mode =\r
7970 IS_WORD_ASCII(env->options) && IS_WORD_ANCHOR_TYPE(tok->u.anchor) ? 1 : 0;\r
7971 *np = onig_node_new_anchor(tok->u.anchor, ascii_mode);\r
7972 }\r
14b0e578
CS
7973 break;\r
7974\r
7975 case TK_OP_REPEAT:\r
7976 case TK_INTERVAL:\r
7977 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS)) {\r
7978 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS))\r
b602265d 7979 return ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED;\r
14b0e578 7980 else\r
b602265d 7981 *np = node_new_empty();\r
14b0e578
CS
7982 }\r
7983 else {\r
7984 goto tk_byte;\r
7985 }\r
7986 break;\r
7987\r
b602265d
DG
7988 case TK_KEEP:\r
7989 r = node_new_keep(np, env);\r
7990 if (r < 0) return r;\r
7991 break;\r
7992\r
7993 case TK_GENERAL_NEWLINE:\r
7994 r = node_new_general_newline(np, env);\r
7995 if (r < 0) return r;\r
7996 break;\r
7997\r
7998 case TK_NO_NEWLINE:\r
7999 r = node_new_no_newline(np, env);\r
8000 if (r < 0) return r;\r
8001 break;\r
8002\r
8003 case TK_TRUE_ANYCHAR:\r
8004 r = node_new_true_anychar(np, env);\r
8005 if (r < 0) return r;\r
8006 break;\r
8007\r
8008 case TK_EXTENDED_GRAPHEME_CLUSTER:\r
8009 r = make_extended_grapheme_cluster(np, env);\r
8010 if (r < 0) return r;\r
8011 break;\r
8012\r
14b0e578
CS
8013 default:\r
8014 return ONIGERR_PARSER_BUG;\r
8015 break;\r
8016 }\r
8017\r
8018 {\r
8019 targetp = np;\r
8020\r
8021 re_entry:\r
8022 r = fetch_token(tok, src, end, env);\r
8023 if (r < 0) return r;\r
8024\r
8025 repeat:\r
8026 if (r == TK_OP_REPEAT || r == TK_INTERVAL) {\r
8027 if (is_invalid_quantifier_target(*targetp))\r
b602265d 8028 return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID;\r
14b0e578
CS
8029\r
8030 qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper,\r
b602265d 8031 (r == TK_INTERVAL ? 1 : 0));\r
14b0e578 8032 CHECK_NULL_RETURN_MEMERR(qn);\r
b602265d 8033 QUANT_(qn)->greedy = tok->u.repeat.greedy;\r
14b0e578
CS
8034 r = set_quantifier(qn, *targetp, group, env);\r
8035 if (r < 0) {\r
b602265d
DG
8036 onig_node_free(qn);\r
8037 return r;\r
14b0e578
CS
8038 }\r
8039\r
8040 if (tok->u.repeat.possessive != 0) {\r
b602265d
DG
8041 Node* en;\r
8042 en = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);\r
8043 if (IS_NULL(en)) {\r
8044 onig_node_free(qn);\r
8045 return ONIGERR_MEMORY;\r
8046 }\r
8047 NODE_BODY(en) = qn;\r
8048 qn = en;\r
14b0e578
CS
8049 }\r
8050\r
8051 if (r == 0) {\r
b602265d 8052 *targetp = qn;\r
14b0e578
CS
8053 }\r
8054 else if (r == 1) {\r
b602265d 8055 onig_node_free(qn);\r
14b0e578
CS
8056 }\r
8057 else if (r == 2) { /* split case: /abc+/ */\r
b602265d
DG
8058 Node *tmp;\r
8059\r
8060 *targetp = node_new_list(*targetp, NULL);\r
8061 if (IS_NULL(*targetp)) {\r
8062 onig_node_free(qn);\r
8063 return ONIGERR_MEMORY;\r
8064 }\r
8065 tmp = NODE_CDR(*targetp) = node_new_list(qn, NULL);\r
8066 if (IS_NULL(tmp)) {\r
8067 onig_node_free(qn);\r
8068 return ONIGERR_MEMORY;\r
8069 }\r
8070 targetp = &(NODE_CAR(tmp));\r
14b0e578
CS
8071 }\r
8072 goto re_entry;\r
8073 }\r
8074 }\r
8075\r
8076 return r;\r
8077}\r
8078\r
8079static int\r
b602265d
DG
8080parse_branch(Node** top, OnigToken* tok, int term, UChar** src, UChar* end,\r
8081 ScanEnv* env)\r
14b0e578
CS
8082{\r
8083 int r;\r
8084 Node *node, **headp;\r
8085\r
8086 *top = NULL;\r
8087 r = parse_exp(&node, tok, term, src, end, env);\r
b602265d
DG
8088 if (r < 0) {\r
8089 onig_node_free(node);\r
8090 return r;\r
8091 }\r
14b0e578
CS
8092\r
8093 if (r == TK_EOT || r == term || r == TK_ALT) {\r
8094 *top = node;\r
8095 }\r
8096 else {\r
8097 *top = node_new_list(node, NULL);\r
b602265d 8098 headp = &(NODE_CDR(*top));\r
14b0e578
CS
8099 while (r != TK_EOT && r != term && r != TK_ALT) {\r
8100 r = parse_exp(&node, tok, term, src, end, env);\r
b602265d
DG
8101 if (r < 0) {\r
8102 onig_node_free(node);\r
8103 return r;\r
8104 }\r
14b0e578 8105\r
b602265d
DG
8106 if (NODE_TYPE(node) == NODE_LIST) {\r
8107 *headp = node;\r
8108 while (IS_NOT_NULL(NODE_CDR(node))) node = NODE_CDR(node);\r
8109 headp = &(NODE_CDR(node));\r
14b0e578
CS
8110 }\r
8111 else {\r
b602265d
DG
8112 *headp = node_new_list(node, NULL);\r
8113 headp = &(NODE_CDR(*headp));\r
14b0e578
CS
8114 }\r
8115 }\r
8116 }\r
8117\r
8118 return r;\r
8119}\r
8120\r
8121/* term_tok: TK_EOT or TK_SUBEXP_CLOSE */\r
8122static int\r
b602265d
DG
8123parse_subexp(Node** top, OnigToken* tok, int term, UChar** src, UChar* end,\r
8124 ScanEnv* env)\r
14b0e578
CS
8125{\r
8126 int r;\r
8127 Node *node, **headp;\r
8128\r
8129 *top = NULL;\r
b602265d
DG
8130 env->parse_depth++;\r
8131 if (env->parse_depth > ParseDepthLimit)\r
8132 return ONIGERR_PARSE_DEPTH_LIMIT_OVER;\r
14b0e578
CS
8133 r = parse_branch(&node, tok, term, src, end, env);\r
8134 if (r < 0) {\r
8135 onig_node_free(node);\r
8136 return r;\r
8137 }\r
8138\r
8139 if (r == term) {\r
8140 *top = node;\r
8141 }\r
8142 else if (r == TK_ALT) {\r
8143 *top = onig_node_new_alt(node, NULL);\r
b602265d 8144 headp = &(NODE_CDR(*top));\r
14b0e578
CS
8145 while (r == TK_ALT) {\r
8146 r = fetch_token(tok, src, end, env);\r
8147 if (r < 0) return r;\r
8148 r = parse_branch(&node, tok, term, src, end, env);\r
b602265d
DG
8149 if (r < 0) {\r
8150 onig_node_free(node);\r
8151 return r;\r
8152 }\r
14b0e578 8153 *headp = onig_node_new_alt(node, NULL);\r
b602265d 8154 headp = &(NODE_CDR(*headp));\r
14b0e578
CS
8155 }\r
8156\r
8157 if (tok->type != (enum TokenSyms )term)\r
8158 goto err;\r
8159 }\r
8160 else {\r
b602265d 8161 onig_node_free(node);\r
14b0e578
CS
8162 err:\r
8163 if (term == TK_SUBEXP_CLOSE)\r
8164 return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;\r
8165 else\r
8166 return ONIGERR_PARSER_BUG;\r
8167 }\r
8168\r
b602265d 8169 env->parse_depth--;\r
14b0e578
CS
8170 return r;\r
8171}\r
8172\r
8173static int\r
8174parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)\r
8175{\r
8176 int r;\r
8177 OnigToken tok;\r
8178\r
8179 r = fetch_token(&tok, src, end, env);\r
8180 if (r < 0) return r;\r
8181 r = parse_subexp(top, &tok, TK_EOT, src, end, env);\r
8182 if (r < 0) return r;\r
b602265d
DG
8183\r
8184 return 0;\r
8185}\r
8186\r
#ifdef USE_CALL
/*
 * Wrap `node` in a new unnamed memory enclosure with regnum 0 and register
 * that enclosure in the scan environment as memory node 0.
 *
 * node:  tree that becomes the body of the new enclosure.
 * env:   scan environment handed to scan_env_set_mem_node().
 * rnode: receives the new enclosure on success; not written on failure.
 *
 * Returns 0 on success, ONIGERR_MEMORY if the enclosure cannot be allocated,
 * or the non-zero code from scan_env_set_mem_node().  On every failure path
 * `node` is left untouched and still belongs to the caller.
 */
static int
make_call_zero_body(Node* node, ScanEnv* env, Node** rnode)
{
  int r;

  Node* x = node_new_memory(0 /* 0: is not named */);
  CHECK_NULL_RETURN_MEMERR(x);

  NODE_BODY(x) = node;
  ENCLOSURE_(x)->m.regnum = 0;
  r = scan_env_set_mem_node(env, 0, x);
  if (r != 0) {
    /* Detach the body first: onig_node_free() is expected to release the
       body together with the enclosure, and the caller still holds a
       pointer to `node`. */
    NODE_BODY(x) = NULL;
    onig_node_free(x);
    return r;
  }

  *rnode = x;
  return 0;
}
#endif
14b0e578
CS
8208\r
8209extern int\r
b602265d
DG
8210onig_parse_tree(Node** root, const UChar* pattern, const UChar* end,\r
8211 regex_t* reg, ScanEnv* env)\r
14b0e578
CS
8212{\r
8213 int r;\r
8214 UChar* p;\r
b602265d
DG
8215#ifdef USE_CALLOUT\r
8216 RegexExt* ext;\r
8217#endif\r
14b0e578 8218\r
14b0e578 8219 names_clear(reg);\r
14b0e578
CS
8220\r
8221 scan_env_clear(env);\r
b602265d 8222 env->options = reg->options;\r
14b0e578
CS
8223 env->case_fold_flag = reg->case_fold_flag;\r
8224 env->enc = reg->enc;\r
8225 env->syntax = reg->syntax;\r
8226 env->pattern = (UChar* )pattern;\r
8227 env->pattern_end = (UChar* )end;\r
8228 env->reg = reg;\r
8229\r
8230 *root = NULL;\r
b602265d
DG
8231\r
8232 if (! ONIGENC_IS_VALID_MBC_STRING(env->enc, pattern, end))\r
8233 return ONIGERR_INVALID_WIDE_CHAR_VALUE;\r
8234\r
14b0e578
CS
8235 p = (UChar* )pattern;\r
8236 r = parse_regexp(root, &p, (UChar* )end, env);\r
b602265d
DG
8237\r
8238#ifdef USE_CALL\r
8239 if (r != 0) return r;\r
8240\r
8241 if (env->has_call_zero != 0) {\r
8242 Node* zero_node;\r
8243 r = make_call_zero_body(*root, env, &zero_node);\r
8244 if (r != 0) return r;\r
8245\r
8246 *root = zero_node;\r
8247 }\r
8248#endif\r
8249\r
14b0e578 8250 reg->num_mem = env->num_mem;\r
b602265d
DG
8251\r
8252#ifdef USE_CALLOUT\r
8253 ext = REG_EXTP(reg);\r
8254 if (IS_NOT_NULL(ext) && ext->callout_num > 0) {\r
8255 r = setup_ext_callout_list_values(reg);\r
8256 }\r
8257#endif\r
8258\r
14b0e578
CS
8259 return r;\r
8260}\r
8261\r
8262extern void\r
8263onig_scan_env_set_error_string(ScanEnv* env, int ecode ARG_UNUSED,\r
b602265d 8264 UChar* arg, UChar* arg_end)\r
14b0e578
CS
8265{\r
8266 env->error = arg;\r
8267 env->error_end = arg_end;\r
8268}\r