]> git.proxmox.com Git - mirror_edk2.git/blame - MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regparse.c
MdeModulePkg/RegularExpressionDxe: Miss null pointer check
[mirror_edk2.git] / MdeModulePkg / Universal / RegularExpressionDxe / Oniguruma / regparse.c
CommitLineData
14b0e578
CS
1/**********************************************************************\r
2 regparse.c - Oniguruma (regular expression library)\r
3**********************************************************************/\r
4/*-\r
b602265d 5 * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>\r
14b0e578
CS
6 * All rights reserved.\r
7 *\r
14b0e578
CS
8 * Redistribution and use in source and binary forms, with or without\r
9 * modification, are permitted provided that the following conditions\r
10 * are met:\r
11 * 1. Redistributions of source code must retain the above copyright\r
12 * notice, this list of conditions and the following disclaimer.\r
13 * 2. Redistributions in binary form must reproduce the above copyright\r
14 * notice, this list of conditions and the following disclaimer in the\r
15 * documentation and/or other materials provided with the distribution.\r
16 *\r
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND\r
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\r
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\r
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE\r
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\r
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS\r
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)\r
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT\r
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY\r
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF\r
27 * SUCH DAMAGE.\r
28 */\r
29\r
30#include "regparse.h"\r
31#include "st.h"\r
32\r
b602265d
DG
33#ifdef DEBUG_NODE_FREE\r
34#include <stdio.h>\r
35#endif\r
36\r
37#define INIT_TAG_NAMES_ALLOC_NUM 5\r
38\r
14b0e578
CS
39#define WARN_BUFSIZE 256\r
40\r
41#define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS\r
42\r
b602265d
DG
/*
 * True when code point c may appear in a callout name: an ASCII letter,
 * an ASCII digit or '_'.  The argument is parenthesized so that an
 * expression may be passed, but it is still evaluated several times and
 * must therefore be free of side effects.
 */
#define IS_ALLOWED_CODE_IN_CALLOUT_NAME(c) \
  (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z') || \
   ((c) >= '0' && (c) <= '9') || (c) == '_' /* || (c) == '!' */)

/* Same character set, applied to callout tag names. */
#define IS_ALLOWED_CODE_IN_CALLOUT_TAG_NAME(c) \
  (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z') || \
   ((c) >= '0' && (c) <= '9') || (c) == '_')
47\r
48\r
/*
 * Syntax definition for the native Oniguruma flavor.  The initializer
 * supplies, in order: the ONIG_SYN_OP_* flag set, the ONIG_SYN_OP2_* flag
 * set, the remaining ONIG_SYN_* flags, the default option word and the
 * meta character table.
 */
OnigSyntaxType OnigSyntaxOniguruma = {
  /* GNU regex operators plus extra escapes, minus \< \> word boundaries */
  (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
     ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
     ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_O_BRACE_OCTAL |
     ONIG_SYN_OP_ESC_CONTROL_CHARS |
     ONIG_SYN_OP_ESC_C_CONTROL )
   & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
  , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
      ONIG_SYN_OP2_OPTION_RUBY |
      ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
      ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE |
      ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP |
      ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS |
      ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME |
      ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER |
      ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE |
      ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT |
      ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP |
      ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |
      ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
      ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
      ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
      ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |
      ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |
      ONIG_SYN_OP2_ESC_H_XDIGIT | ONIG_SYN_OP2_ESC_U_HEX4 )
  , ( SYN_GNU_REGEX_BV |
      ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |
      ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |
      ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
      ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |
      ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |
      ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |
      ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )
  , ONIG_OPTION_NONE
  ,
  {
      (OnigCodePoint )'\\'                       /* esc */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
  }
};
14b0e578
CS
93\r
/*
 * Syntax definition for the Ruby flavor.  Compared with the native
 * Oniguruma table in this file, the ONIG_SYN_OP2_* set omits the callout
 * flags (QMARK_BRACE_CALLOUT_CONTENTS, ASTERISK_CALLOUT_NAME) and
 * ESC_CAPITAL_N_O_SUPER_DOT; the other members are the same.
 */
OnigSyntaxType OnigSyntaxRuby = {
  /* GNU regex operators plus extra escapes, minus \< \> word boundaries */
  (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
     ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
     ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_O_BRACE_OCTAL |
     ONIG_SYN_OP_ESC_CONTROL_CHARS |
     ONIG_SYN_OP_ESC_C_CONTROL )
   & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
  , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
      ONIG_SYN_OP2_OPTION_RUBY |
      ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
      ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE |
      ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP |
      ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER |
      ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE |
      ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP |
      ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |
      ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
      ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
      ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
      ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |
      ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |
      ONIG_SYN_OP2_ESC_H_XDIGIT | ONIG_SYN_OP2_ESC_U_HEX4 )
  , ( SYN_GNU_REGEX_BV |
      ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |
      ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |
      ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
      ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |
      ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |
      ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |
      ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )
  , ONIG_OPTION_NONE
  ,
  {
      (OnigCodePoint )'\\'                       /* esc */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
  }
};
135\r
b602265d 136OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_ONIGURUMA;\r
14b0e578
CS
137\r
138extern void onig_null_warn(const char* s ARG_UNUSED) { }\r
139\r
140#ifdef DEFAULT_WARN_FUNCTION\r
141static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION;\r
142#else\r
143static OnigWarnFunc onig_warn = onig_null_warn;\r
144#endif\r
145\r
146#ifdef DEFAULT_VERB_WARN_FUNCTION\r
147static OnigWarnFunc onig_verb_warn = (OnigWarnFunc )DEFAULT_VERB_WARN_FUNCTION;\r
148#else\r
149static OnigWarnFunc onig_verb_warn = onig_null_warn;\r
150#endif\r
151\r
152extern void onig_set_warn_func(OnigWarnFunc f)\r
153{\r
154 onig_warn = f;\r
155}\r
156\r
157extern void onig_set_verb_warn_func(OnigWarnFunc f)\r
158{\r
159 onig_verb_warn = f;\r
160}\r
161\r
b602265d
DG
162extern void\r
163onig_warning(const char* s)\r
164{\r
165 if (onig_warn == onig_null_warn) return ;\r
166\r
167 (*onig_warn)(s);\r
168}\r
169\r
#define DEFAULT_MAX_CAPTURE_NUM   32767

/* Current upper bound on the number of capture groups. */
static int MaxCaptureNum = DEFAULT_MAX_CAPTURE_NUM;

/*
 * Set the capture-group limit to num.
 * Returns 0 on success, or -1 (leaving the limit untouched) when num is
 * negative.
 */
extern int
onig_set_capture_num_limit(int num)
{
  int status = -1;

  if (num >= 0) {
    MaxCaptureNum = num;
    status = 0;
  }
  return status;
}
182\r
183static unsigned int ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT;\r
184\r
185extern unsigned int\r
186onig_get_parse_depth_limit(void)\r
187{\r
188 return ParseDepthLimit;\r
189}\r
190\r
191extern int\r
192onig_set_parse_depth_limit(unsigned int depth)\r
193{\r
194 if (depth == 0)\r
195 ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT;\r
196 else\r
197 ParseDepthLimit = depth;\r
198 return 0;\r
199}\r
200\r
/*
 * Multiply two non-negative ints with an overflow guard.
 * Returns 0 when either factor is 0, the product when x is strictly below
 * INT_MAX / y, and -1 otherwise.  The bound is strict, so a few
 * representable products (e.g. 1 * INT_MAX) are also reported as -1.
 */
static int
positive_int_multiply(int x, int y)
{
  if (x == 0 || y == 0) return 0;

  if (x >= INT_MAX / y) return -1;

  return x * y;
}
211\r
14b0e578
CS
212static void\r
213bbuf_free(BBuf* bbuf)\r
214{\r
215 if (IS_NOT_NULL(bbuf)) {\r
216 if (IS_NOT_NULL(bbuf->p)) xfree(bbuf->p);\r
217 xfree(bbuf);\r
218 }\r
219}\r
220\r
221static int\r
222bbuf_clone(BBuf** rto, BBuf* from)\r
223{\r
224 int r;\r
225 BBuf *to;\r
226\r
227 *rto = to = (BBuf* )xmalloc(sizeof(BBuf));\r
228 CHECK_NULL_RETURN_MEMERR(to);\r
b602265d
DG
229 r = BB_INIT(to, from->alloc);\r
230 if (r != 0) {\r
231 xfree(to->p);\r
232 *rto = 0;\r
233 return r;\r
234 }\r
14b0e578
CS
235 to->used = from->used;\r
236 xmemcpy(to->p, from->p, from->used);\r
237 return 0;\r
238}\r
239\r
b602265d
DG
240static int backref_rel_to_abs(int rel_no, ScanEnv* env)\r
241{\r
242 if (rel_no > 0) {\r
243 return env->num_mem + rel_no;\r
244 }\r
245 else {\r
246 return env->num_mem + 1 + rel_no;\r
247 }\r
248}\r
249\r
/* Set / clear the bits f in the option word v. */
#define OPTION_ON(v,f)     ((v) |= (f))
#define OPTION_OFF(v,f)    ((v) &= ~(f))

/*
 * Clear the bits f in v when `negative` is non-zero, set them otherwise.
 * The whole expansion is parenthesized so the macro behaves as a single
 * expression wherever it is used.
 */
#define OPTION_NEGATE(v,f,negative) \
  ((negative) ? ((v) &= ~(f)) : ((v) |= (f)))
14b0e578
CS
254\r
/* First code point of the multi-byte range: 0 when the encoding's minimum
   character length exceeds one byte, 0x80 otherwise. */
#define MBCODE_START_POS(enc) \
  (OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80)

/* Add the range [MBCODE_START_POS(enc), ~0] to the code range buffer
   pbuf; evaluates to add_code_range_to_buf()'s result. */
#define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \
  add_code_range_to_buf(pbuf, MBCODE_START_POS(enc), ~((OnigCodePoint )0))

/* For encodings that are not single-byte, add the full multi-byte range to
   mbuf.  Uses the enclosing function's local `r` and returns from that
   function when the add fails. */
#define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\
  if (! ONIGENC_IS_SINGLEBYTE(enc)) {\
    r = SET_ALL_MULTI_BYTE_RANGE(enc, &(mbuf));\
    if (r != 0) return r;\
  }\
} while (0)
267\r
268\r
/* Store 1 in `empty` when every word of bit set bs is zero, 0 otherwise.
   Declares its own loop counter `i`, so bs must not refer to an outer
   variable named i. */
#define BITSET_IS_EMPTY(bs,empty) do {\
  int i;\
  empty = 1;\
  for (i = 0; i < (int )BITSET_SIZE; i++) {\
    if ((bs)[i] != 0) {\
      empty = 0; break;\
    }\
  }\
} while (0)
278\r
279static void\r
280bitset_set_range(BitSetRef bs, int from, int to)\r
281{\r
282 int i;\r
283 for (i = from; i <= to && i < SINGLE_BYTE_SIZE; i++) {\r
284 BITSET_SET_BIT(bs, i);\r
285 }\r
286}\r
287\r
288#if 0\r
289static void\r
290bitset_set_all(BitSetRef bs)\r
291{\r
292 int i;\r
293 for (i = 0; i < BITSET_SIZE; i++) { bs[i] = ~((Bits )0); }\r
294}\r
295#endif\r
296\r
297static void\r
298bitset_invert(BitSetRef bs)\r
299{\r
300 int i;\r
301 for (i = 0; i < (int )BITSET_SIZE; i++) { bs[i] = ~(bs[i]); }\r
302}\r
303\r
304static void\r
305bitset_invert_to(BitSetRef from, BitSetRef to)\r
306{\r
307 int i;\r
308 for (i = 0; i < (int )BITSET_SIZE; i++) { to[i] = ~(from[i]); }\r
309}\r
310\r
311static void\r
312bitset_and(BitSetRef dest, BitSetRef bs)\r
313{\r
314 int i;\r
315 for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] &= bs[i]; }\r
316}\r
317\r
318static void\r
319bitset_or(BitSetRef dest, BitSetRef bs)\r
320{\r
321 int i;\r
322 for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] |= bs[i]; }\r
323}\r
324\r
325static void\r
326bitset_copy(BitSetRef dest, BitSetRef bs)\r
327{\r
328 int i;\r
329 for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] = bs[i]; }\r
330}\r
331\r
332extern int\r
333onig_strncmp(const UChar* s1, const UChar* s2, int n)\r
334{\r
335 int x;\r
336\r
337 while (n-- > 0) {\r
338 x = *s2++ - *s1++;\r
339 if (x) return x;\r
340 }\r
341 return 0;\r
342}\r
343\r
344extern void\r
345onig_strcpy(UChar* dest, const UChar* src, const UChar* end)\r
346{\r
b602265d 347 int len = (int )(end - src);\r
14b0e578
CS
348 if (len > 0) {\r
349 xmemcpy(dest, src, len);\r
350 dest[len] = (UChar )0;\r
351 }\r
352}\r
353\r
b602265d
DG
354static int\r
355save_entry(ScanEnv* env, enum SaveType type, int* id)\r
14b0e578 356{\r
b602265d 357 int nid = env->save_num;\r
14b0e578 358\r
b602265d
DG
359#if 0\r
360 if (IS_NULL(env->saves)) {\r
361 int n = 10;\r
362 env->saves = (SaveItem* )xmalloc(sizeof(SaveItem) * n);\r
363 CHECK_NULL_RETURN_MEMERR(env->saves);\r
364 env->save_alloc_num = n;\r
365 }\r
366 else if (env->save_alloc_num <= nid) {\r
367 int n = env->save_alloc_num * 2;\r
368 SaveItem* p = (SaveItem* )xrealloc(env->saves, sizeof(SaveItem) * n, sizeof(SaveItem)*env->save_alloc_num);\r
369 CHECK_NULL_RETURN_MEMERR(p);\r
370 env->saves = p;\r
371 env->save_alloc_num = n;\r
372 }\r
14b0e578 373\r
b602265d
DG
374 env->saves[nid].type = type;\r
375#endif\r
14b0e578 376\r
b602265d
DG
377 env->save_num++;\r
378 *id = nid;\r
379 return 0;\r
14b0e578 380}\r
14b0e578
CS
381\r
382/* scan pattern methods */\r
/*
 * Pattern scanning helpers.  They operate on variables of the function
 * that uses them: `p` (current position), `end` (end of pattern), `enc`
 * (encoding) and, for the non-_S forms, `pfetch_prev` (declared with
 * PFETCH_READY).
 */

/* Value PPEEK yields at end of pattern. */
#define PEND_VALUE   0

/* Declares the variable that remembers the previous position. */
#define PFETCH_READY  UChar* pfetch_prev
/* 1 when the scan position has reached `end`, 0 otherwise. */
#define PEND         (p < end ?  0 : 1)
/* Step back to the position saved by the last PINC/PFETCH. */
#define PUNFETCH     p = pfetch_prev
/* Advance one character, remembering the old position. */
#define PINC       do { \
  pfetch_prev = p; \
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
} while (0)
/* Read the character at p into c, then advance as PINC does. */
#define PFETCH(c)  do { \
  c = ONIGENC_MBC_TO_CODE(enc, p, end); \
  pfetch_prev = p; \
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
} while (0)

/* _S variants: same as above without saving the previous position. */
#define PINC_S     do { \
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
} while (0)
#define PFETCH_S(c) do { \
  c = ONIGENC_MBC_TO_CODE(enc, p, end); \
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
} while (0)

/* Character at p without advancing, or PEND_VALUE at end of pattern. */
#define PPEEK        (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE)
/* Non-zero when the next character equals c.  The argument is
   parenthesized so the cast applies to the whole expression. */
#define PPEEK_IS(c)  (PPEEK == (OnigCodePoint )(c))
408\r
409static UChar*\r
410strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end,\r
b602265d 411 int capa, int oldCapa)\r
14b0e578
CS
412{\r
413 UChar* r;\r
414\r
415 if (dest)\r
416 r = (UChar* )xrealloc(dest, capa + 1, oldCapa);\r
417 else\r
418 r = (UChar* )xmalloc(capa + 1);\r
419\r
420 CHECK_NULL_RETURN(r);\r
421 onig_strcpy(r + (dest_end - dest), src, src_end);\r
422 return r;\r
423}\r
424\r
425/* dest on static area */\r
426static UChar*\r
427strcat_capa_from_static(UChar* dest, UChar* dest_end,\r
b602265d 428 const UChar* src, const UChar* src_end, int capa)\r
14b0e578
CS
429{\r
430 UChar* r;\r
431\r
432 r = (UChar* )xmalloc(capa + 1);\r
433 CHECK_NULL_RETURN(r);\r
434 onig_strcpy(r, dest, dest_end);\r
435 onig_strcpy(r + (dest_end - dest), src, src_end);\r
436 return r;\r
437}\r
438\r
439\r
440#ifdef USE_ST_LIBRARY\r
441\r
/* Hash key describing the byte range [s, end). */
typedef struct {
  UChar* s;    /* first byte of the key */
  UChar* end;  /* one past the last byte of the key */
} st_str_end_key;
446\r
447static int\r
448str_end_cmp(st_str_end_key* x, st_str_end_key* y)\r
449{\r
450 UChar *p, *q;\r
451 int c;\r
452\r
453 if ((x->end - x->s) != (y->end - y->s))\r
454 return 1;\r
455\r
456 p = x->s;\r
457 q = y->s;\r
458 while (p < x->end) {\r
459 c = (int )*p - (int )*q;\r
460 if (c != 0) return c;\r
461\r
462 p++; q++;\r
463 }\r
464\r
465 return 0;\r
466}\r
467\r
468static int\r
469str_end_hash(st_str_end_key* x)\r
470{\r
471 UChar *p;\r
472 int val = 0;\r
473\r
474 p = x->s;\r
475 while (p < x->end) {\r
476 val = val * 997 + (int )*p++;\r
477 }\r
478\r
479 return val + (val >> 5);\r
480}\r
481\r
482extern hash_table_type*\r
483onig_st_init_strend_table_with_size(int size)\r
484{\r
485 static struct st_hash_type hashType = {\r
486 str_end_cmp,\r
487 str_end_hash,\r
488 };\r
489\r
490 return (hash_table_type* )\r
491 onig_st_init_table_with_size(&hashType, size);\r
492}\r
493\r
494extern int\r
495onig_st_lookup_strend(hash_table_type* table, const UChar* str_key,\r
b602265d 496 const UChar* end_key, hash_data_type *value)\r
14b0e578
CS
497{\r
498 st_str_end_key key;\r
499\r
500 key.s = (UChar* )str_key;\r
501 key.end = (UChar* )end_key;\r
502\r
b602265d 503 return onig_st_lookup(table, (st_data_t )(&key), value);\r
14b0e578
CS
504}\r
505\r
506extern int\r
507onig_st_insert_strend(hash_table_type* table, const UChar* str_key,\r
b602265d 508 const UChar* end_key, hash_data_type value)\r
14b0e578
CS
509{\r
510 st_str_end_key* key;\r
511 int result;\r
512\r
513 key = (st_str_end_key* )xmalloc(sizeof(st_str_end_key));\r
b0c2b797 514 CHECK_NULL_RETURN_MEMERR(key);\r
b602265d 515\r
14b0e578
CS
516 key->s = (UChar* )str_key;\r
517 key->end = (UChar* )end_key;\r
b602265d 518 result = onig_st_insert(table, (st_data_t )key, value);\r
14b0e578
CS
519 if (result) {\r
520 xfree(key);\r
521 }\r
522 return result;\r
523}\r
524\r
14b0e578 525\r
b602265d
DG
/* Hash key for the callout name table: encoding, callout type and the
   name bytes [s, end). */
typedef struct {
  OnigEncoding enc;
  int    type; /* callout type: single or not */
  UChar* s;    /* first byte of the name */
  UChar* end;  /* one past the last byte of the name */
} st_callout_name_key;
532\r
533static int\r
534callout_name_table_cmp(st_callout_name_key* x, st_callout_name_key* y)\r
535{\r
536 UChar *p, *q;\r
537 int c;\r
538\r
539 if (x->enc != y->enc) return 1;\r
540 if (x->type != y->type) return 1;\r
541 if ((x->end - x->s) != (y->end - y->s))\r
542 return 1;\r
543\r
544 p = x->s;\r
545 q = y->s;\r
546 while (p < x->end) {\r
547 c = (int )*p - (int )*q;\r
548 if (c != 0) return c;\r
549\r
550 p++; q++;\r
551 }\r
552\r
553 return 0;\r
554}\r
555\r
556static int\r
557callout_name_table_hash(st_callout_name_key* x)\r
558{\r
559 UChar *p;\r
560 int val = 0;\r
561\r
562 p = x->s;\r
563 while (p < x->end) {\r
564 val = val * 997 + (int )*p++;\r
565 }\r
566\r
567 /* use intptr_t for escape warning in Windows */\r
568 return val + (val >> 5) + ((intptr_t )x->enc & 0xffff) + x->type;\r
569}\r
570\r
571extern hash_table_type*\r
572onig_st_init_callout_name_table_with_size(int size)\r
573{\r
574 static struct st_hash_type hashType = {\r
575 callout_name_table_cmp,\r
576 callout_name_table_hash,\r
577 };\r
578\r
579 return (hash_table_type* )\r
580 onig_st_init_table_with_size(&hashType, size);\r
581}\r
582\r
583extern int\r
584onig_st_lookup_callout_name_table(hash_table_type* table,\r
585 OnigEncoding enc,\r
586 int type,\r
587 const UChar* str_key,\r
588 const UChar* end_key,\r
589 hash_data_type *value)\r
590{\r
591 st_callout_name_key key;\r
592\r
593 key.enc = enc;\r
594 key.type = type;\r
595 key.s = (UChar* )str_key;\r
596 key.end = (UChar* )end_key;\r
597\r
598 return onig_st_lookup(table, (st_data_t )(&key), value);\r
599}\r
600\r
601static int\r
602st_insert_callout_name_table(hash_table_type* table,\r
603 OnigEncoding enc, int type,\r
604 UChar* str_key, UChar* end_key,\r
605 hash_data_type value)\r
606{\r
607 st_callout_name_key* key;\r
608 int result;\r
609\r
610 key = (st_callout_name_key* )xmalloc(sizeof(st_callout_name_key));\r
611 CHECK_NULL_RETURN_MEMERR(key);\r
612\r
613 /* key->s: don't duplicate, because str_key is duped in callout_name_entry() */\r
614 key->enc = enc;\r
615 key->type = type;\r
616 key->s = str_key;\r
617 key->end = end_key;\r
618 result = onig_st_insert(table, (st_data_t )key, value);\r
619 if (result) {\r
620 xfree(key);\r
621 }\r
622 return result;\r
623}\r
624\r
625#endif /* USE_ST_LIBRARY */\r
14b0e578 626\r
14b0e578
CS
627\r
/* Initial capacity of NameEntry.back_refs. */
#define INIT_NAME_BACKREFS_ALLOC_NUM   8

/* One group name together with the group numbers registered under it. */
typedef struct {
  UChar* name;
  int    name_len;   /* byte length */
  int    back_num;   /* number of backrefs */
  int    back_alloc; /* allocated element count of back_refs */
  int    back_ref1;  /* the group number while back_num == 1 */
  int*   back_refs;  /* the group numbers once back_num > 1 */
} NameEntry;
638\r
639#ifdef USE_ST_LIBRARY\r
640\r
b602265d
DG
641#define INIT_NAMES_ALLOC_NUM 5\r
642\r
14b0e578
CS
643typedef st_table NameTable;\r
644typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */\r
645\r
646#define NAMEBUF_SIZE 24\r
647#define NAMEBUF_SIZE_1 25\r
648\r
649#ifdef ONIG_DEBUG\r
650static int\r
651i_print_name_entry(UChar* key, NameEntry* e, void* arg)\r
652{\r
653 int i;\r
654 FILE* fp = (FILE* )arg;\r
655\r
656 fprintf(fp, "%s: ", e->name);\r
657 if (e->back_num == 0)\r
658 fputs("-", fp);\r
659 else if (e->back_num == 1)\r
660 fprintf(fp, "%d", e->back_ref1);\r
661 else {\r
662 for (i = 0; i < e->back_num; i++) {\r
663 if (i > 0) fprintf(fp, ", ");\r
664 fprintf(fp, "%d", e->back_refs[i]);\r
665 }\r
666 }\r
667 fputs("\n", fp);\r
668 return ST_CONTINUE;\r
669}\r
670\r
671extern int\r
672onig_print_names(FILE* fp, regex_t* reg)\r
673{\r
674 NameTable* t = (NameTable* )reg->name_table;\r
675\r
676 if (IS_NOT_NULL(t)) {\r
677 fprintf(fp, "name table\n");\r
678 onig_st_foreach(t, i_print_name_entry, (HashDataType )fp);\r
679 fputs("\n", fp);\r
680 }\r
681 return 0;\r
682}\r
683#endif /* ONIG_DEBUG */\r
684\r
685static int\r
686i_free_name_entry(UChar* key, NameEntry* e, void* arg ARG_UNUSED)\r
687{\r
688 xfree(e->name);\r
689 if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);\r
690 xfree(key);\r
691 xfree(e);\r
692 return ST_DELETE;\r
693}\r
694\r
695static int\r
696names_clear(regex_t* reg)\r
697{\r
698 NameTable* t = (NameTable* )reg->name_table;\r
699\r
700 if (IS_NOT_NULL(t)) {\r
701 onig_st_foreach(t, i_free_name_entry, 0);\r
702 }\r
703 return 0;\r
704}\r
705\r
706extern int\r
707onig_names_free(regex_t* reg)\r
708{\r
709 int r;\r
710 NameTable* t;\r
711\r
712 r = names_clear(reg);\r
b602265d 713 if (r != 0) return r;\r
14b0e578
CS
714\r
715 t = (NameTable* )reg->name_table;\r
716 if (IS_NOT_NULL(t)) onig_st_free_table(t);\r
717 reg->name_table = (void* )NULL;\r
718 return 0;\r
719}\r
720\r
721static NameEntry*\r
722name_find(regex_t* reg, const UChar* name, const UChar* name_end)\r
723{\r
724 NameEntry* e;\r
725 NameTable* t = (NameTable* )reg->name_table;\r
726\r
727 e = (NameEntry* )NULL;\r
728 if (IS_NOT_NULL(t)) {\r
729 onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));\r
730 }\r
731 return e;\r
732}\r
733\r
/* Context handed to i_names() while iterating over the name table. */
typedef struct {
  int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*); /* user callback */
  regex_t* reg;      /* passed through to func */
  void* arg;         /* passed through to func */
  int ret;           /* first non-zero value returned by func, else 0 */
  OnigEncoding enc;
} INamesArg;
741\r
742static int\r
743i_names(UChar* key ARG_UNUSED, NameEntry* e, INamesArg* arg)\r
744{\r
745 int r = (*(arg->func))(e->name,\r
746 e->name + e->name_len,\r
747 e->back_num,\r
b602265d
DG
748 (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),\r
749 arg->reg, arg->arg);\r
14b0e578
CS
750 if (r != 0) {\r
751 arg->ret = r;\r
752 return ST_STOP;\r
753 }\r
754 return ST_CONTINUE;\r
755}\r
756\r
757extern int\r
758onig_foreach_name(regex_t* reg,\r
759 int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)\r
760{\r
761 INamesArg narg;\r
762 NameTable* t = (NameTable* )reg->name_table;\r
763\r
764 narg.ret = 0;\r
765 if (IS_NOT_NULL(t)) {\r
766 narg.func = func;\r
767 narg.reg = reg;\r
768 narg.arg = arg;\r
769 narg.enc = reg->enc; /* should be pattern encoding. */\r
b602265d 770 onig_st_foreach(t, i_names, (HashDataType )&narg);\r
14b0e578
CS
771 }\r
772 return narg.ret;\r
773}\r
774\r
775static int\r
776i_renumber_name(UChar* key ARG_UNUSED, NameEntry* e, GroupNumRemap* map)\r
777{\r
778 int i;\r
779\r
780 if (e->back_num > 1) {\r
781 for (i = 0; i < e->back_num; i++) {\r
782 e->back_refs[i] = map[e->back_refs[i]].new_val;\r
783 }\r
784 }\r
785 else if (e->back_num == 1) {\r
786 e->back_ref1 = map[e->back_ref1].new_val;\r
787 }\r
788\r
789 return ST_CONTINUE;\r
790}\r
791\r
792extern int\r
793onig_renumber_name_table(regex_t* reg, GroupNumRemap* map)\r
794{\r
795 NameTable* t = (NameTable* )reg->name_table;\r
796\r
797 if (IS_NOT_NULL(t)) {\r
b602265d 798 onig_st_foreach(t, i_renumber_name, (HashDataType )map);\r
14b0e578
CS
799 }\r
800 return 0;\r
801}\r
802\r
803\r
804extern int\r
805onig_number_of_names(regex_t* reg)\r
806{\r
807 NameTable* t = (NameTable* )reg->name_table;\r
808\r
809 if (IS_NOT_NULL(t))\r
810 return t->num_entries;\r
811 else\r
812 return 0;\r
813}\r
814\r
815#else /* USE_ST_LIBRARY */\r
816\r
817#define INIT_NAMES_ALLOC_NUM 8\r
818\r
/* Array-based name table used when the st hash library is not compiled in. */
typedef struct {
  NameEntry* e;     /* entry array */
  int        num;   /* entries in use */
  int        alloc; /* allocated element count of e */
} NameTable;
824\r
825#ifdef ONIG_DEBUG\r
826extern int\r
827onig_print_names(FILE* fp, regex_t* reg)\r
828{\r
829 int i, j;\r
830 NameEntry* e;\r
831 NameTable* t = (NameTable* )reg->name_table;\r
832\r
833 if (IS_NOT_NULL(t) && t->num > 0) {\r
834 fprintf(fp, "name table\n");\r
835 for (i = 0; i < t->num; i++) {\r
836 e = &(t->e[i]);\r
837 fprintf(fp, "%s: ", e->name);\r
838 if (e->back_num == 0) {\r
b602265d 839 fputs("-", fp);\r
14b0e578
CS
840 }\r
841 else if (e->back_num == 1) {\r
b602265d 842 fprintf(fp, "%d", e->back_ref1);\r
14b0e578
CS
843 }\r
844 else {\r
b602265d
DG
845 for (j = 0; j < e->back_num; j++) {\r
846 if (j > 0) fprintf(fp, ", ");\r
847 fprintf(fp, "%d", e->back_refs[j]);\r
848 }\r
14b0e578
CS
849 }\r
850 fputs("\n", fp);\r
851 }\r
852 fputs("\n", fp);\r
853 }\r
854 return 0;\r
855}\r
856#endif\r
857\r
858static int\r
859names_clear(regex_t* reg)\r
860{\r
861 int i;\r
862 NameEntry* e;\r
863 NameTable* t = (NameTable* )reg->name_table;\r
864\r
865 if (IS_NOT_NULL(t)) {\r
866 for (i = 0; i < t->num; i++) {\r
867 e = &(t->e[i]);\r
868 if (IS_NOT_NULL(e->name)) {\r
b602265d
DG
869 xfree(e->name);\r
870 e->name = NULL;\r
871 e->name_len = 0;\r
872 e->back_num = 0;\r
873 e->back_alloc = 0;\r
874 if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);\r
875 e->back_refs = (int* )NULL;\r
14b0e578
CS
876 }\r
877 }\r
878 if (IS_NOT_NULL(t->e)) {\r
879 xfree(t->e);\r
880 t->e = NULL;\r
881 }\r
882 t->num = 0;\r
883 }\r
884 return 0;\r
885}\r
886\r
887extern int\r
888onig_names_free(regex_t* reg)\r
889{\r
890 int r;\r
891 NameTable* t;\r
892\r
893 r = names_clear(reg);\r
b602265d 894 if (r != 0) return r;\r
14b0e578
CS
895\r
896 t = (NameTable* )reg->name_table;\r
897 if (IS_NOT_NULL(t)) xfree(t);\r
898 reg->name_table = NULL;\r
899 return 0;\r
900}\r
901\r
902static NameEntry*\r
903name_find(regex_t* reg, UChar* name, UChar* name_end)\r
904{\r
905 int i, len;\r
906 NameEntry* e;\r
907 NameTable* t = (NameTable* )reg->name_table;\r
908\r
909 if (IS_NOT_NULL(t)) {\r
910 len = name_end - name;\r
911 for (i = 0; i < t->num; i++) {\r
912 e = &(t->e[i]);\r
913 if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)\r
b602265d 914 return e;\r
14b0e578
CS
915 }\r
916 }\r
917 return (NameEntry* )NULL;\r
918}\r
919\r
920extern int\r
921onig_foreach_name(regex_t* reg,\r
922 int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)\r
923{\r
924 int i, r;\r
925 NameEntry* e;\r
926 NameTable* t = (NameTable* )reg->name_table;\r
927\r
928 if (IS_NOT_NULL(t)) {\r
929 for (i = 0; i < t->num; i++) {\r
930 e = &(t->e[i]);\r
931 r = (*func)(e->name, e->name + e->name_len, e->back_num,\r
b602265d
DG
932 (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),\r
933 reg, arg);\r
14b0e578
CS
934 if (r != 0) return r;\r
935 }\r
936 }\r
937 return 0;\r
938}\r
939\r
940extern int\r
941onig_number_of_names(regex_t* reg)\r
942{\r
943 NameTable* t = (NameTable* )reg->name_table;\r
944\r
945 if (IS_NOT_NULL(t))\r
946 return t->num;\r
947 else\r
948 return 0;\r
949}\r
950\r
951#endif /* else USE_ST_LIBRARY */\r
952\r
953static int\r
954name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)\r
955{\r
b602265d 956 int r;\r
14b0e578
CS
957 int alloc;\r
958 NameEntry* e;\r
959 NameTable* t = (NameTable* )reg->name_table;\r
960\r
961 if (name_end - name <= 0)\r
962 return ONIGERR_EMPTY_GROUP_NAME;\r
963\r
964 e = name_find(reg, name, name_end);\r
965 if (IS_NULL(e)) {\r
966#ifdef USE_ST_LIBRARY\r
967 if (IS_NULL(t)) {\r
b602265d 968 t = onig_st_init_strend_table_with_size(INIT_NAMES_ALLOC_NUM);\r
a5def177 969 CHECK_NULL_RETURN_MEMERR(t);\r
14b0e578
CS
970 reg->name_table = (void* )t;\r
971 }\r
972 e = (NameEntry* )xmalloc(sizeof(NameEntry));\r
973 CHECK_NULL_RETURN_MEMERR(e);\r
974\r
b602265d 975 e->name = onigenc_strdup(reg->enc, name, name_end);\r
14b0e578
CS
976 if (IS_NULL(e->name)) {\r
977 xfree(e); return ONIGERR_MEMORY;\r
978 }\r
b602265d
DG
979 r = onig_st_insert_strend(t, e->name, (e->name + (name_end - name)),\r
980 (HashDataType )e);\r
981 if (r < 0) return r;\r
14b0e578 982\r
b602265d 983 e->name_len = (int )(name_end - name);\r
14b0e578
CS
984 e->back_num = 0;\r
985 e->back_alloc = 0;\r
986 e->back_refs = (int* )NULL;\r
987\r
988#else\r
989\r
990 if (IS_NULL(t)) {\r
991 alloc = INIT_NAMES_ALLOC_NUM;\r
992 t = (NameTable* )xmalloc(sizeof(NameTable));\r
993 CHECK_NULL_RETURN_MEMERR(t);\r
994 t->e = NULL;\r
995 t->alloc = 0;\r
996 t->num = 0;\r
997\r
998 t->e = (NameEntry* )xmalloc(sizeof(NameEntry) * alloc);\r
999 if (IS_NULL(t->e)) {\r
b602265d
DG
1000 xfree(t);\r
1001 return ONIGERR_MEMORY;\r
14b0e578
CS
1002 }\r
1003 t->alloc = alloc;\r
1004 reg->name_table = t;\r
1005 goto clear;\r
1006 }\r
1007 else if (t->num == t->alloc) {\r
1008 int i;\r
1009\r
1010 alloc = t->alloc * 2;\r
b602265d 1011 t->e = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc, sizeof(NameEntry) * t->alloc);\r
14b0e578
CS
1012 CHECK_NULL_RETURN_MEMERR(t->e);\r
1013 t->alloc = alloc;\r
1014\r
1015 clear:\r
1016 for (i = t->num; i < t->alloc; i++) {\r
b602265d
DG
1017 t->e[i].name = NULL;\r
1018 t->e[i].name_len = 0;\r
1019 t->e[i].back_num = 0;\r
1020 t->e[i].back_alloc = 0;\r
1021 t->e[i].back_refs = (int* )NULL;\r
14b0e578
CS
1022 }\r
1023 }\r
1024 e = &(t->e[t->num]);\r
1025 t->num++;\r
b602265d 1026 e->name = onigenc_strdup(reg->enc, name, name_end);\r
14b0e578
CS
1027 if (IS_NULL(e->name)) return ONIGERR_MEMORY;\r
1028 e->name_len = name_end - name;\r
1029#endif\r
1030 }\r
1031\r
1032 if (e->back_num >= 1 &&\r
1033 ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME)) {\r
1034 onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME,\r
b602265d 1035 name, name_end);\r
14b0e578
CS
1036 return ONIGERR_MULTIPLEX_DEFINED_NAME;\r
1037 }\r
1038\r
1039 e->back_num++;\r
1040 if (e->back_num == 1) {\r
1041 e->back_ref1 = backref;\r
1042 }\r
1043 else {\r
1044 if (e->back_num == 2) {\r
1045 alloc = INIT_NAME_BACKREFS_ALLOC_NUM;\r
1046 e->back_refs = (int* )xmalloc(sizeof(int) * alloc);\r
1047 CHECK_NULL_RETURN_MEMERR(e->back_refs);\r
1048 e->back_alloc = alloc;\r
1049 e->back_refs[0] = e->back_ref1;\r
1050 e->back_refs[1] = backref;\r
1051 }\r
1052 else {\r
1053 if (e->back_num > e->back_alloc) {\r
b602265d
DG
1054 alloc = e->back_alloc * 2;\r
1055 e->back_refs = (int* )xrealloc(e->back_refs, sizeof(int) * alloc, sizeof(int) * e->back_alloc);\r
1056 CHECK_NULL_RETURN_MEMERR(e->back_refs);\r
1057 e->back_alloc = alloc;\r
14b0e578
CS
1058 }\r
1059 e->back_refs[e->back_num - 1] = backref;\r
1060 }\r
1061 }\r
1062\r
1063 return 0;\r
1064}\r
1065\r
1066extern int\r
1067onig_name_to_group_numbers(regex_t* reg, const UChar* name,\r
b602265d 1068 const UChar* name_end, int** nums)\r
14b0e578
CS
1069{\r
1070 NameEntry* e = name_find(reg, name, name_end);\r
1071\r
1072 if (IS_NULL(e)) return ONIGERR_UNDEFINED_NAME_REFERENCE;\r
1073\r
1074 switch (e->back_num) {\r
1075 case 0:\r
1076 break;\r
1077 case 1:\r
1078 *nums = &(e->back_ref1);\r
1079 break;\r
1080 default:\r
1081 *nums = e->back_refs;\r
1082 break;\r
1083 }\r
1084 return e->back_num;\r
1085}\r
1086\r
1087extern int\r
1088onig_name_to_backref_number(regex_t* reg, const UChar* name,\r
b602265d 1089 const UChar* name_end, OnigRegion *region)\r
14b0e578
CS
1090{\r
1091 int i, n, *nums;\r
1092\r
1093 n = onig_name_to_group_numbers(reg, name, name_end, &nums);\r
1094 if (n < 0)\r
1095 return n;\r
1096 else if (n == 0)\r
1097 return ONIGERR_PARSER_BUG;\r
1098 else if (n == 1)\r
1099 return nums[0];\r
1100 else {\r
1101 if (IS_NOT_NULL(region)) {\r
1102 for (i = n - 1; i >= 0; i--) {\r
b602265d
DG
1103 if (region->beg[nums[i]] != ONIG_REGION_NOTPOS)\r
1104 return nums[i];\r
14b0e578
CS
1105 }\r
1106 }\r
1107 return nums[n - 1];\r
1108 }\r
1109}\r
1110\r
14b0e578
CS
1111extern int\r
1112onig_noname_group_capture_is_active(regex_t* reg)\r
1113{\r
1114 if (ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_DONT_CAPTURE_GROUP))\r
1115 return 0;\r
1116\r
14b0e578
CS
1117 if (onig_number_of_names(reg) > 0 &&\r
1118 IS_SYNTAX_BV(reg->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&\r
1119 !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {\r
1120 return 0;\r
1121 }\r
14b0e578
CS
1122\r
1123 return 1;\r
1124}\r
1125\r
b602265d 1126#ifdef USE_CALLOUT\r
14b0e578 1127\r
b602265d
DG
/* Registration record for one named callout. */
typedef struct {
  OnigCalloutType type;
  int             in;
  OnigCalloutFunc start_func;
  OnigCalloutFunc end_func;
  int             arg_num;      /* number of arguments */
  int             opt_arg_num;  /* how many of them are optional; these are
                                   the last opt_arg_num argument slots */
  unsigned int    arg_types[ONIG_CALLOUT_MAX_ARGS_NUM];
  OnigValue       opt_defaults[ONIG_CALLOUT_MAX_ARGS_NUM];
  UChar*          name; /* reference to GlobalCalloutNameTable entry: e->name */
} CalloutNameListEntry;
14b0e578 1139\r
b602265d
DG
1140typedef struct {\r
1141 int n;\r
1142 int alloc;\r
1143 CalloutNameListEntry* v;\r
1144} CalloutNameListType;\r
14b0e578 1145\r
b602265d 1146static CalloutNameListType* GlobalCalloutNameList;\r
14b0e578
CS
1147\r
1148static int\r
b602265d 1149make_callout_func_list(CalloutNameListType** rs, int init_size)\r
14b0e578 1150{\r
b602265d
DG
1151 CalloutNameListType* s;\r
1152 CalloutNameListEntry* v;\r
14b0e578 1153\r
b602265d 1154 *rs = 0;\r
14b0e578 1155\r
b602265d
DG
1156 s = xmalloc(sizeof(*s));\r
1157 if (IS_NULL(s)) return ONIGERR_MEMORY;\r
14b0e578 1158\r
b602265d
DG
1159 v = (CalloutNameListEntry* )xmalloc(sizeof(CalloutNameListEntry) * init_size);\r
1160 if (IS_NULL(v)) {\r
1161 xfree(s);\r
1162 return ONIGERR_MEMORY;\r
14b0e578
CS
1163 }\r
1164\r
b602265d
DG
1165 s->n = 0;\r
1166 s->alloc = init_size;\r
1167 s->v = v;\r
14b0e578 1168\r
b602265d
DG
1169 *rs = s;\r
1170 return ONIG_NORMAL;\r
14b0e578
CS
1171}\r
1172\r
b602265d
DG
1173static void\r
1174free_callout_func_list(CalloutNameListType* s)\r
1175{\r
1176 if (IS_NOT_NULL(s)) {\r
1177 if (IS_NOT_NULL(s->v)) {\r
1178 int i, j;\r
1179\r
1180 for (i = 0; i < s->n; i++) {\r
1181 CalloutNameListEntry* e = s->v + i;\r
1182 for (j = e->arg_num - e->opt_arg_num; j < e->arg_num; j++) {\r
1183 if (e->arg_types[j] == ONIG_TYPE_STRING) {\r
1184 UChar* p = e->opt_defaults[j].s.start;\r
1185 if (IS_NOT_NULL(p)) xfree(p);\r
1186 }\r
1187 }\r
1188 }\r
1189 xfree(s->v);\r
1190 }\r
1191 xfree(s);\r
1192 }\r
1193}\r
14b0e578 1194\r
b602265d
DG
1195static int\r
1196callout_func_list_add(CalloutNameListType* s, int* rid)\r
1197{\r
1198 if (s->n >= s->alloc) {\r
1199 int new_size = s->alloc * 2;\r
1200 CalloutNameListEntry* nv = (CalloutNameListEntry* )\r
1201 xrealloc(s->v, sizeof(CalloutNameListEntry) * new_size, sizeof(CalloutNameListEntry)*s->alloc);\r
1202 if (IS_NULL(nv)) return ONIGERR_MEMORY;\r
14b0e578 1203\r
b602265d
DG
1204 s->alloc = new_size;\r
1205 s->v = nv;\r
1206 }\r
14b0e578 1207\r
b602265d 1208 *rid = s->n;\r
14b0e578 1209\r
b602265d
DG
1210 xmemset(&(s->v[s->n]), 0, sizeof(*(s->v)));\r
1211 s->n++;\r
1212 return ONIG_NORMAL;\r
1213}\r
14b0e578 1214\r
14b0e578 1215\r
b602265d
DG
1216typedef struct {\r
1217 UChar* name;\r
1218 int name_len; /* byte length */\r
1219 int id;\r
1220} CalloutNameEntry;\r
14b0e578 1221\r
b602265d
DG
1222#ifdef USE_ST_LIBRARY\r
1223typedef st_table CalloutNameTable;\r
14b0e578 1224#else\r
b602265d
DG
1225typedef struct {\r
1226 CalloutNameEntry* e;\r
1227 int num;\r
1228 int alloc;\r
1229} CalloutNameTable;\r
14b0e578 1230#endif\r
14b0e578 1231\r
b602265d
DG
1232static CalloutNameTable* GlobalCalloutNameTable;\r
1233static int CalloutNameIDCounter;\r
14b0e578 1234\r
b602265d 1235#ifdef USE_ST_LIBRARY\r
14b0e578 1236\r
b602265d
DG
1237static int\r
1238i_free_callout_name_entry(st_callout_name_key* key, CalloutNameEntry* e,\r
1239 void* arg ARG_UNUSED)\r
1240{\r
1241 xfree(e->name);\r
1242 /*xfree(key->s); */ /* is same as e->name */\r
1243 xfree(key);\r
1244 xfree(e);\r
1245 return ST_DELETE;\r
1246}\r
14b0e578 1247\r
b602265d
DG
1248static int\r
1249callout_name_table_clear(CalloutNameTable* t)\r
1250{\r
1251 if (IS_NOT_NULL(t)) {\r
1252 onig_st_foreach(t, i_free_callout_name_entry, 0);\r
1253 }\r
1254 return 0;\r
1255}\r
14b0e578 1256\r
b602265d
DG
1257static int\r
1258global_callout_name_table_free(void)\r
1259{\r
1260 if (IS_NOT_NULL(GlobalCalloutNameTable)) {\r
1261 int r = callout_name_table_clear(GlobalCalloutNameTable);\r
1262 if (r != 0) return r;\r
14b0e578 1263\r
b602265d
DG
1264 onig_st_free_table(GlobalCalloutNameTable);\r
1265 GlobalCalloutNameTable = 0;\r
1266 CalloutNameIDCounter = 0;\r
14b0e578
CS
1267 }\r
1268\r
b602265d
DG
1269 return 0;\r
1270}\r
1271\r
1272static CalloutNameEntry*\r
1273callout_name_find(OnigEncoding enc, int is_not_single,\r
1274 const UChar* name, const UChar* name_end)\r
1275{\r
1276 int r;\r
1277 CalloutNameEntry* e;\r
1278 CalloutNameTable* t = GlobalCalloutNameTable;\r
14b0e578 1279\r
b602265d
DG
1280 e = (CalloutNameEntry* )NULL;\r
1281 if (IS_NOT_NULL(t)) {\r
1282 r = onig_st_lookup_callout_name_table(t, enc, is_not_single, name, name_end,\r
1283 (HashDataType* )((void* )(&e)));\r
1284 if (r == 0) { /* not found */\r
1285 if (enc != ONIG_ENCODING_ASCII &&\r
1286 ONIGENC_IS_ASCII_COMPATIBLE_ENCODING(enc)) {\r
1287 enc = ONIG_ENCODING_ASCII;\r
1288 onig_st_lookup_callout_name_table(t, enc, is_not_single, name, name_end,\r
1289 (HashDataType* )((void* )(&e)));\r
1290 }\r
1291 }\r
14b0e578 1292 }\r
b602265d
DG
1293 return e;\r
1294}\r
1295\r
14b0e578 1296#else\r
b602265d
DG
1297\r
1298static int\r
1299callout_name_table_clear(CalloutNameTable* t)\r
1300{\r
1301 int i;\r
1302 CalloutNameEntry* e;\r
1303\r
1304 if (IS_NOT_NULL(t)) {\r
1305 for (i = 0; i < t->num; i++) {\r
1306 e = &(t->e[i]);\r
1307 if (IS_NOT_NULL(e->name)) {\r
1308 xfree(e->name);\r
1309 e->name = NULL;\r
1310 e->name_len = 0;\r
1311 e->id = 0;\r
1312 e->func = 0;\r
1313 }\r
1314 }\r
1315 if (IS_NOT_NULL(t->e)) {\r
1316 xfree(t->e);\r
1317 t->e = NULL;\r
1318 }\r
1319 t->num = 0;\r
1320 }\r
1321 return 0;\r
14b0e578
CS
1322}\r
1323\r
b602265d
DG
1324static int\r
1325global_callout_name_table_free(void)\r
14b0e578 1326{\r
b602265d
DG
1327 if (IS_NOT_NULL(GlobalCalloutNameTable)) {\r
1328 int r = callout_name_table_clear(GlobalCalloutNameTable);\r
1329 if (r != 0) return r;\r
14b0e578 1330\r
b602265d
DG
1331 xfree(GlobalCalloutNameTable);\r
1332 GlobalCalloutNameTable = 0;\r
1333 CalloutNameIDCounter = 0;\r
14b0e578 1334 }\r
14b0e578
CS
1335 return 0;\r
1336}\r
14b0e578 1337\r
b602265d
DG
1338static CalloutNameEntry*\r
1339callout_name_find(UChar* name, UChar* name_end)\r
14b0e578 1340{\r
b602265d
DG
1341 int i, len;\r
1342 CalloutNameEntry* e;\r
1343 CalloutNameTable* t = Calloutnames;\r
14b0e578 1344\r
b602265d
DG
1345 if (IS_NOT_NULL(t)) {\r
1346 len = name_end - name;\r
1347 for (i = 0; i < t->num; i++) {\r
1348 e = &(t->e[i]);\r
1349 if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)\r
1350 return e;\r
1351 }\r
14b0e578 1352 }\r
b602265d
DG
1353 return (CalloutNameEntry* )NULL;\r
1354}\r
1355\r
14b0e578
CS
1356#endif\r
1357\r
b602265d
DG
1358/* name string must be single byte char string. */\r
1359static int\r
1360callout_name_entry(CalloutNameEntry** rentry, OnigEncoding enc,\r
1361 int is_not_single, UChar* name, UChar* name_end)\r
1362{\r
1363 int r;\r
1364 CalloutNameEntry* e;\r
1365 CalloutNameTable* t = GlobalCalloutNameTable;\r
14b0e578 1366\r
b602265d
DG
1367 *rentry = 0;\r
1368 if (name_end - name <= 0)\r
1369 return ONIGERR_INVALID_CALLOUT_NAME;\r
14b0e578 1370\r
b602265d
DG
1371 e = callout_name_find(enc, is_not_single, name, name_end);\r
1372 if (IS_NULL(e)) {\r
1373#ifdef USE_ST_LIBRARY\r
1374 if (IS_NULL(t)) {\r
1375 t = onig_st_init_callout_name_table_with_size(INIT_NAMES_ALLOC_NUM);\r
a5def177 1376 CHECK_NULL_RETURN_MEMERR(t);\r
b602265d
DG
1377 GlobalCalloutNameTable = t;\r
1378 }\r
1379 e = (CalloutNameEntry* )xmalloc(sizeof(CalloutNameEntry));\r
1380 CHECK_NULL_RETURN_MEMERR(e);\r
1381\r
1382 e->name = onigenc_strdup(enc, name, name_end);\r
1383 if (IS_NULL(e->name)) {\r
1384 xfree(e); return ONIGERR_MEMORY;\r
1385 }\r
1386\r
1387 r = st_insert_callout_name_table(t, enc, is_not_single,\r
1388 e->name, (e->name + (name_end - name)),\r
1389 (HashDataType )e);\r
1390 if (r < 0) return r;\r
1391\r
1392#else\r
1393\r
1394 int alloc;\r
1395\r
1396 if (IS_NULL(t)) {\r
1397 alloc = INIT_NAMES_ALLOC_NUM;\r
1398 t = (CalloutNameTable* )xmalloc(sizeof(CalloutNameTable));\r
1399 CHECK_NULL_RETURN_MEMERR(t);\r
1400 t->e = NULL;\r
1401 t->alloc = 0;\r
1402 t->num = 0;\r
1403\r
1404 t->e = (CalloutNameEntry* )xmalloc(sizeof(CalloutNameEntry) * alloc);\r
1405 if (IS_NULL(t->e)) {\r
1406 xfree(t);\r
1407 return ONIGERR_MEMORY;\r
1408 }\r
1409 t->alloc = alloc;\r
1410 GlobalCalloutNameTable = t;\r
1411 goto clear;\r
1412 }\r
1413 else if (t->num == t->alloc) {\r
1414 int i;\r
1415\r
1416 alloc = t->alloc * 2;\r
1417 t->e = (CalloutNameEntry* )xrealloc(t->e, sizeof(CalloutNameEntry) * alloc, sizeof(CalloutNameEntry)*t->alloc);\r
1418 CHECK_NULL_RETURN_MEMERR(t->e);\r
1419 t->alloc = alloc;\r
1420\r
1421 clear:\r
1422 for (i = t->num; i < t->alloc; i++) {\r
1423 t->e[i].name = NULL;\r
1424 t->e[i].name_len = 0;\r
1425 t->e[i].id = 0;\r
1426 }\r
1427 }\r
1428 e = &(t->e[t->num]);\r
1429 t->num++;\r
1430 e->name = onigenc_strdup(enc, name, name_end);\r
1431 if (IS_NULL(e->name)) return ONIGERR_MEMORY;\r
1432#endif\r
1433\r
1434 CalloutNameIDCounter++;\r
1435 e->id = CalloutNameIDCounter;\r
1436 e->name_len = (int )(name_end - name);\r
1437 }\r
1438\r
1439 *rentry = e;\r
1440 return e->id;\r
1441}\r
1442\r
1443static int\r
1444is_allowed_callout_name(OnigEncoding enc, UChar* name, UChar* name_end)\r
14b0e578 1445{\r
b602265d
DG
1446 UChar* p;\r
1447 OnigCodePoint c;\r
1448\r
1449 if (name >= name_end) return 0;\r
1450\r
1451 p = name;\r
1452 while (p < name_end) {\r
1453 c = ONIGENC_MBC_TO_CODE(enc, p, name_end);\r
1454 if (! IS_ALLOWED_CODE_IN_CALLOUT_NAME(c))\r
1455 return 0;\r
1456\r
1457 if (p == name) {\r
1458 if (c >= '0' && c <= '9') return 0;\r
1459 }\r
1460\r
1461 p += ONIGENC_MBC_ENC_LEN(enc, p);\r
1462 }\r
1463\r
1464 return 1;\r
14b0e578
CS
1465}\r
1466\r
b602265d
DG
1467static int\r
1468is_allowed_callout_tag_name(OnigEncoding enc, UChar* name, UChar* name_end)\r
14b0e578 1469{\r
b602265d
DG
1470 UChar* p;\r
1471 OnigCodePoint c;\r
14b0e578 1472\r
b602265d
DG
1473 if (name >= name_end) return 0;\r
1474\r
1475 p = name;\r
1476 while (p < name_end) {\r
1477 c = ONIGENC_MBC_TO_CODE(enc, p, name_end);\r
1478 if (! IS_ALLOWED_CODE_IN_CALLOUT_TAG_NAME(c))\r
1479 return 0;\r
1480\r
1481 if (p == name) {\r
1482 if (c >= '0' && c <= '9') return 0;\r
1483 }\r
1484\r
1485 p += ONIGENC_MBC_ENC_LEN(enc, p);\r
1486 }\r
1487\r
1488 return 1;\r
14b0e578
CS
1489}\r
1490\r
b602265d
DG
1491extern int\r
1492onig_set_callout_of_name(OnigEncoding enc, OnigCalloutType callout_type,\r
1493 UChar* name, UChar* name_end, int in,\r
1494 OnigCalloutFunc start_func,\r
1495 OnigCalloutFunc end_func,\r
1496 int arg_num, unsigned int arg_types[],\r
1497 int opt_arg_num, OnigValue opt_defaults[])\r
14b0e578 1498{\r
b602265d
DG
1499 int r;\r
1500 int i;\r
1501 int j;\r
1502 int id;\r
1503 int is_not_single;\r
1504 CalloutNameEntry* e;\r
1505 CalloutNameListEntry* fe;\r
14b0e578 1506\r
b602265d
DG
1507 if (callout_type != ONIG_CALLOUT_TYPE_SINGLE)\r
1508 return ONIGERR_INVALID_ARGUMENT;\r
14b0e578 1509\r
b602265d
DG
1510 if (arg_num < 0 || arg_num > ONIG_CALLOUT_MAX_ARGS_NUM)\r
1511 return ONIGERR_INVALID_CALLOUT_ARG;\r
14b0e578 1512\r
b602265d
DG
1513 if (opt_arg_num < 0 || opt_arg_num > arg_num)\r
1514 return ONIGERR_INVALID_CALLOUT_ARG;\r
14b0e578 1515\r
b602265d
DG
1516 if (start_func == 0 && end_func == 0)\r
1517 return ONIGERR_INVALID_CALLOUT_ARG;\r
1518\r
1519 if ((in & ONIG_CALLOUT_IN_PROGRESS) == 0 && (in & ONIG_CALLOUT_IN_RETRACTION) == 0)\r
1520 return ONIGERR_INVALID_CALLOUT_ARG;\r
1521\r
1522 for (i = 0; i < arg_num; i++) {\r
1523 unsigned int t = arg_types[i];\r
1524 if (t == ONIG_TYPE_VOID)\r
1525 return ONIGERR_INVALID_CALLOUT_ARG;\r
1526 else {\r
1527 if (i >= arg_num - opt_arg_num) {\r
1528 if (t != ONIG_TYPE_LONG && t != ONIG_TYPE_CHAR && t != ONIG_TYPE_STRING &&\r
1529 t != ONIG_TYPE_TAG)\r
1530 return ONIGERR_INVALID_CALLOUT_ARG;\r
1531 }\r
1532 else {\r
1533 if (t != ONIG_TYPE_LONG) {\r
1534 t = t & ~ONIG_TYPE_LONG;\r
1535 if (t != ONIG_TYPE_CHAR && t != ONIG_TYPE_STRING && t != ONIG_TYPE_TAG)\r
1536 return ONIGERR_INVALID_CALLOUT_ARG;\r
1537 }\r
14b0e578
CS
1538 }\r
1539 }\r
1540 }\r
1541\r
b602265d
DG
1542 if (! is_allowed_callout_name(enc, name, name_end)) {\r
1543 return ONIGERR_INVALID_CALLOUT_NAME;\r
14b0e578 1544 }\r
14b0e578 1545\r
b602265d
DG
1546 is_not_single = (callout_type != ONIG_CALLOUT_TYPE_SINGLE);\r
1547 id = callout_name_entry(&e, enc, is_not_single, name, name_end);\r
1548 if (id < 0) return id;\r
14b0e578 1549\r
b602265d
DG
1550 r = ONIG_NORMAL;\r
1551 if (IS_NULL(GlobalCalloutNameList)) {\r
1552 r = make_callout_func_list(&GlobalCalloutNameList, 10);\r
1553 if (r != ONIG_NORMAL) return r;\r
1554 }\r
14b0e578 1555\r
b602265d
DG
1556 while (id >= GlobalCalloutNameList->n) {\r
1557 int rid;\r
1558 r = callout_func_list_add(GlobalCalloutNameList, &rid);\r
1559 if (r != ONIG_NORMAL) return r;\r
14b0e578
CS
1560 }\r
1561\r
b602265d
DG
1562 fe = GlobalCalloutNameList->v + id;\r
1563 fe->type = callout_type;\r
1564 fe->in = in;\r
1565 fe->start_func = start_func;\r
1566 fe->end_func = end_func;\r
1567 fe->arg_num = arg_num;\r
1568 fe->opt_arg_num = opt_arg_num;\r
1569 fe->name = e->name;\r
14b0e578 1570\r
b602265d
DG
1571 for (i = 0; i < arg_num; i++) {\r
1572 fe->arg_types[i] = arg_types[i];\r
1573 }\r
1574 for (i = arg_num - opt_arg_num, j = 0; i < arg_num; i++, j++) {\r
1575 if (fe->arg_types[i] == ONIG_TYPE_STRING) {\r
1576 OnigValue* val = opt_defaults + j;\r
1577 UChar* ds = onigenc_strdup(enc, val->s.start, val->s.end);\r
1578 CHECK_NULL_RETURN_MEMERR(ds);\r
14b0e578 1579\r
b602265d
DG
1580 fe->opt_defaults[i].s.start = ds;\r
1581 fe->opt_defaults[i].s.end = ds + (val->s.end - val->s.start);\r
1582 }\r
1583 else {\r
1584 fe->opt_defaults[i] = opt_defaults[j];\r
1585 }\r
1586 }\r
1587\r
1588 r = id;\r
1589 return r;\r
14b0e578
CS
1590}\r
1591\r
b602265d
DG
1592static int\r
1593get_callout_name_id_by_name(OnigEncoding enc, int is_not_single,\r
1594 UChar* name, UChar* name_end, int* rid)\r
14b0e578 1595{\r
b602265d
DG
1596 int r;\r
1597 CalloutNameEntry* e;\r
14b0e578 1598\r
b602265d
DG
1599 if (! is_allowed_callout_name(enc, name, name_end)) {\r
1600 return ONIGERR_INVALID_CALLOUT_NAME;\r
1601 }\r
1602\r
1603 e = callout_name_find(enc, is_not_single, name, name_end);\r
1604 if (IS_NULL(e)) {\r
1605 return ONIGERR_UNDEFINED_CALLOUT_NAME;\r
1606 }\r
1607\r
1608 r = ONIG_NORMAL;\r
1609 *rid = e->id;\r
1610\r
1611 return r;\r
14b0e578
CS
1612}\r
1613\r
b602265d
DG
1614extern OnigCalloutFunc\r
1615onig_get_callout_start_func(regex_t* reg, int callout_num)\r
14b0e578 1616{\r
b602265d
DG
1617 /* If used for callouts of contents, return 0. */\r
1618 CalloutListEntry* e;\r
14b0e578 1619\r
b602265d 1620 e = onig_reg_callout_list_at(reg, callout_num);\r
a5def177 1621 CHECK_NULL_RETURN(e);\r
b602265d 1622 return e->start_func;\r
14b0e578
CS
1623}\r
1624\r
b602265d
DG
1625extern const UChar*\r
1626onig_get_callout_tag_start(regex_t* reg, int callout_num)\r
14b0e578 1627{\r
b602265d 1628 CalloutListEntry* e = onig_reg_callout_list_at(reg, callout_num);\r
a5def177 1629 CHECK_NULL_RETURN(e);\r
b602265d 1630 return e->tag_start;\r
14b0e578
CS
1631}\r
1632\r
b602265d
DG
1633extern const UChar*\r
1634onig_get_callout_tag_end(regex_t* reg, int callout_num)\r
14b0e578 1635{\r
b602265d 1636 CalloutListEntry* e = onig_reg_callout_list_at(reg, callout_num);\r
a5def177 1637 CHECK_NULL_RETURN(e);\r
b602265d
DG
1638 return e->tag_end;\r
1639}\r
14b0e578 1640\r
14b0e578 1641\r
b602265d
DG
1642extern OnigCalloutType\r
1643onig_get_callout_type_by_name_id(int name_id)\r
1644{\r
1645 if (name_id < 0 || name_id >= GlobalCalloutNameList->n)\r
1646 return 0;\r
14b0e578 1647\r
b602265d 1648 return GlobalCalloutNameList->v[name_id].type;\r
14b0e578
CS
1649}\r
1650\r
b602265d
DG
1651extern OnigCalloutFunc\r
1652onig_get_callout_start_func_by_name_id(int name_id)\r
14b0e578 1653{\r
b602265d
DG
1654 if (name_id < 0 || name_id >= GlobalCalloutNameList->n)\r
1655 return 0;\r
14b0e578 1656\r
b602265d 1657 return GlobalCalloutNameList->v[name_id].start_func;\r
14b0e578
CS
1658}\r
1659\r
b602265d
DG
1660extern OnigCalloutFunc\r
1661onig_get_callout_end_func_by_name_id(int name_id)\r
14b0e578 1662{\r
b602265d
DG
1663 if (name_id < 0 || name_id >= GlobalCalloutNameList->n)\r
1664 return 0;\r
14b0e578 1665\r
b602265d 1666 return GlobalCalloutNameList->v[name_id].end_func;\r
14b0e578
CS
1667}\r
1668\r
b602265d
DG
1669extern int\r
1670onig_get_callout_in_by_name_id(int name_id)\r
14b0e578 1671{\r
b602265d
DG
1672 if (name_id < 0 || name_id >= GlobalCalloutNameList->n)\r
1673 return 0;\r
14b0e578 1674\r
b602265d
DG
1675 return GlobalCalloutNameList->v[name_id].in;\r
1676}\r
14b0e578 1677\r
b602265d
DG
1678static int\r
1679get_callout_arg_num_by_name_id(int name_id)\r
1680{\r
1681 return GlobalCalloutNameList->v[name_id].arg_num;\r
1682}\r
14b0e578 1683\r
b602265d
DG
1684static int\r
1685get_callout_opt_arg_num_by_name_id(int name_id)\r
14b0e578 1686{\r
b602265d 1687 return GlobalCalloutNameList->v[name_id].opt_arg_num;\r
14b0e578 1688}\r
14b0e578 1689\r
b602265d
DG
1690static unsigned int\r
1691get_callout_arg_type_by_name_id(int name_id, int index)\r
14b0e578 1692{\r
b602265d 1693 return GlobalCalloutNameList->v[name_id].arg_types[index];\r
14b0e578
CS
1694}\r
1695\r
b602265d
DG
1696static OnigValue\r
1697get_callout_opt_default_by_name_id(int name_id, int index)\r
14b0e578 1698{\r
b602265d 1699 return GlobalCalloutNameList->v[name_id].opt_defaults[index];\r
14b0e578
CS
1700}\r
1701\r
b602265d
DG
1702extern UChar*\r
1703onig_get_callout_name_by_name_id(int name_id)\r
14b0e578 1704{\r
b602265d
DG
1705 if (name_id < 0 || name_id >= GlobalCalloutNameList->n)\r
1706 return 0;\r
1707\r
1708 return GlobalCalloutNameList->v[name_id].name;\r
14b0e578
CS
1709}\r
1710\r
b602265d
DG
1711extern int\r
1712onig_global_callout_names_free(void)\r
14b0e578 1713{\r
b602265d
DG
1714 free_callout_func_list(GlobalCalloutNameList);\r
1715 GlobalCalloutNameList = 0;\r
14b0e578 1716\r
b602265d
DG
1717 global_callout_name_table_free();\r
1718 return ONIG_NORMAL;\r
14b0e578
CS
1719}\r
1720\r
14b0e578 1721\r
b602265d
DG
1722typedef st_table CalloutTagTable;\r
1723typedef intptr_t CalloutTagVal;\r
14b0e578 1724\r
b602265d 1725#define CALLOUT_TAG_LIST_FLAG_TAG_EXIST (1<<0)\r
14b0e578 1726\r
b602265d
DG
1727static int\r
1728i_callout_callout_list_set(UChar* key, CalloutTagVal e, void* arg)\r
1729{\r
1730 int num;\r
1731 RegexExt* ext = (RegexExt* )arg;\r
14b0e578 1732\r
b602265d
DG
1733 num = (int )e - 1;\r
1734 ext->callout_list[num].flag |= CALLOUT_TAG_LIST_FLAG_TAG_EXIST;\r
1735 return ST_CONTINUE;\r
1736}\r
14b0e578 1737\r
b602265d
DG
1738static int\r
1739setup_ext_callout_list_values(regex_t* reg)\r
1740{\r
1741 int i, j;\r
1742 RegexExt* ext;\r
1743\r
1744 ext = REG_EXTP(reg);\r
1745 if (IS_NOT_NULL(ext->tag_table)) {\r
1746 onig_st_foreach((CalloutTagTable *)ext->tag_table, i_callout_callout_list_set,\r
1747 (st_data_t )ext);\r
1748 }\r
1749\r
1750 for (i = 0; i < ext->callout_num; i++) {\r
1751 CalloutListEntry* e = ext->callout_list + i;\r
1752 if (e->of == ONIG_CALLOUT_OF_NAME) {\r
1753 for (j = 0; j < e->u.arg.num; j++) {\r
1754 if (e->u.arg.types[j] == ONIG_TYPE_TAG) {\r
1755 UChar* start;\r
1756 UChar* end;\r
1757 int num;\r
1758 start = e->u.arg.vals[j].s.start;\r
1759 end = e->u.arg.vals[j].s.end;\r
1760 num = onig_get_callout_num_by_tag(reg, start, end);\r
1761 if (num < 0) return num;\r
1762 e->u.arg.vals[j].tag = num;\r
1763 }\r
14b0e578
CS
1764 }\r
1765 }\r
14b0e578
CS
1766 }\r
1767\r
b602265d 1768 return ONIG_NORMAL;\r
14b0e578
CS
1769}\r
1770\r
1771extern int\r
b602265d 1772onig_callout_tag_is_exist_at_callout_num(regex_t* reg, int callout_num)\r
14b0e578 1773{\r
b602265d 1774 RegexExt* ext = REG_EXTP(reg);\r
14b0e578 1775\r
b602265d
DG
1776 if (IS_NULL(ext) || IS_NULL(ext->callout_list)) return 0;\r
1777 if (callout_num > ext->callout_num) return 0;\r
14b0e578 1778\r
b602265d
DG
1779 return (ext->callout_list[callout_num].flag &\r
1780 CALLOUT_TAG_LIST_FLAG_TAG_EXIST) != 0 ? 1 : 0;\r
14b0e578
CS
1781}\r
1782\r
b602265d
DG
1783static int\r
1784i_free_callout_tag_entry(UChar* key, CalloutTagVal e, void* arg ARG_UNUSED)\r
14b0e578 1785{\r
b602265d
DG
1786 xfree(key);\r
1787 return ST_DELETE;\r
14b0e578
CS
1788}\r
1789\r
b602265d
DG
1790static int\r
1791callout_tag_table_clear(CalloutTagTable* t)\r
14b0e578 1792{\r
b602265d
DG
1793 if (IS_NOT_NULL(t)) {\r
1794 onig_st_foreach(t, i_free_callout_tag_entry, 0);\r
14b0e578 1795 }\r
b602265d 1796 return 0;\r
14b0e578
CS
1797}\r
1798\r
b602265d
DG
1799extern int\r
1800onig_callout_tag_table_free(void* table)\r
14b0e578 1801{\r
b602265d 1802 CalloutTagTable* t = (CalloutTagTable* )table;\r
14b0e578 1803\r
b602265d
DG
1804 if (IS_NOT_NULL(t)) {\r
1805 int r = callout_tag_table_clear(t);\r
1806 if (r != 0) return r;\r
14b0e578 1807\r
b602265d
DG
1808 onig_st_free_table(t);\r
1809 }\r
14b0e578 1810\r
b602265d 1811 return 0;\r
14b0e578
CS
1812}\r
1813\r
b602265d
DG
1814extern int\r
1815onig_get_callout_num_by_tag(regex_t* reg,\r
1816 const UChar* tag, const UChar* tag_end)\r
14b0e578 1817{\r
b602265d
DG
1818 int r;\r
1819 RegexExt* ext;\r
1820 CalloutTagVal e;\r
14b0e578 1821\r
b602265d
DG
1822 ext = REG_EXTP(reg);\r
1823 if (IS_NULL(ext) || IS_NULL(ext->tag_table))\r
1824 return ONIGERR_INVALID_CALLOUT_TAG_NAME;\r
14b0e578 1825\r
b602265d
DG
1826 r = onig_st_lookup_strend(ext->tag_table, tag, tag_end,\r
1827 (HashDataType* )((void* )(&e)));\r
1828 if (r == 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME;\r
1829 return (int )e;\r
14b0e578
CS
1830}\r
1831\r
b602265d
DG
1832static CalloutTagVal\r
1833callout_tag_find(CalloutTagTable* t, const UChar* name, const UChar* name_end)\r
14b0e578 1834{\r
b602265d 1835 CalloutTagVal e;\r
14b0e578 1836\r
b602265d
DG
1837 e = -1;\r
1838 if (IS_NOT_NULL(t)) {\r
1839 onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));\r
14b0e578 1840 }\r
b602265d 1841 return e;\r
14b0e578
CS
1842}\r
1843\r
1844static int\r
b602265d 1845callout_tag_table_new(CalloutTagTable** rt)\r
14b0e578 1846{\r
b602265d
DG
1847 CalloutTagTable* t;\r
1848\r
1849 *rt = 0;\r
1850 t = onig_st_init_strend_table_with_size(INIT_TAG_NAMES_ALLOC_NUM);\r
1851 CHECK_NULL_RETURN_MEMERR(t);\r
1852\r
1853 *rt = t;\r
1854 return ONIG_NORMAL;\r
14b0e578
CS
1855}\r
1856\r
14b0e578 1857static int\r
b602265d
DG
1858callout_tag_entry_raw(CalloutTagTable* t, UChar* name, UChar* name_end,\r
1859 CalloutTagVal entry_val)\r
14b0e578 1860{\r
b602265d
DG
1861 int r;\r
1862 CalloutTagVal val;\r
14b0e578 1863\r
b602265d
DG
1864 if (name_end - name <= 0)\r
1865 return ONIGERR_INVALID_CALLOUT_TAG_NAME;\r
14b0e578 1866\r
b602265d
DG
1867 val = callout_tag_find(t, name, name_end);\r
1868 if (val >= 0)\r
1869 return ONIGERR_MULTIPLEX_DEFINED_NAME;\r
14b0e578 1870\r
b602265d
DG
1871 r = onig_st_insert_strend(t, name, name_end, (HashDataType )entry_val);\r
1872 if (r < 0) return r;\r
14b0e578 1873\r
b602265d 1874 return ONIG_NORMAL;\r
14b0e578
CS
1875}\r
1876\r
1877static int\r
b602265d 1878ext_ensure_tag_table(regex_t* reg)\r
14b0e578 1879{\r
b602265d
DG
1880 int r;\r
1881 RegexExt* ext;\r
1882 CalloutTagTable* t;\r
14b0e578 1883\r
b602265d
DG
1884 ext = onig_get_regex_ext(reg);\r
1885 CHECK_NULL_RETURN_MEMERR(ext);\r
14b0e578 1886\r
b602265d
DG
1887 if (IS_NULL(ext->tag_table)) {\r
1888 r = callout_tag_table_new(&t);\r
1889 if (r != ONIG_NORMAL) return r;\r
1890\r
1891 ext->tag_table = t;\r
14b0e578 1892 }\r
b602265d
DG
1893\r
1894 return ONIG_NORMAL;\r
14b0e578
CS
1895}\r
1896\r
1897static int\r
b602265d
DG
1898callout_tag_entry(regex_t* reg, UChar* name, UChar* name_end,\r
1899 CalloutTagVal entry_val)\r
14b0e578 1900{\r
b602265d
DG
1901 int r;\r
1902 RegexExt* ext;\r
1903 CalloutListEntry* e;\r
14b0e578 1904\r
b602265d
DG
1905 r = ext_ensure_tag_table(reg);\r
1906 if (r != ONIG_NORMAL) return r;\r
14b0e578 1907\r
b602265d
DG
1908 ext = onig_get_regex_ext(reg);\r
1909 r = callout_tag_entry_raw(ext->tag_table, name, name_end, entry_val);\r
14b0e578 1910\r
b602265d 1911 e = onig_reg_callout_list_at(reg, (int )entry_val);\r
a5def177 1912 CHECK_NULL_RETURN_MEMERR(e);\r
b602265d
DG
1913 e->tag_start = name;\r
1914 e->tag_end = name_end;\r
14b0e578 1915\r
b602265d
DG
1916 return r;\r
1917}\r
14b0e578 1918\r
b602265d 1919#endif /* USE_CALLOUT */\r
14b0e578 1920\r
14b0e578 1921\r
b602265d 1922#define INIT_SCANENV_MEMENV_ALLOC_SIZE 16\r
14b0e578 1923\r
b602265d
DG
1924static void\r
1925scan_env_clear(ScanEnv* env)\r
14b0e578 1926{\r
b602265d
DG
1927 MEM_STATUS_CLEAR(env->capture_history);\r
1928 MEM_STATUS_CLEAR(env->bt_mem_start);\r
1929 MEM_STATUS_CLEAR(env->bt_mem_end);\r
1930 MEM_STATUS_CLEAR(env->backrefed_mem);\r
1931 env->error = (UChar* )NULL;\r
1932 env->error_end = (UChar* )NULL;\r
1933 env->num_call = 0;\r
14b0e578 1934\r
b602265d
DG
1935#ifdef USE_CALL\r
1936 env->unset_addr_list = NULL;\r
1937 env->has_call_zero = 0;\r
1938#endif\r
14b0e578 1939\r
b602265d
DG
1940 env->num_mem = 0;\r
1941 env->num_named = 0;\r
1942 env->mem_alloc = 0;\r
1943 env->mem_env_dynamic = (MemEnv* )NULL;\r
14b0e578 1944\r
b602265d 1945 xmemset(env->mem_env_static, 0, sizeof(env->mem_env_static));\r
14b0e578 1946\r
b602265d
DG
1947 env->parse_depth = 0;\r
1948 env->keep_num = 0;\r
1949 env->save_num = 0;\r
1950 env->save_alloc_num = 0;\r
1951 env->saves = 0;\r
1952}\r
14b0e578 1953\r
b602265d
DG
1954static int\r
1955scan_env_add_mem_entry(ScanEnv* env)\r
1956{\r
1957 int i, need, alloc;\r
1958 MemEnv* p;\r
14b0e578 1959\r
b602265d
DG
1960 need = env->num_mem + 1;\r
1961 if (need > MaxCaptureNum && MaxCaptureNum != 0)\r
1962 return ONIGERR_TOO_MANY_CAPTURES;\r
14b0e578 1963\r
b602265d
DG
1964 if (need >= SCANENV_MEMENV_SIZE) {\r
1965 if (env->mem_alloc <= need) {\r
1966 if (IS_NULL(env->mem_env_dynamic)) {\r
1967 alloc = INIT_SCANENV_MEMENV_ALLOC_SIZE;\r
1968 p = (MemEnv* )xmalloc(sizeof(MemEnv) * alloc);\r
1969 CHECK_NULL_RETURN_MEMERR(p);\r
1970 xmemcpy(p, env->mem_env_static, sizeof(env->mem_env_static));\r
1971 }\r
1972 else {\r
1973 alloc = env->mem_alloc * 2;\r
1974 p = (MemEnv* )xrealloc(env->mem_env_dynamic, sizeof(MemEnv) * alloc, sizeof(MemEnv)*env->mem_alloc);\r
1975 CHECK_NULL_RETURN_MEMERR(p);\r
1976 }\r
14b0e578 1977\r
b602265d
DG
1978 for (i = env->num_mem + 1; i < alloc; i++) {\r
1979 p[i].node = NULL_NODE;\r
1980#if 0\r
1981 p[i].in = 0;\r
1982 p[i].recursion = 0;\r
1983#endif\r
1984 }\r
1985\r
1986 env->mem_env_dynamic = p;\r
1987 env->mem_alloc = alloc;\r
14b0e578
CS
1988 }\r
1989 }\r
1990\r
b602265d
DG
1991 env->num_mem++;\r
1992 return env->num_mem;\r
14b0e578
CS
1993}\r
1994\r
1995static int\r
b602265d 1996scan_env_set_mem_node(ScanEnv* env, int num, Node* node)\r
14b0e578 1997{\r
b602265d
DG
1998 if (env->num_mem >= num)\r
1999 SCANENV_MEMENV(env)[num].node = node;\r
2000 else\r
2001 return ONIGERR_PARSER_BUG;\r
2002 return 0;\r
14b0e578
CS
2003}\r
2004\r
b602265d
DG
2005extern void\r
2006onig_node_free(Node* node)\r
14b0e578 2007{\r
b602265d
DG
2008 start:\r
2009 if (IS_NULL(node)) return ;\r
14b0e578 2010\r
b602265d
DG
2011#ifdef DEBUG_NODE_FREE\r
2012 fprintf(stderr, "onig_node_free: %p\n", node);\r
2013#endif\r
14b0e578 2014\r
b602265d
DG
2015 switch (NODE_TYPE(node)) {\r
2016 case NODE_STRING:\r
2017 if (STR_(node)->capa != 0 &&\r
2018 IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) {\r
2019 xfree(STR_(node)->s);\r
2020 }\r
2021 break;\r
14b0e578 2022\r
b602265d
DG
2023 case NODE_LIST:\r
2024 case NODE_ALT:\r
2025 onig_node_free(NODE_CAR(node));\r
2026 {\r
2027 Node* next_node = NODE_CDR(node);\r
2028\r
2029 xfree(node);\r
2030 node = next_node;\r
2031 goto start;\r
14b0e578 2032 }\r
b602265d 2033 break;\r
14b0e578 2034\r
b602265d
DG
2035 case NODE_CCLASS:\r
2036 {\r
2037 CClassNode* cc = CCLASS_(node);\r
14b0e578 2038\r
b602265d
DG
2039 if (cc->mbuf)\r
2040 bbuf_free(cc->mbuf);\r
2041 }\r
2042 break;\r
14b0e578 2043\r
b602265d
DG
2044 case NODE_BACKREF:\r
2045 if (IS_NOT_NULL(BACKREF_(node)->back_dynamic))\r
2046 xfree(BACKREF_(node)->back_dynamic);\r
2047 break;\r
14b0e578 2048\r
b602265d
DG
2049 case NODE_ENCLOSURE:\r
2050 if (NODE_BODY(node))\r
2051 onig_node_free(NODE_BODY(node));\r
14b0e578 2052\r
b602265d
DG
2053 {\r
2054 EnclosureNode* en = ENCLOSURE_(node);\r
2055 if (en->type == ENCLOSURE_IF_ELSE) {\r
2056 onig_node_free(en->te.Then);\r
2057 onig_node_free(en->te.Else);\r
14b0e578
CS
2058 }\r
2059 }\r
b602265d 2060 break;\r
14b0e578 2061\r
b602265d
DG
2062 case NODE_QUANT:\r
2063 case NODE_ANCHOR:\r
2064 if (NODE_BODY(node))\r
2065 onig_node_free(NODE_BODY(node));\r
2066 break;\r
14b0e578 2067\r
b602265d
DG
2068 case NODE_CTYPE:\r
2069 case NODE_CALL:\r
2070 case NODE_GIMMICK:\r
2071 break;\r
14b0e578 2072 }\r
14b0e578 2073\r
b602265d 2074 xfree(node);\r
14b0e578
CS
2075}\r
2076\r
b602265d
DG
2077static void\r
2078cons_node_free_alone(Node* node)\r
14b0e578 2079{\r
b602265d
DG
2080 NODE_CAR(node) = 0;\r
2081 NODE_CDR(node) = 0;\r
2082 onig_node_free(node);\r
14b0e578
CS
2083}\r
2084\r
b602265d
DG
2085static Node*\r
2086node_new(void)\r
14b0e578 2087{\r
b602265d 2088 Node* node;\r
14b0e578 2089\r
b602265d
DG
2090 node = (Node* )xmalloc(sizeof(Node));\r
2091 xmemset(node, 0, sizeof(*node));\r
14b0e578 2092\r
b602265d
DG
2093#ifdef DEBUG_NODE_FREE\r
2094 fprintf(stderr, "node_new: %p\n", node);\r
2095#endif\r
2096 return node;\r
2097}\r
14b0e578 2098\r
14b0e578 2099\r
b602265d
DG
2100static void\r
2101initialize_cclass(CClassNode* cc)\r
2102{\r
2103 BITSET_CLEAR(cc->bs);\r
2104 cc->flags = 0;\r
2105 cc->mbuf = NULL;\r
2106}\r
2107\r
2108static Node*\r
2109node_new_cclass(void)\r
2110{\r
2111 Node* node = node_new();\r
2112 CHECK_NULL_RETURN(node);\r
2113\r
2114 NODE_SET_TYPE(node, NODE_CCLASS);\r
2115 initialize_cclass(CCLASS_(node));\r
2116 return node;\r
2117}\r
2118\r
2119static Node*\r
2120node_new_ctype(int type, int not, OnigOptionType options)\r
2121{\r
2122 Node* node = node_new();\r
2123 CHECK_NULL_RETURN(node);\r
2124\r
2125 NODE_SET_TYPE(node, NODE_CTYPE);\r
2126 CTYPE_(node)->ctype = type;\r
2127 CTYPE_(node)->not = not;\r
2128 CTYPE_(node)->options = options;\r
2129 CTYPE_(node)->ascii_mode = IS_ASCII_MODE_CTYPE_OPTION(type, options);\r
2130 return node;\r
2131}\r
2132\r
2133static Node*\r
2134node_new_anychar(void)\r
2135{\r
2136 Node* node = node_new_ctype(CTYPE_ANYCHAR, 0, ONIG_OPTION_NONE);\r
2137 return node;\r
2138}\r
2139\r
2140static Node*\r
2141node_new_anychar_with_fixed_option(OnigOptionType option)\r
2142{\r
2143 CtypeNode* ct;\r
2144 Node* node;\r
2145\r
2146 node = node_new_anychar();\r
a5def177
DG
2147 CHECK_NULL_RETURN(node);\r
2148\r
b602265d
DG
2149 ct = CTYPE_(node);\r
2150 ct->options = option;\r
2151 NODE_STATUS_ADD(node, FIXED_OPTION);\r
2152 return node;\r
2153}\r
2154\r
2155static int\r
2156node_new_no_newline(Node** node, ScanEnv* env)\r
2157{\r
2158 Node* n;\r
2159\r
2160 n = node_new_anychar_with_fixed_option(ONIG_OPTION_NONE);\r
2161 CHECK_NULL_RETURN_MEMERR(n);\r
2162 *node = n;\r
2163 return 0;\r
2164}\r
2165\r
2166static int\r
2167node_new_true_anychar(Node** node, ScanEnv* env)\r
2168{\r
2169 Node* n;\r
2170\r
2171 n = node_new_anychar_with_fixed_option(ONIG_OPTION_MULTILINE);\r
2172 CHECK_NULL_RETURN_MEMERR(n);\r
2173 *node = n;\r
2174 return 0;\r
2175}\r
2176\r
2177static Node*\r
2178node_new_list(Node* left, Node* right)\r
2179{\r
2180 Node* node = node_new();\r
2181 CHECK_NULL_RETURN(node);\r
2182\r
2183 NODE_SET_TYPE(node, NODE_LIST);\r
2184 NODE_CAR(node) = left;\r
2185 NODE_CDR(node) = right;\r
2186 return node;\r
2187}\r
2188\r
2189extern Node*\r
2190onig_node_new_list(Node* left, Node* right)\r
2191{\r
2192 return node_new_list(left, right);\r
2193}\r
2194\r
2195extern Node*\r
2196onig_node_list_add(Node* list, Node* x)\r
2197{\r
2198 Node *n;\r
2199\r
2200 n = onig_node_new_list(x, NULL);\r
2201 if (IS_NULL(n)) return NULL_NODE;\r
2202\r
2203 if (IS_NOT_NULL(list)) {\r
2204 while (IS_NOT_NULL(NODE_CDR(list)))\r
2205 list = NODE_CDR(list);\r
2206\r
2207 NODE_CDR(list) = n;\r
2208 }\r
2209\r
2210 return n;\r
2211}\r
2212\r
2213extern Node*\r
2214onig_node_new_alt(Node* left, Node* right)\r
2215{\r
2216 Node* node = node_new();\r
2217 CHECK_NULL_RETURN(node);\r
2218\r
2219 NODE_SET_TYPE(node, NODE_ALT);\r
2220 NODE_CAR(node) = left;\r
2221 NODE_CDR(node) = right;\r
2222 return node;\r
2223}\r
2224\r
2225static Node*\r
2226make_list_or_alt(NodeType type, int n, Node* ns[])\r
2227{\r
2228 Node* r;\r
2229\r
2230 if (n <= 0) return NULL_NODE;\r
2231\r
2232 if (n == 1) {\r
2233 r = node_new();\r
2234 CHECK_NULL_RETURN(r);\r
2235 NODE_SET_TYPE(r, type);\r
2236 NODE_CAR(r) = ns[0];\r
2237 NODE_CDR(r) = NULL_NODE;\r
2238 }\r
2239 else {\r
2240 Node* right;\r
2241\r
2242 r = node_new();\r
2243 CHECK_NULL_RETURN(r);\r
2244\r
2245 right = make_list_or_alt(type, n - 1, ns + 1);\r
2246 if (IS_NULL(right)) {\r
2247 onig_node_free(r);\r
2248 return NULL_NODE;\r
2249 }\r
2250\r
2251 NODE_SET_TYPE(r, type);\r
2252 NODE_CAR(r) = ns[0];\r
2253 NODE_CDR(r) = right;\r
2254 }\r
2255\r
2256 return r;\r
2257}\r
2258\r
2259static Node*\r
2260make_list(int n, Node* ns[])\r
2261{\r
2262 return make_list_or_alt(NODE_LIST, n, ns);\r
2263}\r
2264\r
2265static Node*\r
2266make_alt(int n, Node* ns[])\r
2267{\r
2268 return make_list_or_alt(NODE_ALT, n, ns);\r
2269}\r
2270\r
2271extern Node*\r
2272onig_node_new_anchor(int type, int ascii_mode)\r
2273{\r
2274 Node* node = node_new();\r
2275 CHECK_NULL_RETURN(node);\r
2276\r
2277 NODE_SET_TYPE(node, NODE_ANCHOR);\r
2278 ANCHOR_(node)->type = type;\r
2279 ANCHOR_(node)->char_len = -1;\r
2280 ANCHOR_(node)->ascii_mode = ascii_mode;\r
2281 return node;\r
2282}\r
2283\r
2284static Node*\r
2285node_new_backref(int back_num, int* backrefs, int by_name,\r
2286#ifdef USE_BACKREF_WITH_LEVEL\r
2287 int exist_level, int nest_level,\r
2288#endif\r
2289 ScanEnv* env)\r
2290{\r
2291 int i;\r
2292 Node* node = node_new();\r
2293\r
2294 CHECK_NULL_RETURN(node);\r
2295\r
2296 NODE_SET_TYPE(node, NODE_BACKREF);\r
2297 BACKREF_(node)->back_num = back_num;\r
2298 BACKREF_(node)->back_dynamic = (int* )NULL;\r
2299 if (by_name != 0)\r
2300 NODE_STATUS_ADD(node, BY_NAME);\r
2301\r
2302#ifdef USE_BACKREF_WITH_LEVEL\r
2303 if (exist_level != 0) {\r
2304 NODE_STATUS_ADD(node, NEST_LEVEL);\r
2305 BACKREF_(node)->nest_level = nest_level;\r
2306 }\r
2307#endif\r
2308\r
2309 for (i = 0; i < back_num; i++) {\r
2310 if (backrefs[i] <= env->num_mem &&\r
2311 IS_NULL(SCANENV_MEMENV(env)[backrefs[i]].node)) {\r
2312 NODE_STATUS_ADD(node, RECURSION); /* /...(\1).../ */\r
2313 break;\r
2314 }\r
2315 }\r
2316\r
2317 if (back_num <= NODE_BACKREFS_SIZE) {\r
2318 for (i = 0; i < back_num; i++)\r
2319 BACKREF_(node)->back_static[i] = backrefs[i];\r
2320 }\r
2321 else {\r
2322 int* p = (int* )xmalloc(sizeof(int) * back_num);\r
2323 if (IS_NULL(p)) {\r
2324 onig_node_free(node);\r
2325 return NULL;\r
2326 }\r
2327 BACKREF_(node)->back_dynamic = p;\r
2328 for (i = 0; i < back_num; i++)\r
2329 p[i] = backrefs[i];\r
2330 }\r
2331 return node;\r
2332}\r
2333\r
2334static Node*\r
2335node_new_backref_checker(int back_num, int* backrefs, int by_name,\r
2336#ifdef USE_BACKREF_WITH_LEVEL\r
2337 int exist_level, int nest_level,\r
2338#endif\r
2339 ScanEnv* env)\r
2340{\r
2341 Node* node;\r
2342\r
2343 node = node_new_backref(back_num, backrefs, by_name,\r
2344#ifdef USE_BACKREF_WITH_LEVEL\r
2345 exist_level, nest_level,\r
2346#endif\r
2347 env);\r
2348 CHECK_NULL_RETURN(node);\r
2349\r
2350 NODE_STATUS_ADD(node, CHECKER);\r
2351 return node;\r
2352}\r
2353\r
#ifdef USE_CALL
/* Allocate a NODE_CALL node.  The name pointers are stored as given (not
   copied); entry_count starts at 1. */
static Node*
node_new_call(UChar* name, UChar* name_end, int gnum, int by_number)
{
  Node* call;

  call = node_new();
  CHECK_NULL_RETURN(call);

  NODE_SET_TYPE(call, NODE_CALL);
  CALL_(call)->name        = name;
  CALL_(call)->name_end    = name_end;
  CALL_(call)->group_num   = gnum;
  CALL_(call)->by_number   = by_number;
  CALL_(call)->entry_count = 1;
  return call;
}
#endif
2370\r
2371static Node*\r
2372node_new_quantifier(int lower, int upper, int by_number)\r
2373{\r
2374 Node* node = node_new();\r
2375 CHECK_NULL_RETURN(node);\r
2376\r
2377 NODE_SET_TYPE(node, NODE_QUANT);\r
2378 QUANT_(node)->lower = lower;\r
2379 QUANT_(node)->upper = upper;\r
2380 QUANT_(node)->greedy = 1;\r
2381 QUANT_(node)->body_empty_info = QUANT_BODY_IS_NOT_EMPTY;\r
2382 QUANT_(node)->head_exact = NULL_NODE;\r
2383 QUANT_(node)->next_head_exact = NULL_NODE;\r
2384 QUANT_(node)->is_refered = 0;\r
2385 if (by_number != 0)\r
2386 NODE_STATUS_ADD(node, BY_NUMBER);\r
2387\r
2388 return node;\r
2389}\r
2390\r
2391static Node*\r
2392node_new_enclosure(enum EnclosureType type)\r
2393{\r
2394 Node* node = node_new();\r
2395 CHECK_NULL_RETURN(node);\r
2396\r
2397 NODE_SET_TYPE(node, NODE_ENCLOSURE);\r
2398 ENCLOSURE_(node)->type = type;\r
2399\r
2400 switch (type) {\r
2401 case ENCLOSURE_MEMORY:\r
2402 ENCLOSURE_(node)->m.regnum = 0;\r
2403 ENCLOSURE_(node)->m.called_addr = -1;\r
2404 ENCLOSURE_(node)->m.entry_count = 1;\r
2405 ENCLOSURE_(node)->m.called_state = 0;\r
2406 break;\r
2407\r
2408 case ENCLOSURE_OPTION:\r
2409 ENCLOSURE_(node)->o.options = 0;\r
2410 break;\r
2411\r
2412 case ENCLOSURE_STOP_BACKTRACK:\r
2413 break;\r
2414\r
2415 case ENCLOSURE_IF_ELSE:\r
2416 ENCLOSURE_(node)->te.Then = 0;\r
2417 ENCLOSURE_(node)->te.Else = 0;\r
2418 break;\r
2419 }\r
2420\r
2421 ENCLOSURE_(node)->opt_count = 0;\r
2422 return node;\r
2423}\r
2424\r
2425extern Node*\r
2426onig_node_new_enclosure(int type)\r
2427{\r
2428 return node_new_enclosure(type);\r
2429}\r
2430\r
2431static Node*\r
2432node_new_enclosure_if_else(Node* cond, Node* Then, Node* Else)\r
2433{\r
2434 Node* n;\r
2435 n = node_new_enclosure(ENCLOSURE_IF_ELSE);\r
2436 CHECK_NULL_RETURN(n);\r
2437\r
2438 NODE_BODY(n) = cond;\r
2439 ENCLOSURE_(n)->te.Then = Then;\r
2440 ENCLOSURE_(n)->te.Else = Else;\r
2441 return n;\r
2442}\r
2443\r
2444static Node*\r
2445node_new_memory(int is_named)\r
2446{\r
2447 Node* node = node_new_enclosure(ENCLOSURE_MEMORY);\r
2448 CHECK_NULL_RETURN(node);\r
2449 if (is_named != 0)\r
2450 NODE_STATUS_ADD(node, NAMED_GROUP);\r
2451\r
2452 return node;\r
2453}\r
2454\r
2455static Node*\r
2456node_new_option(OnigOptionType option)\r
2457{\r
2458 Node* node = node_new_enclosure(ENCLOSURE_OPTION);\r
2459 CHECK_NULL_RETURN(node);\r
2460 ENCLOSURE_(node)->o.options = option;\r
2461 return node;\r
2462}\r
2463\r
2464static int\r
2465node_new_fail(Node** node, ScanEnv* env)\r
2466{\r
2467 *node = node_new();\r
2468 CHECK_NULL_RETURN_MEMERR(*node);\r
2469\r
2470 NODE_SET_TYPE(*node, NODE_GIMMICK);\r
2471 GIMMICK_(*node)->type = GIMMICK_FAIL;\r
2472 return ONIG_NORMAL;\r
2473}\r
2474\r
2475static int\r
2476node_new_save_gimmick(Node** node, enum SaveType save_type, ScanEnv* env)\r
2477{\r
2478 int id;\r
2479 int r;\r
2480\r
2481 r = save_entry(env, save_type, &id);\r
2482 if (r != ONIG_NORMAL) return r;\r
2483\r
2484 *node = node_new();\r
2485 CHECK_NULL_RETURN_MEMERR(*node);\r
2486\r
2487 NODE_SET_TYPE(*node, NODE_GIMMICK);\r
2488 GIMMICK_(*node)->id = id;\r
2489 GIMMICK_(*node)->type = GIMMICK_SAVE;\r
2490 GIMMICK_(*node)->detail_type = (int )save_type;\r
2491\r
2492 return ONIG_NORMAL;\r
2493}\r
2494\r
2495static int\r
2496node_new_update_var_gimmick(Node** node, enum UpdateVarType update_var_type,\r
2497 int id, ScanEnv* env)\r
2498{\r
2499 *node = node_new();\r
2500 CHECK_NULL_RETURN_MEMERR(*node);\r
2501\r
2502 NODE_SET_TYPE(*node, NODE_GIMMICK);\r
2503 GIMMICK_(*node)->id = id;\r
2504 GIMMICK_(*node)->type = GIMMICK_UPDATE_VAR;\r
2505 GIMMICK_(*node)->detail_type = (int )update_var_type;\r
2506\r
2507 return ONIG_NORMAL;\r
2508}\r
2509\r
2510static int\r
2511node_new_keep(Node** node, ScanEnv* env)\r
2512{\r
2513 int r;\r
2514\r
2515 r = node_new_save_gimmick(node, SAVE_KEEP, env);\r
2516 if (r != 0) return r;\r
2517\r
2518 env->keep_num++;\r
2519 return ONIG_NORMAL;\r
2520}\r
2521\r
2522#ifdef USE_CALLOUT\r
2523\r
2524extern void\r
2525onig_free_reg_callout_list(int n, CalloutListEntry* list)\r
2526{\r
2527 int i;\r
2528 int j;\r
2529\r
2530 if (IS_NULL(list)) return ;\r
2531\r
2532 for (i = 0; i < n; i++) {\r
2533 if (list[i].of == ONIG_CALLOUT_OF_NAME) {\r
2534 for (j = 0; j < list[i].u.arg.passed_num; j++) {\r
2535 if (list[i].u.arg.types[j] == ONIG_TYPE_STRING) {\r
2536 if (IS_NOT_NULL(list[i].u.arg.vals[j].s.start))\r
2537 xfree(list[i].u.arg.vals[j].s.start);\r
2538 }\r
2539 }\r
2540 }\r
2541 else { /* ONIG_CALLOUT_OF_CONTENTS */\r
2542 if (IS_NOT_NULL(list[i].u.content.start)) {\r
2543 xfree((void* )list[i].u.content.start);\r
2544 }\r
2545 }\r
2546 }\r
2547\r
2548 xfree(list);\r
2549}\r
2550\r
2551extern CalloutListEntry*\r
2552onig_reg_callout_list_at(regex_t* reg, int num)\r
2553{\r
2554 RegexExt* ext = REG_EXTP(reg);\r
2555 CHECK_NULL_RETURN(ext);\r
2556\r
2557 if (num <= 0 || num > ext->callout_num)\r
2558 return 0;\r
2559\r
2560 num--;\r
2561 return ext->callout_list + num;\r
2562}\r
2563\r
2564static int\r
2565reg_callout_list_entry(ScanEnv* env, int* rnum)\r
2566{\r
2567#define INIT_CALLOUT_LIST_NUM 3\r
2568\r
2569 int num;\r
2570 CalloutListEntry* list;\r
2571 CalloutListEntry* e;\r
2572 RegexExt* ext;\r
2573\r
2574 ext = onig_get_regex_ext(env->reg);\r
2575 CHECK_NULL_RETURN_MEMERR(ext);\r
2576\r
2577 if (IS_NULL(ext->callout_list)) {\r
2578 list = (CalloutListEntry* )xmalloc(sizeof(*list) * INIT_CALLOUT_LIST_NUM);\r
2579 CHECK_NULL_RETURN_MEMERR(list);\r
2580\r
2581 ext->callout_list = list;\r
2582 ext->callout_list_alloc = INIT_CALLOUT_LIST_NUM;\r
2583 ext->callout_num = 0;\r
2584 }\r
2585\r
2586 num = ext->callout_num + 1;\r
2587 if (num > ext->callout_list_alloc) {\r
2588 int alloc = ext->callout_list_alloc * 2;\r
2589 list = (CalloutListEntry* )xrealloc(ext->callout_list,\r
2590 sizeof(CalloutListEntry) * alloc,\r
2591 sizeof(CalloutListEntry) * ext->callout_list_alloc);\r
2592 CHECK_NULL_RETURN_MEMERR(list);\r
2593\r
2594 ext->callout_list = list;\r
2595 ext->callout_list_alloc = alloc;\r
2596 }\r
2597\r
2598 e = ext->callout_list + (num - 1);\r
2599\r
2600 e->flag = 0;\r
2601 e->of = 0;\r
2602 e->in = ONIG_CALLOUT_OF_CONTENTS;\r
2603 e->type = 0;\r
2604 e->tag_start = 0;\r
2605 e->tag_end = 0;\r
2606 e->start_func = 0;\r
2607 e->end_func = 0;\r
2608 e->u.arg.num = 0;\r
2609 e->u.arg.passed_num = 0;\r
2610\r
2611 ext->callout_num = num;\r
2612 *rnum = num;\r
2613 return ONIG_NORMAL;\r
2614}\r
2615\r
2616static int\r
2617node_new_callout(Node** node, OnigCalloutOf callout_of, int num, int id,\r
2618 ScanEnv* env)\r
2619{\r
2620 *node = node_new();\r
2621 CHECK_NULL_RETURN_MEMERR(*node);\r
2622\r
2623 NODE_SET_TYPE(*node, NODE_GIMMICK);\r
2624 GIMMICK_(*node)->id = id;\r
2625 GIMMICK_(*node)->num = num;\r
2626 GIMMICK_(*node)->type = GIMMICK_CALLOUT;\r
2627 GIMMICK_(*node)->detail_type = (int )callout_of;\r
2628\r
2629 return ONIG_NORMAL;\r
2630}\r
2631#endif\r
2632\r
2633static int\r
2634make_extended_grapheme_cluster(Node** node, ScanEnv* env)\r
2635{\r
2636 int r;\r
2637 int i;\r
2638 Node* x;\r
2639 Node* ns[2];\r
2640\r
2641 /* \X == (?>\O(?:\Y\O)*) */\r
2642\r
2643 ns[1] = NULL_NODE;\r
2644\r
2645 r = ONIGERR_MEMORY;\r
2646 ns[0] = onig_node_new_anchor(ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY, 0);\r
2647 if (IS_NULL(ns[0])) goto err;\r
2648\r
2649 r = node_new_true_anychar(&ns[1], env);\r
2650 if (r != 0) goto err1;\r
2651\r
2652 x = make_list(2, ns);\r
2653 if (IS_NULL(x)) goto err;\r
2654 ns[0] = x;\r
2655 ns[1] = NULL_NODE;\r
2656\r
2657 x = node_new_quantifier(0, REPEAT_INFINITE, 1);\r
2658 if (IS_NULL(x)) goto err;\r
2659\r
2660 NODE_BODY(x) = ns[0];\r
2661 ns[0] = NULL_NODE;\r
2662 ns[1] = x;\r
2663\r
2664 r = node_new_true_anychar(&ns[0], env);\r
2665 if (r != 0) goto err1;\r
2666\r
2667 x = make_list(2, ns);\r
2668 if (IS_NULL(x)) goto err;\r
2669\r
2670 ns[0] = x;\r
2671 ns[1] = NULL_NODE;\r
2672\r
2673 x = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);\r
2674 if (IS_NULL(x)) goto err;\r
2675\r
2676 NODE_BODY(x) = ns[0];\r
2677\r
2678 *node = x;\r
2679 return ONIG_NORMAL;\r
2680\r
2681 err:\r
2682 r = ONIGERR_MEMORY;\r
2683 err1:\r
2684 for (i = 0; i < 2; i++) onig_node_free(ns[i]);\r
2685 return r;\r
2686}\r
2687\r
2688static int\r
2689make_absent_engine(Node** node, int pre_save_right_id, Node* absent,\r
2690 Node* step_one, int lower, int upper, int possessive,\r
2691 int is_range_cutter, ScanEnv* env)\r
2692{\r
2693 int r;\r
2694 int i;\r
2695 int id;\r
2696 Node* x;\r
2697 Node* ns[4];\r
2698\r
2699 for (i = 0; i < 4; i++) ns[i] = NULL_NODE;\r
2700\r
2701 ns[1] = absent;\r
2702 ns[3] = step_one; /* for err */\r
2703 r = node_new_save_gimmick(&ns[0], SAVE_S, env);\r
2704 if (r != 0) goto err;\r
2705\r
2706 id = GIMMICK_(ns[0])->id;\r
2707 r = node_new_update_var_gimmick(&ns[2], UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK,\r
2708 id, env);\r
2709 if (r != 0) goto err;\r
2710\r
2711 r = node_new_fail(&ns[3], env);\r
2712 if (r != 0) goto err;\r
2713\r
2714 x = make_list(4, ns);\r
2715 if (IS_NULL(x)) goto err0;\r
2716\r
2717 ns[0] = x;\r
2718 ns[1] = step_one;\r
2719 ns[2] = ns[3] = NULL_NODE;\r
2720\r
2721 x = make_alt(2, ns);\r
2722 if (IS_NULL(x)) goto err0;\r
2723\r
2724 ns[0] = x;\r
2725\r
2726 x = node_new_quantifier(lower, upper, 0);\r
2727 if (IS_NULL(x)) goto err0;\r
2728\r
2729 NODE_BODY(x) = ns[0];\r
2730 ns[0] = x;\r
2731\r
2732 if (possessive != 0) {\r
2733 x = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);\r
2734 if (IS_NULL(x)) goto err0;\r
2735\r
2736 NODE_BODY(x) = ns[0];\r
2737 ns[0] = x;\r
2738 }\r
2739\r
2740 r = node_new_update_var_gimmick(&ns[1], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,\r
2741 pre_save_right_id, env);\r
2742 if (r != 0) goto err;\r
2743\r
2744 r = node_new_fail(&ns[2], env);\r
2745 if (r != 0) goto err;\r
2746\r
2747 x = make_list(2, ns + 1);\r
2748 if (IS_NULL(x)) goto err0;\r
2749\r
2750 ns[1] = x; ns[2] = NULL_NODE;\r
2751\r
2752 x = make_alt(2, ns);\r
2753 if (IS_NULL(x)) goto err0;\r
2754\r
2755 if (is_range_cutter != 0)\r
2756 NODE_STATUS_ADD(x, SUPER);\r
2757\r
2758 *node = x;\r
2759 return ONIG_NORMAL;\r
2760\r
2761 err0:\r
2762 r = ONIGERR_MEMORY;\r
2763 err:\r
2764 for (i = 0; i < 4; i++) onig_node_free(ns[i]);\r
2765 return r;\r
2766}\r
2767\r
2768static int\r
2769make_absent_tail(Node** node1, Node** node2, int pre_save_right_id,\r
2770 ScanEnv* env)\r
2771{\r
2772 int r;\r
2773 int id;\r
2774 Node* save;\r
2775 Node* x;\r
2776 Node* ns[2];\r
2777\r
2778 *node1 = *node2 = NULL_NODE;\r
2779 save = ns[0] = ns[1] = NULL_NODE;\r
2780\r
2781 r = node_new_save_gimmick(&save, SAVE_RIGHT_RANGE, env);\r
2782 if (r != 0) goto err;\r
2783\r
2784 id = GIMMICK_(save)->id;\r
2785 r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,\r
2786 id, env);\r
2787 if (r != 0) goto err;\r
2788\r
2789 r = node_new_fail(&ns[1], env);\r
2790 if (r != 0) goto err;\r
2791\r
2792 x = make_list(2, ns);\r
2793 if (IS_NULL(x)) goto err0;\r
2794\r
2795 ns[0] = NULL_NODE; ns[1] = x;\r
2796\r
2797 r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,\r
2798 pre_save_right_id, env);\r
2799 if (r != 0) goto err;\r
2800\r
2801 x = make_alt(2, ns);\r
2802 if (IS_NULL(x)) goto err0;\r
2803\r
2804 *node1 = save;\r
2805 *node2 = x;\r
2806 return ONIG_NORMAL;\r
2807\r
2808 err0:\r
2809 r = ONIGERR_MEMORY;\r
2810 err:\r
2811 onig_node_free(save);\r
2812 onig_node_free(ns[0]);\r
2813 onig_node_free(ns[1]);\r
2814 return r;\r
2815}\r
2816\r
2817static int\r
2818make_range_clear(Node** node, ScanEnv* env)\r
2819{\r
2820 int r;\r
2821 int id;\r
2822 Node* save;\r
2823 Node* x;\r
2824 Node* ns[2];\r
2825\r
2826 *node = NULL_NODE;\r
2827 save = ns[0] = ns[1] = NULL_NODE;\r
2828\r
2829 r = node_new_save_gimmick(&save, SAVE_RIGHT_RANGE, env);\r
2830 if (r != 0) goto err;\r
2831\r
2832 id = GIMMICK_(save)->id;\r
2833 r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,\r
2834 id, env);\r
2835 if (r != 0) goto err;\r
2836\r
2837 r = node_new_fail(&ns[1], env);\r
2838 if (r != 0) goto err;\r
2839\r
2840 x = make_list(2, ns);\r
2841 if (IS_NULL(x)) goto err0;\r
2842\r
2843 ns[0] = NULL_NODE; ns[1] = x;\r
2844\r
2845 r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_INIT, 0, env);\r
2846 if (r != 0) goto err;\r
2847\r
2848 x = make_alt(2, ns);\r
2849 if (IS_NULL(x)) goto err0;\r
2850\r
2851 NODE_STATUS_ADD(x, SUPER);\r
2852\r
2853 ns[0] = save;\r
2854 ns[1] = x;\r
2855 save = NULL_NODE;\r
2856 x = make_list(2, ns);\r
2857 if (IS_NULL(x)) goto err0;\r
2858\r
2859 *node = x;\r
2860 return ONIG_NORMAL;\r
2861\r
2862 err0:\r
2863 r = ONIGERR_MEMORY;\r
2864 err:\r
2865 onig_node_free(save);\r
2866 onig_node_free(ns[0]);\r
2867 onig_node_free(ns[1]);\r
2868 return r;\r
2869}\r
2870\r
2871static int\r
2872is_simple_one_char_repeat(Node* node, Node** rquant, Node** rbody,\r
2873 int* is_possessive, ScanEnv* env)\r
2874{\r
2875 Node* quant;\r
2876 Node* body;\r
2877\r
2878 *rquant = *rbody = 0;\r
2879 *is_possessive = 0;\r
2880\r
2881 if (NODE_TYPE(node) == NODE_QUANT) {\r
2882 quant = node;\r
2883 }\r
2884 else {\r
2885 if (NODE_TYPE(node) == NODE_ENCLOSURE) {\r
2886 EnclosureNode* en = ENCLOSURE_(node);\r
2887 if (en->type == ENCLOSURE_STOP_BACKTRACK) {\r
2888 *is_possessive = 1;\r
2889 quant = NODE_ENCLOSURE_BODY(en);\r
2890 if (NODE_TYPE(quant) != NODE_QUANT)\r
2891 return 0;\r
2892 }\r
2893 else\r
2894 return 0;\r
2895 }\r
2896 else\r
2897 return 0;\r
2898 }\r
2899\r
2900 if (QUANT_(quant)->greedy == 0)\r
2901 return 0;\r
2902\r
2903 body = NODE_BODY(quant);\r
2904 switch (NODE_TYPE(body)) {\r
2905 case NODE_STRING:\r
2906 {\r
2907 int len;\r
2908 StrNode* sn = STR_(body);\r
2909 UChar *s = sn->s;\r
2910\r
2911 len = 0;\r
2912 while (s < sn->end) {\r
2913 s += enclen(env->enc, s);\r
2914 len++;\r
2915 }\r
2916 if (len != 1)\r
2917 return 0;\r
2918 }\r
2919\r
2920 case NODE_CCLASS:\r
2921 break;\r
2922\r
2923 default:\r
2924 return 0;\r
2925 break;\r
2926 }\r
2927\r
2928 if (node != quant) {\r
2929 NODE_BODY(node) = 0;\r
2930 onig_node_free(node);\r
2931 }\r
2932 NODE_BODY(quant) = NULL_NODE;\r
2933 *rquant = quant;\r
2934 *rbody = body;\r
2935 return 1;\r
2936}\r
2937\r
2938static int\r
2939make_absent_tree_for_simple_one_char_repeat(Node** node, Node* absent, Node* quant,\r
2940 Node* body, int possessive, ScanEnv* env)\r
2941{\r
2942 int r;\r
2943 int i;\r
2944 int id1;\r
2945 int lower, upper;\r
2946 Node* x;\r
2947 Node* ns[4];\r
2948\r
2949 *node = NULL_NODE;\r
2950 r = ONIGERR_MEMORY;\r
2951 ns[0] = ns[1] = NULL_NODE;\r
2952 ns[2] = body, ns[3] = absent;\r
2953\r
2954 lower = QUANT_(quant)->lower;\r
2955 upper = QUANT_(quant)->upper;\r
2956 onig_node_free(quant);\r
2957\r
2958 r = node_new_save_gimmick(&ns[0], SAVE_RIGHT_RANGE, env);\r
2959 if (r != 0) goto err;\r
2960\r
2961 id1 = GIMMICK_(ns[0])->id;\r
2962\r
2963 r = make_absent_engine(&ns[1], id1, absent, body, lower, upper, possessive,\r
2964 0, env);\r
2965 if (r != 0) goto err;\r
2966\r
2967 ns[2] = ns[3] = NULL_NODE;\r
2968\r
2969 r = node_new_update_var_gimmick(&ns[2], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,\r
2970 id1, env);\r
2971 if (r != 0) goto err;\r
2972\r
2973 x = make_list(3, ns);\r
2974 if (IS_NULL(x)) goto err0;\r
2975\r
2976 *node = x;\r
2977 return ONIG_NORMAL;\r
2978\r
2979 err0:\r
2980 r = ONIGERR_MEMORY;\r
2981 err:\r
2982 for (i = 0; i < 4; i++) onig_node_free(ns[i]);\r
2983 return r;\r
2984}\r
2985\r
2986static int\r
2987make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter,\r
2988 ScanEnv* env)\r
2989{\r
2990 int r;\r
2991 int i;\r
2992 int id1, id2;\r
2993 int possessive;\r
2994 Node* x;\r
2995 Node* ns[7];\r
2996\r
2997 r = ONIGERR_MEMORY;\r
2998 for (i = 0; i < 7; i++) ns[i] = NULL_NODE;\r
2999 ns[4] = expr; ns[5] = absent;\r
3000\r
3001 if (is_range_cutter == 0) {\r
3002 Node* quant;\r
3003 Node* body;\r
3004\r
3005 if (expr == NULL_NODE) {\r
3006 /* default expr \O* */\r
3007 quant = node_new_quantifier(0, REPEAT_INFINITE, 0);\r
3008 if (IS_NULL(quant)) goto err0;\r
3009\r
3010 r = node_new_true_anychar(&body, env);\r
3011 if (r != 0) {\r
3012 onig_node_free(quant);\r
3013 goto err;\r
3014 }\r
3015 possessive = 0;\r
3016 goto simple;\r
3017 }\r
3018 else {\r
3019 if (is_simple_one_char_repeat(expr, &quant, &body, &possessive, env)) {\r
3020 simple:\r
3021 r = make_absent_tree_for_simple_one_char_repeat(node, absent, quant,\r
3022 body, possessive, env);\r
3023 if (r != 0) {\r
3024 ns[4] = NULL_NODE;\r
3025 onig_node_free(quant);\r
3026 onig_node_free(body);\r
3027 goto err;\r
3028 }\r
3029\r
3030 return ONIG_NORMAL;\r
3031 }\r
3032 }\r
3033 }\r
3034\r
3035 r = node_new_save_gimmick(&ns[0], SAVE_RIGHT_RANGE, env);\r
3036 if (r != 0) goto err;\r
3037\r
3038 id1 = GIMMICK_(ns[0])->id;\r
3039\r
3040 r = node_new_save_gimmick(&ns[1], SAVE_S, env);\r
3041 if (r != 0) goto err;\r
3042\r
3043 id2 = GIMMICK_(ns[1])->id;\r
3044\r
3045 r = node_new_true_anychar(&ns[3], env);\r
3046 if (r != 0) goto err;\r
3047\r
3048 possessive = 1;\r
3049 r = make_absent_engine(&ns[2], id1, absent, ns[3], 0, REPEAT_INFINITE,\r
3050 possessive, is_range_cutter, env);\r
3051 if (r != 0) goto err;\r
3052\r
3053 ns[3] = NULL_NODE;\r
3054 ns[5] = NULL_NODE;\r
3055\r
3056 r = node_new_update_var_gimmick(&ns[3], UPDATE_VAR_S_FROM_STACK, id2, env);\r
3057 if (r != 0) goto err;\r
3058\r
3059 if (is_range_cutter != 0) {\r
3060 x = make_list(4, ns);\r
3061 if (IS_NULL(x)) goto err0;\r
3062 }\r
3063 else {\r
3064 r = make_absent_tail(&ns[5], &ns[6], id1, env);\r
3065 if (r != 0) goto err;\r
3066 \r
3067 x = make_list(7, ns);\r
3068 if (IS_NULL(x)) goto err0;\r
3069 }\r
3070\r
3071 *node = x;\r
3072 return ONIG_NORMAL;\r
3073\r
3074 err0:\r
3075 r = ONIGERR_MEMORY;\r
3076 err:\r
3077 for (i = 0; i < 7; i++) onig_node_free(ns[i]);\r
3078 return r; \r
3079}\r
3080\r
3081extern int\r
3082onig_node_str_cat(Node* node, const UChar* s, const UChar* end)\r
3083{\r
3084 int addlen = (int )(end - s);\r
3085\r
3086 if (addlen > 0) {\r
3087 int len = (int )(STR_(node)->end - STR_(node)->s);\r
3088\r
3089 if (STR_(node)->capa > 0 || (len + addlen > NODE_STRING_BUF_SIZE - 1)) {\r
3090 UChar* p;\r
3091 int capa = len + addlen + NODE_STRING_MARGIN;\r
3092\r
3093 if (capa <= STR_(node)->capa) {\r
3094 onig_strcpy(STR_(node)->s + len, s, end);\r
3095 }\r
3096 else {\r
3097 if (STR_(node)->s == STR_(node)->buf)\r
3098 p = strcat_capa_from_static(STR_(node)->s, STR_(node)->end,\r
3099 s, end, capa);\r
3100 else\r
3101 p = strcat_capa(STR_(node)->s, STR_(node)->end, s, end, capa, STR_(node)->capa);\r
3102\r
3103 CHECK_NULL_RETURN_MEMERR(p);\r
3104 STR_(node)->s = p;\r
3105 STR_(node)->capa = capa;\r
3106 }\r
3107 }\r
3108 else {\r
3109 onig_strcpy(STR_(node)->s + len, s, end);\r
3110 }\r
3111 STR_(node)->end = STR_(node)->s + len + addlen;\r
3112 }\r
3113\r
3114 return 0;\r
3115}\r
3116\r
3117extern int\r
3118onig_node_str_set(Node* node, const UChar* s, const UChar* end)\r
3119{\r
3120 onig_node_str_clear(node);\r
3121 return onig_node_str_cat(node, s, end);\r
3122}\r
3123\r
3124static int\r
3125node_str_cat_char(Node* node, UChar c)\r
3126{\r
3127 UChar s[1];\r
3128\r
3129 s[0] = c;\r
3130 return onig_node_str_cat(node, s, s + 1);\r
3131}\r
3132\r
3133extern void\r
3134onig_node_conv_to_str_node(Node* node, int flag)\r
3135{\r
3136 NODE_SET_TYPE(node, NODE_STRING);\r
3137 STR_(node)->flag = flag;\r
3138 STR_(node)->capa = 0;\r
3139 STR_(node)->s = STR_(node)->buf;\r
3140 STR_(node)->end = STR_(node)->buf;\r
3141}\r
3142\r
3143extern void\r
3144onig_node_str_clear(Node* node)\r
3145{\r
3146 if (STR_(node)->capa != 0 &&\r
3147 IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) {\r
3148 xfree(STR_(node)->s);\r
3149 }\r
3150\r
3151 STR_(node)->capa = 0;\r
3152 STR_(node)->flag = 0;\r
3153 STR_(node)->s = STR_(node)->buf;\r
3154 STR_(node)->end = STR_(node)->buf;\r
3155}\r
3156\r
3157static Node*\r
3158node_new_str(const UChar* s, const UChar* end)\r
3159{\r
3160 Node* node = node_new();\r
3161 CHECK_NULL_RETURN(node);\r
3162\r
3163 NODE_SET_TYPE(node, NODE_STRING);\r
3164 STR_(node)->capa = 0;\r
3165 STR_(node)->flag = 0;\r
3166 STR_(node)->s = STR_(node)->buf;\r
3167 STR_(node)->end = STR_(node)->buf;\r
3168 if (onig_node_str_cat(node, s, end)) {\r
3169 onig_node_free(node);\r
3170 return NULL;\r
3171 }\r
3172 return node;\r
3173}\r
3174\r
3175extern Node*\r
3176onig_node_new_str(const UChar* s, const UChar* end)\r
3177{\r
3178 return node_new_str(s, end);\r
3179}\r
3180\r
3181static Node*\r
3182node_new_str_raw(UChar* s, UChar* end)\r
3183{\r
3184 Node* node = node_new_str(s, end);\r
a5def177 3185 CHECK_NULL_RETURN(node);\r
b602265d
DG
3186 NODE_STRING_SET_RAW(node);\r
3187 return node;\r
3188}\r
3189\r
3190static Node*\r
3191node_new_empty(void)\r
3192{\r
3193 return node_new_str(NULL, NULL);\r
3194}\r
3195\r
3196static Node*\r
3197node_new_str_raw_char(UChar c)\r
3198{\r
3199 UChar p[1];\r
3200\r
3201 p[0] = c;\r
3202 return node_new_str_raw(p, p + 1);\r
3203}\r
3204\r
3205static Node*\r
3206str_node_split_last_char(Node* node, OnigEncoding enc)\r
3207{\r
3208 const UChar *p;\r
3209 Node* rn;\r
3210 StrNode* sn;\r
3211\r
3212 sn = STR_(node);\r
3213 rn = NULL_NODE;\r
3214 if (sn->end > sn->s) {\r
3215 p = onigenc_get_prev_char_head(enc, sn->s, sn->end);\r
3216 if (p && p > sn->s) { /* can be split. */\r
3217 rn = node_new_str(p, sn->end);\r
a5def177 3218 CHECK_NULL_RETURN(rn);\r
b602265d
DG
3219 if (NODE_STRING_IS_RAW(node))\r
3220 NODE_STRING_SET_RAW(rn);\r
3221\r
3222 sn->end = (UChar* )p;\r
3223 }\r
3224 }\r
3225 return rn;\r
3226}\r
3227\r
3228static int\r
3229str_node_can_be_split(Node* node, OnigEncoding enc)\r
3230{\r
3231 StrNode* sn = STR_(node);\r
3232 if (sn->end > sn->s) {\r
3233 return ((enclen(enc, sn->s) < sn->end - sn->s) ? 1 : 0);\r
3234 }\r
3235 return 0;\r
3236}\r
3237\r
#ifdef USE_PAD_TO_SHORT_BYTE_CHAR
/* Insert `num` bytes of value `val` in front of the string's current
   content, shifting the content right by `num` bytes.
   Always returns 0 (the function is declared int but previously had no
   return statement).
   NOTE(review): the content is staged in a NODE_STRING_BUF_SIZE scratch
   buffer and written back at sn->s + num without any size check -- this
   presumably relies on callers only padding short strings; confirm. */
static int
node_str_head_pad(StrNode* sn, int num, UChar val)
{
  UChar buf[NODE_STRING_BUF_SIZE];
  int i, len;

  len = (int )(sn->end - sn->s);
  onig_strcpy(buf, sn->s, sn->end);
  onig_strcpy(&(sn->s[num]), buf, buf + len);
  sn->end += num;

  for (i = 0; i < num; i++) {
    sn->s[i] = val;
  }

  return 0;
}
#endif
3255\r
3256extern int\r
3257onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc)\r
3258{\r
3259 unsigned int num, val;\r
3260 OnigCodePoint c;\r
3261 UChar* p = *src;\r
3262 PFETCH_READY;\r
3263\r
3264 num = 0;\r
3265 while (! PEND) {\r
3266 PFETCH(c);\r
3267 if (IS_CODE_DIGIT_ASCII(enc, c)) {\r
3268 val = (unsigned int )DIGITVAL(c);\r
3269 if ((INT_MAX_LIMIT - val) / 10UL < num)\r
3270 return -1; /* overflow */\r
3271\r
3272 num = num * 10 + val;\r
3273 }\r
3274 else {\r
3275 PUNFETCH;\r
3276 break;\r
3277 }\r
3278 }\r
3279 *src = p;\r
3280 return num;\r
3281}\r
3282\r
3283static int\r
3284scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int minlen,\r
3285 int maxlen, OnigEncoding enc)\r
3286{\r
3287 OnigCodePoint c;\r
3288 unsigned int num, val;\r
3289 int n;\r
3290 UChar* p = *src;\r
3291 PFETCH_READY;\r
3292\r
3293 num = 0;\r
3294 n = 0;\r
3295 while (! PEND && n < maxlen) {\r
3296 PFETCH(c);\r
3297 if (IS_CODE_XDIGIT_ASCII(enc, c)) {\r
3298 n++;\r
3299 val = (unsigned int )XDIGITVAL(enc,c);\r
3300 if ((INT_MAX_LIMIT - val) / 16UL < num)\r
3301 return ONIGERR_TOO_BIG_NUMBER; /* overflow */\r
3302\r
3303 num = (num << 4) + XDIGITVAL(enc,c);\r
3304 }\r
3305 else {\r
3306 PUNFETCH;\r
3307 break;\r
3308 }\r
3309 }\r
3310\r
3311 if (n < minlen)\r
3312 return ONIGERR_INVALID_CODE_POINT_VALUE;\r
3313\r
3314 *src = p;\r
3315 return num;\r
3316}\r
3317\r
3318static int\r
3319scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen,\r
3320 OnigEncoding enc)\r
3321{\r
3322 OnigCodePoint c;\r
3323 unsigned int num, val;\r
3324 UChar* p = *src;\r
3325 PFETCH_READY;\r
3326\r
3327 num = 0;\r
3328 while (! PEND && maxlen-- != 0) {\r
3329 PFETCH(c);\r
3330 if (IS_CODE_DIGIT_ASCII(enc, c) && c < '8') {\r
3331 val = ODIGITVAL(c);\r
3332 if ((INT_MAX_LIMIT - val) / 8UL < num)\r
3333 return -1; /* overflow */\r
3334\r
3335 num = (num << 3) + val;\r
3336 }\r
3337 else {\r
3338 PUNFETCH;\r
3339 break;\r
3340 }\r
3341 }\r
3342 *src = p;\r
3343 return num;\r
3344}\r
3345\r
3346\r
/* Write one code point at byte offset `pos` of `bbuf`.  `code` must be an
   lvalue because its address is taken. */
#define BB_WRITE_CODE_POINT(bbuf,pos,code) \
  BB_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT)
3349\r
3350/* data format:\r
3351 [n][from-1][to-1][from-2][to-2] ... [from-n][to-n]\r
3352 (all data size is OnigCodePoint)\r
3353 */\r
3354static int\r
3355new_code_range(BBuf** pbuf)\r
3356{\r
3357#define INIT_MULTI_BYTE_RANGE_SIZE (SIZE_CODE_POINT * 5)\r
3358 int r;\r
3359 OnigCodePoint n;\r
3360 BBuf* bbuf;\r
3361\r
3362 bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf));\r
3363 CHECK_NULL_RETURN_MEMERR(bbuf);\r
3364 r = BB_INIT(bbuf, INIT_MULTI_BYTE_RANGE_SIZE);\r
3365 if (r != 0) {\r
3366 xfree(bbuf);\r
3367 *pbuf = 0;\r
3368 return r;\r
3369 }\r
3370\r
3371 n = 0;\r
3372 BB_WRITE_CODE_POINT(bbuf, 0, n);\r
3373 return 0;\r
3374}\r
3375\r
3376static int\r
3377add_code_range_to_buf(BBuf** pbuf, OnigCodePoint from, OnigCodePoint to)\r
3378{\r
3379 int r, inc_n, pos;\r
3380 int low, high, bound, x;\r
3381 OnigCodePoint n, *data;\r
3382 BBuf* bbuf;\r
3383\r
3384 if (from > to) {\r
3385 n = from; from = to; to = n;\r
3386 }\r
3387\r
3388 if (IS_NULL(*pbuf)) {\r
3389 r = new_code_range(pbuf);\r
3390 if (r != 0) return r;\r
3391 bbuf = *pbuf;\r
3392 n = 0;\r
3393 }\r
3394 else {\r
3395 bbuf = *pbuf;\r
3396 GET_CODE_POINT(n, bbuf->p);\r
3397 }\r
3398 data = (OnigCodePoint* )(bbuf->p);\r
3399 data++;\r
3400\r
3401 for (low = 0, bound = n; low < bound; ) {\r
3402 x = (low + bound) >> 1;\r
3403 if (from > data[x*2 + 1])\r
3404 low = x + 1;\r
3405 else\r
3406 bound = x;\r
3407 }\r
3408\r
3409 high = (to == ~((OnigCodePoint )0)) ? n : low;\r
3410 for (bound = n; high < bound; ) {\r
3411 x = (high + bound) >> 1;\r
3412 if (to + 1 >= data[x*2])\r
3413 high = x + 1;\r
3414 else\r
3415 bound = x;\r
3416 }\r
3417\r
3418 inc_n = low + 1 - high;\r
3419 if (n + inc_n > ONIG_MAX_MULTI_BYTE_RANGES_NUM)\r
3420 return ONIGERR_TOO_MANY_MULTI_BYTE_RANGES;\r
3421\r
3422 if (inc_n != 1) {\r
3423 if (from > data[low*2])\r
3424 from = data[low*2];\r
3425 if (to < data[(high - 1)*2 + 1])\r
3426 to = data[(high - 1)*2 + 1];\r
3427 }\r
3428\r
3429 if (inc_n != 0 && (OnigCodePoint )high < n) {\r
3430 int from_pos = SIZE_CODE_POINT * (1 + high * 2);\r
3431 int to_pos = SIZE_CODE_POINT * (1 + (low + 1) * 2);\r
3432 int size = (n - high) * 2 * SIZE_CODE_POINT;\r
3433\r
3434 if (inc_n > 0) {\r
3435 BB_MOVE_RIGHT(bbuf, from_pos, to_pos, size);\r
3436 }\r
3437 else {\r
3438 BB_MOVE_LEFT_REDUCE(bbuf, from_pos, to_pos);\r
3439 }\r
3440 }\r
3441\r
3442 pos = SIZE_CODE_POINT * (1 + low * 2);\r
3443 BB_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2);\r
3444 BB_WRITE_CODE_POINT(bbuf, pos, from);\r
3445 BB_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to);\r
3446 n += inc_n;\r
3447 BB_WRITE_CODE_POINT(bbuf, 0, n);\r
3448\r
3449 return 0;\r
3450}\r
3451\r
3452static int\r
3453add_code_range(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to)\r
3454{\r
3455 if (from > to) {\r
3456 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))\r
3457 return 0;\r
3458 else\r
3459 return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;\r
3460 }\r
3461\r
3462 return add_code_range_to_buf(pbuf, from, to);\r
3463}\r
3464\r
3465static int\r
3466not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf)\r
3467{\r
3468 int r, i, n;\r
3469 OnigCodePoint pre, from, *data, to = 0;\r
3470\r
3471 *pbuf = (BBuf* )NULL;\r
3472 if (IS_NULL(bbuf)) {\r
3473 set_all:\r
3474 return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);\r
3475 }\r
3476\r
3477 data = (OnigCodePoint* )(bbuf->p);\r
3478 GET_CODE_POINT(n, data);\r
3479 data++;\r
3480 if (n <= 0) goto set_all;\r
3481\r
3482 r = 0;\r
3483 pre = MBCODE_START_POS(enc);\r
3484 for (i = 0; i < n; i++) {\r
3485 from = data[i*2];\r
3486 to = data[i*2+1];\r
3487 if (pre <= from - 1) {\r
3488 r = add_code_range_to_buf(pbuf, pre, from - 1);\r
3489 if (r != 0) return r;\r
3490 }\r
3491 if (to == ~((OnigCodePoint )0)) break;\r
3492 pre = to + 1;\r
3493 }\r
3494 if (to < ~((OnigCodePoint )0)) {\r
3495 r = add_code_range_to_buf(pbuf, to + 1, ~((OnigCodePoint )0));\r
3496 }\r
3497 return r;\r
3498}\r
3499\r
/* Exchange the pair (bbuf1, not1) with the pair (bbuf2, not2).
   All four arguments must be assignable. */
#define SWAP_BB_NOT(bbuf1, not1, bbuf2, not2) do {\
  BBuf *tbuf = bbuf1; \
  int   tnot = not1; \
  bbuf1 = bbuf2; bbuf2 = tbuf; \
  not1  = not2;  not2  = tnot; \
} while (0)
3506\r
3507static int\r
3508or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1,\r
3509 BBuf* bbuf2, int not2, BBuf** pbuf)\r
3510{\r
3511 int r;\r
3512 OnigCodePoint i, n1, *data1;\r
3513 OnigCodePoint from, to;\r
3514\r
3515 *pbuf = (BBuf* )NULL;\r
3516 if (IS_NULL(bbuf1) && IS_NULL(bbuf2)) {\r
3517 if (not1 != 0 || not2 != 0)\r
3518 return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);\r
3519 return 0;\r
3520 }\r
3521\r
3522 r = 0;\r
3523 if (IS_NULL(bbuf2))\r
3524 SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);\r
3525\r
3526 if (IS_NULL(bbuf1)) {\r
3527 if (not1 != 0) {\r
3528 return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);\r
3529 }\r
3530 else {\r
3531 if (not2 == 0) {\r
3532 return bbuf_clone(pbuf, bbuf2);\r
3533 }\r
3534 else {\r
3535 return not_code_range_buf(enc, bbuf2, pbuf);\r
3536 }\r
3537 }\r
3538 }\r
3539\r
3540 if (not1 != 0)\r
3541 SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);\r
3542\r
3543 data1 = (OnigCodePoint* )(bbuf1->p);\r
3544 GET_CODE_POINT(n1, data1);\r
3545 data1++;\r
3546\r
3547 if (not2 == 0 && not1 == 0) { /* 1 OR 2 */\r
3548 r = bbuf_clone(pbuf, bbuf2);\r
3549 }\r
3550 else if (not1 == 0) { /* 1 OR (not 2) */\r
3551 r = not_code_range_buf(enc, bbuf2, pbuf);\r
3552 }\r
3553 if (r != 0) return r;\r
3554\r
3555 for (i = 0; i < n1; i++) {\r
3556 from = data1[i*2];\r
3557 to = data1[i*2+1];\r
3558 r = add_code_range_to_buf(pbuf, from, to);\r
3559 if (r != 0) return r;\r
3560 }\r
3561 return 0;\r
3562}\r
3563\r
3564static int\r
3565and_code_range1(BBuf** pbuf, OnigCodePoint from1, OnigCodePoint to1,\r
3566 OnigCodePoint* data, int n)\r
3567{\r
3568 int i, r;\r
3569 OnigCodePoint from2, to2;\r
3570\r
3571 for (i = 0; i < n; i++) {\r
3572 from2 = data[i*2];\r
3573 to2 = data[i*2+1];\r
3574 if (from2 < from1) {\r
3575 if (to2 < from1) continue;\r
3576 else {\r
3577 from1 = to2 + 1;\r
3578 }\r
3579 }\r
3580 else if (from2 <= to1) {\r
3581 if (to2 < to1) {\r
3582 if (from1 <= from2 - 1) {\r
3583 r = add_code_range_to_buf(pbuf, from1, from2-1);\r
3584 if (r != 0) return r;\r
3585 }\r
3586 from1 = to2 + 1;\r
3587 }\r
3588 else {\r
3589 to1 = from2 - 1;\r
3590 }\r
3591 }\r
3592 else {\r
3593 from1 = from2;\r
3594 }\r
3595 if (from1 > to1) break;\r
3596 }\r
3597 if (from1 <= to1) {\r
3598 r = add_code_range_to_buf(pbuf, from1, to1);\r
3599 if (r != 0) return r;\r
3600 }\r
3601 return 0;\r
3602}\r
3603\r
3604static int\r
3605and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)\r
3606{\r
3607 int r;\r
3608 OnigCodePoint i, j, n1, n2, *data1, *data2;\r
3609 OnigCodePoint from, to, from1, to1, from2, to2;\r
3610\r
3611 *pbuf = (BBuf* )NULL;\r
3612 if (IS_NULL(bbuf1)) {\r
3613 if (not1 != 0 && IS_NOT_NULL(bbuf2)) /* not1 != 0 -> not2 == 0 */\r
3614 return bbuf_clone(pbuf, bbuf2);\r
3615 return 0;\r
3616 }\r
3617 else if (IS_NULL(bbuf2)) {\r
3618 if (not2 != 0)\r
3619 return bbuf_clone(pbuf, bbuf1);\r
3620 return 0;\r
3621 }\r
3622\r
3623 if (not1 != 0)\r
3624 SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);\r
3625\r
3626 data1 = (OnigCodePoint* )(bbuf1->p);\r
3627 data2 = (OnigCodePoint* )(bbuf2->p);\r
3628 GET_CODE_POINT(n1, data1);\r
3629 GET_CODE_POINT(n2, data2);\r
3630 data1++;\r
3631 data2++;\r
3632\r
3633 if (not2 == 0 && not1 == 0) { /* 1 AND 2 */\r
3634 for (i = 0; i < n1; i++) {\r
3635 from1 = data1[i*2];\r
3636 to1 = data1[i*2+1];\r
3637 for (j = 0; j < n2; j++) {\r
3638 from2 = data2[j*2];\r
3639 to2 = data2[j*2+1];\r
3640 if (from2 > to1) break;\r
3641 if (to2 < from1) continue;\r
3642 from = MAX(from1, from2);\r
3643 to = MIN(to1, to2);\r
3644 r = add_code_range_to_buf(pbuf, from, to);\r
3645 if (r != 0) return r;\r
3646 }\r
3647 }\r
3648 }\r
3649 else if (not1 == 0) { /* 1 AND (not 2) */\r
3650 for (i = 0; i < n1; i++) {\r
3651 from1 = data1[i*2];\r
3652 to1 = data1[i*2+1];\r
14b0e578
CS
3653 r = and_code_range1(pbuf, from1, to1, data2, n2);\r
3654 if (r != 0) return r;\r
3655 }\r
3656 }\r
3657\r
3658 return 0;\r
3659}\r
3660\r
3661static int\r
3662and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)\r
3663{\r
3664 int r, not1, not2;\r
3665 BBuf *buf1, *buf2, *pbuf;\r
3666 BitSetRef bsr1, bsr2;\r
3667 BitSet bs1, bs2;\r
3668\r
3669 not1 = IS_NCCLASS_NOT(dest);\r
3670 bsr1 = dest->bs;\r
3671 buf1 = dest->mbuf;\r
3672 not2 = IS_NCCLASS_NOT(cc);\r
3673 bsr2 = cc->bs;\r
3674 buf2 = cc->mbuf;\r
3675\r
3676 if (not1 != 0) {\r
3677 bitset_invert_to(bsr1, bs1);\r
3678 bsr1 = bs1;\r
3679 }\r
3680 if (not2 != 0) {\r
3681 bitset_invert_to(bsr2, bs2);\r
3682 bsr2 = bs2;\r
3683 }\r
3684 bitset_and(bsr1, bsr2);\r
3685 if (bsr1 != dest->bs) {\r
3686 bitset_copy(dest->bs, bsr1);\r
14b0e578
CS
3687 }\r
3688 if (not1 != 0) {\r
3689 bitset_invert(dest->bs);\r
3690 }\r
3691\r
3692 if (! ONIGENC_IS_SINGLEBYTE(enc)) {\r
3693 if (not1 != 0 && not2 != 0) {\r
3694 r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf);\r
3695 }\r
3696 else {\r
3697 r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf);\r
3698 if (r == 0 && not1 != 0) {\r
b602265d
DG
3699 BBuf *tbuf;\r
3700 r = not_code_range_buf(enc, pbuf, &tbuf);\r
3701 if (r != 0) {\r
3702 bbuf_free(pbuf);\r
3703 return r;\r
3704 }\r
3705 bbuf_free(pbuf);\r
3706 pbuf = tbuf;\r
14b0e578
CS
3707 }\r
3708 }\r
3709 if (r != 0) return r;\r
3710\r
3711 dest->mbuf = pbuf;\r
3712 bbuf_free(buf1);\r
3713 return r;\r
3714 }\r
3715 return 0;\r
3716}\r
3717\r
3718static int\r
3719or_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)\r
3720{\r
3721 int r, not1, not2;\r
3722 BBuf *buf1, *buf2, *pbuf;\r
3723 BitSetRef bsr1, bsr2;\r
3724 BitSet bs1, bs2;\r
3725\r
3726 not1 = IS_NCCLASS_NOT(dest);\r
3727 bsr1 = dest->bs;\r
3728 buf1 = dest->mbuf;\r
3729 not2 = IS_NCCLASS_NOT(cc);\r
3730 bsr2 = cc->bs;\r
3731 buf2 = cc->mbuf;\r
3732\r
3733 if (not1 != 0) {\r
3734 bitset_invert_to(bsr1, bs1);\r
3735 bsr1 = bs1;\r
3736 }\r
3737 if (not2 != 0) {\r
3738 bitset_invert_to(bsr2, bs2);\r
3739 bsr2 = bs2;\r
3740 }\r
3741 bitset_or(bsr1, bsr2);\r
3742 if (bsr1 != dest->bs) {\r
3743 bitset_copy(dest->bs, bsr1);\r
14b0e578
CS
3744 }\r
3745 if (not1 != 0) {\r
3746 bitset_invert(dest->bs);\r
3747 }\r
3748\r
3749 if (! ONIGENC_IS_SINGLEBYTE(enc)) {\r
3750 if (not1 != 0 && not2 != 0) {\r
3751 r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf);\r
3752 }\r
3753 else {\r
3754 r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf);\r
3755 if (r == 0 && not1 != 0) {\r
b602265d
DG
3756 BBuf *tbuf;\r
3757 r = not_code_range_buf(enc, pbuf, &tbuf);\r
3758 if (r != 0) {\r
3759 bbuf_free(pbuf);\r
3760 return r;\r
3761 }\r
3762 bbuf_free(pbuf);\r
3763 pbuf = tbuf;\r
14b0e578
CS
3764 }\r
3765 }\r
3766 if (r != 0) return r;\r
3767\r
3768 dest->mbuf = pbuf;\r
3769 bbuf_free(buf1);\r
3770 return r;\r
3771 }\r
3772 else\r
3773 return 0;\r
3774}\r
3775\r
b602265d
DG
3776static OnigCodePoint\r
3777conv_backslash_value(OnigCodePoint c, ScanEnv* env)\r
14b0e578
CS
3778{\r
3779 if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_CONTROL_CHARS)) {\r
3780 switch (c) {\r
3781 case 'n': return '\n';\r
3782 case 't': return '\t';\r
3783 case 'r': return '\r';\r
3784 case 'f': return '\f';\r
3785 case 'a': return '\007';\r
3786 case 'b': return '\010';\r
3787 case 'e': return '\033';\r
3788 case 'v':\r
3789 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_V_VTAB))\r
b602265d 3790 return '\v';\r
14b0e578
CS
3791 break;\r
3792\r
3793 default:\r
3794 break;\r
3795 }\r
3796 }\r
3797 return c;\r
3798}\r
3799\r
3800static int\r
3801is_invalid_quantifier_target(Node* node)\r
3802{\r
b602265d
DG
3803 switch (NODE_TYPE(node)) {\r
3804 case NODE_ANCHOR:\r
3805 case NODE_GIMMICK:\r
14b0e578
CS
3806 return 1;\r
3807 break;\r
3808\r
b602265d 3809 case NODE_ENCLOSURE:\r
14b0e578 3810 /* allow enclosed elements */\r
b602265d 3811 /* return is_invalid_quantifier_target(NODE_BODY(node)); */\r
14b0e578
CS
3812 break;\r
3813\r
b602265d 3814 case NODE_LIST:\r
14b0e578 3815 do {\r
b602265d
DG
3816 if (! is_invalid_quantifier_target(NODE_CAR(node))) return 0;\r
3817 } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
14b0e578
CS
3818 return 0;\r
3819 break;\r
3820\r
b602265d 3821 case NODE_ALT:\r
14b0e578 3822 do {\r
b602265d
DG
3823 if (is_invalid_quantifier_target(NODE_CAR(node))) return 1;\r
3824 } while (IS_NOT_NULL(node = NODE_CDR(node)));\r
14b0e578
CS
3825 break;\r
3826\r
3827 default:\r
3828 break;\r
3829 }\r
3830 return 0;\r
3831}\r
3832\r
3833/* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */\r
3834static int\r
b602265d 3835quantifier_type_num(QuantNode* q)\r
14b0e578
CS
3836{\r
3837 if (q->greedy) {\r
3838 if (q->lower == 0) {\r
3839 if (q->upper == 1) return 0;\r
3840 else if (IS_REPEAT_INFINITE(q->upper)) return 1;\r
3841 }\r
3842 else if (q->lower == 1) {\r
3843 if (IS_REPEAT_INFINITE(q->upper)) return 2;\r
3844 }\r
3845 }\r
3846 else {\r
3847 if (q->lower == 0) {\r
3848 if (q->upper == 1) return 3;\r
3849 else if (IS_REPEAT_INFINITE(q->upper)) return 4;\r
3850 }\r
3851 else if (q->lower == 1) {\r
3852 if (IS_REPEAT_INFINITE(q->upper)) return 5;\r
3853 }\r
3854 }\r
3855 return -1;\r
3856}\r
3857\r
3858\r
/* How a quantifier applied directly to another quantifier is simplified. */
enum ReduceType {
  RQ_ASIS = 0, /* as is */
  RQ_DEL  = 1, /* delete parent */
  RQ_A,        /* to '*'    */
  RQ_AQ,       /* to '*?'   */
  RQ_QQ,       /* to '??'   */
  RQ_P_QQ,     /* to '+)??' */
  RQ_PQ_Q      /* to '+?)?' */
};

/*
 * Reduction to apply for each pair of simple quantifiers.  Both indices are
 * quantifier_type_num() values (?:0, *:1, +:2, ??:3, *?:4, +?:5); the table
 * is looked up as ReduceTypeTable[child][parent].  The table is read-only,
 * hence const.
 */
static const enum ReduceType ReduceTypeTable[6][6] = {
  {RQ_DEL,  RQ_A,    RQ_A,   RQ_QQ,   RQ_AQ,   RQ_ASIS}, /* '?'  */
  {RQ_DEL,  RQ_DEL,  RQ_DEL, RQ_P_QQ, RQ_P_QQ, RQ_DEL},  /* '*'  */
  {RQ_A,    RQ_A,    RQ_DEL, RQ_ASIS, RQ_P_QQ, RQ_DEL},  /* '+'  */
  {RQ_DEL,  RQ_AQ,   RQ_AQ,  RQ_DEL,  RQ_AQ,   RQ_AQ},   /* '??' */
  {RQ_DEL,  RQ_DEL,  RQ_DEL, RQ_DEL,  RQ_DEL,  RQ_DEL},  /* '*?' */
  {RQ_ASIS, RQ_PQ_Q, RQ_DEL, RQ_AQ,   RQ_AQ,   RQ_DEL}   /* '+?' */
};
3877\r
3878extern void\r
3879onig_reduce_nested_quantifier(Node* pnode, Node* cnode)\r
3880{\r
3881 int pnum, cnum;\r
b602265d
DG
3882 QuantNode *p, *c;\r
3883\r
3884 p = QUANT_(pnode);\r
3885 c = QUANT_(cnode);\r
3886 pnum = quantifier_type_num(p);\r
3887 cnum = quantifier_type_num(c);\r
3888 if (pnum < 0 || cnum < 0) {\r
3889 if ((p->lower == p->upper) && ! IS_REPEAT_INFINITE(p->upper)) {\r
3890 if ((c->lower == c->upper) && ! IS_REPEAT_INFINITE(c->upper)) {\r
3891 int n = positive_int_multiply(p->lower, c->lower);\r
3892 if (n >= 0) {\r
3893 p->lower = p->upper = n;\r
3894 NODE_BODY(pnode) = NODE_BODY(cnode);\r
3895 goto remove_cnode;\r
3896 }\r
3897 }\r
3898 }\r
14b0e578 3899\r
b602265d
DG
3900 return ;\r
3901 }\r
14b0e578
CS
3902\r
3903 switch(ReduceTypeTable[cnum][pnum]) {\r
3904 case RQ_DEL:\r
b602265d 3905 *pnode = *cnode;\r
14b0e578
CS
3906 break;\r
3907 case RQ_A:\r
b602265d 3908 NODE_BODY(pnode) = NODE_BODY(cnode);\r
14b0e578
CS
3909 p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 1;\r
3910 break;\r
3911 case RQ_AQ:\r
b602265d 3912 NODE_BODY(pnode) = NODE_BODY(cnode);\r
14b0e578
CS
3913 p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 0;\r
3914 break;\r
3915 case RQ_QQ:\r
b602265d 3916 NODE_BODY(pnode) = NODE_BODY(cnode);\r
14b0e578
CS
3917 p->lower = 0; p->upper = 1; p->greedy = 0;\r
3918 break;\r
3919 case RQ_P_QQ:\r
b602265d 3920 NODE_BODY(pnode) = cnode;\r
14b0e578
CS
3921 p->lower = 0; p->upper = 1; p->greedy = 0;\r
3922 c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 1;\r
3923 return ;\r
3924 break;\r
3925 case RQ_PQ_Q:\r
b602265d 3926 NODE_BODY(pnode) = cnode;\r
14b0e578
CS
3927 p->lower = 0; p->upper = 1; p->greedy = 1;\r
3928 c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 0;\r
3929 return ;\r
3930 break;\r
3931 case RQ_ASIS:\r
b602265d 3932 NODE_BODY(pnode) = cnode;\r
14b0e578
CS
3933 return ;\r
3934 break;\r
3935 }\r
3936\r
b602265d
DG
3937 remove_cnode:\r
3938 NODE_BODY(cnode) = NULL_NODE;\r
14b0e578
CS
3939 onig_node_free(cnode);\r
3940}\r
3941\r
b602265d
DG
3942static int\r
3943node_new_general_newline(Node** node, ScanEnv* env)\r
3944{\r
3945 int r;\r
3946 int dlen, alen;\r
3947 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN * 2];\r
3948 Node* crnl;\r
3949 Node* ncc;\r
3950 Node* x;\r
3951 CClassNode* cc;\r
3952\r
3953 dlen = ONIGENC_CODE_TO_MBC(env->enc, 0x0d, buf);\r
3954 if (dlen < 0) return dlen;\r
3955 alen = ONIGENC_CODE_TO_MBC(env->enc, 0x0a, buf + dlen);\r
3956 if (alen < 0) return alen;\r
3957\r
3958 crnl = node_new_str_raw(buf, buf + dlen + alen);\r
3959 CHECK_NULL_RETURN_MEMERR(crnl);\r
3960\r
3961 ncc = node_new_cclass();\r
3962 if (IS_NULL(ncc)) goto err2;\r
3963\r
3964 cc = CCLASS_(ncc);\r
3965 if (dlen == 1) {\r
3966 bitset_set_range(cc->bs, 0x0a, 0x0d);\r
3967 }\r
3968 else {\r
3969 r = add_code_range(&(cc->mbuf), env, 0x0a, 0x0d);\r
3970 if (r != 0) {\r
3971 err1:\r
3972 onig_node_free(ncc);\r
3973 err2:\r
3974 onig_node_free(crnl);\r
3975 return ONIGERR_MEMORY;\r
3976 }\r
3977 }\r
3978\r
3979 if (ONIGENC_IS_UNICODE_ENCODING(env->enc)) {\r
3980 r = add_code_range(&(cc->mbuf), env, 0x85, 0x85);\r
3981 if (r != 0) goto err1;\r
3982 r = add_code_range(&(cc->mbuf), env, 0x2028, 0x2029);\r
3983 if (r != 0) goto err1;\r
3984 }\r
3985\r
3986 x = node_new_enclosure_if_else(crnl, 0, ncc);\r
3987 if (IS_NULL(x)) goto err1;\r
3988\r
3989 *node = x;\r
3990 return 0;\r
3991}\r
14b0e578
CS
3992\r
/* Token kinds produced by the pattern tokenizer. */
enum TokenSyms {
  TK_EOT      = 0,   /* end of token */
  TK_RAW_BYTE = 1,
  TK_CHAR,
  TK_STRING,
  TK_CODE_POINT,
  TK_ANYCHAR,
  TK_CHAR_TYPE,
  TK_BACKREF,
  TK_CALL,
  TK_ANCHOR,
  TK_OP_REPEAT,
  TK_INTERVAL,
  TK_ANYCHAR_ANYTIME,           /* SQL '%' == .* */
  TK_ALT,
  TK_SUBEXP_OPEN,
  TK_SUBEXP_CLOSE,
  TK_CC_OPEN,
  TK_QUOTE_OPEN,
  TK_CHAR_PROPERTY,             /* \p{...}, \P{...} */
  TK_KEEP,                      /* \K */
  TK_GENERAL_NEWLINE,           /* \R */
  TK_NO_NEWLINE,                /* \N */
  TK_TRUE_ANYCHAR,              /* \O */
  TK_EXTENDED_GRAPHEME_CLUSTER, /* \X */

  /* tokens that only occur inside a character class */
  TK_CC_CLOSE,
  TK_CC_RANGE,
  TK_POSIX_BRACKET_OPEN,
  TK_CC_AND,                    /* && */
  TK_CC_CC_OPEN                 /* [ */
};
4026\r
4027typedef struct {\r
4028 enum TokenSyms type;\r
4029 int escaped;\r
4030 int base; /* is number: 8, 16 (used in [....]) */\r
4031 UChar* backp;\r
4032 union {\r
4033 UChar* s;\r
4034 int c;\r
4035 OnigCodePoint code;\r
4036 int anchor;\r
4037 int subtype;\r
4038 struct {\r
4039 int lower;\r
4040 int upper;\r
4041 int greedy;\r
4042 int possessive;\r
4043 } repeat;\r
4044 struct {\r
4045 int num;\r
4046 int ref1;\r
4047 int* refs;\r
4048 int by_name;\r
4049#ifdef USE_BACKREF_WITH_LEVEL\r
4050 int exist_level;\r
4051 int level; /* \k<name+n> */\r
4052#endif\r
4053 } backref;\r
4054 struct {\r
4055 UChar* name;\r
4056 UChar* name_end;\r
4057 int gnum;\r
b602265d 4058 int by_number;\r
14b0e578
CS
4059 } call;\r
4060 struct {\r
4061 int ctype;\r
4062 int not;\r
4063 } prop;\r
4064 } u;\r
4065} OnigToken;\r
4066\r
4067\r
4068static int\r
4069fetch_range_quantifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env)\r
4070{\r
4071 int low, up, syn_allow, non_low = 0;\r
4072 int r = 0;\r
4073 OnigCodePoint c;\r
4074 OnigEncoding enc = env->enc;\r
4075 UChar* p = *src;\r
4076 PFETCH_READY;\r
4077\r
4078 syn_allow = IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INVALID_INTERVAL);\r
4079\r
4080 if (PEND) {\r
4081 if (syn_allow)\r
4082 return 1; /* "....{" : OK! */\r
4083 else\r
4084 return ONIGERR_END_PATTERN_AT_LEFT_BRACE; /* "....{" syntax error */\r
4085 }\r
4086\r
4087 if (! syn_allow) {\r
4088 c = PPEEK;\r
4089 if (c == ')' || c == '(' || c == '|') {\r
4090 return ONIGERR_END_PATTERN_AT_LEFT_BRACE;\r
4091 }\r
4092 }\r
4093\r
4094 low = onig_scan_unsigned_number(&p, end, env->enc);\r
4095 if (low < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;\r
4096 if (low > ONIG_MAX_REPEAT_NUM)\r
4097 return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;\r
4098\r
4099 if (p == *src) { /* can't read low */\r
4100 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV)) {\r
4101 /* allow {,n} as {0,n} */\r
4102 low = 0;\r
4103 non_low = 1;\r
4104 }\r
4105 else\r
4106 goto invalid;\r
4107 }\r
4108\r
4109 if (PEND) goto invalid;\r
4110 PFETCH(c);\r
4111 if (c == ',') {\r
4112 UChar* prev = p;\r
4113 up = onig_scan_unsigned_number(&p, end, env->enc);\r
4114 if (up < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;\r
4115 if (up > ONIG_MAX_REPEAT_NUM)\r
4116 return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;\r
4117\r
4118 if (p == prev) {\r
4119 if (non_low != 0)\r
b602265d 4120 goto invalid;\r
14b0e578
CS
4121 up = REPEAT_INFINITE; /* {n,} : {n,infinite} */\r
4122 }\r
4123 }\r
4124 else {\r
4125 if (non_low != 0)\r
4126 goto invalid;\r
4127\r
4128 PUNFETCH;\r
4129 up = low; /* {n} : exact n times */\r
4130 r = 2; /* fixed */\r
4131 }\r
4132\r
4133 if (PEND) goto invalid;\r
4134 PFETCH(c);\r
4135 if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) {\r
4136 if (c != MC_ESC(env->syntax)) goto invalid;\r
4137 PFETCH(c);\r
4138 }\r
4139 if (c != '}') goto invalid;\r
4140\r
4141 if (!IS_REPEAT_INFINITE(up) && low > up) {\r
4142 return ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE;\r
4143 }\r
4144\r
4145 tok->type = TK_INTERVAL;\r
4146 tok->u.repeat.lower = low;\r
4147 tok->u.repeat.upper = up;\r
4148 *src = p;\r
4149 return r; /* 0: normal {n,m}, 2: fixed {n} */\r
4150\r
4151 invalid:\r
b602265d
DG
4152 if (syn_allow) {\r
4153 /* *src = p; */ /* !!! Don't do this line !!! */\r
14b0e578 4154 return 1; /* OK */\r
b602265d 4155 }\r
14b0e578
CS
4156 else\r
4157 return ONIGERR_INVALID_REPEAT_RANGE_PATTERN;\r
4158}\r
4159\r
4160/* \M-, \C-, \c, or \... */\r
4161static int\r
b602265d 4162fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env, OnigCodePoint* val)\r
14b0e578
CS
4163{\r
4164 int v;\r
4165 OnigCodePoint c;\r
4166 OnigEncoding enc = env->enc;\r
4167 UChar* p = *src;\r
4168\r
4169 if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;\r
4170\r
4171 PFETCH_S(c);\r
4172 switch (c) {\r
4173 case 'M':\r
4174 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META)) {\r
4175 if (PEND) return ONIGERR_END_PATTERN_AT_META;\r
4176 PFETCH_S(c);\r
4177 if (c != '-') return ONIGERR_META_CODE_SYNTAX;\r
4178 if (PEND) return ONIGERR_END_PATTERN_AT_META;\r
4179 PFETCH_S(c);\r
4180 if (c == MC_ESC(env->syntax)) {\r
b602265d 4181 v = fetch_escaped_value(&p, end, env, &c);\r
14b0e578 4182 if (v < 0) return v;\r
14b0e578
CS
4183 }\r
4184 c = ((c & 0xff) | 0x80);\r
4185 }\r
4186 else\r
4187 goto backslash;\r
4188 break;\r
4189\r
4190 case 'C':\r
4191 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL)) {\r
4192 if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;\r
4193 PFETCH_S(c);\r
4194 if (c != '-') return ONIGERR_CONTROL_CODE_SYNTAX;\r
4195 goto control;\r
4196 }\r
4197 else\r
4198 goto backslash;\r
4199\r
4200 case 'c':\r
4201 if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_C_CONTROL)) {\r
4202 control:\r
4203 if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;\r
4204 PFETCH_S(c);\r
4205 if (c == '?') {\r
4206 c = 0177;\r
4207 }\r
4208 else {\r
4209 if (c == MC_ESC(env->syntax)) {\r
b602265d 4210 v = fetch_escaped_value(&p, end, env, &c);\r
14b0e578 4211 if (v < 0) return v;\r
14b0e578
CS
4212 }\r
4213 c &= 0x9f;\r
4214 }\r
4215 break;\r
4216 }\r
4217 /* fall through */\r
4218\r
4219 default:\r
4220 {\r
4221 backslash:\r
4222 c = conv_backslash_value(c, env);\r
4223 }\r
4224 break;\r
4225 }\r
4226\r
4227 *src = p;\r
b602265d
DG
4228 *val = c;\r
4229 return 0;\r
14b0e578
CS
4230}\r
4231\r
4232static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env);\r
4233\r
4234static OnigCodePoint\r
4235get_name_end_code_point(OnigCodePoint start)\r
4236{\r
4237 switch (start) {\r
b602265d 4238 case '<': return (OnigCodePoint )'>'; break;\r
14b0e578 4239 case '\'': return (OnigCodePoint )'\''; break;\r
b602265d 4240 case '(': return (OnigCodePoint )')'; break;\r
14b0e578
CS
4241 default:\r
4242 break;\r
4243 }\r
4244\r
4245 return (OnigCodePoint )0;\r
4246}\r
4247\r
b602265d
DG
/* How a group reference was written. */
enum REF_NUM {
  IS_NOT_NUM = 0,  /* a name, not a number */
  IS_ABS_NUM = 1,  /* unsigned number */
  IS_REL_NUM = 2   /* number with a leading '+' or '-' */
};
4253\r
14b0e578
CS
#ifdef USE_BACKREF_WITH_LEVEL
/*
 * Parse a group reference that may carry a nest level:
 *
 *   \k<name+n>, \k<name-n>
 *   \k<num+n>,  \k<num-n>
 *   \k<-num+n>, \k<-num-n>
 *   \k<+num+n>, \k<+num-n>
 *
 * start_code is the opening delimiter; *src points just after it.
 *
 * Outputs on success:
 *   *rname_end  end of the name/number part
 *   *rback_num  signed group number when *num_type != IS_NOT_NUM, else 0
 *   *rlevel     signed level (written only when a level is present)
 *   *num_type   IS_NOT_NUM / IS_ABS_NUM / IS_REL_NUM
 *   *src        advanced past the closing delimiter
 *
 * Returns 1 if a level was given, 0 if not, or a negative ONIGERR_* code.
 * For most errors the offending text is recorded through
 * onig_scan_env_set_error_string().
 */
static int
fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
                      UChar** rname_end, ScanEnv* env,
                      int* rback_num, int* rlevel, enum REF_NUM* num_type)
{
  int r, sign, exist_level;
  int digit_count;
  OnigCodePoint end_code;
  OnigCodePoint c = 0;
  OnigEncoding enc = env->enc;
  UChar *name_end;
  UChar *pnum_head;
  UChar *p = *src;
  PFETCH_READY;

  *rback_num  = 0;
  *num_type   = IS_NOT_NUM;
  exist_level = 0;
  sign        = 1;
  digit_count = 0;
  pnum_head   = *src;
  name_end    = end;
  r           = 0;

  end_code = get_name_end_code_point(start_code);

  if (PEND)
    return ONIGERR_EMPTY_GROUP_NAME;

  /* The first character decides between name, number and signed number. */
  PFETCH(c);
  if (c == end_code)
    return ONIGERR_EMPTY_GROUP_NAME;

  if (IS_CODE_DIGIT_ASCII(enc, c)) {
    *num_type = IS_ABS_NUM;
    digit_count++;
  }
  else if (c == '-' || c == '+') {
    *num_type = IS_REL_NUM;
    sign = (c == '-' ? -1 : 1);
    pnum_head = p;  /* digits start after the sign */
  }
  else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
    r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
  }

  /* Scan up to the closing delimiter or the start of a level suffix. */
  while (!PEND) {
    name_end = p;
    PFETCH(c);
    if (c == end_code || c == ')' || c == '+' || c == '-') {
      if (*num_type != IS_NOT_NUM && digit_count == 0)
        r = ONIGERR_INVALID_GROUP_NAME;
      break;
    }

    if (*num_type != IS_NOT_NUM) {
      if (IS_CODE_DIGIT_ASCII(enc, c)) {
        digit_count++;
      }
      else {
        r = ONIGERR_INVALID_GROUP_NAME;
        *num_type = IS_NOT_NUM;
      }
    }
    else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
      r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
    }
  }

  if (r == 0 && c != end_code) {
    if (c == '+' || c == '-') {
      int level;
      int level_sign = (c == '-' ? -1 : 1);

      if (PEND) {
        r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
        goto end;
      }
      PFETCH(c);
      if (! IS_CODE_DIGIT_ASCII(enc, c)) goto err;
      PUNFETCH;
      level = onig_scan_unsigned_number(&p, end, enc);
      if (level < 0) return ONIGERR_TOO_BIG_NUMBER;
      *rlevel = (level * level_sign);
      exist_level = 1;

      if (!PEND) {
        PFETCH(c);
        if (c == end_code)
          goto end;
      }
    }

    /* Anything else after the name (or after the level) is an error. */
  err:
    name_end = end;
  err2:
    r = ONIGERR_INVALID_GROUP_NAME;
  }

 end:
  if (r == 0) {
    if (*num_type != IS_NOT_NUM) {
      *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
      if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
      else if (*rback_num == 0) {
        /* a relative reference of zero is not accepted */
        if (*num_type == IS_REL_NUM)
          goto err2;
      }

      *rback_num *= sign;
    }

    *rname_end = name_end;
    *src = p;
    return (exist_level ? 1 : 0);
  }
  else {
    onig_scan_env_set_error_string(env, r, *src, name_end);
    return r;
  }
}
#endif /* USE_BACKREF_WITH_LEVEL */
4390\r
4391/*\r
b602265d 4392 ref: 0 -> define name (don't allow number name)\r
14b0e578
CS
4393 1 -> reference name (allow number name)\r
4394*/\r
4395static int\r
4396fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,\r
b602265d
DG
4397 UChar** rname_end, ScanEnv* env, int* rback_num,\r
4398 enum REF_NUM* num_type, int ref)\r
14b0e578 4399{\r
b602265d
DG
4400 int r, sign;\r
4401 int digit_count;\r
14b0e578
CS
4402 OnigCodePoint end_code;\r
4403 OnigCodePoint c = 0;\r
4404 OnigEncoding enc = env->enc;\r
4405 UChar *name_end;\r
4406 UChar *pnum_head;\r
4407 UChar *p = *src;\r
4408\r
4409 *rback_num = 0;\r
4410\r
4411 end_code = get_name_end_code_point(start_code);\r
4412\r
b602265d 4413 digit_count = 0;\r
14b0e578
CS
4414 name_end = end;\r
4415 pnum_head = *src;\r
4416 r = 0;\r
b602265d 4417 *num_type = IS_NOT_NUM;\r
14b0e578
CS
4418 sign = 1;\r
4419 if (PEND) {\r
4420 return ONIGERR_EMPTY_GROUP_NAME;\r
4421 }\r
4422 else {\r
4423 PFETCH_S(c);\r
4424 if (c == end_code)\r
4425 return ONIGERR_EMPTY_GROUP_NAME;\r
4426\r
b602265d 4427 if (IS_CODE_DIGIT_ASCII(enc, c)) {\r
14b0e578 4428 if (ref == 1)\r
b602265d 4429 *num_type = IS_ABS_NUM;\r
14b0e578
CS
4430 else {\r
4431 r = ONIGERR_INVALID_GROUP_NAME;\r
14b0e578 4432 }\r
b602265d 4433 digit_count++;\r
14b0e578
CS
4434 }\r
4435 else if (c == '-') {\r
4436 if (ref == 1) {\r
b602265d 4437 *num_type = IS_REL_NUM;\r
14b0e578
CS
4438 sign = -1;\r
4439 pnum_head = p;\r
4440 }\r
4441 else {\r
4442 r = ONIGERR_INVALID_GROUP_NAME;\r
14b0e578
CS
4443 }\r
4444 }\r
b602265d
DG
4445 else if (c == '+') {\r
4446 if (ref == 1) {\r
4447 *num_type = IS_REL_NUM;\r
4448 sign = 1;\r
4449 pnum_head = p;\r
14b0e578
CS
4450 }\r
4451 else {\r
14b0e578 4452 r = ONIGERR_INVALID_GROUP_NAME;\r
14b0e578 4453 }\r
14b0e578 4454 }\r
b602265d 4455 else if (!ONIGENC_IS_CODE_WORD(enc, c)) {\r
14b0e578 4456 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;\r
b602265d 4457 }\r
14b0e578
CS
4458 }\r
4459\r
4460 if (r == 0) {\r
b602265d
DG
4461 while (!PEND) {\r
4462 name_end = p;\r
4463 PFETCH_S(c);\r
4464 if (c == end_code || c == ')') {\r
4465 if (*num_type != IS_NOT_NUM && digit_count == 0)\r
4466 r = ONIGERR_INVALID_GROUP_NAME;\r
4467 break;\r
4468 }\r
4469\r
4470 if (*num_type != IS_NOT_NUM) {\r
4471 if (IS_CODE_DIGIT_ASCII(enc, c)) {\r
4472 digit_count++;\r
4473 }\r
4474 else {\r
4475 if (!ONIGENC_IS_CODE_WORD(enc, c))\r
4476 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;\r
4477 else\r
4478 r = ONIGERR_INVALID_GROUP_NAME;\r
4479\r
4480 *num_type = IS_NOT_NUM;\r
4481 }\r
4482 }\r
4483 else {\r
4484 if (!ONIGENC_IS_CODE_WORD(enc, c)) {\r
4485 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;\r
4486 }\r
4487 }\r
4488 }\r
4489\r
4490 if (c != end_code) {\r
14b0e578
CS
4491 r = ONIGERR_INVALID_GROUP_NAME;\r
4492 goto err;\r
4493 }\r
b602265d
DG
4494\r
4495 if (*num_type != IS_NOT_NUM) {\r
4496 *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);\r
4497 if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;\r
4498 else if (*rback_num == 0) {\r
4499 if (*num_type == IS_REL_NUM) {\r
4500 r = ONIGERR_INVALID_GROUP_NAME;\r
4501 goto err;\r
4502 }\r
4503 }\r
4504\r
4505 *rback_num *= sign;\r
4506 }\r
14b0e578
CS
4507\r
4508 *rname_end = name_end;\r
4509 *src = p;\r
4510 return 0;\r
4511 }\r
4512 else {\r
b602265d
DG
4513 while (!PEND) {\r
4514 name_end = p;\r
4515 PFETCH_S(c);\r
4516 if (c == end_code || c == ')')\r
4517 break;\r
4518 }\r
4519 if (PEND)\r
4520 name_end = end;\r
4521\r
14b0e578
CS
4522 err:\r
4523 onig_scan_env_set_error_string(env, r, *src, name_end);\r
4524 return r;\r
4525 }\r
4526}\r
14b0e578
CS
4527\r
4528static void\r
4529CC_ESC_WARN(ScanEnv* env, UChar *c)\r
4530{\r
4531 if (onig_warn == onig_null_warn) return ;\r
4532\r
4533 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED) &&\r
4534 IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) {\r
4535 UChar buf[WARN_BUFSIZE];\r
4536 onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,\r
b602265d
DG
4537 env->pattern, env->pattern_end,\r
4538 (UChar* )"character class has '%s' without escape",\r
4539 c);\r
14b0e578
CS
4540 (*onig_warn)((char* )buf);\r
4541 }\r
4542}\r
4543\r
4544static void\r
4545CLOSE_BRACKET_WITHOUT_ESC_WARN(ScanEnv* env, UChar* c)\r
4546{\r
4547 if (onig_warn == onig_null_warn) return ;\r
4548\r
4549 if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) {\r
4550 UChar buf[WARN_BUFSIZE];\r
4551 onig_snprintf_with_pattern(buf, WARN_BUFSIZE, (env)->enc,\r
b602265d
DG
4552 (env)->pattern, (env)->pattern_end,\r
4553 (UChar* )"regular expression has '%s' without escape", c);\r
14b0e578
CS
4554 (*onig_warn)((char* )buf);\r
4555 }\r
4556}\r
4557\r
4558static UChar*\r
4559find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to,\r
b602265d 4560 UChar **next, OnigEncoding enc)\r
14b0e578
CS
4561{\r
4562 int i;\r
4563 OnigCodePoint x;\r
4564 UChar *q;\r
4565 UChar *p = from;\r
4566 \r
4567 while (p < to) {\r
4568 x = ONIGENC_MBC_TO_CODE(enc, p, to);\r
4569 q = p + enclen(enc, p);\r
4570 if (x == s[0]) {\r
4571 for (i = 1; i < n && q < to; i++) {\r
b602265d
DG
4572 x = ONIGENC_MBC_TO_CODE(enc, q, to);\r
4573 if (x != s[i]) break;\r
4574 q += enclen(enc, q);\r
14b0e578
CS
4575 }\r
4576 if (i >= n) {\r
b602265d
DG
4577 if (IS_NOT_NULL(next))\r
4578 *next = q;\r
4579 return p;\r
14b0e578
CS
4580 }\r
4581 }\r
4582 p = q;\r
4583 }\r
4584 return NULL_UCHARP;\r
4585}\r
4586\r
4587static int\r
4588str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to,\r
b602265d 4589 OnigCodePoint bad, OnigEncoding enc, OnigSyntaxType* syn)\r
14b0e578
CS
4590{\r
4591 int i, in_esc;\r
4592 OnigCodePoint x;\r
4593 UChar *q;\r
4594 UChar *p = from;\r
4595\r
4596 in_esc = 0;\r
4597 while (p < to) {\r
4598 if (in_esc) {\r
4599 in_esc = 0;\r
4600 p += enclen(enc, p);\r
4601 }\r
4602 else {\r
4603 x = ONIGENC_MBC_TO_CODE(enc, p, to);\r
4604 q = p + enclen(enc, p);\r
4605 if (x == s[0]) {\r
b602265d
DG
4606 for (i = 1; i < n && q < to; i++) {\r
4607 x = ONIGENC_MBC_TO_CODE(enc, q, to);\r
4608 if (x != s[i]) break;\r
4609 q += enclen(enc, q);\r
4610 }\r
4611 if (i >= n) return 1;\r
4612 p += enclen(enc, p);\r
14b0e578
CS
4613 }\r
4614 else {\r
b602265d
DG
4615 x = ONIGENC_MBC_TO_CODE(enc, p, to);\r
4616 if (x == bad) return 0;\r
4617 else if (x == MC_ESC(syn)) in_esc = 1;\r
4618 p = q;\r
14b0e578
CS
4619 }\r
4620 }\r
4621 }\r
4622 return 0;\r
4623}\r
4624\r
4625static int\r
4626fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)\r
4627{\r
4628 int num;\r
4629 OnigCodePoint c, c2;\r
4630 OnigSyntaxType* syn = env->syntax;\r
4631 OnigEncoding enc = env->enc;\r
4632 UChar* prev;\r
4633 UChar* p = *src;\r
4634 PFETCH_READY;\r
4635\r
4636 if (PEND) {\r
4637 tok->type = TK_EOT;\r
4638 return tok->type;\r
4639 }\r
4640\r
4641 PFETCH(c);\r
4642 tok->type = TK_CHAR;\r
4643 tok->base = 0;\r
4644 tok->u.c = c;\r
4645 tok->escaped = 0;\r
4646\r
4647 if (c == ']') {\r
4648 tok->type = TK_CC_CLOSE;\r
4649 }\r
4650 else if (c == '-') {\r
4651 tok->type = TK_CC_RANGE;\r
4652 }\r
4653 else if (c == MC_ESC(syn)) {\r
4654 if (! IS_SYNTAX_BV(syn, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC))\r
4655 goto end;\r
4656\r
4657 if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;\r
4658\r
4659 PFETCH(c);\r
4660 tok->escaped = 1;\r
4661 tok->u.c = c;\r
4662 switch (c) {\r
4663 case 'w':\r
4664 tok->type = TK_CHAR_TYPE;\r
4665 tok->u.prop.ctype = ONIGENC_CTYPE_WORD;\r
4666 tok->u.prop.not = 0;\r
4667 break;\r
4668 case 'W':\r
4669 tok->type = TK_CHAR_TYPE;\r
4670 tok->u.prop.ctype = ONIGENC_CTYPE_WORD;\r
4671 tok->u.prop.not = 1;\r
4672 break;\r
4673 case 'd':\r
4674 tok->type = TK_CHAR_TYPE;\r
4675 tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;\r
4676 tok->u.prop.not = 0;\r
4677 break;\r
4678 case 'D':\r
4679 tok->type = TK_CHAR_TYPE;\r
4680 tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;\r
4681 tok->u.prop.not = 1;\r
4682 break;\r
4683 case 's':\r
4684 tok->type = TK_CHAR_TYPE;\r
4685 tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;\r
4686 tok->u.prop.not = 0;\r
4687 break;\r
4688 case 'S':\r
4689 tok->type = TK_CHAR_TYPE;\r
4690 tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;\r
4691 tok->u.prop.not = 1;\r
4692 break;\r
4693 case 'h':\r
4694 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;\r
4695 tok->type = TK_CHAR_TYPE;\r
4696 tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;\r
4697 tok->u.prop.not = 0;\r
4698 break;\r
4699 case 'H':\r
4700 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;\r
4701 tok->type = TK_CHAR_TYPE;\r
4702 tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;\r
4703 tok->u.prop.not = 1;\r
4704 break;\r
4705\r
4706 case 'p':\r
4707 case 'P':\r
b602265d
DG
4708 if (PEND) break;\r
4709\r
14b0e578
CS
4710 c2 = PPEEK;\r
4711 if (c2 == '{' &&\r
b602265d
DG
4712 IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {\r
4713 PINC;\r
4714 tok->type = TK_CHAR_PROPERTY;\r
4715 tok->u.prop.not = (c == 'P' ? 1 : 0);\r
4716\r
4717 if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {\r
4718 PFETCH(c2);\r
4719 if (c2 == '^') {\r
4720 tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);\r
4721 }\r
4722 else\r
4723 PUNFETCH;\r
4724 }\r
4725 }\r
4726 break;\r
4727\r
4728 case 'o':\r
4729 if (PEND) break;\r
4730\r
4731 prev = p;\r
4732 if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_O_BRACE_OCTAL)) {\r
4733 PINC;\r
4734 num = scan_unsigned_octal_number(&p, end, 11, enc);\r
4735 if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;\r
4736 if (!PEND) {\r
4737 c2 = PPEEK;\r
4738 if (IS_CODE_DIGIT_ASCII(enc, c2))\r
4739 return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;\r
4740 }\r
4741\r
4742 if (p > prev + enclen(enc, prev) && !PEND && (PPEEK_IS('}'))) {\r
4743 PINC;\r
4744 tok->type = TK_CODE_POINT;\r
4745 tok->base = 8;\r
4746 tok->u.code = (OnigCodePoint )num;\r
4747 }\r
4748 else {\r
4749 /* can't read nothing or invalid format */\r
4750 p = prev;\r
4751 }\r
14b0e578
CS
4752 }\r
4753 break;\r
4754\r
4755 case 'x':\r
4756 if (PEND) break;\r
4757\r
4758 prev = p;\r
4759 if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {\r
b602265d
DG
4760 PINC;\r
4761 num = scan_unsigned_hexadecimal_number(&p, end, 0, 8, enc);\r
4762 if (num < 0) {\r
4763 if (num == ONIGERR_TOO_BIG_NUMBER)\r
4764 return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;\r
4765 else\r
4766 return num;\r
4767 }\r
4768 if (!PEND) {\r
14b0e578 4769 c2 = PPEEK;\r
b602265d 4770 if (IS_CODE_XDIGIT_ASCII(enc, c2))\r
14b0e578
CS
4771 return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;\r
4772 }\r
4773\r
b602265d
DG
4774 if (p > prev + enclen(enc, prev) && !PEND && (PPEEK_IS('}'))) {\r
4775 PINC;\r
4776 tok->type = TK_CODE_POINT;\r
4777 tok->base = 16;\r
4778 tok->u.code = (OnigCodePoint )num;\r
4779 }\r
4780 else {\r
4781 /* can't read nothing or invalid format */\r
4782 p = prev;\r
4783 }\r
14b0e578
CS
4784 }\r
4785 else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {\r
b602265d
DG
4786 num = scan_unsigned_hexadecimal_number(&p, end, 0, 2, enc);\r
4787 if (num < 0) return num;\r
4788 if (p == prev) { /* can't read nothing. */\r
4789 num = 0; /* but, it's not error */\r
4790 }\r
4791 tok->type = TK_RAW_BYTE;\r
4792 tok->base = 16;\r
4793 tok->u.c = num;\r
14b0e578
CS
4794 }\r
4795 break;\r
4796\r
4797 case 'u':\r
4798 if (PEND) break;\r
4799\r
4800 prev = p;\r
4801 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {\r
b602265d
DG
4802 num = scan_unsigned_hexadecimal_number(&p, end, 4, 4, enc);\r
4803 if (num < 0) return num;\r
4804 if (p == prev) { /* can't read nothing. */\r
4805 num = 0; /* but, it's not error */\r
4806 }\r
4807 tok->type = TK_CODE_POINT;\r
4808 tok->base = 16;\r
4809 tok->u.code = (OnigCodePoint )num;\r
14b0e578
CS
4810 }\r
4811 break;\r
4812\r
4813 case '0':\r
4814 case '1': case '2': case '3': case '4': case '5': case '6': case '7':\r
4815 if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {\r
b602265d
DG
4816 PUNFETCH;\r
4817 prev = p;\r
4818 num = scan_unsigned_octal_number(&p, end, 3, enc);\r
4819 if (num < 0 || num >= 256) return ONIGERR_TOO_BIG_NUMBER;\r
4820 if (p == prev) { /* can't read nothing. */\r
4821 num = 0; /* but, it's not error */\r
4822 }\r
4823 tok->type = TK_RAW_BYTE;\r
4824 tok->base = 8;\r
4825 tok->u.c = num;\r
14b0e578
CS
4826 }\r
4827 break;\r
4828\r
4829 default:\r
4830 PUNFETCH;\r
b602265d 4831 num = fetch_escaped_value(&p, end, env, &c2);\r
14b0e578 4832 if (num < 0) return num;\r
b602265d
DG
4833 if (tok->u.c != c2) {\r
4834 tok->u.code = c2;\r
4835 tok->type = TK_CODE_POINT;\r
14b0e578
CS
4836 }\r
4837 break;\r
4838 }\r
4839 }\r
4840 else if (c == '[') {\r
4841 if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_POSIX_BRACKET) && (PPEEK_IS(':'))) {\r
4842 OnigCodePoint send[] = { (OnigCodePoint )':', (OnigCodePoint )']' };\r
b602265d 4843 tok->backp = p; /* point at '[' is read */\r
14b0e578
CS
4844 PINC;\r
4845 if (str_exist_check_with_esc(send, 2, p, end,\r
4846 (OnigCodePoint )']', enc, syn)) {\r
b602265d 4847 tok->type = TK_POSIX_BRACKET_OPEN;\r
14b0e578
CS
4848 }\r
4849 else {\r
b602265d
DG
4850 PUNFETCH;\r
4851 goto cc_in_cc;\r
14b0e578
CS
4852 }\r
4853 }\r
4854 else {\r
4855 cc_in_cc:\r
4856 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP)) {\r
b602265d 4857 tok->type = TK_CC_CC_OPEN;\r
14b0e578
CS
4858 }\r
4859 else {\r
b602265d 4860 CC_ESC_WARN(env, (UChar* )"[");\r
14b0e578
CS
4861 }\r
4862 }\r
4863 }\r
4864 else if (c == '&') {\r
4865 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP) &&\r
b602265d 4866 !PEND && (PPEEK_IS('&'))) {\r
14b0e578
CS
4867 PINC;\r
4868 tok->type = TK_CC_AND;\r
4869 }\r
4870 }\r
4871\r
4872 end:\r
4873 *src = p;\r
4874 return tok->type;\r
4875}\r
4876\r
4877static int\r
4878fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)\r
4879{\r
4880 int r, num;\r
4881 OnigCodePoint c;\r
4882 OnigEncoding enc = env->enc;\r
4883 OnigSyntaxType* syn = env->syntax;\r
4884 UChar* prev;\r
4885 UChar* p = *src;\r
4886 PFETCH_READY;\r
4887\r
4888 start:\r
4889 if (PEND) {\r
4890 tok->type = TK_EOT;\r
4891 return tok->type;\r
4892 }\r
4893\r
4894 tok->type = TK_STRING;\r
4895 tok->base = 0;\r
4896 tok->backp = p;\r
4897\r
4898 PFETCH(c);\r
4899 if (IS_MC_ESC_CODE(c, syn)) {\r
4900 if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;\r
4901\r
4902 tok->backp = p;\r
4903 PFETCH(c);\r
4904\r
4905 tok->u.c = c;\r
4906 tok->escaped = 1;\r
4907 switch (c) {\r
4908 case '*':\r
4909 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF)) break;\r
4910 tok->type = TK_OP_REPEAT;\r
4911 tok->u.repeat.lower = 0;\r
4912 tok->u.repeat.upper = REPEAT_INFINITE;\r
4913 goto greedy_check;\r
4914 break;\r
4915\r
4916 case '+':\r
4917 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_PLUS_ONE_INF)) break;\r
4918 tok->type = TK_OP_REPEAT;\r
4919 tok->u.repeat.lower = 1;\r
4920 tok->u.repeat.upper = REPEAT_INFINITE;\r
4921 goto greedy_check;\r
4922 break;\r
4923\r
4924 case '?':\r
4925 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_QMARK_ZERO_ONE)) break;\r
4926 tok->type = TK_OP_REPEAT;\r
4927 tok->u.repeat.lower = 0;\r
4928 tok->u.repeat.upper = 1;\r
4929 greedy_check:\r
4930 if (!PEND && PPEEK_IS('?') &&\r
b602265d
DG
4931 IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY)) {\r
4932 PFETCH(c);\r
4933 tok->u.repeat.greedy = 0;\r
4934 tok->u.repeat.possessive = 0;\r
14b0e578
CS
4935 }\r
4936 else {\r
4937 possessive_check:\r
b602265d
DG
4938 if (!PEND && PPEEK_IS('+') &&\r
4939 ((IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT) &&\r
4940 tok->type != TK_INTERVAL) ||\r
4941 (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL) &&\r
4942 tok->type == TK_INTERVAL))) {\r
4943 PFETCH(c);\r
4944 tok->u.repeat.greedy = 1;\r
4945 tok->u.repeat.possessive = 1;\r
4946 }\r
4947 else {\r
4948 tok->u.repeat.greedy = 1;\r
4949 tok->u.repeat.possessive = 0;\r
4950 }\r
14b0e578
CS
4951 }\r
4952 break;\r
4953\r
4954 case '{':\r
4955 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break;\r
4956 r = fetch_range_quantifier(&p, end, tok, env);\r
4957 if (r < 0) return r; /* error */\r
4958 if (r == 0) goto greedy_check;\r
4959 else if (r == 2) { /* {n} */\r
b602265d
DG
4960 if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))\r
4961 goto possessive_check;\r
14b0e578 4962\r
b602265d 4963 goto greedy_check;\r
14b0e578
CS
4964 }\r
4965 /* r == 1 : normal char */\r
4966 break;\r
4967\r
4968 case '|':\r
4969 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_VBAR_ALT)) break;\r
4970 tok->type = TK_ALT;\r
4971 break;\r
4972\r
4973 case '(':\r
4974 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;\r
4975 tok->type = TK_SUBEXP_OPEN;\r
4976 break;\r
4977\r
4978 case ')':\r
4979 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;\r
4980 tok->type = TK_SUBEXP_CLOSE;\r
4981 break;\r
4982\r
4983 case 'w':\r
4984 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;\r
4985 tok->type = TK_CHAR_TYPE;\r
4986 tok->u.prop.ctype = ONIGENC_CTYPE_WORD;\r
4987 tok->u.prop.not = 0;\r
4988 break;\r
4989\r
4990 case 'W':\r
4991 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;\r
4992 tok->type = TK_CHAR_TYPE;\r
4993 tok->u.prop.ctype = ONIGENC_CTYPE_WORD;\r
4994 tok->u.prop.not = 1;\r
4995 break;\r
4996\r
4997 case 'b':\r
4998 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;\r
4999 tok->type = TK_ANCHOR;\r
b602265d 5000 tok->u.anchor = ANCHOR_WORD_BOUNDARY;\r
14b0e578
CS
5001 break;\r
5002\r
5003 case 'B':\r
5004 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;\r
5005 tok->type = TK_ANCHOR;\r
b602265d
DG
5006 tok->u.anchor = ANCHOR_NO_WORD_BOUNDARY;\r
5007 break;\r
5008\r
5009 case 'y':\r
5010 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER)) break;\r
5011 tok->type = TK_ANCHOR;\r
5012 tok->u.anchor = ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY;\r
5013 break;\r
5014\r
5015 case 'Y':\r
5016 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER)) break;\r
5017 tok->type = TK_ANCHOR;\r
5018 tok->u.anchor = ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY;\r
14b0e578
CS
5019 break;\r
5020\r
5021#ifdef USE_WORD_BEGIN_END\r
5022 case '<':\r
5023 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;\r
5024 tok->type = TK_ANCHOR;\r
5025 tok->u.anchor = ANCHOR_WORD_BEGIN;\r
5026 break;\r
5027\r
5028 case '>':\r
5029 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;\r
5030 tok->type = TK_ANCHOR;\r
5031 tok->u.anchor = ANCHOR_WORD_END;\r
5032 break;\r
5033#endif\r
5034\r
5035 case 's':\r
5036 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;\r
5037 tok->type = TK_CHAR_TYPE;\r
5038 tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;\r
5039 tok->u.prop.not = 0;\r
5040 break;\r
5041\r
5042 case 'S':\r
5043 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;\r
5044 tok->type = TK_CHAR_TYPE;\r
5045 tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;\r
5046 tok->u.prop.not = 1;\r
5047 break;\r
5048\r
5049 case 'd':\r
5050 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;\r
5051 tok->type = TK_CHAR_TYPE;\r
5052 tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;\r
5053 tok->u.prop.not = 0;\r
5054 break;\r
5055\r
5056 case 'D':\r
5057 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;\r
5058 tok->type = TK_CHAR_TYPE;\r
5059 tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;\r
5060 tok->u.prop.not = 1;\r
5061 break;\r
5062\r
5063 case 'h':\r
5064 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;\r
5065 tok->type = TK_CHAR_TYPE;\r
5066 tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;\r
5067 tok->u.prop.not = 0;\r
5068 break;\r
5069\r
5070 case 'H':\r
5071 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;\r
5072 tok->type = TK_CHAR_TYPE;\r
5073 tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;\r
5074 tok->u.prop.not = 1;\r
5075 break;\r
5076\r
b602265d
DG
5077 case 'K':\r
5078 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP)) break;\r
5079 tok->type = TK_KEEP;\r
5080 break;\r
5081\r
5082 case 'R':\r
5083 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE)) break;\r
5084 tok->type = TK_GENERAL_NEWLINE;\r
5085 break;\r
5086\r
5087 case 'N':\r
5088 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT)) break;\r
5089 tok->type = TK_NO_NEWLINE;\r
5090 break;\r
5091\r
5092 case 'O':\r
5093 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT)) break;\r
5094 tok->type = TK_TRUE_ANYCHAR;\r
5095 break;\r
5096\r
5097 case 'X':\r
5098 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER)) break;\r
5099 tok->type = TK_EXTENDED_GRAPHEME_CLUSTER;\r
5100 break;\r
5101\r
14b0e578
CS
5102 case 'A':\r
5103 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;\r
5104 begin_buf:\r
5105 tok->type = TK_ANCHOR;\r
5106 tok->u.subtype = ANCHOR_BEGIN_BUF;\r
5107 break;\r
5108\r
5109 case 'Z':\r
5110 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;\r
5111 tok->type = TK_ANCHOR;\r
5112 tok->u.subtype = ANCHOR_SEMI_END_BUF;\r
5113 break;\r
5114\r
5115 case 'z':\r
5116 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;\r
5117 end_buf:\r
5118 tok->type = TK_ANCHOR;\r
5119 tok->u.subtype = ANCHOR_END_BUF;\r
5120 break;\r
5121\r
5122 case 'G':\r
5123 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR)) break;\r
5124 tok->type = TK_ANCHOR;\r
5125 tok->u.subtype = ANCHOR_BEGIN_POSITION;\r
5126 break;\r
5127\r
5128 case '`':\r
5129 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;\r
5130 goto begin_buf;\r
5131 break;\r
5132\r
5133 case '\'':\r
5134 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;\r
5135 goto end_buf;\r
5136 break;\r
5137\r
b602265d
DG
5138 case 'o':\r
5139 if (PEND) break;\r
5140\r
5141 prev = p;\r
5142 if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_O_BRACE_OCTAL)) {\r
5143 PINC;\r
5144 num = scan_unsigned_octal_number(&p, end, 11, enc);\r
5145 if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;\r
5146 if (!PEND) {\r
5147 if (IS_CODE_DIGIT_ASCII(enc, PPEEK))\r
5148 return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;\r
5149 }\r
5150\r
5151 if ((p > prev + enclen(enc, prev)) && !PEND && PPEEK_IS('}')) {\r
5152 PINC;\r
5153 tok->type = TK_CODE_POINT;\r
5154 tok->u.code = (OnigCodePoint )num;\r
5155 }\r
5156 else {\r
5157 /* can't read nothing or invalid format */\r
5158 p = prev;\r
5159 }\r
5160 }\r
5161 break;\r
5162\r
14b0e578
CS
5163 case 'x':\r
5164 if (PEND) break;\r
5165\r
5166 prev = p;\r
5167 if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {\r
b602265d
DG
5168 PINC;\r
5169 num = scan_unsigned_hexadecimal_number(&p, end, 0, 8, enc);\r
5170 if (num < 0) {\r
5171 if (num == ONIGERR_TOO_BIG_NUMBER)\r
5172 return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;\r
5173 else\r
5174 return num;\r
5175 }\r
5176 if (!PEND) {\r
5177 if (IS_CODE_XDIGIT_ASCII(enc, PPEEK))\r
14b0e578
CS
5178 return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;\r
5179 }\r
5180\r
b602265d
DG
5181 if ((p > prev + enclen(enc, prev)) && !PEND && PPEEK_IS('}')) {\r
5182 PINC;\r
5183 tok->type = TK_CODE_POINT;\r
5184 tok->u.code = (OnigCodePoint )num;\r
5185 }\r
5186 else {\r
5187 /* can't read nothing or invalid format */\r
5188 p = prev;\r
5189 }\r
14b0e578
CS
5190 }\r
5191 else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {\r
b602265d
DG
5192 num = scan_unsigned_hexadecimal_number(&p, end, 0, 2, enc);\r
5193 if (num < 0) return num;\r
5194 if (p == prev) { /* can't read nothing. */\r
5195 num = 0; /* but, it's not error */\r
5196 }\r
5197 tok->type = TK_RAW_BYTE;\r
5198 tok->base = 16;\r
5199 tok->u.c = num;\r
14b0e578
CS
5200 }\r
5201 break;\r
5202\r
5203 case 'u':\r
5204 if (PEND) break;\r
5205\r
5206 prev = p;\r
5207 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {\r
b602265d
DG
5208 num = scan_unsigned_hexadecimal_number(&p, end, 4, 4, enc);\r
5209 if (num < 0) return num;\r
5210 if (p == prev) { /* can't read nothing. */\r
5211 num = 0; /* but, it's not error */\r
5212 }\r
5213 tok->type = TK_CODE_POINT;\r
5214 tok->base = 16;\r
5215 tok->u.code = (OnigCodePoint )num;\r
14b0e578
CS
5216 }\r
5217 break;\r
5218\r
5219 case '1': case '2': case '3': case '4':\r
5220 case '5': case '6': case '7': case '8': case '9':\r
5221 PUNFETCH;\r
5222 prev = p;\r
5223 num = onig_scan_unsigned_number(&p, end, enc);\r
5224 if (num < 0 || num > ONIG_MAX_BACKREF_NUM) {\r
5225 goto skip_backref;\r
5226 }\r
5227\r
5228 if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) && \r
b602265d
DG
5229 (num <= env->num_mem || num <= 9)) { /* This spec. from GNU regex */\r
5230 if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {\r
5231 if (num > env->num_mem || IS_NULL(SCANENV_MEMENV(env)[num].node))\r
5232 return ONIGERR_INVALID_BACKREF;\r
5233 }\r
5234\r
5235 tok->type = TK_BACKREF;\r
5236 tok->u.backref.num = 1;\r
5237 tok->u.backref.ref1 = num;\r
5238 tok->u.backref.by_name = 0;\r
14b0e578 5239#ifdef USE_BACKREF_WITH_LEVEL\r
b602265d 5240 tok->u.backref.exist_level = 0;\r
14b0e578 5241#endif\r
b602265d 5242 break;\r
14b0e578
CS
5243 }\r
5244\r
5245 skip_backref:\r
5246 if (c == '8' || c == '9') {\r
b602265d
DG
5247 /* normal char */\r
5248 p = prev; PINC;\r
5249 break;\r
14b0e578
CS
5250 }\r
5251\r
5252 p = prev;\r
5253 /* fall through */\r
5254 case '0':\r
5255 if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {\r
b602265d
DG
5256 prev = p;\r
5257 num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), enc);\r
5258 if (num < 0 || num >= 256) return ONIGERR_TOO_BIG_NUMBER;\r
5259 if (p == prev) { /* can't read nothing. */\r
5260 num = 0; /* but, it's not error */\r
5261 }\r
5262 tok->type = TK_RAW_BYTE;\r
5263 tok->base = 8;\r
5264 tok->u.c = num;\r
14b0e578
CS
5265 }\r
5266 else if (c != '0') {\r
b602265d 5267 PINC;\r
14b0e578
CS
5268 }\r
5269 break;\r
5270\r
14b0e578 5271 case 'k':\r
b602265d
DG
5272 if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) {\r
5273 PFETCH(c);\r
5274 if (c == '<' || c == '\'') {\r
5275 UChar* name_end;\r
5276 int* backs;\r
5277 int back_num;\r
5278 enum REF_NUM num_type;\r
14b0e578 5279\r
b602265d 5280 prev = p;\r
14b0e578
CS
5281\r
5282#ifdef USE_BACKREF_WITH_LEVEL\r
b602265d
DG
5283 name_end = NULL_UCHARP; /* no need. escape gcc warning. */\r
5284 r = fetch_name_with_level((OnigCodePoint )c, &p, end, &name_end,\r
5285 env, &back_num, &tok->u.backref.level, &num_type);\r
5286 if (r == 1) tok->u.backref.exist_level = 1;\r
5287 else tok->u.backref.exist_level = 0;\r
14b0e578 5288#else\r
b602265d 5289 r = fetch_name(c, &p, end, &name_end, env, &back_num, &num_type, 1);\r
14b0e578 5290#endif\r
b602265d
DG
5291 if (r < 0) return r;\r
5292\r
5293 if (num_type != IS_NOT_NUM) {\r
5294 if (num_type == IS_REL_NUM) {\r
5295 back_num = backref_rel_to_abs(back_num, env);\r
5296 }\r
5297 if (back_num <= 0)\r
5298 return ONIGERR_INVALID_BACKREF;\r
5299\r
5300 if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {\r
5301 if (back_num > env->num_mem ||\r
5302 IS_NULL(SCANENV_MEMENV(env)[back_num].node))\r
5303 return ONIGERR_INVALID_BACKREF;\r
5304 }\r
5305 tok->type = TK_BACKREF;\r
5306 tok->u.backref.by_name = 0;\r
5307 tok->u.backref.num = 1;\r
5308 tok->u.backref.ref1 = back_num;\r
5309 }\r
5310 else {\r
5311 num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);\r
5312 if (num <= 0) {\r
5313 onig_scan_env_set_error_string(env,\r
5314 ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);\r
5315 return ONIGERR_UNDEFINED_NAME_REFERENCE;\r
5316 }\r
5317 if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {\r
5318 int i;\r
5319 for (i = 0; i < num; i++) {\r
5320 if (backs[i] > env->num_mem ||\r
5321 IS_NULL(SCANENV_MEMENV(env)[backs[i]].node))\r
5322 return ONIGERR_INVALID_BACKREF;\r
5323 }\r
5324 }\r
5325\r
5326 tok->type = TK_BACKREF;\r
5327 tok->u.backref.by_name = 1;\r
5328 if (num == 1) {\r
5329 tok->u.backref.num = 1;\r
5330 tok->u.backref.ref1 = backs[0];\r
5331 }\r
5332 else {\r
5333 tok->u.backref.num = num;\r
5334 tok->u.backref.refs = backs;\r
5335 }\r
5336 }\r
5337 }\r
5338 else\r
5339 PUNFETCH;\r
14b0e578
CS
5340 }\r
5341 break;\r
14b0e578 5342\r
b602265d 5343#ifdef USE_CALL\r
14b0e578 5344 case 'g':\r
b602265d
DG
5345 if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) {\r
5346 PFETCH(c);\r
5347 if (c == '<' || c == '\'') {\r
5348 int gnum;\r
5349 UChar* name_end;\r
5350 enum REF_NUM num_type;\r
5351\r
5352 prev = p;\r
5353 r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env,\r
5354 &gnum, &num_type, 1);\r
5355 if (r < 0) return r;\r
5356\r
5357 if (num_type != IS_NOT_NUM) {\r
5358 if (num_type == IS_REL_NUM) {\r
5359 gnum = backref_rel_to_abs(gnum, env);\r
5360 if (gnum < 0) {\r
5361 onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,\r
5362 prev, name_end);\r
5363 return ONIGERR_UNDEFINED_GROUP_REFERENCE;\r
5364 }\r
5365 }\r
5366 tok->u.call.by_number = 1;\r
5367 tok->u.call.gnum = gnum;\r
5368 }\r
5369 else {\r
5370 tok->u.call.by_number = 0;\r
5371 tok->u.call.gnum = 0;\r
5372 }\r
5373\r
5374 tok->type = TK_CALL;\r
5375 tok->u.call.name = prev;\r
5376 tok->u.call.name_end = name_end;\r
5377 }\r
5378 else\r
5379 PUNFETCH;\r
14b0e578
CS
5380 }\r
5381 break;\r
5382#endif\r
5383\r
5384 case 'Q':\r
5385 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE)) {\r
b602265d 5386 tok->type = TK_QUOTE_OPEN;\r
14b0e578
CS
5387 }\r
5388 break;\r
5389\r
5390 case 'p':\r
5391 case 'P':\r
b602265d
DG
5392 if (!PEND && PPEEK_IS('{') &&\r
5393 IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {\r
5394 PINC;\r
5395 tok->type = TK_CHAR_PROPERTY;\r
5396 tok->u.prop.not = (c == 'P' ? 1 : 0);\r
5397\r
5398 if (!PEND &&\r
5399 IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {\r
5400 PFETCH(c);\r
5401 if (c == '^') {\r
5402 tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);\r
5403 }\r
5404 else\r
5405 PUNFETCH;\r
5406 }\r
14b0e578
CS
5407 }\r
5408 break;\r
5409\r
5410 default:\r
b602265d
DG
5411 {\r
5412 OnigCodePoint c2;\r
5413\r
5414 PUNFETCH;\r
5415 num = fetch_escaped_value(&p, end, env, &c2);\r
5416 if (num < 0) return num;\r
5417 /* set_raw: */\r
5418 if (tok->u.c != c2) {\r
5419 tok->type = TK_CODE_POINT;\r
5420 tok->u.code = c2;\r
5421 }\r
5422 else { /* string */\r
5423 p = tok->backp + enclen(enc, tok->backp);\r
5424 }\r
14b0e578
CS
5425 }\r
5426 break;\r
5427 }\r
5428 }\r
5429 else {\r
5430 tok->u.c = c;\r
5431 tok->escaped = 0;\r
5432\r
5433#ifdef USE_VARIABLE_META_CHARS\r
5434 if ((c != ONIG_INEFFECTIVE_META_CHAR) &&\r
b602265d 5435 IS_SYNTAX_OP(syn, ONIG_SYN_OP_VARIABLE_META_CHARACTERS)) {\r
14b0e578 5436 if (c == MC_ANYCHAR(syn))\r
b602265d 5437 goto any_char;\r
14b0e578 5438 else if (c == MC_ANYTIME(syn))\r
b602265d 5439 goto anytime;\r
14b0e578 5440 else if (c == MC_ZERO_OR_ONE_TIME(syn))\r
b602265d 5441 goto zero_or_one_time;\r
14b0e578 5442 else if (c == MC_ONE_OR_MORE_TIME(syn))\r
b602265d 5443 goto one_or_more_time;\r
14b0e578 5444 else if (c == MC_ANYCHAR_ANYTIME(syn)) {\r
b602265d
DG
5445 tok->type = TK_ANYCHAR_ANYTIME;\r
5446 goto out;\r
14b0e578
CS
5447 }\r
5448 }\r
5449#endif\r
5450\r
5451 switch (c) {\r
5452 case '.':\r
5453 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_DOT_ANYCHAR)) break;\r
5454#ifdef USE_VARIABLE_META_CHARS\r
5455 any_char:\r
5456#endif\r
5457 tok->type = TK_ANYCHAR;\r
5458 break;\r
5459\r
5460 case '*':\r
5461 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ASTERISK_ZERO_INF)) break;\r
5462#ifdef USE_VARIABLE_META_CHARS\r
5463 anytime:\r
5464#endif\r
5465 tok->type = TK_OP_REPEAT;\r
5466 tok->u.repeat.lower = 0;\r
5467 tok->u.repeat.upper = REPEAT_INFINITE;\r
5468 goto greedy_check;\r
5469 break;\r
5470\r
5471 case '+':\r
5472 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_PLUS_ONE_INF)) break;\r
5473#ifdef USE_VARIABLE_META_CHARS\r
5474 one_or_more_time:\r
5475#endif\r
5476 tok->type = TK_OP_REPEAT;\r
5477 tok->u.repeat.lower = 1;\r
5478 tok->u.repeat.upper = REPEAT_INFINITE;\r
5479 goto greedy_check;\r
5480 break;\r
5481\r
5482 case '?':\r
5483 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_ZERO_ONE)) break;\r
5484#ifdef USE_VARIABLE_META_CHARS\r
5485 zero_or_one_time:\r
5486#endif\r
5487 tok->type = TK_OP_REPEAT;\r
5488 tok->u.repeat.lower = 0;\r
5489 tok->u.repeat.upper = 1;\r
5490 goto greedy_check;\r
5491 break;\r
5492\r
5493 case '{':\r
5494 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break;\r
5495 r = fetch_range_quantifier(&p, end, tok, env);\r
5496 if (r < 0) return r; /* error */\r
5497 if (r == 0) goto greedy_check;\r
5498 else if (r == 2) { /* {n} */\r
b602265d
DG
5499 if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))\r
5500 goto possessive_check;\r
14b0e578 5501\r
b602265d 5502 goto greedy_check;\r
14b0e578
CS
5503 }\r
5504 /* r == 1 : normal char */\r
5505 break;\r
5506\r
5507 case '|':\r
5508 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_VBAR_ALT)) break;\r
5509 tok->type = TK_ALT;\r
5510 break;\r
5511\r
5512 case '(':\r
b602265d 5513 if (!PEND && PPEEK_IS('?') &&\r
14b0e578
CS
5514 IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {\r
5515 PINC;\r
b602265d
DG
5516 if (! PEND) {\r
5517 c = PPEEK;\r
5518 if (c == '#') {\r
14b0e578 5519 PFETCH(c);\r
b602265d
DG
5520 while (1) {\r
5521 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
5522 PFETCH(c);\r
5523 if (c == MC_ESC(syn)) {\r
5524 if (! PEND) PFETCH(c);\r
5525 }\r
5526 else {\r
5527 if (c == ')') break;\r
5528 }\r
14b0e578 5529 }\r
b602265d
DG
5530 goto start;\r
5531 }\r
5532 else if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_PERL_SUBEXP_CALL)) {\r
5533 int gnum;\r
5534 UChar* name;\r
5535 UChar* name_end;\r
5536 enum REF_NUM num_type;\r
5537\r
5538 switch (c) {\r
5539 case '&':\r
5540 {\r
5541 PINC;\r
5542 name = p;\r
5543 r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &gnum,\r
5544 &num_type, 0);\r
5545 if (r < 0) return r;\r
5546\r
5547 tok->type = TK_CALL;\r
5548 tok->u.call.by_number = 0;\r
5549 tok->u.call.gnum = 0;\r
5550 tok->u.call.name = name;\r
5551 tok->u.call.name_end = name_end;\r
5552 }\r
5553 break;\r
5554\r
5555 case 'R':\r
5556 tok->type = TK_CALL;\r
5557 tok->u.call.by_number = 1;\r
5558 tok->u.call.gnum = 0;\r
5559 tok->u.call.name = p;\r
5560 PINC;\r
5561 if (! PPEEK_IS(')')) return ONIGERR_INVALID_GROUP_NAME;\r
5562 tok->u.call.name_end = p;\r
5563 break;\r
5564\r
5565 case '-':\r
5566 case '+':\r
5567 goto lparen_qmark_num;\r
5568 break;\r
5569 default:\r
5570 if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto lparen_qmark_end;\r
5571\r
5572 lparen_qmark_num:\r
5573 {\r
5574 name = p;\r
5575 r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env,\r
5576 &gnum, &num_type, 1);\r
5577 if (r < 0) return r;\r
5578\r
5579 if (num_type == IS_NOT_NUM) {\r
5580 return ONIGERR_INVALID_GROUP_NAME;\r
5581 }\r
5582 else {\r
5583 if (num_type == IS_REL_NUM) {\r
5584 gnum = backref_rel_to_abs(gnum, env);\r
5585 if (gnum < 0) {\r
5586 onig_scan_env_set_error_string(env,\r
5587 ONIGERR_UNDEFINED_NAME_REFERENCE, name, name_end);\r
5588 return ONIGERR_UNDEFINED_GROUP_REFERENCE;\r
5589 }\r
5590 }\r
5591 tok->u.call.by_number = 1;\r
5592 tok->u.call.gnum = gnum;\r
5593 }\r
5594\r
5595 tok->type = TK_CALL;\r
5596 tok->u.call.name = name;\r
5597 tok->u.call.name_end = name_end;\r
5598 }\r
5599 break;\r
14b0e578
CS
5600 }\r
5601 }\r
14b0e578 5602 }\r
b602265d 5603 lparen_qmark_end:\r
14b0e578
CS
5604 PUNFETCH;\r
5605 }\r
5606\r
5607 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;\r
5608 tok->type = TK_SUBEXP_OPEN;\r
5609 break;\r
5610\r
5611 case ')':\r
5612 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;\r
5613 tok->type = TK_SUBEXP_CLOSE;\r
5614 break;\r
5615\r
5616 case '^':\r
5617 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;\r
5618 tok->type = TK_ANCHOR;\r
b602265d
DG
5619 tok->u.subtype = (IS_SINGLELINE(env->options)\r
5620 ? ANCHOR_BEGIN_BUF : ANCHOR_BEGIN_LINE);\r
14b0e578
CS
5621 break;\r
5622\r
5623 case '$':\r
5624 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;\r
5625 tok->type = TK_ANCHOR;\r
b602265d
DG
5626 tok->u.subtype = (IS_SINGLELINE(env->options)\r
5627 ? ANCHOR_SEMI_END_BUF : ANCHOR_END_LINE);\r
14b0e578
CS
5628 break;\r
5629\r
5630 case '[':\r
5631 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACKET_CC)) break;\r
5632 tok->type = TK_CC_OPEN;\r
5633 break;\r
5634\r
5635 case ']':\r
5636 if (*src > env->pattern) /* /].../ is allowed. */\r
b602265d 5637 CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]");\r
14b0e578
CS
5638 break;\r
5639\r
5640 case '#':\r
b602265d
DG
5641 if (IS_EXTEND(env->options)) {\r
5642 while (!PEND) {\r
5643 PFETCH(c);\r
5644 if (ONIGENC_IS_CODE_NEWLINE(enc, c))\r
5645 break;\r
5646 }\r
5647 goto start;\r
5648 break;\r
14b0e578
CS
5649 }\r
5650 break;\r
5651\r
5652 case ' ': case '\t': case '\n': case '\r': case '\f':\r
b602265d
DG
5653 if (IS_EXTEND(env->options))\r
5654 goto start;\r
14b0e578
CS
5655 break;\r
5656\r
5657 default:\r
5658 /* string */\r
5659 break;\r
5660 }\r
5661 }\r
5662\r
5663#ifdef USE_VARIABLE_META_CHARS\r
5664 out:\r
5665#endif\r
5666 *src = p;\r
5667 return tok->type;\r
5668}\r
5669\r
5670static int\r
5671add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not,\r
b602265d
DG
5672 OnigEncoding enc ARG_UNUSED, OnigCodePoint sb_out,\r
5673 const OnigCodePoint mbr[])\r
14b0e578
CS
5674{\r
5675 int i, r;\r
5676 OnigCodePoint j;\r
5677\r
5678 int n = ONIGENC_CODE_RANGE_NUM(mbr);\r
5679\r
5680 if (not == 0) {\r
5681 for (i = 0; i < n; i++) {\r
5682 for (j = ONIGENC_CODE_RANGE_FROM(mbr, i);\r
5683 j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) {\r
b602265d
DG
5684 if (j >= sb_out) {\r
5685 if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) {\r
5686 r = add_code_range_to_buf(&(cc->mbuf), j,\r
5687 ONIGENC_CODE_RANGE_TO(mbr, i));\r
5688 if (r != 0) return r;\r
5689 i++;\r
5690 }\r
5691\r
5692 goto sb_end;\r
5693 }\r
14b0e578
CS
5694 BITSET_SET_BIT(cc->bs, j);\r
5695 }\r
5696 }\r
5697\r
5698 sb_end:\r
5699 for ( ; i < n; i++) {\r
5700 r = add_code_range_to_buf(&(cc->mbuf),\r
5701 ONIGENC_CODE_RANGE_FROM(mbr, i),\r
5702 ONIGENC_CODE_RANGE_TO(mbr, i));\r
5703 if (r != 0) return r;\r
5704 }\r
5705 }\r
5706 else {\r
5707 OnigCodePoint prev = 0;\r
5708\r
5709 for (i = 0; i < n; i++) {\r
b602265d
DG
5710 for (j = prev; j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) {\r
5711 if (j >= sb_out) {\r
5712 goto sb_end2;\r
5713 }\r
5714 BITSET_SET_BIT(cc->bs, j);\r
14b0e578
CS
5715 }\r
5716 prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;\r
5717 }\r
5718 for (j = prev; j < sb_out; j++) {\r
5719 BITSET_SET_BIT(cc->bs, j);\r
5720 }\r
5721\r
5722 sb_end2:\r
5723 prev = sb_out;\r
5724\r
5725 for (i = 0; i < n; i++) {\r
5726 if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) {\r
b602265d 5727 r = add_code_range_to_buf(&(cc->mbuf), prev,\r
14b0e578 5728 ONIGENC_CODE_RANGE_FROM(mbr, i) - 1);\r
b602265d 5729 if (r != 0) return r;\r
14b0e578
CS
5730 }\r
5731 prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;\r
b602265d
DG
5732 if (prev == 0) goto end;\r
5733 }\r
5734\r
5735 r = add_code_range_to_buf(&(cc->mbuf), prev, MAX_CODE_POINT);\r
5736 if (r != 0) return r;\r
5737 }\r
5738\r
5739 end:\r
5740 return 0;\r
5741}\r
5742\r
5743static int\r
5744add_ctype_to_cc_by_range_limit(CClassNode* cc, int ctype ARG_UNUSED, int not,\r
5745 OnigEncoding enc ARG_UNUSED,\r
5746 OnigCodePoint sb_out,\r
5747 const OnigCodePoint mbr[], OnigCodePoint limit)\r
5748{\r
5749 int i, r;\r
5750 OnigCodePoint j;\r
5751 OnigCodePoint from;\r
5752 OnigCodePoint to;\r
5753\r
5754 int n = ONIGENC_CODE_RANGE_NUM(mbr);\r
5755\r
5756 if (not == 0) {\r
5757 for (i = 0; i < n; i++) {\r
5758 for (j = ONIGENC_CODE_RANGE_FROM(mbr, i);\r
5759 j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) {\r
5760 if (j > limit) goto end;\r
5761 if (j >= sb_out) {\r
5762 if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) {\r
5763 to = ONIGENC_CODE_RANGE_TO(mbr, i);\r
5764 if (to > limit) to = limit;\r
5765 r = add_code_range_to_buf(&(cc->mbuf), j, to);\r
5766 if (r != 0) return r;\r
5767 i++;\r
5768 }\r
5769\r
5770 goto sb_end;\r
5771 }\r
5772 BITSET_SET_BIT(cc->bs, j);\r
5773 }\r
14b0e578 5774 }\r
b602265d
DG
5775\r
5776 sb_end:\r
5777 for ( ; i < n; i++) {\r
5778 from = ONIGENC_CODE_RANGE_FROM(mbr, i);\r
5779 to = ONIGENC_CODE_RANGE_TO(mbr, i);\r
5780 if (from > limit) break;\r
5781 if (to > limit) to = limit;\r
5782 r = add_code_range_to_buf(&(cc->mbuf), from, to);\r
14b0e578
CS
5783 if (r != 0) return r;\r
5784 }\r
5785 }\r
b602265d
DG
5786 else {\r
5787 OnigCodePoint prev = 0;\r
5788\r
5789 for (i = 0; i < n; i++) {\r
5790 from = ONIGENC_CODE_RANGE_FROM(mbr, i);\r
5791 if (from > limit) {\r
5792 for (j = prev; j < sb_out; j++) {\r
5793 BITSET_SET_BIT(cc->bs, j);\r
5794 }\r
5795 goto sb_end2;\r
5796 }\r
5797 for (j = prev; j < from; j++) {\r
5798 if (j >= sb_out) goto sb_end2;\r
5799 BITSET_SET_BIT(cc->bs, j);\r
5800 }\r
5801 prev = ONIGENC_CODE_RANGE_TO(mbr, i);\r
5802 if (prev > limit) prev = limit;\r
5803 prev++;\r
5804 if (prev == 0) goto end;\r
5805 }\r
5806 for (j = prev; j < sb_out; j++) {\r
5807 BITSET_SET_BIT(cc->bs, j);\r
5808 }\r
5809\r
5810 sb_end2:\r
5811 prev = sb_out;\r
5812\r
5813 for (i = 0; i < n; i++) {\r
5814 from = ONIGENC_CODE_RANGE_FROM(mbr, i);\r
5815 if (from > limit) goto last;\r
5816\r
5817 if (prev < from) {\r
5818 r = add_code_range_to_buf(&(cc->mbuf), prev, from - 1);\r
5819 if (r != 0) return r;\r
5820 }\r
5821 prev = ONIGENC_CODE_RANGE_TO(mbr, i);\r
5822 if (prev > limit) prev = limit;\r
5823 prev++;\r
5824 if (prev == 0) goto end;\r
5825 }\r
5826\r
5827 last:\r
5828 r = add_code_range_to_buf(&(cc->mbuf), prev, MAX_CODE_POINT);\r
5829 if (r != 0) return r;\r
5830 }\r
14b0e578 5831\r
b602265d 5832 end:\r
14b0e578
CS
5833 return 0;\r
5834}\r
5835\r
5836static int\r
5837add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)\r
5838{\r
b602265d
DG
5839#define ASCII_LIMIT 127\r
5840\r
14b0e578 5841 int c, r;\r
b602265d 5842 int ascii_mode;\r
14b0e578 5843 const OnigCodePoint *ranges;\r
b602265d 5844 OnigCodePoint limit;\r
14b0e578
CS
5845 OnigCodePoint sb_out;\r
5846 OnigEncoding enc = env->enc;\r
5847\r
b602265d
DG
5848 ascii_mode = IS_ASCII_MODE_CTYPE_OPTION(ctype, env->options);\r
5849\r
14b0e578
CS
5850 r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges);\r
5851 if (r == 0) {\r
b602265d
DG
5852 if (ascii_mode == 0)\r
5853 r = add_ctype_to_cc_by_range(cc, ctype, not, env->enc, sb_out, ranges);\r
5854 else\r
5855 r = add_ctype_to_cc_by_range_limit(cc, ctype, not, env->enc, sb_out,\r
5856 ranges, ASCII_LIMIT);\r
5857 return r;\r
14b0e578
CS
5858 }\r
5859 else if (r != ONIG_NO_SUPPORT_CONFIG) {\r
5860 return r;\r
5861 }\r
5862\r
5863 r = 0;\r
b602265d
DG
5864 limit = ascii_mode ? ASCII_LIMIT : SINGLE_BYTE_SIZE;\r
5865\r
14b0e578
CS
5866 switch (ctype) {\r
5867 case ONIGENC_CTYPE_ALPHA:\r
5868 case ONIGENC_CTYPE_BLANK:\r
5869 case ONIGENC_CTYPE_CNTRL:\r
5870 case ONIGENC_CTYPE_DIGIT:\r
5871 case ONIGENC_CTYPE_LOWER:\r
5872 case ONIGENC_CTYPE_PUNCT:\r
5873 case ONIGENC_CTYPE_SPACE:\r
5874 case ONIGENC_CTYPE_UPPER:\r
5875 case ONIGENC_CTYPE_XDIGIT:\r
5876 case ONIGENC_CTYPE_ASCII:\r
5877 case ONIGENC_CTYPE_ALNUM:\r
5878 if (not != 0) {\r
b602265d
DG
5879 for (c = 0; c < (int )limit; c++) {\r
5880 if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))\r
5881 BITSET_SET_BIT(cc->bs, c);\r
5882 }\r
5883 for (c = limit; c < SINGLE_BYTE_SIZE; c++) {\r
5884 BITSET_SET_BIT(cc->bs, c);\r
14b0e578 5885 }\r
b602265d 5886\r
14b0e578
CS
5887 ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);\r
5888 }\r
5889 else {\r
b602265d
DG
5890 for (c = 0; c < (int )limit; c++) {\r
5891 if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))\r
5892 BITSET_SET_BIT(cc->bs, c);\r
14b0e578
CS
5893 }\r
5894 }\r
5895 break;\r
5896\r
5897 case ONIGENC_CTYPE_GRAPH:\r
5898 case ONIGENC_CTYPE_PRINT:\r
b602265d 5899 case ONIGENC_CTYPE_WORD:\r
14b0e578 5900 if (not != 0) {\r
b602265d
DG
5901 for (c = 0; c < (int )limit; c++) {\r
5902 if (ONIGENC_CODE_TO_MBCLEN(enc, c) > 0 /* check invalid code point */\r
5903 && ! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))\r
5904 BITSET_SET_BIT(cc->bs, c);\r
14b0e578 5905 }\r
b602265d
DG
5906 for (c = limit; c < SINGLE_BYTE_SIZE; c++) {\r
5907 if (ONIGENC_CODE_TO_MBCLEN(enc, c) > 0)\r
5908 BITSET_SET_BIT(cc->bs, c);\r
14b0e578 5909 }\r
14b0e578
CS
5910 }\r
5911 else {\r
b602265d
DG
5912 for (c = 0; c < (int )limit; c++) {\r
5913 if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))\r
5914 BITSET_SET_BIT(cc->bs, c);\r
14b0e578 5915 }\r
b602265d
DG
5916 if (ascii_mode == 0)\r
5917 ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);\r
14b0e578
CS
5918 }\r
5919 break;\r
5920\r
5921 default:\r
5922 return ONIGERR_PARSER_BUG;\r
5923 break;\r
5924 }\r
5925\r
5926 return r;\r
5927}\r
5928\r
5929static int\r
5930parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)\r
5931{\r
5932#define POSIX_BRACKET_CHECK_LIMIT_LENGTH 20\r
5933#define POSIX_BRACKET_NAME_MIN_LEN 4\r
5934\r
5935 static PosixBracketEntryType PBS[] = {\r
5936 { (UChar* )"alnum", ONIGENC_CTYPE_ALNUM, 5 },\r
5937 { (UChar* )"alpha", ONIGENC_CTYPE_ALPHA, 5 },\r
5938 { (UChar* )"blank", ONIGENC_CTYPE_BLANK, 5 },\r
5939 { (UChar* )"cntrl", ONIGENC_CTYPE_CNTRL, 5 },\r
5940 { (UChar* )"digit", ONIGENC_CTYPE_DIGIT, 5 },\r
5941 { (UChar* )"graph", ONIGENC_CTYPE_GRAPH, 5 },\r
5942 { (UChar* )"lower", ONIGENC_CTYPE_LOWER, 5 },\r
5943 { (UChar* )"print", ONIGENC_CTYPE_PRINT, 5 },\r
5944 { (UChar* )"punct", ONIGENC_CTYPE_PUNCT, 5 },\r
5945 { (UChar* )"space", ONIGENC_CTYPE_SPACE, 5 },\r
5946 { (UChar* )"upper", ONIGENC_CTYPE_UPPER, 5 },\r
5947 { (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 },\r
5948 { (UChar* )"ascii", ONIGENC_CTYPE_ASCII, 5 },\r
5949 { (UChar* )"word", ONIGENC_CTYPE_WORD, 4 },\r
5950 { (UChar* )NULL, -1, 0 }\r
5951 };\r
5952\r
5953 PosixBracketEntryType *pb;\r
5954 int not, i, r;\r
5955 OnigCodePoint c;\r
5956 OnigEncoding enc = env->enc;\r
5957 UChar *p = *src;\r
5958\r
5959 if (PPEEK_IS('^')) {\r
5960 PINC_S;\r
5961 not = 1;\r
5962 }\r
5963 else\r
5964 not = 0;\r
5965\r
5966 if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MIN_LEN + 3)\r
5967 goto not_posix_bracket;\r
5968\r
5969 for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {\r
5970 if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) {\r
5971 p = (UChar* )onigenc_step(enc, p, end, pb->len);\r
5972 if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0)\r
5973 return ONIGERR_INVALID_POSIX_BRACKET_TYPE;\r
5974\r
5975 r = add_ctype_to_cc(cc, pb->ctype, not, env);\r
5976 if (r != 0) return r;\r
5977\r
5978 PINC_S; PINC_S;\r
5979 *src = p;\r
5980 return 0;\r
5981 }\r
5982 }\r
5983\r
5984 not_posix_bracket:\r
5985 c = 0;\r
5986 i = 0;\r
5987 while (!PEND && ((c = PPEEK) != ':') && c != ']') {\r
5988 PINC_S;\r
5989 if (++i > POSIX_BRACKET_CHECK_LIMIT_LENGTH) break;\r
5990 }\r
5991 if (c == ':' && ! PEND) {\r
5992 PINC_S;\r
5993 if (! PEND) {\r
5994 PFETCH_S(c);\r
5995 if (c == ']')\r
5996 return ONIGERR_INVALID_POSIX_BRACKET_TYPE;\r
5997 }\r
5998 }\r
5999\r
6000 return 1; /* 1: is not POSIX bracket, but no error. */\r
6001}\r
6002\r
6003static int\r
6004fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env)\r
6005{\r
6006 int r;\r
6007 OnigCodePoint c;\r
6008 OnigEncoding enc = env->enc;\r
6009 UChar *prev, *start, *p = *src;\r
6010\r
6011 r = 0;\r
6012 start = prev = p;\r
6013\r
6014 while (!PEND) {\r
6015 prev = p;\r
6016 PFETCH_S(c);\r
6017 if (c == '}') {\r
6018 r = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, start, prev);\r
6019 if (r < 0) break;\r
6020\r
6021 *src = p;\r
6022 return r;\r
6023 }\r
6024 else if (c == '(' || c == ')' || c == '{' || c == '|') {\r
6025 r = ONIGERR_INVALID_CHAR_PROPERTY_NAME;\r
6026 break;\r
6027 }\r
6028 }\r
6029\r
6030 onig_scan_env_set_error_string(env, r, *src, prev);\r
6031 return r;\r
6032}\r
6033\r
6034static int\r
b602265d 6035parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)\r
14b0e578
CS
6036{\r
6037 int r, ctype;\r
6038 CClassNode* cc;\r
6039\r
6040 ctype = fetch_char_property_to_ctype(src, end, env);\r
6041 if (ctype < 0) return ctype;\r
6042\r
6043 *np = node_new_cclass();\r
6044 CHECK_NULL_RETURN_MEMERR(*np);\r
b602265d 6045 cc = CCLASS_(*np);\r
14b0e578
CS
6046 r = add_ctype_to_cc(cc, ctype, 0, env);\r
6047 if (r != 0) return r;\r
6048 if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);\r
6049\r
6050 return 0;\r
6051}\r
6052\r
6053\r
/* Parser state while scanning the members of a character class. */
enum CCSTATE {
  CCS_VALUE    = 0,  /* a value or class was read; a value may still be pending */
  CCS_RANGE    = 1,  /* a range operator was read after a value */
  CCS_COMPLETE = 2,  /* a range has just been completed */
  CCS_START    = 3   /* at the start of a class, or right after "&&" */
};
6060\r
/* Kind of the most recently read character class member. */
enum CCVALTYPE {
  CCV_SB         = 0,  /* single-byte value, stored in the bitset */
  CCV_CODE_POINT = 1,  /* code point value, stored as a code range */
  CCV_CLASS      = 2   /* a ctype / nested class rather than a single value */
};
6066\r
6067static int\r
6068next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type,\r
b602265d 6069 enum CCSTATE* state, ScanEnv* env)\r
14b0e578
CS
6070{\r
6071 int r;\r
6072\r
6073 if (*state == CCS_RANGE)\r
6074 return ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE;\r
6075\r
6076 if (*state == CCS_VALUE && *type != CCV_CLASS) {\r
6077 if (*type == CCV_SB)\r
6078 BITSET_SET_BIT(cc->bs, (int )(*vs));\r
6079 else if (*type == CCV_CODE_POINT) {\r
6080 r = add_code_range(&(cc->mbuf), env, *vs, *vs);\r
6081 if (r < 0) return r;\r
6082 }\r
6083 }\r
6084\r
6085 *state = CCS_VALUE;\r
6086 *type = CCV_CLASS;\r
6087 return 0;\r
6088}\r
6089\r
6090static int\r
b602265d
DG
6091next_state_val(CClassNode* cc, OnigCodePoint *from, OnigCodePoint to,\r
6092 int* from_israw, int to_israw,\r
6093 enum CCVALTYPE intype, enum CCVALTYPE* type,\r
6094 enum CCSTATE* state, ScanEnv* env)\r
14b0e578
CS
6095{\r
6096 int r;\r
6097\r
6098 switch (*state) {\r
6099 case CCS_VALUE:\r
b602265d
DG
6100 if (*type == CCV_SB) {\r
6101 if (*from > 0xff)\r
6102 return ONIGERR_INVALID_CODE_POINT_VALUE;\r
6103\r
6104 BITSET_SET_BIT(cc->bs, (int )(*from));\r
6105 }\r
14b0e578 6106 else if (*type == CCV_CODE_POINT) {\r
b602265d 6107 r = add_code_range(&(cc->mbuf), env, *from, *from);\r
14b0e578
CS
6108 if (r < 0) return r;\r
6109 }\r
6110 break;\r
6111\r
6112 case CCS_RANGE:\r
6113 if (intype == *type) {\r
6114 if (intype == CCV_SB) {\r
b602265d 6115 if (*from > 0xff || to > 0xff)\r
14b0e578
CS
6116 return ONIGERR_INVALID_CODE_POINT_VALUE;\r
6117\r
b602265d
DG
6118 if (*from > to) {\r
6119 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))\r
6120 goto ccs_range_end;\r
6121 else\r
6122 return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;\r
6123 }\r
6124 bitset_set_range(cc->bs, (int )*from, (int )to);\r
14b0e578
CS
6125 }\r
6126 else {\r
b602265d
DG
6127 r = add_code_range(&(cc->mbuf), env, *from, to);\r
6128 if (r < 0) return r;\r
14b0e578
CS
6129 }\r
6130 }\r
6131 else {\r
b602265d
DG
6132 if (*from > to) {\r
6133 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))\r
6134 goto ccs_range_end;\r
6135 else\r
6136 return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;\r
14b0e578 6137 }\r
b602265d
DG
6138 bitset_set_range(cc->bs, (int )*from, (int )(to < 0xff ? to : 0xff));\r
6139 r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*from, to);\r
6140 if (r < 0) return r;\r
14b0e578
CS
6141 }\r
6142 ccs_range_end:\r
6143 *state = CCS_COMPLETE;\r
6144 break;\r
6145\r
6146 case CCS_COMPLETE:\r
6147 case CCS_START:\r
6148 *state = CCS_VALUE;\r
6149 break;\r
6150\r
6151 default:\r
6152 break;\r
6153 }\r
6154\r
b602265d
DG
6155 *from_israw = to_israw;\r
6156 *from = to;\r
6157 *type = intype;\r
14b0e578
CS
6158 return 0;\r
6159}\r
6160\r
6161static int\r
6162code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,\r
b602265d 6163 ScanEnv* env)\r
14b0e578
CS
6164{\r
6165 int in_esc;\r
6166 OnigCodePoint code;\r
6167 OnigEncoding enc = env->enc;\r
6168 UChar* p = from;\r
6169\r
6170 in_esc = 0;\r
6171 while (! PEND) {\r
6172 if (ignore_escaped && in_esc) {\r
6173 in_esc = 0;\r
6174 }\r
6175 else {\r
6176 PFETCH_S(code);\r
6177 if (code == c) return 1;\r
6178 if (code == MC_ESC(env->syntax)) in_esc = 1;\r
6179 }\r
6180 }\r
6181 return 0;\r
6182}\r
6183\r
6184static int\r
b602265d 6185parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)\r
14b0e578
CS
6186{\r
6187 int r, neg, len, fetched, and_start;\r
6188 OnigCodePoint v, vs;\r
6189 UChar *p;\r
6190 Node* node;\r
6191 CClassNode *cc, *prev_cc;\r
6192 CClassNode work_cc;\r
6193\r
6194 enum CCSTATE state;\r
6195 enum CCVALTYPE val_type, in_type;\r
6196 int val_israw, in_israw;\r
6197\r
14b0e578 6198 *np = NULL_NODE;\r
b602265d
DG
6199 env->parse_depth++;\r
6200 if (env->parse_depth > ParseDepthLimit)\r
6201 return ONIGERR_PARSE_DEPTH_LIMIT_OVER;\r
6202 prev_cc = (CClassNode* )NULL;\r
14b0e578
CS
6203 r = fetch_token_in_cc(tok, src, end, env);\r
6204 if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) {\r
6205 neg = 1;\r
6206 r = fetch_token_in_cc(tok, src, end, env);\r
6207 }\r
6208 else {\r
6209 neg = 0;\r
6210 }\r
6211\r
6212 if (r < 0) return r;\r
6213 if (r == TK_CC_CLOSE) {\r
6214 if (! code_exist_check((OnigCodePoint )']',\r
6215 *src, env->pattern_end, 1, env))\r
6216 return ONIGERR_EMPTY_CHAR_CLASS;\r
6217\r
6218 CC_ESC_WARN(env, (UChar* )"]");\r
6219 r = tok->type = TK_CHAR; /* allow []...] */\r
6220 }\r
6221\r
6222 *np = node = node_new_cclass();\r
6223 CHECK_NULL_RETURN_MEMERR(node);\r
b602265d 6224 cc = CCLASS_(node);\r
14b0e578
CS
6225\r
6226 and_start = 0;\r
6227 state = CCS_START;\r
6228 p = *src;\r
6229 while (r != TK_CC_CLOSE) {\r
6230 fetched = 0;\r
6231 switch (r) {\r
6232 case TK_CHAR:\r
b602265d 6233 any_char_in:\r
14b0e578
CS
6234 len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.c);\r
6235 if (len > 1) {\r
b602265d 6236 in_type = CCV_CODE_POINT;\r
14b0e578
CS
6237 }\r
6238 else if (len < 0) {\r
b602265d
DG
6239 r = len;\r
6240 goto err;\r
14b0e578
CS
6241 }\r
6242 else {\r
b602265d
DG
6243 /* sb_char: */\r
6244 in_type = CCV_SB;\r
14b0e578
CS
6245 }\r
6246 v = (OnigCodePoint )tok->u.c;\r
6247 in_israw = 0;\r
6248 goto val_entry2;\r
6249 break;\r
6250\r
6251 case TK_RAW_BYTE:\r
6252 /* tok->base != 0 : octal or hexadec. */\r
6253 if (! ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base != 0) {\r
b602265d
DG
6254 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];\r
6255 UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN;\r
6256 UChar* psave = p;\r
6257 int i, base = tok->base;\r
6258\r
6259 buf[0] = tok->u.c;\r
6260 for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) {\r
6261 r = fetch_token_in_cc(tok, &p, end, env);\r
6262 if (r < 0) goto err;\r
6263 if (r != TK_RAW_BYTE || tok->base != base) {\r
6264 fetched = 1;\r
6265 break;\r
6266 }\r
6267 buf[i] = tok->u.c;\r
6268 }\r
6269\r
6270 if (i < ONIGENC_MBC_MINLEN(env->enc)) {\r
6271 r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;\r
6272 goto err;\r
6273 }\r
6274\r
6275 len = enclen(env->enc, buf);\r
6276 if (i < len) {\r
6277 r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;\r
6278 goto err;\r
6279 }\r
6280 else if (i > len) { /* fetch back */\r
6281 p = psave;\r
6282 for (i = 1; i < len; i++) {\r
6283 r = fetch_token_in_cc(tok, &p, end, env);\r
6284 }\r
6285 fetched = 0;\r
6286 }\r
6287\r
6288 if (i == 1) {\r
6289 v = (OnigCodePoint )buf[0];\r
6290 goto raw_single;\r
6291 }\r
6292 else {\r
6293 v = ONIGENC_MBC_TO_CODE(env->enc, buf, bufe);\r
6294 in_type = CCV_CODE_POINT;\r
6295 }\r
6296 }\r
6297 else {\r
6298 v = (OnigCodePoint )tok->u.c;\r
6299 raw_single:\r
6300 in_type = CCV_SB;\r
6301 }\r
6302 in_israw = 1;\r
6303 goto val_entry2;\r
6304 break;\r
6305\r
6306 case TK_CODE_POINT:\r
6307 v = tok->u.code;\r
6308 in_israw = 1;\r
6309 val_entry:\r
6310 len = ONIGENC_CODE_TO_MBCLEN(env->enc, v);\r
6311 if (len < 0) {\r
6312 r = len;\r
6313 goto err;\r
6314 }\r
6315 in_type = (len == 1 ? CCV_SB : CCV_CODE_POINT);\r
6316 val_entry2:\r
6317 r = next_state_val(cc, &vs, v, &val_israw, in_israw, in_type, &val_type,\r
6318 &state, env);\r
6319 if (r != 0) goto err;\r
6320 break;\r
6321\r
6322 case TK_POSIX_BRACKET_OPEN:\r
6323 r = parse_posix_bracket(cc, &p, end, env);\r
6324 if (r < 0) goto err;\r
6325 if (r == 1) { /* is not POSIX bracket */\r
6326 CC_ESC_WARN(env, (UChar* )"[");\r
6327 p = tok->backp;\r
6328 v = (OnigCodePoint )tok->u.c;\r
6329 in_israw = 0;\r
6330 goto val_entry;\r
6331 }\r
6332 goto next_class;\r
6333 break;\r
6334\r
6335 case TK_CHAR_TYPE:\r
6336 r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not, env);\r
6337 if (r != 0) goto err;\r
6338\r
6339 next_class:\r
6340 r = next_state_class(cc, &vs, &val_type, &state, env);\r
6341 if (r != 0) goto err;\r
6342 break;\r
6343\r
6344 case TK_CHAR_PROPERTY:\r
6345 {\r
6346 int ctype = fetch_char_property_to_ctype(&p, end, env);\r
6347 if (ctype < 0) {\r
6348 r = ctype;\r
6349 goto err;\r
6350 }\r
6351 r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, env);\r
6352 if (r != 0) goto err;\r
6353 goto next_class;\r
6354 }\r
6355 break;\r
6356\r
6357 case TK_CC_RANGE:\r
6358 if (state == CCS_VALUE) {\r
6359 r = fetch_token_in_cc(tok, &p, end, env);\r
6360 if (r < 0) goto err;\r
6361 fetched = 1;\r
6362 if (r == TK_CC_CLOSE) { /* allow [x-] */\r
6363 range_end_val:\r
6364 v = (OnigCodePoint )'-';\r
6365 in_israw = 0;\r
6366 goto val_entry;\r
6367 }\r
6368 else if (r == TK_CC_AND) {\r
6369 CC_ESC_WARN(env, (UChar* )"-");\r
6370 goto range_end_val;\r
6371 }\r
6372\r
6373 if (val_type == CCV_CLASS) {\r
6374 r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;\r
6375 goto err;\r
6376 }\r
6377\r
6378 state = CCS_RANGE;\r
6379 }\r
6380 else if (state == CCS_START) {\r
6381 /* [-xa] is allowed */\r
6382 v = (OnigCodePoint )tok->u.c;\r
6383 in_israw = 0;\r
6384\r
6385 r = fetch_token_in_cc(tok, &p, end, env);\r
6386 if (r < 0) goto err;\r
6387 fetched = 1;\r
6388 /* [--x] or [a&&-x] is warned. */\r
6389 if (r == TK_CC_RANGE || and_start != 0)\r
6390 CC_ESC_WARN(env, (UChar* )"-");\r
6391\r
6392 goto val_entry;\r
6393 }\r
6394 else if (state == CCS_RANGE) {\r
6395 CC_ESC_WARN(env, (UChar* )"-");\r
6396 goto any_char_in; /* [!--x] is allowed */\r
6397 }\r
6398 else { /* CCS_COMPLETE */\r
6399 r = fetch_token_in_cc(tok, &p, end, env);\r
6400 if (r < 0) goto err;\r
6401 fetched = 1;\r
6402 if (r == TK_CC_CLOSE) goto range_end_val; /* allow [a-b-] */\r
6403 else if (r == TK_CC_AND) {\r
6404 CC_ESC_WARN(env, (UChar* )"-");\r
6405 goto range_end_val;\r
6406 }\r
6407\r
6408 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) {\r
6409 CC_ESC_WARN(env, (UChar* )"-");\r
6410 goto range_end_val; /* [0-9-a] is allowed as [0-9\-a] */\r
6411 }\r
6412 r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;\r
6413 goto err;\r
6414 }\r
6415 break;\r
6416\r
6417 case TK_CC_CC_OPEN: /* [ */\r
6418 {\r
6419 Node *anode;\r
6420 CClassNode* acc;\r
6421\r
6422 r = parse_char_class(&anode, tok, &p, end, env);\r
6423 if (r != 0) {\r
6424 onig_node_free(anode);\r
6425 goto cc_open_err;\r
6426 }\r
6427 acc = CCLASS_(anode);\r
6428 r = or_cclass(cc, acc, env->enc);\r
6429 onig_node_free(anode);\r
6430\r
6431 cc_open_err:\r
6432 if (r != 0) goto err;\r
6433 }\r
6434 break;\r
6435\r
6436 case TK_CC_AND: /* && */\r
6437 {\r
6438 if (state == CCS_VALUE) {\r
6439 r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,\r
6440 &val_type, &state, env);\r
6441 if (r != 0) goto err;\r
6442 }\r
6443 /* initialize local variables */\r
6444 and_start = 1;\r
6445 state = CCS_START;\r
6446\r
6447 if (IS_NOT_NULL(prev_cc)) {\r
6448 r = and_cclass(prev_cc, cc, env->enc);\r
6449 if (r != 0) goto err;\r
6450 bbuf_free(cc->mbuf);\r
6451 }\r
6452 else {\r
6453 prev_cc = cc;\r
6454 cc = &work_cc;\r
6455 }\r
6456 initialize_cclass(cc);\r
6457 }\r
6458 break;\r
6459\r
6460 case TK_EOT:\r
6461 r = ONIGERR_PREMATURE_END_OF_CHAR_CLASS;\r
6462 goto err;\r
6463 break;\r
6464 default:\r
6465 r = ONIGERR_PARSER_BUG;\r
6466 goto err;\r
6467 break;\r
6468 }\r
6469\r
6470 if (fetched)\r
6471 r = tok->type;\r
6472 else {\r
6473 r = fetch_token_in_cc(tok, &p, end, env);\r
6474 if (r < 0) goto err;\r
6475 }\r
6476 }\r
6477\r
6478 if (state == CCS_VALUE) {\r
6479 r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,\r
6480 &val_type, &state, env);\r
6481 if (r != 0) goto err;\r
6482 }\r
6483\r
6484 if (IS_NOT_NULL(prev_cc)) {\r
6485 r = and_cclass(prev_cc, cc, env->enc);\r
6486 if (r != 0) goto err;\r
6487 bbuf_free(cc->mbuf);\r
6488 cc = prev_cc;\r
6489 }\r
6490\r
6491 if (neg != 0)\r
6492 NCCLASS_SET_NOT(cc);\r
6493 else\r
6494 NCCLASS_CLEAR_NOT(cc);\r
6495 if (IS_NCCLASS_NOT(cc) &&\r
6496 IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) {\r
6497 int is_empty = (IS_NULL(cc->mbuf) ? 1 : 0);\r
6498 if (is_empty != 0)\r
6499 BITSET_IS_EMPTY(cc->bs, is_empty);\r
6500\r
6501 if (is_empty == 0) {\r
6502#define NEWLINE_CODE 0x0a\r
6503\r
6504 if (ONIGENC_IS_CODE_NEWLINE(env->enc, NEWLINE_CODE)) {\r
6505 if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1)\r
6506 BITSET_SET_BIT(cc->bs, NEWLINE_CODE);\r
6507 else\r
6508 add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE);\r
6509 }\r
6510 }\r
6511 }\r
6512 *src = p;\r
6513 env->parse_depth--;\r
6514 return 0;\r
6515\r
6516 err:\r
6517 if (cc != CCLASS_(*np))\r
6518 bbuf_free(cc->mbuf);\r
6519 return r;\r
6520}\r
6521\r
6522static int parse_subexp(Node** top, OnigToken* tok, int term,\r
6523 UChar** src, UChar* end, ScanEnv* env);\r
6524\r
6525#ifdef USE_CALLOUT\r
6526\r
6527/* (?{...}[tag][+-]) (?{{...}}[tag][+-]) */\r
6528static int\r
6529parse_callout_of_contents(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* env)\r
6530{\r
6531 int r;\r
6532 int i;\r
6533 int in;\r
6534 int num;\r
6535 OnigCodePoint c;\r
6536 UChar* code_start;\r
6537 UChar* code_end;\r
6538 UChar* contents;\r
6539 UChar* tag_start;\r
6540 UChar* tag_end;\r
6541 int brace_nest;\r
6542 CalloutListEntry* e;\r
6543 RegexExt* ext;\r
6544 OnigEncoding enc = env->enc;\r
6545 UChar* p = *src;\r
6546\r
6547 if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;\r
6548\r
6549 brace_nest = 0;\r
6550 while (PPEEK_IS('{')) {\r
6551 brace_nest++;\r
6552 PINC_S;\r
6553 if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;\r
6554 }\r
6555\r
6556 in = ONIG_CALLOUT_IN_PROGRESS;\r
6557 code_start = p;\r
6558 while (1) {\r
6559 if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;\r
6560\r
6561 code_end = p;\r
6562 PFETCH_S(c);\r
6563 if (c == '}') {\r
6564 i = brace_nest;\r
6565 while (i > 0) {\r
6566 if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;\r
6567 PFETCH_S(c);\r
6568 if (c == '}') i--;\r
6569 else break;\r
6570 }\r
6571 if (i == 0) break;\r
6572 }\r
6573 }\r
6574\r
6575 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
6576\r
6577 PFETCH_S(c);\r
6578 if (c == '[') {\r
6579 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
6580 tag_start = p;\r
6581 while (! PEND) {\r
6582 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
6583 tag_end = p;\r
6584 PFETCH_S(c);\r
6585 if (c == ']') break;\r
6586 }\r
6587 if (! is_allowed_callout_tag_name(enc, tag_start, tag_end))\r
6588 return ONIGERR_INVALID_CALLOUT_TAG_NAME;\r
6589\r
6590 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
6591 PFETCH_S(c);\r
6592 }\r
6593 else {\r
6594 tag_start = tag_end = 0;\r
6595 }\r
6596\r
6597 if (c == 'X') {\r
6598 in |= ONIG_CALLOUT_IN_RETRACTION;\r
6599 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
6600 PFETCH_S(c);\r
6601 }\r
6602 else if (c == '<') {\r
6603 in = ONIG_CALLOUT_IN_RETRACTION;\r
6604 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
6605 PFETCH_S(c);\r
6606 }\r
6607 else if (c == '>') { /* no needs (default) */\r
6608 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
6609 PFETCH_S(c);\r
6610 }\r
6611\r
6612 if (c != cterm)\r
6613 return ONIGERR_INVALID_CALLOUT_PATTERN;\r
6614\r
6615 r = reg_callout_list_entry(env, &num);\r
6616 if (r != 0) return r;\r
6617\r
6618 ext = onig_get_regex_ext(env->reg);\r
6619 if (IS_NULL(ext->pattern)) {\r
6620 r = onig_ext_set_pattern(env->reg, env->pattern, env->pattern_end);\r
6621 if (r != ONIG_NORMAL) return r;\r
6622 }\r
6623\r
6624 if (tag_start != tag_end) {\r
6625 r = callout_tag_entry(env->reg, tag_start, tag_end, num);\r
6626 if (r != ONIG_NORMAL) return r;\r
6627 }\r
6628\r
6629 contents = onigenc_strdup(enc, code_start, code_end);\r
6630 CHECK_NULL_RETURN_MEMERR(contents);\r
6631\r
6632 r = node_new_callout(np, ONIG_CALLOUT_OF_CONTENTS, num, ONIG_NON_NAME_ID, env);\r
6633 if (r != 0) {\r
6634 xfree(contents);\r
6635 return r;\r
6636 }\r
6637\r
6638 e = onig_reg_callout_list_at(env->reg, num);\r
a5def177
DG
6639 if (IS_NULL(e)) {\r
6640 xfree(contents);\r
6641 return ONIGERR_MEMORY;\r
6642 }\r
6643\r
b602265d
DG
6644 e->of = ONIG_CALLOUT_OF_CONTENTS;\r
6645 e->in = in;\r
6646 e->name_id = ONIG_NON_NAME_ID;\r
6647 e->u.content.start = contents;\r
6648 e->u.content.end = contents + (code_end - code_start);\r
6649\r
6650 *src = p;\r
6651 return 0;\r
6652}\r
6653\r
6654static long\r
6655parse_long(OnigEncoding enc, UChar* s, UChar* end, int sign_on, long max, long* rl)\r
6656{\r
6657 long v;\r
6658 long d;\r
6659 int flag;\r
6660 UChar* p;\r
6661 OnigCodePoint c;\r
6662\r
6663 if (s >= end) return ONIGERR_INVALID_CALLOUT_ARG;\r
6664\r
6665 flag = 1;\r
6666 v = 0;\r
6667 p = s;\r
6668 while (p < end) {\r
6669 c = ONIGENC_MBC_TO_CODE(enc, p, end);\r
6670 p += ONIGENC_MBC_ENC_LEN(enc, p);\r
6671 if (c >= '0' && c <= '9') {\r
6672 d = (long )(c - '0');\r
6673 if (v > (max - d) / 10)\r
6674 return ONIGERR_INVALID_CALLOUT_ARG;\r
6675\r
6676 v = v * 10 + d;\r
6677 }\r
6678 else if (sign_on != 0 && (c == '-' || c == '+')) {\r
6679 if (c == '-') flag = -1;\r
6680 }\r
6681 else\r
6682 return ONIGERR_INVALID_CALLOUT_ARG;\r
6683\r
6684 sign_on = 0;\r
6685 }\r
6686\r
6687 *rl = flag * v;\r
6688 return ONIG_NORMAL;\r
6689}\r
6690\r
6691static int\r
6692parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end,\r
6693 unsigned int types[], OnigValue vals[], ScanEnv* env)\r
6694{\r
6695#define MAX_CALLOUT_ARG_BYTE_LENGTH 128\r
6696\r
6697 int r;\r
6698 int n;\r
6699 int esc;\r
6700 int cn;\r
6701 UChar* s;\r
6702 UChar* e;\r
6703 UChar* eesc;\r
6704 OnigCodePoint c;\r
6705 UChar* bufend;\r
6706 UChar buf[MAX_CALLOUT_ARG_BYTE_LENGTH];\r
6707 OnigEncoding enc = env->enc;\r
6708 UChar* p = *src;\r
6709\r
6710 if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;\r
6711\r
6712 n = 0;\r
6713 while (n < ONIG_CALLOUT_MAX_ARGS_NUM) {\r
6714 c = 0;\r
6715 cn = 0;\r
6716 esc = 0;\r
6717 eesc = 0;\r
6718 bufend = buf;\r
6719 s = e = p;\r
6720 while (1) {\r
6721 if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;\r
6722\r
6723 e = p;\r
6724 PFETCH_S(c);\r
6725 if (esc != 0) {\r
6726 esc = 0;\r
6727 if (c == '\\' || c == cterm || c == ',') {\r
6728 /* */\r
6729 }\r
6730 else {\r
6731 e = eesc;\r
6732 cn++;\r
6733 }\r
6734 goto add_char;\r
14b0e578
CS
6735 }\r
6736 else {\r
b602265d
DG
6737 if (c == '\\') {\r
6738 esc = 1;\r
6739 eesc = e;\r
6740 }\r
6741 else if (c == cterm || c == ',')\r
6742 break;\r
6743 else {\r
6744 size_t clen;\r
14b0e578 6745\r
b602265d
DG
6746 add_char:\r
6747 if (skip_mode == 0) {\r
6748 clen = p - e;\r
6749 if (bufend + clen > buf + MAX_CALLOUT_ARG_BYTE_LENGTH)\r
6750 return ONIGERR_INVALID_CALLOUT_ARG; /* too long argument */\r
14b0e578 6751\r
b602265d
DG
6752 xmemcpy(bufend, e, clen);\r
6753 bufend += clen;\r
6754 }\r
6755 cn++;\r
6756 }\r
14b0e578 6757 }\r
b602265d 6758 }\r
14b0e578 6759\r
b602265d
DG
6760 if (cn != 0) {\r
6761 if (skip_mode == 0) {\r
6762 if ((types[n] & ONIG_TYPE_LONG) != 0) {\r
6763 int fixed = 0;\r
6764 if (cn > 0) {\r
6765 long rl;\r
6766 r = parse_long(enc, buf, bufend, 1, LONG_MAX, &rl);\r
6767 if (r == ONIG_NORMAL) {\r
6768 vals[n].l = rl;\r
6769 fixed = 1;\r
6770 types[n] = ONIG_TYPE_LONG;\r
6771 }\r
6772 }\r
14b0e578 6773\r
b602265d
DG
6774 if (fixed == 0) {\r
6775 types[n] = (types[n] & ~ONIG_TYPE_LONG);\r
6776 if (types[n] == ONIG_TYPE_VOID)\r
6777 return ONIGERR_INVALID_CALLOUT_ARG;\r
6778 }\r
6779 }\r
14b0e578 6780\r
b602265d
DG
6781 switch (types[n]) {\r
6782 case ONIG_TYPE_LONG:\r
6783 break;\r
14b0e578 6784\r
b602265d
DG
6785 case ONIG_TYPE_CHAR:\r
6786 if (cn != 1) return ONIGERR_INVALID_CALLOUT_ARG;\r
6787 vals[n].c = ONIGENC_MBC_TO_CODE(enc, buf, bufend);\r
6788 break;\r
14b0e578 6789\r
b602265d
DG
6790 case ONIG_TYPE_STRING:\r
6791 {\r
6792 UChar* rs = onigenc_strdup(enc, buf, bufend);\r
6793 CHECK_NULL_RETURN_MEMERR(rs);\r
6794 vals[n].s.start = rs;\r
6795 vals[n].s.end = rs + (e - s);\r
6796 }\r
6797 break;\r
14b0e578 6798\r
b602265d
DG
6799 case ONIG_TYPE_TAG:\r
6800 if (eesc != 0 || ! is_allowed_callout_tag_name(enc, s, e))\r
6801 return ONIGERR_INVALID_CALLOUT_TAG_NAME;\r
14b0e578 6802\r
b602265d
DG
6803 vals[n].s.start = s;\r
6804 vals[n].s.end = e;\r
6805 break;\r
6806\r
6807 case ONIG_TYPE_VOID:\r
6808 case ONIG_TYPE_POINTER:\r
6809 return ONIGERR_PARSER_BUG;\r
6810 break;\r
6811 }\r
14b0e578 6812 }\r
14b0e578 6813\r
b602265d
DG
6814 n++;\r
6815 }\r
14b0e578 6816\r
b602265d
DG
6817 if (c == cterm) break;\r
6818 }\r
14b0e578 6819\r
b602265d 6820 if (c != cterm) return ONIGERR_INVALID_CALLOUT_PATTERN;\r
14b0e578 6821\r
b602265d
DG
6822 *src = p;\r
6823 return n;\r
6824}\r
14b0e578 6825\r
b602265d
DG
6826/* (*name[TAG]) (*name[TAG]{a,b,..}) */\r
6827static int\r
6828parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* env)\r
6829{\r
6830 int r;\r
6831 int i;\r
6832 int in;\r
6833 int num;\r
6834 int name_id;\r
6835 int arg_num;\r
6836 int max_arg_num;\r
6837 int opt_arg_num;\r
6838 int is_not_single;\r
6839 OnigCodePoint c;\r
6840 UChar* name_start;\r
6841 UChar* name_end;\r
6842 UChar* tag_start;\r
6843 UChar* tag_end;\r
6844 Node* node;\r
6845 CalloutListEntry* e;\r
6846 RegexExt* ext;\r
6847 unsigned int types[ONIG_CALLOUT_MAX_ARGS_NUM];\r
6848 OnigValue vals[ONIG_CALLOUT_MAX_ARGS_NUM];\r
6849 OnigEncoding enc = env->enc;\r
6850 UChar* p = *src;\r
14b0e578 6851\r
b602265d
DG
6852 /* PFETCH_READY; */\r
6853 if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;\r
6854\r
6855 node = 0;\r
6856 name_start = p;\r
6857 while (1) {\r
6858 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
6859 name_end = p;\r
6860 PFETCH_S(c);\r
6861 if (c == cterm || c == '[' || c == '{') break;\r
6862 }\r
6863\r
6864 if (! is_allowed_callout_name(enc, name_start, name_end))\r
6865 return ONIGERR_INVALID_CALLOUT_NAME;\r
6866\r
6867 if (c == '[') {\r
6868 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
6869 tag_start = p;\r
6870 while (! PEND) {\r
6871 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
6872 tag_end = p;\r
6873 PFETCH_S(c);\r
6874 if (c == ']') break;\r
14b0e578 6875 }\r
b602265d
DG
6876 if (! is_allowed_callout_tag_name(enc, tag_start, tag_end))\r
6877 return ONIGERR_INVALID_CALLOUT_TAG_NAME;\r
6878\r
6879 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
6880 PFETCH_S(c);\r
6881 }\r
6882 else {\r
6883 tag_start = tag_end = 0;\r
14b0e578
CS
6884 }\r
6885\r
b602265d
DG
6886 if (c == '{') {\r
6887 UChar* save;\r
6888\r
6889 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
6890\r
6891 /* read for single check only */\r
6892 save = p;\r
6893 arg_num = parse_callout_args(1, '}', &p, end, 0, 0, env);\r
6894 if (arg_num < 0) return arg_num;\r
6895\r
6896 is_not_single = PPEEK_IS(cterm) ? 0 : 1;\r
6897 p = save;\r
6898 r = get_callout_name_id_by_name(enc, is_not_single, name_start, name_end,\r
6899 &name_id);\r
6900 if (r != ONIG_NORMAL) return r;\r
6901\r
6902 max_arg_num = get_callout_arg_num_by_name_id(name_id);\r
6903 for (i = 0; i < max_arg_num; i++) {\r
6904 types[i] = get_callout_arg_type_by_name_id(name_id, i);\r
6905 }\r
6906\r
6907 arg_num = parse_callout_args(0, '}', &p, end, types, vals, env);\r
6908 if (arg_num < 0) return arg_num;\r
6909\r
6910 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
6911 PFETCH_S(c);\r
14b0e578 6912 }\r
b602265d
DG
6913 else {\r
6914 arg_num = 0;\r
14b0e578 6915\r
b602265d
DG
6916 is_not_single = 0;\r
6917 r = get_callout_name_id_by_name(enc, is_not_single, name_start, name_end,\r
6918 &name_id);\r
6919 if (r != ONIG_NORMAL) return r;\r
6920\r
6921 max_arg_num = get_callout_arg_num_by_name_id(name_id);\r
6922 for (i = 0; i < max_arg_num; i++) {\r
6923 types[i] = get_callout_arg_type_by_name_id(name_id, i);\r
6924 }\r
14b0e578
CS
6925 }\r
6926\r
b602265d
DG
6927 in = onig_get_callout_in_by_name_id(name_id);\r
6928 opt_arg_num = get_callout_opt_arg_num_by_name_id(name_id);\r
6929 if (arg_num > max_arg_num || arg_num < (max_arg_num - opt_arg_num))\r
6930 return ONIGERR_INVALID_CALLOUT_ARG;\r
14b0e578 6931\r
b602265d
DG
6932 if (c != cterm)\r
6933 return ONIGERR_INVALID_CALLOUT_PATTERN;\r
14b0e578 6934\r
b602265d
DG
6935 r = reg_callout_list_entry(env, &num);\r
6936 if (r != 0) return r;\r
14b0e578 6937\r
b602265d
DG
6938 ext = onig_get_regex_ext(env->reg);\r
6939 if (IS_NULL(ext->pattern)) {\r
6940 r = onig_ext_set_pattern(env->reg, env->pattern, env->pattern_end);\r
6941 if (r != ONIG_NORMAL) return r;\r
6942 }\r
6943\r
6944 if (tag_start != tag_end) {\r
6945 r = callout_tag_entry(env->reg, tag_start, tag_end, num);\r
6946 if (r != ONIG_NORMAL) return r;\r
6947 }\r
6948\r
6949 r = node_new_callout(&node, ONIG_CALLOUT_OF_NAME, num, name_id, env);\r
6950 if (r != ONIG_NORMAL) return r;\r
6951\r
6952 e = onig_reg_callout_list_at(env->reg, num);\r
a5def177
DG
6953 CHECK_NULL_RETURN_MEMERR(e);\r
6954\r
b602265d
DG
6955 e->of = ONIG_CALLOUT_OF_NAME;\r
6956 e->in = in;\r
6957 e->name_id = name_id;\r
6958 e->type = onig_get_callout_type_by_name_id(name_id);\r
6959 e->start_func = onig_get_callout_start_func_by_name_id(name_id);\r
6960 e->end_func = onig_get_callout_end_func_by_name_id(name_id);\r
6961 e->u.arg.num = max_arg_num;\r
6962 e->u.arg.passed_num = arg_num;\r
6963 for (i = 0; i < max_arg_num; i++) {\r
6964 e->u.arg.types[i] = types[i];\r
6965 if (i < arg_num)\r
6966 e->u.arg.vals[i] = vals[i];\r
6967 else\r
6968 e->u.arg.vals[i] = get_callout_opt_default_by_name_id(name_id, i);\r
14b0e578 6969 }\r
b602265d
DG
6970\r
6971 *np = node;\r
14b0e578
CS
6972 *src = p;\r
6973 return 0;\r
14b0e578 6974}\r
b602265d 6975#endif\r
14b0e578
CS
6976\r
6977static int\r
b602265d
DG
6978parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,\r
6979 ScanEnv* env)\r
14b0e578
CS
6980{\r
6981 int r, num;\r
6982 Node *target;\r
6983 OnigOptionType option;\r
6984 OnigCodePoint c;\r
b602265d 6985 int list_capture;\r
14b0e578
CS
6986 OnigEncoding enc = env->enc;\r
6987\r
b602265d
DG
6988 UChar* p = *src;\r
6989 PFETCH_READY;\r
6990\r
6991 *np = NULL;\r
6992 if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;\r
6993\r
6994 option = env->options;\r
6995 c = PPEEK;\r
6996 if (c == '?' && IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {\r
6997 PINC;\r
6998 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
6999\r
7000 PFETCH(c);\r
7001 switch (c) {\r
7002 case ':': /* (?:...) grouping only */\r
7003 group:\r
7004 r = fetch_token(tok, &p, end, env);\r
7005 if (r < 0) return r;\r
7006 r = parse_subexp(np, tok, term, &p, end, env);\r
7007 if (r < 0) return r;\r
7008 *src = p;\r
7009 return 1; /* group */\r
7010 break;\r
7011\r
7012 case '=':\r
7013 *np = onig_node_new_anchor(ANCHOR_PREC_READ, 0);\r
7014 break;\r
7015 case '!': /* preceding read */\r
7016 *np = onig_node_new_anchor(ANCHOR_PREC_READ_NOT, 0);\r
7017 break;\r
7018 case '>': /* (?>...) stop backtrack */\r
7019 *np = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);\r
7020 break;\r
7021\r
7022 case '\'':\r
7023 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {\r
7024 goto named_group1;\r
7025 }\r
7026 else\r
7027 return ONIGERR_UNDEFINED_GROUP_OPTION;\r
7028 break;\r
7029\r
7030 case '<': /* look behind (?<=...), (?<!...) */\r
7031 if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;\r
7032 PFETCH(c);\r
7033 if (c == '=')\r
7034 *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND, 0);\r
7035 else if (c == '!')\r
7036 *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND_NOT, 0);\r
7037 else {\r
7038 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {\r
7039 UChar *name;\r
7040 UChar *name_end;\r
7041 enum REF_NUM num_type;\r
7042\r
7043 PUNFETCH;\r
7044 c = '<';\r
7045\r
7046 named_group1:\r
7047 list_capture = 0;\r
7048\r
7049 named_group2:\r
7050 name = p;\r
7051 r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num,\r
7052 &num_type, 0);\r
7053 if (r < 0) return r;\r
7054\r
7055 num = scan_env_add_mem_entry(env);\r
7056 if (num < 0) return num;\r
7057 if (list_capture != 0 && num >= (int )MEM_STATUS_BITS_NUM)\r
7058 return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;\r
7059\r
7060 r = name_add(env->reg, name, name_end, num, env);\r
7061 if (r != 0) return r;\r
7062 *np = node_new_memory(1);\r
7063 CHECK_NULL_RETURN_MEMERR(*np);\r
7064 ENCLOSURE_(*np)->m.regnum = num;\r
7065 if (list_capture != 0)\r
7066 MEM_STATUS_ON_SIMPLE(env->capture_history, num);\r
7067 env->num_named++;\r
7068 }\r
7069 else {\r
7070 return ONIGERR_UNDEFINED_GROUP_OPTION;\r
7071 }\r
7072 }\r
7073 break;\r
7074\r
7075 case '~':\r
7076 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP)) {\r
7077 Node* absent;\r
7078 Node* expr;\r
7079 int head_bar;\r
7080 int is_range_cutter;\r
7081\r
7082 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
7083\r
7084 if (PPEEK_IS('|')) { /* (?~|generator|absent) */\r
7085 PINC;\r
7086 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
7087\r
7088 head_bar = 1;\r
7089 if (PPEEK_IS(')')) { /* (?~|) : range clear */\r
7090 PINC;\r
7091 r = make_range_clear(np, env);\r
7092 if (r != 0) return r;\r
7093 goto end;\r
7094 }\r
7095 }\r
7096 else\r
7097 head_bar = 0;\r
7098\r
7099 r = fetch_token(tok, &p, end, env);\r
7100 if (r < 0) return r;\r
7101 r = parse_subexp(&absent, tok, term, &p, end, env);\r
7102 if (r < 0) {\r
7103 onig_node_free(absent);\r
7104 return r;\r
7105 }\r
7106\r
7107 expr = NULL_NODE;\r
7108 is_range_cutter = 0;\r
7109 if (head_bar != 0) {\r
7110 Node* top = absent;\r
7111 if (NODE_TYPE(top) != NODE_ALT || IS_NULL(NODE_CDR(top))) {\r
7112 expr = NULL_NODE;\r
7113 is_range_cutter = 1;\r
7114 /* return ONIGERR_INVALID_ABSENT_GROUP_GENERATOR_PATTERN; */\r
7115 }\r
7116 else {\r
7117 absent = NODE_CAR(top);\r
7118 expr = NODE_CDR(top);\r
7119 NODE_CAR(top) = NULL_NODE;\r
7120 NODE_CDR(top) = NULL_NODE;\r
7121 onig_node_free(top);\r
7122 if (IS_NULL(NODE_CDR(expr))) {\r
7123 top = expr;\r
7124 expr = NODE_CAR(top);\r
7125 NODE_CAR(top) = NULL_NODE;\r
7126 onig_node_free(top);\r
7127 }\r
7128 }\r
7129 }\r
7130\r
7131 r = make_absent_tree(np, absent, expr, is_range_cutter, env);\r
7132 if (r != 0) {\r
7133 return r;\r
7134 }\r
7135 goto end;\r
7136 }\r
7137 else {\r
7138 return ONIGERR_UNDEFINED_GROUP_OPTION;\r
7139 }\r
7140 break;\r
7141\r
7142#ifdef USE_CALLOUT\r
7143 case '{':\r
7144 if (! IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS))\r
7145 return ONIGERR_UNDEFINED_GROUP_OPTION;\r
7146\r
7147 r = parse_callout_of_contents(np, ')', &p, end, env);\r
7148 if (r != 0) return r;\r
7149\r
7150 goto end;\r
7151 break;\r
7152#endif\r
7153\r
7154 case '(':\r
7155 /* (?()...) */\r
7156 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE)) {\r
7157 UChar *prev;\r
7158 Node* condition;\r
7159 int condition_is_checker;\r
7160\r
7161 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
7162 PFETCH(c);\r
7163 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
7164\r
7165 if (IS_CODE_DIGIT_ASCII(enc, c)\r
7166 || c == '-' || c == '+' || c == '<' || c == '\'') {\r
7167 UChar* name_end;\r
7168 int back_num;\r
7169 int exist_level;\r
7170 int level;\r
7171 enum REF_NUM num_type;\r
7172 int is_enclosed;\r
7173\r
7174 is_enclosed = (c == '<' || c == '\'') ? 1 : 0;\r
7175 if (! is_enclosed)\r
7176 PUNFETCH;\r
7177 prev = p;\r
7178 exist_level = 0;\r
7179#ifdef USE_BACKREF_WITH_LEVEL\r
7180 name_end = NULL_UCHARP; /* no need. escape gcc warning. */\r
7181 r = fetch_name_with_level(\r
7182 (OnigCodePoint )(is_enclosed != 0 ? c : '('),\r
7183 &p, end, &name_end,\r
7184 env, &back_num, &level, &num_type);\r
7185 if (r == 1) exist_level = 1;\r
7186#else\r
7187 r = fetch_name((OnigCodePoint )(is_enclosed != 0 ? c : '('),\r
7188 &p, end, &name_end, env, &back_num, &num_type, 1);\r
7189#endif\r
7190 if (r < 0) {\r
7191 if (is_enclosed == 0) {\r
7192 goto any_condition;\r
7193 }\r
7194 else\r
7195 return r;\r
7196 }\r
7197\r
7198 condition_is_checker = 1;\r
7199 if (num_type != IS_NOT_NUM) {\r
7200 if (num_type == IS_REL_NUM) {\r
7201 back_num = backref_rel_to_abs(back_num, env);\r
7202 }\r
7203 if (back_num <= 0)\r
7204 return ONIGERR_INVALID_BACKREF;\r
7205\r
7206 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_STRICT_CHECK_BACKREF)) {\r
7207 if (back_num > env->num_mem ||\r
7208 IS_NULL(SCANENV_MEMENV(env)[back_num].node))\r
7209 return ONIGERR_INVALID_BACKREF;\r
7210 }\r
7211\r
7212 condition = node_new_backref_checker(1, &back_num, 0,\r
7213#ifdef USE_BACKREF_WITH_LEVEL\r
7214 exist_level, level,\r
7215#endif\r
7216 env);\r
7217 }\r
7218 else {\r
7219 int num;\r
7220 int* backs;\r
7221\r
7222 num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);\r
7223 if (num <= 0) {\r
7224 onig_scan_env_set_error_string(env,\r
7225 ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);\r
7226 return ONIGERR_UNDEFINED_NAME_REFERENCE;\r
7227 }\r
7228 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_STRICT_CHECK_BACKREF)) {\r
7229 int i;\r
7230 for (i = 0; i < num; i++) {\r
7231 if (backs[i] > env->num_mem ||\r
7232 IS_NULL(SCANENV_MEMENV(env)[backs[i]].node))\r
7233 return ONIGERR_INVALID_BACKREF;\r
7234 }\r
7235 }\r
7236\r
7237 condition = node_new_backref_checker(num, backs, 1,\r
7238#ifdef USE_BACKREF_WITH_LEVEL\r
7239 exist_level, level,\r
7240#endif\r
7241 env);\r
7242 }\r
7243\r
7244 if (is_enclosed != 0) {\r
7245 if (PEND) goto err_if_else;\r
7246 PFETCH(c);\r
7247 if (c != ')') goto err_if_else;\r
7248 }\r
7249 }\r
7250#ifdef USE_CALLOUT\r
7251 else if (c == '?') {\r
7252 if (IS_SYNTAX_OP2(env->syntax,\r
7253 ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS)) {\r
7254 if (! PEND && PPEEK_IS('{')) {\r
7255 /* condition part is callouts of contents: (?(?{...})THEN|ELSE) */\r
7256 condition_is_checker = 0;\r
7257 PFETCH(c);\r
7258 r = parse_callout_of_contents(&condition, ')', &p, end, env);\r
7259 if (r != 0) return r;\r
7260 goto end_condition;\r
7261 }\r
7262 }\r
7263 goto any_condition;\r
7264 }\r
7265 else if (c == '*' &&\r
7266 IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME)) {\r
7267 condition_is_checker = 0;\r
7268 r = parse_callout_of_name(&condition, ')', &p, end, env);\r
7269 if (r != 0) return r;\r
7270 goto end_condition;\r
7271 }\r
14b0e578 7272#endif\r
b602265d
DG
7273 else {\r
7274 any_condition:\r
7275 PUNFETCH;\r
7276 condition_is_checker = 0;\r
7277 r = fetch_token(tok, &p, end, env);\r
7278 if (r < 0) return r;\r
7279 r = parse_subexp(&condition, tok, term, &p, end, env);\r
7280 if (r < 0) {\r
7281 onig_node_free(condition);\r
7282 return r;\r
7283 }\r
7284 }\r
14b0e578 7285\r
b602265d
DG
7286 end_condition:\r
7287 CHECK_NULL_RETURN_MEMERR(condition);\r
14b0e578 7288\r
b602265d
DG
7289 if (PEND) {\r
7290 err_if_else:\r
7291 onig_node_free(condition);\r
7292 return ONIGERR_END_PATTERN_IN_GROUP;\r
7293 }\r
14b0e578 7294\r
b602265d
DG
7295 if (PPEEK_IS(')')) { /* case: empty body: make backref checker */\r
7296 if (condition_is_checker == 0) {\r
7297 onig_node_free(condition);\r
7298 return ONIGERR_INVALID_IF_ELSE_SYNTAX;\r
7299 }\r
7300 PFETCH(c);\r
7301 *np = condition;\r
7302 }\r
7303 else { /* if-else */\r
7304 int then_is_empty;\r
7305 Node *Then, *Else;\r
14b0e578 7306\r
b602265d
DG
7307 if (PPEEK_IS('|')) {\r
7308 PFETCH(c);\r
7309 Then = 0;\r
7310 then_is_empty = 1;\r
7311 }\r
7312 else\r
7313 then_is_empty = 0;\r
14b0e578 7314\r
b602265d
DG
7315 r = fetch_token(tok, &p, end, env);\r
7316 if (r < 0) {\r
7317 onig_node_free(condition);\r
7318 return r;\r
7319 }\r
7320 r = parse_subexp(&target, tok, term, &p, end, env);\r
7321 if (r < 0) {\r
7322 onig_node_free(condition);\r
7323 onig_node_free(target);\r
7324 return r;\r
7325 }\r
14b0e578 7326\r
b602265d
DG
7327 if (then_is_empty != 0) {\r
7328 Else = target;\r
7329 }\r
7330 else {\r
7331 if (NODE_TYPE(target) == NODE_ALT) {\r
7332 Then = NODE_CAR(target);\r
7333 if (NODE_CDR(NODE_CDR(target)) == NULL_NODE) {\r
7334 Else = NODE_CAR(NODE_CDR(target));\r
7335 cons_node_free_alone(NODE_CDR(target));\r
7336 }\r
7337 else {\r
7338 Else = NODE_CDR(target);\r
7339 }\r
7340 cons_node_free_alone(target);\r
7341 }\r
7342 else {\r
7343 Then = target;\r
7344 Else = 0;\r
7345 }\r
7346 }\r
14b0e578 7347\r
b602265d
DG
7348 *np = node_new_enclosure_if_else(condition, Then, Else);\r
7349 if (IS_NULL(*np)) {\r
7350 onig_node_free(condition);\r
7351 onig_node_free(Then);\r
7352 onig_node_free(Else);\r
7353 return ONIGERR_MEMORY;\r
7354 }\r
7355 }\r
7356 goto end;\r
14b0e578 7357 }\r
14b0e578 7358 else {\r
b602265d 7359 return ONIGERR_UNDEFINED_GROUP_OPTION;\r
14b0e578 7360 }\r
14b0e578
CS
7361 break;\r
7362\r
7363 case '@':\r
7364 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) {\r
b602265d
DG
7365 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {\r
7366 PFETCH(c);\r
7367 if (c == '<' || c == '\'') {\r
7368 list_capture = 1;\r
7369 goto named_group2; /* (?@<name>...) */\r
7370 }\r
7371 PUNFETCH;\r
7372 }\r
7373\r
7374 *np = node_new_memory(0);\r
7375 CHECK_NULL_RETURN_MEMERR(*np);\r
7376 num = scan_env_add_mem_entry(env);\r
7377 if (num < 0) {\r
7378 return num;\r
7379 }\r
7380 else if (num >= (int )MEM_STATUS_BITS_NUM) {\r
7381 return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;\r
7382 }\r
7383 ENCLOSURE_(*np)->m.regnum = num;\r
7384 MEM_STATUS_ON_SIMPLE(env->capture_history, num);\r
14b0e578
CS
7385 }\r
7386 else {\r
b602265d 7387 return ONIGERR_UNDEFINED_GROUP_OPTION;\r
14b0e578
CS
7388 }\r
7389 break;\r
7390\r
7391#ifdef USE_POSIXLINE_OPTION\r
7392 case 'p':\r
7393#endif\r
7394 case '-': case 'i': case 'm': case 's': case 'x':\r
b602265d 7395 case 'W': case 'D': case 'S': case 'P':\r
14b0e578 7396 {\r
b602265d
DG
7397 int neg = 0;\r
7398\r
7399 while (1) {\r
7400 switch (c) {\r
7401 case ':':\r
7402 case ')':\r
7403 break;\r
7404\r
7405 case '-': neg = 1; break;\r
7406 case 'x': OPTION_NEGATE(option, ONIG_OPTION_EXTEND, neg); break;\r
7407 case 'i': OPTION_NEGATE(option, ONIG_OPTION_IGNORECASE, neg); break;\r
7408 case 's':\r
7409 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {\r
7410 OPTION_NEGATE(option, ONIG_OPTION_MULTILINE, neg);\r
7411 }\r
7412 else\r
7413 return ONIGERR_UNDEFINED_GROUP_OPTION;\r
7414 break;\r
7415\r
7416 case 'm':\r
7417 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {\r
7418 OPTION_NEGATE(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 1 : 0));\r
7419 }\r
7420 else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) {\r
7421 OPTION_NEGATE(option, ONIG_OPTION_MULTILINE, neg);\r
7422 }\r
7423 else\r
7424 return ONIGERR_UNDEFINED_GROUP_OPTION;\r
7425 break;\r
14b0e578 7426#ifdef USE_POSIXLINE_OPTION\r
b602265d
DG
7427 case 'p':\r
7428 OPTION_NEGATE(option, ONIG_OPTION_MULTILINE|ONIG_OPTION_SINGLELINE, neg);\r
7429 break;\r
14b0e578 7430#endif\r
b602265d
DG
7431 case 'W': OPTION_NEGATE(option, ONIG_OPTION_WORD_IS_ASCII, neg); break;\r
7432 case 'D': OPTION_NEGATE(option, ONIG_OPTION_DIGIT_IS_ASCII, neg); break;\r
7433 case 'S': OPTION_NEGATE(option, ONIG_OPTION_SPACE_IS_ASCII, neg); break;\r
7434 case 'P': OPTION_NEGATE(option, ONIG_OPTION_POSIX_IS_ASCII, neg); break;\r
7435\r
7436 default:\r
7437 return ONIGERR_UNDEFINED_GROUP_OPTION;\r
7438 }\r
7439\r
7440 if (c == ')') {\r
7441 *np = node_new_option(option);\r
7442 CHECK_NULL_RETURN_MEMERR(*np);\r
7443 *src = p;\r
7444 return 2; /* option only */\r
7445 }\r
7446 else if (c == ':') {\r
7447 OnigOptionType prev = env->options;\r
7448\r
7449 env->options = option;\r
7450 r = fetch_token(tok, &p, end, env);\r
7451 if (r < 0) return r;\r
7452 r = parse_subexp(&target, tok, term, &p, end, env);\r
7453 env->options = prev;\r
7454 if (r < 0) {\r
7455 onig_node_free(target);\r
7456 return r;\r
7457 }\r
7458 *np = node_new_option(option);\r
7459 CHECK_NULL_RETURN_MEMERR(*np);\r
7460 NODE_BODY(*np) = target;\r
7461 *src = p;\r
7462 return 0;\r
7463 }\r
7464\r
7465 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
7466 PFETCH(c);\r
7467 }\r
14b0e578
CS
7468 }\r
7469 break;\r
7470\r
7471 default:\r
7472 return ONIGERR_UNDEFINED_GROUP_OPTION;\r
7473 }\r
7474 }\r
b602265d
DG
7475#ifdef USE_CALLOUT\r
7476 else if (c == '*' &&\r
7477 IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME)) {\r
7478 PINC;\r
7479 r = parse_callout_of_name(np, ')', &p, end, env);\r
7480 if (r != 0) return r;\r
7481\r
7482 goto end;\r
7483 }\r
7484#endif\r
14b0e578 7485 else {\r
b602265d 7486 if (ONIG_IS_OPTION_ON(env->options, ONIG_OPTION_DONT_CAPTURE_GROUP))\r
14b0e578
CS
7487 goto group;\r
7488\r
b602265d 7489 *np = node_new_memory(0);\r
14b0e578
CS
7490 CHECK_NULL_RETURN_MEMERR(*np);\r
7491 num = scan_env_add_mem_entry(env);\r
7492 if (num < 0) return num;\r
b602265d 7493 ENCLOSURE_(*np)->m.regnum = num;\r
14b0e578
CS
7494 }\r
7495\r
7496 CHECK_NULL_RETURN_MEMERR(*np);\r
7497 r = fetch_token(tok, &p, end, env);\r
7498 if (r < 0) return r;\r
7499 r = parse_subexp(&target, tok, term, &p, end, env);\r
b602265d
DG
7500 if (r < 0) {\r
7501 onig_node_free(target);\r
7502 return r;\r
7503 }\r
14b0e578 7504\r
b602265d
DG
7505 NODE_BODY(*np) = target;\r
7506\r
7507 if (NODE_TYPE(*np) == NODE_ENCLOSURE) {\r
7508 if (ENCLOSURE_(*np)->type == ENCLOSURE_MEMORY) {\r
14b0e578 7509 /* Don't move this to previous of parse_subexp() */\r
b602265d 7510 r = scan_env_set_mem_node(env, ENCLOSURE_(*np)->m.regnum, *np);\r
14b0e578
CS
7511 if (r != 0) return r;\r
7512 }\r
7513 }\r
7514\r
b602265d 7515 end:\r
14b0e578
CS
7516 *src = p;\r
7517 return 0;\r
7518}\r
7519\r
/* Quantifier spellings used when building the "nested repeat operator"
   warning in set_quantifier(); indexed by the value returned from
   quantifier_type_num(). */
static const char* PopularQStr[] = {
  "?", "*", "+", "??", "*?", "+?"
};

/* Replacement descriptions for the same warning; indexed by the entry read
   from ReduceTypeTable[target][nest] in set_quantifier(). */
static const char* ReduceQStr[] = {
  "", "", "*", "*?", "??", "+ and ??", "+? and ?"
};
7527\r
7528static int\r
7529set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)\r
7530{\r
b602265d 7531 QuantNode* qn;\r
14b0e578 7532\r
b602265d
DG
7533 qn = QUANT_(qnode);\r
7534 if (qn->lower == 1 && qn->upper == 1)\r
14b0e578 7535 return 1;\r
14b0e578 7536\r
b602265d
DG
7537 switch (NODE_TYPE(target)) {\r
7538 case NODE_STRING:\r
14b0e578 7539 if (! group) {\r
b602265d
DG
7540 if (str_node_can_be_split(target, env->enc)) {\r
7541 Node* n = str_node_split_last_char(target, env->enc);\r
7542 if (IS_NOT_NULL(n)) {\r
7543 NODE_BODY(qnode) = n;\r
7544 return 2;\r
7545 }\r
14b0e578
CS
7546 }\r
7547 }\r
7548 break;\r
7549\r
b602265d 7550 case NODE_QUANT:\r
14b0e578
CS
7551 { /* check redundant double repeat. */\r
7552 /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */\r
b602265d
DG
7553 QuantNode* qnt = QUANT_(target);\r
7554 int nestq_num = quantifier_type_num(qn);\r
7555 int targetq_num = quantifier_type_num(qnt);\r
14b0e578
CS
7556\r
7557#ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR\r
b602265d
DG
7558 if (targetq_num >= 0 && nestq_num >= 0 &&\r
7559 IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {\r
14b0e578
CS
7560 UChar buf[WARN_BUFSIZE];\r
7561\r
7562 switch(ReduceTypeTable[targetq_num][nestq_num]) {\r
7563 case RQ_ASIS:\r
7564 break;\r
7565\r
7566 case RQ_DEL:\r
7567 if (onig_verb_warn != onig_null_warn) {\r
7568 onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,\r
b602265d
DG
7569 env->pattern, env->pattern_end,\r
7570 (UChar* )"redundant nested repeat operator");\r
14b0e578
CS
7571 (*onig_verb_warn)((char* )buf);\r
7572 }\r
7573 goto warn_exit;\r
7574 break;\r
7575\r
7576 default:\r
7577 if (onig_verb_warn != onig_null_warn) {\r
7578 onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,\r
7579 env->pattern, env->pattern_end,\r
7580 (UChar* )"nested repeat operator %s and %s was replaced with '%s'",\r
7581 PopularQStr[targetq_num], PopularQStr[nestq_num],\r
7582 ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]);\r
7583 (*onig_verb_warn)((char* )buf);\r
7584 }\r
7585 goto warn_exit;\r
7586 break;\r
7587 }\r
7588 }\r
7589\r
7590 warn_exit:\r
7591#endif\r
b602265d
DG
7592 if (targetq_num >= 0 && nestq_num < 0) {\r
7593 if (targetq_num == 1 || targetq_num == 2) { /* * or + */\r
7594 /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */\r
7595 if (! IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) {\r
7596 qn->upper = (qn->lower == 0 ? 1 : qn->lower);\r
7597 }\r
7598 }\r
7599 }\r
7600 else {\r
7601 NODE_BODY(qnode) = target;\r
7602 onig_reduce_nested_quantifier(qnode, target);\r
7603 goto q_exit;\r
14b0e578
CS
7604 }\r
7605 }\r
7606 break;\r
7607\r
7608 default:\r
7609 break;\r
7610 }\r
7611\r
b602265d 7612 NODE_BODY(qnode) = target;\r
14b0e578
CS
7613 q_exit:\r
7614 return 0;\r
7615}\r
7616\r
7617\r
14b0e578
CS
7618#ifndef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS\r
7619static int\r
7620clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc)\r
7621{\r
7622 BBuf *tbuf;\r
7623 int r;\r
7624\r
7625 if (IS_NCCLASS_NOT(cc)) {\r
7626 bitset_invert(cc->bs);\r
7627\r
7628 if (! ONIGENC_IS_SINGLEBYTE(enc)) {\r
7629 r = not_code_range_buf(enc, cc->mbuf, &tbuf);\r
7630 if (r != 0) return r;\r
7631\r
7632 bbuf_free(cc->mbuf);\r
7633 cc->mbuf = tbuf;\r
7634 }\r
7635\r
7636 NCCLASS_CLEAR_NOT(cc);\r
7637 }\r
7638\r
7639 return 0;\r
7640}\r
7641#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */\r
7642\r
/* Argument block handed to i_apply_case_fold() through the void* parameter
   of ONIGENC_APPLY_ALL_CASE_FOLD(). */
typedef struct {
  ScanEnv*    env;      /* scan environment (encoding is read from here) */
  CClassNode* cc;       /* character class being extended */
  Node*       alt_root; /* head of the alternation of multi-char folds */
  Node**      ptail;    /* where the next alternation node gets linked */
} IApplyCaseFoldArg;
7649\r
7650static int\r
b602265d 7651i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], int to_len, void* arg)\r
14b0e578
CS
7652{\r
7653 IApplyCaseFoldArg* iarg;\r
7654 ScanEnv* env;\r
7655 CClassNode* cc;\r
7656 BitSetRef bs;\r
7657\r
7658 iarg = (IApplyCaseFoldArg* )arg;\r
7659 env = iarg->env;\r
7660 cc = iarg->cc;\r
7661 bs = cc->bs;\r
7662\r
7663 if (to_len == 1) {\r
7664 int is_in = onig_is_code_in_cc(env->enc, from, cc);\r
7665#ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS\r
7666 if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) ||\r
b602265d 7667 (is_in == 0 && IS_NCCLASS_NOT(cc))) {\r
14b0e578 7668 if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {\r
b602265d 7669 add_code_range(&(cc->mbuf), env, *to, *to);\r
14b0e578
CS
7670 }\r
7671 else {\r
b602265d 7672 BITSET_SET_BIT(bs, *to);\r
14b0e578
CS
7673 }\r
7674 }\r
7675#else\r
7676 if (is_in != 0) {\r
7677 if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {\r
b602265d
DG
7678 if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc);\r
7679 add_code_range(&(cc->mbuf), env, *to, *to);\r
14b0e578
CS
7680 }\r
7681 else {\r
b602265d
DG
7682 if (IS_NCCLASS_NOT(cc)) {\r
7683 BITSET_CLEAR_BIT(bs, *to);\r
7684 }\r
7685 else\r
7686 BITSET_SET_BIT(bs, *to);\r
14b0e578
CS
7687 }\r
7688 }\r
7689#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */\r
7690 }\r
7691 else {\r
7692 int r, i, len;\r
7693 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];\r
7694 Node *snode = NULL_NODE;\r
7695\r
7696 if (onig_is_code_in_cc(env->enc, from, cc)\r
7697#ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS\r
b602265d 7698 && !IS_NCCLASS_NOT(cc)\r
14b0e578 7699#endif\r
b602265d 7700 ) {\r
14b0e578 7701 for (i = 0; i < to_len; i++) {\r
b602265d
DG
7702 len = ONIGENC_CODE_TO_MBC(env->enc, to[i], buf);\r
7703 if (i == 0) {\r
7704 snode = onig_node_new_str(buf, buf + len);\r
7705 CHECK_NULL_RETURN_MEMERR(snode);\r
7706\r
7707 /* char-class expanded multi-char only\r
7708 compare with string folded at match time. */\r
7709 NODE_STRING_SET_AMBIG(snode);\r
7710 }\r
7711 else {\r
7712 r = onig_node_str_cat(snode, buf, buf + len);\r
7713 if (r < 0) {\r
7714 onig_node_free(snode);\r
7715 return r;\r
7716 }\r
7717 }\r
14b0e578
CS
7718 }\r
7719\r
7720 *(iarg->ptail) = onig_node_new_alt(snode, NULL_NODE);\r
7721 CHECK_NULL_RETURN_MEMERR(*(iarg->ptail));\r
b602265d 7722 iarg->ptail = &(NODE_CDR((*(iarg->ptail))));\r
14b0e578
CS
7723 }\r
7724 }\r
7725\r
7726 return 0;\r
7727}\r
7728\r
7729static int\r
b602265d
DG
7730parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,\r
7731 ScanEnv* env)\r
14b0e578
CS
7732{\r
7733 int r, len, group = 0;\r
7734 Node* qn;\r
7735 Node** targetp;\r
7736\r
7737 *np = NULL;\r
7738 if (tok->type == (enum TokenSyms )term)\r
7739 goto end_of_token;\r
7740\r
7741 switch (tok->type) {\r
7742 case TK_ALT:\r
7743 case TK_EOT:\r
7744 end_of_token:\r
16bd7c35
DG
7745 *np = node_new_empty();\r
7746 CHECK_NULL_RETURN_MEMERR(*np);\r
7747 return tok->type;\r
14b0e578
CS
7748 break;\r
7749\r
7750 case TK_SUBEXP_OPEN:\r
b602265d 7751 r = parse_enclosure(np, tok, TK_SUBEXP_CLOSE, src, end, env);\r
14b0e578
CS
7752 if (r < 0) return r;\r
7753 if (r == 1) group = 1;\r
7754 else if (r == 2) { /* option only */\r
7755 Node* target;\r
b602265d 7756 OnigOptionType prev = env->options;\r
14b0e578 7757\r
b602265d 7758 env->options = ENCLOSURE_(*np)->o.options;\r
14b0e578
CS
7759 r = fetch_token(tok, src, end, env);\r
7760 if (r < 0) return r;\r
7761 r = parse_subexp(&target, tok, term, src, end, env);\r
b602265d
DG
7762 env->options = prev;\r
7763 if (r < 0) {\r
7764 onig_node_free(target);\r
7765 return r;\r
7766 }\r
7767 NODE_BODY(*np) = target;\r
14b0e578
CS
7768 return tok->type;\r
7769 }\r
7770 break;\r
7771\r
7772 case TK_SUBEXP_CLOSE:\r
7773 if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP))\r
7774 return ONIGERR_UNMATCHED_CLOSE_PARENTHESIS;\r
7775\r
7776 if (tok->escaped) goto tk_raw_byte;\r
7777 else goto tk_byte;\r
7778 break;\r
7779\r
7780 case TK_STRING:\r
7781 tk_byte:\r
7782 {\r
7783 *np = node_new_str(tok->backp, *src);\r
7784 CHECK_NULL_RETURN_MEMERR(*np);\r
7785\r
7786 while (1) {\r
b602265d
DG
7787 r = fetch_token(tok, src, end, env);\r
7788 if (r < 0) return r;\r
7789 if (r != TK_STRING) break;\r
14b0e578 7790\r
b602265d
DG
7791 r = onig_node_str_cat(*np, tok->backp, *src);\r
7792 if (r < 0) return r;\r
14b0e578
CS
7793 }\r
7794\r
7795 string_end:\r
7796 targetp = np;\r
7797 goto repeat;\r
7798 }\r
7799 break;\r
7800\r
7801 case TK_RAW_BYTE:\r
7802 tk_raw_byte:\r
7803 {\r
7804 *np = node_new_str_raw_char((UChar )tok->u.c);\r
7805 CHECK_NULL_RETURN_MEMERR(*np);\r
7806 len = 1;\r
7807 while (1) {\r
b602265d
DG
7808 if (len >= ONIGENC_MBC_MINLEN(env->enc)) {\r
7809 if (len == enclen(env->enc, STR_(*np)->s)) {/* should not enclen_end() */\r
7810 r = fetch_token(tok, src, end, env);\r
7811 NODE_STRING_CLEAR_RAW(*np);\r
7812 goto string_end;\r
7813 }\r
7814 }\r
7815\r
7816 r = fetch_token(tok, src, end, env);\r
7817 if (r < 0) return r;\r
7818 if (r != TK_RAW_BYTE) {\r
7819 /* Don't use this, it is wrong for little endian encodings. */\r
14b0e578 7820#ifdef USE_PAD_TO_SHORT_BYTE_CHAR\r
b602265d
DG
7821 int rem;\r
7822 if (len < ONIGENC_MBC_MINLEN(env->enc)) {\r
7823 rem = ONIGENC_MBC_MINLEN(env->enc) - len;\r
7824 (void )node_str_head_pad(STR_(*np), rem, (UChar )0);\r
7825 if (len + rem == enclen(env->enc, STR_(*np)->s)) {\r
7826 NODE_STRING_CLEAR_RAW(*np);\r
7827 goto string_end;\r
7828 }\r
7829 }\r
14b0e578 7830#endif\r
b602265d
DG
7831 return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;\r
7832 }\r
14b0e578 7833\r
b602265d
DG
7834 r = node_str_cat_char(*np, (UChar )tok->u.c);\r
7835 if (r < 0) return r;\r
14b0e578 7836\r
b602265d 7837 len++;\r
14b0e578
CS
7838 }\r
7839 }\r
7840 break;\r
7841\r
7842 case TK_CODE_POINT:\r
7843 {\r
7844 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];\r
7845 int num = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf);\r
7846 if (num < 0) return num;\r
7847#ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG\r
7848 *np = node_new_str_raw(buf, buf + num);\r
7849#else\r
7850 *np = node_new_str(buf, buf + num);\r
7851#endif\r
7852 CHECK_NULL_RETURN_MEMERR(*np);\r
7853 }\r
7854 break;\r
7855\r
7856 case TK_QUOTE_OPEN:\r
7857 {\r
7858 OnigCodePoint end_op[2];\r
7859 UChar *qstart, *qend, *nextp;\r
7860\r
7861 end_op[0] = (OnigCodePoint )MC_ESC(env->syntax);\r
7862 end_op[1] = (OnigCodePoint )'E';\r
7863 qstart = *src;\r
7864 qend = find_str_position(end_op, 2, qstart, end, &nextp, env->enc);\r
7865 if (IS_NULL(qend)) {\r
b602265d 7866 nextp = qend = end;\r
14b0e578
CS
7867 }\r
7868 *np = node_new_str(qstart, qend);\r
7869 CHECK_NULL_RETURN_MEMERR(*np);\r
7870 *src = nextp;\r
7871 }\r
7872 break;\r
7873\r
7874 case TK_CHAR_TYPE:\r
7875 {\r
7876 switch (tok->u.prop.ctype) {\r
7877 case ONIGENC_CTYPE_WORD:\r
b602265d
DG
7878 *np = node_new_ctype(tok->u.prop.ctype, tok->u.prop.not, env->options);\r
7879 CHECK_NULL_RETURN_MEMERR(*np);\r
7880 break;\r
14b0e578
CS
7881\r
7882 case ONIGENC_CTYPE_SPACE:\r
7883 case ONIGENC_CTYPE_DIGIT:\r
7884 case ONIGENC_CTYPE_XDIGIT:\r
b602265d
DG
7885 {\r
7886 CClassNode* cc;\r
7887\r
7888 *np = node_new_cclass();\r
7889 CHECK_NULL_RETURN_MEMERR(*np);\r
7890 cc = CCLASS_(*np);\r
7891 add_ctype_to_cc(cc, tok->u.prop.ctype, 0, env);\r
7892 if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);\r
7893 }\r
7894 break;\r
14b0e578
CS
7895\r
7896 default:\r
b602265d
DG
7897 return ONIGERR_PARSER_BUG;\r
7898 break;\r
14b0e578
CS
7899 }\r
7900 }\r
7901 break;\r
7902\r
7903 case TK_CHAR_PROPERTY:\r
7904 r = parse_char_property(np, tok, src, end, env);\r
7905 if (r != 0) return r;\r
7906 break;\r
7907\r
7908 case TK_CC_OPEN:\r
7909 {\r
7910 CClassNode* cc;\r
7911\r
7912 r = parse_char_class(np, tok, src, end, env);\r
7913 if (r != 0) return r;\r
7914\r
b602265d
DG
7915 cc = CCLASS_(*np);\r
7916 if (IS_IGNORECASE(env->options)) {\r
7917 IApplyCaseFoldArg iarg;\r
7918\r
7919 iarg.env = env;\r
7920 iarg.cc = cc;\r
7921 iarg.alt_root = NULL_NODE;\r
7922 iarg.ptail = &(iarg.alt_root);\r
7923\r
7924 r = ONIGENC_APPLY_ALL_CASE_FOLD(env->enc, env->case_fold_flag,\r
7925 i_apply_case_fold, &iarg);\r
7926 if (r != 0) {\r
7927 onig_node_free(iarg.alt_root);\r
7928 return r;\r
7929 }\r
7930 if (IS_NOT_NULL(iarg.alt_root)) {\r
14b0e578
CS
7931 Node* work = onig_node_new_alt(*np, iarg.alt_root);\r
7932 if (IS_NULL(work)) {\r
7933 onig_node_free(iarg.alt_root);\r
7934 return ONIGERR_MEMORY;\r
7935 }\r
7936 *np = work;\r
b602265d 7937 }\r
14b0e578
CS
7938 }\r
7939 }\r
7940 break;\r
7941\r
7942 case TK_ANYCHAR:\r
7943 *np = node_new_anychar();\r
7944 CHECK_NULL_RETURN_MEMERR(*np);\r
7945 break;\r
7946\r
7947 case TK_ANYCHAR_ANYTIME:\r
7948 *np = node_new_anychar();\r
7949 CHECK_NULL_RETURN_MEMERR(*np);\r
7950 qn = node_new_quantifier(0, REPEAT_INFINITE, 0);\r
7951 CHECK_NULL_RETURN_MEMERR(qn);\r
b602265d 7952 NODE_BODY(qn) = *np;\r
14b0e578
CS
7953 *np = qn;\r
7954 break;\r
7955\r
7956 case TK_BACKREF:\r
7957 len = tok->u.backref.num;\r
7958 *np = node_new_backref(len,\r
b602265d
DG
7959 (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)),\r
7960 tok->u.backref.by_name,\r
14b0e578 7961#ifdef USE_BACKREF_WITH_LEVEL\r
b602265d
DG
7962 tok->u.backref.exist_level,\r
7963 tok->u.backref.level,\r
14b0e578 7964#endif\r
b602265d 7965 env);\r
14b0e578
CS
7966 CHECK_NULL_RETURN_MEMERR(*np);\r
7967 break;\r
7968\r
b602265d 7969#ifdef USE_CALL\r
14b0e578
CS
7970 case TK_CALL:\r
7971 {\r
7972 int gnum = tok->u.call.gnum;\r
7973\r
b602265d
DG
7974 *np = node_new_call(tok->u.call.name, tok->u.call.name_end,\r
7975 gnum, tok->u.call.by_number);\r
14b0e578
CS
7976 CHECK_NULL_RETURN_MEMERR(*np);\r
7977 env->num_call++;\r
b602265d
DG
7978 if (tok->u.call.by_number != 0 && gnum == 0) {\r
7979 env->has_call_zero = 1;\r
7980 }\r
14b0e578
CS
7981 }\r
7982 break;\r
7983#endif\r
7984\r
7985 case TK_ANCHOR:\r
b602265d
DG
7986 {\r
7987 int ascii_mode =\r
7988 IS_WORD_ASCII(env->options) && IS_WORD_ANCHOR_TYPE(tok->u.anchor) ? 1 : 0;\r
7989 *np = onig_node_new_anchor(tok->u.anchor, ascii_mode);\r
7990 }\r
14b0e578
CS
7991 break;\r
7992\r
7993 case TK_OP_REPEAT:\r
7994 case TK_INTERVAL:\r
7995 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS)) {\r
7996 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS))\r
b602265d 7997 return ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED;\r
16bd7c35 7998 else {\r
b602265d 7999 *np = node_new_empty();\r
16bd7c35
DG
8000 CHECK_NULL_RETURN_MEMERR(*np);\r
8001 }\r
14b0e578
CS
8002 }\r
8003 else {\r
8004 goto tk_byte;\r
8005 }\r
8006 break;\r
8007\r
b602265d
DG
8008 case TK_KEEP:\r
8009 r = node_new_keep(np, env);\r
8010 if (r < 0) return r;\r
8011 break;\r
8012\r
8013 case TK_GENERAL_NEWLINE:\r
8014 r = node_new_general_newline(np, env);\r
8015 if (r < 0) return r;\r
8016 break;\r
8017\r
8018 case TK_NO_NEWLINE:\r
8019 r = node_new_no_newline(np, env);\r
8020 if (r < 0) return r;\r
8021 break;\r
8022\r
8023 case TK_TRUE_ANYCHAR:\r
8024 r = node_new_true_anychar(np, env);\r
8025 if (r < 0) return r;\r
8026 break;\r
8027\r
8028 case TK_EXTENDED_GRAPHEME_CLUSTER:\r
8029 r = make_extended_grapheme_cluster(np, env);\r
8030 if (r < 0) return r;\r
8031 break;\r
8032\r
14b0e578
CS
8033 default:\r
8034 return ONIGERR_PARSER_BUG;\r
8035 break;\r
8036 }\r
8037\r
8038 {\r
8039 targetp = np;\r
8040\r
8041 re_entry:\r
8042 r = fetch_token(tok, src, end, env);\r
8043 if (r < 0) return r;\r
8044\r
8045 repeat:\r
8046 if (r == TK_OP_REPEAT || r == TK_INTERVAL) {\r
8047 if (is_invalid_quantifier_target(*targetp))\r
b602265d 8048 return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID;\r
14b0e578
CS
8049\r
8050 qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper,\r
b602265d 8051 (r == TK_INTERVAL ? 1 : 0));\r
14b0e578 8052 CHECK_NULL_RETURN_MEMERR(qn);\r
b602265d 8053 QUANT_(qn)->greedy = tok->u.repeat.greedy;\r
14b0e578
CS
8054 r = set_quantifier(qn, *targetp, group, env);\r
8055 if (r < 0) {\r
b602265d
DG
8056 onig_node_free(qn);\r
8057 return r;\r
14b0e578
CS
8058 }\r
8059\r
8060 if (tok->u.repeat.possessive != 0) {\r
b602265d
DG
8061 Node* en;\r
8062 en = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);\r
8063 if (IS_NULL(en)) {\r
8064 onig_node_free(qn);\r
8065 return ONIGERR_MEMORY;\r
8066 }\r
8067 NODE_BODY(en) = qn;\r
8068 qn = en;\r
14b0e578
CS
8069 }\r
8070\r
8071 if (r == 0) {\r
b602265d 8072 *targetp = qn;\r
14b0e578
CS
8073 }\r
8074 else if (r == 1) {\r
b602265d 8075 onig_node_free(qn);\r
14b0e578
CS
8076 }\r
8077 else if (r == 2) { /* split case: /abc+/ */\r
b602265d
DG
8078 Node *tmp;\r
8079\r
8080 *targetp = node_new_list(*targetp, NULL);\r
8081 if (IS_NULL(*targetp)) {\r
8082 onig_node_free(qn);\r
8083 return ONIGERR_MEMORY;\r
8084 }\r
8085 tmp = NODE_CDR(*targetp) = node_new_list(qn, NULL);\r
8086 if (IS_NULL(tmp)) {\r
8087 onig_node_free(qn);\r
8088 return ONIGERR_MEMORY;\r
8089 }\r
8090 targetp = &(NODE_CAR(tmp));\r
14b0e578
CS
8091 }\r
8092 goto re_entry;\r
8093 }\r
8094 }\r
8095\r
8096 return r;\r
8097}\r
8098\r
8099static int\r
b602265d
DG
8100parse_branch(Node** top, OnigToken* tok, int term, UChar** src, UChar* end,\r
8101 ScanEnv* env)\r
14b0e578
CS
8102{\r
8103 int r;\r
8104 Node *node, **headp;\r
8105\r
8106 *top = NULL;\r
8107 r = parse_exp(&node, tok, term, src, end, env);\r
b602265d
DG
8108 if (r < 0) {\r
8109 onig_node_free(node);\r
8110 return r;\r
8111 }\r
14b0e578
CS
8112\r
8113 if (r == TK_EOT || r == term || r == TK_ALT) {\r
8114 *top = node;\r
8115 }\r
8116 else {\r
8117 *top = node_new_list(node, NULL);\r
a5def177
DG
8118 if (IS_NULL(*top)) {\r
8119 onig_node_free(node);\r
8120 return ONIGERR_MEMORY;\r
8121 }\r
8122\r
b602265d 8123 headp = &(NODE_CDR(*top));\r
14b0e578
CS
8124 while (r != TK_EOT && r != term && r != TK_ALT) {\r
8125 r = parse_exp(&node, tok, term, src, end, env);\r
b602265d
DG
8126 if (r < 0) {\r
8127 onig_node_free(node);\r
8128 return r;\r
8129 }\r
14b0e578 8130\r
b602265d
DG
8131 if (NODE_TYPE(node) == NODE_LIST) {\r
8132 *headp = node;\r
8133 while (IS_NOT_NULL(NODE_CDR(node))) node = NODE_CDR(node);\r
8134 headp = &(NODE_CDR(node));\r
14b0e578
CS
8135 }\r
8136 else {\r
b602265d
DG
8137 *headp = node_new_list(node, NULL);\r
8138 headp = &(NODE_CDR(*headp));\r
14b0e578
CS
8139 }\r
8140 }\r
8141 }\r
8142\r
8143 return r;\r
8144}\r
8145\r
8146/* term_tok: TK_EOT or TK_SUBEXP_CLOSE */\r
8147static int\r
b602265d
DG
8148parse_subexp(Node** top, OnigToken* tok, int term, UChar** src, UChar* end,\r
8149 ScanEnv* env)\r
14b0e578
CS
8150{\r
8151 int r;\r
8152 Node *node, **headp;\r
8153\r
8154 *top = NULL;\r
b602265d
DG
8155 env->parse_depth++;\r
8156 if (env->parse_depth > ParseDepthLimit)\r
8157 return ONIGERR_PARSE_DEPTH_LIMIT_OVER;\r
a5def177 8158\r
14b0e578
CS
8159 r = parse_branch(&node, tok, term, src, end, env);\r
8160 if (r < 0) {\r
8161 onig_node_free(node);\r
8162 return r;\r
8163 }\r
8164\r
8165 if (r == term) {\r
8166 *top = node;\r
8167 }\r
8168 else if (r == TK_ALT) {\r
8169 *top = onig_node_new_alt(node, NULL);\r
a5def177
DG
8170 if (IS_NULL(*top)) {\r
8171 onig_node_free(node);\r
8172 return ONIGERR_MEMORY;\r
8173 }\r
8174\r
b602265d 8175 headp = &(NODE_CDR(*top));\r
14b0e578
CS
8176 while (r == TK_ALT) {\r
8177 r = fetch_token(tok, src, end, env);\r
8178 if (r < 0) return r;\r
8179 r = parse_branch(&node, tok, term, src, end, env);\r
b602265d
DG
8180 if (r < 0) {\r
8181 onig_node_free(node);\r
8182 return r;\r
8183 }\r
14b0e578 8184 *headp = onig_node_new_alt(node, NULL);\r
a5def177
DG
8185 if (IS_NULL(*headp)) {\r
8186 onig_node_free(node);\r
8187 onig_node_free(*top);\r
8188 return ONIGERR_MEMORY;\r
8189 }\r
8190\r
b602265d 8191 headp = &(NODE_CDR(*headp));\r
14b0e578
CS
8192 }\r
8193\r
8194 if (tok->type != (enum TokenSyms )term)\r
8195 goto err;\r
8196 }\r
8197 else {\r
b602265d 8198 onig_node_free(node);\r
14b0e578
CS
8199 err:\r
8200 if (term == TK_SUBEXP_CLOSE)\r
8201 return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;\r
8202 else\r
8203 return ONIGERR_PARSER_BUG;\r
8204 }\r
8205\r
b602265d 8206 env->parse_depth--;\r
14b0e578
CS
8207 return r;\r
8208}\r
8209\r
8210static int\r
8211parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)\r
8212{\r
8213 int r;\r
8214 OnigToken tok;\r
8215\r
8216 r = fetch_token(&tok, src, end, env);\r
8217 if (r < 0) return r;\r
8218 r = parse_subexp(top, &tok, TK_EOT, src, end, env);\r
8219 if (r < 0) return r;\r
b602265d
DG
8220\r
8221 return 0;\r
8222}\r
8223\r
8224#ifdef USE_CALL\r
8225static int\r
8226make_call_zero_body(Node* node, ScanEnv* env, Node** rnode)\r
8227{\r
8228 int r;\r
8229\r
8230 Node* x = node_new_memory(0 /* 0: is not named */);\r
8231 CHECK_NULL_RETURN_MEMERR(x);\r
8232\r
8233 NODE_BODY(x) = node;\r
8234 ENCLOSURE_(x)->m.regnum = 0;\r
8235 r = scan_env_set_mem_node(env, 0, x);\r
8236 if (r != 0) {\r
8237 onig_node_free(x);\r
8238 return r;\r
8239 }\r
8240\r
8241 *rnode = x;\r
14b0e578
CS
8242 return 0;\r
8243}\r
b602265d 8244#endif\r
14b0e578
CS
8245\r
8246extern int\r
b602265d
DG
8247onig_parse_tree(Node** root, const UChar* pattern, const UChar* end,\r
8248 regex_t* reg, ScanEnv* env)\r
14b0e578
CS
8249{\r
8250 int r;\r
8251 UChar* p;\r
b602265d
DG
8252#ifdef USE_CALLOUT\r
8253 RegexExt* ext;\r
8254#endif\r
14b0e578 8255\r
14b0e578 8256 names_clear(reg);\r
14b0e578
CS
8257\r
8258 scan_env_clear(env);\r
b602265d 8259 env->options = reg->options;\r
14b0e578
CS
8260 env->case_fold_flag = reg->case_fold_flag;\r
8261 env->enc = reg->enc;\r
8262 env->syntax = reg->syntax;\r
8263 env->pattern = (UChar* )pattern;\r
8264 env->pattern_end = (UChar* )end;\r
8265 env->reg = reg;\r
8266\r
8267 *root = NULL;\r
b602265d
DG
8268\r
8269 if (! ONIGENC_IS_VALID_MBC_STRING(env->enc, pattern, end))\r
8270 return ONIGERR_INVALID_WIDE_CHAR_VALUE;\r
8271\r
14b0e578
CS
8272 p = (UChar* )pattern;\r
8273 r = parse_regexp(root, &p, (UChar* )end, env);\r
b602265d
DG
8274\r
8275#ifdef USE_CALL\r
8276 if (r != 0) return r;\r
8277\r
8278 if (env->has_call_zero != 0) {\r
8279 Node* zero_node;\r
8280 r = make_call_zero_body(*root, env, &zero_node);\r
8281 if (r != 0) return r;\r
8282\r
8283 *root = zero_node;\r
8284 }\r
8285#endif\r
8286\r
14b0e578 8287 reg->num_mem = env->num_mem;\r
b602265d
DG
8288\r
8289#ifdef USE_CALLOUT\r
8290 ext = REG_EXTP(reg);\r
8291 if (IS_NOT_NULL(ext) && ext->callout_num > 0) {\r
8292 r = setup_ext_callout_list_values(reg);\r
8293 }\r
8294#endif\r
8295\r
14b0e578
CS
8296 return r;\r
8297}\r
8298\r
8299extern void\r
8300onig_scan_env_set_error_string(ScanEnv* env, int ecode ARG_UNUSED,\r
b602265d 8301 UChar* arg, UChar* arg_end)\r
14b0e578
CS
8302{\r
8303 env->error = arg;\r
8304 env->error_end = arg_end;\r
8305}\r