]> git.proxmox.com Git - mirror_edk2.git/blame - MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regparse.c
MdeModulePkg: Delete useless case code
[mirror_edk2.git] / MdeModulePkg / Universal / RegularExpressionDxe / Oniguruma / regparse.c
CommitLineData
14b0e578
CS
1/**********************************************************************\r
2 regparse.c - Oniguruma (regular expression library)\r
3**********************************************************************/\r
4/*-\r
5 * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>\r
6 * All rights reserved.\r
7 *\r
0af8e57c 8 * (C) Copyright 2015 Hewlett Packard Enterprise Development LP<BR>\r
14b0e578
CS
9 *\r
10 * Redistribution and use in source and binary forms, with or without\r
11 * modification, are permitted provided that the following conditions\r
12 * are met:\r
13 * 1. Redistributions of source code must retain the above copyright\r
14 * notice, this list of conditions and the following disclaimer.\r
15 * 2. Redistributions in binary form must reproduce the above copyright\r
16 * notice, this list of conditions and the following disclaimer in the\r
17 * documentation and/or other materials provided with the distribution.\r
18 *\r
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND\r
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\r
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\r
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE\r
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\r
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS\r
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)\r
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT\r
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY\r
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF\r
29 * SUCH DAMAGE.\r
30 */\r
31\r
32#include "regparse.h"\r
33#include "st.h"\r
34\r
35#define WARN_BUFSIZE 256\r
36\r
37#define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS\r
38\r
39\r
40OnigSyntaxType OnigSyntaxRuby = {\r
41 (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |\r
42 ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |\r
43 ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |\r
44 ONIG_SYN_OP_ESC_C_CONTROL )\r
45 & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )\r
46 , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |\r
47 ONIG_SYN_OP2_OPTION_RUBY |\r
48 ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |\r
49 ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |\r
50 ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |\r
51 ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |\r
52 ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |\r
53 ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |\r
54 ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |\r
55 ONIG_SYN_OP2_ESC_H_XDIGIT )\r
56 , ( SYN_GNU_REGEX_BV | \r
57 ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |\r
58 ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |\r
59 ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |\r
60 ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |\r
61 ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |\r
62 ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |\r
63 ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )\r
64 , ONIG_OPTION_NONE\r
65 ,\r
66 {\r
67 (OnigCodePoint )'\\' /* esc */\r
68 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */\r
69 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */\r
70 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */\r
71 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */\r
72 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */\r
73 }\r
74};\r
75\r
76OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_RUBY;\r
77\r
78extern void onig_null_warn(const char* s ARG_UNUSED) { }\r
79\r
80#ifdef DEFAULT_WARN_FUNCTION\r
81static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION;\r
82#else\r
83static OnigWarnFunc onig_warn = onig_null_warn;\r
84#endif\r
85\r
86#ifdef DEFAULT_VERB_WARN_FUNCTION\r
87static OnigWarnFunc onig_verb_warn = (OnigWarnFunc )DEFAULT_VERB_WARN_FUNCTION;\r
88#else\r
89static OnigWarnFunc onig_verb_warn = onig_null_warn;\r
90#endif\r
91\r
92extern void onig_set_warn_func(OnigWarnFunc f)\r
93{\r
94 onig_warn = f;\r
95}\r
96\r
97extern void onig_set_verb_warn_func(OnigWarnFunc f)\r
98{\r
99 onig_verb_warn = f;\r
100}\r
101\r
102static void\r
103bbuf_free(BBuf* bbuf)\r
104{\r
105 if (IS_NOT_NULL(bbuf)) {\r
106 if (IS_NOT_NULL(bbuf->p)) xfree(bbuf->p);\r
107 xfree(bbuf);\r
108 }\r
109}\r
110\r
111static int\r
112bbuf_clone(BBuf** rto, BBuf* from)\r
113{\r
114 int r;\r
115 BBuf *to;\r
116\r
117 *rto = to = (BBuf* )xmalloc(sizeof(BBuf));\r
118 CHECK_NULL_RETURN_MEMERR(to);\r
119 r = BBUF_INIT(to, from->alloc);\r
120 if (r != 0) return r;\r
121 to->used = from->used;\r
122 xmemcpy(to->p, from->p, from->used);\r
123 return 0;\r
124}\r
125\r
/* Add a relative backref number to (number of groups seen so far + 1). */
#define BACKREF_REL_TO_ABS(rel_no, env) \
  ((env)->num_mem + 1 + (rel_no))

/* Clear flag bits f in v when negative is true, set them otherwise.
   The whole expansion is parenthesized so it is safe inside expressions. */
#define ONOFF(v,f,negative)    ((negative) ? ((v) &= ~(f)) : ((v) |= (f)))

/* 0 when the encoding's minimum character length exceeds one byte,
   0x80 otherwise. */
#define MBCODE_START_POS(enc) \
  (OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80)

/* Add the range [MBCODE_START_POS(enc), max code point] to pbuf. */
#define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \
  add_code_range_to_buf(pbuf, MBCODE_START_POS(enc), ~((OnigCodePoint )0))

/* As above, but only for non single-byte encodings.  Uses the caller's
   local `r` and returns from the caller when the call reports an error. */
#define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\
  if (! ONIGENC_IS_SINGLEBYTE(enc)) {\
    r = SET_ALL_MULTI_BYTE_RANGE(enc, &(mbuf));\
    if (r) return r;\
  }\
} while (0)
143\r
144\r
/* Set `empty` to 1 when every word of bitset bs is zero, else to 0. */
#define BITSET_IS_EMPTY(bs,empty) do {\
  int bitset_word_idx_;\
  empty = 1;\
  for (bitset_word_idx_ = 0; bitset_word_idx_ < (int )BITSET_SIZE;\
       bitset_word_idx_++) {\
    if ((bs)[bitset_word_idx_] != 0) {\
      empty = 0; break;\
    }\
  }\
} while (0)
154\r
155static void\r
156bitset_set_range(BitSetRef bs, int from, int to)\r
157{\r
158 int i;\r
159 for (i = from; i <= to && i < SINGLE_BYTE_SIZE; i++) {\r
160 BITSET_SET_BIT(bs, i);\r
161 }\r
162}\r
163\r
#if 0
/* Set every bit of the bitset (currently compiled out). */
static void
bitset_set_all(BitSetRef bs)
{
  int word;

  for (word = 0; word < BITSET_SIZE; word++) {
    bs[word] = ~((Bits )0);
  }
}
#endif
172\r
173static void\r
174bitset_invert(BitSetRef bs)\r
175{\r
176 int i;\r
177 for (i = 0; i < (int )BITSET_SIZE; i++) { bs[i] = ~(bs[i]); }\r
178}\r
179\r
180static void\r
181bitset_invert_to(BitSetRef from, BitSetRef to)\r
182{\r
183 int i;\r
184 for (i = 0; i < (int )BITSET_SIZE; i++) { to[i] = ~(from[i]); }\r
185}\r
186\r
187static void\r
188bitset_and(BitSetRef dest, BitSetRef bs)\r
189{\r
190 int i;\r
191 for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] &= bs[i]; }\r
192}\r
193\r
194static void\r
195bitset_or(BitSetRef dest, BitSetRef bs)\r
196{\r
197 int i;\r
198 for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] |= bs[i]; }\r
199}\r
200\r
201static void\r
202bitset_copy(BitSetRef dest, BitSetRef bs)\r
203{\r
204 int i;\r
205 for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] = bs[i]; }\r
206}\r
207\r
208extern int\r
209onig_strncmp(const UChar* s1, const UChar* s2, int n)\r
210{\r
211 int x;\r
212\r
213 while (n-- > 0) {\r
214 x = *s2++ - *s1++;\r
215 if (x) return x;\r
216 }\r
217 return 0;\r
218}\r
219\r
220extern void\r
221onig_strcpy(UChar* dest, const UChar* src, const UChar* end)\r
222{\r
223 int len = (int)(end - src);\r
224 if (len > 0) {\r
225 xmemcpy(dest, src, len);\r
226 dest[len] = (UChar )0;\r
227 }\r
228}\r
229\r
#ifdef USE_NAMED_GROUP
/*
 * Duplicate the bytes [s, end) into a new buffer, followed by
 * ONIGENC_MBC_MINLEN(enc) zero bytes.  Returns NULL if allocation fails.
 */
static UChar*
strdup_with_null(OnigEncoding enc, UChar* s, UChar* end)
{
  UChar *copy;
  UChar *tail;
  int body_len = (int)(end - s);
  int pad_len  = ONIGENC_MBC_MINLEN(enc);

  copy = (UChar* )xmalloc(body_len + pad_len);
  CHECK_NULL_RETURN(copy);
  xmemcpy(copy, s, body_len);

  /* zero the terminator bytes that follow the copied text */
  tail = copy + body_len;
  while (pad_len-- > 0) {
    *tail++ = (UChar )0;
  }

  return copy;
}
#endif
250\r
251/* scan pattern methods */\r
/*
 * Pattern scanning helpers.  They expect the locals `p` (cursor), `end`
 * (end of pattern) and `enc` (encoding) in the enclosing function; the
 * non-_S variants also need `pfetch_prev`, declared via PFETCH_READY.
 */

/* value yielded by PPEEK at end of pattern */
#define PEND_VALUE   0

#define PFETCH_READY  UChar* pfetch_prev
/* 1 when the cursor has reached the end, 0 otherwise */
#define PEND         (p < end ?  0 : 1)
/* step back to the position saved by the last PINC / PFETCH */
#define PUNFETCH     p = pfetch_prev
/* skip one character, remembering where it started */
#define PINC       do { \
  pfetch_prev = p; \
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
} while (0)
/* read one character into c and advance, remembering where it started */
#define PFETCH(c)  do { \
  c = ONIGENC_MBC_TO_CODE(enc, p, end); \
  pfetch_prev = p; \
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
} while (0)

/* "_S" variants: same as above but without saving the previous position */
#define PINC_S     do { \
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
} while (0)
#define PFETCH_S(c) do { \
  c = ONIGENC_MBC_TO_CODE(enc, p, end); \
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
} while (0)

/* code point at the cursor without advancing; PEND_VALUE at the end */
#define PPEEK        (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE)
/* the argument is parenthesized so the cast applies to the whole expression */
#define PPEEK_IS(c)  (PPEEK == (OnigCodePoint )(c))
277\r
278static UChar*\r
279strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end,\r
280 int capa, int oldCapa)\r
281{\r
282 UChar* r;\r
283\r
284 if (dest)\r
285 r = (UChar* )xrealloc(dest, capa + 1, oldCapa);\r
286 else\r
287 r = (UChar* )xmalloc(capa + 1);\r
288\r
289 CHECK_NULL_RETURN(r);\r
290 onig_strcpy(r + (dest_end - dest), src, src_end);\r
291 return r;\r
292}\r
293\r
294/* dest on static area */\r
295static UChar*\r
296strcat_capa_from_static(UChar* dest, UChar* dest_end,\r
297 const UChar* src, const UChar* src_end, int capa)\r
298{\r
299 UChar* r;\r
300\r
301 r = (UChar* )xmalloc(capa + 1);\r
302 CHECK_NULL_RETURN(r);\r
303 onig_strcpy(r, dest, dest_end);\r
304 onig_strcpy(r + (dest_end - dest), src, src_end);\r
305 return r;\r
306}\r
307\r
308\r
309#ifdef USE_ST_LIBRARY\r
310\r
311typedef struct {\r
312 UChar* s;\r
313 UChar* end;\r
314} st_str_end_key;\r
315\r
316static int\r
317str_end_cmp(st_str_end_key* x, st_str_end_key* y)\r
318{\r
319 UChar *p, *q;\r
320 int c;\r
321\r
322 if ((x->end - x->s) != (y->end - y->s))\r
323 return 1;\r
324\r
325 p = x->s;\r
326 q = y->s;\r
327 while (p < x->end) {\r
328 c = (int )*p - (int )*q;\r
329 if (c != 0) return c;\r
330\r
331 p++; q++;\r
332 }\r
333\r
334 return 0;\r
335}\r
336\r
337static int\r
338str_end_hash(st_str_end_key* x)\r
339{\r
340 UChar *p;\r
341 int val = 0;\r
342\r
343 p = x->s;\r
344 while (p < x->end) {\r
345 val = val * 997 + (int )*p++;\r
346 }\r
347\r
348 return val + (val >> 5);\r
349}\r
350\r
351extern hash_table_type*\r
352onig_st_init_strend_table_with_size(int size)\r
353{\r
354 static struct st_hash_type hashType = {\r
355 str_end_cmp,\r
356 str_end_hash,\r
357 };\r
358\r
359 return (hash_table_type* )\r
360 onig_st_init_table_with_size(&hashType, size);\r
361}\r
362\r
363extern int\r
364onig_st_lookup_strend(hash_table_type* table, const UChar* str_key,\r
365 const UChar* end_key, hash_data_type *value)\r
366{\r
367 st_str_end_key key;\r
368\r
369 key.s = (UChar* )str_key;\r
370 key.end = (UChar* )end_key;\r
371\r
4d454c54 372 return onig_st_lookup(table, (st_data_t )(UINTN)(&key), value);\r
14b0e578
CS
373}\r
374\r
375extern int\r
376onig_st_insert_strend(hash_table_type* table, const UChar* str_key,\r
377 const UChar* end_key, hash_data_type value)\r
378{\r
379 st_str_end_key* key;\r
380 int result;\r
381\r
382 key = (st_str_end_key* )xmalloc(sizeof(st_str_end_key));\r
b0c2b797 383 CHECK_NULL_RETURN_MEMERR(key);\r
14b0e578
CS
384 key->s = (UChar* )str_key;\r
385 key->end = (UChar* )end_key;\r
4d454c54 386 result = onig_st_insert(table, (st_data_t )(UINTN)key, value);\r
14b0e578
CS
387 if (result) {\r
388 xfree(key);\r
389 }\r
390 return result;\r
391}\r
392\r
393#endif /* USE_ST_LIBRARY */\r
394\r
395\r
396#ifdef USE_NAMED_GROUP\r
397\r
398#define INIT_NAME_BACKREFS_ALLOC_NUM 8\r
399\r
400typedef struct {\r
401 UChar* name;\r
402 int name_len; /* byte length */\r
403 int back_num; /* number of backrefs */\r
404 int back_alloc;\r
405 int back_ref1;\r
406 int* back_refs;\r
407} NameEntry;\r
408\r
409#ifdef USE_ST_LIBRARY\r
410\r
411typedef st_table NameTable;\r
412typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */\r
413\r
414#define NAMEBUF_SIZE 24\r
415#define NAMEBUF_SIZE_1 25\r
416\r
417#ifdef ONIG_DEBUG\r
418static int\r
419i_print_name_entry(UChar* key, NameEntry* e, void* arg)\r
420{\r
421 int i;\r
422 FILE* fp = (FILE* )arg;\r
423\r
424 fprintf(fp, "%s: ", e->name);\r
425 if (e->back_num == 0)\r
426 fputs("-", fp);\r
427 else if (e->back_num == 1)\r
428 fprintf(fp, "%d", e->back_ref1);\r
429 else {\r
430 for (i = 0; i < e->back_num; i++) {\r
431 if (i > 0) fprintf(fp, ", ");\r
432 fprintf(fp, "%d", e->back_refs[i]);\r
433 }\r
434 }\r
435 fputs("\n", fp);\r
436 return ST_CONTINUE;\r
437}\r
438\r
439extern int\r
440onig_print_names(FILE* fp, regex_t* reg)\r
441{\r
442 NameTable* t = (NameTable* )reg->name_table;\r
443\r
444 if (IS_NOT_NULL(t)) {\r
445 fprintf(fp, "name table\n");\r
446 onig_st_foreach(t, i_print_name_entry, (HashDataType )fp);\r
447 fputs("\n", fp);\r
448 }\r
449 return 0;\r
450}\r
451#endif /* ONIG_DEBUG */\r
452\r
453static int\r
454i_free_name_entry(UChar* key, NameEntry* e, void* arg ARG_UNUSED)\r
455{\r
456 xfree(e->name);\r
457 if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);\r
458 xfree(key);\r
459 xfree(e);\r
460 return ST_DELETE;\r
461}\r
462\r
463static int\r
464names_clear(regex_t* reg)\r
465{\r
466 NameTable* t = (NameTable* )reg->name_table;\r
467\r
468 if (IS_NOT_NULL(t)) {\r
469 onig_st_foreach(t, i_free_name_entry, 0);\r
470 }\r
471 return 0;\r
472}\r
473\r
474extern int\r
475onig_names_free(regex_t* reg)\r
476{\r
477 int r;\r
478 NameTable* t;\r
479\r
480 r = names_clear(reg);\r
481 if (r) return r;\r
482\r
483 t = (NameTable* )reg->name_table;\r
484 if (IS_NOT_NULL(t)) onig_st_free_table(t);\r
485 reg->name_table = (void* )NULL;\r
486 return 0;\r
487}\r
488\r
489static NameEntry*\r
490name_find(regex_t* reg, const UChar* name, const UChar* name_end)\r
491{\r
492 NameEntry* e;\r
493 NameTable* t = (NameTable* )reg->name_table;\r
494\r
495 e = (NameEntry* )NULL;\r
496 if (IS_NOT_NULL(t)) {\r
497 onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));\r
498 }\r
499 return e;\r
500}\r
501\r
502typedef struct {\r
503 int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*);\r
504 regex_t* reg;\r
505 void* arg;\r
506 int ret;\r
507 OnigEncoding enc;\r
508} INamesArg;\r
509\r
510static int\r
511i_names(UChar* key ARG_UNUSED, NameEntry* e, INamesArg* arg)\r
512{\r
513 int r = (*(arg->func))(e->name,\r
514 e->name + e->name_len,\r
515 e->back_num,\r
516 (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),\r
517 arg->reg, arg->arg);\r
518 if (r != 0) {\r
519 arg->ret = r;\r
520 return ST_STOP;\r
521 }\r
522 return ST_CONTINUE;\r
523}\r
524\r
525extern int\r
526onig_foreach_name(regex_t* reg,\r
527 int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)\r
528{\r
529 INamesArg narg;\r
530 NameTable* t = (NameTable* )reg->name_table;\r
531\r
532 narg.ret = 0;\r
533 if (IS_NOT_NULL(t)) {\r
534 narg.func = func;\r
535 narg.reg = reg;\r
536 narg.arg = arg;\r
537 narg.enc = reg->enc; /* should be pattern encoding. */\r
4d454c54 538 onig_st_foreach(t, i_names, (HashDataType )(UINTN)&narg);\r
14b0e578
CS
539 }\r
540 return narg.ret;\r
541}\r
542\r
543static int\r
544i_renumber_name(UChar* key ARG_UNUSED, NameEntry* e, GroupNumRemap* map)\r
545{\r
546 int i;\r
547\r
548 if (e->back_num > 1) {\r
549 for (i = 0; i < e->back_num; i++) {\r
550 e->back_refs[i] = map[e->back_refs[i]].new_val;\r
551 }\r
552 }\r
553 else if (e->back_num == 1) {\r
554 e->back_ref1 = map[e->back_ref1].new_val;\r
555 }\r
556\r
557 return ST_CONTINUE;\r
558}\r
559\r
560extern int\r
561onig_renumber_name_table(regex_t* reg, GroupNumRemap* map)\r
562{\r
563 NameTable* t = (NameTable* )reg->name_table;\r
564\r
565 if (IS_NOT_NULL(t)) {\r
4d454c54 566 onig_st_foreach(t, i_renumber_name, (HashDataType )(UINTN)map);\r
14b0e578
CS
567 }\r
568 return 0;\r
569}\r
570\r
571\r
572extern int\r
573onig_number_of_names(regex_t* reg)\r
574{\r
575 NameTable* t = (NameTable* )reg->name_table;\r
576\r
577 if (IS_NOT_NULL(t))\r
578 return t->num_entries;\r
579 else\r
580 return 0;\r
581}\r
582\r
583#else /* USE_ST_LIBRARY */\r
584\r
585#define INIT_NAMES_ALLOC_NUM 8\r
586\r
587typedef struct {\r
588 NameEntry* e;\r
589 int num;\r
590 int alloc;\r
591} NameTable;\r
592\r
593#ifdef ONIG_DEBUG\r
594extern int\r
595onig_print_names(FILE* fp, regex_t* reg)\r
596{\r
597 int i, j;\r
598 NameEntry* e;\r
599 NameTable* t = (NameTable* )reg->name_table;\r
600\r
601 if (IS_NOT_NULL(t) && t->num > 0) {\r
602 fprintf(fp, "name table\n");\r
603 for (i = 0; i < t->num; i++) {\r
604 e = &(t->e[i]);\r
605 fprintf(fp, "%s: ", e->name);\r
606 if (e->back_num == 0) {\r
607 fputs("-", fp);\r
608 }\r
609 else if (e->back_num == 1) {\r
610 fprintf(fp, "%d", e->back_ref1);\r
611 }\r
612 else {\r
613 for (j = 0; j < e->back_num; j++) {\r
614 if (j > 0) fprintf(fp, ", ");\r
615 fprintf(fp, "%d", e->back_refs[j]);\r
616 }\r
617 }\r
618 fputs("\n", fp);\r
619 }\r
620 fputs("\n", fp);\r
621 }\r
622 return 0;\r
623}\r
624#endif\r
625\r
626static int\r
627names_clear(regex_t* reg)\r
628{\r
629 int i;\r
630 NameEntry* e;\r
631 NameTable* t = (NameTable* )reg->name_table;\r
632\r
633 if (IS_NOT_NULL(t)) {\r
634 for (i = 0; i < t->num; i++) {\r
635 e = &(t->e[i]);\r
636 if (IS_NOT_NULL(e->name)) {\r
637 xfree(e->name);\r
638 e->name = NULL;\r
639 e->name_len = 0;\r
640 e->back_num = 0;\r
641 e->back_alloc = 0;\r
642 if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);\r
643 e->back_refs = (int* )NULL;\r
644 }\r
645 }\r
646 if (IS_NOT_NULL(t->e)) {\r
647 xfree(t->e);\r
648 t->e = NULL;\r
649 }\r
650 t->num = 0;\r
651 }\r
652 return 0;\r
653}\r
654\r
655extern int\r
656onig_names_free(regex_t* reg)\r
657{\r
658 int r;\r
659 NameTable* t;\r
660\r
661 r = names_clear(reg);\r
662 if (r) return r;\r
663\r
664 t = (NameTable* )reg->name_table;\r
665 if (IS_NOT_NULL(t)) xfree(t);\r
666 reg->name_table = NULL;\r
667 return 0;\r
668}\r
669\r
670static NameEntry*\r
671name_find(regex_t* reg, UChar* name, UChar* name_end)\r
672{\r
673 int i, len;\r
674 NameEntry* e;\r
675 NameTable* t = (NameTable* )reg->name_table;\r
676\r
677 if (IS_NOT_NULL(t)) {\r
678 len = name_end - name;\r
679 for (i = 0; i < t->num; i++) {\r
680 e = &(t->e[i]);\r
681 if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)\r
682 return e;\r
683 }\r
684 }\r
685 return (NameEntry* )NULL;\r
686}\r
687\r
688extern int\r
689onig_foreach_name(regex_t* reg,\r
690 int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)\r
691{\r
692 int i, r;\r
693 NameEntry* e;\r
694 NameTable* t = (NameTable* )reg->name_table;\r
695\r
696 if (IS_NOT_NULL(t)) {\r
697 for (i = 0; i < t->num; i++) {\r
698 e = &(t->e[i]);\r
699 r = (*func)(e->name, e->name + e->name_len, e->back_num,\r
700 (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),\r
701 reg, arg);\r
702 if (r != 0) return r;\r
703 }\r
704 }\r
705 return 0;\r
706}\r
707\r
708extern int\r
709onig_number_of_names(regex_t* reg)\r
710{\r
711 NameTable* t = (NameTable* )reg->name_table;\r
712\r
713 if (IS_NOT_NULL(t))\r
714 return t->num;\r
715 else\r
716 return 0;\r
717}\r
718\r
719#endif /* else USE_ST_LIBRARY */\r
720\r
721static int\r
722name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)\r
723{\r
724 int alloc;\r
725 NameEntry* e;\r
726 NameTable* t = (NameTable* )reg->name_table;\r
727\r
728 if (name_end - name <= 0)\r
729 return ONIGERR_EMPTY_GROUP_NAME;\r
730\r
731 e = name_find(reg, name, name_end);\r
732 if (IS_NULL(e)) {\r
733#ifdef USE_ST_LIBRARY\r
734 if (IS_NULL(t)) {\r
735 t = onig_st_init_strend_table_with_size(5);\r
b0c2b797 736 CHECK_NULL_RETURN_MEMERR(t);\r
14b0e578
CS
737 reg->name_table = (void* )t;\r
738 }\r
739 e = (NameEntry* )xmalloc(sizeof(NameEntry));\r
740 CHECK_NULL_RETURN_MEMERR(e);\r
741\r
742 e->name = strdup_with_null(reg->enc, name, name_end);\r
743 if (IS_NULL(e->name)) {\r
744 xfree(e); return ONIGERR_MEMORY;\r
745 }\r
746 onig_st_insert_strend(t, e->name, (e->name + (name_end - name)),\r
4d454c54 747 (HashDataType )(UINTN)e);\r
14b0e578
CS
748\r
749 e->name_len = (int)(name_end - name);\r
750 e->back_num = 0;\r
751 e->back_alloc = 0;\r
752 e->back_refs = (int* )NULL;\r
753\r
754#else\r
755\r
756 if (IS_NULL(t)) {\r
757 alloc = INIT_NAMES_ALLOC_NUM;\r
758 t = (NameTable* )xmalloc(sizeof(NameTable));\r
759 CHECK_NULL_RETURN_MEMERR(t);\r
760 t->e = NULL;\r
761 t->alloc = 0;\r
762 t->num = 0;\r
763\r
764 t->e = (NameEntry* )xmalloc(sizeof(NameEntry) * alloc);\r
765 if (IS_NULL(t->e)) {\r
766 xfree(t);\r
767 return ONIGERR_MEMORY;\r
768 }\r
769 t->alloc = alloc;\r
770 reg->name_table = t;\r
771 goto clear;\r
772 }\r
773 else if (t->num == t->alloc) {\r
774 int i;\r
775\r
776 alloc = t->alloc * 2;\r
777 t->e = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc);\r
778 CHECK_NULL_RETURN_MEMERR(t->e);\r
779 t->alloc = alloc;\r
780\r
781 clear:\r
782 for (i = t->num; i < t->alloc; i++) {\r
783 t->e[i].name = NULL;\r
784 t->e[i].name_len = 0;\r
785 t->e[i].back_num = 0;\r
786 t->e[i].back_alloc = 0;\r
787 t->e[i].back_refs = (int* )NULL;\r
788 }\r
789 }\r
790 e = &(t->e[t->num]);\r
791 t->num++;\r
792 e->name = strdup_with_null(reg->enc, name, name_end);\r
793 if (IS_NULL(e->name)) return ONIGERR_MEMORY;\r
794 e->name_len = name_end - name;\r
795#endif\r
796 }\r
797\r
798 if (e->back_num >= 1 &&\r
799 ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME)) {\r
800 onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME,\r
801 name, name_end);\r
802 return ONIGERR_MULTIPLEX_DEFINED_NAME;\r
803 }\r
804\r
805 e->back_num++;\r
806 if (e->back_num == 1) {\r
807 e->back_ref1 = backref;\r
808 }\r
809 else {\r
810 if (e->back_num == 2) {\r
811 alloc = INIT_NAME_BACKREFS_ALLOC_NUM;\r
812 e->back_refs = (int* )xmalloc(sizeof(int) * alloc);\r
813 CHECK_NULL_RETURN_MEMERR(e->back_refs);\r
814 e->back_alloc = alloc;\r
815 e->back_refs[0] = e->back_ref1;\r
816 e->back_refs[1] = backref;\r
817 }\r
818 else {\r
819 if (e->back_num > e->back_alloc) {\r
820 alloc = e->back_alloc * 2;\r
821 e->back_refs = (int* )xrealloc(e->back_refs, sizeof(int) * alloc, sizeof(int) * e->back_alloc);\r
822 CHECK_NULL_RETURN_MEMERR(e->back_refs);\r
823 e->back_alloc = alloc;\r
824 }\r
825 e->back_refs[e->back_num - 1] = backref;\r
826 }\r
827 }\r
828\r
829 return 0;\r
830}\r
831\r
832extern int\r
833onig_name_to_group_numbers(regex_t* reg, const UChar* name,\r
834 const UChar* name_end, int** nums)\r
835{\r
836 NameEntry* e = name_find(reg, name, name_end);\r
837\r
838 if (IS_NULL(e)) return ONIGERR_UNDEFINED_NAME_REFERENCE;\r
839\r
840 switch (e->back_num) {\r
841 case 0:\r
842 break;\r
843 case 1:\r
844 *nums = &(e->back_ref1);\r
845 break;\r
846 default:\r
847 *nums = e->back_refs;\r
848 break;\r
849 }\r
850 return e->back_num;\r
851}\r
852\r
853extern int\r
854onig_name_to_backref_number(regex_t* reg, const UChar* name,\r
855 const UChar* name_end, OnigRegion *region)\r
856{\r
857 int i, n, *nums;\r
858\r
859 n = onig_name_to_group_numbers(reg, name, name_end, &nums);\r
860 if (n < 0)\r
861 return n;\r
862 else if (n == 0)\r
863 return ONIGERR_PARSER_BUG;\r
864 else if (n == 1)\r
865 return nums[0];\r
866 else {\r
867 if (IS_NOT_NULL(region)) {\r
868 for (i = n - 1; i >= 0; i--) {\r
869 if (region->beg[nums[i]] != ONIG_REGION_NOTPOS)\r
870 return nums[i];\r
871 }\r
872 }\r
873 return nums[n - 1];\r
874 }\r
875}\r
876\r
877#else /* USE_NAMED_GROUP */\r
878\r
879extern int\r
880onig_name_to_group_numbers(regex_t* reg, const UChar* name,\r
881 const UChar* name_end, int** nums)\r
882{\r
883 return ONIG_NO_SUPPORT_CONFIG;\r
884}\r
885\r
886extern int\r
887onig_name_to_backref_number(regex_t* reg, const UChar* name,\r
888 const UChar* name_end, OnigRegion* region)\r
889{\r
890 return ONIG_NO_SUPPORT_CONFIG;\r
891}\r
892\r
893extern int\r
894onig_foreach_name(regex_t* reg,\r
895 int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)\r
896{\r
897 return ONIG_NO_SUPPORT_CONFIG;\r
898}\r
899\r
900extern int\r
901onig_number_of_names(regex_t* reg)\r
902{\r
903 return 0;\r
904}\r
905#endif /* else USE_NAMED_GROUP */\r
906\r
907extern int\r
908onig_noname_group_capture_is_active(regex_t* reg)\r
909{\r
910 if (ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_DONT_CAPTURE_GROUP))\r
911 return 0;\r
912\r
913#ifdef USE_NAMED_GROUP\r
914 if (onig_number_of_names(reg) > 0 &&\r
915 IS_SYNTAX_BV(reg->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&\r
916 !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {\r
917 return 0;\r
918 }\r
919#endif\r
920\r
921 return 1;\r
922}\r
923\r
924\r
925#define INIT_SCANENV_MEMNODES_ALLOC_SIZE 16\r
926\r
927static void\r
928scan_env_clear(ScanEnv* env)\r
929{\r
930 int i;\r
931\r
932 BIT_STATUS_CLEAR(env->capture_history);\r
933 BIT_STATUS_CLEAR(env->bt_mem_start);\r
934 BIT_STATUS_CLEAR(env->bt_mem_end);\r
935 BIT_STATUS_CLEAR(env->backrefed_mem);\r
936 env->error = (UChar* )NULL;\r
937 env->error_end = (UChar* )NULL;\r
938 env->num_call = 0;\r
939 env->num_mem = 0;\r
940#ifdef USE_NAMED_GROUP\r
941 env->num_named = 0;\r
942#endif\r
943 env->mem_alloc = 0;\r
944 env->mem_nodes_dynamic = (Node** )NULL;\r
945\r
946 for (i = 0; i < SCANENV_MEMNODES_SIZE; i++)\r
947 env->mem_nodes_static[i] = NULL_NODE;\r
948\r
949#ifdef USE_COMBINATION_EXPLOSION_CHECK\r
950 env->num_comb_exp_check = 0;\r
951 env->comb_exp_max_regnum = 0;\r
952 env->curr_max_regnum = 0;\r
953 env->has_recursion = 0;\r
954#endif\r
955}\r
956\r
957static int\r
958scan_env_add_mem_entry(ScanEnv* env)\r
959{\r
960 int i, need, alloc;\r
961 Node** p;\r
962\r
963 need = env->num_mem + 1;\r
964 if (need >= SCANENV_MEMNODES_SIZE) {\r
965 if (env->mem_alloc <= need) {\r
966 if (IS_NULL(env->mem_nodes_dynamic)) {\r
967 alloc = INIT_SCANENV_MEMNODES_ALLOC_SIZE;\r
968 p = (Node** )xmalloc(sizeof(Node*) * alloc);\r
b0c2b797
QS
969 CHECK_NULL_RETURN_MEMERR(p);\r
970 \r
14b0e578
CS
971 xmemcpy(p, env->mem_nodes_static,\r
972 sizeof(Node*) * SCANENV_MEMNODES_SIZE);\r
973 }\r
974 else {\r
975 alloc = env->mem_alloc * 2;\r
976 p = (Node** )xrealloc(env->mem_nodes_dynamic, sizeof(Node*) * alloc, sizeof(Node*) * env->mem_alloc);\r
977 }\r
978 CHECK_NULL_RETURN_MEMERR(p);\r
979\r
980 for (i = env->num_mem + 1; i < alloc; i++)\r
981 p[i] = NULL_NODE;\r
982\r
983 env->mem_nodes_dynamic = p;\r
984 env->mem_alloc = alloc;\r
985 }\r
986 }\r
987\r
988 env->num_mem++;\r
989 return env->num_mem;\r
990}\r
991\r
992static int\r
993scan_env_set_mem_node(ScanEnv* env, int num, Node* node)\r
994{\r
995 if (env->num_mem >= num)\r
996 SCANENV_MEM_NODES(env)[num] = node;\r
997 else\r
998 return ONIGERR_PARSER_BUG;\r
999 return 0;\r
1000}\r
1001\r
1002\r
1003#ifdef USE_PARSE_TREE_NODE_RECYCLE\r
1004typedef struct _FreeNode {\r
1005 struct _FreeNode* next;\r
1006} FreeNode;\r
1007\r
1008static FreeNode* FreeNodeList = (FreeNode* )NULL;\r
1009#endif\r
1010\r
1011extern void\r
1012onig_node_free(Node* node)\r
1013{\r
1014 start:\r
1015 if (IS_NULL(node)) return ;\r
1016\r
1017 switch (NTYPE(node)) {\r
1018 case NT_STR:\r
1019 if (NSTR(node)->capa != 0 &&\r
1020 IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) {\r
1021 xfree(NSTR(node)->s);\r
1022 }\r
1023 break;\r
1024\r
1025 case NT_LIST:\r
1026 case NT_ALT:\r
1027 onig_node_free(NCAR(node));\r
1028 {\r
1029 Node* next_node = NCDR(node);\r
1030\r
1031#ifdef USE_PARSE_TREE_NODE_RECYCLE\r
1032 {\r
1033 FreeNode* n = (FreeNode* )node;\r
1034\r
1035 THREAD_ATOMIC_START;\r
1036 n->next = FreeNodeList;\r
1037 FreeNodeList = n;\r
1038 THREAD_ATOMIC_END;\r
1039 }\r
1040#else\r
1041 xfree(node);\r
1042#endif\r
1043 node = next_node;\r
1044 goto start;\r
1045 }\r
1046 break;\r
1047\r
1048 case NT_CCLASS:\r
1049 {\r
1050 CClassNode* cc = NCCLASS(node);\r
1051\r
1052 if (IS_NCCLASS_SHARE(cc)) return ;\r
1053 if (cc->mbuf)\r
1054 bbuf_free(cc->mbuf);\r
1055 }\r
1056 break;\r
1057\r
1058 case NT_QTFR:\r
1059 if (NQTFR(node)->target)\r
1060 onig_node_free(NQTFR(node)->target);\r
1061 break;\r
1062\r
1063 case NT_ENCLOSE:\r
1064 if (NENCLOSE(node)->target)\r
1065 onig_node_free(NENCLOSE(node)->target);\r
1066 break;\r
1067\r
1068 case NT_BREF:\r
1069 if (IS_NOT_NULL(NBREF(node)->back_dynamic))\r
1070 xfree(NBREF(node)->back_dynamic);\r
1071 break;\r
1072\r
1073 case NT_ANCHOR:\r
1074 if (NANCHOR(node)->target)\r
1075 onig_node_free(NANCHOR(node)->target);\r
1076 break;\r
1077 }\r
1078\r
1079#ifdef USE_PARSE_TREE_NODE_RECYCLE\r
1080 {\r
1081 FreeNode* n = (FreeNode* )node;\r
1082\r
1083 THREAD_ATOMIC_START;\r
1084 n->next = FreeNodeList;\r
1085 FreeNodeList = n;\r
1086 THREAD_ATOMIC_END;\r
1087 }\r
1088#else\r
1089 xfree(node);\r
1090#endif\r
1091}\r
1092\r
1093#ifdef USE_PARSE_TREE_NODE_RECYCLE\r
1094extern int\r
1095onig_free_node_list(void)\r
1096{\r
1097 FreeNode* n;\r
1098\r
1099 /* THREAD_ATOMIC_START; */\r
1100 while (IS_NOT_NULL(FreeNodeList)) {\r
1101 n = FreeNodeList;\r
1102 FreeNodeList = FreeNodeList->next;\r
1103 xfree(n);\r
1104 }\r
1105 /* THREAD_ATOMIC_END; */\r
1106 return 0;\r
1107}\r
1108#endif\r
1109\r
1110static Node*\r
1111node_new(void)\r
1112{\r
1113 Node* node;\r
1114\r
1115#ifdef USE_PARSE_TREE_NODE_RECYCLE\r
1116 THREAD_ATOMIC_START;\r
1117 if (IS_NOT_NULL(FreeNodeList)) {\r
1118 node = (Node* )FreeNodeList;\r
1119 FreeNodeList = FreeNodeList->next;\r
1120 THREAD_ATOMIC_END;\r
1121 return node;\r
1122 }\r
1123 THREAD_ATOMIC_END;\r
1124#endif\r
1125\r
1126 node = (Node* )xmalloc(sizeof(Node));\r
1127 /* xmemset(node, 0, sizeof(Node)); */\r
1128 return node;\r
1129}\r
1130\r
1131\r
1132static void\r
1133initialize_cclass(CClassNode* cc)\r
1134{\r
1135 BITSET_CLEAR(cc->bs);\r
1136 /* cc->base.flags = 0; */\r
1137 cc->flags = 0;\r
1138 cc->mbuf = NULL;\r
1139}\r
1140\r
1141static Node*\r
1142node_new_cclass(void)\r
1143{\r
1144 Node* node = node_new();\r
1145 CHECK_NULL_RETURN(node);\r
1146\r
1147 SET_NTYPE(node, NT_CCLASS);\r
1148 initialize_cclass(NCCLASS(node));\r
1149 return node;\r
1150}\r
1151\r
1152static Node*\r
1153node_new_cclass_by_codepoint_range(int not, OnigCodePoint sb_out,\r
1154 const OnigCodePoint ranges[])\r
1155{\r
1156 int n, i;\r
1157 CClassNode* cc;\r
1158 OnigCodePoint j;\r
1159\r
1160 Node* node = node_new_cclass();\r
1161 CHECK_NULL_RETURN(node);\r
1162\r
1163 cc = NCCLASS(node);\r
1164 if (not != 0) NCCLASS_SET_NOT(cc);\r
1165\r
1166 BITSET_CLEAR(cc->bs);\r
1167 if (sb_out > 0 && IS_NOT_NULL(ranges)) {\r
1168 n = ONIGENC_CODE_RANGE_NUM(ranges);\r
1169 for (i = 0; i < n; i++) {\r
1170 for (j = ONIGENC_CODE_RANGE_FROM(ranges, i);\r
1171 j <= (OnigCodePoint )ONIGENC_CODE_RANGE_TO(ranges, i); j++) {\r
1172 if (j >= sb_out) goto sb_end;\r
1173\r
1174 BITSET_SET_BIT(cc->bs, j);\r
1175 }\r
1176 }\r
1177 }\r
1178\r
1179 sb_end:\r
1180 if (IS_NULL(ranges)) {\r
1181 is_null:\r
1182 cc->mbuf = NULL;\r
1183 }\r
1184 else {\r
1185 BBuf* bbuf;\r
1186\r
1187 n = ONIGENC_CODE_RANGE_NUM(ranges);\r
1188 if (n == 0) goto is_null;\r
1189\r
1190 bbuf = (BBuf* )xmalloc(sizeof(BBuf));\r
1191 CHECK_NULL_RETURN(bbuf);\r
1192 bbuf->alloc = n + 1;\r
1193 bbuf->used = n + 1;\r
1194 bbuf->p = (UChar* )((void* )ranges);\r
1195\r
1196 cc->mbuf = bbuf;\r
1197 }\r
1198\r
1199 return node;\r
1200}\r
1201\r
1202static Node*\r
1203node_new_ctype(int type, int not)\r
1204{\r
1205 Node* node = node_new();\r
1206 CHECK_NULL_RETURN(node);\r
1207\r
1208 SET_NTYPE(node, NT_CTYPE);\r
1209 NCTYPE(node)->ctype = type;\r
1210 NCTYPE(node)->not = not;\r
1211 return node;\r
1212}\r
1213\r
1214static Node*\r
1215node_new_anychar(void)\r
1216{\r
1217 Node* node = node_new();\r
1218 CHECK_NULL_RETURN(node);\r
1219\r
1220 SET_NTYPE(node, NT_CANY);\r
1221 return node;\r
1222}\r
1223\r
1224static Node*\r
1225node_new_list(Node* left, Node* right)\r
1226{\r
1227 Node* node = node_new();\r
1228 CHECK_NULL_RETURN(node);\r
1229\r
1230 SET_NTYPE(node, NT_LIST);\r
1231 NCAR(node) = left;\r
1232 NCDR(node) = right;\r
1233 return node;\r
1234}\r
1235\r
1236extern Node*\r
1237onig_node_new_list(Node* left, Node* right)\r
1238{\r
1239 return node_new_list(left, right);\r
1240}\r
1241\r
1242extern Node*\r
1243onig_node_list_add(Node* list, Node* x)\r
1244{\r
1245 Node *n;\r
1246\r
1247 n = onig_node_new_list(x, NULL);\r
1248 if (IS_NULL(n)) return NULL_NODE;\r
1249\r
1250 if (IS_NOT_NULL(list)) {\r
1251 while (IS_NOT_NULL(NCDR(list)))\r
1252 list = NCDR(list);\r
1253\r
1254 NCDR(list) = n;\r
1255 }\r
1256\r
1257 return n;\r
1258}\r
1259\r
1260extern Node*\r
1261onig_node_new_alt(Node* left, Node* right)\r
1262{\r
1263 Node* node = node_new();\r
1264 CHECK_NULL_RETURN(node);\r
1265\r
1266 SET_NTYPE(node, NT_ALT);\r
1267 NCAR(node) = left;\r
1268 NCDR(node) = right;\r
1269 return node;\r
1270}\r
1271\r
1272extern Node*\r
1273onig_node_new_anchor(int type)\r
1274{\r
1275 Node* node = node_new();\r
1276 CHECK_NULL_RETURN(node);\r
1277\r
1278 SET_NTYPE(node, NT_ANCHOR);\r
1279 NANCHOR(node)->type = type;\r
1280 NANCHOR(node)->target = NULL;\r
1281 NANCHOR(node)->char_len = -1;\r
1282 return node;\r
1283}\r
1284\r
1285static Node*\r
1286node_new_backref(int back_num, int* backrefs, int by_name,\r
1287#ifdef USE_BACKREF_WITH_LEVEL\r
1288 int exist_level, int nest_level,\r
1289#endif\r
1290 ScanEnv* env)\r
1291{\r
1292 int i;\r
1293 Node* node = node_new();\r
1294\r
1295 CHECK_NULL_RETURN(node);\r
1296\r
1297 SET_NTYPE(node, NT_BREF);\r
1298 NBREF(node)->state = 0;\r
1299 NBREF(node)->back_num = back_num;\r
1300 NBREF(node)->back_dynamic = (int* )NULL;\r
1301 if (by_name != 0)\r
1302 NBREF(node)->state |= NST_NAME_REF;\r
1303\r
1304#ifdef USE_BACKREF_WITH_LEVEL\r
1305 if (exist_level != 0) {\r
1306 NBREF(node)->state |= NST_NEST_LEVEL;\r
1307 NBREF(node)->nest_level = nest_level;\r
1308 }\r
1309#endif\r
1310\r
1311 for (i = 0; i < back_num; i++) {\r
1312 if (backrefs[i] <= env->num_mem &&\r
1313 IS_NULL(SCANENV_MEM_NODES(env)[backrefs[i]])) {\r
1314 NBREF(node)->state |= NST_RECURSION; /* /...(\1).../ */\r
1315 break;\r
1316 }\r
1317 }\r
1318\r
1319 if (back_num <= NODE_BACKREFS_SIZE) {\r
1320 for (i = 0; i < back_num; i++)\r
1321 NBREF(node)->back_static[i] = backrefs[i];\r
1322 }\r
1323 else {\r
1324 int* p = (int* )xmalloc(sizeof(int) * back_num);\r
1325 if (IS_NULL(p)) {\r
1326 onig_node_free(node);\r
1327 return NULL;\r
1328 }\r
1329 NBREF(node)->back_dynamic = p;\r
1330 for (i = 0; i < back_num; i++)\r
1331 p[i] = backrefs[i];\r
1332 }\r
1333 return node;\r
1334}\r
1335\r
#ifdef USE_SUBEXP_CALL
/*
 * Create a subexpression-call node (NT_CALL).
 *   name, name_end : stored as-is in the node (not copied)
 *   gnum           : group number; call by number if gnum != 0
 * The call target starts unresolved (NULL_NODE).
 * Returns NULL when node_new() yields no node.
 */
static Node*
node_new_call(UChar* name, UChar* name_end, int gnum)
{
  Node* call = node_new();

  if (IS_NULL(call)) return NULL;

  SET_NTYPE(call, NT_CALL);
  NCALL(call)->group_num = gnum;
  NCALL(call)->name      = name;
  NCALL(call)->name_end  = name_end;
  NCALL(call)->target    = NULL_NODE;
  NCALL(call)->state     = 0;
  return call;
}
#endif
1352\r
1353static Node*\r
1354node_new_quantifier(int lower, int upper, int by_number)\r
1355{\r
1356 Node* node = node_new();\r
1357 CHECK_NULL_RETURN(node);\r
1358\r
1359 SET_NTYPE(node, NT_QTFR);\r
1360 NQTFR(node)->state = 0;\r
1361 NQTFR(node)->target = NULL;\r
1362 NQTFR(node)->lower = lower;\r
1363 NQTFR(node)->upper = upper;\r
1364 NQTFR(node)->greedy = 1;\r
1365 NQTFR(node)->target_empty_info = NQ_TARGET_ISNOT_EMPTY;\r
1366 NQTFR(node)->head_exact = NULL_NODE;\r
1367 NQTFR(node)->next_head_exact = NULL_NODE;\r
1368 NQTFR(node)->is_refered = 0;\r
1369 if (by_number != 0)\r
1370 NQTFR(node)->state |= NST_BY_NUMBER;\r
1371\r
1372#ifdef USE_COMBINATION_EXPLOSION_CHECK\r
1373 NQTFR(node)->comb_exp_check_num = 0;\r
1374#endif\r
1375\r
1376 return node;\r
1377}\r
1378\r
1379static Node*\r
1380node_new_enclose(int type)\r
1381{\r
1382 Node* node = node_new();\r
1383 CHECK_NULL_RETURN(node);\r
1384\r
1385 SET_NTYPE(node, NT_ENCLOSE);\r
1386 NENCLOSE(node)->type = type;\r
1387 NENCLOSE(node)->state = 0;\r
1388 NENCLOSE(node)->regnum = 0;\r
1389 NENCLOSE(node)->option = 0;\r
1390 NENCLOSE(node)->target = NULL;\r
1391 NENCLOSE(node)->call_addr = -1;\r
1392 NENCLOSE(node)->opt_count = 0;\r
1393 return node;\r
1394}\r
1395\r
1396extern Node*\r
1397onig_node_new_enclose(int type)\r
1398{\r
1399 return node_new_enclose(type);\r
1400}\r
1401\r
1402static Node*\r
1403node_new_enclose_memory(OnigOptionType option, int is_named)\r
1404{\r
1405 Node* node = node_new_enclose(ENCLOSE_MEMORY);\r
1406 CHECK_NULL_RETURN(node);\r
1407 if (is_named != 0)\r
1408 SET_ENCLOSE_STATUS(node, NST_NAMED_GROUP);\r
1409\r
1410#ifdef USE_SUBEXP_CALL\r
1411 NENCLOSE(node)->option = option;\r
1412#endif\r
1413 return node;\r
1414}\r
1415\r
1416static Node*\r
1417node_new_option(OnigOptionType option)\r
1418{\r
1419 Node* node = node_new_enclose(ENCLOSE_OPTION);\r
1420 CHECK_NULL_RETURN(node);\r
1421 NENCLOSE(node)->option = option;\r
1422 return node;\r
1423}\r
1424\r
1425extern int\r
1426onig_node_str_cat(Node* node, const UChar* s, const UChar* end)\r
1427{\r
1428 int addlen = (int)(end - s);\r
1429\r
1430 if (addlen > 0) {\r
1431 int len = (int)(NSTR(node)->end - NSTR(node)->s);\r
1432\r
1433 if (NSTR(node)->capa > 0 || (len + addlen > NODE_STR_BUF_SIZE - 1)) {\r
1434 UChar* p;\r
1435 int capa = len + addlen + NODE_STR_MARGIN;\r
1436\r
1437 if (capa <= NSTR(node)->capa) {\r
1438 onig_strcpy(NSTR(node)->s + len, s, end);\r
1439 }\r
1440 else {\r
1441 if (NSTR(node)->s == NSTR(node)->buf)\r
1442 p = strcat_capa_from_static(NSTR(node)->s, NSTR(node)->end,\r
1443 s, end, capa);\r
1444 else\r
1445 p = strcat_capa(NSTR(node)->s, NSTR(node)->end, s, end, capa, NSTR(node)->capa);\r
1446\r
1447 CHECK_NULL_RETURN_MEMERR(p);\r
1448 NSTR(node)->s = p;\r
1449 NSTR(node)->capa = capa;\r
1450 }\r
1451 }\r
1452 else {\r
1453 onig_strcpy(NSTR(node)->s + len, s, end);\r
1454 }\r
1455 NSTR(node)->end = NSTR(node)->s + len + addlen;\r
1456 }\r
1457\r
1458 return 0;\r
1459}\r
1460\r
1461extern int\r
1462onig_node_str_set(Node* node, const UChar* s, const UChar* end)\r
1463{\r
1464 onig_node_str_clear(node);\r
1465 return onig_node_str_cat(node, s, end);\r
1466}\r
1467\r
1468static int\r
1469node_str_cat_char(Node* node, UChar c)\r
1470{\r
1471 UChar s[1];\r
1472\r
1473 s[0] = c;\r
1474 return onig_node_str_cat(node, s, s + 1);\r
1475}\r
1476\r
1477extern void\r
1478onig_node_conv_to_str_node(Node* node, int flag)\r
1479{\r
1480 SET_NTYPE(node, NT_STR);\r
1481 NSTR(node)->flag = flag;\r
1482 NSTR(node)->capa = 0;\r
1483 NSTR(node)->s = NSTR(node)->buf;\r
1484 NSTR(node)->end = NSTR(node)->buf;\r
1485}\r
1486\r
1487extern void\r
1488onig_node_str_clear(Node* node)\r
1489{\r
1490 if (NSTR(node)->capa != 0 &&\r
1491 IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) {\r
1492 xfree(NSTR(node)->s);\r
1493 }\r
1494\r
1495 NSTR(node)->capa = 0;\r
1496 NSTR(node)->flag = 0;\r
1497 NSTR(node)->s = NSTR(node)->buf;\r
1498 NSTR(node)->end = NSTR(node)->buf;\r
1499}\r
1500\r
1501static Node*\r
1502node_new_str(const UChar* s, const UChar* end)\r
1503{\r
1504 Node* node = node_new();\r
1505 CHECK_NULL_RETURN(node);\r
1506\r
1507 SET_NTYPE(node, NT_STR);\r
1508 NSTR(node)->capa = 0;\r
1509 NSTR(node)->flag = 0;\r
1510 NSTR(node)->s = NSTR(node)->buf;\r
1511 NSTR(node)->end = NSTR(node)->buf;\r
1512 if (onig_node_str_cat(node, s, end)) {\r
1513 onig_node_free(node);\r
1514 return NULL;\r
1515 }\r
1516 return node;\r
1517}\r
1518\r
1519extern Node*\r
1520onig_node_new_str(const UChar* s, const UChar* end)\r
1521{\r
1522 return node_new_str(s, end);\r
1523}\r
1524\r
1525static Node*\r
1526node_new_str_raw(UChar* s, UChar* end)\r
1527{\r
1528 Node* node = node_new_str(s, end);\r
b0c2b797 1529 CHECK_NULL_RETURN(node);\r
14b0e578
CS
1530 NSTRING_SET_RAW(node);\r
1531 return node;\r
1532}\r
1533\r
1534static Node*\r
1535node_new_empty(void)\r
1536{\r
1537 return node_new_str(NULL, NULL);\r
1538}\r
1539\r
1540static Node*\r
1541node_new_str_raw_char(UChar c)\r
1542{\r
1543 UChar p[1];\r
1544\r
1545 p[0] = c;\r
1546 return node_new_str_raw(p, p + 1);\r
1547}\r
1548\r
1549static Node*\r
1550str_node_split_last_char(StrNode* sn, OnigEncoding enc)\r
1551{\r
1552 const UChar *p;\r
1553 Node* n = NULL_NODE;\r
1554\r
1555 if (sn->end > sn->s) {\r
1556 p = onigenc_get_prev_char_head(enc, sn->s, sn->end);\r
1557 if (p && p > sn->s) { /* can be splitted. */\r
1558 n = node_new_str(p, sn->end);\r
b0c2b797 1559 CHECK_NULL_RETURN(n);\r
14b0e578
CS
1560 if ((sn->flag & NSTR_RAW) != 0)\r
1561 NSTRING_SET_RAW(n);\r
1562 sn->end = (UChar* )p;\r
1563 }\r
1564 }\r
1565 return n;\r
1566}\r
1567\r
1568static int\r
1569str_node_can_be_split(StrNode* sn, OnigEncoding enc)\r
1570{\r
1571 if (sn->end > sn->s) {\r
1572 return ((enclen(enc, sn->s) < sn->end - sn->s) ? 1 : 0);\r
1573 }\r
1574 return 0;\r
1575}\r
1576\r
#ifdef USE_PAD_TO_SHORT_BYTE_CHAR
/*
 * Shift the contents of string `sn` right by `num` bytes and fill the
 * `num` vacated bytes at the head with `val`.
 *
 *   sn  : string to pad; sn->end is advanced by `num`
 *   num : number of pad bytes to insert at the head
 *   val : byte value written into the pad bytes
 *
 * Always returns 0.  (The function is declared to return int; previously
 * control fell off the end without a return value.)
 *
 * NOTE(review): the copy goes through a NODE_STR_BUF_SIZE stack buffer and
 * nothing here checks that the string, or the string plus `num`, fits --
 * presumably the caller guarantees this; confirm.
 */
static int
node_str_head_pad(StrNode* sn, int num, UChar val)
{
  UChar buf[NODE_STR_BUF_SIZE];
  int i, len;

  len = (int )(sn->end - sn->s);
  /* save the current contents, then write them back `num` bytes later */
  onig_strcpy(buf, sn->s, sn->end);
  onig_strcpy(&(sn->s[num]), buf, buf + len);
  sn->end += num;

  for (i = 0; i < num; i++) {
    sn->s[i] = val;
  }
  return 0;
}
#endif
1594\r
1595extern int\r
1596onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc)\r
1597{\r
1598 unsigned int num, val;\r
1599 OnigCodePoint c;\r
1600 UChar* p = *src;\r
1601 PFETCH_READY;\r
1602\r
1603 num = 0;\r
1604 while (!PEND) {\r
1605 PFETCH(c);\r
1606 if (ONIGENC_IS_CODE_DIGIT(enc, c)) {\r
1607 val = (unsigned int )DIGITVAL(c);\r
1608 if ((INT_MAX_LIMIT - val) / 10UL < num)\r
1609 return -1; /* overflow */\r
1610\r
1611 num = num * 10 + val;\r
1612 }\r
1613 else {\r
1614 PUNFETCH;\r
1615 break;\r
1616 }\r
1617 }\r
1618 *src = p;\r
1619 return num;\r
1620}\r
1621\r
1622static int\r
1623scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int maxlen,\r
1624 OnigEncoding enc)\r
1625{\r
1626 OnigCodePoint c;\r
1627 unsigned int num, val;\r
1628 UChar* p = *src;\r
1629 PFETCH_READY;\r
1630\r
1631 num = 0;\r
1632 while (!PEND && maxlen-- != 0) {\r
1633 PFETCH(c);\r
1634 if (ONIGENC_IS_CODE_XDIGIT(enc, c)) {\r
1635 val = (unsigned int )XDIGITVAL(enc,c);\r
1636 if ((INT_MAX_LIMIT - val) / 16UL < num)\r
1637 return -1; /* overflow */\r
1638\r
1639 num = (num << 4) + XDIGITVAL(enc,c);\r
1640 }\r
1641 else {\r
1642 PUNFETCH;\r
1643 break;\r
1644 }\r
1645 }\r
1646 *src = p;\r
1647 return num;\r
1648}\r
1649\r
1650static int\r
1651scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen,\r
1652 OnigEncoding enc)\r
1653{\r
1654 OnigCodePoint c;\r
1655 unsigned int num, val;\r
1656 UChar* p = *src;\r
1657 PFETCH_READY;\r
1658\r
1659 num = 0;\r
1660 while (!PEND && maxlen-- != 0) {\r
1661 PFETCH(c);\r
1662 if (ONIGENC_IS_CODE_DIGIT(enc, c) && c < '8') {\r
1663 val = ODIGITVAL(c);\r
1664 if ((INT_MAX_LIMIT - val) / 8UL < num)\r
1665 return -1; /* overflow */\r
1666\r
1667 num = (num << 3) + val;\r
1668 }\r
1669 else {\r
1670 PUNFETCH;\r
1671 break;\r
1672 }\r
1673 }\r
1674 *src = p;\r
1675 return num;\r
1676}\r
1677\r
1678\r
/* Write the code point variable `code` (SIZE_CODE_POINT bytes) into `bbuf` at byte offset `pos`. */
#define BBUF_WRITE_CODE_POINT(bbuf,pos,code) BBUF_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT)
1681\r
1682/* data format:\r
1683 [n][from-1][to-1][from-2][to-2] ... [from-n][to-n]\r
1684 (all data size is OnigCodePoint)\r
1685 */\r
1686static int\r
1687new_code_range(BBuf** pbuf)\r
1688{\r
1689#define INIT_MULTI_BYTE_RANGE_SIZE (SIZE_CODE_POINT * 5)\r
1690 int r;\r
1691 OnigCodePoint n;\r
1692 BBuf* bbuf;\r
1693\r
1694 bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf));\r
1695 CHECK_NULL_RETURN_MEMERR(*pbuf);\r
1696 r = BBUF_INIT(*pbuf, INIT_MULTI_BYTE_RANGE_SIZE);\r
1697 if (r) return r;\r
1698\r
1699 n = 0;\r
1700 BBUF_WRITE_CODE_POINT(bbuf, 0, n);\r
1701 return 0;\r
1702}\r
1703\r
1704static int\r
1705add_code_range_to_buf(BBuf** pbuf, OnigCodePoint from, OnigCodePoint to)\r
1706{\r
1707 int r, inc_n, pos;\r
1708 int low, high, bound, x;\r
1709 OnigCodePoint n, *data;\r
1710 BBuf* bbuf;\r
1711\r
1712 if (from > to) {\r
1713 n = from; from = to; to = n;\r
1714 }\r
1715\r
1716 if (IS_NULL(*pbuf)) {\r
1717 r = new_code_range(pbuf);\r
1718 if (r) return r;\r
1719 bbuf = *pbuf;\r
1720 n = 0;\r
1721 }\r
1722 else {\r
1723 bbuf = *pbuf;\r
1724 GET_CODE_POINT(n, bbuf->p);\r
1725 }\r
1726 data = (OnigCodePoint* )(bbuf->p);\r
1727 data++;\r
1728\r
1729 for (low = 0, bound = n; low < bound; ) {\r
1730 x = (low + bound) >> 1;\r
1731 if (from > data[x*2 + 1])\r
1732 low = x + 1;\r
1733 else\r
1734 bound = x;\r
1735 }\r
1736\r
1737 for (high = low, bound = n; high < bound; ) {\r
1738 x = (high + bound) >> 1;\r
1739 if (to >= data[x*2] - 1)\r
1740 high = x + 1;\r
1741 else\r
1742 bound = x;\r
1743 }\r
1744\r
1745 inc_n = low + 1 - high;\r
1746 if (n + inc_n > ONIG_MAX_MULTI_BYTE_RANGES_NUM)\r
1747 return ONIGERR_TOO_MANY_MULTI_BYTE_RANGES;\r
1748\r
1749 if (inc_n != 1) {\r
1750 if (from > data[low*2])\r
1751 from = data[low*2];\r
1752 if (to < data[(high - 1)*2 + 1])\r
1753 to = data[(high - 1)*2 + 1];\r
1754 }\r
1755\r
1756 if (inc_n != 0 && (OnigCodePoint )high < n) {\r
1757 int from_pos = SIZE_CODE_POINT * (1 + high * 2);\r
1758 int to_pos = SIZE_CODE_POINT * (1 + (low + 1) * 2);\r
1759 int size = (n - high) * 2 * SIZE_CODE_POINT;\r
1760\r
1761 if (inc_n > 0) {\r
1762 BBUF_MOVE_RIGHT(bbuf, from_pos, to_pos, size);\r
1763 }\r
1764 else {\r
1765 BBUF_MOVE_LEFT_REDUCE(bbuf, from_pos, to_pos);\r
1766 }\r
1767 }\r
1768\r
1769 pos = SIZE_CODE_POINT * (1 + low * 2);\r
1770 BBUF_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2);\r
1771 BBUF_WRITE_CODE_POINT(bbuf, pos, from);\r
1772 BBUF_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to);\r
1773 n += inc_n;\r
1774 BBUF_WRITE_CODE_POINT(bbuf, 0, n);\r
1775\r
1776 return 0;\r
1777}\r
1778\r
1779static int\r
1780add_code_range(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to)\r
1781{\r
1782 if (from > to) {\r
1783 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))\r
1784 return 0;\r
1785 else\r
1786 return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;\r
1787 }\r
1788\r
1789 return add_code_range_to_buf(pbuf, from, to);\r
1790}\r
1791\r
1792static int\r
1793not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf)\r
1794{\r
1795 int r, i, n;\r
1796 OnigCodePoint pre, from, *data, to = 0;\r
1797\r
1798 *pbuf = (BBuf* )NULL;\r
1799 if (IS_NULL(bbuf)) {\r
1800 set_all:\r
1801 return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);\r
1802 }\r
1803\r
1804 data = (OnigCodePoint* )(bbuf->p);\r
1805 GET_CODE_POINT(n, data);\r
1806 data++;\r
1807 if (n <= 0) goto set_all;\r
1808\r
1809 r = 0;\r
1810 pre = MBCODE_START_POS(enc);\r
1811 for (i = 0; i < n; i++) {\r
1812 from = data[i*2];\r
1813 to = data[i*2+1];\r
1814 if (pre <= from - 1) {\r
1815 r = add_code_range_to_buf(pbuf, pre, from - 1);\r
1816 if (r != 0) return r;\r
1817 }\r
1818 if (to == ~((OnigCodePoint )0)) break;\r
1819 pre = to + 1;\r
1820 }\r
1821 if (to < ~((OnigCodePoint )0)) {\r
1822 r = add_code_range_to_buf(pbuf, to + 1, ~((OnigCodePoint )0));\r
1823 }\r
1824 return r;\r
1825}\r
1826\r
/* Exchange (bbuf1, not1) with (bbuf2, not2); all four arguments must be lvalues. */
#define SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2) do {\
  BBuf *swap_buf_; \
  int   swap_not_; \
  swap_buf_ = bbuf1; bbuf1 = bbuf2; bbuf2 = swap_buf_; \
  swap_not_ = not1;  not1  = not2;  not2  = swap_not_; \
} while (0)
1833\r
1834static int\r
1835or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1,\r
1836 BBuf* bbuf2, int not2, BBuf** pbuf)\r
1837{\r
1838 int r;\r
1839 OnigCodePoint i, n1, *data1;\r
1840 OnigCodePoint from, to;\r
1841\r
1842 *pbuf = (BBuf* )NULL;\r
1843 if (IS_NULL(bbuf1) && IS_NULL(bbuf2)) {\r
1844 if (not1 != 0 || not2 != 0)\r
1845 return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);\r
1846 return 0;\r
1847 }\r
1848\r
1849 r = 0;\r
1850 if (IS_NULL(bbuf2))\r
1851 SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);\r
1852\r
1853 if (IS_NULL(bbuf1)) {\r
1854 if (not1 != 0) {\r
1855 return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);\r
1856 }\r
1857 else {\r
1858 if (not2 == 0) {\r
1859 return bbuf_clone(pbuf, bbuf2);\r
1860 }\r
1861 else {\r
1862 return not_code_range_buf(enc, bbuf2, pbuf);\r
1863 }\r
1864 }\r
1865 }\r
1866\r
1867 if (not1 != 0)\r
1868 SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);\r
1869\r
1870 data1 = (OnigCodePoint* )(bbuf1->p);\r
1871 GET_CODE_POINT(n1, data1);\r
1872 data1++;\r
1873\r
1874 if (not2 == 0 && not1 == 0) { /* 1 OR 2 */\r
1875 r = bbuf_clone(pbuf, bbuf2);\r
1876 }\r
1877 else if (not1 == 0) { /* 1 OR (not 2) */\r
1878 r = not_code_range_buf(enc, bbuf2, pbuf);\r
1879 }\r
1880 if (r != 0) return r;\r
1881\r
1882 for (i = 0; i < n1; i++) {\r
1883 from = data1[i*2];\r
1884 to = data1[i*2+1];\r
1885 r = add_code_range_to_buf(pbuf, from, to);\r
1886 if (r != 0) return r;\r
1887 }\r
1888 return 0;\r
1889}\r
1890\r
1891static int\r
1892and_code_range1(BBuf** pbuf, OnigCodePoint from1, OnigCodePoint to1,\r
1893 OnigCodePoint* data, int n)\r
1894{\r
1895 int i, r;\r
1896 OnigCodePoint from2, to2;\r
1897\r
1898 for (i = 0; i < n; i++) {\r
1899 from2 = data[i*2];\r
1900 to2 = data[i*2+1];\r
1901 if (from2 < from1) {\r
1902 if (to2 < from1) continue;\r
1903 else {\r
1904 from1 = to2 + 1;\r
1905 }\r
1906 }\r
1907 else if (from2 <= to1) {\r
1908 if (to2 < to1) {\r
1909 if (from1 <= from2 - 1) {\r
1910 r = add_code_range_to_buf(pbuf, from1, from2-1);\r
1911 if (r != 0) return r;\r
1912 }\r
1913 from1 = to2 + 1;\r
1914 }\r
1915 else {\r
1916 to1 = from2 - 1;\r
1917 }\r
1918 }\r
1919 else {\r
1920 from1 = from2;\r
1921 }\r
1922 if (from1 > to1) break;\r
1923 }\r
1924 if (from1 <= to1) {\r
1925 r = add_code_range_to_buf(pbuf, from1, to1);\r
1926 if (r != 0) return r;\r
1927 }\r
1928 return 0;\r
1929}\r
1930\r
1931static int\r
1932and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)\r
1933{\r
1934 int r;\r
1935 OnigCodePoint i, j, n1, n2, *data1, *data2;\r
1936 OnigCodePoint from, to, from1, to1, from2, to2;\r
1937\r
1938 *pbuf = (BBuf* )NULL;\r
1939 if (IS_NULL(bbuf1)) {\r
1940 if (not1 != 0 && IS_NOT_NULL(bbuf2)) /* not1 != 0 -> not2 == 0 */\r
1941 return bbuf_clone(pbuf, bbuf2);\r
1942 return 0;\r
1943 }\r
1944 else if (IS_NULL(bbuf2)) {\r
1945 if (not2 != 0)\r
1946 return bbuf_clone(pbuf, bbuf1);\r
1947 return 0;\r
1948 }\r
1949\r
1950 if (not1 != 0)\r
1951 SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);\r
1952\r
1953 data1 = (OnigCodePoint* )(bbuf1->p);\r
1954 data2 = (OnigCodePoint* )(bbuf2->p);\r
1955 GET_CODE_POINT(n1, data1);\r
1956 GET_CODE_POINT(n2, data2);\r
1957 data1++;\r
1958 data2++;\r
1959\r
1960 if (not2 == 0 && not1 == 0) { /* 1 AND 2 */\r
1961 for (i = 0; i < n1; i++) {\r
1962 from1 = data1[i*2];\r
1963 to1 = data1[i*2+1];\r
1964 for (j = 0; j < n2; j++) {\r
1965 from2 = data2[j*2];\r
1966 to2 = data2[j*2+1];\r
1967 if (from2 > to1) break;\r
1968 if (to2 < from1) continue;\r
1969 from = MAX(from1, from2);\r
1970 to = MIN(to1, to2);\r
1971 r = add_code_range_to_buf(pbuf, from, to);\r
1972 if (r != 0) return r;\r
1973 }\r
1974 }\r
1975 }\r
1976 else if (not1 == 0) { /* 1 AND (not 2) */\r
1977 for (i = 0; i < n1; i++) {\r
1978 from1 = data1[i*2];\r
1979 to1 = data1[i*2+1];\r
1980 r = and_code_range1(pbuf, from1, to1, data2, n2);\r
1981 if (r != 0) return r;\r
1982 }\r
1983 }\r
1984\r
1985 return 0;\r
1986}\r
1987\r
1988static int\r
1989and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)\r
1990{\r
1991 int r, not1, not2;\r
1992 BBuf *buf1, *buf2, *pbuf;\r
1993 BitSetRef bsr1, bsr2;\r
1994 BitSet bs1, bs2;\r
1995\r
1996 not1 = IS_NCCLASS_NOT(dest);\r
1997 bsr1 = dest->bs;\r
1998 buf1 = dest->mbuf;\r
1999 not2 = IS_NCCLASS_NOT(cc);\r
2000 bsr2 = cc->bs;\r
2001 buf2 = cc->mbuf;\r
2002\r
2003 if (not1 != 0) {\r
2004 bitset_invert_to(bsr1, bs1);\r
2005 bsr1 = bs1;\r
2006 }\r
2007 if (not2 != 0) {\r
2008 bitset_invert_to(bsr2, bs2);\r
2009 bsr2 = bs2;\r
2010 }\r
2011 bitset_and(bsr1, bsr2);\r
2012 if (bsr1 != dest->bs) {\r
2013 bitset_copy(dest->bs, bsr1);\r
2014 bsr1 = dest->bs;\r
2015 }\r
2016 if (not1 != 0) {\r
2017 bitset_invert(dest->bs);\r
2018 }\r
2019\r
2020 if (! ONIGENC_IS_SINGLEBYTE(enc)) {\r
2021 if (not1 != 0 && not2 != 0) {\r
2022 r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf);\r
2023 }\r
2024 else {\r
2025 r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf);\r
2026 if (r == 0 && not1 != 0) {\r
2027 BBuf *tbuf;\r
2028 r = not_code_range_buf(enc, pbuf, &tbuf);\r
2029 if (r != 0) {\r
2030 bbuf_free(pbuf);\r
2031 return r;\r
2032 }\r
2033 bbuf_free(pbuf);\r
2034 pbuf = tbuf;\r
2035 }\r
2036 }\r
2037 if (r != 0) return r;\r
2038\r
2039 dest->mbuf = pbuf;\r
2040 bbuf_free(buf1);\r
2041 return r;\r
2042 }\r
2043 return 0;\r
2044}\r
2045\r
2046static int\r
2047or_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)\r
2048{\r
2049 int r, not1, not2;\r
2050 BBuf *buf1, *buf2, *pbuf;\r
2051 BitSetRef bsr1, bsr2;\r
2052 BitSet bs1, bs2;\r
2053\r
2054 not1 = IS_NCCLASS_NOT(dest);\r
2055 bsr1 = dest->bs;\r
2056 buf1 = dest->mbuf;\r
2057 not2 = IS_NCCLASS_NOT(cc);\r
2058 bsr2 = cc->bs;\r
2059 buf2 = cc->mbuf;\r
2060\r
2061 if (not1 != 0) {\r
2062 bitset_invert_to(bsr1, bs1);\r
2063 bsr1 = bs1;\r
2064 }\r
2065 if (not2 != 0) {\r
2066 bitset_invert_to(bsr2, bs2);\r
2067 bsr2 = bs2;\r
2068 }\r
2069 bitset_or(bsr1, bsr2);\r
2070 if (bsr1 != dest->bs) {\r
2071 bitset_copy(dest->bs, bsr1);\r
2072 bsr1 = dest->bs;\r
2073 }\r
2074 if (not1 != 0) {\r
2075 bitset_invert(dest->bs);\r
2076 }\r
2077\r
2078 if (! ONIGENC_IS_SINGLEBYTE(enc)) {\r
2079 if (not1 != 0 && not2 != 0) {\r
2080 r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf);\r
2081 }\r
2082 else {\r
2083 r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf);\r
2084 if (r == 0 && not1 != 0) {\r
2085 BBuf *tbuf;\r
2086 r = not_code_range_buf(enc, pbuf, &tbuf);\r
2087 if (r != 0) {\r
2088 bbuf_free(pbuf);\r
2089 return r;\r
2090 }\r
2091 bbuf_free(pbuf);\r
2092 pbuf = tbuf;\r
2093 }\r
2094 }\r
2095 if (r != 0) return r;\r
2096\r
2097 dest->mbuf = pbuf;\r
2098 bbuf_free(buf1);\r
2099 return r;\r
2100 }\r
2101 else\r
2102 return 0;\r
2103}\r
2104\r
2105static int\r
2106conv_backslash_value(int c, ScanEnv* env)\r
2107{\r
2108 if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_CONTROL_CHARS)) {\r
2109 switch (c) {\r
2110 case 'n': return '\n';\r
2111 case 't': return '\t';\r
2112 case 'r': return '\r';\r
2113 case 'f': return '\f';\r
2114 case 'a': return '\007';\r
2115 case 'b': return '\010';\r
2116 case 'e': return '\033';\r
2117 case 'v':\r
2118 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_V_VTAB))\r
2119 return '\v';\r
2120 break;\r
2121\r
2122 default:\r
2123 break;\r
2124 }\r
2125 }\r
2126 return c;\r
2127}\r
2128\r
2129static int\r
2130is_invalid_quantifier_target(Node* node)\r
2131{\r
2132 switch (NTYPE(node)) {\r
2133 case NT_ANCHOR:\r
2134 return 1;\r
2135 break;\r
2136\r
2137 case NT_ENCLOSE:\r
2138 /* allow enclosed elements */\r
2139 /* return is_invalid_quantifier_target(NENCLOSE(node)->target); */\r
2140 break;\r
2141\r
2142 case NT_LIST:\r
2143 do {\r
2144 if (! is_invalid_quantifier_target(NCAR(node))) return 0;\r
2145 } while (IS_NOT_NULL(node = NCDR(node)));\r
2146 return 0;\r
2147 break;\r
2148\r
2149 case NT_ALT:\r
2150 do {\r
2151 if (is_invalid_quantifier_target(NCAR(node))) return 1;\r
2152 } while (IS_NOT_NULL(node = NCDR(node)));\r
2153 break;\r
2154\r
2155 default:\r
2156 break;\r
2157 }\r
2158 return 0;\r
2159}\r
2160\r
2161/* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */\r
2162static int\r
2163popular_quantifier_num(QtfrNode* q)\r
2164{\r
2165 if (q->greedy) {\r
2166 if (q->lower == 0) {\r
2167 if (q->upper == 1) return 0;\r
2168 else if (IS_REPEAT_INFINITE(q->upper)) return 1;\r
2169 }\r
2170 else if (q->lower == 1) {\r
2171 if (IS_REPEAT_INFINITE(q->upper)) return 2;\r
2172 }\r
2173 }\r
2174 else {\r
2175 if (q->lower == 0) {\r
2176 if (q->upper == 1) return 3;\r
2177 else if (IS_REPEAT_INFINITE(q->upper)) return 4;\r
2178 }\r
2179 else if (q->lower == 1) {\r
2180 if (IS_REPEAT_INFINITE(q->upper)) return 5;\r
2181 }\r
2182 }\r
2183 return -1;\r
2184}\r
2185\r
2186\r
/* How a quantifier directly nested in another quantifier is rewritten. */
enum ReduceType {
  RQ_ASIS = 0,  /* as is */
  RQ_DEL  = 1,  /* delete parent */
  RQ_A    = 2,  /* to '*'    */
  RQ_AQ   = 3,  /* to '*?'   */
  RQ_QQ   = 4,  /* to '??'   */
  RQ_P_QQ = 5,  /* to '+)??' */
  RQ_PQ_Q = 6   /* to '+?)?' */
};
2196\r
2197static enum ReduceType ReduceTypeTable[6][6] = {\r
2198 {RQ_DEL, RQ_A, RQ_A, RQ_QQ, RQ_AQ, RQ_ASIS}, /* '?' */\r
2199 {RQ_DEL, RQ_DEL, RQ_DEL, RQ_P_QQ, RQ_P_QQ, RQ_DEL}, /* '*' */\r
2200 {RQ_A, RQ_A, RQ_DEL, RQ_ASIS, RQ_P_QQ, RQ_DEL}, /* '+' */\r
2201 {RQ_DEL, RQ_AQ, RQ_AQ, RQ_DEL, RQ_AQ, RQ_AQ}, /* '??' */\r
2202 {RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL}, /* '*?' */\r
2203 {RQ_ASIS, RQ_PQ_Q, RQ_DEL, RQ_AQ, RQ_AQ, RQ_DEL} /* '+?' */\r
2204};\r
2205\r
2206extern void\r
2207onig_reduce_nested_quantifier(Node* pnode, Node* cnode)\r
2208{\r
2209 int pnum, cnum;\r
2210 QtfrNode *p, *c;\r
2211\r
2212 p = NQTFR(pnode);\r
2213 c = NQTFR(cnode);\r
2214 pnum = popular_quantifier_num(p);\r
2215 cnum = popular_quantifier_num(c);\r
2216 if (pnum < 0 || cnum < 0) return ;\r
2217\r
2218 switch(ReduceTypeTable[cnum][pnum]) {\r
2219 case RQ_DEL:\r
b2e35426 2220 CopyMem (pnode, cnode, sizeof (Node));\r
14b0e578
CS
2221 break;\r
2222 case RQ_A:\r
2223 p->target = c->target;\r
2224 p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 1;\r
2225 break;\r
2226 case RQ_AQ:\r
2227 p->target = c->target;\r
2228 p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 0;\r
2229 break;\r
2230 case RQ_QQ:\r
2231 p->target = c->target;\r
2232 p->lower = 0; p->upper = 1; p->greedy = 0;\r
2233 break;\r
2234 case RQ_P_QQ:\r
2235 p->target = cnode;\r
2236 p->lower = 0; p->upper = 1; p->greedy = 0;\r
2237 c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 1;\r
2238 return ;\r
2239 break;\r
2240 case RQ_PQ_Q:\r
2241 p->target = cnode;\r
2242 p->lower = 0; p->upper = 1; p->greedy = 1;\r
2243 c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 0;\r
2244 return ;\r
2245 break;\r
2246 case RQ_ASIS:\r
2247 p->target = cnode;\r
2248 return ;\r
2249 break;\r
2250 }\r
2251\r
2252 c->target = NULL_NODE;\r
2253 onig_node_free(cnode);\r
2254}\r
2255\r
2256\r
/* Token kinds stored in OnigToken.type. */
enum TokenSyms {
  TK_EOT                = 0,   /* end of token */
  TK_RAW_BYTE           = 1,
  TK_CHAR               = 2,
  TK_STRING             = 3,
  TK_CODE_POINT         = 4,
  TK_ANYCHAR            = 5,
  TK_CHAR_TYPE          = 6,
  TK_BACKREF            = 7,
  TK_CALL               = 8,
  TK_ANCHOR             = 9,
  TK_OP_REPEAT          = 10,
  TK_INTERVAL           = 11,
  TK_ANYCHAR_ANYTIME    = 12,  /* SQL '%' == .* */
  TK_ALT                = 13,
  TK_SUBEXP_OPEN        = 14,
  TK_SUBEXP_CLOSE       = 15,
  TK_CC_OPEN            = 16,
  TK_QUOTE_OPEN         = 17,
  TK_CHAR_PROPERTY      = 18,  /* \p{...}, \P{...} */
  /* in cc */
  TK_CC_CLOSE           = 19,
  TK_CC_RANGE           = 20,
  TK_POSIX_BRACKET_OPEN = 21,
  TK_CC_AND             = 22,  /* && */
  TK_CC_CC_OPEN         = 23   /* [  */
};
2284\r
2285typedef struct {\r
2286 enum TokenSyms type;\r
2287 int escaped;\r
2288 int base; /* is number: 8, 16 (used in [....]) */\r
2289 UChar* backp;\r
2290 union {\r
2291 UChar* s;\r
2292 int c;\r
2293 OnigCodePoint code;\r
2294 int anchor;\r
2295 int subtype;\r
2296 struct {\r
2297 int lower;\r
2298 int upper;\r
2299 int greedy;\r
2300 int possessive;\r
2301 } repeat;\r
2302 struct {\r
2303 int num;\r
2304 int ref1;\r
2305 int* refs;\r
2306 int by_name;\r
2307#ifdef USE_BACKREF_WITH_LEVEL\r
2308 int exist_level;\r
2309 int level; /* \k<name+n> */\r
2310#endif\r
2311 } backref;\r
2312 struct {\r
2313 UChar* name;\r
2314 UChar* name_end;\r
2315 int gnum;\r
2316 } call;\r
2317 struct {\r
2318 int ctype;\r
2319 int not;\r
2320 } prop;\r
2321 } u;\r
2322} OnigToken;\r
2323\r
2324\r
2325static int\r
2326fetch_range_quantifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env)\r
2327{\r
2328 int low, up, syn_allow, non_low = 0;\r
2329 int r = 0;\r
2330 OnigCodePoint c;\r
2331 OnigEncoding enc = env->enc;\r
2332 UChar* p = *src;\r
2333 PFETCH_READY;\r
2334\r
2335 syn_allow = IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INVALID_INTERVAL);\r
2336\r
2337 if (PEND) {\r
2338 if (syn_allow)\r
2339 return 1; /* "....{" : OK! */\r
2340 else\r
2341 return ONIGERR_END_PATTERN_AT_LEFT_BRACE; /* "....{" syntax error */\r
2342 }\r
2343\r
2344 if (! syn_allow) {\r
2345 c = PPEEK;\r
2346 if (c == ')' || c == '(' || c == '|') {\r
2347 return ONIGERR_END_PATTERN_AT_LEFT_BRACE;\r
2348 }\r
2349 }\r
2350\r
2351 low = onig_scan_unsigned_number(&p, end, env->enc);\r
2352 if (low < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;\r
2353 if (low > ONIG_MAX_REPEAT_NUM)\r
2354 return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;\r
2355\r
2356 if (p == *src) { /* can't read low */\r
2357 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV)) {\r
2358 /* allow {,n} as {0,n} */\r
2359 low = 0;\r
2360 non_low = 1;\r
2361 }\r
2362 else\r
2363 goto invalid;\r
2364 }\r
2365\r
2366 if (PEND) goto invalid;\r
2367 PFETCH(c);\r
2368 if (c == ',') {\r
2369 UChar* prev = p;\r
2370 up = onig_scan_unsigned_number(&p, end, env->enc);\r
2371 if (up < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;\r
2372 if (up > ONIG_MAX_REPEAT_NUM)\r
2373 return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;\r
2374\r
2375 if (p == prev) {\r
2376 if (non_low != 0)\r
2377 goto invalid;\r
2378 up = REPEAT_INFINITE; /* {n,} : {n,infinite} */\r
2379 }\r
2380 }\r
2381 else {\r
2382 if (non_low != 0)\r
2383 goto invalid;\r
2384\r
2385 PUNFETCH;\r
2386 up = low; /* {n} : exact n times */\r
2387 r = 2; /* fixed */\r
2388 }\r
2389\r
2390 if (PEND) goto invalid;\r
2391 PFETCH(c);\r
2392 if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) {\r
2393 if (c != MC_ESC(env->syntax)) goto invalid;\r
2394 PFETCH(c);\r
2395 }\r
2396 if (c != '}') goto invalid;\r
2397\r
2398 if (!IS_REPEAT_INFINITE(up) && low > up) {\r
2399 return ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE;\r
2400 }\r
2401\r
2402 tok->type = TK_INTERVAL;\r
2403 tok->u.repeat.lower = low;\r
2404 tok->u.repeat.upper = up;\r
2405 *src = p;\r
2406 return r; /* 0: normal {n,m}, 2: fixed {n} */\r
2407\r
2408 invalid:\r
2409 if (syn_allow)\r
2410 return 1; /* OK */\r
2411 else\r
2412 return ONIGERR_INVALID_REPEAT_RANGE_PATTERN;\r
2413}\r
2414\r
2415/* \M-, \C-, \c, or \... */\r
2416static int\r
2417fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)\r
2418{\r
2419 int v;\r
2420 OnigCodePoint c;\r
2421 OnigEncoding enc = env->enc;\r
2422 UChar* p = *src;\r
2423\r
2424 if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;\r
2425\r
2426 PFETCH_S(c);\r
2427 switch (c) {\r
2428 case 'M':\r
2429 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META)) {\r
2430 if (PEND) return ONIGERR_END_PATTERN_AT_META;\r
2431 PFETCH_S(c);\r
2432 if (c != '-') return ONIGERR_META_CODE_SYNTAX;\r
2433 if (PEND) return ONIGERR_END_PATTERN_AT_META;\r
2434 PFETCH_S(c);\r
2435 if (c == MC_ESC(env->syntax)) {\r
2436 v = fetch_escaped_value(&p, end, env);\r
2437 if (v < 0) return v;\r
2438 c = (OnigCodePoint )v;\r
2439 }\r
2440 c = ((c & 0xff) | 0x80);\r
2441 }\r
2442 else\r
2443 goto backslash;\r
2444 break;\r
2445\r
2446 case 'C':\r
2447 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL)) {\r
2448 if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;\r
2449 PFETCH_S(c);\r
2450 if (c != '-') return ONIGERR_CONTROL_CODE_SYNTAX;\r
2451 goto control;\r
2452 }\r
2453 else\r
2454 goto backslash;\r
2455\r
2456 case 'c':\r
2457 if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_C_CONTROL)) {\r
2458 control:\r
2459 if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;\r
2460 PFETCH_S(c);\r
2461 if (c == '?') {\r
2462 c = 0177;\r
2463 }\r
2464 else {\r
2465 if (c == MC_ESC(env->syntax)) {\r
2466 v = fetch_escaped_value(&p, end, env);\r
2467 if (v < 0) return v;\r
2468 c = (OnigCodePoint )v;\r
2469 }\r
2470 c &= 0x9f;\r
2471 }\r
2472 break;\r
2473 }\r
2474 /* fall through */\r
2475\r
2476 default:\r
2477 {\r
2478 backslash:\r
2479 c = conv_backslash_value(c, env);\r
2480 }\r
2481 break;\r
2482 }\r
2483\r
2484 *src = p;\r
2485 return c;\r
2486}\r
2487\r
2488static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env);\r
2489\r
2490static OnigCodePoint\r
2491get_name_end_code_point(OnigCodePoint start)\r
2492{\r
2493 switch (start) {\r
2494 case '<': return (OnigCodePoint )'>'; break;\r
2495 case '\'': return (OnigCodePoint )'\''; break;\r
2496 default:\r
2497 break;\r
2498 }\r
2499\r
2500 return (OnigCodePoint )0;\r
2501}\r
2502\r
2503#ifdef USE_NAMED_GROUP\r
2504#ifdef USE_BACKREF_WITH_LEVEL\r
2505/*\r
2506 \k<name+n>, \k<name-n>\r
2507 \k<num+n>, \k<num-n>\r
2508 \k<-num+n>, \k<-num-n>\r
2509*/\r
2510static int\r
2511fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,\r
2512 UChar** rname_end, ScanEnv* env,\r
2513 int* rback_num, int* rlevel)\r
2514{\r
2515 int r, sign, is_num, exist_level;\r
2516 OnigCodePoint end_code;\r
2517 OnigCodePoint c = 0;\r
2518 OnigEncoding enc = env->enc;\r
2519 UChar *name_end;\r
2520 UChar *pnum_head;\r
2521 UChar *p = *src;\r
2522 PFETCH_READY;\r
2523\r
2524 *rback_num = 0;\r
2525 is_num = exist_level = 0;\r
2526 sign = 1;\r
2527 pnum_head = *src;\r
2528\r
2529 end_code = get_name_end_code_point(start_code);\r
2530\r
2531 name_end = end;\r
2532 r = 0;\r
2533 if (PEND) {\r
2534 return ONIGERR_EMPTY_GROUP_NAME;\r
2535 }\r
2536 else {\r
2537 PFETCH(c);\r
2538 if (c == end_code)\r
2539 return ONIGERR_EMPTY_GROUP_NAME;\r
2540\r
2541 if (ONIGENC_IS_CODE_DIGIT(enc, c)) {\r
2542 is_num = 1;\r
2543 }\r
2544 else if (c == '-') {\r
2545 is_num = 2;\r
2546 sign = -1;\r
2547 pnum_head = p;\r
2548 }\r
2549 else if (!ONIGENC_IS_CODE_WORD(enc, c)) {\r
2550 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;\r
2551 }\r
2552 }\r
2553\r
2554 while (!PEND) {\r
2555 name_end = p;\r
2556 PFETCH(c);\r
2557 if (c == end_code || c == ')' || c == '+' || c == '-') {\r
2558 if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME;\r
2559 break;\r
2560 }\r
2561\r
2562 if (is_num != 0) {\r
2563 if (ONIGENC_IS_CODE_DIGIT(enc, c)) {\r
2564 is_num = 1;\r
2565 }\r
2566 else {\r
2567 r = ONIGERR_INVALID_GROUP_NAME;\r
2568 is_num = 0;\r
2569 }\r
2570 }\r
2571 else if (!ONIGENC_IS_CODE_WORD(enc, c)) {\r
2572 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;\r
2573 }\r
2574 }\r
2575\r
2576 if (r == 0 && c != end_code) {\r
2577 if (c == '+' || c == '-') {\r
2578 int level;\r
2579 int flag = (c == '-' ? -1 : 1);\r
2580\r
2581 PFETCH(c);\r
2582 if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto err;\r
2583 PUNFETCH;\r
2584 level = onig_scan_unsigned_number(&p, end, enc);\r
2585 if (level < 0) return ONIGERR_TOO_BIG_NUMBER;\r
2586 *rlevel = (level * flag);\r
2587 exist_level = 1;\r
2588\r
2589 PFETCH(c);\r
2590 if (c == end_code)\r
2591 goto end;\r
2592 }\r
2593\r
2594 err:\r
2595 r = ONIGERR_INVALID_GROUP_NAME;\r
2596 name_end = end;\r
2597 }\r
2598\r
2599 end:\r
2600 if (r == 0) {\r
2601 if (is_num != 0) {\r
2602 *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);\r
2603 if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;\r
2604 else if (*rback_num == 0) goto err;\r
2605\r
2606 *rback_num *= sign;\r
2607 }\r
2608\r
2609 *rname_end = name_end;\r
2610 *src = p;\r
2611 return (exist_level ? 1 : 0);\r
2612 }\r
2613 else {\r
2614 onig_scan_env_set_error_string(env, r, *src, name_end);\r
2615 return r;\r
2616 }\r
2617}\r
2618#endif /* USE_BACKREF_WITH_LEVEL */\r
2619\r
2620/*\r
2621 def: 0 -> define name (don't allow number name)\r
2622 1 -> reference name (allow number name)\r
2623*/\r
2624static int\r
2625fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,\r
2626 UChar** rname_end, ScanEnv* env, int* rback_num, int ref)\r
2627{\r
2628 int r, is_num, sign;\r
2629 OnigCodePoint end_code;\r
2630 OnigCodePoint c = 0;\r
2631 OnigEncoding enc = env->enc;\r
2632 UChar *name_end;\r
2633 UChar *pnum_head;\r
2634 UChar *p = *src;\r
2635\r
2636 *rback_num = 0;\r
2637\r
2638 end_code = get_name_end_code_point(start_code);\r
2639\r
2640 name_end = end;\r
2641 pnum_head = *src;\r
2642 r = 0;\r
2643 is_num = 0;\r
2644 sign = 1;\r
2645 if (PEND) {\r
2646 return ONIGERR_EMPTY_GROUP_NAME;\r
2647 }\r
2648 else {\r
2649 PFETCH_S(c);\r
2650 if (c == end_code)\r
2651 return ONIGERR_EMPTY_GROUP_NAME;\r
2652\r
2653 if (ONIGENC_IS_CODE_DIGIT(enc, c)) {\r
2654 if (ref == 1)\r
2655 is_num = 1;\r
2656 else {\r
2657 r = ONIGERR_INVALID_GROUP_NAME;\r
2658 is_num = 0;\r
2659 }\r
2660 }\r
2661 else if (c == '-') {\r
2662 if (ref == 1) {\r
2663 is_num = 2;\r
2664 sign = -1;\r
2665 pnum_head = p;\r
2666 }\r
2667 else {\r
2668 r = ONIGERR_INVALID_GROUP_NAME;\r
2669 is_num = 0;\r
2670 }\r
2671 }\r
2672 else if (!ONIGENC_IS_CODE_WORD(enc, c)) {\r
2673 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;\r
2674 }\r
2675 }\r
2676\r
2677 if (r == 0) {\r
2678 while (!PEND) {\r
2679 name_end = p;\r
2680 PFETCH_S(c);\r
2681 if (c == end_code || c == ')') {\r
2682 if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME;\r
2683 break;\r
2684 }\r
2685\r
2686 if (is_num != 0) {\r
2687 if (ONIGENC_IS_CODE_DIGIT(enc, c)) {\r
2688 is_num = 1;\r
2689 }\r
2690 else {\r
2691 if (!ONIGENC_IS_CODE_WORD(enc, c))\r
2692 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;\r
2693 else\r
2694 r = ONIGERR_INVALID_GROUP_NAME;\r
2695 is_num = 0;\r
2696 }\r
2697 }\r
2698 else {\r
2699 if (!ONIGENC_IS_CODE_WORD(enc, c)) {\r
2700 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;\r
2701 }\r
2702 }\r
2703 }\r
2704\r
2705 if (c != end_code) {\r
2706 r = ONIGERR_INVALID_GROUP_NAME;\r
2707 name_end = end;\r
2708 }\r
2709\r
2710 if (is_num != 0) {\r
2711 *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);\r
2712 if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;\r
2713 else if (*rback_num == 0) {\r
2714 r = ONIGERR_INVALID_GROUP_NAME;\r
2715 goto err;\r
2716 }\r
2717\r
2718 *rback_num *= sign;\r
2719 }\r
2720\r
2721 *rname_end = name_end;\r
2722 *src = p;\r
2723 return 0;\r
2724 }\r
2725 else {\r
2726 while (!PEND) {\r
2727 name_end = p;\r
2728 PFETCH_S(c);\r
2729 if (c == end_code || c == ')')\r
2730 break;\r
2731 }\r
2732 if (PEND)\r
2733 name_end = end;\r
2734\r
2735 err:\r
2736 onig_scan_env_set_error_string(env, r, *src, name_end);\r
2737 return r;\r
2738 }\r
2739}\r
2740#else\r
2741static int\r
2742fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,\r
2743 UChar** rname_end, ScanEnv* env, int* rback_num, int ref)\r
2744{\r
2745 int r, is_num, sign;\r
2746 OnigCodePoint end_code;\r
2747 OnigCodePoint c = 0;\r
2748 UChar *name_end;\r
2749 OnigEncoding enc = env->enc;\r
2750 UChar *pnum_head;\r
2751 UChar *p = *src;\r
2752 PFETCH_READY;\r
2753\r
2754 *rback_num = 0;\r
2755\r
2756 end_code = get_name_end_code_point(start_code);\r
2757\r
2758 *rname_end = name_end = end;\r
2759 r = 0;\r
2760 pnum_head = *src;\r
2761 is_num = 0;\r
2762 sign = 1;\r
2763\r
2764 if (PEND) {\r
2765 return ONIGERR_EMPTY_GROUP_NAME;\r
2766 }\r
2767 else {\r
2768 PFETCH(c);\r
2769 if (c == end_code)\r
2770 return ONIGERR_EMPTY_GROUP_NAME;\r
2771\r
2772 if (ONIGENC_IS_CODE_DIGIT(enc, c)) {\r
2773 is_num = 1;\r
2774 }\r
2775 else if (c == '-') {\r
2776 is_num = 2;\r
2777 sign = -1;\r
2778 pnum_head = p;\r
2779 }\r
2780 else {\r
2781 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;\r
2782 }\r
2783 }\r
2784\r
2785 while (!PEND) {\r
2786 name_end = p;\r
2787\r
2788 PFETCH(c);\r
2789 if (c == end_code || c == ')') break;\r
2790 if (! ONIGENC_IS_CODE_DIGIT(enc, c))\r
2791 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;\r
2792 }\r
2793 if (r == 0 && c != end_code) {\r
2794 r = ONIGERR_INVALID_GROUP_NAME;\r
2795 name_end = end;\r
2796 }\r
2797\r
2798 if (r == 0) {\r
2799 *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);\r
2800 if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;\r
2801 else if (*rback_num == 0) {\r
2802 r = ONIGERR_INVALID_GROUP_NAME;\r
2803 goto err;\r
2804 }\r
2805 *rback_num *= sign;\r
2806\r
2807 *rname_end = name_end;\r
2808 *src = p;\r
2809 return 0;\r
2810 }\r
2811 else {\r
2812 err:\r
2813 onig_scan_env_set_error_string(env, r, *src, name_end);\r
2814 return r;\r
2815 }\r
2816}\r
2817#endif /* USE_NAMED_GROUP */\r
2818\r
2819static void\r
2820CC_ESC_WARN(ScanEnv* env, UChar *c)\r
2821{\r
2822 if (onig_warn == onig_null_warn) return ;\r
2823\r
2824 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED) &&\r
2825 IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) {\r
2826 UChar buf[WARN_BUFSIZE];\r
2827 onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,\r
2828 env->pattern, env->pattern_end,\r
2829 (UChar* )"character class has '%s' without escape", c);\r
2830 (*onig_warn)((char* )buf);\r
2831 }\r
2832}\r
2833\r
2834static void\r
2835CLOSE_BRACKET_WITHOUT_ESC_WARN(ScanEnv* env, UChar* c)\r
2836{\r
2837 if (onig_warn == onig_null_warn) return ;\r
2838\r
2839 if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) {\r
2840 UChar buf[WARN_BUFSIZE];\r
2841 onig_snprintf_with_pattern(buf, WARN_BUFSIZE, (env)->enc,\r
2842 (env)->pattern, (env)->pattern_end,\r
2843 (UChar* )"regular expression has '%s' without escape", c);\r
2844 (*onig_warn)((char* )buf);\r
2845 }\r
2846}\r
2847\r
2848static UChar*\r
2849find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to,\r
2850 UChar **next, OnigEncoding enc)\r
2851{\r
2852 int i;\r
2853 OnigCodePoint x;\r
2854 UChar *q;\r
2855 UChar *p = from;\r
2856 \r
2857 while (p < to) {\r
2858 x = ONIGENC_MBC_TO_CODE(enc, p, to);\r
2859 q = p + enclen(enc, p);\r
2860 if (x == s[0]) {\r
2861 for (i = 1; i < n && q < to; i++) {\r
2862 x = ONIGENC_MBC_TO_CODE(enc, q, to);\r
2863 if (x != s[i]) break;\r
2864 q += enclen(enc, q);\r
2865 }\r
2866 if (i >= n) {\r
2867 if (IS_NOT_NULL(next))\r
2868 *next = q;\r
2869 return p;\r
2870 }\r
2871 }\r
2872 p = q;\r
2873 }\r
2874 return NULL_UCHARP;\r
2875}\r
2876\r
2877static int\r
2878str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to,\r
2879 OnigCodePoint bad, OnigEncoding enc, OnigSyntaxType* syn)\r
2880{\r
2881 int i, in_esc;\r
2882 OnigCodePoint x;\r
2883 UChar *q;\r
2884 UChar *p = from;\r
2885\r
2886 in_esc = 0;\r
2887 while (p < to) {\r
2888 if (in_esc) {\r
2889 in_esc = 0;\r
2890 p += enclen(enc, p);\r
2891 }\r
2892 else {\r
2893 x = ONIGENC_MBC_TO_CODE(enc, p, to);\r
2894 q = p + enclen(enc, p);\r
2895 if (x == s[0]) {\r
2896 for (i = 1; i < n && q < to; i++) {\r
2897 x = ONIGENC_MBC_TO_CODE(enc, q, to);\r
2898 if (x != s[i]) break;\r
2899 q += enclen(enc, q);\r
2900 }\r
2901 if (i >= n) return 1;\r
2902 p += enclen(enc, p);\r
2903 }\r
2904 else {\r
2905 x = ONIGENC_MBC_TO_CODE(enc, p, to);\r
2906 if (x == bad) return 0;\r
2907 else if (x == MC_ESC(syn)) in_esc = 1;\r
2908 p = q;\r
2909 }\r
2910 }\r
2911 }\r
2912 return 0;\r
2913}\r
2914\r
2915static int\r
2916fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)\r
2917{\r
2918 int num;\r
2919 OnigCodePoint c, c2;\r
2920 OnigSyntaxType* syn = env->syntax;\r
2921 OnigEncoding enc = env->enc;\r
2922 UChar* prev;\r
2923 UChar* p = *src;\r
2924 PFETCH_READY;\r
2925\r
2926 if (PEND) {\r
2927 tok->type = TK_EOT;\r
2928 return tok->type;\r
2929 }\r
2930\r
2931 PFETCH(c);\r
2932 tok->type = TK_CHAR;\r
2933 tok->base = 0;\r
2934 tok->u.c = c;\r
2935 tok->escaped = 0;\r
2936\r
2937 if (c == ']') {\r
2938 tok->type = TK_CC_CLOSE;\r
2939 }\r
2940 else if (c == '-') {\r
2941 tok->type = TK_CC_RANGE;\r
2942 }\r
2943 else if (c == MC_ESC(syn)) {\r
2944 if (! IS_SYNTAX_BV(syn, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC))\r
2945 goto end;\r
2946\r
2947 if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;\r
2948\r
2949 PFETCH(c);\r
2950 tok->escaped = 1;\r
2951 tok->u.c = c;\r
2952 switch (c) {\r
2953 case 'w':\r
2954 tok->type = TK_CHAR_TYPE;\r
2955 tok->u.prop.ctype = ONIGENC_CTYPE_WORD;\r
2956 tok->u.prop.not = 0;\r
2957 break;\r
2958 case 'W':\r
2959 tok->type = TK_CHAR_TYPE;\r
2960 tok->u.prop.ctype = ONIGENC_CTYPE_WORD;\r
2961 tok->u.prop.not = 1;\r
2962 break;\r
2963 case 'd':\r
2964 tok->type = TK_CHAR_TYPE;\r
2965 tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;\r
2966 tok->u.prop.not = 0;\r
2967 break;\r
2968 case 'D':\r
2969 tok->type = TK_CHAR_TYPE;\r
2970 tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;\r
2971 tok->u.prop.not = 1;\r
2972 break;\r
2973 case 's':\r
2974 tok->type = TK_CHAR_TYPE;\r
2975 tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;\r
2976 tok->u.prop.not = 0;\r
2977 break;\r
2978 case 'S':\r
2979 tok->type = TK_CHAR_TYPE;\r
2980 tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;\r
2981 tok->u.prop.not = 1;\r
2982 break;\r
2983 case 'h':\r
2984 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;\r
2985 tok->type = TK_CHAR_TYPE;\r
2986 tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;\r
2987 tok->u.prop.not = 0;\r
2988 break;\r
2989 case 'H':\r
2990 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;\r
2991 tok->type = TK_CHAR_TYPE;\r
2992 tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;\r
2993 tok->u.prop.not = 1;\r
2994 break;\r
2995\r
2996 case 'p':\r
2997 case 'P':\r
2998 c2 = PPEEK;\r
2999 if (c2 == '{' &&\r
3000 IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {\r
3001 PINC;\r
3002 tok->type = TK_CHAR_PROPERTY;\r
3003 tok->u.prop.not = (c == 'P' ? 1 : 0);\r
3004\r
3005 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {\r
3006 PFETCH(c2);\r
3007 if (c2 == '^') {\r
3008 tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);\r
3009 }\r
3010 else\r
3011 PUNFETCH;\r
3012 }\r
3013 }\r
3014 break;\r
3015\r
3016 case 'x':\r
3017 if (PEND) break;\r
3018\r
3019 prev = p;\r
3020 if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {\r
3021 PINC;\r
3022 num = scan_unsigned_hexadecimal_number(&p, end, 8, enc);\r
3023 if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;\r
3024 if (!PEND) {\r
3025 c2 = PPEEK;\r
3026 if (ONIGENC_IS_CODE_XDIGIT(enc, c2))\r
3027 return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;\r
3028 }\r
3029\r
3030 if (p > prev + enclen(enc, prev) && !PEND && (PPEEK_IS('}'))) {\r
3031 PINC;\r
3032 tok->type = TK_CODE_POINT;\r
3033 tok->base = 16;\r
3034 tok->u.code = (OnigCodePoint )num;\r
3035 }\r
3036 else {\r
3037 /* can't read nothing or invalid format */\r
3038 p = prev;\r
3039 }\r
3040 }\r
3041 else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {\r
3042 num = scan_unsigned_hexadecimal_number(&p, end, 2, enc);\r
3043 if (num < 0) return ONIGERR_TOO_BIG_NUMBER;\r
3044 if (p == prev) { /* can't read nothing. */\r
3045 num = 0; /* but, it's not error */\r
3046 }\r
3047 tok->type = TK_RAW_BYTE;\r
3048 tok->base = 16;\r
3049 tok->u.c = num;\r
3050 }\r
3051 break;\r
3052\r
3053 case 'u':\r
3054 if (PEND) break;\r
3055\r
3056 prev = p;\r
3057 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {\r
3058 num = scan_unsigned_hexadecimal_number(&p, end, 4, enc);\r
3059 if (num < 0) return ONIGERR_TOO_BIG_NUMBER;\r
3060 if (p == prev) { /* can't read nothing. */\r
3061 num = 0; /* but, it's not error */\r
3062 }\r
3063 tok->type = TK_CODE_POINT;\r
3064 tok->base = 16;\r
3065 tok->u.code = (OnigCodePoint )num;\r
3066 }\r
3067 break;\r
3068\r
3069 case '0':\r
3070 case '1': case '2': case '3': case '4': case '5': case '6': case '7':\r
3071 if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {\r
3072 PUNFETCH;\r
3073 prev = p;\r
3074 num = scan_unsigned_octal_number(&p, end, 3, enc);\r
3075 if (num < 0) return ONIGERR_TOO_BIG_NUMBER;\r
3076 if (p == prev) { /* can't read nothing. */\r
3077 num = 0; /* but, it's not error */\r
3078 }\r
3079 tok->type = TK_RAW_BYTE;\r
3080 tok->base = 8;\r
3081 tok->u.c = num;\r
3082 }\r
3083 break;\r
3084\r
3085 default:\r
3086 PUNFETCH;\r
3087 num = fetch_escaped_value(&p, end, env);\r
3088 if (num < 0) return num;\r
3089 if (tok->u.c != num) {\r
3090 tok->u.code = (OnigCodePoint )num;\r
3091 tok->type = TK_CODE_POINT;\r
3092 }\r
3093 break;\r
3094 }\r
3095 }\r
3096 else if (c == '[') {\r
3097 if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_POSIX_BRACKET) && (PPEEK_IS(':'))) {\r
3098 OnigCodePoint send[] = { (OnigCodePoint )':', (OnigCodePoint )']' };\r
3099 tok->backp = p; /* point at '[' is readed */\r
3100 PINC;\r
3101 if (str_exist_check_with_esc(send, 2, p, end,\r
3102 (OnigCodePoint )']', enc, syn)) {\r
3103 tok->type = TK_POSIX_BRACKET_OPEN;\r
3104 }\r
3105 else {\r
3106 PUNFETCH;\r
3107 goto cc_in_cc;\r
3108 }\r
3109 }\r
3110 else {\r
3111 cc_in_cc:\r
3112 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP)) {\r
3113 tok->type = TK_CC_CC_OPEN;\r
3114 }\r
3115 else {\r
3116 CC_ESC_WARN(env, (UChar* )"[");\r
3117 }\r
3118 }\r
3119 }\r
3120 else if (c == '&') {\r
3121 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP) &&\r
3122 !PEND && (PPEEK_IS('&'))) {\r
3123 PINC;\r
3124 tok->type = TK_CC_AND;\r
3125 }\r
3126 }\r
3127\r
3128 end:\r
3129 *src = p;\r
3130 return tok->type;\r
3131}\r
3132\r
3133static int\r
3134fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)\r
3135{\r
3136 int r, num;\r
3137 OnigCodePoint c;\r
3138 OnigEncoding enc = env->enc;\r
3139 OnigSyntaxType* syn = env->syntax;\r
3140 UChar* prev;\r
3141 UChar* p = *src;\r
3142 PFETCH_READY;\r
3143\r
3144 start:\r
3145 if (PEND) {\r
3146 tok->type = TK_EOT;\r
3147 return tok->type;\r
3148 }\r
3149\r
3150 tok->type = TK_STRING;\r
3151 tok->base = 0;\r
3152 tok->backp = p;\r
3153\r
3154 PFETCH(c);\r
3155 if (IS_MC_ESC_CODE(c, syn)) {\r
3156 if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;\r
3157\r
3158 tok->backp = p;\r
3159 PFETCH(c);\r
3160\r
3161 tok->u.c = c;\r
3162 tok->escaped = 1;\r
3163 switch (c) {\r
3164 case '*':\r
3165 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF)) break;\r
3166 tok->type = TK_OP_REPEAT;\r
3167 tok->u.repeat.lower = 0;\r
3168 tok->u.repeat.upper = REPEAT_INFINITE;\r
3169 goto greedy_check;\r
3170 break;\r
3171\r
3172 case '+':\r
3173 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_PLUS_ONE_INF)) break;\r
3174 tok->type = TK_OP_REPEAT;\r
3175 tok->u.repeat.lower = 1;\r
3176 tok->u.repeat.upper = REPEAT_INFINITE;\r
3177 goto greedy_check;\r
3178 break;\r
3179\r
3180 case '?':\r
3181 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_QMARK_ZERO_ONE)) break;\r
3182 tok->type = TK_OP_REPEAT;\r
3183 tok->u.repeat.lower = 0;\r
3184 tok->u.repeat.upper = 1;\r
3185 greedy_check:\r
3186 if (!PEND && PPEEK_IS('?') &&\r
3187 IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY)) {\r
3188 PFETCH(c);\r
3189 tok->u.repeat.greedy = 0;\r
3190 tok->u.repeat.possessive = 0;\r
3191 }\r
3192 else {\r
3193 possessive_check:\r
3194 if (!PEND && PPEEK_IS('+') &&\r
3195 ((IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT) &&\r
3196 tok->type != TK_INTERVAL) ||\r
3197 (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL) &&\r
3198 tok->type == TK_INTERVAL))) {\r
3199 PFETCH(c);\r
3200 tok->u.repeat.greedy = 1;\r
3201 tok->u.repeat.possessive = 1;\r
3202 }\r
3203 else {\r
3204 tok->u.repeat.greedy = 1;\r
3205 tok->u.repeat.possessive = 0;\r
3206 }\r
3207 }\r
3208 break;\r
3209\r
3210 case '{':\r
3211 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break;\r
3212 r = fetch_range_quantifier(&p, end, tok, env);\r
3213 if (r < 0) return r; /* error */\r
3214 if (r == 0) goto greedy_check;\r
3215 else if (r == 2) { /* {n} */\r
3216 if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))\r
3217 goto possessive_check;\r
3218\r
3219 goto greedy_check;\r
3220 }\r
3221 /* r == 1 : normal char */\r
3222 break;\r
3223\r
3224 case '|':\r
3225 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_VBAR_ALT)) break;\r
3226 tok->type = TK_ALT;\r
3227 break;\r
3228\r
3229 case '(':\r
3230 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;\r
3231 tok->type = TK_SUBEXP_OPEN;\r
3232 break;\r
3233\r
3234 case ')':\r
3235 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;\r
3236 tok->type = TK_SUBEXP_CLOSE;\r
3237 break;\r
3238\r
3239 case 'w':\r
3240 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;\r
3241 tok->type = TK_CHAR_TYPE;\r
3242 tok->u.prop.ctype = ONIGENC_CTYPE_WORD;\r
3243 tok->u.prop.not = 0;\r
3244 break;\r
3245\r
3246 case 'W':\r
3247 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;\r
3248 tok->type = TK_CHAR_TYPE;\r
3249 tok->u.prop.ctype = ONIGENC_CTYPE_WORD;\r
3250 tok->u.prop.not = 1;\r
3251 break;\r
3252\r
3253 case 'b':\r
3254 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;\r
3255 tok->type = TK_ANCHOR;\r
3256 tok->u.anchor = ANCHOR_WORD_BOUND;\r
3257 break;\r
3258\r
3259 case 'B':\r
3260 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;\r
3261 tok->type = TK_ANCHOR;\r
3262 tok->u.anchor = ANCHOR_NOT_WORD_BOUND;\r
3263 break;\r
3264\r
3265#ifdef USE_WORD_BEGIN_END\r
3266 case '<':\r
3267 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;\r
3268 tok->type = TK_ANCHOR;\r
3269 tok->u.anchor = ANCHOR_WORD_BEGIN;\r
3270 break;\r
3271\r
3272 case '>':\r
3273 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;\r
3274 tok->type = TK_ANCHOR;\r
3275 tok->u.anchor = ANCHOR_WORD_END;\r
3276 break;\r
3277#endif\r
3278\r
3279 case 's':\r
3280 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;\r
3281 tok->type = TK_CHAR_TYPE;\r
3282 tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;\r
3283 tok->u.prop.not = 0;\r
3284 break;\r
3285\r
3286 case 'S':\r
3287 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;\r
3288 tok->type = TK_CHAR_TYPE;\r
3289 tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;\r
3290 tok->u.prop.not = 1;\r
3291 break;\r
3292\r
3293 case 'd':\r
3294 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;\r
3295 tok->type = TK_CHAR_TYPE;\r
3296 tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;\r
3297 tok->u.prop.not = 0;\r
3298 break;\r
3299\r
3300 case 'D':\r
3301 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;\r
3302 tok->type = TK_CHAR_TYPE;\r
3303 tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;\r
3304 tok->u.prop.not = 1;\r
3305 break;\r
3306\r
3307 case 'h':\r
3308 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;\r
3309 tok->type = TK_CHAR_TYPE;\r
3310 tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;\r
3311 tok->u.prop.not = 0;\r
3312 break;\r
3313\r
3314 case 'H':\r
3315 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;\r
3316 tok->type = TK_CHAR_TYPE;\r
3317 tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;\r
3318 tok->u.prop.not = 1;\r
3319 break;\r
3320\r
3321 case 'A':\r
3322 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;\r
3323 begin_buf:\r
3324 tok->type = TK_ANCHOR;\r
3325 tok->u.subtype = ANCHOR_BEGIN_BUF;\r
3326 break;\r
3327\r
3328 case 'Z':\r
3329 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;\r
3330 tok->type = TK_ANCHOR;\r
3331 tok->u.subtype = ANCHOR_SEMI_END_BUF;\r
3332 break;\r
3333\r
3334 case 'z':\r
3335 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;\r
3336 end_buf:\r
3337 tok->type = TK_ANCHOR;\r
3338 tok->u.subtype = ANCHOR_END_BUF;\r
3339 break;\r
3340\r
3341 case 'G':\r
3342 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR)) break;\r
3343 tok->type = TK_ANCHOR;\r
3344 tok->u.subtype = ANCHOR_BEGIN_POSITION;\r
3345 break;\r
3346\r
3347 case '`':\r
3348 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;\r
3349 goto begin_buf;\r
3350 break;\r
3351\r
3352 case '\'':\r
3353 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;\r
3354 goto end_buf;\r
3355 break;\r
3356\r
3357 case 'x':\r
3358 if (PEND) break;\r
3359\r
3360 prev = p;\r
3361 if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {\r
3362 PINC;\r
3363 num = scan_unsigned_hexadecimal_number(&p, end, 8, enc);\r
3364 if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;\r
3365 if (!PEND) {\r
3366 if (ONIGENC_IS_CODE_XDIGIT(enc, PPEEK))\r
3367 return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;\r
3368 }\r
3369\r
3370 if ((p > prev + enclen(enc, prev)) && !PEND && PPEEK_IS('}')) {\r
3371 PINC;\r
3372 tok->type = TK_CODE_POINT;\r
3373 tok->u.code = (OnigCodePoint )num;\r
3374 }\r
3375 else {\r
3376 /* can't read nothing or invalid format */\r
3377 p = prev;\r
3378 }\r
3379 }\r
3380 else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {\r
3381 num = scan_unsigned_hexadecimal_number(&p, end, 2, enc);\r
3382 if (num < 0) return ONIGERR_TOO_BIG_NUMBER;\r
3383 if (p == prev) { /* can't read nothing. */\r
3384 num = 0; /* but, it's not error */\r
3385 }\r
3386 tok->type = TK_RAW_BYTE;\r
3387 tok->base = 16;\r
3388 tok->u.c = num;\r
3389 }\r
3390 break;\r
3391\r
3392 case 'u':\r
3393 if (PEND) break;\r
3394\r
3395 prev = p;\r
3396 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {\r
3397 num = scan_unsigned_hexadecimal_number(&p, end, 4, enc);\r
3398 if (num < 0) return ONIGERR_TOO_BIG_NUMBER;\r
3399 if (p == prev) { /* can't read nothing. */\r
3400 num = 0; /* but, it's not error */\r
3401 }\r
3402 tok->type = TK_CODE_POINT;\r
3403 tok->base = 16;\r
3404 tok->u.code = (OnigCodePoint )num;\r
3405 }\r
3406 break;\r
3407\r
3408 case '1': case '2': case '3': case '4':\r
3409 case '5': case '6': case '7': case '8': case '9':\r
3410 PUNFETCH;\r
3411 prev = p;\r
3412 num = onig_scan_unsigned_number(&p, end, enc);\r
3413 if (num < 0 || num > ONIG_MAX_BACKREF_NUM) {\r
3414 goto skip_backref;\r
3415 }\r
3416\r
3417 if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) && \r
3418 (num <= env->num_mem || num <= 9)) { /* This spec. from GNU regex */\r
3419 if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {\r
3420 if (num > env->num_mem || IS_NULL(SCANENV_MEM_NODES(env)[num]))\r
3421 return ONIGERR_INVALID_BACKREF;\r
3422 }\r
3423\r
3424 tok->type = TK_BACKREF;\r
3425 tok->u.backref.num = 1;\r
3426 tok->u.backref.ref1 = num;\r
3427 tok->u.backref.by_name = 0;\r
3428#ifdef USE_BACKREF_WITH_LEVEL\r
3429 tok->u.backref.exist_level = 0;\r
3430#endif\r
3431 break;\r
3432 }\r
3433\r
3434 skip_backref:\r
3435 if (c == '8' || c == '9') {\r
3436 /* normal char */\r
3437 p = prev; PINC;\r
3438 break;\r
3439 }\r
3440\r
3441 p = prev;\r
3442 /* fall through */\r
3443 case '0':\r
3444 if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {\r
3445 prev = p;\r
3446 num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), enc);\r
3447 if (num < 0) return ONIGERR_TOO_BIG_NUMBER;\r
3448 if (p == prev) { /* can't read nothing. */\r
3449 num = 0; /* but, it's not error */\r
3450 }\r
3451 tok->type = TK_RAW_BYTE;\r
3452 tok->base = 8;\r
3453 tok->u.c = num;\r
3454 }\r
3455 else if (c != '0') {\r
3456 PINC;\r
3457 }\r
3458 break;\r
3459\r
3460#ifdef USE_NAMED_GROUP\r
3461 case 'k':\r
3462 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) {\r
3463 PFETCH(c);\r
3464 if (c == '<' || c == '\'') {\r
3465 UChar* name_end;\r
3466 int* backs;\r
3467 int back_num;\r
3468\r
3469 prev = p;\r
3470\r
3471#ifdef USE_BACKREF_WITH_LEVEL\r
3472 name_end = NULL_UCHARP; /* no need. escape gcc warning. */\r
3473 r = fetch_name_with_level((OnigCodePoint )c, &p, end, &name_end,\r
3474 env, &back_num, &tok->u.backref.level);\r
3475 if (r == 1) tok->u.backref.exist_level = 1;\r
3476 else tok->u.backref.exist_level = 0;\r
3477#else\r
3478 r = fetch_name(&p, end, &name_end, env, &back_num, 1);\r
3479#endif\r
3480 if (r < 0) return r;\r
3481\r
3482 if (back_num != 0) {\r
3483 if (back_num < 0) {\r
3484 back_num = BACKREF_REL_TO_ABS(back_num, env);\r
3485 if (back_num <= 0)\r
3486 return ONIGERR_INVALID_BACKREF;\r
3487 }\r
3488\r
3489 if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {\r
3490 if (back_num > env->num_mem ||\r
3491 IS_NULL(SCANENV_MEM_NODES(env)[back_num]))\r
3492 return ONIGERR_INVALID_BACKREF;\r
3493 }\r
3494 tok->type = TK_BACKREF;\r
3495 tok->u.backref.by_name = 0;\r
3496 tok->u.backref.num = 1;\r
3497 tok->u.backref.ref1 = back_num;\r
3498 }\r
3499 else {\r
3500 num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);\r
3501 if (num <= 0) {\r
3502 onig_scan_env_set_error_string(env,\r
3503 ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);\r
3504 return ONIGERR_UNDEFINED_NAME_REFERENCE;\r
3505 }\r
3506 if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {\r
3507 int i;\r
3508 for (i = 0; i < num; i++) {\r
3509 if (backs[i] > env->num_mem ||\r
3510 IS_NULL(SCANENV_MEM_NODES(env)[backs[i]]))\r
3511 return ONIGERR_INVALID_BACKREF;\r
3512 }\r
3513 }\r
3514\r
3515 tok->type = TK_BACKREF;\r
3516 tok->u.backref.by_name = 1;\r
3517 if (num == 1) {\r
3518 tok->u.backref.num = 1;\r
3519 tok->u.backref.ref1 = backs[0];\r
3520 }\r
3521 else {\r
3522 tok->u.backref.num = num;\r
3523 tok->u.backref.refs = backs;\r
3524 }\r
3525 }\r
3526 }\r
3527 else\r
3528 PUNFETCH;\r
3529 }\r
3530 break;\r
3531#endif\r
3532\r
3533#ifdef USE_SUBEXP_CALL\r
3534 case 'g':\r
3535 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) {\r
3536 PFETCH(c);\r
3537 if (c == '<' || c == '\'') {\r
3538 int gnum;\r
3539 UChar* name_end;\r
3540\r
3541 prev = p;\r
3542 r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &gnum, 1);\r
3543 if (r < 0) return r;\r
3544\r
3545 tok->type = TK_CALL;\r
3546 tok->u.call.name = prev;\r
3547 tok->u.call.name_end = name_end;\r
3548 tok->u.call.gnum = gnum;\r
3549 }\r
3550 else\r
3551 PUNFETCH;\r
3552 }\r
3553 break;\r
3554#endif\r
3555\r
3556 case 'Q':\r
3557 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE)) {\r
3558 tok->type = TK_QUOTE_OPEN;\r
3559 }\r
3560 break;\r
3561\r
3562 case 'p':\r
3563 case 'P':\r
3564 if (PPEEK_IS('{') &&\r
3565 IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {\r
3566 PINC;\r
3567 tok->type = TK_CHAR_PROPERTY;\r
3568 tok->u.prop.not = (c == 'P' ? 1 : 0);\r
3569\r
3570 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {\r
3571 PFETCH(c);\r
3572 if (c == '^') {\r
3573 tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);\r
3574 }\r
3575 else\r
3576 PUNFETCH;\r
3577 }\r
3578 }\r
3579 break;\r
3580\r
3581 default:\r
3582 PUNFETCH;\r
3583 num = fetch_escaped_value(&p, end, env);\r
3584 if (num < 0) return num;\r
3585 /* set_raw: */\r
3586 if (tok->u.c != num) {\r
3587 tok->type = TK_CODE_POINT;\r
3588 tok->u.code = (OnigCodePoint )num;\r
3589 }\r
3590 else { /* string */\r
3591 p = tok->backp + enclen(enc, tok->backp);\r
3592 }\r
3593 break;\r
3594 }\r
3595 }\r
3596 else {\r
3597 tok->u.c = c;\r
3598 tok->escaped = 0;\r
3599\r
3600#ifdef USE_VARIABLE_META_CHARS\r
3601 if ((c != ONIG_INEFFECTIVE_META_CHAR) &&\r
3602 IS_SYNTAX_OP(syn, ONIG_SYN_OP_VARIABLE_META_CHARACTERS)) {\r
3603 if (c == MC_ANYCHAR(syn))\r
3604 goto any_char;\r
3605 else if (c == MC_ANYTIME(syn))\r
3606 goto anytime;\r
3607 else if (c == MC_ZERO_OR_ONE_TIME(syn))\r
3608 goto zero_or_one_time;\r
3609 else if (c == MC_ONE_OR_MORE_TIME(syn))\r
3610 goto one_or_more_time;\r
3611 else if (c == MC_ANYCHAR_ANYTIME(syn)) {\r
3612 tok->type = TK_ANYCHAR_ANYTIME;\r
3613 goto out;\r
3614 }\r
3615 }\r
3616#endif\r
3617\r
3618 switch (c) {\r
3619 case '.':\r
3620 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_DOT_ANYCHAR)) break;\r
3621#ifdef USE_VARIABLE_META_CHARS\r
3622 any_char:\r
3623#endif\r
3624 tok->type = TK_ANYCHAR;\r
3625 break;\r
3626\r
3627 case '*':\r
3628 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ASTERISK_ZERO_INF)) break;\r
3629#ifdef USE_VARIABLE_META_CHARS\r
3630 anytime:\r
3631#endif\r
3632 tok->type = TK_OP_REPEAT;\r
3633 tok->u.repeat.lower = 0;\r
3634 tok->u.repeat.upper = REPEAT_INFINITE;\r
3635 goto greedy_check;\r
3636 break;\r
3637\r
3638 case '+':\r
3639 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_PLUS_ONE_INF)) break;\r
3640#ifdef USE_VARIABLE_META_CHARS\r
3641 one_or_more_time:\r
3642#endif\r
3643 tok->type = TK_OP_REPEAT;\r
3644 tok->u.repeat.lower = 1;\r
3645 tok->u.repeat.upper = REPEAT_INFINITE;\r
3646 goto greedy_check;\r
3647 break;\r
3648\r
3649 case '?':\r
3650 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_ZERO_ONE)) break;\r
3651#ifdef USE_VARIABLE_META_CHARS\r
3652 zero_or_one_time:\r
3653#endif\r
3654 tok->type = TK_OP_REPEAT;\r
3655 tok->u.repeat.lower = 0;\r
3656 tok->u.repeat.upper = 1;\r
3657 goto greedy_check;\r
3658 break;\r
3659\r
3660 case '{':\r
3661 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break;\r
3662 r = fetch_range_quantifier(&p, end, tok, env);\r
3663 if (r < 0) return r; /* error */\r
3664 if (r == 0) goto greedy_check;\r
3665 else if (r == 2) { /* {n} */\r
3666 if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))\r
3667 goto possessive_check;\r
3668\r
3669 goto greedy_check;\r
3670 }\r
3671 /* r == 1 : normal char */\r
3672 break;\r
3673\r
3674 case '|':\r
3675 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_VBAR_ALT)) break;\r
3676 tok->type = TK_ALT;\r
3677 break;\r
3678\r
3679 case '(':\r
3680 if (PPEEK_IS('?') &&\r
3681 IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {\r
3682 PINC;\r
3683 if (PPEEK_IS('#')) {\r
3684 PFETCH(c);\r
3685 while (1) {\r
3686 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
3687 PFETCH(c);\r
3688 if (c == MC_ESC(syn)) {\r
3689 if (!PEND) PFETCH(c);\r
3690 }\r
3691 else {\r
3692 if (c == ')') break;\r
3693 }\r
3694 }\r
3695 goto start;\r
3696 }\r
3697 PUNFETCH;\r
3698 }\r
3699\r
3700 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;\r
3701 tok->type = TK_SUBEXP_OPEN;\r
3702 break;\r
3703\r
3704 case ')':\r
3705 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;\r
3706 tok->type = TK_SUBEXP_CLOSE;\r
3707 break;\r
3708\r
3709 case '^':\r
3710 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;\r
3711 tok->type = TK_ANCHOR;\r
3712 tok->u.subtype = (IS_SINGLELINE(env->option)\r
3713 ? ANCHOR_BEGIN_BUF : ANCHOR_BEGIN_LINE);\r
3714 break;\r
3715\r
3716 case '$':\r
3717 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;\r
3718 tok->type = TK_ANCHOR;\r
3719 tok->u.subtype = (IS_SINGLELINE(env->option)\r
3720 ? ANCHOR_SEMI_END_BUF : ANCHOR_END_LINE);\r
3721 break;\r
3722\r
3723 case '[':\r
3724 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACKET_CC)) break;\r
3725 tok->type = TK_CC_OPEN;\r
3726 break;\r
3727\r
3728 case ']':\r
3729 if (*src > env->pattern) /* /].../ is allowed. */\r
3730 CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]");\r
3731 break;\r
3732\r
3733 case '#':\r
3734 if (IS_EXTEND(env->option)) {\r
3735 while (!PEND) {\r
3736 PFETCH(c);\r
3737 if (ONIGENC_IS_CODE_NEWLINE(enc, c))\r
3738 break;\r
3739 }\r
3740 goto start;\r
3741 break;\r
3742 }\r
3743 break;\r
3744\r
3745 case ' ': case '\t': case '\n': case '\r': case '\f':\r
3746 if (IS_EXTEND(env->option))\r
3747 goto start;\r
3748 break;\r
3749\r
3750 default:\r
3751 /* string */\r
3752 break;\r
3753 }\r
3754 }\r
3755\r
3756#ifdef USE_VARIABLE_META_CHARS\r
3757 out:\r
3758#endif\r
3759 *src = p;\r
3760 return tok->type;\r
3761}\r
3762\r
3763static int\r
3764add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not,\r
3765 OnigEncoding enc ARG_UNUSED,\r
3766 OnigCodePoint sb_out, const OnigCodePoint mbr[])\r
3767{\r
3768 int i, r;\r
3769 OnigCodePoint j;\r
3770\r
3771 int n = ONIGENC_CODE_RANGE_NUM(mbr);\r
3772\r
3773 if (not == 0) {\r
3774 for (i = 0; i < n; i++) {\r
3775 for (j = ONIGENC_CODE_RANGE_FROM(mbr, i);\r
3776 j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) {\r
3777 if (j >= sb_out) {\r
3778 if (j == ONIGENC_CODE_RANGE_TO(mbr, i)) i++;\r
3779 else if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) {\r
3780 r = add_code_range_to_buf(&(cc->mbuf), j,\r
3781 ONIGENC_CODE_RANGE_TO(mbr, i));\r
3782 if (r != 0) return r;\r
3783 i++;\r
3784 }\r
3785\r
3786 goto sb_end;\r
3787 }\r
3788 BITSET_SET_BIT(cc->bs, j);\r
3789 }\r
3790 }\r
3791\r
3792 sb_end:\r
3793 for ( ; i < n; i++) {\r
3794 r = add_code_range_to_buf(&(cc->mbuf),\r
3795 ONIGENC_CODE_RANGE_FROM(mbr, i),\r
3796 ONIGENC_CODE_RANGE_TO(mbr, i));\r
3797 if (r != 0) return r;\r
3798 }\r
3799 }\r
3800 else {\r
3801 OnigCodePoint prev = 0;\r
3802\r
3803 for (i = 0; i < n; i++) {\r
3804 for (j = prev;\r
3805 j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) {\r
3806 if (j >= sb_out) {\r
3807 goto sb_end2;\r
3808 }\r
3809 BITSET_SET_BIT(cc->bs, j);\r
3810 }\r
3811 prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;\r
3812 }\r
3813 for (j = prev; j < sb_out; j++) {\r
3814 BITSET_SET_BIT(cc->bs, j);\r
3815 }\r
3816\r
3817 sb_end2:\r
3818 prev = sb_out;\r
3819\r
3820 for (i = 0; i < n; i++) {\r
3821 if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) {\r
3822 r = add_code_range_to_buf(&(cc->mbuf), prev,\r
3823 ONIGENC_CODE_RANGE_FROM(mbr, i) - 1);\r
3824 if (r != 0) return r;\r
3825 }\r
3826 prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;\r
3827 }\r
3828 if (prev < 0x7fffffff) {\r
3829 r = add_code_range_to_buf(&(cc->mbuf), prev, 0x7fffffff);\r
3830 if (r != 0) return r;\r
3831 }\r
3832 }\r
3833\r
3834 return 0;\r
3835}\r
3836\r
3837static int\r
3838add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)\r
3839{\r
3840 int c, r;\r
3841 const OnigCodePoint *ranges;\r
3842 OnigCodePoint sb_out;\r
3843 OnigEncoding enc = env->enc;\r
3844\r
3845 r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges);\r
3846 if (r == 0) {\r
3847 return add_ctype_to_cc_by_range(cc, ctype, not, env->enc, sb_out, ranges);\r
3848 }\r
3849 else if (r != ONIG_NO_SUPPORT_CONFIG) {\r
3850 return r;\r
3851 }\r
3852\r
3853 r = 0;\r
3854 switch (ctype) {\r
3855 case ONIGENC_CTYPE_ALPHA:\r
3856 case ONIGENC_CTYPE_BLANK:\r
3857 case ONIGENC_CTYPE_CNTRL:\r
3858 case ONIGENC_CTYPE_DIGIT:\r
3859 case ONIGENC_CTYPE_LOWER:\r
3860 case ONIGENC_CTYPE_PUNCT:\r
3861 case ONIGENC_CTYPE_SPACE:\r
3862 case ONIGENC_CTYPE_UPPER:\r
3863 case ONIGENC_CTYPE_XDIGIT:\r
3864 case ONIGENC_CTYPE_ASCII:\r
3865 case ONIGENC_CTYPE_ALNUM:\r
3866 if (not != 0) {\r
3867 for (c = 0; c < SINGLE_BYTE_SIZE; c++) {\r
3868 if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))\r
3869 BITSET_SET_BIT(cc->bs, c);\r
3870 }\r
3871 ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);\r
3872 }\r
3873 else {\r
3874 for (c = 0; c < SINGLE_BYTE_SIZE; c++) {\r
3875 if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))\r
3876 BITSET_SET_BIT(cc->bs, c);\r
3877 }\r
3878 }\r
3879 break;\r
3880\r
3881 case ONIGENC_CTYPE_GRAPH:\r
3882 case ONIGENC_CTYPE_PRINT:\r
3883 if (not != 0) {\r
3884 for (c = 0; c < SINGLE_BYTE_SIZE; c++) {\r
3885 if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))\r
3886 BITSET_SET_BIT(cc->bs, c);\r
3887 }\r
3888 }\r
3889 else {\r
3890 for (c = 0; c < SINGLE_BYTE_SIZE; c++) {\r
3891 if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))\r
3892 BITSET_SET_BIT(cc->bs, c);\r
3893 }\r
3894 ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);\r
3895 }\r
3896 break;\r
3897\r
3898 case ONIGENC_CTYPE_WORD:\r
3899 if (not == 0) {\r
3900 for (c = 0; c < SINGLE_BYTE_SIZE; c++) {\r
3901 if (IS_CODE_SB_WORD(enc, c)) BITSET_SET_BIT(cc->bs, c);\r
3902 }\r
3903 ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);\r
3904 }\r
3905 else {\r
3906 for (c = 0; c < SINGLE_BYTE_SIZE; c++) {\r
3907 if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) /* check invalid code point */\r
3908 && ! ONIGENC_IS_CODE_WORD(enc, c))\r
3909 BITSET_SET_BIT(cc->bs, c);\r
3910 }\r
3911 }\r
3912 break;\r
3913\r
3914 default:\r
3915 return ONIGERR_PARSER_BUG;\r
3916 break;\r
3917 }\r
3918\r
3919 return r;\r
3920}\r
3921\r
3922static int\r
3923parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)\r
3924{\r
3925#define POSIX_BRACKET_CHECK_LIMIT_LENGTH 20\r
3926#define POSIX_BRACKET_NAME_MIN_LEN 4\r
3927\r
3928 static PosixBracketEntryType PBS[] = {\r
3929 { (UChar* )"alnum", ONIGENC_CTYPE_ALNUM, 5 },\r
3930 { (UChar* )"alpha", ONIGENC_CTYPE_ALPHA, 5 },\r
3931 { (UChar* )"blank", ONIGENC_CTYPE_BLANK, 5 },\r
3932 { (UChar* )"cntrl", ONIGENC_CTYPE_CNTRL, 5 },\r
3933 { (UChar* )"digit", ONIGENC_CTYPE_DIGIT, 5 },\r
3934 { (UChar* )"graph", ONIGENC_CTYPE_GRAPH, 5 },\r
3935 { (UChar* )"lower", ONIGENC_CTYPE_LOWER, 5 },\r
3936 { (UChar* )"print", ONIGENC_CTYPE_PRINT, 5 },\r
3937 { (UChar* )"punct", ONIGENC_CTYPE_PUNCT, 5 },\r
3938 { (UChar* )"space", ONIGENC_CTYPE_SPACE, 5 },\r
3939 { (UChar* )"upper", ONIGENC_CTYPE_UPPER, 5 },\r
3940 { (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 },\r
3941 { (UChar* )"ascii", ONIGENC_CTYPE_ASCII, 5 },\r
3942 { (UChar* )"word", ONIGENC_CTYPE_WORD, 4 },\r
3943 { (UChar* )NULL, -1, 0 }\r
3944 };\r
3945\r
3946 PosixBracketEntryType *pb;\r
3947 int not, i, r;\r
3948 OnigCodePoint c;\r
3949 OnigEncoding enc = env->enc;\r
3950 UChar *p = *src;\r
3951\r
3952 if (PPEEK_IS('^')) {\r
3953 PINC_S;\r
3954 not = 1;\r
3955 }\r
3956 else\r
3957 not = 0;\r
3958\r
3959 if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MIN_LEN + 3)\r
3960 goto not_posix_bracket;\r
3961\r
3962 for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {\r
3963 if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) {\r
3964 p = (UChar* )onigenc_step(enc, p, end, pb->len);\r
3965 if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0)\r
3966 return ONIGERR_INVALID_POSIX_BRACKET_TYPE;\r
3967\r
3968 r = add_ctype_to_cc(cc, pb->ctype, not, env);\r
3969 if (r != 0) return r;\r
3970\r
3971 PINC_S; PINC_S;\r
3972 *src = p;\r
3973 return 0;\r
3974 }\r
3975 }\r
3976\r
3977 not_posix_bracket:\r
3978 c = 0;\r
3979 i = 0;\r
3980 while (!PEND && ((c = PPEEK) != ':') && c != ']') {\r
3981 PINC_S;\r
3982 if (++i > POSIX_BRACKET_CHECK_LIMIT_LENGTH) break;\r
3983 }\r
3984 if (c == ':' && ! PEND) {\r
3985 PINC_S;\r
3986 if (! PEND) {\r
3987 PFETCH_S(c);\r
3988 if (c == ']')\r
3989 return ONIGERR_INVALID_POSIX_BRACKET_TYPE;\r
3990 }\r
3991 }\r
3992\r
3993 return 1; /* 1: is not POSIX bracket, but no error. */\r
3994}\r
3995\r
3996static int\r
3997fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env)\r
3998{\r
3999 int r;\r
4000 OnigCodePoint c;\r
4001 OnigEncoding enc = env->enc;\r
4002 UChar *prev, *start, *p = *src;\r
4003\r
4004 r = 0;\r
4005 start = prev = p;\r
4006\r
4007 while (!PEND) {\r
4008 prev = p;\r
4009 PFETCH_S(c);\r
4010 if (c == '}') {\r
4011 r = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, start, prev);\r
4012 if (r < 0) break;\r
4013\r
4014 *src = p;\r
4015 return r;\r
4016 }\r
4017 else if (c == '(' || c == ')' || c == '{' || c == '|') {\r
4018 r = ONIGERR_INVALID_CHAR_PROPERTY_NAME;\r
4019 break;\r
4020 }\r
4021 }\r
4022\r
4023 onig_scan_env_set_error_string(env, r, *src, prev);\r
4024 return r;\r
4025}\r
4026\r
4027static int\r
4028parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end,\r
4029 ScanEnv* env)\r
4030{\r
4031 int r, ctype;\r
4032 CClassNode* cc;\r
4033\r
4034 ctype = fetch_char_property_to_ctype(src, end, env);\r
4035 if (ctype < 0) return ctype;\r
4036\r
4037 *np = node_new_cclass();\r
4038 CHECK_NULL_RETURN_MEMERR(*np);\r
4039 cc = NCCLASS(*np);\r
4040 r = add_ctype_to_cc(cc, ctype, 0, env);\r
4041 if (r != 0) return r;\r
4042 if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);\r
4043\r
4044 return 0;\r
4045}\r
4046\r
4047\r
/* Parser state while the elements of a character class are being read. */
enum CCSTATE {
  CCS_VALUE    = 0,  /* a value (or class) has been read                  */
  CCS_RANGE    = 1,  /* "value-" has been read, the range end is expected */
  CCS_COMPLETE = 2,  /* a complete "from-to" range has just been stored   */
  CCS_START    = 3   /* nothing read yet (start of class or after "&&")   */
};
4054\r
/* Kind of the most recently read character class element. */
enum CCVALTYPE {
  CCV_SB         = 0,  /* value recorded in the bitset (cc->bs)      */
  CCV_CODE_POINT = 1,  /* value recorded as a code range (cc->mbuf)  */
  CCV_CLASS      = 2   /* a whole class (ctype, POSIX bracket, ...)  */
};
4060\r
4061static int\r
4062next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type,\r
4063 enum CCSTATE* state, ScanEnv* env)\r
4064{\r
4065 int r;\r
4066\r
4067 if (*state == CCS_RANGE)\r
4068 return ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE;\r
4069\r
4070 if (*state == CCS_VALUE && *type != CCV_CLASS) {\r
4071 if (*type == CCV_SB)\r
4072 BITSET_SET_BIT(cc->bs, (int )(*vs));\r
4073 else if (*type == CCV_CODE_POINT) {\r
4074 r = add_code_range(&(cc->mbuf), env, *vs, *vs);\r
4075 if (r < 0) return r;\r
4076 }\r
4077 }\r
4078\r
4079 *state = CCS_VALUE;\r
4080 *type = CCV_CLASS;\r
4081 return 0;\r
4082}\r
4083\r
4084static int\r
4085next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,\r
4086 int* vs_israw, int v_israw,\r
4087 enum CCVALTYPE intype, enum CCVALTYPE* type,\r
4088 enum CCSTATE* state, ScanEnv* env)\r
4089{\r
4090 int r;\r
4091\r
4092 switch (*state) {\r
4093 case CCS_VALUE:\r
4094 if (*type == CCV_SB)\r
4095 BITSET_SET_BIT(cc->bs, (int )(*vs));\r
4096 else if (*type == CCV_CODE_POINT) {\r
4097 r = add_code_range(&(cc->mbuf), env, *vs, *vs);\r
4098 if (r < 0) return r;\r
4099 }\r
4100 break;\r
4101\r
4102 case CCS_RANGE:\r
4103 if (intype == *type) {\r
4104 if (intype == CCV_SB) {\r
4105 if (*vs > 0xff || v > 0xff)\r
4106 return ONIGERR_INVALID_CODE_POINT_VALUE;\r
4107\r
4108 if (*vs > v) {\r
4109 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))\r
4110 goto ccs_range_end;\r
4111 else\r
4112 return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;\r
4113 }\r
4114 bitset_set_range(cc->bs, (int )*vs, (int )v);\r
4115 }\r
4116 else {\r
4117 r = add_code_range(&(cc->mbuf), env, *vs, v);\r
4118 if (r < 0) return r;\r
4119 }\r
4120 }\r
4121 else {\r
4122#if 0\r
4123 if (intype == CCV_CODE_POINT && *type == CCV_SB) {\r
4124#endif\r
4125 if (*vs > v) {\r
4126 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))\r
4127 goto ccs_range_end;\r
4128 else\r
4129 return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;\r
4130 }\r
4131 bitset_set_range(cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff));\r
4132 r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*vs, v);\r
4133 if (r < 0) return r;\r
4134#if 0\r
4135 }\r
4136 else\r
4137 return ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE;\r
4138#endif\r
4139 }\r
4140 ccs_range_end:\r
4141 *state = CCS_COMPLETE;\r
4142 break;\r
4143\r
4144 case CCS_COMPLETE:\r
4145 case CCS_START:\r
4146 *state = CCS_VALUE;\r
4147 break;\r
4148\r
4149 default:\r
4150 break;\r
4151 }\r
4152\r
4153 *vs_israw = v_israw;\r
4154 *vs = v;\r
4155 *type = intype;\r
4156 return 0;\r
4157}\r
4158\r
4159static int\r
4160code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,\r
4161 ScanEnv* env)\r
4162{\r
4163 int in_esc;\r
4164 OnigCodePoint code;\r
4165 OnigEncoding enc = env->enc;\r
4166 UChar* p = from;\r
4167\r
4168 in_esc = 0;\r
4169 while (! PEND) {\r
4170 if (ignore_escaped && in_esc) {\r
4171 in_esc = 0;\r
4172 }\r
4173 else {\r
4174 PFETCH_S(code);\r
4175 if (code == c) return 1;\r
4176 if (code == MC_ESC(env->syntax)) in_esc = 1;\r
4177 }\r
4178 }\r
4179 return 0;\r
4180}\r
4181\r
4182static int\r
4183parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,\r
4184 ScanEnv* env)\r
4185{\r
4186 int r, neg, len, fetched, and_start;\r
4187 OnigCodePoint v, vs;\r
4188 UChar *p;\r
4189 Node* node;\r
4190 CClassNode *cc, *prev_cc;\r
4191 CClassNode work_cc;\r
4192\r
4193 enum CCSTATE state;\r
4194 enum CCVALTYPE val_type, in_type;\r
4195 int val_israw, in_israw;\r
4196\r
4197 prev_cc = (CClassNode* )NULL;\r
4198 *np = NULL_NODE;\r
4199 r = fetch_token_in_cc(tok, src, end, env);\r
4200 if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) {\r
4201 neg = 1;\r
4202 r = fetch_token_in_cc(tok, src, end, env);\r
4203 }\r
4204 else {\r
4205 neg = 0;\r
4206 }\r
4207\r
4208 if (r < 0) return r;\r
4209 if (r == TK_CC_CLOSE) {\r
4210 if (! code_exist_check((OnigCodePoint )']',\r
4211 *src, env->pattern_end, 1, env))\r
4212 return ONIGERR_EMPTY_CHAR_CLASS;\r
4213\r
4214 CC_ESC_WARN(env, (UChar* )"]");\r
4215 r = tok->type = TK_CHAR; /* allow []...] */\r
4216 }\r
4217\r
4218 *np = node = node_new_cclass();\r
4219 CHECK_NULL_RETURN_MEMERR(node);\r
4220 cc = NCCLASS(node);\r
4221\r
4222 and_start = 0;\r
4223 state = CCS_START;\r
4224 p = *src;\r
4225 while (r != TK_CC_CLOSE) {\r
4226 fetched = 0;\r
4227 switch (r) {\r
4228 case TK_CHAR:\r
4229 len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.c);\r
4230 if (len > 1) {\r
4231 in_type = CCV_CODE_POINT;\r
4232 }\r
4233 else if (len < 0) {\r
4234 r = len;\r
4235 goto err;\r
4236 }\r
4237 else {\r
4238 sb_char:\r
4239 in_type = CCV_SB;\r
4240 }\r
4241 v = (OnigCodePoint )tok->u.c;\r
4242 in_israw = 0;\r
4243 goto val_entry2;\r
4244 break;\r
4245\r
4246 case TK_RAW_BYTE:\r
4247 /* tok->base != 0 : octal or hexadec. */\r
4248 if (! ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base != 0) {\r
4249 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];\r
4250 UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN;\r
4251 UChar* psave = p;\r
4252 int i, base = tok->base;\r
4253\r
4254 buf[0] = (UChar)tok->u.c;\r
4255 for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) {\r
4256 r = fetch_token_in_cc(tok, &p, end, env);\r
4257 if (r < 0) goto err;\r
4258 if (r != TK_RAW_BYTE || tok->base != base) {\r
4259 fetched = 1;\r
4260 break;\r
4261 }\r
4262 buf[i] = (UChar)tok->u.c;\r
4263 }\r
4264\r
4265 if (i < ONIGENC_MBC_MINLEN(env->enc)) {\r
4266 r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;\r
4267 goto err;\r
4268 }\r
4269\r
4270 len = enclen(env->enc, buf);\r
4271 if (i < len) {\r
4272 r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;\r
4273 goto err;\r
4274 }\r
4275 else if (i > len) { /* fetch back */\r
4276 p = psave;\r
4277 for (i = 1; i < len; i++) {\r
4278 r = fetch_token_in_cc(tok, &p, end, env);\r
4279 }\r
4280 fetched = 0;\r
4281 }\r
4282\r
4283 if (i == 1) {\r
4284 v = (OnigCodePoint )buf[0];\r
4285 goto raw_single;\r
4286 }\r
4287 else {\r
4288 v = ONIGENC_MBC_TO_CODE(env->enc, buf, bufe);\r
4289 in_type = CCV_CODE_POINT;\r
4290 }\r
4291 }\r
4292 else {\r
4293 v = (OnigCodePoint )tok->u.c;\r
4294 raw_single:\r
4295 in_type = CCV_SB;\r
4296 }\r
4297 in_israw = 1;\r
4298 goto val_entry2;\r
4299 break;\r
4300\r
4301 case TK_CODE_POINT:\r
4302 v = tok->u.code;\r
4303 in_israw = 1;\r
4304 val_entry:\r
4305 len = ONIGENC_CODE_TO_MBCLEN(env->enc, v);\r
4306 if (len < 0) {\r
4307 r = len;\r
4308 goto err;\r
4309 }\r
4310 in_type = (len == 1 ? CCV_SB : CCV_CODE_POINT);\r
4311 val_entry2:\r
4312 r = next_state_val(cc, &vs, v, &val_israw, in_israw, in_type, &val_type,\r
4313 &state, env);\r
4314 if (r != 0) goto err;\r
4315 break;\r
4316\r
4317 case TK_POSIX_BRACKET_OPEN:\r
4318 r = parse_posix_bracket(cc, &p, end, env);\r
4319 if (r < 0) goto err;\r
4320 if (r == 1) { /* is not POSIX bracket */\r
4321 CC_ESC_WARN(env, (UChar* )"[");\r
4322 p = tok->backp;\r
4323 v = (OnigCodePoint )tok->u.c;\r
4324 in_israw = 0;\r
4325 goto val_entry;\r
4326 }\r
4327 goto next_class;\r
4328 break;\r
4329\r
4330 case TK_CHAR_TYPE:\r
4331 r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not, env);\r
4332 if (r != 0) return r;\r
4333\r
4334 next_class:\r
4335 r = next_state_class(cc, &vs, &val_type, &state, env);\r
4336 if (r != 0) goto err;\r
4337 break;\r
4338\r
4339 case TK_CHAR_PROPERTY:\r
4340 {\r
4341 int ctype;\r
4342\r
4343 ctype = fetch_char_property_to_ctype(&p, end, env);\r
4344 if (ctype < 0) return ctype;\r
4345 r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, env);\r
4346 if (r != 0) return r;\r
4347 goto next_class;\r
4348 }\r
4349 break;\r
4350\r
4351 case TK_CC_RANGE:\r
4352 if (state == CCS_VALUE) {\r
4353 r = fetch_token_in_cc(tok, &p, end, env);\r
4354 if (r < 0) goto err;\r
4355 fetched = 1;\r
4356 if (r == TK_CC_CLOSE) { /* allow [x-] */\r
4357 range_end_val:\r
4358 v = (OnigCodePoint )'-';\r
4359 in_israw = 0;\r
4360 goto val_entry;\r
4361 }\r
4362 else if (r == TK_CC_AND) {\r
4363 CC_ESC_WARN(env, (UChar* )"-");\r
4364 goto range_end_val;\r
4365 }\r
4366 state = CCS_RANGE;\r
4367 }\r
4368 else if (state == CCS_START) {\r
4369 /* [-xa] is allowed */\r
4370 v = (OnigCodePoint )tok->u.c;\r
4371 in_israw = 0;\r
4372\r
4373 r = fetch_token_in_cc(tok, &p, end, env);\r
4374 if (r < 0) goto err;\r
4375 fetched = 1;\r
4376 /* [--x] or [a&&-x] is warned. */\r
4377 if (r == TK_CC_RANGE || and_start != 0)\r
4378 CC_ESC_WARN(env, (UChar* )"-");\r
4379\r
4380 goto val_entry;\r
4381 }\r
4382 else if (state == CCS_RANGE) {\r
4383 CC_ESC_WARN(env, (UChar* )"-");\r
4384 goto sb_char; /* [!--x] is allowed */\r
4385 }\r
4386 else { /* CCS_COMPLETE */\r
4387 r = fetch_token_in_cc(tok, &p, end, env);\r
4388 if (r < 0) goto err;\r
4389 fetched = 1;\r
4390 if (r == TK_CC_CLOSE) goto range_end_val; /* allow [a-b-] */\r
4391 else if (r == TK_CC_AND) {\r
4392 CC_ESC_WARN(env, (UChar* )"-");\r
4393 goto range_end_val;\r
4394 }\r
4395 \r
4396 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) {\r
4397 CC_ESC_WARN(env, (UChar* )"-");\r
4398 goto sb_char; /* [0-9-a] is allowed as [0-9\-a] */\r
4399 }\r
4400 r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;\r
4401 goto err;\r
4402 }\r
4403 break;\r
4404\r
4405 case TK_CC_CC_OPEN: /* [ */\r
4406 {\r
4407 Node *anode;\r
4408 CClassNode* acc;\r
4409\r
4410 r = parse_char_class(&anode, tok, &p, end, env);\r
4411 if (r != 0) goto cc_open_err;\r
4412 acc = NCCLASS(anode);\r
4413 r = or_cclass(cc, acc, env->enc);\r
4414\r
4415 onig_node_free(anode);\r
4416 cc_open_err:\r
4417 if (r != 0) goto err;\r
4418 }\r
4419 break;\r
4420\r
4421 case TK_CC_AND: /* && */\r
4422 {\r
4423 if (state == CCS_VALUE) {\r
4424 r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,\r
4425 &val_type, &state, env);\r
4426 if (r != 0) goto err;\r
4427 }\r
4428 /* initialize local variables */\r
4429 and_start = 1;\r
4430 state = CCS_START;\r
4431\r
4432 if (IS_NOT_NULL(prev_cc)) {\r
4433 r = and_cclass(prev_cc, cc, env->enc);\r
4434 if (r != 0) goto err;\r
4435 bbuf_free(cc->mbuf);\r
4436 }\r
4437 else {\r
4438 prev_cc = cc;\r
4439 cc = &work_cc;\r
4440 }\r
4441 initialize_cclass(cc);\r
4442 }\r
4443 break;\r
4444\r
4445 case TK_EOT:\r
4446 r = ONIGERR_PREMATURE_END_OF_CHAR_CLASS;\r
4447 goto err;\r
4448 break;\r
4449 default:\r
4450 r = ONIGERR_PARSER_BUG;\r
4451 goto err;\r
4452 break;\r
4453 }\r
4454\r
4455 if (fetched)\r
4456 r = tok->type;\r
4457 else {\r
4458 r = fetch_token_in_cc(tok, &p, end, env);\r
4459 if (r < 0) goto err;\r
4460 }\r
4461 }\r
4462\r
4463 if (state == CCS_VALUE) {\r
4464 r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,\r
4465 &val_type, &state, env);\r
4466 if (r != 0) goto err;\r
4467 }\r
4468\r
4469 if (IS_NOT_NULL(prev_cc)) {\r
4470 r = and_cclass(prev_cc, cc, env->enc);\r
4471 if (r != 0) goto err;\r
4472 bbuf_free(cc->mbuf);\r
4473 cc = prev_cc;\r
4474 }\r
4475\r
4476 if (neg != 0)\r
4477 NCCLASS_SET_NOT(cc);\r
4478 else\r
4479 NCCLASS_CLEAR_NOT(cc);\r
4480 if (IS_NCCLASS_NOT(cc) &&\r
4481 IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) {\r
4482 int is_empty;\r
4483\r
4484 is_empty = (IS_NULL(cc->mbuf) ? 1 : 0);\r
4485 if (is_empty != 0)\r
4486 BITSET_IS_EMPTY(cc->bs, is_empty);\r
4487\r
4488 if (is_empty == 0) {\r
4489#define NEWLINE_CODE 0x0a\r
4490\r
4491 if (ONIGENC_IS_CODE_NEWLINE(env->enc, NEWLINE_CODE)) {\r
4492 if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1)\r
4493 BITSET_SET_BIT(cc->bs, NEWLINE_CODE);\r
4494 else\r
4495 add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE);\r
4496 }\r
4497 }\r
4498 }\r
4499 *src = p;\r
4500 return 0;\r
4501\r
4502 err:\r
4503 if (cc != NCCLASS(*np))\r
4504 bbuf_free(cc->mbuf);\r
4505 onig_node_free(*np);\r
4506 return r;\r
4507}\r
4508\r
4509static int parse_subexp(Node** top, OnigToken* tok, int term,\r
4510 UChar** src, UChar* end, ScanEnv* env);\r
4511\r
4512static int\r
4513parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,\r
4514 ScanEnv* env)\r
4515{\r
4516 int r, num;\r
4517 Node *target;\r
4518 OnigOptionType option;\r
4519 OnigCodePoint c;\r
4520 OnigEncoding enc = env->enc;\r
4521\r
4522#ifdef USE_NAMED_GROUP\r
4523 int list_capture;\r
4524#endif\r
4525\r
4526 UChar* p = *src;\r
4527 PFETCH_READY;\r
4528\r
4529 *np = NULL;\r
4530 if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;\r
4531\r
4532 option = env->option;\r
4533 if (PPEEK_IS('?') &&\r
4534 IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {\r
4535 PINC;\r
4536 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
4537\r
4538 PFETCH(c);\r
4539 switch (c) {\r
4540 case ':': /* (?:...) grouping only */\r
4541 group:\r
4542 r = fetch_token(tok, &p, end, env);\r
4543 if (r < 0) return r;\r
4544 r = parse_subexp(np, tok, term, &p, end, env);\r
4545 if (r < 0) return r;\r
4546 *src = p;\r
4547 return 1; /* group */\r
4548 break;\r
4549\r
4550 case '=':\r
4551 *np = onig_node_new_anchor(ANCHOR_PREC_READ);\r
4552 break;\r
4553 case '!': /* preceding read */\r
4554 *np = onig_node_new_anchor(ANCHOR_PREC_READ_NOT);\r
4555 break;\r
4556 case '>': /* (?>...) stop backtrack */\r
4557 *np = node_new_enclose(ENCLOSE_STOP_BACKTRACK);\r
4558 break;\r
4559\r
4560#ifdef USE_NAMED_GROUP\r
4561 case '\'':\r
4562 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {\r
4563 goto named_group1;\r
4564 }\r
4565 else\r
4566 return ONIGERR_UNDEFINED_GROUP_OPTION;\r
4567 break;\r
4568#endif\r
4569\r
4570 case '<': /* look behind (?<=...), (?<!...) */\r
4571 PFETCH(c);\r
4572 if (c == '=')\r
4573 *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND);\r
4574 else if (c == '!')\r
4575 *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND_NOT);\r
4576#ifdef USE_NAMED_GROUP\r
4577 else {\r
4578 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {\r
4579 UChar *name;\r
4580 UChar *name_end;\r
4581\r
4582 PUNFETCH;\r
4583 c = '<';\r
4584\r
4585 named_group1:\r
4586 list_capture = 0;\r
4587\r
4588 named_group2:\r
4589 name = p;\r
4590 r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num, 0);\r
4591 if (r < 0) return r;\r
4592\r
4593 num = scan_env_add_mem_entry(env);\r
4594 if (num < 0) return num;\r
4595 if (list_capture != 0 && num >= (int )BIT_STATUS_BITS_NUM)\r
4596 return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;\r
4597\r
4598 r = name_add(env->reg, name, name_end, num, env);\r
4599 if (r != 0) return r;\r
4600 *np = node_new_enclose_memory(env->option, 1);\r
4601 CHECK_NULL_RETURN_MEMERR(*np);\r
4602 NENCLOSE(*np)->regnum = num;\r
4603 if (list_capture != 0)\r
4604 BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);\r
4605 env->num_named++;\r
4606 }\r
4607 else {\r
4608 return ONIGERR_UNDEFINED_GROUP_OPTION;\r
4609 }\r
4610 }\r
4611#else\r
4612 else {\r
4613 return ONIGERR_UNDEFINED_GROUP_OPTION;\r
4614 }\r
4615#endif\r
4616 break;\r
4617\r
4618 case '@':\r
4619 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) {\r
4620#ifdef USE_NAMED_GROUP\r
4621 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {\r
4622 PFETCH(c);\r
4623 if (c == '<' || c == '\'') {\r
4624 list_capture = 1;\r
4625 goto named_group2; /* (?@<name>...) */\r
4626 }\r
4627 PUNFETCH;\r
4628 }\r
4629#endif\r
4630 *np = node_new_enclose_memory(env->option, 0);\r
4631 CHECK_NULL_RETURN_MEMERR(*np);\r
4632 num = scan_env_add_mem_entry(env);\r
4633 if (num < 0) {\r
4634 onig_node_free(*np);\r
4635 return num;\r
4636 }\r
4637 else if (num >= (int )BIT_STATUS_BITS_NUM) {\r
4638 onig_node_free(*np);\r
4639 return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;\r
4640 }\r
4641 NENCLOSE(*np)->regnum = num;\r
4642 BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);\r
4643 }\r
4644 else {\r
4645 return ONIGERR_UNDEFINED_GROUP_OPTION;\r
4646 }\r
4647 break;\r
4648\r
4649#ifdef USE_POSIXLINE_OPTION\r
4650 case 'p':\r
4651#endif\r
4652 case '-': case 'i': case 'm': case 's': case 'x':\r
4653 {\r
4654 int neg = 0;\r
4655\r
4656 while (1) {\r
4657 switch (c) {\r
4658 case ':':\r
4659 case ')':\r
4660 break;\r
4661\r
4662 case '-': neg = 1; break;\r
4663 case 'x': ONOFF(option, ONIG_OPTION_EXTEND, neg); break;\r
4664 case 'i': ONOFF(option, ONIG_OPTION_IGNORECASE, neg); break;\r
4665 case 's':\r
4666 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {\r
4667 ONOFF(option, ONIG_OPTION_MULTILINE, neg);\r
4668 }\r
4669 else\r
4670 return ONIGERR_UNDEFINED_GROUP_OPTION;\r
4671 break;\r
4672\r
4673 case 'm':\r
4674 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {\r
4675 ONOFF(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 1 : 0));\r
4676 }\r
4677 else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) {\r
4678 ONOFF(option, ONIG_OPTION_MULTILINE, neg);\r
4679 }\r
4680 else\r
4681 return ONIGERR_UNDEFINED_GROUP_OPTION;\r
4682 break;\r
4683#ifdef USE_POSIXLINE_OPTION\r
4684 case 'p':\r
4685 ONOFF(option, ONIG_OPTION_MULTILINE|ONIG_OPTION_SINGLELINE, neg);\r
4686 break;\r
4687#endif\r
4688 default:\r
4689 return ONIGERR_UNDEFINED_GROUP_OPTION;\r
4690 }\r
4691\r
4692 if (c == ')') {\r
4693 *np = node_new_option(option);\r
4694 CHECK_NULL_RETURN_MEMERR(*np);\r
4695 *src = p;\r
4696 return 2; /* option only */\r
4697 }\r
4698 else if (c == ':') {\r
4699 OnigOptionType prev = env->option;\r
4700\r
4701 env->option = option;\r
4702 r = fetch_token(tok, &p, end, env);\r
4703 if (r < 0) return r;\r
4704 r = parse_subexp(&target, tok, term, &p, end, env);\r
4705 env->option = prev;\r
4706 if (r < 0) return r;\r
4707 *np = node_new_option(option);\r
4708 CHECK_NULL_RETURN_MEMERR(*np);\r
4709 NENCLOSE(*np)->target = target;\r
4710 *src = p;\r
4711 return 0;\r
4712 }\r
4713\r
4714 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
4715 PFETCH(c);\r
4716 }\r
4717 }\r
4718 break;\r
4719\r
4720 default:\r
4721 return ONIGERR_UNDEFINED_GROUP_OPTION;\r
4722 }\r
4723 }\r
4724 else {\r
4725 if (ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_DONT_CAPTURE_GROUP))\r
4726 goto group;\r
4727\r
4728 *np = node_new_enclose_memory(env->option, 0);\r
4729 CHECK_NULL_RETURN_MEMERR(*np);\r
4730 num = scan_env_add_mem_entry(env);\r
4731 if (num < 0) return num;\r
4732 NENCLOSE(*np)->regnum = num;\r
4733 }\r
4734\r
4735 CHECK_NULL_RETURN_MEMERR(*np);\r
4736 r = fetch_token(tok, &p, end, env);\r
4737 if (r < 0) return r;\r
4738 r = parse_subexp(&target, tok, term, &p, end, env);\r
4739 if (r < 0) return r;\r
4740\r
4741 if (NTYPE(*np) == NT_ANCHOR)\r
4742 NANCHOR(*np)->target = target;\r
4743 else {\r
4744 NENCLOSE(*np)->target = target;\r
4745 if (NENCLOSE(*np)->type == ENCLOSE_MEMORY) {\r
4746 /* Don't move this to previous of parse_subexp() */\r
4747 r = scan_env_set_mem_node(env, NENCLOSE(*np)->regnum, *np);\r
4748 if (r != 0) return r;\r
4749 }\r
4750 }\r
4751\r
4752 *src = p;\r
4753 return 0;\r
4754}\r
4755\r
/* Spelling of the quantifiers, used to build the nested-repeat warning in
   set_quantifier(); indexed by the value of popular_quantifier_num(). */
static const char* PopularQStr[] = {
  "?",
  "*",
  "+",
  "??",
  "*?",
  "+?"
};
4759\r
/* Replacement text for the nested-repeat warning in set_quantifier();
   indexed by an entry of ReduceTypeTable. */
static const char* ReduceQStr[] = {
  "",
  "",
  "*",
  "*?",
  "??",
  "+ and ??",
  "+? and ?"
};
4763\r
4764static int\r
4765set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)\r
4766{\r
4767 QtfrNode* qn;\r
4768\r
4769 qn = NQTFR(qnode);\r
4770 if (qn->lower == 1 && qn->upper == 1) {\r
4771 return 1;\r
4772 }\r
4773\r
4774 switch (NTYPE(target)) {\r
4775 case NT_STR:\r
4776 if (! group) {\r
4777 StrNode* sn = NSTR(target);\r
4778 if (str_node_can_be_split(sn, env->enc)) {\r
4779 Node* n = str_node_split_last_char(sn, env->enc);\r
4780 if (IS_NOT_NULL(n)) {\r
4781 qn->target = n;\r
4782 return 2;\r
4783 }\r
4784 }\r
4785 }\r
4786 break;\r
4787\r
4788 case NT_QTFR:\r
4789 { /* check redundant double repeat. */\r
4790 /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */\r
4791 QtfrNode* qnt = NQTFR(target);\r
4792 int nestq_num = popular_quantifier_num(qn);\r
4793 int targetq_num = popular_quantifier_num(qnt);\r
b0c2b797
QS
4794 if (nestq_num < 0 || targetq_num < 0) {\r
4795 return ONIGERR_TYPE_BUG;\r
4796 }\r
14b0e578
CS
4797\r
4798#ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR\r
4799 if (!IS_QUANTIFIER_BY_NUMBER(qn) && !IS_QUANTIFIER_BY_NUMBER(qnt) &&\r
4800 IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {\r
4801 UChar buf[WARN_BUFSIZE];\r
4802\r
4803 switch(ReduceTypeTable[targetq_num][nestq_num]) {\r
4804 case RQ_ASIS:\r
4805 break;\r
4806\r
4807 case RQ_DEL:\r
4808 if (onig_verb_warn != onig_null_warn) {\r
4809 onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,\r
4810 env->pattern, env->pattern_end,\r
4811 (UChar* )"redundant nested repeat operator");\r
4812 (*onig_verb_warn)((char* )buf);\r
4813 }\r
4814 goto warn_exit;\r
4815 break;\r
4816\r
4817 default:\r
4818 if (onig_verb_warn != onig_null_warn) {\r
4819 onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,\r
4820 env->pattern, env->pattern_end,\r
4821 (UChar* )"nested repeat operator %s and %s was replaced with '%s'",\r
4822 PopularQStr[targetq_num], PopularQStr[nestq_num],\r
4823 ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]);\r
4824 (*onig_verb_warn)((char* )buf);\r
4825 }\r
4826 goto warn_exit;\r
4827 break;\r
4828 }\r
4829 }\r
4830\r
4831 warn_exit:\r
4832#endif\r
4833 if (targetq_num >= 0) {\r
4834 if (nestq_num >= 0) {\r
4835 onig_reduce_nested_quantifier(qnode, target);\r
4836 goto q_exit;\r
4837 }\r
4838 else if (targetq_num == 1 || targetq_num == 2) { /* * or + */\r
4839 /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */\r
4840 if (! IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) {\r
4841 qn->upper = (qn->lower == 0 ? 1 : qn->lower);\r
4842 }\r
4843 }\r
4844 }\r
4845 }\r
4846 break;\r
4847\r
4848 default:\r
4849 break;\r
4850 }\r
4851\r
4852 qn->target = target;\r
4853 q_exit:\r
4854 return 0;\r
4855}\r
4856\r
4857\r
4858#ifdef USE_SHARED_CCLASS_TABLE\r
4859\r
4860#define THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS 8\r
4861\r
4862/* for ctype node hash table */\r
4863\r
4864typedef struct {\r
4865 OnigEncoding enc;\r
4866 int not;\r
4867 int type;\r
4868} type_cclass_key;\r
4869\r
4870static int type_cclass_cmp(type_cclass_key* x, type_cclass_key* y)\r
4871{\r
4872 if (x->type != y->type) return 1;\r
4873 if (x->enc != y->enc) return 1;\r
4874 if (x->not != y->not) return 1;\r
4875 return 0;\r
4876}\r
4877\r
4878static int type_cclass_hash(type_cclass_key* key)\r
4879{\r
4880 int i, val;\r
4881 UChar *p;\r
4882\r
4883 val = 0;\r
4884\r
4885 p = (UChar* )&(key->enc);\r
4886 for (i = 0; i < (int )sizeof(key->enc); i++) {\r
4887 val = val * 997 + (int )*p++;\r
4888 }\r
4889\r
4890 p = (UChar* )(&key->type);\r
4891 for (i = 0; i < (int )sizeof(key->type); i++) {\r
4892 val = val * 997 + (int )*p++;\r
4893 }\r
4894\r
4895 val += key->not;\r
4896 return val + (val >> 5);\r
4897}\r
4898\r
4899static struct st_hash_type type_type_cclass_hash = {\r
4900 type_cclass_cmp,\r
4901 type_cclass_hash,\r
4902};\r
4903\r
4904static st_table* OnigTypeCClassTable;\r
4905\r
4906\r
4907static int\r
4908i_free_shared_class(type_cclass_key* key, Node* node, void* arg ARG_UNUSED)\r
4909{\r
4910 if (IS_NOT_NULL(node)) {\r
4911 CClassNode* cc = NCCLASS(node);\r
4912 if (IS_NOT_NULL(cc->mbuf)) xfree(cc->mbuf);\r
4913 xfree(node);\r
4914 }\r
4915\r
4916 if (IS_NOT_NULL(key)) xfree(key);\r
4917 return ST_DELETE;\r
4918}\r
4919\r
4920extern int\r
4921onig_free_shared_cclass_table(void)\r
4922{\r
4923 if (IS_NOT_NULL(OnigTypeCClassTable)) {\r
4924 onig_st_foreach(OnigTypeCClassTable, i_free_shared_class, 0);\r
4925 onig_st_free_table(OnigTypeCClassTable);\r
4926 OnigTypeCClassTable = NULL;\r
4927 }\r
4928\r
4929 return 0;\r
4930}\r
4931\r
4932#endif /* USE_SHARED_CCLASS_TABLE */\r
4933\r
4934\r
4935#ifndef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS\r
4936static int\r
4937clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc)\r
4938{\r
4939 BBuf *tbuf;\r
4940 int r;\r
4941\r
4942 if (IS_NCCLASS_NOT(cc)) {\r
4943 bitset_invert(cc->bs);\r
4944\r
4945 if (! ONIGENC_IS_SINGLEBYTE(enc)) {\r
4946 r = not_code_range_buf(enc, cc->mbuf, &tbuf);\r
4947 if (r != 0) return r;\r
4948\r
4949 bbuf_free(cc->mbuf);\r
4950 cc->mbuf = tbuf;\r
4951 }\r
4952\r
4953 NCCLASS_CLEAR_NOT(cc);\r
4954 }\r
4955\r
4956 return 0;\r
4957}\r
4958#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */\r
4959\r
4960typedef struct {\r
4961 ScanEnv* env;\r
4962 CClassNode* cc;\r
4963 Node* alt_root;\r
4964 Node** ptail;\r
4965} IApplyCaseFoldArg;\r
4966\r
4967static int\r
4968i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[],\r
4969 int to_len, void* arg)\r
4970{\r
4971 IApplyCaseFoldArg* iarg;\r
4972 ScanEnv* env;\r
4973 CClassNode* cc;\r
4974 BitSetRef bs;\r
4975\r
4976 iarg = (IApplyCaseFoldArg* )arg;\r
4977 env = iarg->env;\r
4978 cc = iarg->cc;\r
4979 bs = cc->bs;\r
4980\r
4981 if (to_len == 1) {\r
4982 int is_in = onig_is_code_in_cc(env->enc, from, cc);\r
4983#ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS\r
4984 if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) ||\r
4985 (is_in == 0 && IS_NCCLASS_NOT(cc))) {\r
4986 if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {\r
4987 add_code_range(&(cc->mbuf), env, *to, *to);\r
4988 }\r
4989 else {\r
4990 BITSET_SET_BIT(bs, *to);\r
4991 }\r
4992 }\r
4993#else\r
4994 if (is_in != 0) {\r
4995 if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {\r
4996 if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc);\r
4997 add_code_range(&(cc->mbuf), env, *to, *to);\r
4998 }\r
4999 else {\r
5000 if (IS_NCCLASS_NOT(cc)) {\r
5001 BITSET_CLEAR_BIT(bs, *to);\r
5002 }\r
5003 else\r
5004 BITSET_SET_BIT(bs, *to);\r
5005 }\r
5006 }\r
5007#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */\r
5008 }\r
5009 else {\r
5010 int r, i, len;\r
5011 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];\r
5012 Node *snode = NULL_NODE;\r
5013\r
5014 if (onig_is_code_in_cc(env->enc, from, cc)\r
5015#ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS\r
5016 && !IS_NCCLASS_NOT(cc)\r
5017#endif\r
5018 ) {\r
5019 for (i = 0; i < to_len; i++) {\r
5020 len = ONIGENC_CODE_TO_MBC(env->enc, to[i], buf);\r
5021 if (i == 0) {\r
5022 snode = onig_node_new_str(buf, buf + len);\r
5023 CHECK_NULL_RETURN_MEMERR(snode);\r
5024\r
5025 /* char-class expanded multi-char only\r
5026 compare with string folded at match time. */\r
5027 NSTRING_SET_AMBIG(snode);\r
5028 }\r
5029 else {\r
5030 r = onig_node_str_cat(snode, buf, buf + len);\r
5031 if (r < 0) {\r
5032 onig_node_free(snode);\r
5033 return r;\r
5034 }\r
5035 }\r
5036 }\r
5037\r
5038 *(iarg->ptail) = onig_node_new_alt(snode, NULL_NODE);\r
5039 CHECK_NULL_RETURN_MEMERR(*(iarg->ptail));\r
5040 iarg->ptail = &(NCDR((*(iarg->ptail))));\r
5041 }\r
5042 }\r
5043\r
5044 return 0;\r
5045}\r
5046\r
5047static int\r
5048parse_exp(Node** np, OnigToken* tok, int term,\r
5049 UChar** src, UChar* end, ScanEnv* env)\r
5050{\r
5051 int r, len, group = 0;\r
5052 Node* qn;\r
5053 Node** targetp;\r
5054\r
5055 *np = NULL;\r
5056 if (tok->type == (enum TokenSyms )term)\r
5057 goto end_of_token;\r
5058\r
5059 switch (tok->type) {\r
5060 case TK_ALT:\r
5061 case TK_EOT:\r
5062 end_of_token:\r
5063 *np = node_new_empty();\r
5064 return tok->type;\r
5065 break;\r
5066\r
5067 case TK_SUBEXP_OPEN:\r
5068 r = parse_enclose(np, tok, TK_SUBEXP_CLOSE, src, end, env);\r
5069 if (r < 0) return r;\r
5070 if (r == 1) group = 1;\r
5071 else if (r == 2) { /* option only */\r
5072 Node* target;\r
5073 OnigOptionType prev = env->option;\r
5074\r
5075 env->option = NENCLOSE(*np)->option;\r
5076 r = fetch_token(tok, src, end, env);\r
5077 if (r < 0) return r;\r
5078 r = parse_subexp(&target, tok, term, src, end, env);\r
5079 env->option = prev;\r
5080 if (r < 0) return r;\r
5081 NENCLOSE(*np)->target = target; \r
5082 return tok->type;\r
5083 }\r
5084 break;\r
5085\r
5086 case TK_SUBEXP_CLOSE:\r
5087 if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP))\r
5088 return ONIGERR_UNMATCHED_CLOSE_PARENTHESIS;\r
5089\r
5090 if (tok->escaped) goto tk_raw_byte;\r
5091 else goto tk_byte;\r
5092 break;\r
5093\r
5094 case TK_STRING:\r
5095 tk_byte:\r
5096 {\r
5097 *np = node_new_str(tok->backp, *src);\r
5098 CHECK_NULL_RETURN_MEMERR(*np);\r
5099\r
5100 while (1) {\r
5101 r = fetch_token(tok, src, end, env);\r
5102 if (r < 0) return r;\r
5103 if (r != TK_STRING) break;\r
5104\r
5105 r = onig_node_str_cat(*np, tok->backp, *src);\r
5106 if (r < 0) return r;\r
5107 }\r
5108\r
5109 string_end:\r
5110 targetp = np;\r
5111 goto repeat;\r
5112 }\r
5113 break;\r
5114\r
5115 case TK_RAW_BYTE:\r
5116 tk_raw_byte:\r
5117 {\r
5118 *np = node_new_str_raw_char((UChar )tok->u.c);\r
5119 CHECK_NULL_RETURN_MEMERR(*np);\r
5120 len = 1;\r
5121 while (1) {\r
5122 if (len >= ONIGENC_MBC_MINLEN(env->enc)) {\r
5123 if (len == enclen(env->enc, NSTR(*np)->s)) {\r
5124 r = fetch_token(tok, src, end, env);\r
5125 NSTRING_CLEAR_RAW(*np);\r
5126 goto string_end;\r
5127 }\r
5128 }\r
5129\r
5130 r = fetch_token(tok, src, end, env);\r
5131 if (r < 0) return r;\r
5132 if (r != TK_RAW_BYTE) {\r
5133 /* Don't use this, it is wrong for little endian encodings. */\r
5134#ifdef USE_PAD_TO_SHORT_BYTE_CHAR\r
5135 int rem;\r
5136 if (len < ONIGENC_MBC_MINLEN(env->enc)) {\r
5137 rem = ONIGENC_MBC_MINLEN(env->enc) - len;\r
5138 (void )node_str_head_pad(NSTR(*np), rem, (UChar )0);\r
5139 if (len + rem == enclen(env->enc, NSTR(*np)->s)) {\r
5140 NSTRING_CLEAR_RAW(*np);\r
5141 goto string_end;\r
5142 }\r
5143 }\r
5144#endif\r
5145 return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;\r
5146 }\r
5147\r
5148 r = node_str_cat_char(*np, (UChar )tok->u.c);\r
5149 if (r < 0) return r;\r
5150\r
5151 len++;\r
5152 }\r
5153 }\r
5154 break;\r
5155\r
5156 case TK_CODE_POINT:\r
5157 {\r
5158 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];\r
5159 int num = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf);\r
5160 if (num < 0) return num;\r
5161#ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG\r
5162 *np = node_new_str_raw(buf, buf + num);\r
5163#else\r
5164 *np = node_new_str(buf, buf + num);\r
5165#endif\r
5166 CHECK_NULL_RETURN_MEMERR(*np);\r
5167 }\r
5168 break;\r
5169\r
5170 case TK_QUOTE_OPEN:\r
5171 {\r
5172 OnigCodePoint end_op[2];\r
5173 UChar *qstart, *qend, *nextp;\r
5174\r
5175 end_op[0] = (OnigCodePoint )MC_ESC(env->syntax);\r
5176 end_op[1] = (OnigCodePoint )'E';\r
5177 qstart = *src;\r
5178 qend = find_str_position(end_op, 2, qstart, end, &nextp, env->enc);\r
5179 if (IS_NULL(qend)) {\r
5180 nextp = qend = end;\r
5181 }\r
5182 *np = node_new_str(qstart, qend);\r
5183 CHECK_NULL_RETURN_MEMERR(*np);\r
5184 *src = nextp;\r
5185 }\r
5186 break;\r
5187\r
5188 case TK_CHAR_TYPE:\r
5189 {\r
5190 switch (tok->u.prop.ctype) {\r
5191 case ONIGENC_CTYPE_WORD:\r
5192 *np = node_new_ctype(tok->u.prop.ctype, tok->u.prop.not);\r
5193 CHECK_NULL_RETURN_MEMERR(*np);\r
5194 break;\r
5195\r
5196 case ONIGENC_CTYPE_SPACE:\r
5197 case ONIGENC_CTYPE_DIGIT:\r
5198 case ONIGENC_CTYPE_XDIGIT:\r
5199 {\r
5200 CClassNode* cc;\r
5201\r
5202#ifdef USE_SHARED_CCLASS_TABLE\r
5203 const OnigCodePoint *mbr;\r
5204 OnigCodePoint sb_out;\r
5205\r
5206 r = ONIGENC_GET_CTYPE_CODE_RANGE(env->enc, tok->u.prop.ctype,\r
5207 &sb_out, &mbr);\r
5208 if (r == 0 &&\r
5209 ONIGENC_CODE_RANGE_NUM(mbr)\r
5210 >= THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS) {\r
5211 type_cclass_key key;\r
5212 type_cclass_key* new_key;\r
5213\r
5214 key.enc = env->enc;\r
5215 key.not = tok->u.prop.not;\r
5216 key.type = tok->u.prop.ctype;\r
5217\r
5218 THREAD_ATOMIC_START;\r
5219\r
5220 if (IS_NULL(OnigTypeCClassTable)) {\r
5221 OnigTypeCClassTable\r
5222 = onig_st_init_table_with_size(&type_type_cclass_hash, 10);\r
5223 if (IS_NULL(OnigTypeCClassTable)) {\r
5224 THREAD_ATOMIC_END;\r
5225 return ONIGERR_MEMORY;\r
5226 }\r
5227 }\r
5228 else {\r
4d454c54 5229 if (onig_st_lookup(OnigTypeCClassTable, (st_data_t )(UINTN)&key,\r
14b0e578
CS
5230 (st_data_t* )np)) {\r
5231 THREAD_ATOMIC_END;\r
5232 break;\r
5233 }\r
5234 }\r
5235\r
5236 *np = node_new_cclass_by_codepoint_range(tok->u.prop.not,\r
5237 sb_out, mbr);\r
5238 if (IS_NULL(*np)) {\r
5239 THREAD_ATOMIC_END;\r
5240 return ONIGERR_MEMORY;\r
5241 }\r
5242\r
5243 cc = NCCLASS(*np);\r
5244 NCCLASS_SET_SHARE(cc);\r
5245 new_key = (type_cclass_key* )xmalloc(sizeof(type_cclass_key));\r
b0c2b797 5246 CHECK_NULL_RETURN_MEMERR(new_key);\r
14b0e578 5247 xmemcpy(new_key, &key, sizeof(type_cclass_key));\r
4d454c54
LG
5248 onig_st_add_direct(OnigTypeCClassTable, (st_data_t )(UINTN)new_key,\r
5249 (st_data_t )(UINTN)*np);\r
14b0e578
CS
5250 \r
5251 THREAD_ATOMIC_END;\r
5252 }\r
5253 else {\r
5254#endif\r
5255 *np = node_new_cclass();\r
5256 CHECK_NULL_RETURN_MEMERR(*np);\r
5257 cc = NCCLASS(*np);\r
5258 add_ctype_to_cc(cc, tok->u.prop.ctype, 0, env);\r
5259 if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);\r
5260#ifdef USE_SHARED_CCLASS_TABLE\r
5261 }\r
5262#endif\r
5263 }\r
5264 break;\r
5265\r
5266 default:\r
5267 return ONIGERR_PARSER_BUG;\r
5268 break;\r
5269 }\r
5270 }\r
5271 break;\r
5272\r
5273 case TK_CHAR_PROPERTY:\r
5274 r = parse_char_property(np, tok, src, end, env);\r
5275 if (r != 0) return r;\r
5276 break;\r
5277\r
5278 case TK_CC_OPEN:\r
5279 {\r
5280 CClassNode* cc;\r
5281\r
5282 r = parse_char_class(np, tok, src, end, env);\r
5283 if (r != 0) return r;\r
5284\r
5285 cc = NCCLASS(*np);\r
5286 if (IS_IGNORECASE(env->option)) {\r
5287 IApplyCaseFoldArg iarg;\r
5288\r
5289 iarg.env = env;\r
5290 iarg.cc = cc;\r
5291 iarg.alt_root = NULL_NODE;\r
5292 iarg.ptail = &(iarg.alt_root);\r
5293\r
5294 r = ONIGENC_APPLY_ALL_CASE_FOLD(env->enc, env->case_fold_flag,\r
5295 i_apply_case_fold, &iarg);\r
5296 if (r != 0) {\r
5297 onig_node_free(iarg.alt_root);\r
5298 return r;\r
5299 }\r
5300 if (IS_NOT_NULL(iarg.alt_root)) {\r
5301 Node* work = onig_node_new_alt(*np, iarg.alt_root);\r
5302 if (IS_NULL(work)) {\r
5303 onig_node_free(iarg.alt_root);\r
5304 return ONIGERR_MEMORY;\r
5305 }\r
5306 *np = work;\r
5307 }\r
5308 }\r
5309 }\r
5310 break;\r
5311\r
5312 case TK_ANYCHAR:\r
5313 *np = node_new_anychar();\r
5314 CHECK_NULL_RETURN_MEMERR(*np);\r
5315 break;\r
5316\r
5317 case TK_ANYCHAR_ANYTIME:\r
5318 *np = node_new_anychar();\r
5319 CHECK_NULL_RETURN_MEMERR(*np);\r
5320 qn = node_new_quantifier(0, REPEAT_INFINITE, 0);\r
5321 CHECK_NULL_RETURN_MEMERR(qn);\r
5322 NQTFR(qn)->target = *np;\r
5323 *np = qn;\r
5324 break;\r
5325\r
5326 case TK_BACKREF:\r
5327 len = tok->u.backref.num;\r
5328 *np = node_new_backref(len,\r
5329 (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)),\r
5330 tok->u.backref.by_name,\r
5331#ifdef USE_BACKREF_WITH_LEVEL\r
5332 tok->u.backref.exist_level,\r
5333 tok->u.backref.level,\r
5334#endif\r
5335 env);\r
5336 CHECK_NULL_RETURN_MEMERR(*np);\r
5337 break;\r
5338\r
5339#ifdef USE_SUBEXP_CALL\r
5340 case TK_CALL:\r
5341 {\r
5342 int gnum = tok->u.call.gnum;\r
5343\r
5344 if (gnum < 0) {\r
5345 gnum = BACKREF_REL_TO_ABS(gnum, env);\r
5346 if (gnum <= 0)\r
5347 return ONIGERR_INVALID_BACKREF;\r
5348 }\r
5349 *np = node_new_call(tok->u.call.name, tok->u.call.name_end, gnum);\r
5350 CHECK_NULL_RETURN_MEMERR(*np);\r
5351 env->num_call++;\r
5352 }\r
5353 break;\r
5354#endif\r
5355\r
5356 case TK_ANCHOR:\r
5357 *np = onig_node_new_anchor(tok->u.anchor);\r
b0c2b797 5358 CHECK_NULL_RETURN_MEMERR(*np);\r
14b0e578
CS
5359 break;\r
5360\r
5361 case TK_OP_REPEAT:\r
5362 case TK_INTERVAL:\r
5363 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS)) {\r
5364 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS))\r
5365 return ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED;\r
5366 else\r
5367 *np = node_new_empty();\r
b0c2b797 5368 CHECK_NULL_RETURN_MEMERR(*np);\r
14b0e578
CS
5369 }\r
5370 else {\r
5371 goto tk_byte;\r
5372 }\r
5373 break;\r
5374\r
5375 default:\r
5376 return ONIGERR_PARSER_BUG;\r
5377 break;\r
5378 }\r
5379\r
5380 {\r
5381 targetp = np;\r
5382\r
5383 re_entry:\r
5384 r = fetch_token(tok, src, end, env);\r
5385 if (r < 0) return r;\r
5386\r
5387 repeat:\r
5388 if (r == TK_OP_REPEAT || r == TK_INTERVAL) {\r
5389 if (is_invalid_quantifier_target(*targetp))\r
5390 return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID;\r
5391\r
5392 qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper,\r
5393 (r == TK_INTERVAL ? 1 : 0));\r
5394 CHECK_NULL_RETURN_MEMERR(qn);\r
5395 NQTFR(qn)->greedy = tok->u.repeat.greedy;\r
5396 r = set_quantifier(qn, *targetp, group, env);\r
5397 if (r < 0) {\r
5398 onig_node_free(qn);\r
5399 return r;\r
5400 }\r
5401\r
5402 if (tok->u.repeat.possessive != 0) {\r
5403 Node* en;\r
5404 en = node_new_enclose(ENCLOSE_STOP_BACKTRACK);\r
5405 if (IS_NULL(en)) {\r
5406 onig_node_free(qn);\r
5407 return ONIGERR_MEMORY;\r
5408 }\r
5409 NENCLOSE(en)->target = qn;\r
5410 qn = en;\r
5411 }\r
5412\r
5413 if (r == 0) {\r
5414 *targetp = qn;\r
5415 }\r
5416 else if (r == 1) {\r
5417 onig_node_free(qn);\r
5418 }\r
5419 else if (r == 2) { /* split case: /abc+/ */\r
5420 Node *tmp;\r
5421\r
5422 *targetp = node_new_list(*targetp, NULL);\r
5423 if (IS_NULL(*targetp)) {\r
5424 onig_node_free(qn);\r
5425 return ONIGERR_MEMORY;\r
5426 }\r
5427 tmp = NCDR(*targetp) = node_new_list(qn, NULL);\r
5428 if (IS_NULL(tmp)) {\r
5429 onig_node_free(qn);\r
5430 return ONIGERR_MEMORY;\r
5431 }\r
5432 targetp = &(NCAR(tmp));\r
5433 }\r
5434 goto re_entry;\r
5435 }\r
5436 }\r
5437\r
5438 return r;\r
5439}\r
5440\r
5441static int\r
5442parse_branch(Node** top, OnigToken* tok, int term,\r
5443 UChar** src, UChar* end, ScanEnv* env)\r
5444{\r
5445 int r;\r
5446 Node *node, **headp;\r
5447\r
5448 *top = NULL;\r
5449 r = parse_exp(&node, tok, term, src, end, env);\r
5450 if (r < 0) return r;\r
5451\r
5452 if (r == TK_EOT || r == term || r == TK_ALT) {\r
5453 *top = node;\r
5454 }\r
5455 else {\r
5456 *top = node_new_list(node, NULL);\r
b0c2b797 5457 CHECK_NULL_RETURN_MEMERR(*top);\r
14b0e578
CS
5458 headp = &(NCDR(*top));\r
5459 while (r != TK_EOT && r != term && r != TK_ALT) {\r
5460 r = parse_exp(&node, tok, term, src, end, env);\r
b0c2b797 5461 CHECK_NULL_RETURN_MEMERR(node);\r
14b0e578
CS
5462 if (r < 0) return r;\r
5463\r
5464 if (NTYPE(node) == NT_LIST) {\r
5465 *headp = node;\r
5466 while (IS_NOT_NULL(NCDR(node))) node = NCDR(node);\r
5467 headp = &(NCDR(node));\r
5468 }\r
5469 else {\r
5470 *headp = node_new_list(node, NULL);\r
5471 headp = &(NCDR(*headp));\r
5472 }\r
5473 }\r
5474 }\r
5475\r
5476 return r;\r
5477}\r
5478\r
5479/* term_tok: TK_EOT or TK_SUBEXP_CLOSE */\r
5480static int\r
5481parse_subexp(Node** top, OnigToken* tok, int term,\r
5482 UChar** src, UChar* end, ScanEnv* env)\r
5483{\r
5484 int r;\r
5485 Node *node, **headp;\r
5486\r
5487 *top = NULL;\r
5488 r = parse_branch(&node, tok, term, src, end, env);\r
5489 if (r < 0) {\r
5490 onig_node_free(node);\r
5491 return r;\r
5492 }\r
5493\r
5494 if (r == term) {\r
5495 *top = node;\r
5496 }\r
5497 else if (r == TK_ALT) {\r
5498 *top = onig_node_new_alt(node, NULL);\r
b0c2b797 5499 CHECK_NULL_RETURN_MEMERR(*top);\r
14b0e578
CS
5500 headp = &(NCDR(*top));\r
5501 while (r == TK_ALT) {\r
5502 r = fetch_token(tok, src, end, env);\r
5503 if (r < 0) return r;\r
5504 r = parse_branch(&node, tok, term, src, end, env);\r
5505 if (r < 0) return r;\r
5506\r
5507 *headp = onig_node_new_alt(node, NULL);\r
5508 headp = &(NCDR(*headp));\r
5509 }\r
5510\r
5511 if (tok->type != (enum TokenSyms )term)\r
5512 goto err;\r
5513 }\r
5514 else {\r
5515 err:\r
5516 if (term == TK_SUBEXP_CLOSE)\r
5517 return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;\r
5518 else\r
5519 return ONIGERR_PARSER_BUG;\r
5520 }\r
5521\r
5522 return r;\r
5523}\r
5524\r
5525static int\r
5526parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)\r
5527{\r
5528 int r;\r
5529 OnigToken tok;\r
5530\r
5531 r = fetch_token(&tok, src, end, env);\r
5532 if (r < 0) return r;\r
5533 r = parse_subexp(top, &tok, TK_EOT, src, end, env);\r
5534 if (r < 0) return r;\r
5535 return 0;\r
5536}\r
5537\r
5538extern int\r
5539onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end,\r
5540 regex_t* reg, ScanEnv* env)\r
5541{\r
5542 int r;\r
5543 UChar* p;\r
5544\r
5545#ifdef USE_NAMED_GROUP\r
5546 names_clear(reg);\r
5547#endif\r
5548\r
5549 scan_env_clear(env);\r
5550 env->option = reg->options;\r
5551 env->case_fold_flag = reg->case_fold_flag;\r
5552 env->enc = reg->enc;\r
5553 env->syntax = reg->syntax;\r
5554 env->pattern = (UChar* )pattern;\r
5555 env->pattern_end = (UChar* )end;\r
5556 env->reg = reg;\r
5557\r
5558 *root = NULL;\r
5559 p = (UChar* )pattern;\r
5560 r = parse_regexp(root, &p, (UChar* )end, env);\r
5561 reg->num_mem = env->num_mem;\r
5562 return r;\r
5563}\r
5564\r
5565extern void\r
5566onig_scan_env_set_error_string(ScanEnv* env, int ecode ARG_UNUSED,\r
5567 UChar* arg, UChar* arg_end)\r
5568{\r
5569 env->error = arg;\r
5570 env->error_end = arg_end;\r
5571}\r