]> git.proxmox.com Git - mirror_edk2.git/blame - MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regparse.c
MdeModulePkg: Regular expression protocol
[mirror_edk2.git] / MdeModulePkg / Universal / RegularExpressionDxe / Oniguruma / regparse.c
CommitLineData
db3b92b4
CS
1/**********************************************************************\r
2 regparse.c - Oniguruma (regular expression library)\r
3**********************************************************************/\r
4/*-\r
5 * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>\r
6 * All rights reserved.\r
7 *\r
8 * Copyright (c) 2015, Hewlett Packard Enterprise Development, L.P.<BR>\r
9 *\r
10 * Redistribution and use in source and binary forms, with or without\r
11 * modification, are permitted provided that the following conditions\r
12 * are met:\r
13 * 1. Redistributions of source code must retain the above copyright\r
14 * notice, this list of conditions and the following disclaimer.\r
15 * 2. Redistributions in binary form must reproduce the above copyright\r
16 * notice, this list of conditions and the following disclaimer in the\r
17 * documentation and/or other materials provided with the distribution.\r
18 *\r
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND\r
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\r
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\r
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE\r
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\r
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS\r
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)\r
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT\r
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY\r
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF\r
29 * SUCH DAMAGE.\r
30 */\r
31\r
32#include "regparse.h"\r
33#include "st.h"\r
34\r
35#define WARN_BUFSIZE 256\r
36\r
37#define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS\r
38\r
39\r
40OnigSyntaxType OnigSyntaxRuby = {\r
41 (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |\r
42 ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |\r
43 ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |\r
44 ONIG_SYN_OP_ESC_C_CONTROL )\r
45 & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )\r
46 , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |\r
47 ONIG_SYN_OP2_OPTION_RUBY |\r
48 ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |\r
49 ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |\r
50 ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |\r
51 ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |\r
52 ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |\r
53 ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |\r
54 ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |\r
55 ONIG_SYN_OP2_ESC_H_XDIGIT )\r
56 , ( SYN_GNU_REGEX_BV | \r
57 ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |\r
58 ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |\r
59 ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |\r
60 ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |\r
61 ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |\r
62 ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |\r
63 ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )\r
64 , ONIG_OPTION_NONE\r
65 ,\r
66 {\r
67 (OnigCodePoint )'\\' /* esc */\r
68 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */\r
69 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */\r
70 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */\r
71 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */\r
72 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */\r
73 }\r
74};\r
75\r
76OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_RUBY;\r
77\r
78extern void onig_null_warn(const char* s ARG_UNUSED) { }\r
79\r
80#ifdef DEFAULT_WARN_FUNCTION\r
81static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION;\r
82#else\r
83static OnigWarnFunc onig_warn = onig_null_warn;\r
84#endif\r
85\r
86#ifdef DEFAULT_VERB_WARN_FUNCTION\r
87static OnigWarnFunc onig_verb_warn = (OnigWarnFunc )DEFAULT_VERB_WARN_FUNCTION;\r
88#else\r
89static OnigWarnFunc onig_verb_warn = onig_null_warn;\r
90#endif\r
91\r
92extern void onig_set_warn_func(OnigWarnFunc f)\r
93{\r
94 onig_warn = f;\r
95}\r
96\r
97extern void onig_set_verb_warn_func(OnigWarnFunc f)\r
98{\r
99 onig_verb_warn = f;\r
100}\r
101\r
102static void\r
103bbuf_free(BBuf* bbuf)\r
104{\r
105 if (IS_NOT_NULL(bbuf)) {\r
106 if (IS_NOT_NULL(bbuf->p)) xfree(bbuf->p);\r
107 xfree(bbuf);\r
108 }\r
109}\r
110\r
111static int\r
112bbuf_clone(BBuf** rto, BBuf* from)\r
113{\r
114 int r;\r
115 BBuf *to;\r
116\r
117 *rto = to = (BBuf* )xmalloc(sizeof(BBuf));\r
118 CHECK_NULL_RETURN_MEMERR(to);\r
119 r = BBUF_INIT(to, from->alloc);\r
120 if (r != 0) return r;\r
121 to->used = from->used;\r
122 xmemcpy(to->p, from->p, from->used);\r
123 return 0;\r
124}\r
125\r
126#define BACKREF_REL_TO_ABS(rel_no, env) \\r
127 ((env)->num_mem + 1 + (rel_no))\r
128\r
/* Clear flag bits f in v when `negative` is true, otherwise set them.
   The whole expansion is parenthesized so it stays one expression wherever
   it is used. */
#define ONOFF(v,f,negative)    ((negative) ? ((v) &= ~(f)) : ((v) |= (f)))
130\r
131#define MBCODE_START_POS(enc) \\r
132 (OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80)\r
133\r
134#define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \\r
135 add_code_range_to_buf(pbuf, MBCODE_START_POS(enc), ~((OnigCodePoint )0))\r
136\r
137#define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\\r
138 if (! ONIGENC_IS_SINGLEBYTE(enc)) {\\r
139 r = SET_ALL_MULTI_BYTE_RANGE(enc, &(mbuf));\\r
140 if (r) return r;\\r
141 }\\r
142} while (0)\r
143\r
144\r
145#define BITSET_IS_EMPTY(bs,empty) do {\\r
146 int i;\\r
147 empty = 1;\\r
148 for (i = 0; i < (int )BITSET_SIZE; i++) {\\r
149 if ((bs)[i] != 0) {\\r
150 empty = 0; break;\\r
151 }\\r
152 }\\r
153} while (0)\r
154\r
155static void\r
156bitset_set_range(BitSetRef bs, int from, int to)\r
157{\r
158 int i;\r
159 for (i = from; i <= to && i < SINGLE_BYTE_SIZE; i++) {\r
160 BITSET_SET_BIT(bs, i);\r
161 }\r
162}\r
163\r
164#if 0\r
165static void\r
166bitset_set_all(BitSetRef bs)\r
167{\r
168 int i;\r
169 for (i = 0; i < BITSET_SIZE; i++) { bs[i] = ~((Bits )0); }\r
170}\r
171#endif\r
172\r
173static void\r
174bitset_invert(BitSetRef bs)\r
175{\r
176 int i;\r
177 for (i = 0; i < (int )BITSET_SIZE; i++) { bs[i] = ~(bs[i]); }\r
178}\r
179\r
180static void\r
181bitset_invert_to(BitSetRef from, BitSetRef to)\r
182{\r
183 int i;\r
184 for (i = 0; i < (int )BITSET_SIZE; i++) { to[i] = ~(from[i]); }\r
185}\r
186\r
187static void\r
188bitset_and(BitSetRef dest, BitSetRef bs)\r
189{\r
190 int i;\r
191 for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] &= bs[i]; }\r
192}\r
193\r
194static void\r
195bitset_or(BitSetRef dest, BitSetRef bs)\r
196{\r
197 int i;\r
198 for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] |= bs[i]; }\r
199}\r
200\r
201static void\r
202bitset_copy(BitSetRef dest, BitSetRef bs)\r
203{\r
204 int i;\r
205 for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] = bs[i]; }\r
206}\r
207\r
208extern int\r
209onig_strncmp(const UChar* s1, const UChar* s2, int n)\r
210{\r
211 int x;\r
212\r
213 while (n-- > 0) {\r
214 x = *s2++ - *s1++;\r
215 if (x) return x;\r
216 }\r
217 return 0;\r
218}\r
219\r
220extern void\r
221onig_strcpy(UChar* dest, const UChar* src, const UChar* end)\r
222{\r
223 int len = (int)(end - src);\r
224 if (len > 0) {\r
225 xmemcpy(dest, src, len);\r
226 dest[len] = (UChar )0;\r
227 }\r
228}\r
229\r
230#ifdef USE_NAMED_GROUP\r
231static UChar*\r
232strdup_with_null(OnigEncoding enc, UChar* s, UChar* end)\r
233{\r
234 int slen, term_len, i;\r
235 UChar *r;\r
236\r
237 slen = (int)(end - s);\r
238 term_len = ONIGENC_MBC_MINLEN(enc);\r
239\r
240 r = (UChar* )xmalloc(slen + term_len);\r
241 CHECK_NULL_RETURN(r);\r
242 xmemcpy(r, s, slen);\r
243\r
244 for (i = 0; i < term_len; i++)\r
245 r[slen + i] = (UChar )0;\r
246\r
247 return r;\r
248}\r
249#endif\r
250\r
251/* scan pattern methods */\r
252#define PEND_VALUE 0\r
253\r
254#define PFETCH_READY UChar* pfetch_prev\r
255#define PEND (p < end ? 0 : 1)\r
256#define PUNFETCH p = pfetch_prev\r
257#define PINC do { \\r
258 pfetch_prev = p; \\r
259 p += ONIGENC_MBC_ENC_LEN(enc, p); \\r
260} while (0)\r
261#define PFETCH(c) do { \\r
262 c = ONIGENC_MBC_TO_CODE(enc, p, end); \\r
263 pfetch_prev = p; \\r
264 p += ONIGENC_MBC_ENC_LEN(enc, p); \\r
265} while (0)\r
266\r
267#define PINC_S do { \\r
268 p += ONIGENC_MBC_ENC_LEN(enc, p); \\r
269} while (0)\r
270#define PFETCH_S(c) do { \\r
271 c = ONIGENC_MBC_TO_CODE(enc, p, end); \\r
272 p += ONIGENC_MBC_ENC_LEN(enc, p); \\r
273} while (0)\r
274\r
275#define PPEEK (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE)\r
276#define PPEEK_IS(c) (PPEEK == (OnigCodePoint )c)\r
277\r
278static UChar*\r
279strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end,\r
280 int capa, int oldCapa)\r
281{\r
282 UChar* r;\r
283\r
284 if (dest)\r
285 r = (UChar* )xrealloc(dest, capa + 1, oldCapa);\r
286 else\r
287 r = (UChar* )xmalloc(capa + 1);\r
288\r
289 CHECK_NULL_RETURN(r);\r
290 onig_strcpy(r + (dest_end - dest), src, src_end);\r
291 return r;\r
292}\r
293\r
294/* dest on static area */\r
295static UChar*\r
296strcat_capa_from_static(UChar* dest, UChar* dest_end,\r
297 const UChar* src, const UChar* src_end, int capa)\r
298{\r
299 UChar* r;\r
300\r
301 r = (UChar* )xmalloc(capa + 1);\r
302 CHECK_NULL_RETURN(r);\r
303 onig_strcpy(r, dest, dest_end);\r
304 onig_strcpy(r + (dest_end - dest), src, src_end);\r
305 return r;\r
306}\r
307\r
308\r
309#ifdef USE_ST_LIBRARY\r
310\r
311typedef struct {\r
312 UChar* s;\r
313 UChar* end;\r
314} st_str_end_key;\r
315\r
316static int\r
317str_end_cmp(st_str_end_key* x, st_str_end_key* y)\r
318{\r
319 UChar *p, *q;\r
320 int c;\r
321\r
322 if ((x->end - x->s) != (y->end - y->s))\r
323 return 1;\r
324\r
325 p = x->s;\r
326 q = y->s;\r
327 while (p < x->end) {\r
328 c = (int )*p - (int )*q;\r
329 if (c != 0) return c;\r
330\r
331 p++; q++;\r
332 }\r
333\r
334 return 0;\r
335}\r
336\r
337static int\r
338str_end_hash(st_str_end_key* x)\r
339{\r
340 UChar *p;\r
341 int val = 0;\r
342\r
343 p = x->s;\r
344 while (p < x->end) {\r
345 val = val * 997 + (int )*p++;\r
346 }\r
347\r
348 return val + (val >> 5);\r
349}\r
350\r
351extern hash_table_type*\r
352onig_st_init_strend_table_with_size(int size)\r
353{\r
354 static struct st_hash_type hashType = {\r
355 str_end_cmp,\r
356 str_end_hash,\r
357 };\r
358\r
359 return (hash_table_type* )\r
360 onig_st_init_table_with_size(&hashType, size);\r
361}\r
362\r
363extern int\r
364onig_st_lookup_strend(hash_table_type* table, const UChar* str_key,\r
365 const UChar* end_key, hash_data_type *value)\r
366{\r
367 st_str_end_key key;\r
368\r
369 key.s = (UChar* )str_key;\r
370 key.end = (UChar* )end_key;\r
371\r
372 return onig_st_lookup(table, (st_data_t )(&key), value);\r
373}\r
374\r
375extern int\r
376onig_st_insert_strend(hash_table_type* table, const UChar* str_key,\r
377 const UChar* end_key, hash_data_type value)\r
378{\r
379 st_str_end_key* key;\r
380 int result;\r
381\r
382 key = (st_str_end_key* )xmalloc(sizeof(st_str_end_key));\r
383 key->s = (UChar* )str_key;\r
384 key->end = (UChar* )end_key;\r
385 result = onig_st_insert(table, (st_data_t )key, value);\r
386 if (result) {\r
387 xfree(key);\r
388 }\r
389 return result;\r
390}\r
391\r
392#endif /* USE_ST_LIBRARY */\r
393\r
394\r
395#ifdef USE_NAMED_GROUP\r
396\r
397#define INIT_NAME_BACKREFS_ALLOC_NUM 8\r
398\r
399typedef struct {\r
400 UChar* name;\r
401 int name_len; /* byte length */\r
402 int back_num; /* number of backrefs */\r
403 int back_alloc;\r
404 int back_ref1;\r
405 int* back_refs;\r
406} NameEntry;\r
407\r
408#ifdef USE_ST_LIBRARY\r
409\r
410typedef st_table NameTable;\r
411typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */\r
412\r
413#define NAMEBUF_SIZE 24\r
414#define NAMEBUF_SIZE_1 25\r
415\r
416#ifdef ONIG_DEBUG\r
417static int\r
418i_print_name_entry(UChar* key, NameEntry* e, void* arg)\r
419{\r
420 int i;\r
421 FILE* fp = (FILE* )arg;\r
422\r
423 fprintf(fp, "%s: ", e->name);\r
424 if (e->back_num == 0)\r
425 fputs("-", fp);\r
426 else if (e->back_num == 1)\r
427 fprintf(fp, "%d", e->back_ref1);\r
428 else {\r
429 for (i = 0; i < e->back_num; i++) {\r
430 if (i > 0) fprintf(fp, ", ");\r
431 fprintf(fp, "%d", e->back_refs[i]);\r
432 }\r
433 }\r
434 fputs("\n", fp);\r
435 return ST_CONTINUE;\r
436}\r
437\r
438extern int\r
439onig_print_names(FILE* fp, regex_t* reg)\r
440{\r
441 NameTable* t = (NameTable* )reg->name_table;\r
442\r
443 if (IS_NOT_NULL(t)) {\r
444 fprintf(fp, "name table\n");\r
445 onig_st_foreach(t, i_print_name_entry, (HashDataType )fp);\r
446 fputs("\n", fp);\r
447 }\r
448 return 0;\r
449}\r
450#endif /* ONIG_DEBUG */\r
451\r
452static int\r
453i_free_name_entry(UChar* key, NameEntry* e, void* arg ARG_UNUSED)\r
454{\r
455 xfree(e->name);\r
456 if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);\r
457 xfree(key);\r
458 xfree(e);\r
459 return ST_DELETE;\r
460}\r
461\r
462static int\r
463names_clear(regex_t* reg)\r
464{\r
465 NameTable* t = (NameTable* )reg->name_table;\r
466\r
467 if (IS_NOT_NULL(t)) {\r
468 onig_st_foreach(t, i_free_name_entry, 0);\r
469 }\r
470 return 0;\r
471}\r
472\r
473extern int\r
474onig_names_free(regex_t* reg)\r
475{\r
476 int r;\r
477 NameTable* t;\r
478\r
479 r = names_clear(reg);\r
480 if (r) return r;\r
481\r
482 t = (NameTable* )reg->name_table;\r
483 if (IS_NOT_NULL(t)) onig_st_free_table(t);\r
484 reg->name_table = (void* )NULL;\r
485 return 0;\r
486}\r
487\r
488static NameEntry*\r
489name_find(regex_t* reg, const UChar* name, const UChar* name_end)\r
490{\r
491 NameEntry* e;\r
492 NameTable* t = (NameTable* )reg->name_table;\r
493\r
494 e = (NameEntry* )NULL;\r
495 if (IS_NOT_NULL(t)) {\r
496 onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));\r
497 }\r
498 return e;\r
499}\r
500\r
501typedef struct {\r
502 int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*);\r
503 regex_t* reg;\r
504 void* arg;\r
505 int ret;\r
506 OnigEncoding enc;\r
507} INamesArg;\r
508\r
509static int\r
510i_names(UChar* key ARG_UNUSED, NameEntry* e, INamesArg* arg)\r
511{\r
512 int r = (*(arg->func))(e->name,\r
513 e->name + e->name_len,\r
514 e->back_num,\r
515 (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),\r
516 arg->reg, arg->arg);\r
517 if (r != 0) {\r
518 arg->ret = r;\r
519 return ST_STOP;\r
520 }\r
521 return ST_CONTINUE;\r
522}\r
523\r
524extern int\r
525onig_foreach_name(regex_t* reg,\r
526 int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)\r
527{\r
528 INamesArg narg;\r
529 NameTable* t = (NameTable* )reg->name_table;\r
530\r
531 narg.ret = 0;\r
532 if (IS_NOT_NULL(t)) {\r
533 narg.func = func;\r
534 narg.reg = reg;\r
535 narg.arg = arg;\r
536 narg.enc = reg->enc; /* should be pattern encoding. */\r
537 onig_st_foreach(t, i_names, (HashDataType )&narg);\r
538 }\r
539 return narg.ret;\r
540}\r
541\r
542static int\r
543i_renumber_name(UChar* key ARG_UNUSED, NameEntry* e, GroupNumRemap* map)\r
544{\r
545 int i;\r
546\r
547 if (e->back_num > 1) {\r
548 for (i = 0; i < e->back_num; i++) {\r
549 e->back_refs[i] = map[e->back_refs[i]].new_val;\r
550 }\r
551 }\r
552 else if (e->back_num == 1) {\r
553 e->back_ref1 = map[e->back_ref1].new_val;\r
554 }\r
555\r
556 return ST_CONTINUE;\r
557}\r
558\r
559extern int\r
560onig_renumber_name_table(regex_t* reg, GroupNumRemap* map)\r
561{\r
562 NameTable* t = (NameTable* )reg->name_table;\r
563\r
564 if (IS_NOT_NULL(t)) {\r
565 onig_st_foreach(t, i_renumber_name, (HashDataType )map);\r
566 }\r
567 return 0;\r
568}\r
569\r
570\r
571extern int\r
572onig_number_of_names(regex_t* reg)\r
573{\r
574 NameTable* t = (NameTable* )reg->name_table;\r
575\r
576 if (IS_NOT_NULL(t))\r
577 return t->num_entries;\r
578 else\r
579 return 0;\r
580}\r
581\r
582#else /* USE_ST_LIBRARY */\r
583\r
584#define INIT_NAMES_ALLOC_NUM 8\r
585\r
586typedef struct {\r
587 NameEntry* e;\r
588 int num;\r
589 int alloc;\r
590} NameTable;\r
591\r
592#ifdef ONIG_DEBUG\r
593extern int\r
594onig_print_names(FILE* fp, regex_t* reg)\r
595{\r
596 int i, j;\r
597 NameEntry* e;\r
598 NameTable* t = (NameTable* )reg->name_table;\r
599\r
600 if (IS_NOT_NULL(t) && t->num > 0) {\r
601 fprintf(fp, "name table\n");\r
602 for (i = 0; i < t->num; i++) {\r
603 e = &(t->e[i]);\r
604 fprintf(fp, "%s: ", e->name);\r
605 if (e->back_num == 0) {\r
606 fputs("-", fp);\r
607 }\r
608 else if (e->back_num == 1) {\r
609 fprintf(fp, "%d", e->back_ref1);\r
610 }\r
611 else {\r
612 for (j = 0; j < e->back_num; j++) {\r
613 if (j > 0) fprintf(fp, ", ");\r
614 fprintf(fp, "%d", e->back_refs[j]);\r
615 }\r
616 }\r
617 fputs("\n", fp);\r
618 }\r
619 fputs("\n", fp);\r
620 }\r
621 return 0;\r
622}\r
623#endif\r
624\r
625static int\r
626names_clear(regex_t* reg)\r
627{\r
628 int i;\r
629 NameEntry* e;\r
630 NameTable* t = (NameTable* )reg->name_table;\r
631\r
632 if (IS_NOT_NULL(t)) {\r
633 for (i = 0; i < t->num; i++) {\r
634 e = &(t->e[i]);\r
635 if (IS_NOT_NULL(e->name)) {\r
636 xfree(e->name);\r
637 e->name = NULL;\r
638 e->name_len = 0;\r
639 e->back_num = 0;\r
640 e->back_alloc = 0;\r
641 if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);\r
642 e->back_refs = (int* )NULL;\r
643 }\r
644 }\r
645 if (IS_NOT_NULL(t->e)) {\r
646 xfree(t->e);\r
647 t->e = NULL;\r
648 }\r
649 t->num = 0;\r
650 }\r
651 return 0;\r
652}\r
653\r
654extern int\r
655onig_names_free(regex_t* reg)\r
656{\r
657 int r;\r
658 NameTable* t;\r
659\r
660 r = names_clear(reg);\r
661 if (r) return r;\r
662\r
663 t = (NameTable* )reg->name_table;\r
664 if (IS_NOT_NULL(t)) xfree(t);\r
665 reg->name_table = NULL;\r
666 return 0;\r
667}\r
668\r
669static NameEntry*\r
670name_find(regex_t* reg, UChar* name, UChar* name_end)\r
671{\r
672 int i, len;\r
673 NameEntry* e;\r
674 NameTable* t = (NameTable* )reg->name_table;\r
675\r
676 if (IS_NOT_NULL(t)) {\r
677 len = name_end - name;\r
678 for (i = 0; i < t->num; i++) {\r
679 e = &(t->e[i]);\r
680 if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)\r
681 return e;\r
682 }\r
683 }\r
684 return (NameEntry* )NULL;\r
685}\r
686\r
687extern int\r
688onig_foreach_name(regex_t* reg,\r
689 int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)\r
690{\r
691 int i, r;\r
692 NameEntry* e;\r
693 NameTable* t = (NameTable* )reg->name_table;\r
694\r
695 if (IS_NOT_NULL(t)) {\r
696 for (i = 0; i < t->num; i++) {\r
697 e = &(t->e[i]);\r
698 r = (*func)(e->name, e->name + e->name_len, e->back_num,\r
699 (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),\r
700 reg, arg);\r
701 if (r != 0) return r;\r
702 }\r
703 }\r
704 return 0;\r
705}\r
706\r
707extern int\r
708onig_number_of_names(regex_t* reg)\r
709{\r
710 NameTable* t = (NameTable* )reg->name_table;\r
711\r
712 if (IS_NOT_NULL(t))\r
713 return t->num;\r
714 else\r
715 return 0;\r
716}\r
717\r
718#endif /* else USE_ST_LIBRARY */\r
719\r
720static int\r
721name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)\r
722{\r
723 int alloc;\r
724 NameEntry* e;\r
725 NameTable* t = (NameTable* )reg->name_table;\r
726\r
727 if (name_end - name <= 0)\r
728 return ONIGERR_EMPTY_GROUP_NAME;\r
729\r
730 e = name_find(reg, name, name_end);\r
731 if (IS_NULL(e)) {\r
732#ifdef USE_ST_LIBRARY\r
733 if (IS_NULL(t)) {\r
734 t = onig_st_init_strend_table_with_size(5);\r
735 reg->name_table = (void* )t;\r
736 }\r
737 e = (NameEntry* )xmalloc(sizeof(NameEntry));\r
738 CHECK_NULL_RETURN_MEMERR(e);\r
739\r
740 e->name = strdup_with_null(reg->enc, name, name_end);\r
741 if (IS_NULL(e->name)) {\r
742 xfree(e); return ONIGERR_MEMORY;\r
743 }\r
744 onig_st_insert_strend(t, e->name, (e->name + (name_end - name)),\r
745 (HashDataType )e);\r
746\r
747 e->name_len = (int)(name_end - name);\r
748 e->back_num = 0;\r
749 e->back_alloc = 0;\r
750 e->back_refs = (int* )NULL;\r
751\r
752#else\r
753\r
754 if (IS_NULL(t)) {\r
755 alloc = INIT_NAMES_ALLOC_NUM;\r
756 t = (NameTable* )xmalloc(sizeof(NameTable));\r
757 CHECK_NULL_RETURN_MEMERR(t);\r
758 t->e = NULL;\r
759 t->alloc = 0;\r
760 t->num = 0;\r
761\r
762 t->e = (NameEntry* )xmalloc(sizeof(NameEntry) * alloc);\r
763 if (IS_NULL(t->e)) {\r
764 xfree(t);\r
765 return ONIGERR_MEMORY;\r
766 }\r
767 t->alloc = alloc;\r
768 reg->name_table = t;\r
769 goto clear;\r
770 }\r
771 else if (t->num == t->alloc) {\r
772 int i;\r
773\r
774 alloc = t->alloc * 2;\r
775 t->e = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc);\r
776 CHECK_NULL_RETURN_MEMERR(t->e);\r
777 t->alloc = alloc;\r
778\r
779 clear:\r
780 for (i = t->num; i < t->alloc; i++) {\r
781 t->e[i].name = NULL;\r
782 t->e[i].name_len = 0;\r
783 t->e[i].back_num = 0;\r
784 t->e[i].back_alloc = 0;\r
785 t->e[i].back_refs = (int* )NULL;\r
786 }\r
787 }\r
788 e = &(t->e[t->num]);\r
789 t->num++;\r
790 e->name = strdup_with_null(reg->enc, name, name_end);\r
791 if (IS_NULL(e->name)) return ONIGERR_MEMORY;\r
792 e->name_len = name_end - name;\r
793#endif\r
794 }\r
795\r
796 if (e->back_num >= 1 &&\r
797 ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME)) {\r
798 onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME,\r
799 name, name_end);\r
800 return ONIGERR_MULTIPLEX_DEFINED_NAME;\r
801 }\r
802\r
803 e->back_num++;\r
804 if (e->back_num == 1) {\r
805 e->back_ref1 = backref;\r
806 }\r
807 else {\r
808 if (e->back_num == 2) {\r
809 alloc = INIT_NAME_BACKREFS_ALLOC_NUM;\r
810 e->back_refs = (int* )xmalloc(sizeof(int) * alloc);\r
811 CHECK_NULL_RETURN_MEMERR(e->back_refs);\r
812 e->back_alloc = alloc;\r
813 e->back_refs[0] = e->back_ref1;\r
814 e->back_refs[1] = backref;\r
815 }\r
816 else {\r
817 if (e->back_num > e->back_alloc) {\r
818 alloc = e->back_alloc * 2;\r
819 e->back_refs = (int* )xrealloc(e->back_refs, sizeof(int) * alloc, sizeof(int) * e->back_alloc);\r
820 CHECK_NULL_RETURN_MEMERR(e->back_refs);\r
821 e->back_alloc = alloc;\r
822 }\r
823 e->back_refs[e->back_num - 1] = backref;\r
824 }\r
825 }\r
826\r
827 return 0;\r
828}\r
829\r
830extern int\r
831onig_name_to_group_numbers(regex_t* reg, const UChar* name,\r
832 const UChar* name_end, int** nums)\r
833{\r
834 NameEntry* e = name_find(reg, name, name_end);\r
835\r
836 if (IS_NULL(e)) return ONIGERR_UNDEFINED_NAME_REFERENCE;\r
837\r
838 switch (e->back_num) {\r
839 case 0:\r
840 break;\r
841 case 1:\r
842 *nums = &(e->back_ref1);\r
843 break;\r
844 default:\r
845 *nums = e->back_refs;\r
846 break;\r
847 }\r
848 return e->back_num;\r
849}\r
850\r
851extern int\r
852onig_name_to_backref_number(regex_t* reg, const UChar* name,\r
853 const UChar* name_end, OnigRegion *region)\r
854{\r
855 int i, n, *nums;\r
856\r
857 n = onig_name_to_group_numbers(reg, name, name_end, &nums);\r
858 if (n < 0)\r
859 return n;\r
860 else if (n == 0)\r
861 return ONIGERR_PARSER_BUG;\r
862 else if (n == 1)\r
863 return nums[0];\r
864 else {\r
865 if (IS_NOT_NULL(region)) {\r
866 for (i = n - 1; i >= 0; i--) {\r
867 if (region->beg[nums[i]] != ONIG_REGION_NOTPOS)\r
868 return nums[i];\r
869 }\r
870 }\r
871 return nums[n - 1];\r
872 }\r
873}\r
874\r
875#else /* USE_NAMED_GROUP */\r
876\r
877extern int\r
878onig_name_to_group_numbers(regex_t* reg, const UChar* name,\r
879 const UChar* name_end, int** nums)\r
880{\r
881 return ONIG_NO_SUPPORT_CONFIG;\r
882}\r
883\r
884extern int\r
885onig_name_to_backref_number(regex_t* reg, const UChar* name,\r
886 const UChar* name_end, OnigRegion* region)\r
887{\r
888 return ONIG_NO_SUPPORT_CONFIG;\r
889}\r
890\r
891extern int\r
892onig_foreach_name(regex_t* reg,\r
893 int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)\r
894{\r
895 return ONIG_NO_SUPPORT_CONFIG;\r
896}\r
897\r
898extern int\r
899onig_number_of_names(regex_t* reg)\r
900{\r
901 return 0;\r
902}\r
903#endif /* else USE_NAMED_GROUP */\r
904\r
905extern int\r
906onig_noname_group_capture_is_active(regex_t* reg)\r
907{\r
908 if (ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_DONT_CAPTURE_GROUP))\r
909 return 0;\r
910\r
911#ifdef USE_NAMED_GROUP\r
912 if (onig_number_of_names(reg) > 0 &&\r
913 IS_SYNTAX_BV(reg->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&\r
914 !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {\r
915 return 0;\r
916 }\r
917#endif\r
918\r
919 return 1;\r
920}\r
921\r
922\r
923#define INIT_SCANENV_MEMNODES_ALLOC_SIZE 16\r
924\r
925static void\r
926scan_env_clear(ScanEnv* env)\r
927{\r
928 int i;\r
929\r
930 BIT_STATUS_CLEAR(env->capture_history);\r
931 BIT_STATUS_CLEAR(env->bt_mem_start);\r
932 BIT_STATUS_CLEAR(env->bt_mem_end);\r
933 BIT_STATUS_CLEAR(env->backrefed_mem);\r
934 env->error = (UChar* )NULL;\r
935 env->error_end = (UChar* )NULL;\r
936 env->num_call = 0;\r
937 env->num_mem = 0;\r
938#ifdef USE_NAMED_GROUP\r
939 env->num_named = 0;\r
940#endif\r
941 env->mem_alloc = 0;\r
942 env->mem_nodes_dynamic = (Node** )NULL;\r
943\r
944 for (i = 0; i < SCANENV_MEMNODES_SIZE; i++)\r
945 env->mem_nodes_static[i] = NULL_NODE;\r
946\r
947#ifdef USE_COMBINATION_EXPLOSION_CHECK\r
948 env->num_comb_exp_check = 0;\r
949 env->comb_exp_max_regnum = 0;\r
950 env->curr_max_regnum = 0;\r
951 env->has_recursion = 0;\r
952#endif\r
953}\r
954\r
955static int\r
956scan_env_add_mem_entry(ScanEnv* env)\r
957{\r
958 int i, need, alloc;\r
959 Node** p;\r
960\r
961 need = env->num_mem + 1;\r
962 if (need >= SCANENV_MEMNODES_SIZE) {\r
963 if (env->mem_alloc <= need) {\r
964 if (IS_NULL(env->mem_nodes_dynamic)) {\r
965 alloc = INIT_SCANENV_MEMNODES_ALLOC_SIZE;\r
966 p = (Node** )xmalloc(sizeof(Node*) * alloc);\r
967 xmemcpy(p, env->mem_nodes_static,\r
968 sizeof(Node*) * SCANENV_MEMNODES_SIZE);\r
969 }\r
970 else {\r
971 alloc = env->mem_alloc * 2;\r
972 p = (Node** )xrealloc(env->mem_nodes_dynamic, sizeof(Node*) * alloc, sizeof(Node*) * env->mem_alloc);\r
973 }\r
974 CHECK_NULL_RETURN_MEMERR(p);\r
975\r
976 for (i = env->num_mem + 1; i < alloc; i++)\r
977 p[i] = NULL_NODE;\r
978\r
979 env->mem_nodes_dynamic = p;\r
980 env->mem_alloc = alloc;\r
981 }\r
982 }\r
983\r
984 env->num_mem++;\r
985 return env->num_mem;\r
986}\r
987\r
988static int\r
989scan_env_set_mem_node(ScanEnv* env, int num, Node* node)\r
990{\r
991 if (env->num_mem >= num)\r
992 SCANENV_MEM_NODES(env)[num] = node;\r
993 else\r
994 return ONIGERR_PARSER_BUG;\r
995 return 0;\r
996}\r
997\r
998\r
999#ifdef USE_PARSE_TREE_NODE_RECYCLE\r
1000typedef struct _FreeNode {\r
1001 struct _FreeNode* next;\r
1002} FreeNode;\r
1003\r
1004static FreeNode* FreeNodeList = (FreeNode* )NULL;\r
1005#endif\r
1006\r
1007extern void\r
1008onig_node_free(Node* node)\r
1009{\r
1010 start:\r
1011 if (IS_NULL(node)) return ;\r
1012\r
1013 switch (NTYPE(node)) {\r
1014 case NT_STR:\r
1015 if (NSTR(node)->capa != 0 &&\r
1016 IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) {\r
1017 xfree(NSTR(node)->s);\r
1018 }\r
1019 break;\r
1020\r
1021 case NT_LIST:\r
1022 case NT_ALT:\r
1023 onig_node_free(NCAR(node));\r
1024 {\r
1025 Node* next_node = NCDR(node);\r
1026\r
1027#ifdef USE_PARSE_TREE_NODE_RECYCLE\r
1028 {\r
1029 FreeNode* n = (FreeNode* )node;\r
1030\r
1031 THREAD_ATOMIC_START;\r
1032 n->next = FreeNodeList;\r
1033 FreeNodeList = n;\r
1034 THREAD_ATOMIC_END;\r
1035 }\r
1036#else\r
1037 xfree(node);\r
1038#endif\r
1039 node = next_node;\r
1040 goto start;\r
1041 }\r
1042 break;\r
1043\r
1044 case NT_CCLASS:\r
1045 {\r
1046 CClassNode* cc = NCCLASS(node);\r
1047\r
1048 if (IS_NCCLASS_SHARE(cc)) return ;\r
1049 if (cc->mbuf)\r
1050 bbuf_free(cc->mbuf);\r
1051 }\r
1052 break;\r
1053\r
1054 case NT_QTFR:\r
1055 if (NQTFR(node)->target)\r
1056 onig_node_free(NQTFR(node)->target);\r
1057 break;\r
1058\r
1059 case NT_ENCLOSE:\r
1060 if (NENCLOSE(node)->target)\r
1061 onig_node_free(NENCLOSE(node)->target);\r
1062 break;\r
1063\r
1064 case NT_BREF:\r
1065 if (IS_NOT_NULL(NBREF(node)->back_dynamic))\r
1066 xfree(NBREF(node)->back_dynamic);\r
1067 break;\r
1068\r
1069 case NT_ANCHOR:\r
1070 if (NANCHOR(node)->target)\r
1071 onig_node_free(NANCHOR(node)->target);\r
1072 break;\r
1073 }\r
1074\r
1075#ifdef USE_PARSE_TREE_NODE_RECYCLE\r
1076 {\r
1077 FreeNode* n = (FreeNode* )node;\r
1078\r
1079 THREAD_ATOMIC_START;\r
1080 n->next = FreeNodeList;\r
1081 FreeNodeList = n;\r
1082 THREAD_ATOMIC_END;\r
1083 }\r
1084#else\r
1085 xfree(node);\r
1086#endif\r
1087}\r
1088\r
1089#ifdef USE_PARSE_TREE_NODE_RECYCLE\r
1090extern int\r
1091onig_free_node_list(void)\r
1092{\r
1093 FreeNode* n;\r
1094\r
1095 /* THREAD_ATOMIC_START; */\r
1096 while (IS_NOT_NULL(FreeNodeList)) {\r
1097 n = FreeNodeList;\r
1098 FreeNodeList = FreeNodeList->next;\r
1099 xfree(n);\r
1100 }\r
1101 /* THREAD_ATOMIC_END; */\r
1102 return 0;\r
1103}\r
1104#endif\r
1105\r
1106static Node*\r
1107node_new(void)\r
1108{\r
1109 Node* node;\r
1110\r
1111#ifdef USE_PARSE_TREE_NODE_RECYCLE\r
1112 THREAD_ATOMIC_START;\r
1113 if (IS_NOT_NULL(FreeNodeList)) {\r
1114 node = (Node* )FreeNodeList;\r
1115 FreeNodeList = FreeNodeList->next;\r
1116 THREAD_ATOMIC_END;\r
1117 return node;\r
1118 }\r
1119 THREAD_ATOMIC_END;\r
1120#endif\r
1121\r
1122 node = (Node* )xmalloc(sizeof(Node));\r
1123 /* xmemset(node, 0, sizeof(Node)); */\r
1124 return node;\r
1125}\r
1126\r
1127\r
1128static void\r
1129initialize_cclass(CClassNode* cc)\r
1130{\r
1131 BITSET_CLEAR(cc->bs);\r
1132 /* cc->base.flags = 0; */\r
1133 cc->flags = 0;\r
1134 cc->mbuf = NULL;\r
1135}\r
1136\r
1137static Node*\r
1138node_new_cclass(void)\r
1139{\r
1140 Node* node = node_new();\r
1141 CHECK_NULL_RETURN(node);\r
1142\r
1143 SET_NTYPE(node, NT_CCLASS);\r
1144 initialize_cclass(NCCLASS(node));\r
1145 return node;\r
1146}\r
1147\r
1148static Node*\r
1149node_new_cclass_by_codepoint_range(int not, OnigCodePoint sb_out,\r
1150 const OnigCodePoint ranges[])\r
1151{\r
1152 int n, i;\r
1153 CClassNode* cc;\r
1154 OnigCodePoint j;\r
1155\r
1156 Node* node = node_new_cclass();\r
1157 CHECK_NULL_RETURN(node);\r
1158\r
1159 cc = NCCLASS(node);\r
1160 if (not != 0) NCCLASS_SET_NOT(cc);\r
1161\r
1162 BITSET_CLEAR(cc->bs);\r
1163 if (sb_out > 0 && IS_NOT_NULL(ranges)) {\r
1164 n = ONIGENC_CODE_RANGE_NUM(ranges);\r
1165 for (i = 0; i < n; i++) {\r
1166 for (j = ONIGENC_CODE_RANGE_FROM(ranges, i);\r
1167 j <= (OnigCodePoint )ONIGENC_CODE_RANGE_TO(ranges, i); j++) {\r
1168 if (j >= sb_out) goto sb_end;\r
1169\r
1170 BITSET_SET_BIT(cc->bs, j);\r
1171 }\r
1172 }\r
1173 }\r
1174\r
1175 sb_end:\r
1176 if (IS_NULL(ranges)) {\r
1177 is_null:\r
1178 cc->mbuf = NULL;\r
1179 }\r
1180 else {\r
1181 BBuf* bbuf;\r
1182\r
1183 n = ONIGENC_CODE_RANGE_NUM(ranges);\r
1184 if (n == 0) goto is_null;\r
1185\r
1186 bbuf = (BBuf* )xmalloc(sizeof(BBuf));\r
1187 CHECK_NULL_RETURN(bbuf);\r
1188 bbuf->alloc = n + 1;\r
1189 bbuf->used = n + 1;\r
1190 bbuf->p = (UChar* )((void* )ranges);\r
1191\r
1192 cc->mbuf = bbuf;\r
1193 }\r
1194\r
1195 return node;\r
1196}\r
1197\r
1198static Node*\r
1199node_new_ctype(int type, int not)\r
1200{\r
1201 Node* node = node_new();\r
1202 CHECK_NULL_RETURN(node);\r
1203\r
1204 SET_NTYPE(node, NT_CTYPE);\r
1205 NCTYPE(node)->ctype = type;\r
1206 NCTYPE(node)->not = not;\r
1207 return node;\r
1208}\r
1209\r
1210static Node*\r
1211node_new_anychar(void)\r
1212{\r
1213 Node* node = node_new();\r
1214 CHECK_NULL_RETURN(node);\r
1215\r
1216 SET_NTYPE(node, NT_CANY);\r
1217 return node;\r
1218}\r
1219\r
1220static Node*\r
1221node_new_list(Node* left, Node* right)\r
1222{\r
1223 Node* node = node_new();\r
1224 CHECK_NULL_RETURN(node);\r
1225\r
1226 SET_NTYPE(node, NT_LIST);\r
1227 NCAR(node) = left;\r
1228 NCDR(node) = right;\r
1229 return node;\r
1230}\r
1231\r
1232extern Node*\r
1233onig_node_new_list(Node* left, Node* right)\r
1234{\r
1235 return node_new_list(left, right);\r
1236}\r
1237\r
1238extern Node*\r
1239onig_node_list_add(Node* list, Node* x)\r
1240{\r
1241 Node *n;\r
1242\r
1243 n = onig_node_new_list(x, NULL);\r
1244 if (IS_NULL(n)) return NULL_NODE;\r
1245\r
1246 if (IS_NOT_NULL(list)) {\r
1247 while (IS_NOT_NULL(NCDR(list)))\r
1248 list = NCDR(list);\r
1249\r
1250 NCDR(list) = n;\r
1251 }\r
1252\r
1253 return n;\r
1254}\r
1255\r
1256extern Node*\r
1257onig_node_new_alt(Node* left, Node* right)\r
1258{\r
1259 Node* node = node_new();\r
1260 CHECK_NULL_RETURN(node);\r
1261\r
1262 SET_NTYPE(node, NT_ALT);\r
1263 NCAR(node) = left;\r
1264 NCDR(node) = right;\r
1265 return node;\r
1266}\r
1267\r
1268extern Node*\r
1269onig_node_new_anchor(int type)\r
1270{\r
1271 Node* node = node_new();\r
1272 CHECK_NULL_RETURN(node);\r
1273\r
1274 SET_NTYPE(node, NT_ANCHOR);\r
1275 NANCHOR(node)->type = type;\r
1276 NANCHOR(node)->target = NULL;\r
1277 NANCHOR(node)->char_len = -1;\r
1278 return node;\r
1279}\r
1280\r
1281static Node*\r
1282node_new_backref(int back_num, int* backrefs, int by_name,\r
1283#ifdef USE_BACKREF_WITH_LEVEL\r
1284 int exist_level, int nest_level,\r
1285#endif\r
1286 ScanEnv* env)\r
1287{\r
1288 int i;\r
1289 Node* node = node_new();\r
1290\r
1291 CHECK_NULL_RETURN(node);\r
1292\r
1293 SET_NTYPE(node, NT_BREF);\r
1294 NBREF(node)->state = 0;\r
1295 NBREF(node)->back_num = back_num;\r
1296 NBREF(node)->back_dynamic = (int* )NULL;\r
1297 if (by_name != 0)\r
1298 NBREF(node)->state |= NST_NAME_REF;\r
1299\r
1300#ifdef USE_BACKREF_WITH_LEVEL\r
1301 if (exist_level != 0) {\r
1302 NBREF(node)->state |= NST_NEST_LEVEL;\r
1303 NBREF(node)->nest_level = nest_level;\r
1304 }\r
1305#endif\r
1306\r
1307 for (i = 0; i < back_num; i++) {\r
1308 if (backrefs[i] <= env->num_mem &&\r
1309 IS_NULL(SCANENV_MEM_NODES(env)[backrefs[i]])) {\r
1310 NBREF(node)->state |= NST_RECURSION; /* /...(\1).../ */\r
1311 break;\r
1312 }\r
1313 }\r
1314\r
1315 if (back_num <= NODE_BACKREFS_SIZE) {\r
1316 for (i = 0; i < back_num; i++)\r
1317 NBREF(node)->back_static[i] = backrefs[i];\r
1318 }\r
1319 else {\r
1320 int* p = (int* )xmalloc(sizeof(int) * back_num);\r
1321 if (IS_NULL(p)) {\r
1322 onig_node_free(node);\r
1323 return NULL;\r
1324 }\r
1325 NBREF(node)->back_dynamic = p;\r
1326 for (i = 0; i < back_num; i++)\r
1327 p[i] = backrefs[i];\r
1328 }\r
1329 return node;\r
1330}\r
1331\r
#ifdef USE_SUBEXP_CALL
/*
 * Create a sub-expression call node (NT_CALL).
 *
 *   name, name_end - stored as-is in the node (pointers, not copied)
 *   gnum           - group number; call by number if gnum != 0
 *
 * The call target is left as NULL_NODE.
 */
static Node*
node_new_call(UChar* name, UChar* name_end, int gnum)
{
  Node* call = node_new();

  CHECK_NULL_RETURN(call);
  SET_NTYPE(call, NT_CALL);
  NCALL(call)->group_num = gnum;
  NCALL(call)->name_end  = name_end;
  NCALL(call)->name      = name;
  NCALL(call)->target    = NULL_NODE;
  NCALL(call)->state     = 0;
  return call;
}
#endif
1348\r
1349static Node*\r
1350node_new_quantifier(int lower, int upper, int by_number)\r
1351{\r
1352 Node* node = node_new();\r
1353 CHECK_NULL_RETURN(node);\r
1354\r
1355 SET_NTYPE(node, NT_QTFR);\r
1356 NQTFR(node)->state = 0;\r
1357 NQTFR(node)->target = NULL;\r
1358 NQTFR(node)->lower = lower;\r
1359 NQTFR(node)->upper = upper;\r
1360 NQTFR(node)->greedy = 1;\r
1361 NQTFR(node)->target_empty_info = NQ_TARGET_ISNOT_EMPTY;\r
1362 NQTFR(node)->head_exact = NULL_NODE;\r
1363 NQTFR(node)->next_head_exact = NULL_NODE;\r
1364 NQTFR(node)->is_refered = 0;\r
1365 if (by_number != 0)\r
1366 NQTFR(node)->state |= NST_BY_NUMBER;\r
1367\r
1368#ifdef USE_COMBINATION_EXPLOSION_CHECK\r
1369 NQTFR(node)->comb_exp_check_num = 0;\r
1370#endif\r
1371\r
1372 return node;\r
1373}\r
1374\r
1375static Node*\r
1376node_new_enclose(int type)\r
1377{\r
1378 Node* node = node_new();\r
1379 CHECK_NULL_RETURN(node);\r
1380\r
1381 SET_NTYPE(node, NT_ENCLOSE);\r
1382 NENCLOSE(node)->type = type;\r
1383 NENCLOSE(node)->state = 0;\r
1384 NENCLOSE(node)->regnum = 0;\r
1385 NENCLOSE(node)->option = 0;\r
1386 NENCLOSE(node)->target = NULL;\r
1387 NENCLOSE(node)->call_addr = -1;\r
1388 NENCLOSE(node)->opt_count = 0;\r
1389 return node;\r
1390}\r
1391\r
1392extern Node*\r
1393onig_node_new_enclose(int type)\r
1394{\r
1395 return node_new_enclose(type);\r
1396}\r
1397\r
1398static Node*\r
1399node_new_enclose_memory(OnigOptionType option, int is_named)\r
1400{\r
1401 Node* node = node_new_enclose(ENCLOSE_MEMORY);\r
1402 CHECK_NULL_RETURN(node);\r
1403 if (is_named != 0)\r
1404 SET_ENCLOSE_STATUS(node, NST_NAMED_GROUP);\r
1405\r
1406#ifdef USE_SUBEXP_CALL\r
1407 NENCLOSE(node)->option = option;\r
1408#endif\r
1409 return node;\r
1410}\r
1411\r
1412static Node*\r
1413node_new_option(OnigOptionType option)\r
1414{\r
1415 Node* node = node_new_enclose(ENCLOSE_OPTION);\r
1416 CHECK_NULL_RETURN(node);\r
1417 NENCLOSE(node)->option = option;\r
1418 return node;\r
1419}\r
1420\r
1421extern int\r
1422onig_node_str_cat(Node* node, const UChar* s, const UChar* end)\r
1423{\r
1424 int addlen = (int)(end - s);\r
1425\r
1426 if (addlen > 0) {\r
1427 int len = (int)(NSTR(node)->end - NSTR(node)->s);\r
1428\r
1429 if (NSTR(node)->capa > 0 || (len + addlen > NODE_STR_BUF_SIZE - 1)) {\r
1430 UChar* p;\r
1431 int capa = len + addlen + NODE_STR_MARGIN;\r
1432\r
1433 if (capa <= NSTR(node)->capa) {\r
1434 onig_strcpy(NSTR(node)->s + len, s, end);\r
1435 }\r
1436 else {\r
1437 if (NSTR(node)->s == NSTR(node)->buf)\r
1438 p = strcat_capa_from_static(NSTR(node)->s, NSTR(node)->end,\r
1439 s, end, capa);\r
1440 else\r
1441 p = strcat_capa(NSTR(node)->s, NSTR(node)->end, s, end, capa, NSTR(node)->capa);\r
1442\r
1443 CHECK_NULL_RETURN_MEMERR(p);\r
1444 NSTR(node)->s = p;\r
1445 NSTR(node)->capa = capa;\r
1446 }\r
1447 }\r
1448 else {\r
1449 onig_strcpy(NSTR(node)->s + len, s, end);\r
1450 }\r
1451 NSTR(node)->end = NSTR(node)->s + len + addlen;\r
1452 }\r
1453\r
1454 return 0;\r
1455}\r
1456\r
1457extern int\r
1458onig_node_str_set(Node* node, const UChar* s, const UChar* end)\r
1459{\r
1460 onig_node_str_clear(node);\r
1461 return onig_node_str_cat(node, s, end);\r
1462}\r
1463\r
1464static int\r
1465node_str_cat_char(Node* node, UChar c)\r
1466{\r
1467 UChar s[1];\r
1468\r
1469 s[0] = c;\r
1470 return onig_node_str_cat(node, s, s + 1);\r
1471}\r
1472\r
1473extern void\r
1474onig_node_conv_to_str_node(Node* node, int flag)\r
1475{\r
1476 SET_NTYPE(node, NT_STR);\r
1477 NSTR(node)->flag = flag;\r
1478 NSTR(node)->capa = 0;\r
1479 NSTR(node)->s = NSTR(node)->buf;\r
1480 NSTR(node)->end = NSTR(node)->buf;\r
1481}\r
1482\r
1483extern void\r
1484onig_node_str_clear(Node* node)\r
1485{\r
1486 if (NSTR(node)->capa != 0 &&\r
1487 IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) {\r
1488 xfree(NSTR(node)->s);\r
1489 }\r
1490\r
1491 NSTR(node)->capa = 0;\r
1492 NSTR(node)->flag = 0;\r
1493 NSTR(node)->s = NSTR(node)->buf;\r
1494 NSTR(node)->end = NSTR(node)->buf;\r
1495}\r
1496\r
1497static Node*\r
1498node_new_str(const UChar* s, const UChar* end)\r
1499{\r
1500 Node* node = node_new();\r
1501 CHECK_NULL_RETURN(node);\r
1502\r
1503 SET_NTYPE(node, NT_STR);\r
1504 NSTR(node)->capa = 0;\r
1505 NSTR(node)->flag = 0;\r
1506 NSTR(node)->s = NSTR(node)->buf;\r
1507 NSTR(node)->end = NSTR(node)->buf;\r
1508 if (onig_node_str_cat(node, s, end)) {\r
1509 onig_node_free(node);\r
1510 return NULL;\r
1511 }\r
1512 return node;\r
1513}\r
1514\r
1515extern Node*\r
1516onig_node_new_str(const UChar* s, const UChar* end)\r
1517{\r
1518 return node_new_str(s, end);\r
1519}\r
1520\r
1521static Node*\r
1522node_new_str_raw(UChar* s, UChar* end)\r
1523{\r
1524 Node* node = node_new_str(s, end);\r
1525 NSTRING_SET_RAW(node);\r
1526 return node;\r
1527}\r
1528\r
1529static Node*\r
1530node_new_empty(void)\r
1531{\r
1532 return node_new_str(NULL, NULL);\r
1533}\r
1534\r
1535static Node*\r
1536node_new_str_raw_char(UChar c)\r
1537{\r
1538 UChar p[1];\r
1539\r
1540 p[0] = c;\r
1541 return node_new_str_raw(p, p + 1);\r
1542}\r
1543\r
1544static Node*\r
1545str_node_split_last_char(StrNode* sn, OnigEncoding enc)\r
1546{\r
1547 const UChar *p;\r
1548 Node* n = NULL_NODE;\r
1549\r
1550 if (sn->end > sn->s) {\r
1551 p = onigenc_get_prev_char_head(enc, sn->s, sn->end);\r
1552 if (p && p > sn->s) { /* can be splitted. */\r
1553 n = node_new_str(p, sn->end);\r
1554 if ((sn->flag & NSTR_RAW) != 0)\r
1555 NSTRING_SET_RAW(n);\r
1556 sn->end = (UChar* )p;\r
1557 }\r
1558 }\r
1559 return n;\r
1560}\r
1561\r
1562static int\r
1563str_node_can_be_split(StrNode* sn, OnigEncoding enc)\r
1564{\r
1565 if (sn->end > sn->s) {\r
1566 return ((enclen(enc, sn->s) < sn->end - sn->s) ? 1 : 0);\r
1567 }\r
1568 return 0;\r
1569}\r
1570\r
#ifdef USE_PAD_TO_SHORT_BYTE_CHAR
/*
 * Insert `num` copies of the byte `val` in front of the string held by
 * `sn`, shifting the existing bytes right by `num`.
 *
 * The existing bytes are staged in a local buffer of NODE_STR_BUF_SIZE
 * bytes before being copied back.
 * NOTE(review): nothing here checks that the string fits that buffer or
 * that sn->s has room for `num` extra bytes -- confirm the callers
 * guarantee both.
 *
 * Returns 0.  (The function is declared int; it previously fell off the
 * end without returning a value.)
 */
static int
node_str_head_pad(StrNode* sn, int num, UChar val)
{
  UChar buf[NODE_STR_BUF_SIZE];
  int i, len;

  len = (int )(sn->end - sn->s);
  onig_strcpy(buf, sn->s, sn->end);
  onig_strcpy(&(sn->s[num]), buf, buf + len);
  sn->end += num;

  for (i = 0; i < num; i++) {
    sn->s[i] = val;
  }

  return 0;
}
#endif
1588\r
1589extern int\r
1590onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc)\r
1591{\r
1592 unsigned int num, val;\r
1593 OnigCodePoint c;\r
1594 UChar* p = *src;\r
1595 PFETCH_READY;\r
1596\r
1597 num = 0;\r
1598 while (!PEND) {\r
1599 PFETCH(c);\r
1600 if (ONIGENC_IS_CODE_DIGIT(enc, c)) {\r
1601 val = (unsigned int )DIGITVAL(c);\r
1602 if ((INT_MAX_LIMIT - val) / 10UL < num)\r
1603 return -1; /* overflow */\r
1604\r
1605 num = num * 10 + val;\r
1606 }\r
1607 else {\r
1608 PUNFETCH;\r
1609 break;\r
1610 }\r
1611 }\r
1612 *src = p;\r
1613 return num;\r
1614}\r
1615\r
1616static int\r
1617scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int maxlen,\r
1618 OnigEncoding enc)\r
1619{\r
1620 OnigCodePoint c;\r
1621 unsigned int num, val;\r
1622 UChar* p = *src;\r
1623 PFETCH_READY;\r
1624\r
1625 num = 0;\r
1626 while (!PEND && maxlen-- != 0) {\r
1627 PFETCH(c);\r
1628 if (ONIGENC_IS_CODE_XDIGIT(enc, c)) {\r
1629 val = (unsigned int )XDIGITVAL(enc,c);\r
1630 if ((INT_MAX_LIMIT - val) / 16UL < num)\r
1631 return -1; /* overflow */\r
1632\r
1633 num = (num << 4) + XDIGITVAL(enc,c);\r
1634 }\r
1635 else {\r
1636 PUNFETCH;\r
1637 break;\r
1638 }\r
1639 }\r
1640 *src = p;\r
1641 return num;\r
1642}\r
1643\r
1644static int\r
1645scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen,\r
1646 OnigEncoding enc)\r
1647{\r
1648 OnigCodePoint c;\r
1649 unsigned int num, val;\r
1650 UChar* p = *src;\r
1651 PFETCH_READY;\r
1652\r
1653 num = 0;\r
1654 while (!PEND && maxlen-- != 0) {\r
1655 PFETCH(c);\r
1656 if (ONIGENC_IS_CODE_DIGIT(enc, c) && c < '8') {\r
1657 val = ODIGITVAL(c);\r
1658 if ((INT_MAX_LIMIT - val) / 8UL < num)\r
1659 return -1; /* overflow */\r
1660\r
1661 num = (num << 3) + val;\r
1662 }\r
1663 else {\r
1664 PUNFETCH;\r
1665 break;\r
1666 }\r
1667 }\r
1668 *src = p;\r
1669 return num;\r
1670}\r
1671\r
1672\r
1673#define BBUF_WRITE_CODE_POINT(bbuf,pos,code) \\r
1674 BBUF_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT)\r
1675\r
1676/* data format:\r
1677 [n][from-1][to-1][from-2][to-2] ... [from-n][to-n]\r
1678 (all data size is OnigCodePoint)\r
1679 */\r
1680static int\r
1681new_code_range(BBuf** pbuf)\r
1682{\r
1683#define INIT_MULTI_BYTE_RANGE_SIZE (SIZE_CODE_POINT * 5)\r
1684 int r;\r
1685 OnigCodePoint n;\r
1686 BBuf* bbuf;\r
1687\r
1688 bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf));\r
1689 CHECK_NULL_RETURN_MEMERR(*pbuf);\r
1690 r = BBUF_INIT(*pbuf, INIT_MULTI_BYTE_RANGE_SIZE);\r
1691 if (r) return r;\r
1692\r
1693 n = 0;\r
1694 BBUF_WRITE_CODE_POINT(bbuf, 0, n);\r
1695 return 0;\r
1696}\r
1697\r
1698static int\r
1699add_code_range_to_buf(BBuf** pbuf, OnigCodePoint from, OnigCodePoint to)\r
1700{\r
1701 int r, inc_n, pos;\r
1702 int low, high, bound, x;\r
1703 OnigCodePoint n, *data;\r
1704 BBuf* bbuf;\r
1705\r
1706 if (from > to) {\r
1707 n = from; from = to; to = n;\r
1708 }\r
1709\r
1710 if (IS_NULL(*pbuf)) {\r
1711 r = new_code_range(pbuf);\r
1712 if (r) return r;\r
1713 bbuf = *pbuf;\r
1714 n = 0;\r
1715 }\r
1716 else {\r
1717 bbuf = *pbuf;\r
1718 GET_CODE_POINT(n, bbuf->p);\r
1719 }\r
1720 data = (OnigCodePoint* )(bbuf->p);\r
1721 data++;\r
1722\r
1723 for (low = 0, bound = n; low < bound; ) {\r
1724 x = (low + bound) >> 1;\r
1725 if (from > data[x*2 + 1])\r
1726 low = x + 1;\r
1727 else\r
1728 bound = x;\r
1729 }\r
1730\r
1731 for (high = low, bound = n; high < bound; ) {\r
1732 x = (high + bound) >> 1;\r
1733 if (to >= data[x*2] - 1)\r
1734 high = x + 1;\r
1735 else\r
1736 bound = x;\r
1737 }\r
1738\r
1739 inc_n = low + 1 - high;\r
1740 if (n + inc_n > ONIG_MAX_MULTI_BYTE_RANGES_NUM)\r
1741 return ONIGERR_TOO_MANY_MULTI_BYTE_RANGES;\r
1742\r
1743 if (inc_n != 1) {\r
1744 if (from > data[low*2])\r
1745 from = data[low*2];\r
1746 if (to < data[(high - 1)*2 + 1])\r
1747 to = data[(high - 1)*2 + 1];\r
1748 }\r
1749\r
1750 if (inc_n != 0 && (OnigCodePoint )high < n) {\r
1751 int from_pos = SIZE_CODE_POINT * (1 + high * 2);\r
1752 int to_pos = SIZE_CODE_POINT * (1 + (low + 1) * 2);\r
1753 int size = (n - high) * 2 * SIZE_CODE_POINT;\r
1754\r
1755 if (inc_n > 0) {\r
1756 BBUF_MOVE_RIGHT(bbuf, from_pos, to_pos, size);\r
1757 }\r
1758 else {\r
1759 BBUF_MOVE_LEFT_REDUCE(bbuf, from_pos, to_pos);\r
1760 }\r
1761 }\r
1762\r
1763 pos = SIZE_CODE_POINT * (1 + low * 2);\r
1764 BBUF_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2);\r
1765 BBUF_WRITE_CODE_POINT(bbuf, pos, from);\r
1766 BBUF_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to);\r
1767 n += inc_n;\r
1768 BBUF_WRITE_CODE_POINT(bbuf, 0, n);\r
1769\r
1770 return 0;\r
1771}\r
1772\r
1773static int\r
1774add_code_range(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to)\r
1775{\r
1776 if (from > to) {\r
1777 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))\r
1778 return 0;\r
1779 else\r
1780 return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;\r
1781 }\r
1782\r
1783 return add_code_range_to_buf(pbuf, from, to);\r
1784}\r
1785\r
1786static int\r
1787not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf)\r
1788{\r
1789 int r, i, n;\r
1790 OnigCodePoint pre, from, *data, to = 0;\r
1791\r
1792 *pbuf = (BBuf* )NULL;\r
1793 if (IS_NULL(bbuf)) {\r
1794 set_all:\r
1795 return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);\r
1796 }\r
1797\r
1798 data = (OnigCodePoint* )(bbuf->p);\r
1799 GET_CODE_POINT(n, data);\r
1800 data++;\r
1801 if (n <= 0) goto set_all;\r
1802\r
1803 r = 0;\r
1804 pre = MBCODE_START_POS(enc);\r
1805 for (i = 0; i < n; i++) {\r
1806 from = data[i*2];\r
1807 to = data[i*2+1];\r
1808 if (pre <= from - 1) {\r
1809 r = add_code_range_to_buf(pbuf, pre, from - 1);\r
1810 if (r != 0) return r;\r
1811 }\r
1812 if (to == ~((OnigCodePoint )0)) break;\r
1813 pre = to + 1;\r
1814 }\r
1815 if (to < ~((OnigCodePoint )0)) {\r
1816 r = add_code_range_to_buf(pbuf, to + 1, ~((OnigCodePoint )0));\r
1817 }\r
1818 return r;\r
1819}\r
1820\r
/* Exchange two (buffer, negation flag) pairs; all four arguments must be
   lvalues. */
#define SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2) do {\
  BBuf *swap_buf_ = bbuf1; \
  int   swap_not_ = not1; \
  not1  = not2;  not2  = swap_not_; \
  bbuf1 = bbuf2; bbuf2 = swap_buf_; \
} while (0)
1827\r
1828static int\r
1829or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1,\r
1830 BBuf* bbuf2, int not2, BBuf** pbuf)\r
1831{\r
1832 int r;\r
1833 OnigCodePoint i, n1, *data1;\r
1834 OnigCodePoint from, to;\r
1835\r
1836 *pbuf = (BBuf* )NULL;\r
1837 if (IS_NULL(bbuf1) && IS_NULL(bbuf2)) {\r
1838 if (not1 != 0 || not2 != 0)\r
1839 return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);\r
1840 return 0;\r
1841 }\r
1842\r
1843 r = 0;\r
1844 if (IS_NULL(bbuf2))\r
1845 SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);\r
1846\r
1847 if (IS_NULL(bbuf1)) {\r
1848 if (not1 != 0) {\r
1849 return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);\r
1850 }\r
1851 else {\r
1852 if (not2 == 0) {\r
1853 return bbuf_clone(pbuf, bbuf2);\r
1854 }\r
1855 else {\r
1856 return not_code_range_buf(enc, bbuf2, pbuf);\r
1857 }\r
1858 }\r
1859 }\r
1860\r
1861 if (not1 != 0)\r
1862 SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);\r
1863\r
1864 data1 = (OnigCodePoint* )(bbuf1->p);\r
1865 GET_CODE_POINT(n1, data1);\r
1866 data1++;\r
1867\r
1868 if (not2 == 0 && not1 == 0) { /* 1 OR 2 */\r
1869 r = bbuf_clone(pbuf, bbuf2);\r
1870 }\r
1871 else if (not1 == 0) { /* 1 OR (not 2) */\r
1872 r = not_code_range_buf(enc, bbuf2, pbuf);\r
1873 }\r
1874 if (r != 0) return r;\r
1875\r
1876 for (i = 0; i < n1; i++) {\r
1877 from = data1[i*2];\r
1878 to = data1[i*2+1];\r
1879 r = add_code_range_to_buf(pbuf, from, to);\r
1880 if (r != 0) return r;\r
1881 }\r
1882 return 0;\r
1883}\r
1884\r
1885static int\r
1886and_code_range1(BBuf** pbuf, OnigCodePoint from1, OnigCodePoint to1,\r
1887 OnigCodePoint* data, int n)\r
1888{\r
1889 int i, r;\r
1890 OnigCodePoint from2, to2;\r
1891\r
1892 for (i = 0; i < n; i++) {\r
1893 from2 = data[i*2];\r
1894 to2 = data[i*2+1];\r
1895 if (from2 < from1) {\r
1896 if (to2 < from1) continue;\r
1897 else {\r
1898 from1 = to2 + 1;\r
1899 }\r
1900 }\r
1901 else if (from2 <= to1) {\r
1902 if (to2 < to1) {\r
1903 if (from1 <= from2 - 1) {\r
1904 r = add_code_range_to_buf(pbuf, from1, from2-1);\r
1905 if (r != 0) return r;\r
1906 }\r
1907 from1 = to2 + 1;\r
1908 }\r
1909 else {\r
1910 to1 = from2 - 1;\r
1911 }\r
1912 }\r
1913 else {\r
1914 from1 = from2;\r
1915 }\r
1916 if (from1 > to1) break;\r
1917 }\r
1918 if (from1 <= to1) {\r
1919 r = add_code_range_to_buf(pbuf, from1, to1);\r
1920 if (r != 0) return r;\r
1921 }\r
1922 return 0;\r
1923}\r
1924\r
1925static int\r
1926and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)\r
1927{\r
1928 int r;\r
1929 OnigCodePoint i, j, n1, n2, *data1, *data2;\r
1930 OnigCodePoint from, to, from1, to1, from2, to2;\r
1931\r
1932 *pbuf = (BBuf* )NULL;\r
1933 if (IS_NULL(bbuf1)) {\r
1934 if (not1 != 0 && IS_NOT_NULL(bbuf2)) /* not1 != 0 -> not2 == 0 */\r
1935 return bbuf_clone(pbuf, bbuf2);\r
1936 return 0;\r
1937 }\r
1938 else if (IS_NULL(bbuf2)) {\r
1939 if (not2 != 0)\r
1940 return bbuf_clone(pbuf, bbuf1);\r
1941 return 0;\r
1942 }\r
1943\r
1944 if (not1 != 0)\r
1945 SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);\r
1946\r
1947 data1 = (OnigCodePoint* )(bbuf1->p);\r
1948 data2 = (OnigCodePoint* )(bbuf2->p);\r
1949 GET_CODE_POINT(n1, data1);\r
1950 GET_CODE_POINT(n2, data2);\r
1951 data1++;\r
1952 data2++;\r
1953\r
1954 if (not2 == 0 && not1 == 0) { /* 1 AND 2 */\r
1955 for (i = 0; i < n1; i++) {\r
1956 from1 = data1[i*2];\r
1957 to1 = data1[i*2+1];\r
1958 for (j = 0; j < n2; j++) {\r
1959 from2 = data2[j*2];\r
1960 to2 = data2[j*2+1];\r
1961 if (from2 > to1) break;\r
1962 if (to2 < from1) continue;\r
1963 from = MAX(from1, from2);\r
1964 to = MIN(to1, to2);\r
1965 r = add_code_range_to_buf(pbuf, from, to);\r
1966 if (r != 0) return r;\r
1967 }\r
1968 }\r
1969 }\r
1970 else if (not1 == 0) { /* 1 AND (not 2) */\r
1971 for (i = 0; i < n1; i++) {\r
1972 from1 = data1[i*2];\r
1973 to1 = data1[i*2+1];\r
1974 r = and_code_range1(pbuf, from1, to1, data2, n2);\r
1975 if (r != 0) return r;\r
1976 }\r
1977 }\r
1978\r
1979 return 0;\r
1980}\r
1981\r
1982static int\r
1983and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)\r
1984{\r
1985 int r, not1, not2;\r
1986 BBuf *buf1, *buf2, *pbuf;\r
1987 BitSetRef bsr1, bsr2;\r
1988 BitSet bs1, bs2;\r
1989\r
1990 not1 = IS_NCCLASS_NOT(dest);\r
1991 bsr1 = dest->bs;\r
1992 buf1 = dest->mbuf;\r
1993 not2 = IS_NCCLASS_NOT(cc);\r
1994 bsr2 = cc->bs;\r
1995 buf2 = cc->mbuf;\r
1996\r
1997 if (not1 != 0) {\r
1998 bitset_invert_to(bsr1, bs1);\r
1999 bsr1 = bs1;\r
2000 }\r
2001 if (not2 != 0) {\r
2002 bitset_invert_to(bsr2, bs2);\r
2003 bsr2 = bs2;\r
2004 }\r
2005 bitset_and(bsr1, bsr2);\r
2006 if (bsr1 != dest->bs) {\r
2007 bitset_copy(dest->bs, bsr1);\r
2008 bsr1 = dest->bs;\r
2009 }\r
2010 if (not1 != 0) {\r
2011 bitset_invert(dest->bs);\r
2012 }\r
2013\r
2014 if (! ONIGENC_IS_SINGLEBYTE(enc)) {\r
2015 if (not1 != 0 && not2 != 0) {\r
2016 r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf);\r
2017 }\r
2018 else {\r
2019 r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf);\r
2020 if (r == 0 && not1 != 0) {\r
2021 BBuf *tbuf;\r
2022 r = not_code_range_buf(enc, pbuf, &tbuf);\r
2023 if (r != 0) {\r
2024 bbuf_free(pbuf);\r
2025 return r;\r
2026 }\r
2027 bbuf_free(pbuf);\r
2028 pbuf = tbuf;\r
2029 }\r
2030 }\r
2031 if (r != 0) return r;\r
2032\r
2033 dest->mbuf = pbuf;\r
2034 bbuf_free(buf1);\r
2035 return r;\r
2036 }\r
2037 return 0;\r
2038}\r
2039\r
2040static int\r
2041or_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)\r
2042{\r
2043 int r, not1, not2;\r
2044 BBuf *buf1, *buf2, *pbuf;\r
2045 BitSetRef bsr1, bsr2;\r
2046 BitSet bs1, bs2;\r
2047\r
2048 not1 = IS_NCCLASS_NOT(dest);\r
2049 bsr1 = dest->bs;\r
2050 buf1 = dest->mbuf;\r
2051 not2 = IS_NCCLASS_NOT(cc);\r
2052 bsr2 = cc->bs;\r
2053 buf2 = cc->mbuf;\r
2054\r
2055 if (not1 != 0) {\r
2056 bitset_invert_to(bsr1, bs1);\r
2057 bsr1 = bs1;\r
2058 }\r
2059 if (not2 != 0) {\r
2060 bitset_invert_to(bsr2, bs2);\r
2061 bsr2 = bs2;\r
2062 }\r
2063 bitset_or(bsr1, bsr2);\r
2064 if (bsr1 != dest->bs) {\r
2065 bitset_copy(dest->bs, bsr1);\r
2066 bsr1 = dest->bs;\r
2067 }\r
2068 if (not1 != 0) {\r
2069 bitset_invert(dest->bs);\r
2070 }\r
2071\r
2072 if (! ONIGENC_IS_SINGLEBYTE(enc)) {\r
2073 if (not1 != 0 && not2 != 0) {\r
2074 r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf);\r
2075 }\r
2076 else {\r
2077 r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf);\r
2078 if (r == 0 && not1 != 0) {\r
2079 BBuf *tbuf;\r
2080 r = not_code_range_buf(enc, pbuf, &tbuf);\r
2081 if (r != 0) {\r
2082 bbuf_free(pbuf);\r
2083 return r;\r
2084 }\r
2085 bbuf_free(pbuf);\r
2086 pbuf = tbuf;\r
2087 }\r
2088 }\r
2089 if (r != 0) return r;\r
2090\r
2091 dest->mbuf = pbuf;\r
2092 bbuf_free(buf1);\r
2093 return r;\r
2094 }\r
2095 else\r
2096 return 0;\r
2097}\r
2098\r
2099static int\r
2100conv_backslash_value(int c, ScanEnv* env)\r
2101{\r
2102 if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_CONTROL_CHARS)) {\r
2103 switch (c) {\r
2104 case 'n': return '\n';\r
2105 case 't': return '\t';\r
2106 case 'r': return '\r';\r
2107 case 'f': return '\f';\r
2108 case 'a': return '\007';\r
2109 case 'b': return '\010';\r
2110 case 'e': return '\033';\r
2111 case 'v':\r
2112 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_V_VTAB))\r
2113 return '\v';\r
2114 break;\r
2115\r
2116 default:\r
2117 break;\r
2118 }\r
2119 }\r
2120 return c;\r
2121}\r
2122\r
2123static int\r
2124is_invalid_quantifier_target(Node* node)\r
2125{\r
2126 switch (NTYPE(node)) {\r
2127 case NT_ANCHOR:\r
2128 return 1;\r
2129 break;\r
2130\r
2131 case NT_ENCLOSE:\r
2132 /* allow enclosed elements */\r
2133 /* return is_invalid_quantifier_target(NENCLOSE(node)->target); */\r
2134 break;\r
2135\r
2136 case NT_LIST:\r
2137 do {\r
2138 if (! is_invalid_quantifier_target(NCAR(node))) return 0;\r
2139 } while (IS_NOT_NULL(node = NCDR(node)));\r
2140 return 0;\r
2141 break;\r
2142\r
2143 case NT_ALT:\r
2144 do {\r
2145 if (is_invalid_quantifier_target(NCAR(node))) return 1;\r
2146 } while (IS_NOT_NULL(node = NCDR(node)));\r
2147 break;\r
2148\r
2149 default:\r
2150 break;\r
2151 }\r
2152 return 0;\r
2153}\r
2154\r
2155/* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */\r
2156static int\r
2157popular_quantifier_num(QtfrNode* q)\r
2158{\r
2159 if (q->greedy) {\r
2160 if (q->lower == 0) {\r
2161 if (q->upper == 1) return 0;\r
2162 else if (IS_REPEAT_INFINITE(q->upper)) return 1;\r
2163 }\r
2164 else if (q->lower == 1) {\r
2165 if (IS_REPEAT_INFINITE(q->upper)) return 2;\r
2166 }\r
2167 }\r
2168 else {\r
2169 if (q->lower == 0) {\r
2170 if (q->upper == 1) return 3;\r
2171 else if (IS_REPEAT_INFINITE(q->upper)) return 4;\r
2172 }\r
2173 else if (q->lower == 1) {\r
2174 if (IS_REPEAT_INFINITE(q->upper)) return 5;\r
2175 }\r
2176 }\r
2177 return -1;\r
2178}\r
2179\r
2180\r
/* Actions available when a quantifier is applied directly to another
   quantifier. */
enum ReduceType {
  RQ_ASIS = 0, /* as is */
  RQ_DEL  = 1, /* delete parent */
  RQ_A    = 2, /* to '*'    */
  RQ_AQ   = 3, /* to '*?'   */
  RQ_QQ   = 4, /* to '??'   */
  RQ_P_QQ = 5, /* to '+)??' */
  RQ_PQ_Q = 6  /* to '+?)?' */
};

/*
 * Looked up as ReduceTypeTable[child][parent]; both indexes are the
 * values produced by popular_quantifier_num().
 */
static enum ReduceType ReduceTypeTable[6][6] = {
  /* parent:  '?'      '*'      '+'     '??'     '*?'     '+?'            child */
  { RQ_DEL,  RQ_A,    RQ_A,   RQ_QQ,   RQ_AQ,   RQ_ASIS },          /* '?'  */
  { RQ_DEL,  RQ_DEL,  RQ_DEL, RQ_P_QQ, RQ_P_QQ, RQ_DEL  },          /* '*'  */
  { RQ_A,    RQ_A,    RQ_DEL, RQ_ASIS, RQ_P_QQ, RQ_DEL  },          /* '+'  */
  { RQ_DEL,  RQ_AQ,   RQ_AQ,  RQ_DEL,  RQ_AQ,   RQ_AQ   },          /* '??' */
  { RQ_DEL,  RQ_DEL,  RQ_DEL, RQ_DEL,  RQ_DEL,  RQ_DEL  },          /* '*?' */
  { RQ_ASIS, RQ_PQ_Q, RQ_DEL, RQ_AQ,   RQ_AQ,   RQ_DEL  }           /* '+?' */
};
2199\r
2200extern void\r
2201onig_reduce_nested_quantifier(Node* pnode, Node* cnode)\r
2202{\r
2203 int pnum, cnum;\r
2204 QtfrNode *p, *c;\r
2205\r
2206 p = NQTFR(pnode);\r
2207 c = NQTFR(cnode);\r
2208 pnum = popular_quantifier_num(p);\r
2209 cnum = popular_quantifier_num(c);\r
2210 if (pnum < 0 || cnum < 0) return ;\r
2211\r
2212 switch(ReduceTypeTable[cnum][pnum]) {\r
2213 case RQ_DEL:\r
2214 *pnode = *cnode;\r
2215 break;\r
2216 case RQ_A:\r
2217 p->target = c->target;\r
2218 p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 1;\r
2219 break;\r
2220 case RQ_AQ:\r
2221 p->target = c->target;\r
2222 p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 0;\r
2223 break;\r
2224 case RQ_QQ:\r
2225 p->target = c->target;\r
2226 p->lower = 0; p->upper = 1; p->greedy = 0;\r
2227 break;\r
2228 case RQ_P_QQ:\r
2229 p->target = cnode;\r
2230 p->lower = 0; p->upper = 1; p->greedy = 0;\r
2231 c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 1;\r
2232 return ;\r
2233 break;\r
2234 case RQ_PQ_Q:\r
2235 p->target = cnode;\r
2236 p->lower = 0; p->upper = 1; p->greedy = 1;\r
2237 c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 0;\r
2238 return ;\r
2239 break;\r
2240 case RQ_ASIS:\r
2241 p->target = cnode;\r
2242 return ;\r
2243 break;\r
2244 }\r
2245\r
2246 c->target = NULL_NODE;\r
2247 onig_node_free(cnode);\r
2248}\r
2249\r
2250\r
/* Token kinds stored in OnigToken.type. */
enum TokenSyms {
  TK_EOT                = 0,  /* end of token */
  TK_RAW_BYTE           = 1,
  TK_CHAR               = 2,
  TK_STRING             = 3,
  TK_CODE_POINT         = 4,
  TK_ANYCHAR            = 5,
  TK_CHAR_TYPE          = 6,
  TK_BACKREF            = 7,
  TK_CALL               = 8,
  TK_ANCHOR             = 9,
  TK_OP_REPEAT          = 10,
  TK_INTERVAL           = 11,
  TK_ANYCHAR_ANYTIME    = 12, /* SQL '%' == .* */
  TK_ALT                = 13,
  TK_SUBEXP_OPEN        = 14,
  TK_SUBEXP_CLOSE       = 15,
  TK_CC_OPEN            = 16,
  TK_QUOTE_OPEN         = 17,
  TK_CHAR_PROPERTY      = 18, /* \p{...}, \P{...} */
  /* in cc */
  TK_CC_CLOSE           = 19,
  TK_CC_RANGE           = 20,
  TK_POSIX_BRACKET_OPEN = 21,
  TK_CC_AND             = 22, /* && */
  TK_CC_CC_OPEN         = 23  /* [ */
};
2278\r
2279typedef struct {\r
2280 enum TokenSyms type;\r
2281 int escaped;\r
2282 int base; /* is number: 8, 16 (used in [....]) */\r
2283 UChar* backp;\r
2284 union {\r
2285 UChar* s;\r
2286 int c;\r
2287 OnigCodePoint code;\r
2288 int anchor;\r
2289 int subtype;\r
2290 struct {\r
2291 int lower;\r
2292 int upper;\r
2293 int greedy;\r
2294 int possessive;\r
2295 } repeat;\r
2296 struct {\r
2297 int num;\r
2298 int ref1;\r
2299 int* refs;\r
2300 int by_name;\r
2301#ifdef USE_BACKREF_WITH_LEVEL\r
2302 int exist_level;\r
2303 int level; /* \k<name+n> */\r
2304#endif\r
2305 } backref;\r
2306 struct {\r
2307 UChar* name;\r
2308 UChar* name_end;\r
2309 int gnum;\r
2310 } call;\r
2311 struct {\r
2312 int ctype;\r
2313 int not;\r
2314 } prop;\r
2315 } u;\r
2316} OnigToken;\r
2317\r
2318\r
2319static int\r
2320fetch_range_quantifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env)\r
2321{\r
2322 int low, up, syn_allow, non_low = 0;\r
2323 int r = 0;\r
2324 OnigCodePoint c;\r
2325 OnigEncoding enc = env->enc;\r
2326 UChar* p = *src;\r
2327 PFETCH_READY;\r
2328\r
2329 syn_allow = IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INVALID_INTERVAL);\r
2330\r
2331 if (PEND) {\r
2332 if (syn_allow)\r
2333 return 1; /* "....{" : OK! */\r
2334 else\r
2335 return ONIGERR_END_PATTERN_AT_LEFT_BRACE; /* "....{" syntax error */\r
2336 }\r
2337\r
2338 if (! syn_allow) {\r
2339 c = PPEEK;\r
2340 if (c == ')' || c == '(' || c == '|') {\r
2341 return ONIGERR_END_PATTERN_AT_LEFT_BRACE;\r
2342 }\r
2343 }\r
2344\r
2345 low = onig_scan_unsigned_number(&p, end, env->enc);\r
2346 if (low < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;\r
2347 if (low > ONIG_MAX_REPEAT_NUM)\r
2348 return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;\r
2349\r
2350 if (p == *src) { /* can't read low */\r
2351 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV)) {\r
2352 /* allow {,n} as {0,n} */\r
2353 low = 0;\r
2354 non_low = 1;\r
2355 }\r
2356 else\r
2357 goto invalid;\r
2358 }\r
2359\r
2360 if (PEND) goto invalid;\r
2361 PFETCH(c);\r
2362 if (c == ',') {\r
2363 UChar* prev = p;\r
2364 up = onig_scan_unsigned_number(&p, end, env->enc);\r
2365 if (up < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;\r
2366 if (up > ONIG_MAX_REPEAT_NUM)\r
2367 return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;\r
2368\r
2369 if (p == prev) {\r
2370 if (non_low != 0)\r
2371 goto invalid;\r
2372 up = REPEAT_INFINITE; /* {n,} : {n,infinite} */\r
2373 }\r
2374 }\r
2375 else {\r
2376 if (non_low != 0)\r
2377 goto invalid;\r
2378\r
2379 PUNFETCH;\r
2380 up = low; /* {n} : exact n times */\r
2381 r = 2; /* fixed */\r
2382 }\r
2383\r
2384 if (PEND) goto invalid;\r
2385 PFETCH(c);\r
2386 if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) {\r
2387 if (c != MC_ESC(env->syntax)) goto invalid;\r
2388 PFETCH(c);\r
2389 }\r
2390 if (c != '}') goto invalid;\r
2391\r
2392 if (!IS_REPEAT_INFINITE(up) && low > up) {\r
2393 return ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE;\r
2394 }\r
2395\r
2396 tok->type = TK_INTERVAL;\r
2397 tok->u.repeat.lower = low;\r
2398 tok->u.repeat.upper = up;\r
2399 *src = p;\r
2400 return r; /* 0: normal {n,m}, 2: fixed {n} */\r
2401\r
2402 invalid:\r
2403 if (syn_allow)\r
2404 return 1; /* OK */\r
2405 else\r
2406 return ONIGERR_INVALID_REPEAT_RANGE_PATTERN;\r
2407}\r
2408\r
2409/* \M-, \C-, \c, or \... */\r
2410static int\r
2411fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)\r
2412{\r
2413 int v;\r
2414 OnigCodePoint c;\r
2415 OnigEncoding enc = env->enc;\r
2416 UChar* p = *src;\r
2417\r
2418 if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;\r
2419\r
2420 PFETCH_S(c);\r
2421 switch (c) {\r
2422 case 'M':\r
2423 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META)) {\r
2424 if (PEND) return ONIGERR_END_PATTERN_AT_META;\r
2425 PFETCH_S(c);\r
2426 if (c != '-') return ONIGERR_META_CODE_SYNTAX;\r
2427 if (PEND) return ONIGERR_END_PATTERN_AT_META;\r
2428 PFETCH_S(c);\r
2429 if (c == MC_ESC(env->syntax)) {\r
2430 v = fetch_escaped_value(&p, end, env);\r
2431 if (v < 0) return v;\r
2432 c = (OnigCodePoint )v;\r
2433 }\r
2434 c = ((c & 0xff) | 0x80);\r
2435 }\r
2436 else\r
2437 goto backslash;\r
2438 break;\r
2439\r
2440 case 'C':\r
2441 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL)) {\r
2442 if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;\r
2443 PFETCH_S(c);\r
2444 if (c != '-') return ONIGERR_CONTROL_CODE_SYNTAX;\r
2445 goto control;\r
2446 }\r
2447 else\r
2448 goto backslash;\r
2449\r
2450 case 'c':\r
2451 if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_C_CONTROL)) {\r
2452 control:\r
2453 if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;\r
2454 PFETCH_S(c);\r
2455 if (c == '?') {\r
2456 c = 0177;\r
2457 }\r
2458 else {\r
2459 if (c == MC_ESC(env->syntax)) {\r
2460 v = fetch_escaped_value(&p, end, env);\r
2461 if (v < 0) return v;\r
2462 c = (OnigCodePoint )v;\r
2463 }\r
2464 c &= 0x9f;\r
2465 }\r
2466 break;\r
2467 }\r
2468 /* fall through */\r
2469\r
2470 default:\r
2471 {\r
2472 backslash:\r
2473 c = conv_backslash_value(c, env);\r
2474 }\r
2475 break;\r
2476 }\r
2477\r
2478 *src = p;\r
2479 return c;\r
2480}\r
2481\r
2482static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env);\r
2483\r
2484static OnigCodePoint\r
2485get_name_end_code_point(OnigCodePoint start)\r
2486{\r
2487 switch (start) {\r
2488 case '<': return (OnigCodePoint )'>'; break;\r
2489 case '\'': return (OnigCodePoint )'\''; break;\r
2490 default:\r
2491 break;\r
2492 }\r
2493\r
2494 return (OnigCodePoint )0;\r
2495}\r
2496\r
2497#ifdef USE_NAMED_GROUP\r
2498#ifdef USE_BACKREF_WITH_LEVEL\r
2499/*\r
2500 \k<name+n>, \k<name-n>\r
2501 \k<num+n>, \k<num-n>\r
2502 \k<-num+n>, \k<-num-n>\r
2503*/\r
2504static int\r
2505fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,\r
2506 UChar** rname_end, ScanEnv* env,\r
2507 int* rback_num, int* rlevel)\r
2508{\r
2509 int r, sign, is_num, exist_level;\r
2510 OnigCodePoint end_code;\r
2511 OnigCodePoint c = 0;\r
2512 OnigEncoding enc = env->enc;\r
2513 UChar *name_end;\r
2514 UChar *pnum_head;\r
2515 UChar *p = *src;\r
2516 PFETCH_READY;\r
2517\r
2518 *rback_num = 0;\r
2519 is_num = exist_level = 0;\r
2520 sign = 1;\r
2521 pnum_head = *src;\r
2522\r
2523 end_code = get_name_end_code_point(start_code);\r
2524\r
2525 name_end = end;\r
2526 r = 0;\r
2527 if (PEND) {\r
2528 return ONIGERR_EMPTY_GROUP_NAME;\r
2529 }\r
2530 else {\r
2531 PFETCH(c);\r
2532 if (c == end_code)\r
2533 return ONIGERR_EMPTY_GROUP_NAME;\r
2534\r
2535 if (ONIGENC_IS_CODE_DIGIT(enc, c)) {\r
2536 is_num = 1;\r
2537 }\r
2538 else if (c == '-') {\r
2539 is_num = 2;\r
2540 sign = -1;\r
2541 pnum_head = p;\r
2542 }\r
2543 else if (!ONIGENC_IS_CODE_WORD(enc, c)) {\r
2544 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;\r
2545 }\r
2546 }\r
2547\r
2548 while (!PEND) {\r
2549 name_end = p;\r
2550 PFETCH(c);\r
2551 if (c == end_code || c == ')' || c == '+' || c == '-') {\r
2552 if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME;\r
2553 break;\r
2554 }\r
2555\r
2556 if (is_num != 0) {\r
2557 if (ONIGENC_IS_CODE_DIGIT(enc, c)) {\r
2558 is_num = 1;\r
2559 }\r
2560 else {\r
2561 r = ONIGERR_INVALID_GROUP_NAME;\r
2562 is_num = 0;\r
2563 }\r
2564 }\r
2565 else if (!ONIGENC_IS_CODE_WORD(enc, c)) {\r
2566 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;\r
2567 }\r
2568 }\r
2569\r
2570 if (r == 0 && c != end_code) {\r
2571 if (c == '+' || c == '-') {\r
2572 int level;\r
2573 int flag = (c == '-' ? -1 : 1);\r
2574\r
2575 PFETCH(c);\r
2576 if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto err;\r
2577 PUNFETCH;\r
2578 level = onig_scan_unsigned_number(&p, end, enc);\r
2579 if (level < 0) return ONIGERR_TOO_BIG_NUMBER;\r
2580 *rlevel = (level * flag);\r
2581 exist_level = 1;\r
2582\r
2583 PFETCH(c);\r
2584 if (c == end_code)\r
2585 goto end;\r
2586 }\r
2587\r
2588 err:\r
2589 r = ONIGERR_INVALID_GROUP_NAME;\r
2590 name_end = end;\r
2591 }\r
2592\r
2593 end:\r
2594 if (r == 0) {\r
2595 if (is_num != 0) {\r
2596 *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);\r
2597 if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;\r
2598 else if (*rback_num == 0) goto err;\r
2599\r
2600 *rback_num *= sign;\r
2601 }\r
2602\r
2603 *rname_end = name_end;\r
2604 *src = p;\r
2605 return (exist_level ? 1 : 0);\r
2606 }\r
2607 else {\r
2608 onig_scan_env_set_error_string(env, r, *src, name_end);\r
2609 return r;\r
2610 }\r
2611}\r
2612#endif /* USE_BACKREF_WITH_LEVEL */\r
2613\r
2614/*\r
2615 def: 0 -> define name (don't allow number name)\r
2616 1 -> reference name (allow number name)\r
2617*/\r
2618static int\r
2619fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,\r
2620 UChar** rname_end, ScanEnv* env, int* rback_num, int ref)\r
2621{\r
2622 int r, is_num, sign;\r
2623 OnigCodePoint end_code;\r
2624 OnigCodePoint c = 0;\r
2625 OnigEncoding enc = env->enc;\r
2626 UChar *name_end;\r
2627 UChar *pnum_head;\r
2628 UChar *p = *src;\r
2629\r
2630 *rback_num = 0;\r
2631\r
2632 end_code = get_name_end_code_point(start_code);\r
2633\r
2634 name_end = end;\r
2635 pnum_head = *src;\r
2636 r = 0;\r
2637 is_num = 0;\r
2638 sign = 1;\r
2639 if (PEND) {\r
2640 return ONIGERR_EMPTY_GROUP_NAME;\r
2641 }\r
2642 else {\r
2643 PFETCH_S(c);\r
2644 if (c == end_code)\r
2645 return ONIGERR_EMPTY_GROUP_NAME;\r
2646\r
2647 if (ONIGENC_IS_CODE_DIGIT(enc, c)) {\r
2648 if (ref == 1)\r
2649 is_num = 1;\r
2650 else {\r
2651 r = ONIGERR_INVALID_GROUP_NAME;\r
2652 is_num = 0;\r
2653 }\r
2654 }\r
2655 else if (c == '-') {\r
2656 if (ref == 1) {\r
2657 is_num = 2;\r
2658 sign = -1;\r
2659 pnum_head = p;\r
2660 }\r
2661 else {\r
2662 r = ONIGERR_INVALID_GROUP_NAME;\r
2663 is_num = 0;\r
2664 }\r
2665 }\r
2666 else if (!ONIGENC_IS_CODE_WORD(enc, c)) {\r
2667 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;\r
2668 }\r
2669 }\r
2670\r
2671 if (r == 0) {\r
2672 while (!PEND) {\r
2673 name_end = p;\r
2674 PFETCH_S(c);\r
2675 if (c == end_code || c == ')') {\r
2676 if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME;\r
2677 break;\r
2678 }\r
2679\r
2680 if (is_num != 0) {\r
2681 if (ONIGENC_IS_CODE_DIGIT(enc, c)) {\r
2682 is_num = 1;\r
2683 }\r
2684 else {\r
2685 if (!ONIGENC_IS_CODE_WORD(enc, c))\r
2686 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;\r
2687 else\r
2688 r = ONIGERR_INVALID_GROUP_NAME;\r
2689 is_num = 0;\r
2690 }\r
2691 }\r
2692 else {\r
2693 if (!ONIGENC_IS_CODE_WORD(enc, c)) {\r
2694 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;\r
2695 }\r
2696 }\r
2697 }\r
2698\r
2699 if (c != end_code) {\r
2700 r = ONIGERR_INVALID_GROUP_NAME;\r
2701 name_end = end;\r
2702 }\r
2703\r
2704 if (is_num != 0) {\r
2705 *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);\r
2706 if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;\r
2707 else if (*rback_num == 0) {\r
2708 r = ONIGERR_INVALID_GROUP_NAME;\r
2709 goto err;\r
2710 }\r
2711\r
2712 *rback_num *= sign;\r
2713 }\r
2714\r
2715 *rname_end = name_end;\r
2716 *src = p;\r
2717 return 0;\r
2718 }\r
2719 else {\r
2720 while (!PEND) {\r
2721 name_end = p;\r
2722 PFETCH_S(c);\r
2723 if (c == end_code || c == ')')\r
2724 break;\r
2725 }\r
2726 if (PEND)\r
2727 name_end = end;\r
2728\r
2729 err:\r
2730 onig_scan_env_set_error_string(env, r, *src, name_end);\r
2731 return r;\r
2732 }\r
2733}\r
2734#else\r
2735static int\r
2736fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,\r
2737 UChar** rname_end, ScanEnv* env, int* rback_num, int ref)\r
2738{\r
2739 int r, is_num, sign;\r
2740 OnigCodePoint end_code;\r
2741 OnigCodePoint c = 0;\r
2742 UChar *name_end;\r
2743 OnigEncoding enc = env->enc;\r
2744 UChar *pnum_head;\r
2745 UChar *p = *src;\r
2746 PFETCH_READY;\r
2747\r
2748 *rback_num = 0;\r
2749\r
2750 end_code = get_name_end_code_point(start_code);\r
2751\r
2752 *rname_end = name_end = end;\r
2753 r = 0;\r
2754 pnum_head = *src;\r
2755 is_num = 0;\r
2756 sign = 1;\r
2757\r
2758 if (PEND) {\r
2759 return ONIGERR_EMPTY_GROUP_NAME;\r
2760 }\r
2761 else {\r
2762 PFETCH(c);\r
2763 if (c == end_code)\r
2764 return ONIGERR_EMPTY_GROUP_NAME;\r
2765\r
2766 if (ONIGENC_IS_CODE_DIGIT(enc, c)) {\r
2767 is_num = 1;\r
2768 }\r
2769 else if (c == '-') {\r
2770 is_num = 2;\r
2771 sign = -1;\r
2772 pnum_head = p;\r
2773 }\r
2774 else {\r
2775 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;\r
2776 }\r
2777 }\r
2778\r
2779 while (!PEND) {\r
2780 name_end = p;\r
2781\r
2782 PFETCH(c);\r
2783 if (c == end_code || c == ')') break;\r
2784 if (! ONIGENC_IS_CODE_DIGIT(enc, c))\r
2785 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;\r
2786 }\r
2787 if (r == 0 && c != end_code) {\r
2788 r = ONIGERR_INVALID_GROUP_NAME;\r
2789 name_end = end;\r
2790 }\r
2791\r
2792 if (r == 0) {\r
2793 *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);\r
2794 if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;\r
2795 else if (*rback_num == 0) {\r
2796 r = ONIGERR_INVALID_GROUP_NAME;\r
2797 goto err;\r
2798 }\r
2799 *rback_num *= sign;\r
2800\r
2801 *rname_end = name_end;\r
2802 *src = p;\r
2803 return 0;\r
2804 }\r
2805 else {\r
2806 err:\r
2807 onig_scan_env_set_error_string(env, r, *src, name_end);\r
2808 return r;\r
2809 }\r
2810}\r
2811#endif /* USE_NAMED_GROUP */\r
2812\r
2813static void\r
2814CC_ESC_WARN(ScanEnv* env, UChar *c)\r
2815{\r
2816 if (onig_warn == onig_null_warn) return ;\r
2817\r
2818 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED) &&\r
2819 IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) {\r
2820 UChar buf[WARN_BUFSIZE];\r
2821 onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,\r
2822 env->pattern, env->pattern_end,\r
2823 (UChar* )"character class has '%s' without escape", c);\r
2824 (*onig_warn)((char* )buf);\r
2825 }\r
2826}\r
2827\r
2828static void\r
2829CLOSE_BRACKET_WITHOUT_ESC_WARN(ScanEnv* env, UChar* c)\r
2830{\r
2831 if (onig_warn == onig_null_warn) return ;\r
2832\r
2833 if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) {\r
2834 UChar buf[WARN_BUFSIZE];\r
2835 onig_snprintf_with_pattern(buf, WARN_BUFSIZE, (env)->enc,\r
2836 (env)->pattern, (env)->pattern_end,\r
2837 (UChar* )"regular expression has '%s' without escape", c);\r
2838 (*onig_warn)((char* )buf);\r
2839 }\r
2840}\r
2841\r
2842static UChar*\r
2843find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to,\r
2844 UChar **next, OnigEncoding enc)\r
2845{\r
2846 int i;\r
2847 OnigCodePoint x;\r
2848 UChar *q;\r
2849 UChar *p = from;\r
2850 \r
2851 while (p < to) {\r
2852 x = ONIGENC_MBC_TO_CODE(enc, p, to);\r
2853 q = p + enclen(enc, p);\r
2854 if (x == s[0]) {\r
2855 for (i = 1; i < n && q < to; i++) {\r
2856 x = ONIGENC_MBC_TO_CODE(enc, q, to);\r
2857 if (x != s[i]) break;\r
2858 q += enclen(enc, q);\r
2859 }\r
2860 if (i >= n) {\r
2861 if (IS_NOT_NULL(next))\r
2862 *next = q;\r
2863 return p;\r
2864 }\r
2865 }\r
2866 p = q;\r
2867 }\r
2868 return NULL_UCHARP;\r
2869}\r
2870\r
2871static int\r
2872str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to,\r
2873 OnigCodePoint bad, OnigEncoding enc, OnigSyntaxType* syn)\r
2874{\r
2875 int i, in_esc;\r
2876 OnigCodePoint x;\r
2877 UChar *q;\r
2878 UChar *p = from;\r
2879\r
2880 in_esc = 0;\r
2881 while (p < to) {\r
2882 if (in_esc) {\r
2883 in_esc = 0;\r
2884 p += enclen(enc, p);\r
2885 }\r
2886 else {\r
2887 x = ONIGENC_MBC_TO_CODE(enc, p, to);\r
2888 q = p + enclen(enc, p);\r
2889 if (x == s[0]) {\r
2890 for (i = 1; i < n && q < to; i++) {\r
2891 x = ONIGENC_MBC_TO_CODE(enc, q, to);\r
2892 if (x != s[i]) break;\r
2893 q += enclen(enc, q);\r
2894 }\r
2895 if (i >= n) return 1;\r
2896 p += enclen(enc, p);\r
2897 }\r
2898 else {\r
2899 x = ONIGENC_MBC_TO_CODE(enc, p, to);\r
2900 if (x == bad) return 0;\r
2901 else if (x == MC_ESC(syn)) in_esc = 1;\r
2902 p = q;\r
2903 }\r
2904 }\r
2905 }\r
2906 return 0;\r
2907}\r
2908\r
2909static int\r
2910fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)\r
2911{\r
2912 int num;\r
2913 OnigCodePoint c, c2;\r
2914 OnigSyntaxType* syn = env->syntax;\r
2915 OnigEncoding enc = env->enc;\r
2916 UChar* prev;\r
2917 UChar* p = *src;\r
2918 PFETCH_READY;\r
2919\r
2920 if (PEND) {\r
2921 tok->type = TK_EOT;\r
2922 return tok->type;\r
2923 }\r
2924\r
2925 PFETCH(c);\r
2926 tok->type = TK_CHAR;\r
2927 tok->base = 0;\r
2928 tok->u.c = c;\r
2929 tok->escaped = 0;\r
2930\r
2931 if (c == ']') {\r
2932 tok->type = TK_CC_CLOSE;\r
2933 }\r
2934 else if (c == '-') {\r
2935 tok->type = TK_CC_RANGE;\r
2936 }\r
2937 else if (c == MC_ESC(syn)) {\r
2938 if (! IS_SYNTAX_BV(syn, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC))\r
2939 goto end;\r
2940\r
2941 if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;\r
2942\r
2943 PFETCH(c);\r
2944 tok->escaped = 1;\r
2945 tok->u.c = c;\r
2946 switch (c) {\r
2947 case 'w':\r
2948 tok->type = TK_CHAR_TYPE;\r
2949 tok->u.prop.ctype = ONIGENC_CTYPE_WORD;\r
2950 tok->u.prop.not = 0;\r
2951 break;\r
2952 case 'W':\r
2953 tok->type = TK_CHAR_TYPE;\r
2954 tok->u.prop.ctype = ONIGENC_CTYPE_WORD;\r
2955 tok->u.prop.not = 1;\r
2956 break;\r
2957 case 'd':\r
2958 tok->type = TK_CHAR_TYPE;\r
2959 tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;\r
2960 tok->u.prop.not = 0;\r
2961 break;\r
2962 case 'D':\r
2963 tok->type = TK_CHAR_TYPE;\r
2964 tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;\r
2965 tok->u.prop.not = 1;\r
2966 break;\r
2967 case 's':\r
2968 tok->type = TK_CHAR_TYPE;\r
2969 tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;\r
2970 tok->u.prop.not = 0;\r
2971 break;\r
2972 case 'S':\r
2973 tok->type = TK_CHAR_TYPE;\r
2974 tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;\r
2975 tok->u.prop.not = 1;\r
2976 break;\r
2977 case 'h':\r
2978 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;\r
2979 tok->type = TK_CHAR_TYPE;\r
2980 tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;\r
2981 tok->u.prop.not = 0;\r
2982 break;\r
2983 case 'H':\r
2984 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;\r
2985 tok->type = TK_CHAR_TYPE;\r
2986 tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;\r
2987 tok->u.prop.not = 1;\r
2988 break;\r
2989\r
2990 case 'p':\r
2991 case 'P':\r
2992 c2 = PPEEK;\r
2993 if (c2 == '{' &&\r
2994 IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {\r
2995 PINC;\r
2996 tok->type = TK_CHAR_PROPERTY;\r
2997 tok->u.prop.not = (c == 'P' ? 1 : 0);\r
2998\r
2999 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {\r
3000 PFETCH(c2);\r
3001 if (c2 == '^') {\r
3002 tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);\r
3003 }\r
3004 else\r
3005 PUNFETCH;\r
3006 }\r
3007 }\r
3008 break;\r
3009\r
3010 case 'x':\r
3011 if (PEND) break;\r
3012\r
3013 prev = p;\r
3014 if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {\r
3015 PINC;\r
3016 num = scan_unsigned_hexadecimal_number(&p, end, 8, enc);\r
3017 if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;\r
3018 if (!PEND) {\r
3019 c2 = PPEEK;\r
3020 if (ONIGENC_IS_CODE_XDIGIT(enc, c2))\r
3021 return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;\r
3022 }\r
3023\r
3024 if (p > prev + enclen(enc, prev) && !PEND && (PPEEK_IS('}'))) {\r
3025 PINC;\r
3026 tok->type = TK_CODE_POINT;\r
3027 tok->base = 16;\r
3028 tok->u.code = (OnigCodePoint )num;\r
3029 }\r
3030 else {\r
3031 /* can't read nothing or invalid format */\r
3032 p = prev;\r
3033 }\r
3034 }\r
3035 else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {\r
3036 num = scan_unsigned_hexadecimal_number(&p, end, 2, enc);\r
3037 if (num < 0) return ONIGERR_TOO_BIG_NUMBER;\r
3038 if (p == prev) { /* can't read nothing. */\r
3039 num = 0; /* but, it's not error */\r
3040 }\r
3041 tok->type = TK_RAW_BYTE;\r
3042 tok->base = 16;\r
3043 tok->u.c = num;\r
3044 }\r
3045 break;\r
3046\r
3047 case 'u':\r
3048 if (PEND) break;\r
3049\r
3050 prev = p;\r
3051 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {\r
3052 num = scan_unsigned_hexadecimal_number(&p, end, 4, enc);\r
3053 if (num < 0) return ONIGERR_TOO_BIG_NUMBER;\r
3054 if (p == prev) { /* can't read nothing. */\r
3055 num = 0; /* but, it's not error */\r
3056 }\r
3057 tok->type = TK_CODE_POINT;\r
3058 tok->base = 16;\r
3059 tok->u.code = (OnigCodePoint )num;\r
3060 }\r
3061 break;\r
3062\r
3063 case '0':\r
3064 case '1': case '2': case '3': case '4': case '5': case '6': case '7':\r
3065 if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {\r
3066 PUNFETCH;\r
3067 prev = p;\r
3068 num = scan_unsigned_octal_number(&p, end, 3, enc);\r
3069 if (num < 0) return ONIGERR_TOO_BIG_NUMBER;\r
3070 if (p == prev) { /* can't read nothing. */\r
3071 num = 0; /* but, it's not error */\r
3072 }\r
3073 tok->type = TK_RAW_BYTE;\r
3074 tok->base = 8;\r
3075 tok->u.c = num;\r
3076 }\r
3077 break;\r
3078\r
3079 default:\r
3080 PUNFETCH;\r
3081 num = fetch_escaped_value(&p, end, env);\r
3082 if (num < 0) return num;\r
3083 if (tok->u.c != num) {\r
3084 tok->u.code = (OnigCodePoint )num;\r
3085 tok->type = TK_CODE_POINT;\r
3086 }\r
3087 break;\r
3088 }\r
3089 }\r
3090 else if (c == '[') {\r
3091 if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_POSIX_BRACKET) && (PPEEK_IS(':'))) {\r
3092 OnigCodePoint send[] = { (OnigCodePoint )':', (OnigCodePoint )']' };\r
3093 tok->backp = p; /* point at '[' is readed */\r
3094 PINC;\r
3095 if (str_exist_check_with_esc(send, 2, p, end,\r
3096 (OnigCodePoint )']', enc, syn)) {\r
3097 tok->type = TK_POSIX_BRACKET_OPEN;\r
3098 }\r
3099 else {\r
3100 PUNFETCH;\r
3101 goto cc_in_cc;\r
3102 }\r
3103 }\r
3104 else {\r
3105 cc_in_cc:\r
3106 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP)) {\r
3107 tok->type = TK_CC_CC_OPEN;\r
3108 }\r
3109 else {\r
3110 CC_ESC_WARN(env, (UChar* )"[");\r
3111 }\r
3112 }\r
3113 }\r
3114 else if (c == '&') {\r
3115 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP) &&\r
3116 !PEND && (PPEEK_IS('&'))) {\r
3117 PINC;\r
3118 tok->type = TK_CC_AND;\r
3119 }\r
3120 }\r
3121\r
3122 end:\r
3123 *src = p;\r
3124 return tok->type;\r
3125}\r
3126\r
3127static int\r
3128fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)\r
3129{\r
3130 int r, num;\r
3131 OnigCodePoint c;\r
3132 OnigEncoding enc = env->enc;\r
3133 OnigSyntaxType* syn = env->syntax;\r
3134 UChar* prev;\r
3135 UChar* p = *src;\r
3136 PFETCH_READY;\r
3137\r
3138 start:\r
3139 if (PEND) {\r
3140 tok->type = TK_EOT;\r
3141 return tok->type;\r
3142 }\r
3143\r
3144 tok->type = TK_STRING;\r
3145 tok->base = 0;\r
3146 tok->backp = p;\r
3147\r
3148 PFETCH(c);\r
3149 if (IS_MC_ESC_CODE(c, syn)) {\r
3150 if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;\r
3151\r
3152 tok->backp = p;\r
3153 PFETCH(c);\r
3154\r
3155 tok->u.c = c;\r
3156 tok->escaped = 1;\r
3157 switch (c) {\r
3158 case '*':\r
3159 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF)) break;\r
3160 tok->type = TK_OP_REPEAT;\r
3161 tok->u.repeat.lower = 0;\r
3162 tok->u.repeat.upper = REPEAT_INFINITE;\r
3163 goto greedy_check;\r
3164 break;\r
3165\r
3166 case '+':\r
3167 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_PLUS_ONE_INF)) break;\r
3168 tok->type = TK_OP_REPEAT;\r
3169 tok->u.repeat.lower = 1;\r
3170 tok->u.repeat.upper = REPEAT_INFINITE;\r
3171 goto greedy_check;\r
3172 break;\r
3173\r
3174 case '?':\r
3175 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_QMARK_ZERO_ONE)) break;\r
3176 tok->type = TK_OP_REPEAT;\r
3177 tok->u.repeat.lower = 0;\r
3178 tok->u.repeat.upper = 1;\r
3179 greedy_check:\r
3180 if (!PEND && PPEEK_IS('?') &&\r
3181 IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY)) {\r
3182 PFETCH(c);\r
3183 tok->u.repeat.greedy = 0;\r
3184 tok->u.repeat.possessive = 0;\r
3185 }\r
3186 else {\r
3187 possessive_check:\r
3188 if (!PEND && PPEEK_IS('+') &&\r
3189 ((IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT) &&\r
3190 tok->type != TK_INTERVAL) ||\r
3191 (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL) &&\r
3192 tok->type == TK_INTERVAL))) {\r
3193 PFETCH(c);\r
3194 tok->u.repeat.greedy = 1;\r
3195 tok->u.repeat.possessive = 1;\r
3196 }\r
3197 else {\r
3198 tok->u.repeat.greedy = 1;\r
3199 tok->u.repeat.possessive = 0;\r
3200 }\r
3201 }\r
3202 break;\r
3203\r
3204 case '{':\r
3205 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break;\r
3206 r = fetch_range_quantifier(&p, end, tok, env);\r
3207 if (r < 0) return r; /* error */\r
3208 if (r == 0) goto greedy_check;\r
3209 else if (r == 2) { /* {n} */\r
3210 if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))\r
3211 goto possessive_check;\r
3212\r
3213 goto greedy_check;\r
3214 }\r
3215 /* r == 1 : normal char */\r
3216 break;\r
3217\r
3218 case '|':\r
3219 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_VBAR_ALT)) break;\r
3220 tok->type = TK_ALT;\r
3221 break;\r
3222\r
3223 case '(':\r
3224 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;\r
3225 tok->type = TK_SUBEXP_OPEN;\r
3226 break;\r
3227\r
3228 case ')':\r
3229 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;\r
3230 tok->type = TK_SUBEXP_CLOSE;\r
3231 break;\r
3232\r
3233 case 'w':\r
3234 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;\r
3235 tok->type = TK_CHAR_TYPE;\r
3236 tok->u.prop.ctype = ONIGENC_CTYPE_WORD;\r
3237 tok->u.prop.not = 0;\r
3238 break;\r
3239\r
3240 case 'W':\r
3241 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;\r
3242 tok->type = TK_CHAR_TYPE;\r
3243 tok->u.prop.ctype = ONIGENC_CTYPE_WORD;\r
3244 tok->u.prop.not = 1;\r
3245 break;\r
3246\r
3247 case 'b':\r
3248 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;\r
3249 tok->type = TK_ANCHOR;\r
3250 tok->u.anchor = ANCHOR_WORD_BOUND;\r
3251 break;\r
3252\r
3253 case 'B':\r
3254 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;\r
3255 tok->type = TK_ANCHOR;\r
3256 tok->u.anchor = ANCHOR_NOT_WORD_BOUND;\r
3257 break;\r
3258\r
3259#ifdef USE_WORD_BEGIN_END\r
3260 case '<':\r
3261 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;\r
3262 tok->type = TK_ANCHOR;\r
3263 tok->u.anchor = ANCHOR_WORD_BEGIN;\r
3264 break;\r
3265\r
3266 case '>':\r
3267 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;\r
3268 tok->type = TK_ANCHOR;\r
3269 tok->u.anchor = ANCHOR_WORD_END;\r
3270 break;\r
3271#endif\r
3272\r
3273 case 's':\r
3274 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;\r
3275 tok->type = TK_CHAR_TYPE;\r
3276 tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;\r
3277 tok->u.prop.not = 0;\r
3278 break;\r
3279\r
3280 case 'S':\r
3281 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;\r
3282 tok->type = TK_CHAR_TYPE;\r
3283 tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;\r
3284 tok->u.prop.not = 1;\r
3285 break;\r
3286\r
3287 case 'd':\r
3288 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;\r
3289 tok->type = TK_CHAR_TYPE;\r
3290 tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;\r
3291 tok->u.prop.not = 0;\r
3292 break;\r
3293\r
3294 case 'D':\r
3295 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;\r
3296 tok->type = TK_CHAR_TYPE;\r
3297 tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;\r
3298 tok->u.prop.not = 1;\r
3299 break;\r
3300\r
3301 case 'h':\r
3302 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;\r
3303 tok->type = TK_CHAR_TYPE;\r
3304 tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;\r
3305 tok->u.prop.not = 0;\r
3306 break;\r
3307\r
3308 case 'H':\r
3309 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;\r
3310 tok->type = TK_CHAR_TYPE;\r
3311 tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;\r
3312 tok->u.prop.not = 1;\r
3313 break;\r
3314\r
3315 case 'A':\r
3316 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;\r
3317 begin_buf:\r
3318 tok->type = TK_ANCHOR;\r
3319 tok->u.subtype = ANCHOR_BEGIN_BUF;\r
3320 break;\r
3321\r
3322 case 'Z':\r
3323 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;\r
3324 tok->type = TK_ANCHOR;\r
3325 tok->u.subtype = ANCHOR_SEMI_END_BUF;\r
3326 break;\r
3327\r
3328 case 'z':\r
3329 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;\r
3330 end_buf:\r
3331 tok->type = TK_ANCHOR;\r
3332 tok->u.subtype = ANCHOR_END_BUF;\r
3333 break;\r
3334\r
3335 case 'G':\r
3336 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR)) break;\r
3337 tok->type = TK_ANCHOR;\r
3338 tok->u.subtype = ANCHOR_BEGIN_POSITION;\r
3339 break;\r
3340\r
3341 case '`':\r
3342 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;\r
3343 goto begin_buf;\r
3344 break;\r
3345\r
3346 case '\'':\r
3347 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;\r
3348 goto end_buf;\r
3349 break;\r
3350\r
3351 case 'x':\r
3352 if (PEND) break;\r
3353\r
3354 prev = p;\r
3355 if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {\r
3356 PINC;\r
3357 num = scan_unsigned_hexadecimal_number(&p, end, 8, enc);\r
3358 if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;\r
3359 if (!PEND) {\r
3360 if (ONIGENC_IS_CODE_XDIGIT(enc, PPEEK))\r
3361 return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;\r
3362 }\r
3363\r
3364 if ((p > prev + enclen(enc, prev)) && !PEND && PPEEK_IS('}')) {\r
3365 PINC;\r
3366 tok->type = TK_CODE_POINT;\r
3367 tok->u.code = (OnigCodePoint )num;\r
3368 }\r
3369 else {\r
3370 /* can't read nothing or invalid format */\r
3371 p = prev;\r
3372 }\r
3373 }\r
3374 else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {\r
3375 num = scan_unsigned_hexadecimal_number(&p, end, 2, enc);\r
3376 if (num < 0) return ONIGERR_TOO_BIG_NUMBER;\r
3377 if (p == prev) { /* can't read nothing. */\r
3378 num = 0; /* but, it's not error */\r
3379 }\r
3380 tok->type = TK_RAW_BYTE;\r
3381 tok->base = 16;\r
3382 tok->u.c = num;\r
3383 }\r
3384 break;\r
3385\r
3386 case 'u':\r
3387 if (PEND) break;\r
3388\r
3389 prev = p;\r
3390 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {\r
3391 num = scan_unsigned_hexadecimal_number(&p, end, 4, enc);\r
3392 if (num < 0) return ONIGERR_TOO_BIG_NUMBER;\r
3393 if (p == prev) { /* can't read nothing. */\r
3394 num = 0; /* but, it's not error */\r
3395 }\r
3396 tok->type = TK_CODE_POINT;\r
3397 tok->base = 16;\r
3398 tok->u.code = (OnigCodePoint )num;\r
3399 }\r
3400 break;\r
3401\r
3402 case '1': case '2': case '3': case '4':\r
3403 case '5': case '6': case '7': case '8': case '9':\r
3404 PUNFETCH;\r
3405 prev = p;\r
3406 num = onig_scan_unsigned_number(&p, end, enc);\r
3407 if (num < 0 || num > ONIG_MAX_BACKREF_NUM) {\r
3408 goto skip_backref;\r
3409 }\r
3410\r
3411 if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) && \r
3412 (num <= env->num_mem || num <= 9)) { /* This spec. from GNU regex */\r
3413 if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {\r
3414 if (num > env->num_mem || IS_NULL(SCANENV_MEM_NODES(env)[num]))\r
3415 return ONIGERR_INVALID_BACKREF;\r
3416 }\r
3417\r
3418 tok->type = TK_BACKREF;\r
3419 tok->u.backref.num = 1;\r
3420 tok->u.backref.ref1 = num;\r
3421 tok->u.backref.by_name = 0;\r
3422#ifdef USE_BACKREF_WITH_LEVEL\r
3423 tok->u.backref.exist_level = 0;\r
3424#endif\r
3425 break;\r
3426 }\r
3427\r
3428 skip_backref:\r
3429 if (c == '8' || c == '9') {\r
3430 /* normal char */\r
3431 p = prev; PINC;\r
3432 break;\r
3433 }\r
3434\r
3435 p = prev;\r
3436 /* fall through */\r
3437 case '0':\r
3438 if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {\r
3439 prev = p;\r
3440 num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), enc);\r
3441 if (num < 0) return ONIGERR_TOO_BIG_NUMBER;\r
3442 if (p == prev) { /* can't read nothing. */\r
3443 num = 0; /* but, it's not error */\r
3444 }\r
3445 tok->type = TK_RAW_BYTE;\r
3446 tok->base = 8;\r
3447 tok->u.c = num;\r
3448 }\r
3449 else if (c != '0') {\r
3450 PINC;\r
3451 }\r
3452 break;\r
3453\r
3454#ifdef USE_NAMED_GROUP\r
3455 case 'k':\r
3456 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) {\r
3457 PFETCH(c);\r
3458 if (c == '<' || c == '\'') {\r
3459 UChar* name_end;\r
3460 int* backs;\r
3461 int back_num;\r
3462\r
3463 prev = p;\r
3464\r
3465#ifdef USE_BACKREF_WITH_LEVEL\r
3466 name_end = NULL_UCHARP; /* no need. escape gcc warning. */\r
3467 r = fetch_name_with_level((OnigCodePoint )c, &p, end, &name_end,\r
3468 env, &back_num, &tok->u.backref.level);\r
3469 if (r == 1) tok->u.backref.exist_level = 1;\r
3470 else tok->u.backref.exist_level = 0;\r
3471#else\r
3472 r = fetch_name(&p, end, &name_end, env, &back_num, 1);\r
3473#endif\r
3474 if (r < 0) return r;\r
3475\r
3476 if (back_num != 0) {\r
3477 if (back_num < 0) {\r
3478 back_num = BACKREF_REL_TO_ABS(back_num, env);\r
3479 if (back_num <= 0)\r
3480 return ONIGERR_INVALID_BACKREF;\r
3481 }\r
3482\r
3483 if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {\r
3484 if (back_num > env->num_mem ||\r
3485 IS_NULL(SCANENV_MEM_NODES(env)[back_num]))\r
3486 return ONIGERR_INVALID_BACKREF;\r
3487 }\r
3488 tok->type = TK_BACKREF;\r
3489 tok->u.backref.by_name = 0;\r
3490 tok->u.backref.num = 1;\r
3491 tok->u.backref.ref1 = back_num;\r
3492 }\r
3493 else {\r
3494 num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);\r
3495 if (num <= 0) {\r
3496 onig_scan_env_set_error_string(env,\r
3497 ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);\r
3498 return ONIGERR_UNDEFINED_NAME_REFERENCE;\r
3499 }\r
3500 if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {\r
3501 int i;\r
3502 for (i = 0; i < num; i++) {\r
3503 if (backs[i] > env->num_mem ||\r
3504 IS_NULL(SCANENV_MEM_NODES(env)[backs[i]]))\r
3505 return ONIGERR_INVALID_BACKREF;\r
3506 }\r
3507 }\r
3508\r
3509 tok->type = TK_BACKREF;\r
3510 tok->u.backref.by_name = 1;\r
3511 if (num == 1) {\r
3512 tok->u.backref.num = 1;\r
3513 tok->u.backref.ref1 = backs[0];\r
3514 }\r
3515 else {\r
3516 tok->u.backref.num = num;\r
3517 tok->u.backref.refs = backs;\r
3518 }\r
3519 }\r
3520 }\r
3521 else\r
3522 PUNFETCH;\r
3523 }\r
3524 break;\r
3525#endif\r
3526\r
3527#ifdef USE_SUBEXP_CALL\r
3528 case 'g':\r
3529 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) {\r
3530 PFETCH(c);\r
3531 if (c == '<' || c == '\'') {\r
3532 int gnum;\r
3533 UChar* name_end;\r
3534\r
3535 prev = p;\r
3536 r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &gnum, 1);\r
3537 if (r < 0) return r;\r
3538\r
3539 tok->type = TK_CALL;\r
3540 tok->u.call.name = prev;\r
3541 tok->u.call.name_end = name_end;\r
3542 tok->u.call.gnum = gnum;\r
3543 }\r
3544 else\r
3545 PUNFETCH;\r
3546 }\r
3547 break;\r
3548#endif\r
3549\r
3550 case 'Q':\r
3551 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE)) {\r
3552 tok->type = TK_QUOTE_OPEN;\r
3553 }\r
3554 break;\r
3555\r
3556 case 'p':\r
3557 case 'P':\r
3558 if (PPEEK_IS('{') &&\r
3559 IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {\r
3560 PINC;\r
3561 tok->type = TK_CHAR_PROPERTY;\r
3562 tok->u.prop.not = (c == 'P' ? 1 : 0);\r
3563\r
3564 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {\r
3565 PFETCH(c);\r
3566 if (c == '^') {\r
3567 tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);\r
3568 }\r
3569 else\r
3570 PUNFETCH;\r
3571 }\r
3572 }\r
3573 break;\r
3574\r
3575 default:\r
3576 PUNFETCH;\r
3577 num = fetch_escaped_value(&p, end, env);\r
3578 if (num < 0) return num;\r
3579 /* set_raw: */\r
3580 if (tok->u.c != num) {\r
3581 tok->type = TK_CODE_POINT;\r
3582 tok->u.code = (OnigCodePoint )num;\r
3583 }\r
3584 else { /* string */\r
3585 p = tok->backp + enclen(enc, tok->backp);\r
3586 }\r
3587 break;\r
3588 }\r
3589 }\r
3590 else {\r
3591 tok->u.c = c;\r
3592 tok->escaped = 0;\r
3593\r
3594#ifdef USE_VARIABLE_META_CHARS\r
3595 if ((c != ONIG_INEFFECTIVE_META_CHAR) &&\r
3596 IS_SYNTAX_OP(syn, ONIG_SYN_OP_VARIABLE_META_CHARACTERS)) {\r
3597 if (c == MC_ANYCHAR(syn))\r
3598 goto any_char;\r
3599 else if (c == MC_ANYTIME(syn))\r
3600 goto anytime;\r
3601 else if (c == MC_ZERO_OR_ONE_TIME(syn))\r
3602 goto zero_or_one_time;\r
3603 else if (c == MC_ONE_OR_MORE_TIME(syn))\r
3604 goto one_or_more_time;\r
3605 else if (c == MC_ANYCHAR_ANYTIME(syn)) {\r
3606 tok->type = TK_ANYCHAR_ANYTIME;\r
3607 goto out;\r
3608 }\r
3609 }\r
3610#endif\r
3611\r
3612 switch (c) {\r
3613 case '.':\r
3614 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_DOT_ANYCHAR)) break;\r
3615#ifdef USE_VARIABLE_META_CHARS\r
3616 any_char:\r
3617#endif\r
3618 tok->type = TK_ANYCHAR;\r
3619 break;\r
3620\r
3621 case '*':\r
3622 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ASTERISK_ZERO_INF)) break;\r
3623#ifdef USE_VARIABLE_META_CHARS\r
3624 anytime:\r
3625#endif\r
3626 tok->type = TK_OP_REPEAT;\r
3627 tok->u.repeat.lower = 0;\r
3628 tok->u.repeat.upper = REPEAT_INFINITE;\r
3629 goto greedy_check;\r
3630 break;\r
3631\r
3632 case '+':\r
3633 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_PLUS_ONE_INF)) break;\r
3634#ifdef USE_VARIABLE_META_CHARS\r
3635 one_or_more_time:\r
3636#endif\r
3637 tok->type = TK_OP_REPEAT;\r
3638 tok->u.repeat.lower = 1;\r
3639 tok->u.repeat.upper = REPEAT_INFINITE;\r
3640 goto greedy_check;\r
3641 break;\r
3642\r
3643 case '?':\r
3644 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_ZERO_ONE)) break;\r
3645#ifdef USE_VARIABLE_META_CHARS\r
3646 zero_or_one_time:\r
3647#endif\r
3648 tok->type = TK_OP_REPEAT;\r
3649 tok->u.repeat.lower = 0;\r
3650 tok->u.repeat.upper = 1;\r
3651 goto greedy_check;\r
3652 break;\r
3653\r
3654 case '{':\r
3655 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break;\r
3656 r = fetch_range_quantifier(&p, end, tok, env);\r
3657 if (r < 0) return r; /* error */\r
3658 if (r == 0) goto greedy_check;\r
3659 else if (r == 2) { /* {n} */\r
3660 if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))\r
3661 goto possessive_check;\r
3662\r
3663 goto greedy_check;\r
3664 }\r
3665 /* r == 1 : normal char */\r
3666 break;\r
3667\r
3668 case '|':\r
3669 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_VBAR_ALT)) break;\r
3670 tok->type = TK_ALT;\r
3671 break;\r
3672\r
3673 case '(':\r
3674 if (PPEEK_IS('?') &&\r
3675 IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {\r
3676 PINC;\r
3677 if (PPEEK_IS('#')) {\r
3678 PFETCH(c);\r
3679 while (1) {\r
3680 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
3681 PFETCH(c);\r
3682 if (c == MC_ESC(syn)) {\r
3683 if (!PEND) PFETCH(c);\r
3684 }\r
3685 else {\r
3686 if (c == ')') break;\r
3687 }\r
3688 }\r
3689 goto start;\r
3690 }\r
3691 PUNFETCH;\r
3692 }\r
3693\r
3694 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;\r
3695 tok->type = TK_SUBEXP_OPEN;\r
3696 break;\r
3697\r
3698 case ')':\r
3699 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;\r
3700 tok->type = TK_SUBEXP_CLOSE;\r
3701 break;\r
3702\r
3703 case '^':\r
3704 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;\r
3705 tok->type = TK_ANCHOR;\r
3706 tok->u.subtype = (IS_SINGLELINE(env->option)\r
3707 ? ANCHOR_BEGIN_BUF : ANCHOR_BEGIN_LINE);\r
3708 break;\r
3709\r
3710 case '$':\r
3711 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;\r
3712 tok->type = TK_ANCHOR;\r
3713 tok->u.subtype = (IS_SINGLELINE(env->option)\r
3714 ? ANCHOR_SEMI_END_BUF : ANCHOR_END_LINE);\r
3715 break;\r
3716\r
3717 case '[':\r
3718 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACKET_CC)) break;\r
3719 tok->type = TK_CC_OPEN;\r
3720 break;\r
3721\r
3722 case ']':\r
3723 if (*src > env->pattern) /* /].../ is allowed. */\r
3724 CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]");\r
3725 break;\r
3726\r
3727 case '#':\r
3728 if (IS_EXTEND(env->option)) {\r
3729 while (!PEND) {\r
3730 PFETCH(c);\r
3731 if (ONIGENC_IS_CODE_NEWLINE(enc, c))\r
3732 break;\r
3733 }\r
3734 goto start;\r
3735 break;\r
3736 }\r
3737 break;\r
3738\r
3739 case ' ': case '\t': case '\n': case '\r': case '\f':\r
3740 if (IS_EXTEND(env->option))\r
3741 goto start;\r
3742 break;\r
3743\r
3744 default:\r
3745 /* string */\r
3746 break;\r
3747 }\r
3748 }\r
3749\r
3750#ifdef USE_VARIABLE_META_CHARS\r
3751 out:\r
3752#endif\r
3753 *src = p;\r
3754 return tok->type;\r
3755}\r
3756\r
3757static int\r
3758add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not,\r
3759 OnigEncoding enc ARG_UNUSED,\r
3760 OnigCodePoint sb_out, const OnigCodePoint mbr[])\r
3761{\r
3762 int i, r;\r
3763 OnigCodePoint j;\r
3764\r
3765 int n = ONIGENC_CODE_RANGE_NUM(mbr);\r
3766\r
3767 if (not == 0) {\r
3768 for (i = 0; i < n; i++) {\r
3769 for (j = ONIGENC_CODE_RANGE_FROM(mbr, i);\r
3770 j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) {\r
3771 if (j >= sb_out) {\r
3772 if (j == ONIGENC_CODE_RANGE_TO(mbr, i)) i++;\r
3773 else if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) {\r
3774 r = add_code_range_to_buf(&(cc->mbuf), j,\r
3775 ONIGENC_CODE_RANGE_TO(mbr, i));\r
3776 if (r != 0) return r;\r
3777 i++;\r
3778 }\r
3779\r
3780 goto sb_end;\r
3781 }\r
3782 BITSET_SET_BIT(cc->bs, j);\r
3783 }\r
3784 }\r
3785\r
3786 sb_end:\r
3787 for ( ; i < n; i++) {\r
3788 r = add_code_range_to_buf(&(cc->mbuf),\r
3789 ONIGENC_CODE_RANGE_FROM(mbr, i),\r
3790 ONIGENC_CODE_RANGE_TO(mbr, i));\r
3791 if (r != 0) return r;\r
3792 }\r
3793 }\r
3794 else {\r
3795 OnigCodePoint prev = 0;\r
3796\r
3797 for (i = 0; i < n; i++) {\r
3798 for (j = prev;\r
3799 j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) {\r
3800 if (j >= sb_out) {\r
3801 goto sb_end2;\r
3802 }\r
3803 BITSET_SET_BIT(cc->bs, j);\r
3804 }\r
3805 prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;\r
3806 }\r
3807 for (j = prev; j < sb_out; j++) {\r
3808 BITSET_SET_BIT(cc->bs, j);\r
3809 }\r
3810\r
3811 sb_end2:\r
3812 prev = sb_out;\r
3813\r
3814 for (i = 0; i < n; i++) {\r
3815 if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) {\r
3816 r = add_code_range_to_buf(&(cc->mbuf), prev,\r
3817 ONIGENC_CODE_RANGE_FROM(mbr, i) - 1);\r
3818 if (r != 0) return r;\r
3819 }\r
3820 prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;\r
3821 }\r
3822 if (prev < 0x7fffffff) {\r
3823 r = add_code_range_to_buf(&(cc->mbuf), prev, 0x7fffffff);\r
3824 if (r != 0) return r;\r
3825 }\r
3826 }\r
3827\r
3828 return 0;\r
3829}\r
3830\r
3831static int\r
3832add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)\r
3833{\r
3834 int c, r;\r
3835 const OnigCodePoint *ranges;\r
3836 OnigCodePoint sb_out;\r
3837 OnigEncoding enc = env->enc;\r
3838\r
3839 r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges);\r
3840 if (r == 0) {\r
3841 return add_ctype_to_cc_by_range(cc, ctype, not, env->enc, sb_out, ranges);\r
3842 }\r
3843 else if (r != ONIG_NO_SUPPORT_CONFIG) {\r
3844 return r;\r
3845 }\r
3846\r
3847 r = 0;\r
3848 switch (ctype) {\r
3849 case ONIGENC_CTYPE_ALPHA:\r
3850 case ONIGENC_CTYPE_BLANK:\r
3851 case ONIGENC_CTYPE_CNTRL:\r
3852 case ONIGENC_CTYPE_DIGIT:\r
3853 case ONIGENC_CTYPE_LOWER:\r
3854 case ONIGENC_CTYPE_PUNCT:\r
3855 case ONIGENC_CTYPE_SPACE:\r
3856 case ONIGENC_CTYPE_UPPER:\r
3857 case ONIGENC_CTYPE_XDIGIT:\r
3858 case ONIGENC_CTYPE_ASCII:\r
3859 case ONIGENC_CTYPE_ALNUM:\r
3860 if (not != 0) {\r
3861 for (c = 0; c < SINGLE_BYTE_SIZE; c++) {\r
3862 if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))\r
3863 BITSET_SET_BIT(cc->bs, c);\r
3864 }\r
3865 ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);\r
3866 }\r
3867 else {\r
3868 for (c = 0; c < SINGLE_BYTE_SIZE; c++) {\r
3869 if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))\r
3870 BITSET_SET_BIT(cc->bs, c);\r
3871 }\r
3872 }\r
3873 break;\r
3874\r
3875 case ONIGENC_CTYPE_GRAPH:\r
3876 case ONIGENC_CTYPE_PRINT:\r
3877 if (not != 0) {\r
3878 for (c = 0; c < SINGLE_BYTE_SIZE; c++) {\r
3879 if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))\r
3880 BITSET_SET_BIT(cc->bs, c);\r
3881 }\r
3882 }\r
3883 else {\r
3884 for (c = 0; c < SINGLE_BYTE_SIZE; c++) {\r
3885 if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))\r
3886 BITSET_SET_BIT(cc->bs, c);\r
3887 }\r
3888 ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);\r
3889 }\r
3890 break;\r
3891\r
3892 case ONIGENC_CTYPE_WORD:\r
3893 if (not == 0) {\r
3894 for (c = 0; c < SINGLE_BYTE_SIZE; c++) {\r
3895 if (IS_CODE_SB_WORD(enc, c)) BITSET_SET_BIT(cc->bs, c);\r
3896 }\r
3897 ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);\r
3898 }\r
3899 else {\r
3900 for (c = 0; c < SINGLE_BYTE_SIZE; c++) {\r
3901 if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) /* check invalid code point */\r
3902 && ! ONIGENC_IS_CODE_WORD(enc, c))\r
3903 BITSET_SET_BIT(cc->bs, c);\r
3904 }\r
3905 }\r
3906 break;\r
3907\r
3908 default:\r
3909 return ONIGERR_PARSER_BUG;\r
3910 break;\r
3911 }\r
3912\r
3913 return r;\r
3914}\r
3915\r
3916static int\r
3917parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)\r
3918{\r
3919#define POSIX_BRACKET_CHECK_LIMIT_LENGTH 20\r
3920#define POSIX_BRACKET_NAME_MIN_LEN 4\r
3921\r
3922 static PosixBracketEntryType PBS[] = {\r
3923 { (UChar* )"alnum", ONIGENC_CTYPE_ALNUM, 5 },\r
3924 { (UChar* )"alpha", ONIGENC_CTYPE_ALPHA, 5 },\r
3925 { (UChar* )"blank", ONIGENC_CTYPE_BLANK, 5 },\r
3926 { (UChar* )"cntrl", ONIGENC_CTYPE_CNTRL, 5 },\r
3927 { (UChar* )"digit", ONIGENC_CTYPE_DIGIT, 5 },\r
3928 { (UChar* )"graph", ONIGENC_CTYPE_GRAPH, 5 },\r
3929 { (UChar* )"lower", ONIGENC_CTYPE_LOWER, 5 },\r
3930 { (UChar* )"print", ONIGENC_CTYPE_PRINT, 5 },\r
3931 { (UChar* )"punct", ONIGENC_CTYPE_PUNCT, 5 },\r
3932 { (UChar* )"space", ONIGENC_CTYPE_SPACE, 5 },\r
3933 { (UChar* )"upper", ONIGENC_CTYPE_UPPER, 5 },\r
3934 { (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 },\r
3935 { (UChar* )"ascii", ONIGENC_CTYPE_ASCII, 5 },\r
3936 { (UChar* )"word", ONIGENC_CTYPE_WORD, 4 },\r
3937 { (UChar* )NULL, -1, 0 }\r
3938 };\r
3939\r
3940 PosixBracketEntryType *pb;\r
3941 int not, i, r;\r
3942 OnigCodePoint c;\r
3943 OnigEncoding enc = env->enc;\r
3944 UChar *p = *src;\r
3945\r
3946 if (PPEEK_IS('^')) {\r
3947 PINC_S;\r
3948 not = 1;\r
3949 }\r
3950 else\r
3951 not = 0;\r
3952\r
3953 if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MIN_LEN + 3)\r
3954 goto not_posix_bracket;\r
3955\r
3956 for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {\r
3957 if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) {\r
3958 p = (UChar* )onigenc_step(enc, p, end, pb->len);\r
3959 if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0)\r
3960 return ONIGERR_INVALID_POSIX_BRACKET_TYPE;\r
3961\r
3962 r = add_ctype_to_cc(cc, pb->ctype, not, env);\r
3963 if (r != 0) return r;\r
3964\r
3965 PINC_S; PINC_S;\r
3966 *src = p;\r
3967 return 0;\r
3968 }\r
3969 }\r
3970\r
3971 not_posix_bracket:\r
3972 c = 0;\r
3973 i = 0;\r
3974 while (!PEND && ((c = PPEEK) != ':') && c != ']') {\r
3975 PINC_S;\r
3976 if (++i > POSIX_BRACKET_CHECK_LIMIT_LENGTH) break;\r
3977 }\r
3978 if (c == ':' && ! PEND) {\r
3979 PINC_S;\r
3980 if (! PEND) {\r
3981 PFETCH_S(c);\r
3982 if (c == ']')\r
3983 return ONIGERR_INVALID_POSIX_BRACKET_TYPE;\r
3984 }\r
3985 }\r
3986\r
3987 return 1; /* 1: is not POSIX bracket, but no error. */\r
3988}\r
3989\r
3990static int\r
3991fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env)\r
3992{\r
3993 int r;\r
3994 OnigCodePoint c;\r
3995 OnigEncoding enc = env->enc;\r
3996 UChar *prev, *start, *p = *src;\r
3997\r
3998 r = 0;\r
3999 start = prev = p;\r
4000\r
4001 while (!PEND) {\r
4002 prev = p;\r
4003 PFETCH_S(c);\r
4004 if (c == '}') {\r
4005 r = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, start, prev);\r
4006 if (r < 0) break;\r
4007\r
4008 *src = p;\r
4009 return r;\r
4010 }\r
4011 else if (c == '(' || c == ')' || c == '{' || c == '|') {\r
4012 r = ONIGERR_INVALID_CHAR_PROPERTY_NAME;\r
4013 break;\r
4014 }\r
4015 }\r
4016\r
4017 onig_scan_env_set_error_string(env, r, *src, prev);\r
4018 return r;\r
4019}\r
4020\r
4021static int\r
4022parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end,\r
4023 ScanEnv* env)\r
4024{\r
4025 int r, ctype;\r
4026 CClassNode* cc;\r
4027\r
4028 ctype = fetch_char_property_to_ctype(src, end, env);\r
4029 if (ctype < 0) return ctype;\r
4030\r
4031 *np = node_new_cclass();\r
4032 CHECK_NULL_RETURN_MEMERR(*np);\r
4033 cc = NCCLASS(*np);\r
4034 r = add_ctype_to_cc(cc, ctype, 0, env);\r
4035 if (r != 0) return r;\r
4036 if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);\r
4037\r
4038 return 0;\r
4039}\r
4040\r
4041\r
/* State of the character class parser between two items of a class. */
enum CCSTATE {
  CCS_VALUE    = 0,  /* a value is pending and not yet stored in the class */
  CCS_RANGE    = 1,  /* a range operator follows the pending value         */
  CCS_COMPLETE = 2,  /* a range has just been stored                       */
  CCS_START    = 3   /* at the start of a class or right after "&&"        */
};
4048\r
/* Kind of the value most recently read inside a character class. */
enum CCVALTYPE {
  CCV_SB         = 0,  /* single-byte value, stored in the bitset     */
  CCV_CODE_POINT = 1,  /* code point, stored in the code range buffer */
  CCV_CLASS      = 2   /* a character type, property or POSIX bracket */
};
4054\r
4055static int\r
4056next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type,\r
4057 enum CCSTATE* state, ScanEnv* env)\r
4058{\r
4059 int r;\r
4060\r
4061 if (*state == CCS_RANGE)\r
4062 return ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE;\r
4063\r
4064 if (*state == CCS_VALUE && *type != CCV_CLASS) {\r
4065 if (*type == CCV_SB)\r
4066 BITSET_SET_BIT(cc->bs, (int )(*vs));\r
4067 else if (*type == CCV_CODE_POINT) {\r
4068 r = add_code_range(&(cc->mbuf), env, *vs, *vs);\r
4069 if (r < 0) return r;\r
4070 }\r
4071 }\r
4072\r
4073 *state = CCS_VALUE;\r
4074 *type = CCV_CLASS;\r
4075 return 0;\r
4076}\r
4077\r
4078static int\r
4079next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,\r
4080 int* vs_israw, int v_israw,\r
4081 enum CCVALTYPE intype, enum CCVALTYPE* type,\r
4082 enum CCSTATE* state, ScanEnv* env)\r
4083{\r
4084 int r;\r
4085\r
4086 switch (*state) {\r
4087 case CCS_VALUE:\r
4088 if (*type == CCV_SB)\r
4089 BITSET_SET_BIT(cc->bs, (int )(*vs));\r
4090 else if (*type == CCV_CODE_POINT) {\r
4091 r = add_code_range(&(cc->mbuf), env, *vs, *vs);\r
4092 if (r < 0) return r;\r
4093 }\r
4094 break;\r
4095\r
4096 case CCS_RANGE:\r
4097 if (intype == *type) {\r
4098 if (intype == CCV_SB) {\r
4099 if (*vs > 0xff || v > 0xff)\r
4100 return ONIGERR_INVALID_CODE_POINT_VALUE;\r
4101\r
4102 if (*vs > v) {\r
4103 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))\r
4104 goto ccs_range_end;\r
4105 else\r
4106 return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;\r
4107 }\r
4108 bitset_set_range(cc->bs, (int )*vs, (int )v);\r
4109 }\r
4110 else {\r
4111 r = add_code_range(&(cc->mbuf), env, *vs, v);\r
4112 if (r < 0) return r;\r
4113 }\r
4114 }\r
4115 else {\r
4116#if 0\r
4117 if (intype == CCV_CODE_POINT && *type == CCV_SB) {\r
4118#endif\r
4119 if (*vs > v) {\r
4120 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))\r
4121 goto ccs_range_end;\r
4122 else\r
4123 return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;\r
4124 }\r
4125 bitset_set_range(cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff));\r
4126 r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*vs, v);\r
4127 if (r < 0) return r;\r
4128#if 0\r
4129 }\r
4130 else\r
4131 return ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE;\r
4132#endif\r
4133 }\r
4134 ccs_range_end:\r
4135 *state = CCS_COMPLETE;\r
4136 break;\r
4137\r
4138 case CCS_COMPLETE:\r
4139 case CCS_START:\r
4140 *state = CCS_VALUE;\r
4141 break;\r
4142\r
4143 default:\r
4144 break;\r
4145 }\r
4146\r
4147 *vs_israw = v_israw;\r
4148 *vs = v;\r
4149 *type = intype;\r
4150 return 0;\r
4151}\r
4152\r
4153static int\r
4154code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,\r
4155 ScanEnv* env)\r
4156{\r
4157 int in_esc;\r
4158 OnigCodePoint code;\r
4159 OnigEncoding enc = env->enc;\r
4160 UChar* p = from;\r
4161\r
4162 in_esc = 0;\r
4163 while (! PEND) {\r
4164 if (ignore_escaped && in_esc) {\r
4165 in_esc = 0;\r
4166 }\r
4167 else {\r
4168 PFETCH_S(code);\r
4169 if (code == c) return 1;\r
4170 if (code == MC_ESC(env->syntax)) in_esc = 1;\r
4171 }\r
4172 }\r
4173 return 0;\r
4174}\r
4175\r
4176static int\r
4177parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,\r
4178 ScanEnv* env)\r
4179{\r
4180 int r, neg, len, fetched, and_start;\r
4181 OnigCodePoint v, vs;\r
4182 UChar *p;\r
4183 Node* node;\r
4184 CClassNode *cc, *prev_cc;\r
4185 CClassNode work_cc;\r
4186\r
4187 enum CCSTATE state;\r
4188 enum CCVALTYPE val_type, in_type;\r
4189 int val_israw, in_israw;\r
4190\r
4191 prev_cc = (CClassNode* )NULL;\r
4192 *np = NULL_NODE;\r
4193 r = fetch_token_in_cc(tok, src, end, env);\r
4194 if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) {\r
4195 neg = 1;\r
4196 r = fetch_token_in_cc(tok, src, end, env);\r
4197 }\r
4198 else {\r
4199 neg = 0;\r
4200 }\r
4201\r
4202 if (r < 0) return r;\r
4203 if (r == TK_CC_CLOSE) {\r
4204 if (! code_exist_check((OnigCodePoint )']',\r
4205 *src, env->pattern_end, 1, env))\r
4206 return ONIGERR_EMPTY_CHAR_CLASS;\r
4207\r
4208 CC_ESC_WARN(env, (UChar* )"]");\r
4209 r = tok->type = TK_CHAR; /* allow []...] */\r
4210 }\r
4211\r
4212 *np = node = node_new_cclass();\r
4213 CHECK_NULL_RETURN_MEMERR(node);\r
4214 cc = NCCLASS(node);\r
4215\r
4216 and_start = 0;\r
4217 state = CCS_START;\r
4218 p = *src;\r
4219 while (r != TK_CC_CLOSE) {\r
4220 fetched = 0;\r
4221 switch (r) {\r
4222 case TK_CHAR:\r
4223 len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.c);\r
4224 if (len > 1) {\r
4225 in_type = CCV_CODE_POINT;\r
4226 }\r
4227 else if (len < 0) {\r
4228 r = len;\r
4229 goto err;\r
4230 }\r
4231 else {\r
4232 sb_char:\r
4233 in_type = CCV_SB;\r
4234 }\r
4235 v = (OnigCodePoint )tok->u.c;\r
4236 in_israw = 0;\r
4237 goto val_entry2;\r
4238 break;\r
4239\r
4240 case TK_RAW_BYTE:\r
4241 /* tok->base != 0 : octal or hexadec. */\r
4242 if (! ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base != 0) {\r
4243 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];\r
4244 UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN;\r
4245 UChar* psave = p;\r
4246 int i, base = tok->base;\r
4247\r
4248 buf[0] = (UChar)tok->u.c;\r
4249 for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) {\r
4250 r = fetch_token_in_cc(tok, &p, end, env);\r
4251 if (r < 0) goto err;\r
4252 if (r != TK_RAW_BYTE || tok->base != base) {\r
4253 fetched = 1;\r
4254 break;\r
4255 }\r
4256 buf[i] = (UChar)tok->u.c;\r
4257 }\r
4258\r
4259 if (i < ONIGENC_MBC_MINLEN(env->enc)) {\r
4260 r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;\r
4261 goto err;\r
4262 }\r
4263\r
4264 len = enclen(env->enc, buf);\r
4265 if (i < len) {\r
4266 r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;\r
4267 goto err;\r
4268 }\r
4269 else if (i > len) { /* fetch back */\r
4270 p = psave;\r
4271 for (i = 1; i < len; i++) {\r
4272 r = fetch_token_in_cc(tok, &p, end, env);\r
4273 }\r
4274 fetched = 0;\r
4275 }\r
4276\r
4277 if (i == 1) {\r
4278 v = (OnigCodePoint )buf[0];\r
4279 goto raw_single;\r
4280 }\r
4281 else {\r
4282 v = ONIGENC_MBC_TO_CODE(env->enc, buf, bufe);\r
4283 in_type = CCV_CODE_POINT;\r
4284 }\r
4285 }\r
4286 else {\r
4287 v = (OnigCodePoint )tok->u.c;\r
4288 raw_single:\r
4289 in_type = CCV_SB;\r
4290 }\r
4291 in_israw = 1;\r
4292 goto val_entry2;\r
4293 break;\r
4294\r
4295 case TK_CODE_POINT:\r
4296 v = tok->u.code;\r
4297 in_israw = 1;\r
4298 val_entry:\r
4299 len = ONIGENC_CODE_TO_MBCLEN(env->enc, v);\r
4300 if (len < 0) {\r
4301 r = len;\r
4302 goto err;\r
4303 }\r
4304 in_type = (len == 1 ? CCV_SB : CCV_CODE_POINT);\r
4305 val_entry2:\r
4306 r = next_state_val(cc, &vs, v, &val_israw, in_israw, in_type, &val_type,\r
4307 &state, env);\r
4308 if (r != 0) goto err;\r
4309 break;\r
4310\r
4311 case TK_POSIX_BRACKET_OPEN:\r
4312 r = parse_posix_bracket(cc, &p, end, env);\r
4313 if (r < 0) goto err;\r
4314 if (r == 1) { /* is not POSIX bracket */\r
4315 CC_ESC_WARN(env, (UChar* )"[");\r
4316 p = tok->backp;\r
4317 v = (OnigCodePoint )tok->u.c;\r
4318 in_israw = 0;\r
4319 goto val_entry;\r
4320 }\r
4321 goto next_class;\r
4322 break;\r
4323\r
4324 case TK_CHAR_TYPE:\r
4325 r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not, env);\r
4326 if (r != 0) return r;\r
4327\r
4328 next_class:\r
4329 r = next_state_class(cc, &vs, &val_type, &state, env);\r
4330 if (r != 0) goto err;\r
4331 break;\r
4332\r
4333 case TK_CHAR_PROPERTY:\r
4334 {\r
4335 int ctype;\r
4336\r
4337 ctype = fetch_char_property_to_ctype(&p, end, env);\r
4338 if (ctype < 0) return ctype;\r
4339 r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, env);\r
4340 if (r != 0) return r;\r
4341 goto next_class;\r
4342 }\r
4343 break;\r
4344\r
4345 case TK_CC_RANGE:\r
4346 if (state == CCS_VALUE) {\r
4347 r = fetch_token_in_cc(tok, &p, end, env);\r
4348 if (r < 0) goto err;\r
4349 fetched = 1;\r
4350 if (r == TK_CC_CLOSE) { /* allow [x-] */\r
4351 range_end_val:\r
4352 v = (OnigCodePoint )'-';\r
4353 in_israw = 0;\r
4354 goto val_entry;\r
4355 }\r
4356 else if (r == TK_CC_AND) {\r
4357 CC_ESC_WARN(env, (UChar* )"-");\r
4358 goto range_end_val;\r
4359 }\r
4360 state = CCS_RANGE;\r
4361 }\r
4362 else if (state == CCS_START) {\r
4363 /* [-xa] is allowed */\r
4364 v = (OnigCodePoint )tok->u.c;\r
4365 in_israw = 0;\r
4366\r
4367 r = fetch_token_in_cc(tok, &p, end, env);\r
4368 if (r < 0) goto err;\r
4369 fetched = 1;\r
4370 /* [--x] or [a&&-x] is warned. */\r
4371 if (r == TK_CC_RANGE || and_start != 0)\r
4372 CC_ESC_WARN(env, (UChar* )"-");\r
4373\r
4374 goto val_entry;\r
4375 }\r
4376 else if (state == CCS_RANGE) {\r
4377 CC_ESC_WARN(env, (UChar* )"-");\r
4378 goto sb_char; /* [!--x] is allowed */\r
4379 }\r
4380 else { /* CCS_COMPLETE */\r
4381 r = fetch_token_in_cc(tok, &p, end, env);\r
4382 if (r < 0) goto err;\r
4383 fetched = 1;\r
4384 if (r == TK_CC_CLOSE) goto range_end_val; /* allow [a-b-] */\r
4385 else if (r == TK_CC_AND) {\r
4386 CC_ESC_WARN(env, (UChar* )"-");\r
4387 goto range_end_val;\r
4388 }\r
4389 \r
4390 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) {\r
4391 CC_ESC_WARN(env, (UChar* )"-");\r
4392 goto sb_char; /* [0-9-a] is allowed as [0-9\-a] */\r
4393 }\r
4394 r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;\r
4395 goto err;\r
4396 }\r
4397 break;\r
4398\r
4399 case TK_CC_CC_OPEN: /* [ */\r
4400 {\r
4401 Node *anode;\r
4402 CClassNode* acc;\r
4403\r
4404 r = parse_char_class(&anode, tok, &p, end, env);\r
4405 if (r != 0) goto cc_open_err;\r
4406 acc = NCCLASS(anode);\r
4407 r = or_cclass(cc, acc, env->enc);\r
4408\r
4409 onig_node_free(anode);\r
4410 cc_open_err:\r
4411 if (r != 0) goto err;\r
4412 }\r
4413 break;\r
4414\r
4415 case TK_CC_AND: /* && */\r
4416 {\r
4417 if (state == CCS_VALUE) {\r
4418 r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,\r
4419 &val_type, &state, env);\r
4420 if (r != 0) goto err;\r
4421 }\r
4422 /* initialize local variables */\r
4423 and_start = 1;\r
4424 state = CCS_START;\r
4425\r
4426 if (IS_NOT_NULL(prev_cc)) {\r
4427 r = and_cclass(prev_cc, cc, env->enc);\r
4428 if (r != 0) goto err;\r
4429 bbuf_free(cc->mbuf);\r
4430 }\r
4431 else {\r
4432 prev_cc = cc;\r
4433 cc = &work_cc;\r
4434 }\r
4435 initialize_cclass(cc);\r
4436 }\r
4437 break;\r
4438\r
4439 case TK_EOT:\r
4440 r = ONIGERR_PREMATURE_END_OF_CHAR_CLASS;\r
4441 goto err;\r
4442 break;\r
4443 default:\r
4444 r = ONIGERR_PARSER_BUG;\r
4445 goto err;\r
4446 break;\r
4447 }\r
4448\r
4449 if (fetched)\r
4450 r = tok->type;\r
4451 else {\r
4452 r = fetch_token_in_cc(tok, &p, end, env);\r
4453 if (r < 0) goto err;\r
4454 }\r
4455 }\r
4456\r
4457 if (state == CCS_VALUE) {\r
4458 r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,\r
4459 &val_type, &state, env);\r
4460 if (r != 0) goto err;\r
4461 }\r
4462\r
4463 if (IS_NOT_NULL(prev_cc)) {\r
4464 r = and_cclass(prev_cc, cc, env->enc);\r
4465 if (r != 0) goto err;\r
4466 bbuf_free(cc->mbuf);\r
4467 cc = prev_cc;\r
4468 }\r
4469\r
4470 if (neg != 0)\r
4471 NCCLASS_SET_NOT(cc);\r
4472 else\r
4473 NCCLASS_CLEAR_NOT(cc);\r
4474 if (IS_NCCLASS_NOT(cc) &&\r
4475 IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) {\r
4476 int is_empty;\r
4477\r
4478 is_empty = (IS_NULL(cc->mbuf) ? 1 : 0);\r
4479 if (is_empty != 0)\r
4480 BITSET_IS_EMPTY(cc->bs, is_empty);\r
4481\r
4482 if (is_empty == 0) {\r
4483#define NEWLINE_CODE 0x0a\r
4484\r
4485 if (ONIGENC_IS_CODE_NEWLINE(env->enc, NEWLINE_CODE)) {\r
4486 if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1)\r
4487 BITSET_SET_BIT(cc->bs, NEWLINE_CODE);\r
4488 else\r
4489 add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE);\r
4490 }\r
4491 }\r
4492 }\r
4493 *src = p;\r
4494 return 0;\r
4495\r
4496 err:\r
4497 if (cc != NCCLASS(*np))\r
4498 bbuf_free(cc->mbuf);\r
4499 onig_node_free(*np);\r
4500 return r;\r
4501}\r
4502\r
4503static int parse_subexp(Node** top, OnigToken* tok, int term,\r
4504 UChar** src, UChar* end, ScanEnv* env);\r
4505\r
4506static int\r
4507parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,\r
4508 ScanEnv* env)\r
4509{\r
4510 int r, num;\r
4511 Node *target;\r
4512 OnigOptionType option;\r
4513 OnigCodePoint c;\r
4514 OnigEncoding enc = env->enc;\r
4515\r
4516#ifdef USE_NAMED_GROUP\r
4517 int list_capture;\r
4518#endif\r
4519\r
4520 UChar* p = *src;\r
4521 PFETCH_READY;\r
4522\r
4523 *np = NULL;\r
4524 if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;\r
4525\r
4526 option = env->option;\r
4527 if (PPEEK_IS('?') &&\r
4528 IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {\r
4529 PINC;\r
4530 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
4531\r
4532 PFETCH(c);\r
4533 switch (c) {\r
4534 case ':': /* (?:...) grouping only */\r
4535 group:\r
4536 r = fetch_token(tok, &p, end, env);\r
4537 if (r < 0) return r;\r
4538 r = parse_subexp(np, tok, term, &p, end, env);\r
4539 if (r < 0) return r;\r
4540 *src = p;\r
4541 return 1; /* group */\r
4542 break;\r
4543\r
4544 case '=':\r
4545 *np = onig_node_new_anchor(ANCHOR_PREC_READ);\r
4546 break;\r
4547 case '!': /* preceding read */\r
4548 *np = onig_node_new_anchor(ANCHOR_PREC_READ_NOT);\r
4549 break;\r
4550 case '>': /* (?>...) stop backtrack */\r
4551 *np = node_new_enclose(ENCLOSE_STOP_BACKTRACK);\r
4552 break;\r
4553\r
4554#ifdef USE_NAMED_GROUP\r
4555 case '\'':\r
4556 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {\r
4557 goto named_group1;\r
4558 }\r
4559 else\r
4560 return ONIGERR_UNDEFINED_GROUP_OPTION;\r
4561 break;\r
4562#endif\r
4563\r
4564 case '<': /* look behind (?<=...), (?<!...) */\r
4565 PFETCH(c);\r
4566 if (c == '=')\r
4567 *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND);\r
4568 else if (c == '!')\r
4569 *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND_NOT);\r
4570#ifdef USE_NAMED_GROUP\r
4571 else {\r
4572 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {\r
4573 UChar *name;\r
4574 UChar *name_end;\r
4575\r
4576 PUNFETCH;\r
4577 c = '<';\r
4578\r
4579 named_group1:\r
4580 list_capture = 0;\r
4581\r
4582 named_group2:\r
4583 name = p;\r
4584 r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num, 0);\r
4585 if (r < 0) return r;\r
4586\r
4587 num = scan_env_add_mem_entry(env);\r
4588 if (num < 0) return num;\r
4589 if (list_capture != 0 && num >= (int )BIT_STATUS_BITS_NUM)\r
4590 return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;\r
4591\r
4592 r = name_add(env->reg, name, name_end, num, env);\r
4593 if (r != 0) return r;\r
4594 *np = node_new_enclose_memory(env->option, 1);\r
4595 CHECK_NULL_RETURN_MEMERR(*np);\r
4596 NENCLOSE(*np)->regnum = num;\r
4597 if (list_capture != 0)\r
4598 BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);\r
4599 env->num_named++;\r
4600 }\r
4601 else {\r
4602 return ONIGERR_UNDEFINED_GROUP_OPTION;\r
4603 }\r
4604 }\r
4605#else\r
4606 else {\r
4607 return ONIGERR_UNDEFINED_GROUP_OPTION;\r
4608 }\r
4609#endif\r
4610 break;\r
4611\r
4612 case '@':\r
4613 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) {\r
4614#ifdef USE_NAMED_GROUP\r
4615 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {\r
4616 PFETCH(c);\r
4617 if (c == '<' || c == '\'') {\r
4618 list_capture = 1;\r
4619 goto named_group2; /* (?@<name>...) */\r
4620 }\r
4621 PUNFETCH;\r
4622 }\r
4623#endif\r
4624 *np = node_new_enclose_memory(env->option, 0);\r
4625 CHECK_NULL_RETURN_MEMERR(*np);\r
4626 num = scan_env_add_mem_entry(env);\r
4627 if (num < 0) {\r
4628 onig_node_free(*np);\r
4629 return num;\r
4630 }\r
4631 else if (num >= (int )BIT_STATUS_BITS_NUM) {\r
4632 onig_node_free(*np);\r
4633 return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;\r
4634 }\r
4635 NENCLOSE(*np)->regnum = num;\r
4636 BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);\r
4637 }\r
4638 else {\r
4639 return ONIGERR_UNDEFINED_GROUP_OPTION;\r
4640 }\r
4641 break;\r
4642\r
4643#ifdef USE_POSIXLINE_OPTION\r
4644 case 'p':\r
4645#endif\r
4646 case '-': case 'i': case 'm': case 's': case 'x':\r
4647 {\r
4648 int neg = 0;\r
4649\r
4650 while (1) {\r
4651 switch (c) {\r
4652 case ':':\r
4653 case ')':\r
4654 break;\r
4655\r
4656 case '-': neg = 1; break;\r
4657 case 'x': ONOFF(option, ONIG_OPTION_EXTEND, neg); break;\r
4658 case 'i': ONOFF(option, ONIG_OPTION_IGNORECASE, neg); break;\r
4659 case 's':\r
4660 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {\r
4661 ONOFF(option, ONIG_OPTION_MULTILINE, neg);\r
4662 }\r
4663 else\r
4664 return ONIGERR_UNDEFINED_GROUP_OPTION;\r
4665 break;\r
4666\r
4667 case 'm':\r
4668 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {\r
4669 ONOFF(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 1 : 0));\r
4670 }\r
4671 else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) {\r
4672 ONOFF(option, ONIG_OPTION_MULTILINE, neg);\r
4673 }\r
4674 else\r
4675 return ONIGERR_UNDEFINED_GROUP_OPTION;\r
4676 break;\r
4677#ifdef USE_POSIXLINE_OPTION\r
4678 case 'p':\r
4679 ONOFF(option, ONIG_OPTION_MULTILINE|ONIG_OPTION_SINGLELINE, neg);\r
4680 break;\r
4681#endif\r
4682 default:\r
4683 return ONIGERR_UNDEFINED_GROUP_OPTION;\r
4684 }\r
4685\r
4686 if (c == ')') {\r
4687 *np = node_new_option(option);\r
4688 CHECK_NULL_RETURN_MEMERR(*np);\r
4689 *src = p;\r
4690 return 2; /* option only */\r
4691 }\r
4692 else if (c == ':') {\r
4693 OnigOptionType prev = env->option;\r
4694\r
4695 env->option = option;\r
4696 r = fetch_token(tok, &p, end, env);\r
4697 if (r < 0) return r;\r
4698 r = parse_subexp(&target, tok, term, &p, end, env);\r
4699 env->option = prev;\r
4700 if (r < 0) return r;\r
4701 *np = node_new_option(option);\r
4702 CHECK_NULL_RETURN_MEMERR(*np);\r
4703 NENCLOSE(*np)->target = target;\r
4704 *src = p;\r
4705 return 0;\r
4706 }\r
4707\r
4708 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;\r
4709 PFETCH(c);\r
4710 }\r
4711 }\r
4712 break;\r
4713\r
4714 default:\r
4715 return ONIGERR_UNDEFINED_GROUP_OPTION;\r
4716 }\r
4717 }\r
4718 else {\r
4719 if (ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_DONT_CAPTURE_GROUP))\r
4720 goto group;\r
4721\r
4722 *np = node_new_enclose_memory(env->option, 0);\r
4723 CHECK_NULL_RETURN_MEMERR(*np);\r
4724 num = scan_env_add_mem_entry(env);\r
4725 if (num < 0) return num;\r
4726 NENCLOSE(*np)->regnum = num;\r
4727 }\r
4728\r
4729 CHECK_NULL_RETURN_MEMERR(*np);\r
4730 r = fetch_token(tok, &p, end, env);\r
4731 if (r < 0) return r;\r
4732 r = parse_subexp(&target, tok, term, &p, end, env);\r
4733 if (r < 0) return r;\r
4734\r
4735 if (NTYPE(*np) == NT_ANCHOR)\r
4736 NANCHOR(*np)->target = target;\r
4737 else {\r
4738 NENCLOSE(*np)->target = target;\r
4739 if (NENCLOSE(*np)->type == ENCLOSE_MEMORY) {\r
4740 /* Don't move this to previous of parse_subexp() */\r
4741 r = scan_env_set_mem_node(env, NENCLOSE(*np)->regnum, *np);\r
4742 if (r != 0) return r;\r
4743 }\r
4744 }\r
4745\r
4746 *src = p;\r
4747 return 0;\r
4748}\r
4749\r
4750static const char* PopularQStr[] = {\r
4751 "?", "*", "+", "??", "*?", "+?"\r
4752};\r
4753\r
4754static const char* ReduceQStr[] = {\r
4755 "", "", "*", "*?", "??", "+ and ??", "+? and ?"\r
4756};\r
4757\r
4758static int\r
4759set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)\r
4760{\r
4761 QtfrNode* qn;\r
4762\r
4763 qn = NQTFR(qnode);\r
4764 if (qn->lower == 1 && qn->upper == 1) {\r
4765 return 1;\r
4766 }\r
4767\r
4768 switch (NTYPE(target)) {\r
4769 case NT_STR:\r
4770 if (! group) {\r
4771 StrNode* sn = NSTR(target);\r
4772 if (str_node_can_be_split(sn, env->enc)) {\r
4773 Node* n = str_node_split_last_char(sn, env->enc);\r
4774 if (IS_NOT_NULL(n)) {\r
4775 qn->target = n;\r
4776 return 2;\r
4777 }\r
4778 }\r
4779 }\r
4780 break;\r
4781\r
4782 case NT_QTFR:\r
4783 { /* check redundant double repeat. */\r
4784 /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */\r
4785 QtfrNode* qnt = NQTFR(target);\r
4786 int nestq_num = popular_quantifier_num(qn);\r
4787 int targetq_num = popular_quantifier_num(qnt);\r
4788\r
4789#ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR\r
4790 if (!IS_QUANTIFIER_BY_NUMBER(qn) && !IS_QUANTIFIER_BY_NUMBER(qnt) &&\r
4791 IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {\r
4792 UChar buf[WARN_BUFSIZE];\r
4793\r
4794 switch(ReduceTypeTable[targetq_num][nestq_num]) {\r
4795 case RQ_ASIS:\r
4796 break;\r
4797\r
4798 case RQ_DEL:\r
4799 if (onig_verb_warn != onig_null_warn) {\r
4800 onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,\r
4801 env->pattern, env->pattern_end,\r
4802 (UChar* )"redundant nested repeat operator");\r
4803 (*onig_verb_warn)((char* )buf);\r
4804 }\r
4805 goto warn_exit;\r
4806 break;\r
4807\r
4808 default:\r
4809 if (onig_verb_warn != onig_null_warn) {\r
4810 onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,\r
4811 env->pattern, env->pattern_end,\r
4812 (UChar* )"nested repeat operator %s and %s was replaced with '%s'",\r
4813 PopularQStr[targetq_num], PopularQStr[nestq_num],\r
4814 ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]);\r
4815 (*onig_verb_warn)((char* )buf);\r
4816 }\r
4817 goto warn_exit;\r
4818 break;\r
4819 }\r
4820 }\r
4821\r
4822 warn_exit:\r
4823#endif\r
4824 if (targetq_num >= 0) {\r
4825 if (nestq_num >= 0) {\r
4826 onig_reduce_nested_quantifier(qnode, target);\r
4827 goto q_exit;\r
4828 }\r
4829 else if (targetq_num == 1 || targetq_num == 2) { /* * or + */\r
4830 /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */\r
4831 if (! IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) {\r
4832 qn->upper = (qn->lower == 0 ? 1 : qn->lower);\r
4833 }\r
4834 }\r
4835 }\r
4836 }\r
4837 break;\r
4838\r
4839 default:\r
4840 break;\r
4841 }\r
4842\r
4843 qn->target = target;\r
4844 q_exit:\r
4845 return 0;\r
4846}\r
4847\r
4848\r
4849#ifdef USE_SHARED_CCLASS_TABLE\r
4850\r
4851#define THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS 8\r
4852\r
4853/* for ctype node hash table */\r
4854\r
4855typedef struct {\r
4856 OnigEncoding enc;\r
4857 int not;\r
4858 int type;\r
4859} type_cclass_key;\r
4860\r
4861static int type_cclass_cmp(type_cclass_key* x, type_cclass_key* y)\r
4862{\r
4863 if (x->type != y->type) return 1;\r
4864 if (x->enc != y->enc) return 1;\r
4865 if (x->not != y->not) return 1;\r
4866 return 0;\r
4867}\r
4868\r
4869static int type_cclass_hash(type_cclass_key* key)\r
4870{\r
4871 int i, val;\r
4872 UChar *p;\r
4873\r
4874 val = 0;\r
4875\r
4876 p = (UChar* )&(key->enc);\r
4877 for (i = 0; i < (int )sizeof(key->enc); i++) {\r
4878 val = val * 997 + (int )*p++;\r
4879 }\r
4880\r
4881 p = (UChar* )(&key->type);\r
4882 for (i = 0; i < (int )sizeof(key->type); i++) {\r
4883 val = val * 997 + (int )*p++;\r
4884 }\r
4885\r
4886 val += key->not;\r
4887 return val + (val >> 5);\r
4888}\r
4889\r
4890static struct st_hash_type type_type_cclass_hash = {\r
4891 type_cclass_cmp,\r
4892 type_cclass_hash,\r
4893};\r
4894\r
4895static st_table* OnigTypeCClassTable;\r
4896\r
4897\r
4898static int\r
4899i_free_shared_class(type_cclass_key* key, Node* node, void* arg ARG_UNUSED)\r
4900{\r
4901 if (IS_NOT_NULL(node)) {\r
4902 CClassNode* cc = NCCLASS(node);\r
4903 if (IS_NOT_NULL(cc->mbuf)) xfree(cc->mbuf);\r
4904 xfree(node);\r
4905 }\r
4906\r
4907 if (IS_NOT_NULL(key)) xfree(key);\r
4908 return ST_DELETE;\r
4909}\r
4910\r
4911extern int\r
4912onig_free_shared_cclass_table(void)\r
4913{\r
4914 if (IS_NOT_NULL(OnigTypeCClassTable)) {\r
4915 onig_st_foreach(OnigTypeCClassTable, i_free_shared_class, 0);\r
4916 onig_st_free_table(OnigTypeCClassTable);\r
4917 OnigTypeCClassTable = NULL;\r
4918 }\r
4919\r
4920 return 0;\r
4921}\r
4922\r
4923#endif /* USE_SHARED_CCLASS_TABLE */\r
4924\r
4925\r
4926#ifndef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS\r
4927static int\r
4928clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc)\r
4929{\r
4930 BBuf *tbuf;\r
4931 int r;\r
4932\r
4933 if (IS_NCCLASS_NOT(cc)) {\r
4934 bitset_invert(cc->bs);\r
4935\r
4936 if (! ONIGENC_IS_SINGLEBYTE(enc)) {\r
4937 r = not_code_range_buf(enc, cc->mbuf, &tbuf);\r
4938 if (r != 0) return r;\r
4939\r
4940 bbuf_free(cc->mbuf);\r
4941 cc->mbuf = tbuf;\r
4942 }\r
4943\r
4944 NCCLASS_CLEAR_NOT(cc);\r
4945 }\r
4946\r
4947 return 0;\r
4948}\r
4949#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */\r
4950\r
/* Context handed to i_apply_case_fold() through its `arg` pointer. */
typedef struct {
  ScanEnv* env;        /* scan environment; supplies the encoding */
  CClassNode* cc;      /* character class being extended with fold targets */
  Node* alt_root;      /* head of the alternation chain of multi-char folds */
  Node** ptail;        /* slot where the next alternation node is linked */
} IApplyCaseFoldArg;
4957\r
4958static int\r
4959i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[],\r
4960 int to_len, void* arg)\r
4961{\r
4962 IApplyCaseFoldArg* iarg;\r
4963 ScanEnv* env;\r
4964 CClassNode* cc;\r
4965 BitSetRef bs;\r
4966\r
4967 iarg = (IApplyCaseFoldArg* )arg;\r
4968 env = iarg->env;\r
4969 cc = iarg->cc;\r
4970 bs = cc->bs;\r
4971\r
4972 if (to_len == 1) {\r
4973 int is_in = onig_is_code_in_cc(env->enc, from, cc);\r
4974#ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS\r
4975 if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) ||\r
4976 (is_in == 0 && IS_NCCLASS_NOT(cc))) {\r
4977 if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {\r
4978 add_code_range(&(cc->mbuf), env, *to, *to);\r
4979 }\r
4980 else {\r
4981 BITSET_SET_BIT(bs, *to);\r
4982 }\r
4983 }\r
4984#else\r
4985 if (is_in != 0) {\r
4986 if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {\r
4987 if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc);\r
4988 add_code_range(&(cc->mbuf), env, *to, *to);\r
4989 }\r
4990 else {\r
4991 if (IS_NCCLASS_NOT(cc)) {\r
4992 BITSET_CLEAR_BIT(bs, *to);\r
4993 }\r
4994 else\r
4995 BITSET_SET_BIT(bs, *to);\r
4996 }\r
4997 }\r
4998#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */\r
4999 }\r
5000 else {\r
5001 int r, i, len;\r
5002 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];\r
5003 Node *snode = NULL_NODE;\r
5004\r
5005 if (onig_is_code_in_cc(env->enc, from, cc)\r
5006#ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS\r
5007 && !IS_NCCLASS_NOT(cc)\r
5008#endif\r
5009 ) {\r
5010 for (i = 0; i < to_len; i++) {\r
5011 len = ONIGENC_CODE_TO_MBC(env->enc, to[i], buf);\r
5012 if (i == 0) {\r
5013 snode = onig_node_new_str(buf, buf + len);\r
5014 CHECK_NULL_RETURN_MEMERR(snode);\r
5015\r
5016 /* char-class expanded multi-char only\r
5017 compare with string folded at match time. */\r
5018 NSTRING_SET_AMBIG(snode);\r
5019 }\r
5020 else {\r
5021 r = onig_node_str_cat(snode, buf, buf + len);\r
5022 if (r < 0) {\r
5023 onig_node_free(snode);\r
5024 return r;\r
5025 }\r
5026 }\r
5027 }\r
5028\r
5029 *(iarg->ptail) = onig_node_new_alt(snode, NULL_NODE);\r
5030 CHECK_NULL_RETURN_MEMERR(*(iarg->ptail));\r
5031 iarg->ptail = &(NCDR((*(iarg->ptail))));\r
5032 }\r
5033 }\r
5034\r
5035 return 0;\r
5036}\r
5037\r
5038static int\r
5039parse_exp(Node** np, OnigToken* tok, int term,\r
5040 UChar** src, UChar* end, ScanEnv* env)\r
5041{\r
5042 int r, len, group = 0;\r
5043 Node* qn;\r
5044 Node** targetp;\r
5045\r
5046 *np = NULL;\r
5047 if (tok->type == (enum TokenSyms )term)\r
5048 goto end_of_token;\r
5049\r
5050 switch (tok->type) {\r
5051 case TK_ALT:\r
5052 case TK_EOT:\r
5053 end_of_token:\r
5054 *np = node_new_empty();\r
5055 return tok->type;\r
5056 break;\r
5057\r
5058 case TK_SUBEXP_OPEN:\r
5059 r = parse_enclose(np, tok, TK_SUBEXP_CLOSE, src, end, env);\r
5060 if (r < 0) return r;\r
5061 if (r == 1) group = 1;\r
5062 else if (r == 2) { /* option only */\r
5063 Node* target;\r
5064 OnigOptionType prev = env->option;\r
5065\r
5066 env->option = NENCLOSE(*np)->option;\r
5067 r = fetch_token(tok, src, end, env);\r
5068 if (r < 0) return r;\r
5069 r = parse_subexp(&target, tok, term, src, end, env);\r
5070 env->option = prev;\r
5071 if (r < 0) return r;\r
5072 NENCLOSE(*np)->target = target; \r
5073 return tok->type;\r
5074 }\r
5075 break;\r
5076\r
5077 case TK_SUBEXP_CLOSE:\r
5078 if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP))\r
5079 return ONIGERR_UNMATCHED_CLOSE_PARENTHESIS;\r
5080\r
5081 if (tok->escaped) goto tk_raw_byte;\r
5082 else goto tk_byte;\r
5083 break;\r
5084\r
5085 case TK_STRING:\r
5086 tk_byte:\r
5087 {\r
5088 *np = node_new_str(tok->backp, *src);\r
5089 CHECK_NULL_RETURN_MEMERR(*np);\r
5090\r
5091 while (1) {\r
5092 r = fetch_token(tok, src, end, env);\r
5093 if (r < 0) return r;\r
5094 if (r != TK_STRING) break;\r
5095\r
5096 r = onig_node_str_cat(*np, tok->backp, *src);\r
5097 if (r < 0) return r;\r
5098 }\r
5099\r
5100 string_end:\r
5101 targetp = np;\r
5102 goto repeat;\r
5103 }\r
5104 break;\r
5105\r
5106 case TK_RAW_BYTE:\r
5107 tk_raw_byte:\r
5108 {\r
5109 *np = node_new_str_raw_char((UChar )tok->u.c);\r
5110 CHECK_NULL_RETURN_MEMERR(*np);\r
5111 len = 1;\r
5112 while (1) {\r
5113 if (len >= ONIGENC_MBC_MINLEN(env->enc)) {\r
5114 if (len == enclen(env->enc, NSTR(*np)->s)) {\r
5115 r = fetch_token(tok, src, end, env);\r
5116 NSTRING_CLEAR_RAW(*np);\r
5117 goto string_end;\r
5118 }\r
5119 }\r
5120\r
5121 r = fetch_token(tok, src, end, env);\r
5122 if (r < 0) return r;\r
5123 if (r != TK_RAW_BYTE) {\r
5124 /* Don't use this, it is wrong for little endian encodings. */\r
5125#ifdef USE_PAD_TO_SHORT_BYTE_CHAR\r
5126 int rem;\r
5127 if (len < ONIGENC_MBC_MINLEN(env->enc)) {\r
5128 rem = ONIGENC_MBC_MINLEN(env->enc) - len;\r
5129 (void )node_str_head_pad(NSTR(*np), rem, (UChar )0);\r
5130 if (len + rem == enclen(env->enc, NSTR(*np)->s)) {\r
5131 NSTRING_CLEAR_RAW(*np);\r
5132 goto string_end;\r
5133 }\r
5134 }\r
5135#endif\r
5136 return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;\r
5137 }\r
5138\r
5139 r = node_str_cat_char(*np, (UChar )tok->u.c);\r
5140 if (r < 0) return r;\r
5141\r
5142 len++;\r
5143 }\r
5144 }\r
5145 break;\r
5146\r
5147 case TK_CODE_POINT:\r
5148 {\r
5149 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];\r
5150 int num = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf);\r
5151 if (num < 0) return num;\r
5152#ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG\r
5153 *np = node_new_str_raw(buf, buf + num);\r
5154#else\r
5155 *np = node_new_str(buf, buf + num);\r
5156#endif\r
5157 CHECK_NULL_RETURN_MEMERR(*np);\r
5158 }\r
5159 break;\r
5160\r
5161 case TK_QUOTE_OPEN:\r
5162 {\r
5163 OnigCodePoint end_op[2];\r
5164 UChar *qstart, *qend, *nextp;\r
5165\r
5166 end_op[0] = (OnigCodePoint )MC_ESC(env->syntax);\r
5167 end_op[1] = (OnigCodePoint )'E';\r
5168 qstart = *src;\r
5169 qend = find_str_position(end_op, 2, qstart, end, &nextp, env->enc);\r
5170 if (IS_NULL(qend)) {\r
5171 nextp = qend = end;\r
5172 }\r
5173 *np = node_new_str(qstart, qend);\r
5174 CHECK_NULL_RETURN_MEMERR(*np);\r
5175 *src = nextp;\r
5176 }\r
5177 break;\r
5178\r
5179 case TK_CHAR_TYPE:\r
5180 {\r
5181 switch (tok->u.prop.ctype) {\r
5182 case ONIGENC_CTYPE_WORD:\r
5183 *np = node_new_ctype(tok->u.prop.ctype, tok->u.prop.not);\r
5184 CHECK_NULL_RETURN_MEMERR(*np);\r
5185 break;\r
5186\r
5187 case ONIGENC_CTYPE_SPACE:\r
5188 case ONIGENC_CTYPE_DIGIT:\r
5189 case ONIGENC_CTYPE_XDIGIT:\r
5190 {\r
5191 CClassNode* cc;\r
5192\r
5193#ifdef USE_SHARED_CCLASS_TABLE\r
5194 const OnigCodePoint *mbr;\r
5195 OnigCodePoint sb_out;\r
5196\r
5197 r = ONIGENC_GET_CTYPE_CODE_RANGE(env->enc, tok->u.prop.ctype,\r
5198 &sb_out, &mbr);\r
5199 if (r == 0 &&\r
5200 ONIGENC_CODE_RANGE_NUM(mbr)\r
5201 >= THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS) {\r
5202 type_cclass_key key;\r
5203 type_cclass_key* new_key;\r
5204\r
5205 key.enc = env->enc;\r
5206 key.not = tok->u.prop.not;\r
5207 key.type = tok->u.prop.ctype;\r
5208\r
5209 THREAD_ATOMIC_START;\r
5210\r
5211 if (IS_NULL(OnigTypeCClassTable)) {\r
5212 OnigTypeCClassTable\r
5213 = onig_st_init_table_with_size(&type_type_cclass_hash, 10);\r
5214 if (IS_NULL(OnigTypeCClassTable)) {\r
5215 THREAD_ATOMIC_END;\r
5216 return ONIGERR_MEMORY;\r
5217 }\r
5218 }\r
5219 else {\r
5220 if (onig_st_lookup(OnigTypeCClassTable, (st_data_t )&key,\r
5221 (st_data_t* )np)) {\r
5222 THREAD_ATOMIC_END;\r
5223 break;\r
5224 }\r
5225 }\r
5226\r
5227 *np = node_new_cclass_by_codepoint_range(tok->u.prop.not,\r
5228 sb_out, mbr);\r
5229 if (IS_NULL(*np)) {\r
5230 THREAD_ATOMIC_END;\r
5231 return ONIGERR_MEMORY;\r
5232 }\r
5233\r
5234 cc = NCCLASS(*np);\r
5235 NCCLASS_SET_SHARE(cc);\r
5236 new_key = (type_cclass_key* )xmalloc(sizeof(type_cclass_key));\r
5237 xmemcpy(new_key, &key, sizeof(type_cclass_key));\r
5238 onig_st_add_direct(OnigTypeCClassTable, (st_data_t )new_key,\r
5239 (st_data_t )*np);\r
5240 \r
5241 THREAD_ATOMIC_END;\r
5242 }\r
5243 else {\r
5244#endif\r
5245 *np = node_new_cclass();\r
5246 CHECK_NULL_RETURN_MEMERR(*np);\r
5247 cc = NCCLASS(*np);\r
5248 add_ctype_to_cc(cc, tok->u.prop.ctype, 0, env);\r
5249 if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);\r
5250#ifdef USE_SHARED_CCLASS_TABLE\r
5251 }\r
5252#endif\r
5253 }\r
5254 break;\r
5255\r
5256 default:\r
5257 return ONIGERR_PARSER_BUG;\r
5258 break;\r
5259 }\r
5260 }\r
5261 break;\r
5262\r
5263 case TK_CHAR_PROPERTY:\r
5264 r = parse_char_property(np, tok, src, end, env);\r
5265 if (r != 0) return r;\r
5266 break;\r
5267\r
5268 case TK_CC_OPEN:\r
5269 {\r
5270 CClassNode* cc;\r
5271\r
5272 r = parse_char_class(np, tok, src, end, env);\r
5273 if (r != 0) return r;\r
5274\r
5275 cc = NCCLASS(*np);\r
5276 if (IS_IGNORECASE(env->option)) {\r
5277 IApplyCaseFoldArg iarg;\r
5278\r
5279 iarg.env = env;\r
5280 iarg.cc = cc;\r
5281 iarg.alt_root = NULL_NODE;\r
5282 iarg.ptail = &(iarg.alt_root);\r
5283\r
5284 r = ONIGENC_APPLY_ALL_CASE_FOLD(env->enc, env->case_fold_flag,\r
5285 i_apply_case_fold, &iarg);\r
5286 if (r != 0) {\r
5287 onig_node_free(iarg.alt_root);\r
5288 return r;\r
5289 }\r
5290 if (IS_NOT_NULL(iarg.alt_root)) {\r
5291 Node* work = onig_node_new_alt(*np, iarg.alt_root);\r
5292 if (IS_NULL(work)) {\r
5293 onig_node_free(iarg.alt_root);\r
5294 return ONIGERR_MEMORY;\r
5295 }\r
5296 *np = work;\r
5297 }\r
5298 }\r
5299 }\r
5300 break;\r
5301\r
5302 case TK_ANYCHAR:\r
5303 *np = node_new_anychar();\r
5304 CHECK_NULL_RETURN_MEMERR(*np);\r
5305 break;\r
5306\r
5307 case TK_ANYCHAR_ANYTIME:\r
5308 *np = node_new_anychar();\r
5309 CHECK_NULL_RETURN_MEMERR(*np);\r
5310 qn = node_new_quantifier(0, REPEAT_INFINITE, 0);\r
5311 CHECK_NULL_RETURN_MEMERR(qn);\r
5312 NQTFR(qn)->target = *np;\r
5313 *np = qn;\r
5314 break;\r
5315\r
5316 case TK_BACKREF:\r
5317 len = tok->u.backref.num;\r
5318 *np = node_new_backref(len,\r
5319 (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)),\r
5320 tok->u.backref.by_name,\r
5321#ifdef USE_BACKREF_WITH_LEVEL\r
5322 tok->u.backref.exist_level,\r
5323 tok->u.backref.level,\r
5324#endif\r
5325 env);\r
5326 CHECK_NULL_RETURN_MEMERR(*np);\r
5327 break;\r
5328\r
5329#ifdef USE_SUBEXP_CALL\r
5330 case TK_CALL:\r
5331 {\r
5332 int gnum = tok->u.call.gnum;\r
5333\r
5334 if (gnum < 0) {\r
5335 gnum = BACKREF_REL_TO_ABS(gnum, env);\r
5336 if (gnum <= 0)\r
5337 return ONIGERR_INVALID_BACKREF;\r
5338 }\r
5339 *np = node_new_call(tok->u.call.name, tok->u.call.name_end, gnum);\r
5340 CHECK_NULL_RETURN_MEMERR(*np);\r
5341 env->num_call++;\r
5342 }\r
5343 break;\r
5344#endif\r
5345\r
5346 case TK_ANCHOR:\r
5347 *np = onig_node_new_anchor(tok->u.anchor);\r
5348 break;\r
5349\r
5350 case TK_OP_REPEAT:\r
5351 case TK_INTERVAL:\r
5352 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS)) {\r
5353 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS))\r
5354 return ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED;\r
5355 else\r
5356 *np = node_new_empty();\r
5357 }\r
5358 else {\r
5359 goto tk_byte;\r
5360 }\r
5361 break;\r
5362\r
5363 default:\r
5364 return ONIGERR_PARSER_BUG;\r
5365 break;\r
5366 }\r
5367\r
5368 {\r
5369 targetp = np;\r
5370\r
5371 re_entry:\r
5372 r = fetch_token(tok, src, end, env);\r
5373 if (r < 0) return r;\r
5374\r
5375 repeat:\r
5376 if (r == TK_OP_REPEAT || r == TK_INTERVAL) {\r
5377 if (is_invalid_quantifier_target(*targetp))\r
5378 return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID;\r
5379\r
5380 qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper,\r
5381 (r == TK_INTERVAL ? 1 : 0));\r
5382 CHECK_NULL_RETURN_MEMERR(qn);\r
5383 NQTFR(qn)->greedy = tok->u.repeat.greedy;\r
5384 r = set_quantifier(qn, *targetp, group, env);\r
5385 if (r < 0) {\r
5386 onig_node_free(qn);\r
5387 return r;\r
5388 }\r
5389\r
5390 if (tok->u.repeat.possessive != 0) {\r
5391 Node* en;\r
5392 en = node_new_enclose(ENCLOSE_STOP_BACKTRACK);\r
5393 if (IS_NULL(en)) {\r
5394 onig_node_free(qn);\r
5395 return ONIGERR_MEMORY;\r
5396 }\r
5397 NENCLOSE(en)->target = qn;\r
5398 qn = en;\r
5399 }\r
5400\r
5401 if (r == 0) {\r
5402 *targetp = qn;\r
5403 }\r
5404 else if (r == 1) {\r
5405 onig_node_free(qn);\r
5406 }\r
5407 else if (r == 2) { /* split case: /abc+/ */\r
5408 Node *tmp;\r
5409\r
5410 *targetp = node_new_list(*targetp, NULL);\r
5411 if (IS_NULL(*targetp)) {\r
5412 onig_node_free(qn);\r
5413 return ONIGERR_MEMORY;\r
5414 }\r
5415 tmp = NCDR(*targetp) = node_new_list(qn, NULL);\r
5416 if (IS_NULL(tmp)) {\r
5417 onig_node_free(qn);\r
5418 return ONIGERR_MEMORY;\r
5419 }\r
5420 targetp = &(NCAR(tmp));\r
5421 }\r
5422 goto re_entry;\r
5423 }\r
5424 }\r
5425\r
5426 return r;\r
5427}\r
5428\r
5429static int\r
5430parse_branch(Node** top, OnigToken* tok, int term,\r
5431 UChar** src, UChar* end, ScanEnv* env)\r
5432{\r
5433 int r;\r
5434 Node *node, **headp;\r
5435\r
5436 *top = NULL;\r
5437 r = parse_exp(&node, tok, term, src, end, env);\r
5438 if (r < 0) return r;\r
5439\r
5440 if (r == TK_EOT || r == term || r == TK_ALT) {\r
5441 *top = node;\r
5442 }\r
5443 else {\r
5444 *top = node_new_list(node, NULL);\r
5445 headp = &(NCDR(*top));\r
5446 while (r != TK_EOT && r != term && r != TK_ALT) {\r
5447 r = parse_exp(&node, tok, term, src, end, env);\r
5448 if (r < 0) return r;\r
5449\r
5450 if (NTYPE(node) == NT_LIST) {\r
5451 *headp = node;\r
5452 while (IS_NOT_NULL(NCDR(node))) node = NCDR(node);\r
5453 headp = &(NCDR(node));\r
5454 }\r
5455 else {\r
5456 *headp = node_new_list(node, NULL);\r
5457 headp = &(NCDR(*headp));\r
5458 }\r
5459 }\r
5460 }\r
5461\r
5462 return r;\r
5463}\r
5464\r
5465/* term_tok: TK_EOT or TK_SUBEXP_CLOSE */\r
5466static int\r
5467parse_subexp(Node** top, OnigToken* tok, int term,\r
5468 UChar** src, UChar* end, ScanEnv* env)\r
5469{\r
5470 int r;\r
5471 Node *node, **headp;\r
5472\r
5473 *top = NULL;\r
5474 r = parse_branch(&node, tok, term, src, end, env);\r
5475 if (r < 0) {\r
5476 onig_node_free(node);\r
5477 return r;\r
5478 }\r
5479\r
5480 if (r == term) {\r
5481 *top = node;\r
5482 }\r
5483 else if (r == TK_ALT) {\r
5484 *top = onig_node_new_alt(node, NULL);\r
5485 headp = &(NCDR(*top));\r
5486 while (r == TK_ALT) {\r
5487 r = fetch_token(tok, src, end, env);\r
5488 if (r < 0) return r;\r
5489 r = parse_branch(&node, tok, term, src, end, env);\r
5490 if (r < 0) return r;\r
5491\r
5492 *headp = onig_node_new_alt(node, NULL);\r
5493 headp = &(NCDR(*headp));\r
5494 }\r
5495\r
5496 if (tok->type != (enum TokenSyms )term)\r
5497 goto err;\r
5498 }\r
5499 else {\r
5500 err:\r
5501 if (term == TK_SUBEXP_CLOSE)\r
5502 return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;\r
5503 else\r
5504 return ONIGERR_PARSER_BUG;\r
5505 }\r
5506\r
5507 return r;\r
5508}\r
5509\r
5510static int\r
5511parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)\r
5512{\r
5513 int r;\r
5514 OnigToken tok;\r
5515\r
5516 r = fetch_token(&tok, src, end, env);\r
5517 if (r < 0) return r;\r
5518 r = parse_subexp(top, &tok, TK_EOT, src, end, env);\r
5519 if (r < 0) return r;\r
5520 return 0;\r
5521}\r
5522\r
5523extern int\r
5524onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end,\r
5525 regex_t* reg, ScanEnv* env)\r
5526{\r
5527 int r;\r
5528 UChar* p;\r
5529\r
5530#ifdef USE_NAMED_GROUP\r
5531 names_clear(reg);\r
5532#endif\r
5533\r
5534 scan_env_clear(env);\r
5535 env->option = reg->options;\r
5536 env->case_fold_flag = reg->case_fold_flag;\r
5537 env->enc = reg->enc;\r
5538 env->syntax = reg->syntax;\r
5539 env->pattern = (UChar* )pattern;\r
5540 env->pattern_end = (UChar* )end;\r
5541 env->reg = reg;\r
5542\r
5543 *root = NULL;\r
5544 p = (UChar* )pattern;\r
5545 r = parse_regexp(root, &p, (UChar* )end, env);\r
5546 reg->num_mem = env->num_mem;\r
5547 return r;\r
5548}\r
5549\r
5550extern void\r
5551onig_scan_env_set_error_string(ScanEnv* env, int ecode ARG_UNUSED,\r
5552 UChar* arg, UChar* arg_end)\r
5553{\r
5554 env->error = arg;\r
5555 env->error_end = arg_end;\r
5556}\r