]>
Commit | Line | Data |
---|---|---|
14b0e578 CS |
1 | /**********************************************************************\r |
2 | regexec.c - Oniguruma (regular expression library)\r | |
3 | **********************************************************************/\r | |
4 | /*-\r | |
5 | * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>\r | |
6 | * All rights reserved.\r | |
7 | *\r | |
0af8e57c | 8 | * (C) Copyright 2015 Hewlett Packard Enterprise Development LP<BR>\r |
14b0e578 CS |
9 | *\r |
10 | * Redistribution and use in source and binary forms, with or without\r | |
11 | * modification, are permitted provided that the following conditions\r | |
12 | * are met:\r | |
13 | * 1. Redistributions of source code must retain the above copyright\r | |
14 | * notice, this list of conditions and the following disclaimer.\r | |
15 | * 2. Redistributions in binary form must reproduce the above copyright\r | |
16 | * notice, this list of conditions and the following disclaimer in the\r | |
17 | * documentation and/or other materials provided with the distribution.\r | |
18 | *\r | |
19 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND\r | |
20 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\r | |
21 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\r | |
22 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE\r | |
23 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\r | |
24 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS\r | |
25 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)\r | |
26 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT\r | |
27 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY\r | |
28 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF\r | |
29 | * SUCH DAMAGE.\r | |
30 | */\r | |
31 | \r | |
32 | #include "regint.h"\r | |
33 | \r | |
34 | #define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE\r | |
35 | \r | |
36 | #ifdef USE_CRNL_AS_LINE_TERMINATOR\r | |
/* True when the character at p has code 13 (carriage return) and the
 * character that starts enclen(enc,p) bytes later satisfies
 * ONIGENC_IS_MBC_NEWLINE.  The argument p is expanded more than once. */
#define ONIGENC_IS_MBC_CRNL(enc,p,end) \
  (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
   ONIGENC_IS_MBC_NEWLINE(enc,(p+enclen(enc,p)),end))
40 | #endif\r | |
41 | \r | |
42 | #ifdef USE_CAPTURE_HISTORY\r | |
43 | static void history_tree_free(OnigCaptureTreeNode* node);\r | |
44 | \r | |
45 | static void\r | |
46 | history_tree_clear(OnigCaptureTreeNode* node)\r | |
47 | {\r | |
48 | int i;\r | |
49 | \r | |
50 | if (IS_NOT_NULL(node)) {\r | |
51 | for (i = 0; i < node->num_childs; i++) {\r | |
52 | if (IS_NOT_NULL(node->childs[i])) {\r | |
53 | history_tree_free(node->childs[i]);\r | |
54 | }\r | |
55 | }\r | |
56 | for (i = 0; i < node->allocated; i++) {\r | |
57 | node->childs[i] = (OnigCaptureTreeNode* )0;\r | |
58 | }\r | |
59 | node->num_childs = 0;\r | |
60 | node->beg = ONIG_REGION_NOTPOS;\r | |
61 | node->end = ONIG_REGION_NOTPOS;\r | |
62 | node->group = -1;\r | |
63 | }\r | |
64 | }\r | |
65 | \r | |
66 | static void\r | |
67 | history_tree_free(OnigCaptureTreeNode* node)\r | |
68 | {\r | |
69 | history_tree_clear(node);\r | |
70 | xfree(node);\r | |
71 | }\r | |
72 | \r | |
73 | static void\r | |
74 | history_root_free(OnigRegion* r)\r | |
75 | {\r | |
76 | if (IS_NOT_NULL(r->history_root)) {\r | |
77 | history_tree_free(r->history_root);\r | |
78 | r->history_root = (OnigCaptureTreeNode* )0;\r | |
79 | }\r | |
80 | }\r | |
81 | \r | |
82 | static OnigCaptureTreeNode*\r | |
83 | history_node_new(void)\r | |
84 | {\r | |
85 | OnigCaptureTreeNode* node;\r | |
86 | \r | |
87 | node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode));\r | |
88 | CHECK_NULL_RETURN(node);\r | |
89 | node->childs = (OnigCaptureTreeNode** )0;\r | |
90 | node->allocated = 0;\r | |
91 | node->num_childs = 0;\r | |
92 | node->group = -1;\r | |
93 | node->beg = ONIG_REGION_NOTPOS;\r | |
94 | node->end = ONIG_REGION_NOTPOS;\r | |
95 | \r | |
96 | return node;\r | |
97 | }\r | |
98 | \r | |
99 | static int\r | |
100 | history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child)\r | |
101 | {\r | |
102 | #define HISTORY_TREE_INIT_ALLOC_SIZE 8\r | |
103 | \r | |
104 | if (parent->num_childs >= parent->allocated) {\r | |
105 | int n, i;\r | |
106 | \r | |
107 | if (IS_NULL(parent->childs)) {\r | |
108 | n = HISTORY_TREE_INIT_ALLOC_SIZE;\r | |
109 | parent->childs =\r | |
110 | (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n);\r | |
111 | }\r | |
112 | else {\r | |
113 | n = parent->allocated * 2;\r | |
114 | parent->childs =\r | |
115 | (OnigCaptureTreeNode** )xrealloc(parent->childs,\r | |
116 | sizeof(OnigCaptureTreeNode*) * n,\r | |
117 | sizeof(OnigCaptureTreeNode*) * parent->allocated);\r | |
118 | }\r | |
119 | CHECK_NULL_RETURN_MEMERR(parent->childs);\r | |
120 | for (i = parent->allocated; i < n; i++) {\r | |
121 | parent->childs[i] = (OnigCaptureTreeNode* )0;\r | |
122 | }\r | |
123 | parent->allocated = n;\r | |
124 | }\r | |
125 | \r | |
126 | parent->childs[parent->num_childs] = child;\r | |
127 | parent->num_childs++;\r | |
128 | return 0;\r | |
129 | }\r | |
130 | \r | |
131 | static OnigCaptureTreeNode*\r | |
132 | history_tree_clone(OnigCaptureTreeNode* node)\r | |
133 | {\r | |
134 | int i;\r | |
135 | OnigCaptureTreeNode *clone, *child;\r | |
136 | \r | |
137 | clone = history_node_new();\r | |
138 | CHECK_NULL_RETURN(clone);\r | |
139 | \r | |
140 | clone->beg = node->beg;\r | |
141 | clone->end = node->end;\r | |
142 | for (i = 0; i < node->num_childs; i++) {\r | |
143 | child = history_tree_clone(node->childs[i]);\r | |
144 | if (IS_NULL(child)) {\r | |
145 | history_tree_free(clone);\r | |
146 | return (OnigCaptureTreeNode* )0;\r | |
147 | }\r | |
148 | history_tree_add_child(clone, child);\r | |
149 | }\r | |
150 | \r | |
151 | return clone;\r | |
152 | }\r | |
153 | \r | |
154 | extern OnigCaptureTreeNode*\r | |
155 | onig_get_capture_tree(OnigRegion* region)\r | |
156 | {\r | |
157 | return region->history_root;\r | |
158 | }\r | |
159 | #endif /* USE_CAPTURE_HISTORY */\r | |
160 | \r | |
161 | extern void\r | |
162 | onig_region_clear(OnigRegion* region)\r | |
163 | {\r | |
164 | int i;\r | |
165 | \r | |
166 | for (i = 0; i < region->num_regs; i++) {\r | |
167 | region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;\r | |
168 | }\r | |
169 | #ifdef USE_CAPTURE_HISTORY\r | |
170 | history_root_free(region);\r | |
171 | #endif\r | |
172 | }\r | |
173 | \r | |
174 | extern int\r | |
175 | onig_region_resize(OnigRegion* region, int n)\r | |
176 | {\r | |
177 | region->num_regs = n;\r | |
178 | \r | |
179 | if (n < ONIG_NREGION)\r | |
180 | n = ONIG_NREGION;\r | |
181 | \r | |
182 | if (region->allocated == 0) {\r | |
183 | region->beg = (int* )xmalloc(n * sizeof(int));\r | |
184 | region->end = (int* )xmalloc(n * sizeof(int));\r | |
185 | \r | |
186 | if (region->beg == 0 || region->end == 0)\r | |
187 | return ONIGERR_MEMORY;\r | |
188 | \r | |
189 | region->allocated = n;\r | |
190 | }\r | |
191 | else if (region->allocated < n) {\r | |
192 | region->beg = (int* )xrealloc(region->beg, n * sizeof(int), region->allocated * sizeof(int));\r | |
193 | region->end = (int* )xrealloc(region->end, n * sizeof(int), region->allocated * sizeof(int));\r | |
194 | \r | |
195 | if (region->beg == 0 || region->end == 0)\r | |
196 | return ONIGERR_MEMORY;\r | |
197 | \r | |
198 | region->allocated = n;\r | |
199 | }\r | |
200 | \r | |
201 | return 0;\r | |
202 | }\r | |
203 | \r | |
204 | static int\r | |
205 | onig_region_resize_clear(OnigRegion* region, int n)\r | |
206 | {\r | |
207 | int r;\r | |
208 | \r | |
209 | r = onig_region_resize(region, n);\r | |
210 | if (r != 0) return r;\r | |
211 | onig_region_clear(region);\r | |
212 | return 0;\r | |
213 | }\r | |
214 | \r | |
215 | extern int\r | |
216 | onig_region_set(OnigRegion* region, int at, int beg, int end)\r | |
217 | {\r | |
218 | if (at < 0) return ONIGERR_INVALID_ARGUMENT;\r | |
219 | \r | |
220 | if (at >= region->allocated) {\r | |
221 | int r = onig_region_resize(region, at + 1);\r | |
222 | if (r < 0) return r;\r | |
223 | }\r | |
224 | \r | |
225 | region->beg[at] = beg;\r | |
226 | region->end[at] = end;\r | |
227 | return 0;\r | |
228 | }\r | |
229 | \r | |
230 | extern void\r | |
231 | onig_region_init(OnigRegion* region)\r | |
232 | {\r | |
233 | region->num_regs = 0;\r | |
234 | region->allocated = 0;\r | |
235 | region->beg = (int* )0;\r | |
236 | region->end = (int* )0;\r | |
237 | region->history_root = (OnigCaptureTreeNode* )0;\r | |
238 | }\r | |
239 | \r | |
240 | extern OnigRegion*\r | |
241 | onig_region_new(void)\r | |
242 | {\r | |
243 | OnigRegion* r;\r | |
244 | \r | |
245 | r = (OnigRegion* )xmalloc(sizeof(OnigRegion));\r | |
b0c2b797 QS |
246 | if (r != NULL) {\r |
247 | onig_region_init(r);\r | |
248 | }\r | |
14b0e578 CS |
249 | return r;\r |
250 | }\r | |
251 | \r | |
252 | extern void\r | |
253 | onig_region_free(OnigRegion* r, int free_self)\r | |
254 | {\r | |
255 | if (r) {\r | |
256 | if (r->allocated > 0) {\r | |
257 | if (r->beg) xfree(r->beg);\r | |
258 | if (r->end) xfree(r->end);\r | |
259 | r->allocated = 0;\r | |
260 | }\r | |
261 | #ifdef USE_CAPTURE_HISTORY\r | |
262 | history_root_free(r);\r | |
263 | #endif\r | |
264 | if (free_self) xfree(r);\r | |
265 | }\r | |
266 | }\r | |
267 | \r | |
268 | extern void\r | |
269 | onig_region_copy(OnigRegion* to, OnigRegion* from)\r | |
270 | {\r | |
271 | #define RREGC_SIZE (sizeof(int) * from->num_regs)\r | |
272 | int i;\r | |
273 | \r | |
274 | if (to == from) return;\r | |
275 | \r | |
276 | if (to->allocated == 0) {\r | |
277 | if (from->num_regs > 0) {\r | |
278 | to->beg = (int* )xmalloc(RREGC_SIZE);\r | |
279 | to->end = (int* )xmalloc(RREGC_SIZE);\r | |
280 | to->allocated = from->num_regs;\r | |
281 | }\r | |
282 | }\r | |
283 | else if (to->allocated < from->num_regs) {\r | |
284 | to->beg = (int* )xrealloc(to->beg, RREGC_SIZE, sizeof(int) * to->allocated);\r | |
285 | to->end = (int* )xrealloc(to->end, RREGC_SIZE, sizeof(int) * to->allocated);\r | |
286 | to->allocated = from->num_regs;\r | |
287 | }\r | |
288 | \r | |
b0c2b797 QS |
289 | if (to->beg == NULL || to->end == NULL) {\r |
290 | return;\r | |
291 | }\r | |
292 | \r | |
14b0e578 CS |
293 | for (i = 0; i < from->num_regs; i++) {\r |
294 | to->beg[i] = from->beg[i];\r | |
295 | to->end[i] = from->end[i];\r | |
296 | }\r | |
297 | to->num_regs = from->num_regs;\r | |
298 | \r | |
299 | #ifdef USE_CAPTURE_HISTORY\r | |
300 | history_root_free(to);\r | |
301 | \r | |
302 | if (IS_NOT_NULL(from->history_root)) {\r | |
303 | to->history_root = history_tree_clone(from->history_root);\r | |
304 | }\r | |
305 | #endif\r | |
306 | }\r | |
307 | \r | |
308 | \r | |
309 | /** stack **/\r | |
/* Sentinel stored in the mem start/end index tables when no stack entry
 * is recorded (see STACK_PUSH_MEM_START). */
#define INVALID_STACK_INDEX   -1

/* stack type */
/* used by normal-POP: the only types with a non-zero low byte, so they
 * are the ones matched by STK_MASK_POP_USED */
#define STK_ALT                    0x0001
#define STK_LOOK_BEHIND_NOT        0x0002
#define STK_POS_NOT                0x0003
/* handled by normal-POP */
#define STK_MEM_START              0x0100
#define STK_MEM_END                0x8200
#define STK_REPEAT_INC             0x0300
#define STK_STATE_CHECK_MARK       0x1000
/* avoided by normal-POP */
#define STK_NULL_CHECK_START       0x3000
#define STK_NULL_CHECK_END         0x5000  /* for recursive call */
#define STK_MEM_END_MARK           0x8400
#define STK_POS                    0x0500  /* used when POP-POS */
#define STK_STOP_BT                0x0600  /* mark for "(?>...)" */
#define STK_REPEAT                 0x0700
#define STK_CALL_FRAME             0x0800
#define STK_RETURN                 0x0900
#define STK_VOID                   0x0a00  /* filler for a blanked-out entry */

/* stack type check mask */
/* non-zero for STK_ALT, STK_LOOK_BEHIND_NOT and STK_POS_NOT */
#define STK_MASK_POP_USED          0x00ff
/* tested by IS_TO_VOID_TARGET: low byte plus bit 0x1000 */
#define STK_MASK_TO_VOID_TARGET    0x10ff
#define STK_MASK_MEM_END_OR_MARK   0x8000  /* MEM_END or MEM_END_MARK */
337 | \r | |
/*
 * MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start)
 *
 * Initialise the match-argument structure msa: no saved stack
 * (stack_p = NULL) and the given options, region and start position.
 * With USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE the best_len field is also
 * reset to ONIG_MISMATCH.
 */
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
  (msa).stack_p  = (void* )0;\
  (msa).options  = (arg_option);\
  (msa).region   = (arg_region);\
  (msa).start    = (arg_start);\
  (msa).best_len = ONIG_MISMATCH;\
} while(0)
#else
#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
  (msa).stack_p  = (void* )0;\
  (msa).options  = (arg_option);\
  (msa).region   = (arg_region);\
  (msa).start    = (arg_start);\
} while(0)
#endif
354 | \r | |
355 | #ifdef USE_COMBINATION_EXPLOSION_CHECK\r | |
356 | \r | |
/* Buffers of at least this many bytes come from xmalloc() (and are
 * released by MATCH_ARG_FREE); smaller ones come from xalloca(). */
#define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE  16

/*
 * STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num)
 *
 * Set up the state-check bit buffer of msa: one bit per
 * (string position, state) pair, i.e. ((str_len + 1) * state_num + 7) / 8
 * bytes.  `offset` must be an lvalue; it is rewritten into a byte offset
 * and only the bytes from that offset onward are zeroed.
 *
 * The buffer is disabled (state_check_buff = NULL, size 0) when state_num
 * is not positive, the string is shorter than
 * STATE_CHECK_STRING_THRESHOLD_LEN, the computed size is out of range, or
 * the allocation fails.  The last case used to pass a NULL pointer to
 * xmemset(); users of the buffer already test it against NULL, so a
 * failed allocation now simply turns the check off.
 */
#define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num)	do { \
  if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\
    unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\
    offset = ((offset) * (state_num)) >> 3;\
    if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\
      if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) \
        (msa).state_check_buff = (void* )xmalloc(size);\
      else \
        (msa).state_check_buff = (void* )xalloca(size);\
      if ((msa).state_check_buff != (void* )0) {\
        xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \
                (size_t )(size - (offset))); \
        (msa).state_check_buff_size = size;\
      }\
      else {\
        (msa).state_check_buff_size = 0;\
      }\
    }\
    else {\
      (msa).state_check_buff = (void* )0;\
      (msa).state_check_buff_size = 0;\
    }\
  }\
  else {\
    (msa).state_check_buff = (void* )0;\
    (msa).state_check_buff_size = 0;\
  }\
} while(0)
382 | \r | |
/*
 * MATCH_ARG_FREE(msa)
 *
 * Release what msa owns: the saved stack, if any, and the state-check
 * buffer.  The buffer is only freed when its recorded size is at least
 * STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE, i.e. when STATE_CHECK_BUFF_INIT
 * obtained it from xmalloc() rather than xalloca().
 */
#define MATCH_ARG_FREE(msa) do {\
  if ((msa).stack_p) xfree((msa).stack_p);\
  if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \
    if ((msa).state_check_buff) xfree((msa).state_check_buff);\
  }\
} while(0)
389 | #else\r | |
/* Without USE_COMBINATION_EXPLOSION_CHECK there is no state-check buffer:
 * initialisation expands to nothing. */
#define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num)
/* Release the saved stack of msa, if any.  Wrapped in do/while(0), like
 * the variant used when the state check is enabled, so the macro behaves
 * as a single statement and cannot capture a following `else`. */
#define MATCH_ARG_FREE(msa) do {\
  if ((msa).stack_p) xfree((msa).stack_p);\
} while(0)
392 | #endif\r | |
393 | \r | |
394 | \r | |
395 | \r | |
/*
 * STACK_INIT(alloc_addr, ptr_num, stack_num)
 *
 * Set up the caller's stack variables (stk_alloc, stk_base, stk, stk_end).
 * Relies on `msa` and those variables being in scope at the point of use.
 *
 * - If msa already carries a saved stack (msa->stack_p), alloc_addr gets a
 *   fresh block for ptr_num pointers only and the saved stack
 *   (msa->stack_n entries) is reused.
 * - Otherwise one block holding ptr_num pointers followed by stack_num
 *   stack entries is allocated, and the stack starts right after the
 *   pointer area.
 *
 * NOTE(review): the xmalloc() results are not checked here; in the second
 * branch a NULL alloc_addr would yield an invalid stk_alloc.  Confirm
 * whether the caller checks alloc_addr after this macro.
 */
#define STACK_INIT(alloc_addr, ptr_num, stack_num)  do {\
  if (msa->stack_p) {\
    alloc_addr = (char* )xmalloc(sizeof(char*) * (ptr_num));\
    stk_alloc  = (OnigStackType* )(msa->stack_p);\
    stk_base   = stk_alloc;\
    stk        = stk_base;\
    stk_end    = stk_base + msa->stack_n;\
  }\
  else {\
    alloc_addr = (char* )xmalloc(sizeof(char*) * (ptr_num)\
                       + sizeof(OnigStackType) * (stack_num));\
    stk_alloc  = (OnigStackType* )(alloc_addr + sizeof(char*) * (ptr_num));\
    stk_base   = stk_alloc;\
    stk        = stk_base;\
    stk_end    = stk_base + (stack_num);\
  }\
} while(0)
413 | \r | |
/*
 * STACK_SAVE
 *
 * When the current stack is no longer the block set up by STACK_INIT
 * (stk_base != stk_alloc), record its base and its capacity in entries in
 * msa->stack_p / msa->stack_n.  Uses stk_base, stk_end, stk_alloc and msa
 * from the enclosing scope.
 */
#define STACK_SAVE do{\
  if (stk_base != stk_alloc) {\
    msa->stack_p = stk_base;\
    msa->stack_n = (int)(stk_end - stk_base);\
  };\
} while(0)
420 | \r | |
421 | static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE;\r | |
422 | \r | |
423 | extern unsigned int\r | |
424 | onig_get_match_stack_limit_size(void)\r | |
425 | {\r | |
426 | return MatchStackLimitSize;\r | |
427 | }\r | |
428 | \r | |
429 | extern int\r | |
430 | onig_set_match_stack_limit_size(unsigned int size)\r | |
431 | {\r | |
432 | MatchStackLimitSize = size;\r | |
433 | return 0;\r | |
434 | }\r | |
435 | \r | |
436 | static int\r | |
437 | stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,\r | |
438 | OnigStackType** arg_stk, OnigStackType* stk_alloc, OnigMatchArg* msa)\r | |
439 | {\r | |
440 | unsigned int n;\r | |
441 | OnigStackType *x, *stk_base, *stk_end, *stk;\r | |
442 | \r | |
443 | stk_base = *arg_stk_base;\r | |
444 | stk_end = *arg_stk_end;\r | |
445 | stk = *arg_stk;\r | |
446 | \r | |
447 | n = (unsigned int)(stk_end - stk_base);\r | |
448 | if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) {\r | |
449 | x = (OnigStackType* )xmalloc(sizeof(OnigStackType) * n * 2);\r | |
450 | if (IS_NULL(x)) {\r | |
451 | STACK_SAVE;\r | |
452 | return ONIGERR_MEMORY;\r | |
453 | }\r | |
454 | xmemcpy(x, stk_base, n * sizeof(OnigStackType));\r | |
455 | n *= 2;\r | |
456 | }\r | |
457 | else {\r | |
458 | n *= 2;\r | |
459 | if (MatchStackLimitSize != 0 && n > MatchStackLimitSize) {\r | |
460 | if ((unsigned int )(stk_end - stk_base) == MatchStackLimitSize)\r | |
461 | return ONIGERR_MATCH_STACK_LIMIT_OVER;\r | |
462 | else\r | |
463 | n = MatchStackLimitSize;\r | |
464 | }\r | |
465 | x = (OnigStackType* )xrealloc(stk_base, sizeof(OnigStackType) * n, sizeof(OnigStackType) * (stk_end - stk_base));\r | |
466 | if (IS_NULL(x)) {\r | |
467 | STACK_SAVE;\r | |
468 | return ONIGERR_MEMORY;\r | |
469 | }\r | |
470 | }\r | |
471 | *arg_stk = x + (stk - stk_base);\r | |
472 | *arg_stk_base = x;\r | |
473 | *arg_stk_end = x + n;\r | |
474 | return 0;\r | |
475 | }\r | |
476 | \r | |
/*
 * STACK_ENSURE(n)
 *
 * Make sure at least n free entries remain above stk.  When they do not,
 * stack_double() is called once; on a non-zero result the stack is saved
 * with STACK_SAVE and the ENCLOSING FUNCTION returns that result.
 */
#define STACK_ENSURE(n)	do {\
  if (stk_end - stk < (n)) {\
    int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\
    if (r != 0) { STACK_SAVE; return r; } \
  }\
} while(0)

/* Convert between a stack index and a pointer relative to stk_base. */
#define STACK_AT(index)        (stk_base + (index))
#define GET_STACK_INDEX(stk)   ((OnigStackIndex)((stk) - stk_base))

/* Push an entry that carries only a type.  STACK_INC is defined outside
 * this part of the file (presumably it advances stk by one entry). */
#define STACK_PUSH_TYPE(stack_type) do {\
  STACK_ENSURE(1);\
  stk->type = (stack_type);\
  STACK_INC;\
} while(0)

/* True when the entry's type has any bit of STK_MASK_TO_VOID_TARGET set. */
#define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
494 | \r | |
#ifdef USE_COMBINATION_EXPLOSION_CHECK
/* Bit index in state_check_buff for state number snum at string position
 * s.  Uses `str` and `num_comb_exp_check` from the enclosing scope. */
#define STATE_CHECK_POS(s,snum) \
  (((s) - str) * num_comb_exp_check + ((snum) - 1))
/* Read the state-check bit for state snum into v (0 when there is no
 * buffer).  Note: the position used is the enclosing scope's `s`; the
 * macro has no position parameter. */
#define STATE_CHECK_VAL(v,snum) do {\
  if (state_check_buff != NULL) {\
    int x = STATE_CHECK_POS(s,snum);\
    (v) = state_check_buff[x/8] & (1<<(x%8));\
  }\
  else (v) = 0;\
} while(0)


/* Extra `else if` arm used by the pop macros: when a
 * STK_STATE_CHECK_MARK entry is popped, set the corresponding bit in
 * state_check_buff. */
#define ELSE_IF_STATE_CHECK_MARK(stk) \
  else if ((stk)->type == STK_STATE_CHECK_MARK) { \
    int x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\
    state_check_buff[x/8] |= (1<<(x%8));				\
  }

/* Push a state entry (pattern position, string position, previous string
 * position) with no state-check number attached. */
#define STACK_PUSH(stack_type,pat,s,sprev) do {\
  STACK_ENSURE(1);\
  stk->type = (stack_type);\
  stk->u.state.pcode     = (pat);\
  stk->u.state.pstr      = (s);\
  stk->u.state.pstr_prev = (sprev);\
  stk->u.state.state_check = 0;\
  STACK_INC;\
} while(0)

/* Like STACK_PUSH but without the capacity check and without setting the
 * string positions; the caller must already have ensured space. */
#define STACK_PUSH_ENSURED(stack_type,pat) do {\
  stk->type = (stack_type);\
  stk->u.state.pcode = (pat);\
  stk->u.state.state_check = 0;\
  STACK_INC;\
} while(0)

/* Push an STK_ALT entry that records state number snum, or 0 when no
 * state-check buffer is in use. */
#define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum) do {\
  STACK_ENSURE(1);\
  stk->type = STK_ALT;\
  stk->u.state.pcode     = (pat);\
  stk->u.state.pstr      = (s);\
  stk->u.state.pstr_prev = (sprev);\
  stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\
  STACK_INC;\
} while(0)

/* Push an STK_STATE_CHECK_MARK entry for (s, snum); does nothing when
 * there is no state-check buffer. */
#define STACK_PUSH_STATE_CHECK(s,snum) do {\
  if (state_check_buff != NULL) {\
    STACK_ENSURE(1);\
    stk->type = STK_STATE_CHECK_MARK;\
    stk->u.state.pstr = (s);\
    stk->u.state.state_check = (snum);\
    STACK_INC;\
  }\
} while(0)

#else /* USE_COMBINATION_EXPLOSION_CHECK */

/* No state check: the extra pop arm expands to nothing. */
#define ELSE_IF_STATE_CHECK_MARK(stk)

/* Push a state entry (pattern position, string position, previous string
 * position). */
#define STACK_PUSH(stack_type,pat,s,sprev) do {\
  STACK_ENSURE(1);\
  stk->type = (stack_type);\
  stk->u.state.pcode     = (pat);\
  stk->u.state.pstr      = (s);\
  stk->u.state.pstr_prev = (sprev);\
  STACK_INC;\
} while(0)

/* Like STACK_PUSH but without the capacity check and without setting the
 * string positions; the caller must already have ensured space. */
#define STACK_PUSH_ENSURED(stack_type,pat) do {\
  stk->type = (stack_type);\
  stk->u.state.pcode = (pat);\
  STACK_INC;\
} while(0)
#endif /* USE_COMBINATION_EXPLOSION_CHECK */
569 | \r | |
/* Convenience wrappers that push one specific entry type through
 * STACK_PUSH (state entries) or STACK_PUSH_TYPE (type-only entry). */
#define STACK_PUSH_ALT(pat,s,sprev)     STACK_PUSH(STK_ALT,pat,s,sprev)
#define STACK_PUSH_POS(s,sprev)         STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev)
#define STACK_PUSH_POS_NOT(pat,s,sprev) STACK_PUSH(STK_POS_NOT,pat,s,sprev)
#define STACK_PUSH_STOP_BT              STACK_PUSH_TYPE(STK_STOP_BT)
#define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev) \
        STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev)
576 | \r | |
/* Push an STK_REPEAT entry for repeat number id at pattern position pat,
 * with its counter starting at 0. */
#define STACK_PUSH_REPEAT(id, pat) do {\
  STACK_ENSURE(1);\
  stk->type = STK_REPEAT;\
  stk->u.repeat.num    = (id);\
  stk->u.repeat.pcode  = (pat);\
  stk->u.repeat.count  = 0;\
  STACK_INC;\
} while(0)

/* Push an STK_REPEAT_INC entry that refers to the STK_REPEAT entry at
 * stack index sindex; the pop macros decrement that entry's count when
 * this one is popped. */
#define STACK_PUSH_REPEAT_INC(sindex) do {\
  STACK_ENSURE(1);\
  stk->type = STK_REPEAT_INC;\
  stk->u.repeat_inc.si  = (sindex);\
  STACK_INC;\
} while(0)

/* Push an STK_MEM_START entry for group mnum at string position s.  The
 * previous mem_start_stk/mem_end_stk values are saved in the entry so a
 * pop can restore them; mem_start_stk[mnum] then refers to this entry and
 * mem_end_stk[mnum] becomes INVALID_STACK_INDEX. */
#define STACK_PUSH_MEM_START(mnum, s) do {\
  STACK_ENSURE(1);\
  stk->type = STK_MEM_START;\
  stk->u.mem.num      = (int)(mnum);\
  stk->u.mem.pstr     = (s);\
  stk->u.mem.start    = mem_start_stk[mnum];\
  stk->u.mem.end      = mem_end_stk[mnum];\
  mem_start_stk[mnum] = GET_STACK_INDEX(stk);\
  mem_end_stk[mnum]   = INVALID_STACK_INDEX;\
  STACK_INC;\
} while(0)

/* Push an STK_MEM_END entry for group mnum at string position s, saving
 * the previous start/end table values; mem_end_stk[mnum] then refers to
 * this entry. */
#define STACK_PUSH_MEM_END(mnum, s) do {\
  STACK_ENSURE(1);\
  stk->type = STK_MEM_END;\
  stk->u.mem.num    = (mnum);\
  stk->u.mem.pstr   = (s);\
  stk->u.mem.start  = mem_start_stk[mnum];\
  stk->u.mem.end    = mem_end_stk[mnum];\
  mem_end_stk[mnum] = GET_STACK_INDEX(stk);\
  STACK_INC;\
} while(0)

/* Push an STK_MEM_END_MARK entry for group mnum; only the group number is
 * recorded. */
#define STACK_PUSH_MEM_END_MARK(mnum) do {\
  STACK_ENSURE(1);\
  stk->type = STK_MEM_END_MARK;\
  stk->u.mem.num = (mnum);\
  STACK_INC;\
} while(0)
622 | \r | |
/*
 * STACK_GET_MEM_START(mnum, k)
 *
 * Scan downward from stk for the STK_MEM_START entry of group mnum that
 * is not balanced by a MEM_END / MEM_END_MARK entry of the same group seen
 * above it.  k is left on that entry; if none is found the scan stops
 * with k == stk_base.
 */
#define STACK_GET_MEM_START(mnum, k) do {\
  int level = 0;\
  k = stk;\
  while (k > stk_base) {\
    k--;\
    if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
      && k->u.mem.num == (mnum)) {\
      level++;\
    }\
    else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
      if (level == 0) break;\
      level--;\
    }\
  }\
} while(0)

/*
 * STACK_GET_MEM_RANGE(k, mnum, start, end)
 *
 * Scan upward from k (up to, not including, stk) over the entries of
 * group mnum.  `start` receives the string position of the first
 * STK_MEM_START seen at nesting level 0 and `end` the position of the
 * STK_MEM_END that brings the level back to 0.  Neither output is written
 * when the corresponding entry is not found.
 */
#define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\
  int level = 0;\
  while (k < stk) {\
    if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
      if (level == 0) (start) = k->u.mem.pstr;\
      level++;\
    }\
    else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\
      level--;\
      if (level == 0) {\
        (end) = k->u.mem.pstr;\
        break;\
      }\
    }\
    k++;\
  }\
} while(0)
656 | \r | |
/* Push an STK_NULL_CHECK_START entry recording check number cnum and the
 * current string position s (compared later by the STACK_NULL_CHECK*
 * macros). */
#define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\
  STACK_ENSURE(1);\
  stk->type = STK_NULL_CHECK_START;\
  stk->u.null_check.num  = (cnum);\
  stk->u.null_check.pstr = (s);\
  STACK_INC;\
} while(0)

/* Push an STK_NULL_CHECK_END entry for check number cnum. */
#define STACK_PUSH_NULL_CHECK_END(cnum) do {\
  STACK_ENSURE(1);\
  stk->type = STK_NULL_CHECK_END;\
  stk->u.null_check.num  = (cnum);\
  STACK_INC;\
} while(0)

/* Push an STK_CALL_FRAME entry holding the pattern address to return to. */
#define STACK_PUSH_CALL_FRAME(pat) do {\
  STACK_ENSURE(1);\
  stk->type = STK_CALL_FRAME;\
  stk->u.call_frame.ret_addr = (pat);\
  STACK_INC;\
} while(0)

/* Push a type-only STK_RETURN entry. */
#define STACK_PUSH_RETURN do {\
  STACK_ENSURE(1);\
  stk->type = STK_RETURN;\
  STACK_INC;\
} while(0)
684 | \r | |
685 | \r | |
/*
 * STACK_BASE_CHECK(p, at)
 *
 * Debug-only underflow guard: with ONIG_DEBUG, a pointer p below stk_base
 * prints the location string `at` to stderr and jumps to the enclosing
 * function's `stack_error` label.  Without ONIG_DEBUG it expands to
 * nothing, so the scanning macros that use it perform no bounds check.
 */
#ifdef ONIG_DEBUG
#define STACK_BASE_CHECK(p, at) \
  if ((p) < stk_base) {\
    fprintf(stderr, "at %s\n", at);\
    goto stack_error;\
  }
#else
#define STACK_BASE_CHECK(p, at)
#endif
695 | \r | |
/* Drop the top stack entry without inspecting it. */
#define STACK_POP_ONE do {\
  stk--;\
  STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
} while(0)

/*
 * STACK_POP
 *
 * Pop entries until one whose type matches STK_MASK_POP_USED is reached;
 * stk is left pointing at that entry.  How much bookkeeping is undone for
 * the skipped entries depends on `pop_level` (from the enclosing scope):
 *
 * - STACK_POP_LEVEL_FREE:      nothing is restored.
 * - STACK_POP_LEVEL_MEM_START: STK_MEM_START entries restore the saved
 *                              mem_start_stk / mem_end_stk values.
 * - any other level:           additionally, STK_REPEAT_INC entries
 *                              decrement the referenced repeat count and
 *                              STK_MEM_END entries restore the saved
 *                              start/end values.
 *
 * In every case ELSE_IF_STATE_CHECK_MARK handles state-check marks when
 * that feature is compiled in.
 */
#define STACK_POP  do {\
  switch (pop_level) {\
  case STACK_POP_LEVEL_FREE:\
    while (1) {\
      stk--;\
      STACK_BASE_CHECK(stk, "STACK_POP"); \
      if ((stk->type & STK_MASK_POP_USED) != 0)  break;\
      ELSE_IF_STATE_CHECK_MARK(stk);\
    }\
    break;\
  case STACK_POP_LEVEL_MEM_START:\
    while (1) {\
      stk--;\
      STACK_BASE_CHECK(stk, "STACK_POP 2"); \
      if ((stk->type & STK_MASK_POP_USED) != 0)  break;\
      else if (stk->type == STK_MEM_START) {\
        mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
        mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
      }\
      ELSE_IF_STATE_CHECK_MARK(stk);\
    }\
    break;\
  default:\
    while (1) {\
      stk--;\
      STACK_BASE_CHECK(stk, "STACK_POP 3"); \
      if ((stk->type & STK_MASK_POP_USED) != 0)  break;\
      else if (stk->type == STK_MEM_START) {\
        mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
        mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
      }\
      else if (stk->type == STK_REPEAT_INC) {\
        STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
      }\
      else if (stk->type == STK_MEM_END) {\
        mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
        mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
      }\
      ELSE_IF_STATE_CHECK_MARK(stk);\
    }\
    break;\
  }\
} while(0)
744 | \r | |
/*
 * Pop entries until an STK_POS_NOT entry is reached (stk is left on it).
 * Skipped STK_MEM_START / STK_MEM_END entries restore the saved
 * mem_start_stk / mem_end_stk values, and skipped STK_REPEAT_INC entries
 * decrement the referenced repeat count.
 */
#define STACK_POP_TIL_POS_NOT  do {\
  while (1) {\
    stk--;\
    STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \
    if (stk->type == STK_POS_NOT) break;\
    else if (stk->type == STK_MEM_START) {\
      mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
      mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
    }\
    else if (stk->type == STK_REPEAT_INC) {\
      STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
    }\
    else if (stk->type == STK_MEM_END) {\
      mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
      mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
    }\
    ELSE_IF_STATE_CHECK_MARK(stk);\
  }\
} while(0)

/*
 * Same as STACK_POP_TIL_POS_NOT, but the scan stops at an
 * STK_LOOK_BEHIND_NOT entry.
 */
#define STACK_POP_TIL_LOOK_BEHIND_NOT  do {\
  while (1) {\
    stk--;\
    STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \
    if (stk->type == STK_LOOK_BEHIND_NOT) break;\
    else if (stk->type == STK_MEM_START) {\
      mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
      mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
    }\
    else if (stk->type == STK_REPEAT_INC) {\
      STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
    }\
    else if (stk->type == STK_MEM_END) {\
      mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
      mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
    }\
    ELSE_IF_STATE_CHECK_MARK(stk);\
  }\
} while(0)
784 | \r | |
/*
 * STACK_POS_END(k)
 *
 * Walk downward from stk, turning every entry for which
 * IS_TO_VOID_TARGET is true into STK_VOID, until the nearest STK_POS
 * entry is found; that entry is voided as well and k is left on it.
 * stk itself is not moved.
 */
#define STACK_POS_END(k) do {\
  k = stk;\
  while (1) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_POS_END"); \
    if (IS_TO_VOID_TARGET(k)) {\
      k->type = STK_VOID;\
    }\
    else if (k->type == STK_POS) {\
      k->type = STK_VOID;\
      break;\
    }\
  }\
} while(0)

/*
 * STACK_STOP_BT_END
 *
 * Same walk as STACK_POS_END, but it ends at the nearest STK_STOP_BT
 * entry and uses a macro-local cursor.
 */
#define STACK_STOP_BT_END do {\
  OnigStackType *k = stk;\
  while (1) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \
    if (IS_TO_VOID_TARGET(k)) {\
      k->type = STK_VOID;\
    }\
    else if (k->type == STK_STOP_BT) {\
      k->type = STK_VOID;\
      break;\
    }\
  }\
} while(0)
814 | \r | |
/*
 * STACK_NULL_CHECK(isnull, id, s)
 *
 * Find the nearest STK_NULL_CHECK_START entry with number id below stk
 * and set isnull to 1 when the string position recorded there equals s,
 * 0 otherwise.  The scan has no lower bound other than the debug-only
 * STACK_BASE_CHECK, so a matching entry must exist.
 */
#define STACK_NULL_CHECK(isnull,id,s) do {\
  OnigStackType* k = stk;\
  while (1) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \
    if (k->type == STK_NULL_CHECK_START) {\
      if (k->u.null_check.num == (id)) {\
        (isnull) = (k->u.null_check.pstr == (s));\
        break;\
      }\
    }\
  }\
} while(0)

/*
 * STACK_NULL_CHECK_REC(isnull, id, s)
 *
 * Variant of STACK_NULL_CHECK that keeps a nesting level: each
 * STK_NULL_CHECK_END entry passed raises the level, and a matching
 * STK_NULL_CHECK_START only counts when the level is 0 (otherwise it
 * lowers the level).
 *
 * NOTE(review): the level is raised for every STK_NULL_CHECK_END
 * regardless of its number, whereas STACK_NULL_CHECK_MEMST_REC only counts
 * END entries whose number equals id -- confirm the asymmetry is intended.
 */
#define STACK_NULL_CHECK_REC(isnull,id,s) do {\
  int level = 0;\
  OnigStackType* k = stk;\
  while (1) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \
    if (k->type == STK_NULL_CHECK_START) {\
      if (k->u.null_check.num == (id)) {\
        if (level == 0) {\
          (isnull) = (k->u.null_check.pstr == (s));\
          break;\
        }\
        else level--;\
      }\
    }\
    else if (k->type == STK_NULL_CHECK_END) {\
      level++;\
    }\
  }\
} while(0)
849 | \r | |
/*
 * STACK_NULL_CHECK_MEMST(isnull, id, s, reg)
 *
 * Null check that also looks at capture state.  The nearest
 * STK_NULL_CHECK_START entry with number id is located:
 *
 * - if its recorded string position differs from s, isnull = 0;
 * - otherwise isnull starts at 1 and every STK_MEM_START entry between
 *   that entry and stk is examined:
 *     * saved end index is INVALID_STACK_INDEX        -> isnull = 0, stop
 *     * saved start position != saved end position    -> isnull = 0, stop
 *     * positions equal but different from s          -> isnull = -1
 *       (empty, but position changed); the scan continues.
 *
 * The saved end position is read through the stack entry when the group's
 * bit is set in reg->bt_mem_end, and otherwise taken directly from the
 * stored value cast to a pointer.
 */
#define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\
  OnigStackType* k = stk;\
  while (1) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \
    if (k->type == STK_NULL_CHECK_START) {\
      if (k->u.null_check.num == (id)) {\
        if (k->u.null_check.pstr != (s)) {\
          (isnull) = 0;\
          break;\
        }\
        else {\
          UChar* endp;\
          (isnull) = 1;\
          while (k < stk) {\
            if (k->type == STK_MEM_START) {\
              if (k->u.mem.end == INVALID_STACK_INDEX) {\
                (isnull) = 0; break;\
              }\
              if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
                endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
              else\
                endp = (UChar* )(UINTN)k->u.mem.end;\
              if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
                (isnull) = 0; break;\
              }\
              else if (endp != s) {\
                (isnull) = -1; /* empty, but position changed */ \
              }\
            }\
            k++;\
          }\
          break;\
        }\
      }\
    }\
  }\
} while(0)
888 | \r | |
/*
 * STACK_NULL_CHECK_MEMST_REC(isnull, id, s, reg)
 *
 * Recursion-aware form of the capture-inspecting null check.
 * Walks the match stack downward from `stk`.  STK_NULL_CHECK_END entries
 * numbered `id` raise `level`, STK_NULL_CHECK_START entries numbered `id`
 * lower it again; the START entry met while `level` is 0 is the one that
 * belongs to the current iteration.
 *
 *   - If the position stored there differs from `s`, `isnull` becomes 0.
 *   - Otherwise `isnull` starts at 1 and every STK_MEM_START entry between
 *     that START entry and `stk` is examined:
 *       * an entry whose end is INVALID_STACK_INDEX       -> isnull = 0
 *       * an entry whose start and end positions differ   -> isnull = 0
 *       * an empty entry whose end position is not `s`    -> isnull = -1
 *         (the scan goes on, so a later entry can still force 0)
 *
 * The end position is read through the stack (STACK_AT) when the group's
 * bit is set in `reg->bt_mem_end`; otherwise the stored value itself is
 * reinterpreted as the pointer.
 *
 * Uses `stk` from the expansion site; `isnull` must be an lvalue.  All
 * arguments are parenthesized in the expansion.
 */
#define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\
  int level = 0;\
  OnigStackType* k = stk;\
  while (1) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC");\
    if (k->type == STK_NULL_CHECK_START) {\
      if (k->u.null_check.num == (id)) {\
        if (level == 0) {\
          if (k->u.null_check.pstr != (s)) {\
            (isnull) = 0;\
            break;\
          }\
          else {\
            UChar* endp;\
            (isnull) = 1;\
            while (k < stk) {\
              if (k->type == STK_MEM_START) {\
                if (k->u.mem.end == INVALID_STACK_INDEX) {\
                  (isnull) = 0; break;\
                }\
                if (BIT_STATUS_AT((reg)->bt_mem_end, k->u.mem.num))\
                  endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
                else\
                  endp = (UChar* )(UINTN)k->u.mem.end;\
                if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
                  (isnull) = 0; break;\
                }\
                else if (endp != (s)) {\
                  (isnull) = -1; /* empty, but position changed */\
                }\
              }\
              k++;\
            }\
            break;\
          }\
        }\
        else {\
          level--;\
        }\
      }\
    }\
    else if (k->type == STK_NULL_CHECK_END) {\
      if (k->u.null_check.num == (id)) level++;\
    }\
  }\
} while(0)
936 | \r | |
/*
 * STACK_GET_REPEAT(id, k)
 *
 * Leaves `k` pointing at the nearest STK_REPEAT entry below `stk` whose
 * number is `id` and which sits at the current call depth.  STK_RETURN
 * entries raise the depth counter and STK_CALL_FRAME entries lower it;
 * repeat entries seen while the counter is non-zero are ignored.
 *
 * `k` must be an assignable OnigStackType pointer; `stk` comes from the
 * expansion site.
 */
#define STACK_GET_REPEAT(id, k) do {\
  int level = 0;\
  k = stk;\
  for (;;) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_GET_REPEAT");\
    if (k->type == STK_REPEAT) {\
      if (level == 0 && k->u.repeat.num == (id)) break;\
    }\
    else if (k->type == STK_CALL_FRAME) {\
      level--;\
    }\
    else if (k->type == STK_RETURN) {\
      level++;\
    }\
  }\
} while(0)
954 | \r | |
/*
 * STACK_RETURN(addr)
 *
 * Stores in `addr` the return address of the STK_CALL_FRAME entry that
 * matches the current call.  Scanning downward from `stk`, every
 * STK_RETURN entry marks a call that already finished, so the next
 * STK_CALL_FRAME entry below it is consumed instead of being used.
 */
#define STACK_RETURN(addr) do {\
  int level = 0;\
  OnigStackType* k = stk;\
  for (;;) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_RETURN");\
    if (k->type == STK_CALL_FRAME) {\
      if (level != 0) {\
        level--;\
        continue;\
      }\
      (addr) = k->u.call_frame.ret_addr;\
      break;\
    }\
    if (k->type == STK_RETURN) {\
      level++;\
    }\
  }\
} while(0)
972 | \r | |
973 | \r | |
/*
 * STRING_CMP(s1, s2, len)
 *
 * Compares `len` bytes at `s1` and `s2`, advancing both pointers and
 * counting `len` down as it goes.  Jumps to the enclosing `fail` label on
 * the first mismatch.  All three arguments are modified, so each must be
 * a plain variable.
 */
#define STRING_CMP(s1,s2,len) do {\
  while (len-- > 0) {\
    if (*s1++ == *s2++) continue;\
    goto fail;\
  }\
} while(0)
979 | \r | |
/*
 * STRING_CMP_IC(case_fold_flag, s1, ps2, len)
 *
 * Case-insensitive counterpart of STRING_CMP: hands the comparison to
 * string_cmp_ic() using the `encode` variable of the expansion site and
 * jumps to the enclosing `fail` label when that function returns 0.
 */
#define STRING_CMP_IC(case_fold_flag,s1,ps2,len) do {\
  if (! string_cmp_ic(encode, case_fold_flag, s1, ps2, len))\
    goto fail;\
} while(0)
984 | \r | |
985 | static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,\r | |
986 | UChar* s1, UChar** ps2, int mblen)\r | |
987 | {\r | |
988 | UChar buf1[ONIGENC_MBC_CASE_FOLD_MAXLEN];\r | |
989 | UChar buf2[ONIGENC_MBC_CASE_FOLD_MAXLEN];\r | |
990 | UChar *p1, *p2, *end1, *s2, *end2;\r | |
991 | int len1, len2;\r | |
992 | \r | |
993 | s2 = *ps2;\r | |
994 | end1 = s1 + mblen;\r | |
995 | end2 = s2 + mblen;\r | |
996 | while (s1 < end1) {\r | |
997 | len1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, end1, buf1);\r | |
998 | len2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s2, end2, buf2);\r | |
999 | if (len1 != len2) return 0;\r | |
1000 | p1 = buf1;\r | |
1001 | p2 = buf2;\r | |
1002 | while (len1-- > 0) {\r | |
1003 | if (*p1 != *p2) return 0;\r | |
1004 | p1++;\r | |
1005 | p2++;\r | |
1006 | }\r | |
1007 | }\r | |
1008 | \r | |
1009 | *ps2 = s2;\r | |
1010 | return 1;\r | |
1011 | }\r | |
1012 | \r | |
/*
 * STRING_CMP_VALUE(s1, s2, len, is_fail)
 *
 * Same byte comparison as STRING_CMP, but reports the outcome in
 * `is_fail` (1 on the first mismatch, 0 when all `len` bytes agree)
 * instead of jumping.  `s1`, `s2` and `len` are modified.
 */
#define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
  for (is_fail = 0; len-- > 0; ) {\
    if (*s1++ == *s2++) continue;\
    is_fail = 1;\
    break;\
  }\
} while(0)
1021 | \r | |
/*
 * STRING_CMP_VALUE_IC(case_fold_flag, s1, ps2, len, is_fail)
 *
 * Case-insensitive counterpart of STRING_CMP_VALUE: `is_fail` becomes 1
 * when string_cmp_ic() returns 0 and 0 otherwise.  Uses the `encode`
 * variable of the expansion site.
 */
#define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,is_fail) do {\
  is_fail = (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) ? 1 : 0;\
} while(0)
1028 | \r | |
1029 | \r | |
/*
 * Subject-string position helpers.  They read `str`, `end`, `s` and, in
 * the range-restricted build, `right_range` from the expansion site.
 *
 *   IS_EMPTY_STR          the subject is empty
 *   ON_STR_BEGIN(s)       `s` is at the first byte of the subject
 *   ON_STR_END(s)         `s` is just past the last byte of the subject
 *   DATA_ENSURE_CHECK1    at least one byte is left at `s`
 *   DATA_ENSURE_CHECK(n)  at least `n` bytes are left at `s`
 *   DATA_ENSURE(n)        jump to `fail` unless `n` bytes are left at `s`
 *
 * DATA_ENSURE is wrapped in do { } while (0) so that it behaves as one
 * statement: written as a bare `if`, it would capture an `else` that
 * follows the macro call in the caller.
 */
#define IS_EMPTY_STR           (str == end)
#define ON_STR_BEGIN(s)        ((s) == str)
#define ON_STR_END(s)          ((s) == end)
#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
#define DATA_ENSURE_CHECK1     (s < right_range)
#define DATA_ENSURE_CHECK(n)   (s + (n) <= right_range)
#define DATA_ENSURE(n)         do { if (s + (n) > right_range) goto fail; } while (0)
#else
#define DATA_ENSURE_CHECK1     (s < end)
#define DATA_ENSURE_CHECK(n)   (s + (n) <= end)
#define DATA_ENSURE(n)         do { if (s + (n) > end) goto fail; } while (0)
#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
1042 | \r | |
1043 | \r | |
1044 | #ifdef USE_CAPTURE_HISTORY\r | |
1045 | static int\r | |
1046 | make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp,\r | |
1047 | OnigStackType* stk_top, UChar* str, regex_t* reg)\r | |
1048 | {\r | |
1049 | int n, r;\r | |
1050 | OnigCaptureTreeNode* child;\r | |
1051 | OnigStackType* k = *kp;\r | |
1052 | \r | |
1053 | while (k < stk_top) {\r | |
1054 | if (k->type == STK_MEM_START) {\r | |
1055 | n = k->u.mem.num;\r | |
1056 | if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP &&\r | |
1057 | BIT_STATUS_AT(reg->capture_history, n) != 0) {\r | |
1058 | child = history_node_new();\r | |
1059 | CHECK_NULL_RETURN_MEMERR(child);\r | |
1060 | child->group = n;\r | |
1061 | child->beg = (int )(k->u.mem.pstr - str);\r | |
1062 | r = history_tree_add_child(node, child);\r | |
1063 | if (r != 0) return r;\r | |
1064 | *kp = (k + 1);\r | |
1065 | r = make_capture_history_tree(child, kp, stk_top, str, reg);\r | |
1066 | if (r != 0) return r;\r | |
1067 | \r | |
1068 | k = *kp;\r | |
1069 | child->end = (int )(k->u.mem.pstr - str);\r | |
1070 | }\r | |
1071 | }\r | |
1072 | else if (k->type == STK_MEM_END) {\r | |
1073 | if (k->u.mem.num == node->group) {\r | |
1074 | node->end = (int )(k->u.mem.pstr - str);\r | |
1075 | *kp = k;\r | |
1076 | return 0;\r | |
1077 | }\r | |
1078 | }\r | |
1079 | k++;\r | |
1080 | }\r | |
1081 | \r | |
1082 | return 1; /* 1: root node ending. */\r | |
1083 | }\r | |
1084 | #endif\r | |
1085 | \r | |
1086 | #ifdef USE_BACKREF_WITH_LEVEL\r | |
1087 | static int mem_is_in_memp(int mem, int num, UChar* memp)\r | |
1088 | {\r | |
1089 | int i;\r | |
1090 | MemNumType m;\r | |
1091 | \r | |
1092 | for (i = 0; i < num; i++) {\r | |
1093 | GET_MEMNUM_INC(m, memp);\r | |
1094 | if (mem == (int )m) return 1;\r | |
1095 | }\r | |
1096 | return 0;\r | |
1097 | }\r | |
1098 | \r | |
1099 | static int backref_match_at_nested_level(regex_t* reg\r | |
1100 | , OnigStackType* top, OnigStackType* stk_base\r | |
1101 | , int ignore_case, int case_fold_flag\r | |
1102 | , int nest, int mem_num, UChar* memp, UChar** s, const UChar* send)\r | |
1103 | {\r | |
1104 | UChar *ss, *p, *pstart, *pend = NULL_UCHARP;\r | |
1105 | int level;\r | |
1106 | OnigStackType* k;\r | |
1107 | \r | |
1108 | level = 0;\r | |
1109 | k = top;\r | |
1110 | k--;\r | |
1111 | while (k >= stk_base) {\r | |
1112 | if (k->type == STK_CALL_FRAME) {\r | |
1113 | level--;\r | |
1114 | }\r | |
1115 | else if (k->type == STK_RETURN) {\r | |
1116 | level++;\r | |
1117 | }\r | |
1118 | else if (level == nest) {\r | |
1119 | if (k->type == STK_MEM_START) {\r | |
1120 | if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {\r | |
1121 | pstart = k->u.mem.pstr;\r | |
1122 | if (pend != NULL_UCHARP) {\r | |
1123 | if (pend - pstart > send - *s) return 0; /* or goto next_mem; */\r | |
1124 | p = pstart;\r | |
1125 | ss = *s;\r | |
1126 | \r | |
1127 | if (ignore_case != 0) {\r | |
1128 | if (string_cmp_ic(reg->enc, case_fold_flag,\r | |
1129 | pstart, &ss, (int )(pend - pstart)) == 0)\r | |
1130 | return 0; /* or goto next_mem; */\r | |
1131 | }\r | |
1132 | else {\r | |
1133 | while (p < pend) {\r | |
1134 | if (*p++ != *ss++) return 0; /* or goto next_mem; */\r | |
1135 | }\r | |
1136 | }\r | |
1137 | \r | |
1138 | *s = ss;\r | |
1139 | return 1;\r | |
1140 | }\r | |
1141 | }\r | |
1142 | }\r | |
1143 | else if (k->type == STK_MEM_END) {\r | |
1144 | if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {\r | |
1145 | pend = k->u.mem.pstr;\r | |
1146 | }\r | |
1147 | }\r | |
1148 | }\r | |
1149 | k--;\r | |
1150 | }\r | |
1151 | \r | |
1152 | return 0;\r | |
1153 | }\r | |
1154 | #endif /* USE_BACKREF_WITH_LEVEL */\r | |
1155 | \r | |
1156 | \r | |
1157 | #ifdef ONIG_DEBUG_STATISTICS\r | |
1158 | \r | |
/*
 * Time source for the per-opcode statistics.
 *
 *   GETTIME(t)        stores the current time in `t`
 *   TIMEDIFF(te, ts)  difference between two stored times
 *
 * USE_TIMEOFDAY is defined unconditionally right here, so the
 * gettimeofday() variant (difference in microseconds) is the one that is
 * compiled; the times() variant is kept as the alternative.
 */
#define USE_TIMEOFDAY

#if defined(USE_TIMEOFDAY)
#  if defined(HAVE_SYS_TIME_H)
#    include <sys/time.h>
#  endif
#  if defined(HAVE_UNISTD_H)
#    include <unistd.h>
#  endif
static struct timeval ts, te;
#  define GETTIME(t)        gettimeofday(&(t), (struct timezone* )0)
#  define TIMEDIFF(te,ts)   (((te).tv_usec - (ts).tv_usec) + \
                            (((te).tv_sec - (ts).tv_sec)*1000000))
#else
#  if defined(HAVE_SYS_TIMES_H)
#    include <sys/times.h>
#  endif
static struct tms ts, te;
#  define GETTIME(t)         times(&(t))
#  define TIMEDIFF(te,ts)   ((te).tms_utime - (ts).tms_utime)
#endif
1180 | \r | |
1181 | static int OpCounter[256];\r | |
1182 | static int OpPrevCounter[256];\r | |
1183 | static unsigned long OpTime[256];\r | |
1184 | static int OpCurr = OP_FINISH;\r | |
1185 | static int OpPrevTarget = OP_FAIL;\r | |
1186 | static int MaxStackDepth = 0;\r | |
1187 | \r | |
/*
 * MOP_IN(opcode)
 *
 * Statistics hook run on entry to an opcode handler.  When `opcode` is
 * the tracked target (OpPrevTarget), the counter of the opcode that was
 * current until now is bumped; then `opcode` becomes the current opcode,
 * its execution counter is bumped and the start time is taken.
 *
 * `opcode` is parenthesized in the expansion so that an expression
 * argument keeps its meaning; it is evaluated more than once.
 */
#define MOP_IN(opcode) do {\
  if ((opcode) == OpPrevTarget) OpPrevCounter[OpCurr]++;\
  OpCurr = (opcode);\
  OpCounter[(opcode)]++;\
  GETTIME(ts);\
} while(0)

/*
 * MOP_OUT
 *
 * Statistics hook run on leaving an opcode handler: takes the end time
 * and adds the elapsed time to the current opcode's total.
 */
#define MOP_OUT do {\
  GETTIME(te);\
  OpTime[OpCurr] += TIMEDIFF(te, ts);\
} while(0)
1199 | \r | |
1200 | extern void\r | |
1201 | onig_statistics_init(void)\r | |
1202 | {\r | |
1203 | int i;\r | |
1204 | for (i = 0; i < 256; i++) {\r | |
1205 | OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0;\r | |
1206 | }\r | |
1207 | MaxStackDepth = 0;\r | |
1208 | }\r | |
1209 | \r | |
1210 | extern void\r | |
1211 | onig_print_statistics(FILE* f)\r | |
1212 | {\r | |
1213 | int i;\r | |
1214 | fprintf(f, " count prev time\n");\r | |
1215 | for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {\r | |
1216 | fprintf(f, "%8d: %8d: %10ld: %s\n",\r | |
1217 | OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name);\r | |
1218 | }\r | |
1219 | fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth);\r | |
1220 | }\r | |
1221 | \r | |
/*
 * STACK_INC (statistics build)
 *
 * Advances the stack top `stk` by one entry and records the distance from
 * `stk_base` in MaxStackDepth whenever it exceeds the previous maximum.
 */
#define STACK_INC do {\
  stk++;\
  if (MaxStackDepth < stk - stk_base)\
    MaxStackDepth = stk - stk_base;\
} while(0)
1227 | \r | |
1228 | #else\r | |
/* Normal build: only advance the stack top; no depth bookkeeping. */
#define STACK_INC     stk++

/* Normal build: the per-opcode statistics hooks expand to nothing. */
#define MOP_IN(opcode)
#define MOP_OUT
1233 | #endif\r | |
1234 | \r | |
1235 | \r | |
/*
 * Match region as laid out for the POSIX API: an array of offset pairs,
 * one per group, with element 0 describing the whole match.
 */
typedef int regoff_t;

typedef struct {
  regoff_t rm_so;   /* offset of the first byte of the match */
  regoff_t rm_eo;   /* offset just past the last byte of the match */
} posix_regmatch_t;
1243 | \r | |
1244 | /* match data(str - end) from position (sstart). */\r | |
1245 | /* if sstart == str then set sprev to NULL. */\r | |
1246 | static int\r | |
1247 | match_at(regex_t* reg, const UChar* str, const UChar* end,\r | |
1248 | #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE\r | |
1249 | const UChar* right_range,\r | |
1250 | #endif\r | |
1251 | const UChar* sstart, UChar* sprev, OnigMatchArg* msa)\r | |
1252 | {\r | |
1253 | static UChar FinishCode[] = { OP_FINISH };\r | |
1254 | \r | |
1255 | int i, n, num_mem, best_len, pop_level;\r | |
1256 | LengthType tlen, tlen2;\r | |
1257 | MemNumType mem;\r | |
1258 | RelAddrType addr;\r | |
1259 | OnigOptionType option = reg->options;\r | |
1260 | OnigEncoding encode = reg->enc;\r | |
1261 | OnigCaseFoldType case_fold_flag = reg->case_fold_flag;\r | |
1262 | UChar *s, *q, *sbegin;\r | |
1263 | UChar *p = reg->p;\r | |
1264 | char *alloca_base;\r | |
1265 | OnigStackType *stk_alloc, *stk_base, *stk, *stk_end;\r | |
1266 | OnigStackType *stkp; /* used as any purpose. */\r | |
1267 | OnigStackIndex si;\r | |
1268 | OnigStackIndex *repeat_stk;\r | |
1269 | OnigStackIndex *mem_start_stk, *mem_end_stk;\r | |
1270 | #ifdef USE_COMBINATION_EXPLOSION_CHECK\r | |
1271 | int scv;\r | |
1272 | unsigned char* state_check_buff = msa->state_check_buff;\r | |
1273 | int num_comb_exp_check = reg->num_comb_exp_check;\r | |
1274 | #endif\r | |
1275 | n = reg->num_repeat + reg->num_mem * 2;\r | |
1276 | \r | |
1277 | STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE);\r | |
1278 | pop_level = reg->stack_pop_level;\r | |
1279 | num_mem = reg->num_mem;\r | |
1280 | repeat_stk = (OnigStackIndex* )alloca_base;\r | |
1281 | \r | |
1282 | mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat);\r | |
1283 | mem_end_stk = mem_start_stk + num_mem;\r | |
1284 | mem_start_stk--; /* for index start from 1,\r | |
1285 | mem_start_stk[1]..mem_start_stk[num_mem] */\r | |
1286 | mem_end_stk--; /* for index start from 1,\r | |
1287 | mem_end_stk[1]..mem_end_stk[num_mem] */\r | |
1288 | for (i = 1; i <= num_mem; i++) {\r | |
1289 | mem_start_stk[i] = mem_end_stk[i] = INVALID_STACK_INDEX;\r | |
1290 | }\r | |
1291 | \r | |
1292 | #ifdef ONIG_DEBUG_MATCH\r | |
1293 | fprintf(stderr, "match_at: str: %d, end: %d, start: %d, sprev: %d\n",\r | |
1294 | (int )str, (int )end, (int )sstart, (int )sprev);\r | |
1295 | fprintf(stderr, "size: %d, start offset: %d\n",\r | |
1296 | (int )(end - str), (int )(sstart - str));\r | |
1297 | #endif\r | |
1298 | \r | |
1299 | STACK_PUSH_ENSURED(STK_ALT, FinishCode); /* bottom stack */\r | |
1300 | best_len = ONIG_MISMATCH;\r | |
1301 | s = (UChar* )sstart;\r | |
1302 | while (1) {\r | |
1303 | #ifdef ONIG_DEBUG_MATCH\r | |
1304 | {\r | |
1305 | UChar *q, *bp, buf[50];\r | |
1306 | int len;\r | |
1307 | fprintf(stderr, "%4d> \"", (int )(s - str));\r | |
1308 | bp = buf;\r | |
1309 | for (i = 0, q = s; i < 7 && q < end; i++) {\r | |
1310 | len = enclen(encode, q);\r | |
1311 | while (len-- > 0) *bp++ = *q++;\r | |
1312 | }\r | |
1313 | if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; }\r | |
1314 | else { xmemcpy(bp, "\"", 1); bp += 1; }\r | |
1315 | *bp = 0;\r | |
1316 | fputs((char* )buf, stderr);\r | |
1317 | for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr);\r | |
1318 | onig_print_compiled_byte_code(stderr, p, NULL, encode);\r | |
1319 | fprintf(stderr, "\n");\r | |
1320 | }\r | |
1321 | #endif\r | |
1322 | \r | |
1323 | sbegin = s;\r | |
1324 | switch (*p++) {\r | |
1325 | case OP_END: MOP_IN(OP_END);\r | |
1326 | n = (int)(s - sstart);\r | |
1327 | if (n > best_len) {\r | |
1328 | OnigRegion* region;\r | |
1329 | #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE\r | |
1330 | if (IS_FIND_LONGEST(option)) {\r | |
1331 | if (n > msa->best_len) {\r | |
1332 | msa->best_len = n;\r | |
1333 | msa->best_s = (UChar* )sstart;\r | |
1334 | }\r | |
1335 | else\r | |
1336 | goto end_best_len;\r | |
1337 | }\r | |
1338 | #endif\r | |
1339 | best_len = n;\r | |
1340 | region = msa->region;\r | |
1341 | if (region) {\r | |
1342 | #ifdef USE_POSIX_API_REGION_OPTION\r | |
1343 | if (IS_POSIX_REGION(msa->options)) {\r | |
1344 | posix_regmatch_t* rmt = (posix_regmatch_t* )region;\r | |
1345 | \r | |
1346 | rmt[0].rm_so = (regoff_t)(sstart - str);\r | |
1347 | rmt[0].rm_eo = (regoff_t)(s - str);\r | |
1348 | for (i = 1; i <= num_mem; i++) {\r | |
1349 | if (mem_end_stk[i] != INVALID_STACK_INDEX) {\r | |
1350 | if (BIT_STATUS_AT(reg->bt_mem_start, i))\r | |
1351 | rmt[i].rm_so = (regoff_t)(STACK_AT(mem_start_stk[i])->u.mem.pstr - str);\r | |
1352 | else\r | |
4d454c54 | 1353 | rmt[i].rm_so = (regoff_t)((UChar* )((void* )(UINTN)(mem_start_stk[i])) - str);\r |
14b0e578 CS |
1354 | \r |
1355 | rmt[i].rm_eo = (regoff_t)((BIT_STATUS_AT(reg->bt_mem_end, i)\r | |
1356 | ? STACK_AT(mem_end_stk[i])->u.mem.pstr\r | |
4d454c54 | 1357 | : (UChar* )((void* )(UINTN)mem_end_stk[i])) - str);\r |
14b0e578 CS |
1358 | }\r |
1359 | else {\r | |
1360 | rmt[i].rm_so = rmt[i].rm_eo = ONIG_REGION_NOTPOS;\r | |
1361 | }\r | |
1362 | }\r | |
1363 | }\r | |
1364 | else {\r | |
1365 | #endif /* USE_POSIX_API_REGION_OPTION */\r | |
1366 | region->beg[0] = (int)(sstart - str);\r | |
1367 | region->end[0] = (int)(s - str);\r | |
1368 | for (i = 1; i <= num_mem; i++) {\r | |
1369 | if (mem_end_stk[i] != INVALID_STACK_INDEX) {\r | |
1370 | if (BIT_STATUS_AT(reg->bt_mem_start, i))\r | |
1371 | region->beg[i] = (int)(STACK_AT(mem_start_stk[i])->u.mem.pstr - str);\r | |
1372 | else\r | |
4d454c54 | 1373 | region->beg[i] = (int)((UChar* )((void* )(UINTN)mem_start_stk[i]) - str);\r |
14b0e578 CS |
1374 | \r |
1375 | region->end[i] = (int)((BIT_STATUS_AT(reg->bt_mem_end, i)\r | |
1376 | ? STACK_AT(mem_end_stk[i])->u.mem.pstr\r | |
4d454c54 | 1377 | : (UChar* )((void* )(UINTN)mem_end_stk[i])) - str);\r |
14b0e578 CS |
1378 | }\r |
1379 | else {\r | |
1380 | region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;\r | |
1381 | }\r | |
1382 | }\r | |
1383 | \r | |
1384 | #ifdef USE_CAPTURE_HISTORY\r | |
1385 | if (reg->capture_history != 0) {\r | |
1386 | int r;\r | |
1387 | OnigCaptureTreeNode* node;\r | |
1388 | \r | |
1389 | if (IS_NULL(region->history_root)) {\r | |
1390 | region->history_root = node = history_node_new();\r | |
1391 | CHECK_NULL_RETURN_MEMERR(node);\r | |
1392 | }\r | |
1393 | else {\r | |
1394 | node = region->history_root;\r | |
1395 | history_tree_clear(node);\r | |
1396 | }\r | |
1397 | \r | |
1398 | node->group = 0;\r | |
1399 | node->beg = (int)(sstart - str);\r | |
1400 | node->end = (int)(s - str);\r | |
1401 | \r | |
1402 | stkp = stk_base;\r | |
1403 | r = make_capture_history_tree(region->history_root, &stkp,\r | |
1404 | stk, (UChar* )str, reg);\r | |
1405 | if (r < 0) {\r | |
1406 | best_len = r; /* error code */\r | |
1407 | goto finish;\r | |
1408 | }\r | |
1409 | }\r | |
1410 | #endif /* USE_CAPTURE_HISTORY */\r | |
1411 | #ifdef USE_POSIX_API_REGION_OPTION\r | |
1412 | } /* else IS_POSIX_REGION() */\r | |
1413 | #endif\r | |
1414 | } /* if (region) */\r | |
1415 | } /* n > best_len */\r | |
1416 | \r | |
1417 | #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE\r | |
1418 | end_best_len:\r | |
1419 | #endif\r | |
1420 | MOP_OUT;\r | |
1421 | \r | |
1422 | if (IS_FIND_CONDITION(option)) {\r | |
1423 | if (IS_FIND_NOT_EMPTY(option) && s == sstart) {\r | |
1424 | best_len = ONIG_MISMATCH;\r | |
1425 | goto fail; /* for retry */\r | |
1426 | }\r | |
1427 | if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) {\r | |
1428 | goto fail; /* for retry */\r | |
1429 | }\r | |
1430 | }\r | |
1431 | \r | |
1432 | /* default behavior: return first-matching result. */\r | |
1433 | goto finish;\r | |
1434 | break;\r | |
1435 | \r | |
1436 | case OP_EXACT1: MOP_IN(OP_EXACT1);\r | |
1437 | #if 0\r | |
1438 | DATA_ENSURE(1);\r | |
1439 | if (*p != *s) goto fail;\r | |
1440 | p++; s++;\r | |
1441 | #endif\r | |
1442 | if (*p != *s++) goto fail;\r | |
1443 | DATA_ENSURE(0);\r | |
1444 | p++;\r | |
1445 | MOP_OUT;\r | |
1446 | break;\r | |
1447 | \r | |
1448 | case OP_EXACT1_IC: MOP_IN(OP_EXACT1_IC);\r | |
1449 | {\r | |
1450 | int len;\r | |
d76946e3 | 1451 | UChar *q1, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];\r |
14b0e578 CS |
1452 | \r |
1453 | DATA_ENSURE(1);\r | |
1454 | len = ONIGENC_MBC_CASE_FOLD(encode,\r | |
1455 | /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */\r | |
1456 | case_fold_flag,\r | |
1457 | &s, end, lowbuf);\r | |
1458 | DATA_ENSURE(0);\r | |
d76946e3 | 1459 | q1 = lowbuf;\r |
14b0e578 | 1460 | while (len-- > 0) {\r |
d76946e3 | 1461 | if (*p != *q1) {\r |
14b0e578 CS |
1462 | goto fail;\r |
1463 | }\r | |
d76946e3 | 1464 | p++; q1++;\r |
14b0e578 CS |
1465 | }\r |
1466 | }\r | |
1467 | MOP_OUT;\r | |
1468 | break;\r | |
1469 | \r | |
1470 | case OP_EXACT2: MOP_IN(OP_EXACT2);\r | |
1471 | DATA_ENSURE(2);\r | |
1472 | if (*p != *s) goto fail;\r | |
1473 | p++; s++;\r | |
1474 | if (*p != *s) goto fail;\r | |
1475 | sprev = s;\r | |
1476 | p++; s++;\r | |
1477 | MOP_OUT;\r | |
1478 | continue;\r | |
1479 | break;\r | |
1480 | \r | |
1481 | case OP_EXACT3: MOP_IN(OP_EXACT3);\r | |
1482 | DATA_ENSURE(3);\r | |
1483 | if (*p != *s) goto fail;\r | |
1484 | p++; s++;\r | |
1485 | if (*p != *s) goto fail;\r | |
1486 | p++; s++;\r | |
1487 | if (*p != *s) goto fail;\r | |
1488 | sprev = s;\r | |
1489 | p++; s++;\r | |
1490 | MOP_OUT;\r | |
1491 | continue;\r | |
1492 | break;\r | |
1493 | \r | |
1494 | case OP_EXACT4: MOP_IN(OP_EXACT4);\r | |
1495 | DATA_ENSURE(4);\r | |
1496 | if (*p != *s) goto fail;\r | |
1497 | p++; s++;\r | |
1498 | if (*p != *s) goto fail;\r | |
1499 | p++; s++;\r | |
1500 | if (*p != *s) goto fail;\r | |
1501 | p++; s++;\r | |
1502 | if (*p != *s) goto fail;\r | |
1503 | sprev = s;\r | |
1504 | p++; s++;\r | |
1505 | MOP_OUT;\r | |
1506 | continue;\r | |
1507 | break;\r | |
1508 | \r | |
1509 | case OP_EXACT5: MOP_IN(OP_EXACT5);\r | |
1510 | DATA_ENSURE(5);\r | |
1511 | if (*p != *s) goto fail;\r | |
1512 | p++; s++;\r | |
1513 | if (*p != *s) goto fail;\r | |
1514 | p++; s++;\r | |
1515 | if (*p != *s) goto fail;\r | |
1516 | p++; s++;\r | |
1517 | if (*p != *s) goto fail;\r | |
1518 | p++; s++;\r | |
1519 | if (*p != *s) goto fail;\r | |
1520 | sprev = s;\r | |
1521 | p++; s++;\r | |
1522 | MOP_OUT;\r | |
1523 | continue;\r | |
1524 | break;\r | |
1525 | \r | |
1526 | case OP_EXACTN: MOP_IN(OP_EXACTN);\r | |
1527 | GET_LENGTH_INC(tlen, p);\r | |
1528 | DATA_ENSURE(tlen);\r | |
1529 | while (tlen-- > 0) {\r | |
1530 | if (*p++ != *s++) goto fail;\r | |
1531 | }\r | |
1532 | sprev = s - 1;\r | |
1533 | MOP_OUT;\r | |
1534 | continue;\r | |
1535 | break;\r | |
1536 | \r | |
1537 | case OP_EXACTN_IC: MOP_IN(OP_EXACTN_IC);\r | |
1538 | {\r | |
1539 | int len;\r | |
d76946e3 | 1540 | UChar *qn, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];\r |
14b0e578 CS |
1541 | \r |
1542 | GET_LENGTH_INC(tlen, p);\r | |
1543 | endp = p + tlen;\r | |
1544 | \r | |
1545 | while (p < endp) {\r | |
1546 | sprev = s;\r | |
1547 | DATA_ENSURE(1);\r | |
1548 | len = ONIGENC_MBC_CASE_FOLD(encode,\r | |
1549 | /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */\r | |
1550 | case_fold_flag,\r | |
1551 | &s, end, lowbuf);\r | |
1552 | DATA_ENSURE(0);\r | |
d76946e3 | 1553 | qn = lowbuf;\r |
14b0e578 | 1554 | while (len-- > 0) {\r |
d76946e3 LG |
1555 | if (*p != *qn) goto fail;\r |
1556 | p++; qn++;\r | |
14b0e578 CS |
1557 | }\r |
1558 | }\r | |
1559 | }\r | |
1560 | \r | |
1561 | MOP_OUT;\r | |
1562 | continue;\r | |
1563 | break;\r | |
1564 | \r | |
1565 | case OP_EXACTMB2N1: MOP_IN(OP_EXACTMB2N1);\r | |
1566 | DATA_ENSURE(2);\r | |
1567 | if (*p != *s) goto fail;\r | |
1568 | p++; s++;\r | |
1569 | if (*p != *s) goto fail;\r | |
1570 | p++; s++;\r | |
1571 | MOP_OUT;\r | |
1572 | break;\r | |
1573 | \r | |
1574 | case OP_EXACTMB2N2: MOP_IN(OP_EXACTMB2N2);\r | |
1575 | DATA_ENSURE(4);\r | |
1576 | if (*p != *s) goto fail;\r | |
1577 | p++; s++;\r | |
1578 | if (*p != *s) goto fail;\r | |
1579 | p++; s++;\r | |
1580 | sprev = s;\r | |
1581 | if (*p != *s) goto fail;\r | |
1582 | p++; s++;\r | |
1583 | if (*p != *s) goto fail;\r | |
1584 | p++; s++;\r | |
1585 | MOP_OUT;\r | |
1586 | continue;\r | |
1587 | break;\r | |
1588 | \r | |
1589 | case OP_EXACTMB2N3: MOP_IN(OP_EXACTMB2N3);\r | |
1590 | DATA_ENSURE(6);\r | |
1591 | if (*p != *s) goto fail;\r | |
1592 | p++; s++;\r | |
1593 | if (*p != *s) goto fail;\r | |
1594 | p++; s++;\r | |
1595 | if (*p != *s) goto fail;\r | |
1596 | p++; s++;\r | |
1597 | if (*p != *s) goto fail;\r | |
1598 | p++; s++;\r | |
1599 | sprev = s;\r | |
1600 | if (*p != *s) goto fail;\r | |
1601 | p++; s++;\r | |
1602 | if (*p != *s) goto fail;\r | |
1603 | p++; s++;\r | |
1604 | MOP_OUT;\r | |
1605 | continue;\r | |
1606 | break;\r | |
1607 | \r | |
1608 | case OP_EXACTMB2N: MOP_IN(OP_EXACTMB2N);\r | |
1609 | GET_LENGTH_INC(tlen, p);\r | |
1610 | DATA_ENSURE(tlen * 2);\r | |
1611 | while (tlen-- > 0) {\r | |
1612 | if (*p != *s) goto fail;\r | |
1613 | p++; s++;\r | |
1614 | if (*p != *s) goto fail;\r | |
1615 | p++; s++;\r | |
1616 | }\r | |
1617 | sprev = s - 2;\r | |
1618 | MOP_OUT;\r | |
1619 | continue;\r | |
1620 | break;\r | |
1621 | \r | |
1622 | case OP_EXACTMB3N: MOP_IN(OP_EXACTMB3N);\r | |
1623 | GET_LENGTH_INC(tlen, p);\r | |
1624 | DATA_ENSURE(tlen * 3);\r | |
1625 | while (tlen-- > 0) {\r | |
1626 | if (*p != *s) goto fail;\r | |
1627 | p++; s++;\r | |
1628 | if (*p != *s) goto fail;\r | |
1629 | p++; s++;\r | |
1630 | if (*p != *s) goto fail;\r | |
1631 | p++; s++;\r | |
1632 | }\r | |
1633 | sprev = s - 3;\r | |
1634 | MOP_OUT;\r | |
1635 | continue;\r | |
1636 | break;\r | |
1637 | \r | |
1638 | case OP_EXACTMBN: MOP_IN(OP_EXACTMBN);\r | |
1639 | GET_LENGTH_INC(tlen, p); /* mb-len */\r | |
1640 | GET_LENGTH_INC(tlen2, p); /* string len */\r | |
1641 | tlen2 *= tlen;\r | |
1642 | DATA_ENSURE(tlen2);\r | |
1643 | while (tlen2-- > 0) {\r | |
1644 | if (*p != *s) goto fail;\r | |
1645 | p++; s++;\r | |
1646 | }\r | |
1647 | sprev = s - tlen;\r | |
1648 | MOP_OUT;\r | |
1649 | continue;\r | |
1650 | break;\r | |
1651 | \r | |
1652 | case OP_CCLASS: MOP_IN(OP_CCLASS);\r | |
1653 | DATA_ENSURE(1);\r | |
1654 | if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail;\r | |
1655 | p += SIZE_BITSET;\r | |
1656 | s += enclen(encode, s); /* OP_CCLASS can match mb-code. \D, \S */\r | |
1657 | MOP_OUT;\r | |
1658 | break;\r | |
1659 | \r | |
1660 | case OP_CCLASS_MB: MOP_IN(OP_CCLASS_MB);\r | |
1661 | if (! ONIGENC_IS_MBC_HEAD(encode, s)) goto fail;\r | |
1662 | \r | |
1663 | cclass_mb:\r | |
1664 | GET_LENGTH_INC(tlen, p);\r | |
1665 | {\r | |
1666 | OnigCodePoint code;\r | |
1667 | UChar *ss;\r | |
1668 | int mb_len;\r | |
1669 | \r | |
1670 | DATA_ENSURE(1);\r | |
1671 | mb_len = enclen(encode, s);\r | |
1672 | DATA_ENSURE(mb_len);\r | |
1673 | ss = s;\r | |
1674 | s += mb_len;\r | |
1675 | code = ONIGENC_MBC_TO_CODE(encode, ss, s);\r | |
1676 | \r | |
1677 | #ifdef PLATFORM_UNALIGNED_WORD_ACCESS\r | |
1678 | if (! onig_is_in_code_range(p, code)) goto fail;\r | |
1679 | #else\r | |
1680 | q = p;\r | |
1681 | ALIGNMENT_RIGHT(q);\r | |
1682 | if (! onig_is_in_code_range(q, code)) goto fail;\r | |
1683 | #endif\r | |
1684 | }\r | |
1685 | p += tlen;\r | |
1686 | MOP_OUT;\r | |
1687 | break;\r | |
1688 | \r | |
1689 | case OP_CCLASS_MIX: MOP_IN(OP_CCLASS_MIX);\r | |
1690 | DATA_ENSURE(1);\r | |
1691 | if (ONIGENC_IS_MBC_HEAD(encode, s)) {\r | |
1692 | p += SIZE_BITSET;\r | |
1693 | goto cclass_mb;\r | |
1694 | }\r | |
1695 | else {\r | |
1696 | if (BITSET_AT(((BitSetRef )p), *s) == 0)\r | |
1697 | goto fail;\r | |
1698 | \r | |
1699 | p += SIZE_BITSET;\r | |
1700 | GET_LENGTH_INC(tlen, p);\r | |
1701 | p += tlen;\r | |
1702 | s++;\r | |
1703 | }\r | |
1704 | MOP_OUT;\r | |
1705 | break;\r | |
1706 | \r | |
1707 | case OP_CCLASS_NOT: MOP_IN(OP_CCLASS_NOT);\r | |
1708 | DATA_ENSURE(1);\r | |
1709 | if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail;\r | |
1710 | p += SIZE_BITSET;\r | |
1711 | s += enclen(encode, s);\r | |
1712 | MOP_OUT;\r | |
1713 | break;\r | |
1714 | \r | |
1715 | case OP_CCLASS_MB_NOT: MOP_IN(OP_CCLASS_MB_NOT);\r | |
1716 | DATA_ENSURE(1);\r | |
1717 | if (! ONIGENC_IS_MBC_HEAD(encode, s)) {\r | |
1718 | s++;\r | |
1719 | GET_LENGTH_INC(tlen, p);\r | |
1720 | p += tlen;\r | |
1721 | goto cc_mb_not_success;\r | |
1722 | }\r | |
1723 | \r | |
1724 | cclass_mb_not:\r | |
1725 | GET_LENGTH_INC(tlen, p);\r | |
1726 | {\r | |
1727 | OnigCodePoint code;\r | |
1728 | UChar *ss;\r | |
1729 | int mb_len = enclen(encode, s);\r | |
1730 | \r | |
1731 | if (! DATA_ENSURE_CHECK(mb_len)) {\r | |
1732 | DATA_ENSURE(1);\r | |
1733 | s = (UChar* )end;\r | |
1734 | p += tlen;\r | |
1735 | goto cc_mb_not_success;\r | |
1736 | }\r | |
1737 | \r | |
1738 | ss = s;\r | |
1739 | s += mb_len;\r | |
1740 | code = ONIGENC_MBC_TO_CODE(encode, ss, s);\r | |
1741 | \r | |
1742 | #ifdef PLATFORM_UNALIGNED_WORD_ACCESS\r | |
1743 | if (onig_is_in_code_range(p, code)) goto fail;\r | |
1744 | #else\r | |
1745 | q = p;\r | |
1746 | ALIGNMENT_RIGHT(q);\r | |
1747 | if (onig_is_in_code_range(q, code)) goto fail;\r | |
1748 | #endif\r | |
1749 | }\r | |
1750 | p += tlen;\r | |
1751 | \r | |
1752 | cc_mb_not_success:\r | |
1753 | MOP_OUT;\r | |
1754 | break;\r | |
1755 | \r | |
1756 | case OP_CCLASS_MIX_NOT: MOP_IN(OP_CCLASS_MIX_NOT);\r | |
1757 | DATA_ENSURE(1);\r | |
1758 | if (ONIGENC_IS_MBC_HEAD(encode, s)) {\r | |
1759 | p += SIZE_BITSET;\r | |
1760 | goto cclass_mb_not;\r | |
1761 | }\r | |
1762 | else {\r | |
1763 | if (BITSET_AT(((BitSetRef )p), *s) != 0)\r | |
1764 | goto fail;\r | |
1765 | \r | |
1766 | p += SIZE_BITSET;\r | |
1767 | GET_LENGTH_INC(tlen, p);\r | |
1768 | p += tlen;\r | |
1769 | s++;\r | |
1770 | }\r | |
1771 | MOP_OUT;\r | |
1772 | break;\r | |
1773 | \r | |
1774 | case OP_CCLASS_NODE: MOP_IN(OP_CCLASS_NODE);\r | |
1775 | {\r | |
1776 | OnigCodePoint code;\r | |
1777 | void *node;\r | |
1778 | int mb_len;\r | |
1779 | UChar *ss;\r | |
1780 | \r | |
1781 | DATA_ENSURE(1);\r | |
1782 | GET_POINTER_INC(node, p);\r | |
1783 | mb_len = enclen(encode, s);\r | |
1784 | ss = s;\r | |
1785 | s += mb_len;\r | |
1786 | DATA_ENSURE(0);\r | |
1787 | code = ONIGENC_MBC_TO_CODE(encode, ss, s);\r | |
1788 | if (onig_is_code_in_cc_len(mb_len, code, node) == 0) goto fail;\r | |
1789 | }\r | |
1790 | MOP_OUT;\r | |
1791 | break;\r | |
1792 | \r | |
1793 | case OP_ANYCHAR: MOP_IN(OP_ANYCHAR);\r | |
1794 | DATA_ENSURE(1);\r | |
1795 | n = enclen(encode, s);\r | |
1796 | DATA_ENSURE(n);\r | |
1797 | if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;\r | |
1798 | s += n;\r | |
1799 | MOP_OUT;\r | |
1800 | break;\r | |
1801 | \r | |
1802 | case OP_ANYCHAR_ML: MOP_IN(OP_ANYCHAR_ML);\r | |
1803 | DATA_ENSURE(1);\r | |
1804 | n = enclen(encode, s);\r | |
1805 | DATA_ENSURE(n);\r | |
1806 | s += n;\r | |
1807 | MOP_OUT;\r | |
1808 | break;\r | |
1809 | \r | |
1810 | case OP_ANYCHAR_STAR: MOP_IN(OP_ANYCHAR_STAR);\r | |
1811 | while (DATA_ENSURE_CHECK1) {\r | |
1812 | STACK_PUSH_ALT(p, s, sprev);\r | |
1813 | n = enclen(encode, s);\r | |
1814 | DATA_ENSURE(n);\r | |
1815 | if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;\r | |
1816 | sprev = s;\r | |
1817 | s += n;\r | |
1818 | }\r | |
1819 | MOP_OUT;\r | |
1820 | break;\r | |
1821 | \r | |
1822 | case OP_ANYCHAR_ML_STAR: MOP_IN(OP_ANYCHAR_ML_STAR);\r | |
1823 | while (DATA_ENSURE_CHECK1) {\r | |
1824 | STACK_PUSH_ALT(p, s, sprev);\r | |
1825 | n = enclen(encode, s);\r | |
1826 | if (n > 1) {\r | |
1827 | DATA_ENSURE(n);\r | |
1828 | sprev = s;\r | |
1829 | s += n;\r | |
1830 | }\r | |
1831 | else {\r | |
1832 | sprev = s;\r | |
1833 | s++;\r | |
1834 | }\r | |
1835 | }\r | |
1836 | MOP_OUT;\r | |
1837 | break;\r | |
1838 | \r | |
1839 | case OP_ANYCHAR_STAR_PEEK_NEXT: MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT);\r | |
1840 | while (DATA_ENSURE_CHECK1) {\r | |
1841 | if (*p == *s) {\r | |
1842 | STACK_PUSH_ALT(p + 1, s, sprev);\r | |
1843 | }\r | |
1844 | n = enclen(encode, s);\r | |
1845 | DATA_ENSURE(n);\r | |
1846 | if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;\r | |
1847 | sprev = s;\r | |
1848 | s += n;\r | |
1849 | }\r | |
1850 | p++;\r | |
1851 | MOP_OUT;\r | |
1852 | break;\r | |
1853 | \r | |
1854 | case OP_ANYCHAR_ML_STAR_PEEK_NEXT:MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);\r | |
1855 | while (DATA_ENSURE_CHECK1) {\r | |
1856 | if (*p == *s) {\r | |
1857 | STACK_PUSH_ALT(p + 1, s, sprev);\r | |
1858 | }\r | |
1859 | n = enclen(encode, s);\r | |
1860 | if (n > 1) {\r | |
1861 | DATA_ENSURE(n);\r | |
1862 | sprev = s;\r | |
1863 | s += n;\r | |
1864 | }\r | |
1865 | else {\r | |
1866 | sprev = s;\r | |
1867 | s++;\r | |
1868 | }\r | |
1869 | }\r | |
1870 | p++;\r | |
1871 | MOP_OUT;\r | |
1872 | break;\r | |
1873 | \r | |
1874 | #ifdef USE_COMBINATION_EXPLOSION_CHECK\r | |
1875 | case OP_STATE_CHECK_ANYCHAR_STAR: MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR);\r | |
1876 | GET_STATE_CHECK_NUM_INC(mem, p);\r | |
1877 | while (DATA_ENSURE_CHECK1) {\r | |
1878 | STATE_CHECK_VAL(scv, mem);\r | |
1879 | if (scv) goto fail;\r | |
1880 | \r | |
1881 | STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);\r | |
1882 | n = enclen(encode, s);\r | |
1883 | DATA_ENSURE(n);\r | |
1884 | if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;\r | |
1885 | sprev = s;\r | |
1886 | s += n;\r | |
1887 | }\r | |
1888 | MOP_OUT;\r | |
1889 | break;\r | |
1890 | \r | |
1891 | case OP_STATE_CHECK_ANYCHAR_ML_STAR:\r | |
1892 | MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR);\r | |
1893 | \r | |
1894 | GET_STATE_CHECK_NUM_INC(mem, p);\r | |
1895 | while (DATA_ENSURE_CHECK1) {\r | |
1896 | STATE_CHECK_VAL(scv, mem);\r | |
1897 | if (scv) goto fail;\r | |
1898 | \r | |
1899 | STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);\r | |
1900 | n = enclen(encode, s);\r | |
1901 | if (n > 1) {\r | |
1902 | DATA_ENSURE(n);\r | |
1903 | sprev = s;\r | |
1904 | s += n;\r | |
1905 | }\r | |
1906 | else {\r | |
1907 | sprev = s;\r | |
1908 | s++;\r | |
1909 | }\r | |
1910 | }\r | |
1911 | MOP_OUT;\r | |
1912 | break;\r | |
1913 | #endif /* USE_COMBINATION_EXPLOSION_CHECK */\r | |
1914 | \r | |
1915 | case OP_WORD: MOP_IN(OP_WORD);\r | |
1916 | DATA_ENSURE(1);\r | |
1917 | if (! ONIGENC_IS_MBC_WORD(encode, s, end))\r | |
1918 | goto fail;\r | |
1919 | \r | |
1920 | s += enclen(encode, s);\r | |
1921 | MOP_OUT;\r | |
1922 | break;\r | |
1923 | \r | |
1924 | case OP_NOT_WORD: MOP_IN(OP_NOT_WORD);\r | |
1925 | DATA_ENSURE(1);\r | |
1926 | if (ONIGENC_IS_MBC_WORD(encode, s, end))\r | |
1927 | goto fail;\r | |
1928 | \r | |
1929 | s += enclen(encode, s);\r | |
1930 | MOP_OUT;\r | |
1931 | break;\r | |
1932 | \r | |
1933 | case OP_WORD_BOUND: MOP_IN(OP_WORD_BOUND);\r | |
1934 | if (ON_STR_BEGIN(s)) {\r | |
1935 | DATA_ENSURE(1);\r | |
1936 | if (! ONIGENC_IS_MBC_WORD(encode, s, end))\r | |
1937 | goto fail;\r | |
1938 | }\r | |
1939 | else if (ON_STR_END(s)) {\r | |
1940 | if (! ONIGENC_IS_MBC_WORD(encode, sprev, end))\r | |
1941 | goto fail;\r | |
1942 | }\r | |
1943 | else {\r | |
1944 | if (ONIGENC_IS_MBC_WORD(encode, s, end)\r | |
1945 | == ONIGENC_IS_MBC_WORD(encode, sprev, end))\r | |
1946 | goto fail;\r | |
1947 | }\r | |
1948 | MOP_OUT;\r | |
1949 | continue;\r | |
1950 | break;\r | |
1951 | \r | |
1952 | case OP_NOT_WORD_BOUND: MOP_IN(OP_NOT_WORD_BOUND);\r | |
1953 | if (ON_STR_BEGIN(s)) {\r | |
1954 | if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end))\r | |
1955 | goto fail;\r | |
1956 | }\r | |
1957 | else if (ON_STR_END(s)) {\r | |
1958 | if (ONIGENC_IS_MBC_WORD(encode, sprev, end))\r | |
1959 | goto fail;\r | |
1960 | }\r | |
1961 | else {\r | |
1962 | if (ONIGENC_IS_MBC_WORD(encode, s, end)\r | |
1963 | != ONIGENC_IS_MBC_WORD(encode, sprev, end))\r | |
1964 | goto fail;\r | |
1965 | }\r | |
1966 | MOP_OUT;\r | |
1967 | continue;\r | |
1968 | break;\r | |
1969 | \r | |
1970 | #ifdef USE_WORD_BEGIN_END\r | |
1971 | case OP_WORD_BEGIN: MOP_IN(OP_WORD_BEGIN);\r | |
1972 | if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) {\r | |
1973 | if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) {\r | |
1974 | MOP_OUT;\r | |
1975 | continue;\r | |
1976 | }\r | |
1977 | }\r | |
1978 | goto fail;\r | |
1979 | break;\r | |
1980 | \r | |
1981 | case OP_WORD_END: MOP_IN(OP_WORD_END);\r | |
1982 | if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) {\r | |
1983 | if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) {\r | |
1984 | MOP_OUT;\r | |
1985 | continue;\r | |
1986 | }\r | |
1987 | }\r | |
1988 | goto fail;\r | |
1989 | break;\r | |
1990 | #endif\r | |
1991 | \r | |
1992 | case OP_BEGIN_BUF: MOP_IN(OP_BEGIN_BUF);\r | |
1993 | if (! ON_STR_BEGIN(s)) goto fail;\r | |
1994 | \r | |
1995 | MOP_OUT;\r | |
1996 | continue;\r | |
1997 | break;\r | |
1998 | \r | |
1999 | case OP_END_BUF: MOP_IN(OP_END_BUF);\r | |
2000 | if (! ON_STR_END(s)) goto fail;\r | |
2001 | \r | |
2002 | MOP_OUT;\r | |
2003 | continue;\r | |
2004 | break;\r | |
2005 | \r | |
2006 | case OP_BEGIN_LINE: MOP_IN(OP_BEGIN_LINE);\r | |
2007 | if (ON_STR_BEGIN(s)) {\r | |
2008 | if (IS_NOTBOL(msa->options)) goto fail;\r | |
2009 | MOP_OUT;\r | |
2010 | continue;\r | |
2011 | }\r | |
2012 | else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) {\r | |
2013 | MOP_OUT;\r | |
2014 | continue;\r | |
2015 | }\r | |
2016 | goto fail;\r | |
2017 | break;\r | |
2018 | \r | |
2019 | case OP_END_LINE: MOP_IN(OP_END_LINE);\r | |
2020 | if (ON_STR_END(s)) {\r | |
2021 | #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE\r | |
2022 | if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {\r | |
2023 | #endif\r | |
2024 | if (IS_NOTEOL(msa->options)) goto fail;\r | |
2025 | MOP_OUT;\r | |
2026 | continue;\r | |
2027 | #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE\r | |
2028 | }\r | |
2029 | #endif\r | |
2030 | }\r | |
2031 | else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) {\r | |
2032 | MOP_OUT;\r | |
2033 | continue;\r | |
2034 | }\r | |
2035 | #ifdef USE_CRNL_AS_LINE_TERMINATOR\r | |
2036 | else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {\r | |
2037 | MOP_OUT;\r | |
2038 | continue;\r | |
2039 | }\r | |
2040 | #endif\r | |
2041 | goto fail;\r | |
2042 | break;\r | |
2043 | \r | |
2044 | case OP_SEMI_END_BUF: MOP_IN(OP_SEMI_END_BUF);\r | |
2045 | if (ON_STR_END(s)) {\r | |
2046 | #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE\r | |
2047 | if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {\r | |
2048 | #endif\r | |
2049 | if (IS_NOTEOL(msa->options)) goto fail;\r | |
2050 | MOP_OUT;\r | |
2051 | continue;\r | |
2052 | #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE\r | |
2053 | }\r | |
2054 | #endif\r | |
2055 | }\r | |
2056 | else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) &&\r | |
2057 | ON_STR_END(s + enclen(encode, s))) {\r | |
2058 | MOP_OUT;\r | |
2059 | continue;\r | |
2060 | }\r | |
2061 | #ifdef USE_CRNL_AS_LINE_TERMINATOR\r | |
2062 | else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {\r | |
2063 | UChar* ss = s + enclen(encode, s);\r | |
2064 | ss += enclen(encode, ss);\r | |
2065 | if (ON_STR_END(ss)) {\r | |
2066 | MOP_OUT;\r | |
2067 | continue;\r | |
2068 | }\r | |
2069 | }\r | |
2070 | #endif\r | |
2071 | goto fail;\r | |
2072 | break;\r | |
2073 | \r | |
2074 | case OP_BEGIN_POSITION: MOP_IN(OP_BEGIN_POSITION);\r | |
2075 | if (s != msa->start)\r | |
2076 | goto fail;\r | |
2077 | \r | |
2078 | MOP_OUT;\r | |
2079 | continue;\r | |
2080 | break;\r | |
2081 | \r | |
2082 | case OP_MEMORY_START_PUSH: MOP_IN(OP_MEMORY_START_PUSH);\r | |
2083 | GET_MEMNUM_INC(mem, p);\r | |
2084 | STACK_PUSH_MEM_START(mem, s);\r | |
2085 | MOP_OUT;\r | |
2086 | continue;\r | |
2087 | break;\r | |
2088 | \r | |
2089 | case OP_MEMORY_START: MOP_IN(OP_MEMORY_START);\r | |
2090 | GET_MEMNUM_INC(mem, p);\r | |
4d454c54 | 2091 | mem_start_stk[mem] = (OnigStackIndex )(UINTN)((void* )s);\r |
14b0e578 CS |
2092 | MOP_OUT;\r |
2093 | continue;\r | |
2094 | break;\r | |
2095 | \r | |
2096 | case OP_MEMORY_END_PUSH: MOP_IN(OP_MEMORY_END_PUSH);\r | |
2097 | GET_MEMNUM_INC(mem, p);\r | |
2098 | STACK_PUSH_MEM_END(mem, s);\r | |
2099 | MOP_OUT;\r | |
2100 | continue;\r | |
2101 | break;\r | |
2102 | \r | |
2103 | case OP_MEMORY_END: MOP_IN(OP_MEMORY_END);\r | |
2104 | GET_MEMNUM_INC(mem, p);\r | |
4d454c54 | 2105 | mem_end_stk[mem] = (OnigStackIndex )(UINTN)((void* )s);\r |
14b0e578 CS |
2106 | MOP_OUT;\r |
2107 | continue;\r | |
2108 | break;\r | |
2109 | \r | |
2110 | #ifdef USE_SUBEXP_CALL\r | |
2111 | case OP_MEMORY_END_PUSH_REC: MOP_IN(OP_MEMORY_END_PUSH_REC);\r | |
2112 | GET_MEMNUM_INC(mem, p);\r | |
2113 | STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */\r | |
2114 | STACK_PUSH_MEM_END(mem, s);\r | |
2115 | mem_start_stk[mem] = GET_STACK_INDEX(stkp);\r | |
2116 | MOP_OUT;\r | |
2117 | continue;\r | |
2118 | break;\r | |
2119 | \r | |
2120 | case OP_MEMORY_END_REC: MOP_IN(OP_MEMORY_END_REC);\r | |
2121 | GET_MEMNUM_INC(mem, p);\r | |
4d454c54 | 2122 | mem_end_stk[mem] = (OnigStackIndex )(UINTN)((void* )s);\r |
14b0e578 CS |
2123 | STACK_GET_MEM_START(mem, stkp);\r |
2124 | \r | |
2125 | if (BIT_STATUS_AT(reg->bt_mem_start, mem))\r | |
2126 | mem_start_stk[mem] = GET_STACK_INDEX(stkp);\r | |
2127 | else\r | |
4d454c54 | 2128 | mem_start_stk[mem] = (OnigStackIndex )(UINTN)((void* )stkp->u.mem.pstr);\r |
14b0e578 CS |
2129 | \r |
2130 | STACK_PUSH_MEM_END_MARK(mem);\r | |
2131 | MOP_OUT;\r | |
2132 | continue;\r | |
2133 | break;\r | |
2134 | #endif\r | |
2135 | \r | |
2136 | case OP_BACKREF1: MOP_IN(OP_BACKREF1);\r | |
2137 | mem = 1;\r | |
2138 | goto backref;\r | |
2139 | break;\r | |
2140 | \r | |
2141 | case OP_BACKREF2: MOP_IN(OP_BACKREF2);\r | |
2142 | mem = 2;\r | |
2143 | goto backref;\r | |
2144 | break;\r | |
2145 | \r | |
2146 | case OP_BACKREFN: MOP_IN(OP_BACKREFN);\r | |
2147 | GET_MEMNUM_INC(mem, p);\r | |
2148 | backref:\r | |
2149 | {\r | |
2150 | int len;\r | |
2151 | UChar *pstart, *pend;\r | |
2152 | \r | |
2153 | /* if you want to remove following line, \r | |
2154 | you should check in parse and compile time. */\r | |
2155 | if (mem > num_mem) goto fail;\r | |
2156 | if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;\r | |
2157 | if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;\r | |
2158 | \r | |
2159 | if (BIT_STATUS_AT(reg->bt_mem_start, mem))\r | |
2160 | pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;\r | |
2161 | else\r | |
4d454c54 | 2162 | pstart = (UChar* )((void* )(UINTN)mem_start_stk[mem]);\r |
14b0e578 CS |
2163 | \r |
2164 | pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)\r | |
2165 | ? STACK_AT(mem_end_stk[mem])->u.mem.pstr\r | |
4d454c54 | 2166 | : (UChar* )((void* )(UINTN)mem_end_stk[mem]));\r |
14b0e578 CS |
2167 | n = (int)(pend - pstart);\r |
2168 | DATA_ENSURE(n);\r | |
2169 | sprev = s;\r | |
2170 | STRING_CMP(pstart, s, n);\r | |
2171 | while (sprev + (len = enclen(encode, sprev)) < s)\r | |
2172 | sprev += len;\r | |
2173 | \r | |
2174 | MOP_OUT;\r | |
2175 | continue;\r | |
2176 | }\r | |
2177 | break;\r | |
2178 | \r | |
2179 | case OP_BACKREFN_IC: MOP_IN(OP_BACKREFN_IC);\r | |
2180 | GET_MEMNUM_INC(mem, p);\r | |
2181 | {\r | |
2182 | int len;\r | |
2183 | UChar *pstart, *pend;\r | |
2184 | \r | |
2185 | /* if you want to remove following line, \r | |
2186 | you should check in parse and compile time. */\r | |
2187 | if (mem > num_mem) goto fail;\r | |
2188 | if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;\r | |
2189 | if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;\r | |
2190 | \r | |
2191 | if (BIT_STATUS_AT(reg->bt_mem_start, mem))\r | |
2192 | pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;\r | |
2193 | else\r | |
4d454c54 | 2194 | pstart = (UChar* )((void* )(UINTN)mem_start_stk[mem]);\r |
14b0e578 CS |
2195 | \r |
2196 | pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)\r | |
2197 | ? STACK_AT(mem_end_stk[mem])->u.mem.pstr\r | |
4d454c54 | 2198 | : (UChar* )((void* )(UINTN)mem_end_stk[mem]));\r |
14b0e578 CS |
2199 | n = (int)(pend - pstart);\r |
2200 | DATA_ENSURE(n);\r | |
2201 | sprev = s;\r | |
2202 | STRING_CMP_IC(case_fold_flag, pstart, &s, n);\r | |
2203 | while (sprev + (len = enclen(encode, sprev)) < s)\r | |
2204 | sprev += len;\r | |
2205 | \r | |
2206 | MOP_OUT;\r | |
2207 | continue;\r | |
2208 | }\r | |
2209 | break;\r | |
2210 | \r | |
2211 | case OP_BACKREF_MULTI: MOP_IN(OP_BACKREF_MULTI);\r | |
2212 | {\r | |
2213 | int len, is_fail;\r | |
2214 | UChar *pstart, *pend, *swork;\r | |
2215 | \r | |
2216 | GET_LENGTH_INC(tlen, p);\r | |
2217 | for (i = 0; i < tlen; i++) {\r | |
2218 | GET_MEMNUM_INC(mem, p);\r | |
2219 | \r | |
2220 | if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;\r | |
2221 | if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;\r | |
2222 | \r | |
2223 | if (BIT_STATUS_AT(reg->bt_mem_start, mem))\r | |
2224 | pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;\r | |
2225 | else\r | |
4d454c54 | 2226 | pstart = (UChar* )((void* )(UINTN)mem_start_stk[mem]);\r |
14b0e578 CS |
2227 | \r |
2228 | pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)\r | |
2229 | ? STACK_AT(mem_end_stk[mem])->u.mem.pstr\r | |
4d454c54 | 2230 | : (UChar* )((void* )(UINTN)mem_end_stk[mem]));\r |
14b0e578 CS |
2231 | n = (int)(pend - pstart);\r |
2232 | DATA_ENSURE(n);\r | |
2233 | sprev = s;\r | |
2234 | swork = s;\r | |
2235 | STRING_CMP_VALUE(pstart, swork, n, is_fail);\r | |
2236 | if (is_fail) continue;\r | |
2237 | s = swork;\r | |
2238 | while (sprev + (len = enclen(encode, sprev)) < s)\r | |
2239 | sprev += len;\r | |
2240 | \r | |
2241 | p += (SIZE_MEMNUM * (tlen - i - 1));\r | |
2242 | break; /* success */\r | |
2243 | }\r | |
2244 | if (i == tlen) goto fail;\r | |
2245 | MOP_OUT;\r | |
2246 | continue;\r | |
2247 | }\r | |
2248 | break;\r | |
2249 | \r | |
2250 | case OP_BACKREF_MULTI_IC: MOP_IN(OP_BACKREF_MULTI_IC);\r | |
2251 | {\r | |
2252 | int len, is_fail;\r | |
2253 | UChar *pstart, *pend, *swork;\r | |
2254 | \r | |
2255 | GET_LENGTH_INC(tlen, p);\r | |
2256 | for (i = 0; i < tlen; i++) {\r | |
2257 | GET_MEMNUM_INC(mem, p);\r | |
2258 | \r | |
2259 | if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;\r | |
2260 | if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;\r | |
2261 | \r | |
2262 | if (BIT_STATUS_AT(reg->bt_mem_start, mem))\r | |
2263 | pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;\r | |
2264 | else\r | |
4d454c54 | 2265 | pstart = (UChar* )((void* )(UINTN)mem_start_stk[mem]);\r |
14b0e578 CS |
2266 | \r |
2267 | pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)\r | |
2268 | ? STACK_AT(mem_end_stk[mem])->u.mem.pstr\r | |
4d454c54 | 2269 | : (UChar* )((void* )(UINTN)mem_end_stk[mem]));\r |
14b0e578 CS |
2270 | n = (int)(pend - pstart);\r |
2271 | DATA_ENSURE(n);\r | |
2272 | sprev = s;\r | |
2273 | swork = s;\r | |
2274 | STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail);\r | |
2275 | if (is_fail) continue;\r | |
2276 | s = swork;\r | |
2277 | while (sprev + (len = enclen(encode, sprev)) < s)\r | |
2278 | sprev += len;\r | |
2279 | \r | |
2280 | p += (SIZE_MEMNUM * (tlen - i - 1));\r | |
2281 | break; /* success */\r | |
2282 | }\r | |
2283 | if (i == tlen) goto fail;\r | |
2284 | MOP_OUT;\r | |
2285 | continue;\r | |
2286 | }\r | |
2287 | break;\r | |
2288 | \r | |
2289 | #ifdef USE_BACKREF_WITH_LEVEL\r | |
2290 | case OP_BACKREF_WITH_LEVEL:\r | |
2291 | {\r | |
2292 | int len;\r | |
2293 | OnigOptionType ic;\r | |
2294 | LengthType level;\r | |
2295 | \r | |
2296 | GET_OPTION_INC(ic, p);\r | |
2297 | GET_LENGTH_INC(level, p);\r | |
2298 | GET_LENGTH_INC(tlen, p);\r | |
2299 | \r | |
2300 | sprev = s;\r | |
2301 | if (backref_match_at_nested_level(reg, stk, stk_base, ic\r | |
2302 | , case_fold_flag, (int )level, (int )tlen, p, &s, end)) {\r | |
2303 | while (sprev + (len = enclen(encode, sprev)) < s)\r | |
2304 | sprev += len;\r | |
2305 | \r | |
2306 | p += (SIZE_MEMNUM * tlen);\r | |
2307 | }\r | |
2308 | else\r | |
2309 | goto fail;\r | |
2310 | \r | |
2311 | MOP_OUT;\r | |
2312 | continue;\r | |
2313 | }\r | |
2314 | \r | |
2315 | break;\r | |
2316 | #endif\r | |
2317 | \r | |
2318 | #if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */\r | |
2319 | case OP_SET_OPTION_PUSH: MOP_IN(OP_SET_OPTION_PUSH);\r | |
2320 | GET_OPTION_INC(option, p);\r | |
2321 | STACK_PUSH_ALT(p, s, sprev);\r | |
2322 | p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL;\r | |
2323 | MOP_OUT;\r | |
2324 | continue;\r | |
2325 | break;\r | |
2326 | \r | |
2327 | case OP_SET_OPTION: MOP_IN(OP_SET_OPTION);\r | |
2328 | GET_OPTION_INC(option, p);\r | |
2329 | MOP_OUT;\r | |
2330 | continue;\r | |
2331 | break;\r | |
2332 | #endif\r | |
2333 | \r | |
2334 | case OP_NULL_CHECK_START: MOP_IN(OP_NULL_CHECK_START);\r | |
2335 | GET_MEMNUM_INC(mem, p); /* mem: null check id */\r | |
2336 | STACK_PUSH_NULL_CHECK_START(mem, s);\r | |
2337 | MOP_OUT;\r | |
2338 | continue;\r | |
2339 | break;\r | |
2340 | \r | |
2341 | case OP_NULL_CHECK_END: MOP_IN(OP_NULL_CHECK_END);\r | |
2342 | {\r | |
2343 | int isnull;\r | |
2344 | \r | |
2345 | GET_MEMNUM_INC(mem, p); /* mem: null check id */\r | |
2346 | STACK_NULL_CHECK(isnull, mem, s);\r | |
2347 | if (isnull) {\r | |
2348 | #ifdef ONIG_DEBUG_MATCH\r | |
2349 | fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%d\n",\r | |
2350 | (int )mem, (int )s);\r | |
2351 | #endif\r | |
2352 | null_check_found:\r | |
2353 | /* empty loop founded, skip next instruction */\r | |
2354 | switch (*p++) {\r | |
2355 | case OP_JUMP:\r | |
2356 | case OP_PUSH:\r | |
2357 | p += SIZE_RELADDR;\r | |
2358 | break;\r | |
2359 | case OP_REPEAT_INC:\r | |
2360 | case OP_REPEAT_INC_NG:\r | |
2361 | case OP_REPEAT_INC_SG:\r | |
2362 | case OP_REPEAT_INC_NG_SG:\r | |
2363 | p += SIZE_MEMNUM;\r | |
2364 | break;\r | |
2365 | default:\r | |
2366 | goto unexpected_bytecode_error;\r | |
2367 | break;\r | |
2368 | }\r | |
2369 | }\r | |
2370 | }\r | |
2371 | MOP_OUT;\r | |
2372 | continue;\r | |
2373 | break;\r | |
2374 | \r | |
2375 | #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT\r | |
2376 | case OP_NULL_CHECK_END_MEMST: MOP_IN(OP_NULL_CHECK_END_MEMST);\r | |
2377 | {\r | |
2378 | int isnull;\r | |
2379 | \r | |
2380 | GET_MEMNUM_INC(mem, p); /* mem: null check id */\r | |
2381 | STACK_NULL_CHECK_MEMST(isnull, mem, s, reg);\r | |
2382 | if (isnull) {\r | |
2383 | #ifdef ONIG_DEBUG_MATCH\r | |
2384 | fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%d\n",\r | |
2385 | (int )mem, (int )s);\r | |
2386 | #endif\r | |
2387 | if (isnull == -1) goto fail;\r | |
2388 | goto null_check_found;\r | |
2389 | }\r | |
2390 | }\r | |
2391 | MOP_OUT;\r | |
2392 | continue;\r | |
2393 | break;\r | |
2394 | #endif\r | |
2395 | \r | |
2396 | #ifdef USE_SUBEXP_CALL\r | |
2397 | case OP_NULL_CHECK_END_MEMST_PUSH:\r | |
2398 | MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH);\r | |
2399 | {\r | |
2400 | int isnull;\r | |
2401 | \r | |
2402 | GET_MEMNUM_INC(mem, p); /* mem: null check id */\r | |
2403 | #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT\r | |
2404 | STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg);\r | |
2405 | #else\r | |
2406 | STACK_NULL_CHECK_REC(isnull, mem, s);\r | |
2407 | #endif\r | |
2408 | if (isnull) {\r | |
2409 | #ifdef ONIG_DEBUG_MATCH\r | |
2410 | fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%d\n",\r | |
2411 | (int )mem, (int )s);\r | |
2412 | #endif\r | |
2413 | if (isnull == -1) goto fail;\r | |
2414 | goto null_check_found;\r | |
2415 | }\r | |
2416 | else {\r | |
2417 | STACK_PUSH_NULL_CHECK_END(mem);\r | |
2418 | }\r | |
2419 | }\r | |
2420 | MOP_OUT;\r | |
2421 | continue;\r | |
2422 | break;\r | |
2423 | #endif\r | |
2424 | \r | |
2425 | case OP_JUMP: MOP_IN(OP_JUMP);\r | |
2426 | GET_RELADDR_INC(addr, p);\r | |
2427 | p += addr;\r | |
2428 | MOP_OUT;\r | |
2429 | CHECK_INTERRUPT_IN_MATCH_AT;\r | |
2430 | continue;\r | |
2431 | break;\r | |
2432 | \r | |
2433 | case OP_PUSH: MOP_IN(OP_PUSH);\r | |
2434 | GET_RELADDR_INC(addr, p);\r | |
2435 | STACK_PUSH_ALT(p + addr, s, sprev);\r | |
2436 | MOP_OUT;\r | |
2437 | continue;\r | |
2438 | break;\r | |
2439 | \r | |
2440 | #ifdef USE_COMBINATION_EXPLOSION_CHECK\r | |
2441 | case OP_STATE_CHECK_PUSH: MOP_IN(OP_STATE_CHECK_PUSH);\r | |
2442 | GET_STATE_CHECK_NUM_INC(mem, p);\r | |
2443 | STATE_CHECK_VAL(scv, mem);\r | |
2444 | if (scv) goto fail;\r | |
2445 | \r | |
2446 | GET_RELADDR_INC(addr, p);\r | |
2447 | STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem);\r | |
2448 | MOP_OUT;\r | |
2449 | continue;\r | |
2450 | break;\r | |
2451 | \r | |
2452 | case OP_STATE_CHECK_PUSH_OR_JUMP: MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP);\r | |
2453 | GET_STATE_CHECK_NUM_INC(mem, p);\r | |
2454 | GET_RELADDR_INC(addr, p);\r | |
2455 | STATE_CHECK_VAL(scv, mem);\r | |
2456 | if (scv) {\r | |
2457 | p += addr;\r | |
2458 | }\r | |
2459 | else {\r | |
2460 | STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem);\r | |
2461 | }\r | |
2462 | MOP_OUT;\r | |
2463 | continue;\r | |
2464 | break;\r | |
2465 | \r | |
2466 | case OP_STATE_CHECK: MOP_IN(OP_STATE_CHECK);\r | |
2467 | GET_STATE_CHECK_NUM_INC(mem, p);\r | |
2468 | STATE_CHECK_VAL(scv, mem);\r | |
2469 | if (scv) goto fail;\r | |
2470 | \r | |
2471 | STACK_PUSH_STATE_CHECK(s, mem);\r | |
2472 | MOP_OUT;\r | |
2473 | continue;\r | |
2474 | break;\r | |
2475 | #endif /* USE_COMBINATION_EXPLOSION_CHECK */\r | |
2476 | \r | |
2477 | case OP_POP: MOP_IN(OP_POP);\r | |
2478 | STACK_POP_ONE;\r | |
2479 | MOP_OUT;\r | |
2480 | continue;\r | |
2481 | break;\r | |
2482 | \r | |
2483 | case OP_PUSH_OR_JUMP_EXACT1: MOP_IN(OP_PUSH_OR_JUMP_EXACT1);\r | |
2484 | GET_RELADDR_INC(addr, p);\r | |
2485 | if (*p == *s && DATA_ENSURE_CHECK1) {\r | |
2486 | p++;\r | |
2487 | STACK_PUSH_ALT(p + addr, s, sprev);\r | |
2488 | MOP_OUT;\r | |
2489 | continue;\r | |
2490 | }\r | |
2491 | p += (addr + 1);\r | |
2492 | MOP_OUT;\r | |
2493 | continue;\r | |
2494 | break;\r | |
2495 | \r | |
2496 | case OP_PUSH_IF_PEEK_NEXT: MOP_IN(OP_PUSH_IF_PEEK_NEXT);\r | |
2497 | GET_RELADDR_INC(addr, p);\r | |
2498 | if (*p == *s) {\r | |
2499 | p++;\r | |
2500 | STACK_PUSH_ALT(p + addr, s, sprev);\r | |
2501 | MOP_OUT;\r | |
2502 | continue;\r | |
2503 | }\r | |
2504 | p++;\r | |
2505 | MOP_OUT;\r | |
2506 | continue;\r | |
2507 | break;\r | |
2508 | \r | |
2509 | case OP_REPEAT: MOP_IN(OP_REPEAT);\r | |
2510 | {\r | |
2511 | GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */\r | |
2512 | GET_RELADDR_INC(addr, p);\r | |
2513 | \r | |
2514 | STACK_ENSURE(1);\r | |
2515 | repeat_stk[mem] = GET_STACK_INDEX(stk);\r | |
2516 | STACK_PUSH_REPEAT(mem, p);\r | |
2517 | \r | |
2518 | if (reg->repeat_range[mem].lower == 0) {\r | |
2519 | STACK_PUSH_ALT(p + addr, s, sprev);\r | |
2520 | }\r | |
2521 | }\r | |
2522 | MOP_OUT;\r | |
2523 | continue;\r | |
2524 | break;\r | |
2525 | \r | |
2526 | case OP_REPEAT_NG: MOP_IN(OP_REPEAT_NG);\r | |
2527 | {\r | |
2528 | GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */\r | |
2529 | GET_RELADDR_INC(addr, p);\r | |
2530 | \r | |
2531 | STACK_ENSURE(1);\r | |
2532 | repeat_stk[mem] = GET_STACK_INDEX(stk);\r | |
2533 | STACK_PUSH_REPEAT(mem, p);\r | |
2534 | \r | |
2535 | if (reg->repeat_range[mem].lower == 0) {\r | |
2536 | STACK_PUSH_ALT(p, s, sprev);\r | |
2537 | p += addr;\r | |
2538 | }\r | |
2539 | }\r | |
2540 | MOP_OUT;\r | |
2541 | continue;\r | |
2542 | break;\r | |
2543 | \r | |
2544 | case OP_REPEAT_INC: MOP_IN(OP_REPEAT_INC);\r | |
2545 | GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */\r | |
2546 | si = repeat_stk[mem];\r | |
2547 | stkp = STACK_AT(si);\r | |
2548 | \r | |
2549 | repeat_inc:\r | |
2550 | stkp->u.repeat.count++;\r | |
2551 | if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) {\r | |
2552 | /* end of repeat. Nothing to do. */\r | |
2553 | }\r | |
2554 | else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {\r | |
2555 | STACK_PUSH_ALT(p, s, sprev);\r | |
2556 | p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */\r | |
2557 | }\r | |
2558 | else {\r | |
2559 | p = stkp->u.repeat.pcode;\r | |
2560 | }\r | |
2561 | STACK_PUSH_REPEAT_INC(si);\r | |
2562 | MOP_OUT;\r | |
2563 | CHECK_INTERRUPT_IN_MATCH_AT;\r | |
2564 | continue;\r | |
2565 | break;\r | |
2566 | \r | |
2567 | case OP_REPEAT_INC_SG: MOP_IN(OP_REPEAT_INC_SG);\r | |
2568 | GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */\r | |
2569 | STACK_GET_REPEAT(mem, stkp);\r | |
2570 | si = GET_STACK_INDEX(stkp);\r | |
2571 | goto repeat_inc;\r | |
2572 | break;\r | |
2573 | \r | |
2574 | case OP_REPEAT_INC_NG: MOP_IN(OP_REPEAT_INC_NG);\r | |
2575 | GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */\r | |
2576 | si = repeat_stk[mem];\r | |
2577 | stkp = STACK_AT(si);\r | |
2578 | \r | |
2579 | repeat_inc_ng:\r | |
2580 | stkp->u.repeat.count++;\r | |
2581 | if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {\r | |
2582 | if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {\r | |
2583 | UChar* pcode = stkp->u.repeat.pcode;\r | |
2584 | \r | |
2585 | STACK_PUSH_REPEAT_INC(si);\r | |
2586 | STACK_PUSH_ALT(pcode, s, sprev);\r | |
2587 | }\r | |
2588 | else {\r | |
2589 | p = stkp->u.repeat.pcode;\r | |
2590 | STACK_PUSH_REPEAT_INC(si);\r | |
2591 | }\r | |
2592 | }\r | |
2593 | else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {\r | |
2594 | STACK_PUSH_REPEAT_INC(si);\r | |
2595 | }\r | |
2596 | MOP_OUT;\r | |
2597 | CHECK_INTERRUPT_IN_MATCH_AT;\r | |
2598 | continue;\r | |
2599 | break;\r | |
2600 | \r | |
2601 | case OP_REPEAT_INC_NG_SG: MOP_IN(OP_REPEAT_INC_NG_SG);\r | |
2602 | GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */\r | |
2603 | STACK_GET_REPEAT(mem, stkp);\r | |
2604 | si = GET_STACK_INDEX(stkp);\r | |
2605 | goto repeat_inc_ng;\r | |
2606 | break;\r | |
2607 | \r | |
2608 | case OP_PUSH_POS: MOP_IN(OP_PUSH_POS);\r | |
2609 | STACK_PUSH_POS(s, sprev);\r | |
2610 | MOP_OUT;\r | |
2611 | continue;\r | |
2612 | break;\r | |
2613 | \r | |
2614 | case OP_POP_POS: MOP_IN(OP_POP_POS);\r | |
2615 | {\r | |
2616 | STACK_POS_END(stkp);\r | |
2617 | s = stkp->u.state.pstr;\r | |
2618 | sprev = stkp->u.state.pstr_prev;\r | |
2619 | }\r | |
2620 | MOP_OUT;\r | |
2621 | continue;\r | |
2622 | break;\r | |
2623 | \r | |
2624 | case OP_PUSH_POS_NOT: MOP_IN(OP_PUSH_POS_NOT);\r | |
2625 | GET_RELADDR_INC(addr, p);\r | |
2626 | STACK_PUSH_POS_NOT(p + addr, s, sprev);\r | |
2627 | MOP_OUT;\r | |
2628 | continue;\r | |
2629 | break;\r | |
2630 | \r | |
2631 | case OP_FAIL_POS: MOP_IN(OP_FAIL_POS);\r | |
2632 | STACK_POP_TIL_POS_NOT;\r | |
2633 | goto fail;\r | |
2634 | break;\r | |
2635 | \r | |
2636 | case OP_PUSH_STOP_BT: MOP_IN(OP_PUSH_STOP_BT);\r | |
2637 | STACK_PUSH_STOP_BT;\r | |
2638 | MOP_OUT;\r | |
2639 | continue;\r | |
2640 | break;\r | |
2641 | \r | |
2642 | case OP_POP_STOP_BT: MOP_IN(OP_POP_STOP_BT);\r | |
2643 | STACK_STOP_BT_END;\r | |
2644 | MOP_OUT;\r | |
2645 | continue;\r | |
2646 | break;\r | |
2647 | \r | |
2648 | case OP_LOOK_BEHIND: MOP_IN(OP_LOOK_BEHIND);\r | |
2649 | GET_LENGTH_INC(tlen, p);\r | |
2650 | s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);\r | |
2651 | if (IS_NULL(s)) goto fail;\r | |
2652 | sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);\r | |
2653 | MOP_OUT;\r | |
2654 | continue;\r | |
2655 | break;\r | |
2656 | \r | |
2657 | case OP_PUSH_LOOK_BEHIND_NOT: MOP_IN(OP_PUSH_LOOK_BEHIND_NOT);\r | |
2658 | GET_RELADDR_INC(addr, p);\r | |
2659 | GET_LENGTH_INC(tlen, p);\r | |
2660 | q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);\r | |
2661 | if (IS_NULL(q)) {\r | |
2662 | /* too short case -> success. ex. /(?<!XXX)a/.match("a")\r | |
2663 | If you want to change to fail, replace following line. */\r | |
2664 | p += addr;\r | |
2665 | /* goto fail; */\r | |
2666 | }\r | |
2667 | else {\r | |
2668 | STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev);\r | |
2669 | s = q;\r | |
2670 | sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);\r | |
2671 | }\r | |
2672 | MOP_OUT;\r | |
2673 | continue;\r | |
2674 | break;\r | |
2675 | \r | |
2676 | case OP_FAIL_LOOK_BEHIND_NOT: MOP_IN(OP_FAIL_LOOK_BEHIND_NOT);\r | |
2677 | STACK_POP_TIL_LOOK_BEHIND_NOT;\r | |
2678 | goto fail;\r | |
2679 | break;\r | |
2680 | \r | |
2681 | #ifdef USE_SUBEXP_CALL\r | |
2682 | case OP_CALL: MOP_IN(OP_CALL);\r | |
2683 | GET_ABSADDR_INC(addr, p);\r | |
2684 | STACK_PUSH_CALL_FRAME(p);\r | |
2685 | p = reg->p + addr;\r | |
2686 | MOP_OUT;\r | |
2687 | continue;\r | |
2688 | break;\r | |
2689 | \r | |
2690 | case OP_RETURN: MOP_IN(OP_RETURN);\r | |
2691 | STACK_RETURN(p);\r | |
2692 | STACK_PUSH_RETURN;\r | |
2693 | MOP_OUT;\r | |
2694 | continue;\r | |
2695 | break;\r | |
2696 | #endif\r | |
2697 | \r | |
2698 | case OP_FINISH:\r | |
2699 | goto finish;\r | |
2700 | break;\r | |
2701 | \r | |
2702 | fail:\r | |
2703 | MOP_OUT;\r | |
2704 | /* fall */\r | |
2705 | case OP_FAIL: MOP_IN(OP_FAIL);\r | |
2706 | STACK_POP;\r | |
2707 | p = stk->u.state.pcode;\r | |
2708 | s = stk->u.state.pstr;\r | |
2709 | sprev = stk->u.state.pstr_prev;\r | |
2710 | \r | |
2711 | #ifdef USE_COMBINATION_EXPLOSION_CHECK\r | |
2712 | if (stk->u.state.state_check != 0) {\r | |
2713 | stk->type = STK_STATE_CHECK_MARK;\r | |
2714 | stk++;\r | |
2715 | }\r | |
2716 | #endif\r | |
2717 | \r | |
2718 | MOP_OUT;\r | |
2719 | continue;\r | |
2720 | break;\r | |
2721 | \r | |
2722 | default:\r | |
2723 | goto bytecode_error;\r | |
2724 | \r | |
2725 | } /* end of switch */\r | |
2726 | sprev = sbegin;\r | |
2727 | } /* end of while(1) */\r | |
2728 | \r | |
2729 | finish:\r | |
2730 | STACK_SAVE;\r | |
2731 | xfree(alloca_base);\r | |
2732 | return best_len;\r | |
2733 | \r | |
2734 | #ifdef ONIG_DEBUG\r | |
2735 | stack_error:\r | |
2736 | STACK_SAVE;\r | |
2737 | xfree(alloca_base);\r | |
2738 | return ONIGERR_STACK_BUG;\r | |
2739 | #endif\r | |
2740 | \r | |
2741 | bytecode_error:\r | |
2742 | STACK_SAVE;\r | |
2743 | xfree(alloca_base);\r | |
2744 | return ONIGERR_UNDEFINED_BYTECODE;\r | |
2745 | \r | |
2746 | unexpected_bytecode_error:\r | |
2747 | STACK_SAVE;\r | |
2748 | xfree(alloca_base);\r | |
2749 | return ONIGERR_UNEXPECTED_BYTECODE;\r | |
2750 | }\r | |
2751 | \r | |
2752 | \r | |
2753 | static UChar*\r | |
2754 | slow_search(OnigEncoding enc, UChar* target, UChar* target_end,\r | |
2755 | const UChar* text, const UChar* text_end, UChar* text_range)\r | |
2756 | {\r | |
2757 | UChar *t, *p, *s, *end;\r | |
2758 | \r | |
2759 | end = (UChar* )text_end;\r | |
2760 | end -= target_end - target - 1;\r | |
2761 | if (end > text_range)\r | |
2762 | end = text_range;\r | |
2763 | \r | |
2764 | s = (UChar* )text;\r | |
2765 | \r | |
2766 | while (s < end) {\r | |
2767 | if (*s == *target) {\r | |
2768 | p = s + 1;\r | |
2769 | t = target + 1;\r | |
2770 | while (t < target_end) {\r | |
2771 | if (*t != *p++)\r | |
2772 | break;\r | |
2773 | t++;\r | |
2774 | }\r | |
2775 | if (t == target_end)\r | |
2776 | return s;\r | |
2777 | }\r | |
2778 | s += enclen(enc, s);\r | |
2779 | }\r | |
2780 | \r | |
2781 | return (UChar* )NULL;\r | |
2782 | }\r | |
2783 | \r | |
2784 | static int\r | |
2785 | str_lower_case_match(OnigEncoding enc, int case_fold_flag,\r | |
2786 | const UChar* t, const UChar* tend,\r | |
2787 | const UChar* p, const UChar* end)\r | |
2788 | {\r | |
2789 | int lowlen;\r | |
2790 | UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];\r | |
2791 | \r | |
2792 | while (t < tend) {\r | |
2793 | lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf);\r | |
2794 | q = lowbuf;\r | |
2795 | while (lowlen > 0) {\r | |
2796 | if (*t++ != *q++) return 0;\r | |
2797 | lowlen--;\r | |
2798 | }\r | |
2799 | }\r | |
2800 | \r | |
2801 | return 1;\r | |
2802 | }\r | |
2803 | \r | |
2804 | static UChar*\r | |
2805 | slow_search_ic(OnigEncoding enc, int case_fold_flag,\r | |
2806 | UChar* target, UChar* target_end,\r | |
2807 | const UChar* text, const UChar* text_end, UChar* text_range)\r | |
2808 | {\r | |
2809 | UChar *s, *end;\r | |
2810 | \r | |
2811 | end = (UChar* )text_end;\r | |
2812 | end -= target_end - target - 1;\r | |
2813 | if (end > text_range)\r | |
2814 | end = text_range;\r | |
2815 | \r | |
2816 | s = (UChar* )text;\r | |
2817 | \r | |
2818 | while (s < end) {\r | |
2819 | if (str_lower_case_match(enc, case_fold_flag, target, target_end,\r | |
2820 | s, text_end))\r | |
2821 | return s;\r | |
2822 | \r | |
2823 | s += enclen(enc, s);\r | |
2824 | }\r | |
2825 | \r | |
2826 | return (UChar* )NULL;\r | |
2827 | }\r | |
2828 | \r | |
2829 | static UChar*\r | |
2830 | slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,\r | |
2831 | const UChar* text, const UChar* adjust_text,\r | |
2832 | const UChar* text_end, const UChar* text_start)\r | |
2833 | {\r | |
2834 | UChar *t, *p, *s;\r | |
2835 | \r | |
2836 | s = (UChar* )text_end;\r | |
2837 | s -= (target_end - target);\r | |
2838 | if (s > text_start)\r | |
2839 | s = (UChar* )text_start;\r | |
2840 | else\r | |
2841 | s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);\r | |
2842 | \r | |
2843 | while (s >= text) {\r | |
2844 | if (*s == *target) {\r | |
2845 | p = s + 1;\r | |
2846 | t = target + 1;\r | |
2847 | while (t < target_end) {\r | |
2848 | if (*t != *p++)\r | |
2849 | break;\r | |
2850 | t++;\r | |
2851 | }\r | |
2852 | if (t == target_end)\r | |
2853 | return s;\r | |
2854 | }\r | |
2855 | s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s);\r | |
2856 | }\r | |
2857 | \r | |
2858 | return (UChar* )NULL;\r | |
2859 | }\r | |
2860 | \r | |
2861 | static UChar*\r | |
2862 | slow_search_backward_ic(OnigEncoding enc, int case_fold_flag,\r | |
2863 | UChar* target, UChar* target_end,\r | |
2864 | const UChar* text, const UChar* adjust_text,\r | |
2865 | const UChar* text_end, const UChar* text_start)\r | |
2866 | {\r | |
2867 | UChar *s;\r | |
2868 | \r | |
2869 | s = (UChar* )text_end;\r | |
2870 | s -= (target_end - target);\r | |
2871 | if (s > text_start)\r | |
2872 | s = (UChar* )text_start;\r | |
2873 | else\r | |
2874 | s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);\r | |
2875 | \r | |
2876 | while (s >= text) {\r | |
2877 | if (str_lower_case_match(enc, case_fold_flag,\r | |
2878 | target, target_end, s, text_end))\r | |
2879 | return s;\r | |
2880 | \r | |
2881 | s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s);\r | |
2882 | }\r | |
2883 | \r | |
2884 | return (UChar* )NULL;\r | |
2885 | }\r | |
2886 | \r | |
2887 | static UChar*\r | |
2888 | bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,\r | |
2889 | const UChar* text, const UChar* text_end,\r | |
2890 | const UChar* text_range)\r | |
2891 | {\r | |
2892 | const UChar *s, *se, *t, *p, *end;\r | |
2893 | const UChar *tail;\r | |
2894 | int skip, tlen1;\r | |
2895 | \r | |
2896 | #ifdef ONIG_DEBUG_SEARCH\r | |
2897 | fprintf(stderr, "bm_search_notrev: text: %d, text_end: %d, text_range: %d\n",\r | |
2898 | (int )text, (int )text_end, (int )text_range);\r | |
2899 | #endif\r | |
2900 | \r | |
2901 | tail = target_end - 1;\r | |
2902 | tlen1 = (int)(tail - target);\r | |
2903 | end = text_range;\r | |
2904 | if (end + tlen1 > text_end)\r | |
2905 | end = text_end - tlen1;\r | |
2906 | \r | |
2907 | s = text;\r | |
2908 | \r | |
2909 | if (IS_NULL(reg->int_map)) {\r | |
2910 | while (s < end) {\r | |
2911 | p = se = s + tlen1;\r | |
2912 | t = tail;\r | |
2913 | while (*p == *t) {\r | |
2914 | if (t == target) return (UChar* )s;\r | |
2915 | p--; t--;\r | |
2916 | }\r | |
2917 | skip = reg->map[*se];\r | |
2918 | t = s;\r | |
2919 | do {\r | |
2920 | s += enclen(reg->enc, s);\r | |
2921 | } while ((s - t) < skip && s < end);\r | |
2922 | }\r | |
2923 | }\r | |
2924 | else {\r | |
2925 | while (s < end) {\r | |
2926 | p = se = s + tlen1;\r | |
2927 | t = tail;\r | |
2928 | while (*p == *t) {\r | |
2929 | if (t == target) return (UChar* )s;\r | |
2930 | p--; t--;\r | |
2931 | }\r | |
2932 | skip = reg->int_map[*se];\r | |
2933 | t = s;\r | |
2934 | do {\r | |
2935 | s += enclen(reg->enc, s);\r | |
2936 | } while ((s - t) < skip && s < end);\r | |
2937 | }\r | |
2938 | }\r | |
2939 | \r | |
2940 | return (UChar* )NULL;\r | |
2941 | }\r | |
2942 | \r | |
2943 | static UChar*\r | |
2944 | bm_search(regex_t* reg, const UChar* target, const UChar* target_end,\r | |
2945 | const UChar* text, const UChar* text_end, const UChar* text_range)\r | |
2946 | {\r | |
2947 | const UChar *s, *t, *p, *end;\r | |
2948 | const UChar *tail;\r | |
2949 | \r | |
2950 | end = text_range + (target_end - target) - 1;\r | |
2951 | if (end > text_end)\r | |
2952 | end = text_end;\r | |
2953 | \r | |
2954 | tail = target_end - 1;\r | |
2955 | s = text + (target_end - target) - 1;\r | |
2956 | if (IS_NULL(reg->int_map)) {\r | |
2957 | while (s < end) {\r | |
2958 | p = s;\r | |
2959 | t = tail;\r | |
2960 | while (*p == *t) {\r | |
2961 | if (t == target) return (UChar* )p;\r | |
2962 | p--; t--;\r | |
2963 | }\r | |
2964 | s += reg->map[*s];\r | |
2965 | }\r | |
2966 | }\r | |
2967 | else { /* see int_map[] */\r | |
2968 | while (s < end) {\r | |
2969 | p = s;\r | |
2970 | t = tail;\r | |
2971 | while (*p == *t) {\r | |
2972 | if (t == target) return (UChar* )p;\r | |
2973 | p--; t--;\r | |
2974 | }\r | |
2975 | s += reg->int_map[*s];\r | |
2976 | }\r | |
2977 | }\r | |
2978 | return (UChar* )NULL;\r | |
2979 | }\r | |
2980 | \r | |
2981 | static int\r | |
2982 | set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,\r | |
2983 | int** skip)\r | |
2984 | \r | |
2985 | {\r | |
2986 | int i, len;\r | |
2987 | \r | |
2988 | if (IS_NULL(*skip)) {\r | |
2989 | *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);\r | |
2990 | if (IS_NULL(*skip)) return ONIGERR_MEMORY;\r | |
2991 | }\r | |
2992 | \r | |
2993 | len = (int)(end - s);\r | |
2994 | for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)\r | |
2995 | (*skip)[i] = len;\r | |
2996 | \r | |
2997 | for (i = len - 1; i > 0; i--)\r | |
2998 | (*skip)[s[i]] = i;\r | |
2999 | \r | |
3000 | return 0;\r | |
3001 | }\r | |
3002 | \r | |
3003 | static UChar*\r | |
3004 | bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end,\r | |
3005 | const UChar* text, const UChar* adjust_text,\r | |
3006 | const UChar* text_end, const UChar* text_start)\r | |
3007 | {\r | |
3008 | const UChar *s, *t, *p;\r | |
3009 | \r | |
3010 | s = text_end - (target_end - target);\r | |
3011 | if (text_start < s)\r | |
3012 | s = text_start;\r | |
3013 | else\r | |
3014 | s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s);\r | |
3015 | \r | |
3016 | while (s >= text) {\r | |
3017 | p = s;\r | |
3018 | t = target;\r | |
3019 | while (t < target_end && *p == *t) {\r | |
3020 | p++; t++;\r | |
3021 | }\r | |
3022 | if (t == target_end)\r | |
3023 | return (UChar* )s;\r | |
3024 | \r | |
3025 | s -= reg->int_map_backward[*s];\r | |
3026 | s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s);\r | |
3027 | }\r | |
3028 | \r | |
3029 | return (UChar* )NULL;\r | |
3030 | }\r | |
3031 | \r | |
3032 | static UChar*\r | |
3033 | map_search(OnigEncoding enc, UChar map[],\r | |
3034 | const UChar* text, const UChar* text_range)\r | |
3035 | {\r | |
3036 | const UChar *s = text;\r | |
3037 | \r | |
3038 | while (s < text_range) {\r | |
3039 | if (map[*s]) return (UChar* )s;\r | |
3040 | \r | |
3041 | s += enclen(enc, s);\r | |
3042 | }\r | |
3043 | return (UChar* )NULL;\r | |
3044 | }\r | |
3045 | \r | |
3046 | static UChar*\r | |
3047 | map_search_backward(OnigEncoding enc, UChar map[],\r | |
3048 | const UChar* text, const UChar* adjust_text,\r | |
3049 | const UChar* text_start)\r | |
3050 | {\r | |
3051 | const UChar *s = text_start;\r | |
3052 | \r | |
3053 | while (s >= text) {\r | |
3054 | if (map[*s]) return (UChar* )s;\r | |
3055 | \r | |
3056 | s = onigenc_get_prev_char_head(enc, adjust_text, s);\r | |
3057 | }\r | |
3058 | return (UChar* )NULL;\r | |
3059 | }\r | |
3060 | \r | |
3061 | extern int\r | |
3062 | onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region,\r | |
3063 | OnigOptionType option)\r | |
3064 | {\r | |
3065 | int r;\r | |
3066 | UChar *prev;\r | |
3067 | OnigMatchArg msa;\r | |
3068 | \r | |
3069 | #if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)\r | |
3070 | start:\r | |
3071 | THREAD_ATOMIC_START;\r | |
3072 | if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) {\r | |
3073 | ONIG_STATE_INC(reg);\r | |
3074 | if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {\r | |
3075 | onig_chain_reduce(reg);\r | |
3076 | ONIG_STATE_INC(reg);\r | |
3077 | }\r | |
3078 | }\r | |
3079 | else {\r | |
3080 | int n;\r | |
3081 | \r | |
3082 | THREAD_ATOMIC_END;\r | |
3083 | n = 0;\r | |
3084 | while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) {\r | |
3085 | if (++n > THREAD_PASS_LIMIT_COUNT)\r | |
3086 | return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;\r | |
3087 | THREAD_PASS;\r | |
3088 | }\r | |
3089 | goto start;\r | |
3090 | }\r | |
3091 | THREAD_ATOMIC_END;\r | |
3092 | #endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */\r | |
3093 | \r | |
3094 | MATCH_ARG_INIT(msa, option, region, at);\r | |
3095 | #ifdef USE_COMBINATION_EXPLOSION_CHECK\r | |
3096 | {\r | |
3097 | int offset = at - str;\r | |
3098 | STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);\r | |
3099 | }\r | |
3100 | #endif\r | |
3101 | \r | |
3102 | if (region\r | |
3103 | #ifdef USE_POSIX_API_REGION_OPTION\r | |
3104 | && !IS_POSIX_REGION(option)\r | |
3105 | #endif\r | |
3106 | ) {\r | |
3107 | r = onig_region_resize_clear(region, reg->num_mem + 1);\r | |
3108 | }\r | |
3109 | else\r | |
3110 | r = 0;\r | |
3111 | \r | |
3112 | if (r == 0) {\r | |
3113 | prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at);\r | |
3114 | r = match_at(reg, str, end,\r | |
3115 | #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE\r | |
3116 | end,\r | |
3117 | #endif\r | |
3118 | at, prev, &msa);\r | |
3119 | }\r | |
3120 | \r | |
3121 | MATCH_ARG_FREE(msa);\r | |
3122 | ONIG_STATE_DEC_THREAD(reg);\r | |
3123 | return r;\r | |
3124 | }\r | |
3125 | \r | |
3126 | static int\r | |
3127 | forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,\r | |
3128 | UChar* range, UChar** low, UChar** high, UChar** low_prev)\r | |
3129 | {\r | |
3130 | UChar *p, *pprev = (UChar* )NULL;\r | |
3131 | \r | |
3132 | #ifdef ONIG_DEBUG_SEARCH\r | |
3133 | fprintf(stderr, "forward_search_range: str: %d, end: %d, s: %d, range: %d\n",\r | |
3134 | (int )str, (int )end, (int )s, (int )range);\r | |
3135 | #endif\r | |
3136 | \r | |
3137 | p = s;\r | |
3138 | if (reg->dmin > 0) {\r | |
3139 | if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {\r | |
3140 | p += reg->dmin;\r | |
3141 | }\r | |
3142 | else {\r | |
3143 | UChar *q = p + reg->dmin;\r | |
3144 | while (p < q) p += enclen(reg->enc, p);\r | |
3145 | }\r | |
3146 | }\r | |
3147 | \r | |
3148 | retry:\r | |
3149 | switch (reg->optimize) {\r | |
3150 | case ONIG_OPTIMIZE_EXACT:\r | |
3151 | p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);\r | |
3152 | break;\r | |
3153 | case ONIG_OPTIMIZE_EXACT_IC:\r | |
3154 | p = slow_search_ic(reg->enc, reg->case_fold_flag,\r | |
3155 | reg->exact, reg->exact_end, p, end, range);\r | |
3156 | break;\r | |
3157 | \r | |
3158 | case ONIG_OPTIMIZE_EXACT_BM:\r | |
3159 | p = bm_search(reg, reg->exact, reg->exact_end, p, end, range);\r | |
3160 | break;\r | |
3161 | \r | |
3162 | case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:\r | |
3163 | p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range);\r | |
3164 | break;\r | |
3165 | \r | |
3166 | case ONIG_OPTIMIZE_MAP:\r | |
3167 | p = map_search(reg->enc, reg->map, p, range);\r | |
3168 | break;\r | |
3169 | }\r | |
3170 | \r | |
3171 | if (p && p < range) {\r | |
3172 | if (p - reg->dmin < s) {\r | |
3173 | retry_gate:\r | |
3174 | pprev = p;\r | |
3175 | p += enclen(reg->enc, p);\r | |
3176 | goto retry;\r | |
3177 | }\r | |
3178 | \r | |
3179 | if (reg->sub_anchor) {\r | |
3180 | UChar* prev;\r | |
3181 | \r | |
3182 | switch (reg->sub_anchor) {\r | |
3183 | case ANCHOR_BEGIN_LINE:\r | |
3184 | if (!ON_STR_BEGIN(p)) {\r | |
3185 | prev = onigenc_get_prev_char_head(reg->enc,\r | |
3186 | (pprev ? pprev : str), p);\r | |
3187 | if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))\r | |
3188 | goto retry_gate;\r | |
3189 | }\r | |
3190 | break;\r | |
3191 | \r | |
3192 | case ANCHOR_END_LINE:\r | |
3193 | if (ON_STR_END(p)) {\r | |
3194 | #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE\r | |
3195 | prev = (UChar* )onigenc_get_prev_char_head(reg->enc,\r | |
3196 | (pprev ? pprev : str), p);\r | |
3197 | if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))\r | |
3198 | goto retry_gate;\r | |
3199 | #endif\r | |
3200 | }\r | |
3201 | else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)\r | |
3202 | #ifdef USE_CRNL_AS_LINE_TERMINATOR\r | |
3203 | && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)\r | |
3204 | #endif\r | |
3205 | )\r | |
3206 | goto retry_gate;\r | |
3207 | break;\r | |
3208 | }\r | |
3209 | }\r | |
3210 | \r | |
3211 | if (reg->dmax == 0) {\r | |
3212 | *low = p;\r | |
3213 | if (low_prev) {\r | |
3214 | if (*low > s)\r | |
3215 | *low_prev = onigenc_get_prev_char_head(reg->enc, s, p);\r | |
3216 | else\r | |
3217 | *low_prev = onigenc_get_prev_char_head(reg->enc,\r | |
3218 | (pprev ? pprev : str), p);\r | |
3219 | }\r | |
3220 | }\r | |
3221 | else {\r | |
3222 | if (reg->dmax != ONIG_INFINITE_DISTANCE) {\r | |
3223 | *low = p - reg->dmax;\r | |
3224 | if (*low > s) {\r | |
3225 | *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,\r | |
3226 | *low, (const UChar** )low_prev);\r | |
3227 | if (low_prev && IS_NULL(*low_prev))\r | |
3228 | *low_prev = onigenc_get_prev_char_head(reg->enc,\r | |
3229 | (pprev ? pprev : s), *low);\r | |
3230 | }\r | |
3231 | else {\r | |
3232 | if (low_prev)\r | |
3233 | *low_prev = onigenc_get_prev_char_head(reg->enc,\r | |
3234 | (pprev ? pprev : str), *low);\r | |
3235 | }\r | |
3236 | }\r | |
3237 | }\r | |
3238 | /* no needs to adjust *high, *high is used as range check only */\r | |
3239 | *high = p - reg->dmin;\r | |
3240 | \r | |
3241 | #ifdef ONIG_DEBUG_SEARCH\r | |
3242 | fprintf(stderr,\r | |
3243 | "forward_search_range success: low: %d, high: %d, dmin: %d, dmax: %d\n",\r | |
3244 | (int )(*low - str), (int )(*high - str), reg->dmin, reg->dmax);\r | |
3245 | #endif\r | |
3246 | return 1; /* success */\r | |
3247 | }\r | |
3248 | \r | |
3249 | return 0; /* fail */\r | |
3250 | }\r | |
3251 | \r | |
/* Prototype of the backward skip-table builder called by
   backward_search_range(). */
static int set_bm_backward_skip P_((UChar* s, UChar* end, OnigEncoding enc,
                                    int** skip));

/* When no backward skip table exists yet and (s - range) is below this many
   bytes, backward_search_range() uses the plain backward scan instead of
   building the table. */
#define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD   100

/*
 * Backward counterpart of forward_search_range(): starting from s and moving
 * towards range, locate the optimization target (exact string or byte map)
 * and derive the window of start positions worth trying.
 *
 *   str, end  : whole subject text.
 *   s         : position to start searching backwards from.
 *   range     : lower bound of the search; reg->dmin is added to it locally.
 *   adjrange  : base pointer handed to the character-head adjust helpers.
 *   low, high : outputs; written only when
 *               reg->dmax != ONIG_INFINITE_DISTANCE.
 *
 * Returns 1 on success, 0 on failure, or the non-zero result of
 * set_bm_backward_skip() when building the skip table fails.
 */
static int
backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
                      UChar* s, const UChar* range, UChar* adjrange,
                      UChar** low, UChar** high)
{
  int r;
  UChar *p;

  range += reg->dmin;
  p = s;

 retry:
  switch (reg->optimize) {
  case ONIG_OPTIMIZE_EXACT:
  exact_method:
    p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
                             range, adjrange, end, p);
    break;

  case ONIG_OPTIMIZE_EXACT_IC:
    p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
                                reg->exact, reg->exact_end,
                                range, adjrange, end, p);
    break;

  case ONIG_OPTIMIZE_EXACT_BM:
  case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
    /* the backward skip table is built lazily, and only when the span to
       search reaches the threshold */
    if (IS_NULL(reg->int_map_backward)) {
      if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD)
        goto exact_method;

      r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc,
                               &(reg->int_map_backward));
      if (r) return r;
    }
    p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange,
                           end, p);
    break;

  case ONIG_OPTIMIZE_MAP:
    p = map_search_backward(reg->enc, reg->map, range, adjrange, p);
    break;
  }

  if (p) {
    /* a candidate was found: verify the sub-anchor, and on failure resume
       the backward search from the preceding character */
    if (reg->sub_anchor) {
      UChar* prev;

      switch (reg->sub_anchor) {
      case ANCHOR_BEGIN_LINE:
        if (!ON_STR_BEGIN(p)) {
          prev = onigenc_get_prev_char_head(reg->enc, str, p);
          if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
            p = prev;
            goto retry;
          }
        }
        break;

      case ANCHOR_END_LINE:
        if (ON_STR_END(p)) {
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
          prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
          if (IS_NULL(prev)) goto fail;
          if (ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
            p = prev;
            goto retry;
          }
#endif
        }
        else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
#ifdef USE_CRNL_AS_LINE_TERMINATOR
                 && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
#endif
                 ) {
          p = onigenc_get_prev_char_head(reg->enc, adjrange, p);
          if (IS_NULL(p)) goto fail;
          goto retry;
        }
        break;
      }
    }

    /* no needs to adjust *high, *high is used as range check only */
    if (reg->dmax != ONIG_INFINITE_DISTANCE) {
      *low  = p - reg->dmax;
      *high = p - reg->dmin;
      *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high);
    }

#ifdef ONIG_DEBUG_SEARCH
    fprintf(stderr, "backward_search_range: low: %d, high: %d\n",
            (int )(*low - str), (int )(*high - str));
#endif
    return 1; /* success */
  }

 fail:
#ifdef ONIG_DEBUG_SEARCH
  fprintf(stderr, "backward_search_range: fail.\n");
#endif
  return 0; /* fail */
}
3360 | \r | |
3361 | \r | |
/*
 * Search the text [str, end) for a match of reg.
 *
 *   start, range : start is the first position tried.  When range > start
 *                  the search runs forward and stops before range; otherwise
 *                  it runs backward down to range.
 *   region       : optional capture region; resized and cleared up front
 *                  (unless the POSIX region option applies).
 *   option       : search-time options, passed on to the match state.
 *
 * Returns the byte offset (s - str) of the position where the match was
 * found, ONIG_MISMATCH when nothing matched, or a negative error value.
 */
extern int
onig_search(regex_t* reg, const UChar* str, const UChar* end,
            const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
{
  int r;
  UChar *s, *prev;
  OnigMatchArg msa;
  const UChar *orig_start = start;
#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
  const UChar *orig_range = range;
#endif

#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
 start:
  THREAD_ATOMIC_START;
  if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) {
    ONIG_STATE_INC(reg);
    if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
      onig_chain_reduce(reg);
      ONIG_STATE_INC(reg);
    }
  }
  else {
    int n;

    /* regex not in a usable state: yield and retry, bounded by
       THREAD_PASS_LIMIT_COUNT */
    THREAD_ATOMIC_END;
    n = 0;
    while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) {
      if (++n > THREAD_PASS_LIMIT_COUNT)
        return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
      THREAD_PASS;
    }
    goto start;
  }
  THREAD_ATOMIC_END;
#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */

#ifdef ONIG_DEBUG_SEARCH
  fprintf(stderr,
     "onig_search (entry point): str: %d, end: %d, start: %d, range: %d\n",
     (int )str, (int )(end - str), (int )(start - str), (int )(range - str));
#endif

  if (region
#ifdef USE_POSIX_API_REGION_OPTION
      && !IS_POSIX_REGION(option)
#endif
      ) {
    r = onig_region_resize_clear(region, reg->num_mem + 1);
    if (r) goto finish_no_msa;
  }

  /* a start position outside [str, end] can never match */
  if (start > end || start < str) goto mismatch_no_msa;


/* MATCH_AND_RETURN_CHECK: run match_at() at s; jump to match on success
   (unless all of the range must be scanned for the longest match) and to
   finish on error.  On ONIG_MISMATCH execution simply continues. */
#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
#define MATCH_AND_RETURN_CHECK(upper_range) \
  r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
  if (r != ONIG_MISMATCH) {\
    if (r >= 0) {\
      if (! IS_FIND_LONGEST(reg->options)) {\
        goto match;\
      }\
    }\
    else goto finish; /* error */ \
  }
#else
#define MATCH_AND_RETURN_CHECK(upper_range) \
  r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
  if (r != ONIG_MISMATCH) {\
    if (r >= 0) {\
      goto match;\
    }\
    else goto finish; /* error */ \
  }
#endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
#else
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
#define MATCH_AND_RETURN_CHECK(none) \
  r = match_at(reg, str, end, s, prev, &msa);\
  if (r != ONIG_MISMATCH) {\
    if (r >= 0) {\
      if (! IS_FIND_LONGEST(reg->options)) {\
        goto match;\
      }\
    }\
    else goto finish; /* error */ \
  }
#else
#define MATCH_AND_RETURN_CHECK(none) \
  r = match_at(reg, str, end, s, prev, &msa);\
  if (r != ONIG_MISMATCH) {\
    if (r >= 0) {\
      goto match;\
    }\
    else goto finish; /* error */ \
  }
#endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */


  /* anchor optimization: narrow start/range using the pattern's anchors */
  if (reg->anchor != 0 && str < end) {
    UChar *min_semi_end, *max_semi_end;

    if (reg->anchor & ANCHOR_BEGIN_POSITION) {
      /* search start-position only */
    begin_position:
      if (range > start)
        range = start + 1;
      else
        range = start;
    }
    else if (reg->anchor & ANCHOR_BEGIN_BUF) {
      /* search str-position only */
      if (range > start) {
        if (start != str) goto mismatch_no_msa;
        range = str + 1;
      }
      else {
        if (range <= str) {
          start = str;
          range = str;
        }
        else
          goto mismatch_no_msa;
      }
    }
    else if (reg->anchor & ANCHOR_END_BUF) {
      min_semi_end = max_semi_end = (UChar* )end;

    end_buf:
      /* the text is shorter than the minimum distance to the end anchor */
      if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin)
        goto mismatch_no_msa;

      if (range > start) {
        if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) {
          start = min_semi_end - reg->anchor_dmax;
          if (start < end)
            start = onigenc_get_right_adjust_char_head(reg->enc, str, start);
          else { /* match with empty at end */
            start = onigenc_get_prev_char_head(reg->enc, str, end);
          }
        }
        if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) {
          range = max_semi_end - reg->anchor_dmin + 1;
        }

        if (start >= range) goto mismatch_no_msa;
      }
      else {
        if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) {
          range = min_semi_end - reg->anchor_dmax;
        }
        if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) {
          start = max_semi_end - reg->anchor_dmin;
          start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start);
        }
        if (range > start) goto mismatch_no_msa;
      }
    }
    else if (reg->anchor & ANCHOR_SEMI_END_BUF) {
      UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, 1);

      max_semi_end = (UChar* )end;
      if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
        /* the text ends with a newline: the anchor may also sit before it */
        min_semi_end = pre_end;

#ifdef USE_CRNL_AS_LINE_TERMINATOR
        pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, 1);
        if (IS_NOT_NULL(pre_end) &&
            ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
          min_semi_end = pre_end;
        }
#endif
        if (min_semi_end > str && start <= min_semi_end) {
          goto end_buf;
        }
      }
      else {
        min_semi_end = (UChar* )end;
        goto end_buf;
      }
    }
    else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) {
      goto begin_position;
    }
  }
  else if (str == end) { /* empty string */
    static const UChar* address_for_empty_string = (UChar* )"";

#ifdef ONIG_DEBUG_SEARCH
    fprintf(stderr, "onig_search: empty string.\n");
#endif

    /* only a pattern with threshold_len == 0 is tried against empty text */
    if (reg->threshold_len == 0) {
      start = end = str = address_for_empty_string;
      s = (UChar* )start;
      prev = (UChar* )NULL;

      MATCH_ARG_INIT(msa, option, region, start);
#ifdef USE_COMBINATION_EXPLOSION_CHECK
      msa.state_check_buff = (void* )0;
      msa.state_check_buff_size = 0;  /* NO NEED, for valgrind */
#endif
      MATCH_AND_RETURN_CHECK(end);
      goto mismatch;
    }
    goto mismatch_no_msa;
  }

#ifdef ONIG_DEBUG_SEARCH
  fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
          (int )(end - str), (int )(start - str), (int )(range - str));
#endif

  /* from here on msa is initialized, so exits must go through
     finish/mismatch/match, which release it */
  MATCH_ARG_INIT(msa, option, region, orig_start);
#ifdef USE_COMBINATION_EXPLOSION_CHECK
  {
    int offset = (MIN(start, range) - str);
    STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
  }
#endif

  s = (UChar* )start;
  if (range > start) {   /* forward search */
    if (s > str)
      prev = onigenc_get_prev_char_head(reg->enc, str, s);
    else
      prev = (UChar* )NULL;

    if (reg->optimize != ONIG_OPTIMIZE_NONE) {
      UChar *sch_range, *low, *high, *low_prev;

      /* the optimization target may lie up to dmax bytes beyond range */
      sch_range = (UChar* )range;
      if (reg->dmax != 0) {
        if (reg->dmax == ONIG_INFINITE_DISTANCE)
          sch_range = (UChar* )end;
        else {
          sch_range += reg->dmax;
          if (sch_range > end) sch_range = (UChar* )end;
        }
      }

      if ((end - start) < reg->threshold_len)
        goto mismatch;

      if (reg->dmax != ONIG_INFINITE_DISTANCE) {
        /* bounded distance: try only the start positions inside each
           [low, high] window reported by forward_search_range() */
        do {
          if (! forward_search_range(reg, str, end, s, sch_range,
                                     &low, &high, &low_prev)) goto mismatch;
          if (s < low) {
            s    = low;
            prev = low_prev;
          }
          while (s <= high) {
            MATCH_AND_RETURN_CHECK(orig_range);
            prev = s;
            s += enclen(reg->enc, s);
          }
        } while (s < range);
        goto mismatch;
      }
      else { /* check only. */
        if (! forward_search_range(reg, str, end, s, sch_range,
                                   &low, &high, (UChar** )NULL)) goto mismatch;

        if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
          do {
            MATCH_AND_RETURN_CHECK(orig_range);
            prev = s;
            s += enclen(reg->enc, s);

            /* after a failed attempt, skip ahead to just past the next
               newline (or to range) before trying again */
            while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) {
              prev = s;
              s += enclen(reg->enc, s);
            }
          } while (s < range);
          goto mismatch;
        }
      }
    }

    /* plain scan: try every character position in [start, range) */
    do {
      MATCH_AND_RETURN_CHECK(orig_range);
      prev = s;
      s += enclen(reg->enc, s);
    } while (s < range);

    if (s == range) { /* because empty match with /$/. */
      MATCH_AND_RETURN_CHECK(orig_range);
    }
  }
  else {  /* backward search */
#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
    if (orig_start < end)
      orig_start += enclen(reg->enc, orig_start); /* is upper range */
#endif

    if (reg->optimize != ONIG_OPTIMIZE_NONE) {
      UChar *low, *high, *adjrange, *sch_start;

      if (range < end)
        adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range);
      else
        adjrange = (UChar* )end;

      if (reg->dmax != ONIG_INFINITE_DISTANCE &&
          (end - range) >= reg->threshold_len) {
        /* bounded distance: try only the start positions inside each
           [low, high] window reported by backward_search_range() */
        do {
          sch_start = s + reg->dmax;
          if (sch_start > end) sch_start = (UChar* )end;
          if (backward_search_range(reg, str, end, sch_start, range, adjrange,
                                    &low, &high) <= 0)
            goto mismatch;

          if (s > high)
            s = high;

          while (s >= low) {
            prev = onigenc_get_prev_char_head(reg->enc, str, s);
            MATCH_AND_RETURN_CHECK(orig_start);
            s = prev;
          }
        } while (s >= range);
        goto mismatch;
      }
      else { /* check only. */
        if ((end - range) < reg->threshold_len) goto mismatch;

        sch_start = s;
        if (reg->dmax != 0) {
          if (reg->dmax == ONIG_INFINITE_DISTANCE)
            sch_start = (UChar* )end;
          else {
            sch_start += reg->dmax;
            if (sch_start > end) sch_start = (UChar* )end;
            else
              sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
                                                    start, sch_start);
          }
        }
        if (backward_search_range(reg, str, end, sch_start, range, adjrange,
                                  &low, &high) <= 0) goto mismatch;
      }
    }

    /* plain scan: try every character position from start down to range */
    do {
      prev = onigenc_get_prev_char_head(reg->enc, str, s);
      MATCH_AND_RETURN_CHECK(orig_start);
      s = prev;
    } while (s >= range);
  }

 mismatch:
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
  /* in find-longest mode successful attempts do not return immediately;
     report the best one recorded in msa, if any */
  if (IS_FIND_LONGEST(reg->options)) {
    if (msa.best_len >= 0) {
      s = msa.best_s;
      goto match;
    }
  }
#endif
  r = ONIG_MISMATCH;

 finish:
  MATCH_ARG_FREE(msa);
  ONIG_STATE_DEC_THREAD(reg);

  /* If the result is a mismatch and the FIND_NOT_EMPTY option is not set,
     then the region is not set in match_at(). */
  if (IS_FIND_NOT_EMPTY(reg->options) && region
#ifdef USE_POSIX_API_REGION_OPTION
      && !IS_POSIX_REGION(option)
#endif
      ) {
    onig_region_clear(region);
  }

#ifdef ONIG_DEBUG
  if (r != ONIG_MISMATCH)
    fprintf(stderr, "onig_search: error %d\n", r);
#endif
  return r;

 /* exits taken before msa was initialized: nothing to free */
 mismatch_no_msa:
  r = ONIG_MISMATCH;
 finish_no_msa:
  ONIG_STATE_DEC_THREAD(reg);
#ifdef ONIG_DEBUG
  if (r != ONIG_MISMATCH)
    fprintf(stderr, "onig_search: error %d\n", r);
#endif
  return r;

 match:
  ONIG_STATE_DEC_THREAD(reg);
  MATCH_ARG_FREE(msa);
  return (int)(s - str);
}
3763 | \r | |
/* Return the encoding stored in the compiled regex (reg->enc). */
extern OnigEncoding
onig_get_encoding(regex_t* reg)
{
  return reg->enc;
}
3769 | \r | |
/* Return the option flags stored in the compiled regex (reg->options). */
extern OnigOptionType
onig_get_options(regex_t* reg)
{
  return reg->options;
}
3775 | \r | |
/* Return the case-fold flag stored in the compiled regex
   (reg->case_fold_flag). */
extern OnigCaseFoldType
onig_get_case_fold_flag(regex_t* reg)
{
  return reg->case_fold_flag;
}
3781 | \r | |
/* Return the syntax pointer stored in the compiled regex (reg->syntax). */
extern OnigSyntaxType*
onig_get_syntax(regex_t* reg)
{
  return reg->syntax;
}
3787 | \r | |
/* Return reg->num_mem, the value onig_search()/onig_match() use (plus one)
   to size the capture region. */
extern int
onig_number_of_captures(regex_t* reg)
{
  return reg->num_mem;
}
3793 | \r | |
3794 | extern int\r | |
3795 | onig_number_of_capture_histories(regex_t* reg)\r | |
3796 | {\r | |
3797 | #ifdef USE_CAPTURE_HISTORY\r | |
3798 | int i, n;\r | |
3799 | \r | |
3800 | n = 0;\r | |
3801 | for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {\r | |
3802 | if (BIT_STATUS_AT(reg->capture_history, i) != 0)\r | |
3803 | n++;\r | |
3804 | }\r | |
3805 | return n;\r | |
3806 | #else\r | |
3807 | return 0;\r | |
3808 | #endif\r | |
3809 | }\r | |
3810 | \r | |
/* Copy the object that from points to over the object that to points to
   (plain assignment through both handles). */
extern void
onig_copy_encoding(OnigEncoding to, OnigEncoding from)
{
  *to = *from;
}
3816 | \r |