]> git.proxmox.com Git - mirror_smartmontools-debian.git/blame - regex/regexec.c
Imported Upstream version 6.1+svn3812
[mirror_smartmontools-debian.git] / regex / regexec.c
CommitLineData
832b75ed
GG
1/* Extended regular expression matching and search library.
2 Copyright (C) 2002, 2003 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
ee38a438
GI
18 Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19 MA 02110-1301 USA. */
832b75ed
GG
20
21static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
22 re_string_t *input, int n);
23static void match_ctx_clean (re_match_context_t *mctx);
24static void match_ctx_free (re_match_context_t *cache);
25static void match_ctx_free_subtops (re_match_context_t *mctx);
26static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
27 int str_idx, int from, int to);
28static int search_cur_bkref_entry (re_match_context_t *mctx, int str_idx);
29static void match_ctx_clear_flag (re_match_context_t *mctx);
30static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node,
31 int str_idx);
32static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
33 int node, int str_idx);
34static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
35 re_dfastate_t **limited_sts, int last_node,
36 int last_str_idx, int check_subexp);
37static reg_errcode_t re_search_internal (const regex_t *preg,
38 const char *string, int length,
39 int start, int range, int stop,
40 size_t nmatch, regmatch_t pmatch[],
41 int eflags);
42static int re_search_2_stub (struct re_pattern_buffer *bufp,
43 const char *string1, int length1,
44 const char *string2, int length2,
45 int start, int range, struct re_registers *regs,
46 int stop, int ret_len);
47static int re_search_stub (struct re_pattern_buffer *bufp,
48 const char *string, int length, int start,
49 int range, int stop, struct re_registers *regs,
50 int ret_len);
51static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
52 int nregs, int regs_allocated);
53static inline re_dfastate_t *acquire_init_state_context (reg_errcode_t *err,
54 const regex_t *preg,
55 const re_match_context_t *mctx,
56 int idx);
57static reg_errcode_t prune_impossible_nodes (const regex_t *preg,
58 re_match_context_t *mctx);
59static int check_matching (const regex_t *preg, re_match_context_t *mctx,
60 int fl_search, int fl_longest_match);
61static int check_halt_node_context (const re_dfa_t *dfa, int node,
62 unsigned int context);
63static int check_halt_state_context (const regex_t *preg,
64 const re_dfastate_t *state,
65 const re_match_context_t *mctx, int idx);
66static void update_regs (re_dfa_t *dfa, regmatch_t *pmatch, int cur_node,
67 int cur_idx, int nmatch);
68static int proceed_next_node (const regex_t *preg, int nregs, regmatch_t *regs,
69 const re_match_context_t *mctx,
70 int *pidx, int node, re_node_set *eps_via_nodes,
71 struct re_fail_stack_t *fs);
72static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
73 int str_idx, int *dests, int nregs,
74 regmatch_t *regs,
75 re_node_set *eps_via_nodes);
76static int pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs,
77 regmatch_t *regs, re_node_set *eps_via_nodes);
78static reg_errcode_t set_regs (const regex_t *preg,
79 const re_match_context_t *mctx,
80 size_t nmatch, regmatch_t *pmatch,
81 int fl_backtrack);
82static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs);
83
84#ifdef RE_ENABLE_I18N
85static int sift_states_iter_mb (const regex_t *preg,
86 const re_match_context_t *mctx,
87 re_sift_context_t *sctx,
88 int node_idx, int str_idx, int max_str_idx);
89#endif /* RE_ENABLE_I18N */
90static reg_errcode_t sift_states_backward (const regex_t *preg,
91 re_match_context_t *mctx,
92 re_sift_context_t *sctx);
93static reg_errcode_t update_cur_sifted_state (const regex_t *preg,
94 re_match_context_t *mctx,
95 re_sift_context_t *sctx,
96 int str_idx,
97 re_node_set *dest_nodes);
98static reg_errcode_t add_epsilon_src_nodes (re_dfa_t *dfa,
99 re_node_set *dest_nodes,
100 const re_node_set *candidates);
101static reg_errcode_t sub_epsilon_src_nodes (re_dfa_t *dfa, int node,
102 re_node_set *dest_nodes,
103 const re_node_set *and_nodes);
104static int check_dst_limits (re_dfa_t *dfa, re_node_set *limits,
105 re_match_context_t *mctx, int dst_node,
106 int dst_idx, int src_node, int src_idx);
107static int check_dst_limits_calc_pos (re_dfa_t *dfa, re_match_context_t *mctx,
108 int limit, re_node_set *eclosures,
109 int subexp_idx, int node, int str_idx);
110static reg_errcode_t check_subexp_limits (re_dfa_t *dfa,
111 re_node_set *dest_nodes,
112 const re_node_set *candidates,
113 re_node_set *limits,
114 struct re_backref_cache_entry *bkref_ents,
115 int str_idx);
116static reg_errcode_t sift_states_bkref (const regex_t *preg,
117 re_match_context_t *mctx,
118 re_sift_context_t *sctx,
119 int str_idx, re_node_set *dest_nodes);
120static reg_errcode_t clean_state_log_if_need (re_match_context_t *mctx,
121 int next_state_log_idx);
122static reg_errcode_t merge_state_array (re_dfa_t *dfa, re_dfastate_t **dst,
123 re_dfastate_t **src, int num);
124static re_dfastate_t *transit_state (reg_errcode_t *err, const regex_t *preg,
125 re_match_context_t *mctx,
126 re_dfastate_t *state, int fl_search);
127static reg_errcode_t check_subexp_matching_top (re_dfa_t *dfa,
128 re_match_context_t *mctx,
129 re_node_set *cur_nodes,
130 int str_idx);
131static re_dfastate_t *transit_state_sb (reg_errcode_t *err, const regex_t *preg,
132 re_dfastate_t *pstate,
133 int fl_search,
134 re_match_context_t *mctx);
135#ifdef RE_ENABLE_I18N
136static reg_errcode_t transit_state_mb (const regex_t *preg,
137 re_dfastate_t *pstate,
138 re_match_context_t *mctx);
139#endif /* RE_ENABLE_I18N */
140static reg_errcode_t transit_state_bkref (const regex_t *preg,
141 re_node_set *nodes,
142 re_match_context_t *mctx);
143static reg_errcode_t get_subexp (const regex_t *preg, re_match_context_t *mctx,
144 int bkref_node, int bkref_str_idx);
145static reg_errcode_t get_subexp_sub (const regex_t *preg,
146 re_match_context_t *mctx,
147 re_sub_match_top_t *sub_top,
148 re_sub_match_last_t *sub_last,
149 int bkref_node, int bkref_str);
150static int find_subexp_node (re_dfa_t *dfa, re_node_set *nodes,
151 int subexp_idx, int fl_open);
152static reg_errcode_t check_arrival (const regex_t *preg,
153 re_match_context_t *mctx,
154 state_array_t *path, int top_node,
155 int top_str, int last_node, int last_str,
156 int fl_open);
157static reg_errcode_t check_arrival_add_next_nodes (const regex_t *preg,
158 re_dfa_t *dfa,
159 re_match_context_t *mctx,
160 int str_idx,
161 re_node_set *cur_nodes,
162 re_node_set *next_nodes);
163static reg_errcode_t check_arrival_expand_ecl (re_dfa_t *dfa,
164 re_node_set *cur_nodes,
165 int ex_subexp, int fl_open);
166static reg_errcode_t check_arrival_expand_ecl_sub (re_dfa_t *dfa,
167 re_node_set *dst_nodes,
168 int target, int ex_subexp,
169 int fl_open);
170static reg_errcode_t expand_bkref_cache (const regex_t *preg,
171 re_match_context_t *mctx,
172 re_node_set *cur_nodes, int cur_str,
173 int last_str, int subexp_num,
174 int fl_open);
175static re_dfastate_t **build_trtable (const regex_t *dfa,
176 const re_dfastate_t *state,
177 int fl_search);
178#ifdef RE_ENABLE_I18N
179static int check_node_accept_bytes (const regex_t *preg, int node_idx,
180 const re_string_t *input, int idx);
181# ifdef _LIBC
182static unsigned int find_collation_sequence_value (const unsigned char *mbs,
183 size_t name_len);
184# endif /* _LIBC */
185#endif /* RE_ENABLE_I18N */
186static int group_nodes_into_DFAstates (const regex_t *dfa,
187 const re_dfastate_t *state,
188 re_node_set *states_node,
189 bitset *states_ch);
190static int check_node_accept (const regex_t *preg, const re_token_t *node,
191 const re_match_context_t *mctx, int idx);
192static reg_errcode_t extend_buffers (re_match_context_t *mctx);
193\f
194/* Entry point for POSIX code. */
195
196/* regexec searches for a given pattern, specified by PREG, in the
197 string STRING.
198
199 If NMATCH is zero or REG_NOSUB was set in the cflags argument to
200 `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
201 least NMATCH elements, and we set them to the offsets of the
202 corresponding matched substrings.
203
204 EFLAGS specifies `execution flags' which affect matching: if
205 REG_NOTBOL is set, then ^ does not match at the beginning of the
206 string; if REG_NOTEOL is set, then $ does not match at the end.
207
208 We return 0 if we find a match and REG_NOMATCH if not. */
209
210int
211regexec (preg, string, nmatch, pmatch, eflags)
212 const regex_t *__restrict preg;
213 const char *__restrict string;
214 size_t nmatch;
215 regmatch_t pmatch[];
216 int eflags;
217{
218 reg_errcode_t err;
219 int length = strlen (string);
220 if (preg->no_sub)
221 err = re_search_internal (preg, string, length, 0, length, length, 0,
222 NULL, eflags);
223 else
224 err = re_search_internal (preg, string, length, 0, length, length, nmatch,
225 pmatch, eflags);
226 return err != REG_NOERROR;
227}
228#ifdef _LIBC
229weak_alias (__regexec, regexec)
230#endif
231
232/* Entry points for GNU code. */
233
234/* re_match, re_search, re_match_2, re_search_2
235
236 The former two functions operate on STRING with length LENGTH,
237 while the later two operate on concatenation of STRING1 and STRING2
238 with lengths LENGTH1 and LENGTH2, respectively.
239
240 re_match() matches the compiled pattern in BUFP against the string,
241 starting at index START.
242
243 re_search() first tries matching at index START, then it tries to match
244 starting from index START + 1, and so on. The last start position tried
245 is START + RANGE. (Thus RANGE = 0 forces re_search to operate the same
246 way as re_match().)
247
248 The parameter STOP of re_{match,search}_2 specifies that no match exceeding
249 the first STOP characters of the concatenation of the strings should be
250 concerned.
251
252 If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
253 and all groups is stroed in REGS. (For the "_2" variants, the offsets are
254 computed relative to the concatenation, not relative to the individual
255 strings.)
256
257 On success, re_match* functions return the length of the match, re_search*
258 return the position of the start of the match. Return value -1 means no
259 match was found and -2 indicates an internal error. */
260
261int
262re_match (bufp, string, length, start, regs)
263 struct re_pattern_buffer *bufp;
264 const char *string;
265 int length, start;
266 struct re_registers *regs;
267{
268 return re_search_stub (bufp, string, length, start, 0, length, regs, 1);
269}
270#ifdef _LIBC
271weak_alias (__re_match, re_match)
272#endif
273
274int
275re_search (bufp, string, length, start, range, regs)
276 struct re_pattern_buffer *bufp;
277 const char *string;
278 int length, start, range;
279 struct re_registers *regs;
280{
281 return re_search_stub (bufp, string, length, start, range, length, regs, 0);
282}
283#ifdef _LIBC
284weak_alias (__re_search, re_search)
285#endif
286
287int
288re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
289 struct re_pattern_buffer *bufp;
290 const char *string1, *string2;
291 int length1, length2, start, stop;
292 struct re_registers *regs;
293{
294 return re_search_2_stub (bufp, string1, length1, string2, length2,
295 start, 0, regs, stop, 1);
296}
297#ifdef _LIBC
298weak_alias (__re_match_2, re_match_2)
299#endif
300
301int
302re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
303 struct re_pattern_buffer *bufp;
304 const char *string1, *string2;
305 int length1, length2, start, range, stop;
306 struct re_registers *regs;
307{
308 return re_search_2_stub (bufp, string1, length1, string2, length2,
309 start, range, regs, stop, 0);
310}
311#ifdef _LIBC
312weak_alias (__re_search_2, re_search_2)
313#endif
314
315static int
316re_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs,
317 stop, ret_len)
318 struct re_pattern_buffer *bufp;
319 const char *string1, *string2;
320 int length1, length2, start, range, stop, ret_len;
321 struct re_registers *regs;
322{
323 const char *str;
324 int rval;
325 int len = length1 + length2;
326 int free_str = 0;
327
328 if (BE (length1 < 0 || length2 < 0 || stop < 0, 0))
329 return -2;
330
331 /* Concatenate the strings. */
332 if (length2 > 0)
333 if (length1 > 0)
334 {
335 char *s = re_malloc (char, len);
336
337 if (BE (s == NULL, 0))
338 return -2;
339 memcpy (s, string1, length1);
340 memcpy (s + length1, string2, length2);
341 str = s;
342 free_str = 1;
343 }
344 else
345 str = string2;
346 else
347 str = string1;
348
349 rval = re_search_stub (bufp, str, len, start, range, stop, regs,
350 ret_len);
351 if (free_str)
352 re_free ((char *) str);
353 return rval;
354}
355
356/* The parameters have the same meaning as those of re_search.
357 Additional parameters:
358 If RET_LEN is nonzero the length of the match is returned (re_match style);
359 otherwise the position of the match is returned. */
360
361static int
362re_search_stub (bufp, string, length, start, range, stop, regs, ret_len)
363 struct re_pattern_buffer *bufp;
364 const char *string;
365 int length, start, range, stop, ret_len;
366 struct re_registers *regs;
367{
368 reg_errcode_t result;
369 regmatch_t *pmatch;
370 int nregs, rval;
371 int eflags = 0;
372
373 /* Check for out-of-range. */
374 if (BE (start < 0 || start > length, 0))
375 return -1;
376 if (BE (start + range > length, 0))
377 range = length - start;
378 else if (BE (start + range < 0, 0))
379 range = -start;
380
381 eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
382 eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
383
384 /* Compile fastmap if we haven't yet. */
385 if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate)
386 re_compile_fastmap (bufp);
387
388 if (BE (bufp->no_sub, 0))
389 regs = NULL;
390
391 /* We need at least 1 register. */
392 if (regs == NULL)
393 nregs = 1;
394 else if (BE (bufp->regs_allocated == REGS_FIXED &&
395 regs->num_regs < bufp->re_nsub + 1, 0))
396 {
397 nregs = regs->num_regs;
398 if (BE (nregs < 1, 0))
399 {
400 /* Nothing can be copied to regs. */
401 regs = NULL;
402 nregs = 1;
403 }
404 }
405 else
406 nregs = bufp->re_nsub + 1;
407 pmatch = re_malloc (regmatch_t, nregs);
408 if (BE (pmatch == NULL, 0))
409 return -2;
410
411 result = re_search_internal (bufp, string, length, start, range, stop,
412 nregs, pmatch, eflags);
413
414 rval = 0;
415
416 /* I hope we needn't fill ther regs with -1's when no match was found. */
417 if (result != REG_NOERROR)
418 rval = -1;
419 else if (regs != NULL)
420 {
421 /* If caller wants register contents data back, copy them. */
422 bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
423 bufp->regs_allocated);
424 if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
425 rval = -2;
426 }
427
428 if (BE (rval == 0, 1))
429 {
430 if (ret_len)
431 {
432 assert (pmatch[0].rm_so == start);
433 rval = pmatch[0].rm_eo - start;
434 }
435 else
436 rval = pmatch[0].rm_so;
437 }
438 re_free (pmatch);
439 return rval;
440}
441
442static unsigned
443re_copy_regs (regs, pmatch, nregs, regs_allocated)
444 struct re_registers *regs;
445 regmatch_t *pmatch;
446 int nregs, regs_allocated;
447{
448 int rval = REGS_REALLOCATE;
449 int i;
450 int need_regs = nregs + 1;
451 /* We need one extra element beyond `num_regs' for the `-1' marker GNU code
452 uses. */
453
454 /* Have the register data arrays been allocated? */
455 if (regs_allocated == REGS_UNALLOCATED)
456 { /* No. So allocate them with malloc. */
457 regs->start = re_malloc (regoff_t, need_regs);
458 if (BE (regs->start == NULL, 0))
459 return REGS_UNALLOCATED;
460 regs->end = re_malloc (regoff_t, need_regs);
461 if (BE (regs->end == NULL, 0))
462 {
463 re_free (regs->start);
464 return REGS_UNALLOCATED;
465 }
466 regs->num_regs = need_regs;
467 }
468 else if (regs_allocated == REGS_REALLOCATE)
469 { /* Yes. If we need more elements than were already
470 allocated, reallocate them. If we need fewer, just
471 leave it alone. */
472 if (need_regs > regs->num_regs)
473 {
474 regs->start = re_realloc (regs->start, regoff_t, need_regs);
475 if (BE (regs->start == NULL, 0))
476 {
477 if (regs->end != NULL)
478 re_free (regs->end);
479 return REGS_UNALLOCATED;
480 }
481 regs->end = re_realloc (regs->end, regoff_t, need_regs);
482 if (BE (regs->end == NULL, 0))
483 {
484 re_free (regs->start);
485 return REGS_UNALLOCATED;
486 }
487 regs->num_regs = need_regs;
488 }
489 }
490 else
491 {
492 assert (regs_allocated == REGS_FIXED);
493 /* This function may not be called with REGS_FIXED and nregs too big. */
494 assert (regs->num_regs >= nregs);
495 rval = REGS_FIXED;
496 }
497
498 /* Copy the regs. */
499 for (i = 0; i < nregs; ++i)
500 {
501 regs->start[i] = pmatch[i].rm_so;
502 regs->end[i] = pmatch[i].rm_eo;
503 }
504 for ( ; i < regs->num_regs; ++i)
505 regs->start[i] = regs->end[i] = -1;
506
507 return rval;
508}
509
510/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
511 ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
512 this memory for recording register information. STARTS and ENDS
513 must be allocated using the malloc library routine, and must each
514 be at least NUM_REGS * sizeof (regoff_t) bytes long.
515
516 If NUM_REGS == 0, then subsequent matches should allocate their own
517 register data.
518
519 Unless this function is called, the first search or match using
520 PATTERN_BUFFER will allocate its own register data, without
521 freeing the old data. */
522
523void
524re_set_registers (bufp, regs, num_regs, starts, ends)
525 struct re_pattern_buffer *bufp;
526 struct re_registers *regs;
527 unsigned num_regs;
528 regoff_t *starts, *ends;
529{
530 if (num_regs)
531 {
532 bufp->regs_allocated = REGS_REALLOCATE;
533 regs->num_regs = num_regs;
534 regs->start = starts;
535 regs->end = ends;
536 }
537 else
538 {
539 bufp->regs_allocated = REGS_UNALLOCATED;
540 regs->num_regs = 0;
541 regs->start = regs->end = (regoff_t *) 0;
542 }
543}
544#ifdef _LIBC
545weak_alias (__re_set_registers, re_set_registers)
546#endif
547\f
548/* Entry points compatible with 4.2 BSD regex library. We don't define
549 them unless specifically requested. */
550
551#if defined _REGEX_RE_COMP || defined _LIBC
552int
553# ifdef _LIBC
554weak_function
555# endif
556re_exec (s)
557 const char *s;
558{
559 return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
560}
561#endif /* _REGEX_RE_COMP */
562\f
563static re_node_set empty_set;
564
565/* Internal entry point. */
566
567/* Searches for a compiled pattern PREG in the string STRING, whose
568 length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same
569 mingings with regexec. START, and RANGE have the same meanings
570 with re_search.
571 Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
572 otherwise return the error code.
573 Note: We assume front end functions already check ranges.
574 (START + RANGE >= 0 && START + RANGE <= LENGTH) */
575
576static reg_errcode_t
577re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
578 eflags)
579 const regex_t *preg;
580 const char *string;
581 int length, start, range, stop, eflags;
582 size_t nmatch;
583 regmatch_t pmatch[];
584{
585 reg_errcode_t err;
586 re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
587 re_string_t input;
588 int left_lim, right_lim, incr;
589 int fl_longest_match, match_first, match_last = -1;
590 int fast_translate, sb;
591 re_match_context_t mctx;
592 char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
593 && range && !preg->can_be_null) ? preg->fastmap : NULL);
594
595 /* Check if the DFA haven't been compiled. */
596 if (BE (preg->used == 0 || dfa->init_state == NULL
597 || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
598 || dfa->init_state_begbuf == NULL, 0))
599 return REG_NOMATCH;
600
601 re_node_set_init_empty (&empty_set);
602 memset (&mctx, '\0', sizeof (re_match_context_t));
603
604 /* We must check the longest matching, if nmatch > 0. */
605 fl_longest_match = (nmatch != 0 || dfa->nbackref);
606
607 err = re_string_allocate (&input, string, length, dfa->nodes_len + 1,
608 preg->translate, preg->syntax & RE_ICASE);
609 if (BE (err != REG_NOERROR, 0))
610 goto free_return;
611 input.stop = stop;
612
613 err = match_ctx_init (&mctx, eflags, &input, dfa->nbackref * 2);
614 if (BE (err != REG_NOERROR, 0))
615 goto free_return;
616
617 /* We will log all the DFA states through which the dfa pass,
618 if nmatch > 1, or this dfa has "multibyte node", which is a
619 back-reference or a node which can accept multibyte character or
620 multi character collating element. */
621 if (nmatch > 1 || dfa->has_mb_node)
622 {
623 mctx.state_log = re_malloc (re_dfastate_t *, dfa->nodes_len + 1);
624 if (BE (mctx.state_log == NULL, 0))
625 {
626 err = REG_ESPACE;
627 goto free_return;
628 }
629 }
630 else
631 mctx.state_log = NULL;
632
633#ifdef DEBUG
634 /* We assume front-end functions already check them. */
635 assert (start + range >= 0 && start + range <= length);
636#endif
637
638 match_first = start;
639 input.tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
640 : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
641
642 /* Check incrementally whether of not the input string match. */
643 incr = (range < 0) ? -1 : 1;
644 left_lim = (range < 0) ? start + range : start;
645 right_lim = (range < 0) ? start : start + range;
646 sb = MB_CUR_MAX == 1;
647 fast_translate = sb || !(preg->syntax & RE_ICASE || preg->translate);
648
649 for (;;)
650 {
651 /* At first get the current byte from input string. */
652 if (fastmap)
653 {
654 if (BE (fast_translate, 1))
655 {
656 unsigned RE_TRANSLATE_TYPE t
657 = (unsigned RE_TRANSLATE_TYPE) preg->translate;
658 if (BE (range >= 0, 1))
659 {
660 if (BE (t != NULL, 0))
661 {
662 while (BE (match_first < right_lim, 1)
663 && !fastmap[t[(unsigned char) string[match_first]]])
664 ++match_first;
665 }
666 else
667 {
668 while (BE (match_first < right_lim, 1)
669 && !fastmap[(unsigned char) string[match_first]])
670 ++match_first;
671 }
672 if (BE (match_first == right_lim, 0))
673 {
674 int ch = match_first >= length
675 ? 0 : (unsigned char) string[match_first];
676 if (!fastmap[t ? t[ch] : ch])
677 break;
678 }
679 }
680 else
681 {
682 while (match_first >= left_lim)
683 {
684 int ch = match_first >= length
685 ? 0 : (unsigned char) string[match_first];
686 if (fastmap[t ? t[ch] : ch])
687 break;
688 --match_first;
689 }
690 if (match_first < left_lim)
691 break;
692 }
693 }
694 else
695 {
696 int ch;
697
698 do
699 {
700 /* In this case, we can't determine easily the current byte,
701 since it might be a component byte of a multibyte
702 character. Then we use the constructed buffer
703 instead. */
704 /* If MATCH_FIRST is out of the valid range, reconstruct the
705 buffers. */
706 if (input.raw_mbs_idx + input.valid_len <= match_first
707 || match_first < input.raw_mbs_idx)
708 {
709 err = re_string_reconstruct (&input, match_first, eflags,
710 preg->newline_anchor);
711 if (BE (err != REG_NOERROR, 0))
712 goto free_return;
713 }
714 /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
715 Note that MATCH_FIRST must not be smaller than 0. */
716 ch = ((match_first >= length) ? 0
717 : re_string_byte_at (&input,
718 match_first - input.raw_mbs_idx));
719 if (fastmap[ch])
720 break;
721 match_first += incr;
722 }
723 while (match_first >= left_lim && match_first <= right_lim);
724 if (! fastmap[ch])
725 break;
726 }
727 }
728
729 /* Reconstruct the buffers so that the matcher can assume that
730 the matching starts from the begining of the buffer. */
731 err = re_string_reconstruct (&input, match_first, eflags,
732 preg->newline_anchor);
733 if (BE (err != REG_NOERROR, 0))
734 goto free_return;
735#ifdef RE_ENABLE_I18N
736 /* Eliminate it when it is a component of a multibyte character
737 and isn't the head of a multibyte character. */
738 if (sb || re_string_first_byte (&input, 0))
739#endif
740 {
741 /* It seems to be appropriate one, then use the matcher. */
742 /* We assume that the matching starts from 0. */
743 mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
744 match_last = check_matching (preg, &mctx, 0, fl_longest_match);
745 if (match_last != -1)
746 {
747 if (BE (match_last == -2, 0))
748 {
749 err = REG_ESPACE;
750 goto free_return;
751 }
752 else
753 {
754 mctx.match_last = match_last;
755 if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
756 {
757 re_dfastate_t *pstate = mctx.state_log[match_last];
758 mctx.last_node = check_halt_state_context (preg, pstate,
759 &mctx, match_last);
760 }
761 if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
762 || dfa->nbackref)
763 {
764 err = prune_impossible_nodes (preg, &mctx);
765 if (err == REG_NOERROR)
766 break;
767 if (BE (err != REG_NOMATCH, 0))
768 goto free_return;
769 }
770 else
771 break; /* We found a matching. */
772 }
773 }
774 match_ctx_clean (&mctx);
775 }
776 /* Update counter. */
777 match_first += incr;
778 if (match_first < left_lim || right_lim < match_first)
779 break;
780 }
781
782 /* Set pmatch[] if we need. */
783 if (match_last != -1 && nmatch > 0)
784 {
785 int reg_idx;
786
787 /* Initialize registers. */
788 for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
789 pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
790
791 /* Set the points where matching start/end. */
792 pmatch[0].rm_so = 0;
793 pmatch[0].rm_eo = mctx.match_last;
794
795 if (!preg->no_sub && nmatch > 1)
796 {
797 err = set_regs (preg, &mctx, nmatch, pmatch,
798 dfa->has_plural_match && dfa->nbackref > 0);
799 if (BE (err != REG_NOERROR, 0))
800 goto free_return;
801 }
802
803 /* At last, add the offset to the each registers, since we slided
804 the buffers so that We can assume that the matching starts from 0. */
805 for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
806 if (pmatch[reg_idx].rm_so != -1)
807 {
808 pmatch[reg_idx].rm_so += match_first;
809 pmatch[reg_idx].rm_eo += match_first;
810 }
811 }
812 err = (match_last == -1) ? REG_NOMATCH : REG_NOERROR;
813 free_return:
814 re_free (mctx.state_log);
815 if (dfa->nbackref)
816 match_ctx_free (&mctx);
817 re_string_destruct (&input);
818 return err;
819}
820
821static reg_errcode_t
822prune_impossible_nodes (preg, mctx)
823 const regex_t *preg;
824 re_match_context_t *mctx;
825{
826 int halt_node, match_last;
827 reg_errcode_t ret;
828 re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
829 re_dfastate_t **sifted_states;
830 re_dfastate_t **lim_states = NULL;
831 re_sift_context_t sctx;
832#ifdef DEBUG
833 assert (mctx->state_log != NULL);
834#endif
835 match_last = mctx->match_last;
836 halt_node = mctx->last_node;
837 sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
838 if (BE (sifted_states == NULL, 0))
839 {
840 ret = REG_ESPACE;
841 goto free_return;
842 }
843 if (dfa->nbackref)
844 {
845 lim_states = re_malloc (re_dfastate_t *, match_last + 1);
846 if (BE (lim_states == NULL, 0))
847 {
848 ret = REG_ESPACE;
849 goto free_return;
850 }
851 while (1)
852 {
853 memset (lim_states, '\0',
854 sizeof (re_dfastate_t *) * (match_last + 1));
855 match_ctx_clear_flag (mctx);
856 sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
857 match_last, 0);
858 ret = sift_states_backward (preg, mctx, &sctx);
859 re_node_set_free (&sctx.limits);
860 if (BE (ret != REG_NOERROR, 0))
861 goto free_return;
862 if (sifted_states[0] != NULL || lim_states[0] != NULL)
863 break;
864 do
865 {
866 --match_last;
867 if (match_last < 0)
868 {
869 ret = REG_NOMATCH;
870 goto free_return;
871 }
872 } while (!mctx->state_log[match_last]->halt);
873 halt_node = check_halt_state_context (preg,
874 mctx->state_log[match_last],
875 mctx, match_last);
876 }
877 ret = merge_state_array (dfa, sifted_states, lim_states,
878 match_last + 1);
879 re_free (lim_states);
880 lim_states = NULL;
881 if (BE (ret != REG_NOERROR, 0))
882 goto free_return;
883 }
884 else
885 {
886 sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
887 match_last, 0);
888 ret = sift_states_backward (preg, mctx, &sctx);
889 re_node_set_free (&sctx.limits);
890 if (BE (ret != REG_NOERROR, 0))
891 goto free_return;
892 }
893 re_free (mctx->state_log);
894 mctx->state_log = sifted_states;
895 sifted_states = NULL;
896 mctx->last_node = halt_node;
897 mctx->match_last = match_last;
898 ret = REG_NOERROR;
899 free_return:
900 re_free (sifted_states);
901 re_free (lim_states);
902 return ret;
903}
904
905/* Acquire an initial state and return it.
906 We must select appropriate initial state depending on the context,
907 since initial states may have constraints like "\<", "^", etc.. */
908
909static inline re_dfastate_t *
910acquire_init_state_context (err, preg, mctx, idx)
911 reg_errcode_t *err;
912 const regex_t *preg;
913 const re_match_context_t *mctx;
914 int idx;
915{
916 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
917
918 *err = REG_NOERROR;
919 if (dfa->init_state->has_constraint)
920 {
921 unsigned int context;
922 context = re_string_context_at (mctx->input, idx - 1, mctx->eflags,
923 preg->newline_anchor);
924 if (IS_WORD_CONTEXT (context))
925 return dfa->init_state_word;
926 else if (IS_ORDINARY_CONTEXT (context))
927 return dfa->init_state;
928 else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
929 return dfa->init_state_begbuf;
930 else if (IS_NEWLINE_CONTEXT (context))
931 return dfa->init_state_nl;
932 else if (IS_BEGBUF_CONTEXT (context))
933 {
934 /* It is relatively rare case, then calculate on demand. */
935 return re_acquire_state_context (err, dfa,
936 dfa->init_state->entrance_nodes,
937 context);
938 }
939 else
940 /* Must not happen? */
941 return dfa->init_state;
942 }
943 else
944 return dfa->init_state;
945}
946
947/* Check whether the regular expression match input string INPUT or not,
948 and return the index where the matching end, return -1 if not match,
949 or return -2 in case of an error.
950 FL_SEARCH means we must search where the matching starts,
951 FL_LONGEST_MATCH means we want the POSIX longest matching.
952 Note that the matcher assume that the maching starts from the current
953 index of the buffer. */
954
955static int
956check_matching (preg, mctx, fl_search, fl_longest_match)
957 const regex_t *preg;
958 re_match_context_t *mctx;
959 int fl_search, fl_longest_match;
960{
961 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
962 reg_errcode_t err;
963 int match = 0;
964 int match_last = -1;
965 int cur_str_idx = re_string_cur_idx (mctx->input);
966 re_dfastate_t *cur_state;
967
968 cur_state = acquire_init_state_context (&err, preg, mctx, cur_str_idx);
969 /* An initial state must not be NULL(invalid state). */
970 if (BE (cur_state == NULL, 0))
971 return -2;
972 if (mctx->state_log != NULL)
973 mctx->state_log[cur_str_idx] = cur_state;
974
975 /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
976 later. E.g. Processing back references. */
977 if (dfa->nbackref)
978 {
979 err = check_subexp_matching_top (dfa, mctx, &cur_state->nodes, 0);
980 if (BE (err != REG_NOERROR, 0))
981 return err;
982 }
983
984 if (cur_state->has_backref)
985 {
986 err = transit_state_bkref (preg, &cur_state->nodes, mctx);
987 if (BE (err != REG_NOERROR, 0))
988 return err;
989 }
990
991 /* If the RE accepts NULL string. */
992 if (cur_state->halt)
993 {
994 if (!cur_state->has_constraint
995 || check_halt_state_context (preg, cur_state, mctx, cur_str_idx))
996 {
997 if (!fl_longest_match)
998 return cur_str_idx;
999 else
1000 {
1001 match_last = cur_str_idx;
1002 match = 1;
1003 }
1004 }
1005 }
1006
1007 while (!re_string_eoi (mctx->input))
1008 {
1009 cur_state = transit_state (&err, preg, mctx, cur_state,
1010 fl_search && !match);
1011 if (cur_state == NULL) /* Reached at the invalid state or an error. */
1012 {
1013 cur_str_idx = re_string_cur_idx (mctx->input);
1014 if (BE (err != REG_NOERROR, 0))
1015 return -2;
1016 if (fl_search && !match)
1017 {
1018 /* Restart from initial state, since we are searching
1019 the point from where matching start. */
1020#ifdef RE_ENABLE_I18N
1021 if (MB_CUR_MAX == 1
1022 || re_string_first_byte (mctx->input, cur_str_idx))
1023#endif /* RE_ENABLE_I18N */
1024 cur_state = acquire_init_state_context (&err, preg, mctx,
1025 cur_str_idx);
1026 if (BE (cur_state == NULL && err != REG_NOERROR, 0))
1027 return -2;
1028 if (mctx->state_log != NULL)
1029 mctx->state_log[cur_str_idx] = cur_state;
1030 }
1031 else if (!fl_longest_match && match)
1032 break;
1033 else /* (fl_longest_match && match) || (!fl_search && !match) */
1034 {
1035 if (mctx->state_log == NULL)
1036 break;
1037 else
1038 {
1039 int max = mctx->state_log_top;
1040 for (; cur_str_idx <= max; ++cur_str_idx)
1041 if (mctx->state_log[cur_str_idx] != NULL)
1042 break;
1043 if (cur_str_idx > max)
1044 break;
1045 }
1046 }
1047 }
1048
1049 if (cur_state != NULL && cur_state->halt)
1050 {
1051 /* Reached at a halt state.
1052 Check the halt state can satisfy the current context. */
1053 if (!cur_state->has_constraint
1054 || check_halt_state_context (preg, cur_state, mctx,
1055 re_string_cur_idx (mctx->input)))
1056 {
1057 /* We found an appropriate halt state. */
1058 match_last = re_string_cur_idx (mctx->input);
1059 match = 1;
1060 if (!fl_longest_match)
1061 break;
1062 }
1063 }
1064 }
1065 return match_last;
1066}
1067
1068/* Check NODE match the current context. */
1069
1070static int check_halt_node_context (dfa, node, context)
1071 const re_dfa_t *dfa;
1072 int node;
1073 unsigned int context;
1074{
1075 re_token_type_t type = dfa->nodes[node].type;
1076 unsigned int constraint = dfa->nodes[node].constraint;
1077 if (type != END_OF_RE)
1078 return 0;
1079 if (!constraint)
1080 return 1;
1081 if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
1082 return 0;
1083 return 1;
1084}
1085
1086/* Check the halt state STATE match the current context.
1087 Return 0 if not match, if the node, STATE has, is a halt node and
1088 match the context, return the node. */
1089
1090static int
1091check_halt_state_context (preg, state, mctx, idx)
1092 const regex_t *preg;
1093 const re_dfastate_t *state;
1094 const re_match_context_t *mctx;
1095 int idx;
1096{
1097 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
1098 int i;
1099 unsigned int context;
1100#ifdef DEBUG
1101 assert (state->halt);
1102#endif
1103 context = re_string_context_at (mctx->input, idx, mctx->eflags,
1104 preg->newline_anchor);
1105 for (i = 0; i < state->nodes.nelem; ++i)
1106 if (check_halt_node_context (dfa, state->nodes.elems[i], context))
1107 return state->nodes.elems[i];
1108 return 0;
1109}
1110
1111/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
1112 corresponding to the DFA).
1113 Return the destination node, and update EPS_VIA_NODES, return -1 in case
1114 of errors. */
1115
1116static int
1117proceed_next_node (preg, nregs, regs, mctx, pidx, node, eps_via_nodes, fs)
1118 const regex_t *preg;
1119 regmatch_t *regs;
1120 const re_match_context_t *mctx;
1121 int nregs, *pidx, node;
1122 re_node_set *eps_via_nodes;
1123 struct re_fail_stack_t *fs;
1124{
1125 re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
1126 int i, err, dest_node;
1127 dest_node = -1;
1128 if (IS_EPSILON_NODE (dfa->nodes[node].type))
1129 {
1130 re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
1131 int ndest, dest_nodes[2];
1132 err = re_node_set_insert (eps_via_nodes, node);
1133 if (BE (err < 0, 0))
1134 return -1;
1135 /* Pick up valid destinations. */
1136 for (ndest = 0, i = 0; i < dfa->edests[node].nelem; ++i)
1137 {
1138 int candidate = dfa->edests[node].elems[i];
1139 if (!re_node_set_contains (cur_nodes, candidate))
1140 continue;
1141 dest_nodes[0] = (ndest == 0) ? candidate : dest_nodes[0];
1142 dest_nodes[1] = (ndest == 1) ? candidate : dest_nodes[1];
1143 ++ndest;
1144 }
1145 if (ndest <= 1)
1146 return ndest == 0 ? -1 : (ndest == 1 ? dest_nodes[0] : 0);
1147 /* In order to avoid infinite loop like "(a*)*". */
1148 if (re_node_set_contains (eps_via_nodes, dest_nodes[0]))
1149 return dest_nodes[1];
1150 if (fs != NULL)
1151 push_fail_stack (fs, *pidx, dest_nodes, nregs, regs, eps_via_nodes);
1152 return dest_nodes[0];
1153 }
1154 else
1155 {
1156 int naccepted = 0;
1157 re_token_type_t type = dfa->nodes[node].type;
1158
1159#ifdef RE_ENABLE_I18N
1160 if (ACCEPT_MB_NODE (type))
1161 naccepted = check_node_accept_bytes (preg, node, mctx->input, *pidx);
1162 else
1163#endif /* RE_ENABLE_I18N */
1164 if (type == OP_BACK_REF)
1165 {
1166 int subexp_idx = dfa->nodes[node].opr.idx;
1167 naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1168 if (fs != NULL)
1169 {
1170 if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
1171 return -1;
1172 else if (naccepted)
1173 {
1174 char *buf = (char *) re_string_get_buffer (mctx->input);
1175 if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1176 naccepted) != 0)
1177 return -1;
1178 }
1179 }
1180
1181 if (naccepted == 0)
1182 {
1183 err = re_node_set_insert (eps_via_nodes, node);
1184 if (BE (err < 0, 0))
1185 return -2;
1186 dest_node = dfa->edests[node].elems[0];
1187 if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1188 dest_node))
1189 return dest_node;
1190 }
1191 }
1192
1193 if (naccepted != 0
1194 || check_node_accept (preg, dfa->nodes + node, mctx, *pidx))
1195 {
1196 dest_node = dfa->nexts[node];
1197 *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
1198 if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
1199 || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1200 dest_node)))
1201 return -1;
1202 re_node_set_empty (eps_via_nodes);
1203 return dest_node;
1204 }
1205 }
1206 return -1;
1207}
1208
1209static reg_errcode_t
1210push_fail_stack (fs, str_idx, dests, nregs, regs, eps_via_nodes)
1211 struct re_fail_stack_t *fs;
1212 int str_idx, *dests, nregs;
1213 regmatch_t *regs;
1214 re_node_set *eps_via_nodes;
1215{
1216 reg_errcode_t err;
1217 int num = fs->num++;
1218 if (fs->num == fs->alloc)
1219 {
1220 struct re_fail_stack_ent_t *new_array;
1221 fs->alloc *= 2;
1222 new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
1223 * fs->alloc));
1224 if (new_array == NULL)
1225 return REG_ESPACE;
1226 fs->stack = new_array;
1227 }
1228 fs->stack[num].idx = str_idx;
1229 fs->stack[num].node = dests[1];
1230 fs->stack[num].regs = re_malloc (regmatch_t, nregs);
1231 memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1232 err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1233 return err;
1234}
1235
1236static int
1237pop_fail_stack (fs, pidx, nregs, regs, eps_via_nodes)
1238 struct re_fail_stack_t *fs;
1239 int *pidx, nregs;
1240 regmatch_t *regs;
1241 re_node_set *eps_via_nodes;
1242{
1243 int num = --fs->num;
1244 assert (num >= 0);
1245 *pidx = fs->stack[num].idx;
1246 memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1247 re_node_set_free (eps_via_nodes);
1248 re_free (fs->stack[num].regs);
1249 *eps_via_nodes = fs->stack[num].eps_via_nodes;
1250 return fs->stack[num].node;
1251}
1252
1253/* Set the positions where the subexpressions are starts/ends to registers
1254 PMATCH.
1255 Note: We assume that pmatch[0] is already set, and
1256 pmatch[i].rm_so == pmatch[i].rm_eo == -1 (i > 1). */
1257
1258static reg_errcode_t
1259set_regs (preg, mctx, nmatch, pmatch, fl_backtrack)
1260 const regex_t *preg;
1261 const re_match_context_t *mctx;
1262 size_t nmatch;
1263 regmatch_t *pmatch;
1264 int fl_backtrack;
1265{
1266 re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
1267 int idx, cur_node, real_nmatch;
1268 re_node_set eps_via_nodes;
1269 struct re_fail_stack_t *fs;
1270 struct re_fail_stack_t fs_body = {0, 2, NULL};
1271#ifdef DEBUG
1272 assert (nmatch > 1);
1273 assert (mctx->state_log != NULL);
1274#endif
1275 if (fl_backtrack)
1276 {
1277 fs = &fs_body;
1278 fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
1279 }
1280 else
1281 fs = NULL;
1282 cur_node = dfa->init_node;
1283 real_nmatch = (nmatch <= preg->re_nsub) ? nmatch : preg->re_nsub + 1;
1284 re_node_set_init_empty (&eps_via_nodes);
1285 for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
1286 {
1287 update_regs (dfa, pmatch, cur_node, idx, real_nmatch);
1288 if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
1289 {
1290 int reg_idx;
1291 if (fs)
1292 {
1293 for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
1294 if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
1295 break;
1296 if (reg_idx == nmatch)
1297 {
1298 re_node_set_free (&eps_via_nodes);
1299 return free_fail_stack_return (fs);
1300 }
1301 cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1302 &eps_via_nodes);
1303 }
1304 else
1305 {
1306 re_node_set_free (&eps_via_nodes);
1307 return REG_NOERROR;
1308 }
1309 }
1310
1311 /* Proceed to next node. */
1312 cur_node = proceed_next_node (preg, nmatch, pmatch, mctx, &idx, cur_node,
1313 &eps_via_nodes, fs);
1314
1315 if (BE (cur_node < 0, 0))
1316 {
1317 if (cur_node == -2)
1318 return REG_ESPACE;
1319 if (fs)
1320 cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1321 &eps_via_nodes);
1322 else
1323 {
1324 re_node_set_free (&eps_via_nodes);
1325 return REG_NOMATCH;
1326 }
1327 }
1328 }
1329 re_node_set_free (&eps_via_nodes);
1330 return free_fail_stack_return (fs);
1331}
1332
1333static reg_errcode_t
1334free_fail_stack_return (fs)
1335 struct re_fail_stack_t *fs;
1336{
1337 if (fs)
1338 {
1339 int fs_idx;
1340 for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
1341 {
1342 re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1343 re_free (fs->stack[fs_idx].regs);
1344 }
1345 re_free (fs->stack);
1346 }
1347 return REG_NOERROR;
1348}
1349
1350static void
1351update_regs (dfa, pmatch, cur_node, cur_idx, nmatch)
1352 re_dfa_t *dfa;
1353 regmatch_t *pmatch;
1354 int cur_node, cur_idx, nmatch;
1355{
1356 int type = dfa->nodes[cur_node].type;
1357 int reg_num;
1358 if (type != OP_OPEN_SUBEXP && type != OP_CLOSE_SUBEXP)
1359 return;
1360 reg_num = dfa->nodes[cur_node].opr.idx + 1;
1361 if (reg_num >= nmatch)
1362 return;
1363 if (type == OP_OPEN_SUBEXP)
1364 {
1365 /* We are at the first node of this sub expression. */
1366 pmatch[reg_num].rm_so = cur_idx;
1367 pmatch[reg_num].rm_eo = -1;
1368 }
1369 else if (type == OP_CLOSE_SUBEXP)
1370 /* We are at the first node of this sub expression. */
1371 pmatch[reg_num].rm_eo = cur_idx;
1372}
1373
1374#define NUMBER_OF_STATE 1
1375
1376/* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
1377 and sift the nodes in each states according to the following rules.
1378 Updated state_log will be wrote to STATE_LOG.
1379
1380 Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
1381 1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1382 If `a' isn't the LAST_NODE and `a' can't epsilon transit to
1383 the LAST_NODE, we throw away the node `a'.
1384 2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
1385 string `s' and transit to `b':
1386 i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1387 away the node `a'.
1388 ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1389 throwed away, we throw away the node `a'.
1390 3. When 0 <= STR_IDX < n and 'a' epsilon transit to 'b':
1391 i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1392 node `a'.
1393 ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is throwed away,
1394 we throw away the node `a'. */
1395
1396#define STATE_NODE_CONTAINS(state,node) \
1397 ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1398
1399static reg_errcode_t
1400sift_states_backward (preg, mctx, sctx)
1401 const regex_t *preg;
1402 re_match_context_t *mctx;
1403 re_sift_context_t *sctx;
1404{
1405 reg_errcode_t err;
1406 re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
1407 int null_cnt = 0;
1408 int str_idx = sctx->last_str_idx;
1409 re_node_set cur_dest;
1410 re_node_set *cur_src; /* Points the state_log[str_idx]->nodes */
1411
1412#ifdef DEBUG
1413 assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1414#endif
1415 cur_src = &mctx->state_log[str_idx]->nodes;
1416
1417 /* Build sifted state_log[str_idx]. It has the nodes which can epsilon
1418 transit to the last_node and the last_node itself. */
1419 err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1420 if (BE (err != REG_NOERROR, 0))
1421 return err;
1422 err = update_cur_sifted_state (preg, mctx, sctx, str_idx, &cur_dest);
1423 if (BE (err != REG_NOERROR, 0))
1424 goto free_return;
1425
1426 /* Then check each states in the state_log. */
1427 while (str_idx > 0)
1428 {
1429 int i, ret;
1430 /* Update counters. */
1431 null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
1432 if (null_cnt > mctx->max_mb_elem_len)
1433 {
1434 memset (sctx->sifted_states, '\0',
1435 sizeof (re_dfastate_t *) * str_idx);
1436 re_node_set_free (&cur_dest);
1437 return REG_NOERROR;
1438 }
1439 re_node_set_empty (&cur_dest);
1440 --str_idx;
1441 cur_src = ((mctx->state_log[str_idx] == NULL) ? &empty_set
1442 : &mctx->state_log[str_idx]->nodes);
1443
1444 /* Then build the next sifted state.
1445 We build the next sifted state on `cur_dest', and update
1446 `sifted_states[str_idx]' with `cur_dest'.
1447 Note:
1448 `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
1449 `cur_src' points the node_set of the old `state_log[str_idx]'. */
1450 for (i = 0; i < cur_src->nelem; i++)
1451 {
1452 int prev_node = cur_src->elems[i];
1453 int naccepted = 0;
1454 re_token_type_t type = dfa->nodes[prev_node].type;
1455
1456 if (IS_EPSILON_NODE(type))
1457 continue;
1458#ifdef RE_ENABLE_I18N
1459 /* If the node may accept `multi byte'. */
1460 if (ACCEPT_MB_NODE (type))
1461 naccepted = sift_states_iter_mb (preg, mctx, sctx, prev_node,
1462 str_idx, sctx->last_str_idx);
1463
1464#endif /* RE_ENABLE_I18N */
1465 /* We don't check backreferences here.
1466 See update_cur_sifted_state(). */
1467
1468 if (!naccepted
1469 && check_node_accept (preg, dfa->nodes + prev_node, mctx,
1470 str_idx)
1471 && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
1472 dfa->nexts[prev_node]))
1473 naccepted = 1;
1474
1475 if (naccepted == 0)
1476 continue;
1477
1478 if (sctx->limits.nelem)
1479 {
1480 int to_idx = str_idx + naccepted;
1481 if (check_dst_limits (dfa, &sctx->limits, mctx,
1482 dfa->nexts[prev_node], to_idx,
1483 prev_node, str_idx))
1484 continue;
1485 }
1486 ret = re_node_set_insert (&cur_dest, prev_node);
1487 if (BE (ret == -1, 0))
1488 {
1489 err = REG_ESPACE;
1490 goto free_return;
1491 }
1492 }
1493
1494 /* Add all the nodes which satisfy the following conditions:
1495 - It can epsilon transit to a node in CUR_DEST.
1496 - It is in CUR_SRC.
1497 And update state_log. */
1498 err = update_cur_sifted_state (preg, mctx, sctx, str_idx, &cur_dest);
1499 if (BE (err != REG_NOERROR, 0))
1500 goto free_return;
1501 }
1502 err = REG_NOERROR;
1503 free_return:
1504 re_node_set_free (&cur_dest);
1505 return err;
1506}
1507
1508/* Helper functions. */
1509
1510static inline reg_errcode_t
1511clean_state_log_if_need (mctx, next_state_log_idx)
1512 re_match_context_t *mctx;
1513 int next_state_log_idx;
1514{
1515 int top = mctx->state_log_top;
1516
1517 if (next_state_log_idx >= mctx->input->bufs_len
1518 || (next_state_log_idx >= mctx->input->valid_len
1519 && mctx->input->valid_len < mctx->input->len))
1520 {
1521 reg_errcode_t err;
1522 err = extend_buffers (mctx);
1523 if (BE (err != REG_NOERROR, 0))
1524 return err;
1525 }
1526
1527 if (top < next_state_log_idx)
1528 {
1529 memset (mctx->state_log + top + 1, '\0',
1530 sizeof (re_dfastate_t *) * (next_state_log_idx - top));
1531 mctx->state_log_top = next_state_log_idx;
1532 }
1533 return REG_NOERROR;
1534}
1535
1536static reg_errcode_t
1537merge_state_array (dfa, dst, src, num)
1538 re_dfa_t *dfa;
1539 re_dfastate_t **dst;
1540 re_dfastate_t **src;
1541 int num;
1542{
1543 int st_idx;
1544 reg_errcode_t err;
1545 for (st_idx = 0; st_idx < num; ++st_idx)
1546 {
1547 if (dst[st_idx] == NULL)
1548 dst[st_idx] = src[st_idx];
1549 else if (src[st_idx] != NULL)
1550 {
1551 re_node_set merged_set;
1552 err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1553 &src[st_idx]->nodes);
1554 if (BE (err != REG_NOERROR, 0))
1555 return err;
1556 dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1557 re_node_set_free (&merged_set);
1558 if (BE (err != REG_NOERROR, 0))
1559 return err;
1560 }
1561 }
1562 return REG_NOERROR;
1563}
1564
1565static reg_errcode_t
1566update_cur_sifted_state (preg, mctx, sctx, str_idx, dest_nodes)
1567 const regex_t *preg;
1568 re_match_context_t *mctx;
1569 re_sift_context_t *sctx;
1570 int str_idx;
1571 re_node_set *dest_nodes;
1572{
1573 reg_errcode_t err;
1574 re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
1575 const re_node_set *candidates;
1576 candidates = ((mctx->state_log[str_idx] == NULL) ? &empty_set
1577 : &mctx->state_log[str_idx]->nodes);
1578
1579 /* At first, add the nodes which can epsilon transit to a node in
1580 DEST_NODE. */
1581 if (dest_nodes->nelem)
1582 {
1583 err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1584 if (BE (err != REG_NOERROR, 0))
1585 return err;
1586 }
1587
1588 /* Then, check the limitations in the current sift_context. */
1589 if (dest_nodes->nelem && sctx->limits.nelem)
1590 {
1591 err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1592 mctx->bkref_ents, str_idx);
1593 if (BE (err != REG_NOERROR, 0))
1594 return err;
1595 }
1596
1597 /* Update state_log. */
1598 sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1599 if (BE (sctx->sifted_states[str_idx] == NULL && err != REG_NOERROR, 0))
1600 return err;
1601
1602 if ((mctx->state_log[str_idx] != NULL
1603 && mctx->state_log[str_idx]->has_backref))
1604 {
1605 err = sift_states_bkref (preg, mctx, sctx, str_idx, dest_nodes);
1606 if (BE (err != REG_NOERROR, 0))
1607 return err;
1608 }
1609 return REG_NOERROR;
1610}
1611
1612static reg_errcode_t
1613add_epsilon_src_nodes (dfa, dest_nodes, candidates)
1614 re_dfa_t *dfa;
1615 re_node_set *dest_nodes;
1616 const re_node_set *candidates;
1617{
1618 reg_errcode_t err;
1619 int src_idx;
1620 re_node_set src_copy;
1621
1622 err = re_node_set_init_copy (&src_copy, dest_nodes);
1623 if (BE (err != REG_NOERROR, 0))
1624 return err;
1625 for (src_idx = 0; src_idx < src_copy.nelem; ++src_idx)
1626 {
1627 err = re_node_set_add_intersect (dest_nodes, candidates,
1628 dfa->inveclosures
1629 + src_copy.elems[src_idx]);
1630 if (BE (err != REG_NOERROR, 0))
1631 {
1632 re_node_set_free (&src_copy);
1633 return err;
1634 }
1635 }
1636 re_node_set_free (&src_copy);
1637 return REG_NOERROR;
1638}
1639
1640static reg_errcode_t
1641sub_epsilon_src_nodes (dfa, node, dest_nodes, candidates)
1642 re_dfa_t *dfa;
1643 int node;
1644 re_node_set *dest_nodes;
1645 const re_node_set *candidates;
1646{
1647 int ecl_idx;
1648 reg_errcode_t err;
1649 re_node_set *inv_eclosure = dfa->inveclosures + node;
1650 re_node_set except_nodes;
1651 re_node_set_init_empty (&except_nodes);
1652 for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1653 {
1654 int cur_node = inv_eclosure->elems[ecl_idx];
1655 if (cur_node == node)
1656 continue;
1657 if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1658 {
1659 int edst1 = dfa->edests[cur_node].elems[0];
1660 int edst2 = ((dfa->edests[cur_node].nelem > 1)
1661 ? dfa->edests[cur_node].elems[1] : -1);
1662 if ((!re_node_set_contains (inv_eclosure, edst1)
1663 && re_node_set_contains (dest_nodes, edst1))
1664 || (edst2 > 0
1665 && !re_node_set_contains (inv_eclosure, edst2)
1666 && re_node_set_contains (dest_nodes, edst2)))
1667 {
1668 err = re_node_set_add_intersect (&except_nodes, candidates,
1669 dfa->inveclosures + cur_node);
1670 if (BE (err != REG_NOERROR, 0))
1671 {
1672 re_node_set_free (&except_nodes);
1673 return err;
1674 }
1675 }
1676 }
1677 }
1678 for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1679 {
1680 int cur_node = inv_eclosure->elems[ecl_idx];
1681 if (!re_node_set_contains (&except_nodes, cur_node))
1682 {
1683 int idx = re_node_set_contains (dest_nodes, cur_node) - 1;
1684 re_node_set_remove_at (dest_nodes, idx);
1685 }
1686 }
1687 re_node_set_free (&except_nodes);
1688 return REG_NOERROR;
1689}
1690
1691static int
1692check_dst_limits (dfa, limits, mctx, dst_node, dst_idx, src_node, src_idx)
1693 re_dfa_t *dfa;
1694 re_node_set *limits;
1695 re_match_context_t *mctx;
1696 int dst_node, dst_idx, src_node, src_idx;
1697{
1698 int lim_idx, src_pos, dst_pos;
1699
1700 for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1701 {
1702 int subexp_idx;
1703 struct re_backref_cache_entry *ent;
1704 ent = mctx->bkref_ents + limits->elems[lim_idx];
1705 subexp_idx = dfa->nodes[ent->node].opr.idx - 1;
1706
1707 dst_pos = check_dst_limits_calc_pos (dfa, mctx, limits->elems[lim_idx],
1708 dfa->eclosures + dst_node,
1709 subexp_idx, dst_node, dst_idx);
1710 src_pos = check_dst_limits_calc_pos (dfa, mctx, limits->elems[lim_idx],
1711 dfa->eclosures + src_node,
1712 subexp_idx, src_node, src_idx);
1713
1714 /* In case of:
1715 <src> <dst> ( <subexp> )
1716 ( <subexp> ) <src> <dst>
1717 ( <subexp1> <src> <subexp2> <dst> <subexp3> ) */
1718 if (src_pos == dst_pos)
1719 continue; /* This is unrelated limitation. */
1720 else
1721 return 1;
1722 }
1723 return 0;
1724}
1725
1726static int
1727check_dst_limits_calc_pos (dfa, mctx, limit, eclosures, subexp_idx, node,
1728 str_idx)
1729 re_dfa_t *dfa;
1730 re_match_context_t *mctx;
1731 re_node_set *eclosures;
1732 int limit, subexp_idx, node, str_idx;
1733{
1734 struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
1735 int pos = (str_idx < lim->subexp_from ? -1
1736 : (lim->subexp_to < str_idx ? 1 : 0));
1737 if (pos == 0
1738 && (str_idx == lim->subexp_from || str_idx == lim->subexp_to))
1739 {
1740 int node_idx;
1741 for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
1742 {
1743 int node = eclosures->elems[node_idx];
1744 re_token_type_t type= dfa->nodes[node].type;
1745 if (type == OP_BACK_REF)
1746 {
1747 int bi = search_cur_bkref_entry (mctx, str_idx);
1748 for (; bi < mctx->nbkref_ents; ++bi)
1749 {
1750 struct re_backref_cache_entry *ent = mctx->bkref_ents + bi;
1751 if (ent->str_idx > str_idx)
1752 break;
1753 if (ent->node == node && ent->subexp_from == ent->subexp_to)
1754 {
1755 int cpos, dst;
1756 dst = dfa->edests[node].elems[0];
1757 cpos = check_dst_limits_calc_pos (dfa, mctx, limit,
1758 dfa->eclosures + dst,
1759 subexp_idx, dst,
1760 str_idx);
1761 if ((str_idx == lim->subexp_from && cpos == -1)
1762 || (str_idx == lim->subexp_to && cpos == 0))
1763 return cpos;
1764 }
1765 }
1766 }
1767 if (type == OP_OPEN_SUBEXP && subexp_idx == dfa->nodes[node].opr.idx
1768 && str_idx == lim->subexp_from)
1769 {
1770 pos = -1;
1771 break;
1772 }
1773 if (type == OP_CLOSE_SUBEXP && subexp_idx == dfa->nodes[node].opr.idx
1774 && str_idx == lim->subexp_to)
1775 break;
1776 }
1777 if (node_idx == eclosures->nelem && str_idx == lim->subexp_to)
1778 pos = 1;
1779 }
1780 return pos;
1781}
1782
1783/* Check the limitations of sub expressions LIMITS, and remove the nodes
1784 which are against limitations from DEST_NODES. */
1785
1786static reg_errcode_t
1787check_subexp_limits (dfa, dest_nodes, candidates, limits, bkref_ents, str_idx)
1788 re_dfa_t *dfa;
1789 re_node_set *dest_nodes;
1790 const re_node_set *candidates;
1791 re_node_set *limits;
1792 struct re_backref_cache_entry *bkref_ents;
1793 int str_idx;
1794{
1795 reg_errcode_t err;
1796 int node_idx, lim_idx;
1797
1798 for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1799 {
1800 int subexp_idx;
1801 struct re_backref_cache_entry *ent;
1802 ent = bkref_ents + limits->elems[lim_idx];
1803
1804 if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
1805 continue; /* This is unrelated limitation. */
1806
1807 subexp_idx = dfa->nodes[ent->node].opr.idx - 1;
1808 if (ent->subexp_to == str_idx)
1809 {
1810 int ops_node = -1;
1811 int cls_node = -1;
1812 for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
1813 {
1814 int node = dest_nodes->elems[node_idx];
1815 re_token_type_t type= dfa->nodes[node].type;
1816 if (type == OP_OPEN_SUBEXP
1817 && subexp_idx == dfa->nodes[node].opr.idx)
1818 ops_node = node;
1819 else if (type == OP_CLOSE_SUBEXP
1820 && subexp_idx == dfa->nodes[node].opr.idx)
1821 cls_node = node;
1822 }
1823
1824 /* Check the limitation of the open subexpression. */
1825 /* Note that (ent->subexp_to = str_idx != ent->subexp_from). */
1826 if (ops_node >= 0)
1827 {
1828 err = sub_epsilon_src_nodes(dfa, ops_node, dest_nodes,
1829 candidates);
1830 if (BE (err != REG_NOERROR, 0))
1831 return err;
1832 }
1833 /* Check the limitation of the close subexpression. */
1834 for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
1835 {
1836 int node = dest_nodes->elems[node_idx];
1837 if (!re_node_set_contains (dfa->inveclosures + node, cls_node)
1838 && !re_node_set_contains (dfa->eclosures + node, cls_node))
1839 {
1840 /* It is against this limitation.
1841 Remove it form the current sifted state. */
1842 err = sub_epsilon_src_nodes(dfa, node, dest_nodes,
1843 candidates);
1844 if (BE (err != REG_NOERROR, 0))
1845 return err;
1846 --node_idx;
1847 }
1848 }
1849 }
1850 else /* (ent->subexp_to != str_idx) */
1851 {
1852 for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
1853 {
1854 int node = dest_nodes->elems[node_idx];
1855 re_token_type_t type= dfa->nodes[node].type;
1856 if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
1857 {
1858 if (subexp_idx != dfa->nodes[node].opr.idx)
1859 continue;
1860 if ((type == OP_CLOSE_SUBEXP && ent->subexp_to != str_idx)
1861 || (type == OP_OPEN_SUBEXP))
1862 {
1863 /* It is against this limitation.
1864 Remove it form the current sifted state. */
1865 err = sub_epsilon_src_nodes(dfa, node, dest_nodes,
1866 candidates);
1867 if (BE (err != REG_NOERROR, 0))
1868 return err;
1869 }
1870 }
1871 }
1872 }
1873 }
1874 return REG_NOERROR;
1875}
1876
1877static reg_errcode_t
1878sift_states_bkref (preg, mctx, sctx, str_idx, dest_nodes)
1879 const regex_t *preg;
1880 re_match_context_t *mctx;
1881 re_sift_context_t *sctx;
1882 int str_idx;
1883 re_node_set *dest_nodes;
1884{
1885 reg_errcode_t err;
1886 re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
1887 int node_idx, node;
1888 re_sift_context_t local_sctx;
1889 const re_node_set *candidates;
1890 candidates = ((mctx->state_log[str_idx] == NULL) ? &empty_set
1891 : &mctx->state_log[str_idx]->nodes);
1892 local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. */
1893
1894 for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
1895 {
1896 int cur_bkref_idx = re_string_cur_idx (mctx->input);
1897 re_token_type_t type;
1898 node = candidates->elems[node_idx];
1899 type = dfa->nodes[node].type;
1900 if (node == sctx->cur_bkref && str_idx == cur_bkref_idx)
1901 continue;
1902 /* Avoid infinite loop for the REs like "()\1+". */
1903 if (node == sctx->last_node && str_idx == sctx->last_str_idx)
1904 continue;
1905 if (type == OP_BACK_REF)
1906 {
1907 int enabled_idx = search_cur_bkref_entry (mctx, str_idx);
1908 for (; enabled_idx < mctx->nbkref_ents; ++enabled_idx)
1909 {
1910 int disabled_idx, subexp_len, to_idx, dst_node;
1911 struct re_backref_cache_entry *entry;
1912 entry = mctx->bkref_ents + enabled_idx;
1913 if (entry->str_idx > str_idx)
1914 break;
1915 if (entry->node != node)
1916 continue;
1917 subexp_len = entry->subexp_to - entry->subexp_from;
1918 to_idx = str_idx + subexp_len;
1919 dst_node = (subexp_len ? dfa->nexts[node]
1920 : dfa->edests[node].elems[0]);
1921
1922 if (to_idx > sctx->last_str_idx
1923 || sctx->sifted_states[to_idx] == NULL
1924 || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx],
1925 dst_node)
1926 || check_dst_limits (dfa, &sctx->limits, mctx, node,
1927 str_idx, dst_node, to_idx))
1928 continue;
1929 {
1930 re_dfastate_t *cur_state;
1931 entry->flag = 0;
1932 for (disabled_idx = enabled_idx + 1;
1933 disabled_idx < mctx->nbkref_ents; ++disabled_idx)
1934 {
1935 struct re_backref_cache_entry *entry2;
1936 entry2 = mctx->bkref_ents + disabled_idx;
1937 if (entry2->str_idx > str_idx)
1938 break;
1939 entry2->flag = (entry2->node == node) ? 1 : entry2->flag;
1940 }
1941
1942 if (local_sctx.sifted_states == NULL)
1943 {
1944 local_sctx = *sctx;
1945 err = re_node_set_init_copy (&local_sctx.limits,
1946 &sctx->limits);
1947 if (BE (err != REG_NOERROR, 0))
1948 goto free_return;
1949 }
1950 local_sctx.last_node = node;
1951 local_sctx.last_str_idx = str_idx;
1952 err = re_node_set_insert (&local_sctx.limits, enabled_idx);
1953 if (BE (err < 0, 0))
1954 {
1955 err = REG_ESPACE;
1956 goto free_return;
1957 }
1958 cur_state = local_sctx.sifted_states[str_idx];
1959 err = sift_states_backward (preg, mctx, &local_sctx);
1960 if (BE (err != REG_NOERROR, 0))
1961 goto free_return;
1962 if (sctx->limited_states != NULL)
1963 {
1964 err = merge_state_array (dfa, sctx->limited_states,
1965 local_sctx.sifted_states,
1966 str_idx + 1);
1967 if (BE (err != REG_NOERROR, 0))
1968 goto free_return;
1969 }
1970 local_sctx.sifted_states[str_idx] = cur_state;
1971 re_node_set_remove (&local_sctx.limits, enabled_idx);
1972 /* We must not use the variable entry here, since
1973 mctx->bkref_ents might be realloced. */
1974 mctx->bkref_ents[enabled_idx].flag = 1;
1975 }
1976 }
1977 enabled_idx = search_cur_bkref_entry (mctx, str_idx);
1978 for (; enabled_idx < mctx->nbkref_ents; ++enabled_idx)
1979 {
1980 struct re_backref_cache_entry *entry;
1981 entry = mctx->bkref_ents + enabled_idx;
1982 if (entry->str_idx > str_idx)
1983 break;
1984 if (entry->node == node)
1985 entry->flag = 0;
1986 }
1987 }
1988 }
1989 err = REG_NOERROR;
1990 free_return:
1991 if (local_sctx.sifted_states != NULL)
1992 {
1993 re_node_set_free (&local_sctx.limits);
1994 }
1995
1996 return err;
1997}
1998
1999
2000#ifdef RE_ENABLE_I18N
2001static int
2002sift_states_iter_mb (preg, mctx, sctx, node_idx, str_idx, max_str_idx)
2003 const regex_t *preg;
2004 const re_match_context_t *mctx;
2005 re_sift_context_t *sctx;
2006 int node_idx, str_idx, max_str_idx;
2007{
2008 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2009 int naccepted;
2010 /* Check the node can accept `multi byte'. */
2011 naccepted = check_node_accept_bytes (preg, node_idx, mctx->input, str_idx);
2012 if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
2013 !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
2014 dfa->nexts[node_idx]))
2015 /* The node can't accept the `multi byte', or the
2016 destination was already throwed away, then the node
2017 could't accept the current input `multi byte'. */
2018 naccepted = 0;
2019 /* Otherwise, it is sure that the node could accept
2020 `naccepted' bytes input. */
2021 return naccepted;
2022}
2023#endif /* RE_ENABLE_I18N */
2024
2025\f
2026/* Functions for state transition. */
2027
2028/* Return the next state to which the current state STATE will transit by
2029 accepting the current input byte, and update STATE_LOG if necessary.
2030 If STATE can accept a multibyte char/collating element/back reference
2031 update the destination of STATE_LOG. */
2032
2033static re_dfastate_t *
2034transit_state (err, preg, mctx, state, fl_search)
2035 reg_errcode_t *err;
2036 const regex_t *preg;
2037 re_match_context_t *mctx;
2038 re_dfastate_t *state;
2039 int fl_search;
2040{
2041 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2042 re_dfastate_t **trtable, *next_state;
2043 unsigned char ch;
2044 int cur_idx;
2045
2046 if (re_string_cur_idx (mctx->input) + 1 >= mctx->input->bufs_len
2047 || (re_string_cur_idx (mctx->input) + 1 >= mctx->input->valid_len
2048 && mctx->input->valid_len < mctx->input->len))
2049 {
2050 *err = extend_buffers (mctx);
2051 if (BE (*err != REG_NOERROR, 0))
2052 return NULL;
2053 }
2054
2055 *err = REG_NOERROR;
2056 if (state == NULL)
2057 {
2058 next_state = state;
2059 re_string_skip_bytes (mctx->input, 1);
2060 }
2061 else
2062 {
2063#ifdef RE_ENABLE_I18N
2064 /* If the current state can accept multibyte. */
2065 if (state->accept_mb)
2066 {
2067 *err = transit_state_mb (preg, state, mctx);
2068 if (BE (*err != REG_NOERROR, 0))
2069 return NULL;
2070 }
2071#endif /* RE_ENABLE_I18N */
2072
2073 /* Then decide the next state with the single byte. */
2074 if (1)
2075 {
2076 /* Use transition table */
2077 ch = re_string_fetch_byte (mctx->input);
2078 trtable = fl_search ? state->trtable_search : state->trtable;
2079 if (trtable == NULL)
2080 {
2081 trtable = build_trtable (preg, state, fl_search);
2082 if (fl_search)
2083 state->trtable_search = trtable;
2084 else
2085 state->trtable = trtable;
2086 }
2087 next_state = trtable[ch];
2088 }
2089 else
2090 {
2091 /* don't use transition table */
2092 next_state = transit_state_sb (err, preg, state, fl_search, mctx);
2093 if (BE (next_state == NULL && err != REG_NOERROR, 0))
2094 return NULL;
2095 }
2096 }
2097
2098 cur_idx = re_string_cur_idx (mctx->input);
2099 /* Update the state_log if we need. */
2100 if (mctx->state_log != NULL)
2101 {
2102 if (cur_idx > mctx->state_log_top)
2103 {
2104 mctx->state_log[cur_idx] = next_state;
2105 mctx->state_log_top = cur_idx;
2106 }
2107 else if (mctx->state_log[cur_idx] == 0)
2108 {
2109 mctx->state_log[cur_idx] = next_state;
2110 }
2111 else
2112 {
2113 re_dfastate_t *pstate;
2114 unsigned int context;
2115 re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
2116 /* If (state_log[cur_idx] != 0), it implies that cur_idx is
2117 the destination of a multibyte char/collating element/
2118 back reference. Then the next state is the union set of
2119 these destinations and the results of the transition table. */
2120 pstate = mctx->state_log[cur_idx];
2121 log_nodes = pstate->entrance_nodes;
2122 if (next_state != NULL)
2123 {
2124 table_nodes = next_state->entrance_nodes;
2125 *err = re_node_set_init_union (&next_nodes, table_nodes,
2126 log_nodes);
2127 if (BE (*err != REG_NOERROR, 0))
2128 return NULL;
2129 }
2130 else
2131 next_nodes = *log_nodes;
2132 /* Note: We already add the nodes of the initial state,
2133 then we don't need to add them here. */
2134
2135 context = re_string_context_at (mctx->input,
2136 re_string_cur_idx (mctx->input) - 1,
2137 mctx->eflags, preg->newline_anchor);
2138 next_state = mctx->state_log[cur_idx]
2139 = re_acquire_state_context (err, dfa, &next_nodes, context);
2140 /* We don't need to check errors here, since the return value of
2141 this function is next_state and ERR is already set. */
2142
2143 if (table_nodes != NULL)
2144 re_node_set_free (&next_nodes);
2145 }
2146 }
2147
2148 /* Check OP_OPEN_SUBEXP in the current state in case that we use them
2149 later. We must check them here, since the back references in the
2150 next state might use them. */
2151 if (dfa->nbackref && next_state/* && fl_process_bkref */)
2152 {
2153 *err = check_subexp_matching_top (dfa, mctx, &next_state->nodes,
2154 cur_idx);
2155 if (BE (*err != REG_NOERROR, 0))
2156 return NULL;
2157 }
2158
2159 /* If the next state has back references. */
2160 if (next_state != NULL && next_state->has_backref)
2161 {
2162 *err = transit_state_bkref (preg, &next_state->nodes, mctx);
2163 if (BE (*err != REG_NOERROR, 0))
2164 return NULL;
2165 next_state = mctx->state_log[cur_idx];
2166 }
2167 return next_state;
2168}
2169
2170/* Helper functions for transit_state. */
2171
2172/* From the node set CUR_NODES, pick up the nodes whose types are
2173 OP_OPEN_SUBEXP and which have corresponding back references in the regular
2174 expression. And register them to use them later for evaluating the
2175 correspoding back references. */
2176
2177static reg_errcode_t
2178check_subexp_matching_top (dfa, mctx, cur_nodes, str_idx)
2179 re_dfa_t *dfa;
2180 re_match_context_t *mctx;
2181 re_node_set *cur_nodes;
2182 int str_idx;
2183{
2184 int node_idx;
2185 reg_errcode_t err;
2186
2187 /* TODO: This isn't efficient.
2188 Because there might be more than one nodes whose types are
2189 OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2190 nodes.
2191 E.g. RE: (a){2} */
2192 for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
2193 {
2194 int node = cur_nodes->elems[node_idx];
2195 if (dfa->nodes[node].type == OP_OPEN_SUBEXP
2196 && dfa->used_bkref_map & (1 << dfa->nodes[node].opr.idx))
2197 {
2198 err = match_ctx_add_subtop (mctx, node, str_idx);
2199 if (BE (err != REG_NOERROR, 0))
2200 return err;
2201 }
2202 }
2203 return REG_NOERROR;
2204}
2205
2206/* Return the next state to which the current state STATE will transit by
2207 accepting the current input byte. */
2208
2209static re_dfastate_t *
2210transit_state_sb (err, preg, state, fl_search, mctx)
2211 reg_errcode_t *err;
2212 const regex_t *preg;
2213 re_dfastate_t *state;
2214 int fl_search;
2215 re_match_context_t *mctx;
2216{
2217 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2218 re_node_set next_nodes;
2219 re_dfastate_t *next_state;
2220 int node_cnt, cur_str_idx = re_string_cur_idx (mctx->input);
2221 unsigned int context;
2222
2223 *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
2224 if (BE (*err != REG_NOERROR, 0))
2225 return NULL;
2226 for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
2227 {
2228 int cur_node = state->nodes.elems[node_cnt];
2229 if (check_node_accept (preg, dfa->nodes + cur_node, mctx, cur_str_idx))
2230 {
2231 *err = re_node_set_merge (&next_nodes,
2232 dfa->eclosures + dfa->nexts[cur_node]);
2233 if (BE (*err != REG_NOERROR, 0))
2234 {
2235 re_node_set_free (&next_nodes);
2236 return NULL;
2237 }
2238 }
2239 }
2240 if (fl_search)
2241 {
2242#ifdef RE_ENABLE_I18N
2243 int not_initial = 0;
2244 if (MB_CUR_MAX > 1)
2245 for (node_cnt = 0; node_cnt < next_nodes.nelem; ++node_cnt)
2246 if (dfa->nodes[next_nodes.elems[node_cnt]].type == CHARACTER)
2247 {
2248 not_initial = dfa->nodes[next_nodes.elems[node_cnt]].mb_partial;
2249 break;
2250 }
2251 if (!not_initial)
2252#endif
2253 {
2254 *err = re_node_set_merge (&next_nodes,
2255 dfa->init_state->entrance_nodes);
2256 if (BE (*err != REG_NOERROR, 0))
2257 {
2258 re_node_set_free (&next_nodes);
2259 return NULL;
2260 }
2261 }
2262 }
2263 context = re_string_context_at (mctx->input, cur_str_idx, mctx->eflags,
2264 preg->newline_anchor);
2265 next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
2266 /* We don't need to check errors here, since the return value of
2267 this function is next_state and ERR is already set. */
2268
2269 re_node_set_free (&next_nodes);
2270 re_string_skip_bytes (mctx->input, 1);
2271 return next_state;
2272}
2273
2274#ifdef RE_ENABLE_I18N
2275static reg_errcode_t
2276transit_state_mb (preg, pstate, mctx)
2277 const regex_t *preg;
2278 re_dfastate_t *pstate;
2279 re_match_context_t *mctx;
2280{
2281 reg_errcode_t err;
2282 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2283 int i;
2284
2285 for (i = 0; i < pstate->nodes.nelem; ++i)
2286 {
2287 re_node_set dest_nodes, *new_nodes;
2288 int cur_node_idx = pstate->nodes.elems[i];
2289 int naccepted = 0, dest_idx;
2290 unsigned int context;
2291 re_dfastate_t *dest_state;
2292
2293 if (dfa->nodes[cur_node_idx].constraint)
2294 {
2295 context = re_string_context_at (mctx->input,
2296 re_string_cur_idx (mctx->input),
2297 mctx->eflags, preg->newline_anchor);
2298 if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
2299 context))
2300 continue;
2301 }
2302
2303 /* How many bytes the node can accepts? */
2304 if (ACCEPT_MB_NODE (dfa->nodes[cur_node_idx].type))
2305 naccepted = check_node_accept_bytes (preg, cur_node_idx, mctx->input,
2306 re_string_cur_idx (mctx->input));
2307 if (naccepted == 0)
2308 continue;
2309
2310 /* The node can accepts `naccepted' bytes. */
2311 dest_idx = re_string_cur_idx (mctx->input) + naccepted;
2312 mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
2313 : mctx->max_mb_elem_len);
2314 err = clean_state_log_if_need (mctx, dest_idx);
2315 if (BE (err != REG_NOERROR, 0))
2316 return err;
2317#ifdef DEBUG
2318 assert (dfa->nexts[cur_node_idx] != -1);
2319#endif
2320 /* `cur_node_idx' may point the entity of the OP_CONTEXT_NODE,
2321 then we use pstate->nodes.elems[i] instead. */
2322 new_nodes = dfa->eclosures + dfa->nexts[pstate->nodes.elems[i]];
2323
2324 dest_state = mctx->state_log[dest_idx];
2325 if (dest_state == NULL)
2326 dest_nodes = *new_nodes;
2327 else
2328 {
2329 err = re_node_set_init_union (&dest_nodes,
2330 dest_state->entrance_nodes, new_nodes);
2331 if (BE (err != REG_NOERROR, 0))
2332 return err;
2333 }
2334 context = re_string_context_at (mctx->input, dest_idx - 1, mctx->eflags,
2335 preg->newline_anchor);
2336 mctx->state_log[dest_idx]
2337 = re_acquire_state_context (&err, dfa, &dest_nodes, context);
2338 if (dest_state != NULL)
2339 re_node_set_free (&dest_nodes);
2340 if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
2341 return err;
2342 }
2343 return REG_NOERROR;
2344}
2345#endif /* RE_ENABLE_I18N */
2346
2347static reg_errcode_t
2348transit_state_bkref (preg, nodes, mctx)
2349 const regex_t *preg;
2350 re_node_set *nodes;
2351 re_match_context_t *mctx;
2352{
2353 reg_errcode_t err;
2354 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2355 int i;
2356 int cur_str_idx = re_string_cur_idx (mctx->input);
2357
2358 for (i = 0; i < nodes->nelem; ++i)
2359 {
2360 int dest_str_idx, prev_nelem, bkc_idx;
2361 int node_idx = nodes->elems[i];
2362 unsigned int context;
2363 re_token_t *node = dfa->nodes + node_idx;
2364 re_node_set *new_dest_nodes;
2365
2366 /* Check whether `node' is a backreference or not. */
2367 if (node->type != OP_BACK_REF)
2368 continue;
2369
2370 if (node->constraint)
2371 {
2372 context = re_string_context_at (mctx->input, cur_str_idx,
2373 mctx->eflags, preg->newline_anchor);
2374 if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2375 continue;
2376 }
2377
2378 /* `node' is a backreference.
2379 Check the substring which the substring matched. */
2380 bkc_idx = mctx->nbkref_ents;
2381 err = get_subexp (preg, mctx, node_idx, cur_str_idx);
2382 if (BE (err != REG_NOERROR, 0))
2383 goto free_return;
2384
2385 /* And add the epsilon closures (which is `new_dest_nodes') of
2386 the backreference to appropriate state_log. */
2387#ifdef DEBUG
2388 assert (dfa->nexts[node_idx] != -1);
2389#endif
2390 for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2391 {
2392 int subexp_len;
2393 re_dfastate_t *dest_state;
2394 struct re_backref_cache_entry *bkref_ent;
2395 bkref_ent = mctx->bkref_ents + bkc_idx;
2396 if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
2397 continue;
2398 subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2399 new_dest_nodes = (subexp_len == 0
2400 ? dfa->eclosures + dfa->edests[node_idx].elems[0]
2401 : dfa->eclosures + dfa->nexts[node_idx]);
2402 dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2403 - bkref_ent->subexp_from);
2404 context = re_string_context_at (mctx->input, dest_str_idx - 1,
2405 mctx->eflags, preg->newline_anchor);
2406 dest_state = mctx->state_log[dest_str_idx];
2407 prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
2408 : mctx->state_log[cur_str_idx]->nodes.nelem);
2409 /* Add `new_dest_node' to state_log. */
2410 if (dest_state == NULL)
2411 {
2412 mctx->state_log[dest_str_idx]
2413 = re_acquire_state_context (&err, dfa, new_dest_nodes,
2414 context);
2415 if (BE (mctx->state_log[dest_str_idx] == NULL
2416 && err != REG_NOERROR, 0))
2417 goto free_return;
2418 }
2419 else
2420 {
2421 re_node_set dest_nodes;
2422 err = re_node_set_init_union (&dest_nodes,
2423 dest_state->entrance_nodes,
2424 new_dest_nodes);
2425 if (BE (err != REG_NOERROR, 0))
2426 {
2427 re_node_set_free (&dest_nodes);
2428 goto free_return;
2429 }
2430 mctx->state_log[dest_str_idx]
2431 = re_acquire_state_context (&err, dfa, &dest_nodes, context);
2432 re_node_set_free (&dest_nodes);
2433 if (BE (mctx->state_log[dest_str_idx] == NULL
2434 && err != REG_NOERROR, 0))
2435 goto free_return;
2436 }
2437 /* We need to check recursively if the backreference can epsilon
2438 transit. */
2439 if (subexp_len == 0
2440 && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2441 {
2442 err = check_subexp_matching_top (dfa, mctx, new_dest_nodes,
2443 cur_str_idx);
2444 if (BE (err != REG_NOERROR, 0))
2445 goto free_return;
2446 err = transit_state_bkref (preg, new_dest_nodes, mctx);
2447 if (BE (err != REG_NOERROR, 0))
2448 goto free_return;
2449 }
2450 }
2451 }
2452 err = REG_NOERROR;
2453 free_return:
2454 return err;
2455}
2456
2457/* Enumerate all the candidates which the backreference BKREF_NODE can match
2458 at BKREF_STR_IDX, and register them by match_ctx_add_entry().
2459 Note that we might collect inappropriate candidates here.
2460 However, the cost of checking them strictly here is too high, then we
2461 delay these checking for prune_impossible_nodes(). */
2462
2463static reg_errcode_t
2464get_subexp (preg, mctx, bkref_node, bkref_str_idx)
2465 const regex_t *preg;
2466 re_match_context_t *mctx;
2467 int bkref_node, bkref_str_idx;
2468{
2469 int subexp_num, sub_top_idx;
2470 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2471 char *buf = (char *) re_string_get_buffer (mctx->input);
2472 /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX. */
2473 int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
2474 for (; cache_idx < mctx->nbkref_ents; ++cache_idx)
2475 {
2476 struct re_backref_cache_entry *entry = mctx->bkref_ents + cache_idx;
2477 if (entry->str_idx > bkref_str_idx)
2478 break;
2479 if (entry->node == bkref_node)
2480 return REG_NOERROR; /* We already checked it. */
2481 }
2482 subexp_num = dfa->nodes[bkref_node].opr.idx - 1;
2483
2484 /* For each sub expression */
2485 for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
2486 {
2487 reg_errcode_t err;
2488 re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
2489 re_sub_match_last_t *sub_last;
2490 int sub_last_idx, sl_str;
2491 char *bkref_str;
2492
2493 if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
2494 continue; /* It isn't related. */
2495
2496 sl_str = sub_top->str_idx;
2497 bkref_str = buf + bkref_str_idx;
2498 /* At first, check the last node of sub expressions we already
2499 evaluated. */
2500 for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
2501 {
2502 int sl_str_diff;
2503 sub_last = sub_top->lasts[sub_last_idx];
2504 sl_str_diff = sub_last->str_idx - sl_str;
2505 /* The matched string by the sub expression match with the substring
2506 at the back reference? */
2507 if (sl_str_diff > 0
2508 && memcmp (bkref_str, buf + sl_str, sl_str_diff) != 0)
2509 break; /* We don't need to search this sub expression any more. */
2510 bkref_str += sl_str_diff;
2511 sl_str += sl_str_diff;
2512 err = get_subexp_sub (preg, mctx, sub_top, sub_last, bkref_node,
2513 bkref_str_idx);
2514 if (err == REG_NOMATCH)
2515 continue;
2516 if (BE (err != REG_NOERROR, 0))
2517 return err;
2518 }
2519 if (sub_last_idx < sub_top->nlasts)
2520 continue;
2521 if (sub_last_idx > 0)
2522 ++sl_str;
2523 /* Then, search for the other last nodes of the sub expression. */
2524 for (; sl_str <= bkref_str_idx; ++sl_str)
2525 {
2526 int cls_node, sl_str_off;
2527 re_node_set *nodes;
2528 sl_str_off = sl_str - sub_top->str_idx;
2529 /* The matched string by the sub expression match with the substring
2530 at the back reference? */
2531 if (sl_str_off > 0
2532 && memcmp (bkref_str++, buf + sl_str - 1, 1) != 0)
2533 break; /* We don't need to search this sub expression any more. */
2534 if (mctx->state_log[sl_str] == NULL)
2535 continue;
2536 /* Does this state have a ')' of the sub expression? */
2537 nodes = &mctx->state_log[sl_str]->nodes;
2538 cls_node = find_subexp_node (dfa, nodes, subexp_num, 0);
2539 if (cls_node == -1)
2540 continue; /* No. */
2541 if (sub_top->path == NULL)
2542 {
2543 sub_top->path = calloc (sizeof (state_array_t),
2544 sl_str - sub_top->str_idx + 1);
2545 if (sub_top->path == NULL)
2546 return REG_ESPACE;
2547 }
2548 /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
2549 in the current context? */
2550 err = check_arrival (preg, mctx, sub_top->path, sub_top->node,
2551 sub_top->str_idx, cls_node, sl_str, 0);
2552 if (err == REG_NOMATCH)
2553 continue;
2554 if (BE (err != REG_NOERROR, 0))
2555 return err;
2556 sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2557 if (BE (sub_last == NULL, 0))
2558 return REG_ESPACE;
2559 err = get_subexp_sub (preg, mctx, sub_top, sub_last, bkref_node,
2560 bkref_str_idx);
2561 if (err == REG_NOMATCH)
2562 continue;
2563 }
2564 }
2565 return REG_NOERROR;
2566}
2567
2568/* Helper functions for get_subexp(). */
2569
2570/* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
2571 If it can arrive, register the sub expression expressed with SUB_TOP
2572 and SUB_LAST. */
2573
2574static reg_errcode_t
2575get_subexp_sub (preg, mctx, sub_top, sub_last, bkref_node, bkref_str)
2576 const regex_t *preg;
2577 re_match_context_t *mctx;
2578 re_sub_match_top_t *sub_top;
2579 re_sub_match_last_t *sub_last;
2580 int bkref_node, bkref_str;
2581{
2582 reg_errcode_t err;
2583 int to_idx;
2584 /* Can the subexpression arrive the back reference? */
2585 err = check_arrival (preg, mctx, &sub_last->path, sub_last->node,
2586 sub_last->str_idx, bkref_node, bkref_str, 1);
2587 if (err != REG_NOERROR)
2588 return err;
2589 err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
2590 sub_last->str_idx);
2591 if (BE (err != REG_NOERROR, 0))
2592 return err;
2593 to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
2594 clean_state_log_if_need (mctx, to_idx);
2595 return REG_NOERROR;
2596}
2597
2598/* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
2599 Search '(' if FL_OPEN, or search ')' otherwise.
2600 TODO: This function isn't efficient...
2601 Because there might be more than one nodes whose types are
2602 OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2603 nodes.
2604 E.g. RE: (a){2} */
2605
2606static int
2607find_subexp_node (dfa, nodes, subexp_idx, fl_open)
2608 re_dfa_t *dfa;
2609 re_node_set *nodes;
2610 int subexp_idx, fl_open;
2611{
2612 int cls_idx;
2613 for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
2614 {
2615 int cls_node = nodes->elems[cls_idx];
2616 re_token_t *node = dfa->nodes + cls_node;
2617 if (((fl_open && node->type == OP_OPEN_SUBEXP)
2618 || (!fl_open && node->type == OP_CLOSE_SUBEXP))
2619 && node->opr.idx == subexp_idx)
2620 return cls_node;
2621 }
2622 return -1;
2623}
2624
2625/* Check whether the node TOP_NODE at TOP_STR can arrive to the node
2626 LAST_NODE at LAST_STR. We record the path onto PATH since it will be
2627 heavily reused.
2628 Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */
2629
2630static reg_errcode_t
2631check_arrival (preg, mctx, path, top_node, top_str, last_node, last_str,
2632 fl_open)
2633 const regex_t *preg;
2634 re_match_context_t *mctx;
2635 state_array_t *path;
2636 int top_node, top_str, last_node, last_str, fl_open;
2637{
2638 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2639 reg_errcode_t err;
2640 int subexp_num, backup_cur_idx, str_idx, null_cnt;
2641 re_dfastate_t *cur_state = NULL;
2642 re_node_set *cur_nodes, next_nodes;
2643 re_dfastate_t **backup_state_log;
2644 unsigned int context;
2645
2646 subexp_num = dfa->nodes[top_node].opr.idx;
2647 /* Extend the buffer if we need. */
2648 if (path->alloc < last_str + mctx->max_mb_elem_len + 1)
2649 {
2650 re_dfastate_t **new_array;
2651 int old_alloc = path->alloc;
2652 path->alloc += last_str + mctx->max_mb_elem_len + 1;
2653 new_array = re_realloc (path->array, re_dfastate_t *, path->alloc);
2654 if (new_array == NULL)
2655 return REG_ESPACE;
2656 path->array = new_array;
2657 memset (new_array + old_alloc, '\0',
2658 sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
2659 }
2660
2661 str_idx = path->next_idx == 0 ? top_str : path->next_idx;
2662
2663 /* Temporary modify MCTX. */
2664 backup_state_log = mctx->state_log;
2665 backup_cur_idx = mctx->input->cur_idx;
2666 mctx->state_log = path->array;
2667 mctx->input->cur_idx = str_idx;
2668
2669 /* Setup initial node set. */
2670 context = re_string_context_at (mctx->input, str_idx - 1, mctx->eflags,
2671 preg->newline_anchor);
2672 if (str_idx == top_str)
2673 {
2674 err = re_node_set_init_1 (&next_nodes, top_node);
2675 if (BE (err != REG_NOERROR, 0))
2676 return err;
2677 err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, fl_open);
2678 if (BE (err != REG_NOERROR, 0))
2679 {
2680 re_node_set_free (&next_nodes);
2681 return err;
2682 }
2683 }
2684 else
2685 {
2686 cur_state = mctx->state_log[str_idx];
2687 if (cur_state && cur_state->has_backref)
2688 {
2689 err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
2690 if (BE ( err != REG_NOERROR, 0))
2691 return err;
2692 }
2693 else
2694 re_node_set_init_empty (&next_nodes);
2695 }
2696 if (str_idx == top_str || (cur_state && cur_state->has_backref))
2697 {
2698 if (next_nodes.nelem)
2699 {
2700 err = expand_bkref_cache (preg, mctx, &next_nodes, str_idx, last_str,
2701 subexp_num, fl_open);
2702 if (BE ( err != REG_NOERROR, 0))
2703 {
2704 re_node_set_free (&next_nodes);
2705 return err;
2706 }
2707 }
2708 cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2709 if (BE (cur_state == NULL && err != REG_NOERROR, 0))
2710 {
2711 re_node_set_free (&next_nodes);
2712 return err;
2713 }
2714 mctx->state_log[str_idx] = cur_state;
2715 }
2716
2717 for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
2718 {
2719 re_node_set_empty (&next_nodes);
2720 if (mctx->state_log[str_idx + 1])
2721 {
2722 err = re_node_set_merge (&next_nodes,
2723 &mctx->state_log[str_idx + 1]->nodes);
2724 if (BE (err != REG_NOERROR, 0))
2725 {
2726 re_node_set_free (&next_nodes);
2727 return err;
2728 }
2729 }
2730 if (cur_state)
2731 {
2732 err = check_arrival_add_next_nodes(preg, dfa, mctx, str_idx,
2733 &cur_state->nodes, &next_nodes);
2734 if (BE (err != REG_NOERROR, 0))
2735 {
2736 re_node_set_free (&next_nodes);
2737 return err;
2738 }
2739 }
2740 ++str_idx;
2741 if (next_nodes.nelem)
2742 {
2743 err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num,
2744 fl_open);
2745 if (BE (err != REG_NOERROR, 0))
2746 {
2747 re_node_set_free (&next_nodes);
2748 return err;
2749 }
2750 err = expand_bkref_cache (preg, mctx, &next_nodes, str_idx, last_str,
2751 subexp_num, fl_open);
2752 if (BE ( err != REG_NOERROR, 0))
2753 {
2754 re_node_set_free (&next_nodes);
2755 return err;
2756 }
2757 }
2758 context = re_string_context_at (mctx->input, str_idx - 1, mctx->eflags,
2759 preg->newline_anchor);
2760 cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2761 if (BE (cur_state == NULL && err != REG_NOERROR, 0))
2762 {
2763 re_node_set_free (&next_nodes);
2764 return err;
2765 }
2766 mctx->state_log[str_idx] = cur_state;
2767 null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
2768 }
2769 re_node_set_free (&next_nodes);
2770 cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
2771 : &mctx->state_log[last_str]->nodes);
2772 path->next_idx = str_idx;
2773
2774 /* Fix MCTX. */
2775 mctx->state_log = backup_state_log;
2776 mctx->input->cur_idx = backup_cur_idx;
2777
2778 if (cur_nodes == NULL)
2779 return REG_NOMATCH;
2780 /* Then check the current node set has the node LAST_NODE. */
2781 return (re_node_set_contains (cur_nodes, last_node)
2782 || re_node_set_contains (cur_nodes, last_node) ? REG_NOERROR
2783 : REG_NOMATCH);
2784}
2785
2786/* Helper functions for check_arrival. */
2787
2788/* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
2789 to NEXT_NODES.
2790 TODO: This function is similar to the functions transit_state*(),
2791 however this function has many additional works.
2792 Can't we unify them? */
2793
2794static reg_errcode_t
2795check_arrival_add_next_nodes (preg, dfa, mctx, str_idx, cur_nodes, next_nodes)
2796 const regex_t *preg;
2797 re_dfa_t *dfa;
2798 re_match_context_t *mctx;
2799 int str_idx;
2800 re_node_set *cur_nodes, *next_nodes;
2801{
2802 int cur_idx;
2803 reg_errcode_t err;
2804 re_node_set union_set;
2805 re_node_set_init_empty (&union_set);
2806 for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
2807 {
2808 int naccepted = 0;
2809 int cur_node = cur_nodes->elems[cur_idx];
2810 re_token_type_t type = dfa->nodes[cur_node].type;
2811 if (IS_EPSILON_NODE(type))
2812 continue;
2813#ifdef RE_ENABLE_I18N
2814 /* If the node may accept `multi byte'. */
2815 if (ACCEPT_MB_NODE (type))
2816 {
2817 naccepted = check_node_accept_bytes (preg, cur_node, mctx->input,
2818 str_idx);
2819 if (naccepted > 1)
2820 {
2821 re_dfastate_t *dest_state;
2822 int next_node = dfa->nexts[cur_node];
2823 int next_idx = str_idx + naccepted;
2824 dest_state = mctx->state_log[next_idx];
2825 re_node_set_empty (&union_set);
2826 if (dest_state)
2827 {
2828 err = re_node_set_merge (&union_set, &dest_state->nodes);
2829 if (BE (err != REG_NOERROR, 0))
2830 {
2831 re_node_set_free (&union_set);
2832 return err;
2833 }
2834 err = re_node_set_insert (&union_set, next_node);
2835 if (BE (err < 0, 0))
2836 {
2837 re_node_set_free (&union_set);
2838 return REG_ESPACE;
2839 }
2840 }
2841 else
2842 {
2843 err = re_node_set_insert (&union_set, next_node);
2844 if (BE (err < 0, 0))
2845 {
2846 re_node_set_free (&union_set);
2847 return REG_ESPACE;
2848 }
2849 }
2850 mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
2851 &union_set);
2852 if (BE (mctx->state_log[next_idx] == NULL
2853 && err != REG_NOERROR, 0))
2854 {
2855 re_node_set_free (&union_set);
2856 return err;
2857 }
2858 }
2859 }
2860#endif /* RE_ENABLE_I18N */
2861 if (naccepted
2862 || check_node_accept (preg, dfa->nodes + cur_node, mctx,
2863 str_idx))
2864 {
2865 err = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
2866 if (BE (err < 0, 0))
2867 {
2868 re_node_set_free (&union_set);
2869 return REG_ESPACE;
2870 }
2871 }
2872 }
2873 re_node_set_free (&union_set);
2874 return REG_NOERROR;
2875}
2876
2877/* For all the nodes in CUR_NODES, add the epsilon closures of them to
2878 CUR_NODES, however exclude the nodes which are:
2879 - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
2880 - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
2881*/
2882
2883static reg_errcode_t
2884check_arrival_expand_ecl (dfa, cur_nodes, ex_subexp, fl_open)
2885 re_dfa_t *dfa;
2886 re_node_set *cur_nodes;
2887 int ex_subexp, fl_open;
2888{
2889 reg_errcode_t err;
2890 int idx, outside_node;
2891 re_node_set new_nodes;
2892#ifdef DEBUG
2893 assert (cur_nodes->nelem);
2894#endif
2895 err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
2896 if (BE (err != REG_NOERROR, 0))
2897 return err;
2898 /* Create a new node set NEW_NODES with the nodes which are epsilon
2899 closures of the node in CUR_NODES. */
2900
2901 for (idx = 0; idx < cur_nodes->nelem; ++idx)
2902 {
2903 int cur_node = cur_nodes->elems[idx];
2904 re_node_set *eclosure = dfa->eclosures + cur_node;
2905 outside_node = find_subexp_node (dfa, eclosure, ex_subexp, fl_open);
2906 if (outside_node == -1)
2907 {
2908 /* There are no problematic nodes, just merge them. */
2909 err = re_node_set_merge (&new_nodes, eclosure);
2910 if (BE (err != REG_NOERROR, 0))
2911 {
2912 re_node_set_free (&new_nodes);
2913 return err;
2914 }
2915 }
2916 else
2917 {
2918 /* There are problematic nodes, re-calculate incrementally. */
2919 err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
2920 ex_subexp, fl_open);
2921 if (BE (err != REG_NOERROR, 0))
2922 {
2923 re_node_set_free (&new_nodes);
2924 return err;
2925 }
2926 }
2927 }
2928 re_node_set_free (cur_nodes);
2929 *cur_nodes = new_nodes;
2930 return REG_NOERROR;
2931}
2932
2933/* Helper function for check_arrival_expand_ecl.
2934 Check incrementally the epsilon closure of TARGET, and if it isn't
2935 problematic append it to DST_NODES. */
2936
2937static reg_errcode_t
2938check_arrival_expand_ecl_sub (dfa, dst_nodes, target, ex_subexp, fl_open)
2939 re_dfa_t *dfa;
2940 int target, ex_subexp, fl_open;
2941 re_node_set *dst_nodes;
2942{
2943 int cur_node, type;
2944 for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
2945 {
2946 int err;
2947 type = dfa->nodes[cur_node].type;
2948
2949 if (((type == OP_OPEN_SUBEXP && fl_open)
2950 || (type == OP_CLOSE_SUBEXP && !fl_open))
2951 && dfa->nodes[cur_node].opr.idx == ex_subexp)
2952 {
2953 if (!fl_open)
2954 {
2955 err = re_node_set_insert (dst_nodes, cur_node);
2956 if (BE (err == -1, 0))
2957 return REG_ESPACE;
2958 }
2959 break;
2960 }
2961 err = re_node_set_insert (dst_nodes, cur_node);
2962 if (BE (err == -1, 0))
2963 return REG_ESPACE;
2964 if (dfa->edests[cur_node].nelem == 0)
2965 break;
2966 if (dfa->edests[cur_node].nelem == 2)
2967 {
2968 err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
2969 dfa->edests[cur_node].elems[1],
2970 ex_subexp, fl_open);
2971 if (BE (err != REG_NOERROR, 0))
2972 return err;
2973 }
2974 cur_node = dfa->edests[cur_node].elems[0];
2975 }
2976 return REG_NOERROR;
2977}
2978
2979
2980/* For all the back references in the current state, calculate the
2981 destination of the back references by the appropriate entry
2982 in MCTX->BKREF_ENTS. */
2983
2984static reg_errcode_t
2985expand_bkref_cache (preg, mctx, cur_nodes, cur_str, last_str, subexp_num,
2986 fl_open)
2987 const regex_t *preg;
2988 re_match_context_t *mctx;
2989 int cur_str, last_str, subexp_num, fl_open;
2990 re_node_set *cur_nodes;
2991{
2992 reg_errcode_t err;
2993 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2994 int cache_idx, cache_idx_start;
2995 /* The current state. */
2996
2997 cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
2998 for (cache_idx = cache_idx_start; cache_idx < mctx->nbkref_ents; ++cache_idx)
2999 {
3000 int to_idx, next_node;
3001 struct re_backref_cache_entry *ent = mctx->bkref_ents + cache_idx;
3002 if (ent->str_idx > cur_str)
3003 break;
3004 /* Is this entry ENT is appropriate? */
3005 if (!re_node_set_contains (cur_nodes, ent->node))
3006 continue; /* No. */
3007
3008 to_idx = cur_str + ent->subexp_to - ent->subexp_from;
3009 /* Calculate the destination of the back reference, and append it
3010 to MCTX->STATE_LOG. */
3011 if (to_idx == cur_str)
3012 {
3013 /* The backreference did epsilon transit, we must re-check all the
3014 node in the current state. */
3015 re_node_set new_dests;
3016 reg_errcode_t err2, err3;
3017 next_node = dfa->edests[ent->node].elems[0];
3018 if (re_node_set_contains (cur_nodes, next_node))
3019 continue;
3020 err = re_node_set_init_1 (&new_dests, next_node);
3021 err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num,
3022 fl_open);
3023 err3 = re_node_set_merge (cur_nodes, &new_dests);
3024 re_node_set_free (&new_dests);
3025 if (BE (err != REG_NOERROR || err2 != REG_NOERROR
3026 || err3 != REG_NOERROR, 0))
3027 {
3028 err = (err != REG_NOERROR ? err
3029 : (err2 != REG_NOERROR ? err2 : err3));
3030 return err;
3031 }
3032 /* TODO: It is still inefficient... */
3033 cache_idx = cache_idx_start - 1;
3034 continue;
3035 }
3036 else
3037 {
3038 re_node_set union_set;
3039 next_node = dfa->nexts[ent->node];
3040 if (mctx->state_log[to_idx])
3041 {
3042 int ret;
3043 if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
3044 next_node))
3045 continue;
3046 err = re_node_set_init_copy (&union_set,
3047 &mctx->state_log[to_idx]->nodes);
3048 ret = re_node_set_insert (&union_set, next_node);
3049 if (BE (err != REG_NOERROR || ret < 0, 0))
3050 {
3051 re_node_set_free (&union_set);
3052 err = err != REG_NOERROR ? err : REG_ESPACE;
3053 return err;
3054 }
3055 }
3056 else
3057 {
3058 err = re_node_set_init_1 (&union_set, next_node);
3059 if (BE (err != REG_NOERROR, 0))
3060 return err;
3061 }
3062 mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
3063 re_node_set_free (&union_set);
3064 if (BE (mctx->state_log[to_idx] == NULL
3065 && err != REG_NOERROR, 0))
3066 return err;
3067 }
3068 }
3069 return REG_NOERROR;
3070}
3071
3072/* Build transition table for the state.
3073 Return the new table if succeeded, otherwise return NULL. */
3074
3075static re_dfastate_t **
3076build_trtable (preg, state, fl_search)
3077 const regex_t *preg;
3078 const re_dfastate_t *state;
3079 int fl_search;
3080{
3081 reg_errcode_t err;
3082 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
3083 int i, j, k, ch;
3084 int dests_node_malloced = 0, dest_states_malloced = 0;
3085 int ndests; /* Number of the destination states from `state'. */
3086 re_dfastate_t **trtable;
3087 re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
3088 re_node_set follows, *dests_node;
3089 bitset *dests_ch;
3090 bitset acceptable;
3091
3092 /* We build DFA states which corresponds to the destination nodes
3093 from `state'. `dests_node[i]' represents the nodes which i-th
3094 destination state contains, and `dests_ch[i]' represents the
3095 characters which i-th destination state accepts. */
3096#ifdef _LIBC
3097 if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX))
3098 dests_node = (re_node_set *)
3099 alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
3100 else
3101#endif
3102 {
3103 dests_node = (re_node_set *)
3104 malloc ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
3105 if (BE (dests_node == NULL, 0))
3106 return NULL;
3107 dests_node_malloced = 1;
3108 }
3109 dests_ch = (bitset *) (dests_node + SBC_MAX);
3110
3111 /* Initialize transiton table. */
3112 trtable = (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
3113 if (BE (trtable == NULL, 0))
3114 {
3115 if (dests_node_malloced)
3116 free (dests_node);
3117 return NULL;
3118 }
3119
3120 /* At first, group all nodes belonging to `state' into several
3121 destinations. */
3122 ndests = group_nodes_into_DFAstates (preg, state, dests_node, dests_ch);
3123 if (BE (ndests <= 0, 0))
3124 {
3125 if (dests_node_malloced)
3126 free (dests_node);
3127 /* Return NULL in case of an error, trtable otherwise. */
3128 if (ndests == 0)
3129 return trtable;
3130 free (trtable);
3131 return NULL;
3132 }
3133
3134 err = re_node_set_alloc (&follows, ndests + 1);
3135 if (BE (err != REG_NOERROR, 0))
3136 goto out_free;
3137
3138#ifdef _LIBC
3139 if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX
3140 + ndests * 3 * sizeof (re_dfastate_t *)))
3141 dest_states = (re_dfastate_t **)
3142 alloca (ndests * 3 * sizeof (re_dfastate_t *));
3143 else
3144#endif
3145 {
3146 dest_states = (re_dfastate_t **)
3147 malloc (ndests * 3 * sizeof (re_dfastate_t *));
3148 if (BE (dest_states == NULL, 0))
3149 {
3150out_free:
3151 if (dest_states_malloced)
3152 free (dest_states);
3153 re_node_set_free (&follows);
3154 for (i = 0; i < ndests; ++i)
3155 re_node_set_free (dests_node + i);
3156 free (trtable);
3157 if (dests_node_malloced)
3158 free (dests_node);
3159 return NULL;
3160 }
3161 dest_states_malloced = 1;
3162 }
3163 dest_states_word = dest_states + ndests;
3164 dest_states_nl = dest_states_word + ndests;
3165 bitset_empty (acceptable);
3166
3167 /* Then build the states for all destinations. */
3168 for (i = 0; i < ndests; ++i)
3169 {
3170 int next_node;
3171 re_node_set_empty (&follows);
3172 /* Merge the follows of this destination states. */
3173 for (j = 0; j < dests_node[i].nelem; ++j)
3174 {
3175 next_node = dfa->nexts[dests_node[i].elems[j]];
3176 if (next_node != -1)
3177 {
3178 err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3179 if (BE (err != REG_NOERROR, 0))
3180 goto out_free;
3181 }
3182 }
3183 /* If search flag is set, merge the initial state. */
3184 if (fl_search)
3185 {
3186#ifdef RE_ENABLE_I18N
3187 int not_initial = 0;
3188 for (j = 0; j < follows.nelem; ++j)
3189 if (dfa->nodes[follows.elems[j]].type == CHARACTER)
3190 {
3191 not_initial = dfa->nodes[follows.elems[j]].mb_partial;
3192 break;
3193 }
3194 if (!not_initial)
3195#endif
3196 {
3197 err = re_node_set_merge (&follows,
3198 dfa->init_state->entrance_nodes);
3199 if (BE (err != REG_NOERROR, 0))
3200 goto out_free;
3201 }
3202 }
3203 dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
3204 if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
3205 goto out_free;
3206 /* If the new state has context constraint,
3207 build appropriate states for these contexts. */
3208 if (dest_states[i]->has_constraint)
3209 {
3210 dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
3211 CONTEXT_WORD);
3212 if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
3213 goto out_free;
3214 dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
3215 CONTEXT_NEWLINE);
3216 if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
3217 goto out_free;
3218 }
3219 else
3220 {
3221 dest_states_word[i] = dest_states[i];
3222 dest_states_nl[i] = dest_states[i];
3223 }
3224 bitset_merge (acceptable, dests_ch[i]);
3225 }
3226
3227 /* Update the transition table. */
3228 /* For all characters ch...: */
3229 for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
3230 for (j = 0; j < UINT_BITS; ++j, ++ch)
3231 if ((acceptable[i] >> j) & 1)
3232 {
3233 /* The current state accepts the character ch. */
3234 if (IS_WORD_CHAR (ch))
3235 {
3236 for (k = 0; k < ndests; ++k)
3237 if ((dests_ch[k][i] >> j) & 1)
3238 {
3239 /* k-th destination accepts the word character ch. */
3240 trtable[ch] = dest_states_word[k];
3241 /* There must be only one destination which accepts
3242 character ch. See group_nodes_into_DFAstates. */
3243 break;
3244 }
3245 }
3246 else /* not WORD_CHAR */
3247 {
3248 for (k = 0; k < ndests; ++k)
3249 if ((dests_ch[k][i] >> j) & 1)
3250 {
3251 /* k-th destination accepts the non-word character ch. */
3252 trtable[ch] = dest_states[k];
3253 /* There must be only one destination which accepts
3254 character ch. See group_nodes_into_DFAstates. */
3255 break;
3256 }
3257 }
3258 }
3259 /* new line */
3260 if (bitset_contain (acceptable, NEWLINE_CHAR))
3261 {
3262 /* The current state accepts newline character. */
3263 for (k = 0; k < ndests; ++k)
3264 if (bitset_contain (dests_ch[k], NEWLINE_CHAR))
3265 {
3266 /* k-th destination accepts newline character. */
3267 trtable[NEWLINE_CHAR] = dest_states_nl[k];
3268 /* There must be only one destination which accepts
3269 newline. See group_nodes_into_DFAstates. */
3270 break;
3271 }
3272 }
3273
3274 if (dest_states_malloced)
3275 free (dest_states);
3276
3277 re_node_set_free (&follows);
3278 for (i = 0; i < ndests; ++i)
3279 re_node_set_free (dests_node + i);
3280
3281 if (dests_node_malloced)
3282 free (dests_node);
3283
3284 return trtable;
3285}
3286
3287/* Group all nodes belonging to STATE into several destinations.
3288 Then for all destinations, set the nodes belonging to the destination
3289 to DESTS_NODE[i] and set the characters accepted by the destination
3290 to DEST_CH[i]. This function return the number of destinations. */
3291
3292static int
3293group_nodes_into_DFAstates (preg, state, dests_node, dests_ch)
3294 const regex_t *preg;
3295 const re_dfastate_t *state;
3296 re_node_set *dests_node;
3297 bitset *dests_ch;
3298{
3299 reg_errcode_t err;
3300 const re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
3301 int i, j, k;
3302 int ndests; /* Number of the destinations from `state'. */
3303 bitset accepts; /* Characters a node can accept. */
3304 const re_node_set *cur_nodes = &state->nodes;
3305 bitset_empty (accepts);
3306 ndests = 0;
3307
3308 /* For all the nodes belonging to `state', */
3309 for (i = 0; i < cur_nodes->nelem; ++i)
3310 {
3311 re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
3312 re_token_type_t type = node->type;
3313 unsigned int constraint = node->constraint;
3314
3315 /* Enumerate all single byte character this node can accept. */
3316 if (type == CHARACTER)
3317 bitset_set (accepts, node->opr.c);
3318 else if (type == SIMPLE_BRACKET)
3319 {
3320 bitset_merge (accepts, node->opr.sbcset);
3321 }
3322 else if (type == OP_PERIOD)
3323 {
3324 bitset_set_all (accepts);
3325 if (!(preg->syntax & RE_DOT_NEWLINE))
3326 bitset_clear (accepts, '\n');
3327 if (preg->syntax & RE_DOT_NOT_NULL)
3328 bitset_clear (accepts, '\0');
3329 }
3330 else
3331 continue;
3332
3333 /* Check the `accepts' and sift the characters which are not
3334 match it the context. */
3335 if (constraint)
3336 {
3337 if (constraint & NEXT_WORD_CONSTRAINT)
3338 for (j = 0; j < BITSET_UINTS; ++j)
3339 accepts[j] &= dfa->word_char[j];
3340 if (constraint & NEXT_NOTWORD_CONSTRAINT)
3341 for (j = 0; j < BITSET_UINTS; ++j)
3342 accepts[j] &= ~dfa->word_char[j];
3343 if (constraint & NEXT_NEWLINE_CONSTRAINT)
3344 {
3345 int accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
3346 bitset_empty (accepts);
3347 if (accepts_newline)
3348 bitset_set (accepts, NEWLINE_CHAR);
3349 else
3350 continue;
3351 }
3352 }
3353
3354 /* Then divide `accepts' into DFA states, or create a new
3355 state. */
3356 for (j = 0; j < ndests; ++j)
3357 {
3358 bitset intersec; /* Intersection sets, see below. */
3359 bitset remains;
3360 /* Flags, see below. */
3361 int has_intersec, not_subset, not_consumed;
3362
3363 /* Optimization, skip if this state doesn't accept the character. */
3364 if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
3365 continue;
3366
3367 /* Enumerate the intersection set of this state and `accepts'. */
3368 has_intersec = 0;
3369 for (k = 0; k < BITSET_UINTS; ++k)
3370 has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
3371 /* And skip if the intersection set is empty. */
3372 if (!has_intersec)
3373 continue;
3374
3375 /* Then check if this state is a subset of `accepts'. */
3376 not_subset = not_consumed = 0;
3377 for (k = 0; k < BITSET_UINTS; ++k)
3378 {
3379 not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
3380 not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
3381 }
3382
3383 /* If this state isn't a subset of `accepts', create a
3384 new group state, which has the `remains'. */
3385 if (not_subset)
3386 {
3387 bitset_copy (dests_ch[ndests], remains);
3388 bitset_copy (dests_ch[j], intersec);
3389 err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3390 if (BE (err != REG_NOERROR, 0))
3391 goto error_return;
3392 ++ndests;
3393 }
3394
3395 /* Put the position in the current group. */
3396 err = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3397 if (BE (err < 0, 0))
3398 goto error_return;
3399
3400 /* If all characters are consumed, go to next node. */
3401 if (!not_consumed)
3402 break;
3403 }
3404 /* Some characters remain, create a new group. */
3405 if (j == ndests)
3406 {
3407 bitset_copy (dests_ch[ndests], accepts);
3408 err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3409 if (BE (err != REG_NOERROR, 0))
3410 goto error_return;
3411 ++ndests;
3412 bitset_empty (accepts);
3413 }
3414 }
3415 return ndests;
3416 error_return:
3417 for (j = 0; j < ndests; ++j)
3418 re_node_set_free (dests_node + j);
3419 return -1;
3420}
3421
3422#ifdef RE_ENABLE_I18N
3423/* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
3424 Return the number of the bytes the node accepts.
3425 STR_IDX is the current index of the input string.
3426
3427 This function handles the nodes which can accept one character, or
3428 one collating element like '.', '[a-z]', opposite to the other nodes
3429 can only accept one byte. */
3430
3431static int
3432check_node_accept_bytes (preg, node_idx, input, str_idx)
3433 const regex_t *preg;
3434 int node_idx, str_idx;
3435 const re_string_t *input;
3436{
3437 const re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
3438 const re_token_t *node = dfa->nodes + node_idx;
3439 int elem_len = re_string_elem_size_at (input, str_idx);
3440 int char_len = re_string_char_size_at (input, str_idx);
3441 int i;
3442# ifdef _LIBC
3443 int j;
3444 uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3445# endif /* _LIBC */
3446 if (elem_len <= 1 && char_len <= 1)
3447 return 0;
3448 if (node->type == OP_PERIOD)
3449 {
3450 /* '.' accepts any one character except the following two cases. */
3451 if ((!(preg->syntax & RE_DOT_NEWLINE) &&
3452 re_string_byte_at (input, str_idx) == '\n') ||
3453 ((preg->syntax & RE_DOT_NOT_NULL) &&
3454 re_string_byte_at (input, str_idx) == '\0'))
3455 return 0;
3456 return char_len;
3457 }
3458 else if (node->type == COMPLEX_BRACKET)
3459 {
3460 const re_charset_t *cset = node->opr.mbcset;
3461# ifdef _LIBC
3462 const unsigned char *pin = ((char *) re_string_get_buffer (input)
3463 + str_idx);
3464# endif /* _LIBC */
3465 int match_len = 0;
3466 wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
3467 ? re_string_wchar_at (input, str_idx) : 0);
3468
3469 /* match with multibyte character? */
3470 for (i = 0; i < cset->nmbchars; ++i)
3471 if (wc == cset->mbchars[i])
3472 {
3473 match_len = char_len;
3474 goto check_node_accept_bytes_match;
3475 }
3476 /* match with character_class? */
3477 for (i = 0; i < cset->nchar_classes; ++i)
3478 {
3479 wctype_t wt = cset->char_classes[i];
3480 if (__iswctype (wc, wt))
3481 {
3482 match_len = char_len;
3483 goto check_node_accept_bytes_match;
3484 }
3485 }
3486
3487# ifdef _LIBC
3488 if (nrules != 0)
3489 {
3490 unsigned int in_collseq = 0;
3491 const int32_t *table, *indirect;
3492 const unsigned char *weights, *extra;
3493 const char *collseqwc;
3494 int32_t idx;
3495 /* This #include defines a local function! */
3496# include <locale/weight.h>
3497
3498 /* match with collating_symbol? */
3499 if (cset->ncoll_syms)
3500 extra = (const unsigned char *)
3501 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3502 for (i = 0; i < cset->ncoll_syms; ++i)
3503 {
3504 const unsigned char *coll_sym = extra + cset->coll_syms[i];
3505 /* Compare the length of input collating element and
3506 the length of current collating element. */
3507 if (*coll_sym != elem_len)
3508 continue;
3509 /* Compare each bytes. */
3510 for (j = 0; j < *coll_sym; j++)
3511 if (pin[j] != coll_sym[1 + j])
3512 break;
3513 if (j == *coll_sym)
3514 {
3515 /* Match if every bytes is equal. */
3516 match_len = j;
3517 goto check_node_accept_bytes_match;
3518 }
3519 }
3520
3521 if (cset->nranges)
3522 {
3523 if (elem_len <= char_len)
3524 {
3525 collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3526 in_collseq = collseq_table_lookup (collseqwc, wc);
3527 }
3528 else
3529 in_collseq = find_collation_sequence_value (pin, elem_len);
3530 }
3531 /* match with range expression? */
3532 for (i = 0; i < cset->nranges; ++i)
3533 if (cset->range_starts[i] <= in_collseq
3534 && in_collseq <= cset->range_ends[i])
3535 {
3536 match_len = elem_len;
3537 goto check_node_accept_bytes_match;
3538 }
3539
3540 /* match with equivalence_class? */
3541 if (cset->nequiv_classes)
3542 {
3543 const unsigned char *cp = pin;
3544 table = (const int32_t *)
3545 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3546 weights = (const unsigned char *)
3547 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3548 extra = (const unsigned char *)
3549 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3550 indirect = (const int32_t *)
3551 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3552 idx = findidx (&cp);
3553 if (idx > 0)
3554 for (i = 0; i < cset->nequiv_classes; ++i)
3555 {
3556 int32_t equiv_class_idx = cset->equiv_classes[i];
3557 size_t weight_len = weights[idx];
3558 if (weight_len == weights[equiv_class_idx])
3559 {
3560 int cnt = 0;
3561 while (cnt <= weight_len
3562 && (weights[equiv_class_idx + 1 + cnt]
3563 == weights[idx + 1 + cnt]))
3564 ++cnt;
3565 if (cnt > weight_len)
3566 {
3567 match_len = elem_len;
3568 goto check_node_accept_bytes_match;
3569 }
3570 }
3571 }
3572 }
3573 }
3574 else
3575# endif /* _LIBC */
3576 {
3577 /* match with range expression? */
3578#if __GNUC__ >= 2
3579 wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
3580#else
3581 wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
3582 cmp_buf[2] = wc;
3583#endif
3584 for (i = 0; i < cset->nranges; ++i)
3585 {
3586 cmp_buf[0] = cset->range_starts[i];
3587 cmp_buf[4] = cset->range_ends[i];
3588 if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
3589 && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
3590 {
3591 match_len = char_len;
3592 goto check_node_accept_bytes_match;
3593 }
3594 }
3595 }
3596 check_node_accept_bytes_match:
3597 if (!cset->non_match)
3598 return match_len;
3599 else
3600 {
3601 if (match_len > 0)
3602 return 0;
3603 else
3604 return (elem_len > char_len) ? elem_len : char_len;
3605 }
3606 }
3607 return 0;
3608}
3609
3610# ifdef _LIBC
3611static unsigned int
3612find_collation_sequence_value (mbs, mbs_len)
3613 const unsigned char *mbs;
3614 size_t mbs_len;
3615{
3616 uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3617 if (nrules == 0)
3618 {
3619 if (mbs_len == 1)
3620 {
3621 /* No valid character. Match it as a single byte character. */
3622 const unsigned char *collseq = (const unsigned char *)
3623 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
3624 return collseq[mbs[0]];
3625 }
3626 return UINT_MAX;
3627 }
3628 else
3629 {
3630 int32_t idx;
3631 const unsigned char *extra = (const unsigned char *)
3632 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3633
3634 for (idx = 0; ;)
3635 {
3636 int mbs_cnt, found = 0;
3637 int32_t elem_mbs_len;
3638 /* Skip the name of collating element name. */
3639 idx = idx + extra[idx] + 1;
3640 elem_mbs_len = extra[idx++];
3641 if (mbs_len == elem_mbs_len)
3642 {
3643 for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
3644 if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
3645 break;
3646 if (mbs_cnt == elem_mbs_len)
3647 /* Found the entry. */
3648 found = 1;
3649 }
3650 /* Skip the byte sequence of the collating element. */
3651 idx += elem_mbs_len;
3652 /* Adjust for the alignment. */
3653 idx = (idx + 3) & ~3;
3654 /* Skip the collation sequence value. */
3655 idx += sizeof (uint32_t);
3656 /* Skip the wide char sequence of the collating element. */
3657 idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
3658 /* If we found the entry, return the sequence value. */
3659 if (found)
3660 return *(uint32_t *) (extra + idx);
3661 /* Skip the collation sequence value. */
3662 idx += sizeof (uint32_t);
3663 }
3664 }
3665}
3666# endif /* _LIBC */
3667#endif /* RE_ENABLE_I18N */
3668
3669/* Check whether the node accepts the byte which is IDX-th
3670 byte of the INPUT. */
3671
3672static int
3673check_node_accept (preg, node, mctx, idx)
3674 const regex_t *preg;
3675 const re_token_t *node;
3676 const re_match_context_t *mctx;
3677 int idx;
3678{
3679 unsigned char ch;
3680 if (node->constraint)
3681 {
3682 /* The node has constraints. Check whether the current context
3683 satisfies the constraints. */
3684 unsigned int context = re_string_context_at (mctx->input, idx,
3685 mctx->eflags,
3686 preg->newline_anchor);
3687 if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
3688 return 0;
3689 }
3690 ch = re_string_byte_at (mctx->input, idx);
3691 if (node->type == CHARACTER)
3692 return node->opr.c == ch;
3693 else if (node->type == SIMPLE_BRACKET)
3694 return bitset_contain (node->opr.sbcset, ch);
3695 else if (node->type == OP_PERIOD)
3696 return !((ch == '\n' && !(preg->syntax & RE_DOT_NEWLINE))
3697 || (ch == '\0' && (preg->syntax & RE_DOT_NOT_NULL)));
3698 else
3699 return 0;
3700}
3701
3702/* Extend the buffers, if the buffers have run out. */
3703
3704static reg_errcode_t
3705extend_buffers (mctx)
3706 re_match_context_t *mctx;
3707{
3708 reg_errcode_t ret;
3709 re_string_t *pstr = mctx->input;
3710
3711 /* Double the lengthes of the buffers. */
3712 ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
3713 if (BE (ret != REG_NOERROR, 0))
3714 return ret;
3715
3716 if (mctx->state_log != NULL)
3717 {
3718 /* And double the length of state_log. */
3719 re_dfastate_t **new_array;
3720 new_array = re_realloc (mctx->state_log, re_dfastate_t *,
3721 pstr->bufs_len * 2);
3722 if (BE (new_array == NULL, 0))
3723 return REG_ESPACE;
3724 mctx->state_log = new_array;
3725 }
3726
3727 /* Then reconstruct the buffers. */
3728 if (pstr->icase)
3729 {
3730#ifdef RE_ENABLE_I18N
3731 if (MB_CUR_MAX > 1)
3732 build_wcs_upper_buffer (pstr);
3733 else
3734#endif /* RE_ENABLE_I18N */
3735 build_upper_buffer (pstr);
3736 }
3737 else
3738 {
3739#ifdef RE_ENABLE_I18N
3740 if (MB_CUR_MAX > 1)
3741 build_wcs_buffer (pstr);
3742 else
3743#endif /* RE_ENABLE_I18N */
3744 {
3745 if (pstr->trans != NULL)
3746 re_string_translate_buffer (pstr);
3747 else
3748 pstr->valid_len = pstr->bufs_len;
3749 }
3750 }
3751 return REG_NOERROR;
3752}
3753
3754\f
3755/* Functions for matching context. */
3756
3757/* Initialize MCTX. */
3758
3759static reg_errcode_t
3760match_ctx_init (mctx, eflags, input, n)
3761 re_match_context_t *mctx;
3762 int eflags, n;
3763 re_string_t *input;
3764{
3765 mctx->eflags = eflags;
3766 mctx->input = input;
3767 mctx->match_last = -1;
3768 if (n > 0)
3769 {
3770 mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
3771 mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
3772 if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0))
3773 return REG_ESPACE;
3774 }
3775 else
3776 mctx->bkref_ents = NULL;
3777 mctx->nbkref_ents = 0;
3778 mctx->abkref_ents = n;
3779 mctx->max_mb_elem_len = 1;
3780 mctx->nsub_tops = 0;
3781 mctx->asub_tops = n;
3782 return REG_NOERROR;
3783}
3784
3785/* Clean the entries which depend on the current input in MCTX.
3786 This function must be invoked when the matcher changes the start index
3787 of the input, or changes the input string. */
3788
3789static void
3790match_ctx_clean (mctx)
3791 re_match_context_t *mctx;
3792{
3793 match_ctx_free_subtops (mctx);
3794 mctx->nsub_tops = 0;
3795 mctx->nbkref_ents = 0;
3796}
3797
3798/* Free all the memory associated with MCTX. */
3799
3800static void
3801match_ctx_free (mctx)
3802 re_match_context_t *mctx;
3803{
3804 match_ctx_free_subtops (mctx);
3805 re_free (mctx->sub_tops);
3806 re_free (mctx->bkref_ents);
3807}
3808
3809/* Free all the memory associated with MCTX->SUB_TOPS. */
3810
3811static void
3812match_ctx_free_subtops (mctx)
3813 re_match_context_t *mctx;
3814{
3815 int st_idx;
3816 for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
3817 {
3818 int sl_idx;
3819 re_sub_match_top_t *top = mctx->sub_tops[st_idx];
3820 for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
3821 {
3822 re_sub_match_last_t *last = top->lasts[sl_idx];
3823 re_free (last->path.array);
3824 re_free (last);
3825 }
3826 re_free (top->lasts);
3827 if (top->path)
3828 {
3829 re_free (top->path->array);
3830 re_free (top->path);
3831 }
3832 free (top);
3833 }
3834}
3835
3836/* Add a new backreference entry to MCTX.
3837 Note that we assume that caller never call this function with duplicate
3838 entry, and call with STR_IDX which isn't smaller than any existing entry.
3839*/
3840
3841static reg_errcode_t
3842match_ctx_add_entry (mctx, node, str_idx, from, to)
3843 re_match_context_t *mctx;
3844 int node, str_idx, from, to;
3845{
3846 if (mctx->nbkref_ents >= mctx->abkref_ents)
3847 {
3848 struct re_backref_cache_entry* new_entry;
3849 new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
3850 mctx->abkref_ents * 2);
3851 if (BE (new_entry == NULL, 0))
3852 {
3853 re_free (mctx->bkref_ents);
3854 return REG_ESPACE;
3855 }
3856 mctx->bkref_ents = new_entry;
3857 memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
3858 sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
3859 mctx->abkref_ents *= 2;
3860 }
3861 mctx->bkref_ents[mctx->nbkref_ents].node = node;
3862 mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
3863 mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
3864 mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
3865 mctx->bkref_ents[mctx->nbkref_ents++].flag = 0;
3866 if (mctx->max_mb_elem_len < to - from)
3867 mctx->max_mb_elem_len = to - from;
3868 return REG_NOERROR;
3869}
3870
3871/* Search for the first entry which has the same str_idx.
3872 Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. */
3873
3874static int
3875search_cur_bkref_entry (mctx, str_idx)
3876 re_match_context_t *mctx;
3877 int str_idx;
3878{
3879 int left, right, mid;
3880 right = mctx->nbkref_ents;
3881 for (left = 0; left < right;)
3882 {
3883 mid = (left + right) / 2;
3884 if (mctx->bkref_ents[mid].str_idx < str_idx)
3885 left = mid + 1;
3886 else
3887 right = mid;
3888 }
3889 return left;
3890}
3891
3892static void
3893match_ctx_clear_flag (mctx)
3894 re_match_context_t *mctx;
3895{
3896 int i;
3897 for (i = 0; i < mctx->nbkref_ents; ++i)
3898 {
3899 mctx->bkref_ents[i].flag = 0;
3900 }
3901}
3902
3903/* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
3904 at STR_IDX. */
3905
3906static reg_errcode_t
3907match_ctx_add_subtop (mctx, node, str_idx)
3908 re_match_context_t *mctx;
3909 int node, str_idx;
3910{
3911#ifdef DEBUG
3912 assert (mctx->sub_tops != NULL);
3913 assert (mctx->asub_tops > 0);
3914#endif
3915 if (mctx->nsub_tops == mctx->asub_tops)
3916 {
3917 re_sub_match_top_t **new_array;
3918 mctx->asub_tops *= 2;
3919 new_array = re_realloc (mctx->sub_tops, re_sub_match_top_t *,
3920 mctx->asub_tops);
3921 if (BE (new_array == NULL, 0))
3922 return REG_ESPACE;
3923 mctx->sub_tops = new_array;
3924 }
3925 mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
3926 if (mctx->sub_tops[mctx->nsub_tops] == NULL)
3927 return REG_ESPACE;
3928 mctx->sub_tops[mctx->nsub_tops]->node = node;
3929 mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
3930 return REG_NOERROR;
3931}
3932
3933/* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
3934 at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. */
3935
3936static re_sub_match_last_t *
3937match_ctx_add_sublast (subtop, node, str_idx)
3938 re_sub_match_top_t *subtop;
3939 int node, str_idx;
3940{
3941 re_sub_match_last_t *new_entry;
3942 if (subtop->nlasts == subtop->alasts)
3943 {
3944 re_sub_match_last_t **new_array;
3945 subtop->alasts = 2 * subtop->alasts + 1;
3946 new_array = re_realloc (subtop->lasts, re_sub_match_last_t *,
3947 subtop->alasts);
3948 if (BE (new_array == NULL, 0))
3949 return NULL;
3950 subtop->lasts = new_array;
3951 }
3952 new_entry = calloc (1, sizeof (re_sub_match_last_t));
3953 if (BE (new_entry == NULL, 0))
3954 return NULL;
3955 subtop->lasts[subtop->nlasts] = new_entry;
3956 new_entry->node = node;
3957 new_entry->str_idx = str_idx;
3958 ++subtop->nlasts;
3959 return new_entry;
3960}
3961
3962static void
3963sift_ctx_init (sctx, sifted_sts, limited_sts, last_node, last_str_idx,
3964 check_subexp)
3965 re_sift_context_t *sctx;
3966 re_dfastate_t **sifted_sts, **limited_sts;
3967 int last_node, last_str_idx, check_subexp;
3968{
3969 sctx->sifted_states = sifted_sts;
3970 sctx->limited_states = limited_sts;
3971 sctx->last_node = last_node;
3972 sctx->last_str_idx = last_str_idx;
3973 sctx->check_subexp = check_subexp;
3974 sctx->cur_bkref = -1;
3975 sctx->cls_subexp_idx = -1;
3976 re_node_set_init_empty (&sctx->limits);
3977}