1 use crate::dfa
::automaton
::{Automaton, State}
;
4 /// This is marked as `inline(always)` specifically because it supports
5 /// multiple modes of searching. Namely, the 'earliest' boolean getting inlined
6 /// permits eliminating a few crucial branches.
8 pub fn find_fwd
<A
: Automaton
+ ?Sized
>(
14 ) -> Result
<Option
<usize>, MatchError
> {
15 assert
!(start
<= end
);
16 assert
!(start
<= bytes
.len());
17 assert
!(end
<= bytes
.len());
19 let (mut state
, mut last_match
) = init_fwd(dfa
, bytes
, start
, end
)?
;
20 if earliest
&& last_match
.is_some() {
21 return Ok(last_match
);
27 state
= dfa
.next_state(state
, byte
);
29 if dfa
.is_special_state(state
) {
30 if dfa
.is_dead_state(state
) {
31 return Ok(last_match
);
32 } else if dfa
.is_quit_state(state
) {
33 return Err(MatchError
::Quit { byte, offset: at - 1 }
);
35 last_match
= Some(at
- dfa
.match_offset());
37 return Ok(last_match
);
43 let mut p = bytes.as_ptr().add(start);
44 while p < bytes[end..].as_ptr() {
46 state = dfa.next_state_unchecked(state, byte);
48 if dfa.is_special_state(state) {
49 if dfa.is_dead_state(state) {
50 return Ok(last_match);
51 } else if dfa.is_quit_state(state) {
52 return Err(MatchError::Quit {
54 offset: offset(bytes, p) - 1,
57 last_match = Some(offset(bytes, p) - dfa.match_offset());
59 return Ok(last_match);
65 Ok(eof_fwd(dfa
, bytes
, end
, &mut state
)?
.or(last_match
))
68 /// This is marked as `inline(always)` specifically because it supports
69 /// multiple modes of searching. Namely, the 'earliest' boolean getting inlined
70 /// permits eliminating a few crucial branches.
72 pub fn find_rev
<A
: Automaton
+ ?Sized
>(
78 ) -> Result
<Option
<usize>, MatchError
> {
79 assert
!(start
<= end
);
80 assert
!(start
<= bytes
.len());
81 assert
!(end
<= bytes
.len());
83 let (mut state
, mut last_match
) = init_rev(dfa
, bytes
, start
, end
)?
;
84 if earliest
&& last_match
.is_some() {
85 return Ok(last_match
);
92 state
= dfa
.next_state(state
, byte
);
93 if dfa
.is_special_state(state
) {
94 if dfa
.is_dead_state(state
) {
95 return Ok(last_match
);
96 } else if dfa
.is_quit_state(state
) {
97 return Err(MatchError
::Quit { byte, offset: at }
);
99 last_match
= Some(at
+ dfa
.match_offset());
101 return Ok(last_match
);
107 let mut p = bytes.as_ptr().add(end);
108 while p > bytes[start..].as_ptr() {
111 state = dfa.next_state_unchecked(state, byte);
112 if dfa.is_special_state(state) {
113 if dfa.is_dead_state(state) {
114 return Ok(last_match);
115 } else if dfa.is_quit_state(state) {
116 return Err(MatchError::Quit {
118 offset: offset(bytes, p),
121 last_match = Some(offset(bytes, p) + dfa.match_offset());
123 return Ok(last_match);
129 Ok(eof_rev(dfa
, state
, bytes
, start
)?
.or(last_match
))
132 pub fn find_overlapping_fwd
<A
: Automaton
+ ?Sized
>(
137 caller_state
: &mut State
<A
::ID
>,
138 ) -> Result
<Option
<usize>, MatchError
> {
139 assert
!(start
<= end
);
140 assert
!(start
<= bytes
.len());
141 assert
!(end
<= bytes
.len());
143 let (mut state
, mut last_match
) = match caller_state
.as_option() {
144 None
=> init_fwd(dfa
, bytes
, start
, end
)?
,
146 // This is a subtle but critical detail. If the caller provides a
147 // non-None state ID, then it must be the case that the state ID
148 // corresponds to one set by this function. The state ID therefore
149 // corresponds to a match state, a dead state or some other state.
150 // However, "some other" state _only_ occurs when the input has
151 // been exhausted because the only way to stop before then is to
152 // see a match or a dead/quit state.
154 // If the input is exhausted or if it's a dead state, then
155 // incrementing the starting position has no relevance on
156 // correctness, since the loop below will either not execute
157 // at all or will immediately stop due to being in a dead state.
158 // (Once in a dead state it is impossible to leave it.)
160 // Therefore, the only case we need to consider is when
161 // caller_state is a match state. In this case, since our machines
162 // support the ability to delay a match by a certain number of
163 // bytes (to support look-around), it follows that we actually
164 // consumed that many additional bytes on our previous search. When
165 // the caller resumes their search to find subsequent matches, they
166 // will use the ending location from the previous match as the next
167 // starting point, which is `match_offset` bytes PRIOR to where
168 // we scanned to on the previous search. Therefore, we need to
169 // compensate by bumping `start` up by `match_offset` bytes.
170 start
+= dfa
.match_offset();
171 // Since match_offset could be any arbitrary value and we use
172 // `start` in pointer arithmetic below, we check that we are still
173 // in bounds. Otherwise, we could materialize a pointer that is
174 // more than one past the end point of `bytes`, which is UB.
181 if last_match
.is_some() {
182 caller_state
.set(state
);
183 return Ok(last_match
);
188 let byte
= bytes
[at
];
189 state
= dfa
.next_state(state
, byte
);
191 if dfa
.is_special_state(state
) {
192 caller_state
.set(state
);
193 if dfa
.is_dead_state(state
) {
195 } else if dfa
.is_quit_state(state
) {
196 return Err(MatchError
::Quit { byte, offset: at - 1 }
);
198 return Ok(Some(at
- dfa
.match_offset()));
203 // SAFETY: Other than the normal pointer arithmetic happening here, a
204 // unique aspect of safety for this function is the fact that the caller
205 // can provide the state that the search routine will start with. If this
206 // state were invalid, it would be possible to incorrectly index the
207 // transition table. We however prevent this from happening by guaranteeing
208 // that State is valid. Namely, callers cannot mutate a State. All they can
209 // do is create a "start" state or otherwise reuse a previously set state.
210 // Since callers can't mutate a state, it follows that a previously set
211 // state can only be retrieved by crate internal functions. Therefore, our
212 // use of it is safe since this code will only ever set the provided state
215 let mut p = bytes.as_ptr().add(start);
216 while p < bytes[end..].as_ptr() {
218 state = dfa.next_state_unchecked(state, byte);
220 if dfa.is_special_state(state) {
221 caller_state.set(state);
222 return if dfa.is_dead_state(state) {
224 } else if dfa.is_quit_state(state) {
225 Err(MatchError::Quit { byte, offset: offset(bytes, p) - 1 })
227 Ok(Some(offset(bytes, p) - dfa.match_offset()))
234 let result
= eof_fwd(dfa
, bytes
, end
, &mut state
);
235 caller_state
.set(state
);
239 fn init_fwd
<A
: Automaton
+ ?Sized
>(
244 ) -> Result
<(A
::ID
, Option
<usize>), MatchError
> {
245 let state
= dfa
.start_state_forward(bytes
, start
, end
);
246 if dfa
.is_match_state(state
) {
247 Ok((state
, Some(start
- dfa
.match_offset())))
253 fn init_rev
<A
: Automaton
+ ?Sized
>(
258 ) -> Result
<(A
::ID
, Option
<usize>), MatchError
> {
259 let state
= dfa
.start_state_reverse(bytes
, start
, end
);
260 if dfa
.is_match_state(state
) {
261 Ok((state
, Some(end
+ dfa
.match_offset())))
267 fn eof_fwd
<A
: Automaton
+ ?Sized
>(
272 ) -> Result
<Option
<usize>, MatchError
> {
273 match bytes
.get(end
) {
275 *state
= dfa
.next_state(*state
, b
);
276 if dfa
.is_match_state(*state
) {
283 *state
= dfa
.next_eof_state(*state
);
284 if dfa
.is_match_state(*state
) {
285 Ok(Some(bytes
.len()))
293 fn eof_rev
<A
: Automaton
+ ?Sized
>(
298 ) -> Result
<Option
<usize>, MatchError
> {
300 if dfa
.is_match_state(dfa
.next_state(state
, bytes
[start
- 1])) {
306 if dfa
.is_match_state(dfa
.next_eof_state(state
)) {
314 /// Returns the distance between the given pointer and the start of `bytes`.
315 /// This assumes that the given pointer points to somewhere in the `bytes`
317 fn offset(bytes
: &[u8], p
: *const u8) -> usize {
318 debug_assert
!(bytes
.as_ptr() <= p
);
319 debug_assert
!(bytes
[bytes
.len()..].as_ptr() >= p
);
320 ((p
as isize) - (bytes
.as_ptr() as isize)) as usize