]> git.proxmox.com Git - rustc.git/blob - vendor/regex-automata/src/dfa/search_unsafe.rs
New upstream version 1.68.2+dfsg1
[rustc.git] / vendor / regex-automata / src / dfa / search_unsafe.rs
1 use crate::dfa::automaton::{Automaton, State};
2 use crate::MatchError;
3
4 /// This is marked as `inline(always)` specifically because it supports
5 /// multiple modes of searching. Namely, the 'earliest' boolean getting inlined
6 /// permits eliminating a few crucial branches.
7 #[inline(always)]
8 pub fn find_fwd<A: Automaton + ?Sized>(
9 dfa: &A,
10 bytes: &[u8],
11 start: usize,
12 end: usize,
13 earliest: bool,
14 ) -> Result<Option<usize>, MatchError> {
15 assert!(start <= end);
16 assert!(start <= bytes.len());
17 assert!(end <= bytes.len());
18
19 let (mut state, mut last_match) = init_fwd(dfa, bytes, start, end)?;
20 if earliest && last_match.is_some() {
21 return Ok(last_match);
22 }
23
24 let mut at = start;
25 while at < end {
26 let byte = bytes[at];
27 state = dfa.next_state(state, byte);
28 at += 1;
29 if dfa.is_special_state(state) {
30 if dfa.is_dead_state(state) {
31 return Ok(last_match);
32 } else if dfa.is_quit_state(state) {
33 return Err(MatchError::Quit { byte, offset: at - 1 });
34 }
35 last_match = Some(at - dfa.match_offset());
36 if earliest {
37 return Ok(last_match);
38 }
39 }
40 }
41 /*
42 unsafe {
43 let mut p = bytes.as_ptr().add(start);
44 while p < bytes[end..].as_ptr() {
45 let byte = *p;
46 state = dfa.next_state_unchecked(state, byte);
47 p = p.add(1);
48 if dfa.is_special_state(state) {
49 if dfa.is_dead_state(state) {
50 return Ok(last_match);
51 } else if dfa.is_quit_state(state) {
52 return Err(MatchError::Quit {
53 byte,
54 offset: offset(bytes, p) - 1,
55 });
56 }
57 last_match = Some(offset(bytes, p) - dfa.match_offset());
58 if earliest {
59 return Ok(last_match);
60 }
61 }
62 }
63 }
64 */
65 Ok(eof_fwd(dfa, bytes, end, &mut state)?.or(last_match))
66 }
67
68 /// This is marked as `inline(always)` specifically because it supports
69 /// multiple modes of searching. Namely, the 'earliest' boolean getting inlined
70 /// permits eliminating a few crucial branches.
71 #[inline(always)]
72 pub fn find_rev<A: Automaton + ?Sized>(
73 dfa: &A,
74 bytes: &[u8],
75 start: usize,
76 end: usize,
77 earliest: bool,
78 ) -> Result<Option<usize>, MatchError> {
79 assert!(start <= end);
80 assert!(start <= bytes.len());
81 assert!(end <= bytes.len());
82
83 let (mut state, mut last_match) = init_rev(dfa, bytes, start, end)?;
84 if earliest && last_match.is_some() {
85 return Ok(last_match);
86 }
87
88 let mut at = end;
89 while at > start {
90 at -= 1;
91 let byte = bytes[at];
92 state = dfa.next_state(state, byte);
93 if dfa.is_special_state(state) {
94 if dfa.is_dead_state(state) {
95 return Ok(last_match);
96 } else if dfa.is_quit_state(state) {
97 return Err(MatchError::Quit { byte, offset: at });
98 }
99 last_match = Some(at + dfa.match_offset());
100 if earliest {
101 return Ok(last_match);
102 }
103 }
104 }
105 /*
106 unsafe {
107 let mut p = bytes.as_ptr().add(end);
108 while p > bytes[start..].as_ptr() {
109 p = p.sub(1);
110 let byte = *p;
111 state = dfa.next_state_unchecked(state, byte);
112 if dfa.is_special_state(state) {
113 if dfa.is_dead_state(state) {
114 return Ok(last_match);
115 } else if dfa.is_quit_state(state) {
116 return Err(MatchError::Quit {
117 byte,
118 offset: offset(bytes, p),
119 });
120 }
121 last_match = Some(offset(bytes, p) + dfa.match_offset());
122 if earliest {
123 return Ok(last_match);
124 }
125 }
126 }
127 }
128 */
129 Ok(eof_rev(dfa, state, bytes, start)?.or(last_match))
130 }
131
132 pub fn find_overlapping_fwd<A: Automaton + ?Sized>(
133 dfa: &A,
134 bytes: &[u8],
135 mut start: usize,
136 end: usize,
137 caller_state: &mut State<A::ID>,
138 ) -> Result<Option<usize>, MatchError> {
139 assert!(start <= end);
140 assert!(start <= bytes.len());
141 assert!(end <= bytes.len());
142
143 let (mut state, mut last_match) = match caller_state.as_option() {
144 None => init_fwd(dfa, bytes, start, end)?,
145 Some(id) => {
146 // This is a subtle but critical detail. If the caller provides a
147 // non-None state ID, then it must be the case that the state ID
148 // corresponds to one set by this function. The state ID therefore
149 // corresponds to a match state, a dead state or some other state.
150 // However, "some other" state _only_ occurs when the input has
151 // been exhausted because the only way to stop before then is to
152 // see a match or a dead/quit state.
153 //
154 // If the input is exhausted or if it's a dead state, then
155 // incrementing the starting position has no relevance on
156 // correctness, since the loop below will either not execute
157 // at all or will immediately stop due to being in a dead state.
158 // (Once in a dead state it is impossible to leave it.)
159 //
160 // Therefore, the only case we need to consider is when
161 // caller_state is a match state. In this case, since our machines
162 // support the ability to delay a match by a certain number of
163 // bytes (to support look-around), it follows that we actually
164 // consumed that many additional bytes on our previous search. When
165 // the caller resumes their search to find subsequent matches, they
166 // will use the ending location from the previous match as the next
167 // starting point, which is `match_offset` bytes PRIOR to where
168 // we scanned to on the previous search. Therefore, we need to
169 // compensate by bumping `start` up by `match_offset` bytes.
170 start += dfa.match_offset();
171 // Since match_offset could be any arbitrary value and we use
172 // `start` in pointer arithmetic below, we check that we are still
173 // in bounds. Otherwise, we could materialize a pointer that is
174 // more than one past the end point of `bytes`, which is UB.
175 if start > end {
176 return Ok(None);
177 }
178 (id, None)
179 }
180 };
181 if last_match.is_some() {
182 caller_state.set(state);
183 return Ok(last_match);
184 }
185
186 let mut at = start;
187 while at < end {
188 let byte = bytes[at];
189 state = dfa.next_state(state, byte);
190 at += 1;
191 if dfa.is_special_state(state) {
192 caller_state.set(state);
193 if dfa.is_dead_state(state) {
194 return Ok(None);
195 } else if dfa.is_quit_state(state) {
196 return Err(MatchError::Quit { byte, offset: at - 1 });
197 } else {
198 return Ok(Some(at - dfa.match_offset()));
199 }
200 }
201 }
202 /*
203 // SAFETY: Other than the normal pointer arithmetic happening here, a
204 // unique aspect of safety for this function is the fact that the caller
205 // can provide the state that the search routine will start with. If this
206 // state were invalid, it would be possible to incorrectly index the
207 // transition table. We however prevent this from happening by guaranteeing
208 // that State is valid. Namely, callers cannot mutate a State. All they can
209 // do is create a "start" state or otherwise reuse a previously set state.
210 // Since callers can't mutate a state, it follows that a previously set
211 // state can only be retrieved by crate internal functions. Therefore, our
212 // use of it is safe since this code will only ever set the provided state
213 // to a valid state.
214 unsafe {
215 let mut p = bytes.as_ptr().add(start);
216 while p < bytes[end..].as_ptr() {
217 let byte = *p;
218 state = dfa.next_state_unchecked(state, byte);
219 p = p.add(1);
220 if dfa.is_special_state(state) {
221 caller_state.set(state);
222 return if dfa.is_dead_state(state) {
223 Ok(None)
224 } else if dfa.is_quit_state(state) {
225 Err(MatchError::Quit { byte, offset: offset(bytes, p) - 1 })
226 } else {
227 Ok(Some(offset(bytes, p) - dfa.match_offset()))
228 };
229 }
230 }
231 }
232 */
233
234 let result = eof_fwd(dfa, bytes, end, &mut state);
235 caller_state.set(state);
236 result
237 }
238
239 fn init_fwd<A: Automaton + ?Sized>(
240 dfa: &A,
241 bytes: &[u8],
242 start: usize,
243 end: usize,
244 ) -> Result<(A::ID, Option<usize>), MatchError> {
245 let state = dfa.start_state_forward(bytes, start, end);
246 if dfa.is_match_state(state) {
247 Ok((state, Some(start - dfa.match_offset())))
248 } else {
249 Ok((state, None))
250 }
251 }
252
253 fn init_rev<A: Automaton + ?Sized>(
254 dfa: &A,
255 bytes: &[u8],
256 start: usize,
257 end: usize,
258 ) -> Result<(A::ID, Option<usize>), MatchError> {
259 let state = dfa.start_state_reverse(bytes, start, end);
260 if dfa.is_match_state(state) {
261 Ok((state, Some(end + dfa.match_offset())))
262 } else {
263 Ok((state, None))
264 }
265 }
266
267 fn eof_fwd<A: Automaton + ?Sized>(
268 dfa: &A,
269 bytes: &[u8],
270 end: usize,
271 state: &mut A::ID,
272 ) -> Result<Option<usize>, MatchError> {
273 match bytes.get(end) {
274 Some(&b) => {
275 *state = dfa.next_state(*state, b);
276 if dfa.is_match_state(*state) {
277 Ok(Some(end))
278 } else {
279 Ok(None)
280 }
281 }
282 None => {
283 *state = dfa.next_eof_state(*state);
284 if dfa.is_match_state(*state) {
285 Ok(Some(bytes.len()))
286 } else {
287 Ok(None)
288 }
289 }
290 }
291 }
292
293 fn eof_rev<A: Automaton + ?Sized>(
294 dfa: &A,
295 state: A::ID,
296 bytes: &[u8],
297 start: usize,
298 ) -> Result<Option<usize>, MatchError> {
299 if start > 0 {
300 if dfa.is_match_state(dfa.next_state(state, bytes[start - 1])) {
301 Ok(Some(start))
302 } else {
303 Ok(None)
304 }
305 } else {
306 if dfa.is_match_state(dfa.next_eof_state(state)) {
307 Ok(Some(0))
308 } else {
309 Ok(None)
310 }
311 }
312 }
313
314 /// Returns the distance between the given pointer and the start of `bytes`.
315 /// This assumes that the given pointer points to somewhere in the `bytes`
316 /// slice given.
317 fn offset(bytes: &[u8], p: *const u8) -> usize {
318 debug_assert!(bytes.as_ptr() <= p);
319 debug_assert!(bytes[bytes.len()..].as_ptr() >= p);
320 ((p as isize) - (bytes.as_ptr() as isize)) as usize
321 }