]> git.proxmox.com Git - rustc.git/blob - src/libcore/str/pattern.rs
8bdbab55211d8cce2dc735d1d3fd670450098d15
[rustc.git] / src / libcore / str / pattern.rs
1 // Copyright 2015 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
4 //
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
10
11 //! The string Pattern API.
12 //!
13 //! For more details, see the traits `Pattern`, `Searcher`,
14 //! `ReverseSearcher` and `DoubleEndedSearcher`.
15
16 #![unstable(feature = "pattern",
17 reason = "API not fully fleshed out and ready to be stabilized")]
18
19 use prelude::*;
20
21 // Pattern
22
23 /// A string pattern.
24 ///
25 /// A `Pattern<'a>` expresses that the implementing type
26 /// can be used as a string pattern for searching in a `&'a str`.
27 ///
28 /// For example, both `'a'` and `"aa"` are patterns that
29 /// would match at index `1` in the string `"baaaab"`.
30 ///
31 /// The trait itself acts as a builder for an associated
32 /// `Searcher` type, which does the actual work of finding
33 /// occurrences of the pattern in a string.
34 pub trait Pattern<'a>: Sized {
35 /// Associated searcher for this pattern
36 type Searcher: Searcher<'a>;
37
38 /// Constructs the associated searcher from
39 /// `self` and the `haystack` to search in.
40 fn into_searcher(self, haystack: &'a str) -> Self::Searcher;
41
42 /// Checks whether the pattern matches anywhere in the haystack
43 #[inline]
44 fn is_contained_in(self, haystack: &'a str) -> bool {
45 self.into_searcher(haystack).next_match().is_some()
46 }
47
48 /// Checks whether the pattern matches at the front of the haystack
49 #[inline]
50 fn is_prefix_of(self, haystack: &'a str) -> bool {
51 match self.into_searcher(haystack).next() {
52 SearchStep::Match(0, _) => true,
53 _ => false,
54 }
55 }
56
57 /// Checks whether the pattern matches at the back of the haystack
58 #[inline]
59 fn is_suffix_of(self, haystack: &'a str) -> bool
60 where Self::Searcher: ReverseSearcher<'a>
61 {
62 match self.into_searcher(haystack).next_back() {
63 SearchStep::Match(_, j) if haystack.len() == j => true,
64 _ => false,
65 }
66 }
67 }
68
69 // Searcher
70
71 /// Result of calling `Searcher::next()` or `ReverseSearcher::next_back()`.
72 #[derive(Copy, Clone, Eq, PartialEq, Debug)]
73 pub enum SearchStep {
74 /// Expresses that a match of the pattern has been found at
75 /// `haystack[a..b]`.
76 Match(usize, usize),
77 /// Expresses that `haystack[a..b]` has been rejected as a possible match
78 /// of the pattern.
79 ///
80 /// Note that there might be more than one `Reject` between two `Match`es,
81 /// there is no requirement for them to be combined into one.
82 Reject(usize, usize),
83 /// Expresses that every byte of the haystack has been visted, ending
84 /// the iteration.
85 Done
86 }
87
88 /// A searcher for a string pattern.
89 ///
90 /// This trait provides methods for searching for non-overlapping
91 /// matches of a pattern starting from the front (left) of a string.
92 ///
93 /// It will be implemented by associated `Searcher`
94 /// types of the `Pattern` trait.
95 ///
96 /// The trait is marked unsafe because the indices returned by the
97 /// `next()` methods are required to lie on valid utf8 boundaries in
98 /// the haystack. This enables consumers of this trait to
99 /// slice the haystack without additional runtime checks.
100 pub unsafe trait Searcher<'a> {
101 /// Getter for the underlaying string to be searched in
102 ///
103 /// Will always return the same `&str`
104 fn haystack(&self) -> &'a str;
105
106 /// Performs the next search step starting from the front.
107 ///
108 /// - Returns `Match(a, b)` if `haystack[a..b]` matches the pattern.
109 /// - Returns `Reject(a, b)` if `haystack[a..b]` can not match the
110 /// pattern, even partially.
111 /// - Returns `Done` if every byte of the haystack has been visited
112 ///
113 /// The stream of `Match` and `Reject` values up to a `Done`
114 /// will contain index ranges that are adjacent, non-overlapping,
115 /// covering the whole haystack, and laying on utf8 boundaries.
116 ///
117 /// A `Match` result needs to contain the whole matched pattern,
118 /// however `Reject` results may be split up into arbitrary
119 /// many adjacent fragments. Both ranges may have zero length.
120 ///
121 /// As an example, the pattern `"aaa"` and the haystack `"cbaaaaab"`
122 /// might produce the stream
123 /// `[Reject(0, 1), Reject(1, 2), Match(2, 5), Reject(5, 8)]`
124 fn next(&mut self) -> SearchStep;
125
126 /// Find the next `Match` result. See `next()`
127 #[inline]
128 fn next_match(&mut self) -> Option<(usize, usize)> {
129 loop {
130 match self.next() {
131 SearchStep::Match(a, b) => return Some((a, b)),
132 SearchStep::Done => return None,
133 _ => continue,
134 }
135 }
136 }
137
138 /// Find the next `Reject` result. See `next()`
139 #[inline]
140 fn next_reject(&mut self) -> Option<(usize, usize)> {
141 loop {
142 match self.next() {
143 SearchStep::Reject(a, b) => return Some((a, b)),
144 SearchStep::Done => return None,
145 _ => continue,
146 }
147 }
148 }
149 }
150
151 /// A reverse searcher for a string pattern.
152 ///
153 /// This trait provides methods for searching for non-overlapping
154 /// matches of a pattern starting from the back (right) of a string.
155 ///
156 /// It will be implemented by associated `Searcher`
157 /// types of the `Pattern` trait if the pattern supports searching
158 /// for it from the back.
159 ///
160 /// The index ranges returned by this trait are not required
161 /// to exactly match those of the forward search in reverse.
162 ///
163 /// For the reason why this trait is marked unsafe, see them
164 /// parent trait `Searcher`.
165 pub unsafe trait ReverseSearcher<'a>: Searcher<'a> {
166 /// Performs the next search step starting from the back.
167 ///
168 /// - Returns `Match(a, b)` if `haystack[a..b]` matches the pattern.
169 /// - Returns `Reject(a, b)` if `haystack[a..b]` can not match the
170 /// pattern, even partially.
171 /// - Returns `Done` if every byte of the haystack has been visited
172 ///
173 /// The stream of `Match` and `Reject` values up to a `Done`
174 /// will contain index ranges that are adjacent, non-overlapping,
175 /// covering the whole haystack, and laying on utf8 boundaries.
176 ///
177 /// A `Match` result needs to contain the whole matched pattern,
178 /// however `Reject` results may be split up into arbitrary
179 /// many adjacent fragments. Both ranges may have zero length.
180 ///
181 /// As an example, the pattern `"aaa"` and the haystack `"cbaaaaab"`
182 /// might produce the stream
183 /// `[Reject(7, 8), Match(4, 7), Reject(1, 4), Reject(0, 1)]`
184 fn next_back(&mut self) -> SearchStep;
185
186 /// Find the next `Match` result. See `next_back()`
187 #[inline]
188 fn next_match_back(&mut self) -> Option<(usize, usize)>{
189 loop {
190 match self.next_back() {
191 SearchStep::Match(a, b) => return Some((a, b)),
192 SearchStep::Done => return None,
193 _ => continue,
194 }
195 }
196 }
197
198 /// Find the next `Reject` result. See `next_back()`
199 #[inline]
200 fn next_reject_back(&mut self) -> Option<(usize, usize)>{
201 loop {
202 match self.next_back() {
203 SearchStep::Reject(a, b) => return Some((a, b)),
204 SearchStep::Done => return None,
205 _ => continue,
206 }
207 }
208 }
209 }
210
211 /// A marker trait to express that a `ReverseSearcher`
212 /// can be used for a `DoubleEndedIterator` implementation.
213 ///
214 /// For this, the impl of `Searcher` and `ReverseSearcher` need
215 /// to follow these conditions:
216 ///
217 /// - All results of `next()` need to be identical
218 /// to the results of `next_back()` in reverse order.
219 /// - `next()` and `next_back()` need to behave as
220 /// the two ends of a range of values, that is they
221 /// can not "walk past each other".
222 ///
223 /// # Examples
224 ///
225 /// `char::Searcher` is a `DoubleEndedSearcher` because searching for a
226 /// `char` only requires looking at one at a time, which behaves the same
227 /// from both ends.
228 ///
229 /// `(&str)::Searcher` is not a `DoubleEndedSearcher` because
230 /// the pattern `"aa"` in the haystack `"aaa"` matches as either
231 /// `"[aa]a"` or `"a[aa]"`, depending from which side it is searched.
232 pub trait DoubleEndedSearcher<'a>: ReverseSearcher<'a> {}
233
234 /////////////////////////////////////////////////////////////////////////////
235 // Impl for a CharEq wrapper
236 /////////////////////////////////////////////////////////////////////////////
237
238 #[doc(hidden)]
239 trait CharEq {
240 fn matches(&mut self, char) -> bool;
241 fn only_ascii(&self) -> bool;
242 }
243
244 impl CharEq for char {
245 #[inline]
246 fn matches(&mut self, c: char) -> bool { *self == c }
247
248 #[inline]
249 fn only_ascii(&self) -> bool { (*self as u32) < 128 }
250 }
251
252 impl<F> CharEq for F where F: FnMut(char) -> bool {
253 #[inline]
254 fn matches(&mut self, c: char) -> bool { (*self)(c) }
255
256 #[inline]
257 fn only_ascii(&self) -> bool { false }
258 }
259
260 impl<'a> CharEq for &'a [char] {
261 #[inline]
262 fn matches(&mut self, c: char) -> bool {
263 self.iter().any(|&m| { let mut m = m; m.matches(c) })
264 }
265
266 #[inline]
267 fn only_ascii(&self) -> bool {
268 self.iter().all(|m| m.only_ascii())
269 }
270 }
271
272 struct CharEqPattern<C: CharEq>(C);
273
274 #[derive(Clone)]
275 struct CharEqSearcher<'a, C: CharEq> {
276 char_eq: C,
277 haystack: &'a str,
278 char_indices: super::CharIndices<'a>,
279 #[allow(dead_code)]
280 ascii_only: bool,
281 }
282
283 impl<'a, C: CharEq> Pattern<'a> for CharEqPattern<C> {
284 type Searcher = CharEqSearcher<'a, C>;
285
286 #[inline]
287 fn into_searcher(self, haystack: &'a str) -> CharEqSearcher<'a, C> {
288 CharEqSearcher {
289 ascii_only: self.0.only_ascii(),
290 haystack: haystack,
291 char_eq: self.0,
292 char_indices: haystack.char_indices(),
293 }
294 }
295 }
296
297 unsafe impl<'a, C: CharEq> Searcher<'a> for CharEqSearcher<'a, C> {
298 #[inline]
299 fn haystack(&self) -> &'a str {
300 self.haystack
301 }
302
303 #[inline]
304 fn next(&mut self) -> SearchStep {
305 let s = &mut self.char_indices;
306 // Compare lengths of the internal byte slice iterator
307 // to find length of current char
308 let (pre_len, _) = s.iter.iter.size_hint();
309 if let Some((i, c)) = s.next() {
310 let (len, _) = s.iter.iter.size_hint();
311 let char_len = pre_len - len;
312 if self.char_eq.matches(c) {
313 return SearchStep::Match(i, i + char_len);
314 } else {
315 return SearchStep::Reject(i, i + char_len);
316 }
317 }
318 SearchStep::Done
319 }
320 }
321
322 unsafe impl<'a, C: CharEq> ReverseSearcher<'a> for CharEqSearcher<'a, C> {
323 #[inline]
324 fn next_back(&mut self) -> SearchStep {
325 let s = &mut self.char_indices;
326 // Compare lengths of the internal byte slice iterator
327 // to find length of current char
328 let (pre_len, _) = s.iter.iter.size_hint();
329 if let Some((i, c)) = s.next_back() {
330 let (len, _) = s.iter.iter.size_hint();
331 let char_len = pre_len - len;
332 if self.char_eq.matches(c) {
333 return SearchStep::Match(i, i + char_len);
334 } else {
335 return SearchStep::Reject(i, i + char_len);
336 }
337 }
338 SearchStep::Done
339 }
340 }
341
342 impl<'a, C: CharEq> DoubleEndedSearcher<'a> for CharEqSearcher<'a, C> {}
343
344 /////////////////////////////////////////////////////////////////////////////
345 // Impl for &str
346 /////////////////////////////////////////////////////////////////////////////
347
348 // Todo: Optimize the naive implementation here
349
350 /// Associated type for `<&str as Pattern<'a>>::Searcher`.
351 #[derive(Clone)]
352 pub struct StrSearcher<'a, 'b> {
353 haystack: &'a str,
354 needle: &'b str,
355 start: usize,
356 end: usize,
357 state: State,
358 }
359
360 #[derive(Clone, PartialEq)]
361 enum State { Done, NotDone, Reject(usize, usize) }
362 impl State {
363 #[inline] fn done(&self) -> bool { *self == State::Done }
364 #[inline] fn take(&mut self) -> State { ::mem::replace(self, State::NotDone) }
365 }
366
367 /// Non-allocating substring search.
368 ///
369 /// Will handle the pattern `""` as returning empty matches at each utf8
370 /// boundary.
371 impl<'a, 'b> Pattern<'a> for &'b str {
372 type Searcher = StrSearcher<'a, 'b>;
373
374 #[inline]
375 fn into_searcher(self, haystack: &'a str) -> StrSearcher<'a, 'b> {
376 StrSearcher {
377 haystack: haystack,
378 needle: self,
379 start: 0,
380 end: haystack.len(),
381 state: State::NotDone,
382 }
383 }
384 }
385
386 unsafe impl<'a, 'b> Searcher<'a> for StrSearcher<'a, 'b> {
387 #[inline]
388 fn haystack(&self) -> &'a str {
389 self.haystack
390 }
391
392 #[inline]
393 fn next(&mut self) -> SearchStep {
394 str_search_step(self,
395 |m: &mut StrSearcher| {
396 // Forward step for empty needle
397 let current_start = m.start;
398 if !m.state.done() {
399 m.start = m.haystack.char_range_at(current_start).next;
400 m.state = State::Reject(current_start, m.start);
401 }
402 SearchStep::Match(current_start, current_start)
403 },
404 |m: &mut StrSearcher| {
405 // Forward step for nonempty needle
406 let current_start = m.start;
407 // Compare byte window because this might break utf8 boundaries
408 let possible_match = &m.haystack.as_bytes()[m.start .. m.start + m.needle.len()];
409 if possible_match == m.needle.as_bytes() {
410 m.start += m.needle.len();
411 SearchStep::Match(current_start, m.start)
412 } else {
413 // Skip a char
414 let haystack_suffix = &m.haystack[m.start..];
415 m.start += haystack_suffix.chars().next().unwrap().len_utf8();
416 SearchStep::Reject(current_start, m.start)
417 }
418 })
419 }
420 }
421
422 unsafe impl<'a, 'b> ReverseSearcher<'a> for StrSearcher<'a, 'b> {
423 #[inline]
424 fn next_back(&mut self) -> SearchStep {
425 str_search_step(self,
426 |m: &mut StrSearcher| {
427 // Backward step for empty needle
428 let current_end = m.end;
429 if !m.state.done() {
430 m.end = m.haystack.char_range_at_reverse(current_end).next;
431 m.state = State::Reject(m.end, current_end);
432 }
433 SearchStep::Match(current_end, current_end)
434 },
435 |m: &mut StrSearcher| {
436 // Backward step for nonempty needle
437 let current_end = m.end;
438 // Compare byte window because this might break utf8 boundaries
439 let possible_match = &m.haystack.as_bytes()[m.end - m.needle.len() .. m.end];
440 if possible_match == m.needle.as_bytes() {
441 m.end -= m.needle.len();
442 SearchStep::Match(m.end, current_end)
443 } else {
444 // Skip a char
445 let haystack_prefix = &m.haystack[..m.end];
446 m.end -= haystack_prefix.chars().rev().next().unwrap().len_utf8();
447 SearchStep::Reject(m.end, current_end)
448 }
449 })
450 }
451 }
452
453 // Helper function for encapsulating the common control flow
454 // of doing a search step from the front or doing a search step from the back
455 fn str_search_step<F, G>(mut m: &mut StrSearcher,
456 empty_needle_step: F,
457 nonempty_needle_step: G) -> SearchStep
458 where F: FnOnce(&mut StrSearcher) -> SearchStep,
459 G: FnOnce(&mut StrSearcher) -> SearchStep
460 {
461 if m.state.done() {
462 SearchStep::Done
463 } else if m.needle.is_empty() && m.start <= m.end {
464 // Case for needle == ""
465 if let State::Reject(a, b) = m.state.take() {
466 SearchStep::Reject(a, b)
467 } else {
468 if m.start == m.end {
469 m.state = State::Done;
470 }
471 empty_needle_step(&mut m)
472 }
473 } else if m.start + m.needle.len() <= m.end {
474 // Case for needle != ""
475 nonempty_needle_step(&mut m)
476 } else if m.start < m.end {
477 // Remaining slice shorter than needle, reject it
478 m.state = State::Done;
479 SearchStep::Reject(m.start, m.end)
480 } else {
481 m.state = State::Done;
482 SearchStep::Done
483 }
484 }
485
486 /////////////////////////////////////////////////////////////////////////////
487
488 macro_rules! pattern_methods {
489 ($t:ty, $pmap:expr, $smap:expr) => {
490 type Searcher = $t;
491
492 #[inline]
493 fn into_searcher(self, haystack: &'a str) -> $t {
494 ($smap)(($pmap)(self).into_searcher(haystack))
495 }
496
497 #[inline]
498 fn is_contained_in(self, haystack: &'a str) -> bool {
499 ($pmap)(self).is_contained_in(haystack)
500 }
501
502 #[inline]
503 fn is_prefix_of(self, haystack: &'a str) -> bool {
504 ($pmap)(self).is_prefix_of(haystack)
505 }
506
507 #[inline]
508 fn is_suffix_of(self, haystack: &'a str) -> bool
509 where $t: ReverseSearcher<'a>
510 {
511 ($pmap)(self).is_suffix_of(haystack)
512 }
513 }
514 }
515
516 macro_rules! searcher_methods {
517 (forward) => {
518 #[inline]
519 fn haystack(&self) -> &'a str {
520 self.0.haystack()
521 }
522 #[inline]
523 fn next(&mut self) -> SearchStep {
524 self.0.next()
525 }
526 #[inline]
527 fn next_match(&mut self) -> Option<(usize, usize)> {
528 self.0.next_match()
529 }
530 #[inline]
531 fn next_reject(&mut self) -> Option<(usize, usize)> {
532 self.0.next_reject()
533 }
534 };
535 (reverse) => {
536 #[inline]
537 fn next_back(&mut self) -> SearchStep {
538 self.0.next_back()
539 }
540 #[inline]
541 fn next_match_back(&mut self) -> Option<(usize, usize)> {
542 self.0.next_match_back()
543 }
544 #[inline]
545 fn next_reject_back(&mut self) -> Option<(usize, usize)> {
546 self.0.next_reject_back()
547 }
548 }
549 }
550
551 /////////////////////////////////////////////////////////////////////////////
552 // Impl for char
553 /////////////////////////////////////////////////////////////////////////////
554
555 /// Associated type for `<char as Pattern<'a>>::Searcher`.
556 #[derive(Clone)]
557 pub struct CharSearcher<'a>(<CharEqPattern<char> as Pattern<'a>>::Searcher);
558
559 unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
560 searcher_methods!(forward);
561 }
562
563 unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
564 searcher_methods!(reverse);
565 }
566
567 impl<'a> DoubleEndedSearcher<'a> for CharSearcher<'a> {}
568
569 /// Searches for chars that are equal to a given char
570 impl<'a> Pattern<'a> for char {
571 pattern_methods!(CharSearcher<'a>, CharEqPattern, CharSearcher);
572 }
573
574 /////////////////////////////////////////////////////////////////////////////
575 // Impl for &[char]
576 /////////////////////////////////////////////////////////////////////////////
577
578 // Todo: Change / Remove due to ambiguity in meaning.
579
580 /// Associated type for `<&[char] as Pattern<'a>>::Searcher`.
581 #[derive(Clone)]
582 pub struct CharSliceSearcher<'a, 'b>(<CharEqPattern<&'b [char]> as Pattern<'a>>::Searcher);
583
584 unsafe impl<'a, 'b> Searcher<'a> for CharSliceSearcher<'a, 'b> {
585 searcher_methods!(forward);
586 }
587
588 unsafe impl<'a, 'b> ReverseSearcher<'a> for CharSliceSearcher<'a, 'b> {
589 searcher_methods!(reverse);
590 }
591
592 impl<'a, 'b> DoubleEndedSearcher<'a> for CharSliceSearcher<'a, 'b> {}
593
594 /// Searches for chars that are equal to any of the chars in the array
595 impl<'a, 'b> Pattern<'a> for &'b [char] {
596 pattern_methods!(CharSliceSearcher<'a, 'b>, CharEqPattern, CharSliceSearcher);
597 }
598
599 /////////////////////////////////////////////////////////////////////////////
600 // Impl for F: FnMut(char) -> bool
601 /////////////////////////////////////////////////////////////////////////////
602
603 /// Associated type for `<F as Pattern<'a>>::Searcher`.
604 #[derive(Clone)]
605 pub struct CharPredicateSearcher<'a, F>(<CharEqPattern<F> as Pattern<'a>>::Searcher)
606 where F: FnMut(char) -> bool;
607
608 unsafe impl<'a, F> Searcher<'a> for CharPredicateSearcher<'a, F>
609 where F: FnMut(char) -> bool
610 {
611 searcher_methods!(forward);
612 }
613
614 unsafe impl<'a, F> ReverseSearcher<'a> for CharPredicateSearcher<'a, F>
615 where F: FnMut(char) -> bool
616 {
617 searcher_methods!(reverse);
618 }
619
620 impl<'a, F> DoubleEndedSearcher<'a> for CharPredicateSearcher<'a, F>
621 where F: FnMut(char) -> bool {}
622
623 /// Searches for chars that match the given predicate
624 impl<'a, F> Pattern<'a> for F where F: FnMut(char) -> bool {
625 pattern_methods!(CharPredicateSearcher<'a, F>, CharEqPattern, CharPredicateSearcher);
626 }
627
628 /////////////////////////////////////////////////////////////////////////////
629 // Impl for &&str
630 /////////////////////////////////////////////////////////////////////////////
631
632 /// Delegates to the `&str` impl.
633 impl<'a, 'b> Pattern<'a> for &'b &'b str {
634 pattern_methods!(StrSearcher<'a, 'b>, |&s| s, |s| s);
635 }