1 // Copyright 2015 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 //! The string Pattern API.
13 //! For more details, see the traits `Pattern`, `Searcher`,
14 //! `ReverseSearcher` and `DoubleEndedSearcher`.
16 #![unstable(feature = "pattern",
17 reason
= "API not fully fleshed out and ready to be stabilized")]
25 /// A `Pattern<'a>` expresses that the implementing type
26 /// can be used as a string pattern for searching in a `&'a str`.
28 /// For example, both `'a'` and `"aa"` are patterns that
29 /// would match at index `1` in the string `"baaaab"`.
31 /// The trait itself acts as a builder for an associated
32 /// `Searcher` type, which does the actual work of finding
33 /// occurrences of the pattern in a string.
34 pub trait Pattern
<'a
>: Sized
{
35 /// Associated searcher for this pattern
36 type Searcher
: Searcher
<'a
>;
38 /// Constructs the associated searcher from
39 /// `self` and the `haystack` to search in.
40 fn into_searcher(self, haystack
: &'a
str) -> Self::Searcher
;
42 /// Checks whether the pattern matches anywhere in the haystack
44 fn is_contained_in(self, haystack
: &'a
str) -> bool
{
45 self.into_searcher(haystack
).next_match().is_some()
48 /// Checks whether the pattern matches at the front of the haystack
50 fn is_prefix_of(self, haystack
: &'a
str) -> bool
{
51 match self.into_searcher(haystack
).next() {
52 SearchStep
::Match(0, _
) => true,
57 /// Checks whether the pattern matches at the back of the haystack
59 fn is_suffix_of(self, haystack
: &'a
str) -> bool
60 where Self::Searcher
: ReverseSearcher
<'a
>
62 match self.into_searcher(haystack
).next_back() {
63 SearchStep
::Match(_
, j
) if haystack
.len() == j
=> true,
71 /// Result of calling `Searcher::next()` or `ReverseSearcher::next_back()`.
72 #[derive(Copy, Clone, Eq, PartialEq, Debug)]
74 /// Expresses that a match of the pattern has been found at
77 /// Expresses that `haystack[a..b]` has been rejected as a possible match
80 /// Note that there might be more than one `Reject` between two `Match`es,
81 /// there is no requirement for them to be combined into one.
83 /// Expresses that every byte of the haystack has been visted, ending
88 /// A searcher for a string pattern.
90 /// This trait provides methods for searching for non-overlapping
91 /// matches of a pattern starting from the front (left) of a string.
93 /// It will be implemented by associated `Searcher`
94 /// types of the `Pattern` trait.
96 /// The trait is marked unsafe because the indices returned by the
97 /// `next()` methods are required to lie on valid utf8 boundaries in
98 /// the haystack. This enables consumers of this trait to
99 /// slice the haystack without additional runtime checks.
100 pub unsafe trait Searcher
<'a
> {
101 /// Getter for the underlaying string to be searched in
103 /// Will always return the same `&str`
104 fn haystack(&self) -> &'a
str;
106 /// Performs the next search step starting from the front.
108 /// - Returns `Match(a, b)` if `haystack[a..b]` matches the pattern.
109 /// - Returns `Reject(a, b)` if `haystack[a..b]` can not match the
110 /// pattern, even partially.
111 /// - Returns `Done` if every byte of the haystack has been visited
113 /// The stream of `Match` and `Reject` values up to a `Done`
114 /// will contain index ranges that are adjacent, non-overlapping,
115 /// covering the whole haystack, and laying on utf8 boundaries.
117 /// A `Match` result needs to contain the whole matched pattern,
118 /// however `Reject` results may be split up into arbitrary
119 /// many adjacent fragments. Both ranges may have zero length.
121 /// As an example, the pattern `"aaa"` and the haystack `"cbaaaaab"`
122 /// might produce the stream
123 /// `[Reject(0, 1), Reject(1, 2), Match(2, 5), Reject(5, 8)]`
124 fn next(&mut self) -> SearchStep
;
126 /// Find the next `Match` result. See `next()`
128 fn next_match(&mut self) -> Option
<(usize, usize)> {
131 SearchStep
::Match(a
, b
) => return Some((a
, b
)),
132 SearchStep
::Done
=> return None
,
138 /// Find the next `Reject` result. See `next()`
140 fn next_reject(&mut self) -> Option
<(usize, usize)> {
143 SearchStep
::Reject(a
, b
) => return Some((a
, b
)),
144 SearchStep
::Done
=> return None
,
151 /// A reverse searcher for a string pattern.
153 /// This trait provides methods for searching for non-overlapping
154 /// matches of a pattern starting from the back (right) of a string.
156 /// It will be implemented by associated `Searcher`
157 /// types of the `Pattern` trait if the pattern supports searching
158 /// for it from the back.
160 /// The index ranges returned by this trait are not required
161 /// to exactly match those of the forward search in reverse.
163 /// For the reason why this trait is marked unsafe, see them
164 /// parent trait `Searcher`.
165 pub unsafe trait ReverseSearcher
<'a
>: Searcher
<'a
> {
166 /// Performs the next search step starting from the back.
168 /// - Returns `Match(a, b)` if `haystack[a..b]` matches the pattern.
169 /// - Returns `Reject(a, b)` if `haystack[a..b]` can not match the
170 /// pattern, even partially.
171 /// - Returns `Done` if every byte of the haystack has been visited
173 /// The stream of `Match` and `Reject` values up to a `Done`
174 /// will contain index ranges that are adjacent, non-overlapping,
175 /// covering the whole haystack, and laying on utf8 boundaries.
177 /// A `Match` result needs to contain the whole matched pattern,
178 /// however `Reject` results may be split up into arbitrary
179 /// many adjacent fragments. Both ranges may have zero length.
181 /// As an example, the pattern `"aaa"` and the haystack `"cbaaaaab"`
182 /// might produce the stream
183 /// `[Reject(7, 8), Match(4, 7), Reject(1, 4), Reject(0, 1)]`
184 fn next_back(&mut self) -> SearchStep
;
186 /// Find the next `Match` result. See `next_back()`
188 fn next_match_back(&mut self) -> Option
<(usize, usize)>{
190 match self.next_back() {
191 SearchStep
::Match(a
, b
) => return Some((a
, b
)),
192 SearchStep
::Done
=> return None
,
198 /// Find the next `Reject` result. See `next_back()`
200 fn next_reject_back(&mut self) -> Option
<(usize, usize)>{
202 match self.next_back() {
203 SearchStep
::Reject(a
, b
) => return Some((a
, b
)),
204 SearchStep
::Done
=> return None
,
211 /// A marker trait to express that a `ReverseSearcher`
212 /// can be used for a `DoubleEndedIterator` implementation.
214 /// For this, the impl of `Searcher` and `ReverseSearcher` need
215 /// to follow these conditions:
217 /// - All results of `next()` need to be identical
218 /// to the results of `next_back()` in reverse order.
219 /// - `next()` and `next_back()` need to behave as
220 /// the two ends of a range of values, that is they
221 /// can not "walk past each other".
225 /// `char::Searcher` is a `DoubleEndedSearcher` because searching for a
226 /// `char` only requires looking at one at a time, which behaves the same
229 /// `(&str)::Searcher` is not a `DoubleEndedSearcher` because
230 /// the pattern `"aa"` in the haystack `"aaa"` matches as either
231 /// `"[aa]a"` or `"a[aa]"`, depending from which side it is searched.
232 pub trait DoubleEndedSearcher
<'a
>: ReverseSearcher
<'a
> {}
234 /////////////////////////////////////////////////////////////////////////////
235 // Impl for a CharEq wrapper
236 /////////////////////////////////////////////////////////////////////////////
240 fn matches(&mut self, char) -> bool
;
241 fn only_ascii(&self) -> bool
;
244 impl CharEq
for char {
246 fn matches(&mut self, c
: char) -> bool { *self == c }
249 fn only_ascii(&self) -> bool { (*self as u32) < 128 }
252 impl<F
> CharEq
for F
where F
: FnMut(char) -> bool
{
254 fn matches(&mut self, c
: char) -> bool { (*self)(c) }
257 fn only_ascii(&self) -> bool { false }
260 impl<'a
> CharEq
for &'a
[char] {
262 fn matches(&mut self, c
: char) -> bool
{
263 self.iter().any(|&m
| { let mut m = m; m.matches(c) }
)
267 fn only_ascii(&self) -> bool
{
268 self.iter().all(|m
| m
.only_ascii())
272 struct CharEqPattern
<C
: CharEq
>(C
);
275 struct CharEqSearcher
<'a
, C
: CharEq
> {
278 char_indices
: super::CharIndices
<'a
>,
283 impl<'a
, C
: CharEq
> Pattern
<'a
> for CharEqPattern
<C
> {
284 type Searcher
= CharEqSearcher
<'a
, C
>;
287 fn into_searcher(self, haystack
: &'a
str) -> CharEqSearcher
<'a
, C
> {
289 ascii_only
: self.0.only_ascii(),
292 char_indices
: haystack
.char_indices(),
297 unsafe impl<'a
, C
: CharEq
> Searcher
<'a
> for CharEqSearcher
<'a
, C
> {
299 fn haystack(&self) -> &'a
str {
304 fn next(&mut self) -> SearchStep
{
305 let s
= &mut self.char_indices
;
306 // Compare lengths of the internal byte slice iterator
307 // to find length of current char
308 let (pre_len
, _
) = s
.iter
.iter
.size_hint();
309 if let Some((i
, c
)) = s
.next() {
310 let (len
, _
) = s
.iter
.iter
.size_hint();
311 let char_len
= pre_len
- len
;
312 if self.char_eq
.matches(c
) {
313 return SearchStep
::Match(i
, i
+ char_len
);
315 return SearchStep
::Reject(i
, i
+ char_len
);
322 unsafe impl<'a
, C
: CharEq
> ReverseSearcher
<'a
> for CharEqSearcher
<'a
, C
> {
324 fn next_back(&mut self) -> SearchStep
{
325 let s
= &mut self.char_indices
;
326 // Compare lengths of the internal byte slice iterator
327 // to find length of current char
328 let (pre_len
, _
) = s
.iter
.iter
.size_hint();
329 if let Some((i
, c
)) = s
.next_back() {
330 let (len
, _
) = s
.iter
.iter
.size_hint();
331 let char_len
= pre_len
- len
;
332 if self.char_eq
.matches(c
) {
333 return SearchStep
::Match(i
, i
+ char_len
);
335 return SearchStep
::Reject(i
, i
+ char_len
);
342 impl<'a
, C
: CharEq
> DoubleEndedSearcher
<'a
> for CharEqSearcher
<'a
, C
> {}
344 /////////////////////////////////////////////////////////////////////////////
346 /////////////////////////////////////////////////////////////////////////////
348 // Todo: Optimize the naive implementation here
350 /// Associated type for `<&str as Pattern<'a>>::Searcher`.
352 pub struct StrSearcher
<'a
, 'b
> {
360 #[derive(Clone, PartialEq)]
361 enum State { Done, NotDone, Reject(usize, usize) }
363 #[inline] fn done(&self) -> bool { *self == State::Done }
364 #[inline] fn take(&mut self) -> State { ::mem::replace(self, State::NotDone) }
367 /// Non-allocating substring search.
369 /// Will handle the pattern `""` as returning empty matches at each utf8
371 impl<'a
, 'b
> Pattern
<'a
> for &'b
str {
372 type Searcher
= StrSearcher
<'a
, 'b
>;
375 fn into_searcher(self, haystack
: &'a
str) -> StrSearcher
<'a
, 'b
> {
381 state
: State
::NotDone
,
386 unsafe impl<'a
, 'b
> Searcher
<'a
> for StrSearcher
<'a
, 'b
> {
388 fn haystack(&self) -> &'a
str {
393 fn next(&mut self) -> SearchStep
{
394 str_search_step(self,
395 |m
: &mut StrSearcher
| {
396 // Forward step for empty needle
397 let current_start
= m
.start
;
399 m
.start
= m
.haystack
.char_range_at(current_start
).next
;
400 m
.state
= State
::Reject(current_start
, m
.start
);
402 SearchStep
::Match(current_start
, current_start
)
404 |m
: &mut StrSearcher
| {
405 // Forward step for nonempty needle
406 let current_start
= m
.start
;
407 // Compare byte window because this might break utf8 boundaries
408 let possible_match
= &m
.haystack
.as_bytes()[m
.start
.. m
.start
+ m
.needle
.len()];
409 if possible_match
== m
.needle
.as_bytes() {
410 m
.start
+= m
.needle
.len();
411 SearchStep
::Match(current_start
, m
.start
)
414 let haystack_suffix
= &m
.haystack
[m
.start
..];
415 m
.start
+= haystack_suffix
.chars().next().unwrap().len_utf8();
416 SearchStep
::Reject(current_start
, m
.start
)
422 unsafe impl<'a
, 'b
> ReverseSearcher
<'a
> for StrSearcher
<'a
, 'b
> {
424 fn next_back(&mut self) -> SearchStep
{
425 str_search_step(self,
426 |m
: &mut StrSearcher
| {
427 // Backward step for empty needle
428 let current_end
= m
.end
;
430 m
.end
= m
.haystack
.char_range_at_reverse(current_end
).next
;
431 m
.state
= State
::Reject(m
.end
, current_end
);
433 SearchStep
::Match(current_end
, current_end
)
435 |m
: &mut StrSearcher
| {
436 // Backward step for nonempty needle
437 let current_end
= m
.end
;
438 // Compare byte window because this might break utf8 boundaries
439 let possible_match
= &m
.haystack
.as_bytes()[m
.end
- m
.needle
.len() .. m
.end
];
440 if possible_match
== m
.needle
.as_bytes() {
441 m
.end
-= m
.needle
.len();
442 SearchStep
::Match(m
.end
, current_end
)
445 let haystack_prefix
= &m
.haystack
[..m
.end
];
446 m
.end
-= haystack_prefix
.chars().rev().next().unwrap().len_utf8();
447 SearchStep
::Reject(m
.end
, current_end
)
453 // Helper function for encapsulating the common control flow
454 // of doing a search step from the front or doing a search step from the back
455 fn str_search_step
<F
, G
>(mut m
: &mut StrSearcher
,
456 empty_needle_step
: F
,
457 nonempty_needle_step
: G
) -> SearchStep
458 where F
: FnOnce(&mut StrSearcher
) -> SearchStep
,
459 G
: FnOnce(&mut StrSearcher
) -> SearchStep
463 } else if m
.needle
.is_empty() && m
.start
<= m
.end
{
464 // Case for needle == ""
465 if let State
::Reject(a
, b
) = m
.state
.take() {
466 SearchStep
::Reject(a
, b
)
468 if m
.start
== m
.end
{
469 m
.state
= State
::Done
;
471 empty_needle_step(&mut m
)
473 } else if m
.start
+ m
.needle
.len() <= m
.end
{
474 // Case for needle != ""
475 nonempty_needle_step(&mut m
)
476 } else if m
.start
< m
.end
{
477 // Remaining slice shorter than needle, reject it
478 m
.state
= State
::Done
;
479 SearchStep
::Reject(m
.start
, m
.end
)
481 m
.state
= State
::Done
;
486 /////////////////////////////////////////////////////////////////////////////
488 macro_rules
! pattern_methods
{
489 ($t
:ty
, $pmap
:expr
, $smap
:expr
) => {
493 fn into_searcher(self, haystack
: &'a
str) -> $t
{
494 ($smap
)(($pmap
)(self).into_searcher(haystack
))
498 fn is_contained_in(self, haystack
: &'a
str) -> bool
{
499 ($pmap
)(self).is_contained_in(haystack
)
503 fn is_prefix_of(self, haystack
: &'a
str) -> bool
{
504 ($pmap
)(self).is_prefix_of(haystack
)
508 fn is_suffix_of(self, haystack
: &'a
str) -> bool
509 where $t
: ReverseSearcher
<'a
>
511 ($pmap
)(self).is_suffix_of(haystack
)
516 macro_rules
! searcher_methods
{
519 fn haystack(&self) -> &'a
str {
523 fn next(&mut self) -> SearchStep
{
527 fn next_match(&mut self) -> Option
<(usize, usize)> {
531 fn next_reject(&mut self) -> Option
<(usize, usize)> {
537 fn next_back(&mut self) -> SearchStep
{
541 fn next_match_back(&mut self) -> Option
<(usize, usize)> {
542 self.0.next_match_back()
545 fn next_reject_back(&mut self) -> Option
<(usize, usize)> {
546 self.0.next_reject_back()
551 /////////////////////////////////////////////////////////////////////////////
553 /////////////////////////////////////////////////////////////////////////////
555 /// Associated type for `<char as Pattern<'a>>::Searcher`.
557 pub struct CharSearcher
<'a
>(<CharEqPattern
<char> as Pattern
<'a
>>::Searcher
);
559 unsafe impl<'a
> Searcher
<'a
> for CharSearcher
<'a
> {
560 searcher_methods
!(forward
);
563 unsafe impl<'a
> ReverseSearcher
<'a
> for CharSearcher
<'a
> {
564 searcher_methods
!(reverse
);
567 impl<'a
> DoubleEndedSearcher
<'a
> for CharSearcher
<'a
> {}
569 /// Searches for chars that are equal to a given char
570 impl<'a
> Pattern
<'a
> for char {
571 pattern_methods
!(CharSearcher
<'a
>, CharEqPattern
, CharSearcher
);
574 /////////////////////////////////////////////////////////////////////////////
576 /////////////////////////////////////////////////////////////////////////////
578 // Todo: Change / Remove due to ambiguity in meaning.
580 /// Associated type for `<&[char] as Pattern<'a>>::Searcher`.
582 pub struct CharSliceSearcher
<'a
, 'b
>(<CharEqPattern
<&'b
[char]> as Pattern
<'a
>>::Searcher
);
584 unsafe impl<'a
, 'b
> Searcher
<'a
> for CharSliceSearcher
<'a
, 'b
> {
585 searcher_methods
!(forward
);
588 unsafe impl<'a
, 'b
> ReverseSearcher
<'a
> for CharSliceSearcher
<'a
, 'b
> {
589 searcher_methods
!(reverse
);
592 impl<'a
, 'b
> DoubleEndedSearcher
<'a
> for CharSliceSearcher
<'a
, 'b
> {}
594 /// Searches for chars that are equal to any of the chars in the array
595 impl<'a
, 'b
> Pattern
<'a
> for &'b
[char] {
596 pattern_methods
!(CharSliceSearcher
<'a
, 'b
>, CharEqPattern
, CharSliceSearcher
);
599 /////////////////////////////////////////////////////////////////////////////
600 // Impl for F: FnMut(char) -> bool
601 /////////////////////////////////////////////////////////////////////////////
603 /// Associated type for `<F as Pattern<'a>>::Searcher`.
605 pub struct CharPredicateSearcher
<'a
, F
>(<CharEqPattern
<F
> as Pattern
<'a
>>::Searcher
)
606 where F
: FnMut(char) -> bool
;
608 unsafe impl<'a
, F
> Searcher
<'a
> for CharPredicateSearcher
<'a
, F
>
609 where F
: FnMut(char) -> bool
611 searcher_methods
!(forward
);
614 unsafe impl<'a
, F
> ReverseSearcher
<'a
> for CharPredicateSearcher
<'a
, F
>
615 where F
: FnMut(char) -> bool
617 searcher_methods
!(reverse
);
620 impl<'a
, F
> DoubleEndedSearcher
<'a
> for CharPredicateSearcher
<'a
, F
>
621 where F
: FnMut(char) -> bool {}
623 /// Searches for chars that match the given predicate
624 impl<'a
, F
> Pattern
<'a
> for F
where F
: FnMut(char) -> bool
{
625 pattern_methods
!(CharPredicateSearcher
<'a
, F
>, CharEqPattern
, CharPredicateSearcher
);
628 /////////////////////////////////////////////////////////////////////////////
630 /////////////////////////////////////////////////////////////////////////////
632 /// Delegates to the `&str` impl.
633 impl<'a
, 'b
> Pattern
<'a
> for &'b
&'b
str {
634 pattern_methods
!(StrSearcher
<'a
, 'b
>, |&s
| s
, |s
| s
);