1 // Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 /// Slot is a single saved capture location. Note that there are two slots for
12 /// every capture in a regular expression (one slot each for the start and end
/// of the capture).
///
/// A slot is `None` until a position has been recorded in it.
14 pub type Slot
= Option
<usize>;
16 /// Locations represents the offsets of each capturing group in a regex for
/// a single match.
///
19 /// Unlike `Captures`, a `Locations` value only stores offsets.
///
/// The wrapped vector holds two slots per capture group: group `i` uses slot
/// `2 * i` for its start offset and slot `2 * i + 1` for its end offset.
21 pub struct Locations(Vec
<Slot
>);
24 /// Returns the start and end positions of the Nth capture group. Returns
25 /// `None` if `i` is not a valid capture group or if the capture group did
26 /// not match anything. The positions returned are *always* byte indices
27 /// with respect to the original string matched.
28 pub fn pos(&self, i
: usize) -> Option
<(usize, usize)> {
29 let (s
, e
) = (i
* 2, i
* 2 + 1);
30 match (self.0.get(s
), self.0.get(e
)) {
31 (Some(&Some(s
)), Some(&Some(e
))) => Some((s
, e
)),
36 /// Creates an iterator of all the capture group positions in order of
37 /// appearance in the regular expression. Positions are byte indices
38 /// in terms of the original string matched.
39 pub fn iter(&self) -> SubCapturesPosIter
{
40 SubCapturesPosIter { idx: 0, locs: self }
43 /// Returns the total number of capturing groups.
45 /// This is always at least `1` since every regex has at least `1`
46 /// capturing group that corresponds to the entire match.
47 pub fn len(&self) -> usize {
52 /// This is a hack to make Locations -> &mut [Slot] be available internally
53 /// without exposing it in the public API.
54 pub fn as_slots(locs
: &mut Locations
) -> &mut [Slot
] {
58 /// An iterator over capture group positions for a particular match of a
59 /// regular expression.
61 /// Positions are byte indices in terms of the original string matched.
63 /// `'c` is the lifetime of the captures.
64 pub struct SubCapturesPosIter
<'c
> {
69 impl<'c
> Iterator
for SubCapturesPosIter
<'c
> {
70 type Item
= Option
<(usize, usize)>;
72 fn next(&mut self) -> Option
<Option
<(usize, usize)>> {
73 if self.idx
>= self.locs
.len() {
76 let x
= match self.locs
.pos(self.idx
) {
87 /// `RegularExpression` describes types that can implement regex searching.
89 /// This trait is an attempt to reduce code duplication and to standardize
90 /// the internal API. The specific duplication avoided is in the `find`
91 /// and `capture` iterators, which are slightly tricky.
93 /// It's not clear whether this trait is worth it, and it also isn't
94 /// clear whether it's useful as a public trait or not. Methods like
95 /// `next_after_empty` reek of bad design, but the rest of the methods seem
96 /// somewhat reasonable. One particular thing this trait would expose would be
97 /// the ability to start the search of a regex anywhere in a haystack, which
98 /// isn't possible in the current public API.
99 pub trait RegularExpression
: Sized
{
100 /// The type of the haystack.
103 /// The number of capture slots in the compiled regular expression. This is
104 /// always two times the number of capture groups (two slots per group).
105 fn slots_len(&self) -> usize;
107 /// Allocates fresh space for all capturing groups in this regex.
108 fn locations(&self) -> Locations
{
109 Locations(vec
![None
; self.slots_len()])
112 /// Returns the position of the next character after `i`.
114 /// For example, a haystack with type `&[u8]` probably returns `i+1`,
115 /// whereas a haystack with type `&str` probably returns `i` plus the
116 /// length of the next UTF-8 sequence.
117 fn next_after_empty(&self, text
: &Self::Text
, i
: usize) -> usize;
119 /// Returns the location of the shortest match.
120 fn shortest_match_at(
126 /// Returns whether the regex matches the text given.
133 /// Returns the leftmost-first match location if one exists.
138 ) -> Option
<(usize, usize)>;
140 /// Returns the leftmost-first match location if one exists, and also
141 /// fills in any matching capture slot locations.
144 locs
: &mut Locations
,
147 ) -> Option
<(usize, usize)>;
149 /// Returns an iterator over all non-overlapping successive leftmost-first matches.
163 /// Returns an iterator over all non-overlapping successive leftmost-first
164 /// matches with captures.
168 ) -> CaptureMatches
<Self> {
169 CaptureMatches(self.find_iter(text
))
173 /// An iterator over all non-overlapping successive leftmost-first matches.
174 pub struct Matches
<'t
, R
> where R
: RegularExpression
, R
::Text
: 't
{
178 last_match
: Option
<usize>,
181 impl<'t
, R
> Matches
<'t
, R
> where R
: RegularExpression
, R
::Text
: 't
{
182 /// Return the text being searched.
183 pub fn text(&self) -> &'t R
::Text
{
187 /// Return the underlying regex.
188 pub fn regex(&self) -> &R
{
193 impl<'t
, R
> Iterator
for Matches
<'t
, R
>
194 where R
: RegularExpression
, R
::Text
: 't
+ AsRef
<[u8]> {
195 type Item
= (usize, usize);
197 fn next(&mut self) -> Option
<(usize, usize)> {
198 if self.last_end
> self.text
.as_ref().len() {
201 let (s
, e
) = match self.re
.find_at(self.text
, self.last_end
) {
203 Some((s
, e
)) => (s
, e
),
206 // This is an empty match. To ensure we make progress, start
207 // the next search at the smallest possible starting position
208 // of the next match following this one.
209 self.last_end
= self.re
.next_after_empty(self.text
, e
);
210 // Don't accept empty matches immediately following a match.
211 // Just move on to the next match.
212 if Some(e
) == self.last_match
{
218 self.last_match
= Some(e
);
223 /// An iterator over all non-overlapping successive leftmost-first matches with captures.
225 pub struct CaptureMatches
<'t
, R
>(Matches
<'t
, R
>)
226 where R
: RegularExpression
, R
::Text
: 't
;
228 impl<'t
, R
> CaptureMatches
<'t
, R
> where R
: RegularExpression
, R
::Text
: 't
{
229 /// Return the text being searched.
230 pub fn text(&self) -> &'t R
::Text
{
234 /// Return the underlying regex.
235 pub fn regex(&self) -> &R
{
240 impl<'t
, R
> Iterator
for CaptureMatches
<'t
, R
>
241 where R
: RegularExpression
, R
::Text
: 't
+ AsRef
<[u8]> {
242 type Item
= Locations
;
244 fn next(&mut self) -> Option
<Locations
> {
245 if self.0.last_end
> self.0.text
.as_ref().len() {
248 let mut locs
= self.0.re
.locations();
249 let (s
, e
) = match self.0.re
.read_captures_at(
255 Some((s
, e
)) => (s
, e
),
258 self.0.last_end
= self.0.re
.next_after_empty(self.0.text
, e
);
259 if Some(e
) == self.0.last_match
{
265 self.0.last_match
= Some(e
);