/// Slot is a single saved capture location.
///
/// There are two slots for every capture in a regular expression: one slot
/// for the start offset of the capture and one slot for its end offset.
/// A slot holding `None` has no offset stored in it.
pub type Slot = Option<usize>;
6 /// Locations represents the offsets of each capturing group in a regex for
9 /// Unlike `Captures`, a `Locations` value only stores offsets.
11 #[derive(Clone, Debug)]
12 pub struct Locations(Vec
<Slot
>);
15 /// Returns the start and end positions of the Nth capture group. Returns
16 /// `None` if `i` is not a valid capture group or if the capture group did
17 /// not match anything. The positions returned are *always* byte indices
18 /// with respect to the original string matched.
19 pub fn pos(&self, i
: usize) -> Option
<(usize, usize)> {
20 let (s
, e
) = (i
* 2, i
* 2 + 1);
21 match (self.0.get(s
), self.0.get(e
)) {
22 (Some(&Some(s
)), Some(&Some(e
))) => Some((s
, e
)),
27 /// Creates an iterator of all the capture group positions in order of
28 /// appearance in the regular expression. Positions are byte indices
29 /// in terms of the original string matched.
30 pub fn iter(&self) -> SubCapturesPosIter
{
31 SubCapturesPosIter { idx: 0, locs: self }
34 /// Returns the total number of capturing groups.
36 /// This is always at least `1` since every regex has at least `1`
37 /// capturing group that corresponds to the entire match.
38 pub fn len(&self) -> usize {
42 /// Return the individual slots as a slice.
43 pub(crate) fn as_slots(&mut self) -> &mut [Slot
] {
48 /// An iterator over capture group positions for a particular match of a
49 /// regular expression.
51 /// Positions are byte indices in terms of the original string matched.
53 /// `'c` is the lifetime of the captures.
54 pub struct SubCapturesPosIter
<'c
> {
59 impl<'c
> Iterator
for SubCapturesPosIter
<'c
> {
60 type Item
= Option
<(usize, usize)>;
62 fn next(&mut self) -> Option
<Option
<(usize, usize)>> {
63 if self.idx
>= self.locs
.len() {
66 let x
= match self.locs
.pos(self.idx
) {
68 Some((s
, e
)) => Some(Some((s
, e
))),
/// `RegularExpression` describes types that can implement regex searching.
///
/// This trait is my attempt at reducing code duplication and standardizing
/// the internal API. The specific duplication that is avoided is in the
/// `find` and `capture` iterators, which are slightly tricky.
///
/// It's not clear whether this trait is worth it, and it also isn't
/// clear whether it's useful as a public trait or not. Methods like
/// `next_after_empty` reek of bad design, but the rest of the methods seem
/// somewhat reasonable. One particular thing this trait would expose would be
/// the ability to start the search of a regex anywhere in a haystack, which
/// isn't possible in the current public API.
87 pub trait RegularExpression
: Sized
{
88 /// The type of the haystack.
91 /// The number of capture slots in the compiled regular expression. This is
92 /// always two times the number of capture groups (two slots per group).
93 fn slots_len(&self) -> usize;
95 /// Allocates fresh space for all capturing groups in this regex.
96 fn locations(&self) -> Locations
{
97 Locations(vec
![None
; self.slots_len()])
100 /// Returns the position of the next character after `i`.
102 /// For example, a haystack with type `&[u8]` probably returns `i+1`,
103 /// whereas a haystack with type `&str` probably returns `i` plus the
104 /// length of the next UTF-8 sequence.
105 fn next_after_empty(&self, text
: &Self::Text
, i
: usize) -> usize;
107 /// Returns the location of the shortest match.
108 fn shortest_match_at(
114 /// Returns whether the regex matches the text given.
115 fn is_match_at(&self, text
: &Self::Text
, start
: usize) -> bool
;
117 /// Returns the leftmost-first match location if one exists.
122 ) -> Option
<(usize, usize)>;
124 /// Returns the leftmost-first match location if one exists, and also
125 /// fills in any matching capture slot locations.
128 locs
: &mut Locations
,
131 ) -> Option
<(usize, usize)>;
/// Returns an iterator over all non-overlapping successive leftmost-first
/// matches.
135 fn find_iter(self, text
: &Self::Text
) -> Matches
<Self> {
136 Matches { re: self, text: text, last_end: 0, last_match: None }
139 /// Returns an iterator over all non-overlapping successive leftmost-first
140 /// matches with captures.
141 fn captures_iter(self, text
: &Self::Text
) -> CaptureMatches
<Self> {
142 CaptureMatches(self.find_iter(text
))
146 /// An iterator over all non-overlapping successive leftmost-first matches.
147 pub struct Matches
<'t
, R
>
149 R
: RegularExpression
,
155 last_match
: Option
<usize>,
158 impl<'t
, R
> Matches
<'t
, R
>
160 R
: RegularExpression
,
163 /// Return the text being searched.
164 pub fn text(&self) -> &'t R
::Text
{
168 /// Return the underlying regex.
169 pub fn regex(&self) -> &R
{
174 impl<'t
, R
> Iterator
for Matches
<'t
, R
>
176 R
: RegularExpression
,
177 R
::Text
: 't
+ AsRef
<[u8]>,
179 type Item
= (usize, usize);
181 fn next(&mut self) -> Option
<(usize, usize)> {
182 if self.last_end
> self.text
.as_ref().len() {
185 let (s
, e
) = match self.re
.find_at(self.text
, self.last_end
) {
187 Some((s
, e
)) => (s
, e
),
190 // This is an empty match. To ensure we make progress, start
191 // the next search at the smallest possible starting position
192 // of the next match following this one.
193 self.last_end
= self.re
.next_after_empty(self.text
, e
);
194 // Don't accept empty matches immediately following a match.
195 // Just move on to the next match.
196 if Some(e
) == self.last_match
{
202 self.last_match
= Some(e
);
/// An iterator over all non-overlapping successive leftmost-first matches with
/// captures.
209 pub struct CaptureMatches
<'t
, R
>(Matches
<'t
, R
>)
211 R
: RegularExpression
,
214 impl<'t
, R
> CaptureMatches
<'t
, R
>
216 R
: RegularExpression
,
219 /// Return the text being searched.
220 pub fn text(&self) -> &'t R
::Text
{
224 /// Return the underlying regex.
225 pub fn regex(&self) -> &R
{
230 impl<'t
, R
> Iterator
for CaptureMatches
<'t
, R
>
232 R
: RegularExpression
,
233 R
::Text
: 't
+ AsRef
<[u8]>,
235 type Item
= Locations
;
237 fn next(&mut self) -> Option
<Locations
> {
238 if self.0.last_end
> self.0.text
.as_ref().len() {
241 let mut locs
= self.0.re
.locations();
242 let (s
, e
) = match self.0.re
.captures_read_at(
248 Some((s
, e
)) => (s
, e
),
251 self.0.last_end
= self.0.re
.next_after_empty(self.0.text
, e
);
252 if Some(e
) == self.0.last_match
{
258 self.0.last_match
= Some(e
);