]> git.proxmox.com Git - rustc.git/blame - vendor/regex-1.4.3/src/re_trait.rs
New upstream version 1.54.0+dfsg1
[rustc.git] / vendor / regex-1.4.3 / src / re_trait.rs
CommitLineData
5869c6ff
XL
1use std::fmt;
2use std::iter::FusedIterator;
3
8bb4bdeb
XL
4/// Slot is a single saved capture location. Note that there are two slots for
5/// every capture in a regular expression (one slot each for the start and end
6/// of the capture).
7pub type Slot = Option<usize>;
8
9/// Locations represents the offsets of each capturing group in a regex for
10/// a single match.
11///
12/// Unlike `Captures`, a `Locations` value only stores offsets.
13#[doc(hidden)]
b7449926 14#[derive(Clone, Debug)]
8bb4bdeb
XL
15pub struct Locations(Vec<Slot>);
16
17impl Locations {
18 /// Returns the start and end positions of the Nth capture group. Returns
19 /// `None` if `i` is not a valid capture group or if the capture group did
20 /// not match anything. The positions returned are *always* byte indices
21 /// with respect to the original string matched.
22 pub fn pos(&self, i: usize) -> Option<(usize, usize)> {
23 let (s, e) = (i * 2, i * 2 + 1);
24 match (self.0.get(s), self.0.get(e)) {
25 (Some(&Some(s)), Some(&Some(e))) => Some((s, e)),
26 _ => None,
27 }
28 }
29
30 /// Creates an iterator of all the capture group positions in order of
31 /// appearance in the regular expression. Positions are byte indices
32 /// in terms of the original string matched.
33 pub fn iter(&self) -> SubCapturesPosIter {
ff7c6d11 34 SubCapturesPosIter { idx: 0, locs: self }
8bb4bdeb
XL
35 }
36
37 /// Returns the total number of capturing groups.
38 ///
39 /// This is always at least `1` since every regex has at least `1`
40 /// capturing group that corresponds to the entire match.
41 pub fn len(&self) -> usize {
42 self.0.len() / 2
43 }
8bb4bdeb 44
b7449926
XL
45 /// Return the individual slots as a slice.
46 pub(crate) fn as_slots(&mut self) -> &mut [Slot] {
47 &mut self.0
48 }
8bb4bdeb
XL
49}
50
51/// An iterator over capture group positions for a particular match of a
52/// regular expression.
53///
54/// Positions are byte indices in terms of the original string matched.
55///
56/// `'c` is the lifetime of the captures.
5869c6ff 57#[derive(Clone, Debug)]
8bb4bdeb
XL
58pub struct SubCapturesPosIter<'c> {
59 idx: usize,
60 locs: &'c Locations,
61}
62
63impl<'c> Iterator for SubCapturesPosIter<'c> {
64 type Item = Option<(usize, usize)>;
65
66 fn next(&mut self) -> Option<Option<(usize, usize)>> {
67 if self.idx >= self.locs.len() {
68 return None;
69 }
70 let x = match self.locs.pos(self.idx) {
71 None => Some(None),
f9f354fc 72 Some((s, e)) => Some(Some((s, e))),
8bb4bdeb
XL
73 };
74 self.idx += 1;
75 x
76 }
77}
78
5869c6ff
XL
79impl<'c> FusedIterator for SubCapturesPosIter<'c> {}
80
ff7c6d11 81/// `RegularExpression` describes types that can implement regex searching.
8bb4bdeb
XL
82///
83/// This trait is my attempt at reducing code duplication and to standardize
84/// the internal API. Specific duplication that is avoided are the `find`
85/// and `capture` iterators, which are slightly tricky.
86///
87/// It's not clear whether this trait is worth it, and it also isn't
88/// clear whether it's useful as a public trait or not. Methods like
89/// `next_after_empty` reak of bad design, but the rest of the methods seem
90/// somewhat reasonable. One particular thing this trait would expose would be
91/// the ability to start the search of a regex anywhere in a haystack, which
92/// isn't possible in the current public API.
5869c6ff 93pub trait RegularExpression: Sized + fmt::Debug {
8bb4bdeb 94 /// The type of the haystack.
5869c6ff 95 type Text: ?Sized + fmt::Debug;
8bb4bdeb
XL
96
97 /// The number of capture slots in the compiled regular expression. This is
98 /// always two times the number of capture groups (two slots per group).
99 fn slots_len(&self) -> usize;
100
101 /// Allocates fresh space for all capturing groups in this regex.
102 fn locations(&self) -> Locations {
103 Locations(vec![None; self.slots_len()])
104 }
105
106 /// Returns the position of the next character after `i`.
107 ///
108 /// For example, a haystack with type `&[u8]` probably returns `i+1`,
109 /// whereas a haystack with type `&str` probably returns `i` plus the
110 /// length of the next UTF-8 sequence.
111 fn next_after_empty(&self, text: &Self::Text, i: usize) -> usize;
112
113 /// Returns the location of the shortest match.
114 fn shortest_match_at(
115 &self,
116 text: &Self::Text,
117 start: usize,
118 ) -> Option<usize>;
119
120 /// Returns whether the regex matches the text given.
f9f354fc 121 fn is_match_at(&self, text: &Self::Text, start: usize) -> bool;
8bb4bdeb
XL
122
123 /// Returns the leftmost-first match location if one exists.
124 fn find_at(
125 &self,
126 text: &Self::Text,
127 start: usize,
128 ) -> Option<(usize, usize)>;
129
130 /// Returns the leftmost-first match location if one exists, and also
131 /// fills in any matching capture slot locations.
b7449926 132 fn captures_read_at(
8bb4bdeb
XL
133 &self,
134 locs: &mut Locations,
135 text: &Self::Text,
136 start: usize,
137 ) -> Option<(usize, usize)>;
138
139 /// Returns an iterator over all non-overlapping successive leftmost-first
140 /// matches.
f9f354fc
XL
141 fn find_iter(self, text: &Self::Text) -> Matches<Self> {
142 Matches { re: self, text: text, last_end: 0, last_match: None }
8bb4bdeb
XL
143 }
144
145 /// Returns an iterator over all non-overlapping successive leftmost-first
146 /// matches with captures.
f9f354fc 147 fn captures_iter(self, text: &Self::Text) -> CaptureMatches<Self> {
8bb4bdeb
XL
148 CaptureMatches(self.find_iter(text))
149 }
150}
151
152/// An iterator over all non-overlapping successive leftmost-first matches.
5869c6ff 153#[derive(Debug)]
f9f354fc
XL
154pub struct Matches<'t, R>
155where
156 R: RegularExpression,
157 R::Text: 't,
158{
8bb4bdeb
XL
159 re: R,
160 text: &'t R::Text,
161 last_end: usize,
162 last_match: Option<usize>,
163}
164
f9f354fc
XL
165impl<'t, R> Matches<'t, R>
166where
167 R: RegularExpression,
168 R::Text: 't,
169{
8bb4bdeb
XL
170 /// Return the text being searched.
171 pub fn text(&self) -> &'t R::Text {
172 self.text
173 }
174
175 /// Return the underlying regex.
176 pub fn regex(&self) -> &R {
177 &self.re
178 }
179}
180
181impl<'t, R> Iterator for Matches<'t, R>
f9f354fc
XL
182where
183 R: RegularExpression,
184 R::Text: 't + AsRef<[u8]>,
185{
8bb4bdeb
XL
186 type Item = (usize, usize);
187
188 fn next(&mut self) -> Option<(usize, usize)> {
189 if self.last_end > self.text.as_ref().len() {
190 return None;
191 }
192 let (s, e) = match self.re.find_at(self.text, self.last_end) {
193 None => return None,
194 Some((s, e)) => (s, e),
195 };
196 if s == e {
197 // This is an empty match. To ensure we make progress, start
198 // the next search at the smallest possible starting position
199 // of the next match following this one.
ff7c6d11 200 self.last_end = self.re.next_after_empty(self.text, e);
8bb4bdeb
XL
201 // Don't accept empty matches immediately following a match.
202 // Just move on to the next match.
203 if Some(e) == self.last_match {
204 return self.next();
205 }
206 } else {
207 self.last_end = e;
208 }
209 self.last_match = Some(e);
210 Some((s, e))
211 }
212}
213
5869c6ff
XL
214impl<'t, R> FusedIterator for Matches<'t, R>
215where
216 R: RegularExpression,
217 R::Text: 't + AsRef<[u8]>,
218{
219}
220
8bb4bdeb
XL
221/// An iterator over all non-overlapping successive leftmost-first matches with
222/// captures.
5869c6ff 223#[derive(Debug)]
8bb4bdeb 224pub struct CaptureMatches<'t, R>(Matches<'t, R>)
f9f354fc
XL
225where
226 R: RegularExpression,
227 R::Text: 't;
8bb4bdeb 228
f9f354fc
XL
229impl<'t, R> CaptureMatches<'t, R>
230where
231 R: RegularExpression,
232 R::Text: 't,
233{
8bb4bdeb
XL
234 /// Return the text being searched.
235 pub fn text(&self) -> &'t R::Text {
236 self.0.text()
237 }
238
239 /// Return the underlying regex.
240 pub fn regex(&self) -> &R {
241 self.0.regex()
242 }
243}
244
245impl<'t, R> Iterator for CaptureMatches<'t, R>
f9f354fc
XL
246where
247 R: RegularExpression,
248 R::Text: 't + AsRef<[u8]>,
249{
8bb4bdeb
XL
250 type Item = Locations;
251
252 fn next(&mut self) -> Option<Locations> {
253 if self.0.last_end > self.0.text.as_ref().len() {
f9f354fc 254 return None;
8bb4bdeb
XL
255 }
256 let mut locs = self.0.re.locations();
b7449926 257 let (s, e) = match self.0.re.captures_read_at(
8bb4bdeb
XL
258 &mut locs,
259 self.0.text,
260 self.0.last_end,
261 ) {
262 None => return None,
263 Some((s, e)) => (s, e),
264 };
265 if s == e {
ff7c6d11 266 self.0.last_end = self.0.re.next_after_empty(self.0.text, e);
8bb4bdeb
XL
267 if Some(e) == self.0.last_match {
268 return self.next();
269 }
270 } else {
271 self.0.last_end = e;
272 }
273 self.0.last_match = Some(e);
274 Some(locs)
275 }
276}
5869c6ff
XL
277
278impl<'t, R> FusedIterator for CaptureMatches<'t, R>
279where
280 R: RegularExpression,
281 R::Text: 't + AsRef<[u8]>,
282{
283}