]> git.proxmox.com Git - cargo.git/blob - vendor/regex/src/re_trait.rs
New upstream version 0.47.0
[cargo.git] / vendor / regex / src / re_trait.rs
1 /// Slot is a single saved capture location. Note that there are two slots for
2 /// every capture in a regular expression (one slot each for the start and end
3 /// of the capture).
4 pub type Slot = Option<usize>;
5
6 /// Locations represents the offsets of each capturing group in a regex for
7 /// a single match.
8 ///
9 /// Unlike `Captures`, a `Locations` value only stores offsets.
10 #[doc(hidden)]
11 #[derive(Clone, Debug)]
12 pub struct Locations(Vec<Slot>);
13
14 impl Locations {
15 /// Returns the start and end positions of the Nth capture group. Returns
16 /// `None` if `i` is not a valid capture group or if the capture group did
17 /// not match anything. The positions returned are *always* byte indices
18 /// with respect to the original string matched.
19 pub fn pos(&self, i: usize) -> Option<(usize, usize)> {
20 let (s, e) = (i * 2, i * 2 + 1);
21 match (self.0.get(s), self.0.get(e)) {
22 (Some(&Some(s)), Some(&Some(e))) => Some((s, e)),
23 _ => None,
24 }
25 }
26
27 /// Creates an iterator of all the capture group positions in order of
28 /// appearance in the regular expression. Positions are byte indices
29 /// in terms of the original string matched.
30 pub fn iter(&self) -> SubCapturesPosIter {
31 SubCapturesPosIter { idx: 0, locs: self }
32 }
33
34 /// Returns the total number of capturing groups.
35 ///
36 /// This is always at least `1` since every regex has at least `1`
37 /// capturing group that corresponds to the entire match.
38 pub fn len(&self) -> usize {
39 self.0.len() / 2
40 }
41
42 /// Return the individual slots as a slice.
43 pub(crate) fn as_slots(&mut self) -> &mut [Slot] {
44 &mut self.0
45 }
46 }
47
48 /// An iterator over capture group positions for a particular match of a
49 /// regular expression.
50 ///
51 /// Positions are byte indices in terms of the original string matched.
52 ///
53 /// `'c` is the lifetime of the captures.
54 pub struct SubCapturesPosIter<'c> {
55 idx: usize,
56 locs: &'c Locations,
57 }
58
59 impl<'c> Iterator for SubCapturesPosIter<'c> {
60 type Item = Option<(usize, usize)>;
61
62 fn next(&mut self) -> Option<Option<(usize, usize)>> {
63 if self.idx >= self.locs.len() {
64 return None;
65 }
66 let x = match self.locs.pos(self.idx) {
67 None => Some(None),
68 Some((s, e)) => Some(Some((s, e))),
69 };
70 self.idx += 1;
71 x
72 }
73 }
74
75 /// `RegularExpression` describes types that can implement regex searching.
76 ///
77 /// This trait is my attempt at reducing code duplication and to standardize
78 /// the internal API. Specific duplication that is avoided are the `find`
79 /// and `capture` iterators, which are slightly tricky.
80 ///
81 /// It's not clear whether this trait is worth it, and it also isn't
82 /// clear whether it's useful as a public trait or not. Methods like
83 /// `next_after_empty` reak of bad design, but the rest of the methods seem
84 /// somewhat reasonable. One particular thing this trait would expose would be
85 /// the ability to start the search of a regex anywhere in a haystack, which
86 /// isn't possible in the current public API.
87 pub trait RegularExpression: Sized {
88 /// The type of the haystack.
89 type Text: ?Sized;
90
91 /// The number of capture slots in the compiled regular expression. This is
92 /// always two times the number of capture groups (two slots per group).
93 fn slots_len(&self) -> usize;
94
95 /// Allocates fresh space for all capturing groups in this regex.
96 fn locations(&self) -> Locations {
97 Locations(vec![None; self.slots_len()])
98 }
99
100 /// Returns the position of the next character after `i`.
101 ///
102 /// For example, a haystack with type `&[u8]` probably returns `i+1`,
103 /// whereas a haystack with type `&str` probably returns `i` plus the
104 /// length of the next UTF-8 sequence.
105 fn next_after_empty(&self, text: &Self::Text, i: usize) -> usize;
106
107 /// Returns the location of the shortest match.
108 fn shortest_match_at(
109 &self,
110 text: &Self::Text,
111 start: usize,
112 ) -> Option<usize>;
113
114 /// Returns whether the regex matches the text given.
115 fn is_match_at(&self, text: &Self::Text, start: usize) -> bool;
116
117 /// Returns the leftmost-first match location if one exists.
118 fn find_at(
119 &self,
120 text: &Self::Text,
121 start: usize,
122 ) -> Option<(usize, usize)>;
123
124 /// Returns the leftmost-first match location if one exists, and also
125 /// fills in any matching capture slot locations.
126 fn captures_read_at(
127 &self,
128 locs: &mut Locations,
129 text: &Self::Text,
130 start: usize,
131 ) -> Option<(usize, usize)>;
132
133 /// Returns an iterator over all non-overlapping successive leftmost-first
134 /// matches.
135 fn find_iter(self, text: &Self::Text) -> Matches<Self> {
136 Matches { re: self, text: text, last_end: 0, last_match: None }
137 }
138
139 /// Returns an iterator over all non-overlapping successive leftmost-first
140 /// matches with captures.
141 fn captures_iter(self, text: &Self::Text) -> CaptureMatches<Self> {
142 CaptureMatches(self.find_iter(text))
143 }
144 }
145
146 /// An iterator over all non-overlapping successive leftmost-first matches.
147 pub struct Matches<'t, R>
148 where
149 R: RegularExpression,
150 R::Text: 't,
151 {
152 re: R,
153 text: &'t R::Text,
154 last_end: usize,
155 last_match: Option<usize>,
156 }
157
158 impl<'t, R> Matches<'t, R>
159 where
160 R: RegularExpression,
161 R::Text: 't,
162 {
163 /// Return the text being searched.
164 pub fn text(&self) -> &'t R::Text {
165 self.text
166 }
167
168 /// Return the underlying regex.
169 pub fn regex(&self) -> &R {
170 &self.re
171 }
172 }
173
174 impl<'t, R> Iterator for Matches<'t, R>
175 where
176 R: RegularExpression,
177 R::Text: 't + AsRef<[u8]>,
178 {
179 type Item = (usize, usize);
180
181 fn next(&mut self) -> Option<(usize, usize)> {
182 if self.last_end > self.text.as_ref().len() {
183 return None;
184 }
185 let (s, e) = match self.re.find_at(self.text, self.last_end) {
186 None => return None,
187 Some((s, e)) => (s, e),
188 };
189 if s == e {
190 // This is an empty match. To ensure we make progress, start
191 // the next search at the smallest possible starting position
192 // of the next match following this one.
193 self.last_end = self.re.next_after_empty(self.text, e);
194 // Don't accept empty matches immediately following a match.
195 // Just move on to the next match.
196 if Some(e) == self.last_match {
197 return self.next();
198 }
199 } else {
200 self.last_end = e;
201 }
202 self.last_match = Some(e);
203 Some((s, e))
204 }
205 }
206
207 /// An iterator over all non-overlapping successive leftmost-first matches with
208 /// captures.
209 pub struct CaptureMatches<'t, R>(Matches<'t, R>)
210 where
211 R: RegularExpression,
212 R::Text: 't;
213
214 impl<'t, R> CaptureMatches<'t, R>
215 where
216 R: RegularExpression,
217 R::Text: 't,
218 {
219 /// Return the text being searched.
220 pub fn text(&self) -> &'t R::Text {
221 self.0.text()
222 }
223
224 /// Return the underlying regex.
225 pub fn regex(&self) -> &R {
226 self.0.regex()
227 }
228 }
229
230 impl<'t, R> Iterator for CaptureMatches<'t, R>
231 where
232 R: RegularExpression,
233 R::Text: 't + AsRef<[u8]>,
234 {
235 type Item = Locations;
236
237 fn next(&mut self) -> Option<Locations> {
238 if self.0.last_end > self.0.text.as_ref().len() {
239 return None;
240 }
241 let mut locs = self.0.re.locations();
242 let (s, e) = match self.0.re.captures_read_at(
243 &mut locs,
244 self.0.text,
245 self.0.last_end,
246 ) {
247 None => return None,
248 Some((s, e)) => (s, e),
249 };
250 if s == e {
251 self.0.last_end = self.0.re.next_after_empty(self.0.text, e);
252 if Some(e) == self.0.last_match {
253 return self.next();
254 }
255 } else {
256 self.0.last_end = e;
257 }
258 self.0.last_match = Some(e);
259 Some(locs)
260 }
261 }