]> git.proxmox.com Git - rustc.git/blob - vendor/regex-0.2.11/src/re_trait.rs
New upstream version 1.33.0+dfsg1
[rustc.git] / vendor / regex-0.2.11 / src / re_trait.rs
1 // Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
4 //
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
10
/// One saved capture offset; `None` marks a slot that holds no offset.
///
/// Every capture group in a regular expression owns two slots: the first
/// records where the group starts and the second records where it ends.
pub type Slot = Option<usize>;
15
16 /// Locations represents the offsets of each capturing group in a regex for
17 /// a single match.
18 ///
19 /// Unlike `Captures`, a `Locations` value only stores offsets.
20 #[doc(hidden)]
21 pub struct Locations(Vec<Slot>);
22
23 impl Locations {
24 /// Returns the start and end positions of the Nth capture group. Returns
25 /// `None` if `i` is not a valid capture group or if the capture group did
26 /// not match anything. The positions returned are *always* byte indices
27 /// with respect to the original string matched.
28 pub fn pos(&self, i: usize) -> Option<(usize, usize)> {
29 let (s, e) = (i * 2, i * 2 + 1);
30 match (self.0.get(s), self.0.get(e)) {
31 (Some(&Some(s)), Some(&Some(e))) => Some((s, e)),
32 _ => None,
33 }
34 }
35
36 /// Creates an iterator of all the capture group positions in order of
37 /// appearance in the regular expression. Positions are byte indices
38 /// in terms of the original string matched.
39 pub fn iter(&self) -> SubCapturesPosIter {
40 SubCapturesPosIter { idx: 0, locs: self }
41 }
42
43 /// Returns the total number of capturing groups.
44 ///
45 /// This is always at least `1` since every regex has at least `1`
46 /// capturing group that corresponds to the entire match.
47 pub fn len(&self) -> usize {
48 self.0.len() / 2
49 }
50 }
51
52 /// This is a hack to make Locations -> &mut [Slot] be available internally
53 /// without exposing it in the public API.
54 pub fn as_slots(locs: &mut Locations) -> &mut [Slot] {
55 &mut locs.0
56 }
57
58 /// An iterator over capture group positions for a particular match of a
59 /// regular expression.
60 ///
61 /// Positions are byte indices in terms of the original string matched.
62 ///
63 /// `'c` is the lifetime of the captures.
64 pub struct SubCapturesPosIter<'c> {
65 idx: usize,
66 locs: &'c Locations,
67 }
68
69 impl<'c> Iterator for SubCapturesPosIter<'c> {
70 type Item = Option<(usize, usize)>;
71
72 fn next(&mut self) -> Option<Option<(usize, usize)>> {
73 if self.idx >= self.locs.len() {
74 return None;
75 }
76 let x = match self.locs.pos(self.idx) {
77 None => Some(None),
78 Some((s, e)) => {
79 Some(Some((s, e)))
80 }
81 };
82 self.idx += 1;
83 x
84 }
85 }
86
87 /// `RegularExpression` describes types that can implement regex searching.
88 ///
89 /// This trait is my attempt at reducing code duplication and to standardize
90 /// the internal API. Specific duplication that is avoided are the `find`
91 /// and `capture` iterators, which are slightly tricky.
92 ///
93 /// It's not clear whether this trait is worth it, and it also isn't
94 /// clear whether it's useful as a public trait or not. Methods like
95 /// `next_after_empty` reak of bad design, but the rest of the methods seem
96 /// somewhat reasonable. One particular thing this trait would expose would be
97 /// the ability to start the search of a regex anywhere in a haystack, which
98 /// isn't possible in the current public API.
99 pub trait RegularExpression: Sized {
100 /// The type of the haystack.
101 type Text: ?Sized;
102
103 /// The number of capture slots in the compiled regular expression. This is
104 /// always two times the number of capture groups (two slots per group).
105 fn slots_len(&self) -> usize;
106
107 /// Allocates fresh space for all capturing groups in this regex.
108 fn locations(&self) -> Locations {
109 Locations(vec![None; self.slots_len()])
110 }
111
112 /// Returns the position of the next character after `i`.
113 ///
114 /// For example, a haystack with type `&[u8]` probably returns `i+1`,
115 /// whereas a haystack with type `&str` probably returns `i` plus the
116 /// length of the next UTF-8 sequence.
117 fn next_after_empty(&self, text: &Self::Text, i: usize) -> usize;
118
119 /// Returns the location of the shortest match.
120 fn shortest_match_at(
121 &self,
122 text: &Self::Text,
123 start: usize,
124 ) -> Option<usize>;
125
126 /// Returns whether the regex matches the text given.
127 fn is_match_at(
128 &self,
129 text: &Self::Text,
130 start: usize,
131 ) -> bool;
132
133 /// Returns the leftmost-first match location if one exists.
134 fn find_at(
135 &self,
136 text: &Self::Text,
137 start: usize,
138 ) -> Option<(usize, usize)>;
139
140 /// Returns the leftmost-first match location if one exists, and also
141 /// fills in any matching capture slot locations.
142 fn read_captures_at(
143 &self,
144 locs: &mut Locations,
145 text: &Self::Text,
146 start: usize,
147 ) -> Option<(usize, usize)>;
148
149 /// Returns an iterator over all non-overlapping successive leftmost-first
150 /// matches.
151 fn find_iter (
152 self,
153 text: &Self::Text,
154 ) -> Matches<Self> {
155 Matches {
156 re: self,
157 text: text,
158 last_end: 0,
159 last_match: None,
160 }
161 }
162
163 /// Returns an iterator over all non-overlapping successive leftmost-first
164 /// matches with captures.
165 fn captures_iter(
166 self,
167 text: &Self::Text,
168 ) -> CaptureMatches<Self> {
169 CaptureMatches(self.find_iter(text))
170 }
171 }
172
173 /// An iterator over all non-overlapping successive leftmost-first matches.
174 pub struct Matches<'t, R> where R: RegularExpression, R::Text: 't {
175 re: R,
176 text: &'t R::Text,
177 last_end: usize,
178 last_match: Option<usize>,
179 }
180
181 impl<'t, R> Matches<'t, R> where R: RegularExpression, R::Text: 't {
182 /// Return the text being searched.
183 pub fn text(&self) -> &'t R::Text {
184 self.text
185 }
186
187 /// Return the underlying regex.
188 pub fn regex(&self) -> &R {
189 &self.re
190 }
191 }
192
193 impl<'t, R> Iterator for Matches<'t, R>
194 where R: RegularExpression, R::Text: 't + AsRef<[u8]> {
195 type Item = (usize, usize);
196
197 fn next(&mut self) -> Option<(usize, usize)> {
198 if self.last_end > self.text.as_ref().len() {
199 return None;
200 }
201 let (s, e) = match self.re.find_at(self.text, self.last_end) {
202 None => return None,
203 Some((s, e)) => (s, e),
204 };
205 if s == e {
206 // This is an empty match. To ensure we make progress, start
207 // the next search at the smallest possible starting position
208 // of the next match following this one.
209 self.last_end = self.re.next_after_empty(self.text, e);
210 // Don't accept empty matches immediately following a match.
211 // Just move on to the next match.
212 if Some(e) == self.last_match {
213 return self.next();
214 }
215 } else {
216 self.last_end = e;
217 }
218 self.last_match = Some(e);
219 Some((s, e))
220 }
221 }
222
223 /// An iterator over all non-overlapping successive leftmost-first matches with
224 /// captures.
225 pub struct CaptureMatches<'t, R>(Matches<'t, R>)
226 where R: RegularExpression, R::Text: 't;
227
228 impl<'t, R> CaptureMatches<'t, R> where R: RegularExpression, R::Text: 't {
229 /// Return the text being searched.
230 pub fn text(&self) -> &'t R::Text {
231 self.0.text()
232 }
233
234 /// Return the underlying regex.
235 pub fn regex(&self) -> &R {
236 self.0.regex()
237 }
238 }
239
240 impl<'t, R> Iterator for CaptureMatches<'t, R>
241 where R: RegularExpression, R::Text: 't + AsRef<[u8]> {
242 type Item = Locations;
243
244 fn next(&mut self) -> Option<Locations> {
245 if self.0.last_end > self.0.text.as_ref().len() {
246 return None
247 }
248 let mut locs = self.0.re.locations();
249 let (s, e) = match self.0.re.read_captures_at(
250 &mut locs,
251 self.0.text,
252 self.0.last_end,
253 ) {
254 None => return None,
255 Some((s, e)) => (s, e),
256 };
257 if s == e {
258 self.0.last_end = self.0.re.next_after_empty(self.0.text, e);
259 if Some(e) == self.0.last_match {
260 return self.next();
261 }
262 } else {
263 self.0.last_end = e;
264 }
265 self.0.last_match = Some(e);
266 Some(locs)
267 }
268 }