vendor/regex-0.2.11/src/re_trait.rs

   1 // Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
   2 // file at the top-level directory of this distribution and at
   3 // http://rust-lang.org/COPYRIGHT.
   4 //
   5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
   6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
   7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
   8 // option. This file may not be copied, modified, or distributed
   9 // except according to those terms.
  10
/// Slot is a single saved capture location. Note that there are two slots for
/// every capture in a regular expression (one slot each for the start and end
/// of the capture).
///
/// A slot holding `None` has no position recorded; a slot holding `Some(i)`
/// stores the offset `i`.
pub type Slot = Option<usize>;
  15
  16 /// Locations represents the offsets of each capturing group in a regex for
  17 /// a single match.
  18 ///
  19 /// Unlike `Captures`, a `Locations` value only stores offsets.
  20 #[doc(hidden)]
  21 pub struct Locations(Vec<Slot>);
  22
  23 impl Locations {
  24     /// Returns the start and end positions of the Nth capture group. Returns
  25     /// `None` if `i` is not a valid capture group or if the capture group did
  26     /// not match anything. The positions returned are *always* byte indices
  27     /// with respect to the original string matched.
  28     pub fn pos(&self, i: usize) -> Option<(usize, usize)> {
  29         let (s, e) = (i * 2, i * 2 + 1);
  30         match (self.0.get(s), self.0.get(e)) {
  31             (Some(&Some(s)), Some(&Some(e))) => Some((s, e)),
  32             _ => None,
  33         }
  34     }
  35
  36     /// Creates an iterator of all the capture group positions in order of
  37     /// appearance in the regular expression. Positions are byte indices
  38     /// in terms of the original string matched.
  39     pub fn iter(&self) -> SubCapturesPosIter {
  40         SubCapturesPosIter { idx: 0, locs: self }
  41     }
  42
  43     /// Returns the total number of capturing groups.
  44     ///
  45     /// This is always at least `1` since every regex has at least `1`
  46     /// capturing group that corresponds to the entire match.
  47     pub fn len(&self) -> usize {
  48         self.0.len() / 2
  49     }
  50 }
  51
  52 /// This is a hack to make Locations -> &mut [Slot] be available internally
  53 /// without exposing it in the public API.
  54 pub fn as_slots(locs: &mut Locations) -> &mut [Slot] {
  55     &mut locs.0
  56 }
  57
/// An iterator over capture group positions for a particular match of a
/// regular expression.
///
/// Positions are byte indices in terms of the original string matched.
///
/// `'c` is the lifetime of the captures.
pub struct SubCapturesPosIter<'c> {
    // Index of the capture group that the next call to `next` reports.
    idx: usize,
    // The capture locations being walked.
    locs: &'c Locations,
}
  68
  69 impl<'c> Iterator for SubCapturesPosIter<'c> {
  70     type Item = Option<(usize, usize)>;
  71
  72     fn next(&mut self) -> Option<Option<(usize, usize)>> {
  73         if self.idx >= self.locs.len() {
  74             return None;
  75         }
  76         let x = match self.locs.pos(self.idx) {
  77             None => Some(None),
  78             Some((s, e)) => {
  79                 Some(Some((s, e)))
  80             }
  81         };
  82         self.idx += 1;
  83         x
  84     }
  85 }
  86
  87 /// `RegularExpression` describes types that can implement regex searching.
  88 ///
  89 /// This trait is my attempt at reducing code duplication and to standardize
  90 /// the internal API. Specific duplication that is avoided are the `find`
  91 /// and `capture` iterators, which are slightly tricky.
  92 ///
  93 /// It's not clear whether this trait is worth it, and it also isn't
  94 /// clear whether it's useful as a public trait or not. Methods like
  95 /// `next_after_empty` reak of bad design, but the rest of the methods seem
  96 /// somewhat reasonable. One particular thing this trait would expose would be
  97 /// the ability to start the search of a regex anywhere in a haystack, which
  98 /// isn't possible in the current public API.
  99 pub trait RegularExpression: Sized {
 100     /// The type of the haystack.
 101     type Text: ?Sized;
 102
 103     /// The number of capture slots in the compiled regular expression. This is
 104     /// always two times the number of capture groups (two slots per group).
 105     fn slots_len(&self) -> usize;
 106
 107     /// Allocates fresh space for all capturing groups in this regex.
 108     fn locations(&self) -> Locations {
 109         Locations(vec![None; self.slots_len()])
 110     }
 111
 112     /// Returns the position of the next character after `i`.
 113     ///
 114     /// For example, a haystack with type `&[u8]` probably returns `i+1`,
 115     /// whereas a haystack with type `&str` probably returns `i` plus the
 116     /// length of the next UTF-8 sequence.
 117     fn next_after_empty(&self, text: &Self::Text, i: usize) -> usize;
 118
 119     /// Returns the location of the shortest match.
 120     fn shortest_match_at(
 121         &self,
 122         text: &Self::Text,
 123         start: usize,
 124     ) -> Option<usize>;
 125
 126     /// Returns whether the regex matches the text given.
 127     fn is_match_at(
 128         &self,
 129         text: &Self::Text,
 130         start: usize,
 131     ) -> bool;
 132
 133     /// Returns the leftmost-first match location if one exists.
 134     fn find_at(
 135         &self,
 136         text: &Self::Text,
 137         start: usize,
 138     ) -> Option<(usize, usize)>;
 139
 140     /// Returns the leftmost-first match location if one exists, and also
 141     /// fills in any matching capture slot locations.
 142     fn read_captures_at(
 143         &self,
 144         locs: &mut Locations,
 145         text: &Self::Text,
 146         start: usize,
 147     ) -> Option<(usize, usize)>;
 148
 149     /// Returns an iterator over all non-overlapping successive leftmost-first
 150     /// matches.
 151     fn find_iter (
 152         self,
 153         text: &Self::Text,
 154     ) -> Matches<Self> {
 155         Matches {
 156             re: self,
 157             text: text,
 158             last_end: 0,
 159             last_match: None,
 160         }
 161     }
 162
 163     /// Returns an iterator over all non-overlapping successive leftmost-first
 164     /// matches with captures.
 165     fn captures_iter(
 166         self,
 167         text: &Self::Text,
 168     ) -> CaptureMatches<Self> {
 169         CaptureMatches(self.find_iter(text))
 170     }
 171 }
 172
/// An iterator over all non-overlapping successive leftmost-first matches.
pub struct Matches<'t, R> where R: RegularExpression, R::Text: 't {
    // The regex used to run each search.
    re: R,
    // The haystack being searched.
    text: &'t R::Text,
    // Offset at which the next search starts.
    last_end: usize,
    // End offset of the most recently yielded match, if any. Used to reject
    // an empty match that sits directly after the previous match.
    last_match: Option<usize>,
}
 180
 181 impl<'t, R> Matches<'t, R> where R: RegularExpression, R::Text: 't {
 182     /// Return the text being searched.
 183     pub fn text(&self) -> &'t R::Text {
 184         self.text
 185     }
 186
 187     /// Return the underlying regex.
 188     pub fn regex(&self) -> &R {
 189         &self.re
 190     }
 191 }
 192
 193 impl<'t, R> Iterator for Matches<'t, R>
 194         where R: RegularExpression, R::Text: 't + AsRef<[u8]> {
 195     type Item = (usize, usize);
 196
 197     fn next(&mut self) -> Option<(usize, usize)> {
 198         if self.last_end > self.text.as_ref().len() {
 199             return None;
 200         }
 201         let (s, e) = match self.re.find_at(self.text, self.last_end) {
 202             None => return None,
 203             Some((s, e)) => (s, e),
 204         };
 205         if s == e {
 206             // This is an empty match. To ensure we make progress, start
 207             // the next search at the smallest possible starting position
 208             // of the next match following this one.
 209             self.last_end = self.re.next_after_empty(self.text, e);
 210             // Don't accept empty matches immediately following a match.
 211             // Just move on to the next match.
 212             if Some(e) == self.last_match {
 213                 return self.next();
 214             }
 215         } else {
 216             self.last_end = e;
 217         }
 218         self.last_match = Some(e);
 219         Some((s, e))
 220     }
 221 }
 222
/// An iterator over all non-overlapping successive leftmost-first matches with
/// captures.
///
/// This is a thin wrapper around a `Matches` value, which holds the regex,
/// the haystack and the search position.
pub struct CaptureMatches<'t, R>(Matches<'t, R>)
    where R: RegularExpression, R::Text: 't;
 227
 228 impl<'t, R> CaptureMatches<'t, R> where R: RegularExpression, R::Text: 't {
 229     /// Return the text being searched.
 230     pub fn text(&self) -> &'t R::Text {
 231         self.0.text()
 232     }
 233
 234     /// Return the underlying regex.
 235     pub fn regex(&self) -> &R {
 236         self.0.regex()
 237     }
 238 }
 239
 240 impl<'t, R> Iterator for CaptureMatches<'t, R>
 241         where R: RegularExpression, R::Text: 't + AsRef<[u8]> {
 242     type Item = Locations;
 243
 244     fn next(&mut self) -> Option<Locations> {
 245         if self.0.last_end > self.0.text.as_ref().len() {
 246             return None
 247         }
 248         let mut locs = self.0.re.locations();
 249         let (s, e) = match self.0.re.read_captures_at(
 250             &mut locs,
 251             self.0.text,
 252             self.0.last_end,
 253         ) {
 254             None => return None,
 255             Some((s, e)) => (s, e),
 256         };
 257         if s == e {
 258             self.0.last_end = self.0.re.next_after_empty(self.0.text, e);
 259             if Some(e) == self.0.last_match {
 260                 return self.next();
 261             }
 262         } else {
 263             self.0.last_end = e;
 264         }
 265         self.0.last_match = Some(e);
 266         Some(locs)
 267     }
 268 }