use std::borrow::Cow;
use std::collections::HashMap;
use std::fmt;
+use std::iter::FusedIterator;
use std::ops::{Index, Range};
use std::str::FromStr;
use std::sync::Arc;
-use find_byte::find_byte;
+use crate::find_byte::find_byte;
-use error::Error;
-use exec::{Exec, ExecNoSync};
-use expand::expand_bytes;
-use re_builder::bytes::RegexBuilder;
-use re_trait::{self, RegularExpression, SubCapturesPosIter};
+use crate::error::Error;
+use crate::exec::{Exec, ExecNoSync};
+use crate::expand::expand_bytes;
+use crate::re_builder::bytes::RegexBuilder;
+use crate::re_trait::{self, RegularExpression, SubCapturesPosIter};
/// Match represents a single match of a regex in a haystack.
///
impl fmt::Display for Regex {
/// Shows the original regular expression.
///
/// The text written to the formatter is whatever `self.as_str()` returns,
/// rendered with the plain `{}` format.
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.as_str())
}
}
impl fmt::Debug for Regex {
/// Shows the original regular expression.
///
/// This forwards to the `Display` implementation, so `{:?}` and `{}`
/// produce the same output for a `Regex`.
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Display::fmt(self, f)
}
}
RegexBuilder::new(re).build()
}
- /// Returns true if and only if the regex matches the string given.
+ /// Returns true if and only if there is a match for the regex in the
+ /// string given.
///
/// It is recommended to use this method if all you need to do is test
/// a match, since the underlying matching engine may be able to do less
/// bytes:
///
/// ```rust
- /// # extern crate regex; use regex::bytes::Regex;
+ /// # use regex::bytes::Regex;
/// # fn main() {
/// let text = b"I categorically deny having triskaidekaphobia.";
/// assert!(Regex::new(r"\b\w{13}\b").unwrap().is_match(text));
/// ASCII word bytes:
///
/// ```rust
- /// # extern crate regex; use regex::bytes::Regex;
+ /// # use regex::bytes::Regex;
/// # fn main() {
/// let text = b"I categorically deny having triskaidekaphobia.";
/// let mat = Regex::new(r"\b\w{13}\b").unwrap().find(text).unwrap();
/// word bytes:
///
/// ```rust
- /// # extern crate regex; use regex::bytes::Regex;
+ /// # use regex::bytes::Regex;
/// # fn main() {
/// let text = b"Retroactively relinquishing remunerations is reprehensible.";
/// for mat in Regex::new(r"\b\w{13}\b").unwrap().find_iter(text) {
/// year separately.
///
/// ```rust
- /// # extern crate regex; use regex::bytes::Regex;
+ /// # use regex::bytes::Regex;
/// # fn main() {
/// let re = Regex::new(r"'([^']+)'\s+\((\d{4})\)").unwrap();
/// let text = b"Not my favorite movie: 'Citizen Kane' (1941).";
/// We can make this example a bit clearer by using *named* capture groups:
///
/// ```rust
- /// # extern crate regex; use regex::bytes::Regex;
+ /// # use regex::bytes::Regex;
/// # fn main() {
/// let re = Regex::new(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)")
/// .unwrap();
/// some text, where the movie is formatted like "'Title' (xxxx)":
///
/// ```rust
- /// # extern crate regex; use std::str; use regex::bytes::Regex;
+ /// # use std::str; use regex::bytes::Regex;
/// # fn main() {
/// let re = Regex::new(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)")
/// .unwrap();
/// To split a string delimited by arbitrary amounts of spaces or tabs:
///
/// ```rust
- /// # extern crate regex; use regex::bytes::Regex;
+ /// # use regex::bytes::Regex;
/// # fn main() {
/// let re = Regex::new(r"[ \t]+").unwrap();
/// let fields: Vec<&[u8]> = re.split(b"a b \t c\td e").collect();
/// Get the first two words in some text:
///
/// ```rust
- /// # extern crate regex; use regex::bytes::Regex;
+ /// # use regex::bytes::Regex;
/// # fn main() {
/// let re = Regex::new(r"\W+").unwrap();
/// let fields: Vec<&[u8]> = re.splitn(b"Hey! How are you?", 3).collect();
/// In typical usage, this can just be a normal byte string:
///
/// ```rust
- /// # extern crate regex; use regex::bytes::Regex;
+ /// # use regex::bytes::Regex;
/// # fn main() {
/// let re = Regex::new("[^01]+").unwrap();
/// assert_eq!(re.replace(b"1078910", &b""[..]), &b"1010"[..]);
/// group matches easily:
///
/// ```rust
- /// # extern crate regex; use regex::bytes::Regex;
+ /// # use regex::bytes::Regex;
/// # use regex::bytes::Captures; fn main() {
/// let re = Regex::new(r"([^,\s]+),\s+(\S+)").unwrap();
/// let result = re.replace(b"Springsteen, Bruce", |caps: &Captures| {
/// with named capture groups:
///
/// ```rust
- /// # extern crate regex; use regex::bytes::Regex;
+ /// # use regex::bytes::Regex;
/// # fn main() {
/// let re = Regex::new(r"(?P<last>[^,\s]+),\s+(?P<first>\S+)").unwrap();
/// let result = re.replace(b"Springsteen, Bruce", &b"$first $last"[..]);
/// underscore:
///
/// ```rust
- /// # extern crate regex; use regex::bytes::Regex;
+ /// # use regex::bytes::Regex;
/// # fn main() {
/// let re = Regex::new(r"(?P<first>\w+)\s+(?P<second>\w+)").unwrap();
/// let result = re.replace(b"deep fried", &b"${first}_$second"[..]);
/// byte string with `NoExpand`:
///
/// ```rust
- /// # extern crate regex; use regex::bytes::Regex;
+ /// # use regex::bytes::Regex;
/// # fn main() {
/// use regex::bytes::NoExpand;
///
/// `a`.
///
/// ```rust
- /// # extern crate regex; use regex::bytes::Regex;
+ /// # use regex::bytes::Regex;
/// # fn main() {
/// let text = b"aaaaa";
/// let pos = Regex::new(r"a+").unwrap().shortest_match(text);
}
/// Returns an iterator over the capture names.
///
/// The returned `CaptureNames` wraps an iterator over the collection
/// obtained from `self.0.capture_names()` and borrows from `self`.
- pub fn capture_names(&self) -> CaptureNames {
+ pub fn capture_names(&self) -> CaptureNames<'_> {
CaptureNames(self.0.capture_names().iter())
}
///
/// `'r` is the lifetime of the compiled regular expression and `'t` is the
/// lifetime of the matched byte string.
+#[derive(Debug)]
pub struct Matches<'r, 't>(re_trait::Matches<'t, ExecNoSync<'r>>);
impl<'r, 't> Iterator for Matches<'r, 't> {
}
}
+impl<'r, 't> FusedIterator for Matches<'r, 't> {}
+
/// An iterator that yields all non-overlapping capture groups matching a
/// particular regular expression.
///
///
/// `'r` is the lifetime of the compiled regular expression and `'t` is the
/// lifetime of the matched byte string.
+#[derive(Debug)]
pub struct CaptureMatches<'r, 't>(
re_trait::CaptureMatches<'t, ExecNoSync<'r>>,
);
}
}
+impl<'r, 't> FusedIterator for CaptureMatches<'r, 't> {}
+
/// Yields all substrings delimited by a regular expression match.
///
/// `'r` is the lifetime of the compiled regular expression and `'t` is the
/// lifetime of the byte string being split.
+#[derive(Debug)]
pub struct Split<'r, 't> {
// Iterator over the matches of the regex.
// NOTE(review): presumably each match is treated as a delimiter — confirm
// against the `Iterator` impl, which is not visible here.
finder: Matches<'r, 't>,
// Byte offset into the text being split. `SplitN` slices the text from
// this offset to the end to produce its final item.
// NOTE(review): presumably the end of the previous delimiter — confirm.
last: usize,
}
}
+impl<'r, 't> FusedIterator for Split<'r, 't> {}
+
/// Yields at most `N` substrings delimited by a regular expression match.
///
/// The last substring will be whatever remains after splitting.
///
/// `'r` is the lifetime of the compiled regular expression and `'t` is the
/// lifetime of the byte string being split.
+#[derive(Debug)]
pub struct SplitN<'r, 't> {
splits: Split<'r, 't>,
n: usize,
Some(&text[self.splits.last..])
}
}
+
+    /// Reports a lower bound of `0` and an upper bound of `self.n` for the
+    /// number of items still to be yielded.
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        let upper = Some(self.n);
+        (0, upper)
+    }
}
+impl<'r, 't> FusedIterator for SplitN<'r, 't> {}
+
/// An iterator over the names of all possible captures.
///
/// `None` indicates an unnamed capture; the first element (capture 0, the
/// whole matched region) is always unnamed.
///
/// `'r` is the lifetime of the compiled regular expression.
+#[derive(Clone, Debug)]
// Newtype around a borrowed slice iterator over the optional group names.
pub struct CaptureNames<'r>(::std::slice::Iter<'r, Option<String>>);
impl<'r> Iterator for CaptureNames<'r> {
// Forwards to the wrapped slice iterator's `size_hint`.
fn size_hint(&self) -> (usize, Option<usize>) {
self.0.size_hint()
}
+
+    /// Consumes the iterator and returns the number of remaining names by
+    /// delegating to the wrapped slice iterator's `count`.
+    fn count(self) -> usize {
+        let CaptureNames(names) = self;
+        names.count()
+    }
}
+impl<'r> ExactSizeIterator for CaptureNames<'r> {}
+
+impl<'r> FusedIterator for CaptureNames<'r> {}
+
/// CaptureLocations is a low level representation of the raw offsets of each
/// submatch.
///
/// Expands all instances of `$name` in `replacement` to the corresponding
/// capture group `name`, and writes them to the `dst` buffer given.
///
- /// `name` may be an integer corresponding to the index of the
- /// capture group (counted by order of opening parenthesis where `0` is the
+ /// `name` may be an integer corresponding to the index of the capture
+ /// group (counted by order of opening parenthesis where `0` is the
/// entire match) or it can be a name (consisting of letters, digits or
/// underscores) corresponding to a named capture group.
///
/// If `name` isn't a valid capture group (whether the name doesn't exist
/// or isn't a valid index), then it is replaced with the empty string.
///
- /// The longest possible name is used. e.g., `$1a` looks up the capture
- /// group named `1a` and not the capture group at index `1`. To exert more
- /// precise control over the name, use braces, e.g., `${1}a`.
+ /// The longest possible name consisting of the characters `[_0-9A-Za-z]`
+ /// is used. e.g., `$1a` looks up the capture group named `1a` and not the
+ /// capture group at index `1`. To exert more precise control over the
+ /// name, or to refer to a capture group name that uses characters outside
+ /// of `[_0-9A-Za-z]`, use braces, e.g., `${1}a` or `${foo[bar].baz}`. When
+ /// using braces, any sequence of valid UTF-8 bytes is permitted. If the
+ /// sequence does not refer to a capture group name in the corresponding
+ /// regex, then it is replaced with an empty string.
///
/// To write a literal `$` use `$$`.
pub fn expand(&self, replacement: &[u8], dst: &mut Vec<u8>) {
}
impl<'t> fmt::Debug for Captures<'t> {
// Renders as a tuple struct named `Captures` whose single field is a
// `CapturesDebug` wrapper around `self`; that wrapper's own `Debug` impl
// formats the field.
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_tuple("Captures").field(&CapturesDebug(self)).finish()
}
}
// Private newtype holding a reference to a `Captures`; the `Debug` impl of
// `Captures` passes it as the single field of its debug tuple.
-struct CapturesDebug<'c, 't: 'c>(&'c Captures<'t>);
+struct CapturesDebug<'c, 't>(&'c Captures<'t>);
impl<'c, 't> fmt::Debug for CapturesDebug<'c, 't> {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fn escape_bytes(bytes: &[u8]) -> String {
let mut s = String::new();
for &b in bytes {
///
/// The lifetime `'c` corresponds to the lifetime of the `Captures` value, and
/// the lifetime `'t` corresponds to the originally matched text.
-pub struct SubCaptureMatches<'c, 't: 'c> {
+#[derive(Clone, Debug)]
+pub struct SubCaptureMatches<'c, 't> {
// The `Captures` value this iterator borrows from.
caps: &'c Captures<'t>,
// Iterator of type `SubCapturesPosIter`.
// NOTE(review): presumably yields the offsets of each capture group, which
// are then mapped onto the matched text — confirm against the `Iterator`
// impl, which is not visible here.
it: SubCapturesPosIter<'c>,
}
}
}
+impl<'c, 't> FusedIterator for SubCaptureMatches<'c, 't> {}
+
/// Replacer describes types that can be used to replace matches in a byte
/// string.
///
/// In general, users of this crate shouldn't need to implement this trait,
-/// since implementations are already provided for `&[u8]` and
-/// `FnMut(&Captures) -> Vec<u8>` (or any `FnMut(&Captures) -> T`
-/// where `T: AsRef<[u8]>`), which covers most use cases.
+/// since implementations are already provided for `&[u8]` (along with other
+/// byte string types such as `Vec<u8>` and `Cow<[u8]>`) and for
+/// `FnMut(&Captures) -> Vec<u8>` (or any `FnMut(&Captures) -> T` where
+/// `T: AsRef<[u8]>`), which covers most use cases.
pub trait Replacer {
/// Appends text to `dst` to replace the current match.
///
///
/// For example, a no-op replacement would be
/// `dst.extend(&caps[0])`.
- fn replace_append(&mut self, caps: &Captures, dst: &mut Vec<u8>);
+ fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>);
/// Return a fixed unchanging replacement byte string.
///
///
/// Returned by [`Replacer::by_ref`](trait.Replacer.html#method.by_ref).
#[derive(Debug)]
-pub struct ReplacerRef<'a, R: ?Sized + 'a>(&'a mut R);
+pub struct ReplacerRef<'a, R: ?Sized>(&'a mut R);
impl<'a, R: Replacer + ?Sized + 'a> Replacer for ReplacerRef<'a, R> {
- fn replace_append(&mut self, caps: &Captures, dst: &mut Vec<u8>) {
+ fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) {
self.0.replace_append(caps, dst)
}
fn no_expansion<'r>(&'r mut self) -> Option<Cow<'r, [u8]>> {
}
impl<'a> Replacer for &'a [u8] {
// Treats the slice as a replacement template: it is handed to
// `Captures::expand`, which writes the expanded bytes to `dst`.
- fn replace_append(&mut self, caps: &Captures, dst: &mut Vec<u8>) {
+ fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) {
caps.expand(*self, dst);
}
// Yields the slice itself (borrowed) when it contains no `$` byte, and
// `None` otherwise. The new version delegates this check to the free
// function `no_expansion`.
- fn no_expansion(&mut self) -> Option<Cow<[u8]>> {
- match find_byte(b'$', *self) {
- Some(_) => None,
- None => Some(Cow::Borrowed(*self)),
- }
+ fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> {
+ no_expansion(self)
+ }
+}
+
+/// Lets a borrowed byte vector act as a replacement template.
+impl<'a> Replacer for &'a Vec<u8> {
+    /// Expands the vector's bytes against `caps` via `Captures::expand` and
+    /// appends the result to `dst`.
+    fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) {
+        let template: &[u8] = self.as_slice();
+        caps.expand(template, dst);
+    }
+
+    /// Returns the bytes borrowed when they contain no `$`; otherwise
+    /// returns `None`.
+    fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> {
+        no_expansion(&*self)
+    }
+}
+
+/// Lets an owned byte vector act as a replacement template.
+impl Replacer for Vec<u8> {
+    /// Expands the vector's bytes against `caps` via `Captures::expand` and
+    /// appends the result to `dst`.
+    fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) {
+        let template: &[u8] = self.as_slice();
+        caps.expand(template, dst);
+    }
+
+    /// Returns the bytes borrowed when they contain no `$`; otherwise
+    /// returns `None`.
+    fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> {
+        no_expansion(&*self)
+    }
+}
+
+/// Lets a copy-on-write byte string act as a replacement template.
+impl<'a> Replacer for Cow<'a, [u8]> {
+    /// Expands the `Cow`'s bytes against `caps` via `Captures::expand` and
+    /// appends the result to `dst`.
+    fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) {
+        let template: &[u8] = &**self;
+        caps.expand(template, dst);
+    }
+
+    /// Returns the bytes borrowed when they contain no `$`; otherwise
+    /// returns `None`.
+    fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> {
+        no_expansion(&*self)
+    }
+}
+
+/// Lets a borrowed copy-on-write byte string act as a replacement template.
+impl<'a> Replacer for &'a Cow<'a, [u8]> {
+    /// Expands the `Cow`'s bytes against `caps` via `Captures::expand` and
+    /// appends the result to `dst`.
+    fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) {
+        let cow: &Cow<'a, [u8]> = *self;
+        caps.expand(cow, dst);
+    }
+
+    /// Returns the bytes borrowed when they contain no `$`; otherwise
+    /// returns `None`.
+    fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> {
+        no_expansion(&*self)
+    }
+}
+
+/// Shared `no_expansion` check for replacement types that can be viewed as
+/// bytes.
+///
+/// Returns the bytes of `t` as a borrowed `Cow` when they contain no `$`
+/// byte, and `None` when a `$` is present.
+fn no_expansion<T: AsRef<[u8]>>(t: &T) -> Option<Cow<'_, [u8]>> {
+    let bytes = t.as_ref();
+    match find_byte(b'$', bytes) {
+        None => Some(Cow::Borrowed(bytes)),
+        Some(_) => None,
}
}
// Any closure that takes the captures of a match and returns a value viewable
// as bytes can be used as a replacer.
impl<F, T> Replacer for F
where
- F: FnMut(&Captures) -> T,
+ F: FnMut(&Captures<'_>) -> T,
T: AsRef<[u8]>,
{
- fn replace_append(&mut self, caps: &Captures, dst: &mut Vec<u8>) {
+ fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) {
// Invoke the closure for this match and append the bytes it returns to
// `dst` as-is; `Captures::expand` is not called on the closure's output.
dst.extend_from_slice((*self)(caps).as_ref());
}
}
/// and performant (since capture groups don't need to be found).
///
/// `'t` is the lifetime of the literal text.
+#[derive(Clone, Debug)]
pub struct NoExpand<'t>(pub &'t [u8]);
impl<'t> Replacer for NoExpand<'t> {
// Appends the wrapped bytes to `dst` unchanged; the captures argument is
// ignored.
- fn replace_append(&mut self, _: &Captures, dst: &mut Vec<u8>) {
+ fn replace_append(&mut self, _: &Captures<'_>, dst: &mut Vec<u8>) {
dst.extend_from_slice(self.0);
}
// Always returns the wrapped bytes as a borrowed `Cow`, without checking
// their contents.
- fn no_expansion(&mut self) -> Option<Cow<[u8]>> {
+ fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> {
Some(Cow::Borrowed(self.0))
}
}