vendor/bstr/src/ext_slice.rs

   1 #[cfg(feature = "std")]
   2 use std::borrow::Cow;
   3 #[cfg(feature = "std")]
   4 use std::ffi::OsStr;
   5 #[cfg(feature = "std")]
   6 use std::path::Path;
   7
   8 use core::cmp;
   9 use core::ops;
  10 use core::ptr;
  11 use core::slice;
  12 use core::str;
  13
  14 use memchr::{memchr, memrchr};
  15
  16 use ascii;
  17 use bstr::BStr;
  18 use byteset;
  19 #[cfg(feature = "std")]
  20 use ext_vec::ByteVec;
  21 use search::{PrefilterState, TwoWay};
  22 #[cfg(feature = "unicode")]
  23 use unicode::{
  24     whitespace_len_fwd, whitespace_len_rev, GraphemeIndices, Graphemes,
  25     SentenceIndices, Sentences, WordIndices, Words, WordsWithBreakIndices,
  26     WordsWithBreaks,
  27 };
  28 use utf8::{self, CharIndices, Chars, Utf8Chunks, Utf8Error};
  29
  30 /// A short-hand constructor for building a `&[u8]`.
  31 ///
  32 /// This idiosyncratic constructor is useful for concisely building byte string
  33 /// slices. Its primary utility is in conveniently writing byte string literals
  34 /// in a uniform way. For example, consider this code that does not compile:
  35 ///
  36 /// ```ignore
  37 /// let strs = vec![b"a", b"xy"];
  38 /// ```
  39 ///
  40 /// The above code doesn't compile because the type of the byte string literal
  41 /// `b"a"` is `&'static [u8; 1]`, and the type of `b"xy"` is
  42 /// `&'static [u8; 2]`. Since their types aren't the same, they can't be stored
  43 /// in the same `Vec`. (This is dissimilar from normal Unicode string slices,
  44 /// where both `"a"` and `"xy"` have the same type of `&'static str`.)
  45 ///
  46 /// One way of getting the above code to compile is to convert byte strings to
  47 /// slices. You might try this:
  48 ///
  49 /// ```ignore
  50 /// let strs = vec![&b"a", &b"xy"];
  51 /// ```
  52 ///
  53 /// But this just creates values with type `& &'static [u8; 1]` and
  54 /// `& &'static [u8; 2]`. Instead, you need to force the issue like so:
  55 ///
  56 /// ```
  57 /// let strs = vec![&b"a"[..], &b"xy"[..]];
  58 /// // or
  59 /// let strs = vec![b"a".as_ref(), b"xy".as_ref()];
  60 /// ```
  61 ///
  62 /// But neither of these are particularly convenient to type, especially when
  63 /// it's something as common as a string literal. Thus, this constructor
  64 /// permits writing the following instead:
  65 ///
  66 /// ```
  67 /// use bstr::B;
  68 ///
  69 /// let strs = vec![B("a"), B(b"xy")];
  70 /// ```
  71 ///
  72 /// Notice that this also lets you mix and match both string literals and byte
  73 /// string literals. This can be quite convenient!
  74 #[allow(non_snake_case)]
  75 #[inline]
  76 pub fn B<'a, B: ?Sized + AsRef<[u8]>>(bytes: &'a B) -> &'a [u8] {
  77     bytes.as_ref()
  78 }
  79
  80 impl ByteSlice for [u8] {
  81     #[inline]
  82     fn as_bytes(&self) -> &[u8] {
  83         self
  84     }
  85
  86     #[inline]
  87     fn as_bytes_mut(&mut self) -> &mut [u8] {
  88         self
  89     }
  90 }
  91
/// Ensure that callers cannot implement `ByteSlice` by making an
/// unimplementable trait its super trait.
pub trait Sealed {}
// `ByteSlice` requires `Sealed`, so `[u8]` must implement it in order to
// implement `ByteSlice`.
impl Sealed for [u8] {}
  96
  97 /// A trait that extends `&[u8]` with string oriented methods.
  98 pub trait ByteSlice: Sealed {
    /// A method for accessing the raw bytes of this type. This is always a
    /// no-op and callers shouldn't care about it. This only exists for making
    /// the extension trait work.
    ///
    /// The provided methods of this trait obtain the bytes they operate on
    /// by calling this method.
    #[doc(hidden)]
    fn as_bytes(&self) -> &[u8];
 104
    /// A method for accessing the raw bytes of this type, mutably. This is
    /// always a no-op and callers shouldn't care about it. This only exists
    /// for making the extension trait work.
    ///
    /// Provided methods that need mutable access to the underlying bytes
    /// (for example `as_bstr_mut`) go through this method.
    #[doc(hidden)]
    fn as_bytes_mut(&mut self) -> &mut [u8];
 110
 111     /// Return this byte slice as a `&BStr`.
 112     ///
 113     /// Use `&BStr` is useful because of its `fmt::Debug` representation
 114     /// and various other trait implementations (such as `PartialEq` and
 115     /// `PartialOrd`). In particular, the `Debug` implementation for `BStr`
 116     /// shows its bytes as a normal string. For invalid UTF-8, hex escape
 117     /// sequences are used.
 118     ///
 119     /// # Examples
 120     ///
 121     /// Basic usage:
 122     ///
 123     /// ```
 124     /// use bstr::ByteSlice;
 125     ///
 126     /// println!("{:?}", b"foo\xFFbar".as_bstr());
 127     /// ```
 128     #[inline]
 129     fn as_bstr(&self) -> &BStr {
 130         BStr::new(self.as_bytes())
 131     }
 132
 133     /// Return this byte slice as a `&mut BStr`.
 134     ///
 135     /// Use `&mut BStr` is useful because of its `fmt::Debug` representation
 136     /// and various other trait implementations (such as `PartialEq` and
 137     /// `PartialOrd`). In particular, the `Debug` implementation for `BStr`
 138     /// shows its bytes as a normal string. For invalid UTF-8, hex escape
 139     /// sequences are used.
 140     ///
 141     /// # Examples
 142     ///
 143     /// Basic usage:
 144     ///
 145     /// ```
 146     /// use bstr::ByteSlice;
 147     ///
 148     /// let mut bytes = *b"foo\xFFbar";
 149     /// println!("{:?}", &mut bytes.as_bstr_mut());
 150     /// ```
 151     #[inline]
 152     fn as_bstr_mut(&mut self) -> &mut BStr {
 153         BStr::new_mut(self.as_bytes_mut())
 154     }
 155
 156     /// Create an immutable byte string from an OS string slice.
 157     ///
 158     /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
 159     /// this returns `None` if the given OS string is not valid UTF-8. (For
 160     /// example, on Windows, file paths are allowed to be a sequence of
 161     /// arbitrary 16-bit integers. Not all such sequences can be transcoded to
 162     /// valid UTF-8.)
 163     ///
 164     /// # Examples
 165     ///
 166     /// Basic usage:
 167     ///
 168     /// ```
 169     /// use std::ffi::OsStr;
 170     ///
 171     /// use bstr::{B, ByteSlice};
 172     ///
 173     /// let os_str = OsStr::new("foo");
 174     /// let bs = <[u8]>::from_os_str(os_str).expect("should be valid UTF-8");
 175     /// assert_eq!(bs, B("foo"));
 176     /// ```
 177     #[cfg(feature = "std")]
 178     #[inline]
 179     fn from_os_str(os_str: &OsStr) -> Option<&[u8]> {
 180         #[cfg(unix)]
 181         #[inline]
 182         fn imp(os_str: &OsStr) -> Option<&[u8]> {
 183             use std::os::unix::ffi::OsStrExt;
 184
 185             Some(os_str.as_bytes())
 186         }
 187
 188         #[cfg(not(unix))]
 189         #[inline]
 190         fn imp(os_str: &OsStr) -> Option<&[u8]> {
 191             os_str.to_str().map(|s| s.as_bytes())
 192         }
 193
 194         imp(os_str)
 195     }
 196
 197     /// Create an immutable byte string from a file path.
 198     ///
 199     /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
 200     /// this returns `None` if the given path is not valid UTF-8. (For example,
 201     /// on Windows, file paths are allowed to be a sequence of arbitrary 16-bit
 202     /// integers. Not all such sequences can be transcoded to valid UTF-8.)
 203     ///
 204     /// # Examples
 205     ///
 206     /// Basic usage:
 207     ///
 208     /// ```
 209     /// use std::path::Path;
 210     ///
 211     /// use bstr::{B, ByteSlice};
 212     ///
 213     /// let path = Path::new("foo");
 214     /// let bs = <[u8]>::from_path(path).expect("should be valid UTF-8");
 215     /// assert_eq!(bs, B("foo"));
 216     /// ```
 217     #[cfg(feature = "std")]
 218     #[inline]
 219     fn from_path(path: &Path) -> Option<&[u8]> {
 220         Self::from_os_str(path.as_os_str())
 221     }
 222
 223     /// Safely convert this byte string into a `&str` if it's valid UTF-8.
 224     ///
 225     /// If this byte string is not valid UTF-8, then an error is returned. The
 226     /// error returned indicates the first invalid byte found and the length
 227     /// of the error.
 228     ///
 229     /// In cases where a lossy conversion to `&str` is acceptable, then use one
 230     /// of the [`to_str_lossy`](trait.ByteSlice.html#method.to_str_lossy) or
 231     /// [`to_str_lossy_into`](trait.ByteSlice.html#method.to_str_lossy_into)
 232     /// methods.
 233     ///
 234     /// # Examples
 235     ///
 236     /// Basic usage:
 237     ///
 238     /// ```
 239     /// use bstr::{B, ByteSlice, ByteVec};
 240     ///
 241     /// # fn example() -> Result<(), bstr::Utf8Error> {
 242     /// let s = B("☃βツ").to_str()?;
 243     /// assert_eq!("☃βツ", s);
 244     ///
 245     /// let mut bstring = <Vec<u8>>::from("☃βツ");
 246     /// bstring.push(b'\xFF');
 247     /// let err = bstring.to_str().unwrap_err();
 248     /// assert_eq!(8, err.valid_up_to());
 249     /// # Ok(()) }; example().unwrap()
 250     /// ```
 251     #[inline]
 252     fn to_str(&self) -> Result<&str, Utf8Error> {
 253         utf8::validate(self.as_bytes()).map(|_| {
 254             // SAFETY: This is safe because of the guarantees provided by
 255             // utf8::validate.
 256             unsafe { str::from_utf8_unchecked(self.as_bytes()) }
 257         })
 258     }
 259
 260     /// Unsafely convert this byte string into a `&str`, without checking for
 261     /// valid UTF-8.
 262     ///
 263     /// # Safety
 264     ///
 265     /// Callers *must* ensure that this byte string is valid UTF-8 before
 266     /// calling this method. Converting a byte string into a `&str` that is
 267     /// not valid UTF-8 is considered undefined behavior.
 268     ///
 269     /// This routine is useful in performance sensitive contexts where the
 270     /// UTF-8 validity of the byte string is already known and it is
 271     /// undesirable to pay the cost of an additional UTF-8 validation check
 272     /// that [`to_str`](trait.ByteSlice.html#method.to_str) performs.
 273     ///
 274     /// # Examples
 275     ///
 276     /// Basic usage:
 277     ///
 278     /// ```
 279     /// use bstr::{B, ByteSlice};
 280     ///
 281     /// // SAFETY: This is safe because string literals are guaranteed to be
 282     /// // valid UTF-8 by the Rust compiler.
 283     /// let s = unsafe { B("☃βツ").to_str_unchecked() };
 284     /// assert_eq!("☃βツ", s);
 285     /// ```
 286     #[inline]
 287     unsafe fn to_str_unchecked(&self) -> &str {
 288         str::from_utf8_unchecked(self.as_bytes())
 289     }
 290
 291     /// Convert this byte string to a valid UTF-8 string by replacing invalid
 292     /// UTF-8 bytes with the Unicode replacement codepoint (`U+FFFD`).
 293     ///
 294     /// If the byte string is already valid UTF-8, then no copying or
 295     /// allocation is performed and a borrrowed string slice is returned. If
 296     /// the byte string is not valid UTF-8, then an owned string buffer is
 297     /// returned with invalid bytes replaced by the replacement codepoint.
 298     ///
 299     /// This method uses the "substitution of maximal subparts" (Unicode
 300     /// Standard, Chapter 3, Section 9) strategy for inserting the replacement
 301     /// codepoint. Specifically, a replacement codepoint is inserted whenever a
 302     /// byte is found that cannot possibly lead to a valid code unit sequence.
 303     /// If there were previous bytes that represented a prefix of a well-formed
 304     /// code unit sequence, then all of those bytes are substituted with a
 305     /// single replacement codepoint. The "substitution of maximal subparts"
 306     /// strategy is the same strategy used by
 307     /// [W3C's Encoding standard](https://www.w3.org/TR/encoding/).
 308     /// For a more precise description of the maximal subpart strategy, see
 309     /// the Unicode Standard, Chapter 3, Section 9. See also
 310     /// [Public Review Issue #121](http://www.unicode.org/review/pr-121.html).
 311     ///
 312     /// N.B. Rust's standard library also appears to use the same strategy,
 313     /// but it does not appear to be an API guarantee.
 314     ///
 315     /// # Examples
 316     ///
 317     /// Basic usage:
 318     ///
 319     /// ```
 320     /// use std::borrow::Cow;
 321     ///
 322     /// use bstr::ByteSlice;
 323     ///
 324     /// let mut bstring = <Vec<u8>>::from("☃βツ");
 325     /// assert_eq!(Cow::Borrowed("☃βツ"), bstring.to_str_lossy());
 326     ///
 327     /// // Add a byte that makes the sequence invalid.
 328     /// bstring.push(b'\xFF');
 329     /// assert_eq!(Cow::Borrowed("☃βツ\u{FFFD}"), bstring.to_str_lossy());
 330     /// ```
 331     ///
 332     /// This demonstrates the "maximal subpart" substitution logic.
 333     ///
 334     /// ```
 335     /// use bstr::{B, ByteSlice};
 336     ///
 337     /// // \x61 is the ASCII codepoint for 'a'.
 338     /// // \xF1\x80\x80 is a valid 3-byte code unit prefix.
 339     /// // \xE1\x80 is a valid 2-byte code unit prefix.
 340     /// // \xC2 is a valid 1-byte code unit prefix.
 341     /// // \x62 is the ASCII codepoint for 'b'.
 342     /// //
 343     /// // In sum, each of the prefixes is replaced by a single replacement
 344     /// // codepoint since none of the prefixes are properly completed. This
 345     /// // is in contrast to other strategies that might insert a replacement
 346     /// // codepoint for every single byte.
 347     /// let bs = B(b"\x61\xF1\x80\x80\xE1\x80\xC2\x62");
 348     /// assert_eq!("a\u{FFFD}\u{FFFD}\u{FFFD}b", bs.to_str_lossy());
 349     /// ```
 350     #[cfg(feature = "std")]
 351     #[inline]
 352     fn to_str_lossy(&self) -> Cow<str> {
 353         match utf8::validate(self.as_bytes()) {
 354             Ok(()) => {
 355                 // SAFETY: This is safe because of the guarantees provided by
 356                 // utf8::validate.
 357                 unsafe {
 358                     Cow::Borrowed(str::from_utf8_unchecked(self.as_bytes()))
 359                 }
 360             }
 361             Err(err) => {
 362                 let mut lossy = String::with_capacity(self.as_bytes().len());
 363                 let (valid, after) =
 364                     self.as_bytes().split_at(err.valid_up_to());
 365                 // SAFETY: This is safe because utf8::validate guarantees
 366                 // that all of `valid` is valid UTF-8.
 367                 lossy.push_str(unsafe { str::from_utf8_unchecked(valid) });
 368                 lossy.push_str("\u{FFFD}");
 369                 if let Some(len) = err.error_len() {
 370                     after[len..].to_str_lossy_into(&mut lossy);
 371                 }
 372                 Cow::Owned(lossy)
 373             }
 374         }
 375     }
 376
 377     /// Copy the contents of this byte string into the given owned string
 378     /// buffer, while replacing invalid UTF-8 code unit sequences with the
 379     /// Unicode replacement codepoint (`U+FFFD`).
 380     ///
 381     /// This method uses the same "substitution of maximal subparts" strategy
 382     /// for inserting the replacement codepoint as the
 383     /// [`to_str_lossy`](trait.ByteSlice.html#method.to_str_lossy) method.
 384     ///
 385     /// This routine is useful for amortizing allocation. However, unlike
 386     /// `to_str_lossy`, this routine will _always_ copy the contents of this
 387     /// byte string into the destination buffer, even if this byte string is
 388     /// valid UTF-8.
 389     ///
 390     /// # Examples
 391     ///
 392     /// Basic usage:
 393     ///
 394     /// ```
 395     /// use std::borrow::Cow;
 396     ///
 397     /// use bstr::ByteSlice;
 398     ///
 399     /// let mut bstring = <Vec<u8>>::from("☃βツ");
 400     /// // Add a byte that makes the sequence invalid.
 401     /// bstring.push(b'\xFF');
 402     ///
 403     /// let mut dest = String::new();
 404     /// bstring.to_str_lossy_into(&mut dest);
 405     /// assert_eq!("☃βツ\u{FFFD}", dest);
 406     /// ```
 407     #[cfg(feature = "std")]
 408     #[inline]
 409     fn to_str_lossy_into(&self, dest: &mut String) {
 410         let mut bytes = self.as_bytes();
 411         dest.reserve(bytes.len());
 412         loop {
 413             match utf8::validate(bytes) {
 414                 Ok(()) => {
 415                     // SAFETY: This is safe because utf8::validate guarantees
 416                     // that all of `bytes` is valid UTF-8.
 417                     dest.push_str(unsafe { str::from_utf8_unchecked(bytes) });
 418                     break;
 419                 }
 420                 Err(err) => {
 421                     let (valid, after) = bytes.split_at(err.valid_up_to());
 422                     // SAFETY: This is safe because utf8::validate guarantees
 423                     // that all of `valid` is valid UTF-8.
 424                     dest.push_str(unsafe { str::from_utf8_unchecked(valid) });
 425                     dest.push_str("\u{FFFD}");
 426                     match err.error_len() {
 427                         None => break,
 428                         Some(len) => bytes = &after[len..],
 429                     }
 430                 }
 431             }
 432         }
 433     }
 434
 435     /// Create an OS string slice from this byte string.
 436     ///
 437     /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
 438     /// this returns a UTF-8 decoding error if this byte string is not valid
 439     /// UTF-8. (For example, on Windows, file paths are allowed to be a
 440     /// sequence of arbitrary 16-bit integers. There is no obvious mapping from
 441     /// an arbitrary sequence of 8-bit integers to an arbitrary sequence of
 442     /// 16-bit integers.)
 443     ///
 444     /// # Examples
 445     ///
 446     /// Basic usage:
 447     ///
 448     /// ```
 449     /// use bstr::{B, ByteSlice};
 450     ///
 451     /// let os_str = b"foo".to_os_str().expect("should be valid UTF-8");
 452     /// assert_eq!(os_str, "foo");
 453     /// ```
 454     #[cfg(feature = "std")]
 455     #[inline]
 456     fn to_os_str(&self) -> Result<&OsStr, Utf8Error> {
 457         #[cfg(unix)]
 458         #[inline]
 459         fn imp(bytes: &[u8]) -> Result<&OsStr, Utf8Error> {
 460             use std::os::unix::ffi::OsStrExt;
 461
 462             Ok(OsStr::from_bytes(bytes))
 463         }
 464
 465         #[cfg(not(unix))]
 466         #[inline]
 467         fn imp(bytes: &[u8]) -> Result<&OsStr, Utf8Error> {
 468             bytes.to_str().map(OsStr::new)
 469         }
 470
 471         imp(self.as_bytes())
 472     }
 473
 474     /// Lossily create an OS string slice from this byte string.
 475     ///
 476     /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
 477     /// this will perform a UTF-8 check and lossily convert this byte string
 478     /// into valid UTF-8 using the Unicode replacement codepoint.
 479     ///
 480     /// Note that this can prevent the correct roundtripping of file paths on
 481     /// non-Unix systems such as Windows, where file paths are an arbitrary
 482     /// sequence of 16-bit integers.
 483     ///
 484     /// # Examples
 485     ///
 486     /// Basic usage:
 487     ///
 488     /// ```
 489     /// use bstr::ByteSlice;
 490     ///
 491     /// let os_str = b"foo\xFFbar".to_os_str_lossy();
 492     /// assert_eq!(os_str.to_string_lossy(), "foo\u{FFFD}bar");
 493     /// ```
 494     #[cfg(feature = "std")]
 495     #[inline]
 496     fn to_os_str_lossy(&self) -> Cow<OsStr> {
 497         #[cfg(unix)]
 498         #[inline]
 499         fn imp(bytes: &[u8]) -> Cow<OsStr> {
 500             use std::os::unix::ffi::OsStrExt;
 501
 502             Cow::Borrowed(OsStr::from_bytes(bytes))
 503         }
 504
 505         #[cfg(not(unix))]
 506         #[inline]
 507         fn imp(bytes: &[u8]) -> Cow<OsStr> {
 508             use std::ffi::OsString;
 509
 510             match bytes.to_str_lossy() {
 511                 Cow::Borrowed(x) => Cow::Borrowed(OsStr::new(x)),
 512                 Cow::Owned(x) => Cow::Owned(OsString::from(x)),
 513             }
 514         }
 515
 516         imp(self.as_bytes())
 517     }
 518
 519     /// Create a path slice from this byte string.
 520     ///
 521     /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
 522     /// this returns a UTF-8 decoding error if this byte string is not valid
 523     /// UTF-8. (For example, on Windows, file paths are allowed to be a
 524     /// sequence of arbitrary 16-bit integers. There is no obvious mapping from
 525     /// an arbitrary sequence of 8-bit integers to an arbitrary sequence of
 526     /// 16-bit integers.)
 527     ///
 528     /// # Examples
 529     ///
 530     /// Basic usage:
 531     ///
 532     /// ```
 533     /// use bstr::ByteSlice;
 534     ///
 535     /// let path = b"foo".to_path().expect("should be valid UTF-8");
 536     /// assert_eq!(path.as_os_str(), "foo");
 537     /// ```
 538     #[cfg(feature = "std")]
 539     #[inline]
 540     fn to_path(&self) -> Result<&Path, Utf8Error> {
 541         self.to_os_str().map(Path::new)
 542     }
 543
 544     /// Lossily create a path slice from this byte string.
 545     ///
 546     /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
 547     /// this will perform a UTF-8 check and lossily convert this byte string
 548     /// into valid UTF-8 using the Unicode replacement codepoint.
 549     ///
 550     /// Note that this can prevent the correct roundtripping of file paths on
 551     /// non-Unix systems such as Windows, where file paths are an arbitrary
 552     /// sequence of 16-bit integers.
 553     ///
 554     /// # Examples
 555     ///
 556     /// Basic usage:
 557     ///
 558     /// ```
 559     /// use bstr::ByteSlice;
 560     ///
 561     /// let bs = b"foo\xFFbar";
 562     /// let path = bs.to_path_lossy();
 563     /// assert_eq!(path.to_string_lossy(), "foo\u{FFFD}bar");
 564     /// ```
 565     #[cfg(feature = "std")]
 566     #[inline]
 567     fn to_path_lossy(&self) -> Cow<Path> {
 568         use std::path::PathBuf;
 569
 570         match self.to_os_str_lossy() {
 571             Cow::Borrowed(x) => Cow::Borrowed(Path::new(x)),
 572             Cow::Owned(x) => Cow::Owned(PathBuf::from(x)),
 573         }
 574     }
 575
 576     /// Create a new byte string by repeating this byte string `n` times.
 577     ///
 578     /// # Panics
 579     ///
 580     /// This function panics if the capacity of the new byte string would
 581     /// overflow.
 582     ///
 583     /// # Examples
 584     ///
 585     /// Basic usage:
 586     ///
 587     /// ```
 588     /// use bstr::{B, ByteSlice};
 589     ///
 590     /// assert_eq!(b"foo".repeatn(4), B("foofoofoofoo"));
 591     /// assert_eq!(b"foo".repeatn(0), B(""));
 592     /// ```
 593     #[cfg(feature = "std")]
 594     #[inline]
 595     fn repeatn(&self, n: usize) -> Vec<u8> {
 596         let bs = self.as_bytes();
 597         let mut dst = vec![0; bs.len() * n];
 598         for i in 0..n {
 599             dst[i * bs.len()..(i + 1) * bs.len()].copy_from_slice(bs);
 600         }
 601         dst
 602     }
 603
 604     /// Returns true if and only if this byte string contains the given needle.
 605     ///
 606     /// # Examples
 607     ///
 608     /// Basic usage:
 609     ///
 610     /// ```
 611     /// use bstr::ByteSlice;
 612     ///
 613     /// assert!(b"foo bar".contains_str("foo"));
 614     /// assert!(b"foo bar".contains_str("bar"));
 615     /// assert!(!b"foo".contains_str("foobar"));
 616     /// ```
 617     #[inline]
 618     fn contains_str<B: AsRef<[u8]>>(&self, needle: B) -> bool {
 619         self.find(needle).is_some()
 620     }
 621
 622     /// Returns true if and only if this byte string has the given prefix.
 623     ///
 624     /// # Examples
 625     ///
 626     /// Basic usage:
 627     ///
 628     /// ```
 629     /// use bstr::ByteSlice;
 630     ///
 631     /// assert!(b"foo bar".starts_with_str("foo"));
 632     /// assert!(!b"foo bar".starts_with_str("bar"));
 633     /// assert!(!b"foo".starts_with_str("foobar"));
 634     /// ```
 635     #[inline]
 636     fn starts_with_str<B: AsRef<[u8]>>(&self, prefix: B) -> bool {
 637         self.as_bytes().starts_with(prefix.as_ref())
 638     }
 639
 640     /// Returns true if and only if this byte string has the given suffix.
 641     ///
 642     /// # Examples
 643     ///
 644     /// Basic usage:
 645     ///
 646     /// ```
 647     /// use bstr::ByteSlice;
 648     ///
 649     /// assert!(b"foo bar".ends_with_str("bar"));
 650     /// assert!(!b"foo bar".ends_with_str("foo"));
 651     /// assert!(!b"bar".ends_with_str("foobar"));
 652     /// ```
 653     #[inline]
 654     fn ends_with_str<B: AsRef<[u8]>>(&self, suffix: B) -> bool {
 655         self.as_bytes().ends_with(suffix.as_ref())
 656     }
 657
 658     /// Returns the index of the first occurrence of the given needle.
 659     ///
 660     /// The needle may be any type that can be cheaply converted into a
 661     /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
 662     ///
 663     /// Note that if you're are searching for the same needle in many
 664     /// different small haystacks, it may be faster to initialize a
 665     /// [`Finder`](struct.Finder.html) once, and reuse it for each search.
 666     ///
 667     /// # Complexity
 668     ///
 669     /// This routine is guaranteed to have worst case linear time complexity
 670     /// with respect to both the needle and the haystack. That is, this runs
 671     /// in `O(needle.len() + haystack.len())` time.
 672     ///
 673     /// This routine is also guaranteed to have worst case constant space
 674     /// complexity.
 675     ///
 676     /// # Examples
 677     ///
 678     /// Basic usage:
 679     ///
 680     /// ```
 681     /// use bstr::ByteSlice;
 682     ///
 683     /// let s = b"foo bar baz";
 684     /// assert_eq!(Some(0), s.find("foo"));
 685     /// assert_eq!(Some(4), s.find("bar"));
 686     /// assert_eq!(None, s.find("quux"));
 687     /// ```
 688     #[inline]
 689     fn find<B: AsRef<[u8]>>(&self, needle: B) -> Option<usize> {
 690         Finder::new(needle.as_ref()).find(self.as_bytes())
 691     }
 692
 693     /// Returns the index of the last occurrence of the given needle.
 694     ///
 695     /// The needle may be any type that can be cheaply converted into a
 696     /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
 697     ///
 698     /// Note that if you're are searching for the same needle in many
 699     /// different small haystacks, it may be faster to initialize a
 700     /// [`FinderReverse`](struct.FinderReverse.html) once, and reuse it for
 701     /// each search.
 702     ///
 703     /// # Complexity
 704     ///
 705     /// This routine is guaranteed to have worst case linear time complexity
 706     /// with respect to both the needle and the haystack. That is, this runs
 707     /// in `O(needle.len() + haystack.len())` time.
 708     ///
 709     /// This routine is also guaranteed to have worst case constant space
 710     /// complexity.
 711     ///
 712     /// # Examples
 713     ///
 714     /// Basic usage:
 715     ///
 716     /// ```
 717     /// use bstr::ByteSlice;
 718     ///
 719     /// let s = b"foo bar baz";
 720     /// assert_eq!(Some(0), s.rfind("foo"));
 721     /// assert_eq!(Some(4), s.rfind("bar"));
 722     /// assert_eq!(Some(8), s.rfind("ba"));
 723     /// assert_eq!(None, s.rfind("quux"));
 724     /// ```
 725     #[inline]
 726     fn rfind<B: AsRef<[u8]>>(&self, needle: B) -> Option<usize> {
 727         FinderReverse::new(needle.as_ref()).rfind(self.as_bytes())
 728     }
 729
 730     /// Returns an iterator of the non-overlapping occurrences of the given
 731     /// needle. The iterator yields byte offset positions indicating the start
 732     /// of each match.
 733     ///
 734     /// # Complexity
 735     ///
 736     /// This routine is guaranteed to have worst case linear time complexity
 737     /// with respect to both the needle and the haystack. That is, this runs
 738     /// in `O(needle.len() + haystack.len())` time.
 739     ///
 740     /// This routine is also guaranteed to have worst case constant space
 741     /// complexity.
 742     ///
 743     /// # Examples
 744     ///
 745     /// Basic usage:
 746     ///
 747     /// ```
 748     /// use bstr::ByteSlice;
 749     ///
 750     /// let s = b"foo bar foo foo quux foo";
 751     /// let matches: Vec<usize> = s.find_iter("foo").collect();
 752     /// assert_eq!(matches, vec![0, 8, 12, 21]);
 753     /// ```
 754     ///
 755     /// An empty string matches at every position, including the position
 756     /// immediately following the last byte:
 757     ///
 758     /// ```
 759     /// use bstr::ByteSlice;
 760     ///
 761     /// let matches: Vec<usize> = b"foo".find_iter("").collect();
 762     /// assert_eq!(matches, vec![0, 1, 2, 3]);
 763     ///
 764     /// let matches: Vec<usize> = b"".find_iter("").collect();
 765     /// assert_eq!(matches, vec![0]);
 766     /// ```
 767     #[inline]
 768     fn find_iter<'a, B: ?Sized + AsRef<[u8]>>(
 769         &'a self,
 770         needle: &'a B,
 771     ) -> Find<'a> {
 772         Find::new(self.as_bytes(), needle.as_ref())
 773     }
 774
 775     /// Returns an iterator of the non-overlapping occurrences of the given
 776     /// needle in reverse. The iterator yields byte offset positions indicating
 777     /// the start of each match.
 778     ///
 779     /// # Complexity
 780     ///
 781     /// This routine is guaranteed to have worst case linear time complexity
 782     /// with respect to both the needle and the haystack. That is, this runs
 783     /// in `O(needle.len() + haystack.len())` time.
 784     ///
 785     /// This routine is also guaranteed to have worst case constant space
 786     /// complexity.
 787     ///
 788     /// # Examples
 789     ///
 790     /// Basic usage:
 791     ///
 792     /// ```
 793     /// use bstr::ByteSlice;
 794     ///
 795     /// let s = b"foo bar foo foo quux foo";
 796     /// let matches: Vec<usize> = s.rfind_iter("foo").collect();
 797     /// assert_eq!(matches, vec![21, 12, 8, 0]);
 798     /// ```
 799     ///
 800     /// An empty string matches at every position, including the position
 801     /// immediately following the last byte:
 802     ///
 803     /// ```
 804     /// use bstr::ByteSlice;
 805     ///
 806     /// let matches: Vec<usize> = b"foo".rfind_iter("").collect();
 807     /// assert_eq!(matches, vec![3, 2, 1, 0]);
 808     ///
 809     /// let matches: Vec<usize> = b"".rfind_iter("").collect();
 810     /// assert_eq!(matches, vec![0]);
 811     /// ```
 812     #[inline]
 813     fn rfind_iter<'a, B: ?Sized + AsRef<[u8]>>(
 814         &'a self,
 815         needle: &'a B,
 816     ) -> FindReverse<'a> {
 817         FindReverse::new(self.as_bytes(), needle.as_ref())
 818     }
 819
 820     /// Returns the index of the first occurrence of the given byte. If the
 821     /// byte does not occur in this byte string, then `None` is returned.
 822     ///
 823     /// # Examples
 824     ///
 825     /// Basic usage:
 826     ///
 827     /// ```
 828     /// use bstr::ByteSlice;
 829     ///
 830     /// assert_eq!(Some(10), b"foo bar baz".find_byte(b'z'));
 831     /// assert_eq!(None, b"foo bar baz".find_byte(b'y'));
 832     /// ```
 833     #[inline]
 834     fn find_byte(&self, byte: u8) -> Option<usize> {
 835         memchr(byte, self.as_bytes())
 836     }
 837
 838     /// Returns the index of the last occurrence of the given byte. If the
 839     /// byte does not occur in this byte string, then `None` is returned.
 840     ///
 841     /// # Examples
 842     ///
 843     /// Basic usage:
 844     ///
 845     /// ```
 846     /// use bstr::ByteSlice;
 847     ///
 848     /// assert_eq!(Some(10), b"foo bar baz".rfind_byte(b'z'));
 849     /// assert_eq!(None, b"foo bar baz".rfind_byte(b'y'));
 850     /// ```
 851     #[inline]
 852     fn rfind_byte(&self, byte: u8) -> Option<usize> {
 853         memrchr(byte, self.as_bytes())
 854     }
 855
 856     /// Returns the index of the first occurrence of the given codepoint.
 857     /// If the codepoint does not occur in this byte string, then `None` is
 858     /// returned.
 859     ///
 860     /// Note that if one searches for the replacement codepoint, `\u{FFFD}`,
 861     /// then only explicit occurrences of that encoding will be found. Invalid
 862     /// UTF-8 sequences will not be matched.
 863     ///
 864     /// # Examples
 865     ///
 866     /// Basic usage:
 867     ///
 868     /// ```
 869     /// use bstr::{B, ByteSlice};
 870     ///
 871     /// assert_eq!(Some(10), b"foo bar baz".find_char('z'));
 872     /// assert_eq!(Some(4), B("αβγγδ").find_char('γ'));
 873     /// assert_eq!(None, b"foo bar baz".find_char('y'));
 874     /// ```
 875     #[inline]
 876     fn find_char(&self, ch: char) -> Option<usize> {
 877         self.find(ch.encode_utf8(&mut [0; 4]))
 878     }
 879
 880     /// Returns the index of the last occurrence of the given codepoint.
 881     /// If the codepoint does not occur in this byte string, then `None` is
 882     /// returned.
 883     ///
 884     /// Note that if one searches for the replacement codepoint, `\u{FFFD}`,
 885     /// then only explicit occurrences of that encoding will be found. Invalid
 886     /// UTF-8 sequences will not be matched.
 887     ///
 888     /// # Examples
 889     ///
 890     /// Basic usage:
 891     ///
 892     /// ```
 893     /// use bstr::{B, ByteSlice};
 894     ///
 895     /// assert_eq!(Some(10), b"foo bar baz".rfind_char('z'));
 896     /// assert_eq!(Some(6), B("αβγγδ").rfind_char('γ'));
 897     /// assert_eq!(None, b"foo bar baz".rfind_char('y'));
 898     /// ```
 899     #[inline]
 900     fn rfind_char(&self, ch: char) -> Option<usize> {
 901         self.rfind(ch.encode_utf8(&mut [0; 4]))
 902     }
 903
 904     /// Returns the index of the first occurrence of any of the bytes in the
 905     /// provided set.
 906     ///
 907     /// The `byteset` may be any type that can be cheaply converted into a
 908     /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but
 909     /// note that passing a `&str` which contains multibyte characters may not
 910     /// behave as you expect: each byte in the `&str` is treated as an
 911     /// individual member of the byte set.
 912     ///
 913     /// Note that order is irrelevant for the `byteset` parameter, and
 914     /// duplicate bytes present in its body are ignored.
 915     ///
 916     /// # Complexity
 917     ///
 918     /// This routine is guaranteed to have worst case linear time complexity
 919     /// with respect to both the set of bytes and the haystack. That is, this
 920     /// runs in `O(byteset.len() + haystack.len())` time.
 921     ///
 922     /// This routine is also guaranteed to have worst case constant space
 923     /// complexity.
 924     ///
 925     /// # Examples
 926     ///
 927     /// Basic usage:
 928     ///
 929     /// ```
 930     /// use bstr::ByteSlice;
 931     ///
 932     /// assert_eq!(b"foo bar baz".find_byteset(b"zr"), Some(6));
 933     /// assert_eq!(b"foo baz bar".find_byteset(b"bzr"), Some(4));
 934     /// assert_eq!(None, b"foo baz bar".find_byteset(b"\t\n"));
 935     /// ```
 936     #[inline]
 937     fn find_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> {
 938         byteset::find(self.as_bytes(), byteset.as_ref())
 939     }
 940
 941     /// Returns the index of the first occurrence of a byte that is not a member
 942     /// of the provided set.
 943     ///
 944     /// The `byteset` may be any type that can be cheaply converted into a
 945     /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but
 946     /// note that passing a `&str` which contains multibyte characters may not
 947     /// behave as you expect: each byte in the `&str` is treated as an
 948     /// individual member of the byte set.
 949     ///
 950     /// Note that order is irrelevant for the `byteset` parameter, and
 951     /// duplicate bytes present in its body are ignored.
 952     ///
 953     /// # Complexity
 954     ///
 955     /// This routine is guaranteed to have worst case linear time complexity
 956     /// with respect to both the set of bytes and the haystack. That is, this
 957     /// runs in `O(byteset.len() + haystack.len())` time.
 958     ///
 959     /// This routine is also guaranteed to have worst case constant space
 960     /// complexity.
 961     ///
 962     /// # Examples
 963     ///
 964     /// Basic usage:
 965     ///
 966     /// ```
 967     /// use bstr::ByteSlice;
 968     ///
 969     /// assert_eq!(b"foo bar baz".find_not_byteset(b"fo "), Some(4));
 970     /// assert_eq!(b"\t\tbaz bar".find_not_byteset(b" \t\r\n"), Some(2));
 971     /// assert_eq!(b"foo\nbaz\tbar".find_not_byteset(b"\t\n"), Some(0));
 972     /// ```
 973     #[inline]
 974     fn find_not_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> {
 975         byteset::find_not(self.as_bytes(), byteset.as_ref())
 976     }
 977
 978     /// Returns the index of the last occurrence of any of the bytes in the
 979     /// provided set.
 980     ///
 981     /// The `byteset` may be any type that can be cheaply converted into a
 982     /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but
 983     /// note that passing a `&str` which contains multibyte characters may not
 984     /// behave as you expect: each byte in the `&str` is treated as an
 985     /// individual member of the byte set.
 986     ///
 987     /// Note that order is irrelevant for the `byteset` parameter, and duplicate
 988     /// bytes present in its body are ignored.
 989     ///
 990     /// # Complexity
 991     ///
 992     /// This routine is guaranteed to have worst case linear time complexity
 993     /// with respect to both the set of bytes and the haystack. That is, this
 994     /// runs in `O(byteset.len() + haystack.len())` time.
 995     ///
 996     /// This routine is also guaranteed to have worst case constant space
 997     /// complexity.
 998     ///
 999     /// # Examples
1000     ///
1001     /// Basic usage:
1002     ///
1003     /// ```
1004     /// use bstr::ByteSlice;
1005     ///
1006     /// assert_eq!(b"foo bar baz".rfind_byteset(b"agb"), Some(9));
1007     /// assert_eq!(b"foo baz bar".rfind_byteset(b"rabz "), Some(10));
1008     /// assert_eq!(b"foo baz bar".rfind_byteset(b"\n123"), None);
1009     /// ```
1010     #[inline]
1011     fn rfind_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> {
1012         byteset::rfind(self.as_bytes(), byteset.as_ref())
1013     }
1014
1015     /// Returns the index of the last occurrence of a byte that is not a member
1016     /// of the provided set.
1017     ///
1018     /// The `byteset` may be any type that can be cheaply converted into a
1019     /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but
1020     /// note that passing a `&str` which contains multibyte characters may not
1021     /// behave as you expect: each byte in the `&str` is treated as an
1022     /// individual member of the byte set.
1023     ///
1024     /// Note that order is irrelevant for the `byteset` parameter, and
1025     /// duplicate bytes present in its body are ignored.
1026     ///
1027     /// # Complexity
1028     ///
1029     /// This routine is guaranteed to have worst case linear time complexity
1030     /// with respect to both the set of bytes and the haystack. That is, this
1031     /// runs in `O(byteset.len() + haystack.len())` time.
1032     ///
1033     /// This routine is also guaranteed to have worst case constant space
1034     /// complexity.
1035     ///
1036     /// # Examples
1037     ///
1038     /// Basic usage:
1039     ///
1040     /// ```
1041     /// use bstr::ByteSlice;
1042     ///
1043     /// assert_eq!(b"foo bar baz,\t".rfind_not_byteset(b",\t"), Some(10));
1044     /// assert_eq!(b"foo baz bar".rfind_not_byteset(b"rabz "), Some(2));
1045     /// assert_eq!(None, b"foo baz bar".rfind_not_byteset(b"barfoz "));
1046     /// ```
1047     #[inline]
1048     fn rfind_not_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> {
1049         byteset::rfind_not(self.as_bytes(), byteset.as_ref())
1050     }
1051
1052     /// Returns an iterator over the fields in a byte string, separated by
1053     /// contiguous whitespace.
1054     ///
1055     /// # Example
1056     ///
1057     /// Basic usage:
1058     ///
1059     /// ```
1060     /// use bstr::{B, ByteSlice};
1061     ///
1062     /// let s = B("  foo\tbar\t\u{2003}\nquux   \n");
1063     /// let fields: Vec<&[u8]> = s.fields().collect();
1064     /// assert_eq!(fields, vec![B("foo"), B("bar"), B("quux")]);
1065     /// ```
1066     ///
1067     /// A byte string consisting of just whitespace yields no elements:
1068     ///
1069     /// ```
1070     /// use bstr::{B, ByteSlice};
1071     ///
1072     /// assert_eq!(0, B("  \n\t\u{2003}\n  \t").fields().count());
1073     /// ```
1074     #[inline]
1075     fn fields(&self) -> Fields {
1076         Fields::new(self.as_bytes())
1077     }
1078
1079     /// Returns an iterator over the fields in a byte string, separated by
1080     /// contiguous codepoints satisfying the given predicate.
1081     ///
1082     /// If this byte string is not valid UTF-8, then the given closure will
1083     /// be called with a Unicode replacement codepoint when invalid UTF-8
1084     /// bytes are seen.
1085     ///
1086     /// # Example
1087     ///
1088     /// Basic usage:
1089     ///
1090     /// ```
1091     /// use bstr::{B, ByteSlice};
1092     ///
1093     /// let s = b"123foo999999bar1quux123456";
1094     /// let fields: Vec<&[u8]> = s.fields_with(|c| c.is_numeric()).collect();
1095     /// assert_eq!(fields, vec![B("foo"), B("bar"), B("quux")]);
1096     /// ```
1097     ///
1098     /// A byte string consisting of all codepoints satisfying the predicate
1099     /// yields no elements:
1100     ///
1101     /// ```
1102     /// use bstr::ByteSlice;
1103     ///
1104     /// assert_eq!(0, b"1911354563".fields_with(|c| c.is_numeric()).count());
1105     /// ```
1106     #[inline]
1107     fn fields_with<F: FnMut(char) -> bool>(&self, f: F) -> FieldsWith<F> {
1108         FieldsWith::new(self.as_bytes(), f)
1109     }
1110
1111     /// Returns an iterator over substrings of this byte string, separated
1112     /// by the given byte string. Each element yielded is guaranteed not to
1113     /// include the splitter substring.
1114     ///
1115     /// The splitter may be any type that can be cheaply converted into a
1116     /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
1117     ///
1118     /// # Examples
1119     ///
1120     /// Basic usage:
1121     ///
1122     /// ```
1123     /// use bstr::{B, ByteSlice};
1124     ///
1125     /// let x: Vec<&[u8]> = b"Mary had a little lamb".split_str(" ").collect();
1126     /// assert_eq!(x, vec![
1127     ///     B("Mary"), B("had"), B("a"), B("little"), B("lamb"),
1128     /// ]);
1129     ///
1130     /// let x: Vec<&[u8]> = b"".split_str("X").collect();
1131     /// assert_eq!(x, vec![b""]);
1132     ///
1133     /// let x: Vec<&[u8]> = b"lionXXtigerXleopard".split_str("X").collect();
1134     /// assert_eq!(x, vec![B("lion"), B(""), B("tiger"), B("leopard")]);
1135     ///
1136     /// let x: Vec<&[u8]> = b"lion::tiger::leopard".split_str("::").collect();
1137     /// assert_eq!(x, vec![B("lion"), B("tiger"), B("leopard")]);
1138     /// ```
1139     ///
1140     /// If a string contains multiple contiguous separators, you will end up
1141     /// with empty strings yielded by the iterator:
1142     ///
1143     /// ```
1144     /// use bstr::{B, ByteSlice};
1145     ///
1146     /// let x: Vec<&[u8]> = b"||||a||b|c".split_str("|").collect();
1147     /// assert_eq!(x, vec![
1148     ///     B(""), B(""), B(""), B(""), B("a"), B(""), B("b"), B("c"),
1149     /// ]);
1150     ///
1151     /// let x: Vec<&[u8]> = b"(///)".split_str("/").collect();
1152     /// assert_eq!(x, vec![B("("), B(""), B(""), B(")")]);
1153     /// ```
1154     ///
1155     /// Separators at the start or end of a string are neighbored by empty
1156     /// strings.
1157     ///
1158     /// ```
1159     /// use bstr::{B, ByteSlice};
1160     ///
1161     /// let x: Vec<&[u8]> = b"010".split_str("0").collect();
1162     /// assert_eq!(x, vec![B(""), B("1"), B("")]);
1163     /// ```
1164     ///
1165     /// When the empty string is used as a separator, it splits every **byte**
1166     /// in the byte string, along with the beginning and end of the byte
1167     /// string.
1168     ///
1169     /// ```
1170     /// use bstr::{B, ByteSlice};
1171     ///
1172     /// let x: Vec<&[u8]> = b"rust".split_str("").collect();
1173     /// assert_eq!(x, vec![
1174     ///     B(""), B("r"), B("u"), B("s"), B("t"), B(""),
1175     /// ]);
1176     ///
1177     /// // Splitting by an empty string is not UTF-8 aware. Elements yielded
1178     /// // may not be valid UTF-8!
1179     /// let x: Vec<&[u8]> = B("☃").split_str("").collect();
1180     /// assert_eq!(x, vec![
1181     ///     B(""), B(b"\xE2"), B(b"\x98"), B(b"\x83"), B(""),
1182     /// ]);
1183     /// ```
1184     ///
1185     /// Contiguous separators, especially whitespace, can lead to possibly
1186     /// surprising behavior. For example, this code is correct:
1187     ///
1188     /// ```
1189     /// use bstr::{B, ByteSlice};
1190     ///
1191     /// let x: Vec<&[u8]> = b"    a  b c".split_str(" ").collect();
1192     /// assert_eq!(x, vec![
1193     ///     B(""), B(""), B(""), B(""), B("a"), B(""), B("b"), B("c"),
1194     /// ]);
1195     /// ```
1196     ///
1197     /// It does *not* give you `["a", "b", "c"]`. For that behavior, use
1198     /// [`fields`](#method.fields) instead.
1199     #[inline]
1200     fn split_str<'a, B: ?Sized + AsRef<[u8]>>(
1201         &'a self,
1202         splitter: &'a B,
1203     ) -> Split<'a> {
1204         Split::new(self.as_bytes(), splitter.as_ref())
1205     }
1206
1207     /// Returns an iterator over substrings of this byte string, separated by
1208     /// the given byte string, in reverse. Each element yielded is guaranteed
1209     /// not to include the splitter substring.
1210     ///
1211     /// The splitter may be any type that can be cheaply converted into a
1212     /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
1213     ///
1214     /// # Examples
1215     ///
1216     /// Basic usage:
1217     ///
1218     /// ```
1219     /// use bstr::{B, ByteSlice};
1220     ///
1221     /// let x: Vec<&[u8]> =
1222     ///     b"Mary had a little lamb".rsplit_str(" ").collect();
1223     /// assert_eq!(x, vec![
1224     ///     B("lamb"), B("little"), B("a"), B("had"), B("Mary"),
1225     /// ]);
1226     ///
1227     /// let x: Vec<&[u8]> = b"".rsplit_str("X").collect();
1228     /// assert_eq!(x, vec![b""]);
1229     ///
1230     /// let x: Vec<&[u8]> = b"lionXXtigerXleopard".rsplit_str("X").collect();
1231     /// assert_eq!(x, vec![B("leopard"), B("tiger"), B(""), B("lion")]);
1232     ///
1233     /// let x: Vec<&[u8]> = b"lion::tiger::leopard".rsplit_str("::").collect();
1234     /// assert_eq!(x, vec![B("leopard"), B("tiger"), B("lion")]);
1235     /// ```
1236     ///
1237     /// If a string contains multiple contiguous separators, you will end up
1238     /// with empty strings yielded by the iterator:
1239     ///
1240     /// ```
1241     /// use bstr::{B, ByteSlice};
1242     ///
1243     /// let x: Vec<&[u8]> = b"||||a||b|c".rsplit_str("|").collect();
1244     /// assert_eq!(x, vec![
1245     ///     B("c"), B("b"), B(""), B("a"), B(""), B(""), B(""), B(""),
1246     /// ]);
1247     ///
1248     /// let x: Vec<&[u8]> = b"(///)".rsplit_str("/").collect();
1249     /// assert_eq!(x, vec![B(")"), B(""), B(""), B("(")]);
1250     /// ```
1251     ///
1252     /// Separators at the start or end of a string are neighbored by empty
1253     /// strings.
1254     ///
1255     /// ```
1256     /// use bstr::{B, ByteSlice};
1257     ///
1258     /// let x: Vec<&[u8]> = b"010".rsplit_str("0").collect();
1259     /// assert_eq!(x, vec![B(""), B("1"), B("")]);
1260     /// ```
1261     ///
1262     /// When the empty string is used as a separator, it splits every **byte**
1263     /// in the byte string, along with the beginning and end of the byte
1264     /// string.
1265     ///
1266     /// ```
1267     /// use bstr::{B, ByteSlice};
1268     ///
1269     /// let x: Vec<&[u8]> = b"rust".rsplit_str("").collect();
1270     /// assert_eq!(x, vec![
1271     ///     B(""), B("t"), B("s"), B("u"), B("r"), B(""),
1272     /// ]);
1273     ///
1274     /// // Splitting by an empty string is not UTF-8 aware. Elements yielded
1275     /// // may not be valid UTF-8!
1276     /// let x: Vec<&[u8]> = B("☃").rsplit_str("").collect();
1277     /// assert_eq!(x, vec![B(""), B(b"\x83"), B(b"\x98"), B(b"\xE2"), B("")]);
1278     /// ```
1279     ///
1280     /// Contiguous separators, especially whitespace, can lead to possibly
1281     /// surprising behavior. For example, this code is correct:
1282     ///
1283     /// ```
1284     /// use bstr::{B, ByteSlice};
1285     ///
1286     /// let x: Vec<&[u8]> = b"    a  b c".rsplit_str(" ").collect();
1287     /// assert_eq!(x, vec![
1288     ///     B("c"), B("b"), B(""), B("a"), B(""), B(""), B(""), B(""),
1289     /// ]);
1290     /// ```
1291     ///
1292     /// It does *not* give you `["a", "b", "c"]`.
1293     #[inline]
1294     fn rsplit_str<'a, B: ?Sized + AsRef<[u8]>>(
1295         &'a self,
1296         splitter: &'a B,
1297     ) -> SplitReverse<'a> {
1298         SplitReverse::new(self.as_bytes(), splitter.as_ref())
1299     }
1300
1301     /// Returns an iterator of at most `limit` substrings of this byte string,
1302     /// separated by the given byte string. If `limit` substrings are yielded,
1303     /// then the last substring will contain the remainder of this byte string.
1304     ///
    /// The splitter may be any type that can be cheaply converted into a
    /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
1307     ///
1308     /// # Examples
1309     ///
1310     /// Basic usage:
1311     ///
1312     /// ```
1313     /// use bstr::{B, ByteSlice};
1314     ///
1315     /// let x: Vec<_> = b"Mary had a little lamb".splitn_str(3, " ").collect();
1316     /// assert_eq!(x, vec![B("Mary"), B("had"), B("a little lamb")]);
1317     ///
1318     /// let x: Vec<_> = b"".splitn_str(3, "X").collect();
1319     /// assert_eq!(x, vec![b""]);
1320     ///
1321     /// let x: Vec<_> = b"lionXXtigerXleopard".splitn_str(3, "X").collect();
1322     /// assert_eq!(x, vec![B("lion"), B(""), B("tigerXleopard")]);
1323     ///
1324     /// let x: Vec<_> = b"lion::tiger::leopard".splitn_str(2, "::").collect();
1325     /// assert_eq!(x, vec![B("lion"), B("tiger::leopard")]);
1326     ///
1327     /// let x: Vec<_> = b"abcXdef".splitn_str(1, "X").collect();
1328     /// assert_eq!(x, vec![B("abcXdef")]);
1329     ///
1330     /// let x: Vec<_> = b"abcdef".splitn_str(2, "X").collect();
1331     /// assert_eq!(x, vec![B("abcdef")]);
1332     ///
1333     /// let x: Vec<_> = b"abcXdef".splitn_str(0, "X").collect();
1334     /// assert!(x.is_empty());
1335     /// ```
1336     #[inline]
1337     fn splitn_str<'a, B: ?Sized + AsRef<[u8]>>(
1338         &'a self,
1339         limit: usize,
1340         splitter: &'a B,
1341     ) -> SplitN<'a> {
1342         SplitN::new(self.as_bytes(), splitter.as_ref(), limit)
1343     }
1344
1345     /// Returns an iterator of at most `limit` substrings of this byte string,
1346     /// separated by the given byte string, in reverse. If `limit` substrings
1347     /// are yielded, then the last substring will contain the remainder of this
1348     /// byte string.
1349     ///
    /// The splitter may be any type that can be cheaply converted into a
    /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
1352     ///
1353     /// # Examples
1354     ///
1355     /// Basic usage:
1356     ///
1357     /// ```
1358     /// use bstr::{B, ByteSlice};
1359     ///
1360     /// let x: Vec<_> =
1361     ///     b"Mary had a little lamb".rsplitn_str(3, " ").collect();
1362     /// assert_eq!(x, vec![B("lamb"), B("little"), B("Mary had a")]);
1363     ///
1364     /// let x: Vec<_> = b"".rsplitn_str(3, "X").collect();
1365     /// assert_eq!(x, vec![b""]);
1366     ///
1367     /// let x: Vec<_> = b"lionXXtigerXleopard".rsplitn_str(3, "X").collect();
1368     /// assert_eq!(x, vec![B("leopard"), B("tiger"), B("lionX")]);
1369     ///
1370     /// let x: Vec<_> = b"lion::tiger::leopard".rsplitn_str(2, "::").collect();
1371     /// assert_eq!(x, vec![B("leopard"), B("lion::tiger")]);
1372     ///
1373     /// let x: Vec<_> = b"abcXdef".rsplitn_str(1, "X").collect();
1374     /// assert_eq!(x, vec![B("abcXdef")]);
1375     ///
1376     /// let x: Vec<_> = b"abcdef".rsplitn_str(2, "X").collect();
1377     /// assert_eq!(x, vec![B("abcdef")]);
1378     ///
1379     /// let x: Vec<_> = b"abcXdef".rsplitn_str(0, "X").collect();
1380     /// assert!(x.is_empty());
1381     /// ```
1382     #[inline]
1383     fn rsplitn_str<'a, B: ?Sized + AsRef<[u8]>>(
1384         &'a self,
1385         limit: usize,
1386         splitter: &'a B,
1387     ) -> SplitNReverse<'a> {
1388         SplitNReverse::new(self.as_bytes(), splitter.as_ref(), limit)
1389     }
1390
    /// Replace all matches of the given needle with the given replacement, and
    /// return the result as a new `Vec<u8>`.
1393     ///
1394     /// This routine is useful as a convenience. If you need to reuse an
1395     /// allocation, use [`replace_into`](#method.replace_into) instead.
1396     ///
1397     /// # Examples
1398     ///
1399     /// Basic usage:
1400     ///
1401     /// ```
1402     /// use bstr::ByteSlice;
1403     ///
1404     /// let s = b"this is old".replace("old", "new");
1405     /// assert_eq!(s, "this is new".as_bytes());
1406     /// ```
1407     ///
1408     /// When the pattern doesn't match:
1409     ///
1410     /// ```
1411     /// use bstr::ByteSlice;
1412     ///
1413     /// let s = b"this is old".replace("nada nada", "limonada");
1414     /// assert_eq!(s, "this is old".as_bytes());
1415     /// ```
1416     ///
1417     /// When the needle is an empty string:
1418     ///
1419     /// ```
1420     /// use bstr::ByteSlice;
1421     ///
1422     /// let s = b"foo".replace("", "Z");
1423     /// assert_eq!(s, "ZfZoZoZ".as_bytes());
1424     /// ```
1425     #[cfg(feature = "std")]
1426     #[inline]
1427     fn replace<N: AsRef<[u8]>, R: AsRef<[u8]>>(
1428         &self,
1429         needle: N,
1430         replacement: R,
1431     ) -> Vec<u8> {
1432         let mut dest = Vec::with_capacity(self.as_bytes().len());
1433         self.replace_into(needle, replacement, &mut dest);
1434         dest
1435     }
1436
    /// Replace up to `limit` matches of the given needle with the given
    /// replacement, and return the result as a new `Vec<u8>`.
1439     ///
1440     /// This routine is useful as a convenience. If you need to reuse an
1441     /// allocation, use [`replacen_into`](#method.replacen_into) instead.
1442     ///
1443     /// # Examples
1444     ///
1445     /// Basic usage:
1446     ///
1447     /// ```
1448     /// use bstr::ByteSlice;
1449     ///
1450     /// let s = b"foofoo".replacen("o", "z", 2);
1451     /// assert_eq!(s, "fzzfoo".as_bytes());
1452     /// ```
1453     ///
1454     /// When the pattern doesn't match:
1455     ///
1456     /// ```
1457     /// use bstr::ByteSlice;
1458     ///
1459     /// let s = b"foofoo".replacen("a", "z", 2);
1460     /// assert_eq!(s, "foofoo".as_bytes());
1461     /// ```
1462     ///
1463     /// When the needle is an empty string:
1464     ///
1465     /// ```
1466     /// use bstr::ByteSlice;
1467     ///
1468     /// let s = b"foo".replacen("", "Z", 2);
1469     /// assert_eq!(s, "ZfZoo".as_bytes());
1470     /// ```
1471     #[cfg(feature = "std")]
1472     #[inline]
1473     fn replacen<N: AsRef<[u8]>, R: AsRef<[u8]>>(
1474         &self,
1475         needle: N,
1476         replacement: R,
1477         limit: usize,
1478     ) -> Vec<u8> {
1479         let mut dest = Vec::with_capacity(self.as_bytes().len());
1480         self.replacen_into(needle, replacement, limit, &mut dest);
1481         dest
1482     }
1483
1484     /// Replace all matches of the given needle with the given replacement,
1485     /// and write the result into the provided `Vec<u8>`.
1486     ///
1487     /// This does **not** clear `dest` before writing to it.
1488     ///
1489     /// This routine is useful for reusing allocation. For a more convenient
1490     /// API, use [`replace`](#method.replace) instead.
1491     ///
1492     /// # Examples
1493     ///
1494     /// Basic usage:
1495     ///
1496     /// ```
1497     /// use bstr::ByteSlice;
1498     ///
1499     /// let s = b"this is old";
1500     ///
1501     /// let mut dest = vec![];
1502     /// s.replace_into("old", "new", &mut dest);
1503     /// assert_eq!(dest, "this is new".as_bytes());
1504     /// ```
1505     ///
1506     /// When the pattern doesn't match:
1507     ///
1508     /// ```
1509     /// use bstr::ByteSlice;
1510     ///
1511     /// let s = b"this is old";
1512     ///
1513     /// let mut dest = vec![];
1514     /// s.replace_into("nada nada", "limonada", &mut dest);
1515     /// assert_eq!(dest, "this is old".as_bytes());
1516     /// ```
1517     ///
1518     /// When the needle is an empty string:
1519     ///
1520     /// ```
1521     /// use bstr::ByteSlice;
1522     ///
1523     /// let s = b"foo";
1524     ///
1525     /// let mut dest = vec![];
1526     /// s.replace_into("", "Z", &mut dest);
1527     /// assert_eq!(dest, "ZfZoZoZ".as_bytes());
1528     /// ```
1529     #[cfg(feature = "std")]
1530     #[inline]
1531     fn replace_into<N: AsRef<[u8]>, R: AsRef<[u8]>>(
1532         &self,
1533         needle: N,
1534         replacement: R,
1535         dest: &mut Vec<u8>,
1536     ) {
1537         let (needle, replacement) = (needle.as_ref(), replacement.as_ref());
1538
1539         let mut last = 0;
1540         for start in self.find_iter(needle) {
1541             dest.push_str(&self.as_bytes()[last..start]);
1542             dest.push_str(replacement);
1543             last = start + needle.len();
1544         }
1545         dest.push_str(&self.as_bytes()[last..]);
1546     }
1547
1548     /// Replace up to `limit` matches of the given needle with the given
1549     /// replacement, and write the result into the provided `Vec<u8>`.
1550     ///
1551     /// This does **not** clear `dest` before writing to it.
1552     ///
1553     /// This routine is useful for reusing allocation. For a more convenient
1554     /// API, use [`replacen`](#method.replacen) instead.
1555     ///
1556     /// # Examples
1557     ///
1558     /// Basic usage:
1559     ///
1560     /// ```
1561     /// use bstr::ByteSlice;
1562     ///
1563     /// let s = b"foofoo";
1564     ///
1565     /// let mut dest = vec![];
1566     /// s.replacen_into("o", "z", 2, &mut dest);
1567     /// assert_eq!(dest, "fzzfoo".as_bytes());
1568     /// ```
1569     ///
1570     /// When the pattern doesn't match:
1571     ///
1572     /// ```
1573     /// use bstr::ByteSlice;
1574     ///
1575     /// let s = b"foofoo";
1576     ///
1577     /// let mut dest = vec![];
1578     /// s.replacen_into("a", "z", 2, &mut dest);
1579     /// assert_eq!(dest, "foofoo".as_bytes());
1580     /// ```
1581     ///
1582     /// When the needle is an empty string:
1583     ///
1584     /// ```
1585     /// use bstr::ByteSlice;
1586     ///
1587     /// let s = b"foo";
1588     ///
1589     /// let mut dest = vec![];
1590     /// s.replacen_into("", "Z", 2, &mut dest);
1591     /// assert_eq!(dest, "ZfZoo".as_bytes());
1592     /// ```
1593     #[cfg(feature = "std")]
1594     #[inline]
1595     fn replacen_into<N: AsRef<[u8]>, R: AsRef<[u8]>>(
1596         &self,
1597         needle: N,
1598         replacement: R,
1599         limit: usize,
1600         dest: &mut Vec<u8>,
1601     ) {
1602         let (needle, replacement) = (needle.as_ref(), replacement.as_ref());
1603
1604         let mut last = 0;
1605         for start in self.find_iter(needle).take(limit) {
1606             dest.push_str(&self.as_bytes()[last..start]);
1607             dest.push_str(replacement);
1608             last = start + needle.len();
1609         }
1610         dest.push_str(&self.as_bytes()[last..]);
1611     }
1612
1613     /// Returns an iterator over the bytes in this byte string.
1614     ///
1615     /// # Examples
1616     ///
1617     /// Basic usage:
1618     ///
1619     /// ```
1620     /// use bstr::ByteSlice;
1621     ///
1622     /// let bs = b"foobar";
1623     /// let bytes: Vec<u8> = bs.bytes().collect();
1624     /// assert_eq!(bytes, bs);
1625     /// ```
1626     #[inline]
1627     fn bytes(&self) -> Bytes {
1628         Bytes { it: self.as_bytes().iter() }
1629     }
1630
1631     /// Returns an iterator over the Unicode scalar values in this byte string.
1632     /// If invalid UTF-8 is encountered, then the Unicode replacement codepoint
1633     /// is yielded instead.
1634     ///
1635     /// # Examples
1636     ///
1637     /// Basic usage:
1638     ///
1639     /// ```
1640     /// use bstr::ByteSlice;
1641     ///
1642     /// let bs = b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61";
1643     /// let chars: Vec<char> = bs.chars().collect();
1644     /// assert_eq!(vec!['☃', '\u{FFFD}', '𝞃', '\u{FFFD}', 'a'], chars);
1645     /// ```
1646     ///
1647     /// Codepoints can also be iterated over in reverse:
1648     ///
1649     /// ```
1650     /// use bstr::ByteSlice;
1651     ///
1652     /// let bs = b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61";
1653     /// let chars: Vec<char> = bs.chars().rev().collect();
1654     /// assert_eq!(vec!['a', '\u{FFFD}', '𝞃', '\u{FFFD}', '☃'], chars);
1655     /// ```
1656     #[inline]
1657     fn chars(&self) -> Chars {
1658         Chars::new(self.as_bytes())
1659     }
1660
1661     /// Returns an iterator over the Unicode scalar values in this byte string
1662     /// along with their starting and ending byte index positions. If invalid
1663     /// UTF-8 is encountered, then the Unicode replacement codepoint is yielded
1664     /// instead.
1665     ///
1666     /// Note that this is slightly different from the `CharIndices` iterator
1667     /// provided by the standard library. Aside from working on possibly
1668     /// invalid UTF-8, this iterator provides both the corresponding starting
1669     /// and ending byte indices of each codepoint yielded. The ending position
1670     /// is necessary to slice the original byte string when invalid UTF-8 bytes
1671     /// are converted into a Unicode replacement codepoint, since a single
1672     /// replacement codepoint can substitute anywhere from 1 to 3 invalid bytes
1673     /// (inclusive).
1674     ///
1675     /// # Examples
1676     ///
1677     /// Basic usage:
1678     ///
1679     /// ```
1680     /// use bstr::ByteSlice;
1681     ///
1682     /// let bs = b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61";
1683     /// let chars: Vec<(usize, usize, char)> = bs.char_indices().collect();
1684     /// assert_eq!(chars, vec![
1685     ///     (0, 3, '☃'),
1686     ///     (3, 4, '\u{FFFD}'),
1687     ///     (4, 8, '𝞃'),
1688     ///     (8, 10, '\u{FFFD}'),
1689     ///     (10, 11, 'a'),
1690     /// ]);
1691     /// ```
1692     ///
1693     /// Codepoints can also be iterated over in reverse:
1694     ///
1695     /// ```
1696     /// use bstr::ByteSlice;
1697     ///
1698     /// let bs = b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61";
1699     /// let chars: Vec<(usize, usize, char)> = bs
1700     ///     .char_indices()
1701     ///     .rev()
1702     ///     .collect();
1703     /// assert_eq!(chars, vec![
1704     ///     (10, 11, 'a'),
1705     ///     (8, 10, '\u{FFFD}'),
1706     ///     (4, 8, '𝞃'),
1707     ///     (3, 4, '\u{FFFD}'),
1708     ///     (0, 3, '☃'),
1709     /// ]);
1710     /// ```
1711     #[inline]
1712     fn char_indices(&self) -> CharIndices {
1713         CharIndices::new(self.as_bytes())
1714     }
1715
1716     /// Iterate over chunks of valid UTF-8.
1717     ///
1718     /// The iterator returned yields chunks of valid UTF-8 separated by invalid
1719     /// UTF-8 bytes, if they exist. Invalid UTF-8 bytes are always 1-3 bytes,
1720     /// which are determined via the "substitution of maximal subparts"
1721     /// strategy described in the docs for the
1722     /// [`ByteSlice::to_str_lossy`](trait.ByteSlice.html#method.to_str_lossy)
1723     /// method.
1724     ///
1725     /// # Examples
1726     ///
1727     /// This example shows how the `std::fmt::Display` implementation is
1728     /// written for the `BStr` type:
1729     ///
1730     /// ```
1731     /// use bstr::{ByteSlice, Utf8Chunk};
1732     ///
1733     /// let bytes = b"foo\xFD\xFEbar\xFF";
1734     ///
1735     /// let (mut valid_chunks, mut invalid_chunks) = (vec![], vec![]);
1736     /// for chunk in bytes.utf8_chunks() {
1737     ///     if !chunk.valid().is_empty() {
1738     ///         valid_chunks.push(chunk.valid());
1739     ///     }
1740     ///     if !chunk.invalid().is_empty() {
1741     ///         invalid_chunks.push(chunk.invalid());
1742     ///     }
1743     /// }
1744     ///
1745     /// assert_eq!(valid_chunks, vec!["foo", "bar"]);
1746     /// assert_eq!(invalid_chunks, vec![b"\xFD", b"\xFE", b"\xFF"]);
1747     /// ```
1748     #[inline]
1749     fn utf8_chunks(&self) -> Utf8Chunks {
1750         Utf8Chunks { bytes: self.as_bytes() }
1751     }
1752
1753     /// Returns an iterator over the grapheme clusters in this byte string.
1754     /// If invalid UTF-8 is encountered, then the Unicode replacement codepoint
1755     /// is yielded instead.
1756     ///
1757     /// # Examples
1758     ///
1759     /// This example shows how multiple codepoints can combine to form a
1760     /// single grapheme cluster:
1761     ///
1762     /// ```
1763     /// use bstr::ByteSlice;
1764     ///
1765     /// let bs = "a\u{0300}\u{0316}\u{1F1FA}\u{1F1F8}".as_bytes();
1766     /// let graphemes: Vec<&str> = bs.graphemes().collect();
1767     /// assert_eq!(vec!["à̖", "🇺🇸"], graphemes);
1768     /// ```
1769     ///
1770     /// This shows that graphemes can be iterated over in reverse:
1771     ///
1772     /// ```
1773     /// use bstr::ByteSlice;
1774     ///
1775     /// let bs = "a\u{0300}\u{0316}\u{1F1FA}\u{1F1F8}".as_bytes();
1776     /// let graphemes: Vec<&str> = bs.graphemes().rev().collect();
1777     /// assert_eq!(vec!["🇺🇸", "à̖"], graphemes);
1778     /// ```
1779     #[cfg(feature = "unicode")]
1780     #[inline]
1781     fn graphemes(&self) -> Graphemes {
1782         Graphemes::new(self.as_bytes())
1783     }
1784
1785     /// Returns an iterator over the grapheme clusters in this byte string
1786     /// along with their starting and ending byte index positions. If invalid
1787     /// UTF-8 is encountered, then the Unicode replacement codepoint is yielded
1788     /// instead.
1789     ///
1790     /// # Examples
1791     ///
1792     /// This example shows how to get the byte offsets of each individual
1793     /// grapheme cluster:
1794     ///
1795     /// ```
1796     /// use bstr::ByteSlice;
1797     ///
1798     /// let bs = "a\u{0300}\u{0316}\u{1F1FA}\u{1F1F8}".as_bytes();
1799     /// let graphemes: Vec<(usize, usize, &str)> =
1800     ///     bs.grapheme_indices().collect();
1801     /// assert_eq!(vec![(0, 5, "à̖"), (5, 13, "🇺🇸")], graphemes);
1802     /// ```
1803     ///
1804     /// This example shows what happens when invalid UTF-8 is encountered. Note
1805     /// that the offsets are valid indices into the original string, and do
1806     /// not necessarily correspond to the length of the `&str` returned!
1807     ///
1808     /// ```
1809     /// use bstr::{ByteSlice, ByteVec};
1810     ///
1811     /// let mut bytes = vec![];
1812     /// bytes.push_str("a\u{0300}\u{0316}");
1813     /// bytes.push(b'\xFF');
1814     /// bytes.push_str("\u{1F1FA}\u{1F1F8}");
1815     ///
1816     /// let graphemes: Vec<(usize, usize, &str)> =
1817     ///     bytes.grapheme_indices().collect();
1818     /// assert_eq!(
1819     ///     graphemes,
1820     ///     vec![(0, 5, "à̖"), (5, 6, "\u{FFFD}"), (6, 14, "🇺🇸")]
1821     /// );
1822     /// ```
1823     #[cfg(feature = "unicode")]
1824     #[inline]
1825     fn grapheme_indices(&self) -> GraphemeIndices {
1826         GraphemeIndices::new(self.as_bytes())
1827     }
1828
1829     /// Returns an iterator over the words in this byte string. If invalid
1830     /// UTF-8 is encountered, then the Unicode replacement codepoint is yielded
1831     /// instead.
1832     ///
1833     /// This is similar to
1834     /// [`words_with_breaks`](trait.ByteSlice.html#method.words_with_breaks),
1835     /// except it only returns elements that contain a "word" character. A word
1836     /// character is defined by UTS #18 (Annex C) to be the combination of the
1837     /// `Alphabetic` and `Join_Control` properties, along with the
1838     /// `Decimal_Number`, `Mark` and `Connector_Punctuation` general
1839     /// categories.
1840     ///
1841     /// Since words are made up of one or more codepoints, this iterator
1842     /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
1843     /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
1844     ///
1845     /// # Examples
1846     ///
1847     /// Basic usage:
1848     ///
1849     /// ```
1850     /// use bstr::ByteSlice;
1851     ///
1852     /// let bs = br#"The quick ("brown") fox can't jump 32.3 feet, right?"#;
1853     /// let words: Vec<&str> = bs.words().collect();
1854     /// assert_eq!(words, vec![
1855     ///     "The", "quick", "brown", "fox", "can't",
1856     ///     "jump", "32.3", "feet", "right",
1857     /// ]);
1858     /// ```
1859     #[cfg(feature = "unicode")]
1860     #[inline]
1861     fn words(&self) -> Words {
1862         Words::new(self.as_bytes())
1863     }
1864
1865     /// Returns an iterator over the words in this byte string along with
1866     /// their starting and ending byte index positions.
1867     ///
1868     /// This is similar to
1869     /// [`words_with_break_indices`](trait.ByteSlice.html#method.words_with_break_indices),
1870     /// except it only returns elements that contain a "word" character. A word
1871     /// character is defined by UTS #18 (Annex C) to be the combination of the
1872     /// `Alphabetic` and `Join_Control` properties, along with the
1873     /// `Decimal_Number`, `Mark` and `Connector_Punctuation` general
1874     /// categories.
1875     ///
1876     /// Since words are made up of one or more codepoints, this iterator
1877     /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
1878     /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
1879     ///
1880     /// # Examples
1881     ///
1882     /// This example shows how to get the byte offsets of each individual
1883     /// word:
1884     ///
1885     /// ```
1886     /// use bstr::ByteSlice;
1887     ///
1888     /// let bs = b"can't jump 32.3 feet";
1889     /// let words: Vec<(usize, usize, &str)> = bs.word_indices().collect();
1890     /// assert_eq!(words, vec![
1891     ///     (0, 5, "can't"),
1892     ///     (6, 10, "jump"),
1893     ///     (11, 15, "32.3"),
1894     ///     (16, 20, "feet"),
1895     /// ]);
1896     /// ```
1897     #[cfg(feature = "unicode")]
1898     #[inline]
1899     fn word_indices(&self) -> WordIndices {
1900         WordIndices::new(self.as_bytes())
1901     }
1902
1903     /// Returns an iterator over the words in this byte string, along with
1904     /// all breaks between the words. Concatenating all elements yielded by
1905     /// the iterator results in the original string (modulo Unicode replacement
1906     /// codepoint substitutions if invalid UTF-8 is encountered).
1907     ///
1908     /// Since words are made up of one or more codepoints, this iterator
1909     /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
1910     /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
1911     ///
1912     /// # Examples
1913     ///
1914     /// Basic usage:
1915     ///
1916     /// ```
1917     /// use bstr::ByteSlice;
1918     ///
1919     /// let bs = br#"The quick ("brown") fox can't jump 32.3 feet, right?"#;
1920     /// let words: Vec<&str> = bs.words_with_breaks().collect();
1921     /// assert_eq!(words, vec![
1922     ///     "The", " ", "quick", " ", "(", "\"", "brown", "\"", ")",
1923     ///     " ", "fox", " ", "can't", " ", "jump", " ", "32.3", " ", "feet",
1924     ///     ",", " ", "right", "?",
1925     /// ]);
1926     /// ```
1927     #[cfg(feature = "unicode")]
1928     #[inline]
1929     fn words_with_breaks(&self) -> WordsWithBreaks {
1930         WordsWithBreaks::new(self.as_bytes())
1931     }
1932
1933     /// Returns an iterator over the words and their byte offsets in this
1934     /// byte string, along with all breaks between the words. Concatenating
1935     /// all elements yielded by the iterator results in the original string
1936     /// (modulo Unicode replacement codepoint substitutions if invalid UTF-8 is
1937     /// encountered).
1938     ///
1939     /// Since words are made up of one or more codepoints, this iterator
1940     /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
1941     /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
1942     ///
1943     /// # Examples
1944     ///
1945     /// This example shows how to get the byte offsets of each individual
1946     /// word:
1947     ///
1948     /// ```
1949     /// use bstr::ByteSlice;
1950     ///
1951     /// let bs = b"can't jump 32.3 feet";
1952     /// let words: Vec<(usize, usize, &str)> =
1953     ///     bs.words_with_break_indices().collect();
1954     /// assert_eq!(words, vec![
1955     ///     (0, 5, "can't"),
1956     ///     (5, 6, " "),
1957     ///     (6, 10, "jump"),
1958     ///     (10, 11, " "),
1959     ///     (11, 15, "32.3"),
1960     ///     (15, 16, " "),
1961     ///     (16, 20, "feet"),
1962     /// ]);
1963     /// ```
1964     #[cfg(feature = "unicode")]
1965     #[inline]
1966     fn words_with_break_indices(&self) -> WordsWithBreakIndices {
1967         WordsWithBreakIndices::new(self.as_bytes())
1968     }
1969
1970     /// Returns an iterator over the sentences in this byte string.
1971     ///
1972     /// Typically, a sentence will include its trailing punctuation and
1973     /// whitespace. Concatenating all elements yielded by the iterator
1974     /// results in the original string (modulo Unicode replacement codepoint
1975     /// substitutions if invalid UTF-8 is encountered).
1976     ///
1977     /// Since sentences are made up of one or more codepoints, this iterator
1978     /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
1979     /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
1980     ///
1981     /// # Examples
1982     ///
1983     /// Basic usage:
1984     ///
1985     /// ```
1986     /// use bstr::ByteSlice;
1987     ///
1988     /// let bs = b"I want this. Not that. Right now.";
1989     /// let sentences: Vec<&str> = bs.sentences().collect();
1990     /// assert_eq!(sentences, vec![
1991     ///     "I want this. ",
1992     ///     "Not that. ",
1993     ///     "Right now.",
1994     /// ]);
1995     /// ```
1996     #[cfg(feature = "unicode")]
1997     #[inline]
1998     fn sentences(&self) -> Sentences {
1999         Sentences::new(self.as_bytes())
2000     }
2001
2002     /// Returns an iterator over the sentences in this byte string along with
2003     /// their starting and ending byte index positions.
2004     ///
2005     /// Typically, a sentence will include its trailing punctuation and
2006     /// whitespace. Concatenating all elements yielded by the iterator
2007     /// results in the original string (modulo Unicode replacement codepoint
2008     /// substitutions if invalid UTF-8 is encountered).
2009     ///
2010     /// Since sentences are made up of one or more codepoints, this iterator
2011     /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
2012     /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
2013     ///
2014     /// # Examples
2015     ///
2016     /// Basic usage:
2017     ///
2018     /// ```
2019     /// use bstr::ByteSlice;
2020     ///
2021     /// let bs = b"I want this. Not that. Right now.";
2022     /// let sentences: Vec<(usize, usize, &str)> =
2023     ///     bs.sentence_indices().collect();
2024     /// assert_eq!(sentences, vec![
2025     ///     (0, 13, "I want this. "),
2026     ///     (13, 23, "Not that. "),
2027     ///     (23, 33, "Right now."),
2028     /// ]);
2029     /// ```
2030     #[cfg(feature = "unicode")]
2031     #[inline]
2032     fn sentence_indices(&self) -> SentenceIndices {
2033         SentenceIndices::new(self.as_bytes())
2034     }
2035
2036     /// An iterator over all lines in a byte string, without their
2037     /// terminators.
2038     ///
2039     /// For this iterator, the only line terminators recognized are `\r\n` and
2040     /// `\n`.
2041     ///
2042     /// # Examples
2043     ///
2044     /// Basic usage:
2045     ///
2046     /// ```
2047     /// use bstr::{B, ByteSlice};
2048     ///
2049     /// let s = b"\
2050     /// foo
2051     ///
2052     /// bar\r
2053     /// baz
2054     ///
2055     ///
2056     /// quux";
2057     /// let lines: Vec<&[u8]> = s.lines().collect();
2058     /// assert_eq!(lines, vec![
2059     ///     B("foo"), B(""), B("bar"), B("baz"), B(""), B(""), B("quux"),
2060     /// ]);
2061     /// ```
2062     #[inline]
2063     fn lines(&self) -> Lines {
2064         Lines::new(self.as_bytes())
2065     }
2066
2067     /// An iterator over all lines in a byte string, including their
2068     /// terminators.
2069     ///
2070     /// For this iterator, the only line terminator recognized is `\n`. (Since
2071     /// line terminators are included, this also handles `\r\n` line endings.)
2072     ///
2073     /// Line terminators are only included if they are present in the original
2074     /// byte string. For example, the last line in a byte string may not end
2075     /// with a line terminator.
2076     ///
2077     /// Concatenating all elements yielded by this iterator is guaranteed to
2078     /// yield the original byte string.
2079     ///
2080     /// # Examples
2081     ///
2082     /// Basic usage:
2083     ///
2084     /// ```
2085     /// use bstr::{B, ByteSlice};
2086     ///
2087     /// let s = b"\
2088     /// foo
2089     ///
2090     /// bar\r
2091     /// baz
2092     ///
2093     ///
2094     /// quux";
2095     /// let lines: Vec<&[u8]> = s.lines_with_terminator().collect();
2096     /// assert_eq!(lines, vec![
2097     ///     B("foo\n"),
2098     ///     B("\n"),
2099     ///     B("bar\r\n"),
2100     ///     B("baz\n"),
2101     ///     B("\n"),
2102     ///     B("\n"),
2103     ///     B("quux"),
2104     /// ]);
2105     /// ```
2106     #[inline]
2107     fn lines_with_terminator(&self) -> LinesWithTerminator {
2108         LinesWithTerminator::new(self.as_bytes())
2109     }
2110
2111     /// Return a byte string slice with leading and trailing whitespace
2112     /// removed.
2113     ///
2114     /// Whitespace is defined according to the terms of the `White_Space`
2115     /// Unicode property.
2116     ///
2117     /// # Examples
2118     ///
2119     /// Basic usage:
2120     ///
2121     /// ```
2122     /// use bstr::{B, ByteSlice};
2123     ///
2124     /// let s = B(" foo\tbar\t\u{2003}\n");
2125     /// assert_eq!(s.trim(), B("foo\tbar"));
2126     /// ```
2127     #[cfg(feature = "unicode")]
2128     #[inline]
2129     fn trim(&self) -> &[u8] {
2130         self.trim_start().trim_end()
2131     }
2132
2133     /// Return a byte string slice with leading whitespace removed.
2134     ///
2135     /// Whitespace is defined according to the terms of the `White_Space`
2136     /// Unicode property.
2137     ///
2138     /// # Examples
2139     ///
2140     /// Basic usage:
2141     ///
2142     /// ```
2143     /// use bstr::{B, ByteSlice};
2144     ///
2145     /// let s = B(" foo\tbar\t\u{2003}\n");
2146     /// assert_eq!(s.trim_start(), B("foo\tbar\t\u{2003}\n"));
2147     /// ```
2148     #[cfg(feature = "unicode")]
2149     #[inline]
2150     fn trim_start(&self) -> &[u8] {
2151         let start = whitespace_len_fwd(self.as_bytes());
2152         &self.as_bytes()[start..]
2153     }
2154
2155     /// Return a byte string slice with trailing whitespace removed.
2156     ///
2157     /// Whitespace is defined according to the terms of the `White_Space`
2158     /// Unicode property.
2159     ///
2160     /// # Examples
2161     ///
2162     /// Basic usage:
2163     ///
2164     /// ```
2165     /// use bstr::{B, ByteSlice};
2166     ///
2167     /// let s = B(" foo\tbar\t\u{2003}\n");
2168     /// assert_eq!(s.trim_end(), B(" foo\tbar"));
2169     /// ```
2170     #[cfg(feature = "unicode")]
2171     #[inline]
2172     fn trim_end(&self) -> &[u8] {
2173         let end = whitespace_len_rev(self.as_bytes());
2174         &self.as_bytes()[..end]
2175     }
2176
2177     /// Return a byte string slice with leading and trailing characters
2178     /// satisfying the given predicate removed.
2179     ///
2180     /// # Examples
2181     ///
2182     /// Basic usage:
2183     ///
2184     /// ```
2185     /// use bstr::{B, ByteSlice};
2186     ///
2187     /// let s = b"123foo5bar789";
2188     /// assert_eq!(s.trim_with(|c| c.is_numeric()), B("foo5bar"));
2189     /// ```
2190     #[inline]
2191     fn trim_with<F: FnMut(char) -> bool>(&self, mut trim: F) -> &[u8] {
2192         self.trim_start_with(&mut trim).trim_end_with(&mut trim)
2193     }
2194
2195     /// Return a byte string slice with leading characters satisfying the given
2196     /// predicate removed.
2197     ///
2198     /// # Examples
2199     ///
2200     /// Basic usage:
2201     ///
2202     /// ```
2203     /// use bstr::{B, ByteSlice};
2204     ///
2205     /// let s = b"123foo5bar789";
2206     /// assert_eq!(s.trim_start_with(|c| c.is_numeric()), B("foo5bar789"));
2207     /// ```
2208     #[inline]
2209     fn trim_start_with<F: FnMut(char) -> bool>(&self, mut trim: F) -> &[u8] {
2210         for (s, _, ch) in self.char_indices() {
2211             if !trim(ch) {
2212                 return &self.as_bytes()[s..];
2213             }
2214         }
2215         b""
2216     }
2217
2218     /// Return a byte string slice with trailing characters satisfying the
2219     /// given predicate removed.
2220     ///
2221     /// # Examples
2222     ///
2223     /// Basic usage:
2224     ///
2225     /// ```
2226     /// use bstr::{B, ByteSlice};
2227     ///
2228     /// let s = b"123foo5bar789";
2229     /// assert_eq!(s.trim_end_with(|c| c.is_numeric()), B("123foo5bar"));
2230     /// ```
2231     #[inline]
2232     fn trim_end_with<F: FnMut(char) -> bool>(&self, mut trim: F) -> &[u8] {
2233         for (_, e, ch) in self.char_indices().rev() {
2234             if !trim(ch) {
2235                 return &self.as_bytes()[..e];
2236             }
2237         }
2238         b""
2239     }
2240
2241     /// Returns a new `Vec<u8>` containing the lowercase equivalent of this
2242     /// byte string.
2243     ///
2244     /// In this case, lowercase is defined according to the `Lowercase` Unicode
2245     /// property.
2246     ///
2247     /// If invalid UTF-8 is seen, or if a character has no lowercase variant,
2248     /// then it is copied to the returned `Vec<u8>` unchanged.
2249     ///
2250     /// Note that some characters in this byte string may expand into multiple
2251     /// characters when changing the case, so the number of bytes written to
2252     /// the given byte string may not be equivalent to the number of bytes in
2253     /// this byte string.
2254     ///
2255     /// If you'd like to reuse an allocation for performance reasons, then use
2256     /// [`to_lowercase_into`](#method.to_lowercase_into) instead.
2257     ///
2258     /// # Examples
2259     ///
2260     /// Basic usage:
2261     ///
2262     /// ```
2263     /// use bstr::{B, ByteSlice};
2264     ///
2265     /// let s = B("HELLO Β");
2266     /// assert_eq!("hello β".as_bytes(), s.to_lowercase().as_bytes());
2267     /// ```
2268     ///
2269     /// Scripts without case are not changed:
2270     ///
2271     /// ```
2272     /// use bstr::{B, ByteSlice};
2273     ///
2274     /// let s = B("农历新年");
2275     /// assert_eq!("农历新年".as_bytes(), s.to_lowercase().as_bytes());
2276     /// ```
2277     ///
2278     /// Invalid UTF-8 remains as is:
2279     ///
2280     /// ```
2281     /// use bstr::{B, ByteSlice};
2282     ///
2283     /// let s = B(b"FOO\xFFBAR\xE2\x98BAZ");
2284     /// assert_eq!(B(b"foo\xFFbar\xE2\x98baz"), s.to_lowercase().as_bytes());
2285     /// ```
2286     #[cfg(all(feature = "std", feature = "unicode"))]
2287     #[inline]
2288     fn to_lowercase(&self) -> Vec<u8> {
2289         let mut buf = vec![];
2290         self.to_lowercase_into(&mut buf);
2291         buf
2292     }
2293
2294     /// Writes the lowercase equivalent of this byte string into the given
2295     /// buffer. The buffer is not cleared before written to.
2296     ///
2297     /// In this case, lowercase is defined according to the `Lowercase`
2298     /// Unicode property.
2299     ///
2300     /// If invalid UTF-8 is seen, or if a character has no lowercase variant,
2301     /// then it is written to the given buffer unchanged.
2302     ///
2303     /// Note that some characters in this byte string may expand into multiple
2304     /// characters when changing the case, so the number of bytes written to
2305     /// the given byte string may not be equivalent to the number of bytes in
2306     /// this byte string.
2307     ///
2308     /// If you don't need to amortize allocation and instead prefer
2309     /// convenience, then use [`to_lowercase`](#method.to_lowercase) instead.
2310     ///
2311     /// # Examples
2312     ///
2313     /// Basic usage:
2314     ///
2315     /// ```
2316     /// use bstr::{B, ByteSlice};
2317     ///
2318     /// let s = B("HELLO Β");
2319     ///
2320     /// let mut buf = vec![];
2321     /// s.to_lowercase_into(&mut buf);
2322     /// assert_eq!("hello β".as_bytes(), buf.as_bytes());
2323     /// ```
2324     ///
2325     /// Scripts without case are not changed:
2326     ///
2327     /// ```
2328     /// use bstr::{B, ByteSlice};
2329     ///
2330     /// let s = B("农历新年");
2331     ///
2332     /// let mut buf = vec![];
2333     /// s.to_lowercase_into(&mut buf);
2334     /// assert_eq!("农历新年".as_bytes(), buf.as_bytes());
2335     /// ```
2336     ///
2337     /// Invalid UTF-8 remains as is:
2338     ///
2339     /// ```
2340     /// use bstr::{B, ByteSlice};
2341     ///
2342     /// let s = B(b"FOO\xFFBAR\xE2\x98BAZ");
2343     ///
2344     /// let mut buf = vec![];
2345     /// s.to_lowercase_into(&mut buf);
2346     /// assert_eq!(B(b"foo\xFFbar\xE2\x98baz"), buf.as_bytes());
2347     /// ```
2348     #[cfg(all(feature = "std", feature = "unicode"))]
2349     #[inline]
2350     fn to_lowercase_into(&self, buf: &mut Vec<u8>) {
2351         // TODO: This is the best we can do given what std exposes I think.
2352         // If we roll our own case handling, then we might be able to do this
2353         // a bit faster. We shouldn't roll our own case handling unless we
2354         // need to, e.g., for doing caseless matching or case folding.
2355
2356         // TODO(BUG): This doesn't handle any special casing rules.
2357
2358         buf.reserve(self.as_bytes().len());
2359         for (s, e, ch) in self.char_indices() {
2360             if ch == '\u{FFFD}' {
2361                 buf.push_str(&self.as_bytes()[s..e]);
2362             } else if ch.is_ascii() {
2363                 buf.push_char(ch.to_ascii_lowercase());
2364             } else {
2365                 for upper in ch.to_lowercase() {
2366                     buf.push_char(upper);
2367                 }
2368             }
2369         }
2370     }
2371
2372     /// Returns a new `Vec<u8>` containing the ASCII lowercase equivalent of
2373     /// this byte string.
2374     ///
2375     /// In this case, lowercase is only defined in ASCII letters. Namely, the
2376     /// letters `A-Z` are converted to `a-z`. All other bytes remain unchanged.
2377     /// In particular, the length of the byte string returned is always
2378     /// equivalent to the length of this byte string.
2379     ///
2380     /// If you'd like to reuse an allocation for performance reasons, then use
2381     /// [`make_ascii_lowercase`](#method.make_ascii_lowercase) to perform
2382     /// the conversion in place.
2383     ///
2384     /// # Examples
2385     ///
2386     /// Basic usage:
2387     ///
2388     /// ```
2389     /// use bstr::{B, ByteSlice};
2390     ///
2391     /// let s = B("HELLO Β");
2392     /// assert_eq!("hello Β".as_bytes(), s.to_ascii_lowercase().as_bytes());
2393     /// ```
2394     ///
2395     /// Invalid UTF-8 remains as is:
2396     ///
2397     /// ```
2398     /// use bstr::{B, ByteSlice};
2399     ///
2400     /// let s = B(b"FOO\xFFBAR\xE2\x98BAZ");
2401     /// assert_eq!(s.to_ascii_lowercase(), B(b"foo\xFFbar\xE2\x98baz"));
2402     /// ```
2403     #[cfg(feature = "std")]
2404     #[inline]
2405     fn to_ascii_lowercase(&self) -> Vec<u8> {
2406         self.as_bytes().to_ascii_lowercase()
2407     }
2408
2409     /// Convert this byte string to its lowercase ASCII equivalent in place.
2410     ///
2411     /// In this case, lowercase is only defined in ASCII letters. Namely, the
2412     /// letters `A-Z` are converted to `a-z`. All other bytes remain unchanged.
2413     ///
2414     /// If you don't need to do the conversion in
2415     /// place and instead prefer convenience, then use
2416     /// [`to_ascii_lowercase`](#method.to_ascii_lowercase) instead.
2417     ///
2418     /// # Examples
2419     ///
2420     /// Basic usage:
2421     ///
2422     /// ```
2423     /// use bstr::ByteSlice;
2424     ///
2425     /// let mut s = <Vec<u8>>::from("HELLO Β");
2426     /// s.make_ascii_lowercase();
2427     /// assert_eq!(s, "hello Β".as_bytes());
2428     /// ```
2429     ///
2430     /// Invalid UTF-8 remains as is:
2431     ///
2432     /// ```
2433     /// use bstr::{B, ByteSlice, ByteVec};
2434     ///
2435     /// let mut s = <Vec<u8>>::from_slice(b"FOO\xFFBAR\xE2\x98BAZ");
2436     /// s.make_ascii_lowercase();
2437     /// assert_eq!(s, B(b"foo\xFFbar\xE2\x98baz"));
2438     /// ```
2439     #[inline]
2440     fn make_ascii_lowercase(&mut self) {
2441         self.as_bytes_mut().make_ascii_lowercase();
2442     }
2443
2444     /// Returns a new `Vec<u8>` containing the uppercase equivalent of this
2445     /// byte string.
2446     ///
2447     /// In this case, uppercase is defined according to the `Uppercase`
2448     /// Unicode property.
2449     ///
2450     /// If invalid UTF-8 is seen, or if a character has no uppercase variant,
2451     /// then it is copied to the returned `Vec<u8>` unchanged.
2452     ///
2453     /// Note that some characters in this byte string may expand into multiple
2454     /// characters when changing the case, so the number of bytes written to
2455     /// the given byte string may not be equivalent to the number of bytes in
2456     /// this byte string.
2457     ///
2458     /// If you'd like to reuse an allocation for performance reasons, then use
2459     /// [`to_uppercase_into`](#method.to_uppercase_into) instead.
2460     ///
2461     /// # Examples
2462     ///
2463     /// Basic usage:
2464     ///
2465     /// ```
2466     /// use bstr::{B, ByteSlice};
2467     ///
2468     /// let s = B("hello β");
2469     /// assert_eq!(s.to_uppercase(), B("HELLO Β"));
2470     /// ```
2471     ///
2472     /// Scripts without case are not changed:
2473     ///
2474     /// ```
2475     /// use bstr::{B, ByteSlice};
2476     ///
2477     /// let s = B("农历新年");
2478     /// assert_eq!(s.to_uppercase(), B("农历新年"));
2479     /// ```
2480     ///
2481     /// Invalid UTF-8 remains as is:
2482     ///
2483     /// ```
2484     /// use bstr::{B, ByteSlice};
2485     ///
2486     /// let s = B(b"foo\xFFbar\xE2\x98baz");
2487     /// assert_eq!(s.to_uppercase(), B(b"FOO\xFFBAR\xE2\x98BAZ"));
2488     /// ```
2489     #[cfg(all(feature = "std", feature = "unicode"))]
2490     #[inline]
2491     fn to_uppercase(&self) -> Vec<u8> {
2492         let mut buf = vec![];
2493         self.to_uppercase_into(&mut buf);
2494         buf
2495     }
2496
2497     /// Writes the uppercase equivalent of this byte string into the given
2498     /// buffer. The buffer is not cleared before written to.
2499     ///
2500     /// In this case, uppercase is defined according to the `Uppercase`
2501     /// Unicode property.
2502     ///
2503     /// If invalid UTF-8 is seen, or if a character has no uppercase variant,
2504     /// then it is written to the given buffer unchanged.
2505     ///
2506     /// Note that some characters in this byte string may expand into multiple
2507     /// characters when changing the case, so the number of bytes written to
2508     /// the given byte string may not be equivalent to the number of bytes in
2509     /// this byte string.
2510     ///
2511     /// If you don't need to amortize allocation and instead prefer
2512     /// convenience, then use [`to_uppercase`](#method.to_uppercase) instead.
2513     ///
2514     /// # Examples
2515     ///
2516     /// Basic usage:
2517     ///
2518     /// ```
2519     /// use bstr::{B, ByteSlice};
2520     ///
2521     /// let s = B("hello β");
2522     ///
2523     /// let mut buf = vec![];
2524     /// s.to_uppercase_into(&mut buf);
2525     /// assert_eq!(buf, B("HELLO Β"));
2526     /// ```
2527     ///
2528     /// Scripts without case are not changed:
2529     ///
2530     /// ```
2531     /// use bstr::{B, ByteSlice};
2532     ///
2533     /// let s = B("农历新年");
2534     ///
2535     /// let mut buf = vec![];
2536     /// s.to_uppercase_into(&mut buf);
2537     /// assert_eq!(buf, B("农历新年"));
2538     /// ```
2539     ///
2540     /// Invalid UTF-8 remains as is:
2541     ///
2542     /// ```
2543     /// use bstr::{B, ByteSlice};
2544     ///
2545     /// let s = B(b"foo\xFFbar\xE2\x98baz");
2546     ///
2547     /// let mut buf = vec![];
2548     /// s.to_uppercase_into(&mut buf);
2549     /// assert_eq!(buf, B(b"FOO\xFFBAR\xE2\x98BAZ"));
2550     /// ```
2551     #[cfg(all(feature = "std", feature = "unicode"))]
2552     #[inline]
2553     fn to_uppercase_into(&self, buf: &mut Vec<u8>) {
2554         // TODO: This is the best we can do given what std exposes I think.
2555         // If we roll our own case handling, then we might be able to do this
2556         // a bit faster. We shouldn't roll our own case handling unless we
2557         // need to, e.g., for doing caseless matching or case folding.
2558         buf.reserve(self.as_bytes().len());
2559         for (s, e, ch) in self.char_indices() {
2560             if ch == '\u{FFFD}' {
2561                 buf.push_str(&self.as_bytes()[s..e]);
2562             } else if ch.is_ascii() {
2563                 buf.push_char(ch.to_ascii_uppercase());
2564             } else {
2565                 for upper in ch.to_uppercase() {
2566                     buf.push_char(upper);
2567                 }
2568             }
2569         }
2570     }
2571
2572     /// Returns a new `Vec<u8>` containing the ASCII uppercase equivalent of
2573     /// this byte string.
2574     ///
2575     /// In this case, uppercase is only defined in ASCII letters. Namely, the
2576     /// letters `a-z` are converted to `A-Z`. All other bytes remain unchanged.
2577     /// In particular, the length of the byte string returned is always
2578     /// equivalent to the length of this byte string.
2579     ///
2580     /// If you'd like to reuse an allocation for performance reasons, then use
2581     /// [`make_ascii_uppercase`](#method.make_ascii_uppercase) to perform
2582     /// the conversion in place.
2583     ///
2584     /// # Examples
2585     ///
2586     /// Basic usage:
2587     ///
2588     /// ```
2589     /// use bstr::{B, ByteSlice};
2590     ///
2591     /// let s = B("hello β");
2592     /// assert_eq!(s.to_ascii_uppercase(), B("HELLO β"));
2593     /// ```
2594     ///
2595     /// Invalid UTF-8 remains as is:
2596     ///
2597     /// ```
2598     /// use bstr::{B, ByteSlice};
2599     ///
2600     /// let s = B(b"foo\xFFbar\xE2\x98baz");
2601     /// assert_eq!(s.to_ascii_uppercase(), B(b"FOO\xFFBAR\xE2\x98BAZ"));
2602     /// ```
2603     #[cfg(feature = "std")]
2604     #[inline]
2605     fn to_ascii_uppercase(&self) -> Vec<u8> {
2606         self.as_bytes().to_ascii_uppercase()
2607     }
2608
2609     /// Convert this byte string to its uppercase ASCII equivalent in place.
2610     ///
2611     /// In this case, uppercase is only defined in ASCII letters. Namely, the
2612     /// letters `a-z` are converted to `A-Z`. All other bytes remain unchanged.
2613     ///
2614     /// If you don't need to do the conversion in
2615     /// place and instead prefer convenience, then use
2616     /// [`to_ascii_uppercase`](#method.to_ascii_uppercase) instead.
2617     ///
2618     /// # Examples
2619     ///
2620     /// Basic usage:
2621     ///
2622     /// ```
2623     /// use bstr::{B, ByteSlice};
2624     ///
2625     /// let mut s = <Vec<u8>>::from("hello β");
2626     /// s.make_ascii_uppercase();
2627     /// assert_eq!(s, B("HELLO β"));
2628     /// ```
2629     ///
2630     /// Invalid UTF-8 remains as is:
2631     ///
2632     /// ```
2633     /// use bstr::{B, ByteSlice, ByteVec};
2634     ///
2635     /// let mut s = <Vec<u8>>::from_slice(b"foo\xFFbar\xE2\x98baz");
2636     /// s.make_ascii_uppercase();
2637     /// assert_eq!(s, B(b"FOO\xFFBAR\xE2\x98BAZ"));
2638     /// ```
2639     #[inline]
2640     fn make_ascii_uppercase(&mut self) {
2641         self.as_bytes_mut().make_ascii_uppercase();
2642     }
2643
2644     /// Reverse the bytes in this string, in place.
2645     ///
2646     /// This is not necessarily a well formed operation! For example, if this
2647     /// byte string contains valid UTF-8 that isn't ASCII, then reversing the
2648     /// string will likely result in invalid UTF-8 and otherwise non-sensical
2649     /// content.
2650     ///
2651     /// Note that this is equivalent to the generic `[u8]::reverse` method.
2652     /// This method is provided to permit callers to explicitly differentiate
2653     /// between reversing bytes, codepoints and graphemes.
2654     ///
2655     /// # Examples
2656     ///
2657     /// Basic usage:
2658     ///
2659     /// ```
2660     /// use bstr::ByteSlice;
2661     ///
2662     /// let mut s = <Vec<u8>>::from("hello");
2663     /// s.reverse_bytes();
2664     /// assert_eq!(s, "olleh".as_bytes());
2665     /// ```
2666     #[inline]
2667     fn reverse_bytes(&mut self) {
2668         self.as_bytes_mut().reverse();
2669     }
2670
2671     /// Reverse the codepoints in this string, in place.
2672     ///
2673     /// If this byte string is valid UTF-8, then its reversal by codepoint
2674     /// is also guaranteed to be valid UTF-8.
2675     ///
2676     /// This operation is equivalent to the following, but without allocating:
2677     ///
2678     /// ```
2679     /// use bstr::ByteSlice;
2680     ///
2681     /// let mut s = <Vec<u8>>::from("foo☃bar");
2682     ///
2683     /// let mut chars: Vec<char> = s.chars().collect();
2684     /// chars.reverse();
2685     ///
2686     /// let reversed: String = chars.into_iter().collect();
2687     /// assert_eq!(reversed, "rab☃oof");
2688     /// ```
2689     ///
2690     /// Note that this is not necessarily a well formed operation. For example,
2691     /// if this byte string contains grapheme clusters with more than one
2692     /// codepoint, then those grapheme clusters will not necessarily be
2693     /// preserved. If you'd like to preserve grapheme clusters, then use
2694     /// [`reverse_graphemes`](#method.reverse_graphemes) instead.
2695     ///
2696     /// # Examples
2697     ///
2698     /// Basic usage:
2699     ///
2700     /// ```
2701     /// use bstr::ByteSlice;
2702     ///
2703     /// let mut s = <Vec<u8>>::from("foo☃bar");
2704     /// s.reverse_chars();
2705     /// assert_eq!(s, "rab☃oof".as_bytes());
2706     /// ```
2707     ///
2708     /// This example shows that not all reversals lead to a well formed string.
2709     /// For example, in this case, combining marks are used to put accents over
2710     /// some letters, and those accent marks must appear after the codepoints
2711     /// they modify.
2712     ///
2713     /// ```
2714     /// use bstr::{B, ByteSlice};
2715     ///
2716     /// let mut s = <Vec<u8>>::from("résumé");
2717     /// s.reverse_chars();
2718     /// assert_eq!(s, B(b"\xCC\x81emus\xCC\x81er"));
2719     /// ```
2720     ///
2721     /// A word of warning: the above example relies on the fact that
2722     /// `résumé` is in decomposed normal form, which means there are separate
2723     /// codepoints for the accents above `e`. If it is instead in composed
2724     /// normal form, then the example works:
2725     ///
2726     /// ```
2727     /// use bstr::{B, ByteSlice};
2728     ///
2729     /// let mut s = <Vec<u8>>::from("résumé");
2730     /// s.reverse_chars();
2731     /// assert_eq!(s, B("émusér"));
2732     /// ```
2733     ///
2734     /// The point here is to be cautious and not assume that just because
2735     /// `reverse_chars` works in one case, that it therefore works in all
2736     /// cases.
2737     #[inline]
2738     fn reverse_chars(&mut self) {
2739         let mut i = 0;
2740         loop {
2741             let (_, size) = utf8::decode(&self.as_bytes()[i..]);
2742             if size == 0 {
2743                 break;
2744             }
2745             if size > 1 {
2746                 self.as_bytes_mut()[i..i + size].reverse_bytes();
2747             }
2748             i += size;
2749         }
2750         self.reverse_bytes();
2751     }
2752
2753     /// Reverse the graphemes in this string, in place.
2754     ///
2755     /// If this byte string is valid UTF-8, then its reversal by grapheme
2756     /// is also guaranteed to be valid UTF-8.
2757     ///
2758     /// This operation is equivalent to the following, but without allocating:
2759     ///
2760     /// ```
2761     /// use bstr::ByteSlice;
2762     ///
2763     /// let mut s = <Vec<u8>>::from("foo☃bar");
2764     ///
2765     /// let mut graphemes: Vec<&str> = s.graphemes().collect();
2766     /// graphemes.reverse();
2767     ///
2768     /// let reversed = graphemes.concat();
2769     /// assert_eq!(reversed, "rab☃oof");
2770     /// ```
2771     ///
2772     /// # Examples
2773     ///
2774     /// Basic usage:
2775     ///
2776     /// ```
2777     /// use bstr::ByteSlice;
2778     ///
2779     /// let mut s = <Vec<u8>>::from("foo☃bar");
2780     /// s.reverse_graphemes();
2781     /// assert_eq!(s, "rab☃oof".as_bytes());
2782     /// ```
2783     ///
2784     /// This example shows how this correctly handles grapheme clusters,
2785     /// unlike `reverse_chars`.
2786     ///
2787     /// ```
2788     /// use bstr::ByteSlice;
2789     ///
2790     /// let mut s = <Vec<u8>>::from("résumé");
2791     /// s.reverse_graphemes();
2792     /// assert_eq!(s, "émusér".as_bytes());
2793     /// ```
2794     #[cfg(feature = "unicode")]
2795     #[inline]
2796     fn reverse_graphemes(&mut self) {
2797         use unicode::decode_grapheme;
2798
2799         let mut i = 0;
2800         loop {
2801             let (_, size) = decode_grapheme(&self.as_bytes()[i..]);
2802             if size == 0 {
2803                 break;
2804             }
2805             if size > 1 {
2806                 self.as_bytes_mut()[i..i + size].reverse_bytes();
2807             }
2808             i += size;
2809         }
2810         self.reverse_bytes();
2811     }
2812
2813     /// Returns true if and only if every byte in this byte string is ASCII.
2814     ///
2815     /// ASCII is an encoding that defines 128 codepoints. A byte corresponds to
2816     /// an ASCII codepoint if and only if it is in the inclusive range
2817     /// `[0, 127]`.
2818     ///
2819     /// # Examples
2820     ///
2821     /// Basic usage:
2822     ///
2823     /// ```
2824     /// use bstr::{B, ByteSlice};
2825     ///
2826     /// assert!(B("abc").is_ascii());
2827     /// assert!(!B("☃βツ").is_ascii());
2828     /// assert!(!B(b"\xFF").is_ascii());
2829     /// ```
2830     #[inline]
2831     fn is_ascii(&self) -> bool {
2832         ascii::first_non_ascii_byte(self.as_bytes()) == self.as_bytes().len()
2833     }
2834
2835     /// Returns true if and only if the entire byte string is valid UTF-8.
2836     ///
2837     /// If you need location information about where a byte string's first
2838     /// invalid UTF-8 byte is, then use the [`to_str`](#method.to_str) method.
2839     ///
2840     /// # Examples
2841     ///
2842     /// Basic usage:
2843     ///
2844     /// ```
2845     /// use bstr::{B, ByteSlice};
2846     ///
2847     /// assert!(B("abc").is_utf8());
2848     /// assert!(B("☃βツ").is_utf8());
2849     /// // invalid bytes
2850     /// assert!(!B(b"abc\xFF").is_utf8());
2851     /// // surrogate encoding
2852     /// assert!(!B(b"\xED\xA0\x80").is_utf8());
2853     /// // incomplete sequence
2854     /// assert!(!B(b"\xF0\x9D\x9Ca").is_utf8());
2855     /// // overlong sequence
2856     /// assert!(!B(b"\xF0\x82\x82\xAC").is_utf8());
2857     /// ```
2858     #[inline]
2859     fn is_utf8(&self) -> bool {
2860         utf8::validate(self.as_bytes()).is_ok()
2861     }
2862
2863     /// Returns the last byte in this byte string, if it's non-empty. If this
2864     /// byte string is empty, this returns `None`.
2865     ///
2866     /// Note that this is like the generic `[u8]::last`, except this returns
2867     /// the byte by value instead of a reference to the byte.
2868     ///
2869     /// # Examples
2870     ///
2871     /// Basic usage:
2872     ///
2873     /// ```
2874     /// use bstr::ByteSlice;
2875     ///
2876     /// assert_eq!(Some(b'z'), b"baz".last_byte());
2877     /// assert_eq!(None, b"".last_byte());
2878     /// ```
2879     #[inline]
2880     fn last_byte(&self) -> Option<u8> {
2881         let bytes = self.as_bytes();
2882         bytes.get(bytes.len().saturating_sub(1)).map(|&b| b)
2883     }
2884
2885     /// Returns the index of the first non-ASCII byte in this byte string (if
2886     /// any such indices exist). Specifically, it returns the index of the
2887     /// first byte with a value greater than or equal to `0x80`.
2888     ///
2889     /// # Examples
2890     ///
2891     /// Basic usage:
2892     ///
2893     /// ```
2894     /// use bstr::{ByteSlice, B};
2895     ///
2896     /// assert_eq!(Some(3), b"abc\xff".find_non_ascii_byte());
2897     /// assert_eq!(None, b"abcde".find_non_ascii_byte());
2898     /// assert_eq!(Some(0), B("😀").find_non_ascii_byte());
2899     /// ```
2900     #[inline]
2901     fn find_non_ascii_byte(&self) -> Option<usize> {
2902         let index = ascii::first_non_ascii_byte(self.as_bytes());
2903         if index == self.as_bytes().len() {
2904             None
2905         } else {
2906             Some(index)
2907         }
2908     }
2909
2910     /// Copies elements from one part of the slice to another part of itself,
2911     /// where the parts may be overlapping.
2912     ///
2913     /// `src` is the range within this byte string to copy from, while `dest`
2914     /// is the starting index of the range within this byte string to copy to.
2915     /// The length indicated by `src` must be less than or equal to the number
2916     /// of bytes from `dest` to the end of the byte string.
2917     ///
2918     /// # Panics
2919     ///
2920     /// Panics if either range is out of bounds, or if `src` is too big to fit
2921     /// into `dest`, or if the end of `src` is before the start.
2922     ///
2923     /// # Examples
2924     ///
2925     /// Copying four bytes within a byte string:
2926     ///
2927     /// ```
2928     /// use bstr::{B, ByteSlice};
2929     ///
2930     /// let mut buf = *b"Hello, World!";
2931     /// let s = &mut buf;
2932     /// s.copy_within_str(1..5, 8);
2933     /// assert_eq!(s, B("Hello, Wello!"));
2934     /// ```
2935     #[inline]
2936     fn copy_within_str<R>(&mut self, src: R, dest: usize)
2937     where
2938         R: ops::RangeBounds<usize>,
2939     {
2940         // TODO: Deprecate this once slice::copy_within stabilizes.
2941         let src_start = match src.start_bound() {
2942             ops::Bound::Included(&n) => n,
2943             ops::Bound::Excluded(&n) => {
2944                 n.checked_add(1).expect("attempted to index slice beyond max")
2945             }
2946             ops::Bound::Unbounded => 0,
2947         };
2948         let src_end = match src.end_bound() {
2949             ops::Bound::Included(&n) => {
2950                 n.checked_add(1).expect("attempted to index slice beyond max")
2951             }
2952             ops::Bound::Excluded(&n) => n,
2953             ops::Bound::Unbounded => self.as_bytes().len(),
2954         };
2955         assert!(src_start <= src_end, "src end is before src start");
2956         assert!(src_end <= self.as_bytes().len(), "src is out of bounds");
2957         let count = src_end - src_start;
2958         assert!(
2959             dest <= self.as_bytes().len() - count,
2960             "dest is out of bounds",
2961         );
2962
2963         // SAFETY: This is safe because we use ptr::copy to handle overlapping
2964         // copies, and is also safe because we've checked all the bounds above.
2965         // Finally, we are only dealing with u8 data, which is Copy, which
2966         // means we can copy without worrying about ownership/destructors.
2967         unsafe {
2968             ptr::copy(
2969                 self.as_bytes().get_unchecked(src_start),
2970                 self.as_bytes_mut().get_unchecked_mut(dest),
2971                 count,
2972             );
2973         }
2974     }
2975 }
2976
2977 /// A single substring searcher fixed to a particular needle.
2978 ///
2979 /// The purpose of this type is to permit callers to construct a substring
2980 /// searcher that can be used to search haystacks without the overhead of
2981 /// constructing the searcher in the first place. This is a somewhat niche
2982 /// concern when it's necessary to re-use the same needle to search multiple
2983 /// different haystacks with as little overhead as possible. In general, using
2984 /// [`ByteSlice::find`](trait.ByteSlice.html#method.find)
2985 /// or
2986 /// [`ByteSlice::find_iter`](trait.ByteSlice.html#method.find_iter)
2987 /// is good enough, but `Finder` is useful when you can meaningfully observe
2988 /// searcher construction time in a profile.
2989 ///
2990 /// When the `std` feature is enabled, then this type has an `into_owned`
2991 /// version which permits building a `Finder` that is not connected to the
2992 /// lifetime of its needle.
2993 #[derive(Clone, Debug)]
2994 pub struct Finder<'a> {
2995     searcher: TwoWay<'a>,
2996 }
2997
2998 impl<'a> Finder<'a> {
2999     /// Create a new finder for the given needle.
3000     #[inline]
3001     pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'a B) -> Finder<'a> {
3002         Finder { searcher: TwoWay::forward(needle.as_ref()) }
3003     }
3004
3005     /// Convert this finder into its owned variant, such that it no longer
3006     /// borrows the needle.
3007     ///
3008     /// If this is already an owned finder, then this is a no-op. Otherwise,
3009     /// this copies the needle.
3010     ///
3011     /// This is only available when the `std` feature is enabled.
3012     #[cfg(feature = "std")]
3013     #[inline]
3014     pub fn into_owned(self) -> Finder<'static> {
3015         Finder { searcher: self.searcher.into_owned() }
3016     }
3017
3018     /// Returns the needle that this finder searches for.
3019     ///
3020     /// Note that the lifetime of the needle returned is tied to the lifetime
3021     /// of the finder, and may be shorter than the `'a` lifetime. Namely, a
3022     /// finder's needle can be either borrowed or owned, so the lifetime of the
3023     /// needle returned must necessarily be the shorter of the two.
3024     #[inline]
3025     pub fn needle(&self) -> &[u8] {
3026         self.searcher.needle()
3027     }
3028
3029     /// Returns the index of the first occurrence of this needle in the given
3030     /// haystack.
3031     ///
3032     /// The haystack may be any type that can be cheaply converted into a
3033     /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
3034     ///
3035     /// # Complexity
3036     ///
3037     /// This routine is guaranteed to have worst case linear time complexity
3038     /// with respect to both the needle and the haystack. That is, this runs
3039     /// in `O(needle.len() + haystack.len())` time.
3040     ///
3041     /// This routine is also guaranteed to have worst case constant space
3042     /// complexity.
3043     ///
3044     /// # Examples
3045     ///
3046     /// Basic usage:
3047     ///
3048     /// ```
3049     /// use bstr::Finder;
3050     ///
3051     /// let haystack = "foo bar baz";
3052     /// assert_eq!(Some(0), Finder::new("foo").find(haystack));
3053     /// assert_eq!(Some(4), Finder::new("bar").find(haystack));
3054     /// assert_eq!(None, Finder::new("quux").find(haystack));
3055     /// ```
3056     #[inline]
3057     pub fn find<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize> {
3058         self.searcher.find(haystack.as_ref())
3059     }
3060 }
3061
3062 /// A single substring reverse searcher fixed to a particular needle.
3063 ///
3064 /// The purpose of this type is to permit callers to construct a substring
3065 /// searcher that can be used to search haystacks without the overhead of
3066 /// constructing the searcher in the first place. This is a somewhat niche
3067 /// concern when it's necessary to re-use the same needle to search multiple
3068 /// different haystacks with as little overhead as possible. In general, using
3069 /// [`ByteSlice::rfind`](trait.ByteSlice.html#method.rfind)
3070 /// or
3071 /// [`ByteSlice::rfind_iter`](trait.ByteSlice.html#method.rfind_iter)
3072 /// is good enough, but `FinderReverse` is useful when you can meaningfully
3073 /// observe searcher construction time in a profile.
3074 ///
3075 /// When the `std` feature is enabled, then this type has an `into_owned`
3076 /// version which permits building a `FinderReverse` that is not connected to
3077 /// the lifetime of its needle.
3078 #[derive(Clone, Debug)]
3079 pub struct FinderReverse<'a> {
3080     searcher: TwoWay<'a>,
3081 }
3082
3083 impl<'a> FinderReverse<'a> {
3084     /// Create a new reverse finder for the given needle.
3085     #[inline]
3086     pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'a B) -> FinderReverse<'a> {
3087         FinderReverse { searcher: TwoWay::reverse(needle.as_ref()) }
3088     }
3089
3090     /// Convert this finder into its owned variant, such that it no longer
3091     /// borrows the needle.
3092     ///
3093     /// If this is already an owned finder, then this is a no-op. Otherwise,
3094     /// this copies the needle.
3095     ///
3096     /// This is only available when the `std` feature is enabled.
3097     #[cfg(feature = "std")]
3098     #[inline]
3099     pub fn into_owned(self) -> FinderReverse<'static> {
3100         FinderReverse { searcher: self.searcher.into_owned() }
3101     }
3102
3103     /// Returns the needle that this finder searches for.
3104     ///
3105     /// Note that the lifetime of the needle returned is tied to the lifetime
3106     /// of this finder, and may be shorter than the `'a` lifetime. Namely,
3107     /// a finder's needle can be either borrowed or owned, so the lifetime of
3108     /// the needle returned must necessarily be the shorter of the two.
3109     #[inline]
3110     pub fn needle(&self) -> &[u8] {
3111         self.searcher.needle()
3112     }
3113
3114     /// Returns the index of the last occurrence of this needle in the given
3115     /// haystack.
3116     ///
3117     /// The haystack may be any type that can be cheaply converted into a
3118     /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
3119     ///
3120     /// # Complexity
3121     ///
3122     /// This routine is guaranteed to have worst case linear time complexity
3123     /// with respect to both the needle and the haystack. That is, this runs
3124     /// in `O(needle.len() + haystack.len())` time.
3125     ///
3126     /// This routine is also guaranteed to have worst case constant space
3127     /// complexity.
3128     ///
3129     /// # Examples
3130     ///
3131     /// Basic usage:
3132     ///
3133     /// ```
3134     /// use bstr::FinderReverse;
3135     ///
3136     /// let haystack = "foo bar baz";
3137     /// assert_eq!(Some(0), FinderReverse::new("foo").rfind(haystack));
3138     /// assert_eq!(Some(4), FinderReverse::new("bar").rfind(haystack));
3139     /// assert_eq!(None, FinderReverse::new("quux").rfind(haystack));
3140     /// ```
3141     #[inline]
3142     pub fn rfind<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize> {
3143         self.searcher.rfind(haystack.as_ref())
3144     }
3145 }
3146
3147 /// An iterator over non-overlapping substring matches.
3148 ///
3149 /// Matches are reported by the byte offset at which they begin.
3150 ///
3151 /// `'a` is the shorter of two lifetimes: the byte string being searched or the
3152 /// byte string being looked for.
3153 #[derive(Debug)]
3154 pub struct Find<'a> {
3155     haystack: &'a [u8],
3156     prestate: PrefilterState,
3157     searcher: TwoWay<'a>,
3158     pos: usize,
3159 }
3160
3161 impl<'a> Find<'a> {
3162     fn new(haystack: &'a [u8], needle: &'a [u8]) -> Find<'a> {
3163         let searcher = TwoWay::forward(needle);
3164         let prestate = searcher.prefilter_state();
3165         Find { haystack, prestate, searcher, pos: 0 }
3166     }
3167 }
3168
3169 impl<'a> Iterator for Find<'a> {
3170     type Item = usize;
3171
3172     #[inline]
3173     fn next(&mut self) -> Option<usize> {
3174         if self.pos > self.haystack.len() {
3175             return None;
3176         }
3177         let result = self
3178             .searcher
3179             .find_with(&mut self.prestate, &self.haystack[self.pos..]);
3180         match result {
3181             None => None,
3182             Some(i) => {
3183                 let pos = self.pos + i;
3184                 self.pos = pos + cmp::max(1, self.searcher.needle().len());
3185                 Some(pos)
3186             }
3187         }
3188     }
3189 }
3190
3191 /// An iterator over non-overlapping substring matches in reverse.
3192 ///
3193 /// Matches are reported by the byte offset at which they begin.
3194 ///
3195 /// `'a` is the shorter of two lifetimes: the byte string being searched or the
3196 /// byte string being looked for.
3197 #[derive(Debug)]
3198 pub struct FindReverse<'a> {
3199     haystack: &'a [u8],
3200     prestate: PrefilterState,
3201     searcher: TwoWay<'a>,
3202     /// When searching with an empty needle, this gets set to `None` after
3203     /// we've yielded the last element at `0`.
3204     pos: Option<usize>,
3205 }
3206
3207 impl<'a> FindReverse<'a> {
3208     fn new(haystack: &'a [u8], needle: &'a [u8]) -> FindReverse<'a> {
3209         let searcher = TwoWay::reverse(needle);
3210         let prestate = searcher.prefilter_state();
3211         let pos = Some(haystack.len());
3212         FindReverse { haystack, prestate, searcher, pos }
3213     }
3214
3215     fn haystack(&self) -> &'a [u8] {
3216         self.haystack
3217     }
3218
3219     fn needle(&self) -> &[u8] {
3220         self.searcher.needle()
3221     }
3222 }
3223
3224 impl<'a> Iterator for FindReverse<'a> {
3225     type Item = usize;
3226
3227     #[inline]
3228     fn next(&mut self) -> Option<usize> {
3229         let pos = match self.pos {
3230             None => return None,
3231             Some(pos) => pos,
3232         };
3233         let result = self
3234             .searcher
3235             .rfind_with(&mut self.prestate, &self.haystack[..pos]);
3236         match result {
3237             None => None,
3238             Some(i) => {
3239                 if pos == i {
3240                     self.pos = pos.checked_sub(1);
3241                 } else {
3242                     self.pos = Some(i);
3243                 }
3244                 Some(i)
3245             }
3246         }
3247     }
3248 }
3249
/// An iterator over the bytes in a byte string.
///
/// Bytes are yielded by value. The iterator can be driven from both ends and
/// always knows exactly how many bytes remain.
///
/// `'a` is the lifetime of the byte string being traversed.
#[derive(Clone, Debug)]
pub struct Bytes<'a> {
    it: slice::Iter<'a, u8>,
}

impl<'a> Iterator for Bytes<'a> {
    type Item = u8;

    /// Returns the next byte from the front, or `None` when exhausted.
    #[inline]
    fn next(&mut self) -> Option<u8> {
        self.it.next().map(|&b| b)
    }

    /// Returns the exact number of remaining bytes as both bounds.
    ///
    /// `ExactSizeIterator` requires `size_hint` to report the exact length;
    /// the default `(0, None)` would break that contract and also prevents
    /// consumers such as `collect` from preallocating.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.it.size_hint()
    }
}

impl<'a> DoubleEndedIterator for Bytes<'a> {
    /// Returns the next byte from the back, or `None` when exhausted.
    #[inline]
    fn next_back(&mut self) -> Option<u8> {
        self.it.next_back().map(|&b| b)
    }
}

impl<'a> ExactSizeIterator for Bytes<'a> {
    /// Returns the number of bytes that have not been yielded yet.
    #[inline]
    fn len(&self) -> usize {
        self.it.len()
    }
}
3280
3281 /// An iterator over the fields in a byte string, separated by whitespace.
3282 ///
3283 /// This iterator splits on contiguous runs of whitespace, such that the fields
3284 /// in `foo\t\t\n  \nbar` are `foo` and `bar`.
3285 ///
3286 /// `'a` is the lifetime of the byte string being split.
3287 #[derive(Debug)]
3288 pub struct Fields<'a> {
3289     it: FieldsWith<'a, fn(char) -> bool>,
3290 }
3291
3292 impl<'a> Fields<'a> {
3293     fn new(bytes: &'a [u8]) -> Fields<'a> {
3294         Fields { it: bytes.fields_with(|ch| ch.is_whitespace()) }
3295     }
3296 }
3297
3298 impl<'a> Iterator for Fields<'a> {
3299     type Item = &'a [u8];
3300
3301     #[inline]
3302     fn next(&mut self) -> Option<&'a [u8]> {
3303         self.it.next()
3304     }
3305 }
3306
3307 /// An iterator over fields in the byte string, separated by a predicate over
3308 /// codepoints.
3309 ///
3310 /// This iterator splits a byte string based on its predicate function such
3311 /// that the elements returned are separated by contiguous runs of codepoints
3312 /// for which the predicate returns true.
3313 ///
3314 /// `'a` is the lifetime of the byte string being split, while `F` is the type
3315 /// of the predicate, i.e., `FnMut(char) -> bool`.
3316 #[derive(Debug)]
3317 pub struct FieldsWith<'a, F> {
3318     f: F,
3319     bytes: &'a [u8],
3320     chars: CharIndices<'a>,
3321 }
3322
3323 impl<'a, F: FnMut(char) -> bool> FieldsWith<'a, F> {
3324     fn new(bytes: &'a [u8], f: F) -> FieldsWith<'a, F> {
3325         FieldsWith { f, bytes, chars: bytes.char_indices() }
3326     }
3327 }
3328
3329 impl<'a, F: FnMut(char) -> bool> Iterator for FieldsWith<'a, F> {
3330     type Item = &'a [u8];
3331
3332     #[inline]
3333     fn next(&mut self) -> Option<&'a [u8]> {
3334         let (start, mut end);
3335         loop {
3336             match self.chars.next() {
3337                 None => return None,
3338                 Some((s, e, ch)) => {
3339                     if !(self.f)(ch) {
3340                         start = s;
3341                         end = e;
3342                         break;
3343                     }
3344                 }
3345             }
3346         }
3347         while let Some((_, e, ch)) = self.chars.next() {
3348             if (self.f)(ch) {
3349                 break;
3350             }
3351             end = e;
3352         }
3353         Some(&self.bytes[start..end])
3354     }
3355 }
3356
3357 /// An iterator over substrings in a byte string, split by a separator.
3358 ///
3359 /// `'a` is the lifetime of the byte string being split.
3360 #[derive(Debug)]
3361 pub struct Split<'a> {
3362     finder: Find<'a>,
3363     /// The end position of the previous match of our splitter. The element
3364     /// we yield corresponds to the substring starting at `last` up to the
3365     /// beginning of the next match of the splitter.
3366     last: usize,
3367     /// Only set when iteration is complete. A corner case here is when a
3368     /// splitter is matched at the end of the haystack. At that point, we still
3369     /// need to yield an empty string following it.
3370     done: bool,
3371 }
3372
3373 impl<'a> Split<'a> {
3374     fn new(haystack: &'a [u8], splitter: &'a [u8]) -> Split<'a> {
3375         let finder = haystack.find_iter(splitter);
3376         Split { finder, last: 0, done: false }
3377     }
3378 }
3379
3380 impl<'a> Iterator for Split<'a> {
3381     type Item = &'a [u8];
3382
3383     #[inline]
3384     fn next(&mut self) -> Option<&'a [u8]> {
3385         let haystack = self.finder.haystack;
3386         match self.finder.next() {
3387             Some(start) => {
3388                 let next = &haystack[self.last..start];
3389                 self.last = start + self.finder.searcher.needle().len();
3390                 Some(next)
3391             }
3392             None => {
3393                 if self.last >= haystack.len() {
3394                     if !self.done {
3395                         self.done = true;
3396                         Some(b"")
3397                     } else {
3398                         None
3399                     }
3400                 } else {
3401                     let s = &haystack[self.last..];
3402                     self.last = haystack.len();
3403                     self.done = true;
3404                     Some(s)
3405                 }
3406             }
3407         }
3408     }
3409 }
3410
3411 /// An iterator over substrings in a byte string, split by a separator, in
3412 /// reverse.
3413 ///
3414 /// `'a` is the lifetime of the byte string being split, while `F` is the type
3415 /// of the predicate, i.e., `FnMut(char) -> bool`.
3416 #[derive(Debug)]
3417 pub struct SplitReverse<'a> {
3418     finder: FindReverse<'a>,
3419     /// The end position of the previous match of our splitter. The element
3420     /// we yield corresponds to the substring starting at `last` up to the
3421     /// beginning of the next match of the splitter.
3422     last: usize,
3423     /// Only set when iteration is complete. A corner case here is when a
3424     /// splitter is matched at the end of the haystack. At that point, we still
3425     /// need to yield an empty string following it.
3426     done: bool,
3427 }
3428
3429 impl<'a> SplitReverse<'a> {
3430     fn new(haystack: &'a [u8], splitter: &'a [u8]) -> SplitReverse<'a> {
3431         let finder = haystack.rfind_iter(splitter);
3432         SplitReverse { finder, last: haystack.len(), done: false }
3433     }
3434 }
3435
3436 impl<'a> Iterator for SplitReverse<'a> {
3437     type Item = &'a [u8];
3438
3439     #[inline]
3440     fn next(&mut self) -> Option<&'a [u8]> {
3441         let haystack = self.finder.haystack();
3442         match self.finder.next() {
3443             Some(start) => {
3444                 let nlen = self.finder.needle().len();
3445                 let next = &haystack[start + nlen..self.last];
3446                 self.last = start;
3447                 Some(next)
3448             }
3449             None => {
3450                 if self.last == 0 {
3451                     if !self.done {
3452                         self.done = true;
3453                         Some(b"")
3454                     } else {
3455                         None
3456                     }
3457                 } else {
3458                     let s = &haystack[..self.last];
3459                     self.last = 0;
3460                     self.done = true;
3461                     Some(s)
3462                 }
3463             }
3464         }
3465     }
3466 }
3467
/// An iterator over at most `n` substrings in a byte string, split by a
/// separator.
///
/// `'a` is the lifetime of the byte string being split and of the separator.
#[derive(Debug)]
pub struct SplitN<'a> {
    /// The underlying, unbounded split iterator.
    split: Split<'a>,
    /// The maximum number of substrings to yield.
    limit: usize,
    /// The number of times `next` has been called so far.
    count: usize,
}
3479
3480 impl<'a> SplitN<'a> {
3481     fn new(
3482         haystack: &'a [u8],
3483         splitter: &'a [u8],
3484         limit: usize,
3485     ) -> SplitN<'a> {
3486         let split = haystack.split_str(splitter);
3487         SplitN { split, limit, count: 0 }
3488     }
3489 }
3490
3491 impl<'a> Iterator for SplitN<'a> {
3492     type Item = &'a [u8];
3493
3494     #[inline]
3495     fn next(&mut self) -> Option<&'a [u8]> {
3496         self.count += 1;
3497         if self.count > self.limit || self.split.done {
3498             None
3499         } else if self.count == self.limit {
3500             Some(&self.split.finder.haystack[self.split.last..])
3501         } else {
3502             self.split.next()
3503         }
3504     }
3505 }
3506
/// An iterator over at most `n` substrings in a byte string, split by a
/// separator, in reverse.
///
/// `'a` is the lifetime of the byte string being split and of the separator.
#[derive(Debug)]
pub struct SplitNReverse<'a> {
    /// The underlying, unbounded reverse split iterator.
    split: SplitReverse<'a>,
    /// The maximum number of substrings to yield.
    limit: usize,
    /// The number of times `next` has been called so far.
    count: usize,
}
3518
3519 impl<'a> SplitNReverse<'a> {
3520     fn new(
3521         haystack: &'a [u8],
3522         splitter: &'a [u8],
3523         limit: usize,
3524     ) -> SplitNReverse<'a> {
3525         let split = haystack.rsplit_str(splitter);
3526         SplitNReverse { split, limit, count: 0 }
3527     }
3528 }
3529
3530 impl<'a> Iterator for SplitNReverse<'a> {
3531     type Item = &'a [u8];
3532
3533     #[inline]
3534     fn next(&mut self) -> Option<&'a [u8]> {
3535         self.count += 1;
3536         if self.count > self.limit || self.split.done {
3537             None
3538         } else if self.count == self.limit {
3539             Some(&self.split.finder.haystack()[..self.split.last])
3540         } else {
3541             self.split.next()
3542         }
3543     }
3544 }
3545
/// An iterator over all lines in a byte string, without their terminators.
///
/// For this iterator, the only line terminators recognized are `\r\n` and
/// `\n`.
///
/// `'a` is the lifetime of the byte string being iterated over.
pub struct Lines<'a> {
    /// The underlying iterator, which yields each line with its terminator
    /// still attached. The terminator is stripped when a line is yielded.
    it: LinesWithTerminator<'a>,
}
3555
3556 impl<'a> Lines<'a> {
3557     fn new(bytes: &'a [u8]) -> Lines<'a> {
3558         Lines { it: LinesWithTerminator::new(bytes) }
3559     }
3560 }
3561
3562 impl<'a> Iterator for Lines<'a> {
3563     type Item = &'a [u8];
3564
3565     #[inline]
3566     fn next(&mut self) -> Option<&'a [u8]> {
3567         let mut line = self.it.next()?;
3568         if line.last_byte() == Some(b'\n') {
3569             line = &line[..line.len() - 1];
3570             if line.last_byte() == Some(b'\r') {
3571                 line = &line[..line.len() - 1];
3572             }
3573         }
3574         Some(line)
3575     }
3576 }
3577
/// An iterator over all lines in a byte string, including their terminators.
///
/// For this iterator, the only line terminator recognized is `\n`. (Since
/// line terminators are included, this also handles `\r\n` line endings.)
///
/// Line terminators are only included if they are present in the original
/// byte string. For example, the last line in a byte string may not end with
/// a line terminator.
///
/// Concatenating all elements yielded by this iterator is guaranteed to yield
/// the original byte string.
///
/// `'a` is the lifetime of the byte string being iterated over.
#[derive(Debug)]
pub struct LinesWithTerminator<'a> {
    /// The portion of the byte string that has not been yielded yet.
    bytes: &'a [u8],
}
3594
3595 impl<'a> LinesWithTerminator<'a> {
3596     fn new(bytes: &'a [u8]) -> LinesWithTerminator<'a> {
3597         LinesWithTerminator { bytes }
3598     }
3599 }
3600
3601 impl<'a> Iterator for LinesWithTerminator<'a> {
3602     type Item = &'a [u8];
3603
3604     #[inline]
3605     fn next(&mut self) -> Option<&'a [u8]> {
3606         match self.bytes.find_byte(b'\n') {
3607             None if self.bytes.is_empty() => None,
3608             None => {
3609                 let line = self.bytes;
3610                 self.bytes = b"";
3611                 Some(line)
3612             }
3613             Some(end) => {
3614                 let line = &self.bytes[..end + 1];
3615                 self.bytes = &self.bytes[end + 1..];
3616                 Some(line)
3617             }
3618         }
3619     }
3620 }
3621
// Unit tests for lossy UTF-8 conversion and for the panicking cases of
// `copy_within_str`.
#[cfg(test)]
mod tests {
    use ext_slice::{ByteSlice, B};
    use tests::LOSSY_TESTS;

    // For every `(expected, input)` pair in `LOSSY_TESTS`, checks that
    // `to_str_lossy`, `to_str_lossy_into` and std's `String::from_utf8_lossy`
    // all produce the expected bytes.
    #[test]
    fn to_str_lossy() {
        for (i, &(expected, input)) in LOSSY_TESTS.iter().enumerate() {
            let got = B(input).to_str_lossy();
            assert_eq!(
                expected.as_bytes(),
                got.as_bytes(),
                "to_str_lossy(ith: {:?}, given: {:?})",
                i,
                input,
            );

            let mut got = String::new();
            B(input).to_str_lossy_into(&mut got);
            assert_eq!(
                expected.as_bytes(),
                got.as_bytes(),
                "to_str_lossy_into",
            );

            // Cross-check the expectation against the standard library.
            let got = String::from_utf8_lossy(input);
            assert_eq!(expected.as_bytes(), got.as_bytes(), "std");
        }
    }

    // Range 0..2 with second argument 5 on a 6-byte buffer. Presumably the
    // second argument is the destination offset (as with
    // `slice::copy_within`), so the two bytes would not fit.
    #[test]
    #[should_panic]
    fn copy_within_fail1() {
        let mut buf = *b"foobar";
        let s = &mut buf;
        s.copy_within_str(0..2, 5);
    }

    // The range 3..2 has its start after its end.
    #[test]
    #[should_panic]
    fn copy_within_fail2() {
        let mut buf = *b"foobar";
        let s = &mut buf;
        s.copy_within_str(3..2, 0);
    }

    // The range 5..7 ends past the 6-byte buffer.
    #[test]
    #[should_panic]
    fn copy_within_fail3() {
        let mut buf = *b"foobar";
        let s = &mut buf;
        s.copy_within_str(5..7, 0);
    }

    // Range 0..1 with second argument 6 on a 6-byte buffer. Presumably the
    // second argument is the destination offset, leaving no room for the
    // copied byte.
    #[test]
    #[should_panic]
    fn copy_within_fail4() {
        let mut buf = *b"foobar";
        let s = &mut buf;
        s.copy_within_str(0..1, 6);
    }
}