1 #[cfg(feature = "std")]
3 #[cfg(feature = "std")]
5 #[cfg(feature = "std")]
14 use memchr
::{memchr, memrchr}
;
19 #[cfg(feature = "std")]
21 use search
::{PrefilterState, TwoWay}
;
22 #[cfg(feature = "unicode")]
24 whitespace_len_fwd
, whitespace_len_rev
, GraphemeIndices
, Graphemes
,
25 SentenceIndices
, Sentences
, WordIndices
, Words
, WordsWithBreakIndices
,
28 use utf8
::{self, CharIndices, Chars, Utf8Chunks, Utf8Error}
;
30 /// A short-hand constructor for building a `&[u8]`.
32 /// This idiosyncratic constructor is useful for concisely building byte string
33 /// slices. Its primary utility is in conveniently writing byte string literals
34 /// in a uniform way. For example, consider this code that does not compile:
37 /// let strs = vec![b"a", b"xy"];
40 /// The above code doesn't compile because the type of the byte string literal
41 /// `b"a"` is `&'static [u8; 1]`, and the type of `b"xy"` is
42 /// `&'static [u8; 2]`. Since their types aren't the same, they can't be stored
43 /// in the same `Vec`. (This is dissimilar from normal Unicode string slices,
44 /// where both `"a"` and `"xy"` have the same type of `&'static str`.)
46 /// One way of getting the above code to compile is to convert byte strings to
47 /// slices. You might try this:
50 /// let strs = vec![&b"a", &b"xy"];
53 /// But this just creates values with type `& &'static [u8; 1]` and
54 /// `& &'static [u8; 2]`. Instead, you need to force the issue like so:
57 /// let strs = vec![&b"a"[..], &b"xy"[..]];
59 /// let strs = vec![b"a".as_ref(), b"xy".as_ref()];
62 /// But neither of these are particularly convenient to type, especially when
63 /// it's something as common as a string literal. Thus, this constructor
64 /// permits writing the following instead:
69 /// let strs = vec![B("a"), B(b"xy")];
72 /// Notice that this also lets you mix and match both string literals and byte
73 /// string literals. This can be quite convenient!
74 #[allow(non_snake_case)]
76 pub fn B
<'a
, B
: ?Sized
+ AsRef
<[u8]>>(bytes
: &'a B
) -> &'a
[u8] {
80 impl ByteSlice
for [u8] {
82 fn as_bytes(&self) -> &[u8] {
87 fn as_bytes_mut(&mut self) -> &mut [u8] {
92 /// Ensure that callers cannot implement `ByteSlice` by making an
93 /// unimplementable trait its super trait.
95 impl Sealed
for [u8] {}
97 /// A trait that extends `&[u8]` with string oriented methods.
98 pub trait ByteSlice
: Sealed
{
99 /// A method for accessing the raw bytes of this type. This is always a
100 /// no-op and callers shouldn't care about it. This only exists for making
101 /// the extension trait work.
103 fn as_bytes(&self) -> &[u8];
105 /// A method for accessing the raw bytes of this type, mutably. This is
106 /// always a no-op and callers shouldn't care about it. This only exists
107 /// for making the extension trait work.
109 fn as_bytes_mut(&mut self) -> &mut [u8];
111 /// Return this byte slice as a `&BStr`.
113 /// Using `&BStr` is useful because of its `fmt::Debug` representation
114 /// and various other trait implementations (such as `PartialEq` and
115 /// `PartialOrd`). In particular, the `Debug` implementation for `BStr`
116 /// shows its bytes as a normal string. For invalid UTF-8, hex escape
117 /// sequences are used.
124 /// use bstr::ByteSlice;
126 /// println!("{:?}", b"foo\xFFbar".as_bstr());
129 fn as_bstr(&self) -> &BStr
{
130 BStr
::new(self.as_bytes())
133 /// Return this byte slice as a `&mut BStr`.
135 /// Using `&mut BStr` is useful because of its `fmt::Debug` representation
136 /// and various other trait implementations (such as `PartialEq` and
137 /// `PartialOrd`). In particular, the `Debug` implementation for `BStr`
138 /// shows its bytes as a normal string. For invalid UTF-8, hex escape
139 /// sequences are used.
146 /// use bstr::ByteSlice;
148 /// let mut bytes = *b"foo\xFFbar";
149 /// println!("{:?}", &mut bytes.as_bstr_mut());
152 fn as_bstr_mut(&mut self) -> &mut BStr
{
153 BStr
::new_mut(self.as_bytes_mut())
156 /// Create an immutable byte string from an OS string slice.
158 /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
159 /// this returns `None` if the given OS string is not valid UTF-8. (For
160 /// example, on Windows, file paths are allowed to be a sequence of
161 /// arbitrary 16-bit integers. Not all such sequences can be transcoded to
169 /// use std::ffi::OsStr;
171 /// use bstr::{B, ByteSlice};
173 /// let os_str = OsStr::new("foo");
174 /// let bs = <[u8]>::from_os_str(os_str).expect("should be valid UTF-8");
175 /// assert_eq!(bs, B("foo"));
177 #[cfg(feature = "std")]
179 fn from_os_str(os_str
: &OsStr
) -> Option
<&[u8]> {
182 fn imp(os_str
: &OsStr
) -> Option
<&[u8]> {
183 use std
::os
::unix
::ffi
::OsStrExt
;
185 Some(os_str
.as_bytes())
190 fn imp(os_str
: &OsStr
) -> Option
<&[u8]> {
191 os_str
.to_str().map(|s
| s
.as_bytes())
197 /// Create an immutable byte string from a file path.
199 /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
200 /// this returns `None` if the given path is not valid UTF-8. (For example,
201 /// on Windows, file paths are allowed to be a sequence of arbitrary 16-bit
202 /// integers. Not all such sequences can be transcoded to valid UTF-8.)
209 /// use std::path::Path;
211 /// use bstr::{B, ByteSlice};
213 /// let path = Path::new("foo");
214 /// let bs = <[u8]>::from_path(path).expect("should be valid UTF-8");
215 /// assert_eq!(bs, B("foo"));
217 #[cfg(feature = "std")]
219 fn from_path(path
: &Path
) -> Option
<&[u8]> {
220 Self::from_os_str(path
.as_os_str())
223 /// Safely convert this byte string into a `&str` if it's valid UTF-8.
225 /// If this byte string is not valid UTF-8, then an error is returned. The
226 /// error returned indicates the first invalid byte found and the length
229 /// In cases where a lossy conversion to `&str` is acceptable, then use one
230 /// of the [`to_str_lossy`](trait.ByteSlice.html#method.to_str_lossy) or
231 /// [`to_str_lossy_into`](trait.ByteSlice.html#method.to_str_lossy_into)
239 /// use bstr::{B, ByteSlice, ByteVec};
241 /// # fn example() -> Result<(), bstr::Utf8Error> {
242 /// let s = B("☃βツ").to_str()?;
243 /// assert_eq!("☃βツ", s);
245 /// let mut bstring = <Vec<u8>>::from("☃βツ");
246 /// bstring.push(b'\xFF');
247 /// let err = bstring.to_str().unwrap_err();
248 /// assert_eq!(8, err.valid_up_to());
249 /// # Ok(()) }; example().unwrap()
252 fn to_str(&self) -> Result
<&str, Utf8Error
> {
253 utf8
::validate(self.as_bytes()).map(|_
| {
254 // SAFETY: This is safe because of the guarantees provided by
256 unsafe { str::from_utf8_unchecked(self.as_bytes()) }
260 /// Unsafely convert this byte string into a `&str`, without checking for
265 /// Callers *must* ensure that this byte string is valid UTF-8 before
266 /// calling this method. Converting a byte string into a `&str` that is
267 /// not valid UTF-8 is considered undefined behavior.
269 /// This routine is useful in performance sensitive contexts where the
270 /// UTF-8 validity of the byte string is already known and it is
271 /// undesirable to pay the cost of an additional UTF-8 validation check
272 /// that [`to_str`](trait.ByteSlice.html#method.to_str) performs.
279 /// use bstr::{B, ByteSlice};
281 /// // SAFETY: This is safe because string literals are guaranteed to be
282 /// // valid UTF-8 by the Rust compiler.
283 /// let s = unsafe { B("☃βツ").to_str_unchecked() };
284 /// assert_eq!("☃βツ", s);
287 unsafe fn to_str_unchecked(&self) -> &str {
288 str::from_utf8_unchecked(self.as_bytes())
291 /// Convert this byte string to a valid UTF-8 string by replacing invalid
292 /// UTF-8 bytes with the Unicode replacement codepoint (`U+FFFD`).
294 /// If the byte string is already valid UTF-8, then no copying or
295 /// allocation is performed and a borrowed string slice is returned. If
296 /// the byte string is not valid UTF-8, then an owned string buffer is
297 /// returned with invalid bytes replaced by the replacement codepoint.
299 /// This method uses the "substitution of maximal subparts" (Unicode
300 /// Standard, Chapter 3, Section 9) strategy for inserting the replacement
301 /// codepoint. Specifically, a replacement codepoint is inserted whenever a
302 /// byte is found that cannot possibly lead to a valid code unit sequence.
303 /// If there were previous bytes that represented a prefix of a well-formed
304 /// code unit sequence, then all of those bytes are substituted with a
305 /// single replacement codepoint. The "substitution of maximal subparts"
306 /// strategy is the same strategy used by
307 /// [W3C's Encoding standard](https://www.w3.org/TR/encoding/).
308 /// For a more precise description of the maximal subpart strategy, see
309 /// the Unicode Standard, Chapter 3, Section 9. See also
310 /// [Public Review Issue #121](http://www.unicode.org/review/pr-121.html).
312 /// N.B. Rust's standard library also appears to use the same strategy,
313 /// but it does not appear to be an API guarantee.
320 /// use std::borrow::Cow;
322 /// use bstr::ByteSlice;
324 /// let mut bstring = <Vec<u8>>::from("☃βツ");
325 /// assert_eq!(Cow::Borrowed("☃βツ"), bstring.to_str_lossy());
327 /// // Add a byte that makes the sequence invalid.
328 /// bstring.push(b'\xFF');
329 /// assert_eq!(Cow::Borrowed("☃βツ\u{FFFD}"), bstring.to_str_lossy());
332 /// This demonstrates the "maximal subpart" substitution logic.
335 /// use bstr::{B, ByteSlice};
337 /// // \x61 is the ASCII codepoint for 'a'.
338 /// // \xF1\x80\x80 is a valid 3-byte code unit prefix.
339 /// // \xE1\x80 is a valid 2-byte code unit prefix.
340 /// // \xC2 is a valid 1-byte code unit prefix.
341 /// // \x62 is the ASCII codepoint for 'b'.
343 /// // In sum, each of the prefixes is replaced by a single replacement
344 /// // codepoint since none of the prefixes are properly completed. This
345 /// // is in contrast to other strategies that might insert a replacement
346 /// // codepoint for every single byte.
347 /// let bs = B(b"\x61\xF1\x80\x80\xE1\x80\xC2\x62");
348 /// assert_eq!("a\u{FFFD}\u{FFFD}\u{FFFD}b", bs.to_str_lossy());
350 #[cfg(feature = "std")]
352 fn to_str_lossy(&self) -> Cow
<str> {
353 match utf8
::validate(self.as_bytes()) {
355 // SAFETY: This is safe because of the guarantees provided by
358 Cow
::Borrowed(str::from_utf8_unchecked(self.as_bytes()))
362 let mut lossy
= String
::with_capacity(self.as_bytes().len());
364 self.as_bytes().split_at(err
.valid_up_to());
365 // SAFETY: This is safe because utf8::validate guarantees
366 // that all of `valid` is valid UTF-8.
367 lossy
.push_str(unsafe { str::from_utf8_unchecked(valid) }
);
368 lossy
.push_str("\u{FFFD}");
369 if let Some(len
) = err
.error_len() {
370 after
[len
..].to_str_lossy_into(&mut lossy
);
377 /// Copy the contents of this byte string into the given owned string
378 /// buffer, while replacing invalid UTF-8 code unit sequences with the
379 /// Unicode replacement codepoint (`U+FFFD`).
381 /// This method uses the same "substitution of maximal subparts" strategy
382 /// for inserting the replacement codepoint as the
383 /// [`to_str_lossy`](trait.ByteSlice.html#method.to_str_lossy) method.
385 /// This routine is useful for amortizing allocation. However, unlike
386 /// `to_str_lossy`, this routine will _always_ copy the contents of this
387 /// byte string into the destination buffer, even if this byte string is
395 /// use std::borrow::Cow;
397 /// use bstr::ByteSlice;
399 /// let mut bstring = <Vec<u8>>::from("☃βツ");
400 /// // Add a byte that makes the sequence invalid.
401 /// bstring.push(b'\xFF');
403 /// let mut dest = String::new();
404 /// bstring.to_str_lossy_into(&mut dest);
405 /// assert_eq!("☃βツ\u{FFFD}", dest);
407 #[cfg(feature = "std")]
409 fn to_str_lossy_into(&self, dest
: &mut String
) {
410 let mut bytes
= self.as_bytes();
411 dest
.reserve(bytes
.len());
413 match utf8
::validate(bytes
) {
415 // SAFETY: This is safe because utf8::validate guarantees
416 // that all of `bytes` is valid UTF-8.
417 dest
.push_str(unsafe { str::from_utf8_unchecked(bytes) }
);
421 let (valid
, after
) = bytes
.split_at(err
.valid_up_to());
422 // SAFETY: This is safe because utf8::validate guarantees
423 // that all of `valid` is valid UTF-8.
424 dest
.push_str(unsafe { str::from_utf8_unchecked(valid) }
);
425 dest
.push_str("\u{FFFD}");
426 match err
.error_len() {
428 Some(len
) => bytes
= &after
[len
..],
435 /// Create an OS string slice from this byte string.
437 /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
438 /// this returns a UTF-8 decoding error if this byte string is not valid
439 /// UTF-8. (For example, on Windows, file paths are allowed to be a
440 /// sequence of arbitrary 16-bit integers. There is no obvious mapping from
441 /// an arbitrary sequence of 8-bit integers to an arbitrary sequence of
442 /// 16-bit integers.)
449 /// use bstr::{B, ByteSlice};
451 /// let os_str = b"foo".to_os_str().expect("should be valid UTF-8");
452 /// assert_eq!(os_str, "foo");
454 #[cfg(feature = "std")]
456 fn to_os_str(&self) -> Result
<&OsStr
, Utf8Error
> {
459 fn imp(bytes
: &[u8]) -> Result
<&OsStr
, Utf8Error
> {
460 use std
::os
::unix
::ffi
::OsStrExt
;
462 Ok(OsStr
::from_bytes(bytes
))
467 fn imp(bytes
: &[u8]) -> Result
<&OsStr
, Utf8Error
> {
468 bytes
.to_str().map(OsStr
::new
)
474 /// Lossily create an OS string slice from this byte string.
476 /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
477 /// this will perform a UTF-8 check and lossily convert this byte string
478 /// into valid UTF-8 using the Unicode replacement codepoint.
480 /// Note that this can prevent the correct roundtripping of file paths on
481 /// non-Unix systems such as Windows, where file paths are an arbitrary
482 /// sequence of 16-bit integers.
489 /// use bstr::ByteSlice;
491 /// let os_str = b"foo\xFFbar".to_os_str_lossy();
492 /// assert_eq!(os_str.to_string_lossy(), "foo\u{FFFD}bar");
494 #[cfg(feature = "std")]
496 fn to_os_str_lossy(&self) -> Cow
<OsStr
> {
499 fn imp(bytes
: &[u8]) -> Cow
<OsStr
> {
500 use std
::os
::unix
::ffi
::OsStrExt
;
502 Cow
::Borrowed(OsStr
::from_bytes(bytes
))
507 fn imp(bytes
: &[u8]) -> Cow
<OsStr
> {
508 use std
::ffi
::OsString
;
510 match bytes
.to_str_lossy() {
511 Cow
::Borrowed(x
) => Cow
::Borrowed(OsStr
::new(x
)),
512 Cow
::Owned(x
) => Cow
::Owned(OsString
::from(x
)),
519 /// Create a path slice from this byte string.
521 /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
522 /// this returns a UTF-8 decoding error if this byte string is not valid
523 /// UTF-8. (For example, on Windows, file paths are allowed to be a
524 /// sequence of arbitrary 16-bit integers. There is no obvious mapping from
525 /// an arbitrary sequence of 8-bit integers to an arbitrary sequence of
526 /// 16-bit integers.)
533 /// use bstr::ByteSlice;
535 /// let path = b"foo".to_path().expect("should be valid UTF-8");
536 /// assert_eq!(path.as_os_str(), "foo");
538 #[cfg(feature = "std")]
540 fn to_path(&self) -> Result
<&Path
, Utf8Error
> {
541 self.to_os_str().map(Path
::new
)
544 /// Lossily create a path slice from this byte string.
546 /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
547 /// this will perform a UTF-8 check and lossily convert this byte string
548 /// into valid UTF-8 using the Unicode replacement codepoint.
550 /// Note that this can prevent the correct roundtripping of file paths on
551 /// non-Unix systems such as Windows, where file paths are an arbitrary
552 /// sequence of 16-bit integers.
559 /// use bstr::ByteSlice;
561 /// let bs = b"foo\xFFbar";
562 /// let path = bs.to_path_lossy();
563 /// assert_eq!(path.to_string_lossy(), "foo\u{FFFD}bar");
565 #[cfg(feature = "std")]
567 fn to_path_lossy(&self) -> Cow
<Path
> {
568 use std
::path
::PathBuf
;
570 match self.to_os_str_lossy() {
571 Cow
::Borrowed(x
) => Cow
::Borrowed(Path
::new(x
)),
572 Cow
::Owned(x
) => Cow
::Owned(PathBuf
::from(x
)),
576 /// Create a new byte string by repeating this byte string `n` times.
580 /// This function panics if the capacity of the new byte string would
588 /// use bstr::{B, ByteSlice};
590 /// assert_eq!(b"foo".repeatn(4), B("foofoofoofoo"));
591 /// assert_eq!(b"foo".repeatn(0), B(""));
593 #[cfg(feature = "std")]
595 fn repeatn(&self, n
: usize) -> Vec
<u8> {
596 let bs
= self.as_bytes();
597 let mut dst
= vec
![0; bs
.len() * n
];
599 dst
[i
* bs
.len()..(i
+ 1) * bs
.len()].copy_from_slice(bs
);
604 /// Returns true if and only if this byte string contains the given needle.
611 /// use bstr::ByteSlice;
613 /// assert!(b"foo bar".contains_str("foo"));
614 /// assert!(b"foo bar".contains_str("bar"));
615 /// assert!(!b"foo".contains_str("foobar"));
618 fn contains_str
<B
: AsRef
<[u8]>>(&self, needle
: B
) -> bool
{
619 self.find(needle
).is_some()
622 /// Returns true if and only if this byte string has the given prefix.
629 /// use bstr::ByteSlice;
631 /// assert!(b"foo bar".starts_with_str("foo"));
632 /// assert!(!b"foo bar".starts_with_str("bar"));
633 /// assert!(!b"foo".starts_with_str("foobar"));
636 fn starts_with_str
<B
: AsRef
<[u8]>>(&self, prefix
: B
) -> bool
{
637 self.as_bytes().starts_with(prefix
.as_ref())
640 /// Returns true if and only if this byte string has the given suffix.
647 /// use bstr::ByteSlice;
649 /// assert!(b"foo bar".ends_with_str("bar"));
650 /// assert!(!b"foo bar".ends_with_str("foo"));
651 /// assert!(!b"bar".ends_with_str("foobar"));
654 fn ends_with_str
<B
: AsRef
<[u8]>>(&self, suffix
: B
) -> bool
{
655 self.as_bytes().ends_with(suffix
.as_ref())
658 /// Returns the index of the first occurrence of the given needle.
660 /// The needle may be any type that can be cheaply converted into a
661 /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
663 /// Note that if you are searching for the same needle in many
664 /// different small haystacks, it may be faster to initialize a
665 /// [`Finder`](struct.Finder.html) once, and reuse it for each search.
669 /// This routine is guaranteed to have worst case linear time complexity
670 /// with respect to both the needle and the haystack. That is, this runs
671 /// in `O(needle.len() + haystack.len())` time.
673 /// This routine is also guaranteed to have worst case constant space
681 /// use bstr::ByteSlice;
683 /// let s = b"foo bar baz";
684 /// assert_eq!(Some(0), s.find("foo"));
685 /// assert_eq!(Some(4), s.find("bar"));
686 /// assert_eq!(None, s.find("quux"));
689 fn find
<B
: AsRef
<[u8]>>(&self, needle
: B
) -> Option
<usize> {
690 Finder
::new(needle
.as_ref()).find(self.as_bytes())
693 /// Returns the index of the last occurrence of the given needle.
695 /// The needle may be any type that can be cheaply converted into a
696 /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
698 /// Note that if you are searching for the same needle in many
699 /// different small haystacks, it may be faster to initialize a
700 /// [`FinderReverse`](struct.FinderReverse.html) once, and reuse it for
705 /// This routine is guaranteed to have worst case linear time complexity
706 /// with respect to both the needle and the haystack. That is, this runs
707 /// in `O(needle.len() + haystack.len())` time.
709 /// This routine is also guaranteed to have worst case constant space
717 /// use bstr::ByteSlice;
719 /// let s = b"foo bar baz";
720 /// assert_eq!(Some(0), s.rfind("foo"));
721 /// assert_eq!(Some(4), s.rfind("bar"));
722 /// assert_eq!(Some(8), s.rfind("ba"));
723 /// assert_eq!(None, s.rfind("quux"));
726 fn rfind
<B
: AsRef
<[u8]>>(&self, needle
: B
) -> Option
<usize> {
727 FinderReverse
::new(needle
.as_ref()).rfind(self.as_bytes())
730 /// Returns an iterator of the non-overlapping occurrences of the given
731 /// needle. The iterator yields byte offset positions indicating the start
736 /// This routine is guaranteed to have worst case linear time complexity
737 /// with respect to both the needle and the haystack. That is, this runs
738 /// in `O(needle.len() + haystack.len())` time.
740 /// This routine is also guaranteed to have worst case constant space
748 /// use bstr::ByteSlice;
750 /// let s = b"foo bar foo foo quux foo";
751 /// let matches: Vec<usize> = s.find_iter("foo").collect();
752 /// assert_eq!(matches, vec![0, 8, 12, 21]);
755 /// An empty string matches at every position, including the position
756 /// immediately following the last byte:
759 /// use bstr::ByteSlice;
761 /// let matches: Vec<usize> = b"foo".find_iter("").collect();
762 /// assert_eq!(matches, vec![0, 1, 2, 3]);
764 /// let matches: Vec<usize> = b"".find_iter("").collect();
765 /// assert_eq!(matches, vec![0]);
768 fn find_iter
<'a
, B
: ?Sized
+ AsRef
<[u8]>>(
772 Find
::new(self.as_bytes(), needle
.as_ref())
775 /// Returns an iterator of the non-overlapping occurrences of the given
776 /// needle in reverse. The iterator yields byte offset positions indicating
777 /// the start of each match.
781 /// This routine is guaranteed to have worst case linear time complexity
782 /// with respect to both the needle and the haystack. That is, this runs
783 /// in `O(needle.len() + haystack.len())` time.
785 /// This routine is also guaranteed to have worst case constant space
793 /// use bstr::ByteSlice;
795 /// let s = b"foo bar foo foo quux foo";
796 /// let matches: Vec<usize> = s.rfind_iter("foo").collect();
797 /// assert_eq!(matches, vec![21, 12, 8, 0]);
800 /// An empty string matches at every position, including the position
801 /// immediately following the last byte:
804 /// use bstr::ByteSlice;
806 /// let matches: Vec<usize> = b"foo".rfind_iter("").collect();
807 /// assert_eq!(matches, vec![3, 2, 1, 0]);
809 /// let matches: Vec<usize> = b"".rfind_iter("").collect();
810 /// assert_eq!(matches, vec![0]);
813 fn rfind_iter
<'a
, B
: ?Sized
+ AsRef
<[u8]>>(
816 ) -> FindReverse
<'a
> {
817 FindReverse
::new(self.as_bytes(), needle
.as_ref())
820 /// Returns the index of the first occurrence of the given byte. If the
821 /// byte does not occur in this byte string, then `None` is returned.
828 /// use bstr::ByteSlice;
830 /// assert_eq!(Some(10), b"foo bar baz".find_byte(b'z'));
831 /// assert_eq!(None, b"foo bar baz".find_byte(b'y'));
834 fn find_byte(&self, byte
: u8) -> Option
<usize> {
835 memchr(byte
, self.as_bytes())
838 /// Returns the index of the last occurrence of the given byte. If the
839 /// byte does not occur in this byte string, then `None` is returned.
846 /// use bstr::ByteSlice;
848 /// assert_eq!(Some(10), b"foo bar baz".rfind_byte(b'z'));
849 /// assert_eq!(None, b"foo bar baz".rfind_byte(b'y'));
852 fn rfind_byte(&self, byte
: u8) -> Option
<usize> {
853 memrchr(byte
, self.as_bytes())
856 /// Returns the index of the first occurrence of the given codepoint.
857 /// If the codepoint does not occur in this byte string, then `None` is
860 /// Note that if one searches for the replacement codepoint, `\u{FFFD}`,
861 /// then only explicit occurrences of that encoding will be found. Invalid
862 /// UTF-8 sequences will not be matched.
869 /// use bstr::{B, ByteSlice};
871 /// assert_eq!(Some(10), b"foo bar baz".find_char('z'));
872 /// assert_eq!(Some(4), B("αβγγδ").find_char('γ'));
873 /// assert_eq!(None, b"foo bar baz".find_char('y'));
876 fn find_char(&self, ch
: char) -> Option
<usize> {
877 self.find(ch
.encode_utf8(&mut [0; 4]))
880 /// Returns the index of the last occurrence of the given codepoint.
881 /// If the codepoint does not occur in this byte string, then `None` is
884 /// Note that if one searches for the replacement codepoint, `\u{FFFD}`,
885 /// then only explicit occurrences of that encoding will be found. Invalid
886 /// UTF-8 sequences will not be matched.
893 /// use bstr::{B, ByteSlice};
895 /// assert_eq!(Some(10), b"foo bar baz".rfind_char('z'));
896 /// assert_eq!(Some(6), B("αβγγδ").rfind_char('γ'));
897 /// assert_eq!(None, b"foo bar baz".rfind_char('y'));
900 fn rfind_char(&self, ch
: char) -> Option
<usize> {
901 self.rfind(ch
.encode_utf8(&mut [0; 4]))
904 /// Returns the index of the first occurrence of any of the bytes in the
907 /// The `byteset` may be any type that can be cheaply converted into a
908 /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but
909 /// note that passing a `&str` which contains multibyte characters may not
910 /// behave as you expect: each byte in the `&str` is treated as an
911 /// individual member of the byte set.
913 /// Note that order is irrelevant for the `byteset` parameter, and
914 /// duplicate bytes present in its body are ignored.
918 /// This routine is guaranteed to have worst case linear time complexity
919 /// with respect to both the set of bytes and the haystack. That is, this
920 /// runs in `O(byteset.len() + haystack.len())` time.
922 /// This routine is also guaranteed to have worst case constant space
930 /// use bstr::ByteSlice;
932 /// assert_eq!(b"foo bar baz".find_byteset(b"zr"), Some(6));
933 /// assert_eq!(b"foo baz bar".find_byteset(b"bzr"), Some(4));
934 /// assert_eq!(None, b"foo baz bar".find_byteset(b"\t\n"));
937 fn find_byteset
<B
: AsRef
<[u8]>>(&self, byteset
: B
) -> Option
<usize> {
938 byteset
::find(self.as_bytes(), byteset
.as_ref())
941 /// Returns the index of the first occurrence of a byte that is not a member
942 /// of the provided set.
944 /// The `byteset` may be any type that can be cheaply converted into a
945 /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but
946 /// note that passing a `&str` which contains multibyte characters may not
947 /// behave as you expect: each byte in the `&str` is treated as an
948 /// individual member of the byte set.
950 /// Note that order is irrelevant for the `byteset` parameter, and
951 /// duplicate bytes present in its body are ignored.
955 /// This routine is guaranteed to have worst case linear time complexity
956 /// with respect to both the set of bytes and the haystack. That is, this
957 /// runs in `O(byteset.len() + haystack.len())` time.
959 /// This routine is also guaranteed to have worst case constant space
967 /// use bstr::ByteSlice;
969 /// assert_eq!(b"foo bar baz".find_not_byteset(b"fo "), Some(4));
970 /// assert_eq!(b"\t\tbaz bar".find_not_byteset(b" \t\r\n"), Some(2));
971 /// assert_eq!(b"foo\nbaz\tbar".find_not_byteset(b"\t\n"), Some(0));
974 fn find_not_byteset
<B
: AsRef
<[u8]>>(&self, byteset
: B
) -> Option
<usize> {
975 byteset
::find_not(self.as_bytes(), byteset
.as_ref())
978 /// Returns the index of the last occurrence of any of the bytes in the
981 /// The `byteset` may be any type that can be cheaply converted into a
982 /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but
983 /// note that passing a `&str` which contains multibyte characters may not
984 /// behave as you expect: each byte in the `&str` is treated as an
985 /// individual member of the byte set.
987 /// Note that order is irrelevant for the `byteset` parameter, and duplicate
988 /// bytes present in its body are ignored.
992 /// This routine is guaranteed to have worst case linear time complexity
993 /// with respect to both the set of bytes and the haystack. That is, this
994 /// runs in `O(byteset.len() + haystack.len())` time.
996 /// This routine is also guaranteed to have worst case constant space
1004 /// use bstr::ByteSlice;
1006 /// assert_eq!(b"foo bar baz".rfind_byteset(b"agb"), Some(9));
1007 /// assert_eq!(b"foo baz bar".rfind_byteset(b"rabz "), Some(10));
1008 /// assert_eq!(b"foo baz bar".rfind_byteset(b"\n123"), None);
1011 fn rfind_byteset
<B
: AsRef
<[u8]>>(&self, byteset
: B
) -> Option
<usize> {
1012 byteset
::rfind(self.as_bytes(), byteset
.as_ref())
1015 /// Returns the index of the last occurrence of a byte that is not a member
1016 /// of the provided set.
1018 /// The `byteset` may be any type that can be cheaply converted into a
1019 /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but
1020 /// note that passing a `&str` which contains multibyte characters may not
1021 /// behave as you expect: each byte in the `&str` is treated as an
1022 /// individual member of the byte set.
1024 /// Note that order is irrelevant for the `byteset` parameter, and
1025 /// duplicate bytes present in its body are ignored.
1029 /// This routine is guaranteed to have worst case linear time complexity
1030 /// with respect to both the set of bytes and the haystack. That is, this
1031 /// runs in `O(byteset.len() + haystack.len())` time.
1033 /// This routine is also guaranteed to have worst case constant space
1041 /// use bstr::ByteSlice;
1043 /// assert_eq!(b"foo bar baz,\t".rfind_not_byteset(b",\t"), Some(10));
1044 /// assert_eq!(b"foo baz bar".rfind_not_byteset(b"rabz "), Some(2));
1045 /// assert_eq!(None, b"foo baz bar".rfind_not_byteset(b"barfoz "));
1048 fn rfind_not_byteset
<B
: AsRef
<[u8]>>(&self, byteset
: B
) -> Option
<usize> {
1049 byteset
::rfind_not(self.as_bytes(), byteset
.as_ref())
1052 /// Returns an iterator over the fields in a byte string, separated by
1053 /// contiguous whitespace.
1060 /// use bstr::{B, ByteSlice};
1062 /// let s = B(" foo\tbar\t\u{2003}\nquux \n");
1063 /// let fields: Vec<&[u8]> = s.fields().collect();
1064 /// assert_eq!(fields, vec![B("foo"), B("bar"), B("quux")]);
1067 /// A byte string consisting of just whitespace yields no elements:
1070 /// use bstr::{B, ByteSlice};
1072 /// assert_eq!(0, B(" \n\t\u{2003}\n \t").fields().count());
1075 fn fields(&self) -> Fields
{
1076 Fields
::new(self.as_bytes())
1079 /// Returns an iterator over the fields in a byte string, separated by
1080 /// contiguous codepoints satisfying the given predicate.
1082 /// If this byte string is not valid UTF-8, then the given closure will
1083 /// be called with a Unicode replacement codepoint when invalid UTF-8
1091 /// use bstr::{B, ByteSlice};
1093 /// let s = b"123foo999999bar1quux123456";
1094 /// let fields: Vec<&[u8]> = s.fields_with(|c| c.is_numeric()).collect();
1095 /// assert_eq!(fields, vec![B("foo"), B("bar"), B("quux")]);
1098 /// A byte string consisting of all codepoints satisfying the predicate
1099 /// yields no elements:
1102 /// use bstr::ByteSlice;
1104 /// assert_eq!(0, b"1911354563".fields_with(|c| c.is_numeric()).count());
1107 fn fields_with
<F
: FnMut(char) -> bool
>(&self, f
: F
) -> FieldsWith
<F
> {
1108 FieldsWith
::new(self.as_bytes(), f
)
1111 /// Returns an iterator over substrings of this byte string, separated
1112 /// by the given byte string. Each element yielded is guaranteed not to
1113 /// include the splitter substring.
1115 /// The splitter may be any type that can be cheaply converted into a
1116 /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
1123 /// use bstr::{B, ByteSlice};
1125 /// let x: Vec<&[u8]> = b"Mary had a little lamb".split_str(" ").collect();
1126 /// assert_eq!(x, vec![
1127 /// B("Mary"), B("had"), B("a"), B("little"), B("lamb"),
1130 /// let x: Vec<&[u8]> = b"".split_str("X").collect();
1131 /// assert_eq!(x, vec![b""]);
1133 /// let x: Vec<&[u8]> = b"lionXXtigerXleopard".split_str("X").collect();
1134 /// assert_eq!(x, vec![B("lion"), B(""), B("tiger"), B("leopard")]);
1136 /// let x: Vec<&[u8]> = b"lion::tiger::leopard".split_str("::").collect();
1137 /// assert_eq!(x, vec![B("lion"), B("tiger"), B("leopard")]);
1140 /// If a string contains multiple contiguous separators, you will end up
1141 /// with empty strings yielded by the iterator:
1144 /// use bstr::{B, ByteSlice};
1146 /// let x: Vec<&[u8]> = b"||||a||b|c".split_str("|").collect();
1147 /// assert_eq!(x, vec![
1148 /// B(""), B(""), B(""), B(""), B("a"), B(""), B("b"), B("c"),
1151 /// let x: Vec<&[u8]> = b"(///)".split_str("/").collect();
1152 /// assert_eq!(x, vec![B("("), B(""), B(""), B(")")]);
1155 /// Separators at the start or end of a string are neighbored by empty
1159 /// use bstr::{B, ByteSlice};
1161 /// let x: Vec<&[u8]> = b"010".split_str("0").collect();
1162 /// assert_eq!(x, vec![B(""), B("1"), B("")]);
1165 /// When the empty string is used as a separator, it splits every **byte**
1166 /// in the byte string, along with the beginning and end of the byte
1170 /// use bstr::{B, ByteSlice};
1172 /// let x: Vec<&[u8]> = b"rust".split_str("").collect();
1173 /// assert_eq!(x, vec![
1174 /// B(""), B("r"), B("u"), B("s"), B("t"), B(""),
1177 /// // Splitting by an empty string is not UTF-8 aware. Elements yielded
1178 /// // may not be valid UTF-8!
1179 /// let x: Vec<&[u8]> = B("☃").split_str("").collect();
1180 /// assert_eq!(x, vec![
1181 /// B(""), B(b"\xE2"), B(b"\x98"), B(b"\x83"), B(""),
1185 /// Contiguous separators, especially whitespace, can lead to possibly
1186 /// surprising behavior. For example, this code is correct:
1189 /// use bstr::{B, ByteSlice};
1191 /// let x: Vec<&[u8]> = b" a b c".split_str(" ").collect();
1192 /// assert_eq!(x, vec![
1193 /// B(""), B(""), B(""), B(""), B("a"), B(""), B("b"), B("c"),
1197 /// It does *not* give you `["a", "b", "c"]`. For that behavior, use
1198 /// [`fields`](#method.fields) instead.
// `B: ?Sized + AsRef<[u8]>` allows unsized types such as `str` and `[u8]`
// to be named as the splitter type.
1200 fn split_str
<'a
, B
: ?Sized
+ AsRef
<[u8]>>(
// Both the haystack and the splitter reach `Split::new` as plain byte
// slices (`as_bytes()` / `as_ref()`).
1204 Split
::new(self.as_bytes(), splitter
.as_ref())
1207 /// Returns an iterator over substrings of this byte string, separated by
1208 /// the given byte string, in reverse. Each element yielded is guaranteed
1209 /// not to include the splitter substring.
1211 /// The splitter may be any type that can be cheaply converted into a
1212 /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
1219 /// use bstr::{B, ByteSlice};
1221 /// let x: Vec<&[u8]> =
1222 /// b"Mary had a little lamb".rsplit_str(" ").collect();
1223 /// assert_eq!(x, vec![
1224 /// B("lamb"), B("little"), B("a"), B("had"), B("Mary"),
1227 /// let x: Vec<&[u8]> = b"".rsplit_str("X").collect();
1228 /// assert_eq!(x, vec![b""]);
1230 /// let x: Vec<&[u8]> = b"lionXXtigerXleopard".rsplit_str("X").collect();
1231 /// assert_eq!(x, vec![B("leopard"), B("tiger"), B(""), B("lion")]);
1233 /// let x: Vec<&[u8]> = b"lion::tiger::leopard".rsplit_str("::").collect();
1234 /// assert_eq!(x, vec![B("leopard"), B("tiger"), B("lion")]);
1237 /// If a string contains multiple contiguous separators, you will end up
1238 /// with empty strings yielded by the iterator:
1241 /// use bstr::{B, ByteSlice};
1243 /// let x: Vec<&[u8]> = b"||||a||b|c".rsplit_str("|").collect();
1244 /// assert_eq!(x, vec![
1245 /// B("c"), B("b"), B(""), B("a"), B(""), B(""), B(""), B(""),
1248 /// let x: Vec<&[u8]> = b"(///)".rsplit_str("/").collect();
1249 /// assert_eq!(x, vec![B(")"), B(""), B(""), B("(")]);
1252 /// Separators at the start or end of a string are neighbored by empty
1256 /// use bstr::{B, ByteSlice};
1258 /// let x: Vec<&[u8]> = b"010".rsplit_str("0").collect();
1259 /// assert_eq!(x, vec![B(""), B("1"), B("")]);
1262 /// When the empty string is used as a separator, it splits every **byte**
1263 /// in the byte string, along with the beginning and end of the byte
1267 /// use bstr::{B, ByteSlice};
1269 /// let x: Vec<&[u8]> = b"rust".rsplit_str("").collect();
1270 /// assert_eq!(x, vec![
1271 /// B(""), B("t"), B("s"), B("u"), B("r"), B(""),
1274 /// // Splitting by an empty string is not UTF-8 aware. Elements yielded
1275 /// // may not be valid UTF-8!
1276 /// let x: Vec<&[u8]> = B("☃").rsplit_str("").collect();
1277 /// assert_eq!(x, vec![B(""), B(b"\x83"), B(b"\x98"), B(b"\xE2"), B("")]);
1280 /// Contiguous separators, especially whitespace, can lead to possibly
1281 /// surprising behavior. For example, this code is correct:
1284 /// use bstr::{B, ByteSlice};
1286 /// let x: Vec<&[u8]> = b" a b c".rsplit_str(" ").collect();
1287 /// assert_eq!(x, vec![
1288 /// B("c"), B("b"), B(""), B("a"), B(""), B(""), B(""), B(""),
1292 /// It does *not* give you `["a", "b", "c"]`.
// `B: ?Sized + AsRef<[u8]>` allows unsized types such as `str` and `[u8]`
// to be named as the splitter type. The returned iterator carries the
// lifetime `'a`.
1294 fn rsplit_str
<'a
, B
: ?Sized
+ AsRef
<[u8]>>(
1297 ) -> SplitReverse
<'a
> {
// Delegates to `SplitReverse::new` with the haystack and splitter as byte
// slices.
1298 SplitReverse
::new(self.as_bytes(), splitter
.as_ref())
1301 /// Returns an iterator of at most `limit` substrings of this byte string,
1302 /// separated by the given byte string. If `limit` substrings are yielded,
1303 /// then the last substring will contain the remainder of this byte string.
1305 /// The splitter may be any type that can be cheaply converted into a
1306 /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
1313 /// use bstr::{B, ByteSlice};
1315 /// let x: Vec<_> = b"Mary had a little lamb".splitn_str(3, " ").collect();
1316 /// assert_eq!(x, vec![B("Mary"), B("had"), B("a little lamb")]);
1318 /// let x: Vec<_> = b"".splitn_str(3, "X").collect();
1319 /// assert_eq!(x, vec![b""]);
1321 /// let x: Vec<_> = b"lionXXtigerXleopard".splitn_str(3, "X").collect();
1322 /// assert_eq!(x, vec![B("lion"), B(""), B("tigerXleopard")]);
1324 /// let x: Vec<_> = b"lion::tiger::leopard".splitn_str(2, "::").collect();
1325 /// assert_eq!(x, vec![B("lion"), B("tiger::leopard")]);
1327 /// let x: Vec<_> = b"abcXdef".splitn_str(1, "X").collect();
1328 /// assert_eq!(x, vec![B("abcXdef")]);
1330 /// let x: Vec<_> = b"abcdef".splitn_str(2, "X").collect();
1331 /// assert_eq!(x, vec![B("abcdef")]);
1333 /// let x: Vec<_> = b"abcXdef".splitn_str(0, "X").collect();
1334 /// assert!(x.is_empty());
// `B: ?Sized + AsRef<[u8]>` allows unsized types such as `str` and `[u8]`
// to be named as the splitter type.
1337 fn splitn_str
<'a
, B
: ?Sized
+ AsRef
<[u8]>>(
// Delegates to `SplitN::new`; `limit` is forwarded unchanged as the third
// argument.
1342 SplitN
::new(self.as_bytes(), splitter
.as_ref(), limit
)
1345 /// Returns an iterator of at most `limit` substrings of this byte string,
1346 /// separated by the given byte string, in reverse. If `limit` substrings
1347 /// are yielded, then the last substring will contain the remainder of this
1350 /// The splitter may be any type that can be cheaply converted into a
1351 /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
1358 /// use bstr::{B, ByteSlice};
1361 /// b"Mary had a little lamb".rsplitn_str(3, " ").collect();
1362 /// assert_eq!(x, vec![B("lamb"), B("little"), B("Mary had a")]);
1364 /// let x: Vec<_> = b"".rsplitn_str(3, "X").collect();
1365 /// assert_eq!(x, vec![b""]);
1367 /// let x: Vec<_> = b"lionXXtigerXleopard".rsplitn_str(3, "X").collect();
1368 /// assert_eq!(x, vec![B("leopard"), B("tiger"), B("lionX")]);
1370 /// let x: Vec<_> = b"lion::tiger::leopard".rsplitn_str(2, "::").collect();
1371 /// assert_eq!(x, vec![B("leopard"), B("lion::tiger")]);
1373 /// let x: Vec<_> = b"abcXdef".rsplitn_str(1, "X").collect();
1374 /// assert_eq!(x, vec![B("abcXdef")]);
1376 /// let x: Vec<_> = b"abcdef".rsplitn_str(2, "X").collect();
1377 /// assert_eq!(x, vec![B("abcdef")]);
1379 /// let x: Vec<_> = b"abcXdef".rsplitn_str(0, "X").collect();
1380 /// assert!(x.is_empty());
// `B: ?Sized + AsRef<[u8]>` allows unsized types such as `str` and `[u8]`
// to be named as the splitter type. The returned iterator carries the
// lifetime `'a`.
1383 fn rsplitn_str
<'a
, B
: ?Sized
+ AsRef
<[u8]>>(
1387 ) -> SplitNReverse
<'a
> {
// Delegates to `SplitNReverse::new`; `limit` is forwarded unchanged as the
// third argument.
1388 SplitNReverse
::new(self.as_bytes(), splitter
.as_ref(), limit
)
1391 /// Replace all matches of the given needle with the given replacement, and
1392 /// return the result as a new `Vec<u8>`.
1394 /// This routine is useful as a convenience. If you need to reuse an
1395 /// allocation, use [`replace_into`](#method.replace_into) instead.
1402 /// use bstr::ByteSlice;
1404 /// let s = b"this is old".replace("old", "new");
1405 /// assert_eq!(s, "this is new".as_bytes());
1408 /// When the pattern doesn't match:
1411 /// use bstr::ByteSlice;
1413 /// let s = b"this is old".replace("nada nada", "limonada");
1414 /// assert_eq!(s, "this is old".as_bytes());
1417 /// When the needle is an empty string:
1420 /// use bstr::ByteSlice;
1422 /// let s = b"foo".replace("", "Z");
1423 /// assert_eq!(s, "ZfZoZoZ".as_bytes());
// Compiled only when the `std` feature is enabled.
1425 #[cfg(feature = "std")]
1427 fn replace
<N
: AsRef
<[u8]>, R
: AsRef
<[u8]>>(
// Start with capacity equal to the input length. This is only an initial
// reservation, not a bound on the output size.
1432 let mut dest
= Vec
::with_capacity(self.as_bytes().len());
// The replacement itself is done by `replace_into`, which writes into
// `dest`.
1433 self.replace_into(needle
, replacement
, &mut dest
);
1437 /// Replace up to `limit` matches of the given needle with the given
1438 /// replacement, and return the result as a new `Vec<u8>`.
1440 /// This routine is useful as a convenience. If you need to reuse an
1441 /// allocation, use [`replacen_into`](#method.replacen_into) instead.
1448 /// use bstr::ByteSlice;
1450 /// let s = b"foofoo".replacen("o", "z", 2);
1451 /// assert_eq!(s, "fzzfoo".as_bytes());
1454 /// When the pattern doesn't match:
1457 /// use bstr::ByteSlice;
1459 /// let s = b"foofoo".replacen("a", "z", 2);
1460 /// assert_eq!(s, "foofoo".as_bytes());
1463 /// When the needle is an empty string:
1466 /// use bstr::ByteSlice;
1468 /// let s = b"foo".replacen("", "Z", 2);
1469 /// assert_eq!(s, "ZfZoo".as_bytes());
// Compiled only when the `std` feature is enabled.
1471 #[cfg(feature = "std")]
1473 fn replacen
<N
: AsRef
<[u8]>, R
: AsRef
<[u8]>>(
// Start with capacity equal to the input length. This is only an initial
// reservation, not a bound on the output size.
1479 let mut dest
= Vec
::with_capacity(self.as_bytes().len());
// The replacement itself is done by `replacen_into`, which receives
// `limit` unchanged and writes into `dest`.
1480 self.replacen_into(needle
, replacement
, limit
, &mut dest
);
1484 /// Replace all matches of the given needle with the given replacement,
1485 /// and write the result into the provided `Vec<u8>`.
1487 /// This does **not** clear `dest` before writing to it.
1489 /// This routine is useful for reusing allocation. For a more convenient
1490 /// API, use [`replace`](#method.replace) instead.
1497 /// use bstr::ByteSlice;
1499 /// let s = b"this is old";
1501 /// let mut dest = vec![];
1502 /// s.replace_into("old", "new", &mut dest);
1503 /// assert_eq!(dest, "this is new".as_bytes());
1506 /// When the pattern doesn't match:
1509 /// use bstr::ByteSlice;
1511 /// let s = b"this is old";
1513 /// let mut dest = vec![];
1514 /// s.replace_into("nada nada", "limonada", &mut dest);
1515 /// assert_eq!(dest, "this is old".as_bytes());
1518 /// When the needle is an empty string:
1521 /// use bstr::ByteSlice;
1525 /// let mut dest = vec![];
1526 /// s.replace_into("", "Z", &mut dest);
1527 /// assert_eq!(dest, "ZfZoZoZ".as_bytes());
// Compiled only when the `std` feature is enabled.
1529 #[cfg(feature = "std")]
1531 fn replace_into
<N
: AsRef
<[u8]>, R
: AsRef
<[u8]>>(
// Shadow the generic arguments with their `&[u8]` views so the rest of the
// body works on plain byte slices.
1537 let (needle
, replacement
) = (needle
.as_ref(), replacement
.as_ref());
// `start` is the byte offset of each match yielded by `find_iter`.
// NOTE(review): the declaration of `last` is not visible in this excerpt;
// from its use it is the offset just past the previous match (presumably
// initialised to 0 -- confirm).
1540 for start
in self.find_iter(needle
) {
// Copy the unmatched gap between the previous match and this one...
1541 dest
.push_str(&self.as_bytes()[last
..start
]);
// ...then emit the replacement in place of the matched needle...
1542 dest
.push_str(replacement
);
// ...and resume copying just past the end of this match.
1543 last
= start
+ needle
.len();
// Copy whatever follows the final match. If the loop never ran, this is
// everything from `last` onward.
1545 dest
.push_str(&self.as_bytes()[last
..]);
1548 /// Replace up to `limit` matches of the given needle with the given
1549 /// replacement, and write the result into the provided `Vec<u8>`.
1551 /// This does **not** clear `dest` before writing to it.
1553 /// This routine is useful for reusing allocation. For a more convenient
1554 /// API, use [`replacen`](#method.replacen) instead.
1561 /// use bstr::ByteSlice;
1563 /// let s = b"foofoo";
1565 /// let mut dest = vec![];
1566 /// s.replacen_into("o", "z", 2, &mut dest);
1567 /// assert_eq!(dest, "fzzfoo".as_bytes());
1570 /// When the pattern doesn't match:
1573 /// use bstr::ByteSlice;
1575 /// let s = b"foofoo";
1577 /// let mut dest = vec![];
1578 /// s.replacen_into("a", "z", 2, &mut dest);
1579 /// assert_eq!(dest, "foofoo".as_bytes());
1582 /// When the needle is an empty string:
1585 /// use bstr::ByteSlice;
1589 /// let mut dest = vec![];
1590 /// s.replacen_into("", "Z", 2, &mut dest);
1591 /// assert_eq!(dest, "ZfZoo".as_bytes());
// Compiled only when the `std` feature is enabled.
1593 #[cfg(feature = "std")]
1595 fn replacen_into
<N
: AsRef
<[u8]>, R
: AsRef
<[u8]>>(
// Shadow the generic arguments with their `&[u8]` views so the rest of the
// body works on plain byte slices.
1602 let (needle
, replacement
) = (needle
.as_ref(), replacement
.as_ref());
// `.take(limit)` caps the number of matches processed. With `limit == 0`
// the loop body never runs.
// NOTE(review): the declaration of `last` is not visible in this excerpt;
// from its use it is the offset just past the previous match (presumably
// initialised to 0 -- confirm).
1605 for start
in self.find_iter(needle
).take(limit
) {
// Copy the unmatched gap between the previous match and this one...
1606 dest
.push_str(&self.as_bytes()[last
..start
]);
// ...then emit the replacement in place of the matched needle...
1607 dest
.push_str(replacement
);
// ...and resume copying just past the end of this match.
1608 last
= start
+ needle
.len();
// Copy the remainder verbatim. Any matches beyond `limit` are part of this
// tail and are left unreplaced.
1610 dest
.push_str(&self.as_bytes()[last
..]);
1613 /// Returns an iterator over the bytes in this byte string.
1620 /// use bstr::ByteSlice;
1622 /// let bs = b"foobar";
1623 /// let bytes: Vec<u8> = bs.bytes().collect();
1624 /// assert_eq!(bytes, bs);
// Wraps the slice iterator over this value's bytes in the `Bytes` struct
// (field `it`).
1627 fn bytes(&self) -> Bytes
{
1628 Bytes { it: self.as_bytes().iter() }
1631 /// Returns an iterator over the Unicode scalar values in this byte string.
1632 /// If invalid UTF-8 is encountered, then the Unicode replacement codepoint
1633 /// is yielded instead.
1640 /// use bstr::ByteSlice;
1642 /// let bs = b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61";
1643 /// let chars: Vec<char> = bs.chars().collect();
1644 /// assert_eq!(vec!['☃', '\u{FFFD}', '𝞃', '\u{FFFD}', 'a'], chars);
1647 /// Codepoints can also be iterated over in reverse:
1650 /// use bstr::ByteSlice;
1652 /// let bs = b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61";
1653 /// let chars: Vec<char> = bs.chars().rev().collect();
1654 /// assert_eq!(vec!['a', '\u{FFFD}', '𝞃', '\u{FFFD}', '☃'], chars);
// Delegates to `Chars::new` over this value's bytes.
1657 fn chars(&self) -> Chars
{
1658 Chars
::new(self.as_bytes())
1661 /// Returns an iterator over the Unicode scalar values in this byte string
1662 /// along with their starting and ending byte index positions. If invalid
1663 /// UTF-8 is encountered, then the Unicode replacement codepoint is yielded
1666 /// Note that this is slightly different from the `CharIndices` iterator
1667 /// provided by the standard library. Aside from working on possibly
1668 /// invalid UTF-8, this iterator provides both the corresponding starting
1669 /// and ending byte indices of each codepoint yielded. The ending position
1670 /// is necessary to slice the original byte string when invalid UTF-8 bytes
1671 /// are converted into a Unicode replacement codepoint, since a single
1672 /// replacement codepoint can substitute anywhere from 1 to 3 invalid bytes
1680 /// use bstr::ByteSlice;
1682 /// let bs = b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61";
1683 /// let chars: Vec<(usize, usize, char)> = bs.char_indices().collect();
1684 /// assert_eq!(chars, vec![
1686 /// (3, 4, '\u{FFFD}'),
1688 /// (8, 10, '\u{FFFD}'),
1693 /// Codepoints can also be iterated over in reverse:
1696 /// use bstr::ByteSlice;
1698 /// let bs = b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61";
1699 /// let chars: Vec<(usize, usize, char)> = bs
1703 /// assert_eq!(chars, vec![
1705 /// (8, 10, '\u{FFFD}'),
1707 /// (3, 4, '\u{FFFD}'),
// Delegates to `CharIndices::new` over this value's bytes.
1712 fn char_indices(&self) -> CharIndices
{
1713 CharIndices
::new(self.as_bytes())
1716 /// Iterate over chunks of valid UTF-8.
1718 /// The iterator returned yields chunks of valid UTF-8 separated by invalid
1719 /// UTF-8 bytes, if they exist. Invalid UTF-8 bytes are always 1-3 bytes,
1720 /// which are determined via the "substitution of maximal subparts"
1721 /// strategy described in the docs for the
1722 /// [`ByteSlice::to_str_lossy`](trait.ByteSlice.html#method.to_str_lossy)
1727 /// This example shows how to collect the valid and invalid chunks of a
1728 /// byte string separately:
1731 /// use bstr::{ByteSlice, Utf8Chunk};
1733 /// let bytes = b"foo\xFD\xFEbar\xFF";
1735 /// let (mut valid_chunks, mut invalid_chunks) = (vec![], vec![]);
1736 /// for chunk in bytes.utf8_chunks() {
1737 /// if !chunk.valid().is_empty() {
1738 /// valid_chunks.push(chunk.valid());
1740 /// if !chunk.invalid().is_empty() {
1741 /// invalid_chunks.push(chunk.invalid());
1745 /// assert_eq!(valid_chunks, vec!["foo", "bar"]);
1746 /// assert_eq!(invalid_chunks, vec![b"\xFD", b"\xFE", b"\xFF"]);
// Builds the iterator directly from its `bytes` field; construction only
// stores the byte slice.
1749 fn utf8_chunks(&self) -> Utf8Chunks
{
1750 Utf8Chunks { bytes: self.as_bytes() }
1753 /// Returns an iterator over the grapheme clusters in this byte string.
1754 /// If invalid UTF-8 is encountered, then the Unicode replacement codepoint
1755 /// is yielded instead.
1759 /// This example shows how multiple codepoints can combine to form a
1760 /// single grapheme cluster:
1763 /// use bstr::ByteSlice;
1765 /// let bs = "a\u{0300}\u{0316}\u{1F1FA}\u{1F1F8}".as_bytes();
1766 /// let graphemes: Vec<&str> = bs.graphemes().collect();
1767 /// assert_eq!(vec!["à̖", "🇺🇸"], graphemes);
1770 /// This shows that graphemes can be iterated over in reverse:
1773 /// use bstr::ByteSlice;
1775 /// let bs = "a\u{0300}\u{0316}\u{1F1FA}\u{1F1F8}".as_bytes();
1776 /// let graphemes: Vec<&str> = bs.graphemes().rev().collect();
1777 /// assert_eq!(vec!["🇺🇸", "à̖"], graphemes);
// Compiled only when the `unicode` feature is enabled.
1779 #[cfg(feature = "unicode")]
1781 fn graphemes(&self) -> Graphemes
{
// Delegates to `Graphemes::new` over this value's bytes.
1782 Graphemes
::new(self.as_bytes())
1785 /// Returns an iterator over the grapheme clusters in this byte string
1786 /// along with their starting and ending byte index positions. If invalid
1787 /// UTF-8 is encountered, then the Unicode replacement codepoint is yielded
1792 /// This example shows how to get the byte offsets of each individual
1793 /// grapheme cluster:
1796 /// use bstr::ByteSlice;
1798 /// let bs = "a\u{0300}\u{0316}\u{1F1FA}\u{1F1F8}".as_bytes();
1799 /// let graphemes: Vec<(usize, usize, &str)> =
1800 /// bs.grapheme_indices().collect();
1801 /// assert_eq!(vec![(0, 5, "à̖"), (5, 13, "🇺🇸")], graphemes);
1804 /// This example shows what happens when invalid UTF-8 is encountered. Note
1805 /// that the offsets are valid indices into the original string, and do
1806 /// not necessarily correspond to the length of the `&str` returned!
1809 /// use bstr::{ByteSlice, ByteVec};
1811 /// let mut bytes = vec![];
1812 /// bytes.push_str("a\u{0300}\u{0316}");
1813 /// bytes.push(b'\xFF');
1814 /// bytes.push_str("\u{1F1FA}\u{1F1F8}");
1816 /// let graphemes: Vec<(usize, usize, &str)> =
1817 /// bytes.grapheme_indices().collect();
1820 /// vec![(0, 5, "à̖"), (5, 6, "\u{FFFD}"), (6, 14, "🇺🇸")]
// Compiled only when the `unicode` feature is enabled.
1823 #[cfg(feature = "unicode")]
1825 fn grapheme_indices(&self) -> GraphemeIndices
{
// Delegates to `GraphemeIndices::new` over this value's bytes.
1826 GraphemeIndices
::new(self.as_bytes())
1829 /// Returns an iterator over the words in this byte string. If invalid
1830 /// UTF-8 is encountered, then the Unicode replacement codepoint is yielded
1833 /// This is similar to
1834 /// [`words_with_breaks`](trait.ByteSlice.html#method.words_with_breaks),
1835 /// except it only returns elements that contain a "word" character. A word
1836 /// character is defined by UTS #18 (Annex C) to be the combination of the
1837 /// `Alphabetic` and `Join_Control` properties, along with the
1838 /// `Decimal_Number`, `Mark` and `Connector_Punctuation` general
1841 /// Since words are made up of one or more codepoints, this iterator
1842 /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
1843 /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
1850 /// use bstr::ByteSlice;
1852 /// let bs = br#"The quick ("brown") fox can't jump 32.3 feet, right?"#;
1853 /// let words: Vec<&str> = bs.words().collect();
1854 /// assert_eq!(words, vec![
1855 /// "The", "quick", "brown", "fox", "can't",
1856 /// "jump", "32.3", "feet", "right",
// Compiled only when the `unicode` feature is enabled.
1859 #[cfg(feature = "unicode")]
1861 fn words(&self) -> Words
{
// Delegates to `Words::new` over this value's bytes.
1862 Words
::new(self.as_bytes())
1865 /// Returns an iterator over the words in this byte string along with
1866 /// their starting and ending byte index positions.
1868 /// This is similar to
1869 /// [`words_with_break_indices`](trait.ByteSlice.html#method.words_with_break_indices),
1870 /// except it only returns elements that contain a "word" character. A word
1871 /// character is defined by UTS #18 (Annex C) to be the combination of the
1872 /// `Alphabetic` and `Join_Control` properties, along with the
1873 /// `Decimal_Number`, `Mark` and `Connector_Punctuation` general
1876 /// Since words are made up of one or more codepoints, this iterator
1877 /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
1878 /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
1882 /// This example shows how to get the byte offsets of each individual
1886 /// use bstr::ByteSlice;
1888 /// let bs = b"can't jump 32.3 feet";
1889 /// let words: Vec<(usize, usize, &str)> = bs.word_indices().collect();
1890 /// assert_eq!(words, vec![
1891 /// (0, 5, "can't"),
1892 /// (6, 10, "jump"),
1893 /// (11, 15, "32.3"),
1894 /// (16, 20, "feet"),
// Compiled only when the `unicode` feature is enabled.
1897 #[cfg(feature = "unicode")]
1899 fn word_indices(&self) -> WordIndices
{
// Delegates to `WordIndices::new` over this value's bytes.
1900 WordIndices
::new(self.as_bytes())
1903 /// Returns an iterator over the words in this byte string, along with
1904 /// all breaks between the words. Concatenating all elements yielded by
1905 /// the iterator results in the original string (modulo Unicode replacement
1906 /// codepoint substitutions if invalid UTF-8 is encountered).
1908 /// Since words are made up of one or more codepoints, this iterator
1909 /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
1910 /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
1917 /// use bstr::ByteSlice;
1919 /// let bs = br#"The quick ("brown") fox can't jump 32.3 feet, right?"#;
1920 /// let words: Vec<&str> = bs.words_with_breaks().collect();
1921 /// assert_eq!(words, vec![
1922 /// "The", " ", "quick", " ", "(", "\"", "brown", "\"", ")",
1923 /// " ", "fox", " ", "can't", " ", "jump", " ", "32.3", " ", "feet",
1924 /// ",", " ", "right", "?",
// Compiled only when the `unicode` feature is enabled.
1927 #[cfg(feature = "unicode")]
1929 fn words_with_breaks(&self) -> WordsWithBreaks
{
// Delegates to `WordsWithBreaks::new` over this value's bytes.
1930 WordsWithBreaks
::new(self.as_bytes())
1933 /// Returns an iterator over the words and their byte offsets in this
1934 /// byte string, along with all breaks between the words. Concatenating
1935 /// all elements yielded by the iterator results in the original string
1936 /// (modulo Unicode replacement codepoint substitutions if invalid UTF-8 is
1939 /// Since words are made up of one or more codepoints, this iterator
1940 /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
1941 /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
1945 /// This example shows how to get the byte offsets of each individual
1949 /// use bstr::ByteSlice;
1951 /// let bs = b"can't jump 32.3 feet";
1952 /// let words: Vec<(usize, usize, &str)> =
1953 /// bs.words_with_break_indices().collect();
1954 /// assert_eq!(words, vec![
1955 /// (0, 5, "can't"),
1957 /// (6, 10, "jump"),
1959 /// (11, 15, "32.3"),
1961 /// (16, 20, "feet"),
// Compiled only when the `unicode` feature is enabled.
1964 #[cfg(feature = "unicode")]
1966 fn words_with_break_indices(&self) -> WordsWithBreakIndices
{
// Delegates to `WordsWithBreakIndices::new` over this value's bytes.
1967 WordsWithBreakIndices
::new(self.as_bytes())
1970 /// Returns an iterator over the sentences in this byte string.
1972 /// Typically, a sentence will include its trailing punctuation and
1973 /// whitespace. Concatenating all elements yielded by the iterator
1974 /// results in the original string (modulo Unicode replacement codepoint
1975 /// substitutions if invalid UTF-8 is encountered).
1977 /// Since sentences are made up of one or more codepoints, this iterator
1978 /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
1979 /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
1986 /// use bstr::ByteSlice;
1988 /// let bs = b"I want this. Not that. Right now.";
1989 /// let sentences: Vec<&str> = bs.sentences().collect();
1990 /// assert_eq!(sentences, vec![
1991 /// "I want this. ",
// Compiled only when the `unicode` feature is enabled.
1996 #[cfg(feature = "unicode")]
1998 fn sentences(&self) -> Sentences
{
// Delegates to `Sentences::new` over this value's bytes.
1999 Sentences
::new(self.as_bytes())
2002 /// Returns an iterator over the sentences in this byte string along with
2003 /// their starting and ending byte index positions.
2005 /// Typically, a sentence will include its trailing punctuation and
2006 /// whitespace. Concatenating all elements yielded by the iterator
2007 /// results in the original string (modulo Unicode replacement codepoint
2008 /// substitutions if invalid UTF-8 is encountered).
2010 /// Since sentences are made up of one or more codepoints, this iterator
2011 /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
2012 /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
2019 /// use bstr::ByteSlice;
2021 /// let bs = b"I want this. Not that. Right now.";
2022 /// let sentences: Vec<(usize, usize, &str)> =
2023 /// bs.sentence_indices().collect();
2024 /// assert_eq!(sentences, vec![
2025 /// (0, 13, "I want this. "),
2026 /// (13, 23, "Not that. "),
2027 /// (23, 33, "Right now."),
// Compiled only when the `unicode` feature is enabled.
2030 #[cfg(feature = "unicode")]
2032 fn sentence_indices(&self) -> SentenceIndices
{
// Delegates to `SentenceIndices::new` over this value's bytes.
2033 SentenceIndices
::new(self.as_bytes())
2036 /// An iterator over all lines in a byte string, without their
2039 /// For this iterator, the only line terminators recognized are `\r\n` and
2047 /// use bstr::{B, ByteSlice};
2057 /// let lines: Vec<&[u8]> = s.lines().collect();
2058 /// assert_eq!(lines, vec![
2059 /// B("foo"), B(""), B("bar"), B("baz"), B(""), B(""), B("quux"),
// Delegates to `Lines::new` over this value's bytes.
2063 fn lines(&self) -> Lines
{
2064 Lines
::new(self.as_bytes())
2067 /// An iterator over all lines in a byte string, including their
2070 /// For this iterator, the only line terminator recognized is `\n`. (Since
2071 /// line terminators are included, this also handles `\r\n` line endings.)
2073 /// Line terminators are only included if they are present in the original
2074 /// byte string. For example, the last line in a byte string may not end
2075 /// with a line terminator.
2077 /// Concatenating all elements yielded by this iterator is guaranteed to
2078 /// yield the original byte string.
2085 /// use bstr::{B, ByteSlice};
2095 /// let lines: Vec<&[u8]> = s.lines_with_terminator().collect();
2096 /// assert_eq!(lines, vec![
// Delegates to `LinesWithTerminator::new` over this value's bytes.
2107 fn lines_with_terminator(&self) -> LinesWithTerminator
{
2108 LinesWithTerminator
::new(self.as_bytes())
2111 /// Return a byte string slice with leading and trailing whitespace
2114 /// Whitespace is defined according to the terms of the `White_Space`
2115 /// Unicode property.
2122 /// use bstr::{B, ByteSlice};
2124 /// let s = B(" foo\tbar\t\u{2003}\n");
2125 /// assert_eq!(s.trim(), B("foo\tbar"));
// Compiled only when the `unicode` feature is enabled.
2127 #[cfg(feature = "unicode")]
2129 fn trim(&self) -> &[u8] {
// Composition of the two one-sided trims: the leading side first, then
// the trailing side of what remains.
2130 self.trim_start().trim_end()
2133 /// Return a byte string slice with leading whitespace removed.
2135 /// Whitespace is defined according to the terms of the `White_Space`
2136 /// Unicode property.
2143 /// use bstr::{B, ByteSlice};
2145 /// let s = B(" foo\tbar\t\u{2003}\n");
2146 /// assert_eq!(s.trim_start(), B("foo\tbar\t\u{2003}\n"));
// Compiled only when the `unicode` feature is enabled.
2148 #[cfg(feature = "unicode")]
2150 fn trim_start(&self) -> &[u8] {
// The value returned by `whitespace_len_fwd` is used as the offset at
// which the result begins (presumably the byte length of the leading
// whitespace run -- confirm against that helper).
2151 let start
= whitespace_len_fwd(self.as_bytes());
// Borrowed sub-slice of the original bytes; nothing is copied.
2152 &self.as_bytes()[start
..]
2155 /// Return a byte string slice with trailing whitespace removed.
2157 /// Whitespace is defined according to the terms of the `White_Space`
2158 /// Unicode property.
2165 /// use bstr::{B, ByteSlice};
2167 /// let s = B(" foo\tbar\t\u{2003}\n");
2168 /// assert_eq!(s.trim_end(), B(" foo\tbar"));
// Compiled only when the `unicode` feature is enabled.
2170 #[cfg(feature = "unicode")]
2172 fn trim_end(&self) -> &[u8] {
// The value returned by `whitespace_len_rev` is used as the exclusive end
// offset of the result (presumably the offset at which trailing whitespace
// begins -- confirm against that helper).
2173 let end
= whitespace_len_rev(self.as_bytes());
// Borrowed sub-slice of the original bytes; nothing is copied.
2174 &self.as_bytes()[..end
]
2177 /// Return a byte string slice with leading and trailing characters
2178 /// satisfying the given predicate removed.
2185 /// use bstr::{B, ByteSlice};
2187 /// let s = b"123foo5bar789";
2188 /// assert_eq!(s.trim_with(|c| c.is_numeric()), B("foo5bar"));
// Takes the predicate by value as `mut` so it can be lent out twice below.
2191 fn trim_with
<F
: FnMut(char) -> bool
>(&self, mut trim
: F
) -> &[u8] {
// `&mut trim` lends the same predicate to each pass (a `&mut F` is itself
// `FnMut`), so the first call does not consume it. The leading side is
// trimmed first, then the trailing side of the result.
2192 self.trim_start_with(&mut trim
).trim_end_with(&mut trim
)
2195 /// Return a byte string slice with leading characters satisfying the given
2196 /// predicate removed.
2203 /// use bstr::{B, ByteSlice};
2205 /// let s = b"123foo5bar789";
2206 /// assert_eq!(s.trim_start_with(|c| c.is_numeric()), B("foo5bar789"));
// Walks the codepoints front to back and returns the suffix that starts at
// a chosen codepoint.
2209 fn trim_start_with
<F
: FnMut(char) -> bool
>(&self, mut trim
: F
) -> &[u8] {
// Each item is a 3-tuple; only the first element `s` and the char `ch`
// are used here, and the middle element is ignored.
2210 for (s
, _
, ch
) in self.char_indices() {
// `s` is used as the start offset of the returned slice, so the result
// begins at `ch`.
// NOTE(review): the condition guarding this `return` is not visible in
// this excerpt (presumably `!trim(ch)`); likewise the value produced when
// the loop finishes without returning. Confirm against the full file.
2212 return &self.as_bytes()[s
..];
2218 /// Return a byte string slice with trailing characters satisfying the
2219 /// given predicate removed.
2226 /// use bstr::{B, ByteSlice};
2228 /// let s = b"123foo5bar789";
2229 /// assert_eq!(s.trim_end_with(|c| c.is_numeric()), B("123foo5bar"));
// Walks the codepoints back to front and returns the prefix that ends
// after a chosen codepoint.
2232 fn trim_end_with
<F
: FnMut(char) -> bool
>(&self, mut trim
: F
) -> &[u8] {
// Each item is a 3-tuple; only the second element `e` and the char `ch`
// are used here, and the first element is ignored. `.rev()` makes the scan
// start from the end.
2233 for (_
, e
, ch
) in self.char_indices().rev() {
// `e` is used as the exclusive end offset of the returned slice, so the
// result includes `ch`.
// NOTE(review): the condition guarding this `return` is not visible in
// this excerpt (presumably `!trim(ch)`); likewise the value produced when
// the loop finishes without returning. Confirm against the full file.
2235 return &self.as_bytes()[..e
];
2241 /// Returns a new `Vec<u8>` containing the lowercase equivalent of this
2244 /// In this case, lowercase is defined according to the `Lowercase` Unicode
2247 /// If invalid UTF-8 is seen, or if a character has no lowercase variant,
2248 /// then it is written to the returned byte string unchanged.
2250 /// Note that some characters in this byte string may expand into multiple
2251 /// characters when changing the case, so the number of bytes in the
2252 /// returned byte string may not be equivalent to the number of bytes in
2253 /// this byte string.
2255 /// If you'd like to reuse an allocation for performance reasons, then use
2256 /// [`to_lowercase_into`](#method.to_lowercase_into) instead.
2263 /// use bstr::{B, ByteSlice};
2265 /// let s = B("HELLO Β");
2266 /// assert_eq!("hello β".as_bytes(), s.to_lowercase().as_bytes());
2269 /// Scripts without case are not changed:
2272 /// use bstr::{B, ByteSlice};
2274 /// let s = B("农历新年");
2275 /// assert_eq!("农历新年".as_bytes(), s.to_lowercase().as_bytes());
2278 /// Invalid UTF-8 remains as is:
2281 /// use bstr::{B, ByteSlice};
2283 /// let s = B(b"FOO\xFFBAR\xE2\x98BAZ");
2284 /// assert_eq!(B(b"foo\xFFbar\xE2\x98baz"), s.to_lowercase().as_bytes());
// Compiled only when both the `std` and `unicode` features are enabled.
2286 #[cfg(all(feature = "std", feature = "unicode"))]
2288 fn to_lowercase(&self) -> Vec
<u8> {
// Start from an empty vector; `to_lowercase_into` reserves space itself.
2289 let mut buf
= vec
![];
// The conversion is done by `to_lowercase_into`, which writes into `buf`.
2290 self.to_lowercase_into(&mut buf
);
2294 /// Writes the lowercase equivalent of this byte string into the given
2295 /// buffer. The buffer is not cleared before written to.
2297 /// In this case, lowercase is defined according to the `Lowercase`
2298 /// Unicode property.
2300 /// If invalid UTF-8 is seen, or if a character has no lowercase variant,
2301 /// then it is written to the given buffer unchanged.
2303 /// Note that some characters in this byte string may expand into multiple
2304 /// characters when changing the case, so the number of bytes written to
2305 /// the given byte string may not be equivalent to the number of bytes in
2306 /// this byte string.
2308 /// If you don't need to amortize allocation and instead prefer
2309 /// convenience, then use [`to_lowercase`](#method.to_lowercase) instead.
2316 /// use bstr::{B, ByteSlice};
2318 /// let s = B("HELLO Β");
2320 /// let mut buf = vec![];
2321 /// s.to_lowercase_into(&mut buf);
2322 /// assert_eq!("hello β".as_bytes(), buf.as_bytes());
2325 /// Scripts without case are not changed:
2328 /// use bstr::{B, ByteSlice};
2330 /// let s = B("农历新年");
2332 /// let mut buf = vec![];
2333 /// s.to_lowercase_into(&mut buf);
2334 /// assert_eq!("农历新年".as_bytes(), buf.as_bytes());
2337 /// Invalid UTF-8 remains as is:
2340 /// use bstr::{B, ByteSlice};
2342 /// let s = B(b"FOO\xFFBAR\xE2\x98BAZ");
2344 /// let mut buf = vec![];
2345 /// s.to_lowercase_into(&mut buf);
2346 /// assert_eq!(B(b"foo\xFFbar\xE2\x98baz"), buf.as_bytes());
// Compiled only when both the `std` and `unicode` features are enabled.
2348 #[cfg(all(feature = "std", feature = "unicode"))]
2350 fn to_lowercase_into(&self, buf
: &mut Vec
<u8>) {
2351 // TODO: This is the best we can do given what std exposes I think.
2352 // If we roll our own case handling, then we might be able to do this
2353 // a bit faster. We shouldn't roll our own case handling unless we
2354 // need to, e.g., for doing caseless matching or case folding.
2356 // TODO(BUG): This doesn't handle any special casing rules.
// Reserve room for at least the input length up front. The output can
// still end up longer, because the non-ASCII branch may push more than
// one char per input char.
2358 buf
.reserve(self.as_bytes().len());
// `s..e` is the byte range of the current codepoint `ch` in the input.
2359 for (s
, e
, ch
) in self.char_indices() {
2360 if ch
== '
\u{FFFD}'
{
// A replacement codepoint is treated as invalid UTF-8: the original bytes
// `s..e` are copied verbatim rather than encoding U+FFFD. A literal,
// validly encoded U+FFFD in the input also takes this branch and is
// likewise copied unchanged.
2361 buf
.push_str(&self.as_bytes()[s
..e
]);
2362 } else if ch
.is_ascii() {
// ASCII chars take the ASCII-only lowercase conversion.
2363 buf
.push_char(ch
.to_ascii_lowercase());
// Remaining (non-ASCII) chars: `char::to_lowercase` returns an iterator
// that may yield more than one char, and each one is pushed.
// NOTE: despite its name, `upper` holds the *lowercase* chars.
2365 for upper
in ch
.to_lowercase() {
2366 buf
.push_char(upper
);
/// Allocates a new `Vec<u8>` holding a copy of this byte string in which
/// every ASCII uppercase letter has been lowercased.
///
/// Only the ASCII letters `A-Z` are affected; they become `a-z`. Every
/// other byte, including non-ASCII and invalid UTF-8, is copied unchanged,
/// so the result always has the same length as this byte string.
///
/// To avoid the allocation, use
/// [`make_ascii_lowercase`](#method.make_ascii_lowercase), which converts
/// in place.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use bstr::{B, ByteSlice};
///
/// let s = B("HELLO Β");
/// assert_eq!("hello Β".as_bytes(), s.to_ascii_lowercase().as_bytes());
/// ```
///
/// Invalid UTF-8 remains as is:
///
/// ```
/// use bstr::{B, ByteSlice};
///
/// let s = B(b"FOO\xFFBAR\xE2\x98BAZ");
/// assert_eq!(s.to_ascii_lowercase(), B(b"foo\xFFbar\xE2\x98baz"));
/// ```
#[cfg(feature = "std")]
#[inline]
fn to_ascii_lowercase(&self) -> Vec<u8> {
    // Name the inherent slice method explicitly so it is obvious this is
    // a delegation and not a recursive call to the trait method.
    <[u8]>::to_ascii_lowercase(self.as_bytes())
}
2409 /// Convert this byte string to its lowercase ASCII equivalent in place.
2411 /// In this case, lowercase is only defined in ASCII letters. Namely, the
2412 /// letters `A-Z` are converted to `a-z`. All other bytes remain unchanged.
2414 /// If you don't need to do the conversion in
2415 /// place and instead prefer convenience, then use
2416 /// [`to_ascii_lowercase`](#method.to_ascii_lowercase) instead.
2423 /// use bstr::ByteSlice;
2425 /// let mut s = <Vec<u8>>::from("HELLO Β");
2426 /// s.make_ascii_lowercase();
2427 /// assert_eq!(s, "hello Β".as_bytes());
2430 /// Invalid UTF-8 remains as is:
2433 /// use bstr::{B, ByteSlice, ByteVec};
2435 /// let mut s = <Vec<u8>>::from_slice(b"FOO\xFFBAR\xE2\x98BAZ");
2436 /// s.make_ascii_lowercase();
2437 /// assert_eq!(s, B(b"foo\xFFbar\xE2\x98baz"));
2440 fn make_ascii_lowercase(&mut self) {
2441 self.as_bytes_mut().make_ascii_lowercase();
/// Allocates a new `Vec<u8>` holding the uppercase form of this byte
/// string.
///
/// In this case, uppercase is defined according to the `Uppercase`
/// Unicode property.
///
/// Invalid UTF-8, and characters without an uppercase variant, are copied
/// to the result unchanged.
///
/// Some characters expand into several characters when their case
/// changes, so the result may differ in length from this byte string.
///
/// To reuse an existing allocation, use
/// [`to_uppercase_into`](#method.to_uppercase_into) instead.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use bstr::{B, ByteSlice};
///
/// let s = B("hello β");
/// assert_eq!(s.to_uppercase(), B("HELLO Β"));
/// ```
///
/// Scripts without case are not changed:
///
/// ```
/// use bstr::{B, ByteSlice};
///
/// let s = B("农历新年");
/// assert_eq!(s.to_uppercase(), B("农历新年"));
/// ```
///
/// Invalid UTF-8 remains as is:
///
/// ```
/// use bstr::{B, ByteSlice};
///
/// let s = B(b"foo\xFFbar\xE2\x98baz");
/// assert_eq!(s.to_uppercase(), B(b"FOO\xFFBAR\xE2\x98BAZ"));
/// ```
#[cfg(all(feature = "std", feature = "unicode"))]
#[inline]
fn to_uppercase(&self) -> Vec<u8> {
    // Start empty; `to_uppercase_into` reserves what it needs.
    let mut upper = Vec::new();
    self.to_uppercase_into(&mut upper);
    upper
}
/// Appends the uppercase form of this byte string to `buf`. The buffer is
/// not cleared first; existing contents are kept.
///
/// In this case, uppercase is defined according to the `Uppercase`
/// Unicode property.
///
/// Invalid UTF-8, and characters without an uppercase variant, are
/// written to the buffer unchanged.
///
/// Some characters expand into several characters when their case
/// changes, so the number of bytes written may differ from the length of
/// this byte string.
///
/// If amortizing allocation does not matter, the more convenient
/// [`to_uppercase`](#method.to_uppercase) returns a fresh `Vec<u8>`.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use bstr::{B, ByteSlice};
///
/// let s = B("hello β");
///
/// let mut buf = vec![];
/// s.to_uppercase_into(&mut buf);
/// assert_eq!(buf, B("HELLO Β"));
/// ```
///
/// Scripts without case are not changed:
///
/// ```
/// use bstr::{B, ByteSlice};
///
/// let s = B("农历新年");
///
/// let mut buf = vec![];
/// s.to_uppercase_into(&mut buf);
/// assert_eq!(buf, B("农历新年"));
/// ```
///
/// Invalid UTF-8 remains as is:
///
/// ```
/// use bstr::{B, ByteSlice};
///
/// let s = B(b"foo\xFFbar\xE2\x98baz");
///
/// let mut buf = vec![];
/// s.to_uppercase_into(&mut buf);
/// assert_eq!(buf, B(b"FOO\xFFBAR\xE2\x98BAZ"));
/// ```
#[cfg(all(feature = "std", feature = "unicode"))]
#[inline]
fn to_uppercase_into(&self, buf: &mut Vec<u8>) {
    // TODO: This is the best we can do given what std exposes I think.
    // If we roll our own case handling, then we might be able to do this
    // a bit faster. We shouldn't roll our own case handling unless we
    // need to, e.g., for doing caseless matching or case folding.
    let bytes = self.as_bytes();
    // Only a lower bound: uppercasing can expand a codepoint.
    buf.reserve(bytes.len());
    for (start, end, ch) in self.char_indices() {
        match ch {
            // The replacement codepoint is copied through as the original
            // source bytes, which is how invalid UTF-8 "remains as is".
            '\u{FFFD}' => buf.push_str(&bytes[start..end]),
            // ASCII never expands, so skip the general iterator.
            c if c.is_ascii() => buf.push_char(c.to_ascii_uppercase()),
            // A single codepoint may uppercase to several codepoints.
            c => {
                for upper in c.to_uppercase() {
                    buf.push_char(upper);
                }
            }
        }
    }
}
/// Allocates a new `Vec<u8>` holding a copy of this byte string in which
/// every ASCII lowercase letter has been uppercased.
///
/// Only the ASCII letters `a-z` are affected; they become `A-Z`. Every
/// other byte, including non-ASCII and invalid UTF-8, is copied unchanged,
/// so the result always has the same length as this byte string.
///
/// To avoid the allocation, use
/// [`make_ascii_uppercase`](#method.make_ascii_uppercase), which converts
/// in place.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use bstr::{B, ByteSlice};
///
/// let s = B("hello β");
/// assert_eq!(s.to_ascii_uppercase(), B("HELLO β"));
/// ```
///
/// Invalid UTF-8 remains as is:
///
/// ```
/// use bstr::{B, ByteSlice};
///
/// let s = B(b"foo\xFFbar\xE2\x98baz");
/// assert_eq!(s.to_ascii_uppercase(), B(b"FOO\xFFBAR\xE2\x98BAZ"));
/// ```
#[cfg(feature = "std")]
#[inline]
fn to_ascii_uppercase(&self) -> Vec<u8> {
    // Name the inherent slice method explicitly so it is obvious this is
    // a delegation and not a recursive call to the trait method.
    <[u8]>::to_ascii_uppercase(self.as_bytes())
}
2609 /// Convert this byte string to its uppercase ASCII equivalent in place.
2611 /// In this case, uppercase is only defined in ASCII letters. Namely, the
2612 /// letters `a-z` are converted to `A-Z`. All other bytes remain unchanged.
2614 /// If you don't need to do the conversion in
2615 /// place and instead prefer convenience, then use
2616 /// [`to_ascii_uppercase`](#method.to_ascii_uppercase) instead.
2623 /// use bstr::{B, ByteSlice};
2625 /// let mut s = <Vec<u8>>::from("hello β");
2626 /// s.make_ascii_uppercase();
2627 /// assert_eq!(s, B("HELLO β"));
2630 /// Invalid UTF-8 remains as is:
2633 /// use bstr::{B, ByteSlice, ByteVec};
2635 /// let mut s = <Vec<u8>>::from_slice(b"foo\xFFbar\xE2\x98baz");
2636 /// s.make_ascii_uppercase();
2637 /// assert_eq!(s, B(b"FOO\xFFBAR\xE2\x98BAZ"));
2640 fn make_ascii_uppercase(&mut self) {
2641 self.as_bytes_mut().make_ascii_uppercase();
2644 /// Reverse the bytes in this string, in place.
2646 /// This is not necessarily a well formed operation! For example, if this
2647 /// byte string contains valid UTF-8 that isn't ASCII, then reversing the
2648 /// string will likely result in invalid UTF-8 and otherwise non-sensical
2651 /// Note that this is equivalent to the generic `[u8]::reverse` method.
2652 /// This method is provided to permit callers to explicitly differentiate
2653 /// between reversing bytes, codepoints and graphemes.
2660 /// use bstr::ByteSlice;
2662 /// let mut s = <Vec<u8>>::from("hello");
2663 /// s.reverse_bytes();
2664 /// assert_eq!(s, "olleh".as_bytes());
2667 fn reverse_bytes(&mut self) {
2668 self.as_bytes_mut().reverse();
2671 /// Reverse the codepoints in this string, in place.
2673 /// If this byte string is valid UTF-8, then its reversal by codepoint
2674 /// is also guaranteed to be valid UTF-8.
2676 /// This operation is equivalent to the following, but without allocating:
2679 /// use bstr::ByteSlice;
2681 /// let mut s = <Vec<u8>>::from("foo☃bar");
2683 /// let mut chars: Vec<char> = s.chars().collect();
2684 /// chars.reverse();
2686 /// let reversed: String = chars.into_iter().collect();
2687 /// assert_eq!(reversed, "rab☃oof");
2690 /// Note that this is not necessarily a well formed operation. For example,
2691 /// if this byte string contains grapheme clusters with more than one
2692 /// codepoint, then those grapheme clusters will not necessarily be
2693 /// preserved. If you'd like to preserve grapheme clusters, then use
2694 /// [`reverse_graphemes`](#method.reverse_graphemes) instead.
2701 /// use bstr::ByteSlice;
2703 /// let mut s = <Vec<u8>>::from("foo☃bar");
2704 /// s.reverse_chars();
2705 /// assert_eq!(s, "rab☃oof".as_bytes());
2708 /// This example shows that not all reversals lead to a well formed string.
2709 /// For example, in this case, combining marks are used to put accents over
2710 /// some letters, and those accent marks must appear after the codepoints
2714 /// use bstr::{B, ByteSlice};
2716 /// let mut s = <Vec<u8>>::from("résumé");
2717 /// s.reverse_chars();
2718 /// assert_eq!(s, B(b"\xCC\x81emus\xCC\x81er"));
2721 /// A word of warning: the above example relies on the fact that
2722 /// `résumé` is in decomposed normal form, which means there are separate
2723 /// codepoints for the accents above `e`. If it is instead in composed
2724 /// normal form, then the example works:
2727 /// use bstr::{B, ByteSlice};
2729 /// let mut s = <Vec<u8>>::from("résumé");
2730 /// s.reverse_chars();
2731 /// assert_eq!(s, B("émusér"));
2734 /// The point here is to be cautious and not assume that just because
2735 /// `reverse_chars` works in one case, that it therefore works in all
2738 fn reverse_chars(&mut self) {
2741 let (_
, size
) = utf8
::decode(&self.as_bytes()[i
..]);
2746 self.as_bytes_mut()[i
..i
+ size
].reverse_bytes();
2750 self.reverse_bytes();
/// Reverses the order of the grapheme clusters in this string, in place.
///
/// If this byte string is valid UTF-8, then its reversal by grapheme
/// is also guaranteed to be valid UTF-8.
///
/// This operation is equivalent to the following, but without allocating:
///
/// ```
/// use bstr::ByteSlice;
///
/// let mut s = <Vec<u8>>::from("foo☃bar");
///
/// let mut graphemes: Vec<&str> = s.graphemes().collect();
/// graphemes.reverse();
///
/// let reversed = graphemes.concat();
/// assert_eq!(reversed, "rab☃oof");
/// ```
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use bstr::ByteSlice;
///
/// let mut s = <Vec<u8>>::from("foo☃bar");
/// s.reverse_graphemes();
/// assert_eq!(s, "rab☃oof".as_bytes());
/// ```
///
/// This example shows how this correctly handles grapheme clusters,
/// unlike `reverse_chars`. The accents are separate combining marks
/// (U+0301), written with escapes so the decomposed form is unambiguous;
/// each mark stays attached to its base letter:
///
/// ```
/// use bstr::ByteSlice;
///
/// let mut s = <Vec<u8>>::from("re\u{301}sume\u{301}");
/// s.reverse_graphemes();
/// assert_eq!(s, "e\u{301}muse\u{301}r".as_bytes());
/// ```
#[cfg(feature = "unicode")]
#[inline]
fn reverse_graphemes(&mut self) {
    use unicode::decode_grapheme;

    // Two passes. First reverse the bytes inside every multi-byte
    // grapheme, then reverse the whole buffer. The second pass restores
    // the byte order within each grapheme while flipping the order of the
    // graphemes themselves.
    let mut offset = 0;
    loop {
        let (_, width) = decode_grapheme(&self.as_bytes()[offset..]);
        match width {
            // Nothing left to decode.
            0 => break,
            // A single byte is its own reversal.
            1 => {}
            _ => self.as_bytes_mut()[offset..offset + width].reverse_bytes(),
        }
        offset += width;
    }
    self.reverse_bytes();
}
2813 /// Returns true if and only if every byte in this byte string is ASCII.
2815 /// ASCII is an encoding that defines 128 codepoints. A byte corresponds to
2816 /// an ASCII codepoint if and only if it is in the inclusive range
2824 /// use bstr::{B, ByteSlice};
2826 /// assert!(B("abc").is_ascii());
2827 /// assert!(!B("☃βツ").is_ascii());
2828 /// assert!(!B(b"\xFF").is_ascii());
2831 fn is_ascii(&self) -> bool
{
2832 ascii
::first_non_ascii_byte(self.as_bytes()) == self.as_bytes().len()
2835 /// Returns true if and only if the entire byte string is valid UTF-8.
2837 /// If you need location information about where a byte string's first
2838 /// invalid UTF-8 byte is, then use the [`to_str`](#method.to_str) method.
2845 /// use bstr::{B, ByteSlice};
2847 /// assert!(B("abc").is_utf8());
2848 /// assert!(B("☃βツ").is_utf8());
2849 /// // invalid bytes
2850 /// assert!(!B(b"abc\xFF").is_utf8());
2851 /// // surrogate encoding
2852 /// assert!(!B(b"\xED\xA0\x80").is_utf8());
2853 /// // incomplete sequence
2854 /// assert!(!B(b"\xF0\x9D\x9Ca").is_utf8());
2855 /// // overlong sequence
2856 /// assert!(!B(b"\xF0\x82\x82\xAC").is_utf8());
2859 fn is_utf8(&self) -> bool
{
2860 utf8
::validate(self.as_bytes()).is_ok()
2863 /// Returns the last byte in this byte string, if it's non-empty. If this
2864 /// byte string is empty, this returns `None`.
2866 /// Note that this is like the generic `[u8]::last`, except this returns
2867 /// the byte by value instead of a reference to the byte.
2874 /// use bstr::ByteSlice;
2876 /// assert_eq!(Some(b'z'), b"baz".last_byte());
2877 /// assert_eq!(None, b"".last_byte());
2880 fn last_byte(&self) -> Option
<u8> {
2881 let bytes
= self.as_bytes();
2882 bytes
.get(bytes
.len().saturating_sub(1)).map(|&b
| b
)
2885 /// Returns the index of the first non-ASCII byte in this byte string (if
2886 /// any such indices exist). Specifically, it returns the index of the
2887 /// first byte with a value greater than or equal to `0x80`.
2894 /// use bstr::{ByteSlice, B};
2896 /// assert_eq!(Some(3), b"abc\xff".find_non_ascii_byte());
2897 /// assert_eq!(None, b"abcde".find_non_ascii_byte());
2898 /// assert_eq!(Some(0), B("😀").find_non_ascii_byte());
2901 fn find_non_ascii_byte(&self) -> Option
<usize> {
2902 let index
= ascii
::first_non_ascii_byte(self.as_bytes());
2903 if index
== self.as_bytes().len() {
2910 /// Copies elements from one part of the slice to another part of itself,
2911 /// where the parts may be overlapping.
2913 /// `src` is the range within this byte string to copy from, while `dest`
2914 /// is the starting index of the range within this byte string to copy to.
2915 /// The length indicated by `src` must be less than or equal to the number
2916 /// of bytes from `dest` to the end of the byte string.
2920 /// Panics if either range is out of bounds, or if `src` is too big to fit
2921 /// into `dest`, or if the end of `src` is before the start.
2925 /// Copying four bytes within a byte string:
2928 /// use bstr::{B, ByteSlice};
2930 /// let mut buf = *b"Hello, World!";
2931 /// let s = &mut buf;
2932 /// s.copy_within_str(1..5, 8);
2933 /// assert_eq!(s, B("Hello, Wello!"));
2936 fn copy_within_str
<R
>(&mut self, src
: R
, dest
: usize)
2938 R
: ops
::RangeBounds
<usize>,
2940 // TODO: Deprecate this once slice::copy_within stabilizes.
2941 let src_start
= match src
.start_bound() {
2942 ops
::Bound
::Included(&n
) => n
,
2943 ops
::Bound
::Excluded(&n
) => {
2944 n
.checked_add(1).expect("attempted to index slice beyond max")
2946 ops
::Bound
::Unbounded
=> 0,
2948 let src_end
= match src
.end_bound() {
2949 ops
::Bound
::Included(&n
) => {
2950 n
.checked_add(1).expect("attempted to index slice beyond max")
2952 ops
::Bound
::Excluded(&n
) => n
,
2953 ops
::Bound
::Unbounded
=> self.as_bytes().len(),
2955 assert
!(src_start
<= src_end
, "src end is before src start");
2956 assert
!(src_end
<= self.as_bytes().len(), "src is out of bounds");
2957 let count
= src_end
- src_start
;
2959 dest
<= self.as_bytes().len() - count
,
2960 "dest is out of bounds",
2963 // SAFETY: This is safe because we use ptr::copy to handle overlapping
2964 // copies, and is also safe because we've checked all the bounds above.
2965 // Finally, we are only dealing with u8 data, which is Copy, which
2966 // means we can copy without worrying about ownership/destructors.
2969 self.as_bytes().get_unchecked(src_start
),
2970 self.as_bytes_mut().get_unchecked_mut(dest
),
2977 /// A single substring searcher fixed to a particular needle.
2979 /// The purpose of this type is to permit callers to construct a substring
2980 /// searcher that can be used to search haystacks without the overhead of
2981 /// constructing the searcher in the first place. This is a somewhat niche
2982 /// concern when it's necessary to re-use the same needle to search multiple
2983 /// different haystacks with as little overhead as possible. In general, using
2984 /// [`ByteSlice::find`](trait.ByteSlice.html#method.find)
2986 /// [`ByteSlice::find_iter`](trait.ByteSlice.html#method.find_iter)
2987 /// is good enough, but `Finder` is useful when you can meaningfully observe
2988 /// searcher construction time in a profile.
2990 /// When the `std` feature is enabled, then this type has an `into_owned`
2991 /// version which permits building a `Finder` that is not connected to the
2992 /// lifetime of its needle.
2993 #[derive(Clone, Debug)]
2994 pub struct Finder
<'a
> {
2995 searcher
: TwoWay
<'a
>,
2998 impl<'a
> Finder
<'a
> {
2999 /// Create a new finder for the given needle.
3001 pub fn new
<B
: ?Sized
+ AsRef
<[u8]>>(needle
: &'a B
) -> Finder
<'a
> {
3002 Finder { searcher: TwoWay::forward(needle.as_ref()) }
3005 /// Convert this finder into its owned variant, such that it no longer
3006 /// borrows the needle.
3008 /// If this is already an owned finder, then this is a no-op. Otherwise,
3009 /// this copies the needle.
3011 /// This is only available when the `std` feature is enabled.
3012 #[cfg(feature = "std")]
3014 pub fn into_owned(self) -> Finder
<'
static> {
3015 Finder { searcher: self.searcher.into_owned() }
3018 /// Returns the needle that this finder searches for.
3020 /// Note that the lifetime of the needle returned is tied to the lifetime
3021 /// of the finder, and may be shorter than the `'a` lifetime. Namely, a
3022 /// finder's needle can be either borrowed or owned, so the lifetime of the
3023 /// needle returned must necessarily be the shorter of the two.
3025 pub fn needle(&self) -> &[u8] {
3026 self.searcher
.needle()
3029 /// Returns the index of the first occurrence of this needle in the given
3032 /// The haystack may be any type that can be cheaply converted into a
3033 /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
3037 /// This routine is guaranteed to have worst case linear time complexity
3038 /// with respect to both the needle and the haystack. That is, this runs
3039 /// in `O(needle.len() + haystack.len())` time.
3041 /// This routine is also guaranteed to have worst case constant space
3049 /// use bstr::Finder;
3051 /// let haystack = "foo bar baz";
3052 /// assert_eq!(Some(0), Finder::new("foo").find(haystack));
3053 /// assert_eq!(Some(4), Finder::new("bar").find(haystack));
3054 /// assert_eq!(None, Finder::new("quux").find(haystack));
3057 pub fn find
<B
: AsRef
<[u8]>>(&self, haystack
: B
) -> Option
<usize> {
3058 self.searcher
.find(haystack
.as_ref())
3062 /// A single substring reverse searcher fixed to a particular needle.
3064 /// The purpose of this type is to permit callers to construct a substring
3065 /// searcher that can be used to search haystacks without the overhead of
3066 /// constructing the searcher in the first place. This is a somewhat niche
3067 /// concern when it's necessary to re-use the same needle to search multiple
3068 /// different haystacks with as little overhead as possible. In general, using
3069 /// [`ByteSlice::rfind`](trait.ByteSlice.html#method.rfind)
3071 /// [`ByteSlice::rfind_iter`](trait.ByteSlice.html#method.rfind_iter)
3072 /// is good enough, but `FinderReverse` is useful when you can meaningfully
3073 /// observe searcher construction time in a profile.
3075 /// When the `std` feature is enabled, then this type has an `into_owned`
3076 /// version which permits building a `FinderReverse` that is not connected to
3077 /// the lifetime of its needle.
3078 #[derive(Clone, Debug)]
3079 pub struct FinderReverse
<'a
> {
3080 searcher
: TwoWay
<'a
>,
3083 impl<'a
> FinderReverse
<'a
> {
3084 /// Create a new reverse finder for the given needle.
3086 pub fn new
<B
: ?Sized
+ AsRef
<[u8]>>(needle
: &'a B
) -> FinderReverse
<'a
> {
3087 FinderReverse { searcher: TwoWay::reverse(needle.as_ref()) }
3090 /// Convert this finder into its owned variant, such that it no longer
3091 /// borrows the needle.
3093 /// If this is already an owned finder, then this is a no-op. Otherwise,
3094 /// this copies the needle.
3096 /// This is only available when the `std` feature is enabled.
3097 #[cfg(feature = "std")]
3099 pub fn into_owned(self) -> FinderReverse
<'
static> {
3100 FinderReverse { searcher: self.searcher.into_owned() }
3103 /// Returns the needle that this finder searches for.
3105 /// Note that the lifetime of the needle returned is tied to the lifetime
3106 /// of this finder, and may be shorter than the `'a` lifetime. Namely,
3107 /// a finder's needle can be either borrowed or owned, so the lifetime of
3108 /// the needle returned must necessarily be the shorter of the two.
3110 pub fn needle(&self) -> &[u8] {
3111 self.searcher
.needle()
3114 /// Returns the index of the last occurrence of this needle in the given
3117 /// The haystack may be any type that can be cheaply converted into a
3118 /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
3122 /// This routine is guaranteed to have worst case linear time complexity
3123 /// with respect to both the needle and the haystack. That is, this runs
3124 /// in `O(needle.len() + haystack.len())` time.
3126 /// This routine is also guaranteed to have worst case constant space
3134 /// use bstr::FinderReverse;
3136 /// let haystack = "foo bar baz";
3137 /// assert_eq!(Some(0), FinderReverse::new("foo").rfind(haystack));
3138 /// assert_eq!(Some(4), FinderReverse::new("bar").rfind(haystack));
3139 /// assert_eq!(None, FinderReverse::new("quux").rfind(haystack));
3142 pub fn rfind
<B
: AsRef
<[u8]>>(&self, haystack
: B
) -> Option
<usize> {
3143 self.searcher
.rfind(haystack
.as_ref())
3147 /// An iterator over non-overlapping substring matches.
3149 /// Matches are reported by the byte offset at which they begin.
3151 /// `'a` is the shorter of two lifetimes: the byte string being searched or the
3152 /// byte string being looked for.
3154 pub struct Find
<'a
> {
3156 prestate
: PrefilterState
,
3157 searcher
: TwoWay
<'a
>,
3162 fn new(haystack
: &'a
[u8], needle
: &'a
[u8]) -> Find
<'a
> {
3163 let searcher
= TwoWay
::forward(needle
);
3164 let prestate
= searcher
.prefilter_state();
3165 Find { haystack, prestate, searcher, pos: 0 }
3169 impl<'a
> Iterator
for Find
<'a
> {
3173 fn next(&mut self) -> Option
<usize> {
3174 if self.pos
> self.haystack
.len() {
3179 .find_with(&mut self.prestate
, &self.haystack
[self.pos
..]);
3183 let pos
= self.pos
+ i
;
3184 self.pos
= pos
+ cmp
::max(1, self.searcher
.needle().len());
3191 /// An iterator over non-overlapping substring matches in reverse.
3193 /// Matches are reported by the byte offset at which they begin.
3195 /// `'a` is the shorter of two lifetimes: the byte string being searched or the
3196 /// byte string being looked for.
3198 pub struct FindReverse
<'a
> {
3200 prestate
: PrefilterState
,
3201 searcher
: TwoWay
<'a
>,
3202 /// When searching with an empty needle, this gets set to `None` after
3203 /// we've yielded the last element at `0`.
3207 impl<'a
> FindReverse
<'a
> {
3208 fn new(haystack
: &'a
[u8], needle
: &'a
[u8]) -> FindReverse
<'a
> {
3209 let searcher
= TwoWay
::reverse(needle
);
3210 let prestate
= searcher
.prefilter_state();
3211 let pos
= Some(haystack
.len());
3212 FindReverse { haystack, prestate, searcher, pos }
3215 fn haystack(&self) -> &'a
[u8] {
3219 fn needle(&self) -> &[u8] {
3220 self.searcher
.needle()
3224 impl<'a
> Iterator
for FindReverse
<'a
> {
3228 fn next(&mut self) -> Option
<usize> {
3229 let pos
= match self.pos
{
3230 None
=> return None
,
3235 .rfind_with(&mut self.prestate
, &self.haystack
[..pos
]);
3240 self.pos
= pos
.checked_sub(1);
/// An iterator over the bytes in a byte string.
///
/// Bytes are yielded by value. The iterator is double ended and knows its
/// exact remaining length.
///
/// `'a` is the lifetime of the byte string being traversed.
#[derive(Clone, Debug)]
pub struct Bytes<'a> {
    it: slice::Iter<'a, u8>,
}

impl<'a> Iterator for Bytes<'a> {
    type Item = u8;

    #[inline]
    fn next(&mut self) -> Option<u8> {
        self.it.next().cloned()
    }

    // `ExactSizeIterator` requires `size_hint` to report the exact remaining
    // length; the default `(0, None)` would violate that contract and also
    // defeats preallocation in `collect`/`extend`.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.it.size_hint()
    }
}

impl<'a> DoubleEndedIterator for Bytes<'a> {
    #[inline]
    fn next_back(&mut self) -> Option<u8> {
        self.it.next_back().cloned()
    }
}

impl<'a> ExactSizeIterator for Bytes<'a> {
    #[inline]
    fn len(&self) -> usize {
        self.it.len()
    }
}
3281 /// An iterator over the fields in a byte string, separated by whitespace.
3283 /// This iterator splits on contiguous runs of whitespace, such that the fields
3284 /// in `foo\t\t\n \nbar` are `foo` and `bar`.
3286 /// `'a` is the lifetime of the byte string being split.
3288 pub struct Fields
<'a
> {
3289 it
: FieldsWith
<'a
, fn(char) -> bool
>,
3292 impl<'a
> Fields
<'a
> {
3293 fn new(bytes
: &'a
[u8]) -> Fields
<'a
> {
3294 Fields { it: bytes.fields_with(|ch| ch.is_whitespace()) }
3298 impl<'a
> Iterator
for Fields
<'a
> {
3299 type Item
= &'a
[u8];
3302 fn next(&mut self) -> Option
<&'a
[u8]> {
3307 /// An iterator over fields in the byte string, separated by a predicate over
3310 /// This iterator splits a byte string based on its predicate function such
3311 /// that the elements returned are separated by contiguous runs of codepoints
3312 /// for which the predicate returns true.
3314 /// `'a` is the lifetime of the byte string being split, while `F` is the type
3315 /// of the predicate, i.e., `FnMut(char) -> bool`.
3317 pub struct FieldsWith
<'a
, F
> {
3320 chars
: CharIndices
<'a
>,
3323 impl<'a
, F
: FnMut(char) -> bool
> FieldsWith
<'a
, F
> {
3324 fn new(bytes
: &'a
[u8], f
: F
) -> FieldsWith
<'a
, F
> {
3325 FieldsWith { f, bytes, chars: bytes.char_indices() }
3329 impl<'a
, F
: FnMut(char) -> bool
> Iterator
for FieldsWith
<'a
, F
> {
3330 type Item
= &'a
[u8];
3333 fn next(&mut self) -> Option
<&'a
[u8]> {
3334 let (start
, mut end
);
3336 match self.chars
.next() {
3337 None
=> return None
,
3338 Some((s
, e
, ch
)) => {
3347 while let Some((_
, e
, ch
)) = self.chars
.next() {
3353 Some(&self.bytes
[start
..end
])
3357 /// An iterator over substrings in a byte string, split by a separator.
3359 /// `'a` is the lifetime of the byte string being split.
3361 pub struct Split
<'a
> {
3363 /// The end position of the previous match of our splitter. The element
3364 /// we yield corresponds to the substring starting at `last` up to the
3365 /// beginning of the next match of the splitter.
3367 /// Only set when iteration is complete. A corner case here is when a
3368 /// splitter is matched at the end of the haystack. At that point, we still
3369 /// need to yield an empty string following it.
3373 impl<'a
> Split
<'a
> {
3374 fn new(haystack
: &'a
[u8], splitter
: &'a
[u8]) -> Split
<'a
> {
3375 let finder
= haystack
.find_iter(splitter
);
3376 Split { finder, last: 0, done: false }
3380 impl<'a
> Iterator
for Split
<'a
> {
3381 type Item
= &'a
[u8];
3384 fn next(&mut self) -> Option
<&'a
[u8]> {
3385 let haystack
= self.finder
.haystack
;
3386 match self.finder
.next() {
3388 let next
= &haystack
[self.last
..start
];
3389 self.last
= start
+ self.finder
.searcher
.needle().len();
3393 if self.last
>= haystack
.len() {
3401 let s
= &haystack
[self.last
..];
3402 self.last
= haystack
.len();
3411 /// An iterator over substrings in a byte string, split by a separator, in
3414 /// `'a` is the lifetime of the byte string being split, while `F` is the type
3415 /// of the predicate, i.e., `FnMut(char) -> bool`.
3417 pub struct SplitReverse
<'a
> {
3418 finder
: FindReverse
<'a
>,
3419 /// The end position of the previous match of our splitter. The element
3420 /// we yield corresponds to the substring starting at `last` up to the
3421 /// beginning of the next match of the splitter.
3423 /// Only set when iteration is complete. A corner case here is when a
3424 /// splitter is matched at the end of the haystack. At that point, we still
3425 /// need to yield an empty string following it.
3429 impl<'a
> SplitReverse
<'a
> {
3430 fn new(haystack
: &'a
[u8], splitter
: &'a
[u8]) -> SplitReverse
<'a
> {
3431 let finder
= haystack
.rfind_iter(splitter
);
3432 SplitReverse { finder, last: haystack.len(), done: false }
3436 impl<'a
> Iterator
for SplitReverse
<'a
> {
3437 type Item
= &'a
[u8];
3440 fn next(&mut self) -> Option
<&'a
[u8]> {
3441 let haystack
= self.finder
.haystack();
3442 match self.finder
.next() {
3444 let nlen
= self.finder
.needle().len();
3445 let next
= &haystack
[start
+ nlen
..self.last
];
3458 let s
= &haystack
[..self.last
];
3468 /// An iterator over at most `n` substrings in a byte string, split by a
3471 /// `'a` is the lifetime of the byte string being split, while `F` is the type
3472 /// of the predicate, i.e., `FnMut(char) -> bool`.
3474 pub struct SplitN
<'a
> {
3480 impl<'a
> SplitN
<'a
> {
3486 let split
= haystack
.split_str(splitter
);
3487 SplitN { split, limit, count: 0 }
3491 impl<'a
> Iterator
for SplitN
<'a
> {
3492 type Item
= &'a
[u8];
3495 fn next(&mut self) -> Option
<&'a
[u8]> {
3497 if self.count
> self.limit
|| self.split
.done
{
3499 } else if self.count
== self.limit
{
3500 Some(&self.split
.finder
.haystack
[self.split
.last
..])
3507 /// An iterator over at most `n` substrings in a byte string, split by a
3508 /// separator, in reverse.
3510 /// `'a` is the lifetime of the byte string being split, while `F` is the type
3511 /// of the predicate, i.e., `FnMut(char) -> bool`.
3513 pub struct SplitNReverse
<'a
> {
3514 split
: SplitReverse
<'a
>,
3519 impl<'a
> SplitNReverse
<'a
> {
3524 ) -> SplitNReverse
<'a
> {
3525 let split
= haystack
.rsplit_str(splitter
);
3526 SplitNReverse { split, limit, count: 0 }
3530 impl<'a
> Iterator
for SplitNReverse
<'a
> {
3531 type Item
= &'a
[u8];
3534 fn next(&mut self) -> Option
<&'a
[u8]> {
3536 if self.count
> self.limit
|| self.split
.done
{
3538 } else if self.count
== self.limit
{
3539 Some(&self.split
.finder
.haystack()[..self.split
.last
])
3546 /// An iterator over all lines in a byte string, without their terminators.
3548 /// For this iterator, the only line terminators recognized are `\r\n` and
3551 /// `'a` is the lifetime of the byte string being iterated over.
3552 pub struct Lines
<'a
> {
3553 it
: LinesWithTerminator
<'a
>,
3556 impl<'a
> Lines
<'a
> {
3557 fn new(bytes
: &'a
[u8]) -> Lines
<'a
> {
3558 Lines { it: LinesWithTerminator::new(bytes) }
3562 impl<'a
> Iterator
for Lines
<'a
> {
3563 type Item
= &'a
[u8];
3566 fn next(&mut self) -> Option
<&'a
[u8]> {
3567 let mut line
= self.it
.next()?
;
3568 if line
.last_byte() == Some(b'
\n'
) {
3569 line
= &line
[..line
.len() - 1];
3570 if line
.last_byte() == Some(b'
\r'
) {
3571 line
= &line
[..line
.len() - 1];
3578 /// An iterator over all lines in a byte string, including their terminators.
3580 /// For this iterator, the only line terminator recognized is `\n`. (Since
3581 /// line terminators are included, this also handles `\r\n` line endings.)
3583 /// Line terminators are only included if they are present in the original
3584 /// byte string. For example, the last line in a byte string may not end with
3585 /// a line terminator.
3587 /// Concatenating all elements yielded by this iterator is guaranteed to yield
3588 /// the original byte string.
3590 /// `'a` is the lifetime of the byte string being iterated over.
3591 pub struct LinesWithTerminator
<'a
> {
3595 impl<'a
> LinesWithTerminator
<'a
> {
3596 fn new(bytes
: &'a
[u8]) -> LinesWithTerminator
<'a
> {
3597 LinesWithTerminator { bytes }
3601 impl<'a
> Iterator
for LinesWithTerminator
<'a
> {
3602 type Item
= &'a
[u8];
3605 fn next(&mut self) -> Option
<&'a
[u8]> {
3606 match self.bytes
.find_byte(b'
\n'
) {
3607 None
if self.bytes
.is_empty() => None
,
3609 let line
= self.bytes
;
3614 let line
= &self.bytes
[..end
+ 1];
3615 self.bytes
= &self.bytes
[end
+ 1..];
#[cfg(test)]
mod tests {
    use ext_slice::{ByteSlice, B};
    use tests::LOSSY_TESTS;

    // Checks both lossy conversions against the shared fixture table, and
    // cross-checks the fixtures themselves against std's lossy decoder.
    #[test]
    fn to_str_lossy() {
        for (i, &(expected, input)) in LOSSY_TESTS.iter().enumerate() {
            let want = expected.as_bytes();

            let got = B(input).to_str_lossy();
            assert_eq!(
                want,
                got.as_bytes(),
                "to_str_lossy(ith: {:?}, given: {:?})",
                i,
                input,
            );

            let mut got = String::new();
            B(input).to_str_lossy_into(&mut got);
            assert_eq!(want, got.as_bytes(), "to_str_lossy_into");

            let got = String::from_utf8_lossy(input);
            assert_eq!(want, got.as_bytes(), "std");
        }
    }

    // Two source bytes do not fit in the single byte left at index 5.
    #[test]
    #[should_panic]
    fn copy_within_fail1() {
        let mut storage = *b"foobar";
        let bytes = &mut storage;
        bytes.copy_within_str(0..2, 5);
    }

    // The source range ends before it starts.
    #[test]
    #[should_panic]
    fn copy_within_fail2() {
        let mut storage = *b"foobar";
        let bytes = &mut storage;
        bytes.copy_within_str(3..2, 0);
    }

    // The source range extends past the end of the byte string.
    #[test]
    #[should_panic]
    fn copy_within_fail3() {
        let mut storage = *b"foobar";
        let bytes = &mut storage;
        bytes.copy_within_str(5..7, 0);
    }

    // The destination index is one past the end, leaving no room for the
    // single source byte.
    #[test]
    #[should_panic]
    fn copy_within_fail4() {
        let mut storage = *b"foobar";
        let bytes = &mut storage;
        bytes.copy_within_str(0..1, 6);
    }
}