vendor/bstr/src/ext_vec.rs

   1 use core::fmt;
   2 use core::iter;
   3 use core::ops;
   4 use core::ptr;
   5
   6 use alloc::{borrow::Cow, string::String, vec, vec::Vec};
   7
   8 #[cfg(feature = "std")]
   9 use std::{
  10     error,
  11     ffi::{OsStr, OsString},
  12     path::{Path, PathBuf},
  13 };
  14
  15 use crate::{
  16     ext_slice::ByteSlice,
  17     utf8::{self, Utf8Error},
  18 };
  19
  20 /// Concatenate the elements given by the iterator together into a single
  21 /// `Vec<u8>`.
  22 ///
  23 /// The elements may be any type that can be cheaply converted into an `&[u8]`.
  24 /// This includes, but is not limited to, `&str`, `&BStr` and `&[u8]` itself.
  25 ///
  26 /// # Examples
  27 ///
  28 /// Basic usage:
  29 ///
  30 /// ```
  31 /// use bstr;
  32 ///
  33 /// let s = bstr::concat(&["foo", "bar", "baz"]);
  34 /// assert_eq!(s, "foobarbaz".as_bytes());
  35 /// ```
  36 #[inline]
  37 pub fn concat<T, I>(elements: I) -> Vec<u8>
  38 where
  39     T: AsRef<[u8]>,
  40     I: IntoIterator<Item = T>,
  41 {
  42     let mut dest = vec![];
  43     for element in elements {
  44         dest.push_str(element);
  45     }
  46     dest
  47 }
  48
  49 /// Join the elements given by the iterator with the given separator into a
  50 /// single `Vec<u8>`.
  51 ///
  52 /// Both the separator and the elements may be any type that can be cheaply
  53 /// converted into an `&[u8]`. This includes, but is not limited to,
  54 /// `&str`, `&BStr` and `&[u8]` itself.
  55 ///
  56 /// # Examples
  57 ///
  58 /// Basic usage:
  59 ///
  60 /// ```
  61 /// use bstr;
  62 ///
  63 /// let s = bstr::join(",", &["foo", "bar", "baz"]);
  64 /// assert_eq!(s, "foo,bar,baz".as_bytes());
  65 /// ```
  66 #[inline]
  67 pub fn join<B, T, I>(separator: B, elements: I) -> Vec<u8>
  68 where
  69     B: AsRef<[u8]>,
  70     T: AsRef<[u8]>,
  71     I: IntoIterator<Item = T>,
  72 {
  73     let mut it = elements.into_iter();
  74     let mut dest = vec![];
  75     match it.next() {
  76         None => return dest,
  77         Some(first) => {
  78             dest.push_str(first);
  79         }
  80     }
  81     for element in it {
  82         dest.push_str(&separator);
  83         dest.push_str(element);
  84     }
  85     dest
  86 }
  87
// Implementation of `ByteVec` for `Vec<u8>`. Only the three accessor methods
// are supplied here; each one hands back the vector itself.
impl ByteVec for Vec<u8> {
    #[inline]
    fn as_vec(&self) -> &Vec<u8> {
        // `Self` is already `Vec<u8>`, so the borrow is returned as is.
        self
    }

    #[inline]
    fn as_vec_mut(&mut self) -> &mut Vec<u8> {
        // Same as `as_vec`, but for the mutable borrow.
        self
    }

    #[inline]
    fn into_vec(self) -> Vec<u8> {
        // Ownership passes straight through; nothing is copied.
        self
    }
}
 104
/// Ensure that callers cannot implement `ByteVec` by making an
/// unimplementable trait its super trait.
mod private {
    /// Marker trait living in a non-`pub` module, so it can only be named
    /// from within this module tree.
    pub trait Sealed {}
}
// `Vec<u8>` receives the marker, which is what allows it to implement
// `ByteVec` (whose super trait is `private::Sealed`).
impl private::Sealed for Vec<u8> {}
 111
 112 /// A trait that extends `Vec<u8>` with string oriented methods.
 113 ///
 114 /// Note that when using the constructor methods, such as
 115 /// `ByteVec::from_slice`, one should actually call them using the concrete
 116 /// type. For example:
 117 ///
 118 /// ```
 119 /// use bstr::{B, ByteVec};
 120 ///
 121 /// let s = Vec::from_slice(b"abc"); // NOT ByteVec::from_slice("...")
 122 /// assert_eq!(s, B("abc"));
 123 /// ```
 124 ///
 125 /// This trait is sealed and cannot be implemented outside of `bstr`.
 126 pub trait ByteVec: private::Sealed {
    /// Borrows this value as the underlying `Vec<u8>`.
    ///
    /// This is always a no-op and callers shouldn't care about it: the
    /// `Vec<u8>` implementation returns `self` unchanged. It only exists so
    /// that the provided methods of this extension trait can reach the
    /// vector.
    #[doc(hidden)]
    fn as_vec(&self) -> &Vec<u8>;
 132
    /// Mutably borrows this value as the underlying `Vec<u8>`.
    ///
    /// This is always a no-op and callers shouldn't care about it: the
    /// `Vec<u8>` implementation returns `self` unchanged. It only exists so
    /// that the provided methods of this extension trait can modify the
    /// vector.
    #[doc(hidden)]
    fn as_vec_mut(&mut self) -> &mut Vec<u8>;
 138
    /// Consumes this value and returns the underlying `Vec<u8>`.
    ///
    /// This is always a no-op and callers shouldn't care about it: the
    /// `Vec<u8>` implementation returns `self` unchanged. It only exists so
    /// that the provided methods of this extension trait can take ownership
    /// of the vector.
    #[doc(hidden)]
    fn into_vec(self) -> Vec<u8>
    where
        Self: Sized;
 146
 147     /// Create a new owned byte string from the given byte slice.
 148     ///
 149     /// # Examples
 150     ///
 151     /// Basic usage:
 152     ///
 153     /// ```
 154     /// use bstr::{B, ByteVec};
 155     ///
 156     /// let s = Vec::from_slice(b"abc");
 157     /// assert_eq!(s, B("abc"));
 158     /// ```
 159     #[inline]
 160     fn from_slice<B: AsRef<[u8]>>(bytes: B) -> Vec<u8> {
 161         bytes.as_ref().to_vec()
 162     }
 163
 164     /// Create a new byte string from an owned OS string.
 165     ///
 166     /// When the underlying bytes of OS strings are accessible, then this
 167     /// always succeeds and is zero cost. Otherwise, this returns the given
 168     /// `OsString` if it is not valid UTF-8.
 169     ///
 170     /// # Examples
 171     ///
 172     /// Basic usage:
 173     ///
 174     /// ```
 175     /// use std::ffi::OsString;
 176     ///
 177     /// use bstr::{B, ByteVec};
 178     ///
 179     /// let os_str = OsString::from("foo");
 180     /// let bs = Vec::from_os_string(os_str).expect("valid UTF-8");
 181     /// assert_eq!(bs, B("foo"));
 182     /// ```
 183     #[inline]
 184     #[cfg(feature = "std")]
 185     fn from_os_string(os_str: OsString) -> Result<Vec<u8>, OsString> {
 186         #[cfg(unix)]
 187         #[inline]
 188         fn imp(os_str: OsString) -> Result<Vec<u8>, OsString> {
 189             use std::os::unix::ffi::OsStringExt;
 190
 191             Ok(Vec::from(os_str.into_vec()))
 192         }
 193
 194         #[cfg(not(unix))]
 195         #[inline]
 196         fn imp(os_str: OsString) -> Result<Vec<u8>, OsString> {
 197             os_str.into_string().map(Vec::from)
 198         }
 199
 200         imp(os_str)
 201     }
 202
 203     /// Lossily create a new byte string from an OS string slice.
 204     ///
 205     /// When the underlying bytes of OS strings are accessible, then this is
 206     /// zero cost and always returns a slice. Otherwise, a UTF-8 check is
 207     /// performed and if the given OS string is not valid UTF-8, then it is
 208     /// lossily decoded into valid UTF-8 (with invalid bytes replaced by the
 209     /// Unicode replacement codepoint).
 210     ///
 211     /// # Examples
 212     ///
 213     /// Basic usage:
 214     ///
 215     /// ```
 216     /// use std::ffi::OsStr;
 217     ///
 218     /// use bstr::{B, ByteVec};
 219     ///
 220     /// let os_str = OsStr::new("foo");
 221     /// let bs = Vec::from_os_str_lossy(os_str);
 222     /// assert_eq!(bs, B("foo"));
 223     /// ```
 224     #[inline]
 225     #[cfg(feature = "std")]
 226     fn from_os_str_lossy<'a>(os_str: &'a OsStr) -> Cow<'a, [u8]> {
 227         #[cfg(unix)]
 228         #[inline]
 229         fn imp<'a>(os_str: &'a OsStr) -> Cow<'a, [u8]> {
 230             use std::os::unix::ffi::OsStrExt;
 231
 232             Cow::Borrowed(os_str.as_bytes())
 233         }
 234
 235         #[cfg(not(unix))]
 236         #[inline]
 237         fn imp<'a>(os_str: &'a OsStr) -> Cow<'a, [u8]> {
 238             match os_str.to_string_lossy() {
 239                 Cow::Borrowed(x) => Cow::Borrowed(x.as_bytes()),
 240                 Cow::Owned(x) => Cow::Owned(Vec::from(x)),
 241             }
 242         }
 243
 244         imp(os_str)
 245     }
 246
 247     /// Create a new byte string from an owned file path.
 248     ///
 249     /// When the underlying bytes of paths are accessible, then this always
 250     /// succeeds and is zero cost. Otherwise, this returns the given `PathBuf`
 251     /// if it is not valid UTF-8.
 252     ///
 253     /// # Examples
 254     ///
 255     /// Basic usage:
 256     ///
 257     /// ```
 258     /// use std::path::PathBuf;
 259     ///
 260     /// use bstr::{B, ByteVec};
 261     ///
 262     /// let path = PathBuf::from("foo");
 263     /// let bs = Vec::from_path_buf(path).expect("must be valid UTF-8");
 264     /// assert_eq!(bs, B("foo"));
 265     /// ```
 266     #[inline]
 267     #[cfg(feature = "std")]
 268     fn from_path_buf(path: PathBuf) -> Result<Vec<u8>, PathBuf> {
 269         Vec::from_os_string(path.into_os_string()).map_err(PathBuf::from)
 270     }
 271
 272     /// Lossily create a new byte string from a file path.
 273     ///
 274     /// When the underlying bytes of paths are accessible, then this is
 275     /// zero cost and always returns a slice. Otherwise, a UTF-8 check is
 276     /// performed and if the given path is not valid UTF-8, then it is lossily
 277     /// decoded into valid UTF-8 (with invalid bytes replaced by the Unicode
 278     /// replacement codepoint).
 279     ///
 280     /// # Examples
 281     ///
 282     /// Basic usage:
 283     ///
 284     /// ```
 285     /// use std::path::Path;
 286     ///
 287     /// use bstr::{B, ByteVec};
 288     ///
 289     /// let path = Path::new("foo");
 290     /// let bs = Vec::from_path_lossy(path);
 291     /// assert_eq!(bs, B("foo"));
 292     /// ```
 293     #[inline]
 294     #[cfg(feature = "std")]
 295     fn from_path_lossy<'a>(path: &'a Path) -> Cow<'a, [u8]> {
 296         Vec::from_os_str_lossy(path.as_os_str())
 297     }
 298
 299     /// Appends the given byte to the end of this byte string.
 300     ///
 301     /// Note that this is equivalent to the generic `Vec::push` method. This
 302     /// method is provided to permit callers to explicitly differentiate
 303     /// between pushing bytes, codepoints and strings.
 304     ///
 305     /// # Examples
 306     ///
 307     /// Basic usage:
 308     ///
 309     /// ```
 310     /// use bstr::ByteVec;
 311     ///
 312     /// let mut s = <Vec<u8>>::from("abc");
 313     /// s.push_byte(b'\xE2');
 314     /// s.push_byte(b'\x98');
 315     /// s.push_byte(b'\x83');
 316     /// assert_eq!(s, "abc☃".as_bytes());
 317     /// ```
 318     #[inline]
 319     fn push_byte(&mut self, byte: u8) {
 320         self.as_vec_mut().push(byte);
 321     }
 322
 323     /// Appends the given `char` to the end of this byte string.
 324     ///
 325     /// # Examples
 326     ///
 327     /// Basic usage:
 328     ///
 329     /// ```
 330     /// use bstr::ByteVec;
 331     ///
 332     /// let mut s = <Vec<u8>>::from("abc");
 333     /// s.push_char('1');
 334     /// s.push_char('2');
 335     /// s.push_char('3');
 336     /// assert_eq!(s, "abc123".as_bytes());
 337     /// ```
 338     #[inline]
 339     fn push_char(&mut self, ch: char) {
 340         if ch.len_utf8() == 1 {
 341             self.push_byte(ch as u8);
 342             return;
 343         }
 344         self.as_vec_mut()
 345             .extend_from_slice(ch.encode_utf8(&mut [0; 4]).as_bytes());
 346     }
 347
 348     /// Appends the given slice to the end of this byte string. This accepts
    /// any type that can be converted to a `&[u8]`. This includes, but is not
 350     /// limited to, `&str`, `&BStr`, and of course, `&[u8]` itself.
 351     ///
 352     /// # Examples
 353     ///
 354     /// Basic usage:
 355     ///
 356     /// ```
 357     /// use bstr::ByteVec;
 358     ///
 359     /// let mut s = <Vec<u8>>::from("abc");
 360     /// s.push_str(b"123");
 361     /// assert_eq!(s, "abc123".as_bytes());
 362     /// ```
 363     #[inline]
 364     fn push_str<B: AsRef<[u8]>>(&mut self, bytes: B) {
 365         self.as_vec_mut().extend_from_slice(bytes.as_ref());
 366     }
 367
 368     /// Converts a `Vec<u8>` into a `String` if and only if this byte string is
 369     /// valid UTF-8.
 370     ///
 371     /// If it is not valid UTF-8, then a
 372     /// [`FromUtf8Error`](struct.FromUtf8Error.html)
 373     /// is returned. (This error can be used to examine why UTF-8 validation
 374     /// failed, or to regain the original byte string.)
 375     ///
 376     /// # Examples
 377     ///
 378     /// Basic usage:
 379     ///
 380     /// ```
 381     /// use bstr::ByteVec;
 382     ///
 383     /// let bytes = Vec::from("hello");
 384     /// let string = bytes.into_string().unwrap();
 385     ///
 386     /// assert_eq!("hello", string);
 387     /// ```
 388     ///
 389     /// If this byte string is not valid UTF-8, then an error will be returned.
 390     /// That error can then be used to inspect the location at which invalid
 391     /// UTF-8 was found, or to regain the original byte string:
 392     ///
 393     /// ```
 394     /// use bstr::{B, ByteVec};
 395     ///
 396     /// let bytes = Vec::from_slice(b"foo\xFFbar");
 397     /// let err = bytes.into_string().unwrap_err();
 398     ///
 399     /// assert_eq!(err.utf8_error().valid_up_to(), 3);
 400     /// assert_eq!(err.utf8_error().error_len(), Some(1));
 401     ///
 402     /// // At no point in this example is an allocation performed.
 403     /// let bytes = Vec::from(err.into_vec());
 404     /// assert_eq!(bytes, B(b"foo\xFFbar"));
 405     /// ```
 406     #[inline]
 407     fn into_string(self) -> Result<String, FromUtf8Error>
 408     where
 409         Self: Sized,
 410     {
 411         match utf8::validate(self.as_vec()) {
 412             Err(err) => Err(FromUtf8Error { original: self.into_vec(), err }),
 413             Ok(()) => {
 414                 // SAFETY: This is safe because of the guarantees provided by
 415                 // utf8::validate.
 416                 unsafe { Ok(self.into_string_unchecked()) }
 417             }
 418         }
 419     }
 420
 421     /// Lossily converts a `Vec<u8>` into a `String`. If this byte string
 422     /// contains invalid UTF-8, then the invalid bytes are replaced with the
 423     /// Unicode replacement codepoint.
 424     ///
 425     /// # Examples
 426     ///
 427     /// Basic usage:
 428     ///
 429     /// ```
 430     /// use bstr::ByteVec;
 431     ///
 432     /// let bytes = Vec::from_slice(b"foo\xFFbar");
 433     /// let string = bytes.into_string_lossy();
 434     /// assert_eq!(string, "foo\u{FFFD}bar");
 435     /// ```
 436     #[inline]
 437     fn into_string_lossy(self) -> String
 438     where
 439         Self: Sized,
 440     {
 441         match self.as_vec().to_str_lossy() {
 442             Cow::Borrowed(_) => {
 443                 // SAFETY: to_str_lossy() returning a Cow::Borrowed guarantees
 444                 // the entire string is valid utf8.
 445                 unsafe { self.into_string_unchecked() }
 446             }
 447             Cow::Owned(s) => s,
 448         }
 449     }
 450
 451     /// Unsafely convert this byte string into a `String`, without checking for
 452     /// valid UTF-8.
 453     ///
 454     /// # Safety
 455     ///
 456     /// Callers *must* ensure that this byte string is valid UTF-8 before
 457     /// calling this method. Converting a byte string into a `String` that is
 458     /// not valid UTF-8 is considered undefined behavior.
 459     ///
 460     /// This routine is useful in performance sensitive contexts where the
 461     /// UTF-8 validity of the byte string is already known and it is
 462     /// undesirable to pay the cost of an additional UTF-8 validation check
 463     /// that [`into_string`](#method.into_string) performs.
 464     ///
 465     /// # Examples
 466     ///
 467     /// Basic usage:
 468     ///
 469     /// ```
 470     /// use bstr::ByteVec;
 471     ///
 472     /// // SAFETY: This is safe because string literals are guaranteed to be
 473     /// // valid UTF-8 by the Rust compiler.
 474     /// let s = unsafe { Vec::from("☃βツ").into_string_unchecked() };
 475     /// assert_eq!("☃βツ", s);
 476     /// ```
 477     #[inline]
 478     unsafe fn into_string_unchecked(self) -> String
 479     where
 480         Self: Sized,
 481     {
 482         String::from_utf8_unchecked(self.into_vec())
 483     }
 484
 485     /// Converts this byte string into an OS string, in place.
 486     ///
 487     /// When OS strings can be constructed from arbitrary byte sequences, this
 488     /// always succeeds and is zero cost. Otherwise, if this byte string is not
 489     /// valid UTF-8, then an error (with the original byte string) is returned.
 490     ///
 491     /// # Examples
 492     ///
 493     /// Basic usage:
 494     ///
 495     /// ```
 496     /// use std::ffi::OsStr;
 497     ///
 498     /// use bstr::ByteVec;
 499     ///
 500     /// let bs = Vec::from("foo");
 501     /// let os_str = bs.into_os_string().expect("should be valid UTF-8");
 502     /// assert_eq!(os_str, OsStr::new("foo"));
 503     /// ```
 504     #[cfg(feature = "std")]
 505     #[inline]
 506     fn into_os_string(self) -> Result<OsString, FromUtf8Error>
 507     where
 508         Self: Sized,
 509     {
 510         #[cfg(unix)]
 511         #[inline]
 512         fn imp(v: Vec<u8>) -> Result<OsString, FromUtf8Error> {
 513             use std::os::unix::ffi::OsStringExt;
 514
 515             Ok(OsString::from_vec(v))
 516         }
 517
 518         #[cfg(not(unix))]
 519         #[inline]
 520         fn imp(v: Vec<u8>) -> Result<OsString, FromUtf8Error> {
 521             v.into_string().map(OsString::from)
 522         }
 523
 524         imp(self.into_vec())
 525     }
 526
 527     /// Lossily converts this byte string into an OS string, in place.
 528     ///
 529     /// When OS strings can be constructed from arbitrary byte sequences, this
    /// is zero cost and keeps the bytes unchanged. Otherwise, this will perform a
 531     /// UTF-8 check and lossily convert this byte string into valid UTF-8 using
 532     /// the Unicode replacement codepoint.
 533     ///
 534     /// Note that this can prevent the correct roundtripping of file paths when
 535     /// the representation of `OsString` is opaque.
 536     ///
 537     /// # Examples
 538     ///
 539     /// Basic usage:
 540     ///
 541     /// ```
 542     /// use bstr::ByteVec;
 543     ///
 544     /// let bs = Vec::from_slice(b"foo\xFFbar");
 545     /// let os_str = bs.into_os_string_lossy();
 546     /// assert_eq!(os_str.to_string_lossy(), "foo\u{FFFD}bar");
 547     /// ```
 548     #[inline]
 549     #[cfg(feature = "std")]
 550     fn into_os_string_lossy(self) -> OsString
 551     where
 552         Self: Sized,
 553     {
 554         #[cfg(unix)]
 555         #[inline]
 556         fn imp(v: Vec<u8>) -> OsString {
 557             use std::os::unix::ffi::OsStringExt;
 558
 559             OsString::from_vec(v)
 560         }
 561
 562         #[cfg(not(unix))]
 563         #[inline]
 564         fn imp(v: Vec<u8>) -> OsString {
 565             OsString::from(v.into_string_lossy())
 566         }
 567
 568         imp(self.into_vec())
 569     }
 570
 571     /// Converts this byte string into an owned file path, in place.
 572     ///
 573     /// When paths can be constructed from arbitrary byte sequences, this
 574     /// always succeeds and is zero cost. Otherwise, if this byte string is not
 575     /// valid UTF-8, then an error (with the original byte string) is returned.
 576     ///
 577     /// # Examples
 578     ///
 579     /// Basic usage:
 580     ///
 581     /// ```
 582     /// use bstr::ByteVec;
 583     ///
 584     /// let bs = Vec::from("foo");
 585     /// let path = bs.into_path_buf().expect("should be valid UTF-8");
 586     /// assert_eq!(path.as_os_str(), "foo");
 587     /// ```
 588     #[cfg(feature = "std")]
 589     #[inline]
 590     fn into_path_buf(self) -> Result<PathBuf, FromUtf8Error>
 591     where
 592         Self: Sized,
 593     {
 594         self.into_os_string().map(PathBuf::from)
 595     }
 596
 597     /// Lossily converts this byte string into an owned file path, in place.
 598     ///
 599     /// When paths can be constructed from arbitrary byte sequences, this is
    /// zero cost and keeps the bytes unchanged. Otherwise, this will perform a
 601     /// UTF-8 check and lossily convert this byte string into valid UTF-8 using
 602     /// the Unicode replacement codepoint.
 603     ///
 604     /// Note that this can prevent the correct roundtripping of file paths when
 605     /// the representation of `PathBuf` is opaque.
 606     ///
 607     /// # Examples
 608     ///
 609     /// Basic usage:
 610     ///
 611     /// ```
 612     /// use bstr::ByteVec;
 613     ///
 614     /// let bs = Vec::from_slice(b"foo\xFFbar");
 615     /// let path = bs.into_path_buf_lossy();
 616     /// assert_eq!(path.to_string_lossy(), "foo\u{FFFD}bar");
 617     /// ```
 618     #[inline]
 619     #[cfg(feature = "std")]
 620     fn into_path_buf_lossy(self) -> PathBuf
 621     where
 622         Self: Sized,
 623     {
 624         PathBuf::from(self.into_os_string_lossy())
 625     }
 626
 627     /// Removes the last byte from this `Vec<u8>` and returns it.
 628     ///
 629     /// If this byte string is empty, then `None` is returned.
 630     ///
 631     /// If the last codepoint in this byte string is not ASCII, then removing
 632     /// the last byte could make this byte string contain invalid UTF-8.
 633     ///
 634     /// Note that this is equivalent to the generic `Vec::pop` method. This
 635     /// method is provided to permit callers to explicitly differentiate
 636     /// between popping bytes and codepoints.
 637     ///
 638     /// # Examples
 639     ///
 640     /// Basic usage:
 641     ///
 642     /// ```
 643     /// use bstr::ByteVec;
 644     ///
 645     /// let mut s = Vec::from("foo");
 646     /// assert_eq!(s.pop_byte(), Some(b'o'));
 647     /// assert_eq!(s.pop_byte(), Some(b'o'));
 648     /// assert_eq!(s.pop_byte(), Some(b'f'));
 649     /// assert_eq!(s.pop_byte(), None);
 650     /// ```
 651     #[inline]
 652     fn pop_byte(&mut self) -> Option<u8> {
 653         self.as_vec_mut().pop()
 654     }
 655
 656     /// Removes the last codepoint from this `Vec<u8>` and returns it.
 657     ///
 658     /// If this byte string is empty, then `None` is returned. If the last
 659     /// bytes of this byte string do not correspond to a valid UTF-8 code unit
 660     /// sequence, then the Unicode replacement codepoint is yielded instead in
 661     /// accordance with the
 662     /// [replacement codepoint substitution policy](index.html#handling-of-invalid-utf8-8).
 663     ///
 664     /// # Examples
 665     ///
 666     /// Basic usage:
 667     ///
 668     /// ```
 669     /// use bstr::ByteVec;
 670     ///
 671     /// let mut s = Vec::from("foo");
 672     /// assert_eq!(s.pop_char(), Some('o'));
 673     /// assert_eq!(s.pop_char(), Some('o'));
 674     /// assert_eq!(s.pop_char(), Some('f'));
 675     /// assert_eq!(s.pop_char(), None);
 676     /// ```
 677     ///
 678     /// This shows the replacement codepoint substitution policy. Note that
 679     /// the first pop yields a replacement codepoint but actually removes two
 680     /// bytes. This is in contrast with subsequent pops when encountering
 681     /// `\xFF` since `\xFF` is never a valid prefix for any valid UTF-8
 682     /// code unit sequence.
 683     ///
 684     /// ```
 685     /// use bstr::ByteVec;
 686     ///
 687     /// let mut s = Vec::from_slice(b"f\xFF\xFF\xFFoo\xE2\x98");
 688     /// assert_eq!(s.pop_char(), Some('\u{FFFD}'));
 689     /// assert_eq!(s.pop_char(), Some('o'));
 690     /// assert_eq!(s.pop_char(), Some('o'));
 691     /// assert_eq!(s.pop_char(), Some('\u{FFFD}'));
 692     /// assert_eq!(s.pop_char(), Some('\u{FFFD}'));
 693     /// assert_eq!(s.pop_char(), Some('\u{FFFD}'));
 694     /// assert_eq!(s.pop_char(), Some('f'));
 695     /// assert_eq!(s.pop_char(), None);
 696     /// ```
 697     #[inline]
 698     fn pop_char(&mut self) -> Option<char> {
 699         let (ch, size) = utf8::decode_last_lossy(self.as_vec());
 700         if size == 0 {
 701             return None;
 702         }
 703         let new_len = self.as_vec().len() - size;
 704         self.as_vec_mut().truncate(new_len);
 705         Some(ch)
 706     }
 707
 708     /// Removes a `char` from this `Vec<u8>` at the given byte position and
 709     /// returns it.
 710     ///
 711     /// If the bytes at the given position do not lead to a valid UTF-8 code
 712     /// unit sequence, then a
 713     /// [replacement codepoint is returned instead](index.html#handling-of-invalid-utf8-8).
 714     ///
 715     /// # Panics
 716     ///
 717     /// Panics if `at` is larger than or equal to this byte string's length.
 718     ///
 719     /// # Examples
 720     ///
 721     /// Basic usage:
 722     ///
 723     /// ```
 724     /// use bstr::ByteVec;
 725     ///
 726     /// let mut s = Vec::from("foo☃bar");
 727     /// assert_eq!(s.remove_char(3), '☃');
 728     /// assert_eq!(s, b"foobar");
 729     /// ```
 730     ///
 731     /// This example shows how the Unicode replacement codepoint policy is
 732     /// used:
 733     ///
 734     /// ```
 735     /// use bstr::ByteVec;
 736     ///
 737     /// let mut s = Vec::from_slice(b"foo\xFFbar");
 738     /// assert_eq!(s.remove_char(3), '\u{FFFD}');
 739     /// assert_eq!(s, b"foobar");
 740     /// ```
 741     #[inline]
 742     fn remove_char(&mut self, at: usize) -> char {
 743         let (ch, size) = utf8::decode_lossy(&self.as_vec()[at..]);
 744         assert!(
 745             size > 0,
 746             "expected {} to be less than {}",
 747             at,
 748             self.as_vec().len(),
 749         );
 750         self.as_vec_mut().drain(at..at + size);
 751         ch
 752     }
 753
 754     /// Inserts the given codepoint into this `Vec<u8>` at a particular byte
 755     /// position.
 756     ///
 757     /// This is an `O(n)` operation as it may copy a number of elements in this
 758     /// byte string proportional to its length.
 759     ///
 760     /// # Panics
 761     ///
 762     /// Panics if `at` is larger than the byte string's length.
 763     ///
 764     /// # Examples
 765     ///
 766     /// Basic usage:
 767     ///
 768     /// ```
 769     /// use bstr::ByteVec;
 770     ///
 771     /// let mut s = Vec::from("foobar");
 772     /// s.insert_char(3, '☃');
 773     /// assert_eq!(s, "foo☃bar".as_bytes());
 774     /// ```
 775     #[inline]
 776     fn insert_char(&mut self, at: usize, ch: char) {
 777         self.insert_str(at, ch.encode_utf8(&mut [0; 4]).as_bytes());
 778     }
 779
 780     /// Inserts the given byte string into this byte string at a particular
 781     /// byte position.
 782     ///
 783     /// This is an `O(n)` operation as it may copy a number of elements in this
 784     /// byte string proportional to its length.
 785     ///
 786     /// The given byte string may be any type that can be cheaply converted
 787     /// into a `&[u8]`. This includes, but is not limited to, `&str` and
 788     /// `&[u8]`.
 789     ///
 790     /// # Panics
 791     ///
 792     /// Panics if `at` is larger than the byte string's length.
 793     ///
 794     /// # Examples
 795     ///
 796     /// Basic usage:
 797     ///
 798     /// ```
 799     /// use bstr::ByteVec;
 800     ///
 801     /// let mut s = Vec::from("foobar");
 802     /// s.insert_str(3, "☃☃☃");
 803     /// assert_eq!(s, "foo☃☃☃bar".as_bytes());
 804     /// ```
 805     #[inline]
 806     fn insert_str<B: AsRef<[u8]>>(&mut self, at: usize, bytes: B) {
 807         let bytes = bytes.as_ref();
 808         let len = self.as_vec().len();
 809         assert!(at <= len, "expected {} to be <= {}", at, len);
 810
 811         // SAFETY: We'd like to efficiently splice in the given bytes into
 812         // this byte string. Since we are only working with `u8` elements here,
 813         // we only need to consider whether our bounds are correct and whether
 814         // our byte string has enough space.
 815         self.as_vec_mut().reserve(bytes.len());
 816         unsafe {
 817             // Shift bytes after `at` over by the length of `bytes` to make
 818             // room for it. This requires referencing two regions of memory
 819             // that may overlap, so we use ptr::copy.
 820             ptr::copy(
 821                 self.as_vec().as_ptr().add(at),
 822                 self.as_vec_mut().as_mut_ptr().add(at + bytes.len()),
 823                 len - at,
 824             );
 825             // Now copy the bytes given into the room we made above. In this
 826             // case, we know that the given bytes cannot possibly overlap
 827             // with this byte string since we have a mutable borrow of the
 828             // latter. Thus, we can use a nonoverlapping copy.
 829             ptr::copy_nonoverlapping(
 830                 bytes.as_ptr(),
 831                 self.as_vec_mut().as_mut_ptr().add(at),
 832                 bytes.len(),
 833             );
 834             self.as_vec_mut().set_len(len + bytes.len());
 835         }
 836     }
 837
 838     /// Removes the specified range in this byte string and replaces it with
 839     /// the given bytes. The given bytes do not need to have the same length
 840     /// as the range provided.
 841     ///
 842     /// # Panics
 843     ///
 844     /// Panics if the given range is invalid.
 845     ///
 846     /// # Examples
 847     ///
 848     /// Basic usage:
 849     ///
 850     /// ```
 851     /// use bstr::ByteVec;
 852     ///
 853     /// let mut s = Vec::from("foobar");
 854     /// s.replace_range(2..4, "xxxxx");
 855     /// assert_eq!(s, "foxxxxxar".as_bytes());
 856     /// ```
 857     #[inline]
 858     fn replace_range<R, B>(&mut self, range: R, replace_with: B)
 859     where
 860         R: ops::RangeBounds<usize>,
 861         B: AsRef<[u8]>,
 862     {
 863         self.as_vec_mut().splice(range, replace_with.as_ref().iter().cloned());
 864     }
 865
 866     /// Creates a draining iterator that removes the specified range in this
 867     /// `Vec<u8>` and yields each of the removed bytes.
 868     ///
 869     /// Note that the elements specified by the given range are removed
 870     /// regardless of whether the returned iterator is fully exhausted.
 871     ///
    /// Also note that it is unspecified how many bytes are removed from the
 873     /// `Vec<u8>` if the `DrainBytes` iterator is leaked.
 874     ///
 875     /// # Panics
 876     ///
 877     /// Panics if the given range is not valid.
 878     ///
 879     /// # Examples
 880     ///
 881     /// Basic usage:
 882     ///
 883     /// ```
 884     /// use bstr::ByteVec;
 885     ///
 886     /// let mut s = Vec::from("foobar");
 887     /// {
 888     ///     let mut drainer = s.drain_bytes(2..4);
 889     ///     assert_eq!(drainer.next(), Some(b'o'));
 890     ///     assert_eq!(drainer.next(), Some(b'b'));
 891     ///     assert_eq!(drainer.next(), None);
 892     /// }
 893     /// assert_eq!(s, "foar".as_bytes());
 894     /// ```
 895     #[inline]
 896     fn drain_bytes<R>(&mut self, range: R) -> DrainBytes<'_>
 897     where
 898         R: ops::RangeBounds<usize>,
 899     {
 900         DrainBytes { it: self.as_vec_mut().drain(range) }
 901     }
 902 }
 903
 904 /// A draining byte oriented iterator for `Vec<u8>`.
 905 ///
 906 /// This iterator is created by
 907 /// [`ByteVec::drain_bytes`](trait.ByteVec.html#method.drain_bytes).
 908 ///
 909 /// # Examples
 910 ///
 911 /// Basic usage:
 912 ///
 913 /// ```
 914 /// use bstr::ByteVec;
 915 ///
 916 /// let mut s = Vec::from("foobar");
 917 /// {
 918 ///     let mut drainer = s.drain_bytes(2..4);
 919 ///     assert_eq!(drainer.next(), Some(b'o'));
 920 ///     assert_eq!(drainer.next(), Some(b'b'));
 921 ///     assert_eq!(drainer.next(), None);
 922 /// }
 923 /// assert_eq!(s, "foar".as_bytes());
 924 /// ```
 925 #[derive(Debug)]
 926 pub struct DrainBytes<'a> {
 927     it: vec::Drain<'a, u8>,
 928 }
 929
 930 impl<'a> iter::FusedIterator for DrainBytes<'a> {}
 931
 932 impl<'a> Iterator for DrainBytes<'a> {
 933     type Item = u8;
 934
 935     #[inline]
 936     fn next(&mut self) -> Option<u8> {
 937         self.it.next()
 938     }
 939 }
 940
 941 impl<'a> DoubleEndedIterator for DrainBytes<'a> {
 942     #[inline]
 943     fn next_back(&mut self) -> Option<u8> {
 944         self.it.next_back()
 945     }
 946 }
 947
 948 impl<'a> ExactSizeIterator for DrainBytes<'a> {
 949     #[inline]
 950     fn len(&self) -> usize {
 951         self.it.len()
 952     }
 953 }
 954
 955 /// An error that may occur when converting a `Vec<u8>` to a `String`.
 956 ///
 957 /// This error includes the original `Vec<u8>` that failed to convert to a
 958 /// `String`. This permits callers to recover the allocation used even if it
 959 /// it not valid UTF-8.
 960 ///
 961 /// # Examples
 962 ///
 963 /// Basic usage:
 964 ///
 965 /// ```
 966 /// use bstr::{B, ByteVec};
 967 ///
 968 /// let bytes = Vec::from_slice(b"foo\xFFbar");
 969 /// let err = bytes.into_string().unwrap_err();
 970 ///
 971 /// assert_eq!(err.utf8_error().valid_up_to(), 3);
 972 /// assert_eq!(err.utf8_error().error_len(), Some(1));
 973 ///
 974 /// // At no point in this example is an allocation performed.
 975 /// let bytes = Vec::from(err.into_vec());
 976 /// assert_eq!(bytes, B(b"foo\xFFbar"));
 977 /// ```
 978 #[derive(Debug, Eq, PartialEq)]
 979 pub struct FromUtf8Error {
 980     original: Vec<u8>,
 981     err: Utf8Error,
 982 }
 983
 984 impl FromUtf8Error {
 985     /// Return the original bytes as a slice that failed to convert to a
 986     /// `String`.
 987     ///
 988     /// # Examples
 989     ///
 990     /// Basic usage:
 991     ///
 992     /// ```
 993     /// use bstr::{B, ByteVec};
 994     ///
 995     /// let bytes = Vec::from_slice(b"foo\xFFbar");
 996     /// let err = bytes.into_string().unwrap_err();
 997     ///
 998     /// // At no point in this example is an allocation performed.
 999     /// assert_eq!(err.as_bytes(), B(b"foo\xFFbar"));
1000     /// ```
1001     #[inline]
1002     pub fn as_bytes(&self) -> &[u8] {
1003         &self.original
1004     }
1005
1006     /// Consume this error and return the original byte string that failed to
1007     /// convert to a `String`.
1008     ///
1009     /// # Examples
1010     ///
1011     /// Basic usage:
1012     ///
1013     /// ```
1014     /// use bstr::{B, ByteVec};
1015     ///
1016     /// let bytes = Vec::from_slice(b"foo\xFFbar");
1017     /// let err = bytes.into_string().unwrap_err();
1018     /// let original = err.into_vec();
1019     ///
1020     /// // At no point in this example is an allocation performed.
1021     /// assert_eq!(original, B(b"foo\xFFbar"));
1022     /// ```
1023     #[inline]
1024     pub fn into_vec(self) -> Vec<u8> {
1025         self.original
1026     }
1027
1028     /// Return the underlying UTF-8 error that occurred. This error provides
1029     /// information on the nature and location of the invalid UTF-8 detected.
1030     ///
1031     /// # Examples
1032     ///
1033     /// Basic usage:
1034     ///
1035     /// ```
1036     /// use bstr::{B, ByteVec};
1037     ///
1038     /// let bytes = Vec::from_slice(b"foo\xFFbar");
1039     /// let err = bytes.into_string().unwrap_err();
1040     ///
1041     /// assert_eq!(err.utf8_error().valid_up_to(), 3);
1042     /// assert_eq!(err.utf8_error().error_len(), Some(1));
1043     /// ```
1044     #[inline]
1045     pub fn utf8_error(&self) -> &Utf8Error {
1046         &self.err
1047     }
1048 }
1049
// Only available with the `std` feature, since the `error` module is
// imported from `std`.
#[cfg(feature = "std")]
impl error::Error for FromUtf8Error {
    /// Returns a fixed, short summary of this error.
    #[inline]
    fn description(&self) -> &str {
        const SUMMARY: &str = "invalid UTF-8 vector";
        SUMMARY
    }
}
1057
1058 impl fmt::Display for FromUtf8Error {
1059     #[inline]
1060     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1061         write!(f, "{}", self.err)
1062     }
1063 }
1064
#[cfg(all(test, feature = "std"))]
mod tests {
    use crate::ext_vec::ByteVec;

    /// `insert_str` splices bytes in at the start, middle and end of a
    /// buffer, including an empty one.
    #[test]
    fn insert() {
        // (initial contents, insertion index, inserted text, expected result)
        let cases: [(&str, usize, &str, &str); 8] = [
            ("", 0, "foo", "foo"),
            ("a", 0, "foo", "fooa"),
            ("a", 1, "foo", "afoo"),
            ("foobar", 3, "quux", "fooquuxbar"),
            ("foobar", 3, "x", "fooxbar"),
            ("foobar", 0, "x", "xfoobar"),
            ("foobar", 6, "x", "foobarx"),
            ("foobar", 3, "quuxbazquux", "fooquuxbazquuxbar"),
        ];

        for &(initial, at, text, expected) in cases.iter() {
            let mut buf: Vec<u8> = Vec::from(initial);
            buf.insert_str(at, text);
            assert_eq!(buf, expected.as_bytes());
        }
    }

    /// Inserting one past the end of an empty buffer must panic.
    #[test]
    #[should_panic]
    fn insert_fail1() {
        let mut buf: Vec<u8> = vec![];
        buf.insert_str(1, "foo");
    }

    /// Inserting one past the end of a single byte buffer must panic.
    #[test]
    #[should_panic]
    fn insert_fail2() {
        let mut buf: Vec<u8> = Vec::from("a");
        buf.insert_str(2, "foo");
    }

    /// Inserting one past the end of a longer buffer must panic.
    #[test]
    #[should_panic]
    fn insert_fail3() {
        let mut buf: Vec<u8> = Vec::from("foobar");
        buf.insert_str(7, "foo");
    }
}