New upstream version 1.74.1+dfsg1

[rustc.git] / library / core / src / str / mod.rs
diff --git a/library/core/src/str/mod.rs b/library/core/src/str/mod.rs

index 7ca95a02dd8f482d42d1fac093e71297571cc790..dfa2d4fd5b615d1e0355a6d416fc7cf9024db7c4 100644 (file)
--- a/library/core/src/str/mod.rs
+++ b/library/core/src/str/mod.rs
@@ -7,6 +7,7 @@
  #![stable(feature = "rust1", since = "1.0.0")]
  
  mod converts;
+mod count;
  mod error;
  mod iter;
  mod traits;
@@ -15,15 +16,16 @@ mod validations;
  use self::pattern::Pattern;
  use self::pattern::{DoubleEndedSearcher, ReverseSearcher, Searcher};
  
+use crate::ascii;
  use crate::char::{self, EscapeDebugExtArgs};
  use crate::mem;
  use crate::slice::{self, SliceIndex};
  
  pub mod pattern;
  
-#[unstable(feature = "str_internals", issue = "none")]
-#[allow(missing_docs)]
-pub mod lossy;
+mod lossy;
+#[unstable(feature = "utf8_chunks", issue = "99543")]
+pub use lossy::{Utf8Chunk, Utf8Chunks};
  
  #[stable(feature = "rust1", since = "1.0.0")]
  pub use converts::{from_utf8, from_utf8_unchecked};
@@ -69,26 +71,43 @@ pub use iter::SplitAsciiWhitespace;
  pub use iter::SplitInclusive;
  
  #[unstable(feature = "str_internals", issue = "none")]
-pub use validations::next_code_point;
+pub use validations::{next_code_point, utf8_char_width};
  
  use iter::MatchIndicesInternal;
  use iter::SplitInternal;
  use iter::{MatchesInternal, SplitNInternal};
  
-use validations::truncate_to_char_boundary;
-
  #[inline(never)]
  #[cold]
  #[track_caller]
-fn slice_error_fail(s: &str, begin: usize, end: usize) -> ! {
+#[rustc_allow_const_fn_unstable(const_eval_select)]
+const fn slice_error_fail(s: &str, begin: usize, end: usize) -> ! {
+    // SAFETY: panics for both branches: the `_ct` and `_rt` functions below both return `!`
+    unsafe {
+        crate::intrinsics::const_eval_select(
+            (s, begin, end),
+            slice_error_fail_ct,
+            slice_error_fail_rt,
+        )
+    }
+}
+
+#[track_caller]
+const fn slice_error_fail_ct(_: &str, _: usize, _: usize) -> ! {
+    panic!("failed to slice string"); // fixed message; the arguments are not formatted into it
+}
+
+#[track_caller]
+fn slice_error_fail_rt(s: &str, begin: usize, end: usize) -> ! {
      const MAX_DISPLAY_LENGTH: usize = 256;
-    let (truncated, s_trunc) = truncate_to_char_boundary(s, MAX_DISPLAY_LENGTH);
-    let ellipsis = if truncated { "[...]" } else { "" };
+    let trunc_len = s.floor_char_boundary(MAX_DISPLAY_LENGTH);
+    let s_trunc = &s[..trunc_len];
+    let ellipsis = if trunc_len < s.len() { "[...]" } else { "" };
  
      // 1. out of bounds
      if begin > s.len() || end > s.len() {
          let oob_index = if begin > s.len() { begin } else { end };
-        panic!("byte index {} is out of bounds of `{}`{}", oob_index, s_trunc, ellipsis);
+        panic!("byte index {oob_index} is out of bounds of `{s_trunc}`{ellipsis}");
      }
  
      // 2. begin <= end
@@ -104,10 +123,7 @@ fn slice_error_fail(s: &str, begin: usize, end: usize) -> ! {
      // 3. character boundary
      let index = if !s.is_char_boundary(begin) { begin } else { end };
      // find the character
-    let mut char_start = index;
-    while !s.is_char_boundary(char_start) {
-        char_start -= 1;
-    }
+    let char_start = s.floor_char_boundary(index);
      // `char_start` must be less than len and a char boundary
      let ch = s[char_start..].chars().next().unwrap();
      let char_range = char_start..char_start + ch.len_utf8();
@@ -117,20 +133,17 @@ fn slice_error_fail(s: &str, begin: usize, end: usize) -> ! {
      );
  }
  
-#[lang = "str"]
  #[cfg(not(test))]
  impl str {
      /// Returns the length of `self`.
      ///
      /// This length is in bytes, not [`char`]s or graphemes. In other words,
-    /// it may not be what a human considers the length of the string.
+    /// it might not be what a human considers the length of the string.
      ///
      /// [`char`]: prim@char
      ///
      /// # Examples
      ///
-    /// Basic usage:
-    ///
      /// ```
      /// let len = "foo".len();
      /// assert_eq!(3, len);
@@ -140,6 +153,7 @@ impl str {
      /// ```
      #[stable(feature = "rust1", since = "1.0.0")]
      #[rustc_const_stable(feature = "const_str_len", since = "1.39.0")]
+    #[must_use]
      #[inline]
      pub const fn len(&self) -> usize {
          self.as_bytes().len()
@@ -149,8 +163,6 @@ impl str {
      ///
      /// # Examples
      ///
-    /// Basic usage:
-    ///
      /// ```
      /// let s = "";
      /// assert!(s.is_empty());
@@ -158,9 +170,10 @@ impl str {
      /// let s = "not empty";
      /// assert!(!s.is_empty());
      /// ```
-    #[inline]
      #[stable(feature = "rust1", since = "1.0.0")]
      #[rustc_const_stable(feature = "const_str_is_empty", since = "1.39.0")]
+    #[must_use]
+    #[inline]
      pub const fn is_empty(&self) -> bool {
          self.len() == 0
      }
@@ -188,6 +201,7 @@ impl str {
      /// // third byte of `老`
      /// assert!(!s.is_char_boundary(8));
      /// ```
+    #[must_use]
      #[stable(feature = "is_char_boundary", since = "1.9.0")]
      #[inline]
      pub fn is_char_boundary(&self, index: usize) -> bool {
@@ -211,8 +225,79 @@ impl str {
              // code on higher opt-levels. See PR #84751 for more details.
              None => index == self.len(),
  
-            // This is bit magic equivalent to: b < 128 || b >= 192
-            Some(&b) => (b as i8) >= -0x40,
+            Some(&b) => b.is_utf8_char_boundary(),
+        }
+    }
+
+    /// Finds the closest `x` not exceeding `index` where `is_char_boundary(x)` is `true`.
+    ///
+    /// This method can help you truncate a string so that it's still valid UTF-8, but doesn't
+    /// exceed a given number of bytes. Note that this is done purely at the character level
+    /// and can still visually split graphemes, even though the underlying characters aren't
+    /// split. For example, the emoji 🧑‍🔬 (scientist) could be split so that the string only
+    /// includes 🧑 (person) instead.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(round_char_boundary)]
+    /// let s = "❤️🧡💛💚💙💜";
+    /// assert_eq!(s.len(), 26);
+    /// assert!(!s.is_char_boundary(13));
+    ///
+    /// let closest = s.floor_char_boundary(13);
+    /// assert_eq!(closest, 10);
+    /// assert_eq!(&s[..closest], "❤️🧡");
+    /// ```
+    #[unstable(feature = "round_char_boundary", issue = "93743")]
+    #[inline]
+    pub fn floor_char_boundary(&self, index: usize) -> usize {
+        if index >= self.len() {
+            self.len()
+        } else {
+            let lower_bound = index.saturating_sub(3); // window of at most 4 bytes ending at `index`
+            let new_index = self.as_bytes()[lower_bound..=index]
+                .iter()
+                .rposition(|b| b.is_utf8_char_boundary());
+
+            // SAFETY: `index < len` and a char is at most four bytes, so a boundary is in this window
+            unsafe { lower_bound + new_index.unwrap_unchecked() }
+        }
+    }
+
+    /// Finds the closest `x` not below `index` where `is_char_boundary(x)` is `true`.
+    ///
+    /// If `index` is greater than the length of the string, this returns the length of the string.
+    ///
+    /// This method is the natural complement to [`floor_char_boundary`]. See that method
+    /// for more details.
+    ///
+    /// [`floor_char_boundary`]: str::floor_char_boundary
+    ///
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(round_char_boundary)]
+    /// let s = "❤️🧡💛💚💙💜";
+    /// assert_eq!(s.len(), 26);
+    /// assert!(!s.is_char_boundary(13));
+    ///
+    /// let closest = s.ceil_char_boundary(13);
+    /// assert_eq!(closest, 14);
+    /// assert_eq!(&s[..closest], "❤️🧡💛");
+    /// ```
+    #[unstable(feature = "round_char_boundary", issue = "93743")]
+    #[inline]
+    pub fn ceil_char_boundary(&self, index: usize) -> usize {
+        if index > self.len() {
+            self.len()
+        } else {
+            let upper_bound = Ord::min(index + 4, self.len()); // scan at most 4 bytes from `index`
+            self.as_bytes()[index..upper_bound]
+                .iter()
+                .position(|b| b.is_utf8_char_boundary())
+                .map_or(upper_bound, |pos| pos + index)
          }
      }
  
@@ -221,17 +306,15 @@ impl str {
      ///
      /// # Examples
      ///
-    /// Basic usage:
-    ///
      /// ```
      /// let bytes = "bors".as_bytes();
      /// assert_eq!(b"bors", bytes);
      /// ```
      #[stable(feature = "rust1", since = "1.0.0")]
      #[rustc_const_stable(feature = "str_as_bytes", since = "1.39.0")]
+    #[must_use]
      #[inline(always)]
      #[allow(unused_attributes)]
-    #[rustc_allow_const_fn_unstable(const_fn_transmute)]
      pub const fn as_bytes(&self) -> &[u8] {
          // SAFETY: const sound because we transmute two types with the same layout
          unsafe { mem::transmute(self) }
@@ -274,10 +357,11 @@ impl str {
      /// assert_eq!("🍔∈🌏", s);
      /// ```
      #[stable(feature = "str_mut_extras", since = "1.20.0")]
+    #[must_use]
      #[inline(always)]
      pub unsafe fn as_bytes_mut(&mut self) -> &mut [u8] {
          // SAFETY: the cast from `&str` to `&[u8]` is safe since `str`
-        // has the same layout as `&[u8]` (only libstd can make this guarantee).
+        // has the same layout as `&[u8]` (only std can make this guarantee).
          // The pointer dereference is safe since it comes from a mutable reference which
          // is guaranteed to be valid for writes.
          unsafe { &mut *(self as *mut str as *mut [u8]) }
@@ -296,15 +380,15 @@ impl str {
      ///
      /// # Examples
      ///
-    /// Basic usage:
-    ///
      /// ```
      /// let s = "Hello";
      /// let ptr = s.as_ptr();
      /// ```
      #[stable(feature = "rust1", since = "1.0.0")]
      #[rustc_const_stable(feature = "rustc_str_as_ptr", since = "1.32.0")]
-    #[inline]
+    #[cfg_attr(not(bootstrap), rustc_never_returns_null_ptr)]
+    #[must_use]
+    #[inline(always)]
      pub const fn as_ptr(&self) -> *const u8 {
          self as *const str as *const u8
      }
@@ -318,7 +402,9 @@ impl str {
      /// It is your responsibility to make sure that the string slice only gets
      /// modified in a way that it remains valid UTF-8.
      #[stable(feature = "str_as_mut_ptr", since = "1.36.0")]
-    #[inline]
+    #[cfg_attr(not(bootstrap), rustc_never_returns_null_ptr)]
+    #[must_use]
+    #[inline(always)]
      pub fn as_mut_ptr(&mut self) -> *mut u8 {
          self as *mut str as *mut u8
      }
@@ -410,7 +496,7 @@ impl str {
      #[inline]
      pub unsafe fn get_unchecked<I: SliceIndex<str>>(&self, i: I) -> &I::Output {
          // SAFETY: the caller must uphold the safety contract for `get_unchecked`;
-        // the slice is dereferencable because `self` is a safe reference.
+        // the slice is dereferenceable because `self` is a safe reference.
          // The returned pointer is safe because impls of `SliceIndex` have to guarantee that it is.
          unsafe { &*i.get_unchecked(self) }
      }
@@ -445,7 +531,7 @@ impl str {
      #[inline]
      pub unsafe fn get_unchecked_mut<I: SliceIndex<str>>(&mut self, i: I) -> &mut I::Output {
          // SAFETY: the caller must uphold the safety contract for `get_unchecked_mut`;
-        // the slice is dereferencable because `self` is a safe reference.
+        // the slice is dereferenceable because `self` is a safe reference.
          // The returned pointer is safe because impls of `SliceIndex` have to guarantee that it is.
          unsafe { &mut *i.get_unchecked_mut(self) }
      }
@@ -477,8 +563,6 @@ impl str {
      ///
      /// # Examples
      ///
-    /// Basic usage:
-    ///
      /// ```
      /// let s = "Löwe 老虎 Léopard";
      ///
@@ -493,11 +577,12 @@ impl str {
      /// }
      /// ```
      #[stable(feature = "rust1", since = "1.0.0")]
-    #[rustc_deprecated(since = "1.29.0", reason = "use `get_unchecked(begin..end)` instead")]
+    #[deprecated(since = "1.29.0", note = "use `get_unchecked(begin..end)` instead")]
+    #[must_use]
      #[inline]
      pub unsafe fn slice_unchecked(&self, begin: usize, end: usize) -> &str {
          // SAFETY: the caller must uphold the safety contract for `get_unchecked`;
-        // the slice is dereferencable because `self` is a safe reference.
+        // the slice is dereferenceable because `self` is a safe reference.
          // The returned pointer is safe because impls of `SliceIndex` have to guarantee that it is.
          unsafe { &*(begin..end).get_unchecked(self) }
      }
@@ -526,11 +611,11 @@ impl str {
      /// * `begin` and `end` must be byte positions within the string slice.
      /// * `begin` and `end` must lie on UTF-8 sequence boundaries.
      #[stable(feature = "str_slice_mut", since = "1.5.0")]
-    #[rustc_deprecated(since = "1.29.0", reason = "use `get_unchecked_mut(begin..end)` instead")]
+    #[deprecated(since = "1.29.0", note = "use `get_unchecked_mut(begin..end)` instead")]
      #[inline]
      pub unsafe fn slice_mut_unchecked(&mut self, begin: usize, end: usize) -> &mut str {
          // SAFETY: the caller must uphold the safety contract for `get_unchecked_mut`;
-        // the slice is dereferencable because `self` is a safe reference.
+        // the slice is dereferenceable because `self` is a safe reference.
          // The returned pointer is safe because impls of `SliceIndex` have to guarantee that it is.
          unsafe { &mut *(begin..end).get_unchecked_mut(self) }
      }
@@ -555,8 +640,6 @@ impl str {
      ///
      /// # Examples
      ///
-    /// Basic usage:
-    ///
      /// ```
      /// let s = "Per Martin-Löf";
      ///
@@ -566,6 +649,7 @@ impl str {
      /// assert_eq!(" Martin-Löf", last);
      /// ```
      #[inline]
+    #[must_use]
      #[stable(feature = "str_split_at", since = "1.4.0")]
      pub fn split_at(&self, mid: usize) -> (&str, &str) {
          // is_char_boundary checks that the index is in [0, .len()]
@@ -596,8 +680,6 @@ impl str {
      ///
      /// # Examples
      ///
-    /// Basic usage:
-    ///
      /// ```
      /// let mut s = "Per Martin-Löf".to_string();
      /// {
@@ -609,6 +691,7 @@ impl str {
      /// assert_eq!("PER Martin-Löf", s);
      /// ```
      #[inline]
+    #[must_use]
      #[stable(feature = "str_split_at", since = "1.4.0")]
      pub fn split_at_mut(&mut self, mid: usize) -> (&mut str, &mut str) {
          // is_char_boundary checks that the index is in [0, .len()]
@@ -633,7 +716,7 @@ impl str {
      /// string slice by [`char`]. This method returns such an iterator.
      ///
      /// It's important to remember that [`char`] represents a Unicode Scalar
-    /// Value, and may not match your idea of what a 'character' is. Iteration
+    /// Value, and might not match your idea of what a 'character' is. Iteration
      /// over grapheme clusters may be what you actually want. This functionality
      /// is not provided by Rust's standard library, check crates.io instead.
      ///
@@ -660,7 +743,7 @@ impl str {
      /// assert_eq!(None, chars.next());
      /// ```
      ///
-    /// Remember, [`char`]s may not match your intuition about characters:
+    /// Remember, [`char`]s might not match your intuition about characters:
      ///
      /// [`char`]: prim@char
      ///
@@ -713,7 +796,7 @@ impl str {
      /// assert_eq!(None, char_indices.next());
      /// ```
      ///
-    /// Remember, [`char`]s may not match your intuition about characters:
+    /// Remember, [`char`]s might not match your intuition about characters:
      ///
      /// [`char`]: prim@char
      ///
@@ -744,8 +827,6 @@ impl str {
      ///
      /// # Examples
      ///
-    /// Basic usage:
-    ///
      /// ```
      /// let mut bytes = "bors".bytes();
      ///
@@ -799,7 +880,16 @@ impl str {
      ///
      /// assert_eq!(None, iter.next());
      /// ```
+    ///
+    /// If the string is empty or all whitespace, the iterator yields no string slices:
+    /// ```
+    /// assert_eq!("".split_whitespace().next(), None);
+    /// assert_eq!("   ".split_whitespace().next(), None);
+    /// ```
+    #[must_use = "this returns the split string as an iterator, \
+                  without modifying the original"]
      #[stable(feature = "split_whitespace", since = "1.1.0")]
+    #[cfg_attr(not(test), rustc_diagnostic_item = "str_split_whitespace")]
      #[inline]
      pub fn split_whitespace(&self) -> SplitWhitespace<'_> {
          SplitWhitespace { inner: self.split(IsWhitespace).filter(IsNotEmpty) }
@@ -840,6 +930,14 @@ impl str {
      ///
      /// assert_eq!(None, iter.next());
      /// ```
+    ///
+    /// If the string is empty or all ASCII whitespace, the iterator yields no string slices:
+    /// ```
+    /// assert_eq!("".split_ascii_whitespace().next(), None);
+    /// assert_eq!("   ".split_ascii_whitespace().next(), None);
+    /// ```
+    #[must_use = "this returns the split string as an iterator, \
+                  without modifying the original"]
      #[stable(feature = "split_ascii_whitespace", since = "1.34.0")]
      #[inline]
      pub fn split_ascii_whitespace(&self) -> SplitAsciiWhitespace<'_> {
@@ -850,8 +948,14 @@ impl str {
  
      /// An iterator over the lines of a string, as string slices.
      ///
-    /// Lines are ended with either a newline (`\n`) or a carriage return with
-    /// a line feed (`\r\n`).
+    /// Lines are split at line endings that are either newlines (`\n`) or
+    /// sequences of a carriage return followed by a line feed (`\r\n`).
+    ///
+    /// Line terminators are not included in the lines returned by the iterator.
+    ///
+    /// Note that any carriage return (`\r`) not immediately followed by a
+    /// line feed (`\n`) does not split a line. These carriage returns are
+    /// thereby included in the produced lines.
      ///
      /// The final line ending is optional. A string that ends with a final line
      /// ending will return the same lines as an otherwise identical string
@@ -862,18 +966,19 @@ impl str {
      /// Basic usage:
      ///
      /// ```
-    /// let text = "foo\r\nbar\n\nbaz\n";
+    /// let text = "foo\r\nbar\n\nbaz\r";
      /// let mut lines = text.lines();
      ///
      /// assert_eq!(Some("foo"), lines.next());
      /// assert_eq!(Some("bar"), lines.next());
      /// assert_eq!(Some(""), lines.next());
-    /// assert_eq!(Some("baz"), lines.next());
+    /// // Trailing carriage return is included in the last line
+    /// assert_eq!(Some("baz\r"), lines.next());
      ///
      /// assert_eq!(None, lines.next());
      /// ```
      ///
-    /// The final line ending isn't required:
+    /// The final line does not require any ending:
      ///
      /// ```
      /// let text = "foo\nbar\n\r\nbaz";
@@ -889,12 +994,12 @@ impl str {
      #[stable(feature = "rust1", since = "1.0.0")]
      #[inline]
      pub fn lines(&self) -> Lines<'_> {
-        Lines(self.split_terminator('\n').map(LinesAnyMap))
+        Lines(self.split_inclusive('\n').map(LinesMap))
      }
  
      /// An iterator over the lines of a string.
      #[stable(feature = "rust1", since = "1.0.0")]
-    #[rustc_deprecated(since = "1.4.0", reason = "use lines() instead now")]
+    #[deprecated(since = "1.4.0", note = "use lines() instead now", suggestion = "lines")]
      #[inline]
      #[allow(deprecated)]
      pub fn lines_any(&self) -> LinesAny<'_> {
@@ -905,8 +1010,6 @@ impl str {
      ///
      /// # Examples
      ///
-    /// Basic usage:
-    ///
      /// ```
      /// let text = "Zażółć gęślą jaźń";
      ///
@@ -915,6 +1018,8 @@ impl str {
      ///
      /// assert!(utf16_len <= utf8_len);
      /// ```
+    #[must_use = "this returns the encoded string as an iterator, \
+                  without modifying the original"]
      #[stable(feature = "encode_utf16", since = "1.8.0")]
      pub fn encode_utf16(&self) -> EncodeUtf16<'_> {
          EncodeUtf16 { chars: self.chars(), extra: 0 }
@@ -933,8 +1038,6 @@ impl str {
      ///
      /// # Examples
      ///
-    /// Basic usage:
-    ///
      /// ```
      /// let bananas = "bananas";
      ///
@@ -960,8 +1063,6 @@ impl str {
      ///
      /// # Examples
      ///
-    /// Basic usage:
-    ///
      /// ```
      /// let bananas = "bananas";
      ///
@@ -986,8 +1087,6 @@ impl str {
      ///
      /// # Examples
      ///
-    /// Basic usage:
-    ///
      /// ```
      /// let bananas = "bananas";
      ///
@@ -1050,7 +1149,7 @@ impl str {
          pat.into_searcher(self).next_match().map(|(i, _)| i)
      }
  
-    /// Returns the byte index for the first character of the rightmost match of the pattern in
+    /// Returns the byte index for the first character of the last match of the pattern in
      /// this string slice.
      ///
      /// Returns [`None`] if the pattern doesn't match.
@@ -1346,14 +1445,15 @@ impl str {
      ///
      /// # Examples
      ///
-    /// Basic usage:
-    ///
      /// ```
      /// let v: Vec<&str> = "A.B.".split_terminator('.').collect();
      /// assert_eq!(v, ["A", "B"]);
      ///
      /// let v: Vec<&str> = "A..B..".split_terminator(".").collect();
      /// assert_eq!(v, ["A", "", "B", ""]);
+    ///
+    /// let v: Vec<&str> = "A.B:C.D".split_terminator(&['.', ':'][..]).collect();
+    /// assert_eq!(v, ["A", "B", "C", "D"]);
      /// ```
      #[stable(feature = "rust1", since = "1.0.0")]
      #[inline]
@@ -1397,6 +1497,9 @@ impl str {
      ///
      /// let v: Vec<&str> = "A..B..".rsplit_terminator(".").collect();
      /// assert_eq!(v, ["", "B", "", "A"]);
+    ///
+    /// let v: Vec<&str> = "A.B:C.D".rsplit_terminator(&['.', ':'][..]).collect();
+    /// assert_eq!(v, ["D", "C", "B", "A"]);
      /// ```
      #[stable(feature = "rust1", since = "1.0.0")]
      #[inline]
@@ -1518,6 +1621,7 @@ impl str {
      ///
      /// ```
      /// assert_eq!("cfg".split_once('='), None);
+    /// assert_eq!("cfg=".split_once('='), Some(("cfg", "")));
      /// assert_eq!("cfg=foo".split_once('='), Some(("cfg", "foo")));
      /// assert_eq!("cfg=foo=bar".split_once('='), Some(("cfg", "foo=bar")));
      /// ```
@@ -1525,7 +1629,8 @@ impl str {
      #[inline]
      pub fn split_once<'a, P: Pattern<'a>>(&'a self, delimiter: P) -> Option<(&'a str, &'a str)> {
          let (start, end) = delimiter.into_searcher(self).next_match()?;
-        Some((&self[..start], &self[end..]))
+        // SAFETY: `Searcher` is known to return valid indices.
+        unsafe { Some((self.get_unchecked(..start), self.get_unchecked(end..))) }
      }
  
      /// Splits the string on the last occurrence of the specified delimiter and
@@ -1545,7 +1650,8 @@ impl str {
          P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
      {
          let (start, end) = delimiter.into_searcher(self).next_match_back()?;
-        Some((&self[..start], &self[end..]))
+        // SAFETY: `Searcher` is known to return valid indices.
+        unsafe { Some((self.get_unchecked(..start), self.get_unchecked(end..))) }
      }
  
      /// An iterator over the disjoint matches of a pattern within the given string
@@ -1566,12 +1672,10 @@ impl str {
      /// If the pattern allows a reverse search but its results might differ
      /// from a forward search, the [`rmatches`] method can be used.
      ///
-    /// [`rmatches`]: str::matches
+    /// [`rmatches`]: str::rmatches
      ///
      /// # Examples
      ///
-    /// Basic usage:
-    ///
      /// ```
      /// let v: Vec<&str> = "abcXXXabcYYYabc".matches("abc").collect();
      /// assert_eq!(v, ["abc", "abc", "abc"]);
@@ -1606,8 +1710,6 @@ impl str {
      ///
      /// # Examples
      ///
-    /// Basic usage:
-    ///
      /// ```
      /// let v: Vec<&str> = "abcXXXabcYYYabc".rmatches("abc").collect();
      /// assert_eq!(v, ["abc", "abc", "abc"]);
@@ -1645,12 +1747,10 @@ impl str {
      /// If the pattern allows a reverse search but its results might differ
      /// from a forward search, the [`rmatch_indices`] method can be used.
      ///
-    /// [`rmatch_indices`]: str::match_indices
+    /// [`rmatch_indices`]: str::rmatch_indices
      ///
      /// # Examples
      ///
-    /// Basic usage:
-    ///
      /// ```
      /// let v: Vec<_> = "abcXXXabcYYYabc".match_indices("abc").collect();
      /// assert_eq!(v, [(0, "abc"), (6, "abc"), (12, "abc")]);
@@ -1691,8 +1791,6 @@ impl str {
      ///
      /// # Examples
      ///
-    /// Basic usage:
-    ///
      /// ```
      /// let v: Vec<_> = "abcXXXabcYYYabc".rmatch_indices("abc").collect();
      /// assert_eq!(v, [(12, "abc"), (6, "abc"), (0, "abc")]);
@@ -1715,14 +1813,12 @@ impl str {
      /// Returns a string slice with leading and trailing whitespace removed.
      ///
      /// 'Whitespace' is defined according to the terms of the Unicode Derived
-    /// Core Property `White_Space`.
+    /// Core Property `White_Space`, which includes newlines.
      ///
      /// # Examples
      ///
-    /// Basic usage:
-    ///
      /// ```
-    /// let s = " Hello\tworld\t";
+    /// let s = "\n Hello\tworld\t\n";
      ///
      /// assert_eq!("Hello\tworld", s.trim());
      /// ```
@@ -1730,6 +1826,7 @@ impl str {
      #[must_use = "this returns the trimmed string as a slice, \
                    without modifying the original"]
      #[stable(feature = "rust1", since = "1.0.0")]
+    #[cfg_attr(not(test), rustc_diagnostic_item = "str_trim")]
      pub fn trim(&self) -> &str {
          self.trim_matches(|c: char| c.is_whitespace())
      }
@@ -1737,7 +1834,7 @@ impl str {
      /// Returns a string slice with leading whitespace removed.
      ///
      /// 'Whitespace' is defined according to the terms of the Unicode Derived
-    /// Core Property `White_Space`.
+    /// Core Property `White_Space`, which includes newlines.
      ///
      /// # Text directionality
      ///
@@ -1751,8 +1848,8 @@ impl str {
      /// Basic usage:
      ///
      /// ```
-    /// let s = " Hello\tworld\t";
-    /// assert_eq!("Hello\tworld\t", s.trim_start());
+    /// let s = "\n Hello\tworld\t\n";
+    /// assert_eq!("Hello\tworld\t\n", s.trim_start());
      /// ```
      ///
      /// Directionality:
@@ -1768,6 +1865,7 @@ impl str {
      #[must_use = "this returns the trimmed string as a new slice, \
                    without modifying the original"]
      #[stable(feature = "trim_direction", since = "1.30.0")]
+    #[cfg_attr(not(test), rustc_diagnostic_item = "str_trim_start")]
      pub fn trim_start(&self) -> &str {
          self.trim_start_matches(|c: char| c.is_whitespace())
      }
@@ -1775,7 +1873,7 @@ impl str {
      /// Returns a string slice with trailing whitespace removed.
      ///
      /// 'Whitespace' is defined according to the terms of the Unicode Derived
-    /// Core Property `White_Space`.
+    /// Core Property `White_Space`, which includes newlines.
      ///
      /// # Text directionality
      ///
@@ -1789,8 +1887,8 @@ impl str {
      /// Basic usage:
      ///
      /// ```
-    /// let s = " Hello\tworld\t";
-    /// assert_eq!(" Hello\tworld", s.trim_end());
+    /// let s = "\n Hello\tworld\t\n";
+    /// assert_eq!("\n Hello\tworld", s.trim_end());
      /// ```
      ///
      /// Directionality:
@@ -1806,6 +1904,7 @@ impl str {
      #[must_use = "this returns the trimmed string as a new slice, \
                    without modifying the original"]
      #[stable(feature = "trim_direction", since = "1.30.0")]
+    #[cfg_attr(not(test), rustc_diagnostic_item = "str_trim_end")]
      pub fn trim_end(&self) -> &str {
          self.trim_end_matches(|c: char| c.is_whitespace())
      }
@@ -1841,13 +1940,11 @@ impl str {
      /// let s = "  עברית";
      /// assert!(Some('ע') == s.trim_left().chars().next());
      /// ```
+    #[must_use = "this returns the trimmed string as a new slice, \
+                  without modifying the original"]
      #[inline]
      #[stable(feature = "rust1", since = "1.0.0")]
-    #[rustc_deprecated(
-        since = "1.33.0",
-        reason = "superseded by `trim_start`",
-        suggestion = "trim_start"
-    )]
+    #[deprecated(since = "1.33.0", note = "superseded by `trim_start`", suggestion = "trim_start")]
      pub fn trim_left(&self) -> &str {
          self.trim_start()
      }
@@ -1883,13 +1980,11 @@ impl str {
      /// let s = "עברית  ";
      /// assert!(Some('ת') == s.trim_right().chars().rev().next());
      /// ```
+    #[must_use = "this returns the trimmed string as a new slice, \
+                  without modifying the original"]
      #[inline]
      #[stable(feature = "rust1", since = "1.0.0")]
-    #[rustc_deprecated(
-        since = "1.33.0",
-        reason = "superseded by `trim_end`",
-        suggestion = "trim_end"
-    )]
+    #[deprecated(since = "1.33.0", note = "superseded by `trim_end`", suggestion = "trim_end")]
      pub fn trim_right(&self) -> &str {
          self.trim_end()
      }
@@ -1960,8 +2055,6 @@ impl str {
      ///
      /// # Examples
      ///
-    /// Basic usage:
-    ///
      /// ```
      /// assert_eq!("11foo1bar11".trim_start_matches('1'), "foo1bar11");
      /// assert_eq!("123foo1bar123".trim_start_matches(char::is_numeric), "foo1bar123");
@@ -2107,8 +2200,6 @@ impl str {
      ///
      /// # Examples
      ///
-    /// Basic usage:
-    ///
      /// ```
      /// assert_eq!("11foo1bar11".trim_left_matches('1'), "foo1bar11");
      /// assert_eq!("123foo1bar123".trim_left_matches(char::is_numeric), "foo1bar123");
@@ -2117,9 +2208,9 @@ impl str {
      /// assert_eq!("12foo1bar12".trim_left_matches(x), "foo1bar12");
      /// ```
      #[stable(feature = "rust1", since = "1.0.0")]
-    #[rustc_deprecated(
+    #[deprecated(
          since = "1.33.0",
-        reason = "superseded by `trim_start_matches`",
+        note = "superseded by `trim_start_matches`",
          suggestion = "trim_start_matches"
      )]
      pub fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str {
@@ -2160,9 +2251,9 @@ impl str {
      /// assert_eq!("1fooX".trim_right_matches(|c| c == '1' || c == 'X'), "1foo");
      /// ```
      #[stable(feature = "rust1", since = "1.0.0")]
-    #[rustc_deprecated(
+    #[deprecated(
          since = "1.33.0",
-        reason = "superseded by `trim_end_matches`",
+        note = "superseded by `trim_end_matches`",
          suggestion = "trim_end_matches"
      )]
      pub fn trim_right_matches<'a, P>(&'a self, pat: P) -> &'a str
@@ -2233,14 +2324,26 @@ impl str {
      /// assert!(!non_ascii.is_ascii());
      /// ```
      #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
+    #[rustc_const_stable(feature = "const_slice_is_ascii", since = "1.74.0")]
+    #[must_use]
      #[inline]
-    pub fn is_ascii(&self) -> bool {
+    pub const fn is_ascii(&self) -> bool {
          // We can treat each byte as character here: all multibyte characters
-        // start with a byte that is not in the ascii range, so we will stop
+        // start with a byte that is not in the ASCII range, so we will stop
          // there already.
          self.as_bytes().is_ascii()
      }
  
+    /// If this string slice [`is_ascii`](Self::is_ascii), returns it as a slice
+    /// of [ASCII characters](`ascii::Char`), otherwise returns `None`.
+    #[unstable(feature = "ascii_char", issue = "110998")]
+    #[must_use]
+    #[inline]
+    pub const fn as_ascii(&self) -> Option<&[ascii::Char]> {
+        // As in `is_ascii`, work on bytes directly: multibyte chars start with a non-ASCII byte.
+        self.as_bytes().as_ascii()
+    }
+
      /// Checks that two strings are an ASCII case-insensitive match.
      ///
      /// Same as `to_ascii_lowercase(a) == to_ascii_lowercase(b)`,
@@ -2254,6 +2357,7 @@ impl str {
      /// assert!(!"Ferrös".eq_ignore_ascii_case("FERRÖS"));
      /// ```
      #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
+    #[must_use]
      #[inline]
      pub fn eq_ignore_ascii_case(&self, other: &str) -> bool {
          self.as_bytes().eq_ignore_ascii_case(other.as_bytes())
@@ -2281,7 +2385,7 @@ impl str {
      #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
      #[inline]
      pub fn make_ascii_uppercase(&mut self) {
-        // SAFETY: safe because we transmute two types with the same layout.
+        // SAFETY: changing ASCII letters only does not invalidate UTF-8.
          let me = unsafe { self.as_bytes_mut() };
          me.make_ascii_uppercase()
      }
@@ -2308,7 +2412,7 @@ impl str {
      #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
      #[inline]
      pub fn make_ascii_lowercase(&mut self) {
-        // SAFETY: safe because we transmute two types with the same layout.
+        // SAFETY: changing ASCII letters only does not invalidate UTF-8.
          let me = unsafe { self.as_bytes_mut() };
          me.make_ascii_lowercase()
      }
@@ -2324,7 +2428,7 @@ impl str {
      ///
      /// ```
      /// for c in "❤\n!".escape_debug() {
-    ///     print!("{}", c);
+    ///     print!("{c}");
      /// }
      /// println!();
      /// ```
@@ -2347,6 +2451,8 @@ impl str {
      /// ```
      /// assert_eq!("❤\n!".escape_debug().to_string(), "❤\\n!");
      /// ```
+    #[must_use = "this returns the escaped string as an iterator, \
+                  without modifying the original"]
      #[stable(feature = "str_escape", since = "1.34.0")]
      pub fn escape_debug(&self) -> EscapeDebug<'_> {
          let mut chars = self.chars();
@@ -2368,7 +2474,7 @@ impl str {
      ///
      /// ```
      /// for c in "❤\n!".escape_default() {
-    ///     print!("{}", c);
+    ///     print!("{c}");
      /// }
      /// println!();
      /// ```
@@ -2391,6 +2497,8 @@ impl str {
      /// ```
      /// assert_eq!("❤\n!".escape_default().to_string(), "\\u{2764}\\n!");
      /// ```
+    #[must_use = "this returns the escaped string as an iterator, \
+                  without modifying the original"]
      #[stable(feature = "str_escape", since = "1.34.0")]
      pub fn escape_default(&self) -> EscapeDefault<'_> {
          EscapeDefault { inner: self.chars().flat_map(CharEscapeDefault) }
@@ -2404,7 +2512,7 @@ impl str {
      ///
      /// ```
      /// for c in "❤\n!".escape_unicode() {
-    ///     print!("{}", c);
+    ///     print!("{c}");
      /// }
      /// println!();
      /// ```
@@ -2427,6 +2535,8 @@ impl str {
      /// ```
      /// assert_eq!("❤\n!".escape_unicode().to_string(), "\\u{2764}\\u{a}\\u{21}");
      /// ```
+    #[must_use = "this returns the escaped string as an iterator, \
+                  without modifying the original"]
      #[stable(feature = "str_escape", since = "1.34.0")]
      pub fn escape_unicode(&self) -> EscapeUnicode<'_> {
          EscapeUnicode { inner: self.chars().flat_map(CharEscapeUnicode) }
@@ -2463,10 +2573,10 @@ impl Default for &mut str {
  impl_fn_for_zst! {
      /// A nameable, cloneable fn type
      #[derive(Clone)]
-    struct LinesAnyMap impl<'a> Fn = |line: &'a str| -> &'a str {
-        let l = line.len();
-        if l > 0 && line.as_bytes()[l - 1] == b'\r' { &line[0 .. l - 1] }
-        else { line }
+    struct LinesMap impl<'a> Fn = |line: &'a str| -> &'a str {
+        let Some(line) = line.strip_suffix('\n') else { return line };
+        let Some(line) = line.strip_suffix('\r') else { return line };
+        line
      };
  
      #[derive(Clone)]
@@ -2513,3 +2623,7 @@ impl_fn_for_zst! {
          unsafe { from_utf8_unchecked(bytes) }
      };
  }
+
+// Negative impl: required to make `impl From<&str> for Box<dyn Error>` and `impl<E> From<E> for Box<dyn Error>` not overlap.
+#[stable(feature = "rust1", since = "1.0.0")]
+impl !crate::error::Error for &str {}