]> git.proxmox.com Git - rustc.git/blob - library/core/src/str/mod.rs
7ca95a02dd8f482d42d1fac093e71297571cc790
[rustc.git] / library / core / src / str / mod.rs
1 //! String manipulation.
2 //!
3 //! For more details, see the [`std::str`] module.
4 //!
5 //! [`std::str`]: ../../std/str/index.html
6
7 #![stable(feature = "rust1", since = "1.0.0")]
8
9 mod converts;
10 mod error;
11 mod iter;
12 mod traits;
13 mod validations;
14
15 use self::pattern::Pattern;
16 use self::pattern::{DoubleEndedSearcher, ReverseSearcher, Searcher};
17
18 use crate::char::{self, EscapeDebugExtArgs};
19 use crate::mem;
20 use crate::slice::{self, SliceIndex};
21
22 pub mod pattern;
23
24 #[unstable(feature = "str_internals", issue = "none")]
25 #[allow(missing_docs)]
26 pub mod lossy;
27
28 #[stable(feature = "rust1", since = "1.0.0")]
29 pub use converts::{from_utf8, from_utf8_unchecked};
30
31 #[stable(feature = "str_mut_extras", since = "1.20.0")]
32 pub use converts::{from_utf8_mut, from_utf8_unchecked_mut};
33
34 #[stable(feature = "rust1", since = "1.0.0")]
35 pub use error::{ParseBoolError, Utf8Error};
36
37 #[stable(feature = "rust1", since = "1.0.0")]
38 pub use traits::FromStr;
39
40 #[stable(feature = "rust1", since = "1.0.0")]
41 pub use iter::{Bytes, CharIndices, Chars, Lines, SplitWhitespace};
42
43 #[stable(feature = "rust1", since = "1.0.0")]
44 #[allow(deprecated)]
45 pub use iter::LinesAny;
46
47 #[stable(feature = "rust1", since = "1.0.0")]
48 pub use iter::{RSplit, RSplitTerminator, Split, SplitTerminator};
49
50 #[stable(feature = "rust1", since = "1.0.0")]
51 pub use iter::{RSplitN, SplitN};
52
53 #[stable(feature = "str_matches", since = "1.2.0")]
54 pub use iter::{Matches, RMatches};
55
56 #[stable(feature = "str_match_indices", since = "1.5.0")]
57 pub use iter::{MatchIndices, RMatchIndices};
58
59 #[stable(feature = "encode_utf16", since = "1.8.0")]
60 pub use iter::EncodeUtf16;
61
62 #[stable(feature = "str_escape", since = "1.34.0")]
63 pub use iter::{EscapeDebug, EscapeDefault, EscapeUnicode};
64
65 #[stable(feature = "split_ascii_whitespace", since = "1.34.0")]
66 pub use iter::SplitAsciiWhitespace;
67
68 #[stable(feature = "split_inclusive", since = "1.51.0")]
69 pub use iter::SplitInclusive;
70
71 #[unstable(feature = "str_internals", issue = "none")]
72 pub use validations::next_code_point;
73
74 use iter::MatchIndicesInternal;
75 use iter::SplitInternal;
76 use iter::{MatchesInternal, SplitNInternal};
77
78 use validations::truncate_to_char_boundary;
79
80 #[inline(never)]
81 #[cold]
82 #[track_caller]
83 fn slice_error_fail(s: &str, begin: usize, end: usize) -> ! {
84 const MAX_DISPLAY_LENGTH: usize = 256;
85 let (truncated, s_trunc) = truncate_to_char_boundary(s, MAX_DISPLAY_LENGTH);
86 let ellipsis = if truncated { "[...]" } else { "" };
87
88 // 1. out of bounds
89 if begin > s.len() || end > s.len() {
90 let oob_index = if begin > s.len() { begin } else { end };
91 panic!("byte index {} is out of bounds of `{}`{}", oob_index, s_trunc, ellipsis);
92 }
93
94 // 2. begin <= end
95 assert!(
96 begin <= end,
97 "begin <= end ({} <= {}) when slicing `{}`{}",
98 begin,
99 end,
100 s_trunc,
101 ellipsis
102 );
103
104 // 3. character boundary
105 let index = if !s.is_char_boundary(begin) { begin } else { end };
106 // find the character
107 let mut char_start = index;
108 while !s.is_char_boundary(char_start) {
109 char_start -= 1;
110 }
111 // `char_start` must be less than len and a char boundary
112 let ch = s[char_start..].chars().next().unwrap();
113 let char_range = char_start..char_start + ch.len_utf8();
114 panic!(
115 "byte index {} is not a char boundary; it is inside {:?} (bytes {:?}) of `{}`{}",
116 index, ch, char_range, s_trunc, ellipsis
117 );
118 }
119
120 #[lang = "str"]
121 #[cfg(not(test))]
122 impl str {
123 /// Returns the length of `self`.
124 ///
125 /// This length is in bytes, not [`char`]s or graphemes. In other words,
126 /// it may not be what a human considers the length of the string.
127 ///
128 /// [`char`]: prim@char
129 ///
130 /// # Examples
131 ///
132 /// Basic usage:
133 ///
134 /// ```
135 /// let len = "foo".len();
136 /// assert_eq!(3, len);
137 ///
138 /// assert_eq!("ƒoo".len(), 4); // fancy f!
139 /// assert_eq!("ƒoo".chars().count(), 3);
140 /// ```
141 #[stable(feature = "rust1", since = "1.0.0")]
142 #[rustc_const_stable(feature = "const_str_len", since = "1.39.0")]
143 #[inline]
144 pub const fn len(&self) -> usize {
145 self.as_bytes().len()
146 }
147
148 /// Returns `true` if `self` has a length of zero bytes.
149 ///
150 /// # Examples
151 ///
152 /// Basic usage:
153 ///
154 /// ```
155 /// let s = "";
156 /// assert!(s.is_empty());
157 ///
158 /// let s = "not empty";
159 /// assert!(!s.is_empty());
160 /// ```
161 #[inline]
162 #[stable(feature = "rust1", since = "1.0.0")]
163 #[rustc_const_stable(feature = "const_str_is_empty", since = "1.39.0")]
164 pub const fn is_empty(&self) -> bool {
165 self.len() == 0
166 }
167
168 /// Checks that `index`-th byte is the first byte in a UTF-8 code point
169 /// sequence or the end of the string.
170 ///
171 /// The start and end of the string (when `index == self.len()`) are
172 /// considered to be boundaries.
173 ///
174 /// Returns `false` if `index` is greater than `self.len()`.
175 ///
176 /// # Examples
177 ///
178 /// ```
179 /// let s = "Löwe 老虎 Léopard";
180 /// assert!(s.is_char_boundary(0));
181 /// // start of `老`
182 /// assert!(s.is_char_boundary(6));
183 /// assert!(s.is_char_boundary(s.len()));
184 ///
185 /// // second byte of `ö`
186 /// assert!(!s.is_char_boundary(2));
187 ///
188 /// // third byte of `老`
189 /// assert!(!s.is_char_boundary(8));
190 /// ```
191 #[stable(feature = "is_char_boundary", since = "1.9.0")]
192 #[inline]
193 pub fn is_char_boundary(&self, index: usize) -> bool {
194 // 0 is always ok.
195 // Test for 0 explicitly so that it can optimize out the check
196 // easily and skip reading string data for that case.
197 // Note that optimizing `self.get(..index)` relies on this.
198 if index == 0 {
199 return true;
200 }
201
202 match self.as_bytes().get(index) {
203 // For `None` we have two options:
204 //
205 // - index == self.len()
206 // Empty strings are valid, so return true
207 // - index > self.len()
208 // In this case return false
209 //
210 // The check is placed exactly here, because it improves generated
211 // code on higher opt-levels. See PR #84751 for more details.
212 None => index == self.len(),
213
214 // This is bit magic equivalent to: b < 128 || b >= 192
215 Some(&b) => (b as i8) >= -0x40,
216 }
217 }
218
219 /// Converts a string slice to a byte slice. To convert the byte slice back
220 /// into a string slice, use the [`from_utf8`] function.
221 ///
222 /// # Examples
223 ///
224 /// Basic usage:
225 ///
226 /// ```
227 /// let bytes = "bors".as_bytes();
228 /// assert_eq!(b"bors", bytes);
229 /// ```
230 #[stable(feature = "rust1", since = "1.0.0")]
231 #[rustc_const_stable(feature = "str_as_bytes", since = "1.39.0")]
232 #[inline(always)]
233 #[allow(unused_attributes)]
234 #[rustc_allow_const_fn_unstable(const_fn_transmute)]
235 pub const fn as_bytes(&self) -> &[u8] {
236 // SAFETY: const sound because we transmute two types with the same layout
237 unsafe { mem::transmute(self) }
238 }
239
240 /// Converts a mutable string slice to a mutable byte slice.
241 ///
242 /// # Safety
243 ///
244 /// The caller must ensure that the content of the slice is valid UTF-8
245 /// before the borrow ends and the underlying `str` is used.
246 ///
247 /// Use of a `str` whose contents are not valid UTF-8 is undefined behavior.
248 ///
249 /// # Examples
250 ///
251 /// Basic usage:
252 ///
253 /// ```
254 /// let mut s = String::from("Hello");
255 /// let bytes = unsafe { s.as_bytes_mut() };
256 ///
257 /// assert_eq!(b"Hello", bytes);
258 /// ```
259 ///
260 /// Mutability:
261 ///
262 /// ```
263 /// let mut s = String::from("🗻∈🌏");
264 ///
265 /// unsafe {
266 /// let bytes = s.as_bytes_mut();
267 ///
268 /// bytes[0] = 0xF0;
269 /// bytes[1] = 0x9F;
270 /// bytes[2] = 0x8D;
271 /// bytes[3] = 0x94;
272 /// }
273 ///
274 /// assert_eq!("🍔∈🌏", s);
275 /// ```
276 #[stable(feature = "str_mut_extras", since = "1.20.0")]
277 #[inline(always)]
278 pub unsafe fn as_bytes_mut(&mut self) -> &mut [u8] {
279 // SAFETY: the cast from `&str` to `&[u8]` is safe since `str`
280 // has the same layout as `&[u8]` (only libstd can make this guarantee).
281 // The pointer dereference is safe since it comes from a mutable reference which
282 // is guaranteed to be valid for writes.
283 unsafe { &mut *(self as *mut str as *mut [u8]) }
284 }
285
286 /// Converts a string slice to a raw pointer.
287 ///
288 /// As string slices are a slice of bytes, the raw pointer points to a
289 /// [`u8`]. This pointer will be pointing to the first byte of the string
290 /// slice.
291 ///
292 /// The caller must ensure that the returned pointer is never written to.
293 /// If you need to mutate the contents of the string slice, use [`as_mut_ptr`].
294 ///
295 /// [`as_mut_ptr`]: str::as_mut_ptr
296 ///
297 /// # Examples
298 ///
299 /// Basic usage:
300 ///
301 /// ```
302 /// let s = "Hello";
303 /// let ptr = s.as_ptr();
304 /// ```
305 #[stable(feature = "rust1", since = "1.0.0")]
306 #[rustc_const_stable(feature = "rustc_str_as_ptr", since = "1.32.0")]
307 #[inline]
308 pub const fn as_ptr(&self) -> *const u8 {
309 self as *const str as *const u8
310 }
311
312 /// Converts a mutable string slice to a raw pointer.
313 ///
314 /// As string slices are a slice of bytes, the raw pointer points to a
315 /// [`u8`]. This pointer will be pointing to the first byte of the string
316 /// slice.
317 ///
318 /// It is your responsibility to make sure that the string slice only gets
319 /// modified in a way that it remains valid UTF-8.
320 #[stable(feature = "str_as_mut_ptr", since = "1.36.0")]
321 #[inline]
322 pub fn as_mut_ptr(&mut self) -> *mut u8 {
323 self as *mut str as *mut u8
324 }
325
326 /// Returns a subslice of `str`.
327 ///
328 /// This is the non-panicking alternative to indexing the `str`. Returns
329 /// [`None`] whenever equivalent indexing operation would panic.
330 ///
331 /// # Examples
332 ///
333 /// ```
334 /// let v = String::from("🗻∈🌏");
335 ///
336 /// assert_eq!(Some("🗻"), v.get(0..4));
337 ///
338 /// // indices not on UTF-8 sequence boundaries
339 /// assert!(v.get(1..).is_none());
340 /// assert!(v.get(..8).is_none());
341 ///
342 /// // out of bounds
343 /// assert!(v.get(..42).is_none());
344 /// ```
345 #[stable(feature = "str_checked_slicing", since = "1.20.0")]
346 #[inline]
347 pub fn get<I: SliceIndex<str>>(&self, i: I) -> Option<&I::Output> {
348 i.get(self)
349 }
350
351 /// Returns a mutable subslice of `str`.
352 ///
353 /// This is the non-panicking alternative to indexing the `str`. Returns
354 /// [`None`] whenever equivalent indexing operation would panic.
355 ///
356 /// # Examples
357 ///
358 /// ```
359 /// let mut v = String::from("hello");
360 /// // correct length
361 /// assert!(v.get_mut(0..5).is_some());
362 /// // out of bounds
363 /// assert!(v.get_mut(..42).is_none());
364 /// assert_eq!(Some("he"), v.get_mut(0..2).map(|v| &*v));
365 ///
366 /// assert_eq!("hello", v);
367 /// {
368 /// let s = v.get_mut(0..2);
369 /// let s = s.map(|s| {
370 /// s.make_ascii_uppercase();
371 /// &*s
372 /// });
373 /// assert_eq!(Some("HE"), s);
374 /// }
375 /// assert_eq!("HEllo", v);
376 /// ```
377 #[stable(feature = "str_checked_slicing", since = "1.20.0")]
378 #[inline]
379 pub fn get_mut<I: SliceIndex<str>>(&mut self, i: I) -> Option<&mut I::Output> {
380 i.get_mut(self)
381 }
382
383 /// Returns an unchecked subslice of `str`.
384 ///
385 /// This is the unchecked alternative to indexing the `str`.
386 ///
387 /// # Safety
388 ///
389 /// Callers of this function are responsible that these preconditions are
390 /// satisfied:
391 ///
392 /// * The starting index must not exceed the ending index;
393 /// * Indexes must be within bounds of the original slice;
394 /// * Indexes must lie on UTF-8 sequence boundaries.
395 ///
396 /// Failing that, the returned string slice may reference invalid memory or
397 /// violate the invariants communicated by the `str` type.
398 ///
399 /// # Examples
400 ///
401 /// ```
402 /// let v = "🗻∈🌏";
403 /// unsafe {
404 /// assert_eq!("🗻", v.get_unchecked(0..4));
405 /// assert_eq!("∈", v.get_unchecked(4..7));
406 /// assert_eq!("🌏", v.get_unchecked(7..11));
407 /// }
408 /// ```
409 #[stable(feature = "str_checked_slicing", since = "1.20.0")]
410 #[inline]
411 pub unsafe fn get_unchecked<I: SliceIndex<str>>(&self, i: I) -> &I::Output {
412 // SAFETY: the caller must uphold the safety contract for `get_unchecked`;
413 // the slice is dereferencable because `self` is a safe reference.
414 // The returned pointer is safe because impls of `SliceIndex` have to guarantee that it is.
415 unsafe { &*i.get_unchecked(self) }
416 }
417
418 /// Returns a mutable, unchecked subslice of `str`.
419 ///
420 /// This is the unchecked alternative to indexing the `str`.
421 ///
422 /// # Safety
423 ///
424 /// Callers of this function are responsible that these preconditions are
425 /// satisfied:
426 ///
427 /// * The starting index must not exceed the ending index;
428 /// * Indexes must be within bounds of the original slice;
429 /// * Indexes must lie on UTF-8 sequence boundaries.
430 ///
431 /// Failing that, the returned string slice may reference invalid memory or
432 /// violate the invariants communicated by the `str` type.
433 ///
434 /// # Examples
435 ///
436 /// ```
437 /// let mut v = String::from("🗻∈🌏");
438 /// unsafe {
439 /// assert_eq!("🗻", v.get_unchecked_mut(0..4));
440 /// assert_eq!("∈", v.get_unchecked_mut(4..7));
441 /// assert_eq!("🌏", v.get_unchecked_mut(7..11));
442 /// }
443 /// ```
444 #[stable(feature = "str_checked_slicing", since = "1.20.0")]
445 #[inline]
446 pub unsafe fn get_unchecked_mut<I: SliceIndex<str>>(&mut self, i: I) -> &mut I::Output {
447 // SAFETY: the caller must uphold the safety contract for `get_unchecked_mut`;
448 // the slice is dereferencable because `self` is a safe reference.
449 // The returned pointer is safe because impls of `SliceIndex` have to guarantee that it is.
450 unsafe { &mut *i.get_unchecked_mut(self) }
451 }
452
453 /// Creates a string slice from another string slice, bypassing safety
454 /// checks.
455 ///
456 /// This is generally not recommended, use with caution! For a safe
457 /// alternative see [`str`] and [`Index`].
458 ///
459 /// [`Index`]: crate::ops::Index
460 ///
461 /// This new slice goes from `begin` to `end`, including `begin` but
462 /// excluding `end`.
463 ///
464 /// To get a mutable string slice instead, see the
465 /// [`slice_mut_unchecked`] method.
466 ///
467 /// [`slice_mut_unchecked`]: str::slice_mut_unchecked
468 ///
469 /// # Safety
470 ///
471 /// Callers of this function are responsible that three preconditions are
472 /// satisfied:
473 ///
474 /// * `begin` must not exceed `end`.
475 /// * `begin` and `end` must be byte positions within the string slice.
476 /// * `begin` and `end` must lie on UTF-8 sequence boundaries.
477 ///
478 /// # Examples
479 ///
480 /// Basic usage:
481 ///
482 /// ```
483 /// let s = "Löwe 老虎 Léopard";
484 ///
485 /// unsafe {
486 /// assert_eq!("Löwe 老虎 Léopard", s.slice_unchecked(0, 21));
487 /// }
488 ///
489 /// let s = "Hello, world!";
490 ///
491 /// unsafe {
492 /// assert_eq!("world", s.slice_unchecked(7, 12));
493 /// }
494 /// ```
495 #[stable(feature = "rust1", since = "1.0.0")]
496 #[rustc_deprecated(since = "1.29.0", reason = "use `get_unchecked(begin..end)` instead")]
497 #[inline]
498 pub unsafe fn slice_unchecked(&self, begin: usize, end: usize) -> &str {
499 // SAFETY: the caller must uphold the safety contract for `get_unchecked`;
500 // the slice is dereferencable because `self` is a safe reference.
501 // The returned pointer is safe because impls of `SliceIndex` have to guarantee that it is.
502 unsafe { &*(begin..end).get_unchecked(self) }
503 }
504
505 /// Creates a string slice from another string slice, bypassing safety
/// checks.
///
/// This is generally not recommended, use with caution! For a safe
508 /// alternative see [`str`] and [`IndexMut`].
509 ///
510 /// [`IndexMut`]: crate::ops::IndexMut
511 ///
512 /// This new slice goes from `begin` to `end`, including `begin` but
513 /// excluding `end`.
514 ///
515 /// To get an immutable string slice instead, see the
516 /// [`slice_unchecked`] method.
517 ///
518 /// [`slice_unchecked`]: str::slice_unchecked
519 ///
520 /// # Safety
521 ///
522 /// Callers of this function are responsible that three preconditions are
523 /// satisfied:
524 ///
525 /// * `begin` must not exceed `end`.
526 /// * `begin` and `end` must be byte positions within the string slice.
527 /// * `begin` and `end` must lie on UTF-8 sequence boundaries.
528 #[stable(feature = "str_slice_mut", since = "1.5.0")]
529 #[rustc_deprecated(since = "1.29.0", reason = "use `get_unchecked_mut(begin..end)` instead")]
530 #[inline]
531 pub unsafe fn slice_mut_unchecked(&mut self, begin: usize, end: usize) -> &mut str {
532 // SAFETY: the caller must uphold the safety contract for `get_unchecked_mut`;
533 // the slice is dereferencable because `self` is a safe reference.
534 // The returned pointer is safe because impls of `SliceIndex` have to guarantee that it is.
535 unsafe { &mut *(begin..end).get_unchecked_mut(self) }
536 }
537
538 /// Divide one string slice into two at an index.
539 ///
540 /// The argument, `mid`, should be a byte offset from the start of the
541 /// string. It must also be on the boundary of a UTF-8 code point.
542 ///
543 /// The two slices returned go from the start of the string slice to `mid`,
544 /// and from `mid` to the end of the string slice.
545 ///
546 /// To get mutable string slices instead, see the [`split_at_mut`]
547 /// method.
548 ///
549 /// [`split_at_mut`]: str::split_at_mut
550 ///
551 /// # Panics
552 ///
553 /// Panics if `mid` is not on a UTF-8 code point boundary, or if it is
554 /// past the end of the last code point of the string slice.
555 ///
556 /// # Examples
557 ///
558 /// Basic usage:
559 ///
560 /// ```
561 /// let s = "Per Martin-Löf";
562 ///
563 /// let (first, last) = s.split_at(3);
564 ///
565 /// assert_eq!("Per", first);
566 /// assert_eq!(" Martin-Löf", last);
567 /// ```
568 #[inline]
569 #[stable(feature = "str_split_at", since = "1.4.0")]
570 pub fn split_at(&self, mid: usize) -> (&str, &str) {
571 // is_char_boundary checks that the index is in [0, .len()]
572 if self.is_char_boundary(mid) {
573 // SAFETY: just checked that `mid` is on a char boundary.
574 unsafe { (self.get_unchecked(0..mid), self.get_unchecked(mid..self.len())) }
575 } else {
576 slice_error_fail(self, 0, mid)
577 }
578 }
579
580 /// Divide one mutable string slice into two at an index.
581 ///
582 /// The argument, `mid`, should be a byte offset from the start of the
583 /// string. It must also be on the boundary of a UTF-8 code point.
584 ///
585 /// The two slices returned go from the start of the string slice to `mid`,
586 /// and from `mid` to the end of the string slice.
587 ///
588 /// To get immutable string slices instead, see the [`split_at`] method.
589 ///
590 /// [`split_at`]: str::split_at
591 ///
592 /// # Panics
593 ///
594 /// Panics if `mid` is not on a UTF-8 code point boundary, or if it is
595 /// past the end of the last code point of the string slice.
596 ///
597 /// # Examples
598 ///
599 /// Basic usage:
600 ///
601 /// ```
602 /// let mut s = "Per Martin-Löf".to_string();
603 /// {
604 /// let (first, last) = s.split_at_mut(3);
605 /// first.make_ascii_uppercase();
606 /// assert_eq!("PER", first);
607 /// assert_eq!(" Martin-Löf", last);
608 /// }
609 /// assert_eq!("PER Martin-Löf", s);
610 /// ```
611 #[inline]
612 #[stable(feature = "str_split_at", since = "1.4.0")]
613 pub fn split_at_mut(&mut self, mid: usize) -> (&mut str, &mut str) {
614 // is_char_boundary checks that the index is in [0, .len()]
615 if self.is_char_boundary(mid) {
616 let len = self.len();
617 let ptr = self.as_mut_ptr();
618 // SAFETY: just checked that `mid` is on a char boundary.
619 unsafe {
620 (
621 from_utf8_unchecked_mut(slice::from_raw_parts_mut(ptr, mid)),
622 from_utf8_unchecked_mut(slice::from_raw_parts_mut(ptr.add(mid), len - mid)),
623 )
624 }
625 } else {
626 slice_error_fail(self, 0, mid)
627 }
628 }
629
630 /// Returns an iterator over the [`char`]s of a string slice.
631 ///
632 /// As a string slice consists of valid UTF-8, we can iterate through a
633 /// string slice by [`char`]. This method returns such an iterator.
634 ///
635 /// It's important to remember that [`char`] represents a Unicode Scalar
636 /// Value, and may not match your idea of what a 'character' is. Iteration
637 /// over grapheme clusters may be what you actually want. This functionality
638 /// is not provided by Rust's standard library, check crates.io instead.
639 ///
640 /// # Examples
641 ///
642 /// Basic usage:
643 ///
644 /// ```
645 /// let word = "goodbye";
646 ///
647 /// let count = word.chars().count();
648 /// assert_eq!(7, count);
649 ///
650 /// let mut chars = word.chars();
651 ///
652 /// assert_eq!(Some('g'), chars.next());
653 /// assert_eq!(Some('o'), chars.next());
654 /// assert_eq!(Some('o'), chars.next());
655 /// assert_eq!(Some('d'), chars.next());
656 /// assert_eq!(Some('b'), chars.next());
657 /// assert_eq!(Some('y'), chars.next());
658 /// assert_eq!(Some('e'), chars.next());
659 ///
660 /// assert_eq!(None, chars.next());
661 /// ```
662 ///
663 /// Remember, [`char`]s may not match your intuition about characters:
664 ///
665 /// [`char`]: prim@char
666 ///
667 /// ```
668 /// let y = "y̆";
669 ///
670 /// let mut chars = y.chars();
671 ///
672 /// assert_eq!(Some('y'), chars.next()); // not 'y̆'
673 /// assert_eq!(Some('\u{0306}'), chars.next());
674 ///
675 /// assert_eq!(None, chars.next());
676 /// ```
677 #[stable(feature = "rust1", since = "1.0.0")]
678 #[inline]
679 pub fn chars(&self) -> Chars<'_> {
680 Chars { iter: self.as_bytes().iter() }
681 }
682
683 /// Returns an iterator over the [`char`]s of a string slice, and their
684 /// positions.
685 ///
686 /// As a string slice consists of valid UTF-8, we can iterate through a
687 /// string slice by [`char`]. This method returns an iterator of both
688 /// these [`char`]s, as well as their byte positions.
689 ///
690 /// The iterator yields tuples. The position is first, the [`char`] is
691 /// second.
692 ///
693 /// # Examples
694 ///
695 /// Basic usage:
696 ///
697 /// ```
698 /// let word = "goodbye";
699 ///
700 /// let count = word.char_indices().count();
701 /// assert_eq!(7, count);
702 ///
703 /// let mut char_indices = word.char_indices();
704 ///
705 /// assert_eq!(Some((0, 'g')), char_indices.next());
706 /// assert_eq!(Some((1, 'o')), char_indices.next());
707 /// assert_eq!(Some((2, 'o')), char_indices.next());
708 /// assert_eq!(Some((3, 'd')), char_indices.next());
709 /// assert_eq!(Some((4, 'b')), char_indices.next());
710 /// assert_eq!(Some((5, 'y')), char_indices.next());
711 /// assert_eq!(Some((6, 'e')), char_indices.next());
712 ///
713 /// assert_eq!(None, char_indices.next());
714 /// ```
715 ///
716 /// Remember, [`char`]s may not match your intuition about characters:
717 ///
718 /// [`char`]: prim@char
719 ///
720 /// ```
721 /// let yes = "y̆es";
722 ///
723 /// let mut char_indices = yes.char_indices();
724 ///
725 /// assert_eq!(Some((0, 'y')), char_indices.next()); // not (0, 'y̆')
726 /// assert_eq!(Some((1, '\u{0306}')), char_indices.next());
727 ///
728 /// // note the 3 here - the last character took up two bytes
729 /// assert_eq!(Some((3, 'e')), char_indices.next());
730 /// assert_eq!(Some((4, 's')), char_indices.next());
731 ///
732 /// assert_eq!(None, char_indices.next());
733 /// ```
734 #[stable(feature = "rust1", since = "1.0.0")]
735 #[inline]
736 pub fn char_indices(&self) -> CharIndices<'_> {
737 CharIndices { front_offset: 0, iter: self.chars() }
738 }
739
740 /// An iterator over the bytes of a string slice.
741 ///
742 /// As a string slice consists of a sequence of bytes, we can iterate
743 /// through a string slice by byte. This method returns such an iterator.
744 ///
745 /// # Examples
746 ///
747 /// Basic usage:
748 ///
749 /// ```
750 /// let mut bytes = "bors".bytes();
751 ///
752 /// assert_eq!(Some(b'b'), bytes.next());
753 /// assert_eq!(Some(b'o'), bytes.next());
754 /// assert_eq!(Some(b'r'), bytes.next());
755 /// assert_eq!(Some(b's'), bytes.next());
756 ///
757 /// assert_eq!(None, bytes.next());
758 /// ```
759 #[stable(feature = "rust1", since = "1.0.0")]
760 #[inline]
761 pub fn bytes(&self) -> Bytes<'_> {
762 Bytes(self.as_bytes().iter().copied())
763 }
764
765 /// Splits a string slice by whitespace.
766 ///
767 /// The iterator returned will return string slices that are sub-slices of
768 /// the original string slice, separated by any amount of whitespace.
769 ///
770 /// 'Whitespace' is defined according to the terms of the Unicode Derived
771 /// Core Property `White_Space`. If you only want to split on ASCII whitespace
772 /// instead, use [`split_ascii_whitespace`].
773 ///
774 /// [`split_ascii_whitespace`]: str::split_ascii_whitespace
775 ///
776 /// # Examples
777 ///
778 /// Basic usage:
779 ///
780 /// ```
781 /// let mut iter = "A few words".split_whitespace();
782 ///
783 /// assert_eq!(Some("A"), iter.next());
784 /// assert_eq!(Some("few"), iter.next());
785 /// assert_eq!(Some("words"), iter.next());
786 ///
787 /// assert_eq!(None, iter.next());
788 /// ```
789 ///
790 /// All kinds of whitespace are considered:
791 ///
792 /// ```
793 /// let mut iter = " Mary had\ta\u{2009}little \n\t lamb".split_whitespace();
794 /// assert_eq!(Some("Mary"), iter.next());
795 /// assert_eq!(Some("had"), iter.next());
796 /// assert_eq!(Some("a"), iter.next());
797 /// assert_eq!(Some("little"), iter.next());
798 /// assert_eq!(Some("lamb"), iter.next());
799 ///
800 /// assert_eq!(None, iter.next());
801 /// ```
802 #[stable(feature = "split_whitespace", since = "1.1.0")]
803 #[inline]
804 pub fn split_whitespace(&self) -> SplitWhitespace<'_> {
805 SplitWhitespace { inner: self.split(IsWhitespace).filter(IsNotEmpty) }
806 }
807
808 /// Splits a string slice by ASCII whitespace.
809 ///
810 /// The iterator returned will return string slices that are sub-slices of
811 /// the original string slice, separated by any amount of ASCII whitespace.
812 ///
/// To split by Unicode `White_Space` instead, use [`split_whitespace`].
814 ///
815 /// [`split_whitespace`]: str::split_whitespace
816 ///
817 /// # Examples
818 ///
819 /// Basic usage:
820 ///
821 /// ```
822 /// let mut iter = "A few words".split_ascii_whitespace();
823 ///
824 /// assert_eq!(Some("A"), iter.next());
825 /// assert_eq!(Some("few"), iter.next());
826 /// assert_eq!(Some("words"), iter.next());
827 ///
828 /// assert_eq!(None, iter.next());
829 /// ```
830 ///
831 /// All kinds of ASCII whitespace are considered:
832 ///
833 /// ```
834 /// let mut iter = " Mary had\ta little \n\t lamb".split_ascii_whitespace();
835 /// assert_eq!(Some("Mary"), iter.next());
836 /// assert_eq!(Some("had"), iter.next());
837 /// assert_eq!(Some("a"), iter.next());
838 /// assert_eq!(Some("little"), iter.next());
839 /// assert_eq!(Some("lamb"), iter.next());
840 ///
841 /// assert_eq!(None, iter.next());
842 /// ```
843 #[stable(feature = "split_ascii_whitespace", since = "1.34.0")]
844 #[inline]
845 pub fn split_ascii_whitespace(&self) -> SplitAsciiWhitespace<'_> {
846 let inner =
847 self.as_bytes().split(IsAsciiWhitespace).filter(BytesIsNotEmpty).map(UnsafeBytesToStr);
848 SplitAsciiWhitespace { inner }
849 }
850
851 /// An iterator over the lines of a string, as string slices.
852 ///
853 /// Lines are ended with either a newline (`\n`) or a carriage return with
854 /// a line feed (`\r\n`).
855 ///
856 /// The final line ending is optional. A string that ends with a final line
857 /// ending will return the same lines as an otherwise identical string
858 /// without a final line ending.
859 ///
860 /// # Examples
861 ///
862 /// Basic usage:
863 ///
864 /// ```
865 /// let text = "foo\r\nbar\n\nbaz\n";
866 /// let mut lines = text.lines();
867 ///
868 /// assert_eq!(Some("foo"), lines.next());
869 /// assert_eq!(Some("bar"), lines.next());
870 /// assert_eq!(Some(""), lines.next());
871 /// assert_eq!(Some("baz"), lines.next());
872 ///
873 /// assert_eq!(None, lines.next());
874 /// ```
875 ///
876 /// The final line ending isn't required:
877 ///
878 /// ```
879 /// let text = "foo\nbar\n\r\nbaz";
880 /// let mut lines = text.lines();
881 ///
882 /// assert_eq!(Some("foo"), lines.next());
883 /// assert_eq!(Some("bar"), lines.next());
884 /// assert_eq!(Some(""), lines.next());
885 /// assert_eq!(Some("baz"), lines.next());
886 ///
887 /// assert_eq!(None, lines.next());
888 /// ```
889 #[stable(feature = "rust1", since = "1.0.0")]
890 #[inline]
891 pub fn lines(&self) -> Lines<'_> {
892 Lines(self.split_terminator('\n').map(LinesAnyMap))
893 }
894
895 /// An iterator over the lines of a string.
896 #[stable(feature = "rust1", since = "1.0.0")]
897 #[rustc_deprecated(since = "1.4.0", reason = "use lines() instead now")]
898 #[inline]
899 #[allow(deprecated)]
900 pub fn lines_any(&self) -> LinesAny<'_> {
901 LinesAny(self.lines())
902 }
903
904 /// Returns an iterator of `u16` over the string encoded as UTF-16.
905 ///
906 /// # Examples
907 ///
908 /// Basic usage:
909 ///
910 /// ```
911 /// let text = "Zażółć gęślą jaźń";
912 ///
913 /// let utf8_len = text.len();
914 /// let utf16_len = text.encode_utf16().count();
915 ///
916 /// assert!(utf16_len <= utf8_len);
917 /// ```
918 #[stable(feature = "encode_utf16", since = "1.8.0")]
919 pub fn encode_utf16(&self) -> EncodeUtf16<'_> {
920 EncodeUtf16 { chars: self.chars(), extra: 0 }
921 }
922
923 /// Returns `true` if the given pattern matches a sub-slice of
924 /// this string slice.
925 ///
926 /// Returns `false` if it does not.
927 ///
928 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
929 /// function or closure that determines if a character matches.
930 ///
931 /// [`char`]: prim@char
932 /// [pattern]: self::pattern
933 ///
934 /// # Examples
935 ///
936 /// Basic usage:
937 ///
938 /// ```
939 /// let bananas = "bananas";
940 ///
941 /// assert!(bananas.contains("nana"));
942 /// assert!(!bananas.contains("apples"));
943 /// ```
944 #[stable(feature = "rust1", since = "1.0.0")]
945 #[inline]
946 pub fn contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
947 pat.is_contained_in(self)
948 }
949
950 /// Returns `true` if the given pattern matches a prefix of this
951 /// string slice.
952 ///
953 /// Returns `false` if it does not.
954 ///
955 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
956 /// function or closure that determines if a character matches.
957 ///
958 /// [`char`]: prim@char
959 /// [pattern]: self::pattern
960 ///
961 /// # Examples
962 ///
963 /// Basic usage:
964 ///
965 /// ```
966 /// let bananas = "bananas";
967 ///
968 /// assert!(bananas.starts_with("bana"));
969 /// assert!(!bananas.starts_with("nana"));
970 /// ```
971 #[stable(feature = "rust1", since = "1.0.0")]
972 pub fn starts_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
973 pat.is_prefix_of(self)
974 }
975
976 /// Returns `true` if the given pattern matches a suffix of this
977 /// string slice.
978 ///
979 /// Returns `false` if it does not.
980 ///
981 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
982 /// function or closure that determines if a character matches.
983 ///
984 /// [`char`]: prim@char
985 /// [pattern]: self::pattern
986 ///
987 /// # Examples
988 ///
989 /// Basic usage:
990 ///
991 /// ```
992 /// let bananas = "bananas";
993 ///
994 /// assert!(bananas.ends_with("anas"));
995 /// assert!(!bananas.ends_with("nana"));
996 /// ```
997 #[stable(feature = "rust1", since = "1.0.0")]
998 pub fn ends_with<'a, P>(&'a self, pat: P) -> bool
999 where
1000 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
1001 {
1002 pat.is_suffix_of(self)
1003 }
1004
1005 /// Returns the byte index of the first character of this string slice that
1006 /// matches the pattern.
1007 ///
1008 /// Returns [`None`] if the pattern doesn't match.
1009 ///
1010 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
1011 /// function or closure that determines if a character matches.
1012 ///
1013 /// [`char`]: prim@char
1014 /// [pattern]: self::pattern
1015 ///
1016 /// # Examples
1017 ///
1018 /// Simple patterns:
1019 ///
1020 /// ```
1021 /// let s = "Löwe 老虎 Léopard Gepardi";
1022 ///
1023 /// assert_eq!(s.find('L'), Some(0));
1024 /// assert_eq!(s.find('é'), Some(14));
1025 /// assert_eq!(s.find("pard"), Some(17));
1026 /// ```
1027 ///
1028 /// More complex patterns using point-free style and closures:
1029 ///
1030 /// ```
1031 /// let s = "Löwe 老虎 Léopard";
1032 ///
1033 /// assert_eq!(s.find(char::is_whitespace), Some(5));
1034 /// assert_eq!(s.find(char::is_lowercase), Some(1));
1035 /// assert_eq!(s.find(|c: char| c.is_whitespace() || c.is_lowercase()), Some(1));
1036 /// assert_eq!(s.find(|c: char| (c < 'o') && (c > 'a')), Some(4));
1037 /// ```
1038 ///
1039 /// Not finding the pattern:
1040 ///
1041 /// ```
1042 /// let s = "Löwe 老虎 Léopard";
1043 /// let x: &[_] = &['1', '2'];
1044 ///
1045 /// assert_eq!(s.find(x), None);
1046 /// ```
1047 #[stable(feature = "rust1", since = "1.0.0")]
1048 #[inline]
1049 pub fn find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize> {
1050 pat.into_searcher(self).next_match().map(|(i, _)| i)
1051 }
1052
1053 /// Returns the byte index for the first character of the rightmost match of the pattern in
1054 /// this string slice.
1055 ///
1056 /// Returns [`None`] if the pattern doesn't match.
1057 ///
1058 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
1059 /// function or closure that determines if a character matches.
1060 ///
1061 /// [`char`]: prim@char
1062 /// [pattern]: self::pattern
1063 ///
1064 /// # Examples
1065 ///
1066 /// Simple patterns:
1067 ///
1068 /// ```
1069 /// let s = "Löwe 老虎 Léopard Gepardi";
1070 ///
1071 /// assert_eq!(s.rfind('L'), Some(13));
1072 /// assert_eq!(s.rfind('é'), Some(14));
1073 /// assert_eq!(s.rfind("pard"), Some(24));
1074 /// ```
1075 ///
1076 /// More complex patterns with closures:
1077 ///
1078 /// ```
1079 /// let s = "Löwe 老虎 Léopard";
1080 ///
1081 /// assert_eq!(s.rfind(char::is_whitespace), Some(12));
1082 /// assert_eq!(s.rfind(char::is_lowercase), Some(20));
1083 /// ```
1084 ///
1085 /// Not finding the pattern:
1086 ///
1087 /// ```
1088 /// let s = "Löwe 老虎 Léopard";
1089 /// let x: &[_] = &['1', '2'];
1090 ///
1091 /// assert_eq!(s.rfind(x), None);
1092 /// ```
1093 #[stable(feature = "rust1", since = "1.0.0")]
1094 #[inline]
1095 pub fn rfind<'a, P>(&'a self, pat: P) -> Option<usize>
1096 where
1097 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
1098 {
1099 pat.into_searcher(self).next_match_back().map(|(i, _)| i)
1100 }
1101
1102 /// An iterator over substrings of this string slice, separated by
1103 /// characters matched by a pattern.
1104 ///
1105 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
1106 /// function or closure that determines if a character matches.
1107 ///
1108 /// [`char`]: prim@char
1109 /// [pattern]: self::pattern
1110 ///
1111 /// # Iterator behavior
1112 ///
1113 /// The returned iterator will be a [`DoubleEndedIterator`] if the pattern
1114 /// allows a reverse search and forward/reverse search yields the same
1115 /// elements. This is true for, e.g., [`char`], but not for `&str`.
1116 ///
1117 /// If the pattern allows a reverse search but its results might differ
1118 /// from a forward search, the [`rsplit`] method can be used.
1119 ///
1120 /// [`rsplit`]: str::rsplit
1121 ///
1122 /// # Examples
1123 ///
1124 /// Simple patterns:
1125 ///
1126 /// ```
1127 /// let v: Vec<&str> = "Mary had a little lamb".split(' ').collect();
1128 /// assert_eq!(v, ["Mary", "had", "a", "little", "lamb"]);
1129 ///
1130 /// let v: Vec<&str> = "".split('X').collect();
1131 /// assert_eq!(v, [""]);
1132 ///
1133 /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').collect();
1134 /// assert_eq!(v, ["lion", "", "tiger", "leopard"]);
1135 ///
1136 /// let v: Vec<&str> = "lion::tiger::leopard".split("::").collect();
1137 /// assert_eq!(v, ["lion", "tiger", "leopard"]);
1138 ///
1139 /// let v: Vec<&str> = "abc1def2ghi".split(char::is_numeric).collect();
1140 /// assert_eq!(v, ["abc", "def", "ghi"]);
1141 ///
1142 /// let v: Vec<&str> = "lionXtigerXleopard".split(char::is_uppercase).collect();
1143 /// assert_eq!(v, ["lion", "tiger", "leopard"]);
1144 /// ```
1145 ///
1146 /// If the pattern is a slice of chars, split on each occurrence of any of the characters:
1147 ///
1148 /// ```
1149 /// let v: Vec<&str> = "2020-11-03 23:59".split(&['-', ' ', ':', '@'][..]).collect();
1150 /// assert_eq!(v, ["2020", "11", "03", "23", "59"]);
1151 /// ```
1152 ///
1153 /// A more complex pattern, using a closure:
1154 ///
1155 /// ```
1156 /// let v: Vec<&str> = "abc1defXghi".split(|c| c == '1' || c == 'X').collect();
1157 /// assert_eq!(v, ["abc", "def", "ghi"]);
1158 /// ```
1159 ///
1160 /// If a string contains multiple contiguous separators, you will end up
1161 /// with empty strings in the output:
1162 ///
1163 /// ```
1164 /// let x = "||||a||b|c".to_string();
1165 /// let d: Vec<_> = x.split('|').collect();
1166 ///
1167 /// assert_eq!(d, &["", "", "", "", "a", "", "b", "c"]);
1168 /// ```
1169 ///
1170 /// Contiguous separators are separated by the empty string.
1171 ///
1172 /// ```
1173 /// let x = "(///)".to_string();
1174 /// let d: Vec<_> = x.split('/').collect();
1175 ///
1176 /// assert_eq!(d, &["(", "", "", ")"]);
1177 /// ```
1178 ///
1179 /// Separators at the start or end of a string are neighbored
1180 /// by empty strings.
1181 ///
1182 /// ```
1183 /// let d: Vec<_> = "010".split("0").collect();
1184 /// assert_eq!(d, &["", "1", ""]);
1185 /// ```
1186 ///
1187 /// When the empty string is used as a separator, it separates
1188 /// every character in the string, along with the beginning
1189 /// and end of the string.
1190 ///
1191 /// ```
1192 /// let f: Vec<_> = "rust".split("").collect();
1193 /// assert_eq!(f, &["", "r", "u", "s", "t", ""]);
1194 /// ```
1195 ///
1196 /// Contiguous separators can lead to possibly surprising behavior
1197 /// when whitespace is used as the separator. This code is correct:
1198 ///
1199 /// ```
/// let x = "    a  b c".to_string();
1201 /// let d: Vec<_> = x.split(' ').collect();
1202 ///
1203 /// assert_eq!(d, &["", "", "", "", "a", "", "b", "c"]);
1204 /// ```
1205 ///
1206 /// It does _not_ give you:
1207 ///
1208 /// ```,ignore
1209 /// assert_eq!(d, &["a", "b", "c"]);
1210 /// ```
1211 ///
1212 /// Use [`split_whitespace`] for this behavior.
1213 ///
1214 /// [`split_whitespace`]: str::split_whitespace
1215 #[stable(feature = "rust1", since = "1.0.0")]
1216 #[inline]
1217 pub fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P> {
1218 Split(SplitInternal {
1219 start: 0,
1220 end: self.len(),
1221 matcher: pat.into_searcher(self),
1222 allow_trailing_empty: true,
1223 finished: false,
1224 })
1225 }
1226
1227 /// An iterator over substrings of this string slice, separated by
1228 /// characters matched by a pattern. Differs from the iterator produced by
1229 /// `split` in that `split_inclusive` leaves the matched part as the
1230 /// terminator of the substring.
1231 ///
1232 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
1233 /// function or closure that determines if a character matches.
1234 ///
1235 /// [`char`]: prim@char
1236 /// [pattern]: self::pattern
1237 ///
1238 /// # Examples
1239 ///
1240 /// ```
1241 /// let v: Vec<&str> = "Mary had a little lamb\nlittle lamb\nlittle lamb."
1242 /// .split_inclusive('\n').collect();
1243 /// assert_eq!(v, ["Mary had a little lamb\n", "little lamb\n", "little lamb."]);
1244 /// ```
1245 ///
1246 /// If the last element of the string is matched,
1247 /// that element will be considered the terminator of the preceding substring.
1248 /// That substring will be the last item returned by the iterator.
1249 ///
1250 /// ```
1251 /// let v: Vec<&str> = "Mary had a little lamb\nlittle lamb\nlittle lamb.\n"
1252 /// .split_inclusive('\n').collect();
1253 /// assert_eq!(v, ["Mary had a little lamb\n", "little lamb\n", "little lamb.\n"]);
1254 /// ```
1255 #[stable(feature = "split_inclusive", since = "1.51.0")]
1256 #[inline]
1257 pub fn split_inclusive<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitInclusive<'a, P> {
1258 SplitInclusive(SplitInternal {
1259 start: 0,
1260 end: self.len(),
1261 matcher: pat.into_searcher(self),
1262 allow_trailing_empty: false,
1263 finished: false,
1264 })
1265 }
1266
1267 /// An iterator over substrings of the given string slice, separated by
1268 /// characters matched by a pattern and yielded in reverse order.
1269 ///
1270 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
1271 /// function or closure that determines if a character matches.
1272 ///
1273 /// [`char`]: prim@char
1274 /// [pattern]: self::pattern
1275 ///
1276 /// # Iterator behavior
1277 ///
1278 /// The returned iterator requires that the pattern supports a reverse
1279 /// search, and it will be a [`DoubleEndedIterator`] if a forward/reverse
1280 /// search yields the same elements.
1281 ///
1282 /// For iterating from the front, the [`split`] method can be used.
1283 ///
1284 /// [`split`]: str::split
1285 ///
1286 /// # Examples
1287 ///
1288 /// Simple patterns:
1289 ///
1290 /// ```
1291 /// let v: Vec<&str> = "Mary had a little lamb".rsplit(' ').collect();
1292 /// assert_eq!(v, ["lamb", "little", "a", "had", "Mary"]);
1293 ///
1294 /// let v: Vec<&str> = "".rsplit('X').collect();
1295 /// assert_eq!(v, [""]);
1296 ///
1297 /// let v: Vec<&str> = "lionXXtigerXleopard".rsplit('X').collect();
1298 /// assert_eq!(v, ["leopard", "tiger", "", "lion"]);
1299 ///
1300 /// let v: Vec<&str> = "lion::tiger::leopard".rsplit("::").collect();
1301 /// assert_eq!(v, ["leopard", "tiger", "lion"]);
1302 /// ```
1303 ///
1304 /// A more complex pattern, using a closure:
1305 ///
1306 /// ```
1307 /// let v: Vec<&str> = "abc1defXghi".rsplit(|c| c == '1' || c == 'X').collect();
1308 /// assert_eq!(v, ["ghi", "def", "abc"]);
1309 /// ```
1310 #[stable(feature = "rust1", since = "1.0.0")]
1311 #[inline]
1312 pub fn rsplit<'a, P>(&'a self, pat: P) -> RSplit<'a, P>
1313 where
1314 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
1315 {
1316 RSplit(self.split(pat).0)
1317 }
1318
1319 /// An iterator over substrings of the given string slice, separated by
1320 /// characters matched by a pattern.
1321 ///
1322 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
1323 /// function or closure that determines if a character matches.
1324 ///
1325 /// [`char`]: prim@char
1326 /// [pattern]: self::pattern
1327 ///
1328 /// Equivalent to [`split`], except that the trailing substring
1329 /// is skipped if empty.
1330 ///
1331 /// [`split`]: str::split
1332 ///
1333 /// This method can be used for string data that is _terminated_,
1334 /// rather than _separated_ by a pattern.
1335 ///
1336 /// # Iterator behavior
1337 ///
1338 /// The returned iterator will be a [`DoubleEndedIterator`] if the pattern
1339 /// allows a reverse search and forward/reverse search yields the same
1340 /// elements. This is true for, e.g., [`char`], but not for `&str`.
1341 ///
1342 /// If the pattern allows a reverse search but its results might differ
1343 /// from a forward search, the [`rsplit_terminator`] method can be used.
1344 ///
1345 /// [`rsplit_terminator`]: str::rsplit_terminator
1346 ///
1347 /// # Examples
1348 ///
1349 /// Basic usage:
1350 ///
1351 /// ```
1352 /// let v: Vec<&str> = "A.B.".split_terminator('.').collect();
1353 /// assert_eq!(v, ["A", "B"]);
1354 ///
1355 /// let v: Vec<&str> = "A..B..".split_terminator(".").collect();
1356 /// assert_eq!(v, ["A", "", "B", ""]);
1357 /// ```
1358 #[stable(feature = "rust1", since = "1.0.0")]
1359 #[inline]
1360 pub fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P> {
1361 SplitTerminator(SplitInternal { allow_trailing_empty: false, ..self.split(pat).0 })
1362 }
1363
1364 /// An iterator over substrings of `self`, separated by characters
1365 /// matched by a pattern and yielded in reverse order.
1366 ///
1367 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
1368 /// function or closure that determines if a character matches.
1369 ///
1370 /// [`char`]: prim@char
1371 /// [pattern]: self::pattern
1372 ///
1373 /// Equivalent to [`split`], except that the trailing substring is
1374 /// skipped if empty.
1375 ///
1376 /// [`split`]: str::split
1377 ///
1378 /// This method can be used for string data that is _terminated_,
1379 /// rather than _separated_ by a pattern.
1380 ///
1381 /// # Iterator behavior
1382 ///
1383 /// The returned iterator requires that the pattern supports a
1384 /// reverse search, and it will be double ended if a forward/reverse
1385 /// search yields the same elements.
1386 ///
1387 /// For iterating from the front, the [`split_terminator`] method can be
1388 /// used.
1389 ///
1390 /// [`split_terminator`]: str::split_terminator
1391 ///
1392 /// # Examples
1393 ///
1394 /// ```
1395 /// let v: Vec<&str> = "A.B.".rsplit_terminator('.').collect();
1396 /// assert_eq!(v, ["B", "A"]);
1397 ///
1398 /// let v: Vec<&str> = "A..B..".rsplit_terminator(".").collect();
1399 /// assert_eq!(v, ["", "B", "", "A"]);
1400 /// ```
1401 #[stable(feature = "rust1", since = "1.0.0")]
1402 #[inline]
1403 pub fn rsplit_terminator<'a, P>(&'a self, pat: P) -> RSplitTerminator<'a, P>
1404 where
1405 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
1406 {
1407 RSplitTerminator(self.split_terminator(pat).0)
1408 }
1409
1410 /// An iterator over substrings of the given string slice, separated by a
1411 /// pattern, restricted to returning at most `n` items.
1412 ///
1413 /// If `n` substrings are returned, the last substring (the `n`th substring)
1414 /// will contain the remainder of the string.
1415 ///
1416 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
1417 /// function or closure that determines if a character matches.
1418 ///
1419 /// [`char`]: prim@char
1420 /// [pattern]: self::pattern
1421 ///
1422 /// # Iterator behavior
1423 ///
1424 /// The returned iterator will not be double ended, because it is
1425 /// not efficient to support.
1426 ///
1427 /// If the pattern allows a reverse search, the [`rsplitn`] method can be
1428 /// used.
1429 ///
1430 /// [`rsplitn`]: str::rsplitn
1431 ///
1432 /// # Examples
1433 ///
1434 /// Simple patterns:
1435 ///
1436 /// ```
1437 /// let v: Vec<&str> = "Mary had a little lambda".splitn(3, ' ').collect();
1438 /// assert_eq!(v, ["Mary", "had", "a little lambda"]);
1439 ///
1440 /// let v: Vec<&str> = "lionXXtigerXleopard".splitn(3, "X").collect();
1441 /// assert_eq!(v, ["lion", "", "tigerXleopard"]);
1442 ///
1443 /// let v: Vec<&str> = "abcXdef".splitn(1, 'X').collect();
1444 /// assert_eq!(v, ["abcXdef"]);
1445 ///
1446 /// let v: Vec<&str> = "".splitn(1, 'X').collect();
1447 /// assert_eq!(v, [""]);
1448 /// ```
1449 ///
1450 /// A more complex pattern, using a closure:
1451 ///
1452 /// ```
1453 /// let v: Vec<&str> = "abc1defXghi".splitn(2, |c| c == '1' || c == 'X').collect();
1454 /// assert_eq!(v, ["abc", "defXghi"]);
1455 /// ```
1456 #[stable(feature = "rust1", since = "1.0.0")]
1457 #[inline]
1458 pub fn splitn<'a, P: Pattern<'a>>(&'a self, n: usize, pat: P) -> SplitN<'a, P> {
1459 SplitN(SplitNInternal { iter: self.split(pat).0, count: n })
1460 }
1461
1462 /// An iterator over substrings of this string slice, separated by a
1463 /// pattern, starting from the end of the string, restricted to returning
1464 /// at most `n` items.
1465 ///
1466 /// If `n` substrings are returned, the last substring (the `n`th substring)
1467 /// will contain the remainder of the string.
1468 ///
1469 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
1470 /// function or closure that determines if a character matches.
1471 ///
1472 /// [`char`]: prim@char
1473 /// [pattern]: self::pattern
1474 ///
1475 /// # Iterator behavior
1476 ///
1477 /// The returned iterator will not be double ended, because it is not
1478 /// efficient to support.
1479 ///
1480 /// For splitting from the front, the [`splitn`] method can be used.
1481 ///
1482 /// [`splitn`]: str::splitn
1483 ///
1484 /// # Examples
1485 ///
1486 /// Simple patterns:
1487 ///
1488 /// ```
1489 /// let v: Vec<&str> = "Mary had a little lamb".rsplitn(3, ' ').collect();
1490 /// assert_eq!(v, ["lamb", "little", "Mary had a"]);
1491 ///
1492 /// let v: Vec<&str> = "lionXXtigerXleopard".rsplitn(3, 'X').collect();
1493 /// assert_eq!(v, ["leopard", "tiger", "lionX"]);
1494 ///
1495 /// let v: Vec<&str> = "lion::tiger::leopard".rsplitn(2, "::").collect();
1496 /// assert_eq!(v, ["leopard", "lion::tiger"]);
1497 /// ```
1498 ///
1499 /// A more complex pattern, using a closure:
1500 ///
1501 /// ```
1502 /// let v: Vec<&str> = "abc1defXghi".rsplitn(2, |c| c == '1' || c == 'X').collect();
1503 /// assert_eq!(v, ["ghi", "abc1def"]);
1504 /// ```
1505 #[stable(feature = "rust1", since = "1.0.0")]
1506 #[inline]
1507 pub fn rsplitn<'a, P>(&'a self, n: usize, pat: P) -> RSplitN<'a, P>
1508 where
1509 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
1510 {
1511 RSplitN(self.splitn(n, pat).0)
1512 }
1513
1514 /// Splits the string on the first occurrence of the specified delimiter and
1515 /// returns prefix before delimiter and suffix after delimiter.
1516 ///
1517 /// # Examples
1518 ///
1519 /// ```
1520 /// assert_eq!("cfg".split_once('='), None);
1521 /// assert_eq!("cfg=foo".split_once('='), Some(("cfg", "foo")));
1522 /// assert_eq!("cfg=foo=bar".split_once('='), Some(("cfg", "foo=bar")));
1523 /// ```
1524 #[stable(feature = "str_split_once", since = "1.52.0")]
1525 #[inline]
1526 pub fn split_once<'a, P: Pattern<'a>>(&'a self, delimiter: P) -> Option<(&'a str, &'a str)> {
1527 let (start, end) = delimiter.into_searcher(self).next_match()?;
1528 Some((&self[..start], &self[end..]))
1529 }
1530
1531 /// Splits the string on the last occurrence of the specified delimiter and
1532 /// returns prefix before delimiter and suffix after delimiter.
1533 ///
1534 /// # Examples
1535 ///
1536 /// ```
1537 /// assert_eq!("cfg".rsplit_once('='), None);
1538 /// assert_eq!("cfg=foo".rsplit_once('='), Some(("cfg", "foo")));
1539 /// assert_eq!("cfg=foo=bar".rsplit_once('='), Some(("cfg=foo", "bar")));
1540 /// ```
1541 #[stable(feature = "str_split_once", since = "1.52.0")]
1542 #[inline]
1543 pub fn rsplit_once<'a, P>(&'a self, delimiter: P) -> Option<(&'a str, &'a str)>
1544 where
1545 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
1546 {
1547 let (start, end) = delimiter.into_searcher(self).next_match_back()?;
1548 Some((&self[..start], &self[end..]))
1549 }
1550
1551 /// An iterator over the disjoint matches of a pattern within the given string
1552 /// slice.
1553 ///
1554 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
1555 /// function or closure that determines if a character matches.
1556 ///
1557 /// [`char`]: prim@char
1558 /// [pattern]: self::pattern
1559 ///
1560 /// # Iterator behavior
1561 ///
1562 /// The returned iterator will be a [`DoubleEndedIterator`] if the pattern
1563 /// allows a reverse search and forward/reverse search yields the same
1564 /// elements. This is true for, e.g., [`char`], but not for `&str`.
1565 ///
1566 /// If the pattern allows a reverse search but its results might differ
1567 /// from a forward search, the [`rmatches`] method can be used.
1568 ///
/// [`rmatches`]: str::rmatches
1570 ///
1571 /// # Examples
1572 ///
1573 /// Basic usage:
1574 ///
1575 /// ```
1576 /// let v: Vec<&str> = "abcXXXabcYYYabc".matches("abc").collect();
1577 /// assert_eq!(v, ["abc", "abc", "abc"]);
1578 ///
1579 /// let v: Vec<&str> = "1abc2abc3".matches(char::is_numeric).collect();
1580 /// assert_eq!(v, ["1", "2", "3"]);
1581 /// ```
1582 #[stable(feature = "str_matches", since = "1.2.0")]
1583 #[inline]
1584 pub fn matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> Matches<'a, P> {
1585 Matches(MatchesInternal(pat.into_searcher(self)))
1586 }
1587
1588 /// An iterator over the disjoint matches of a pattern within this string slice,
1589 /// yielded in reverse order.
1590 ///
1591 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
1592 /// function or closure that determines if a character matches.
1593 ///
1594 /// [`char`]: prim@char
1595 /// [pattern]: self::pattern
1596 ///
1597 /// # Iterator behavior
1598 ///
1599 /// The returned iterator requires that the pattern supports a reverse
1600 /// search, and it will be a [`DoubleEndedIterator`] if a forward/reverse
1601 /// search yields the same elements.
1602 ///
1603 /// For iterating from the front, the [`matches`] method can be used.
1604 ///
1605 /// [`matches`]: str::matches
1606 ///
1607 /// # Examples
1608 ///
1609 /// Basic usage:
1610 ///
1611 /// ```
1612 /// let v: Vec<&str> = "abcXXXabcYYYabc".rmatches("abc").collect();
1613 /// assert_eq!(v, ["abc", "abc", "abc"]);
1614 ///
1615 /// let v: Vec<&str> = "1abc2abc3".rmatches(char::is_numeric).collect();
1616 /// assert_eq!(v, ["3", "2", "1"]);
1617 /// ```
1618 #[stable(feature = "str_matches", since = "1.2.0")]
1619 #[inline]
1620 pub fn rmatches<'a, P>(&'a self, pat: P) -> RMatches<'a, P>
1621 where
1622 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
1623 {
1624 RMatches(self.matches(pat).0)
1625 }
1626
1627 /// An iterator over the disjoint matches of a pattern within this string
1628 /// slice as well as the index that the match starts at.
1629 ///
1630 /// For matches of `pat` within `self` that overlap, only the indices
1631 /// corresponding to the first match are returned.
1632 ///
1633 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
1634 /// function or closure that determines if a character matches.
1635 ///
1636 /// [`char`]: prim@char
1637 /// [pattern]: self::pattern
1638 ///
1639 /// # Iterator behavior
1640 ///
1641 /// The returned iterator will be a [`DoubleEndedIterator`] if the pattern
1642 /// allows a reverse search and forward/reverse search yields the same
1643 /// elements. This is true for, e.g., [`char`], but not for `&str`.
1644 ///
1645 /// If the pattern allows a reverse search but its results might differ
1646 /// from a forward search, the [`rmatch_indices`] method can be used.
1647 ///
/// [`rmatch_indices`]: str::rmatch_indices
1649 ///
1650 /// # Examples
1651 ///
1652 /// Basic usage:
1653 ///
1654 /// ```
1655 /// let v: Vec<_> = "abcXXXabcYYYabc".match_indices("abc").collect();
1656 /// assert_eq!(v, [(0, "abc"), (6, "abc"), (12, "abc")]);
1657 ///
1658 /// let v: Vec<_> = "1abcabc2".match_indices("abc").collect();
1659 /// assert_eq!(v, [(1, "abc"), (4, "abc")]);
1660 ///
1661 /// let v: Vec<_> = "ababa".match_indices("aba").collect();
1662 /// assert_eq!(v, [(0, "aba")]); // only the first `aba`
1663 /// ```
1664 #[stable(feature = "str_match_indices", since = "1.5.0")]
1665 #[inline]
1666 pub fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P> {
1667 MatchIndices(MatchIndicesInternal(pat.into_searcher(self)))
1668 }
1669
1670 /// An iterator over the disjoint matches of a pattern within `self`,
1671 /// yielded in reverse order along with the index of the match.
1672 ///
1673 /// For matches of `pat` within `self` that overlap, only the indices
1674 /// corresponding to the last match are returned.
1675 ///
1676 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
1677 /// function or closure that determines if a character matches.
1678 ///
1679 /// [`char`]: prim@char
1680 /// [pattern]: self::pattern
1681 ///
1682 /// # Iterator behavior
1683 ///
1684 /// The returned iterator requires that the pattern supports a reverse
1685 /// search, and it will be a [`DoubleEndedIterator`] if a forward/reverse
1686 /// search yields the same elements.
1687 ///
1688 /// For iterating from the front, the [`match_indices`] method can be used.
1689 ///
1690 /// [`match_indices`]: str::match_indices
1691 ///
1692 /// # Examples
1693 ///
1694 /// Basic usage:
1695 ///
1696 /// ```
1697 /// let v: Vec<_> = "abcXXXabcYYYabc".rmatch_indices("abc").collect();
1698 /// assert_eq!(v, [(12, "abc"), (6, "abc"), (0, "abc")]);
1699 ///
1700 /// let v: Vec<_> = "1abcabc2".rmatch_indices("abc").collect();
1701 /// assert_eq!(v, [(4, "abc"), (1, "abc")]);
1702 ///
1703 /// let v: Vec<_> = "ababa".rmatch_indices("aba").collect();
1704 /// assert_eq!(v, [(2, "aba")]); // only the last `aba`
1705 /// ```
1706 #[stable(feature = "str_match_indices", since = "1.5.0")]
1707 #[inline]
1708 pub fn rmatch_indices<'a, P>(&'a self, pat: P) -> RMatchIndices<'a, P>
1709 where
1710 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
1711 {
1712 RMatchIndices(self.match_indices(pat).0)
1713 }
1714
1715 /// Returns a string slice with leading and trailing whitespace removed.
1716 ///
1717 /// 'Whitespace' is defined according to the terms of the Unicode Derived
1718 /// Core Property `White_Space`.
1719 ///
1720 /// # Examples
1721 ///
1722 /// Basic usage:
1723 ///
1724 /// ```
1725 /// let s = " Hello\tworld\t";
1726 ///
1727 /// assert_eq!("Hello\tworld", s.trim());
1728 /// ```
1729 #[inline]
1730 #[must_use = "this returns the trimmed string as a slice, \
1731 without modifying the original"]
1732 #[stable(feature = "rust1", since = "1.0.0")]
1733 pub fn trim(&self) -> &str {
1734 self.trim_matches(|c: char| c.is_whitespace())
1735 }
1736
1737 /// Returns a string slice with leading whitespace removed.
1738 ///
1739 /// 'Whitespace' is defined according to the terms of the Unicode Derived
1740 /// Core Property `White_Space`.
1741 ///
1742 /// # Text directionality
1743 ///
1744 /// A string is a sequence of bytes. `start` in this context means the first
1745 /// position of that byte string; for a left-to-right language like English or
/// Russian, this will be the left side, and for right-to-left languages like
1747 /// Arabic or Hebrew, this will be the right side.
1748 ///
1749 /// # Examples
1750 ///
1751 /// Basic usage:
1752 ///
1753 /// ```
1754 /// let s = " Hello\tworld\t";
1755 /// assert_eq!("Hello\tworld\t", s.trim_start());
1756 /// ```
1757 ///
1758 /// Directionality:
1759 ///
1760 /// ```
1761 /// let s = " English ";
1762 /// assert!(Some('E') == s.trim_start().chars().next());
1763 ///
1764 /// let s = " עברית ";
1765 /// assert!(Some('ע') == s.trim_start().chars().next());
1766 /// ```
1767 #[inline]
1768 #[must_use = "this returns the trimmed string as a new slice, \
1769 without modifying the original"]
1770 #[stable(feature = "trim_direction", since = "1.30.0")]
1771 pub fn trim_start(&self) -> &str {
1772 self.trim_start_matches(|c: char| c.is_whitespace())
1773 }
1774
1775 /// Returns a string slice with trailing whitespace removed.
1776 ///
1777 /// 'Whitespace' is defined according to the terms of the Unicode Derived
1778 /// Core Property `White_Space`.
1779 ///
1780 /// # Text directionality
1781 ///
1782 /// A string is a sequence of bytes. `end` in this context means the last
1783 /// position of that byte string; for a left-to-right language like English or
/// Russian, this will be the right side, and for right-to-left languages like
1785 /// Arabic or Hebrew, this will be the left side.
1786 ///
1787 /// # Examples
1788 ///
1789 /// Basic usage:
1790 ///
1791 /// ```
1792 /// let s = " Hello\tworld\t";
1793 /// assert_eq!(" Hello\tworld", s.trim_end());
1794 /// ```
1795 ///
1796 /// Directionality:
1797 ///
1798 /// ```
1799 /// let s = " English ";
1800 /// assert!(Some('h') == s.trim_end().chars().rev().next());
1801 ///
1802 /// let s = " עברית ";
1803 /// assert!(Some('ת') == s.trim_end().chars().rev().next());
1804 /// ```
1805 #[inline]
1806 #[must_use = "this returns the trimmed string as a new slice, \
1807 without modifying the original"]
1808 #[stable(feature = "trim_direction", since = "1.30.0")]
1809 pub fn trim_end(&self) -> &str {
1810 self.trim_end_matches(|c: char| c.is_whitespace())
1811 }
1812
1813 /// Returns a string slice with leading whitespace removed.
1814 ///
1815 /// 'Whitespace' is defined according to the terms of the Unicode Derived
1816 /// Core Property `White_Space`.
1817 ///
1818 /// # Text directionality
1819 ///
1820 /// A string is a sequence of bytes. 'Left' in this context means the first
1821 /// position of that byte string; for a language like Arabic or Hebrew
1822 /// which are 'right to left' rather than 'left to right', this will be
1823 /// the _right_ side, not the left.
1824 ///
1825 /// # Examples
1826 ///
1827 /// Basic usage:
1828 ///
1829 /// ```
1830 /// let s = " Hello\tworld\t";
1831 ///
1832 /// assert_eq!("Hello\tworld\t", s.trim_left());
1833 /// ```
1834 ///
1835 /// Directionality:
1836 ///
1837 /// ```
1838 /// let s = " English";
1839 /// assert!(Some('E') == s.trim_left().chars().next());
1840 ///
1841 /// let s = " עברית";
1842 /// assert!(Some('ע') == s.trim_left().chars().next());
1843 /// ```
1844 #[inline]
1845 #[stable(feature = "rust1", since = "1.0.0")]
1846 #[rustc_deprecated(
1847 since = "1.33.0",
1848 reason = "superseded by `trim_start`",
1849 suggestion = "trim_start"
1850 )]
1851 pub fn trim_left(&self) -> &str {
1852 self.trim_start()
1853 }
1854
1855 /// Returns a string slice with trailing whitespace removed.
1856 ///
1857 /// 'Whitespace' is defined according to the terms of the Unicode Derived
1858 /// Core Property `White_Space`.
1859 ///
1860 /// # Text directionality
1861 ///
1862 /// A string is a sequence of bytes. 'Right' in this context means the last
1863 /// position of that byte string; for a language like Arabic or Hebrew
1864 /// which are 'right to left' rather than 'left to right', this will be
1865 /// the _left_ side, not the right.
1866 ///
1867 /// # Examples
1868 ///
1869 /// Basic usage:
1870 ///
1871 /// ```
1872 /// let s = " Hello\tworld\t";
1873 ///
1874 /// assert_eq!(" Hello\tworld", s.trim_right());
1875 /// ```
1876 ///
1877 /// Directionality:
1878 ///
1879 /// ```
1880 /// let s = "English ";
1881 /// assert!(Some('h') == s.trim_right().chars().rev().next());
1882 ///
1883 /// let s = "עברית ";
1884 /// assert!(Some('ת') == s.trim_right().chars().rev().next());
1885 /// ```
1886 #[inline]
1887 #[stable(feature = "rust1", since = "1.0.0")]
1888 #[rustc_deprecated(
1889 since = "1.33.0",
1890 reason = "superseded by `trim_end`",
1891 suggestion = "trim_end"
1892 )]
1893 pub fn trim_right(&self) -> &str {
1894 self.trim_end()
1895 }
1896
1897 /// Returns a string slice with all prefixes and suffixes that match a
1898 /// pattern repeatedly removed.
1899 ///
1900 /// The [pattern] can be a [`char`], a slice of [`char`]s, or a function
1901 /// or closure that determines if a character matches.
1902 ///
1903 /// [`char`]: prim@char
1904 /// [pattern]: self::pattern
1905 ///
1906 /// # Examples
1907 ///
1908 /// Simple patterns:
1909 ///
1910 /// ```
1911 /// assert_eq!("11foo1bar11".trim_matches('1'), "foo1bar");
1912 /// assert_eq!("123foo1bar123".trim_matches(char::is_numeric), "foo1bar");
1913 ///
1914 /// let x: &[_] = &['1', '2'];
1915 /// assert_eq!("12foo1bar12".trim_matches(x), "foo1bar");
1916 /// ```
1917 ///
1918 /// A more complex pattern, using a closure:
1919 ///
1920 /// ```
1921 /// assert_eq!("1foo1barXX".trim_matches(|c| c == '1' || c == 'X'), "foo1bar");
1922 /// ```
1923 #[must_use = "this returns the trimmed string as a new slice, \
1924 without modifying the original"]
1925 #[stable(feature = "rust1", since = "1.0.0")]
1926 pub fn trim_matches<'a, P>(&'a self, pat: P) -> &'a str
1927 where
1928 P: Pattern<'a, Searcher: DoubleEndedSearcher<'a>>,
1929 {
1930 let mut i = 0;
1931 let mut j = 0;
1932 let mut matcher = pat.into_searcher(self);
1933 if let Some((a, b)) = matcher.next_reject() {
1934 i = a;
1935 j = b; // Remember earliest known match, correct it below if
1936 // last match is different
1937 }
1938 if let Some((_, b)) = matcher.next_reject_back() {
1939 j = b;
1940 }
1941 // SAFETY: `Searcher` is known to return valid indices.
1942 unsafe { self.get_unchecked(i..j) }
1943 }
1944
1945 /// Returns a string slice with all prefixes that match a pattern
1946 /// repeatedly removed.
1947 ///
1948 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
1949 /// function or closure that determines if a character matches.
1950 ///
1951 /// [`char`]: prim@char
1952 /// [pattern]: self::pattern
1953 ///
1954 /// # Text directionality
1955 ///
1956 /// A string is a sequence of bytes. `start` in this context means the first
1957 /// position of that byte string; for a left-to-right language like English or
1958 /// Russian, this will be left side, and for right-to-left languages like
1959 /// Arabic or Hebrew, this will be the right side.
1960 ///
1961 /// # Examples
1962 ///
1963 /// Basic usage:
1964 ///
1965 /// ```
1966 /// assert_eq!("11foo1bar11".trim_start_matches('1'), "foo1bar11");
1967 /// assert_eq!("123foo1bar123".trim_start_matches(char::is_numeric), "foo1bar123");
1968 ///
1969 /// let x: &[_] = &['1', '2'];
1970 /// assert_eq!("12foo1bar12".trim_start_matches(x), "foo1bar12");
1971 /// ```
1972 #[must_use = "this returns the trimmed string as a new slice, \
1973 without modifying the original"]
1974 #[stable(feature = "trim_direction", since = "1.30.0")]
1975 pub fn trim_start_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str {
1976 let mut i = self.len();
1977 let mut matcher = pat.into_searcher(self);
1978 if let Some((a, _)) = matcher.next_reject() {
1979 i = a;
1980 }
1981 // SAFETY: `Searcher` is known to return valid indices.
1982 unsafe { self.get_unchecked(i..self.len()) }
1983 }
1984
1985 /// Returns a string slice with the prefix removed.
1986 ///
1987 /// If the string starts with the pattern `prefix`, returns the substring after the prefix,
1988 /// wrapped in `Some`. Unlike `trim_start_matches`, this method removes the prefix exactly once.
1989 ///
1990 /// If the string does not start with `prefix`, returns `None`.
1991 ///
1992 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
1993 /// function or closure that determines if a character matches.
1994 ///
1995 /// [`char`]: prim@char
1996 /// [pattern]: self::pattern
1997 ///
1998 /// # Examples
1999 ///
2000 /// ```
2001 /// assert_eq!("foo:bar".strip_prefix("foo:"), Some("bar"));
2002 /// assert_eq!("foo:bar".strip_prefix("bar"), None);
2003 /// assert_eq!("foofoo".strip_prefix("foo"), Some("foo"));
2004 /// ```
2005 #[must_use = "this returns the remaining substring as a new slice, \
2006 without modifying the original"]
2007 #[stable(feature = "str_strip", since = "1.45.0")]
2008 pub fn strip_prefix<'a, P: Pattern<'a>>(&'a self, prefix: P) -> Option<&'a str> {
// Delegates to the pattern's own `strip_prefix_of`, passing this string
// slice as the haystack.
2009 prefix.strip_prefix_of(self)
2010 }
2011
2012 /// Returns a string slice with the suffix removed.
2013 ///
2014 /// If the string ends with the pattern `suffix`, returns the substring before the suffix,
2015 /// wrapped in `Some`. Unlike `trim_end_matches`, this method removes the suffix exactly once.
2016 ///
2017 /// If the string does not end with `suffix`, returns `None`.
2018 ///
2019 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
2020 /// function or closure that determines if a character matches.
2021 ///
2022 /// [`char`]: prim@char
2023 /// [pattern]: self::pattern
2024 ///
2025 /// # Examples
2026 ///
2027 /// ```
2028 /// assert_eq!("bar:foo".strip_suffix(":foo"), Some("bar"));
2029 /// assert_eq!("bar:foo".strip_suffix("bar"), None);
2030 /// assert_eq!("foofoo".strip_suffix("foo"), Some("foo"));
2031 /// ```
2032 #[must_use = "this returns the remaining substring as a new slice, \
2033 without modifying the original"]
2034 #[stable(feature = "str_strip", since = "1.45.0")]
2035 pub fn strip_suffix<'a, P>(&'a self, suffix: P) -> Option<&'a str>
2036 where
2037 P: Pattern<'a>,
// The suffix is matched at the end of the string, so the pattern's
// searcher has to be a `ReverseSearcher`.
2038 <P as Pattern<'a>>::Searcher: ReverseSearcher<'a>,
2039 {
// Delegates to the pattern's own `strip_suffix_of`, passing this string
// slice as the haystack.
2040 suffix.strip_suffix_of(self)
2041 }
2042
2043 /// Returns a string slice with all suffixes that match a pattern
2044 /// repeatedly removed.
2045 ///
2046 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
2047 /// function or closure that determines if a character matches.
2048 ///
2049 /// [`char`]: prim@char
2050 /// [pattern]: self::pattern
2051 ///
2052 /// # Text directionality
2053 ///
2054 /// A string is a sequence of bytes. `end` in this context means the last
2055 /// position of that byte string; for a left-to-right language like English or
2056 /// Russian, this will be right side, and for right-to-left languages like
2057 /// Arabic or Hebrew, this will be the left side.
2058 ///
2059 /// # Examples
2060 ///
2061 /// Simple patterns:
2062 ///
2063 /// ```
2064 /// assert_eq!("11foo1bar11".trim_end_matches('1'), "11foo1bar");
2065 /// assert_eq!("123foo1bar123".trim_end_matches(char::is_numeric), "123foo1bar");
2066 ///
2067 /// let x: &[_] = &['1', '2'];
2068 /// assert_eq!("12foo1bar12".trim_end_matches(x), "12foo1bar");
2069 /// ```
2070 ///
2071 /// A more complex pattern, using a closure:
2072 ///
2073 /// ```
2074 /// assert_eq!("1fooX".trim_end_matches(|c| c == '1' || c == 'X'), "1foo");
2075 /// ```
2076 #[must_use = "this returns the trimmed string as a new slice, \
2077 without modifying the original"]
2078 #[stable(feature = "trim_direction", since = "1.30.0")]
2079 pub fn trim_end_matches<'a, P>(&'a self, pat: P) -> &'a str
2080 where
2081 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
2082 {
2083 let mut j = 0;
2084 let mut matcher = pat.into_searcher(self);
2085 if let Some((_, b)) = matcher.next_reject_back() {
2086 j = b;
2087 }
2088 // SAFETY: `Searcher` is known to return valid indices.
2089 unsafe { self.get_unchecked(0..j) }
2090 }
2091
2092 /// Returns a string slice with all prefixes that match a pattern
2093 /// repeatedly removed.
2094 ///
2095 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
2096 /// function or closure that determines if a character matches.
2097 ///
2098 /// [`char`]: prim@char
2099 /// [pattern]: self::pattern
2100 ///
2101 /// # Text directionality
2102 ///
2103 /// A string is a sequence of bytes. 'Left' in this context means the first
2104 /// position of that byte string; for languages like Arabic or Hebrew,
2105 /// which are 'right to left' rather than 'left to right', this will be
2106 /// the _right_ side, not the left.
2107 ///
2108 /// # Examples
2109 ///
2110 /// Basic usage:
2111 ///
2112 /// ```
2113 /// assert_eq!("11foo1bar11".trim_left_matches('1'), "foo1bar11");
2114 /// assert_eq!("123foo1bar123".trim_left_matches(char::is_numeric), "foo1bar123");
2115 ///
2116 /// let x: &[_] = &['1', '2'];
2117 /// assert_eq!("12foo1bar12".trim_left_matches(x), "foo1bar12");
2118 /// ```
2119 #[stable(feature = "rust1", since = "1.0.0")]
2120 #[rustc_deprecated(
2121 since = "1.33.0",
2122 reason = "superseded by `trim_start_matches`",
2123 suggestion = "trim_start_matches"
2124 )]
2125 pub fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str {
// Deprecated name: forwards unchanged to `trim_start_matches`.
2126 self.trim_start_matches(pat)
2127 }
2128
2129 /// Returns a string slice with all suffixes that match a pattern
2130 /// repeatedly removed.
2131 ///
2132 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
2133 /// function or closure that determines if a character matches.
2134 ///
2135 /// [`char`]: prim@char
2136 /// [pattern]: self::pattern
2137 ///
2138 /// # Text directionality
2139 ///
2140 /// A string is a sequence of bytes. 'Right' in this context means the last
2141 /// position of that byte string; for languages like Arabic or Hebrew,
2142 /// which are 'right to left' rather than 'left to right', this will be
2143 /// the _left_ side, not the right.
2144 ///
2145 /// # Examples
2146 ///
2147 /// Simple patterns:
2148 ///
2149 /// ```
2150 /// assert_eq!("11foo1bar11".trim_right_matches('1'), "11foo1bar");
2151 /// assert_eq!("123foo1bar123".trim_right_matches(char::is_numeric), "123foo1bar");
2152 ///
2153 /// let x: &[_] = &['1', '2'];
2154 /// assert_eq!("12foo1bar12".trim_right_matches(x), "12foo1bar");
2155 /// ```
2156 ///
2157 /// A more complex pattern, using a closure:
2158 ///
2159 /// ```
2160 /// assert_eq!("1fooX".trim_right_matches(|c| c == '1' || c == 'X'), "1foo");
2161 /// ```
2162 #[stable(feature = "rust1", since = "1.0.0")]
2163 #[rustc_deprecated(
2164 since = "1.33.0",
2165 reason = "superseded by `trim_end_matches`",
2166 suggestion = "trim_end_matches"
2167 )]
2168 pub fn trim_right_matches<'a, P>(&'a self, pat: P) -> &'a str
2169 where
2170 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
2171 {
// Deprecated name: forwards unchanged to `trim_end_matches`.
2172 self.trim_end_matches(pat)
2173 }
2174
2175 /// Parses this string slice into another type.
2176 ///
2177 /// Because `parse` is so general, it can cause problems with type
2178 /// inference. As such, `parse` is one of the few times you'll see
2179 /// the syntax affectionately known as the 'turbofish': `::<>`. This
2180 /// helps the inference algorithm understand specifically which type
2181 /// you're trying to parse into.
2182 ///
2183 /// `parse` can parse into any type that implements the [`FromStr`] trait.
2184 ///
2185 ///
2186 /// # Errors
2187 ///
2188 /// Will return [`Err`] if it's not possible to parse this string slice into
2189 /// the desired type.
2190 ///
2191 /// [`Err`]: FromStr::Err
2192 ///
2193 /// # Examples
2194 ///
2195 /// Basic usage:
2196 ///
2197 /// ```
2198 /// let four: u32 = "4".parse().unwrap();
2199 ///
2200 /// assert_eq!(4, four);
2201 /// ```
2202 ///
2203 /// Using the 'turbofish' instead of annotating `four`:
2204 ///
2205 /// ```
2206 /// let four = "4".parse::<u32>();
2207 ///
2208 /// assert_eq!(Ok(4), four);
2209 /// ```
2210 ///
2211 /// Failing to parse:
2212 ///
2213 /// ```
2214 /// let nope = "j".parse::<u32>();
2215 ///
2216 /// assert!(nope.is_err());
2217 /// ```
2218 #[inline]
2219 #[stable(feature = "rust1", since = "1.0.0")]
2220 pub fn parse<F: FromStr>(&self) -> Result<F, F::Err> {
// The target type `F` is picked by the caller (annotation or turbofish);
// the actual parsing is done by `F`'s `FromStr` implementation.
2221 FromStr::from_str(self)
2222 }
2223
2224 /// Checks if all characters in this string are within the ASCII range.
2225 ///
2226 /// # Examples
2227 ///
2228 /// ```
2229 /// let ascii = "hello!\n";
2230 /// let non_ascii = "Grüße, Jürgen ❤";
2231 ///
2232 /// assert!(ascii.is_ascii());
2233 /// assert!(!non_ascii.is_ascii());
2234 /// ```
2235 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
2236 #[inline]
2237 pub fn is_ascii(&self) -> bool {
2238 // We can treat each byte as a character here: every multibyte character
2239 // starts with a byte that is not in the ASCII range, so the byte-wise
2240 // check already stops there.
2241 self.as_bytes().is_ascii()
2242 }
2243
2244 /// Checks that two strings are an ASCII case-insensitive match.
2245 ///
2246 /// Same as `to_ascii_lowercase(a) == to_ascii_lowercase(b)`,
2247 /// but without allocating and copying temporaries.
2248 ///
2249 /// # Examples
2250 ///
2251 /// ```
2252 /// assert!("Ferris".eq_ignore_ascii_case("FERRIS"));
2253 /// assert!("Ferrös".eq_ignore_ascii_case("FERRöS"));
2254 /// assert!(!"Ferrös".eq_ignore_ascii_case("FERRÖS"));
2255 /// ```
2256 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
2257 #[inline]
2258 pub fn eq_ignore_ascii_case(&self, other: &str) -> bool {
// The comparison is carried out on the underlying bytes of both strings
// by the byte-slice method of the same name.
2259 self.as_bytes().eq_ignore_ascii_case(other.as_bytes())
2260 }
2261
2262 /// Converts this string to its ASCII upper case equivalent in-place.
2263 ///
2264 /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
2265 /// but non-ASCII letters are unchanged.
2266 ///
2267 /// To return a new uppercased value without modifying the existing one, use
2268 /// [`to_ascii_uppercase()`].
2269 ///
2270 /// [`to_ascii_uppercase()`]: #method.to_ascii_uppercase
2271 ///
2272 /// # Examples
2273 ///
2274 /// ```
2275 /// let mut s = String::from("Grüße, Jürgen ❤");
2276 ///
2277 /// s.make_ascii_uppercase();
2278 ///
2279 /// assert_eq!("GRüßE, JüRGEN ❤", s);
2280 /// ```
2281 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
2282 #[inline]
2283 pub fn make_ascii_uppercase(&mut self) {
2284 // SAFETY: changing ASCII letters only does not invalidate UTF-8.
2285 let me = unsafe { self.as_bytes_mut() };
2286 me.make_ascii_uppercase()
2287 }
2288
2289 /// Converts this string to its ASCII lower case equivalent in-place.
2290 ///
2291 /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
2292 /// but non-ASCII letters are unchanged.
2293 ///
2294 /// To return a new lowercased value without modifying the existing one, use
2295 /// [`to_ascii_lowercase()`].
2296 ///
2297 /// [`to_ascii_lowercase()`]: #method.to_ascii_lowercase
2298 ///
2299 /// # Examples
2300 ///
2301 /// ```
2302 /// let mut s = String::from("GRÜßE, JÜRGEN ❤");
2303 ///
2304 /// s.make_ascii_lowercase();
2305 ///
2306 /// assert_eq!("grÜße, jÜrgen ❤", s);
2307 /// ```
2308 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
2309 #[inline]
2310 pub fn make_ascii_lowercase(&mut self) {
2311 // SAFETY: changing ASCII letters only does not invalidate UTF-8.
2312 let me = unsafe { self.as_bytes_mut() };
2313 me.make_ascii_lowercase()
2314 }
2315
2316 /// Return an iterator that escapes each char in `self` with [`char::escape_debug`].
2317 ///
2318 /// Note: only extended grapheme codepoints that begin the string will be
2319 /// escaped.
2320 ///
2321 /// # Examples
2322 ///
2323 /// As an iterator:
2324 ///
2325 /// ```
2326 /// for c in "❤\n!".escape_debug() {
2327 /// print!("{}", c);
2328 /// }
2329 /// println!();
2330 /// ```
2331 ///
2332 /// Using `println!` directly:
2333 ///
2334 /// ```
2335 /// println!("{}", "❤\n!".escape_debug());
2336 /// ```
2337 ///
2338 ///
2339 /// Both are equivalent to:
2340 ///
2341 /// ```
2342 /// println!("❤\\n!");
2343 /// ```
2344 ///
2345 /// Using `to_string`:
2346 ///
2347 /// ```
2348 /// assert_eq!("❤\n!".escape_debug().to_string(), "❤\\n!");
2349 /// ```
2350 #[stable(feature = "str_escape", since = "1.34.0")]
2351 pub fn escape_debug(&self) -> EscapeDebug<'_> {
2352 let mut chars = self.chars();
2353 EscapeDebug {
2354 inner: chars
2355 .next()
2356 .map(|first| first.escape_debug_ext(EscapeDebugExtArgs::ESCAPE_ALL))
2357 .into_iter()
2358 .flatten()
2359 .chain(chars.flat_map(CharEscapeDebugContinue)),
2360 }
2361 }
2362
2363 /// Returns an iterator that escapes each char in `self` with [`char::escape_default`].
2364 ///
2365 /// # Examples
2366 ///
2367 /// As an iterator:
2368 ///
2369 /// ```
2370 /// for c in "❤\n!".escape_default() {
2371 /// print!("{}", c);
2372 /// }
2373 /// println!();
2374 /// ```
2375 ///
2376 /// Using `println!` directly:
2377 ///
2378 /// ```
2379 /// println!("{}", "❤\n!".escape_default());
2380 /// ```
2381 ///
2382 ///
2383 /// Both are equivalent to:
2384 ///
2385 /// ```
2386 /// println!("\\u{{2764}}\\n!");
2387 /// ```
2388 ///
2389 /// Using `to_string`:
2390 ///
2391 /// ```
2392 /// assert_eq!("❤\n!".escape_default().to_string(), "\\u{2764}\\n!");
2393 /// ```
2394 #[stable(feature = "str_escape", since = "1.34.0")]
2395 pub fn escape_default(&self) -> EscapeDefault<'_> {
// `CharEscapeDefault` is the named function type (declared through
// `impl_fn_for_zst!` in this file) that calls `char::escape_default`.
2396 EscapeDefault { inner: self.chars().flat_map(CharEscapeDefault) }
2397 }
2398
2399 /// Returns an iterator that escapes each char in `self` with [`char::escape_unicode`].
2400 ///
2401 /// # Examples
2402 ///
2403 /// As an iterator:
2404 ///
2405 /// ```
2406 /// for c in "❤\n!".escape_unicode() {
2407 /// print!("{}", c);
2408 /// }
2409 /// println!();
2410 /// ```
2411 ///
2412 /// Using `println!` directly:
2413 ///
2414 /// ```
2415 /// println!("{}", "❤\n!".escape_unicode());
2416 /// ```
2417 ///
2418 ///
2419 /// Both are equivalent to:
2420 ///
2421 /// ```
2422 /// println!("\\u{{2764}}\\u{{a}}\\u{{21}}");
2423 /// ```
2424 ///
2425 /// Using `to_string`:
2426 ///
2427 /// ```
2428 /// assert_eq!("❤\n!".escape_unicode().to_string(), "\\u{2764}\\u{a}\\u{21}");
2429 /// ```
2430 #[stable(feature = "str_escape", since = "1.34.0")]
2431 pub fn escape_unicode(&self) -> EscapeUnicode<'_> {
// `CharEscapeUnicode` is the named function type (declared through
// `impl_fn_for_zst!` in this file) that calls `char::escape_unicode`.
2432 EscapeUnicode { inner: self.chars().flat_map(CharEscapeUnicode) }
2433 }
2434 }
2435
2436 #[stable(feature = "rust1", since = "1.0.0")]
2437 impl AsRef<[u8]> for str {
/// Borrows the string slice as a byte slice; this is the same view that
/// `as_bytes` returns.
2438 #[inline]
2439 fn as_ref(&self) -> &[u8] {
2440 self.as_bytes()
2441 }
2442 }
2443
2444 #[stable(feature = "rust1", since = "1.0.0")]
2445 impl Default for &str {
2446 /// Creates an empty string slice (`""`).
2447 #[inline]
2448 fn default() -> Self {
2449 ""
2450 }
2451 }
2452
2453 #[stable(feature = "default_mut_str", since = "1.28.0")]
2454 impl Default for &mut str {
2455 /// Creates an empty mutable string slice.
2456 #[inline]
2457 fn default() -> Self {
2458 // SAFETY: The empty string is valid UTF-8, and the byte slice passed
// here is empty.
2459 unsafe { from_utf8_unchecked_mut(&mut []) }
2460 }
2461 }
2462
// Named function types for this module: each entry below declares a struct
// together with the closure-style body it runs when called.
2463 impl_fn_for_zst! {
2464 /// A nameable, cloneable fn type
// Drops one trailing `\r` byte from `line` when present; otherwise returns
// `line` unchanged.
2465 #[derive(Clone)]
2466 struct LinesAnyMap impl<'a> Fn = |line: &'a str| -> &'a str {
2467 let l = line.len();
2468 if l > 0 && line.as_bytes()[l - 1] == b'\r' { &line[0 .. l - 1] }
2469 else { line }
2470 };
2471
// Debug-escapes a char with grapheme-extended escaping switched off;
// `str::escape_debug` applies it to every char after the first.
2472 #[derive(Clone)]
2473 struct CharEscapeDebugContinue impl Fn = |c: char| -> char::EscapeDebug {
2474 c.escape_debug_ext(EscapeDebugExtArgs {
2475 escape_grapheme_extended: false,
2476 escape_single_quote: true,
2477 escape_double_quote: true
2478 })
2479 };
2480
// Calls `char::escape_unicode`; used by `str::escape_unicode`.
2481 #[derive(Clone)]
2482 struct CharEscapeUnicode impl Fn = |c: char| -> char::EscapeUnicode {
2483 c.escape_unicode()
2484 };
// Calls `char::escape_default`; used by `str::escape_default`.
2485 #[derive(Clone)]
2486 struct CharEscapeDefault impl Fn = |c: char| -> char::EscapeDefault {
2487 c.escape_default()
2488 };
2489
// Predicate: `c.is_whitespace()`.
2490 #[derive(Clone)]
2491 struct IsWhitespace impl Fn = |c: char| -> bool {
2492 c.is_whitespace()
2493 };
2494
// Predicate on a byte reference: `byte.is_ascii_whitespace()`.
2495 #[derive(Clone)]
2496 struct IsAsciiWhitespace impl Fn = |byte: &u8| -> bool {
2497 byte.is_ascii_whitespace()
2498 };
2499
// Predicate: true when the referenced string slice is not empty.
2500 #[derive(Clone)]
2501 struct IsNotEmpty impl<'a, 'b> Fn = |s: &'a &'b str| -> bool {
2502 !s.is_empty()
2503 };
2504
// Predicate: true when the referenced byte slice is not empty.
2505 #[derive(Clone)]
2506 struct BytesIsNotEmpty impl<'a, 'b> Fn = |s: &'a &'b [u8]| -> bool {
2507 !s.is_empty()
2508 };
2509
// Reinterprets bytes as `&str` without any validation.
2510 #[derive(Clone)]
2511 struct UnsafeBytesToStr impl<'a> Fn = |bytes: &'a [u8]| -> &'a str {
2512 // SAFETY: not safe in general. Nothing is validated here, so this must
// only ever be applied to byte slices already known to be valid UTF-8.
2513 unsafe { from_utf8_unchecked(bytes) }
2514 };
2515 }