]>
Commit | Line | Data |
---|---|---|
1a4d82fc JJ |
1 | // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT |
2 | // file at the top-level directory of this distribution and at | |
3 | // http://rust-lang.org/COPYRIGHT. | |
4 | // | |
5 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | |
6 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | |
7 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | |
8 | // option. This file may not be copied, modified, or distributed | |
9 | // except according to those terms. | |
1a4d82fc | 10 | |
c1a9b12d | 11 | //! Unicode string slices |
1a4d82fc | 12 | //! |
c1a9b12d SL |
13 | //! *[See also the `str` primitive type](../primitive.str.html).* |
14 | ||
1a4d82fc | 15 | |
85aaf69f | 16 | #![stable(feature = "rust1", since = "1.0.0")] |
1a4d82fc | 17 | |
62682a34 SL |
18 | // Many of the imports in this module are only used in the test configuration. |
19 | // It's cleaner to just turn off the unused_imports warning than to fix them. | |
20 | #![allow(unused_imports)] | |
21 | ||
1a4d82fc | 22 | use core::clone::Clone; |
c34b1796 | 23 | use core::iter::{Iterator, Extend}; |
1a4d82fc | 24 | use core::option::Option::{self, Some, None}; |
85aaf69f | 25 | use core::result::Result; |
1a4d82fc | 26 | use core::str as core_str; |
9346a6ac AL |
27 | use core::str::pattern::Pattern; |
28 | use core::str::pattern::{Searcher, ReverseSearcher, DoubleEndedSearcher}; | |
c1a9b12d | 29 | use core::mem; |
d9579d0f | 30 | use rustc_unicode::str::{UnicodeStr, Utf16Encoder}; |
1a4d82fc | 31 | |
85aaf69f SL |
32 | use vec_deque::VecDeque; |
33 | use borrow::{Borrow, ToOwned}; | |
1a4d82fc | 34 | use string::String; |
d9579d0f | 35 | use rustc_unicode; |
1a4d82fc JJ |
36 | use vec::Vec; |
37 | use slice::SliceConcatExt; | |
c1a9b12d | 38 | use boxed::Box; |
1a4d82fc | 39 | |
9346a6ac AL |
40 | pub use core::str::{FromStr, Utf8Error}; |
41 | pub use core::str::{Lines, LinesAny, CharRange}; | |
42 | pub use core::str::{Split, RSplit}; | |
43 | pub use core::str::{SplitN, RSplitN}; | |
44 | pub use core::str::{SplitTerminator, RSplitTerminator}; | |
45 | pub use core::str::{Matches, RMatches}; | |
46 | pub use core::str::{MatchIndices, RMatchIndices}; | |
c34b1796 AL |
47 | pub use core::str::{from_utf8, Chars, CharIndices, Bytes}; |
48 | pub use core::str::{from_utf8_unchecked, ParseBoolError}; | |
e9174d1e | 49 | pub use rustc_unicode::str::{SplitWhitespace}; |
9346a6ac | 50 | pub use core::str::pattern; |
1a4d82fc | 51 | |
d9579d0f AL |
52 | impl<S: Borrow<str>> SliceConcatExt<str> for [S] { |
53 | type Output = String; | |
54 | ||
1a4d82fc | 55 | fn concat(&self) -> String { |
c34b1796 | 56 | if self.is_empty() { |
1a4d82fc JJ |
57 | return String::new(); |
58 | } | |
59 | ||
60 | // `len` calculation may overflow but push_str will check boundaries | |
bd371182 | 61 | let len = self.iter().map(|s| s.borrow().len()).sum(); |
1a4d82fc JJ |
62 | let mut result = String::with_capacity(len); |
63 | ||
c34b1796 | 64 | for s in self { |
bd371182 | 65 | result.push_str(s.borrow()) |
1a4d82fc JJ |
66 | } |
67 | ||
68 | result | |
69 | } | |
70 | ||
c1a9b12d | 71 | fn join(&self, sep: &str) -> String { |
c34b1796 | 72 | if self.is_empty() { |
1a4d82fc JJ |
73 | return String::new(); |
74 | } | |
75 | ||
76 | // concat is faster | |
77 | if sep.is_empty() { | |
c34b1796 | 78 | return self.concat(); |
1a4d82fc JJ |
79 | } |
80 | ||
81 | // this is wrong without the guarantee that `self` is non-empty | |
82 | // `len` calculation may overflow but push_str will check boundaries | |
c34b1796 | 83 | let len = sep.len() * (self.len() - 1) |
bd371182 | 84 | + self.iter().map(|s| s.borrow().len()).sum::<usize>(); |
1a4d82fc JJ |
85 | let mut result = String::with_capacity(len); |
86 | let mut first = true; | |
87 | ||
c34b1796 | 88 | for s in self { |
1a4d82fc JJ |
89 | if first { |
90 | first = false; | |
91 | } else { | |
92 | result.push_str(sep); | |
93 | } | |
bd371182 | 94 | result.push_str(s.borrow()); |
1a4d82fc JJ |
95 | } |
96 | result | |
97 | } | |
1a4d82fc | 98 | |
c1a9b12d SL |
99 | fn connect(&self, sep: &str) -> String { |
100 | self.join(sep) | |
101 | } | |
102 | } | |
1a4d82fc | 103 | |
1a4d82fc | 104 | /// External iterator for a string's UTF-16 code units. |
c34b1796 AL |
105 | /// |
106 | /// For use with the `std::iter` module. | |
1a4d82fc | 107 | #[derive(Clone)] |
e9174d1e | 108 | #[unstable(feature = "str_utf16", issue = "27714")] |
1a4d82fc JJ |
109 | pub struct Utf16Units<'a> { |
110 | encoder: Utf16Encoder<Chars<'a>> | |
111 | } | |
112 | ||
85aaf69f | 113 | #[stable(feature = "rust1", since = "1.0.0")] |
1a4d82fc JJ |
114 | impl<'a> Iterator for Utf16Units<'a> { |
115 | type Item = u16; | |
116 | ||
117 | #[inline] | |
118 | fn next(&mut self) -> Option<u16> { self.encoder.next() } | |
119 | ||
120 | #[inline] | |
85aaf69f | 121 | fn size_hint(&self) -> (usize, Option<usize>) { self.encoder.size_hint() } |
1a4d82fc JJ |
122 | } |
123 | ||
1a4d82fc JJ |
124 | // Return the initial codepoint accumulator for the first byte. |
125 | // The first byte is special, only want bottom 5 bits for width 2, 4 bits | |
126 | // for width 3, and 3 bits for width 4 | |
127 | macro_rules! utf8_first_byte { | |
128 | ($byte:expr, $width:expr) => (($byte & (0x7F >> $width)) as u32) | |
129 | } | |
130 | ||
131 | // return the value of $ch updated with continuation byte $byte | |
132 | macro_rules! utf8_acc_cont_byte { | |
c34b1796 | 133 | ($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63) as u32) |
1a4d82fc JJ |
134 | } |
135 | ||
85aaf69f SL |
136 | #[stable(feature = "rust1", since = "1.0.0")] |
137 | impl Borrow<str> for String { | |
d9579d0f | 138 | #[inline] |
85aaf69f | 139 | fn borrow(&self) -> &str { &self[..] } |
1a4d82fc JJ |
140 | } |
141 | ||
85aaf69f SL |
142 | #[stable(feature = "rust1", since = "1.0.0")] |
143 | impl ToOwned for str { | |
144 | type Owned = String; | |
1a4d82fc JJ |
145 | fn to_owned(&self) -> String { |
146 | unsafe { | |
147 | String::from_utf8_unchecked(self.as_bytes().to_owned()) | |
148 | } | |
149 | } | |
150 | } | |
151 | ||
1a4d82fc | 152 | /// Any string that can be represented as a slice. |
c34b1796 AL |
153 | #[lang = "str"] |
154 | #[cfg(not(test))] | |
85aaf69f | 155 | #[stable(feature = "rust1", since = "1.0.0")] |
c34b1796 | 156 | impl str { |
62682a34 | 157 | /// Returns the length of `self` in bytes. |
1a4d82fc JJ |
158 | /// |
159 | /// # Examples | |
160 | /// | |
c34b1796 | 161 | /// ``` |
62682a34 SL |
162 | /// assert_eq!("foo".len(), 3); |
163 | /// assert_eq!("ƒoo".len(), 4); // fancy f! | |
1a4d82fc | 164 | /// ``` |
85aaf69f | 165 | #[stable(feature = "rust1", since = "1.0.0")] |
1a4d82fc | 166 | #[inline] |
62682a34 SL |
167 | pub fn len(&self) -> usize { |
168 | core_str::StrExt::len(self) | |
1a4d82fc JJ |
169 | } |
170 | ||
62682a34 SL |
171 | /// Returns true if this slice has a length of zero bytes. |
172 | /// | |
173 | /// # Examples | |
174 | /// | |
175 | /// ``` | |
176 | /// assert!("".is_empty()); | |
177 | /// ``` | |
1a4d82fc | 178 | #[inline] |
62682a34 SL |
179 | #[stable(feature = "rust1", since = "1.0.0")] |
180 | pub fn is_empty(&self) -> bool { | |
181 | core_str::StrExt::is_empty(self) | |
1a4d82fc JJ |
182 | } |
183 | ||
62682a34 SL |
184 | /// Checks that `index`-th byte lies at the start and/or end of a |
185 | /// UTF-8 code point sequence. | |
186 | /// | |
187 | /// The start and end of the string (when `index == self.len()`) are | |
188 | /// considered to be | |
189 | /// boundaries. | |
190 | /// | |
c1a9b12d | 191 | /// Returns `false` if `index` is greater than `self.len()`. |
1a4d82fc | 192 | /// |
c34b1796 | 193 | /// # Examples |
1a4d82fc | 194 | /// |
c34b1796 | 195 | /// ``` |
c1a9b12d SL |
196 | /// #![feature(str_char)] |
197 | /// | |
62682a34 SL |
198 | /// let s = "Löwe 老虎 Léopard"; |
199 | /// assert!(s.is_char_boundary(0)); | |
200 | /// // start of `老` | |
201 | /// assert!(s.is_char_boundary(6)); | |
202 | /// assert!(s.is_char_boundary(s.len())); | |
c34b1796 | 203 | /// |
62682a34 SL |
204 | /// // second byte of `ö` |
205 | /// assert!(!s.is_char_boundary(2)); | |
206 | /// | |
207 | /// // third byte of `老` | |
208 | /// assert!(!s.is_char_boundary(8)); | |
1a4d82fc | 209 | /// ``` |
62682a34 SL |
210 | #[unstable(feature = "str_char", |
211 | reason = "it is unclear whether this method pulls its weight \ | |
212 | with the existence of the char_indices iterator or \ | |
213 | this method may want to be replaced with checked \ | |
e9174d1e SL |
214 | slicing", |
215 | issue = "27754")] | |
c1a9b12d | 216 | #[inline] |
62682a34 SL |
217 | pub fn is_char_boundary(&self, index: usize) -> bool { |
218 | core_str::StrExt::is_char_boundary(self, index) | |
1a4d82fc JJ |
219 | } |
220 | ||
62682a34 | 221 | /// Converts `self` to a byte slice. |
1a4d82fc | 222 | /// |
c34b1796 | 223 | /// # Examples |
1a4d82fc | 224 | /// |
1a4d82fc | 225 | /// ``` |
62682a34 | 226 | /// assert_eq!("bors".as_bytes(), b"bors"); |
1a4d82fc | 227 | /// ``` |
85aaf69f | 228 | #[stable(feature = "rust1", since = "1.0.0")] |
62682a34 SL |
229 | #[inline(always)] |
230 | pub fn as_bytes(&self) -> &[u8] { | |
231 | core_str::StrExt::as_bytes(self) | |
1a4d82fc JJ |
232 | } |
233 | ||
62682a34 SL |
234 | /// Returns a raw pointer to the `&str`'s buffer. |
235 | /// | |
236 | /// The caller must ensure that the string outlives this pointer, and | |
237 | /// that it is not | |
238 | /// reallocated (e.g. by pushing to the string). | |
1a4d82fc | 239 | /// |
c34b1796 | 240 | /// # Examples |
1a4d82fc | 241 | /// |
c34b1796 | 242 | /// ``` |
62682a34 SL |
243 | /// let s = "Hello"; |
244 | /// let p = s.as_ptr(); | |
1a4d82fc | 245 | /// ``` |
85aaf69f | 246 | #[stable(feature = "rust1", since = "1.0.0")] |
62682a34 SL |
247 | #[inline] |
248 | pub fn as_ptr(&self) -> *const u8 { | |
249 | core_str::StrExt::as_ptr(self) | |
1a4d82fc JJ |
250 | } |
251 | ||
62682a34 SL |
252 | /// Takes a bytewise slice from a string. |
253 | /// | |
254 | /// Returns the substring from [`begin`..`end`). | |
255 | /// | |
256 | /// # Unsafety | |
257 | /// | |
c1a9b12d | 258 | /// Caller must check both UTF-8 sequence boundaries and the boundaries |
e9174d1e | 259 | /// of the entire slice as well. |
c34b1796 AL |
260 | /// |
261 | /// # Examples | |
262 | /// | |
263 | /// ``` | |
62682a34 | 264 | /// let s = "Löwe 老虎 Léopard"; |
c34b1796 | 265 | /// |
62682a34 SL |
266 | /// unsafe { |
267 | /// assert_eq!(s.slice_unchecked(0, 21), "Löwe 老虎 Léopard"); | |
268 | /// } | |
c34b1796 | 269 | /// ``` |
85aaf69f | 270 | #[stable(feature = "rust1", since = "1.0.0")] |
c1a9b12d | 271 | #[inline] |
62682a34 SL |
272 | pub unsafe fn slice_unchecked(&self, begin: usize, end: usize) -> &str { |
273 | core_str::StrExt::slice_unchecked(self, begin, end) | |
1a4d82fc JJ |
274 | } |
275 | ||
c1a9b12d SL |
276 | /// Takes a bytewise mutable slice from a string. |
277 | /// | |
278 | /// Same as `slice_unchecked`, but works with `&mut str` instead of `&str`. | |
e9174d1e SL |
279 | #[unstable(feature = "str_slice_mut", reason = "recently added", |
280 | issue = "27793")] | |
c1a9b12d SL |
281 | #[inline] |
282 | pub unsafe fn slice_mut_unchecked(&mut self, begin: usize, end: usize) -> &mut str { | |
283 | core_str::StrExt::slice_mut_unchecked(self, begin, end) | |
284 | } | |
285 | ||
c1a9b12d | 286 | /// Given a byte position, return the next code point and its index. |
9346a6ac | 287 | /// |
c1a9b12d | 288 | /// This can be used to iterate over the Unicode code points of a string. |
9346a6ac | 289 | /// |
62682a34 | 290 | /// # Panics |
1a4d82fc | 291 | /// |
62682a34 | 292 | /// If `start` is greater than or equal to the length of the string. |
c1a9b12d | 293 | /// If `start` is not the index of the beginning of a valid UTF-8 sequence. |
c34b1796 | 294 | /// |
62682a34 | 295 | /// # Examples |
1a4d82fc | 296 | /// |
c1a9b12d | 297 | /// This example manually iterates through the code points of a string; |
62682a34 SL |
298 | /// this should normally be |
299 | /// done by `.chars()` or `.char_indices()`. | |
c34b1796 AL |
300 | /// |
301 | /// ``` | |
c1a9b12d SL |
302 | /// #![feature(str_char, core)] |
303 | /// | |
62682a34 | 304 | /// use std::str::CharRange; |
9346a6ac | 305 | /// |
c1a9b12d | 306 | /// let s = "中华Việt Nam"; |
62682a34 SL |
307 | /// let mut i = 0; |
308 | /// while i < s.len() { | |
309 | /// let CharRange {ch, next} = s.char_range_at(i); | |
310 | /// println!("{}: {}", i, ch); | |
311 | /// i = next; | |
312 | /// } | |
313 | /// ``` | |
9346a6ac | 314 | /// |
62682a34 | 315 | /// This outputs: |
9346a6ac | 316 | /// |
62682a34 SL |
317 | /// ```text |
318 | /// 0: 中 | |
319 | /// 3: 华 | |
320 | /// 6: V | |
321 | /// 7: i | |
c1a9b12d | 322 | /// 8: e |
e9174d1e SL |
323 | /// 9: |
324 | /// 11: | |
c1a9b12d SL |
325 | /// 13: t |
326 | /// 14: | |
327 | /// 15: N | |
328 | /// 16: a | |
329 | /// 17: m | |
1a4d82fc | 330 | /// ``` |
62682a34 SL |
331 | #[unstable(feature = "str_char", |
332 | reason = "often replaced by char_indices, this method may \ | |
333 | be removed in favor of just char_at() or eventually \ | |
e9174d1e SL |
334 | removed altogether", |
335 | issue = "27754")] | |
c1a9b12d | 336 | #[inline] |
62682a34 SL |
337 | pub fn char_range_at(&self, start: usize) -> CharRange { |
338 | core_str::StrExt::char_range_at(self, start) | |
1a4d82fc JJ |
339 | } |
340 | ||
62682a34 | 341 | /// Given a byte position, return the previous `char` and its position. |
9346a6ac | 342 | /// |
c1a9b12d SL |
343 | /// This function can be used to iterate over the Unicode code points of a string in reverse. |
344 | /// | |
345 | /// Note that Unicode has many features, such as combining marks, ligatures, | |
346 | /// and direction marks, that need to be taken into account to correctly reverse a string. | |
9346a6ac | 347 | /// |
62682a34 | 348 | /// Returns 0 for next index if called on start index 0. |
9346a6ac | 349 | /// |
62682a34 | 350 | /// # Panics |
9346a6ac | 351 | /// |
62682a34 | 352 | /// If `start` is greater than the length of the string. |
c1a9b12d | 353 | /// If `start` is not an index following a valid UTF-8 sequence. |
1a4d82fc | 354 | /// |
c34b1796 AL |
355 | /// # Examples |
356 | /// | |
c1a9b12d | 357 | /// This example manually iterates through the code points of a string; |
62682a34 SL |
358 | /// this should normally be |
359 | /// done by `.chars().rev()` or `.char_indices()`. | |
c34b1796 AL |
360 | /// |
361 | /// ``` | |
c1a9b12d SL |
362 | /// #![feature(str_char, core)] |
363 | /// | |
62682a34 | 364 | /// use std::str::CharRange; |
1a4d82fc | 365 | /// |
c1a9b12d | 366 | /// let s = "中华Việt Nam"; |
62682a34 SL |
367 | /// let mut i = s.len(); |
368 | /// while i > 0 { | |
369 | /// let CharRange {ch, next} = s.char_range_at_reverse(i); | |
370 | /// println!("{}: {}", i, ch); | |
371 | /// i = next; | |
372 | /// } | |
c34b1796 | 373 | /// ``` |
1a4d82fc | 374 | /// |
62682a34 | 375 | /// This outputs: |
1a4d82fc | 376 | /// |
62682a34 | 377 | /// ```text |
c1a9b12d SL |
378 | /// 18: m |
379 | /// 17: a | |
380 | /// 16: N | |
381 | /// 15: | |
382 | /// 14: t | |
e9174d1e SL |
383 | /// 13: |
384 | /// 11: | |
c1a9b12d | 385 | /// 9: e |
62682a34 SL |
386 | /// 8: i |
387 | /// 7: V | |
388 | /// 6: 华 | |
389 | /// 3: 中 | |
1a4d82fc | 390 | /// ``` |
62682a34 SL |
391 | #[unstable(feature = "str_char", |
392 | reason = "often replaced by char_indices, this method may \ | |
393 | be removed in favor of just char_at_reverse() or \ | |
e9174d1e SL |
394 | eventually removed altogether", |
395 | issue = "27754")] | |
c1a9b12d | 396 | #[inline] |
62682a34 SL |
397 | pub fn char_range_at_reverse(&self, start: usize) -> CharRange { |
398 | core_str::StrExt::char_range_at_reverse(self, start) | |
1a4d82fc JJ |
399 | } |
400 | ||
62682a34 | 401 | /// Given a byte position, return the `char` at that position. |
9346a6ac | 402 | /// |
62682a34 | 403 | /// # Panics |
9346a6ac | 404 | /// |
62682a34 | 405 | /// If `i` is greater than or equal to the length of the string. |
c1a9b12d | 406 | /// If `i` is not the index of the beginning of a valid UTF-8 sequence. |
1a4d82fc | 407 | /// |
c34b1796 | 408 | /// # Examples |
1a4d82fc | 409 | /// |
c34b1796 | 410 | /// ``` |
c1a9b12d SL |
411 | /// #![feature(str_char)] |
412 | /// | |
62682a34 SL |
413 | /// let s = "abπc"; |
414 | /// assert_eq!(s.char_at(1), 'b'); | |
415 | /// assert_eq!(s.char_at(2), 'π'); | |
c1a9b12d | 416 | /// assert_eq!(s.char_at(4), 'c'); |
c34b1796 | 417 | /// ``` |
62682a34 SL |
418 | #[unstable(feature = "str_char", |
419 | reason = "frequently replaced by the chars() iterator, this \ | |
420 | method may be removed or possibly renamed in the \ | |
421 | future; it is normally replaced by chars/char_indices \ | |
422 | iterators or by getting the first char from a \ | |
e9174d1e SL |
423 | subslice", |
424 | issue = "27754")] | |
c1a9b12d | 425 | #[inline] |
62682a34 SL |
426 | pub fn char_at(&self, i: usize) -> char { |
427 | core_str::StrExt::char_at(self, i) | |
9346a6ac AL |
428 | } |
429 | ||
62682a34 SL |
430 | /// Given a byte position, return the `char` at that position, counting |
431 | /// from the end. | |
9346a6ac | 432 | /// |
62682a34 | 433 | /// # Panics |
9346a6ac | 434 | /// |
62682a34 | 435 | /// If `i` is greater than the length of the string. |
c1a9b12d | 436 | /// If `i` is not an index following a valid UTF-8 sequence. |
9346a6ac AL |
437 | /// |
438 | /// # Examples | |
439 | /// | |
9346a6ac | 440 | /// ``` |
c1a9b12d SL |
441 | /// #![feature(str_char)] |
442 | /// | |
62682a34 SL |
443 | /// let s = "abπc"; |
444 | /// assert_eq!(s.char_at_reverse(1), 'a'); | |
445 | /// assert_eq!(s.char_at_reverse(2), 'b'); | |
c1a9b12d | 446 | /// assert_eq!(s.char_at_reverse(3), 'π'); |
9346a6ac | 447 | /// ``` |
62682a34 SL |
448 | #[unstable(feature = "str_char", |
449 | reason = "see char_at for more details, but reverse semantics \ | |
450 | are also somewhat unclear, especially with which \ | |
e9174d1e SL |
451 | cases generate panics", |
452 | issue = "27754")] | |
c1a9b12d | 453 | #[inline] |
62682a34 SL |
454 | pub fn char_at_reverse(&self, i: usize) -> char { |
455 | core_str::StrExt::char_at_reverse(self, i) | |
c34b1796 AL |
456 | } |
457 | ||
c1a9b12d SL |
458 | /// Retrieves the first code point from a `&str` and returns it. |
459 | /// | |
460 | /// Note that a single Unicode character (grapheme cluster) | |
461 | /// can be composed of multiple `char`s. | |
9346a6ac | 462 | /// |
62682a34 | 463 | /// This does not allocate a new string; instead, it returns a slice that |
c1a9b12d | 464 | /// points one code point beyond the code point that was shifted. |
9346a6ac | 465 | /// |
c1a9b12d | 466 | /// `None` is returned if the slice is empty. |
9346a6ac | 467 | /// |
c34b1796 AL |
468 | /// # Examples |
469 | /// | |
c34b1796 | 470 | /// ``` |
c1a9b12d SL |
471 | /// #![feature(str_char)] |
472 | /// | |
473 | /// let s = "Łódź"; // \u{141}o\u{301}dz\u{301} | |
62682a34 | 474 | /// let (c, s1) = s.slice_shift_char().unwrap(); |
9346a6ac | 475 | /// |
c1a9b12d SL |
476 | /// assert_eq!(c, 'Ł'); |
477 | /// assert_eq!(s1, "ódź"); | |
c34b1796 | 478 | /// |
62682a34 | 479 | /// let (c, s2) = s1.slice_shift_char().unwrap(); |
1a4d82fc | 480 | /// |
c1a9b12d SL |
481 | /// assert_eq!(c, 'o'); |
482 | /// assert_eq!(s2, "\u{301}dz\u{301}"); | |
c34b1796 | 483 | /// ``` |
62682a34 SL |
484 | #[unstable(feature = "str_char", |
485 | reason = "awaiting conventions about shifting and slices and \ | |
486 | may not be warranted with the existence of the chars \ | |
e9174d1e SL |
487 | and/or char_indices iterators", |
488 | issue = "27754")] | |
c1a9b12d | 489 | #[inline] |
62682a34 SL |
490 | pub fn slice_shift_char(&self) -> Option<(char, &str)> { |
491 | core_str::StrExt::slice_shift_char(self) | |
1a4d82fc JJ |
492 | } |
493 | ||
62682a34 | 494 | /// Divide one string slice into two at an index. |
9346a6ac | 495 | /// |
62682a34 | 496 | /// The index `mid` is a byte offset from the start of the string |
c1a9b12d | 497 | /// that must be on a `char` boundary. |
9346a6ac | 498 | /// |
62682a34 | 499 | /// Return slices `&self[..mid]` and `&self[mid..]`. |
9346a6ac | 500 | /// |
62682a34 | 501 | /// # Panics |
9346a6ac | 502 | /// |
c1a9b12d SL |
503 | /// Panics if `mid` is beyond the last code point of the string, |
504 | /// or if it is not on a `char` boundary. | |
9346a6ac AL |
505 | /// |
506 | /// # Examples | |
9346a6ac | 507 | /// ``` |
c1a9b12d SL |
508 | /// #![feature(str_split_at)] |
509 | /// | |
62682a34 SL |
510 | /// let s = "Löwe 老虎 Léopard"; |
511 | /// let first_space = s.find(' ').unwrap_or(s.len()); | |
512 | /// let (a, b) = s.split_at(first_space); | |
9346a6ac | 513 | /// |
62682a34 SL |
514 | /// assert_eq!(a, "Löwe"); |
515 | /// assert_eq!(b, " 老虎 Léopard"); | |
9346a6ac | 516 | /// ``` |
62682a34 | 517 | #[inline] |
e9174d1e | 518 | #[stable(feature = "str_split_at", since = "1.4.0")] |
62682a34 SL |
519 | pub fn split_at(&self, mid: usize) -> (&str, &str) { |
520 | core_str::StrExt::split_at(self, mid) | |
9346a6ac AL |
521 | } |
522 | ||
c1a9b12d SL |
523 | /// Divide one mutable string slice into two at an index. |
524 | #[inline] | |
e9174d1e | 525 | #[stable(feature = "str_split_at", since = "1.4.0")] |
c1a9b12d SL |
526 | pub fn split_at_mut(&mut self, mid: usize) -> (&mut str, &mut str) { |
527 | core_str::StrExt::split_at_mut(self, mid) | |
528 | } | |
529 | ||
530 | /// An iterator over the code points of `self`. | |
531 | /// | |
532 | /// In Unicode, the relationship between code points and characters is complex. | |
533 | /// A single character may be composed of multiple code points | |
534 | /// (e.g. diacritical marks added to a letter), and a single code point | |
535 | /// (e.g. Hangul syllable) may contain multiple characters. | |
536 | /// | |
537 | /// For iteration over human-readable characters a grapheme cluster iterator | |
538 | /// may be more appropriate. See the [unicode-segmentation crate][1]. | |
539 | /// | |
540 | /// [1]: https://crates.io/crates/unicode-segmentation | |
9346a6ac AL |
541 | /// |
542 | /// # Examples | |
543 | /// | |
544 | /// ``` | |
c1a9b12d | 545 | /// let v: Vec<char> = "ASCII żółć 🇨🇭 한".chars().collect(); |
9346a6ac | 546 | /// |
c1a9b12d SL |
547 | /// assert_eq!(v, ['A', 'S', 'C', 'I', 'I', ' ', |
548 | /// 'z', '\u{307}', 'o', '\u{301}', 'ł', 'c', '\u{301}', ' ', | |
549 | /// '\u{1f1e8}', '\u{1f1ed}', ' ', '한']); | |
9346a6ac | 550 | /// ``` |
62682a34 | 551 | #[stable(feature = "rust1", since = "1.0.0")] |
c1a9b12d | 552 | #[inline] |
62682a34 SL |
553 | pub fn chars(&self) -> Chars { |
554 | core_str::StrExt::chars(self) | |
9346a6ac AL |
555 | } |
556 | ||
c1a9b12d | 557 | /// An iterator over the `char`s of `self` and their byte offsets. |
9346a6ac | 558 | /// |
c34b1796 | 559 | /// # Examples |
1a4d82fc | 560 | /// |
c34b1796 | 561 | /// ``` |
c1a9b12d SL |
562 | /// let v: Vec<(usize, char)> = "A🇨🇭".char_indices().collect(); |
563 | /// let b = vec![(0, 'A'), (1, '\u{1f1e8}'), (5, '\u{1f1ed}')]; | |
1a4d82fc | 564 | /// |
62682a34 | 565 | /// assert_eq!(v, b); |
1a4d82fc | 566 | /// ``` |
62682a34 | 567 | #[stable(feature = "rust1", since = "1.0.0")] |
c1a9b12d | 568 | #[inline] |
62682a34 SL |
569 | pub fn char_indices(&self) -> CharIndices { |
570 | core_str::StrExt::char_indices(self) | |
1a4d82fc JJ |
571 | } |
572 | ||
62682a34 | 573 | /// An iterator over the bytes of `self`. |
9346a6ac | 574 | /// |
62682a34 | 575 | /// # Examples |
9346a6ac | 576 | /// |
62682a34 SL |
577 | /// ``` |
578 | /// let v: Vec<u8> = "bors".bytes().collect(); | |
9346a6ac | 579 | /// |
62682a34 SL |
580 | /// assert_eq!(v, b"bors".to_vec()); |
581 | /// ``` | |
582 | #[stable(feature = "rust1", since = "1.0.0")] | |
c1a9b12d | 583 | #[inline] |
62682a34 SL |
584 | pub fn bytes(&self) -> Bytes { |
585 | core_str::StrExt::bytes(self) | |
586 | } | |
587 | ||
588 | /// An iterator over the non-empty substrings of `self` which contain no whitespace, | |
589 | /// and which are separated by any amount of whitespace. | |
9346a6ac AL |
590 | /// |
591 | /// # Examples | |
592 | /// | |
593 | /// ``` | |
c1a9b12d | 594 | /// let some_words = " Mary had\ta\u{2009}little \n\t lamb"; |
62682a34 | 595 | /// let v: Vec<&str> = some_words.split_whitespace().collect(); |
9346a6ac | 596 | /// |
62682a34 SL |
597 | /// assert_eq!(v, ["Mary", "had", "a", "little", "lamb"]); |
598 | /// ``` | |
599 | #[stable(feature = "split_whitespace", since = "1.1.0")] | |
c1a9b12d | 600 | #[inline] |
62682a34 SL |
601 | pub fn split_whitespace(&self) -> SplitWhitespace { |
602 | UnicodeStr::split_whitespace(self) | |
603 | } | |
604 | ||
e9174d1e | 605 | /// An iterator over the lines of a string, separated by `\n` or `\r\n`. |
1a4d82fc | 606 | /// |
e9174d1e | 607 | /// This does not include the empty string after a trailing newline or CRLF. |
1a4d82fc | 608 | /// |
c34b1796 | 609 | /// # Examples |
1a4d82fc | 610 | /// |
1a4d82fc | 611 | /// ``` |
e9174d1e | 612 | /// let four_lines = "foo\nbar\n\r\nbaz"; |
c34b1796 | 613 | /// let v: Vec<&str> = four_lines.lines().collect(); |
1a4d82fc | 614 | /// |
c34b1796 AL |
615 | /// assert_eq!(v, ["foo", "bar", "", "baz"]); |
616 | /// ``` | |
1a4d82fc | 617 | /// |
c34b1796 AL |
618 | /// A trailing newline does not produce an extra empty line: |
619 | /// | |
620 | /// ``` | |
e9174d1e | 621 | /// let four_lines = "foo\r\nbar\n\nbaz\n"; |
1a4d82fc | 622 | /// let v: Vec<&str> = four_lines.lines().collect(); |
c34b1796 AL |
623 | /// |
624 | /// assert_eq!(v, ["foo", "bar", "", "baz"]); | |
1a4d82fc | 625 | /// ``` |
85aaf69f | 626 | #[stable(feature = "rust1", since = "1.0.0")] |
c1a9b12d | 627 | #[inline] |
c34b1796 | 628 | pub fn lines(&self) -> Lines { |
62682a34 | 629 | core_str::StrExt::lines(self) |
1a4d82fc JJ |
630 | } |
631 | ||
9346a6ac AL |
632 | /// An iterator over the lines of a string, separated by either |
633 | /// `\n` or `\r\n`. | |
1a4d82fc | 634 | /// |
c34b1796 | 635 | /// As with `.lines()`, this does not include an empty trailing line. |
1a4d82fc | 636 | /// |
c34b1796 AL |
637 | /// # Examples |
638 | /// | |
639 | /// ``` | |
640 | /// let four_lines = "foo\r\nbar\n\r\nbaz"; | |
641 | /// let v: Vec<&str> = four_lines.lines_any().collect(); | |
642 | /// | |
643 | /// assert_eq!(v, ["foo", "bar", "", "baz"]); | |
644 | /// ``` | |
645 | /// | |
646 | /// A trailing newline does not produce an extra empty line: | |
647 | /// | |
648 | /// ``` | |
1a4d82fc JJ |
649 | /// let four_lines = "foo\r\nbar\n\r\nbaz\n"; |
650 | /// let v: Vec<&str> = four_lines.lines_any().collect(); | |
c34b1796 AL |
651 | /// |
652 | /// assert_eq!(v, ["foo", "bar", "", "baz"]); | |
1a4d82fc | 653 | /// ``` |
85aaf69f | 654 | #[stable(feature = "rust1", since = "1.0.0")] |
e9174d1e | 655 | #[deprecated(since = "1.4.0", reason = "use lines() instead now")] |
c1a9b12d | 656 | #[inline] |
e9174d1e | 657 | #[allow(deprecated)] |
c34b1796 | 658 | pub fn lines_any(&self) -> LinesAny { |
62682a34 | 659 | core_str::StrExt::lines_any(self) |
1a4d82fc | 660 | } |
62682a34 | 661 | |
62682a34 SL |
662 | /// Returns an iterator of `u16` over the string encoded as UTF-16. |
663 | #[unstable(feature = "str_utf16", | |
e9174d1e SL |
664 | reason = "this functionality may only be provided by libunicode", |
665 | issue = "27714")] | |
62682a34 SL |
666 | pub fn utf16_units(&self) -> Utf16Units { |
667 | Utf16Units { encoder: Utf16Encoder::new(self[..].chars()) } | |
668 | } | |
669 | ||
670 | /// Returns `true` if `self` contains another `&str`. | |
c34b1796 AL |
671 | /// |
672 | /// # Examples | |
673 | /// | |
674 | /// ``` | |
62682a34 | 675 | /// assert!("bananas".contains("nana")); |
c34b1796 | 676 | /// |
62682a34 | 677 | /// assert!(!"bananas".contains("foobar")); |
c34b1796 | 678 | /// ``` |
85aaf69f | 679 | #[stable(feature = "rust1", since = "1.0.0")] |
62682a34 SL |
680 | pub fn contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool { |
681 | core_str::StrExt::contains(self, pat) | |
1a4d82fc JJ |
682 | } |
683 | ||
c34b1796 | 684 | /// Returns `true` if the given `&str` is a prefix of the string. |
1a4d82fc | 685 | /// |
c34b1796 | 686 | /// # Examples |
1a4d82fc | 687 | /// |
c34b1796 | 688 | /// ``` |
1a4d82fc JJ |
689 | /// assert!("banana".starts_with("ba")); |
690 | /// ``` | |
85aaf69f | 691 | #[stable(feature = "rust1", since = "1.0.0")] |
c34b1796 | 692 | pub fn starts_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool { |
62682a34 | 693 | core_str::StrExt::starts_with(self, pat) |
1a4d82fc JJ |
694 | } |
695 | ||
c34b1796 | 696 | /// Returns `true` if the given `&str` is a suffix of the string. |
1a4d82fc | 697 | /// |
c34b1796 | 698 | /// # Examples |
1a4d82fc JJ |
699 | /// |
700 | /// ```rust | |
701 | /// assert!("banana".ends_with("nana")); | |
702 | /// ``` | |
85aaf69f | 703 | #[stable(feature = "rust1", since = "1.0.0")] |
c34b1796 AL |
704 | pub fn ends_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool |
705 | where P::Searcher: ReverseSearcher<'a> | |
706 | { | |
62682a34 | 707 | core_str::StrExt::ends_with(self, pat) |
1a4d82fc JJ |
708 | } |
709 | ||
62682a34 SL |
710 | /// Returns the byte index of the first character of `self` that matches |
711 | /// the pattern, if it | |
712 | /// exists. | |
713 | /// | |
714 | /// Returns `None` if it doesn't exist. | |
715 | /// | |
716 | /// The pattern can be a simple `&str`, `char`, or a closure that | |
717 | /// determines whether a | |
718 | /// character matches. | |
719 | /// | |
720 | /// # Examples | |
721 | /// | |
722 | /// Simple patterns: | |
723 | /// | |
724 | /// ``` | |
725 | /// let s = "Löwe 老虎 Léopard"; | |
726 | /// | |
727 | /// assert_eq!(s.find('L'), Some(0)); | |
728 | /// assert_eq!(s.find('é'), Some(14)); | |
729 | /// assert_eq!(s.find("Léopard"), Some(13)); | |
730 | /// | |
731 | /// ``` | |
732 | /// | |
733 | /// More complex patterns with closures: | |
734 | /// | |
735 | /// ``` | |
736 | /// let s = "Löwe 老虎 Léopard"; | |
737 | /// | |
738 | /// assert_eq!(s.find(char::is_whitespace), Some(5)); | |
739 | /// assert_eq!(s.find(char::is_lowercase), Some(1)); | |
740 | /// ``` | |
741 | /// | |
742 | /// Not finding the pattern: | |
743 | /// | |
744 | /// ``` | |
745 | /// let s = "Löwe 老虎 Léopard"; | |
746 | /// let x: &[_] = &['1', '2']; | |
747 | /// | |
748 | /// assert_eq!(s.find(x), None); | |
749 | /// ``` | |
750 | #[stable(feature = "rust1", since = "1.0.0")] | |
751 | pub fn find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize> { | |
752 | core_str::StrExt::find(self, pat) | |
753 | } | |
754 | ||
755 | /// Returns the byte index of the last character of `self` that | |
756 | /// matches the pattern, if it | |
757 | /// exists. | |
758 | /// | |
759 | /// Returns `None` if it doesn't exist. | |
1a4d82fc | 760 | /// |
62682a34 SL |
761 | /// The pattern can be a simple `&str`, `char`, |
762 | /// or a closure that determines whether a character matches. | |
1a4d82fc | 763 | /// |
c34b1796 | 764 | /// # Examples |
1a4d82fc | 765 | /// |
9346a6ac | 766 | /// Simple patterns: |
1a4d82fc | 767 | /// |
c34b1796 | 768 | /// ``` |
62682a34 | 769 | /// let s = "Löwe 老虎 Léopard"; |
c34b1796 | 770 | /// |
62682a34 SL |
771 | /// assert_eq!(s.rfind('L'), Some(13)); |
772 | /// assert_eq!(s.rfind('é'), Some(14)); | |
c34b1796 AL |
773 | /// ``` |
774 | /// | |
9346a6ac | 775 | /// More complex patterns with closures: |
c34b1796 AL |
776 | /// |
777 | /// ``` | |
62682a34 SL |
778 | /// let s = "Löwe 老虎 Léopard"; |
779 | /// | |
780 | /// assert_eq!(s.rfind(char::is_whitespace), Some(12)); | |
781 | /// assert_eq!(s.rfind(char::is_lowercase), Some(20)); | |
782 | /// ``` | |
783 | /// | |
784 | /// Not finding the pattern: | |
785 | /// | |
786 | /// ``` | |
787 | /// let s = "Löwe 老虎 Léopard"; | |
788 | /// let x: &[_] = &['1', '2']; | |
789 | /// | |
790 | /// assert_eq!(s.rfind(x), None); | |
1a4d82fc | 791 | /// ``` |
85aaf69f | 792 | #[stable(feature = "rust1", since = "1.0.0")] |
62682a34 SL |
793 | pub fn rfind<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize> |
794 | where P::Searcher: ReverseSearcher<'a> | |
c34b1796 | 795 | { |
62682a34 | 796 | core_str::StrExt::rfind(self, pat) |
1a4d82fc JJ |
797 | } |
798 | ||
62682a34 SL |
799 | /// An iterator over substrings of `self`, separated by characters |
800 | /// matched by a pattern. | |
1a4d82fc | 801 | /// |
9346a6ac | 802 | /// The pattern can be a simple `&str`, `char`, or a closure that |
62682a34 SL |
803 | /// determines the split. Additional libraries might provide more complex |
804 | /// patterns like regular expressions. | |
805 | /// | |
806 | /// # Iterator behavior | |
807 | /// | |
808 | /// The returned iterator will be double ended if the pattern allows a | |
809 | /// reverse search and forward/reverse search yields the same elements. | |
810 | /// This is true for, e.g., `char` but not | |
811 | /// for `&str`. | |
812 | /// | |
813 | /// If the pattern allows a reverse search but its results might differ | |
814 | /// from a forward search, `rsplit()` can be used. | |
1a4d82fc | 815 | /// |
c34b1796 | 816 | /// # Examples |
1a4d82fc | 817 | /// |
9346a6ac | 818 | /// Simple patterns: |
1a4d82fc | 819 | /// |
c34b1796 | 820 | /// ``` |
62682a34 SL |
821 | /// let v: Vec<&str> = "Mary had a little lamb".split(' ').collect(); |
822 | /// assert_eq!(v, ["Mary", "had", "a", "little", "lamb"]); | |
c34b1796 | 823 | /// |
62682a34 SL |
824 | /// let v: Vec<&str> = "".split('X').collect(); |
825 | /// assert_eq!(v, [""]); | |
c34b1796 | 826 | /// |
62682a34 SL |
827 | /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').collect(); |
828 | /// assert_eq!(v, ["lion", "", "tiger", "leopard"]); | |
c34b1796 | 829 | /// |
62682a34 SL |
830 | /// let v: Vec<&str> = "lion::tiger::leopard".split("::").collect(); |
831 | /// assert_eq!(v, ["lion", "tiger", "leopard"]); | |
1a4d82fc | 832 | /// |
62682a34 SL |
833 | /// let v: Vec<&str> = "abc1def2ghi".split(char::is_numeric).collect(); |
834 | /// assert_eq!(v, ["abc", "def", "ghi"]); | |
1a4d82fc | 835 | /// |
62682a34 SL |
836 | /// let v: Vec<&str> = "lionXtigerXleopard".split(char::is_uppercase).collect(); |
837 | /// assert_eq!(v, ["lion", "tiger", "leopard"]); | |
838 | /// ``` | |
1a4d82fc | 839 | /// |
62682a34 | 840 | /// A more complex pattern, using a closure: |
1a4d82fc | 841 | /// |
c34b1796 | 842 | /// ``` |
62682a34 SL |
843 | /// let v: Vec<&str> = "abc1defXghi".split(|c| c == '1' || c == 'X').collect(); |
844 | /// assert_eq!(v, ["abc", "def", "ghi"]); | |
c34b1796 AL |
845 | /// ``` |
846 | /// | |
62682a34 SL |
847 | /// If a string contains multiple contiguous separators, you will end up |
848 | /// with empty strings in the output: | |
c34b1796 AL |
849 | /// |
850 | /// ``` | |
62682a34 SL |
851 | /// let x = "||||a||b|c".to_string(); |
852 | /// let d: Vec<_> = x.split('|').collect(); | |
c34b1796 | 853 | /// |
62682a34 SL |
854 | /// assert_eq!(d, &["", "", "", "", "a", "", "b", "c"]); |
855 | /// ``` | |
1a4d82fc | 856 | /// |
62682a34 SL |
857 | /// This can lead to possibly surprising behavior when whitespace is used |
858 | /// as the separator. This code is correct: | |
1a4d82fc | 859 | /// |
62682a34 SL |
860 | /// ``` |
861 | /// let x = "    a  b c".to_string(); | |
862 | /// let d: Vec<_> = x.split(' ').collect(); | |
1a4d82fc | 863 | /// |
62682a34 | 864 | /// assert_eq!(d, &["", "", "", "", "a", "", "b", "c"]); |
c34b1796 | 865 | /// ``` |
1a4d82fc | 866 | /// |
62682a34 | 867 | /// It does _not_ give you: |
1a4d82fc | 868 | /// |
62682a34 SL |
869 | /// ```rust,ignore |
870 | /// assert_eq!(d, &["a", "b", "c"]); | |
1a4d82fc | 871 | /// ``` |
62682a34 SL |
872 | #[stable(feature = "rust1", since = "1.0.0")] |
873 | pub fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P> { | |
874 | core_str::StrExt::split(self, pat) | |
1a4d82fc JJ |
875 | } |
876 | ||
62682a34 SL |
877 | /// An iterator over substrings of `self`, separated by characters |
878 | /// matched by a pattern and yielded in reverse order. | |
1a4d82fc | 879 | /// |
62682a34 SL |
880 | /// The pattern can be a simple `&str`, `char`, or a closure that |
881 | /// determines the split. | |
882 | /// Additional libraries might provide more complex patterns like | |
883 | /// regular expressions. | |
1a4d82fc | 884 | /// |
62682a34 | 885 | /// # Iterator behavior |
1a4d82fc | 886 | /// |
62682a34 SL |
887 | /// The returned iterator requires that the pattern supports a |
888 | /// reverse search, | |
889 | /// and it will be double ended if a forward/reverse search yields | |
890 | /// the same elements. | |
891 | /// | |
892 | /// For iterating from the front, `split()` can be used. | |
1a4d82fc | 893 | /// |
c34b1796 AL |
894 | /// # Examples |
895 | /// | |
62682a34 | 896 | /// Simple patterns: |
c34b1796 | 897 | /// |
62682a34 SL |
898 | /// ```rust |
899 | /// let v: Vec<&str> = "Mary had a little lamb".rsplit(' ').collect(); | |
900 | /// assert_eq!(v, ["lamb", "little", "a", "had", "Mary"]); | |
1a4d82fc | 901 | /// |
62682a34 SL |
902 | /// let v: Vec<&str> = "".rsplit('X').collect(); |
903 | /// assert_eq!(v, [""]); | |
904 | /// | |
905 | /// let v: Vec<&str> = "lionXXtigerXleopard".rsplit('X').collect(); | |
906 | /// assert_eq!(v, ["leopard", "tiger", "", "lion"]); | |
907 | /// | |
908 | /// let v: Vec<&str> = "lion::tiger::leopard".rsplit("::").collect(); | |
909 | /// assert_eq!(v, ["leopard", "tiger", "lion"]); | |
1a4d82fc JJ |
910 | /// ``` |
911 | /// | |
62682a34 | 912 | /// A more complex pattern, using a closure: |
1a4d82fc | 913 | /// |
1a4d82fc | 914 | /// ``` |
62682a34 SL |
915 | /// let v: Vec<&str> = "abc1defXghi".rsplit(|c| c == '1' || c == 'X').collect(); |
916 | /// assert_eq!(v, ["ghi", "def", "abc"]); | |
917 | /// ``` | |
918 | #[stable(feature = "rust1", since = "1.0.0")] | |
919 | pub fn rsplit<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplit<'a, P> | |
920 | where P::Searcher: ReverseSearcher<'a> | |
921 | { | |
922 | core_str::StrExt::rsplit(self, pat) | |
1a4d82fc JJ |
923 | } |
924 | ||
62682a34 SL |
925 | /// An iterator over substrings of `self`, separated by characters |
926 | /// matched by a pattern. | |
1a4d82fc | 927 | /// |
62682a34 SL |
928 | /// The pattern can be a simple `&str`, `char`, or a closure that |
929 | /// determines the split. | |
930 | /// Additional libraries might provide more complex patterns | |
931 | /// like regular expressions. | |
1a4d82fc | 932 | /// |
62682a34 SL |
933 | /// Equivalent to `split`, except that the trailing substring |
934 | /// is skipped if empty. | |
1a4d82fc | 935 | /// |
62682a34 SL |
936 | /// This method can be used for string data that is _terminated_, |
937 | /// rather than _separated_ by a pattern. | |
1a4d82fc | 938 | /// |
62682a34 | 939 | /// # Iterator behavior |
1a4d82fc | 940 | /// |
62682a34 SL |
941 | /// The returned iterator will be double ended if the pattern allows a |
942 | /// reverse search | |
943 | /// and forward/reverse search yields the same elements. This is true | |
944 | /// for, e.g., `char` but not for `&str`. | |
c34b1796 | 945 | /// |
62682a34 SL |
946 | /// If the pattern allows a reverse search but its results might differ |
947 | /// from a forward search, `rsplit_terminator()` can be used. | |
1a4d82fc | 948 | /// |
62682a34 | 949 | /// # Examples |
1a4d82fc | 950 | /// |
c34b1796 | 951 | /// ``` |
62682a34 SL |
952 | /// let v: Vec<&str> = "A.B.".split_terminator('.').collect(); |
953 | /// assert_eq!(v, ["A", "B"]); | |
1a4d82fc | 954 | /// |
62682a34 SL |
955 | /// let v: Vec<&str> = "A..B..".split_terminator(".").collect(); |
956 | /// assert_eq!(v, ["A", "", "B", ""]); | |
c34b1796 | 957 | /// ``` |
62682a34 SL |
958 | #[stable(feature = "rust1", since = "1.0.0")] |
959 | pub fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P> { | |
960 | core_str::StrExt::split_terminator(self, pat) | |
1a4d82fc JJ |
961 | } |
962 | ||
62682a34 SL |
963 | /// An iterator over substrings of `self`, separated by characters |
964 | /// matched by a pattern and yielded in reverse order. | |
c34b1796 | 965 | /// |
62682a34 SL |
966 | /// The pattern can be a simple `&str`, `char`, or a closure that |
967 | /// determines the split. | |
968 | /// Additional libraries might provide more complex patterns like | |
969 | /// regular expressions. | |
970 | /// | |
971 | /// Equivalent to `rsplit`, except that the trailing substring is | |
972 | /// skipped if empty. | |
973 | /// | |
974 | /// This method can be used for string data that is _terminated_, | |
975 | /// rather than _separated_ by a pattern. | |
976 | /// | |
977 | /// # Iterator behavior | |
978 | /// | |
979 | /// The returned iterator requires that the pattern supports a | |
980 | /// reverse search, and it will be double ended if a forward/reverse | |
981 | /// search yields the same elements. | |
c34b1796 | 982 | /// |
62682a34 | 983 | /// For iterating from the front, `split_terminator()` can be used. |
c34b1796 AL |
984 | /// |
985 | /// # Examples | |
986 | /// | |
987 | /// ``` | |
62682a34 SL |
988 | /// let v: Vec<&str> = "A.B.".rsplit_terminator('.').collect(); |
989 | /// assert_eq!(v, ["B", "A"]); | |
990 | /// | |
991 | /// let v: Vec<&str> = "A..B..".rsplit_terminator(".").collect(); | |
992 | /// assert_eq!(v, ["", "B", "", "A"]); | |
c34b1796 | 993 | /// ``` |
62682a34 SL |
994 | #[stable(feature = "rust1", since = "1.0.0")] |
995 | pub fn rsplit_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplitTerminator<'a, P> | |
996 | where P::Searcher: ReverseSearcher<'a> | |
997 | { | |
998 | core_str::StrExt::rsplit_terminator(self, pat) | |
c34b1796 AL |
999 | } |
1000 | ||
62682a34 SL |
1001 | /// An iterator over substrings of `self`, separated by a pattern, |
1002 | /// restricted to returning | |
1003 | /// at most `count` items. | |
1a4d82fc | 1004 | /// |
62682a34 SL |
1005 | /// The last element returned, if any, will contain the remainder of the |
1006 | /// string. | |
1007 | /// The pattern can be a simple `&str`, `char`, or a closure that | |
1008 | /// determines the split. | |
1009 | /// Additional libraries might provide more complex patterns like | |
1010 | /// regular expressions. | |
1a4d82fc | 1011 | /// |
62682a34 SL |
1012 | /// # Iterator behavior |
1013 | /// | |
1014 | /// The returned iterator will not be double ended, because it is | |
1015 | /// not efficient to support. | |
1016 | /// | |
1017 | /// If the pattern allows a reverse search, `rsplitn()` can be used. | |
c34b1796 AL |
1018 | /// |
1019 | /// # Examples | |
1020 | /// | |
62682a34 SL |
1021 | /// Simple patterns: |
1022 | /// | |
c34b1796 | 1023 | /// ``` |
62682a34 SL |
1024 | /// let v: Vec<&str> = "Mary had a little lambda".splitn(3, ' ').collect(); |
1025 | /// assert_eq!(v, ["Mary", "had", "a little lambda"]); | |
1026 | /// | |
1027 | /// let v: Vec<&str> = "lionXXtigerXleopard".splitn(3, "X").collect(); | |
1028 | /// assert_eq!(v, ["lion", "", "tigerXleopard"]); | |
1029 | /// | |
1030 | /// let v: Vec<&str> = "abcXdef".splitn(1, 'X').collect(); | |
1031 | /// assert_eq!(v, ["abcXdef"]); | |
1032 | /// | |
1033 | /// let v: Vec<&str> = "".splitn(1, 'X').collect(); | |
1034 | /// assert_eq!(v, [""]); | |
c34b1796 | 1035 | /// ``` |
1a4d82fc | 1036 | /// |
62682a34 | 1037 | /// A more complex pattern, using a closure: |
1a4d82fc | 1038 | /// |
c34b1796 | 1039 | /// ``` |
62682a34 SL |
1040 | /// let v: Vec<&str> = "abc1defXghi".splitn(2, |c| c == '1' || c == 'X').collect(); |
1041 | /// assert_eq!(v, ["abc", "defXghi"]); | |
1a4d82fc | 1042 | /// ``` |
85aaf69f | 1043 | #[stable(feature = "rust1", since = "1.0.0")] |
62682a34 SL |
1044 | pub fn splitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> SplitN<'a, P> { |
1045 | core_str::StrExt::splitn(self, count, pat) | |
1a4d82fc JJ |
1046 | } |
1047 | ||
62682a34 SL |
1048 | /// An iterator over substrings of `self`, separated by a pattern, |
1049 | /// starting from the end of the string, restricted to returning | |
1050 | /// at most `count` items. | |
1a4d82fc | 1051 | /// |
62682a34 SL |
1052 | /// The last element returned, if any, will contain the remainder of the |
1053 | /// string. | |
1a4d82fc | 1054 | /// |
9346a6ac | 1055 | /// The pattern can be a simple `&str`, `char`, or a closure that |
62682a34 SL |
1056 | /// determines the split. |
1057 | /// Additional libraries might provide more complex patterns like | |
1058 | /// regular expressions. | |
1059 | /// | |
1060 | /// # Iterator behavior | |
1061 | /// | |
1062 | /// The returned iterator will not be double ended, because it is not | |
1063 | /// efficient to support. | |
1064 | /// | |
1065 | /// `splitn()` can be used for splitting from the front. | |
1a4d82fc | 1066 | /// |
c34b1796 | 1067 | /// # Examples |
1a4d82fc | 1068 | /// |
9346a6ac | 1069 | /// Simple patterns: |
c34b1796 AL |
1070 | /// |
1071 | /// ``` | |
62682a34 SL |
1072 | /// let v: Vec<&str> = "Mary had a little lamb".rsplitn(3, ' ').collect(); |
1073 | /// assert_eq!(v, ["lamb", "little", "Mary had a"]); | |
1a4d82fc | 1074 | /// |
62682a34 SL |
1075 | /// let v: Vec<&str> = "lionXXtigerXleopard".rsplitn(3, 'X').collect(); |
1076 | /// assert_eq!(v, ["leopard", "tiger", "lionX"]); | |
1a4d82fc | 1077 | /// |
62682a34 SL |
1078 | /// let v: Vec<&str> = "lion::tiger::leopard".rsplitn(2, "::").collect(); |
1079 | /// assert_eq!(v, ["leopard", "lion::tiger"]); | |
c34b1796 AL |
1080 | /// ``` |
1081 | /// | |
62682a34 | 1082 | /// A more complex pattern, using a closure: |
c34b1796 AL |
1083 | /// |
1084 | /// ``` | |
62682a34 SL |
1085 | /// let v: Vec<&str> = "abc1defXghi".rsplitn(2, |c| c == '1' || c == 'X').collect(); |
1086 | /// assert_eq!(v, ["ghi", "abc1def"]); | |
c34b1796 | 1087 | /// ``` |
62682a34 SL |
1088 | #[stable(feature = "rust1", since = "1.0.0")] |
1089 | pub fn rsplitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> RSplitN<'a, P> | |
1090 | where P::Searcher: ReverseSearcher<'a> | |
1091 | { | |
1092 | core_str::StrExt::rsplitn(self, count, pat) | |
1093 | } | |
1094 | ||
1095 | /// An iterator over the matches of a pattern within `self`. | |
1a4d82fc | 1096 | /// |
62682a34 SL |
1097 | /// The pattern can be a simple `&str`, `char`, or a closure that |
1098 | /// determines if a character matches. | |
1099 | /// Additional libraries might provide more complex patterns like | |
1100 | /// regular expressions. | |
1101 | /// | |
1102 | /// # Iterator behavior | |
1103 | /// | |
1104 | /// The returned iterator will be double ended if the pattern allows | |
1105 | /// a reverse search | |
1106 | /// and forward/reverse search yields the same elements. This is true | |
1107 | /// for, e.g., `char` but not | |
1108 | /// for `&str`. | |
1109 | /// | |
1110 | /// If the pattern allows a reverse search but its results might differ | |
1111 | /// from a forward search, `rmatches()` can be used. | |
1112 | /// | |
1113 | /// # Examples | |
c34b1796 AL |
1114 | /// |
1115 | /// ``` | |
62682a34 SL |
1116 | /// let v: Vec<&str> = "abcXXXabcYYYabc".matches("abc").collect(); |
1117 | /// assert_eq!(v, ["abc", "abc", "abc"]); | |
c34b1796 | 1118 | /// |
62682a34 SL |
1119 | /// let v: Vec<&str> = "1abc2abc3".matches(char::is_numeric).collect(); |
1120 | /// assert_eq!(v, ["1", "2", "3"]); | |
1a4d82fc | 1121 | /// ``` |
62682a34 SL |
1122 | #[stable(feature = "str_matches", since = "1.2.0")] |
1123 | pub fn matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> Matches<'a, P> { | |
1124 | core_str::StrExt::matches(self, pat) | |
1a4d82fc JJ |
1125 | } |
1126 | ||
62682a34 SL |
1127 | /// An iterator over the matches of a pattern within `self`, yielded in |
1128 | /// reverse order. | |
1a4d82fc | 1129 | /// |
62682a34 SL |
1130 | /// The pattern can be a simple `&str`, `char`, or a closure that |
1131 | /// determines if a character matches. | |
1132 | /// Additional libraries might provide more complex patterns like | |
1133 | /// regular expressions. | |
1a4d82fc | 1134 | /// |
62682a34 | 1135 | /// # Iterator behavior |
1a4d82fc | 1136 | /// |
62682a34 SL |
1137 | /// The returned iterator requires that the pattern supports a |
1138 | /// reverse search, | |
1139 | /// and it will be double ended if a forward/reverse search yields | |
1140 | /// the same elements. | |
1a4d82fc | 1141 | /// |
62682a34 SL |
1142 | /// For iterating from the front, `matches()` can be used. |
1143 | /// | |
1144 | /// # Examples | |
c34b1796 AL |
1145 | /// |
1146 | /// ``` | |
62682a34 SL |
1147 | /// let v: Vec<&str> = "abcXXXabcYYYabc".rmatches("abc").collect(); |
1148 | /// assert_eq!(v, ["abc", "abc", "abc"]); | |
1a4d82fc | 1149 | /// |
62682a34 SL |
1150 | /// let v: Vec<&str> = "1abc2abc3".rmatches(char::is_numeric).collect(); |
1151 | /// assert_eq!(v, ["3", "2", "1"]); | |
c34b1796 | 1152 | /// ``` |
62682a34 SL |
1153 | #[stable(feature = "str_matches", since = "1.2.0")] |
1154 | pub fn rmatches<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatches<'a, P> | |
1155 | where P::Searcher: ReverseSearcher<'a> | |
1156 | { | |
1157 | core_str::StrExt::rmatches(self, pat) | |
1158 | } | |
1159 | ||
1160 | /// An iterator over the start and end indices of the disjoint matches | |
1161 | /// of a pattern within `self`. | |
c34b1796 | 1162 | /// |
62682a34 SL |
1163 | /// For matches of `pat` within `self` that overlap, only the indices |
1164 | /// corresponding to the first | |
1165 | /// match are returned. | |
c34b1796 | 1166 | /// |
62682a34 SL |
1167 | /// The pattern can be a simple `&str`, `char`, or a closure that |
1168 | /// determines | |
1169 | /// if a character matches. | |
1170 | /// Additional libraries might provide more complex patterns like | |
1171 | /// regular expressions. | |
1a4d82fc | 1172 | /// |
62682a34 | 1173 | /// # Iterator behavior |
c34b1796 | 1174 | /// |
62682a34 SL |
1175 | /// The returned iterator will be double ended if the pattern allows a |
1176 | /// reverse search | |
1177 | /// and forward/reverse search yields the same elements. This is true for, | |
1178 | /// e.g., `char` but not | |
1179 | /// for `&str`. | |
1180 | /// | |
1181 | /// If the pattern allows a reverse search but its results might differ | |
1182 | /// from a forward search, `rmatch_indices()` can be used. | |
1183 | /// | |
1184 | /// # Examples | |
1a4d82fc | 1185 | /// |
c34b1796 | 1186 | /// ``` |
c1a9b12d SL |
1187 | /// #![feature(str_match_indices)] |
1188 | /// | |
62682a34 SL |
1189 | /// let v: Vec<(usize, usize)> = "abcXXXabcYYYabc".match_indices("abc").collect(); |
1190 | /// assert_eq!(v, [(0, 3), (6, 9), (12, 15)]); | |
c34b1796 | 1191 | /// |
62682a34 SL |
1192 | /// let v: Vec<(usize, usize)> = "1abcabc2".match_indices("abc").collect(); |
1193 | /// assert_eq!(v, [(1, 4), (4, 7)]); | |
1194 | /// | |
1195 | /// let v: Vec<(usize, usize)> = "ababa".match_indices("aba").collect(); | |
1196 | /// assert_eq!(v, [(0, 3)]); // only the first `aba` | |
1a4d82fc | 1197 | /// ``` |
62682a34 | 1198 | #[unstable(feature = "str_match_indices", |
e9174d1e SL |
1199 | reason = "might have its iterator type changed", |
1200 | issue = "27743")] | |
62682a34 SL |
1201 | // NB: Right now MatchIndices yields `(usize, usize)`, but it would |
1202 | // be more consistent with `matches` and `char_indices` to return `(usize, &str)` | |
1203 | pub fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P> { | |
1204 | core_str::StrExt::match_indices(self, pat) | |
1a4d82fc JJ |
1205 | } |
1206 | ||
62682a34 SL |
1207 | /// An iterator over the start and end indices of the disjoint matches of |
1208 | /// a pattern within | |
1209 | /// `self`, yielded in reverse order. | |
1a4d82fc | 1210 | /// |
62682a34 SL |
1211 | /// For matches of `pat` within `self` that overlap, only the indices |
1212 | /// corresponding to the last | |
1213 | /// match are returned. | |
1a4d82fc | 1214 | /// |
62682a34 SL |
1215 | /// The pattern can be a simple `&str`, `char`, or a closure that |
1216 | /// determines | |
1217 | /// if a character matches. | |
1218 | /// Additional libraries might provide more complex patterns like | |
1219 | /// regular expressions. | |
1220 | /// | |
1221 | /// # Iterator behavior | |
1222 | /// | |
1223 | /// The returned iterator requires that the pattern supports a | |
1224 | /// reverse search, | |
1225 | /// and it will be double ended if a forward/reverse search yields | |
1226 | /// the same elements. | |
1227 | /// | |
1228 | /// For iterating from the front, `match_indices()` can be used. | |
1a4d82fc | 1229 | /// |
c34b1796 | 1230 | /// # Examples |
1a4d82fc | 1231 | /// |
1a4d82fc | 1232 | /// ``` |
c1a9b12d SL |
1233 | /// #![feature(str_match_indices)] |
1234 | /// | |
62682a34 SL |
1235 | /// let v: Vec<(usize, usize)> = "abcXXXabcYYYabc".rmatch_indices("abc").collect(); |
1236 | /// assert_eq!(v, [(12, 15), (6, 9), (0, 3)]); | |
1a4d82fc | 1237 | /// |
62682a34 SL |
1238 | /// let v: Vec<(usize, usize)> = "1abcabc2".rmatch_indices("abc").collect(); |
1239 | /// assert_eq!(v, [(4, 7), (1, 4)]); | |
c34b1796 | 1240 | /// |
62682a34 SL |
1241 | /// let v: Vec<(usize, usize)> = "ababa".rmatch_indices("aba").collect(); |
1242 | /// assert_eq!(v, [(2, 5)]); // only the last `aba` | |
1a4d82fc | 1243 | /// ``` |
62682a34 | 1244 | #[unstable(feature = "str_match_indices", |
e9174d1e SL |
1245 | reason = "might have its iterator type changed", |
1246 | issue = "27743")] | |
62682a34 SL |
1247 | // NB: Right now RMatchIndices yields `(usize, usize)`, but it would |
1248 | // be more consistent with `rmatches` and `char_indices` to return `(usize, &str)` | |
1249 | pub fn rmatch_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatchIndices<'a, P> | |
1250 | where P::Searcher: ReverseSearcher<'a> | |
1251 | { | |
1252 | core_str::StrExt::rmatch_indices(self, pat) | |
1a4d82fc JJ |
1253 | } |
1254 | ||
62682a34 | 1255 | /// Returns a `&str` with leading and trailing whitespace removed. |
1a4d82fc | 1256 | /// |
c34b1796 AL |
1257 | /// # Examples |
1258 | /// | |
1259 | /// ``` | |
62682a34 SL |
1260 | /// let s = " Hello\tworld\t"; |
1261 | /// assert_eq!(s.trim(), "Hello\tworld"); | |
c34b1796 | 1262 | /// ``` |
85aaf69f | 1263 | #[stable(feature = "rust1", since = "1.0.0")] |
62682a34 SL |
1264 | pub fn trim(&self) -> &str { |
1265 | UnicodeStr::trim(self) | |
1a4d82fc JJ |
1266 | } |
1267 | ||
62682a34 | 1268 | /// Returns a `&str` with leading whitespace removed. |
1a4d82fc | 1269 | /// |
c34b1796 | 1270 | /// # Examples |
1a4d82fc JJ |
1271 | /// |
1272 | /// ``` | |
62682a34 SL |
1273 | /// let s = " Hello\tworld\t"; |
1274 | /// assert_eq!(s.trim_left(), "Hello\tworld\t"); | |
1a4d82fc | 1275 | /// ``` |
85aaf69f | 1276 | #[stable(feature = "rust1", since = "1.0.0")] |
62682a34 SL |
1277 | pub fn trim_left(&self) -> &str { |
1278 | UnicodeStr::trim_left(self) | |
1a4d82fc JJ |
1279 | } |
1280 | ||
62682a34 | 1281 | /// Returns a `&str` with trailing whitespace removed. |
1a4d82fc | 1282 | /// |
c34b1796 | 1283 | /// # Examples |
1a4d82fc JJ |
1284 | /// |
1285 | /// ``` | |
62682a34 SL |
1286 | /// let s = " Hello\tworld\t"; |
1287 | /// assert_eq!(s.trim_right(), " Hello\tworld"); | |
1a4d82fc | 1288 | /// ``` |
85aaf69f | 1289 | #[stable(feature = "rust1", since = "1.0.0")] |
62682a34 SL |
1290 | pub fn trim_right(&self) -> &str { |
1291 | UnicodeStr::trim_right(self) | |
1a4d82fc JJ |
1292 | } |
1293 | ||
62682a34 SL |
1294 | /// Returns a string with all pre- and suffixes that match a pattern |
1295 | /// repeatedly removed. | |
c34b1796 | 1296 | /// |
62682a34 SL |
1297 | /// The pattern can be a simple `char`, or a closure that determines |
1298 | /// if a character matches. | |
c34b1796 | 1299 | /// |
62682a34 | 1300 | /// # Examples |
1a4d82fc | 1301 | /// |
62682a34 | 1302 | /// Simple patterns: |
1a4d82fc JJ |
1303 | /// |
1304 | /// ``` | |
62682a34 SL |
1305 | /// assert_eq!("11foo1bar11".trim_matches('1'), "foo1bar"); |
1306 | /// assert_eq!("123foo1bar123".trim_matches(char::is_numeric), "foo1bar"); | |
1307 | /// | |
1308 | /// let x: &[_] = &['1', '2']; | |
1309 | /// assert_eq!("12foo1bar12".trim_matches(x), "foo1bar"); | |
c34b1796 AL |
1310 | /// ``` |
1311 | /// | |
62682a34 | 1312 | /// A more complex pattern, using a closure: |
c34b1796 AL |
1313 | /// |
1314 | /// ``` | |
62682a34 | 1315 | /// assert_eq!("1foo1barXX".trim_matches(|c| c == '1' || c == 'X'), "foo1bar"); |
1a4d82fc | 1316 | /// ``` |
85aaf69f | 1317 | #[stable(feature = "rust1", since = "1.0.0")] |
62682a34 SL |
1318 | pub fn trim_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str |
1319 | where P::Searcher: DoubleEndedSearcher<'a> | |
1320 | { | |
1321 | core_str::StrExt::trim_matches(self, pat) | |
1a4d82fc JJ |
1322 | } |
1323 | ||
62682a34 SL |
1324 | /// Returns a string with all prefixes that match a pattern |
1325 | /// repeatedly removed. | |
1a4d82fc | 1326 | /// |
62682a34 SL |
1327 | /// The pattern can be a simple `&str`, `char`, or a closure that |
1328 | /// determines if a character matches. | |
1a4d82fc | 1329 | /// |
c34b1796 | 1330 | /// # Examples |
1a4d82fc | 1331 | /// |
c34b1796 | 1332 | /// ``` |
62682a34 SL |
1333 | /// assert_eq!("11foo1bar11".trim_left_matches('1'), "foo1bar11"); |
1334 | /// assert_eq!("123foo1bar123".trim_left_matches(char::is_numeric), "foo1bar123"); | |
c34b1796 | 1335 | /// |
62682a34 SL |
1336 | /// let x: &[_] = &['1', '2']; |
1337 | /// assert_eq!("12foo1bar12".trim_left_matches(x), "foo1bar12"); | |
1a4d82fc | 1338 | /// ``` |
62682a34 SL |
1339 | #[stable(feature = "rust1", since = "1.0.0")] |
1340 | pub fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str { | |
1341 | core_str::StrExt::trim_left_matches(self, pat) | |
1a4d82fc JJ |
1342 | } |
1343 | ||
62682a34 SL |
1344 | /// Returns a string with all suffixes that match a pattern |
1345 | /// repeatedly removed. | |
1346 | /// | |
1347 | /// The pattern can be a simple `&str`, `char`, or a closure that | |
1348 | /// determines if a character matches. | |
1a4d82fc | 1349 | /// |
c34b1796 | 1350 | /// # Examples |
1a4d82fc | 1351 | /// |
62682a34 SL |
1352 | /// Simple patterns: |
1353 | /// | |
c34b1796 | 1354 | /// ``` |
62682a34 SL |
1355 | /// assert_eq!("11foo1bar11".trim_right_matches('1'), "11foo1bar"); |
1356 | /// assert_eq!("123foo1bar123".trim_right_matches(char::is_numeric), "123foo1bar"); | |
c34b1796 | 1357 | /// |
62682a34 SL |
1358 | /// let x: &[_] = &['1', '2']; |
1359 | /// assert_eq!("12foo1bar12".trim_right_matches(x), "12foo1bar"); | |
1a4d82fc | 1360 | /// ``` |
1a4d82fc | 1361 | /// |
62682a34 | 1362 | /// A more complex pattern, using a closure: |
c34b1796 AL |
1363 | /// |
1364 | /// ``` | |
62682a34 | 1365 | /// assert_eq!("1fooX".trim_right_matches(|c| c == '1' || c == 'X'), "1foo");
1a4d82fc | 1366 | /// ``` |
62682a34 SL |
1367 | #[stable(feature = "rust1", since = "1.0.0")] |
1368 | pub fn trim_right_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str | |
1369 | where P::Searcher: ReverseSearcher<'a> | |
1370 | { | |
1371 | core_str::StrExt::trim_right_matches(self, pat) | |
1a4d82fc JJ |
1372 | } |
1373 | ||
62682a34 | 1374 | /// Parses `self` into the specified type. |
d9579d0f | 1375 | /// |
62682a34 | 1376 | /// # Errors
d9579d0f | 1377 | /// |
62682a34 | 1378 | /// Will return `Err` if it's not possible to parse `self` into the type. |
1a4d82fc | 1379 | /// |
62682a34 | 1380 | /// # Examples
c34b1796 | 1381 | /// |
62682a34 SL |
1382 | /// ``` |
1383 | /// assert_eq!("4".parse::<u32>(), Ok(4)); | |
1384 | /// ``` | |
c34b1796 | 1385 | /// |
62682a34 | 1386 | /// Failing: |
c34b1796 AL |
1387 | /// |
1388 | /// ``` | |
62682a34 | 1389 | /// assert!("j".parse::<u32>().is_err()); |
c34b1796 | 1390 | /// ``` |
62682a34 | 1391 | #[inline] |
85aaf69f | 1392 | #[stable(feature = "rust1", since = "1.0.0")] |
62682a34 SL |
1393 | pub fn parse<F: FromStr>(&self) -> Result<F, F::Err> { |
1394 | core_str::StrExt::parse(self) | |
1a4d82fc JJ |
1395 | } |
1396 | ||
62682a34 SL |
1397 | /// Replaces all occurrences of one string with another. |
1398 | /// | |
1399 | /// `replace` takes two arguments, a sub-`&str` to find in `self`, and a | |
1400 | /// second `&str` to | |
1401 | /// replace it with. If the original `&str` isn't found, no change occurs. | |
c34b1796 AL |
1402 | /// |
1403 | /// # Examples | |
1404 | /// | |
1405 | /// ``` | |
62682a34 SL |
1406 | /// let s = "this is old"; |
1407 | /// | |
1408 | /// assert_eq!(s.replace("old", "new"), "this is new"); | |
c34b1796 | 1409 | /// ``` |
c34b1796 | 1410 | /// |
62682a34 | 1411 | /// When a `&str` isn't found: |
c34b1796 AL |
1412 | /// |
1413 | /// ``` | |
62682a34 SL |
1414 | /// let s = "this is old"; |
1415 | /// assert_eq!(s.replace("cookie monster", "little lamb"), s); | |
c34b1796 | 1416 | /// ``` |
85aaf69f | 1417 | #[stable(feature = "rust1", since = "1.0.0")] |
62682a34 SL |
1418 | pub fn replace(&self, from: &str, to: &str) -> String { |
1419 | let mut result = String::new(); | |
1420 | let mut last_end = 0; | |
1421 | for (start, end) in self.match_indices(from) { | |
1422 | result.push_str(unsafe { self.slice_unchecked(last_end, start) }); | |
1423 | result.push_str(to); | |
1424 | last_end = end; | |
1425 | } | |
1426 | result.push_str(unsafe { self.slice_unchecked(last_end, self.len()) }); | |
1427 | result | |
1a4d82fc | 1428 | } |
1a4d82fc | 1429 | |
c34b1796 AL |
1430 | /// Returns the lowercase equivalent of this string. |
1431 | /// | |
1432 | /// # Examples | |
1433 | /// | |
62682a34 | 1434 | /// ``` |
c34b1796 AL |
1435 | /// let s = "HELLO"; |
1436 | /// assert_eq!(s.to_lowercase(), "hello"); | |
62682a34 SL |
1437 | /// ``` |
1438 | #[stable(feature = "unicode_case_mapping", since = "1.2.0")] | |
c34b1796 AL |
1439 | pub fn to_lowercase(&self) -> String { |
1440 | let mut s = String::with_capacity(self.len()); | |
62682a34 SL |
1441 | for (i, c) in self[..].char_indices() { |
1442 | if c == 'Σ' { | |
1443 | // Σ maps to σ, except at the end of a word where it maps to ς. | |
1444 | // This is the only conditional (contextual) but language-independent mapping | |
1445 | // in `SpecialCasing.txt`, | |
1446 | // so hard-code it rather than have a generic "condition" mechanism. | |
1447 | // See https://github.com/rust-lang/rust/issues/26035 | |
1448 | map_uppercase_sigma(self, i, &mut s) | |
1449 | } else { | |
1450 | s.extend(c.to_lowercase()); | |
1451 | } | |
1452 | } | |
c34b1796 | 1453 | return s; |
62682a34 SL |
1454 | |
1455 | fn map_uppercase_sigma(from: &str, i: usize, to: &mut String) { | |
1456 | // See http://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G33992 | |
1457 | // for the definition of `Final_Sigma`. | |
1458 | debug_assert!('Σ'.len_utf8() == 2); | |
1459 | let is_word_final = | |
1460 | case_ignoreable_then_cased(from[..i].chars().rev()) && | |
1461 | !case_ignoreable_then_cased(from[i + 2..].chars()); | |
1462 | to.push_str(if is_word_final { "ς" } else { "σ" }); | |
1463 | } | |
1464 | ||
1465 | fn case_ignoreable_then_cased<I: Iterator<Item=char>>(iter: I) -> bool { | |
1466 | use rustc_unicode::derived_property::{Cased, Case_Ignorable}; | |
1467 | match iter.skip_while(|&c| Case_Ignorable(c)).next() { | |
1468 | Some(c) => Cased(c), | |
1469 | None => false, | |
1470 | } | |
1471 | } | |
1a4d82fc JJ |
1472 | } |
1473 | ||
c34b1796 AL |
1474 | /// Returns the uppercase equivalent of this string. |
1475 | /// | |
1476 | /// # Examples | |
1477 | /// | |
62682a34 | 1478 | /// ``` |
c34b1796 AL |
1479 | /// let s = "hello"; |
1480 | /// assert_eq!(s.to_uppercase(), "HELLO"); | |
62682a34 SL |
1481 | /// ``` |
1482 | #[stable(feature = "unicode_case_mapping", since = "1.2.0")] | |
c34b1796 AL |
1483 | pub fn to_uppercase(&self) -> String { |
1484 | let mut s = String::with_capacity(self.len()); | |
62682a34 | 1485 | s.extend(self.chars().flat_map(|c| c.to_uppercase())); |
c34b1796 | 1486 | return s; |
1a4d82fc | 1487 | } |
62682a34 SL |
1488 | |
1489 | /// Escapes each char in `self` with `char::escape_default`. | |
1490 | #[unstable(feature = "str_escape", | |
e9174d1e SL |
1491 | reason = "return type may change to be an iterator", |
1492 | issue = "27791")] | |
62682a34 SL |
1493 | pub fn escape_default(&self) -> String { |
1494 | self.chars().flat_map(|c| c.escape_default()).collect() | |
1495 | } | |
1496 | ||
1497 | /// Escapes each char in `self` with `char::escape_unicode`. | |
1498 | #[unstable(feature = "str_escape", | |
e9174d1e SL |
1499 | reason = "return type may change to be an iterator", |
1500 | issue = "27791")] | |
62682a34 SL |
1501 | pub fn escape_unicode(&self) -> String { |
1502 | self.chars().flat_map(|c| c.escape_unicode()).collect() | |
1503 | } | |
c1a9b12d SL |
1504 | |
1505 | /// Converts the `Box<str>` into a `String` without copying or allocating. | |
e9174d1e | 1506 | #[stable(feature = "box_str", since = "1.4.0")] |
c1a9b12d SL |
1507 | pub fn into_string(self: Box<str>) -> String { |
1508 | unsafe { | |
1509 | let slice = mem::transmute::<Box<str>, Box<[u8]>>(self); | |
1510 | String::from_utf8_unchecked(slice.into_vec()) | |
1511 | } | |
1512 | } | |
1a4d82fc | 1513 | } |