]>
Commit | Line | Data |
---|---|---|
1a4d82fc JJ |
1 | // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT |
2 | // file at the top-level directory of this distribution and at | |
3 | // http://rust-lang.org/COPYRIGHT. | |
4 | // | |
5 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | |
6 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | |
7 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | |
8 | // option. This file may not be copied, modified, or distributed | |
9 | // except according to those terms. | |
1a4d82fc | 10 | |
c34b1796 | 11 | //! Unicode string manipulation (the `str` type). |
1a4d82fc | 12 | //! |
c34b1796 AL |
13 | //! Rust's `str` type is one of the core primitive types of the language. `&str` |
14 | //! is the borrowed string type. This type of string can only be created from | |
15 | //! other strings, unless it is a `&'static str` (see below). It is not possible | |
16 | //! to move out of borrowed strings because they are owned elsewhere. | |
1a4d82fc | 17 | //! |
c34b1796 | 18 | //! # Examples |
1a4d82fc | 19 | //! |
c34b1796 | 20 | //! Here's some code that uses a `&str`: |
1a4d82fc | 21 | //! |
1a4d82fc | 22 | //! ``` |
c34b1796 AL |
23 | //! let s = "Hello, world."; |
24 | //! ``` | |
25 | //! | |
26 | //! This `&str` is a `&'static str`, which is the type of string literals. | |
27 | //! They're `'static` because literals are available for the entire lifetime of | |
28 | //! the program. | |
29 | //! | |
30 | //! You can get a non-`'static` `&str` by taking a slice of a `String`: | |
1a4d82fc | 31 | //! |
c34b1796 AL |
32 | //! ``` |
33 | //! # let some_string = "Hello, world.".to_string(); | |
34 | //! let s = &some_string; | |
35 | //! ``` | |
1a4d82fc JJ |
36 | //! |
37 | //! # Representation | |
38 | //! | |
c34b1796 AL |
39 | //! Rust's string type, `str`, is a sequence of Unicode scalar values encoded as |
40 | //! a stream of UTF-8 bytes. All [strings](../../reference.html#literals) are | |
1a4d82fc JJ |
41 | //! guaranteed to be validly encoded UTF-8 sequences. Additionally, strings are |
42 | //! not null-terminated and can thus contain null bytes. | |
43 | //! | |
c34b1796 | 44 | //! The actual representation of a `str` maps directly onto a slice: `&str`
1a4d82fc JJ |
45 | //! is the same as `&[u8]`. |
46 | ||
47 | #![doc(primitive = "str")] | |
85aaf69f | 48 | #![stable(feature = "rust1", since = "1.0.0")] |
1a4d82fc JJ |
49 | |
50 | use self::RecompositionState::*; | |
51 | use self::DecompositionType::*; | |
52 | ||
1a4d82fc | 53 | use core::clone::Clone; |
c34b1796 | 54 | use core::iter::{Iterator, Extend}; |
1a4d82fc | 55 | use core::option::Option::{self, Some, None}; |
85aaf69f | 56 | use core::result::Result; |
1a4d82fc | 57 | use core::str as core_str; |
9346a6ac AL |
58 | use core::str::pattern::Pattern; |
59 | use core::str::pattern::{Searcher, ReverseSearcher, DoubleEndedSearcher}; | |
d9579d0f | 60 | use rustc_unicode::str::{UnicodeStr, Utf16Encoder}; |
1a4d82fc | 61 | |
85aaf69f SL |
62 | use vec_deque::VecDeque; |
63 | use borrow::{Borrow, ToOwned}; | |
1a4d82fc | 64 | use string::String; |
d9579d0f | 65 | use rustc_unicode; |
1a4d82fc JJ |
66 | use vec::Vec; |
67 | use slice::SliceConcatExt; | |
68 | ||
9346a6ac AL |
69 | pub use core::str::{FromStr, Utf8Error}; |
70 | pub use core::str::{Lines, LinesAny, CharRange}; | |
71 | pub use core::str::{Split, RSplit}; | |
72 | pub use core::str::{SplitN, RSplitN}; | |
73 | pub use core::str::{SplitTerminator, RSplitTerminator}; | |
74 | pub use core::str::{Matches, RMatches}; | |
75 | pub use core::str::{MatchIndices, RMatchIndices}; | |
c34b1796 AL |
76 | pub use core::str::{from_utf8, Chars, CharIndices, Bytes}; |
77 | pub use core::str::{from_utf8_unchecked, ParseBoolError}; | |
d9579d0f | 78 | pub use rustc_unicode::str::{SplitWhitespace, Words, Graphemes, GraphemeIndices}; |
9346a6ac | 79 | pub use core::str::pattern; |
1a4d82fc JJ |
80 | |
81 | /* | |
82 | Section: Creating a string | |
83 | */ | |
84 | ||
d9579d0f AL |
85 | impl<S: Borrow<str>> SliceConcatExt<str> for [S] { |
86 | type Output = String; | |
87 | ||
1a4d82fc | 88 | fn concat(&self) -> String { |
c34b1796 | 89 | if self.is_empty() { |
1a4d82fc JJ |
90 | return String::new(); |
91 | } | |
92 | ||
93 | // `len` calculation may overflow but push_str will check boundaries | |
bd371182 | 94 | let len = self.iter().map(|s| s.borrow().len()).sum(); |
1a4d82fc JJ |
95 | let mut result = String::with_capacity(len); |
96 | ||
c34b1796 | 97 | for s in self { |
bd371182 | 98 | result.push_str(s.borrow()) |
1a4d82fc JJ |
99 | } |
100 | ||
101 | result | |
102 | } | |
103 | ||
104 | fn connect(&self, sep: &str) -> String { | |
c34b1796 | 105 | if self.is_empty() { |
1a4d82fc JJ |
106 | return String::new(); |
107 | } | |
108 | ||
109 | // concat is faster | |
110 | if sep.is_empty() { | |
c34b1796 | 111 | return self.concat(); |
1a4d82fc JJ |
112 | } |
113 | ||
114 | // this is wrong without the guarantee that `self` is non-empty | |
115 | // `len` calculation may overflow but push_str but will check boundaries | |
c34b1796 | 116 | let len = sep.len() * (self.len() - 1) |
bd371182 | 117 | + self.iter().map(|s| s.borrow().len()).sum::<usize>(); |
1a4d82fc JJ |
118 | let mut result = String::with_capacity(len); |
119 | let mut first = true; | |
120 | ||
c34b1796 | 121 | for s in self { |
1a4d82fc JJ |
122 | if first { |
123 | first = false; | |
124 | } else { | |
125 | result.push_str(sep); | |
126 | } | |
bd371182 | 127 | result.push_str(s.borrow()); |
1a4d82fc JJ |
128 | } |
129 | result | |
130 | } | |
131 | } | |
132 | ||
133 | /* | |
134 | Section: Iterators | |
135 | */ | |
136 | ||
137 | // Helper functions used for Unicode normalization | |
/// Reorders `comb` in place by combining class (the `u8` of each pair).
///
/// This is a bubble sort that only ever exchanges two *adjacent* entries whose
/// classes are both non-zero and out of order. Consequently entries with
/// class 0 never move and nothing is carried across them, and entries with
/// equal classes keep their relative order.
fn canonical_sort(comb: &mut [(char, u8)]) {
    // Everything at index `unsorted` and beyond is already in final position.
    let mut unsorted = comb.len();
    while unsorted > 0 {
        let mut moved = false;
        for right in 1..unsorted {
            let left = right - 1;
            let (lhs, rhs) = (comb[left].1, comb[right].1);
            let both_marks = lhs != 0 && rhs != 0;
            if both_marks && lhs > rhs {
                comb.swap(left, right);
                moved = true;
            }
        }
        // A pass without any exchange means the slice is fully ordered.
        if !moved {
            return;
        }
        unsorted -= 1;
    }
}
153 | ||
// Selects which decomposition routine a `Decompositions` iterator applies to
// each source character.
#[derive(Clone)]
enum DecompositionType {
    // Characters go through `decompose_canonical` (used by `nfd_chars`).
    Canonical,
    // Characters go through `decompose_compatible` (used by `nfkd_chars`).
    Compatible,
}
159 | ||
c34b1796 AL |
160 | /// External iterator for a string decomposition's characters. |
161 | /// | |
162 | /// For use with the `std::iter` module. | |
d9579d0f AL |
163 | #[allow(deprecated)] |
164 | #[deprecated(reason = "use the crates.io `unicode-normalization` library instead", | |
165 | since = "1.0.0")] | |
1a4d82fc | 166 | #[derive(Clone)] |
c34b1796 AL |
167 | #[unstable(feature = "unicode", |
168 | reason = "this functionality may be replaced with a more generic \ | |
169 | unicode crate on crates.io")] | |
1a4d82fc JJ |
170 | pub struct Decompositions<'a> { |
171 | kind: DecompositionType, | |
172 | iter: Chars<'a>, | |
173 | buffer: Vec<(char, u8)>, | |
174 | sorted: bool | |
175 | } | |
176 | ||
d9579d0f | 177 | #[allow(deprecated)] |
85aaf69f | 178 | #[stable(feature = "rust1", since = "1.0.0")] |
1a4d82fc JJ |
179 | impl<'a> Iterator for Decompositions<'a> { |
180 | type Item = char; | |
181 | ||
182 | #[inline] | |
183 | fn next(&mut self) -> Option<char> { | |
184 | match self.buffer.first() { | |
185 | Some(&(c, 0)) => { | |
186 | self.sorted = false; | |
187 | self.buffer.remove(0); | |
188 | return Some(c); | |
189 | } | |
190 | Some(&(c, _)) if self.sorted => { | |
191 | self.buffer.remove(0); | |
192 | return Some(c); | |
193 | } | |
194 | _ => self.sorted = false | |
195 | } | |
196 | ||
197 | if !self.sorted { | |
85aaf69f | 198 | for ch in self.iter.by_ref() { |
1a4d82fc JJ |
199 | let buffer = &mut self.buffer; |
200 | let sorted = &mut self.sorted; | |
201 | { | |
85aaf69f | 202 | let callback = |d| { |
1a4d82fc | 203 | let class = |
d9579d0f | 204 | rustc_unicode::char::canonical_combining_class(d); |
1a4d82fc | 205 | if class == 0 && !*sorted { |
85aaf69f | 206 | canonical_sort(buffer); |
1a4d82fc JJ |
207 | *sorted = true; |
208 | } | |
209 | buffer.push((d, class)); | |
210 | }; | |
211 | match self.kind { | |
212 | Canonical => { | |
d9579d0f | 213 | rustc_unicode::char::decompose_canonical(ch, callback) |
1a4d82fc JJ |
214 | } |
215 | Compatible => { | |
d9579d0f | 216 | rustc_unicode::char::decompose_compatible(ch, callback) |
1a4d82fc JJ |
217 | } |
218 | } | |
219 | } | |
220 | if *sorted { | |
221 | break | |
222 | } | |
223 | } | |
224 | } | |
225 | ||
226 | if !self.sorted { | |
85aaf69f | 227 | canonical_sort(&mut self.buffer); |
1a4d82fc JJ |
228 | self.sorted = true; |
229 | } | |
230 | ||
231 | if self.buffer.is_empty() { | |
232 | None | |
233 | } else { | |
234 | match self.buffer.remove(0) { | |
235 | (c, 0) => { | |
236 | self.sorted = false; | |
237 | Some(c) | |
238 | } | |
239 | (c, _) => Some(c), | |
240 | } | |
241 | } | |
242 | } | |
243 | ||
85aaf69f | 244 | fn size_hint(&self) -> (usize, Option<usize>) { |
1a4d82fc JJ |
245 | let (lower, _) = self.iter.size_hint(); |
246 | (lower, None) | |
247 | } | |
248 | } | |
249 | ||
// Phase of the state machine driven by `Recompositions::next`.
#[derive(Clone)]
enum RecompositionState {
    // Pulling decomposed characters and trying to combine them.
    Composing,
    // Emptying the buffer of held-back characters, then composing again.
    Purging,
    // The source is exhausted; only leftovers remain to be handed out.
    Finished,
}
256 | ||
c34b1796 AL |
257 | /// External iterator for a string recomposition's characters. |
258 | /// | |
259 | /// For use with the `std::iter` module. | |
d9579d0f AL |
260 | #[allow(deprecated)] |
261 | #[deprecated(reason = "use the crates.io `unicode-normalization` library instead", | |
262 | since = "1.0.0")] | |
1a4d82fc | 263 | #[derive(Clone)] |
c34b1796 AL |
264 | #[unstable(feature = "unicode", |
265 | reason = "this functionality may be replaced with a more generic \ | |
266 | unicode crate on crates.io")] | |
1a4d82fc JJ |
267 | pub struct Recompositions<'a> { |
268 | iter: Decompositions<'a>, | |
269 | state: RecompositionState, | |
85aaf69f | 270 | buffer: VecDeque<char>, |
1a4d82fc JJ |
271 | composee: Option<char>, |
272 | last_ccc: Option<u8> | |
273 | } | |
274 | ||
d9579d0f | 275 | #[allow(deprecated)] |
85aaf69f | 276 | #[stable(feature = "rust1", since = "1.0.0")] |
1a4d82fc JJ |
277 | impl<'a> Iterator for Recompositions<'a> { |
278 | type Item = char; | |
279 | ||
280 | #[inline] | |
281 | fn next(&mut self) -> Option<char> { | |
282 | loop { | |
283 | match self.state { | |
284 | Composing => { | |
85aaf69f | 285 | for ch in self.iter.by_ref() { |
d9579d0f | 286 | let ch_class = rustc_unicode::char::canonical_combining_class(ch); |
1a4d82fc JJ |
287 | if self.composee.is_none() { |
288 | if ch_class != 0 { | |
289 | return Some(ch); | |
290 | } | |
291 | self.composee = Some(ch); | |
292 | continue; | |
293 | } | |
294 | let k = self.composee.clone().unwrap(); | |
295 | ||
296 | match self.last_ccc { | |
297 | None => { | |
d9579d0f | 298 | match rustc_unicode::char::compose(k, ch) { |
1a4d82fc JJ |
299 | Some(r) => { |
300 | self.composee = Some(r); | |
301 | continue; | |
302 | } | |
303 | None => { | |
304 | if ch_class == 0 { | |
305 | self.composee = Some(ch); | |
306 | return Some(k); | |
307 | } | |
308 | self.buffer.push_back(ch); | |
309 | self.last_ccc = Some(ch_class); | |
310 | } | |
311 | } | |
312 | } | |
313 | Some(l_class) => { | |
314 | if l_class >= ch_class { | |
315 | // `ch` is blocked from `composee` | |
316 | if ch_class == 0 { | |
317 | self.composee = Some(ch); | |
318 | self.last_ccc = None; | |
319 | self.state = Purging; | |
320 | return Some(k); | |
321 | } | |
322 | self.buffer.push_back(ch); | |
323 | self.last_ccc = Some(ch_class); | |
324 | continue; | |
325 | } | |
d9579d0f | 326 | match rustc_unicode::char::compose(k, ch) { |
1a4d82fc JJ |
327 | Some(r) => { |
328 | self.composee = Some(r); | |
329 | continue; | |
330 | } | |
331 | None => { | |
332 | self.buffer.push_back(ch); | |
333 | self.last_ccc = Some(ch_class); | |
334 | } | |
335 | } | |
336 | } | |
337 | } | |
338 | } | |
339 | self.state = Finished; | |
340 | if self.composee.is_some() { | |
341 | return self.composee.take(); | |
342 | } | |
343 | } | |
344 | Purging => { | |
345 | match self.buffer.pop_front() { | |
346 | None => self.state = Composing, | |
347 | s => return s | |
348 | } | |
349 | } | |
350 | Finished => { | |
351 | match self.buffer.pop_front() { | |
352 | None => return self.composee.take(), | |
353 | s => return s | |
354 | } | |
355 | } | |
356 | } | |
357 | } | |
358 | } | |
359 | } | |
360 | ||
361 | /// External iterator for a string's UTF16 codeunits. | |
c34b1796 AL |
362 | /// |
363 | /// For use with the `std::iter` module. | |
1a4d82fc | 364 | #[derive(Clone)] |
85aaf69f | 365 | #[unstable(feature = "collections")] |
1a4d82fc JJ |
366 | pub struct Utf16Units<'a> { |
367 | encoder: Utf16Encoder<Chars<'a>> | |
368 | } | |
369 | ||
85aaf69f | 370 | #[stable(feature = "rust1", since = "1.0.0")] |
1a4d82fc JJ |
371 | impl<'a> Iterator for Utf16Units<'a> { |
372 | type Item = u16; | |
373 | ||
374 | #[inline] | |
375 | fn next(&mut self) -> Option<u16> { self.encoder.next() } | |
376 | ||
377 | #[inline] | |
85aaf69f | 378 | fn size_hint(&self) -> (usize, Option<usize>) { self.encoder.size_hint() } |
1a4d82fc JJ |
379 | } |
380 | ||
381 | /* | |
382 | Section: Misc | |
383 | */ | |
384 | ||
// Produces the initial code point accumulator from the first byte of a
// sequence. Only the low payload bits are kept: 5 bits for a width of 2,
// 4 bits for a width of 3 and 3 bits for a width of 4.
macro_rules! utf8_first_byte {
    ($first:expr, $len:expr) => {
        ($first & (0x7F >> $len)) as u32
    }
}
391 | ||
// Folds one continuation byte into the accumulator: the accumulator moves up
// by six bits and the low six bits of the byte fill the gap.
macro_rules! utf8_acc_cont_byte {
    ($acc:expr, $cont:expr) => {
        ($acc << 6) | (($cont & 0x3F) as u32)
    }
}
396 | ||
85aaf69f SL |
397 | #[stable(feature = "rust1", since = "1.0.0")] |
398 | impl Borrow<str> for String { | |
d9579d0f | 399 | #[inline] |
85aaf69f | 400 | fn borrow(&self) -> &str { &self[..] } |
1a4d82fc JJ |
401 | } |
402 | ||
85aaf69f SL |
403 | #[stable(feature = "rust1", since = "1.0.0")] |
404 | impl ToOwned for str { | |
405 | type Owned = String; | |
1a4d82fc JJ |
406 | fn to_owned(&self) -> String { |
407 | unsafe { | |
408 | String::from_utf8_unchecked(self.as_bytes().to_owned()) | |
409 | } | |
410 | } | |
411 | } | |
412 | ||
413 | /* | |
414 | Section: CowString | |
415 | */ | |
416 | ||
417 | /* | |
418 | Section: Trait implementations | |
419 | */ | |
420 | ||
c34b1796 | 421 | |
1a4d82fc | 422 | /// Any string that can be represented as a slice. |
c34b1796 AL |
423 | #[lang = "str"] |
424 | #[cfg(not(test))] | |
85aaf69f | 425 | #[stable(feature = "rust1", since = "1.0.0")] |
c34b1796 | 426 | impl str { |
1a4d82fc | 427 | /// Escapes each char in `s` with `char::escape_default`. |
85aaf69f SL |
428 | #[unstable(feature = "collections", |
429 | reason = "return type may change to be an iterator")] | |
c34b1796 | 430 | pub fn escape_default(&self) -> String { |
1a4d82fc JJ |
431 | self.chars().flat_map(|c| c.escape_default()).collect() |
432 | } | |
433 | ||
434 | /// Escapes each char in `s` with `char::escape_unicode`. | |
85aaf69f SL |
435 | #[unstable(feature = "collections", |
436 | reason = "return type may change to be an iterator")] | |
c34b1796 | 437 | pub fn escape_unicode(&self) -> String { |
1a4d82fc JJ |
438 | self.chars().flat_map(|c| c.escape_unicode()).collect() |
439 | } | |
440 | ||
441 | /// Replaces all occurrences of one string with another. | |
442 | /// | |
9346a6ac AL |
443 | /// `replace` takes two arguments, a sub-`&str` to find in `self`, and a |
444 | /// second `&str` to | |
c34b1796 | 445 | /// replace it with. If the original `&str` isn't found, no change occurs. |
1a4d82fc JJ |
446 | /// |
447 | /// # Examples | |
448 | /// | |
c34b1796 | 449 | /// ``` |
85aaf69f | 450 | /// let s = "this is old"; |
1a4d82fc | 451 | /// |
85aaf69f | 452 | /// assert_eq!(s.replace("old", "new"), "this is new"); |
c34b1796 AL |
453 | /// ``` |
454 | /// | |
455 | /// When a `&str` isn't found: | |
1a4d82fc | 456 | /// |
c34b1796 AL |
457 | /// ``` |
458 | /// let s = "this is old"; | |
1a4d82fc JJ |
459 | /// assert_eq!(s.replace("cookie monster", "little lamb"), s); |
460 | /// ``` | |
85aaf69f | 461 | #[stable(feature = "rust1", since = "1.0.0")] |
c34b1796 | 462 | pub fn replace(&self, from: &str, to: &str) -> String { |
1a4d82fc JJ |
463 | let mut result = String::new(); |
464 | let mut last_end = 0; | |
465 | for (start, end) in self.match_indices(from) { | |
466 | result.push_str(unsafe { self.slice_unchecked(last_end, start) }); | |
467 | result.push_str(to); | |
468 | last_end = end; | |
469 | } | |
470 | result.push_str(unsafe { self.slice_unchecked(last_end, self.len()) }); | |
471 | result | |
472 | } | |
473 | ||
474 | /// Returns an iterator over the string in Unicode Normalization Form D | |
475 | /// (canonical decomposition). | |
d9579d0f AL |
476 | #[allow(deprecated)] |
477 | #[deprecated(reason = "use the crates.io `unicode-normalization` library instead", | |
478 | since = "1.0.0")] | |
1a4d82fc | 479 | #[inline] |
c34b1796 AL |
480 | #[unstable(feature = "unicode", |
481 | reason = "this functionality may be replaced with a more generic \ | |
482 | unicode crate on crates.io")] | |
483 | pub fn nfd_chars(&self) -> Decompositions { | |
1a4d82fc | 484 | Decompositions { |
85aaf69f | 485 | iter: self[..].chars(), |
1a4d82fc JJ |
486 | buffer: Vec::new(), |
487 | sorted: false, | |
488 | kind: Canonical | |
489 | } | |
490 | } | |
491 | ||
492 | /// Returns an iterator over the string in Unicode Normalization Form KD | |
493 | /// (compatibility decomposition). | |
d9579d0f AL |
494 | #[allow(deprecated)] |
495 | #[deprecated(reason = "use the crates.io `unicode-normalization` library instead", | |
496 | since = "1.0.0")] | |
1a4d82fc | 497 | #[inline] |
c34b1796 AL |
498 | #[unstable(feature = "unicode", |
499 | reason = "this functionality may be replaced with a more generic \ | |
500 | unicode crate on crates.io")] | |
501 | pub fn nfkd_chars(&self) -> Decompositions { | |
1a4d82fc | 502 | Decompositions { |
85aaf69f | 503 | iter: self[..].chars(), |
1a4d82fc JJ |
504 | buffer: Vec::new(), |
505 | sorted: false, | |
506 | kind: Compatible | |
507 | } | |
508 | } | |
509 | ||
510 | /// An Iterator over the string in Unicode Normalization Form C | |
511 | /// (canonical decomposition followed by canonical composition). | |
d9579d0f AL |
512 | #[allow(deprecated)] |
513 | #[deprecated(reason = "use the crates.io `unicode-normalization` library instead", | |
514 | since = "1.0.0")] | |
1a4d82fc | 515 | #[inline] |
c34b1796 AL |
516 | #[unstable(feature = "unicode", |
517 | reason = "this functionality may be replaced with a more generic \ | |
518 | unicode crate on crates.io")] | |
519 | pub fn nfc_chars(&self) -> Recompositions { | |
1a4d82fc JJ |
520 | Recompositions { |
521 | iter: self.nfd_chars(), | |
522 | state: Composing, | |
85aaf69f | 523 | buffer: VecDeque::new(), |
1a4d82fc JJ |
524 | composee: None, |
525 | last_ccc: None | |
526 | } | |
527 | } | |
528 | ||
529 | /// An Iterator over the string in Unicode Normalization Form KC | |
530 | /// (compatibility decomposition followed by canonical composition). | |
d9579d0f AL |
531 | #[allow(deprecated)] |
532 | #[deprecated(reason = "use the crates.io `unicode-normalization` library instead", | |
533 | since = "1.0.0")] | |
1a4d82fc | 534 | #[inline] |
c34b1796 AL |
535 | #[unstable(feature = "unicode", |
536 | reason = "this functionality may be replaced with a more generic \ | |
537 | unicode crate on crates.io")] | |
538 | pub fn nfkc_chars(&self) -> Recompositions { | |
1a4d82fc JJ |
539 | Recompositions { |
540 | iter: self.nfkd_chars(), | |
541 | state: Composing, | |
85aaf69f | 542 | buffer: VecDeque::new(), |
1a4d82fc JJ |
543 | composee: None, |
544 | last_ccc: None | |
545 | } | |
546 | } | |
547 | ||
c34b1796 | 548 | /// Returns `true` if `self` contains another `&str`. |
1a4d82fc | 549 | /// |
c34b1796 | 550 | /// # Examples |
1a4d82fc | 551 | /// |
c34b1796 | 552 | /// ``` |
1a4d82fc | 553 | /// assert!("bananas".contains("nana")); |
c34b1796 AL |
554 | /// |
555 | /// assert!(!"bananas".contains("foobar")); | |
1a4d82fc | 556 | /// ``` |
85aaf69f | 557 | #[stable(feature = "rust1", since = "1.0.0")] |
c34b1796 | 558 | pub fn contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool { |
85aaf69f | 559 | core_str::StrExt::contains(&self[..], pat) |
1a4d82fc JJ |
560 | } |
561 | ||
c34b1796 | 562 | /// An iterator over the codepoints of `self`. |
1a4d82fc | 563 | /// |
c34b1796 | 564 | /// # Examples |
1a4d82fc | 565 | /// |
1a4d82fc | 566 | /// ``` |
1a4d82fc | 567 | /// let v: Vec<char> = "abc åäö".chars().collect(); |
c34b1796 AL |
568 | /// |
569 | /// assert_eq!(v, ['a', 'b', 'c', ' ', 'å', 'ä', 'ö']); | |
1a4d82fc | 570 | /// ``` |
85aaf69f | 571 | #[stable(feature = "rust1", since = "1.0.0")] |
c34b1796 | 572 | pub fn chars(&self) -> Chars { |
85aaf69f | 573 | core_str::StrExt::chars(&self[..]) |
1a4d82fc JJ |
574 | } |
575 | ||
c34b1796 | 576 | /// An iterator over the bytes of `self`. |
1a4d82fc | 577 | /// |
c34b1796 | 578 | /// # Examples |
1a4d82fc | 579 | /// |
c34b1796 | 580 | /// ``` |
1a4d82fc | 581 | /// let v: Vec<u8> = "bors".bytes().collect(); |
c34b1796 | 582 | /// |
1a4d82fc JJ |
583 | /// assert_eq!(v, b"bors".to_vec()); |
584 | /// ``` | |
85aaf69f | 585 | #[stable(feature = "rust1", since = "1.0.0")] |
c34b1796 | 586 | pub fn bytes(&self) -> Bytes { |
85aaf69f | 587 | core_str::StrExt::bytes(&self[..]) |
1a4d82fc JJ |
588 | } |
589 | ||
590 | /// An iterator over the characters of `self` and their byte offsets. | |
c34b1796 AL |
591 | /// |
592 | /// # Examples | |
593 | /// | |
594 | /// ``` | |
595 | /// let v: Vec<(usize, char)> = "abc".char_indices().collect(); | |
596 | /// let b = vec![(0, 'a'), (1, 'b'), (2, 'c')]; | |
597 | /// | |
598 | /// assert_eq!(v, b); | |
599 | /// ``` | |
85aaf69f | 600 | #[stable(feature = "rust1", since = "1.0.0")] |
c34b1796 | 601 | pub fn char_indices(&self) -> CharIndices { |
85aaf69f | 602 | core_str::StrExt::char_indices(&self[..]) |
1a4d82fc JJ |
603 | } |
604 | ||
605 | /// An iterator over substrings of `self`, separated by characters | |
c34b1796 | 606 | /// matched by a pattern. |
1a4d82fc | 607 | /// |
9346a6ac AL |
608 | /// The pattern can be a simple `&str`, `char`, or a closure that |
609 | /// determines the split. | |
610 | /// Additional libraries might provide more complex patterns like | |
611 | /// regular expressions. | |
612 | /// | |
613 | /// # Iterator behavior | |
614 | /// | |
615 | /// The returned iterator will be double ended if the pattern allows a | |
616 | /// reverse search and forward/reverse search yields the same elements. | |
617 | /// This is true for, eg, `char` but not | |
618 | /// for `&str`. | |
619 | /// | |
620 | /// If the pattern allows a reverse search but its results might differ | |
621 | /// from a forward search, `rsplit()` can be used. | |
1a4d82fc | 622 | /// |
c34b1796 AL |
623 | /// # Examples |
624 | /// | |
9346a6ac | 625 | /// Simple patterns: |
c34b1796 AL |
626 | /// |
627 | /// ``` | |
1a4d82fc | 628 | /// let v: Vec<&str> = "Mary had a little lamb".split(' ').collect(); |
c34b1796 AL |
629 | /// assert_eq!(v, ["Mary", "had", "a", "little", "lamb"]); |
630 | /// | |
631 | /// let v: Vec<&str> = "".split('X').collect(); | |
632 | /// assert_eq!(v, [""]); | |
9346a6ac AL |
633 | /// |
634 | /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').collect(); | |
635 | /// assert_eq!(v, ["lion", "", "tiger", "leopard"]); | |
636 | /// | |
637 | /// let v: Vec<&str> = "lion::tiger::leopard".split("::").collect(); | |
638 | /// assert_eq!(v, ["lion", "tiger", "leopard"]); | |
c34b1796 | 639 | /// ``` |
1a4d82fc | 640 | /// |
9346a6ac | 641 | /// More complex patterns with closures: |
c34b1796 AL |
642 | /// |
643 | /// ``` | |
85aaf69f | 644 | /// let v: Vec<&str> = "abc1def2ghi".split(|c: char| c.is_numeric()).collect(); |
c34b1796 | 645 | /// assert_eq!(v, ["abc", "def", "ghi"]); |
1a4d82fc | 646 | /// |
9346a6ac AL |
647 | /// let v: Vec<&str> = "lionXtigerXleopard".split(char::is_uppercase).collect(); |
648 | /// assert_eq!(v, ["lion", "tiger", "leopard"]); | |
1a4d82fc | 649 | /// ``` |
85aaf69f | 650 | #[stable(feature = "rust1", since = "1.0.0")] |
c34b1796 | 651 | pub fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P> { |
85aaf69f | 652 | core_str::StrExt::split(&self[..], pat) |
1a4d82fc JJ |
653 | } |
654 | ||
9346a6ac AL |
655 | /// An iterator over substrings of `self`, separated by characters |
656 | /// matched by a pattern and yielded in reverse order. | |
1a4d82fc | 657 | /// |
9346a6ac AL |
658 | /// The pattern can be a simple `&str`, `char`, or a closure that |
659 | /// determines the split. | |
660 | /// Additional libraries might provide more complex patterns like | |
661 | /// regular expressions. | |
1a4d82fc | 662 | /// |
9346a6ac | 663 | /// # Iterator behavior |
c34b1796 | 664 | /// |
9346a6ac AL |
665 | /// The returned iterator requires that the pattern supports a |
666 | /// reverse search, | |
667 | /// and it will be double ended if a forward/reverse search yields | |
668 | /// the same elements. | |
c34b1796 | 669 | /// |
9346a6ac | 670 | /// For iterating from the front, `split()` can be used. |
1a4d82fc | 671 | /// |
9346a6ac | 672 | /// # Examples |
1a4d82fc | 673 | /// |
9346a6ac | 674 | /// Simple patterns: |
1a4d82fc | 675 | /// |
9346a6ac AL |
676 | /// ```rust |
677 | /// let v: Vec<&str> = "Mary had a little lamb".rsplit(' ').collect(); | |
678 | /// assert_eq!(v, ["lamb", "little", "a", "had", "Mary"]); | |
1a4d82fc | 679 | /// |
9346a6ac | 680 | /// let v: Vec<&str> = "".rsplit('X').collect(); |
c34b1796 | 681 | /// assert_eq!(v, [""]); |
c34b1796 | 682 | /// |
9346a6ac AL |
683 | /// let v: Vec<&str> = "lionXXtigerXleopard".rsplit('X').collect(); |
684 | /// assert_eq!(v, ["leopard", "tiger", "", "lion"]); | |
c34b1796 | 685 | /// |
9346a6ac AL |
686 | /// let v: Vec<&str> = "lion::tiger::leopard".rsplit("::").collect(); |
687 | /// assert_eq!(v, ["leopard", "tiger", "lion"]); | |
c34b1796 | 688 | /// ``` |
9346a6ac AL |
689 | /// |
690 | /// More complex patterns with closures: | |
691 | /// | |
692 | /// ```rust | |
693 | /// let v: Vec<&str> = "abc1def2ghi".rsplit(|c: char| c.is_numeric()).collect(); | |
694 | /// assert_eq!(v, ["ghi", "def", "abc"]); | |
695 | /// | |
696 | /// let v: Vec<&str> = "lionXtigerXleopard".rsplit(char::is_uppercase).collect(); | |
697 | /// assert_eq!(v, ["leopard", "tiger", "lion"]); | |
1a4d82fc | 698 | /// ``` |
85aaf69f | 699 | #[stable(feature = "rust1", since = "1.0.0")] |
9346a6ac AL |
700 | pub fn rsplit<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplit<'a, P> |
701 | where P::Searcher: ReverseSearcher<'a> | |
702 | { | |
703 | core_str::StrExt::rsplit(&self[..], pat) | |
1a4d82fc JJ |
704 | } |
705 | ||
706 | /// An iterator over substrings of `self`, separated by characters | |
c34b1796 | 707 | /// matched by a pattern. |
1a4d82fc | 708 | /// |
9346a6ac AL |
709 | /// The pattern can be a simple `&str`, `char`, or a closure that |
710 | /// determines the split. | |
711 | /// Additional libraries might provide more complex patterns | |
712 | /// like regular expressions. | |
1a4d82fc | 713 | /// |
9346a6ac AL |
714 | /// Equivalent to `split`, except that the trailing substring |
715 | /// is skipped if empty. | |
716 | /// | |
717 | /// This method can be used for string data that is _terminated_, | |
d9579d0f | 718 | /// rather than _separated_ by a pattern. |
9346a6ac AL |
719 | /// |
720 | /// # Iterator behavior | |
721 | /// | |
722 | /// The returned iterator will be double ended if the pattern allows a | |
723 | /// reverse search | |
724 | /// and forward/reverse search yields the same elements. This is true | |
725 | /// for, eg, `char` but not for `&str`. | |
726 | /// | |
727 | /// If the pattern allows a reverse search but its results might differ | |
728 | /// from a forward search, `rsplit_terminator()` can be used. | |
1a4d82fc | 729 | /// |
c34b1796 AL |
730 | /// # Examples |
731 | /// | |
9346a6ac | 732 | /// Simple patterns: |
c34b1796 AL |
733 | /// |
734 | /// ``` | |
1a4d82fc | 735 | /// let v: Vec<&str> = "A.B.".split_terminator('.').collect(); |
c34b1796 | 736 | /// assert_eq!(v, ["A", "B"]); |
1a4d82fc | 737 | /// |
9346a6ac | 738 | /// let v: Vec<&str> = "A..B..".split_terminator(".").collect(); |
c34b1796 AL |
739 | /// assert_eq!(v, ["A", "", "B", ""]); |
740 | /// ``` | |
1a4d82fc | 741 | /// |
9346a6ac | 742 | /// More complex patterns with closures: |
1a4d82fc | 743 | /// |
1a4d82fc | 744 | /// ``` |
c34b1796 AL |
745 | /// let v: Vec<&str> = "abc1def2ghi3".split_terminator(|c: char| c.is_numeric()).collect(); |
746 | /// assert_eq!(v, ["abc", "def", "ghi"]); | |
747 | /// ``` | |
748 | #[stable(feature = "rust1", since = "1.0.0")] | |
749 | pub fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P> { | |
85aaf69f | 750 | core_str::StrExt::split_terminator(&self[..], pat) |
1a4d82fc JJ |
751 | } |
752 | ||
9346a6ac AL |
753 | /// An iterator over substrings of `self`, separated by characters |
754 | /// matched by a pattern and yielded in reverse order. | |
755 | /// | |
756 | /// The pattern can be a simple `&str`, `char`, or a closure that | |
757 | /// determines the split. | |
758 | /// Additional libraries might provide more complex patterns like | |
759 | /// regular expressions. | |
760 | /// | |
761 | /// Equivalent to `split`, except that the trailing substring is | |
762 | /// skipped if empty. | |
763 | /// | |
764 | /// This method can be used for string data that is _terminated_, | |
d9579d0f | 765 | /// rather than _separated_ by a pattern. |
9346a6ac AL |
766 | /// |
767 | /// # Iterator behavior | |
768 | /// | |
769 | /// The returned iterator requires that the pattern supports a | |
770 | /// reverse search, and it will be double ended if a forward/reverse | |
771 | /// search yields the same elements. | |
772 | /// | |
773 | /// For iterating from the front, `split_terminator()` can be used. | |
1a4d82fc | 774 | /// |
c34b1796 | 775 | /// # Examples |
1a4d82fc | 776 | /// |
c34b1796 | 777 | /// Simple patterns: |
1a4d82fc | 778 | /// |
c34b1796 | 779 | /// ``` |
9346a6ac AL |
780 | /// let v: Vec<&str> = "A.B.".rsplit_terminator('.').collect(); |
781 | /// assert_eq!(v, ["B", "A"]); | |
c34b1796 | 782 | /// |
9346a6ac AL |
783 | /// let v: Vec<&str> = "A..B..".rsplit_terminator(".").collect(); |
784 | /// assert_eq!(v, ["", "B", "", "A"]); | |
c34b1796 AL |
785 | /// ``` |
786 | /// | |
9346a6ac | 787 | /// More complex patterns with closures: |
c34b1796 AL |
788 | /// |
789 | /// ``` | |
9346a6ac | 790 | /// let v: Vec<&str> = "abc1def2ghi3".rsplit_terminator(|c: char| c.is_numeric()).collect(); |
c34b1796 AL |
791 | /// assert_eq!(v, ["ghi", "def", "abc"]); |
792 | /// ``` | |
793 | #[stable(feature = "rust1", since = "1.0.0")] | |
9346a6ac | 794 | pub fn rsplit_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplitTerminator<'a, P> |
c34b1796 AL |
795 | where P::Searcher: ReverseSearcher<'a> |
796 | { | |
9346a6ac AL |
797 | core_str::StrExt::rsplit_terminator(&self[..], pat) |
798 | } | |
799 | ||
800 | /// An iterator over substrings of `self`, separated by a pattern, | |
801 | /// restricted to returning | |
802 | /// at most `count` items. | |
803 | /// | |
804 | /// The last element returned, if any, will contain the remainder of the | |
805 | /// string. | |
806 | /// The pattern can be a simple `&str`, `char`, or a closure that | |
807 | /// determines the split. | |
808 | /// Additional libraries might provide more complex patterns like | |
809 | /// regular expressions. | |
810 | /// | |
811 | /// # Iterator behavior | |
812 | /// | |
813 | /// The returned iterator will not be double ended, because it is | |
814 | /// not efficient to support. | |
815 | /// | |
816 | /// If the pattern allows a reverse search, `rsplitn()` can be used. | |
817 | /// | |
818 | /// # Examples | |
819 | /// | |
820 | /// Simple patterns: | |
821 | /// | |
822 | /// ``` | |
823 | /// let v: Vec<&str> = "Mary had a little lambda".splitn(3, ' ').collect(); | |
824 | /// assert_eq!(v, ["Mary", "had", "a little lambda"]); | |
825 | /// | |
826 | /// let v: Vec<&str> = "lionXXtigerXleopard".splitn(3, "X").collect(); | |
827 | /// assert_eq!(v, ["lion", "", "tigerXleopard"]); | |
828 | /// | |
829 | /// let v: Vec<&str> = "abcXdef".splitn(1, 'X').collect(); | |
830 | /// assert_eq!(v, ["abcXdef"]); | |
831 | /// | |
832 | /// let v: Vec<&str> = "".splitn(1, 'X').collect(); | |
833 | /// assert_eq!(v, [""]); | |
834 | /// ``` | |
835 | /// | |
836 | /// More complex patterns with closures: | |
837 | /// | |
838 | /// ``` | |
839 | /// let v: Vec<&str> = "abc1def2ghi".splitn(2, |c: char| c.is_numeric()).collect(); | |
840 | /// assert_eq!(v, ["abc", "def2ghi"]); | |
841 | /// ``` | |
842 | #[stable(feature = "rust1", since = "1.0.0")] | |
843 | pub fn splitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> SplitN<'a, P> { | |
844 | core_str::StrExt::splitn(&self[..], count, pat) | |
c34b1796 AL |
845 | } |
846 | ||
847 | /// An iterator over substrings of `self`, separated by a pattern, | |
848 | /// starting from the end of the string, restricted to returning | |
849 | /// at most `count` items. | |
850 | /// | |
851 | /// The last element returned, if any, will contain the remainder of the | |
852 | /// string. | |
853 | /// | |
9346a6ac AL |
854 | /// The pattern can be a simple `&str`, `char`, or a closure that |
855 | /// determines the split. | |
856 | /// Additional libraries might provide more complex patterns like | |
857 | /// regular expressions. | |
858 | /// | |
859 | /// # Iterator behavior | |
860 | /// | |
861 | /// The returned iterator will not be double ended, because it is not | |
862 | /// efficient to support. | |
863 | /// | |
864 | /// `splitn()` can be used for splitting from the front. | |
865 | /// | |
c34b1796 AL |
866 | /// # Examples |
867 | /// | |
868 | /// Simple patterns: | |
869 | /// | |
870 | /// ``` | |
871 | /// let v: Vec<&str> = "Mary had a little lamb".rsplitn(3, ' ').collect(); | |
872 | /// assert_eq!(v, ["lamb", "little", "Mary had a"]); | |
873 | /// | |
9346a6ac AL |
874 | /// let v: Vec<&str> = "lionXXtigerXleopard".rsplitn(3, 'X').collect(); |
875 | /// assert_eq!(v, ["leopard", "tiger", "lionX"]); | |
876 | /// | |
c34b1796 AL |
877 | /// let v: Vec<&str> = "lion::tiger::leopard".rsplitn(2, "::").collect(); |
878 | /// assert_eq!(v, ["leopard", "lion::tiger"]); | |
879 | /// ``` | |
880 | /// | |
9346a6ac | 881 | /// More complex patterns with closures: |
1a4d82fc | 882 | /// |
c34b1796 AL |
883 | /// ``` |
884 | /// let v: Vec<&str> = "abc1def2ghi".rsplitn(2, |c: char| c.is_numeric()).collect(); | |
885 | /// assert_eq!(v, ["ghi", "abc1def"]); | |
1a4d82fc | 886 | /// ``` |
85aaf69f | 887 | #[stable(feature = "rust1", since = "1.0.0")] |
c34b1796 AL |
888 | pub fn rsplitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> RSplitN<'a, P> |
889 | where P::Searcher: ReverseSearcher<'a> | |
890 | { | |
85aaf69f | 891 | core_str::StrExt::rsplitn(&self[..], count, pat) |
1a4d82fc JJ |
892 | } |
893 | ||
9346a6ac AL |
894 | /// An iterator over the matches of a pattern within `self`. |
895 | /// | |
896 | /// The pattern can be a simple `&str`, `char`, or a closure that | |
897 | /// determines if a character matches. | |
898 | /// Additional libraries might provide more complex patterns like | |
899 | /// regular expressions. | |
900 | /// | |
901 | /// # Iterator behavior | |
902 | /// | |
903 | /// The returned iterator will be double ended if the pattern allows | |
904 | /// a reverse search | |
905 | /// and forward/reverse search yields the same elements. This is true | |
906 | /// for, e.g., `char` but not | |
907 | /// for `&str`. | |
908 | /// | |
909 | /// If the pattern allows a reverse search but its results might differ | |
910 | /// from a forward search, `rmatches()` can be used. | |
911 | /// | |
912 | /// # Examples | |
913 | /// | |
914 | /// ``` | |
915 | /// # #![feature(collections)] | |
916 | /// let v: Vec<&str> = "abcXXXabcYYYabc".matches("abc").collect(); | |
917 | /// assert_eq!(v, ["abc", "abc", "abc"]); | |
918 | /// | |
919 | /// let v: Vec<&str> = "1abc2abc3".matches(|c: char| c.is_numeric()).collect(); | |
920 | /// assert_eq!(v, ["1", "2", "3"]); | |
921 | /// ``` | |
922 | #[unstable(feature = "collections", | |
923 | reason = "method got recently added")] | |
924 | pub fn matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> Matches<'a, P> { | |
925 | core_str::StrExt::matches(&self[..], pat) | |
926 | } | |
927 | ||
928 | /// An iterator over the matches of a pattern within `self`, yielded in | |
929 | /// reverse order. | |
930 | /// | |
931 | /// The pattern can be a simple `&str`, `char`, or a closure that | |
932 | /// determines if a character matches. | |
933 | /// Additional libraries might provide more complex patterns like | |
934 | /// regular expressions. | |
935 | /// | |
936 | /// # Iterator behavior | |
937 | /// | |
938 | /// The returned iterator requires that the pattern supports a | |
939 | /// reverse search, | |
940 | /// and it will be double ended if a forward/reverse search yields | |
941 | /// the same elements. | |
942 | /// | |
943 | /// For iterating from the front, `matches()` can be used. | |
944 | /// | |
945 | /// # Examples | |
946 | /// | |
947 | /// ``` | |
948 | /// # #![feature(collections)] | |
949 | /// let v: Vec<&str> = "abcXXXabcYYYabc".rmatches("abc").collect(); | |
950 | /// assert_eq!(v, ["abc", "abc", "abc"]); | |
951 | /// | |
952 | /// let v: Vec<&str> = "1abc2abc3".rmatches(|c: char| c.is_numeric()).collect(); | |
953 | /// assert_eq!(v, ["3", "2", "1"]); | |
954 | /// ``` | |
955 | #[unstable(feature = "collections", | |
956 | reason = "method got recently added")] | |
957 | pub fn rmatches<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatches<'a, P> | |
958 | where P::Searcher: ReverseSearcher<'a> | |
959 | { | |
960 | core_str::StrExt::rmatches(&self[..], pat) | |
961 | } | |
962 | ||
963 | /// An iterator over the start and end indices of the disjoint matches | |
964 | /// of a pattern within `self`. | |
1a4d82fc | 965 | /// |
9346a6ac AL |
966 | /// For matches of `pat` within `self` that overlap, only the indices |
967 | /// corresponding to the first | |
c34b1796 | 968 | /// match are returned. |
1a4d82fc | 969 | /// |
9346a6ac AL |
970 | /// The pattern can be a simple `&str`, `char`, or a closure that |
971 | /// determines | |
972 | /// if a character matches. | |
973 | /// Additional libraries might provide more complex patterns like | |
974 | /// regular expressions. | |
975 | /// | |
976 | /// # Iterator behavior | |
977 | /// | |
978 | /// The returned iterator will be double ended if the pattern allows a | |
979 | /// reverse search | |
980 | /// and forward/reverse search yields the same elements. This is true for, | |
981 | /// e.g., `char` but not | |
982 | /// for `&str`. | |
983 | /// | |
984 | /// If the pattern allows a reverse search but its results might differ | |
985 | /// from a forward search, `rmatch_indices()` can be used. | |
986 | /// | |
c34b1796 | 987 | /// # Examples |
1a4d82fc | 988 | /// |
c34b1796 AL |
989 | /// ``` |
990 | /// # #![feature(collections)] | |
85aaf69f | 991 | /// let v: Vec<(usize, usize)> = "abcXXXabcYYYabc".match_indices("abc").collect(); |
9346a6ac | 992 | /// assert_eq!(v, [(0, 3), (6, 9), (12, 15)]); |
1a4d82fc | 993 | /// |
85aaf69f | 994 | /// let v: Vec<(usize, usize)> = "1abcabc2".match_indices("abc").collect(); |
9346a6ac | 995 | /// assert_eq!(v, [(1, 4), (4, 7)]); |
1a4d82fc | 996 | /// |
85aaf69f | 997 | /// let v: Vec<(usize, usize)> = "ababa".match_indices("aba").collect(); |
c34b1796 | 998 | /// assert_eq!(v, [(0, 3)]); // only the first `aba` |
1a4d82fc | 999 | /// ``` |
85aaf69f SL |
1000 | #[unstable(feature = "collections", |
1001 | reason = "might have its iterator type changed")] | |
9346a6ac AL |
1002 | // NB: Right now MatchIndices yields `(usize, usize)`, but it would |
1003 | // be more consistent with `matches` and `char_indices` to return `(usize, &str)` | |
c34b1796 | 1004 | pub fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P> { |
85aaf69f | 1005 | core_str::StrExt::match_indices(&self[..], pat) |
1a4d82fc JJ |
1006 | } |
1007 | ||
9346a6ac AL |
1008 | /// An iterator over the start and end indices of the disjoint matches of |
1009 | /// a pattern within | |
1010 | /// `self`, yielded in reverse order. | |
1011 | /// | |
1012 | /// For matches of `pat` within `self` that overlap, only the indices | |
1013 | /// corresponding to the last | |
1014 | /// match are returned. | |
1015 | /// | |
1016 | /// The pattern can be a simple `&str`, `char`, or a closure that | |
1017 | /// determines | |
1018 | /// if a character matches. | |
1019 | /// Additional libraries might provide more complex patterns like | |
1020 | /// regular expressions. | |
1021 | /// | |
1022 | /// # Iterator behavior | |
1023 | /// | |
1024 | /// The returned iterator requires that the pattern supports a | |
1025 | /// reverse search, | |
1026 | /// and it will be double ended if a forward/reverse search yields | |
1027 | /// the same elements. | |
1028 | /// | |
1029 | /// For iterating from the front, `match_indices()` can be used. | |
1030 | /// | |
1031 | /// # Examples | |
1032 | /// | |
1033 | /// ``` | |
1034 | /// # #![feature(collections)] | |
1035 | /// let v: Vec<(usize, usize)> = "abcXXXabcYYYabc".rmatch_indices("abc").collect(); | |
1036 | /// assert_eq!(v, [(12, 15), (6, 9), (0, 3)]); | |
1037 | /// | |
1038 | /// let v: Vec<(usize, usize)> = "1abcabc2".rmatch_indices("abc").collect(); | |
1039 | /// assert_eq!(v, [(4, 7), (1, 4)]); | |
1040 | /// | |
1041 | /// let v: Vec<(usize, usize)> = "ababa".rmatch_indices("aba").collect(); | |
1042 | /// assert_eq!(v, [(2, 5)]); // only the last `aba` | |
1043 | /// ``` | |
1044 | #[unstable(feature = "collections", | |
1045 | reason = "might have its iterator type changed")] | |
1046 | // NB: Right now RMatchIndices yields `(usize, usize)`, but it would | |
1047 | // be more consistent with `rmatches` and `char_indices` to return `(usize, &str)` | |
1048 | pub fn rmatch_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatchIndices<'a, P> | |
1049 | where P::Searcher: ReverseSearcher<'a> | |
1050 | { | |
1051 | core_str::StrExt::rmatch_indices(&self[..], pat) | |
1052 | } | |
1053 | ||
c34b1796 | 1054 | /// An iterator over the lines of a string, separated by `\n`. |
1a4d82fc | 1055 | /// |
c34b1796 | 1056 | /// This does not include the empty string after a trailing `\n`. |
1a4d82fc | 1057 | /// |
c34b1796 | 1058 | /// # Examples |
1a4d82fc | 1059 | /// |
1a4d82fc | 1060 | /// ``` |
c34b1796 AL |
1061 | /// let four_lines = "foo\nbar\n\nbaz"; |
1062 | /// let v: Vec<&str> = four_lines.lines().collect(); | |
1a4d82fc | 1063 | /// |
c34b1796 AL |
1064 | /// assert_eq!(v, ["foo", "bar", "", "baz"]); |
1065 | /// ``` | |
1a4d82fc | 1066 | /// |
c34b1796 AL |
1067 | /// Leaving off the trailing character: |
1068 | /// | |
1069 | /// ``` | |
1a4d82fc JJ |
1070 | /// let four_lines = "foo\nbar\n\nbaz\n"; |
1071 | /// let v: Vec<&str> = four_lines.lines().collect(); | |
c34b1796 AL |
1072 | /// |
1073 | /// assert_eq!(v, ["foo", "bar", "", "baz"]); | |
1a4d82fc | 1074 | /// ``` |
85aaf69f | 1075 | #[stable(feature = "rust1", since = "1.0.0")] |
c34b1796 | 1076 | pub fn lines(&self) -> Lines { |
85aaf69f | 1077 | core_str::StrExt::lines(&self[..]) |
1a4d82fc JJ |
1078 | } |
1079 | ||
9346a6ac AL |
1080 | /// An iterator over the lines of a string, separated by either |
1081 | /// `\n` or `\r\n`. | |
1a4d82fc | 1082 | /// |
c34b1796 | 1083 | /// As with `.lines()`, this does not include an empty trailing line. |
1a4d82fc | 1084 | /// |
c34b1796 AL |
1085 | /// # Examples |
1086 | /// | |
1087 | /// ``` | |
1088 | /// let four_lines = "foo\r\nbar\n\r\nbaz"; | |
1089 | /// let v: Vec<&str> = four_lines.lines_any().collect(); | |
1090 | /// | |
1091 | /// assert_eq!(v, ["foo", "bar", "", "baz"]); | |
1092 | /// ``` | |
1093 | /// | |
1094 | /// Leaving off the trailing character: | |
1095 | /// | |
1096 | /// ``` | |
1a4d82fc JJ |
1097 | /// let four_lines = "foo\r\nbar\n\r\nbaz\n"; |
1098 | /// let v: Vec<&str> = four_lines.lines_any().collect(); | |
c34b1796 AL |
1099 | /// |
1100 | /// assert_eq!(v, ["foo", "bar", "", "baz"]); | |
1a4d82fc | 1101 | /// ``` |
85aaf69f | 1102 | #[stable(feature = "rust1", since = "1.0.0")] |
c34b1796 | 1103 | pub fn lines_any(&self) -> LinesAny { |
85aaf69f | 1104 | core_str::StrExt::lines_any(&self[..]) |
1a4d82fc | 1105 | } |
c34b1796 | 1106 | /// Returns a slice of the string from the character range [`begin`..`end`). |
1a4d82fc | 1107 | /// |
c34b1796 AL |
1108 | /// That is, start at the `begin`-th code point of the string and continue |
1109 | /// to the `end`-th code point. This does not detect or handle edge cases | |
1110 | /// such as leaving a combining character as the first code point of the | |
1111 | /// string. | |
1a4d82fc | 1112 | /// |
c34b1796 AL |
1113 | /// Due to the design of UTF-8, this operation is `O(end)`. See `slice`, |
1114 | /// `slice_to` and `slice_from` for `O(1)` variants that use byte indices | |
1115 | /// rather than code point indices. | |
1a4d82fc | 1116 | /// |
c34b1796 | 1117 | /// # Panics |
1a4d82fc | 1118 | /// |
c34b1796 AL |
1119 | /// Panics if `begin` > `end` or if either `begin` or `end` is beyond the |
1120 | /// last character of the string. | |
1a4d82fc | 1121 | /// |
c34b1796 AL |
1122 | /// # Examples |
1123 | /// | |
1124 | /// ``` | |
1125 | /// # #![feature(collections)] | |
1a4d82fc | 1126 | /// let s = "Löwe 老虎 Léopard"; |
c34b1796 | 1127 | /// |
1a4d82fc JJ |
1128 | /// assert_eq!(s.slice_chars(0, 4), "Löwe"); |
1129 | /// assert_eq!(s.slice_chars(5, 7), "老虎"); | |
1130 | /// ``` | |
85aaf69f SL |
1131 | #[unstable(feature = "collections", |
1132 | reason = "may have yet to prove its worth")] | |
c34b1796 | 1133 | pub fn slice_chars(&self, begin: usize, end: usize) -> &str { |
85aaf69f | 1134 | core_str::StrExt::slice_chars(&self[..], begin, end) |
1a4d82fc JJ |
1135 | } |
1136 | ||
c34b1796 | 1137 | /// Takes a bytewise slice from a string. |
1a4d82fc JJ |
1138 | /// |
1139 | /// Returns the substring from [`begin`..`end`). | |
1140 | /// | |
c34b1796 AL |
1141 | /// # Unsafety |
1142 | /// | |
9346a6ac AL |
1143 | /// Caller must check both UTF-8 character boundaries and the boundaries |
1144 | /// of the entire slice as | |
c34b1796 AL |
1145 | /// well. |
1146 | /// | |
1147 | /// # Examples | |
1148 | /// | |
1149 | /// ``` | |
1150 | /// let s = "Löwe 老虎 Léopard"; | |
1151 | /// | |
1152 | /// unsafe { | |
1153 | /// assert_eq!(s.slice_unchecked(0, 21), "Löwe 老虎 Léopard"); | |
1154 | /// } | |
1155 | /// ``` | |
85aaf69f | 1156 | #[stable(feature = "rust1", since = "1.0.0")] |
c34b1796 | 1157 | pub unsafe fn slice_unchecked(&self, begin: usize, end: usize) -> &str { |
85aaf69f | 1158 | core_str::StrExt::slice_unchecked(&self[..], begin, end) |
1a4d82fc JJ |
1159 | } |
1160 | ||
c34b1796 | 1161 | /// Returns `true` if the given `&str` is a prefix of the string. |
1a4d82fc | 1162 | /// |
c34b1796 | 1163 | /// # Examples |
1a4d82fc | 1164 | /// |
c34b1796 | 1165 | /// ``` |
1a4d82fc JJ |
1166 | /// assert!("banana".starts_with("ba")); |
1167 | /// ``` | |
85aaf69f | 1168 | #[stable(feature = "rust1", since = "1.0.0")] |
c34b1796 | 1169 | pub fn starts_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool { |
85aaf69f | 1170 | core_str::StrExt::starts_with(&self[..], pat) |
1a4d82fc JJ |
1171 | } |
1172 | ||
c34b1796 | 1173 | /// Returns `true` if the given `&str` is a suffix of the string. |
1a4d82fc | 1174 | /// |
c34b1796 | 1175 | /// # Examples |
1a4d82fc JJ |
1176 | /// |
1177 | /// ```rust | |
1178 | /// assert!("banana".ends_with("nana")); | |
1179 | /// ``` | |
85aaf69f | 1180 | #[stable(feature = "rust1", since = "1.0.0")] |
c34b1796 AL |
1181 | pub fn ends_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool |
1182 | where P::Searcher: ReverseSearcher<'a> | |
1183 | { | |
85aaf69f | 1184 | core_str::StrExt::ends_with(&self[..], pat) |
1a4d82fc JJ |
1185 | } |
1186 | ||
9346a6ac AL |
1187 | /// Returns a string with all pre- and suffixes that match a pattern |
1188 | /// repeatedly removed. | |
1a4d82fc | 1189 | /// |
9346a6ac AL |
1190 | /// The pattern can be a simple `char`, or a closure that determines |
1191 | /// if a character matches. | |
1a4d82fc | 1192 | /// |
c34b1796 | 1193 | /// # Examples |
1a4d82fc | 1194 | /// |
9346a6ac | 1195 | /// Simple patterns: |
1a4d82fc | 1196 | /// |
c34b1796 | 1197 | /// ``` |
1a4d82fc | 1198 | /// assert_eq!("11foo1bar11".trim_matches('1'), "foo1bar"); |
c34b1796 | 1199 | /// |
1a4d82fc JJ |
1200 | /// let x: &[_] = &['1', '2']; |
1201 | /// assert_eq!("12foo1bar12".trim_matches(x), "foo1bar"); | |
c34b1796 AL |
1202 | /// ``` |
1203 | /// | |
9346a6ac | 1204 | /// More complex patterns with closures: |
c34b1796 AL |
1205 | /// |
1206 | /// ``` | |
85aaf69f | 1207 | /// assert_eq!("123foo1bar123".trim_matches(|c: char| c.is_numeric()), "foo1bar"); |
1a4d82fc | 1208 | /// ``` |
85aaf69f | 1209 | #[stable(feature = "rust1", since = "1.0.0")] |
c34b1796 AL |
1210 | pub fn trim_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str |
1211 | where P::Searcher: DoubleEndedSearcher<'a> | |
1212 | { | |
85aaf69f | 1213 | core_str::StrExt::trim_matches(&self[..], pat) |
1a4d82fc JJ |
1214 | } |
1215 | ||
9346a6ac AL |
1216 | /// Returns a string with all prefixes that match a pattern |
1217 | /// repeatedly removed. | |
1a4d82fc | 1218 | /// |
9346a6ac AL |
1219 | /// The pattern can be a simple `&str`, `char`, or a closure that |
1220 | /// determines if a character matches. | |
1a4d82fc | 1221 | /// |
c34b1796 | 1222 | /// # Examples |
1a4d82fc | 1223 | /// |
9346a6ac | 1224 | /// Simple patterns: |
1a4d82fc | 1225 | /// |
c34b1796 | 1226 | /// ``` |
1a4d82fc | 1227 | /// assert_eq!("11foo1bar11".trim_left_matches('1'), "foo1bar11"); |
c34b1796 | 1228 | /// |
1a4d82fc JJ |
1229 | /// let x: &[_] = &['1', '2']; |
1230 | /// assert_eq!("12foo1bar12".trim_left_matches(x), "foo1bar12"); | |
c34b1796 AL |
1231 | /// ``` |
1232 | /// | |
9346a6ac | 1233 | /// More complex patterns with closures: |
c34b1796 AL |
1234 | /// |
1235 | /// ``` | |
85aaf69f | 1236 | /// assert_eq!("123foo1bar123".trim_left_matches(|c: char| c.is_numeric()), "foo1bar123"); |
1a4d82fc | 1237 | /// ``` |
85aaf69f | 1238 | #[stable(feature = "rust1", since = "1.0.0")] |
c34b1796 | 1239 | pub fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str { |
85aaf69f | 1240 | core_str::StrExt::trim_left_matches(&self[..], pat) |
1a4d82fc JJ |
1241 | } |
1242 | ||
9346a6ac AL |
1243 | /// Returns a string with all suffixes that match a pattern |
1244 | /// repeatedly removed. | |
1a4d82fc | 1245 | /// |
9346a6ac AL |
1246 | /// The pattern can be a simple `&str`, `char`, or a closure that |
1247 | /// determines if a character matches. | |
1a4d82fc | 1248 | /// |
c34b1796 | 1249 | /// # Examples |
1a4d82fc | 1250 | /// |
9346a6ac | 1251 | /// Simple patterns: |
1a4d82fc | 1252 | /// |
c34b1796 | 1253 | /// ``` |
1a4d82fc JJ |
1254 | /// assert_eq!("11foo1bar11".trim_right_matches('1'), "11foo1bar"); |
1255 | /// let x: &[_] = &['1', '2']; | |
1256 | /// assert_eq!("12foo1bar12".trim_right_matches(x), "12foo1bar"); | |
c34b1796 AL |
1257 | /// ``` |
1258 | /// | |
9346a6ac | 1259 | /// More complex patterns with closures: |
c34b1796 AL |
1260 | /// |
1261 | /// ``` | |
85aaf69f | 1262 | /// assert_eq!("123foo1bar123".trim_right_matches(|c: char| c.is_numeric()), "123foo1bar"); |
1a4d82fc | 1263 | /// ``` |
85aaf69f | 1264 | #[stable(feature = "rust1", since = "1.0.0")] |
c34b1796 AL |
1265 | pub fn trim_right_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str |
1266 | where P::Searcher: ReverseSearcher<'a> | |
1267 | { | |
85aaf69f | 1268 | core_str::StrExt::trim_right_matches(&self[..], pat) |
1a4d82fc JJ |
1269 | } |
1270 | ||
9346a6ac AL |
1271 | /// Checks that `index`-th byte lies at the start and/or end of a |
1272 | /// UTF-8 code point sequence. | |
1a4d82fc | 1273 | /// |
9346a6ac AL |
1274 | /// The start and end of the string (when `index == self.len()`) are |
1275 | /// considered to be | |
c34b1796 AL |
1276 | /// boundaries. |
1277 | /// | |
1278 | /// # Panics | |
1a4d82fc JJ |
1279 | /// |
1280 | /// Panics if `index` is greater than `self.len()`. | |
1281 | /// | |
c34b1796 | 1282 | /// # Examples |
1a4d82fc | 1283 | /// |
c34b1796 AL |
1284 | /// ``` |
1285 | /// # #![feature(str_char)] | |
1a4d82fc JJ |
1286 | /// let s = "Löwe 老虎 Léopard"; |
1287 | /// assert!(s.is_char_boundary(0)); | |
1288 | /// // start of `老` | |
1289 | /// assert!(s.is_char_boundary(6)); | |
1290 | /// assert!(s.is_char_boundary(s.len())); | |
1291 | /// | |
1292 | /// // second byte of `ö` | |
1293 | /// assert!(!s.is_char_boundary(2)); | |
1294 | /// | |
1295 | /// // third byte of `老` | |
1296 | /// assert!(!s.is_char_boundary(8)); | |
1297 | /// ``` | |
c34b1796 AL |
1298 | #[unstable(feature = "str_char", |
1299 | reason = "it is unclear whether this method pulls its weight \ | |
1300 | with the existence of the char_indices iterator or \ | |
1301 | this method may want to be replaced with checked \ | |
1302 | slicing")] | |
1303 | pub fn is_char_boundary(&self, index: usize) -> bool { | |
85aaf69f | 1304 | core_str::StrExt::is_char_boundary(&self[..], index) |
1a4d82fc JJ |
1305 | } |
1306 | ||
c34b1796 | 1307 | /// Given a byte position, return the next char and its index. |
1a4d82fc | 1308 | /// |
c34b1796 | 1309 | /// This can be used to iterate over the Unicode characters of a string. |
1a4d82fc | 1310 | /// |
c34b1796 | 1311 | /// # Panics |
1a4d82fc | 1312 | /// |
c34b1796 AL |
1313 | /// If `start` is greater than or equal to the length of the string. |
1314 | /// If `start` is not the index of the beginning of a valid UTF-8 character. | |
1a4d82fc | 1315 | /// |
c34b1796 AL |
1316 | /// # Examples |
1317 | /// | |
9346a6ac AL |
1318 | /// This example manually iterates through the characters of a string; |
1319 | /// this should normally be | |
c34b1796 AL |
1320 | /// done by `.chars()` or `.char_indices()`. |
1321 | /// | |
1322 | /// ``` | |
1323 | /// # #![feature(str_char, core)] | |
1a4d82fc JJ |
1324 | /// use std::str::CharRange; |
1325 | /// | |
1326 | /// let s = "中华Việt Nam"; | |
85aaf69f | 1327 | /// let mut i = 0; |
1a4d82fc JJ |
1328 | /// while i < s.len() { |
1329 | /// let CharRange {ch, next} = s.char_range_at(i); | |
1330 | /// println!("{}: {}", i, ch); | |
1331 | /// i = next; | |
1332 | /// } | |
1333 | /// ``` | |
1334 | /// | |
1335 | /// This outputs: | |
1336 | /// | |
1337 | /// ```text | |
1338 | /// 0: 中 | |
1339 | /// 3: 华 | |
1340 | /// 6: V | |
1341 | /// 7: i | |
1342 | /// 8: ệ | |
1343 | /// 11: t | |
1344 | /// 12: | |
1345 | /// 13: N | |
1346 | /// 14: a | |
1347 | /// 15: m | |
1348 | /// ``` | |
c34b1796 AL |
1349 | #[unstable(feature = "str_char", |
1350 | reason = "often replaced by char_indices, this method may \ | |
1351 | be removed in favor of just char_at() or eventually \ | |
1352 | removed altogether")] | |
1353 | pub fn char_range_at(&self, start: usize) -> CharRange { | |
85aaf69f | 1354 | core_str::StrExt::char_range_at(&self[..], start) |
1a4d82fc JJ |
1355 | } |
1356 | ||
c34b1796 | 1357 | /// Given a byte position, return the previous `char` and its position. |
1a4d82fc JJ |
1358 | /// |
1359 | /// This function can be used to iterate over a Unicode string in reverse. | |
1360 | /// | |
1361 | /// Returns 0 for next index if called on start index 0. | |
1362 | /// | |
1363 | /// # Panics | |
1364 | /// | |
1365 | /// If `start` is greater than the length of the string. | |
1366 | /// If `start` is not an index following a valid UTF-8 character. | |
1a4d82fc | 1367 | /// |
c34b1796 AL |
1368 | /// # Examples |
1369 | /// | |
9346a6ac AL |
1370 | /// This example manually iterates through the characters of a string; |
1371 | /// this should normally be | |
c34b1796 | 1372 | /// done by `.chars().rev()` or `.char_indices()`. |
1a4d82fc | 1373 | /// |
1a4d82fc | 1374 | /// ``` |
c34b1796 AL |
1375 | /// # #![feature(str_char, core)] |
1376 | /// use std::str::CharRange; | |
1a4d82fc | 1377 | /// |
c34b1796 AL |
1378 | /// let s = "中华Việt Nam"; |
1379 | /// let mut i = s.len(); | |
1380 | /// while i > 0 { | |
1381 | /// let CharRange {ch, next} = s.char_range_at_reverse(i); | |
1382 | /// println!("{}: {}", i, ch); | |
1383 | /// i = next; | |
1384 | /// } | |
1385 | /// ``` | |
1a4d82fc | 1386 | /// |
c34b1796 AL |
1387 | /// This outputs: |
1388 | /// | |
1389 | /// ```text | |
1390 | /// 16: m | |
1391 | /// 15: a | |
1392 | /// 14: N | |
1393 | /// 13: | |
1394 | /// 12: t | |
1395 | /// 11: ệ | |
1396 | /// 8: i | |
1397 | /// 7: V | |
1398 | /// 6: 华 | |
1399 | /// 3: 中 | |
1400 | /// ``` | |
1401 | #[unstable(feature = "str_char", | |
1402 | reason = "often replaced by char_indices, this method may \ | |
1403 | be removed in favor of just char_at_reverse() or \ | |
1404 | eventually removed altogether")] | |
1405 | pub fn char_range_at_reverse(&self, start: usize) -> CharRange { | |
1406 | core_str::StrExt::char_range_at_reverse(&self[..], start) | |
1a4d82fc JJ |
1407 | } |
1408 | ||
c34b1796 AL |
1409 | /// Given a byte position, return the `char` at that position. |
1410 | /// | |
1411 | /// # Panics | |
1412 | /// | |
1413 | /// If `i` is greater than or equal to the length of the string. | |
1414 | /// If `i` is not the index of the beginning of a valid UTF-8 character. | |
1415 | /// | |
1416 | /// # Examples | |
1417 | /// | |
1418 | /// ``` | |
1419 | /// # #![feature(str_char)] | |
1420 | /// let s = "abπc"; | |
1421 | /// assert_eq!(s.char_at(1), 'b'); | |
1422 | /// assert_eq!(s.char_at(2), 'π'); | |
1423 | /// ``` | |
1424 | #[unstable(feature = "str_char", | |
1425 | reason = "frequently replaced by the chars() iterator, this \ | |
1426 | method may be removed or possibly renamed in the \ | |
1427 | future; it is normally replaced by chars/char_indices \ | |
1428 | iterators or by getting the first char from a \ | |
1429 | subslice")] | |
1430 | pub fn char_at(&self, i: usize) -> char { | |
1431 | core_str::StrExt::char_at(&self[..], i) | |
1432 | } | |
1433 | ||
9346a6ac AL |
1434 | /// Given a byte position, return the `char` that ends immediately before |
1435 | /// that position. | |
1a4d82fc JJ |
1436 | /// |
1437 | /// # Panics | |
1438 | /// | |
1439 | /// If `i` is greater than the length of the string. | |
1440 | /// If `i` is not an index following a valid UTF-8 character. | |
c34b1796 AL |
1441 | /// |
1442 | /// # Examples | |
1443 | /// | |
1444 | /// ``` | |
1445 | /// # #![feature(str_char)] | |
1446 | /// let s = "abπc"; | |
1447 | /// assert_eq!(s.char_at_reverse(1), 'a'); | |
1448 | /// assert_eq!(s.char_at_reverse(2), 'b'); | |
1449 | /// ``` | |
1450 | #[unstable(feature = "str_char", | |
1451 | reason = "see char_at for more details, but reverse semantics \ | |
1452 | are also somewhat unclear, especially with which \ | |
1453 | cases generate panics")] | |
1454 | pub fn char_at_reverse(&self, i: usize) -> char { | |
85aaf69f | 1455 | core_str::StrExt::char_at_reverse(&self[..], i) |
1a4d82fc JJ |
1456 | } |
1457 | ||
9346a6ac | 1458 | /// Converts `self` to a byte slice. |
1a4d82fc | 1459 | /// |
c34b1796 | 1460 | /// # Examples |
1a4d82fc | 1461 | /// |
c34b1796 | 1462 | /// ``` |
1a4d82fc JJ |
1463 | /// assert_eq!("bors".as_bytes(), b"bors"); |
1464 | /// ``` | |
85aaf69f | 1465 | #[stable(feature = "rust1", since = "1.0.0")] |
c34b1796 | 1466 | pub fn as_bytes(&self) -> &[u8] { |
85aaf69f | 1467 | core_str::StrExt::as_bytes(&self[..]) |
1a4d82fc JJ |
1468 | } |
1469 | ||
9346a6ac AL |
1470 | /// Returns the byte index of the first character of `self` that matches |
1471 | /// the pattern, if it | |
c34b1796 | 1472 | /// exists. |
1a4d82fc | 1473 | /// |
c34b1796 | 1474 | /// Returns `None` if it doesn't exist. |
1a4d82fc | 1475 | /// |
9346a6ac AL |
1476 | /// The pattern can be a simple `&str`, `char`, or a closure that |
1477 | /// determines if a | |
1478 | /// character matches. | |
1a4d82fc | 1479 | /// |
c34b1796 | 1480 | /// # Examples |
1a4d82fc | 1481 | /// |
9346a6ac | 1482 | /// Simple patterns: |
c34b1796 AL |
1483 | /// |
1484 | /// ``` | |
1a4d82fc JJ |
1485 | /// let s = "Löwe 老虎 Léopard"; |
1486 | /// | |
1487 | /// assert_eq!(s.find('L'), Some(0)); | |
1488 | /// assert_eq!(s.find('é'), Some(14)); | |
9346a6ac | 1489 | /// assert_eq!(s.find("Léopard"), Some(13)); |
1a4d82fc | 1490 | /// |
c34b1796 AL |
1491 | /// ``` |
1492 | /// | |
9346a6ac | 1493 | /// More complex patterns with closures: |
c34b1796 AL |
1494 | /// |
1495 | /// ``` | |
1496 | /// let s = "Löwe 老虎 Léopard"; | |
1497 | /// | |
85aaf69f | 1498 | /// assert_eq!(s.find(|c: char| c.is_whitespace()), Some(5)); |
9346a6ac | 1499 | /// assert_eq!(s.find(char::is_lowercase), Some(1)); |
c34b1796 | 1500 | /// ``` |
1a4d82fc | 1501 | /// |
c34b1796 AL |
1502 | /// Not finding the pattern: |
1503 | /// | |
1504 | /// ``` | |
1505 | /// let s = "Löwe 老虎 Léopard"; | |
1a4d82fc | 1506 | /// let x: &[_] = &['1', '2']; |
c34b1796 | 1507 | /// |
1a4d82fc JJ |
1508 | /// assert_eq!(s.find(x), None); |
1509 | /// ``` | |
85aaf69f | 1510 | #[stable(feature = "rust1", since = "1.0.0")] |
c34b1796 | 1511 | pub fn find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize> { |
85aaf69f | 1512 | core_str::StrExt::find(&self[..], pat) |
1a4d82fc JJ |
1513 | } |
1514 | ||
9346a6ac AL |
1515 | /// Returns the byte index of the last character of `self` that |
1516 | /// matches the pattern, if it | |
c34b1796 | 1517 | /// exists. |
1a4d82fc | 1518 | /// |
c34b1796 | 1519 | /// Returns `None` if it doesn't exist. |
1a4d82fc | 1520 | /// |
9346a6ac AL |
1521 | /// The pattern can be a simple `&str`, `char`, |
1522 | /// or a closure that determines if a character matches. | |
1a4d82fc | 1523 | /// |
c34b1796 | 1524 | /// # Examples |
1a4d82fc | 1525 | /// |
9346a6ac | 1526 | /// Simple patterns: |
c34b1796 AL |
1527 | /// |
1528 | /// ``` | |
1a4d82fc JJ |
1529 | /// let s = "Löwe 老虎 Léopard"; |
1530 | /// | |
1531 | /// assert_eq!(s.rfind('L'), Some(13)); | |
1532 | /// assert_eq!(s.rfind('é'), Some(14)); | |
c34b1796 AL |
1533 | /// ``` |
1534 | /// | |
9346a6ac | 1535 | /// More complex patterns with closures: |
c34b1796 AL |
1536 | /// |
1537 | /// ``` | |
1538 | /// let s = "Löwe 老虎 Léopard"; | |
1a4d82fc | 1539 | /// |
85aaf69f | 1540 | /// assert_eq!(s.rfind(|c: char| c.is_whitespace()), Some(12)); |
9346a6ac | 1541 | /// assert_eq!(s.rfind(char::is_lowercase), Some(20)); |
c34b1796 AL |
1542 | /// ``` |
1543 | /// | |
1544 | /// Not finding the pattern: | |
1a4d82fc | 1545 | /// |
c34b1796 AL |
1546 | /// ``` |
1547 | /// let s = "Löwe 老虎 Léopard"; | |
1a4d82fc | 1548 | /// let x: &[_] = &['1', '2']; |
c34b1796 | 1549 | /// |
1a4d82fc JJ |
1550 | /// assert_eq!(s.rfind(x), None); |
1551 | /// ``` | |
85aaf69f | 1552 | #[stable(feature = "rust1", since = "1.0.0")] |
c34b1796 AL |
1553 | pub fn rfind<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize> |
1554 | where P::Searcher: ReverseSearcher<'a> | |
1555 | { | |
85aaf69f | 1556 | core_str::StrExt::rfind(&self[..], pat) |
1a4d82fc JJ |
1557 | } |
1558 | ||
c34b1796 | 1559 | /// Retrieves the first character from a `&str` and returns it. |
1a4d82fc | 1560 | /// |
9346a6ac AL |
1561 | /// This does not allocate a new string; instead, it returns a slice that |
1562 | /// points one character | |
c34b1796 | 1563 | /// beyond the character that was shifted. |
1a4d82fc | 1564 | /// |
c34b1796 | 1565 | /// If the slice does not contain any characters, `None` is returned instead. |
1a4d82fc | 1566 | /// |
c34b1796 | 1567 | /// # Examples |
1a4d82fc | 1568 | /// |
1a4d82fc | 1569 | /// ``` |
c34b1796 | 1570 | /// # #![feature(str_char)] |
1a4d82fc JJ |
1571 | /// let s = "Löwe 老虎 Léopard"; |
1572 | /// let (c, s1) = s.slice_shift_char().unwrap(); | |
c34b1796 | 1573 | /// |
1a4d82fc JJ |
1574 | /// assert_eq!(c, 'L'); |
1575 | /// assert_eq!(s1, "öwe 老虎 Léopard"); | |
1576 | /// | |
1577 | /// let (c, s2) = s1.slice_shift_char().unwrap(); | |
c34b1796 | 1578 | /// |
1a4d82fc JJ |
1579 | /// assert_eq!(c, 'ö'); |
1580 | /// assert_eq!(s2, "we 老虎 Léopard"); | |
1581 | /// ``` | |
c34b1796 AL |
1582 | #[unstable(feature = "str_char", |
1583 | reason = "awaiting conventions about shifting and slices and \ | |
1584 | may not be warranted with the existence of the chars \ | |
1585 | and/or char_indices iterators")] | |
1586 | pub fn slice_shift_char(&self) -> Option<(char, &str)> { | |
85aaf69f | 1587 | core_str::StrExt::slice_shift_char(&self[..]) |
1a4d82fc JJ |
1588 | } |
1589 | ||
9346a6ac AL |
1590 | /// Returns the byte offset of an inner slice relative to an enclosing |
1591 | /// outer slice. | |
1a4d82fc | 1592 | /// |
c34b1796 AL |
1593 | /// # Panics |
1594 | /// | |
1a4d82fc JJ |
1595 | /// Panics if `inner` is not a direct slice contained within `self`. |
1596 | /// | |
c34b1796 | 1597 | /// # Examples |
1a4d82fc | 1598 | /// |
c34b1796 AL |
1599 | /// ``` |
1600 | /// # #![feature(collections)] | |
1a4d82fc JJ |
1601 | /// let string = "a\nb\nc"; |
1602 | /// let lines: Vec<&str> = string.lines().collect(); | |
1603 | /// | |
1604 | /// assert!(string.subslice_offset(lines[0]) == 0); // &"a" | |
1605 | /// assert!(string.subslice_offset(lines[1]) == 2); // &"b" | |
1606 | /// assert!(string.subslice_offset(lines[2]) == 4); // &"c" | |
1607 | /// ``` | |
85aaf69f SL |
1608 | #[unstable(feature = "collections", |
1609 | reason = "awaiting convention about comparability of arbitrary slices")] | |
c34b1796 | 1610 | pub fn subslice_offset(&self, inner: &str) -> usize { |
85aaf69f | 1611 | core_str::StrExt::subslice_offset(&self[..], inner) |
1a4d82fc JJ |
1612 | } |
1613 | ||
9346a6ac | 1614 | /// Returns a raw pointer to the `&str`'s buffer. |
c34b1796 | 1615 | /// |
9346a6ac AL |
1616 | /// The caller must ensure that the string outlives this pointer, and |
1617 | /// that it is not | |
c34b1796 | 1618 | /// reallocated (e.g. by pushing to the string). |
1a4d82fc | 1619 | /// |
c34b1796 AL |
1620 | /// # Examples |
1621 | /// | |
1622 | /// ``` | |
1623 | /// let s = "Hello"; | |
1624 | /// let p = s.as_ptr(); | |
1625 | /// ``` | |
85aaf69f | 1626 | #[stable(feature = "rust1", since = "1.0.0")] |
1a4d82fc | 1627 | #[inline] |
c34b1796 | 1628 | pub fn as_ptr(&self) -> *const u8 { |
85aaf69f | 1629 | core_str::StrExt::as_ptr(&self[..]) |
1a4d82fc JJ |
1630 | } |
1631 | ||
9346a6ac | 1632 | /// Returns an iterator of `u16` over the string encoded as UTF-16. |
85aaf69f SL |
1633 | #[unstable(feature = "collections", |
1634 | reason = "this functionality may only be provided by libunicode")] | |
c34b1796 | 1635 | pub fn utf16_units(&self) -> Utf16Units { |
85aaf69f | 1636 | Utf16Units { encoder: Utf16Encoder::new(self[..].chars()) } |
1a4d82fc JJ |
1637 | } |
1638 | ||
c34b1796 | 1639 | /// Returns the length of `self` in bytes. |
1a4d82fc | 1640 | /// |
c34b1796 | 1641 | /// # Examples |
1a4d82fc JJ |
1642 | /// |
1643 | /// ``` | |
1644 | /// assert_eq!("foo".len(), 3); | |
c34b1796 | 1645 | /// assert_eq!("ƒoo".len(), 4); // fancy f! |
1a4d82fc | 1646 | /// ``` |
85aaf69f | 1647 | #[stable(feature = "rust1", since = "1.0.0")] |
1a4d82fc | 1648 | #[inline] |
c34b1796 | 1649 | pub fn len(&self) -> usize { |
85aaf69f | 1650 | core_str::StrExt::len(&self[..]) |
1a4d82fc JJ |
1651 | } |
1652 | ||
c34b1796 | 1653 | /// Returns `true` if this slice has a length of zero bytes. |
1a4d82fc | 1654 | /// |
c34b1796 | 1655 | /// # Examples |
1a4d82fc JJ |
1656 | /// |
1657 | /// ``` | |
1658 | /// assert!("".is_empty()); | |
1659 | /// ``` | |
1660 | #[inline] | |
85aaf69f | 1661 | #[stable(feature = "rust1", since = "1.0.0")] |
c34b1796 | 1662 | pub fn is_empty(&self) -> bool { |
85aaf69f | 1663 | core_str::StrExt::is_empty(&self[..]) |
1a4d82fc JJ |
1664 | } |
1665 | ||
c34b1796 AL |
1666 | /// Parses `self` into the specified type. |
1667 | /// | |
1668 | /// # Errors | |
1669 | /// | |
1670 | /// Will return `Err` if it's not possible to parse `self` into the type. | |
1a4d82fc JJ |
1671 | /// |
1672 | /// # Examples | |
1673 | /// | |
1674 | /// ``` | |
85aaf69f | 1675 | /// assert_eq!("4".parse::<u32>(), Ok(4)); |
c34b1796 AL |
1676 | /// ``` |
1677 | /// | |
1678 | /// Failing: | |
1679 | /// | |
1680 | /// ``` | |
85aaf69f | 1681 | /// assert!("j".parse::<u32>().is_err()); |
1a4d82fc JJ |
1682 | /// ``` |
1683 | #[inline] | |
85aaf69f | 1684 | #[stable(feature = "rust1", since = "1.0.0")] |
c34b1796 | 1685 | pub fn parse<F: FromStr>(&self) -> Result<F, F::Err> { |
85aaf69f | 1686 | core_str::StrExt::parse(&self[..]) |
1a4d82fc JJ |
1687 | } |
1688 | ||
c34b1796 AL |
1689 | /// Returns an iterator over the [grapheme clusters][graphemes] of `self`. |
1690 | /// | |
1691 | /// [graphemes]: http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries | |
1a4d82fc | 1692 | /// |
9346a6ac AL |
1693 | /// If `is_extended` is true, the iterator is over the |
1694 | /// *extended grapheme clusters*; | |
1a4d82fc JJ |
1695 | /// otherwise, the iterator is over the *legacy grapheme clusters*. |
1696 | /// [UAX#29](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries) | |
1697 | /// recommends extended grapheme cluster boundaries for general processing. | |
1698 | /// | |
c34b1796 | 1699 | /// # Examples |
1a4d82fc | 1700 | /// |
c34b1796 AL |
1701 | /// ``` |
1702 | /// # #![feature(unicode, core)] | |
1a4d82fc JJ |
1703 | /// let gr1 = "a\u{310}e\u{301}o\u{308}\u{332}".graphemes(true).collect::<Vec<&str>>(); |
1704 | /// let b: &[_] = &["a\u{310}", "e\u{301}", "o\u{308}\u{332}"]; | |
c34b1796 AL |
1705 | /// |
1706 | /// assert_eq!(&gr1[..], b); | |
1707 | /// | |
1a4d82fc JJ |
1708 | /// let gr2 = "a\r\nb🇷🇺🇸🇹".graphemes(true).collect::<Vec<&str>>(); |
1709 | /// let b: &[_] = &["a", "\r\n", "b", "🇷🇺🇸🇹"]; | |
c34b1796 AL |
1710 | /// |
1711 | /// assert_eq!(&gr2[..], b); | |
1a4d82fc | 1712 | /// ``` |
d9579d0f AL |
1713 | #[deprecated(reason = "use the crates.io `unicode-segmentation` library instead", |
1714 | since = "1.0.0")] | |
c34b1796 | 1715 | #[unstable(feature = "unicode", |
85aaf69f | 1716 | reason = "this functionality may only be provided by libunicode")] |
c34b1796 | 1717 | pub fn graphemes(&self, is_extended: bool) -> Graphemes { |
85aaf69f | 1718 | UnicodeStr::graphemes(&self[..], is_extended) |
1a4d82fc JJ |
1719 | } |
1720 | ||
9346a6ac AL |
1721 | /// Returns an iterator over the grapheme clusters of `self` and their |
1722 | /// byte offsets. See | |
c34b1796 | 1723 | /// `graphemes()` for more information. |
1a4d82fc | 1724 | /// |
c34b1796 | 1725 | /// # Examples |
1a4d82fc | 1726 | /// |
c34b1796 AL |
1727 | /// ``` |
1728 | /// # #![feature(unicode, core)] | |
85aaf69f SL |
1729 | /// let gr_inds = "a̐éö̲\r\n".grapheme_indices(true).collect::<Vec<(usize, &str)>>(); |
1730 | /// let b: &[_] = &[(0, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")]; | |
c34b1796 AL |
1731 | /// |
1732 | /// assert_eq!(&gr_inds[..], b); | |
1a4d82fc | 1733 | /// ``` |
d9579d0f AL |
1734 | #[deprecated(reason = "use the crates.io `unicode-segmentation` library instead", |
1735 | since = "1.0.0")] | |
c34b1796 | 1736 | #[unstable(feature = "unicode", |
85aaf69f | 1737 | reason = "this functionality may only be provided by libunicode")] |
c34b1796 | 1738 | pub fn grapheme_indices(&self, is_extended: bool) -> GraphemeIndices { |
85aaf69f | 1739 | UnicodeStr::grapheme_indices(&self[..], is_extended) |
1a4d82fc JJ |
1740 | } |
1741 | ||
d9579d0f AL |
1742 | /// An iterator over the non-empty substrings of `self` which contain no whitespace, |
1743 | /// and which are separated by any amount of whitespace. | |
1a4d82fc | 1744 | /// |
c34b1796 AL |
1745 | /// # Examples |
1746 | /// | |
1747 | /// ``` | |
1748 | /// # #![feature(str_words)] | |
d9579d0f | 1749 | /// # #![allow(deprecated)] |
1a4d82fc JJ |
1750 | /// let some_words = " Mary had\ta little \n\t lamb"; |
1751 | /// let v: Vec<&str> = some_words.words().collect(); | |
c34b1796 AL |
1752 | /// |
1753 | /// assert_eq!(v, ["Mary", "had", "a", "little", "lamb"]); | |
1a4d82fc | 1754 | /// ``` |
d9579d0f AL |
1755 | #[deprecated(reason = "words() will be removed. Use split_whitespace() instead", |
1756 | since = "1.1.0")] | |
85aaf69f SL |
1757 | #[unstable(feature = "str_words", |
1758 | reason = "the precise algorithm to use is unclear")] | |
d9579d0f | 1759 | #[allow(deprecated)] |
c34b1796 | 1760 | pub fn words(&self) -> Words { |
85aaf69f | 1761 | UnicodeStr::words(&self[..]) |
1a4d82fc JJ |
1762 | } |
1763 | ||
d9579d0f AL |
1764 | /// An iterator over the non-empty substrings of `self` which contain no whitespace, |
1765 | /// and which are separated by any amount of whitespace. | |
1766 | /// | |
1767 | /// # Examples | |
1768 | /// | |
1769 | /// ``` | |
1770 | /// let some_words = " Mary had\ta little \n\t lamb"; | |
1771 | /// let v: Vec<&str> = some_words.split_whitespace().collect(); | |
1772 | /// | |
1773 | /// assert_eq!(v, ["Mary", "had", "a", "little", "lamb"]); | |
1774 | /// ``` | |
1775 | #[stable(feature = "split_whitespace", since = "1.1.0")] | |
1776 | pub fn split_whitespace(&self) -> SplitWhitespace { | |
1777 | UnicodeStr::split_whitespace(&self[..]) | |
1778 | } | |
1779 | ||
c34b1796 | 1780 | /// Returns a string's displayed width in columns. |
1a4d82fc | 1781 | /// |
c34b1796 AL |
1782 | /// Control characters have zero width. |
1783 | /// | |
9346a6ac AL |
1784 | /// `is_cjk` determines behavior for characters in the Ambiguous category: |
1785 | /// if `is_cjk` is | |
1786 | /// `true`, these are 2 columns wide; otherwise, they are 1. | |
1787 | /// In CJK locales, `is_cjk` should be | |
c34b1796 | 1788 | /// `true`, else it should be `false`. |
9346a6ac AL |
1789 | /// [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/) |
1790 | /// recommends that these | |
1791 | /// characters be treated as 1 column (i.e., `is_cjk = false`) if the | |
1792 | /// locale is unknown. | |
d9579d0f AL |
1793 | #[deprecated(reason = "use the crates.io `unicode-width` library instead", |
1794 | since = "1.0.0")] | |
c34b1796 | 1795 | #[unstable(feature = "unicode", |
85aaf69f | 1796 | reason = "this functionality may only be provided by libunicode")] |
c34b1796 | 1797 | pub fn width(&self, is_cjk: bool) -> usize { |
85aaf69f | 1798 | UnicodeStr::width(&self[..], is_cjk) |
1a4d82fc JJ |
1799 | } |
1800 | ||
c34b1796 AL |
1801 | /// Returns a `&str` with leading and trailing whitespace removed. |
1802 | /// | |
1803 | /// # Examples | |
1804 | /// | |
1805 | /// ``` | |
1806 | /// let s = " Hello\tworld\t"; | |
1807 | /// assert_eq!(s.trim(), "Hello\tworld"); | |
1808 | /// ``` | |
85aaf69f | 1809 | #[stable(feature = "rust1", since = "1.0.0")] |
c34b1796 | 1810 | pub fn trim(&self) -> &str { |
85aaf69f | 1811 | UnicodeStr::trim(&self[..]) |
1a4d82fc JJ |
1812 | } |
1813 | ||
c34b1796 AL |
1814 | /// Returns a `&str` with leading whitespace removed. |
1815 | /// | |
1816 | /// # Examples | |
1817 | /// | |
1818 | /// ``` | |
1819 | /// let s = " Hello\tworld\t"; | |
1820 | /// assert_eq!(s.trim_left(), "Hello\tworld\t"); | |
1821 | /// ``` | |
85aaf69f | 1822 | #[stable(feature = "rust1", since = "1.0.0")] |
c34b1796 | 1823 | pub fn trim_left(&self) -> &str { |
85aaf69f | 1824 | UnicodeStr::trim_left(&self[..]) |
1a4d82fc JJ |
1825 | } |
1826 | ||
c34b1796 AL |
1827 | /// Returns a `&str` with trailing whitespace removed. |
1828 | /// | |
1829 | /// # Examples | |
1830 | /// | |
1831 | /// ``` | |
1832 | /// let s = " Hello\tworld\t"; | |
1833 | /// assert_eq!(s.trim_right(), " Hello\tworld"); | |
1834 | /// ``` | |
85aaf69f | 1835 | #[stable(feature = "rust1", since = "1.0.0")] |
c34b1796 | 1836 | pub fn trim_right(&self) -> &str { |
85aaf69f | 1837 | UnicodeStr::trim_right(&self[..]) |
1a4d82fc | 1838 | } |
1a4d82fc | 1839 | |
c34b1796 AL |
1840 | /// Returns the lowercase equivalent of this string. |
1841 | /// | |
1842 | /// # Examples | |
1843 | /// | |
1844 | /// let s = "HELLO"; | |
1845 | /// assert_eq!(s.to_lowercase(), "hello"); | |
1846 | #[unstable(feature = "collections")] | |
1847 | pub fn to_lowercase(&self) -> String { | |
1848 | let mut s = String::with_capacity(self.len()); | |
1849 | s.extend(self[..].chars().flat_map(|c| c.to_lowercase())); | |
1850 | return s; | |
1a4d82fc JJ |
1851 | } |
1852 | ||
c34b1796 AL |
1853 | /// Returns the uppercase equivalent of this string. |
1854 | /// | |
1855 | /// # Examples | |
1856 | /// | |
1857 | /// let s = "hello"; | |
1858 | /// assert_eq!(s.to_uppercase(), "HELLO"); | |
1859 | #[unstable(feature = "collections")] | |
1860 | pub fn to_uppercase(&self) -> String { | |
1861 | let mut s = String::with_capacity(self.len()); | |
1862 | s.extend(self[..].chars().flat_map(|c| c.to_uppercase())); | |
1863 | return s; | |
1a4d82fc JJ |
1864 | } |
1865 | } |