]>
Commit | Line | Data |
---|---|---|
1a4d82fc JJ |
1 | // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT |
2 | // file at the top-level directory of this distribution and at | |
3 | // http://rust-lang.org/COPYRIGHT. | |
4 | // | |
5 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | |
6 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | |
7 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | |
8 | // option. This file may not be copied, modified, or distributed | |
9 | // except according to those terms. | |
1a4d82fc JJ |
10 | |
11 | //! String manipulation | |
12 | //! | |
13 | //! For more details, see std::str | |
14 | ||
62682a34 | 15 | #![stable(feature = "rust1", since = "1.0.0")] |
1a4d82fc | 16 | |
9346a6ac AL |
17 | use self::pattern::Pattern; |
18 | use self::pattern::{Searcher, ReverseSearcher, DoubleEndedSearcher}; | |
1a4d82fc | 19 | |
3157f602 | 20 | use char; |
85aaf69f | 21 | use clone::Clone; |
bd371182 | 22 | use convert::AsRef; |
1a4d82fc | 23 | use default::Default; |
85aaf69f | 24 | use fmt; |
1a4d82fc | 25 | use iter::ExactSizeIterator; |
e9174d1e SL |
26 | use iter::{Map, Cloned, Iterator, DoubleEndedIterator}; |
27 | use marker::Sized; | |
1a4d82fc | 28 | use mem; |
c34b1796 | 29 | use ops::{Fn, FnMut, FnOnce}; |
1a4d82fc | 30 | use option::Option::{self, None, Some}; |
1a4d82fc JJ |
31 | use result::Result::{self, Ok, Err}; |
32 | use slice::{self, SliceExt}; | |
1a4d82fc | 33 | |
9346a6ac | 34 | pub mod pattern; |
1a4d82fc JJ |
35 | |
36 | /// A trait to abstract the idea of creating a new instance of a type from a | |
37 | /// string. | |
92a42be0 SL |
38 | /// |
39 | /// `FromStr`'s [`from_str()`] method is often used implicitly, through | |
40 | /// [`str`]'s [`parse()`] method. See [`parse()`]'s documentation for examples. | |
41 | /// | |
42 | /// [`from_str()`]: #tymethod.from_str | |
54a0048b SL |
43 | /// [`str`]: ../../std/primitive.str.html |
44 | /// [`parse()`]: ../../std/primitive.str.html#method.parse | |
#[stable(feature = "rust1", since = "1.0.0")]
pub trait FromStr: Sized {
    /// The associated error which can be returned from parsing.
    #[stable(feature = "rust1", since = "1.0.0")]
    type Err;

    /// Parses a string `s` to return a value of this type.
    ///
    /// If parsing succeeds, returns the value inside `Ok`; otherwise, when
    /// the string is ill-formatted, returns an error inside `Err`. The error
    /// type is specific to the implementation of the trait.
    ///
    /// # Examples
    ///
    /// Basic usage with [`i32`][ithirtytwo], a type that implements `FromStr`:
    ///
    /// [ithirtytwo]: ../../std/primitive.i32.html
    ///
    /// ```
    /// use std::str::FromStr;
    ///
    /// let s = "5";
    /// let x = i32::from_str(s).unwrap();
    ///
    /// assert_eq!(5, x);
    /// ```
    #[stable(feature = "rust1", since = "1.0.0")]
    fn from_str(s: &str) -> Result<Self, Self::Err>;
}
74 | ||
85aaf69f | 75 | #[stable(feature = "rust1", since = "1.0.0")] |
1a4d82fc | 76 | impl FromStr for bool { |
85aaf69f SL |
77 | type Err = ParseBoolError; |
78 | ||
1a4d82fc JJ |
79 | /// Parse a `bool` from a string. |
80 | /// | |
c34b1796 AL |
81 | /// Yields a `Result<bool, ParseBoolError>`, because `s` may or may not |
82 | /// actually be parseable. | |
1a4d82fc JJ |
83 | /// |
84 | /// # Examples | |
85 | /// | |
c34b1796 AL |
86 | /// ``` |
87 | /// use std::str::FromStr; | |
88 | /// | |
89 | /// assert_eq!(FromStr::from_str("true"), Ok(true)); | |
90 | /// assert_eq!(FromStr::from_str("false"), Ok(false)); | |
91 | /// assert!(<bool as FromStr>::from_str("not even a boolean").is_err()); | |
92 | /// ``` | |
93 | /// | |
94 | /// Note, in many cases, the `.parse()` method on `str` is more proper. | |
95 | /// | |
96 | /// ``` | |
85aaf69f SL |
97 | /// assert_eq!("true".parse(), Ok(true)); |
98 | /// assert_eq!("false".parse(), Ok(false)); | |
99 | /// assert!("not even a boolean".parse::<bool>().is_err()); | |
1a4d82fc JJ |
100 | /// ``` |
101 | #[inline] | |
85aaf69f | 102 | fn from_str(s: &str) -> Result<bool, ParseBoolError> { |
1a4d82fc | 103 | match s { |
85aaf69f SL |
104 | "true" => Ok(true), |
105 | "false" => Ok(false), | |
106 | _ => Err(ParseBoolError { _priv: () }), | |
1a4d82fc JJ |
107 | } |
108 | } | |
109 | } | |
110 | ||
85aaf69f SL |
/// An error returned when parsing a `bool` from a string fails.
///
/// Produced by the `FromStr` implementation for `bool` when the input is
/// neither `"true"` nor `"false"`.
#[derive(Debug, Clone, PartialEq)]
#[stable(feature = "rust1", since = "1.0.0")]
// The only field is private, so the type cannot be built with a struct
// literal from outside this module.
pub struct ParseBoolError { _priv: () }
115 | ||
116 | #[stable(feature = "rust1", since = "1.0.0")] | |
117 | impl fmt::Display for ParseBoolError { | |
118 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | |
119 | "provided string was not `true` or `false`".fmt(f) | |
120 | } | |
121 | } | |
122 | ||
1a4d82fc JJ |
123 | /* |
124 | Section: Creating a string | |
125 | */ | |
126 | ||
b039eaaf SL |
127 | /// Errors which can occur when attempting to interpret a sequence of `u8` |
128 | /// as a string. | |
129 | /// | |
130 | /// As such, the `from_utf8` family of functions and methods for both `String`s | |
131 | /// and `&str`s make use of this error, for example. | |
#[derive(Copy, Eq, PartialEq, Clone, Debug)]
#[stable(feature = "rust1", since = "1.0.0")]
pub struct Utf8Error {
    // Kept private; read through `valid_up_to()` and printed by the
    // `Display` impl as the index near which the invalid byte was found.
    valid_up_to: usize,
}
137 | ||
impl Utf8Error {
    /// Returns the index in the given string up to which valid UTF-8 was
    /// verified.
    ///
    /// It is the maximum index such that `from_utf8(&input[..index])`
    /// would return `Ok(_)`.
    ///
    /// # Examples
    ///
    /// Basic usage:
    ///
    /// ```
    /// use std::str;
    ///
    /// // some invalid bytes, in a vector
    /// let sparkle_heart = vec![0, 159, 146, 150];
    ///
    /// // std::str::from_utf8 returns a Utf8Error
    /// let error = str::from_utf8(&sparkle_heart).unwrap_err();
    ///
    /// // the second byte is invalid here
    /// assert_eq!(1, error.valid_up_to());
    /// ```
    #[stable(feature = "utf8_error", since = "1.5.0")]
    pub fn valid_up_to(&self) -> usize { self.valid_up_to }
}
164 | ||
b039eaaf | 165 | /// Converts a slice of bytes to a string slice. |
1a4d82fc | 166 | /// |
b039eaaf SL |
167 | /// A string slice (`&str`) is made of bytes (`u8`), and a byte slice (`&[u8]`) |
168 | /// is made of bytes, so this function converts between the two. Not all byte | |
169 | /// slices are valid string slices, however: `&str` requires that it is valid | |
170 | /// UTF-8. `from_utf8()` checks to ensure that the bytes are valid UTF-8, and | |
171 | /// then does the conversion. | |
172 | /// | |
173 | /// If you are sure that the byte slice is valid UTF-8, and you don't want to | |
174 | /// incur the overhead of the validity check, there is an unsafe version of | |
7453a54e | 175 | /// this function, [`from_utf8_unchecked()`][fromutf8u], which has the same |
b039eaaf SL |
176 | /// behavior but skips the check. |
177 | /// | |
7453a54e | 178 | /// [fromutf8u]: fn.from_utf8_unchecked.html |
b039eaaf SL |
179 | /// |
180 | /// If you need a `String` instead of a `&str`, consider | |
181 | /// [`String::from_utf8()`][string]. | |
182 | /// | |
54a0048b | 183 | /// [string]: ../../std/string/struct.String.html#method.from_utf8 |
b039eaaf SL |
184 | /// |
185 | /// Because you can stack-allocate a `[u8; N]`, and you can take a `&[u8]` of | |
186 | /// it, this function is one way to have a stack-allocated string. There is | |
187 | /// an example of this in the examples section below. | |
1a4d82fc | 188 | /// |
7453a54e | 189 | /// # Errors |
1a4d82fc | 190 | /// |
e9174d1e SL |
191 | /// Returns `Err` if the slice is not UTF-8 with a description as to why the |
192 | /// provided slice is not UTF-8. | |
b039eaaf SL |
193 | /// |
194 | /// # Examples | |
195 | /// | |
196 | /// Basic usage: | |
197 | /// | |
198 | /// ``` | |
199 | /// use std::str; | |
200 | /// | |
201 | /// // some bytes, in a vector | |
202 | /// let sparkle_heart = vec![240, 159, 146, 150]; | |
203 | /// | |
204 | /// // We know these bytes are valid, so just use `unwrap()`. | |
205 | /// let sparkle_heart = str::from_utf8(&sparkle_heart).unwrap(); | |
206 | /// | |
207 | /// assert_eq!("💖", sparkle_heart); | |
208 | /// ``` | |
209 | /// | |
210 | /// Incorrect bytes: | |
211 | /// | |
212 | /// ``` | |
213 | /// use std::str; | |
214 | /// | |
215 | /// // some invalid bytes, in a vector | |
216 | /// let sparkle_heart = vec![0, 159, 146, 150]; | |
217 | /// | |
218 | /// assert!(str::from_utf8(&sparkle_heart).is_err()); | |
219 | /// ``` | |
220 | /// | |
221 | /// See the docs for [`Utf8Error`][error] for more details on the kinds of | |
222 | /// errors that can be returned. | |
223 | /// | |
224 | /// [error]: struct.Utf8Error.html | |
225 | /// | |
226 | /// A "stack allocated string": | |
227 | /// | |
228 | /// ``` | |
229 | /// use std::str; | |
230 | /// | |
231 | /// // some bytes, in a stack-allocated array | |
232 | /// let sparkle_heart = [240, 159, 146, 150]; | |
233 | /// | |
234 | /// // We know these bytes are valid, so just use `unwrap()`. | |
235 | /// let sparkle_heart = str::from_utf8(&sparkle_heart).unwrap(); | |
236 | /// | |
237 | /// assert_eq!("💖", sparkle_heart); | |
238 | /// ``` | |
85aaf69f | 239 | #[stable(feature = "rust1", since = "1.0.0")] |
1a4d82fc | 240 | pub fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> { |
54a0048b | 241 | run_utf8_validation(v)?; |
1a4d82fc JJ |
242 | Ok(unsafe { from_utf8_unchecked(v) }) |
243 | } | |
244 | ||
7453a54e SL |
/// Forms a mutable str from a pointer and a length.
///
/// The `len` argument is the number of bytes in the string.
///
/// # Safety
///
/// This function is unsafe as there is no guarantee that the given pointer is
/// valid for `len` bytes, nor whether the lifetime inferred is a suitable
/// lifetime for the returned str.
///
/// The data must be valid UTF-8.
///
/// `p` must be non-null, even for a zero-length str.
///
/// # Caveat
///
/// The lifetime for the returned str is inferred from its usage. To
/// prevent accidental misuse, it's suggested to tie the lifetime to whichever
/// source lifetime is safe in the context, such as by providing a helper
/// function taking the lifetime of a host value for the str, or by explicit
/// annotation.
unsafe fn from_raw_parts_mut<'a>(p: *mut u8, len: usize) -> &'a mut str {
    // Build the byte slice first, then reinterpret it as a str.
    let bytes: &'a mut [u8] = slice::from_raw_parts_mut(p, len);
    mem::transmute::<&mut [u8], &mut str>(bytes)
}
272 | ||
1a4d82fc JJ |
273 | /// Converts a slice of bytes to a string slice without checking |
274 | /// that the string contains valid UTF-8. | |
b039eaaf | 275 | /// |
7453a54e | 276 | /// See the safe version, [`from_utf8()`][fromutf8], for more information. |
b039eaaf SL |
277 | /// |
278 | /// [fromutf8]: fn.from_utf8.html | |
279 | /// | |
280 | /// # Safety | |
281 | /// | |
282 | /// This function is unsafe because it does not check that the bytes passed to | |
283 | /// it are valid UTF-8. If this constraint is violated, undefined behavior | |
284 | /// results, as the rest of Rust assumes that `&str`s are valid UTF-8. | |
285 | /// | |
286 | /// # Examples | |
287 | /// | |
288 | /// Basic usage: | |
289 | /// | |
290 | /// ``` | |
291 | /// use std::str; | |
292 | /// | |
293 | /// // some bytes, in a vector | |
294 | /// let sparkle_heart = vec![240, 159, 146, 150]; | |
295 | /// | |
296 | /// let sparkle_heart = unsafe { | |
297 | /// str::from_utf8_unchecked(&sparkle_heart) | |
298 | /// }; | |
299 | /// | |
300 | /// assert_eq!("💖", sparkle_heart); | |
301 | /// ``` | |
d9579d0f | 302 | #[inline(always)] |
85aaf69f | 303 | #[stable(feature = "rust1", since = "1.0.0")] |
e9174d1e | 304 | pub unsafe fn from_utf8_unchecked(v: &[u8]) -> &str { |
1a4d82fc JJ |
305 | mem::transmute(v) |
306 | } | |
307 | ||
85aaf69f SL |
308 | #[stable(feature = "rust1", since = "1.0.0")] |
309 | impl fmt::Display for Utf8Error { | |
310 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | |
9346a6ac | 311 | write!(f, "invalid utf-8: invalid byte near index {}", self.valid_up_to) |
85aaf69f SL |
312 | } |
313 | } | |
314 | ||
1a4d82fc JJ |
315 | /* |
316 | Section: Iterators | |
317 | */ | |
318 | ||
319 | /// Iterator for the char (representing *Unicode Scalar Values*) of a string | |
320 | /// | |
9cc50fc6 SL |
321 | /// Created with the method [`chars()`]. |
322 | /// | |
54a0048b SL |
323 | /// [`chars()`]: ../../std/primitive.str.html#method.chars |
#[derive(Clone, Debug)]
#[stable(feature = "rust1", since = "1.0.0")]
pub struct Chars<'a> {
    // Bytes not yet decoded; `next()` decodes a code point from the front of
    // this iterator and `next_back()` decodes one from the back.
    iter: slice::Iter<'a, u8>
}
329 | ||
c34b1796 AL |
/// Extracts the payload bits of a UTF-8 leading byte, giving the initial
/// code point accumulator.
///
/// The leading byte contributes its bottom 5 bits for width 2, 4 bits for
/// width 3, and 3 bits for width 4.
#[inline]
fn utf8_first_byte(byte: u8, width: u32) -> u32 {
    let payload_mask: u8 = 0x7F >> width;
    (byte & payload_mask) as u32
}
1a4d82fc | 335 | |
c34b1796 AL |
336 | /// Return the value of `ch` updated with continuation byte `byte`. |
337 | #[inline] | |
338 | fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 { (ch << 6) | (byte & CONT_MASK) as u32 } | |
1a4d82fc | 339 | |
c34b1796 AL |
340 | /// Checks whether the byte is a UTF-8 continuation byte (i.e. starts with the |
341 | /// bits `10`). | |
342 | #[inline] | |
343 | fn utf8_is_cont_byte(byte: u8) -> bool { (byte & !CONT_MASK) == TAG_CONT_U8 } | |
1a4d82fc JJ |
344 | |
/// Dereferences the byte if one is present, or falls back to `0`.
#[inline]
fn unwrap_or_0(opt: Option<&u8>) -> u8 {
    opt.map_or(0, |&byte| byte)
}
352 | ||
85aaf69f SL |
/// Reads the next code point out of a byte iterator (assuming a
/// UTF-8-like encoding).
///
/// Returns `None` only when the iterator is already exhausted. Bytes below
/// 128 are returned directly. For longer sequences, a continuation byte that
/// is missing from the iterator is treated as `0`; no validation of the
/// sequence is performed here.
#[unstable(feature = "str_internals", issue = "0")]
#[inline]
pub fn next_code_point<'a, I: Iterator<Item = &'a u8>>(bytes: &mut I) -> Option<u32> {
    // Decode UTF-8
    let x = match bytes.next() {
        None => return None,
        // Single-byte fast path
        Some(&next_byte) if next_byte < 128 => return Some(next_byte as u32),
        Some(&next_byte) => next_byte,
    };

    // Multibyte case follows
    // Decode from a byte combination out of: [[[x y] z] w]
    // NOTE: Performance is sensitive to the exact formulation here
    let init = utf8_first_byte(x, 2);
    let y = unwrap_or_0(bytes.next());
    // Two-byte result; overwritten below when `x` announces a longer sequence
    let mut ch = utf8_acc_cont_byte(init, y);
    if x >= 0xE0 {
        // [[x y z] w] case
        // 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid
        let z = unwrap_or_0(bytes.next());
        let y_z = utf8_acc_cont_byte((y & CONT_MASK) as u32, z);
        ch = init << 12 | y_z;
        if x >= 0xF0 {
            // [x y z w] case
            // use only the lower 3 bits of `init`
            let w = unwrap_or_0(bytes.next());
            ch = (init & 7) << 18 | utf8_acc_cont_byte(y_z, w);
        }
    }

    Some(ch)
}
387 | ||
c34b1796 AL |
/// Reads the last code point out of a byte iterator (assuming a
/// UTF-8-like encoding).
///
/// Returns `None` only when the iterator is already exhausted. Bytes are
/// pulled from the back until one that is not a continuation byte is found
/// (at most four bytes in total); a byte missing from the iterator is
/// treated as `0`.
#[inline]
fn next_code_point_reverse<'a,
                           I: DoubleEndedIterator<Item = &'a u8>>(bytes: &mut I) -> Option<u32> {
    // Decode UTF-8
    let w = match bytes.next_back() {
        None => return None,
        // Single-byte fast path
        Some(&next_byte) if next_byte < 128 => return Some(next_byte as u32),
        Some(&back_byte) => back_byte,
    };

    // Multibyte case follows
    // Decode from a byte combination out of: [x [y [z w]]]
    let mut ch;
    let z = unwrap_or_0(bytes.next_back());
    // Assume `z` is the leading byte of a two-byte sequence ...
    ch = utf8_first_byte(z, 2);
    if utf8_is_cont_byte(z) {
        // ... unless it is a continuation byte, in which case look further back
        let y = unwrap_or_0(bytes.next_back());
        ch = utf8_first_byte(y, 3);
        if utf8_is_cont_byte(y) {
            let x = unwrap_or_0(bytes.next_back());
            ch = utf8_first_byte(x, 4);
            ch = utf8_acc_cont_byte(ch, y);
        }
        ch = utf8_acc_cont_byte(ch, z);
    }
    // The byte read first is always the final continuation byte
    ch = utf8_acc_cont_byte(ch, w);

    Some(ch)
}
419 | ||
85aaf69f | 420 | #[stable(feature = "rust1", since = "1.0.0")] |
1a4d82fc JJ |
421 | impl<'a> Iterator for Chars<'a> { |
422 | type Item = char; | |
423 | ||
424 | #[inline] | |
425 | fn next(&mut self) -> Option<char> { | |
85aaf69f SL |
426 | next_code_point(&mut self.iter).map(|ch| { |
427 | // str invariant says `ch` is a valid Unicode Scalar Value | |
428 | unsafe { | |
e9174d1e | 429 | char::from_u32_unchecked(ch) |
1a4d82fc | 430 | } |
85aaf69f | 431 | }) |
1a4d82fc JJ |
432 | } |
433 | ||
434 | #[inline] | |
85aaf69f | 435 | fn size_hint(&self) -> (usize, Option<usize>) { |
3157f602 | 436 | let len = self.iter.len(); |
c34b1796 AL |
437 | // `(len + 3)` can't overflow, because we know that the `slice::Iter` |
438 | // belongs to a slice in memory which has a maximum length of | |
439 | // `isize::MAX` (that's well below `usize::MAX`). | |
440 | ((len + 3) / 4, Some(len)) | |
1a4d82fc JJ |
441 | } |
442 | } | |
443 | ||
85aaf69f | 444 | #[stable(feature = "rust1", since = "1.0.0")] |
1a4d82fc JJ |
445 | impl<'a> DoubleEndedIterator for Chars<'a> { |
446 | #[inline] | |
447 | fn next_back(&mut self) -> Option<char> { | |
c34b1796 AL |
448 | next_code_point_reverse(&mut self.iter).map(|ch| { |
449 | // str invariant says `ch` is a valid Unicode Scalar Value | |
450 | unsafe { | |
e9174d1e | 451 | char::from_u32_unchecked(ch) |
1a4d82fc | 452 | } |
c34b1796 | 453 | }) |
1a4d82fc JJ |
454 | } |
455 | } | |
456 | ||
e9174d1e SL |
457 | impl<'a> Chars<'a> { |
458 | /// View the underlying data as a subslice of the original data. | |
459 | /// | |
460 | /// This has the same lifetime as the original slice, and so the | |
461 | /// iterator can continue to be used while this exists. | |
5bcae85e SL |
462 | /// |
463 | /// # Examples | |
464 | /// | |
465 | /// ``` | |
466 | /// let mut chars = "abc".chars(); | |
467 | /// | |
468 | /// assert_eq!(chars.as_str(), "abc"); | |
469 | /// chars.next(); | |
470 | /// assert_eq!(chars.as_str(), "bc"); | |
471 | /// chars.next(); | |
472 | /// chars.next(); | |
473 | /// assert_eq!(chars.as_str(), ""); | |
474 | /// ``` | |
e9174d1e SL |
475 | #[stable(feature = "iter_to_slice", since = "1.4.0")] |
476 | #[inline] | |
477 | pub fn as_str(&self) -> &'a str { | |
478 | unsafe { from_utf8_unchecked(self.iter.as_slice()) } | |
479 | } | |
480 | } | |
481 | ||
/// Iterator for a string's characters and their byte offsets.
#[derive(Clone, Debug)]
#[stable(feature = "rust1", since = "1.0.0")]
pub struct CharIndices<'a> {
    // Offset reported for the next char yielded from the front; `next()`
    // advances it by the number of bytes each yielded char occupied.
    front_offset: usize,
    // Char iterator over the part of the string not yet yielded.
    iter: Chars<'a>,
}
489 | ||
85aaf69f | 490 | #[stable(feature = "rust1", since = "1.0.0")] |
1a4d82fc | 491 | impl<'a> Iterator for CharIndices<'a> { |
85aaf69f | 492 | type Item = (usize, char); |
1a4d82fc JJ |
493 | |
494 | #[inline] | |
85aaf69f | 495 | fn next(&mut self) -> Option<(usize, char)> { |
3157f602 | 496 | let pre_len = self.iter.iter.len(); |
1a4d82fc JJ |
497 | match self.iter.next() { |
498 | None => None, | |
499 | Some(ch) => { | |
500 | let index = self.front_offset; | |
3157f602 | 501 | let len = self.iter.iter.len(); |
1a4d82fc JJ |
502 | self.front_offset += pre_len - len; |
503 | Some((index, ch)) | |
504 | } | |
505 | } | |
506 | } | |
507 | ||
508 | #[inline] | |
85aaf69f | 509 | fn size_hint(&self) -> (usize, Option<usize>) { |
1a4d82fc JJ |
510 | self.iter.size_hint() |
511 | } | |
512 | } | |
513 | ||
85aaf69f | 514 | #[stable(feature = "rust1", since = "1.0.0")] |
1a4d82fc JJ |
515 | impl<'a> DoubleEndedIterator for CharIndices<'a> { |
516 | #[inline] | |
85aaf69f | 517 | fn next_back(&mut self) -> Option<(usize, char)> { |
1a4d82fc JJ |
518 | match self.iter.next_back() { |
519 | None => None, | |
520 | Some(ch) => { | |
3157f602 | 521 | let index = self.front_offset + self.iter.iter.len(); |
1a4d82fc JJ |
522 | Some((index, ch)) |
523 | } | |
524 | } | |
525 | } | |
526 | } | |
527 | ||
e9174d1e SL |
528 | impl<'a> CharIndices<'a> { |
529 | /// View the underlying data as a subslice of the original data. | |
530 | /// | |
531 | /// This has the same lifetime as the original slice, and so the | |
532 | /// iterator can continue to be used while this exists. | |
533 | #[stable(feature = "iter_to_slice", since = "1.4.0")] | |
534 | #[inline] | |
535 | pub fn as_str(&self) -> &'a str { | |
536 | self.iter.as_str() | |
537 | } | |
538 | } | |
539 | ||
1a4d82fc JJ |
/// External iterator for a string's bytes.
/// Use with the `std::iter` module.
///
/// Yields each byte by value (`u8`).
///
/// Created with the method [`bytes()`].
///
/// [`bytes()`]: ../../std/primitive.str.html#method.bytes
#[stable(feature = "rust1", since = "1.0.0")]
#[derive(Clone, Debug)]
// Newtype over a cloning slice iterator; the trait impls for `Bytes` forward
// to this inner iterator.
pub struct Bytes<'a>(Cloned<slice::Iter<'a, u8>>);
1a4d82fc | 549 | |
e9174d1e SL |
550 | #[stable(feature = "rust1", since = "1.0.0")] |
551 | impl<'a> Iterator for Bytes<'a> { | |
552 | type Item = u8; | |
1a4d82fc | 553 | |
1a4d82fc | 554 | #[inline] |
e9174d1e SL |
555 | fn next(&mut self) -> Option<u8> { |
556 | self.0.next() | |
1a4d82fc | 557 | } |
1a4d82fc | 558 | |
c34b1796 | 559 | #[inline] |
e9174d1e SL |
560 | fn size_hint(&self) -> (usize, Option<usize>) { |
561 | self.0.size_hint() | |
c34b1796 | 562 | } |
c34b1796 AL |
563 | |
564 | #[inline] | |
e9174d1e SL |
565 | fn count(self) -> usize { |
566 | self.0.count() | |
c34b1796 | 567 | } |
9346a6ac AL |
568 | |
569 | #[inline] | |
e9174d1e SL |
570 | fn last(self) -> Option<Self::Item> { |
571 | self.0.last() | |
9346a6ac AL |
572 | } |
573 | ||
574 | #[inline] | |
e9174d1e SL |
575 | fn nth(&mut self, n: usize) -> Option<Self::Item> { |
576 | self.0.nth(n) | |
9346a6ac | 577 | } |
1a4d82fc JJ |
578 | } |
579 | ||
9346a6ac AL |
580 | #[stable(feature = "rust1", since = "1.0.0")] |
581 | impl<'a> DoubleEndedIterator for Bytes<'a> { | |
582 | #[inline] | |
583 | fn next_back(&mut self) -> Option<u8> { | |
584 | self.0.next_back() | |
585 | } | |
c34b1796 AL |
586 | } |
587 | ||
9346a6ac AL |
588 | #[stable(feature = "rust1", since = "1.0.0")] |
589 | impl<'a> ExactSizeIterator for Bytes<'a> { | |
590 | #[inline] | |
591 | fn len(&self) -> usize { | |
592 | self.0.len() | |
593 | } | |
c34b1796 AL |
594 | } |
595 | ||
9346a6ac AL |
/// This macro generates a `Clone` impl for string pattern API
/// wrapper types of the form `X<'a, P>`.
///
/// Invocation shape: `derive_pattern_clone!{ clone Type with |s| expr }`.
/// Inside `expr`, the identifier given as `s` is bound to `self`, and the
/// value of `expr` becomes the result of `clone()`. The generated impl only
/// applies when `P::Searcher: Clone`.
macro_rules! derive_pattern_clone {
    (clone $t:ident with |$s:ident| $e:expr) => {
        impl<'a, P: Pattern<'a>> Clone for $t<'a, P>
            where P::Searcher: Clone
        {
            fn clone(&self) -> Self {
                let $s = self;
                $e
            }
        }
    }
}
610 | ||
/// This macro generates two public iterator structs
/// wrapping a private internal one that makes use of the `Pattern` API.
///
/// For all patterns `P: Pattern<'a>` the following items will be
/// generated (generics omitted):
///
/// ```text
/// struct $forward_iterator($internal_iterator);
/// struct $reverse_iterator($internal_iterator);
///
/// impl Iterator for $forward_iterator
/// { /* internal ends up calling Searcher::next_match() */ }
///
/// impl DoubleEndedIterator for $forward_iterator
///       where P::Searcher: DoubleEndedSearcher
/// { /* internal ends up calling Searcher::next_match_back() */ }
///
/// impl Iterator for $reverse_iterator
///       where P::Searcher: ReverseSearcher
/// { /* internal ends up calling Searcher::next_match_back() */ }
///
/// impl DoubleEndedIterator for $reverse_iterator
///       where P::Searcher: DoubleEndedSearcher
/// { /* internal ends up calling Searcher::next_match() */ }
/// ```
///
/// `Debug` and `Clone` impls are generated for both wrappers as well, each
/// conditional on `P::Searcher` implementing the same trait.
///
/// The internal one is defined outside the macro, and has almost the same
/// semantics as a `DoubleEndedIterator` by delegating to `pattern::Searcher` and
/// `pattern::ReverseSearcher` for both forward and reverse iteration.
///
/// "Almost", because a `Searcher` and a `ReverseSearcher` for a given
/// `Pattern` might not return the same elements, so actually implementing
/// `DoubleEndedIterator` for it would be incorrect.
/// (See the docs in `str::pattern` for more details)
///
/// However, the internal struct still represents a single ended iterator from
/// either end, and depending on pattern is also a valid double ended iterator,
/// so the two wrapper structs implement `Iterator`
/// and `DoubleEndedIterator` depending on the concrete pattern type, leading
/// to the complex impls seen above.
macro_rules! generate_pattern_iterators {
    {
        // Forward iterator
        forward:
            $(#[$forward_iterator_attribute:meta])*
            struct $forward_iterator:ident;

        // Reverse iterator
        reverse:
            $(#[$reverse_iterator_attribute:meta])*
            struct $reverse_iterator:ident;

        // Stability of all generated items
        stability:
            $(#[$common_stability_attribute:meta])*

        // Internal almost-iterator that is being delegated to
        internal:
            $internal_iterator:ident yielding ($iterty:ty);

        // Kind of delegation - either single ended or double ended
        delegate $($t:tt)*
    } => {
        $(#[$forward_iterator_attribute])*
        $(#[$common_stability_attribute])*
        pub struct $forward_iterator<'a, P: Pattern<'a>>($internal_iterator<'a, P>);

        $(#[$common_stability_attribute])*
        impl<'a, P: Pattern<'a>> fmt::Debug for $forward_iterator<'a, P>
            where P::Searcher: fmt::Debug
        {
            fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
                f.debug_tuple(stringify!($forward_iterator))
                    .field(&self.0)
                    .finish()
            }
        }

        $(#[$common_stability_attribute])*
        impl<'a, P: Pattern<'a>> Iterator for $forward_iterator<'a, P> {
            type Item = $iterty;

            #[inline]
            fn next(&mut self) -> Option<$iterty> {
                self.0.next()
            }
        }

        $(#[$common_stability_attribute])*
        impl<'a, P: Pattern<'a>> Clone for $forward_iterator<'a, P>
            where P::Searcher: Clone
        {
            fn clone(&self) -> Self {
                $forward_iterator(self.0.clone())
            }
        }

        $(#[$reverse_iterator_attribute])*
        $(#[$common_stability_attribute])*
        pub struct $reverse_iterator<'a, P: Pattern<'a>>($internal_iterator<'a, P>);

        $(#[$common_stability_attribute])*
        impl<'a, P: Pattern<'a>> fmt::Debug for $reverse_iterator<'a, P>
            where P::Searcher: fmt::Debug
        {
            fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
                f.debug_tuple(stringify!($reverse_iterator))
                    .field(&self.0)
                    .finish()
            }
        }

        // The reverse wrapper walks the internal iterator from its back end.
        $(#[$common_stability_attribute])*
        impl<'a, P: Pattern<'a>> Iterator for $reverse_iterator<'a, P>
            where P::Searcher: ReverseSearcher<'a>
        {
            type Item = $iterty;

            #[inline]
            fn next(&mut self) -> Option<$iterty> {
                self.0.next_back()
            }
        }

        $(#[$common_stability_attribute])*
        impl<'a, P: Pattern<'a>> Clone for $reverse_iterator<'a, P>
            where P::Searcher: Clone
        {
            fn clone(&self) -> Self {
                $reverse_iterator(self.0.clone())
            }
        }

        // Re-enter the macro with the tokens that followed `delegate`; they
        // select one of the two arms below.
        generate_pattern_iterators!($($t)* with $(#[$common_stability_attribute])*,
                                                $forward_iterator,
                                                $reverse_iterator, $iterty);
    };
    {
        // Double ended: both wrappers additionally get `DoubleEndedIterator`.
        double ended; with $(#[$common_stability_attribute:meta])*,
                           $forward_iterator:ident,
                           $reverse_iterator:ident, $iterty:ty
    } => {
        $(#[$common_stability_attribute])*
        impl<'a, P: Pattern<'a>> DoubleEndedIterator for $forward_iterator<'a, P>
            where P::Searcher: DoubleEndedSearcher<'a>
        {
            #[inline]
            fn next_back(&mut self) -> Option<$iterty> {
                self.0.next_back()
            }
        }

        $(#[$common_stability_attribute])*
        impl<'a, P: Pattern<'a>> DoubleEndedIterator for $reverse_iterator<'a, P>
            where P::Searcher: DoubleEndedSearcher<'a>
        {
            #[inline]
            fn next_back(&mut self) -> Option<$iterty> {
                self.0.next()
            }
        }
    };
    {
        // Single ended: nothing further is generated.
        single ended; with $(#[$common_stability_attribute:meta])*,
                           $forward_iterator:ident,
                           $reverse_iterator:ident, $iterty:ty
    } => {}
}
777 | ||
9346a6ac AL |
// `Clone` is generated through the macro so that it only requires
// `P::Searcher: Clone`; every field other than `matcher` is copied as-is.
derive_pattern_clone!{
    clone SplitInternal
    with |s| SplitInternal { matcher: s.matcher.clone(), ..*s }
}

/// Shared state behind the split iterators: the pieces of the haystack
/// between `start` and `end` that have not been yielded yet.
struct SplitInternal<'a, P: Pattern<'a>> {
    // Offset into the haystack where the next piece yielded from the front begins.
    start: usize,
    // Offset into the haystack where the next piece yielded from the back ends.
    end: usize,
    // Searcher that locates the separators and gives access to the haystack.
    matcher: P::Searcher,
    // When `false`, `get_end` does not yield a final piece that is empty.
    allow_trailing_empty: bool,
    // Set once the final piece has been handed out; afterwards every call
    // returns `None`.
    finished: bool,
}
790 | ||
54a0048b SL |
791 | impl<'a, P: Pattern<'a>> fmt::Debug for SplitInternal<'a, P> where P::Searcher: fmt::Debug { |
792 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | |
793 | f.debug_struct("SplitInternal") | |
794 | .field("start", &self.start) | |
795 | .field("end", &self.end) | |
796 | .field("matcher", &self.matcher) | |
797 | .field("allow_trailing_empty", &self.allow_trailing_empty) | |
798 | .field("finished", &self.finished) | |
799 | .finish() | |
800 | } | |
801 | } | |
802 | ||
// Core stepping logic for the split iterators.
//
// NOTE(review): every `slice_unchecked` call below uses offsets taken from the
// searcher's match results or from `start`/`end`; this presumably relies on
// the searcher only reporting matches on UTF-8 boundaries — confirm against
// the `Searcher` contract in the `pattern` module.
impl<'a, P: Pattern<'a>> SplitInternal<'a, P> {
    // Yields the remaining piece `haystack[start..end]` and marks the iterator
    // as finished. Returns `None` if the iterator is already finished, or if
    // the remaining piece is empty and trailing empty pieces are not allowed.
    #[inline]
    fn get_end(&mut self) -> Option<&'a str> {
        if !self.finished && (self.allow_trailing_empty || self.end - self.start > 0) {
            self.finished = true;
            unsafe {
                let string = self.matcher.haystack().slice_unchecked(self.start, self.end);
                Some(string)
            }
        } else {
            None
        }
    }

    // Forward step: yields the text between `start` and the next match, then
    // moves `start` past that match. Once the searcher has no more matches the
    // remainder is produced through `get_end`.
    #[inline]
    fn next(&mut self) -> Option<&'a str> {
        if self.finished { return None }

        let haystack = self.matcher.haystack();
        match self.matcher.next_match() {
            Some((a, b)) => unsafe {
                let elt = haystack.slice_unchecked(self.start, a);
                self.start = b;
                Some(elt)
            },
            None => self.get_end(),
        }
    }

    // Backward step: yields the text between the last remaining match and
    // `end`, then moves `end` back to the start of that match.
    #[inline]
    fn next_back(&mut self) -> Option<&'a str>
        where P::Searcher: ReverseSearcher<'a>
    {
        if self.finished { return None }

        // Only entered while trailing empty pieces are suppressed. The flag is
        // flipped first, so the recursive call below skips this branch and the
        // branch itself runs at most once. A non-empty piece from that call is
        // returned directly; an empty one is discarded and the search goes on,
        // unless the recursive call already finished the iterator.
        if !self.allow_trailing_empty {
            self.allow_trailing_empty = true;
            match self.next_back() {
                Some(elt) if !elt.is_empty() => return Some(elt),
                _ => if self.finished { return None }
            }
        }

        let haystack = self.matcher.haystack();
        match self.matcher.next_match_back() {
            Some((a, b)) => unsafe {
                let elt = haystack.slice_unchecked(b, self.end);
                self.end = a;
                Some(elt)
            },
            // No match left: whatever remains between `start` and `end` is the
            // last piece.
            None => unsafe {
                self.finished = true;
                Some(haystack.slice_unchecked(self.start, self.end))
            },
        }
    }
}
860 | ||
9346a6ac AL |
// Declares the public `Split` iterator and its reverse counterpart `RSplit`.
// Both are generated around `SplitInternal` and yield `&'a str`.
// `delegate double ended` appears to select the macro arm that additionally
// emits `DoubleEndedIterator` impls bounded on
// `P::Searcher: DoubleEndedSearcher<'a>`.
generate_pattern_iterators! {
    forward:
        /// Created with the method [`split()`].
        ///
        /// [`split()`]: ../../std/primitive.str.html#method.split
        struct Split;
    reverse:
        /// Created with the method [`rsplit()`].
        ///
        /// [`rsplit()`]: ../../std/primitive.str.html#method.rsplit
        struct RSplit;
    stability:
        #[stable(feature = "rust1", since = "1.0.0")]
    internal:
        SplitInternal yielding (&'a str);
    delegate double ended;
}
878 | ||
// Declares the public `SplitTerminator` / `RSplitTerminator` iterator pair.
// They reuse the same `SplitInternal` state machine as `Split` / `RSplit` and
// likewise yield `&'a str` with double-ended delegation.
generate_pattern_iterators! {
    forward:
        /// Created with the method [`split_terminator()`].
        ///
        /// [`split_terminator()`]: ../../std/primitive.str.html#method.split_terminator
        struct SplitTerminator;
    reverse:
        /// Created with the method [`rsplit_terminator()`].
        ///
        /// [`rsplit_terminator()`]: ../../std/primitive.str.html#method.rsplit_terminator
        struct RSplitTerminator;
    stability:
        #[stable(feature = "rust1", since = "1.0.0")]
    internal:
        SplitInternal yielding (&'a str);
    delegate double ended;
}
1a4d82fc | 896 | |
9346a6ac AL |
// Clone support for `SplitNInternal`: the wrapped splitter is cloned
// explicitly, the remaining field is copied via struct-update syntax.
derive_pattern_clone!{
    clone SplitNInternal
    with |s| SplitNInternal { iter: s.iter.clone(), ..*s }
}

// A `SplitInternal` paired with a budget that limits how many pieces it may
// still hand out.
struct SplitNInternal<'a, P: Pattern<'a>> {
    /// The underlying unbounded splitter.
    iter: SplitInternal<'a, P>,
    /// The number of splits remaining
    count: usize,
}
907 | ||
54a0048b SL |
908 | impl<'a, P: Pattern<'a>> fmt::Debug for SplitNInternal<'a, P> where P::Searcher: fmt::Debug { |
909 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | |
910 | f.debug_struct("SplitNInternal") | |
911 | .field("iter", &self.iter) | |
912 | .field("count", &self.count) | |
913 | .finish() | |
914 | } | |
915 | } | |
916 | ||
9346a6ac | 917 | impl<'a, P: Pattern<'a>> SplitNInternal<'a, P> { |
1a4d82fc JJ |
918 | #[inline] |
919 | fn next(&mut self) -> Option<&'a str> { | |
c34b1796 AL |
920 | match self.count { |
921 | 0 => None, | |
922 | 1 => { self.count = 0; self.iter.get_end() } | |
923 | _ => { self.count -= 1; self.iter.next() } | |
1a4d82fc JJ |
924 | } |
925 | } | |
1a4d82fc | 926 | |
c34b1796 | 927 | #[inline] |
9346a6ac AL |
928 | fn next_back(&mut self) -> Option<&'a str> |
929 | where P::Searcher: ReverseSearcher<'a> | |
930 | { | |
931 | match self.count { | |
932 | 0 => None, | |
933 | 1 => { self.count = 0; self.iter.get_end() } | |
934 | _ => { self.count -= 1; self.iter.next_back() } | |
c34b1796 AL |
935 | } |
936 | } | |
1a4d82fc JJ |
937 | } |
938 | ||
9346a6ac AL |
// Declares the public `SplitN` / `RSplitN` iterator pair around
// `SplitNInternal`, yielding `&'a str`. `delegate single ended` selects the
// macro arm that expands to nothing, so no `DoubleEndedIterator` impls are
// generated for these two types.
generate_pattern_iterators! {
    forward:
        /// Created with the method [`splitn()`].
        ///
        /// [`splitn()`]: ../../std/primitive.str.html#method.splitn
        struct SplitN;
    reverse:
        /// Created with the method [`rsplitn()`].
        ///
        /// [`rsplitn()`]: ../../std/primitive.str.html#method.rsplitn
        struct RSplitN;
    stability:
        #[stable(feature = "rust1", since = "1.0.0")]
    internal:
        SplitNInternal yielding (&'a str);
    delegate single ended;
}
956 | ||
// Clone support for `MatchIndicesInternal`: a clone wraps a clone of the
// searcher.
derive_pattern_clone!{
    clone MatchIndicesInternal
    with |s| MatchIndicesInternal(s.0.clone())
}

// Newtype around the pattern's searcher; the methods on it pair every match
// with its starting byte offset.
struct MatchIndicesInternal<'a, P: Pattern<'a>>(P::Searcher);
963 | ||
54a0048b SL |
964 | impl<'a, P: Pattern<'a>> fmt::Debug for MatchIndicesInternal<'a, P> where P::Searcher: fmt::Debug { |
965 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | |
966 | f.debug_tuple("MatchIndicesInternal") | |
967 | .field(&self.0) | |
968 | .finish() | |
969 | } | |
970 | } | |
971 | ||
9346a6ac AL |
972 | impl<'a, P: Pattern<'a>> MatchIndicesInternal<'a, P> { |
973 | #[inline] | |
b039eaaf SL |
974 | fn next(&mut self) -> Option<(usize, &'a str)> { |
975 | self.0.next_match().map(|(start, end)| unsafe { | |
976 | (start, self.0.haystack().slice_unchecked(start, end)) | |
977 | }) | |
9346a6ac AL |
978 | } |
979 | ||
980 | #[inline] | |
b039eaaf | 981 | fn next_back(&mut self) -> Option<(usize, &'a str)> |
9346a6ac AL |
982 | where P::Searcher: ReverseSearcher<'a> |
983 | { | |
b039eaaf SL |
984 | self.0.next_match_back().map(|(start, end)| unsafe { |
985 | (start, self.0.haystack().slice_unchecked(start, end)) | |
986 | }) | |
9346a6ac AL |
987 | } |
988 | } | |
989 | ||
// Declares the public `MatchIndices` / `RMatchIndices` iterator pair around
// `MatchIndicesInternal`; items are `(usize, &'a str)` tuples and the
// double-ended delegation is requested.
generate_pattern_iterators! {
    forward:
        /// Created with the method [`match_indices()`].
        ///
        /// [`match_indices()`]: ../../std/primitive.str.html#method.match_indices
        struct MatchIndices;
    reverse:
        /// Created with the method [`rmatch_indices()`].
        ///
        /// [`rmatch_indices()`]: ../../std/primitive.str.html#method.rmatch_indices
        struct RMatchIndices;
    stability:
        #[stable(feature = "str_match_indices", since = "1.5.0")]
    internal:
        MatchIndicesInternal yielding ((usize, &'a str));
    delegate double ended;
}
1007 | ||
// Clone support for `MatchesInternal`: a clone wraps a clone of the searcher.
derive_pattern_clone!{
    clone MatchesInternal
    with |s| MatchesInternal(s.0.clone())
}

// Newtype around the pattern's searcher; the methods on it turn every match
// into the matched substring.
struct MatchesInternal<'a, P: Pattern<'a>>(P::Searcher);
1014 | ||
54a0048b SL |
1015 | impl<'a, P: Pattern<'a>> fmt::Debug for MatchesInternal<'a, P> where P::Searcher: fmt::Debug { |
1016 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | |
1017 | f.debug_tuple("MatchesInternal") | |
1018 | .field(&self.0) | |
1019 | .finish() | |
1020 | } | |
1021 | } | |
1022 | ||
9346a6ac AL |
1023 | impl<'a, P: Pattern<'a>> MatchesInternal<'a, P> { |
1024 | #[inline] | |
1025 | fn next(&mut self) -> Option<&'a str> { | |
1026 | self.0.next_match().map(|(a, b)| unsafe { | |
1027 | // Indices are known to be on utf8 boundaries | |
1028 | self.0.haystack().slice_unchecked(a, b) | |
1029 | }) | |
1030 | } | |
1031 | ||
1032 | #[inline] | |
1033 | fn next_back(&mut self) -> Option<&'a str> | |
1034 | where P::Searcher: ReverseSearcher<'a> | |
1035 | { | |
1036 | self.0.next_match_back().map(|(a, b)| unsafe { | |
1037 | // Indices are known to be on utf8 boundaries | |
1038 | self.0.haystack().slice_unchecked(a, b) | |
1039 | }) | |
1040 | } | |
1041 | } | |
1042 | ||
// Declares the public `Matches` / `RMatches` iterator pair around
// `MatchesInternal`, yielding `&'a str`, with the double-ended delegation
// requested.
generate_pattern_iterators! {
    forward:
        /// Created with the method [`matches()`].
        ///
        /// [`matches()`]: ../../std/primitive.str.html#method.matches
        struct Matches;
    reverse:
        /// Created with the method [`rmatches()`].
        ///
        /// [`rmatches()`]: ../../std/primitive.str.html#method.rmatches
        struct RMatches;
    stability:
        #[stable(feature = "str_matches", since = "1.2.0")]
    internal:
        MatchesInternal yielding (&'a str);
    delegate double ended;
}
1060 | ||
9cc50fc6 SL |
1061 | /// Created with the method [`lines()`]. |
1062 | /// | |
54a0048b | 1063 | /// [`lines()`]: ../../std/primitive.str.html#method.lines |
c34b1796 | 1064 | #[stable(feature = "rust1", since = "1.0.0")] |
54a0048b | 1065 | #[derive(Clone, Debug)] |
e9174d1e | 1066 | pub struct Lines<'a>(Map<SplitTerminator<'a, char>, LinesAnyMap>); |
9346a6ac AL |
1067 | |
1068 | #[stable(feature = "rust1", since = "1.0.0")] | |
1069 | impl<'a> Iterator for Lines<'a> { | |
c34b1796 AL |
1070 | type Item = &'a str; |
1071 | ||
1072 | #[inline] | |
1073 | fn next(&mut self) -> Option<&'a str> { | |
9346a6ac AL |
1074 | self.0.next() |
1075 | } | |
c34b1796 | 1076 | |
9346a6ac AL |
1077 | #[inline] |
1078 | fn size_hint(&self) -> (usize, Option<usize>) { | |
1079 | self.0.size_hint() | |
1080 | } | |
1081 | } | |
1082 | ||
1083 | #[stable(feature = "rust1", since = "1.0.0")] | |
1084 | impl<'a> DoubleEndedIterator for Lines<'a> { | |
1085 | #[inline] | |
1086 | fn next_back(&mut self) -> Option<&'a str> { | |
1087 | self.0.next_back() | |
1088 | } | |
1089 | } | |
1090 | ||
9cc50fc6 SL |
/// Created with the method [`lines_any()`].
///
/// [`lines_any()`]: ../../std/primitive.str.html#method.lines_any
// Deprecated newtype that simply wraps a `Lines` iterator; the
// `allow(deprecated)` silences the lint on the definition itself.
#[stable(feature = "rust1", since = "1.0.0")]
#[rustc_deprecated(since = "1.4.0", reason = "use lines()/Lines instead now")]
#[derive(Clone, Debug)]
#[allow(deprecated)]
pub struct LinesAny<'a>(Lines<'a>);
9346a6ac | 1099 | |
b039eaaf | 1100 | /// A nameable, cloneable fn type |
9346a6ac AL |
1101 | #[derive(Clone)] |
1102 | struct LinesAnyMap; | |
1103 | ||
1104 | impl<'a> Fn<(&'a str,)> for LinesAnyMap { | |
1105 | #[inline] | |
1106 | extern "rust-call" fn call(&self, (line,): (&'a str,)) -> &'a str { | |
1107 | let l = line.len(); | |
1108 | if l > 0 && line.as_bytes()[l - 1] == b'\r' { &line[0 .. l - 1] } | |
1109 | else { line } | |
1110 | } | |
1111 | } | |
1112 | ||
1113 | impl<'a> FnMut<(&'a str,)> for LinesAnyMap { | |
1114 | #[inline] | |
1115 | extern "rust-call" fn call_mut(&mut self, (line,): (&'a str,)) -> &'a str { | |
1116 | Fn::call(&*self, (line,)) | |
1117 | } | |
1118 | } | |
1119 | ||
1120 | impl<'a> FnOnce<(&'a str,)> for LinesAnyMap { | |
1121 | type Output = &'a str; | |
1122 | ||
1123 | #[inline] | |
1124 | extern "rust-call" fn call_once(self, (line,): (&'a str,)) -> &'a str { | |
1125 | Fn::call(&self, (line,)) | |
1a4d82fc | 1126 | } |
c34b1796 | 1127 | } |
1a4d82fc | 1128 | |
c34b1796 | 1129 | #[stable(feature = "rust1", since = "1.0.0")] |
e9174d1e | 1130 | #[allow(deprecated)] |
9346a6ac | 1131 | impl<'a> Iterator for LinesAny<'a> { |
c34b1796 AL |
1132 | type Item = &'a str; |
1133 | ||
1134 | #[inline] | |
1135 | fn next(&mut self) -> Option<&'a str> { | |
9346a6ac AL |
1136 | self.0.next() |
1137 | } | |
1138 | ||
1139 | #[inline] | |
1140 | fn size_hint(&self) -> (usize, Option<usize>) { | |
1141 | self.0.size_hint() | |
1142 | } | |
1143 | } | |
1144 | ||
1145 | #[stable(feature = "rust1", since = "1.0.0")] | |
e9174d1e | 1146 | #[allow(deprecated)] |
9346a6ac AL |
1147 | impl<'a> DoubleEndedIterator for LinesAny<'a> { |
1148 | #[inline] | |
1149 | fn next_back(&mut self) -> Option<&'a str> { | |
1150 | self.0.next_back() | |
1a4d82fc JJ |
1151 | } |
1152 | } | |
1153 | ||
1a4d82fc JJ |
1154 | /* |
1155 | Section: Comparing strings | |
1156 | */ | |
1157 | ||
/// Bytewise slice equality
///
/// Returns `true` when `a` and `b` consist of exactly the same bytes.
///
/// NOTE: This function is (ab)used in rustc::middle::trans::_match
/// to compare &[u8] byte slices that are not necessarily valid UTF-8.
#[lang = "str_eq"]
#[inline]
fn eq_slice(a: &str, b: &str) -> bool {
    a.as_bytes() == b.as_bytes()
}
1166 | ||
1a4d82fc | 1167 | /* |
9cc50fc6 | 1168 | Section: UTF-8 validation |
1a4d82fc JJ |
1169 | */ |
1170 | ||
9cc50fc6 SL |
// The 64-bit pattern has the top bit of every byte set; the `as usize` cast
// truncates it on targets whose `usize` is narrower than `u64`.
const NONASCII_MASK: usize = 0x80808080_80808080u64 as usize;

/// Reports whether at least one byte of the word `x` has its high bit set,
/// i.e. lies outside the ASCII range (>= 128).
#[inline]
fn contains_nonascii(x: usize) -> bool {
    let high_bits = x & NONASCII_MASK;
    high_bits != 0
}
1179 | ||
1a4d82fc JJ |
/// Walk through `v` checking that it's a valid UTF-8 sequence,
/// returning `Ok(())` in that case, or, if it is invalid, a `Utf8Error`
/// whose `valid_up_to` is the offset of the first byte of the
/// invalid sequence.
#[inline(always)]
fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
    let mut offset = 0;
    let len = v.len();
    while offset < len {
        // Start of the sequence currently being examined; reported on error.
        let old_offset = offset;
        macro_rules! err { () => {{
            return Err(Utf8Error {
                valid_up_to: old_offset
            })
        }}}

        // Advance to the following byte and read it, failing if the input
        // ends in the middle of a sequence.
        macro_rules! next { () => {{
            offset += 1;
            // we needed data, but there was none: error!
            if offset >= len {
                err!()
            }
            v[offset]
        }}}

        let first = v[offset];
        if first >= 128 {
            let w = UTF8_CHAR_WIDTH[first as usize];
            let second = next!();
            // 2-byte encoding is for codepoints \u{0080} to \u{07ff}
            // first C2 80 last DF BF
            // 3-byte encoding is for codepoints \u{0800} to \u{ffff}
            // first E0 A0 80 last EF BF BF
            // excluding surrogates codepoints \u{d800} to \u{dfff}
            // ED A0 80 to ED BF BF
            // 4-byte encoding is for codepoints \u{10000} to \u{10ffff}
            // first F0 90 80 80 last F4 8F BF BF
            //
            // Use the UTF-8 syntax from the RFC
            //
            // https://tools.ietf.org/html/rfc3629
            // UTF8-1 = %x00-7F
            // UTF8-2 = %xC2-DF UTF8-tail
            // UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
            // %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
            // UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
            // %xF4 %x80-8F 2( UTF8-tail )
            match w {
                2 => if second & !CONT_MASK != TAG_CONT_U8 {err!()},
                3 => {
                    match (first, second, next!() & !CONT_MASK) {
                        (0xE0 , 0xA0 ... 0xBF, TAG_CONT_U8) |
                        (0xE1 ... 0xEC, 0x80 ... 0xBF, TAG_CONT_U8) |
                        (0xED , 0x80 ... 0x9F, TAG_CONT_U8) |
                        (0xEE ... 0xEF, 0x80 ... 0xBF, TAG_CONT_U8) => {}
                        _ => err!()
                    }
                }
                4 => {
                    match (first, second, next!() & !CONT_MASK, next!() & !CONT_MASK) {
                        (0xF0 , 0x90 ... 0xBF, TAG_CONT_U8, TAG_CONT_U8) |
                        (0xF1 ... 0xF3, 0x80 ... 0xBF, TAG_CONT_U8, TAG_CONT_U8) |
                        (0xF4 , 0x80 ... 0x8F, TAG_CONT_U8, TAG_CONT_U8) => {}
                        _ => err!()
                    }
                }
                // Width 0 in the table: `first` cannot start a sequence.
                _ => err!()
            }
            // Step past the last byte of the sequence just validated.
            offset += 1;
        } else {
            // Ascii case, try to skip forward quickly.
            // When the pointer is aligned, read 2 words of data per iteration
            // until we find a word containing a non-ascii byte.
            let usize_bytes = mem::size_of::<usize>();
            let bytes_per_iteration = 2 * usize_bytes;
            let ptr = v.as_ptr();
            // Zero when the address of `v[offset]` is a multiple of the word size.
            let align = (ptr as usize + offset) & (usize_bytes - 1);
            if align == 0 {
                // The length guard keeps `len - bytes_per_iteration` from
                // underflowing in the loop condition.
                if len >= bytes_per_iteration {
                    while offset <= len - bytes_per_iteration {
                        unsafe {
                            // Note: this `v` shadows the input slice only
                            // inside this `unsafe` block.
                            let u = *(ptr.offset(offset as isize) as *const usize);
                            let v = *(ptr.offset((offset + usize_bytes) as isize) as *const usize);

                            // break if there is a nonascii byte
                            let zu = contains_nonascii(u);
                            let zv = contains_nonascii(v);
                            if zu || zv {
                                break;
                            }
                        }
                        offset += bytes_per_iteration;
                    }
                }
                // step from the point where the wordwise loop stopped
                while offset < len && v[offset] < 128 {
                    offset += 1;
                }
            } else {
                // Unaligned position: consume this single ASCII byte.
                offset += 1;
            }
        }
    }

    Ok(())
}
1286 | ||
// https://tools.ietf.org/html/rfc3629
// Lookup table indexed by the first byte of a sequence, giving the width of
// the encoded character in bytes: 1 for 0x00-0x7F, 2 for 0xC2-0xDF, 3 for
// 0xE0-0xEF, 4 for 0xF0-0xF4, and 0 for bytes that cannot start a sequence
// (0x80-0xC1 and 0xF5-0xFF).
static UTF8_CHAR_WIDTH: [u8; 256] = [
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x1F
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x3F
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x5F
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x7F
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x9F
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xBF
0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xDF
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, // 0xEF
4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, // 0xFF
];

/// Mask of the value bits of a continuation byte
const CONT_MASK: u8 = 0b0011_1111;
/// Value of the tag bits (tag mask is !CONT_MASK) of a continuation byte
const TAG_CONT_U8: u8 = 0b1000_0000;
1a4d82fc JJ |
1311 | |
1312 | /* | |
1313 | Section: Trait implementations | |
1314 | */ | |
1315 | ||
1316 | mod traits { | |
54a0048b | 1317 | use cmp::{Ord, Ordering, PartialEq, PartialOrd, Eq}; |
1a4d82fc JJ |
1318 | use option::Option; |
1319 | use option::Option::Some; | |
1320 | use ops; | |
1321 | use str::{StrExt, eq_slice}; | |
1322 | ||
85aaf69f | 1323 | #[stable(feature = "rust1", since = "1.0.0")] |
1a4d82fc JJ |
1324 | impl Ord for str { |
1325 | #[inline] | |
1326 | fn cmp(&self, other: &str) -> Ordering { | |
54a0048b | 1327 | self.as_bytes().cmp(other.as_bytes()) |
1a4d82fc JJ |
1328 | } |
1329 | } | |
1330 | ||
85aaf69f | 1331 | #[stable(feature = "rust1", since = "1.0.0")] |
1a4d82fc JJ |
1332 | impl PartialEq for str { |
1333 | #[inline] | |
1334 | fn eq(&self, other: &str) -> bool { | |
1335 | eq_slice(self, other) | |
1336 | } | |
1337 | #[inline] | |
1338 | fn ne(&self, other: &str) -> bool { !(*self).eq(other) } | |
1339 | } | |
1340 | ||
85aaf69f | 1341 | #[stable(feature = "rust1", since = "1.0.0")] |
1a4d82fc JJ |
1342 | impl Eq for str {} |
1343 | ||
85aaf69f | 1344 | #[stable(feature = "rust1", since = "1.0.0")] |
1a4d82fc JJ |
1345 | impl PartialOrd for str { |
1346 | #[inline] | |
1347 | fn partial_cmp(&self, other: &str) -> Option<Ordering> { | |
1348 | Some(self.cmp(other)) | |
1349 | } | |
1350 | } | |
1351 | ||
54a0048b SL |
1352 | /// Implements substring slicing with syntax `&self[begin .. end]`. |
1353 | /// | |
85aaf69f SL |
1354 | /// Returns a slice of the given string from the byte range |
1355 | /// [`begin`..`end`). | |
1356 | /// | |
1357 | /// This operation is `O(1)`. | |
1358 | /// | |
54a0048b SL |
1359 | /// # Panics |
1360 | /// | |
1361 | /// Panics if `begin` or `end` does not point to the starting | |
1362 | /// byte offset of a character (as defined by `is_char_boundary`). | |
1363 | /// Requires that `begin <= end` and `end <= len` where `len` is the | |
1364 | /// length of the string. | |
85aaf69f | 1365 | /// |
c34b1796 | 1366 | /// # Examples |
85aaf69f | 1367 | /// |
c34b1796 | 1368 | /// ``` |
85aaf69f SL |
1369 | /// let s = "Löwe 老虎 Léopard"; |
1370 | /// assert_eq!(&s[0 .. 1], "L"); | |
1371 | /// | |
1372 | /// assert_eq!(&s[1 .. 9], "öwe 老"); | |
1373 | /// | |
1374 | /// // these will panic: | |
1375 | /// // byte 2 lies within `ö`: | |
1376 | /// // &s[2 ..3]; | |
1377 | /// | |
1378 | /// // byte 8 lies within `老` | |
1379 | /// // &s[1 .. 8]; | |
1380 | /// | |
1381 | /// // byte 100 is outside the string | |
1382 | /// // &s[3 .. 100]; | |
1383 | /// ``` | |
1384 | #[stable(feature = "rust1", since = "1.0.0")] | |
1385 | impl ops::Index<ops::Range<usize>> for str { | |
1a4d82fc JJ |
1386 | type Output = str; |
1387 | #[inline] | |
c34b1796 | 1388 | fn index(&self, index: ops::Range<usize>) -> &str { |
85aaf69f SL |
1389 | // is_char_boundary checks that the index is in [0, .len()] |
1390 | if index.start <= index.end && | |
1391 | self.is_char_boundary(index.start) && | |
1392 | self.is_char_boundary(index.end) { | |
1393 | unsafe { self.slice_unchecked(index.start, index.end) } | |
1394 | } else { | |
1395 | super::slice_error_fail(self, index.start, index.end) | |
1396 | } | |
1a4d82fc JJ |
1397 | } |
1398 | } | |
85aaf69f | 1399 | |
54a0048b SL |
1400 | /// Implements mutable substring slicing with syntax |
1401 | /// `&mut self[begin .. end]`. | |
1402 | /// | |
c1a9b12d SL |
1403 | /// Returns a mutable slice of the given string from the byte range |
1404 | /// [`begin`..`end`). | |
54a0048b SL |
1405 | /// |
1406 | /// This operation is `O(1)`. | |
1407 | /// | |
1408 | /// # Panics | |
1409 | /// | |
1410 | /// Panics if `begin` or `end` does not point to the starting | |
1411 | /// byte offset of a character (as defined by `is_char_boundary`). | |
1412 | /// Requires that `begin <= end` and `end <= len` where `len` is the | |
1413 | /// length of the string. | |
c1a9b12d SL |
1414 | #[stable(feature = "derefmut_for_string", since = "1.2.0")] |
1415 | impl ops::IndexMut<ops::Range<usize>> for str { | |
1416 | #[inline] | |
1417 | fn index_mut(&mut self, index: ops::Range<usize>) -> &mut str { | |
1418 | // is_char_boundary checks that the index is in [0, .len()] | |
1419 | if index.start <= index.end && | |
1420 | self.is_char_boundary(index.start) && | |
1421 | self.is_char_boundary(index.end) { | |
1422 | unsafe { self.slice_mut_unchecked(index.start, index.end) } | |
1423 | } else { | |
1424 | super::slice_error_fail(self, index.start, index.end) | |
1425 | } | |
1426 | } | |
1427 | } | |
1428 | ||
54a0048b | 1429 | /// Implements substring slicing with syntax `&self[.. end]`. |
85aaf69f | 1430 | /// |
54a0048b SL |
1431 | /// Returns a slice of the string from the beginning to byte offset |
1432 | /// `end`. | |
85aaf69f | 1433 | /// |
54a0048b | 1434 | /// Equivalent to `&self[0 .. end]`. |
85aaf69f SL |
1435 | #[stable(feature = "rust1", since = "1.0.0")] |
1436 | impl ops::Index<ops::RangeTo<usize>> for str { | |
1a4d82fc | 1437 | type Output = str; |
c34b1796 | 1438 | |
1a4d82fc | 1439 | #[inline] |
c34b1796 | 1440 | fn index(&self, index: ops::RangeTo<usize>) -> &str { |
85aaf69f SL |
1441 | // is_char_boundary checks that the index is in [0, .len()] |
1442 | if self.is_char_boundary(index.end) { | |
1443 | unsafe { self.slice_unchecked(0, index.end) } | |
1444 | } else { | |
1445 | super::slice_error_fail(self, 0, index.end) | |
1446 | } | |
1a4d82fc JJ |
1447 | } |
1448 | } | |
85aaf69f | 1449 | |
54a0048b SL |
1450 | /// Implements mutable substring slicing with syntax `&mut self[.. end]`. |
1451 | /// | |
1452 | /// Returns a mutable slice of the string from the beginning to byte offset | |
c1a9b12d | 1453 | /// `end`. |
54a0048b SL |
1454 | /// |
1455 | /// Equivalent to `&mut self[0 .. end]`. | |
c1a9b12d SL |
1456 | #[stable(feature = "derefmut_for_string", since = "1.2.0")] |
1457 | impl ops::IndexMut<ops::RangeTo<usize>> for str { | |
1458 | #[inline] | |
1459 | fn index_mut(&mut self, index: ops::RangeTo<usize>) -> &mut str { | |
1460 | // is_char_boundary checks that the index is in [0, .len()] | |
1461 | if self.is_char_boundary(index.end) { | |
1462 | unsafe { self.slice_mut_unchecked(0, index.end) } | |
1463 | } else { | |
1464 | super::slice_error_fail(self, 0, index.end) | |
1465 | } | |
1466 | } | |
1467 | } | |
1468 | ||
54a0048b | 1469 | /// Implements substring slicing with syntax `&self[begin ..]`. |
85aaf69f | 1470 | /// |
54a0048b SL |
1471 | /// Returns a slice of the string from byte offset `begin` |
1472 | /// to the end of the string. | |
85aaf69f | 1473 | /// |
54a0048b | 1474 | /// Equivalent to `&self[begin .. len]`. |
85aaf69f SL |
1475 | #[stable(feature = "rust1", since = "1.0.0")] |
1476 | impl ops::Index<ops::RangeFrom<usize>> for str { | |
1a4d82fc | 1477 | type Output = str; |
c34b1796 | 1478 | |
1a4d82fc | 1479 | #[inline] |
c34b1796 | 1480 | fn index(&self, index: ops::RangeFrom<usize>) -> &str { |
85aaf69f SL |
1481 | // is_char_boundary checks that the index is in [0, .len()] |
1482 | if self.is_char_boundary(index.start) { | |
1483 | unsafe { self.slice_unchecked(index.start, self.len()) } | |
1484 | } else { | |
1485 | super::slice_error_fail(self, index.start, self.len()) | |
1486 | } | |
1a4d82fc JJ |
1487 | } |
1488 | } | |
85aaf69f | 1489 | |
54a0048b SL |
1490 | /// Implements mutable substring slicing with syntax `&mut self[begin ..]`. |
1491 | /// | |
1492 | /// Returns a mutable slice of the string from byte offset `begin` | |
1493 | /// to the end of the string. | |
1494 | /// | |
1495 | /// Equivalent to `&mut self[begin .. len]`. | |
c1a9b12d SL |
1496 | #[stable(feature = "derefmut_for_string", since = "1.2.0")] |
1497 | impl ops::IndexMut<ops::RangeFrom<usize>> for str { | |
1498 | #[inline] | |
1499 | fn index_mut(&mut self, index: ops::RangeFrom<usize>) -> &mut str { | |
1500 | // is_char_boundary checks that the index is in [0, .len()] | |
1501 | if self.is_char_boundary(index.start) { | |
1502 | let len = self.len(); | |
1503 | unsafe { self.slice_mut_unchecked(index.start, len) } | |
1504 | } else { | |
1505 | super::slice_error_fail(self, index.start, self.len()) | |
1506 | } | |
1507 | } | |
1508 | } | |
1509 | ||
54a0048b SL |
/// Implements substring slicing with syntax `&self[..]`.
///
/// Returns a slice of the whole string. This operation can
/// never panic.
///
/// Equivalent to `&self[0 .. len]`.
#[stable(feature = "rust1", since = "1.0.0")]
impl ops::Index<ops::RangeFull> for str {
    type Output = str;

    #[inline]
    fn index(&self, _index: ops::RangeFull) -> &str {
        // The full range is the string itself, so no boundary check is made.
        self
    }
}

/// Implements mutable substring slicing with syntax `&mut self[..]`.
///
/// Returns a mutable slice of the whole string. This operation can
/// never panic.
///
/// Equivalent to `&mut self[0 .. len]`.
#[stable(feature = "derefmut_for_string", since = "1.2.0")]
impl ops::IndexMut<ops::RangeFull> for str {
    #[inline]
    fn index_mut(&mut self, _index: ops::RangeFull) -> &mut str {
        // The full range is the string itself, so no boundary check is made.
        self
    }
}
54a0048b SL |
1539 | |
1540 | #[unstable(feature = "inclusive_range", | |
1541 | reason = "recently added, follows RFC", | |
1542 | issue = "28237")] | |
1543 | impl ops::Index<ops::RangeInclusive<usize>> for str { | |
1544 | type Output = str; | |
1545 | ||
1546 | #[inline] | |
1547 | fn index(&self, index: ops::RangeInclusive<usize>) -> &str { | |
1548 | match index { | |
1549 | ops::RangeInclusive::Empty { .. } => "", | |
1550 | ops::RangeInclusive::NonEmpty { end, .. } if end == usize::max_value() => | |
1551 | panic!("attempted to index slice up to maximum usize"), | |
1552 | ops::RangeInclusive::NonEmpty { start, end } => | |
1553 | self.index(start .. end+1) | |
1554 | } | |
1555 | } | |
1556 | } | |
/// Implements substring slicing with syntax `&self[... end]`.
///
/// Forwards to the inclusive-range implementation with the range
/// `0...end`.
#[unstable(feature = "inclusive_range",
           reason = "recently added, follows RFC",
           issue = "28237")]
impl ops::Index<ops::RangeToInclusive<usize>> for str {
    type Output = str;

    #[inline]
    fn index(&self, index: ops::RangeToInclusive<usize>) -> &str {
        self.index(0...index.end)
    }
}
1568 | ||
/// Implements mutable substring slicing with syntax
/// `&mut self[begin ... end]`.
///
/// An empty inclusive range yields the empty slice `&mut self[0..0]`; a
/// non-empty one is forwarded as `&mut self[begin .. end + 1]`.
///
/// # Panics
///
/// Panics if `end` is `usize::max_value()`, and otherwise under the same
/// conditions as `&mut self[begin .. end + 1]`.
#[unstable(feature = "inclusive_range",
           reason = "recently added, follows RFC",
           issue = "28237")]
impl ops::IndexMut<ops::RangeInclusive<usize>> for str {
    #[inline]
    fn index_mut(&mut self, index: ops::RangeInclusive<usize>) -> &mut str {
        match index {
            ops::RangeInclusive::Empty { .. } => &mut self[0..0], // `&mut ""` doesn't work
            // Guard must precede the general arm: it keeps `end+1` below
            // from overflowing.
            ops::RangeInclusive::NonEmpty { end, .. } if end == usize::max_value() =>
                panic!("attempted to index str up to maximum usize"),
            ops::RangeInclusive::NonEmpty { start, end } =>
                self.index_mut(start .. end+1)
        }
    }
}
/// Implements mutable substring slicing with syntax `&mut self[... end]`.
///
/// Forwards to the inclusive-range implementation with the range
/// `0...end`.
#[unstable(feature = "inclusive_range",
           reason = "recently added, follows RFC",
           issue = "28237")]
impl ops::IndexMut<ops::RangeToInclusive<usize>> for str {
    #[inline]
    fn index_mut(&mut self, index: ops::RangeToInclusive<usize>) -> &mut str {
        self.index_mut(0...index.end)
    }
}
1a4d82fc JJ |
1593 | } |
1594 | ||
1a4d82fc JJ |
1595 | /// Methods for string slices |
1596 | #[allow(missing_docs)] | |
9346a6ac | 1597 | #[doc(hidden)] |
62682a34 | 1598 | #[unstable(feature = "core_str_ext", |
e9174d1e | 1599 | reason = "stable interface provided by `impl str` in later crates", |
54a0048b | 1600 | issue = "32110")] |
1a4d82fc JJ |
1601 | pub trait StrExt { |
1602 | // NB there are no docs here are they're all located on the StrExt trait in | |
1603 | // libcollections, not here. | |
1604 | ||
92a42be0 | 1605 | #[stable(feature = "core", since = "1.6.0")] |
c34b1796 | 1606 | fn contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool; |
92a42be0 | 1607 | #[stable(feature = "core", since = "1.6.0")] |
e9174d1e | 1608 | fn chars(&self) -> Chars; |
92a42be0 | 1609 | #[stable(feature = "core", since = "1.6.0")] |
e9174d1e | 1610 | fn bytes(&self) -> Bytes; |
92a42be0 | 1611 | #[stable(feature = "core", since = "1.6.0")] |
e9174d1e | 1612 | fn char_indices(&self) -> CharIndices; |
92a42be0 | 1613 | #[stable(feature = "core", since = "1.6.0")] |
c34b1796 | 1614 | fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P>; |
92a42be0 | 1615 | #[stable(feature = "core", since = "1.6.0")] |
c34b1796 AL |
1616 | fn rsplit<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplit<'a, P> |
1617 | where P::Searcher: ReverseSearcher<'a>; | |
92a42be0 | 1618 | #[stable(feature = "core", since = "1.6.0")] |
9346a6ac | 1619 | fn splitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> SplitN<'a, P>; |
92a42be0 | 1620 | #[stable(feature = "core", since = "1.6.0")] |
c34b1796 AL |
1621 | fn rsplitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> RSplitN<'a, P> |
1622 | where P::Searcher: ReverseSearcher<'a>; | |
92a42be0 | 1623 | #[stable(feature = "core", since = "1.6.0")] |
9346a6ac | 1624 | fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P>; |
92a42be0 | 1625 | #[stable(feature = "core", since = "1.6.0")] |
9346a6ac AL |
1626 | fn rsplit_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplitTerminator<'a, P> |
1627 | where P::Searcher: ReverseSearcher<'a>; | |
92a42be0 | 1628 | #[stable(feature = "core", since = "1.6.0")] |
9346a6ac | 1629 | fn matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> Matches<'a, P>; |
92a42be0 | 1630 | #[stable(feature = "core", since = "1.6.0")] |
9346a6ac AL |
1631 | fn rmatches<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatches<'a, P> |
1632 | where P::Searcher: ReverseSearcher<'a>; | |
92a42be0 | 1633 | #[stable(feature = "core", since = "1.6.0")] |
c34b1796 | 1634 | fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P>; |
92a42be0 | 1635 | #[stable(feature = "core", since = "1.6.0")] |
9346a6ac AL |
1636 | fn rmatch_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatchIndices<'a, P> |
1637 | where P::Searcher: ReverseSearcher<'a>; | |
92a42be0 | 1638 | #[stable(feature = "core", since = "1.6.0")] |
e9174d1e | 1639 | fn lines(&self) -> Lines; |
92a42be0 SL |
1640 | #[stable(feature = "core", since = "1.6.0")] |
1641 | #[rustc_deprecated(since = "1.6.0", reason = "use lines() instead now")] | |
e9174d1e SL |
1642 | #[allow(deprecated)] |
1643 | fn lines_any(&self) -> LinesAny; | |
92a42be0 | 1644 | #[stable(feature = "core", since = "1.6.0")] |
e9174d1e | 1645 | unsafe fn slice_unchecked(&self, begin: usize, end: usize) -> &str; |
92a42be0 | 1646 | #[stable(feature = "core", since = "1.6.0")] |
e9174d1e | 1647 | unsafe fn slice_mut_unchecked(&mut self, begin: usize, end: usize) -> &mut str; |
92a42be0 | 1648 | #[stable(feature = "core", since = "1.6.0")] |
c34b1796 | 1649 | fn starts_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool; |
92a42be0 | 1650 | #[stable(feature = "core", since = "1.6.0")] |
c34b1796 AL |
1651 | fn ends_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool |
1652 | where P::Searcher: ReverseSearcher<'a>; | |
92a42be0 | 1653 | #[stable(feature = "core", since = "1.6.0")] |
c34b1796 AL |
1654 | fn trim_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str |
1655 | where P::Searcher: DoubleEndedSearcher<'a>; | |
92a42be0 | 1656 | #[stable(feature = "core", since = "1.6.0")] |
c34b1796 | 1657 | fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str; |
92a42be0 | 1658 | #[stable(feature = "core", since = "1.6.0")] |
c34b1796 AL |
1659 | fn trim_right_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str |
1660 | where P::Searcher: ReverseSearcher<'a>; | |
54a0048b | 1661 | #[stable(feature = "is_char_boundary", since = "1.9.0")] |
85aaf69f | 1662 | fn is_char_boundary(&self, index: usize) -> bool; |
92a42be0 | 1663 | #[stable(feature = "core", since = "1.6.0")] |
e9174d1e | 1664 | fn as_bytes(&self) -> &[u8]; |
92a42be0 | 1665 | #[stable(feature = "core", since = "1.6.0")] |
c34b1796 | 1666 | fn find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>; |
92a42be0 | 1667 | #[stable(feature = "core", since = "1.6.0")] |
c34b1796 AL |
1668 | fn rfind<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize> |
1669 | where P::Searcher: ReverseSearcher<'a>; | |
1670 | fn find_str<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>; | |
92a42be0 | 1671 | #[stable(feature = "core", since = "1.6.0")] |
62682a34 | 1672 | fn split_at(&self, mid: usize) -> (&str, &str); |
92a42be0 | 1673 | #[stable(feature = "core", since = "1.6.0")] |
c1a9b12d | 1674 | fn split_at_mut(&mut self, mid: usize) -> (&mut str, &mut str); |
92a42be0 | 1675 | #[stable(feature = "core", since = "1.6.0")] |
1a4d82fc | 1676 | fn as_ptr(&self) -> *const u8; |
92a42be0 | 1677 | #[stable(feature = "core", since = "1.6.0")] |
85aaf69f | 1678 | fn len(&self) -> usize; |
92a42be0 | 1679 | #[stable(feature = "core", since = "1.6.0")] |
1a4d82fc | 1680 | fn is_empty(&self) -> bool; |
92a42be0 | 1681 | #[stable(feature = "core", since = "1.6.0")] |
85aaf69f | 1682 | fn parse<T: FromStr>(&self) -> Result<T, T::Err>; |
1a4d82fc JJ |
1683 | } |
1684 | ||
54a0048b SL |
// Shorten `s` so that it is at most `max` bytes long without cutting a
// character in half.
//
// Returns `(false, s)` untouched when `s` already fits, otherwise
// `(true, prefix)` where `prefix` ends on the closest character boundary
// at or below `max`.
fn truncate_to_char_boundary(s: &str, mut max: usize) -> (bool, &str) {
    // Nothing to cut: the whole string fits within the limit.
    if max >= s.len() {
        return (false, s);
    }

    // Walk backwards until we land on a boundary. Index 0 is always a
    // boundary, so the decrement can never underflow.
    loop {
        if s.is_char_boundary(max) {
            break;
        }
        max -= 1;
    }

    (true, &s[..max])
}
1697 | ||
1a4d82fc | 1698 | #[inline(never)] |
92a42be0 | 1699 | #[cold] |
85aaf69f | 1700 | fn slice_error_fail(s: &str, begin: usize, end: usize) -> ! { |
54a0048b SL |
1701 | const MAX_DISPLAY_LENGTH: usize = 256; |
1702 | let (truncated, s) = truncate_to_char_boundary(s, MAX_DISPLAY_LENGTH); | |
1703 | let ellipsis = if truncated { "[...]" } else { "" }; | |
1704 | ||
1705 | assert!(begin <= end, "begin <= end ({} <= {}) when slicing `{}`{}", | |
1706 | begin, end, s, ellipsis); | |
1707 | panic!("index {} and/or {} in `{}`{} do not lie on character boundary", | |
1708 | begin, end, s, ellipsis); | |
1a4d82fc JJ |
1709 | } |
1710 | ||
92a42be0 | 1711 | #[stable(feature = "core", since = "1.6.0")] |
1a4d82fc JJ |
1712 | impl StrExt for str { |
1713 | #[inline] | |
c34b1796 AL |
1714 | fn contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool { |
1715 | pat.is_contained_in(self) | |
1a4d82fc JJ |
1716 | } |
1717 | ||
1a4d82fc JJ |
1718 | #[inline] |
1719 | fn chars(&self) -> Chars { | |
1720 | Chars{iter: self.as_bytes().iter()} | |
1721 | } | |
1722 | ||
1723 | #[inline] | |
1724 | fn bytes(&self) -> Bytes { | |
e9174d1e | 1725 | Bytes(self.as_bytes().iter().cloned()) |
1a4d82fc JJ |
1726 | } |
1727 | ||
1728 | #[inline] | |
1729 | fn char_indices(&self) -> CharIndices { | |
1730 | CharIndices { front_offset: 0, iter: self.chars() } | |
1731 | } | |
1732 | ||
1733 | #[inline] | |
c34b1796 | 1734 | fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P> { |
9346a6ac | 1735 | Split(SplitInternal { |
c34b1796 AL |
1736 | start: 0, |
1737 | end: self.len(), | |
1738 | matcher: pat.into_searcher(self), | |
1a4d82fc JJ |
1739 | allow_trailing_empty: true, |
1740 | finished: false, | |
1741 | }) | |
1742 | } | |
1743 | ||
9346a6ac AL |
1744 | #[inline] |
1745 | fn rsplit<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplit<'a, P> | |
1746 | where P::Searcher: ReverseSearcher<'a> | |
1747 | { | |
1748 | RSplit(self.split(pat).0) | |
1749 | } | |
1750 | ||
1a4d82fc | 1751 | #[inline] |
c34b1796 | 1752 | fn splitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> SplitN<'a, P> { |
9346a6ac | 1753 | SplitN(SplitNInternal { |
1a4d82fc JJ |
1754 | iter: self.split(pat).0, |
1755 | count: count, | |
1a4d82fc JJ |
1756 | }) |
1757 | } | |
1758 | ||
9346a6ac AL |
1759 | #[inline] |
1760 | fn rsplitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> RSplitN<'a, P> | |
1761 | where P::Searcher: ReverseSearcher<'a> | |
1762 | { | |
1763 | RSplitN(self.splitn(count, pat).0) | |
1764 | } | |
1765 | ||
1a4d82fc | 1766 | #[inline] |
c34b1796 | 1767 | fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P> { |
9346a6ac | 1768 | SplitTerminator(SplitInternal { |
1a4d82fc JJ |
1769 | allow_trailing_empty: false, |
1770 | ..self.split(pat).0 | |
1771 | }) | |
1772 | } | |
1773 | ||
1774 | #[inline] | |
9346a6ac | 1775 | fn rsplit_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplitTerminator<'a, P> |
c34b1796 AL |
1776 | where P::Searcher: ReverseSearcher<'a> |
1777 | { | |
9346a6ac | 1778 | RSplitTerminator(self.split_terminator(pat).0) |
1a4d82fc JJ |
1779 | } |
1780 | ||
1781 | #[inline] | |
9346a6ac AL |
1782 | fn matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> Matches<'a, P> { |
1783 | Matches(MatchesInternal(pat.into_searcher(self))) | |
1784 | } | |
1785 | ||
1786 | #[inline] | |
1787 | fn rmatches<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatches<'a, P> | |
c34b1796 AL |
1788 | where P::Searcher: ReverseSearcher<'a> |
1789 | { | |
9346a6ac | 1790 | RMatches(self.matches(pat).0) |
1a4d82fc JJ |
1791 | } |
1792 | ||
1793 | #[inline] | |
c34b1796 | 1794 | fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P> { |
9346a6ac | 1795 | MatchIndices(MatchIndicesInternal(pat.into_searcher(self))) |
1a4d82fc JJ |
1796 | } |
1797 | ||
9346a6ac AL |
1798 | #[inline] |
1799 | fn rmatch_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatchIndices<'a, P> | |
1800 | where P::Searcher: ReverseSearcher<'a> | |
1801 | { | |
1802 | RMatchIndices(self.match_indices(pat).0) | |
1803 | } | |
1a4d82fc JJ |
1804 | #[inline] |
1805 | fn lines(&self) -> Lines { | |
e9174d1e | 1806 | Lines(self.split_terminator('\n').map(LinesAnyMap)) |
1a4d82fc JJ |
1807 | } |
1808 | ||
9346a6ac | 1809 | #[inline] |
e9174d1e | 1810 | #[allow(deprecated)] |
1a4d82fc | 1811 | fn lines_any(&self) -> LinesAny { |
e9174d1e | 1812 | LinesAny(self.lines()) |
1a4d82fc JJ |
1813 | } |
1814 | ||
1a4d82fc | 1815 | #[inline] |
85aaf69f | 1816 | unsafe fn slice_unchecked(&self, begin: usize, end: usize) -> &str { |
54a0048b SL |
1817 | let ptr = self.as_ptr().offset(begin as isize); |
1818 | let len = end - begin; | |
1819 | from_utf8_unchecked(slice::from_raw_parts(ptr, len)) | |
1a4d82fc JJ |
1820 | } |
1821 | ||
c1a9b12d SL |
1822 | #[inline] |
1823 | unsafe fn slice_mut_unchecked(&mut self, begin: usize, end: usize) -> &mut str { | |
54a0048b SL |
1824 | let ptr = self.as_ptr().offset(begin as isize); |
1825 | let len = end - begin; | |
1826 | mem::transmute(slice::from_raw_parts_mut(ptr as *mut u8, len)) | |
c1a9b12d SL |
1827 | } |
1828 | ||
1a4d82fc | 1829 | #[inline] |
c34b1796 AL |
1830 | fn starts_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool { |
1831 | pat.is_prefix_of(self) | |
1a4d82fc JJ |
1832 | } |
1833 | ||
1834 | #[inline] | |
c34b1796 AL |
1835 | fn ends_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool |
1836 | where P::Searcher: ReverseSearcher<'a> | |
1837 | { | |
1838 | pat.is_suffix_of(self) | |
1a4d82fc JJ |
1839 | } |
1840 | ||
1841 | #[inline] | |
c34b1796 AL |
1842 | fn trim_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str |
1843 | where P::Searcher: DoubleEndedSearcher<'a> | |
1844 | { | |
1845 | let mut i = 0; | |
1846 | let mut j = 0; | |
1847 | let mut matcher = pat.into_searcher(self); | |
1848 | if let Some((a, b)) = matcher.next_reject() { | |
1849 | i = a; | |
7453a54e | 1850 | j = b; // Remember earliest known match, correct it below if |
c34b1796 AL |
1851 | // last match is different |
1852 | } | |
1853 | if let Some((_, b)) = matcher.next_reject_back() { | |
1854 | j = b; | |
1855 | } | |
1856 | unsafe { | |
1857 | // Searcher is known to return valid indices | |
1858 | self.slice_unchecked(i, j) | |
1a4d82fc JJ |
1859 | } |
1860 | } | |
1861 | ||
1862 | #[inline] | |
c34b1796 AL |
1863 | fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str { |
1864 | let mut i = self.len(); | |
1865 | let mut matcher = pat.into_searcher(self); | |
1866 | if let Some((a, _)) = matcher.next_reject() { | |
1867 | i = a; | |
1868 | } | |
1869 | unsafe { | |
1870 | // Searcher is known to return valid indices | |
1871 | self.slice_unchecked(i, self.len()) | |
1a4d82fc JJ |
1872 | } |
1873 | } | |
1874 | ||
1875 | #[inline] | |
c34b1796 AL |
1876 | fn trim_right_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str |
1877 | where P::Searcher: ReverseSearcher<'a> | |
1878 | { | |
1879 | let mut j = 0; | |
1880 | let mut matcher = pat.into_searcher(self); | |
1881 | if let Some((_, b)) = matcher.next_reject_back() { | |
1882 | j = b; | |
1883 | } | |
1884 | unsafe { | |
1885 | // Searcher is known to return valid indices | |
1886 | self.slice_unchecked(0, j) | |
1a4d82fc JJ |
1887 | } |
1888 | } | |
1889 | ||
1890 | #[inline] | |
85aaf69f | 1891 | fn is_char_boundary(&self, index: usize) -> bool { |
54a0048b SL |
1892 | // 0 and len are always ok. |
1893 | // Test for 0 explicitly so that it can optimize out the check | |
1894 | // easily and skip reading string data for that case. | |
1895 | if index == 0 || index == self.len() { return true; } | |
1a4d82fc JJ |
1896 | match self.as_bytes().get(index) { |
1897 | None => false, | |
a7813a04 XL |
1898 | // This is bit magic equivalent to: b < 128 || b >= 192 |
1899 | Some(&b) => (b as i8) >= -0x40, | |
1a4d82fc JJ |
1900 | } |
1901 | } | |
1902 | ||
1a4d82fc JJ |
1903 | #[inline] |
1904 | fn as_bytes(&self) -> &[u8] { | |
1905 | unsafe { mem::transmute(self) } | |
1906 | } | |
1907 | ||
c34b1796 AL |
1908 | fn find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize> { |
1909 | pat.into_searcher(self).next_match().map(|(i, _)| i) | |
1a4d82fc JJ |
1910 | } |
1911 | ||
c34b1796 AL |
1912 | fn rfind<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize> |
1913 | where P::Searcher: ReverseSearcher<'a> | |
1914 | { | |
1915 | pat.into_searcher(self).next_match_back().map(|(i, _)| i) | |
1a4d82fc JJ |
1916 | } |
1917 | ||
c34b1796 AL |
1918 | fn find_str<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize> { |
1919 | self.find(pat) | |
1a4d82fc JJ |
1920 | } |
1921 | ||
54a0048b | 1922 | #[inline] |
62682a34 SL |
1923 | fn split_at(&self, mid: usize) -> (&str, &str) { |
1924 | // is_char_boundary checks that the index is in [0, .len()] | |
1925 | if self.is_char_boundary(mid) { | |
1926 | unsafe { | |
1927 | (self.slice_unchecked(0, mid), | |
1928 | self.slice_unchecked(mid, self.len())) | |
1929 | } | |
1930 | } else { | |
1931 | slice_error_fail(self, 0, mid) | |
1932 | } | |
1933 | } | |
1934 | ||
c1a9b12d SL |
1935 | fn split_at_mut(&mut self, mid: usize) -> (&mut str, &mut str) { |
1936 | // is_char_boundary checks that the index is in [0, .len()] | |
1937 | if self.is_char_boundary(mid) { | |
1938 | let len = self.len(); | |
7453a54e | 1939 | let ptr = self.as_ptr() as *mut u8; |
c1a9b12d | 1940 | unsafe { |
7453a54e SL |
1941 | (from_raw_parts_mut(ptr, mid), |
1942 | from_raw_parts_mut(ptr.offset(mid as isize), len - mid)) | |
c1a9b12d SL |
1943 | } |
1944 | } else { | |
1945 | slice_error_fail(self, 0, mid) | |
1946 | } | |
1947 | } | |
1948 | ||
1a4d82fc JJ |
1949 | #[inline] |
1950 | fn as_ptr(&self) -> *const u8 { | |
54a0048b | 1951 | self as *const str as *const u8 |
1a4d82fc JJ |
1952 | } |
1953 | ||
1954 | #[inline] | |
54a0048b SL |
1955 | fn len(&self) -> usize { |
1956 | self.as_bytes().len() | |
1957 | } | |
1a4d82fc JJ |
1958 | |
1959 | #[inline] | |
1960 | fn is_empty(&self) -> bool { self.len() == 0 } | |
1961 | ||
1962 | #[inline] | |
85aaf69f | 1963 | fn parse<T: FromStr>(&self) -> Result<T, T::Err> { FromStr::from_str(self) } |
1a4d82fc JJ |
1964 | } |
1965 | ||
bd371182 AL |
1966 | #[stable(feature = "rust1", since = "1.0.0")] |
1967 | impl AsRef<[u8]> for str { | |
1968 | #[inline] | |
1969 | fn as_ref(&self) -> &[u8] { | |
1970 | self.as_bytes() | |
1971 | } | |
1972 | } | |
1973 | ||
85aaf69f | 1974 | #[stable(feature = "rust1", since = "1.0.0")] |
1a4d82fc | 1975 | impl<'a> Default for &'a str { |
1a4d82fc JJ |
1976 | fn default() -> &'a str { "" } |
1977 | } |