]>
Commit | Line | Data |
---|---|---|
1a4d82fc JJ |
1 | // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT |
2 | // file at the top-level directory of this distribution and at | |
3 | // http://rust-lang.org/COPYRIGHT. | |
4 | // | |
5 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | |
6 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | |
7 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | |
8 | // option. This file may not be copied, modified, or distributed | |
9 | // except according to those terms. | |
1a4d82fc JJ |
10 | |
11 | //! String manipulation | |
12 | //! | |
13 | //! For more details, see std::str | |
14 | ||
62682a34 | 15 | #![stable(feature = "rust1", since = "1.0.0")] |
1a4d82fc | 16 | |
9346a6ac AL |
17 | use self::pattern::Pattern; |
18 | use self::pattern::{Searcher, ReverseSearcher, DoubleEndedSearcher}; | |
1a4d82fc | 19 | |
e9174d1e | 20 | use char::{self, CharExt}; |
85aaf69f | 21 | use clone::Clone; |
bd371182 | 22 | use convert::AsRef; |
1a4d82fc | 23 | use default::Default; |
85aaf69f | 24 | use fmt; |
1a4d82fc | 25 | use iter::ExactSizeIterator; |
e9174d1e SL |
26 | use iter::{Map, Cloned, Iterator, DoubleEndedIterator}; |
27 | use marker::Sized; | |
1a4d82fc | 28 | use mem; |
c34b1796 | 29 | use ops::{Fn, FnMut, FnOnce}; |
1a4d82fc | 30 | use option::Option::{self, None, Some}; |
1a4d82fc JJ |
31 | use result::Result::{self, Ok, Err}; |
32 | use slice::{self, SliceExt}; | |
1a4d82fc | 33 | |
9346a6ac | 34 | pub mod pattern; |
1a4d82fc JJ |
35 | |
36 | /// A trait to abstract the idea of creating a new instance of a type from a | |
37 | /// string. | |
92a42be0 SL |
38 | /// |
39 | /// `FromStr`'s [`from_str()`] method is often used implicitly, through | |
40 | /// [`str`]'s [`parse()`] method. See [`parse()`]'s documentation for examples. | |
41 | /// | |
42 | /// [`from_str()`]: #tymethod.from_str | |
54a0048b SL |
43 | /// [`str`]: ../../std/primitive.str.html |
44 | /// [`parse()`]: ../../std/primitive.str.html#method.parse | |
85aaf69f | 45 | #[stable(feature = "rust1", since = "1.0.0")] |
e9174d1e | 46 | pub trait FromStr: Sized { |
85aaf69f SL |
47 | /// The associated error which can be returned from parsing. |
48 | #[stable(feature = "rust1", since = "1.0.0")] | |
49 | type Err; | |
50 | ||
d9579d0f AL |
51 | /// Parses a string `s` to return a value of this type. |
52 | /// | |
53 | /// If parsing succeeds, return the value inside `Ok`, otherwise | |
54 | /// when the string is ill-formatted return an error specific to the | |
55 | /// inside `Err`. The error type is specific to implementation of the trait. | |
b039eaaf SL |
56 | /// |
57 | /// # Examples | |
58 | /// | |
59 | /// Basic usage with [`i32`][ithirtytwo], a type that implements `FromStr`: | |
60 | /// | |
54a0048b | 61 | /// [ithirtytwo]: ../../std/primitive.i32.html |
b039eaaf SL |
62 | /// |
63 | /// ``` | |
64 | /// use std::str::FromStr; | |
65 | /// | |
66 | /// let s = "5"; | |
67 | /// let x = i32::from_str(s).unwrap(); | |
68 | /// | |
69 | /// assert_eq!(5, x); | |
70 | /// ``` | |
85aaf69f SL |
71 | #[stable(feature = "rust1", since = "1.0.0")] |
72 | fn from_str(s: &str) -> Result<Self, Self::Err>; | |
1a4d82fc JJ |
73 | } |
74 | ||
85aaf69f | 75 | #[stable(feature = "rust1", since = "1.0.0")] |
1a4d82fc | 76 | impl FromStr for bool { |
85aaf69f SL |
77 | type Err = ParseBoolError; |
78 | ||
1a4d82fc JJ |
79 | /// Parse a `bool` from a string. |
80 | /// | |
c34b1796 AL |
81 | /// Yields a `Result<bool, ParseBoolError>`, because `s` may or may not |
82 | /// actually be parseable. | |
1a4d82fc JJ |
83 | /// |
84 | /// # Examples | |
85 | /// | |
c34b1796 AL |
86 | /// ``` |
87 | /// use std::str::FromStr; | |
88 | /// | |
89 | /// assert_eq!(FromStr::from_str("true"), Ok(true)); | |
90 | /// assert_eq!(FromStr::from_str("false"), Ok(false)); | |
91 | /// assert!(<bool as FromStr>::from_str("not even a boolean").is_err()); | |
92 | /// ``` | |
93 | /// | |
94 | /// Note, in many cases, the `.parse()` method on `str` is more proper. | |
95 | /// | |
96 | /// ``` | |
85aaf69f SL |
97 | /// assert_eq!("true".parse(), Ok(true)); |
98 | /// assert_eq!("false".parse(), Ok(false)); | |
99 | /// assert!("not even a boolean".parse::<bool>().is_err()); | |
1a4d82fc JJ |
100 | /// ``` |
101 | #[inline] | |
85aaf69f | 102 | fn from_str(s: &str) -> Result<bool, ParseBoolError> { |
1a4d82fc | 103 | match s { |
85aaf69f SL |
104 | "true" => Ok(true), |
105 | "false" => Ok(false), | |
106 | _ => Err(ParseBoolError { _priv: () }), | |
1a4d82fc JJ |
107 | } |
108 | } | |
109 | } | |
110 | ||
85aaf69f SL |
111 | /// An error returned when parsing a `bool` from a string fails. |
112 | #[derive(Debug, Clone, PartialEq)] | |
113 | #[stable(feature = "rust1", since = "1.0.0")] | |
114 | pub struct ParseBoolError { _priv: () } | |
115 | ||
116 | #[stable(feature = "rust1", since = "1.0.0")] | |
117 | impl fmt::Display for ParseBoolError { | |
118 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | |
119 | "provided string was not `true` or `false`".fmt(f) | |
120 | } | |
121 | } | |
122 | ||
1a4d82fc JJ |
123 | /* |
124 | Section: Creating a string | |
125 | */ | |
126 | ||
b039eaaf SL |
127 | /// Errors which can occur when attempting to interpret a sequence of `u8` |
128 | /// as a string. | |
129 | /// | |
130 | /// As such, the `from_utf8` family of functions and methods for both `String`s | |
131 | /// and `&str`s make use of this error, for example. | |
85aaf69f | 132 | #[derive(Copy, Eq, PartialEq, Clone, Debug)] |
9346a6ac AL |
133 | #[stable(feature = "rust1", since = "1.0.0")] |
134 | pub struct Utf8Error { | |
135 | valid_up_to: usize, | |
136 | } | |
137 | ||
138 | impl Utf8Error { | |
139 | /// Returns the index in the given string up to which valid UTF-8 was | |
140 | /// verified. | |
1a4d82fc | 141 | /// |
b039eaaf SL |
142 | /// It is the maximum index such that `from_utf8(input[..index])` |
143 | /// would return `Some(_)`. | |
144 | /// | |
145 | /// # Examples | |
146 | /// | |
147 | /// Basic usage: | |
148 | /// | |
149 | /// ``` | |
b039eaaf SL |
150 | /// use std::str; |
151 | /// | |
152 | /// // some invalid bytes, in a vector | |
153 | /// let sparkle_heart = vec![0, 159, 146, 150]; | |
154 | /// | |
155 | /// // std::str::from_utf8 returns a Utf8Error | |
156 | /// let error = str::from_utf8(&sparkle_heart).unwrap_err(); | |
157 | /// | |
7453a54e | 158 | /// // the second byte is invalid here |
b039eaaf SL |
159 | /// assert_eq!(1, error.valid_up_to()); |
160 | /// ``` | |
161 | #[stable(feature = "utf8_error", since = "1.5.0")] | |
9346a6ac | 162 | pub fn valid_up_to(&self) -> usize { self.valid_up_to } |
1a4d82fc JJ |
163 | } |
164 | ||
b039eaaf | 165 | /// Converts a slice of bytes to a string slice. |
1a4d82fc | 166 | /// |
b039eaaf SL |
167 | /// A string slice (`&str`) is made of bytes (`u8`), and a byte slice (`&[u8]`) |
168 | /// is made of bytes, so this function converts between the two. Not all byte | |
169 | /// slices are valid string slices, however: `&str` requires that it is valid | |
170 | /// UTF-8. `from_utf8()` checks to ensure that the bytes are valid UTF-8, and | |
171 | /// then does the conversion. | |
172 | /// | |
173 | /// If you are sure that the byte slice is valid UTF-8, and you don't want to | |
174 | /// incur the overhead of the validity check, there is an unsafe version of | |
7453a54e | 175 | /// this function, [`from_utf8_unchecked()`][fromutf8u], which has the same |
b039eaaf SL |
176 | /// behavior but skips the check. |
177 | /// | |
7453a54e | 178 | /// [fromutf8u]: fn.from_utf8_unchecked.html |
b039eaaf SL |
179 | /// |
180 | /// If you need a `String` instead of a `&str`, consider | |
181 | /// [`String::from_utf8()`][string]. | |
182 | /// | |
54a0048b | 183 | /// [string]: ../../std/string/struct.String.html#method.from_utf8 |
b039eaaf SL |
184 | /// |
185 | /// Because you can stack-allocate a `[u8; N]`, and you can take a `&[u8]` of | |
186 | /// it, this function is one way to have a stack-allocated string. There is | |
187 | /// an example of this in the examples section below. | |
1a4d82fc | 188 | /// |
7453a54e | 189 | /// # Errors |
1a4d82fc | 190 | /// |
e9174d1e SL |
191 | /// Returns `Err` if the slice is not UTF-8 with a description as to why the |
192 | /// provided slice is not UTF-8. | |
b039eaaf SL |
193 | /// |
194 | /// # Examples | |
195 | /// | |
196 | /// Basic usage: | |
197 | /// | |
198 | /// ``` | |
199 | /// use std::str; | |
200 | /// | |
201 | /// // some bytes, in a vector | |
202 | /// let sparkle_heart = vec![240, 159, 146, 150]; | |
203 | /// | |
204 | /// // We know these bytes are valid, so just use `unwrap()`. | |
205 | /// let sparkle_heart = str::from_utf8(&sparkle_heart).unwrap(); | |
206 | /// | |
207 | /// assert_eq!("💖", sparkle_heart); | |
208 | /// ``` | |
209 | /// | |
210 | /// Incorrect bytes: | |
211 | /// | |
212 | /// ``` | |
213 | /// use std::str; | |
214 | /// | |
215 | /// // some invalid bytes, in a vector | |
216 | /// let sparkle_heart = vec![0, 159, 146, 150]; | |
217 | /// | |
218 | /// assert!(str::from_utf8(&sparkle_heart).is_err()); | |
219 | /// ``` | |
220 | /// | |
221 | /// See the docs for [`Utf8Error`][error] for more details on the kinds of | |
222 | /// errors that can be returned. | |
223 | /// | |
224 | /// [error]: struct.Utf8Error.html | |
225 | /// | |
226 | /// A "stack allocated string": | |
227 | /// | |
228 | /// ``` | |
229 | /// use std::str; | |
230 | /// | |
231 | /// // some bytes, in a stack-allocated array | |
232 | /// let sparkle_heart = [240, 159, 146, 150]; | |
233 | /// | |
234 | /// // We know these bytes are valid, so just use `unwrap()`. | |
235 | /// let sparkle_heart = str::from_utf8(&sparkle_heart).unwrap(); | |
236 | /// | |
237 | /// assert_eq!("💖", sparkle_heart); | |
238 | /// ``` | |
85aaf69f | 239 | #[stable(feature = "rust1", since = "1.0.0")] |
1a4d82fc | 240 | pub fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> { |
54a0048b | 241 | run_utf8_validation(v)?; |
1a4d82fc JJ |
242 | Ok(unsafe { from_utf8_unchecked(v) }) |
243 | } | |
244 | ||
7453a54e SL |
/// Forms a mutable str from a pointer and a length.
///
/// The `len` argument is the number of bytes in the string.
///
/// # Safety
///
/// This function is unsafe as there is no guarantee that the given pointer is
/// valid for `len` bytes, nor whether the lifetime inferred is a suitable
/// lifetime for the returned str.
///
/// The data must be valid UTF-8.
///
/// `p` must be non-null, even for zero-length str.
///
/// # Caveat
///
/// The lifetime for the returned str is inferred from its usage. To
/// prevent accidental misuse, it's suggested to tie the lifetime to whichever
/// source lifetime is safe in the context, such as by providing a helper
/// function taking the lifetime of a host value for the str, or by explicit
/// annotation.
unsafe fn from_raw_parts_mut<'a>(p: *mut u8, len: usize) -> &'a mut str {
    // Build the byte slice first, then reinterpret it as a string slice; the
    // caller is responsible for the bytes being valid UTF-8.
    mem::transmute::<&mut [u8], &mut str>(slice::from_raw_parts_mut(p, len))
}
272 | ||
1a4d82fc JJ |
273 | /// Converts a slice of bytes to a string slice without checking |
274 | /// that the string contains valid UTF-8. | |
b039eaaf | 275 | /// |
7453a54e | 276 | /// See the safe version, [`from_utf8()`][fromutf8], for more information. |
b039eaaf SL |
277 | /// |
278 | /// [fromutf8]: fn.from_utf8.html | |
279 | /// | |
280 | /// # Safety | |
281 | /// | |
282 | /// This function is unsafe because it does not check that the bytes passed to | |
283 | /// it are valid UTF-8. If this constraint is violated, undefined behavior | |
284 | /// results, as the rest of Rust assumes that `&str`s are valid UTF-8. | |
285 | /// | |
286 | /// # Examples | |
287 | /// | |
288 | /// Basic usage: | |
289 | /// | |
290 | /// ``` | |
291 | /// use std::str; | |
292 | /// | |
293 | /// // some bytes, in a vector | |
294 | /// let sparkle_heart = vec![240, 159, 146, 150]; | |
295 | /// | |
296 | /// let sparkle_heart = unsafe { | |
297 | /// str::from_utf8_unchecked(&sparkle_heart) | |
298 | /// }; | |
299 | /// | |
300 | /// assert_eq!("💖", sparkle_heart); | |
301 | /// ``` | |
d9579d0f | 302 | #[inline(always)] |
85aaf69f | 303 | #[stable(feature = "rust1", since = "1.0.0")] |
e9174d1e | 304 | pub unsafe fn from_utf8_unchecked(v: &[u8]) -> &str { |
1a4d82fc JJ |
305 | mem::transmute(v) |
306 | } | |
307 | ||
85aaf69f SL |
308 | #[stable(feature = "rust1", since = "1.0.0")] |
309 | impl fmt::Display for Utf8Error { | |
310 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | |
9346a6ac | 311 | write!(f, "invalid utf-8: invalid byte near index {}", self.valid_up_to) |
85aaf69f SL |
312 | } |
313 | } | |
314 | ||
1a4d82fc JJ |
315 | /* |
316 | Section: Iterators | |
317 | */ | |
318 | ||
319 | /// Iterator for the char (representing *Unicode Scalar Values*) of a string | |
320 | /// | |
9cc50fc6 SL |
321 | /// Created with the method [`chars()`]. |
322 | /// | |
54a0048b SL |
323 | /// [`chars()`]: ../../std/primitive.str.html#method.chars |
324 | #[derive(Clone, Debug)] | |
85aaf69f | 325 | #[stable(feature = "rust1", since = "1.0.0")] |
1a4d82fc JJ |
326 | pub struct Chars<'a> { |
327 | iter: slice::Iter<'a, u8> | |
328 | } | |
329 | ||
c34b1796 AL |
/// Extracts the payload bits of a leading byte as the initial code point
/// accumulator.
///
/// The leading byte contributes its bottom 5 bits for a sequence of width 2,
/// 4 bits for width 3, and 3 bits for width 4.
#[inline]
fn utf8_first_byte(byte: u8, width: u32) -> u32 {
    let payload_mask: u8 = 0x7F >> width;
    (byte & payload_mask) as u32
}
1a4d82fc | 335 | |
c34b1796 AL |
336 | /// Return the value of `ch` updated with continuation byte `byte`. |
337 | #[inline] | |
338 | fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 { (ch << 6) | (byte & CONT_MASK) as u32 } | |
1a4d82fc | 339 | |
c34b1796 AL |
340 | /// Checks whether the byte is a UTF-8 continuation byte (i.e. starts with the |
341 | /// bits `10`). | |
342 | #[inline] | |
343 | fn utf8_is_cont_byte(byte: u8) -> bool { (byte & !CONT_MASK) == TAG_CONT_U8 } | |
1a4d82fc JJ |
344 | |
/// Returns the referenced byte, or `0` when there is none.
#[inline]
fn unwrap_or_0(opt: Option<&u8>) -> u8 {
    opt.map_or(0, |&byte| byte)
}
352 | ||
85aaf69f SL |
/// Reads the next code point out of a byte iterator (assuming a
/// UTF-8-like encoding).
///
/// Returns `None` only when the iterator is already exhausted. A byte below
/// 128 is returned as-is. Otherwise up to three further bytes are consumed,
/// depending on the value of the first byte; a missing byte is treated as `0`
/// rather than reported, so the input is not validated here.
#[unstable(feature = "str_internals", issue = "0")]
#[inline]
pub fn next_code_point(bytes: &mut slice::Iter<u8>) -> Option<u32> {
    // Decode UTF-8
    let x = match bytes.next() {
        None => return None,
        // Single-byte (ASCII) fast path.
        Some(&next_byte) if next_byte < 128 => return Some(next_byte as u32),
        Some(&next_byte) => next_byte,
    };

    // Multibyte case follows
    // Decode from a byte combination out of: [[[x y] z] w]
    // NOTE: Performance is sensitive to the exact formulation here
    let init = utf8_first_byte(x, 2);
    let y = unwrap_or_0(bytes.next());
    // Two-byte result; overwritten below if the sequence turns out longer.
    let mut ch = utf8_acc_cont_byte(init, y);
    if x >= 0xE0 {
        // [[x y z] w] case
        // 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid
        let z = unwrap_or_0(bytes.next());
        let y_z = utf8_acc_cont_byte((y & CONT_MASK) as u32, z);
        ch = init << 12 | y_z;
        if x >= 0xF0 {
            // [x y z w] case
            // use only the lower 3 bits of `init`
            let w = unwrap_or_0(bytes.next());
            ch = (init & 7) << 18 | utf8_acc_cont_byte(y_z, w);
        }
    }

    Some(ch)
}
387 | ||
c34b1796 AL |
/// Reads the last code point out of a byte iterator (assuming a
/// UTF-8-like encoding).
///
/// Bytes are consumed from the back of the iterator. Returns `None` only when
/// the iterator is already exhausted; a missing byte is treated as `0`, so
/// the input is not validated here.
#[inline]
fn next_code_point_reverse(bytes: &mut slice::Iter<u8>) -> Option<u32> {
    // Decode UTF-8
    let w = match bytes.next_back() {
        None => return None,
        // Single-byte (ASCII) fast path.
        Some(&next_byte) if next_byte < 128 => return Some(next_byte as u32),
        Some(&back_byte) => back_byte,
    };

    // Multibyte case follows
    // Decode from a byte combination out of: [x [y [z w]]]
    let mut ch;
    let z = unwrap_or_0(bytes.next_back());
    // First guess: `z` is the leading byte of a two-byte sequence.
    ch = utf8_first_byte(z, 2);
    if utf8_is_cont_byte(z) {
        // `z` was a continuation byte, so look one byte further back and
        // treat `y` as the leading byte of a three-byte sequence.
        let y = unwrap_or_0(bytes.next_back());
        ch = utf8_first_byte(y, 3);
        if utf8_is_cont_byte(y) {
            // `y` was a continuation byte too: `x` leads a four-byte sequence.
            let x = unwrap_or_0(bytes.next_back());
            ch = utf8_first_byte(x, 4);
            ch = utf8_acc_cont_byte(ch, y);
        }
        ch = utf8_acc_cont_byte(ch, z);
    }
    // The byte read first is always the final continuation byte.
    ch = utf8_acc_cont_byte(ch, w);

    Some(ch)
}
418 | ||
85aaf69f | 419 | #[stable(feature = "rust1", since = "1.0.0")] |
1a4d82fc JJ |
420 | impl<'a> Iterator for Chars<'a> { |
421 | type Item = char; | |
422 | ||
423 | #[inline] | |
424 | fn next(&mut self) -> Option<char> { | |
85aaf69f SL |
425 | next_code_point(&mut self.iter).map(|ch| { |
426 | // str invariant says `ch` is a valid Unicode Scalar Value | |
427 | unsafe { | |
e9174d1e | 428 | char::from_u32_unchecked(ch) |
1a4d82fc | 429 | } |
85aaf69f | 430 | }) |
1a4d82fc JJ |
431 | } |
432 | ||
433 | #[inline] | |
85aaf69f | 434 | fn size_hint(&self) -> (usize, Option<usize>) { |
1a4d82fc | 435 | let (len, _) = self.iter.size_hint(); |
c34b1796 AL |
436 | // `(len + 3)` can't overflow, because we know that the `slice::Iter` |
437 | // belongs to a slice in memory which has a maximum length of | |
438 | // `isize::MAX` (that's well below `usize::MAX`). | |
439 | ((len + 3) / 4, Some(len)) | |
1a4d82fc JJ |
440 | } |
441 | } | |
442 | ||
85aaf69f | 443 | #[stable(feature = "rust1", since = "1.0.0")] |
1a4d82fc JJ |
444 | impl<'a> DoubleEndedIterator for Chars<'a> { |
445 | #[inline] | |
446 | fn next_back(&mut self) -> Option<char> { | |
c34b1796 AL |
447 | next_code_point_reverse(&mut self.iter).map(|ch| { |
448 | // str invariant says `ch` is a valid Unicode Scalar Value | |
449 | unsafe { | |
e9174d1e | 450 | char::from_u32_unchecked(ch) |
1a4d82fc | 451 | } |
c34b1796 | 452 | }) |
1a4d82fc JJ |
453 | } |
454 | } | |
455 | ||
e9174d1e SL |
456 | impl<'a> Chars<'a> { |
457 | /// View the underlying data as a subslice of the original data. | |
458 | /// | |
459 | /// This has the same lifetime as the original slice, and so the | |
460 | /// iterator can continue to be used while this exists. | |
461 | #[stable(feature = "iter_to_slice", since = "1.4.0")] | |
462 | #[inline] | |
463 | pub fn as_str(&self) -> &'a str { | |
464 | unsafe { from_utf8_unchecked(self.iter.as_slice()) } | |
465 | } | |
466 | } | |
467 | ||
9346a6ac | 468 | /// Iterator for a string's characters and their byte offsets. |
54a0048b | 469 | #[derive(Clone, Debug)] |
85aaf69f | 470 | #[stable(feature = "rust1", since = "1.0.0")] |
1a4d82fc | 471 | pub struct CharIndices<'a> { |
85aaf69f | 472 | front_offset: usize, |
1a4d82fc JJ |
473 | iter: Chars<'a>, |
474 | } | |
475 | ||
85aaf69f | 476 | #[stable(feature = "rust1", since = "1.0.0")] |
1a4d82fc | 477 | impl<'a> Iterator for CharIndices<'a> { |
85aaf69f | 478 | type Item = (usize, char); |
1a4d82fc JJ |
479 | |
480 | #[inline] | |
85aaf69f | 481 | fn next(&mut self) -> Option<(usize, char)> { |
1a4d82fc JJ |
482 | let (pre_len, _) = self.iter.iter.size_hint(); |
483 | match self.iter.next() { | |
484 | None => None, | |
485 | Some(ch) => { | |
486 | let index = self.front_offset; | |
487 | let (len, _) = self.iter.iter.size_hint(); | |
488 | self.front_offset += pre_len - len; | |
489 | Some((index, ch)) | |
490 | } | |
491 | } | |
492 | } | |
493 | ||
494 | #[inline] | |
85aaf69f | 495 | fn size_hint(&self) -> (usize, Option<usize>) { |
1a4d82fc JJ |
496 | self.iter.size_hint() |
497 | } | |
498 | } | |
499 | ||
85aaf69f | 500 | #[stable(feature = "rust1", since = "1.0.0")] |
1a4d82fc JJ |
501 | impl<'a> DoubleEndedIterator for CharIndices<'a> { |
502 | #[inline] | |
85aaf69f | 503 | fn next_back(&mut self) -> Option<(usize, char)> { |
1a4d82fc JJ |
504 | match self.iter.next_back() { |
505 | None => None, | |
506 | Some(ch) => { | |
507 | let (len, _) = self.iter.iter.size_hint(); | |
508 | let index = self.front_offset + len; | |
509 | Some((index, ch)) | |
510 | } | |
511 | } | |
512 | } | |
513 | } | |
514 | ||
e9174d1e SL |
515 | impl<'a> CharIndices<'a> { |
516 | /// View the underlying data as a subslice of the original data. | |
517 | /// | |
518 | /// This has the same lifetime as the original slice, and so the | |
519 | /// iterator can continue to be used while this exists. | |
520 | #[stable(feature = "iter_to_slice", since = "1.4.0")] | |
521 | #[inline] | |
522 | pub fn as_str(&self) -> &'a str { | |
523 | self.iter.as_str() | |
524 | } | |
525 | } | |
526 | ||
1a4d82fc JJ |
527 | /// External iterator for a string's bytes. |
528 | /// Use with the `std::iter` module. | |
529 | /// | |
9cc50fc6 SL |
530 | /// Created with the method [`bytes()`]. |
531 | /// | |
54a0048b | 532 | /// [`bytes()`]: ../../std/primitive.str.html#method.bytes |
85aaf69f | 533 | #[stable(feature = "rust1", since = "1.0.0")] |
54a0048b | 534 | #[derive(Clone, Debug)] |
e9174d1e | 535 | pub struct Bytes<'a>(Cloned<slice::Iter<'a, u8>>); |
1a4d82fc | 536 | |
e9174d1e SL |
537 | #[stable(feature = "rust1", since = "1.0.0")] |
538 | impl<'a> Iterator for Bytes<'a> { | |
539 | type Item = u8; | |
1a4d82fc | 540 | |
1a4d82fc | 541 | #[inline] |
e9174d1e SL |
542 | fn next(&mut self) -> Option<u8> { |
543 | self.0.next() | |
1a4d82fc | 544 | } |
1a4d82fc | 545 | |
c34b1796 | 546 | #[inline] |
e9174d1e SL |
547 | fn size_hint(&self) -> (usize, Option<usize>) { |
548 | self.0.size_hint() | |
c34b1796 | 549 | } |
c34b1796 AL |
550 | |
551 | #[inline] | |
e9174d1e SL |
552 | fn count(self) -> usize { |
553 | self.0.count() | |
c34b1796 | 554 | } |
9346a6ac AL |
555 | |
556 | #[inline] | |
e9174d1e SL |
557 | fn last(self) -> Option<Self::Item> { |
558 | self.0.last() | |
9346a6ac AL |
559 | } |
560 | ||
561 | #[inline] | |
e9174d1e SL |
562 | fn nth(&mut self, n: usize) -> Option<Self::Item> { |
563 | self.0.nth(n) | |
9346a6ac | 564 | } |
1a4d82fc JJ |
565 | } |
566 | ||
9346a6ac AL |
567 | #[stable(feature = "rust1", since = "1.0.0")] |
568 | impl<'a> DoubleEndedIterator for Bytes<'a> { | |
569 | #[inline] | |
570 | fn next_back(&mut self) -> Option<u8> { | |
571 | self.0.next_back() | |
572 | } | |
c34b1796 AL |
573 | } |
574 | ||
9346a6ac AL |
575 | #[stable(feature = "rust1", since = "1.0.0")] |
576 | impl<'a> ExactSizeIterator for Bytes<'a> { | |
577 | #[inline] | |
578 | fn len(&self) -> usize { | |
579 | self.0.len() | |
580 | } | |
c34b1796 AL |
581 | } |
582 | ||
9346a6ac AL |
/// This macro generates a Clone impl for string pattern API
/// wrapper types of the form X<'a, P>
///
/// Invocation shape: `clone $t with |$s| $e`, where `$t` is the wrapper type,
/// `$s` is the name bound to `self` inside the generated `clone`, and `$e` is
/// the expression producing the clone. The impl is only available when
/// `P::Searcher: Clone`.
macro_rules! derive_pattern_clone {
    (clone $t:ident with |$s:ident| $e:expr) => {
        impl<'a, P: Pattern<'a>> Clone for $t<'a, P>
            where P::Searcher: Clone
        {
            fn clone(&self) -> Self {
                // Expose `self` under the caller-chosen name used by `$e`.
                let $s = self;
                $e
            }
        }
    }
}
597 | ||
/// This macro generates two public iterator structs
/// wrapping a private internal one that makes use of the `Pattern` API.
///
/// For all patterns `P: Pattern<'a>` the following items will be
/// generated (generics omitted):
///
/// struct $forward_iterator($internal_iterator);
/// struct $reverse_iterator($internal_iterator);
///
/// impl Iterator for $forward_iterator
/// { /* internal ends up calling Searcher::next_match() */ }
///
/// impl DoubleEndedIterator for $forward_iterator
///       where P::Searcher: DoubleEndedSearcher
/// { /* internal ends up calling Searcher::next_match_back() */ }
///
/// impl Iterator for $reverse_iterator
///       where P::Searcher: ReverseSearcher
/// { /* internal ends up calling Searcher::next_match_back() */ }
///
/// impl DoubleEndedIterator for $reverse_iterator
///       where P::Searcher: DoubleEndedSearcher
/// { /* internal ends up calling Searcher::next_match() */ }
///
/// The internal one is defined outside the macro, and has almost the same
/// semantics as a DoubleEndedIterator by delegating to `pattern::Searcher` and
/// `pattern::ReverseSearcher` for both forward and reverse iteration.
///
/// "Almost", because a `Searcher` and a `ReverseSearcher` for a given
/// `Pattern` might not return the same elements, so actually implementing
/// `DoubleEndedIterator` for it would be incorrect.
/// (See the docs in `str::pattern` for more details)
///
/// However, the internal struct still represents a single ended iterator from
/// either end, and depending on pattern is also a valid double ended iterator,
/// so the two wrapper structs implement `Iterator`
/// and `DoubleEndedIterator` depending on the concrete pattern type, leading
/// to the complex impls seen above.
///
/// Besides the iterator impls, `fmt::Debug` and `Clone` impls are generated
/// for both wrappers, each conditional on `P::Searcher` implementing the
/// respective trait.
macro_rules! generate_pattern_iterators {
    {
        // Forward iterator
        forward:
            $(#[$forward_iterator_attribute:meta])*
            struct $forward_iterator:ident;

        // Reverse iterator
        reverse:
            $(#[$reverse_iterator_attribute:meta])*
            struct $reverse_iterator:ident;

        // Stability of all generated items
        stability:
            $(#[$common_stability_attribute:meta])*

        // Internal almost-iterator that is being delegated to
        internal:
            $internal_iterator:ident yielding ($iterty:ty);

        // Kind of delegation - either single ended or double ended
        delegate $($t:tt)*
    } => {
        $(#[$forward_iterator_attribute])*
        $(#[$common_stability_attribute])*
        pub struct $forward_iterator<'a, P: Pattern<'a>>($internal_iterator<'a, P>);

        $(#[$common_stability_attribute])*
        impl<'a, P: Pattern<'a>> fmt::Debug for $forward_iterator<'a, P>
            where P::Searcher: fmt::Debug
        {
            fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
                f.debug_tuple(stringify!($forward_iterator))
                    .field(&self.0)
                    .finish()
            }
        }

        $(#[$common_stability_attribute])*
        impl<'a, P: Pattern<'a>> Iterator for $forward_iterator<'a, P> {
            type Item = $iterty;

            #[inline]
            fn next(&mut self) -> Option<$iterty> {
                self.0.next()
            }
        }

        $(#[$common_stability_attribute])*
        impl<'a, P: Pattern<'a>> Clone for $forward_iterator<'a, P>
            where P::Searcher: Clone
        {
            fn clone(&self) -> Self {
                $forward_iterator(self.0.clone())
            }
        }

        $(#[$reverse_iterator_attribute])*
        $(#[$common_stability_attribute])*
        pub struct $reverse_iterator<'a, P: Pattern<'a>>($internal_iterator<'a, P>);

        $(#[$common_stability_attribute])*
        impl<'a, P: Pattern<'a>> fmt::Debug for $reverse_iterator<'a, P>
            where P::Searcher: fmt::Debug
        {
            fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
                f.debug_tuple(stringify!($reverse_iterator))
                    .field(&self.0)
                    .finish()
            }
        }

        $(#[$common_stability_attribute])*
        impl<'a, P: Pattern<'a>> Iterator for $reverse_iterator<'a, P>
            where P::Searcher: ReverseSearcher<'a>
        {
            type Item = $iterty;

            // The reverse wrapper's forward direction is the internal
            // iterator's backward direction.
            #[inline]
            fn next(&mut self) -> Option<$iterty> {
                self.0.next_back()
            }
        }

        $(#[$common_stability_attribute])*
        impl<'a, P: Pattern<'a>> Clone for $reverse_iterator<'a, P>
            where P::Searcher: Clone
        {
            fn clone(&self) -> Self {
                $reverse_iterator(self.0.clone())
            }
        }

        // Re-enter the macro with the `delegate` tokens to pick one of the
        // two arms below.
        generate_pattern_iterators!($($t)* with $(#[$common_stability_attribute])*,
                                                $forward_iterator,
                                                $reverse_iterator, $iterty);
    };
    {
        double ended; with $(#[$common_stability_attribute:meta])*,
                           $forward_iterator:ident,
                           $reverse_iterator:ident, $iterty:ty
    } => {
        $(#[$common_stability_attribute])*
        impl<'a, P: Pattern<'a>> DoubleEndedIterator for $forward_iterator<'a, P>
            where P::Searcher: DoubleEndedSearcher<'a>
        {
            #[inline]
            fn next_back(&mut self) -> Option<$iterty> {
                self.0.next_back()
            }
        }

        $(#[$common_stability_attribute])*
        impl<'a, P: Pattern<'a>> DoubleEndedIterator for $reverse_iterator<'a, P>
            where P::Searcher: DoubleEndedSearcher<'a>
        {
            #[inline]
            fn next_back(&mut self) -> Option<$iterty> {
                self.0.next()
            }
        }
    };
    {
        // Single ended delegation generates no `DoubleEndedIterator` impls.
        single ended; with $(#[$common_stability_attribute:meta])*,
                           $forward_iterator:ident,
                           $reverse_iterator:ident, $iterty:ty
    } => {}
}
764 | ||
9346a6ac AL |
// `Clone` for `SplitInternal`: the searcher is cloned explicitly and all
// remaining fields are taken over through struct update syntax.
derive_pattern_clone!{
    clone SplitInternal
    with |s| SplitInternal { matcher: s.matcher.clone(), ..*s }
}

/// Shared state behind the pattern-based split iterators; it can be driven
/// from the front (`next`) and, for reverse searchers, from the back
/// (`next_back`).
struct SplitInternal<'a, P: Pattern<'a>> {
    // Start offset into the haystack of the part not yet yielded from the
    // front; moved past each match found by `next`.
    start: usize,
    // End offset into the haystack of the part not yet yielded from the back;
    // moved before each match found by `next_back`.
    end: usize,
    // Searcher that locates the pattern matches and owns the haystack.
    matcher: P::Searcher,
    // When `false`, an empty final piece is not yielded.
    allow_trailing_empty: bool,
    // Set once the final piece has been handed out.
    finished: bool,
}
777 | ||
54a0048b SL |
778 | impl<'a, P: Pattern<'a>> fmt::Debug for SplitInternal<'a, P> where P::Searcher: fmt::Debug { |
779 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | |
780 | f.debug_struct("SplitInternal") | |
781 | .field("start", &self.start) | |
782 | .field("end", &self.end) | |
783 | .field("matcher", &self.matcher) | |
784 | .field("allow_trailing_empty", &self.allow_trailing_empty) | |
785 | .field("finished", &self.finished) | |
786 | .finish() | |
787 | } | |
788 | } | |
789 | ||
9346a6ac | 790 | impl<'a, P: Pattern<'a>> SplitInternal<'a, P> { |
1a4d82fc JJ |
791 | #[inline] |
792 | fn get_end(&mut self) -> Option<&'a str> { | |
c34b1796 | 793 | if !self.finished && (self.allow_trailing_empty || self.end - self.start > 0) { |
1a4d82fc | 794 | self.finished = true; |
c34b1796 AL |
795 | unsafe { |
796 | let string = self.matcher.haystack().slice_unchecked(self.start, self.end); | |
797 | Some(string) | |
798 | } | |
1a4d82fc JJ |
799 | } else { |
800 | None | |
801 | } | |
802 | } | |
1a4d82fc JJ |
803 | |
804 | #[inline] | |
805 | fn next(&mut self) -> Option<&'a str> { | |
806 | if self.finished { return None } | |
807 | ||
c34b1796 AL |
808 | let haystack = self.matcher.haystack(); |
809 | match self.matcher.next_match() { | |
1a4d82fc | 810 | Some((a, b)) => unsafe { |
c34b1796 AL |
811 | let elt = haystack.slice_unchecked(self.start, a); |
812 | self.start = b; | |
1a4d82fc JJ |
813 | Some(elt) |
814 | }, | |
815 | None => self.get_end(), | |
816 | } | |
817 | } | |
1a4d82fc | 818 | |
1a4d82fc | 819 | #[inline] |
9346a6ac AL |
820 | fn next_back(&mut self) -> Option<&'a str> |
821 | where P::Searcher: ReverseSearcher<'a> | |
822 | { | |
1a4d82fc JJ |
823 | if self.finished { return None } |
824 | ||
825 | if !self.allow_trailing_empty { | |
826 | self.allow_trailing_empty = true; | |
827 | match self.next_back() { | |
828 | Some(elt) if !elt.is_empty() => return Some(elt), | |
829 | _ => if self.finished { return None } | |
830 | } | |
831 | } | |
c34b1796 AL |
832 | |
833 | let haystack = self.matcher.haystack(); | |
834 | match self.matcher.next_match_back() { | |
1a4d82fc | 835 | Some((a, b)) => unsafe { |
c34b1796 AL |
836 | let elt = haystack.slice_unchecked(b, self.end); |
837 | self.end = a; | |
1a4d82fc JJ |
838 | Some(elt) |
839 | }, | |
c34b1796 AL |
840 | None => unsafe { |
841 | self.finished = true; | |
842 | Some(haystack.slice_unchecked(self.start, self.end)) | |
843 | }, | |
1a4d82fc JJ |
844 | } |
845 | } | |
846 | } | |
847 | ||
9346a6ac AL |
// Declares the forward/reverse iterator pair `Split` / `RSplit`.
// Per the arguments below, both are backed by `SplitInternal`, yield `&'a str`, carry the
// `rust1` stability attribute and request the `double ended` delegation. The macro body
// starts outside this excerpt, so the exact expansion is not shown here.
848 | generate_pattern_iterators! { |
849 | forward: |
9cc50fc6 SL |
850 | /// Created with the method [`split()`]. |
851 | /// |
54a0048b | 852 | /// [`split()`]: ../../std/primitive.str.html#method.split |
9346a6ac AL |
853 | struct Split; |
854 | reverse: |
9cc50fc6 SL |
855 | /// Created with the method [`rsplit()`]. |
856 | /// |
54a0048b | 857 | /// [`rsplit()`]: ../../std/primitive.str.html#method.rsplit |
9346a6ac AL |
858 | struct RSplit; |
859 | stability: |
860 | #[stable(feature = "rust1", since = "1.0.0")] |
861 | internal: |
862 | SplitInternal yielding (&'a str); |
863 | delegate double ended; |
864 | } |
865 | ||
// Declares the forward/reverse iterator pair `SplitTerminator` / `RSplitTerminator`.
// Like `Split`/`RSplit` they are backed by `SplitInternal`, yield `&'a str` and request the
// `double ended` delegation. How the two pairs differ is decided where the
// `SplitInternal` value is constructed, which is outside this excerpt.
866 | generate_pattern_iterators! { |
867 | forward: |
9cc50fc6 SL |
868 | /// Created with the method [`split_terminator()`]. |
869 | /// |
54a0048b | 870 | /// [`split_terminator()`]: ../../std/primitive.str.html#method.split_terminator |
9346a6ac AL |
871 | struct SplitTerminator; |
872 | reverse: |
9cc50fc6 SL |
873 | /// Created with the method [`rsplit_terminator()`]. |
874 | /// |
54a0048b | 875 | /// [`rsplit_terminator()`]: ../../std/primitive.str.html#method.rsplit_terminator |
9346a6ac AL |
876 | struct RSplitTerminator; |
877 | stability: |
878 | #[stable(feature = "rust1", since = "1.0.0")] |
879 | internal: |
880 | SplitInternal yielding (&'a str); |
881 | delegate double ended; |
882 | } |
1a4d82fc | 883 | |
9346a6ac AL |
// `derive_pattern_clone!` (macro defined outside this excerpt) presumably emits the `Clone`
// impl from the closure below: the inner splitter is cloned explicitly and `count` is
// taken over through `..*s`.
884 | derive_pattern_clone!{ |
885 | clone SplitNInternal | |
886 | with |s| SplitNInternal { iter: s.iter.clone(), ..*s } | |
887 | } | |
54a0048b | 888 | |
9346a6ac AL |
/// State behind `SplitN` / `RSplitN`: a `SplitInternal` plus a budget limiting how many
/// pieces may still be produced.
889 | struct SplitNInternal<'a, P: Pattern<'a>> { |
/// The underlying unbounded splitter.
890 | iter: SplitInternal<'a, P>, | |
891 | /// The number of splits remaining | |
892 | count: usize, | |
893 | } | |
894 | ||
54a0048b SL |
895 | impl<'a, P: Pattern<'a>> fmt::Debug for SplitNInternal<'a, P> where P::Searcher: fmt::Debug { |
896 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | |
897 | f.debug_struct("SplitNInternal") | |
898 | .field("iter", &self.iter) | |
899 | .field("count", &self.count) | |
900 | .finish() | |
901 | } | |
902 | } | |
903 | ||
9346a6ac | 904 | impl<'a, P: Pattern<'a>> SplitNInternal<'a, P> { |
1a4d82fc JJ |
905 | #[inline] |
906 | fn next(&mut self) -> Option<&'a str> { | |
c34b1796 AL |
907 | match self.count { |
908 | 0 => None, | |
909 | 1 => { self.count = 0; self.iter.get_end() } | |
910 | _ => { self.count -= 1; self.iter.next() } | |
1a4d82fc JJ |
911 | } |
912 | } | |
1a4d82fc | 913 | |
c34b1796 | 914 | #[inline] |
9346a6ac AL |
915 | fn next_back(&mut self) -> Option<&'a str> |
916 | where P::Searcher: ReverseSearcher<'a> | |
917 | { | |
918 | match self.count { | |
919 | 0 => None, | |
920 | 1 => { self.count = 0; self.iter.get_end() } | |
921 | _ => { self.count -= 1; self.iter.next_back() } | |
c34b1796 AL |
922 | } |
923 | } | |
1a4d82fc JJ |
924 | } |
925 | ||
9346a6ac AL |
// Declares the forward/reverse iterator pair `SplitN` / `RSplitN`, backed by
// `SplitNInternal` and yielding `&'a str`. Unlike the unbounded splitters this invocation
// asks for `single ended` delegation; the visible tail of the macro shows that arm
// expanding to nothing, so presumably no `DoubleEndedIterator` impls are generated.
926 | generate_pattern_iterators! { |
927 | forward: |
9cc50fc6 SL |
928 | /// Created with the method [`splitn()`]. |
929 | /// |
54a0048b | 930 | /// [`splitn()`]: ../../std/primitive.str.html#method.splitn |
9346a6ac AL |
931 | struct SplitN; |
932 | reverse: |
9cc50fc6 SL |
933 | /// Created with the method [`rsplitn()`]. |
934 | /// |
54a0048b | 935 | /// [`rsplitn()`]: ../../std/primitive.str.html#method.rsplitn |
9346a6ac AL |
936 | struct RSplitN; |
937 | stability: |
938 | #[stable(feature = "rust1", since = "1.0.0")] |
939 | internal: |
940 | SplitNInternal yielding (&'a str); |
941 | delegate single ended; |
942 | } |
943 | ||
// `derive_pattern_clone!` (macro defined outside this excerpt) presumably emits the `Clone`
// impl from the closure below, which clones the wrapped searcher.
944 | derive_pattern_clone!{ |
945 | clone MatchIndicesInternal | |
946 | with |s| MatchIndicesInternal(s.0.clone()) | |
947 | } | |
54a0048b | 948 | |
9346a6ac AL |
/// State behind `MatchIndices` / `RMatchIndices`: just the pattern's searcher.
949 | struct MatchIndicesInternal<'a, P: Pattern<'a>>(P::Searcher); |
950 | ||
54a0048b SL |
951 | impl<'a, P: Pattern<'a>> fmt::Debug for MatchIndicesInternal<'a, P> where P::Searcher: fmt::Debug { |
952 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | |
953 | f.debug_tuple("MatchIndicesInternal") | |
954 | .field(&self.0) | |
955 | .finish() | |
956 | } | |
957 | } | |
958 | ||
9346a6ac AL |
959 | impl<'a, P: Pattern<'a>> MatchIndicesInternal<'a, P> { |
960 | #[inline] | |
b039eaaf SL |
961 | fn next(&mut self) -> Option<(usize, &'a str)> { |
962 | self.0.next_match().map(|(start, end)| unsafe { | |
963 | (start, self.0.haystack().slice_unchecked(start, end)) | |
964 | }) | |
9346a6ac AL |
965 | } |
966 | ||
967 | #[inline] | |
b039eaaf | 968 | fn next_back(&mut self) -> Option<(usize, &'a str)> |
9346a6ac AL |
969 | where P::Searcher: ReverseSearcher<'a> |
970 | { | |
b039eaaf SL |
971 | self.0.next_match_back().map(|(start, end)| unsafe { |
972 | (start, self.0.haystack().slice_unchecked(start, end)) | |
973 | }) | |
9346a6ac AL |
974 | } |
975 | } | |
976 | ||
// Declares the forward/reverse iterator pair `MatchIndices` / `RMatchIndices`, backed by
// `MatchIndicesInternal`. Items are `(usize, &'a str)`: the match's start offset together
// with the matched text. The macro body starts outside this excerpt, so the exact
// expansion is not shown here.
977 | generate_pattern_iterators! { |
978 | forward: |
9cc50fc6 SL |
979 | /// Created with the method [`match_indices()`]. |
980 | /// |
54a0048b | 981 | /// [`match_indices()`]: ../../std/primitive.str.html#method.match_indices |
9346a6ac AL |
982 | struct MatchIndices; |
983 | reverse: |
9cc50fc6 SL |
984 | /// Created with the method [`rmatch_indices()`]. |
985 | /// |
54a0048b | 986 | /// [`rmatch_indices()`]: ../../std/primitive.str.html#method.rmatch_indices |
9346a6ac AL |
987 | struct RMatchIndices; |
988 | stability: |
b039eaaf | 989 | #[stable(feature = "str_match_indices", since = "1.5.0")] |
9346a6ac | 990 | internal: |
b039eaaf | 991 | MatchIndicesInternal yielding ((usize, &'a str)); |
9346a6ac AL |
992 | delegate double ended; |
993 | } |
994 | ||
// `derive_pattern_clone!` (macro defined outside this excerpt) presumably emits the `Clone`
// impl from the closure below, which clones the wrapped searcher.
995 | derive_pattern_clone!{ |
996 | clone MatchesInternal | |
997 | with |s| MatchesInternal(s.0.clone()) | |
998 | } | |
54a0048b | 999 | |
9346a6ac AL |
/// State behind `Matches` / `RMatches`: just the pattern's searcher.
1000 | struct MatchesInternal<'a, P: Pattern<'a>>(P::Searcher); |
1001 | ||
54a0048b SL |
1002 | impl<'a, P: Pattern<'a>> fmt::Debug for MatchesInternal<'a, P> where P::Searcher: fmt::Debug { |
1003 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | |
1004 | f.debug_tuple("MatchesInternal") | |
1005 | .field(&self.0) | |
1006 | .finish() | |
1007 | } | |
1008 | } | |
1009 | ||
9346a6ac AL |
1010 | impl<'a, P: Pattern<'a>> MatchesInternal<'a, P> { |
1011 | #[inline] | |
1012 | fn next(&mut self) -> Option<&'a str> { | |
1013 | self.0.next_match().map(|(a, b)| unsafe { | |
1014 | // Indices are known to be on utf8 boundaries | |
1015 | self.0.haystack().slice_unchecked(a, b) | |
1016 | }) | |
1017 | } | |
1018 | ||
1019 | #[inline] | |
1020 | fn next_back(&mut self) -> Option<&'a str> | |
1021 | where P::Searcher: ReverseSearcher<'a> | |
1022 | { | |
1023 | self.0.next_match_back().map(|(a, b)| unsafe { | |
1024 | // Indices are known to be on utf8 boundaries | |
1025 | self.0.haystack().slice_unchecked(a, b) | |
1026 | }) | |
1027 | } | |
1028 | } | |
1029 | ||
// Declares the forward/reverse iterator pair `Matches` / `RMatches`, backed by
// `MatchesInternal` and yielding the matched text as `&'a str`. The macro body starts
// outside this excerpt, so the exact expansion is not shown here.
1030 | generate_pattern_iterators! { |
1031 | forward: |
9cc50fc6 SL |
1032 | /// Created with the method [`matches()`]. |
1033 | /// |
54a0048b | 1034 | /// [`matches()`]: ../../std/primitive.str.html#method.matches |
9346a6ac AL |
1035 | struct Matches; |
1036 | reverse: |
9cc50fc6 SL |
1037 | /// Created with the method [`rmatches()`]. |
1038 | /// |
54a0048b | 1039 | /// [`rmatches()`]: ../../std/primitive.str.html#method.rmatches |
9346a6ac AL |
1040 | struct RMatches; |
1041 | stability: |
62682a34 | 1042 | #[stable(feature = "str_matches", since = "1.2.0")] |
9346a6ac AL |
1043 | internal: |
1044 | MatchesInternal yielding (&'a str); |
1045 | delegate double ended; |
1046 | } |
1047 | ||
9cc50fc6 SL |
/// Iterator over the lines of a string slice, yielding `&'a str` items.
///
/// Internally a `SplitTerminator` over a `char` pattern whose pieces are passed through
/// `LinesAnyMap`, which removes one trailing `\r` from each piece.
///
1048 | /// Created with the method [`lines()`]. |
1049 | /// |
54a0048b | 1050 | /// [`lines()`]: ../../std/primitive.str.html#method.lines |
c34b1796 | 1051 | #[stable(feature = "rust1", since = "1.0.0")] |
54a0048b | 1052 | #[derive(Clone, Debug)] |
e9174d1e | 1053 | pub struct Lines<'a>(Map<SplitTerminator<'a, char>, LinesAnyMap>); |
9346a6ac AL |
1054 | |
1055 | #[stable(feature = "rust1", since = "1.0.0")] | |
1056 | impl<'a> Iterator for Lines<'a> { | |
c34b1796 AL |
1057 | type Item = &'a str; |
1058 | ||
1059 | #[inline] | |
1060 | fn next(&mut self) -> Option<&'a str> { | |
9346a6ac AL |
1061 | self.0.next() |
1062 | } | |
c34b1796 | 1063 | |
9346a6ac AL |
1064 | #[inline] |
1065 | fn size_hint(&self) -> (usize, Option<usize>) { | |
1066 | self.0.size_hint() | |
1067 | } | |
1068 | } | |
1069 | ||
1070 | #[stable(feature = "rust1", since = "1.0.0")] | |
1071 | impl<'a> DoubleEndedIterator for Lines<'a> { | |
1072 | #[inline] | |
1073 | fn next_back(&mut self) -> Option<&'a str> { | |
1074 | self.0.next_back() | |
1075 | } | |
1076 | } | |
1077 | ||
9cc50fc6 SL |
/// Deprecated newtype around `Lines`; its iterator impls forward every call to the
/// wrapped value.
///
1078 | /// Created with the method [`lines_any()`]. |
1079 | /// |
54a0048b | 1080 | /// [`lines_any()`]: ../../std/primitive.str.html#method.lines_any |
9346a6ac | 1081 | #[stable(feature = "rust1", since = "1.0.0")] |
92a42be0 | 1082 | #[rustc_deprecated(since = "1.4.0", reason = "use lines()/Lines instead now")] |
54a0048b | 1083 | #[derive(Clone, Debug)] |
e9174d1e SL |
1084 | #[allow(deprecated)] |
1085 | pub struct LinesAny<'a>(Lines<'a>); |
9346a6ac | 1086 | |
b039eaaf | 1087 | /// A nameable, cloneable fn type |
///
/// Used as the mapping function inside `Lines`. Its `Fn` impl returns the given line
/// without a single trailing `\r`, if there is one.
9346a6ac AL |
1088 | #[derive(Clone)] |
1089 | struct LinesAnyMap; |
1090 | ||
1091 | impl<'a> Fn<(&'a str,)> for LinesAnyMap { | |
1092 | #[inline] | |
1093 | extern "rust-call" fn call(&self, (line,): (&'a str,)) -> &'a str { | |
1094 | let l = line.len(); | |
1095 | if l > 0 && line.as_bytes()[l - 1] == b'\r' { &line[0 .. l - 1] } | |
1096 | else { line } | |
1097 | } | |
1098 | } | |
1099 | ||
1100 | impl<'a> FnMut<(&'a str,)> for LinesAnyMap { | |
1101 | #[inline] | |
1102 | extern "rust-call" fn call_mut(&mut self, (line,): (&'a str,)) -> &'a str { | |
1103 | Fn::call(&*self, (line,)) | |
1104 | } | |
1105 | } | |
1106 | ||
1107 | impl<'a> FnOnce<(&'a str,)> for LinesAnyMap { | |
1108 | type Output = &'a str; | |
1109 | ||
1110 | #[inline] | |
1111 | extern "rust-call" fn call_once(self, (line,): (&'a str,)) -> &'a str { | |
1112 | Fn::call(&self, (line,)) | |
1a4d82fc | 1113 | } |
c34b1796 | 1114 | } |
1a4d82fc | 1115 | |
c34b1796 | 1116 | #[stable(feature = "rust1", since = "1.0.0")] |
e9174d1e | 1117 | #[allow(deprecated)] |
9346a6ac | 1118 | impl<'a> Iterator for LinesAny<'a> { |
c34b1796 AL |
1119 | type Item = &'a str; |
1120 | ||
1121 | #[inline] | |
1122 | fn next(&mut self) -> Option<&'a str> { | |
9346a6ac AL |
1123 | self.0.next() |
1124 | } | |
1125 | ||
1126 | #[inline] | |
1127 | fn size_hint(&self) -> (usize, Option<usize>) { | |
1128 | self.0.size_hint() | |
1129 | } | |
1130 | } | |
1131 | ||
1132 | #[stable(feature = "rust1", since = "1.0.0")] | |
e9174d1e | 1133 | #[allow(deprecated)] |
9346a6ac AL |
1134 | impl<'a> DoubleEndedIterator for LinesAny<'a> { |
1135 | #[inline] | |
1136 | fn next_back(&mut self) -> Option<&'a str> { | |
1137 | self.0.next_back() | |
1a4d82fc JJ |
1138 | } |
1139 | } | |
1140 | ||
1a4d82fc JJ |
1141 | /* |
1142 | Section: Comparing strings | |
1143 | */ | |
1144 | ||
c1a9b12d | 1145 | /// Bytewise slice equality: `true` exactly when `a` and `b` hold the same bytes. |
1a4d82fc JJ |
///
1146 | /// NOTE: This function is (ab)used in rustc::middle::trans::_match |
1147 | /// to compare &[u8] byte slices that are not necessarily valid UTF-8. |
// Tagged as the `str_eq` lang item; presumably this is how the compiler locates it.
c1a9b12d | 1148 | #[lang = "str_eq"] |
1a4d82fc | 1149 | #[inline] |
c1a9b12d | 1150 | fn eq_slice(a: &str, b: &str) -> bool { |
// Comparing the byte views never decodes the text as UTF-8.
54a0048b | 1151 | a.as_bytes() == b.as_bytes() |
1a4d82fc JJ |
1152 | } |
1153 | ||
1a4d82fc | 1154 | /* |
9cc50fc6 | 1155 | Section: UTF-8 validation |
1a4d82fc JJ |
1156 | */ |
1157 | ||
9cc50fc6 SL |
// The literal is written as a `u64` and truncated by the cast, so the mask
// has the top bit of every byte set whatever the width of `usize`.
const NONASCII_MASK: usize = 0x80808080_80808080u64 as usize;

/// Reports whether at least one byte of the word `x` has its high bit set,
/// i.e. is nonascii (>= 128).
#[inline]
fn contains_nonascii(x: usize) -> bool {
    let high_bits = x & NONASCII_MASK;
    high_bits != 0
}
1166 | ||
1a4d82fc JJ |
1167 | /// Walks through `v` checking that it's a valid UTF-8 sequence, |
1168 | /// returning `Ok(())` in that case, or, if it is invalid, an |
1169 | /// `Utf8Error` whose `valid_up_to` is the offset of the first byte in the |
1170 | /// invalid sequence. |
1171 | #[inline(always)] |
9cc50fc6 SL |
1172 | fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> { |
1173 | let mut offset = 0; |
1174 | let len = v.len(); |
1175 | while offset < len { |
// Start of the sequence examined in this iteration; `err!` reports it as `valid_up_to`.
1176 | let old_offset = offset; |
1a4d82fc | 1177 | macro_rules! err { () => {{ |
9346a6ac | 1178 | return Err(Utf8Error { |
9cc50fc6 | 1179 | valid_up_to: old_offset |
9346a6ac | 1180 | }) |
1a4d82fc JJ |
1181 | }}} |
1182 | ||
9cc50fc6 SL |
// `next!` advances `offset` by one and yields that byte, or bails out through `err!`
// when the input ends in the middle of a sequence.
1183 | macro_rules! next { () => {{ |
1184 | offset += 1; |
1185 | // we needed data, but there was none: error! |
1186 | if offset >= len { |
1187 | err!() |
1a4d82fc | 1188 | } |
9cc50fc6 SL |
1189 | v[offset] |
1190 | }}} |
1a4d82fc | 1191 | |
9cc50fc6 | 1192 | let first = v[offset]; |
1a4d82fc | 1193 | if first >= 128 { |
c34b1796 | 1194 | let w = UTF8_CHAR_WIDTH[first as usize]; |
1a4d82fc JJ |
1195 | let second = next!(); |
1196 | // 2-byte encoding is for codepoints \u{0080} to \u{07ff} |
1197 | // first C2 80 last DF BF |
1198 | // 3-byte encoding is for codepoints \u{0800} to \u{ffff} |
1199 | // first E0 A0 80 last EF BF BF |
1200 | // excluding surrogates codepoints \u{d800} to \u{dfff} |
1201 | // ED A0 80 to ED BF BF |
1202 | // 4-byte encoding is for codepoints \u{10000} to \u{10ffff} |
1203 | // first F0 90 80 80 last F4 8F BF BF |
1204 | // |
1205 | // Use the UTF-8 syntax from the RFC |
1206 | // |
1207 | // https://tools.ietf.org/html/rfc3629 |
1208 | // UTF8-1 = %x00-7F |
1209 | // UTF8-2 = %xC2-DF UTF8-tail |
1210 | // UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) / |
1211 | // %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail ) |
1212 | // UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) / |
1213 | // %xF4 %x80-8F 2( UTF8-tail ) |
1214 | match w { |
1215 | 2 => if second & !CONT_MASK != TAG_CONT_U8 {err!()}, | |
1216 | 3 => { |
1217 | match (first, second, next!() & !CONT_MASK) { |
1218 | (0xE0 , 0xA0 ... 0xBF, TAG_CONT_U8) | |
1219 | (0xE1 ... 0xEC, 0x80 ... 0xBF, TAG_CONT_U8) | |
1220 | (0xED , 0x80 ... 0x9F, TAG_CONT_U8) | |
1221 | (0xEE ... 0xEF, 0x80 ... 0xBF, TAG_CONT_U8) => {} |
1222 | _ => err!() |
1223 | } |
1224 | } |
1225 | 4 => { |
1226 | match (first, second, next!() & !CONT_MASK, next!() & !CONT_MASK) { |
1227 | (0xF0 , 0x90 ... 0xBF, TAG_CONT_U8, TAG_CONT_U8) | |
1228 | (0xF1 ... 0xF3, 0x80 ... 0xBF, TAG_CONT_U8, TAG_CONT_U8) | |
1229 | (0xF4 , 0x80 ... 0x8F, TAG_CONT_U8, TAG_CONT_U8) => {} |
1230 | _ => err!() |
1231 | } |
1232 | } |
1233 | _ => err!() |
1234 | } |
9cc50fc6 SL |
// `offset` still points at the last byte of the sequence just accepted; step past it.
1235 | offset += 1; |
1236 | } else { |
1237 | // Ascii case, try to skip forward quickly. |
1238 | // When the pointer is aligned, read 2 words of data per iteration |
1239 | // until we find a word containing a non-ascii byte. |
1240 | let usize_bytes = mem::size_of::<usize>(); |
1241 | let bytes_per_iteration = 2 * usize_bytes; |
1242 | let ptr = v.as_ptr(); |
1243 | let align = (ptr as usize + offset) & (usize_bytes - 1); |
1244 | if align == 0 { |
1245 | if len >= bytes_per_iteration { |
1246 | while offset <= len - bytes_per_iteration { |
// Word-sized reads: `ptr + offset` was found word-aligned above and `offset` only grows
// by whole words here, while the loop condition keeps both words inside `v`.
1247 | unsafe { |
1248 | let u = *(ptr.offset(offset as isize) as *const usize); |
// NOTE: this `v` shadows the input slice, but only inside the `unsafe` block.
1249 | let v = *(ptr.offset((offset + usize_bytes) as isize) as *const usize); |
1250 | ||
1251 | // break if there is a nonascii byte |
1252 | let zu = contains_nonascii(u); |
1253 | let zv = contains_nonascii(v); |
1254 | if zu || zv { |
1255 | break; |
1256 | } |
1257 | } |
1258 | offset += bytes_per_iteration; |
1259 | } |
1260 | } |
1261 | // step from the point where the wordwise loop stopped |
1262 | while offset < len && v[offset] < 128 { |
1263 | offset += 1; |
1264 | } |
1265 | } else { |
// Not word-aligned: the current byte is ASCII (`first < 128`), so skip just this one.
1266 | offset += 1; |
1267 | } |
1a4d82fc JJ |
1268 | } |
1269 | } |
9cc50fc6 SL |
1270 | |
1271 | Ok(()) |
1a4d82fc JJ |
1272 | } |
1273 | ||
// Lookup table indexed by the first byte of a sequence: the value is the length in bytes
// of the sequence that byte starts (1 for ASCII, 2 to 4 for multi-byte lead bytes).
// Bytes that cannot start a sequence (0x80-0xC1 and 0xF5-0xFF) map to 0, which
// `run_utf8_validation` rejects through its catch-all match arm.
1274 | // https://tools.ietf.org/html/rfc3629 |
1275 | static UTF8_CHAR_WIDTH: [u8; 256] = [ |
1276 | 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
1277 | 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x1F |
1278 | 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
1279 | 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x3F |
1280 | 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
1281 | 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x5F |
1282 | 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
1283 | 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x7F |
1284 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
1285 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x9F |
1286 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
1287 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xBF |
1288 | 0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2, |
1289 | 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xDF |
1290 | 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, // 0xEF |
1291 | 4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, // 0xFF |
1292 | ]; |
1293 | ||
1294 | /// Struct that contains a `char` and the index of the first byte of |
1295 | /// the next `char` in a string. This can be used as a data structure |
1296 | /// for iterating over the UTF-8 bytes of a string. |
///
/// Both fields are public; `next` is a byte index, not a count of `char`s.
54a0048b | 1297 | #[derive(Copy, Clone, Debug)] |
c34b1796 AL |
1298 | #[unstable(feature = "str_char", |
1299 | reason = "existence of this struct is uncertain as it is frequently \ |
1300 | able to be replaced with char.len_utf8() and/or \ |
e9174d1e SL |
1301 | char/char_indices iterators", |
1302 | issue = "27754")] |
1a4d82fc JJ |
1303 | pub struct CharRange { |
1304 | /// Current `char` |
1305 | pub ch: char, |
1306 | /// Byte index at which the next `char` starts |
85aaf69f | 1307 | pub next: usize, |
1a4d82fc JJ |
1308 | } |
1309 | ||
1310 | /// Mask of the value bits (the low six bits) of a continuation byte |
c34b1796 | 1311 | const CONT_MASK: u8 = 0b0011_1111; |
1a4d82fc | 1312 | /// Value of the tag bits (tag mask is !CONT_MASK) of a continuation byte |
///
/// A byte `b` is a continuation byte exactly when `b & !CONT_MASK == TAG_CONT_U8`.
c34b1796 | 1313 | const TAG_CONT_U8: u8 = 0b1000_0000; |
1a4d82fc JJ |
1314 | |
1315 | /* | |
1316 | Section: Trait implementations | |
1317 | */ | |
1318 | ||
1319 | mod traits { | |
54a0048b | 1320 | use cmp::{Ord, Ordering, PartialEq, PartialOrd, Eq}; |
1a4d82fc JJ |
1321 | use option::Option; |
1322 | use option::Option::Some; | |
1323 | use ops; | |
1324 | use str::{StrExt, eq_slice}; | |
1325 | ||
85aaf69f | 1326 | #[stable(feature = "rust1", since = "1.0.0")] |
1a4d82fc JJ |
1327 | impl Ord for str { |
1328 | #[inline] | |
1329 | fn cmp(&self, other: &str) -> Ordering { | |
54a0048b | 1330 | self.as_bytes().cmp(other.as_bytes()) |
1a4d82fc JJ |
1331 | } |
1332 | } | |
1333 | ||
85aaf69f | 1334 | #[stable(feature = "rust1", since = "1.0.0")] |
1a4d82fc JJ |
1335 | impl PartialEq for str { |
1336 | #[inline] | |
1337 | fn eq(&self, other: &str) -> bool { | |
1338 | eq_slice(self, other) | |
1339 | } | |
1340 | #[inline] | |
1341 | fn ne(&self, other: &str) -> bool { !(*self).eq(other) } | |
1342 | } | |
1343 | ||
85aaf69f | 1344 | #[stable(feature = "rust1", since = "1.0.0")] |
1a4d82fc JJ |
1345 | impl Eq for str {} |
1346 | ||
85aaf69f | 1347 | #[stable(feature = "rust1", since = "1.0.0")] |
1a4d82fc JJ |
1348 | impl PartialOrd for str { |
1349 | #[inline] | |
1350 | fn partial_cmp(&self, other: &str) -> Option<Ordering> { | |
1351 | Some(self.cmp(other)) | |
1352 | } | |
1353 | } | |
1354 | ||
54a0048b SL |
1355 | /// Implements substring slicing with syntax `&self[begin .. end]`. |
1356 | /// | |
85aaf69f SL |
1357 | /// Returns a slice of the given string from the byte range |
1358 | /// [`begin`..`end`). | |
1359 | /// | |
1360 | /// This operation is `O(1)`. | |
1361 | /// | |
54a0048b SL |
1362 | /// # Panics |
1363 | /// | |
1364 | /// Panics if `begin` or `end` does not point to the starting | |
1365 | /// byte offset of a character (as defined by `is_char_boundary`). | |
1366 | /// Requires that `begin <= end` and `end <= len` where `len` is the | |
1367 | /// length of the string. | |
85aaf69f | 1368 | /// |
c34b1796 | 1369 | /// # Examples |
85aaf69f | 1370 | /// |
c34b1796 | 1371 | /// ``` |
85aaf69f SL |
1372 | /// let s = "Löwe 老虎 Léopard"; |
1373 | /// assert_eq!(&s[0 .. 1], "L"); | |
1374 | /// | |
1375 | /// assert_eq!(&s[1 .. 9], "öwe 老"); | |
1376 | /// | |
1377 | /// // these will panic: | |
1378 | /// // byte 2 lies within `ö`: | |
1379 | /// // &s[2 ..3]; | |
1380 | /// | |
1381 | /// // byte 8 lies within `老` | |
1382 | /// // &s[1 .. 8]; | |
1383 | /// | |
1384 | /// // byte 100 is outside the string | |
1385 | /// // &s[3 .. 100]; | |
1386 | /// ``` | |
1387 | #[stable(feature = "rust1", since = "1.0.0")] | |
1388 | impl ops::Index<ops::Range<usize>> for str { | |
1a4d82fc JJ |
1389 | type Output = str; |
1390 | #[inline] | |
c34b1796 | 1391 | fn index(&self, index: ops::Range<usize>) -> &str { |
85aaf69f SL |
1392 | // is_char_boundary checks that the index is in [0, .len()] |
1393 | if index.start <= index.end && | |
1394 | self.is_char_boundary(index.start) && | |
1395 | self.is_char_boundary(index.end) { | |
1396 | unsafe { self.slice_unchecked(index.start, index.end) } | |
1397 | } else { | |
1398 | super::slice_error_fail(self, index.start, index.end) | |
1399 | } | |
1a4d82fc JJ |
1400 | } |
1401 | } | |
85aaf69f | 1402 | |
54a0048b SL |
1403 | /// Implements mutable substring slicing with syntax |
1404 | /// `&mut self[begin .. end]`. | |
1405 | /// | |
c1a9b12d SL |
1406 | /// Returns a mutable slice of the given string from the byte range |
1407 | /// [`begin`..`end`). | |
54a0048b SL |
1408 | /// |
1409 | /// This operation is `O(1)`. | |
1410 | /// | |
1411 | /// # Panics | |
1412 | /// | |
1413 | /// Panics if `begin` or `end` does not point to the starting | |
1414 | /// byte offset of a character (as defined by `is_char_boundary`). | |
1415 | /// Requires that `begin <= end` and `end <= len` where `len` is the | |
1416 | /// length of the string. | |
c1a9b12d SL |
1417 | #[stable(feature = "derefmut_for_string", since = "1.2.0")] |
1418 | impl ops::IndexMut<ops::Range<usize>> for str { | |
1419 | #[inline] | |
1420 | fn index_mut(&mut self, index: ops::Range<usize>) -> &mut str { | |
1421 | // is_char_boundary checks that the index is in [0, .len()] | |
1422 | if index.start <= index.end && | |
1423 | self.is_char_boundary(index.start) && | |
1424 | self.is_char_boundary(index.end) { | |
1425 | unsafe { self.slice_mut_unchecked(index.start, index.end) } | |
1426 | } else { | |
1427 | super::slice_error_fail(self, index.start, index.end) | |
1428 | } | |
1429 | } | |
1430 | } | |
1431 | ||
54a0048b | 1432 | /// Implements substring slicing with syntax `&self[.. end]`. |
85aaf69f | 1433 | /// |
54a0048b SL |
1434 | /// Returns a slice of the string from the beginning to byte offset |
1435 | /// `end`. | |
85aaf69f | 1436 | /// |
54a0048b | 1437 | /// Equivalent to `&self[0 .. end]`. |
85aaf69f SL |
1438 | #[stable(feature = "rust1", since = "1.0.0")] |
1439 | impl ops::Index<ops::RangeTo<usize>> for str { | |
1a4d82fc | 1440 | type Output = str; |
c34b1796 | 1441 | |
1a4d82fc | 1442 | #[inline] |
c34b1796 | 1443 | fn index(&self, index: ops::RangeTo<usize>) -> &str { |
85aaf69f SL |
1444 | // is_char_boundary checks that the index is in [0, .len()] |
1445 | if self.is_char_boundary(index.end) { | |
1446 | unsafe { self.slice_unchecked(0, index.end) } | |
1447 | } else { | |
1448 | super::slice_error_fail(self, 0, index.end) | |
1449 | } | |
1a4d82fc JJ |
1450 | } |
1451 | } | |
85aaf69f | 1452 | |
54a0048b SL |
1453 | /// Implements mutable substring slicing with syntax `&mut self[.. end]`. |
1454 | /// | |
1455 | /// Returns a mutable slice of the string from the beginning to byte offset | |
c1a9b12d | 1456 | /// `end`. |
54a0048b SL |
1457 | /// |
1458 | /// Equivalent to `&mut self[0 .. end]`. | |
c1a9b12d SL |
1459 | #[stable(feature = "derefmut_for_string", since = "1.2.0")] |
1460 | impl ops::IndexMut<ops::RangeTo<usize>> for str { | |
1461 | #[inline] | |
1462 | fn index_mut(&mut self, index: ops::RangeTo<usize>) -> &mut str { | |
1463 | // is_char_boundary checks that the index is in [0, .len()] | |
1464 | if self.is_char_boundary(index.end) { | |
1465 | unsafe { self.slice_mut_unchecked(0, index.end) } | |
1466 | } else { | |
1467 | super::slice_error_fail(self, 0, index.end) | |
1468 | } | |
1469 | } | |
1470 | } | |
1471 | ||
54a0048b | 1472 | /// Implements substring slicing with syntax `&self[begin ..]`. |
85aaf69f | 1473 | /// |
54a0048b SL |
1474 | /// Returns a slice of the string from byte offset `begin` |
1475 | /// to the end of the string. | |
85aaf69f | 1476 | /// |
54a0048b | 1477 | /// Equivalent to `&self[begin .. len]`. |
85aaf69f SL |
1478 | #[stable(feature = "rust1", since = "1.0.0")] |
1479 | impl ops::Index<ops::RangeFrom<usize>> for str { | |
1a4d82fc | 1480 | type Output = str; |
c34b1796 | 1481 | |
1a4d82fc | 1482 | #[inline] |
c34b1796 | 1483 | fn index(&self, index: ops::RangeFrom<usize>) -> &str { |
85aaf69f SL |
1484 | // is_char_boundary checks that the index is in [0, .len()] |
1485 | if self.is_char_boundary(index.start) { | |
1486 | unsafe { self.slice_unchecked(index.start, self.len()) } | |
1487 | } else { | |
1488 | super::slice_error_fail(self, index.start, self.len()) | |
1489 | } | |
1a4d82fc JJ |
1490 | } |
1491 | } | |
85aaf69f | 1492 | |
54a0048b SL |
1493 | /// Implements mutable substring slicing with syntax `&mut self[begin ..]`. |
1494 | /// | |
1495 | /// Returns a mutable slice of the string from byte offset `begin` | |
1496 | /// to the end of the string. | |
1497 | /// | |
1498 | /// Equivalent to `&mut self[begin .. len]`. | |
c1a9b12d SL |
1499 | #[stable(feature = "derefmut_for_string", since = "1.2.0")] |
1500 | impl ops::IndexMut<ops::RangeFrom<usize>> for str { | |
1501 | #[inline] | |
1502 | fn index_mut(&mut self, index: ops::RangeFrom<usize>) -> &mut str { | |
1503 | // is_char_boundary checks that the index is in [0, .len()] | |
1504 | if self.is_char_boundary(index.start) { | |
1505 | let len = self.len(); | |
1506 | unsafe { self.slice_mut_unchecked(index.start, len) } | |
1507 | } else { | |
1508 | super::slice_error_fail(self, index.start, self.len()) | |
1509 | } | |
1510 | } | |
1511 | } | |
1512 | ||
54a0048b SL |
1513 | /// Implements substring slicing with syntax `&self[..]`. |
1514 | /// |
1515 | /// Returns a slice of the whole string. This operation can |
1516 | /// never panic. |
1517 | /// |
1518 | /// Equivalent to `&self[0 .. len]`. |
85aaf69f SL |
1519 | #[stable(feature = "rust1", since = "1.0.0")] |
1520 | impl ops::Index<ops::RangeFull> for str { |
1a4d82fc | 1521 | type Output = str; |
c34b1796 | 1522 | |
1a4d82fc | 1523 | #[inline] |
c34b1796 | 1524 | fn index(&self, _index: ops::RangeFull) -> &str { |
1a4d82fc JJ |
// `..` selects everything, so no boundary check is needed: return the receiver as is.
1525 | self |
1526 | } |
1527 | } |
c1a9b12d | 1528 | |
54a0048b SL |
1529 | /// Implements mutable substring slicing with syntax `&mut self[..]`. |
1530 | /// |
1531 | /// Returns a mutable slice of the whole string. This operation can |
1532 | /// never panic. |
1533 | /// |
1534 | /// Equivalent to `&mut self[0 .. len]`. |
c1a9b12d SL |
1535 | #[stable(feature = "derefmut_for_string", since = "1.2.0")] |
1536 | impl ops::IndexMut<ops::RangeFull> for str { |
1537 | #[inline] |
1538 | fn index_mut(&mut self, _index: ops::RangeFull) -> &mut str { |
// `..` selects everything, so no boundary check is needed: return the receiver as is.
1539 | self |
1540 | } |
1541 | } |
54a0048b SL |
1542 | |
1543 | #[unstable(feature = "inclusive_range", | |
1544 | reason = "recently added, follows RFC", | |
1545 | issue = "28237")] | |
1546 | impl ops::Index<ops::RangeInclusive<usize>> for str { | |
1547 | type Output = str; | |
1548 | ||
1549 | #[inline] | |
1550 | fn index(&self, index: ops::RangeInclusive<usize>) -> &str { | |
1551 | match index { | |
1552 | ops::RangeInclusive::Empty { .. } => "", | |
1553 | ops::RangeInclusive::NonEmpty { end, .. } if end == usize::max_value() => | |
1554 | panic!("attempted to index slice up to maximum usize"), | |
1555 | ops::RangeInclusive::NonEmpty { start, end } => | |
1556 | self.index(start .. end+1) | |
1557 | } | |
1558 | } | |
1559 | } | |
1560 | #[unstable(feature = "inclusive_range", | |
1561 | reason = "recently added, follows RFC", | |
1562 | issue = "28237")] | |
1563 | impl ops::Index<ops::RangeToInclusive<usize>> for str { | |
1564 | type Output = str; | |
1565 | ||
1566 | #[inline] | |
1567 | fn index(&self, index: ops::RangeToInclusive<usize>) -> &str { | |
1568 | self.index(0...index.end) | |
1569 | } | |
1570 | } | |
1571 | ||
1572 | #[unstable(feature = "inclusive_range", | |
1573 | reason = "recently added, follows RFC", | |
1574 | issue = "28237")] | |
1575 | impl ops::IndexMut<ops::RangeInclusive<usize>> for str { | |
1576 | #[inline] | |
1577 | fn index_mut(&mut self, index: ops::RangeInclusive<usize>) -> &mut str { | |
1578 | match index { | |
1579 | ops::RangeInclusive::Empty { .. } => &mut self[0..0], // `&mut ""` doesn't work | |
1580 | ops::RangeInclusive::NonEmpty { end, .. } if end == usize::max_value() => | |
1581 | panic!("attempted to index str up to maximum usize"), | |
1582 | ops::RangeInclusive::NonEmpty { start, end } => | |
1583 | self.index_mut(start .. end+1) | |
1584 | } | |
1585 | } | |
1586 | } | |
1587 | #[unstable(feature = "inclusive_range", | |
1588 | reason = "recently added, follows RFC", | |
1589 | issue = "28237")] | |
1590 | impl ops::IndexMut<ops::RangeToInclusive<usize>> for str { | |
1591 | #[inline] | |
1592 | fn index_mut(&mut self, index: ops::RangeToInclusive<usize>) -> &mut str { | |
1593 | self.index_mut(0...index.end) | |
1594 | } | |
1595 | } | |
1a4d82fc JJ |
1596 | } |
1597 | ||
1a4d82fc JJ |
1598 | /// Methods for string slices |
1599 | #[allow(missing_docs)] | |
9346a6ac | 1600 | #[doc(hidden)] |
62682a34 | 1601 | #[unstable(feature = "core_str_ext", |
e9174d1e | 1602 | reason = "stable interface provided by `impl str` in later crates", |
54a0048b | 1603 | issue = "32110")] |
1a4d82fc JJ |
1604 | pub trait StrExt { |
1605 | // NB there are no docs here are they're all located on the StrExt trait in | |
1606 | // libcollections, not here. | |
1607 | ||
92a42be0 | 1608 | #[stable(feature = "core", since = "1.6.0")] |
c34b1796 | 1609 | fn contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool; |
92a42be0 | 1610 | #[stable(feature = "core", since = "1.6.0")] |
e9174d1e | 1611 | fn chars(&self) -> Chars; |
92a42be0 | 1612 | #[stable(feature = "core", since = "1.6.0")] |
e9174d1e | 1613 | fn bytes(&self) -> Bytes; |
92a42be0 | 1614 | #[stable(feature = "core", since = "1.6.0")] |
e9174d1e | 1615 | fn char_indices(&self) -> CharIndices; |
92a42be0 | 1616 | #[stable(feature = "core", since = "1.6.0")] |
c34b1796 | 1617 | fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P>; |
92a42be0 | 1618 | #[stable(feature = "core", since = "1.6.0")] |
c34b1796 AL |
1619 | fn rsplit<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplit<'a, P> |
1620 | where P::Searcher: ReverseSearcher<'a>; | |
92a42be0 | 1621 | #[stable(feature = "core", since = "1.6.0")] |
9346a6ac | 1622 | fn splitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> SplitN<'a, P>; |
92a42be0 | 1623 | #[stable(feature = "core", since = "1.6.0")] |
c34b1796 AL |
1624 | fn rsplitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> RSplitN<'a, P> |
1625 | where P::Searcher: ReverseSearcher<'a>; | |
92a42be0 | 1626 | #[stable(feature = "core", since = "1.6.0")] |
9346a6ac | 1627 | fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P>; |
92a42be0 | 1628 | #[stable(feature = "core", since = "1.6.0")] |
9346a6ac AL |
1629 | fn rsplit_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplitTerminator<'a, P> |
1630 | where P::Searcher: ReverseSearcher<'a>; | |
92a42be0 | 1631 | #[stable(feature = "core", since = "1.6.0")] |
9346a6ac | 1632 | fn matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> Matches<'a, P>; |
92a42be0 | 1633 | #[stable(feature = "core", since = "1.6.0")] |
9346a6ac AL |
1634 | fn rmatches<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatches<'a, P> |
1635 | where P::Searcher: ReverseSearcher<'a>; | |
92a42be0 | 1636 | #[stable(feature = "core", since = "1.6.0")] |
c34b1796 | 1637 | fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P>; |
92a42be0 | 1638 | #[stable(feature = "core", since = "1.6.0")] |
9346a6ac AL |
1639 | fn rmatch_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatchIndices<'a, P> |
1640 | where P::Searcher: ReverseSearcher<'a>; | |
92a42be0 | 1641 | #[stable(feature = "core", since = "1.6.0")] |
e9174d1e | 1642 | fn lines(&self) -> Lines; |
92a42be0 SL |
1643 | #[stable(feature = "core", since = "1.6.0")] |
1644 | #[rustc_deprecated(since = "1.6.0", reason = "use lines() instead now")] | |
e9174d1e SL |
1645 | #[allow(deprecated)] |
1646 | fn lines_any(&self) -> LinesAny; | |
92a42be0 | 1647 | #[stable(feature = "core", since = "1.6.0")] |
e9174d1e | 1648 | unsafe fn slice_unchecked(&self, begin: usize, end: usize) -> &str; |
92a42be0 | 1649 | #[stable(feature = "core", since = "1.6.0")] |
e9174d1e | 1650 | unsafe fn slice_mut_unchecked(&mut self, begin: usize, end: usize) -> &mut str; |
92a42be0 | 1651 | #[stable(feature = "core", since = "1.6.0")] |
c34b1796 | 1652 | fn starts_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool; |
92a42be0 | 1653 | #[stable(feature = "core", since = "1.6.0")] |
c34b1796 AL |
1654 | fn ends_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool |
1655 | where P::Searcher: ReverseSearcher<'a>; | |
92a42be0 | 1656 | #[stable(feature = "core", since = "1.6.0")] |
c34b1796 AL |
1657 | fn trim_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str |
1658 | where P::Searcher: DoubleEndedSearcher<'a>; | |
92a42be0 | 1659 | #[stable(feature = "core", since = "1.6.0")] |
c34b1796 | 1660 | fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str; |
92a42be0 | 1661 | #[stable(feature = "core", since = "1.6.0")] |
c34b1796 AL |
1662 | fn trim_right_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str |
1663 | where P::Searcher: ReverseSearcher<'a>; | |
54a0048b | 1664 | #[stable(feature = "is_char_boundary", since = "1.9.0")] |
85aaf69f | 1665 | fn is_char_boundary(&self, index: usize) -> bool; |
92a42be0 SL |
1666 | #[unstable(feature = "str_char", |
1667 | reason = "often replaced by char_indices, this method may \ | |
1668 | be removed in favor of just char_at() or eventually \ | |
1669 | removed altogether", | |
1670 | issue = "27754")] | |
54a0048b SL |
1671 | #[rustc_deprecated(reason = "use slicing plus chars() plus len_utf8", |
1672 | since = "1.9.0")] | |
85aaf69f | 1673 | fn char_range_at(&self, start: usize) -> CharRange; |
92a42be0 SL |
1674 | #[unstable(feature = "str_char", |
1675 | reason = "often replaced by char_indices, this method may \ | |
1676 | be removed in favor of just char_at_reverse() or \ | |
1677 | eventually removed altogether", | |
1678 | issue = "27754")] | |
54a0048b SL |
1679 | #[rustc_deprecated(reason = "use slicing plus chars().rev() plus len_utf8", |
1680 | since = "1.9.0")] | |
85aaf69f | 1681 | fn char_range_at_reverse(&self, start: usize) -> CharRange; |
92a42be0 SL |
1682 | #[unstable(feature = "str_char", |
1683 | reason = "frequently replaced by the chars() iterator, this \ | |
1684 | method may be removed or possibly renamed in the \ | |
1685 | future; it is normally replaced by chars/char_indices \ | |
1686 | iterators or by getting the first char from a \ | |
1687 | subslice", | |
1688 | issue = "27754")] | |
54a0048b SL |
1689 | #[rustc_deprecated(reason = "use slicing plus chars()", |
1690 | since = "1.9.0")] | |
85aaf69f | 1691 | fn char_at(&self, i: usize) -> char; |
92a42be0 SL |
1692 | #[unstable(feature = "str_char", |
1693 | reason = "see char_at for more details, but reverse semantics \ | |
1694 | are also somewhat unclear, especially with which \ | |
1695 | cases generate panics", | |
1696 | issue = "27754")] | |
54a0048b SL |
1697 | #[rustc_deprecated(reason = "use slicing plus chars().rev()", |
1698 | since = "1.9.0")] | |
85aaf69f | 1699 | fn char_at_reverse(&self, i: usize) -> char; |
92a42be0 | 1700 | #[stable(feature = "core", since = "1.6.0")] |
e9174d1e | 1701 | fn as_bytes(&self) -> &[u8]; |
92a42be0 | 1702 | #[stable(feature = "core", since = "1.6.0")] |
c34b1796 | 1703 | fn find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>; |
92a42be0 | 1704 | #[stable(feature = "core", since = "1.6.0")] |
c34b1796 AL |
1705 | fn rfind<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize> |
1706 | where P::Searcher: ReverseSearcher<'a>; | |
1707 | fn find_str<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>; | |
92a42be0 | 1708 | #[stable(feature = "core", since = "1.6.0")] |
62682a34 | 1709 | fn split_at(&self, mid: usize) -> (&str, &str); |
92a42be0 | 1710 | #[stable(feature = "core", since = "1.6.0")] |
c1a9b12d | 1711 | fn split_at_mut(&mut self, mid: usize) -> (&mut str, &mut str); |
92a42be0 SL |
1712 | #[unstable(feature = "str_char", |
1713 | reason = "awaiting conventions about shifting and slices and \ | |
1714 | may not be warranted with the existence of the chars \ | |
1715 | and/or char_indices iterators", | |
1716 | issue = "27754")] | |
54a0048b SL |
1717 | #[rustc_deprecated(reason = "use chars() plus Chars::as_str", |
1718 | since = "1.9.0")] | |
e9174d1e | 1719 | fn slice_shift_char(&self) -> Option<(char, &str)>; |
92a42be0 | 1720 | #[stable(feature = "core", since = "1.6.0")] |
1a4d82fc | 1721 | fn as_ptr(&self) -> *const u8; |
92a42be0 | 1722 | #[stable(feature = "core", since = "1.6.0")] |
85aaf69f | 1723 | fn len(&self) -> usize; |
92a42be0 | 1724 | #[stable(feature = "core", since = "1.6.0")] |
1a4d82fc | 1725 | fn is_empty(&self) -> bool; |
92a42be0 | 1726 | #[stable(feature = "core", since = "1.6.0")] |
85aaf69f | 1727 | fn parse<T: FromStr>(&self) -> Result<T, T::Err>; |
1a4d82fc JJ |
1728 | } |
1729 | ||
54a0048b SL |
// Shortens `s` so that its byte length is at most `max`, never cutting a
// character in half.
//
// Returns `(false, s)` unchanged when `s` already fits, otherwise
// `(true, prefix)` where `prefix` ends on the nearest character boundary at
// or below `max`.
fn truncate_to_char_boundary(s: &str, mut max: usize) -> (bool, &str) {
    if max < s.len() {
        // Step backwards until we land on a boundary; index 0 always is one,
        // so this terminates.
        loop {
            if s.is_char_boundary(max) {
                break;
            }
            max -= 1;
        }
        return (true, &s[..max]);
    }
    (false, s)
}
1742 | ||
1a4d82fc | 1743 | #[inline(never)] |
92a42be0 | 1744 | #[cold] |
85aaf69f | 1745 | fn slice_error_fail(s: &str, begin: usize, end: usize) -> ! { |
54a0048b SL |
1746 | const MAX_DISPLAY_LENGTH: usize = 256; |
1747 | let (truncated, s) = truncate_to_char_boundary(s, MAX_DISPLAY_LENGTH); | |
1748 | let ellipsis = if truncated { "[...]" } else { "" }; | |
1749 | ||
1750 | assert!(begin <= end, "begin <= end ({} <= {}) when slicing `{}`{}", | |
1751 | begin, end, s, ellipsis); | |
1752 | panic!("index {} and/or {} in `{}`{} do not lie on character boundary", | |
1753 | begin, end, s, ellipsis); | |
1a4d82fc JJ |
1754 | } |
1755 | ||
92a42be0 | 1756 | #[stable(feature = "core", since = "1.6.0")] |
1a4d82fc JJ |
1757 | impl StrExt for str { |
1758 | #[inline] | |
c34b1796 AL |
1759 | fn contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool { |
1760 | pat.is_contained_in(self) | |
1a4d82fc JJ |
1761 | } |
1762 | ||
1a4d82fc JJ |
1763 | #[inline] |
1764 | fn chars(&self) -> Chars { | |
1765 | Chars{iter: self.as_bytes().iter()} | |
1766 | } | |
1767 | ||
1768 | #[inline] | |
1769 | fn bytes(&self) -> Bytes { | |
e9174d1e | 1770 | Bytes(self.as_bytes().iter().cloned()) |
1a4d82fc JJ |
1771 | } |
1772 | ||
1773 | #[inline] | |
1774 | fn char_indices(&self) -> CharIndices { | |
1775 | CharIndices { front_offset: 0, iter: self.chars() } | |
1776 | } | |
1777 | ||
1778 | #[inline] | |
c34b1796 | 1779 | fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P> { |
9346a6ac | 1780 | Split(SplitInternal { |
c34b1796 AL |
1781 | start: 0, |
1782 | end: self.len(), | |
1783 | matcher: pat.into_searcher(self), | |
1a4d82fc JJ |
1784 | allow_trailing_empty: true, |
1785 | finished: false, | |
1786 | }) | |
1787 | } | |
1788 | ||
9346a6ac AL |
1789 | #[inline] |
1790 | fn rsplit<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplit<'a, P> | |
1791 | where P::Searcher: ReverseSearcher<'a> | |
1792 | { | |
1793 | RSplit(self.split(pat).0) | |
1794 | } | |
1795 | ||
1a4d82fc | 1796 | #[inline] |
c34b1796 | 1797 | fn splitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> SplitN<'a, P> { |
9346a6ac | 1798 | SplitN(SplitNInternal { |
1a4d82fc JJ |
1799 | iter: self.split(pat).0, |
1800 | count: count, | |
1a4d82fc JJ |
1801 | }) |
1802 | } | |
1803 | ||
9346a6ac AL |
1804 | #[inline] |
1805 | fn rsplitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> RSplitN<'a, P> | |
1806 | where P::Searcher: ReverseSearcher<'a> | |
1807 | { | |
1808 | RSplitN(self.splitn(count, pat).0) | |
1809 | } | |
1810 | ||
1a4d82fc | 1811 | #[inline] |
c34b1796 | 1812 | fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P> { |
9346a6ac | 1813 | SplitTerminator(SplitInternal { |
1a4d82fc JJ |
1814 | allow_trailing_empty: false, |
1815 | ..self.split(pat).0 | |
1816 | }) | |
1817 | } | |
1818 | ||
1819 | #[inline] | |
9346a6ac | 1820 | fn rsplit_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplitTerminator<'a, P> |
c34b1796 AL |
1821 | where P::Searcher: ReverseSearcher<'a> |
1822 | { | |
9346a6ac | 1823 | RSplitTerminator(self.split_terminator(pat).0) |
1a4d82fc JJ |
1824 | } |
1825 | ||
1826 | #[inline] | |
9346a6ac AL |
1827 | fn matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> Matches<'a, P> { |
1828 | Matches(MatchesInternal(pat.into_searcher(self))) | |
1829 | } | |
1830 | ||
1831 | #[inline] | |
1832 | fn rmatches<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatches<'a, P> | |
c34b1796 AL |
1833 | where P::Searcher: ReverseSearcher<'a> |
1834 | { | |
9346a6ac | 1835 | RMatches(self.matches(pat).0) |
1a4d82fc JJ |
1836 | } |
1837 | ||
1838 | #[inline] | |
c34b1796 | 1839 | fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P> { |
9346a6ac | 1840 | MatchIndices(MatchIndicesInternal(pat.into_searcher(self))) |
1a4d82fc JJ |
1841 | } |
1842 | ||
9346a6ac AL |
1843 | #[inline] |
1844 | fn rmatch_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatchIndices<'a, P> | |
1845 | where P::Searcher: ReverseSearcher<'a> | |
1846 | { | |
1847 | RMatchIndices(self.match_indices(pat).0) | |
1848 | } | |
1a4d82fc JJ |
1849 | #[inline] |
1850 | fn lines(&self) -> Lines { | |
e9174d1e | 1851 | Lines(self.split_terminator('\n').map(LinesAnyMap)) |
1a4d82fc JJ |
1852 | } |
1853 | ||
9346a6ac | 1854 | #[inline] |
e9174d1e | 1855 | #[allow(deprecated)] |
1a4d82fc | 1856 | fn lines_any(&self) -> LinesAny { |
e9174d1e | 1857 | LinesAny(self.lines()) |
1a4d82fc JJ |
1858 | } |
1859 | ||
1a4d82fc | 1860 | #[inline] |
85aaf69f | 1861 | unsafe fn slice_unchecked(&self, begin: usize, end: usize) -> &str { |
54a0048b SL |
1862 | let ptr = self.as_ptr().offset(begin as isize); |
1863 | let len = end - begin; | |
1864 | from_utf8_unchecked(slice::from_raw_parts(ptr, len)) | |
1a4d82fc JJ |
1865 | } |
1866 | ||
c1a9b12d SL |
1867 | #[inline] |
1868 | unsafe fn slice_mut_unchecked(&mut self, begin: usize, end: usize) -> &mut str { | |
54a0048b SL |
1869 | let ptr = self.as_ptr().offset(begin as isize); |
1870 | let len = end - begin; | |
1871 | mem::transmute(slice::from_raw_parts_mut(ptr as *mut u8, len)) | |
c1a9b12d SL |
1872 | } |
1873 | ||
1a4d82fc | 1874 | #[inline] |
c34b1796 AL |
1875 | fn starts_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool { |
1876 | pat.is_prefix_of(self) | |
1a4d82fc JJ |
1877 | } |
1878 | ||
1879 | #[inline] | |
c34b1796 AL |
1880 | fn ends_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool |
1881 | where P::Searcher: ReverseSearcher<'a> | |
1882 | { | |
1883 | pat.is_suffix_of(self) | |
1a4d82fc JJ |
1884 | } |
1885 | ||
1886 | #[inline] | |
c34b1796 AL |
1887 | fn trim_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str |
1888 | where P::Searcher: DoubleEndedSearcher<'a> | |
1889 | { | |
1890 | let mut i = 0; | |
1891 | let mut j = 0; | |
1892 | let mut matcher = pat.into_searcher(self); | |
1893 | if let Some((a, b)) = matcher.next_reject() { | |
1894 | i = a; | |
7453a54e | 1895 | j = b; // Remember earliest known match, correct it below if |
c34b1796 AL |
1896 | // last match is different |
1897 | } | |
1898 | if let Some((_, b)) = matcher.next_reject_back() { | |
1899 | j = b; | |
1900 | } | |
1901 | unsafe { | |
1902 | // Searcher is known to return valid indices | |
1903 | self.slice_unchecked(i, j) | |
1a4d82fc JJ |
1904 | } |
1905 | } | |
1906 | ||
1907 | #[inline] | |
c34b1796 AL |
1908 | fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str { |
1909 | let mut i = self.len(); | |
1910 | let mut matcher = pat.into_searcher(self); | |
1911 | if let Some((a, _)) = matcher.next_reject() { | |
1912 | i = a; | |
1913 | } | |
1914 | unsafe { | |
1915 | // Searcher is known to return valid indices | |
1916 | self.slice_unchecked(i, self.len()) | |
1a4d82fc JJ |
1917 | } |
1918 | } | |
1919 | ||
1920 | #[inline] | |
c34b1796 AL |
1921 | fn trim_right_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str |
1922 | where P::Searcher: ReverseSearcher<'a> | |
1923 | { | |
1924 | let mut j = 0; | |
1925 | let mut matcher = pat.into_searcher(self); | |
1926 | if let Some((_, b)) = matcher.next_reject_back() { | |
1927 | j = b; | |
1928 | } | |
1929 | unsafe { | |
1930 | // Searcher is known to return valid indices | |
1931 | self.slice_unchecked(0, j) | |
1a4d82fc JJ |
1932 | } |
1933 | } | |
1934 | ||
1935 | #[inline] | |
85aaf69f | 1936 | fn is_char_boundary(&self, index: usize) -> bool { |
54a0048b SL |
1937 | // 0 and len are always ok. |
1938 | // Test for 0 explicitly so that it can optimize out the check | |
1939 | // easily and skip reading string data for that case. | |
1940 | if index == 0 || index == self.len() { return true; } | |
1a4d82fc JJ |
1941 | match self.as_bytes().get(index) { |
1942 | None => false, | |
a7813a04 XL |
1943 | // This is bit magic equivalent to: b < 128 || b >= 192 |
1944 | Some(&b) => (b as i8) >= -0x40, | |
1a4d82fc JJ |
1945 | } |
1946 | } | |
1947 | ||
1948 | #[inline] | |
85aaf69f SL |
1949 | fn char_range_at(&self, i: usize) -> CharRange { |
1950 | let (c, n) = char_range_at_raw(self.as_bytes(), i); | |
e9174d1e | 1951 | CharRange { ch: unsafe { char::from_u32_unchecked(c) }, next: n } |
1a4d82fc JJ |
1952 | } |
1953 | ||
1954 | #[inline] | |
85aaf69f | 1955 | fn char_range_at_reverse(&self, start: usize) -> CharRange { |
1a4d82fc JJ |
1956 | let mut prev = start; |
1957 | ||
1958 | prev = prev.saturating_sub(1); | |
1959 | if self.as_bytes()[prev] < 128 { | |
1960 | return CharRange{ch: self.as_bytes()[prev] as char, next: prev} | |
1961 | } | |
1962 | ||
1963 | // Multibyte case is a fn to allow char_range_at_reverse to inline cleanly | |
85aaf69f | 1964 | fn multibyte_char_range_at_reverse(s: &str, mut i: usize) -> CharRange { |
1a4d82fc JJ |
1965 | // while there is a previous byte == 10...... |
1966 | while i > 0 && s.as_bytes()[i] & !CONT_MASK == TAG_CONT_U8 { | |
85aaf69f | 1967 | i -= 1; |
1a4d82fc JJ |
1968 | } |
1969 | ||
c34b1796 AL |
1970 | let first= s.as_bytes()[i]; |
1971 | let w = UTF8_CHAR_WIDTH[first as usize]; | |
1972 | assert!(w != 0); | |
1a4d82fc | 1973 | |
c34b1796 AL |
1974 | let mut val = utf8_first_byte(first, w as u32); |
1975 | val = utf8_acc_cont_byte(val, s.as_bytes()[i + 1]); | |
1976 | if w > 2 { val = utf8_acc_cont_byte(val, s.as_bytes()[i + 2]); } | |
1977 | if w > 3 { val = utf8_acc_cont_byte(val, s.as_bytes()[i + 3]); } | |
1a4d82fc | 1978 | |
e9174d1e | 1979 | CharRange {ch: unsafe { char::from_u32_unchecked(val) }, next: i} |
1a4d82fc JJ |
1980 | } |
1981 | ||
e9174d1e | 1982 | multibyte_char_range_at_reverse(self, prev) |
1a4d82fc JJ |
1983 | } |
1984 | ||
1985 | #[inline] | |
54a0048b | 1986 | #[allow(deprecated)] |
85aaf69f | 1987 | fn char_at(&self, i: usize) -> char { |
1a4d82fc JJ |
1988 | self.char_range_at(i).ch |
1989 | } | |
1990 | ||
1991 | #[inline] | |
54a0048b | 1992 | #[allow(deprecated)] |
85aaf69f | 1993 | fn char_at_reverse(&self, i: usize) -> char { |
1a4d82fc JJ |
1994 | self.char_range_at_reverse(i).ch |
1995 | } | |
1996 | ||
1997 | #[inline] | |
1998 | fn as_bytes(&self) -> &[u8] { | |
1999 | unsafe { mem::transmute(self) } | |
2000 | } | |
2001 | ||
c34b1796 AL |
2002 | fn find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize> { |
2003 | pat.into_searcher(self).next_match().map(|(i, _)| i) | |
1a4d82fc JJ |
2004 | } |
2005 | ||
c34b1796 AL |
2006 | fn rfind<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize> |
2007 | where P::Searcher: ReverseSearcher<'a> | |
2008 | { | |
2009 | pat.into_searcher(self).next_match_back().map(|(i, _)| i) | |
1a4d82fc JJ |
2010 | } |
2011 | ||
c34b1796 AL |
2012 | fn find_str<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize> { |
2013 | self.find(pat) | |
1a4d82fc JJ |
2014 | } |
2015 | ||
54a0048b | 2016 | #[inline] |
62682a34 SL |
2017 | fn split_at(&self, mid: usize) -> (&str, &str) { |
2018 | // is_char_boundary checks that the index is in [0, .len()] | |
2019 | if self.is_char_boundary(mid) { | |
2020 | unsafe { | |
2021 | (self.slice_unchecked(0, mid), | |
2022 | self.slice_unchecked(mid, self.len())) | |
2023 | } | |
2024 | } else { | |
2025 | slice_error_fail(self, 0, mid) | |
2026 | } | |
2027 | } | |
2028 | ||
c1a9b12d SL |
2029 | fn split_at_mut(&mut self, mid: usize) -> (&mut str, &mut str) { |
2030 | // is_char_boundary checks that the index is in [0, .len()] | |
2031 | if self.is_char_boundary(mid) { | |
2032 | let len = self.len(); | |
7453a54e | 2033 | let ptr = self.as_ptr() as *mut u8; |
c1a9b12d | 2034 | unsafe { |
7453a54e SL |
2035 | (from_raw_parts_mut(ptr, mid), |
2036 | from_raw_parts_mut(ptr.offset(mid as isize), len - mid)) | |
c1a9b12d SL |
2037 | } |
2038 | } else { | |
2039 | slice_error_fail(self, 0, mid) | |
2040 | } | |
2041 | } | |
2042 | ||
1a4d82fc | 2043 | #[inline] |
54a0048b | 2044 | #[allow(deprecated)] |
1a4d82fc JJ |
2045 | fn slice_shift_char(&self) -> Option<(char, &str)> { |
2046 | if self.is_empty() { | |
2047 | None | |
2048 | } else { | |
c34b1796 AL |
2049 | let ch = self.char_at(0); |
2050 | let next_s = unsafe { self.slice_unchecked(ch.len_utf8(), self.len()) }; | |
1a4d82fc JJ |
2051 | Some((ch, next_s)) |
2052 | } | |
2053 | } | |
2054 | ||
1a4d82fc JJ |
2055 | #[inline] |
2056 | fn as_ptr(&self) -> *const u8 { | |
54a0048b | 2057 | self as *const str as *const u8 |
1a4d82fc JJ |
2058 | } |
2059 | ||
2060 | #[inline] | |
54a0048b SL |
2061 | fn len(&self) -> usize { |
2062 | self.as_bytes().len() | |
2063 | } | |
1a4d82fc JJ |
2064 | |
2065 | #[inline] | |
2066 | fn is_empty(&self) -> bool { self.len() == 0 } | |
2067 | ||
2068 | #[inline] | |
85aaf69f | 2069 | fn parse<T: FromStr>(&self) -> Result<T, T::Err> { FromStr::from_str(self) } |
1a4d82fc JJ |
2070 | } |
2071 | ||
bd371182 AL |
2072 | #[stable(feature = "rust1", since = "1.0.0")] |
2073 | impl AsRef<[u8]> for str { | |
2074 | #[inline] | |
2075 | fn as_ref(&self) -> &[u8] { | |
2076 | self.as_bytes() | |
2077 | } | |
2078 | } | |
2079 | ||
85aaf69f SL |
2080 | /// Pluck a code point out of a UTF-8-like byte slice and return the |
2081 | /// index of the next code point. | |
2082 | #[inline] | |
62682a34 | 2083 | fn char_range_at_raw(bytes: &[u8], i: usize) -> (u32, usize) { |
c34b1796 | 2084 | if bytes[i] < 128 { |
85aaf69f SL |
2085 | return (bytes[i] as u32, i + 1); |
2086 | } | |
2087 | ||
2088 | // Multibyte case is a fn to allow char_range_at to inline cleanly | |
2089 | fn multibyte_char_range_at(bytes: &[u8], i: usize) -> (u32, usize) { | |
c34b1796 AL |
2090 | let first = bytes[i]; |
2091 | let w = UTF8_CHAR_WIDTH[first as usize]; | |
2092 | assert!(w != 0); | |
85aaf69f | 2093 | |
c34b1796 AL |
2094 | let mut val = utf8_first_byte(first, w as u32); |
2095 | val = utf8_acc_cont_byte(val, bytes[i + 1]); | |
2096 | if w > 2 { val = utf8_acc_cont_byte(val, bytes[i + 2]); } | |
2097 | if w > 3 { val = utf8_acc_cont_byte(val, bytes[i + 3]); } | |
85aaf69f | 2098 | |
e9174d1e | 2099 | (val, i + w as usize) |
85aaf69f SL |
2100 | } |
2101 | ||
2102 | multibyte_char_range_at(bytes, i) | |
2103 | } | |
2104 | ||
2105 | #[stable(feature = "rust1", since = "1.0.0")] | |
1a4d82fc | 2106 | impl<'a> Default for &'a str { |
1a4d82fc JJ |
2107 | fn default() -> &'a str { "" } |
2108 | } |