]> git.proxmox.com Git - rustc.git/blame - src/libcore/str/mod.rs
New upstream version 1.12.0+dfsg1
[rustc.git] / src / libcore / str / mod.rs
CommitLineData
1a4d82fc
JJ
1// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2// file at the top-level directory of this distribution and at
3// http://rust-lang.org/COPYRIGHT.
4//
5// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8// option. This file may not be copied, modified, or distributed
9// except according to those terms.
1a4d82fc
JJ
10
11//! String manipulation
12//!
13//! For more details, see std::str
14
62682a34 15#![stable(feature = "rust1", since = "1.0.0")]
1a4d82fc 16
9346a6ac
AL
17use self::pattern::Pattern;
18use self::pattern::{Searcher, ReverseSearcher, DoubleEndedSearcher};
1a4d82fc 19
3157f602 20use char;
85aaf69f 21use clone::Clone;
bd371182 22use convert::AsRef;
1a4d82fc 23use default::Default;
85aaf69f 24use fmt;
1a4d82fc 25use iter::ExactSizeIterator;
e9174d1e
SL
26use iter::{Map, Cloned, Iterator, DoubleEndedIterator};
27use marker::Sized;
1a4d82fc 28use mem;
c34b1796 29use ops::{Fn, FnMut, FnOnce};
1a4d82fc 30use option::Option::{self, None, Some};
1a4d82fc
JJ
31use result::Result::{self, Ok, Err};
32use slice::{self, SliceExt};
1a4d82fc 33
9346a6ac 34pub mod pattern;
1a4d82fc
JJ
35
36/// A trait to abstract the idea of creating a new instance of a type from a
37/// string.
92a42be0
SL
38///
39/// `FromStr`'s [`from_str()`] method is often used implicitly, through
40/// [`str`]'s [`parse()`] method. See [`parse()`]'s documentation for examples.
41///
42/// [`from_str()`]: #tymethod.from_str
54a0048b
SL
43/// [`str`]: ../../std/primitive.str.html
44/// [`parse()`]: ../../std/primitive.str.html#method.parse
85aaf69f 45#[stable(feature = "rust1", since = "1.0.0")]
e9174d1e 46pub trait FromStr: Sized {
85aaf69f
SL
47 /// The associated error which can be returned from parsing.
48 #[stable(feature = "rust1", since = "1.0.0")]
49 type Err;
50
d9579d0f
AL
51 /// Parses a string `s` to return a value of this type.
52 ///
53 /// If parsing succeeds, return the value inside `Ok`, otherwise
54 /// when the string is ill-formatted return an error inside `Err`.
55 /// The error type is specific to the implementation of the trait.
b039eaaf
SL
56 ///
57 /// # Examples
58 ///
59 /// Basic usage with [`i32`][ithirtytwo], a type that implements `FromStr`:
60 ///
54a0048b 61 /// [ithirtytwo]: ../../std/primitive.i32.html
b039eaaf
SL
62 ///
63 /// ```
64 /// use std::str::FromStr;
65 ///
66 /// let s = "5";
67 /// let x = i32::from_str(s).unwrap();
68 ///
69 /// assert_eq!(5, x);
70 /// ```
85aaf69f
SL
71 #[stable(feature = "rust1", since = "1.0.0")]
72 fn from_str(s: &str) -> Result<Self, Self::Err>;
1a4d82fc
JJ
73}
74
85aaf69f 75#[stable(feature = "rust1", since = "1.0.0")]
1a4d82fc 76impl FromStr for bool {
85aaf69f
SL
77 type Err = ParseBoolError;
78
1a4d82fc
JJ
79 /// Parse a `bool` from a string.
80 ///
c34b1796
AL
81 /// Yields a `Result<bool, ParseBoolError>`, because `s` may or may not
82 /// actually be parseable.
1a4d82fc
JJ
83 ///
84 /// # Examples
85 ///
c34b1796
AL
86 /// ```
87 /// use std::str::FromStr;
88 ///
89 /// assert_eq!(FromStr::from_str("true"), Ok(true));
90 /// assert_eq!(FromStr::from_str("false"), Ok(false));
91 /// assert!(<bool as FromStr>::from_str("not even a boolean").is_err());
92 /// ```
93 ///
94 /// Note, in many cases, the `.parse()` method on `str` is more proper.
95 ///
96 /// ```
85aaf69f
SL
97 /// assert_eq!("true".parse(), Ok(true));
98 /// assert_eq!("false".parse(), Ok(false));
99 /// assert!("not even a boolean".parse::<bool>().is_err());
1a4d82fc
JJ
100 /// ```
101 #[inline]
85aaf69f 102 fn from_str(s: &str) -> Result<bool, ParseBoolError> {
1a4d82fc 103 match s {
85aaf69f
SL
104 "true" => Ok(true),
105 "false" => Ok(false),
106 _ => Err(ParseBoolError { _priv: () }),
1a4d82fc
JJ
107 }
108 }
109}
110
85aaf69f
SL
111/// An error returned when parsing a `bool` from a string fails.
112#[derive(Debug, Clone, PartialEq)]
113#[stable(feature = "rust1", since = "1.0.0")]
114pub struct ParseBoolError { _priv: () }
115
116#[stable(feature = "rust1", since = "1.0.0")]
117impl fmt::Display for ParseBoolError {
118 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
119 "provided string was not `true` or `false`".fmt(f)
120 }
121}
122
1a4d82fc
JJ
123/*
124Section: Creating a string
125*/
126
b039eaaf
SL
127/// Errors which can occur when attempting to interpret a sequence of `u8`
128/// as a string.
129///
130/// As such, the `from_utf8` family of functions and methods for both `String`s
131/// and `&str`s make use of this error, for example.
85aaf69f 132#[derive(Copy, Eq, PartialEq, Clone, Debug)]
9346a6ac
AL
133#[stable(feature = "rust1", since = "1.0.0")]
134pub struct Utf8Error {
135 valid_up_to: usize,
136}
137
138impl Utf8Error {
139 /// Returns the index in the given string up to which valid UTF-8 was
140 /// verified.
1a4d82fc 141 ///
b039eaaf
SL
142 /// It is the maximum index such that `from_utf8(&input[..index])`
143 /// would return `Ok(_)`.
144 ///
145 /// # Examples
146 ///
147 /// Basic usage:
148 ///
149 /// ```
b039eaaf
SL
150 /// use std::str;
151 ///
152 /// // some invalid bytes, in a vector
153 /// let sparkle_heart = vec![0, 159, 146, 150];
154 ///
155 /// // std::str::from_utf8 returns a Utf8Error
156 /// let error = str::from_utf8(&sparkle_heart).unwrap_err();
157 ///
7453a54e 158 /// // the second byte is invalid here
b039eaaf
SL
159 /// assert_eq!(1, error.valid_up_to());
160 /// ```
161 #[stable(feature = "utf8_error", since = "1.5.0")]
9346a6ac 162 pub fn valid_up_to(&self) -> usize { self.valid_up_to }
1a4d82fc
JJ
163}
164
b039eaaf 165/// Converts a slice of bytes to a string slice.
1a4d82fc 166///
b039eaaf
SL
167/// A string slice (`&str`) is made of bytes (`u8`), and a byte slice (`&[u8]`)
168/// is made of bytes, so this function converts between the two. Not all byte
169/// slices are valid string slices, however: `&str` requires that it is valid
170/// UTF-8. `from_utf8()` checks to ensure that the bytes are valid UTF-8, and
171/// then does the conversion.
172///
173/// If you are sure that the byte slice is valid UTF-8, and you don't want to
174/// incur the overhead of the validity check, there is an unsafe version of
7453a54e 175/// this function, [`from_utf8_unchecked()`][fromutf8u], which has the same
b039eaaf
SL
176/// behavior but skips the check.
177///
7453a54e 178/// [fromutf8u]: fn.from_utf8_unchecked.html
b039eaaf
SL
179///
180/// If you need a `String` instead of a `&str`, consider
181/// [`String::from_utf8()`][string].
182///
54a0048b 183/// [string]: ../../std/string/struct.String.html#method.from_utf8
b039eaaf
SL
184///
185/// Because you can stack-allocate a `[u8; N]`, and you can take a `&[u8]` of
186/// it, this function is one way to have a stack-allocated string. There is
187/// an example of this in the examples section below.
1a4d82fc 188///
7453a54e 189/// # Errors
1a4d82fc 190///
e9174d1e
SL
191/// Returns `Err` if the slice is not UTF-8 with a description as to why the
192/// provided slice is not UTF-8.
b039eaaf
SL
193///
194/// # Examples
195///
196/// Basic usage:
197///
198/// ```
199/// use std::str;
200///
201/// // some bytes, in a vector
202/// let sparkle_heart = vec![240, 159, 146, 150];
203///
204/// // We know these bytes are valid, so just use `unwrap()`.
205/// let sparkle_heart = str::from_utf8(&sparkle_heart).unwrap();
206///
207/// assert_eq!("💖", sparkle_heart);
208/// ```
209///
210/// Incorrect bytes:
211///
212/// ```
213/// use std::str;
214///
215/// // some invalid bytes, in a vector
216/// let sparkle_heart = vec![0, 159, 146, 150];
217///
218/// assert!(str::from_utf8(&sparkle_heart).is_err());
219/// ```
220///
221/// See the docs for [`Utf8Error`][error] for more details on the kinds of
222/// errors that can be returned.
223///
224/// [error]: struct.Utf8Error.html
225///
226/// A "stack allocated string":
227///
228/// ```
229/// use std::str;
230///
231/// // some bytes, in a stack-allocated array
232/// let sparkle_heart = [240, 159, 146, 150];
233///
234/// // We know these bytes are valid, so just use `unwrap()`.
235/// let sparkle_heart = str::from_utf8(&sparkle_heart).unwrap();
236///
237/// assert_eq!("💖", sparkle_heart);
238/// ```
85aaf69f 239#[stable(feature = "rust1", since = "1.0.0")]
1a4d82fc 240pub fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
54a0048b 241 run_utf8_validation(v)?;
1a4d82fc
JJ
242 Ok(unsafe { from_utf8_unchecked(v) })
243}
244
7453a54e
SL
/// Forms a mutable str from a pointer and a length.
///
/// The `len` argument is the number of bytes in the string. The bytes are
/// viewed through `slice::from_raw_parts_mut` and that slice is then
/// reinterpreted as a `&mut str`; nothing is copied and nothing is validated.
///
/// # Safety
///
/// This function is unsafe as there is no guarantee that the given pointer is
/// valid for `len` bytes, nor whether the lifetime inferred is a suitable
/// lifetime for the returned str.
///
/// The data must be valid UTF-8.
///
/// `p` must be non-null, even for zero-length str.
///
/// # Caveat
///
/// The lifetime for the returned str is inferred from its usage. To
/// prevent accidental misuse, it's suggested to tie the lifetime to whichever
/// source lifetime is safe in the context, such as by providing a helper
/// function taking the lifetime of a host value for the str, or by explicit
/// annotation.
unsafe fn from_raw_parts_mut<'a>(p: *mut u8, len: usize) -> &'a mut str {
    // The caller guarantees that `p` is valid for `len` bytes of UTF-8, which
    // is what makes reinterpreting the byte slice as a str acceptable here.
    mem::transmute::<&mut [u8], &mut str>(slice::from_raw_parts_mut(p, len))
}
272
1a4d82fc
JJ
273/// Converts a slice of bytes to a string slice without checking
274/// that the string contains valid UTF-8.
b039eaaf 275///
7453a54e 276/// See the safe version, [`from_utf8()`][fromutf8], for more information.
b039eaaf
SL
277///
278/// [fromutf8]: fn.from_utf8.html
279///
280/// # Safety
281///
282/// This function is unsafe because it does not check that the bytes passed to
283/// it are valid UTF-8. If this constraint is violated, undefined behavior
284/// results, as the rest of Rust assumes that `&str`s are valid UTF-8.
285///
286/// # Examples
287///
288/// Basic usage:
289///
290/// ```
291/// use std::str;
292///
293/// // some bytes, in a vector
294/// let sparkle_heart = vec![240, 159, 146, 150];
295///
296/// let sparkle_heart = unsafe {
297/// str::from_utf8_unchecked(&sparkle_heart)
298/// };
299///
300/// assert_eq!("💖", sparkle_heart);
301/// ```
d9579d0f 302#[inline(always)]
85aaf69f 303#[stable(feature = "rust1", since = "1.0.0")]
e9174d1e 304pub unsafe fn from_utf8_unchecked(v: &[u8]) -> &str {
1a4d82fc
JJ
305 mem::transmute(v)
306}
307
85aaf69f
SL
308#[stable(feature = "rust1", since = "1.0.0")]
309impl fmt::Display for Utf8Error {
310 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
9346a6ac 311 write!(f, "invalid utf-8: invalid byte near index {}", self.valid_up_to)
85aaf69f
SL
312 }
313}
314
1a4d82fc
JJ
315/*
316Section: Iterators
317*/
318
319/// Iterator for the char (representing *Unicode Scalar Values*) of a string
320///
9cc50fc6
SL
321/// Created with the method [`chars()`].
322///
54a0048b
SL
323/// [`chars()`]: ../../std/primitive.str.html#method.chars
324#[derive(Clone, Debug)]
85aaf69f 325#[stable(feature = "rust1", since = "1.0.0")]
1a4d82fc
JJ
326pub struct Chars<'a> {
327 iter: slice::Iter<'a, u8>
328}
329
c34b1796
AL
/// Extracts the payload bits of a leading byte as the initial code point
/// accumulator.
///
/// The mask is `0x7F >> width`, so the bottom 5 bits are kept for width 2,
/// 4 bits for width 3, and 3 bits for width 4.
#[inline]
fn utf8_first_byte(byte: u8, width: u32) -> u32 {
    let payload_mask: u8 = 0x7F >> width;
    (byte & payload_mask) as u32
}
1a4d82fc 335
c34b1796
AL
336/// Return the value of `ch` updated with continuation byte `byte`.
337#[inline]
338fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 { (ch << 6) | (byte & CONT_MASK) as u32 }
1a4d82fc 339
c34b1796
AL
340/// Checks whether the byte is a UTF-8 continuation byte (i.e. starts with the
341/// bits `10`).
342#[inline]
343fn utf8_is_cont_byte(byte: u8) -> bool { (byte & !CONT_MASK) == TAG_CONT_U8 }
1a4d82fc
JJ
344
/// Returns the byte behind `opt`, or `0` when there is none.
#[inline]
fn unwrap_or_0(opt: Option<&u8>) -> u8 {
    opt.map_or(0, |&byte| byte)
}
352
85aaf69f
SL
353/// Reads the next code point out of a byte iterator (assuming a
354/// UTF-8-like encoding).
///
/// Returns `None` when `bytes` is already exhausted. A leading byte below
/// 128 is returned unchanged; otherwise up to three further bytes are pulled
/// from the iterator and folded into the result. A byte that is missing is
/// read as `0` (via `unwrap_or_0`) instead of being reported, so this
/// function does not validate its input.
e9174d1e 355#[unstable(feature = "str_internals", issue = "0")]
c34b1796 356#[inline]
3157f602 357pub fn next_code_point<'a, I: Iterator<Item = &'a u8>>(bytes: &mut I) -> Option<u32> {
85aaf69f
SL
358 // Decode UTF-8: an empty iterator ends decoding, a byte below 128 is
 // returned directly, anything else becomes the leading byte `x`.
359 let x = match bytes.next() {
360 None => return None,
361 Some(&next_byte) if next_byte < 128 => return Some(next_byte as u32),
362 Some(&next_byte) => next_byte,
363 };
364
365 // Multibyte case follows
366 // Decode from a byte combination out of: [[[x y] z] w]
367 // NOTE: Performance is sensitive to the exact formulation here
 // Start out as if this were a two-byte sequence: mask `x` with width 2
 // and fold in the second byte `y`.
c34b1796 368 let init = utf8_first_byte(x, 2);
85aaf69f 369 let y = unwrap_or_0(bytes.next());
c34b1796 370 let mut ch = utf8_acc_cont_byte(init, y);
85aaf69f
SL
371 if x >= 0xE0 {
372 // [[x y z] w] case
373 // 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid
374 let z = unwrap_or_0(bytes.next());
 // `y_z` holds the masked bits of `y` followed by the masked bits of `z`.
c34b1796 375 let y_z = utf8_acc_cont_byte((y & CONT_MASK) as u32, z);
85aaf69f
SL
376 ch = init << 12 | y_z;
377 if x >= 0xF0 {
378 // [x y z w] case
379 // use only the lower 3 bits of `init`
380 let w = unwrap_or_0(bytes.next());
c34b1796 381 ch = (init & 7) << 18 | utf8_acc_cont_byte(y_z, w);
85aaf69f
SL
382 }
383 }
384
385 Some(ch)
386}
387
c34b1796
AL
388/// Reads the last code point out of a byte iterator (assuming a
389/// UTF-8-like encoding).
c34b1796 390#[inline]
3157f602
XL
391fn next_code_point_reverse<'a,
392 I: DoubleEndedIterator<Item = &'a u8>>(bytes: &mut I) -> Option<u32> {
c34b1796
AL
393 // Decode UTF-8
394 let w = match bytes.next_back() {
395 None => return None,
396 Some(&next_byte) if next_byte < 128 => return Some(next_byte as u32),
397 Some(&back_byte) => back_byte,
398 };
399
400 // Multibyte case follows
401 // Decode from a byte combination out of: [x [y [z w]]]
402 let mut ch;
403 let z = unwrap_or_0(bytes.next_back());
404 ch = utf8_first_byte(z, 2);
405 if utf8_is_cont_byte(z) {
406 let y = unwrap_or_0(bytes.next_back());
407 ch = utf8_first_byte(y, 3);
408 if utf8_is_cont_byte(y) {
409 let x = unwrap_or_0(bytes.next_back());
410 ch = utf8_first_byte(x, 4);
411 ch = utf8_acc_cont_byte(ch, y);
412 }
413 ch = utf8_acc_cont_byte(ch, z);
414 }
415 ch = utf8_acc_cont_byte(ch, w);
416
417 Some(ch)
418}
419
85aaf69f 420#[stable(feature = "rust1", since = "1.0.0")]
1a4d82fc
JJ
421impl<'a> Iterator for Chars<'a> {
422 type Item = char;
423
424 #[inline]
425 fn next(&mut self) -> Option<char> {
85aaf69f
SL
426 next_code_point(&mut self.iter).map(|ch| {
427 // str invariant says `ch` is a valid Unicode Scalar Value
428 unsafe {
e9174d1e 429 char::from_u32_unchecked(ch)
1a4d82fc 430 }
85aaf69f 431 })
1a4d82fc
JJ
432 }
433
434 #[inline]
85aaf69f 435 fn size_hint(&self) -> (usize, Option<usize>) {
3157f602 436 let len = self.iter.len();
c34b1796
AL
437 // `(len + 3)` can't overflow, because we know that the `slice::Iter`
438 // belongs to a slice in memory which has a maximum length of
439 // `isize::MAX` (that's well below `usize::MAX`).
440 ((len + 3) / 4, Some(len))
1a4d82fc
JJ
441 }
442}
443
85aaf69f 444#[stable(feature = "rust1", since = "1.0.0")]
1a4d82fc
JJ
445impl<'a> DoubleEndedIterator for Chars<'a> {
446 #[inline]
447 fn next_back(&mut self) -> Option<char> {
c34b1796
AL
448 next_code_point_reverse(&mut self.iter).map(|ch| {
449 // str invariant says `ch` is a valid Unicode Scalar Value
450 unsafe {
e9174d1e 451 char::from_u32_unchecked(ch)
1a4d82fc 452 }
c34b1796 453 })
1a4d82fc
JJ
454 }
455}
456
e9174d1e
SL
457impl<'a> Chars<'a> {
458 /// View the underlying data as a subslice of the original data.
459 ///
460 /// This has the same lifetime as the original slice, and so the
461 /// iterator can continue to be used while this exists.
5bcae85e
SL
462 ///
463 /// # Examples
464 ///
465 /// ```
466 /// let mut chars = "abc".chars();
467 ///
468 /// assert_eq!(chars.as_str(), "abc");
469 /// chars.next();
470 /// assert_eq!(chars.as_str(), "bc");
471 /// chars.next();
472 /// chars.next();
473 /// assert_eq!(chars.as_str(), "");
474 /// ```
e9174d1e
SL
475 #[stable(feature = "iter_to_slice", since = "1.4.0")]
476 #[inline]
477 pub fn as_str(&self) -> &'a str {
478 unsafe { from_utf8_unchecked(self.iter.as_slice()) }
479 }
480}
481
9346a6ac 482/// Iterator for a string's characters and their byte offsets.
54a0048b 483#[derive(Clone, Debug)]
85aaf69f 484#[stable(feature = "rust1", since = "1.0.0")]
1a4d82fc 485pub struct CharIndices<'a> {
85aaf69f 486 front_offset: usize,
1a4d82fc
JJ
487 iter: Chars<'a>,
488}
489
85aaf69f 490#[stable(feature = "rust1", since = "1.0.0")]
1a4d82fc 491impl<'a> Iterator for CharIndices<'a> {
85aaf69f 492 type Item = (usize, char);
1a4d82fc
JJ
493
494 #[inline]
85aaf69f 495 fn next(&mut self) -> Option<(usize, char)> {
3157f602 496 let pre_len = self.iter.iter.len();
1a4d82fc
JJ
497 match self.iter.next() {
498 None => None,
499 Some(ch) => {
500 let index = self.front_offset;
3157f602 501 let len = self.iter.iter.len();
1a4d82fc
JJ
502 self.front_offset += pre_len - len;
503 Some((index, ch))
504 }
505 }
506 }
507
508 #[inline]
85aaf69f 509 fn size_hint(&self) -> (usize, Option<usize>) {
1a4d82fc
JJ
510 self.iter.size_hint()
511 }
512}
513
85aaf69f 514#[stable(feature = "rust1", since = "1.0.0")]
1a4d82fc
JJ
515impl<'a> DoubleEndedIterator for CharIndices<'a> {
516 #[inline]
85aaf69f 517 fn next_back(&mut self) -> Option<(usize, char)> {
1a4d82fc
JJ
518 match self.iter.next_back() {
519 None => None,
520 Some(ch) => {
3157f602 521 let index = self.front_offset + self.iter.iter.len();
1a4d82fc
JJ
522 Some((index, ch))
523 }
524 }
525 }
526}
527
e9174d1e
SL
528impl<'a> CharIndices<'a> {
529 /// View the underlying data as a subslice of the original data.
530 ///
531 /// This has the same lifetime as the original slice, and so the
532 /// iterator can continue to be used while this exists.
533 #[stable(feature = "iter_to_slice", since = "1.4.0")]
534 #[inline]
535 pub fn as_str(&self) -> &'a str {
536 self.iter.as_str()
537 }
538}
539
1a4d82fc
JJ
540/// External iterator for a string's bytes.
541/// Use with the `std::iter` module.
542///
9cc50fc6
SL
543/// Created with the method [`bytes()`].
544///
54a0048b 545/// [`bytes()`]: ../../std/primitive.str.html#method.bytes
85aaf69f 546#[stable(feature = "rust1", since = "1.0.0")]
54a0048b 547#[derive(Clone, Debug)]
e9174d1e 548pub struct Bytes<'a>(Cloned<slice::Iter<'a, u8>>);
1a4d82fc 549
e9174d1e
SL
550#[stable(feature = "rust1", since = "1.0.0")]
551impl<'a> Iterator for Bytes<'a> {
552 type Item = u8;
1a4d82fc 553
1a4d82fc 554 #[inline]
e9174d1e
SL
555 fn next(&mut self) -> Option<u8> {
556 self.0.next()
1a4d82fc 557 }
1a4d82fc 558
c34b1796 559 #[inline]
e9174d1e
SL
560 fn size_hint(&self) -> (usize, Option<usize>) {
561 self.0.size_hint()
c34b1796 562 }
c34b1796
AL
563
564 #[inline]
e9174d1e
SL
565 fn count(self) -> usize {
566 self.0.count()
c34b1796 567 }
9346a6ac
AL
568
569 #[inline]
e9174d1e
SL
570 fn last(self) -> Option<Self::Item> {
571 self.0.last()
9346a6ac
AL
572 }
573
574 #[inline]
e9174d1e
SL
575 fn nth(&mut self, n: usize) -> Option<Self::Item> {
576 self.0.nth(n)
9346a6ac 577 }
1a4d82fc
JJ
578}
579
9346a6ac
AL
580#[stable(feature = "rust1", since = "1.0.0")]
581impl<'a> DoubleEndedIterator for Bytes<'a> {
582 #[inline]
583 fn next_back(&mut self) -> Option<u8> {
584 self.0.next_back()
585 }
c34b1796
AL
586}
587
9346a6ac
AL
588#[stable(feature = "rust1", since = "1.0.0")]
589impl<'a> ExactSizeIterator for Bytes<'a> {
590 #[inline]
591 fn len(&self) -> usize {
592 self.0.len()
593 }
c34b1796
AL
594}
595
9346a6ac
AL
596/// This macro generates a Clone impl for string pattern API
597/// wrapper types of the form X<'a, P>
598macro_rules! derive_pattern_clone {
599 (clone $t:ident with |$s:ident| $e:expr) => {
600 impl<'a, P: Pattern<'a>> Clone for $t<'a, P>
601 where P::Searcher: Clone
602 {
603 fn clone(&self) -> Self {
604 let $s = self;
605 $e
606 }
607 }
608 }
1a4d82fc
JJ
609}
610
9346a6ac 611/// This macro generates two public iterator structs
b039eaaf 612/// wrapping a private internal one that makes use of the `Pattern` API.
9346a6ac
AL
613///
614/// For all patterns `P: Pattern<'a>` the following items will be
d9579d0f 615/// generated (generics omitted):
9346a6ac
AL
616///
617/// struct $forward_iterator($internal_iterator);
618/// struct $reverse_iterator($internal_iterator);
619///
620/// impl Iterator for $forward_iterator
621/// { /* internal ends up calling Searcher::next_match() */ }
622///
623/// impl DoubleEndedIterator for $forward_iterator
624/// where P::Searcher: DoubleEndedSearcher
625/// { /* internal ends up calling Searcher::next_match_back() */ }
626///
627/// impl Iterator for $reverse_iterator
628/// where P::Searcher: ReverseSearcher
629/// { /* internal ends up calling Searcher::next_match_back() */ }
630///
631/// impl DoubleEndedIterator for $reverse_iterator
632/// where P::Searcher: DoubleEndedSearcher
633/// { /* internal ends up calling Searcher::next_match() */ }
634///
635/// The internal one is defined outside the macro, and has almost the same
636/// semantic as a DoubleEndedIterator by delegating to `pattern::Searcher` and
637/// `pattern::ReverseSearcher` for both forward and reverse iteration.
638///
639/// "Almost", because a `Searcher` and a `ReverseSearcher` for a given
640/// `Pattern` might not return the same elements, so actually implementing
641/// `DoubleEndedIterator` for it would be incorrect.
642/// (See the docs in `str::pattern` for more details)
643///
644/// However, the internal struct still represents a single ended iterator from
645/// either end, and depending on pattern is also a valid double ended iterator,
646/// so the two wrapper structs implement `Iterator`
647/// and `DoubleEndedIterator` depending on the concrete pattern type, leading
648/// to the complex impls seen above.
649macro_rules! generate_pattern_iterators {
650 {
651 // Forward iterator
652 forward:
653 $(#[$forward_iterator_attribute:meta])*
654 struct $forward_iterator:ident;
655
656 // Reverse iterator
657 reverse:
658 $(#[$reverse_iterator_attribute:meta])*
659 struct $reverse_iterator:ident;
660
661 // Stability of all generated items
662 stability:
663 $(#[$common_stability_attribute:meta])*
664
665 // Internal almost-iterator that is being delegated to
666 internal:
667 $internal_iterator:ident yielding ($iterty:ty);
668
669 // Kind of delegation - either single ended or double ended
670 delegate $($t:tt)*
671 } => {
672 $(#[$forward_iterator_attribute])*
673 $(#[$common_stability_attribute])*
674 pub struct $forward_iterator<'a, P: Pattern<'a>>($internal_iterator<'a, P>);
675
54a0048b
SL
676 $(#[$common_stability_attribute])*
677 impl<'a, P: Pattern<'a>> fmt::Debug for $forward_iterator<'a, P>
678 where P::Searcher: fmt::Debug
679 {
680 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
681 f.debug_tuple(stringify!($forward_iterator))
682 .field(&self.0)
683 .finish()
684 }
685 }
686
9346a6ac
AL
687 $(#[$common_stability_attribute])*
688 impl<'a, P: Pattern<'a>> Iterator for $forward_iterator<'a, P> {
689 type Item = $iterty;
690
691 #[inline]
692 fn next(&mut self) -> Option<$iterty> {
693 self.0.next()
694 }
695 }
696
697 $(#[$common_stability_attribute])*
698 impl<'a, P: Pattern<'a>> Clone for $forward_iterator<'a, P>
699 where P::Searcher: Clone
700 {
701 fn clone(&self) -> Self {
702 $forward_iterator(self.0.clone())
703 }
704 }
705
706 $(#[$reverse_iterator_attribute])*
707 $(#[$common_stability_attribute])*
708 pub struct $reverse_iterator<'a, P: Pattern<'a>>($internal_iterator<'a, P>);
709
54a0048b
SL
710 $(#[$common_stability_attribute])*
711 impl<'a, P: Pattern<'a>> fmt::Debug for $reverse_iterator<'a, P>
712 where P::Searcher: fmt::Debug
713 {
714 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
715 f.debug_tuple(stringify!($reverse_iterator))
716 .field(&self.0)
717 .finish()
718 }
719 }
720
9346a6ac
AL
721 $(#[$common_stability_attribute])*
722 impl<'a, P: Pattern<'a>> Iterator for $reverse_iterator<'a, P>
723 where P::Searcher: ReverseSearcher<'a>
724 {
725 type Item = $iterty;
726
727 #[inline]
728 fn next(&mut self) -> Option<$iterty> {
729 self.0.next_back()
730 }
731 }
732
733 $(#[$common_stability_attribute])*
734 impl<'a, P: Pattern<'a>> Clone for $reverse_iterator<'a, P>
735 where P::Searcher: Clone
736 {
737 fn clone(&self) -> Self {
738 $reverse_iterator(self.0.clone())
739 }
740 }
741
742 generate_pattern_iterators!($($t)* with $(#[$common_stability_attribute])*,
743 $forward_iterator,
744 $reverse_iterator, $iterty);
745 };
746 {
747 double ended; with $(#[$common_stability_attribute:meta])*,
748 $forward_iterator:ident,
749 $reverse_iterator:ident, $iterty:ty
750 } => {
751 $(#[$common_stability_attribute])*
752 impl<'a, P: Pattern<'a>> DoubleEndedIterator for $forward_iterator<'a, P>
753 where P::Searcher: DoubleEndedSearcher<'a>
754 {
755 #[inline]
756 fn next_back(&mut self) -> Option<$iterty> {
757 self.0.next_back()
758 }
759 }
760
761 $(#[$common_stability_attribute])*
762 impl<'a, P: Pattern<'a>> DoubleEndedIterator for $reverse_iterator<'a, P>
763 where P::Searcher: DoubleEndedSearcher<'a>
764 {
765 #[inline]
766 fn next_back(&mut self) -> Option<$iterty> {
767 self.0.next()
768 }
769 }
770 };
771 {
772 single ended; with $(#[$common_stability_attribute:meta])*,
773 $forward_iterator:ident,
774 $reverse_iterator:ident, $iterty:ty
775 } => {}
1a4d82fc
JJ
776}
777
9346a6ac
AL
778derive_pattern_clone!{
779 clone SplitInternal
780 with |s| SplitInternal { matcher: s.matcher.clone(), ..*s }
781}
54a0048b 782
9346a6ac
AL
783struct SplitInternal<'a, P: Pattern<'a>> {
784 start: usize,
785 end: usize,
786 matcher: P::Searcher,
787 allow_trailing_empty: bool,
788 finished: bool,
1a4d82fc
JJ
789}
790
54a0048b
SL
791impl<'a, P: Pattern<'a>> fmt::Debug for SplitInternal<'a, P> where P::Searcher: fmt::Debug {
792 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
793 f.debug_struct("SplitInternal")
794 .field("start", &self.start)
795 .field("end", &self.end)
796 .field("matcher", &self.matcher)
797 .field("allow_trailing_empty", &self.allow_trailing_empty)
798 .field("finished", &self.finished)
799 .finish()
800 }
801}
802
9346a6ac 803impl<'a, P: Pattern<'a>> SplitInternal<'a, P> {
1a4d82fc
JJ
804 #[inline]
805 fn get_end(&mut self) -> Option<&'a str> {
c34b1796 806 if !self.finished && (self.allow_trailing_empty || self.end - self.start > 0) {
1a4d82fc 807 self.finished = true;
c34b1796
AL
808 unsafe {
809 let string = self.matcher.haystack().slice_unchecked(self.start, self.end);
810 Some(string)
811 }
1a4d82fc
JJ
812 } else {
813 None
814 }
815 }
1a4d82fc
JJ
816
817 #[inline]
818 fn next(&mut self) -> Option<&'a str> {
819 if self.finished { return None }
820
c34b1796
AL
821 let haystack = self.matcher.haystack();
822 match self.matcher.next_match() {
1a4d82fc 823 Some((a, b)) => unsafe {
c34b1796
AL
824 let elt = haystack.slice_unchecked(self.start, a);
825 self.start = b;
1a4d82fc
JJ
826 Some(elt)
827 },
828 None => self.get_end(),
829 }
830 }
1a4d82fc 831
1a4d82fc 832 #[inline]
9346a6ac
AL
833 fn next_back(&mut self) -> Option<&'a str>
834 where P::Searcher: ReverseSearcher<'a>
835 {
1a4d82fc
JJ
836 if self.finished { return None }
837
838 if !self.allow_trailing_empty {
839 self.allow_trailing_empty = true;
840 match self.next_back() {
841 Some(elt) if !elt.is_empty() => return Some(elt),
842 _ => if self.finished { return None }
843 }
844 }
c34b1796
AL
845
846 let haystack = self.matcher.haystack();
847 match self.matcher.next_match_back() {
1a4d82fc 848 Some((a, b)) => unsafe {
c34b1796
AL
849 let elt = haystack.slice_unchecked(b, self.end);
850 self.end = a;
1a4d82fc
JJ
851 Some(elt)
852 },
c34b1796
AL
853 None => unsafe {
854 self.finished = true;
855 Some(haystack.slice_unchecked(self.start, self.end))
856 },
1a4d82fc
JJ
857 }
858 }
859}
860
9346a6ac
AL
861generate_pattern_iterators! {
862 forward:
9cc50fc6
SL
863 /// Created with the method [`split()`].
864 ///
54a0048b 865 /// [`split()`]: ../../std/primitive.str.html#method.split
9346a6ac
AL
866 struct Split;
867 reverse:
9cc50fc6
SL
868 /// Created with the method [`rsplit()`].
869 ///
54a0048b 870 /// [`rsplit()`]: ../../std/primitive.str.html#method.rsplit
9346a6ac
AL
871 struct RSplit;
872 stability:
873 #[stable(feature = "rust1", since = "1.0.0")]
874 internal:
875 SplitInternal yielding (&'a str);
876 delegate double ended;
877}
878
879generate_pattern_iterators! {
880 forward:
9cc50fc6
SL
881 /// Created with the method [`split_terminator()`].
882 ///
54a0048b 883 /// [`split_terminator()`]: ../../std/primitive.str.html#method.split_terminator
9346a6ac
AL
884 struct SplitTerminator;
885 reverse:
9cc50fc6
SL
886 /// Created with the method [`rsplit_terminator()`].
887 ///
54a0048b 888 /// [`rsplit_terminator()`]: ../../std/primitive.str.html#method.rsplit_terminator
9346a6ac
AL
889 struct RSplitTerminator;
890 stability:
891 #[stable(feature = "rust1", since = "1.0.0")]
892 internal:
893 SplitInternal yielding (&'a str);
894 delegate double ended;
895}
1a4d82fc 896
9346a6ac
AL
897derive_pattern_clone!{
898 clone SplitNInternal
899 with |s| SplitNInternal { iter: s.iter.clone(), ..*s }
900}
54a0048b 901
9346a6ac
AL
902struct SplitNInternal<'a, P: Pattern<'a>> {
903 iter: SplitInternal<'a, P>,
904 /// The number of splits remaining
905 count: usize,
906}
907
54a0048b
SL
908impl<'a, P: Pattern<'a>> fmt::Debug for SplitNInternal<'a, P> where P::Searcher: fmt::Debug {
909 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
910 f.debug_struct("SplitNInternal")
911 .field("iter", &self.iter)
912 .field("count", &self.count)
913 .finish()
914 }
915}
916
9346a6ac 917impl<'a, P: Pattern<'a>> SplitNInternal<'a, P> {
1a4d82fc
JJ
918 #[inline]
919 fn next(&mut self) -> Option<&'a str> {
c34b1796
AL
920 match self.count {
921 0 => None,
922 1 => { self.count = 0; self.iter.get_end() }
923 _ => { self.count -= 1; self.iter.next() }
1a4d82fc
JJ
924 }
925 }
1a4d82fc 926
c34b1796 927 #[inline]
9346a6ac
AL
928 fn next_back(&mut self) -> Option<&'a str>
929 where P::Searcher: ReverseSearcher<'a>
930 {
931 match self.count {
932 0 => None,
933 1 => { self.count = 0; self.iter.get_end() }
934 _ => { self.count -= 1; self.iter.next_back() }
c34b1796
AL
935 }
936 }
1a4d82fc
JJ
937}
938
9346a6ac
AL
939generate_pattern_iterators! {
940 forward:
9cc50fc6
SL
941 /// Created with the method [`splitn()`].
942 ///
54a0048b 943 /// [`splitn()`]: ../../std/primitive.str.html#method.splitn
9346a6ac
AL
944 struct SplitN;
945 reverse:
9cc50fc6
SL
946 /// Created with the method [`rsplitn()`].
947 ///
54a0048b 948 /// [`rsplitn()`]: ../../std/primitive.str.html#method.rsplitn
9346a6ac
AL
949 struct RSplitN;
950 stability:
951 #[stable(feature = "rust1", since = "1.0.0")]
952 internal:
953 SplitNInternal yielding (&'a str);
954 delegate single ended;
955}
956
957derive_pattern_clone!{
958 clone MatchIndicesInternal
959 with |s| MatchIndicesInternal(s.0.clone())
960}
54a0048b 961
9346a6ac
AL
962struct MatchIndicesInternal<'a, P: Pattern<'a>>(P::Searcher);
963
54a0048b
SL
964impl<'a, P: Pattern<'a>> fmt::Debug for MatchIndicesInternal<'a, P> where P::Searcher: fmt::Debug {
965 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
966 f.debug_tuple("MatchIndicesInternal")
967 .field(&self.0)
968 .finish()
969 }
970}
971
9346a6ac
AL
972impl<'a, P: Pattern<'a>> MatchIndicesInternal<'a, P> {
973 #[inline]
b039eaaf
SL
974 fn next(&mut self) -> Option<(usize, &'a str)> {
975 self.0.next_match().map(|(start, end)| unsafe {
976 (start, self.0.haystack().slice_unchecked(start, end))
977 })
9346a6ac
AL
978 }
979
980 #[inline]
b039eaaf 981 fn next_back(&mut self) -> Option<(usize, &'a str)>
9346a6ac
AL
982 where P::Searcher: ReverseSearcher<'a>
983 {
b039eaaf
SL
984 self.0.next_match_back().map(|(start, end)| unsafe {
985 (start, self.0.haystack().slice_unchecked(start, end))
986 })
9346a6ac
AL
987 }
988}
989
// Declares the public `MatchIndices` (forward) and `RMatchIndices` (reverse)
// iterator types on top of `MatchIndicesInternal`; items are
// `(usize, &'a str)` pairs. The macro is defined outside this chunk.
generate_pattern_iterators! {
    forward:
        /// Created with the method [`match_indices()`].
        ///
        /// [`match_indices()`]: ../../std/primitive.str.html#method.match_indices
        struct MatchIndices;
    reverse:
        /// Created with the method [`rmatch_indices()`].
        ///
        /// [`rmatch_indices()`]: ../../std/primitive.str.html#method.rmatch_indices
        struct RMatchIndices;
    stability:
        #[stable(feature = "str_match_indices", since = "1.5.0")]
    internal:
        MatchIndicesInternal yielding ((usize, &'a str));
    delegate double ended;
}
1007
// `Clone` for `MatchesInternal`, produced by the `derive_pattern_clone!`
// helper (defined outside this chunk): the copy wraps a clone of the searcher.
derive_pattern_clone!{
    clone MatchesInternal
    with |s| MatchesInternal(s.0.clone())
}
54a0048b 1012
9346a6ac
AL
// Shared state behind `Matches` / `RMatches`: just the pattern's searcher.
struct MatchesInternal<'a, P: Pattern<'a>>(P::Searcher);
1014
54a0048b
SL
1015impl<'a, P: Pattern<'a>> fmt::Debug for MatchesInternal<'a, P> where P::Searcher: fmt::Debug {
1016 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1017 f.debug_tuple("MatchesInternal")
1018 .field(&self.0)
1019 .finish()
1020 }
1021}
1022
9346a6ac
AL
1023impl<'a, P: Pattern<'a>> MatchesInternal<'a, P> {
1024 #[inline]
1025 fn next(&mut self) -> Option<&'a str> {
1026 self.0.next_match().map(|(a, b)| unsafe {
1027 // Indices are known to be on utf8 boundaries
1028 self.0.haystack().slice_unchecked(a, b)
1029 })
1030 }
1031
1032 #[inline]
1033 fn next_back(&mut self) -> Option<&'a str>
1034 where P::Searcher: ReverseSearcher<'a>
1035 {
1036 self.0.next_match_back().map(|(a, b)| unsafe {
1037 // Indices are known to be on utf8 boundaries
1038 self.0.haystack().slice_unchecked(a, b)
1039 })
1040 }
1041}
1042
// Declares the public `Matches` (forward) and `RMatches` (reverse) iterator
// types on top of `MatchesInternal`; items are the matched `&'a str` slices.
// The macro is defined outside this chunk.
generate_pattern_iterators! {
    forward:
        /// Created with the method [`matches()`].
        ///
        /// [`matches()`]: ../../std/primitive.str.html#method.matches
        struct Matches;
    reverse:
        /// Created with the method [`rmatches()`].
        ///
        /// [`rmatches()`]: ../../std/primitive.str.html#method.rmatches
        struct RMatches;
    stability:
        #[stable(feature = "str_matches", since = "1.2.0")]
    internal:
        MatchesInternal yielding (&'a str);
    delegate double ended;
}
1060
9cc50fc6
SL
/// Created with the method [`lines()`].
///
/// Internally this is a `'\n'`-terminated split whose pieces are passed
/// through `LinesAnyMap`, which removes one trailing `'\r'` from each piece.
///
/// [`lines()`]: ../../std/primitive.str.html#method.lines
#[stable(feature = "rust1", since = "1.0.0")]
#[derive(Clone, Debug)]
pub struct Lines<'a>(Map<SplitTerminator<'a, char>, LinesAnyMap>);
9346a6ac
AL
1067
1068#[stable(feature = "rust1", since = "1.0.0")]
1069impl<'a> Iterator for Lines<'a> {
c34b1796
AL
1070 type Item = &'a str;
1071
1072 #[inline]
1073 fn next(&mut self) -> Option<&'a str> {
9346a6ac
AL
1074 self.0.next()
1075 }
c34b1796 1076
9346a6ac
AL
1077 #[inline]
1078 fn size_hint(&self) -> (usize, Option<usize>) {
1079 self.0.size_hint()
1080 }
1081}
1082
1083#[stable(feature = "rust1", since = "1.0.0")]
1084impl<'a> DoubleEndedIterator for Lines<'a> {
1085 #[inline]
1086 fn next_back(&mut self) -> Option<&'a str> {
1087 self.0.next_back()
1088 }
1089}
1090
9cc50fc6
SL
/// Created with the method [`lines_any()`].
///
/// Deprecated: this is now only a thin wrapper around [`Lines`] and yields
/// exactly what `lines()` yields.
///
/// [`lines_any()`]: ../../std/primitive.str.html#method.lines_any
#[stable(feature = "rust1", since = "1.0.0")]
#[rustc_deprecated(since = "1.4.0", reason = "use lines()/Lines instead now")]
#[derive(Clone, Debug)]
#[allow(deprecated)]
pub struct LinesAny<'a>(Lines<'a>);
9346a6ac 1099
/// A nameable, cloneable fn type
///
/// Used as the mapping function inside `Lines`, so that the iterator type
/// can be written out and cloned. Calling it strips one trailing `'\r'`
/// from the given line.
#[derive(Clone)]
struct LinesAnyMap;
1103
1104impl<'a> Fn<(&'a str,)> for LinesAnyMap {
1105 #[inline]
1106 extern "rust-call" fn call(&self, (line,): (&'a str,)) -> &'a str {
1107 let l = line.len();
1108 if l > 0 && line.as_bytes()[l - 1] == b'\r' { &line[0 .. l - 1] }
1109 else { line }
1110 }
1111}
1112
1113impl<'a> FnMut<(&'a str,)> for LinesAnyMap {
1114 #[inline]
1115 extern "rust-call" fn call_mut(&mut self, (line,): (&'a str,)) -> &'a str {
1116 Fn::call(&*self, (line,))
1117 }
1118}
1119
1120impl<'a> FnOnce<(&'a str,)> for LinesAnyMap {
1121 type Output = &'a str;
1122
1123 #[inline]
1124 extern "rust-call" fn call_once(self, (line,): (&'a str,)) -> &'a str {
1125 Fn::call(&self, (line,))
1a4d82fc 1126 }
c34b1796 1127}
1a4d82fc 1128
c34b1796 1129#[stable(feature = "rust1", since = "1.0.0")]
e9174d1e 1130#[allow(deprecated)]
9346a6ac 1131impl<'a> Iterator for LinesAny<'a> {
c34b1796
AL
1132 type Item = &'a str;
1133
1134 #[inline]
1135 fn next(&mut self) -> Option<&'a str> {
9346a6ac
AL
1136 self.0.next()
1137 }
1138
1139 #[inline]
1140 fn size_hint(&self) -> (usize, Option<usize>) {
1141 self.0.size_hint()
1142 }
1143}
1144
1145#[stable(feature = "rust1", since = "1.0.0")]
e9174d1e 1146#[allow(deprecated)]
9346a6ac
AL
1147impl<'a> DoubleEndedIterator for LinesAny<'a> {
1148 #[inline]
1149 fn next_back(&mut self) -> Option<&'a str> {
1150 self.0.next_back()
1a4d82fc
JJ
1151 }
1152}
1153
1a4d82fc
JJ
1154/*
1155Section: Comparing strings
1156*/
1157
c1a9b12d 1158/// Bytewise slice equality
1a4d82fc
JJ
1159/// NOTE: This function is (ab)used in rustc::middle::trans::_match
1160/// to compare &[u8] byte slices that are not necessarily valid UTF-8.
c1a9b12d 1161#[lang = "str_eq"]
1a4d82fc 1162#[inline]
c1a9b12d 1163fn eq_slice(a: &str, b: &str) -> bool {
54a0048b 1164 a.as_bytes() == b.as_bytes()
1a4d82fc
JJ
1165}
1166
1a4d82fc 1167/*
9cc50fc6 1168Section: UTF-8 validation
1a4d82fc
JJ
1169*/
1170
9cc50fc6
SL
// The mask is written as a `u64` and then truncated, so it fits `usize`
// whatever the pointer width is. Each byte of the mask is 0x80.
const NONASCII_MASK: usize = 0x8080_8080_8080_8080u64 as usize;

/// Reports whether at least one byte of the word `x` has its high bit set,
/// i.e. is nonascii (>= 128).
#[inline]
fn contains_nonascii(x: usize) -> bool {
    let high_bits = x & NONASCII_MASK;
    high_bits != 0
}
1179
1a4d82fc
JJ
/// Walks through the bytes of `v`, checking that they form a valid UTF-8
/// sequence.
///
/// Returns `Ok(())` if all of `v` is valid. Otherwise returns a `Utf8Error`
/// whose `valid_up_to` is the offset of the first byte of the sequence that
/// failed to validate; every byte before that offset was accepted.
#[inline(always)]
fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
    let mut offset = 0;
    let len = v.len();
    while offset < len {
        // Start of the sequence currently being checked; reported on error.
        let old_offset = offset;
        macro_rules! err { () => {{
            return Err(Utf8Error {
                valid_up_to: old_offset
            })
        }}}

        // Advances to the following byte and yields it, failing if the input
        // ends in the middle of a multi-byte sequence.
        macro_rules! next { () => {{
            offset += 1;
            // we needed data, but there was none: error!
            if offset >= len {
                err!()
            }
            v[offset]
        }}}

        let first = v[offset];
        if first >= 128 {
            // Expected total length of the sequence, from its first byte.
            let w = UTF8_CHAR_WIDTH[first as usize];
            let second = next!();
            // 2-byte encoding is for codepoints  \u{0080} to  \u{07ff}
            //        first  C2 80        last DF BF
            // 3-byte encoding is for codepoints  \u{0800} to  \u{ffff}
            //        first  E0 A0 80     last EF BF BF
            //   excluding surrogates codepoints  \u{d800} to  \u{dfff}
            //               ED A0 80 to       ED BF BF
            // 4-byte encoding is for codepoints \u{10000} to \u{10ffff}
            //        first  F0 90 80 80  last F4 8F BF BF
            //
            // Use the UTF-8 syntax from the RFC
            //
            // https://tools.ietf.org/html/rfc3629
            // UTF8-1      = %x00-7F
            // UTF8-2      = %xC2-DF UTF8-tail
            // UTF8-3      = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
            //               %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
            // UTF8-4      = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
            //               %xF4 %x80-8F 2( UTF8-tail )
            match w {
                2 => if second & !CONT_MASK != TAG_CONT_U8 {err!()},
                3 => {
                    match (first, second, next!() & !CONT_MASK) {
                        (0xE0         , 0xA0 ... 0xBF, TAG_CONT_U8) |
                        (0xE1 ... 0xEC, 0x80 ... 0xBF, TAG_CONT_U8) |
                        (0xED         , 0x80 ... 0x9F, TAG_CONT_U8) |
                        (0xEE ... 0xEF, 0x80 ... 0xBF, TAG_CONT_U8) => {}
                        _ => err!()
                    }
                }
                4 => {
                    match (first, second, next!() & !CONT_MASK, next!() & !CONT_MASK) {
                        (0xF0         , 0x90 ... 0xBF, TAG_CONT_U8, TAG_CONT_U8) |
                        (0xF1 ... 0xF3, 0x80 ... 0xBF, TAG_CONT_U8, TAG_CONT_U8) |
                        (0xF4         , 0x80 ... 0x8F, TAG_CONT_U8, TAG_CONT_U8) => {}
                        _ => err!()
                    }
                }
                // Width 0: `first` cannot start a sequence.
                _ => err!()
            }
            // Step past the last byte of the sequence just accepted.
            offset += 1;
        } else {
            // Ascii case, try to skip forward quickly.
            // When the pointer is aligned, read 2 words of data per iteration
            // until we find a word containing a non-ascii byte.
            let usize_bytes = mem::size_of::<usize>();
            let bytes_per_iteration = 2 * usize_bytes;
            let ptr = v.as_ptr();
            // Zero when the address of the current byte is word-aligned.
            let align = (ptr as usize + offset) & (usize_bytes - 1);
            if align == 0 {
                if len >= bytes_per_iteration {
                    // The loop condition keeps both word reads inside `v`.
                    while offset <= len - bytes_per_iteration {
                        unsafe {
                            let u = *(ptr.offset(offset as isize) as *const usize);
                            let v = *(ptr.offset((offset + usize_bytes) as isize) as *const usize);

                            // break if there is a nonascii byte
                            let zu = contains_nonascii(u);
                            let zv = contains_nonascii(v);
                            if zu || zv {
                                break;
                            }
                        }
                        offset += bytes_per_iteration;
                    }
                }
                // step from the point where the wordwise loop stopped
                while offset < len && v[offset] < 128 {
                    offset += 1;
                }
            } else {
                // Not aligned: consume this single ASCII byte and retry.
                offset += 1;
            }
        }
    }

    Ok(())
}
1286
// https://tools.ietf.org/html/rfc3629
//
// Indexed by the first byte of a sequence: the total number of bytes in the
// encoding that byte starts (1 to 4). An entry of 0 marks a byte that is
// rejected as the start of a sequence (0x80-0xC1 and 0xF5-0xFF).
static UTF8_CHAR_WIDTH: [u8; 256] = [
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x1F
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x3F
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x5F
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x7F
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x9F
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xBF
0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xDF
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, // 0xEF
4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, // 0xFF
];
1306
/// Mask of the value bits of a continuation byte (its low six bits).
const CONT_MASK: u8 = 0b0011_1111;
/// Value of the tag bits (tag mask is !CONT_MASK) of a continuation byte.
/// A byte `b` is a continuation byte when `b & !CONT_MASK == TAG_CONT_U8`.
const TAG_CONT_U8: u8 = 0b1000_0000;
1a4d82fc
JJ
1311
1312/*
1313Section: Trait implementations
1314*/
1315
1316mod traits {
54a0048b 1317 use cmp::{Ord, Ordering, PartialEq, PartialOrd, Eq};
1a4d82fc
JJ
1318 use option::Option;
1319 use option::Option::Some;
1320 use ops;
1321 use str::{StrExt, eq_slice};
1322
85aaf69f 1323 #[stable(feature = "rust1", since = "1.0.0")]
1a4d82fc
JJ
1324 impl Ord for str {
1325 #[inline]
1326 fn cmp(&self, other: &str) -> Ordering {
54a0048b 1327 self.as_bytes().cmp(other.as_bytes())
1a4d82fc
JJ
1328 }
1329 }
1330
85aaf69f 1331 #[stable(feature = "rust1", since = "1.0.0")]
1a4d82fc
JJ
1332 impl PartialEq for str {
1333 #[inline]
1334 fn eq(&self, other: &str) -> bool {
1335 eq_slice(self, other)
1336 }
1337 #[inline]
1338 fn ne(&self, other: &str) -> bool { !(*self).eq(other) }
1339 }
1340
    // Marker impl: the bytewise `PartialEq` above is a full equivalence.
    #[stable(feature = "rust1", since = "1.0.0")]
    impl Eq for str {}
1343
85aaf69f 1344 #[stable(feature = "rust1", since = "1.0.0")]
1a4d82fc
JJ
1345 impl PartialOrd for str {
1346 #[inline]
1347 fn partial_cmp(&self, other: &str) -> Option<Ordering> {
1348 Some(self.cmp(other))
1349 }
1350 }
1351
54a0048b
SL
1352 /// Implements substring slicing with syntax `&self[begin .. end]`.
1353 ///
85aaf69f
SL
1354 /// Returns a slice of the given string from the byte range
1355 /// [`begin`..`end`).
1356 ///
1357 /// This operation is `O(1)`.
1358 ///
54a0048b
SL
1359 /// # Panics
1360 ///
1361 /// Panics if `begin` or `end` does not point to the starting
1362 /// byte offset of a character (as defined by `is_char_boundary`).
1363 /// Requires that `begin <= end` and `end <= len` where `len` is the
1364 /// length of the string.
85aaf69f 1365 ///
c34b1796 1366 /// # Examples
85aaf69f 1367 ///
c34b1796 1368 /// ```
85aaf69f
SL
1369 /// let s = "Löwe 老虎 Léopard";
1370 /// assert_eq!(&s[0 .. 1], "L");
1371 ///
1372 /// assert_eq!(&s[1 .. 9], "öwe 老");
1373 ///
1374 /// // these will panic:
1375 /// // byte 2 lies within `ö`:
1376 /// // &s[2 ..3];
1377 ///
1378 /// // byte 8 lies within `老`
1379 /// // &s[1 .. 8];
1380 ///
1381 /// // byte 100 is outside the string
1382 /// // &s[3 .. 100];
1383 /// ```
1384 #[stable(feature = "rust1", since = "1.0.0")]
1385 impl ops::Index<ops::Range<usize>> for str {
1a4d82fc
JJ
1386 type Output = str;
1387 #[inline]
c34b1796 1388 fn index(&self, index: ops::Range<usize>) -> &str {
85aaf69f
SL
1389 // is_char_boundary checks that the index is in [0, .len()]
1390 if index.start <= index.end &&
1391 self.is_char_boundary(index.start) &&
1392 self.is_char_boundary(index.end) {
1393 unsafe { self.slice_unchecked(index.start, index.end) }
1394 } else {
1395 super::slice_error_fail(self, index.start, index.end)
1396 }
1a4d82fc
JJ
1397 }
1398 }
85aaf69f 1399
54a0048b
SL
1400 /// Implements mutable substring slicing with syntax
1401 /// `&mut self[begin .. end]`.
1402 ///
c1a9b12d
SL
1403 /// Returns a mutable slice of the given string from the byte range
1404 /// [`begin`..`end`).
54a0048b
SL
1405 ///
1406 /// This operation is `O(1)`.
1407 ///
1408 /// # Panics
1409 ///
1410 /// Panics if `begin` or `end` does not point to the starting
1411 /// byte offset of a character (as defined by `is_char_boundary`).
1412 /// Requires that `begin <= end` and `end <= len` where `len` is the
1413 /// length of the string.
c1a9b12d
SL
1414 #[stable(feature = "derefmut_for_string", since = "1.2.0")]
1415 impl ops::IndexMut<ops::Range<usize>> for str {
1416 #[inline]
1417 fn index_mut(&mut self, index: ops::Range<usize>) -> &mut str {
1418 // is_char_boundary checks that the index is in [0, .len()]
1419 if index.start <= index.end &&
1420 self.is_char_boundary(index.start) &&
1421 self.is_char_boundary(index.end) {
1422 unsafe { self.slice_mut_unchecked(index.start, index.end) }
1423 } else {
1424 super::slice_error_fail(self, index.start, index.end)
1425 }
1426 }
1427 }
1428
54a0048b 1429 /// Implements substring slicing with syntax `&self[.. end]`.
85aaf69f 1430 ///
54a0048b
SL
1431 /// Returns a slice of the string from the beginning to byte offset
1432 /// `end`.
85aaf69f 1433 ///
54a0048b 1434 /// Equivalent to `&self[0 .. end]`.
85aaf69f
SL
1435 #[stable(feature = "rust1", since = "1.0.0")]
1436 impl ops::Index<ops::RangeTo<usize>> for str {
1a4d82fc 1437 type Output = str;
c34b1796 1438
1a4d82fc 1439 #[inline]
c34b1796 1440 fn index(&self, index: ops::RangeTo<usize>) -> &str {
85aaf69f
SL
1441 // is_char_boundary checks that the index is in [0, .len()]
1442 if self.is_char_boundary(index.end) {
1443 unsafe { self.slice_unchecked(0, index.end) }
1444 } else {
1445 super::slice_error_fail(self, 0, index.end)
1446 }
1a4d82fc
JJ
1447 }
1448 }
85aaf69f 1449
54a0048b
SL
1450 /// Implements mutable substring slicing with syntax `&mut self[.. end]`.
1451 ///
1452 /// Returns a mutable slice of the string from the beginning to byte offset
c1a9b12d 1453 /// `end`.
54a0048b
SL
1454 ///
1455 /// Equivalent to `&mut self[0 .. end]`.
c1a9b12d
SL
1456 #[stable(feature = "derefmut_for_string", since = "1.2.0")]
1457 impl ops::IndexMut<ops::RangeTo<usize>> for str {
1458 #[inline]
1459 fn index_mut(&mut self, index: ops::RangeTo<usize>) -> &mut str {
1460 // is_char_boundary checks that the index is in [0, .len()]
1461 if self.is_char_boundary(index.end) {
1462 unsafe { self.slice_mut_unchecked(0, index.end) }
1463 } else {
1464 super::slice_error_fail(self, 0, index.end)
1465 }
1466 }
1467 }
1468
54a0048b 1469 /// Implements substring slicing with syntax `&self[begin ..]`.
85aaf69f 1470 ///
54a0048b
SL
1471 /// Returns a slice of the string from byte offset `begin`
1472 /// to the end of the string.
85aaf69f 1473 ///
54a0048b 1474 /// Equivalent to `&self[begin .. len]`.
85aaf69f
SL
1475 #[stable(feature = "rust1", since = "1.0.0")]
1476 impl ops::Index<ops::RangeFrom<usize>> for str {
1a4d82fc 1477 type Output = str;
c34b1796 1478
1a4d82fc 1479 #[inline]
c34b1796 1480 fn index(&self, index: ops::RangeFrom<usize>) -> &str {
85aaf69f
SL
1481 // is_char_boundary checks that the index is in [0, .len()]
1482 if self.is_char_boundary(index.start) {
1483 unsafe { self.slice_unchecked(index.start, self.len()) }
1484 } else {
1485 super::slice_error_fail(self, index.start, self.len())
1486 }
1a4d82fc
JJ
1487 }
1488 }
85aaf69f 1489
54a0048b
SL
1490 /// Implements mutable substring slicing with syntax `&mut self[begin ..]`.
1491 ///
1492 /// Returns a mutable slice of the string from byte offset `begin`
1493 /// to the end of the string.
1494 ///
1495 /// Equivalent to `&mut self[begin .. len]`.
c1a9b12d
SL
1496 #[stable(feature = "derefmut_for_string", since = "1.2.0")]
1497 impl ops::IndexMut<ops::RangeFrom<usize>> for str {
1498 #[inline]
1499 fn index_mut(&mut self, index: ops::RangeFrom<usize>) -> &mut str {
1500 // is_char_boundary checks that the index is in [0, .len()]
1501 if self.is_char_boundary(index.start) {
1502 let len = self.len();
1503 unsafe { self.slice_mut_unchecked(index.start, len) }
1504 } else {
1505 super::slice_error_fail(self, index.start, self.len())
1506 }
1507 }
1508 }
1509
54a0048b
SL
    /// Implements substring slicing with syntax `&self[..]`.
    ///
    /// Returns a slice of the whole string, i.e. the receiver itself.
    /// This operation can never panic.
    ///
    /// Equivalent to `&self[0 .. len]`.
    #[stable(feature = "rust1", since = "1.0.0")]
    impl ops::Index<ops::RangeFull> for str {
        type Output = str;

        #[inline]
        fn index(&self, _index: ops::RangeFull) -> &str {
            self
        }
    }
c1a9b12d 1525
54a0048b
SL
    /// Implements mutable substring slicing with syntax `&mut self[..]`.
    ///
    /// Returns a mutable slice of the whole string, i.e. the receiver
    /// itself. This operation can never panic.
    ///
    /// Equivalent to `&mut self[0 .. len]`.
    #[stable(feature = "derefmut_for_string", since = "1.2.0")]
    impl ops::IndexMut<ops::RangeFull> for str {
        #[inline]
        fn index_mut(&mut self, _index: ops::RangeFull) -> &mut str {
            self
        }
    }
54a0048b
SL
1539
1540 #[unstable(feature = "inclusive_range",
1541 reason = "recently added, follows RFC",
1542 issue = "28237")]
1543 impl ops::Index<ops::RangeInclusive<usize>> for str {
1544 type Output = str;
1545
1546 #[inline]
1547 fn index(&self, index: ops::RangeInclusive<usize>) -> &str {
1548 match index {
1549 ops::RangeInclusive::Empty { .. } => "",
1550 ops::RangeInclusive::NonEmpty { end, .. } if end == usize::max_value() =>
1551 panic!("attempted to index slice up to maximum usize"),
1552 ops::RangeInclusive::NonEmpty { start, end } =>
1553 self.index(start .. end+1)
1554 }
1555 }
1556 }
1557 #[unstable(feature = "inclusive_range",
1558 reason = "recently added, follows RFC",
1559 issue = "28237")]
1560 impl ops::Index<ops::RangeToInclusive<usize>> for str {
1561 type Output = str;
1562
1563 #[inline]
1564 fn index(&self, index: ops::RangeToInclusive<usize>) -> &str {
1565 self.index(0...index.end)
1566 }
1567 }
1568
1569 #[unstable(feature = "inclusive_range",
1570 reason = "recently added, follows RFC",
1571 issue = "28237")]
1572 impl ops::IndexMut<ops::RangeInclusive<usize>> for str {
1573 #[inline]
1574 fn index_mut(&mut self, index: ops::RangeInclusive<usize>) -> &mut str {
1575 match index {
1576 ops::RangeInclusive::Empty { .. } => &mut self[0..0], // `&mut ""` doesn't work
1577 ops::RangeInclusive::NonEmpty { end, .. } if end == usize::max_value() =>
1578 panic!("attempted to index str up to maximum usize"),
1579 ops::RangeInclusive::NonEmpty { start, end } =>
1580 self.index_mut(start .. end+1)
1581 }
1582 }
1583 }
1584 #[unstable(feature = "inclusive_range",
1585 reason = "recently added, follows RFC",
1586 issue = "28237")]
1587 impl ops::IndexMut<ops::RangeToInclusive<usize>> for str {
1588 #[inline]
1589 fn index_mut(&mut self, index: ops::RangeToInclusive<usize>) -> &mut str {
1590 self.index_mut(0...index.end)
1591 }
1592 }
1a4d82fc
JJ
1593}
1594
1a4d82fc
JJ
/// Methods for string slices
#[allow(missing_docs)]
#[doc(hidden)]
#[unstable(feature = "core_str_ext",
           reason = "stable interface provided by `impl str` in later crates",
           issue = "32110")]
pub trait StrExt {
    // NB there are no docs here as they're all located on the StrExt trait in
    // libcollections, not here.

    #[stable(feature = "core", since = "1.6.0")]
    fn contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool;
    #[stable(feature = "core", since = "1.6.0")]
    fn chars(&self) -> Chars;
    #[stable(feature = "core", since = "1.6.0")]
    fn bytes(&self) -> Bytes;
    #[stable(feature = "core", since = "1.6.0")]
    fn char_indices(&self) -> CharIndices;
    #[stable(feature = "core", since = "1.6.0")]
    fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P>;
    #[stable(feature = "core", since = "1.6.0")]
    fn rsplit<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplit<'a, P>
        where P::Searcher: ReverseSearcher<'a>;
    #[stable(feature = "core", since = "1.6.0")]
    fn splitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> SplitN<'a, P>;
    #[stable(feature = "core", since = "1.6.0")]
    fn rsplitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> RSplitN<'a, P>
        where P::Searcher: ReverseSearcher<'a>;
    #[stable(feature = "core", since = "1.6.0")]
    fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P>;
    #[stable(feature = "core", since = "1.6.0")]
    fn rsplit_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplitTerminator<'a, P>
        where P::Searcher: ReverseSearcher<'a>;
    #[stable(feature = "core", since = "1.6.0")]
    fn matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> Matches<'a, P>;
    #[stable(feature = "core", since = "1.6.0")]
    fn rmatches<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatches<'a, P>
        where P::Searcher: ReverseSearcher<'a>;
    #[stable(feature = "core", since = "1.6.0")]
    fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P>;
    #[stable(feature = "core", since = "1.6.0")]
    fn rmatch_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatchIndices<'a, P>
        where P::Searcher: ReverseSearcher<'a>;
    #[stable(feature = "core", since = "1.6.0")]
    fn lines(&self) -> Lines;
    #[stable(feature = "core", since = "1.6.0")]
    #[rustc_deprecated(since = "1.6.0", reason = "use lines() instead now")]
    #[allow(deprecated)]
    fn lines_any(&self) -> LinesAny;
    #[stable(feature = "core", since = "1.6.0")]
    unsafe fn slice_unchecked(&self, begin: usize, end: usize) -> &str;
    #[stable(feature = "core", since = "1.6.0")]
    unsafe fn slice_mut_unchecked(&mut self, begin: usize, end: usize) -> &mut str;
    #[stable(feature = "core", since = "1.6.0")]
    fn starts_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool;
    #[stable(feature = "core", since = "1.6.0")]
    fn ends_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool
        where P::Searcher: ReverseSearcher<'a>;
    #[stable(feature = "core", since = "1.6.0")]
    fn trim_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
        where P::Searcher: DoubleEndedSearcher<'a>;
    #[stable(feature = "core", since = "1.6.0")]
    fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str;
    #[stable(feature = "core", since = "1.6.0")]
    fn trim_right_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
        where P::Searcher: ReverseSearcher<'a>;
    #[stable(feature = "is_char_boundary", since = "1.9.0")]
    fn is_char_boundary(&self, index: usize) -> bool;
    #[stable(feature = "core", since = "1.6.0")]
    fn as_bytes(&self) -> &[u8];
    #[stable(feature = "core", since = "1.6.0")]
    fn find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>;
    #[stable(feature = "core", since = "1.6.0")]
    fn rfind<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>
        where P::Searcher: ReverseSearcher<'a>;
    // NOTE(review): unlike its neighbours, `find_str` carries no stability
    // attribute of its own.
    fn find_str<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>;
    #[stable(feature = "core", since = "1.6.0")]
    fn split_at(&self, mid: usize) -> (&str, &str);
    #[stable(feature = "core", since = "1.6.0")]
    fn split_at_mut(&mut self, mid: usize) -> (&mut str, &mut str);
    #[stable(feature = "core", since = "1.6.0")]
    fn as_ptr(&self) -> *const u8;
    #[stable(feature = "core", since = "1.6.0")]
    fn len(&self) -> usize;
    #[stable(feature = "core", since = "1.6.0")]
    fn is_empty(&self) -> bool;
    #[stable(feature = "core", since = "1.6.0")]
    fn parse<T: FromStr>(&self) -> Result<T, T::Err>;
}
1684
54a0048b
SL
// Shortens `s` to at most `max` bytes without cutting a character in half.
// Returns whether anything was cut off, together with the (possibly
// shortened) string.
fn truncate_to_char_boundary(s: &str, mut max: usize) -> (bool, &str) {
    if max >= s.len() {
        return (false, s);
    }
    // Back up to the nearest boundary at or before `max`. Offset 0 is always
    // a boundary, so this cannot run below zero.
    loop {
        if s.is_char_boundary(max) {
            break;
        }
        max -= 1;
    }
    (true, &s[..max])
}
1697
1a4d82fc 1698#[inline(never)]
92a42be0 1699#[cold]
85aaf69f 1700fn slice_error_fail(s: &str, begin: usize, end: usize) -> ! {
54a0048b
SL
1701 const MAX_DISPLAY_LENGTH: usize = 256;
1702 let (truncated, s) = truncate_to_char_boundary(s, MAX_DISPLAY_LENGTH);
1703 let ellipsis = if truncated { "[...]" } else { "" };
1704
1705 assert!(begin <= end, "begin <= end ({} <= {}) when slicing `{}`{}",
1706 begin, end, s, ellipsis);
1707 panic!("index {} and/or {} in `{}`{} do not lie on character boundary",
1708 begin, end, s, ellipsis);
1a4d82fc
JJ
1709}
1710
92a42be0 1711#[stable(feature = "core", since = "1.6.0")]
1a4d82fc
JJ
1712impl StrExt for str {
1713 #[inline]
c34b1796
AL
1714 fn contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
1715 pat.is_contained_in(self)
1a4d82fc
JJ
1716 }
1717
1a4d82fc
JJ
1718 #[inline]
1719 fn chars(&self) -> Chars {
1720 Chars{iter: self.as_bytes().iter()}
1721 }
1722
1723 #[inline]
1724 fn bytes(&self) -> Bytes {
e9174d1e 1725 Bytes(self.as_bytes().iter().cloned())
1a4d82fc
JJ
1726 }
1727
1728 #[inline]
1729 fn char_indices(&self) -> CharIndices {
1730 CharIndices { front_offset: 0, iter: self.chars() }
1731 }
1732
1733 #[inline]
c34b1796 1734 fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P> {
9346a6ac 1735 Split(SplitInternal {
c34b1796
AL
1736 start: 0,
1737 end: self.len(),
1738 matcher: pat.into_searcher(self),
1a4d82fc
JJ
1739 allow_trailing_empty: true,
1740 finished: false,
1741 })
1742 }
1743
9346a6ac
AL
1744 #[inline]
1745 fn rsplit<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplit<'a, P>
1746 where P::Searcher: ReverseSearcher<'a>
1747 {
1748 RSplit(self.split(pat).0)
1749 }
1750
1a4d82fc 1751 #[inline]
c34b1796 1752 fn splitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> SplitN<'a, P> {
9346a6ac 1753 SplitN(SplitNInternal {
1a4d82fc
JJ
1754 iter: self.split(pat).0,
1755 count: count,
1a4d82fc
JJ
1756 })
1757 }
1758
9346a6ac
AL
1759 #[inline]
1760 fn rsplitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> RSplitN<'a, P>
1761 where P::Searcher: ReverseSearcher<'a>
1762 {
1763 RSplitN(self.splitn(count, pat).0)
1764 }
1765
1a4d82fc 1766 #[inline]
c34b1796 1767 fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P> {
9346a6ac 1768 SplitTerminator(SplitInternal {
1a4d82fc
JJ
1769 allow_trailing_empty: false,
1770 ..self.split(pat).0
1771 })
1772 }
1773
1774 #[inline]
9346a6ac 1775 fn rsplit_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplitTerminator<'a, P>
c34b1796
AL
1776 where P::Searcher: ReverseSearcher<'a>
1777 {
9346a6ac 1778 RSplitTerminator(self.split_terminator(pat).0)
1a4d82fc
JJ
1779 }
1780
1781 #[inline]
9346a6ac
AL
1782 fn matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> Matches<'a, P> {
1783 Matches(MatchesInternal(pat.into_searcher(self)))
1784 }
1785
1786 #[inline]
1787 fn rmatches<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatches<'a, P>
c34b1796
AL
1788 where P::Searcher: ReverseSearcher<'a>
1789 {
9346a6ac 1790 RMatches(self.matches(pat).0)
1a4d82fc
JJ
1791 }
1792
1793 #[inline]
c34b1796 1794 fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P> {
9346a6ac 1795 MatchIndices(MatchIndicesInternal(pat.into_searcher(self)))
1a4d82fc
JJ
1796 }
1797
9346a6ac
AL
1798 #[inline]
1799 fn rmatch_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatchIndices<'a, P>
1800 where P::Searcher: ReverseSearcher<'a>
1801 {
1802 RMatchIndices(self.match_indices(pat).0)
1803 }
1a4d82fc
JJ
1804 #[inline]
1805 fn lines(&self) -> Lines {
e9174d1e 1806 Lines(self.split_terminator('\n').map(LinesAnyMap))
1a4d82fc
JJ
1807 }
1808
9346a6ac 1809 #[inline]
e9174d1e 1810 #[allow(deprecated)]
1a4d82fc 1811 fn lines_any(&self) -> LinesAny {
e9174d1e 1812 LinesAny(self.lines())
1a4d82fc
JJ
1813 }
1814
1a4d82fc 1815 #[inline]
85aaf69f 1816 unsafe fn slice_unchecked(&self, begin: usize, end: usize) -> &str {
54a0048b
SL
1817 let ptr = self.as_ptr().offset(begin as isize);
1818 let len = end - begin;
1819 from_utf8_unchecked(slice::from_raw_parts(ptr, len))
1a4d82fc
JJ
1820 }
1821
c1a9b12d
SL
1822 #[inline]
1823 unsafe fn slice_mut_unchecked(&mut self, begin: usize, end: usize) -> &mut str {
54a0048b
SL
1824 let ptr = self.as_ptr().offset(begin as isize);
1825 let len = end - begin;
1826 mem::transmute(slice::from_raw_parts_mut(ptr as *mut u8, len))
c1a9b12d
SL
1827 }
1828
1a4d82fc 1829 #[inline]
c34b1796
AL
1830 fn starts_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
1831 pat.is_prefix_of(self)
1a4d82fc
JJ
1832 }
1833
1834 #[inline]
c34b1796
AL
1835 fn ends_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool
1836 where P::Searcher: ReverseSearcher<'a>
1837 {
1838 pat.is_suffix_of(self)
1a4d82fc
JJ
1839 }
1840
1841 #[inline]
c34b1796
AL
1842 fn trim_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
1843 where P::Searcher: DoubleEndedSearcher<'a>
1844 {
1845 let mut i = 0;
1846 let mut j = 0;
1847 let mut matcher = pat.into_searcher(self);
1848 if let Some((a, b)) = matcher.next_reject() {
1849 i = a;
7453a54e 1850 j = b; // Remember earliest known match, correct it below if
c34b1796
AL
1851 // last match is different
1852 }
1853 if let Some((_, b)) = matcher.next_reject_back() {
1854 j = b;
1855 }
1856 unsafe {
1857 // Searcher is known to return valid indices
1858 self.slice_unchecked(i, j)
1a4d82fc
JJ
1859 }
1860 }
1861
1862 #[inline]
c34b1796
AL
1863 fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str {
1864 let mut i = self.len();
1865 let mut matcher = pat.into_searcher(self);
1866 if let Some((a, _)) = matcher.next_reject() {
1867 i = a;
1868 }
1869 unsafe {
1870 // Searcher is known to return valid indices
1871 self.slice_unchecked(i, self.len())
1a4d82fc
JJ
1872 }
1873 }
1874
1875 #[inline]
c34b1796
AL
1876 fn trim_right_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
1877 where P::Searcher: ReverseSearcher<'a>
1878 {
1879 let mut j = 0;
1880 let mut matcher = pat.into_searcher(self);
1881 if let Some((_, b)) = matcher.next_reject_back() {
1882 j = b;
1883 }
1884 unsafe {
1885 // Searcher is known to return valid indices
1886 self.slice_unchecked(0, j)
1a4d82fc
JJ
1887 }
1888 }
1889
1890 #[inline]
85aaf69f 1891 fn is_char_boundary(&self, index: usize) -> bool {
54a0048b
SL
1892 // 0 and len are always ok.
1893 // Test for 0 explicitly so that it can optimize out the check
1894 // easily and skip reading string data for that case.
1895 if index == 0 || index == self.len() { return true; }
1a4d82fc
JJ
1896 match self.as_bytes().get(index) {
1897 None => false,
a7813a04
XL
1898 // This is bit magic equivalent to: b < 128 || b >= 192
1899 Some(&b) => (b as i8) >= -0x40,
1a4d82fc
JJ
1900 }
1901 }
1902
1a4d82fc
JJ
1903 #[inline]
1904 fn as_bytes(&self) -> &[u8] {
1905 unsafe { mem::transmute(self) }
1906 }
1907
c34b1796
AL
1908 fn find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize> {
1909 pat.into_searcher(self).next_match().map(|(i, _)| i)
1a4d82fc
JJ
1910 }
1911
c34b1796
AL
1912 fn rfind<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>
1913 where P::Searcher: ReverseSearcher<'a>
1914 {
1915 pat.into_searcher(self).next_match_back().map(|(i, _)| i)
1a4d82fc
JJ
1916 }
1917
c34b1796
AL
1918 fn find_str<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize> {
1919 self.find(pat)
1a4d82fc
JJ
1920 }
1921
54a0048b 1922 #[inline]
62682a34
SL
1923 fn split_at(&self, mid: usize) -> (&str, &str) {
1924 // is_char_boundary checks that the index is in [0, .len()]
1925 if self.is_char_boundary(mid) {
1926 unsafe {
1927 (self.slice_unchecked(0, mid),
1928 self.slice_unchecked(mid, self.len()))
1929 }
1930 } else {
1931 slice_error_fail(self, 0, mid)
1932 }
1933 }
1934
c1a9b12d
SL
1935 fn split_at_mut(&mut self, mid: usize) -> (&mut str, &mut str) {
1936 // is_char_boundary checks that the index is in [0, .len()]
1937 if self.is_char_boundary(mid) {
1938 let len = self.len();
7453a54e 1939 let ptr = self.as_ptr() as *mut u8;
c1a9b12d 1940 unsafe {
7453a54e
SL
1941 (from_raw_parts_mut(ptr, mid),
1942 from_raw_parts_mut(ptr.offset(mid as isize), len - mid))
c1a9b12d
SL
1943 }
1944 } else {
1945 slice_error_fail(self, 0, mid)
1946 }
1947 }
1948
1a4d82fc
JJ
1949 #[inline]
1950 fn as_ptr(&self) -> *const u8 {
54a0048b 1951 self as *const str as *const u8
1a4d82fc
JJ
1952 }
1953
1954 #[inline]
54a0048b
SL
1955 fn len(&self) -> usize {
1956 self.as_bytes().len()
1957 }
1a4d82fc
JJ
1958
1959 #[inline]
1960 fn is_empty(&self) -> bool { self.len() == 0 }
1961
1962 #[inline]
85aaf69f 1963 fn parse<T: FromStr>(&self) -> Result<T, T::Err> { FromStr::from_str(self) }
1a4d82fc
JJ
1964}
1965
bd371182
AL
1966#[stable(feature = "rust1", since = "1.0.0")]
1967impl AsRef<[u8]> for str {
1968 #[inline]
1969 fn as_ref(&self) -> &[u8] {
1970 self.as_bytes()
1971 }
1972}
1973
85aaf69f 1974#[stable(feature = "rust1", since = "1.0.0")]
1a4d82fc 1975impl<'a> Default for &'a str {
1a4d82fc
JJ
1976 fn default() -> &'a str { "" }
1977}