src/libcore/str/mod.rs

   1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
   2 // file at the top-level directory of this distribution and at
   3 // http://rust-lang.org/COPYRIGHT.
   4 //
   5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
   6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
   7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
   8 // option. This file may not be copied, modified, or distributed
   9 // except according to those terms.
  10
  11 //! String manipulation
  12 //!
  13 //! For more details, see std::str
  14
  15 #![stable(feature = "rust1", since = "1.0.0")]
  16
  17 use self::pattern::Pattern;
  18 use self::pattern::{Searcher, ReverseSearcher, DoubleEndedSearcher};
  19
  20 use char::{self, CharExt};
  21 use clone::Clone;
  22 use cmp::Eq;
  23 use convert::AsRef;
  24 use default::Default;
  25 use fmt;
  26 use iter::ExactSizeIterator;
  27 use iter::{Map, Cloned, Iterator, DoubleEndedIterator};
  28 use marker::Sized;
  29 use mem;
  30 use ops::{Fn, FnMut, FnOnce};
  31 use option::Option::{self, None, Some};
  32 use result::Result::{self, Ok, Err};
  33 use slice::{self, SliceExt};
  34
  35 pub mod pattern;
  36
  37 /// A trait to abstract the idea of creating a new instance of a type from a
  38 /// string.
  39 ///
  40 /// `FromStr`'s [`from_str()`] method is often used implicitly, through
  41 /// [`str`]'s [`parse()`] method. See [`parse()`]'s documentation for examples.
  42 ///
  43 /// [`from_str()`]: #tymethod.from_str
  44 /// [`str`]: ../../std/primitive.str.html
  45 /// [`parse()`]: ../../std/primitive.str.html#method.parse
  46 #[stable(feature = "rust1", since = "1.0.0")]
pub trait FromStr: Sized {
    /// The associated error which can be returned from parsing.
    #[stable(feature = "rust1", since = "1.0.0")]
    type Err;

    /// Parses a string `s` to return a value of this type.
    ///
    /// If parsing succeeds, returns the value inside `Ok`; otherwise, when
    /// the string is ill-formatted, returns an error inside `Err`. The error
    /// type is specific to each implementation of the trait.
    ///
    /// # Examples
    ///
    /// Basic usage with [`i32`][ithirtytwo], a type that implements `FromStr`:
    ///
    /// [ithirtytwo]: ../../std/primitive.i32.html
    ///
    /// ```
    /// use std::str::FromStr;
    ///
    /// let s = "5";
    /// let x = i32::from_str(s).unwrap();
    ///
    /// assert_eq!(5, x);
    /// ```
    #[stable(feature = "rust1", since = "1.0.0")]
    fn from_str(s: &str) -> Result<Self, Self::Err>;
}
  75
  76 #[stable(feature = "rust1", since = "1.0.0")]
  77 impl FromStr for bool {
  78     type Err = ParseBoolError;
  79
  80     /// Parse a `bool` from a string.
  81     ///
  82     /// Yields a `Result<bool, ParseBoolError>`, because `s` may or may not
  83     /// actually be parseable.
  84     ///
  85     /// # Examples
  86     ///
  87     /// ```
  88     /// use std::str::FromStr;
  89     ///
  90     /// assert_eq!(FromStr::from_str("true"), Ok(true));
  91     /// assert_eq!(FromStr::from_str("false"), Ok(false));
  92     /// assert!(<bool as FromStr>::from_str("not even a boolean").is_err());
  93     /// ```
  94     ///
  95     /// Note, in many cases, the `.parse()` method on `str` is more proper.
  96     ///
  97     /// ```
  98     /// assert_eq!("true".parse(), Ok(true));
  99     /// assert_eq!("false".parse(), Ok(false));
 100     /// assert!("not even a boolean".parse::<bool>().is_err());
 101     /// ```
 102     #[inline]
 103     fn from_str(s: &str) -> Result<bool, ParseBoolError> {
 104         match s {
 105             "true"  => Ok(true),
 106             "false" => Ok(false),
 107             _       => Err(ParseBoolError { _priv: () }),
 108         }
 109     }
 110 }
 111
/// An error returned when parsing a `bool` from a string fails.
///
/// Its only field is private, so values of this type cannot be constructed
/// outside of this module.
#[derive(Debug, Clone, PartialEq)]
#[stable(feature = "rust1", since = "1.0.0")]
pub struct ParseBoolError { _priv: () }
 116
#[stable(feature = "rust1", since = "1.0.0")]
impl fmt::Display for ParseBoolError {
    // Emits a fixed description of the failure by delegating to the `fmt`
    // implementation of the message string, passing the formatter through.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        "provided string was not `true` or `false`".fmt(f)
    }
}
 123
 124 /*
 125 Section: Creating a string
 126 */
 127
/// Errors which can occur when attempting to interpret a sequence of `u8`
/// as a string.
///
/// As such, the `from_utf8` family of functions and methods for both `String`s
/// and `&str`s make use of this error, for example.
#[derive(Copy, Eq, PartialEq, Clone, Debug)]
#[stable(feature = "rust1", since = "1.0.0")]
pub struct Utf8Error {
    // Index returned by `valid_up_to()` and printed in the `Display` message.
    valid_up_to: usize,
}
 138
impl Utf8Error {
    /// Returns the index in the given string up to which valid UTF-8 was
    /// verified.
    ///
    /// It is the maximum index such that `from_utf8(&input[..index])`
    /// would return `Ok(_)`.
    ///
    /// # Examples
    ///
    /// Basic usage:
    ///
    /// ```
    /// use std::str;
    ///
    /// // some invalid bytes, in a vector
    /// let sparkle_heart = vec![0, 159, 146, 150];
    ///
    /// // std::str::from_utf8 returns a Utf8Error
    /// let error = str::from_utf8(&sparkle_heart).unwrap_err();
    ///
    /// // the second byte is invalid here
    /// assert_eq!(1, error.valid_up_to());
    /// ```
    #[stable(feature = "utf8_error", since = "1.5.0")]
    pub fn valid_up_to(&self) -> usize { self.valid_up_to }
}
 165
 166 /// Converts a slice of bytes to a string slice.
 167 ///
 168 /// A string slice (`&str`) is made of bytes (`u8`), and a byte slice (`&[u8]`)
 169 /// is made of bytes, so this function converts between the two. Not all byte
 170 /// slices are valid string slices, however: `&str` requires that it is valid
 171 /// UTF-8. `from_utf8()` checks to ensure that the bytes are valid UTF-8, and
 172 /// then does the conversion.
 173 ///
 174 /// If you are sure that the byte slice is valid UTF-8, and you don't want to
 175 /// incur the overhead of the validity check, there is an unsafe version of
 176 /// this function, [`from_utf8_unchecked()`][fromutf8u], which has the same
 177 /// behavior but skips the check.
 178 ///
 179 /// [fromutf8u]: fn.from_utf8_unchecked.html
 180 ///
 181 /// If you need a `String` instead of a `&str`, consider
 182 /// [`String::from_utf8()`][string].
 183 ///
 184 /// [string]: ../../std/string/struct.String.html#method.from_utf8
 185 ///
 186 /// Because you can stack-allocate a `[u8; N]`, and you can take a `&[u8]` of
 187 /// it, this function is one way to have a stack-allocated string. There is
 188 /// an example of this in the examples section below.
 189 ///
 190 /// # Errors
 191 ///
 192 /// Returns `Err` if the slice is not UTF-8 with a description as to why the
 193 /// provided slice is not UTF-8.
 194 ///
 195 /// # Examples
 196 ///
 197 /// Basic usage:
 198 ///
 199 /// ```
 200 /// use std::str;
 201 ///
 202 /// // some bytes, in a vector
 203 /// let sparkle_heart = vec![240, 159, 146, 150];
 204 ///
 205 /// // We know these bytes are valid, so just use `unwrap()`.
 206 /// let sparkle_heart = str::from_utf8(&sparkle_heart).unwrap();
 207 ///
 208 /// assert_eq!("💖", sparkle_heart);
 209 /// ```
 210 ///
 211 /// Incorrect bytes:
 212 ///
 213 /// ```
 214 /// use std::str;
 215 ///
 216 /// // some invalid bytes, in a vector
 217 /// let sparkle_heart = vec![0, 159, 146, 150];
 218 ///
 219 /// assert!(str::from_utf8(&sparkle_heart).is_err());
 220 /// ```
 221 ///
 222 /// See the docs for [`Utf8Error`][error] for more details on the kinds of
 223 /// errors that can be returned.
 224 ///
 225 /// [error]: struct.Utf8Error.html
 226 ///
 227 /// A "stack allocated string":
 228 ///
 229 /// ```
 230 /// use std::str;
 231 ///
 232 /// // some bytes, in a stack-allocated array
 233 /// let sparkle_heart = [240, 159, 146, 150];
 234 ///
 235 /// // We know these bytes are valid, so just use `unwrap()`.
 236 /// let sparkle_heart = str::from_utf8(&sparkle_heart).unwrap();
 237 ///
 238 /// assert_eq!("💖", sparkle_heart);
 239 /// ```
 240 #[stable(feature = "rust1", since = "1.0.0")]
 241 pub fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
 242     run_utf8_validation(v)?;
 243     Ok(unsafe { from_utf8_unchecked(v) })
 244 }
 245
/// Forms a mutable str from a pointer and a length.
///
/// Performs the same functionality as `from_raw_parts`, except that a mutable
/// str is returned.
///
/// The `len` argument is the number of bytes in the string.
///
/// # Safety
///
/// This function is unsafe as there is no guarantee that the given pointer is
/// valid for `len` bytes, nor whether the lifetime inferred is a suitable
/// lifetime for the returned str.
///
/// The data must be valid UTF-8.
///
/// `p` must be non-null, even for zero-length str.
///
/// # Caveat
///
/// The lifetime for the returned str is inferred from its usage. To
/// prevent accidental misuse, it's suggested to tie the lifetime to whichever
/// source lifetime is safe in the context, such as by providing a helper
/// function taking the lifetime of a host value for the str, or by explicit
/// annotation.
unsafe fn from_raw_parts_mut<'a>(p: *mut u8, len: usize) -> &'a mut str {
    // Build the mutable byte slice from the raw parts, then reinterpret it as
    // a mutable string slice without any UTF-8 check.
    mem::transmute::<&mut [u8], &mut str>(slice::from_raw_parts_mut(p, len))
}
 273
 274 /// Converts a slice of bytes to a string slice without checking
 275 /// that the string contains valid UTF-8.
 276 ///
 277 /// See the safe version, [`from_utf8()`][fromutf8], for more information.
 278 ///
 279 /// [fromutf8]: fn.from_utf8.html
 280 ///
 281 /// # Safety
 282 ///
 283 /// This function is unsafe because it does not check that the bytes passed to
 284 /// it are valid UTF-8. If this constraint is violated, undefined behavior
 285 /// results, as the rest of Rust assumes that `&str`s are valid UTF-8.
 286 ///
 287 /// # Examples
 288 ///
 289 /// Basic usage:
 290 ///
 291 /// ```
 292 /// use std::str;
 293 ///
 294 /// // some bytes, in a vector
 295 /// let sparkle_heart = vec![240, 159, 146, 150];
 296 ///
 297 /// let sparkle_heart = unsafe {
 298 ///     str::from_utf8_unchecked(&sparkle_heart)
 299 /// };
 300 ///
 301 /// assert_eq!("💖", sparkle_heart);
 302 /// ```
 303 #[inline(always)]
 304 #[stable(feature = "rust1", since = "1.0.0")]
 305 pub unsafe fn from_utf8_unchecked(v: &[u8]) -> &str {
 306     mem::transmute(v)
 307 }
 308
 309 #[stable(feature = "rust1", since = "1.0.0")]
 310 impl fmt::Display for Utf8Error {
 311     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 312         write!(f, "invalid utf-8: invalid byte near index {}", self.valid_up_to)
 313     }
 314 }
 315
 316 /*
 317 Section: Iterators
 318 */
 319
/// Iterator for the char (representing *Unicode Scalar Values*) of a string
///
/// Created with the method [`chars()`].
///
/// [`chars()`]: ../../std/primitive.str.html#method.chars
#[derive(Clone, Debug)]
#[stable(feature = "rust1", since = "1.0.0")]
pub struct Chars<'a> {
    // Iterator over the bytes not yet consumed; code points are decoded from
    // its front by `next()` and from its back by `next_back()`.
    iter: slice::Iter<'a, u8>
}
 330
 331 /// Return the initial codepoint accumulator for the first byte.
 332 /// The first byte is special, only want bottom 5 bits for width 2, 4 bits
 333 /// for width 3, and 3 bits for width 4.
 334 #[inline]
 335 fn utf8_first_byte(byte: u8, width: u32) -> u32 { (byte & (0x7F >> width)) as u32 }
 336
 337 /// Return the value of `ch` updated with continuation byte `byte`.
 338 #[inline]
 339 fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 { (ch << 6) | (byte & CONT_MASK) as u32 }
 340
 341 /// Checks whether the byte is a UTF-8 continuation byte (i.e. starts with the
 342 /// bits `10`).
 343 #[inline]
 344 fn utf8_is_cont_byte(byte: u8) -> bool { (byte & !CONT_MASK) == TAG_CONT_U8 }
 345
 346 #[inline]
 347 fn unwrap_or_0(opt: Option<&u8>) -> u8 {
 348     match opt {
 349         Some(&byte) => byte,
 350         None => 0,
 351     }
 352 }
 353
/// Reads the next code point out of a byte iterator (assuming a
/// UTF-8-like encoding).
///
/// Returns `None` when the iterator is already exhausted. A first byte below
/// 128 is returned as-is; otherwise up to three further bytes are consumed
/// depending on the value of the first byte, and a missing byte is treated
/// as `0`. No validation of the input is performed here.
#[unstable(feature = "str_internals", issue = "0")]
#[inline]
pub fn next_code_point(bytes: &mut slice::Iter<u8>) -> Option<u32> {
    // Decode UTF-8
    let x = match bytes.next() {
        None => return None,
        // Single-byte fast path: the byte is the code point.
        Some(&next_byte) if next_byte < 128 => return Some(next_byte as u32),
        Some(&next_byte) => next_byte,
    };

    // Multibyte case follows
    // Decode from a byte combination out of: [[[x y] z] w]
    // NOTE: Performance is sensitive to the exact formulation here
    let init = utf8_first_byte(x, 2);
    let y = unwrap_or_0(bytes.next());
    // Two-byte result; overwritten below when the sequence is longer.
    let mut ch = utf8_acc_cont_byte(init, y);
    if x >= 0xE0 {
        // [[x y z] w] case
        // 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid
        let z = unwrap_or_0(bytes.next());
        let y_z = utf8_acc_cont_byte((y & CONT_MASK) as u32, z);
        ch = init << 12 | y_z;
        if x >= 0xF0 {
            // [x y z w] case
            // use only the lower 3 bits of `init`
            let w = unwrap_or_0(bytes.next());
            ch = (init & 7) << 18 | utf8_acc_cont_byte(y_z, w);
        }
    }

    Some(ch)
}
 388
 389 /// Reads the last code point out of a byte iterator (assuming a
 390 /// UTF-8-like encoding).
 391 #[inline]
 392 fn next_code_point_reverse(bytes: &mut slice::Iter<u8>) -> Option<u32> {
 393     // Decode UTF-8
 394     let w = match bytes.next_back() {
 395         None => return None,
 396         Some(&next_byte) if next_byte < 128 => return Some(next_byte as u32),
 397         Some(&back_byte) => back_byte,
 398     };
 399
 400     // Multibyte case follows
 401     // Decode from a byte combination out of: [x [y [z w]]]
 402     let mut ch;
 403     let z = unwrap_or_0(bytes.next_back());
 404     ch = utf8_first_byte(z, 2);
 405     if utf8_is_cont_byte(z) {
 406         let y = unwrap_or_0(bytes.next_back());
 407         ch = utf8_first_byte(y, 3);
 408         if utf8_is_cont_byte(y) {
 409             let x = unwrap_or_0(bytes.next_back());
 410             ch = utf8_first_byte(x, 4);
 411             ch = utf8_acc_cont_byte(ch, y);
 412         }
 413         ch = utf8_acc_cont_byte(ch, z);
 414     }
 415     ch = utf8_acc_cont_byte(ch, w);
 416
 417     Some(ch)
 418 }
 419
 420 #[stable(feature = "rust1", since = "1.0.0")]
 421 impl<'a> Iterator for Chars<'a> {
 422     type Item = char;
 423
 424     #[inline]
 425     fn next(&mut self) -> Option<char> {
 426         next_code_point(&mut self.iter).map(|ch| {
 427             // str invariant says `ch` is a valid Unicode Scalar Value
 428             unsafe {
 429                 char::from_u32_unchecked(ch)
 430             }
 431         })
 432     }
 433
 434     #[inline]
 435     fn size_hint(&self) -> (usize, Option<usize>) {
 436         let (len, _) = self.iter.size_hint();
 437         // `(len + 3)` can't overflow, because we know that the `slice::Iter`
 438         // belongs to a slice in memory which has a maximum length of
 439         // `isize::MAX` (that's well below `usize::MAX`).
 440         ((len + 3) / 4, Some(len))
 441     }
 442 }
 443
 444 #[stable(feature = "rust1", since = "1.0.0")]
 445 impl<'a> DoubleEndedIterator for Chars<'a> {
 446     #[inline]
 447     fn next_back(&mut self) -> Option<char> {
 448         next_code_point_reverse(&mut self.iter).map(|ch| {
 449             // str invariant says `ch` is a valid Unicode Scalar Value
 450             unsafe {
 451                 char::from_u32_unchecked(ch)
 452             }
 453         })
 454     }
 455 }
 456
 457 impl<'a> Chars<'a> {
 458     /// View the underlying data as a subslice of the original data.
 459     ///
 460     /// This has the same lifetime as the original slice, and so the
 461     /// iterator can continue to be used while this exists.
 462     #[stable(feature = "iter_to_slice", since = "1.4.0")]
 463     #[inline]
 464     pub fn as_str(&self) -> &'a str {
 465         unsafe { from_utf8_unchecked(self.iter.as_slice()) }
 466     }
 467 }
 468
/// Iterator for a string's characters and their byte offsets.
#[derive(Clone, Debug)]
#[stable(feature = "rust1", since = "1.0.0")]
pub struct CharIndices<'a> {
    // Offset reported for the next char yielded from the front; advanced by
    // the number of bytes each forward step consumes.
    front_offset: usize,
    // Underlying char iterator over the not-yet-consumed bytes.
    iter: Chars<'a>,
}
 476
 477 #[stable(feature = "rust1", since = "1.0.0")]
 478 impl<'a> Iterator for CharIndices<'a> {
 479     type Item = (usize, char);
 480
 481     #[inline]
 482     fn next(&mut self) -> Option<(usize, char)> {
 483         let (pre_len, _) = self.iter.iter.size_hint();
 484         match self.iter.next() {
 485             None => None,
 486             Some(ch) => {
 487                 let index = self.front_offset;
 488                 let (len, _) = self.iter.iter.size_hint();
 489                 self.front_offset += pre_len - len;
 490                 Some((index, ch))
 491             }
 492         }
 493     }
 494
 495     #[inline]
 496     fn size_hint(&self) -> (usize, Option<usize>) {
 497         self.iter.size_hint()
 498     }
 499 }
 500
 501 #[stable(feature = "rust1", since = "1.0.0")]
 502 impl<'a> DoubleEndedIterator for CharIndices<'a> {
 503     #[inline]
 504     fn next_back(&mut self) -> Option<(usize, char)> {
 505         match self.iter.next_back() {
 506             None => None,
 507             Some(ch) => {
 508                 let (len, _) = self.iter.iter.size_hint();
 509                 let index = self.front_offset + len;
 510                 Some((index, ch))
 511             }
 512         }
 513     }
 514 }
 515
impl<'a> CharIndices<'a> {
    /// View the underlying data as a subslice of the original data.
    ///
    /// This has the same lifetime as the original slice, and so the
    /// iterator can continue to be used while this exists.
    #[stable(feature = "iter_to_slice", since = "1.4.0")]
    #[inline]
    pub fn as_str(&self) -> &'a str {
        // Delegates to the wrapped `Chars` iterator.
        self.iter.as_str()
    }
}
 527
/// External iterator for a string's bytes.
/// Use with the `std::iter` module.
///
/// Created with the method [`bytes()`].
///
/// [`bytes()`]: ../../std/primitive.str.html#method.bytes
///
/// This is a newtype around a cloning iterator over the underlying byte
/// slice, so items are yielded by value as `u8`.
#[stable(feature = "rust1", since = "1.0.0")]
#[derive(Clone, Debug)]
pub struct Bytes<'a>(Cloned<slice::Iter<'a, u8>>);
 537
#[stable(feature = "rust1", since = "1.0.0")]
impl<'a> Iterator for Bytes<'a> {
    type Item = u8;

    // Every method below forwards directly to the wrapped iterator.

    #[inline]
    fn next(&mut self) -> Option<u8> {
        self.0.next()
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.0.size_hint()
    }

    // `count`, `last` and `nth` are forwarded explicitly so that the wrapped
    // iterator's own implementations are used instead of the trait defaults.

    #[inline]
    fn count(self) -> usize {
        self.0.count()
    }

    #[inline]
    fn last(self) -> Option<Self::Item> {
        self.0.last()
    }

    #[inline]
    fn nth(&mut self, n: usize) -> Option<Self::Item> {
        self.0.nth(n)
    }
}
 567
#[stable(feature = "rust1", since = "1.0.0")]
impl<'a> DoubleEndedIterator for Bytes<'a> {
    // Takes the next byte from the back of the wrapped iterator.
    #[inline]
    fn next_back(&mut self) -> Option<u8> {
        self.0.next_back()
    }
}
 575
#[stable(feature = "rust1", since = "1.0.0")]
impl<'a> ExactSizeIterator for Bytes<'a> {
    // The number of remaining bytes is reported by the wrapped iterator.
    #[inline]
    fn len(&self) -> usize {
        self.0.len()
    }
}
 583
/// This macro generates a Clone impl for string pattern API
/// wrapper types of the form X<'a, P>
///
/// The generated impl is bounded by `P::Searcher: Clone`. Inside `clone`,
/// the identifier given as `$s` is bound to `self` and the expression `$e`
/// is used as the result.
macro_rules! derive_pattern_clone {
    (clone $t:ident with |$s:ident| $e:expr) => {
        impl<'a, P: Pattern<'a>> Clone for $t<'a, P>
            where P::Searcher: Clone
        {
            fn clone(&self) -> Self {
                let $s = self;
                $e
            }
        }
    }
}
 598
/// This macro generates two public iterator structs
/// wrapping a private internal one that makes use of the `Pattern` API.
///
/// For all patterns `P: Pattern<'a>` the following items will be
/// generated (generics omitted):
///
/// struct $forward_iterator($internal_iterator);
/// struct $reverse_iterator($internal_iterator);
///
/// impl Iterator for $forward_iterator
/// { /* internal ends up calling Searcher::next_match() */ }
///
/// impl DoubleEndedIterator for $forward_iterator
///       where P::Searcher: DoubleEndedSearcher
/// { /* internal ends up calling Searcher::next_match_back() */ }
///
/// impl Iterator for $reverse_iterator
///       where P::Searcher: ReverseSearcher
/// { /* internal ends up calling Searcher::next_match_back() */ }
///
/// impl DoubleEndedIterator for $reverse_iterator
///       where P::Searcher: DoubleEndedSearcher
/// { /* internal ends up calling Searcher::next_match() */ }
///
/// The internal one is defined outside the macro, and has almost the same
/// semantics as a DoubleEndedIterator by delegating to `pattern::Searcher` and
/// `pattern::ReverseSearcher` for both forward and reverse iteration.
///
/// "Almost", because a `Searcher` and a `ReverseSearcher` for a given
/// `Pattern` might not return the same elements, so actually implementing
/// `DoubleEndedIterator` for it would be incorrect.
/// (See the docs in `str::pattern` for more details)
///
/// However, the internal struct still represents a single ended iterator from
/// either end, and depending on pattern is also a valid double ended iterator,
/// so the two wrapper structs implement `Iterator`
/// and `DoubleEndedIterator` depending on the concrete pattern type, leading
/// to the complex impls seen above.
///
/// In addition, `fmt::Debug` and `Clone` impls are generated for both
/// wrapper structs, bounded on the searcher implementing the same trait.
macro_rules! generate_pattern_iterators {
    // Main entry point: emits both wrapper structs and their common impls,
    // then re-invokes the macro with the `delegate` tokens to pick one of the
    // two arms below.
    {
        // Forward iterator
        forward:
            $(#[$forward_iterator_attribute:meta])*
            struct $forward_iterator:ident;

        // Reverse iterator
        reverse:
            $(#[$reverse_iterator_attribute:meta])*
            struct $reverse_iterator:ident;

        // Stability of all generated items
        stability:
            $(#[$common_stability_attribute:meta])*

        // Internal almost-iterator that is being delegated to
        internal:
            $internal_iterator:ident yielding ($iterty:ty);

        // Kind of delegation - either single ended or double ended
        delegate $($t:tt)*
    } => {
        $(#[$forward_iterator_attribute])*
        $(#[$common_stability_attribute])*
        pub struct $forward_iterator<'a, P: Pattern<'a>>($internal_iterator<'a, P>);

        $(#[$common_stability_attribute])*
        impl<'a, P: Pattern<'a>> fmt::Debug for $forward_iterator<'a, P>
            where P::Searcher: fmt::Debug
        {
            fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
                f.debug_tuple(stringify!($forward_iterator))
                    .field(&self.0)
                    .finish()
            }
        }

        $(#[$common_stability_attribute])*
        impl<'a, P: Pattern<'a>> Iterator for $forward_iterator<'a, P> {
            type Item = $iterty;

            #[inline]
            fn next(&mut self) -> Option<$iterty> {
                self.0.next()
            }
        }

        $(#[$common_stability_attribute])*
        impl<'a, P: Pattern<'a>> Clone for $forward_iterator<'a, P>
            where P::Searcher: Clone
        {
            fn clone(&self) -> Self {
                $forward_iterator(self.0.clone())
            }
        }

        $(#[$reverse_iterator_attribute])*
        $(#[$common_stability_attribute])*
        pub struct $reverse_iterator<'a, P: Pattern<'a>>($internal_iterator<'a, P>);

        $(#[$common_stability_attribute])*
        impl<'a, P: Pattern<'a>> fmt::Debug for $reverse_iterator<'a, P>
            where P::Searcher: fmt::Debug
        {
            fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
                f.debug_tuple(stringify!($reverse_iterator))
                    .field(&self.0)
                    .finish()
            }
        }

        $(#[$common_stability_attribute])*
        impl<'a, P: Pattern<'a>> Iterator for $reverse_iterator<'a, P>
            where P::Searcher: ReverseSearcher<'a>
        {
            type Item = $iterty;

            // The reverse wrapper walks the internal iterator from the back.
            #[inline]
            fn next(&mut self) -> Option<$iterty> {
                self.0.next_back()
            }
        }

        $(#[$common_stability_attribute])*
        impl<'a, P: Pattern<'a>> Clone for $reverse_iterator<'a, P>
            where P::Searcher: Clone
        {
            fn clone(&self) -> Self {
                $reverse_iterator(self.0.clone())
            }
        }

        generate_pattern_iterators!($($t)* with $(#[$common_stability_attribute])*,
                                                $forward_iterator,
                                                $reverse_iterator, $iterty);
    };
    // `delegate double ended;` - additionally implement `DoubleEndedIterator`
    // for both wrappers when the searcher is a `DoubleEndedSearcher`.
    {
        double ended; with $(#[$common_stability_attribute:meta])*,
                           $forward_iterator:ident,
                           $reverse_iterator:ident, $iterty:ty
    } => {
        $(#[$common_stability_attribute])*
        impl<'a, P: Pattern<'a>> DoubleEndedIterator for $forward_iterator<'a, P>
            where P::Searcher: DoubleEndedSearcher<'a>
        {
            #[inline]
            fn next_back(&mut self) -> Option<$iterty> {
                self.0.next_back()
            }
        }

        $(#[$common_stability_attribute])*
        impl<'a, P: Pattern<'a>> DoubleEndedIterator for $reverse_iterator<'a, P>
            where P::Searcher: DoubleEndedSearcher<'a>
        {
            // The back of the reverse wrapper is the front of the internal
            // iterator.
            #[inline]
            fn next_back(&mut self) -> Option<$iterty> {
                self.0.next()
            }
        }
    };
    // `delegate single ended;` - nothing further is generated.
    {
        single ended; with $(#[$common_stability_attribute:meta])*,
                           $forward_iterator:ident,
                           $reverse_iterator:ident, $iterty:ty
    } => {}
}
 765
// `Clone` goes through the macro so that the impl is bounded by
// `P::Searcher: Clone`; the searcher is cloned and the other fields are
// copied over from the original.
derive_pattern_clone!{
    clone SplitInternal
    with |s| SplitInternal { matcher: s.matcher.clone(), ..*s }
}

// Shared state behind the split iterators generated below.
struct SplitInternal<'a, P: Pattern<'a>> {
    // Haystack offset at which the next piece yielded from the front begins.
    start: usize,
    // Haystack offset at which the next piece yielded from the back ends.
    end: usize,
    // Searcher that supplies the match positions in the haystack.
    matcher: P::Searcher,
    // When false, `get_end` does not yield a final piece that is empty.
    allow_trailing_empty: bool,
    // Set once the final piece has been handed out; nothing is yielded after.
    finished: bool,
}
 778
// Manual `Debug` impl so that the bound can be placed on `P::Searcher`;
// prints every field under the struct name "SplitInternal".
impl<'a, P: Pattern<'a>> fmt::Debug for SplitInternal<'a, P> where P::Searcher: fmt::Debug {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.debug_struct("SplitInternal")
            .field("start", &self.start)
            .field("end", &self.end)
            .field("matcher", &self.matcher)
            .field("allow_trailing_empty", &self.allow_trailing_empty)
            .field("finished", &self.finished)
            .finish()
    }
}
 790
impl<'a, P: Pattern<'a>> SplitInternal<'a, P> {
    /// Yields the remaining piece `start..end` of the haystack and marks the
    /// iterator as finished.
    ///
    /// Returns `None` if the iterator is already finished, or if the
    /// remaining piece is empty and `allow_trailing_empty` is not set.
    #[inline]
    fn get_end(&mut self) -> Option<&'a str> {
        if !self.finished && (self.allow_trailing_empty || self.end - self.start > 0) {
            self.finished = true;
            unsafe {
                let string = self.matcher.haystack().slice_unchecked(self.start, self.end);
                Some(string)
            }
        } else {
            None
        }
    }

    /// Yields the next piece from the front.
    ///
    /// The piece runs from `start` to the beginning of the next match, and
    /// `start` is then moved to the end of that match. When the searcher
    /// reports no further match, the result of `get_end` is returned.
    #[inline]
    fn next(&mut self) -> Option<&'a str> {
        if self.finished { return None }

        let haystack = self.matcher.haystack();
        match self.matcher.next_match() {
            Some((a, b)) => unsafe {
                let elt = haystack.slice_unchecked(self.start, a);
                self.start = b;
                Some(elt)
            },
            None => self.get_end(),
        }
    }

    /// Yields the next piece from the back.
    ///
    /// The piece runs from the end of the last remaining match to `end`, and
    /// `end` is then moved to the beginning of that match. When the searcher
    /// reports no further match, the remaining `start..end` piece is yielded
    /// and the iterator is marked as finished.
    #[inline]
    fn next_back(&mut self) -> Option<&'a str>
        where P::Searcher: ReverseSearcher<'a>
    {
        if self.finished { return None }

        if !self.allow_trailing_empty {
            // Setting the flag first means this branch runs at most once and
            // the recursive call below goes straight to the search.
            self.allow_trailing_empty = true;
            match self.next_back() {
                // A non-empty last piece is returned as usual.
                Some(elt) if !elt.is_empty() => return Some(elt),
                // An empty last piece is dropped; stop here if it was also the
                // final one, otherwise fall through and fetch the piece before.
                _ => if self.finished { return None }
            }
        }

        let haystack = self.matcher.haystack();
        match self.matcher.next_match_back() {
            Some((a, b)) => unsafe {
                let elt = haystack.slice_unchecked(b, self.end);
                self.end = a;
                Some(elt)
            },
            None => unsafe {
                self.finished = true;
                Some(haystack.slice_unchecked(self.start, self.end))
            },
        }
    }
}
 848
// Public `Split` / `RSplit` iterators: wrappers around `SplitInternal` that
// yield `&'a str` and also get the double ended impls.
generate_pattern_iterators! {
    forward:
        /// Created with the method [`split()`].
        ///
        /// [`split()`]: ../../std/primitive.str.html#method.split
        struct Split;
    reverse:
        /// Created with the method [`rsplit()`].
        ///
        /// [`rsplit()`]: ../../std/primitive.str.html#method.rsplit
        struct RSplit;
    stability:
        #[stable(feature = "rust1", since = "1.0.0")]
    internal:
        SplitInternal yielding (&'a str);
    delegate double ended;
}
 866
// Public `SplitTerminator` / `RSplitTerminator` iterators: also wrappers
// around `SplitInternal` yielding `&'a str`, with the double ended impls.
generate_pattern_iterators! {
    forward:
        /// Created with the method [`split_terminator()`].
        ///
        /// [`split_terminator()`]: ../../std/primitive.str.html#method.split_terminator
        struct SplitTerminator;
    reverse:
        /// Created with the method [`rsplit_terminator()`].
        ///
        /// [`rsplit_terminator()`]: ../../std/primitive.str.html#method.rsplit_terminator
        struct RSplitTerminator;
    stability:
        #[stable(feature = "rust1", since = "1.0.0")]
    internal:
        SplitInternal yielding (&'a str);
    delegate double ended;
}
 884
// `Clone` goes through the macro so that the impl is bounded by
// `P::Searcher: Clone`; the inner iterator is cloned and `count` is copied.
derive_pattern_clone!{
    clone SplitNInternal
    with |s| SplitNInternal { iter: s.iter.clone(), ..*s }
}

// A `SplitInternal` limited to a fixed number of yielded pieces.
struct SplitNInternal<'a, P: Pattern<'a>> {
    // The underlying unlimited split iterator.
    iter: SplitInternal<'a, P>,
    /// The number of splits remaining
    count: usize,
}
 895
// Manual `Debug` impl so that the bound can be placed on `P::Searcher`;
// prints both fields under the struct name "SplitNInternal".
impl<'a, P: Pattern<'a>> fmt::Debug for SplitNInternal<'a, P> where P::Searcher: fmt::Debug {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.debug_struct("SplitNInternal")
            .field("iter", &self.iter)
            .field("count", &self.count)
            .finish()
    }
}
 904
 905 impl<'a, P: Pattern<'a>> SplitNInternal<'a, P> {
 906     #[inline]
 907     fn next(&mut self) -> Option<&'a str> {
 908         match self.count {
 909             0 => None,
 910             1 => { self.count = 0; self.iter.get_end() }
 911             _ => { self.count -= 1; self.iter.next() }
 912         }
 913     }
 914
 915     #[inline]
 916     fn next_back(&mut self) -> Option<&'a str>
 917         where P::Searcher: ReverseSearcher<'a>
 918     {
 919         match self.count {
 920             0 => None,
 921             1 => { self.count = 0; self.iter.get_end() }
 922             _ => { self.count -= 1; self.iter.next_back() }
 923         }
 924     }
 925 }
 926
// Declares the public `SplitN` / `RSplitN` iterator types, backed by
// `SplitNInternal` and yielding `&'a str` items. Unlike the unbounded split
// iterators, the invocation asks for `single ended` delegation only.
generate_pattern_iterators! {
    forward:
        /// Created with the method [`splitn()`].
        ///
        /// [`splitn()`]: ../../std/primitive.str.html#method.splitn
        struct SplitN;
    reverse:
        /// Created with the method [`rsplitn()`].
        ///
        /// [`rsplitn()`]: ../../std/primitive.str.html#method.rsplitn
        struct RSplitN;
    stability:
        #[stable(feature = "rust1", since = "1.0.0")]
    internal:
        SplitNInternal yielding (&'a str);
    delegate single ended;
}
 944
// Clone support for `MatchIndicesInternal`: clones the wrapped searcher.
derive_pattern_clone!{
    clone MatchIndicesInternal
    with |s| MatchIndicesInternal(s.0.clone())
}

/// Shared state behind the `MatchIndices` / `RMatchIndices` iterators: a
/// newtype around the pattern's searcher.
struct MatchIndicesInternal<'a, P: Pattern<'a>>(P::Searcher);
 951
 952 impl<'a, P: Pattern<'a>> fmt::Debug for MatchIndicesInternal<'a, P> where P::Searcher: fmt::Debug {
 953     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 954         f.debug_tuple("MatchIndicesInternal")
 955             .field(&self.0)
 956             .finish()
 957     }
 958 }
 959
 960 impl<'a, P: Pattern<'a>> MatchIndicesInternal<'a, P> {
 961     #[inline]
 962     fn next(&mut self) -> Option<(usize, &'a str)> {
 963         self.0.next_match().map(|(start, end)| unsafe {
 964             (start, self.0.haystack().slice_unchecked(start, end))
 965         })
 966     }
 967
 968     #[inline]
 969     fn next_back(&mut self) -> Option<(usize, &'a str)>
 970         where P::Searcher: ReverseSearcher<'a>
 971     {
 972         self.0.next_match_back().map(|(start, end)| unsafe {
 973             (start, self.0.haystack().slice_unchecked(start, end))
 974         })
 975     }
 976 }
 977
// Declares the public `MatchIndices` / `RMatchIndices` iterator types, backed
// by `MatchIndicesInternal`. Items are `(usize, &'a str)` pairs and
// double-ended delegation is requested.
generate_pattern_iterators! {
    forward:
        /// Created with the method [`match_indices()`].
        ///
        /// [`match_indices()`]: ../../std/primitive.str.html#method.match_indices
        struct MatchIndices;
    reverse:
        /// Created with the method [`rmatch_indices()`].
        ///
        /// [`rmatch_indices()`]: ../../std/primitive.str.html#method.rmatch_indices
        struct RMatchIndices;
    stability:
        #[stable(feature = "str_match_indices", since = "1.5.0")]
    internal:
        MatchIndicesInternal yielding ((usize, &'a str));
    delegate double ended;
}
 995
// Clone support for `MatchesInternal`: clones the wrapped searcher.
derive_pattern_clone!{
    clone MatchesInternal
    with |s| MatchesInternal(s.0.clone())
}

/// Shared state behind the `Matches` / `RMatches` iterators: a newtype around
/// the pattern's searcher.
struct MatchesInternal<'a, P: Pattern<'a>>(P::Searcher);
1002
1003 impl<'a, P: Pattern<'a>> fmt::Debug for MatchesInternal<'a, P> where P::Searcher: fmt::Debug {
1004     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1005         f.debug_tuple("MatchesInternal")
1006             .field(&self.0)
1007             .finish()
1008     }
1009 }
1010
1011 impl<'a, P: Pattern<'a>> MatchesInternal<'a, P> {
1012     #[inline]
1013     fn next(&mut self) -> Option<&'a str> {
1014         self.0.next_match().map(|(a, b)| unsafe {
1015             // Indices are known to be on utf8 boundaries
1016             self.0.haystack().slice_unchecked(a, b)
1017         })
1018     }
1019
1020     #[inline]
1021     fn next_back(&mut self) -> Option<&'a str>
1022         where P::Searcher: ReverseSearcher<'a>
1023     {
1024         self.0.next_match_back().map(|(a, b)| unsafe {
1025             // Indices are known to be on utf8 boundaries
1026             self.0.haystack().slice_unchecked(a, b)
1027         })
1028     }
1029 }
1030
// Declares the public `Matches` / `RMatches` iterator types, backed by
// `MatchesInternal` and yielding `&'a str` items with double-ended
// delegation requested.
generate_pattern_iterators! {
    forward:
        /// Created with the method [`matches()`].
        ///
        /// [`matches()`]: ../../std/primitive.str.html#method.matches
        struct Matches;
    reverse:
        /// Created with the method [`rmatches()`].
        ///
        /// [`rmatches()`]: ../../std/primitive.str.html#method.rmatches
        struct RMatches;
    stability:
        #[stable(feature = "str_matches", since = "1.2.0")]
    internal:
        MatchesInternal yielding (&'a str);
    delegate double ended;
}
1048
/// Created with the method [`lines()`].
///
/// Internally this wraps a `SplitTerminator` over a `char` pattern whose
/// pieces are each passed through `LinesAnyMap`.
///
/// [`lines()`]: ../../std/primitive.str.html#method.lines
#[stable(feature = "rust1", since = "1.0.0")]
#[derive(Clone, Debug)]
pub struct Lines<'a>(Map<SplitTerminator<'a, char>, LinesAnyMap>);
1055
1056 #[stable(feature = "rust1", since = "1.0.0")]
1057 impl<'a> Iterator for Lines<'a> {
1058     type Item = &'a str;
1059
1060     #[inline]
1061     fn next(&mut self) -> Option<&'a str> {
1062         self.0.next()
1063     }
1064
1065     #[inline]
1066     fn size_hint(&self) -> (usize, Option<usize>) {
1067         self.0.size_hint()
1068     }
1069 }
1070
1071 #[stable(feature = "rust1", since = "1.0.0")]
1072 impl<'a> DoubleEndedIterator for Lines<'a> {
1073     #[inline]
1074     fn next_back(&mut self) -> Option<&'a str> {
1075         self.0.next_back()
1076     }
1077 }
1078
/// Created with the method [`lines_any()`].
///
/// This is a thin wrapper around [`Lines`], kept only for the deprecated
/// `lines_any()` API.
///
/// [`lines_any()`]: ../../std/primitive.str.html#method.lines_any
#[stable(feature = "rust1", since = "1.0.0")]
#[rustc_deprecated(since = "1.4.0", reason = "use lines()/Lines instead now")]
#[derive(Clone, Debug)]
#[allow(deprecated)]
pub struct LinesAny<'a>(Lines<'a>);
1087
/// A nameable, cloneable fn type
///
/// Its `Fn` implementation removes one trailing `\r` byte from a line, so it
/// can be used as the mapping function stored inside `Lines`.
#[derive(Clone)]
struct LinesAnyMap;
1091
1092 impl<'a> Fn<(&'a str,)> for LinesAnyMap {
1093     #[inline]
1094     extern "rust-call" fn call(&self, (line,): (&'a str,)) -> &'a str {
1095         let l = line.len();
1096         if l > 0 && line.as_bytes()[l - 1] == b'\r' { &line[0 .. l - 1] }
1097         else { line }
1098     }
1099 }
1100
1101 impl<'a> FnMut<(&'a str,)> for LinesAnyMap {
1102     #[inline]
1103     extern "rust-call" fn call_mut(&mut self, (line,): (&'a str,)) -> &'a str {
1104         Fn::call(&*self, (line,))
1105     }
1106 }
1107
1108 impl<'a> FnOnce<(&'a str,)> for LinesAnyMap {
1109     type Output = &'a str;
1110
1111     #[inline]
1112     extern "rust-call" fn call_once(self, (line,): (&'a str,)) -> &'a str {
1113         Fn::call(&self, (line,))
1114     }
1115 }
1116
1117 #[stable(feature = "rust1", since = "1.0.0")]
1118 #[allow(deprecated)]
1119 impl<'a> Iterator for LinesAny<'a> {
1120     type Item = &'a str;
1121
1122     #[inline]
1123     fn next(&mut self) -> Option<&'a str> {
1124         self.0.next()
1125     }
1126
1127     #[inline]
1128     fn size_hint(&self) -> (usize, Option<usize>) {
1129         self.0.size_hint()
1130     }
1131 }
1132
1133 #[stable(feature = "rust1", since = "1.0.0")]
1134 #[allow(deprecated)]
1135 impl<'a> DoubleEndedIterator for LinesAny<'a> {
1136     #[inline]
1137     fn next_back(&mut self) -> Option<&'a str> {
1138         self.0.next_back()
1139     }
1140 }
1141
1142 /*
1143 Section: Comparing strings
1144 */
1145
1146 /// Bytewise slice equality
1147 /// NOTE: This function is (ab)used in rustc::middle::trans::_match
1148 /// to compare &[u8] byte slices that are not necessarily valid UTF-8.
1149 #[lang = "str_eq"]
1150 #[inline]
1151 fn eq_slice(a: &str, b: &str) -> bool {
1152     a.as_bytes() == b.as_bytes()
1153 }
1154
1155 /*
1156 Section: UTF-8 validation
1157 */
1158
// The 64-bit pattern is truncated by the cast so it also fits a narrower usize.
const NONASCII_MASK: usize = 0x80808080_80808080u64 as usize;

/// Return `true` if any byte in the word `x` is nonascii (>= 128).
///
/// A byte is non-ASCII exactly when its top bit is set, so masking every
/// byte's top bit and comparing with zero answers the question for the whole
/// word at once.
#[inline]
fn contains_nonascii(x: usize) -> bool {
    let high_bits = x & NONASCII_MASK;
    high_bits != 0
}
1167
/// Checks that `v` is a well-formed UTF-8 byte sequence.
///
/// Returns `Ok(())` when all of `v` validates. Otherwise returns a
/// `Utf8Error` whose `valid_up_to` is the offset at which the failing
/// loop iteration started, i.e. the first byte of the sequence that could
/// not be validated.
#[inline(always)]
fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
    let mut offset = 0;
    let len = v.len();
    while offset < len {
        // Remember where this sequence began so errors can report it.
        let old_offset = offset;
        macro_rules! err { () => {{
            return Err(Utf8Error {
                valid_up_to: old_offset
            })
        }}}

        // Advances to the following byte and yields it, failing if the
        // input ends in the middle of a multi-byte sequence.
        macro_rules! next { () => {{
            offset += 1;
            // we needed data, but there was none: error!
            if offset >= len {
                err!()
            }
            v[offset]
        }}}

        let first = v[offset];
        if first >= 128 {
            // Expected sequence length according to the leading byte;
            // anything other than 2, 3 or 4 is rejected by the match below.
            let w = UTF8_CHAR_WIDTH[first as usize];
            let second = next!();
            // 2-byte encoding is for codepoints  \u{0080} to  \u{07ff}
            //        first  C2 80        last DF BF
            // 3-byte encoding is for codepoints  \u{0800} to  \u{ffff}
            //        first  E0 A0 80     last EF BF BF
            //   excluding surrogates codepoints  \u{d800} to  \u{dfff}
            //               ED A0 80 to       ED BF BF
            // 4-byte encoding is for codepoints \u{10000} to \u{10ffff}
            //        first  F0 90 80 80  last F4 8F BF BF
            //
            // Use the UTF-8 syntax from the RFC
            //
            // https://tools.ietf.org/html/rfc3629
            // UTF8-1      = %x00-7F
            // UTF8-2      = %xC2-DF UTF8-tail
            // UTF8-3      = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
            //               %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
            // UTF8-4      = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
            //               %xF4 %x80-8F 2( UTF8-tail )
            match w {
                2 => if second & !CONT_MASK != TAG_CONT_U8 {err!()},
                3 => {
                    match (first, second, next!() & !CONT_MASK) {
                        (0xE0         , 0xA0 ... 0xBF, TAG_CONT_U8) |
                        (0xE1 ... 0xEC, 0x80 ... 0xBF, TAG_CONT_U8) |
                        (0xED         , 0x80 ... 0x9F, TAG_CONT_U8) |
                        (0xEE ... 0xEF, 0x80 ... 0xBF, TAG_CONT_U8) => {}
                        _ => err!()
                    }
                }
                4 => {
                    match (first, second, next!() & !CONT_MASK, next!() & !CONT_MASK) {
                        (0xF0         , 0x90 ... 0xBF, TAG_CONT_U8, TAG_CONT_U8) |
                        (0xF1 ... 0xF3, 0x80 ... 0xBF, TAG_CONT_U8, TAG_CONT_U8) |
                        (0xF4         , 0x80 ... 0x8F, TAG_CONT_U8, TAG_CONT_U8) => {}
                        _ => err!()
                    }
                }
                _ => err!()
            }
            // Step past the last byte of the sequence just validated.
            offset += 1;
        } else {
            // Ascii case, try to skip forward quickly.
            // When the pointer is aligned, read 2 words of data per iteration
            // until we find a word containing a non-ascii byte.
            let usize_bytes = mem::size_of::<usize>();
            let bytes_per_iteration = 2 * usize_bytes;
            let ptr = v.as_ptr();
            // Zero when the address of the current byte is a multiple of the word size.
            let align = (ptr as usize + offset) & (usize_bytes - 1);
            if align == 0 {
                // The length guard keeps `len - bytes_per_iteration` from underflowing.
                if len >= bytes_per_iteration {
                    while offset <= len - bytes_per_iteration {
                        unsafe {
                            let u = *(ptr.offset(offset as isize) as *const usize);
                            let v = *(ptr.offset((offset + usize_bytes) as isize) as *const usize);

                            // break if there is a nonascii byte
                            let zu = contains_nonascii(u);
                            let zv = contains_nonascii(v);
                            if zu || zv {
                                break;
                            }
                        }
                        offset += bytes_per_iteration;
                    }
                }
                // step from the point where the wordwise loop stopped
                while offset < len && v[offset] < 128 {
                    offset += 1;
                }
            } else {
                // Unaligned: consume this single ASCII byte and re-check alignment
                // on the next iteration of the outer loop.
                offset += 1;
            }
        }
    }

    Ok(())
}
1274
// https://tools.ietf.org/html/rfc3629
//
// Table indexed by a byte value, giving the length in bytes of the UTF-8
// sequence that the byte introduces: 1 for 0x00-0x7F, 2 for 0xC2-0xDF,
// 3 for 0xE0-0xEF and 4 for 0xF0-0xF4. Every other byte maps to 0.
static UTF8_CHAR_WIDTH: [u8; 256] = [
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x1F
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x3F
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x5F
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x7F
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x9F
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xBF
0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xDF
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, // 0xEF
4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, // 0xFF
];
1294
/// Struct that contains a `char` and the index of the first byte of
/// the next `char` in a string.  This can be used as a data structure
/// for iterating over the UTF-8 bytes of a string.
///
/// It is the return type of the deprecated `char_range_at` and
/// `char_range_at_reverse` methods.
#[derive(Copy, Clone, Debug)]
#[unstable(feature = "str_char",
           reason = "existence of this struct is uncertain as it is frequently \
                     able to be replaced with char.len_utf8() and/or \
                     char/char_indices iterators",
           issue = "27754")]
pub struct CharRange {
    /// Current `char`
    pub ch: char,
    /// Index of the first byte of the next `char`
    pub next: usize,
}
1310
/// Mask of the value bits of a continuation byte (its low six bits)
const CONT_MASK: u8 = 0b0011_1111;
/// Value of the tag bits (tag mask is !CONT_MASK) of a continuation byte,
/// i.e. a continuation byte has the form `10xx_xxxx`
const TAG_CONT_U8: u8 = 0b1000_0000;
1315
1316 /*
1317 Section: Trait implementations
1318 */
1319
1320 mod traits {
1321     use cmp::{Ord, Ordering, PartialEq, PartialOrd, Eq};
1322     use iter::Iterator;
1323     use option::Option;
1324     use option::Option::Some;
1325     use ops;
1326     use str::{StrExt, eq_slice};
1327
1328     #[stable(feature = "rust1", since = "1.0.0")]
1329     impl Ord for str {
1330         #[inline]
1331         fn cmp(&self, other: &str) -> Ordering {
1332             self.as_bytes().cmp(other.as_bytes())
1333         }
1334     }
1335
1336     #[stable(feature = "rust1", since = "1.0.0")]
1337     impl PartialEq for str {
1338         #[inline]
1339         fn eq(&self, other: &str) -> bool {
1340             eq_slice(self, other)
1341         }
1342         #[inline]
1343         fn ne(&self, other: &str) -> bool { !(*self).eq(other) }
1344     }
1345
    // Marker impl with no methods; string equality itself is defined by the
    // `PartialEq` impl for `str`.
    #[stable(feature = "rust1", since = "1.0.0")]
    impl Eq for str {}
1348
1349     #[stable(feature = "rust1", since = "1.0.0")]
1350     impl PartialOrd for str {
1351         #[inline]
1352         fn partial_cmp(&self, other: &str) -> Option<Ordering> {
1353             Some(self.cmp(other))
1354         }
1355     }
1356
1357     /// Implements substring slicing with syntax `&self[begin .. end]`.
1358     ///
1359     /// Returns a slice of the given string from the byte range
1360     /// [`begin`..`end`).
1361     ///
1362     /// This operation is `O(1)`.
1363     ///
1364     /// # Panics
1365     ///
1366     /// Panics if `begin` or `end` does not point to the starting
1367     /// byte offset of a character (as defined by `is_char_boundary`).
1368     /// Requires that `begin <= end` and `end <= len` where `len` is the
1369     /// length of the string.
1370     ///
1371     /// # Examples
1372     ///
1373     /// ```
1374     /// let s = "Löwe 老虎 Léopard";
1375     /// assert_eq!(&s[0 .. 1], "L");
1376     ///
1377     /// assert_eq!(&s[1 .. 9], "öwe 老");
1378     ///
1379     /// // these will panic:
1380     /// // byte 2 lies within `ö`:
1381     /// // &s[2 ..3];
1382     ///
1383     /// // byte 8 lies within `老`
1384     /// // &s[1 .. 8];
1385     ///
1386     /// // byte 100 is outside the string
1387     /// // &s[3 .. 100];
1388     /// ```
1389     #[stable(feature = "rust1", since = "1.0.0")]
1390     impl ops::Index<ops::Range<usize>> for str {
1391         type Output = str;
1392         #[inline]
1393         fn index(&self, index: ops::Range<usize>) -> &str {
1394             // is_char_boundary checks that the index is in [0, .len()]
1395             if index.start <= index.end &&
1396                self.is_char_boundary(index.start) &&
1397                self.is_char_boundary(index.end) {
1398                 unsafe { self.slice_unchecked(index.start, index.end) }
1399             } else {
1400                 super::slice_error_fail(self, index.start, index.end)
1401             }
1402         }
1403     }
1404
1405     /// Implements mutable substring slicing with syntax
1406     /// `&mut self[begin .. end]`.
1407     ///
1408     /// Returns a mutable slice of the given string from the byte range
1409     /// [`begin`..`end`).
1410     ///
1411     /// This operation is `O(1)`.
1412     ///
1413     /// # Panics
1414     ///
1415     /// Panics if `begin` or `end` does not point to the starting
1416     /// byte offset of a character (as defined by `is_char_boundary`).
1417     /// Requires that `begin <= end` and `end <= len` where `len` is the
1418     /// length of the string.
1419     #[stable(feature = "derefmut_for_string", since = "1.2.0")]
1420     impl ops::IndexMut<ops::Range<usize>> for str {
1421         #[inline]
1422         fn index_mut(&mut self, index: ops::Range<usize>) -> &mut str {
1423             // is_char_boundary checks that the index is in [0, .len()]
1424             if index.start <= index.end &&
1425                self.is_char_boundary(index.start) &&
1426                self.is_char_boundary(index.end) {
1427                 unsafe { self.slice_mut_unchecked(index.start, index.end) }
1428             } else {
1429                 super::slice_error_fail(self, index.start, index.end)
1430             }
1431         }
1432     }
1433
1434     /// Implements substring slicing with syntax `&self[.. end]`.
1435     ///
1436     /// Returns a slice of the string from the beginning to byte offset
1437     /// `end`.
1438     ///
1439     /// Equivalent to `&self[0 .. end]`.
1440     #[stable(feature = "rust1", since = "1.0.0")]
1441     impl ops::Index<ops::RangeTo<usize>> for str {
1442         type Output = str;
1443
1444         #[inline]
1445         fn index(&self, index: ops::RangeTo<usize>) -> &str {
1446             // is_char_boundary checks that the index is in [0, .len()]
1447             if self.is_char_boundary(index.end) {
1448                 unsafe { self.slice_unchecked(0, index.end) }
1449             } else {
1450                 super::slice_error_fail(self, 0, index.end)
1451             }
1452         }
1453     }
1454
1455     /// Implements mutable substring slicing with syntax `&mut self[.. end]`.
1456     ///
1457     /// Returns a mutable slice of the string from the beginning to byte offset
1458     /// `end`.
1459     ///
1460     /// Equivalent to `&mut self[0 .. end]`.
1461     #[stable(feature = "derefmut_for_string", since = "1.2.0")]
1462     impl ops::IndexMut<ops::RangeTo<usize>> for str {
1463         #[inline]
1464         fn index_mut(&mut self, index: ops::RangeTo<usize>) -> &mut str {
1465             // is_char_boundary checks that the index is in [0, .len()]
1466             if self.is_char_boundary(index.end) {
1467                 unsafe { self.slice_mut_unchecked(0, index.end) }
1468             } else {
1469                 super::slice_error_fail(self, 0, index.end)
1470             }
1471         }
1472     }
1473
1474     /// Implements substring slicing with syntax `&self[begin ..]`.
1475     ///
1476     /// Returns a slice of the string from byte offset `begin`
1477     /// to the end of the string.
1478     ///
1479     /// Equivalent to `&self[begin .. len]`.
1480     #[stable(feature = "rust1", since = "1.0.0")]
1481     impl ops::Index<ops::RangeFrom<usize>> for str {
1482         type Output = str;
1483
1484         #[inline]
1485         fn index(&self, index: ops::RangeFrom<usize>) -> &str {
1486             // is_char_boundary checks that the index is in [0, .len()]
1487             if self.is_char_boundary(index.start) {
1488                 unsafe { self.slice_unchecked(index.start, self.len()) }
1489             } else {
1490                 super::slice_error_fail(self, index.start, self.len())
1491             }
1492         }
1493     }
1494
1495     /// Implements mutable substring slicing with syntax `&mut self[begin ..]`.
1496     ///
1497     /// Returns a mutable slice of the string from byte offset `begin`
1498     /// to the end of the string.
1499     ///
1500     /// Equivalent to `&mut self[begin .. len]`.
1501     #[stable(feature = "derefmut_for_string", since = "1.2.0")]
1502     impl ops::IndexMut<ops::RangeFrom<usize>> for str {
1503         #[inline]
1504         fn index_mut(&mut self, index: ops::RangeFrom<usize>) -> &mut str {
1505             // is_char_boundary checks that the index is in [0, .len()]
1506             if self.is_char_boundary(index.start) {
1507                 let len = self.len();
1508                 unsafe { self.slice_mut_unchecked(index.start, len) }
1509             } else {
1510                 super::slice_error_fail(self, index.start, self.len())
1511             }
1512         }
1513     }
1514
    /// Implements substring slicing with syntax `&self[..]`.
    ///
    /// Returns a slice of the whole string. This operation can
    /// never panic.
    ///
    /// Equivalent to `&self[0 .. len]`.
    #[stable(feature = "rust1", since = "1.0.0")]
    impl ops::Index<ops::RangeFull> for str {
        type Output = str;

        #[inline]
        fn index(&self, _index: ops::RangeFull) -> &str {
            // The full range is the string itself, so no checks are performed.
            self
        }
    }
1530
    /// Implements mutable substring slicing with syntax `&mut self[..]`.
    ///
    /// Returns a mutable slice of the whole string. This operation can
    /// never panic.
    ///
    /// Equivalent to `&mut self[0 .. len]`.
    #[stable(feature = "derefmut_for_string", since = "1.2.0")]
    impl ops::IndexMut<ops::RangeFull> for str {
        #[inline]
        fn index_mut(&mut self, _index: ops::RangeFull) -> &mut str {
            // The full range is the string itself, so no checks are performed.
            self
        }
    }
1544
1545     #[unstable(feature = "inclusive_range",
1546                reason = "recently added, follows RFC",
1547                issue = "28237")]
1548     impl ops::Index<ops::RangeInclusive<usize>> for str {
1549         type Output = str;
1550
1551         #[inline]
1552         fn index(&self, index: ops::RangeInclusive<usize>) -> &str {
1553             match index {
1554                 ops::RangeInclusive::Empty { .. } => "",
1555                 ops::RangeInclusive::NonEmpty { end, .. } if end == usize::max_value() =>
1556                     panic!("attempted to index slice up to maximum usize"),
1557                 ops::RangeInclusive::NonEmpty { start, end } =>
1558                     self.index(start .. end+1)
1559             }
1560         }
1561     }
1562     #[unstable(feature = "inclusive_range",
1563                reason = "recently added, follows RFC",
1564                issue = "28237")]
1565     impl ops::Index<ops::RangeToInclusive<usize>> for str {
1566         type Output = str;
1567
1568         #[inline]
1569         fn index(&self, index: ops::RangeToInclusive<usize>) -> &str {
1570             self.index(0...index.end)
1571         }
1572     }
1573
1574     #[unstable(feature = "inclusive_range",
1575                reason = "recently added, follows RFC",
1576                issue = "28237")]
1577     impl ops::IndexMut<ops::RangeInclusive<usize>> for str {
1578         #[inline]
1579         fn index_mut(&mut self, index: ops::RangeInclusive<usize>) -> &mut str {
1580             match index {
1581                 ops::RangeInclusive::Empty { .. } => &mut self[0..0], // `&mut ""` doesn't work
1582                 ops::RangeInclusive::NonEmpty { end, .. } if end == usize::max_value() =>
1583                     panic!("attempted to index str up to maximum usize"),
1584                     ops::RangeInclusive::NonEmpty { start, end } =>
1585                         self.index_mut(start .. end+1)
1586             }
1587         }
1588     }
1589     #[unstable(feature = "inclusive_range",
1590                reason = "recently added, follows RFC",
1591                issue = "28237")]
1592     impl ops::IndexMut<ops::RangeToInclusive<usize>> for str {
1593         #[inline]
1594         fn index_mut(&mut self, index: ops::RangeToInclusive<usize>) -> &mut str {
1595             self.index_mut(0...index.end)
1596         }
1597     }
1598 }
1599
1600 /// Methods for string slices
1601 #[allow(missing_docs)]
1602 #[doc(hidden)]
1603 #[unstable(feature = "core_str_ext",
1604            reason = "stable interface provided by `impl str` in later crates",
1605            issue = "32110")]
1606 pub trait StrExt {
1607     // NB there are no docs here are they're all located on the StrExt trait in
1608     // libcollections, not here.
1609
1610     #[stable(feature = "core", since = "1.6.0")]
1611     fn contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool;
1612     #[stable(feature = "core", since = "1.6.0")]
1613     fn chars(&self) -> Chars;
1614     #[stable(feature = "core", since = "1.6.0")]
1615     fn bytes(&self) -> Bytes;
1616     #[stable(feature = "core", since = "1.6.0")]
1617     fn char_indices(&self) -> CharIndices;
1618     #[stable(feature = "core", since = "1.6.0")]
1619     fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P>;
1620     #[stable(feature = "core", since = "1.6.0")]
1621     fn rsplit<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplit<'a, P>
1622         where P::Searcher: ReverseSearcher<'a>;
1623     #[stable(feature = "core", since = "1.6.0")]
1624     fn splitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> SplitN<'a, P>;
1625     #[stable(feature = "core", since = "1.6.0")]
1626     fn rsplitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> RSplitN<'a, P>
1627         where P::Searcher: ReverseSearcher<'a>;
1628     #[stable(feature = "core", since = "1.6.0")]
1629     fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P>;
1630     #[stable(feature = "core", since = "1.6.0")]
1631     fn rsplit_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplitTerminator<'a, P>
1632         where P::Searcher: ReverseSearcher<'a>;
1633     #[stable(feature = "core", since = "1.6.0")]
1634     fn matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> Matches<'a, P>;
1635     #[stable(feature = "core", since = "1.6.0")]
1636     fn rmatches<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatches<'a, P>
1637         where P::Searcher: ReverseSearcher<'a>;
1638     #[stable(feature = "core", since = "1.6.0")]
1639     fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P>;
1640     #[stable(feature = "core", since = "1.6.0")]
1641     fn rmatch_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatchIndices<'a, P>
1642         where P::Searcher: ReverseSearcher<'a>;
1643     #[stable(feature = "core", since = "1.6.0")]
1644     fn lines(&self) -> Lines;
1645     #[stable(feature = "core", since = "1.6.0")]
1646     #[rustc_deprecated(since = "1.6.0", reason = "use lines() instead now")]
1647     #[allow(deprecated)]
1648     fn lines_any(&self) -> LinesAny;
1649     #[stable(feature = "core", since = "1.6.0")]
1650     unsafe fn slice_unchecked(&self, begin: usize, end: usize) -> &str;
1651     #[stable(feature = "core", since = "1.6.0")]
1652     unsafe fn slice_mut_unchecked(&mut self, begin: usize, end: usize) -> &mut str;
1653     #[stable(feature = "core", since = "1.6.0")]
1654     fn starts_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool;
1655     #[stable(feature = "core", since = "1.6.0")]
1656     fn ends_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool
1657         where P::Searcher: ReverseSearcher<'a>;
1658     #[stable(feature = "core", since = "1.6.0")]
1659     fn trim_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
1660         where P::Searcher: DoubleEndedSearcher<'a>;
1661     #[stable(feature = "core", since = "1.6.0")]
1662     fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str;
1663     #[stable(feature = "core", since = "1.6.0")]
1664     fn trim_right_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
1665         where P::Searcher: ReverseSearcher<'a>;
1666     #[stable(feature = "is_char_boundary", since = "1.9.0")]
1667     fn is_char_boundary(&self, index: usize) -> bool;
1668     #[unstable(feature = "str_char",
1669                reason = "often replaced by char_indices, this method may \
1670                          be removed in favor of just char_at() or eventually \
1671                          removed altogether",
1672                issue = "27754")]
1673     #[rustc_deprecated(reason = "use slicing plus chars() plus len_utf8",
1674                        since = "1.9.0")]
1675     fn char_range_at(&self, start: usize) -> CharRange;
1676     #[unstable(feature = "str_char",
1677                reason = "often replaced by char_indices, this method may \
1678                          be removed in favor of just char_at_reverse() or \
1679                          eventually removed altogether",
1680                issue = "27754")]
1681     #[rustc_deprecated(reason = "use slicing plus chars().rev() plus len_utf8",
1682                        since = "1.9.0")]
1683     fn char_range_at_reverse(&self, start: usize) -> CharRange;
1684     #[unstable(feature = "str_char",
1685                reason = "frequently replaced by the chars() iterator, this \
1686                          method may be removed or possibly renamed in the \
1687                          future; it is normally replaced by chars/char_indices \
1688                          iterators or by getting the first char from a \
1689                          subslice",
1690                issue = "27754")]
1691     #[rustc_deprecated(reason = "use slicing plus chars()",
1692                        since = "1.9.0")]
1693     fn char_at(&self, i: usize) -> char;
1694     #[unstable(feature = "str_char",
1695                reason = "see char_at for more details, but reverse semantics \
1696                          are also somewhat unclear, especially with which \
1697                          cases generate panics",
1698                issue = "27754")]
1699     #[rustc_deprecated(reason = "use slicing plus chars().rev()",
1700                        since = "1.9.0")]
1701     fn char_at_reverse(&self, i: usize) -> char;
1702     #[stable(feature = "core", since = "1.6.0")]
1703     fn as_bytes(&self) -> &[u8];
1704     #[stable(feature = "core", since = "1.6.0")]
1705     fn find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>;
1706     #[stable(feature = "core", since = "1.6.0")]
1707     fn rfind<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>
1708         where P::Searcher: ReverseSearcher<'a>;
1709     fn find_str<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>;
1710     #[stable(feature = "core", since = "1.6.0")]
1711     fn split_at(&self, mid: usize) -> (&str, &str);
1712     #[stable(feature = "core", since = "1.6.0")]
1713     fn split_at_mut(&mut self, mid: usize) -> (&mut str, &mut str);
1714     #[unstable(feature = "str_char",
1715                reason = "awaiting conventions about shifting and slices and \
1716                          may not be warranted with the existence of the chars \
1717                          and/or char_indices iterators",
1718                issue = "27754")]
1719     #[rustc_deprecated(reason = "use chars() plus Chars::as_str",
1720                        since = "1.9.0")]
1721     fn slice_shift_char(&self) -> Option<(char, &str)>;
1722     #[stable(feature = "core", since = "1.6.0")]
1723     fn as_ptr(&self) -> *const u8;
1724     #[stable(feature = "core", since = "1.6.0")]
1725     fn len(&self) -> usize;
1726     #[stable(feature = "core", since = "1.6.0")]
1727     fn is_empty(&self) -> bool;
1728     #[stable(feature = "core", since = "1.6.0")]
1729     fn parse<T: FromStr>(&self) -> Result<T, T::Err>;
1730 }
1731
/// Shortens `s` to at most `max` bytes without cutting a character in half.
///
/// Returns `(false, s)` untouched when `max >= s.len()`. Otherwise the cut
/// point is moved backwards from `max` until it sits on a character
/// boundary, and `(true, prefix)` is returned.
fn truncate_to_char_boundary(s: &str, max: usize) -> (bool, &str) {
    // Nothing to cut: the whole string already fits.
    if max >= s.len() {
        return (false, s);
    }

    // Step back one byte at a time until the cut no longer splits a
    // character.
    let mut cut = max;
    while !s.is_char_boundary(cut) {
        cut -= 1;
    }
    (true, &s[..cut])
}
1744
1745 #[inline(never)]
1746 #[cold]
1747 fn slice_error_fail(s: &str, begin: usize, end: usize) -> ! {
1748     const MAX_DISPLAY_LENGTH: usize = 256;
1749     let (truncated, s) = truncate_to_char_boundary(s, MAX_DISPLAY_LENGTH);
1750     let ellipsis = if truncated { "[...]" } else { "" };
1751
1752     assert!(begin <= end, "begin <= end ({} <= {}) when slicing `{}`{}",
1753             begin, end, s, ellipsis);
1754     panic!("index {} and/or {} in `{}`{} do not lie on character boundary",
1755           begin, end, s, ellipsis);
1756 }
1757
1758 #[stable(feature = "core", since = "1.6.0")]
1759 impl StrExt for str {
1760     #[inline]
1761     fn contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
1762         pat.is_contained_in(self)
1763     }
1764
1765     #[inline]
1766     fn chars(&self) -> Chars {
1767         Chars{iter: self.as_bytes().iter()}
1768     }
1769
1770     #[inline]
1771     fn bytes(&self) -> Bytes {
1772         Bytes(self.as_bytes().iter().cloned())
1773     }
1774
1775     #[inline]
1776     fn char_indices(&self) -> CharIndices {
1777         CharIndices { front_offset: 0, iter: self.chars() }
1778     }
1779
1780     #[inline]
1781     fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P> {
1782         Split(SplitInternal {
1783             start: 0,
1784             end: self.len(),
1785             matcher: pat.into_searcher(self),
1786             allow_trailing_empty: true,
1787             finished: false,
1788         })
1789     }
1790
1791     #[inline]
1792     fn rsplit<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplit<'a, P>
1793         where P::Searcher: ReverseSearcher<'a>
1794     {
1795         RSplit(self.split(pat).0)
1796     }
1797
1798     #[inline]
1799     fn splitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> SplitN<'a, P> {
1800         SplitN(SplitNInternal {
1801             iter: self.split(pat).0,
1802             count: count,
1803         })
1804     }
1805
1806     #[inline]
1807     fn rsplitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> RSplitN<'a, P>
1808         where P::Searcher: ReverseSearcher<'a>
1809     {
1810         RSplitN(self.splitn(count, pat).0)
1811     }
1812
1813     #[inline]
1814     fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P> {
1815         SplitTerminator(SplitInternal {
1816             allow_trailing_empty: false,
1817             ..self.split(pat).0
1818         })
1819     }
1820
1821     #[inline]
1822     fn rsplit_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplitTerminator<'a, P>
1823         where P::Searcher: ReverseSearcher<'a>
1824     {
1825         RSplitTerminator(self.split_terminator(pat).0)
1826     }
1827
1828     #[inline]
1829     fn matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> Matches<'a, P> {
1830         Matches(MatchesInternal(pat.into_searcher(self)))
1831     }
1832
1833     #[inline]
1834     fn rmatches<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatches<'a, P>
1835         where P::Searcher: ReverseSearcher<'a>
1836     {
1837         RMatches(self.matches(pat).0)
1838     }
1839
1840     #[inline]
1841     fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P> {
1842         MatchIndices(MatchIndicesInternal(pat.into_searcher(self)))
1843     }
1844
1845     #[inline]
1846     fn rmatch_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatchIndices<'a, P>
1847         where P::Searcher: ReverseSearcher<'a>
1848     {
1849         RMatchIndices(self.match_indices(pat).0)
1850     }
1851     #[inline]
1852     fn lines(&self) -> Lines {
1853         Lines(self.split_terminator('\n').map(LinesAnyMap))
1854     }
1855
1856     #[inline]
1857     #[allow(deprecated)]
1858     fn lines_any(&self) -> LinesAny {
1859         LinesAny(self.lines())
1860     }
1861
1862     #[inline]
1863     unsafe fn slice_unchecked(&self, begin: usize, end: usize) -> &str {
1864         let ptr = self.as_ptr().offset(begin as isize);
1865         let len = end - begin;
1866         from_utf8_unchecked(slice::from_raw_parts(ptr, len))
1867     }
1868
1869     #[inline]
1870     unsafe fn slice_mut_unchecked(&mut self, begin: usize, end: usize) -> &mut str {
1871         let ptr = self.as_ptr().offset(begin as isize);
1872         let len = end - begin;
1873         mem::transmute(slice::from_raw_parts_mut(ptr as *mut u8, len))
1874     }
1875
1876     #[inline]
1877     fn starts_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
1878         pat.is_prefix_of(self)
1879     }
1880
1881     #[inline]
1882     fn ends_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool
1883         where P::Searcher: ReverseSearcher<'a>
1884     {
1885         pat.is_suffix_of(self)
1886     }
1887
1888     #[inline]
1889     fn trim_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
1890         where P::Searcher: DoubleEndedSearcher<'a>
1891     {
1892         let mut i = 0;
1893         let mut j = 0;
1894         let mut matcher = pat.into_searcher(self);
1895         if let Some((a, b)) = matcher.next_reject() {
1896             i = a;
1897             j = b; // Remember earliest known match, correct it below if
1898                    // last match is different
1899         }
1900         if let Some((_, b)) = matcher.next_reject_back() {
1901             j = b;
1902         }
1903         unsafe {
1904             // Searcher is known to return valid indices
1905             self.slice_unchecked(i, j)
1906         }
1907     }
1908
1909     #[inline]
1910     fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str {
1911         let mut i = self.len();
1912         let mut matcher = pat.into_searcher(self);
1913         if let Some((a, _)) = matcher.next_reject() {
1914             i = a;
1915         }
1916         unsafe {
1917             // Searcher is known to return valid indices
1918             self.slice_unchecked(i, self.len())
1919         }
1920     }
1921
1922     #[inline]
1923     fn trim_right_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
1924         where P::Searcher: ReverseSearcher<'a>
1925     {
1926         let mut j = 0;
1927         let mut matcher = pat.into_searcher(self);
1928         if let Some((_, b)) = matcher.next_reject_back() {
1929             j = b;
1930         }
1931         unsafe {
1932             // Searcher is known to return valid indices
1933             self.slice_unchecked(0, j)
1934         }
1935     }
1936
1937     #[inline]
1938     fn is_char_boundary(&self, index: usize) -> bool {
1939         // 0 and len are always ok.
1940         // Test for 0 explicitly so that it can optimize out the check
1941         // easily and skip reading string data for that case.
1942         if index == 0 || index == self.len() { return true; }
1943         match self.as_bytes().get(index) {
1944             None => false,
1945             Some(&b) => b < 128 || b >= 192,
1946         }
1947     }
1948
1949     #[inline]
1950     fn char_range_at(&self, i: usize) -> CharRange {
1951         let (c, n) = char_range_at_raw(self.as_bytes(), i);
1952         CharRange { ch: unsafe { char::from_u32_unchecked(c) }, next: n }
1953     }
1954
1955     #[inline]
1956     fn char_range_at_reverse(&self, start: usize) -> CharRange {
1957         let mut prev = start;
1958
1959         prev = prev.saturating_sub(1);
1960         if self.as_bytes()[prev] < 128 {
1961             return CharRange{ch: self.as_bytes()[prev] as char, next: prev}
1962         }
1963
1964         // Multibyte case is a fn to allow char_range_at_reverse to inline cleanly
1965         fn multibyte_char_range_at_reverse(s: &str, mut i: usize) -> CharRange {
1966             // while there is a previous byte == 10......
1967             while i > 0 && s.as_bytes()[i] & !CONT_MASK == TAG_CONT_U8 {
1968                 i -= 1;
1969             }
1970
1971             let first= s.as_bytes()[i];
1972             let w = UTF8_CHAR_WIDTH[first as usize];
1973             assert!(w != 0);
1974
1975             let mut val = utf8_first_byte(first, w as u32);
1976             val = utf8_acc_cont_byte(val, s.as_bytes()[i + 1]);
1977             if w > 2 { val = utf8_acc_cont_byte(val, s.as_bytes()[i + 2]); }
1978             if w > 3 { val = utf8_acc_cont_byte(val, s.as_bytes()[i + 3]); }
1979
1980             CharRange {ch: unsafe { char::from_u32_unchecked(val) }, next: i}
1981         }
1982
1983         multibyte_char_range_at_reverse(self, prev)
1984     }
1985
1986     #[inline]
1987     #[allow(deprecated)]
1988     fn char_at(&self, i: usize) -> char {
1989         self.char_range_at(i).ch
1990     }
1991
1992     #[inline]
1993     #[allow(deprecated)]
1994     fn char_at_reverse(&self, i: usize) -> char {
1995         self.char_range_at_reverse(i).ch
1996     }
1997
1998     #[inline]
1999     fn as_bytes(&self) -> &[u8] {
2000         unsafe { mem::transmute(self) }
2001     }
2002
2003     fn find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize> {
2004         pat.into_searcher(self).next_match().map(|(i, _)| i)
2005     }
2006
2007     fn rfind<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>
2008         where P::Searcher: ReverseSearcher<'a>
2009     {
2010         pat.into_searcher(self).next_match_back().map(|(i, _)| i)
2011     }
2012
2013     fn find_str<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize> {
2014         self.find(pat)
2015     }
2016
2017     #[inline]
2018     fn split_at(&self, mid: usize) -> (&str, &str) {
2019         // is_char_boundary checks that the index is in [0, .len()]
2020         if self.is_char_boundary(mid) {
2021             unsafe {
2022                 (self.slice_unchecked(0, mid),
2023                  self.slice_unchecked(mid, self.len()))
2024             }
2025         } else {
2026             slice_error_fail(self, 0, mid)
2027         }
2028     }
2029
2030     fn split_at_mut(&mut self, mid: usize) -> (&mut str, &mut str) {
2031         // is_char_boundary checks that the index is in [0, .len()]
2032         if self.is_char_boundary(mid) {
2033             let len = self.len();
2034             let ptr = self.as_ptr() as *mut u8;
2035             unsafe {
2036                 (from_raw_parts_mut(ptr, mid),
2037                  from_raw_parts_mut(ptr.offset(mid as isize), len - mid))
2038             }
2039         } else {
2040             slice_error_fail(self, 0, mid)
2041         }
2042     }
2043
2044     #[inline]
2045     #[allow(deprecated)]
2046     fn slice_shift_char(&self) -> Option<(char, &str)> {
2047         if self.is_empty() {
2048             None
2049         } else {
2050             let ch = self.char_at(0);
2051             let next_s = unsafe { self.slice_unchecked(ch.len_utf8(), self.len()) };
2052             Some((ch, next_s))
2053         }
2054     }
2055
2056     #[inline]
2057     fn as_ptr(&self) -> *const u8 {
2058         self as *const str as *const u8
2059     }
2060
2061     #[inline]
2062     fn len(&self) -> usize {
2063         self.as_bytes().len()
2064     }
2065
2066     #[inline]
2067     fn is_empty(&self) -> bool { self.len() == 0 }
2068
2069     #[inline]
2070     fn parse<T: FromStr>(&self) -> Result<T, T::Err> { FromStr::from_str(self) }
2071 }
2072
2073 #[stable(feature = "rust1", since = "1.0.0")]
2074 impl AsRef<[u8]> for str {
2075     #[inline]
2076     fn as_ref(&self) -> &[u8] {
2077         self.as_bytes()
2078     }
2079 }
2080
2081 /// Pluck a code point out of a UTF-8-like byte slice and return the
2082 /// index of the next code point.
2083 #[inline]
2084 fn char_range_at_raw(bytes: &[u8], i: usize) -> (u32, usize) {
2085     if bytes[i] < 128 {
2086         return (bytes[i] as u32, i + 1);
2087     }
2088
2089     // Multibyte case is a fn to allow char_range_at to inline cleanly
2090     fn multibyte_char_range_at(bytes: &[u8], i: usize) -> (u32, usize) {
2091         let first = bytes[i];
2092         let w = UTF8_CHAR_WIDTH[first as usize];
2093         assert!(w != 0);
2094
2095         let mut val = utf8_first_byte(first, w as u32);
2096         val = utf8_acc_cont_byte(val, bytes[i + 1]);
2097         if w > 2 { val = utf8_acc_cont_byte(val, bytes[i + 2]); }
2098         if w > 3 { val = utf8_acc_cont_byte(val, bytes[i + 3]); }
2099
2100         (val, i + w as usize)
2101     }
2102
2103     multibyte_char_range_at(bytes, i)
2104 }
2105
2106 #[stable(feature = "rust1", since = "1.0.0")]
2107 impl<'a> Default for &'a str {
2108     fn default() -> &'a str { "" }
2109 }