]> git.proxmox.com Git - rustc.git/blame - src/libcore/str/mod.rs
Imported Upstream version 1.10.0+dfsg1
[rustc.git] / src / libcore / str / mod.rs
CommitLineData
1a4d82fc
JJ
1// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2// file at the top-level directory of this distribution and at
3// http://rust-lang.org/COPYRIGHT.
4//
5// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8// option. This file may not be copied, modified, or distributed
9// except according to those terms.
1a4d82fc
JJ
10
11//! String manipulation
12//!
13//! For more details, see std::str
14
62682a34 15#![stable(feature = "rust1", since = "1.0.0")]
1a4d82fc 16
9346a6ac
AL
17use self::pattern::Pattern;
18use self::pattern::{Searcher, ReverseSearcher, DoubleEndedSearcher};
1a4d82fc 19
e9174d1e 20use char::{self, CharExt};
85aaf69f 21use clone::Clone;
bd371182 22use convert::AsRef;
1a4d82fc 23use default::Default;
85aaf69f 24use fmt;
1a4d82fc 25use iter::ExactSizeIterator;
e9174d1e
SL
26use iter::{Map, Cloned, Iterator, DoubleEndedIterator};
27use marker::Sized;
1a4d82fc 28use mem;
c34b1796 29use ops::{Fn, FnMut, FnOnce};
1a4d82fc 30use option::Option::{self, None, Some};
1a4d82fc
JJ
31use result::Result::{self, Ok, Err};
32use slice::{self, SliceExt};
1a4d82fc 33
9346a6ac 34pub mod pattern;
1a4d82fc
JJ
35
/// A trait to abstract the idea of creating a new instance of a type from a
/// string.
///
/// `FromStr`'s [`from_str()`] method is often used implicitly, through
/// [`str`]'s [`parse()`] method. See [`parse()`]'s documentation for examples.
///
/// [`from_str()`]: #tymethod.from_str
/// [`str`]: ../../std/primitive.str.html
/// [`parse()`]: ../../std/primitive.str.html#method.parse
#[stable(feature = "rust1", since = "1.0.0")]
pub trait FromStr: Sized {
    /// The associated error which can be returned from parsing.
    #[stable(feature = "rust1", since = "1.0.0")]
    type Err;

    /// Parses a string `s` to return a value of this type.
    ///
    /// If parsing succeeds, the value is returned inside `Ok`. Otherwise,
    /// when the string is ill-formatted, an error is returned inside `Err`.
    /// The error type is specific to each implementation of the trait.
    ///
    /// # Examples
    ///
    /// Basic usage with [`i32`][ithirtytwo], a type that implements `FromStr`:
    ///
    /// [ithirtytwo]: ../../std/primitive.i32.html
    ///
    /// ```
    /// use std::str::FromStr;
    ///
    /// let s = "5";
    /// let x = i32::from_str(s).unwrap();
    ///
    /// assert_eq!(5, x);
    /// ```
    #[stable(feature = "rust1", since = "1.0.0")]
    fn from_str(s: &str) -> Result<Self, Self::Err>;
}
74
85aaf69f 75#[stable(feature = "rust1", since = "1.0.0")]
1a4d82fc 76impl FromStr for bool {
85aaf69f
SL
77 type Err = ParseBoolError;
78
1a4d82fc
JJ
79 /// Parse a `bool` from a string.
80 ///
c34b1796
AL
81 /// Yields a `Result<bool, ParseBoolError>`, because `s` may or may not
82 /// actually be parseable.
1a4d82fc
JJ
83 ///
84 /// # Examples
85 ///
c34b1796
AL
86 /// ```
87 /// use std::str::FromStr;
88 ///
89 /// assert_eq!(FromStr::from_str("true"), Ok(true));
90 /// assert_eq!(FromStr::from_str("false"), Ok(false));
91 /// assert!(<bool as FromStr>::from_str("not even a boolean").is_err());
92 /// ```
93 ///
94 /// Note, in many cases, the `.parse()` method on `str` is more proper.
95 ///
96 /// ```
85aaf69f
SL
97 /// assert_eq!("true".parse(), Ok(true));
98 /// assert_eq!("false".parse(), Ok(false));
99 /// assert!("not even a boolean".parse::<bool>().is_err());
1a4d82fc
JJ
100 /// ```
101 #[inline]
85aaf69f 102 fn from_str(s: &str) -> Result<bool, ParseBoolError> {
1a4d82fc 103 match s {
85aaf69f
SL
104 "true" => Ok(true),
105 "false" => Ok(false),
106 _ => Err(ParseBoolError { _priv: () }),
1a4d82fc
JJ
107 }
108 }
109}
110
85aaf69f
SL
111/// An error returned when parsing a `bool` from a string fails.
112#[derive(Debug, Clone, PartialEq)]
113#[stable(feature = "rust1", since = "1.0.0")]
114pub struct ParseBoolError { _priv: () }
115
116#[stable(feature = "rust1", since = "1.0.0")]
117impl fmt::Display for ParseBoolError {
118 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
119 "provided string was not `true` or `false`".fmt(f)
120 }
121}
122
1a4d82fc
JJ
123/*
124Section: Creating a string
125*/
126
b039eaaf
SL
/// Errors which can occur when attempting to interpret a sequence of `u8`
/// as a string.
///
/// As such, the `from_utf8` family of functions and methods for both `String`s
/// and `&str`s make use of this error, for example.
#[derive(Copy, Eq, PartialEq, Clone, Debug)]
#[stable(feature = "rust1", since = "1.0.0")]
pub struct Utf8Error {
    // Index in the input up to which valid UTF-8 was verified; exposed
    // through `valid_up_to()`.
    valid_up_to: usize,
}
137
impl Utf8Error {
    /// Returns the index in the given string up to which valid UTF-8 was
    /// verified.
    ///
    /// It is the maximum index such that `from_utf8(&input[..index])`
    /// would return `Ok(_)`.
    ///
    /// # Examples
    ///
    /// Basic usage:
    ///
    /// ```
    /// use std::str;
    ///
    /// // some invalid bytes, in a vector
    /// let sparkle_heart = vec![0, 159, 146, 150];
    ///
    /// // std::str::from_utf8 returns a Utf8Error
    /// let error = str::from_utf8(&sparkle_heart).unwrap_err();
    ///
    /// // the second byte is invalid here
    /// assert_eq!(1, error.valid_up_to());
    /// ```
    #[stable(feature = "utf8_error", since = "1.5.0")]
    pub fn valid_up_to(&self) -> usize { self.valid_up_to }
}
164
/// Converts a slice of bytes to a string slice.
///
/// A string slice (`&str`) is made of bytes (`u8`), and a byte slice (`&[u8]`)
/// is made of bytes, so this function converts between the two. Not all byte
/// slices are valid string slices, however: `&str` requires that it is valid
/// UTF-8. `from_utf8()` checks to ensure that the bytes are valid UTF-8, and
/// then does the conversion.
///
/// If you are sure that the byte slice is valid UTF-8, and you don't want to
/// incur the overhead of the validity check, there is an unsafe version of
/// this function, [`from_utf8_unchecked()`][fromutf8u], which has the same
/// behavior but skips the check.
///
/// [fromutf8u]: fn.from_utf8_unchecked.html
///
/// If you need a `String` instead of a `&str`, consider
/// [`String::from_utf8()`][string].
///
/// [string]: ../../std/string/struct.String.html#method.from_utf8
///
/// Because you can stack-allocate a `[u8; N]`, and you can take a `&[u8]` of
/// it, this function is one way to have a stack-allocated string. There is
/// an example of this in the examples section below.
///
/// # Errors
///
/// Returns `Err` if the slice is not UTF-8 with a description as to why the
/// provided slice is not UTF-8.
///
/// See the docs for [`Utf8Error`][error] for more details on the kinds of
/// errors that can be returned.
///
/// [error]: struct.Utf8Error.html
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use std::str;
///
/// // some bytes, in a vector
/// let sparkle_heart = vec![240, 159, 146, 150];
///
/// // We know these bytes are valid, so just use `unwrap()`.
/// let sparkle_heart = str::from_utf8(&sparkle_heart).unwrap();
///
/// assert_eq!("💖", sparkle_heart);
/// ```
///
/// Incorrect bytes:
///
/// ```
/// use std::str;
///
/// // some invalid bytes, in a vector
/// let sparkle_heart = vec![0, 159, 146, 150];
///
/// assert!(str::from_utf8(&sparkle_heart).is_err());
/// ```
///
/// A "stack allocated string":
///
/// ```
/// use std::str;
///
/// // some bytes, in a stack-allocated array
/// let sparkle_heart = [240, 159, 146, 150];
///
/// // We know these bytes are valid, so just use `unwrap()`.
/// let sparkle_heart = str::from_utf8(&sparkle_heart).unwrap();
///
/// assert_eq!("💖", sparkle_heart);
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
pub fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
    // Bail out with the validation error before any conversion happens.
    run_utf8_validation(v)?;
    // The unchecked conversion is only reached once validation has succeeded.
    Ok(unsafe { from_utf8_unchecked(v) })
}
244
7453a54e
SL
/// Forms a mutable str from a pointer and a length.
///
/// Performs the same functionality as `from_raw_parts`, except that a mutable
/// str is returned. The `len` argument is the number of bytes in the string.
///
/// # Safety
///
/// This function is unsafe as there is no guarantee that the given pointer is
/// valid for `len` bytes, nor whether the lifetime inferred is a suitable
/// lifetime for the returned str.
///
/// The data must be valid UTF-8
///
/// `p` must be non-null, even for zero-length str.
///
/// # Caveat
///
/// The lifetime for the returned str is inferred from its usage. To
/// prevent accidental misuse, it's suggested to tie the lifetime to whichever
/// source lifetime is safe in the context, such as by providing a helper
/// function taking the lifetime of a host value for the str, or by explicit
/// annotation.
unsafe fn from_raw_parts_mut<'a>(p: *mut u8, len: usize) -> &'a mut str {
    // Build the byte slice first, then reinterpret it as a string slice.
    let bytes: &'a mut [u8] = slice::from_raw_parts_mut(p, len);
    mem::transmute::<&'a mut [u8], &'a mut str>(bytes)
}
272
1a4d82fc
JJ
273/// Converts a slice of bytes to a string slice without checking
274/// that the string contains valid UTF-8.
b039eaaf 275///
7453a54e 276/// See the safe version, [`from_utf8()`][fromutf8], for more information.
b039eaaf
SL
277///
278/// [fromutf8]: fn.from_utf8.html
279///
280/// # Safety
281///
282/// This function is unsafe because it does not check that the bytes passed to
283/// it are valid UTF-8. If this constraint is violated, undefined behavior
284/// results, as the rest of Rust assumes that `&str`s are valid UTF-8.
285///
286/// # Examples
287///
288/// Basic usage:
289///
290/// ```
291/// use std::str;
292///
293/// // some bytes, in a vector
294/// let sparkle_heart = vec![240, 159, 146, 150];
295///
296/// let sparkle_heart = unsafe {
297/// str::from_utf8_unchecked(&sparkle_heart)
298/// };
299///
300/// assert_eq!("💖", sparkle_heart);
301/// ```
d9579d0f 302#[inline(always)]
85aaf69f 303#[stable(feature = "rust1", since = "1.0.0")]
e9174d1e 304pub unsafe fn from_utf8_unchecked(v: &[u8]) -> &str {
1a4d82fc
JJ
305 mem::transmute(v)
306}
307
85aaf69f
SL
308#[stable(feature = "rust1", since = "1.0.0")]
309impl fmt::Display for Utf8Error {
310 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
9346a6ac 311 write!(f, "invalid utf-8: invalid byte near index {}", self.valid_up_to)
85aaf69f
SL
312 }
313}
314
1a4d82fc
JJ
315/*
316Section: Iterators
317*/
318
/// Iterator for the char (representing *Unicode Scalar Values*) of a string
///
/// Created with the method [`chars()`].
///
/// [`chars()`]: ../../std/primitive.str.html#method.chars
#[derive(Clone, Debug)]
#[stable(feature = "rust1", since = "1.0.0")]
pub struct Chars<'a> {
    // Iterator over the bytes that have not been decoded yet; chars are
    // decoded from either end of it.
    iter: slice::Iter<'a, u8>
}
329
c34b1796
AL
/// Extracts the payload bits of a leading UTF-8 byte, producing the initial
/// code point accumulator.
///
/// The leading byte keeps only its bottom 5 bits for a sequence of width 2,
/// 4 bits for width 3, and 3 bits for width 4.
#[inline]
fn utf8_first_byte(byte: u8, width: u32) -> u32 {
    // 0x7F shifted right by `width` leaves exactly the payload bits set.
    let payload_mask: u8 = 0x7F >> width;
    let payload = byte & payload_mask;
    payload as u32
}
1a4d82fc 335
c34b1796
AL
336/// Return the value of `ch` updated with continuation byte `byte`.
337#[inline]
338fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 { (ch << 6) | (byte & CONT_MASK) as u32 }
1a4d82fc 339
c34b1796
AL
340/// Checks whether the byte is a UTF-8 continuation byte (i.e. starts with the
341/// bits `10`).
342#[inline]
343fn utf8_is_cont_byte(byte: u8) -> bool { (byte & !CONT_MASK) == TAG_CONT_U8 }
1a4d82fc
JJ
344
/// Returns the byte behind `opt`, or `0` when there is none.
#[inline]
fn unwrap_or_0(opt: Option<&u8>) -> u8 {
    opt.map_or(0, |&byte| byte)
}
352
85aaf69f
SL
/// Reads the next code point out of a byte iterator (assuming a
/// UTF-8-like encoding).
///
/// Returns `None` only when the iterator is already exhausted. Bytes below
/// 128 are returned as-is. For multibyte sequences, any continuation byte
/// that is missing because the iterator ran out is treated as `0`; no
/// validation of the byte sequence is performed here.
#[unstable(feature = "str_internals", issue = "0")]
#[inline]
pub fn next_code_point(bytes: &mut slice::Iter<u8>) -> Option<u32> {
    // Decode UTF-8
    let x = match bytes.next() {
        None => return None,
        Some(&next_byte) if next_byte < 128 => return Some(next_byte as u32),
        Some(&next_byte) => next_byte,
    };

    // Multibyte case follows
    // Decode from a byte combination out of: [[[x y] z] w]
    // NOTE: Performance is sensitive to the exact formulation here
    let init = utf8_first_byte(x, 2);
    let y = unwrap_or_0(bytes.next());
    // Two-byte result; overwritten below for longer sequences.
    let mut ch = utf8_acc_cont_byte(init, y);
    if x >= 0xE0 {
        // [[x y z] w] case
        // 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid
        let z = unwrap_or_0(bytes.next());
        let y_z = utf8_acc_cont_byte((y & CONT_MASK) as u32, z);
        ch = init << 12 | y_z;
        if x >= 0xF0 {
            // [x y z w] case
            // use only the lower 3 bits of `init`
            let w = unwrap_or_0(bytes.next());
            ch = (init & 7) << 18 | utf8_acc_cont_byte(y_z, w);
        }
    }

    Some(ch)
}
387
c34b1796
AL
/// Reads the last code point out of a byte iterator (assuming a
/// UTF-8-like encoding).
///
/// Bytes are consumed from the back of the iterator. Returns `None` only
/// when the iterator is already exhausted; a byte that is missing because
/// the iterator ran out mid-sequence is treated as `0`.
#[inline]
fn next_code_point_reverse(bytes: &mut slice::Iter<u8>) -> Option<u32> {
    // Decode UTF-8
    let w = match bytes.next_back() {
        None => return None,
        Some(&next_byte) if next_byte < 128 => return Some(next_byte as u32),
        Some(&back_byte) => back_byte,
    };

    // Multibyte case follows
    // Decode from a byte combination out of: [x [y [z w]]]
    let mut ch;
    let z = unwrap_or_0(bytes.next_back());
    // Assume `z` is the leading byte of a two-byte sequence; corrected below
    // if it turns out to be a continuation byte.
    ch = utf8_first_byte(z, 2);
    if utf8_is_cont_byte(z) {
        let y = unwrap_or_0(bytes.next_back());
        // Same idea one level up: `y` as leading byte of a three-byte sequence.
        ch = utf8_first_byte(y, 3);
        if utf8_is_cont_byte(y) {
            // `y` is a continuation byte too, so `x` leads a four-byte sequence.
            let x = unwrap_or_0(bytes.next_back());
            ch = utf8_first_byte(x, 4);
            ch = utf8_acc_cont_byte(ch, y);
        }
        ch = utf8_acc_cont_byte(ch, z);
    }
    ch = utf8_acc_cont_byte(ch, w);

    Some(ch)
}
418
85aaf69f 419#[stable(feature = "rust1", since = "1.0.0")]
1a4d82fc
JJ
420impl<'a> Iterator for Chars<'a> {
421 type Item = char;
422
423 #[inline]
424 fn next(&mut self) -> Option<char> {
85aaf69f
SL
425 next_code_point(&mut self.iter).map(|ch| {
426 // str invariant says `ch` is a valid Unicode Scalar Value
427 unsafe {
e9174d1e 428 char::from_u32_unchecked(ch)
1a4d82fc 429 }
85aaf69f 430 })
1a4d82fc
JJ
431 }
432
433 #[inline]
85aaf69f 434 fn size_hint(&self) -> (usize, Option<usize>) {
1a4d82fc 435 let (len, _) = self.iter.size_hint();
c34b1796
AL
436 // `(len + 3)` can't overflow, because we know that the `slice::Iter`
437 // belongs to a slice in memory which has a maximum length of
438 // `isize::MAX` (that's well below `usize::MAX`).
439 ((len + 3) / 4, Some(len))
1a4d82fc
JJ
440 }
441}
442
85aaf69f 443#[stable(feature = "rust1", since = "1.0.0")]
1a4d82fc
JJ
444impl<'a> DoubleEndedIterator for Chars<'a> {
445 #[inline]
446 fn next_back(&mut self) -> Option<char> {
c34b1796
AL
447 next_code_point_reverse(&mut self.iter).map(|ch| {
448 // str invariant says `ch` is a valid Unicode Scalar Value
449 unsafe {
e9174d1e 450 char::from_u32_unchecked(ch)
1a4d82fc 451 }
c34b1796 452 })
1a4d82fc
JJ
453 }
454}
455
e9174d1e
SL
456impl<'a> Chars<'a> {
457 /// View the underlying data as a subslice of the original data.
458 ///
459 /// This has the same lifetime as the original slice, and so the
460 /// iterator can continue to be used while this exists.
461 #[stable(feature = "iter_to_slice", since = "1.4.0")]
462 #[inline]
463 pub fn as_str(&self) -> &'a str {
464 unsafe { from_utf8_unchecked(self.iter.as_slice()) }
465 }
466}
467
/// Iterator for a string's characters and their byte offsets.
#[derive(Clone, Debug)]
#[stable(feature = "rust1", since = "1.0.0")]
pub struct CharIndices<'a> {
    // Byte offset reported for the next char taken from the front; advanced
    // by the number of bytes each front char consumes.
    // NOTE(review): presumably starts at 0 for a fresh iterator — the
    // constructor is not visible here, confirm.
    front_offset: usize,
    // Underlying char iterator over the remaining bytes.
    iter: Chars<'a>,
}
475
85aaf69f 476#[stable(feature = "rust1", since = "1.0.0")]
1a4d82fc 477impl<'a> Iterator for CharIndices<'a> {
85aaf69f 478 type Item = (usize, char);
1a4d82fc
JJ
479
480 #[inline]
85aaf69f 481 fn next(&mut self) -> Option<(usize, char)> {
1a4d82fc
JJ
482 let (pre_len, _) = self.iter.iter.size_hint();
483 match self.iter.next() {
484 None => None,
485 Some(ch) => {
486 let index = self.front_offset;
487 let (len, _) = self.iter.iter.size_hint();
488 self.front_offset += pre_len - len;
489 Some((index, ch))
490 }
491 }
492 }
493
494 #[inline]
85aaf69f 495 fn size_hint(&self) -> (usize, Option<usize>) {
1a4d82fc
JJ
496 self.iter.size_hint()
497 }
498}
499
85aaf69f 500#[stable(feature = "rust1", since = "1.0.0")]
1a4d82fc
JJ
501impl<'a> DoubleEndedIterator for CharIndices<'a> {
502 #[inline]
85aaf69f 503 fn next_back(&mut self) -> Option<(usize, char)> {
1a4d82fc
JJ
504 match self.iter.next_back() {
505 None => None,
506 Some(ch) => {
507 let (len, _) = self.iter.iter.size_hint();
508 let index = self.front_offset + len;
509 Some((index, ch))
510 }
511 }
512 }
513}
514
e9174d1e
SL
515impl<'a> CharIndices<'a> {
516 /// View the underlying data as a subslice of the original data.
517 ///
518 /// This has the same lifetime as the original slice, and so the
519 /// iterator can continue to be used while this exists.
520 #[stable(feature = "iter_to_slice", since = "1.4.0")]
521 #[inline]
522 pub fn as_str(&self) -> &'a str {
523 self.iter.as_str()
524 }
525}
526
1a4d82fc
JJ
/// External iterator for a string's bytes.
/// Use with the `std::iter` module.
///
/// Created with the method [`bytes()`].
///
/// This is a thin wrapper around a cloning byte-slice iterator, so it yields
/// `u8` values rather than references.
///
/// [`bytes()`]: ../../std/primitive.str.html#method.bytes
#[stable(feature = "rust1", since = "1.0.0")]
#[derive(Clone, Debug)]
pub struct Bytes<'a>(Cloned<slice::Iter<'a, u8>>);
1a4d82fc 536
e9174d1e
SL
537#[stable(feature = "rust1", since = "1.0.0")]
538impl<'a> Iterator for Bytes<'a> {
539 type Item = u8;
1a4d82fc 540
1a4d82fc 541 #[inline]
e9174d1e
SL
542 fn next(&mut self) -> Option<u8> {
543 self.0.next()
1a4d82fc 544 }
1a4d82fc 545
c34b1796 546 #[inline]
e9174d1e
SL
547 fn size_hint(&self) -> (usize, Option<usize>) {
548 self.0.size_hint()
c34b1796 549 }
c34b1796
AL
550
551 #[inline]
e9174d1e
SL
552 fn count(self) -> usize {
553 self.0.count()
c34b1796 554 }
9346a6ac
AL
555
556 #[inline]
e9174d1e
SL
557 fn last(self) -> Option<Self::Item> {
558 self.0.last()
9346a6ac
AL
559 }
560
561 #[inline]
e9174d1e
SL
562 fn nth(&mut self, n: usize) -> Option<Self::Item> {
563 self.0.nth(n)
9346a6ac 564 }
1a4d82fc
JJ
565}
566
9346a6ac
AL
567#[stable(feature = "rust1", since = "1.0.0")]
568impl<'a> DoubleEndedIterator for Bytes<'a> {
569 #[inline]
570 fn next_back(&mut self) -> Option<u8> {
571 self.0.next_back()
572 }
c34b1796
AL
573}
574
9346a6ac
AL
575#[stable(feature = "rust1", since = "1.0.0")]
576impl<'a> ExactSizeIterator for Bytes<'a> {
577 #[inline]
578 fn len(&self) -> usize {
579 self.0.len()
580 }
c34b1796
AL
581}
582
9346a6ac
AL
/// This macro generates a Clone impl for string pattern API
/// wrapper types of the form X<'a, P>
///
/// A plain `#[derive(Clone)]` is not used because the impl must be bounded
/// on `P::Searcher: Clone` rather than on `P` itself. The caller supplies
/// the cloning expression `$e`, in which `$s` is bound to `self`.
macro_rules! derive_pattern_clone {
    (clone $t:ident with |$s:ident| $e:expr) => {
        impl<'a, P: Pattern<'a>> Clone for $t<'a, P>
            where P::Searcher: Clone
        {
            fn clone(&self) -> Self {
                let $s = self;
                $e
            }
        }
    }
}
597
/// This macro generates two public iterator structs
/// wrapping a private internal one that makes use of the `Pattern` API.
///
/// For all patterns `P: Pattern<'a>` the following items will be
/// generated (generics omitted):
///
/// struct $forward_iterator($internal_iterator);
/// struct $reverse_iterator($internal_iterator);
///
/// impl Iterator for $forward_iterator
/// { /* internal ends up calling Searcher::next_match() */ }
///
/// impl DoubleEndedIterator for $forward_iterator
///       where P::Searcher: DoubleEndedSearcher
/// { /* internal ends up calling Searcher::next_match_back() */ }
///
/// impl Iterator for $reverse_iterator
///       where P::Searcher: ReverseSearcher
/// { /* internal ends up calling Searcher::next_match_back() */ }
///
/// impl DoubleEndedIterator for $reverse_iterator
///       where P::Searcher: DoubleEndedSearcher
/// { /* internal ends up calling Searcher::next_match() */ }
///
/// In addition, `fmt::Debug` and `Clone` impls are generated for both
/// wrappers, bounded on `P::Searcher: fmt::Debug` and `P::Searcher: Clone`
/// respectively.
///
/// The internal one is defined outside the macro, and has almost the same
/// semantics as a DoubleEndedIterator by delegating to `pattern::Searcher` and
/// `pattern::ReverseSearcher` for both forward and reverse iteration.
///
/// "Almost", because a `Searcher` and a `ReverseSearcher` for a given
/// `Pattern` might not return the same elements, so actually implementing
/// `DoubleEndedIterator` for it would be incorrect.
/// (See the docs in `str::pattern` for more details)
///
/// However, the internal struct still represents a single ended iterator from
/// either end, and depending on pattern is also a valid double ended iterator,
/// so the two wrapper structs implement `Iterator`
/// and `DoubleEndedIterator` depending on the concrete pattern type, leading
/// to the complex impls seen above.
macro_rules! generate_pattern_iterators {
    {
        // Forward iterator
        forward:
            $(#[$forward_iterator_attribute:meta])*
            struct $forward_iterator:ident;

        // Reverse iterator
        reverse:
            $(#[$reverse_iterator_attribute:meta])*
            struct $reverse_iterator:ident;

        // Stability of all generated items
        stability:
            $(#[$common_stability_attribute:meta])*

        // Internal almost-iterator that is being delegated to
        internal:
            $internal_iterator:ident yielding ($iterty:ty);

        // Kind of delegation - either single ended or double ended
        delegate $($t:tt)*
    } => {
        $(#[$forward_iterator_attribute])*
        $(#[$common_stability_attribute])*
        pub struct $forward_iterator<'a, P: Pattern<'a>>($internal_iterator<'a, P>);

        $(#[$common_stability_attribute])*
        impl<'a, P: Pattern<'a>> fmt::Debug for $forward_iterator<'a, P>
            where P::Searcher: fmt::Debug
        {
            fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
                f.debug_tuple(stringify!($forward_iterator))
                    .field(&self.0)
                    .finish()
            }
        }

        $(#[$common_stability_attribute])*
        impl<'a, P: Pattern<'a>> Iterator for $forward_iterator<'a, P> {
            type Item = $iterty;

            #[inline]
            fn next(&mut self) -> Option<$iterty> {
                self.0.next()
            }
        }

        $(#[$common_stability_attribute])*
        impl<'a, P: Pattern<'a>> Clone for $forward_iterator<'a, P>
            where P::Searcher: Clone
        {
            fn clone(&self) -> Self {
                $forward_iterator(self.0.clone())
            }
        }

        $(#[$reverse_iterator_attribute])*
        $(#[$common_stability_attribute])*
        pub struct $reverse_iterator<'a, P: Pattern<'a>>($internal_iterator<'a, P>);

        $(#[$common_stability_attribute])*
        impl<'a, P: Pattern<'a>> fmt::Debug for $reverse_iterator<'a, P>
            where P::Searcher: fmt::Debug
        {
            fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
                f.debug_tuple(stringify!($reverse_iterator))
                    .field(&self.0)
                    .finish()
            }
        }

        // The reverse wrapper's forward direction is the internal iterator's
        // backward direction.
        $(#[$common_stability_attribute])*
        impl<'a, P: Pattern<'a>> Iterator for $reverse_iterator<'a, P>
            where P::Searcher: ReverseSearcher<'a>
        {
            type Item = $iterty;

            #[inline]
            fn next(&mut self) -> Option<$iterty> {
                self.0.next_back()
            }
        }

        $(#[$common_stability_attribute])*
        impl<'a, P: Pattern<'a>> Clone for $reverse_iterator<'a, P>
            where P::Searcher: Clone
        {
            fn clone(&self) -> Self {
                $reverse_iterator(self.0.clone())
            }
        }

        // Re-invoke the macro with the remaining `delegate` tokens to pick
        // one of the two arms below.
        generate_pattern_iterators!($($t)* with $(#[$common_stability_attribute])*,
                                                $forward_iterator,
                                                $reverse_iterator, $iterty);
    };
    // Double ended delegation: both wrappers also get `DoubleEndedIterator`.
    {
        double ended; with $(#[$common_stability_attribute:meta])*,
                           $forward_iterator:ident,
                           $reverse_iterator:ident, $iterty:ty
    } => {
        $(#[$common_stability_attribute])*
        impl<'a, P: Pattern<'a>> DoubleEndedIterator for $forward_iterator<'a, P>
            where P::Searcher: DoubleEndedSearcher<'a>
        {
            #[inline]
            fn next_back(&mut self) -> Option<$iterty> {
                self.0.next_back()
            }
        }

        $(#[$common_stability_attribute])*
        impl<'a, P: Pattern<'a>> DoubleEndedIterator for $reverse_iterator<'a, P>
            where P::Searcher: DoubleEndedSearcher<'a>
        {
            #[inline]
            fn next_back(&mut self) -> Option<$iterty> {
                self.0.next()
            }
        }
    };
    // Single ended delegation: nothing further is generated.
    {
        single ended; with $(#[$common_stability_attribute:meta])*,
                           $forward_iterator:ident,
                           $reverse_iterator:ident, $iterty:ty
    } => {}
}
764
9346a6ac
AL
// Clone impl bounded on `P::Searcher: Clone`: the searcher is cloned and the
// remaining fields are copied over from the original.
derive_pattern_clone!{
    clone SplitInternal
    with |s| SplitInternal { matcher: s.matcher.clone(), ..*s }
}

// Shared state behind the split-style iterators generated further down.
struct SplitInternal<'a, P: Pattern<'a>> {
    // Start offset in the haystack of the part not yet yielded.
    start: usize,
    // End offset in the haystack of the part not yet yielded.
    end: usize,
    // Searcher that locates the separator matches and owns the haystack.
    matcher: P::Searcher,
    // Whether an empty final piece is yielded rather than suppressed.
    allow_trailing_empty: bool,
    // Set once the final piece has been produced.
    finished: bool,
}
777
54a0048b
SL
778impl<'a, P: Pattern<'a>> fmt::Debug for SplitInternal<'a, P> where P::Searcher: fmt::Debug {
779 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
780 f.debug_struct("SplitInternal")
781 .field("start", &self.start)
782 .field("end", &self.end)
783 .field("matcher", &self.matcher)
784 .field("allow_trailing_empty", &self.allow_trailing_empty)
785 .field("finished", &self.finished)
786 .finish()
787 }
788}
789
impl<'a, P: Pattern<'a>> SplitInternal<'a, P> {
    /// Yields the final piece, `start..end` of the haystack, and marks the
    /// iterator as finished.
    ///
    /// Returns `None` if the iterator is already finished, or if the
    /// remaining piece is empty and `allow_trailing_empty` is `false` (in
    /// which case `finished` is left untouched).
    #[inline]
    fn get_end(&mut self) -> Option<&'a str> {
        if !self.finished && (self.allow_trailing_empty || self.end - self.start > 0) {
            self.finished = true;
            // NOTE(review): assumes `start`/`end` are in bounds and on char
            // boundaries because they originate from searcher match indices
            // — confirm against `Searcher`'s contract.
            unsafe {
                let string = self.matcher.haystack().slice_unchecked(self.start, self.end);
                Some(string)
            }
        } else {
            None
        }
    }

    /// Returns the next piece from the front: the text between `start` and
    /// the next separator match, or the final piece once no match is left.
    #[inline]
    fn next(&mut self) -> Option<&'a str> {
        if self.finished { return None }

        let haystack = self.matcher.haystack();
        match self.matcher.next_match() {
            Some((a, b)) => unsafe {
                let elt = haystack.slice_unchecked(self.start, a);
                // Resume after the separator on the next call.
                self.start = b;
                Some(elt)
            },
            None => self.get_end(),
        }
    }

    /// Returns the next piece from the back: the text between the last
    /// separator match and `end`, or the remaining `start..end` once no
    /// match is left.
    #[inline]
    fn next_back(&mut self) -> Option<&'a str>
        where P::Searcher: ReverseSearcher<'a>
    {
        if self.finished { return None }

        // First backward call when trailing empties are not allowed: flip
        // the flag so the recursive call takes the regular path, then drop
        // the piece it returns if that piece is empty.
        if !self.allow_trailing_empty {
            self.allow_trailing_empty = true;
            match self.next_back() {
                Some(elt) if !elt.is_empty() => return Some(elt),
                _ => if self.finished { return None }
            }
        }

        let haystack = self.matcher.haystack();
        match self.matcher.next_match_back() {
            Some((a, b)) => unsafe {
                let elt = haystack.slice_unchecked(b, self.end);
                // Continue before the separator on the next call.
                self.end = a;
                Some(elt)
            },
            None => unsafe {
                self.finished = true;
                Some(haystack.slice_unchecked(self.start, self.end))
            },
        }
    }
}
847
9346a6ac
AL
// Public `Split` / `RSplit` wrappers around `SplitInternal`; both directions
// are exposed when the pattern's searcher is a `DoubleEndedSearcher`.
generate_pattern_iterators! {
    forward:
        /// Created with the method [`split()`].
        ///
        /// [`split()`]: ../../std/primitive.str.html#method.split
        struct Split;
    reverse:
        /// Created with the method [`rsplit()`].
        ///
        /// [`rsplit()`]: ../../std/primitive.str.html#method.rsplit
        struct RSplit;
    stability:
        #[stable(feature = "rust1", since = "1.0.0")]
    internal:
        SplitInternal yielding (&'a str);
    delegate double ended;
}
865
// Public `SplitTerminator` / `RSplitTerminator` wrappers; they share
// `SplitInternal` with `Split` / `RSplit`.
generate_pattern_iterators! {
    forward:
        /// Created with the method [`split_terminator()`].
        ///
        /// [`split_terminator()`]: ../../std/primitive.str.html#method.split_terminator
        struct SplitTerminator;
    reverse:
        /// Created with the method [`rsplit_terminator()`].
        ///
        /// [`rsplit_terminator()`]: ../../std/primitive.str.html#method.rsplit_terminator
        struct RSplitTerminator;
    stability:
        #[stable(feature = "rust1", since = "1.0.0")]
    internal:
        SplitInternal yielding (&'a str);
    delegate double ended;
}
1a4d82fc 883
9346a6ac
AL
// Clone impl bounded on `P::Searcher: Clone`: the inner split state is
// cloned and the counter is copied over from the original.
derive_pattern_clone!{
    clone SplitNInternal
    with |s| SplitNInternal { iter: s.iter.clone(), ..*s }
}

// Split state limited to a fixed number of yielded pieces.
struct SplitNInternal<'a, P: Pattern<'a>> {
    // Underlying unbounded split state.
    iter: SplitInternal<'a, P>,
    /// The number of splits remaining
    count: usize,
}
894
54a0048b
SL
895impl<'a, P: Pattern<'a>> fmt::Debug for SplitNInternal<'a, P> where P::Searcher: fmt::Debug {
896 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
897 f.debug_struct("SplitNInternal")
898 .field("iter", &self.iter)
899 .field("count", &self.count)
900 .finish()
901 }
902}
903
9346a6ac 904impl<'a, P: Pattern<'a>> SplitNInternal<'a, P> {
1a4d82fc
JJ
905 #[inline]
906 fn next(&mut self) -> Option<&'a str> {
c34b1796
AL
907 match self.count {
908 0 => None,
909 1 => { self.count = 0; self.iter.get_end() }
910 _ => { self.count -= 1; self.iter.next() }
1a4d82fc
JJ
911 }
912 }
1a4d82fc 913
c34b1796 914 #[inline]
9346a6ac
AL
915 fn next_back(&mut self) -> Option<&'a str>
916 where P::Searcher: ReverseSearcher<'a>
917 {
918 match self.count {
919 0 => None,
920 1 => { self.count = 0; self.iter.get_end() }
921 _ => { self.count -= 1; self.iter.next_back() }
c34b1796
AL
922 }
923 }
1a4d82fc
JJ
924}
925
9346a6ac
AL
// Public `SplitN` / `RSplitN` wrappers around `SplitNInternal`. Delegation is
// single ended, so no `DoubleEndedIterator` impls are generated.
generate_pattern_iterators! {
    forward:
        /// Created with the method [`splitn()`].
        ///
        /// [`splitn()`]: ../../std/primitive.str.html#method.splitn
        struct SplitN;
    reverse:
        /// Created with the method [`rsplitn()`].
        ///
        /// [`rsplitn()`]: ../../std/primitive.str.html#method.rsplitn
        struct RSplitN;
    stability:
        #[stable(feature = "rust1", since = "1.0.0")]
    internal:
        SplitNInternal yielding (&'a str);
    delegate single ended;
}
943
// Clone impl bounded on `P::Searcher: Clone`; cloning just clones the searcher.
derive_pattern_clone!{
    clone MatchIndicesInternal
    with |s| MatchIndicesInternal(s.0.clone())
}

// Wraps a searcher and yields each match as `(start offset, matched slice)`.
struct MatchIndicesInternal<'a, P: Pattern<'a>>(P::Searcher);
950
54a0048b
SL
951impl<'a, P: Pattern<'a>> fmt::Debug for MatchIndicesInternal<'a, P> where P::Searcher: fmt::Debug {
952 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
953 f.debug_tuple("MatchIndicesInternal")
954 .field(&self.0)
955 .finish()
956 }
957}
958
9346a6ac
AL
959impl<'a, P: Pattern<'a>> MatchIndicesInternal<'a, P> {
960 #[inline]
b039eaaf
SL
961 fn next(&mut self) -> Option<(usize, &'a str)> {
962 self.0.next_match().map(|(start, end)| unsafe {
963 (start, self.0.haystack().slice_unchecked(start, end))
964 })
9346a6ac
AL
965 }
966
967 #[inline]
b039eaaf 968 fn next_back(&mut self) -> Option<(usize, &'a str)>
9346a6ac
AL
969 where P::Searcher: ReverseSearcher<'a>
970 {
b039eaaf
SL
971 self.0.next_match_back().map(|(start, end)| unsafe {
972 (start, self.0.haystack().slice_unchecked(start, end))
973 })
9346a6ac
AL
974 }
975}
976
977generate_pattern_iterators! {
978 forward:
9cc50fc6
SL
979 /// Created with the method [`match_indices()`].
980 ///
54a0048b 981 /// [`match_indices()`]: ../../std/primitive.str.html#method.match_indices
9346a6ac
AL
982 struct MatchIndices;
983 reverse:
9cc50fc6
SL
984 /// Created with the method [`rmatch_indices()`].
985 ///
54a0048b 986 /// [`rmatch_indices()`]: ../../std/primitive.str.html#method.rmatch_indices
9346a6ac
AL
987 struct RMatchIndices;
988 stability:
b039eaaf 989 #[stable(feature = "str_match_indices", since = "1.5.0")]
9346a6ac 990 internal:
b039eaaf 991 MatchIndicesInternal yielding ((usize, &'a str));
9346a6ac
AL
992 delegate double ended;
993}
994
995derive_pattern_clone!{
996 clone MatchesInternal
997 with |s| MatchesInternal(s.0.clone())
998}
54a0048b 999
9346a6ac
AL
1000struct MatchesInternal<'a, P: Pattern<'a>>(P::Searcher);
1001
54a0048b
SL
1002impl<'a, P: Pattern<'a>> fmt::Debug for MatchesInternal<'a, P> where P::Searcher: fmt::Debug {
1003 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1004 f.debug_tuple("MatchesInternal")
1005 .field(&self.0)
1006 .finish()
1007 }
1008}
1009
9346a6ac
AL
1010impl<'a, P: Pattern<'a>> MatchesInternal<'a, P> {
1011 #[inline]
1012 fn next(&mut self) -> Option<&'a str> {
1013 self.0.next_match().map(|(a, b)| unsafe {
1014 // Indices are known to be on utf8 boundaries
1015 self.0.haystack().slice_unchecked(a, b)
1016 })
1017 }
1018
1019 #[inline]
1020 fn next_back(&mut self) -> Option<&'a str>
1021 where P::Searcher: ReverseSearcher<'a>
1022 {
1023 self.0.next_match_back().map(|(a, b)| unsafe {
1024 // Indices are known to be on utf8 boundaries
1025 self.0.haystack().slice_unchecked(a, b)
1026 })
1027 }
1028}
1029
1030generate_pattern_iterators! {
1031 forward:
9cc50fc6
SL
1032 /// Created with the method [`matches()`].
1033 ///
54a0048b 1034 /// [`matches()`]: ../../std/primitive.str.html#method.matches
9346a6ac
AL
1035 struct Matches;
1036 reverse:
9cc50fc6
SL
1037 /// Created with the method [`rmatches()`].
1038 ///
54a0048b 1039 /// [`rmatches()`]: ../../std/primitive.str.html#method.rmatches
9346a6ac
AL
1040 struct RMatches;
1041 stability:
62682a34 1042 #[stable(feature = "str_matches", since = "1.2.0")]
9346a6ac
AL
1043 internal:
1044 MatchesInternal yielding (&'a str);
1045 delegate double ended;
1046}
1047
9cc50fc6
SL
1048/// Created with the method [`lines()`].
1049///
54a0048b 1050/// [`lines()`]: ../../std/primitive.str.html#method.lines
c34b1796 1051#[stable(feature = "rust1", since = "1.0.0")]
54a0048b 1052#[derive(Clone, Debug)]
e9174d1e 1053pub struct Lines<'a>(Map<SplitTerminator<'a, char>, LinesAnyMap>);
9346a6ac
AL
1054
1055#[stable(feature = "rust1", since = "1.0.0")]
1056impl<'a> Iterator for Lines<'a> {
c34b1796
AL
1057 type Item = &'a str;
1058
1059 #[inline]
1060 fn next(&mut self) -> Option<&'a str> {
9346a6ac
AL
1061 self.0.next()
1062 }
c34b1796 1063
9346a6ac
AL
1064 #[inline]
1065 fn size_hint(&self) -> (usize, Option<usize>) {
1066 self.0.size_hint()
1067 }
1068}
1069
1070#[stable(feature = "rust1", since = "1.0.0")]
1071impl<'a> DoubleEndedIterator for Lines<'a> {
1072 #[inline]
1073 fn next_back(&mut self) -> Option<&'a str> {
1074 self.0.next_back()
1075 }
1076}
1077
9cc50fc6
SL
1078/// Created with the method [`lines_any()`].
1079///
54a0048b 1080/// [`lines_any()`]: ../../std/primitive.str.html#method.lines_any
9346a6ac 1081#[stable(feature = "rust1", since = "1.0.0")]
92a42be0 1082#[rustc_deprecated(since = "1.4.0", reason = "use lines()/Lines instead now")]
54a0048b 1083#[derive(Clone, Debug)]
e9174d1e
SL
1084#[allow(deprecated)]
1085pub struct LinesAny<'a>(Lines<'a>);
9346a6ac 1086
b039eaaf 1087/// A nameable, cloneable fn type
9346a6ac
AL
1088#[derive(Clone)]
1089struct LinesAnyMap;
1090
1091impl<'a> Fn<(&'a str,)> for LinesAnyMap {
1092 #[inline]
1093 extern "rust-call" fn call(&self, (line,): (&'a str,)) -> &'a str {
1094 let l = line.len();
1095 if l > 0 && line.as_bytes()[l - 1] == b'\r' { &line[0 .. l - 1] }
1096 else { line }
1097 }
1098}
1099
1100impl<'a> FnMut<(&'a str,)> for LinesAnyMap {
1101 #[inline]
1102 extern "rust-call" fn call_mut(&mut self, (line,): (&'a str,)) -> &'a str {
1103 Fn::call(&*self, (line,))
1104 }
1105}
1106
1107impl<'a> FnOnce<(&'a str,)> for LinesAnyMap {
1108 type Output = &'a str;
1109
1110 #[inline]
1111 extern "rust-call" fn call_once(self, (line,): (&'a str,)) -> &'a str {
1112 Fn::call(&self, (line,))
1a4d82fc 1113 }
c34b1796 1114}
1a4d82fc 1115
c34b1796 1116#[stable(feature = "rust1", since = "1.0.0")]
e9174d1e 1117#[allow(deprecated)]
9346a6ac 1118impl<'a> Iterator for LinesAny<'a> {
c34b1796
AL
1119 type Item = &'a str;
1120
1121 #[inline]
1122 fn next(&mut self) -> Option<&'a str> {
9346a6ac
AL
1123 self.0.next()
1124 }
1125
1126 #[inline]
1127 fn size_hint(&self) -> (usize, Option<usize>) {
1128 self.0.size_hint()
1129 }
1130}
1131
1132#[stable(feature = "rust1", since = "1.0.0")]
e9174d1e 1133#[allow(deprecated)]
9346a6ac
AL
1134impl<'a> DoubleEndedIterator for LinesAny<'a> {
1135 #[inline]
1136 fn next_back(&mut self) -> Option<&'a str> {
1137 self.0.next_back()
1a4d82fc
JJ
1138 }
1139}
1140
1a4d82fc
JJ
1141/*
1142Section: Comparing strings
1143*/
1144
c1a9b12d 1145/// Bytewise slice equality
1a4d82fc
JJ
1146/// NOTE: This function is (ab)used in rustc::middle::trans::_match
1147/// to compare &[u8] byte slices that are not necessarily valid UTF-8.
c1a9b12d 1148#[lang = "str_eq"]
1a4d82fc 1149#[inline]
c1a9b12d 1150fn eq_slice(a: &str, b: &str) -> bool {
54a0048b 1151 a.as_bytes() == b.as_bytes()
1a4d82fc
JJ
1152}
1153
1a4d82fc 1154/*
9cc50fc6 1155Section: UTF-8 validation
1a4d82fc
JJ
1156*/
1157
9cc50fc6
SL
// A word with the top bit of every byte set. The literal is written as a
// u64 and truncated so that it also fits a narrower usize.
const NONASCII_MASK: usize = 0x80808080_80808080u64 as usize;

/// Return `true` if any byte in the word `x` is nonascii (>= 128).
#[inline]
fn contains_nonascii(x: usize) -> bool {
    // A byte is >= 128 exactly when its high bit is set.
    let high_bits = x & NONASCII_MASK;
    high_bits != 0
}
1166
1a4d82fc
JJ
1167/// Walk through the byte slice `v` checking that it's a valid UTF-8
1168/// sequence, returning `Ok(())` in that case, or, if it is invalid, an
1169/// `Err(Utf8Error)` whose `valid_up_to` is the index of the first byte
1170/// in the invalid sequence.
1171#[inline(always)]
9cc50fc6
SL
1172fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
1173 let mut offset = 0;
1174 let len = v.len();
1175 while offset < len {
1176 let old_offset = offset;
1a4d82fc 1177 macro_rules! err { () => {{
9346a6ac 1178 return Err(Utf8Error {
9cc50fc6 1179 valid_up_to: old_offset
9346a6ac 1180 })
1a4d82fc
JJ
1181 }}}
1182
9cc50fc6
SL
1183 macro_rules! next { () => {{
1184 offset += 1;
1185 // we needed data, but there was none: error!
1186 if offset >= len {
1187 err!()
1a4d82fc 1188 }
9cc50fc6
SL
1189 v[offset]
1190 }}}
1a4d82fc 1191
9cc50fc6 1192 let first = v[offset];
1a4d82fc 1193 if first >= 128 {
c34b1796 1194 let w = UTF8_CHAR_WIDTH[first as usize];
1a4d82fc
JJ
1195 let second = next!();
1196 // 2-byte encoding is for codepoints \u{0080} to \u{07ff}
1197 // first C2 80 last DF BF
1198 // 3-byte encoding is for codepoints \u{0800} to \u{ffff}
1199 // first E0 A0 80 last EF BF BF
1200 // excluding surrogates codepoints \u{d800} to \u{dfff}
1201 // ED A0 80 to ED BF BF
1202 // 4-byte encoding is for codepoints \u{10000} to \u{10ffff}
1203 // first F0 90 80 80 last F4 8F BF BF
1204 //
1205 // Use the UTF-8 syntax from the RFC
1206 //
1207 // https://tools.ietf.org/html/rfc3629
1208 // UTF8-1 = %x00-7F
1209 // UTF8-2 = %xC2-DF UTF8-tail
1210 // UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
1211 // %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
1212 // UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
1213 // %xF4 %x80-8F 2( UTF8-tail )
1214 match w {
1215 2 => if second & !CONT_MASK != TAG_CONT_U8 {err!()},
1216 3 => {
1217 match (first, second, next!() & !CONT_MASK) {
1218 (0xE0 , 0xA0 ... 0xBF, TAG_CONT_U8) |
1219 (0xE1 ... 0xEC, 0x80 ... 0xBF, TAG_CONT_U8) |
1220 (0xED , 0x80 ... 0x9F, TAG_CONT_U8) |
1221 (0xEE ... 0xEF, 0x80 ... 0xBF, TAG_CONT_U8) => {}
1222 _ => err!()
1223 }
1224 }
1225 4 => {
1226 match (first, second, next!() & !CONT_MASK, next!() & !CONT_MASK) {
1227 (0xF0 , 0x90 ... 0xBF, TAG_CONT_U8, TAG_CONT_U8) |
1228 (0xF1 ... 0xF3, 0x80 ... 0xBF, TAG_CONT_U8, TAG_CONT_U8) |
1229 (0xF4 , 0x80 ... 0x8F, TAG_CONT_U8, TAG_CONT_U8) => {}
1230 _ => err!()
1231 }
1232 }
1233 _ => err!()
1234 }
9cc50fc6
SL
1235 offset += 1;
1236 } else {
1237 // Ascii case, try to skip forward quickly.
1238 // When the pointer is aligned, read 2 words of data per iteration
1239 // until we find a word containing a non-ascii byte.
1240 let usize_bytes = mem::size_of::<usize>();
1241 let bytes_per_iteration = 2 * usize_bytes;
1242 let ptr = v.as_ptr();
1243 let align = (ptr as usize + offset) & (usize_bytes - 1);
1244 if align == 0 {
1245 if len >= bytes_per_iteration {
1246 while offset <= len - bytes_per_iteration {
1247 unsafe {
1248 let u = *(ptr.offset(offset as isize) as *const usize);
1249 let v = *(ptr.offset((offset + usize_bytes) as isize) as *const usize);
1250
1251 // break if there is a nonascii byte
1252 let zu = contains_nonascii(u);
1253 let zv = contains_nonascii(v);
1254 if zu || zv {
1255 break;
1256 }
1257 }
1258 offset += bytes_per_iteration;
1259 }
1260 }
1261 // step from the point where the wordwise loop stopped
1262 while offset < len && v[offset] < 128 {
1263 offset += 1;
1264 }
1265 } else {
1266 offset += 1;
1267 }
1a4d82fc
JJ
1268 }
1269 }
9cc50fc6
SL
1270
1271 Ok(())
1a4d82fc
JJ
1272}
1273
1274// https://tools.ietf.org/html/rfc3629
1275static UTF8_CHAR_WIDTH: [u8; 256] = [
12761,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
12771,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x1F
12781,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
12791,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x3F
12801,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
12811,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x5F
12821,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
12831,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x7F
12840,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
12850,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x9F
12860,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
12870,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xBF
12880,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
12892,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xDF
12903,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, // 0xEF
12914,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, // 0xFF
1292];
1293
1294/// Struct that contains a `char` and the index of the first byte of
1295/// the next `char` in a string. This can be used as a data structure
1296/// for iterating over the UTF-8 bytes of a string.
54a0048b 1297#[derive(Copy, Clone, Debug)]
c34b1796
AL
1298#[unstable(feature = "str_char",
1299 reason = "existence of this struct is uncertain as it is frequently \
1300 able to be replaced with char.len_utf8() and/or \
e9174d1e
SL
1301 char/char_indices iterators",
1302 issue = "27754")]
1a4d82fc
JJ
1303pub struct CharRange {
1304 /// Current `char`
1305 pub ch: char,
1306 /// Index of the first byte of the next `char`
85aaf69f 1307 pub next: usize,
1a4d82fc
JJ
1308}
1309
1310/// Mask of the value bits of a continuation byte
c34b1796 1311const CONT_MASK: u8 = 0b0011_1111;
1a4d82fc 1312/// Value of the tag bits (tag mask is !CONT_MASK) of a continuation byte
c34b1796 1313const TAG_CONT_U8: u8 = 0b1000_0000;
1a4d82fc
JJ
1314
1315/*
1316Section: Trait implementations
1317*/
1318
1319mod traits {
54a0048b 1320 use cmp::{Ord, Ordering, PartialEq, PartialOrd, Eq};
1a4d82fc
JJ
1321 use option::Option;
1322 use option::Option::Some;
1323 use ops;
1324 use str::{StrExt, eq_slice};
1325
85aaf69f 1326 #[stable(feature = "rust1", since = "1.0.0")]
1a4d82fc
JJ
1327 impl Ord for str {
1328 #[inline]
1329 fn cmp(&self, other: &str) -> Ordering {
54a0048b 1330 self.as_bytes().cmp(other.as_bytes())
1a4d82fc
JJ
1331 }
1332 }
1333
85aaf69f 1334 #[stable(feature = "rust1", since = "1.0.0")]
1a4d82fc
JJ
1335 impl PartialEq for str {
1336 #[inline]
1337 fn eq(&self, other: &str) -> bool {
1338 eq_slice(self, other)
1339 }
1340 #[inline]
1341 fn ne(&self, other: &str) -> bool { !(*self).eq(other) }
1342 }
1343
85aaf69f 1344 #[stable(feature = "rust1", since = "1.0.0")]
1a4d82fc
JJ
1345 impl Eq for str {}
1346
85aaf69f 1347 #[stable(feature = "rust1", since = "1.0.0")]
1a4d82fc
JJ
1348 impl PartialOrd for str {
1349 #[inline]
1350 fn partial_cmp(&self, other: &str) -> Option<Ordering> {
1351 Some(self.cmp(other))
1352 }
1353 }
1354
54a0048b
SL
1355 /// Implements substring slicing with syntax `&self[begin .. end]`.
1356 ///
85aaf69f
SL
1357 /// Returns a slice of the given string from the byte range
1358 /// [`begin`..`end`).
1359 ///
1360 /// This operation is `O(1)`.
1361 ///
54a0048b
SL
1362 /// # Panics
1363 ///
1364 /// Panics if `begin` or `end` does not point to the starting
1365 /// byte offset of a character (as defined by `is_char_boundary`).
1366 /// Requires that `begin <= end` and `end <= len` where `len` is the
1367 /// length of the string.
85aaf69f 1368 ///
c34b1796 1369 /// # Examples
85aaf69f 1370 ///
c34b1796 1371 /// ```
85aaf69f
SL
1372 /// let s = "Löwe 老虎 Léopard";
1373 /// assert_eq!(&s[0 .. 1], "L");
1374 ///
1375 /// assert_eq!(&s[1 .. 9], "öwe 老");
1376 ///
1377 /// // these will panic:
1378 /// // byte 2 lies within `ö`:
1379 /// // &s[2 ..3];
1380 ///
1381 /// // byte 8 lies within `老`
1382 /// // &s[1 .. 8];
1383 ///
1384 /// // byte 100 is outside the string
1385 /// // &s[3 .. 100];
1386 /// ```
1387 #[stable(feature = "rust1", since = "1.0.0")]
1388 impl ops::Index<ops::Range<usize>> for str {
1a4d82fc
JJ
1389 type Output = str;
1390 #[inline]
c34b1796 1391 fn index(&self, index: ops::Range<usize>) -> &str {
85aaf69f
SL
1392 // is_char_boundary checks that the index is in [0, .len()]
1393 if index.start <= index.end &&
1394 self.is_char_boundary(index.start) &&
1395 self.is_char_boundary(index.end) {
1396 unsafe { self.slice_unchecked(index.start, index.end) }
1397 } else {
1398 super::slice_error_fail(self, index.start, index.end)
1399 }
1a4d82fc
JJ
1400 }
1401 }
85aaf69f 1402
54a0048b
SL
1403 /// Implements mutable substring slicing with syntax
1404 /// `&mut self[begin .. end]`.
1405 ///
c1a9b12d
SL
1406 /// Returns a mutable slice of the given string from the byte range
1407 /// [`begin`..`end`).
54a0048b
SL
1408 ///
1409 /// This operation is `O(1)`.
1410 ///
1411 /// # Panics
1412 ///
1413 /// Panics if `begin` or `end` does not point to the starting
1414 /// byte offset of a character (as defined by `is_char_boundary`).
1415 /// Requires that `begin <= end` and `end <= len` where `len` is the
1416 /// length of the string.
c1a9b12d
SL
1417 #[stable(feature = "derefmut_for_string", since = "1.2.0")]
1418 impl ops::IndexMut<ops::Range<usize>> for str {
1419 #[inline]
1420 fn index_mut(&mut self, index: ops::Range<usize>) -> &mut str {
1421 // is_char_boundary checks that the index is in [0, .len()]
1422 if index.start <= index.end &&
1423 self.is_char_boundary(index.start) &&
1424 self.is_char_boundary(index.end) {
1425 unsafe { self.slice_mut_unchecked(index.start, index.end) }
1426 } else {
1427 super::slice_error_fail(self, index.start, index.end)
1428 }
1429 }
1430 }
1431
54a0048b 1432 /// Implements substring slicing with syntax `&self[.. end]`.
85aaf69f 1433 ///
54a0048b
SL
1434 /// Returns a slice of the string from the beginning to byte offset
1435 /// `end`.
85aaf69f 1436 ///
54a0048b 1437 /// Equivalent to `&self[0 .. end]`.
85aaf69f
SL
1438 #[stable(feature = "rust1", since = "1.0.0")]
1439 impl ops::Index<ops::RangeTo<usize>> for str {
1a4d82fc 1440 type Output = str;
c34b1796 1441
1a4d82fc 1442 #[inline]
c34b1796 1443 fn index(&self, index: ops::RangeTo<usize>) -> &str {
85aaf69f
SL
1444 // is_char_boundary checks that the index is in [0, .len()]
1445 if self.is_char_boundary(index.end) {
1446 unsafe { self.slice_unchecked(0, index.end) }
1447 } else {
1448 super::slice_error_fail(self, 0, index.end)
1449 }
1a4d82fc
JJ
1450 }
1451 }
85aaf69f 1452
54a0048b
SL
1453 /// Implements mutable substring slicing with syntax `&mut self[.. end]`.
1454 ///
1455 /// Returns a mutable slice of the string from the beginning to byte offset
c1a9b12d 1456 /// `end`.
54a0048b
SL
1457 ///
1458 /// Equivalent to `&mut self[0 .. end]`.
c1a9b12d
SL
1459 #[stable(feature = "derefmut_for_string", since = "1.2.0")]
1460 impl ops::IndexMut<ops::RangeTo<usize>> for str {
1461 #[inline]
1462 fn index_mut(&mut self, index: ops::RangeTo<usize>) -> &mut str {
1463 // is_char_boundary checks that the index is in [0, .len()]
1464 if self.is_char_boundary(index.end) {
1465 unsafe { self.slice_mut_unchecked(0, index.end) }
1466 } else {
1467 super::slice_error_fail(self, 0, index.end)
1468 }
1469 }
1470 }
1471
54a0048b 1472 /// Implements substring slicing with syntax `&self[begin ..]`.
85aaf69f 1473 ///
54a0048b
SL
1474 /// Returns a slice of the string from byte offset `begin`
1475 /// to the end of the string.
85aaf69f 1476 ///
54a0048b 1477 /// Equivalent to `&self[begin .. len]`.
85aaf69f
SL
1478 #[stable(feature = "rust1", since = "1.0.0")]
1479 impl ops::Index<ops::RangeFrom<usize>> for str {
1a4d82fc 1480 type Output = str;
c34b1796 1481
1a4d82fc 1482 #[inline]
c34b1796 1483 fn index(&self, index: ops::RangeFrom<usize>) -> &str {
85aaf69f
SL
1484 // is_char_boundary checks that the index is in [0, .len()]
1485 if self.is_char_boundary(index.start) {
1486 unsafe { self.slice_unchecked(index.start, self.len()) }
1487 } else {
1488 super::slice_error_fail(self, index.start, self.len())
1489 }
1a4d82fc
JJ
1490 }
1491 }
85aaf69f 1492
54a0048b
SL
1493 /// Implements mutable substring slicing with syntax `&mut self[begin ..]`.
1494 ///
1495 /// Returns a mutable slice of the string from byte offset `begin`
1496 /// to the end of the string.
1497 ///
1498 /// Equivalent to `&mut self[begin .. len]`.
c1a9b12d
SL
1499 #[stable(feature = "derefmut_for_string", since = "1.2.0")]
1500 impl ops::IndexMut<ops::RangeFrom<usize>> for str {
1501 #[inline]
1502 fn index_mut(&mut self, index: ops::RangeFrom<usize>) -> &mut str {
1503 // is_char_boundary checks that the index is in [0, .len()]
1504 if self.is_char_boundary(index.start) {
1505 let len = self.len();
1506 unsafe { self.slice_mut_unchecked(index.start, len) }
1507 } else {
1508 super::slice_error_fail(self, index.start, self.len())
1509 }
1510 }
1511 }
1512
54a0048b
SL
1513 /// Implements substring slicing with syntax `&self[..]`.
1514 ///
1515 /// Returns a slice of the whole string. This operation can
1516 /// never panic.
1517 ///
1518 /// Equivalent to `&self[0 .. len]`.
85aaf69f
SL
1519 #[stable(feature = "rust1", since = "1.0.0")]
1520 impl ops::Index<ops::RangeFull> for str {
1a4d82fc 1521 type Output = str;
c34b1796 1522
1a4d82fc 1523 #[inline]
c34b1796 1524 fn index(&self, _index: ops::RangeFull) -> &str {
1a4d82fc
JJ
1525 self
1526 }
1527 }
c1a9b12d 1528
54a0048b
SL
1529 /// Implements mutable substring slicing with syntax `&mut self[..]`.
1530 ///
1531 /// Returns a mutable slice of the whole string. This operation can
1532 /// never panic.
1533 ///
1534 /// Equivalent to `&mut self[0 .. len]`.
c1a9b12d
SL
1535 #[stable(feature = "derefmut_for_string", since = "1.2.0")]
1536 impl ops::IndexMut<ops::RangeFull> for str {
1537 #[inline]
1538 fn index_mut(&mut self, _index: ops::RangeFull) -> &mut str {
1539 self
1540 }
1541 }
54a0048b
SL
1542
1543 #[unstable(feature = "inclusive_range",
1544 reason = "recently added, follows RFC",
1545 issue = "28237")]
1546 impl ops::Index<ops::RangeInclusive<usize>> for str {
1547 type Output = str;
1548
1549 #[inline]
1550 fn index(&self, index: ops::RangeInclusive<usize>) -> &str {
1551 match index {
1552 ops::RangeInclusive::Empty { .. } => "",
1553 ops::RangeInclusive::NonEmpty { end, .. } if end == usize::max_value() =>
1554 panic!("attempted to index slice up to maximum usize"),
1555 ops::RangeInclusive::NonEmpty { start, end } =>
1556 self.index(start .. end+1)
1557 }
1558 }
1559 }
1560 #[unstable(feature = "inclusive_range",
1561 reason = "recently added, follows RFC",
1562 issue = "28237")]
1563 impl ops::Index<ops::RangeToInclusive<usize>> for str {
1564 type Output = str;
1565
1566 #[inline]
1567 fn index(&self, index: ops::RangeToInclusive<usize>) -> &str {
1568 self.index(0...index.end)
1569 }
1570 }
1571
1572 #[unstable(feature = "inclusive_range",
1573 reason = "recently added, follows RFC",
1574 issue = "28237")]
1575 impl ops::IndexMut<ops::RangeInclusive<usize>> for str {
1576 #[inline]
1577 fn index_mut(&mut self, index: ops::RangeInclusive<usize>) -> &mut str {
1578 match index {
1579 ops::RangeInclusive::Empty { .. } => &mut self[0..0], // `&mut ""` doesn't work
1580 ops::RangeInclusive::NonEmpty { end, .. } if end == usize::max_value() =>
1581 panic!("attempted to index str up to maximum usize"),
1582 ops::RangeInclusive::NonEmpty { start, end } =>
1583 self.index_mut(start .. end+1)
1584 }
1585 }
1586 }
1587 #[unstable(feature = "inclusive_range",
1588 reason = "recently added, follows RFC",
1589 issue = "28237")]
1590 impl ops::IndexMut<ops::RangeToInclusive<usize>> for str {
1591 #[inline]
1592 fn index_mut(&mut self, index: ops::RangeToInclusive<usize>) -> &mut str {
1593 self.index_mut(0...index.end)
1594 }
1595 }
1a4d82fc
JJ
1596}
1597
1a4d82fc
JJ
1598/// Methods for string slices
1599#[allow(missing_docs)]
9346a6ac 1600#[doc(hidden)]
62682a34 1601#[unstable(feature = "core_str_ext",
e9174d1e 1602 reason = "stable interface provided by `impl str` in later crates",
54a0048b 1603 issue = "32110")]
1a4d82fc
JJ
1604pub trait StrExt {
1605 // NB there are no docs here as they're all located on the StrExt trait in
1606 // libcollections, not here.
1607
92a42be0 1608 #[stable(feature = "core", since = "1.6.0")]
c34b1796 1609 fn contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool;
92a42be0 1610 #[stable(feature = "core", since = "1.6.0")]
e9174d1e 1611 fn chars(&self) -> Chars;
92a42be0 1612 #[stable(feature = "core", since = "1.6.0")]
e9174d1e 1613 fn bytes(&self) -> Bytes;
92a42be0 1614 #[stable(feature = "core", since = "1.6.0")]
e9174d1e 1615 fn char_indices(&self) -> CharIndices;
92a42be0 1616 #[stable(feature = "core", since = "1.6.0")]
c34b1796 1617 fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P>;
92a42be0 1618 #[stable(feature = "core", since = "1.6.0")]
c34b1796
AL
1619 fn rsplit<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplit<'a, P>
1620 where P::Searcher: ReverseSearcher<'a>;
92a42be0 1621 #[stable(feature = "core", since = "1.6.0")]
9346a6ac 1622 fn splitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> SplitN<'a, P>;
92a42be0 1623 #[stable(feature = "core", since = "1.6.0")]
c34b1796
AL
1624 fn rsplitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> RSplitN<'a, P>
1625 where P::Searcher: ReverseSearcher<'a>;
92a42be0 1626 #[stable(feature = "core", since = "1.6.0")]
9346a6ac 1627 fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P>;
92a42be0 1628 #[stable(feature = "core", since = "1.6.0")]
9346a6ac
AL
1629 fn rsplit_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplitTerminator<'a, P>
1630 where P::Searcher: ReverseSearcher<'a>;
92a42be0 1631 #[stable(feature = "core", since = "1.6.0")]
9346a6ac 1632 fn matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> Matches<'a, P>;
92a42be0 1633 #[stable(feature = "core", since = "1.6.0")]
9346a6ac
AL
1634 fn rmatches<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatches<'a, P>
1635 where P::Searcher: ReverseSearcher<'a>;
92a42be0 1636 #[stable(feature = "core", since = "1.6.0")]
c34b1796 1637 fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P>;
92a42be0 1638 #[stable(feature = "core", since = "1.6.0")]
9346a6ac
AL
1639 fn rmatch_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatchIndices<'a, P>
1640 where P::Searcher: ReverseSearcher<'a>;
92a42be0 1641 #[stable(feature = "core", since = "1.6.0")]
e9174d1e 1642 fn lines(&self) -> Lines;
92a42be0
SL
1643 #[stable(feature = "core", since = "1.6.0")]
1644 #[rustc_deprecated(since = "1.6.0", reason = "use lines() instead now")]
e9174d1e
SL
1645 #[allow(deprecated)]
1646 fn lines_any(&self) -> LinesAny;
92a42be0 1647 #[stable(feature = "core", since = "1.6.0")]
e9174d1e 1648 unsafe fn slice_unchecked(&self, begin: usize, end: usize) -> &str;
92a42be0 1649 #[stable(feature = "core", since = "1.6.0")]
e9174d1e 1650 unsafe fn slice_mut_unchecked(&mut self, begin: usize, end: usize) -> &mut str;
92a42be0 1651 #[stable(feature = "core", since = "1.6.0")]
c34b1796 1652 fn starts_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool;
92a42be0 1653 #[stable(feature = "core", since = "1.6.0")]
c34b1796
AL
1654 fn ends_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool
1655 where P::Searcher: ReverseSearcher<'a>;
92a42be0 1656 #[stable(feature = "core", since = "1.6.0")]
c34b1796
AL
1657 fn trim_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
1658 where P::Searcher: DoubleEndedSearcher<'a>;
92a42be0 1659 #[stable(feature = "core", since = "1.6.0")]
c34b1796 1660 fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str;
92a42be0 1661 #[stable(feature = "core", since = "1.6.0")]
c34b1796
AL
1662 fn trim_right_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
1663 where P::Searcher: ReverseSearcher<'a>;
54a0048b 1664 #[stable(feature = "is_char_boundary", since = "1.9.0")]
85aaf69f 1665 fn is_char_boundary(&self, index: usize) -> bool;
92a42be0
SL
1666 #[unstable(feature = "str_char",
1667 reason = "often replaced by char_indices, this method may \
1668 be removed in favor of just char_at() or eventually \
1669 removed altogether",
1670 issue = "27754")]
54a0048b
SL
1671 #[rustc_deprecated(reason = "use slicing plus chars() plus len_utf8",
1672 since = "1.9.0")]
85aaf69f 1673 fn char_range_at(&self, start: usize) -> CharRange;
92a42be0
SL
1674 #[unstable(feature = "str_char",
1675 reason = "often replaced by char_indices, this method may \
1676 be removed in favor of just char_at_reverse() or \
1677 eventually removed altogether",
1678 issue = "27754")]
54a0048b
SL
1679 #[rustc_deprecated(reason = "use slicing plus chars().rev() plus len_utf8",
1680 since = "1.9.0")]
85aaf69f 1681 fn char_range_at_reverse(&self, start: usize) -> CharRange;
92a42be0
SL
1682 #[unstable(feature = "str_char",
1683 reason = "frequently replaced by the chars() iterator, this \
1684 method may be removed or possibly renamed in the \
1685 future; it is normally replaced by chars/char_indices \
1686 iterators or by getting the first char from a \
1687 subslice",
1688 issue = "27754")]
54a0048b
SL
1689 #[rustc_deprecated(reason = "use slicing plus chars()",
1690 since = "1.9.0")]
85aaf69f 1691 fn char_at(&self, i: usize) -> char;
92a42be0
SL
1692 #[unstable(feature = "str_char",
1693 reason = "see char_at for more details, but reverse semantics \
1694 are also somewhat unclear, especially with which \
1695 cases generate panics",
1696 issue = "27754")]
54a0048b
SL
1697 #[rustc_deprecated(reason = "use slicing plus chars().rev()",
1698 since = "1.9.0")]
85aaf69f 1699 fn char_at_reverse(&self, i: usize) -> char;
92a42be0 1700 #[stable(feature = "core", since = "1.6.0")]
e9174d1e 1701 fn as_bytes(&self) -> &[u8];
92a42be0 1702 #[stable(feature = "core", since = "1.6.0")]
c34b1796 1703 fn find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>;
92a42be0 1704 #[stable(feature = "core", since = "1.6.0")]
c34b1796
AL
1705 fn rfind<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>
1706 where P::Searcher: ReverseSearcher<'a>;
1707 fn find_str<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>;
92a42be0 1708 #[stable(feature = "core", since = "1.6.0")]
62682a34 1709 fn split_at(&self, mid: usize) -> (&str, &str);
92a42be0 1710 #[stable(feature = "core", since = "1.6.0")]
c1a9b12d 1711 fn split_at_mut(&mut self, mid: usize) -> (&mut str, &mut str);
92a42be0
SL
1712 #[unstable(feature = "str_char",
1713 reason = "awaiting conventions about shifting and slices and \
1714 may not be warranted with the existence of the chars \
1715 and/or char_indices iterators",
1716 issue = "27754")]
54a0048b
SL
1717 #[rustc_deprecated(reason = "use chars() plus Chars::as_str",
1718 since = "1.9.0")]
e9174d1e 1719 fn slice_shift_char(&self) -> Option<(char, &str)>;
92a42be0 1720 #[stable(feature = "core", since = "1.6.0")]
1a4d82fc 1721 fn as_ptr(&self) -> *const u8;
92a42be0 1722 #[stable(feature = "core", since = "1.6.0")]
85aaf69f 1723 fn len(&self) -> usize;
92a42be0 1724 #[stable(feature = "core", since = "1.6.0")]
1a4d82fc 1725 fn is_empty(&self) -> bool;
92a42be0 1726 #[stable(feature = "core", since = "1.6.0")]
85aaf69f 1727 fn parse<T: FromStr>(&self) -> Result<T, T::Err>;
1a4d82fc
JJ
1728}
1729
54a0048b
SL
// Shorten `s` so that it is at most `max` bytes long without cutting a
// character in half. Returns whether anything was cut off, together with
// the (possibly shortened) string.
fn truncate_to_char_boundary(s: &str, mut max: usize) -> (bool, &str) {
    // Already short enough: nothing to do.
    if max >= s.len() {
        return (false, s);
    }
    // Walk backwards to the nearest char boundary; index 0 always is one,
    // so the loop terminates.
    loop {
        if s.is_char_boundary(max) {
            return (true, &s[..max]);
        }
        max -= 1;
    }
}
1742
1a4d82fc 1743#[inline(never)]
92a42be0 1744#[cold]
85aaf69f 1745fn slice_error_fail(s: &str, begin: usize, end: usize) -> ! {
54a0048b
SL
1746 const MAX_DISPLAY_LENGTH: usize = 256;
1747 let (truncated, s) = truncate_to_char_boundary(s, MAX_DISPLAY_LENGTH);
1748 let ellipsis = if truncated { "[...]" } else { "" };
1749
1750 assert!(begin <= end, "begin <= end ({} <= {}) when slicing `{}`{}",
1751 begin, end, s, ellipsis);
1752 panic!("index {} and/or {} in `{}`{} do not lie on character boundary",
1753 begin, end, s, ellipsis);
1a4d82fc
JJ
1754}
1755
92a42be0 1756#[stable(feature = "core", since = "1.6.0")]
1a4d82fc
JJ
1757impl StrExt for str {
1758 #[inline]
c34b1796
AL
1759 fn contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
1760 pat.is_contained_in(self)
1a4d82fc
JJ
1761 }
1762
1a4d82fc
JJ
1763 #[inline]
1764 fn chars(&self) -> Chars {
1765 Chars{iter: self.as_bytes().iter()}
1766 }
1767
1768 #[inline]
1769 fn bytes(&self) -> Bytes {
e9174d1e 1770 Bytes(self.as_bytes().iter().cloned())
1a4d82fc
JJ
1771 }
1772
1773 #[inline]
1774 fn char_indices(&self) -> CharIndices {
1775 CharIndices { front_offset: 0, iter: self.chars() }
1776 }
1777
1778 #[inline]
c34b1796 1779 fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P> {
9346a6ac 1780 Split(SplitInternal {
c34b1796
AL
1781 start: 0,
1782 end: self.len(),
1783 matcher: pat.into_searcher(self),
1a4d82fc
JJ
1784 allow_trailing_empty: true,
1785 finished: false,
1786 })
1787 }
1788
9346a6ac
AL
1789 #[inline]
1790 fn rsplit<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplit<'a, P>
1791 where P::Searcher: ReverseSearcher<'a>
1792 {
1793 RSplit(self.split(pat).0)
1794 }
1795
1a4d82fc 1796 #[inline]
c34b1796 1797 fn splitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> SplitN<'a, P> {
9346a6ac 1798 SplitN(SplitNInternal {
1a4d82fc
JJ
1799 iter: self.split(pat).0,
1800 count: count,
1a4d82fc
JJ
1801 })
1802 }
1803
9346a6ac
AL
1804 #[inline]
1805 fn rsplitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> RSplitN<'a, P>
1806 where P::Searcher: ReverseSearcher<'a>
1807 {
1808 RSplitN(self.splitn(count, pat).0)
1809 }
1810
1a4d82fc 1811 #[inline]
c34b1796 1812 fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P> {
9346a6ac 1813 SplitTerminator(SplitInternal {
1a4d82fc
JJ
1814 allow_trailing_empty: false,
1815 ..self.split(pat).0
1816 })
1817 }
1818
1819 #[inline]
9346a6ac 1820 fn rsplit_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplitTerminator<'a, P>
c34b1796
AL
1821 where P::Searcher: ReverseSearcher<'a>
1822 {
9346a6ac 1823 RSplitTerminator(self.split_terminator(pat).0)
1a4d82fc
JJ
1824 }
1825
1826 #[inline]
9346a6ac
AL
1827 fn matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> Matches<'a, P> {
1828 Matches(MatchesInternal(pat.into_searcher(self)))
1829 }
1830
1831 #[inline]
1832 fn rmatches<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatches<'a, P>
c34b1796
AL
1833 where P::Searcher: ReverseSearcher<'a>
1834 {
9346a6ac 1835 RMatches(self.matches(pat).0)
1a4d82fc
JJ
1836 }
1837
1838 #[inline]
c34b1796 1839 fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P> {
9346a6ac 1840 MatchIndices(MatchIndicesInternal(pat.into_searcher(self)))
1a4d82fc
JJ
1841 }
1842
9346a6ac
AL
1843 #[inline]
1844 fn rmatch_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatchIndices<'a, P>
1845 where P::Searcher: ReverseSearcher<'a>
1846 {
1847 RMatchIndices(self.match_indices(pat).0)
1848 }
1a4d82fc
JJ
1849 #[inline]
1850 fn lines(&self) -> Lines {
e9174d1e 1851 Lines(self.split_terminator('\n').map(LinesAnyMap))
1a4d82fc
JJ
1852 }
1853
9346a6ac 1854 #[inline]
e9174d1e 1855 #[allow(deprecated)]
1a4d82fc 1856 fn lines_any(&self) -> LinesAny {
e9174d1e 1857 LinesAny(self.lines())
1a4d82fc
JJ
1858 }
1859
1a4d82fc 1860 #[inline]
85aaf69f 1861 unsafe fn slice_unchecked(&self, begin: usize, end: usize) -> &str {
54a0048b
SL
1862 let ptr = self.as_ptr().offset(begin as isize);
1863 let len = end - begin;
1864 from_utf8_unchecked(slice::from_raw_parts(ptr, len))
1a4d82fc
JJ
1865 }
1866
c1a9b12d
SL
1867 #[inline]
1868 unsafe fn slice_mut_unchecked(&mut self, begin: usize, end: usize) -> &mut str {
54a0048b
SL
1869 let ptr = self.as_ptr().offset(begin as isize);
1870 let len = end - begin;
1871 mem::transmute(slice::from_raw_parts_mut(ptr as *mut u8, len))
c1a9b12d
SL
1872 }
1873
1a4d82fc 1874 #[inline]
c34b1796
AL
1875 fn starts_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
1876 pat.is_prefix_of(self)
1a4d82fc
JJ
1877 }
1878
1879 #[inline]
c34b1796
AL
1880 fn ends_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool
1881 where P::Searcher: ReverseSearcher<'a>
1882 {
1883 pat.is_suffix_of(self)
1a4d82fc
JJ
1884 }
1885
1886 #[inline]
c34b1796
AL
1887 fn trim_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
1888 where P::Searcher: DoubleEndedSearcher<'a>
1889 {
1890 let mut i = 0;
1891 let mut j = 0;
1892 let mut matcher = pat.into_searcher(self);
1893 if let Some((a, b)) = matcher.next_reject() {
1894 i = a;
7453a54e 1895 j = b; // Remember earliest known match, correct it below if
c34b1796
AL
1896 // last match is different
1897 }
1898 if let Some((_, b)) = matcher.next_reject_back() {
1899 j = b;
1900 }
1901 unsafe {
1902 // Searcher is known to return valid indices
1903 self.slice_unchecked(i, j)
1a4d82fc
JJ
1904 }
1905 }
1906
1907 #[inline]
c34b1796
AL
1908 fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str {
1909 let mut i = self.len();
1910 let mut matcher = pat.into_searcher(self);
1911 if let Some((a, _)) = matcher.next_reject() {
1912 i = a;
1913 }
1914 unsafe {
1915 // Searcher is known to return valid indices
1916 self.slice_unchecked(i, self.len())
1a4d82fc
JJ
1917 }
1918 }
1919
1920 #[inline]
c34b1796
AL
1921 fn trim_right_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
1922 where P::Searcher: ReverseSearcher<'a>
1923 {
1924 let mut j = 0;
1925 let mut matcher = pat.into_searcher(self);
1926 if let Some((_, b)) = matcher.next_reject_back() {
1927 j = b;
1928 }
1929 unsafe {
1930 // Searcher is known to return valid indices
1931 self.slice_unchecked(0, j)
1a4d82fc
JJ
1932 }
1933 }
1934
1935 #[inline]
85aaf69f 1936 fn is_char_boundary(&self, index: usize) -> bool {
54a0048b
SL
1937 // 0 and len are always ok.
1938 // Test for 0 explicitly so that it can optimize out the check
1939 // easily and skip reading string data for that case.
1940 if index == 0 || index == self.len() { return true; }
1a4d82fc
JJ
1941 match self.as_bytes().get(index) {
1942 None => false,
a7813a04
XL
1943 // This is bit magic equivalent to: b < 128 || b >= 192
1944 Some(&b) => (b as i8) >= -0x40,
1a4d82fc
JJ
1945 }
1946 }
1947
1948 #[inline]
85aaf69f
SL
1949 fn char_range_at(&self, i: usize) -> CharRange {
1950 let (c, n) = char_range_at_raw(self.as_bytes(), i);
e9174d1e 1951 CharRange { ch: unsafe { char::from_u32_unchecked(c) }, next: n }
1a4d82fc
JJ
1952 }
1953
1954 #[inline]
85aaf69f 1955 fn char_range_at_reverse(&self, start: usize) -> CharRange {
1a4d82fc
JJ
1956 let mut prev = start;
1957
1958 prev = prev.saturating_sub(1);
1959 if self.as_bytes()[prev] < 128 {
1960 return CharRange{ch: self.as_bytes()[prev] as char, next: prev}
1961 }
1962
1963 // Multibyte case is a fn to allow char_range_at_reverse to inline cleanly
85aaf69f 1964 fn multibyte_char_range_at_reverse(s: &str, mut i: usize) -> CharRange {
1a4d82fc
JJ
1965 // while there is a previous byte == 10......
1966 while i > 0 && s.as_bytes()[i] & !CONT_MASK == TAG_CONT_U8 {
85aaf69f 1967 i -= 1;
1a4d82fc
JJ
1968 }
1969
c34b1796
AL
1970 let first= s.as_bytes()[i];
1971 let w = UTF8_CHAR_WIDTH[first as usize];
1972 assert!(w != 0);
1a4d82fc 1973
c34b1796
AL
1974 let mut val = utf8_first_byte(first, w as u32);
1975 val = utf8_acc_cont_byte(val, s.as_bytes()[i + 1]);
1976 if w > 2 { val = utf8_acc_cont_byte(val, s.as_bytes()[i + 2]); }
1977 if w > 3 { val = utf8_acc_cont_byte(val, s.as_bytes()[i + 3]); }
1a4d82fc 1978
e9174d1e 1979 CharRange {ch: unsafe { char::from_u32_unchecked(val) }, next: i}
1a4d82fc
JJ
1980 }
1981
e9174d1e 1982 multibyte_char_range_at_reverse(self, prev)
1a4d82fc
JJ
1983 }
1984
1985 #[inline]
54a0048b 1986 #[allow(deprecated)]
85aaf69f 1987 fn char_at(&self, i: usize) -> char {
1a4d82fc
JJ
1988 self.char_range_at(i).ch
1989 }
1990
1991 #[inline]
54a0048b 1992 #[allow(deprecated)]
85aaf69f 1993 fn char_at_reverse(&self, i: usize) -> char {
1a4d82fc
JJ
1994 self.char_range_at_reverse(i).ch
1995 }
1996
1997 #[inline]
1998 fn as_bytes(&self) -> &[u8] {
1999 unsafe { mem::transmute(self) }
2000 }
2001
c34b1796
AL
2002 fn find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize> {
2003 pat.into_searcher(self).next_match().map(|(i, _)| i)
1a4d82fc
JJ
2004 }
2005
c34b1796
AL
2006 fn rfind<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>
2007 where P::Searcher: ReverseSearcher<'a>
2008 {
2009 pat.into_searcher(self).next_match_back().map(|(i, _)| i)
1a4d82fc
JJ
2010 }
2011
c34b1796
AL
2012 fn find_str<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize> {
2013 self.find(pat)
1a4d82fc
JJ
2014 }
2015
54a0048b 2016 #[inline]
62682a34
SL
2017 fn split_at(&self, mid: usize) -> (&str, &str) {
2018 // is_char_boundary checks that the index is in [0, .len()]
2019 if self.is_char_boundary(mid) {
2020 unsafe {
2021 (self.slice_unchecked(0, mid),
2022 self.slice_unchecked(mid, self.len()))
2023 }
2024 } else {
2025 slice_error_fail(self, 0, mid)
2026 }
2027 }
2028
c1a9b12d
SL
2029 fn split_at_mut(&mut self, mid: usize) -> (&mut str, &mut str) {
2030 // is_char_boundary checks that the index is in [0, .len()]
2031 if self.is_char_boundary(mid) {
2032 let len = self.len();
7453a54e 2033 let ptr = self.as_ptr() as *mut u8;
c1a9b12d 2034 unsafe {
7453a54e
SL
2035 (from_raw_parts_mut(ptr, mid),
2036 from_raw_parts_mut(ptr.offset(mid as isize), len - mid))
c1a9b12d
SL
2037 }
2038 } else {
2039 slice_error_fail(self, 0, mid)
2040 }
2041 }
2042
1a4d82fc 2043 #[inline]
54a0048b 2044 #[allow(deprecated)]
1a4d82fc
JJ
2045 fn slice_shift_char(&self) -> Option<(char, &str)> {
2046 if self.is_empty() {
2047 None
2048 } else {
c34b1796
AL
2049 let ch = self.char_at(0);
2050 let next_s = unsafe { self.slice_unchecked(ch.len_utf8(), self.len()) };
1a4d82fc
JJ
2051 Some((ch, next_s))
2052 }
2053 }
2054
1a4d82fc
JJ
2055 #[inline]
2056 fn as_ptr(&self) -> *const u8 {
54a0048b 2057 self as *const str as *const u8
1a4d82fc
JJ
2058 }
2059
2060 #[inline]
54a0048b
SL
2061 fn len(&self) -> usize {
2062 self.as_bytes().len()
2063 }
1a4d82fc
JJ
2064
2065 #[inline]
2066 fn is_empty(&self) -> bool { self.len() == 0 }
2067
2068 #[inline]
85aaf69f 2069 fn parse<T: FromStr>(&self) -> Result<T, T::Err> { FromStr::from_str(self) }
1a4d82fc
JJ
2070}
2071
bd371182
AL
2072#[stable(feature = "rust1", since = "1.0.0")]
2073impl AsRef<[u8]> for str {
2074 #[inline]
2075 fn as_ref(&self) -> &[u8] {
2076 self.as_bytes()
2077 }
2078}
2079
85aaf69f
SL
2080/// Pluck a code point out of a UTF-8-like byte slice and return the
2081/// index of the next code point.
2082#[inline]
62682a34 2083fn char_range_at_raw(bytes: &[u8], i: usize) -> (u32, usize) {
c34b1796 2084 if bytes[i] < 128 {
85aaf69f
SL
2085 return (bytes[i] as u32, i + 1);
2086 }
2087
2088 // Multibyte case is a fn to allow char_range_at to inline cleanly
2089 fn multibyte_char_range_at(bytes: &[u8], i: usize) -> (u32, usize) {
c34b1796
AL
2090 let first = bytes[i];
2091 let w = UTF8_CHAR_WIDTH[first as usize];
2092 assert!(w != 0);
85aaf69f 2093
c34b1796
AL
2094 let mut val = utf8_first_byte(first, w as u32);
2095 val = utf8_acc_cont_byte(val, bytes[i + 1]);
2096 if w > 2 { val = utf8_acc_cont_byte(val, bytes[i + 2]); }
2097 if w > 3 { val = utf8_acc_cont_byte(val, bytes[i + 3]); }
85aaf69f 2098
e9174d1e 2099 (val, i + w as usize)
85aaf69f
SL
2100 }
2101
2102 multibyte_char_range_at(bytes, i)
2103}
2104
2105#[stable(feature = "rust1", since = "1.0.0")]
1a4d82fc 2106impl<'a> Default for &'a str {
1a4d82fc
JJ
2107 fn default() -> &'a str { "" }
2108}