]> git.proxmox.com Git - rustc.git/blob - src/libstd_unicode/u_str.rs
New upstream version 1.19.0+dfsg1
[rustc.git] / src / libstd_unicode / u_str.rs
1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
4 //
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
10
11 //! Unicode-intensive string manipulations.
12 //!
13 //! This module provides functionality to `str` that requires the Unicode
14 //! methods provided by the unicode parts of the CharExt trait.
15
16 use core::char;
17 use core::iter::{Filter, FusedIterator};
18 use core::str::Split;
19
20 /// An iterator over the non-whitespace substrings of a string,
21 /// separated by any amount of whitespace.
22 ///
23 /// This struct is created by the [`split_whitespace`] method on [`str`].
24 /// See its documentation for more.
25 ///
26 /// [`split_whitespace`]: ../../std/primitive.str.html#method.split_whitespace
27 /// [`str`]: ../../std/primitive.str.html
28 #[stable(feature = "split_whitespace", since = "1.1.0")]
29 #[derive(Clone)]
30 pub struct SplitWhitespace<'a> {
31 inner: Filter<Split<'a, IsWhitespace>, IsNotEmpty>,
32 }
33
34 /// Methods for Unicode string slices
35 #[allow(missing_docs)] // docs in libcollections
36 pub trait UnicodeStr {
37 fn split_whitespace<'a>(&'a self) -> SplitWhitespace<'a>;
38 fn is_whitespace(&self) -> bool;
39 fn is_alphanumeric(&self) -> bool;
40 fn trim(&self) -> &str;
41 fn trim_left(&self) -> &str;
42 fn trim_right(&self) -> &str;
43 }
44
45 impl UnicodeStr for str {
46 #[inline]
47 fn split_whitespace(&self) -> SplitWhitespace {
48 SplitWhitespace { inner: self.split(IsWhitespace).filter(IsNotEmpty) }
49 }
50
51 #[inline]
52 fn is_whitespace(&self) -> bool {
53 self.chars().all(|c| c.is_whitespace())
54 }
55
56 #[inline]
57 fn is_alphanumeric(&self) -> bool {
58 self.chars().all(|c| c.is_alphanumeric())
59 }
60
61 #[inline]
62 fn trim(&self) -> &str {
63 self.trim_matches(|c: char| c.is_whitespace())
64 }
65
66 #[inline]
67 fn trim_left(&self) -> &str {
68 self.trim_left_matches(|c: char| c.is_whitespace())
69 }
70
71 #[inline]
72 fn trim_right(&self) -> &str {
73 self.trim_right_matches(|c: char| c.is_whitespace())
74 }
75 }
76
/// Iterator adaptor for encoding `char`s to UTF-16.
///
/// Each `char` pulled from the wrapped iterator is yielded as one `u16`
/// code unit, or as two consecutive code units when its UTF-16 encoding
/// needs a surrogate pair.
#[derive(Clone)]
pub struct Utf16Encoder<I> {
    /// Source of the `char`s that still have to be encoded.
    chars: I,
    /// Second code unit of a two-unit encoding that has been computed but
    /// not yet yielded. `0` means "nothing pending"; that is unambiguous
    /// because the second unit of a surrogate pair is never `0`.
    extra: u16,
}

impl<I> Utf16Encoder<I> {
    /// Create a UTF-16 encoder from any `char` iterator.
    pub fn new(chars: I) -> Utf16Encoder<I>
        where I: Iterator<Item = char>
    {
        Utf16Encoder {
            chars: chars,
            extra: 0,
        }
    }
}

impl<I> Iterator for Utf16Encoder<I>
    where I: Iterator<Item = char>
{
    type Item = u16;

    /// Yields the next UTF-16 code unit, or `None` once the wrapped
    /// iterator is exhausted and no code unit is pending.
    #[inline]
    fn next(&mut self) -> Option<u16> {
        // Flush the buffered second unit before pulling another `char`.
        if self.extra != 0 {
            let tmp = self.extra;
            self.extra = 0;
            return Some(tmp);
        }

        let mut buf = [0; 2];
        self.chars.next().map(|ch| {
            let n = ch.encode_utf16(&mut buf).len();
            if n == 2 {
                // Remember the second unit; it is returned by the next call.
                self.extra = buf[1];
            }
            buf[0]
        })
    }

    /// Bounds on the number of code units still to be yielded.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        let (low, high) = self.chars.size_hint();
        // Every remaining char produces either one or two u16s, so the
        // inner bounds scale by a factor between 1 and 2. An overflowing
        // upper bound degrades to "unknown" (`None`).
        let high = high.and_then(|n| n.checked_mul(2));
        if self.extra == 0 {
            (low, high)
        } else {
            // A buffered code unit is yielded in addition to whatever the
            // remaining chars produce, so it must be counted in both bounds.
            (low.saturating_add(1), high.and_then(|n| n.checked_add(1)))
        }
    }
}
128
129 #[unstable(feature = "fused", issue = "35602")]
130 impl<I> FusedIterator for Utf16Encoder<I>
131 where I: FusedIterator<Item = char> {}
132
133 #[derive(Clone)]
134 struct IsWhitespace;
135
136 impl FnOnce<(char, )> for IsWhitespace {
137 type Output = bool;
138
139 #[inline]
140 extern "rust-call" fn call_once(mut self, arg: (char, )) -> bool {
141 self.call_mut(arg)
142 }
143 }
144
145 impl FnMut<(char, )> for IsWhitespace {
146 #[inline]
147 extern "rust-call" fn call_mut(&mut self, arg: (char, )) -> bool {
148 arg.0.is_whitespace()
149 }
150 }
151
152 #[derive(Clone)]
153 struct IsNotEmpty;
154
155 impl<'a, 'b> FnOnce<(&'a &'b str, )> for IsNotEmpty {
156 type Output = bool;
157
158 #[inline]
159 extern "rust-call" fn call_once(mut self, arg: (&&str, )) -> bool {
160 self.call_mut(arg)
161 }
162 }
163
164 impl<'a, 'b> FnMut<(&'a &'b str, )> for IsNotEmpty {
165 #[inline]
166 extern "rust-call" fn call_mut(&mut self, arg: (&&str, )) -> bool {
167 !arg.0.is_empty()
168 }
169 }
170
171
172 #[stable(feature = "split_whitespace", since = "1.1.0")]
173 impl<'a> Iterator for SplitWhitespace<'a> {
174 type Item = &'a str;
175
176 fn next(&mut self) -> Option<&'a str> {
177 self.inner.next()
178 }
179 }
180
181 #[stable(feature = "split_whitespace", since = "1.1.0")]
182 impl<'a> DoubleEndedIterator for SplitWhitespace<'a> {
183 fn next_back(&mut self) -> Option<&'a str> {
184 self.inner.next_back()
185 }
186 }
187
188 #[unstable(feature = "fused", issue = "35602")]
189 impl<'a> FusedIterator for SplitWhitespace<'a> {}