]>
Commit | Line | Data |
---|---|---|
1a4d82fc JJ |
1 | // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT |
2 | // file at the top-level directory of this distribution and at | |
3 | // http://rust-lang.org/COPYRIGHT. | |
4 | // | |
5 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | |
6 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | |
7 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | |
8 | // option. This file may not be copied, modified, or distributed | |
9 | // except according to those terms. | |
10 | ||
11 | //! Character manipulation. | |
12 | //! | |
d9579d0f | 13 | //! For more details, see ::rustc_unicode::char (a.k.a. std::char) |
1a4d82fc JJ |
14 | |
15 | #![allow(non_snake_case)] | |
62682a34 | 16 | #![stable(feature = "core_char", since = "1.2.0")] |
1a4d82fc JJ |
17 | |
18 | use iter::Iterator; | |
19 | use mem::transmute; | |
20 | use option::Option::{None, Some}; | |
21 | use option::Option; | |
22 | use slice::SliceExt; | |
23 | ||
24 | // UTF-8 ranges and tags for encoding characters | |
c34b1796 AL |
// Tag bits OR-ed into the bytes produced by `encode_utf8`:
// `TAG_CONT` marks every trailing byte of a multi-byte sequence, while
// `TAG_TWO_B` / `TAG_THREE_B` / `TAG_FOUR_B` mark the first byte of a
// two-, three- or four-byte sequence respectively.
25 | const TAG_CONT: u8 = 0b1000_0000; |
26 | const TAG_TWO_B: u8 = 0b1100_0000; | |
27 | const TAG_THREE_B: u8 = 0b1110_0000; | |
28 | const TAG_FOUR_B: u8 = 0b1111_0000; | |
// Exclusive upper bounds on the code point: `len_utf8` and `encode_utf8`
// use one, two or three bytes for code points below the matching bound,
// and four bytes for everything else.
29 | const MAX_ONE_B: u32 = 0x80; | |
30 | const MAX_TWO_B: u32 = 0x800; | |
31 | const MAX_THREE_B: u32 = 0x10000; | |
1a4d82fc JJ |
32 | |
33 | /* | |
34 | Lu Uppercase_Letter an uppercase letter | |
35 | Ll Lowercase_Letter a lowercase letter | |
36 | Lt Titlecase_Letter a digraphic character, with first part uppercase | |
37 | Lm Modifier_Letter a modifier letter | |
38 | Lo Other_Letter other letters, including syllables and ideographs | |
39 | Mn Nonspacing_Mark a nonspacing combining mark (zero advance width) | |
40 | Mc Spacing_Mark a spacing combining mark (positive advance width) | |
41 | Me Enclosing_Mark an enclosing combining mark | |
42 | Nd Decimal_Number a decimal digit | |
43 | Nl Letter_Number a letterlike numeric character | |
44 | No Other_Number a numeric character of other type | |
45 | Pc Connector_Punctuation a connecting punctuation mark, like a tie | |
46 | Pd Dash_Punctuation a dash or hyphen punctuation mark | |
47 | Ps Open_Punctuation an opening punctuation mark (of a pair) | |
48 | Pe Close_Punctuation a closing punctuation mark (of a pair) | |
49 | Pi Initial_Punctuation an initial quotation mark | |
50 | Pf Final_Punctuation a final quotation mark | |
51 | Po Other_Punctuation a punctuation mark of other type | |
52 | Sm Math_Symbol a symbol of primarily mathematical use | |
53 | Sc Currency_Symbol a currency sign | |
54 | Sk Modifier_Symbol a non-letterlike modifier symbol | |
55 | So Other_Symbol a symbol of other type | |
56 | Zs Space_Separator a space character (of various non-zero widths) | |
57 | Zl Line_Separator U+2028 LINE SEPARATOR only | |
58 | Zp Paragraph_Separator U+2029 PARAGRAPH SEPARATOR only | |
59 | Cc Control a C0 or C1 control code | |
60 | Cf Format a format control character | |
61 | Cs Surrogate a surrogate code point | |
62 | Co Private_Use a private-use character | |
63 | Cn Unassigned a reserved unassigned code point or a noncharacter | |
64 | */ | |
65 | ||
92a42be0 SL |
66 | /// The highest valid code point a `char` can have. |
67 | /// | |
68 | /// A [`char`] is a [Unicode Scalar Value], which means that it is a [Code | |
69 | /// Point], but only ones within a certain range. `MAX` is the highest valid | |
70 | /// code point that's a valid [Unicode Scalar Value]. | |
///
/// `from_u32()` returns `None` for every `u32` greater than `MAX as u32`.
71 | /// | |
54a0048b | 72 | /// [`char`]: ../../std/primitive.char.html |
92a42be0 SL |
73 | /// [Unicode Scalar Value]: http://www.unicode.org/glossary/#unicode_scalar_value |
74 | /// [Code Point]: http://www.unicode.org/glossary/#code_point | |
85aaf69f | 75 | #[stable(feature = "rust1", since = "1.0.0")] |
1a4d82fc JJ |
76 | pub const MAX: char = '\u{10ffff}'; |
77 | ||
92a42be0 SL |
78 | /// Converts a `u32` to a `char`. |
79 | /// | |
80 | /// Note that all [`char`]s are valid [`u32`]s, and can be casted to one with | |
81 | /// [`as`]: | |
82 | /// | |
83 | /// ``` | |
84 | /// let c = '💯'; | |
85 | /// let i = c as u32; | |
86 | /// | |
87 | /// assert_eq!(128175, i); | |
88 | /// ``` | |
89 | /// | |
90 | /// However, the reverse is not true: not all valid [`u32`]s are valid | |
91 | /// [`char`]s. `from_u32()` will return `None` if the input is not a valid value | |
92 | /// for a [`char`]. | |
93 | /// | |
54a0048b SL |
94 | /// [`char`]: ../../std/primitive.char.html |
95 | /// [`u32`]: ../../std/primitive.u32.html | |
9cc50fc6 | 96 | /// [`as`]: ../../book/casting-between-types.html#as |
92a42be0 SL |
97 | /// |
98 | /// For an unsafe version of this function which ignores these checks, see | |
99 | /// [`from_u32_unchecked()`]. | |
100 | /// | |
101 | /// [`from_u32_unchecked()`]: fn.from_u32_unchecked.html | |
85aaf69f SL |
102 | /// |
103 | /// # Examples | |
104 | /// | |
92a42be0 SL |
105 | /// Basic usage: |
106 | /// | |
85aaf69f SL |
107 | /// ``` |
108 | /// use std::char; | |
109 | /// | |
92a42be0 SL |
110 | /// let c = char::from_u32(0x2764); |
111 | /// | |
112 | /// assert_eq!(Some('❤'), c); | |
113 | /// ``` | |
114 | /// | |
115 | /// Returning `None` when the input is not a valid [`char`]: | |
116 | /// | |
117 | /// ``` | |
118 | /// use std::char; | |
119 | /// | |
120 | /// let c = char::from_u32(0x110000); | |
121 | /// | |
122 | /// assert_eq!(None, c); | |
85aaf69f | 123 | /// ``` |
1a4d82fc | 124 | #[inline] |
85aaf69f | 125 | #[stable(feature = "rust1", since = "1.0.0")] |
1a4d82fc JJ |
126 | pub fn from_u32(i: u32) -> Option<char> { |
127 | // catch out-of-bounds and surrogates | |
128 | if (i > MAX as u32) || (i >= 0xD800 && i <= 0xDFFF) { | |
129 | None | |
130 | } else { | |
c1a9b12d | 131 | Some(unsafe { from_u32_unchecked(i) }) |
1a4d82fc JJ |
132 | } |
133 | } | |
134 | ||
92a42be0 SL |
135 | /// Converts a `u32` to a `char`, ignoring validity. |
136 | /// | |
137 | /// Note that all [`char`]s are valid [`u32`]s, and can be casted to one with | |
138 | /// [`as`]: | |
139 | /// | |
140 | /// ``` | |
141 | /// let c = '💯'; | |
142 | /// let i = c as u32; | |
143 | /// | |
144 | /// assert_eq!(128175, i); | |
145 | /// ``` | |
146 | /// | |
147 | /// However, the reverse is not true: not all valid [`u32`]s are valid | |
148 | /// [`char`]s. `from_u32_unchecked()` will ignore this, and blindly cast to | |
149 | /// [`char`], possibly creating an invalid one. | |
150 | /// | |
54a0048b SL |
151 | /// [`char`]: ../../std/primitive.char.html |
152 | /// [`u32`]: ../../std/primitive.u32.html | |
9cc50fc6 | 153 | /// [`as`]: ../../book/casting-between-types.html#as |
92a42be0 SL |
154 | /// |
155 | /// # Safety | |
156 | /// | |
157 | /// This function is unsafe, as it may construct invalid `char` values. | |
158 | /// | |
159 | /// For a safe version of this function, see the [`from_u32()`] function. | |
160 | /// | |
161 | /// [`from_u32()`]: fn.from_u32.html | |
162 | /// | |
163 | /// # Examples | |
164 | /// | |
165 | /// Basic usage: | |
166 | /// | |
167 | /// ``` | |
168 | /// use std::char; | |
169 | /// | |
170 | /// let c = unsafe { char::from_u32_unchecked(0x2764) }; | |
171 | /// | |
172 | /// assert_eq!('❤', c); | |
173 | /// ``` | |
c1a9b12d | 174 | #[inline] |
b039eaaf | 175 | #[stable(feature = "char_from_unchecked", since = "1.5.0")] |
c1a9b12d SL |
176 | pub unsafe fn from_u32_unchecked(i: u32) -> char { |
177 | transmute(i) | |
178 | } | |
179 | ||
92a42be0 | 180 | /// Converts a digit in the given radix to a `char`. |
1a4d82fc | 181 | /// |
92a42be0 SL |
182 | /// A 'radix' here is sometimes also called a 'base'. A radix of two |
183 | /// indicates a binary number, a radix of ten, decimal, and a radix of | |
9cc50fc6 | 184 | /// sixteen, hexadecimal, to give some common values. Arbitrary |
92a42be0 | 185 | /// radicum are supported. |
1a4d82fc | 186 | /// |
92a42be0 SL |
187 | /// `from_digit()` will return `None` if the input is not a digit in |
188 | /// the given radix. | |
1a4d82fc JJ |
189 | /// |
190 | /// # Panics | |
191 | /// | |
92a42be0 | 192 | /// Panics if given a radix larger than 36. |
1a4d82fc | 193 | /// |
85aaf69f SL |
194 | /// # Examples |
195 | /// | |
92a42be0 SL |
196 | /// Basic usage: |
197 | /// | |
85aaf69f SL |
198 | /// ``` |
199 | /// use std::char; | |
200 | /// | |
201 | /// let c = char::from_digit(4, 10); | |
202 | /// | |
92a42be0 SL |
203 | /// assert_eq!(Some('4'), c); |
204 | /// | |
205 | /// // Decimal 11 is a single digit in base 16 | |
206 | /// let c = char::from_digit(11, 16); | |
207 | /// | |
208 | /// assert_eq!(Some('b'), c); | |
209 | /// ``` | |
210 | /// | |
211 | /// Returning `None` when the input is not a digit: | |
212 | /// | |
213 | /// ``` | |
214 | /// use std::char; | |
215 | /// | |
216 | /// let c = char::from_digit(20, 10); | |
217 | /// | |
218 | /// assert_eq!(None, c); | |
219 | /// ``` | |
220 | /// | |
221 | /// Passing a large radix, causing a panic: | |
222 | /// | |
223 | /// ``` | |
224 | /// use std::thread; | |
225 | /// use std::char; | |
226 | /// | |
227 | /// let result = thread::spawn(|| { | |
228 | /// // this panics | |
229 | /// let c = char::from_digit(1, 37); | |
230 | /// }).join(); | |
231 | /// | |
232 | /// assert!(result.is_err()); | |
85aaf69f | 233 | /// ``` |
1a4d82fc | 234 | #[inline] |
c34b1796 | 235 | #[stable(feature = "rust1", since = "1.0.0")] |
85aaf69f | 236 | pub fn from_digit(num: u32, radix: u32) -> Option<char> { |
1a4d82fc JJ |
237 | if radix > 36 { |
238 | panic!("from_digit: radix is too high (maximum 36)"); | |
239 | } | |
240 | if num < radix { | |
c1a9b12d SL |
241 | let num = num as u8; |
242 | if num < 10 { | |
243 | Some((b'0' + num) as char) | |
244 | } else { | |
245 | Some((b'a' + num - 10) as char) | |
1a4d82fc JJ |
246 | } |
247 | } else { | |
248 | None | |
249 | } | |
250 | } | |
251 | ||
c34b1796 AL |
252 | // NB: the stabilization and documentation for this trait is in |
253 | // unicode/char.rs, not here | |
254 | #[allow(missing_docs)] // docs in libunicode/u_char.rs | |
9346a6ac | 255 | #[doc(hidden)] |
62682a34 | 256 | #[unstable(feature = "core_char_ext", |
e9174d1e | 257 | reason = "the stable interface is `impl char` in later crate", |
54a0048b | 258 | issue = "32110")] |
1a4d82fc | 259 | pub trait CharExt { |
    // Whether `self` is a digit in `radix`; the impl for `char` answers
    // this via `to_digit`.
92a42be0 | 260 | #[stable(feature = "core", since = "1.6.0")] |
85aaf69f | 261 | fn is_digit(self, radix: u32) -> bool; |
    // Numeric value of `self` as a digit in `radix`, if it is one. The impl
    // for `char` accepts `0-9`, `a-z` and `A-Z` and panics when `radix`
    // exceeds 36.
92a42be0 | 262 | #[stable(feature = "core", since = "1.6.0")] |
85aaf69f | 263 | fn to_digit(self, radix: u32) -> Option<u32>; |
    // Iterator over the `\u{...}` hexadecimal escape of `self`.
92a42be0 | 264 | #[stable(feature = "core", since = "1.6.0")] |
1a4d82fc | 265 | fn escape_unicode(self) -> EscapeUnicode; |
    // Iterator over the literal escape of `self`: a backslash escape, the
    // character itself, or its `\u{...}` form.
92a42be0 | 266 | #[stable(feature = "core", since = "1.6.0")] |
1a4d82fc | 267 | fn escape_default(self) -> EscapeDefault; |
    // Number of bytes (1 to 4) used by the UTF-8 encoding of `self`.
92a42be0 | 268 | #[stable(feature = "core", since = "1.6.0")] |
85aaf69f | 269 | fn len_utf8(self) -> usize; |
    // Number of 16-bit units (1 or 2) used by the UTF-16 encoding of `self`.
92a42be0 | 270 | #[stable(feature = "core", since = "1.6.0")] |
85aaf69f | 271 | fn len_utf16(self) -> usize; |
54a0048b SL |
    // Iterator over the bytes of the UTF-8 encoding of `self`.
272 | #[unstable(feature = "unicode", issue = "27784")] |
273 | fn encode_utf8(self) -> EncodeUtf8; | |
    // Iterator over the 16-bit units of the UTF-16 encoding of `self`.
274 | #[unstable(feature = "unicode", issue = "27784")] | |
275 | fn encode_utf16(self) -> EncodeUtf16; | |
1a4d82fc JJ |
276 | } |
277 | ||
92a42be0 | 278 | #[stable(feature = "core", since = "1.6.0")] |
1a4d82fc | 279 | impl CharExt for char { |
62682a34 | 280 | #[inline] |
85aaf69f | 281 | fn is_digit(self, radix: u32) -> bool { |
1a4d82fc JJ |
282 | self.to_digit(radix).is_some() |
283 | } | |
284 | ||
62682a34 | 285 | #[inline] |
85aaf69f | 286 | fn to_digit(self, radix: u32) -> Option<u32> { |
1a4d82fc JJ |
287 | if radix > 36 { |
288 | panic!("to_digit: radix is too high (maximum 36)"); | |
289 | } | |
290 | let val = match self { | |
85aaf69f SL |
291 | '0' ... '9' => self as u32 - '0' as u32, |
292 | 'a' ... 'z' => self as u32 - 'a' as u32 + 10, | |
293 | 'A' ... 'Z' => self as u32 - 'A' as u32 + 10, | |
1a4d82fc JJ |
294 | _ => return None, |
295 | }; | |
296 | if val < radix { Some(val) } | |
297 | else { None } | |
298 | } | |
299 | ||
62682a34 | 300 | #[inline] |
1a4d82fc JJ |
301 | fn escape_unicode(self) -> EscapeUnicode { |
302 | EscapeUnicode { c: self, state: EscapeUnicodeState::Backslash } | |
303 | } | |
304 | ||
62682a34 | 305 | #[inline] |
1a4d82fc JJ |
306 | fn escape_default(self) -> EscapeDefault { |
307 | let init_state = match self { | |
308 | '\t' => EscapeDefaultState::Backslash('t'), | |
309 | '\r' => EscapeDefaultState::Backslash('r'), | |
310 | '\n' => EscapeDefaultState::Backslash('n'), | |
b039eaaf | 311 | '\\' | '\'' | '"' => EscapeDefaultState::Backslash(self), |
1a4d82fc JJ |
312 | '\x20' ... '\x7e' => EscapeDefaultState::Char(self), |
313 | _ => EscapeDefaultState::Unicode(self.escape_unicode()) | |
314 | }; | |
315 | EscapeDefault { state: init_state } | |
316 | } | |
317 | ||
318 | #[inline] | |
85aaf69f | 319 | fn len_utf8(self) -> usize { |
1a4d82fc | 320 | let code = self as u32; |
c34b1796 AL |
321 | if code < MAX_ONE_B { |
322 | 1 | |
323 | } else if code < MAX_TWO_B { | |
324 | 2 | |
325 | } else if code < MAX_THREE_B { | |
326 | 3 | |
327 | } else { | |
328 | 4 | |
1a4d82fc JJ |
329 | } |
330 | } | |
331 | ||
332 | #[inline] | |
85aaf69f | 333 | fn len_utf16(self) -> usize { |
1a4d82fc | 334 | let ch = self as u32; |
c34b1796 | 335 | if (ch & 0xFFFF) == ch { 1 } else { 2 } |
1a4d82fc JJ |
336 | } |
337 | ||
338 | #[inline] | |
54a0048b SL |
339 | fn encode_utf8(self) -> EncodeUtf8 { |
340 | let code = self as u32; | |
341 | let mut buf = [0; 4]; | |
342 | let pos = if code < MAX_ONE_B { | |
343 | buf[3] = code as u8; | |
344 | 3 | |
345 | } else if code < MAX_TWO_B { | |
346 | buf[2] = (code >> 6 & 0x1F) as u8 | TAG_TWO_B; | |
347 | buf[3] = (code & 0x3F) as u8 | TAG_CONT; | |
348 | 2 | |
349 | } else if code < MAX_THREE_B { | |
350 | buf[1] = (code >> 12 & 0x0F) as u8 | TAG_THREE_B; | |
351 | buf[2] = (code >> 6 & 0x3F) as u8 | TAG_CONT; | |
352 | buf[3] = (code & 0x3F) as u8 | TAG_CONT; | |
353 | 1 | |
354 | } else { | |
355 | buf[0] = (code >> 18 & 0x07) as u8 | TAG_FOUR_B; | |
356 | buf[1] = (code >> 12 & 0x3F) as u8 | TAG_CONT; | |
357 | buf[2] = (code >> 6 & 0x3F) as u8 | TAG_CONT; | |
358 | buf[3] = (code & 0x3F) as u8 | TAG_CONT; | |
359 | 0 | |
360 | }; | |
361 | EncodeUtf8 { buf: buf, pos: pos } | |
1a4d82fc JJ |
362 | } |
363 | ||
364 | #[inline] | |
54a0048b SL |
365 | fn encode_utf16(self) -> EncodeUtf16 { |
366 | let mut buf = [0; 2]; | |
367 | let mut code = self as u32; | |
368 | let pos = if (code & 0xFFFF) == code { | |
369 | // The BMP falls through (assuming non-surrogate, as it should) | |
370 | buf[1] = code as u16; | |
371 | 1 | |
372 | } else { | |
373 | // Supplementary planes break into surrogates. | |
374 | code -= 0x1_0000; | |
375 | buf[0] = 0xD800 | ((code >> 10) as u16); | |
376 | buf[1] = 0xDC00 | ((code as u16) & 0x3FF); | |
377 | 0 | |
378 | }; | |
379 | EncodeUtf16 { buf: buf, pos: pos } | |
1a4d82fc JJ |
380 | } |
381 | } | |
382 | ||
92a42be0 SL |
383 | /// Returns an iterator that yields the hexadecimal Unicode escape of a |
384 | /// character, as `char`s. | |
385 | /// | |
386 | /// This `struct` is created by the [`escape_unicode()`] method on [`char`]. See | |
387 | /// its documentation for more. | |
388 | /// | |
54a0048b SL |
389 | /// [`escape_unicode()`]: ../../std/primitive.char.html#method.escape_unicode |
390 | /// [`char`]: ../../std/primitive.char.html | |
391 | #[derive(Clone, Debug)] | |
85aaf69f | 392 | #[stable(feature = "rust1", since = "1.0.0")] |
1a4d82fc JJ |
393 | pub struct EscapeUnicode { |
394 | c: char, | |
395 | state: EscapeUnicodeState | |
396 | } | |
397 | ||
54a0048b | 398 | #[derive(Clone, Debug)] |
1a4d82fc JJ |
399 | enum EscapeUnicodeState { |
400 | Backslash, | |
401 | Type, | |
402 | LeftBrace, | |
85aaf69f | 403 | Value(usize), |
1a4d82fc JJ |
404 | RightBrace, |
405 | Done, | |
406 | } | |
407 | ||
85aaf69f | 408 | #[stable(feature = "rust1", since = "1.0.0")] |
1a4d82fc JJ |
409 | impl Iterator for EscapeUnicode { |
410 | type Item = char; | |
411 | ||
412 | fn next(&mut self) -> Option<char> { | |
413 | match self.state { | |
414 | EscapeUnicodeState::Backslash => { | |
415 | self.state = EscapeUnicodeState::Type; | |
416 | Some('\\') | |
417 | } | |
418 | EscapeUnicodeState::Type => { | |
419 | self.state = EscapeUnicodeState::LeftBrace; | |
420 | Some('u') | |
421 | } | |
422 | EscapeUnicodeState::LeftBrace => { | |
85aaf69f | 423 | let mut n = 0; |
1a4d82fc JJ |
424 | while (self.c as u32) >> (4 * (n + 1)) != 0 { |
425 | n += 1; | |
426 | } | |
427 | self.state = EscapeUnicodeState::Value(n); | |
428 | Some('{') | |
429 | } | |
430 | EscapeUnicodeState::Value(offset) => { | |
c1a9b12d | 431 | let c = from_digit(((self.c as u32) >> (offset * 4)) & 0xf, 16).unwrap(); |
1a4d82fc JJ |
432 | if offset == 0 { |
433 | self.state = EscapeUnicodeState::RightBrace; | |
434 | } else { | |
435 | self.state = EscapeUnicodeState::Value(offset - 1); | |
436 | } | |
c1a9b12d | 437 | Some(c) |
1a4d82fc JJ |
438 | } |
439 | EscapeUnicodeState::RightBrace => { | |
440 | self.state = EscapeUnicodeState::Done; | |
441 | Some('}') | |
442 | } | |
443 | EscapeUnicodeState::Done => None, | |
444 | } | |
445 | } | |
b039eaaf SL |
446 | |
447 | fn size_hint(&self) -> (usize, Option<usize>) { | |
448 | let mut n = 0; | |
449 | while (self.c as usize) >> (4 * (n + 1)) != 0 { | |
450 | n += 1; | |
451 | } | |
452 | let n = match self.state { | |
453 | EscapeUnicodeState::Backslash => n + 5, | |
454 | EscapeUnicodeState::Type => n + 4, | |
455 | EscapeUnicodeState::LeftBrace => n + 3, | |
456 | EscapeUnicodeState::Value(offset) => offset + 2, | |
457 | EscapeUnicodeState::RightBrace => 1, | |
458 | EscapeUnicodeState::Done => 0, | |
459 | }; | |
460 | (n, Some(n)) | |
461 | } | |
1a4d82fc JJ |
462 | } |
463 | ||
92a42be0 SL |
464 | /// An iterator that yields the literal escape code of a `char`. |
465 | /// | |
466 | /// This `struct` is created by the [`escape_default()`] method on [`char`]. See | |
467 | /// its documentation for more. | |
468 | /// | |
54a0048b SL |
469 | /// [`escape_default()`]: ../../std/primitive.char.html#method.escape_default |
470 | /// [`char`]: ../../std/primitive.char.html | |
471 | #[derive(Clone, Debug)] | |
85aaf69f | 472 | #[stable(feature = "rust1", since = "1.0.0")] |
1a4d82fc JJ |
473 | pub struct EscapeDefault { |
474 | state: EscapeDefaultState | |
475 | } | |
476 | ||
54a0048b | 477 | #[derive(Clone, Debug)] |
1a4d82fc JJ |
478 | enum EscapeDefaultState { |
479 | Backslash(char), | |
480 | Char(char), | |
481 | Done, | |
482 | Unicode(EscapeUnicode), | |
483 | } | |
484 | ||
85aaf69f | 485 | #[stable(feature = "rust1", since = "1.0.0")] |
1a4d82fc JJ |
486 | impl Iterator for EscapeDefault { |
487 | type Item = char; | |
488 | ||
489 | fn next(&mut self) -> Option<char> { | |
490 | match self.state { | |
491 | EscapeDefaultState::Backslash(c) => { | |
492 | self.state = EscapeDefaultState::Char(c); | |
493 | Some('\\') | |
494 | } | |
495 | EscapeDefaultState::Char(c) => { | |
496 | self.state = EscapeDefaultState::Done; | |
497 | Some(c) | |
498 | } | |
499 | EscapeDefaultState::Done => None, | |
b039eaaf SL |
500 | EscapeDefaultState::Unicode(ref mut iter) => iter.next(), |
501 | } | |
502 | } | |
503 | ||
504 | fn size_hint(&self) -> (usize, Option<usize>) { | |
505 | match self.state { | |
506 | EscapeDefaultState::Char(_) => (1, Some(1)), | |
507 | EscapeDefaultState::Backslash(_) => (2, Some(2)), | |
508 | EscapeDefaultState::Unicode(ref iter) => iter.size_hint(), | |
509 | EscapeDefaultState::Done => (0, Some(0)), | |
1a4d82fc JJ |
510 | } |
511 | } | |
9cc50fc6 SL |
512 | |
513 | fn count(self) -> usize { | |
514 | match self.state { | |
515 | EscapeDefaultState::Char(_) => 1, | |
516 | EscapeDefaultState::Unicode(iter) => iter.count(), | |
517 | EscapeDefaultState::Done => 0, | |
518 | EscapeDefaultState::Backslash(_) => 2, | |
519 | } | |
520 | } | |
521 | ||
522 | fn nth(&mut self, n: usize) -> Option<char> { | |
523 | match self.state { | |
524 | EscapeDefaultState::Backslash(c) if n == 0 => { | |
525 | self.state = EscapeDefaultState::Char(c); | |
526 | Some('\\') | |
527 | }, | |
528 | EscapeDefaultState::Backslash(c) if n == 1 => { | |
529 | self.state = EscapeDefaultState::Done; | |
530 | Some(c) | |
531 | }, | |
532 | EscapeDefaultState::Backslash(_) => { | |
533 | self.state = EscapeDefaultState::Done; | |
534 | None | |
535 | }, | |
536 | EscapeDefaultState::Char(c) => { | |
537 | self.state = EscapeDefaultState::Done; | |
538 | ||
539 | if n == 0 { | |
540 | Some(c) | |
541 | } else { | |
542 | None | |
543 | } | |
544 | }, | |
545 | EscapeDefaultState::Done => return None, | |
546 | EscapeDefaultState::Unicode(ref mut i) => return i.nth(n), | |
547 | } | |
548 | } | |
549 | ||
550 | fn last(self) -> Option<char> { | |
551 | match self.state { | |
552 | EscapeDefaultState::Unicode(iter) => iter.last(), | |
553 | EscapeDefaultState::Done => None, | |
554 | EscapeDefaultState::Backslash(c) | EscapeDefaultState::Char(c) => Some(c), | |
555 | } | |
556 | } | |
1a4d82fc | 557 | } |
54a0048b SL |
558 | |
559 | /// An iterator over `u8` entries represending the UTF-8 encoding of a `char` | |
560 | /// value. | |
561 | /// | |
562 | /// Constructed via the `.encode_utf8()` method on `char`. | |
563 | #[unstable(feature = "unicode", issue = "27784")] | |
564 | #[derive(Debug)] | |
565 | pub struct EncodeUtf8 { | |
566 | buf: [u8; 4], | |
567 | pos: usize, | |
568 | } | |
569 | ||
570 | impl EncodeUtf8 { | |
571 | /// Returns the remaining bytes of this iterator as a slice. | |
572 | #[unstable(feature = "unicode", issue = "27784")] | |
573 | pub fn as_slice(&self) -> &[u8] { | |
574 | &self.buf[self.pos..] | |
575 | } | |
576 | } | |
577 | ||
578 | #[unstable(feature = "unicode", issue = "27784")] | |
579 | impl Iterator for EncodeUtf8 { | |
580 | type Item = u8; | |
581 | ||
582 | fn next(&mut self) -> Option<u8> { | |
583 | if self.pos == self.buf.len() { | |
584 | None | |
585 | } else { | |
586 | let ret = Some(self.buf[self.pos]); | |
587 | self.pos += 1; | |
588 | ret | |
589 | } | |
590 | } | |
591 | ||
592 | fn size_hint(&self) -> (usize, Option<usize>) { | |
593 | self.as_slice().iter().size_hint() | |
594 | } | |
595 | } | |
596 | ||
597 | /// An iterator over `u16` entries represending the UTF-16 encoding of a `char` | |
598 | /// value. | |
599 | /// | |
600 | /// Constructed via the `.encode_utf16()` method on `char`. | |
601 | #[unstable(feature = "unicode", issue = "27784")] | |
602 | #[derive(Debug)] | |
603 | pub struct EncodeUtf16 { | |
604 | buf: [u16; 2], | |
605 | pos: usize, | |
606 | } | |
607 | ||
608 | impl EncodeUtf16 { | |
609 | /// Returns the remaining bytes of this iterator as a slice. | |
610 | #[unstable(feature = "unicode", issue = "27784")] | |
611 | pub fn as_slice(&self) -> &[u16] { | |
612 | &self.buf[self.pos..] | |
613 | } | |
614 | } | |
615 | ||
616 | ||
617 | #[unstable(feature = "unicode", issue = "27784")] | |
618 | impl Iterator for EncodeUtf16 { | |
619 | type Item = u16; | |
620 | ||
621 | fn next(&mut self) -> Option<u16> { | |
622 | if self.pos == self.buf.len() { | |
623 | None | |
624 | } else { | |
625 | let ret = Some(self.buf[self.pos]); | |
626 | self.pos += 1; | |
627 | ret | |
628 | } | |
629 | } | |
630 | ||
631 | fn size_hint(&self) -> (usize, Option<usize>) { | |
632 | self.as_slice().iter().size_hint() | |
633 | } | |
634 | } |