]> git.proxmox.com Git - rustc.git/blame - src/libcore/char/mod.rs
New upstream version 1.28.0~beta.14+dfsg1
[rustc.git] / src / libcore / char / mod.rs
CommitLineData
83c7162d
XL
1// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2// file at the top-level directory of this distribution and at
3// http://rust-lang.org/COPYRIGHT.
4//
5// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8// option. This file may not be copied, modified, or distributed
9// except according to those terms.
10
//! A character type.
//!
//! The `char` type represents a single character. More specifically, since
//! 'character' isn't a well-defined concept in Unicode, `char` is a '[Unicode
//! scalar value]', which is similar to, but not the same as, a '[Unicode code
//! point]'.
//!
//! [Unicode scalar value]: http://www.unicode.org/glossary/#unicode_scalar_value
//! [Unicode code point]: http://www.unicode.org/glossary/#code_point
//!
//! This module exists for technical reasons; the primary documentation for
//! `char` is directly on [the `char` primitive type](../../std/primitive.char.html)
//! itself.
//!
//! This module is the home of the iterator implementations for the iterators
//! implemented on `char`, as well as some useful constants and conversion
//! functions that convert various types to `char`.
28
29#![allow(non_snake_case)]
30#![stable(feature = "core_char", since = "1.2.0")]
31
32mod convert;
33mod decode;
34mod methods;
35
36// stable re-exports
37#[stable(feature = "rust1", since = "1.0.0")]
38pub use self::convert::{from_u32, from_digit};
39#[stable(feature = "char_from_unchecked", since = "1.5.0")]
40pub use self::convert::from_u32_unchecked;
41#[stable(feature = "char_from_str", since = "1.20.0")]
42pub use self::convert::ParseCharError;
43#[unstable(feature = "try_from", issue = "33417")]
44pub use self::convert::CharTryFromError;
45#[stable(feature = "decode_utf16", since = "1.9.0")]
46pub use self::decode::{decode_utf16, DecodeUtf16, DecodeUtf16Error};
47
48// unstable re-exports
49#[unstable(feature = "unicode_version", issue = "49726")]
50pub use unicode::tables::UNICODE_VERSION;
51#[unstable(feature = "unicode_version", issue = "49726")]
52pub use unicode::version::UnicodeVersion;
53#[unstable(feature = "decode_utf8", issue = "33906")]
54#[rustc_deprecated(since = "1.27.0", reason = "Use str::from_utf8 instead:
55 https://doc.rust-lang.org/nightly/std/str/struct.Utf8Error.html#examples")]
56#[allow(deprecated)]
57pub use self::decode::{decode_utf8, DecodeUtf8, InvalidSequence};
58
59use fmt::{self, Write};
60use iter::FusedIterator;
61
// UTF-8 ranges and tags for encoding characters.
//
// The `TAG_*` values are the fixed high bits of a UTF-8 byte; the `MAX_*`
// values are exclusive upper bounds on the code points that fit in a sequence
// of the given length.

// High bits of a continuation byte: `10xx_xxxx`.
const TAG_CONT: u8 = 0b1000_0000;
// High bits of the leading byte of a two-byte sequence: `110x_xxxx`.
const TAG_TWO_B: u8 = 0b1100_0000;
// High bits of the leading byte of a three-byte sequence: `1110_xxxx`.
const TAG_THREE_B: u8 = 0b1110_0000;
// High bits of the leading byte of a four-byte sequence: `1111_0xxx`.
const TAG_FOUR_B: u8 = 0b1111_0000;

// Code points strictly below this value are encoded in one byte.
const MAX_ONE_B: u32 = 0x80;
// Code points strictly below this value are encoded in at most two bytes.
const MAX_TWO_B: u32 = 0x800;
// Code points strictly below this value are encoded in at most three bytes;
// everything from here up needs four.
const MAX_THREE_B: u32 = 0x10000;
70
/*
    Unicode General_Category values: abbreviation, long name, description.

    Lu  Uppercase_Letter        an uppercase letter
    Ll  Lowercase_Letter        a lowercase letter
    Lt  Titlecase_Letter        a digraphic character, with first part uppercase
    Lm  Modifier_Letter         a modifier letter
    Lo  Other_Letter            other letters, including syllables and ideographs
    Mn  Nonspacing_Mark         a nonspacing combining mark (zero advance width)
    Mc  Spacing_Mark            a spacing combining mark (positive advance width)
    Me  Enclosing_Mark          an enclosing combining mark
    Nd  Decimal_Number          a decimal digit
    Nl  Letter_Number           a letterlike numeric character
    No  Other_Number            a numeric character of other type
    Pc  Connector_Punctuation   a connecting punctuation mark, like a tie
    Pd  Dash_Punctuation        a dash or hyphen punctuation mark
    Ps  Open_Punctuation        an opening punctuation mark (of a pair)
    Pe  Close_Punctuation       a closing punctuation mark (of a pair)
    Pi  Initial_Punctuation     an initial quotation mark
    Pf  Final_Punctuation       a final quotation mark
    Po  Other_Punctuation       a punctuation mark of other type
    Sm  Math_Symbol             a symbol of primarily mathematical use
    Sc  Currency_Symbol         a currency sign
    Sk  Modifier_Symbol         a non-letterlike modifier symbol
    So  Other_Symbol            a symbol of other type
    Zs  Space_Separator         a space character (of various non-zero widths)
    Zl  Line_Separator          U+2028 LINE SEPARATOR only
    Zp  Paragraph_Separator     U+2029 PARAGRAPH SEPARATOR only
    Cc  Control                 a C0 or C1 control code
    Cf  Format                  a format control character
    Cs  Surrogate               a surrogate code point
    Co  Private_Use             a private-use character
    Cn  Unassigned              a reserved unassigned code point or a noncharacter
*/
103
104/// The highest valid code point a `char` can have.
105///
106/// A [`char`] is a [Unicode Scalar Value], which means that it is a [Code
107/// Point], but only ones within a certain range. `MAX` is the highest valid
108/// code point that's a valid [Unicode Scalar Value].
109///
110/// [`char`]: ../../std/primitive.char.html
111/// [Unicode Scalar Value]: http://www.unicode.org/glossary/#unicode_scalar_value
112/// [Code Point]: http://www.unicode.org/glossary/#code_point
113#[stable(feature = "rust1", since = "1.0.0")]
114pub const MAX: char = '\u{10ffff}';
115
116/// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a
117/// decoding error.
118///
119/// It can occur, for example, when giving ill-formed UTF-8 bytes to
120/// [`String::from_utf8_lossy`](../../std/string/struct.String.html#method.from_utf8_lossy).
121#[stable(feature = "decode_utf16", since = "1.9.0")]
122pub const REPLACEMENT_CHARACTER: char = '\u{FFFD}';
123
124/// Returns an iterator that yields the hexadecimal Unicode escape of a
125/// character, as `char`s.
126///
127/// This `struct` is created by the [`escape_unicode`] method on [`char`]. See
128/// its documentation for more.
129///
130/// [`escape_unicode`]: ../../std/primitive.char.html#method.escape_unicode
131/// [`char`]: ../../std/primitive.char.html
132#[derive(Clone, Debug)]
133#[stable(feature = "rust1", since = "1.0.0")]
134pub struct EscapeUnicode {
135 c: char,
136 state: EscapeUnicodeState,
137
138 // The index of the next hex digit to be printed (0 if none),
139 // i.e. the number of remaining hex digits to be printed;
140 // increasing from the least significant digit: 0x543210
141 hex_digit_idx: usize,
142}
143
// The enum values are ordered so that their representation is the same as
// the remaining length (not counting the additional hexadecimal digits, which
// are tracked by `hex_digit_idx`). This likely makes `len()` a single load
// from memory and worth inlining.
#[derive(Clone, Debug)]
enum EscapeUnicodeState {
    // Nothing left to emit.
    Done,
    // Only the closing `}` is left.
    RightBrace,
    // A hex digit is emitted next, followed eventually by `}`.
    Value,
    // The opening `{` is emitted next.
    LeftBrace,
    // The `u` is emitted next.
    Type,
    // Initial state: the leading `\` is emitted next.
    Backslash,
}
156
157#[stable(feature = "rust1", since = "1.0.0")]
158impl Iterator for EscapeUnicode {
159 type Item = char;
160
161 fn next(&mut self) -> Option<char> {
162 match self.state {
163 EscapeUnicodeState::Backslash => {
164 self.state = EscapeUnicodeState::Type;
165 Some('\\')
166 }
167 EscapeUnicodeState::Type => {
168 self.state = EscapeUnicodeState::LeftBrace;
169 Some('u')
170 }
171 EscapeUnicodeState::LeftBrace => {
172 self.state = EscapeUnicodeState::Value;
173 Some('{')
174 }
175 EscapeUnicodeState::Value => {
176 let hex_digit = ((self.c as u32) >> (self.hex_digit_idx * 4)) & 0xf;
177 let c = from_digit(hex_digit, 16).unwrap();
178 if self.hex_digit_idx == 0 {
179 self.state = EscapeUnicodeState::RightBrace;
180 } else {
181 self.hex_digit_idx -= 1;
182 }
183 Some(c)
184 }
185 EscapeUnicodeState::RightBrace => {
186 self.state = EscapeUnicodeState::Done;
187 Some('}')
188 }
189 EscapeUnicodeState::Done => None,
190 }
191 }
192
193 #[inline]
194 fn size_hint(&self) -> (usize, Option<usize>) {
195 let n = self.len();
196 (n, Some(n))
197 }
198
199 #[inline]
200 fn count(self) -> usize {
201 self.len()
202 }
203
204 fn last(self) -> Option<char> {
205 match self.state {
206 EscapeUnicodeState::Done => None,
207
208 EscapeUnicodeState::RightBrace |
209 EscapeUnicodeState::Value |
210 EscapeUnicodeState::LeftBrace |
211 EscapeUnicodeState::Type |
212 EscapeUnicodeState::Backslash => Some('}'),
213 }
214 }
215}
216
217#[stable(feature = "exact_size_escape", since = "1.11.0")]
218impl ExactSizeIterator for EscapeUnicode {
219 #[inline]
220 fn len(&self) -> usize {
221 // The match is a single memory access with no branching
222 self.hex_digit_idx + match self.state {
223 EscapeUnicodeState::Done => 0,
224 EscapeUnicodeState::RightBrace => 1,
225 EscapeUnicodeState::Value => 2,
226 EscapeUnicodeState::LeftBrace => 3,
227 EscapeUnicodeState::Type => 4,
228 EscapeUnicodeState::Backslash => 5,
229 }
230 }
231}
232
233#[stable(feature = "fused", since = "1.26.0")]
234impl FusedIterator for EscapeUnicode {}
235
236#[stable(feature = "char_struct_display", since = "1.16.0")]
237impl fmt::Display for EscapeUnicode {
238 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
239 for c in self.clone() {
240 f.write_char(c)?;
241 }
242 Ok(())
243 }
244}
245
246/// An iterator that yields the literal escape code of a `char`.
247///
248/// This `struct` is created by the [`escape_default`] method on [`char`]. See
249/// its documentation for more.
250///
251/// [`escape_default`]: ../../std/primitive.char.html#method.escape_default
252/// [`char`]: ../../std/primitive.char.html
253#[derive(Clone, Debug)]
254#[stable(feature = "rust1", since = "1.0.0")]
255pub struct EscapeDefault {
256 state: EscapeDefaultState
257}
258
259#[derive(Clone, Debug)]
260enum EscapeDefaultState {
261 Done,
262 Char(char),
263 Backslash(char),
264 Unicode(EscapeUnicode),
265}
266
267#[stable(feature = "rust1", since = "1.0.0")]
268impl Iterator for EscapeDefault {
269 type Item = char;
270
271 fn next(&mut self) -> Option<char> {
272 match self.state {
273 EscapeDefaultState::Backslash(c) => {
274 self.state = EscapeDefaultState::Char(c);
275 Some('\\')
276 }
277 EscapeDefaultState::Char(c) => {
278 self.state = EscapeDefaultState::Done;
279 Some(c)
280 }
281 EscapeDefaultState::Done => None,
282 EscapeDefaultState::Unicode(ref mut iter) => iter.next(),
283 }
284 }
285
286 #[inline]
287 fn size_hint(&self) -> (usize, Option<usize>) {
288 let n = self.len();
289 (n, Some(n))
290 }
291
292 #[inline]
293 fn count(self) -> usize {
294 self.len()
295 }
296
297 fn nth(&mut self, n: usize) -> Option<char> {
298 match self.state {
299 EscapeDefaultState::Backslash(c) if n == 0 => {
300 self.state = EscapeDefaultState::Char(c);
301 Some('\\')
302 },
303 EscapeDefaultState::Backslash(c) if n == 1 => {
304 self.state = EscapeDefaultState::Done;
305 Some(c)
306 },
307 EscapeDefaultState::Backslash(_) => {
308 self.state = EscapeDefaultState::Done;
309 None
310 },
311 EscapeDefaultState::Char(c) => {
312 self.state = EscapeDefaultState::Done;
313
314 if n == 0 {
315 Some(c)
316 } else {
317 None
318 }
319 },
320 EscapeDefaultState::Done => return None,
321 EscapeDefaultState::Unicode(ref mut i) => return i.nth(n),
322 }
323 }
324
325 fn last(self) -> Option<char> {
326 match self.state {
327 EscapeDefaultState::Unicode(iter) => iter.last(),
328 EscapeDefaultState::Done => None,
329 EscapeDefaultState::Backslash(c) | EscapeDefaultState::Char(c) => Some(c),
330 }
331 }
332}
333
334#[stable(feature = "exact_size_escape", since = "1.11.0")]
335impl ExactSizeIterator for EscapeDefault {
336 fn len(&self) -> usize {
337 match self.state {
338 EscapeDefaultState::Done => 0,
339 EscapeDefaultState::Char(_) => 1,
340 EscapeDefaultState::Backslash(_) => 2,
341 EscapeDefaultState::Unicode(ref iter) => iter.len(),
342 }
343 }
344}
345
346#[stable(feature = "fused", since = "1.26.0")]
347impl FusedIterator for EscapeDefault {}
348
349#[stable(feature = "char_struct_display", since = "1.16.0")]
350impl fmt::Display for EscapeDefault {
351 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
352 for c in self.clone() {
353 f.write_char(c)?;
354 }
355 Ok(())
356 }
357}
358
359/// An iterator that yields the literal escape code of a `char`.
360///
361/// This `struct` is created by the [`escape_debug`] method on [`char`]. See its
362/// documentation for more.
363///
364/// [`escape_debug`]: ../../std/primitive.char.html#method.escape_debug
365/// [`char`]: ../../std/primitive.char.html
366#[stable(feature = "char_escape_debug", since = "1.20.0")]
367#[derive(Clone, Debug)]
368pub struct EscapeDebug(EscapeDefault);
369
370#[stable(feature = "char_escape_debug", since = "1.20.0")]
371impl Iterator for EscapeDebug {
372 type Item = char;
373 fn next(&mut self) -> Option<char> { self.0.next() }
374 fn size_hint(&self) -> (usize, Option<usize>) { self.0.size_hint() }
375}
376
377#[stable(feature = "char_escape_debug", since = "1.20.0")]
378impl ExactSizeIterator for EscapeDebug { }
379
380#[stable(feature = "fused", since = "1.26.0")]
381impl FusedIterator for EscapeDebug {}
382
383#[stable(feature = "char_escape_debug", since = "1.20.0")]
384impl fmt::Display for EscapeDebug {
385 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
386 fmt::Display::fmt(&self.0, f)
387 }
388}
389
390/// Returns an iterator that yields the lowercase equivalent of a `char`.
391///
392/// This `struct` is created by the [`to_lowercase`] method on [`char`]. See
393/// its documentation for more.
394///
395/// [`to_lowercase`]: ../../std/primitive.char.html#method.to_lowercase
396/// [`char`]: ../../std/primitive.char.html
397#[stable(feature = "rust1", since = "1.0.0")]
398#[derive(Debug, Clone)]
399pub struct ToLowercase(CaseMappingIter);
400
401#[stable(feature = "rust1", since = "1.0.0")]
402impl Iterator for ToLowercase {
403 type Item = char;
404 fn next(&mut self) -> Option<char> {
405 self.0.next()
406 }
407}
408
409#[stable(feature = "fused", since = "1.26.0")]
410impl FusedIterator for ToLowercase {}
411
412/// Returns an iterator that yields the uppercase equivalent of a `char`.
413///
414/// This `struct` is created by the [`to_uppercase`] method on [`char`]. See
415/// its documentation for more.
416///
417/// [`to_uppercase`]: ../../std/primitive.char.html#method.to_uppercase
418/// [`char`]: ../../std/primitive.char.html
419#[stable(feature = "rust1", since = "1.0.0")]
420#[derive(Debug, Clone)]
421pub struct ToUppercase(CaseMappingIter);
422
423#[stable(feature = "rust1", since = "1.0.0")]
424impl Iterator for ToUppercase {
425 type Item = char;
426 fn next(&mut self) -> Option<char> {
427 self.0.next()
428 }
429}
430
431#[stable(feature = "fused", since = "1.26.0")]
432impl FusedIterator for ToUppercase {}
433
// Iterator over the (at most three) `char`s of a case mapping. The variant
// records how many `char`s are still to be yielded, in order.
#[derive(Debug, Clone)]
enum CaseMappingIter {
    Three(char, char, char),
    Two(char, char),
    One(char),
    Zero,
}

impl CaseMappingIter {
    // Builds the iterator from a fixed-size array in which unused trailing
    // slots hold '\0'. The length is decided by looking at the last slot
    // first, then the middle one; the first slot is always yielded.
    fn new(chars: [char; 3]) -> CaseMappingIter {
        if chars[2] == '\0' {
            if chars[1] == '\0' {
                CaseMappingIter::One(chars[0]) // Including if chars[0] == '\0'
            } else {
                CaseMappingIter::Two(chars[0], chars[1])
            }
        } else {
            CaseMappingIter::Three(chars[0], chars[1], chars[2])
        }
    }
}

impl Iterator for CaseMappingIter {
    type Item = char;

    // Yields the front `char` and steps down to the next smaller variant.
    fn next(&mut self) -> Option<char> {
        match *self {
            CaseMappingIter::Three(a, b, c) => {
                *self = CaseMappingIter::Two(b, c);
                Some(a)
            }
            CaseMappingIter::Two(b, c) => {
                *self = CaseMappingIter::One(c);
                Some(b)
            }
            CaseMappingIter::One(c) => {
                *self = CaseMappingIter::Zero;
                Some(c)
            }
            CaseMappingIter::Zero => None,
        }
    }

    // The variant states exactly how many `char`s are left, so report tight
    // bounds instead of the trait's default `(0, None)`.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = match *self {
            CaseMappingIter::Three(..) => 3,
            CaseMappingIter::Two(..) => 2,
            CaseMappingIter::One(_) => 1,
            CaseMappingIter::Zero => 0,
        };
        (remaining, Some(remaining))
    }
}

impl fmt::Display for CaseMappingIter {
    // Writes the `char`s that have not been yielded yet, in order, without
    // advancing the iterator.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match *self {
            CaseMappingIter::Three(a, b, c) => {
                f.write_char(a)?;
                f.write_char(b)?;
                f.write_char(c)
            }
            CaseMappingIter::Two(b, c) => {
                f.write_char(b)?;
                f.write_char(c)
            }
            CaseMappingIter::One(c) => {
                f.write_char(c)
            }
            CaseMappingIter::Zero => Ok(()),
        }
    }
}
496
497#[stable(feature = "char_struct_display", since = "1.16.0")]
498impl fmt::Display for ToLowercase {
499 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
500 fmt::Display::fmt(&self.0, f)
501 }
502}
503
504#[stable(feature = "char_struct_display", since = "1.16.0")]
505impl fmt::Display for ToUppercase {
506 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
507 fmt::Display::fmt(&self.0, f)
508 }
509}