]> git.proxmox.com Git - rustc.git/blob - vendor/unicase/src/unicode/mod.rs
New upstream version 1.43.0+dfsg1
[rustc.git] / vendor / unicase / src / unicode / mod.rs
1 #[cfg(__unicase__iter_cmp)]
2 use core::cmp::Ordering;
3 use core::hash::{Hash, Hasher};
4
5 use self::map::lookup;
6 mod map;
7
/// Thin wrapper around a string-like value.
///
/// The comparison and hashing impls in this module do not look at the raw
/// text; they run every `char` of the wrapped value through `map::lookup`
/// first and work on the characters that come out.
#[derive(Debug, Default, Clone, Copy)]
pub struct Unicode<S>(pub S);
10
11 impl<S1: AsRef<str>, S2: AsRef<str>> PartialEq<Unicode<S2>> for Unicode<S1> {
12 #[inline]
13 fn eq(&self, other: &Unicode<S2>) -> bool {
14 let mut left = self.0.as_ref().chars().flat_map(lookup);
15 let mut right = other.0.as_ref().chars().flat_map(lookup);
16
17 // inline Iterator::eq since not added until Rust 1.5
18 loop {
19 let x = match left.next() {
20 None => return right.next().is_none(),
21 Some(val) => val,
22 };
23
24 let y = match right.next() {
25 None => return false,
26 Some(val) => val,
27 };
28
29 if x != y {
30 return false;
31 }
32 }
33 }
34 }
35
36 impl<S: AsRef<str>> Eq for Unicode<S> {}
37
/// Partial ordering that always succeeds by deferring to the `Ord` impl.
///
/// Only compiled when the `__unicase__iter_cmp` cfg is set.
#[cfg(__unicase__iter_cmp)]
impl<T> PartialOrd for Unicode<T>
where
    T: AsRef<str>,
{
    #[inline]
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        let ordering = Ord::cmp(self, other);
        Some(ordering)
    }
}
45
/// Lexicographic ordering of the `lookup`-mapped character streams.
///
/// Only compiled when the `__unicase__iter_cmp` cfg is set, since it relies
/// on `Iterator::cmp`.
#[cfg(__unicase__iter_cmp)]
impl<T> Ord for Unicode<T>
where
    T: AsRef<str>,
{
    #[inline]
    fn cmp(&self, other: &Self) -> Ordering {
        let rhs = other.0.as_ref().chars().flat_map(lookup);
        self.0.as_ref().chars().flat_map(lookup).cmp(rhs)
    }
}
55
56 impl<S: AsRef<str>> Hash for Unicode<S> {
57 #[inline]
58 fn hash<H: Hasher>(&self, hasher: &mut H) {
59 let mut buf = [0; 4];
60 for c in self.0.as_ref().chars().flat_map(|c| lookup(c)) {
61 let len = char_to_utf8(c, &mut buf);
62 hasher.write(&buf[..len])
63 }
64 }
65 }
66
/// Encodes `c` as UTF-8 into the front of `dst`.
///
/// Returns the number of bytes written (1 to 4). Bytes of `dst` beyond the
/// returned length are left as they were.
#[inline]
fn char_to_utf8(c: char, dst: &mut [u8; 4]) -> usize {
    // Marker bits for the leading byte of 2-, 3- and 4-byte sequences, and
    // for every continuation byte.
    const LEAD_2: u8 = 0b1100_0000;
    const LEAD_3: u8 = 0b1110_0000;
    const LEAD_4: u8 = 0b1111_0000;
    const CONT: u8 = 0b1000_0000;

    let code = c as u32;
    // Continuation byte carrying the six payload bits that start at `shift`.
    let tail = |shift: u32| ((code >> shift) & 0x3F) as u8 | CONT;

    if code < 0x80 {
        dst[0] = code as u8;
        return 1;
    }
    if code < 0x800 {
        dst[0] = LEAD_2 | ((code >> 6) & 0x1F) as u8;
        dst[1] = tail(0);
        return 2;
    }
    if code < 0x1_0000 {
        dst[0] = LEAD_3 | ((code >> 12) & 0x0F) as u8;
        dst[1] = tail(6);
        dst[2] = tail(0);
        return 3;
    }

    dst[0] = LEAD_4 | ((code >> 18) & 0x07) as u8;
    dst[1] = tail(12);
    dst[2] = tail(6);
    dst[3] = tail(0);
    4
}
95
// Private module so that the enum itself can be declared `pub`, which is what
// the privacy checker demands.
mod fold {
    /// A sequence of zero to three characters that is consumed through its
    /// `Iterator` impl, front to back.
    #[derive(Clone, Copy)]
    pub enum Fold {
        Zero,
        One(char),
        Two(char, char),
        Three(char, char, char),
    }

    impl Iterator for Fold {
        type Item = char;

        /// Yields the first remaining character and shrinks `self` to the
        /// variant holding the rest.
        #[inline]
        fn next(&mut self) -> Option<char> {
            match *self {
                Fold::Zero => None,
                Fold::One(one) => {
                    *self = Fold::Zero;
                    Some(one)
                }
                Fold::Two(one, two) => {
                    *self = Fold::One(two);
                    Some(one)
                }
                Fold::Three(one, two, three) => {
                    // Hand out the characters in stored order. Yielding
                    // `three` first would rotate the sequence and make it
                    // differ from the same characters supplied one by one.
                    *self = Fold::Two(two, three);
                    Some(one)
                }
            }
        }

        /// Exact number of characters still to be yielded.
        #[inline]
        fn size_hint(&self) -> (usize, Option<usize>) {
            match *self {
                Fold::Zero => (0, Some(0)),
                Fold::One(..) => (1, Some(1)),
                Fold::Two(..) => (2, Some(2)),
                Fold::Three(..) => (3, Some(3)),
            }
        }
    }

    impl From<(char,)> for Fold {
        #[inline]
        fn from((one,): (char,)) -> Fold {
            Fold::One(one)
        }
    }

    impl From<(char, char)> for Fold {
        #[inline]
        fn from((one, two): (char, char)) -> Fold {
            Fold::Two(one, two)
        }
    }

    impl From<(char, char, char)> for Fold {
        #[inline]
        fn from((one, two, three): (char, char, char)) -> Fold {
            Fold::Three(one, two, three)
        }
    }
}
160
#[cfg(test)]
mod tests {
    use super::Unicode;

    // Asserts that two strings compare equal once wrapped in `Unicode`.
    macro_rules! eq {
        ($left:expr, $right:expr) => ({
            assert_eq!(Unicode($left), Unicode($right));
        });
    }

    #[test]
    fn test_ascii_folding() {
        eq!("foo bar", "FoO BAR");
    }

    #[test]
    fn test_simple_case_folding() {
        eq!("στιγμας", "στιγμασ");
    }

    #[test]
    fn test_full_case_folding() {
        // U+FB02 LATIN SMALL LIGATURE FL must fold to the two letters "fl".
        // It is written as an escape so the ligature cannot be silently
        // flattened into plain "fl", which would turn this into a comparison
        // of a string with itself.
        eq!("\u{FB02}our", "flour");
        eq!("Maße", "MASSE");
        eq!("ᾲ στο διάολο", "ὰι στο διάολο");
    }

    #[cfg(feature = "nightly")]
    #[bench]
    fn bench_ascii_folding(b: &mut ::test::Bencher) {
        b.bytes = b"foo bar".len() as u64;
        b.iter(|| eq!("foo bar", "FoO BAR"));
    }

    #[cfg(feature = "nightly")]
    #[bench]
    fn bench_simple_case_folding(b: &mut ::test::Bencher) {
        b.bytes = "στιγμας".len() as u64;
        b.iter(|| eq!("στιγμας", "στιγμασ"));
    }
}