]>
Commit | Line | Data |
---|---|---|
e74abb32 XL |
1 | // Copyright 2013-2014 The rust-url developers. |
2 | // | |
3 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | |
4 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | |
5 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | |
6 | // option. This file may not be copied, modified, or distributed | |
7 | // except according to those terms. | |
8 | ||
6a06907d | 9 | use crate::test::TestFn; |
e74abb32 | 10 | use std::char; |
6a06907d XL |
11 | |
12 | use idna::Errors; | |
e74abb32 XL |
13 | |
14 | pub fn collect_tests<F: FnMut(String, TestFn)>(add_test: &mut F) { | |
6a06907d XL |
15 | // https://www.unicode.org/Public/idna/13.0.0/IdnaTestV2.txt |
16 | for (i, line) in include_str!("IdnaTestV2.txt").lines().enumerate() { | |
17 | if line.is_empty() || line.starts_with('#') { | |
e74abb32 XL |
18 | continue; |
19 | } | |
6a06907d | 20 | |
e74abb32 | 21 | // Remove comments |
6a06907d | 22 | let line = match line.find('#') { |
e74abb32 XL |
23 | Some(index) => &line[0..index], |
24 | None => line, | |
25 | }; | |
26 | ||
e74abb32 | 27 | let mut pieces = line.split(';').map(|x| x.trim()).collect::<Vec<&str>>(); |
6a06907d | 28 | let source = unescape(&pieces.remove(0)); |
e74abb32 | 29 | |
6a06907d XL |
30 | // ToUnicode |
31 | let mut to_unicode = unescape(&pieces.remove(0)); | |
32 | if to_unicode.is_empty() { | |
33 | to_unicode = source.clone(); | |
34 | } | |
35 | let to_unicode_status = status(pieces.remove(0)); | |
36 | ||
37 | // ToAsciiN | |
38 | let to_ascii_n = pieces.remove(0); | |
39 | let to_ascii_n = if to_ascii_n.is_empty() { | |
40 | to_unicode.clone() | |
41 | } else { | |
42 | to_ascii_n.to_owned() | |
43 | }; | |
44 | let to_ascii_n_status = pieces.remove(0); | |
45 | let to_ascii_n_status = if to_ascii_n_status.is_empty() { | |
46 | to_unicode_status.clone() | |
e74abb32 | 47 | } else { |
6a06907d | 48 | status(to_ascii_n_status) |
e74abb32 XL |
49 | }; |
50 | ||
6a06907d XL |
51 | // ToAsciiT |
52 | let to_ascii_t = pieces.remove(0); | |
53 | let to_ascii_t = if to_ascii_t.is_empty() { | |
54 | to_ascii_n.clone() | |
55 | } else { | |
56 | to_ascii_t.to_owned() | |
57 | }; | |
58 | let to_ascii_t_status = pieces.remove(0); | |
59 | let to_ascii_t_status = if to_ascii_t_status.is_empty() { | |
60 | to_ascii_n_status.clone() | |
61 | } else { | |
62 | status(to_ascii_t_status) | |
63 | }; | |
e74abb32 XL |
64 | |
65 | let test_name = format!("UTS #46 line {}", i + 1); | |
66 | add_test( | |
67 | test_name, | |
68 | TestFn::dyn_test_fn(move || { | |
6a06907d | 69 | let config = idna::Config::default() |
e74abb32 XL |
70 | .use_std3_ascii_rules(true) |
71 | .verify_dns_length(true) | |
6a06907d | 72 | .check_hyphens(true); |
e74abb32 | 73 | |
6a06907d XL |
74 | // http://unicode.org/reports/tr46/#Deviations |
75 | // applications that perform IDNA2008 lookup are not required to check | |
76 | // for these contexts, so we skip all tests annotated with C* | |
e74abb32 | 77 | |
6a06907d XL |
78 | // Everybody ignores V2 |
79 | // https://github.com/servo/rust-url/pull/240 | |
80 | // https://github.com/whatwg/url/issues/53#issuecomment-181528158 | |
81 | // http://www.unicode.org/review/pri317/ | |
e74abb32 | 82 | |
6a06907d XL |
83 | // "The special error codes X3 and X4_2 are now returned where a toASCII error code |
84 | // was formerly being generated in toUnicode due to an empty label." | |
85 | // This is not implemented yet, so we skip toUnicode X4_2 tests for now, too. | |
86 | ||
87 | let (to_unicode_value, to_unicode_result) = | |
88 | config.transitional_processing(false).to_unicode(&source); | |
89 | let to_unicode_result = to_unicode_result.map(|()| to_unicode_value); | |
90 | check( | |
91 | &source, | |
92 | (&to_unicode, &to_unicode_status), | |
93 | to_unicode_result, | |
94 | |e| e.starts_with('C') || e == "V2" || e == "X4_2", | |
e74abb32 | 95 | ); |
6a06907d XL |
96 | |
97 | let to_ascii_n_result = config.transitional_processing(false).to_ascii(&source); | |
98 | check( | |
99 | &source, | |
100 | (&to_ascii_n, &to_ascii_n_status), | |
101 | to_ascii_n_result, | |
102 | |e| e.starts_with('C') || e == "V2", | |
103 | ); | |
104 | ||
105 | let to_ascii_t_result = config.transitional_processing(true).to_ascii(&source); | |
106 | check( | |
107 | &source, | |
108 | (&to_ascii_t, &to_ascii_t_status), | |
109 | to_ascii_t_result, | |
110 | |e| e.starts_with('C') || e == "V2", | |
e74abb32 XL |
111 | ); |
112 | }), | |
113 | ) | |
114 | } | |
115 | } | |
116 | ||
6a06907d XL |
117 | #[allow(clippy::redundant_clone)] |
118 | fn check<F>(source: &str, expected: (&str, &[&str]), actual: Result<String, Errors>, ignore: F) | |
119 | where | |
120 | F: Fn(&str) -> bool, | |
121 | { | |
122 | if !expected.1.is_empty() { | |
123 | if !expected.1.iter().copied().any(ignore) { | |
124 | let res = actual.ok(); | |
125 | assert_eq!( | |
126 | res.clone(), | |
127 | None, | |
128 | "Expected error {:?}. result: {} | source: {}", | |
129 | expected.1, | |
130 | res.unwrap(), | |
131 | source, | |
132 | ); | |
133 | } | |
134 | } else { | |
135 | assert!( | |
136 | actual.is_ok(), | |
137 | "Couldn't parse {} | error: {:?}", | |
138 | source, | |
139 | actual.err().unwrap(), | |
140 | ); | |
141 | assert_eq!(actual.unwrap(), expected.0, "source: {}", source); | |
142 | } | |
143 | } | |
144 | ||
e74abb32 XL |
/// Expands the backslash escapes used in the test data.
///
/// Two escapes are recognised: `\\` yields a single backslash and `\uXXXX`
/// (exactly four hexadecimal digits) yields the corresponding character. If
/// the four digits do not name a valid `char`, the escape is written back
/// literally with upper-case hex digits. Every other character is copied
/// unchanged.
///
/// Panics on a trailing backslash, an unknown escape letter, a truncated
/// `\u` sequence, or a non-hexadecimal digit inside a `\u` sequence.
fn unescape(input: &str) -> String {
    let mut decoded = String::new();
    let mut remaining = input.chars();

    while let Some(current) = remaining.next() {
        if current != '\\' {
            decoded.push(current);
            continue;
        }

        match remaining.next().unwrap() {
            '\\' => decoded.push('\\'),
            'u' => {
                // Read the four hex digits, most significant first.
                let mut digits = [0u32; 4];
                for slot in digits.iter_mut() {
                    *slot = remaining.next().unwrap().to_digit(16).unwrap();
                }
                let code_point = digits.iter().fold(0, |acc, digit| acc * 16 + digit);

                if let Some(ch) = char::from_u32(code_point) {
                    decoded.push(ch);
                } else {
                    // Not a valid scalar value: keep the escape in textual form.
                    let [d1, d2, d3, d4] = digits;
                    decoded.push_str(&format!("\\u{:X}{:X}{:X}{:X}", d1, d2, d3, d4));
                }
            }
            _ => panic!("Invalid test data input"),
        }
    }

    decoded
}
6a06907d XL |
177 | |
/// Splits a status column such as `[B1, V6]` into its individual error codes.
///
/// An empty column and the literal `[]` both yield an empty list. Otherwise
/// the value must be enclosed in square brackets; the text between them is
/// split on `", "` and the pieces are returned as borrowed slices of the
/// input.
///
/// Panics with a message naming the offending field when the value is not
/// enclosed in `[` ... `]`.
fn status(status: &str) -> Vec<&str> {
    if status.is_empty() || status == "[]" {
        return Vec::new();
    }

    // Validate both brackets up front so a malformed record is reported with
    // its content rather than an anonymous assertion failure.
    assert!(
        status.starts_with('[') && status.ends_with(']'),
        "Malformed status field {:?}: expected a bracketed list such as \"[V6, B1]\"",
        status,
    );

    // Both brackets are single-byte ASCII and distinct, so the string holds at
    // least two bytes here and the slice lands on character boundaries.
    status[1..status.len() - 1].split(", ").collect()
}