]> git.proxmox.com Git - cargo.git/blob - vendor/url/src/host.rs
New upstream version 0.47.0
[cargo.git] / vendor / url / src / host.rs
1 // Copyright 2013-2016 The rust-url developers.
2 //
3 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6 // option. This file may not be copied, modified, or distributed
7 // except according to those terms.
8
9 use idna;
10 use parser::{ParseError, ParseResult};
11 use percent_encoding::{percent_decode, utf8_percent_encode, CONTROLS};
12 #[cfg(feature = "serde")]
13 use serde::{Deserialize, Serialize};
14 use std::cmp;
15 use std::fmt::{self, Formatter};
16 use std::net::{Ipv4Addr, Ipv6Addr};
17
/// Compact, `Copy` classification of a host.
///
/// Unlike `Host`, the `Domain` variant carries no payload: only the two IP
/// address variants keep their value.
// NOTE(review): the domain text is presumably stored elsewhere (e.g. in the
// serialized URL) — confirm against the users of this type.
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub(crate) enum HostInternal {
    /// No host; also what an empty domain converts to.
    None,
    /// A non-empty domain name (the name itself is not stored here).
    Domain,
    /// An IPv4 address.
    Ipv4(Ipv4Addr),
    /// An IPv6 address.
    Ipv6(Ipv6Addr),
}
26
27 impl From<Host<String>> for HostInternal {
28 fn from(host: Host<String>) -> HostInternal {
29 match host {
30 Host::Domain(ref s) if s.is_empty() => HostInternal::None,
31 Host::Domain(_) => HostInternal::Domain,
32 Host::Ipv4(address) => HostInternal::Ipv4(address),
33 Host::Ipv6(address) => HostInternal::Ipv6(address),
34 }
35 }
36 }
37
/// The host component of a URL.
///
/// `S` is the string type holding a domain name; it defaults to `String`.
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub enum Host<S = String> {
    /// A DNS domain name, written as labels separated by `.` dots.
    ///
    /// For the host of a special URL, non-ASCII labels are encoded in
    /// punycode per IDNA. For non-special URLs they are percent-encoded
    /// instead; such hosts are also known as opaque hosts.
    Domain(S),

    /// An IPv4 address.
    ///
    /// `Url::host_str` returns its serialization: four decimal integers
    /// separated by `.` dots.
    Ipv4(Ipv4Addr),

    /// An IPv6 address.
    ///
    /// `Url::host_str` returns its serialization enclosed in `[` and `]`
    /// brackets, formatted per [RFC 5952 *A Recommendation
    /// for IPv6 Address Text Representation*](https://tools.ietf.org/html/rfc5952):
    /// lowercase hexadecimal with maximal `::` compression.
    Ipv6(Ipv6Addr),
}
60
61 impl<'a> Host<&'a str> {
62 /// Return a copy of `self` that owns an allocated `String` but does not borrow an `&Url`.
63 pub fn to_owned(&self) -> Host<String> {
64 match *self {
65 Host::Domain(domain) => Host::Domain(domain.to_owned()),
66 Host::Ipv4(address) => Host::Ipv4(address),
67 Host::Ipv6(address) => Host::Ipv6(address),
68 }
69 }
70 }
71
72 impl Host<String> {
73 /// Parse a host: either an IPv6 address in [] square brackets, or a domain.
74 ///
75 /// <https://url.spec.whatwg.org/#host-parsing>
76 pub fn parse(input: &str) -> Result<Self, ParseError> {
77 if input.starts_with('[') {
78 if !input.ends_with(']') {
79 return Err(ParseError::InvalidIpv6Address);
80 }
81 return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6);
82 }
83 let domain = percent_decode(input.as_bytes()).decode_utf8_lossy();
84 let domain = idna::domain_to_ascii(&domain)?;
85 if domain
86 .find(|c| {
87 matches!(
88 c,
89 '\0' | '\t'
90 | '\n'
91 | '\r'
92 | ' '
93 | '#'
94 | '%'
95 | '/'
96 | ':'
97 | '?'
98 | '@'
99 | '['
100 | '\\'
101 | ']'
102 )
103 })
104 .is_some()
105 {
106 return Err(ParseError::InvalidDomainCharacter);
107 }
108 if let Some(address) = parse_ipv4addr(&domain)? {
109 Ok(Host::Ipv4(address))
110 } else {
111 Ok(Host::Domain(domain.into()))
112 }
113 }
114
115 // <https://url.spec.whatwg.org/#concept-opaque-host-parser>
116 pub fn parse_opaque(input: &str) -> Result<Self, ParseError> {
117 if input.starts_with('[') {
118 if !input.ends_with(']') {
119 return Err(ParseError::InvalidIpv6Address);
120 }
121 return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6);
122 }
123 if input
124 .find(|c| {
125 matches!(
126 c,
127 '\0' | '\t'
128 | '\n'
129 | '\r'
130 | ' '
131 | '#'
132 | '/'
133 | ':'
134 | '?'
135 | '@'
136 | '['
137 | '\\'
138 | ']'
139 )
140 })
141 .is_some()
142 {
143 return Err(ParseError::InvalidDomainCharacter);
144 }
145 let s = utf8_percent_encode(input, CONTROLS).to_string();
146 Ok(Host::Domain(s))
147 }
148 }
149
150 impl<S: AsRef<str>> fmt::Display for Host<S> {
151 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
152 match *self {
153 Host::Domain(ref domain) => domain.as_ref().fmt(f),
154 Host::Ipv4(ref addr) => addr.fmt(f),
155 Host::Ipv6(ref addr) => {
156 f.write_str("[")?;
157 write_ipv6(addr, f)?;
158 f.write_str("]")
159 }
160 }
161 }
162 }
163
164 fn write_ipv6(addr: &Ipv6Addr, f: &mut Formatter) -> fmt::Result {
165 let segments = addr.segments();
166 let (compress_start, compress_end) = longest_zero_sequence(&segments);
167 let mut i = 0;
168 while i < 8 {
169 if i == compress_start {
170 f.write_str(":")?;
171 if i == 0 {
172 f.write_str(":")?;
173 }
174 if compress_end < 8 {
175 i = compress_end;
176 } else {
177 break;
178 }
179 }
180 write!(f, "{:x}", segments[i as usize])?;
181 if i < 7 {
182 f.write_str(":")?;
183 }
184 i += 1;
185 }
186 Ok(())
187 }
188
/// Locates the first longest run of consecutive zero pieces.
///
/// Returns `(start, end)` with `end` exclusive. When no run of at least two
/// zeros exists, the sentinel `(-1, -2)` is returned instead.
///
/// <https://url.spec.whatwg.org/#concept-ipv6-serializer> steps 2 and 3
fn longest_zero_sequence(pieces: &[u16; 8]) -> (isize, isize) {
    // Best run found so far as (start, length), and the start of the run
    // currently being scanned, if any.
    let mut best: Option<(isize, isize)> = None;
    let mut open_run: Option<isize> = None;

    // A trailing "non-zero" marker closes a run that reaches the last piece.
    let zero_flags = pieces.iter().map(|&piece| piece == 0).chain(Some(false));
    for (position, is_zero) in zero_flags.enumerate() {
        let position = position as isize;
        match (is_zero, open_run) {
            (true, None) => open_run = Some(position),
            (false, Some(begin)) => {
                let run_length = position - begin;
                // Strict comparison: on a tie the earlier run is kept.
                if best.map_or(true, |(_, length)| run_length > length) {
                    best = Some((begin, run_length));
                }
                open_run = None;
            }
            _ => {}
        }
    }

    // Step 3 of the serializer: a lone zero is never compressed.
    match best {
        Some((begin, length)) if length >= 2 => (begin, begin + length),
        _ => (-1, -2),
    }
}
224
/// Parses one dot-separated part of an IPv4 address.
///
/// A `0x`/`0X` prefix selects hexadecimal, a leading `0` followed by at least
/// one more character selects octal, anything else is decimal.
///
/// Returns:
/// * `Ok(Some(n))` when the part is a number that fits in a `u32`; a part
///   with no digits left after the prefix (e.g. `"0x"`) counts as `0`,
/// * `Ok(None)` when the part contains a character that is not a digit of
///   the selected radix (this includes a sign such as `+` or `-`),
/// * `Err(())` when the part consists of valid digits but overflows `u32`.
///
/// <https://url.spec.whatwg.org/#ipv4-number-parser>
fn parse_ipv4number(mut input: &str) -> Result<Option<u32>, ()> {
    let mut radix = 10;
    if input.starts_with("0x") || input.starts_with("0X") {
        input = &input[2..];
        radix = 16;
    } else if input.len() >= 2 && input.starts_with('0') {
        input = &input[1..];
        radix = 8;
    }

    // `from_str_radix` does not expose why it failed, and it also accepts a
    // leading `+`. Checking the digits first means the only failure left for
    // it to report is an overflow.
    if !input.chars().all(|c| c.is_digit(radix)) {
        return Ok(None);
    }

    if input.is_empty() {
        return Ok(Some(0));
    }

    u32::from_str_radix(input, radix).map(Some).map_err(|_| ())
}
264
265 /// <https://url.spec.whatwg.org/#concept-ipv4-parser>
266 fn parse_ipv4addr(input: &str) -> ParseResult<Option<Ipv4Addr>> {
267 if input.is_empty() {
268 return Ok(None);
269 }
270 let mut parts: Vec<&str> = input.split('.').collect();
271 if parts.last() == Some(&"") {
272 parts.pop();
273 }
274 if parts.len() > 4 {
275 return Ok(None);
276 }
277 let mut numbers: Vec<u32> = Vec::new();
278 let mut overflow = false;
279 for part in parts {
280 if part == "" {
281 return Ok(None);
282 }
283 match parse_ipv4number(part) {
284 Ok(Some(n)) => numbers.push(n),
285 Ok(None) => return Ok(None),
286 Err(()) => overflow = true,
287 };
288 }
289 if overflow {
290 return Err(ParseError::InvalidIpv4Address);
291 }
292 let mut ipv4 = numbers.pop().expect("a non-empty list of numbers");
293 // Equivalent to: ipv4 >= 256 ** (4 − numbers.len())
294 if ipv4 > u32::max_value() >> (8 * numbers.len() as u32) {
295 return Err(ParseError::InvalidIpv4Address);
296 }
297 if numbers.iter().any(|x| *x > 255) {
298 return Err(ParseError::InvalidIpv4Address);
299 }
300 for (counter, n) in numbers.iter().enumerate() {
301 ipv4 += n << (8 * (3 - counter as u32))
302 }
303 Ok(Some(Ipv4Addr::from(ipv4)))
304 }
305
306 /// <https://url.spec.whatwg.org/#concept-ipv6-parser>
307 fn parse_ipv6addr(input: &str) -> ParseResult<Ipv6Addr> {
308 let input = input.as_bytes();
309 let len = input.len();
310 let mut is_ip_v4 = false;
311 let mut pieces = [0, 0, 0, 0, 0, 0, 0, 0];
312 let mut piece_pointer = 0;
313 let mut compress_pointer = None;
314 let mut i = 0;
315
316 if len < 2 {
317 return Err(ParseError::InvalidIpv6Address);
318 }
319
320 if input[0] == b':' {
321 if input[1] != b':' {
322 return Err(ParseError::InvalidIpv6Address);
323 }
324 i = 2;
325 piece_pointer = 1;
326 compress_pointer = Some(1);
327 }
328
329 while i < len {
330 if piece_pointer == 8 {
331 return Err(ParseError::InvalidIpv6Address);
332 }
333 if input[i] == b':' {
334 if compress_pointer.is_some() {
335 return Err(ParseError::InvalidIpv6Address);
336 }
337 i += 1;
338 piece_pointer += 1;
339 compress_pointer = Some(piece_pointer);
340 continue;
341 }
342 let start = i;
343 let end = cmp::min(len, start + 4);
344 let mut value = 0u16;
345 while i < end {
346 match (input[i] as char).to_digit(16) {
347 Some(digit) => {
348 value = value * 0x10 + digit as u16;
349 i += 1;
350 }
351 None => break,
352 }
353 }
354 if i < len {
355 match input[i] {
356 b'.' => {
357 if i == start {
358 return Err(ParseError::InvalidIpv6Address);
359 }
360 i = start;
361 if piece_pointer > 6 {
362 return Err(ParseError::InvalidIpv6Address);
363 }
364 is_ip_v4 = true;
365 }
366 b':' => {
367 i += 1;
368 if i == len {
369 return Err(ParseError::InvalidIpv6Address);
370 }
371 }
372 _ => return Err(ParseError::InvalidIpv6Address),
373 }
374 }
375 if is_ip_v4 {
376 break;
377 }
378 pieces[piece_pointer] = value;
379 piece_pointer += 1;
380 }
381
382 if is_ip_v4 {
383 if piece_pointer > 6 {
384 return Err(ParseError::InvalidIpv6Address);
385 }
386 let mut numbers_seen = 0;
387 while i < len {
388 if numbers_seen > 0 {
389 if numbers_seen < 4 && (i < len && input[i] == b'.') {
390 i += 1
391 } else {
392 return Err(ParseError::InvalidIpv6Address);
393 }
394 }
395
396 let mut ipv4_piece = None;
397 while i < len {
398 let digit = match input[i] {
399 c @ b'0'..=b'9' => c - b'0',
400 _ => break,
401 };
402 match ipv4_piece {
403 None => ipv4_piece = Some(digit as u16),
404 Some(0) => return Err(ParseError::InvalidIpv6Address), // No leading zero
405 Some(ref mut v) => {
406 *v = *v * 10 + digit as u16;
407 if *v > 255 {
408 return Err(ParseError::InvalidIpv6Address);
409 }
410 }
411 }
412 i += 1;
413 }
414
415 pieces[piece_pointer] = if let Some(v) = ipv4_piece {
416 pieces[piece_pointer] * 0x100 + v
417 } else {
418 return Err(ParseError::InvalidIpv6Address);
419 };
420 numbers_seen += 1;
421
422 if numbers_seen == 2 || numbers_seen == 4 {
423 piece_pointer += 1;
424 }
425 }
426
427 if numbers_seen != 4 {
428 return Err(ParseError::InvalidIpv6Address);
429 }
430 }
431
432 if i < len {
433 return Err(ParseError::InvalidIpv6Address);
434 }
435
436 match compress_pointer {
437 Some(compress_pointer) => {
438 let mut swaps = piece_pointer - compress_pointer;
439 piece_pointer = 7;
440 while swaps > 0 {
441 pieces.swap(piece_pointer, compress_pointer + swaps - 1);
442 swaps -= 1;
443 piece_pointer -= 1;
444 }
445 }
446 _ => {
447 if piece_pointer != 8 {
448 return Err(ParseError::InvalidIpv6Address);
449 }
450 }
451 }
452 Ok(Ipv6Addr::new(
453 pieces[0], pieces[1], pieces[2], pieces[3], pieces[4], pieces[5], pieces[6], pieces[7],
454 ))
455 }