1 // Copyright 2013-2016 The rust-url developers.
3 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6 // option. This file may not be copied, modified, or distributed
7 // except according to those terms.
9 use std
::ascii
::AsciiExt
;
10 use std
::error
::Error
;
11 use std
::fmt
::{self, Formatter, Write}
;
15 use encoding
::EncodingOverride
;
16 use host
::{Host, HostInternal}
;
17 use percent_encoding
::{
18 utf8_percent_encode
, percent_encode
,
19 SIMPLE_ENCODE_SET
, DEFAULT_ENCODE_SET
, USERINFO_ENCODE_SET
, QUERY_ENCODE_SET
,
20 PATH_SEGMENT_ENCODE_SET
23 pub type ParseResult
<T
> = Result
<T
, ParseError
>;
/// Generates the `ParseError` enum and its `std::error::Error` implementation
/// from a list of `Variant => "description",` pairs.
macro_rules! simple_enum_error {
    ($($name: ident => $description: expr,)+) => {
        /// Errors that can occur during parsing.
        #[derive(PartialEq, Eq, Clone, Copy, Debug)]
        pub enum ParseError {
            $(
                $name,
            )+
        }

        impl Error for ParseError {
            fn description(&self) -> &str {
                match *self {
                    $(
                        ParseError::$name => $description,
                    )+
                }
            }
        }
    }
}

simple_enum_error! {
    EmptyHost => "empty host",
    IdnaError => "invalid international domain name",
    InvalidPort => "invalid port number",
    InvalidIpv4Address => "invalid IPv4 address",
    InvalidIpv6Address => "invalid IPv6 address",
    InvalidDomainCharacter => "invalid domain character",
    RelativeUrlWithoutBase => "relative URL without a base",
    RelativeUrlWithCannotBeABaseBase => "relative URL with a cannot-be-a-base base",
    SetHostOnCannotBeABaseUrl => "a cannot-be-a-base URL doesn’t have a host to set",
    Overflow => "URLs more than 4 GB are not supported",
}

/// Displays a `ParseError` as its static description string.
impl fmt::Display for ParseError {
    fn fmt(&self, fmt: &mut Formatter) -> fmt::Result {
        // Fully qualified so the call does not depend on which `fmt`-providing
        // traits happen to be imported by the surrounding module.
        fmt::Display::fmt(self.description(), fmt)
    }
}
66 impl From
<::idna
::uts46
::Errors
> for ParseError
{
67 fn from(_
: ::idna
::uts46
::Errors
) -> ParseError { ParseError::IdnaError }
70 #[derive(Copy, Clone)]
78 pub fn is_special(&self) -> bool
{
79 !matches
!(*self, SchemeType
::NotSpecial
)
82 pub fn is_file(&self) -> bool
{
83 matches
!(*self, SchemeType
::File
)
86 pub fn from(s
: &str) -> Self {
88 "http" | "https" | "ws" | "wss" | "ftp" | "gopher" => SchemeType
::SpecialNotFile
,
89 "file" => SchemeType
::File
,
90 _
=> SchemeType
::NotSpecial
,
95 pub fn default_port(scheme
: &str) -> Option
<u16> {
97 "http" | "ws" => Some(80),
98 "https" | "wss" => Some(443),
100 "gopher" => Some(70),
106 pub struct Input
<'i
> {
107 chars
: str::Chars
<'i
>,
111 pub fn new(input
: &'i
str) -> Self {
112 Input
::with_log(input
, None
)
115 pub fn with_log(original_input
: &'i
str, log_syntax_violation
: Option
<&Fn(&'
static str)>)
117 let input
= original_input
.trim_matches(c0_control_or_space
);
118 if let Some(log
) = log_syntax_violation
{
119 if input
.len() < original_input
.len() {
120 log("leading or trailing control or space character are ignored in URLs")
122 if input
.chars().any(|c
| matches
!(c
, '
\t'
| '
\n'
| '
\r'
)) {
123 log("tabs or newlines are ignored in URLs")
126 Input { chars: input.chars() }
130 pub fn is_empty(&self) -> bool
{
131 self.clone().next().is_none()
135 fn starts_with
<P
: Pattern
>(&self, p
: P
) -> bool
{
136 p
.split_prefix(&mut self.clone())
140 pub fn split_prefix
<P
: Pattern
>(&self, p
: P
) -> Option
<Self> {
141 let mut remaining
= self.clone();
142 if p
.split_prefix(&mut remaining
) {
150 fn split_first(&self) -> (Option
<char>, Self) {
151 let mut remaining
= self.clone();
152 (remaining
.next(), remaining
)
156 fn count_matching
<F
: Fn(char) -> bool
>(&self, f
: F
) -> (u32, Self) {
158 let mut remaining
= self.clone();
160 let mut input
= remaining
.clone();
161 if matches
!(input
.next(), Some(c
) if f(c
)) {
165 return (count
, remaining
)
171 fn next_utf8(&mut self) -> Option
<(char, &'i
str)> {
173 let utf8
= self.chars
.as_str();
174 match self.chars
.next() {
176 if !matches
!(c
, '
\t'
| '
\n'
| '
\r'
) {
177 return Some((c
, &utf8
[..c
.len_utf8()]))
187 fn split_prefix
<'i
>(self, input
: &mut Input
<'i
>) -> bool
;
190 impl Pattern
for char {
191 fn split_prefix
<'i
>(self, input
: &mut Input
<'i
>) -> bool { input.next() == Some(self) }
194 impl<'a
> Pattern
for &'a
str {
195 fn split_prefix
<'i
>(self, input
: &mut Input
<'i
>) -> bool
{
196 for c
in self.chars() {
197 if input
.next() != Some(c
) {
205 impl<F
: FnMut(char) -> bool
> Pattern
for F
{
206 fn split_prefix
<'i
>(self, input
: &mut Input
<'i
>) -> bool { input.next().map_or(false, self) }
209 impl<'i
> Iterator
for Input
<'i
> {
211 fn next(&mut self) -> Option
<char> {
212 self.chars
.by_ref().filter(|&c
| !matches
!(c
, '
\t'
| '
\n'
| '
\r'
)).next()
216 pub struct Parser
<'a
> {
217 pub serialization
: String
,
218 pub base_url
: Option
<&'a Url
>,
219 pub query_encoding_override
: EncodingOverride
,
220 pub log_syntax_violation
: Option
<&'a
Fn(&'
static str)>,
221 pub context
: Context
,
224 #[derive(PartialEq, Eq, Copy, Clone)]
231 impl<'a
> Parser
<'a
> {
232 pub fn for_setter(serialization
: String
) -> Parser
<'a
> {
234 serialization
: serialization
,
236 query_encoding_override
: EncodingOverride
::utf8(),
237 log_syntax_violation
: None
,
238 context
: Context
::Setter
,
242 fn syntax_violation(&self, reason
: &'
static str) {
243 if let Some(log
) = self.log_syntax_violation
{
248 fn syntax_violation_if
<F
: Fn() -> bool
>(&self, reason
: &'
static str, test
: F
) {
249 // Skip test if not logging.
250 if let Some(log
) = self.log_syntax_violation
{
257 /// https://url.spec.whatwg.org/#concept-basic-url-parser
258 pub fn parse_url(mut self, input
: &str) -> ParseResult
<Url
> {
259 let input
= Input
::with_log(input
, self.log_syntax_violation
);
260 if let Ok(remaining
) = self.parse_scheme(input
.clone()) {
261 return self.parse_with_scheme(remaining
)
265 if let Some(base_url
) = self.base_url
{
266 if input
.starts_with('
#') {
267 self.fragment_only(base_url
, input
)
268 } else if base_url
.cannot_be_a_base() {
269 Err(ParseError
::RelativeUrlWithCannotBeABaseBase
)
271 let scheme_type
= SchemeType
::from(base_url
.scheme());
272 if scheme_type
.is_file() {
273 self.parse_file(input
, Some(base_url
))
275 self.parse_relative(input
, scheme_type
, base_url
)
279 Err(ParseError
::RelativeUrlWithoutBase
)
283 pub fn parse_scheme
<'i
>(&mut self, mut input
: Input
<'i
>) -> Result
<Input
<'i
>, ()> {
284 if input
.is_empty() || !input
.starts_with(ascii_alpha
) {
287 debug_assert
!(self.serialization
.is_empty());
288 while let Some(c
) = input
.next() {
290 'a'
...'z'
| 'A'
...'Z'
| '
0'
...'
9'
| '
+'
| '
-'
| '
.'
=> {
291 self.serialization
.push(c
.to_ascii_lowercase())
293 '
:'
=> return Ok(input
),
295 self.serialization
.clear();
301 if self.context
== Context
::Setter
{
304 self.serialization
.clear();
309 fn parse_with_scheme(mut self, input
: Input
) -> ParseResult
<Url
> {
310 let scheme_end
= try
!(to_u32(self.serialization
.len()));
311 let scheme_type
= SchemeType
::from(&self.serialization
);
312 self.serialization
.push('
:'
);
314 SchemeType
::File
=> {
315 self.syntax_violation_if("expected // after file:", || !input
.starts_with("//"));
316 let base_file_url
= self.base_url
.and_then(|base
| {
317 if base
.scheme() == "file" { Some(base) }
else { None }
319 self.serialization
.clear();
320 self.parse_file(input
, base_file_url
)
322 SchemeType
::SpecialNotFile
=> {
323 // special relative or authority state
324 let (slashes_count
, remaining
) = input
.count_matching(|c
| matches
!(c
, '
/'
| '
\\'
));
325 if let Some(base_url
) = self.base_url
{
326 if slashes_count
< 2 &&
327 base_url
.scheme() == &self.serialization
[..scheme_end
as usize] {
328 // "Cannot-be-a-base" URLs only happen with "not special" schemes.
329 debug_assert
!(!base_url
.cannot_be_a_base());
330 self.serialization
.clear();
331 return self.parse_relative(input
, scheme_type
, base_url
)
334 // special authority slashes state
335 self.syntax_violation_if("expected //", || {
336 input
.clone().take_while(|&c
| matches
!(c
, '
/'
| '
\\'
))
337 .collect
::<String
>() != "//"
339 self.after_double_slash(remaining
, scheme_type
, scheme_end
)
341 SchemeType
::NotSpecial
=> self.parse_non_special(input
, scheme_type
, scheme_end
)
/// Scheme other than file, http, https, ws, wss, ftp, gopher.
346 fn parse_non_special(mut self, input
: Input
, scheme_type
: SchemeType
, scheme_end
: u32)
347 -> ParseResult
<Url
> {
348 // path or authority state (
349 if let Some(input
) = input
.split_prefix("//") {
350 return self.after_double_slash(input
, scheme_type
, scheme_end
)
352 // Anarchist URL (no authority)
353 let path_start
= try
!(to_u32(self.serialization
.len()));
354 let username_end
= path_start
;
355 let host_start
= path_start
;
356 let host_end
= path_start
;
357 let host
= HostInternal
::None
;
359 let remaining
= if let Some(input
) = input
.split_prefix('
/'
) {
360 let path_start
= self.serialization
.len();
361 self.serialization
.push('
/'
);
362 self.parse_path(scheme_type
, &mut false, path_start
, input
)
364 self.parse_cannot_be_a_base_path(input
)
366 self.with_query_and_fragment(scheme_end
, username_end
, host_start
,
367 host_end
, host
, port
, path_start
, remaining
)
370 fn parse_file(mut self, input
: Input
, mut base_file_url
: Option
<&Url
>) -> ParseResult
<Url
> {
372 debug_assert
!(self.serialization
.is_empty());
373 let (first_char
, input_after_first_char
) = input
.split_first();
376 if let Some(base_url
) = base_file_url
{
377 // Copy everything except the fragment
378 let before_fragment
= match base_url
.fragment_start
{
379 Some(i
) => &base_url
.serialization
[..i
as usize],
380 None
=> &*base_url
.serialization
,
382 self.serialization
.push_str(before_fragment
);
384 serialization
: self.serialization
,
385 fragment_start
: None
,
389 self.serialization
.push_str("file:///");
390 let scheme_end
= "file".len() as u32;
391 let path_start
= "file://".len() as u32;
393 serialization
: self.serialization
,
394 scheme_end
: scheme_end
,
395 username_end
: path_start
,
396 host_start
: path_start
,
397 host_end
: path_start
,
398 host
: HostInternal
::None
,
400 path_start
: path_start
,
402 fragment_start
: None
,
407 if let Some(base_url
) = base_file_url
{
408 // Copy everything up to the query string
409 let before_query
= match (base_url
.query_start
, base_url
.fragment_start
) {
410 (None
, None
) => &*base_url
.serialization
,
412 (None
, Some(i
)) => base_url
.slice(..i
)
414 self.serialization
.push_str(before_query
);
415 let (query_start
, fragment_start
) =
416 try
!(self.parse_query_and_fragment(base_url
.scheme_end
, input
));
418 serialization
: self.serialization
,
419 query_start
: query_start
,
420 fragment_start
: fragment_start
,
424 self.serialization
.push_str("file:///");
425 let scheme_end
= "file".len() as u32;
426 let path_start
= "file://".len() as u32;
427 let (query_start
, fragment_start
) =
428 try
!(self.parse_query_and_fragment(scheme_end
, input
));
430 serialization
: self.serialization
,
431 scheme_end
: scheme_end
,
432 username_end
: path_start
,
433 host_start
: path_start
,
434 host_end
: path_start
,
435 host
: HostInternal
::None
,
437 path_start
: path_start
,
438 query_start
: query_start
,
439 fragment_start
: fragment_start
,
444 if let Some(base_url
) = base_file_url
{
445 self.fragment_only(base_url
, input
)
447 self.serialization
.push_str("file:///");
448 let scheme_end
= "file".len() as u32;
449 let path_start
= "file://".len() as u32;
450 let fragment_start
= "file:///".len() as u32;
451 self.parse_fragment(input_after_first_char
);
453 serialization
: self.serialization
,
454 scheme_end
: scheme_end
,
455 username_end
: path_start
,
456 host_start
: path_start
,
457 host_end
: path_start
,
458 host
: HostInternal
::None
,
460 path_start
: path_start
,
462 fragment_start
: Some(fragment_start
),
466 Some('
/'
) | Some('
\\'
) => {
467 self.syntax_violation_if("backslash", || first_char
== Some('
\\'
));
469 let (next_char
, input_after_next_char
) = input_after_first_char
.split_first();
470 self.syntax_violation_if("backslash", || next_char
== Some('
\\'
));
471 if matches
!(next_char
, Some('
/'
) | Some('
\\'
)) {
473 self.serialization
.push_str("file://");
474 let scheme_end
= "file".len() as u32;
475 let host_start
= "file://".len() as u32;
476 let (path_start
, host
, remaining
) =
477 try
!(self.parse_file_host(input_after_next_char
));
478 let host_end
= try
!(to_u32(self.serialization
.len()));
479 let mut has_host
= !matches
!(host
, HostInternal
::None
);
480 let remaining
= if path_start
{
481 self.parse_path_start(SchemeType
::File
, &mut has_host
, remaining
)
483 let path_start
= self.serialization
.len();
484 self.serialization
.push('
/'
);
485 self.parse_path(SchemeType
::File
, &mut has_host
, path_start
, remaining
)
487 // FIXME: deal with has_host
488 let (query_start
, fragment_start
) =
489 try
!(self.parse_query_and_fragment(scheme_end
, remaining
));
491 serialization
: self.serialization
,
492 scheme_end
: scheme_end
,
493 username_end
: host_start
,
494 host_start
: host_start
,
498 path_start
: host_end
,
499 query_start
: query_start
,
500 fragment_start
: fragment_start
,
503 self.serialization
.push_str("file:///");
504 let scheme_end
= "file".len() as u32;
505 let path_start
= "file://".len();
506 if let Some(base_url
) = base_file_url
{
507 let first_segment
= base_url
.path_segments().unwrap().next().unwrap();
508 // FIXME: *normalized* drive letter
509 if is_windows_drive_letter(first_segment
) {
510 self.serialization
.push_str(first_segment
);
511 self.serialization
.push('
/'
);
514 let remaining
= self.parse_path(
515 SchemeType
::File
, &mut false, path_start
, input_after_first_char
);
516 let (query_start
, fragment_start
) =
517 try
!(self.parse_query_and_fragment(scheme_end
, remaining
));
518 let path_start
= path_start
as u32;
520 serialization
: self.serialization
,
521 scheme_end
: scheme_end
,
522 username_end
: path_start
,
523 host_start
: path_start
,
524 host_end
: path_start
,
525 host
: HostInternal
::None
,
527 path_start
: path_start
,
528 query_start
: query_start
,
529 fragment_start
: fragment_start
,
534 if starts_with_windows_drive_letter_segment(&input
) {
535 base_file_url
= None
;
537 if let Some(base_url
) = base_file_url
{
538 let before_query
= match (base_url
.query_start
, base_url
.fragment_start
) {
539 (None
, None
) => &*base_url
.serialization
,
541 (None
, Some(i
)) => base_url
.slice(..i
)
543 self.serialization
.push_str(before_query
);
544 self.pop_path(SchemeType
::File
, base_url
.path_start
as usize);
545 let remaining
= self.parse_path(
546 SchemeType
::File
, &mut true, base_url
.path_start
as usize, input
);
547 self.with_query_and_fragment(
548 base_url
.scheme_end
, base_url
.username_end
, base_url
.host_start
,
549 base_url
.host_end
, base_url
.host
, base_url
.port
, base_url
.path_start
, remaining
)
551 self.serialization
.push_str("file:///");
552 let scheme_end
= "file".len() as u32;
553 let path_start
= "file://".len();
554 let remaining
= self.parse_path(
555 SchemeType
::File
, &mut false, path_start
, input
);
556 let (query_start
, fragment_start
) =
557 try
!(self.parse_query_and_fragment(scheme_end
, remaining
));
558 let path_start
= path_start
as u32;
560 serialization
: self.serialization
,
561 scheme_end
: scheme_end
,
562 username_end
: path_start
,
563 host_start
: path_start
,
564 host_end
: path_start
,
565 host
: HostInternal
::None
,
567 path_start
: path_start
,
568 query_start
: query_start
,
569 fragment_start
: fragment_start
,
576 fn parse_relative(mut self, input
: Input
, scheme_type
: SchemeType
, base_url
: &Url
)
577 -> ParseResult
<Url
> {
579 debug_assert
!(self.serialization
.is_empty());
580 let (first_char
, input_after_first_char
) = input
.split_first();
583 // Copy everything except the fragment
584 let before_fragment
= match base_url
.fragment_start
{
585 Some(i
) => &base_url
.serialization
[..i
as usize],
586 None
=> &*base_url
.serialization
,
588 self.serialization
.push_str(before_fragment
);
590 serialization
: self.serialization
,
591 fragment_start
: None
,
596 // Copy everything up to the query string
597 let before_query
= match (base_url
.query_start
, base_url
.fragment_start
) {
598 (None
, None
) => &*base_url
.serialization
,
600 (None
, Some(i
)) => base_url
.slice(..i
)
602 self.serialization
.push_str(before_query
);
603 let (query_start
, fragment_start
) =
604 try
!(self.parse_query_and_fragment(base_url
.scheme_end
, input
));
606 serialization
: self.serialization
,
607 query_start
: query_start
,
608 fragment_start
: fragment_start
,
612 Some('
#') => self.fragment_only(base_url, input),
613 Some('
/'
) | Some('
\\'
) => {
614 let (slashes_count
, remaining
) = input
.count_matching(|c
| matches
!(c
, '
/'
| '
\\'
));
615 if slashes_count
>= 2 {
616 self.syntax_violation_if("expected //", || {
617 input
.clone().take_while(|&c
| matches
!(c
, '
/'
| '
\\'
))
618 .collect
::<String
>() != "//"
620 let scheme_end
= base_url
.scheme_end
;
621 debug_assert
!(base_url
.byte_at(scheme_end
) == b'
:'
);
622 self.serialization
.push_str(base_url
.slice(..scheme_end
+ 1));
623 return self.after_double_slash(remaining
, scheme_type
, scheme_end
)
625 let path_start
= base_url
.path_start
;
626 debug_assert
!(base_url
.byte_at(path_start
) == b'
/'
);
627 self.serialization
.push_str(base_url
.slice(..path_start
+ 1));
628 let remaining
= self.parse_path(
629 scheme_type
, &mut true, path_start
as usize, input_after_first_char
);
630 self.with_query_and_fragment(
631 base_url
.scheme_end
, base_url
.username_end
, base_url
.host_start
,
632 base_url
.host_end
, base_url
.host
, base_url
.port
, base_url
.path_start
, remaining
)
635 let before_query
= match (base_url
.query_start
, base_url
.fragment_start
) {
636 (None
, None
) => &*base_url
.serialization
,
638 (None
, Some(i
)) => base_url
.slice(..i
)
640 self.serialization
.push_str(before_query
);
641 // FIXME spec says just "remove last entry", not the "pop" algorithm
642 self.pop_path(scheme_type
, base_url
.path_start
as usize);
643 let remaining
= self.parse_path(
644 scheme_type
, &mut true, base_url
.path_start
as usize, input
);
645 self.with_query_and_fragment(
646 base_url
.scheme_end
, base_url
.username_end
, base_url
.host_start
,
647 base_url
.host_end
, base_url
.host
, base_url
.port
, base_url
.path_start
, remaining
)
652 fn after_double_slash(mut self, input
: Input
, scheme_type
: SchemeType
, scheme_end
: u32)
653 -> ParseResult
<Url
> {
654 self.serialization
.push('
/'
);
655 self.serialization
.push('
/'
);
657 let (username_end
, remaining
) = try
!(self.parse_userinfo(input
, scheme_type
));
659 let host_start
= try
!(to_u32(self.serialization
.len()));
660 let (host_end
, host
, port
, remaining
) =
661 try
!(self.parse_host_and_port(remaining
, scheme_end
, scheme_type
));
663 let path_start
= try
!(to_u32(self.serialization
.len()));
664 let remaining
= self.parse_path_start(
665 scheme_type
, &mut true, remaining
);
666 self.with_query_and_fragment(scheme_end
, username_end
, host_start
,
667 host_end
, host
, port
, path_start
, remaining
)
670 /// Return (username_end, remaining)
671 fn parse_userinfo
<'i
>(&mut self, mut input
: Input
<'i
>, scheme_type
: SchemeType
)
672 -> ParseResult
<(u32, Input
<'i
>)> {
673 let mut last_at
= None
;
674 let mut remaining
= input
.clone();
675 let mut char_count
= 0;
676 while let Some(c
) = remaining
.next() {
679 if last_at
.is_some() {
680 self.syntax_violation("unencoded @ sign in username or password")
682 self.syntax_violation(
683 "embedding authentification information (username or password) \
684 in an URL is not recommended")
686 last_at
= Some((char_count
, remaining
.clone()))
688 '
/'
| '?'
| '
#' => break,
689 '
\\'
if scheme_type
.is_special() => break,
694 let (mut userinfo_char_count
, remaining
) = match last_at
{
695 None
=> return Ok((try
!(to_u32(self.serialization
.len())), input
)),
696 Some((0, remaining
)) => return Ok((try
!(to_u32(self.serialization
.len())), remaining
)),
700 let mut username_end
= None
;
701 while userinfo_char_count
> 0 {
702 let (c
, utf8_c
) = input
.next_utf8().unwrap();
703 userinfo_char_count
-= 1;
704 if c
== '
:'
&& username_end
.is_none() {
705 // Start parsing password
706 username_end
= Some(try
!(to_u32(self.serialization
.len())));
707 self.serialization
.push('
:'
);
709 self.check_url_code_point(c
, &input
);
710 self.serialization
.extend(utf8_percent_encode(utf8_c
, USERINFO_ENCODE_SET
));
713 let username_end
= match username_end
{
715 None
=> try
!(to_u32(self.serialization
.len())),
717 self.serialization
.push('@'
);
718 Ok((username_end
, remaining
))
721 fn parse_host_and_port
<'i
>(&mut self, input
: Input
<'i
>,
722 scheme_end
: u32, scheme_type
: SchemeType
)
723 -> ParseResult
<(u32, HostInternal
, Option
<u16>, Input
<'i
>)> {
724 let (host
, remaining
) = try
!(
725 Parser
::parse_host(input
, scheme_type
));
726 write
!(&mut self.serialization
, "{}", host
).unwrap();
727 let host_end
= try
!(to_u32(self.serialization
.len()));
728 let (port
, remaining
) = if let Some(remaining
) = remaining
.split_prefix('
:'
) {
729 let scheme
= || default_port(&self.serialization
[..scheme_end
as usize]);
730 try
!(Parser
::parse_port(remaining
, scheme
, self.context
))
734 if let Some(port
) = port
{
735 write
!(&mut self.serialization
, ":{}", port
).unwrap()
737 Ok((host_end
, host
.into(), port
, remaining
))
740 pub fn parse_host
<'i
>(mut input
: Input
<'i
>, scheme_type
: SchemeType
)
741 -> ParseResult
<(Host
<String
>, Input
<'i
>)> {
742 // Undo the Input abstraction here to avoid allocating in the common case
743 // where the host part of the input does not contain any tab or newline
744 let input_str
= input
.chars
.as_str();
745 let mut inside_square_brackets
= false;
746 let mut has_ignored_chars
= false;
747 let mut non_ignored_chars
= 0;
749 for c
in input_str
.chars() {
751 '
:'
if !inside_square_brackets
=> break,
752 '
\\'
if scheme_type
.is_special() => break,
753 '
/'
| '?'
| '
#' => break,
754 '
\t'
| '
\n'
| '
\r'
=> {
755 has_ignored_chars
= true;
758 inside_square_brackets
= true;
759 non_ignored_chars
+= 1
762 inside_square_brackets
= false;
763 non_ignored_chars
+= 1
765 _
=> non_ignored_chars
+= 1
767 bytes
+= c
.len_utf8();
769 let replaced
: String
;
772 let host_input
= input
.by_ref().take(non_ignored_chars
);
773 if has_ignored_chars
{
774 replaced
= host_input
.collect();
775 host_str
= &*replaced
777 for _
in host_input {}
778 host_str
= &input_str
[..bytes
]
781 if scheme_type
.is_special() && host_str
.is_empty() {
782 return Err(ParseError
::EmptyHost
)
784 let host
= try
!(Host
::parse(host_str
));
788 pub fn parse_file_host
<'i
>(&mut self, input
: Input
<'i
>)
789 -> ParseResult
<(bool
, HostInternal
, Input
<'i
>)> {
790 // Undo the Input abstraction here to avoid allocating in the common case
791 // where the host part of the input does not contain any tab or newline
792 let input_str
= input
.chars
.as_str();
793 let mut has_ignored_chars
= false;
794 let mut non_ignored_chars
= 0;
796 for c
in input_str
.chars() {
798 '
/'
| '
\\'
| '?'
| '
#' => break,
799 '
\t'
| '
\n'
| '
\r'
=> has_ignored_chars
= true,
800 _
=> non_ignored_chars
+= 1,
802 bytes
+= c
.len_utf8();
804 let replaced
: String
;
806 let mut remaining
= input
.clone();
808 let host_input
= remaining
.by_ref().take(non_ignored_chars
);
809 if has_ignored_chars
{
810 replaced
= host_input
.collect();
811 host_str
= &*replaced
813 for _
in host_input {}
814 host_str
= &input_str
[..bytes
]
817 if is_windows_drive_letter(host_str
) {
818 return Ok((false, HostInternal
::None
, input
))
820 let host
= if host_str
.is_empty() {
823 match try
!(Host
::parse(host_str
)) {
824 Host
::Domain(ref d
) if d
== "localhost" => HostInternal
::None
,
826 write
!(&mut self.serialization
, "{}", host
).unwrap();
831 Ok((true, host
, remaining
))
834 pub fn parse_port
<'i
, P
>(mut input
: Input
<'i
>, default_port
: P
,
836 -> ParseResult
<(Option
<u16>, Input
<'i
>)>
837 where P
: Fn() -> Option
<u16> {
838 let mut port
: u32 = 0;
839 let mut has_any_digit
= false;
840 while let (Some(c
), remaining
) = input
.split_first() {
841 if let Some(digit
) = c
.to_digit(10) {
842 port
= port
* 10 + digit
;
843 if port
> ::std
::u16::MAX
as u32 {
844 return Err(ParseError
::InvalidPort
)
846 has_any_digit
= true;
847 } else if context
== Context
::UrlParser
&& !matches
!(c
, '
/'
| '
\\'
| '?'
| '
#') {
848 return Err(ParseError
::InvalidPort
)
854 let mut opt_port
= Some(port
as u16);
855 if !has_any_digit
|| opt_port
== default_port() {
858 return Ok((opt_port
, input
))
861 pub fn parse_path_start
<'i
>(&mut self, scheme_type
: SchemeType
, has_host
: &mut bool
,
862 mut input
: Input
<'i
>)
865 match input
.split_first() {
866 (Some('
/'
), remaining
) => input
= remaining
,
867 (Some('
\\'
), remaining
) => if scheme_type
.is_special() {
868 self.syntax_violation("backslash");
873 let path_start
= self.serialization
.len();
874 self.serialization
.push('
/'
);
875 self.parse_path(scheme_type
, has_host
, path_start
, input
)
878 pub fn parse_path
<'i
>(&mut self, scheme_type
: SchemeType
, has_host
: &mut bool
,
879 path_start
: usize, mut input
: Input
<'i
>)
881 // Relative path state
882 debug_assert
!(self.serialization
.ends_with("/"));
884 let segment_start
= self.serialization
.len();
885 let mut ends_with_slash
= false;
887 let input_before_c
= input
.clone();
888 let (c
, utf8_c
) = if let Some(x
) = input
.next_utf8() { x }
else { break }
;
890 '
/'
if self.context
!= Context
::PathSegmentSetter
=> {
891 ends_with_slash
= true;
894 '
\\'
if self.context
!= Context
::PathSegmentSetter
&&
895 scheme_type
.is_special() => {
896 self.syntax_violation("backslash");
897 ends_with_slash
= true;
900 '?'
| '
#' if self.context == Context::UrlParser => {
901 input
= input_before_c
;
905 self.check_url_code_point(c
, &input
);
907 let after_percent_sign
= input
.clone();
908 if matches
!(input
.next(), Some('
2'
)) &&
909 matches
!(input
.next(), Some('E'
) | Some('e'
)) {
910 self.serialization
.push('
.'
);
913 input
= after_percent_sign
915 if self.context
== Context
::PathSegmentSetter
{
916 self.serialization
.extend(utf8_percent_encode(
917 utf8_c
, PATH_SEGMENT_ENCODE_SET
));
919 self.serialization
.extend(utf8_percent_encode(
920 utf8_c
, DEFAULT_ENCODE_SET
));
925 match &self.serialization
[segment_start
..] {
927 debug_assert
!(self.serialization
.as_bytes()[segment_start
- 1] == b'
/'
);
928 self.serialization
.truncate(segment_start
- 1); // Truncate "/.."
929 self.pop_path(scheme_type
, path_start
);
930 if !self.serialization
[path_start
..].ends_with("/") {
931 self.serialization
.push('
/'
)
935 self.serialization
.truncate(segment_start
);
938 if scheme_type
.is_file() && is_windows_drive_letter(
939 &self.serialization
[path_start
+ 1..]
941 if self.serialization
.ends_with('
|'
) {
942 self.serialization
.pop();
943 self.serialization
.push('
:'
);
946 self.syntax_violation("file: with host and Windows drive letter");
947 *has_host
= false; // FIXME account for this in callers
951 self.serialization
.push('
/'
)
955 if !ends_with_slash
{
962 /// https://url.spec.whatwg.org/#pop-a-urls-path
963 fn pop_path(&mut self, scheme_type
: SchemeType
, path_start
: usize) {
964 if self.serialization
.len() > path_start
{
965 let slash_position
= self.serialization
[path_start
..].rfind('
/'
).unwrap();
966 // + 1 since rfind returns the position before the slash.
967 let segment_start
= path_start
+ slash_position
+ 1;
968 // Don’t pop a Windows drive letter
969 // FIXME: *normalized* Windows drive letter
971 scheme_type
.is_file() &&
972 is_windows_drive_letter(&self.serialization
[segment_start
..])
974 self.serialization
.truncate(segment_start
);
980 pub fn parse_cannot_be_a_base_path
<'i
>(&mut self, mut input
: Input
<'i
>) -> Input
<'i
> {
982 let input_before_c
= input
.clone();
983 match input
.next_utf8() {
984 Some(('?'
, _
)) | Some(('
#', _)) if self.context == Context::UrlParser => {
985 return input_before_c
987 Some((c
, utf8_c
)) => {
988 self.check_url_code_point(c
, &input
);
989 self.serialization
.extend(utf8_percent_encode(
990 utf8_c
, SIMPLE_ENCODE_SET
));
997 fn with_query_and_fragment(mut self, scheme_end
: u32, username_end
: u32,
998 host_start
: u32, host_end
: u32, host
: HostInternal
,
999 port
: Option
<u16>, path_start
: u32, remaining
: Input
)
1000 -> ParseResult
<Url
> {
1001 let (query_start
, fragment_start
) =
1002 try
!(self.parse_query_and_fragment(scheme_end
, remaining
));
1004 serialization
: self.serialization
,
1005 scheme_end
: scheme_end
,
1006 username_end
: username_end
,
1007 host_start
: host_start
,
1011 path_start
: path_start
,
1012 query_start
: query_start
,
1013 fragment_start
: fragment_start
1017 /// Return (query_start, fragment_start)
1018 fn parse_query_and_fragment(&mut self, scheme_end
: u32, mut input
: Input
)
1019 -> ParseResult
<(Option
<u32>, Option
<u32>)> {
1020 let mut query_start
= None
;
1021 match input
.next() {
1024 query_start
= Some(try
!(to_u32(self.serialization
.len())));
1025 self.serialization
.push('?'
);
1026 let remaining
= self.parse_query(scheme_end
, input
);
1027 if let Some(remaining
) = remaining
{
1030 return Ok((query_start
, None
))
1033 None
=> return Ok((None
, None
)),
1034 _
=> panic
!("Programming error. parse_query_and_fragment() called without ? or # {:?}")
1037 let fragment_start
= try
!(to_u32(self.serialization
.len()));
1038 self.serialization
.push('
#');
1039 self.parse_fragment(input
);
1040 Ok((query_start
, Some(fragment_start
)))
1043 pub fn parse_query
<'i
>(&mut self, scheme_end
: u32, mut input
: Input
<'i
>)
1044 -> Option
<Input
<'i
>> {
1045 let mut query
= String
::new(); // FIXME: use a streaming decoder instead
1046 let mut remaining
= None
;
1047 while let Some(c
) = input
.next() {
1048 if c
== '
#' && self.context == Context::UrlParser {
1049 remaining
= Some(input
);
1052 self.check_url_code_point(c
, &input
);
1057 let encoding
= match &self.serialization
[..scheme_end
as usize] {
1058 "http" | "https" | "file" | "ftp" | "gopher" => self.query_encoding_override
,
1059 _
=> EncodingOverride
::utf8(),
1061 let query_bytes
= encoding
.encode(query
.into());
1062 self.serialization
.extend(percent_encode(&query_bytes
, QUERY_ENCODE_SET
));
1066 fn fragment_only(mut self, base_url
: &Url
, mut input
: Input
) -> ParseResult
<Url
> {
1067 let before_fragment
= match base_url
.fragment_start
{
1068 Some(i
) => base_url
.slice(..i
),
1069 None
=> &*base_url
.serialization
,
1071 debug_assert
!(self.serialization
.is_empty());
1072 self.serialization
.reserve(before_fragment
.len() + input
.chars
.as_str().len());
1073 self.serialization
.push_str(before_fragment
);
1074 self.serialization
.push('
#');
1075 let next
= input
.next();
1076 debug_assert
!(next
== Some('
#'));
1077 self.parse_fragment(input
);
1079 serialization
: self.serialization
,
1080 fragment_start
: Some(try
!(to_u32(before_fragment
.len()))),
1085 pub fn parse_fragment(&mut self, mut input
: Input
) {
1086 while let Some((c
, utf8_c
)) = input
.next_utf8() {
1088 self.syntax_violation("NULL characters are ignored in URL fragment identifiers")
1090 self.check_url_code_point(c
, &input
);
1091 self.serialization
.extend(utf8_percent_encode(utf8_c
,
1092 SIMPLE_ENCODE_SET
));
1097 fn check_url_code_point(&self, c
: char, input
: &Input
) {
1098 if let Some(log
) = self.log_syntax_violation
{
1100 let mut input
= input
.clone();
1101 if !matches
!((input
.next(), input
.next()), (Some(a
), Some(b
))
1102 if is_ascii_hex_digit(a
) && is_ascii_hex_digit(b
)) {
1103 log("expected 2 hex digits after %")
1105 } else if !is_url_code_point(c
) {
1106 log("non-URL code point")
/// Whether `c` is one of `0-9`, `a-f` or `A-F`.
fn is_ascii_hex_digit(c: char) -> bool {
    // `char::is_digit` only recognises ASCII digits and letters, so radix 16
    // accepts exactly the ASCII hexadecimal digits.
    c.is_digit(16)
}
// Non URL code points:
// U+0000 to U+0020 (space)
// " # % < > [ \ ] ^ ` { | }
// U+007F to U+009F
// surrogates (never representable as a Rust `char`)
// noncharacters U+FDD0 to U+FDEF
// Last two of each plane: U+__FFFE to U+__FFFF for __ in 00 to 10 hex
//
// Everything else from U+00A0 to U+10FFFD is a URL code point. That includes
// U+E0000 to U+E0FFF, which an earlier enumerated range list (starting that
// plane at U+E1000) left out.
fn is_url_code_point(c: char) -> bool {
    let cp = c as u32;
    if cp < 0x80 {
        return (c >= 'a' && c <= 'z')
            || (c >= 'A' && c <= 'Z')
            || (c >= '0' && c <= '9')
            || matches!(c,
                '!' | '$' | '&' | '\'' | '(' | ')' | '*' | '+' | ',' | '-' |
                '.' | '/' | ':' | ';' | '=' | '?' | '@' | '_' | '~');
    }
    cp >= 0xA0
        && cp <= 0x10FFFD
        // Noncharacter block in the BMP.
        && !(cp >= 0xFDD0 && cp <= 0xFDEF)
        // Last two code points of every plane.
        && (cp & 0xFFFF) < 0xFFFE
}
/// https://url.spec.whatwg.org/#c0-controls-and-space
///
/// Whether `ch` is in the range U+0000 to U+0020 (space), inclusive.
fn c0_control_or_space(ch: char) -> bool {
    ch <= ' '  // U+0000 to U+0020
}
/// https://url.spec.whatwg.org/#ascii-alpha
///
/// Whether `ch` is an ASCII letter (`a-z` or `A-Z`).
pub fn ascii_alpha(ch: char) -> bool {
    (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')
}
1156 pub fn to_u32(i
: usize) -> ParseResult
<u32> {
1157 if i
<= ::std
::u32::MAX
as usize {
1160 Err(ParseError
::Overflow
)
1164 /// Wether the scheme is file:, the path has a single segment, and that segment
1165 /// is a Windows drive letter
1166 fn is_windows_drive_letter(segment
: &str) -> bool
{
1168 && starts_with_windows_drive_letter(segment
)
1171 fn starts_with_windows_drive_letter(s
: &str) -> bool
{
1172 ascii_alpha(s
.as_bytes()[0] as char)
1173 && matches
!(s
.as_bytes()[1], b'
:'
| b'
|'
)
1176 fn starts_with_windows_drive_letter_segment(input
: &Input
) -> bool
{
1177 let mut input
= input
.clone();
1178 matches
!((input
.next(), input
.next(), input
.next()), (Some(a
), Some(b
), Some(c
))
1179 if ascii_alpha(a
) && matches
!(b
, '
:'
| '
|'
) && matches
!(c
, '
/'
| '
\\'
| '?'
| '
#'))