]>
Commit | Line | Data |
---|---|---|
1 | // Copyright 2013-2015 The rust-url developers. | |
2 | // | |
3 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | |
4 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | |
5 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | |
6 | // option. This file may not be copied, modified, or distributed | |
7 | // except according to those terms. | |
8 | ||
9 | /*! | |
10 | ||
11 | rust-url is an implementation of the [URL Standard](http://url.spec.whatwg.org/) | |
12 | for the [Rust](http://rust-lang.org/) programming language. | |
13 | ||
14 | ||
15 | # URL parsing and data structures | |
16 | ||
17 | First, URL parsing may fail for various reasons and therefore returns a `Result`. | |
18 | ||
19 | ``` | |
20 | use url::{Url, ParseError}; | |
21 | ||
22 | assert!(Url::parse("http://[:::1]") == Err(ParseError::InvalidIpv6Address)) | |
23 | ``` | |
24 | ||
25 | Let’s parse a valid URL and look at its components. | |
26 | ||
27 | ``` | |
28 | use url::{Url, Host, Position}; | |
29 | # use url::ParseError; | |
30 | # fn run() -> Result<(), ParseError> { | |
31 | let issue_list_url = Url::parse( | |
32 | "https://github.com/rust-lang/rust/issues?labels=E-easy&state=open" | |
33 | )?; | |
34 | ||
35 | ||
36 | assert!(issue_list_url.scheme() == "https"); | |
37 | assert!(issue_list_url.username() == ""); | |
38 | assert!(issue_list_url.password() == None); | |
39 | assert!(issue_list_url.host_str() == Some("github.com")); | |
40 | assert!(issue_list_url.host() == Some(Host::Domain("github.com"))); | |
41 | assert!(issue_list_url.port() == None); | |
42 | assert!(issue_list_url.path() == "/rust-lang/rust/issues"); | |
43 | assert!(issue_list_url.path_segments().map(|c| c.collect::<Vec<_>>()) == | |
44 | Some(vec!["rust-lang", "rust", "issues"])); | |
45 | assert!(issue_list_url.query() == Some("labels=E-easy&state=open")); | |
46 | assert!(&issue_list_url[Position::BeforePath..] == "/rust-lang/rust/issues?labels=E-easy&state=open"); | |
47 | assert!(issue_list_url.fragment() == None); | |
48 | assert!(!issue_list_url.cannot_be_a_base()); | |
49 | # Ok(()) | |
50 | # } | |
51 | # run().unwrap(); | |
52 | ``` | |
53 | ||
54 | Some URLs are said to be *cannot-be-a-base*: | |
55 | they don’t have a username, password, host, or port, | |
56 | and their "path" is an arbitrary string rather than slash-separated segments: | |
57 | ||
58 | ``` | |
59 | use url::Url; | |
60 | # use url::ParseError; | |
61 | ||
62 | # fn run() -> Result<(), ParseError> { | |
63 | let data_url = Url::parse("data:text/plain,Hello?World#")?; | |
64 | ||
65 | assert!(data_url.cannot_be_a_base()); | |
66 | assert!(data_url.scheme() == "data"); | |
67 | assert!(data_url.path() == "text/plain,Hello"); | |
68 | assert!(data_url.path_segments().is_none()); | |
69 | assert!(data_url.query() == Some("World")); | |
70 | assert!(data_url.fragment() == Some("")); | |
71 | # Ok(()) | |
72 | # } | |
73 | # run().unwrap(); | |
74 | ``` | |
75 | ||
76 | ## Serde | |
77 | ||
78 | Enable the `serde` feature to include `Deserialize` and `Serialize` implementations for `url::Url`. | |
79 | ||
80 | # Base URL | |
81 | ||
82 | Many contexts allow URL *references* that can be relative to a *base URL*: | |
83 | ||
84 | ```html | |
85 | <link rel="stylesheet" href="../main.css"> | |
86 | ``` | |
87 | ||
88 | Since parsed URLs are absolute, giving a base is required for parsing relative URLs: | |
89 | ||
90 | ``` | |
91 | use url::{Url, ParseError}; | |
92 | ||
93 | assert!(Url::parse("../main.css") == Err(ParseError::RelativeUrlWithoutBase)) | |
94 | ``` | |
95 | ||
96 | Use the `join` method on a `Url` to use it as a base URL: | |
97 | ||
98 | ``` | |
99 | use url::Url; | |
100 | # use url::ParseError; | |
101 | ||
102 | # fn run() -> Result<(), ParseError> { | |
103 | let this_document = Url::parse("http://servo.github.io/rust-url/url/index.html")?; | |
104 | let css_url = this_document.join("../main.css")?; | |
105 | assert_eq!(css_url.as_str(), "http://servo.github.io/rust-url/main.css"); | |
106 | # Ok(()) | |
107 | # } | |
108 | # run().unwrap(); | |
109 | ``` | |
110 | ||
111 | # Feature: `serde` | |
112 | ||
113 | If you enable the `serde` feature, [`Url`](struct.Url.html) will implement | |
114 | [`serde::Serialize`](https://docs.rs/serde/1/serde/trait.Serialize.html) and | |
115 | [`serde::Deserialize`](https://docs.rs/serde/1/serde/trait.Deserialize.html). | |
116 | See [serde documentation](https://serde.rs) for more information. | |
117 | ||
118 | ```toml | |
119 | url = { version = "2", features = ["serde"] } | |
120 | ``` | |
121 | */ | |
122 | ||
123 | #![doc(html_root_url = "https://docs.rs/url/2.2.1")] | |
124 | ||
125 | #[macro_use] | |
126 | extern crate matches; | |
127 | pub use form_urlencoded; | |
128 | ||
129 | #[cfg(feature = "serde")] | |
130 | extern crate serde; | |
131 | ||
132 | use crate::host::HostInternal; | |
133 | use crate::parser::{to_u32, Context, Parser, SchemeType, PATH_SEGMENT, USERINFO}; | |
134 | use percent_encoding::{percent_decode, percent_encode, utf8_percent_encode}; | |
135 | use std::borrow::Borrow; | |
136 | use std::cmp; | |
137 | use std::fmt::{self, Write}; | |
138 | use std::hash; | |
139 | use std::io; | |
140 | use std::mem; | |
141 | use std::net::{IpAddr, SocketAddr, ToSocketAddrs}; | |
142 | use std::ops::{Range, RangeFrom, RangeTo}; | |
143 | use std::path::{Path, PathBuf}; | |
144 | use std::str; | |
145 | ||
146 | use std::convert::TryFrom; | |
147 | ||
148 | pub use crate::host::Host; | |
149 | pub use crate::origin::{OpaqueOrigin, Origin}; | |
150 | pub use crate::parser::{ParseError, SyntaxViolation}; | |
151 | pub use crate::path_segments::PathSegmentsMut; | |
152 | pub use crate::slicing::Position; | |
153 | pub use form_urlencoded::EncodingOverride; | |
154 | ||
155 | mod host; | |
156 | mod origin; | |
157 | mod parser; | |
158 | mod path_segments; | |
159 | mod slicing; | |
160 | ||
161 | #[doc(hidden)] | |
162 | pub mod quirks; | |
163 | ||
/// A parsed URL record.
///
/// The URL is stored as one serialized string plus a set of `u32` byte
/// offsets into that string; the accessor methods slice the serialization
/// using these offsets rather than storing each component separately.
#[derive(Clone)]
pub struct Url {
    /// The complete serialized URL. Syntax in pseudo-BNF:
    ///
    /// ```text
    /// url = scheme ":" [ hierarchical | non-hierarchical ] [ "?" query ]? [ "#" fragment ]?
    /// non-hierarchical = non-hierarchical-path
    /// non-hierarchical-path = /* Does not start with "/" */
    /// hierarchical = authority? hierarchical-path
    /// authority = "//" userinfo? host [ ":" port ]?
    /// userinfo = username [ ":" password ]? "@"
    /// hierarchical-path = [ "/" path-segment ]+
    /// ```
    serialization: String,

    // Components: every `*_start` / `*_end` field is a byte offset into `serialization`.
    scheme_end: u32,   // Before ':'
    username_end: u32, // Before ':' (if a password is given) or '@' (if not)
    host_start: u32,   // First byte of the host; when a password is present the byte before it is '@'
    host_end: u32,     // One past the last byte of the host; a ':' here introduces the port
    host: HostInternal, // Kind of host (none, domain, IPv4, IPv6); domain text lives in `serialization`
    port: Option<u16>, // Parsed port number, `None` when the serialization carries no port
    path_start: u32,             // Before initial '/', if any
    query_start: Option<u32>,    // Before '?', unlike Position::QueryStart
    fragment_start: Option<u32>, // Before '#', unlike Position::FragmentStart
}
189 | ||
/// Full configuration for the URL parser.
///
/// Obtained from `Url::options()`, adjusted through the by-value setter
/// methods, and finally consumed by `ParseOptions::parse`. The type is
/// `Copy`, so a configured value can be reused for several parses.
#[derive(Copy, Clone)]
pub struct ParseOptions<'a> {
    // URL that relative input is resolved against; `None` means no base is supplied.
    base_url: Option<&'a Url>,
    // Handed to the parser as its query encoding override.
    encoding_override: EncodingOverride<'a>,
    // Optional callback that receives each non-fatal `SyntaxViolation`.
    violation_fn: Option<&'a dyn Fn(SyntaxViolation)>,
}
197 | ||
198 | impl<'a> ParseOptions<'a> { | |
199 | /// Change the base URL | |
200 | pub fn base_url(mut self, new: Option<&'a Url>) -> Self { | |
201 | self.base_url = new; | |
202 | self | |
203 | } | |
204 | ||
205 | /// Override the character encoding of query strings. | |
206 | /// This is a legacy concept only relevant for HTML. | |
207 | pub fn encoding_override(mut self, new: EncodingOverride<'a>) -> Self { | |
208 | self.encoding_override = new; | |
209 | self | |
210 | } | |
211 | ||
212 | /// Call the provided function or closure for a non-fatal `SyntaxViolation` | |
213 | /// when it occurs during parsing. Note that since the provided function is | |
214 | /// `Fn`, the caller might need to utilize _interior mutability_, such as with | |
215 | /// a `RefCell`, to collect the violations. | |
216 | /// | |
217 | /// ## Example | |
218 | /// ``` | |
219 | /// use std::cell::RefCell; | |
220 | /// use url::{Url, SyntaxViolation}; | |
221 | /// # use url::ParseError; | |
222 | /// # fn run() -> Result<(), url::ParseError> { | |
223 | /// let violations = RefCell::new(Vec::new()); | |
224 | /// let url = Url::options() | |
225 | /// .syntax_violation_callback(Some(&|v| violations.borrow_mut().push(v))) | |
226 | /// .parse("https:////example.com")?; | |
227 | /// assert_eq!(url.as_str(), "https://example.com/"); | |
228 | /// assert_eq!(violations.into_inner(), | |
229 | /// vec!(SyntaxViolation::ExpectedDoubleSlash)); | |
230 | /// # Ok(()) | |
231 | /// # } | |
232 | /// # run().unwrap(); | |
233 | /// ``` | |
234 | pub fn syntax_violation_callback(mut self, new: Option<&'a dyn Fn(SyntaxViolation)>) -> Self { | |
235 | self.violation_fn = new; | |
236 | self | |
237 | } | |
238 | ||
239 | /// Parse an URL string with the configuration so far. | |
240 | pub fn parse(self, input: &str) -> Result<Url, crate::ParseError> { | |
241 | Parser { | |
242 | serialization: String::with_capacity(input.len()), | |
243 | base_url: self.base_url, | |
244 | query_encoding_override: self.encoding_override, | |
245 | violation_fn: self.violation_fn, | |
246 | context: Context::UrlParser, | |
247 | } | |
248 | .parse_url(input) | |
249 | } | |
250 | } | |
251 | ||
252 | impl Url { | |
253 | /// Parse an absolute URL from a string. | |
254 | /// | |
255 | /// # Examples | |
256 | /// | |
257 | /// ```rust | |
258 | /// use url::Url; | |
259 | /// # use url::ParseError; | |
260 | /// | |
261 | /// # fn run() -> Result<(), ParseError> { | |
262 | /// let url = Url::parse("https://example.net")?; | |
263 | /// # Ok(()) | |
264 | /// # } | |
265 | /// # run().unwrap(); | |
266 | /// ``` | |
267 | /// | |
268 | /// # Errors | |
269 | /// | |
270 | /// If the function can not parse an absolute URL from the given string, | |
271 | /// a [`ParseError`] variant will be returned. | |
272 | /// | |
273 | /// [`ParseError`]: enum.ParseError.html | |
274 | #[inline] | |
275 | pub fn parse(input: &str) -> Result<Url, crate::ParseError> { | |
276 | Url::options().parse(input) | |
277 | } | |
278 | ||
279 | /// Parse an absolute URL from a string and add params to its query string. | |
280 | /// | |
281 | /// Existing params are not removed. | |
282 | /// | |
283 | /// # Examples | |
284 | /// | |
285 | /// ```rust | |
286 | /// use url::Url; | |
287 | /// # use url::ParseError; | |
288 | /// | |
289 | /// # fn run() -> Result<(), ParseError> { | |
290 | /// let url = Url::parse_with_params("https://example.net?dont=clobberme", | |
291 | /// &[("lang", "rust"), ("browser", "servo")])?; | |
292 | /// assert_eq!("https://example.net/?dont=clobberme&lang=rust&browser=servo", url.as_str()); | |
293 | /// # Ok(()) | |
294 | /// # } | |
295 | /// # run().unwrap(); | |
296 | /// ``` | |
297 | /// | |
298 | /// # Errors | |
299 | /// | |
300 | /// If the function can not parse an absolute URL from the given string, | |
301 | /// a [`ParseError`] variant will be returned. | |
302 | /// | |
303 | /// [`ParseError`]: enum.ParseError.html | |
304 | #[inline] | |
305 | pub fn parse_with_params<I, K, V>(input: &str, iter: I) -> Result<Url, crate::ParseError> | |
306 | where | |
307 | I: IntoIterator, | |
308 | I::Item: Borrow<(K, V)>, | |
309 | K: AsRef<str>, | |
310 | V: AsRef<str>, | |
311 | { | |
312 | let mut url = Url::options().parse(input); | |
313 | ||
314 | if let Ok(ref mut url) = url { | |
315 | url.query_pairs_mut().extend_pairs(iter); | |
316 | } | |
317 | ||
318 | url | |
319 | } | |
320 | ||
321 | /// Parse a string as an URL, with this URL as the base URL. | |
322 | /// | |
323 | /// Note: a trailing slash is significant. | |
324 | /// Without it, the last path component is considered to be a “file” name | |
325 | /// to be removed to get at the “directory” that is used as the base: | |
326 | /// | |
327 | /// # Examples | |
328 | /// | |
329 | /// ```rust | |
330 | /// use url::Url; | |
331 | /// # use url::ParseError; | |
332 | /// | |
333 | /// # fn run() -> Result<(), ParseError> { | |
334 | /// let base = Url::parse("https://example.net/a/b.html")?; | |
335 | /// let url = base.join("c.png")?; | |
336 | /// assert_eq!(url.as_str(), "https://example.net/a/c.png"); // Not /a/b.html/c.png | |
337 | /// | |
338 | /// let base = Url::parse("https://example.net/a/b/")?; | |
339 | /// let url = base.join("c.png")?; | |
340 | /// assert_eq!(url.as_str(), "https://example.net/a/b/c.png"); | |
341 | /// # Ok(()) | |
342 | /// # } | |
343 | /// # run().unwrap(); | |
344 | /// ``` | |
345 | /// | |
346 | /// # Errors | |
347 | /// | |
348 | /// If the function can not parse an URL from the given string | |
349 | /// with this URL as the base URL, a [`ParseError`] variant will be returned. | |
350 | /// | |
351 | /// [`ParseError`]: enum.ParseError.html | |
352 | #[inline] | |
353 | pub fn join(&self, input: &str) -> Result<Url, crate::ParseError> { | |
354 | Url::options().base_url(Some(self)).parse(input) | |
355 | } | |
356 | ||
357 | /// Return a default `ParseOptions` that can fully configure the URL parser. | |
358 | /// | |
359 | /// # Examples | |
360 | /// | |
361 | /// Get default `ParseOptions`, then change base url | |
362 | /// | |
363 | /// ```rust | |
364 | /// use url::Url; | |
365 | /// # use url::ParseError; | |
366 | /// # fn run() -> Result<(), ParseError> { | |
367 | /// let options = Url::options(); | |
368 | /// let api = Url::parse("https://api.example.com")?; | |
369 | /// let base_url = options.base_url(Some(&api)); | |
370 | /// let version_url = base_url.parse("version.json")?; | |
371 | /// assert_eq!(version_url.as_str(), "https://api.example.com/version.json"); | |
372 | /// # Ok(()) | |
373 | /// # } | |
374 | /// # run().unwrap(); | |
375 | /// ``` | |
376 | pub fn options<'a>() -> ParseOptions<'a> { | |
377 | ParseOptions { | |
378 | base_url: None, | |
379 | encoding_override: None, | |
380 | violation_fn: None, | |
381 | } | |
382 | } | |
383 | ||
384 | /// Return the serialization of this URL. | |
385 | /// | |
386 | /// This is fast since that serialization is already stored in the `Url` struct. | |
387 | /// | |
388 | /// # Examples | |
389 | /// | |
390 | /// ```rust | |
391 | /// use url::Url; | |
392 | /// # use url::ParseError; | |
393 | /// | |
394 | /// # fn run() -> Result<(), ParseError> { | |
395 | /// let url_str = "https://example.net/"; | |
396 | /// let url = Url::parse(url_str)?; | |
397 | /// assert_eq!(url.as_str(), url_str); | |
398 | /// # Ok(()) | |
399 | /// # } | |
400 | /// # run().unwrap(); | |
401 | /// ``` | |
402 | #[inline] | |
403 | pub fn as_str(&self) -> &str { | |
404 | &self.serialization | |
405 | } | |
406 | ||
407 | /// Return the serialization of this URL. | |
408 | /// | |
409 | /// This consumes the `Url` and takes ownership of the `String` stored in it. | |
410 | /// | |
411 | /// # Examples | |
412 | /// | |
413 | /// ```rust | |
414 | /// use url::Url; | |
415 | /// # use url::ParseError; | |
416 | /// | |
417 | /// # fn run() -> Result<(), ParseError> { | |
418 | /// let url_str = "https://example.net/"; | |
419 | /// let url = Url::parse(url_str)?; | |
420 | /// assert_eq!(url.into_string(), url_str); | |
421 | /// # Ok(()) | |
422 | /// # } | |
423 | /// # run().unwrap(); | |
424 | /// ``` | |
425 | #[inline] | |
426 | pub fn into_string(self) -> String { | |
427 | self.serialization | |
428 | } | |
429 | ||
/// For internal testing, not part of the public API.
///
/// Methods of the `Url` struct assume a number of invariants.
/// This checks each of these invariants and returns `Err` with a message
/// describing the first one that is not met, or `Ok(())` if all of them hold.
/// This is for testing rust-url itself.
///
/// # Panics
///
/// Two checks use `expect` rather than returning `Err`: parsing the port
/// digits found in the serialization, and re-parsing the serialization
/// with `Url::parse`.
#[doc(hidden)]
pub fn check_invariants(&self) -> Result<(), String> {
    // Local replacement for `assert!`: a failed check returns `Err` from
    // `check_invariants` instead of panicking.
    macro_rules! assert {
        ($x: expr) => {
            if !$x {
                return Err(format!(
                    "!( {} ) for URL {:?}",
                    stringify!($x),
                    self.serialization
                ));
            }
        };
    }

    // Local replacement for `assert_eq!`, also returning `Err`; the message
    // includes both values and both source expressions.
    macro_rules! assert_eq {
        ($a: expr, $b: expr) => {
            {
                let a = $a;
                let b = $b;
                if a != b {
                    return Err(format!("{:?} != {:?} ({} != {}) for URL {:?}",
                                       a, b, stringify!($a), stringify!($b),
                                       self.serialization))
                }
            }
        }
    }

    // Scheme: non-empty, starts with an ASCII letter, continues with
    // letters, digits, '+', '-' or '.', and is terminated by ':'.
    assert!(self.scheme_end >= 1);
    assert!(matches!(self.byte_at(0), b'a'..=b'z' | b'A'..=b'Z'));
    assert!(self
        .slice(1..self.scheme_end)
        .chars()
        .all(|c| matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '+' | '-' | '.')));
    assert_eq!(self.byte_at(self.scheme_end), b':');

    if self.slice(self.scheme_end + 1..).starts_with("//") {
        // URL with authority
        if self.username_end != self.serialization.len() as u32 {
            match self.byte_at(self.username_end) {
                // ':' after the username: a password follows, terminated by '@'.
                b':' => {
                    assert!(self.host_start >= self.username_end + 2);
                    assert_eq!(self.byte_at(self.host_start - 1), b'@');
                }
                // '@' right after the username: no password.
                b'@' => assert!(self.host_start == self.username_end + 1),
                // Anything else: no userinfo, so the username is empty and
                // ends right after "://".
                _ => assert_eq!(self.username_end, self.scheme_end + 3),
            }
        }
        assert!(self.host_start >= self.username_end);
        assert!(self.host_end >= self.host_start);
        // The host text in the serialization must agree with the parsed `host` field.
        let host_str = self.slice(self.host_start..self.host_end);
        match self.host {
            HostInternal::None => assert_eq!(host_str, ""),
            HostInternal::Ipv4(address) => assert_eq!(host_str, address.to_string()),
            HostInternal::Ipv6(address) => {
                let h: Host<String> = Host::Ipv6(address);
                assert_eq!(host_str, h.to_string())
            }
            HostInternal::Domain => {
                if SchemeType::from(self.scheme()).is_special() {
                    assert!(!host_str.is_empty())
                }
            }
        }
        // Port: absent when the path starts right at the end of the host,
        // otherwise ':' followed by digits that match the `port` field.
        if self.path_start == self.host_end {
            assert_eq!(self.port, None);
        } else {
            assert_eq!(self.byte_at(self.host_end), b':');
            let port_str = self.slice(self.host_end + 1..self.path_start);
            assert_eq!(
                self.port,
                Some(port_str.parse::<u16>().expect("Couldn't parse port?"))
            );
        }
        // The path is either empty at end of string, or begins at '/', '#' or '?'.
        assert!(
            self.path_start as usize == self.serialization.len()
                || matches!(self.byte_at(self.path_start), b'/' | b'#' | b'?')
        );
    } else {
        // Anarchist URL (no authority)
        // All authority-related offsets collapse to the byte right after ':'.
        assert_eq!(self.username_end, self.scheme_end + 1);
        assert_eq!(self.host_start, self.scheme_end + 1);
        assert_eq!(self.host_end, self.scheme_end + 1);
        assert_eq!(self.host, HostInternal::None);
        assert_eq!(self.port, None);
        assert_eq!(self.path_start, self.scheme_end + 1);
    }
    // Query and fragment offsets point at their '?' / '#' delimiter, at or
    // after the path start, with the fragment after the query.
    if let Some(start) = self.query_start {
        assert!(start >= self.path_start);
        assert_eq!(self.byte_at(start), b'?');
    }
    if let Some(start) = self.fragment_start {
        assert!(start >= self.path_start);
        assert_eq!(self.byte_at(start), b'#');
    }
    if let (Some(query_start), Some(fragment_start)) = (self.query_start, self.fragment_start) {
        assert!(fragment_start > query_start);
    }

    // Round trip: re-parsing the serialization must give an identical record.
    let other = Url::parse(self.as_str()).expect("Failed to parse myself?");
    assert_eq!(&self.serialization, &other.serialization);
    assert_eq!(self.scheme_end, other.scheme_end);
    assert_eq!(self.username_end, other.username_end);
    assert_eq!(self.host_start, other.host_start);
    assert_eq!(self.host_end, other.host_end);
    assert!(
        self.host == other.host ||
            // XXX No host round-trips to empty host.
            // See https://github.com/whatwg/url/issues/79
            (self.host_str(), other.host_str()) == (None, Some(""))
    );
    assert_eq!(self.port, other.port);
    assert_eq!(self.path_start, other.path_start);
    assert_eq!(self.query_start, other.query_start);
    assert_eq!(self.fragment_start, other.fragment_start);
    Ok(())
}
552 | ||
553 | /// Return the origin of this URL (<https://url.spec.whatwg.org/#origin>) | |
554 | /// | |
555 | /// Note: this returns an opaque origin for `file:` URLs, which causes | |
556 | /// `url.origin() != url.origin()`. | |
557 | /// | |
558 | /// # Examples | |
559 | /// | |
560 | /// URL with `ftp` scheme: | |
561 | /// | |
562 | /// ```rust | |
563 | /// use url::{Host, Origin, Url}; | |
564 | /// # use url::ParseError; | |
565 | /// | |
566 | /// # fn run() -> Result<(), ParseError> { | |
567 | /// let url = Url::parse("ftp://example.com/foo")?; | |
568 | /// assert_eq!(url.origin(), | |
569 | /// Origin::Tuple("ftp".into(), | |
570 | /// Host::Domain("example.com".into()), | |
571 | /// 21)); | |
572 | /// # Ok(()) | |
573 | /// # } | |
574 | /// # run().unwrap(); | |
575 | /// ``` | |
576 | /// | |
577 | /// URL with `blob` scheme: | |
578 | /// | |
579 | /// ```rust | |
580 | /// use url::{Host, Origin, Url}; | |
581 | /// # use url::ParseError; | |
582 | /// | |
583 | /// # fn run() -> Result<(), ParseError> { | |
584 | /// let url = Url::parse("blob:https://example.com/foo")?; | |
585 | /// assert_eq!(url.origin(), | |
586 | /// Origin::Tuple("https".into(), | |
587 | /// Host::Domain("example.com".into()), | |
588 | /// 443)); | |
589 | /// # Ok(()) | |
590 | /// # } | |
591 | /// # run().unwrap(); | |
592 | /// ``` | |
593 | /// | |
594 | /// URL with `file` scheme: | |
595 | /// | |
596 | /// ```rust | |
597 | /// use url::{Host, Origin, Url}; | |
598 | /// # use url::ParseError; | |
599 | /// | |
600 | /// # fn run() -> Result<(), ParseError> { | |
601 | /// let url = Url::parse("file:///tmp/foo")?; | |
602 | /// assert!(!url.origin().is_tuple()); | |
603 | /// | |
604 | /// let other_url = Url::parse("file:///tmp/foo")?; | |
605 | /// assert!(url.origin() != other_url.origin()); | |
606 | /// # Ok(()) | |
607 | /// # } | |
608 | /// # run().unwrap(); | |
609 | /// ``` | |
610 | /// | |
611 | /// URL with other scheme: | |
612 | /// | |
613 | /// ```rust | |
614 | /// use url::{Host, Origin, Url}; | |
615 | /// # use url::ParseError; | |
616 | /// | |
617 | /// # fn run() -> Result<(), ParseError> { | |
618 | /// let url = Url::parse("foo:bar")?; | |
619 | /// assert!(!url.origin().is_tuple()); | |
620 | /// # Ok(()) | |
621 | /// # } | |
622 | /// # run().unwrap(); | |
623 | /// ``` | |
624 | #[inline] | |
625 | pub fn origin(&self) -> Origin { | |
626 | origin::url_origin(self) | |
627 | } | |
628 | ||
629 | /// Return the scheme of this URL, lower-cased, as an ASCII string without the ':' delimiter. | |
630 | /// | |
631 | /// # Examples | |
632 | /// | |
633 | /// ``` | |
634 | /// use url::Url; | |
635 | /// # use url::ParseError; | |
636 | /// | |
637 | /// # fn run() -> Result<(), ParseError> { | |
638 | /// let url = Url::parse("file:///tmp/foo")?; | |
639 | /// assert_eq!(url.scheme(), "file"); | |
640 | /// # Ok(()) | |
641 | /// # } | |
642 | /// # run().unwrap(); | |
643 | /// ``` | |
644 | #[inline] | |
645 | pub fn scheme(&self) -> &str { | |
646 | self.slice(..self.scheme_end) | |
647 | } | |
648 | ||
649 | /// Return whether the URL has an 'authority', | |
650 | /// which can contain a username, password, host, and port number. | |
651 | /// | |
652 | /// URLs that do *not* are either path-only like `unix:/run/foo.socket` | |
653 | /// or cannot-be-a-base like `data:text/plain,Stuff`. | |
654 | /// | |
655 | /// # Examples | |
656 | /// | |
657 | /// ``` | |
658 | /// use url::Url; | |
659 | /// # use url::ParseError; | |
660 | /// | |
661 | /// # fn run() -> Result<(), ParseError> { | |
662 | /// let url = Url::parse("ftp://rms@example.com")?; | |
663 | /// assert!(url.has_authority()); | |
664 | /// | |
665 | /// let url = Url::parse("unix:/run/foo.socket")?; | |
666 | /// assert!(!url.has_authority()); | |
667 | /// | |
668 | /// let url = Url::parse("data:text/plain,Stuff")?; | |
669 | /// assert!(!url.has_authority()); | |
670 | /// # Ok(()) | |
671 | /// # } | |
672 | /// # run().unwrap(); | |
673 | /// ``` | |
674 | #[inline] | |
675 | pub fn has_authority(&self) -> bool { | |
676 | debug_assert!(self.byte_at(self.scheme_end) == b':'); | |
677 | self.slice(self.scheme_end..).starts_with("://") | |
678 | } | |
679 | ||
680 | /// Return whether this URL is a cannot-be-a-base URL, | |
681 | /// meaning that parsing a relative URL string with this URL as the base will return an error. | |
682 | /// | |
683 | /// This is the case if the scheme and `:` delimiter are not followed by a `/` slash, | |
684 | /// as is typically the case of `data:` and `mailto:` URLs. | |
685 | /// | |
686 | /// # Examples | |
687 | /// | |
688 | /// ``` | |
689 | /// use url::Url; | |
690 | /// # use url::ParseError; | |
691 | /// | |
692 | /// # fn run() -> Result<(), ParseError> { | |
693 | /// let url = Url::parse("ftp://rms@example.com")?; | |
694 | /// assert!(!url.cannot_be_a_base()); | |
695 | /// | |
696 | /// let url = Url::parse("unix:/run/foo.socket")?; | |
697 | /// assert!(!url.cannot_be_a_base()); | |
698 | /// | |
699 | /// let url = Url::parse("data:text/plain,Stuff")?; | |
700 | /// assert!(url.cannot_be_a_base()); | |
701 | /// # Ok(()) | |
702 | /// # } | |
703 | /// # run().unwrap(); | |
704 | /// ``` | |
705 | #[inline] | |
706 | pub fn cannot_be_a_base(&self) -> bool { | |
707 | !self.slice(self.scheme_end + 1..).starts_with('/') | |
708 | } | |
709 | ||
710 | /// Return the username for this URL (typically the empty string) | |
711 | /// as a percent-encoded ASCII string. | |
712 | /// | |
713 | /// # Examples | |
714 | /// | |
715 | /// ``` | |
716 | /// use url::Url; | |
717 | /// # use url::ParseError; | |
718 | /// | |
719 | /// # fn run() -> Result<(), ParseError> { | |
720 | /// let url = Url::parse("ftp://rms@example.com")?; | |
721 | /// assert_eq!(url.username(), "rms"); | |
722 | /// | |
723 | /// let url = Url::parse("ftp://:secret123@example.com")?; | |
724 | /// assert_eq!(url.username(), ""); | |
725 | /// | |
726 | /// let url = Url::parse("https://example.com")?; | |
727 | /// assert_eq!(url.username(), ""); | |
728 | /// # Ok(()) | |
729 | /// # } | |
730 | /// # run().unwrap(); | |
731 | /// ``` | |
732 | pub fn username(&self) -> &str { | |
733 | let scheme_separator_len = "://".len() as u32; | |
734 | if self.has_authority() && self.username_end > self.scheme_end + scheme_separator_len { | |
735 | self.slice(self.scheme_end + scheme_separator_len..self.username_end) | |
736 | } else { | |
737 | "" | |
738 | } | |
739 | } | |
740 | ||
741 | /// Return the password for this URL, if any, as a percent-encoded ASCII string. | |
742 | /// | |
743 | /// # Examples | |
744 | /// | |
745 | /// ``` | |
746 | /// use url::Url; | |
747 | /// # use url::ParseError; | |
748 | /// | |
749 | /// # fn run() -> Result<(), ParseError> { | |
750 | /// let url = Url::parse("ftp://rms:secret123@example.com")?; | |
751 | /// assert_eq!(url.password(), Some("secret123")); | |
752 | /// | |
753 | /// let url = Url::parse("ftp://:secret123@example.com")?; | |
754 | /// assert_eq!(url.password(), Some("secret123")); | |
755 | /// | |
756 | /// let url = Url::parse("ftp://rms@example.com")?; | |
757 | /// assert_eq!(url.password(), None); | |
758 | /// | |
759 | /// let url = Url::parse("https://example.com")?; | |
760 | /// assert_eq!(url.password(), None); | |
761 | /// # Ok(()) | |
762 | /// # } | |
763 | /// # run().unwrap(); | |
764 | /// ``` | |
765 | pub fn password(&self) -> Option<&str> { | |
766 | // This ':' is not the one marking a port number since a host can not be empty. | |
767 | // (Except for file: URLs, which do not have port numbers.) | |
768 | if self.has_authority() | |
769 | && self.username_end != self.serialization.len() as u32 | |
770 | && self.byte_at(self.username_end) == b':' | |
771 | { | |
772 | debug_assert!(self.byte_at(self.host_start - 1) == b'@'); | |
773 | Some(self.slice(self.username_end + 1..self.host_start - 1)) | |
774 | } else { | |
775 | None | |
776 | } | |
777 | } | |
778 | ||
779 | /// Equivalent to `url.host().is_some()`. | |
780 | /// | |
781 | /// # Examples | |
782 | /// | |
783 | /// ``` | |
784 | /// use url::Url; | |
785 | /// # use url::ParseError; | |
786 | /// | |
787 | /// # fn run() -> Result<(), ParseError> { | |
788 | /// let url = Url::parse("ftp://rms@example.com")?; | |
789 | /// assert!(url.has_host()); | |
790 | /// | |
791 | /// let url = Url::parse("unix:/run/foo.socket")?; | |
792 | /// assert!(!url.has_host()); | |
793 | /// | |
794 | /// let url = Url::parse("data:text/plain,Stuff")?; | |
795 | /// assert!(!url.has_host()); | |
796 | /// # Ok(()) | |
797 | /// # } | |
798 | /// # run().unwrap(); | |
799 | /// ``` | |
800 | pub fn has_host(&self) -> bool { | |
801 | !matches!(self.host, HostInternal::None) | |
802 | } | |
803 | ||
804 | /// Return the string representation of the host (domain or IP address) for this URL, if any. | |
805 | /// | |
806 | /// Non-ASCII domains are punycode-encoded per IDNA if this is the host | |
807 | /// of a special URL, or percent encoded for non-special URLs. | |
808 | /// IPv6 addresses are given between `[` and `]` brackets. | |
809 | /// | |
810 | /// Cannot-be-a-base URLs (typical of `data:` and `mailto:`) and some `file:` URLs | |
811 | /// don’t have a host. | |
812 | /// | |
813 | /// See also the `host` method. | |
814 | /// | |
815 | /// # Examples | |
816 | /// | |
817 | /// ``` | |
818 | /// use url::Url; | |
819 | /// # use url::ParseError; | |
820 | /// | |
821 | /// # fn run() -> Result<(), ParseError> { | |
822 | /// let url = Url::parse("https://127.0.0.1/index.html")?; | |
823 | /// assert_eq!(url.host_str(), Some("127.0.0.1")); | |
824 | /// | |
825 | /// let url = Url::parse("ftp://rms@example.com")?; | |
826 | /// assert_eq!(url.host_str(), Some("example.com")); | |
827 | /// | |
828 | /// let url = Url::parse("unix:/run/foo.socket")?; | |
829 | /// assert_eq!(url.host_str(), None); | |
830 | /// | |
831 | /// let url = Url::parse("data:text/plain,Stuff")?; | |
832 | /// assert_eq!(url.host_str(), None); | |
833 | /// # Ok(()) | |
834 | /// # } | |
835 | /// # run().unwrap(); | |
836 | /// ``` | |
837 | pub fn host_str(&self) -> Option<&str> { | |
838 | if self.has_host() { | |
839 | Some(self.slice(self.host_start..self.host_end)) | |
840 | } else { | |
841 | None | |
842 | } | |
843 | } | |
844 | ||
845 | /// Return the parsed representation of the host for this URL. | |
846 | /// Non-ASCII domain labels are punycode-encoded per IDNA if this is the host | |
847 | /// of a special URL, or percent encoded for non-special URLs. | |
848 | /// | |
849 | /// Cannot-be-a-base URLs (typical of `data:` and `mailto:`) and some `file:` URLs | |
850 | /// don’t have a host. | |
851 | /// | |
852 | /// See also the `host_str` method. | |
853 | /// | |
854 | /// # Examples | |
855 | /// | |
856 | /// ``` | |
857 | /// use url::Url; | |
858 | /// # use url::ParseError; | |
859 | /// | |
860 | /// # fn run() -> Result<(), ParseError> { | |
861 | /// let url = Url::parse("https://127.0.0.1/index.html")?; | |
862 | /// assert!(url.host().is_some()); | |
863 | /// | |
864 | /// let url = Url::parse("ftp://rms@example.com")?; | |
865 | /// assert!(url.host().is_some()); | |
866 | /// | |
867 | /// let url = Url::parse("unix:/run/foo.socket")?; | |
868 | /// assert!(url.host().is_none()); | |
869 | /// | |
870 | /// let url = Url::parse("data:text/plain,Stuff")?; | |
871 | /// assert!(url.host().is_none()); | |
872 | /// # Ok(()) | |
873 | /// # } | |
874 | /// # run().unwrap(); | |
875 | /// ``` | |
876 | pub fn host(&self) -> Option<Host<&str>> { | |
877 | match self.host { | |
878 | HostInternal::None => None, | |
879 | HostInternal::Domain => Some(Host::Domain(self.slice(self.host_start..self.host_end))), | |
880 | HostInternal::Ipv4(address) => Some(Host::Ipv4(address)), | |
881 | HostInternal::Ipv6(address) => Some(Host::Ipv6(address)), | |
882 | } | |
883 | } | |
884 | ||
885 | /// If this URL has a host and it is a domain name (not an IP address), return it. | |
886 | /// Non-ASCII domains are punycode-encoded per IDNA if this is the host | |
887 | /// of a special URL, or percent encoded for non-special URLs. | |
888 | /// | |
889 | /// # Examples | |
890 | /// | |
891 | /// ``` | |
892 | /// use url::Url; | |
893 | /// # use url::ParseError; | |
894 | /// | |
895 | /// # fn run() -> Result<(), ParseError> { | |
896 | /// let url = Url::parse("https://127.0.0.1/")?; | |
897 | /// assert_eq!(url.domain(), None); | |
898 | /// | |
899 | /// let url = Url::parse("mailto:rms@example.net")?; | |
900 | /// assert_eq!(url.domain(), None); | |
901 | /// | |
902 | /// let url = Url::parse("https://example.com/")?; | |
903 | /// assert_eq!(url.domain(), Some("example.com")); | |
904 | /// # Ok(()) | |
905 | /// # } | |
906 | /// # run().unwrap(); | |
907 | /// ``` | |
908 | pub fn domain(&self) -> Option<&str> { | |
909 | match self.host { | |
910 | HostInternal::Domain => Some(self.slice(self.host_start..self.host_end)), | |
911 | _ => None, | |
912 | } | |
913 | } | |
914 | ||
915 | /// Return the port number for this URL, if any. | |
916 | /// | |
917 | /// Note that default port numbers are never reflected by the serialization, | |
918 | /// use the `port_or_known_default()` method if you want a default port number returned. | |
919 | /// | |
920 | /// # Examples | |
921 | /// | |
922 | /// ``` | |
923 | /// use url::Url; | |
924 | /// # use url::ParseError; | |
925 | /// | |
926 | /// # fn run() -> Result<(), ParseError> { | |
927 | /// let url = Url::parse("https://example.com")?; | |
928 | /// assert_eq!(url.port(), None); | |
929 | /// | |
930 | /// let url = Url::parse("https://example.com:443/")?; | |
931 | /// assert_eq!(url.port(), None); | |
932 | /// | |
933 | /// let url = Url::parse("ssh://example.com:22")?; | |
934 | /// assert_eq!(url.port(), Some(22)); | |
935 | /// # Ok(()) | |
936 | /// # } | |
937 | /// # run().unwrap(); | |
938 | /// ``` | |
939 | #[inline] | |
940 | pub fn port(&self) -> Option<u16> { | |
941 | self.port | |
942 | } | |
943 | ||
944 | /// Return the port number for this URL, or the default port number if it is known. | |
945 | /// | |
946 | /// This method only knows the default port number | |
947 | /// of the `http`, `https`, `ws`, `wss` and `ftp` schemes. | |
948 | /// | |
949 | /// For URLs in these schemes, this method always returns `Some(_)`. | |
950 | /// For other schemes, it is the same as `Url::port()`. | |
951 | /// | |
952 | /// # Examples | |
953 | /// | |
954 | /// ``` | |
955 | /// use url::Url; | |
956 | /// # use url::ParseError; | |
957 | /// | |
958 | /// # fn run() -> Result<(), ParseError> { | |
959 | /// let url = Url::parse("foo://example.com")?; | |
960 | /// assert_eq!(url.port_or_known_default(), None); | |
961 | /// | |
962 | /// let url = Url::parse("foo://example.com:1456")?; | |
963 | /// assert_eq!(url.port_or_known_default(), Some(1456)); | |
964 | /// | |
965 | /// let url = Url::parse("https://example.com")?; | |
966 | /// assert_eq!(url.port_or_known_default(), Some(443)); | |
967 | /// # Ok(()) | |
968 | /// # } | |
969 | /// # run().unwrap(); | |
970 | /// ``` | |
971 | #[inline] | |
972 | pub fn port_or_known_default(&self) -> Option<u16> { | |
973 | self.port.or_else(|| parser::default_port(self.scheme())) | |
974 | } | |
975 | ||
976 | /// Resolve a URL’s host and port number to `SocketAddr`. | |
977 | /// | |
978 | /// If the URL has the default port number of a scheme that is unknown to this library, | |
979 | /// `default_port_number` provides an opportunity to provide the actual port number. | |
980 | /// In non-example code this should be implemented either simply as `|| None`, | |
981 | /// or by matching on the URL’s `.scheme()`. | |
982 | /// | |
983 | /// If the host is a domain, it is resolved using the standard library’s DNS support. | |
984 | /// | |
985 | /// # Examples | |
986 | /// | |
987 | /// ```no_run | |
988 | /// let url = url::Url::parse("https://example.net/").unwrap(); | |
989 | /// let addrs = url.socket_addrs(|| None).unwrap(); | |
990 | /// std::net::TcpStream::connect(&*addrs) | |
991 | /// # ; | |
992 | /// ``` | |
993 | /// | |
994 | /// ``` | |
995 | /// /// With application-specific known default port numbers | |
996 | /// fn socket_addrs(url: url::Url) -> std::io::Result<Vec<std::net::SocketAddr>> { | |
997 | /// url.socket_addrs(|| match url.scheme() { | |
998 | /// "socks5" | "socks5h" => Some(1080), | |
999 | /// _ => None, | |
1000 | /// }) | |
1001 | /// } | |
1002 | /// ``` | |
1003 | pub fn socket_addrs( | |
1004 | &self, | |
1005 | default_port_number: impl Fn() -> Option<u16>, | |
1006 | ) -> io::Result<Vec<SocketAddr>> { | |
1007 | // Note: trying to avoid the Vec allocation by returning `impl AsRef<[SocketAddr]>` | |
1008 | // causes borrowck issues because the return value borrows `default_port_number`: | |
1009 | // | |
1010 | // https://github.com/rust-lang/rfcs/blob/master/text/1951-expand-impl-trait.md#scoping-for-type-and-lifetime-parameters | |
1011 | // | |
1012 | // > This RFC proposes that *all* type parameters are considered in scope | |
1013 | // > for `impl Trait` in return position | |
1014 | ||
1015 | fn io_result<T>(opt: Option<T>, message: &str) -> io::Result<T> { | |
1016 | opt.ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, message)) | |
1017 | } | |
1018 | ||
1019 | let host = io_result(self.host(), "No host name in the URL")?; | |
1020 | let port = io_result( | |
1021 | self.port_or_known_default().or_else(default_port_number), | |
1022 | "No port number in the URL", | |
1023 | )?; | |
1024 | Ok(match host { | |
1025 | Host::Domain(domain) => (domain, port).to_socket_addrs()?.collect(), | |
1026 | Host::Ipv4(ip) => vec![(ip, port).into()], | |
1027 | Host::Ipv6(ip) => vec![(ip, port).into()], | |
1028 | }) | |
1029 | } | |
1030 | ||
1031 | /// Return the path for this URL, as a percent-encoded ASCII string. | |
1032 | /// For cannot-be-a-base URLs, this is an arbitrary string that doesn’t start with '/'. | |
1033 | /// For other URLs, this starts with a '/' slash | |
1034 | /// and continues with slash-separated path segments. | |
1035 | /// | |
1036 | /// # Examples | |
1037 | /// | |
1038 | /// ```rust | |
1039 | /// use url::{Url, ParseError}; | |
1040 | /// | |
1041 | /// # fn run() -> Result<(), ParseError> { | |
1042 | /// let url = Url::parse("https://example.com/api/versions?page=2")?; | |
1043 | /// assert_eq!(url.path(), "/api/versions"); | |
1044 | /// | |
1045 | /// let url = Url::parse("https://example.com")?; | |
1046 | /// assert_eq!(url.path(), "/"); | |
1047 | /// | |
1048 | /// let url = Url::parse("https://example.com/countries/việt nam")?; | |
1049 | /// assert_eq!(url.path(), "/countries/vi%E1%BB%87t%20nam"); | |
1050 | /// # Ok(()) | |
1051 | /// # } | |
1052 | /// # run().unwrap(); | |
1053 | /// ``` | |
1054 | pub fn path(&self) -> &str { | |
1055 | match (self.query_start, self.fragment_start) { | |
1056 | (None, None) => self.slice(self.path_start..), | |
1057 | (Some(next_component_start), _) | (None, Some(next_component_start)) => { | |
1058 | self.slice(self.path_start..next_component_start) | |
1059 | } | |
1060 | } | |
1061 | } | |
1062 | ||
1063 | /// Unless this URL is cannot-be-a-base, | |
1064 | /// return an iterator of '/' slash-separated path segments, | |
1065 | /// each as a percent-encoded ASCII string. | |
1066 | /// | |
1067 | /// Return `None` for cannot-be-a-base URLs. | |
1068 | /// | |
1069 | /// When `Some` is returned, the iterator always contains at least one string | |
1070 | /// (which may be empty). | |
1071 | /// | |
1072 | /// # Examples | |
1073 | /// | |
1074 | /// ``` | |
1075 | /// use url::Url; | |
1076 | /// # use std::error::Error; | |
1077 | /// | |
1078 | /// # fn run() -> Result<(), Box<dyn Error>> { | |
1079 | /// let url = Url::parse("https://example.com/foo/bar")?; | |
1080 | /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?; | |
1081 | /// assert_eq!(path_segments.next(), Some("foo")); | |
1082 | /// assert_eq!(path_segments.next(), Some("bar")); | |
1083 | /// assert_eq!(path_segments.next(), None); | |
1084 | /// | |
1085 | /// let url = Url::parse("https://example.com")?; | |
1086 | /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?; | |
1087 | /// assert_eq!(path_segments.next(), Some("")); | |
1088 | /// assert_eq!(path_segments.next(), None); | |
1089 | /// | |
1090 | /// let url = Url::parse("data:text/plain,HelloWorld")?; | |
1091 | /// assert!(url.path_segments().is_none()); | |
1092 | /// | |
1093 | /// let url = Url::parse("https://example.com/countries/việt nam")?; | |
1094 | /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?; | |
1095 | /// assert_eq!(path_segments.next(), Some("countries")); | |
1096 | /// assert_eq!(path_segments.next(), Some("vi%E1%BB%87t%20nam")); | |
1097 | /// # Ok(()) | |
1098 | /// # } | |
1099 | /// # run().unwrap(); | |
1100 | /// ``` | |
1101 | #[allow(clippy::manual_strip)] // introduced in 1.45, MSRV is 1.36 | |
1102 | pub fn path_segments(&self) -> Option<str::Split<'_, char>> { | |
1103 | let path = self.path(); | |
1104 | if path.starts_with('/') { | |
1105 | Some(path[1..].split('/')) | |
1106 | } else { | |
1107 | None | |
1108 | } | |
1109 | } | |
1110 | ||
1111 | /// Return this URL’s query string, if any, as a percent-encoded ASCII string. | |
1112 | /// | |
1113 | /// # Examples | |
1114 | /// | |
1115 | /// ```rust | |
1116 | /// use url::Url; | |
1117 | /// # use url::ParseError; | |
1118 | /// | |
1119 | /// # fn run() -> Result<(), ParseError> { | |
1120 | /// let url = Url::parse("https://example.com/products?page=2")?; | |
1121 | /// let query = url.query(); | |
1122 | /// assert_eq!(query, Some("page=2")); | |
1123 | /// | |
1124 | /// let url = Url::parse("https://example.com/products")?; | |
1125 | /// let query = url.query(); | |
1126 | /// assert!(query.is_none()); | |
1127 | /// | |
1128 | /// let url = Url::parse("https://example.com/?country=español")?; | |
1129 | /// let query = url.query(); | |
1130 | /// assert_eq!(query, Some("country=espa%C3%B1ol")); | |
1131 | /// # Ok(()) | |
1132 | /// # } | |
1133 | /// # run().unwrap(); | |
1134 | /// ``` | |
1135 | pub fn query(&self) -> Option<&str> { | |
1136 | match (self.query_start, self.fragment_start) { | |
1137 | (None, _) => None, | |
1138 | (Some(query_start), None) => { | |
1139 | debug_assert!(self.byte_at(query_start) == b'?'); | |
1140 | Some(self.slice(query_start + 1..)) | |
1141 | } | |
1142 | (Some(query_start), Some(fragment_start)) => { | |
1143 | debug_assert!(self.byte_at(query_start) == b'?'); | |
1144 | Some(self.slice(query_start + 1..fragment_start)) | |
1145 | } | |
1146 | } | |
1147 | } | |
1148 | ||
1149 | /// Parse the URL’s query string, if any, as `application/x-www-form-urlencoded` | |
1150 | /// and return an iterator of (key, value) pairs. | |
1151 | /// | |
1152 | /// # Examples | |
1153 | /// | |
1154 | /// ```rust | |
1155 | /// use std::borrow::Cow; | |
1156 | /// | |
1157 | /// use url::Url; | |
1158 | /// # use url::ParseError; | |
1159 | /// | |
1160 | /// # fn run() -> Result<(), ParseError> { | |
1161 | /// let url = Url::parse("https://example.com/products?page=2&sort=desc")?; | |
1162 | /// let mut pairs = url.query_pairs(); | |
1163 | /// | |
1164 | /// assert_eq!(pairs.count(), 2); | |
1165 | /// | |
1166 | /// assert_eq!(pairs.next(), Some((Cow::Borrowed("page"), Cow::Borrowed("2")))); | |
1167 | /// assert_eq!(pairs.next(), Some((Cow::Borrowed("sort"), Cow::Borrowed("desc")))); | |
1168 | /// # Ok(()) | |
1169 | /// # } | |
1170 | /// # run().unwrap(); | |
1171 | /// ``` | |
1172 | ||
1173 | #[inline] | |
1174 | pub fn query_pairs(&self) -> form_urlencoded::Parse<'_> { | |
1175 | form_urlencoded::parse(self.query().unwrap_or("").as_bytes()) | |
1176 | } | |
1177 | ||
1178 | /// Return this URL’s fragment identifier, if any. | |
1179 | /// | |
1180 | /// A fragment is the part of the URL after the `#` symbol. | |
1181 | /// The fragment is optional and, if present, contains a fragment identifier | |
1182 | /// that identifies a secondary resource, such as a section heading | |
1183 | /// of a document. | |
1184 | /// | |
1185 | /// In HTML, the fragment identifier is usually the id attribute of an element | |
1186 | /// that is scrolled to on load. Browsers typically will not send the fragment portion | |
1187 | /// of a URL to the server. | |
1188 | /// | |
1189 | /// **Note:** the parser did *not* percent-encode this component, | |
1190 | /// but the input may have been percent-encoded already. | |
1191 | /// | |
1192 | /// # Examples | |
1193 | /// | |
1194 | /// ```rust | |
1195 | /// use url::Url; | |
1196 | /// # use url::ParseError; | |
1197 | /// | |
1198 | /// # fn run() -> Result<(), ParseError> { | |
1199 | /// let url = Url::parse("https://example.com/data.csv#row=4")?; | |
1200 | /// | |
1201 | /// assert_eq!(url.fragment(), Some("row=4")); | |
1202 | /// | |
1203 | /// let url = Url::parse("https://example.com/data.csv#cell=4,1-6,2")?; | |
1204 | /// | |
1205 | /// assert_eq!(url.fragment(), Some("cell=4,1-6,2")); | |
1206 | /// # Ok(()) | |
1207 | /// # } | |
1208 | /// # run().unwrap(); | |
1209 | /// ``` | |
1210 | pub fn fragment(&self) -> Option<&str> { | |
1211 | self.fragment_start.map(|start| { | |
1212 | debug_assert!(self.byte_at(start) == b'#'); | |
1213 | self.slice(start + 1..) | |
1214 | }) | |
1215 | } | |
1216 | ||
1217 | fn mutate<F: FnOnce(&mut Parser<'_>) -> R, R>(&mut self, f: F) -> R { | |
1218 | let mut parser = Parser::for_setter(mem::replace(&mut self.serialization, String::new())); | |
1219 | let result = f(&mut parser); | |
1220 | self.serialization = parser.serialization; | |
1221 | result | |
1222 | } | |
1223 | ||
1224 | /// Change this URL’s fragment identifier. | |
1225 | /// | |
1226 | /// # Examples | |
1227 | /// | |
1228 | /// ```rust | |
1229 | /// use url::Url; | |
1230 | /// # use url::ParseError; | |
1231 | /// | |
1232 | /// # fn run() -> Result<(), ParseError> { | |
1233 | /// let mut url = Url::parse("https://example.com/data.csv")?; | |
1234 | /// assert_eq!(url.as_str(), "https://example.com/data.csv"); | |
1235 | /// | |
1236 | /// url.set_fragment(Some("cell=4,1-6,2")); | |
1237 | /// assert_eq!(url.as_str(), "https://example.com/data.csv#cell=4,1-6,2"); | |
1238 | /// assert_eq!(url.fragment(), Some("cell=4,1-6,2")); | |
1239 | /// | |
1240 | /// url.set_fragment(None); | |
1241 | /// assert_eq!(url.as_str(), "https://example.com/data.csv"); | |
1242 | /// assert!(url.fragment().is_none()); | |
1243 | /// # Ok(()) | |
1244 | /// # } | |
1245 | /// # run().unwrap(); | |
1246 | /// ``` | |
1247 | pub fn set_fragment(&mut self, fragment: Option<&str>) { | |
1248 | // Remove any previous fragment | |
1249 | if let Some(start) = self.fragment_start { | |
1250 | debug_assert!(self.byte_at(start) == b'#'); | |
1251 | self.serialization.truncate(start as usize); | |
1252 | } | |
1253 | // Write the new one | |
1254 | if let Some(input) = fragment { | |
1255 | self.fragment_start = Some(to_u32(self.serialization.len()).unwrap()); | |
1256 | self.serialization.push('#'); | |
1257 | self.mutate(|parser| parser.parse_fragment(parser::Input::no_trim(input))) | |
1258 | } else { | |
1259 | self.fragment_start = None | |
1260 | } | |
1261 | } | |
1262 | ||
1263 | fn take_fragment(&mut self) -> Option<String> { | |
1264 | self.fragment_start.take().map(|start| { | |
1265 | debug_assert!(self.byte_at(start) == b'#'); | |
1266 | let fragment = self.slice(start + 1..).to_owned(); | |
1267 | self.serialization.truncate(start as usize); | |
1268 | fragment | |
1269 | }) | |
1270 | } | |
1271 | ||
1272 | fn restore_already_parsed_fragment(&mut self, fragment: Option<String>) { | |
1273 | if let Some(ref fragment) = fragment { | |
1274 | assert!(self.fragment_start.is_none()); | |
1275 | self.fragment_start = Some(to_u32(self.serialization.len()).unwrap()); | |
1276 | self.serialization.push('#'); | |
1277 | self.serialization.push_str(fragment); | |
1278 | } | |
1279 | } | |
1280 | ||
1281 | /// Change this URL’s query string. | |
1282 | /// | |
1283 | /// # Examples | |
1284 | /// | |
1285 | /// ```rust | |
1286 | /// use url::Url; | |
1287 | /// # use url::ParseError; | |
1288 | /// | |
1289 | /// # fn run() -> Result<(), ParseError> { | |
1290 | /// let mut url = Url::parse("https://example.com/products")?; | |
1291 | /// assert_eq!(url.as_str(), "https://example.com/products"); | |
1292 | /// | |
1293 | /// url.set_query(Some("page=2")); | |
1294 | /// assert_eq!(url.as_str(), "https://example.com/products?page=2"); | |
1295 | /// assert_eq!(url.query(), Some("page=2")); | |
1296 | /// # Ok(()) | |
1297 | /// # } | |
1298 | /// # run().unwrap(); | |
1299 | /// ``` | |
1300 | pub fn set_query(&mut self, query: Option<&str>) { | |
1301 | let fragment = self.take_fragment(); | |
1302 | ||
1303 | // Remove any previous query | |
1304 | if let Some(start) = self.query_start.take() { | |
1305 | debug_assert!(self.byte_at(start) == b'?'); | |
1306 | self.serialization.truncate(start as usize); | |
1307 | } | |
1308 | // Write the new query, if any | |
1309 | if let Some(input) = query { | |
1310 | self.query_start = Some(to_u32(self.serialization.len()).unwrap()); | |
1311 | self.serialization.push('?'); | |
1312 | let scheme_type = SchemeType::from(self.scheme()); | |
1313 | let scheme_end = self.scheme_end; | |
1314 | self.mutate(|parser| { | |
1315 | let vfn = parser.violation_fn; | |
1316 | parser.parse_query( | |
1317 | scheme_type, | |
1318 | scheme_end, | |
1319 | parser::Input::trim_tab_and_newlines(input, vfn), | |
1320 | ) | |
1321 | }); | |
1322 | } | |
1323 | ||
1324 | self.restore_already_parsed_fragment(fragment); | |
1325 | } | |
1326 | ||
1327 | /// Manipulate this URL’s query string, viewed as a sequence of name/value pairs | |
1328 | /// in `application/x-www-form-urlencoded` syntax. | |
1329 | /// | |
1330 | /// The return value has a method-chaining API: | |
1331 | /// | |
1332 | /// ```rust | |
1333 | /// # use url::{Url, ParseError}; | |
1334 | /// | |
1335 | /// # fn run() -> Result<(), ParseError> { | |
1336 | /// let mut url = Url::parse("https://example.net?lang=fr#nav")?; | |
1337 | /// assert_eq!(url.query(), Some("lang=fr")); | |
1338 | /// | |
1339 | /// url.query_pairs_mut().append_pair("foo", "bar"); | |
1340 | /// assert_eq!(url.query(), Some("lang=fr&foo=bar")); | |
1341 | /// assert_eq!(url.as_str(), "https://example.net/?lang=fr&foo=bar#nav"); | |
1342 | /// | |
1343 | /// url.query_pairs_mut() | |
1344 | /// .clear() | |
1345 | /// .append_pair("foo", "bar & baz") | |
1346 | /// .append_pair("saisons", "\u{00C9}t\u{00E9}+hiver"); | |
1347 | /// assert_eq!(url.query(), Some("foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver")); | |
1348 | /// assert_eq!(url.as_str(), | |
1349 | /// "https://example.net/?foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver#nav"); | |
1350 | /// # Ok(()) | |
1351 | /// # } | |
1352 | /// # run().unwrap(); | |
1353 | /// ``` | |
1354 | /// | |
1355 | /// Note: `url.query_pairs_mut().clear();` is equivalent to `url.set_query(Some(""))`, | |
1356 | /// not `url.set_query(None)`. | |
1357 | /// | |
1358 | /// The state of `Url` is unspecified if this return value is leaked without being dropped. | |
1359 | pub fn query_pairs_mut(&mut self) -> form_urlencoded::Serializer<'_, UrlQuery<'_>> { | |
1360 | let fragment = self.take_fragment(); | |
1361 | ||
1362 | let query_start; | |
1363 | if let Some(start) = self.query_start { | |
1364 | debug_assert!(self.byte_at(start) == b'?'); | |
1365 | query_start = start as usize; | |
1366 | } else { | |
1367 | query_start = self.serialization.len(); | |
1368 | self.query_start = Some(to_u32(query_start).unwrap()); | |
1369 | self.serialization.push('?'); | |
1370 | } | |
1371 | ||
1372 | let query = UrlQuery { | |
1373 | url: Some(self), | |
1374 | fragment, | |
1375 | }; | |
1376 | form_urlencoded::Serializer::for_suffix(query, query_start + "?".len()) | |
1377 | } | |
1378 | ||
1379 | fn take_after_path(&mut self) -> String { | |
1380 | match (self.query_start, self.fragment_start) { | |
1381 | (Some(i), _) | (None, Some(i)) => { | |
1382 | let after_path = self.slice(i..).to_owned(); | |
1383 | self.serialization.truncate(i as usize); | |
1384 | after_path | |
1385 | } | |
1386 | (None, None) => String::new(), | |
1387 | } | |
1388 | } | |
1389 | ||
1390 | /// Change this URL’s path. | |
1391 | /// | |
1392 | /// # Examples | |
1393 | /// | |
1394 | /// ```rust | |
1395 | /// use url::Url; | |
1396 | /// # use url::ParseError; | |
1397 | /// | |
1398 | /// # fn run() -> Result<(), ParseError> { | |
1399 | /// let mut url = Url::parse("https://example.com")?; | |
1400 | /// url.set_path("api/comments"); | |
1401 | /// assert_eq!(url.as_str(), "https://example.com/api/comments"); | |
1402 | /// assert_eq!(url.path(), "/api/comments"); | |
1403 | /// | |
1404 | /// let mut url = Url::parse("https://example.com/api")?; | |
1405 | /// url.set_path("data/report.csv"); | |
1406 | /// assert_eq!(url.as_str(), "https://example.com/data/report.csv"); | |
1407 | /// assert_eq!(url.path(), "/data/report.csv"); | |
1408 | /// # Ok(()) | |
1409 | /// # } | |
1410 | /// # run().unwrap(); | |
1411 | /// ``` | |
1412 | pub fn set_path(&mut self, mut path: &str) { | |
1413 | let after_path = self.take_after_path(); | |
1414 | let old_after_path_pos = to_u32(self.serialization.len()).unwrap(); | |
1415 | let cannot_be_a_base = self.cannot_be_a_base(); | |
1416 | let scheme_type = SchemeType::from(self.scheme()); | |
1417 | self.serialization.truncate(self.path_start as usize); | |
1418 | self.mutate(|parser| { | |
1419 | if cannot_be_a_base { | |
1420 | if path.starts_with('/') { | |
1421 | parser.serialization.push_str("%2F"); | |
1422 | path = &path[1..]; | |
1423 | } | |
1424 | parser.parse_cannot_be_a_base_path(parser::Input::new(path)); | |
1425 | } else { | |
1426 | let mut has_host = true; // FIXME | |
1427 | parser.parse_path_start(scheme_type, &mut has_host, parser::Input::new(path)); | |
1428 | } | |
1429 | }); | |
1430 | self.restore_after_path(old_after_path_pos, &after_path); | |
1431 | } | |
1432 | ||
1433 | /// Return an object with methods to manipulate this URL’s path segments. | |
1434 | /// | |
1435 | /// Return `Err(())` if this URL is cannot-be-a-base. | |
1436 | #[allow(clippy::clippy::result_unit_err)] | |
1437 | pub fn path_segments_mut(&mut self) -> Result<PathSegmentsMut<'_>, ()> { | |
1438 | if self.cannot_be_a_base() { | |
1439 | Err(()) | |
1440 | } else { | |
1441 | Ok(path_segments::new(self)) | |
1442 | } | |
1443 | } | |
1444 | ||
1445 | fn restore_after_path(&mut self, old_after_path_position: u32, after_path: &str) { | |
1446 | let new_after_path_position = to_u32(self.serialization.len()).unwrap(); | |
1447 | let adjust = |index: &mut u32| { | |
1448 | *index -= old_after_path_position; | |
1449 | *index += new_after_path_position; | |
1450 | }; | |
1451 | if let Some(ref mut index) = self.query_start { | |
1452 | adjust(index) | |
1453 | } | |
1454 | if let Some(ref mut index) = self.fragment_start { | |
1455 | adjust(index) | |
1456 | } | |
1457 | self.serialization.push_str(after_path) | |
1458 | } | |
1459 | ||
1460 | /// Change this URL’s port number. | |
1461 | /// | |
1462 | /// Note that default port numbers are not reflected in the serialization. | |
1463 | /// | |
1464 | /// If this URL is cannot-be-a-base, does not have a host, or has the `file` scheme, | |
1465 | /// do nothing and return `Err`. | |
1466 | /// | |
1467 | /// # Examples | |
1468 | /// | |
1469 | /// ``` | |
1470 | /// use url::Url; | |
1471 | /// # use std::error::Error; | |
1472 | /// | |
1473 | /// # fn run() -> Result<(), Box<dyn Error>> { | |
1474 | /// let mut url = Url::parse("ssh://example.net:2048/")?; | |
1475 | /// | |
1476 | /// url.set_port(Some(4096)).map_err(|_| "cannot be base")?; | |
1477 | /// assert_eq!(url.as_str(), "ssh://example.net:4096/"); | |
1478 | /// | |
1479 | /// url.set_port(None).map_err(|_| "cannot be base")?; | |
1480 | /// assert_eq!(url.as_str(), "ssh://example.net/"); | |
1481 | /// # Ok(()) | |
1482 | /// # } | |
1483 | /// # run().unwrap(); | |
1484 | /// ``` | |
1485 | /// | |
1486 | /// Known default port numbers are not reflected: | |
1487 | /// | |
1488 | /// ```rust | |
1489 | /// use url::Url; | |
1490 | /// # use std::error::Error; | |
1491 | /// | |
1492 | /// # fn run() -> Result<(), Box<dyn Error>> { | |
1493 | /// let mut url = Url::parse("https://example.org/")?; | |
1494 | /// | |
1495 | /// url.set_port(Some(443)).map_err(|_| "cannot be base")?; | |
1496 | /// assert!(url.port().is_none()); | |
1497 | /// # Ok(()) | |
1498 | /// # } | |
1499 | /// # run().unwrap(); | |
1500 | /// ``` | |
1501 | /// | |
1502 | /// Cannot set port for cannot-be-a-base URLs: | |
1503 | /// | |
1504 | /// ``` | |
1505 | /// use url::Url; | |
1506 | /// # use url::ParseError; | |
1507 | /// | |
1508 | /// # fn run() -> Result<(), ParseError> { | |
1509 | /// let mut url = Url::parse("mailto:rms@example.net")?; | |
1510 | /// | |
1511 | /// let result = url.set_port(Some(80)); | |
1512 | /// assert!(result.is_err()); | |
1513 | /// | |
1514 | /// let result = url.set_port(None); | |
1515 | /// assert!(result.is_err()); | |
1516 | /// # Ok(()) | |
1517 | /// # } | |
1518 | /// # run().unwrap(); | |
1519 | /// ``` | |
1520 | #[allow(clippy::clippy::result_unit_err)] | |
1521 | pub fn set_port(&mut self, mut port: Option<u16>) -> Result<(), ()> { | |
1522 | // has_host implies !cannot_be_a_base | |
1523 | if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" { | |
1524 | return Err(()); | |
1525 | } | |
1526 | if port.is_some() && port == parser::default_port(self.scheme()) { | |
1527 | port = None | |
1528 | } | |
1529 | self.set_port_internal(port); | |
1530 | Ok(()) | |
1531 | } | |
1532 | ||
1533 | fn set_port_internal(&mut self, port: Option<u16>) { | |
1534 | match (self.port, port) { | |
1535 | (None, None) => {} | |
1536 | (Some(_), None) => { | |
1537 | self.serialization | |
1538 | .drain(self.host_end as usize..self.path_start as usize); | |
1539 | let offset = self.path_start - self.host_end; | |
1540 | self.path_start = self.host_end; | |
1541 | if let Some(ref mut index) = self.query_start { | |
1542 | *index -= offset | |
1543 | } | |
1544 | if let Some(ref mut index) = self.fragment_start { | |
1545 | *index -= offset | |
1546 | } | |
1547 | } | |
1548 | (Some(old), Some(new)) if old == new => {} | |
1549 | (_, Some(new)) => { | |
1550 | let path_and_after = self.slice(self.path_start..).to_owned(); | |
1551 | self.serialization.truncate(self.host_end as usize); | |
1552 | write!(&mut self.serialization, ":{}", new).unwrap(); | |
1553 | let old_path_start = self.path_start; | |
1554 | let new_path_start = to_u32(self.serialization.len()).unwrap(); | |
1555 | self.path_start = new_path_start; | |
1556 | let adjust = |index: &mut u32| { | |
1557 | *index -= old_path_start; | |
1558 | *index += new_path_start; | |
1559 | }; | |
1560 | if let Some(ref mut index) = self.query_start { | |
1561 | adjust(index) | |
1562 | } | |
1563 | if let Some(ref mut index) = self.fragment_start { | |
1564 | adjust(index) | |
1565 | } | |
1566 | self.serialization.push_str(&path_and_after); | |
1567 | } | |
1568 | } | |
1569 | self.port = port; | |
1570 | } | |
1571 | ||
1572 | /// Change this URL’s host. | |
1573 | /// | |
1574 | /// Removing the host (calling this with `None`) | |
1575 | /// will also remove any username, password, and port number. | |
1576 | /// | |
1577 | /// # Examples | |
1578 | /// | |
1579 | /// Change host: | |
1580 | /// | |
1581 | /// ``` | |
1582 | /// use url::Url; | |
1583 | /// # use url::ParseError; | |
1584 | /// | |
1585 | /// # fn run() -> Result<(), ParseError> { | |
1586 | /// let mut url = Url::parse("https://example.net")?; | |
1587 | /// let result = url.set_host(Some("rust-lang.org")); | |
1588 | /// assert!(result.is_ok()); | |
1589 | /// assert_eq!(url.as_str(), "https://rust-lang.org/"); | |
1590 | /// # Ok(()) | |
1591 | /// # } | |
1592 | /// # run().unwrap(); | |
1593 | /// ``` | |
1594 | /// | |
1595 | /// Remove host: | |
1596 | /// | |
1597 | /// ``` | |
1598 | /// use url::Url; | |
1599 | /// # use url::ParseError; | |
1600 | /// | |
1601 | /// # fn run() -> Result<(), ParseError> { | |
1602 | /// let mut url = Url::parse("foo://example.net")?; | |
1603 | /// let result = url.set_host(None); | |
1604 | /// assert!(result.is_ok()); | |
1605 | /// assert_eq!(url.as_str(), "foo:/"); | |
1606 | /// # Ok(()) | |
1607 | /// # } | |
1608 | /// # run().unwrap(); | |
1609 | /// ``` | |
1610 | /// | |
1611 | /// Cannot remove host for 'special' schemes (e.g. `http`): | |
1612 | /// | |
1613 | /// ``` | |
1614 | /// use url::Url; | |
1615 | /// # use url::ParseError; | |
1616 | /// | |
1617 | /// # fn run() -> Result<(), ParseError> { | |
1618 | /// let mut url = Url::parse("https://example.net")?; | |
1619 | /// let result = url.set_host(None); | |
1620 | /// assert!(result.is_err()); | |
1621 | /// assert_eq!(url.as_str(), "https://example.net/"); | |
1622 | /// # Ok(()) | |
1623 | /// # } | |
1624 | /// # run().unwrap(); | |
1625 | /// ``` | |
1626 | /// | |
1627 | /// Cannot change or remove host for cannot-be-a-base URLs: | |
1628 | /// | |
1629 | /// ``` | |
1630 | /// use url::Url; | |
1631 | /// # use url::ParseError; | |
1632 | /// | |
1633 | /// # fn run() -> Result<(), ParseError> { | |
1634 | /// let mut url = Url::parse("mailto:rms@example.net")?; | |
1635 | /// | |
1636 | /// let result = url.set_host(Some("rust-lang.org")); | |
1637 | /// assert!(result.is_err()); | |
1638 | /// assert_eq!(url.as_str(), "mailto:rms@example.net"); | |
1639 | /// | |
1640 | /// let result = url.set_host(None); | |
1641 | /// assert!(result.is_err()); | |
1642 | /// assert_eq!(url.as_str(), "mailto:rms@example.net"); | |
1643 | /// # Ok(()) | |
1644 | /// # } | |
1645 | /// # run().unwrap(); | |
1646 | /// ``` | |
1647 | /// | |
1648 | /// # Errors | |
1649 | /// | |
1650 | /// If this URL is cannot-be-a-base or there is an error parsing the given `host`, | |
1651 | /// a [`ParseError`] variant will be returned. | |
1652 | /// | |
1653 | /// [`ParseError`]: enum.ParseError.html | |
1654 | pub fn set_host(&mut self, host: Option<&str>) -> Result<(), ParseError> { | |
1655 | if self.cannot_be_a_base() { | |
1656 | return Err(ParseError::SetHostOnCannotBeABaseUrl); | |
1657 | } | |
1658 | ||
1659 | if let Some(host) = host { | |
1660 | if host.is_empty() && SchemeType::from(self.scheme()).is_special() { | |
1661 | return Err(ParseError::EmptyHost); | |
1662 | } | |
1663 | let mut host_substr = host; | |
1664 | // Otherwise, if c is U+003A (:) and the [] flag is unset, then | |
1665 | if !host.starts_with('[') || !host.ends_with(']') { | |
1666 | match host.find(':') { | |
1667 | Some(0) => { | |
1668 | // If buffer is the empty string, validation error, return failure. | |
1669 | return Err(ParseError::InvalidDomainCharacter); | |
1670 | } | |
1671 | // Let host be the result of host parsing buffer | |
1672 | Some(colon_index) => { | |
1673 | host_substr = &host[..colon_index]; | |
1674 | } | |
1675 | None => {} | |
1676 | } | |
1677 | } | |
1678 | if SchemeType::from(self.scheme()).is_special() { | |
1679 | self.set_host_internal(Host::parse(host_substr)?, None); | |
1680 | } else { | |
1681 | self.set_host_internal(Host::parse_opaque(host_substr)?, None); | |
1682 | } | |
1683 | } else if self.has_host() { | |
1684 | let scheme_type = SchemeType::from(self.scheme()); | |
1685 | if scheme_type.is_special() { | |
1686 | return Err(ParseError::EmptyHost); | |
1687 | } else if self.serialization.len() == self.path_start as usize { | |
1688 | self.serialization.push('/'); | |
1689 | } | |
1690 | debug_assert!(self.byte_at(self.scheme_end) == b':'); | |
1691 | debug_assert!(self.byte_at(self.path_start) == b'/'); | |
1692 | let new_path_start = self.scheme_end + 1; | |
1693 | self.serialization | |
1694 | .drain(new_path_start as usize..self.path_start as usize); | |
1695 | let offset = self.path_start - new_path_start; | |
1696 | self.path_start = new_path_start; | |
1697 | self.username_end = new_path_start; | |
1698 | self.host_start = new_path_start; | |
1699 | self.host_end = new_path_start; | |
1700 | self.port = None; | |
1701 | if let Some(ref mut index) = self.query_start { | |
1702 | *index -= offset | |
1703 | } | |
1704 | if let Some(ref mut index) = self.fragment_start { | |
1705 | *index -= offset | |
1706 | } | |
1707 | } | |
1708 | Ok(()) | |
1709 | } | |
1710 | ||
1711 | /// opt_new_port: None means leave unchanged, Some(None) means remove any port number. | |
1712 | fn set_host_internal(&mut self, host: Host<String>, opt_new_port: Option<Option<u16>>) { | |
1713 | let old_suffix_pos = if opt_new_port.is_some() { | |
1714 | self.path_start | |
1715 | } else { | |
1716 | self.host_end | |
1717 | }; | |
1718 | let suffix = self.slice(old_suffix_pos..).to_owned(); | |
1719 | self.serialization.truncate(self.host_start as usize); | |
1720 | if !self.has_authority() { | |
1721 | debug_assert!(self.slice(self.scheme_end..self.host_start) == ":"); | |
1722 | debug_assert!(self.username_end == self.host_start); | |
1723 | self.serialization.push('/'); | |
1724 | self.serialization.push('/'); | |
1725 | self.username_end += 2; | |
1726 | self.host_start += 2; | |
1727 | } | |
1728 | write!(&mut self.serialization, "{}", host).unwrap(); | |
1729 | self.host_end = to_u32(self.serialization.len()).unwrap(); | |
1730 | self.host = host.into(); | |
1731 | ||
1732 | if let Some(new_port) = opt_new_port { | |
1733 | self.port = new_port; | |
1734 | if let Some(port) = new_port { | |
1735 | write!(&mut self.serialization, ":{}", port).unwrap(); | |
1736 | } | |
1737 | } | |
1738 | let new_suffix_pos = to_u32(self.serialization.len()).unwrap(); | |
1739 | self.serialization.push_str(&suffix); | |
1740 | ||
1741 | let adjust = |index: &mut u32| { | |
1742 | *index -= old_suffix_pos; | |
1743 | *index += new_suffix_pos; | |
1744 | }; | |
1745 | adjust(&mut self.path_start); | |
1746 | if let Some(ref mut index) = self.query_start { | |
1747 | adjust(index) | |
1748 | } | |
1749 | if let Some(ref mut index) = self.fragment_start { | |
1750 | adjust(index) | |
1751 | } | |
1752 | } | |
1753 | ||
1754 | /// Change this URL’s host to the given IP address. | |
1755 | /// | |
1756 | /// If this URL is cannot-be-a-base, do nothing and return `Err`. | |
1757 | /// | |
1758 | /// Compared to `Url::set_host`, this skips the host parser. | |
1759 | /// | |
1760 | /// # Examples | |
1761 | /// | |
1762 | /// ```rust | |
1763 | /// use url::{Url, ParseError}; | |
1764 | /// | |
1765 | /// # fn run() -> Result<(), ParseError> { | |
1766 | /// let mut url = Url::parse("http://example.com")?; | |
1767 | /// url.set_ip_host("127.0.0.1".parse().unwrap()); | |
1768 | /// assert_eq!(url.host_str(), Some("127.0.0.1")); | |
1769 | /// assert_eq!(url.as_str(), "http://127.0.0.1/"); | |
1770 | /// # Ok(()) | |
1771 | /// # } | |
1772 | /// # run().unwrap(); | |
1773 | /// ``` | |
1774 | /// | |
1775 | /// Cannot change URL's from mailto(cannot-be-base) to ip: | |
1776 | /// | |
1777 | /// ```rust | |
1778 | /// use url::{Url, ParseError}; | |
1779 | /// | |
1780 | /// # fn run() -> Result<(), ParseError> { | |
1781 | /// let mut url = Url::parse("mailto:rms@example.com")?; | |
1782 | /// let result = url.set_ip_host("127.0.0.1".parse().unwrap()); | |
1783 | /// | |
1784 | /// assert_eq!(url.as_str(), "mailto:rms@example.com"); | |
1785 | /// assert!(result.is_err()); | |
1786 | /// # Ok(()) | |
1787 | /// # } | |
1788 | /// # run().unwrap(); | |
1789 | /// ``` | |
1790 | /// | |
1791 | #[allow(clippy::clippy::result_unit_err)] | |
1792 | pub fn set_ip_host(&mut self, address: IpAddr) -> Result<(), ()> { | |
1793 | if self.cannot_be_a_base() { | |
1794 | return Err(()); | |
1795 | } | |
1796 | ||
1797 | let address = match address { | |
1798 | IpAddr::V4(address) => Host::Ipv4(address), | |
1799 | IpAddr::V6(address) => Host::Ipv6(address), | |
1800 | }; | |
1801 | self.set_host_internal(address, None); | |
1802 | Ok(()) | |
1803 | } | |
1804 | ||
1805 | /// Change this URL’s password. | |
1806 | /// | |
1807 | /// If this URL is cannot-be-a-base or does not have a host, do nothing and return `Err`. | |
1808 | /// | |
1809 | /// # Examples | |
1810 | /// | |
1811 | /// ```rust | |
1812 | /// use url::{Url, ParseError}; | |
1813 | /// | |
1814 | /// # fn run() -> Result<(), ParseError> { | |
1815 | /// let mut url = Url::parse("mailto:rmz@example.com")?; | |
1816 | /// let result = url.set_password(Some("secret_password")); | |
1817 | /// assert!(result.is_err()); | |
1818 | /// | |
1819 | /// let mut url = Url::parse("ftp://user1:secret1@example.com")?; | |
1820 | /// let result = url.set_password(Some("secret_password")); | |
1821 | /// assert_eq!(url.password(), Some("secret_password")); | |
1822 | /// | |
1823 | /// let mut url = Url::parse("ftp://user2:@example.com")?; | |
1824 | /// let result = url.set_password(Some("secret2")); | |
1825 | /// assert!(result.is_ok()); | |
1826 | /// assert_eq!(url.password(), Some("secret2")); | |
1827 | /// # Ok(()) | |
1828 | /// # } | |
1829 | /// # run().unwrap(); | |
1830 | /// ``` | |
1831 | #[allow(clippy::clippy::result_unit_err)] | |
1832 | pub fn set_password(&mut self, password: Option<&str>) -> Result<(), ()> { | |
1833 | // has_host implies !cannot_be_a_base | |
1834 | if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" { | |
1835 | return Err(()); | |
1836 | } | |
1837 | if let Some(password) = password { | |
1838 | let host_and_after = self.slice(self.host_start..).to_owned(); | |
1839 | self.serialization.truncate(self.username_end as usize); | |
1840 | self.serialization.push(':'); | |
1841 | self.serialization | |
1842 | .extend(utf8_percent_encode(password, USERINFO)); | |
1843 | self.serialization.push('@'); | |
1844 | ||
1845 | let old_host_start = self.host_start; | |
1846 | let new_host_start = to_u32(self.serialization.len()).unwrap(); | |
1847 | let adjust = |index: &mut u32| { | |
1848 | *index -= old_host_start; | |
1849 | *index += new_host_start; | |
1850 | }; | |
1851 | self.host_start = new_host_start; | |
1852 | adjust(&mut self.host_end); | |
1853 | adjust(&mut self.path_start); | |
1854 | if let Some(ref mut index) = self.query_start { | |
1855 | adjust(index) | |
1856 | } | |
1857 | if let Some(ref mut index) = self.fragment_start { | |
1858 | adjust(index) | |
1859 | } | |
1860 | ||
1861 | self.serialization.push_str(&host_and_after); | |
1862 | } else if self.byte_at(self.username_end) == b':' { | |
1863 | // If there is a password to remove | |
1864 | let has_username_or_password = self.byte_at(self.host_start - 1) == b'@'; | |
1865 | debug_assert!(has_username_or_password); | |
1866 | let username_start = self.scheme_end + 3; | |
1867 | let empty_username = username_start == self.username_end; | |
1868 | let start = self.username_end; // Remove the ':' | |
1869 | let end = if empty_username { | |
1870 | self.host_start // Remove the '@' as well | |
1871 | } else { | |
1872 | self.host_start - 1 // Keep the '@' to separate the username from the host | |
1873 | }; | |
1874 | self.serialization.drain(start as usize..end as usize); | |
1875 | let offset = end - start; | |
1876 | self.host_start -= offset; | |
1877 | self.host_end -= offset; | |
1878 | self.path_start -= offset; | |
1879 | if let Some(ref mut index) = self.query_start { | |
1880 | *index -= offset | |
1881 | } | |
1882 | if let Some(ref mut index) = self.fragment_start { | |
1883 | *index -= offset | |
1884 | } | |
1885 | } | |
1886 | Ok(()) | |
1887 | } | |
1888 | ||
1889 | /// Change this URL’s username. | |
1890 | /// | |
1891 | /// If this URL is cannot-be-a-base or does not have a host, do nothing and return `Err`. | |
1892 | /// # Examples | |
1893 | /// | |
1894 | /// Cannot setup username from mailto(cannot-be-base) | |
1895 | /// | |
1896 | /// ```rust | |
1897 | /// use url::{Url, ParseError}; | |
1898 | /// | |
1899 | /// # fn run() -> Result<(), ParseError> { | |
1900 | /// let mut url = Url::parse("mailto:rmz@example.com")?; | |
1901 | /// let result = url.set_username("user1"); | |
1902 | /// assert_eq!(url.as_str(), "mailto:rmz@example.com"); | |
1903 | /// assert!(result.is_err()); | |
1904 | /// # Ok(()) | |
1905 | /// # } | |
1906 | /// # run().unwrap(); | |
1907 | /// ``` | |
1908 | /// | |
1909 | /// Setup username to user1 | |
1910 | /// | |
1911 | /// ```rust | |
1912 | /// use url::{Url, ParseError}; | |
1913 | /// | |
1914 | /// # fn run() -> Result<(), ParseError> { | |
1915 | /// let mut url = Url::parse("ftp://:secre1@example.com/")?; | |
1916 | /// let result = url.set_username("user1"); | |
1917 | /// assert!(result.is_ok()); | |
1918 | /// assert_eq!(url.username(), "user1"); | |
1919 | /// assert_eq!(url.as_str(), "ftp://user1:secre1@example.com/"); | |
1920 | /// # Ok(()) | |
1921 | /// # } | |
1922 | /// # run().unwrap(); | |
1923 | /// ``` | |
1924 | #[allow(clippy::clippy::result_unit_err)] | |
1925 | pub fn set_username(&mut self, username: &str) -> Result<(), ()> { | |
1926 | // has_host implies !cannot_be_a_base | |
1927 | if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" { | |
1928 | return Err(()); | |
1929 | } | |
1930 | let username_start = self.scheme_end + 3; | |
1931 | debug_assert!(self.slice(self.scheme_end..username_start) == "://"); | |
1932 | if self.slice(username_start..self.username_end) == username { | |
1933 | return Ok(()); | |
1934 | } | |
1935 | let after_username = self.slice(self.username_end..).to_owned(); | |
1936 | self.serialization.truncate(username_start as usize); | |
1937 | self.serialization | |
1938 | .extend(utf8_percent_encode(username, USERINFO)); | |
1939 | ||
1940 | let mut removed_bytes = self.username_end; | |
1941 | self.username_end = to_u32(self.serialization.len()).unwrap(); | |
1942 | let mut added_bytes = self.username_end; | |
1943 | ||
1944 | let new_username_is_empty = self.username_end == username_start; | |
1945 | match (new_username_is_empty, after_username.chars().next()) { | |
1946 | (true, Some('@')) => { | |
1947 | removed_bytes += 1; | |
1948 | self.serialization.push_str(&after_username[1..]); | |
1949 | } | |
1950 | (false, Some('@')) | (_, Some(':')) | (true, _) => { | |
1951 | self.serialization.push_str(&after_username); | |
1952 | } | |
1953 | (false, _) => { | |
1954 | added_bytes += 1; | |
1955 | self.serialization.push('@'); | |
1956 | self.serialization.push_str(&after_username); | |
1957 | } | |
1958 | } | |
1959 | ||
1960 | let adjust = |index: &mut u32| { | |
1961 | *index -= removed_bytes; | |
1962 | *index += added_bytes; | |
1963 | }; | |
1964 | adjust(&mut self.host_start); | |
1965 | adjust(&mut self.host_end); | |
1966 | adjust(&mut self.path_start); | |
1967 | if let Some(ref mut index) = self.query_start { | |
1968 | adjust(index) | |
1969 | } | |
1970 | if let Some(ref mut index) = self.fragment_start { | |
1971 | adjust(index) | |
1972 | } | |
1973 | Ok(()) | |
1974 | } | |
1975 | ||
1976 | /// Change this URL’s scheme. | |
1977 | /// | |
1978 | /// Do nothing and return `Err` under the following circumstances: | |
1979 | /// | |
1980 | /// * If the new scheme is not in `[a-zA-Z][a-zA-Z0-9+.-]+` | |
1981 | /// * If this URL is cannot-be-a-base and the new scheme is one of | |
1982 | /// `http`, `https`, `ws`, `wss` or `ftp` | |
1983 | /// * If either the old or new scheme is `http`, `https`, `ws`, | |
1984 | /// `wss` or `ftp` and the other is not one of these | |
1985 | /// * If the new scheme is `file` and this URL includes credentials | |
1986 | /// or has a non-null port | |
1987 | /// * If this URL's scheme is `file` and its host is empty or null | |
1988 | /// | |
1989 | /// See also [the URL specification's section on legal scheme state | |
1990 | /// overrides](https://url.spec.whatwg.org/#scheme-state). | |
1991 | /// | |
1992 | /// # Examples | |
1993 | /// | |
1994 | /// Change the URL’s scheme from `https` to `foo`: | |
1995 | /// | |
1996 | /// ``` | |
1997 | /// use url::Url; | |
1998 | /// # use url::ParseError; | |
1999 | /// | |
2000 | /// # fn run() -> Result<(), ParseError> { | |
2001 | /// let mut url = Url::parse("https://example.net")?; | |
2002 | /// let result = url.set_scheme("http"); | |
2003 | /// assert_eq!(url.as_str(), "http://example.net/"); | |
2004 | /// assert!(result.is_ok()); | |
2005 | /// # Ok(()) | |
2006 | /// # } | |
2007 | /// # run().unwrap(); | |
2008 | /// ``` | |
2009 | /// Change the URL’s scheme from `foo` to `bar`: | |
2010 | /// | |
2011 | /// ``` | |
2012 | /// use url::Url; | |
2013 | /// # use url::ParseError; | |
2014 | /// | |
2015 | /// # fn run() -> Result<(), ParseError> { | |
2016 | /// let mut url = Url::parse("foo://example.net")?; | |
2017 | /// let result = url.set_scheme("bar"); | |
2018 | /// assert_eq!(url.as_str(), "bar://example.net"); | |
2019 | /// assert!(result.is_ok()); | |
2020 | /// # Ok(()) | |
2021 | /// # } | |
2022 | /// # run().unwrap(); | |
2023 | /// ``` | |
2024 | /// | |
2025 | /// Cannot change URL’s scheme from `https` to `foõ`: | |
2026 | /// | |
2027 | /// ``` | |
2028 | /// use url::Url; | |
2029 | /// # use url::ParseError; | |
2030 | /// | |
2031 | /// # fn run() -> Result<(), ParseError> { | |
2032 | /// let mut url = Url::parse("https://example.net")?; | |
2033 | /// let result = url.set_scheme("foõ"); | |
2034 | /// assert_eq!(url.as_str(), "https://example.net/"); | |
2035 | /// assert!(result.is_err()); | |
2036 | /// # Ok(()) | |
2037 | /// # } | |
2038 | /// # run().unwrap(); | |
2039 | /// ``` | |
2040 | /// | |
2041 | /// Cannot change URL’s scheme from `mailto` (cannot-be-a-base) to `https`: | |
2042 | /// | |
2043 | /// ``` | |
2044 | /// use url::Url; | |
2045 | /// # use url::ParseError; | |
2046 | /// | |
2047 | /// # fn run() -> Result<(), ParseError> { | |
2048 | /// let mut url = Url::parse("mailto:rms@example.net")?; | |
2049 | /// let result = url.set_scheme("https"); | |
2050 | /// assert_eq!(url.as_str(), "mailto:rms@example.net"); | |
2051 | /// assert!(result.is_err()); | |
2052 | /// # Ok(()) | |
2053 | /// # } | |
2054 | /// # run().unwrap(); | |
2055 | /// ``` | |
2056 | /// Cannot change the URL’s scheme from `foo` to `https`: | |
2057 | /// | |
2058 | /// ``` | |
2059 | /// use url::Url; | |
2060 | /// # use url::ParseError; | |
2061 | /// | |
2062 | /// # fn run() -> Result<(), ParseError> { | |
2063 | /// let mut url = Url::parse("foo://example.net")?; | |
2064 | /// let result = url.set_scheme("https"); | |
2065 | /// assert_eq!(url.as_str(), "foo://example.net"); | |
2066 | /// assert!(result.is_err()); | |
2067 | /// # Ok(()) | |
2068 | /// # } | |
2069 | /// # run().unwrap(); | |
2070 | /// ``` | |
2071 | /// Cannot change the URL’s scheme from `http` to `foo`: | |
2072 | /// | |
2073 | /// ``` | |
2074 | /// use url::Url; | |
2075 | /// # use url::ParseError; | |
2076 | /// | |
2077 | /// # fn run() -> Result<(), ParseError> { | |
2078 | /// let mut url = Url::parse("http://example.net")?; | |
2079 | /// let result = url.set_scheme("foo"); | |
2080 | /// assert_eq!(url.as_str(), "http://example.net/"); | |
2081 | /// assert!(result.is_err()); | |
2082 | /// # Ok(()) | |
2083 | /// # } | |
2084 | /// # run().unwrap(); | |
2085 | /// ``` | |
2086 | #[allow(clippy::result_unit_err, clippy::suspicious_operation_groupings)] | |
2087 | pub fn set_scheme(&mut self, scheme: &str) -> Result<(), ()> { | |
2088 | let mut parser = Parser::for_setter(String::new()); | |
2089 | let remaining = parser.parse_scheme(parser::Input::new(scheme))?; | |
2090 | let new_scheme_type = SchemeType::from(&parser.serialization); | |
2091 | let old_scheme_type = SchemeType::from(self.scheme()); | |
2092 | // If url’s scheme is a special scheme and buffer is not a special scheme, then return. | |
2093 | if (new_scheme_type.is_special() && !old_scheme_type.is_special()) || | |
2094 | // If url’s scheme is not a special scheme and buffer is a special scheme, then return. | |
2095 | (!new_scheme_type.is_special() && old_scheme_type.is_special()) || | |
2096 | // If url includes credentials or has a non-null port, and buffer is "file", then return. | |
2097 | // If url’s scheme is "file" and its host is an empty host or null, then return. | |
2098 | (new_scheme_type.is_file() && self.has_authority()) | |
2099 | { | |
2100 | return Err(()); | |
2101 | } | |
2102 | ||
2103 | if !remaining.is_empty() || (!self.has_host() && new_scheme_type.is_special()) { | |
2104 | return Err(()); | |
2105 | } | |
2106 | let old_scheme_end = self.scheme_end; | |
2107 | let new_scheme_end = to_u32(parser.serialization.len()).unwrap(); | |
2108 | let adjust = |index: &mut u32| { | |
2109 | *index -= old_scheme_end; | |
2110 | *index += new_scheme_end; | |
2111 | }; | |
2112 | ||
2113 | self.scheme_end = new_scheme_end; | |
2114 | adjust(&mut self.username_end); | |
2115 | adjust(&mut self.host_start); | |
2116 | adjust(&mut self.host_end); | |
2117 | adjust(&mut self.path_start); | |
2118 | if let Some(ref mut index) = self.query_start { | |
2119 | adjust(index) | |
2120 | } | |
2121 | if let Some(ref mut index) = self.fragment_start { | |
2122 | adjust(index) | |
2123 | } | |
2124 | ||
2125 | parser.serialization.push_str(self.slice(old_scheme_end..)); | |
2126 | self.serialization = parser.serialization; | |
2127 | ||
2128 | // Update the port so it can be removed | |
2129 | // If it is the scheme's default | |
2130 | // we don't mind it silently failing | |
2131 | // if there was no port in the first place | |
2132 | let previous_port = self.port(); | |
2133 | let _ = self.set_port(previous_port); | |
2134 | ||
2135 | Ok(()) | |
2136 | } | |
2137 | ||
2138 | /// Convert a file name as `std::path::Path` into an URL in the `file` scheme. | |
2139 | /// | |
2140 | /// This returns `Err` if the given path is not absolute or, | |
2141 | /// on Windows, if the prefix is not a disk prefix (e.g. `C:`) or a UNC prefix (`\\`). | |
2142 | /// | |
2143 | /// # Examples | |
2144 | /// | |
2145 | /// On Unix-like platforms: | |
2146 | /// | |
2147 | /// ``` | |
2148 | /// # if cfg!(unix) { | |
2149 | /// use url::Url; | |
2150 | /// | |
2151 | /// # fn run() -> Result<(), ()> { | |
2152 | /// let url = Url::from_file_path("/tmp/foo.txt")?; | |
2153 | /// assert_eq!(url.as_str(), "file:///tmp/foo.txt"); | |
2154 | /// | |
2155 | /// let url = Url::from_file_path("../foo.txt"); | |
2156 | /// assert!(url.is_err()); | |
2157 | /// | |
2158 | /// let url = Url::from_file_path("https://google.com/"); | |
2159 | /// assert!(url.is_err()); | |
2160 | /// # Ok(()) | |
2161 | /// # } | |
2162 | /// # run().unwrap(); | |
2163 | /// # } | |
2164 | /// ``` | |
2165 | #[cfg(any(unix, windows, target_os = "redox"))] | |
2166 | #[allow(clippy::clippy::result_unit_err)] | |
2167 | pub fn from_file_path<P: AsRef<Path>>(path: P) -> Result<Url, ()> { | |
2168 | let mut serialization = "file://".to_owned(); | |
2169 | let host_start = serialization.len() as u32; | |
2170 | let (host_end, host) = path_to_file_url_segments(path.as_ref(), &mut serialization)?; | |
2171 | Ok(Url { | |
2172 | serialization, | |
2173 | scheme_end: "file".len() as u32, | |
2174 | username_end: host_start, | |
2175 | host_start, | |
2176 | host_end, | |
2177 | host, | |
2178 | port: None, | |
2179 | path_start: host_end, | |
2180 | query_start: None, | |
2181 | fragment_start: None, | |
2182 | }) | |
2183 | } | |
2184 | ||
2185 | /// Convert a directory name as `std::path::Path` into an URL in the `file` scheme. | |
2186 | /// | |
2187 | /// This returns `Err` if the given path is not absolute or, | |
2188 | /// on Windows, if the prefix is not a disk prefix (e.g. `C:`) or a UNC prefix (`\\`). | |
2189 | /// | |
2190 | /// Compared to `from_file_path`, this ensure that URL’s the path has a trailing slash | |
2191 | /// so that the entire path is considered when using this URL as a base URL. | |
2192 | /// | |
2193 | /// For example: | |
2194 | /// | |
2195 | /// * `"index.html"` parsed with `Url::from_directory_path(Path::new("/var/www"))` | |
2196 | /// as the base URL is `file:///var/www/index.html` | |
2197 | /// * `"index.html"` parsed with `Url::from_file_path(Path::new("/var/www"))` | |
2198 | /// as the base URL is `file:///var/index.html`, which might not be what was intended. | |
2199 | /// | |
2200 | /// Note that `std::path` does not consider trailing slashes significant | |
2201 | /// and usually does not include them (e.g. in `Path::parent()`). | |
2202 | #[cfg(any(unix, windows, target_os = "redox"))] | |
2203 | #[allow(clippy::clippy::result_unit_err)] | |
2204 | pub fn from_directory_path<P: AsRef<Path>>(path: P) -> Result<Url, ()> { | |
2205 | let mut url = Url::from_file_path(path)?; | |
2206 | if !url.serialization.ends_with('/') { | |
2207 | url.serialization.push('/') | |
2208 | } | |
2209 | Ok(url) | |
2210 | } | |
2211 | ||
/// Serialize with Serde using the internal representation of the `Url` struct.
///
/// All fields are written out as one tuple. The corresponding `deserialize_internal`
/// method sacrifices some invariant-checking for speed, compared to the `Deserialize`
/// trait impl.
///
/// This method is only available if the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")]
#[deny(unused)]
pub fn serialize_internal<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
    S: serde::Serializer,
{
    use serde::Serialize;
    // Exhaustive destructuring: adding or removing a field of `Url` makes this pattern
    // fail to compile, which forces this method to be updated.
    let Url {
        serialization,
        scheme_end,
        username_end,
        host_start,
        host_end,
        host,
        port,
        path_start,
        query_start,
        fragment_start,
    } = self;
    let fields = (
        serialization,
        scheme_end,
        username_end,
        host_start,
        host_end,
        host,
        port,
        path_start,
        query_start,
        fragment_start,
    );
    fields.serialize(serializer)
}
2253 | ||
/// Deserialize with Serde using the internal representation of the `Url` struct.
///
/// This is the counterpart of `serialize_internal`: it reads the tuple of fields
/// written by that method. It sacrifices some invariant-checking for speed, compared
/// to the `Deserialize` trait impl: the invariants of the resulting `Url` are only
/// checked when debug assertions are enabled.
///
/// This method is only available if the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")]
#[deny(unused)]
pub fn deserialize_internal<'de, D>(deserializer: D) -> Result<Self, D::Error>
where
    D: serde::Deserializer<'de>,
{
    use serde::de::{Deserialize, Error, Unexpected};
    // The field order must match the tuple produced by `serialize_internal`.
    let (
        serialization,
        scheme_end,
        username_end,
        host_start,
        host_end,
        host,
        port,
        path_start,
        query_start,
        fragment_start,
    ) = Deserialize::deserialize(deserializer)?;
    let url = Url {
        serialization,
        scheme_end,
        username_end,
        host_start,
        host_end,
        host,
        port,
        path_start,
        query_start,
        fragment_start,
    };
    // Validation only happens in builds with debug assertions; otherwise the
    // deserialized fields are accepted as-is.
    if cfg!(debug_assertions) {
        url.check_invariants().map_err(|reason| {
            let reason: &str = &reason;
            Error::invalid_value(Unexpected::Other("value"), &reason)
        })?
    }
    Ok(url)
}
2299 | ||
2300 | /// Assuming the URL is in the `file` scheme or similar, | |
2301 | /// convert its path to an absolute `std::path::Path`. | |
2302 | /// | |
2303 | /// **Note:** This does not actually check the URL’s `scheme`, | |
2304 | /// and may give nonsensical results for other schemes. | |
2305 | /// It is the user’s responsibility to check the URL’s scheme before calling this. | |
2306 | /// | |
2307 | /// ``` | |
2308 | /// # use url::Url; | |
2309 | /// # let url = Url::parse("file:///etc/passwd").unwrap(); | |
2310 | /// let path = url.to_file_path(); | |
2311 | /// ``` | |
2312 | /// | |
2313 | /// Returns `Err` if the host is neither empty nor `"localhost"` (except on Windows, where | |
2314 | /// `file:` URLs may have a non-local host), | |
2315 | /// or if `Path::new_opt()` returns `None`. | |
2316 | /// (That is, if the percent-decoded path contains a NUL byte or, | |
2317 | /// for a Windows path, is not UTF-8.) | |
2318 | #[inline] | |
2319 | #[cfg(any(unix, windows, target_os = "redox"))] | |
2320 | #[allow(clippy::clippy::result_unit_err)] | |
2321 | pub fn to_file_path(&self) -> Result<PathBuf, ()> { | |
2322 | if let Some(segments) = self.path_segments() { | |
2323 | let host = match self.host() { | |
2324 | None | Some(Host::Domain("localhost")) => None, | |
2325 | Some(_) if cfg!(windows) && self.scheme() == "file" => { | |
2326 | Some(&self.serialization[self.host_start as usize..self.host_end as usize]) | |
2327 | } | |
2328 | _ => return Err(()), | |
2329 | }; | |
2330 | ||
2331 | return file_url_segments_to_pathbuf(host, segments); | |
2332 | } | |
2333 | Err(()) | |
2334 | } | |
2335 | ||
2336 | // Private helper methods: | |
2337 | ||
2338 | #[inline] | |
2339 | fn slice<R>(&self, range: R) -> &str | |
2340 | where | |
2341 | R: RangeArg, | |
2342 | { | |
2343 | range.slice_of(&self.serialization) | |
2344 | } | |
2345 | ||
2346 | #[inline] | |
2347 | fn byte_at(&self, i: u32) -> u8 { | |
2348 | self.serialization.as_bytes()[i as usize] | |
2349 | } | |
2350 | } | |
2351 | ||
2352 | /// Parse a string as an URL, without a base URL or encoding override. | |
2353 | impl str::FromStr for Url { | |
2354 | type Err = ParseError; | |
2355 | ||
2356 | #[inline] | |
2357 | fn from_str(input: &str) -> Result<Url, crate::ParseError> { | |
2358 | Url::parse(input) | |
2359 | } | |
2360 | } | |
2361 | ||
2362 | impl<'a> TryFrom<&'a str> for Url { | |
2363 | type Error = ParseError; | |
2364 | ||
2365 | fn try_from(s: &'a str) -> Result<Self, Self::Error> { | |
2366 | Url::parse(s) | |
2367 | } | |
2368 | } | |
2369 | ||
2370 | /// Display the serialization of this URL. | |
2371 | impl fmt::Display for Url { | |
2372 | #[inline] | |
2373 | fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { | |
2374 | fmt::Display::fmt(&self.serialization, formatter) | |
2375 | } | |
2376 | } | |
2377 | ||
2378 | /// Debug the serialization of this URL. | |
2379 | impl fmt::Debug for Url { | |
2380 | #[inline] | |
2381 | fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { | |
2382 | formatter | |
2383 | .debug_struct("Url") | |
2384 | .field("scheme", &self.scheme()) | |
2385 | .field("username", &self.username()) | |
2386 | .field("password", &self.password()) | |
2387 | .field("host", &self.host()) | |
2388 | .field("port", &self.port()) | |
2389 | .field("path", &self.path()) | |
2390 | .field("query", &self.query()) | |
2391 | .field("fragment", &self.fragment()) | |
2392 | .finish() | |
2393 | } | |
2394 | } | |
2395 | ||
2396 | /// URLs compare like their serialization. | |
2397 | impl Eq for Url {} | |
2398 | ||
2399 | /// URLs compare like their serialization. | |
2400 | impl PartialEq for Url { | |
2401 | #[inline] | |
2402 | fn eq(&self, other: &Self) -> bool { | |
2403 | self.serialization == other.serialization | |
2404 | } | |
2405 | } | |
2406 | ||
2407 | /// URLs compare like their serialization. | |
2408 | impl Ord for Url { | |
2409 | #[inline] | |
2410 | fn cmp(&self, other: &Self) -> cmp::Ordering { | |
2411 | self.serialization.cmp(&other.serialization) | |
2412 | } | |
2413 | } | |
2414 | ||
2415 | /// URLs compare like their serialization. | |
2416 | impl PartialOrd for Url { | |
2417 | #[inline] | |
2418 | fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> { | |
2419 | self.serialization.partial_cmp(&other.serialization) | |
2420 | } | |
2421 | } | |
2422 | ||
2423 | /// URLs hash like their serialization. | |
2424 | impl hash::Hash for Url { | |
2425 | #[inline] | |
2426 | fn hash<H>(&self, state: &mut H) | |
2427 | where | |
2428 | H: hash::Hasher, | |
2429 | { | |
2430 | hash::Hash::hash(&self.serialization, state) | |
2431 | } | |
2432 | } | |
2433 | ||
2434 | /// Return the serialization of this URL. | |
2435 | impl AsRef<str> for Url { | |
2436 | #[inline] | |
2437 | fn as_ref(&self) -> &str { | |
2438 | &self.serialization | |
2439 | } | |
2440 | } | |
2441 | ||
/// A range with `u32` bounds that can be used to slice a `&str`.
///
/// The bounds are byte offsets; slicing panics under the same conditions as
/// indexing a `str` with the equivalent `usize` range.
trait RangeArg {
    /// Return the sub-slice of `s` selected by this range.
    fn slice_of<'a>(&self, s: &'a str) -> &'a str;
}

/// `start..end`
impl RangeArg for Range<u32> {
    #[inline]
    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
        let (from, to) = (self.start as usize, self.end as usize);
        &s[from..to]
    }
}

/// `start..`
impl RangeArg for RangeFrom<u32> {
    #[inline]
    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
        let from = self.start as usize;
        &s[from..]
    }
}

/// `..end`
impl RangeArg for RangeTo<u32> {
    #[inline]
    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
        let to = self.end as usize;
        &s[..to]
    }
}
2466 | ||
/// Serializes this URL into a `serde` stream.
///
/// The URL is written as a single string: its serialization.
///
/// This implementation is only available if the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")]
impl serde::Serialize for Url {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        let text = self.as_str();
        serializer.serialize_str(text)
    }
}
2479 | ||
/// Deserializes this URL from a `serde` stream.
///
/// The input is expected to be a string, which is parsed with `Url::parse`;
/// a parse failure is reported as an "invalid value" error carrying the parse
/// error's message.
///
/// This implementation is only available if the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for Url {
    fn deserialize<D>(deserializer: D) -> Result<Url, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        use serde::de::{Error, Unexpected, Visitor};

        // Visitor that turns a string into a `Url`.
        struct UrlVisitor;

        impl<'de> Visitor<'de> for UrlVisitor {
            type Value = Url;

            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
                formatter.write_str("a string representing an URL")
            }

            fn visit_str<E>(self, s: &str) -> Result<Self::Value, E>
            where
                E: Error,
            {
                match Url::parse(s) {
                    Ok(url) => Ok(url),
                    Err(parse_error) => {
                        let message = parse_error.to_string();
                        Err(Error::invalid_value(
                            Unexpected::Str(s),
                            &message.as_str(),
                        ))
                    }
                }
            }
        }

        deserializer.deserialize_str(UrlVisitor)
    }
}
2514 | ||
2515 | #[cfg(any(unix, target_os = "redox"))] | |
2516 | fn path_to_file_url_segments( | |
2517 | path: &Path, | |
2518 | serialization: &mut String, | |
2519 | ) -> Result<(u32, HostInternal), ()> { | |
2520 | use std::os::unix::prelude::OsStrExt; | |
2521 | if !path.is_absolute() { | |
2522 | return Err(()); | |
2523 | } | |
2524 | let host_end = to_u32(serialization.len()).unwrap(); | |
2525 | let mut empty = true; | |
2526 | // skip the root component | |
2527 | for component in path.components().skip(1) { | |
2528 | empty = false; | |
2529 | serialization.push('/'); | |
2530 | serialization.extend(percent_encode( | |
2531 | component.as_os_str().as_bytes(), | |
2532 | PATH_SEGMENT, | |
2533 | )); | |
2534 | } | |
2535 | if empty { | |
2536 | // An URL’s path must not be empty. | |
2537 | serialization.push('/'); | |
2538 | } | |
2539 | Ok((host_end, HostInternal::None)) | |
2540 | } | |
2541 | ||
/// Windows flavour of `path_to_file_url_segments`: delegates to
/// `path_to_file_url_segments_windows`, which is compiled on every platform.
#[cfg(windows)]
fn path_to_file_url_segments(
    path: &Path,
    serialization: &mut String,
) -> Result<(u32, HostInternal), ()> {
    path_to_file_url_segments_windows(path, serialization)
}
2549 | ||
2550 | // Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102 | |
2551 | #[cfg_attr(not(windows), allow(dead_code))] | |
2552 | fn path_to_file_url_segments_windows( | |
2553 | path: &Path, | |
2554 | serialization: &mut String, | |
2555 | ) -> Result<(u32, HostInternal), ()> { | |
2556 | use std::path::{Component, Prefix}; | |
2557 | if !path.is_absolute() { | |
2558 | return Err(()); | |
2559 | } | |
2560 | let mut components = path.components(); | |
2561 | ||
2562 | let host_start = serialization.len() + 1; | |
2563 | let host_end; | |
2564 | let host_internal; | |
2565 | match components.next() { | |
2566 | Some(Component::Prefix(ref p)) => match p.kind() { | |
2567 | Prefix::Disk(letter) | Prefix::VerbatimDisk(letter) => { | |
2568 | host_end = to_u32(serialization.len()).unwrap(); | |
2569 | host_internal = HostInternal::None; | |
2570 | serialization.push('/'); | |
2571 | serialization.push(letter as char); | |
2572 | serialization.push(':'); | |
2573 | } | |
2574 | Prefix::UNC(server, share) | Prefix::VerbatimUNC(server, share) => { | |
2575 | let host = Host::parse(server.to_str().ok_or(())?).map_err(|_| ())?; | |
2576 | write!(serialization, "{}", host).unwrap(); | |
2577 | host_end = to_u32(serialization.len()).unwrap(); | |
2578 | host_internal = host.into(); | |
2579 | serialization.push('/'); | |
2580 | let share = share.to_str().ok_or(())?; | |
2581 | serialization.extend(percent_encode(share.as_bytes(), PATH_SEGMENT)); | |
2582 | } | |
2583 | _ => return Err(()), | |
2584 | }, | |
2585 | ||
2586 | _ => return Err(()), | |
2587 | } | |
2588 | ||
2589 | let mut path_only_has_prefix = true; | |
2590 | for component in components { | |
2591 | if component == Component::RootDir { | |
2592 | continue; | |
2593 | } | |
2594 | path_only_has_prefix = false; | |
2595 | // FIXME: somehow work with non-unicode? | |
2596 | let component = component.as_os_str().to_str().ok_or(())?; | |
2597 | serialization.push('/'); | |
2598 | serialization.extend(percent_encode(component.as_bytes(), PATH_SEGMENT)); | |
2599 | } | |
2600 | // A windows drive letter must end with a slash. | |
2601 | if serialization.len() > host_start | |
2602 | && parser::is_windows_drive_letter(&serialization[host_start..]) | |
2603 | && path_only_has_prefix | |
2604 | { | |
2605 | serialization.push('/'); | |
2606 | } | |
2607 | Ok((host_end, host_internal)) | |
2608 | } | |
2609 | ||
2610 | #[cfg(any(unix, target_os = "redox"))] | |
2611 | fn file_url_segments_to_pathbuf( | |
2612 | host: Option<&str>, | |
2613 | segments: str::Split<'_, char>, | |
2614 | ) -> Result<PathBuf, ()> { | |
2615 | use std::ffi::OsStr; | |
2616 | use std::os::unix::prelude::OsStrExt; | |
2617 | ||
2618 | if host.is_some() { | |
2619 | return Err(()); | |
2620 | } | |
2621 | ||
2622 | let mut bytes = if cfg!(target_os = "redox") { | |
2623 | b"file:".to_vec() | |
2624 | } else { | |
2625 | Vec::new() | |
2626 | }; | |
2627 | for segment in segments { | |
2628 | bytes.push(b'/'); | |
2629 | bytes.extend(percent_decode(segment.as_bytes())); | |
2630 | } | |
2631 | // A windows drive letter must end with a slash. | |
2632 | if bytes.len() > 2 | |
2633 | && matches!(bytes[bytes.len() - 2], b'a'..=b'z' | b'A'..=b'Z') | |
2634 | && matches!(bytes[bytes.len() - 1], b':' | b'|') | |
2635 | { | |
2636 | bytes.push(b'/'); | |
2637 | } | |
2638 | let os_str = OsStr::from_bytes(&bytes); | |
2639 | let path = PathBuf::from(os_str); | |
2640 | debug_assert!( | |
2641 | path.is_absolute(), | |
2642 | "to_file_path() failed to produce an absolute Path" | |
2643 | ); | |
2644 | Ok(path) | |
2645 | } | |
2646 | ||
/// Windows flavour of `file_url_segments_to_pathbuf`: delegates to
/// `file_url_segments_to_pathbuf_windows`, which is compiled on every platform.
#[cfg(windows)]
fn file_url_segments_to_pathbuf(
    host: Option<&str>,
    segments: str::Split<char>,
) -> Result<PathBuf, ()> {
    file_url_segments_to_pathbuf_windows(host, segments)
}
2654 | ||
2655 | // Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102 | |
2656 | #[cfg_attr(not(windows), allow(dead_code))] | |
2657 | fn file_url_segments_to_pathbuf_windows( | |
2658 | host: Option<&str>, | |
2659 | mut segments: str::Split<'_, char>, | |
2660 | ) -> Result<PathBuf, ()> { | |
2661 | let mut string = if let Some(host) = host { | |
2662 | r"\\".to_owned() + host | |
2663 | } else { | |
2664 | let first = segments.next().ok_or(())?; | |
2665 | ||
2666 | match first.len() { | |
2667 | 2 => { | |
2668 | if !first.starts_with(parser::ascii_alpha) || first.as_bytes()[1] != b':' { | |
2669 | return Err(()); | |
2670 | } | |
2671 | ||
2672 | first.to_owned() | |
2673 | } | |
2674 | ||
2675 | 4 => { | |
2676 | if !first.starts_with(parser::ascii_alpha) { | |
2677 | return Err(()); | |
2678 | } | |
2679 | let bytes = first.as_bytes(); | |
2680 | if bytes[1] != b'%' || bytes[2] != b'3' || (bytes[3] != b'a' && bytes[3] != b'A') { | |
2681 | return Err(()); | |
2682 | } | |
2683 | ||
2684 | first[0..1].to_owned() + ":" | |
2685 | } | |
2686 | ||
2687 | _ => return Err(()), | |
2688 | } | |
2689 | }; | |
2690 | ||
2691 | for segment in segments { | |
2692 | string.push('\\'); | |
2693 | ||
2694 | // Currently non-unicode windows paths cannot be represented | |
2695 | match String::from_utf8(percent_decode(segment.as_bytes()).collect()) { | |
2696 | Ok(s) => string.push_str(&s), | |
2697 | Err(..) => return Err(()), | |
2698 | } | |
2699 | } | |
2700 | let path = PathBuf::from(string); | |
2701 | debug_assert!( | |
2702 | path.is_absolute(), | |
2703 | "to_file_path() failed to produce an absolute Path" | |
2704 | ); | |
2705 | Ok(path) | |
2706 | } | |
2707 | ||
2708 | /// Implementation detail of `Url::query_pairs_mut`. Typically not used directly. | |
2709 | #[derive(Debug)] | |
2710 | pub struct UrlQuery<'a> { | |
2711 | url: Option<&'a mut Url>, | |
2712 | fragment: Option<String>, | |
2713 | } | |
2714 | ||
2715 | // `as_mut_string` string here exposes the internal serialization of an `Url`, | |
2716 | // which should not be exposed to users. | |
2717 | // We achieve that by not giving users direct access to `UrlQuery`: | |
2718 | // * Its fields are private | |
2719 | // (and so can not be constructed with struct literal syntax outside of this crate), | |
2720 | // * It has no constructor | |
2721 | // * It is only visible (on the type level) to users in the return type of | |
2722 | // `Url::query_pairs_mut` which is `Serializer<UrlQuery>` | |
2723 | // * `Serializer` keeps its target in a private field | |
2724 | // * Unlike in other `Target` impls, `UrlQuery::finished` does not return `Self`. | |
2725 | impl<'a> form_urlencoded::Target for UrlQuery<'a> { | |
2726 | fn as_mut_string(&mut self) -> &mut String { | |
2727 | &mut self.url.as_mut().unwrap().serialization | |
2728 | } | |
2729 | ||
2730 | fn finish(mut self) -> &'a mut Url { | |
2731 | let url = self.url.take().unwrap(); | |
2732 | url.restore_already_parsed_fragment(self.fragment.take()); | |
2733 | url | |
2734 | } | |
2735 | ||
2736 | type Finished = &'a mut Url; | |
2737 | } | |
2738 | ||
2739 | impl<'a> Drop for UrlQuery<'a> { | |
2740 | fn drop(&mut self) { | |
2741 | if let Some(url) = self.url.take() { | |
2742 | url.restore_already_parsed_fragment(self.fragment.take()) | |
2743 | } | |
2744 | } | |
2745 | } |