]>
Commit | Line | Data |
---|---|---|
abe05a73 XL |
1 | // Copyright 2013-2015 The rust-url developers. |
2 | // | |
3 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | |
4 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | |
5 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | |
6 | // option. This file may not be copied, modified, or distributed | |
7 | // except according to those terms. | |
8 | ||
9 | /*! | |
10 | ||
11 | rust-url is an implementation of the [URL Standard](http://url.spec.whatwg.org/) | |
12 | for the [Rust](http://rust-lang.org/) programming language. | |
13 | ||
14 | ||
15 | # URL parsing and data structures | |
16 | ||
17 | First, URL parsing may fail for various reasons and therefore returns a `Result`. | |
18 | ||
19 | ``` | |
20 | use url::{Url, ParseError}; | |
21 | ||
22 | assert!(Url::parse("http://[:::1]") == Err(ParseError::InvalidIpv6Address)) | |
23 | ``` | |
24 | ||
25 | Let’s parse a valid URL and look at its components. | |
26 | ||
27 | ``` | |
28 | use url::{Url, Host}; | |
29 | # use url::ParseError; | |
30 | # fn run() -> Result<(), ParseError> { | |
31 | let issue_list_url = Url::parse( | |
32 | "https://github.com/rust-lang/rust/issues?labels=E-easy&state=open" | |
33 | )?; | |
34 | ||
35 | ||
36 | assert!(issue_list_url.scheme() == "https"); | |
37 | assert!(issue_list_url.username() == ""); | |
38 | assert!(issue_list_url.password() == None); | |
39 | assert!(issue_list_url.host_str() == Some("github.com")); | |
40 | assert!(issue_list_url.host() == Some(Host::Domain("github.com"))); | |
41 | assert!(issue_list_url.port() == None); | |
42 | assert!(issue_list_url.path() == "/rust-lang/rust/issues"); | |
43 | assert!(issue_list_url.path_segments().map(|c| c.collect::<Vec<_>>()) == | |
44 | Some(vec!["rust-lang", "rust", "issues"])); | |
45 | assert!(issue_list_url.query() == Some("labels=E-easy&state=open")); | |
46 | assert!(issue_list_url.fragment() == None); | |
47 | assert!(!issue_list_url.cannot_be_a_base()); | |
48 | # Ok(()) | |
49 | # } | |
50 | # run().unwrap(); | |
51 | ``` | |
52 | ||
53 | Some URLs are said to be *cannot-be-a-base*: | |
54 | they don’t have a username, password, host, or port, | |
55 | and their "path" is an arbitrary string rather than slash-separated segments: | |
56 | ||
57 | ``` | |
58 | use url::Url; | |
59 | # use url::ParseError; | |
60 | ||
61 | # fn run() -> Result<(), ParseError> { | |
62 | let data_url = Url::parse("data:text/plain,Hello?World#")?; | |
63 | ||
64 | assert!(data_url.cannot_be_a_base()); | |
65 | assert!(data_url.scheme() == "data"); | |
66 | assert!(data_url.path() == "text/plain,Hello"); | |
67 | assert!(data_url.path_segments().is_none()); | |
68 | assert!(data_url.query() == Some("World")); | |
69 | assert!(data_url.fragment() == Some("")); | |
70 | # Ok(()) | |
71 | # } | |
72 | # run().unwrap(); | |
73 | ``` | |
74 | ||
75 | ||
76 | # Base URL | |
77 | ||
78 | Many contexts allow URL *references* that can be relative to a *base URL*: | |
79 | ||
80 | ```html | |
81 | <link rel="stylesheet" href="../main.css"> | |
82 | ``` | |
83 | ||
84 | Since parsed URLs are absolute, giving a base is required for parsing relative URLs: | |
85 | ||
86 | ``` | |
87 | use url::{Url, ParseError}; | |
88 | ||
89 | assert!(Url::parse("../main.css") == Err(ParseError::RelativeUrlWithoutBase)) | |
90 | ``` | |
91 | ||
92 | Use the `join` method on an `Url` to use it as a base URL: | |
93 | ||
94 | ``` | |
95 | use url::Url; | |
96 | # use url::ParseError; | |
97 | ||
98 | # fn run() -> Result<(), ParseError> { | |
99 | let this_document = Url::parse("http://servo.github.io/rust-url/url/index.html")?; | |
100 | let css_url = this_document.join("../main.css")?; | |
101 | assert_eq!(css_url.as_str(), "http://servo.github.io/rust-url/main.css"); | |
102 | # Ok(()) | |
103 | # } | |
104 | # run().unwrap(); | |
105 | */ | |
106 | ||
107 | #![doc(html_root_url = "https://docs.rs/url/1.6.0")] | |
108 | ||
109 | #[cfg(feature="rustc-serialize")] extern crate rustc_serialize; | |
110 | #[macro_use] extern crate matches; | |
111 | #[cfg(feature="serde")] extern crate serde; | |
112 | #[cfg(feature="heapsize")] #[macro_use] extern crate heapsize; | |
113 | ||
114 | pub extern crate idna; | |
115 | pub extern crate percent_encoding; | |
116 | ||
117 | use encoding::EncodingOverride; | |
118 | #[cfg(feature = "heapsize")] use heapsize::HeapSizeOf; | |
119 | use host::HostInternal; | |
120 | use parser::{Parser, Context, SchemeType, to_u32}; | |
121 | use percent_encoding::{PATH_SEGMENT_ENCODE_SET, USERINFO_ENCODE_SET, | |
122 | percent_encode, percent_decode, utf8_percent_encode}; | |
123 | use std::borrow::Borrow; | |
124 | use std::cmp; | |
125 | #[cfg(feature = "serde")] use std::error::Error; | |
126 | use std::fmt::{self, Write, Debug, Formatter}; | |
127 | use std::hash; | |
128 | use std::io; | |
129 | use std::mem; | |
130 | use std::net::{ToSocketAddrs, IpAddr}; | |
131 | use std::ops::{Range, RangeFrom, RangeTo}; | |
132 | use std::path::{Path, PathBuf}; | |
133 | use std::str; | |
134 | ||
135 | pub use origin::{Origin, OpaqueOrigin}; | |
136 | pub use host::{Host, HostAndPort, SocketAddrs}; | |
137 | pub use path_segments::PathSegmentsMut; | |
138 | pub use parser::ParseError; | |
139 | pub use slicing::Position; | |
140 | ||
141 | mod encoding; | |
142 | mod host; | |
143 | mod origin; | |
144 | mod path_segments; | |
145 | mod parser; | |
146 | mod slicing; | |
147 | ||
148 | pub mod form_urlencoded; | |
149 | #[doc(hidden)] pub mod quirks; | |
150 | ||
/// A parsed URL record.
///
/// The whole URL is kept as one serialized string; the remaining fields are
/// `u32` offsets into that string (plus the parsed host and port) marking
/// where each component starts or ends.
#[derive(Clone)]
pub struct Url {
    /// Syntax in pseudo-BNF:
    ///
    ///   url = scheme ":" [ hierarchical | non-hierarchical ] [ "?" query ]? [ "#" fragment ]?
    ///   non-hierarchical = non-hierarchical-path
    ///   non-hierarchical-path = /* Does not start with "/" */
    ///   hierarchical = authority? hierarchical-path
    ///   authority = "//" userinfo? host [ ":" port ]?
    ///   userinfo = username [ ":" password ]? "@"
    ///   hierarchical-path = [ "/" path-segment ]+
    serialization: String,

    // Components
    scheme_end: u32,   // Before ':'
    username_end: u32,   // Before ':' (if a password is given) or '@' (if not)
    // `host_start..host_end` is the range of the host inside `serialization`.
    host_start: u32,
    host_end: u32,
    // Parsed form of the host; `HostInternal::None` when the URL has no host.
    host: HostInternal,
    // Explicit port number, if one is present in the serialization.
    port: Option<u16>,
    path_start: u32,  // Before initial '/', if any
    query_start: Option<u32>,  // Before '?', unlike Position::QueryStart
    fragment_start: Option<u32>,  // Before '#', unlike Position::FragmentStart
}
176 | ||
/// Heap usage reporting, only compiled with the `heapsize` feature.
#[cfg(feature = "heapsize")]
impl HeapSizeOf for Url {
    /// Report the heap usage of the serialization string, the only field
    /// that is measured here.
    fn heap_size_of_children(&self) -> usize {
        let Url { ref serialization, .. } = *self;
        serialization.heap_size_of_children()
    }
}
183 | ||
/// Full configuration for the URL parser.
///
/// A default value is obtained from `Url::options()`; the builder-style
/// methods on this type each replace one setting and return the updated
/// options, and `parse` runs the parser with the settings chosen so far.
#[derive(Copy, Clone)]
pub struct ParseOptions<'a> {
    // Base URL handed to the parser, if any.
    base_url: Option<&'a Url>,
    // Handed to the parser as its query encoding override.
    encoding_override: encoding::EncodingOverride,
    // Optional callback invoked on non-fatal parse errors.
    log_syntax_violation: Option<&'a Fn(&'static str)>,
}
191 | ||
192 | impl<'a> ParseOptions<'a> { | |
193 | /// Change the base URL | |
194 | pub fn base_url(mut self, new: Option<&'a Url>) -> Self { | |
195 | self.base_url = new; | |
196 | self | |
197 | } | |
198 | ||
199 | /// Override the character encoding of query strings. | |
200 | /// This is a legacy concept only relevant for HTML. | |
201 | /// | |
202 | /// `EncodingRef` is defined in [rust-encoding](https://github.com/lifthrasiir/rust-encoding). | |
203 | /// | |
204 | /// This method is only available if the `query_encoding` | |
205 | /// [feature](http://doc.crates.io/manifest.html#the-features-section]) is enabled. | |
206 | #[cfg(feature = "query_encoding")] | |
207 | pub fn encoding_override(mut self, new: Option<encoding::EncodingRef>) -> Self { | |
208 | self.encoding_override = EncodingOverride::from_opt_encoding(new).to_output_encoding(); | |
209 | self | |
210 | } | |
211 | ||
212 | /// Call the provided function or closure on non-fatal parse errors. | |
213 | pub fn log_syntax_violation(mut self, new: Option<&'a Fn(&'static str)>) -> Self { | |
214 | self.log_syntax_violation = new; | |
215 | self | |
216 | } | |
217 | ||
218 | /// Parse an URL string with the configuration so far. | |
219 | pub fn parse(self, input: &str) -> Result<Url, ::ParseError> { | |
220 | Parser { | |
221 | serialization: String::with_capacity(input.len()), | |
222 | base_url: self.base_url, | |
223 | query_encoding_override: self.encoding_override, | |
224 | log_syntax_violation: self.log_syntax_violation, | |
225 | context: Context::UrlParser, | |
226 | }.parse_url(input) | |
227 | } | |
228 | } | |
229 | ||
230 | impl<'a> Debug for ParseOptions<'a> { | |
231 | fn fmt(&self, f: &mut Formatter) -> fmt::Result { | |
232 | write!(f, "ParseOptions {{ base_url: {:?}, encoding_override: {:?}, log_syntax_violation: ", self.base_url, self.encoding_override)?; | |
233 | match self.log_syntax_violation { | |
234 | Some(_) => write!(f, "Some(Fn(&'static str)) }}"), | |
235 | None => write!(f, "None }}") | |
236 | } | |
237 | } | |
238 | } | |
239 | ||
240 | impl Url { | |
241 | /// Parse an absolute URL from a string. | |
242 | /// | |
243 | /// # Examples | |
244 | /// | |
245 | /// ```rust | |
246 | /// use url::Url; | |
247 | /// # use url::ParseError; | |
248 | /// | |
249 | /// # fn run() -> Result<(), ParseError> { | |
250 | /// let url = Url::parse("https://example.net")?; | |
251 | /// # Ok(()) | |
252 | /// # } | |
253 | /// # run().unwrap(); | |
254 | /// ``` | |
255 | /// | |
256 | /// # Errors | |
257 | /// | |
258 | /// If the function can not parse an absolute URL from the given string, | |
259 | /// a [`ParseError`] variant will be returned. | |
260 | /// | |
261 | /// [`ParseError`]: enum.ParseError.html | |
262 | #[inline] | |
263 | pub fn parse(input: &str) -> Result<Url, ::ParseError> { | |
264 | Url::options().parse(input) | |
265 | } | |
266 | ||
267 | /// Parse an absolute URL from a string and add params to its query string. | |
268 | /// | |
269 | /// Existing params are not removed. | |
270 | /// | |
271 | /// # Examples | |
272 | /// | |
273 | /// ```rust | |
274 | /// use url::Url; | |
275 | /// # use url::ParseError; | |
276 | /// | |
277 | /// # fn run() -> Result<(), ParseError> { | |
278 | /// let url = Url::parse_with_params("https://example.net?dont=clobberme", | |
279 | /// &[("lang", "rust"), ("browser", "servo")])?; | |
280 | /// # Ok(()) | |
281 | /// # } | |
282 | /// # run().unwrap(); | |
283 | /// ``` | |
284 | /// | |
285 | /// # Errors | |
286 | /// | |
287 | /// If the function can not parse an absolute URL from the given string, | |
288 | /// a [`ParseError`] variant will be returned. | |
289 | /// | |
290 | /// [`ParseError`]: enum.ParseError.html | |
291 | #[inline] | |
292 | pub fn parse_with_params<I, K, V>(input: &str, iter: I) -> Result<Url, ::ParseError> | |
293 | where I: IntoIterator, | |
294 | I::Item: Borrow<(K, V)>, | |
295 | K: AsRef<str>, | |
296 | V: AsRef<str> | |
297 | { | |
298 | let mut url = Url::options().parse(input); | |
299 | ||
300 | if let Ok(ref mut url) = url { | |
301 | url.query_pairs_mut().extend_pairs(iter); | |
302 | } | |
303 | ||
304 | url | |
305 | } | |
306 | ||
307 | /// Parse a string as an URL, with this URL as the base URL. | |
308 | /// | |
309 | /// Note: a trailing slash is significant. | |
310 | /// Without it, the last path component is considered to be a “file” name | |
311 | /// to be removed to get at the “directory” that is used as the base: | |
312 | /// | |
313 | /// # Examples | |
314 | /// | |
315 | /// ```rust | |
316 | /// use url::Url; | |
317 | /// # use url::ParseError; | |
318 | /// | |
319 | /// # fn run() -> Result<(), ParseError> { | |
320 | /// let base = Url::parse("https://example.net/a/b.html")?; | |
321 | /// let url = base.join("c.png")?; | |
322 | /// assert_eq!(url.as_str(), "https://example.net/a/c.png"); // Not /a/b.html/c.png | |
323 | /// | |
324 | /// let base = Url::parse("https://example.net/a/b/")?; | |
325 | /// let url = base.join("c.png")?; | |
326 | /// assert_eq!(url.as_str(), "https://example.net/a/b/c.png"); | |
327 | /// # Ok(()) | |
328 | /// # } | |
329 | /// # run().unwrap(); | |
330 | /// ``` | |
331 | /// | |
332 | /// # Errors | |
333 | /// | |
334 | /// If the function can not parse an URL from the given string | |
335 | /// with this URL as the base URL, a [`ParseError`] variant will be returned. | |
336 | /// | |
337 | /// [`ParseError`]: enum.ParseError.html | |
338 | #[inline] | |
339 | pub fn join(&self, input: &str) -> Result<Url, ::ParseError> { | |
340 | Url::options().base_url(Some(self)).parse(input) | |
341 | } | |
342 | ||
343 | /// Return a default `ParseOptions` that can fully configure the URL parser. | |
344 | /// | |
345 | /// # Examples | |
346 | /// | |
347 | /// Get default `ParseOptions`, then change base url | |
348 | /// | |
349 | /// ```rust | |
350 | /// use url::Url; | |
351 | /// # use url::ParseError; | |
352 | /// # fn run() -> Result<(), ParseError> { | |
353 | /// let options = Url::options(); | |
354 | /// let api = Url::parse("https://api.example.com")?; | |
355 | /// let base_url = options.base_url(Some(&api)); | |
356 | /// let version_url = base_url.parse("version.json")?; | |
357 | /// assert_eq!(version_url.as_str(), "https://api.example.com/version.json"); | |
358 | /// # Ok(()) | |
359 | /// # } | |
360 | /// # run().unwrap(); | |
361 | /// ``` | |
362 | pub fn options<'a>() -> ParseOptions<'a> { | |
363 | ParseOptions { | |
364 | base_url: None, | |
365 | encoding_override: EncodingOverride::utf8(), | |
366 | log_syntax_violation: None, | |
367 | } | |
368 | } | |
369 | ||
370 | /// Return the serialization of this URL. | |
371 | /// | |
372 | /// This is fast since that serialization is already stored in the `Url` struct. | |
373 | /// | |
374 | /// # Examples | |
375 | /// | |
376 | /// ```rust | |
377 | /// use url::Url; | |
378 | /// # use url::ParseError; | |
379 | /// | |
380 | /// # fn run() -> Result<(), ParseError> { | |
381 | /// let url_str = "https://example.net/"; | |
382 | /// let url = Url::parse(url_str)?; | |
383 | /// assert_eq!(url.as_str(), url_str); | |
384 | /// # Ok(()) | |
385 | /// # } | |
386 | /// # run().unwrap(); | |
387 | /// ``` | |
388 | #[inline] | |
389 | pub fn as_str(&self) -> &str { | |
390 | &self.serialization | |
391 | } | |
392 | ||
393 | /// Return the serialization of this URL. | |
394 | /// | |
395 | /// This consumes the `Url` and takes ownership of the `String` stored in it. | |
396 | /// | |
397 | /// # Examples | |
398 | /// | |
399 | /// ```rust | |
400 | /// use url::Url; | |
401 | /// # use url::ParseError; | |
402 | /// | |
403 | /// # fn run() -> Result<(), ParseError> { | |
404 | /// let url_str = "https://example.net/"; | |
405 | /// let url = Url::parse(url_str)?; | |
406 | /// assert_eq!(url.into_string(), url_str); | |
407 | /// # Ok(()) | |
408 | /// # } | |
409 | /// # run().unwrap(); | |
410 | /// ``` | |
411 | #[inline] | |
412 | pub fn into_string(self) -> String { | |
413 | self.serialization | |
414 | } | |
415 | ||
416 | /// For internal testing, not part of the public API. | |
417 | /// | |
418 | /// Methods of the `Url` struct assume a number of invariants. | |
419 | /// This checks each of these invariants and panic if one is not met. | |
420 | /// This is for testing rust-url itself. | |
421 | #[doc(hidden)] | |
422 | pub fn check_invariants(&self) -> Result<(), String> { | |
423 | macro_rules! assert { | |
424 | ($x: expr) => { | |
425 | if !$x { | |
426 | return Err(format!("!( {} ) for URL {:?}", | |
427 | stringify!($x), self.serialization)) | |
428 | } | |
429 | } | |
430 | } | |
431 | ||
432 | macro_rules! assert_eq { | |
433 | ($a: expr, $b: expr) => { | |
434 | { | |
435 | let a = $a; | |
436 | let b = $b; | |
437 | if a != b { | |
438 | return Err(format!("{:?} != {:?} ({} != {}) for URL {:?}", | |
439 | a, b, stringify!($a), stringify!($b), | |
440 | self.serialization)) | |
441 | } | |
442 | } | |
443 | } | |
444 | } | |
445 | ||
446 | assert!(self.scheme_end >= 1); | |
447 | assert!(matches!(self.byte_at(0), b'a'...b'z' | b'A'...b'Z')); | |
448 | assert!(self.slice(1..self.scheme_end).chars() | |
449 | .all(|c| matches!(c, 'a'...'z' | 'A'...'Z' | '0'...'9' | '+' | '-' | '.'))); | |
450 | assert_eq!(self.byte_at(self.scheme_end), b':'); | |
451 | ||
452 | if self.slice(self.scheme_end + 1 ..).starts_with("//") { | |
453 | // URL with authority | |
454 | match self.byte_at(self.username_end) { | |
455 | b':' => { | |
456 | assert!(self.host_start >= self.username_end + 2); | |
457 | assert_eq!(self.byte_at(self.host_start - 1), b'@'); | |
458 | } | |
459 | b'@' => assert!(self.host_start == self.username_end + 1), | |
460 | _ => assert_eq!(self.username_end, self.scheme_end + 3), | |
461 | } | |
462 | assert!(self.host_start >= self.username_end); | |
463 | assert!(self.host_end >= self.host_start); | |
464 | let host_str = self.slice(self.host_start..self.host_end); | |
465 | match self.host { | |
466 | HostInternal::None => assert_eq!(host_str, ""), | |
467 | HostInternal::Ipv4(address) => assert_eq!(host_str, address.to_string()), | |
468 | HostInternal::Ipv6(address) => { | |
469 | let h: Host<String> = Host::Ipv6(address); | |
470 | assert_eq!(host_str, h.to_string()) | |
471 | } | |
472 | HostInternal::Domain => { | |
473 | if SchemeType::from(self.scheme()).is_special() { | |
474 | assert!(!host_str.is_empty()) | |
475 | } | |
476 | } | |
477 | } | |
478 | if self.path_start == self.host_end { | |
479 | assert_eq!(self.port, None); | |
480 | } else { | |
481 | assert_eq!(self.byte_at(self.host_end), b':'); | |
482 | let port_str = self.slice(self.host_end + 1..self.path_start); | |
483 | assert_eq!(self.port, Some(port_str.parse::<u16>().expect("Couldn't parse port?"))); | |
484 | } | |
485 | assert_eq!(self.byte_at(self.path_start), b'/'); | |
486 | } else { | |
487 | // Anarchist URL (no authority) | |
488 | assert_eq!(self.username_end, self.scheme_end + 1); | |
489 | assert_eq!(self.host_start, self.scheme_end + 1); | |
490 | assert_eq!(self.host_end, self.scheme_end + 1); | |
491 | assert_eq!(self.host, HostInternal::None); | |
492 | assert_eq!(self.port, None); | |
493 | assert_eq!(self.path_start, self.scheme_end + 1); | |
494 | } | |
495 | if let Some(start) = self.query_start { | |
496 | assert!(start > self.path_start); | |
497 | assert_eq!(self.byte_at(start), b'?'); | |
498 | } | |
499 | if let Some(start) = self.fragment_start { | |
500 | assert!(start > self.path_start); | |
501 | assert_eq!(self.byte_at(start), b'#'); | |
502 | } | |
503 | if let (Some(query_start), Some(fragment_start)) = (self.query_start, self.fragment_start) { | |
504 | assert!(fragment_start > query_start); | |
505 | } | |
506 | ||
507 | let other = Url::parse(self.as_str()).expect("Failed to parse myself?"); | |
508 | assert_eq!(&self.serialization, &other.serialization); | |
509 | assert_eq!(self.scheme_end, other.scheme_end); | |
510 | assert_eq!(self.username_end, other.username_end); | |
511 | assert_eq!(self.host_start, other.host_start); | |
512 | assert_eq!(self.host_end, other.host_end); | |
513 | assert!(self.host == other.host || | |
514 | // XXX No host round-trips to empty host. | |
515 | // See https://github.com/whatwg/url/issues/79 | |
516 | (self.host_str(), other.host_str()) == (None, Some(""))); | |
517 | assert_eq!(self.port, other.port); | |
518 | assert_eq!(self.path_start, other.path_start); | |
519 | assert_eq!(self.query_start, other.query_start); | |
520 | assert_eq!(self.fragment_start, other.fragment_start); | |
521 | Ok(()) | |
522 | } | |
523 | ||
/// Return the origin of this URL (<https://url.spec.whatwg.org/#origin>)
///
/// Note: this returns an opaque origin for `file:` URLs, which causes
/// `url.origin() != url.origin()`.
///
/// # Examples
///
/// URL with `ftp` scheme:
///
/// ```rust
/// use url::{Host, Origin, Url};
/// # use url::ParseError;
///
/// # fn run() -> Result<(), ParseError> {
/// let url = Url::parse("ftp://example.com/foo")?;
/// assert_eq!(url.origin(),
///            Origin::Tuple("ftp".into(),
///                          Host::Domain("example.com".into()),
///                          21));
/// # Ok(())
/// # }
/// # run().unwrap();
/// ```
///
/// URL with `blob` scheme:
///
/// ```rust
/// use url::{Host, Origin, Url};
/// # use url::ParseError;
///
/// # fn run() -> Result<(), ParseError> {
/// let url = Url::parse("blob:https://example.com/foo")?;
/// assert_eq!(url.origin(),
///            Origin::Tuple("https".into(),
///                          Host::Domain("example.com".into()),
///                          443));
/// # Ok(())
/// # }
/// # run().unwrap();
/// ```
///
/// URL with `file` scheme:
///
/// ```rust
/// use url::{Host, Origin, Url};
/// # use url::ParseError;
///
/// # fn run() -> Result<(), ParseError> {
/// let url = Url::parse("file:///tmp/foo")?;
/// assert!(!url.origin().is_tuple());
///
/// let other_url = Url::parse("file:///tmp/foo")?;
/// assert!(url.origin() != other_url.origin());
/// # Ok(())
/// # }
/// # run().unwrap();
/// ```
///
/// URL with other scheme:
///
/// ```rust
/// use url::{Host, Origin, Url};
/// # use url::ParseError;
///
/// # fn run() -> Result<(), ParseError> {
/// let url = Url::parse("foo:bar")?;
/// assert!(!url.origin().is_tuple());
/// # Ok(())
/// # }
/// # run().unwrap();
/// ```
#[inline]
pub fn origin(&self) -> Origin {
    // The computation itself lives in the `origin` module.
    origin::url_origin(self)
}
599 | ||
600 | /// Return the scheme of this URL, lower-cased, as an ASCII string without the ':' delimiter. | |
601 | /// | |
602 | /// # Examples | |
603 | /// | |
604 | /// ``` | |
605 | /// use url::Url; | |
606 | /// # use url::ParseError; | |
607 | /// | |
608 | /// # fn run() -> Result<(), ParseError> { | |
609 | /// let url = Url::parse("file:///tmp/foo")?; | |
610 | /// assert_eq!(url.scheme(), "file"); | |
611 | /// # Ok(()) | |
612 | /// # } | |
613 | /// # run().unwrap(); | |
614 | /// ``` | |
615 | #[inline] | |
616 | pub fn scheme(&self) -> &str { | |
617 | self.slice(..self.scheme_end) | |
618 | } | |
619 | ||
620 | /// Return whether the URL has an 'authority', | |
621 | /// which can contain a username, password, host, and port number. | |
622 | /// | |
623 | /// URLs that do *not* are either path-only like `unix:/run/foo.socket` | |
624 | /// or cannot-be-a-base like `data:text/plain,Stuff`. | |
625 | /// | |
626 | /// # Examples | |
627 | /// | |
628 | /// ``` | |
629 | /// use url::Url; | |
630 | /// # use url::ParseError; | |
631 | /// | |
632 | /// # fn run() -> Result<(), ParseError> { | |
633 | /// let url = Url::parse("ftp://rms@example.com")?; | |
634 | /// assert!(url.has_authority()); | |
635 | /// | |
636 | /// let url = Url::parse("unix:/run/foo.socket")?; | |
637 | /// assert!(!url.has_authority()); | |
638 | /// | |
639 | /// let url = Url::parse("data:text/plain,Stuff")?; | |
640 | /// assert!(!url.has_authority()); | |
641 | /// # Ok(()) | |
642 | /// # } | |
643 | /// # run().unwrap(); | |
644 | /// ``` | |
645 | #[inline] | |
646 | pub fn has_authority(&self) -> bool { | |
647 | debug_assert!(self.byte_at(self.scheme_end) == b':'); | |
648 | self.slice(self.scheme_end..).starts_with("://") | |
649 | } | |
650 | ||
651 | /// Return whether this URL is a cannot-be-a-base URL, | |
652 | /// meaning that parsing a relative URL string with this URL as the base will return an error. | |
653 | /// | |
654 | /// This is the case if the scheme and `:` delimiter are not followed by a `/` slash, | |
655 | /// as is typically the case of `data:` and `mailto:` URLs. | |
656 | /// | |
657 | /// # Examples | |
658 | /// | |
659 | /// ``` | |
660 | /// use url::Url; | |
661 | /// # use url::ParseError; | |
662 | /// | |
663 | /// # fn run() -> Result<(), ParseError> { | |
664 | /// let url = Url::parse("ftp://rms@example.com")?; | |
665 | /// assert!(!url.cannot_be_a_base()); | |
666 | /// | |
667 | /// let url = Url::parse("unix:/run/foo.socket")?; | |
668 | /// assert!(!url.cannot_be_a_base()); | |
669 | /// | |
670 | /// let url = Url::parse("data:text/plain,Stuff")?; | |
671 | /// assert!(url.cannot_be_a_base()); | |
672 | /// # Ok(()) | |
673 | /// # } | |
674 | /// # run().unwrap(); | |
675 | /// ``` | |
676 | #[inline] | |
677 | pub fn cannot_be_a_base(&self) -> bool { | |
678 | !self.slice(self.path_start..).starts_with('/') | |
679 | } | |
680 | ||
681 | /// Return the username for this URL (typically the empty string) | |
682 | /// as a percent-encoded ASCII string. | |
683 | /// | |
684 | /// # Examples | |
685 | /// | |
686 | /// ``` | |
687 | /// use url::Url; | |
688 | /// # use url::ParseError; | |
689 | /// | |
690 | /// # fn run() -> Result<(), ParseError> { | |
691 | /// let url = Url::parse("ftp://rms@example.com")?; | |
692 | /// assert_eq!(url.username(), "rms"); | |
693 | /// | |
694 | /// let url = Url::parse("ftp://:secret123@example.com")?; | |
695 | /// assert_eq!(url.username(), ""); | |
696 | /// | |
697 | /// let url = Url::parse("https://example.com")?; | |
698 | /// assert_eq!(url.username(), ""); | |
699 | /// # Ok(()) | |
700 | /// # } | |
701 | /// # run().unwrap(); | |
702 | /// ``` | |
703 | pub fn username(&self) -> &str { | |
704 | if self.has_authority() { | |
705 | self.slice(self.scheme_end + ("://".len() as u32)..self.username_end) | |
706 | } else { | |
707 | "" | |
708 | } | |
709 | } | |
710 | ||
711 | /// Return the password for this URL, if any, as a percent-encoded ASCII string. | |
712 | /// | |
713 | /// # Examples | |
714 | /// | |
715 | /// ``` | |
716 | /// use url::Url; | |
717 | /// # use url::ParseError; | |
718 | /// | |
719 | /// # fn run() -> Result<(), ParseError> { | |
720 | /// let url = Url::parse("ftp://rms:secret123@example.com")?; | |
721 | /// assert_eq!(url.password(), Some("secret123")); | |
722 | /// | |
723 | /// let url = Url::parse("ftp://:secret123@example.com")?; | |
724 | /// assert_eq!(url.password(), Some("secret123")); | |
725 | /// | |
726 | /// let url = Url::parse("ftp://rms@example.com")?; | |
727 | /// assert_eq!(url.password(), None); | |
728 | /// | |
729 | /// let url = Url::parse("https://example.com")?; | |
730 | /// assert_eq!(url.password(), None); | |
731 | /// # Ok(()) | |
732 | /// # } | |
733 | /// # run().unwrap(); | |
734 | /// ``` | |
735 | pub fn password(&self) -> Option<&str> { | |
736 | // This ':' is not the one marking a port number since a host can not be empty. | |
737 | // (Except for file: URLs, which do not have port numbers.) | |
738 | if self.has_authority() && self.byte_at(self.username_end) == b':' { | |
739 | debug_assert!(self.byte_at(self.host_start - 1) == b'@'); | |
740 | Some(self.slice(self.username_end + 1..self.host_start - 1)) | |
741 | } else { | |
742 | None | |
743 | } | |
744 | } | |
745 | ||
746 | /// Equivalent to `url.host().is_some()`. | |
747 | /// | |
748 | /// # Examples | |
749 | /// | |
750 | /// ``` | |
751 | /// use url::Url; | |
752 | /// # use url::ParseError; | |
753 | /// | |
754 | /// # fn run() -> Result<(), ParseError> { | |
755 | /// let url = Url::parse("ftp://rms@example.com")?; | |
756 | /// assert!(url.has_host()); | |
757 | /// | |
758 | /// let url = Url::parse("unix:/run/foo.socket")?; | |
759 | /// assert!(!url.has_host()); | |
760 | /// | |
761 | /// let url = Url::parse("data:text/plain,Stuff")?; | |
762 | /// assert!(!url.has_host()); | |
763 | /// # Ok(()) | |
764 | /// # } | |
765 | /// # run().unwrap(); | |
766 | /// ``` | |
767 | pub fn has_host(&self) -> bool { | |
768 | !matches!(self.host, HostInternal::None) | |
769 | } | |
770 | ||
771 | /// Return the string representation of the host (domain or IP address) for this URL, if any. | |
772 | /// | |
773 | /// Non-ASCII domains are punycode-encoded per IDNA. | |
774 | /// IPv6 addresses are given between `[` and `]` brackets. | |
775 | /// | |
776 | /// Cannot-be-a-base URLs (typical of `data:` and `mailto:`) and some `file:` URLs | |
777 | /// don’t have a host. | |
778 | /// | |
779 | /// See also the `host` method. | |
780 | /// | |
781 | /// # Examples | |
782 | /// | |
783 | /// ``` | |
784 | /// use url::Url; | |
785 | /// # use url::ParseError; | |
786 | /// | |
787 | /// # fn run() -> Result<(), ParseError> { | |
788 | /// let url = Url::parse("https://127.0.0.1/index.html")?; | |
789 | /// assert_eq!(url.host_str(), Some("127.0.0.1")); | |
790 | /// | |
791 | /// let url = Url::parse("ftp://rms@example.com")?; | |
792 | /// assert_eq!(url.host_str(), Some("example.com")); | |
793 | /// | |
794 | /// let url = Url::parse("unix:/run/foo.socket")?; | |
795 | /// assert_eq!(url.host_str(), None); | |
796 | /// | |
797 | /// let url = Url::parse("data:text/plain,Stuff")?; | |
798 | /// assert_eq!(url.host_str(), None); | |
799 | /// # Ok(()) | |
800 | /// # } | |
801 | /// # run().unwrap(); | |
802 | /// ``` | |
803 | pub fn host_str(&self) -> Option<&str> { | |
804 | if self.has_host() { | |
805 | Some(self.slice(self.host_start..self.host_end)) | |
806 | } else { | |
807 | None | |
808 | } | |
809 | } | |
810 | ||
811 | /// Return the parsed representation of the host for this URL. | |
812 | /// Non-ASCII domain labels are punycode-encoded per IDNA. | |
813 | /// | |
814 | /// Cannot-be-a-base URLs (typical of `data:` and `mailto:`) and some `file:` URLs | |
815 | /// don’t have a host. | |
816 | /// | |
817 | /// See also the `host_str` method. | |
818 | /// | |
819 | /// # Examples | |
820 | /// | |
821 | /// ``` | |
822 | /// use url::Url; | |
823 | /// # use url::ParseError; | |
824 | /// | |
825 | /// # fn run() -> Result<(), ParseError> { | |
826 | /// let url = Url::parse("https://127.0.0.1/index.html")?; | |
827 | /// assert!(url.host().is_some()); | |
828 | /// | |
829 | /// let url = Url::parse("ftp://rms@example.com")?; | |
830 | /// assert!(url.host().is_some()); | |
831 | /// | |
832 | /// let url = Url::parse("unix:/run/foo.socket")?; | |
833 | /// assert!(url.host().is_none()); | |
834 | /// | |
835 | /// let url = Url::parse("data:text/plain,Stuff")?; | |
836 | /// assert!(url.host().is_none()); | |
837 | /// # Ok(()) | |
838 | /// # } | |
839 | /// # run().unwrap(); | |
840 | /// ``` | |
841 | pub fn host(&self) -> Option<Host<&str>> { | |
842 | match self.host { | |
843 | HostInternal::None => None, | |
844 | HostInternal::Domain => Some(Host::Domain(self.slice(self.host_start..self.host_end))), | |
845 | HostInternal::Ipv4(address) => Some(Host::Ipv4(address)), | |
846 | HostInternal::Ipv6(address) => Some(Host::Ipv6(address)), | |
847 | } | |
848 | } | |
849 | ||
850 | /// If this URL has a host and it is a domain name (not an IP address), return it. | |
851 | /// | |
852 | /// # Examples | |
853 | /// | |
854 | /// ``` | |
855 | /// use url::Url; | |
856 | /// # use url::ParseError; | |
857 | /// | |
858 | /// # fn run() -> Result<(), ParseError> { | |
859 | /// let url = Url::parse("https://127.0.0.1/")?; | |
860 | /// assert_eq!(url.domain(), None); | |
861 | /// | |
862 | /// let url = Url::parse("mailto:rms@example.net")?; | |
863 | /// assert_eq!(url.domain(), None); | |
864 | /// | |
865 | /// let url = Url::parse("https://example.com/")?; | |
866 | /// assert_eq!(url.domain(), Some("example.com")); | |
867 | /// # Ok(()) | |
868 | /// # } | |
869 | /// # run().unwrap(); | |
870 | /// ``` | |
871 | pub fn domain(&self) -> Option<&str> { | |
872 | match self.host { | |
873 | HostInternal::Domain => Some(self.slice(self.host_start..self.host_end)), | |
874 | _ => None, | |
875 | } | |
876 | } | |
877 | ||
878 | /// Return the port number for this URL, if any. | |
879 | /// | |
880 | /// # Examples | |
881 | /// | |
882 | /// ``` | |
883 | /// use url::Url; | |
884 | /// # use url::ParseError; | |
885 | /// | |
886 | /// # fn run() -> Result<(), ParseError> { | |
887 | /// let url = Url::parse("https://example.com")?; | |
888 | /// assert_eq!(url.port(), None); | |
889 | /// | |
890 | /// let url = Url::parse("ssh://example.com:22")?; | |
891 | /// assert_eq!(url.port(), Some(22)); | |
892 | /// # Ok(()) | |
893 | /// # } | |
894 | /// # run().unwrap(); | |
895 | /// ``` | |
896 | #[inline] | |
897 | pub fn port(&self) -> Option<u16> { | |
898 | self.port | |
899 | } | |
900 | ||
901 | /// Return the port number for this URL, or the default port number if it is known. | |
902 | /// | |
903 | /// This method only knows the default port number | |
904 | /// of the `http`, `https`, `ws`, `wss`, `ftp`, and `gopher` schemes. | |
905 | /// | |
906 | /// For URLs in these schemes, this method always returns `Some(_)`. | |
907 | /// For other schemes, it is the same as `Url::port()`. | |
908 | /// | |
909 | /// # Examples | |
910 | /// | |
911 | /// ``` | |
912 | /// use url::Url; | |
913 | /// # use url::ParseError; | |
914 | /// | |
915 | /// # fn run() -> Result<(), ParseError> { | |
916 | /// let url = Url::parse("foo://example.com")?; | |
917 | /// assert_eq!(url.port_or_known_default(), None); | |
918 | /// | |
919 | /// let url = Url::parse("foo://example.com:1456")?; | |
920 | /// assert_eq!(url.port_or_known_default(), Some(1456)); | |
921 | /// | |
922 | /// let url = Url::parse("https://example.com")?; | |
923 | /// assert_eq!(url.port_or_known_default(), Some(443)); | |
924 | /// # Ok(()) | |
925 | /// # } | |
926 | /// # run().unwrap(); | |
927 | /// ``` | |
928 | #[inline] | |
929 | pub fn port_or_known_default(&self) -> Option<u16> { | |
930 | self.port.or_else(|| parser::default_port(self.scheme())) | |
931 | } | |
932 | ||
933 | /// If the URL has a host, return something that implements `ToSocketAddrs`. | |
934 | /// | |
935 | /// If the URL has no port number and the scheme’s default port number is not known | |
936 | /// (see `Url::port_or_known_default`), | |
937 | /// the closure is called to obtain a port number. | |
938 | /// Typically, this closure can match on the result of `Url::scheme` | |
939 | /// to have per-scheme default port numbers, | |
940 | /// and panic for schemes it’s not prepared to handle. | |
941 | /// For example: | |
942 | /// | |
943 | /// ```rust | |
944 | /// # use url::Url; | |
945 | /// # use std::net::TcpStream; | |
946 | /// # use std::io; | |
947 | /// fn connect(url: &Url) -> io::Result<TcpStream> { | |
948 | /// TcpStream::connect(url.with_default_port(default_port)?) | |
949 | /// } | |
950 | /// | |
951 | /// fn default_port(url: &Url) -> Result<u16, ()> { | |
952 | /// match url.scheme() { | |
953 | /// "git" => Ok(9418), | |
954 | /// "git+ssh" => Ok(22), | |
955 | /// "git+https" => Ok(443), | |
956 | /// "git+http" => Ok(80), | |
957 | /// _ => Err(()), | |
958 | /// } | |
959 | /// } | |
960 | /// ``` | |
961 | pub fn with_default_port<F>(&self, f: F) -> io::Result<HostAndPort<&str>> | |
962 | where F: FnOnce(&Url) -> Result<u16, ()> { | |
963 | Ok(HostAndPort { | |
964 | host: self.host() | |
965 | .ok_or(()) | |
966 | .or_else(|()| io_error("URL has no host"))?, | |
967 | port: self.port_or_known_default() | |
968 | .ok_or(()) | |
969 | .or_else(|()| f(self)) | |
970 | .or_else(|()| io_error("URL has no port number"))? | |
971 | }) | |
972 | } | |
973 | ||
974 | /// Return the path for this URL, as a percent-encoded ASCII string. | |
975 | /// For cannot-be-a-base URLs, this is an arbitrary string that doesn’t start with '/'. | |
976 | /// For other URLs, this starts with a '/' slash | |
977 | /// and continues with slash-separated path segments. | |
978 | /// | |
979 | /// # Examples | |
980 | /// | |
981 | /// ```rust | |
982 | /// use url::{Url, ParseError}; | |
983 | /// | |
984 | /// # fn run() -> Result<(), ParseError> { | |
985 | /// let url = Url::parse("https://example.com/api/versions?page=2")?; | |
986 | /// assert_eq!(url.path(), "/api/versions"); | |
987 | /// | |
988 | /// let url = Url::parse("https://example.com")?; | |
989 | /// assert_eq!(url.path(), "/"); | |
990 | /// | |
991 | /// let url = Url::parse("https://example.com/countries/việt nam")?; | |
992 | /// assert_eq!(url.path(), "/countries/vi%E1%BB%87t%20nam"); | |
993 | /// # Ok(()) | |
994 | /// # } | |
995 | /// # run().unwrap(); | |
996 | /// ``` | |
997 | pub fn path(&self) -> &str { | |
998 | match (self.query_start, self.fragment_start) { | |
999 | (None, None) => self.slice(self.path_start..), | |
1000 | (Some(next_component_start), _) | | |
1001 | (None, Some(next_component_start)) => { | |
1002 | self.slice(self.path_start..next_component_start) | |
1003 | } | |
1004 | } | |
1005 | } | |
1006 | ||
1007 | /// Unless this URL is cannot-be-a-base, | |
1008 | /// return an iterator of '/' slash-separated path segments, | |
1009 | /// each as a percent-encoded ASCII string. | |
1010 | /// | |
1011 | /// Return `None` for cannot-be-a-base URLs. | |
1012 | /// | |
1013 | /// When `Some` is returned, the iterator always contains at least one string | |
1014 | /// (which may be empty). | |
1015 | /// | |
1016 | /// # Examples | |
1017 | /// | |
1018 | /// ``` | |
1019 | /// use url::Url; | |
1020 | /// # use std::error::Error; | |
1021 | /// | |
1022 | /// # fn run() -> Result<(), Box<Error>> { | |
1023 | /// let url = Url::parse("https://example.com/foo/bar")?; | |
1024 | /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?; | |
1025 | /// assert_eq!(path_segments.next(), Some("foo")); | |
1026 | /// assert_eq!(path_segments.next(), Some("bar")); | |
1027 | /// assert_eq!(path_segments.next(), None); | |
1028 | /// | |
1029 | /// let url = Url::parse("https://example.com")?; | |
1030 | /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?; | |
1031 | /// assert_eq!(path_segments.next(), Some("")); | |
1032 | /// assert_eq!(path_segments.next(), None); | |
1033 | /// | |
1034 | /// let url = Url::parse("data:text/plain,HelloWorld")?; | |
1035 | /// assert!(url.path_segments().is_none()); | |
1036 | /// | |
1037 | /// let url = Url::parse("https://example.com/countries/việt nam")?; | |
1038 | /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?; | |
1039 | /// assert_eq!(path_segments.next(), Some("countries")); | |
1040 | /// assert_eq!(path_segments.next(), Some("vi%E1%BB%87t%20nam")); | |
1041 | /// # Ok(()) | |
1042 | /// # } | |
1043 | /// # run().unwrap(); | |
1044 | /// ``` | |
1045 | pub fn path_segments(&self) -> Option<str::Split<char>> { | |
1046 | let path = self.path(); | |
1047 | if path.starts_with('/') { | |
1048 | Some(path[1..].split('/')) | |
1049 | } else { | |
1050 | None | |
1051 | } | |
1052 | } | |
1053 | ||
1054 | /// Return this URL’s query string, if any, as a percent-encoded ASCII string. | |
1055 | /// | |
1056 | /// # Examples | |
1057 | /// | |
1058 | /// ```rust | |
1059 | /// use url::Url; | |
1060 | /// # use url::ParseError; | |
1061 | /// | |
1062 | /// # fn run() -> Result<(), ParseError> { | |
1063 | /// let url = Url::parse("https://example.com/products?page=2")?; | |
1064 | /// let query = url.query(); | |
1065 | /// assert_eq!(query, Some("page=2")); | |
1066 | /// | |
1067 | /// let url = Url::parse("https://example.com/products")?; | |
1068 | /// let query = url.query(); | |
1069 | /// assert!(query.is_none()); | |
1070 | /// | |
1071 | /// let url = Url::parse("https://example.com/?country=español")?; | |
1072 | /// let query = url.query(); | |
1073 | /// assert_eq!(query, Some("country=espa%C3%B1ol")); | |
1074 | /// # Ok(()) | |
1075 | /// # } | |
1076 | /// # run().unwrap(); | |
1077 | /// ``` | |
1078 | pub fn query(&self) -> Option<&str> { | |
1079 | match (self.query_start, self.fragment_start) { | |
1080 | (None, _) => None, | |
1081 | (Some(query_start), None) => { | |
1082 | debug_assert!(self.byte_at(query_start) == b'?'); | |
1083 | Some(self.slice(query_start + 1..)) | |
1084 | } | |
1085 | (Some(query_start), Some(fragment_start)) => { | |
1086 | debug_assert!(self.byte_at(query_start) == b'?'); | |
1087 | Some(self.slice(query_start + 1..fragment_start)) | |
1088 | } | |
1089 | } | |
1090 | } | |
1091 | ||
1092 | /// Parse the URL’s query string, if any, as `application/x-www-form-urlencoded` | |
1093 | /// and return an iterator of (key, value) pairs. | |
1094 | /// | |
1095 | /// # Examples | |
1096 | /// | |
1097 | /// ```rust | |
1098 | /// use std::borrow::Cow; | |
1099 | /// | |
1100 | /// use url::Url; | |
1101 | /// # use url::ParseError; | |
1102 | /// | |
1103 | /// # fn run() -> Result<(), ParseError> { | |
1104 | /// let url = Url::parse("https://example.com/products?page=2&sort=desc")?; | |
1105 | /// let mut pairs = url.query_pairs(); | |
1106 | /// | |
1107 | /// assert_eq!(pairs.count(), 2); | |
1108 | /// | |
1109 | /// assert_eq!(pairs.next(), Some((Cow::Borrowed("page"), Cow::Borrowed("2")))); | |
1110 | /// assert_eq!(pairs.next(), Some((Cow::Borrowed("sort"), Cow::Borrowed("desc")))); | |
1111 | /// # Ok(()) | |
1112 | /// # } | |
1113 | /// # run().unwrap(); | |
1114 | /// ``` | |
1115 | ||
1116 | #[inline] | |
1117 | pub fn query_pairs(&self) -> form_urlencoded::Parse { | |
1118 | form_urlencoded::parse(self.query().unwrap_or("").as_bytes()) | |
1119 | } | |
1120 | ||
1121 | /// Return this URL’s fragment identifier, if any. | |
1122 | /// | |
1123 | /// A fragment is the part of the URL after the `#` symbol. | |
1124 | /// The fragment is optional and, if present, contains a fragment identifier | |
1125 | /// that identifies a secondary resource, such as a section heading | |
1126 | /// of a document. | |
1127 | /// | |
1128 | /// In HTML, the fragment identifier is usually the id attribute of an element | |
1129 | /// that is scrolled to on load. Browsers typically will not send the fragment portion | |
1130 | /// of a URL to the server. | |
1131 | /// | |
1132 | /// **Note:** the parser did *not* percent-encode this component, | |
1133 | /// but the input may have been percent-encoded already. | |
1134 | /// | |
1135 | /// # Examples | |
1136 | /// | |
1137 | /// ```rust | |
1138 | /// use url::Url; | |
1139 | /// # use url::ParseError; | |
1140 | /// | |
1141 | /// # fn run() -> Result<(), ParseError> { | |
1142 | /// let url = Url::parse("https://example.com/data.csv#row=4")?; | |
1143 | /// | |
1144 | /// assert_eq!(url.fragment(), Some("row=4")); | |
1145 | /// | |
1146 | /// let url = Url::parse("https://example.com/data.csv#cell=4,1-6,2")?; | |
1147 | /// | |
1148 | /// assert_eq!(url.fragment(), Some("cell=4,1-6,2")); | |
1149 | /// # Ok(()) | |
1150 | /// # } | |
1151 | /// # run().unwrap(); | |
1152 | /// ``` | |
1153 | pub fn fragment(&self) -> Option<&str> { | |
1154 | self.fragment_start.map(|start| { | |
1155 | debug_assert!(self.byte_at(start) == b'#'); | |
1156 | self.slice(start + 1..) | |
1157 | }) | |
1158 | } | |
1159 | ||
1160 | fn mutate<F: FnOnce(&mut Parser) -> R, R>(&mut self, f: F) -> R { | |
1161 | let mut parser = Parser::for_setter(mem::replace(&mut self.serialization, String::new())); | |
1162 | let result = f(&mut parser); | |
1163 | self.serialization = parser.serialization; | |
1164 | result | |
1165 | } | |
1166 | ||
1167 | /// Change this URL’s fragment identifier. | |
1168 | /// | |
1169 | /// # Examples | |
1170 | /// | |
1171 | /// ```rust | |
1172 | /// use url::Url; | |
1173 | /// # use url::ParseError; | |
1174 | /// | |
1175 | /// # fn run() -> Result<(), ParseError> { | |
1176 | /// let mut url = Url::parse("https://example.com/data.csv")?; | |
1177 | /// assert_eq!(url.as_str(), "https://example.com/data.csv"); | |
1178 | /// | |
1179 | /// url.set_fragment(Some("cell=4,1-6,2")); | |
1180 | /// assert_eq!(url.as_str(), "https://example.com/data.csv#cell=4,1-6,2"); | |
1181 | /// assert_eq!(url.fragment(), Some("cell=4,1-6,2")); | |
1182 | /// | |
1183 | /// url.set_fragment(None); | |
1184 | /// assert_eq!(url.as_str(), "https://example.com/data.csv"); | |
1185 | /// assert!(url.fragment().is_none()); | |
1186 | /// # Ok(()) | |
1187 | /// # } | |
1188 | /// # run().unwrap(); | |
1189 | /// ``` | |
1190 | pub fn set_fragment(&mut self, fragment: Option<&str>) { | |
1191 | // Remove any previous fragment | |
1192 | if let Some(start) = self.fragment_start { | |
1193 | debug_assert!(self.byte_at(start) == b'#'); | |
1194 | self.serialization.truncate(start as usize); | |
1195 | } | |
1196 | // Write the new one | |
1197 | if let Some(input) = fragment { | |
1198 | self.fragment_start = Some(to_u32(self.serialization.len()).unwrap()); | |
1199 | self.serialization.push('#'); | |
1200 | self.mutate(|parser| parser.parse_fragment(parser::Input::new(input))) | |
1201 | } else { | |
1202 | self.fragment_start = None | |
1203 | } | |
1204 | } | |
1205 | ||
1206 | fn take_fragment(&mut self) -> Option<String> { | |
1207 | self.fragment_start.take().map(|start| { | |
1208 | debug_assert!(self.byte_at(start) == b'#'); | |
1209 | let fragment = self.slice(start + 1..).to_owned(); | |
1210 | self.serialization.truncate(start as usize); | |
1211 | fragment | |
1212 | }) | |
1213 | } | |
1214 | ||
1215 | fn restore_already_parsed_fragment(&mut self, fragment: Option<String>) { | |
1216 | if let Some(ref fragment) = fragment { | |
1217 | assert!(self.fragment_start.is_none()); | |
1218 | self.fragment_start = Some(to_u32(self.serialization.len()).unwrap()); | |
1219 | self.serialization.push('#'); | |
1220 | self.serialization.push_str(fragment); | |
1221 | } | |
1222 | } | |
1223 | ||
1224 | /// Change this URL’s query string. | |
1225 | /// | |
1226 | /// # Examples | |
1227 | /// | |
1228 | /// ```rust | |
1229 | /// use url::Url; | |
1230 | /// # use url::ParseError; | |
1231 | /// | |
1232 | /// # fn run() -> Result<(), ParseError> { | |
1233 | /// let mut url = Url::parse("https://example.com/products")?; | |
1234 | /// assert_eq!(url.as_str(), "https://example.com/products"); | |
1235 | /// | |
1236 | /// url.set_query(Some("page=2")); | |
1237 | /// assert_eq!(url.as_str(), "https://example.com/products?page=2"); | |
1238 | /// assert_eq!(url.query(), Some("page=2")); | |
1239 | /// # Ok(()) | |
1240 | /// # } | |
1241 | /// # run().unwrap(); | |
1242 | /// ``` | |
1243 | pub fn set_query(&mut self, query: Option<&str>) { | |
1244 | let fragment = self.take_fragment(); | |
1245 | ||
1246 | // Remove any previous query | |
1247 | if let Some(start) = self.query_start.take() { | |
1248 | debug_assert!(self.byte_at(start) == b'?'); | |
1249 | self.serialization.truncate(start as usize); | |
1250 | } | |
1251 | // Write the new query, if any | |
1252 | if let Some(input) = query { | |
1253 | self.query_start = Some(to_u32(self.serialization.len()).unwrap()); | |
1254 | self.serialization.push('?'); | |
1255 | let scheme_end = self.scheme_end; | |
1256 | self.mutate(|parser| parser.parse_query(scheme_end, parser::Input::new(input))); | |
1257 | } | |
1258 | ||
1259 | self.restore_already_parsed_fragment(fragment); | |
1260 | } | |
1261 | ||
1262 | /// Manipulate this URL’s query string, viewed as a sequence of name/value pairs | |
1263 | /// in `application/x-www-form-urlencoded` syntax. | |
1264 | /// | |
1265 | /// The return value has a method-chaining API: | |
1266 | /// | |
1267 | /// ```rust | |
1268 | /// # use url::{Url, ParseError}; | |
1269 | /// | |
1270 | /// # fn run() -> Result<(), ParseError> { | |
1271 | /// let mut url = Url::parse("https://example.net?lang=fr#nav")?; | |
1272 | /// assert_eq!(url.query(), Some("lang=fr")); | |
1273 | /// | |
1274 | /// url.query_pairs_mut().append_pair("foo", "bar"); | |
1275 | /// assert_eq!(url.query(), Some("lang=fr&foo=bar")); | |
1276 | /// assert_eq!(url.as_str(), "https://example.net/?lang=fr&foo=bar#nav"); | |
1277 | /// | |
1278 | /// url.query_pairs_mut() | |
1279 | /// .clear() | |
1280 | /// .append_pair("foo", "bar & baz") | |
1281 | /// .append_pair("saisons", "\u{00C9}t\u{00E9}+hiver"); | |
1282 | /// assert_eq!(url.query(), Some("foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver")); | |
1283 | /// assert_eq!(url.as_str(), | |
1284 | /// "https://example.net/?foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver#nav"); | |
1285 | /// # Ok(()) | |
1286 | /// # } | |
1287 | /// # run().unwrap(); | |
1288 | /// ``` | |
1289 | /// | |
1290 | /// Note: `url.query_pairs_mut().clear();` is equivalent to `url.set_query(Some(""))`, | |
1291 | /// not `url.set_query(None)`. | |
1292 | /// | |
1293 | /// The state of `Url` is unspecified if this return value is leaked without being dropped. | |
1294 | pub fn query_pairs_mut(&mut self) -> form_urlencoded::Serializer<UrlQuery> { | |
1295 | let fragment = self.take_fragment(); | |
1296 | ||
1297 | let query_start; | |
1298 | if let Some(start) = self.query_start { | |
1299 | debug_assert!(self.byte_at(start) == b'?'); | |
1300 | query_start = start as usize; | |
1301 | } else { | |
1302 | query_start = self.serialization.len(); | |
1303 | self.query_start = Some(to_u32(query_start).unwrap()); | |
1304 | self.serialization.push('?'); | |
1305 | } | |
1306 | ||
1307 | let query = UrlQuery { url: self, fragment: fragment }; | |
1308 | form_urlencoded::Serializer::for_suffix(query, query_start + "?".len()) | |
1309 | } | |
1310 | ||
1311 | fn take_after_path(&mut self) -> String { | |
1312 | match (self.query_start, self.fragment_start) { | |
1313 | (Some(i), _) | (None, Some(i)) => { | |
1314 | let after_path = self.slice(i..).to_owned(); | |
1315 | self.serialization.truncate(i as usize); | |
1316 | after_path | |
1317 | }, | |
1318 | (None, None) => String::new(), | |
1319 | } | |
1320 | } | |
1321 | ||
1322 | /// Change this URL’s path. | |
1323 | /// | |
1324 | /// # Examples | |
1325 | /// | |
1326 | /// ```rust | |
1327 | /// use url::Url; | |
1328 | /// # use url::ParseError; | |
1329 | /// | |
1330 | /// # fn run() -> Result<(), ParseError> { | |
1331 | /// let mut url = Url::parse("https://example.com")?; | |
1332 | /// url.set_path("api/comments"); | |
1333 | /// assert_eq!(url.as_str(), "https://example.com/api/comments"); | |
1334 | /// assert_eq!(url.path(), "/api/comments"); | |
1335 | /// | |
1336 | /// let mut url = Url::parse("https://example.com/api")?; | |
1337 | /// url.set_path("data/report.csv"); | |
1338 | /// assert_eq!(url.as_str(), "https://example.com/data/report.csv"); | |
1339 | /// assert_eq!(url.path(), "/data/report.csv"); | |
1340 | /// # Ok(()) | |
1341 | /// # } | |
1342 | /// # run().unwrap(); | |
1343 | /// ``` | |
1344 | pub fn set_path(&mut self, mut path: &str) { | |
1345 | let after_path = self.take_after_path(); | |
1346 | let old_after_path_pos = to_u32(self.serialization.len()).unwrap(); | |
1347 | let cannot_be_a_base = self.cannot_be_a_base(); | |
1348 | let scheme_type = SchemeType::from(self.scheme()); | |
1349 | self.serialization.truncate(self.path_start as usize); | |
1350 | self.mutate(|parser| { | |
1351 | if cannot_be_a_base { | |
1352 | if path.starts_with('/') { | |
1353 | parser.serialization.push_str("%2F"); | |
1354 | path = &path[1..]; | |
1355 | } | |
1356 | parser.parse_cannot_be_a_base_path(parser::Input::new(path)); | |
1357 | } else { | |
1358 | let mut has_host = true; // FIXME | |
1359 | parser.parse_path_start(scheme_type, &mut has_host, parser::Input::new(path)); | |
1360 | } | |
1361 | }); | |
1362 | self.restore_after_path(old_after_path_pos, &after_path); | |
1363 | } | |
1364 | ||
1365 | /// Return an object with methods to manipulate this URL’s path segments. | |
1366 | /// | |
1367 | /// Return `Err(())` if this URL is cannot-be-a-base. | |
1368 | pub fn path_segments_mut(&mut self) -> Result<PathSegmentsMut, ()> { | |
1369 | if self.cannot_be_a_base() { | |
1370 | Err(()) | |
1371 | } else { | |
1372 | Ok(path_segments::new(self)) | |
1373 | } | |
1374 | } | |
1375 | ||
1376 | fn restore_after_path(&mut self, old_after_path_position: u32, after_path: &str) { | |
1377 | let new_after_path_position = to_u32(self.serialization.len()).unwrap(); | |
1378 | let adjust = |index: &mut u32| { | |
1379 | *index -= old_after_path_position; | |
1380 | *index += new_after_path_position; | |
1381 | }; | |
1382 | if let Some(ref mut index) = self.query_start { adjust(index) } | |
1383 | if let Some(ref mut index) = self.fragment_start { adjust(index) } | |
1384 | self.serialization.push_str(after_path) | |
1385 | } | |
1386 | ||
1387 | /// Change this URL’s port number. | |
1388 | /// | |
1389 | /// If this URL is cannot-be-a-base, does not have a host, or has the `file` scheme; | |
1390 | /// do nothing and return `Err`. | |
1391 | /// | |
1392 | /// # Examples | |
1393 | /// | |
1394 | /// ``` | |
1395 | /// use url::Url; | |
1396 | /// # use std::error::Error; | |
1397 | /// | |
1398 | /// # fn run() -> Result<(), Box<Error>> { | |
1399 | /// let mut url = Url::parse("ssh://example.net:2048/")?; | |
1400 | /// | |
1401 | /// url.set_port(Some(4096)).map_err(|_| "cannot be base")?; | |
1402 | /// assert_eq!(url.as_str(), "ssh://example.net:4096/"); | |
1403 | /// | |
1404 | /// url.set_port(None).map_err(|_| "cannot be base")?; | |
1405 | /// assert_eq!(url.as_str(), "ssh://example.net/"); | |
1406 | /// # Ok(()) | |
1407 | /// # } | |
1408 | /// # run().unwrap(); | |
1409 | /// ``` | |
1410 | /// | |
1411 | /// Cannot set port for cannot-be-a-base URLs: | |
1412 | /// | |
1413 | /// ``` | |
1414 | /// use url::Url; | |
1415 | /// # use url::ParseError; | |
1416 | /// | |
1417 | /// # fn run() -> Result<(), ParseError> { | |
1418 | /// let mut url = Url::parse("mailto:rms@example.net")?; | |
1419 | /// | |
1420 | /// let result = url.set_port(Some(80)); | |
1421 | /// assert!(result.is_err()); | |
1422 | /// | |
1423 | /// let result = url.set_port(None); | |
1424 | /// assert!(result.is_err()); | |
1425 | /// # Ok(()) | |
1426 | /// # } | |
1427 | /// # run().unwrap(); | |
1428 | /// ``` | |
1429 | pub fn set_port(&mut self, mut port: Option<u16>) -> Result<(), ()> { | |
1430 | if !self.has_host() || self.scheme() == "file" { | |
1431 | return Err(()) | |
1432 | } | |
1433 | if port.is_some() && port == parser::default_port(self.scheme()) { | |
1434 | port = None | |
1435 | } | |
1436 | self.set_port_internal(port); | |
1437 | Ok(()) | |
1438 | } | |
1439 | ||
1440 | fn set_port_internal(&mut self, port: Option<u16>) { | |
1441 | match (self.port, port) { | |
1442 | (None, None) => {} | |
1443 | (Some(_), None) => { | |
1444 | self.serialization.drain(self.host_end as usize .. self.path_start as usize); | |
1445 | let offset = self.path_start - self.host_end; | |
1446 | self.path_start = self.host_end; | |
1447 | if let Some(ref mut index) = self.query_start { *index -= offset } | |
1448 | if let Some(ref mut index) = self.fragment_start { *index -= offset } | |
1449 | } | |
1450 | (Some(old), Some(new)) if old == new => {} | |
1451 | (_, Some(new)) => { | |
1452 | let path_and_after = self.slice(self.path_start..).to_owned(); | |
1453 | self.serialization.truncate(self.host_end as usize); | |
1454 | write!(&mut self.serialization, ":{}", new).unwrap(); | |
1455 | let old_path_start = self.path_start; | |
1456 | let new_path_start = to_u32(self.serialization.len()).unwrap(); | |
1457 | self.path_start = new_path_start; | |
1458 | let adjust = |index: &mut u32| { | |
1459 | *index -= old_path_start; | |
1460 | *index += new_path_start; | |
1461 | }; | |
1462 | if let Some(ref mut index) = self.query_start { adjust(index) } | |
1463 | if let Some(ref mut index) = self.fragment_start { adjust(index) } | |
1464 | self.serialization.push_str(&path_and_after); | |
1465 | } | |
1466 | } | |
1467 | self.port = port; | |
1468 | } | |
1469 | ||
1470 | /// Change this URL’s host. | |
1471 | /// | |
1472 | /// Removing the host (calling this with `None`) | |
1473 | /// will also remove any username, password, and port number. | |
1474 | /// | |
1475 | /// # Examples | |
1476 | /// | |
1477 | /// Change host: | |
1478 | /// | |
1479 | /// ``` | |
1480 | /// use url::Url; | |
1481 | /// # use url::ParseError; | |
1482 | /// | |
1483 | /// # fn run() -> Result<(), ParseError> { | |
1484 | /// let mut url = Url::parse("https://example.net")?; | |
1485 | /// let result = url.set_host(Some("rust-lang.org")); | |
1486 | /// assert!(result.is_ok()); | |
1487 | /// assert_eq!(url.as_str(), "https://rust-lang.org/"); | |
1488 | /// # Ok(()) | |
1489 | /// # } | |
1490 | /// # run().unwrap(); | |
1491 | /// ``` | |
1492 | /// | |
1493 | /// Remove host: | |
1494 | /// | |
1495 | /// ``` | |
1496 | /// use url::Url; | |
1497 | /// # use url::ParseError; | |
1498 | /// | |
1499 | /// # fn run() -> Result<(), ParseError> { | |
1500 | /// let mut url = Url::parse("foo://example.net")?; | |
1501 | /// let result = url.set_host(None); | |
1502 | /// assert!(result.is_ok()); | |
1503 | /// assert_eq!(url.as_str(), "foo:/"); | |
1504 | /// # Ok(()) | |
1505 | /// # } | |
1506 | /// # run().unwrap(); | |
1507 | /// ``` | |
1508 | /// | |
1509 | /// Cannot remove host for 'special' schemes (e.g. `http`): | |
1510 | /// | |
1511 | /// ``` | |
1512 | /// use url::Url; | |
1513 | /// # use url::ParseError; | |
1514 | /// | |
1515 | /// # fn run() -> Result<(), ParseError> { | |
1516 | /// let mut url = Url::parse("https://example.net")?; | |
1517 | /// let result = url.set_host(None); | |
1518 | /// assert!(result.is_err()); | |
1519 | /// assert_eq!(url.as_str(), "https://example.net/"); | |
1520 | /// # Ok(()) | |
1521 | /// # } | |
1522 | /// # run().unwrap(); | |
1523 | /// ``` | |
1524 | /// | |
1525 | /// Cannot change or remove host for cannot-be-a-base URLs: | |
1526 | /// | |
1527 | /// ``` | |
1528 | /// use url::Url; | |
1529 | /// # use url::ParseError; | |
1530 | /// | |
1531 | /// # fn run() -> Result<(), ParseError> { | |
1532 | /// let mut url = Url::parse("mailto:rms@example.net")?; | |
1533 | /// | |
1534 | /// let result = url.set_host(Some("rust-lang.org")); | |
1535 | /// assert!(result.is_err()); | |
1536 | /// assert_eq!(url.as_str(), "mailto:rms@example.net"); | |
1537 | /// | |
1538 | /// let result = url.set_host(None); | |
1539 | /// assert!(result.is_err()); | |
1540 | /// assert_eq!(url.as_str(), "mailto:rms@example.net"); | |
1541 | /// # Ok(()) | |
1542 | /// # } | |
1543 | /// # run().unwrap(); | |
1544 | /// ``` | |
1545 | /// | |
1546 | /// # Errors | |
1547 | /// | |
1548 | /// If this URL is cannot-be-a-base or there is an error parsing the given `host`, | |
1549 | /// a [`ParseError`] variant will be returned. | |
1550 | /// | |
1551 | /// [`ParseError`]: enum.ParseError.html | |
1552 | pub fn set_host(&mut self, host: Option<&str>) -> Result<(), ParseError> { | |
1553 | if self.cannot_be_a_base() { | |
1554 | return Err(ParseError::SetHostOnCannotBeABaseUrl) | |
1555 | } | |
1556 | ||
1557 | if let Some(host) = host { | |
1558 | if host == "" && SchemeType::from(self.scheme()).is_special() { | |
1559 | return Err(ParseError::EmptyHost); | |
1560 | } | |
1561 | self.set_host_internal(Host::parse(host)?, None) | |
1562 | } else if self.has_host() { | |
1563 | if SchemeType::from(self.scheme()).is_special() { | |
1564 | return Err(ParseError::EmptyHost) | |
1565 | } | |
1566 | debug_assert!(self.byte_at(self.scheme_end) == b':'); | |
1567 | debug_assert!(self.byte_at(self.path_start) == b'/'); | |
1568 | let new_path_start = self.scheme_end + 1; | |
1569 | self.serialization.drain(new_path_start as usize..self.path_start as usize); | |
1570 | let offset = self.path_start - new_path_start; | |
1571 | self.path_start = new_path_start; | |
1572 | self.username_end = new_path_start; | |
1573 | self.host_start = new_path_start; | |
1574 | self.host_end = new_path_start; | |
1575 | self.port = None; | |
1576 | if let Some(ref mut index) = self.query_start { *index -= offset } | |
1577 | if let Some(ref mut index) = self.fragment_start { *index -= offset } | |
1578 | } | |
1579 | Ok(()) | |
1580 | } | |
1581 | ||
1582 | /// opt_new_port: None means leave unchanged, Some(None) means remove any port number. | |
1583 | fn set_host_internal(&mut self, host: Host<String>, opt_new_port: Option<Option<u16>>) { | |
1584 | let old_suffix_pos = if opt_new_port.is_some() { self.path_start } else { self.host_end }; | |
1585 | let suffix = self.slice(old_suffix_pos..).to_owned(); | |
1586 | self.serialization.truncate(self.host_start as usize); | |
1587 | if !self.has_authority() { | |
1588 | debug_assert!(self.slice(self.scheme_end..self.host_start) == ":"); | |
1589 | debug_assert!(self.username_end == self.host_start); | |
1590 | self.serialization.push('/'); | |
1591 | self.serialization.push('/'); | |
1592 | self.username_end += 2; | |
1593 | self.host_start += 2; | |
1594 | } | |
1595 | write!(&mut self.serialization, "{}", host).unwrap(); | |
1596 | self.host_end = to_u32(self.serialization.len()).unwrap(); | |
1597 | self.host = host.into(); | |
1598 | ||
1599 | if let Some(new_port) = opt_new_port { | |
1600 | self.port = new_port; | |
1601 | if let Some(port) = new_port { | |
1602 | write!(&mut self.serialization, ":{}", port).unwrap(); | |
1603 | } | |
1604 | } | |
1605 | let new_suffix_pos = to_u32(self.serialization.len()).unwrap(); | |
1606 | self.serialization.push_str(&suffix); | |
1607 | ||
1608 | let adjust = |index: &mut u32| { | |
1609 | *index -= old_suffix_pos; | |
1610 | *index += new_suffix_pos; | |
1611 | }; | |
1612 | adjust(&mut self.path_start); | |
1613 | if let Some(ref mut index) = self.query_start { adjust(index) } | |
1614 | if let Some(ref mut index) = self.fragment_start { adjust(index) } | |
1615 | } | |
1616 | ||
1617 | /// Change this URL’s host to the given IP address. | |
1618 | /// | |
1619 | /// If this URL is cannot-be-a-base, do nothing and return `Err`. | |
1620 | /// | |
1621 | /// Compared to `Url::set_host`, this skips the host parser. | |
1622 | /// | |
1623 | /// # Examples | |
1624 | /// | |
1625 | /// ```rust | |
1626 | /// use url::{Url, ParseError}; | |
1627 | /// | |
1628 | /// # fn run() -> Result<(), ParseError> { | |
1629 | /// let mut url = Url::parse("http://example.com")?; | |
1630 | /// url.set_ip_host("127.0.0.1".parse().unwrap()); | |
1631 | /// assert_eq!(url.host_str(), Some("127.0.0.1")); | |
1632 | /// assert_eq!(url.as_str(), "http://127.0.0.1/"); | |
1633 | /// # Ok(()) | |
1634 | /// # } | |
1635 | /// # run().unwrap(); | |
1636 | /// ``` | |
1637 | /// | |
1638 | /// Cannot change the host of a cannot-be-a-base URL (e.g. `mailto:`) to an IP address: | |
1639 | /// | |
1640 | /// ```rust | |
1641 | /// use url::{Url, ParseError}; | |
1642 | /// | |
1643 | /// # fn run() -> Result<(), ParseError> { | |
1644 | /// let mut url = Url::parse("mailto:rms@example.com")?; | |
1645 | /// let result = url.set_ip_host("127.0.0.1".parse().unwrap()); | |
1646 | /// | |
1647 | /// assert_eq!(url.as_str(), "mailto:rms@example.com"); | |
1648 | /// assert!(result.is_err()); | |
1649 | /// # Ok(()) | |
1650 | /// # } | |
1651 | /// # run().unwrap(); | |
1652 | /// ``` | |
1653 | /// | |
1654 | pub fn set_ip_host(&mut self, address: IpAddr) -> Result<(), ()> { | |
1655 | if self.cannot_be_a_base() { | |
1656 | return Err(()) | |
1657 | } | |
1658 | ||
1659 | let address = match address { | |
1660 | IpAddr::V4(address) => Host::Ipv4(address), | |
1661 | IpAddr::V6(address) => Host::Ipv6(address), | |
1662 | }; | |
1663 | self.set_host_internal(address, None); | |
1664 | Ok(()) | |
1665 | } | |
1666 | ||
1667 | /// Change this URL’s password. | |
1668 | /// | |
1669 | /// If this URL is cannot-be-a-base or does not have a host, do nothing and return `Err`. | |
1670 | /// | |
1671 | /// # Examples | |
1672 | /// | |
1673 | /// ```rust | |
1674 | /// use url::{Url, ParseError}; | |
1675 | /// | |
1676 | /// # fn run() -> Result<(), ParseError> { | |
1677 | /// let mut url = Url::parse("mailto:rmz@example.com")?; | |
1678 | /// let result = url.set_password(Some("secret_password")); | |
1679 | /// assert!(result.is_err()); | |
1680 | /// | |
1681 | /// let mut url = Url::parse("ftp://user1:secret1@example.com")?; | |
1682 | /// let result = url.set_password(Some("secret_password")); | |
1683 | /// assert_eq!(url.password(), Some("secret_password")); | |
1684 | /// | |
1685 | /// let mut url = Url::parse("ftp://user2:@example.com")?; | |
1686 | /// let result = url.set_password(Some("secret2")); | |
1687 | /// assert!(result.is_ok()); | |
1688 | /// assert_eq!(url.password(), Some("secret2")); | |
1689 | /// # Ok(()) | |
1690 | /// # } | |
1691 | /// # run().unwrap(); | |
1692 | /// ``` | |
1693 | pub fn set_password(&mut self, password: Option<&str>) -> Result<(), ()> { | |
1694 | if !self.has_host() { | |
1695 | return Err(()) | |
1696 | } | |
1697 | if let Some(password) = password { | |
1698 | let host_and_after = self.slice(self.host_start..).to_owned(); | |
1699 | self.serialization.truncate(self.username_end as usize); | |
1700 | self.serialization.push(':'); | |
1701 | self.serialization.extend(utf8_percent_encode(password, USERINFO_ENCODE_SET)); | |
1702 | self.serialization.push('@'); | |
1703 | ||
1704 | let old_host_start = self.host_start; | |
1705 | let new_host_start = to_u32(self.serialization.len()).unwrap(); | |
1706 | let adjust = |index: &mut u32| { | |
1707 | *index -= old_host_start; | |
1708 | *index += new_host_start; | |
1709 | }; | |
1710 | self.host_start = new_host_start; | |
1711 | adjust(&mut self.host_end); | |
1712 | adjust(&mut self.path_start); | |
1713 | if let Some(ref mut index) = self.query_start { adjust(index) } | |
1714 | if let Some(ref mut index) = self.fragment_start { adjust(index) } | |
1715 | ||
1716 | self.serialization.push_str(&host_and_after); | |
1717 | } else if self.byte_at(self.username_end) == b':' { // If there is a password to remove | |
1718 | let has_username_or_password = self.byte_at(self.host_start - 1) == b'@'; | |
1719 | debug_assert!(has_username_or_password); | |
1720 | let username_start = self.scheme_end + 3; | |
1721 | let empty_username = username_start == self.username_end; | |
1722 | let start = self.username_end; // Remove the ':' | |
1723 | let end = if empty_username { | |
1724 | self.host_start // Remove the '@' as well | |
1725 | } else { | |
1726 | self.host_start - 1 // Keep the '@' to separate the username from the host | |
1727 | }; | |
1728 | self.serialization.drain(start as usize .. end as usize); | |
1729 | let offset = end - start; | |
1730 | self.host_start -= offset; | |
1731 | self.host_end -= offset; | |
1732 | self.path_start -= offset; | |
1733 | if let Some(ref mut index) = self.query_start { *index -= offset } | |
1734 | if let Some(ref mut index) = self.fragment_start { *index -= offset } | |
1735 | } | |
1736 | Ok(()) | |
1737 | } | |
1738 | ||
1739 | /// Change this URL’s username. | |
1740 | /// | |
1741 | /// If this URL is cannot-be-a-base or does not have a host, do nothing and return `Err`. | |
1742 | /// # Examples | |
1743 | /// | |
1744 | /// Cannot setup username from mailto(cannot-be-base) | |
1745 | /// | |
1746 | /// ```rust | |
1747 | /// use url::{Url, ParseError}; | |
1748 | /// | |
1749 | /// # fn run() -> Result<(), ParseError> { | |
1750 | /// let mut url = Url::parse("mailto:rmz@example.com")?; | |
1751 | /// let result = url.set_username("user1"); | |
1752 | /// assert_eq!(url.as_str(), "mailto:rmz@example.com"); | |
1753 | /// assert!(result.is_err()); | |
1754 | /// # Ok(()) | |
1755 | /// # } | |
1756 | /// # run().unwrap(); | |
1757 | /// ``` | |
1758 | /// | |
1759 | /// Setup username to user1 | |
1760 | /// | |
1761 | /// ```rust | |
1762 | /// use url::{Url, ParseError}; | |
1763 | /// | |
1764 | /// # fn run() -> Result<(), ParseError> { | |
1765 | /// let mut url = Url::parse("ftp://:secre1@example.com/")?; | |
1766 | /// let result = url.set_username("user1"); | |
1767 | /// assert!(result.is_ok()); | |
1768 | /// assert_eq!(url.username(), "user1"); | |
1769 | /// assert_eq!(url.as_str(), "ftp://user1:secre1@example.com/"); | |
1770 | /// # Ok(()) | |
1771 | /// # } | |
1772 | /// # run().unwrap(); | |
1773 | /// ``` | |
1774 | pub fn set_username(&mut self, username: &str) -> Result<(), ()> { | |
1775 | if !self.has_host() { | |
1776 | return Err(()) | |
1777 | } | |
1778 | let username_start = self.scheme_end + 3; | |
1779 | debug_assert!(self.slice(self.scheme_end..username_start) == "://"); | |
1780 | if self.slice(username_start..self.username_end) == username { | |
1781 | return Ok(()) | |
1782 | } | |
1783 | let after_username = self.slice(self.username_end..).to_owned(); | |
1784 | self.serialization.truncate(username_start as usize); | |
1785 | self.serialization.extend(utf8_percent_encode(username, USERINFO_ENCODE_SET)); | |
1786 | ||
1787 | let mut removed_bytes = self.username_end; | |
1788 | self.username_end = to_u32(self.serialization.len()).unwrap(); | |
1789 | let mut added_bytes = self.username_end; | |
1790 | ||
1791 | let new_username_is_empty = self.username_end == username_start; | |
1792 | match (new_username_is_empty, after_username.chars().next()) { | |
1793 | (true, Some('@')) => { | |
1794 | removed_bytes += 1; | |
1795 | self.serialization.push_str(&after_username[1..]); | |
1796 | } | |
1797 | (false, Some('@')) | (_, Some(':')) | (true, _) => { | |
1798 | self.serialization.push_str(&after_username); | |
1799 | } | |
1800 | (false, _) => { | |
1801 | added_bytes += 1; | |
1802 | self.serialization.push('@'); | |
1803 | self.serialization.push_str(&after_username); | |
1804 | } | |
1805 | } | |
1806 | ||
1807 | let adjust = |index: &mut u32| { | |
1808 | *index -= removed_bytes; | |
1809 | *index += added_bytes; | |
1810 | }; | |
1811 | adjust(&mut self.host_start); | |
1812 | adjust(&mut self.host_end); | |
1813 | adjust(&mut self.path_start); | |
1814 | if let Some(ref mut index) = self.query_start { adjust(index) } | |
1815 | if let Some(ref mut index) = self.fragment_start { adjust(index) } | |
1816 | Ok(()) | |
1817 | } | |
1818 | ||
1819 | /// Change this URL’s scheme. | |
1820 | /// | |
1821 | /// Do nothing and return `Err` if: | |
1822 | /// | |
1823 | /// * The new scheme is not in `[a-zA-Z][a-zA-Z0-9+.-]+` | |
1824 | /// * This URL is cannot-be-a-base and the new scheme is one of | |
1825 | /// `http`, `https`, `ws`, `wss`, `ftp`, or `gopher` | |
1826 | /// | |
1827 | /// # Examples | |
1828 | /// | |
1829 | /// Change the URL’s scheme from `https` to `foo`: | |
1830 | /// | |
1831 | /// ``` | |
1832 | /// use url::Url; | |
1833 | /// # use url::ParseError; | |
1834 | /// | |
1835 | /// # fn run() -> Result<(), ParseError> { | |
1836 | /// let mut url = Url::parse("https://example.net")?; | |
1837 | /// let result = url.set_scheme("foo"); | |
1838 | /// assert_eq!(url.as_str(), "foo://example.net/"); | |
1839 | /// assert!(result.is_ok()); | |
1840 | /// # Ok(()) | |
1841 | /// # } | |
1842 | /// # run().unwrap(); | |
1843 | /// ``` | |
1844 | /// | |
1845 | /// | |
1846 | /// Cannot change URL’s scheme from `https` to `foõ`: | |
1847 | /// | |
1848 | /// ``` | |
1849 | /// use url::Url; | |
1850 | /// # use url::ParseError; | |
1851 | /// | |
1852 | /// # fn run() -> Result<(), ParseError> { | |
1853 | /// let mut url = Url::parse("https://example.net")?; | |
1854 | /// let result = url.set_scheme("foõ"); | |
1855 | /// assert_eq!(url.as_str(), "https://example.net/"); | |
1856 | /// assert!(result.is_err()); | |
1857 | /// # Ok(()) | |
1858 | /// # } | |
1859 | /// # run().unwrap(); | |
1860 | /// ``` | |
1861 | /// | |
1862 | /// Cannot change URL’s scheme from `mailto` (cannot-be-a-base) to `https`: | |
1863 | /// | |
1864 | /// ``` | |
1865 | /// use url::Url; | |
1866 | /// # use url::ParseError; | |
1867 | /// | |
1868 | /// # fn run() -> Result<(), ParseError> { | |
1869 | /// let mut url = Url::parse("mailto:rms@example.net")?; | |
1870 | /// let result = url.set_scheme("https"); | |
1871 | /// assert_eq!(url.as_str(), "mailto:rms@example.net"); | |
1872 | /// assert!(result.is_err()); | |
1873 | /// # Ok(()) | |
1874 | /// # } | |
1875 | /// # run().unwrap(); | |
1876 | /// ``` | |
1877 | pub fn set_scheme(&mut self, scheme: &str) -> Result<(), ()> { | |
1878 | let mut parser = Parser::for_setter(String::new()); | |
1879 | let remaining = parser.parse_scheme(parser::Input::new(scheme))?; | |
1880 | if !remaining.is_empty() || | |
1881 | (!self.has_host() && SchemeType::from(&parser.serialization).is_special()) { | |
1882 | return Err(()) | |
1883 | } | |
1884 | let old_scheme_end = self.scheme_end; | |
1885 | let new_scheme_end = to_u32(parser.serialization.len()).unwrap(); | |
1886 | let adjust = |index: &mut u32| { | |
1887 | *index -= old_scheme_end; | |
1888 | *index += new_scheme_end; | |
1889 | }; | |
1890 | ||
1891 | self.scheme_end = new_scheme_end; | |
1892 | adjust(&mut self.username_end); | |
1893 | adjust(&mut self.host_start); | |
1894 | adjust(&mut self.host_end); | |
1895 | adjust(&mut self.path_start); | |
1896 | if let Some(ref mut index) = self.query_start { adjust(index) } | |
1897 | if let Some(ref mut index) = self.fragment_start { adjust(index) } | |
1898 | ||
1899 | parser.serialization.push_str(self.slice(old_scheme_end..)); | |
1900 | self.serialization = parser.serialization; | |
1901 | Ok(()) | |
1902 | } | |
1903 | ||
1904 | /// Convert a file name as `std::path::Path` into an URL in the `file` scheme. | |
1905 | /// | |
1906 | /// This returns `Err` if the given path is not absolute or, | |
1907 | /// on Windows, if the prefix is not a disk prefix (e.g. `C:`) or a UNC prefix (`\\`). | |
1908 | /// | |
1909 | /// # Examples | |
1910 | /// | |
1911 | /// On Unix-like platforms: | |
1912 | /// | |
1913 | /// ``` | |
1914 | /// # if cfg!(unix) { | |
1915 | /// use url::Url; | |
1916 | /// | |
1917 | /// # fn run() -> Result<(), ()> { | |
1918 | /// let url = Url::from_file_path("/tmp/foo.txt")?; | |
1919 | /// assert_eq!(url.as_str(), "file:///tmp/foo.txt"); | |
1920 | /// | |
1921 | /// let url = Url::from_file_path("../foo.txt"); | |
1922 | /// assert!(url.is_err()); | |
1923 | /// | |
1924 | /// let url = Url::from_file_path("https://google.com/"); | |
1925 | /// assert!(url.is_err()); | |
1926 | /// # Ok(()) | |
1927 | /// # } | |
1928 | /// # run().unwrap(); | |
1929 | /// # } | |
1930 | /// ``` | |
1931 | pub fn from_file_path<P: AsRef<Path>>(path: P) -> Result<Url, ()> { | |
1932 | let mut serialization = "file://".to_owned(); | |
1933 | let host_start = serialization.len() as u32; | |
1934 | let (host_end, host) = path_to_file_url_segments(path.as_ref(), &mut serialization)?; | |
1935 | Ok(Url { | |
1936 | serialization: serialization, | |
1937 | scheme_end: "file".len() as u32, | |
1938 | username_end: host_start, | |
1939 | host_start: host_start, | |
1940 | host_end: host_end, | |
1941 | host: host, | |
1942 | port: None, | |
1943 | path_start: host_end, | |
1944 | query_start: None, | |
1945 | fragment_start: None, | |
1946 | }) | |
1947 | } | |
1948 | ||
1949 | /// Convert a directory name as `std::path::Path` into an URL in the `file` scheme. | |
1950 | /// | |
1951 | /// This returns `Err` if the given path is not absolute or, | |
1952 | /// on Windows, if the prefix is not a disk prefix (e.g. `C:`) or a UNC prefix (`\\`). | |
1953 | /// | |
1954 | /// Compared to `from_file_path`, this ensure that URL’s the path has a trailing slash | |
1955 | /// so that the entire path is considered when using this URL as a base URL. | |
1956 | /// | |
1957 | /// For example: | |
1958 | /// | |
1959 | /// * `"index.html"` parsed with `Url::from_directory_path(Path::new("/var/www"))` | |
1960 | /// as the base URL is `file:///var/www/index.html` | |
1961 | /// * `"index.html"` parsed with `Url::from_file_path(Path::new("/var/www"))` | |
1962 | /// as the base URL is `file:///var/index.html`, which might not be what was intended. | |
1963 | /// | |
1964 | /// Note that `std::path` does not consider trailing slashes significant | |
1965 | /// and usually does not include them (e.g. in `Path::parent()`). | |
1966 | pub fn from_directory_path<P: AsRef<Path>>(path: P) -> Result<Url, ()> { | |
1967 | let mut url = Url::from_file_path(path)?; | |
1968 | if !url.serialization.ends_with('/') { | |
1969 | url.serialization.push('/') | |
1970 | } | |
1971 | Ok(url) | |
1972 | } | |
1973 | ||
/// Serialize with Serde using the internal representation of the `Url` struct.
///
/// The serialization string and every component index are written out as one tuple,
/// in the field order used by `deserialize_internal`.
///
/// The corresponding `deserialize_internal` method sacrifices some invariant-checking
/// for speed, compared to the `Deserialize` trait impl.
///
/// This method is only available if the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")]
#[deny(unused)]
pub fn serialize_internal<S>(&self, serializer: &mut S) -> Result<(), S::Error> where S: serde::Serializer {
    use serde::Serialize;
    // Destructuring first lets us ensure that adding or removing fields forces this method
    // to be updated
    let Url { ref serialization, ref scheme_end,
              ref username_end, ref host_start,
              ref host_end, ref host, ref port,
              ref path_start, ref query_start,
              ref fragment_start} = *self;
    // Serialize a tuple of references so that no field is cloned.
    (serialization, scheme_end, username_end,
     host_start, host_end, host, port, path_start,
     query_start, fragment_start).serialize(serializer)
}
1995 | ||
/// Deserialize with Serde using the internal representation of the `Url` struct.
///
/// This is the counterpart of `serialize_internal`: it reads back the tuple of the
/// serialization string and component indices instead of re-parsing the URL.
///
/// This method sacrifices some invariant-checking for speed, compared to the
/// `Deserialize` trait impl: `check_invariants` is only run when `debug_assertions`
/// are enabled, in which case a violation is reported as an invalid-value error.
///
/// This method is only available if the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")]
#[deny(unused)]
pub fn deserialize_internal<D>(deserializer: &mut D) -> Result<Self, D::Error> where D: serde::Deserializer {
    use serde::{Deserialize, Error};
    // The tuple layout must match the one written by `serialize_internal`.
    let (serialization, scheme_end, username_end,
         host_start, host_end, host, port, path_start,
         query_start, fragment_start) = Deserialize::deserialize(deserializer)?;
    let url = Url {
        serialization: serialization,
        scheme_end: scheme_end,
        username_end: username_end,
        host_start: host_start,
        host_end: host_end,
        host: host,
        port: port,
        path_start: path_start,
        query_start: query_start,
        fragment_start: fragment_start
    };
    // Only debug builds validate the deserialized fields.
    if cfg!(debug_assertions) {
        url.check_invariants().map_err(|ref reason| Error::invalid_value(&reason))?
    }
    Ok(url)
}
2026 | ||
2027 | ||
2028 | /// Assuming the URL is in the `file` scheme or similar, | |
2029 | /// convert its path to an absolute `std::path::Path`. | |
2030 | /// | |
2031 | /// **Note:** This does not actually check the URL’s `scheme`, | |
2032 | /// and may give nonsensical results for other schemes. | |
2033 | /// It is the user’s responsibility to check the URL’s scheme before calling this. | |
2034 | /// | |
2035 | /// ``` | |
2036 | /// # use url::Url; | |
2037 | /// # let url = Url::parse("file:///etc/passwd").unwrap(); | |
2038 | /// let path = url.to_file_path(); | |
2039 | /// ``` | |
2040 | /// | |
2041 | /// Returns `Err` if the host is neither empty nor `"localhost"` (except on Windows, where | |
2042 | /// `file:` URLs may have a non-local host), | |
2043 | /// or if `Path::new_opt()` returns `None`. | |
2044 | /// (That is, if the percent-decoded path contains a NUL byte or, | |
2045 | /// for a Windows path, is not UTF-8.) | |
2046 | #[inline] | |
2047 | pub fn to_file_path(&self) -> Result<PathBuf, ()> { | |
2048 | if let Some(segments) = self.path_segments() { | |
2049 | let host = match self.host() { | |
2050 | None | Some(Host::Domain("localhost")) => None, | |
2051 | Some(_) if cfg!(windows) && self.scheme() == "file" => { | |
2052 | Some(&self.serialization[self.host_start as usize .. self.host_end as usize]) | |
2053 | }, | |
2054 | _ => return Err(()) | |
2055 | }; | |
2056 | ||
2057 | return file_url_segments_to_pathbuf(host, segments); | |
2058 | } | |
2059 | Err(()) | |
2060 | } | |
2061 | ||
2062 | // Private helper methods: | |
2063 | ||
2064 | #[inline] | |
2065 | fn slice<R>(&self, range: R) -> &str where R: RangeArg { | |
2066 | range.slice_of(&self.serialization) | |
2067 | } | |
2068 | ||
2069 | #[inline] | |
2070 | fn byte_at(&self, i: u32) -> u8 { | |
2071 | self.serialization.as_bytes()[i as usize] | |
2072 | } | |
2073 | } | |
2074 | ||
2075 | /// Return an error if `Url::host` or `Url::port_or_known_default` return `None`. | |
2076 | impl ToSocketAddrs for Url { | |
2077 | type Iter = SocketAddrs; | |
2078 | ||
2079 | fn to_socket_addrs(&self) -> io::Result<Self::Iter> { | |
2080 | self.with_default_port(|_| Err(()))?.to_socket_addrs() | |
2081 | } | |
2082 | } | |
2083 | ||
2084 | /// Parse a string as an URL, without a base URL or encoding override. | |
2085 | impl str::FromStr for Url { | |
2086 | type Err = ParseError; | |
2087 | ||
2088 | #[inline] | |
2089 | fn from_str(input: &str) -> Result<Url, ::ParseError> { | |
2090 | Url::parse(input) | |
2091 | } | |
2092 | } | |
2093 | ||
/// Display the serialization of this URL.
///
/// Formatting is delegated to the `Display` impl of the serialization string.
impl fmt::Display for Url {
    #[inline]
    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        fmt::Display::fmt(&self.serialization, formatter)
    }
}
2101 | ||
/// Debug the serialization of this URL.
///
/// Formatting is delegated to the `Debug` impl of the serialization string;
/// the individual component indices are not shown.
impl fmt::Debug for Url {
    #[inline]
    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        fmt::Debug::fmt(&self.serialization, formatter)
    }
}
2109 | ||
/// URLs compare like their serialization.
///
/// Marker impl: the `PartialEq` impl compares the serialization strings.
impl Eq for Url {}
2112 | ||
/// URLs compare like their serialization.
///
/// Two URLs are equal exactly when their serialization strings are equal.
impl PartialEq for Url {
    #[inline]
    fn eq(&self, other: &Self) -> bool {
        self.serialization == other.serialization
    }
}
2120 | ||
/// URLs compare like their serialization.
///
/// The ordering is the ordering of the serialization strings.
impl Ord for Url {
    #[inline]
    fn cmp(&self, other: &Self) -> cmp::Ordering {
        self.serialization.cmp(&other.serialization)
    }
}
2128 | ||
2129 | /// URLs compare like their serialization. | |
2130 | impl PartialOrd for Url { | |
2131 | #[inline] | |
2132 | fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> { | |
2133 | self.serialization.partial_cmp(&other.serialization) | |
2134 | } | |
2135 | } | |
2136 | ||
/// URLs hash like their serialization.
///
/// Only the serialization string is fed to the hasher.
impl hash::Hash for Url {
    #[inline]
    fn hash<H>(&self, state: &mut H) where H: hash::Hasher {
        hash::Hash::hash(&self.serialization, state)
    }
}
2144 | ||
/// Return the serialization of this URL.
///
/// The returned string slice borrows from the URL; nothing is copied.
impl AsRef<str> for Url {
    #[inline]
    fn as_ref(&self) -> &str {
        &self.serialization
    }
}
2152 | ||
/// A range with `u32` bounds that can be used to slice a string by byte index.
///
/// Implemented for `a..b`, `a..` and `..b`. Slicing panics under the same conditions
/// as indexing a `str` with the equivalent `usize` range.
trait RangeArg {
    /// Return the sub-slice of `s` designated by this range.
    fn slice_of<'a>(&self, s: &'a str) -> &'a str;
}

impl RangeArg for Range<u32> {
    #[inline]
    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
        let (from, to) = (self.start as usize, self.end as usize);
        &s[from..to]
    }
}

impl RangeArg for RangeFrom<u32> {
    #[inline]
    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
        let from = self.start as usize;
        &s[from..]
    }
}

impl RangeArg for RangeTo<u32> {
    #[inline]
    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
        let to = self.end as usize;
        &s[..to]
    }
}
2177 | ||
/// Encodes this URL as its string serialization.
///
/// This implementation is only available if the `rustc-serialize` Cargo feature is enabled.
#[cfg(feature="rustc-serialize")]
impl rustc_serialize::Encodable for Url {
    fn encode<S: rustc_serialize::Encoder>(&self, encoder: &mut S) -> Result<(), S::Error> {
        encoder.emit_str(self.as_str())
    }
}
2184 | ||
2185 | ||
/// Decodes an URL by reading a string and parsing it with `Url::parse`.
///
/// A parse failure is reported through the decoder’s own error type.
///
/// This implementation is only available if the `rustc-serialize` Cargo feature is enabled.
#[cfg(feature="rustc-serialize")]
impl rustc_serialize::Decodable for Url {
    fn decode<D: rustc_serialize::Decoder>(decoder: &mut D) -> Result<Url, D::Error> {
        let text = decoder.read_str()?;
        match Url::parse(&text) {
            Ok(url) => Ok(url),
            Err(error) => Err(decoder.error(&format!("URL parsing error: {}", error))),
        }
    }
}
2194 | ||
/// Serializes this URL into a `serde` stream.
///
/// The URL is written as a single string: its serialization.
///
/// This implementation is only available if the `serde` Cargo feature is enabled.
#[cfg(feature="serde")]
impl serde::Serialize for Url {
    fn serialize<S>(&self, serializer: &mut S) -> Result<(), S::Error> where S: serde::Serializer {
        serializer.serialize_str(self.as_str())
    }
}
2204 | ||
/// Deserializes this URL from a `serde` stream.
///
/// A string is read from the stream and parsed with `Url::parse`;
/// a parse failure is reported as an invalid-value error carrying the parse error’s description.
///
/// This implementation is only available if the `serde` Cargo feature is enabled.
#[cfg(feature="serde")]
impl serde::Deserialize for Url {
    fn deserialize<D>(deserializer: &mut D) -> Result<Url, D::Error> where D: serde::Deserializer {
        let text: String = serde::Deserialize::deserialize(deserializer)?;
        match Url::parse(&text) {
            Ok(url) => Ok(url),
            Err(err) => Err(serde::Error::invalid_value(err.description())),
        }
    }
}
2217 | ||
2218 | #[cfg(any(unix, target_os = "redox"))] | |
2219 | fn path_to_file_url_segments(path: &Path, serialization: &mut String) | |
2220 | -> Result<(u32, HostInternal), ()> { | |
2221 | use std::os::unix::prelude::OsStrExt; | |
2222 | if !path.is_absolute() { | |
2223 | return Err(()) | |
2224 | } | |
2225 | let host_end = to_u32(serialization.len()).unwrap(); | |
2226 | let mut empty = true; | |
2227 | // skip the root component | |
2228 | for component in path.components().skip(1) { | |
2229 | empty = false; | |
2230 | serialization.push('/'); | |
2231 | serialization.extend(percent_encode( | |
2232 | component.as_os_str().as_bytes(), PATH_SEGMENT_ENCODE_SET)); | |
2233 | } | |
2234 | if empty { | |
2235 | // An URL’s path must not be empty. | |
2236 | serialization.push('/'); | |
2237 | } | |
2238 | Ok((host_end, HostInternal::None)) | |
2239 | } | |
2240 | ||
/// Windows entry point: forwards to `path_to_file_url_segments_windows`,
/// which is compiled on every platform.
#[cfg(windows)]
fn path_to_file_url_segments(path: &Path, serialization: &mut String)
                             -> Result<(u32, HostInternal), ()> {
    path_to_file_url_segments_windows(path, serialization)
}
2246 | ||
2247 | // Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102 | |
2248 | #[cfg_attr(not(windows), allow(dead_code))] | |
2249 | fn path_to_file_url_segments_windows(path: &Path, serialization: &mut String) | |
2250 | -> Result<(u32, HostInternal), ()> { | |
2251 | use std::path::{Prefix, Component}; | |
2252 | if !path.is_absolute() { | |
2253 | return Err(()) | |
2254 | } | |
2255 | let mut components = path.components(); | |
2256 | ||
2257 | let host_end; | |
2258 | let host_internal; | |
2259 | match components.next() { | |
2260 | Some(Component::Prefix(ref p)) => match p.kind() { | |
2261 | Prefix::Disk(letter) | Prefix::VerbatimDisk(letter) => { | |
2262 | host_end = to_u32(serialization.len()).unwrap(); | |
2263 | host_internal = HostInternal::None; | |
2264 | serialization.push('/'); | |
2265 | serialization.push(letter as char); | |
2266 | serialization.push(':'); | |
2267 | }, | |
2268 | Prefix::UNC(server, share) | Prefix::VerbatimUNC(server, share) => { | |
2269 | let host = Host::parse(server.to_str().ok_or(())?).map_err(|_| ())?; | |
2270 | write!(serialization, "{}", host).unwrap(); | |
2271 | host_end = to_u32(serialization.len()).unwrap(); | |
2272 | host_internal = host.into(); | |
2273 | serialization.push('/'); | |
2274 | let share = share.to_str().ok_or(())?; | |
2275 | serialization.extend(percent_encode(share.as_bytes(), PATH_SEGMENT_ENCODE_SET)); | |
2276 | }, | |
2277 | _ => return Err(()) | |
2278 | }, | |
2279 | ||
2280 | _ => return Err(()) | |
2281 | } | |
2282 | ||
2283 | for component in components { | |
2284 | if component == Component::RootDir { continue } | |
2285 | // FIXME: somehow work with non-unicode? | |
2286 | let component = component.as_os_str().to_str().ok_or(())?; | |
2287 | serialization.push('/'); | |
2288 | serialization.extend(percent_encode(component.as_bytes(), PATH_SEGMENT_ENCODE_SET)); | |
2289 | } | |
2290 | Ok((host_end, host_internal)) | |
2291 | } | |
2292 | ||
2293 | #[cfg(any(unix, target_os = "redox"))] | |
2294 | fn file_url_segments_to_pathbuf(host: Option<&str>, segments: str::Split<char>) -> Result<PathBuf, ()> { | |
2295 | use std::ffi::OsStr; | |
2296 | use std::os::unix::prelude::OsStrExt; | |
2297 | use std::path::PathBuf; | |
2298 | ||
2299 | if host.is_some() { | |
2300 | return Err(()); | |
2301 | } | |
2302 | ||
2303 | let mut bytes = Vec::new(); | |
2304 | for segment in segments { | |
2305 | bytes.push(b'/'); | |
2306 | bytes.extend(percent_decode(segment.as_bytes())); | |
2307 | } | |
2308 | let os_str = OsStr::from_bytes(&bytes); | |
2309 | let path = PathBuf::from(os_str); | |
2310 | debug_assert!(path.is_absolute(), | |
2311 | "to_file_path() failed to produce an absolute Path"); | |
2312 | Ok(path) | |
2313 | } | |
2314 | ||
/// Windows entry point: forwards to `file_url_segments_to_pathbuf_windows`,
/// which is compiled on every platform.
#[cfg(windows)]
fn file_url_segments_to_pathbuf(host: Option<&str>, segments: str::Split<char>) -> Result<PathBuf, ()> {
    file_url_segments_to_pathbuf_windows(host, segments)
}
2319 | ||
2320 | // Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102 | |
2321 | #[cfg_attr(not(windows), allow(dead_code))] | |
2322 | fn file_url_segments_to_pathbuf_windows(host: Option<&str>, mut segments: str::Split<char>) -> Result<PathBuf, ()> { | |
2323 | ||
2324 | let mut string = if let Some(host) = host { | |
2325 | r"\\".to_owned() + host | |
2326 | } else { | |
2327 | let first = segments.next().ok_or(())?; | |
2328 | ||
2329 | match first.len() { | |
2330 | 2 => { | |
2331 | if !first.starts_with(parser::ascii_alpha) || first.as_bytes()[1] != b':' { | |
2332 | return Err(()) | |
2333 | } | |
2334 | ||
2335 | first.to_owned() | |
2336 | }, | |
2337 | ||
2338 | 4 => { | |
2339 | if !first.starts_with(parser::ascii_alpha) { | |
2340 | return Err(()) | |
2341 | } | |
2342 | let bytes = first.as_bytes(); | |
2343 | if bytes[1] != b'%' || bytes[2] != b'3' || (bytes[3] != b'a' && bytes[3] != b'A') { | |
2344 | return Err(()) | |
2345 | } | |
2346 | ||
2347 | first[0..1].to_owned() + ":" | |
2348 | }, | |
2349 | ||
2350 | _ => return Err(()), | |
2351 | } | |
2352 | }; | |
2353 | ||
2354 | for segment in segments { | |
2355 | string.push('\\'); | |
2356 | ||
2357 | // Currently non-unicode windows paths cannot be represented | |
2358 | match String::from_utf8(percent_decode(segment.as_bytes()).collect()) { | |
2359 | Ok(s) => string.push_str(&s), | |
2360 | Err(..) => return Err(()), | |
2361 | } | |
2362 | } | |
2363 | let path = PathBuf::from(string); | |
2364 | debug_assert!(path.is_absolute(), | |
2365 | "to_file_path() failed to produce an absolute Path"); | |
2366 | Ok(path) | |
2367 | } | |
2368 | ||
/// Build an `Err` holding an `io::Error` of kind `InvalidData` with `reason` as its message.
fn io_error<T>(reason: &str) -> io::Result<T> {
    let error = io::Error::new(io::ErrorKind::InvalidData, reason);
    Err(error)
}
2372 | ||
/// Implementation detail of `Url::query_pairs_mut`. Typically not used directly.
///
/// Holds a mutable borrow of the URL together with a fragment that is handed to
/// `Url::restore_already_parsed_fragment` when this value is dropped.
#[derive(Debug)]
pub struct UrlQuery<'a> {
    // The URL being modified.
    url: &'a mut Url,
    // Fragment passed back to the URL on drop, if any.
    fragment: Option<String>,
}
2379 | ||
2380 | impl<'a> Drop for UrlQuery<'a> { | |
2381 | fn drop(&mut self) { | |
2382 | self.url.restore_already_parsed_fragment(self.fragment.take()) | |
2383 | } | |
2384 | } | |
2385 | ||
2386 | ||
/// Define a new struct
/// that implements the [`EncodeSet`](percent_encoding/trait.EncodeSet.html) trait,
/// for use in [`percent_encode()`](percent_encoding/fn.percent_encode.html)
/// and related functions.
///
/// Parameters are characters to include in the set in addition to those of the base set.
/// See [encode sets specification](http://url.spec.whatwg.org/#simple-encode-set).
///
/// The generated struct is a `pub` unit struct deriving `Copy` and `Clone`;
/// its `contains` method returns `true` for the listed characters
/// and otherwise defers to the base set.
///
/// Example
/// =======
///
/// ```rust
/// #[macro_use] extern crate url;
/// use url::percent_encoding::{utf8_percent_encode, SIMPLE_ENCODE_SET};
/// define_encode_set! {
///     /// This encode set is used in the URL parser for query strings.
///     pub QUERY_ENCODE_SET = [SIMPLE_ENCODE_SET] | {' ', '"', '#', '<', '>'}
/// }
/// # fn main() {
/// assert_eq!(utf8_percent_encode("foo bar", QUERY_ENCODE_SET).collect::<String>(), "foo%20bar");
/// # }
/// ```
#[macro_export]
macro_rules! define_encode_set {
    ($(#[$attr: meta])* pub $name: ident = [$base_set: expr] | {$($ch: pat),*}) => {
        // Attributes (including doc comments) given by the caller are forwarded to the struct.
        $(#[$attr])*
        #[derive(Copy, Clone)]
        #[allow(non_camel_case_types)]
        pub struct $name;

        impl $crate::percent_encoding::EncodeSet for $name {
            #[inline]
            fn contains(&self, byte: u8) -> bool {
                // Each extra character is a pattern matched against the byte viewed as a `char`.
                match byte as char {
                    $(
                        $ch => true,
                    )*
                    _ => $base_set.contains(byte)
                }
            }
        }
    }
}