]> git.proxmox.com Git - rustc.git/blob - vendor/gix-url/src/parse.rs
New upstream version 1.70.0+dfsg2
[rustc.git] / vendor / gix-url / src / parse.rs
1 use std::{borrow::Cow, convert::Infallible};
2
3 pub use bstr;
4 use bstr::{BStr, BString, ByteSlice};
5
6 use crate::Scheme;
7
8 /// The Error returned by [`parse()`]
9 #[derive(Debug, thiserror::Error)]
10 #[allow(missing_docs)]
11 pub enum Error {
12 #[error("Could not decode URL as UTF8")]
13 Utf8(#[from] std::str::Utf8Error),
14 #[error(transparent)]
15 Url(#[from] url::ParseError),
16 #[error("URLs need to specify the path to the repository")]
17 MissingResourceLocation,
18 #[error("file URLs require an absolute or relative path to the repository")]
19 MissingRepositoryPath,
20 #[error("\"{url}\" is not a valid local path")]
21 NotALocalFile { url: BString },
22 #[error("Relative URLs are not permitted: {url:?}")]
23 RelativeUrl { url: String },
24 }
25
26 impl From<Infallible> for Error {
27 fn from(_: Infallible) -> Self {
28 unreachable!("Cannot actually happen, but it seems there can't be a blanket impl for this")
29 }
30 }
31
32 fn str_to_protocol(s: &str) -> Scheme {
33 Scheme::from(s)
34 }
35
/// Guess the protocol of `url` by looking at its first `:`.
///
/// * Without any `:` the input is taken to be a local path, yielding `"file"`.
/// * If a `@` or `.` shows up before the first `:`, the input looks like `[user@]host.xz:path`, yielding `"ssh"`.
/// * Otherwise, if a `/` or `\` follows the first `:`, `"file"` is returned.
/// * In all other cases the input can't be classified and `None` is returned.
fn guess_protocol(url: &[u8]) -> Option<&str> {
    let colon_pos = match url.iter().position(|&byte| byte == b':') {
        Some(pos) => pos,
        None => return Some("file"),
    };

    // `colon_pos` is a valid index, so both slices are in bounds; the second one may be empty.
    let (before_colon, after_colon) = (&url[..colon_pos], &url[colon_pos + 1..]);
    if before_colon.iter().any(|byte| matches!(byte, b'@' | b'.')) {
        return Some("ssh");
    }

    after_colon
        .iter()
        .any(|byte| matches!(byte, b'/' | b'\\'))
        .then_some("file")
}
51
/// Obtain the path portion of an SCP-like URL of the form `[user@]host.xz:path/to/repo.git/`,
/// which is everything past the first `:`.
///
/// If `url` contains no `:` at all, the whole input is returned, hence the result is always `Some(…)`.
fn extract_scp_path(url: &str) -> Option<&str> {
    let path = match url.split_once(':') {
        Some((_host, path)) => path,
        None => url,
    };
    Some(path)
}
56
/// Prepare `url` for being prefixed with `<protocol>://`.
///
/// For `"ssh"`, the first `:` is replaced with `/` so that the SCP-like `host:path` reads as `host/path`,
/// which always produces an owned string. For any other `protocol`, `url` is passed through borrowed.
fn sanitize_for_protocol<'a>(protocol: &str, url: &'a str) -> Cow<'a, str> {
    if protocol == "ssh" {
        Cow::Owned(url.replacen(':', "/", 1))
    } else {
        Cow::Borrowed(url)
    }
}
63
64 fn has_no_explicit_protocol(url: &[u8]) -> bool {
65 url.find(b"://").is_none()
66 }
67
68 fn to_owned_url(url: url::Url) -> Result<crate::Url, Error> {
69 Ok(crate::Url {
70 serialize_alternative_form: false,
71 scheme: str_to_protocol(url.scheme()),
72 user: if url.username().is_empty() {
73 None
74 } else {
75 Some(url.username().into())
76 },
77 host: url.host_str().map(Into::into),
78 port: url.port(),
79 path: url.path().into(),
80 })
81 }
82
83 /// Parse the given `bytes` as git url.
84 ///
85 /// # Note
86 ///
87 /// We cannot and should never have to deal with UTF-16 encoded windows strings, so bytes input is acceptable.
88 /// For file-paths, we don't expect UTF8 encoding either.
89 pub fn parse(input: &BStr) -> Result<crate::Url, Error> {
90 let guessed_protocol = guess_protocol(input).ok_or_else(|| Error::NotALocalFile { url: input.into() })?;
91 let path_without_file_protocol = input.strip_prefix(b"file://");
92 if path_without_file_protocol.is_some() || (has_no_explicit_protocol(input) && guessed_protocol == "file") {
93 let path: BString = path_without_file_protocol
94 .map(|stripped_path| {
95 #[cfg(windows)]
96 {
97 if stripped_path.starts_with(b"/") {
98 input
99 .to_str()
100 .ok()
101 .and_then(|url| {
102 let path = url::Url::parse(url).ok()?.to_file_path().ok()?;
103 path.is_absolute().then(|| gix_path::into_bstr(path).into_owned())
104 })
105 .unwrap_or_else(|| stripped_path.into())
106 } else {
107 stripped_path.into()
108 }
109 }
110 #[cfg(not(windows))]
111 {
112 stripped_path.into()
113 }
114 })
115 .unwrap_or_else(|| input.into());
116 if path.is_empty() {
117 return Err(Error::MissingRepositoryPath);
118 }
119 let input_starts_with_file_protocol = input.starts_with(b"file://");
120 if input_starts_with_file_protocol {
121 let wanted = cfg!(windows).then(|| &[b'\\', b'/'] as &[_]).unwrap_or(&[b'/']);
122 if !wanted.iter().any(|w| path.contains(w)) {
123 return Err(Error::MissingRepositoryPath);
124 }
125 }
126 return Ok(crate::Url {
127 scheme: Scheme::File,
128 path,
129 serialize_alternative_form: !input_starts_with_file_protocol,
130 ..Default::default()
131 });
132 }
133
134 let url_str = std::str::from_utf8(input)?;
135 let (mut url, mut scp_path) = match url::Url::parse(url_str) {
136 Ok(url) => (url, None),
137 Err(url::ParseError::RelativeUrlWithoutBase) => {
138 // happens with bare paths as well as scp like paths. The latter contain a ':' past the host portion,
139 // which we are trying to detect.
140 (
141 url::Url::parse(&format!(
142 "{}://{}",
143 guessed_protocol,
144 sanitize_for_protocol(guessed_protocol, url_str)
145 ))?,
146 extract_scp_path(url_str),
147 )
148 }
149 Err(err) => return Err(err.into()),
150 };
151 // SCP like URLs without user parse as 'something' with the scheme being the 'host'. Hosts always have dots.
152 if url.scheme().find('.').is_some() {
153 // try again with prefixed protocol
154 url = url::Url::parse(&format!("ssh://{}", sanitize_for_protocol("ssh", url_str)))?;
155 scp_path = extract_scp_path(url_str);
156 }
157 if url.path().is_empty() && ["ssh", "git"].contains(&url.scheme()) {
158 return Err(Error::MissingResourceLocation);
159 }
160 if url.cannot_be_a_base() {
161 return Err(Error::RelativeUrl { url: url.into() });
162 }
163
164 let mut url = to_owned_url(url)?;
165 if let Some(path) = scp_path {
166 url.path = path.into();
167 url.serialize_alternative_form = true;
168 }
169 Ok(url)
170 }