]>
Commit | Line | Data |
---|---|---|
223e47cc LB |
1 | // Copyright 2012 The Rust Project Developers. See the COPYRIGHT |
2 | // file at the top-level directory of this distribution and at | |
3 | // http://rust-lang.org/COPYRIGHT. | |
4 | // | |
5 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | |
6 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | |
7 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | |
8 | // option. This file may not be copied, modified, or distributed | |
9 | // except according to those terms. | |
10 | ||
11 | //! Types/fns concerning URLs (see RFC 3986) | |
12 | ||
970d7e83 LB |
13 | #[allow(missing_doc)]; |
14 | ||
15 | ||
16 | use std::cmp::Eq; | |
17 | use std::io::{Reader, ReaderUtil}; | |
18 | use std::io; | |
19 | use std::hashmap::HashMap; | |
20 | use std::to_bytes; | |
21 | use std::uint; | |
22 | ||
23 | #[deriving(Clone, Eq)] | |
223e47cc LB |
24 | struct Url { |
25 | scheme: ~str, | |
26 | user: Option<UserInfo>, | |
27 | host: ~str, | |
28 | port: Option<~str>, | |
29 | path: ~str, | |
30 | query: Query, | |
31 | fragment: Option<~str> | |
32 | } | |
33 | ||
970d7e83 | 34 | #[deriving(Clone, Eq)] |
223e47cc LB |
35 | struct UserInfo { |
36 | user: ~str, | |
37 | pass: Option<~str> | |
38 | } | |
39 | ||
40 | pub type Query = ~[(~str, ~str)]; | |
41 | ||
970d7e83 LB |
42 | impl Url { |
43 | pub fn new(scheme: ~str, | |
44 | user: Option<UserInfo>, | |
45 | host: ~str, | |
46 | port: Option<~str>, | |
47 | path: ~str, | |
48 | query: Query, | |
49 | fragment: Option<~str>) | |
50 | -> Url { | |
223e47cc LB |
51 | Url { |
52 | scheme: scheme, | |
53 | user: user, | |
54 | host: host, | |
55 | port: port, | |
56 | path: path, | |
57 | query: query, | |
58 | fragment: fragment, | |
59 | } | |
60 | } | |
61 | } | |
62 | ||
970d7e83 LB |
63 | impl UserInfo { |
64 | pub fn new(user: ~str, pass: Option<~str>) -> UserInfo { | |
223e47cc LB |
65 | UserInfo { user: user, pass: pass } |
66 | } | |
67 | } | |
68 | ||
69 | fn encode_inner(s: &str, full_url: bool) -> ~str { | |
70 | do io::with_str_reader(s) |rdr| { | |
71 | let mut out = ~""; | |
72 | ||
73 | while !rdr.eof() { | |
74 | let ch = rdr.read_byte() as char; | |
75 | match ch { | |
76 | // unreserved: | |
77 | 'A' .. 'Z' | | |
78 | 'a' .. 'z' | | |
79 | '0' .. '9' | | |
80 | '-' | '.' | '_' | '~' => { | |
970d7e83 | 81 | out.push_char(ch); |
223e47cc LB |
82 | } |
83 | _ => { | |
84 | if full_url { | |
85 | match ch { | |
86 | // gen-delims: | |
87 | ':' | '/' | '?' | '#' | '[' | ']' | '@' | | |
88 | ||
89 | // sub-delims: | |
90 | '!' | '$' | '&' | '"' | '(' | ')' | '*' | | |
91 | '+' | ',' | ';' | '=' => { | |
970d7e83 | 92 | out.push_char(ch); |
223e47cc LB |
93 | } |
94 | ||
970d7e83 | 95 | _ => out.push_str(fmt!("%%%X", ch as uint)) |
223e47cc LB |
96 | } |
97 | } else { | |
970d7e83 | 98 | out.push_str(fmt!("%%%X", ch as uint)); |
223e47cc LB |
99 | } |
100 | } | |
101 | } | |
102 | } | |
103 | ||
104 | out | |
105 | } | |
106 | } | |
107 | ||
108 | /** | |
109 | * Encodes a URI by replacing reserved characters with percent encoded | |
110 | * character sequences. | |
111 | * | |
112 | * This function is compliant with RFC 3986. | |
113 | */ | |
114 | pub fn encode(s: &str) -> ~str { | |
970d7e83 | 115 | encode_inner(s, true) |
223e47cc LB |
116 | } |
117 | ||
118 | /** | |
119 | * Encodes a URI component by replacing reserved characters with percent | |
120 | * encoded character sequences. | |
121 | * | |
122 | * This function is compliant with RFC 3986. | |
123 | */ | |
124 | ||
125 | pub fn encode_component(s: &str) -> ~str { | |
970d7e83 | 126 | encode_inner(s, false) |
223e47cc LB |
127 | } |
128 | ||
129 | fn decode_inner(s: &str, full_url: bool) -> ~str { | |
130 | do io::with_str_reader(s) |rdr| { | |
131 | let mut out = ~""; | |
132 | ||
133 | while !rdr.eof() { | |
134 | match rdr.read_char() { | |
135 | '%' => { | |
136 | let bytes = rdr.read_bytes(2u); | |
137 | let ch = uint::parse_bytes(bytes, 16u).get() as char; | |
138 | ||
139 | if full_url { | |
140 | // Only decode some characters: | |
141 | match ch { | |
142 | // gen-delims: | |
143 | ':' | '/' | '?' | '#' | '[' | ']' | '@' | | |
144 | ||
145 | // sub-delims: | |
146 | '!' | '$' | '&' | '"' | '(' | ')' | '*' | | |
147 | '+' | ',' | ';' | '=' => { | |
970d7e83 LB |
148 | out.push_char('%'); |
149 | out.push_char(bytes[0u] as char); | |
150 | out.push_char(bytes[1u] as char); | |
223e47cc LB |
151 | } |
152 | ||
970d7e83 | 153 | ch => out.push_char(ch) |
223e47cc LB |
154 | } |
155 | } else { | |
970d7e83 | 156 | out.push_char(ch); |
223e47cc LB |
157 | } |
158 | } | |
970d7e83 | 159 | ch => out.push_char(ch) |
223e47cc LB |
160 | } |
161 | } | |
162 | ||
163 | out | |
164 | } | |
165 | } | |
166 | ||
167 | /** | |
168 | * Decode a string encoded with percent encoding. | |
169 | * | |
170 | * This will only decode escape sequences generated by encode. | |
171 | */ | |
172 | pub fn decode(s: &str) -> ~str { | |
970d7e83 | 173 | decode_inner(s, true) |
223e47cc LB |
174 | } |
175 | ||
176 | /** | |
177 | * Decode a string encoded with percent encoding. | |
178 | */ | |
179 | pub fn decode_component(s: &str) -> ~str { | |
970d7e83 | 180 | decode_inner(s, false) |
223e47cc LB |
181 | } |
182 | ||
183 | fn encode_plus(s: &str) -> ~str { | |
184 | do io::with_str_reader(s) |rdr| { | |
185 | let mut out = ~""; | |
186 | ||
187 | while !rdr.eof() { | |
188 | let ch = rdr.read_byte() as char; | |
189 | match ch { | |
190 | 'A' .. 'Z' | 'a' .. 'z' | '0' .. '9' | '_' | '.' | '-' => { | |
970d7e83 | 191 | out.push_char(ch); |
223e47cc | 192 | } |
970d7e83 LB |
193 | ' ' => out.push_char('+'), |
194 | _ => out.push_str(fmt!("%%%X", ch as uint)) | |
223e47cc LB |
195 | } |
196 | } | |
197 | ||
198 | out | |
199 | } | |
200 | } | |
201 | ||
202 | /** | |
203 | * Encode a hashmap to the 'application/x-www-form-urlencoded' media type. | |
204 | */ | |
970d7e83 | 205 | pub fn encode_form_urlencoded(m: &HashMap<~str, ~[~str]>) -> ~str { |
223e47cc LB |
206 | let mut out = ~""; |
207 | let mut first = true; | |
208 | ||
970d7e83 | 209 | for m.iter().advance |(key, values)| { |
223e47cc LB |
210 | let key = encode_plus(*key); |
211 | ||
970d7e83 | 212 | for values.iter().advance |value| { |
223e47cc LB |
213 | if first { |
214 | first = false; | |
215 | } else { | |
970d7e83 | 216 | out.push_char('&'); |
223e47cc LB |
217 | first = false; |
218 | } | |
219 | ||
970d7e83 | 220 | out.push_str(fmt!("%s=%s", key, encode_plus(*value))); |
223e47cc LB |
221 | } |
222 | } | |
223 | ||
224 | out | |
225 | } | |
226 | ||
227 | /** | |
228 | * Decode a string encoded with the 'application/x-www-form-urlencoded' media | |
229 | * type into a hashmap. | |
230 | */ | |
970d7e83 | 231 | pub fn decode_form_urlencoded(s: &[u8]) -> HashMap<~str, ~[~str]> { |
223e47cc | 232 | do io::with_bytes_reader(s) |rdr| { |
970d7e83 | 233 | let mut m = HashMap::new(); |
223e47cc LB |
234 | let mut key = ~""; |
235 | let mut value = ~""; | |
236 | let mut parsing_key = true; | |
237 | ||
238 | while !rdr.eof() { | |
239 | match rdr.read_char() { | |
240 | '&' | ';' => { | |
241 | if key != ~"" && value != ~"" { | |
242 | let mut values = match m.pop(&key) { | |
243 | Some(values) => values, | |
244 | None => ~[], | |
245 | }; | |
246 | ||
247 | values.push(value); | |
248 | m.insert(key, values); | |
249 | } | |
250 | ||
251 | parsing_key = true; | |
252 | key = ~""; | |
253 | value = ~""; | |
254 | } | |
255 | '=' => parsing_key = false, | |
256 | ch => { | |
257 | let ch = match ch { | |
258 | '%' => { | |
259 | let bytes = rdr.read_bytes(2u); | |
260 | uint::parse_bytes(bytes, 16u).get() as char | |
261 | } | |
262 | '+' => ' ', | |
263 | ch => ch | |
264 | }; | |
265 | ||
266 | if parsing_key { | |
970d7e83 | 267 | key.push_char(ch) |
223e47cc | 268 | } else { |
970d7e83 | 269 | value.push_char(ch) |
223e47cc LB |
270 | } |
271 | } | |
272 | } | |
273 | } | |
274 | ||
275 | if key != ~"" && value != ~"" { | |
276 | let mut values = match m.pop(&key) { | |
277 | Some(values) => values, | |
278 | None => ~[], | |
279 | }; | |
280 | ||
281 | values.push(value); | |
282 | m.insert(key, values); | |
283 | } | |
284 | ||
285 | m | |
286 | } | |
287 | } | |
288 | ||
289 | ||
290 | fn split_char_first(s: &str, c: char) -> (~str, ~str) { | |
970d7e83 | 291 | let len = s.len(); |
223e47cc LB |
292 | let mut index = len; |
293 | let mut mat = 0; | |
970d7e83 LB |
294 | do io::with_str_reader(s) |rdr| { |
295 | let mut ch; | |
296 | while !rdr.eof() { | |
297 | ch = rdr.read_byte() as char; | |
298 | if ch == c { | |
299 | // found a match, adjust markers | |
300 | index = rdr.tell()-1; | |
301 | mat = 1; | |
302 | break; | |
223e47cc LB |
303 | } |
304 | } | |
305 | } | |
306 | if index+mat == len { | |
970d7e83 | 307 | return (s.slice(0, index).to_owned(), ~""); |
223e47cc | 308 | } else { |
970d7e83 LB |
309 | return (s.slice(0, index).to_owned(), |
310 | s.slice(index + mat, s.len()).to_owned()); | |
223e47cc LB |
311 | } |
312 | } | |
313 | ||
314 | fn userinfo_from_str(uinfo: &str) -> UserInfo { | |
315 | let (user, p) = split_char_first(uinfo, ':'); | |
970d7e83 | 316 | let pass = if p.is_empty() { |
223e47cc LB |
317 | None |
318 | } else { | |
319 | Some(p) | |
320 | }; | |
321 | return UserInfo::new(user, pass); | |
322 | } | |
323 | ||
324 | fn userinfo_to_str(userinfo: &UserInfo) -> ~str { | |
325 | match userinfo.pass { | |
326 | Some(ref pass) => fmt!("%s:%s@", userinfo.user, *pass), | |
327 | None => fmt!("%s@", userinfo.user), | |
328 | } | |
329 | } | |
330 | ||
331 | fn query_from_str(rawquery: &str) -> Query { | |
332 | let mut query: Query = ~[]; | |
970d7e83 LB |
333 | if !rawquery.is_empty() { |
334 | for rawquery.split_iter('&').advance |p| { | |
223e47cc | 335 | let (k, v) = split_char_first(p, '='); |
970d7e83 | 336 | query.push((decode_component(k), decode_component(v))); |
223e47cc LB |
337 | }; |
338 | } | |
339 | return query; | |
340 | } | |
341 | ||
342 | pub fn query_to_str(query: &Query) -> ~str { | |
970d7e83 LB |
343 | let mut strvec = ~[]; |
344 | for query.iter().advance |kv| { | |
345 | match kv { | |
346 | &(ref k, ref v) => { | |
347 | strvec.push(fmt!("%s=%s", | |
348 | encode_component(*k), | |
349 | encode_component(*v)) | |
350 | ); | |
223e47cc LB |
351 | } |
352 | } | |
223e47cc | 353 | } |
970d7e83 | 354 | return strvec.connect("&"); |
223e47cc LB |
355 | } |
356 | ||
357 | // returns the scheme and the rest of the url, or a parsing error | |
358 | pub fn get_scheme(rawurl: &str) -> Result<(~str, ~str), ~str> { | |
970d7e83 | 359 | for rawurl.iter().enumerate().advance |(i,c)| { |
223e47cc LB |
360 | match c { |
361 | 'A' .. 'Z' | 'a' .. 'z' => loop, | |
362 | '0' .. '9' | '+' | '-' | '.' => { | |
363 | if i == 0 { | |
364 | return Err(~"url: Scheme must begin with a letter."); | |
365 | } | |
366 | loop; | |
367 | } | |
368 | ':' => { | |
369 | if i == 0 { | |
370 | return Err(~"url: Scheme cannot be empty."); | |
371 | } else { | |
372 | return Ok((rawurl.slice(0,i).to_owned(), | |
970d7e83 | 373 | rawurl.slice(i+1,rawurl.len()).to_owned())); |
223e47cc LB |
374 | } |
375 | } | |
376 | _ => { | |
377 | return Err(~"url: Invalid character in scheme."); | |
378 | } | |
379 | } | |
380 | }; | |
381 | return Err(~"url: Scheme must be terminated with a colon."); | |
382 | } | |
383 | ||
970d7e83 | 384 | #[deriving(Clone, Eq)] |
223e47cc LB |
385 | enum Input { |
386 | Digit, // all digits | |
387 | Hex, // digits and letters a-f | |
388 | Unreserved // all other legal characters | |
389 | } | |
390 | ||
391 | // returns userinfo, host, port, and unparsed part, or an error | |
392 | fn get_authority(rawurl: &str) -> | |
393 | Result<(Option<UserInfo>, ~str, Option<~str>, ~str), ~str> { | |
970d7e83 | 394 | if !rawurl.starts_with("//") { |
223e47cc LB |
395 | // there is no authority. |
396 | return Ok((None, ~"", None, rawurl.to_str())); | |
397 | } | |
398 | ||
399 | enum State { | |
400 | Start, // starting state | |
401 | PassHostPort, // could be in user or port | |
402 | Ip6Port, // either in ipv6 host or port | |
403 | Ip6Host, // are in an ipv6 host | |
404 | InHost, // are in a host - may be ipv6, but don't know yet | |
405 | InPort // are in port | |
406 | } | |
407 | ||
408 | let len = rawurl.len(); | |
409 | let mut st = Start; | |
410 | let mut in = Digit; // most restricted, start here. | |
411 | ||
412 | let mut userinfo = None; | |
413 | let mut host = ~""; | |
414 | let mut port = None; | |
415 | ||
416 | let mut colon_count = 0; | |
970d7e83 LB |
417 | let mut pos = 0; |
418 | let mut begin = 2; | |
419 | let mut end = len; | |
223e47cc | 420 | |
970d7e83 | 421 | for rawurl.iter().enumerate().advance |(i,c)| { |
223e47cc LB |
422 | if i < 2 { loop; } // ignore the leading // |
423 | ||
424 | // deal with input class first | |
425 | match c { | |
426 | '0' .. '9' => (), | |
427 | 'A' .. 'F' | 'a' .. 'f' => { | |
428 | if in == Digit { | |
429 | in = Hex; | |
430 | } | |
431 | } | |
432 | 'G' .. 'Z' | 'g' .. 'z' | '-' | '.' | '_' | '~' | '%' | | |
433 | '&' |'\'' | '(' | ')' | '+' | '!' | '*' | ',' | ';' | '=' => { | |
434 | in = Unreserved; | |
435 | } | |
436 | ':' | '@' | '?' | '#' | '/' => { | |
437 | // separators, don't change anything | |
438 | } | |
439 | _ => { | |
440 | return Err(~"Illegal character in authority"); | |
441 | } | |
442 | } | |
443 | ||
444 | // now process states | |
445 | match c { | |
446 | ':' => { | |
447 | colon_count += 1; | |
448 | match st { | |
449 | Start => { | |
450 | pos = i; | |
451 | st = PassHostPort; | |
452 | } | |
453 | PassHostPort => { | |
454 | // multiple colons means ipv6 address. | |
455 | if in == Unreserved { | |
456 | return Err( | |
457 | ~"Illegal characters in IPv6 address."); | |
458 | } | |
459 | st = Ip6Host; | |
460 | } | |
461 | InHost => { | |
462 | pos = i; | |
463 | // can't be sure whether this is an ipv6 address or a port | |
464 | if in == Unreserved { | |
465 | return Err(~"Illegal characters in authority."); | |
466 | } | |
467 | st = Ip6Port; | |
468 | } | |
469 | Ip6Port => { | |
470 | if in == Unreserved { | |
471 | return Err(~"Illegal characters in authority."); | |
472 | } | |
473 | st = Ip6Host; | |
474 | } | |
475 | Ip6Host => { | |
476 | if colon_count > 7 { | |
970d7e83 | 477 | host = rawurl.slice(begin, i).to_owned(); |
223e47cc LB |
478 | pos = i; |
479 | st = InPort; | |
480 | } | |
481 | } | |
482 | _ => { | |
483 | return Err(~"Invalid ':' in authority."); | |
484 | } | |
485 | } | |
486 | in = Digit; // reset input class | |
487 | } | |
488 | ||
489 | '@' => { | |
490 | in = Digit; // reset input class | |
491 | colon_count = 0; // reset count | |
492 | match st { | |
493 | Start => { | |
970d7e83 | 494 | let user = rawurl.slice(begin, i).to_owned(); |
223e47cc LB |
495 | userinfo = Some(UserInfo::new(user, None)); |
496 | st = InHost; | |
497 | } | |
498 | PassHostPort => { | |
970d7e83 LB |
499 | let user = rawurl.slice(begin, pos).to_owned(); |
500 | let pass = rawurl.slice(pos+1, i).to_owned(); | |
223e47cc LB |
501 | userinfo = Some(UserInfo::new(user, Some(pass))); |
502 | st = InHost; | |
503 | } | |
504 | _ => { | |
505 | return Err(~"Invalid '@' in authority."); | |
506 | } | |
507 | } | |
508 | begin = i+1; | |
509 | } | |
510 | ||
511 | '?' | '#' | '/' => { | |
512 | end = i; | |
513 | break; | |
514 | } | |
515 | _ => () | |
516 | } | |
517 | end = i; | |
518 | } | |
519 | ||
520 | let end = end; // make end immutable so it can be captured | |
521 | ||
522 | let host_is_end_plus_one: &fn() -> bool = || { | |
970d7e83 | 523 | let xs = ['?', '#', '/']; |
223e47cc | 524 | end+1 == len |
970d7e83 | 525 | && !xs.iter().any_(|x| *x == (rawurl[end] as char)) |
223e47cc LB |
526 | }; |
527 | ||
528 | // finish up | |
529 | match st { | |
530 | Start => { | |
531 | if host_is_end_plus_one() { | |
970d7e83 | 532 | host = rawurl.slice(begin, end+1).to_owned(); |
223e47cc | 533 | } else { |
970d7e83 | 534 | host = rawurl.slice(begin, end).to_owned(); |
223e47cc LB |
535 | } |
536 | } | |
537 | PassHostPort | Ip6Port => { | |
538 | if in != Digit { | |
539 | return Err(~"Non-digit characters in port."); | |
540 | } | |
970d7e83 LB |
541 | host = rawurl.slice(begin, pos).to_owned(); |
542 | port = Some(rawurl.slice(pos+1, end).to_owned()); | |
223e47cc LB |
543 | } |
544 | Ip6Host | InHost => { | |
970d7e83 | 545 | host = rawurl.slice(begin, end).to_owned(); |
223e47cc LB |
546 | } |
547 | InPort => { | |
548 | if in != Digit { | |
549 | return Err(~"Non-digit characters in port."); | |
550 | } | |
970d7e83 | 551 | port = Some(rawurl.slice(pos+1, end).to_owned()); |
223e47cc LB |
552 | } |
553 | } | |
554 | ||
555 | let rest = if host_is_end_plus_one() { ~"" } | |
970d7e83 | 556 | else { rawurl.slice(end, len).to_owned() }; |
223e47cc LB |
557 | return Ok((userinfo, host, port, rest)); |
558 | } | |
559 | ||
560 | ||
561 | // returns the path and unparsed part of url, or an error | |
562 | fn get_path(rawurl: &str, authority: bool) -> | |
563 | Result<(~str, ~str), ~str> { | |
970d7e83 | 564 | let len = rawurl.len(); |
223e47cc | 565 | let mut end = len; |
970d7e83 | 566 | for rawurl.iter().enumerate().advance |(i,c)| { |
223e47cc LB |
567 | match c { |
568 | 'A' .. 'Z' | 'a' .. 'z' | '0' .. '9' | '&' |'\'' | '(' | ')' | '.' | |
569 | | '@' | ':' | '%' | '/' | '+' | '!' | '*' | ',' | ';' | '=' | |
570 | | '_' | '-' => { | |
571 | loop; | |
572 | } | |
573 | '?' | '#' => { | |
574 | end = i; | |
575 | break; | |
576 | } | |
577 | _ => return Err(~"Invalid character in path.") | |
578 | } | |
579 | } | |
580 | ||
581 | if authority { | |
970d7e83 | 582 | if end != 0 && !rawurl.starts_with("/") { |
223e47cc LB |
583 | return Err(~"Non-empty path must begin with\ |
584 | '/' in presence of authority."); | |
585 | } | |
586 | } | |
587 | ||
970d7e83 LB |
588 | return Ok((decode_component(rawurl.slice(0, end)), |
589 | rawurl.slice(end, len).to_owned())); | |
223e47cc LB |
590 | } |
591 | ||
592 | // returns the parsed query and the fragment, if present | |
593 | fn get_query_fragment(rawurl: &str) -> | |
594 | Result<(Query, Option<~str>), ~str> { | |
970d7e83 LB |
595 | if !rawurl.starts_with("?") { |
596 | if rawurl.starts_with("#") { | |
597 | let f = decode_component(rawurl.slice( | |
223e47cc | 598 | 1, |
970d7e83 | 599 | rawurl.len())); |
223e47cc LB |
600 | return Ok((~[], Some(f))); |
601 | } else { | |
602 | return Ok((~[], None)); | |
603 | } | |
604 | } | |
970d7e83 LB |
605 | let (q, r) = split_char_first(rawurl.slice(1, rawurl.len()), '#'); |
606 | let f = if r.len() != 0 { | |
223e47cc LB |
607 | Some(decode_component(r)) } else { None }; |
608 | return Ok((query_from_str(q), f)); | |
609 | } | |
610 | ||
611 | /** | |
612 | * Parse a `str` to a `url` | |
613 | * | |
614 | * # Arguments | |
615 | * | |
616 | * `rawurl` - a string representing a full url, including scheme. | |
617 | * | |
618 | * # Returns | |
619 | * | |
620 | * a `url` that contains the parsed representation of the url. | |
621 | * | |
622 | */ | |
623 | ||
624 | pub fn from_str(rawurl: &str) -> Result<Url, ~str> { | |
625 | // scheme | |
626 | let (scheme, rest) = match get_scheme(rawurl) { | |
627 | Ok(val) => val, | |
628 | Err(e) => return Err(e), | |
629 | }; | |
630 | ||
631 | // authority | |
632 | let (userinfo, host, port, rest) = match get_authority(rest) { | |
633 | Ok(val) => val, | |
634 | Err(e) => return Err(e), | |
635 | }; | |
636 | ||
637 | // path | |
638 | let has_authority = if host == ~"" { false } else { true }; | |
639 | let (path, rest) = match get_path(rest, has_authority) { | |
640 | Ok(val) => val, | |
641 | Err(e) => return Err(e), | |
642 | }; | |
643 | ||
644 | // query and fragment | |
645 | let (query, fragment) = match get_query_fragment(rest) { | |
646 | Ok(val) => val, | |
647 | Err(e) => return Err(e), | |
648 | }; | |
649 | ||
650 | Ok(Url::new(scheme, userinfo, host, port, path, query, fragment)) | |
651 | } | |
652 | ||
653 | impl FromStr for Url { | |
654 | fn from_str(s: &str) -> Option<Url> { | |
655 | match from_str(s) { | |
656 | Ok(url) => Some(url), | |
657 | Err(_) => None | |
658 | } | |
659 | } | |
660 | } | |
661 | ||
662 | /** | |
663 | * Format a `url` as a string | |
664 | * | |
665 | * # Arguments | |
666 | * | |
667 | * `url` - a url. | |
668 | * | |
669 | * # Returns | |
670 | * | |
671 | * a `str` that contains the formatted url. Note that this will usually | |
672 | * be an inverse of `from_str` but might strip out unneeded separators. | |
673 | * for example, "http://somehost.com?", when parsed and formatted, will | |
674 | * result in just "http://somehost.com". | |
675 | * | |
676 | */ | |
677 | pub fn to_str(url: &Url) -> ~str { | |
678 | let user = match url.user { | |
679 | Some(ref user) => userinfo_to_str(user), | |
680 | None => ~"", | |
681 | }; | |
682 | ||
683 | let authority = if url.host.is_empty() { | |
684 | ~"" | |
685 | } else { | |
686 | fmt!("//%s%s", user, url.host) | |
687 | }; | |
688 | ||
689 | let query = if url.query.is_empty() { | |
690 | ~"" | |
691 | } else { | |
692 | fmt!("?%s", query_to_str(&url.query)) | |
693 | }; | |
694 | ||
695 | let fragment = match url.fragment { | |
696 | Some(ref fragment) => fmt!("#%s", encode_component(*fragment)), | |
697 | None => ~"", | |
698 | }; | |
699 | ||
700 | fmt!("%s:%s%s%s%s", url.scheme, authority, url.path, query, fragment) | |
701 | } | |
702 | ||
970d7e83 | 703 | impl ToStr for Url { |
223e47cc LB |
704 | pub fn to_str(&self) -> ~str { |
705 | to_str(self) | |
706 | } | |
707 | } | |
708 | ||
970d7e83 LB |
709 | impl IterBytes for Url { |
710 | fn iter_bytes(&self, lsb0: bool, f: to_bytes::Cb) -> bool { | |
223e47cc LB |
711 | self.to_str().iter_bytes(lsb0, f) |
712 | } | |
713 | } | |
714 | ||
715 | // Put a few tests outside of the 'test' module so they can test the internal | |
716 | // functions and those functions don't need 'pub' | |
717 | ||
#[test]
fn test_split_char_first() {
    // Separator present: it is dropped and the text is split around it.
    let (head, tail) = split_char_first("hello, sweet world", ',');
    assert_eq!(head, ~"hello");
    assert_eq!(tail, ~" sweet world");

    // Separator absent: everything lands in the first half.
    let (head, tail) = split_char_first("hello sweet world", ',');
    assert_eq!(head, ~"hello sweet world");
    assert_eq!(tail, ~"");
}
728 | ||
#[test]
fn test_get_authority() {
    // user, password, host and a trailing path
    let (user, host, port, rest) = get_authority(
        "//user:pass@rust-lang.org/something").unwrap();
    assert_eq!(user, Some(UserInfo::new(~"user", Some(~"pass"))));
    assert_eq!(host, ~"rust-lang.org");
    assert!(port.is_none());
    assert_eq!(rest, ~"/something");

    // host and port followed by a query
    let (user, host, port, rest) = get_authority(
        "//rust-lang.org:8000?something").unwrap();
    assert!(user.is_none());
    assert_eq!(host, ~"rust-lang.org");
    assert_eq!(port, Some(~"8000"));
    assert_eq!(rest, ~"?something");

    // bare host followed by a fragment
    let (user, host, port, rest) = get_authority(
        "//rust-lang.org#blah").unwrap();
    assert!(user.is_none());
    assert_eq!(host, ~"rust-lang.org");
    assert!(port.is_none());
    assert_eq!(rest, ~"#blah");

    // ipv6 tests
    let (_, host, _, _) = get_authority(
        "//2001:0db8:85a3:0042:0000:8a2e:0370:7334#blah").unwrap();
    assert_eq!(host, ~"2001:0db8:85a3:0042:0000:8a2e:0370:7334");

    let (_, host, port, _) = get_authority(
        "//2001:0db8:85a3:0042:0000:8a2e:0370:7334:8000#blah").unwrap();
    assert_eq!(host, ~"2001:0db8:85a3:0042:0000:8a2e:0370:7334");
    assert_eq!(port, Some(~"8000"));

    let (user, host, port, _) = get_authority(
        "//us:p@2001:0db8:85a3:0042:0000:8a2e:0370:7334:8000#blah"
    ).unwrap();
    assert_eq!(user, Some(UserInfo::new(~"us", Some(~"p"))));
    assert_eq!(host, ~"2001:0db8:85a3:0042:0000:8a2e:0370:7334");
    assert_eq!(port, Some(~"8000"));

    // invalid authorities;
    assert!(get_authority("//user:pass@rust-lang:something").is_err());
    assert!(get_authority("//user@rust-lang:something:/path").is_err());
    assert!(get_authority(
        "//2001:0db8:85a3:0042:0000:8a2e:0370:7334:800a").is_err());
    assert!(get_authority(
        "//2001:0db8:85a3:0042:0000:8a2e:0370:7334:8000:00").is_err());

    // these parse as empty, because they don't start with '//'
    let (_, host, _, _) = get_authority("user:pass@rust-lang").unwrap();
    assert_eq!(host, ~"");
    let (_, host, _, _) = get_authority("rust-lang.org").unwrap();
    assert_eq!(host, ~"");
}
783 | ||
#[test]
fn test_get_path() {
    // the path is percent-decoded; '+' is left alone
    let (path, rest) = get_path("/something+%20orother", true).unwrap();
    assert_eq!(path, ~"/something+ orother");
    assert_eq!(rest, ~"");

    // without an authority the path need not start with '/'
    let (path, rest) = get_path("test@email.com#fragment", false).unwrap();
    assert_eq!(path, ~"test@email.com");
    assert_eq!(rest, ~"#fragment");

    // the path stops at the first '?'
    let (path, rest) = get_path("/gen/:addr=?q=v", false).unwrap();
    assert_eq!(path, ~"/gen/:addr=");
    assert_eq!(rest, ~"?q=v");

    //failure cases
    assert!(get_path("something?q", true).is_err());
}
799 | ||
800 | #[cfg(test)] | |
801 | mod tests { | |
223e47cc LB |
802 | |
803 | use net_url::*; | |
804 | ||
970d7e83 | 805 | use std::hashmap::HashMap; |
223e47cc LB |
806 | |
807 | #[test] | |
970d7e83 | 808 | fn test_url_parse() { |
223e47cc LB |
809 | let url = ~"http://user:pass@rust-lang.org/doc?s=v#something"; |
810 | ||
811 | let up = from_str(url); | |
812 | let u = up.unwrap(); | |
813 | assert!(u.scheme == ~"http"); | |
814 | let userinfo = u.user.get_ref(); | |
815 | assert!(userinfo.user == ~"user"); | |
816 | assert!(userinfo.pass.get_ref() == &~"pass"); | |
817 | assert!(u.host == ~"rust-lang.org"); | |
818 | assert!(u.path == ~"/doc"); | |
819 | assert!(u.query == ~[(~"s", ~"v")]); | |
820 | assert!(u.fragment.get_ref() == &~"something"); | |
821 | } | |
822 | ||
823 | #[test] | |
970d7e83 | 824 | fn test_url_parse_host_slash() { |
223e47cc LB |
825 | let urlstr = ~"http://0.42.42.42/"; |
826 | let url = from_str(urlstr).unwrap(); | |
827 | assert!(url.host == ~"0.42.42.42"); | |
828 | assert!(url.path == ~"/"); | |
829 | } | |
830 | ||
831 | #[test] | |
970d7e83 | 832 | fn test_url_with_underscores() { |
223e47cc LB |
833 | let urlstr = ~"http://dotcom.com/file_name.html"; |
834 | let url = from_str(urlstr).unwrap(); | |
835 | assert!(url.path == ~"/file_name.html"); | |
836 | } | |
837 | ||
838 | #[test] | |
970d7e83 | 839 | fn test_url_with_dashes() { |
223e47cc LB |
840 | let urlstr = ~"http://dotcom.com/file-name.html"; |
841 | let url = from_str(urlstr).unwrap(); | |
842 | assert!(url.path == ~"/file-name.html"); | |
843 | } | |
844 | ||
845 | #[test] | |
970d7e83 | 846 | fn test_no_scheme() { |
223e47cc LB |
847 | assert!(get_scheme("noschemehere.html").is_err()); |
848 | } | |
849 | ||
850 | #[test] | |
970d7e83 | 851 | fn test_invalid_scheme_errors() { |
223e47cc LB |
852 | assert!(from_str("99://something").is_err()); |
853 | assert!(from_str("://something").is_err()); | |
854 | } | |
855 | ||
856 | #[test] | |
970d7e83 | 857 | fn test_full_url_parse_and_format() { |
223e47cc | 858 | let url = ~"http://user:pass@rust-lang.org/doc?s=v#something"; |
970d7e83 | 859 | assert_eq!(from_str(url).unwrap().to_str(), url); |
223e47cc LB |
860 | } |
861 | ||
862 | #[test] | |
970d7e83 | 863 | fn test_userless_url_parse_and_format() { |
223e47cc | 864 | let url = ~"http://rust-lang.org/doc?s=v#something"; |
970d7e83 | 865 | assert_eq!(from_str(url).unwrap().to_str(), url); |
223e47cc LB |
866 | } |
867 | ||
868 | #[test] | |
970d7e83 | 869 | fn test_queryless_url_parse_and_format() { |
223e47cc | 870 | let url = ~"http://user:pass@rust-lang.org/doc#something"; |
970d7e83 | 871 | assert_eq!(from_str(url).unwrap().to_str(), url); |
223e47cc LB |
872 | } |
873 | ||
874 | #[test] | |
970d7e83 | 875 | fn test_empty_query_url_parse_and_format() { |
223e47cc LB |
876 | let url = ~"http://user:pass@rust-lang.org/doc?#something"; |
877 | let should_be = ~"http://user:pass@rust-lang.org/doc#something"; | |
970d7e83 | 878 | assert_eq!(from_str(url).unwrap().to_str(), should_be); |
223e47cc LB |
879 | } |
880 | ||
881 | #[test] | |
970d7e83 | 882 | fn test_fragmentless_url_parse_and_format() { |
223e47cc | 883 | let url = ~"http://user:pass@rust-lang.org/doc?q=v"; |
970d7e83 | 884 | assert_eq!(from_str(url).unwrap().to_str(), url); |
223e47cc LB |
885 | } |
886 | ||
887 | #[test] | |
970d7e83 | 888 | fn test_minimal_url_parse_and_format() { |
223e47cc | 889 | let url = ~"http://rust-lang.org/doc"; |
970d7e83 | 890 | assert_eq!(from_str(url).unwrap().to_str(), url); |
223e47cc LB |
891 | } |
892 | ||
893 | #[test] | |
970d7e83 | 894 | fn test_scheme_host_only_url_parse_and_format() { |
223e47cc | 895 | let url = ~"http://rust-lang.org"; |
970d7e83 | 896 | assert_eq!(from_str(url).unwrap().to_str(), url); |
223e47cc LB |
897 | } |
898 | ||
899 | #[test] | |
970d7e83 | 900 | fn test_pathless_url_parse_and_format() { |
223e47cc | 901 | let url = ~"http://user:pass@rust-lang.org?q=v#something"; |
970d7e83 | 902 | assert_eq!(from_str(url).unwrap().to_str(), url); |
223e47cc LB |
903 | } |
904 | ||
905 | #[test] | |
970d7e83 | 906 | fn test_scheme_host_fragment_only_url_parse_and_format() { |
223e47cc | 907 | let url = ~"http://rust-lang.org#something"; |
970d7e83 | 908 | assert_eq!(from_str(url).unwrap().to_str(), url); |
223e47cc LB |
909 | } |
910 | ||
911 | #[test] | |
970d7e83 | 912 | fn test_url_component_encoding() { |
223e47cc LB |
913 | let url = ~"http://rust-lang.org/doc%20uments?ba%25d%20=%23%26%2B"; |
914 | let u = from_str(url).unwrap(); | |
915 | assert!(u.path == ~"/doc uments"); | |
916 | assert!(u.query == ~[(~"ba%d ", ~"#&+")]); | |
917 | } | |
918 | ||
919 | #[test] | |
970d7e83 | 920 | fn test_url_without_authority() { |
223e47cc | 921 | let url = ~"mailto:test@email.com"; |
970d7e83 | 922 | assert_eq!(from_str(url).unwrap().to_str(), url); |
223e47cc LB |
923 | } |
924 | ||
925 | #[test] | |
970d7e83 LB |
926 | fn test_encode() { |
927 | assert_eq!(encode(""), ~""); | |
928 | assert_eq!(encode("http://example.com"), ~"http://example.com"); | |
929 | assert_eq!(encode("foo bar% baz"), ~"foo%20bar%25%20baz"); | |
930 | assert_eq!(encode(" "), ~"%20"); | |
931 | assert_eq!(encode("!"), ~"!"); | |
932 | assert_eq!(encode("\""), ~"\""); | |
933 | assert_eq!(encode("#"), ~"#"); | |
934 | assert_eq!(encode("$"), ~"$"); | |
935 | assert_eq!(encode("%"), ~"%25"); | |
936 | assert_eq!(encode("&"), ~"&"); | |
937 | assert_eq!(encode("'"), ~"%27"); | |
938 | assert_eq!(encode("("), ~"("); | |
939 | assert_eq!(encode(")"), ~")"); | |
940 | assert_eq!(encode("*"), ~"*"); | |
941 | assert_eq!(encode("+"), ~"+"); | |
942 | assert_eq!(encode(","), ~","); | |
943 | assert_eq!(encode("/"), ~"/"); | |
944 | assert_eq!(encode(":"), ~":"); | |
945 | assert_eq!(encode(";"), ~";"); | |
946 | assert_eq!(encode("="), ~"="); | |
947 | assert_eq!(encode("?"), ~"?"); | |
948 | assert_eq!(encode("@"), ~"@"); | |
949 | assert_eq!(encode("["), ~"["); | |
950 | assert_eq!(encode("]"), ~"]"); | |
223e47cc LB |
951 | } |
952 | ||
// Pins the output of `encode_component`. Apart from the empty string and
// the letters in the multi-character inputs, every character exercised
// below (space, percent sign and the listed punctuation) is expected to be
// replaced by a percent sign followed by two uppercase hex digits.
// The double quote is not covered by this test.
953 | #[test] | |
970d7e83 LB |
954 | fn test_encode_component() { | |
955 | assert_eq!(encode_component(""), ~""); | |
223e47cc LB |
956 | assert!(encode_component("http://example.com") == |
957 | ~"http%3A%2F%2Fexample.com"); | |
958 | assert!(encode_component("foo bar% baz") == | |
959 | ~"foo%20bar%25%20baz"); | |
970d7e83 LB |
960 | assert_eq!(encode_component(" "), ~"%20"); |
961 | assert_eq!(encode_component("!"), ~"%21"); | |
962 | assert_eq!(encode_component("#"), ~"%23"); | |
963 | assert_eq!(encode_component("$"), ~"%24"); | |
964 | assert_eq!(encode_component("%"), ~"%25"); | |
965 | assert_eq!(encode_component("&"), ~"%26"); | |
966 | assert_eq!(encode_component("'"), ~"%27"); | |
967 | assert_eq!(encode_component("("), ~"%28"); | |
968 | assert_eq!(encode_component(")"), ~"%29"); | |
969 | assert_eq!(encode_component("*"), ~"%2A"); | |
970 | assert_eq!(encode_component("+"), ~"%2B"); | |
971 | assert_eq!(encode_component(","), ~"%2C"); | |
972 | assert_eq!(encode_component("/"), ~"%2F"); | |
973 | assert_eq!(encode_component(":"), ~"%3A"); | |
974 | assert_eq!(encode_component(";"), ~"%3B"); | |
975 | assert_eq!(encode_component("="), ~"%3D"); | |
976 | assert_eq!(encode_component("?"), ~"%3F"); | |
977 | assert_eq!(encode_component("@"), ~"%40"); | |
978 | assert_eq!(encode_component("["), ~"%5B"); | |
979 | assert_eq!(encode_component("]"), ~"%5D"); | |
223e47cc LB |
980 | } | |
981 | ||
// Pins the output of `decode`. Input without escapes is expected to come
// back unchanged. Of the escapes exercised below, only %20, %25 and %27 are
// expected to be decoded (to a space, a percent sign and a single quote);
// every other escape listed is expected to be left in its encoded form.
982 | #[test] | |
970d7e83 LB |
983 | fn test_decode() { | |
984 | assert_eq!(decode(""), ~""); | |
985 | assert_eq!(decode("abc/def 123"), ~"abc/def 123"); | |
986 | assert_eq!(decode("abc%2Fdef%20123"), ~"abc%2Fdef 123"); | |
987 | assert_eq!(decode("%20"), ~" "); | |
988 | assert_eq!(decode("%21"), ~"%21"); | |
// NOTE(review): %22 (double quote) is expected to stay encoded while %27
// (single quote, below) is expected to be decoded. RFC 3986 treats the single
// quote, not the double quote, as a sub-delim, so these look swapped relative
// to the RFC -- confirm against the `decode` implementation.
989 | assert_eq!(decode("%22"), ~"%22"); | |
990 | assert_eq!(decode("%23"), ~"%23"); | |
991 | assert_eq!(decode("%24"), ~"%24"); | |
992 | assert_eq!(decode("%25"), ~"%"); | |
993 | assert_eq!(decode("%26"), ~"%26"); | |
994 | assert_eq!(decode("%27"), ~"'"); | |
995 | assert_eq!(decode("%28"), ~"%28"); | |
996 | assert_eq!(decode("%29"), ~"%29"); | |
997 | assert_eq!(decode("%2A"), ~"%2A"); | |
998 | assert_eq!(decode("%2B"), ~"%2B"); | |
999 | assert_eq!(decode("%2C"), ~"%2C"); | |
1000 | assert_eq!(decode("%2F"), ~"%2F"); | |
1001 | assert_eq!(decode("%3A"), ~"%3A"); | |
1002 | assert_eq!(decode("%3B"), ~"%3B"); | |
1003 | assert_eq!(decode("%3D"), ~"%3D"); | |
1004 | assert_eq!(decode("%3F"), ~"%3F"); | |
1005 | assert_eq!(decode("%40"), ~"%40"); | |
1006 | assert_eq!(decode("%5B"), ~"%5B"); | |
1007 | assert_eq!(decode("%5D"), ~"%5D"); | |
223e47cc LB |
1008 | } | |
1009 | ||
// Pins the output of `decode_component`. Input without escapes is expected
// to come back unchanged, and every percent escape exercised below is
// expected to be replaced by the literal character it encodes.
1010 | #[test] | |
970d7e83 LB |
1011 | fn test_decode_component() { | |
1012 | assert_eq!(decode_component(""), ~""); | |
1013 | assert_eq!(decode_component("abc/def 123"), ~"abc/def 123"); | |
1014 | assert_eq!(decode_component("abc%2Fdef%20123"), ~"abc/def 123"); | |
1015 | assert_eq!(decode_component("%20"), ~" "); | |
1016 | assert_eq!(decode_component("%21"), ~"!"); | |
1017 | assert_eq!(decode_component("%22"), ~"\""); | |
1018 | assert_eq!(decode_component("%23"), ~"#"); | |
1019 | assert_eq!(decode_component("%24"), ~"$"); | |
1020 | assert_eq!(decode_component("%25"), ~"%"); | |
1021 | assert_eq!(decode_component("%26"), ~"&"); | |
1022 | assert_eq!(decode_component("%27"), ~"'"); | |
1023 | assert_eq!(decode_component("%28"), ~"("); | |
1024 | assert_eq!(decode_component("%29"), ~")"); | |
1025 | assert_eq!(decode_component("%2A"), ~"*"); | |
1026 | assert_eq!(decode_component("%2B"), ~"+"); | |
1027 | assert_eq!(decode_component("%2C"), ~","); | |
1028 | assert_eq!(decode_component("%2F"), ~"/"); | |
1029 | assert_eq!(decode_component("%3A"), ~":"); | |
1030 | assert_eq!(decode_component("%3B"), ~";"); | |
1031 | assert_eq!(decode_component("%3D"), ~"="); | |
1032 | assert_eq!(decode_component("%3F"), ~"?"); | |
1033 | assert_eq!(decode_component("%40"), ~"@"); | |
1034 | assert_eq!(decode_component("%5B"), ~"["); | |
1035 | assert_eq!(decode_component("%5D"), ~"]"); | |
223e47cc LB |
1036 | } | |
1037 | ||
// Pins the output of `encode_form_urlencoded` for a map from key to a list
// of values. Four situations are covered: an empty map, keys whose value
// list is empty, one key with several values, and keys/values that contain
// spaces and an equals sign.
1038 | #[test] | |
970d7e83 LB |
1039 | fn test_encode_form_urlencoded() { | |
1040 | let mut m = HashMap::new(); | |
1041 | assert_eq!(encode_form_urlencoded(&m), ~""); | |
223e47cc LB |
1042 | |
// Keys that map to an empty value list are expected to contribute nothing.
1043 | m.insert(~"", ~[]); | |
1044 | m.insert(~"foo", ~[]); | |
970d7e83 | 1045 | assert_eq!(encode_form_urlencoded(&m), ~""); |
223e47cc | 1046 | |
// Each value of a key is expected as its own key=value pair, joined with an
// ampersand, in the order the values were listed.
970d7e83 | 1047 | let mut m = HashMap::new(); |
223e47cc | 1048 | m.insert(~"foo", ~[~"bar", ~"123"]); |
970d7e83 | 1049 | assert_eq!(encode_form_urlencoded(&m), ~"foo=bar&foo=123"); |
223e47cc | 1050 | |
// Spaces are expected as plus signs and an equals sign inside a value as %3D.
970d7e83 | 1051 | let mut m = HashMap::new(); |
223e47cc LB |
1052 | m.insert(~"foo bar", ~[~"abc", ~"12 = 34"]); |
1053 | assert!(encode_form_urlencoded(&m) == | |
1054 | ~"foo+bar=abc&foo+bar=12+%3D+34"); | |
1055 | } | |
1056 | ||
// Currently a no-op: the whole body sits inside a block comment (see the
// FIXME below), so this test passes without asserting anything.
// The disabled code would check that decoding empty input yields an empty
// map, and that the sample input yields two keys, with plus signs decoded
// to spaces, %3D decoded to an equals sign, and repeated keys collected
// into one list of values.
1057 | #[test] | |
970d7e83 | 1058 | fn test_decode_form_urlencoded() { |
223e47cc LB |
1059 | // FIXME #4449: Commented out because this causes an ICE, but only | |
1060 | // on FreeBSD | |
1061 | /* | |
970d7e83 | 1062 | assert_eq!(decode_form_urlencoded([]).len(), 0); |
223e47cc | 1063 | |
970d7e83 | 1064 | let s = "a=1&foo+bar=abc&foo+bar=12+%3D+34".as_bytes(); |
223e47cc | 1065 | let form = decode_form_urlencoded(s); |
970d7e83 LB |
1066 | assert_eq!(form.len(), 2); | |
1067 | assert_eq!(form.get_ref(&~"a"), &~[~"1"]); | |
1068 | assert_eq!(form.get_ref(&~"foo bar"), &~[~"abc", ~"12 = 34"]); | |
223e47cc LB |
1069 | */ | |
1070 | } | |
1071 | } |