]> git.proxmox.com Git - rustc.git/blame - src/libextra/net_url.rs
Imported Upstream version 0.7
[rustc.git] / src / libextra / net_url.rs
CommitLineData
223e47cc
LB
1// Copyright 2012 The Rust Project Developers. See the COPYRIGHT
2// file at the top-level directory of this distribution and at
3// http://rust-lang.org/COPYRIGHT.
4//
5// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8// option. This file may not be copied, modified, or distributed
9// except according to those terms.
10
11//! Types/fns concerning URLs (see RFC 3986)
12
970d7e83
LB
13#[allow(missing_doc)];
14
15
16use std::cmp::Eq;
17use std::io::{Reader, ReaderUtil};
18use std::io;
19use std::hashmap::HashMap;
20use std::to_bytes;
21use std::uint;
22
23#[deriving(Clone, Eq)]
223e47cc
LB
24struct Url {
25 scheme: ~str,
26 user: Option<UserInfo>,
27 host: ~str,
28 port: Option<~str>,
29 path: ~str,
30 query: Query,
31 fragment: Option<~str>
32}
33
970d7e83 34#[deriving(Clone, Eq)]
223e47cc
LB
35struct UserInfo {
36 user: ~str,
37 pass: Option<~str>
38}
39
40pub type Query = ~[(~str, ~str)];
41
970d7e83
LB
42impl Url {
43 pub fn new(scheme: ~str,
44 user: Option<UserInfo>,
45 host: ~str,
46 port: Option<~str>,
47 path: ~str,
48 query: Query,
49 fragment: Option<~str>)
50 -> Url {
223e47cc
LB
51 Url {
52 scheme: scheme,
53 user: user,
54 host: host,
55 port: port,
56 path: path,
57 query: query,
58 fragment: fragment,
59 }
60 }
61}
62
970d7e83
LB
63impl UserInfo {
64 pub fn new(user: ~str, pass: Option<~str>) -> UserInfo {
223e47cc
LB
65 UserInfo { user: user, pass: pass }
66 }
67}
68
69fn encode_inner(s: &str, full_url: bool) -> ~str {
70 do io::with_str_reader(s) |rdr| {
71 let mut out = ~"";
72
73 while !rdr.eof() {
74 let ch = rdr.read_byte() as char;
75 match ch {
76 // unreserved:
77 'A' .. 'Z' |
78 'a' .. 'z' |
79 '0' .. '9' |
80 '-' | '.' | '_' | '~' => {
970d7e83 81 out.push_char(ch);
223e47cc
LB
82 }
83 _ => {
84 if full_url {
85 match ch {
86 // gen-delims:
87 ':' | '/' | '?' | '#' | '[' | ']' | '@' |
88
89 // sub-delims:
90 '!' | '$' | '&' | '"' | '(' | ')' | '*' |
91 '+' | ',' | ';' | '=' => {
970d7e83 92 out.push_char(ch);
223e47cc
LB
93 }
94
970d7e83 95 _ => out.push_str(fmt!("%%%X", ch as uint))
223e47cc
LB
96 }
97 } else {
970d7e83 98 out.push_str(fmt!("%%%X", ch as uint));
223e47cc
LB
99 }
100 }
101 }
102 }
103
104 out
105 }
106}
107
108/**
109 * Encodes a URI by replacing reserved characters with percent encoded
110 * character sequences.
111 *
112 * This function is compliant with RFC 3986.
113 */
114pub fn encode(s: &str) -> ~str {
970d7e83 115 encode_inner(s, true)
223e47cc
LB
116}
117
118/**
119 * Encodes a URI component by replacing reserved characters with percent
120 * encoded character sequences.
121 *
122 * This function is compliant with RFC 3986.
123 */
124
125pub fn encode_component(s: &str) -> ~str {
970d7e83 126 encode_inner(s, false)
223e47cc
LB
127}
128
129fn decode_inner(s: &str, full_url: bool) -> ~str {
130 do io::with_str_reader(s) |rdr| {
131 let mut out = ~"";
132
133 while !rdr.eof() {
134 match rdr.read_char() {
135 '%' => {
136 let bytes = rdr.read_bytes(2u);
137 let ch = uint::parse_bytes(bytes, 16u).get() as char;
138
139 if full_url {
140 // Only decode some characters:
141 match ch {
142 // gen-delims:
143 ':' | '/' | '?' | '#' | '[' | ']' | '@' |
144
145 // sub-delims:
146 '!' | '$' | '&' | '"' | '(' | ')' | '*' |
147 '+' | ',' | ';' | '=' => {
970d7e83
LB
148 out.push_char('%');
149 out.push_char(bytes[0u] as char);
150 out.push_char(bytes[1u] as char);
223e47cc
LB
151 }
152
970d7e83 153 ch => out.push_char(ch)
223e47cc
LB
154 }
155 } else {
970d7e83 156 out.push_char(ch);
223e47cc
LB
157 }
158 }
970d7e83 159 ch => out.push_char(ch)
223e47cc
LB
160 }
161 }
162
163 out
164 }
165}
166
167/**
168 * Decode a string encoded with percent encoding.
169 *
170 * This will only decode escape sequences generated by encode.
171 */
172pub fn decode(s: &str) -> ~str {
970d7e83 173 decode_inner(s, true)
223e47cc
LB
174}
175
176/**
177 * Decode a string encoded with percent encoding.
178 */
179pub fn decode_component(s: &str) -> ~str {
970d7e83 180 decode_inner(s, false)
223e47cc
LB
181}
182
183fn encode_plus(s: &str) -> ~str {
184 do io::with_str_reader(s) |rdr| {
185 let mut out = ~"";
186
187 while !rdr.eof() {
188 let ch = rdr.read_byte() as char;
189 match ch {
190 'A' .. 'Z' | 'a' .. 'z' | '0' .. '9' | '_' | '.' | '-' => {
970d7e83 191 out.push_char(ch);
223e47cc 192 }
970d7e83
LB
193 ' ' => out.push_char('+'),
194 _ => out.push_str(fmt!("%%%X", ch as uint))
223e47cc
LB
195 }
196 }
197
198 out
199 }
200}
201
202/**
203 * Encode a hashmap to the 'application/x-www-form-urlencoded' media type.
204 */
970d7e83 205pub fn encode_form_urlencoded(m: &HashMap<~str, ~[~str]>) -> ~str {
223e47cc
LB
206 let mut out = ~"";
207 let mut first = true;
208
970d7e83 209 for m.iter().advance |(key, values)| {
223e47cc
LB
210 let key = encode_plus(*key);
211
970d7e83 212 for values.iter().advance |value| {
223e47cc
LB
213 if first {
214 first = false;
215 } else {
970d7e83 216 out.push_char('&');
223e47cc
LB
217 first = false;
218 }
219
970d7e83 220 out.push_str(fmt!("%s=%s", key, encode_plus(*value)));
223e47cc
LB
221 }
222 }
223
224 out
225}
226
227/**
228 * Decode a string encoded with the 'application/x-www-form-urlencoded' media
229 * type into a hashmap.
230 */
970d7e83 231pub fn decode_form_urlencoded(s: &[u8]) -> HashMap<~str, ~[~str]> {
223e47cc 232 do io::with_bytes_reader(s) |rdr| {
970d7e83 233 let mut m = HashMap::new();
223e47cc
LB
234 let mut key = ~"";
235 let mut value = ~"";
236 let mut parsing_key = true;
237
238 while !rdr.eof() {
239 match rdr.read_char() {
240 '&' | ';' => {
241 if key != ~"" && value != ~"" {
242 let mut values = match m.pop(&key) {
243 Some(values) => values,
244 None => ~[],
245 };
246
247 values.push(value);
248 m.insert(key, values);
249 }
250
251 parsing_key = true;
252 key = ~"";
253 value = ~"";
254 }
255 '=' => parsing_key = false,
256 ch => {
257 let ch = match ch {
258 '%' => {
259 let bytes = rdr.read_bytes(2u);
260 uint::parse_bytes(bytes, 16u).get() as char
261 }
262 '+' => ' ',
263 ch => ch
264 };
265
266 if parsing_key {
970d7e83 267 key.push_char(ch)
223e47cc 268 } else {
970d7e83 269 value.push_char(ch)
223e47cc
LB
270 }
271 }
272 }
273 }
274
275 if key != ~"" && value != ~"" {
276 let mut values = match m.pop(&key) {
277 Some(values) => values,
278 None => ~[],
279 };
280
281 values.push(value);
282 m.insert(key, values);
283 }
284
285 m
286 }
287}
288
289
290fn split_char_first(s: &str, c: char) -> (~str, ~str) {
970d7e83 291 let len = s.len();
223e47cc
LB
292 let mut index = len;
293 let mut mat = 0;
970d7e83
LB
294 do io::with_str_reader(s) |rdr| {
295 let mut ch;
296 while !rdr.eof() {
297 ch = rdr.read_byte() as char;
298 if ch == c {
299 // found a match, adjust markers
300 index = rdr.tell()-1;
301 mat = 1;
302 break;
223e47cc
LB
303 }
304 }
305 }
306 if index+mat == len {
970d7e83 307 return (s.slice(0, index).to_owned(), ~"");
223e47cc 308 } else {
970d7e83
LB
309 return (s.slice(0, index).to_owned(),
310 s.slice(index + mat, s.len()).to_owned());
223e47cc
LB
311 }
312}
313
314fn userinfo_from_str(uinfo: &str) -> UserInfo {
315 let (user, p) = split_char_first(uinfo, ':');
970d7e83 316 let pass = if p.is_empty() {
223e47cc
LB
317 None
318 } else {
319 Some(p)
320 };
321 return UserInfo::new(user, pass);
322}
323
324fn userinfo_to_str(userinfo: &UserInfo) -> ~str {
325 match userinfo.pass {
326 Some(ref pass) => fmt!("%s:%s@", userinfo.user, *pass),
327 None => fmt!("%s@", userinfo.user),
328 }
329}
330
331fn query_from_str(rawquery: &str) -> Query {
332 let mut query: Query = ~[];
970d7e83
LB
333 if !rawquery.is_empty() {
334 for rawquery.split_iter('&').advance |p| {
223e47cc 335 let (k, v) = split_char_first(p, '=');
970d7e83 336 query.push((decode_component(k), decode_component(v)));
223e47cc
LB
337 };
338 }
339 return query;
340}
341
342pub fn query_to_str(query: &Query) -> ~str {
970d7e83
LB
343 let mut strvec = ~[];
344 for query.iter().advance |kv| {
345 match kv {
346 &(ref k, ref v) => {
347 strvec.push(fmt!("%s=%s",
348 encode_component(*k),
349 encode_component(*v))
350 );
223e47cc
LB
351 }
352 }
223e47cc 353 }
970d7e83 354 return strvec.connect("&");
223e47cc
LB
355}
356
357// returns the scheme and the rest of the url, or a parsing error
358pub fn get_scheme(rawurl: &str) -> Result<(~str, ~str), ~str> {
970d7e83 359 for rawurl.iter().enumerate().advance |(i,c)| {
223e47cc
LB
360 match c {
361 'A' .. 'Z' | 'a' .. 'z' => loop,
362 '0' .. '9' | '+' | '-' | '.' => {
363 if i == 0 {
364 return Err(~"url: Scheme must begin with a letter.");
365 }
366 loop;
367 }
368 ':' => {
369 if i == 0 {
370 return Err(~"url: Scheme cannot be empty.");
371 } else {
372 return Ok((rawurl.slice(0,i).to_owned(),
970d7e83 373 rawurl.slice(i+1,rawurl.len()).to_owned()));
223e47cc
LB
374 }
375 }
376 _ => {
377 return Err(~"url: Invalid character in scheme.");
378 }
379 }
380 };
381 return Err(~"url: Scheme must be terminated with a colon.");
382}
383
970d7e83 384#[deriving(Clone, Eq)]
223e47cc
LB
385enum Input {
386 Digit, // all digits
387 Hex, // digits and letters a-f
388 Unreserved // all other legal characters
389}
390
391// returns userinfo, host, port, and unparsed part, or an error
392fn get_authority(rawurl: &str) ->
393 Result<(Option<UserInfo>, ~str, Option<~str>, ~str), ~str> {
970d7e83 394 if !rawurl.starts_with("//") {
223e47cc
LB
395 // there is no authority.
396 return Ok((None, ~"", None, rawurl.to_str()));
397 }
398
399 enum State {
400 Start, // starting state
401 PassHostPort, // could be in user or port
402 Ip6Port, // either in ipv6 host or port
403 Ip6Host, // are in an ipv6 host
404 InHost, // are in a host - may be ipv6, but don't know yet
405 InPort // are in port
406 }
407
408 let len = rawurl.len();
409 let mut st = Start;
410 let mut in = Digit; // most restricted, start here.
411
412 let mut userinfo = None;
413 let mut host = ~"";
414 let mut port = None;
415
416 let mut colon_count = 0;
970d7e83
LB
417 let mut pos = 0;
418 let mut begin = 2;
419 let mut end = len;
223e47cc 420
970d7e83 421 for rawurl.iter().enumerate().advance |(i,c)| {
223e47cc
LB
422 if i < 2 { loop; } // ignore the leading //
423
424 // deal with input class first
425 match c {
426 '0' .. '9' => (),
427 'A' .. 'F' | 'a' .. 'f' => {
428 if in == Digit {
429 in = Hex;
430 }
431 }
432 'G' .. 'Z' | 'g' .. 'z' | '-' | '.' | '_' | '~' | '%' |
433 '&' |'\'' | '(' | ')' | '+' | '!' | '*' | ',' | ';' | '=' => {
434 in = Unreserved;
435 }
436 ':' | '@' | '?' | '#' | '/' => {
437 // separators, don't change anything
438 }
439 _ => {
440 return Err(~"Illegal character in authority");
441 }
442 }
443
444 // now process states
445 match c {
446 ':' => {
447 colon_count += 1;
448 match st {
449 Start => {
450 pos = i;
451 st = PassHostPort;
452 }
453 PassHostPort => {
454 // multiple colons means ipv6 address.
455 if in == Unreserved {
456 return Err(
457 ~"Illegal characters in IPv6 address.");
458 }
459 st = Ip6Host;
460 }
461 InHost => {
462 pos = i;
463 // can't be sure whether this is an ipv6 address or a port
464 if in == Unreserved {
465 return Err(~"Illegal characters in authority.");
466 }
467 st = Ip6Port;
468 }
469 Ip6Port => {
470 if in == Unreserved {
471 return Err(~"Illegal characters in authority.");
472 }
473 st = Ip6Host;
474 }
475 Ip6Host => {
476 if colon_count > 7 {
970d7e83 477 host = rawurl.slice(begin, i).to_owned();
223e47cc
LB
478 pos = i;
479 st = InPort;
480 }
481 }
482 _ => {
483 return Err(~"Invalid ':' in authority.");
484 }
485 }
486 in = Digit; // reset input class
487 }
488
489 '@' => {
490 in = Digit; // reset input class
491 colon_count = 0; // reset count
492 match st {
493 Start => {
970d7e83 494 let user = rawurl.slice(begin, i).to_owned();
223e47cc
LB
495 userinfo = Some(UserInfo::new(user, None));
496 st = InHost;
497 }
498 PassHostPort => {
970d7e83
LB
499 let user = rawurl.slice(begin, pos).to_owned();
500 let pass = rawurl.slice(pos+1, i).to_owned();
223e47cc
LB
501 userinfo = Some(UserInfo::new(user, Some(pass)));
502 st = InHost;
503 }
504 _ => {
505 return Err(~"Invalid '@' in authority.");
506 }
507 }
508 begin = i+1;
509 }
510
511 '?' | '#' | '/' => {
512 end = i;
513 break;
514 }
515 _ => ()
516 }
517 end = i;
518 }
519
520 let end = end; // make end immutable so it can be captured
521
522 let host_is_end_plus_one: &fn() -> bool = || {
970d7e83 523 let xs = ['?', '#', '/'];
223e47cc 524 end+1 == len
970d7e83 525 && !xs.iter().any_(|x| *x == (rawurl[end] as char))
223e47cc
LB
526 };
527
528 // finish up
529 match st {
530 Start => {
531 if host_is_end_plus_one() {
970d7e83 532 host = rawurl.slice(begin, end+1).to_owned();
223e47cc 533 } else {
970d7e83 534 host = rawurl.slice(begin, end).to_owned();
223e47cc
LB
535 }
536 }
537 PassHostPort | Ip6Port => {
538 if in != Digit {
539 return Err(~"Non-digit characters in port.");
540 }
970d7e83
LB
541 host = rawurl.slice(begin, pos).to_owned();
542 port = Some(rawurl.slice(pos+1, end).to_owned());
223e47cc
LB
543 }
544 Ip6Host | InHost => {
970d7e83 545 host = rawurl.slice(begin, end).to_owned();
223e47cc
LB
546 }
547 InPort => {
548 if in != Digit {
549 return Err(~"Non-digit characters in port.");
550 }
970d7e83 551 port = Some(rawurl.slice(pos+1, end).to_owned());
223e47cc
LB
552 }
553 }
554
555 let rest = if host_is_end_plus_one() { ~"" }
970d7e83 556 else { rawurl.slice(end, len).to_owned() };
223e47cc
LB
557 return Ok((userinfo, host, port, rest));
558}
559
560
561// returns the path and unparsed part of url, or an error
562fn get_path(rawurl: &str, authority: bool) ->
563 Result<(~str, ~str), ~str> {
970d7e83 564 let len = rawurl.len();
223e47cc 565 let mut end = len;
970d7e83 566 for rawurl.iter().enumerate().advance |(i,c)| {
223e47cc
LB
567 match c {
568 'A' .. 'Z' | 'a' .. 'z' | '0' .. '9' | '&' |'\'' | '(' | ')' | '.'
569 | '@' | ':' | '%' | '/' | '+' | '!' | '*' | ',' | ';' | '='
570 | '_' | '-' => {
571 loop;
572 }
573 '?' | '#' => {
574 end = i;
575 break;
576 }
577 _ => return Err(~"Invalid character in path.")
578 }
579 }
580
581 if authority {
970d7e83 582 if end != 0 && !rawurl.starts_with("/") {
223e47cc
LB
583 return Err(~"Non-empty path must begin with\
584 '/' in presence of authority.");
585 }
586 }
587
970d7e83
LB
588 return Ok((decode_component(rawurl.slice(0, end)),
589 rawurl.slice(end, len).to_owned()));
223e47cc
LB
590}
591
592// returns the parsed query and the fragment, if present
593fn get_query_fragment(rawurl: &str) ->
594 Result<(Query, Option<~str>), ~str> {
970d7e83
LB
595 if !rawurl.starts_with("?") {
596 if rawurl.starts_with("#") {
597 let f = decode_component(rawurl.slice(
223e47cc 598 1,
970d7e83 599 rawurl.len()));
223e47cc
LB
600 return Ok((~[], Some(f)));
601 } else {
602 return Ok((~[], None));
603 }
604 }
970d7e83
LB
605 let (q, r) = split_char_first(rawurl.slice(1, rawurl.len()), '#');
606 let f = if r.len() != 0 {
223e47cc
LB
607 Some(decode_component(r)) } else { None };
608 return Ok((query_from_str(q), f));
609}
610
611/**
612 * Parse a `str` to a `url`
613 *
614 * # Arguments
615 *
616 * `rawurl` - a string representing a full url, including scheme.
617 *
618 * # Returns
619 *
620 * a `url` that contains the parsed representation of the url.
621 *
622 */
623
624pub fn from_str(rawurl: &str) -> Result<Url, ~str> {
625 // scheme
626 let (scheme, rest) = match get_scheme(rawurl) {
627 Ok(val) => val,
628 Err(e) => return Err(e),
629 };
630
631 // authority
632 let (userinfo, host, port, rest) = match get_authority(rest) {
633 Ok(val) => val,
634 Err(e) => return Err(e),
635 };
636
637 // path
638 let has_authority = if host == ~"" { false } else { true };
639 let (path, rest) = match get_path(rest, has_authority) {
640 Ok(val) => val,
641 Err(e) => return Err(e),
642 };
643
644 // query and fragment
645 let (query, fragment) = match get_query_fragment(rest) {
646 Ok(val) => val,
647 Err(e) => return Err(e),
648 };
649
650 Ok(Url::new(scheme, userinfo, host, port, path, query, fragment))
651}
652
653impl FromStr for Url {
654 fn from_str(s: &str) -> Option<Url> {
655 match from_str(s) {
656 Ok(url) => Some(url),
657 Err(_) => None
658 }
659 }
660}
661
662/**
663 * Format a `url` as a string
664 *
665 * # Arguments
666 *
667 * `url` - a url.
668 *
669 * # Returns
670 *
671 * a `str` that contains the formatted url. Note that this will usually
672 * be an inverse of `from_str` but might strip out unneeded separators.
673 * for example, "http://somehost.com?", when parsed and formatted, will
674 * result in just "http://somehost.com".
675 *
676 */
677pub fn to_str(url: &Url) -> ~str {
678 let user = match url.user {
679 Some(ref user) => userinfo_to_str(user),
680 None => ~"",
681 };
682
683 let authority = if url.host.is_empty() {
684 ~""
685 } else {
686 fmt!("//%s%s", user, url.host)
687 };
688
689 let query = if url.query.is_empty() {
690 ~""
691 } else {
692 fmt!("?%s", query_to_str(&url.query))
693 };
694
695 let fragment = match url.fragment {
696 Some(ref fragment) => fmt!("#%s", encode_component(*fragment)),
697 None => ~"",
698 };
699
700 fmt!("%s:%s%s%s%s", url.scheme, authority, url.path, query, fragment)
701}
702
970d7e83 703impl ToStr for Url {
223e47cc
LB
704 pub fn to_str(&self) -> ~str {
705 to_str(self)
706 }
707}
708
970d7e83
LB
709impl IterBytes for Url {
710 fn iter_bytes(&self, lsb0: bool, f: to_bytes::Cb) -> bool {
223e47cc
LB
711 self.to_str().iter_bytes(lsb0, f)
712 }
713}
714
// Put a few tests outside of the 'test' module so they can test the internal
// functions and those functions don't need 'pub'

#[test]
fn test_split_char_first() {
    // separator present: it is dropped from both halves
    let (head, tail) = split_char_first("hello, sweet world", ',');
    assert_eq!(head, ~"hello");
    assert_eq!(tail, ~" sweet world");

    // separator absent: everything ends up in the first half
    let (head, tail) = split_char_first("hello sweet world", ',');
    assert_eq!(head, ~"hello sweet world");
    assert_eq!(tail, ~"");
}
728
#[test]
fn test_get_authority() {
    // user, password and host, followed by a path
    let (user, host, port, rest) = get_authority(
        "//user:pass@rust-lang.org/something").unwrap();
    assert_eq!(user, Some(UserInfo::new(~"user", Some(~"pass"))));
    assert_eq!(host, ~"rust-lang.org");
    assert!(port.is_none());
    assert_eq!(rest, ~"/something");

    // host and port, followed by a query
    let (user, host, port, rest) = get_authority(
        "//rust-lang.org:8000?something").unwrap();
    assert!(user.is_none());
    assert_eq!(host, ~"rust-lang.org");
    assert_eq!(port, Some(~"8000"));
    assert_eq!(rest, ~"?something");

    // host only, followed by a fragment
    let (user, host, port, rest) = get_authority(
        "//rust-lang.org#blah").unwrap();
    assert!(user.is_none());
    assert_eq!(host, ~"rust-lang.org");
    assert!(port.is_none());
    assert_eq!(rest, ~"#blah");

    // ipv6 tests
    let (_, host, _, _) = get_authority(
        "//2001:0db8:85a3:0042:0000:8a2e:0370:7334#blah").unwrap();
    assert_eq!(host, ~"2001:0db8:85a3:0042:0000:8a2e:0370:7334");

    let (_, host, port, _) = get_authority(
        "//2001:0db8:85a3:0042:0000:8a2e:0370:7334:8000#blah").unwrap();
    assert_eq!(host, ~"2001:0db8:85a3:0042:0000:8a2e:0370:7334");
    assert_eq!(port, Some(~"8000"));

    let (user, host, port, _) = get_authority(
        "//us:p@2001:0db8:85a3:0042:0000:8a2e:0370:7334:8000#blah"
    ).unwrap();
    assert_eq!(user, Some(UserInfo::new(~"us", Some(~"p"))));
    assert_eq!(host, ~"2001:0db8:85a3:0042:0000:8a2e:0370:7334");
    assert_eq!(port, Some(~"8000"));

    // invalid authorities;
    assert!(get_authority("//user:pass@rust-lang:something").is_err());
    assert!(get_authority("//user@rust-lang:something:/path").is_err());
    assert!(get_authority(
        "//2001:0db8:85a3:0042:0000:8a2e:0370:7334:800a").is_err());
    assert!(get_authority(
        "//2001:0db8:85a3:0042:0000:8a2e:0370:7334:8000:00").is_err());

    // these parse as empty, because they don't start with '//'
    let (_, host, _, _) = get_authority("user:pass@rust-lang").unwrap();
    assert_eq!(host, ~"");
    let (_, host, _, _) = get_authority("rust-lang.org").unwrap();
    assert_eq!(host, ~"");
}
783
#[test]
fn test_get_path() {
    // the path is percent-decoded; '+' is left alone
    let (path, rest) = get_path("/something+%20orother", true).unwrap();
    assert_eq!(path, ~"/something+ orother");
    assert_eq!(rest, ~"");

    // without an authority the path need not start with '/'
    let (path, rest) = get_path("test@email.com#fragment", false).unwrap();
    assert_eq!(path, ~"test@email.com");
    assert_eq!(rest, ~"#fragment");

    // the path stops at the first '?'
    let (path, rest) = get_path("/gen/:addr=?q=v", false).unwrap();
    assert_eq!(path, ~"/gen/:addr=");
    assert_eq!(rest, ~"?q=v");

    //failure cases
    assert!(get_path("something?q", true).is_err());
}
799
800#[cfg(test)]
801mod tests {
223e47cc
LB
802
803 use net_url::*;
804
970d7e83 805 use std::hashmap::HashMap;
223e47cc
LB
806
807 #[test]
970d7e83 808 fn test_url_parse() {
223e47cc
LB
809 let url = ~"http://user:pass@rust-lang.org/doc?s=v#something";
810
811 let up = from_str(url);
812 let u = up.unwrap();
813 assert!(u.scheme == ~"http");
814 let userinfo = u.user.get_ref();
815 assert!(userinfo.user == ~"user");
816 assert!(userinfo.pass.get_ref() == &~"pass");
817 assert!(u.host == ~"rust-lang.org");
818 assert!(u.path == ~"/doc");
819 assert!(u.query == ~[(~"s", ~"v")]);
820 assert!(u.fragment.get_ref() == &~"something");
821 }
822
823 #[test]
970d7e83 824 fn test_url_parse_host_slash() {
223e47cc
LB
825 let urlstr = ~"http://0.42.42.42/";
826 let url = from_str(urlstr).unwrap();
827 assert!(url.host == ~"0.42.42.42");
828 assert!(url.path == ~"/");
829 }
830
831 #[test]
970d7e83 832 fn test_url_with_underscores() {
223e47cc
LB
833 let urlstr = ~"http://dotcom.com/file_name.html";
834 let url = from_str(urlstr).unwrap();
835 assert!(url.path == ~"/file_name.html");
836 }
837
838 #[test]
970d7e83 839 fn test_url_with_dashes() {
223e47cc
LB
840 let urlstr = ~"http://dotcom.com/file-name.html";
841 let url = from_str(urlstr).unwrap();
842 assert!(url.path == ~"/file-name.html");
843 }
844
845 #[test]
970d7e83 846 fn test_no_scheme() {
223e47cc
LB
847 assert!(get_scheme("noschemehere.html").is_err());
848 }
849
850 #[test]
970d7e83 851 fn test_invalid_scheme_errors() {
223e47cc
LB
852 assert!(from_str("99://something").is_err());
853 assert!(from_str("://something").is_err());
854 }
855
856 #[test]
970d7e83 857 fn test_full_url_parse_and_format() {
223e47cc 858 let url = ~"http://user:pass@rust-lang.org/doc?s=v#something";
970d7e83 859 assert_eq!(from_str(url).unwrap().to_str(), url);
223e47cc
LB
860 }
861
862 #[test]
970d7e83 863 fn test_userless_url_parse_and_format() {
223e47cc 864 let url = ~"http://rust-lang.org/doc?s=v#something";
970d7e83 865 assert_eq!(from_str(url).unwrap().to_str(), url);
223e47cc
LB
866 }
867
868 #[test]
970d7e83 869 fn test_queryless_url_parse_and_format() {
223e47cc 870 let url = ~"http://user:pass@rust-lang.org/doc#something";
970d7e83 871 assert_eq!(from_str(url).unwrap().to_str(), url);
223e47cc
LB
872 }
873
874 #[test]
970d7e83 875 fn test_empty_query_url_parse_and_format() {
223e47cc
LB
876 let url = ~"http://user:pass@rust-lang.org/doc?#something";
877 let should_be = ~"http://user:pass@rust-lang.org/doc#something";
970d7e83 878 assert_eq!(from_str(url).unwrap().to_str(), should_be);
223e47cc
LB
879 }
880
881 #[test]
970d7e83 882 fn test_fragmentless_url_parse_and_format() {
223e47cc 883 let url = ~"http://user:pass@rust-lang.org/doc?q=v";
970d7e83 884 assert_eq!(from_str(url).unwrap().to_str(), url);
223e47cc
LB
885 }
886
887 #[test]
970d7e83 888 fn test_minimal_url_parse_and_format() {
223e47cc 889 let url = ~"http://rust-lang.org/doc";
970d7e83 890 assert_eq!(from_str(url).unwrap().to_str(), url);
223e47cc
LB
891 }
892
893 #[test]
970d7e83 894 fn test_scheme_host_only_url_parse_and_format() {
223e47cc 895 let url = ~"http://rust-lang.org";
970d7e83 896 assert_eq!(from_str(url).unwrap().to_str(), url);
223e47cc
LB
897 }
898
899 #[test]
970d7e83 900 fn test_pathless_url_parse_and_format() {
223e47cc 901 let url = ~"http://user:pass@rust-lang.org?q=v#something";
970d7e83 902 assert_eq!(from_str(url).unwrap().to_str(), url);
223e47cc
LB
903 }
904
905 #[test]
970d7e83 906 fn test_scheme_host_fragment_only_url_parse_and_format() {
223e47cc 907 let url = ~"http://rust-lang.org#something";
970d7e83 908 assert_eq!(from_str(url).unwrap().to_str(), url);
223e47cc
LB
909 }
910
911 #[test]
970d7e83 912 fn test_url_component_encoding() {
223e47cc
LB
913 let url = ~"http://rust-lang.org/doc%20uments?ba%25d%20=%23%26%2B";
914 let u = from_str(url).unwrap();
915 assert!(u.path == ~"/doc uments");
916 assert!(u.query == ~[(~"ba%d ", ~"#&+")]);
917 }
918
919 #[test]
970d7e83 920 fn test_url_without_authority() {
223e47cc 921 let url = ~"mailto:test@email.com";
970d7e83 922 assert_eq!(from_str(url).unwrap().to_str(), url);
223e47cc
LB
923 }
924
925 #[test]
970d7e83
LB
926 fn test_encode() {
927 assert_eq!(encode(""), ~"");
928 assert_eq!(encode("http://example.com"), ~"http://example.com");
929 assert_eq!(encode("foo bar% baz"), ~"foo%20bar%25%20baz");
930 assert_eq!(encode(" "), ~"%20");
931 assert_eq!(encode("!"), ~"!");
932 assert_eq!(encode("\""), ~"\"");
933 assert_eq!(encode("#"), ~"#");
934 assert_eq!(encode("$"), ~"$");
935 assert_eq!(encode("%"), ~"%25");
936 assert_eq!(encode("&"), ~"&");
937 assert_eq!(encode("'"), ~"%27");
938 assert_eq!(encode("("), ~"(");
939 assert_eq!(encode(")"), ~")");
940 assert_eq!(encode("*"), ~"*");
941 assert_eq!(encode("+"), ~"+");
942 assert_eq!(encode(","), ~",");
943 assert_eq!(encode("/"), ~"/");
944 assert_eq!(encode(":"), ~":");
945 assert_eq!(encode(";"), ~";");
946 assert_eq!(encode("="), ~"=");
947 assert_eq!(encode("?"), ~"?");
948 assert_eq!(encode("@"), ~"@");
949 assert_eq!(encode("["), ~"[");
950 assert_eq!(encode("]"), ~"]");
223e47cc
LB
951 }
952
953 #[test]
970d7e83
LB
954 fn test_encode_component() {
955 assert_eq!(encode_component(""), ~"");
223e47cc
LB
956 assert!(encode_component("http://example.com") ==
957 ~"http%3A%2F%2Fexample.com");
958 assert!(encode_component("foo bar% baz") ==
959 ~"foo%20bar%25%20baz");
970d7e83
LB
960 assert_eq!(encode_component(" "), ~"%20");
961 assert_eq!(encode_component("!"), ~"%21");
962 assert_eq!(encode_component("#"), ~"%23");
963 assert_eq!(encode_component("$"), ~"%24");
964 assert_eq!(encode_component("%"), ~"%25");
965 assert_eq!(encode_component("&"), ~"%26");
966 assert_eq!(encode_component("'"), ~"%27");
967 assert_eq!(encode_component("("), ~"%28");
968 assert_eq!(encode_component(")"), ~"%29");
969 assert_eq!(encode_component("*"), ~"%2A");
970 assert_eq!(encode_component("+"), ~"%2B");
971 assert_eq!(encode_component(","), ~"%2C");
972 assert_eq!(encode_component("/"), ~"%2F");
973 assert_eq!(encode_component(":"), ~"%3A");
974 assert_eq!(encode_component(";"), ~"%3B");
975 assert_eq!(encode_component("="), ~"%3D");
976 assert_eq!(encode_component("?"), ~"%3F");
977 assert_eq!(encode_component("@"), ~"%40");
978 assert_eq!(encode_component("["), ~"%5B");
979 assert_eq!(encode_component("]"), ~"%5D");
223e47cc
LB
980 }
981
982 #[test]
970d7e83
LB
983 fn test_decode() {
984 assert_eq!(decode(""), ~"");
985 assert_eq!(decode("abc/def 123"), ~"abc/def 123");
986 assert_eq!(decode("abc%2Fdef%20123"), ~"abc%2Fdef 123");
987 assert_eq!(decode("%20"), ~" ");
988 assert_eq!(decode("%21"), ~"%21");
989 assert_eq!(decode("%22"), ~"%22");
990 assert_eq!(decode("%23"), ~"%23");
991 assert_eq!(decode("%24"), ~"%24");
992 assert_eq!(decode("%25"), ~"%");
993 assert_eq!(decode("%26"), ~"%26");
994 assert_eq!(decode("%27"), ~"'");
995 assert_eq!(decode("%28"), ~"%28");
996 assert_eq!(decode("%29"), ~"%29");
997 assert_eq!(decode("%2A"), ~"%2A");
998 assert_eq!(decode("%2B"), ~"%2B");
999 assert_eq!(decode("%2C"), ~"%2C");
1000 assert_eq!(decode("%2F"), ~"%2F");
1001 assert_eq!(decode("%3A"), ~"%3A");
1002 assert_eq!(decode("%3B"), ~"%3B");
1003 assert_eq!(decode("%3D"), ~"%3D");
1004 assert_eq!(decode("%3F"), ~"%3F");
1005 assert_eq!(decode("%40"), ~"%40");
1006 assert_eq!(decode("%5B"), ~"%5B");
1007 assert_eq!(decode("%5D"), ~"%5D");
223e47cc
LB
1008 }
1009
1010 #[test]
970d7e83
LB
1011 fn test_decode_component() {
1012 assert_eq!(decode_component(""), ~"");
1013 assert_eq!(decode_component("abc/def 123"), ~"abc/def 123");
1014 assert_eq!(decode_component("abc%2Fdef%20123"), ~"abc/def 123");
1015 assert_eq!(decode_component("%20"), ~" ");
1016 assert_eq!(decode_component("%21"), ~"!");
1017 assert_eq!(decode_component("%22"), ~"\"");
1018 assert_eq!(decode_component("%23"), ~"#");
1019 assert_eq!(decode_component("%24"), ~"$");
1020 assert_eq!(decode_component("%25"), ~"%");
1021 assert_eq!(decode_component("%26"), ~"&");
1022 assert_eq!(decode_component("%27"), ~"'");
1023 assert_eq!(decode_component("%28"), ~"(");
1024 assert_eq!(decode_component("%29"), ~")");
1025 assert_eq!(decode_component("%2A"), ~"*");
1026 assert_eq!(decode_component("%2B"), ~"+");
1027 assert_eq!(decode_component("%2C"), ~",");
1028 assert_eq!(decode_component("%2F"), ~"/");
1029 assert_eq!(decode_component("%3A"), ~":");
1030 assert_eq!(decode_component("%3B"), ~";");
1031 assert_eq!(decode_component("%3D"), ~"=");
1032 assert_eq!(decode_component("%3F"), ~"?");
1033 assert_eq!(decode_component("%40"), ~"@");
1034 assert_eq!(decode_component("%5B"), ~"[");
1035 assert_eq!(decode_component("%5D"), ~"]");
223e47cc
LB
1036 }
1037
1038 #[test]
970d7e83
LB
1039 fn test_encode_form_urlencoded() {
1040 let mut m = HashMap::new();
1041 assert_eq!(encode_form_urlencoded(&m), ~"");
223e47cc
LB
1042
1043 m.insert(~"", ~[]);
1044 m.insert(~"foo", ~[]);
970d7e83 1045 assert_eq!(encode_form_urlencoded(&m), ~"");
223e47cc 1046
970d7e83 1047 let mut m = HashMap::new();
223e47cc 1048 m.insert(~"foo", ~[~"bar", ~"123"]);
970d7e83 1049 assert_eq!(encode_form_urlencoded(&m), ~"foo=bar&foo=123");
223e47cc 1050
970d7e83 1051 let mut m = HashMap::new();
223e47cc
LB
1052 m.insert(~"foo bar", ~[~"abc", ~"12 = 34"]);
1053 assert!(encode_form_urlencoded(&m) ==
1054 ~"foo+bar=abc&foo+bar=12+%3D+34");
1055 }
1056
1057 #[test]
970d7e83 1058 fn test_decode_form_urlencoded() {
223e47cc
LB
1059 // FIXME #4449: Commented out because this causes an ICE, but only
1060 // on FreeBSD
1061 /*
970d7e83 1062 assert_eq!(decode_form_urlencoded([]).len(), 0);
223e47cc 1063
970d7e83 1064 let s = "a=1&foo+bar=abc&foo+bar=12+%3D+34".as_bytes();
223e47cc 1065 let form = decode_form_urlencoded(s);
970d7e83
LB
1066 assert_eq!(form.len(), 2);
1067 assert_eq!(form.get_ref(&~"a"), &~[~"1"]);
1068 assert_eq!(form.get_ref(&~"foo bar"), &~[~"abc", ~"12 = 34"]);
223e47cc
LB
1069 */
1070 }
1071}