"""Parse (absolute and relative) URLs.

urlparse module is based upon the following RFC specifications.

RFC 3986 (STD66): "Uniform Resource Identifiers" by T. Berners-Lee, R. Fielding
and L. Masinter, January 2005.

RFC 2732 : "Format for Literal IPv6 Addresses in URL's by R.Hinden, B.Carpenter
and L.Masinter, December 1999.

RFC 2396: "Uniform Resource Identifiers (URI)": Generic Syntax by T.
Berners-Lee, R. Fielding, and L. Masinter, August 1998.

RFC 2368: "The mailto URL scheme", by P.Hoffman, L Masinter, J. Zwinski, July 1998.

RFC 1808: "Relative Uniform Resource Locators", by R. Fielding, UC Irvine, June
1995.

RFC 1738: "Uniform Resource Locators (URL)" by T. Berners-Lee, L. Masinter, M.
McCahill, December 1994

RFC 3986 is considered the current standard and any future changes to
urlparse module should conform with it.  The urlparse module is
currently not entirely compliant with this RFC due to defacto
scenarios for parsing, and for backward compatibility purposes, some
parsing quirks from older RFCs are retained.  The testcases in
test_urlparse.py provides a good indicator of parsing behavior.
"""
# Public API re-exported by "from urlparse import *".
__all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag",
           "urlsplit", "urlunsplit", "parse_qs", "parse_qsl"]
# A classification of schemes ('' means apply by default).
# Schemes listed here allow relative references to be resolved
# against a base URL (see urljoin()).
uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'imap',
                 'wais', 'file', 'https', 'shttp', 'mms',
                 'prospero', 'rtsp', 'rtspu', '', 'sftp',
                 'svn', 'svn+ssh']
# Schemes whose URLs carry a //netloc (authority) component.
uses_netloc = ['ftp', 'http', 'gopher', 'nntp', 'telnet',
               'imap', 'wais', 'file', 'mms', 'https', 'shttp',
               'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '',
               'svn', 'svn+ssh', 'sftp', 'nfs', 'git', 'git+ssh']
# Schemes whose path may carry a ';parameters' suffix (see urlparse()).
uses_params = ['ftp', 'hdl', 'prospero', 'http', 'imap',
               'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips',
               'mms', '', 'sftp', 'tel']
# These are not actually used anymore, but should stay for backwards
# compatibility.  (They are undocumented, but have a public-looking name.)
non_hierarchical = ['gopher', 'hdl', 'mailto', 'news',
                    'telnet', 'wais', 'imap', 'snews', 'sip', 'sips']
# Schemes whose URLs may carry a ?query component.
uses_query = ['http', 'wais', 'imap', 'https', 'shttp', 'mms',
              'gopher', 'rtsp', 'rtspu', 'sip', 'sips', '']
# Schemes whose URLs may carry a #fragment component.
uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news',
                 'nntp', 'wais', 'https', 'shttp', 'snews',
                 'file', 'prospero', '']
# Characters valid in scheme names (RFC 3986: ALPHA / DIGIT / "+" / "-" / ".")
scheme_chars = ('abcdefghijklmnopqrstuvwxyz'
                'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                '0123456789'
                '+-.')
69 """Clear the parse cache."""
class ResultMixin(object):
    """Shared methods for the parsed result objects."""

    @property
    def username(self):
        # Everything before the last '@' is userinfo; before the first
        # ':' inside it is the user name.
        netloc = self.netloc
        if "@" in netloc:
            userinfo = netloc.rsplit("@", 1)[0]
            if ":" in userinfo:
                userinfo = userinfo.split(":", 1)[0]
            return userinfo
        return None

    @property
    def password(self):
        # The part of userinfo after the first ':' is the password.
        netloc = self.netloc
        if "@" in netloc:
            userinfo = netloc.rsplit("@", 1)[0]
            if ":" in userinfo:
                return userinfo.split(":", 1)[1]
        return None

    @property
    def hostname(self):
        # Strip any userinfo, then handle the bracketed IPv6 form,
        # a trailing :port, and the empty netloc, in that order.
        netloc = self.netloc.split('@')[-1]
        if '[' in netloc and ']' in netloc:
            return netloc.split(']')[0][1:].lower()
        elif ':' in netloc:
            return netloc.split(':')[0].lower()
        elif netloc == '':
            return None
        else:
            return netloc.lower()

    @property
    def port(self):
        # Strip userinfo and any bracketed IPv6 host before looking
        # for the ':port' suffix; out-of-range values yield None.
        netloc = self.netloc.split('@')[-1].split(']')[-1]
        if ':' in netloc:
            port = netloc.split(':')[1]
            if port:
                port = int(port, 10)
                # verify legal port
                if (0 <= port <= 65535):
                    return port
        return None
from collections import namedtuple

class SplitResult(namedtuple('SplitResult', 'scheme netloc path query fragment'), ResultMixin):
    """5-tuple result type returned by urlsplit()."""

    __slots__ = ()

    def geturl(self):
        # Reassemble the original URL from the five components.
        return urlunsplit(self)
class ParseResult(namedtuple('ParseResult', 'scheme netloc path params query fragment'), ResultMixin):
    """6-tuple result type returned by urlparse()."""

    __slots__ = ()

    def geturl(self):
        # Reassemble the original URL from the six components.
        return urlunparse(self)
def urlparse(url, scheme='', allow_fragments=True):
    """Parse a URL into 6 components:
    <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
    Return a 6-tuple: (scheme, netloc, path, params, query, fragment).
    Note that we don't break the components up in smaller bits
    (e.g. netloc is a single string) and we don't expand % escapes."""
    # Delegate the real splitting to urlsplit(), then peel a ';params'
    # suffix off the path for schemes known to use parameters.
    # (Renamed the local from 'tuple' so it no longer shadows the builtin.)
    split = urlsplit(url, scheme, allow_fragments)
    scheme, netloc, url, query, fragment = split
    if scheme in uses_params and ';' in url:
        url, params = _splitparams(url)
    else:
        params = ''
    return ParseResult(scheme, netloc, url, params, query, fragment)
151 def _splitparams(url
):
153 i
= url
.find(';', url
.rfind('/'))
158 return url
[:i
], url
[i
+1:]
160 def _splitnetloc(url
, start
=0):
161 delim
= len(url
) # position of end of domain part of url, default is end
162 for c
in '/?#': # look for delimiters; the order is NOT important
163 wdelim
= url
.find(c
, start
) # find first of this delim
164 if wdelim
>= 0: # if found
165 delim
= min(delim
, wdelim
) # use earliest delim position
166 return url
[start
:delim
], url
[delim
:] # return (domain, rest)
def urlsplit(url, scheme='', allow_fragments=True):
    """Parse a URL into 5 components:
    <scheme>://<netloc>/<path>?<query>#<fragment>
    Return a 5-tuple: (scheme, netloc, path, query, fragment).
    Note that we don't break the components up in smaller bits
    (e.g. netloc is a single string) and we don't expand % escapes."""
    allow_fragments = bool(allow_fragments)
    # Results are memoized; the key includes the argument types so that
    # str and unicode inputs are cached separately.
    key = url, scheme, allow_fragments, type(url), type(scheme)
    cached = _parse_cache.get(key, None)
    if cached:
        return cached
    if len(_parse_cache) >= MAX_CACHE_SIZE:  # avoid runaway growth
        clear_cache()
    netloc = query = fragment = ''
    i = url.find(':')
    if i > 0:
        if url[:i] == 'http':  # optimize the common case
            scheme = url[:i].lower()
            url = url[i+1:]
            if url[:2] == '//':
                netloc, url = _splitnetloc(url, 2)
                # Brackets must come in matched pairs for IPv6 literals.
                if (('[' in netloc and ']' not in netloc) or
                        (']' in netloc and '[' not in netloc)):
                    raise ValueError("Invalid IPv6 URL")
            if allow_fragments and '#' in url:
                url, fragment = url.split('#', 1)
            if '?' in url:
                url, query = url.split('?', 1)
            v = SplitResult(scheme, netloc, url, query, fragment)
            _parse_cache[key] = v
            return v
        for c in url[:i]:
            if c not in scheme_chars:
                break
        else:
            # make sure "url" is not actually a port number (in which case
            # "scheme" is really part of the path)
            rest = url[i+1:]
            if not rest or any(c not in '0123456789' for c in rest):
                # not a port number
                scheme, url = url[:i].lower(), rest

    if url[:2] == '//':
        netloc, url = _splitnetloc(url, 2)
        if (('[' in netloc and ']' not in netloc) or
                (']' in netloc and '[' not in netloc)):
            raise ValueError("Invalid IPv6 URL")
    if allow_fragments and '#' in url:
        url, fragment = url.split('#', 1)
    if '?' in url:
        url, query = url.split('?', 1)
    v = SplitResult(scheme, netloc, url, query, fragment)
    _parse_cache[key] = v
    return v
def urlunparse(data):
    """Put a parsed URL back together again.  This may result in a
    slightly different, but equivalent URL, if the URL that was parsed
    originally had redundant delimiters, e.g. a ? with an empty query
    (the draft states that these are equivalent)."""
    scheme, netloc, url, params, query, fragment = data
    # Re-attach the path parameters, then hand off to urlunsplit().
    if params:
        url = "%s;%s" % (url, params)
    return urlunsplit((scheme, netloc, url, query, fragment))
def urlunsplit(data):
    """Combine the elements of a tuple as returned by urlsplit() into a
    complete URL as a string.  The data argument can be any five-item iterable.
    This may result in a slightly different, but equivalent URL, if the URL that
    was parsed originally had unnecessary delimiters (for example, a ? with an
    empty query; the RFC states that these are equivalent)."""
    scheme, netloc, url, query, fragment = data
    # A '//' authority section is emitted when there is a netloc, or when
    # the scheme customarily has one and the path could be misread as one.
    if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'):
        if url and url[:1] != '/':
            url = '/' + url
        url = '//' + (netloc or '') + url
    if scheme:
        url = scheme + ':' + url
    if query:
        url = url + '?' + query
    if fragment:
        url = url + '#' + fragment
    return url
def urljoin(base, url, allow_fragments=True):
    """Join a base URL and a possibly relative URL to form an absolute
    interpretation of the latter."""
    # Degenerate cases: an empty base or an empty url leaves the other
    # untouched.
    if not base:
        return url
    if not url:
        return base
    bscheme, bnetloc, bpath, bparams, bquery, bfragment = \
            urlparse(base, '', allow_fragments)
    scheme, netloc, path, params, query, fragment = \
            urlparse(url, bscheme, allow_fragments)
    # A different (or non-relative) scheme means url stands on its own.
    if scheme != bscheme or scheme not in uses_relative:
        return url
    if scheme in uses_netloc:
        if netloc:
            return urlunparse((scheme, netloc, path,
                               params, query, fragment))
        netloc = bnetloc
    if path[:1] == '/':
        # Absolute path: keep it, only inherit the authority.
        return urlunparse((scheme, netloc, path,
                           params, query, fragment))
    if not path and not params:
        # Same-document reference: inherit path/params (and query if absent).
        path = bpath
        params = bparams
        if not query:
            query = bquery
        return urlunparse((scheme, netloc, path,
                           params, query, fragment))
    # Merge the base path (minus its last segment) with the relative path.
    segments = bpath.split('/')[:-1] + path.split('/')
    # XXX The stuff below is bogus in various ways...
    if segments[-1] == '.':
        segments[-1] = ''
    while '.' in segments:
        segments.remove('.')
    # Collapse '..' segments against their predecessor, one pass at a time.
    while 1:
        i = 1
        n = len(segments) - 1
        while i < n:
            if (segments[i] == '..'
                and segments[i-1] not in ('', '..')):
                del segments[i-1:i+1]
                break
            i = i+1
        else:
            break
    if segments == ['', '..']:
        segments[-1] = ''
    elif len(segments) >= 2 and segments[-1] == '..':
        segments[-2:] = ['']
    return urlunparse((scheme, netloc, '/'.join(segments),
                       params, query, fragment))
304 """Removes any existing fragment from URL.
306 Returns a tuple of the defragmented URL and the fragment. If
307 the URL contained no fragments, the second element is the
311 s
, n
, p
, a
, q
, frag
= urlparse(url
)
312 defrag
= urlunparse((s
, n
, p
, a
, q
, ''))
def _is_unicode(x):
    # True when x is a Python 2 'unicode' string (as opposed to 'str').
    return isinstance(x, unicode)
# unquote method for parse_qs and parse_qsl
# Cannot use directly from urllib as it would create a circular reference
# because urllib uses urlparse methods (urljoin).  If you update this function,
# update it also in urllib.  This code duplication does not exist in Python 3.
331 _hexdig
= '0123456789ABCDEFabcdef'
332 _hextochr
= dict((a
+b
, chr(int(a
+b
,16)))
333 for a
in _hexdig
for b
in _hexdig
)
334 _asciire
= re
.compile('([\x00-\x7f]+)')
337 """unquote('abc%20def') -> 'abc def'."""
341 bits
= _asciire
.split(s
)
344 for i
in range(1, len(bits
), 2):
345 append(unquote(str(bits
[i
])).decode('latin1'))
355 for item
in bits
[1:]:
357 append(_hextochr
[item
[:2]])
def parse_qs(qs, keep_blank_values=0, strict_parsing=0):
    """Parse a query given as a string argument.

    Arguments:

    qs: percent-encoded query string to be parsed

    keep_blank_values: flag indicating whether blank values in
        percent-encoded queries should be treated as blank strings.
        A true value indicates that blanks should be retained as
        blank strings.  The default false value indicates that
        blank values are to be ignored and treated as if they were
        not included.

    strict_parsing: flag indicating what to do with parsing errors.
        If false (the default), errors are silently ignored.
        If true, errors raise a ValueError exception.

    Returns a dict mapping each name to a list of its values.
    """
    # Renamed the accumulator from 'dict' so it no longer shadows the builtin.
    parsed = {}
    for name, value in parse_qsl(qs, keep_blank_values, strict_parsing):
        if name in parsed:
            parsed[name].append(value)
        else:
            parsed[name] = [value]
    return parsed
def parse_qsl(qs, keep_blank_values=0, strict_parsing=0):
    """Parse a query given as a string argument.

    Arguments:

    qs: percent-encoded query string to be parsed

    keep_blank_values: flag indicating whether blank values in
        percent-encoded queries should be treated as blank strings.  A
        true value indicates that blanks should be retained as blank
        strings.  The default false value indicates that blank values
        are to be ignored and treated as if they were not included.

    strict_parsing: flag indicating what to do with parsing errors.  If
        false (the default), errors are silently ignored.  If true,
        errors raise a ValueError exception.

    Returns a list, as G-d intended.
    """
    # Both '&' and ';' are accepted as pair separators.
    pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
    r = []
    for name_value in pairs:
        if not name_value and not strict_parsing:
            continue
        nv = name_value.split('=', 1)
        if len(nv) != 2:
            if strict_parsing:
                # Parenthesized raise form: identical behavior on Python 2,
                # replacing the legacy 'raise ValueError, msg' statement.
                raise ValueError("bad query field: %r" % (name_value,))
            # Handle case of a control-name with no equal sign
            if keep_blank_values:
                nv.append('')
            else:
                continue
        if len(nv[1]) or keep_blank_values:
            name = unquote(nv[0].replace('+', ' '))
            value = unquote(nv[1].replace('+', ' '))
            r.append((name, value))
    return r