]> git.proxmox.com Git - mirror_edk2.git/blob - AppPkg/Applications/Python/Python-2.7.10/Lib/urlparse.py
EmbeddedPkg: Extend NvVarStoreFormattedLib LIBRARY_CLASS
[mirror_edk2.git] / AppPkg / Applications / Python / Python-2.7.10 / Lib / urlparse.py
1 """Parse (absolute and relative) URLs.
2
3 urlparse module is based upon the following RFC specifications.
4
5 RFC 3986 (STD66): "Uniform Resource Identifiers" by T. Berners-Lee, R. Fielding
6 and L. Masinter, January 2005.
7
RFC 2732 : "Format for Literal IPv6 Addresses in URL's" by R.Hinden, B.Carpenter
9 and L.Masinter, December 1999.
10
11 RFC 2396: "Uniform Resource Identifiers (URI)": Generic Syntax by T.
12 Berners-Lee, R. Fielding, and L. Masinter, August 1998.
13
14 RFC 2368: "The mailto URL scheme", by P.Hoffman , L Masinter, J. Zwinski, July 1998.
15
16 RFC 1808: "Relative Uniform Resource Locators", by R. Fielding, UC Irvine, June
17 1995.
18
19 RFC 1738: "Uniform Resource Locators (URL)" by T. Berners-Lee, L. Masinter, M.
20 McCahill, December 1994
21
22 RFC 3986 is considered the current standard and any future changes to
23 urlparse module should conform with it. The urlparse module is
currently not entirely compliant with this RFC due to de facto
25 scenarios for parsing, and for backward compatibility purposes, some
26 parsing quirks from older RFCs are retained. The testcases in
test_urlparse.py provide a good indicator of parsing behavior.
28
29 """
30
31 import re
32
# Names exported by "from urlparse import *" (the module's public API).
__all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag",
           "urlsplit", "urlunsplit", "parse_qs", "parse_qsl"]
35
# A classification of schemes ('' means apply by default)

# Schemes for which relative references are resolved against a base
# (consulted by urljoin).
uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'imap',
                 'wais', 'file', 'https', 'shttp', 'mms',
                 'prospero', 'rtsp', 'rtspu', '', 'sftp',
                 'svn', 'svn+ssh']
# Schemes whose '//' prefix introduces a network-location (authority)
# component (consulted by urljoin and urlunsplit).
uses_netloc = ['ftp', 'http', 'gopher', 'nntp', 'telnet',
               'imap', 'wais', 'file', 'mms', 'https', 'shttp',
               'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '',
               'svn', 'svn+ssh', 'sftp', 'nfs', 'git', 'git+ssh']
# Schemes whose last path segment may carry ';'-delimited parameters
# (consulted by urlparse when splitting off the params component).
uses_params = ['ftp', 'hdl', 'prospero', 'http', 'imap',
               'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips',
               'mms', '', 'sftp', 'tel']
48
# These are not actually used anymore, but should stay for backwards
# compatibility.  (They are undocumented, but have a public-looking name,
# so third-party code may import them.)
non_hierarchical = ['gopher', 'hdl', 'mailto', 'news',
                    'telnet', 'wais', 'imap', 'snews', 'sip', 'sips']
uses_query = ['http', 'wais', 'imap', 'https', 'shttp', 'mms',
              'gopher', 'rtsp', 'rtspu', 'sip', 'sips', '']
uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news',
                 'nntp', 'wais', 'https', 'shttp', 'snews',
                 'file', 'prospero', '']
58
# Characters valid in scheme names (RFC 3986: ALPHA / DIGIT / "+" / "-" / ".")
scheme_chars = ('abcdefghijklmnopqrstuvwxyz'
                'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                '0123456789'
                '+-.')

# urlsplit() memoizes its results in _parse_cache; once the cache holds
# MAX_CACHE_SIZE entries it is cleared wholesale to avoid runaway growth.
MAX_CACHE_SIZE = 20
_parse_cache = {}
67
def clear_cache():
    """Clear the parse cache."""
    # _parse_cache memoizes urlsplit() results; urlsplit() itself calls
    # this when the cache reaches MAX_CACHE_SIZE entries.
    _parse_cache.clear()
72
class ResultMixin(object):
    """Computed, read-only attributes shared by the parsed result tuples.

    Every property is derived from self.netloc and yields None when the
    corresponding component is absent.
    """

    @property
    def username(self):
        """User name from the netloc's userinfo part, or None."""
        pieces = self.netloc.rsplit("@", 1)
        if len(pieces) == 2:
            # Drop an optional ":password" suffix from the userinfo.
            return pieces[0].split(":", 1)[0]
        return None

    @property
    def password(self):
        """Password from the netloc's userinfo part, or None."""
        pieces = self.netloc.rsplit("@", 1)
        if len(pieces) == 2 and ":" in pieces[0]:
            return pieces[0].split(":", 1)[1]
        return None

    @property
    def hostname(self):
        """Lower-cased host, without brackets or port, or None."""
        host = self.netloc.split('@')[-1]
        if '[' in host and ']' in host:
            # Bracketed IPv6 literal: keep what sits between '[' and ']'.
            return host.split(']')[0][1:].lower()
        if ':' in host:
            return host.split(':')[0].lower()
        return host.lower() if host else None

    @property
    def port(self):
        """Port as an int in [0, 65535], or None when absent or out of range."""
        tail = self.netloc.split('@')[-1].split(']')[-1]
        if ':' not in tail:
            return None
        digits = tail.split(':')[1]
        if digits:
            value = int(digits, 10)
            # Silently ignore out-of-range ports (historical 2.x behavior).
            if 0 <= value <= 65535:
                return value
        return None
119 from collections import namedtuple
120
class SplitResult(namedtuple('SplitResult', 'scheme netloc path query fragment'), ResultMixin):
    """5-tuple returned by urlsplit(), with named fields plus the
    username/password/hostname/port properties from ResultMixin."""

    __slots__ = ()

    def geturl(self):
        # Reassemble an equivalent URL string from the five components.
        return urlunsplit(self)
127
128
class ParseResult(namedtuple('ParseResult', 'scheme netloc path params query fragment'), ResultMixin):
    """6-tuple returned by urlparse(), with named fields plus the
    username/password/hostname/port properties from ResultMixin."""

    __slots__ = ()

    def geturl(self):
        # Reassemble an equivalent URL string from the six components.
        return urlunparse(self)
135
136
def urlparse(url, scheme='', allow_fragments=True):
    """Parse a URL into 6 components:
    <scheme>://<netloc>/<path>;<params>?<query>#<fragment>

    Returns a ParseResult 6-tuple (scheme, netloc, path, params, query,
    fragment).  Components are not broken up any further (netloc stays a
    single string) and % escapes are not expanded.
    """
    scheme, netloc, path, query, fragment = urlsplit(url, scheme,
                                                     allow_fragments)
    params = ''
    # Only schemes listed in uses_params carry ';'-delimited parameters.
    if ';' in path and scheme in uses_params:
        path, params = _splitparams(path)
    return ParseResult(scheme, netloc, path, params, query, fragment)
150
151 def _splitparams(url):
152 if '/' in url:
153 i = url.find(';', url.rfind('/'))
154 if i < 0:
155 return url, ''
156 else:
157 i = url.find(';')
158 return url[:i], url[i+1:]
159
160 def _splitnetloc(url, start=0):
161 delim = len(url) # position of end of domain part of url, default is end
162 for c in '/?#': # look for delimiters; the order is NOT important
163 wdelim = url.find(c, start) # find first of this delim
164 if wdelim >= 0: # if found
165 delim = min(delim, wdelim) # use earliest delim position
166 return url[start:delim], url[delim:] # return (domain, rest)
167
def urlsplit(url, scheme='', allow_fragments=True):
    """Parse a URL into 5 components:
    <scheme>://<netloc>/<path>?<query>#<fragment>
    Return a 5-tuple: (scheme, netloc, path, query, fragment).
    Note that we don't break the components up in smaller bits
    (e.g. netloc is a single string) and we don't expand % escapes."""
    allow_fragments = bool(allow_fragments)
    # Include the argument types in the cache key so str and unicode
    # inputs (which unparse differently) never share an entry.
    key = url, scheme, allow_fragments, type(url), type(scheme)
    cached = _parse_cache.get(key, None)
    if cached:
        return cached
    if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth
        clear_cache()
    netloc = query = fragment = ''
    i = url.find(':')
    if i > 0:
        if url[:i] == 'http': # optimize the common case
            scheme = url[:i].lower()
            url = url[i+1:]
            if url[:2] == '//':
                netloc, url = _splitnetloc(url, 2)
                # An unmatched bracket means a malformed IPv6 literal.
                if (('[' in netloc and ']' not in netloc) or
                        (']' in netloc and '[' not in netloc)):
                    raise ValueError("Invalid IPv6 URL")
            if allow_fragments and '#' in url:
                url, fragment = url.split('#', 1)
            if '?' in url:
                url, query = url.split('?', 1)
            v = SplitResult(scheme, netloc, url, query, fragment)
            _parse_cache[key] = v
            return v
        # General case: accept the prefix as a scheme only if every
        # character before ':' is a legal scheme character.
        for c in url[:i]:
            if c not in scheme_chars:
                break
        else:
            # make sure "url" is not actually a port number (in which case
            # "scheme" is really part of the path)
            rest = url[i+1:]
            if not rest or any(c not in '0123456789' for c in rest):
                # not a port number
                scheme, url = url[:i].lower(), rest

    # The remainder mirrors the http fast path above, for any scheme.
    if url[:2] == '//':
        netloc, url = _splitnetloc(url, 2)
        if (('[' in netloc and ']' not in netloc) or
                (']' in netloc and '[' not in netloc)):
            raise ValueError("Invalid IPv6 URL")
    if allow_fragments and '#' in url:
        url, fragment = url.split('#', 1)
    if '?' in url:
        url, query = url.split('?', 1)
    v = SplitResult(scheme, netloc, url, query, fragment)
    _parse_cache[key] = v
    return v
222
def urlunparse(data):
    """Reassemble a 6-tuple as produced by urlparse() into a URL string.

    The result may differ slightly from the original URL when that URL
    carried redundant delimiters (e.g. a '?' with an empty query), but
    is an equivalent URL.
    """
    scheme, netloc, path, params, query, fragment = data
    if params:
        # Re-attach the parameters to the path before delegating.
        path = "%s;%s" % (path, params)
    return urlunsplit((scheme, netloc, path, query, fragment))
232
def urlunsplit(data):
    """Combine the elements of a tuple as returned by urlsplit() into a
    complete URL string.  *data* can be any five-item iterable of
    (scheme, netloc, path, query, fragment).

    The result may differ slightly from the originally parsed URL when
    that URL carried unnecessary delimiters (for example a '?' with an
    empty query), but is an equivalent URL.
    """
    scheme, netloc, url, query, fragment = data
    # Emit the '//' authority marker when there is a netloc, or when the
    # scheme conventionally uses one and the path could be mistaken for it.
    if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'):
        if url and not url.startswith('/'):
            url = '/' + url
        url = '//' + (netloc or '') + url
    if scheme:
        url = scheme + ':' + url
    if query:
        url = url + '?' + query
    if fragment:
        url = url + '#' + fragment
    return url
250
def urljoin(base, url, allow_fragments=True):
    """Join a base URL and a possibly relative URL to form an absolute
    interpretation of the latter."""
    # Degenerate cases: with an empty base or url, the other wins outright.
    if not base:
        return url
    if not url:
        return base
    bscheme, bnetloc, bpath, bparams, bquery, bfragment = \
            urlparse(base, '', allow_fragments)
    scheme, netloc, path, params, query, fragment = \
            urlparse(url, bscheme, allow_fragments)
    # Different scheme, or one that has no relative semantics: url stands
    # alone.
    if scheme != bscheme or scheme not in uses_relative:
        return url
    if scheme in uses_netloc:
        # An explicit netloc in url makes it absolute already.
        if netloc:
            return urlunparse((scheme, netloc, path,
                               params, query, fragment))
        netloc = bnetloc
    # Absolute path: keep it, only inheriting scheme and netloc.
    if path[:1] == '/':
        return urlunparse((scheme, netloc, path,
                           params, query, fragment))
    # Empty path (and params): inherit the base path, and the base query
    # too unless url supplied its own.
    if not path and not params:
        path = bpath
        params = bparams
        if not query:
            query = bquery
        return urlunparse((scheme, netloc, path,
                           params, query, fragment))
    # Relative path: resolve against the base's directory, then collapse
    # '.' and '..' segments (pre-RFC 3986 algorithm with known quirks).
    segments = bpath.split('/')[:-1] + path.split('/')
    # XXX The stuff below is bogus in various ways...
    if segments[-1] == '.':
        segments[-1] = ''
    while '.' in segments:
        segments.remove('.')
    # Repeatedly remove the first "segment/.." pair that can collapse;
    # the final segment is deliberately left for the special cases below.
    while 1:
        i = 1
        n = len(segments) - 1
        while i < n:
            if (segments[i] == '..'
                and segments[i-1] not in ('', '..')):
                del segments[i-1:i+1]
                break
            i = i+1
        else:
            break
    if segments == ['', '..']:
        segments[-1] = ''
    elif len(segments) >= 2 and segments[-1] == '..':
        segments[-2:] = ['']
    return urlunparse((scheme, netloc, '/'.join(segments),
                       params, query, fragment))
302
def urldefrag(url):
    """Remove any existing fragment from *url*.

    Returns a tuple of the defragmented URL and the fragment.  If the
    URL contained no fragment, the second element is the empty string.
    """
    if '#' not in url:
        return url, ''
    scheme, netloc, path, params, query, frag = urlparse(url)
    defragged = urlunparse((scheme, netloc, path, params, query, ''))
    return defragged, frag
316
# Feature-detect the 'unicode' builtin (absent in Python 3) at import
# time and define _is_unicode accordingly.
try:
    unicode
except NameError:
    def _is_unicode(x):
        # No 'unicode' type exists, so nothing can be a unicode string.
        return 0
else:
    def _is_unicode(x):
        return isinstance(x, unicode)
325
326 # unquote method for parse_qs and parse_qsl
327 # Cannot use directly from urllib as it would create a circular reference
328 # because urllib uses urlparse methods (urljoin). If you update this function,
# update it also in urllib. This code duplication does not exist in Python 3.
330
_hexdig = '0123456789ABCDEFabcdef'
# Maps every two-hex-digit string (upper/lower/mixed case) to the
# character it encodes, so unquote() can decode escapes with one lookup.
_hextochr = dict((a+b, chr(int(a+b,16)))
                 for a in _hexdig for b in _hexdig)
# Matches maximal runs of ASCII characters; used to split unicode input
# so percent-escapes (which are pure ASCII) can be decoded run by run.
_asciire = re.compile('([\x00-\x7f]+)')
335
def unquote(s):
    """unquote('abc%20def') -> 'abc def'."""
    if _is_unicode(s):
        # Unicode input (Python 2 only): decode escapes within each ASCII
        # run separately, re-widening the decoded bytes via latin-1 so the
        # result stays unicode; non-ASCII runs pass through untouched.
        if '%' not in s:
            return s
        bits = _asciire.split(s)
        res = [bits[0]]
        append = res.append
        # _asciire.split alternates non-ASCII / ASCII chunks; the captured
        # ASCII chunks sit at the odd indices.
        for i in range(1, len(bits), 2):
            append(unquote(str(bits[i])).decode('latin1'))
            append(bits[i + 1])
        return ''.join(res)

    bits = s.split('%')
    # fastpath
    if len(bits) == 1:
        return s
    res = [bits[0]]
    append = res.append
    for item in bits[1:]:
        try:
            # EAFP: a valid escape has its two hex digits in _hextochr;
            # anything else (short or non-hex) keeps its literal '%'.
            append(_hextochr[item[:2]])
            append(item[2:])
        except KeyError:
            append('%')
            append(item)
    return ''.join(res)
363
def parse_qs(qs, keep_blank_values=0, strict_parsing=0):
    """Parse a query given as a string argument.

    Arguments:

    qs: percent-encoded query string to be parsed

    keep_blank_values: flag indicating whether blank values in
        percent-encoded queries should be treated as blank strings.
        A true value indicates that blanks should be retained as
        blank strings.  The default false value indicates that
        blank values are to be ignored and treated as if they were
        not included.

    strict_parsing: flag indicating what to do with parsing errors.
        If false (the default), errors are silently ignored.
        If true, errors raise a ValueError exception.

    Returns a dict mapping each name to the list of its values, in the
    order they appeared in the query string.
    """
    # Use a name that does not shadow the builtin 'dict' (the original
    # local did), and group repeated names with setdefault.
    result = {}
    for name, value in parse_qsl(qs, keep_blank_values, strict_parsing):
        result.setdefault(name, []).append(value)
    return result
389
def parse_qsl(qs, keep_blank_values=0, strict_parsing=0):
    """Parse a query given as a string argument.

    Arguments:

    qs: percent-encoded query string to be parsed

    keep_blank_values: flag indicating whether blank values in
        percent-encoded queries should be treated as blank strings.
        A true value retains blanks as blank strings; the default
        false value drops such fields entirely.

    strict_parsing: flag indicating what to do with parsing errors.
        If false (the default), errors are silently ignored; if true,
        they raise a ValueError exception.

    Returns a list of (name, value) pairs.
    """
    # Fields may be separated by either '&' or ';'.
    fields = [piece for chunk in qs.split('&') for piece in chunk.split(';')]
    result = []
    for field in fields:
        if not field and not strict_parsing:
            continue
        parts = field.split('=', 1)
        if len(parts) != 2:
            if strict_parsing:
                raise ValueError("bad query field: %r" % (field,))
            # Handle case of a control-name with no equal sign
            if not keep_blank_values:
                continue
            parts.append('')
        if parts[1] or keep_blank_values:
            name = unquote(parts[0].replace('+', ' '))
            value = unquote(parts[1].replace('+', ' '))
            result.append((name, value))

    return result