]> git.proxmox.com Git - mirror_edk2.git/blame - AppPkg/Applications/Python/Python-2.7.10/Lib/urlparse.py
EmbeddedPkg: Extend NvVarStoreFormattedLib LIBRARY_CLASS
[mirror_edk2.git] / AppPkg / Applications / Python / Python-2.7.10 / Lib / urlparse.py
CommitLineData
3257aa99
DM
1"""Parse (absolute and relative) URLs.\r
2\r
3urlparse module is based upon the following RFC specifications.\r
4\r
5RFC 3986 (STD66): "Uniform Resource Identifiers" by T. Berners-Lee, R. Fielding\r
6and L. Masinter, January 2005.\r
7\r
RFC 2732: "Format for Literal IPv6 Addresses in URL's" by R. Hinden, B. Carpenter
and L. Masinter, December 1999.
10\r
11RFC 2396: "Uniform Resource Identifiers (URI)": Generic Syntax by T.\r
12Berners-Lee, R. Fielding, and L. Masinter, August 1998.\r
13\r
14RFC 2368: "The mailto URL scheme", by P.Hoffman , L Masinter, J. Zwinski, July 1998.\r
15\r
16RFC 1808: "Relative Uniform Resource Locators", by R. Fielding, UC Irvine, June\r
171995.\r
18\r
19RFC 1738: "Uniform Resource Locators (URL)" by T. Berners-Lee, L. Masinter, M.\r
20McCahill, December 1994\r
21\r
RFC 3986 is considered the current standard and any future changes to the
urlparse module should conform with it.  The urlparse module is
currently not entirely compliant with this RFC due to de facto
scenarios for parsing, and for backward compatibility purposes, some
parsing quirks from older RFCs are retained.  The test cases in
test_urlparse.py provide a good indicator of parsing behavior.
28\r
29"""\r
30\r
31import re\r
32\r
# Names exported by ``from urlparse import *``.
__all__ = [
    "urlparse",
    "urlunparse",
    "urljoin",
    "urldefrag",
    "urlsplit",
    "urlunsplit",
    "parse_qs",
    "parse_qsl",
]
35\r
# A classification of schemes ('' means apply by default)
uses_relative = [
    'ftp', 'http', 'gopher', 'nntp', 'imap', 'wais', 'file', 'https',
    'shttp', 'mms', 'prospero', 'rtsp', 'rtspu', '', 'sftp', 'svn',
    'svn+ssh',
]
uses_netloc = [
    'ftp', 'http', 'gopher', 'nntp', 'telnet', 'imap', 'wais', 'file',
    'mms', 'https', 'shttp', 'snews', 'prospero', 'rtsp', 'rtspu',
    'rsync', '', 'svn', 'svn+ssh', 'sftp', 'nfs', 'git', 'git+ssh',
]
uses_params = [
    'ftp', 'hdl', 'prospero', 'http', 'imap', 'https', 'shttp', 'rtsp',
    'rtspu', 'sip', 'sips', 'mms', '', 'sftp', 'tel',
]

# These are not actually used anymore, but should stay for backwards
# compatibility.  (They are undocumented, but have a public-looking name.)
non_hierarchical = [
    'gopher', 'hdl', 'mailto', 'news', 'telnet', 'wais', 'imap', 'snews',
    'sip', 'sips',
]
uses_query = [
    'http', 'wais', 'imap', 'https', 'shttp', 'mms', 'gopher', 'rtsp',
    'rtspu', 'sip', 'sips', '',
]
uses_fragment = [
    'ftp', 'hdl', 'http', 'gopher', 'news', 'nntp', 'wais', 'https',
    'shttp', 'snews', 'file', 'prospero', '',
]

# Characters valid in scheme names: ASCII letters, digits, and "+-.".
scheme_chars = (
    'abcdefghijklmnopqrstuvwxyz'
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    '0123456789'
    '+-.'
)
64\r
# Size threshold for the parse cache, and the cache mapping itself.
MAX_CACHE_SIZE = 20
_parse_cache = {}


def clear_cache():
    """Clear the parse cache."""
    # Empty the dict in place so existing references to it stay valid.
    _parse_cache.clear()
71\r
72\r
class ResultMixin(object):
    """Shared methods for the parsed result objects.

    Every property is computed from ``self.netloc``; the class mixing this
    in must provide that attribute.
    """

    @property
    def username(self):
        """User name in front of the last '@' of netloc, or None."""
        userinfo, at_sign, _ = self.netloc.rpartition("@")
        if not at_sign:
            return None
        # Anything after the first ':' of the userinfo is the password.
        return userinfo.partition(":")[0]

    @property
    def password(self):
        """Password following the first ':' of the userinfo, or None."""
        userinfo, at_sign, _ = self.netloc.rpartition("@")
        if at_sign:
            _, colon, secret = userinfo.partition(":")
            if colon:
                return secret
        return None

    @property
    def hostname(self):
        """Lower-cased host part of netloc, or None for an empty netloc."""
        host = self.netloc.rpartition('@')[2]
        if '[' in host and ']' in host:
            # Bracketed literal: keep what sits between '[' and ']'.
            return host.partition(']')[0][1:].lower()
        if ':' in host:
            return host.partition(':')[0].lower()
        if host == '':
            return None
        return host.lower()

    @property
    def port(self):
        """Port number as an int, or None if absent or outside 0..65535.

        A non-numeric port makes ``int()`` raise ValueError.
        """
        # Drop the userinfo and any bracketed host before looking for ':'.
        tail = self.netloc.rpartition('@')[2].rpartition(']')[2]
        if ':' not in tail:
            return None
        digits = tail.split(':')[1]
        if not digits:
            return None
        number = int(digits, 10)
        # verify legal port
        if 0 <= number <= 65535:
            return number
        return None
118\r
119from collections import namedtuple\r
120\r
class SplitResult(
        namedtuple('SplitResult', 'scheme netloc path query fragment'),
        ResultMixin):
    """5-field named tuple (scheme, netloc, path, query, fragment)."""

    # No per-instance __dict__: instances stay plain tuples.
    __slots__ = ()

    def geturl(self):
        """Reassemble the fields into a URL string via urlunsplit()."""
        return urlunsplit(self)
127\r
128\r
class ParseResult(
        namedtuple('ParseResult',
                   'scheme netloc path params query fragment'),
        ResultMixin):
    """6-field named tuple (scheme, netloc, path, params, query, fragment)."""

    # No per-instance __dict__: instances stay plain tuples.
    __slots__ = ()

    def geturl(self):
        """Reassemble the fields into a URL string via urlunparse()."""
        return urlunparse(self)
135\r
136\r
def urlparse(url, scheme='', allow_fragments=True):
    """Parse a URL into 6 components:
    <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
    Return a 6-tuple: (scheme, netloc, path, params, query, fragment).
    Note that we don't break the components up in smaller bits
    (e.g. netloc is a single string) and we don't expand % escapes."""
    scheme, netloc, url, query, fragment = urlsplit(
        url, scheme, allow_fragments)
    params = ''
    # Params are only split off for schemes listed in uses_params.
    if scheme in uses_params and ';' in url:
        url, params = _splitparams(url)
    return ParseResult(scheme, netloc, url, params, query, fragment)
150\r
def _splitparams(url):
    """Split *url* into (path, params) at a ';'.

    When the path contains a '/', only a ';' at or after the last '/' is
    used, and the path is returned whole with empty params if there is none.
    Without a '/', the split happens at the first ';'.
    """
    last_slash = url.rfind('/')
    if last_slash >= 0:
        semi = url.find(';', last_slash)
        if semi < 0:
            return url, ''
    else:
        semi = url.find(';')
    return url[:semi], url[semi + 1:]
159\r
def _splitnetloc(url, start=0):
    """Return (domain, rest) for *url*, scanning from index *start*.

    The domain ends at the earliest '/', '?' or '#' found at or after
    *start*, or at the end of the string when none of them occurs.
    """
    found = [pos for pos in (url.find(ch, start) for ch in '/?#')
             if pos >= 0]
    end = min(found) if found else len(url)
    return url[start:end], url[end:]
167\r
def urlsplit(url, scheme='', allow_fragments=True):
    """Parse a URL into 5 components:
    <scheme>://<netloc>/<path>?<query>#<fragment>
    Return a 5-tuple: (scheme, netloc, path, query, fragment).
    Note that we don't break the components up in smaller bits
    (e.g. netloc is a single string) and we don't expand % escapes.

    Results are memoised in the module-level parse cache.  Raises
    ValueError("Invalid IPv6 URL") when the netloc holds only one of the
    brackets '[' and ']'."""
    allow_fragments = bool(allow_fragments)
    cache_key = url, scheme, allow_fragments, type(url), type(scheme)
    memo = _parse_cache.get(cache_key, None)
    if memo:
        return memo
    if len(_parse_cache) >= MAX_CACHE_SIZE:  # avoid runaway growth
        clear_cache()

    colon = url.find(':')
    if colon > 0:
        prefix, rest = url[:colon], url[colon + 1:]
        if prefix == 'http':
            # Common case: accepted without the port-number check below.
            scheme, url = prefix.lower(), rest
        elif all(ch in scheme_chars for ch in prefix):
            # make sure "url" is not actually a port number (in which case
            # "scheme" is really part of the path)
            if not rest or any(ch not in '0123456789' for ch in rest):
                # not a port number
                scheme, url = prefix.lower(), rest

    netloc = query = fragment = ''
    if url[:2] == '//':
        netloc, url = _splitnetloc(url, 2)
        # Exactly one of the two brackets present means a broken literal.
        if ('[' in netloc) != (']' in netloc):
            raise ValueError("Invalid IPv6 URL")
    if allow_fragments and '#' in url:
        url, fragment = url.split('#', 1)
    if '?' in url:
        url, query = url.split('?', 1)
    result = SplitResult(scheme, netloc, url, query, fragment)
    _parse_cache[cache_key] = result
    return result
222\r
def urlunparse(data):
    """Put a parsed URL back together again.  This may result in a
    slightly different, but equivalent URL, if the URL that was parsed
    originally had redundant delimiters, e.g. a ? with an empty query
    (the draft states that these are equivalent)."""
    scheme, netloc, url, params, query, fragment = data
    # Fold non-empty params back into the path, then defer to urlunsplit().
    path = "%s;%s" % (url, params) if params else url
    return urlunsplit((scheme, netloc, path, query, fragment))
232\r
def urlunsplit(data):
    """Combine the elements of a tuple as returned by urlsplit() into a
    complete URL as a string.  The data argument can be any five-item
    iterable.  This may result in a slightly different, but equivalent URL,
    if the URL that was parsed originally had unnecessary delimiters (for
    example, a ? with an empty query; the RFC states that these are
    equivalent)."""
    scheme, netloc, url, query, fragment = data
    # Emit '//' when there is a netloc, or when the scheme is a netloc
    # scheme and the path does not already begin with '//'.
    wants_authority = netloc or (
        scheme and scheme in uses_netloc and url[:2] != '//')
    if wants_authority:
        if url and url[:1] != '/':
            url = '/' + url
        url = '//' + (netloc or '') + url
    if scheme:
        url = scheme + ':' + url
    if query:
        url = url + '?' + query
    if fragment:
        url = url + '#' + fragment
    return url
250\r
def urljoin(base, url, allow_fragments=True):
    """Join a base URL and a possibly relative URL to form an absolute
    interpretation of the latter."""
    if not base:
        return url
    if not url:
        return base
    bscheme, bnetloc, bpath, bparams, bquery, bfragment = urlparse(
        base, '', allow_fragments)
    scheme, netloc, path, params, query, fragment = urlparse(
        url, bscheme, allow_fragments)

    # A different scheme, or one without relative forms: url stands alone.
    if scheme != bscheme or scheme not in uses_relative:
        return url
    if scheme in uses_netloc:
        if netloc:
            return urlunparse((scheme, netloc, path,
                               params, query, fragment))
        netloc = bnetloc
    if path[:1] == '/':
        return urlunparse((scheme, netloc, path,
                           params, query, fragment))
    if not path and not params:
        # Nothing but query/fragment given: inherit the base path/params.
        path = bpath
        params = bparams
        if not query:
            query = bquery
        return urlunparse((scheme, netloc, path,
                           params, query, fragment))

    # Merge: base path without its last segment, plus the relative path.
    segments = bpath.split('/')[:-1] + path.split('/')
    # XXX The stuff below is bogus in various ways...
    if segments[-1] == '.':
        segments[-1] = ''
    segments = [seg for seg in segments if seg != '.']
    # Repeatedly drop the first "<name>/.." pair until none remains; the
    # first and last segments are never treated as the '..' of a pair.
    while True:
        last = len(segments) - 1
        for idx in range(1, last):
            if (segments[idx] == '..'
                    and segments[idx - 1] not in ('', '..')):
                del segments[idx - 1:idx + 1]
                break
        else:
            break
    if segments == ['', '..']:
        segments[-1] = ''
    elif len(segments) >= 2 and segments[-1] == '..':
        segments[-2:] = ['']
    return urlunparse((scheme, netloc, '/'.join(segments),
                       params, query, fragment))
302\r
def urldefrag(url):
    """Removes any existing fragment from URL.

    Returns a tuple of the defragmented URL and the fragment.  If
    the URL contained no fragments, the second element is the
    empty string.
    """
    if '#' not in url:
        return url, ''
    scheme, netloc, path, params, query, fragment = urlparse(url)
    stripped = urlunparse((scheme, netloc, path, params, query, ''))
    return stripped, fragment
316\r
# Pick the _is_unicode() implementation according to whether the
# interpreter provides a ``unicode`` builtin.
try:
    unicode
except NameError:
    def _is_unicode(x):
        """No ``unicode`` type exists, so nothing is unicode: return 0."""
        return 0
else:
    def _is_unicode(x):
        """Return whether *x* is an instance of ``unicode``."""
        return isinstance(x, unicode)
325\r
# unquote method for parse_qs and parse_qsl
# Cannot use directly from urllib as it would create a circular reference
# because urllib uses urlparse methods (urljoin).  If you update this function,
# update it also in urllib.  This code duplication does not exist in Python 3.
330\r
# Hex digits accepted in a %XX escape, in both letter cases.
_hexdig = '0123456789ABCDEFabcdef'
# Lookup table from every two-digit hex string to the character it encodes.
_hextochr = {
    hi + lo: chr(int(hi + lo, 16))
    for hi in _hexdig
    for lo in _hexdig
}
# Captures runs of ASCII characters.
_asciire = re.compile('([\x00-\x7f]+)')
335\r
def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    A '%' that is not followed by two hex digits is left in place.
    """
    if _is_unicode(s):
        if '%' not in s:
            return s
        # The split yields alternating non-ASCII / ASCII runs; only the
        # ASCII runs (odd indexes) can hold escapes, so unquote those.
        chunks = _asciire.split(s)
        out = [chunks[0]]
        for idx in range(1, len(chunks), 2):
            out.append(unquote(str(chunks[idx])).decode('latin1'))
            out.append(chunks[idx + 1])
        return ''.join(out)

    pieces = s.split('%')
    # fastpath
    if len(pieces) == 1:
        return s
    out = [pieces[0]]
    for piece in pieces[1:]:
        code = piece[:2]
        if code in _hextochr:
            out.append(_hextochr[code])
            out.append(piece[2:])
        else:
            # Not a valid escape: put the '%' back untouched.
            out.append('%')
            out.append(piece)
    return ''.join(out)
363\r
def parse_qs(qs, keep_blank_values=0, strict_parsing=0):
    """Parse a query given as a string argument.

        Arguments:

        qs: percent-encoded query string to be parsed

        keep_blank_values: flag indicating whether blank values in
            percent-encoded queries should be treated as blank strings.
            A true value indicates that blanks should be retained as
            blank strings.  The default false value indicates that
            blank values are to be ignored and treated as if they were
            not included.

        strict_parsing: flag indicating what to do with parsing errors.
            If false (the default), errors are silently ignored.
            If true, errors raise a ValueError exception.

        Returns a dict mapping each name to the list of its values, in
        the order they appear in the query.
    """
    grouped = {}
    for name, value in parse_qsl(qs, keep_blank_values, strict_parsing):
        grouped.setdefault(name, []).append(value)
    return grouped
389\r
def parse_qsl(qs, keep_blank_values=0, strict_parsing=0):
    """Parse a query given as a string argument.

        Arguments:

        qs: percent-encoded query string to be parsed

        keep_blank_values: flag indicating whether blank values in
            percent-encoded queries should be treated as blank strings.  A
            true value indicates that blanks should be retained as blank
            strings.  The default false value indicates that blank values
            are to be ignored and treated as if they were not included.

        strict_parsing: flag indicating what to do with parsing errors.  If
            false (the default), errors are silently ignored.  If true,
            errors raise a ValueError exception.

        Returns a list of (name, value) tuples in query order.

        Raises ValueError when strict_parsing is true and a field has
        no '=' sign.
    """
    # Fields may be separated by either '&' or ';'.
    fields = [field for chunk in qs.split('&') for field in chunk.split(';')]
    result = []
    for name_value in fields:
        if not name_value and not strict_parsing:
            continue
        nv = name_value.split('=', 1)
        if len(nv) != 2:
            if strict_parsing:
                # Call form of raise: valid on both Python 2 and Python 3,
                # unlike the old "raise ValueError, msg" statement.
                raise ValueError("bad query field: %r" % (name_value,))
            # Handle case of a control-name with no equal sign
            if keep_blank_values:
                nv.append('')
            else:
                continue
        if len(nv[1]) or keep_blank_values:
            # '+' stands for a space in query strings; decode it before
            # expanding the % escapes.
            name = unquote(nv[0].replace('+', ' '))
            value = unquote(nv[1].replace('+', ' '))
            result.append((name, value))

    return result