[mirror_edk2.git] / AppPkg / Applications / Python / Python-2.7.10 / Lib / urlparse.py

"""Parse (absolute and relative) URLs.\r
\r
urlparse module is based upon the following RFC specifications.\r
\r
RFC 3986 (STD66): "Uniform Resource Identifiers" by T. Berners-Lee, R. Fielding\r
and L.  Masinter, January 2005.\r
\r
RFC 2732: "Format for Literal IPv6 Addresses in URL's" by R. Hinden,
B. Carpenter and L. Masinter, December 1999.
\r
RFC 2396: "Uniform Resource Identifiers (URI): Generic Syntax" by T.
Berners-Lee, R. Fielding, and L. Masinter, August 1998.
\r
RFC 2368: "The mailto URL scheme", by P.Hoffman , L Masinter, J. Zwinski, July 1998.\r
\r
RFC 1808: "Relative Uniform Resource Locators", by R. Fielding, UC Irvine, June\r
1995.\r
\r
RFC 1738: "Uniform Resource Locators (URL)" by T. Berners-Lee, L. Masinter, M.\r
McCahill, December 1994\r
\r
RFC 3986 is considered the current standard and any future changes to
the urlparse module should conform with it.  The urlparse module is
currently not entirely compliant with this RFC due to de facto
scenarios for parsing, and for backward compatibility purposes, some
parsing quirks from older RFCs are retained.  The test cases in
test_urlparse.py provide a good indicator of parsing behavior.
\r
"""\r
\r
import re\r
\r
# Names exported by ``from urlparse import *``.
__all__ = [
    "urlparse",
    "urlunparse",
    "urljoin",
    "urldefrag",
    "urlsplit",
    "urlunsplit",
    "parse_qs",
    "parse_qsl",
]
\r
# Scheme classification tables.  The empty string entry ('') means the
# classification applies by default, i.e. when no scheme is present.

# Schemes whose URLs urljoin() will resolve against a base URL.
uses_relative = [
    'ftp', 'http', 'gopher', 'nntp', 'imap',
    'wais', 'file', 'https', 'shttp', 'mms',
    'prospero', 'rtsp', 'rtspu', '', 'sftp',
    'svn', 'svn+ssh',
]

# Schemes for which urlunsplit() emits a '//' authority part and for which
# urljoin() inherits the base netloc.
uses_netloc = [
    'ftp', 'http', 'gopher', 'nntp', 'telnet',
    'imap', 'wais', 'file', 'mms', 'https', 'shttp',
    'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '',
    'svn', 'svn+ssh', 'sftp', 'nfs', 'git', 'git+ssh',
]

# Schemes for which urlparse() splits ';params' off the last path segment.
uses_params = [
    'ftp', 'hdl', 'prospero', 'http', 'imap',
    'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips',
    'mms', '', 'sftp', 'tel',
]

# The three tables below are no longer consulted by this module.  They are
# undocumented but carry public-looking names, so they are kept for
# backwards compatibility.
non_hierarchical = [
    'gopher', 'hdl', 'mailto', 'news',
    'telnet', 'wais', 'imap', 'snews', 'sip', 'sips',
]
uses_query = [
    'http', 'wais', 'imap', 'https', 'shttp', 'mms',
    'gopher', 'rtsp', 'rtspu', 'sip', 'sips', '',
]
uses_fragment = [
    'ftp', 'hdl', 'http', 'gopher', 'news',
    'nntp', 'wais', 'https', 'shttp', 'snews',
    'file', 'prospero', '',
]

# Every character that may appear in a scheme name.
scheme_chars = ''.join([
    'abcdefghijklmnopqrstuvwxyz',
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ',
    '0123456789',
    '+-.',
])
\r
# urlsplit() memoizes its results in _parse_cache and empties the whole
# cache once it holds MAX_CACHE_SIZE entries.
MAX_CACHE_SIZE = 20
_parse_cache = dict()


def clear_cache():
    """Discard every memoized urlsplit() result."""
    _parse_cache.clear()
\r
\r
class ResultMixin(object):
    """Derived, read-only views onto the ``netloc`` field of a parse result.

    The host class must expose a string attribute ``netloc`` of the form
    ``[user[:password]@]host[:port]``.
    """

    @property
    def username(self):
        """User name before the last '@', or None when there is no '@'."""
        userinfo, at_sign, _ = self.netloc.rpartition("@")
        if not at_sign:
            return None
        # Everything up to the first ':' (the whole userinfo if no ':').
        return userinfo.partition(":")[0]

    @property
    def password(self):
        """Text after the first ':' of the userinfo, or None if absent."""
        userinfo, at_sign, _ = self.netloc.rpartition("@")
        _, colon, secret = userinfo.partition(":")
        if at_sign and colon:
            return secret
        return None

    @property
    def hostname(self):
        """Lower-cased host name, or None for an empty host part.

        For a bracketed literal (``[::1]:80``) the text between the opening
        bracket and the first ']' is returned.
        """
        hostinfo = self.netloc.rpartition('@')[2]
        if '[' in hostinfo and ']' in hostinfo:
            bracketed = hostinfo.partition(']')[0]
            return bracketed[1:].lower()
        if hostinfo == '':
            return None
        # With no ':' present, partition() hands back the whole string.
        return hostinfo.partition(':')[0].lower()

    @property
    def port(self):
        """Port as an int, or None when missing, empty or outside 0..65535.

        A non-numeric port propagates the ValueError raised by int().
        """
        # Drop the userinfo and any bracketed IPv6 literal first.
        tail = self.netloc.rpartition('@')[2].rpartition(']')[2]
        pieces = tail.split(':')
        if len(pieces) < 2 or not pieces[1]:
            return None
        number = int(pieces[1], 10)
        return number if 0 <= number <= 65535 else None
\r
from collections import namedtuple\r
\r
class SplitResult(
        namedtuple('SplitResult',
                   ['scheme', 'netloc', 'path', 'query', 'fragment']),
        ResultMixin):
    """5-tuple produced by urlsplit(), with the ResultMixin properties."""

    __slots__ = ()

    def geturl(self):
        """Reassemble the components into a URL string via urlunsplit()."""
        reassembled = urlunsplit(self)
        return reassembled
\r
\r
class ParseResult(
        namedtuple('ParseResult',
                   ['scheme', 'netloc', 'path', 'params', 'query',
                    'fragment']),
        ResultMixin):
    """6-tuple produced by urlparse(), with the ResultMixin properties."""

    __slots__ = ()

    def geturl(self):
        """Reassemble the components into a URL string via urlunparse()."""
        reassembled = urlunparse(self)
        return reassembled
\r
\r
def urlparse(url, scheme='', allow_fragments=True):
    """Split a URL into six components.

    The general shape is
    <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
    and the result is a ParseResult
    (scheme, netloc, path, params, query, fragment).

    Components are not broken down further (netloc stays one string) and
    % escapes are left untouched.  Parameters are only separated from the
    path for schemes listed in uses_params.
    """
    parts = urlsplit(url, scheme, allow_fragments)
    scheme, netloc, path, query, fragment = parts
    params = ''
    if scheme in uses_params and ';' in path:
        path, params = _splitparams(path)
    return ParseResult(scheme, netloc, path, params, query, fragment)
\r
def _splitparams(url):
    """Split ``path;params`` into a (path, params) pair.

    Only a ';' at or after the last '/' counts, so parameters attached to
    earlier path segments stay in the path.  The caller is expected to
    pass a string that contains a ';'.
    """
    last_slash = url.rfind('/')
    if last_slash >= 0:
        semi = url.find(';', last_slash)
        if semi < 0:
            # The only ';' characters sit in earlier segments.
            return url, ''
    else:
        semi = url.find(';')
    return url[:semi], url[semi + 1:]
\r
def _splitnetloc(url, start=0):
    """Return (netloc, rest) for *url*, scanning from index *start*.

    The netloc ends at the earliest '/', '?' or '#' found at or after
    *start*, or at the end of the string when none of them occurs.
    """
    found = [pos for pos in (url.find(ch, start) for ch in '/?#')
             if pos >= 0]
    # Falling back to len(url) means "no delimiter: netloc runs to the end".
    end = min(found + [len(url)])
    return url[start:end], url[end:]
\r
def urlsplit(url, scheme='', allow_fragments=True):
    """Split a URL into five components.

    The general shape is
    <scheme>://<netloc>/<path>?<query>#<fragment>
    and the result is a SplitResult
    (scheme, netloc, path, query, fragment).

    Components are not broken down further (netloc stays one string) and
    % escapes are left untouched.  Results are memoized in _parse_cache.

    Raises ValueError when the netloc has an unbalanced '[' or ']'.
    """
    allow_fragments = bool(allow_fragments)
    # The argument types are part of the key so str and unicode inputs
    # with equal values do not share a cache slot.
    key = url, scheme, allow_fragments, type(url), type(scheme)
    hit = _parse_cache.get(key, None)
    if hit:
        return hit
    if len(_parse_cache) >= MAX_CACHE_SIZE:
        # Crude bound on growth: drop everything once the cache is full.
        clear_cache()

    netloc = query = fragment = ''
    colon = url.find(':')
    if colon > 0:
        prefix = url[:colon]
        if prefix == 'http':
            # Common case: skip the per-character scheme validation.
            scheme, url = prefix.lower(), url[colon + 1:]
        elif all(ch in scheme_chars for ch in prefix):
            # "host:1234" must not be read as scheme "host"; when only
            # digits follow the colon the prefix stays part of the path.
            rest = url[colon + 1:]
            if not rest or any(c not in '0123456789' for c in rest):
                scheme, url = prefix.lower(), rest

    if url[:2] == '//':
        netloc, url = _splitnetloc(url, 2)
        has_open = '[' in netloc
        has_close = ']' in netloc
        if (has_open and not has_close) or (has_close and not has_open):
            raise ValueError("Invalid IPv6 URL")
    if allow_fragments and '#' in url:
        url, fragment = url.split('#', 1)
    if '?' in url:
        url, query = url.split('?', 1)
    result = SplitResult(scheme, netloc, url, query, fragment)
    _parse_cache[key] = result
    return result
\r
def urlunparse(data):
    """Rebuild a URL string from a six-item urlparse()-style sequence.

    The result may differ slightly from the URL originally parsed while
    remaining equivalent, e.g. when the original had redundant delimiters
    such as a '?' followed by an empty query.
    """
    scheme, netloc, path, params, query, fragment = data
    # Fold the parameters back into the path, then defer to urlunsplit().
    path_with_params = "%s;%s" % (path, params) if params else path
    return urlunsplit((scheme, netloc, path_with_params, query, fragment))
\r
def urlunsplit(data):
    """Rebuild a URL string from a five-item urlsplit()-style iterable.

    *data* is (scheme, netloc, path, query, fragment).  The result may
    differ slightly from the URL originally parsed while remaining
    equivalent, e.g. when the original had an unnecessary delimiter such
    as a '?' followed by an empty query.
    """
    scheme, netloc, path, query, fragment = data
    # Emit '//' for an explicit netloc, or for a netloc-style scheme whose
    # path does not already begin with '//'.
    wants_authority = netloc or (
        scheme and scheme in uses_netloc and path[:2] != '//')
    if wants_authority:
        if path and path[:1] != '/':
            path = '/' + path
        path = '//' + (netloc or '') + path
    result = path
    if scheme:
        result = scheme + ':' + result
    if query:
        result = result + '?' + query
    if fragment:
        result = result + '#' + fragment
    return result
\r
def urljoin(base, url, allow_fragments=True):
    """Resolve *url*, which may be relative, against the URL *base*.

    An empty *base* returns *url* unchanged and an empty *url* returns
    *base*.  *url* is also returned as-is when its scheme differs from
    the base scheme or is not listed in uses_relative.
    """
    if not base:
        return url
    if not url:
        return base

    bscheme, bnetloc, bpath, bparams, bquery, _bfragment = urlparse(
        base, '', allow_fragments)
    scheme, netloc, path, params, query, fragment = urlparse(
        url, bscheme, allow_fragments)

    if scheme != bscheme or scheme not in uses_relative:
        return url
    if scheme in uses_netloc:
        if netloc:
            # The reference carries its own authority: nothing to inherit.
            return urlunparse((scheme, netloc, path,
                               params, query, fragment))
        netloc = bnetloc
    if path[:1] == '/':
        # Absolute path: only scheme and netloc come from the base.
        return urlunparse((scheme, netloc, path,
                           params, query, fragment))
    if not (path or params):
        # Query- or fragment-only reference: reuse the base path and
        # params, and the base query too unless the reference has one.
        return urlunparse((scheme, netloc, bpath,
                           bparams, query or bquery, fragment))

    # Merge: base path minus its last segment, plus the reference path.
    segments = bpath.split('/')[:-1] + path.split('/')
    # XXX The dot-segment handling below is known to be imperfect.
    if segments[-1] == '.':
        segments[-1] = ''
    segments = [seg for seg in segments if seg != '.']

    # Repeatedly drop the first interior "name/.." pair, rescanning from
    # the start after each removal, until none is left.
    collapsed = True
    while collapsed:
        collapsed = False
        for idx in range(1, len(segments) - 1):
            if (segments[idx] == '..'
                    and segments[idx - 1] not in ('', '..')):
                del segments[idx - 1:idx + 1]
                collapsed = True
                break

    if segments == ['', '..']:
        segments[-1] = ''
    elif len(segments) >= 2 and segments[-1] == '..':
        # A trailing ".." removes the preceding segment and leaves a
        # trailing slash.
        segments[-2:] = ['']
    return urlunparse((scheme, netloc, '/'.join(segments),
                       params, query, fragment))
\r
def urldefrag(url):
    """Strip the fragment from *url*.

    Returns a (defragmented_url, fragment) pair.  When *url* contains no
    '#', it is returned unchanged together with an empty string.
    """
    if '#' not in url:
        return url, ''
    scheme, netloc, path, params, query, fragment = urlparse(url)
    without_fragment = urlunparse((scheme, netloc, path, params, query, ''))
    return without_fragment, fragment
\r
def _is_unicode(x):
    """Tell whether *x* is an instance of the ``unicode`` type."""
    return isinstance(x, unicode)

# On interpreters without a ``unicode`` builtin nothing can be a unicode
# object, so swap in a stub that always answers 0.
try:
    unicode
except NameError:
    def _is_unicode(x):
        return 0
\r
# unquote() implementation for parse_qs and parse_qsl.
# It cannot be imported from urllib, as that would create a circular
# import: urllib itself uses urlparse functions (urljoin).  If you update
# this function, update it in urllib as well.  This code duplication does
# not exist in Python 3.
\r
# Lookup tables used by unquote().
_hexdig = '0123456789ABCDEFabcdef'
# Maps every two-character hex string (in either case) to its character.
_hextochr = {
    hi + lo: chr(int(hi + lo, 16))
    for hi in _hexdig
    for lo in _hexdig
}
# Captures maximal runs of ASCII characters.
_asciire = re.compile('([\x00-\x7f]+)')
\r
def unquote(s):
    """Replace %xx escapes with the characters they encode.

    unquote('abc%20def') -> 'abc def'.

    A '%' not followed by two hex digits is left in place.  For unicode
    input only the ASCII runs are unquoted; each is decoded as latin-1
    and spliced back between the untouched non-ASCII runs.
    """
    if _is_unicode(s):
        if '%' not in s:
            return s
        # Splitting on a capturing pattern alternates non-ASCII text (even
        # indexes) with ASCII runs (odd indexes).
        chunks = _asciire.split(s)
        out = [chunks[0]]
        for ascii_run, other in zip(chunks[1::2], chunks[2::2]):
            out.append(unquote(str(ascii_run)).decode('latin1'))
            out.append(other)
        return ''.join(out)

    chunks = s.split('%')
    if len(chunks) == 1:
        # Fast path: no '%' anywhere.
        return s
    out = [chunks[0]]
    for piece in chunks[1:]:
        decoded = _hextochr.get(piece[:2])
        if decoded is None:
            # Not a valid escape: restore the '%' that split() consumed.
            out.append('%')
            out.append(piece)
        else:
            out.append(decoded)
            out.append(piece[2:])
    return ''.join(out)
\r
def parse_qs(qs, keep_blank_values=0, strict_parsing=0):
    """Parse a query string into a dict mapping each name to its values.

        Arguments:

        qs: percent-encoded query string to be parsed

        keep_blank_values: flag indicating whether blank values in
            percent-encoded queries should be treated as blank strings.
            A true value keeps them as blank strings.  The default false
            value drops them as if they had not been included.

        strict_parsing: flag indicating what to do with parsing errors.
            If false (the default), errors are silently ignored.
            If true, errors raise a ValueError exception.

        Returns a dict whose values are lists holding the values of each
        name in order of appearance.
    """
    grouped = {}
    for name, value in parse_qsl(qs, keep_blank_values, strict_parsing):
        grouped.setdefault(name, []).append(value)
    return grouped
\r
def parse_qsl(qs, keep_blank_values=0, strict_parsing=0):
    """Parse a query string into a list of (name, value) pairs.

    Arguments:

    qs: percent-encoded query string to be parsed

    keep_blank_values: flag indicating whether blank values in
        percent-encoded queries should be treated as blank strings.  A
        true value keeps them as blank strings.  The default false value
        drops them as if they had not been included.

    strict_parsing: flag indicating what to do with parsing errors. If
        false (the default), errors are silently ignored. If true,
        errors raise a ValueError exception.

    Returns a list of (name, value) tuples in order of appearance.  Both
    '&' and ';' separate fields; '+' is turned into a space and %xx
    escapes are decoded in names and values.

    Raises ValueError, under strict_parsing only, for a field that has
    no '=' (an empty field included).
    """
    pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
    r = []
    for name_value in pairs:
        if not name_value and not strict_parsing:
            continue
        nv = name_value.split('=', 1)
        if len(nv) != 2:
            if strict_parsing:
                # Call form of raise: valid on Python 2 and Python 3 alike,
                # unlike the "raise ValueError, msg" statement form.
                raise ValueError("bad query field: %r" % (name_value,))
            # Handle case of a control-name with no equal sign
            if keep_blank_values:
                nv.append('')
            else:
                continue
        if len(nv[1]) or keep_blank_values:
            name = unquote(nv[0].replace('+', ' '))
            value = unquote(nv[1].replace('+', ' '))
            r.append((name, value))

    return r
Commit	Line	Data
3257aa99 DM	1	"""Parse (absolute and relative) URLs.\r
	2	\r
	3	urlparse module is based upon the following RFC specifications.\r
	4	\r
	5	RFC 3986 (STD66): "Uniform Resource Identifiers" by T. Berners-Lee, R. Fielding\r
	6	and L. Masinter, January 2005.\r
	7	\r
	8	RFC 2732 : "Format for Literal IPv6 Addresses in URL's by R.Hinden, B.Carpenter\r
	9	and L.Masinter, December 1999.\r
	10	\r
	11	RFC 2396: "Uniform Resource Identifiers (URI)": Generic Syntax by T.\r
	12	Berners-Lee, R. Fielding, and L. Masinter, August 1998.\r
	13	\r
	14	RFC 2368: "The mailto URL scheme", by P.Hoffman , L Masinter, J. Zwinski, July 1998.\r
	15	\r
	16	RFC 1808: "Relative Uniform Resource Locators", by R. Fielding, UC Irvine, June\r
	17	1995.\r
	18	\r
	19	RFC 1738: "Uniform Resource Locators (URL)" by T. Berners-Lee, L. Masinter, M.\r
	20	McCahill, December 1994\r
	21	\r
	22	RFC 3986 is considered the current standard and any future changes to\r
	23	urlparse module should conform with it. The urlparse module is\r
	24	currently not entirely compliant with this RFC due to defacto\r
	25	scenarios for parsing, and for backward compatibility purposes, some\r
	26	parsing quirks from older RFCs are retained. The testcases in\r
	27	test_urlparse.py provides a good indicator of parsing behavior.\r
	28	\r
	29	"""\r
	30	\r
	31	import re\r
	32	\r
	33	__all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag",\r
	34	"urlsplit", "urlunsplit", "parse_qs", "parse_qsl"]\r
	35	\r
	36	# A classification of schemes ('' means apply by default)\r
	37	uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'imap',\r
	38	'wais', 'file', 'https', 'shttp', 'mms',\r
	39	'prospero', 'rtsp', 'rtspu', '', 'sftp',\r
	40	'svn', 'svn+ssh']\r
	41	uses_netloc = ['ftp', 'http', 'gopher', 'nntp', 'telnet',\r
	42	'imap', 'wais', 'file', 'mms', 'https', 'shttp',\r
	43	'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '',\r
	44	'svn', 'svn+ssh', 'sftp','nfs','git', 'git+ssh']\r
	45	uses_params = ['ftp', 'hdl', 'prospero', 'http', 'imap',\r
	46	'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips',\r
	47	'mms', '', 'sftp', 'tel']\r
	48	\r
	49	# These are not actually used anymore, but should stay for backwards\r
	50	# compatibility. (They are undocumented, but have a public-looking name.)\r
	51	non_hierarchical = ['gopher', 'hdl', 'mailto', 'news',\r
	52	'telnet', 'wais', 'imap', 'snews', 'sip', 'sips']\r
	53	uses_query = ['http', 'wais', 'imap', 'https', 'shttp', 'mms',\r
	54	'gopher', 'rtsp', 'rtspu', 'sip', 'sips', '']\r
	55	uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news',\r
	56	'nntp', 'wais', 'https', 'shttp', 'snews',\r
	57	'file', 'prospero', '']\r
	58	\r
	59	# Characters valid in scheme names\r
	60	scheme_chars = ('abcdefghijklmnopqrstuvwxyz'\r
	61	'ABCDEFGHIJKLMNOPQRSTUVWXYZ'\r
	62	'0123456789'\r
	63	'+-.')\r
	64	\r
65	MAX_CACHE_SIZE = 20\r
66	_parse_cache = {}\r
67	\r
68	def clear_cache():\r
69	"""Clear the parse cache."""\r
70	_parse_cache.clear()\r
71	\r
72	\r
73	class ResultMixin(object):\r
74	"""Shared methods for the parsed result objects."""\r
75	\r
76	@property\r
77	def username(self):\r
78	netloc = self.netloc\r
79	if "@" in netloc:\r
80	userinfo = netloc.rsplit("@", 1)[0]\r
81	if ":" in userinfo:\r
82	userinfo = userinfo.split(":", 1)[0]\r
83	return userinfo\r
84	return None\r
85	\r
86	@property\r
87	def password(self):\r
88	netloc = self.netloc\r
89	if "@" in netloc:\r
90	userinfo = netloc.rsplit("@", 1)[0]\r
91	if ":" in userinfo:\r
92	return userinfo.split(":", 1)[1]\r
93	return None\r
94	\r
95	@property\r
96	def hostname(self):\r
97	netloc = self.netloc.split('@')[-1]\r
98	if '[' in netloc and ']' in netloc:\r
99	return netloc.split(']')[0][1:].lower()\r
100	elif ':' in netloc:\r
101	return netloc.split(':')[0].lower()\r
102	elif netloc == '':\r
103	return None\r
104	else:\r
105	return netloc.lower()\r
106	\r
107	@property\r
108	def port(self):\r
109	netloc = self.netloc.split('@')[-1].split(']')[-1]\r
110	if ':' in netloc:\r
111	port = netloc.split(':')[1]\r
112	if port:\r
113	port = int(port, 10)\r
114	# verify legal port\r
115	if (0 <= port <= 65535):\r
116	return port\r
117	return None\r
118	\r
119	from collections import namedtuple\r
120	\r
121	class SplitResult(namedtuple('SplitResult', 'scheme netloc path query fragment'), ResultMixin):\r
122	\r
123	__slots__ = ()\r
124	\r
125	def geturl(self):\r
126	return urlunsplit(self)\r
127	\r
128	\r
129	class ParseResult(namedtuple('ParseResult', 'scheme netloc path params query fragment'), ResultMixin):\r
130	\r
131	__slots__ = ()\r
132	\r
133	def geturl(self):\r
134	return urlunparse(self)\r
135	\r
136	\r
137	def urlparse(url, scheme='', allow_fragments=True):\r
138	"""Parse a URL into 6 components:\r
139	<scheme>://<netloc>/<path>;<params>?<query>#<fragment>\r
140	Return a 6-tuple: (scheme, netloc, path, params, query, fragment).\r
141	Note that we don't break the components up in smaller bits\r
142	(e.g. netloc is a single string) and we don't expand % escapes."""\r
143	tuple = urlsplit(url, scheme, allow_fragments)\r
144	scheme, netloc, url, query, fragment = tuple\r
145	if scheme in uses_params and ';' in url:\r
146	url, params = _splitparams(url)\r
147	else:\r
148	params = ''\r
149	return ParseResult(scheme, netloc, url, params, query, fragment)\r
150	\r
151	def _splitparams(url):\r
152	if '/' in url:\r
153	i = url.find(';', url.rfind('/'))\r
154	if i < 0:\r
155	return url, ''\r
156	else:\r
157	i = url.find(';')\r
158	return url[:i], url[i+1:]\r
159	\r
160	def _splitnetloc(url, start=0):\r
161	delim = len(url) # position of end of domain part of url, default is end\r
162	for c in '/?#': # look for delimiters; the order is NOT important\r
163	wdelim = url.find(c, start) # find first of this delim\r
164	if wdelim >= 0: # if found\r
165	delim = min(delim, wdelim) # use earliest delim position\r
166	return url[start:delim], url[delim:] # return (domain, rest)\r
167	\r
168	def urlsplit(url, scheme='', allow_fragments=True):\r
169	"""Parse a URL into 5 components:\r
170	<scheme>://<netloc>/<path>?<query>#<fragment>\r
171	Return a 5-tuple: (scheme, netloc, path, query, fragment).\r
172	Note that we don't break the components up in smaller bits\r
173	(e.g. netloc is a single string) and we don't expand % escapes."""\r
174	allow_fragments = bool(allow_fragments)\r
175	key = url, scheme, allow_fragments, type(url), type(scheme)\r
176	cached = _parse_cache.get(key, None)\r
177	if cached:\r
178	return cached\r
179	if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth\r
180	clear_cache()\r
181	netloc = query = fragment = ''\r
182	i = url.find(':')\r
183	if i > 0:\r
184	if url[:i] == 'http': # optimize the common case\r
185	scheme = url[:i].lower()\r
186	url = url[i+1:]\r
187	if url[:2] == '//':\r
188	netloc, url = _splitnetloc(url, 2)\r
189	if (('[' in netloc and ']' not in netloc) or\r
190	(']' in netloc and '[' not in netloc)):\r
191	raise ValueError("Invalid IPv6 URL")\r
192	if allow_fragments and '#' in url:\r
193	url, fragment = url.split('#', 1)\r
194	if '?' in url:\r
195	url, query = url.split('?', 1)\r
196	v = SplitResult(scheme, netloc, url, query, fragment)\r
197	_parse_cache[key] = v\r
198	return v\r
199	for c in url[:i]:\r
200	if c not in scheme_chars:\r
201	break\r
202	else:\r
203	# make sure "url" is not actually a port number (in which case\r
204	# "scheme" is really part of the path)\r
205	rest = url[i+1:]\r
206	if not rest or any(c not in '0123456789' for c in rest):\r
207	# not a port number\r
208	scheme, url = url[:i].lower(), rest\r
209	\r
210	if url[:2] == '//':\r
211	netloc, url = _splitnetloc(url, 2)\r
212	if (('[' in netloc and ']' not in netloc) or\r
213	(']' in netloc and '[' not in netloc)):\r
214	raise ValueError("Invalid IPv6 URL")\r
215	if allow_fragments and '#' in url:\r
216	url, fragment = url.split('#', 1)\r
217	if '?' in url:\r
218	url, query = url.split('?', 1)\r
219	v = SplitResult(scheme, netloc, url, query, fragment)\r
220	_parse_cache[key] = v\r
221	return v\r
222	\r
223	def urlunparse(data):\r
224	"""Put a parsed URL back together again. This may result in a\r
225	slightly different, but equivalent URL, if the URL that was parsed\r
226	originally had redundant delimiters, e.g. a ? with an empty query\r
227	(the draft states that these are equivalent)."""\r
228	scheme, netloc, url, params, query, fragment = data\r
229	if params:\r
230	url = "%s;%s" % (url, params)\r
231	return urlunsplit((scheme, netloc, url, query, fragment))\r
232	\r
233	def urlunsplit(data):\r
234	"""Combine the elements of a tuple as returned by urlsplit() into a\r
235	complete URL as a string. The data argument can be any five-item iterable.\r
236	This may result in a slightly different, but equivalent URL, if the URL that\r
237	was parsed originally had unnecessary delimiters (for example, a ? with an\r
238	empty query; the RFC states that these are equivalent)."""\r
239	scheme, netloc, url, query, fragment = data\r
240	if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'):\r
241	if url and url[:1] != '/': url = '/' + url\r
242	url = '//' + (netloc or '') + url\r
243	if scheme:\r
244	url = scheme + ':' + url\r
245	if query:\r
246	url = url + '?' + query\r
247	if fragment:\r
248	url = url + '#' + fragment\r
249	return url\r
250	\r
251	def urljoin(base, url, allow_fragments=True):\r
252	"""Join a base URL and a possibly relative URL to form an absolute\r
253	interpretation of the latter."""\r
254	if not base:\r
255	return url\r
256	if not url:\r
257	return base\r
258	bscheme, bnetloc, bpath, bparams, bquery, bfragment = \\r
259	urlparse(base, '', allow_fragments)\r
260	scheme, netloc, path, params, query, fragment = \\r
261	urlparse(url, bscheme, allow_fragments)\r
262	if scheme != bscheme or scheme not in uses_relative:\r
263	return url\r
264	if scheme in uses_netloc:\r
265	if netloc:\r
266	return urlunparse((scheme, netloc, path,\r
267	params, query, fragment))\r
268	netloc = bnetloc\r
269	if path[:1] == '/':\r
270	return urlunparse((scheme, netloc, path,\r
271	params, query, fragment))\r
272	if not path and not params:\r
273	path = bpath\r
274	params = bparams\r
275	if not query:\r
276	query = bquery\r
277	return urlunparse((scheme, netloc, path,\r
278	params, query, fragment))\r
279	segments = bpath.split('/')[:-1] + path.split('/')\r
280	# XXX The stuff below is bogus in various ways...\r
281	if segments[-1] == '.':\r
282	segments[-1] = ''\r
283	while '.' in segments:\r
284	segments.remove('.')\r
285	while 1:\r
286	i = 1\r
287	n = len(segments) - 1\r
288	while i < n:\r
289	if (segments[i] == '..'\r
290	and segments[i-1] not in ('', '..')):\r
291	del segments[i-1:i+1]\r
292	break\r
293	i = i+1\r
294	else:\r
295	break\r
296	if segments == ['', '..']:\r
297	segments[-1] = ''\r
298	elif len(segments) >= 2 and segments[-1] == '..':\r
299	segments[-2:] = ['']\r
300	return urlunparse((scheme, netloc, '/'.join(segments),\r
301	params, query, fragment))\r
302	\r
303	def urldefrag(url):\r
304	"""Removes any existing fragment from URL.\r
305	\r
306	Returns a tuple of the defragmented URL and the fragment. If\r
307	the URL contained no fragments, the second element is the\r
308	empty string.\r
309	"""\r
310	if '#' in url:\r
311	s, n, p, a, q, frag = urlparse(url)\r
312	defrag = urlunparse((s, n, p, a, q, ''))\r
313	return defrag, frag\r
314	else:\r
315	return url, ''\r
316	\r
317	try:\r
318	unicode\r
319	except NameError:\r
320	def _is_unicode(x):\r
321	return 0\r
322	else:\r
323	def _is_unicode(x):\r
324	return isinstance(x, unicode)\r
325	\r
326	# unquote method for parse_qs and parse_qsl\r
327	# Cannot use directly from urllib as it would create a circular reference\r
328	# because urllib uses urlparse methods (urljoin). If you update this function,\r
329	# update it also in urllib. This code duplication does not existin in Python3.\r
330	\r
331	_hexdig = '0123456789ABCDEFabcdef'\r
332	_hextochr = dict((a+b, chr(int(a+b,16)))\r
333	for a in _hexdig for b in _hexdig)\r
334	_asciire = re.compile('([\x00-\x7f]+)')\r
335	\r
336	def unquote(s):\r
337	"""unquote('abc%20def') -> 'abc def'."""\r
338	if _is_unicode(s):\r
339	if '%' not in s:\r
340	return s\r
341	bits = _asciire.split(s)\r
342	res = [bits[0]]\r
343	append = res.append\r
344	for i in range(1, len(bits), 2):\r
345	append(unquote(str(bits[i])).decode('latin1'))\r
346	append(bits[i + 1])\r
347	return ''.join(res)\r
348	\r
349	bits = s.split('%')\r
350	# fastpath\r
351	if len(bits) == 1:\r
352	return s\r
353	res = [bits[0]]\r
354	append = res.append\r
355	for item in bits[1:]:\r
356	try:\r
357	append(_hextochr[item[:2]])\r
358	append(item[2:])\r
359	except KeyError:\r
360	append('%')\r
361	append(item)\r
362	return ''.join(res)\r
363	\r
364	def parse_qs(qs, keep_blank_values=0, strict_parsing=0):\r
365	"""Parse a query given as a string argument.\r
366	\r
367	Arguments:\r
368	\r
369	qs: percent-encoded query string to be parsed\r
370	\r
371	keep_blank_values: flag indicating whether blank values in\r
372	percent-encoded queries should be treated as blank strings.\r
373	A true value indicates that blanks should be retained as\r
374	blank strings. The default false value indicates that\r
375	blank values are to be ignored and treated as if they were\r
376	not included.\r
377	\r
378	strict_parsing: flag indicating what to do with parsing errors.\r
379	If false (the default), errors are silently ignored.\r
380	If true, errors raise a ValueError exception.\r
381	"""\r
382	dict = {}\r
383	for name, value in parse_qsl(qs, keep_blank_values, strict_parsing):\r
384	if name in dict:\r
385	dict[name].append(value)\r
386	else:\r
387	dict[name] = [value]\r
388	return dict\r
389	\r
390	def parse_qsl(qs, keep_blank_values=0, strict_parsing=0):\r
391	"""Parse a query given as a string argument.\r
392	\r
393	Arguments:\r
394	\r
395	qs: percent-encoded query string to be parsed\r
396	\r
397	keep_blank_values: flag indicating whether blank values in\r
398	percent-encoded queries should be treated as blank strings. A\r
399	true value indicates that blanks should be retained as blank\r
400	strings. The default false value indicates that blank values\r
401	are to be ignored and treated as if they were not included.\r
402	\r
403	strict_parsing: flag indicating what to do with parsing errors. If\r
404	false (the default), errors are silently ignored. If true,\r
405	errors raise a ValueError exception.\r
406	\r
407	Returns a list, as G-d intended.\r
408	"""\r
409	pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]\r
410	r = []\r
411	for name_value in pairs:\r
412	if not name_value and not strict_parsing:\r
413	continue\r
414	nv = name_value.split('=', 1)\r
415	if len(nv) != 2:\r
416	if strict_parsing:\r
417	raise ValueError, "bad query field: %r" % (name_value,)\r
418	# Handle case of a control-name with no equal sign\r
419	if keep_blank_values:\r
420	nv.append('')\r
421	else:\r
422	continue\r
423	if len(nv[1]) or keep_blank_values:\r
424	name = unquote(nv[0].replace('+', ' '))\r
425	value = unquote(nv[1].replace('+', ' '))\r
426	r.append((name, value))\r
427	\r
428	return r\r