4710c53d 1"""Open an arbitrary URL.\r
2\r
3See the following document for more info on URLs:\r
4"Names and Addresses, URIs, URLs, URNs, URCs", at\r
5http://www.w3.org/pub/WWW/Addressing/Overview.html\r
6\r
7See also the HTTP spec (from which the error codes are derived):\r
8"HTTP - Hypertext Transfer Protocol", at\r
9http://www.w3.org/pub/WWW/Protocols/\r
10\r
11Related standards and specs:\r
12- RFC1808: the "relative URL" spec. (authoritative status)\r
- RFC1738: the "URL standard". (authoritative status)
- RFC1630: the "URI spec". (informational status)
15\r
16The object returned by URLopener().open(file) will differ per\r
protocol.  All you know is that it has methods read(), readline(),
18readlines(), fileno(), close() and info(). The read*(), fileno()\r
19and close() methods work like those of open files.\r
20The info() method returns a mimetools.Message object which can be\r
21used to query various info about the object, if available.\r
22(mimetools.Message objects are queried with the getheader() method.)\r
23"""\r
24\r
25import string\r
26import socket\r
27import os\r
28import time\r
29import sys\r
30from urlparse import urljoin as basejoin\r
31\r
32__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",\r
33 "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",\r
34 "urlencode", "url2pathname", "pathname2url", "splittag",\r
35 "localhost", "thishost", "ftperrors", "basejoin", "unwrap",\r
36 "splittype", "splithost", "splituser", "splitpasswd", "splitport",\r
37 "splitnport", "splitquery", "splitattr", "splitvalue",\r
38 "getproxies"]\r
39\r
40__version__ = '1.17' # XXX This version is not always updated :-(\r
41\r
42MAXFTPCACHE = 10 # Trim the ftp cache beyond this size\r
43\r
44# Helper for non-unix systems\r
45if os.name == 'nt':\r
46 from nturl2path import url2pathname, pathname2url\r
47elif os.name == 'riscos':\r
48 from rourl2path import url2pathname, pathname2url\r
49else:\r
50 def url2pathname(pathname):\r
51 """OS-specific conversion from a relative URL of the 'file' scheme\r
52 to a file system path; not recommended for general use."""\r
53 return unquote(pathname)\r
54\r
55 def pathname2url(pathname):\r
56 """OS-specific conversion from a file system path to a relative URL\r
57 of the 'file' scheme; not recommended for general use."""\r
58 return quote(pathname)\r
59\r
60# This really consists of two pieces:\r
61# (1) a class which handles opening of all sorts of URLs\r
62# (plus assorted utilities etc.)\r
63# (2) a set of functions for parsing URLs\r
64# XXX Should these be separated out into different modules?\r
65\r
66\r
67# Shortcut for basic usage\r
68_urlopener = None\r
69def urlopen(url, data=None, proxies=None):\r
70 """Create a file-like object for the specified URL to read from."""\r
71 from warnings import warnpy3k\r
72 warnpy3k("urllib.urlopen() has been removed in Python 3.0 in "\r
73 "favor of urllib2.urlopen()", stacklevel=2)\r
74\r
75 global _urlopener\r
76 if proxies is not None:\r
77 opener = FancyURLopener(proxies=proxies)\r
78 elif not _urlopener:\r
79 opener = FancyURLopener()\r
80 _urlopener = opener\r
81 else:\r
82 opener = _urlopener\r
83 if data is None:\r
84 return opener.open(url)\r
85 else:\r
86 return opener.open(url, data)\r
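# Illustrative sketch (not part of the original module): typical use of the
# urlopen() shortcut above.  A plain call issues a GET; passing `data` (an
# application/x-www-form-urlencoded string) issues a POST instead.
def _example_urlopen_usage(url='http://www.python.org/'):
    f = urlopen(url)                        # GET
    try:
        body, headers = f.read(), f.info()  # info() is a mimetools.Message
    finally:
        f.close()
    f = urlopen(url, data='q=example')      # POST to the same URL
    f.close()
    return body, headers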
87def urlretrieve(url, filename=None, reporthook=None, data=None):\r
88 global _urlopener\r
89 if not _urlopener:\r
90 _urlopener = FancyURLopener()\r
91 return _urlopener.retrieve(url, filename, reporthook, data)\r
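# Illustrative sketch (not part of the original module): urlretrieve()
# downloads to a local file (a temporary file when `filename` is omitted)
# and returns (filename, headers); a reporthook can be used to track progress.
def _example_urlretrieve_usage(url='http://www.python.org/'):
    def hook(blocknum, blocksize, totalsize):
        # Called once before the first block is read and once per block after.
        print blocknum * blocksize, 'of', totalsize, 'bytes'
    return urlretrieve(url, reporthook=hook)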
92def urlcleanup():\r
93 if _urlopener:\r
94 _urlopener.cleanup()\r
95 _safe_quoters.clear()\r
96 ftpcache.clear()\r
97\r
98# check for SSL\r
99try:\r
100 import ssl\r
except ImportError:
102 _have_ssl = False\r
103else:\r
104 _have_ssl = True\r
105\r
106# exception raised when downloaded size does not match content-length\r
107class ContentTooShortError(IOError):\r
108 def __init__(self, message, content):\r
109 IOError.__init__(self, message)\r
110 self.content = content\r
111\r
112ftpcache = {}\r
113class URLopener:\r
114 """Class to open URLs.\r
115 This is a class rather than just a subroutine because we may need\r
116 more than one set of global protocol-specific options.\r
117 Note -- this is a base class for those who don't want the\r
    automatic handling of error types 302 (relocated) and 401
119 (authorization needed)."""\r
120\r
121 __tempfiles = None\r
122\r
123 version = "Python-urllib/%s" % __version__\r
124\r
125 # Constructor\r
126 def __init__(self, proxies=None, **x509):\r
127 if proxies is None:\r
128 proxies = getproxies()\r
129 assert hasattr(proxies, 'has_key'), "proxies must be a mapping"\r
130 self.proxies = proxies\r
131 self.key_file = x509.get('key_file')\r
132 self.cert_file = x509.get('cert_file')\r
133 self.addheaders = [('User-Agent', self.version)]\r
134 self.__tempfiles = []\r
135 self.__unlink = os.unlink # See cleanup()\r
136 self.tempcache = None\r
137 # Undocumented feature: if you assign {} to tempcache,\r
138 # it is used to cache files retrieved with\r
139 # self.retrieve(). This is not enabled by default\r
140 # since it does not work for changing documents (and I\r
141 # haven't got the logic to check expiration headers\r
142 # yet).\r
143 self.ftpcache = ftpcache\r
144 # Undocumented feature: you can use a different\r
145 # ftp cache by assigning to the .ftpcache member;\r
146 # in case you want logically independent URL openers\r
147 # XXX This is not threadsafe. Bah.\r
148\r
149 def __del__(self):\r
150 self.close()\r
151\r
152 def close(self):\r
153 self.cleanup()\r
154\r
155 def cleanup(self):\r
156 # This code sometimes runs when the rest of this module\r
157 # has already been deleted, so it can't use any globals\r
158 # or import anything.\r
159 if self.__tempfiles:\r
160 for file in self.__tempfiles:\r
161 try:\r
162 self.__unlink(file)\r
163 except OSError:\r
164 pass\r
165 del self.__tempfiles[:]\r
166 if self.tempcache:\r
167 self.tempcache.clear()\r
168\r
169 def addheader(self, *args):\r
170 """Add a header to be used by the HTTP interface only\r
171 e.g. u.addheader('Accept', 'sound/basic')"""\r
172 self.addheaders.append(args)\r
173\r
174 # External interface\r
175 def open(self, fullurl, data=None):\r
176 """Use URLopener().open(file) instead of open(file, 'r')."""\r
177 fullurl = unwrap(toBytes(fullurl))\r
        # Percent-encode the URL, working around server errors caused by,
        # e.g., spaces within URL paths.
180 fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")\r
181 if self.tempcache and fullurl in self.tempcache:\r
182 filename, headers = self.tempcache[fullurl]\r
183 fp = open(filename, 'rb')\r
184 return addinfourl(fp, headers, fullurl)\r
185 urltype, url = splittype(fullurl)\r
186 if not urltype:\r
187 urltype = 'file'\r
188 if urltype in self.proxies:\r
189 proxy = self.proxies[urltype]\r
190 urltype, proxyhost = splittype(proxy)\r
191 host, selector = splithost(proxyhost)\r
192 url = (host, fullurl) # Signal special case to open_*()\r
193 else:\r
194 proxy = None\r
195 name = 'open_' + urltype\r
196 self.type = urltype\r
197 name = name.replace('-', '_')\r
198 if not hasattr(self, name):\r
199 if proxy:\r
200 return self.open_unknown_proxy(proxy, fullurl, data)\r
201 else:\r
202 return self.open_unknown(fullurl, data)\r
203 try:\r
204 if data is None:\r
205 return getattr(self, name)(url)\r
206 else:\r
207 return getattr(self, name)(url, data)\r
208 except socket.error, msg:\r
209 raise IOError, ('socket error', msg), sys.exc_info()[2]\r
210\r
211 def open_unknown(self, fullurl, data=None):\r
212 """Overridable interface to open unknown URL type."""\r
213 type, url = splittype(fullurl)\r
214 raise IOError, ('url error', 'unknown url type', type)\r
215\r
216 def open_unknown_proxy(self, proxy, fullurl, data=None):\r
217 """Overridable interface to open unknown URL type."""\r
218 type, url = splittype(fullurl)\r
219 raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)\r
220\r
221 # External interface\r
222 def retrieve(self, url, filename=None, reporthook=None, data=None):\r
223 """retrieve(url) returns (filename, headers) for a local object\r
224 or (tempfilename, headers) for a remote object."""\r
225 url = unwrap(toBytes(url))\r
226 if self.tempcache and url in self.tempcache:\r
227 return self.tempcache[url]\r
228 type, url1 = splittype(url)\r
229 if filename is None and (not type or type == 'file'):\r
230 try:\r
231 fp = self.open_local_file(url1)\r
232 hdrs = fp.info()\r
233 fp.close()\r
234 return url2pathname(splithost(url1)[1]), hdrs\r
235 except IOError:\r
236 pass\r
237 fp = self.open(url, data)\r
238 try:\r
239 headers = fp.info()\r
240 if filename:\r
241 tfp = open(filename, 'wb')\r
242 else:\r
243 import tempfile\r
244 garbage, path = splittype(url)\r
245 garbage, path = splithost(path or "")\r
246 path, garbage = splitquery(path or "")\r
247 path, garbage = splitattr(path or "")\r
248 suffix = os.path.splitext(path)[1]\r
249 (fd, filename) = tempfile.mkstemp(suffix)\r
250 self.__tempfiles.append(filename)\r
251 tfp = os.fdopen(fd, 'wb')\r
252 try:\r
253 result = filename, headers\r
254 if self.tempcache is not None:\r
255 self.tempcache[url] = result\r
256 bs = 1024*8\r
257 size = -1\r
258 read = 0\r
259 blocknum = 0\r
260 if reporthook:\r
261 if "content-length" in headers:\r
262 size = int(headers["Content-Length"])\r
263 reporthook(blocknum, bs, size)\r
264 while 1:\r
265 block = fp.read(bs)\r
266 if block == "":\r
267 break\r
268 read += len(block)\r
269 tfp.write(block)\r
270 blocknum += 1\r
271 if reporthook:\r
272 reporthook(blocknum, bs, size)\r
273 finally:\r
274 tfp.close()\r
275 finally:\r
276 fp.close()\r
277\r
278 # raise exception if actual size does not match content-length header\r
279 if size >= 0 and read < size:\r
280 raise ContentTooShortError("retrieval incomplete: got only %i out "\r
281 "of %i bytes" % (read, size), result)\r
282\r
283 return result\r
284\r
285 # Each method named open_<type> knows how to open that type of URL\r
286\r
287 def open_http(self, url, data=None):\r
288 """Use HTTP protocol."""\r
289 import httplib\r
290 user_passwd = None\r
291 proxy_passwd= None\r
292 if isinstance(url, str):\r
293 host, selector = splithost(url)\r
294 if host:\r
295 user_passwd, host = splituser(host)\r
296 host = unquote(host)\r
297 realhost = host\r
298 else:\r
299 host, selector = url\r
300 # check whether the proxy contains authorization information\r
301 proxy_passwd, host = splituser(host)\r
302 # now we proceed with the url we want to obtain\r
303 urltype, rest = splittype(selector)\r
304 url = rest\r
305 user_passwd = None\r
306 if urltype.lower() != 'http':\r
307 realhost = None\r
308 else:\r
309 realhost, rest = splithost(rest)\r
310 if realhost:\r
311 user_passwd, realhost = splituser(realhost)\r
312 if user_passwd:\r
313 selector = "%s://%s%s" % (urltype, realhost, rest)\r
314 if proxy_bypass(realhost):\r
315 host = realhost\r
316\r
317 #print "proxy via http:", host, selector\r
318 if not host: raise IOError, ('http error', 'no host given')\r
319\r
320 if proxy_passwd:\r
321 import base64\r
322 proxy_auth = base64.b64encode(proxy_passwd).strip()\r
323 else:\r
324 proxy_auth = None\r
325\r
326 if user_passwd:\r
327 import base64\r
328 auth = base64.b64encode(user_passwd).strip()\r
329 else:\r
330 auth = None\r
331 h = httplib.HTTP(host)\r
332 if data is not None:\r
333 h.putrequest('POST', selector)\r
334 h.putheader('Content-Type', 'application/x-www-form-urlencoded')\r
335 h.putheader('Content-Length', '%d' % len(data))\r
336 else:\r
337 h.putrequest('GET', selector)\r
338 if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)\r
339 if auth: h.putheader('Authorization', 'Basic %s' % auth)\r
340 if realhost: h.putheader('Host', realhost)\r
341 for args in self.addheaders: h.putheader(*args)\r
342 h.endheaders(data)\r
343 errcode, errmsg, headers = h.getreply()\r
344 fp = h.getfile()\r
345 if errcode == -1:\r
346 if fp: fp.close()\r
347 # something went wrong with the HTTP status line\r
348 raise IOError, ('http protocol error', 0,\r
349 'got a bad status line', None)\r
350 # According to RFC 2616, "2xx" code indicates that the client's\r
351 # request was successfully received, understood, and accepted.\r
352 if (200 <= errcode < 300):\r
353 return addinfourl(fp, headers, "http:" + url, errcode)\r
354 else:\r
355 if data is None:\r
356 return self.http_error(url, fp, errcode, errmsg, headers)\r
357 else:\r
358 return self.http_error(url, fp, errcode, errmsg, headers, data)\r
359\r
360 def http_error(self, url, fp, errcode, errmsg, headers, data=None):\r
361 """Handle http errors.\r
362 Derived class can override this, or provide specific handlers\r
363 named http_error_DDD where DDD is the 3-digit error code."""\r
364 # First check if there's a specific handler for this error\r
365 name = 'http_error_%d' % errcode\r
366 if hasattr(self, name):\r
367 method = getattr(self, name)\r
368 if data is None:\r
369 result = method(url, fp, errcode, errmsg, headers)\r
370 else:\r
371 result = method(url, fp, errcode, errmsg, headers, data)\r
372 if result: return result\r
373 return self.http_error_default(url, fp, errcode, errmsg, headers)\r
374\r
375 def http_error_default(self, url, fp, errcode, errmsg, headers):\r
376 """Default error handler: close the connection and raise IOError."""\r
377 void = fp.read()\r
378 fp.close()\r
379 raise IOError, ('http error', errcode, errmsg, headers)\r
380\r
381 if _have_ssl:\r
382 def open_https(self, url, data=None):\r
383 """Use HTTPS protocol."""\r
384\r
385 import httplib\r
386 user_passwd = None\r
387 proxy_passwd = None\r
388 if isinstance(url, str):\r
389 host, selector = splithost(url)\r
390 if host:\r
391 user_passwd, host = splituser(host)\r
392 host = unquote(host)\r
393 realhost = host\r
394 else:\r
395 host, selector = url\r
                # check whether the proxy contains authorization information
397 proxy_passwd, host = splituser(host)\r
398 urltype, rest = splittype(selector)\r
399 url = rest\r
400 user_passwd = None\r
401 if urltype.lower() != 'https':\r
402 realhost = None\r
403 else:\r
404 realhost, rest = splithost(rest)\r
405 if realhost:\r
406 user_passwd, realhost = splituser(realhost)\r
407 if user_passwd:\r
408 selector = "%s://%s%s" % (urltype, realhost, rest)\r
409 #print "proxy via https:", host, selector\r
410 if not host: raise IOError, ('https error', 'no host given')\r
411 if proxy_passwd:\r
412 import base64\r
413 proxy_auth = base64.b64encode(proxy_passwd).strip()\r
414 else:\r
415 proxy_auth = None\r
416 if user_passwd:\r
417 import base64\r
418 auth = base64.b64encode(user_passwd).strip()\r
419 else:\r
420 auth = None\r
421 h = httplib.HTTPS(host, 0,\r
422 key_file=self.key_file,\r
423 cert_file=self.cert_file)\r
424 if data is not None:\r
425 h.putrequest('POST', selector)\r
426 h.putheader('Content-Type',\r
427 'application/x-www-form-urlencoded')\r
428 h.putheader('Content-Length', '%d' % len(data))\r
429 else:\r
430 h.putrequest('GET', selector)\r
431 if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)\r
432 if auth: h.putheader('Authorization', 'Basic %s' % auth)\r
433 if realhost: h.putheader('Host', realhost)\r
434 for args in self.addheaders: h.putheader(*args)\r
435 h.endheaders(data)\r
436 errcode, errmsg, headers = h.getreply()\r
437 fp = h.getfile()\r
438 if errcode == -1:\r
439 if fp: fp.close()\r
440 # something went wrong with the HTTP status line\r
441 raise IOError, ('http protocol error', 0,\r
442 'got a bad status line', None)\r
443 # According to RFC 2616, "2xx" code indicates that the client's\r
444 # request was successfully received, understood, and accepted.\r
445 if (200 <= errcode < 300):\r
446 return addinfourl(fp, headers, "https:" + url, errcode)\r
447 else:\r
448 if data is None:\r
449 return self.http_error(url, fp, errcode, errmsg, headers)\r
450 else:\r
451 return self.http_error(url, fp, errcode, errmsg, headers,\r
452 data)\r
453\r
454 def open_file(self, url):\r
455 """Use local file or FTP depending on form of URL."""\r
456 if not isinstance(url, str):\r
457 raise IOError, ('file error', 'proxy support for file protocol currently not implemented')\r
458 if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':\r
459 return self.open_ftp(url)\r
460 else:\r
461 return self.open_local_file(url)\r
462\r
463 def open_local_file(self, url):\r
464 """Use local file."""\r
465 import mimetypes, mimetools, email.utils\r
466 try:\r
467 from cStringIO import StringIO\r
468 except ImportError:\r
469 from StringIO import StringIO\r
470 host, file = splithost(url)\r
471 localname = url2pathname(file)\r
472 try:\r
473 stats = os.stat(localname)\r
474 except OSError, e:\r
475 raise IOError(e.errno, e.strerror, e.filename)\r
476 size = stats.st_size\r
477 modified = email.utils.formatdate(stats.st_mtime, usegmt=True)\r
478 mtype = mimetypes.guess_type(url)[0]\r
479 headers = mimetools.Message(StringIO(\r
480 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %\r
481 (mtype or 'text/plain', size, modified)))\r
482 if not host:\r
483 urlfile = file\r
484 if file[:1] == '/':\r
485 urlfile = 'file://' + file\r
486 return addinfourl(open(localname, 'rb'),\r
487 headers, urlfile)\r
488 host, port = splitport(host)\r
489 if not port \\r
490 and socket.gethostbyname(host) in (localhost(), thishost()):\r
491 urlfile = file\r
492 if file[:1] == '/':\r
493 urlfile = 'file://' + file\r
494 return addinfourl(open(localname, 'rb'),\r
495 headers, urlfile)\r
496 raise IOError, ('local file error', 'not on local host')\r
497\r
498 def open_ftp(self, url):\r
499 """Use FTP protocol."""\r
500 if not isinstance(url, str):\r
501 raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')\r
502 import mimetypes, mimetools\r
503 try:\r
504 from cStringIO import StringIO\r
505 except ImportError:\r
506 from StringIO import StringIO\r
507 host, path = splithost(url)\r
508 if not host: raise IOError, ('ftp error', 'no host given')\r
509 host, port = splitport(host)\r
510 user, host = splituser(host)\r
511 if user: user, passwd = splitpasswd(user)\r
512 else: passwd = None\r
513 host = unquote(host)\r
514 user = user or ''\r
515 passwd = passwd or ''\r
516 host = socket.gethostbyname(host)\r
517 if not port:\r
518 import ftplib\r
519 port = ftplib.FTP_PORT\r
520 else:\r
521 port = int(port)\r
522 path, attrs = splitattr(path)\r
523 path = unquote(path)\r
524 dirs = path.split('/')\r
525 dirs, file = dirs[:-1], dirs[-1]\r
526 if dirs and not dirs[0]: dirs = dirs[1:]\r
527 if dirs and not dirs[0]: dirs[0] = '/'\r
528 key = user, host, port, '/'.join(dirs)\r
529 # XXX thread unsafe!\r
530 if len(self.ftpcache) > MAXFTPCACHE:\r
531 # Prune the cache, rather arbitrarily\r
532 for k in self.ftpcache.keys():\r
533 if k != key:\r
534 v = self.ftpcache[k]\r
535 del self.ftpcache[k]\r
536 v.close()\r
537 try:\r
538 if not key in self.ftpcache:\r
539 self.ftpcache[key] = \\r
540 ftpwrapper(user, passwd, host, port, dirs)\r
541 if not file: type = 'D'\r
542 else: type = 'I'\r
543 for attr in attrs:\r
544 attr, value = splitvalue(attr)\r
545 if attr.lower() == 'type' and \\r
546 value in ('a', 'A', 'i', 'I', 'd', 'D'):\r
547 type = value.upper()\r
548 (fp, retrlen) = self.ftpcache[key].retrfile(file, type)\r
549 mtype = mimetypes.guess_type("ftp:" + url)[0]\r
550 headers = ""\r
551 if mtype:\r
552 headers += "Content-Type: %s\n" % mtype\r
553 if retrlen is not None and retrlen >= 0:\r
554 headers += "Content-Length: %d\n" % retrlen\r
555 headers = mimetools.Message(StringIO(headers))\r
556 return addinfourl(fp, headers, "ftp:" + url)\r
557 except ftperrors(), msg:\r
558 raise IOError, ('ftp error', msg), sys.exc_info()[2]\r
559\r
560 def open_data(self, url, data=None):\r
561 """Use "data" URL."""\r
562 if not isinstance(url, str):\r
563 raise IOError, ('data error', 'proxy support for data protocol currently not implemented')\r
564 # ignore POSTed data\r
565 #\r
566 # syntax of data URLs:\r
567 # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data\r
568 # mediatype := [ type "/" subtype ] *( ";" parameter )\r
569 # data := *urlchar\r
570 # parameter := attribute "=" value\r
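        # Illustrative examples of this syntax (not from the original source):
        #   data:,Hello%20World              -> text/plain, body "Hello World"
        #   data:text/plain;base64,SGVsbG8=  -> base64-decodes to "Hello"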
571 import mimetools\r
572 try:\r
573 from cStringIO import StringIO\r
574 except ImportError:\r
575 from StringIO import StringIO\r
576 try:\r
577 [type, data] = url.split(',', 1)\r
578 except ValueError:\r
579 raise IOError, ('data error', 'bad data URL')\r
580 if not type:\r
581 type = 'text/plain;charset=US-ASCII'\r
582 semi = type.rfind(';')\r
583 if semi >= 0 and '=' not in type[semi:]:\r
584 encoding = type[semi+1:]\r
585 type = type[:semi]\r
586 else:\r
587 encoding = ''\r
588 msg = []\r
589 msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',\r
590 time.gmtime(time.time())))\r
591 msg.append('Content-type: %s' % type)\r
592 if encoding == 'base64':\r
593 import base64\r
594 data = base64.decodestring(data)\r
595 else:\r
596 data = unquote(data)\r
597 msg.append('Content-Length: %d' % len(data))\r
598 msg.append('')\r
599 msg.append(data)\r
600 msg = '\n'.join(msg)\r
601 f = StringIO(msg)\r
602 headers = mimetools.Message(f, 0)\r
603 #f.fileno = None # needed for addinfourl\r
604 return addinfourl(f, headers, url)\r
605\r
606\r
607class FancyURLopener(URLopener):\r
608 """Derived class with handlers for errors we can handle (perhaps)."""\r
609\r
610 def __init__(self, *args, **kwargs):\r
611 URLopener.__init__(self, *args, **kwargs)\r
612 self.auth_cache = {}\r
613 self.tries = 0\r
614 self.maxtries = 10\r
615\r
616 def http_error_default(self, url, fp, errcode, errmsg, headers):\r
617 """Default error handling -- don't raise an exception."""\r
618 return addinfourl(fp, headers, "http:" + url, errcode)\r
619\r
620 def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):\r
621 """Error 302 -- relocated (temporarily)."""\r
622 self.tries += 1\r
623 if self.maxtries and self.tries >= self.maxtries:\r
624 if hasattr(self, "http_error_500"):\r
625 meth = self.http_error_500\r
626 else:\r
627 meth = self.http_error_default\r
628 self.tries = 0\r
629 return meth(url, fp, 500,\r
630 "Internal Server Error: Redirect Recursion", headers)\r
631 result = self.redirect_internal(url, fp, errcode, errmsg, headers,\r
632 data)\r
633 self.tries = 0\r
634 return result\r
635\r
636 def redirect_internal(self, url, fp, errcode, errmsg, headers, data):\r
637 if 'location' in headers:\r
638 newurl = headers['location']\r
639 elif 'uri' in headers:\r
640 newurl = headers['uri']\r
641 else:\r
642 return\r
643 void = fp.read()\r
644 fp.close()\r
645 # In case the server sent a relative URL, join with original:\r
646 newurl = basejoin(self.type + ":" + url, newurl)\r
647\r
648 # For security reasons we do not allow redirects to protocols\r
649 # other than HTTP, HTTPS or FTP.\r
650 newurl_lower = newurl.lower()\r
651 if not (newurl_lower.startswith('http://') or\r
652 newurl_lower.startswith('https://') or\r
653 newurl_lower.startswith('ftp://')):\r
654 raise IOError('redirect error', errcode,\r
655 errmsg + " - Redirection to url '%s' is not allowed" %\r
656 newurl,\r
657 headers)\r
658\r
659 return self.open(newurl)\r
660\r
661 def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):\r
662 """Error 301 -- also relocated (permanently)."""\r
663 return self.http_error_302(url, fp, errcode, errmsg, headers, data)\r
664\r
665 def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):\r
666 """Error 303 -- also relocated (essentially identical to 302)."""\r
667 return self.http_error_302(url, fp, errcode, errmsg, headers, data)\r
668\r
669 def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):\r
670 """Error 307 -- relocated, but turn POST into error."""\r
671 if data is None:\r
672 return self.http_error_302(url, fp, errcode, errmsg, headers, data)\r
673 else:\r
674 return self.http_error_default(url, fp, errcode, errmsg, headers)\r
675\r
676 def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):\r
677 """Error 401 -- authentication required.\r
678 This function supports Basic authentication only."""\r
679 if not 'www-authenticate' in headers:\r
680 URLopener.http_error_default(self, url, fp,\r
681 errcode, errmsg, headers)\r
682 stuff = headers['www-authenticate']\r
683 import re\r
684 match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)\r
685 if not match:\r
686 URLopener.http_error_default(self, url, fp,\r
687 errcode, errmsg, headers)\r
688 scheme, realm = match.groups()\r
689 if scheme.lower() != 'basic':\r
690 URLopener.http_error_default(self, url, fp,\r
691 errcode, errmsg, headers)\r
692 name = 'retry_' + self.type + '_basic_auth'\r
693 if data is None:\r
694 return getattr(self,name)(url, realm)\r
695 else:\r
696 return getattr(self,name)(url, realm, data)\r
697\r
698 def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):\r
699 """Error 407 -- proxy authentication required.\r
700 This function supports Basic authentication only."""\r
701 if not 'proxy-authenticate' in headers:\r
702 URLopener.http_error_default(self, url, fp,\r
703 errcode, errmsg, headers)\r
704 stuff = headers['proxy-authenticate']\r
705 import re\r
706 match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)\r
707 if not match:\r
708 URLopener.http_error_default(self, url, fp,\r
709 errcode, errmsg, headers)\r
710 scheme, realm = match.groups()\r
711 if scheme.lower() != 'basic':\r
712 URLopener.http_error_default(self, url, fp,\r
713 errcode, errmsg, headers)\r
714 name = 'retry_proxy_' + self.type + '_basic_auth'\r
715 if data is None:\r
716 return getattr(self,name)(url, realm)\r
717 else:\r
718 return getattr(self,name)(url, realm, data)\r
719\r
720 def retry_proxy_http_basic_auth(self, url, realm, data=None):\r
721 host, selector = splithost(url)\r
722 newurl = 'http://' + host + selector\r
723 proxy = self.proxies['http']\r
724 urltype, proxyhost = splittype(proxy)\r
725 proxyhost, proxyselector = splithost(proxyhost)\r
726 i = proxyhost.find('@') + 1\r
727 proxyhost = proxyhost[i:]\r
728 user, passwd = self.get_user_passwd(proxyhost, realm, i)\r
729 if not (user or passwd): return None\r
730 proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost\r
731 self.proxies['http'] = 'http://' + proxyhost + proxyselector\r
732 if data is None:\r
733 return self.open(newurl)\r
734 else:\r
735 return self.open(newurl, data)\r
736\r
737 def retry_proxy_https_basic_auth(self, url, realm, data=None):\r
738 host, selector = splithost(url)\r
739 newurl = 'https://' + host + selector\r
740 proxy = self.proxies['https']\r
741 urltype, proxyhost = splittype(proxy)\r
742 proxyhost, proxyselector = splithost(proxyhost)\r
743 i = proxyhost.find('@') + 1\r
744 proxyhost = proxyhost[i:]\r
745 user, passwd = self.get_user_passwd(proxyhost, realm, i)\r
746 if not (user or passwd): return None\r
747 proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost\r
748 self.proxies['https'] = 'https://' + proxyhost + proxyselector\r
749 if data is None:\r
750 return self.open(newurl)\r
751 else:\r
752 return self.open(newurl, data)\r
753\r
754 def retry_http_basic_auth(self, url, realm, data=None):\r
755 host, selector = splithost(url)\r
756 i = host.find('@') + 1\r
757 host = host[i:]\r
758 user, passwd = self.get_user_passwd(host, realm, i)\r
759 if not (user or passwd): return None\r
760 host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host\r
761 newurl = 'http://' + host + selector\r
762 if data is None:\r
763 return self.open(newurl)\r
764 else:\r
765 return self.open(newurl, data)\r
766\r
767 def retry_https_basic_auth(self, url, realm, data=None):\r
768 host, selector = splithost(url)\r
769 i = host.find('@') + 1\r
770 host = host[i:]\r
771 user, passwd = self.get_user_passwd(host, realm, i)\r
772 if not (user or passwd): return None\r
773 host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host\r
774 newurl = 'https://' + host + selector\r
775 if data is None:\r
776 return self.open(newurl)\r
777 else:\r
778 return self.open(newurl, data)\r
779\r
780 def get_user_passwd(self, host, realm, clear_cache=0):\r
781 key = realm + '@' + host.lower()\r
782 if key in self.auth_cache:\r
783 if clear_cache:\r
784 del self.auth_cache[key]\r
785 else:\r
786 return self.auth_cache[key]\r
787 user, passwd = self.prompt_user_passwd(host, realm)\r
788 if user or passwd: self.auth_cache[key] = (user, passwd)\r
789 return user, passwd\r
790\r
791 def prompt_user_passwd(self, host, realm):\r
792 """Override this in a GUI environment!"""\r
793 import getpass\r
794 try:\r
795 user = raw_input("Enter username for %s at %s: " % (realm,\r
796 host))\r
797 passwd = getpass.getpass("Enter password for %s in %s at %s: " %\r
798 (user, realm, host))\r
799 return user, passwd\r
800 except KeyboardInterrupt:\r
801 print\r
802 return None, None\r
803\r
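# Illustrative sketch (not part of the original module): programs that cannot
# prompt on a terminal can subclass FancyURLopener and supply credentials
# programmatically, as prompt_user_passwd()'s docstring suggests.
class _PresetPasswordOpener(FancyURLopener):
    def __init__(self, user, passwd, *args, **kwargs):
        FancyURLopener.__init__(self, *args, **kwargs)
        self._user, self._passwd = user, passwd

    def prompt_user_passwd(self, host, realm):
        # Return the stored credentials instead of prompting interactively.
        return self._user, self._passwd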
804\r
805# Utility functions\r
806\r
807_localhost = None\r
808def localhost():\r
809 """Return the IP address of the magic hostname 'localhost'."""\r
810 global _localhost\r
811 if _localhost is None:\r
812 _localhost = socket.gethostbyname('localhost')\r
813 return _localhost\r
814\r
815_thishost = None\r
816def thishost():\r
817 """Return the IP address of the current host."""\r
818 global _thishost\r
819 if _thishost is None:\r
820 _thishost = socket.gethostbyname(socket.gethostname())\r
821 return _thishost\r
822\r
823_ftperrors = None\r
824def ftperrors():\r
825 """Return the set of errors raised by the FTP class."""\r
826 global _ftperrors\r
827 if _ftperrors is None:\r
828 import ftplib\r
829 _ftperrors = ftplib.all_errors\r
830 return _ftperrors\r
831\r
832_noheaders = None\r
833def noheaders():\r
834 """Return an empty mimetools.Message object."""\r
835 global _noheaders\r
836 if _noheaders is None:\r
837 import mimetools\r
838 try:\r
839 from cStringIO import StringIO\r
840 except ImportError:\r
841 from StringIO import StringIO\r
842 _noheaders = mimetools.Message(StringIO(), 0)\r
843 _noheaders.fp.close() # Recycle file descriptor\r
844 return _noheaders\r
845\r
846\r
847# Utility classes\r
848\r
849class ftpwrapper:\r
850 """Class used by open_ftp() for cache of open FTP connections."""\r
851\r
852 def __init__(self, user, passwd, host, port, dirs,\r
853 timeout=socket._GLOBAL_DEFAULT_TIMEOUT):\r
854 self.user = user\r
855 self.passwd = passwd\r
856 self.host = host\r
857 self.port = port\r
858 self.dirs = dirs\r
859 self.timeout = timeout\r
860 self.init()\r
861\r
862 def init(self):\r
863 import ftplib\r
864 self.busy = 0\r
865 self.ftp = ftplib.FTP()\r
866 self.ftp.connect(self.host, self.port, self.timeout)\r
867 self.ftp.login(self.user, self.passwd)\r
868 for dir in self.dirs:\r
869 self.ftp.cwd(dir)\r
870\r
871 def retrfile(self, file, type):\r
872 import ftplib\r
873 self.endtransfer()\r
874 if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1\r
875 else: cmd = 'TYPE ' + type; isdir = 0\r
876 try:\r
877 self.ftp.voidcmd(cmd)\r
878 except ftplib.all_errors:\r
879 self.init()\r
880 self.ftp.voidcmd(cmd)\r
881 conn = None\r
882 if file and not isdir:\r
883 # Try to retrieve as a file\r
884 try:\r
885 cmd = 'RETR ' + file\r
886 conn = self.ftp.ntransfercmd(cmd)\r
887 except ftplib.error_perm, reason:\r
888 if str(reason)[:3] != '550':\r
889 raise IOError, ('ftp error', reason), sys.exc_info()[2]\r
890 if not conn:\r
891 # Set transfer mode to ASCII!\r
892 self.ftp.voidcmd('TYPE A')\r
893 # Try a directory listing. Verify that directory exists.\r
894 if file:\r
895 pwd = self.ftp.pwd()\r
896 try:\r
897 try:\r
898 self.ftp.cwd(file)\r
899 except ftplib.error_perm, reason:\r
900 raise IOError, ('ftp error', reason), sys.exc_info()[2]\r
901 finally:\r
902 self.ftp.cwd(pwd)\r
903 cmd = 'LIST ' + file\r
904 else:\r
905 cmd = 'LIST'\r
906 conn = self.ftp.ntransfercmd(cmd)\r
907 self.busy = 1\r
908 # Pass back both a suitably decorated object and a retrieval length\r
909 return (addclosehook(conn[0].makefile('rb'),\r
910 self.endtransfer), conn[1])\r
911 def endtransfer(self):\r
912 if not self.busy:\r
913 return\r
914 self.busy = 0\r
915 try:\r
916 self.ftp.voidresp()\r
917 except ftperrors():\r
918 pass\r
919\r
920 def close(self):\r
921 self.endtransfer()\r
922 try:\r
923 self.ftp.close()\r
924 except ftperrors():\r
925 pass\r
926\r
927class addbase:\r
928 """Base class for addinfo and addclosehook."""\r
929\r
930 def __init__(self, fp):\r
931 self.fp = fp\r
932 self.read = self.fp.read\r
933 self.readline = self.fp.readline\r
934 if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines\r
935 if hasattr(self.fp, "fileno"):\r
936 self.fileno = self.fp.fileno\r
937 else:\r
938 self.fileno = lambda: None\r
939 if hasattr(self.fp, "__iter__"):\r
940 self.__iter__ = self.fp.__iter__\r
941 if hasattr(self.fp, "next"):\r
942 self.next = self.fp.next\r
943\r
944 def __repr__(self):\r
945 return '<%s at %r whose fp = %r>' % (self.__class__.__name__,\r
946 id(self), self.fp)\r
947\r
948 def close(self):\r
949 self.read = None\r
950 self.readline = None\r
951 self.readlines = None\r
952 self.fileno = None\r
953 if self.fp: self.fp.close()\r
954 self.fp = None\r
955\r
956class addclosehook(addbase):\r
957 """Class to add a close hook to an open file."""\r
958\r
959 def __init__(self, fp, closehook, *hookargs):\r
960 addbase.__init__(self, fp)\r
961 self.closehook = closehook\r
962 self.hookargs = hookargs\r
963\r
964 def close(self):\r
965 addbase.close(self)\r
966 if self.closehook:\r
967 self.closehook(*self.hookargs)\r
968 self.closehook = None\r
969 self.hookargs = None\r
970\r
971class addinfo(addbase):\r
972 """class to add an info() method to an open file."""\r
973\r
974 def __init__(self, fp, headers):\r
975 addbase.__init__(self, fp)\r
976 self.headers = headers\r
977\r
978 def info(self):\r
979 return self.headers\r
980\r
981class addinfourl(addbase):\r
982 """class to add info() and geturl() methods to an open file."""\r
983\r
984 def __init__(self, fp, headers, url, code=None):\r
985 addbase.__init__(self, fp)\r
986 self.headers = headers\r
987 self.url = url\r
988 self.code = code\r
989\r
990 def info(self):\r
991 return self.headers\r
992\r
993 def getcode(self):\r
994 return self.code\r
995\r
996 def geturl(self):\r
997 return self.url\r
998\r
999\r
1000# Utilities to parse URLs (most of these return None for missing parts):\r
1001# unwrap('<URL:type://host/path>') --> 'type://host/path'\r
1002# splittype('type:opaquestring') --> 'type', 'opaquestring'\r
1003# splithost('//host[:port]/path') --> 'host[:port]', '/path'\r
1004# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'\r
1005# splitpasswd('user:passwd') -> 'user', 'passwd'\r
1006# splitport('host:port') --> 'host', 'port'\r
1007# splitquery('/path?query') --> '/path', 'query'\r
1008# splittag('/path#tag') --> '/path', 'tag'\r
1009# splitattr('/path;attr1=value1;attr2=value2;...') ->\r
1010# '/path', ['attr1=value1', 'attr2=value2', ...]\r
1011# splitvalue('attr=value') --> 'attr', 'value'\r
1012# unquote('abc%20def') -> 'abc def'\r
# quote('abc def') -> 'abc%20def'
1014\r
1015try:\r
1016 unicode\r
1017except NameError:\r
1018 def _is_unicode(x):\r
1019 return 0\r
1020else:\r
1021 def _is_unicode(x):\r
1022 return isinstance(x, unicode)\r
1023\r
1024def toBytes(url):\r
1025 """toBytes(u"URL") --> 'URL'."""\r
1026 # Most URL schemes require ASCII. If that changes, the conversion\r
1027 # can be relaxed\r
1028 if _is_unicode(url):\r
1029 try:\r
1030 url = url.encode("ASCII")\r
1031 except UnicodeError:\r
1032 raise UnicodeError("URL " + repr(url) +\r
1033 " contains non-ASCII characters")\r
1034 return url\r
1035\r
1036def unwrap(url):\r
1037 """unwrap('<URL:type://host/path>') --> 'type://host/path'."""\r
1038 url = url.strip()\r
1039 if url[:1] == '<' and url[-1:] == '>':\r
1040 url = url[1:-1].strip()\r
1041 if url[:4] == 'URL:': url = url[4:].strip()\r
1042 return url\r
1043\r
1044_typeprog = None\r
1045def splittype(url):\r
1046 """splittype('type:opaquestring') --> 'type', 'opaquestring'."""\r
1047 global _typeprog\r
1048 if _typeprog is None:\r
1049 import re\r
1050 _typeprog = re.compile('^([^/:]+):')\r
1051\r
1052 match = _typeprog.match(url)\r
1053 if match:\r
1054 scheme = match.group(1)\r
1055 return scheme.lower(), url[len(scheme) + 1:]\r
1056 return None, url\r
1057\r
1058_hostprog = None\r
1059def splithost(url):\r
1060 """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""\r
1061 global _hostprog\r
1062 if _hostprog is None:\r
1063 import re\r
1064 _hostprog = re.compile('^//([^/?]*)(.*)$')\r
1065\r
1066 match = _hostprog.match(url)\r
1067 if match:\r
1068 host_port = match.group(1)\r
1069 path = match.group(2)\r
1070 if path and not path.startswith('/'):\r
1071 path = '/' + path\r
1072 return host_port, path\r
1073 return None, url\r
1074\r
1075_userprog = None\r
1076def splituser(host):\r
1077 """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""\r
1078 global _userprog\r
1079 if _userprog is None:\r
1080 import re\r
1081 _userprog = re.compile('^(.*)@(.*)$')\r
1082\r
1083 match = _userprog.match(host)\r
1084 if match: return match.group(1, 2)\r
1085 return None, host\r
1086\r
1087_passwdprog = None\r
1088def splitpasswd(user):\r
1089 """splitpasswd('user:passwd') -> 'user', 'passwd'."""\r
1090 global _passwdprog\r
1091 if _passwdprog is None:\r
1092 import re\r
1093 _passwdprog = re.compile('^([^:]*):(.*)$',re.S)\r
1094\r
1095 match = _passwdprog.match(user)\r
1096 if match: return match.group(1, 2)\r
1097 return user, None\r
1098\r
1099# splittag('/path#tag') --> '/path', 'tag'\r
1100_portprog = None\r
1101def splitport(host):\r
1102 """splitport('host:port') --> 'host', 'port'."""\r
1103 global _portprog\r
1104 if _portprog is None:\r
1105 import re\r
1106 _portprog = re.compile('^(.*):([0-9]+)$')\r
1107\r
1108 match = _portprog.match(host)\r
1109 if match: return match.group(1, 2)\r
1110 return host, None\r
1111\r
1112_nportprog = None\r
1113def splitnport(host, defport=-1):\r
1114 """Split host and port, returning numeric port.\r
1115 Return given default port if no ':' found; defaults to -1.\r
    Return numerical port if a valid number is found after ':'.
1117 Return None if ':' but not a valid number."""\r
1118 global _nportprog\r
1119 if _nportprog is None:\r
1120 import re\r
1121 _nportprog = re.compile('^(.*):(.*)$')\r
1122\r
1123 match = _nportprog.match(host)\r
1124 if match:\r
1125 host, port = match.group(1, 2)\r
1126 try:\r
1127 if not port: raise ValueError, "no digits"\r
1128 nport = int(port)\r
1129 except ValueError:\r
1130 nport = None\r
1131 return host, nport\r
1132 return host, defport\r
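# Illustrative results (not from the original source):
#   splitnport('www.python.org:80')   -> ('www.python.org', 80)
#   splitnport('www.python.org')      -> ('www.python.org', -1)
#   splitnport('www.python.org:foo')  -> ('www.python.org', None)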
1133\r
1134_queryprog = None\r
1135def splitquery(url):\r
1136 """splitquery('/path?query') --> '/path', 'query'."""\r
1137 global _queryprog\r
1138 if _queryprog is None:\r
1139 import re\r
1140 _queryprog = re.compile('^(.*)\?([^?]*)$')\r
1141\r
1142 match = _queryprog.match(url)\r
1143 if match: return match.group(1, 2)\r
1144 return url, None\r
1145\r
1146_tagprog = None\r
1147def splittag(url):\r
1148 """splittag('/path#tag') --> '/path', 'tag'."""\r
1149 global _tagprog\r
1150 if _tagprog is None:\r
1151 import re\r
1152 _tagprog = re.compile('^(.*)#([^#]*)$')\r
1153\r
1154 match = _tagprog.match(url)\r
1155 if match: return match.group(1, 2)\r
1156 return url, None\r
1157\r
1158def splitattr(url):\r
1159 """splitattr('/path;attr1=value1;attr2=value2;...') ->\r
1160 '/path', ['attr1=value1', 'attr2=value2', ...]."""\r
1161 words = url.split(';')\r
1162 return words[0], words[1:]\r
1163\r
1164_valueprog = None\r
1165def splitvalue(attr):\r
1166 """splitvalue('attr=value') --> 'attr', 'value'."""\r
1167 global _valueprog\r
1168 if _valueprog is None:\r
1169 import re\r
1170 _valueprog = re.compile('^([^=]*)=(.*)$')\r
1171\r
1172 match = _valueprog.match(attr)\r
1173 if match: return match.group(1, 2)\r
1174 return attr, None\r
1175\r
1176# urlparse contains a duplicate of this method to avoid a circular import. If\r
1177# you update this method, also update the copy in urlparse. This code\r
1178# duplication does not exist in Python3.\r
1179\r
1180_hexdig = '0123456789ABCDEFabcdef'\r
1181_hextochr = dict((a + b, chr(int(a + b, 16)))\r
1182 for a in _hexdig for b in _hexdig)\r
1183\r
1184def unquote(s):\r
1185 """unquote('abc%20def') -> 'abc def'."""\r
1186 res = s.split('%')\r
1187 # fastpath\r
1188 if len(res) == 1:\r
1189 return s\r
1190 s = res[0]\r
1191 for item in res[1:]:\r
1192 try:\r
1193 s += _hextochr[item[:2]] + item[2:]\r
1194 except KeyError:\r
1195 s += '%' + item\r
1196 except UnicodeDecodeError:\r
1197 s += unichr(int(item[:2], 16)) + item[2:]\r
1198 return s\r
1199\r
1200def unquote_plus(s):\r
1201 """unquote('%7e/abc+def') -> '~/abc def'"""\r
1202 s = s.replace('+', ' ')\r
1203 return unquote(s)\r
1204\r
1205always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'\r
1206 'abcdefghijklmnopqrstuvwxyz'\r
1207 '0123456789' '_.-')\r
1208_safe_map = {}\r
1209for i, c in zip(xrange(256), str(bytearray(xrange(256)))):\r
1210 _safe_map[c] = c if (i < 128 and c in always_safe) else '%{:02X}'.format(i)\r
1211_safe_quoters = {}\r
1212\r
1213def quote(s, safe='/'):\r
1214 """quote('abc def') -> 'abc%20def'\r
1215\r
1216 Each part of a URL, e.g. the path info, the query, etc., has a\r
1217 different set of reserved characters that must be quoted.\r
1218\r
1219 RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists\r
1220 the following reserved characters.\r
1221\r
1222 reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |\r
1223 "$" | ","\r
1224\r
1225 Each of these characters is reserved in some component of a URL,\r
1226 but not necessarily in all of them.\r
1227\r
1228 By default, the quote function is intended for quoting the path\r
1229 section of a URL. Thus, it will not encode '/'. This character\r
1230 is reserved, but in typical usage the quote function is being\r
1231 called on a path where the existing slash characters are used as\r
1232 reserved characters.\r
1233 """\r
1234 # fastpath\r
1235 if not s:\r
1236 if s is None:\r
1237 raise TypeError('None object cannot be quoted')\r
1238 return s\r
1239 cachekey = (safe, always_safe)\r
1240 try:\r
1241 (quoter, safe) = _safe_quoters[cachekey]\r
1242 except KeyError:\r
1243 safe_map = _safe_map.copy()\r
1244 safe_map.update([(c, c) for c in safe])\r
1245 quoter = safe_map.__getitem__\r
1246 safe = always_safe + safe\r
1247 _safe_quoters[cachekey] = (quoter, safe)\r
1248 if not s.rstrip(safe):\r
1249 return s\r
1250 return ''.join(map(quoter, s))\r
1251\r
1252def quote_plus(s, safe=''):\r
1253 """Quote the query fragment of a URL; replacing ' ' with '+'"""\r
1254 if ' ' in s:\r
1255 s = quote(s, safe + ' ')\r
1256 return s.replace(' ', '+')\r
1257 return quote(s, safe)\r
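# Illustrative examples (not from the original source):
#   quote('/~user/file name.txt')  -> '/%7Euser/file%20name.txt'
#   quote_plus('a b&c=d')          -> 'a+b%26c%3Dd'
#   unquote_plus('a+b%26c%3Dd')    -> 'a b&c=d'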
1258\r
1259def urlencode(query, doseq=0):\r
1260 """Encode a sequence of two-element tuples or dictionary into a URL query string.\r
1261\r
1262 If any values in the query arg are sequences and doseq is true, each\r
1263 sequence element is converted to a separate parameter.\r
1264\r
1265 If the query arg is a sequence of two-element tuples, the order of the\r
1266 parameters in the output will match the order of parameters in the\r
1267 input.\r
1268 """\r
1269\r
1270 if hasattr(query,"items"):\r
1271 # mapping objects\r
1272 query = query.items()\r
1273 else:\r
1274 # it's a bother at times that strings and string-like objects are\r
1275 # sequences...\r
1276 try:\r
1277 # non-sequence items should not work with len()\r
1278 # non-empty strings will fail this\r
1279 if len(query) and not isinstance(query[0], tuple):\r
1280 raise TypeError\r
1281 # zero-length sequences of all types will get here and succeed,\r
1282 # but that's a minor nit - since the original implementation\r
1283 # allowed empty dicts that type of behavior probably should be\r
1284 # preserved for consistency\r
1285 except TypeError:\r
1286 ty,va,tb = sys.exc_info()\r
1287 raise TypeError, "not a valid non-string sequence or mapping object", tb\r
1288\r
1289 l = []\r
1290 if not doseq:\r
1291 # preserve old behavior\r
1292 for k, v in query:\r
1293 k = quote_plus(str(k))\r
1294 v = quote_plus(str(v))\r
1295 l.append(k + '=' + v)\r
1296 else:\r
1297 for k, v in query:\r
1298 k = quote_plus(str(k))\r
1299 if isinstance(v, str):\r
1300 v = quote_plus(v)\r
1301 l.append(k + '=' + v)\r
1302 elif _is_unicode(v):\r
1303 # is there a reasonable way to convert to ASCII?\r
1304 # encode generates a string, but "replace" or "ignore"\r
1305 # lose information and "strict" can raise UnicodeError\r
1306 v = quote_plus(v.encode("ASCII","replace"))\r
1307 l.append(k + '=' + v)\r
1308 else:\r
1309 try:\r
1310 # is this a sufficient test for sequence-ness?\r
1311 len(v)\r
1312 except TypeError:\r
1313 # not a sequence\r
1314 v = quote_plus(str(v))\r
1315 l.append(k + '=' + v)\r
1316 else:\r
1317 # loop over the sequence\r
1318 for elt in v:\r
1319 l.append(k + '=' + quote_plus(str(elt)))\r
1320 return '&'.join(l)\r
1321\r
1322# Proxy handling\r
1323def getproxies_environment():\r
1324 """Return a dictionary of scheme -> proxy server URL mappings.\r
1325\r
1326 Scan the environment for variables named <scheme>_proxy;\r
1327 this seems to be the standard convention. If you need a\r
1328 different way, you can pass a proxies dictionary to the\r
1329 [Fancy]URLopener constructor.\r
1330\r
1331 """\r
1332 proxies = {}\r
1333 for name, value in os.environ.items():\r
1334 name = name.lower()\r
1335 if value and name[-6:] == '_proxy':\r
1336 proxies[name[:-6]] = value\r
1337 return proxies\r
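# Illustrative mapping (not from the original source): with, for example,
# http_proxy="http://proxy.example.com:3128/" set in the environment,
# getproxies_environment() returns {'http': 'http://proxy.example.com:3128/'}.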
1338\r
1339def proxy_bypass_environment(host):\r
1340 """Test if proxies should not be used for a particular host.\r
1341\r
1342 Checks the environment for a variable named no_proxy, which should\r
1343 be a list of DNS suffixes separated by commas, or '*' for all hosts.\r
1344 """\r
1345 no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')\r
1346 # '*' is special case for always bypass\r
1347 if no_proxy == '*':\r
1348 return 1\r
1349 # strip port off host\r
1350 hostonly, port = splitport(host)\r
1351 # check if the host ends with any of the DNS suffixes\r
1352 for name in no_proxy.split(','):\r
1353 if name and (hostonly.endswith(name) or host.endswith(name)):\r
1354 return 1\r
1355 # otherwise, don't bypass\r
1356 return 0\r
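# Illustrative behaviour (not from the original source), assuming the
# environment contains no_proxy="example.com,localhost":
#   proxy_bypass_environment('www.example.com:8080')  -> 1  (suffix match)
#   proxy_bypass_environment('python.org')            -> 0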
1357\r
1358\r
1359if sys.platform == 'darwin':\r
1360 from _scproxy import _get_proxy_settings, _get_proxies\r
1361\r
1362 def proxy_bypass_macosx_sysconf(host):\r
1363 """\r
1364 Return True iff this host shouldn't be accessed using a proxy\r
1365\r
1366 This function uses the MacOSX framework SystemConfiguration\r
1367 to fetch the proxy information.\r
1368 """\r
1369 import re\r
1370 import socket\r
1371 from fnmatch import fnmatch\r
1372\r
1373 hostonly, port = splitport(host)\r
1374\r
1375 def ip2num(ipAddr):\r
1376 parts = ipAddr.split('.')\r
1377 parts = map(int, parts)\r
1378 if len(parts) != 4:\r
1379 parts = (parts + [0, 0, 0, 0])[:4]\r
1380 return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]\r
1381\r
1382 proxy_settings = _get_proxy_settings()\r
1383\r
1384 # Check for simple host names:\r
1385 if '.' not in host:\r
1386 if proxy_settings['exclude_simple']:\r
1387 return True\r
1388\r
1389 hostIP = None\r
1390\r
1391 for value in proxy_settings.get('exceptions', ()):\r
1392 # Items in the list are strings like these: *.local, 169.254/16\r
1393 if not value: continue\r
1394\r
1395 m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)\r
1396 if m is not None:\r
1397 if hostIP is None:\r
1398 try:\r
1399 hostIP = socket.gethostbyname(hostonly)\r
1400 hostIP = ip2num(hostIP)\r
1401 except socket.error:\r
1402 continue\r
1403\r
1404 base = ip2num(m.group(1))\r
1405 mask = m.group(2)\r
1406 if mask is None:\r
1407 mask = 8 * (m.group(1).count('.') + 1)\r
1408\r
1409 else:\r
1410 mask = int(mask[1:])\r
1411 mask = 32 - mask\r
1412\r
1413 if (hostIP >> mask) == (base >> mask):\r
1414 return True\r
1415\r
1416 elif fnmatch(host, value):\r
1417 return True\r
1418\r
1419 return False\r
1420\r
1421 def getproxies_macosx_sysconf():\r
1422 """Return a dictionary of scheme -> proxy server URL mappings.\r
1423\r
1424 This function uses the MacOSX framework SystemConfiguration\r
1425 to fetch the proxy information.\r
1426 """\r
1427 return _get_proxies()\r
1428\r
1429 def proxy_bypass(host):\r
1430 if getproxies_environment():\r
1431 return proxy_bypass_environment(host)\r
1432 else:\r
1433 return proxy_bypass_macosx_sysconf(host)\r
1434\r
1435 def getproxies():\r
1436 return getproxies_environment() or getproxies_macosx_sysconf()\r
1437\r
1438elif os.name == 'nt':\r
1439 def getproxies_registry():\r
1440 """Return a dictionary of scheme -> proxy server URL mappings.\r
1441\r
1442 Win32 uses the registry to store proxies.\r
1443\r
1444 """\r
1445 proxies = {}\r
1446 try:\r
1447 import _winreg\r
1448 except ImportError:\r
1449 # Std module, so should be around - but you never know!\r
1450 return proxies\r
1451 try:\r
1452 internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,\r
1453 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')\r
1454 proxyEnable = _winreg.QueryValueEx(internetSettings,\r
1455 'ProxyEnable')[0]\r
1456 if proxyEnable:\r
1457 # Returned as Unicode but problems if not converted to ASCII\r
1458 proxyServer = str(_winreg.QueryValueEx(internetSettings,\r
1459 'ProxyServer')[0])\r
1460 if '=' in proxyServer:\r
1461 # Per-protocol settings\r
1462 for p in proxyServer.split(';'):\r
1463 protocol, address = p.split('=', 1)\r
1464 # See if address has a type:// prefix\r
1465 import re\r
1466 if not re.match('^([^/:]+)://', address):\r
1467 address = '%s://%s' % (protocol, address)\r
1468 proxies[protocol] = address\r
1469 else:\r
1470 # Use one setting for all protocols\r
1471 if proxyServer[:5] == 'http:':\r
1472 proxies['http'] = proxyServer\r
1473 else:\r
1474 proxies['http'] = 'http://%s' % proxyServer\r
1475 proxies['https'] = 'https://%s' % proxyServer\r
1476 proxies['ftp'] = 'ftp://%s' % proxyServer\r
1477 internetSettings.Close()\r
1478 except (WindowsError, ValueError, TypeError):\r
1479 # Either registry key not found etc, or the value in an\r
1480 # unexpected format.\r
1481 # proxies already set up to be empty so nothing to do\r
1482 pass\r
1483 return proxies\r
1484\r
1485 def getproxies():\r
1486 """Return a dictionary of scheme -> proxy server URL mappings.\r
1487\r
1488 Returns settings gathered from the environment, if specified,\r
1489 or the registry.\r
1490\r
1491 """\r
1492 return getproxies_environment() or getproxies_registry()\r
1493\r
1494 def proxy_bypass_registry(host):\r
1495 try:\r
1496 import _winreg\r
1497 import re\r
1498 except ImportError:\r
1499 # Std modules, so should be around - but you never know!\r
1500 return 0\r
1501 try:\r
1502 internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,\r
1503 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')\r
1504 proxyEnable = _winreg.QueryValueEx(internetSettings,\r
1505 'ProxyEnable')[0]\r
1506 proxyOverride = str(_winreg.QueryValueEx(internetSettings,\r
1507 'ProxyOverride')[0])\r
1508 # ^^^^ Returned as Unicode but problems if not converted to ASCII\r
1509 except WindowsError:\r
1510 return 0\r
1511 if not proxyEnable or not proxyOverride:\r
1512 return 0\r
1513 # try to make a host list from name and IP address.\r
1514 rawHost, port = splitport(host)\r
1515 host = [rawHost]\r
1516 try:\r
1517 addr = socket.gethostbyname(rawHost)\r
1518 if addr != rawHost:\r
1519 host.append(addr)\r
1520 except socket.error:\r
1521 pass\r
1522 try:\r
1523 fqdn = socket.getfqdn(rawHost)\r
1524 if fqdn != rawHost:\r
1525 host.append(fqdn)\r
1526 except socket.error:\r
1527 pass\r
1528 # make a check value list from the registry entry: replace the\r
1529 # '<local>' string by the localhost entry and the corresponding\r
1530 # canonical entry.\r
1531 proxyOverride = proxyOverride.split(';')\r
1532 # now check if we match one of the registry values.\r
1533 for test in proxyOverride:\r
1534 if test == '<local>':\r
1535 if '.' not in rawHost:\r
1536 return 1\r
1537 test = test.replace(".", r"\.") # mask dots\r
1538 test = test.replace("*", r".*") # change glob sequence\r
1539 test = test.replace("?", r".") # change glob char\r
1540 for val in host:\r
1541 # print "%s <--> %s" %( test, val )\r
1542 if re.match(test, val, re.I):\r
1543 return 1\r
1544 return 0\r
1545\r
1546 def proxy_bypass(host):\r
1547 """Return a dictionary of scheme -> proxy server URL mappings.\r
1548\r
1549 Returns settings gathered from the environment, if specified,\r
1550 or the registry.\r
1551\r
1552 """\r
1553 if getproxies_environment():\r
1554 return proxy_bypass_environment(host)\r
1555 else:\r
1556 return proxy_bypass_registry(host)\r
1557\r
1558else:\r
1559 # By default use environment variables\r
1560 getproxies = getproxies_environment\r
1561 proxy_bypass = proxy_bypass_environment\r
1562\r
1563# Test and time quote() and unquote()\r
1564def test1():\r
1565 s = ''\r
1566 for i in range(256): s = s + chr(i)\r
1567 s = s*4\r
1568 t0 = time.time()\r
1569 qs = quote(s)\r
1570 uqs = unquote(qs)\r
1571 t1 = time.time()\r
1572 if uqs != s:\r
1573 print 'Wrong!'\r
1574 print repr(s)\r
1575 print repr(qs)\r
1576 print repr(uqs)\r
1577 print round(t1 - t0, 3), 'sec'\r
1578\r
1579\r
1580def reporthook(blocknum, blocksize, totalsize):\r
1581 # Report during remote transfers\r
1582 print "Block number: %d, Block size: %d, Total size: %d" % (\r
1583 blocknum, blocksize, totalsize)\r
1584\r
1585# Test program\r
1586def test(args=[]):\r
1587 if not args:\r
1588 args = [\r
1589 '/etc/passwd',\r
1590 'file:/etc/passwd',\r
1591 'file://localhost/etc/passwd',\r
1592 'ftp://ftp.gnu.org/pub/README',\r
1593 'http://www.python.org/index.html',\r
1594 ]\r
1595 if hasattr(URLopener, "open_https"):\r
1596 args.append('https://synergy.as.cmu.edu/~geek/')\r
1597 try:\r
1598 for url in args:\r
1599 print '-'*10, url, '-'*10\r
1600 fn, h = urlretrieve(url, None, reporthook)\r
1601 print fn\r
1602 if h:\r
1603 print '======'\r
1604 for k in h.keys(): print k + ':', h[k]\r
1605 print '======'\r
1606 with open(fn, 'rb') as fp:\r
1607 data = fp.read()\r
1608 if '\r' in data:\r
1609 table = string.maketrans("", "")\r
1610 data = data.translate(table, "\r")\r
1611 print data\r
1612 fn, h = None, None\r
1613 print '-'*40\r
1614 finally:\r
1615 urlcleanup()\r
1616\r
1617def main():\r
1618 import getopt, sys\r
1619 try:\r
1620 opts, args = getopt.getopt(sys.argv[1:], "th")\r
1621 except getopt.error, msg:\r
1622 print msg\r
1623 print "Use -h for help"\r
1624 return\r
1625 t = 0\r
1626 for o, a in opts:\r
1627 if o == '-t':\r
1628 t = t + 1\r
1629 if o == '-h':\r
1630 print "Usage: python urllib.py [-t] [url ...]"\r
1631 print "-t runs self-test;",\r
1632 print "otherwise, contents of urls are printed"\r
1633 return\r
1634 if t:\r
1635 if t > 1:\r
1636 test1()\r
1637 test(args)\r
1638 else:\r
1639 if not args:\r
1640 print "Use -h for help"\r
1641 for url in args:\r
1642 print urlopen(url).read(),\r
1643\r
1644# Run test program when run as a script\r
1645if __name__ == '__main__':\r
1646 main()\r