]>
Commit | Line | Data |
---|---|---|
4710c53d | 1 | """Open an arbitrary URL.\r |
2 | \r | |
3 | See the following document for more info on URLs:\r | |
4 | "Names and Addresses, URIs, URLs, URNs, URCs", at\r | |
5 | http://www.w3.org/pub/WWW/Addressing/Overview.html\r | |
6 | \r | |
7 | See also the HTTP spec (from which the error codes are derived):\r | |
8 | "HTTP - Hypertext Transfer Protocol", at\r | |
9 | http://www.w3.org/pub/WWW/Protocols/\r | |
10 | \r | |
11 | Related standards and specs:\r | |
12 | - RFC1808: the "relative URL" spec. (authoritative status)\r | |
13 | - RFC1738 - the "URL standard". (authoritative status)\r | |
14 | - RFC1630 - the "URI spec". (informational status)\r | |
15 | \r | |
16 | The object returned by URLopener().open(file) will differ per\r | |
protocol. All you know is that it has methods read(), readline(),
18 | readlines(), fileno(), close() and info(). The read*(), fileno()\r | |
19 | and close() methods work like those of open files.\r | |
20 | The info() method returns a mimetools.Message object which can be\r | |
21 | used to query various info about the object, if available.\r | |
22 | (mimetools.Message objects are queried with the getheader() method.)\r | |
23 | """\r | |
24 | \r | |
25 | import string\r | |
26 | import socket\r | |
27 | import os\r | |
28 | import time\r | |
29 | import sys\r | |
30 | from urlparse import urljoin as basejoin\r | |
31 | \r | |
# Names exported by "from urllib import *"; also the documented public API.
__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
           "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
           "urlencode", "url2pathname", "pathname2url", "splittag",
           "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
           "splittype", "splithost", "splituser", "splitpasswd", "splitport",
           "splitnport", "splitquery", "splitattr", "splitvalue",
           "getproxies"]

__version__ = '1.17'    # XXX This version is not always updated :-(

MAXFTPCACHE = 10        # Trim the ftp cache beyond this size
43 | \r | |
# Helper for non-unix systems: pick platform-specific path<->URL converters.
# On Unix-like systems the conversion is just percent-(un)quoting.
if os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
elif os.name == 'riscos':
    from rourl2path import url2pathname, pathname2url
else:
    def url2pathname(pathname):
        """OS-specific conversion from a relative URL of the 'file' scheme
        to a file system path; not recommended for general use."""
        return unquote(pathname)

    def pathname2url(pathname):
        """OS-specific conversion from a file system path to a relative URL
        of the 'file' scheme; not recommended for general use."""
        return quote(pathname)
59 | \r | |
60 | # This really consists of two pieces:\r | |
61 | # (1) a class which handles opening of all sorts of URLs\r | |
62 | # (plus assorted utilities etc.)\r | |
63 | # (2) a set of functions for parsing URLs\r | |
64 | # XXX Should these be separated out into different modules?\r | |
65 | \r | |
66 | \r | |
# Shortcut for basic usage
_urlopener = None
def urlopen(url, data=None, proxies=None):
    """Create a file-like object for the specified URL to read from."""
    from warnings import warnpy3k
    warnpy3k("urllib.urlopen() has been removed in Python 3.0 in "
             "favor of urllib2.urlopen()", stacklevel=2)

    global _urlopener
    if proxies is not None:
        # An explicit proxy mapping gets a throwaway opener, not the cached one.
        opener = FancyURLopener(proxies=proxies)
    elif _urlopener:
        opener = _urlopener
    else:
        # Lazily create and remember the module-wide default opener.
        opener = _urlopener = FancyURLopener()
    if data is None:
        return opener.open(url)
    return opener.open(url, data)
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Retrieve *url* to a local file, reusing the module-level opener.

    Returns (filename, headers); see URLopener.retrieve() for details."""
    global _urlopener
    if _urlopener is None:
        _urlopener = FancyURLopener()
    return _urlopener.retrieve(url, filename, reporthook, data)
def urlcleanup():
    """Clean up state left behind by urlopen()/urlretrieve(): delete any
    temporary files and clear the module-level caches."""
    if _urlopener:
        _urlopener.cleanup()
    # _safe_quoters is the quote() memo table defined elsewhere in this
    # module; ftpcache holds open ftpwrapper connections.
    _safe_quoters.clear()
    ftpcache.clear()
97 | \r | |
98 | # check for SSL\r | |
99 | try:\r | |
100 | import ssl\r | |
101 | except:\r | |
102 | _have_ssl = False\r | |
103 | else:\r | |
104 | _have_ssl = True\r | |
105 | \r | |
106 | # exception raised when downloaded size does not match content-length\r | |
class ContentTooShortError(IOError):
    """Raised when the downloaded data is shorter than the size announced
    by the server's Content-Length header."""

    def __init__(self, message, content):
        super(ContentTooShortError, self).__init__(message)
        # Keep the partial payload so callers can inspect or salvage it.
        self.content = content
111 | \r | |
# Module-wide cache of open FTP connections, keyed by
# (user, host, port, dirs); shared by all URLopener instances by default.
ftpcache = {}
113 | class URLopener:\r | |
114 | """Class to open URLs.\r | |
115 | This is a class rather than just a subroutine because we may need\r | |
116 | more than one set of global protocol-specific options.\r | |
117 | Note -- this is a base class for those who don't want the\r | |
118 | automatic handling of errors type 302 (relocated) and 401\r | |
119 | (authorization needed)."""\r | |
120 | \r | |
121 | __tempfiles = None\r | |
122 | \r | |
123 | version = "Python-urllib/%s" % __version__\r | |
124 | \r | |
    # Constructor
    def __init__(self, proxies=None, **x509):
        """Initialize the opener.

        proxies -- mapping of URL scheme -> proxy URL; defaults to the
            environment-derived mapping returned by getproxies().
        x509 -- optional 'key_file'/'cert_file' keyword arguments used for
            HTTPS client authentication (see open_https()).
        """
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
        self.proxies = proxies
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')
        # Headers sent with every HTTP(S) request; extend via addheader().
        self.addheaders = [('User-Agent', self.version)]
        self.__tempfiles = []
        self.__unlink = os.unlink # See cleanup()
        self.tempcache = None
        # Undocumented feature: if you assign {} to tempcache,
        # it is used to cache files retrieved with
        # self.retrieve(). This is not enabled by default
        # since it does not work for changing documents (and I
        # haven't got the logic to check expiration headers
        # yet).
        self.ftpcache = ftpcache
        # Undocumented feature: you can use a different
        # ftp cache by assigning to the .ftpcache member;
        # in case you want logically independent URL openers
        # XXX This is not threadsafe. Bah.
148 | \r | |
    def __del__(self):
        # Remove temp files when the opener is garbage-collected.
        self.close()
151 | \r | |
    def close(self):
        """Release resources held by this opener (alias for cleanup())."""
        self.cleanup()
154 | \r | |
    def cleanup(self):
        """Delete temporary files created by retrieve() and clear the
        temp-file cache, if one was enabled."""
        # This code sometimes runs when the rest of this module
        # has already been deleted, so it can't use any globals
        # or import anything.
        if self.__tempfiles:
            for file in self.__tempfiles:
                try:
                    self.__unlink(file)
                except OSError:
                    # Best effort -- the file may already be gone.
                    pass
            del self.__tempfiles[:]
        if self.tempcache:
            self.tempcache.clear()
168 | \r | |
    def addheader(self, *args):
        """Add a header to be used by the HTTP interface only
        e.g. u.addheader('Accept', 'sound/basic')"""
        # Stored as a (name, value) tuple and sent on every HTTP(S) request.
        self.addheaders.append(args)
173 | \r | |
    # External interface
    def open(self, fullurl, data=None):
        """Use URLopener().open(file) instead of open(file, 'r').

        Dispatches to an open_<scheme>() method based on the URL scheme
        (defaulting to 'file'), routing through a configured proxy when
        one exists for that scheme.  *data*, when given, is a POST body.
        """
        fullurl = unwrap(toBytes(fullurl))
        # Percent-encode the URL, working around lame servers that choke
        # on characters such as spaces within URL paths.
        fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
        if self.tempcache and fullurl in self.tempcache:
            filename, headers = self.tempcache[fullurl]
            fp = open(filename, 'rb')
            return addinfourl(fp, headers, fullurl)
        urltype, url = splittype(fullurl)
        if not urltype:
            urltype = 'file'
        if urltype in self.proxies:
            proxy = self.proxies[urltype]
            urltype, proxyhost = splittype(proxy)
            host, selector = splithost(proxyhost)
            url = (host, fullurl) # Signal special case to open_*()
        else:
            proxy = None
        name = 'open_' + urltype
        self.type = urltype
        # Scheme names may contain '-', which is not valid in a method name.
        name = name.replace('-', '_')
        if not hasattr(self, name):
            if proxy:
                return self.open_unknown_proxy(proxy, fullurl, data)
            else:
                return self.open_unknown(fullurl, data)
        try:
            if data is None:
                return getattr(self, name)(url)
            else:
                return getattr(self, name)(url, data)
        except socket.error, msg:
            # Re-raise as IOError, preserving the original traceback.
            raise IOError, ('socket error', msg), sys.exc_info()[2]
210 | \r | |
    def open_unknown(self, fullurl, data=None):
        """Overridable interface to open unknown URL type."""
        # Default behavior: report the unsupported scheme via IOError.
        type, url = splittype(fullurl)
        raise IOError, ('url error', 'unknown url type', type)
215 | \r | |
    def open_unknown_proxy(self, proxy, fullurl, data=None):
        """Overridable interface to open unknown URL type via a proxy."""
        # Default behavior: report the unusable proxy via IOError.
        type, url = splittype(fullurl)
        raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
220 | \r | |
221 | # External interface\r | |
222 | def retrieve(self, url, filename=None, reporthook=None, data=None):\r | |
223 | """retrieve(url) returns (filename, headers) for a local object\r | |
224 | or (tempfilename, headers) for a remote object."""\r | |
225 | url = unwrap(toBytes(url))\r | |
226 | if self.tempcache and url in self.tempcache:\r | |
227 | return self.tempcache[url]\r | |
228 | type, url1 = splittype(url)\r | |
229 | if filename is None and (not type or type == 'file'):\r | |
230 | try:\r | |
231 | fp = self.open_local_file(url1)\r | |
232 | hdrs = fp.info()\r | |
233 | fp.close()\r | |
234 | return url2pathname(splithost(url1)[1]), hdrs\r | |
235 | except IOError:\r | |
236 | pass\r | |
237 | fp = self.open(url, data)\r | |
238 | try:\r | |
239 | headers = fp.info()\r | |
240 | if filename:\r | |
241 | tfp = open(filename, 'wb')\r | |
242 | else:\r | |
243 | import tempfile\r | |
244 | garbage, path = splittype(url)\r | |
245 | garbage, path = splithost(path or "")\r | |
246 | path, garbage = splitquery(path or "")\r | |
247 | path, garbage = splitattr(path or "")\r | |
248 | suffix = os.path.splitext(path)[1]\r | |
249 | (fd, filename) = tempfile.mkstemp(suffix)\r | |
250 | self.__tempfiles.append(filename)\r | |
251 | tfp = os.fdopen(fd, 'wb')\r | |
252 | try:\r | |
253 | result = filename, headers\r | |
254 | if self.tempcache is not None:\r | |
255 | self.tempcache[url] = result\r | |
256 | bs = 1024*8\r | |
257 | size = -1\r | |
258 | read = 0\r | |
259 | blocknum = 0\r | |
260 | if reporthook:\r | |
261 | if "content-length" in headers:\r | |
262 | size = int(headers["Content-Length"])\r | |
263 | reporthook(blocknum, bs, size)\r | |
264 | while 1:\r | |
265 | block = fp.read(bs)\r | |
266 | if block == "":\r | |
267 | break\r | |
268 | read += len(block)\r | |
269 | tfp.write(block)\r | |
270 | blocknum += 1\r | |
271 | if reporthook:\r | |
272 | reporthook(blocknum, bs, size)\r | |
273 | finally:\r | |
274 | tfp.close()\r | |
275 | finally:\r | |
276 | fp.close()\r | |
277 | \r | |
278 | # raise exception if actual size does not match content-length header\r | |
279 | if size >= 0 and read < size:\r | |
280 | raise ContentTooShortError("retrieval incomplete: got only %i out "\r | |
281 | "of %i bytes" % (read, size), result)\r | |
282 | \r | |
283 | return result\r | |
284 | \r | |
285 | # Each method named open_<type> knows how to open that type of URL\r | |
286 | \r | |
    def open_http(self, url, data=None):
        """Use HTTP protocol.

        *url* is either a selector string, or -- when going through a
        proxy -- a (proxyhost, full_url) tuple as built by open().
        *data*, when given, is POSTed as a urlencoded form body.
        """
        import httplib
        user_passwd = None
        proxy_passwd= None
        if isinstance(url, str):
            host, selector = splithost(url)
            if host:
                user_passwd, host = splituser(host)
                host = unquote(host)
            realhost = host
        else:
            # Proxy case: connect to the proxy, request the full URL.
            host, selector = url
            # check whether the proxy contains authorization information
            proxy_passwd, host = splituser(host)
            # now we proceed with the url we want to obtain
            urltype, rest = splittype(selector)
            url = rest
            user_passwd = None
            if urltype.lower() != 'http':
                realhost = None
            else:
                realhost, rest = splithost(rest)
                if realhost:
                    user_passwd, realhost = splituser(realhost)
                if user_passwd:
                    selector = "%s://%s%s" % (urltype, realhost, rest)
                if proxy_bypass(realhost):
                    host = realhost

        #print "proxy via http:", host, selector
        if not host: raise IOError, ('http error', 'no host given')

        if proxy_passwd:
            import base64
            proxy_auth = base64.b64encode(proxy_passwd).strip()
        else:
            proxy_auth = None

        if user_passwd:
            import base64
            auth = base64.b64encode(user_passwd).strip()
        else:
            auth = None
        h = httplib.HTTP(host)
        if data is not None:
            h.putrequest('POST', selector)
            h.putheader('Content-Type', 'application/x-www-form-urlencoded')
            h.putheader('Content-Length', '%d' % len(data))
        else:
            h.putrequest('GET', selector)
        if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
        if auth: h.putheader('Authorization', 'Basic %s' % auth)
        if realhost: h.putheader('Host', realhost)
        for args in self.addheaders: h.putheader(*args)
        h.endheaders(data)
        errcode, errmsg, headers = h.getreply()
        fp = h.getfile()
        if errcode == -1:
            if fp: fp.close()
            # something went wrong with the HTTP status line
            raise IOError, ('http protocol error', 0,
                            'got a bad status line', None)
        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if (200 <= errcode < 300):
            return addinfourl(fp, headers, "http:" + url, errcode)
        else:
            # Non-2xx: let http_error() dispatch to a specific handler.
            if data is None:
                return self.http_error(url, fp, errcode, errmsg, headers)
            else:
                return self.http_error(url, fp, errcode, errmsg, headers, data)
359 | \r | |
360 | def http_error(self, url, fp, errcode, errmsg, headers, data=None):\r | |
361 | """Handle http errors.\r | |
362 | Derived class can override this, or provide specific handlers\r | |
363 | named http_error_DDD where DDD is the 3-digit error code."""\r | |
364 | # First check if there's a specific handler for this error\r | |
365 | name = 'http_error_%d' % errcode\r | |
366 | if hasattr(self, name):\r | |
367 | method = getattr(self, name)\r | |
368 | if data is None:\r | |
369 | result = method(url, fp, errcode, errmsg, headers)\r | |
370 | else:\r | |
371 | result = method(url, fp, errcode, errmsg, headers, data)\r | |
372 | if result: return result\r | |
373 | return self.http_error_default(url, fp, errcode, errmsg, headers)\r | |
374 | \r | |
    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handler: close the connection and raise IOError."""
        # Drain the response before closing so the connection shuts cleanly.
        void = fp.read()
        fp.close()
        raise IOError, ('http error', errcode, errmsg, headers)
380 | \r | |
    # open_https is only defined when the ssl module is available.
    if _have_ssl:
        def open_https(self, url, data=None):
            """Use HTTPS protocol.

            Mirrors open_http(), but connects with httplib.HTTPS using the
            key_file/cert_file given to the constructor, if any.
            """
            import httplib
            user_passwd = None
            proxy_passwd = None
            if isinstance(url, str):
                host, selector = splithost(url)
                if host:
                    user_passwd, host = splituser(host)
                    host = unquote(host)
                realhost = host
            else:
                host, selector = url
                # Check whether the proxy contains authorization information.
                proxy_passwd, host = splituser(host)
                urltype, rest = splittype(selector)
                url = rest
                user_passwd = None
                if urltype.lower() != 'https':
                    realhost = None
                else:
                    realhost, rest = splithost(rest)
                    if realhost:
                        user_passwd, realhost = splituser(realhost)
                    if user_passwd:
                        selector = "%s://%s%s" % (urltype, realhost, rest)
                #print "proxy via https:", host, selector
            if not host: raise IOError, ('https error', 'no host given')
            if proxy_passwd:
                import base64
                proxy_auth = base64.b64encode(proxy_passwd).strip()
            else:
                proxy_auth = None
            if user_passwd:
                import base64
                auth = base64.b64encode(user_passwd).strip()
            else:
                auth = None
            h = httplib.HTTPS(host, 0,
                              key_file=self.key_file,
                              cert_file=self.cert_file)
            if data is not None:
                h.putrequest('POST', selector)
                h.putheader('Content-Type',
                            'application/x-www-form-urlencoded')
                h.putheader('Content-Length', '%d' % len(data))
            else:
                h.putrequest('GET', selector)
            if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
            if auth: h.putheader('Authorization', 'Basic %s' % auth)
            if realhost: h.putheader('Host', realhost)
            for args in self.addheaders: h.putheader(*args)
            h.endheaders(data)
            errcode, errmsg, headers = h.getreply()
            fp = h.getfile()
            if errcode == -1:
                if fp: fp.close()
                # something went wrong with the HTTP status line
                raise IOError, ('http protocol error', 0,
                                'got a bad status line', None)
            # According to RFC 2616, "2xx" code indicates that the client's
            # request was successfully received, understood, and accepted.
            if (200 <= errcode < 300):
                return addinfourl(fp, headers, "https:" + url, errcode)
            else:
                if data is None:
                    return self.http_error(url, fp, errcode, errmsg, headers)
                else:
                    return self.http_error(url, fp, errcode, errmsg, headers,
                                           data)
453 | \r | |
    def open_file(self, url):
        """Use local file or FTP depending on form of URL."""
        if not isinstance(url, str):
            raise IOError, ('file error', 'proxy support for file protocol currently not implemented')
        # 'file://host/...' with a non-local, non-'localhost' host part is
        # treated as an FTP URL; everything else is a local file.
        if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
            return self.open_ftp(url)
        else:
            return self.open_local_file(url)
462 | \r | |
    def open_local_file(self, url):
        """Use local file.

        Serves the file only when the URL has no host part, or the host
        resolves to this machine; otherwise raises IOError.  Synthesizes
        Content-Type/Content-Length/Last-modified headers from the file.
        """
        import mimetypes, mimetools, email.utils
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        host, file = splithost(url)
        localname = url2pathname(file)
        try:
            stats = os.stat(localname)
        except OSError, e:
            # Normalize the failure to IOError like the other open_* methods.
            raise IOError(e.errno, e.strerror, e.filename)
        size = stats.st_size
        modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
        mtype = mimetypes.guess_type(url)[0]
        headers = mimetools.Message(StringIO(
            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified)))
        if not host:
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            return addinfourl(open(localname, 'rb'),
                              headers, urlfile)
        host, port = splitport(host)
        if not port \
           and socket.gethostbyname(host) in (localhost(), thishost()):
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            return addinfourl(open(localname, 'rb'),
                              headers, urlfile)
        raise IOError, ('local file error', 'not on local host')
497 | \r | |
    def open_ftp(self, url):
        """Use FTP protocol.

        Connections are cached in self.ftpcache keyed by
        (user, host, port, dirs); the transfer type defaults to image ('I')
        for files and directory listing ('D') for directories, overridable
        via a ';type=...' URL attribute.
        """
        if not isinstance(url, str):
            raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')
        import mimetypes, mimetools
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        host, path = splithost(url)
        if not host: raise IOError, ('ftp error', 'no host given')
        host, port = splitport(host)
        user, host = splituser(host)
        if user: user, passwd = splitpasswd(user)
        else: passwd = None
        host = unquote(host)
        user = user or ''
        passwd = passwd or ''
        host = socket.gethostbyname(host)
        if not port:
            import ftplib
            port = ftplib.FTP_PORT
        else:
            port = int(port)
        path, attrs = splitattr(path)
        path = unquote(path)
        dirs = path.split('/')
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]: dirs = dirs[1:]
        if dirs and not dirs[0]: dirs[0] = '/'
        key = user, host, port, '/'.join(dirs)
        # XXX thread unsafe!
        if len(self.ftpcache) > MAXFTPCACHE:
            # Prune the cache, rather arbitrarily
            for k in self.ftpcache.keys():
                if k != key:
                    v = self.ftpcache[k]
                    del self.ftpcache[k]
                    v.close()
        try:
            if not key in self.ftpcache:
                self.ftpcache[key] = \
                    ftpwrapper(user, passwd, host, port, dirs)
            if not file: type = 'D'
            else: type = 'I'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
            mtype = mimetypes.guess_type("ftp:" + url)[0]
            headers = ""
            if mtype:
                headers += "Content-Type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-Length: %d\n" % retrlen
            headers = mimetools.Message(StringIO(headers))
            return addinfourl(fp, headers, "ftp:" + url)
        except ftperrors(), msg:
            # Re-raise as IOError, preserving the original traceback.
            raise IOError, ('ftp error', msg), sys.exc_info()[2]
559 | \r | |
    def open_data(self, url, data=None):
        """Use "data" URL."""
        if not isinstance(url, str):
            raise IOError, ('data error', 'proxy support for data protocol currently not implemented')
        # ignore POSTed data
        #
        # syntax of data URLs:
        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
        # mediatype := [ type "/" subtype ] *( ";" parameter )
        # data      := *urlchar
        # parameter := attribute "=" value
        import mimetools
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        try:
            [type, data] = url.split(',', 1)
        except ValueError:
            raise IOError, ('data error', 'bad data URL')
        if not type:
            type = 'text/plain;charset=US-ASCII'
        # A trailing ';token' without '=' is the encoding (e.g. ';base64'),
        # not a 'attribute=value' media-type parameter.
        semi = type.rfind(';')
        if semi >= 0 and '=' not in type[semi:]:
            encoding = type[semi+1:]
            type = type[:semi]
        else:
            encoding = ''
        # Build a fake HTTP-style response so addinfourl/info() work.
        msg = []
        msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                            time.gmtime(time.time())))
        msg.append('Content-type: %s' % type)
        if encoding == 'base64':
            import base64
            data = base64.decodestring(data)
        else:
            data = unquote(data)
        msg.append('Content-Length: %d' % len(data))
        msg.append('')
        msg.append(data)
        msg = '\n'.join(msg)
        f = StringIO(msg)
        headers = mimetools.Message(f, 0)
        #f.fileno = None     # needed for addinfourl
        return addinfourl(f, headers, url)
605 | \r | |
606 | \r | |
607 | class FancyURLopener(URLopener):\r | |
608 | """Derived class with handlers for errors we can handle (perhaps)."""\r | |
609 | \r | |
    def __init__(self, *args, **kwargs):
        URLopener.__init__(self, *args, **kwargs)
        # Cache of (user, passwd) keyed by 'realm@host'; see get_user_passwd().
        self.auth_cache = {}
        # Redirect-loop protection counters used by http_error_302().
        self.tries = 0
        self.maxtries = 10
615 | \r | |
    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception."""
        # Unlike the base class, hand back the error response as a result.
        return addinfourl(fp, headers, "http:" + url, errcode)
619 | \r | |
    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily)."""
        self.tries += 1
        # Guard against redirect loops: after maxtries consecutive
        # redirects, report a synthetic 500 instead of recursing forever.
        if self.maxtries and self.tries >= self.maxtries:
            if hasattr(self, "http_error_500"):
                meth = self.http_error_500
            else:
                meth = self.http_error_default
            self.tries = 0
            return meth(url, fp, 500,
                        "Internal Server Error: Redirect Recursion", headers)
        result = self.redirect_internal(url, fp, errcode, errmsg, headers,
                                        data)
        self.tries = 0
        return result
635 | \r | |
    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        """Follow a redirect: open the Location/URI target as a fresh GET.

        Returns None when the response carries no redirect target."""
        if 'location' in headers:
            newurl = headers['location']
        elif 'uri' in headers:
            newurl = headers['uri']
        else:
            return
        # Drain and close the redirecting response before following it.
        void = fp.read()
        fp.close()
        # In case the server sent a relative URL, join with original:
        newurl = basejoin(self.type + ":" + url, newurl)

        # For security reasons we do not allow redirects to protocols
        # other than HTTP, HTTPS or FTP.
        newurl_lower = newurl.lower()
        if not (newurl_lower.startswith('http://') or
                newurl_lower.startswith('https://') or
                newurl_lower.startswith('ftp://')):
            raise IOError('redirect error', errcode,
                          errmsg + " - Redirection to url '%s' is not allowed" %
                          newurl,
                          headers)

        return self.open(newurl)
660 | \r | |
    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)
664 | \r | |
    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)
668 | \r | |
    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 307 -- relocated, but turn POST into error."""
        # 307 must not change the request method, and this opener cannot
        # re-POST, so only redirect GETs (data is None).
        if data is None:
            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
        else:
            return self.http_error_default(url, fp, errcode, errmsg, headers)
675 | \r | |
    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 401 -- authentication required.
        This function supports Basic authentication only."""
        # The URLopener (base-class) http_error_default calls below raise
        # IOError, so control never falls past them when they are reached.
        if not 'www-authenticate' in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['www-authenticate']
        import re
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        # Dispatch to retry_http_basic_auth / retry_https_basic_auth
        # depending on the scheme of the current request.
        name = 'retry_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)
697 | \r | |
    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 407 -- proxy authentication required.
        This function supports Basic authentication only."""
        # The URLopener (base-class) http_error_default calls below raise
        # IOError, so control never falls past them when they are reached.
        if not 'proxy-authenticate' in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['proxy-authenticate']
        import re
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        # Dispatch to the proxy-credential retry helper for this scheme.
        name = 'retry_proxy_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)
719 | \r | |
    def retry_proxy_http_basic_auth(self, url, realm, data=None):
        """Re-try after a 407: embed Basic credentials for *realm* into the
        configured HTTP proxy URL and re-open the original request.
        Returns None when the user supplied no credentials."""
        host, selector = splithost(url)
        newurl = 'http://' + host + selector
        proxy = self.proxies['http']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        # Strip any credentials already embedded in the proxy host part;
        # a nonzero i also tells get_user_passwd to drop its cached entry.
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
        self.proxies['http'] = 'http://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)
736 | \r | |
    def retry_proxy_https_basic_auth(self, url, realm, data=None):
        """HTTPS twin of retry_proxy_http_basic_auth(): embed Basic
        credentials for *realm* into the HTTPS proxy URL and re-open.
        Returns None when the user supplied no credentials."""
        host, selector = splithost(url)
        newurl = 'https://' + host + selector
        proxy = self.proxies['https']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        # Strip any credentials already embedded in the proxy host part;
        # a nonzero i also tells get_user_passwd to drop its cached entry.
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
        self.proxies['https'] = 'https://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)
753 | \r | |
754 | def retry_http_basic_auth(self, url, realm, data=None):\r | |
755 | host, selector = splithost(url)\r | |
756 | i = host.find('@') + 1\r | |
757 | host = host[i:]\r | |
758 | user, passwd = self.get_user_passwd(host, realm, i)\r | |
759 | if not (user or passwd): return None\r | |
760 | host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host\r | |
761 | newurl = 'http://' + host + selector\r | |
762 | if data is None:\r | |
763 | return self.open(newurl)\r | |
764 | else:\r | |
765 | return self.open(newurl, data)\r | |
766 | \r | |
767 | def retry_https_basic_auth(self, url, realm, data=None):\r | |
768 | host, selector = splithost(url)\r | |
769 | i = host.find('@') + 1\r | |
770 | host = host[i:]\r | |
771 | user, passwd = self.get_user_passwd(host, realm, i)\r | |
772 | if not (user or passwd): return None\r | |
773 | host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host\r | |
774 | newurl = 'https://' + host + selector\r | |
775 | if data is None:\r | |
776 | return self.open(newurl)\r | |
777 | else:\r | |
778 | return self.open(newurl, data)\r | |
779 | \r | |
780 | def get_user_passwd(self, host, realm, clear_cache=0):\r | |
781 | key = realm + '@' + host.lower()\r | |
782 | if key in self.auth_cache:\r | |
783 | if clear_cache:\r | |
784 | del self.auth_cache[key]\r | |
785 | else:\r | |
786 | return self.auth_cache[key]\r | |
787 | user, passwd = self.prompt_user_passwd(host, realm)\r | |
788 | if user or passwd: self.auth_cache[key] = (user, passwd)\r | |
789 | return user, passwd\r | |
790 | \r | |
    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!"""
        # Console fallback: username on stdin, password via getpass so
        # it is not echoed.  Returns (None, None) on Ctrl-C.
        import getpass
        try:
            user = raw_input("Enter username for %s at %s: " % (realm,
                                                                host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
            return user, passwd
        except KeyboardInterrupt:
            # Emit the newline the interrupted prompt swallowed.
            print
            return None, None
803 | \r | |
804 | \r | |
805 | # Utility functions\r | |
806 | \r | |
_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'.

    The lookup is performed once and memoized in a module global.
    """
    global _localhost
    if _localhost is not None:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
814 | \r | |
_thishost = None
def thishost():
    """Return the IP address of the current host.

    The lookup is performed once and memoized in a module global.
    """
    global _thishost
    if _thishost is not None:
        return _thishost
    _thishost = socket.gethostbyname(socket.gethostname())
    return _thishost
822 | \r | |
_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class.

    ftplib is imported lazily (only FTP users pay for it) and the
    result is memoized in a module global.
    """
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors
831 | \r | |
_noheaders = None
def noheaders():
    """Return an empty mimetools.Message object (memoized)."""
    global _noheaders
    if _noheaders is not None:
        return _noheaders
    import mimetools
    try:
        # cStringIO is faster when available.
        from cStringIO import StringIO
    except ImportError:
        from StringIO import StringIO
    _noheaders = mimetools.Message(StringIO(), 0)
    _noheaders.fp.close()   # Recycle file descriptor
    return _noheaders
845 | \r | |
846 | \r | |
847 | # Utility classes\r | |
848 | \r | |
class ftpwrapper:
    """Class used by open_ftp() for cache of open FTP connections.

    Keeps one ftplib.FTP connection alive (reconnecting on demand) and
    serves file retrievals and directory listings from it.
    """

    def __init__(self, user, passwd, host, port, dirs,
                 timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        # Connection parameters are stored so init() can reconnect
        # transparently after the server drops the control channel.
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        self.dirs = dirs
        self.timeout = timeout
        self.init()

    def init(self):
        """(Re)connect, log in, and change into the cached directory path."""
        import ftplib
        self.busy = 0
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port, self.timeout)
        self.ftp.login(self.user, self.passwd)
        for dir in self.dirs:
            self.ftp.cwd(dir)

    def retrfile(self, file, type):
        """Start retrieving `file` (or a listing when `type` is 'd'/'D').

        Returns (wrapped-data-connection, length-or-None); closing the
        wrapper finishes the transfer via endtransfer().
        """
        import ftplib
        self.endtransfer()
        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
        else: cmd = 'TYPE ' + type; isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # Control connection died; reconnect once and retry.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm, reason:
                # 550 means "not a plain file" -- fall through to LIST.
                if str(reason)[:3] != '550':
                    raise IOError, ('ftp error', reason), sys.exc_info()[2]
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing. Verify that directory exists.
            if file:
                pwd = self.ftp.pwd()
                try:
                    try:
                        self.ftp.cwd(file)
                    except ftplib.error_perm, reason:
                        raise IOError, ('ftp error', reason), sys.exc_info()[2]
                finally:
                    # Always return to the original directory.
                    self.ftp.cwd(pwd)
                cmd = 'LIST ' + file
            else:
                cmd = 'LIST'
            conn = self.ftp.ntransfercmd(cmd)
        self.busy = 1
        # Pass back both a suitably decorated object and a retrieval length
        return (addclosehook(conn[0].makefile('rb'),
                             self.endtransfer), conn[1])
    def endtransfer(self):
        """Consume the end-of-transfer response, ignoring FTP errors."""
        if not self.busy:
            return
        self.busy = 0
        try:
            self.ftp.voidresp()
        except ftperrors():
            pass

    def close(self):
        """Finish any pending transfer and close the control connection."""
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            pass
926 | \r | |
class addbase:
    """Base class for addinfo and addclosehook.

    Wraps a file-like object and re-exports its read methods as
    instance attributes so the wrapper can be used like the file itself.
    """

    def __init__(self, fp):
        self.fp = fp
        # Bind the underlying file's methods straight onto the instance.
        self.read = self.fp.read
        self.readline = self.fp.readline
        if hasattr(self.fp, "readlines"):
            self.readlines = self.fp.readlines
        if hasattr(self.fp, "fileno"):
            self.fileno = self.fp.fileno
        else:
            # Keep fileno() callable even without a real descriptor.
            self.fileno = lambda: None
        if hasattr(self.fp, "__iter__"):
            self.__iter__ = self.fp.__iter__
            if hasattr(self.fp, "next"):
                self.next = self.fp.next

    def __repr__(self):
        return '<%s at %r whose fp = %r>' % (self.__class__.__name__,
                                             id(self), self.fp)

    def close(self):
        # Drop the delegating attributes so use-after-close fails loudly,
        # then close the wrapped file.
        for attr in ('read', 'readline', 'readlines', 'fileno'):
            setattr(self, attr, None)
        if self.fp:
            self.fp.close()
        self.fp = None
955 | \r | |
class addclosehook(addbase):
    """Class to add a close hook to an open file.

    The hook (with its arguments) fires exactly once, when the wrapper
    is closed.
    """

    def __init__(self, fp, closehook, *hookargs):
        addbase.__init__(self, fp)
        self.closehook = closehook
        self.hookargs = hookargs

    def close(self):
        addbase.close(self)
        if not self.closehook:
            return
        # Invoke the hook, then disarm it so a second close is a no-op.
        self.closehook(*self.hookargs)
        self.closehook = None
        self.hookargs = None
970 | \r | |
class addinfo(addbase):
    """class to add an info() method to an open file."""

    def __init__(self, fp, headers):
        addbase.__init__(self, fp)
        # Message-style headers object, handed back verbatim by info().
        self.headers = headers

    def info(self):
        """Return the headers supplied at construction time."""
        return self.headers
980 | \r | |
class addinfourl(addbase):
    """class to add info() and geturl() methods to an open file."""

    def __init__(self, fp, headers, url, code=None):
        addbase.__init__(self, fp)
        # code may be None for non-HTTP schemes (no status line).
        self.code = code
        self.url = url
        self.headers = headers

    def geturl(self):
        """Return the URL the data was actually fetched from."""
        return self.url

    def getcode(self):
        """Return the HTTP status code, or None if not applicable."""
        return self.code

    def info(self):
        """Return the response headers."""
        return self.headers
998 | \r | |
999 | \r | |
1000 | # Utilities to parse URLs (most of these return None for missing parts):\r | |
1001 | # unwrap('<URL:type://host/path>') --> 'type://host/path'\r | |
1002 | # splittype('type:opaquestring') --> 'type', 'opaquestring'\r | |
1003 | # splithost('//host[:port]/path') --> 'host[:port]', '/path'\r | |
1004 | # splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'\r | |
1005 | # splitpasswd('user:passwd') -> 'user', 'passwd'\r | |
1006 | # splitport('host:port') --> 'host', 'port'\r | |
1007 | # splitquery('/path?query') --> '/path', 'query'\r | |
1008 | # splittag('/path#tag') --> '/path', 'tag'\r | |
1009 | # splitattr('/path;attr1=value1;attr2=value2;...') ->\r | |
1010 | # '/path', ['attr1=value1', 'attr2=value2', ...]\r | |
1011 | # splitvalue('attr=value') --> 'attr', 'value'\r | |
1012 | # unquote('abc%20def') -> 'abc def'\r | |
1013 | # quote('abc def') -> 'abc%20def')\r | |
1014 | \r | |
# Define _is_unicode() according to whether this interpreter has a
# `unicode` builtin (Python 2) or not.  Evaluating the bare name is the
# cheapest feature test; the try/except/else ordering is load-bearing.
try:
    unicode
except NameError:
    # No unicode type: nothing ever needs special encoding handling.
    def _is_unicode(x):
        return 0
else:
    # Python 2: genuine unicode instances are detected by isinstance.
    def _is_unicode(x):
        return isinstance(x, unicode)
1023 | \r | |
def toBytes(url):
    """toBytes(u"URL") --> 'URL'."""
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed
    if not _is_unicode(url):
        return url
    try:
        return url.encode("ASCII")
    except UnicodeError:
        raise UnicodeError("URL " + repr(url) +
                           " contains non-ASCII characters")
1035 | \r | |
def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
    url = url.strip()
    # Peel off surrounding angle brackets, then an optional URL: tag.
    if url.startswith('<') and url.endswith('>') and url != '<':
        url = url[1:-1].strip()
    if url[:4] == 'URL:':
        url = url[4:].strip()
    return url
1043 | \r | |
_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'."""
    global _typeprog
    if _typeprog is None:
        # Compiled lazily and cached at module level.
        import re
        _typeprog = re.compile('^([^/:]+):')

    found = _typeprog.match(url)
    if not found:
        return None, url
    scheme = found.group(1)
    # Scheme names are case-insensitive; normalise to lower case.
    return scheme.lower(), url[len(scheme) + 1:]
1057 | \r | |
_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
    global _hostprog
    if _hostprog is None:
        import re
        _hostprog = re.compile('^//([^/?]*)(.*)$')

    found = _hostprog.match(url)
    if not found:
        return None, url
    host_port, path = found.group(1, 2)
    # A query directly after the authority ('//host?q') still gets a
    # leading slash on the path part.
    if path and not path.startswith('/'):
        path = '/' + path
    return host_port, path
1074 | \r | |
_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
    global _userprog
    if _userprog is None:
        import re
        _userprog = re.compile('^(.*)@(.*)$')

    found = _userprog.match(host)
    if not found:
        return None, host
    # Greedy match: everything before the *last* '@' is userinfo.
    return found.group(1, 2)
1086 | \r | |
_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'."""
    global _passwdprog
    if _passwdprog is None:
        import re
        # re.S: the password portion may contain any character,
        # including newlines.
        _passwdprog = re.compile('^([^:]*):(.*)$',re.S)

    found = _passwdprog.match(user)
    if not found:
        return user, None
    return found.group(1, 2)
1098 | \r | |
# splitport('host:port') --> 'host', 'port'
_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'."""
    global _portprog
    if _portprog is None:
        import re
        # Only an all-digit suffix counts as a port.
        _portprog = re.compile('^(.*):([0-9]+)$')

    found = _portprog.match(host)
    if not found:
        return host, None
    return found.group(1, 2)
1111 | \r | |
_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.
    Return given default port if no ':' found; defaults to -1.
    Return numerical port if a valid number are found after ':'.
    Return None if ':' but not a valid number."""
    global _nportprog
    if _nportprog is None:
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    found = _nportprog.match(host)
    if not found:
        return host, defport
    host, port = found.group(1, 2)
    try:
        # An empty port string is just as invalid as a non-numeric one.
        if not port:
            raise ValueError("no digits")
        nport = int(port)
    except ValueError:
        nport = None
    return host, nport
1133 | \r | |
_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'."""
    global _queryprog
    if _queryprog is None:
        import re
        # Greedy prefix: the query starts at the *last* '?'.
        _queryprog = re.compile('^(.*)\?([^?]*)$')

    found = _queryprog.match(url)
    if not found:
        return url, None
    return found.group(1, 2)
1145 | \r | |
_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'."""
    global _tagprog
    if _tagprog is None:
        import re
        # Greedy prefix: the fragment starts at the *last* '#'.
        _tagprog = re.compile('^(.*)#([^#]*)$')

    found = _tagprog.match(url)
    if not found:
        return url, None
    return found.group(1, 2)
1157 | \r | |
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...]."""
    # The first ';'-delimited piece is the path; the rest are attributes.
    pieces = url.split(';')
    return pieces[0], pieces[1:]
1163 | \r | |
_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'."""
    global _valueprog
    if _valueprog is None:
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    found = _valueprog.match(attr)
    if not found:
        return attr, None
    return found.group(1, 2)
1175 | \r | |
1176 | # urlparse contains a duplicate of this method to avoid a circular import. If\r | |
1177 | # you update this method, also update the copy in urlparse. This code\r | |
1178 | # duplication does not exist in Python3.\r | |
1179 | \r | |
_hexdig = '0123456789ABCDEFabcdef'
# Map every two-hex-digit string (both cases) to its character.
_hextochr = dict((a + b, chr(int(a + b, 16)))
                 for a in _hexdig for b in _hexdig)

def unquote(s):
    """unquote('abc%20def') -> 'abc def'."""
    bits = s.split('%')
    # Fast path: no '%' at all means nothing to decode.
    if len(bits) == 1:
        return s
    out = bits[0]
    for bit in bits[1:]:
        try:
            out += _hextochr[bit[:2]] + bit[2:]
        except KeyError:
            # Not a valid escape; keep the '%' literally.
            out += '%' + bit
        except UnicodeDecodeError:
            # High byte appended to a unicode string (Python 2 only).
            out += unichr(int(bit[:2], 16)) + bit[2:]
    return out
1199 | \r | |
def unquote_plus(s):
    """unquote('%7e/abc+def') -> '~/abc def'"""
    # '+' encodes a space in query strings; translate before unquoting.
    return unquote(s.replace('+', ' '))
1204 | \r | |
# Characters that never need escaping in any URL component (RFC 2396
# "unreserved", minus '~' for historical compatibility).
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789' '_.-')
# For every byte value: the character itself if always safe, else its
# '%XX' escape.  (Python 2: iterating str(bytearray(...)) yields the
# 256 one-character strings.)
_safe_map = {}
for i, c in zip(xrange(256), str(bytearray(xrange(256)))):
    _safe_map[c] = c if (i < 128 and c in always_safe) else '%{:02X}'.format(i)
# Cache of (quoter-function, combined-safe-string) keyed per `safe` arg.
_safe_quoters = {}

def quote(s, safe='/'):
    """quote('abc def') -> 'abc%20def'

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    By default, the quote function is intended for quoting the path
    section of a URL.  Thus, it will not encode '/'.  This character
    is reserved, but in typical usage the quote function is being
    called on a path where the existing slash characters are used as
    reserved characters.
    """
    # fastpath
    if not s:
        if s is None:
            raise TypeError('None object cannot be quoted')
        return s
    cachekey = (safe, always_safe)
    try:
        (quoter, safe) = _safe_quoters[cachekey]
    except KeyError:
        # First use of this `safe` set: derive a lookup table where the
        # caller's extra safe characters pass through unescaped.
        safe_map = _safe_map.copy()
        safe_map.update([(c, c) for c in safe])
        quoter = safe_map.__getitem__
        safe = always_safe + safe
        _safe_quoters[cachekey] = (quoter, safe)
    if not s.rstrip(safe):
        # Every character is already safe; return the input unchanged.
        return s
    return ''.join(map(quoter, s))
1251 | \r | |
def quote_plus(s, safe=''):
    """Quote the query fragment of a URL; replacing ' ' with '+'"""
    if ' ' not in s:
        return quote(s, safe)
    # Let quote() pass spaces through untouched, then turn them into '+'.
    return quote(s, safe + ' ').replace(' ', '+')
1258 | \r | |
def urlencode(query, doseq=0):
    """Encode a sequence of two-element tuples or dictionary into a URL query string.

    If any values in the query arg are sequences and doseq is true, each
    sequence element is converted to a separate parameter.

    If the query arg is a sequence of two-element tuples, the order of the
    parameters in the output will match the order of parameters in the
    input.
    """

    if hasattr(query,"items"):
        # mapping objects
        query = query.items()
    else:
        # it's a bother at times that strings and string-like objects are
        # sequences...
        try:
            # non-sequence items should not work with len()
            # non-empty strings will fail this
            if len(query) and not isinstance(query[0], tuple):
                raise TypeError
            # zero-length sequences of all types will get here and succeed,
            # but that's a minor nit - since the original implementation
            # allowed empty dicts that type of behavior probably should be
            # preserved for consistency
        except TypeError:
            # Re-raise with a clearer message while keeping the original
            # traceback (three-argument raise).
            ty,va,tb = sys.exc_info()
            raise TypeError, "not a valid non-string sequence or mapping object", tb

    l = []
    if not doseq:
        # preserve old behavior: every value is stringified whole, even
        # if it is itself a sequence.
        for k, v in query:
            k = quote_plus(str(k))
            v = quote_plus(str(v))
            l.append(k + '=' + v)
    else:
        for k, v in query:
            k = quote_plus(str(k))
            if isinstance(v, str):
                v = quote_plus(v)
                l.append(k + '=' + v)
            elif _is_unicode(v):
                # is there a reasonable way to convert to ASCII?
                # encode generates a string, but "replace" or "ignore"
                # lose information and "strict" can raise UnicodeError
                v = quote_plus(v.encode("ASCII","replace"))
                l.append(k + '=' + v)
            else:
                try:
                    # is this a sufficient test for sequence-ness?
                    len(v)
                except TypeError:
                    # not a sequence
                    v = quote_plus(str(v))
                    l.append(k + '=' + v)
                else:
                    # loop over the sequence, emitting one k=elt pair
                    # per element
                    for elt in v:
                        l.append(k + '=' + quote_plus(str(elt)))
    return '&'.join(l)
1321 | \r | |
1322 | # Proxy handling\r | |
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention.  If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.

    """
    proxies = {}
    for name, value in os.environ.items():
        lowered = name.lower()
        # Only non-empty values count; empty means "no proxy set".
        if value and lowered.endswith('_proxy'):
            proxies[lowered[:-6]] = value
    return proxies
1338 | \r | |
def proxy_bypass_environment(host):
    """Test if proxies should not be used for a particular host.

    Checks the environment for a variable named no_proxy, which should
    be a list of DNS suffixes separated by commas, or '*' for all hosts.
    """
    no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
    # '*' is special case for always bypass
    if no_proxy == '*':
        return 1
    # strip port off host
    hostonly, port = splitport(host)
    # check if the host matches any of the DNS suffixes
    for name in no_proxy.split(','):
        name = name.strip()
        if not name:
            continue
        # Accept both 'example.com' and '.example.com' spellings.
        name = name.lstrip('.')
        # Exact match of either the bare host or host:port form.
        if hostonly == name or host == name:
            return 1
        # BUG FIX: require a dot boundary for suffix matches, so that
        # no_proxy=example.com does not also bypass 'notexample.com'
        # (the old bare endswith() test did).
        name = '.' + name
        if hostonly.endswith(name) or host.endswith(name):
            return 1
    # otherwise, don't bypass
    return 0
1357 | \r | |
1358 | \r | |
1359 | if sys.platform == 'darwin':\r | |
1360 | from _scproxy import _get_proxy_settings, _get_proxies\r | |
1361 | \r | |
1362 | def proxy_bypass_macosx_sysconf(host):\r | |
1363 | """\r | |
1364 | Return True iff this host shouldn't be accessed using a proxy\r | |
1365 | \r | |
1366 | This function uses the MacOSX framework SystemConfiguration\r | |
1367 | to fetch the proxy information.\r | |
1368 | """\r | |
1369 | import re\r | |
1370 | import socket\r | |
1371 | from fnmatch import fnmatch\r | |
1372 | \r | |
1373 | hostonly, port = splitport(host)\r | |
1374 | \r | |
1375 | def ip2num(ipAddr):\r | |
1376 | parts = ipAddr.split('.')\r | |
1377 | parts = map(int, parts)\r | |
1378 | if len(parts) != 4:\r | |
1379 | parts = (parts + [0, 0, 0, 0])[:4]\r | |
1380 | return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]\r | |
1381 | \r | |
1382 | proxy_settings = _get_proxy_settings()\r | |
1383 | \r | |
1384 | # Check for simple host names:\r | |
1385 | if '.' not in host:\r | |
1386 | if proxy_settings['exclude_simple']:\r | |
1387 | return True\r | |
1388 | \r | |
1389 | hostIP = None\r | |
1390 | \r | |
1391 | for value in proxy_settings.get('exceptions', ()):\r | |
1392 | # Items in the list are strings like these: *.local, 169.254/16\r | |
1393 | if not value: continue\r | |
1394 | \r | |
1395 | m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)\r | |
1396 | if m is not None:\r | |
1397 | if hostIP is None:\r | |
1398 | try:\r | |
1399 | hostIP = socket.gethostbyname(hostonly)\r | |
1400 | hostIP = ip2num(hostIP)\r | |
1401 | except socket.error:\r | |
1402 | continue\r | |
1403 | \r | |
1404 | base = ip2num(m.group(1))\r | |
1405 | mask = m.group(2)\r | |
1406 | if mask is None:\r | |
1407 | mask = 8 * (m.group(1).count('.') + 1)\r | |
1408 | \r | |
1409 | else:\r | |
1410 | mask = int(mask[1:])\r | |
1411 | mask = 32 - mask\r | |
1412 | \r | |
1413 | if (hostIP >> mask) == (base >> mask):\r | |
1414 | return True\r | |
1415 | \r | |
1416 | elif fnmatch(host, value):\r | |
1417 | return True\r | |
1418 | \r | |
1419 | return False\r | |
1420 | \r | |
    def getproxies_macosx_sysconf():
        """Return a dictionary of scheme -> proxy server URL mappings.

        This function uses the MacOSX framework SystemConfiguration
        to fetch the proxy information.
        """
        # Thin wrapper over the C helper in _scproxy.
        return _get_proxies()
1428 | \r | |
1429 | def proxy_bypass(host):\r | |
1430 | if getproxies_environment():\r | |
1431 | return proxy_bypass_environment(host)\r | |
1432 | else:\r | |
1433 | return proxy_bypass_macosx_sysconf(host)\r | |
1434 | \r | |
    def getproxies():
        # Environment variables win; fall back to the system
        # (SystemConfiguration) proxy settings.
        return getproxies_environment() or getproxies_macosx_sysconf()
1437 | \r | |
1438 | elif os.name == 'nt':\r | |
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.

        """
        proxies = {}
        try:
            import _winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = _winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            if proxyEnable:
                # Returned as Unicode but problems if not converted to ASCII
                proxyServer = str(_winreg.QueryValueEx(internetSettings,
                                                       'ProxyServer')[0])
                if '=' in proxyServer:
                    # Per-protocol settings ("http=host:port;ftp=...")
                    for p in proxyServer.split(';'):
                        protocol, address = p.split('=', 1)
                        # See if address has a type:// prefix
                        import re
                        if not re.match('^([^/:]+)://', address):
                            address = '%s://%s' % (protocol, address)
                        proxies[protocol] = address
                else:
                    # Use one setting for all protocols
                    if proxyServer[:5] == 'http:':
                        proxies['http'] = proxyServer
                    else:
                        # Bare host:port: synthesize URLs per scheme.
                        proxies['http'] = 'http://%s' % proxyServer
                        proxies['https'] = 'https://%s' % proxyServer
                        proxies['ftp'] = 'ftp://%s' % proxyServer
            internetSettings.Close()
        except (WindowsError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.
            # proxies already set up to be empty so nothing to do
            pass
        return proxies
1484 | \r | |
    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        # Environment variables take precedence over registry settings.
        return getproxies_environment() or getproxies_registry()
1493 | \r | |
    def proxy_bypass_registry(host):
        """Return 1 if the registry ProxyOverride list says *host*
        should bypass the proxy, else 0."""
        try:
            import _winreg
            import re
        except ImportError:
            # Std modules, so should be around - but you never know!
            return 0
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = _winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            proxyOverride = str(_winreg.QueryValueEx(internetSettings,
                                                     'ProxyOverride')[0])
            # ^^^^ Returned as Unicode but problems if not converted to ASCII
        except WindowsError:
            return 0
        if not proxyEnable or not proxyOverride:
            return 0
        # try to make a host list from name and IP address.
        rawHost, port = splitport(host)
        host = [rawHost]
        try:
            addr = socket.gethostbyname(rawHost)
            if addr != rawHost:
                host.append(addr)
        except socket.error:
            pass
        try:
            fqdn = socket.getfqdn(rawHost)
            if fqdn != rawHost:
                host.append(fqdn)
        except socket.error:
            pass
        # make a check value list from the registry entry: replace the
        # '<local>' string by the localhost entry and the corresponding
        # canonical entry.
        proxyOverride = proxyOverride.split(';')
        # now check if we match one of the registry values.
        for test in proxyOverride:
            if test == '<local>':
                # '<local>' entry: bypass for dot-less (intranet) names.
                if '.' not in rawHost:
                    return 1
            # Translate the glob pattern into a regular expression.
            test = test.replace(".", r"\.")     # mask dots
            test = test.replace("*", r".*")     # change glob sequence
            test = test.replace("?", r".")      # change glob char
            for val in host:
                # print "%s <--> %s" %( test, val )
                if re.match(test, val, re.I):
                    return 1
        return 0
1545 | \r | |
    def proxy_bypass(host):
        """Return 1 (true) if *host* should be reached without a proxy.

        Uses the no_proxy environment setting when any proxy variables
        are present in the environment, otherwise the Windows registry
        ProxyOverride list.
        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_registry(host)
1557 | \r | |
else:
    # By default use environment variables; no platform-specific proxy
    # store (registry / SystemConfiguration) is available here.
    getproxies = getproxies_environment
    proxy_bypass = proxy_bypass_environment
1562 | \r | |
1563 | # Test and time quote() and unquote()\r | |
def test1():
    """Round-trip all 256 byte values (x4) through quote()/unquote(),
    reporting elapsed time and any mismatch."""
    s = ''
    for i in range(256): s = s + chr(i)
    s = s*4
    t0 = time.time()
    qs = quote(s)
    uqs = unquote(qs)
    t1 = time.time()
    if uqs != s:
        print 'Wrong!'
        print repr(s)
        print repr(qs)
        print repr(uqs)
    print round(t1 - t0, 3), 'sec'
1578 | \r | |
1579 | \r | |
def reporthook(blocknum, blocksize, totalsize):
    """Progress callback for urlretrieve(): print each block received."""
    # Report during remote transfers
    print "Block number: %d, Block size: %d, Total size: %d" % (
        blocknum, blocksize, totalsize)
1584 | \r | |
1585 | # Test program\r | |
def test(args=[]):
    """Self-test: retrieve each URL in args (default: a mixed list of
    file/ftp/http URLs), printing headers and body.

    NOTE(review): the mutable default args=[] is safe here because an
    empty args is rebound to a fresh list before being appended to.
    """
    if not args:
        args = [
            '/etc/passwd',
            'file:/etc/passwd',
            'file://localhost/etc/passwd',
            'ftp://ftp.gnu.org/pub/README',
            'http://www.python.org/index.html',
            ]
        if hasattr(URLopener, "open_https"):
            args.append('https://synergy.as.cmu.edu/~geek/')
    try:
        for url in args:
            print '-'*10, url, '-'*10
            fn, h = urlretrieve(url, None, reporthook)
            print fn
            if h:
                print '======'
                for k in h.keys(): print k + ':', h[k]
                print '======'
            with open(fn, 'rb') as fp:
                data = fp.read()
            if '\r' in data:
                # Normalise line endings before printing.
                table = string.maketrans("", "")
                data = data.translate(table, "\r")
            print data
            fn, h = None, None
            print '-'*40
    finally:
        # Remove any files cached by urlretrieve().
        urlcleanup()
1616 | \r | |
def main():
    """Command-line entry point: -t runs self-tests (twice for the
    quote/unquote timing test too); otherwise each URL argument is
    fetched and its contents printed."""
    import getopt, sys
    try:
        opts, args = getopt.getopt(sys.argv[1:], "th")
    except getopt.error, msg:
        print msg
        print "Use -h for help"
        return
    t = 0
    for o, a in opts:
        if o == '-t':
            t = t + 1
        if o == '-h':
            print "Usage: python urllib.py [-t] [url ...]"
            print "-t runs self-test;",
            print "otherwise, contents of urls are printed"
            return
    if t:
        if t > 1:
            # -t given twice: also run the quote/unquote benchmark.
            test1()
        test(args)
    else:
        if not args:
            print "Use -h for help"
        for url in args:
            print urlopen(url).read(),
1643 | \r | |
1644 | # Run test program when run as a script\r | |
1645 | if __name__ == '__main__':\r | |
1646 | main()\r |