# Source: git.proxmox.com Git - mirror_edk2.git/blob - AppPkg/Applications/Python/Python-2.7.2/Tools/webchecker/websucker.py
# Commit: AppPkg/Applications/Python: Add Python 2.7.2 sources since the release of Python...
# Path: [mirror_edk2.git] / AppPkg / Applications / Python / Python-2.7.2 / Tools / webchecker / websucker.py
1 #! /usr/bin/env python
2
3 """A variant on webchecker that creates a mirror copy of a remote site."""
4
5 __version__ = "$Revision$"
6
7 import os
8 import sys
9 import urllib
10 import getopt
11
12 import webchecker
13
# Reduce an expanded RCS keyword such as "$Revision: 1.2 $" to the bare
# revision number; an unexpanded "$Revision$" (one token) is left as is.
if __version__.startswith('$'):
    _v = __version__.split()
    if len(_v) == 3:
        __version__ = _v[1]
19
def main():
    """Command-line entry point: mirror every root URL named in sys.argv.

    Options:
        -q  reset the verbosity level to 0
        -v  raise the verbosity level by one (may be repeated)

    Options are applied in the order given, so "-q -v" yields level 1.

    Returns 2 when the options cannot be parsed and None otherwise; the
    script guard at the bottom of the file turns None into exit status 0.
    """
    verbose = webchecker.VERBOSE
    try:
        opts, args = getopt.getopt(sys.argv[1:], "qv")
    except getopt.error as msg:
        # Parenthesised single-argument print behaves the same on
        # Python 2 and 3 and emits exactly what the print statement did.
        print(msg)
        print("usage: %s [-qv] ... [rooturl] ..." % (sys.argv[0],))
        return 2
    for flag, _ in opts:
        if flag == "-q":
            verbose = 0
        if flag == "-v":
            verbose = verbose + 1
    c = Sucker()
    c.setflags(verbose=verbose)
    # Identify this tool (and its version) to the servers being mirrored.
    c.urlopener.addheaders = [
        ('User-agent', 'websucker/%s' % __version__),
    ]
    for arg in args:
        print("Adding root %s" % (arg,))
        c.addroot(arg)
    print("Run...")
    c.run()
43
class Sucker(webchecker.Checker):
    """A webchecker.Checker that keeps a local copy of every page it reads.

    Each page is stored under a directory named after the lower-cased host
    of its URL.  readhtml() looks for that local copy first, so pages that
    were already saved are not fetched again.
    """

    # Flags read by webchecker.Checker.  NOTE(review): presumably these turn
    # off extension-based checks and name-anchor checking -- confirm against
    # webchecker.
    checkext = 0
    nonames = 1

    # SAM 11/13/99: in general, URLs are now URL pairs.
    # Since we've suppressed name anchor checking,
    # we can ignore the second dimension.

    def readhtml(self, url_pair):
        """Return ``(text, url)`` for the page identified by *url_pair*.

        Only ``url_pair[0]`` (the URL itself) is used to locate the page.
        If a local copy exists it is read from disk; otherwise the page is
        fetched with openpage() and written to disk with savefile().

        *text* is None when the page could not be opened or when
        checkforhtml() rejects it.  *url* is the URL reported by the
        fetched response (``geturl()``), which may differ from the one
        requested; for a local copy it is the requested URL.
        """
        url = url_pair[0]
        text = None
        path = self.savefilename(url)
        try:
            f = open(path, "rb")
        except IOError:
            # No readable local copy: fetch the page and save it.
            f = self.openpage(url_pair)
            if f:
                try:
                    info = f.info()
                    nurl = f.geturl()
                    if nurl != url:
                        # The response came from a different URL; file the
                        # copy under the URL that actually answered.
                        url = nurl
                        path = self.savefilename(url)
                    text = f.read()
                finally:
                    # Release the connection even if reading fails.
                    f.close()
                # Everything fetched is saved, HTML or not ...
                self.savefile(text, path)
                # ... but only HTML is handed back to the caller.
                if not self.checkforhtml(info, url):
                    text = None
        else:
            try:
                # A local copy has no response headers, hence the empty
                # mapping passed as the info argument.
                if self.checkforhtml({}, url):
                    text = f.read()
            finally:
                f.close()
        return text, url

    def savefile(self, text, path):
        """Write *text* to *path*, creating parent directories as needed.

        The outcome is reported through self.message().  Failures to
        create the directory or to write the file are reported and
        swallowed so that one unsaveable page does not stop the run.
        """
        directory = os.path.dirname(path)
        try:
            makedirs(directory)
            # ``with`` guarantees the handle is closed even if write() fails.
            with open(path, "wb") as f:
                f.write(text)
        except (IOError, OSError) as msg:
            self.message("didn't save %s: %s", path, str(msg))
        else:
            self.message("saved %s", path)

    def savefilename(self, url):
        """Map *url* to the relative local path used for its saved copy.

        The result is ``<host>/<url path>`` with the host lower-cased and
        any user-info and port removed.  An empty path, or one ending in
        "/", is completed with "index.html".  "/" in the URL path becomes
        os.sep.

        "." and ".." path segments are dropped: the URL comes from the
        remote site, and such segments could otherwise place the file
        outside the host's directory.
        """
        rest = urllib.splittype(url)[1]
        host, path = urllib.splithost(rest)
        path = path.lstrip("/")
        host = urllib.splituser(host)[1]
        host = urllib.splitnport(host)[0]
        host = host.lower()
        segments = [seg for seg in path.split("/") if seg not in (".", "..")]
        if not segments or not segments[-1]:
            # Empty path or trailing "/": the page is a directory index.
            # (The slice assignment appends when the list is empty.)
            segments[-1:] = ["index.html"]
        return os.path.join(host, os.sep.join(segments))
102
def makedirs(dir):
    """Create directory *dir*, together with any missing parents.

    - An empty *dir* or an existing directory is a no-op.
    - If a non-directory already occupies *dir*, it is moved to
      ``<dir>/index.html`` and *dir* becomes a directory.  This is
      best-effort: an OS error during the swap is ignored.
    - A trailing path separator ("a/b/") is accepted.
    - A path with no final component to create (for example a bare root)
      prints a diagnostic and returns.

    os.mkdir() errors for a directory that has to be created propagate
    to the caller.
    """
    if not dir:
        return
    if os.path.isdir(dir):
        return
    if os.path.exists(dir):
        # Something that is not a directory is in the way: turn it into
        # the index page of the directory that replaces it.
        backup = dir + ".bak"
        try:
            os.rename(dir, backup)
            os.mkdir(dir)
            os.rename(backup, os.path.join(dir, "index.html"))
        except os.error:
            # Deliberately ignored; the caller's subsequent open() will
            # report the problem if the directory is still unusable.
            pass
        return
    head, tail = os.path.split(dir)
    if not tail:
        if head and head != dir:
            # Trailing separator: retry with it stripped.
            makedirs(head)
        else:
            print("Huh? Don't know how to make dir %s" % (dir,))
        return
    makedirs(head)
    # 0o777 is the octal spelling accepted by both Python 2.6+ and 3.
    os.mkdir(dir, 0o777)
121
if __name__ == '__main__':
    # main() returns None on success; report that as exit status 0.
    _status = main()
    sys.exit(_status or 0)