]>
Commit | Line | Data |
---|---|---|
3257aa99 DM |
1 | """\\r |
2 | A library of useful helper classes to the SAX classes, for the\r | |
3 | convenience of application and driver writers.\r | |
4 | """\r | |
5 | \r | |
6 | import os, urlparse, urllib, types\r | |
7 | import io\r | |
8 | import sys\r | |
9 | import handler\r | |
10 | import xmlreader\r | |
11 | \r | |
# Types that prepare_input_source() treats as "a system identifier given
# as a string".  The unicode type is added only when this interpreter's
# ``types`` module actually provides it.
_StringTypes = [types.StringType]
if hasattr(types, "UnicodeType"):
    _StringTypes.append(types.UnicodeType)
16 | \r | |
def __dict_replace(s, d):
    """Return *s* with every key of *d* replaced by its mapped value.

    The replacements are applied one after another, in the iteration
    order of ``d.items()``, each on the result of the previous one.
    """
    for old, new in d.items():
        s = s.replace(old, new)
    return s
22 | \r | |
def escape(data, entities={}):
    """Escape &, <, and > in a string of data.

    You can escape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.

    The extra replacements are applied after the three built-in ones.
    The default ``entities`` dict is only read, never modified.
    Returns the escaped string.
    """

    # must do ampersand first: otherwise the "&" introduced by "&gt;" and
    # "&lt;" below would itself be escaped a second time
    data = data.replace("&", "&amp;")
    data = data.replace(">", "&gt;")
    data = data.replace("<", "&lt;")
    if entities:
        data = __dict_replace(data, entities)
    return data
38 | \r | |
def unescape(data, entities={}):
    """Unescape &amp;, &lt;, and &gt; in a string of data.

    You can unescape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.

    The extra replacements are applied after &lt; and &gt; but before
    &amp;.  The default ``entities`` dict is only read, never modified.
    Returns the unescaped string.
    """
    data = data.replace("&lt;", "<")
    data = data.replace("&gt;", ">")
    if entities:
        data = __dict_replace(data, entities)
    # must do ampersand last: otherwise "&amp;lt;" would first become
    # "&lt;" and then be wrongly turned into "<"
    return data.replace("&amp;", "&")
52 | \r | |
def quoteattr(data, entities={}):
    """Escape and quote an attribute value.

    Escape &, <, and > in a string of data, then quote it for use as
    an attribute value.  The \" character will be escaped as well, if
    necessary.

    You can escape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.

    Returns the value including its surrounding quote characters.
    """
    # Work on a copy so neither the caller's dict nor the shared default
    # is modified.
    entities = entities.copy()
    # Newline, carriage return and tab are written as character
    # references so they are not collapsed to spaces when the attribute
    # value is normalized by an XML parser.
    entities.update({'\n': '&#10;', '\r': '&#13;', '\t': '&#9;'})
    data = escape(data, entities)
    if '"' in data:
        if "'" in data:
            # Both quote characters occur: use double quotes and escape
            # the embedded double quotes.
            data = '"%s"' % data.replace('"', "&quot;")
        else:
            data = "'%s'" % data
    else:
        data = '"%s"' % data
    return data
75 | \r | |
76 | \r | |
def _gettextwriter(out, encoding):
    """Return a text writer that encodes to *encoding* and writes to *out*.

    *out* may be None, in which case sys.stdout is used.  The result is
    an _UnbufferedTextIOWrapper using 'xmlcharrefreplace' error handling
    and '\\n' as the newline.
    """
    if out is None:
        import sys
        out = sys.stdout

    if not isinstance(out, io.RawIOBase):
        # Objects outside the IOBase hierarchy that merely offer a
        # write method: borrow that method on an otherwise empty buffer.
        sink = io.BufferedIOBase()
        sink.writable = lambda: True
        sink.write = out.write
        try:
            # TextIOWrapper consults these methods to decide whether a
            # BOM (for UTF-16, etc) should be emitted
            sink.seekable = out.seekable
            sink.tell = out.tell
        except AttributeError:
            pass
    else:
        # NOTE(review): nothing visible here forwards writes from this
        # buffer object to `out` -- confirm this branch behaves as
        # intended for raw streams.
        sink = io.BufferedIOBase(out)
        # Keep the original file open when the TextIOWrapper is
        # destroyed
        sink.close = lambda: None

    # wrap the binary writer with a text layer
    return _UnbufferedTextIOWrapper(sink, encoding=encoding,
                                    errors='xmlcharrefreplace',
                                    newline='\n')
104 | \r | |
105 | \r | |
class _UnbufferedTextIOWrapper(io.TextIOWrapper):
    """A TextIOWrapper that flushes itself after every write."""

    def write(self, s):
        """Write *s* through the parent class, then flush; returns None."""
        parent = super(_UnbufferedTextIOWrapper, self)
        parent.write(s)
        self.flush()
110 | \r | |
111 | \r | |
class XMLGenerator(handler.ContentHandler):
    """ContentHandler that writes the events it receives out as XML text.

    All output goes through the writer that _gettextwriter(out, encoding)
    returns; *encoding* is also the value announced in the XML
    declaration written by startDocument().
    """

    def __init__(self, out=None, encoding="iso-8859-1"):
        handler.ContentHandler.__init__(self)
        writer = _gettextwriter(out, encoding)
        self._write = writer.write
        self._flush = writer.flush
        # Stack of saved uri -> prefix dicts; the live mapping starts
        # out as its only entry.
        self._ns_contexts = [{}]
        self._current_context = self._ns_contexts[-1]
        # (prefix, uri) pairs announced by startPrefixMapping() that
        # still need an xmlns attribute on the next element.
        self._undeclared_ns_maps = []
        self._encoding = encoding

    def _qname(self, name):
        """Builds a qualified name from a (ns_url, localname) pair"""
        uri = name[0]
        if not uri:
            # No namespace: the local name is used unqualified
            return name[1]
        # Per http://www.w3.org/XML/1998/namespace, The 'xml' prefix is
        # bound by definition to http://www.w3.org/XML/1998/namespace.  It
        # does not need to be declared and will not usually be found in
        # self._current_context.
        if uri == 'http://www.w3.org/XML/1998/namespace':
            return 'xml:' + name[1]
        prefix = self._current_context[uri]
        if not prefix:
            # Default namespace: no prefix to prepend
            return name[1]
        return prefix + ":" + name[1]

    # ContentHandler methods

    def startDocument(self):
        """Write the XML declaration naming the configured encoding."""
        declaration = u'<?xml version="1.0" encoding="%s"?>\n' % self._encoding
        self._write(declaration)

    def endDocument(self):
        """Flush the underlying writer."""
        self._flush()

    def startPrefixMapping(self, prefix, uri):
        """Record a new prefix binding to be declared on the next element."""
        # Save a snapshot of the mapping before modifying the live one.
        snapshot = self._current_context.copy()
        self._ns_contexts.append(snapshot)
        self._current_context[uri] = prefix
        self._undeclared_ns_maps.append((prefix, uri))

    def endPrefixMapping(self, prefix):
        """Make the most recently saved mapping the current one again."""
        self._current_context = self._ns_contexts.pop()

    def startElement(self, name, attrs):
        """Write a start tag with its attributes (no namespace handling)."""
        self._write(u'<' + name)
        for attr_name, attr_value in attrs.items():
            self._write(u' %s=%s' % (attr_name, quoteattr(attr_value)))
        self._write(u'>')

    def endElement(self, name):
        """Write the end tag for *name*."""
        self._write(u'</%s>' % name)

    def startElementNS(self, name, qname, attrs):
        """Write a namespace-aware start tag.

        Pending prefix mappings are emitted as xmlns attributes first,
        then the element's own attributes follow.
        """
        self._write(u'<' + self._qname(name))

        for prefix, uri in self._undeclared_ns_maps:
            if not prefix:
                self._write(u' xmlns="%s"' % uri)
            else:
                self._write(u' xmlns:%s="%s"' % (prefix, uri))
        self._undeclared_ns_maps = []

        for attr_name, attr_value in attrs.items():
            self._write(u' %s=%s' % (self._qname(attr_name),
                                     quoteattr(attr_value)))
        self._write(u'>')

    def endElementNS(self, name, qname):
        """Write the namespace-aware end tag for *name*."""
        self._write(u'</%s>' % self._qname(name))

    def characters(self, content):
        """Write *content* escaped, decoding it first if it is not unicode."""
        if isinstance(content, unicode):
            text = content
        else:
            text = unicode(content, self._encoding)
        self._write(escape(text))

    def ignorableWhitespace(self, content):
        """Write *content* unescaped, decoding it first if it is not unicode."""
        if isinstance(content, unicode):
            text = content
        else:
            text = unicode(content, self._encoding)
        self._write(text)

    def processingInstruction(self, target, data):
        """Write a processing instruction."""
        self._write(u'<?%s %s?>' % (target, data))
197 | \r | |
198 | \r | |
class XMLFilterBase(xmlreader.XMLReader):
    """Pass-through filter placed between an XMLReader and the
    application's event handlers.

    Every configuration request is handed to the parent reader and
    every event is handed to the registered handler without change.
    Subclasses override individual methods to alter the event stream or
    the configuration requests on their way through.
    """

    def __init__(self, parent = None):
        xmlreader.XMLReader.__init__(self)
        # The reader this filter wraps; may be replaced via setParent().
        self._parent = parent

    # ErrorHandler methods

    def error(self, exception):
        """Forward *exception* to the error handler's error()."""
        self._err_handler.error(exception)

    def fatalError(self, exception):
        """Forward *exception* to the error handler's fatalError()."""
        self._err_handler.fatalError(exception)

    def warning(self, exception):
        """Forward *exception* to the error handler's warning()."""
        self._err_handler.warning(exception)

    # ContentHandler methods

    def setDocumentLocator(self, locator):
        """Forward the locator to the content handler."""
        self._cont_handler.setDocumentLocator(locator)

    def startDocument(self):
        """Forward the start-of-document event to the content handler."""
        self._cont_handler.startDocument()

    def endDocument(self):
        """Forward the end-of-document event to the content handler."""
        self._cont_handler.endDocument()

    def startPrefixMapping(self, prefix, uri):
        """Forward the prefix mapping start to the content handler."""
        self._cont_handler.startPrefixMapping(prefix, uri)

    def endPrefixMapping(self, prefix):
        """Forward the prefix mapping end to the content handler."""
        self._cont_handler.endPrefixMapping(prefix)

    def startElement(self, name, attrs):
        """Forward the element start to the content handler."""
        self._cont_handler.startElement(name, attrs)

    def endElement(self, name):
        """Forward the element end to the content handler."""
        self._cont_handler.endElement(name)

    def startElementNS(self, name, qname, attrs):
        """Forward the namespace-aware element start to the content handler."""
        self._cont_handler.startElementNS(name, qname, attrs)

    def endElementNS(self, name, qname):
        """Forward the namespace-aware element end to the content handler."""
        self._cont_handler.endElementNS(name, qname)

    def characters(self, content):
        """Forward character data to the content handler."""
        self._cont_handler.characters(content)

    def ignorableWhitespace(self, chars):
        """Forward ignorable whitespace to the content handler."""
        self._cont_handler.ignorableWhitespace(chars)

    def processingInstruction(self, target, data):
        """Forward the processing instruction to the content handler."""
        self._cont_handler.processingInstruction(target, data)

    def skippedEntity(self, name):
        """Forward the skipped entity notification to the content handler."""
        self._cont_handler.skippedEntity(name)

    # DTDHandler methods

    def notationDecl(self, name, publicId, systemId):
        """Forward the notation declaration to the DTD handler."""
        self._dtd_handler.notationDecl(name, publicId, systemId)

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        """Forward the unparsed entity declaration to the DTD handler."""
        self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata)

    # EntityResolver methods

    def resolveEntity(self, publicId, systemId):
        """Return whatever the entity resolver's resolveEntity() returns."""
        return self._ent_handler.resolveEntity(publicId, systemId)

    # XMLReader methods

    def parse(self, source):
        """Register this filter as all four handlers of the parent
        reader, then ask the parent to parse *source*."""
        self._parent.setContentHandler(self)
        self._parent.setErrorHandler(self)
        self._parent.setEntityResolver(self)
        self._parent.setDTDHandler(self)
        self._parent.parse(source)

    def setLocale(self, locale):
        """Pass the locale setting on to the parent reader."""
        self._parent.setLocale(locale)

    def getFeature(self, name):
        """Return the parent reader's value for feature *name*."""
        return self._parent.getFeature(name)

    def setFeature(self, name, state):
        """Set feature *name* to *state* on the parent reader."""
        self._parent.setFeature(name, state)

    def getProperty(self, name):
        """Return the parent reader's value for property *name*."""
        return self._parent.getProperty(name)

    def setProperty(self, name, value):
        """Set property *name* to *value* on the parent reader."""
        self._parent.setProperty(name, value)

    # XMLFilter methods

    def getParent(self):
        """Return the reader this filter wraps."""
        return self._parent

    def setParent(self, parent):
        """Replace the reader this filter wraps."""
        self._parent = parent
307 | \r | |
308 | # --- Utility functions\r | |
309 | \r | |
def prepare_input_source(source, base = ""):
    """This function takes an InputSource and an optional base URL and
    returns a fully resolved InputSource object ready for reading.

    *source* may be a string (used as the system identifier), an object
    with a ``read`` attribute (used as the byte stream), or an object
    offering the InputSource accessors used below.  When the resulting
    source has no byte stream yet, its system identifier is tried first
    as a local file name relative to the directory of *base*, and
    otherwise resolved as a URL against *base* and opened with
    urllib.urlopen().
    """
    string_types = tuple(_StringTypes)

    # isinstance() rather than an exact type match, so that subclasses
    # of the string types are accepted as system identifiers too.
    if isinstance(source, string_types):
        source = xmlreader.InputSource(source)
    elif hasattr(source, "read"):
        f = source
        source = xmlreader.InputSource()
        source.setByteStream(f)
        # A stream's ``name`` is not always a file name (the io module
        # documents it as the integer file descriptor when the file was
        # opened from one), so only a string is used as system id.
        if hasattr(f, "name") and isinstance(f.name, string_types):
            source.setSystemId(f.name)

    if source.getByteStream() is None:
        try:
            sysid = source.getSystemId()
            basehead = os.path.dirname(os.path.normpath(base))
            encoding = sys.getfilesystemencoding()
            # Bring sysid and basehead to the same string type before
            # joining them: first try decoding the byte string side
            # with the filesystem encoding; if that fails, encode the
            # unicode side instead.
            if isinstance(sysid, unicode):
                if not isinstance(basehead, unicode):
                    try:
                        basehead = basehead.decode(encoding)
                    except UnicodeDecodeError:
                        sysid = sysid.encode(encoding)
            else:
                if isinstance(basehead, unicode):
                    try:
                        sysid = sysid.decode(encoding)
                    except UnicodeDecodeError:
                        basehead = basehead.encode(encoding)
            sysidfilename = os.path.join(basehead, sysid)
            isfile = os.path.isfile(sysidfilename)
        except UnicodeError:
            # The name cannot be expressed as a local path; fall back
            # to treating the system id as a URL.
            isfile = False
        if isfile:
            source.setSystemId(sysidfilename)
            # The open file is handed to the caller via the returned
            # InputSource, so it is intentionally not closed here.
            f = open(sysidfilename, "rb")
        else:
            source.setSystemId(urlparse.urljoin(base, source.getSystemId()))
            f = urllib.urlopen(source.getSystemId())

        source.setByteStream(f)

    return source