]>
Commit | Line | Data |
---|---|---|
3257aa99 DM |
1 | #\r |
2 | # ElementTree\r | |
3 | # $Id: ElementPath.py 3375 2008-02-13 08:05:08Z fredrik $\r | |
4 | #\r | |
5 | # limited xpath support for element trees\r | |
6 | #\r | |
7 | # history:\r | |
8 | # 2003-05-23 fl created\r | |
9 | # 2003-05-28 fl added support for // etc\r | |
10 | # 2003-08-27 fl fixed parsing of periods in element names\r | |
11 | # 2007-09-10 fl new selection engine\r | |
12 | # 2007-09-12 fl fixed parent selector\r | |
13 | # 2007-09-13 fl added iterfind; changed findall to return a list\r | |
14 | # 2007-11-30 fl added namespaces support\r | |
15 | # 2009-10-30 fl added child element value filter\r | |
16 | #\r | |
17 | # Copyright (c) 2003-2009 by Fredrik Lundh. All rights reserved.\r | |
18 | #\r | |
19 | # fredrik@pythonware.com\r | |
20 | # http://www.pythonware.com\r | |
21 | #\r | |
22 | # --------------------------------------------------------------------\r | |
23 | # The ElementTree toolkit is\r | |
24 | #\r | |
25 | # Copyright (c) 1999-2009 by Fredrik Lundh\r | |
26 | #\r | |
27 | # By obtaining, using, and/or copying this software and/or its\r | |
28 | # associated documentation, you agree that you have read, understood,\r | |
29 | # and will comply with the following terms and conditions:\r | |
30 | #\r | |
31 | # Permission to use, copy, modify, and distribute this software and\r | |
32 | # its associated documentation for any purpose and without fee is\r | |
33 | # hereby granted, provided that the above copyright notice appears in\r | |
34 | # all copies, and that both that copyright notice and this permission\r | |
35 | # notice appear in supporting documentation, and that the name of\r | |
36 | # Secret Labs AB or the author not be used in advertising or publicity\r | |
37 | # pertaining to distribution of the software without specific, written\r | |
38 | # prior permission.\r | |
39 | #\r | |
40 | # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD\r | |
41 | # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-\r | |
42 | # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR\r | |
43 | # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY\r | |
44 | # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,\r | |
45 | # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS\r | |
46 | # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE\r | |
47 | # OF THIS SOFTWARE.\r | |
48 | # --------------------------------------------------------------------\r | |
49 | \r | |
50 | # Licensed to PSF under a Contributor Agreement.\r | |
51 | # See http://www.python.org/psf/license for licensing details.\r | |
52 | \r | |
53 | ##\r | |
54 | # Implementation module for XPath support. There's usually no reason\r | |
55 | # to import this module directly; the <b>ElementTree</b> does this for\r | |
56 | # you, if needed.\r | |
57 | ##\r | |
58 | \r | |
59 | import re\r | |
60 | \r | |
# Tokenizer for the supported XPath subset.  findall() returns one
# (operator, name) pair per match: group 1 captures quoted strings and
# punctuation, group 2 captures a name (optionally led by a "{uri}"
# part), and a run of whitespace matches neither group, giving ('', '').
#
# Raw string literals are used so that the regex escapes (\., \(, \s, ...)
# are not interpreted as (invalid) Python string escapes.
xpath_tokenizer_re = re.compile(
    r"("
    r"'[^']*'|"
    r'"[^"]*"|'
    r"::|"
    r"//?|"
    r"\.\.|"
    r"\(\)|"
    r"[/.*:\[\]\(\)@=])|"
    r"((?:\{[^}]+\})?[^/\[\]\(\)@=\s]+)|"
    r"\s+"
    )
72 | \r | |
def xpath_tokenizer(pattern, namespaces=None):
    """Yield the tokens of an XPath pattern, resolving namespace prefixes.

    Tokens come from xpath_tokenizer_re.findall(pattern).  A name of the
    form "prefix:local" is yielded as (operator, "{uri}local"), with the
    uri looked up in *namespaces*; every other token is yielded as is.

    Raises SyntaxError when a prefix is used but *namespaces* is empty
    or does not contain it.
    """
    for token in xpath_tokenizer_re.findall(pattern):
        name = token[1]
        # nothing to resolve: operator token, "{uri}name" form, or no prefix
        if not name or name[0] == "{" or ":" not in name:
            yield token
            continue
        try:
            prefix, local = name.split(":", 1)
            if not namespaces:
                raise KeyError
            yield token[0], "{%s}%s" % (namespaces[prefix], local)
        except KeyError:
            raise SyntaxError("prefix %r not found in prefix map" % prefix)
86 | \r | |
def get_parent_map(context):
    """Return the child -> parent mapping stored on *context*.

    The mapping is built from context.root on first use and kept in
    context.parent_map, so later calls return the same dict.
    """
    mapping = context.parent_map
    if mapping is not None:
        return mapping
    # store the dict on the context first, then fill it in place
    context.parent_map = mapping = {}
    for parent in context.root.iter():
        for child in parent:
            mapping[child] = parent
    return mapping
95 | \r | |
def prepare_child(next, token):
    """Return a selector for the direct children whose tag equals the
    name part of *token*."""
    wanted = token[1]
    def select(context, result):
        for parent in result:
            for child in parent:
                if child.tag != wanted:
                    continue
                yield child
    return select
104 | \r | |
def prepare_star(next, token):
    """Return a selector for all direct children, whatever their tag."""
    def select(context, result):
        for parent in result:
            # iterating an element walks its direct children
            for child in parent:
                yield child
    return select
111 | \r | |
def prepare_self(next, token):
    """Return a selector that passes every incoming element through."""
    def select(context, result):
        for current in result:
            yield current
    return select
117 | \r | |
def prepare_descendant(next, token):
    """Return a selector for the descendants named by the following token.

    The token after "//" is read with *next*: "*" selects every
    descendant, a plain name selects descendants with that tag, and any
    other operator raises SyntaxError.
    """
    following = next()
    operator = following[0]
    if operator == "*":
        wanted = "*"
    elif operator:
        raise SyntaxError("invalid descendant")
    else:
        wanted = following[1]
    def select(context, result):
        for ancestor in result:
            for node in ancestor.iter(wanted):
                # iter() also visits the starting element itself; skip it
                if node is ancestor:
                    continue
                yield node
    return select
132 | \r | |
def prepare_parent(next, token):
    """Return a selector for the parents of the incoming elements.

    Each parent is yielded once, the first time it is encountered;
    elements with no entry in the parent map are skipped.
    """
    def select(context, result):
        # FIXME: raise error if .. is applied at toplevel?
        parents = get_parent_map(context)
        emitted = {}
        for node in result:
            if node not in parents:
                continue
            parent = parents[node]
            if parent in emitted:
                continue
            emitted[parent] = None
            yield parent
    return select
145 | \r | |
def prepare_predicate(next, token):
    """Parse a "[...]" predicate and return the selector implementing it.

    Tokens are read with *next* up to the closing "]".  Supported forms:

        [@attribute]          element has the attribute
        [@attribute='value']  attribute equals the value
        [tag]                 element has a matching child
        [tag='value']         a matching child has exactly that text
        [index]               1-based position among same-tag siblings
        [last()]              last among same-tag siblings
        [last()-index]        offset back from the last sibling

    Raises SyntaxError for anything else, for a position smaller than 1
    and for a last() offset that is not negative.  (Such expressions
    used to be accepted and silently selected an unrelated element, e.g.
    [0] picked the last sibling.)
    """
    # FIXME: replace with real parser!!! refs:
    # http://effbot.org/zone/simple-iterator-parser.htm
    # http://javascript.crockford.com/tdop/tdop.html
    signature = []
    predicate = []
    while True:
        token = next()
        if token[0] == "]":
            break
        if token[0] and token[0][:1] in "'\"":
            # quoted string: keep a "'" marker plus the unquoted text
            token = "'", token[0][1:-1]
        # names have an empty operator part; mark them with "-"
        signature.append(token[0] or "-")
        predicate.append(token[1])
    signature = "".join(signature)
    # use signature to determine predicate type
    if signature == "@-":
        # [@attribute] predicate
        key = predicate[1]
        def select(context, result):
            for elem in result:
                if elem.get(key) is not None:
                    yield elem
        return select
    if signature == "@-='":
        # [@attribute='value']
        key = predicate[1]
        value = predicate[-1]
        def select(context, result):
            for elem in result:
                if elem.get(key) == value:
                    yield elem
        return select
    if signature == "-" and not re.match(r"\d+$", predicate[0]):
        # [tag]
        tag = predicate[0]
        def select(context, result):
            for elem in result:
                if elem.find(tag) is not None:
                    yield elem
        return select
    if signature == "-='" and not re.match(r"\d+$", predicate[0]):
        # [tag='value']
        tag = predicate[0]
        value = predicate[-1]
        def select(context, result):
            for elem in result:
                for e in elem.findall(tag):
                    if "".join(e.itertext()) == value:
                        yield elem
                        break
        return select
    if signature == "-" or signature == "-()" or signature == "-()-":
        # [index] or [last()] or [last()-index]
        if signature == "-":
            # XPath positions are 1-based; convert to a list index
            index = int(predicate[0]) - 1
            if index < 0:
                # [0] would otherwise wrap around to the last sibling
                raise SyntaxError("XPath position >= 1 expected")
        else:
            if predicate[0] != "last":
                raise SyntaxError("unsupported function")
            if signature == "-()-":
                # the offset token carries its own sign, e.g. "-1"
                try:
                    index = int(predicate[2]) - 1
                except ValueError:
                    raise SyntaxError("unsupported expression")
                if index > -2:
                    # last()+N or last()-0 would index from the front
                    raise SyntaxError(
                        "XPath offset from last() must be negative")
            else:
                index = -1
        def select(context, result):
            parent_map = get_parent_map(context)
            for elem in result:
                try:
                    parent = parent_map[elem]
                    # FIXME: what if the selector is "*" ?
                    elems = list(parent.findall(elem.tag))
                    if elems[index] is elem:
                        yield elem
                except (IndexError, KeyError):
                    # no parent known, or fewer siblings than the index
                    pass
        return select
    raise SyntaxError("invalid predicate")
225 | \r | |
# Dispatch table used when compiling a path: the operator part of a
# token selects the factory that builds the selector for that step.
# The empty string is the operator part of a plain tag name.
ops = {
    "": prepare_child,
    "*": prepare_star,
    ".": prepare_self,
    "..": prepare_parent,
    "//": prepare_descendant,
    "[": prepare_predicate,
    }

# Compiled selector lists; filled in and pruned by iterfind.
_cache = {}
236 | \r | |
237 | class _SelectorContext:\r | |
238 | parent_map = None\r | |
239 | def __init__(self, root):\r | |
240 | self.root = root\r | |
241 | \r | |
242 | # --------------------------------------------------------------------\r | |
243 | \r | |
244 | ##\r | |
245 | # Generate all matching objects.\r | |
246 | \r | |
def iterfind(elem, path, namespaces=None):
    """Return an iterator over the elements matching *path* below *elem*.

    *namespaces* is an optional prefix -> uri map used to resolve
    "prefix:name" steps.  Compiled selectors are cached; the cache is
    emptied once it holds more than 100 entries.

    An empty path matches nothing and gives an empty iterator.

    Raises SyntaxError for an absolute path (leading "/") and for a
    path that ends in the middle of a step.
    """
    # compile selector pattern
    if path[-1:] == "/":
        path = path + "*" # implicit all (FIXME: keep this?)
    # The compiled selector embeds the resolved namespace uris, so the
    # prefix map has to be part of the cache key; keying on the path
    # alone would reuse a selector built for a different map.
    cache_key = (path,)
    if namespaces:
        cache_key += tuple(sorted(namespaces.items()))
    try:
        selector = _cache[cache_key]
    except KeyError:
        if len(_cache) > 100:
            _cache.clear()
        if path[:1] == "/":
            raise SyntaxError("cannot use absolute path on element")
        tokens = iter(xpath_tokenizer(path, namespaces))
        def next_token():
            # the next() builtin works on both Python 2.6+ and Python 3
            return next(tokens)
        try:
            token = next_token()
        except StopIteration:
            # no tokens at all: do not leak StopIteration to the caller
            return iter(())
        selector = []
        while True:
            try:
                selector.append(ops[token[0]](next_token, token))
            except StopIteration:
                # a step factory ran out of tokens mid-step
                raise SyntaxError("invalid path")
            try:
                token = next_token()
                if token[0] == "/":
                    token = next_token()
            except StopIteration:
                break
        _cache[cache_key] = selector
    # execute selector pattern
    result = [elem]
    context = _SelectorContext(elem)
    for select in selector:
        # each step consumes the previous step's (lazy) result
        result = select(context, result)
    return result
279 | \r | |
280 | ##\r | |
281 | # Find first matching object.\r | |
282 | \r | |
def find(elem, path, namespaces=None):
    """Return the first element matching *path*, or None if none does."""
    try:
        # next() builtin instead of the Python-2-only .next() method
        return next(iterfind(elem, path, namespaces))
    except StopIteration:
        return None
288 | \r | |
289 | ##\r | |
290 | # Find all matching objects.\r | |
291 | \r | |
def findall(elem, path, namespaces=None):
    """Return a list of all elements matching *path*."""
    matches = iterfind(elem, path, namespaces)
    return list(matches)
294 | \r | |
295 | ##\r | |
296 | # Find text for first matching object.\r | |
297 | \r | |
def findtext(elem, path, default=None, namespaces=None):
    """Return the text of the first element matching *path*.

    Returns *default* when nothing matches, and "" when the matching
    element has no text.
    """
    try:
        # next() builtin instead of the Python-2-only .next() method
        elem = next(iterfind(elem, path, namespaces))
    except StopIteration:
        return default
    return elem.text or ""