--- /dev/null
+#\r
+# ElementTree\r
+# $Id: ElementPath.py 3375 2008-02-13 08:05:08Z fredrik $\r
+#\r
+# limited xpath support for element trees\r
+#\r
+# history:\r
+# 2003-05-23 fl created\r
+# 2003-05-28 fl added support for // etc\r
+# 2003-08-27 fl fixed parsing of periods in element names\r
+# 2007-09-10 fl new selection engine\r
+# 2007-09-12 fl fixed parent selector\r
+# 2007-09-13 fl added iterfind; changed findall to return a list\r
+# 2007-11-30 fl added namespaces support\r
+# 2009-10-30 fl added child element value filter\r
+#\r
+# Copyright (c) 2003-2009 by Fredrik Lundh. All rights reserved.\r
+#\r
+# fredrik@pythonware.com\r
+# http://www.pythonware.com\r
+#\r
+# --------------------------------------------------------------------\r
+# The ElementTree toolkit is\r
+#\r
+# Copyright (c) 1999-2009 by Fredrik Lundh\r
+#\r
+# By obtaining, using, and/or copying this software and/or its\r
+# associated documentation, you agree that you have read, understood,\r
+# and will comply with the following terms and conditions:\r
+#\r
+# Permission to use, copy, modify, and distribute this software and\r
+# its associated documentation for any purpose and without fee is\r
+# hereby granted, provided that the above copyright notice appears in\r
+# all copies, and that both that copyright notice and this permission\r
+# notice appear in supporting documentation, and that the name of\r
+# Secret Labs AB or the author not be used in advertising or publicity\r
+# pertaining to distribution of the software without specific, written\r
+# prior permission.\r
+#\r
+# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD\r
+# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-\r
+# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR\r
+# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY\r
+# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,\r
+# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS\r
+# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE\r
+# OF THIS SOFTWARE.\r
+# --------------------------------------------------------------------\r
+\r
+# Licensed to PSF under a Contributor Agreement.\r
+# See http://www.python.org/psf/license for licensing details.\r
+\r
+##\r
+# Implementation module for XPath support. There's usually no reason\r
+# to import this module directly; the <b>ElementTree</b> does this for\r
+# you, if needed.\r
+##\r
+\r
+import re\r
+\r
# Tokenizer for the supported XPath subset.  Each match produces a pair
# (operator, tag):
#   group 1 - a quoted string or one of the operator symbols
#   group 2 - a tag name, optionally preceded by a "{uri}" qualifier
# A run of whitespace matches with both groups empty.
# The fragments containing regex escapes are raw strings so that "\." and
# friends are not treated as (invalid) string escape sequences.
xpath_tokenizer_re = re.compile(
    "("
    "'[^']*'|\"[^\"]*\"|"
    "::|"
    "//?|"
    r"\.\.|"
    r"\(\)|"
    r"[/.*:\[\]\(\)@=])|"
    r"((?:\{[^}]+\})?[^/\[\]\(\)@=\s]+)|"
    r"\s+"
    )
+\r
def xpath_tokenizer(pattern, namespaces=None):
    """Yield (operator, tag) token pairs for the path expression *pattern*.

    A tag of the form "prefix:name" that is not already written in
    "{uri}name" form is expanded to "{uri}name" using the *namespaces*
    mapping (prefix -> uri).

    Raises SyntaxError when a prefix is used but *namespaces* is empty or
    does not contain that prefix.
    """
    for op, name in xpath_tokenizer_re.findall(pattern):
        is_prefixed = bool(name) and name[0] != "{" and ":" in name
        if not is_prefixed:
            yield op, name
            continue
        prefix, local = name.split(":", 1)
        if not namespaces:
            raise SyntaxError("prefix %r not found in prefix map" % prefix)
        try:
            uri = namespaces[prefix]
        except KeyError:
            raise SyntaxError("prefix %r not found in prefix map" % prefix)
        yield op, "{%s}%s" % (uri, local)
+\r
def get_parent_map(context):
    """Return the child -> parent mapping for the tree rooted at context.root.

    The mapping is built on first use by walking context.root.iter() and is
    stored on context.parent_map, so later calls return the same dict.
    """
    cached = context.parent_map
    if cached is not None:
        return cached
    mapping = {}
    context.parent_map = mapping
    for parent in context.root.iter():
        for child in parent:
            mapping[child] = parent
    return mapping
+\r
def prepare_child(next, token):
    """Build a selector yielding the direct children whose tag equals token[1].

    *next* is accepted for signature parity with the other prepare_*
    functions and is not used here.
    """
    wanted = token[1]
    def select(context, result):
        for parent in result:
            for child in parent:
                if child.tag != wanted:
                    continue
                yield child
    return select
+\r
def prepare_star(next, token):
    """Build a selector yielding every direct child of each input element.

    Neither *next* nor *token* is consulted.
    """
    def select(context, result):
        for parent in result:
            for child in parent:
                yield child
    return select
+\r
def prepare_self(next, token):
    """Build a selector for "." that passes every input element through unchanged.

    Neither *next* nor *token* is consulted.
    """
    def select(context, result):
        for current in result:
            yield current
    return select
+\r
def prepare_descendant(next, token):
    """Build a selector for "//name" or "//*".

    The token after "//" is pulled from *next*: "*" selects all
    descendants, a plain tag token selects descendants with that tag, and
    any other operator token raises SyntaxError("invalid descendant").
    The input elements themselves are never yielded.
    """
    following = next()
    op = following[0]
    if op == "*":
        wanted = "*"
    elif op:
        raise SyntaxError("invalid descendant")
    else:
        wanted = following[1]
    def select(context, result):
        for start in result:
            for node in start.iter(wanted):
                # iter() starts with the element itself; skip it
                if node is start:
                    continue
                yield node
    return select
+\r
def prepare_parent(next, token):
    """Build a selector for ".." that yields each distinct parent once.

    Parents are looked up in the map returned by get_parent_map(context);
    input elements with no entry in that map are skipped silently.
    """
    def select(context, result):
        # FIXME: raise error if .. is applied at toplevel?
        parents = get_parent_map(context)
        emitted = {}  # parents already yielded (used as a set)
        for child in result:
            if child not in parents:
                continue
            parent = parents[child]
            if parent in emitted:
                continue
            emitted[parent] = None
            yield parent
    return select
+\r
def prepare_predicate(next, token):
    """Build a selector for a "[...]" predicate.

    Tokens are pulled from *next* up to the closing "]" and reduced to a
    signature string: "-" for a name/number token, "'" for a quoted string,
    otherwise the operator text itself.  The signature picks the form:

        "@-"     [@attribute]
        "@-='"   [@attribute='value']
        "-"      [tag] or, if all digits, [position] (1 is the first)
        "-='"    [tag='value']  (value compared with the child's full text)
        "-()"    [last()]
        "-()-"   [last()-offset]

    Raises SyntaxError for any other form, for a function other than
    last(), for a position below 1, and for a last() offset that is not an
    integer or is positive.  StopIteration from *next* (missing "]") is
    left for the caller to handle.
    """
    # FIXME: replace with real parser!!! refs:
    # http://effbot.org/zone/simple-iterator-parser.htm
    # http://javascript.crockford.com/tdop/tdop.html
    signature = []
    predicate = []
    while True:
        token = next()
        if token[0] == "]":
            break
        if token[0] and token[0][:1] in "'\"":
            # quoted string: normalise the operator to "'" and strip quotes
            token = "'", token[0][1:-1]
        signature.append(token[0] or "-")
        predicate.append(token[1])
    signature = "".join(signature)
    # use signature to determine predicate type
    if signature == "@-":
        # [@attribute] predicate
        key = predicate[1]
        def select(context, result):
            for elem in result:
                if elem.get(key) is not None:
                    yield elem
        return select
    if signature == "@-='":
        # [@attribute='value']
        key = predicate[1]
        value = predicate[-1]
        def select(context, result):
            for elem in result:
                if elem.get(key) == value:
                    yield elem
        return select
    if signature == "-" and not re.match(r"\d+$", predicate[0]):
        # [tag]
        tag = predicate[0]
        def select(context, result):
            for elem in result:
                if elem.find(tag) is not None:
                    yield elem
        return select
    if signature == "-='" and not re.match(r"\d+$", predicate[0]):
        # [tag='value']
        tag = predicate[0]
        value = predicate[-1]
        def select(context, result):
            for elem in result:
                for e in elem.findall(tag):
                    if "".join(e.itertext()) == value:
                        yield elem
                        break
        return select
    if signature == "-" or signature == "-()" or signature == "-()-":
        # [index] or [last()] or [last()-index]
        if signature == "-":
            index = int(predicate[0]) - 1
            if index < 0:
                # [0] would otherwise wrap around to the last sibling
                raise SyntaxError("XPath position >= 1 expected")
        else:
            if predicate[0] != "last":
                raise SyntaxError("unsupported function")
            if signature == "-()-":
                # the offset token carries its own sign, e.g. "-1"
                try:
                    index = int(predicate[2]) - 1
                except ValueError:
                    raise SyntaxError("unsupported expression")
                if index > -1:
                    # e.g. last()+1 would otherwise count from the front
                    raise SyntaxError(
                        "XPath offset from last() must not be positive")
            else:
                index = -1
        def select(context, result):
            parent_map = get_parent_map(context)
            for elem in result:
                try:
                    parent = parent_map[elem]
                    # FIXME: what if the selector is "*" ?
                    elems = list(parent.findall(elem.tag))
                    if elems[index] is elem:
                        yield elem
                except (IndexError, KeyError):
                    # no parent known, or fewer siblings than requested
                    pass
        return select
    raise SyntaxError("invalid predicate")
+\r
# Dispatch table used when compiling a path: the operator part of a token
# (token[0]) selects the prepare_* function that builds the selector for
# that step.  The empty key handles plain tag-name tokens.
ops = dict((
    ("", prepare_child),
    ("*", prepare_star),
    (".", prepare_self),
    ("..", prepare_parent),
    ("//", prepare_descendant),
    ("[", prepare_predicate),
))

# Compiled selectors, reused between calls to iterfind.
_cache = dict()
+\r
class _SelectorContext:
    """Per-search state handed to every selector as its *context* argument."""

    # Child -> parent mapping; stays None until get_parent_map() builds it
    # and stores it on the instance.
    parent_map = None

    def __init__(self, root):
        # element the search was started from
        self.root = root
+\r
+# --------------------------------------------------------------------\r
+\r
+##\r
+# Generate all matching objects.\r
+\r
def iterfind(elem, path, namespaces=None):
    """Return an iterator over the elements matching *path* below *elem*.

    *path* is tokenized with xpath_tokenizer and compiled into a list of
    selector functions, which is cached for reuse.  A trailing "/" is
    treated as "/*".  *namespaces* is an optional prefix -> uri mapping
    used to expand prefixed names in *path*.

    An empty *path* matches nothing.

    Raises SyntaxError for an absolute path (leading "/"), for a path that
    ends in the middle of a step, and for an unknown namespace prefix.
    """
    # compile selector pattern
    if path[-1:] == "/":
        path = path + "*" # implicit all (FIXME: keep this?)
    # Prefixes are expanded while compiling, so a selector is only valid
    # for the namespace map it was compiled with: key the cache on both.
    cache_key = (path, frozenset(namespaces.items()) if namespaces else None)
    try:
        selector = _cache[cache_key]
    except KeyError:
        if len(_cache) > 100:
            _cache.clear()
        if path[:1] == "/":
            raise SyntaxError("cannot use absolute path on element")
        tokens = iter(xpath_tokenizer(path, namespaces))
        def next_token():
            # builtin next() works on both Python 2.6+ and Python 3
            return next(tokens)
        try:
            token = next_token()
        except StopIteration:
            # empty path: do not let StopIteration escape to the caller
            return iter(())
        selector = []
        while True:
            try:
                selector.append(ops[token[0]](next_token, token))
            except StopIteration:
                # a prepare_* function ran out of tokens mid-step
                raise SyntaxError("invalid path")
            try:
                token = next_token()
                if token[0] == "/":
                    token = next_token()
            except StopIteration:
                break
        _cache[cache_key] = selector
    # execute selector pattern
    result = [elem]
    context = _SelectorContext(elem)
    for select in selector:
        result = select(context, result)
    return result
+\r
+##\r
+# Find first matching object.\r
+\r
def find(elem, path, namespaces=None):
    """Return the first element matching *path* below *elem*, or None.

    Arguments are passed unchanged to iterfind.
    """
    try:
        # builtin next() instead of the Python 2-only .next() method
        return next(iterfind(elem, path, namespaces))
    except StopIteration:
        return None
+\r
+##\r
+# Find all matching objects.\r
+\r
def findall(elem, path, namespaces=None):
    """Return a list of all elements matching *path* below *elem*.

    Arguments are passed unchanged to iterfind; the list preserves the
    order in which iterfind produces the matches.
    """
    matches = iterfind(elem, path, namespaces)
    return list(matches)
+\r
+##\r
+# Find text for first matching object.\r
+\r
def findtext(elem, path, default=None, namespaces=None):
    """Return the text of the first element matching *path* below *elem*.

    Returns "" when the matching element has no text, and *default* when
    nothing matches.
    """
    try:
        # builtin next() instead of the Python 2-only .next() method
        elem = next(iterfind(elem, path, namespaces))
        return elem.text or ""
    except StopIteration:
        return default