+++ /dev/null
-#\r
-# ElementTree\r
-# $Id: ElementPath.py 3375 2008-02-13 08:05:08Z fredrik $\r
-#\r
-# limited xpath support for element trees\r
-#\r
-# history:\r
-# 2003-05-23 fl created\r
-# 2003-05-28 fl added support for // etc\r
-# 2003-08-27 fl fixed parsing of periods in element names\r
-# 2007-09-10 fl new selection engine\r
-# 2007-09-12 fl fixed parent selector\r
-# 2007-09-13 fl added iterfind; changed findall to return a list\r
-# 2007-11-30 fl added namespaces support\r
-# 2009-10-30 fl added child element value filter\r
-#\r
-# Copyright (c) 2003-2009 by Fredrik Lundh. All rights reserved.\r
-#\r
-# fredrik@pythonware.com\r
-# http://www.pythonware.com\r
-#\r
-# --------------------------------------------------------------------\r
-# The ElementTree toolkit is\r
-#\r
-# Copyright (c) 1999-2009 by Fredrik Lundh\r
-#\r
-# By obtaining, using, and/or copying this software and/or its\r
-# associated documentation, you agree that you have read, understood,\r
-# and will comply with the following terms and conditions:\r
-#\r
-# Permission to use, copy, modify, and distribute this software and\r
-# its associated documentation for any purpose and without fee is\r
-# hereby granted, provided that the above copyright notice appears in\r
-# all copies, and that both that copyright notice and this permission\r
-# notice appear in supporting documentation, and that the name of\r
-# Secret Labs AB or the author not be used in advertising or publicity\r
-# pertaining to distribution of the software without specific, written\r
-# prior permission.\r
-#\r
-# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD\r
-# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-\r
-# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR\r
-# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY\r
-# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,\r
-# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS\r
-# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE\r
-# OF THIS SOFTWARE.\r
-# --------------------------------------------------------------------\r
-\r
-# Licensed to PSF under a Contributor Agreement.\r
-# See http://www.python.org/psf/license for licensing details.\r
-\r
-##\r
-# Implementation module for XPath support. There's usually no reason\r
-# to import this module directly; the <b>ElementTree</b> does this for\r
-# you, if needed.\r
-##\r
-\r
-import re\r
-\r
# Tokenizer for the supported XPath subset.  findall() reports every match
# as a 2-tuple:
#   group 1 -- quoted string literals and operators/punctuation
#   group 2 -- names, optionally prefixed with a "{uri}" namespace
# A run of whitespace matches with both groups empty.
#
# Fragments that contain regex escapes are raw strings so that "\." and
# friends are not parsed as (invalid) string escape sequences; the
# resulting pattern text is unchanged.
xpath_tokenizer_re = re.compile(
    "("
    "'[^']*'|\"[^\"]*\"|"
    "::|"
    "//?|"
    r"\.\.|"
    r"\(\)|"
    r"[/.*:\[\]\(\)@=])|"
    r"((?:\{[^}]+\})?[^/\[\]\(\)@=\s]+)|"
    r"\s+"
    )
-\r
def xpath_tokenizer(pattern, namespaces=None):
    """Yield (operator, name) token pairs for the path expression *pattern*.

    Names of the form ``prefix:local`` are rewritten to ``{uri}local`` using
    the *namespaces* prefix map; all other tokens are passed through as
    produced by ``xpath_tokenizer_re``.

    Raises SyntaxError when a prefix is used but *namespaces* is empty/None
    or does not contain that prefix.
    """
    for token in xpath_tokenizer_re.findall(pattern):
        name = token[1]
        # Operators, already-qualified "{uri}name" tags and unprefixed
        # names need no translation.
        if not name or name[0] == "{" or ":" not in name:
            yield token
            continue
        try:
            prefix, local = name.split(":", 1)
            if not namespaces:
                raise KeyError
            yield token[0], "{%s}%s" % (namespaces[prefix], local)
        except KeyError:
            raise SyntaxError("prefix %r not found in prefix map" % prefix)
-\r
def get_parent_map(context):
    """Return the child -> parent mapping for *context*, building it once.

    The mapping is stored on ``context.parent_map`` so later calls with the
    same context reuse it.  It covers every element reachable from
    ``context.root``; the root itself has no entry.
    """
    mapping = context.parent_map
    if mapping is not None:
        return mapping
    # Publish the (still empty) dict first, then fill it in place.
    mapping = context.parent_map = {}
    for parent in context.root.iter():
        for child in parent:
            mapping[child] = parent
    return mapping
-\r
def prepare_child(next, token):
    """Build the selector step for a plain tag name.

    *token* is the (operator, name) pair for the step; *next* is not used.
    The returned ``select(context, result)`` generator yields every direct
    child of the elements in *result* whose tag equals the name.
    """
    wanted = token[1]

    def select(context, result):
        for parent in result:
            for child in parent:
                if child.tag != wanted:
                    continue
                yield child

    return select
-\r
def prepare_star(next, token):
    """Build the selector step for ``*``.

    Neither argument is used.  The returned ``select(context, result)``
    generator yields every direct child of each element in *result*.
    """
    def select(context, result):
        for parent in result:
            for child in parent:
                yield child

    return select
-\r
def prepare_self(next, token):
    """Build the selector step for ``.``.

    Neither argument is used.  The returned ``select(context, result)``
    generator yields the elements of *result* unchanged.
    """
    def select(context, result):
        for current in result:
            yield current

    return select
-\r
def prepare_descendant(next, token):
    """Build the selector step for ``//``.

    Consumes the following token through *next*: it must be ``*`` or a
    plain tag name, otherwise SyntaxError("invalid descendant") is raised.
    The returned ``select(context, result)`` generator yields, for each
    element in *result*, the elements produced by ``elem.iter(tag)``
    except the element itself.
    """
    following = next()
    operator = following[0]
    if operator == "*":
        tag = "*"
    elif operator:
        # Any other operator cannot follow "//".
        raise SyntaxError("invalid descendant")
    else:
        tag = following[1]

    def select(context, result):
        for start in result:
            for found in start.iter(tag):
                if found is start:
                    continue
                yield found

    return select
-\r
def prepare_parent(next, token):
    """Build the selector step for ``..``.

    Neither argument is used.  The returned ``select(context, result)``
    generator yields the parent of each element in *result*, each parent
    at most once.  Elements without an entry in the parent map are
    skipped.
    """
    def select(context, result):
        # FIXME: raise error if .. is applied at toplevel?
        parents = get_parent_map(context)
        emitted = set()
        for elem in result:
            try:
                parent = parents[elem]
            except KeyError:
                continue
            if parent in emitted:
                continue
            emitted.add(parent)
            yield parent

    return select
-\r
def prepare_predicate(next, token):
    """Build the selector step for a ``[...]`` predicate.

    Tokens are consumed through *next* up to the closing ``]``.  The
    supported forms are::

        [@attribute]          element has the attribute
        [@attribute='value']  attribute equals the value
        [tag]                 element has a child named tag
        [tag='value']         a child named tag has that complete text
        [index]               1-based position among same-tag siblings
        [last()]              last among same-tag siblings
        [last()-index]        offset back from the last sibling

    Returns a ``select(context, result)`` generator function that filters
    *result*.

    Raises SyntaxError for an unsupported predicate, for a function other
    than ``last``, for a non-integer ``last()`` offset, and for a position
    smaller than 1.
    """
    # FIXME: replace with real parser!!! refs:
    # http://effbot.org/zone/simple-iterator-parser.htm
    # http://javascript.crockford.com/tdop/tdop.html
    signature = []
    predicate = []
    while 1:
        token = next()
        if token[0] == "]":
            break
        if token[0] and token[0][:1] in "'\"":
            # quoted literal: record it as "'" plus the unquoted text
            token = "'", token[0][1:-1]
        # names (empty operator part) are recorded as "-"
        signature.append(token[0] or "-")
        predicate.append(token[1])
    signature = "".join(signature)
    # use signature to determine predicate type
    if signature == "@-":
        # [@attribute] predicate
        key = predicate[1]
        def select(context, result):
            for elem in result:
                if elem.get(key) is not None:
                    yield elem
        return select
    if signature == "@-='":
        # [@attribute='value']
        key = predicate[1]
        value = predicate[-1]
        def select(context, result):
            for elem in result:
                if elem.get(key) == value:
                    yield elem
        return select
    if signature == "-" and not re.match(r"\d+$", predicate[0]):
        # [tag]
        tag = predicate[0]
        def select(context, result):
            for elem in result:
                if elem.find(tag) is not None:
                    yield elem
        return select
    if signature == "-='" and not re.match(r"\d+$", predicate[0]):
        # [tag='value']
        tag = predicate[0]
        value = predicate[-1]
        def select(context, result):
            for elem in result:
                for e in elem.findall(tag):
                    if "".join(e.itertext()) == value:
                        yield elem
                        break
        return select
    if signature == "-" or signature == "-()" or signature == "-()-":
        # [index] or [last()] or [last()-index]
        if signature == "-":
            index = int(predicate[0]) - 1
            if index < 0:
                # [0] would otherwise become index -1 and silently select
                # the last sibling; positions are 1-based.
                raise SyntaxError("XPath position >= 1 expected")
        else:
            if predicate[0] != "last":
                raise SyntaxError("unsupported function")
            if signature == "-()-":
                try:
                    # the offset token carries its own minus sign ("-1")
                    index = int(predicate[2]) - 1
                except ValueError:
                    raise SyntaxError("unsupported expression")
            else:
                index = -1
        def select(context, result):
            parent_map = get_parent_map(context)
            for elem in result:
                try:
                    parent = parent_map[elem]
                    # FIXME: what if the selector is "*" ?
                    elems = list(parent.findall(elem.tag))
                    if elems[index] is elem:
                        yield elem
                except (IndexError, KeyError):
                    pass
        return select
    raise SyntaxError("invalid predicate")
-\r
# Dispatch table used while compiling a path: the operator part of a token
# (empty for a plain tag name) selects the factory that builds the
# corresponding selector step.
ops = dict([
    ("", prepare_child),
    ("*", prepare_star),
    (".", prepare_self),
    ("..", prepare_parent),
    ("//", prepare_descendant),
    ("[", prepare_predicate),
])

# Compiled selector lists memoized by iterfind(), which empties the dict
# once it holds more than 100 entries.
_cache = dict()
-\r
-class _SelectorContext:\r
- parent_map = None\r
- def __init__(self, root):\r
- self.root = root\r
-\r
-# --------------------------------------------------------------------\r
-\r
-##\r
-# Generate all matching objects.\r
-\r
def iterfind(elem, path, namespaces=None):
    """Return an iterable over the elements matching *path* below *elem*.

    *path* is a relative path expression in the supported XPath subset; a
    trailing "/" is treated as "/*".  *namespaces* is an optional mapping
    from prefix to namespace URI used to expand ``prefix:tag`` names.

    Compiled selectors are cached.  The cache key includes the namespace
    map, because the expanded tag names are baked into the compiled
    selector and differ between maps.

    Raises SyntaxError for an absolute path or a path that ends where
    another token is required.
    """
    # compile selector pattern
    if path[-1:] == "/":
        path = path + "*" # implicit all (FIXME: keep this?)
    if namespaces is None:
        cache_key = (path, None)
    else:
        cache_key = (path, tuple(sorted(namespaces.items())))
    try:
        selector = _cache[cache_key]
    except KeyError:
        if len(_cache) > 100:
            _cache.clear()
        if path[:1] == "/":
            raise SyntaxError("cannot use absolute path on element")
        tokens = iter(xpath_tokenizer(path, namespaces))

        def next_token():
            # builtin next() works on Python 2.6+ and 3, unlike the
            # Python 2-only iterator.next method
            return next(tokens)

        token = next_token()
        selector = []
        while 1:
            try:
                selector.append(ops[token[0]](next_token, token))
            except StopIteration:
                # a step factory ran out of tokens
                raise SyntaxError("invalid path")
            try:
                token = next_token()
                if token[0] == "/":
                    token = next_token()
            except StopIteration:
                break
        _cache[cache_key] = selector
    # execute selector pattern
    result = [elem]
    context = _SelectorContext(elem)
    for select in selector:
        result = select(context, result)
    return result
-\r
-##\r
-# Find first matching object.\r
-\r
def find(elem, path, namespaces=None):
    """Return the first element matching *path* below *elem*, or None.

    *namespaces* is passed through to iterfind().
    """
    try:
        # builtin next() instead of the Python 2-only .next() method
        return next(iterfind(elem, path, namespaces))
    except StopIteration:
        return None
-\r
-##\r
-# Find all matching objects.\r
-\r
def findall(elem, path, namespaces=None):
    """Return a list of all elements matching *path* below *elem*.

    *namespaces* is passed through to iterfind().
    """
    matches = iterfind(elem, path, namespaces)
    return list(matches)
-\r
-##\r
-# Find text for first matching object.\r
-\r
def findtext(elem, path, default=None, namespaces=None):
    """Return the text of the first element matching *path* below *elem*.

    Returns *default* when nothing matches, and "" when the matching
    element's ``text`` is empty or None.  *namespaces* is passed through
    to iterfind().
    """
    try:
        # builtin next() instead of the Python 2-only .next() method
        first = next(iterfind(elem, path, namespaces))
    except StopIteration:
        return default
    return first.text or ""