+++ /dev/null
-#\r
-# Secret Labs' Regular Expression Engine\r
-#\r
-# convert re-style regular expression to sre pattern\r
-#\r
-# Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved.\r
-#\r
-# See the sre.py file for information on usage and redistribution.\r
-#\r
-\r
-"""Internal support module for sre"""\r
-\r
-# XXX: show string offset and offending character for all errors\r
-\r
-import sys\r
-\r
-from sre_constants import *\r
-\r
-SPECIAL_CHARS = ".\\[{()*+?^$|"\r
-REPEAT_CHARS = "*+?{"\r
-\r
-DIGITS = set("0123456789")\r
-\r
-OCTDIGITS = set("01234567")\r
-HEXDIGITS = set("0123456789abcdefABCDEF")\r
-\r
-WHITESPACE = set(" \t\n\r\v\f")\r
-\r
-ESCAPES = {\r
- r"\a": (LITERAL, ord("\a")),\r
- r"\b": (LITERAL, ord("\b")),\r
- r"\f": (LITERAL, ord("\f")),\r
- r"\n": (LITERAL, ord("\n")),\r
- r"\r": (LITERAL, ord("\r")),\r
- r"\t": (LITERAL, ord("\t")),\r
- r"\v": (LITERAL, ord("\v")),\r
- r"\\": (LITERAL, ord("\\"))\r
-}\r
-\r
-CATEGORIES = {\r
- r"\A": (AT, AT_BEGINNING_STRING), # start of string\r
- r"\b": (AT, AT_BOUNDARY),\r
- r"\B": (AT, AT_NON_BOUNDARY),\r
- r"\d": (IN, [(CATEGORY, CATEGORY_DIGIT)]),\r
- r"\D": (IN, [(CATEGORY, CATEGORY_NOT_DIGIT)]),\r
- r"\s": (IN, [(CATEGORY, CATEGORY_SPACE)]),\r
- r"\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]),\r
- r"\w": (IN, [(CATEGORY, CATEGORY_WORD)]),\r
- r"\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]),\r
- r"\Z": (AT, AT_END_STRING), # end of string\r
-}\r
-\r
-FLAGS = {\r
- # standard flags\r
- "i": SRE_FLAG_IGNORECASE,\r
- "L": SRE_FLAG_LOCALE,\r
- "m": SRE_FLAG_MULTILINE,\r
- "s": SRE_FLAG_DOTALL,\r
- "x": SRE_FLAG_VERBOSE,\r
- # extensions\r
- "t": SRE_FLAG_TEMPLATE,\r
- "u": SRE_FLAG_UNICODE,\r
-}\r
-\r
-class Pattern:\r
- # master pattern object. keeps track of global attributes\r
- def __init__(self):\r
- self.flags = 0\r
- self.open = []\r
- self.groups = 1\r
- self.groupdict = {}\r
- def opengroup(self, name=None):\r
- gid = self.groups\r
- self.groups = gid + 1\r
- if name is not None:\r
- ogid = self.groupdict.get(name, None)\r
- if ogid is not None:\r
- raise error, ("redefinition of group name %s as group %d; "\r
- "was group %d" % (repr(name), gid, ogid))\r
- self.groupdict[name] = gid\r
- self.open.append(gid)\r
- return gid\r
- def closegroup(self, gid):\r
- self.open.remove(gid)\r
- def checkgroup(self, gid):\r
- return gid < self.groups and gid not in self.open\r
-\r
-class SubPattern:\r
- # a subpattern, in intermediate form\r
- def __init__(self, pattern, data=None):\r
- self.pattern = pattern\r
- if data is None:\r
- data = []\r
- self.data = data\r
- self.width = None\r
- def dump(self, level=0):\r
- nl = 1\r
- seqtypes = type(()), type([])\r
- for op, av in self.data:\r
- print level*" " + op,; nl = 0\r
- if op == "in":\r
- # member sublanguage\r
- print; nl = 1\r
- for op, a in av:\r
- print (level+1)*" " + op, a\r
- elif op == "branch":\r
- print; nl = 1\r
- i = 0\r
- for a in av[1]:\r
- if i > 0:\r
- print level*" " + "or"\r
- a.dump(level+1); nl = 1\r
- i = i + 1\r
- elif type(av) in seqtypes:\r
- for a in av:\r
- if isinstance(a, SubPattern):\r
- if not nl: print\r
- a.dump(level+1); nl = 1\r
- else:\r
- print a, ; nl = 0\r
- else:\r
- print av, ; nl = 0\r
- if not nl: print\r
- def __repr__(self):\r
- return repr(self.data)\r
- def __len__(self):\r
- return len(self.data)\r
- def __delitem__(self, index):\r
- del self.data[index]\r
- def __getitem__(self, index):\r
- if isinstance(index, slice):\r
- return SubPattern(self.pattern, self.data[index])\r
- return self.data[index]\r
- def __setitem__(self, index, code):\r
- self.data[index] = code\r
- def insert(self, index, code):\r
- self.data.insert(index, code)\r
- def append(self, code):\r
- self.data.append(code)\r
- def getwidth(self):\r
- # determine the width (min, max) for this subpattern\r
- if self.width:\r
- return self.width\r
- lo = hi = 0L\r
- UNITCODES = (ANY, RANGE, IN, LITERAL, NOT_LITERAL, CATEGORY)\r
- REPEATCODES = (MIN_REPEAT, MAX_REPEAT)\r
- for op, av in self.data:\r
- if op is BRANCH:\r
- i = sys.maxint\r
- j = 0\r
- for av in av[1]:\r
- l, h = av.getwidth()\r
- i = min(i, l)\r
- j = max(j, h)\r
- lo = lo + i\r
- hi = hi + j\r
- elif op is CALL:\r
- i, j = av.getwidth()\r
- lo = lo + i\r
- hi = hi + j\r
- elif op is SUBPATTERN:\r
- i, j = av[1].getwidth()\r
- lo = lo + i\r
- hi = hi + j\r
- elif op in REPEATCODES:\r
- i, j = av[2].getwidth()\r
- lo = lo + long(i) * av[0]\r
- hi = hi + long(j) * av[1]\r
- elif op in UNITCODES:\r
- lo = lo + 1\r
- hi = hi + 1\r
- elif op == SUCCESS:\r
- break\r
- self.width = int(min(lo, sys.maxint)), int(min(hi, sys.maxint))\r
- return self.width\r
-\r
-class Tokenizer:\r
- def __init__(self, string):\r
- self.string = string\r
- self.index = 0\r
- self.__next()\r
- def __next(self):\r
- if self.index >= len(self.string):\r
- self.next = None\r
- return\r
- char = self.string[self.index]\r
- if char[0] == "\\":\r
- try:\r
- c = self.string[self.index + 1]\r
- except IndexError:\r
- raise error, "bogus escape (end of line)"\r
- char = char + c\r
- self.index = self.index + len(char)\r
- self.next = char\r
- def match(self, char, skip=1):\r
- if char == self.next:\r
- if skip:\r
- self.__next()\r
- return 1\r
- return 0\r
- def get(self):\r
- this = self.next\r
- self.__next()\r
- return this\r
- def tell(self):\r
- return self.index, self.next\r
- def seek(self, index):\r
- self.index, self.next = index\r
-\r
-def isident(char):\r
- return "a" <= char <= "z" or "A" <= char <= "Z" or char == "_"\r
-\r
-def isdigit(char):\r
- return "0" <= char <= "9"\r
-\r
-def isname(name):\r
- # check that group name is a valid string\r
- if not isident(name[0]):\r
- return False\r
- for char in name[1:]:\r
- if not isident(char) and not isdigit(char):\r
- return False\r
- return True\r
-\r
-def _class_escape(source, escape):\r
- # handle escape code inside character class\r
- code = ESCAPES.get(escape)\r
- if code:\r
- return code\r
- code = CATEGORIES.get(escape)\r
- if code:\r
- return code\r
- try:\r
- c = escape[1:2]\r
- if c == "x":\r
- # hexadecimal escape (exactly two digits)\r
- while source.next in HEXDIGITS and len(escape) < 4:\r
- escape = escape + source.get()\r
- escape = escape[2:]\r
- if len(escape) != 2:\r
- raise error, "bogus escape: %s" % repr("\\" + escape)\r
- return LITERAL, int(escape, 16) & 0xff\r
- elif c in OCTDIGITS:\r
- # octal escape (up to three digits)\r
- while source.next in OCTDIGITS and len(escape) < 4:\r
- escape = escape + source.get()\r
- escape = escape[1:]\r
- return LITERAL, int(escape, 8) & 0xff\r
- elif c in DIGITS:\r
- raise error, "bogus escape: %s" % repr(escape)\r
- if len(escape) == 2:\r
- return LITERAL, ord(escape[1])\r
- except ValueError:\r
- pass\r
- raise error, "bogus escape: %s" % repr(escape)\r
-\r
-def _escape(source, escape, state):\r
- # handle escape code in expression\r
- code = CATEGORIES.get(escape)\r
- if code:\r
- return code\r
- code = ESCAPES.get(escape)\r
- if code:\r
- return code\r
- try:\r
- c = escape[1:2]\r
- if c == "x":\r
- # hexadecimal escape\r
- while source.next in HEXDIGITS and len(escape) < 4:\r
- escape = escape + source.get()\r
- if len(escape) != 4:\r
- raise ValueError\r
- return LITERAL, int(escape[2:], 16) & 0xff\r
- elif c == "0":\r
- # octal escape\r
- while source.next in OCTDIGITS and len(escape) < 4:\r
- escape = escape + source.get()\r
- return LITERAL, int(escape[1:], 8) & 0xff\r
- elif c in DIGITS:\r
- # octal escape *or* decimal group reference (sigh)\r
- if source.next in DIGITS:\r
- escape = escape + source.get()\r
- if (escape[1] in OCTDIGITS and escape[2] in OCTDIGITS and\r
- source.next in OCTDIGITS):\r
- # got three octal digits; this is an octal escape\r
- escape = escape + source.get()\r
- return LITERAL, int(escape[1:], 8) & 0xff\r
- # not an octal escape, so this is a group reference\r
- group = int(escape[1:])\r
- if group < state.groups:\r
- if not state.checkgroup(group):\r
- raise error, "cannot refer to open group"\r
- return GROUPREF, group\r
- raise ValueError\r
- if len(escape) == 2:\r
- return LITERAL, ord(escape[1])\r
- except ValueError:\r
- pass\r
- raise error, "bogus escape: %s" % repr(escape)\r
-\r
-def _parse_sub(source, state, nested=1):\r
- # parse an alternation: a|b|c\r
-\r
- items = []\r
- itemsappend = items.append\r
- sourcematch = source.match\r
- while 1:\r
- itemsappend(_parse(source, state))\r
- if sourcematch("|"):\r
- continue\r
- if not nested:\r
- break\r
- if not source.next or sourcematch(")", 0):\r
- break\r
- else:\r
- raise error, "pattern not properly closed"\r
-\r
- if len(items) == 1:\r
- return items[0]\r
-\r
- subpattern = SubPattern(state)\r
- subpatternappend = subpattern.append\r
-\r
- # check if all items share a common prefix\r
- while 1:\r
- prefix = None\r
- for item in items:\r
- if not item:\r
- break\r
- if prefix is None:\r
- prefix = item[0]\r
- elif item[0] != prefix:\r
- break\r
- else:\r
- # all subitems start with a common "prefix".\r
- # move it out of the branch\r
- for item in items:\r
- del item[0]\r
- subpatternappend(prefix)\r
- continue # check next one\r
- break\r
-\r
- # check if the branch can be replaced by a character set\r
- for item in items:\r
- if len(item) != 1 or item[0][0] != LITERAL:\r
- break\r
- else:\r
- # we can store this as a character set instead of a\r
- # branch (the compiler may optimize this even more)\r
- set = []\r
- setappend = set.append\r
- for item in items:\r
- setappend(item[0])\r
- subpatternappend((IN, set))\r
- return subpattern\r
-\r
- subpattern.append((BRANCH, (None, items)))\r
- return subpattern\r
-\r
-def _parse_sub_cond(source, state, condgroup):\r
- item_yes = _parse(source, state)\r
- if source.match("|"):\r
- item_no = _parse(source, state)\r
- if source.match("|"):\r
- raise error, "conditional backref with more than two branches"\r
- else:\r
- item_no = None\r
- if source.next and not source.match(")", 0):\r
- raise error, "pattern not properly closed"\r
- subpattern = SubPattern(state)\r
- subpattern.append((GROUPREF_EXISTS, (condgroup, item_yes, item_no)))\r
- return subpattern\r
-\r
-_PATTERNENDERS = set("|)")\r
-_ASSERTCHARS = set("=!<")\r
-_LOOKBEHINDASSERTCHARS = set("=!")\r
-_REPEATCODES = set([MIN_REPEAT, MAX_REPEAT])\r
-\r
-def _parse(source, state):\r
- # parse a simple pattern\r
- subpattern = SubPattern(state)\r
-\r
- # precompute constants into local variables\r
- subpatternappend = subpattern.append\r
- sourceget = source.get\r
- sourcematch = source.match\r
- _len = len\r
- PATTERNENDERS = _PATTERNENDERS\r
- ASSERTCHARS = _ASSERTCHARS\r
- LOOKBEHINDASSERTCHARS = _LOOKBEHINDASSERTCHARS\r
- REPEATCODES = _REPEATCODES\r
-\r
- while 1:\r
-\r
- if source.next in PATTERNENDERS:\r
- break # end of subpattern\r
- this = sourceget()\r
- if this is None:\r
- break # end of pattern\r
-\r
- if state.flags & SRE_FLAG_VERBOSE:\r
- # skip whitespace and comments\r
- if this in WHITESPACE:\r
- continue\r
- if this == "#":\r
- while 1:\r
- this = sourceget()\r
- if this in (None, "\n"):\r
- break\r
- continue\r
-\r
- if this and this[0] not in SPECIAL_CHARS:\r
- subpatternappend((LITERAL, ord(this)))\r
-\r
- elif this == "[":\r
- # character set\r
- set = []\r
- setappend = set.append\r
-## if sourcematch(":"):\r
-## pass # handle character classes\r
- if sourcematch("^"):\r
- setappend((NEGATE, None))\r
- # check remaining characters\r
- start = set[:]\r
- while 1:\r
- this = sourceget()\r
- if this == "]" and set != start:\r
- break\r
- elif this and this[0] == "\\":\r
- code1 = _class_escape(source, this)\r
- elif this:\r
- code1 = LITERAL, ord(this)\r
- else:\r
- raise error, "unexpected end of regular expression"\r
- if sourcematch("-"):\r
- # potential range\r
- this = sourceget()\r
- if this == "]":\r
- if code1[0] is IN:\r
- code1 = code1[1][0]\r
- setappend(code1)\r
- setappend((LITERAL, ord("-")))\r
- break\r
- elif this:\r
- if this[0] == "\\":\r
- code2 = _class_escape(source, this)\r
- else:\r
- code2 = LITERAL, ord(this)\r
- if code1[0] != LITERAL or code2[0] != LITERAL:\r
- raise error, "bad character range"\r
- lo = code1[1]\r
- hi = code2[1]\r
- if hi < lo:\r
- raise error, "bad character range"\r
- setappend((RANGE, (lo, hi)))\r
- else:\r
- raise error, "unexpected end of regular expression"\r
- else:\r
- if code1[0] is IN:\r
- code1 = code1[1][0]\r
- setappend(code1)\r
-\r
- # XXX: <fl> should move set optimization to compiler!\r
- if _len(set)==1 and set[0][0] is LITERAL:\r
- subpatternappend(set[0]) # optimization\r
- elif _len(set)==2 and set[0][0] is NEGATE and set[1][0] is LITERAL:\r
- subpatternappend((NOT_LITERAL, set[1][1])) # optimization\r
- else:\r
- # XXX: <fl> should add charmap optimization here\r
- subpatternappend((IN, set))\r
-\r
- elif this and this[0] in REPEAT_CHARS:\r
- # repeat previous item\r
- if this == "?":\r
- min, max = 0, 1\r
- elif this == "*":\r
- min, max = 0, MAXREPEAT\r
-\r
- elif this == "+":\r
- min, max = 1, MAXREPEAT\r
- elif this == "{":\r
- if source.next == "}":\r
- subpatternappend((LITERAL, ord(this)))\r
- continue\r
- here = source.tell()\r
- min, max = 0, MAXREPEAT\r
- lo = hi = ""\r
- while source.next in DIGITS:\r
- lo = lo + source.get()\r
- if sourcematch(","):\r
- while source.next in DIGITS:\r
- hi = hi + sourceget()\r
- else:\r
- hi = lo\r
- if not sourcematch("}"):\r
- subpatternappend((LITERAL, ord(this)))\r
- source.seek(here)\r
- continue\r
- if lo:\r
- min = int(lo)\r
- if hi:\r
- max = int(hi)\r
- if max < min:\r
- raise error, "bad repeat interval"\r
- else:\r
- raise error, "not supported"\r
- # figure out which item to repeat\r
- if subpattern:\r
- item = subpattern[-1:]\r
- else:\r
- item = None\r
- if not item or (_len(item) == 1 and item[0][0] == AT):\r
- raise error, "nothing to repeat"\r
- if item[0][0] in REPEATCODES:\r
- raise error, "multiple repeat"\r
- if sourcematch("?"):\r
- subpattern[-1] = (MIN_REPEAT, (min, max, item))\r
- else:\r
- subpattern[-1] = (MAX_REPEAT, (min, max, item))\r
-\r
- elif this == ".":\r
- subpatternappend((ANY, None))\r
-\r
- elif this == "(":\r
- group = 1\r
- name = None\r
- condgroup = None\r
- if sourcematch("?"):\r
- group = 0\r
- # options\r
- if sourcematch("P"):\r
- # python extensions\r
- if sourcematch("<"):\r
- # named group: skip forward to end of name\r
- name = ""\r
- while 1:\r
- char = sourceget()\r
- if char is None:\r
- raise error, "unterminated name"\r
- if char == ">":\r
- break\r
- name = name + char\r
- group = 1\r
- if not isname(name):\r
- raise error, "bad character in group name"\r
- elif sourcematch("="):\r
- # named backreference\r
- name = ""\r
- while 1:\r
- char = sourceget()\r
- if char is None:\r
- raise error, "unterminated name"\r
- if char == ")":\r
- break\r
- name = name + char\r
- if not isname(name):\r
- raise error, "bad character in group name"\r
- gid = state.groupdict.get(name)\r
- if gid is None:\r
- raise error, "unknown group name"\r
- subpatternappend((GROUPREF, gid))\r
- continue\r
- else:\r
- char = sourceget()\r
- if char is None:\r
- raise error, "unexpected end of pattern"\r
- raise error, "unknown specifier: ?P%s" % char\r
- elif sourcematch(":"):\r
- # non-capturing group\r
- group = 2\r
- elif sourcematch("#"):\r
- # comment\r
- while 1:\r
- if source.next is None or source.next == ")":\r
- break\r
- sourceget()\r
- if not sourcematch(")"):\r
- raise error, "unbalanced parenthesis"\r
- continue\r
- elif source.next in ASSERTCHARS:\r
- # lookahead assertions\r
- char = sourceget()\r
- dir = 1\r
- if char == "<":\r
- if source.next not in LOOKBEHINDASSERTCHARS:\r
- raise error, "syntax error"\r
- dir = -1 # lookbehind\r
- char = sourceget()\r
- p = _parse_sub(source, state)\r
- if not sourcematch(")"):\r
- raise error, "unbalanced parenthesis"\r
- if char == "=":\r
- subpatternappend((ASSERT, (dir, p)))\r
- else:\r
- subpatternappend((ASSERT_NOT, (dir, p)))\r
- continue\r
- elif sourcematch("("):\r
- # conditional backreference group\r
- condname = ""\r
- while 1:\r
- char = sourceget()\r
- if char is None:\r
- raise error, "unterminated name"\r
- if char == ")":\r
- break\r
- condname = condname + char\r
- group = 2\r
- if isname(condname):\r
- condgroup = state.groupdict.get(condname)\r
- if condgroup is None:\r
- raise error, "unknown group name"\r
- else:\r
- try:\r
- condgroup = int(condname)\r
- except ValueError:\r
- raise error, "bad character in group name"\r
- else:\r
- # flags\r
- if not source.next in FLAGS:\r
- raise error, "unexpected end of pattern"\r
- while source.next in FLAGS:\r
- state.flags = state.flags | FLAGS[sourceget()]\r
- if group:\r
- # parse group contents\r
- if group == 2:\r
- # anonymous group\r
- group = None\r
- else:\r
- group = state.opengroup(name)\r
- if condgroup:\r
- p = _parse_sub_cond(source, state, condgroup)\r
- else:\r
- p = _parse_sub(source, state)\r
- if not sourcematch(")"):\r
- raise error, "unbalanced parenthesis"\r
- if group is not None:\r
- state.closegroup(group)\r
- subpatternappend((SUBPATTERN, (group, p)))\r
- else:\r
- while 1:\r
- char = sourceget()\r
- if char is None:\r
- raise error, "unexpected end of pattern"\r
- if char == ")":\r
- break\r
- raise error, "unknown extension"\r
-\r
- elif this == "^":\r
- subpatternappend((AT, AT_BEGINNING))\r
-\r
- elif this == "$":\r
- subpattern.append((AT, AT_END))\r
-\r
- elif this and this[0] == "\\":\r
- code = _escape(source, this, state)\r
- subpatternappend(code)\r
-\r
- else:\r
- raise error, "parser error"\r
-\r
- return subpattern\r
-\r
-def parse(str, flags=0, pattern=None):\r
- # parse 're' pattern into list of (opcode, argument) tuples\r
-\r
- source = Tokenizer(str)\r
-\r
- if pattern is None:\r
- pattern = Pattern()\r
- pattern.flags = flags\r
- pattern.str = str\r
-\r
- p = _parse_sub(source, pattern, 0)\r
-\r
- tail = source.get()\r
- if tail == ")":\r
- raise error, "unbalanced parenthesis"\r
- elif tail:\r
- raise error, "bogus characters at end of regular expression"\r
-\r
- if flags & SRE_FLAG_DEBUG:\r
- p.dump()\r
-\r
- if not (flags & SRE_FLAG_VERBOSE) and p.pattern.flags & SRE_FLAG_VERBOSE:\r
- # the VERBOSE flag was switched on inside the pattern. to be\r
- # on the safe side, we'll parse the whole thing again...\r
- return parse(str, p.pattern.flags)\r
-\r
- return p\r
-\r
-def parse_template(source, pattern):\r
- # parse 're' replacement string into list of literals and\r
- # group references\r
- s = Tokenizer(source)\r
- sget = s.get\r
- p = []\r
- a = p.append\r
- def literal(literal, p=p, pappend=a):\r
- if p and p[-1][0] is LITERAL:\r
- p[-1] = LITERAL, p[-1][1] + literal\r
- else:\r
- pappend((LITERAL, literal))\r
- sep = source[:0]\r
- if type(sep) is type(""):\r
- makechar = chr\r
- else:\r
- makechar = unichr\r
- while 1:\r
- this = sget()\r
- if this is None:\r
- break # end of replacement string\r
- if this and this[0] == "\\":\r
- # group\r
- c = this[1:2]\r
- if c == "g":\r
- name = ""\r
- if s.match("<"):\r
- while 1:\r
- char = sget()\r
- if char is None:\r
- raise error, "unterminated group name"\r
- if char == ">":\r
- break\r
- name = name + char\r
- if not name:\r
- raise error, "bad group name"\r
- try:\r
- index = int(name)\r
- if index < 0:\r
- raise error, "negative group number"\r
- except ValueError:\r
- if not isname(name):\r
- raise error, "bad character in group name"\r
- try:\r
- index = pattern.groupindex[name]\r
- except KeyError:\r
- raise IndexError, "unknown group name"\r
- a((MARK, index))\r
- elif c == "0":\r
- if s.next in OCTDIGITS:\r
- this = this + sget()\r
- if s.next in OCTDIGITS:\r
- this = this + sget()\r
- literal(makechar(int(this[1:], 8) & 0xff))\r
- elif c in DIGITS:\r
- isoctal = False\r
- if s.next in DIGITS:\r
- this = this + sget()\r
- if (c in OCTDIGITS and this[2] in OCTDIGITS and\r
- s.next in OCTDIGITS):\r
- this = this + sget()\r
- isoctal = True\r
- literal(makechar(int(this[1:], 8) & 0xff))\r
- if not isoctal:\r
- a((MARK, int(this[1:])))\r
- else:\r
- try:\r
- this = makechar(ESCAPES[this][1])\r
- except KeyError:\r
- pass\r
- literal(this)\r
- else:\r
- literal(this)\r
- # convert template to groups and literals lists\r
- i = 0\r
- groups = []\r
- groupsappend = groups.append\r
- literals = [None] * len(p)\r
- for c, s in p:\r
- if c is MARK:\r
- groupsappend((i, s))\r
- # literal[i] is already None\r
- else:\r
- literals[i] = s\r
- i = i + 1\r
- return groups, literals\r
-\r
-def expand_template(template, match):\r
- g = match.group\r
- sep = match.string[:0]\r
- groups, literals = template\r
- literals = literals[:]\r
- try:\r
- for index, group in groups:\r
- literals[index] = s = g(group)\r
- if s is None:\r
- raise error, "unmatched group"\r
- except IndexError:\r
- raise error, "invalid group reference"\r
- return sep.join(literals)\r