AppPkg/Applications/Python/Python-2.7.10/Lib/sre_parse.py

   1 #
   2 # Secret Labs' Regular Expression Engine
   3 #
   4 # convert re-style regular expression to sre pattern
   5 #
   6 # Copyright (c) 1998-2001 by Secret Labs AB.  All rights reserved.
   7 #
   8 # See the sre.py file for information on usage and redistribution.
   9 #
  10
  11 """Internal support module for sre"""
  12
  13 # XXX: show string offset and offending character for all errors
  14
  15 import sys
  16
  17 from sre_constants import *
  18
  19 SPECIAL_CHARS = ".\\[{()*+?^$|"
  20 REPEAT_CHARS = "*+?{"
  21
  22 DIGITS = set("0123456789")
  23
  24 OCTDIGITS = set("01234567")
  25 HEXDIGITS = set("0123456789abcdefABCDEF")
  26
  27 WHITESPACE = set(" \t\n\r\v\f")
  28
  29 ESCAPES = {
  30     r"\a": (LITERAL, ord("\a")),
  31     r"\b": (LITERAL, ord("\b")),
  32     r"\f": (LITERAL, ord("\f")),
  33     r"\n": (LITERAL, ord("\n")),
  34     r"\r": (LITERAL, ord("\r")),
  35     r"\t": (LITERAL, ord("\t")),
  36     r"\v": (LITERAL, ord("\v")),
  37     r"\\": (LITERAL, ord("\\"))
  38 }
  39
  40 CATEGORIES = {
  41     r"\A": (AT, AT_BEGINNING_STRING), # start of string
  42     r"\b": (AT, AT_BOUNDARY),
  43     r"\B": (AT, AT_NON_BOUNDARY),
  44     r"\d": (IN, [(CATEGORY, CATEGORY_DIGIT)]),
  45     r"\D": (IN, [(CATEGORY, CATEGORY_NOT_DIGIT)]),
  46     r"\s": (IN, [(CATEGORY, CATEGORY_SPACE)]),
  47     r"\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]),
  48     r"\w": (IN, [(CATEGORY, CATEGORY_WORD)]),
  49     r"\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]),
  50     r"\Z": (AT, AT_END_STRING), # end of string
  51 }
  52
  53 FLAGS = {
  54     # standard flags
  55     "i": SRE_FLAG_IGNORECASE,
  56     "L": SRE_FLAG_LOCALE,
  57     "m": SRE_FLAG_MULTILINE,
  58     "s": SRE_FLAG_DOTALL,
  59     "x": SRE_FLAG_VERBOSE,
  60     # extensions
  61     "t": SRE_FLAG_TEMPLATE,
  62     "u": SRE_FLAG_UNICODE,
  63 }
  64
  65 class Pattern:
  66     # master pattern object.  keeps track of global attributes
  67     def __init__(self):
  68         self.flags = 0
  69         self.open = []
  70         self.groups = 1
  71         self.groupdict = {}
  72         self.lookbehind = 0
  73
  74     def opengroup(self, name=None):
  75         gid = self.groups
  76         self.groups = gid + 1
  77         if name is not None:
  78             ogid = self.groupdict.get(name, None)
  79             if ogid is not None:
  80                 raise error, ("redefinition of group name %s as group %d; "
  81                               "was group %d" % (repr(name), gid,  ogid))
  82             self.groupdict[name] = gid
  83         self.open.append(gid)
  84         return gid
  85     def closegroup(self, gid):
  86         self.open.remove(gid)
  87     def checkgroup(self, gid):
  88         return gid < self.groups and gid not in self.open
  89
class SubPattern:
    # a subpattern, in intermediate form: a list of (opcode, argument)
    # tuples plus a reference to the owning Pattern state
    def __init__(self, pattern, data=None):
        # pattern: the shared state object; data: optional initial list of
        # (opcode, argument) tuples (a fresh list is created when omitted)
        self.pattern = pattern
        if data is None:
            data = []
        self.data = data
        self.width = None  # (min, max) cache filled in by getwidth()
    def dump(self, level=0):
        # debugging aid: print the opcode tree to stdout, indenting nested
        # subpatterns by two spaces per level.  Relies on the print
        # statement's trailing-comma behaviour to keep an opcode and its
        # argument on the same line.
        seqtypes = (tuple, list)
        for op, av in self.data:
            print level*"  " + op,
            if op == IN:
                # member sublanguage
                print
                for op, a in av:
                    print (level+1)*"  " + op, a
            elif op == BRANCH:
                # av[1] is the list of alternatives; separate them with "or"
                print
                for i, a in enumerate(av[1]):
                    if i:
                        print level*"  " + "or"
                    a.dump(level+1)
            elif op == GROUPREF_EXISTS:
                # conditional group: yes-branch, then optional "else" branch
                condgroup, item_yes, item_no = av
                print condgroup
                item_yes.dump(level+1)
                if item_no:
                    print level*"  " + "else"
                    item_no.dump(level+1)
            elif isinstance(av, seqtypes):
                # generic argument sequence: plain values are printed
                # inline, nested SubPatterns are dumped on their own lines;
                # nl records whether the current line was already ended
                nl = 0
                for a in av:
                    if isinstance(a, SubPattern):
                        if not nl:
                            print
                        a.dump(level+1)
                        nl = 1
                    else:
                        print a,
                        nl = 0
                if not nl:
                    print
            else:
                print av
    # the methods below make a SubPattern behave like its underlying list
    def __repr__(self):
        return repr(self.data)
    def __len__(self):
        return len(self.data)
    def __delitem__(self, index):
        del self.data[index]
    def __getitem__(self, index):
        # slicing yields a new SubPattern sharing the same state object;
        # plain indexing yields the stored (opcode, argument) tuple
        if isinstance(index, slice):
            return SubPattern(self.pattern, self.data[index])
        return self.data[index]
    def __setitem__(self, index, code):
        self.data[index] = code
    def insert(self, index, code):
        self.data.insert(index, code)
    def append(self, code):
        self.data.append(code)
    def getwidth(self):
        # determine the width (min, max) for this subpattern
        # the result is cached in self.width after the first computation
        if self.width:
            return self.width
        lo = hi = 0
        UNITCODES = (ANY, RANGE, IN, LITERAL, NOT_LITERAL, CATEGORY)
        REPEATCODES = (MIN_REPEAT, MAX_REPEAT)
        for op, av in self.data:
            if op is BRANCH:
                # smallest minimum and largest maximum over all alternatives
                i = MAXREPEAT - 1
                j = 0
                for av in av[1]:
                    l, h = av.getwidth()
                    i = min(i, l)
                    j = max(j, h)
                lo = lo + i
                hi = hi + j
            elif op is CALL:
                i, j = av.getwidth()
                lo = lo + i
                hi = hi + j
            elif op is SUBPATTERN:
                # av is (group, subpattern)
                i, j = av[1].getwidth()
                lo = lo + i
                hi = hi + j
            elif op in REPEATCODES:
                # av is (min, max, item): scale the item's width by the
                # repeat bounds
                i, j = av[2].getwidth()
                lo = lo + i * av[0]
                hi = hi + j * av[1]
            elif op in UNITCODES:
                # these opcodes each account for exactly one character
                lo = lo + 1
                hi = hi + 1
            elif op == SUCCESS:
                break
        # clamp both bounds so they never exceed the MAXREPEAT limits
        self.width = min(lo, MAXREPEAT - 1), min(hi, MAXREPEAT)
        return self.width
 187
 188 class Tokenizer:
 189     def __init__(self, string):
 190         self.string = string
 191         self.index = 0
 192         self.__next()
 193     def __next(self):
 194         if self.index >= len(self.string):
 195             self.next = None
 196             return
 197         char = self.string[self.index]
 198         if char[0] == "\\":
 199             try:
 200                 c = self.string[self.index + 1]
 201             except IndexError:
 202                 raise error, "bogus escape (end of line)"
 203             char = char + c
 204         self.index = self.index + len(char)
 205         self.next = char
 206     def match(self, char, skip=1):
 207         if char == self.next:
 208             if skip:
 209                 self.__next()
 210             return 1
 211         return 0
 212     def get(self):
 213         this = self.next
 214         self.__next()
 215         return this
 216     def tell(self):
 217         return self.index, self.next
 218     def seek(self, index):
 219         self.index, self.next = index
 220
 221 def isident(char):
 222     return "a" <= char <= "z" or "A" <= char <= "Z" or char == "_"
 223
 224 def isdigit(char):
 225     return "0" <= char <= "9"
 226
 227 def isname(name):
 228     # check that group name is a valid string
 229     if not isident(name[0]):
 230         return False
 231     for char in name[1:]:
 232         if not isident(char) and not isdigit(char):
 233             return False
 234     return True
 235
 236 def _class_escape(source, escape):
 237     # handle escape code inside character class
 238     code = ESCAPES.get(escape)
 239     if code:
 240         return code
 241     code = CATEGORIES.get(escape)
 242     if code and code[0] == IN:
 243         return code
 244     try:
 245         c = escape[1:2]
 246         if c == "x":
 247             # hexadecimal escape (exactly two digits)
 248             while source.next in HEXDIGITS and len(escape) < 4:
 249                 escape = escape + source.get()
 250             escape = escape[2:]
 251             if len(escape) != 2:
 252                 raise error, "bogus escape: %s" % repr("\\" + escape)
 253             return LITERAL, int(escape, 16) & 0xff
 254         elif c in OCTDIGITS:
 255             # octal escape (up to three digits)
 256             while source.next in OCTDIGITS and len(escape) < 4:
 257                 escape = escape + source.get()
 258             escape = escape[1:]
 259             return LITERAL, int(escape, 8) & 0xff
 260         elif c in DIGITS:
 261             raise error, "bogus escape: %s" % repr(escape)
 262         if len(escape) == 2:
 263             return LITERAL, ord(escape[1])
 264     except ValueError:
 265         pass
 266     raise error, "bogus escape: %s" % repr(escape)
 267
 268 def _escape(source, escape, state):
 269     # handle escape code in expression
 270     code = CATEGORIES.get(escape)
 271     if code:
 272         return code
 273     code = ESCAPES.get(escape)
 274     if code:
 275         return code
 276     try:
 277         c = escape[1:2]
 278         if c == "x":
 279             # hexadecimal escape
 280             while source.next in HEXDIGITS and len(escape) < 4:
 281                 escape = escape + source.get()
 282             if len(escape) != 4:
 283                 raise ValueError
 284             return LITERAL, int(escape[2:], 16) & 0xff
 285         elif c == "0":
 286             # octal escape
 287             while source.next in OCTDIGITS and len(escape) < 4:
 288                 escape = escape + source.get()
 289             return LITERAL, int(escape[1:], 8) & 0xff
 290         elif c in DIGITS:
 291             # octal escape *or* decimal group reference (sigh)
 292             if source.next in DIGITS:
 293                 escape = escape + source.get()
 294                 if (escape[1] in OCTDIGITS and escape[2] in OCTDIGITS and
 295                     source.next in OCTDIGITS):
 296                     # got three octal digits; this is an octal escape
 297                     escape = escape + source.get()
 298                     return LITERAL, int(escape[1:], 8) & 0xff
 299             # not an octal escape, so this is a group reference
 300             group = int(escape[1:])
 301             if group < state.groups:
 302                 if not state.checkgroup(group):
 303                     raise error, "cannot refer to open group"
 304                 if state.lookbehind:
 305                     import warnings
 306                     warnings.warn('group references in lookbehind '
 307                                   'assertions are not supported',
 308                                   RuntimeWarning)
 309                 return GROUPREF, group
 310             raise ValueError
 311         if len(escape) == 2:
 312             return LITERAL, ord(escape[1])
 313     except ValueError:
 314         pass
 315     raise error, "bogus escape: %s" % repr(escape)
 316
 317 def _parse_sub(source, state, nested=1):
 318     # parse an alternation: a|b|c
 319
 320     items = []
 321     itemsappend = items.append
 322     sourcematch = source.match
 323     while 1:
 324         itemsappend(_parse(source, state))
 325         if sourcematch("|"):
 326             continue
 327         if not nested:
 328             break
 329         if not source.next or sourcematch(")", 0):
 330             break
 331         else:
 332             raise error, "pattern not properly closed"
 333
 334     if len(items) == 1:
 335         return items[0]
 336
 337     subpattern = SubPattern(state)
 338     subpatternappend = subpattern.append
 339
 340     # check if all items share a common prefix
 341     while 1:
 342         prefix = None
 343         for item in items:
 344             if not item:
 345                 break
 346             if prefix is None:
 347                 prefix = item[0]
 348             elif item[0] != prefix:
 349                 break
 350         else:
 351             # all subitems start with a common "prefix".
 352             # move it out of the branch
 353             for item in items:
 354                 del item[0]
 355             subpatternappend(prefix)
 356             continue # check next one
 357         break
 358
 359     # check if the branch can be replaced by a character set
 360     for item in items:
 361         if len(item) != 1 or item[0][0] != LITERAL:
 362             break
 363     else:
 364         # we can store this as a character set instead of a
 365         # branch (the compiler may optimize this even more)
 366         set = []
 367         setappend = set.append
 368         for item in items:
 369             setappend(item[0])
 370         subpatternappend((IN, set))
 371         return subpattern
 372
 373     subpattern.append((BRANCH, (None, items)))
 374     return subpattern
 375
 376 def _parse_sub_cond(source, state, condgroup):
 377     item_yes = _parse(source, state)
 378     if source.match("|"):
 379         item_no = _parse(source, state)
 380         if source.match("|"):
 381             raise error, "conditional backref with more than two branches"
 382     else:
 383         item_no = None
 384     if source.next and not source.match(")", 0):
 385         raise error, "pattern not properly closed"
 386     subpattern = SubPattern(state)
 387     subpattern.append((GROUPREF_EXISTS, (condgroup, item_yes, item_no)))
 388     return subpattern
 389
 390 _PATTERNENDERS = set("|)")
 391 _ASSERTCHARS = set("=!<")
 392 _LOOKBEHINDASSERTCHARS = set("=!")
 393 _REPEATCODES = set([MIN_REPEAT, MAX_REPEAT])
 394
def _parse(source, state):
    # parse a simple pattern
    # consumes tokens from source until "|", ")" or the end of input and
    # returns them as a SubPattern of (opcode, argument) tuples; state is
    # the shared Pattern object (flags, group bookkeeping)
    subpattern = SubPattern(state)

    # precompute constants into local variables
    subpatternappend = subpattern.append
    sourceget = source.get
    sourcematch = source.match
    _len = len
    PATTERNENDERS = _PATTERNENDERS
    ASSERTCHARS = _ASSERTCHARS
    LOOKBEHINDASSERTCHARS = _LOOKBEHINDASSERTCHARS
    REPEATCODES = _REPEATCODES

    while 1:

        # the terminator itself is left in the tokenizer for the caller
        if source.next in PATTERNENDERS:
            break # end of subpattern
        this = sourceget()
        if this is None:
            break # end of pattern

        if state.flags & SRE_FLAG_VERBOSE:
            # skip whitespace and comments
            if this in WHITESPACE:
                continue
            if this == "#":
                # a comment runs to the end of the line (or of the pattern)
                while 1:
                    this = sourceget()
                    if this in (None, "\n"):
                        break
                continue

        if this and this[0] not in SPECIAL_CHARS:
            # ordinary character
            subpatternappend((LITERAL, ord(this)))

        elif this == "[":
            # character set
            set = []
            setappend = set.append
##          if sourcematch(":"):
##              pass # handle character classes
            if sourcematch("^"):
                setappend((NEGATE, None))
            # check remaining characters
            # "start" is the set before any member was added, so a "]"
            # seen while set == start is taken as a literal member
            start = set[:]
            while 1:
                this = sourceget()
                if this == "]" and set != start:
                    break
                elif this and this[0] == "\\":
                    code1 = _class_escape(source, this)
                elif this:
                    code1 = LITERAL, ord(this)
                else:
                    raise error, "unexpected end of regular expression"
                if sourcematch("-"):
                    # potential range
                    this = sourceget()
                    if this == "]":
                        # trailing "-" before "]": the dash is a literal
                        if code1[0] is IN:
                            code1 = code1[1][0]
                        setappend(code1)
                        setappend((LITERAL, ord("-")))
                        break
                    elif this:
                        if this[0] == "\\":
                            code2 = _class_escape(source, this)
                        else:
                            code2 = LITERAL, ord(this)
                        # both ends of a range must be single characters
                        if code1[0] != LITERAL or code2[0] != LITERAL:
                            raise error, "bad character range"
                        lo = code1[1]
                        hi = code2[1]
                        if hi < lo:
                            raise error, "bad character range"
                        setappend((RANGE, (lo, hi)))
                    else:
                        raise error, "unexpected end of regular expression"
                else:
                    # category escapes arrive wrapped in IN; unwrap them
                    if code1[0] is IN:
                        code1 = code1[1][0]
                    setappend(code1)

            # XXX: <fl> should move set optimization to compiler!
            if _len(set)==1 and set[0][0] is LITERAL:
                subpatternappend(set[0]) # optimization
            elif _len(set)==2 and set[0][0] is NEGATE and set[1][0] is LITERAL:
                subpatternappend((NOT_LITERAL, set[1][1])) # optimization
            else:
                # XXX: <fl> should add charmap optimization here
                subpatternappend((IN, set))

        elif this and this[0] in REPEAT_CHARS:
            # repeat previous item
            if this == "?":
                min, max = 0, 1
            elif this == "*":
                min, max = 0, MAXREPEAT

            elif this == "+":
                min, max = 1, MAXREPEAT
            elif this == "{":
                # "{}" is not a repeat; keep the brace as a literal
                if source.next == "}":
                    subpatternappend((LITERAL, ord(this)))
                    continue
                # remember the position so the tokenizer can be rewound if
                # this turns out not to be a well-formed {m,n} repeat
                here = source.tell()
                min, max = 0, MAXREPEAT
                lo = hi = ""
                while source.next in DIGITS:
                    lo = lo + source.get()
                if sourcematch(","):
                    while source.next in DIGITS:
                        hi = hi + sourceget()
                else:
                    hi = lo
                if not sourcematch("}"):
                    # not a repeat after all: literal "{", then re-read
                    # everything that followed it
                    subpatternappend((LITERAL, ord(this)))
                    source.seek(here)
                    continue
                if lo:
                    min = int(lo)
                    if min >= MAXREPEAT:
                        raise OverflowError("the repetition number is too large")
                if hi:
                    max = int(hi)
                    if max >= MAXREPEAT:
                        raise OverflowError("the repetition number is too large")
                    if max < min:
                        raise error("bad repeat interval")
            else:
                raise error, "not supported"
            # figure out which item to repeat
            if subpattern:
                item = subpattern[-1:]
            else:
                item = None
            # there must be a preceding item, and it must not be a bare
            # position assertion
            if not item or (_len(item) == 1 and item[0][0] == AT):
                raise error, "nothing to repeat"
            if item[0][0] in REPEATCODES:
                raise error, "multiple repeat"
            # a "?" directly after the repeat operator selects MIN_REPEAT
            if sourcematch("?"):
                subpattern[-1] = (MIN_REPEAT, (min, max, item))
            else:
                subpattern[-1] = (MAX_REPEAT, (min, max, item))

        elif this == ".":
            subpatternappend((ANY, None))

        elif this == "(":
            # "group" is used as a mode switch below:
            #   1 = capturing group, 2 = group without its own id,
            #   0 = no group contents to parse (inline flags)
            group = 1
            name = None
            condgroup = None
            if sourcematch("?"):
                group = 0
                # options
                if sourcematch("P"):
                    # python extensions
                    if sourcematch("<"):
                        # named group: skip forward to end of name
                        name = ""
                        while 1:
                            char = sourceget()
                            if char is None:
                                raise error, "unterminated name"
                            if char == ">":
                                break
                            name = name + char
                        group = 1
                        if not name:
                            raise error("missing group name")
                        if not isname(name):
                            raise error("bad character in group name %r" %
                                        name)
                    elif sourcematch("="):
                        # named backreference
                        name = ""
                        while 1:
                            char = sourceget()
                            if char is None:
                                raise error, "unterminated name"
                            if char == ")":
                                break
                            name = name + char
                        if not name:
                            raise error("missing group name")
                        if not isname(name):
                            raise error("bad character in backref group name "
                                        "%r" % name)
                        gid = state.groupdict.get(name)
                        if gid is None:
                            msg = "unknown group name: {0!r}".format(name)
                            raise error(msg)
                        if state.lookbehind:
                            import warnings
                            warnings.warn('group references in lookbehind '
                                          'assertions are not supported',
                                          RuntimeWarning)
                        subpatternappend((GROUPREF, gid))
                        continue
                    else:
                        char = sourceget()
                        if char is None:
                            raise error, "unexpected end of pattern"
                        raise error, "unknown specifier: ?P%s" % char
                elif sourcematch(":"):
                    # non-capturing group
                    group = 2
                elif sourcematch("#"):
                    # comment
                    # discard everything up to the closing parenthesis
                    while 1:
                        if source.next is None or source.next == ")":
                            break
                        sourceget()
                    if not sourcematch(")"):
                        raise error, "unbalanced parenthesis"
                    continue
                elif source.next in ASSERTCHARS:
                    # lookahead and lookbehind assertions
                    char = sourceget()
                    dir = 1
                    if char == "<":
                        if source.next not in LOOKBEHINDASSERTCHARS:
                            raise error, "syntax error"
                        dir = -1 # lookbehind
                        char = sourceget()
                        # counted so that group references parsed inside
                        # the assertion body can be warned about
                        state.lookbehind += 1
                    p = _parse_sub(source, state)
                    if dir < 0:
                        state.lookbehind -= 1
                    if not sourcematch(")"):
                        raise error, "unbalanced parenthesis"
                    if char == "=":
                        subpatternappend((ASSERT, (dir, p)))
                    else:
                        subpatternappend((ASSERT_NOT, (dir, p)))
                    continue
                elif sourcematch("("):
                    # conditional backreference group
                    condname = ""
                    while 1:
                        char = sourceget()
                        if char is None:
                            raise error, "unterminated name"
                        if char == ")":
                            break
                        condname = condname + char
                    group = 2
                    if not condname:
                        raise error("missing group name")
                    # the condition is either a group name or a group number
                    if isname(condname):
                        condgroup = state.groupdict.get(condname)
                        if condgroup is None:
                            msg = "unknown group name: {0!r}".format(condname)
                            raise error(msg)
                    else:
                        try:
                            condgroup = int(condname)
                        except ValueError:
                            raise error, "bad character in group name"
                    if state.lookbehind:
                        import warnings
                        warnings.warn('group references in lookbehind '
                                      'assertions are not supported',
                                      RuntimeWarning)
                else:
                    # flags
                    if not source.next in FLAGS:
                        raise error, "unexpected end of pattern"
                    while source.next in FLAGS:
                        state.flags = state.flags | FLAGS[sourceget()]
            if group:
                # parse group contents
                if group == 2:
                    # anonymous group
                    group = None
                else:
                    group = state.opengroup(name)
                # NOTE(review): a condition that resolves to 0 is falsy
                # here and is therefore parsed like an ordinary group
                if condgroup:
                    p = _parse_sub_cond(source, state, condgroup)
                else:
                    p = _parse_sub(source, state)
                if not sourcematch(")"):
                    raise error, "unbalanced parenthesis"
                if group is not None:
                    state.closegroup(group)
                subpatternappend((SUBPATTERN, (group, p)))
            else:
                # inline flags: only the closing parenthesis may follow
                while 1:
                    char = sourceget()
                    if char is None:
                        raise error, "unexpected end of pattern"
                    if char == ")":
                        break
                    raise error, "unknown extension"

        elif this == "^":
            subpatternappend((AT, AT_BEGINNING))

        elif this == "$":
            subpattern.append((AT, AT_END))

        elif this and this[0] == "\\":
            # escape sequence outside a character class
            code = _escape(source, this, state)
            subpatternappend(code)

        else:
            raise error, "parser error"

    return subpattern
 705
 706 def parse(str, flags=0, pattern=None):
 707     # parse 're' pattern into list of (opcode, argument) tuples
 708
 709     source = Tokenizer(str)
 710
 711     if pattern is None:
 712         pattern = Pattern()
 713     pattern.flags = flags
 714     pattern.str = str
 715
 716     p = _parse_sub(source, pattern, 0)
 717
 718     tail = source.get()
 719     if tail == ")":
 720         raise error, "unbalanced parenthesis"
 721     elif tail:
 722         raise error, "bogus characters at end of regular expression"
 723
 724     if flags & SRE_FLAG_DEBUG:
 725         p.dump()
 726
 727     if not (flags & SRE_FLAG_VERBOSE) and p.pattern.flags & SRE_FLAG_VERBOSE:
 728         # the VERBOSE flag was switched on inside the pattern.  to be
 729         # on the safe side, we'll parse the whole thing again...
 730         return parse(str, p.pattern.flags)
 731
 732     return p
 733
 734 def parse_template(source, pattern):
 735     # parse 're' replacement string into list of literals and
 736     # group references
 737     s = Tokenizer(source)
 738     sget = s.get
 739     p = []
 740     a = p.append
 741     def literal(literal, p=p, pappend=a):
 742         if p and p[-1][0] is LITERAL:
 743             p[-1] = LITERAL, p[-1][1] + literal
 744         else:
 745             pappend((LITERAL, literal))
 746     sep = source[:0]
 747     if type(sep) is type(""):
 748         makechar = chr
 749     else:
 750         makechar = unichr
 751     while 1:
 752         this = sget()
 753         if this is None:
 754             break # end of replacement string
 755         if this and this[0] == "\\":
 756             # group
 757             c = this[1:2]
 758             if c == "g":
 759                 name = ""
 760                 if s.match("<"):
 761                     while 1:
 762                         char = sget()
 763                         if char is None:
 764                             raise error, "unterminated group name"
 765                         if char == ">":
 766                             break
 767                         name = name + char
 768                 if not name:
 769                     raise error, "missing group name"
 770                 try:
 771                     index = int(name)
 772                     if index < 0:
 773                         raise error, "negative group number"
 774                 except ValueError:
 775                     if not isname(name):
 776                         raise error, "bad character in group name"
 777                     try:
 778                         index = pattern.groupindex[name]
 779                     except KeyError:
 780                         msg = "unknown group name: {0!r}".format(name)
 781                         raise IndexError(msg)
 782                 a((MARK, index))
 783             elif c == "0":
 784                 if s.next in OCTDIGITS:
 785                     this = this + sget()
 786                     if s.next in OCTDIGITS:
 787                         this = this + sget()
 788                 literal(makechar(int(this[1:], 8) & 0xff))
 789             elif c in DIGITS:
 790                 isoctal = False
 791                 if s.next in DIGITS:
 792                     this = this + sget()
 793                     if (c in OCTDIGITS and this[2] in OCTDIGITS and
 794                         s.next in OCTDIGITS):
 795                         this = this + sget()
 796                         isoctal = True
 797                         literal(makechar(int(this[1:], 8) & 0xff))
 798                 if not isoctal:
 799                     a((MARK, int(this[1:])))
 800             else:
 801                 try:
 802                     this = makechar(ESCAPES[this][1])
 803                 except KeyError:
 804                     pass
 805                 literal(this)
 806         else:
 807             literal(this)
 808     # convert template to groups and literals lists
 809     i = 0
 810     groups = []
 811     groupsappend = groups.append
 812     literals = [None] * len(p)
 813     for c, s in p:
 814         if c is MARK:
 815             groupsappend((i, s))
 816             # literal[i] is already None
 817         else:
 818             literals[i] = s
 819         i = i + 1
 820     return groups, literals
 821
 822 def expand_template(template, match):
 823     g = match.group
 824     sep = match.string[:0]
 825     groups, literals = template
 826     literals = literals[:]
 827     try:
 828         for index, group in groups:
 829             literals[index] = s = g(group)
 830             if s is None:
 831                 raise error, "unmatched group"
 832     except IndexError:
 833         raise error, "invalid group reference"
 834     return sep.join(literals)