]> git.proxmox.com Git - mirror_edk2.git/blobdiff - AppPkg/Applications/Python/Python-2.7.10/Lib/sre_compile.py
AppPkg/Applications/Python/Python-2.7.10: Initial Checkin part 4/5.
[mirror_edk2.git] / AppPkg / Applications / Python / Python-2.7.10 / Lib / sre_compile.py
diff --git a/AppPkg/Applications/Python/Python-2.7.10/Lib/sre_compile.py b/AppPkg/Applications/Python/Python-2.7.10/Lib/sre_compile.py
new file mode 100644 (file)
index 0000000..6a74d04
--- /dev/null
@@ -0,0 +1,596 @@
+# -*- coding: utf-8 -*-\r
+#\r
+# Secret Labs' Regular Expression Engine\r
+#\r
+# convert template to internal format\r
+#\r
+# Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.\r
+#\r
+# See the sre.py file for information on usage and redistribution.\r
+#\r
+\r
+"""Internal support module for sre"""\r
+\r
+import _sre, sys\r
+import sre_parse\r
+from sre_constants import *\r
+\r
+assert _sre.MAGIC == MAGIC, "SRE module mismatch"\r
+\r
+if _sre.CODESIZE == 2:\r
+    MAXCODE = 65535\r
+else:\r
+    MAXCODE = 0xFFFFFFFFL\r
+\r
+_LITERAL_CODES = set([LITERAL, NOT_LITERAL])\r
+_REPEATING_CODES = set([REPEAT, MIN_REPEAT, MAX_REPEAT])\r
+_SUCCESS_CODES = set([SUCCESS, FAILURE])\r
+_ASSERT_CODES = set([ASSERT, ASSERT_NOT])\r
+\r
+# Sets of lowercase characters which have the same uppercase.\r
+_equivalences = (\r
+    # LATIN SMALL LETTER I, LATIN SMALL LETTER DOTLESS I\r
+    (0x69, 0x131), # iı\r
+    # LATIN SMALL LETTER S, LATIN SMALL LETTER LONG S\r
+    (0x73, 0x17f), # sſ\r
+    # MICRO SIGN, GREEK SMALL LETTER MU\r
+    (0xb5, 0x3bc), # µμ\r
+    # COMBINING GREEK YPOGEGRAMMENI, GREEK SMALL LETTER IOTA, GREEK PROSGEGRAMMENI\r
+    (0x345, 0x3b9, 0x1fbe), # \u0345ιι\r
+    # GREEK SMALL LETTER BETA, GREEK BETA SYMBOL\r
+    (0x3b2, 0x3d0), # βϐ\r
+    # GREEK SMALL LETTER EPSILON, GREEK LUNATE EPSILON SYMBOL\r
+    (0x3b5, 0x3f5), # εϵ\r
+    # GREEK SMALL LETTER THETA, GREEK THETA SYMBOL\r
+    (0x3b8, 0x3d1), # θϑ\r
+    # GREEK SMALL LETTER KAPPA, GREEK KAPPA SYMBOL\r
+    (0x3ba, 0x3f0), # κϰ\r
+    # GREEK SMALL LETTER PI, GREEK PI SYMBOL\r
+    (0x3c0, 0x3d6), # πϖ\r
+    # GREEK SMALL LETTER RHO, GREEK RHO SYMBOL\r
+    (0x3c1, 0x3f1), # ρϱ\r
+    # GREEK SMALL LETTER FINAL SIGMA, GREEK SMALL LETTER SIGMA\r
+    (0x3c2, 0x3c3), # ςσ\r
+    # GREEK SMALL LETTER PHI, GREEK PHI SYMBOL\r
+    (0x3c6, 0x3d5), # φϕ\r
+    # LATIN SMALL LETTER S WITH DOT ABOVE, LATIN SMALL LETTER LONG S WITH DOT ABOVE\r
+    (0x1e61, 0x1e9b), # ṡẛ\r
+)\r
+\r
+# Maps the lowercase code to lowercase codes which have the same uppercase.\r
+_ignorecase_fixes = {i: tuple(j for j in t if i != j)\r
+                     for t in _equivalences for i in t}\r
+\r
+def _compile(code, pattern, flags):\r
+    # internal: compile a (sub)pattern\r
+    emit = code.append\r
+    _len = len\r
+    LITERAL_CODES = _LITERAL_CODES\r
+    REPEATING_CODES = _REPEATING_CODES\r
+    SUCCESS_CODES = _SUCCESS_CODES\r
+    ASSERT_CODES = _ASSERT_CODES\r
+    if (flags & SRE_FLAG_IGNORECASE and\r
+            not (flags & SRE_FLAG_LOCALE) and\r
+            flags & SRE_FLAG_UNICODE):\r
+        fixes = _ignorecase_fixes\r
+    else:\r
+        fixes = None\r
+    for op, av in pattern:\r
+        if op in LITERAL_CODES:\r
+            if flags & SRE_FLAG_IGNORECASE:\r
+                lo = _sre.getlower(av, flags)\r
+                if fixes and lo in fixes:\r
+                    emit(OPCODES[IN_IGNORE])\r
+                    skip = _len(code); emit(0)\r
+                    if op is NOT_LITERAL:\r
+                        emit(OPCODES[NEGATE])\r
+                    for k in (lo,) + fixes[lo]:\r
+                        emit(OPCODES[LITERAL])\r
+                        emit(k)\r
+                    emit(OPCODES[FAILURE])\r
+                    code[skip] = _len(code) - skip\r
+                else:\r
+                    emit(OPCODES[OP_IGNORE[op]])\r
+                    emit(lo)\r
+            else:\r
+                emit(OPCODES[op])\r
+                emit(av)\r
+        elif op is IN:\r
+            if flags & SRE_FLAG_IGNORECASE:\r
+                emit(OPCODES[OP_IGNORE[op]])\r
+                def fixup(literal, flags=flags):\r
+                    return _sre.getlower(literal, flags)\r
+            else:\r
+                emit(OPCODES[op])\r
+                fixup = None\r
+            skip = _len(code); emit(0)\r
+            _compile_charset(av, flags, code, fixup, fixes)\r
+            code[skip] = _len(code) - skip\r
+        elif op is ANY:\r
+            if flags & SRE_FLAG_DOTALL:\r
+                emit(OPCODES[ANY_ALL])\r
+            else:\r
+                emit(OPCODES[ANY])\r
+        elif op in REPEATING_CODES:\r
+            if flags & SRE_FLAG_TEMPLATE:\r
+                raise error, "internal: unsupported template operator"\r
+                emit(OPCODES[REPEAT])\r
+                skip = _len(code); emit(0)\r
+                emit(av[0])\r
+                emit(av[1])\r
+                _compile(code, av[2], flags)\r
+                emit(OPCODES[SUCCESS])\r
+                code[skip] = _len(code) - skip\r
+            elif _simple(av) and op is not REPEAT:\r
+                if op is MAX_REPEAT:\r
+                    emit(OPCODES[REPEAT_ONE])\r
+                else:\r
+                    emit(OPCODES[MIN_REPEAT_ONE])\r
+                skip = _len(code); emit(0)\r
+                emit(av[0])\r
+                emit(av[1])\r
+                _compile(code, av[2], flags)\r
+                emit(OPCODES[SUCCESS])\r
+                code[skip] = _len(code) - skip\r
+            else:\r
+                emit(OPCODES[REPEAT])\r
+                skip = _len(code); emit(0)\r
+                emit(av[0])\r
+                emit(av[1])\r
+                _compile(code, av[2], flags)\r
+                code[skip] = _len(code) - skip\r
+                if op is MAX_REPEAT:\r
+                    emit(OPCODES[MAX_UNTIL])\r
+                else:\r
+                    emit(OPCODES[MIN_UNTIL])\r
+        elif op is SUBPATTERN:\r
+            if av[0]:\r
+                emit(OPCODES[MARK])\r
+                emit((av[0]-1)*2)\r
+            # _compile_info(code, av[1], flags)\r
+            _compile(code, av[1], flags)\r
+            if av[0]:\r
+                emit(OPCODES[MARK])\r
+                emit((av[0]-1)*2+1)\r
+        elif op in SUCCESS_CODES:\r
+            emit(OPCODES[op])\r
+        elif op in ASSERT_CODES:\r
+            emit(OPCODES[op])\r
+            skip = _len(code); emit(0)\r
+            if av[0] >= 0:\r
+                emit(0) # look ahead\r
+            else:\r
+                lo, hi = av[1].getwidth()\r
+                if lo != hi:\r
+                    raise error, "look-behind requires fixed-width pattern"\r
+                emit(lo) # look behind\r
+            _compile(code, av[1], flags)\r
+            emit(OPCODES[SUCCESS])\r
+            code[skip] = _len(code) - skip\r
+        elif op is CALL:\r
+            emit(OPCODES[op])\r
+            skip = _len(code); emit(0)\r
+            _compile(code, av, flags)\r
+            emit(OPCODES[SUCCESS])\r
+            code[skip] = _len(code) - skip\r
+        elif op is AT:\r
+            emit(OPCODES[op])\r
+            if flags & SRE_FLAG_MULTILINE:\r
+                av = AT_MULTILINE.get(av, av)\r
+            if flags & SRE_FLAG_LOCALE:\r
+                av = AT_LOCALE.get(av, av)\r
+            elif flags & SRE_FLAG_UNICODE:\r
+                av = AT_UNICODE.get(av, av)\r
+            emit(ATCODES[av])\r
+        elif op is BRANCH:\r
+            emit(OPCODES[op])\r
+            tail = []\r
+            tailappend = tail.append\r
+            for av in av[1]:\r
+                skip = _len(code); emit(0)\r
+                # _compile_info(code, av, flags)\r
+                _compile(code, av, flags)\r
+                emit(OPCODES[JUMP])\r
+                tailappend(_len(code)); emit(0)\r
+                code[skip] = _len(code) - skip\r
+            emit(0) # end of branch\r
+            for tail in tail:\r
+                code[tail] = _len(code) - tail\r
+        elif op is CATEGORY:\r
+            emit(OPCODES[op])\r
+            if flags & SRE_FLAG_LOCALE:\r
+                av = CH_LOCALE[av]\r
+            elif flags & SRE_FLAG_UNICODE:\r
+                av = CH_UNICODE[av]\r
+            emit(CHCODES[av])\r
+        elif op is GROUPREF:\r
+            if flags & SRE_FLAG_IGNORECASE:\r
+                emit(OPCODES[OP_IGNORE[op]])\r
+            else:\r
+                emit(OPCODES[op])\r
+            emit(av-1)\r
+        elif op is GROUPREF_EXISTS:\r
+            emit(OPCODES[op])\r
+            emit(av[0]-1)\r
+            skipyes = _len(code); emit(0)\r
+            _compile(code, av[1], flags)\r
+            if av[2]:\r
+                emit(OPCODES[JUMP])\r
+                skipno = _len(code); emit(0)\r
+                code[skipyes] = _len(code) - skipyes + 1\r
+                _compile(code, av[2], flags)\r
+                code[skipno] = _len(code) - skipno\r
+            else:\r
+                code[skipyes] = _len(code) - skipyes + 1\r
+        else:\r
+            raise ValueError, ("unsupported operand type", op)\r
+\r
+def _compile_charset(charset, flags, code, fixup=None, fixes=None):\r
+    # compile charset subprogram\r
+    emit = code.append\r
+    for op, av in _optimize_charset(charset, fixup, fixes,\r
+                                    flags & SRE_FLAG_UNICODE):\r
+        emit(OPCODES[op])\r
+        if op is NEGATE:\r
+            pass\r
+        elif op is LITERAL:\r
+            emit(av)\r
+        elif op is RANGE:\r
+            emit(av[0])\r
+            emit(av[1])\r
+        elif op is CHARSET:\r
+            code.extend(av)\r
+        elif op is BIGCHARSET:\r
+            code.extend(av)\r
+        elif op is CATEGORY:\r
+            if flags & SRE_FLAG_LOCALE:\r
+                emit(CHCODES[CH_LOCALE[av]])\r
+            elif flags & SRE_FLAG_UNICODE:\r
+                emit(CHCODES[CH_UNICODE[av]])\r
+            else:\r
+                emit(CHCODES[av])\r
+        else:\r
+            raise error, "internal: unsupported set operator"\r
+    emit(OPCODES[FAILURE])\r
+\r
+def _optimize_charset(charset, fixup, fixes, isunicode):\r
+    # internal: optimize character set\r
+    out = []\r
+    tail = []\r
+    charmap = bytearray(256)\r
+    for op, av in charset:\r
+        while True:\r
+            try:\r
+                if op is LITERAL:\r
+                    if fixup:\r
+                        i = fixup(av)\r
+                        charmap[i] = 1\r
+                        if fixes and i in fixes:\r
+                            for k in fixes[i]:\r
+                                charmap[k] = 1\r
+                    else:\r
+                        charmap[av] = 1\r
+                elif op is RANGE:\r
+                    r = range(av[0], av[1]+1)\r
+                    if fixup:\r
+                        r = map(fixup, r)\r
+                    if fixup and fixes:\r
+                        for i in r:\r
+                            charmap[i] = 1\r
+                            if i in fixes:\r
+                                for k in fixes[i]:\r
+                                    charmap[k] = 1\r
+                    else:\r
+                        for i in r:\r
+                            charmap[i] = 1\r
+                elif op is NEGATE:\r
+                    out.append((op, av))\r
+                else:\r
+                    tail.append((op, av))\r
+            except IndexError:\r
+                if len(charmap) == 256:\r
+                    # character set contains non-UCS1 character codes\r
+                    charmap += b'\0' * 0xff00\r
+                    continue\r
+                # character set contains non-BMP character codes\r
+                if fixup and isunicode and op is RANGE:\r
+                    lo, hi = av\r
+                    ranges = [av]\r
+                    # There are only two ranges of cased astral characters:\r
+                    # 10400-1044F (Deseret) and 118A0-118DF (Warang Citi).\r
+                    _fixup_range(max(0x10000, lo), min(0x11fff, hi),\r
+                                 ranges, fixup)\r
+                    for lo, hi in ranges:\r
+                        if lo == hi:\r
+                            tail.append((LITERAL, hi))\r
+                        else:\r
+                            tail.append((RANGE, (lo, hi)))\r
+                else:\r
+                    tail.append((op, av))\r
+            break\r
+\r
+    # compress character map\r
+    runs = []\r
+    q = 0\r
+    while True:\r
+        p = charmap.find(b'\1', q)\r
+        if p < 0:\r
+            break\r
+        if len(runs) >= 2:\r
+            runs = None\r
+            break\r
+        q = charmap.find(b'\0', p)\r
+        if q < 0:\r
+            runs.append((p, len(charmap)))\r
+            break\r
+        runs.append((p, q))\r
+    if runs is not None:\r
+        # use literal/range\r
+        for p, q in runs:\r
+            if q - p == 1:\r
+                out.append((LITERAL, p))\r
+            else:\r
+                out.append((RANGE, (p, q - 1)))\r
+        out += tail\r
+        # if the case was changed or new representation is more compact\r
+        if fixup or len(out) < len(charset):\r
+            return out\r
+        # else original character set is good enough\r
+        return charset\r
+\r
+    # use bitmap\r
+    if len(charmap) == 256:\r
+        data = _mk_bitmap(charmap)\r
+        out.append((CHARSET, data))\r
+        out += tail\r
+        return out\r
+\r
+    # To represent a big charset, first a bitmap of all characters in the\r
+    # set is constructed. Then, this bitmap is sliced into chunks of 256\r
+    # characters, duplicate chunks are eliminated, and each chunk is\r
+    # given a number. In the compiled expression, the charset is\r
+    # represented by a 32-bit word sequence, consisting of one word for\r
+    # the number of different chunks, a sequence of 256 bytes (64 words)\r
+    # of chunk numbers indexed by their original chunk position, and a\r
+    # sequence of 256-bit chunks (8 words each).\r
+\r
+    # Compression is normally good: in a typical charset, large ranges of\r
+    # Unicode will be either completely excluded (e.g. if only cyrillic\r
+    # letters are to be matched), or completely included (e.g. if large\r
+    # subranges of Kanji match). These ranges will be represented by\r
+    # chunks of all one-bits or all zero-bits.\r
+\r
+    # Matching can be also done efficiently: the more significant byte of\r
+    # the Unicode character is an index into the chunk number, and the\r
+    # less significant byte is a bit index in the chunk (just like the\r
+    # CHARSET matching).\r
+\r
+    # In UCS-4 mode, the BIGCHARSET opcode still supports only subsets\r
+    # of the basic multilingual plane; an efficient representation\r
+    # for all of Unicode has not yet been developed.\r
+\r
+    charmap = bytes(charmap) # should be hashable\r
+    comps = {}\r
+    mapping = bytearray(256)\r
+    block = 0\r
+    data = bytearray()\r
+    for i in range(0, 65536, 256):\r
+        chunk = charmap[i: i + 256]\r
+        if chunk in comps:\r
+            mapping[i // 256] = comps[chunk]\r
+        else:\r
+            mapping[i // 256] = comps[chunk] = block\r
+            block += 1\r
+            data += chunk\r
+    data = _mk_bitmap(data)\r
+    data[0:0] = [block] + _bytes_to_codes(mapping)\r
+    out.append((BIGCHARSET, data))\r
+    out += tail\r
+    return out\r
+\r
+def _fixup_range(lo, hi, ranges, fixup):\r
+    for i in map(fixup, range(lo, hi+1)):\r
+        for k, (lo, hi) in enumerate(ranges):\r
+            if i < lo:\r
+                if l == lo - 1:\r
+                    ranges[k] = (i, hi)\r
+                else:\r
+                    ranges.insert(k, (i, i))\r
+                break\r
+            elif i > hi:\r
+                if i == hi + 1:\r
+                    ranges[k] = (lo, i)\r
+                    break\r
+            else:\r
+                break\r
+        else:\r
+            ranges.append((i, i))\r
+\r
+_CODEBITS = _sre.CODESIZE * 8\r
+_BITS_TRANS = b'0' + b'1' * 255\r
+def _mk_bitmap(bits, _CODEBITS=_CODEBITS, _int=int):\r
+    s = bytes(bits).translate(_BITS_TRANS)[::-1]\r
+    return [_int(s[i - _CODEBITS: i], 2)\r
+            for i in range(len(s), 0, -_CODEBITS)]\r
+\r
+def _bytes_to_codes(b):\r
+    # Convert block indices to word array\r
+    import array\r
+    if _sre.CODESIZE == 2:\r
+        code = 'H'\r
+    else:\r
+        code = 'I'\r
+    a = array.array(code, bytes(b))\r
+    assert a.itemsize == _sre.CODESIZE\r
+    assert len(a) * a.itemsize == len(b)\r
+    return a.tolist()\r
+\r
+def _simple(av):\r
+    # check if av is a "simple" operator\r
+    lo, hi = av[2].getwidth()\r
+    return lo == hi == 1 and av[2][0][0] != SUBPATTERN\r
+\r
+def _compile_info(code, pattern, flags):\r
+    # internal: compile an info block.  in the current version,\r
+    # this contains min/max pattern width, and an optional literal\r
+    # prefix or a character map\r
+    lo, hi = pattern.getwidth()\r
+    if lo == 0:\r
+        return # not worth it\r
+    # look for a literal prefix\r
+    prefix = []\r
+    prefixappend = prefix.append\r
+    prefix_skip = 0\r
+    charset = [] # not used\r
+    charsetappend = charset.append\r
+    if not (flags & SRE_FLAG_IGNORECASE):\r
+        # look for literal prefix\r
+        for op, av in pattern.data:\r
+            if op is LITERAL:\r
+                if len(prefix) == prefix_skip:\r
+                    prefix_skip = prefix_skip + 1\r
+                prefixappend(av)\r
+            elif op is SUBPATTERN and len(av[1]) == 1:\r
+                op, av = av[1][0]\r
+                if op is LITERAL:\r
+                    prefixappend(av)\r
+                else:\r
+                    break\r
+            else:\r
+                break\r
+        # if no prefix, look for charset prefix\r
+        if not prefix and pattern.data:\r
+            op, av = pattern.data[0]\r
+            if op is SUBPATTERN and av[1]:\r
+                op, av = av[1][0]\r
+                if op is LITERAL:\r
+                    charsetappend((op, av))\r
+                elif op is BRANCH:\r
+                    c = []\r
+                    cappend = c.append\r
+                    for p in av[1]:\r
+                        if not p:\r
+                            break\r
+                        op, av = p[0]\r
+                        if op is LITERAL:\r
+                            cappend((op, av))\r
+                        else:\r
+                            break\r
+                    else:\r
+                        charset = c\r
+            elif op is BRANCH:\r
+                c = []\r
+                cappend = c.append\r
+                for p in av[1]:\r
+                    if not p:\r
+                        break\r
+                    op, av = p[0]\r
+                    if op is LITERAL:\r
+                        cappend((op, av))\r
+                    else:\r
+                        break\r
+                else:\r
+                    charset = c\r
+            elif op is IN:\r
+                charset = av\r
+##     if prefix:\r
+##         print "*** PREFIX", prefix, prefix_skip\r
+##     if charset:\r
+##         print "*** CHARSET", charset\r
+    # add an info block\r
+    emit = code.append\r
+    emit(OPCODES[INFO])\r
+    skip = len(code); emit(0)\r
+    # literal flag\r
+    mask = 0\r
+    if prefix:\r
+        mask = SRE_INFO_PREFIX\r
+        if len(prefix) == prefix_skip == len(pattern.data):\r
+            mask = mask + SRE_INFO_LITERAL\r
+    elif charset:\r
+        mask = mask + SRE_INFO_CHARSET\r
+    emit(mask)\r
+    # pattern length\r
+    if lo < MAXCODE:\r
+        emit(lo)\r
+    else:\r
+        emit(MAXCODE)\r
+        prefix = prefix[:MAXCODE]\r
+    if hi < MAXCODE:\r
+        emit(hi)\r
+    else:\r
+        emit(0)\r
+    # add literal prefix\r
+    if prefix:\r
+        emit(len(prefix)) # length\r
+        emit(prefix_skip) # skip\r
+        code.extend(prefix)\r
+        # generate overlap table\r
+        table = [-1] + ([0]*len(prefix))\r
+        for i in xrange(len(prefix)):\r
+            table[i+1] = table[i]+1\r
+            while table[i+1] > 0 and prefix[i] != prefix[table[i+1]-1]:\r
+                table[i+1] = table[table[i+1]-1]+1\r
+        code.extend(table[1:]) # don't store first entry\r
+    elif charset:\r
+        _compile_charset(charset, flags, code)\r
+    code[skip] = len(code) - skip\r
+\r
+try:\r
+    unicode\r
+except NameError:\r
+    STRING_TYPES = (type(""),)\r
+else:\r
+    STRING_TYPES = (type(""), type(unicode("")))\r
+\r
+def isstring(obj):\r
+    for tp in STRING_TYPES:\r
+        if isinstance(obj, tp):\r
+            return 1\r
+    return 0\r
+\r
+def _code(p, flags):\r
+\r
+    flags = p.pattern.flags | flags\r
+    code = []\r
+\r
+    # compile info block\r
+    _compile_info(code, p, flags)\r
+\r
+    # compile the pattern\r
+    _compile(code, p.data, flags)\r
+\r
+    code.append(OPCODES[SUCCESS])\r
+\r
+    return code\r
+\r
+def compile(p, flags=0):\r
+    # internal: convert pattern list to internal format\r
+\r
+    if isstring(p):\r
+        pattern = p\r
+        p = sre_parse.parse(p, flags)\r
+    else:\r
+        pattern = None\r
+\r
+    code = _code(p, flags)\r
+\r
+    # print code\r
+\r
+    # XXX: <fl> get rid of this limitation!\r
+    if p.pattern.groups > 100:\r
+        raise AssertionError(\r
+            "sorry, but this version only supports 100 named groups"\r
+            )\r
+\r
+    # map in either direction\r
+    groupindex = p.pattern.groupdict\r
+    indexgroup = [None] * p.pattern.groups\r
+    for k, i in groupindex.items():\r
+        indexgroup[i] = k\r
+\r
+    return _sre.compile(\r
+        pattern, flags | p.pattern.flags, code,\r
+        p.pattern.groups-1,\r
+        groupindex, indexgroup\r
+        )\r