+++ /dev/null
-"""Create portable serialized representations of Python objects.\r
-\r
-See module cPickle for a (much) faster implementation.\r
-See module copy_reg for a mechanism for registering custom picklers.\r
-See module pickletools source for extensive comments.\r
-\r
-Classes:\r
-\r
- Pickler\r
- Unpickler\r
-\r
-Functions:\r
-\r
- dump(object, file)\r
- dumps(object) -> string\r
- load(file) -> object\r
- loads(string) -> object\r
-\r
-Misc variables:\r
-\r
- __version__\r
- format_version\r
- compatible_formats\r
-\r
-"""\r
-\r
-__version__ = "$Revision$" # Code version\r
-\r
-from types import *\r
-from copy_reg import dispatch_table\r
-from copy_reg import _extension_registry, _inverted_registry, _extension_cache\r
-import marshal\r
-import sys\r
-import struct\r
-import re\r
-\r
-__all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler",\r
- "Unpickler", "dump", "dumps", "load", "loads"]\r
-\r
-# These are purely informational; no code uses these.\r
-format_version = "2.0" # File format version we write\r
-compatible_formats = ["1.0", # Original protocol 0\r
- "1.1", # Protocol 0 with INST added\r
- "1.2", # Original protocol 1\r
- "1.3", # Protocol 1 with BINFLOAT added\r
- "2.0", # Protocol 2\r
- ] # Old format versions we can read\r
-\r
-# Keep in synch with cPickle. This is the highest protocol number we\r
-# know how to read.\r
-HIGHEST_PROTOCOL = 2\r
-\r
-# Why use struct.pack() for pickling but marshal.loads() for\r
-# unpickling? struct.pack() is 40% faster than marshal.dumps(), but\r
-# marshal.loads() is twice as fast as struct.unpack()!\r
-mloads = marshal.loads\r
-\r
-class PickleError(Exception):\r
- """A common base class for the other pickling exceptions."""\r
- pass\r
-\r
-class PicklingError(PickleError):\r
- """This exception is raised when an unpicklable object is passed to the\r
- dump() method.\r
-\r
- """\r
- pass\r
-\r
-class UnpicklingError(PickleError):\r
- """This exception is raised when there is a problem unpickling an object,\r
- such as a security violation.\r
-\r
- Note that other exceptions may also be raised during unpickling, including\r
- (but not necessarily limited to) AttributeError, EOFError, ImportError,\r
- and IndexError.\r
-\r
- """\r
- pass\r
-\r
-# An instance of _Stop is raised by Unpickler.load_stop() in response to\r
-# the STOP opcode, passing the object that is the result of unpickling.\r
-class _Stop(Exception):\r
- def __init__(self, value):\r
- self.value = value\r
-\r
-# Jython has PyStringMap; it's a dict subclass with string keys\r
-try:\r
- from org.python.core import PyStringMap\r
-except ImportError:\r
- PyStringMap = None\r
-\r
-# UnicodeType may or may not be exported (normally imported from types)\r
-try:\r
- UnicodeType\r
-except NameError:\r
- UnicodeType = None\r
-\r
-# Pickle opcodes. See pickletools.py for extensive docs. The listing\r
-# here is in kind-of alphabetical order of 1-character pickle code.\r
-# pickletools groups them by purpose.\r
-\r
-MARK = '(' # push special markobject on stack\r
-STOP = '.' # every pickle ends with STOP\r
-POP = '0' # discard topmost stack item\r
-POP_MARK = '1' # discard stack top through topmost markobject\r
-DUP = '2' # duplicate top stack item\r
-FLOAT = 'F' # push float object; decimal string argument\r
-INT = 'I' # push integer or bool; decimal string argument\r
-BININT = 'J' # push four-byte signed int\r
-BININT1 = 'K' # push 1-byte unsigned int\r
-LONG = 'L' # push long; decimal string argument\r
-BININT2 = 'M' # push 2-byte unsigned int\r
-NONE = 'N' # push None\r
-PERSID = 'P' # push persistent object; id is taken from string arg\r
-BINPERSID = 'Q' # " " " ; " " " " stack\r
-REDUCE = 'R' # apply callable to argtuple, both on stack\r
-STRING = 'S' # push string; NL-terminated string argument\r
-BINSTRING = 'T' # push string; counted binary string argument\r
-SHORT_BINSTRING = 'U' # " " ; " " " " < 256 bytes\r
-UNICODE = 'V' # push Unicode string; raw-unicode-escaped'd argument\r
-BINUNICODE = 'X' # " " " ; counted UTF-8 string argument\r
-APPEND = 'a' # append stack top to list below it\r
-BUILD = 'b' # call __setstate__ or __dict__.update()\r
-GLOBAL = 'c' # push self.find_class(modname, name); 2 string args\r
-DICT = 'd' # build a dict from stack items\r
-EMPTY_DICT = '}' # push empty dict\r
-APPENDS = 'e' # extend list on stack by topmost stack slice\r
-GET = 'g' # push item from memo on stack; index is string arg\r
-BINGET = 'h' # " " " " " " ; " " 1-byte arg\r
-INST = 'i' # build & push class instance\r
-LONG_BINGET = 'j' # push item from memo on stack; index is 4-byte arg\r
-LIST = 'l' # build list from topmost stack items\r
-EMPTY_LIST = ']' # push empty list\r
-OBJ = 'o' # build & push class instance\r
-PUT = 'p' # store stack top in memo; index is string arg\r
-BINPUT = 'q' # " " " " " ; " " 1-byte arg\r
-LONG_BINPUT = 'r' # " " " " " ; " " 4-byte arg\r
-SETITEM = 's' # add key+value pair to dict\r
-TUPLE = 't' # build tuple from topmost stack items\r
-EMPTY_TUPLE = ')' # push empty tuple\r
-SETITEMS = 'u' # modify dict by adding topmost key+value pairs\r
-BINFLOAT = 'G' # push float; arg is 8-byte float encoding\r
-\r
-TRUE = 'I01\n' # not an opcode; see INT docs in pickletools.py\r
-FALSE = 'I00\n' # not an opcode; see INT docs in pickletools.py\r
-\r
-# Protocol 2\r
-\r
-PROTO = '\x80' # identify pickle protocol\r
-NEWOBJ = '\x81' # build object by applying cls.__new__ to argtuple\r
-EXT1 = '\x82' # push object from extension registry; 1-byte index\r
-EXT2 = '\x83' # ditto, but 2-byte index\r
-EXT4 = '\x84' # ditto, but 4-byte index\r
-TUPLE1 = '\x85' # build 1-tuple from stack top\r
-TUPLE2 = '\x86' # build 2-tuple from two topmost stack items\r
-TUPLE3 = '\x87' # build 3-tuple from three topmost stack items\r
-NEWTRUE = '\x88' # push True\r
-NEWFALSE = '\x89' # push False\r
-LONG1 = '\x8a' # push long from < 256 bytes\r
-LONG4 = '\x8b' # push really big long\r
-\r
-_tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3]\r
-\r
-\r
-__all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$",x)])\r
-del x\r
-\r
-\r
-# Pickling machinery\r
-\r
-class Pickler:\r
-\r
- def __init__(self, file, protocol=None):\r
- """This takes a file-like object for writing a pickle data stream.\r
-\r
- The optional protocol argument tells the pickler to use the\r
- given protocol; supported protocols are 0, 1, 2. The default\r
- protocol is 0, to be backwards compatible. (Protocol 0 is the\r
- only protocol that can be written to a file opened in text\r
- mode and read back successfully. When using a protocol higher\r
- than 0, make sure the file is opened in binary mode, both when\r
- pickling and unpickling.)\r
-\r
- Protocol 1 is more efficient than protocol 0; protocol 2 is\r
- more efficient than protocol 1.\r
-\r
- Specifying a negative protocol version selects the highest\r
- protocol version supported. The higher the protocol used, the\r
- more recent the version of Python needed to read the pickle\r
- produced.\r
-\r
- The file parameter must have a write() method that accepts a single\r
- string argument. It can thus be an open file object, a StringIO\r
- object, or any other custom object that meets this interface.\r
-\r
- """\r
- if protocol is None:\r
- protocol = 0\r
- if protocol < 0:\r
- protocol = HIGHEST_PROTOCOL\r
- elif not 0 <= protocol <= HIGHEST_PROTOCOL:\r
- raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL)\r
- self.write = file.write\r
- self.memo = {}\r
- self.proto = int(protocol)\r
- self.bin = protocol >= 1\r
- self.fast = 0\r
-\r
- def clear_memo(self):\r
- """Clears the pickler's "memo".\r
-\r
- The memo is the data structure that remembers which objects the\r
- pickler has already seen, so that shared or recursive objects are\r
- pickled by reference and not by value. This method is useful when\r
- re-using picklers.\r
-\r
- """\r
- self.memo.clear()\r
-\r
- def dump(self, obj):\r
- """Write a pickled representation of obj to the open file."""\r
- if self.proto >= 2:\r
- self.write(PROTO + chr(self.proto))\r
- self.save(obj)\r
- self.write(STOP)\r
-\r
- def memoize(self, obj):\r
- """Store an object in the memo."""\r
-\r
- # The Pickler memo is a dictionary mapping object ids to 2-tuples\r
- # that contain the Unpickler memo key and the object being memoized.\r
- # The memo key is written to the pickle and will become\r
- # the key in the Unpickler's memo. The object is stored in the\r
- # Pickler memo so that transient objects are kept alive during\r
- # pickling.\r
-\r
- # The use of the Unpickler memo length as the memo key is just a\r
- # convention. The only requirement is that the memo values be unique.\r
- # But there appears no advantage to any other scheme, and this\r
- # scheme allows the Unpickler memo to be implemented as a plain (but\r
- # growable) array, indexed by memo key.\r
- if self.fast:\r
- return\r
- assert id(obj) not in self.memo\r
- memo_len = len(self.memo)\r
- self.write(self.put(memo_len))\r
- self.memo[id(obj)] = memo_len, obj\r
-\r
- # Return a PUT (BINPUT, LONG_BINPUT) opcode string, with argument i.\r
- def put(self, i, pack=struct.pack):\r
- if self.bin:\r
- if i < 256:\r
- return BINPUT + chr(i)\r
- else:\r
- return LONG_BINPUT + pack("<i", i)\r
-\r
- return PUT + repr(i) + '\n'\r
-\r
- # Return a GET (BINGET, LONG_BINGET) opcode string, with argument i.\r
- def get(self, i, pack=struct.pack):\r
- if self.bin:\r
- if i < 256:\r
- return BINGET + chr(i)\r
- else:\r
- return LONG_BINGET + pack("<i", i)\r
-\r
- return GET + repr(i) + '\n'\r
-\r
- def save(self, obj):\r
- # Check for persistent id (defined by a subclass)\r
- pid = self.persistent_id(obj)\r
- if pid:\r
- self.save_pers(pid)\r
- return\r
-\r
- # Check the memo\r
- x = self.memo.get(id(obj))\r
- if x:\r
- self.write(self.get(x[0]))\r
- return\r
-\r
- # Check the type dispatch table\r
- t = type(obj)\r
- f = self.dispatch.get(t)\r
- if f:\r
- f(self, obj) # Call unbound method with explicit self\r
- return\r
-\r
- # Check for a class with a custom metaclass; treat as regular class\r
- try:\r
- issc = issubclass(t, TypeType)\r
- except TypeError: # t is not a class (old Boost; see SF #502085)\r
- issc = 0\r
- if issc:\r
- self.save_global(obj)\r
- return\r
-\r
- # Check copy_reg.dispatch_table\r
- reduce = dispatch_table.get(t)\r
- if reduce:\r
- rv = reduce(obj)\r
- else:\r
- # Check for a __reduce_ex__ method, fall back to __reduce__\r
- reduce = getattr(obj, "__reduce_ex__", None)\r
- if reduce:\r
- rv = reduce(self.proto)\r
- else:\r
- reduce = getattr(obj, "__reduce__", None)\r
- if reduce:\r
- rv = reduce()\r
- else:\r
- raise PicklingError("Can't pickle %r object: %r" %\r
- (t.__name__, obj))\r
-\r
- # Check for string returned by reduce(), meaning "save as global"\r
- if type(rv) is StringType:\r
- self.save_global(obj, rv)\r
- return\r
-\r
- # Assert that reduce() returned a tuple\r
- if type(rv) is not TupleType:\r
- raise PicklingError("%s must return string or tuple" % reduce)\r
-\r
- # Assert that it returned an appropriately sized tuple\r
- l = len(rv)\r
- if not (2 <= l <= 5):\r
- raise PicklingError("Tuple returned by %s must have "\r
- "two to five elements" % reduce)\r
-\r
- # Save the reduce() output and finally memoize the object\r
- self.save_reduce(obj=obj, *rv)\r
-\r
- def persistent_id(self, obj):\r
- # This exists so a subclass can override it\r
- return None\r
-\r
- def save_pers(self, pid):\r
- # Save a persistent id reference\r
- if self.bin:\r
- self.save(pid)\r
- self.write(BINPERSID)\r
- else:\r
- self.write(PERSID + str(pid) + '\n')\r
-\r
- def save_reduce(self, func, args, state=None,\r
- listitems=None, dictitems=None, obj=None):\r
- # This API is called by some subclasses\r
-\r
- # Assert that args is a tuple or None\r
- if not isinstance(args, TupleType):\r
- raise PicklingError("args from reduce() should be a tuple")\r
-\r
- # Assert that func is callable\r
- if not hasattr(func, '__call__'):\r
- raise PicklingError("func from reduce should be callable")\r
-\r
- save = self.save\r
- write = self.write\r
-\r
- # Protocol 2 special case: if func's name is __newobj__, use NEWOBJ\r
- if self.proto >= 2 and getattr(func, "__name__", "") == "__newobj__":\r
- # A __reduce__ implementation can direct protocol 2 to\r
- # use the more efficient NEWOBJ opcode, while still\r
- # allowing protocol 0 and 1 to work normally. For this to\r
- # work, the function returned by __reduce__ should be\r
- # called __newobj__, and its first argument should be a\r
- # new-style class. The implementation for __newobj__\r
- # should be as follows, although pickle has no way to\r
- # verify this:\r
- #\r
- # def __newobj__(cls, *args):\r
- # return cls.__new__(cls, *args)\r
- #\r
- # Protocols 0 and 1 will pickle a reference to __newobj__,\r
- # while protocol 2 (and above) will pickle a reference to\r
- # cls, the remaining args tuple, and the NEWOBJ code,\r
- # which calls cls.__new__(cls, *args) at unpickling time\r
- # (see load_newobj below). If __reduce__ returns a\r
- # three-tuple, the state from the third tuple item will be\r
- # pickled regardless of the protocol, calling __setstate__\r
- # at unpickling time (see load_build below).\r
- #\r
- # Note that no standard __newobj__ implementation exists;\r
- # you have to provide your own. This is to enforce\r
- # compatibility with Python 2.2 (pickles written using\r
- # protocol 0 or 1 in Python 2.3 should be unpicklable by\r
- # Python 2.2).\r
- cls = args[0]\r
- if not hasattr(cls, "__new__"):\r
- raise PicklingError(\r
- "args[0] from __newobj__ args has no __new__")\r
- if obj is not None and cls is not obj.__class__:\r
- raise PicklingError(\r
- "args[0] from __newobj__ args has the wrong class")\r
- args = args[1:]\r
- save(cls)\r
- save(args)\r
- write(NEWOBJ)\r
- else:\r
- save(func)\r
- save(args)\r
- write(REDUCE)\r
-\r
- if obj is not None:\r
- self.memoize(obj)\r
-\r
- # More new special cases (that work with older protocols as\r
- # well): when __reduce__ returns a tuple with 4 or 5 items,\r
- # the 4th and 5th item should be iterators that provide list\r
- # items and dict items (as (key, value) tuples), or None.\r
-\r
- if listitems is not None:\r
- self._batch_appends(listitems)\r
-\r
- if dictitems is not None:\r
- self._batch_setitems(dictitems)\r
-\r
- if state is not None:\r
- save(state)\r
- write(BUILD)\r
-\r
- # Methods below this point are dispatched through the dispatch table\r
-\r
- dispatch = {}\r
-\r
- def save_none(self, obj):\r
- self.write(NONE)\r
- dispatch[NoneType] = save_none\r
-\r
- def save_bool(self, obj):\r
- if self.proto >= 2:\r
- self.write(obj and NEWTRUE or NEWFALSE)\r
- else:\r
- self.write(obj and TRUE or FALSE)\r
- dispatch[bool] = save_bool\r
-\r
- def save_int(self, obj, pack=struct.pack):\r
- if self.bin:\r
- # If the int is small enough to fit in a signed 4-byte 2's-comp\r
- # format, we can store it more efficiently than the general\r
- # case.\r
- # First one- and two-byte unsigned ints:\r
- if obj >= 0:\r
- if obj <= 0xff:\r
- self.write(BININT1 + chr(obj))\r
- return\r
- if obj <= 0xffff:\r
- self.write("%c%c%c" % (BININT2, obj&0xff, obj>>8))\r
- return\r
- # Next check for 4-byte signed ints:\r
- high_bits = obj >> 31 # note that Python shift sign-extends\r
- if high_bits == 0 or high_bits == -1:\r
- # All high bits are copies of bit 2**31, so the value\r
- # fits in a 4-byte signed int.\r
- self.write(BININT + pack("<i", obj))\r
- return\r
- # Text pickle, or int too big to fit in signed 4-byte format.\r
- self.write(INT + repr(obj) + '\n')\r
- dispatch[IntType] = save_int\r
-\r
- def save_long(self, obj, pack=struct.pack):\r
- if self.proto >= 2:\r
- bytes = encode_long(obj)\r
- n = len(bytes)\r
- if n < 256:\r
- self.write(LONG1 + chr(n) + bytes)\r
- else:\r
- self.write(LONG4 + pack("<i", n) + bytes)\r
- return\r
- self.write(LONG + repr(obj) + '\n')\r
- dispatch[LongType] = save_long\r
-\r
- def save_float(self, obj, pack=struct.pack):\r
- if self.bin:\r
- self.write(BINFLOAT + pack('>d', obj))\r
- else:\r
- self.write(FLOAT + repr(obj) + '\n')\r
- dispatch[FloatType] = save_float\r
-\r
- def save_string(self, obj, pack=struct.pack):\r
- if self.bin:\r
- n = len(obj)\r
- if n < 256:\r
- self.write(SHORT_BINSTRING + chr(n) + obj)\r
- else:\r
- self.write(BINSTRING + pack("<i", n) + obj)\r
- else:\r
- self.write(STRING + repr(obj) + '\n')\r
- self.memoize(obj)\r
- dispatch[StringType] = save_string\r
-\r
- def save_unicode(self, obj, pack=struct.pack):\r
- if self.bin:\r
- encoding = obj.encode('utf-8')\r
- n = len(encoding)\r
- self.write(BINUNICODE + pack("<i", n) + encoding)\r
- else:\r
- obj = obj.replace("\\", "\\u005c")\r
- obj = obj.replace("\n", "\\u000a")\r
- self.write(UNICODE + obj.encode('raw-unicode-escape') + '\n')\r
- self.memoize(obj)\r
- dispatch[UnicodeType] = save_unicode\r
-\r
- if StringType is UnicodeType:\r
- # This is true for Jython\r
- def save_string(self, obj, pack=struct.pack):\r
- unicode = obj.isunicode()\r
-\r
- if self.bin:\r
- if unicode:\r
- obj = obj.encode("utf-8")\r
- l = len(obj)\r
- if l < 256 and not unicode:\r
- self.write(SHORT_BINSTRING + chr(l) + obj)\r
- else:\r
- s = pack("<i", l)\r
- if unicode:\r
- self.write(BINUNICODE + s + obj)\r
- else:\r
- self.write(BINSTRING + s + obj)\r
- else:\r
- if unicode:\r
- obj = obj.replace("\\", "\\u005c")\r
- obj = obj.replace("\n", "\\u000a")\r
- obj = obj.encode('raw-unicode-escape')\r
- self.write(UNICODE + obj + '\n')\r
- else:\r
- self.write(STRING + repr(obj) + '\n')\r
- self.memoize(obj)\r
- dispatch[StringType] = save_string\r
-\r
- def save_tuple(self, obj):\r
- write = self.write\r
- proto = self.proto\r
-\r
- n = len(obj)\r
- if n == 0:\r
- if proto:\r
- write(EMPTY_TUPLE)\r
- else:\r
- write(MARK + TUPLE)\r
- return\r
-\r
- save = self.save\r
- memo = self.memo\r
- if n <= 3 and proto >= 2:\r
- for element in obj:\r
- save(element)\r
- # Subtle. Same as in the big comment below.\r
- if id(obj) in memo:\r
- get = self.get(memo[id(obj)][0])\r
- write(POP * n + get)\r
- else:\r
- write(_tuplesize2code[n])\r
- self.memoize(obj)\r
- return\r
-\r
- # proto 0 or proto 1 and tuple isn't empty, or proto > 1 and tuple\r
- # has more than 3 elements.\r
- write(MARK)\r
- for element in obj:\r
- save(element)\r
-\r
- if id(obj) in memo:\r
- # Subtle. d was not in memo when we entered save_tuple(), so\r
- # the process of saving the tuple's elements must have saved\r
- # the tuple itself: the tuple is recursive. The proper action\r
- # now is to throw away everything we put on the stack, and\r
- # simply GET the tuple (it's already constructed). This check\r
- # could have been done in the "for element" loop instead, but\r
- # recursive tuples are a rare thing.\r
- get = self.get(memo[id(obj)][0])\r
- if proto:\r
- write(POP_MARK + get)\r
- else: # proto 0 -- POP_MARK not available\r
- write(POP * (n+1) + get)\r
- return\r
-\r
- # No recursion.\r
- self.write(TUPLE)\r
- self.memoize(obj)\r
-\r
- dispatch[TupleType] = save_tuple\r
-\r
- # save_empty_tuple() isn't used by anything in Python 2.3. However, I\r
- # found a Pickler subclass in Zope3 that calls it, so it's not harmless\r
- # to remove it.\r
- def save_empty_tuple(self, obj):\r
- self.write(EMPTY_TUPLE)\r
-\r
- def save_list(self, obj):\r
- write = self.write\r
-\r
- if self.bin:\r
- write(EMPTY_LIST)\r
- else: # proto 0 -- can't use EMPTY_LIST\r
- write(MARK + LIST)\r
-\r
- self.memoize(obj)\r
- self._batch_appends(iter(obj))\r
-\r
- dispatch[ListType] = save_list\r
-\r
- # Keep in synch with cPickle's BATCHSIZE. Nothing will break if it gets\r
- # out of synch, though.\r
- _BATCHSIZE = 1000\r
-\r
- def _batch_appends(self, items):\r
- # Helper to batch up APPENDS sequences\r
- save = self.save\r
- write = self.write\r
-\r
- if not self.bin:\r
- for x in items:\r
- save(x)\r
- write(APPEND)\r
- return\r
-\r
- r = xrange(self._BATCHSIZE)\r
- while items is not None:\r
- tmp = []\r
- for i in r:\r
- try:\r
- x = items.next()\r
- tmp.append(x)\r
- except StopIteration:\r
- items = None\r
- break\r
- n = len(tmp)\r
- if n > 1:\r
- write(MARK)\r
- for x in tmp:\r
- save(x)\r
- write(APPENDS)\r
- elif n:\r
- save(tmp[0])\r
- write(APPEND)\r
- # else tmp is empty, and we're done\r
-\r
- def save_dict(self, obj):\r
- write = self.write\r
-\r
- if self.bin:\r
- write(EMPTY_DICT)\r
- else: # proto 0 -- can't use EMPTY_DICT\r
- write(MARK + DICT)\r
-\r
- self.memoize(obj)\r
- self._batch_setitems(obj.iteritems())\r
-\r
- dispatch[DictionaryType] = save_dict\r
- if not PyStringMap is None:\r
- dispatch[PyStringMap] = save_dict\r
-\r
- def _batch_setitems(self, items):\r
- # Helper to batch up SETITEMS sequences; proto >= 1 only\r
- save = self.save\r
- write = self.write\r
-\r
- if not self.bin:\r
- for k, v in items:\r
- save(k)\r
- save(v)\r
- write(SETITEM)\r
- return\r
-\r
- r = xrange(self._BATCHSIZE)\r
- while items is not None:\r
- tmp = []\r
- for i in r:\r
- try:\r
- tmp.append(items.next())\r
- except StopIteration:\r
- items = None\r
- break\r
- n = len(tmp)\r
- if n > 1:\r
- write(MARK)\r
- for k, v in tmp:\r
- save(k)\r
- save(v)\r
- write(SETITEMS)\r
- elif n:\r
- k, v = tmp[0]\r
- save(k)\r
- save(v)\r
- write(SETITEM)\r
- # else tmp is empty, and we're done\r
-\r
- def save_inst(self, obj):\r
- cls = obj.__class__\r
-\r
- memo = self.memo\r
- write = self.write\r
- save = self.save\r
-\r
- if hasattr(obj, '__getinitargs__'):\r
- args = obj.__getinitargs__()\r
- len(args) # XXX Assert it's a sequence\r
- _keep_alive(args, memo)\r
- else:\r
- args = ()\r
-\r
- write(MARK)\r
-\r
- if self.bin:\r
- save(cls)\r
- for arg in args:\r
- save(arg)\r
- write(OBJ)\r
- else:\r
- for arg in args:\r
- save(arg)\r
- write(INST + cls.__module__ + '\n' + cls.__name__ + '\n')\r
-\r
- self.memoize(obj)\r
-\r
- try:\r
- getstate = obj.__getstate__\r
- except AttributeError:\r
- stuff = obj.__dict__\r
- else:\r
- stuff = getstate()\r
- _keep_alive(stuff, memo)\r
- save(stuff)\r
- write(BUILD)\r
-\r
- dispatch[InstanceType] = save_inst\r
-\r
- def save_global(self, obj, name=None, pack=struct.pack):\r
- write = self.write\r
- memo = self.memo\r
-\r
- if name is None:\r
- name = obj.__name__\r
-\r
- module = getattr(obj, "__module__", None)\r
- if module is None:\r
- module = whichmodule(obj, name)\r
-\r
- try:\r
- __import__(module)\r
- mod = sys.modules[module]\r
- klass = getattr(mod, name)\r
- except (ImportError, KeyError, AttributeError):\r
- raise PicklingError(\r
- "Can't pickle %r: it's not found as %s.%s" %\r
- (obj, module, name))\r
- else:\r
- if klass is not obj:\r
- raise PicklingError(\r
- "Can't pickle %r: it's not the same object as %s.%s" %\r
- (obj, module, name))\r
-\r
- if self.proto >= 2:\r
- code = _extension_registry.get((module, name))\r
- if code:\r
- assert code > 0\r
- if code <= 0xff:\r
- write(EXT1 + chr(code))\r
- elif code <= 0xffff:\r
- write("%c%c%c" % (EXT2, code&0xff, code>>8))\r
- else:\r
- write(EXT4 + pack("<i", code))\r
- return\r
-\r
- write(GLOBAL + module + '\n' + name + '\n')\r
- self.memoize(obj)\r
-\r
- dispatch[ClassType] = save_global\r
- dispatch[FunctionType] = save_global\r
- dispatch[BuiltinFunctionType] = save_global\r
- dispatch[TypeType] = save_global\r
-\r
-# Pickling helpers\r
-\r
-def _keep_alive(x, memo):\r
- """Keeps a reference to the object x in the memo.\r
-\r
- Because we remember objects by their id, we have\r
- to assure that possibly temporary objects are kept\r
- alive by referencing them.\r
- We store a reference at the id of the memo, which should\r
- normally not be used unless someone tries to deepcopy\r
- the memo itself...\r
- """\r
- try:\r
- memo[id(memo)].append(x)\r
- except KeyError:\r
- # aha, this is the first one :-)\r
- memo[id(memo)]=[x]\r
-\r
-\r
-# A cache for whichmodule(), mapping a function object to the name of\r
-# the module in which the function was found.\r
-\r
-classmap = {} # called classmap for backwards compatibility\r
-\r
-def whichmodule(func, funcname):\r
- """Figure out the module in which a function occurs.\r
-\r
- Search sys.modules for the module.\r
- Cache in classmap.\r
- Return a module name.\r
- If the function cannot be found, return "__main__".\r
- """\r
- # Python functions should always get an __module__ from their globals.\r
- mod = getattr(func, "__module__", None)\r
- if mod is not None:\r
- return mod\r
- if func in classmap:\r
- return classmap[func]\r
-\r
- for name, module in sys.modules.items():\r
- if module is None:\r
- continue # skip dummy package entries\r
- if name != '__main__' and getattr(module, funcname, None) is func:\r
- break\r
- else:\r
- name = '__main__'\r
- classmap[func] = name\r
- return name\r
-\r
-\r
-# Unpickling machinery\r
-\r
-class Unpickler:\r
-\r
- def __init__(self, file):\r
- """This takes a file-like object for reading a pickle data stream.\r
-\r
- The protocol version of the pickle is detected automatically, so no\r
- proto argument is needed.\r
-\r
- The file-like object must have two methods, a read() method that\r
- takes an integer argument, and a readline() method that requires no\r
- arguments. Both methods should return a string. Thus file-like\r
- object can be a file object opened for reading, a StringIO object,\r
- or any other custom object that meets this interface.\r
- """\r
- self.readline = file.readline\r
- self.read = file.read\r
- self.memo = {}\r
-\r
- def load(self):\r
- """Read a pickled object representation from the open file.\r
-\r
- Return the reconstituted object hierarchy specified in the file.\r
- """\r
- self.mark = object() # any new unique object\r
- self.stack = []\r
- self.append = self.stack.append\r
- read = self.read\r
- dispatch = self.dispatch\r
- try:\r
- while 1:\r
- key = read(1)\r
- dispatch[key](self)\r
- except _Stop, stopinst:\r
- return stopinst.value\r
-\r
- # Return largest index k such that self.stack[k] is self.mark.\r
- # If the stack doesn't contain a mark, eventually raises IndexError.\r
- # This could be sped by maintaining another stack, of indices at which\r
- # the mark appears. For that matter, the latter stack would suffice,\r
- # and we wouldn't need to push mark objects on self.stack at all.\r
- # Doing so is probably a good thing, though, since if the pickle is\r
- # corrupt (or hostile) we may get a clue from finding self.mark embedded\r
- # in unpickled objects.\r
- def marker(self):\r
- stack = self.stack\r
- mark = self.mark\r
- k = len(stack)-1\r
- while stack[k] is not mark: k = k-1\r
- return k\r
-\r
- dispatch = {}\r
-\r
- def load_eof(self):\r
- raise EOFError\r
- dispatch[''] = load_eof\r
-\r
- def load_proto(self):\r
- proto = ord(self.read(1))\r
- if not 0 <= proto <= 2:\r
- raise ValueError, "unsupported pickle protocol: %d" % proto\r
- dispatch[PROTO] = load_proto\r
-\r
- def load_persid(self):\r
- pid = self.readline()[:-1]\r
- self.append(self.persistent_load(pid))\r
- dispatch[PERSID] = load_persid\r
-\r
- def load_binpersid(self):\r
- pid = self.stack.pop()\r
- self.append(self.persistent_load(pid))\r
- dispatch[BINPERSID] = load_binpersid\r
-\r
- def load_none(self):\r
- self.append(None)\r
- dispatch[NONE] = load_none\r
-\r
- def load_false(self):\r
- self.append(False)\r
- dispatch[NEWFALSE] = load_false\r
-\r
- def load_true(self):\r
- self.append(True)\r
- dispatch[NEWTRUE] = load_true\r
-\r
- def load_int(self):\r
- data = self.readline()\r
- if data == FALSE[1:]:\r
- val = False\r
- elif data == TRUE[1:]:\r
- val = True\r
- else:\r
- try:\r
- val = int(data)\r
- except ValueError:\r
- val = long(data)\r
- self.append(val)\r
- dispatch[INT] = load_int\r
-\r
- def load_binint(self):\r
- self.append(mloads('i' + self.read(4)))\r
- dispatch[BININT] = load_binint\r
-\r
- def load_binint1(self):\r
- self.append(ord(self.read(1)))\r
- dispatch[BININT1] = load_binint1\r
-\r
- def load_binint2(self):\r
- self.append(mloads('i' + self.read(2) + '\000\000'))\r
- dispatch[BININT2] = load_binint2\r
-\r
- def load_long(self):\r
- self.append(long(self.readline()[:-1], 0))\r
- dispatch[LONG] = load_long\r
-\r
- def load_long1(self):\r
- n = ord(self.read(1))\r
- bytes = self.read(n)\r
- self.append(decode_long(bytes))\r
- dispatch[LONG1] = load_long1\r
-\r
- def load_long4(self):\r
- n = mloads('i' + self.read(4))\r
- bytes = self.read(n)\r
- self.append(decode_long(bytes))\r
- dispatch[LONG4] = load_long4\r
-\r
- def load_float(self):\r
- self.append(float(self.readline()[:-1]))\r
- dispatch[FLOAT] = load_float\r
-\r
- def load_binfloat(self, unpack=struct.unpack):\r
- self.append(unpack('>d', self.read(8))[0])\r
- dispatch[BINFLOAT] = load_binfloat\r
-\r
- def load_string(self):\r
- rep = self.readline()[:-1]\r
- for q in "\"'": # double or single quote\r
- if rep.startswith(q):\r
- if not rep.endswith(q):\r
- raise ValueError, "insecure string pickle"\r
- rep = rep[len(q):-len(q)]\r
- break\r
- else:\r
- raise ValueError, "insecure string pickle"\r
- self.append(rep.decode("string-escape"))\r
- dispatch[STRING] = load_string\r
-\r
- def load_binstring(self):\r
- len = mloads('i' + self.read(4))\r
- self.append(self.read(len))\r
- dispatch[BINSTRING] = load_binstring\r
-\r
- def load_unicode(self):\r
- self.append(unicode(self.readline()[:-1],'raw-unicode-escape'))\r
- dispatch[UNICODE] = load_unicode\r
-\r
- def load_binunicode(self):\r
- len = mloads('i' + self.read(4))\r
- self.append(unicode(self.read(len),'utf-8'))\r
- dispatch[BINUNICODE] = load_binunicode\r
-\r
- def load_short_binstring(self):\r
- len = ord(self.read(1))\r
- self.append(self.read(len))\r
- dispatch[SHORT_BINSTRING] = load_short_binstring\r
-\r
- def load_tuple(self):\r
- k = self.marker()\r
- self.stack[k:] = [tuple(self.stack[k+1:])]\r
- dispatch[TUPLE] = load_tuple\r
-\r
- def load_empty_tuple(self):\r
- self.stack.append(())\r
- dispatch[EMPTY_TUPLE] = load_empty_tuple\r
-\r
- def load_tuple1(self):\r
- self.stack[-1] = (self.stack[-1],)\r
- dispatch[TUPLE1] = load_tuple1\r
-\r
- def load_tuple2(self):\r
- self.stack[-2:] = [(self.stack[-2], self.stack[-1])]\r
- dispatch[TUPLE2] = load_tuple2\r
-\r
- def load_tuple3(self):\r
- self.stack[-3:] = [(self.stack[-3], self.stack[-2], self.stack[-1])]\r
- dispatch[TUPLE3] = load_tuple3\r
-\r
- def load_empty_list(self):\r
- self.stack.append([])\r
- dispatch[EMPTY_LIST] = load_empty_list\r
-\r
- def load_empty_dictionary(self):\r
- self.stack.append({})\r
- dispatch[EMPTY_DICT] = load_empty_dictionary\r
-\r
- def load_list(self):\r
- k = self.marker()\r
- self.stack[k:] = [self.stack[k+1:]]\r
- dispatch[LIST] = load_list\r
-\r
- def load_dict(self):\r
- k = self.marker()\r
- d = {}\r
- items = self.stack[k+1:]\r
- for i in range(0, len(items), 2):\r
- key = items[i]\r
- value = items[i+1]\r
- d[key] = value\r
- self.stack[k:] = [d]\r
- dispatch[DICT] = load_dict\r
-\r
- # INST and OBJ differ only in how they get a class object. It's not\r
- # only sensible to do the rest in a common routine, the two routines\r
- # previously diverged and grew different bugs.\r
- # klass is the class to instantiate, and k points to the topmost mark\r
- # object, following which are the arguments for klass.__init__.\r
- def _instantiate(self, klass, k):\r
- args = tuple(self.stack[k+1:])\r
- del self.stack[k:]\r
- instantiated = 0\r
- if (not args and\r
- type(klass) is ClassType and\r
- not hasattr(klass, "__getinitargs__")):\r
- try:\r
- value = _EmptyClass()\r
- value.__class__ = klass\r
- instantiated = 1\r
- except RuntimeError:\r
- # In restricted execution, assignment to inst.__class__ is\r
- # prohibited\r
- pass\r
- if not instantiated:\r
- try:\r
- value = klass(*args)\r
- except TypeError, err:\r
- raise TypeError, "in constructor for %s: %s" % (\r
- klass.__name__, str(err)), sys.exc_info()[2]\r
- self.append(value)\r
-\r
- def load_inst(self):\r
- module = self.readline()[:-1]\r
- name = self.readline()[:-1]\r
- klass = self.find_class(module, name)\r
- self._instantiate(klass, self.marker())\r
- dispatch[INST] = load_inst\r
-\r
- def load_obj(self):\r
- # Stack is ... markobject classobject arg1 arg2 ...\r
- k = self.marker()\r
- klass = self.stack.pop(k+1)\r
- self._instantiate(klass, k)\r
- dispatch[OBJ] = load_obj\r
-\r
- def load_newobj(self):\r
- args = self.stack.pop()\r
- cls = self.stack[-1]\r
- obj = cls.__new__(cls, *args)\r
- self.stack[-1] = obj\r
- dispatch[NEWOBJ] = load_newobj\r
-\r
- def load_global(self):\r
- module = self.readline()[:-1]\r
- name = self.readline()[:-1]\r
- klass = self.find_class(module, name)\r
- self.append(klass)\r
- dispatch[GLOBAL] = load_global\r
-\r
- def load_ext1(self):\r
- code = ord(self.read(1))\r
- self.get_extension(code)\r
- dispatch[EXT1] = load_ext1\r
-\r
- def load_ext2(self):\r
- code = mloads('i' + self.read(2) + '\000\000')\r
- self.get_extension(code)\r
- dispatch[EXT2] = load_ext2\r
-\r
- def load_ext4(self):\r
- code = mloads('i' + self.read(4))\r
- self.get_extension(code)\r
- dispatch[EXT4] = load_ext4\r
-\r
- def get_extension(self, code):\r
- nil = []\r
- obj = _extension_cache.get(code, nil)\r
- if obj is not nil:\r
- self.append(obj)\r
- return\r
- key = _inverted_registry.get(code)\r
- if not key:\r
- raise ValueError("unregistered extension code %d" % code)\r
- obj = self.find_class(*key)\r
- _extension_cache[code] = obj\r
- self.append(obj)\r
-\r
- def find_class(self, module, name):\r
- # Subclasses may override this\r
- __import__(module)\r
- mod = sys.modules[module]\r
- klass = getattr(mod, name)\r
- return klass\r
-\r
- def load_reduce(self):\r
- stack = self.stack\r
- args = stack.pop()\r
- func = stack[-1]\r
- value = func(*args)\r
- stack[-1] = value\r
- dispatch[REDUCE] = load_reduce\r
-\r
- def load_pop(self):\r
- del self.stack[-1]\r
- dispatch[POP] = load_pop\r
-\r
- def load_pop_mark(self):\r
- k = self.marker()\r
- del self.stack[k:]\r
- dispatch[POP_MARK] = load_pop_mark\r
-\r
- def load_dup(self):\r
- self.append(self.stack[-1])\r
- dispatch[DUP] = load_dup\r
-\r
- def load_get(self):\r
- self.append(self.memo[self.readline()[:-1]])\r
- dispatch[GET] = load_get\r
-\r
- def load_binget(self):\r
- i = ord(self.read(1))\r
- self.append(self.memo[repr(i)])\r
- dispatch[BINGET] = load_binget\r
-\r
- def load_long_binget(self):\r
- i = mloads('i' + self.read(4))\r
- self.append(self.memo[repr(i)])\r
- dispatch[LONG_BINGET] = load_long_binget\r
-\r
- def load_put(self):\r
- self.memo[self.readline()[:-1]] = self.stack[-1]\r
- dispatch[PUT] = load_put\r
-\r
- def load_binput(self):\r
- i = ord(self.read(1))\r
- self.memo[repr(i)] = self.stack[-1]\r
- dispatch[BINPUT] = load_binput\r
-\r
- def load_long_binput(self):\r
- i = mloads('i' + self.read(4))\r
- self.memo[repr(i)] = self.stack[-1]\r
- dispatch[LONG_BINPUT] = load_long_binput\r
-\r
- def load_append(self):\r
- stack = self.stack\r
- value = stack.pop()\r
- list = stack[-1]\r
- list.append(value)\r
- dispatch[APPEND] = load_append\r
-\r
- def load_appends(self):\r
- stack = self.stack\r
- mark = self.marker()\r
- list = stack[mark - 1]\r
- list.extend(stack[mark + 1:])\r
- del stack[mark:]\r
- dispatch[APPENDS] = load_appends\r
-\r
- def load_setitem(self):\r
- stack = self.stack\r
- value = stack.pop()\r
- key = stack.pop()\r
- dict = stack[-1]\r
- dict[key] = value\r
- dispatch[SETITEM] = load_setitem\r
-\r
- def load_setitems(self):\r
- stack = self.stack\r
- mark = self.marker()\r
- dict = stack[mark - 1]\r
- for i in range(mark + 1, len(stack), 2):\r
- dict[stack[i]] = stack[i + 1]\r
-\r
- del stack[mark:]\r
- dispatch[SETITEMS] = load_setitems\r
-\r
    def load_build(self):
        # BUILD: apply a state object (top of stack) to the instance just
        # below it, either via the instance's __setstate__ or by updating
        # its __dict__ (and, for protocol 2, setting slot attributes).
        stack = self.stack
        state = stack.pop()
        inst = stack[-1]
        setstate = getattr(inst, "__setstate__", None)
        if setstate:
            # The object takes full responsibility for restoring its state.
            setstate(state)
            return
        slotstate = None
        # A 2-tuple state means (dict-state, slot-state) -- protocol 2.
        if isinstance(state, tuple) and len(state) == 2:
            state, slotstate = state
        if state:
            try:
                d = inst.__dict__
                try:
                    # intern() the keys so repeated attribute names share
                    # one string object across instances.
                    for k, v in state.iteritems():
                        d[intern(k)] = v
                # keys in state don't have to be strings
                # don't blow up, but don't go out of our way
                except TypeError:
                    d.update(state)

            except RuntimeError:
                # XXX In restricted execution, the instance's __dict__
                # is not accessible. Use the old way of unpickling
                # the instance variables. This is a semantic
                # difference when unpickling in restricted
                # vs. unrestricted modes.
                # Note, however, that cPickle has never tried to do the
                # .update() business, and always uses
                # PyObject_SetItem(inst.__dict__, key, value) in a
                # loop over state.items().
                for k, v in state.items():
                    setattr(inst, k, v)
        if slotstate:
            # Slot attributes must go through setattr; they live outside
            # __dict__.
            for k, v in slotstate.items():
                setattr(inst, k, v)
    dispatch[BUILD] = load_build
-\r
- def load_mark(self):\r
- self.append(self.mark)\r
- dispatch[MARK] = load_mark\r
-\r
- def load_stop(self):\r
- value = self.stack.pop()\r
- raise _Stop(value)\r
- dispatch[STOP] = load_stop\r
-\r
# Helper class for load_inst/load_obj

class _EmptyClass:
    # A featureless old-style class: _instantiate creates one of these
    # and then reassigns __class__, so the real class's __init__ is
    # never called.
    pass
-\r
-# Encode/decode longs in linear time.\r
-\r
-import binascii as _binascii\r
-\r
def encode_long(x):
    r"""Encode a long to a two's complement little-endian binary string.
    Note that 0L is a special case, returning an empty string, to save a
    byte in the LONG1 pickling context.

    >>> encode_long(0L)
    ''
    >>> encode_long(255L)
    '\xff\x00'
    >>> encode_long(32767L)
    '\xff\x7f'
    >>> encode_long(-256L)
    '\x00\xff'
    >>> encode_long(-32768L)
    '\x00\x80'
    >>> encode_long(-128L)
    '\x80'
    >>> encode_long(127L)
    '\x7f'
    >>>
    """

    if x == 0:
        return ''
    if x > 0:
        # hex() yields "0x..." plus, for longs, a trailing "L".
        ashex = hex(x)
        assert ashex.startswith("0x")
        njunkchars = 2 + ashex.endswith('L')
        nibbles = len(ashex) - njunkchars
        if nibbles & 1:
            # need an even # of nibbles for unhexlify
            ashex = "0x0" + ashex[2:]
        elif int(ashex[2], 16) >= 8:
            # "looks negative", so need a byte of sign bits
            ashex = "0x00" + ashex[2:]
    else:
        # Build the 256's-complement: (1L << nbytes) + x. The trick is
        # to find the number of bytes in linear time (although that should
        # really be a constant-time task).
        ashex = hex(-x)
        assert ashex.startswith("0x")
        njunkchars = 2 + ashex.endswith('L')
        nibbles = len(ashex) - njunkchars
        if nibbles & 1:
            # Extend to a full byte.
            nibbles += 1
        nbits = nibbles * 4
        x += 1L << nbits
        assert x > 0
        ashex = hex(x)
        njunkchars = 2 + ashex.endswith('L')
        newnibbles = len(ashex) - njunkchars
        if newnibbles < nibbles:
            # The complement came out shorter; pad back to full width.
            ashex = "0x" + "0" * (nibbles - newnibbles) + ashex[2:]
        if int(ashex[2], 16) < 8:
            # "looks positive", so need a byte of sign bits
            ashex = "0xff" + ashex[2:]

    # Strip the "0x" prefix and any trailing "L" before unhexlify.
    if ashex.endswith('L'):
        ashex = ashex[2:-1]
    else:
        ashex = ashex[2:]
    assert len(ashex) & 1 == 0, (x, ashex)
    binary = _binascii.unhexlify(ashex)
    # unhexlify produced big-endian bytes; the pickle format wants
    # little-endian.
    return binary[::-1]
-\r
-def decode_long(data):\r
- r"""Decode a long from a two's complement little-endian binary string.\r
-\r
- >>> decode_long('')\r
- 0L\r
- >>> decode_long("\xff\x00")\r
- 255L\r
- >>> decode_long("\xff\x7f")\r
- 32767L\r
- >>> decode_long("\x00\xff")\r
- -256L\r
- >>> decode_long("\x00\x80")\r
- -32768L\r
- >>> decode_long("\x80")\r
- -128L\r
- >>> decode_long("\x7f")\r
- 127L\r
- """\r
-\r
- nbytes = len(data)\r
- if nbytes == 0:\r
- return 0L\r
- ashex = _binascii.hexlify(data[::-1])\r
- n = long(ashex, 16) # quadratic time before Python 2.3; linear now\r
- if data[-1] >= '\x80':\r
- n -= 1L << (nbytes * 8)\r
- return n\r
-\r
# Shorthands

# Prefer the C implementation of StringIO for speed; fall back to the
# pure-Python module where cStringIO is unavailable.
try:
    from cStringIO import StringIO
except ImportError:
    from StringIO import StringIO
-\r
def dump(obj, file, protocol=None):
    """Pickle obj, writing the result to the open file object *file*."""
    pickler = Pickler(file, protocol)
    pickler.dump(obj)
-\r
def dumps(obj, protocol=None):
    """Pickle obj and return the pickled representation as a string."""
    buf = StringIO()
    Pickler(buf, protocol).dump(obj)
    return buf.getvalue()
-\r
def load(file):
    """Read a pickled object representation from the open file object."""
    unpickler = Unpickler(file)
    return unpickler.load()
-\r
def loads(str):
    """Reconstitute an object from a pickle string."""
    # NOTE(review): the parameter name shadows the builtin `str`; it is
    # kept because it is part of the public (keyword-callable) interface.
    return Unpickler(StringIO(str)).load()
-\r
# Doctest

def _test():
    # Run this module's doctests (the encode_long/decode_long examples)
    # and return the (failed, attempted) results.
    import doctest
    return doctest.testmod()

if __name__ == "__main__":
    _test()