1 """Create portable serialized representations of Python objects.
2
3 See module cPickle for a (much) faster implementation.
4 See module copy_reg for a mechanism for registering custom picklers.
5 See module pickletools source for extensive comments.
6
7 Classes:
8
9 Pickler
10 Unpickler
11
12 Functions:
13
14 dump(object, file)
15 dumps(object) -> string
16 load(file) -> object
17 loads(string) -> object
18
19 Misc variables:
20
21 __version__
22 format_version
23 compatible_formats
24
25 """
26
27 __version__ = "$Revision$" # Code version
28
29 from types import *
30 from copy_reg import dispatch_table
31 from copy_reg import _extension_registry, _inverted_registry, _extension_cache
32 import marshal
33 import sys
34 import struct
35 import re
36
37 __all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler",
38 "Unpickler", "dump", "dumps", "load", "loads"]
39
40 # These are purely informational; no code uses these.
41 format_version = "2.0" # File format version we write
42 compatible_formats = ["1.0", # Original protocol 0
43 "1.1", # Protocol 0 with INST added
44 "1.2", # Original protocol 1
45 "1.3", # Protocol 1 with BINFLOAT added
46 "2.0", # Protocol 2
47 ] # Old format versions we can read
48
49 # Keep in synch with cPickle. This is the highest protocol number we
50 # know how to read.
51 HIGHEST_PROTOCOL = 2
52
53 # Why use struct.pack() for pickling but marshal.loads() for
54 # unpickling? struct.pack() is 40% faster than marshal.dumps(), but
55 # marshal.loads() is twice as fast as struct.unpack()!
56 mloads = marshal.loads
57
58 class PickleError(Exception):
59 """A common base class for the other pickling exceptions."""
60 pass
61
62 class PicklingError(PickleError):
63 """This exception is raised when an unpicklable object is passed to the
64 dump() method.
65
66 """
67 pass
68
69 class UnpicklingError(PickleError):
70 """This exception is raised when there is a problem unpickling an object,
71 such as a security violation.
72
73 Note that other exceptions may also be raised during unpickling, including
74 (but not necessarily limited to) AttributeError, EOFError, ImportError,
75 and IndexError.
76
77 """
78 pass
79
80 # An instance of _Stop is raised by Unpickler.load_stop() in response to
81 # the STOP opcode, passing the object that is the result of unpickling.
82 class _Stop(Exception):
83 def __init__(self, value):
84 self.value = value
85
86 # Jython has PyStringMap; it's a dict subclass with string keys
87 try:
88 from org.python.core import PyStringMap
89 except ImportError:
90 PyStringMap = None
91
92 # UnicodeType may or may not be exported (normally imported from types)
93 try:
94 UnicodeType
95 except NameError:
96 UnicodeType = None
97
98 # Pickle opcodes. See pickletools.py for extensive docs. The listing
99 # here is in roughly alphabetical order of the 1-character pickle codes.
100 # pickletools groups them by purpose.
101
102 MARK = '(' # push special markobject on stack
103 STOP = '.' # every pickle ends with STOP
104 POP = '0' # discard topmost stack item
105 POP_MARK = '1' # discard stack top through topmost markobject
106 DUP = '2' # duplicate top stack item
107 FLOAT = 'F' # push float object; decimal string argument
108 INT = 'I' # push integer or bool; decimal string argument
109 BININT = 'J' # push four-byte signed int
110 BININT1 = 'K' # push 1-byte unsigned int
111 LONG = 'L' # push long; decimal string argument
112 BININT2 = 'M' # push 2-byte unsigned int
113 NONE = 'N' # push None
114 PERSID = 'P' # push persistent object; id is taken from string arg
115 BINPERSID = 'Q' # " " " ; " " " " stack
116 REDUCE = 'R' # apply callable to argtuple, both on stack
117 STRING = 'S' # push string; NL-terminated string argument
118 BINSTRING = 'T' # push string; counted binary string argument
119 SHORT_BINSTRING = 'U' # " " ; " " " " < 256 bytes
120 UNICODE = 'V' # push Unicode string; raw-unicode-escaped'd argument
121 BINUNICODE = 'X' # " " " ; counted UTF-8 string argument
122 APPEND = 'a' # append stack top to list below it
123 BUILD = 'b' # call __setstate__ or __dict__.update()
124 GLOBAL = 'c' # push self.find_class(modname, name); 2 string args
125 DICT = 'd' # build a dict from stack items
126 EMPTY_DICT = '}' # push empty dict
127 APPENDS = 'e' # extend list on stack by topmost stack slice
128 GET = 'g' # push item from memo on stack; index is string arg
129 BINGET = 'h' # " " " " " " ; " " 1-byte arg
130 INST = 'i' # build & push class instance
131 LONG_BINGET = 'j' # push item from memo on stack; index is 4-byte arg
132 LIST = 'l' # build list from topmost stack items
133 EMPTY_LIST = ']' # push empty list
134 OBJ = 'o' # build & push class instance
135 PUT = 'p' # store stack top in memo; index is string arg
136 BINPUT = 'q' # " " " " " ; " " 1-byte arg
137 LONG_BINPUT = 'r' # " " " " " ; " " 4-byte arg
138 SETITEM = 's' # add key+value pair to dict
139 TUPLE = 't' # build tuple from topmost stack items
140 EMPTY_TUPLE = ')' # push empty tuple
141 SETITEMS = 'u' # modify dict by adding topmost key+value pairs
142 BINFLOAT = 'G' # push float; arg is 8-byte float encoding
143
144 TRUE = 'I01\n' # not an opcode; see INT docs in pickletools.py
145 FALSE = 'I00\n' # not an opcode; see INT docs in pickletools.py
146
147 # Protocol 2
148
149 PROTO = '\x80' # identify pickle protocol
150 NEWOBJ = '\x81' # build object by applying cls.__new__ to argtuple
151 EXT1 = '\x82' # push object from extension registry; 1-byte index
152 EXT2 = '\x83' # ditto, but 2-byte index
153 EXT4 = '\x84' # ditto, but 4-byte index
154 TUPLE1 = '\x85' # build 1-tuple from stack top
155 TUPLE2 = '\x86' # build 2-tuple from two topmost stack items
156 TUPLE3 = '\x87' # build 3-tuple from three topmost stack items
157 NEWTRUE = '\x88' # push True
158 NEWFALSE = '\x89' # push False
159 LONG1 = '\x8a' # push long from < 256 bytes
160 LONG4 = '\x8b' # push really big long
161
162 _tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3]
163
164
165 __all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$",x)])
166 del x
167
168
169 # Pickling machinery
170
171 class Pickler:
172
173 def __init__(self, file, protocol=None):
174 """This takes a file-like object for writing a pickle data stream.
175
176 The optional protocol argument tells the pickler to use the
177 given protocol; supported protocols are 0, 1, 2. The default
178 protocol is 0, to be backwards compatible. (Protocol 0 is the
179 only protocol that can be written to a file opened in text
180 mode and read back successfully. When using a protocol higher
181 than 0, make sure the file is opened in binary mode, both when
182 pickling and unpickling.)
183
184 Protocol 1 is more efficient than protocol 0; protocol 2 is
185 more efficient than protocol 1.
186
187 Specifying a negative protocol version selects the highest
188 protocol version supported. The higher the protocol used, the
189 more recent the version of Python needed to read the pickle
190 produced.
191
192 The file parameter must have a write() method that accepts a single
193 string argument. It can thus be an open file object, a StringIO
194 object, or any other custom object that meets this interface.
195
196 """
197 if protocol is None:
198 protocol = 0
199 if protocol < 0:
200 protocol = HIGHEST_PROTOCOL
201 elif not 0 <= protocol <= HIGHEST_PROTOCOL:
202 raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL)
203 self.write = file.write
204 self.memo = {}
205 self.proto = int(protocol)
206 self.bin = protocol >= 1
207 self.fast = 0
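    # A minimal sketch (illustrative, not part of the original module):
    # driving a Pickler directly mirrors what the module-level dump()
    # shorthand does, and protocol=-1 selects HIGHEST_PROTOCOL as described
    # in the docstring above.
    #
    #     >>> from cStringIO import StringIO
    #     >>> buf = StringIO()
    #     >>> Pickler(buf, -1).dump(['a', 'b'])
    #     >>> Unpickler(StringIO(buf.getvalue())).load()
    #     ['a', 'b']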
208
209 def clear_memo(self):
210 """Clears the pickler's "memo".
211
212 The memo is the data structure that remembers which objects the
213 pickler has already seen, so that shared or recursive objects are
214 pickled by reference and not by value. This method is useful when
215 re-using picklers.
216
217 """
218 self.memo.clear()
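    # Illustrative note (not in the original source): when one Pickler writes
    # several pickles to the same stream, calling clear_memo() between dump()
    # calls keeps each pickle self-contained; otherwise a later pickle may
    # refer to memo keys that were only written out as part of an earlier one.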
219
220 def dump(self, obj):
221 """Write a pickled representation of obj to the open file."""
222 if self.proto >= 2:
223 self.write(PROTO + chr(self.proto))
224 self.save(obj)
225 self.write(STOP)
226
227 def memoize(self, obj):
228 """Store an object in the memo."""
229
230 # The Pickler memo is a dictionary mapping object ids to 2-tuples
231 # that contain the Unpickler memo key and the object being memoized.
232 # The memo key is written to the pickle and will become
233 # the key in the Unpickler's memo. The object is stored in the
234 # Pickler memo so that transient objects are kept alive during
235 # pickling.
236
237 # The use of the Unpickler memo length as the memo key is just a
238 # convention. The only requirement is that the memo values be unique.
239 # But there appears no advantage to any other scheme, and this
240 # scheme allows the Unpickler memo to be implemented as a plain (but
241 # growable) array, indexed by memo key.
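        # Illustrative note (not in the original source): if a list L is the
        # first object memoized, self.memo becomes {id(L): (0, L)} and a
        # PUT/BINPUT for key 0 is written; when save() meets L again it emits
        # a GET/BINGET for key 0 instead of pickling L a second time.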
242 if self.fast:
243 return
244 assert id(obj) not in self.memo
245 memo_len = len(self.memo)
246 self.write(self.put(memo_len))
247 self.memo[id(obj)] = memo_len, obj
248
249 # Return a PUT (BINPUT, LONG_BINPUT) opcode string, with argument i.
250 def put(self, i, pack=struct.pack):
251 if self.bin:
252 if i < 256:
253 return BINPUT + chr(i)
254 else:
255 return LONG_BINPUT + pack("<i", i)
256
257 return PUT + repr(i) + '\n'
258
259 # Return a GET (BINGET, LONG_BINGET) opcode string, with argument i.
260 def get(self, i, pack=struct.pack):
261 if self.bin:
262 if i < 256:
263 return BINGET + chr(i)
264 else:
265 return LONG_BINGET + pack("<i", i)
266
267 return GET + repr(i) + '\n'
268
269 def save(self, obj):
270 # Check for persistent id (defined by a subclass)
271 pid = self.persistent_id(obj)
272 if pid:
273 self.save_pers(pid)
274 return
275
276 # Check the memo
277 x = self.memo.get(id(obj))
278 if x:
279 self.write(self.get(x[0]))
280 return
281
282 # Check the type dispatch table
283 t = type(obj)
284 f = self.dispatch.get(t)
285 if f:
286 f(self, obj) # Call unbound method with explicit self
287 return
288
289 # Check for a class with a custom metaclass; treat as regular class
290 try:
291 issc = issubclass(t, TypeType)
292 except TypeError: # t is not a class (old Boost; see SF #502085)
293 issc = 0
294 if issc:
295 self.save_global(obj)
296 return
297
298 # Check copy_reg.dispatch_table
299 reduce = dispatch_table.get(t)
300 if reduce:
301 rv = reduce(obj)
302 else:
303 # Check for a __reduce_ex__ method, fall back to __reduce__
304 reduce = getattr(obj, "__reduce_ex__", None)
305 if reduce:
306 rv = reduce(self.proto)
307 else:
308 reduce = getattr(obj, "__reduce__", None)
309 if reduce:
310 rv = reduce()
311 else:
312 raise PicklingError("Can't pickle %r object: %r" %
313 (t.__name__, obj))
314
315 # Check for string returned by reduce(), meaning "save as global"
316 if type(rv) is StringType:
317 self.save_global(obj, rv)
318 return
319
320 # Assert that reduce() returned a tuple
321 if type(rv) is not TupleType:
322 raise PicklingError("%s must return string or tuple" % reduce)
323
324 # Assert that it returned an appropriately sized tuple
325 l = len(rv)
326 if not (2 <= l <= 5):
327 raise PicklingError("Tuple returned by %s must have "
328 "two to five elements" % reduce)
329
330 # Save the reduce() output and finally memoize the object
331 self.save_reduce(obj=obj, *rv)
332
333 def persistent_id(self, obj):
334 # This exists so a subclass can override it
335 return None
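    # Illustrative sketch (DBRef and lookup_dbref are hypothetical): a
    # subclass may return a string id here so that such objects are pickled
    # as references, paired with a persistent_load() method on a matching
    # Unpickler subclass.
    #
    #     class RefPickler(Pickler):
    #         def persistent_id(self, obj):
    #             if isinstance(obj, DBRef):
    #                 return "dbref:%s" % obj.key
    #             return None
    #
    #     class RefUnpickler(Unpickler):
    #         def persistent_load(self, pid):
    #             return lookup_dbref(pid[len("dbref:"):])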
336
337 def save_pers(self, pid):
338 # Save a persistent id reference
339 if self.bin:
340 self.save(pid)
341 self.write(BINPERSID)
342 else:
343 self.write(PERSID + str(pid) + '\n')
344
345 def save_reduce(self, func, args, state=None,
346 listitems=None, dictitems=None, obj=None):
347 # This API is called by some subclasses
348
349 # Assert that args is a tuple or None
350 if not isinstance(args, TupleType):
351 raise PicklingError("args from reduce() should be a tuple")
352
353 # Assert that func is callable
354 if not hasattr(func, '__call__'):
355 raise PicklingError("func from reduce should be callable")
356
357 save = self.save
358 write = self.write
359
360 # Protocol 2 special case: if func's name is __newobj__, use NEWOBJ
361 if self.proto >= 2 and getattr(func, "__name__", "") == "__newobj__":
362 # A __reduce__ implementation can direct protocol 2 to
363 # use the more efficient NEWOBJ opcode, while still
364 # allowing protocol 0 and 1 to work normally. For this to
365 # work, the function returned by __reduce__ should be
366 # called __newobj__, and its first argument should be a
367 # new-style class. The implementation for __newobj__
368 # should be as follows, although pickle has no way to
369 # verify this:
370 #
371 # def __newobj__(cls, *args):
372 # return cls.__new__(cls, *args)
373 #
374 # Protocols 0 and 1 will pickle a reference to __newobj__,
375 # while protocol 2 (and above) will pickle a reference to
376 # cls, the remaining args tuple, and the NEWOBJ code,
377 # which calls cls.__new__(cls, *args) at unpickling time
378 # (see load_newobj below). If __reduce__ returns a
379 # three-tuple, the state from the third tuple item will be
380 # pickled regardless of the protocol, calling __setstate__
381 # at unpickling time (see load_build below).
382 #
383 # Note that no standard __newobj__ implementation exists;
384 # you have to provide your own. This is to enforce
385 # compatibility with Python 2.2 (pickles written using
386 # protocol 0 or 1 in Python 2.3 should be unpicklable by
387 # Python 2.2).
388 cls = args[0]
389 if not hasattr(cls, "__new__"):
390 raise PicklingError(
391 "args[0] from __newobj__ args has no __new__")
392 if obj is not None and cls is not obj.__class__:
393 raise PicklingError(
394 "args[0] from __newobj__ args has the wrong class")
395 args = args[1:]
396 save(cls)
397 save(args)
398 write(NEWOBJ)
399 else:
400 save(func)
401 save(args)
402 write(REDUCE)
403
404 if obj is not None:
405 self.memoize(obj)
406
407 # More new special cases (that work with older protocols as
408 # well): when __reduce__ returns a tuple with 4 or 5 items,
409 # the 4th and 5th item should be iterators that provide list
410 # items and dict items (as (key, value) tuples), or None.
411
412 if listitems is not None:
413 self._batch_appends(listitems)
414
415 if dictitems is not None:
416 self._batch_setitems(dictitems)
417
418 if state is not None:
419 save(state)
420 write(BUILD)
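    # Illustrative sketch (TaggedList is hypothetical): a __reduce__ returning
    # a 4-tuple routes the element iterator through the batched APPENDS
    # machinery above, so the list contents are pickled item by item rather
    # than as one argument to the constructor.
    #
    #     class TaggedList(list):
    #         def __init__(self, tag=""):
    #             self.tag = tag
    #         def __reduce__(self):
    #             # (callable, args, state, listitems)
    #             return (TaggedList, (self.tag,), None, iter(self))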
421
422 # Methods below this point are dispatched through the dispatch table
423
424 dispatch = {}
425
426 def save_none(self, obj):
427 self.write(NONE)
428 dispatch[NoneType] = save_none
429
430 def save_bool(self, obj):
431 if self.proto >= 2:
432 self.write(obj and NEWTRUE or NEWFALSE)
433 else:
434 self.write(obj and TRUE or FALSE)
435 dispatch[bool] = save_bool
436
437 def save_int(self, obj, pack=struct.pack):
438 if self.bin:
439 # If the int is small enough to fit in a signed 4-byte 2's-comp
440 # format, we can store it more efficiently than the general
441 # case.
442 # First one- and two-byte unsigned ints:
443 if obj >= 0:
444 if obj <= 0xff:
445 self.write(BININT1 + chr(obj))
446 return
447 if obj <= 0xffff:
448 self.write("%c%c%c" % (BININT2, obj&0xff, obj>>8))
449 return
450 # Next check for 4-byte signed ints:
451 high_bits = obj >> 31 # note that Python shift sign-extends
452 if high_bits == 0 or high_bits == -1:
453 # All high bits are copies of bit 2**31, so the value
454 # fits in a 4-byte signed int.
455 self.write(BININT + pack("<i", obj))
456 return
457 # Text pickle, or int too big to fit in signed 4-byte format.
458 self.write(INT + repr(obj) + '\n')
459 dispatch[IntType] = save_int
460
461 def save_long(self, obj, pack=struct.pack):
462 if self.proto >= 2:
463 bytes = encode_long(obj)
464 n = len(bytes)
465 if n < 256:
466 self.write(LONG1 + chr(n) + bytes)
467 else:
468 self.write(LONG4 + pack("<i", n) + bytes)
469 return
470 self.write(LONG + repr(obj) + '\n')
471 dispatch[LongType] = save_long
472
473 def save_float(self, obj, pack=struct.pack):
474 if self.bin:
475 self.write(BINFLOAT + pack('>d', obj))
476 else:
477 self.write(FLOAT + repr(obj) + '\n')
478 dispatch[FloatType] = save_float
479
480 def save_string(self, obj, pack=struct.pack):
481 if self.bin:
482 n = len(obj)
483 if n < 256:
484 self.write(SHORT_BINSTRING + chr(n) + obj)
485 else:
486 self.write(BINSTRING + pack("<i", n) + obj)
487 else:
488 self.write(STRING + repr(obj) + '\n')
489 self.memoize(obj)
490 dispatch[StringType] = save_string
491
492 def save_unicode(self, obj, pack=struct.pack):
493 if self.bin:
494 encoding = obj.encode('utf-8')
495 n = len(encoding)
496 self.write(BINUNICODE + pack("<i", n) + encoding)
497 else:
498 obj = obj.replace("\\", "\\u005c")
499 obj = obj.replace("\n", "\\u000a")
500 self.write(UNICODE + obj.encode('raw-unicode-escape') + '\n')
501 self.memoize(obj)
502 dispatch[UnicodeType] = save_unicode
503
504 if StringType is UnicodeType:
505 # This is true for Jython
506 def save_string(self, obj, pack=struct.pack):
507 unicode = obj.isunicode()
508
509 if self.bin:
510 if unicode:
511 obj = obj.encode("utf-8")
512 l = len(obj)
513 if l < 256 and not unicode:
514 self.write(SHORT_BINSTRING + chr(l) + obj)
515 else:
516 s = pack("<i", l)
517 if unicode:
518 self.write(BINUNICODE + s + obj)
519 else:
520 self.write(BINSTRING + s + obj)
521 else:
522 if unicode:
523 obj = obj.replace("\\", "\\u005c")
524 obj = obj.replace("\n", "\\u000a")
525 obj = obj.encode('raw-unicode-escape')
526 self.write(UNICODE + obj + '\n')
527 else:
528 self.write(STRING + repr(obj) + '\n')
529 self.memoize(obj)
530 dispatch[StringType] = save_string
531
532 def save_tuple(self, obj):
533 write = self.write
534 proto = self.proto
535
536 n = len(obj)
537 if n == 0:
538 if proto:
539 write(EMPTY_TUPLE)
540 else:
541 write(MARK + TUPLE)
542 return
543
544 save = self.save
545 memo = self.memo
546 if n <= 3 and proto >= 2:
547 for element in obj:
548 save(element)
549 # Subtle. Same as in the big comment below.
550 if id(obj) in memo:
551 get = self.get(memo[id(obj)][0])
552 write(POP * n + get)
553 else:
554 write(_tuplesize2code[n])
555 self.memoize(obj)
556 return
557
558 # proto 0 or proto 1 and tuple isn't empty, or proto > 1 and tuple
559 # has more than 3 elements.
560 write(MARK)
561 for element in obj:
562 save(element)
563
564 if id(obj) in memo:
565 # Subtle. d was not in memo when we entered save_tuple(), so
566 # the process of saving the tuple's elements must have saved
567 # the tuple itself: the tuple is recursive. The proper action
568 # now is to throw away everything we put on the stack, and
569 # simply GET the tuple (it's already constructed). This check
570 # could have been done in the "for element" loop instead, but
571 # recursive tuples are a rare thing.
572 get = self.get(memo[id(obj)][0])
573 if proto:
574 write(POP_MARK + get)
575 else: # proto 0 -- POP_MARK not available
576 write(POP * (n+1) + get)
577 return
578
579 # No recursion.
580 self.write(TUPLE)
581 self.memoize(obj)
582
583 dispatch[TupleType] = save_tuple
584
585 # save_empty_tuple() isn't used by anything in Python 2.3. However, I
586 # found a Pickler subclass in Zope3 that calls it, so it's not harmless
587 # to remove it.
588 def save_empty_tuple(self, obj):
589 self.write(EMPTY_TUPLE)
590
591 def save_list(self, obj):
592 write = self.write
593
594 if self.bin:
595 write(EMPTY_LIST)
596 else: # proto 0 -- can't use EMPTY_LIST
597 write(MARK + LIST)
598
599 self.memoize(obj)
600 self._batch_appends(iter(obj))
601
602 dispatch[ListType] = save_list
603
604 # Keep in synch with cPickle's BATCHSIZE. Nothing will break if it gets
605 # out of synch, though.
606 _BATCHSIZE = 1000
607
608 def _batch_appends(self, items):
609 # Helper to batch up APPENDS sequences
610 save = self.save
611 write = self.write
612
613 if not self.bin:
614 for x in items:
615 save(x)
616 write(APPEND)
617 return
618
619 r = xrange(self._BATCHSIZE)
620 while items is not None:
621 tmp = []
622 for i in r:
623 try:
624 x = items.next()
625 tmp.append(x)
626 except StopIteration:
627 items = None
628 break
629 n = len(tmp)
630 if n > 1:
631 write(MARK)
632 for x in tmp:
633 save(x)
634 write(APPENDS)
635 elif n:
636 save(tmp[0])
637 write(APPEND)
638 # else tmp is empty, and we're done
639
640 def save_dict(self, obj):
641 write = self.write
642
643 if self.bin:
644 write(EMPTY_DICT)
645 else: # proto 0 -- can't use EMPTY_DICT
646 write(MARK + DICT)
647
648 self.memoize(obj)
649 self._batch_setitems(obj.iteritems())
650
651 dispatch[DictionaryType] = save_dict
652 if not PyStringMap is None:
653 dispatch[PyStringMap] = save_dict
654
655 def _batch_setitems(self, items):
656 # Helper to batch up SETITEMS sequences; proto >= 1 only
657 save = self.save
658 write = self.write
659
660 if not self.bin:
661 for k, v in items:
662 save(k)
663 save(v)
664 write(SETITEM)
665 return
666
667 r = xrange(self._BATCHSIZE)
668 while items is not None:
669 tmp = []
670 for i in r:
671 try:
672 tmp.append(items.next())
673 except StopIteration:
674 items = None
675 break
676 n = len(tmp)
677 if n > 1:
678 write(MARK)
679 for k, v in tmp:
680 save(k)
681 save(v)
682 write(SETITEMS)
683 elif n:
684 k, v = tmp[0]
685 save(k)
686 save(v)
687 write(SETITEM)
688 # else tmp is empty, and we're done
689
690 def save_inst(self, obj):
691 cls = obj.__class__
692
693 memo = self.memo
694 write = self.write
695 save = self.save
696
697 if hasattr(obj, '__getinitargs__'):
698 args = obj.__getinitargs__()
699 len(args) # XXX Assert it's a sequence
700 _keep_alive(args, memo)
701 else:
702 args = ()
703
704 write(MARK)
705
706 if self.bin:
707 save(cls)
708 for arg in args:
709 save(arg)
710 write(OBJ)
711 else:
712 for arg in args:
713 save(arg)
714 write(INST + cls.__module__ + '\n' + cls.__name__ + '\n')
715
716 self.memoize(obj)
717
718 try:
719 getstate = obj.__getstate__
720 except AttributeError:
721 stuff = obj.__dict__
722 else:
723 stuff = getstate()
724 _keep_alive(stuff, memo)
725 save(stuff)
726 write(BUILD)
727
728 dispatch[InstanceType] = save_inst
729
730 def save_global(self, obj, name=None, pack=struct.pack):
731 write = self.write
732 memo = self.memo
733
734 if name is None:
735 name = obj.__name__
736
737 module = getattr(obj, "__module__", None)
738 if module is None:
739 module = whichmodule(obj, name)
740
741 try:
742 __import__(module)
743 mod = sys.modules[module]
744 klass = getattr(mod, name)
745 except (ImportError, KeyError, AttributeError):
746 raise PicklingError(
747 "Can't pickle %r: it's not found as %s.%s" %
748 (obj, module, name))
749 else:
750 if klass is not obj:
751 raise PicklingError(
752 "Can't pickle %r: it's not the same object as %s.%s" %
753 (obj, module, name))
754
755 if self.proto >= 2:
756 code = _extension_registry.get((module, name))
757 if code:
758 assert code > 0
759 if code <= 0xff:
760 write(EXT1 + chr(code))
761 elif code <= 0xffff:
762 write("%c%c%c" % (EXT2, code&0xff, code>>8))
763 else:
764 write(EXT4 + pack("<i", code))
765 return
766
767 write(GLOBAL + module + '\n' + name + '\n')
768 self.memoize(obj)
769
770 dispatch[ClassType] = save_global
771 dispatch[FunctionType] = save_global
772 dispatch[BuiltinFunctionType] = save_global
773 dispatch[TypeType] = save_global
774
775 # Pickling helpers
776
777 def _keep_alive(x, memo):
778 """Keeps a reference to the object x in the memo.
779
780 Because we remember objects by their id, we have
781 to assure that possibly temporary objects are kept
782 alive by referencing them.
783 We store a reference at the id of the memo, which should
784 normally not be used unless someone tries to deepcopy
785 the memo itself...
786 """
787 try:
788 memo[id(memo)].append(x)
789 except KeyError:
790 # aha, this is the first one :-)
791 memo[id(memo)]=[x]
792
793
794 # A cache for whichmodule(), mapping a function object to the name of
795 # the module in which the function was found.
796
797 classmap = {} # called classmap for backwards compatibility
798
799 def whichmodule(func, funcname):
800 """Figure out the module in which a function occurs.
801
802 Search sys.modules for the module.
803 Cache in classmap.
804 Return a module name.
805 If the function cannot be found, return "__main__".
806 """
807 # Python functions should always get an __module__ from their globals.
808 mod = getattr(func, "__module__", None)
809 if mod is not None:
810 return mod
811 if func in classmap:
812 return classmap[func]
813
814 for name, module in sys.modules.items():
815 if module is None:
816 continue # skip dummy package entries
817 if name != '__main__' and getattr(module, funcname, None) is func:
818 break
819 else:
820 name = '__main__'
821 classmap[func] = name
822 return name
823
824
825 # Unpickling machinery
826
827 class Unpickler:
828
829 def __init__(self, file):
830 """This takes a file-like object for reading a pickle data stream.
831
832 The protocol version of the pickle is detected automatically, so no
833 proto argument is needed.
834
835 The file-like object must have two methods, a read() method that
836 takes an integer argument, and a readline() method that requires no
837 arguments. Both methods should return a string. Thus the file-like
838 object can be a file object opened for reading, a StringIO object,
839 or any other custom object that meets this interface.
840 """
841 self.readline = file.readline
842 self.read = file.read
843 self.memo = {}
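    # Illustrative note: any object whose read(n) and readline() return str
    # satisfies this interface; for in-memory data a StringIO works directly.
    #
    #     >>> from cStringIO import StringIO
    #     >>> Unpickler(StringIO(dumps({'k': 1}))).load()
    #     {'k': 1}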
844
845 def load(self):
846 """Read a pickled object representation from the open file.
847
848 Return the reconstituted object hierarchy specified in the file.
849 """
850 self.mark = object() # any new unique object
851 self.stack = []
852 self.append = self.stack.append
853 read = self.read
854 dispatch = self.dispatch
855 try:
856 while 1:
857 key = read(1)
858 dispatch[key](self)
859 except _Stop, stopinst:
860 return stopinst.value
861
862 # Return largest index k such that self.stack[k] is self.mark.
863 # If the stack doesn't contain a mark, eventually raises IndexError.
864 # This could be sped by maintaining another stack, of indices at which
865 # the mark appears. For that matter, the latter stack would suffice,
866 # and we wouldn't need to push mark objects on self.stack at all.
867 # Doing so is probably a good thing, though, since if the pickle is
868 # corrupt (or hostile) we may get a clue from finding self.mark embedded
869 # in unpickled objects.
870 def marker(self):
871 stack = self.stack
872 mark = self.mark
873 k = len(stack)-1
874 while stack[k] is not mark: k = k-1
875 return k
876
877 dispatch = {}
878
879 def load_eof(self):
880 raise EOFError
881 dispatch[''] = load_eof
882
883 def load_proto(self):
884 proto = ord(self.read(1))
885 if not 0 <= proto <= 2:
886 raise ValueError, "unsupported pickle protocol: %d" % proto
887 dispatch[PROTO] = load_proto
888
889 def load_persid(self):
890 pid = self.readline()[:-1]
891 self.append(self.persistent_load(pid))
892 dispatch[PERSID] = load_persid
893
894 def load_binpersid(self):
895 pid = self.stack.pop()
896 self.append(self.persistent_load(pid))
897 dispatch[BINPERSID] = load_binpersid
898
899 def load_none(self):
900 self.append(None)
901 dispatch[NONE] = load_none
902
903 def load_false(self):
904 self.append(False)
905 dispatch[NEWFALSE] = load_false
906
907 def load_true(self):
908 self.append(True)
909 dispatch[NEWTRUE] = load_true
910
911 def load_int(self):
912 data = self.readline()
913 if data == FALSE[1:]:
914 val = False
915 elif data == TRUE[1:]:
916 val = True
917 else:
918 try:
919 val = int(data)
920 except ValueError:
921 val = long(data)
922 self.append(val)
923 dispatch[INT] = load_int
924
925 def load_binint(self):
926 self.append(mloads('i' + self.read(4)))
927 dispatch[BININT] = load_binint
928
929 def load_binint1(self):
930 self.append(ord(self.read(1)))
931 dispatch[BININT1] = load_binint1
932
933 def load_binint2(self):
934 self.append(mloads('i' + self.read(2) + '\000\000'))
935 dispatch[BININT2] = load_binint2
936
937 def load_long(self):
938 self.append(long(self.readline()[:-1], 0))
939 dispatch[LONG] = load_long
940
941 def load_long1(self):
942 n = ord(self.read(1))
943 bytes = self.read(n)
944 self.append(decode_long(bytes))
945 dispatch[LONG1] = load_long1
946
947 def load_long4(self):
948 n = mloads('i' + self.read(4))
949 bytes = self.read(n)
950 self.append(decode_long(bytes))
951 dispatch[LONG4] = load_long4
952
953 def load_float(self):
954 self.append(float(self.readline()[:-1]))
955 dispatch[FLOAT] = load_float
956
957 def load_binfloat(self, unpack=struct.unpack):
958 self.append(unpack('>d', self.read(8))[0])
959 dispatch[BINFLOAT] = load_binfloat
960
961 def load_string(self):
962 rep = self.readline()[:-1]
963 for q in "\"'": # double or single quote
964 if rep.startswith(q):
965 if not rep.endswith(q):
966 raise ValueError, "insecure string pickle"
967 rep = rep[len(q):-len(q)]
968 break
969 else:
970 raise ValueError, "insecure string pickle"
971 self.append(rep.decode("string-escape"))
972 dispatch[STRING] = load_string
973
974 def load_binstring(self):
975 len = mloads('i' + self.read(4))
976 self.append(self.read(len))
977 dispatch[BINSTRING] = load_binstring
978
979 def load_unicode(self):
980 self.append(unicode(self.readline()[:-1],'raw-unicode-escape'))
981 dispatch[UNICODE] = load_unicode
982
983 def load_binunicode(self):
984 len = mloads('i' + self.read(4))
985 self.append(unicode(self.read(len),'utf-8'))
986 dispatch[BINUNICODE] = load_binunicode
987
988 def load_short_binstring(self):
989 len = ord(self.read(1))
990 self.append(self.read(len))
991 dispatch[SHORT_BINSTRING] = load_short_binstring
992
993 def load_tuple(self):
994 k = self.marker()
995 self.stack[k:] = [tuple(self.stack[k+1:])]
996 dispatch[TUPLE] = load_tuple
997
998 def load_empty_tuple(self):
999 self.stack.append(())
1000 dispatch[EMPTY_TUPLE] = load_empty_tuple
1001
1002 def load_tuple1(self):
1003 self.stack[-1] = (self.stack[-1],)
1004 dispatch[TUPLE1] = load_tuple1
1005
1006 def load_tuple2(self):
1007 self.stack[-2:] = [(self.stack[-2], self.stack[-1])]
1008 dispatch[TUPLE2] = load_tuple2
1009
1010 def load_tuple3(self):
1011 self.stack[-3:] = [(self.stack[-3], self.stack[-2], self.stack[-1])]
1012 dispatch[TUPLE3] = load_tuple3
1013
1014 def load_empty_list(self):
1015 self.stack.append([])
1016 dispatch[EMPTY_LIST] = load_empty_list
1017
1018 def load_empty_dictionary(self):
1019 self.stack.append({})
1020 dispatch[EMPTY_DICT] = load_empty_dictionary
1021
1022 def load_list(self):
1023 k = self.marker()
1024 self.stack[k:] = [self.stack[k+1:]]
1025 dispatch[LIST] = load_list
1026
1027 def load_dict(self):
1028 k = self.marker()
1029 d = {}
1030 items = self.stack[k+1:]
1031 for i in range(0, len(items), 2):
1032 key = items[i]
1033 value = items[i+1]
1034 d[key] = value
1035 self.stack[k:] = [d]
1036 dispatch[DICT] = load_dict
1037
1038 # INST and OBJ differ only in how they get a class object. It's not
1039 # only sensible to do the rest in a common routine; when they were kept
1040 # separate, the two routines diverged and grew different bugs.
1041 # klass is the class to instantiate, and k points to the topmost mark
1042 # object, following which are the arguments for klass.__init__.
1043 def _instantiate(self, klass, k):
1044 args = tuple(self.stack[k+1:])
1045 del self.stack[k:]
1046 instantiated = 0
1047 if (not args and
1048 type(klass) is ClassType and
1049 not hasattr(klass, "__getinitargs__")):
1050 try:
1051 value = _EmptyClass()
1052 value.__class__ = klass
1053 instantiated = 1
1054 except RuntimeError:
1055 # In restricted execution, assignment to inst.__class__ is
1056 # prohibited
1057 pass
1058 if not instantiated:
1059 try:
1060 value = klass(*args)
1061 except TypeError, err:
1062 raise TypeError, "in constructor for %s: %s" % (
1063 klass.__name__, str(err)), sys.exc_info()[2]
1064 self.append(value)
1065
1066 def load_inst(self):
1067 module = self.readline()[:-1]
1068 name = self.readline()[:-1]
1069 klass = self.find_class(module, name)
1070 self._instantiate(klass, self.marker())
1071 dispatch[INST] = load_inst
1072
1073 def load_obj(self):
1074 # Stack is ... markobject classobject arg1 arg2 ...
1075 k = self.marker()
1076 klass = self.stack.pop(k+1)
1077 self._instantiate(klass, k)
1078 dispatch[OBJ] = load_obj
1079
1080 def load_newobj(self):
1081 args = self.stack.pop()
1082 cls = self.stack[-1]
1083 obj = cls.__new__(cls, *args)
1084 self.stack[-1] = obj
1085 dispatch[NEWOBJ] = load_newobj
1086
1087 def load_global(self):
1088 module = self.readline()[:-1]
1089 name = self.readline()[:-1]
1090 klass = self.find_class(module, name)
1091 self.append(klass)
1092 dispatch[GLOBAL] = load_global
1093
1094 def load_ext1(self):
1095 code = ord(self.read(1))
1096 self.get_extension(code)
1097 dispatch[EXT1] = load_ext1
1098
1099 def load_ext2(self):
1100 code = mloads('i' + self.read(2) + '\000\000')
1101 self.get_extension(code)
1102 dispatch[EXT2] = load_ext2
1103
1104 def load_ext4(self):
1105 code = mloads('i' + self.read(4))
1106 self.get_extension(code)
1107 dispatch[EXT4] = load_ext4
1108
1109 def get_extension(self, code):
1110 nil = []
1111 obj = _extension_cache.get(code, nil)
1112 if obj is not nil:
1113 self.append(obj)
1114 return
1115 key = _inverted_registry.get(code)
1116 if not key:
1117 raise ValueError("unregistered extension code %d" % code)
1118 obj = self.find_class(*key)
1119 _extension_cache[code] = obj
1120 self.append(obj)
1121
1122 def find_class(self, module, name):
1123 # Subclasses may override this
1124 __import__(module)
1125 mod = sys.modules[module]
1126 klass = getattr(mod, name)
1127 return klass
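    # Illustrative sketch: because every GLOBAL/INST/EXT lookup funnels
    # through find_class(), a subclass can whitelist what it will import,
    # which is one common way to restrict what a pickle may load.
    #
    #     class SafeUnpickler(Unpickler):
    #         allowed = {('__builtin__', 'set'), ('collections', 'OrderedDict')}
    #         def find_class(self, module, name):
    #             if (module, name) not in self.allowed:
    #                 raise UnpicklingError("global %s.%s is forbidden"
    #                                       % (module, name))
    #             return Unpickler.find_class(self, module, name)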
1128
1129 def load_reduce(self):
1130 stack = self.stack
1131 args = stack.pop()
1132 func = stack[-1]
1133 value = func(*args)
1134 stack[-1] = value
1135 dispatch[REDUCE] = load_reduce
1136
1137 def load_pop(self):
1138 del self.stack[-1]
1139 dispatch[POP] = load_pop
1140
1141 def load_pop_mark(self):
1142 k = self.marker()
1143 del self.stack[k:]
1144 dispatch[POP_MARK] = load_pop_mark
1145
1146 def load_dup(self):
1147 self.append(self.stack[-1])
1148 dispatch[DUP] = load_dup
1149
1150 def load_get(self):
1151 self.append(self.memo[self.readline()[:-1]])
1152 dispatch[GET] = load_get
1153
1154 def load_binget(self):
1155 i = ord(self.read(1))
1156 self.append(self.memo[repr(i)])
1157 dispatch[BINGET] = load_binget
1158
1159 def load_long_binget(self):
1160 i = mloads('i' + self.read(4))
1161 self.append(self.memo[repr(i)])
1162 dispatch[LONG_BINGET] = load_long_binget
1163
1164 def load_put(self):
1165 self.memo[self.readline()[:-1]] = self.stack[-1]
1166 dispatch[PUT] = load_put
1167
1168 def load_binput(self):
1169 i = ord(self.read(1))
1170 self.memo[repr(i)] = self.stack[-1]
1171 dispatch[BINPUT] = load_binput
1172
1173 def load_long_binput(self):
1174 i = mloads('i' + self.read(4))
1175 self.memo[repr(i)] = self.stack[-1]
1176 dispatch[LONG_BINPUT] = load_long_binput
1177
1178 def load_append(self):
1179 stack = self.stack
1180 value = stack.pop()
1181 list = stack[-1]
1182 list.append(value)
1183 dispatch[APPEND] = load_append
1184
1185 def load_appends(self):
1186 stack = self.stack
1187 mark = self.marker()
1188 list = stack[mark - 1]
1189 list.extend(stack[mark + 1:])
1190 del stack[mark:]
1191 dispatch[APPENDS] = load_appends
1192
1193 def load_setitem(self):
1194 stack = self.stack
1195 value = stack.pop()
1196 key = stack.pop()
1197 dict = stack[-1]
1198 dict[key] = value
1199 dispatch[SETITEM] = load_setitem
1200
1201 def load_setitems(self):
1202 stack = self.stack
1203 mark = self.marker()
1204 dict = stack[mark - 1]
1205 for i in range(mark + 1, len(stack), 2):
1206 dict[stack[i]] = stack[i + 1]
1207
1208 del stack[mark:]
1209 dispatch[SETITEMS] = load_setitems
1210
1211 def load_build(self):
1212 stack = self.stack
1213 state = stack.pop()
1214 inst = stack[-1]
1215 setstate = getattr(inst, "__setstate__", None)
1216 if setstate:
1217 setstate(state)
1218 return
1219 slotstate = None
1220 if isinstance(state, tuple) and len(state) == 2:
1221 state, slotstate = state
1222 if state:
1223 try:
1224 d = inst.__dict__
1225 try:
1226 for k, v in state.iteritems():
1227 d[intern(k)] = v
1228 # keys in state don't have to be strings
1229 # don't blow up, but don't go out of our way
1230 except TypeError:
1231 d.update(state)
1232
1233 except RuntimeError:
1234 # XXX In restricted execution, the instance's __dict__
1235 # is not accessible. Use the old way of unpickling
1236 # the instance variables. This is a semantic
1237 # difference when unpickling in restricted
1238 # vs. unrestricted modes.
1239 # Note, however, that cPickle has never tried to do the
1240 # .update() business, and always uses
1241 # PyObject_SetItem(inst.__dict__, key, value) in a
1242 # loop over state.items().
1243 for k, v in state.items():
1244 setattr(inst, k, v)
1245 if slotstate:
1246 for k, v in slotstate.items():
1247 setattr(inst, k, v)
1248 dispatch[BUILD] = load_build
1249
1250 def load_mark(self):
1251 self.append(self.mark)
1252 dispatch[MARK] = load_mark
1253
1254 def load_stop(self):
1255 value = self.stack.pop()
1256 raise _Stop(value)
1257 dispatch[STOP] = load_stop
1258
1259 # Helper class for load_inst/load_obj
1260
1261 class _EmptyClass:
1262 pass
1263
1264 # Encode/decode longs in linear time.
1265
1266 import binascii as _binascii
1267
1268 def encode_long(x):
1269 r"""Encode a long to a two's complement little-endian binary string.
1270 Note that 0L is a special case, returning an empty string, to save a
1271 byte in the LONG1 pickling context.
1272
1273 >>> encode_long(0L)
1274 ''
1275 >>> encode_long(255L)
1276 '\xff\x00'
1277 >>> encode_long(32767L)
1278 '\xff\x7f'
1279 >>> encode_long(-256L)
1280 '\x00\xff'
1281 >>> encode_long(-32768L)
1282 '\x00\x80'
1283 >>> encode_long(-128L)
1284 '\x80'
1285 >>> encode_long(127L)
1286 '\x7f'
1287 >>>
1288 """
1289
1290 if x == 0:
1291 return ''
1292 if x > 0:
1293 ashex = hex(x)
1294 assert ashex.startswith("0x")
1295 njunkchars = 2 + ashex.endswith('L')
1296 nibbles = len(ashex) - njunkchars
1297 if nibbles & 1:
1298 # need an even # of nibbles for unhexlify
1299 ashex = "0x0" + ashex[2:]
1300 elif int(ashex[2], 16) >= 8:
1301 # "looks negative", so need a byte of sign bits
1302 ashex = "0x00" + ashex[2:]
1303 else:
1304 # Build the 256's-complement: (1L << nbytes) + x. The trick is
1305 # to find the number of bytes in linear time (although that should
1306 # really be a constant-time task).
1307 ashex = hex(-x)
1308 assert ashex.startswith("0x")
1309 njunkchars = 2 + ashex.endswith('L')
1310 nibbles = len(ashex) - njunkchars
1311 if nibbles & 1:
1312 # Extend to a full byte.
1313 nibbles += 1
1314 nbits = nibbles * 4
1315 x += 1L << nbits
1316 assert x > 0
1317 ashex = hex(x)
1318 njunkchars = 2 + ashex.endswith('L')
1319 newnibbles = len(ashex) - njunkchars
1320 if newnibbles < nibbles:
1321 ashex = "0x" + "0" * (nibbles - newnibbles) + ashex[2:]
1322 if int(ashex[2], 16) < 8:
1323 # "looks positive", so need a byte of sign bits
1324 ashex = "0xff" + ashex[2:]
1325
1326 if ashex.endswith('L'):
1327 ashex = ashex[2:-1]
1328 else:
1329 ashex = ashex[2:]
1330 assert len(ashex) & 1 == 0, (x, ashex)
1331 binary = _binascii.unhexlify(ashex)
1332 return binary[::-1]
1333
1334 def decode_long(data):
1335 r"""Decode a long from a two's complement little-endian binary string.
1336
1337 >>> decode_long('')
1338 0L
1339 >>> decode_long("\xff\x00")
1340 255L
1341 >>> decode_long("\xff\x7f")
1342 32767L
1343 >>> decode_long("\x00\xff")
1344 -256L
1345 >>> decode_long("\x00\x80")
1346 -32768L
1347 >>> decode_long("\x80")
1348 -128L
1349 >>> decode_long("\x7f")
1350 127L
1351 """
1352
1353 nbytes = len(data)
1354 if nbytes == 0:
1355 return 0L
1356 ashex = _binascii.hexlify(data[::-1])
1357 n = long(ashex, 16) # quadratic time before Python 2.3; linear now
1358 if data[-1] >= '\x80':
1359 n -= 1L << (nbytes * 8)
1360 return n
1361
1362 # Shorthands
1363
1364 try:
1365 from cStringIO import StringIO
1366 except ImportError:
1367 from StringIO import StringIO
1368
1369 def dump(obj, file, protocol=None):
1370 Pickler(file, protocol).dump(obj)
1371
1372 def dumps(obj, protocol=None):
1373 file = StringIO()
1374 Pickler(file, protocol).dump(obj)
1375 return file.getvalue()
1376
1377 def load(file):
1378 return Unpickler(file).load()
1379
1380 def loads(str):
1381 file = StringIO(str)
1382 return Unpickler(file).load()
1383
1384 # Doctest
1385
1386 def _test():
1387 import doctest
1388 return doctest.testmod()
1389
1390 if __name__ == "__main__":
1391 _test()