2 Read and write ZIP files.
4 import struct
, os
, time
, sys
, shutil
5 import binascii
, cStringIO
, stat
10 import zlib
# We may need its compression method
14 crc32
= binascii
.crc32
16 __all__
= ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
17 "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
19 class BadZipfile(Exception):
23 class LargeZipFile(Exception):
25 Raised when writing a zipfile, the zipfile requires ZIP64 extensions
26 and those extensions are disabled.
29 error
= BadZipfile
# The exception raised by this module
31 ZIP64_LIMIT
= (1 << 31) - 1
32 ZIP_FILECOUNT_LIMIT
= 1 << 16
33 ZIP_MAX_COMMENT
= (1 << 16) - 1
35 # constants for Zip file compression methods
38 # Other ZIP compression methods not supported
40 # Below are some formats and associated data for reading/writing headers using
41 # the struct module. The names and structures of headers/records are those used
42 # in the PKWARE description of the ZIP file format:
43 # http://www.pkware.com/documents/casestudies/APPNOTE.TXT
44 # (URL valid as of January 2008)
46 # The "end of central directory" structure, magic number, size, and indices
47 # (section V.I in the format document)
48 structEndArchive
= "<4s4H2LH"
49 stringEndArchive
= "PK\005\006"
50 sizeEndCentDir
= struct
.calcsize(structEndArchive
)
55 _ECD_ENTRIES_THIS_DISK
= 3
56 _ECD_ENTRIES_TOTAL
= 4
60 # These last two indices are not part of the structure as defined in the
61 # spec, but they are used internally by this module as a convenience
65 # The "central directory" structure, magic number, size, and indices
66 # of entries in the structure (section V.F in the format document)
67 structCentralDir
= "<4s4B4HL2L5H2L"
68 stringCentralDir
= "PK\001\002"
69 sizeCentralDir
= struct
.calcsize(structCentralDir
)
71 # indexes of entries in the central directory structure
73 _CD_CREATE_VERSION
= 1
75 _CD_EXTRACT_VERSION
= 3
76 _CD_EXTRACT_SYSTEM
= 4
82 _CD_COMPRESSED_SIZE
= 10
83 _CD_UNCOMPRESSED_SIZE
= 11
84 _CD_FILENAME_LENGTH
= 12
85 _CD_EXTRA_FIELD_LENGTH
= 13
86 _CD_COMMENT_LENGTH
= 14
87 _CD_DISK_NUMBER_START
= 15
88 _CD_INTERNAL_FILE_ATTRIBUTES
= 16
89 _CD_EXTERNAL_FILE_ATTRIBUTES
= 17
90 _CD_LOCAL_HEADER_OFFSET
= 18
92 # The "local file header" structure, magic number, size, and indices
93 # (section V.A in the format document)
94 structFileHeader
= "<4s2B4HL2L2H"
95 stringFileHeader
= "PK\003\004"
96 sizeFileHeader
= struct
.calcsize(structFileHeader
)
99 _FH_EXTRACT_VERSION
= 1
100 _FH_EXTRACT_SYSTEM
= 2
101 _FH_GENERAL_PURPOSE_FLAG_BITS
= 3
102 _FH_COMPRESSION_METHOD
= 4
103 _FH_LAST_MOD_TIME
= 5
104 _FH_LAST_MOD_DATE
= 6
106 _FH_COMPRESSED_SIZE
= 8
107 _FH_UNCOMPRESSED_SIZE
= 9
108 _FH_FILENAME_LENGTH
= 10
109 _FH_EXTRA_FIELD_LENGTH
= 11
111 # The "Zip64 end of central directory locator" structure, magic number, and size
112 structEndArchive64Locator
= "<4sLQL"
113 stringEndArchive64Locator
= "PK\x06\x07"
114 sizeEndCentDir64Locator
= struct
.calcsize(structEndArchive64Locator
)
116 # The "Zip64 end of central directory" record, magic number, size, and indices
117 # (section V.G in the format document)
118 structEndArchive64
= "<4sQ2H2L4Q"
119 stringEndArchive64
= "PK\x06\x06"
120 sizeEndCentDir64
= struct
.calcsize(structEndArchive64
)
123 _CD64_DIRECTORY_RECSIZE
= 1
124 _CD64_CREATE_VERSION
= 2
125 _CD64_EXTRACT_VERSION
= 3
126 _CD64_DISK_NUMBER
= 4
127 _CD64_DISK_NUMBER_START
= 5
128 _CD64_NUMBER_ENTRIES_THIS_DISK
= 6
129 _CD64_NUMBER_ENTRIES_TOTAL
= 7
130 _CD64_DIRECTORY_SIZE
= 8
131 _CD64_OFFSET_START_CENTDIR
= 9
133 def _check_zipfile(fp
):
136 return True # file has correct magic number
141 def is_zipfile(filename
):
142 """Quickly see if a file is a ZIP file by checking the magic number.
144 The filename argument may be a file or file-like object too.
148 if hasattr(filename
, "read"):
149 result
= _check_zipfile(fp
=filename
)
151 with
open(filename
, "rb") as fp
:
152 result
= _check_zipfile(fp
)
157 def _EndRecData64(fpin
, offset
, endrec
):
159 Read the ZIP64 end-of-archive records and use that to update endrec
162 fpin
.seek(offset
- sizeEndCentDir64Locator
, 2)
164 # If the seek fails, the file is not large enough to contain a ZIP64
165 # end-of-archive record, so just return the end record we were given.
168 data
= fpin
.read(sizeEndCentDir64Locator
)
169 sig
, diskno
, reloff
, disks
= struct
.unpack(structEndArchive64Locator
, data
)
170 if sig
!= stringEndArchive64Locator
:
173 if diskno
!= 0 or disks
!= 1:
174 raise BadZipfile("zipfiles that span multiple disks are not supported")
176 # Assume no 'zip64 extensible data'
177 fpin
.seek(offset
- sizeEndCentDir64Locator
- sizeEndCentDir64
, 2)
178 data
= fpin
.read(sizeEndCentDir64
)
179 sig
, sz
, create_version
, read_version
, disk_num
, disk_dir
, \
180 dircount
, dircount2
, dirsize
, diroffset
= \
181 struct
.unpack(structEndArchive64
, data
)
182 if sig
!= stringEndArchive64
:
185 # Update the original endrec using data from the ZIP64 record
186 endrec
[_ECD_SIGNATURE
] = sig
187 endrec
[_ECD_DISK_NUMBER
] = disk_num
188 endrec
[_ECD_DISK_START
] = disk_dir
189 endrec
[_ECD_ENTRIES_THIS_DISK
] = dircount
190 endrec
[_ECD_ENTRIES_TOTAL
] = dircount2
191 endrec
[_ECD_SIZE
] = dirsize
192 endrec
[_ECD_OFFSET
] = diroffset
196 def _EndRecData(fpin
):
197 """Return data from the "End of Central Directory" record, or None.
199 The data is a list of the nine items in the ZIP "End of central dir"
200 record followed by a tenth item, the file seek offset of this record."""
202 # Determine file size
204 filesize
= fpin
.tell()
206 # Check to see if this is ZIP file with no archive comment (the
207 # "end of central directory" structure should be the last item in the
208 # file if this is the case).
210 fpin
.seek(-sizeEndCentDir
, 2)
214 if data
[0:4] == stringEndArchive
and data
[-2:] == "\000\000":
215 # the signature is correct and there's no comment, unpack structure
216 endrec
= struct
.unpack(structEndArchive
, data
)
219 # Append a blank comment and record start offset
221 endrec
.append(filesize
- sizeEndCentDir
)
223 # Try to read the "Zip64 end of central directory" structure
224 return _EndRecData64(fpin
, -sizeEndCentDir
, endrec
)
226 # Either this is not a ZIP file, or it is a ZIP file with an archive
227 # comment. Search the end of the file for the "end of central directory"
228 # record signature. The comment is the last item in the ZIP file and may be
229 # up to 64K long. It is assumed that the "end of central directory" magic
230 # number does not appear in the comment.
231 maxCommentStart
= max(filesize
- (1 << 16) - sizeEndCentDir
, 0)
232 fpin
.seek(maxCommentStart
, 0)
234 start
= data
.rfind(stringEndArchive
)
236 # found the magic number; attempt to unpack and interpret
237 recData
= data
[start
:start
+sizeEndCentDir
]
238 endrec
= list(struct
.unpack(structEndArchive
, recData
))
239 comment
= data
[start
+sizeEndCentDir
:]
240 # check that comment length is correct
241 if endrec
[_ECD_COMMENT_SIZE
] == len(comment
):
242 # Append the archive comment and start offset
243 endrec
.append(comment
)
244 endrec
.append(maxCommentStart
+ start
)
246 # Try to read the "Zip64 end of central directory" structure
247 return _EndRecData64(fpin
, maxCommentStart
+ start
- filesize
,
250 # Unable to find a valid end of central directory structure
254 class ZipInfo (object):
255 """Class with attributes describing each file in the ZIP archive."""
279 def __init__(self
, filename
="NoName", date_time
=(1980,1,1,0,0,0)):
280 self
.orig_filename
= filename
# Original file name in archive
282 # Terminate the file name at the first null byte. Null bytes in file
283 # names are used as tricks by viruses in archives.
284 null_byte
= filename
.find(chr(0))
286 filename
= filename
[0:null_byte
]
287 # This is used to ensure paths in generated ZIP files always use
288 # forward slashes as the directory separator, as required by the
289 # ZIP format specification.
290 if os
.sep
!= "/" and os
.sep
in filename
:
291 filename
= filename
.replace(os
.sep
, "/")
293 self
.filename
= filename
# Normalized file name
294 self
.date_time
= date_time
# year, month, day, hour, min, sec
296 self
.compress_type
= ZIP_STORED
# Type of compression for the file
297 self
.comment
= "" # Comment for each file
298 self
.extra
= "" # ZIP extra data
299 if sys
.platform
== 'win32':
300 self
.create_system
= 0 # System which created ZIP archive
302 # Assume everything else is unix-y
303 self
.create_system
= 3 # System which created ZIP archive
304 self
.create_version
= 20 # Version which created ZIP archive
305 self
.extract_version
= 20 # Version needed to extract archive
306 self
.reserved
= 0 # Must be zero
307 self
.flag_bits
= 0 # ZIP flag bits
308 self
.volume
= 0 # Volume number of file header
309 self
.internal_attr
= 0 # Internal attributes
310 self
.external_attr
= 0 # External file attributes
311 # Other attributes are set by class ZipFile:
312 # header_offset Byte offset to the file header
313 # CRC CRC-32 of the uncompressed file
314 # compress_size Size of the compressed file
315 # file_size Size of the uncompressed file
317 def FileHeader(self
):
318 """Return the per-file header as a string."""
320 dosdate
= (dt
[0] - 1980) << 9 | dt
[1] << 5 | dt
[2]
321 dostime
= dt
[3] << 11 | dt
[4] << 5 |
(dt
[5] // 2)
322 if self
.flag_bits
& 0x08:
323 # Set these to zero because we write them after the file data
324 CRC
= compress_size
= file_size
= 0
327 compress_size
= self
.compress_size
328 file_size
= self
.file_size
332 if file_size
> ZIP64_LIMIT
or compress_size
> ZIP64_LIMIT
:
333 # File is larger than what fits into a 4 byte integer,
334 # fall back to the ZIP64 extension
336 extra
= extra
+ struct
.pack(fmt
,
337 1, struct
.calcsize(fmt
)-4, file_size
, compress_size
)
338 file_size
= 0xffffffff
339 compress_size
= 0xffffffff
340 self
.extract_version
= max(45, self
.extract_version
)
341 self
.create_version
= max(45, self
.extract_version
)
343 filename
, flag_bits
= self
._encodeFilenameFlags
()
344 header
= struct
.pack(structFileHeader
, stringFileHeader
,
345 self
.extract_version
, self
.reserved
, flag_bits
,
346 self
.compress_type
, dostime
, dosdate
, CRC
,
347 compress_size
, file_size
,
348 len(filename
), len(extra
))
349 return header
+ filename
+ extra
351 def _encodeFilenameFlags(self
):
352 if isinstance(self
.filename
, unicode):
354 return self
.filename
.encode('ascii'), self
.flag_bits
355 except UnicodeEncodeError:
356 return self
.filename
.encode('utf-8'), self
.flag_bits |
0x800
358 return self
.filename
, self
.flag_bits
360 def _decodeFilename(self
):
361 if self
.flag_bits
& 0x800:
362 return self
.filename
.decode('utf-8')
366 def _decodeExtra(self
):
367 # Try to decode the extra field.
369 unpack
= struct
.unpack
371 tp
, ln
= unpack('<HH', extra
[:4])
374 counts
= unpack('<QQQ', extra
[4:28])
376 counts
= unpack('<QQ', extra
[4:20])
378 counts
= unpack('<Q', extra
[4:12])
382 raise RuntimeError, "Corrupt extra field %s"%(ln
,)
386 # ZIP64 extension (large files and/or large archives)
387 if self
.file_size
in (0xffffffffffffffffL
, 0xffffffffL
):
388 self
.file_size
= counts
[idx
]
391 if self
.compress_size
== 0xFFFFFFFFL
:
392 self
.compress_size
= counts
[idx
]
395 if self
.header_offset
== 0xffffffffL
:
396 old
= self
.header_offset
397 self
.header_offset
= counts
[idx
]
404 """Class to handle decryption of files stored within a ZIP archive.
406 ZIP supports a password-based form of encryption. Even though known
407 plaintext attacks have been found against it, it is still useful
408 to be able to get data out of such a file.
411 zd = _ZipDecrypter(mypwd)
412 plain_char = zd(cypher_char)
413 plain_text = map(zd, cypher_text)
416 def _GenerateCRCTable():
417 """Generate a CRC-32 table.
419 ZIP encryption uses the CRC32 one-byte primitive for scrambling some
420 internal keys. We noticed that a direct implementation is faster than
421 relying on binascii.crc32().
429 crc
= ((crc
>> 1) & 0x7FFFFFFF) ^ poly
431 crc
= ((crc
>> 1) & 0x7FFFFFFF)
434 crctable
= _GenerateCRCTable()
436 def _crc32(self
, ch
, crc
):
437 """Compute the CRC32 primitive on one byte."""
438 return ((crc
>> 8) & 0xffffff) ^ self
.crctable
[(crc ^
ord(ch
)) & 0xff]
440 def __init__(self
, pwd
):
441 self
.key0
= 305419896
442 self
.key1
= 591751049
443 self
.key2
= 878082192
447 def _UpdateKeys(self
, c
):
448 self
.key0
= self
._crc
32(c
, self
.key0
)
449 self
.key1
= (self
.key1
+ (self
.key0
& 255)) & 4294967295
450 self
.key1
= (self
.key1
* 134775813 + 1) & 4294967295
451 self
.key2
= self
._crc
32(chr((self
.key1
>> 24) & 255), self
.key2
)
453 def __call__(self
, c
):
454 """Decrypt a single character."""
457 c
= c ^
(((k
* (k^
1)) >> 8) & 255)
462 class ZipExtFile(io
.BufferedIOBase
):
463 """File-like object for reading an archive member.
464 Is returned by ZipFile.open().
467 # Max size supported by decompressor.
470 # Read from compressed files in 4k blocks.
473 # Search for universal newlines or line chunks.
474 PATTERN
= re
.compile(r
'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')
476 def __init__(self
, fileobj
, mode
, zipinfo
, decrypter
=None):
477 self
._fileobj
= fileobj
478 self
._decrypter
= decrypter
480 self
._compress
_type
= zipinfo
.compress_type
481 self
._compress
_size
= zipinfo
.compress_size
482 self
._compress
_left
= zipinfo
.compress_size
484 if self
._compress
_type
== ZIP_DEFLATED
:
485 self
._decompressor
= zlib
.decompressobj(-15)
486 self
._unconsumed
= ''
488 self
._readbuffer
= ''
491 self
._universal
= 'U' in mode
494 # Adjust read size for encrypted files since the first 12 bytes
495 # are for the encryption/password information.
496 if self
._decrypter
is not None:
497 self
._compress
_left
-= 12
500 self
.name
= zipinfo
.filename
502 if hasattr(zipinfo
, 'CRC'):
503 self
._expected
_crc
= zipinfo
.CRC
504 self
._running
_crc
= crc32(b
'') & 0xffffffff
506 self
._expected
_crc
= None
508 def readline(self
, limit
=-1):
509 """Read and return a line from the stream.
511 If limit is specified, at most limit bytes will be read.
514 if not self
._universal
and limit
< 0:
515 # Shortcut common case - newline found in buffer.
516 i
= self
._readbuffer
.find('\n', self
._offset
) + 1
518 line
= self
._readbuffer
[self
._offset
: i
]
522 if not self
._universal
:
523 return io
.BufferedIOBase
.readline(self
, limit
)
526 while limit
< 0 or len(line
) < limit
:
527 readahead
= self
.peek(2)
532 # Search for universal newlines or line chunks.
534 # The pattern returns either a line chunk or a newline, but not
535 # both. Combined with peek(2), we are assured that the sequence
536 # '\r\n' is always retrieved completely and never split into
537 # separate newlines - '\r', '\n' due to coincidental readaheads.
539 match
= self
.PATTERN
.search(readahead
)
540 newline
= match
.group('newline')
541 if newline
is not None:
542 if self
.newlines
is None:
544 if newline
not in self
.newlines
:
545 self
.newlines
.append(newline
)
546 self
._offset
+= len(newline
)
549 chunk
= match
.group('chunk')
551 chunk
= chunk
[: limit
- len(line
)]
553 self
._offset
+= len(chunk
)
559 """Returns buffered bytes without advancing the position."""
560 if n
> len(self
._readbuffer
) - self
._offset
:
562 self
._offset
-= len(chunk
)
564 # Return up to 512 bytes to reduce allocation overhead for tight loops.
565 return self
._readbuffer
[self
._offset
: self
._offset
+ 512]
570 def read(self
, n
=-1):
571 """Read and return up to n bytes.
572 If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
581 data
= self
.read1(n
- len(buf
))
588 def _update_crc(self
, newdata
, eof
):
589 # Update the CRC using the given data.
590 if self
._expected
_crc
is None:
591 # No need to compute the CRC if we don't have a reference value
593 self
._running
_crc
= crc32(newdata
, self
._running
_crc
) & 0xffffffff
594 # Check the CRC if we're at the end of the file
595 if eof
and self
._running
_crc
!= self
._expected
_crc
:
596 raise BadZipfile("Bad CRC-32 for file %r" % self
.name
)
599 """Read up to n bytes with at most one read() system call."""
601 # Simplify algorithm (branching) by transforming negative n to large n.
602 if n
< 0 or n
is None:
605 # Bytes available in read buffer.
606 len_readbuffer
= len(self
._readbuffer
) - self
._offset
609 if self
._compress
_left
> 0 and n
> len_readbuffer
+ len(self
._unconsumed
):
610 nbytes
= n
- len_readbuffer
- len(self
._unconsumed
)
611 nbytes
= max(nbytes
, self
.MIN_READ_SIZE
)
612 nbytes
= min(nbytes
, self
._compress
_left
)
614 data
= self
._fileobj
.read(nbytes
)
615 self
._compress
_left
-= len(data
)
617 if data
and self
._decrypter
is not None:
618 data
= ''.join(map(self
._decrypter
, data
))
620 if self
._compress
_type
== ZIP_STORED
:
621 self
._update
_crc
(data
, eof
=(self
._compress
_left
==0))
622 self
._readbuffer
= self
._readbuffer
[self
._offset
:] + data
625 # Prepare deflated bytes for decompression.
626 self
._unconsumed
+= data
628 # Handle unconsumed data.
629 if (len(self
._unconsumed
) > 0 and n
> len_readbuffer
and
630 self
._compress
_type
== ZIP_DEFLATED
):
631 data
= self
._decompressor
.decompress(
633 max(n
- len_readbuffer
, self
.MIN_READ_SIZE
)
636 self
._unconsumed
= self
._decompressor
.unconsumed_tail
637 eof
= len(self
._unconsumed
) == 0 and self
._compress
_left
== 0
639 data
+= self
._decompressor
.flush()
641 self
._update
_crc
(data
, eof
=eof
)
642 self
._readbuffer
= self
._readbuffer
[self
._offset
:] + data
646 data
= self
._readbuffer
[self
._offset
: self
._offset
+ n
]
647 self
._offset
+= len(data
)
653 """ Class with methods to open, read, write, close, list zip files.
655 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
657 file: Either the path to the file, or a file-like object.
658 If it is a path, the file will be opened and closed by ZipFile.
659 mode: The mode can be either read "r", write "w" or append "a".
660 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
661 allowZip64: if True ZipFile will create files with ZIP64 extensions when
662 needed, otherwise it will raise an exception when this would
667 fp
= None # Set here since __del__ checks it
669 def __init__(self
, file, mode
="r", compression
=ZIP_STORED
, allowZip64
=False):
670 """Open the ZIP file with mode read "r", write "w" or append "a"."""
671 if mode
not in ("r", "w", "a"):
672 raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
674 if compression
== ZIP_STORED
:
676 elif compression
== ZIP_DEFLATED
:
679 "Compression requires the (missing) zlib module"
681 raise RuntimeError, "That compression method is not supported"
683 self
._allowZip
64 = allowZip64
684 self
._didModify
= False
685 self
.debug
= 0 # Level of printing: 0 through 3
686 self
.NameToInfo
= {} # Find file info given name
687 self
.filelist
= [] # List of ZipInfo instances for archive
688 self
.compression
= compression
# Method of compression
689 self
.mode
= key
= mode
.replace('b', '')[0]
693 # Check if we were passed a file-like object
694 if isinstance(file, basestring
):
697 modeDict
= {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
699 self
.fp
= open(file, modeDict
[mode
])
703 self
.fp
= open(file, modeDict
[mode
])
709 self
.filename
= getattr(file, 'name', None)
714 # set the modified flag so central directory gets written
715 # even if no files are added to the archive
716 self
._didModify
= True
719 # See if file is a zip file
720 self
._RealGetContents
()
721 # seek to start of directory and overwrite
722 self
.fp
.seek(self
.start_dir
, 0)
724 # file is not a zip file, just append
727 # set the modified flag so central directory gets written
728 # even if no files are added to the archive
729 self
._didModify
= True
731 if not self
._filePassed
:
734 raise RuntimeError, 'Mode must be "r", "w" or "a"'
739 def __exit__(self
, type, value
, traceback
):
742 def _GetContents(self
):
743 """Read the directory, making sure we close the file if the format
746 self
._RealGetContents
()
748 if not self
._filePassed
:
753 def _RealGetContents(self
):
754 """Read in the table of contents for the ZIP file."""
757 endrec
= _EndRecData(fp
)
759 raise BadZipfile("File is not a zip file")
761 raise BadZipfile
, "File is not a zip file"
764 size_cd
= endrec
[_ECD_SIZE
] # bytes in central directory
765 offset_cd
= endrec
[_ECD_OFFSET
] # offset of central directory
766 self
.comment
= endrec
[_ECD_COMMENT
] # archive comment
768 # "concat" is zero, unless zip was concatenated to another file
769 concat
= endrec
[_ECD_LOCATION
] - size_cd
- offset_cd
770 if endrec
[_ECD_SIGNATURE
] == stringEndArchive64
:
771 # If Zip64 extension structures are present, account for them
772 concat
-= (sizeEndCentDir64
+ sizeEndCentDir64Locator
)
775 inferred
= concat
+ offset_cd
776 print "given, inferred, offset", offset_cd
, inferred
, concat
777 # self.start_dir: Position of start of central directory
778 self
.start_dir
= offset_cd
+ concat
779 fp
.seek(self
.start_dir
, 0)
780 data
= fp
.read(size_cd
)
781 fp
= cStringIO
.StringIO(data
)
783 while total
< size_cd
:
784 centdir
= fp
.read(sizeCentralDir
)
785 if centdir
[0:4] != stringCentralDir
:
786 raise BadZipfile
, "Bad magic number for central directory"
787 centdir
= struct
.unpack(structCentralDir
, centdir
)
790 filename
= fp
.read(centdir
[_CD_FILENAME_LENGTH
])
791 # Create ZipInfo instance to store file information
792 x
= ZipInfo(filename
)
793 x
.extra
= fp
.read(centdir
[_CD_EXTRA_FIELD_LENGTH
])
794 x
.comment
= fp
.read(centdir
[_CD_COMMENT_LENGTH
])
795 x
.header_offset
= centdir
[_CD_LOCAL_HEADER_OFFSET
]
796 (x
.create_version
, x
.create_system
, x
.extract_version
, x
.reserved
,
797 x
.flag_bits
, x
.compress_type
, t
, d
,
798 x
.CRC
, x
.compress_size
, x
.file_size
) = centdir
[1:12]
799 x
.volume
, x
.internal_attr
, x
.external_attr
= centdir
[15:18]
800 # Convert date/time code to (year, month, day, hour, min, sec)
802 x
.date_time
= ( (d
>>9)+1980, (d
>>5)&0xF, d
&0x1F,
803 t
>>11, (t
>>5)&0x3F, (t
&0x1F) * 2 )
806 x
.header_offset
= x
.header_offset
+ concat
807 x
.filename
= x
._decodeFilename
()
808 self
.filelist
.append(x
)
809 self
.NameToInfo
[x
.filename
] = x
811 # update total bytes read from central directory
812 total
= (total
+ sizeCentralDir
+ centdir
[_CD_FILENAME_LENGTH
]
813 + centdir
[_CD_EXTRA_FIELD_LENGTH
]
814 + centdir
[_CD_COMMENT_LENGTH
])
821 """Return a list of file names in the archive."""
823 for data
in self
.filelist
:
824 l
.append(data
.filename
)
828 """Return a list of class ZipInfo instances for files in the
833 """Print a table of contents for the zip file."""
834 print "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
835 for zinfo
in self
.filelist
:
836 date
= "%d-%02d-%02d %02d:%02d:%02d" % zinfo
.date_time
[:6]
837 print "%-46s %s %12d" % (zinfo
.filename
, date
, zinfo
.file_size
)
840 """Read all the files and check the CRC."""
842 for zinfo
in self
.filelist
:
844 # Read by chunks, to avoid an OverflowError or a
845 # MemoryError with very large embedded files.
846 f
= self
.open(zinfo
.filename
, "r")
847 while f
.read(chunk_size
): # Check CRC-32
850 return zinfo
.filename
852 def getinfo(self
, name
):
853 """Return the instance of ZipInfo given 'name'."""
854 info
= self
.NameToInfo
.get(name
)
857 'There is no item named %r in the archive' % name
)
861 def setpassword(self
, pwd
):
862 """Set default password for encrypted files."""
def read(self, name, pwd=None):
    """Return file bytes (as a string) for name.

    ``name`` and the optional password ``pwd`` are passed straight to
    ``self.open()`` with mode "r".  The member file object returned by
    ``open()`` is closed before this method returns, including when
    reading raises, so the handle is not left for the garbage collector.
    """
    with self.open(name, "r", pwd) as member:
        return member.read()
869 def open(self
, name
, mode
="r", pwd
=None):
870 """Return file-like object for 'name'."""
871 if mode
not in ("r", "U", "rU"):
872 raise RuntimeError, 'open() requires mode "r", "U", or "rU"'
874 raise RuntimeError, \
875 "Attempt to read ZIP archive that was already closed"
877 # Only open a new file for instances where we were not
878 # given a file object in the constructor
882 zef_file
= open(self
.filename
, 'rb')
884 # Make sure we have an info object
885 if isinstance(name
, ZipInfo
):
886 # 'name' is already an info object
889 # Get info object for name
890 zinfo
= self
.getinfo(name
)
892 zef_file
.seek(zinfo
.header_offset
, 0)
894 # Skip the file header:
895 fheader
= zef_file
.read(sizeFileHeader
)
896 if fheader
[0:4] != stringFileHeader
:
897 raise BadZipfile
, "Bad magic number for file header"
899 fheader
= struct
.unpack(structFileHeader
, fheader
)
900 fname
= zef_file
.read(fheader
[_FH_FILENAME_LENGTH
])
901 if fheader
[_FH_EXTRA_FIELD_LENGTH
]:
902 zef_file
.read(fheader
[_FH_EXTRA_FIELD_LENGTH
])
904 if fname
!= zinfo
.orig_filename
:
906 'File name in directory "%s" and header "%s" differ.' % (
907 zinfo
.orig_filename
, fname
)
909 # check for encrypted flag & handle password
910 is_encrypted
= zinfo
.flag_bits
& 0x1
916 raise RuntimeError, "File %s is encrypted, " \
917 "password required for extraction" % name
919 zd
= _ZipDecrypter(pwd
)
920 # The first 12 bytes in the cypher stream is an encryption header
921 # used to strengthen the algorithm. The first 11 bytes are
922 # completely random, while the 12th contains the MSB of the CRC,
923 # or the MSB of the file time depending on the header type
924 # and is used to check the correctness of the password.
925 bytes
= zef_file
.read(12)
926 h
= map(zd
, bytes
[0:12])
927 if zinfo
.flag_bits
& 0x8:
928 # compare against the file time from extended local headers
929 check_byte
= (zinfo
._raw
_time
>> 8) & 0xff
931 # compare against the CRC otherwise
932 check_byte
= (zinfo
.CRC
>> 24) & 0xff
933 if ord(h
[11]) != check_byte
:
934 raise RuntimeError("Bad password for file", name
)
936 return ZipExtFile(zef_file
, mode
, zinfo
, zd
)
938 def extract(self
, member
, path
=None, pwd
=None):
939 """Extract a member from the archive to the current working directory,
940 using its full name. Its file information is extracted as accurately
941 as possible. `member' may be a filename or a ZipInfo object. You can
942 specify a different directory using `path'.
944 if not isinstance(member
, ZipInfo
):
945 member
= self
.getinfo(member
)
950 return self
._extract
_member
(member
, path
, pwd
)
952 def extractall(self
, path
=None, members
=None, pwd
=None):
953 """Extract all members from the archive to the current working
954 directory. `path' specifies a different directory to extract to.
955 `members' is optional and must be a subset of the list returned
959 members
= self
.namelist()
961 for zipinfo
in members
:
962 self
.extract(zipinfo
, path
, pwd
)
964 def _extract_member(self
, member
, targetpath
, pwd
):
965 """Extract the ZipInfo object 'member' to a physical
966 file on the path targetpath.
968 # build the destination pathname, replacing
969 # forward slashes to platform specific separators.
970 # Strip trailing path separator, unless it represents the root.
971 if (targetpath
[-1:] in (os
.path
.sep
, os
.path
.altsep
)
972 and len(os
.path
.splitdrive(targetpath
)[1]) > 1):
973 targetpath
= targetpath
[:-1]
975 # don't include leading "/" from file name if present
976 if member
.filename
[0] == '/':
977 targetpath
= os
.path
.join(targetpath
, member
.filename
[1:])
979 targetpath
= os
.path
.join(targetpath
, member
.filename
)
981 targetpath
= os
.path
.normpath(targetpath
)
983 # Create all upper directories if necessary.
984 upperdirs
= os
.path
.dirname(targetpath
)
985 if upperdirs
and not os
.path
.exists(upperdirs
):
986 os
.makedirs(upperdirs
)
988 if member
.filename
[-1] == '/':
989 if not os
.path
.isdir(targetpath
):
993 source
= self
.open(member
, pwd
=pwd
)
994 target
= file(targetpath
, "wb")
995 shutil
.copyfileobj(source
, target
)
1001 def _writecheck(self
, zinfo
):
1002 """Check for errors before writing a file to the archive."""
1003 if zinfo
.filename
in self
.NameToInfo
:
1004 if self
.debug
: # Warning for duplicate names
1005 print "Duplicate name:", zinfo
.filename
1006 if self
.mode
not in ("w", "a"):
1007 raise RuntimeError, 'write() requires mode "w" or "a"'
1009 raise RuntimeError, \
1010 "Attempt to write ZIP archive that was already closed"
1011 if zinfo
.compress_type
== ZIP_DEFLATED
and not zlib
:
1012 raise RuntimeError, \
1013 "Compression requires the (missing) zlib module"
1014 if zinfo
.compress_type
not in (ZIP_STORED
, ZIP_DEFLATED
):
1015 raise RuntimeError, \
1016 "That compression method is not supported"
1017 if zinfo
.file_size
> ZIP64_LIMIT
:
1018 if not self
._allowZip
64:
1019 raise LargeZipFile("Filesize would require ZIP64 extensions")
1020 if zinfo
.header_offset
> ZIP64_LIMIT
:
1021 if not self
._allowZip
64:
1022 raise LargeZipFile("Zipfile size would require ZIP64 extensions")
1024 def write(self
, filename
, arcname
=None, compress_type
=None):
1025 """Put the bytes from filename into the archive under the name
1029 "Attempt to write to ZIP archive that was already closed")
1031 st
= os
.stat(filename
)
1032 isdir
= stat
.S_ISDIR(st
.st_mode
)
1033 mtime
= time
.localtime(st
.st_mtime
)
1034 date_time
= mtime
[0:6]
1035 # Create ZipInfo instance to store file information
1038 arcname
= os
.path
.normpath(os
.path
.splitdrive(arcname
)[1])
1039 while arcname
[0] in (os
.sep
, os
.altsep
):
1040 arcname
= arcname
[1:]
1043 zinfo
= ZipInfo(arcname
, date_time
)
1044 zinfo
.external_attr
= (st
[0] & 0xFFFF) << 16L # Unix attributes
1045 if compress_type
is None:
1046 zinfo
.compress_type
= self
.compression
1048 zinfo
.compress_type
= compress_type
1050 zinfo
.file_size
= st
.st_size
1051 zinfo
.flag_bits
= 0x00
1052 zinfo
.header_offset
= self
.fp
.tell() # Start of header bytes
1054 self
._writecheck
(zinfo
)
1055 self
._didModify
= True
1059 zinfo
.compress_size
= 0
1061 self
.filelist
.append(zinfo
)
1062 self
.NameToInfo
[zinfo
.filename
] = zinfo
1063 self
.fp
.write(zinfo
.FileHeader())
1066 with
open(filename
, "rb") as fp
:
1067 # Must overwrite CRC and sizes with correct data later
1069 zinfo
.compress_size
= compress_size
= 0
1070 zinfo
.file_size
= file_size
= 0
1071 self
.fp
.write(zinfo
.FileHeader())
1072 if zinfo
.compress_type
== ZIP_DEFLATED
:
1073 cmpr
= zlib
.compressobj(zlib
.Z_DEFAULT_COMPRESSION
,
1078 buf
= fp
.read(1024 * 8)
1081 file_size
= file_size
+ len(buf
)
1082 CRC
= crc32(buf
, CRC
) & 0xffffffff
1084 buf
= cmpr
.compress(buf
)
1085 compress_size
= compress_size
+ len(buf
)
1089 compress_size
= compress_size
+ len(buf
)
1091 zinfo
.compress_size
= compress_size
1093 zinfo
.compress_size
= file_size
1095 zinfo
.file_size
= file_size
1096 # Seek backwards and write CRC and file sizes
1097 position
= self
.fp
.tell() # Preserve current position in file
1098 self
.fp
.seek(zinfo
.header_offset
+ 14, 0)
1099 self
.fp
.write(struct
.pack("<LLL", zinfo
.CRC
, zinfo
.compress_size
,
1101 self
.fp
.seek(position
, 0)
1102 self
.filelist
.append(zinfo
)
1103 self
.NameToInfo
[zinfo
.filename
] = zinfo
def writestr(self, zinfo_or_arcname, bytes, compress_type=None):
    """Store the string 'bytes' in the archive as a single member.

    'zinfo_or_arcname' is either a ready-made ZipInfo instance or the
    name the member should have inside the archive.  For a bare name a
    ZipInfo is created with the current local time, the archive's
    default compression method and permission bits 0600.

    'compress_type', when not None, overrides the compression method
    recorded on the ZipInfo.

    Raises RuntimeError if the archive has already been closed.
    """
    if isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = zinfo_or_arcname
    else:
        now = time.localtime(time.time())[:6]
        zinfo = ZipInfo(filename=zinfo_or_arcname, date_time=now)
        zinfo.compress_type = self.compression
        # Unix permission bits live in the upper 16 bits of external_attr.
        zinfo.external_attr = 0o600 << 16

    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    if compress_type is not None:
        zinfo.compress_type = compress_type

    zinfo.file_size = len(bytes)                # Uncompressed size
    # _writecheck() needs the header offset to validate ZIP64 limits.
    zinfo.header_offset = self.fp.tell()
    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = crc32(bytes) & 0xffffffff       # CRC-32 of the raw data

    payload = bytes
    if zinfo.compress_type == ZIP_DEFLATED:
        # wbits=-15 asks zlib for a raw deflate stream (no zlib header).
        deflater = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                    zlib.DEFLATED, -15)
        payload = deflater.compress(bytes) + deflater.flush()
        zinfo.compress_size = len(payload)      # Compressed size
    else:
        zinfo.compress_size = zinfo.file_size

    out = self.fp
    zinfo.header_offset = out.tell()            # Start of header bytes
    out.write(zinfo.FileHeader())
    out.write(payload)
    out.flush()
    if zinfo.flag_bits & 0x08:
        # Bit 3 set: CRC and sizes follow the file data as a descriptor.
        out.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                              zinfo.file_size))

    # Register the member so the central directory includes it on close.
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
def __del__(self):
    """Call the "close()" method in case the user forgot.

    Invoked when the object is being finalized; it simply delegates to
    close().
    """
    self.close()
def close(self):
    """Close the file, and for mode "w" and "a" write the ending
    records.

    When the archive was opened in mode "w" or "a" and has been
    modified, this writes, in order:

    * one central directory record per member in self.filelist (with a
      ZIP64 extra field when a size or header offset exceeds
      ZIP64_LIMIT),
    * the ZIP64 end-of-archive record and locator, if the member count,
      the directory offset or the directory size exceed the classic
      limits,
    * the end-of-central-directory record followed by the archive
      comment (truncated to ZIP_MAX_COMMENT bytes if longer).

    The underlying file is closed (unless the caller supplied the file
    object) and self.fp is reset to None even when writing the ending
    records raises, so a failed close() does not leak the file handle.
    Calling close() on an already closed archive does nothing.
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify: # write ending records
            pos1 = self.fp.tell()
            for zinfo in self.filelist:         # write central directory
                dt = zinfo.date_time
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)

                # Values that do not fit the 32-bit fields are moved to a
                # ZIP64 extra field and replaced by 0xffffffff markers.
                extra = []
                if (zinfo.file_size > ZIP64_LIMIT
                        or zinfo.compress_size > ZIP64_LIMIT):
                    extra.append(zinfo.file_size)
                    extra.append(zinfo.compress_size)
                    file_size = 0xffffffff
                    compress_size = 0xffffffff
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    extra.append(zinfo.header_offset)
                    header_offset = 0xffffffff
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                if extra:
                    # Append a ZIP64 field to the extra's
                    extra_data = struct.pack(
                        '<HH' + 'Q'*len(extra),
                        1, 8*len(extra), *extra) + extra_data

                    extract_version = max(45, zinfo.extract_version)
                    create_version = max(45, zinfo.create_version)
                else:
                    extract_version = zinfo.extract_version
                    create_version = zinfo.create_version

                try:
                    filename, flag_bits = zinfo._encodeFilenameFlags()
                    centdir = struct.pack(structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                except DeprecationWarning:
                    # Dump the raw (un-encoded) field values for diagnosis,
                    # then let the warning-turned-error propagate.
                    sys.stderr.write(repr((structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(zinfo.filename), len(extra_data),
                        len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)) + "\n")
                    raise
                self.fp.write(centdir)
                self.fp.write(filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            pos2 = self.fp.tell()
            # Write end-of-zip-archive record
            centDirCount = len(self.filelist)
            centDirSize = pos2 - pos1
            centDirOffset = pos1
            if (centDirCount >= ZIP_FILECOUNT_LIMIT or
                    centDirOffset > ZIP64_LIMIT or
                    centDirSize > ZIP64_LIMIT):
                # Need to write the ZIP64 end-of-archive records
                zip64endrec = struct.pack(
                    structEndArchive64, stringEndArchive64,
                    44, 45, 45, 0, 0, centDirCount, centDirCount,
                    centDirSize, centDirOffset)
                self.fp.write(zip64endrec)

                zip64locrec = struct.pack(
                    structEndArchive64Locator,
                    stringEndArchive64Locator, 0, pos2, 1)
                self.fp.write(zip64locrec)
                # The classic record can only hold saturated values.
                centDirCount = min(centDirCount, 0xFFFF)
                centDirSize = min(centDirSize, 0xFFFFFFFF)
                centDirOffset = min(centDirOffset, 0xFFFFFFFF)

            # check for valid comment length; exactly ZIP_MAX_COMMENT
            # bytes still fits the 16-bit length field.
            if len(self.comment) > ZIP_MAX_COMMENT:
                if self.debug > 0:
                    print('Archive comment is too long; truncating to %d bytes'
                          % ZIP_MAX_COMMENT)
                self.comment = self.comment[:ZIP_MAX_COMMENT]

            endrec = struct.pack(structEndArchive, stringEndArchive,
                                 0, 0, centDirCount, centDirCount,
                                 centDirSize, centDirOffset, len(self.comment))
            self.fp.write(endrec)
            self.fp.write(self.comment)
            self.fp.flush()
    finally:
        # Always release the file, even if the ending records failed.
        fp = self.fp
        self.fp = None
        if not self._filePassed:
            fp.close()
1263 class PyZipFile(ZipFile
):
1264 """Class to create ZIP archives with Python library files and packages."""
1266 def writepy(self
, pathname
, basename
= ""):
1267 """Add all files from "pathname" to the ZIP archive.
1269 If pathname is a package directory, search the directory and
1270 all package subdirectories recursively for all *.py and enter
1271 the modules into the archive. If pathname is a plain
1272 directory, listdir *.py and enter all modules. Else, pathname
1273 must be a Python *.py file and the module will be put into the
1274 archive. Added modules are always module.pyo or module.pyc.
1275 This method will compile the module.py into module.pyc if
1278 dir, name
= os
.path
.split(pathname
)
1279 if os
.path
.isdir(pathname
):
1280 initname
= os
.path
.join(pathname
, "__init__.py")
1281 if os
.path
.isfile(initname
):
1282 # This is a package directory, add it
1284 basename
= "%s/%s" % (basename
, name
)
1288 print "Adding package in", pathname
, "as", basename
1289 fname
, arcname
= self
._get
_codename
(initname
[0:-3], basename
)
1291 print "Adding", arcname
1292 self
.write(fname
, arcname
)
1293 dirlist
= os
.listdir(pathname
)
1294 dirlist
.remove("__init__.py")
1295 # Add all *.py files and package subdirectories
1296 for filename
in dirlist
:
1297 path
= os
.path
.join(pathname
, filename
)
1298 root
, ext
= os
.path
.splitext(filename
)
1299 if os
.path
.isdir(path
):
1300 if os
.path
.isfile(os
.path
.join(path
, "__init__.py")):
1301 # This is a package directory, add it
1302 self
.writepy(path
, basename
) # Recursive call
1304 fname
, arcname
= self
._get
_codename
(path
[0:-3],
1307 print "Adding", arcname
1308 self
.write(fname
, arcname
)
1310 # This is NOT a package directory, add its files at top level
1312 print "Adding files from directory", pathname
1313 for filename
in os
.listdir(pathname
):
1314 path
= os
.path
.join(pathname
, filename
)
1315 root
, ext
= os
.path
.splitext(filename
)
1317 fname
, arcname
= self
._get
_codename
(path
[0:-3],
1320 print "Adding", arcname
1321 self
.write(fname
, arcname
)
1323 if pathname
[-3:] != ".py":
1324 raise RuntimeError, \
1325 'Files added with writepy() must end with ".py"'
1326 fname
, arcname
= self
._get
_codename
(pathname
[0:-3], basename
)
1328 print "Adding file", arcname
1329 self
.write(fname
, arcname
)
1331 def _get_codename(self
, pathname
, basename
):
1332 """Return (filename, archivename) for the path.
1334 Given a module name path, return the correct file path and
1335 archive name, compiling if necessary. For example, given
1336 /python/lib/string, return (/python/lib/string.pyc, string).
1338 file_py
= pathname
+ ".py"
1339 file_pyc
= pathname
+ ".pyc"
1340 file_pyo
= pathname
+ ".pyo"
1341 if os
.path
.isfile(file_pyo
) and \
1342 os
.stat(file_pyo
).st_mtime
>= os
.stat(file_py
).st_mtime
:
1343 fname
= file_pyo
# Use .pyo file
1344 elif not os
.path
.isfile(file_pyc
) or \
1345 os
.stat(file_pyc
).st_mtime
< os
.stat(file_py
).st_mtime
:
1348 print "Compiling", file_py
1350 py_compile
.compile(file_py
, file_pyc
, None, True)
1351 except py_compile
.PyCompileError
,err
:
1356 archivename
= os
.path
.split(fname
)[1]
1358 archivename
= "%s/%s" % (basename
, archivename
)
1359 return (fname
, archivename
)
def main(args = None):
    """Command-line entry point: list, test, extract or create a ZIP file.

    "args" is the argument list without the program name; it defaults
    to sys.argv[1:].  Supported forms:

        -l zipfile.zip           list the archive contents
        -t zipfile.zip           test the archive for corrupt members
        -e zipfile.zip target    extract every member below "target"
        -c zipfile.zip src ...   create an archive from files/directories

    On an unknown option or a wrong number of arguments the usage text
    is printed and the process exits with status 1.  The archive is
    always closed, including when an operation raises.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)

    def usage_error():
        print(USAGE)
        sys.exit(1)

    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        usage_error()

    command = args[0]

    if command == '-l':
        if len(args) != 2:
            usage_error()
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif command == '-t':
        if len(args) != 2:
            usage_error()
        zf = ZipFile(args[1], 'r')
        try:
            badfile = zf.testzip()
        finally:
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif command == '-e':
        if len(args) != 3:
            usage_error()

        zf = ZipFile(args[1], 'r')
        out = args[2]
        try:
            # NOTE(review): member names are joined onto "out" without
            # sanitising; an archive containing absolute or "../" names
            # can write outside the target directory.
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                if path.endswith('/'):
                    # Directory entry: create it instead of opening it
                    # as a file.
                    if not os.path.isdir(tgt):
                        os.makedirs(tgt)
                    continue

                tgtdir = os.path.dirname(tgt)
                # tgtdir is '' for a bare name relative to the cwd.
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                with open(tgt, 'wb') as fp:
                    fp.write(zf.read(path))
        finally:
            zf.close()

    elif command == '-c':
        if len(args) < 3:
            usage_error()

        def addToZip(zf, path, zippath):
            """Add a file, or a directory tree recursively, as "zippath"."""
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
1436 if __name__
== "__main__":