AppPkg/Applications/Python/Python-2.7.10/Lib/zipfile.py

   1 """
   2 Read and write ZIP files.
   3 """
   4 import struct, os, time, sys, shutil
   5 import binascii, cStringIO, stat
   6 import io
   7 import re
   8 import string
   9
  10 try:
  11     import zlib # We may need its compression method
  12     crc32 = zlib.crc32
  13 except ImportError:
  14     zlib = None
  15     crc32 = binascii.crc32
  16
# Public names of this module, i.e. what "from zipfile import *" exports.
__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
           "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
  19
  20 class BadZipfile(Exception):
  21     pass
  22
  23
  24 class LargeZipFile(Exception):
  25     """
  26     Raised when writing a zipfile, the zipfile requires ZIP64 extensions
  27     and those extensions are disabled.
  28     """
  29
# "error" is an alias bound to the very same class object as BadZipfile, so
# "except error" and "except BadZipfile" catch the same exceptions.
error = BadZipfile      # The exception raised by this module
  31
  32 ZIP64_LIMIT = (1 << 31) - 1
  33 ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
  34 ZIP_MAX_COMMENT = (1 << 16) - 1
  35
  36 # constants for Zip file compression methods
  37 ZIP_STORED = 0
  38 ZIP_DEFLATED = 8
  39 # Other ZIP compression methods not supported
  40
  41 # Below are some formats and associated data for reading/writing headers using
  42 # the struct module.  The names and structures of headers/records are those used
  43 # in the PKWARE description of the ZIP file format:
  44 #     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
  45 # (URL valid as of January 2008)
  46
# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
#
# structEndArchive is a struct-module format string: "<" selects
# little-endian standard sizes, "4s" is the 4-byte signature, and it is
# followed by four 16-bit fields, two 32-bit fields and one final 16-bit
# field, in the order given by the _ECD_* indices below.
structEndArchive = "<4s4H2LH"
stringEndArchive = "PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Positions of the fields in the sequence obtained by unpacking a record
# with structEndArchive.
_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
# (_EndRecData appends the comment and the record's file offset to the
# unpacked fields).
_ECD_COMMENT = 8
_ECD_LOCATION = 9
  65
# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
#
# Format string layout (little-endian, standard sizes): 4-byte signature,
# four 8-bit fields, four 16-bit fields, one 32-bit field (CRC), two 32-bit
# size fields, five 16-bit fields and two trailing 32-bit fields.  The
# variable-length file name, extra field and comment follow the fixed part
# and are not covered by this format.
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = "PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18
  92
# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
#
# Format string layout (little-endian, standard sizes): 4-byte signature,
# two 8-bit fields, four 16-bit fields, one 32-bit field (CRC), two 32-bit
# size fields and two 16-bit length fields.  ZipInfo.FileHeader packs a
# header with this format and appends the file name and extra data.
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = "PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# Positions of the fields in the sequence obtained by unpacking a header
# with structFileHeader.
_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
 111
# The "Zip64 end of central directory locator" structure, magic number, and size
# Layout: 4-byte signature, one 32-bit field, one 64-bit field and one
# 32-bit field (_EndRecData64 unpacks them as signature, disk number,
# relative offset and disk count).
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = "PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
# Layout: 4-byte signature, one 64-bit field, two 16-bit fields, two 32-bit
# fields and four 64-bit fields, in the order of the _CD64_* indices below.
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = "PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# Positions of the fields in the sequence obtained by unpacking a record
# with structEndArchive64.
_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9
 133
 134 def _check_zipfile(fp):
 135     try:
 136         if _EndRecData(fp):
 137             return True         # file has correct magic number
 138     except IOError:
 139         pass
 140     return False
 141
 142 def is_zipfile(filename):
 143     """Quickly see if a file is a ZIP file by checking the magic number.
 144
 145     The filename argument may be a file or file-like object too.
 146     """
 147     result = False
 148     try:
 149         if hasattr(filename, "read"):
 150             result = _check_zipfile(fp=filename)
 151         else:
 152             with open(filename, "rb") as fp:
 153                 result = _check_zipfile(fp)
 154     except IOError:
 155         pass
 156     return result
 157
 158 def _EndRecData64(fpin, offset, endrec):
 159     """
 160     Read the ZIP64 end-of-archive records and use that to update endrec
 161     """
 162     try:
 163         fpin.seek(offset - sizeEndCentDir64Locator, 2)
 164     except IOError:
 165         # If the seek fails, the file is not large enough to contain a ZIP64
 166         # end-of-archive record, so just return the end record we were given.
 167         return endrec
 168
 169     data = fpin.read(sizeEndCentDir64Locator)
 170     if len(data) != sizeEndCentDir64Locator:
 171         return endrec
 172     sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
 173     if sig != stringEndArchive64Locator:
 174         return endrec
 175
 176     if diskno != 0 or disks != 1:
 177         raise BadZipfile("zipfiles that span multiple disks are not supported")
 178
 179     # Assume no 'zip64 extensible data'
 180     fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
 181     data = fpin.read(sizeEndCentDir64)
 182     if len(data) != sizeEndCentDir64:
 183         return endrec
 184     sig, sz, create_version, read_version, disk_num, disk_dir, \
 185             dircount, dircount2, dirsize, diroffset = \
 186             struct.unpack(structEndArchive64, data)
 187     if sig != stringEndArchive64:
 188         return endrec
 189
 190     # Update the original endrec using data from the ZIP64 record
 191     endrec[_ECD_SIGNATURE] = sig
 192     endrec[_ECD_DISK_NUMBER] = disk_num
 193     endrec[_ECD_DISK_START] = disk_dir
 194     endrec[_ECD_ENTRIES_THIS_DISK] = dircount
 195     endrec[_ECD_ENTRIES_TOTAL] = dircount2
 196     endrec[_ECD_SIZE] = dirsize
 197     endrec[_ECD_OFFSET] = diroffset
 198     return endrec
 199
 200
 201 def _EndRecData(fpin):
 202     """Return data from the "End of Central Directory" record, or None.
 203
 204     The data is a list of the nine items in the ZIP "End of central dir"
 205     record followed by a tenth item, the file seek offset of this record."""
 206
 207     # Determine file size
 208     fpin.seek(0, 2)
 209     filesize = fpin.tell()
 210
 211     # Check to see if this is ZIP file with no archive comment (the
 212     # "end of central directory" structure should be the last item in the
 213     # file if this is the case).
 214     try:
 215         fpin.seek(-sizeEndCentDir, 2)
 216     except IOError:
 217         return None
 218     data = fpin.read()
 219     if (len(data) == sizeEndCentDir and
 220         data[0:4] == stringEndArchive and
 221         data[-2:] == b"\000\000"):
 222         # the signature is correct and there's no comment, unpack structure
 223         endrec = struct.unpack(structEndArchive, data)
 224         endrec=list(endrec)
 225
 226         # Append a blank comment and record start offset
 227         endrec.append("")
 228         endrec.append(filesize - sizeEndCentDir)
 229
 230         # Try to read the "Zip64 end of central directory" structure
 231         return _EndRecData64(fpin, -sizeEndCentDir, endrec)
 232
 233     # Either this is not a ZIP file, or it is a ZIP file with an archive
 234     # comment.  Search the end of the file for the "end of central directory"
 235     # record signature. The comment is the last item in the ZIP file and may be
 236     # up to 64K long.  It is assumed that the "end of central directory" magic
 237     # number does not appear in the comment.
 238     maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
 239     fpin.seek(maxCommentStart, 0)
 240     data = fpin.read()
 241     start = data.rfind(stringEndArchive)
 242     if start >= 0:
 243         # found the magic number; attempt to unpack and interpret
 244         recData = data[start:start+sizeEndCentDir]
 245         if len(recData) != sizeEndCentDir:
 246             # Zip file is corrupted.
 247             return None
 248         endrec = list(struct.unpack(structEndArchive, recData))
 249         commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file
 250         comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
 251         endrec.append(comment)
 252         endrec.append(maxCommentStart + start)
 253
 254         # Try to read the "Zip64 end of central directory" structure
 255         return _EndRecData64(fpin, maxCommentStart + start - filesize,
 256                              endrec)
 257
 258     # Unable to find a valid end of central directory structure
 259     return None
 260
 261
 262 class ZipInfo (object):
 263     """Class with attributes describing each file in the ZIP archive."""
 264
 265     __slots__ = (
 266             'orig_filename',
 267             'filename',
 268             'date_time',
 269             'compress_type',
 270             'comment',
 271             'extra',
 272             'create_system',
 273             'create_version',
 274             'extract_version',
 275             'reserved',
 276             'flag_bits',
 277             'volume',
 278             'internal_attr',
 279             'external_attr',
 280             'header_offset',
 281             'CRC',
 282             'compress_size',
 283             'file_size',
 284             '_raw_time',
 285         )
 286
 287     def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
 288         self.orig_filename = filename   # Original file name in archive
 289
 290         # Terminate the file name at the first null byte.  Null bytes in file
 291         # names are used as tricks by viruses in archives.
 292         null_byte = filename.find(chr(0))
 293         if null_byte >= 0:
 294             filename = filename[0:null_byte]
 295         # This is used to ensure paths in generated ZIP files always use
 296         # forward slashes as the directory separator, as required by the
 297         # ZIP format specification.
 298         if os.sep != "/" and os.sep in filename:
 299             filename = filename.replace(os.sep, "/")
 300
 301         self.filename = filename        # Normalized file name
 302         self.date_time = date_time      # year, month, day, hour, min, sec
 303
 304         if date_time[0] < 1980:
 305             raise ValueError('ZIP does not support timestamps before 1980')
 306
 307         # Standard values:
 308         self.compress_type = ZIP_STORED # Type of compression for the file
 309         self.comment = ""               # Comment for each file
 310         self.extra = ""                 # ZIP extra data
 311         if sys.platform == 'win32':
 312             self.create_system = 0          # System which created ZIP archive
 313         else:
 314             # Assume everything else is unix-y
 315             self.create_system = 3          # System which created ZIP archive
 316         self.create_version = 20        # Version which created ZIP archive
 317         self.extract_version = 20       # Version needed to extract archive
 318         self.reserved = 0               # Must be zero
 319         self.flag_bits = 0              # ZIP flag bits
 320         self.volume = 0                 # Volume number of file header
 321         self.internal_attr = 0          # Internal attributes
 322         self.external_attr = 0          # External file attributes
 323         # Other attributes are set by class ZipFile:
 324         # header_offset         Byte offset to the file header
 325         # CRC                   CRC-32 of the uncompressed file
 326         # compress_size         Size of the compressed file
 327         # file_size             Size of the uncompressed file
 328
 329     def FileHeader(self, zip64=None):
 330         """Return the per-file header as a string."""
 331         dt = self.date_time
 332         dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
 333         dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
 334         if self.flag_bits & 0x08:
 335             # Set these to zero because we write them after the file data
 336             CRC = compress_size = file_size = 0
 337         else:
 338             CRC = self.CRC
 339             compress_size = self.compress_size
 340             file_size = self.file_size
 341
 342         extra = self.extra
 343
 344         if zip64 is None:
 345             zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
 346         if zip64:
 347             fmt = '<HHQQ'
 348             extra = extra + struct.pack(fmt,
 349                     1, struct.calcsize(fmt)-4, file_size, compress_size)
 350         if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
 351             if not zip64:
 352                 raise LargeZipFile("Filesize would require ZIP64 extensions")
 353             # File is larger than what fits into a 4 byte integer,
 354             # fall back to the ZIP64 extension
 355             file_size = 0xffffffff
 356             compress_size = 0xffffffff
 357             self.extract_version = max(45, self.extract_version)
 358             self.create_version = max(45, self.extract_version)
 359
 360         filename, flag_bits = self._encodeFilenameFlags()
 361         header = struct.pack(structFileHeader, stringFileHeader,
 362                  self.extract_version, self.reserved, flag_bits,
 363                  self.compress_type, dostime, dosdate, CRC,
 364                  compress_size, file_size,
 365                  len(filename), len(extra))
 366         return header + filename + extra
 367
 368     def _encodeFilenameFlags(self):
 369         if isinstance(self.filename, unicode):
 370             try:
 371                 return self.filename.encode('ascii'), self.flag_bits
 372             except UnicodeEncodeError:
 373                 return self.filename.encode('utf-8'), self.flag_bits | 0x800
 374         else:
 375             return self.filename, self.flag_bits
 376
 377     def _decodeFilename(self):
 378         if self.flag_bits & 0x800:
 379             return self.filename.decode('utf-8')
 380         else:
 381             return self.filename
 382
 383     def _decodeExtra(self):
 384         # Try to decode the extra field.
 385         extra = self.extra
 386         unpack = struct.unpack
 387         while len(extra) >= 4:
 388             tp, ln = unpack('<HH', extra[:4])
 389             if tp == 1:
 390                 if ln >= 24:
 391                     counts = unpack('<QQQ', extra[4:28])
 392                 elif ln == 16:
 393                     counts = unpack('<QQ', extra[4:20])
 394                 elif ln == 8:
 395                     counts = unpack('<Q', extra[4:12])
 396                 elif ln == 0:
 397                     counts = ()
 398                 else:
 399                     raise RuntimeError, "Corrupt extra field %s"%(ln,)
 400
 401                 idx = 0
 402
 403                 # ZIP64 extension (large files and/or large archives)
 404                 if self.file_size in (0xffffffffffffffffL, 0xffffffffL):
 405                     self.file_size = counts[idx]
 406                     idx += 1
 407
 408                 if self.compress_size == 0xFFFFFFFFL:
 409                     self.compress_size = counts[idx]
 410                     idx += 1
 411
 412                 if self.header_offset == 0xffffffffL:
 413                     old = self.header_offset
 414                     self.header_offset = counts[idx]
 415                     idx+=1
 416
 417             extra = extra[ln+4:]
 418
 419
 420 class _ZipDecrypter:
 421     """Class to handle decryption of files stored within a ZIP archive.
 422
 423     ZIP supports a password-based form of encryption. Even though known
 424     plaintext attacks have been found against it, it is still useful
 425     to be able to get data out of such a file.
 426
 427     Usage:
 428         zd = _ZipDecrypter(mypwd)
 429         plain_char = zd(cypher_char)
 430         plain_text = map(zd, cypher_text)
 431     """
 432
 433     def _GenerateCRCTable():
 434         """Generate a CRC-32 table.
 435
 436         ZIP encryption uses the CRC32 one-byte primitive for scrambling some
 437         internal keys. We noticed that a direct implementation is faster than
 438         relying on binascii.crc32().
 439         """
 440         poly = 0xedb88320
 441         table = [0] * 256
 442         for i in range(256):
 443             crc = i
 444             for j in range(8):
 445                 if crc & 1:
 446                     crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
 447                 else:
 448                     crc = ((crc >> 1) & 0x7FFFFFFF)
 449             table[i] = crc
 450         return table
 451     crctable = _GenerateCRCTable()
 452
 453     def _crc32(self, ch, crc):
 454         """Compute the CRC32 primitive on one byte."""
 455         return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff]
 456
 457     def __init__(self, pwd):
 458         self.key0 = 305419896
 459         self.key1 = 591751049
 460         self.key2 = 878082192
 461         for p in pwd:
 462             self._UpdateKeys(p)
 463
 464     def _UpdateKeys(self, c):
 465         self.key0 = self._crc32(c, self.key0)
 466         self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
 467         self.key1 = (self.key1 * 134775813 + 1) & 4294967295
 468         self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2)
 469
 470     def __call__(self, c):
 471         """Decrypt a single character."""
 472         c = ord(c)
 473         k = self.key2 | 2
 474         c = c ^ (((k * (k^1)) >> 8) & 255)
 475         c = chr(c)
 476         self._UpdateKeys(c)
 477         return c
 478
 479
 480 compressor_names = {
 481     0: 'store',
 482     1: 'shrink',
 483     2: 'reduce',
 484     3: 'reduce',
 485     4: 'reduce',
 486     5: 'reduce',
 487     6: 'implode',
 488     7: 'tokenize',
 489     8: 'deflate',
 490     9: 'deflate64',
 491     10: 'implode',
 492     12: 'bzip2',
 493     14: 'lzma',
 494     18: 'terse',
 495     19: 'lz77',
 496     97: 'wavpack',
 497     98: 'ppmd',
 498 }
 499
 500
 501 class ZipExtFile(io.BufferedIOBase):
 502     """File-like object for reading an archive member.
 503        Is returned by ZipFile.open().
 504     """
 505
 506     # Max size supported by decompressor.
 507     MAX_N = 1 << 31 - 1
 508
 509     # Read from compressed files in 4k blocks.
 510     MIN_READ_SIZE = 4096
 511
 512     # Search for universal newlines or line chunks.
 513     PATTERN = re.compile(r'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')
 514
 515     def __init__(self, fileobj, mode, zipinfo, decrypter=None,
 516             close_fileobj=False):
 517         self._fileobj = fileobj
 518         self._decrypter = decrypter
 519         self._close_fileobj = close_fileobj
 520
 521         self._compress_type = zipinfo.compress_type
 522         self._compress_size = zipinfo.compress_size
 523         self._compress_left = zipinfo.compress_size
 524
 525         if self._compress_type == ZIP_DEFLATED:
 526             self._decompressor = zlib.decompressobj(-15)
 527         elif self._compress_type != ZIP_STORED:
 528             descr = compressor_names.get(self._compress_type)
 529             if descr:
 530                 raise NotImplementedError("compression type %d (%s)" % (self._compress_type, descr))
 531             else:
 532                 raise NotImplementedError("compression type %d" % (self._compress_type,))
 533         self._unconsumed = ''
 534
 535         self._readbuffer = ''
 536         self._offset = 0
 537
 538         self._universal = 'U' in mode
 539         self.newlines = None
 540
 541         # Adjust read size for encrypted files since the first 12 bytes
 542         # are for the encryption/password information.
 543         if self._decrypter is not None:
 544             self._compress_left -= 12
 545
 546         self.mode = mode
 547         self.name = zipinfo.filename
 548
 549         if hasattr(zipinfo, 'CRC'):
 550             self._expected_crc = zipinfo.CRC
 551             self._running_crc = crc32(b'') & 0xffffffff
 552         else:
 553             self._expected_crc = None
 554
 555     def readline(self, limit=-1):
 556         """Read and return a line from the stream.
 557
 558         If limit is specified, at most limit bytes will be read.
 559         """
 560
 561         if not self._universal and limit < 0:
 562             # Shortcut common case - newline found in buffer.
 563             i = self._readbuffer.find('\n', self._offset) + 1
 564             if i > 0:
 565                 line = self._readbuffer[self._offset: i]
 566                 self._offset = i
 567                 return line
 568
 569         if not self._universal:
 570             return io.BufferedIOBase.readline(self, limit)
 571
 572         line = ''
 573         while limit < 0 or len(line) < limit:
 574             readahead = self.peek(2)
 575             if readahead == '':
 576                 return line
 577
 578             #
 579             # Search for universal newlines or line chunks.
 580             #
 581             # The pattern returns either a line chunk or a newline, but not
 582             # both. Combined with peek(2), we are assured that the sequence
 583             # '\r\n' is always retrieved completely and never split into
 584             # separate newlines - '\r', '\n' due to coincidental readaheads.
 585             #
 586             match = self.PATTERN.search(readahead)
 587             newline = match.group('newline')
 588             if newline is not None:
 589                 if self.newlines is None:
 590                     self.newlines = []
 591                 if newline not in self.newlines:
 592                     self.newlines.append(newline)
 593                 self._offset += len(newline)
 594                 return line + '\n'
 595
 596             chunk = match.group('chunk')
 597             if limit >= 0:
 598                 chunk = chunk[: limit - len(line)]
 599
 600             self._offset += len(chunk)
 601             line += chunk
 602
 603         return line
 604
 605     def peek(self, n=1):
 606         """Returns buffered bytes without advancing the position."""
 607         if n > len(self._readbuffer) - self._offset:
 608             chunk = self.read(n)
 609             if len(chunk) > self._offset:
 610                 self._readbuffer = chunk + self._readbuffer[self._offset:]
 611                 self._offset = 0
 612             else:
 613                 self._offset -= len(chunk)
 614
 615         # Return up to 512 bytes to reduce allocation overhead for tight loops.
 616         return self._readbuffer[self._offset: self._offset + 512]
 617
 618     def readable(self):
 619         return True
 620
 621     def read(self, n=-1):
 622         """Read and return up to n bytes.
 623         If the argument is omitted, None, or negative, data is read and returned until EOF is reached..
 624         """
 625         buf = ''
 626         if n is None:
 627             n = -1
 628         while True:
 629             if n < 0:
 630                 data = self.read1(n)
 631             elif n > len(buf):
 632                 data = self.read1(n - len(buf))
 633             else:
 634                 return buf
 635             if len(data) == 0:
 636                 return buf
 637             buf += data
 638
 639     def _update_crc(self, newdata, eof):
 640         # Update the CRC using the given data.
 641         if self._expected_crc is None:
 642             # No need to compute the CRC if we don't have a reference value
 643             return
 644         self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
 645         # Check the CRC if we're at the end of the file
 646         if eof and self._running_crc != self._expected_crc:
 647             raise BadZipfile("Bad CRC-32 for file %r" % self.name)
 648
 649     def read1(self, n):
 650         """Read up to n bytes with at most one read() system call."""
 651
 652         # Simplify algorithm (branching) by transforming negative n to large n.
 653         if n < 0 or n is None:
 654             n = self.MAX_N
 655
 656         # Bytes available in read buffer.
 657         len_readbuffer = len(self._readbuffer) - self._offset
 658
 659         # Read from file.
 660         if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
 661             nbytes = n - len_readbuffer - len(self._unconsumed)
 662             nbytes = max(nbytes, self.MIN_READ_SIZE)
 663             nbytes = min(nbytes, self._compress_left)
 664
 665             data = self._fileobj.read(nbytes)
 666             self._compress_left -= len(data)
 667
 668             if data and self._decrypter is not None:
 669                 data = ''.join(map(self._decrypter, data))
 670
 671             if self._compress_type == ZIP_STORED:
 672                 self._update_crc(data, eof=(self._compress_left==0))
 673                 self._readbuffer = self._readbuffer[self._offset:] + data
 674                 self._offset = 0
 675             else:
 676                 # Prepare deflated bytes for decompression.
 677                 self._unconsumed += data
 678
 679         # Handle unconsumed data.
 680         if (len(self._unconsumed) > 0 and n > len_readbuffer and
 681             self._compress_type == ZIP_DEFLATED):
 682             data = self._decompressor.decompress(
 683                 self._unconsumed,
 684                 max(n - len_readbuffer, self.MIN_READ_SIZE)
 685             )
 686
 687             self._unconsumed = self._decompressor.unconsumed_tail
 688             eof = len(self._unconsumed) == 0 and self._compress_left == 0
 689             if eof:
 690                 data += self._decompressor.flush()
 691
 692             self._update_crc(data, eof=eof)
 693             self._readbuffer = self._readbuffer[self._offset:] + data
 694             self._offset = 0
 695
 696         # Read from buffer.
 697         data = self._readbuffer[self._offset: self._offset + n]
 698         self._offset += len(data)
 699         return data
 700
 701     def close(self):
 702         try :
 703             if self._close_fileobj:
 704                 self._fileobj.close()
 705         finally:
 706             super(ZipExtFile, self).close()
 707
 708
 709 class ZipFile(object):
 710     """ Class with methods to open, read, write, close, list zip files.
 711
 712     z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
 713
 714     file: Either the path to the file, or a file-like object.
 715           If it is a path, the file will be opened and closed by ZipFile.
 716     mode: The mode can be either read "r", write "w" or append "a".
 717     compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
 718     allowZip64: if True ZipFile will create files with ZIP64 extensions when
 719                 needed, otherwise it will raise an exception when this would
 720                 be necessary.
 721
 722     """
 723
 724     fp = None                   # Set here since __del__ checks it
 725
 726     def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
 727         """Open the ZIP file with mode read "r", write "w" or append "a"."""
 728         if mode not in ("r", "w", "a"):
 729             raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
 730
 731         if compression == ZIP_STORED:
 732             pass
 733         elif compression == ZIP_DEFLATED:
 734             if not zlib:
 735                 raise RuntimeError,\
 736                       "Compression requires the (missing) zlib module"
 737         else:
 738             raise RuntimeError, "That compression method is not supported"
 739
 740         self._allowZip64 = allowZip64
 741         self._didModify = False
 742         self.debug = 0  # Level of printing: 0 through 3
 743         self.NameToInfo = {}    # Find file info given name
 744         self.filelist = []      # List of ZipInfo instances for archive
 745         self.compression = compression  # Method of compression
 746         self.mode = key = mode.replace('b', '')[0]
 747         self.pwd = None
 748         self._comment = ''
 749
 750         # Check if we were passed a file-like object
 751         if isinstance(file, basestring):
 752             self._filePassed = 0
 753             self.filename = file
 754             modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
 755             try:
 756                 self.fp = open(file, modeDict[mode])
 757             except IOError:
 758                 if mode == 'a':
 759                     mode = key = 'w'
 760                     self.fp = open(file, modeDict[mode])
 761                 else:
 762                     raise
 763         else:
 764             self._filePassed = 1
 765             self.fp = file
 766             self.filename = getattr(file, 'name', None)
 767
 768         try:
 769             if key == 'r':
 770                 self._RealGetContents()
 771             elif key == 'w':
 772                 # set the modified flag so central directory gets written
 773                 # even if no files are added to the archive
 774                 self._didModify = True
 775             elif key == 'a':
 776                 try:
 777                     # See if file is a zip file
 778                     self._RealGetContents()
 779                     # seek to start of directory and overwrite
 780                     self.fp.seek(self.start_dir, 0)
 781                 except BadZipfile:
 782                     # file is not a zip file, just append
 783                     self.fp.seek(0, 2)
 784
 785                     # set the modified flag so central directory gets written
 786                     # even if no files are added to the archive
 787                     self._didModify = True
 788             else:
 789                 raise RuntimeError('Mode must be "r", "w" or "a"')
 790         except:
 791             fp = self.fp
 792             self.fp = None
 793             if not self._filePassed:
 794                 fp.close()
 795             raise
 796
 797     def __enter__(self):
 798         return self
 799
 800     def __exit__(self, type, value, traceback):
 801         self.close()
 802
 803     def _RealGetContents(self):
 804         """Read in the table of contents for the ZIP file."""
 805         fp = self.fp
 806         try:
 807             endrec = _EndRecData(fp)
 808         except IOError:
 809             raise BadZipfile("File is not a zip file")
 810         if not endrec:
 811             raise BadZipfile, "File is not a zip file"
 812         if self.debug > 1:
 813             print endrec
 814         size_cd = endrec[_ECD_SIZE]             # bytes in central directory
 815         offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
 816         self._comment = endrec[_ECD_COMMENT]    # archive comment
 817
 818         # "concat" is zero, unless zip was concatenated to another file
 819         concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
 820         if endrec[_ECD_SIGNATURE] == stringEndArchive64:
 821             # If Zip64 extension structures are present, account for them
 822             concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
 823
 824         if self.debug > 2:
 825             inferred = concat + offset_cd
 826             print "given, inferred, offset", offset_cd, inferred, concat
 827         # self.start_dir:  Position of start of central directory
 828         self.start_dir = offset_cd + concat
 829         fp.seek(self.start_dir, 0)
 830         data = fp.read(size_cd)
 831         fp = cStringIO.StringIO(data)
 832         total = 0
 833         while total < size_cd:
 834             centdir = fp.read(sizeCentralDir)
 835             if len(centdir) != sizeCentralDir:
 836                 raise BadZipfile("Truncated central directory")
 837             centdir = struct.unpack(structCentralDir, centdir)
 838             if centdir[_CD_SIGNATURE] != stringCentralDir:
 839                 raise BadZipfile("Bad magic number for central directory")
 840             if self.debug > 2:
 841                 print centdir
 842             filename = fp.read(centdir[_CD_FILENAME_LENGTH])
 843             # Create ZipInfo instance to store file information
 844             x = ZipInfo(filename)
 845             x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
 846             x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
 847             x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
 848             (x.create_version, x.create_system, x.extract_version, x.reserved,
 849                 x.flag_bits, x.compress_type, t, d,
 850                 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
 851             x.volume, x.internal_attr, x.external_attr = centdir[15:18]
 852             # Convert date/time code to (year, month, day, hour, min, sec)
 853             x._raw_time = t
 854             x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
 855                                      t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
 856
 857             x._decodeExtra()
 858             x.header_offset = x.header_offset + concat
 859             x.filename = x._decodeFilename()
 860             self.filelist.append(x)
 861             self.NameToInfo[x.filename] = x
 862
 863             # update total bytes read from central directory
 864             total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
 865                      + centdir[_CD_EXTRA_FIELD_LENGTH]
 866                      + centdir[_CD_COMMENT_LENGTH])
 867
 868             if self.debug > 2:
 869                 print "total", total
 870
 871
 872     def namelist(self):
 873         """Return a list of file names in the archive."""
 874         l = []
 875         for data in self.filelist:
 876             l.append(data.filename)
 877         return l
 878
    def infolist(self):
        """Return a list of class ZipInfo instances for files in the
        archive.

        The list returned is self.filelist itself, not a copy, so changes
        made to it are visible to the archive object.
        """
        return self.filelist
 883
 884     def printdir(self):
 885         """Print a table of contents for the zip file."""
 886         print "%-46s %19s %12s" % ("File Name", "Modified    ", "Size")
 887         for zinfo in self.filelist:
 888             date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
 889             print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size)
 890
 891     def testzip(self):
 892         """Read all the files and check the CRC."""
 893         chunk_size = 2 ** 20
 894         for zinfo in self.filelist:
 895             try:
 896                 # Read by chunks, to avoid an OverflowError or a
 897                 # MemoryError with very large embedded files.
 898                 with self.open(zinfo.filename, "r") as f:
 899                     while f.read(chunk_size):     # Check CRC-32
 900                         pass
 901             except BadZipfile:
 902                 return zinfo.filename
 903
 904     def getinfo(self, name):
 905         """Return the instance of ZipInfo given 'name'."""
 906         info = self.NameToInfo.get(name)
 907         if info is None:
 908             raise KeyError(
 909                 'There is no item named %r in the archive' % name)
 910
 911         return info
 912
    def setpassword(self, pwd):
        """Set default password for encrypted files.

        The value is stored in self.pwd, which open() falls back to when it
        is called without a password of its own.
        """
        self.pwd = pwd
 916
    @property
    def comment(self):
        """The comment text associated with the ZIP file.

        Returns self._comment, which is filled from the end-of-central-
        directory record when an archive is read and replaced through the
        property setter.
        """
        return self._comment
 921
 922     @comment.setter
 923     def comment(self, comment):
 924         # check for valid comment length
 925         if len(comment) > ZIP_MAX_COMMENT:
 926             import warnings
 927             warnings.warn('Archive comment is too long; truncating to %d bytes'
 928                           % ZIP_MAX_COMMENT, stacklevel=2)
 929             comment = comment[:ZIP_MAX_COMMENT]
 930         self._comment = comment
 931         self._didModify = True
 932
 933     def read(self, name, pwd=None):
 934         """Return file bytes (as a string) for name."""
 935         return self.open(name, "r", pwd).read()
 936
 937     def open(self, name, mode="r", pwd=None):
 938         """Return file-like object for 'name'."""
 939         if mode not in ("r", "U", "rU"):
 940             raise RuntimeError, 'open() requires mode "r", "U", or "rU"'
 941         if not self.fp:
 942             raise RuntimeError, \
 943                   "Attempt to read ZIP archive that was already closed"
 944
 945         # Only open a new file for instances where we were not
 946         # given a file object in the constructor
 947         if self._filePassed:
 948             zef_file = self.fp
 949             should_close = False
 950         else:
 951             zef_file = open(self.filename, 'rb')
 952             should_close = True
 953
 954         try:
 955             # Make sure we have an info object
 956             if isinstance(name, ZipInfo):
 957                 # 'name' is already an info object
 958                 zinfo = name
 959             else:
 960                 # Get info object for name
 961                 zinfo = self.getinfo(name)
 962
 963             zef_file.seek(zinfo.header_offset, 0)
 964
 965             # Skip the file header:
 966             fheader = zef_file.read(sizeFileHeader)
 967             if len(fheader) != sizeFileHeader:
 968                 raise BadZipfile("Truncated file header")
 969             fheader = struct.unpack(structFileHeader, fheader)
 970             if fheader[_FH_SIGNATURE] != stringFileHeader:
 971                 raise BadZipfile("Bad magic number for file header")
 972
 973             fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
 974             if fheader[_FH_EXTRA_FIELD_LENGTH]:
 975                 zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
 976
 977             if fname != zinfo.orig_filename:
 978                 raise BadZipfile, \
 979                         'File name in directory "%s" and header "%s" differ.' % (
 980                             zinfo.orig_filename, fname)
 981
 982             # check for encrypted flag & handle password
 983             is_encrypted = zinfo.flag_bits & 0x1
 984             zd = None
 985             if is_encrypted:
 986                 if not pwd:
 987                     pwd = self.pwd
 988                 if not pwd:
 989                     raise RuntimeError, "File %s is encrypted, " \
 990                         "password required for extraction" % name
 991
 992                 zd = _ZipDecrypter(pwd)
 993                 # The first 12 bytes in the cypher stream is an encryption header
 994                 #  used to strengthen the algorithm. The first 11 bytes are
 995                 #  completely random, while the 12th contains the MSB of the CRC,
 996                 #  or the MSB of the file time depending on the header type
 997                 #  and is used to check the correctness of the password.
 998                 bytes = zef_file.read(12)
 999                 h = map(zd, bytes[0:12])
1000                 if zinfo.flag_bits & 0x8:
1001                     # compare against the file type from extended local headers
1002                     check_byte = (zinfo._raw_time >> 8) & 0xff
1003                 else:
1004                     # compare against the CRC otherwise
1005                     check_byte = (zinfo.CRC >> 24) & 0xff
1006                 if ord(h[11]) != check_byte:
1007                     raise RuntimeError("Bad password for file", name)
1008
1009             return ZipExtFile(zef_file, mode, zinfo, zd,
1010                     close_fileobj=should_close)
1011         except:
1012             if should_close:
1013                 zef_file.close()
1014             raise
1015
1016     def extract(self, member, path=None, pwd=None):
1017         """Extract a member from the archive to the current working directory,
1018            using its full name. Its file information is extracted as accurately
1019            as possible. `member' may be a filename or a ZipInfo object. You can
1020            specify a different directory using `path'.
1021         """
1022         if not isinstance(member, ZipInfo):
1023             member = self.getinfo(member)
1024
1025         if path is None:
1026             path = os.getcwd()
1027
1028         return self._extract_member(member, path, pwd)
1029
1030     def extractall(self, path=None, members=None, pwd=None):
1031         """Extract all members from the archive to the current working
1032            directory. `path' specifies a different directory to extract to.
1033            `members' is optional and must be a subset of the list returned
1034            by namelist().
1035         """
1036         if members is None:
1037             members = self.namelist()
1038
1039         for zipinfo in members:
1040             self.extract(zipinfo, path, pwd)
1041
1042     def _extract_member(self, member, targetpath, pwd):
1043         """Extract the ZipInfo object 'member' to a physical
1044            file on the path targetpath.
1045         """
1046         # build the destination pathname, replacing
1047         # forward slashes to platform specific separators.
1048         arcname = member.filename.replace('/', os.path.sep)
1049
1050         if os.path.altsep:
1051             arcname = arcname.replace(os.path.altsep, os.path.sep)
1052         # interpret absolute pathname as relative, remove drive letter or
1053         # UNC path, redundant separators, "." and ".." components.
1054         arcname = os.path.splitdrive(arcname)[1]
1055         arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
1056                     if x not in ('', os.path.curdir, os.path.pardir))
1057         if os.path.sep == '\\':
1058             # filter illegal characters on Windows
1059             illegal = ':<>|"?*'
1060             if isinstance(arcname, unicode):
1061                 table = {ord(c): ord('_') for c in illegal}
1062             else:
1063                 table = string.maketrans(illegal, '_' * len(illegal))
1064             arcname = arcname.translate(table)
1065             # remove trailing dots
1066             arcname = (x.rstrip('.') for x in arcname.split(os.path.sep))
1067             arcname = os.path.sep.join(x for x in arcname if x)
1068
1069         targetpath = os.path.join(targetpath, arcname)
1070         targetpath = os.path.normpath(targetpath)
1071
1072         # Create all upper directories if necessary.
1073         upperdirs = os.path.dirname(targetpath)
1074         if upperdirs and not os.path.exists(upperdirs):
1075             os.makedirs(upperdirs)
1076
1077         if member.filename[-1] == '/':
1078             if not os.path.isdir(targetpath):
1079                 os.mkdir(targetpath)
1080             return targetpath
1081
1082         with self.open(member, pwd=pwd) as source, \
1083              file(targetpath, "wb") as target:
1084             shutil.copyfileobj(source, target)
1085
1086         return targetpath
1087
1088     def _writecheck(self, zinfo):
1089         """Check for errors before writing a file to the archive."""
1090         if zinfo.filename in self.NameToInfo:
1091             import warnings
1092             warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
1093         if self.mode not in ("w", "a"):
1094             raise RuntimeError, 'write() requires mode "w" or "a"'
1095         if not self.fp:
1096             raise RuntimeError, \
1097                   "Attempt to write ZIP archive that was already closed"
1098         if zinfo.compress_type == ZIP_DEFLATED and not zlib:
1099             raise RuntimeError, \
1100                   "Compression requires the (missing) zlib module"
1101         if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
1102             raise RuntimeError, \
1103                   "That compression method is not supported"
1104         if not self._allowZip64:
1105             requires_zip64 = None
1106             if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
1107                 requires_zip64 = "Files count"
1108             elif zinfo.file_size > ZIP64_LIMIT:
1109                 requires_zip64 = "Filesize"
1110             elif zinfo.header_offset > ZIP64_LIMIT:
1111                 requires_zip64 = "Zipfile size"
1112             if requires_zip64:
1113                 raise LargeZipFile(requires_zip64 +
1114                                    " would require ZIP64 extensions")
1115
1116     def write(self, filename, arcname=None, compress_type=None):
1117         """Put the bytes from filename into the archive under the name
1118         arcname."""
1119         if not self.fp:
1120             raise RuntimeError(
1121                   "Attempt to write to ZIP archive that was already closed")
1122
1123         st = os.stat(filename)
1124         isdir = stat.S_ISDIR(st.st_mode)
1125         mtime = time.localtime(st.st_mtime)
1126         date_time = mtime[0:6]
1127         # Create ZipInfo instance to store file information
1128         if arcname is None:
1129             arcname = filename
1130         arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
1131         while arcname[0] in (os.sep, os.altsep):
1132             arcname = arcname[1:]
1133         if isdir:
1134             arcname += '/'
1135         zinfo = ZipInfo(arcname, date_time)
1136         zinfo.external_attr = (st[0] & 0xFFFF) << 16L      # Unix attributes
1137         if compress_type is None:
1138             zinfo.compress_type = self.compression
1139         else:
1140             zinfo.compress_type = compress_type
1141
1142         zinfo.file_size = st.st_size
1143         zinfo.flag_bits = 0x00
1144         zinfo.header_offset = self.fp.tell()    # Start of header bytes
1145
1146         self._writecheck(zinfo)
1147         self._didModify = True
1148
1149         if isdir:
1150             zinfo.file_size = 0
1151             zinfo.compress_size = 0
1152             zinfo.CRC = 0
1153             zinfo.external_attr |= 0x10  # MS-DOS directory flag
1154             self.filelist.append(zinfo)
1155             self.NameToInfo[zinfo.filename] = zinfo
1156             self.fp.write(zinfo.FileHeader(False))
1157             return
1158
1159         with open(filename, "rb") as fp:
1160             # Must overwrite CRC and sizes with correct data later
1161             zinfo.CRC = CRC = 0
1162             zinfo.compress_size = compress_size = 0
1163             # Compressed size can be larger than uncompressed size
1164             zip64 = self._allowZip64 and \
1165                     zinfo.file_size * 1.05 > ZIP64_LIMIT
1166             self.fp.write(zinfo.FileHeader(zip64))
1167             if zinfo.compress_type == ZIP_DEFLATED:
1168                 cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1169                      zlib.DEFLATED, -15)
1170             else:
1171                 cmpr = None
1172             file_size = 0
1173             while 1:
1174                 buf = fp.read(1024 * 8)
1175                 if not buf:
1176                     break
1177                 file_size = file_size + len(buf)
1178                 CRC = crc32(buf, CRC) & 0xffffffff
1179                 if cmpr:
1180                     buf = cmpr.compress(buf)
1181                     compress_size = compress_size + len(buf)
1182                 self.fp.write(buf)
1183         if cmpr:
1184             buf = cmpr.flush()
1185             compress_size = compress_size + len(buf)
1186             self.fp.write(buf)
1187             zinfo.compress_size = compress_size
1188         else:
1189             zinfo.compress_size = file_size
1190         zinfo.CRC = CRC
1191         zinfo.file_size = file_size
1192         if not zip64 and self._allowZip64:
1193             if file_size > ZIP64_LIMIT:
1194                 raise RuntimeError('File size has increased during compressing')
1195             if compress_size > ZIP64_LIMIT:
1196                 raise RuntimeError('Compressed size larger than uncompressed size')
1197         # Seek backwards and write file header (which will now include
1198         # correct CRC and file sizes)
1199         position = self.fp.tell()       # Preserve current position in file
1200         self.fp.seek(zinfo.header_offset, 0)
1201         self.fp.write(zinfo.FileHeader(zip64))
1202         self.fp.seek(position, 0)
1203         self.filelist.append(zinfo)
1204         self.NameToInfo[zinfo.filename] = zinfo
1205
1206     def writestr(self, zinfo_or_arcname, bytes, compress_type=None):
1207         """Write a file into the archive.  The contents is the string
1208         'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or
1209         the name of the file in the archive."""
1210         if not isinstance(zinfo_or_arcname, ZipInfo):
1211             zinfo = ZipInfo(filename=zinfo_or_arcname,
1212                             date_time=time.localtime(time.time())[:6])
1213
1214             zinfo.compress_type = self.compression
1215             if zinfo.filename[-1] == '/':
1216                 zinfo.external_attr = 0o40775 << 16   # drwxrwxr-x
1217                 zinfo.external_attr |= 0x10           # MS-DOS directory flag
1218             else:
1219                 zinfo.external_attr = 0o600 << 16     # ?rw-------
1220         else:
1221             zinfo = zinfo_or_arcname
1222
1223         if not self.fp:
1224             raise RuntimeError(
1225                   "Attempt to write to ZIP archive that was already closed")
1226
1227         if compress_type is not None:
1228             zinfo.compress_type = compress_type
1229
1230         zinfo.file_size = len(bytes)            # Uncompressed size
1231         zinfo.header_offset = self.fp.tell()    # Start of header bytes
1232         self._writecheck(zinfo)
1233         self._didModify = True
1234         zinfo.CRC = crc32(bytes) & 0xffffffff       # CRC-32 checksum
1235         if zinfo.compress_type == ZIP_DEFLATED:
1236             co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1237                  zlib.DEFLATED, -15)
1238             bytes = co.compress(bytes) + co.flush()
1239             zinfo.compress_size = len(bytes)    # Compressed size
1240         else:
1241             zinfo.compress_size = zinfo.file_size
1242         zip64 = zinfo.file_size > ZIP64_LIMIT or \
1243                 zinfo.compress_size > ZIP64_LIMIT
1244         if zip64 and not self._allowZip64:
1245             raise LargeZipFile("Filesize would require ZIP64 extensions")
1246         self.fp.write(zinfo.FileHeader(zip64))
1247         self.fp.write(bytes)
1248         if zinfo.flag_bits & 0x08:
1249             # Write CRC and file sizes after the file data
1250             fmt = '<LQQ' if zip64 else '<LLL'
1251             self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
1252                   zinfo.file_size))
1253         self.fp.flush()
1254         self.filelist.append(zinfo)
1255         self.NameToInfo[zinfo.filename] = zinfo
1256
    def __del__(self):
        """Call the "close()" method in case the user forgot.

        close() returns immediately when self.fp is already None, so an
        archive that was closed explicitly is not written a second time.
        """
        self.close()
1260
    def close(self):
        """Close the file, and for mode "w" and "a" write the ending
        records.

        Does nothing when the archive is already closed (self.fp is None).
        For a modified archive opened in mode "w" or "a" the central
        directory is written at the current file position, followed by the
        ZIP64 end records when they are required and by the
        end-of-central-directory record with the archive comment.

        Raises LargeZipFile when ZIP64 end records are required but
        self._allowZip64 is false.  self.fp is reset to None in every case,
        and the underlying file is closed unless it was passed in by the
        caller.
        """
        if self.fp is None:
            return

        try:
            if self.mode in ("w", "a") and self._didModify: # write ending records
                # pos1 marks where the central directory starts.
                pos1 = self.fp.tell()
                for zinfo in self.filelist:         # write central directory
                    # Pack the timestamp into the 16-bit date and time
                    # fields: years count from 1980 and seconds are stored
                    # halved.
                    dt = zinfo.date_time
                    dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                    dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                    # Values above ZIP64_LIMIT are collected in "extra" for
                    # the ZIP64 field and replaced by 0xffffffff in the
                    # fixed-size record.
                    extra = []
                    if zinfo.file_size > ZIP64_LIMIT \
                            or zinfo.compress_size > ZIP64_LIMIT:
                        extra.append(zinfo.file_size)
                        extra.append(zinfo.compress_size)
                        file_size = 0xffffffff
                        compress_size = 0xffffffff
                    else:
                        file_size = zinfo.file_size
                        compress_size = zinfo.compress_size

                    if zinfo.header_offset > ZIP64_LIMIT:
                        extra.append(zinfo.header_offset)
                        header_offset = 0xffffffffL
                    else:
                        header_offset = zinfo.header_offset

                    extra_data = zinfo.extra
                    if extra:
                        # Put a ZIP64 field (header id 1, one 8-byte value
                        # per collected item) in front of the member's own
                        # extra data.
                        extra_data = struct.pack(
                                '<HH' + 'Q'*len(extra),
                                1, 8*len(extra), *extra) + extra_data

                        # Entries carrying a ZIP64 field advertise at least
                        # version 45.
                        extract_version = max(45, zinfo.extract_version)
                        create_version = max(45, zinfo.create_version)
                    else:
                        extract_version = zinfo.extract_version
                        create_version = zinfo.create_version

                    try:
                        filename, flag_bits = zinfo._encodeFilenameFlags()
                        centdir = struct.pack(structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                    except DeprecationWarning:
                        # Dump the values that were being packed to stderr
                        # for diagnosis, then re-raise.
                        print >>sys.stderr, (structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(zinfo.filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                        raise
                    # One directory entry: fixed record, then the
                    # variable-length name, extra data and comment.
                    self.fp.write(centdir)
                    self.fp.write(filename)
                    self.fp.write(extra_data)
                    self.fp.write(zinfo.comment)

                # pos2 marks the end of the central directory.
                pos2 = self.fp.tell()
                # Write end-of-zip-archive record
                centDirCount = len(self.filelist)
                centDirSize = pos2 - pos1
                centDirOffset = pos1
                requires_zip64 = None
                if centDirCount > ZIP_FILECOUNT_LIMIT:
                    requires_zip64 = "Files count"
                elif centDirOffset > ZIP64_LIMIT:
                    requires_zip64 = "Central directory offset"
                elif centDirSize > ZIP64_LIMIT:
                    requires_zip64 = "Central directory size"
                if requires_zip64:
                    # Need to write the ZIP64 end-of-archive records
                    if not self._allowZip64:
                        raise LargeZipFile(requires_zip64 +
                                           " would require ZIP64 extensions")
                    zip64endrec = struct.pack(
                            structEndArchive64, stringEndArchive64,
                            44, 45, 45, 0, 0, centDirCount, centDirCount,
                            centDirSize, centDirOffset)
                    self.fp.write(zip64endrec)

                    # The locator record carries pos2, the position at which
                    # the ZIP64 end record was just written.
                    zip64locrec = struct.pack(
                            structEndArchive64Locator,
                            stringEndArchive64Locator, 0, pos2, 1)
                    self.fp.write(zip64locrec)
                    # Clamp the values to what the fields of the classic end
                    # record can hold.
                    centDirCount = min(centDirCount, 0xFFFF)
                    centDirSize = min(centDirSize, 0xFFFFFFFF)
                    centDirOffset = min(centDirOffset, 0xFFFFFFFF)

                endrec = struct.pack(structEndArchive, stringEndArchive,
                                    0, 0, centDirCount, centDirCount,
                                    centDirSize, centDirOffset, len(self._comment))
                self.fp.write(endrec)
                self.fp.write(self._comment)
                self.fp.flush()
        finally:
            # Always mark the archive as closed, even when writing the end
            # records failed; only close file objects this instance opened.
            fp = self.fp
            self.fp = None
            if not self._filePassed:
                fp.close()
1371
1372
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def writepy(self, pathname, basename = ""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are always module.pyo or module.pyc.
        This method will compile the module.py into module.pyc if
        necessary.

        "basename" is the archive directory the entries are stored under;
        package names are appended to it while recursing.  RuntimeError is
        raised when pathname is a file whose name does not end with ".py".
        """
        # The last path component names the package.  Trailing separators
        # are dropped first because os.path.split("pkg/") has an empty tail,
        # which would store the package's modules without their package
        # prefix.
        name = os.path.basename(pathname.rstrip(os.sep + (os.altsep or "")))
        if os.path.isdir(pathname):
            initname = os.path.join(pathname, "__init__.py")
            if os.path.isfile(initname):
                # This is a package directory, add it
                if basename:
                    basename = "%s/%s" % (basename, name)
                else:
                    basename = name
                if self.debug:
                    print("Adding package in %s as %s" % (pathname, basename))
                fname, arcname = self._get_codename(initname[0:-3], basename)
                if self.debug:
                    print("Adding %s" % arcname)
                self.write(fname, arcname)
                dirlist = os.listdir(pathname)
                # __init__ has been written already; do not add it twice.
                dirlist.remove("__init__.py")
                # Add all *.py files and package subdirectories
                for filename in dirlist:
                    path = os.path.join(pathname, filename)
                    root, ext = os.path.splitext(filename)
                    if os.path.isdir(path):
                        if os.path.isfile(os.path.join(path, "__init__.py")):
                            # This is a package directory, add it
                            self.writepy(path, basename)  # Recursive call
                    elif ext == ".py":
                        fname, arcname = self._get_codename(path[0:-3],
                                         basename)
                        if self.debug:
                            print("Adding %s" % arcname)
                        self.write(fname, arcname)
            else:
                # This is NOT a package directory, add its files at top level
                if self.debug:
                    print("Adding files from directory %s" % pathname)
                for filename in os.listdir(pathname):
                    path = os.path.join(pathname, filename)
                    root, ext = os.path.splitext(filename)
                    if ext == ".py":
                        fname, arcname = self._get_codename(path[0:-3],
                                         basename)
                        if self.debug:
                            print("Adding %s" % arcname)
                        self.write(fname, arcname)
        else:
            if pathname[-3:] != ".py":
                raise RuntimeError(
                      'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file %s" % arcname)
            self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string.pyc);
        with a basename the archive name becomes basename/string.pyc.

        An up-to-date .pyo file is preferred.  Otherwise the .pyc file is
        used, after compiling it when it is missing or older than the .py
        file.  A compile error is printed, not raised.
        """
        file_py  = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        if os.path.isfile(file_pyo) and \
                            os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
            fname = file_pyo    # Use .pyo file
        elif not os.path.isfile(file_pyc) or \
             os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
            import py_compile
            if self.debug:
                print("Compiling %s" % file_py)
            try:
                py_compile.compile(file_py, file_pyc, None, True)
            except py_compile.PyCompileError as err:
                print(err.msg)
            fname = file_pyc
        else:
            fname = file_pyc
        archivename = os.path.split(fname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
1470
1471
def main(args = None):
    """Run the zipfile command-line interface.

    args is the list of command-line arguments without the program
    name; when it is None, sys.argv[1:] is used.  The first argument
    selects the operation:

        -l zipfile.zip           print a listing of the archive
        -t zipfile.zip           test the archive, reporting a corrupted member
        -e zipfile.zip target    extract the archive into target
        -c zipfile.zip src ...   create the archive from the given sources

    An unknown option or a wrong number of arguments prints the usage
    text and exits with status 1.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)

    def usage_exit():
        # Show the help text and terminate with a failure status.
        print(USAGE)
        sys.exit(1)

    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        usage_exit()

    option = args[0]

    if option == '-l':
        if len(args) != 2:
            usage_exit()
        with ZipFile(args[1], 'r') as archive:
            archive.printdir()

    elif option == '-t':
        if len(args) != 2:
            usage_exit()
        with ZipFile(args[1], 'r') as archive:
            corrupted = archive.testzip()
        if corrupted:
            print("The following enclosed file is corrupted: {!r}".format(corrupted))
        print("Done testing")

    elif option == '-e':
        if len(args) != 3:
            usage_exit()
        with ZipFile(args[1], 'r') as archive:
            archive.extractall(args[2])

    elif option == '-c':
        if len(args) < 3:
            usage_exit()

        def add_to_zip(archive, src, arcname):
            # Regular files are stored deflated; directories are walked
            # recursively; anything else is ignored.
            if os.path.isfile(src):
                archive.write(src, arcname, ZIP_DEFLATED)
                return
            if not os.path.isdir(src):
                return
            if arcname:
                # Record the directory itself unless it maps to the root.
                archive.write(src, arcname)
            for child in os.listdir(src):
                add_to_zip(archive,
                           os.path.join(src, child),
                           os.path.join(arcname, child))

        with ZipFile(args[1], 'w', allowZip64=True) as archive:
            for src in args[2:]:
                # A trailing separator leaves an empty basename, so fall
                # back to the name of the containing directory.
                arcname = (os.path.basename(src) or
                           os.path.basename(os.path.dirname(src)))
                if arcname in ('', os.curdir, os.pardir):
                    arcname = ''
                add_to_zip(archive, src, arcname)
1537
# Script entry point: when this module is executed directly (rather than
# imported), run the command-line interface implemented by main().
if __name__ == "__main__":
    main()