AppPkg/Applications/Python/Python-2.7.2/Lib/zipfile.py

   1 """
   2 Read and write ZIP files.
   3 """
   4 import struct, os, time, sys, shutil
   5 import binascii, cStringIO, stat
   6 import io
   7 import re
   8
   9 try:
  10     import zlib # We may need its compression method
  11     crc32 = zlib.crc32
  12 except ImportError:
  13     zlib = None
  14     crc32 = binascii.crc32
  15
# Public names exported by "from zipfile import *".
__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
           "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
  18
  19 class BadZipfile(Exception):
  20     pass
  21
  22
class LargeZipFile(Exception):
    """
    Raised when writing a zipfile that requires ZIP64 extensions while
    those extensions are disabled.
    """
  28
error = BadZipfile      # The exception raised by this module

# Largest size stored in a plain header field; ZipInfo.FileHeader() switches
# to the ZIP64 extra field for sizes above this value (2**31 - 1).
ZIP64_LIMIT = (1 << 31) - 1
# Entry-count threshold (2**16); not referenced in this part of the file.
ZIP_FILECOUNT_LIMIT = 1 << 16
# Longest archive comment: the comment-length field of the end-of-archive
# record is an unsigned 16-bit integer (2**16 - 1 bytes).
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
  38 # Other ZIP compression methods not supported
  39
  40 # Below are some formats and associated data for reading/writing headers using
  41 # the struct module.  The names and structures of headers/records are those used
  42 # in the PKWARE description of the ZIP file format:
  43 #     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
  44 # (URL valid as of January 2008)
  45
  46 # The "end of central directory" structure, magic number, size, and indices
  47 # (section V.I in the format document)
# Little-endian, unpadded layout: 4-byte signature, four unsigned 16-bit
# fields, two unsigned 32-bit fields, one unsigned 16-bit field (22 bytes).
structEndArchive = "<4s4H2LH"
stringEndArchive = "PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Indices into the list built from struct.unpack(structEndArchive, ...)
_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
# (_EndRecData appends the comment string and the record's file offset).
_ECD_COMMENT = 8
_ECD_LOCATION = 9
  64
  65 # The "central directory" structure, magic number, size, and indices
  66 # of entries in the structure (section V.F in the format document)
# Little-endian, unpadded layout: 4-byte signature, four unsigned bytes,
# four unsigned 16-bit fields, one unsigned 32-bit field (CRC), two unsigned
# 32-bit sizes, five unsigned 16-bit fields, two unsigned 32-bit fields
# (46 bytes in total).
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = "PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18
  91
  92 # The "local file header" structure, magic number, size, and indices
  93 # (section V.A in the format document)
# Little-endian, unpadded layout: 4-byte signature, two unsigned bytes,
# four unsigned 16-bit fields, one unsigned 32-bit field (CRC), two unsigned
# 32-bit sizes, two unsigned 16-bit lengths (30 bytes in total).
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = "PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# Indices of the fields in an unpacked local file header
_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
 110
 111 # The "Zip64 end of central directory locator" structure, magic number, and size
# Little-endian, unpadded layout: 4-byte signature, unsigned 32-bit,
# unsigned 64-bit, unsigned 32-bit (20 bytes).  _EndRecData64 unpacks these
# as (sig, diskno, reloff, disks).
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = "PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
 115
 116 # The "Zip64 end of central directory" record, magic number, size, and indices
 117 # (section V.G in the format document)
# Little-endian, unpadded layout: 4-byte signature, one unsigned 64-bit
# field, two unsigned 16-bit fields, two unsigned 32-bit fields, four
# unsigned 64-bit fields (56 bytes in total).
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = "PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# Indices of the fields in an unpacked ZIP64 end-of-central-directory record
_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9
 132
 133 def _check_zipfile(fp):
 134     try:
 135         if _EndRecData(fp):
 136             return True         # file has correct magic number
 137     except IOError:
 138         pass
 139     return False
 140
 141 def is_zipfile(filename):
 142     """Quickly see if a file is a ZIP file by checking the magic number.
 143
 144     The filename argument may be a file or file-like object too.
 145     """
 146     result = False
 147     try:
 148         if hasattr(filename, "read"):
 149             result = _check_zipfile(fp=filename)
 150         else:
 151             with open(filename, "rb") as fp:
 152                 result = _check_zipfile(fp)
 153     except IOError:
 154         pass
 155     return result
 156
 157 def _EndRecData64(fpin, offset, endrec):
 158     """
 159     Read the ZIP64 end-of-archive records and use that to update endrec
 160     """
 161     try:
 162         fpin.seek(offset - sizeEndCentDir64Locator, 2)
 163     except IOError:
 164         # If the seek fails, the file is not large enough to contain a ZIP64
 165         # end-of-archive record, so just return the end record we were given.
 166         return endrec
 167
 168     data = fpin.read(sizeEndCentDir64Locator)
 169     sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
 170     if sig != stringEndArchive64Locator:
 171         return endrec
 172
 173     if diskno != 0 or disks != 1:
 174         raise BadZipfile("zipfiles that span multiple disks are not supported")
 175
 176     # Assume no 'zip64 extensible data'
 177     fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
 178     data = fpin.read(sizeEndCentDir64)
 179     sig, sz, create_version, read_version, disk_num, disk_dir, \
 180             dircount, dircount2, dirsize, diroffset = \
 181             struct.unpack(structEndArchive64, data)
 182     if sig != stringEndArchive64:
 183         return endrec
 184
 185     # Update the original endrec using data from the ZIP64 record
 186     endrec[_ECD_SIGNATURE] = sig
 187     endrec[_ECD_DISK_NUMBER] = disk_num
 188     endrec[_ECD_DISK_START] = disk_dir
 189     endrec[_ECD_ENTRIES_THIS_DISK] = dircount
 190     endrec[_ECD_ENTRIES_TOTAL] = dircount2
 191     endrec[_ECD_SIZE] = dirsize
 192     endrec[_ECD_OFFSET] = diroffset
 193     return endrec
 194
 195
 196 def _EndRecData(fpin):
 197     """Return data from the "End of Central Directory" record, or None.
 198
 199     The data is a list of the nine items in the ZIP "End of central dir"
 200     record followed by a tenth item, the file seek offset of this record."""
 201
 202     # Determine file size
 203     fpin.seek(0, 2)
 204     filesize = fpin.tell()
 205
 206     # Check to see if this is ZIP file with no archive comment (the
 207     # "end of central directory" structure should be the last item in the
 208     # file if this is the case).
 209     try:
 210         fpin.seek(-sizeEndCentDir, 2)
 211     except IOError:
 212         return None
 213     data = fpin.read()
 214     if data[0:4] == stringEndArchive and data[-2:] == "\000\000":
 215         # the signature is correct and there's no comment, unpack structure
 216         endrec = struct.unpack(structEndArchive, data)
 217         endrec=list(endrec)
 218
 219         # Append a blank comment and record start offset
 220         endrec.append("")
 221         endrec.append(filesize - sizeEndCentDir)
 222
 223         # Try to read the "Zip64 end of central directory" structure
 224         return _EndRecData64(fpin, -sizeEndCentDir, endrec)
 225
 226     # Either this is not a ZIP file, or it is a ZIP file with an archive
 227     # comment.  Search the end of the file for the "end of central directory"
 228     # record signature. The comment is the last item in the ZIP file and may be
 229     # up to 64K long.  It is assumed that the "end of central directory" magic
 230     # number does not appear in the comment.
 231     maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
 232     fpin.seek(maxCommentStart, 0)
 233     data = fpin.read()
 234     start = data.rfind(stringEndArchive)
 235     if start >= 0:
 236         # found the magic number; attempt to unpack and interpret
 237         recData = data[start:start+sizeEndCentDir]
 238         endrec = list(struct.unpack(structEndArchive, recData))
 239         comment = data[start+sizeEndCentDir:]
 240         # check that comment length is correct
 241         if endrec[_ECD_COMMENT_SIZE] == len(comment):
 242             # Append the archive comment and start offset
 243             endrec.append(comment)
 244             endrec.append(maxCommentStart + start)
 245
 246             # Try to read the "Zip64 end of central directory" structure
 247             return _EndRecData64(fpin, maxCommentStart + start - filesize,
 248                                  endrec)
 249
 250     # Unable to find a valid end of central directory structure
 251     return
 252
 253
 254 class ZipInfo (object):
 255     """Class with attributes describing each file in the ZIP archive."""
 256
 257     __slots__ = (
 258             'orig_filename',
 259             'filename',
 260             'date_time',
 261             'compress_type',
 262             'comment',
 263             'extra',
 264             'create_system',
 265             'create_version',
 266             'extract_version',
 267             'reserved',
 268             'flag_bits',
 269             'volume',
 270             'internal_attr',
 271             'external_attr',
 272             'header_offset',
 273             'CRC',
 274             'compress_size',
 275             'file_size',
 276             '_raw_time',
 277         )
 278
 279     def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
 280         self.orig_filename = filename   # Original file name in archive
 281
 282         # Terminate the file name at the first null byte.  Null bytes in file
 283         # names are used as tricks by viruses in archives.
 284         null_byte = filename.find(chr(0))
 285         if null_byte >= 0:
 286             filename = filename[0:null_byte]
 287         # This is used to ensure paths in generated ZIP files always use
 288         # forward slashes as the directory separator, as required by the
 289         # ZIP format specification.
 290         if os.sep != "/" and os.sep in filename:
 291             filename = filename.replace(os.sep, "/")
 292
 293         self.filename = filename        # Normalized file name
 294         self.date_time = date_time      # year, month, day, hour, min, sec
 295         # Standard values:
 296         self.compress_type = ZIP_STORED # Type of compression for the file
 297         self.comment = ""               # Comment for each file
 298         self.extra = ""                 # ZIP extra data
 299         if sys.platform == 'win32':
 300             self.create_system = 0          # System which created ZIP archive
 301         else:
 302             # Assume everything else is unix-y
 303             self.create_system = 3          # System which created ZIP archive
 304         self.create_version = 20        # Version which created ZIP archive
 305         self.extract_version = 20       # Version needed to extract archive
 306         self.reserved = 0               # Must be zero
 307         self.flag_bits = 0              # ZIP flag bits
 308         self.volume = 0                 # Volume number of file header
 309         self.internal_attr = 0          # Internal attributes
 310         self.external_attr = 0          # External file attributes
 311         # Other attributes are set by class ZipFile:
 312         # header_offset         Byte offset to the file header
 313         # CRC                   CRC-32 of the uncompressed file
 314         # compress_size         Size of the compressed file
 315         # file_size             Size of the uncompressed file
 316
 317     def FileHeader(self):
 318         """Return the per-file header as a string."""
 319         dt = self.date_time
 320         dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
 321         dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
 322         if self.flag_bits & 0x08:
 323             # Set these to zero because we write them after the file data
 324             CRC = compress_size = file_size = 0
 325         else:
 326             CRC = self.CRC
 327             compress_size = self.compress_size
 328             file_size = self.file_size
 329
 330         extra = self.extra
 331
 332         if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
 333             # File is larger than what fits into a 4 byte integer,
 334             # fall back to the ZIP64 extension
 335             fmt = '<HHQQ'
 336             extra = extra + struct.pack(fmt,
 337                     1, struct.calcsize(fmt)-4, file_size, compress_size)
 338             file_size = 0xffffffff
 339             compress_size = 0xffffffff
 340             self.extract_version = max(45, self.extract_version)
 341             self.create_version = max(45, self.extract_version)
 342
 343         filename, flag_bits = self._encodeFilenameFlags()
 344         header = struct.pack(structFileHeader, stringFileHeader,
 345                  self.extract_version, self.reserved, flag_bits,
 346                  self.compress_type, dostime, dosdate, CRC,
 347                  compress_size, file_size,
 348                  len(filename), len(extra))
 349         return header + filename + extra
 350
 351     def _encodeFilenameFlags(self):
 352         if isinstance(self.filename, unicode):
 353             try:
 354                 return self.filename.encode('ascii'), self.flag_bits
 355             except UnicodeEncodeError:
 356                 return self.filename.encode('utf-8'), self.flag_bits | 0x800
 357         else:
 358             return self.filename, self.flag_bits
 359
 360     def _decodeFilename(self):
 361         if self.flag_bits & 0x800:
 362             return self.filename.decode('utf-8')
 363         else:
 364             return self.filename
 365
 366     def _decodeExtra(self):
 367         # Try to decode the extra field.
 368         extra = self.extra
 369         unpack = struct.unpack
 370         while extra:
 371             tp, ln = unpack('<HH', extra[:4])
 372             if tp == 1:
 373                 if ln >= 24:
 374                     counts = unpack('<QQQ', extra[4:28])
 375                 elif ln == 16:
 376                     counts = unpack('<QQ', extra[4:20])
 377                 elif ln == 8:
 378                     counts = unpack('<Q', extra[4:12])
 379                 elif ln == 0:
 380                     counts = ()
 381                 else:
 382                     raise RuntimeError, "Corrupt extra field %s"%(ln,)
 383
 384                 idx = 0
 385
 386                 # ZIP64 extension (large files and/or large archives)
 387                 if self.file_size in (0xffffffffffffffffL, 0xffffffffL):
 388                     self.file_size = counts[idx]
 389                     idx += 1
 390
 391                 if self.compress_size == 0xFFFFFFFFL:
 392                     self.compress_size = counts[idx]
 393                     idx += 1
 394
 395                 if self.header_offset == 0xffffffffL:
 396                     old = self.header_offset
 397                     self.header_offset = counts[idx]
 398                     idx+=1
 399
 400             extra = extra[ln+4:]
 401
 402
 403 class _ZipDecrypter:
 404     """Class to handle decryption of files stored within a ZIP archive.
 405
 406     ZIP supports a password-based form of encryption. Even though known
 407     plaintext attacks have been found against it, it is still useful
 408     to be able to get data out of such a file.
 409
 410     Usage:
 411         zd = _ZipDecrypter(mypwd)
 412         plain_char = zd(cypher_char)
 413         plain_text = map(zd, cypher_text)
 414     """
 415
 416     def _GenerateCRCTable():
 417         """Generate a CRC-32 table.
 418
 419         ZIP encryption uses the CRC32 one-byte primitive for scrambling some
 420         internal keys. We noticed that a direct implementation is faster than
 421         relying on binascii.crc32().
 422         """
 423         poly = 0xedb88320
 424         table = [0] * 256
 425         for i in range(256):
 426             crc = i
 427             for j in range(8):
 428                 if crc & 1:
 429                     crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
 430                 else:
 431                     crc = ((crc >> 1) & 0x7FFFFFFF)
 432             table[i] = crc
 433         return table
 434     crctable = _GenerateCRCTable()
 435
 436     def _crc32(self, ch, crc):
 437         """Compute the CRC32 primitive on one byte."""
 438         return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff]
 439
 440     def __init__(self, pwd):
 441         self.key0 = 305419896
 442         self.key1 = 591751049
 443         self.key2 = 878082192
 444         for p in pwd:
 445             self._UpdateKeys(p)
 446
 447     def _UpdateKeys(self, c):
 448         self.key0 = self._crc32(c, self.key0)
 449         self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
 450         self.key1 = (self.key1 * 134775813 + 1) & 4294967295
 451         self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2)
 452
 453     def __call__(self, c):
 454         """Decrypt a single character."""
 455         c = ord(c)
 456         k = self.key2 | 2
 457         c = c ^ (((k * (k^1)) >> 8) & 255)
 458         c = chr(c)
 459         self._UpdateKeys(c)
 460         return c
 461
 462 class ZipExtFile(io.BufferedIOBase):
 463     """File-like object for reading an archive member.
 464        Is returned by ZipFile.open().
 465     """
 466
 467     # Max size supported by decompressor.
 468     MAX_N = 1 << 31 - 1
 469
 470     # Read from compressed files in 4k blocks.
 471     MIN_READ_SIZE = 4096
 472
 473     # Search for universal newlines or line chunks.
 474     PATTERN = re.compile(r'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')
 475
 476     def __init__(self, fileobj, mode, zipinfo, decrypter=None):
 477         self._fileobj = fileobj
 478         self._decrypter = decrypter
 479
 480         self._compress_type = zipinfo.compress_type
 481         self._compress_size = zipinfo.compress_size
 482         self._compress_left = zipinfo.compress_size
 483
 484         if self._compress_type == ZIP_DEFLATED:
 485             self._decompressor = zlib.decompressobj(-15)
 486         self._unconsumed = ''
 487
 488         self._readbuffer = ''
 489         self._offset = 0
 490
 491         self._universal = 'U' in mode
 492         self.newlines = None
 493
 494         # Adjust read size for encrypted files since the first 12 bytes
 495         # are for the encryption/password information.
 496         if self._decrypter is not None:
 497             self._compress_left -= 12
 498
 499         self.mode = mode
 500         self.name = zipinfo.filename
 501
 502         if hasattr(zipinfo, 'CRC'):
 503             self._expected_crc = zipinfo.CRC
 504             self._running_crc = crc32(b'') & 0xffffffff
 505         else:
 506             self._expected_crc = None
 507
 508     def readline(self, limit=-1):
 509         """Read and return a line from the stream.
 510
 511         If limit is specified, at most limit bytes will be read.
 512         """
 513
 514         if not self._universal and limit < 0:
 515             # Shortcut common case - newline found in buffer.
 516             i = self._readbuffer.find('\n', self._offset) + 1
 517             if i > 0:
 518                 line = self._readbuffer[self._offset: i]
 519                 self._offset = i
 520                 return line
 521
 522         if not self._universal:
 523             return io.BufferedIOBase.readline(self, limit)
 524
 525         line = ''
 526         while limit < 0 or len(line) < limit:
 527             readahead = self.peek(2)
 528             if readahead == '':
 529                 return line
 530
 531             #
 532             # Search for universal newlines or line chunks.
 533             #
 534             # The pattern returns either a line chunk or a newline, but not
 535             # both. Combined with peek(2), we are assured that the sequence
 536             # '\r\n' is always retrieved completely and never split into
 537             # separate newlines - '\r', '\n' due to coincidental readaheads.
 538             #
 539             match = self.PATTERN.search(readahead)
 540             newline = match.group('newline')
 541             if newline is not None:
 542                 if self.newlines is None:
 543                     self.newlines = []
 544                 if newline not in self.newlines:
 545                     self.newlines.append(newline)
 546                 self._offset += len(newline)
 547                 return line + '\n'
 548
 549             chunk = match.group('chunk')
 550             if limit >= 0:
 551                 chunk = chunk[: limit - len(line)]
 552
 553             self._offset += len(chunk)
 554             line += chunk
 555
 556         return line
 557
 558     def peek(self, n=1):
 559         """Returns buffered bytes without advancing the position."""
 560         if n > len(self._readbuffer) - self._offset:
 561             chunk = self.read(n)
 562             self._offset -= len(chunk)
 563
 564         # Return up to 512 bytes to reduce allocation overhead for tight loops.
 565         return self._readbuffer[self._offset: self._offset + 512]
 566
 567     def readable(self):
 568         return True
 569
 570     def read(self, n=-1):
 571         """Read and return up to n bytes.
 572         If the argument is omitted, None, or negative, data is read and returned until EOF is reached..
 573         """
 574         buf = ''
 575         if n is None:
 576             n = -1
 577         while True:
 578             if n < 0:
 579                 data = self.read1(n)
 580             elif n > len(buf):
 581                 data = self.read1(n - len(buf))
 582             else:
 583                 return buf
 584             if len(data) == 0:
 585                 return buf
 586             buf += data
 587
 588     def _update_crc(self, newdata, eof):
 589         # Update the CRC using the given data.
 590         if self._expected_crc is None:
 591             # No need to compute the CRC if we don't have a reference value
 592             return
 593         self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
 594         # Check the CRC if we're at the end of the file
 595         if eof and self._running_crc != self._expected_crc:
 596             raise BadZipfile("Bad CRC-32 for file %r" % self.name)
 597
 598     def read1(self, n):
 599         """Read up to n bytes with at most one read() system call."""
 600
 601         # Simplify algorithm (branching) by transforming negative n to large n.
 602         if n < 0 or n is None:
 603             n = self.MAX_N
 604
 605         # Bytes available in read buffer.
 606         len_readbuffer = len(self._readbuffer) - self._offset
 607
 608         # Read from file.
 609         if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
 610             nbytes = n - len_readbuffer - len(self._unconsumed)
 611             nbytes = max(nbytes, self.MIN_READ_SIZE)
 612             nbytes = min(nbytes, self._compress_left)
 613
 614             data = self._fileobj.read(nbytes)
 615             self._compress_left -= len(data)
 616
 617             if data and self._decrypter is not None:
 618                 data = ''.join(map(self._decrypter, data))
 619
 620             if self._compress_type == ZIP_STORED:
 621                 self._update_crc(data, eof=(self._compress_left==0))
 622                 self._readbuffer = self._readbuffer[self._offset:] + data
 623                 self._offset = 0
 624             else:
 625                 # Prepare deflated bytes for decompression.
 626                 self._unconsumed += data
 627
 628         # Handle unconsumed data.
 629         if (len(self._unconsumed) > 0 and n > len_readbuffer and
 630             self._compress_type == ZIP_DEFLATED):
 631             data = self._decompressor.decompress(
 632                 self._unconsumed,
 633                 max(n - len_readbuffer, self.MIN_READ_SIZE)
 634             )
 635
 636             self._unconsumed = self._decompressor.unconsumed_tail
 637             eof = len(self._unconsumed) == 0 and self._compress_left == 0
 638             if eof:
 639                 data += self._decompressor.flush()
 640
 641             self._update_crc(data, eof=eof)
 642             self._readbuffer = self._readbuffer[self._offset:] + data
 643             self._offset = 0
 644
 645         # Read from buffer.
 646         data = self._readbuffer[self._offset: self._offset + n]
 647         self._offset += len(data)
 648         return data
 649
 650
 651
 652 class ZipFile:
 653     """ Class with methods to open, read, write, close, list zip files.
 654
 655     z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
 656
 657     file: Either the path to the file, or a file-like object.
 658           If it is a path, the file will be opened and closed by ZipFile.
 659     mode: The mode can be either read "r", write "w" or append "a".
 660     compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
 661     allowZip64: if True ZipFile will create files with ZIP64 extensions when
 662                 needed, otherwise it will raise an exception when this would
 663                 be necessary.
 664
 665     """
 666
 667     fp = None                   # Set here since __del__ checks it
 668
 669     def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
 670         """Open the ZIP file with mode read "r", write "w" or append "a"."""
 671         if mode not in ("r", "w", "a"):
 672             raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
 673
 674         if compression == ZIP_STORED:
 675             pass
 676         elif compression == ZIP_DEFLATED:
 677             if not zlib:
 678                 raise RuntimeError,\
 679                       "Compression requires the (missing) zlib module"
 680         else:
 681             raise RuntimeError, "That compression method is not supported"
 682
 683         self._allowZip64 = allowZip64
 684         self._didModify = False
 685         self.debug = 0  # Level of printing: 0 through 3
 686         self.NameToInfo = {}    # Find file info given name
 687         self.filelist = []      # List of ZipInfo instances for archive
 688         self.compression = compression  # Method of compression
 689         self.mode = key = mode.replace('b', '')[0]
 690         self.pwd = None
 691         self.comment = ''
 692
 693         # Check if we were passed a file-like object
 694         if isinstance(file, basestring):
 695             self._filePassed = 0
 696             self.filename = file
 697             modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
 698             try:
 699                 self.fp = open(file, modeDict[mode])
 700             except IOError:
 701                 if mode == 'a':
 702                     mode = key = 'w'
 703                     self.fp = open(file, modeDict[mode])
 704                 else:
 705                     raise
 706         else:
 707             self._filePassed = 1
 708             self.fp = file
 709             self.filename = getattr(file, 'name', None)
 710
 711         if key == 'r':
 712             self._GetContents()
 713         elif key == 'w':
 714             # set the modified flag so central directory gets written
 715             # even if no files are added to the archive
 716             self._didModify = True
 717         elif key == 'a':
 718             try:
 719                 # See if file is a zip file
 720                 self._RealGetContents()
 721                 # seek to start of directory and overwrite
 722                 self.fp.seek(self.start_dir, 0)
 723             except BadZipfile:
 724                 # file is not a zip file, just append
 725                 self.fp.seek(0, 2)
 726
 727                 # set the modified flag so central directory gets written
 728                 # even if no files are added to the archive
 729                 self._didModify = True
 730         else:
 731             if not self._filePassed:
 732                 self.fp.close()
 733                 self.fp = None
 734             raise RuntimeError, 'Mode must be "r", "w" or "a"'
 735
    def __enter__(self):
        """Context-manager entry: return this object for the "with" target."""
        return self
 738
    def __exit__(self, type, value, traceback):
        """Context-manager exit: call close(); exceptions are not suppressed."""
        self.close()
 741
 742     def _GetContents(self):
 743         """Read the directory, making sure we close the file if the format
 744         is bad."""
 745         try:
 746             self._RealGetContents()
 747         except BadZipfile:
 748             if not self._filePassed:
 749                 self.fp.close()
 750                 self.fp = None
 751             raise
 752
 753     def _RealGetContents(self):
 754         """Read in the table of contents for the ZIP file."""
 755         fp = self.fp
 756         try:
 757             endrec = _EndRecData(fp)
 758         except IOError:
 759             raise BadZipfile("File is not a zip file")
 760         if not endrec:
 761             raise BadZipfile, "File is not a zip file"
 762         if self.debug > 1:
 763             print endrec
 764         size_cd = endrec[_ECD_SIZE]             # bytes in central directory
 765         offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
 766         self.comment = endrec[_ECD_COMMENT]     # archive comment
 767
 768         # "concat" is zero, unless zip was concatenated to another file
 769         concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
 770         if endrec[_ECD_SIGNATURE] == stringEndArchive64:
 771             # If Zip64 extension structures are present, account for them
 772             concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
 773
 774         if self.debug > 2:
 775             inferred = concat + offset_cd
 776             print "given, inferred, offset", offset_cd, inferred, concat
 777         # self.start_dir:  Position of start of central directory
 778         self.start_dir = offset_cd + concat
 779         fp.seek(self.start_dir, 0)
 780         data = fp.read(size_cd)
 781         fp = cStringIO.StringIO(data)
 782         total = 0
 783         while total < size_cd:
 784             centdir = fp.read(sizeCentralDir)
 785             if centdir[0:4] != stringCentralDir:
 786                 raise BadZipfile, "Bad magic number for central directory"
 787             centdir = struct.unpack(structCentralDir, centdir)
 788             if self.debug > 2:
 789                 print centdir
 790             filename = fp.read(centdir[_CD_FILENAME_LENGTH])
 791             # Create ZipInfo instance to store file information
 792             x = ZipInfo(filename)
 793             x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
 794             x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
 795             x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
 796             (x.create_version, x.create_system, x.extract_version, x.reserved,
 797                 x.flag_bits, x.compress_type, t, d,
 798                 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
 799             x.volume, x.internal_attr, x.external_attr = centdir[15:18]
 800             # Convert date/time code to (year, month, day, hour, min, sec)
 801             x._raw_time = t
 802             x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
 803                                      t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
 804
 805             x._decodeExtra()
 806             x.header_offset = x.header_offset + concat
 807             x.filename = x._decodeFilename()
 808             self.filelist.append(x)
 809             self.NameToInfo[x.filename] = x
 810
 811             # update total bytes read from central directory
 812             total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
 813                      + centdir[_CD_EXTRA_FIELD_LENGTH]
 814                      + centdir[_CD_COMMENT_LENGTH])
 815
 816             if self.debug > 2:
 817                 print "total", total
 818
 819
 820     def namelist(self):
 821         """Return a list of file names in the archive."""
 822         l = []
 823         for data in self.filelist:
 824             l.append(data.filename)
 825         return l
 826
 827     def infolist(self):
 828         """Return a list of class ZipInfo instances for files in the
 829         archive."""
 830         return self.filelist
 831
 832     def printdir(self):
 833         """Print a table of contents for the zip file."""
 834         print "%-46s %19s %12s" % ("File Name", "Modified    ", "Size")
 835         for zinfo in self.filelist:
 836             date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
 837             print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size)
 838
 839     def testzip(self):
 840         """Read all the files and check the CRC."""
 841         chunk_size = 2 ** 20
 842         for zinfo in self.filelist:
 843             try:
 844                 # Read by chunks, to avoid an OverflowError or a
 845                 # MemoryError with very large embedded files.
 846                 f = self.open(zinfo.filename, "r")
 847                 while f.read(chunk_size):     # Check CRC-32
 848                     pass
 849             except BadZipfile:
 850                 return zinfo.filename
 851
 852     def getinfo(self, name):
 853         """Return the instance of ZipInfo given 'name'."""
 854         info = self.NameToInfo.get(name)
 855         if info is None:
 856             raise KeyError(
 857                 'There is no item named %r in the archive' % name)
 858
 859         return info
 860
 861     def setpassword(self, pwd):
 862         """Set default password for encrypted files."""
 863         self.pwd = pwd
 864
 865     def read(self, name, pwd=None):
 866         """Return file bytes (as a string) for name."""
 867         return self.open(name, "r", pwd).read()
 868
 869     def open(self, name, mode="r", pwd=None):
 870         """Return file-like object for 'name'."""
 871         if mode not in ("r", "U", "rU"):
 872             raise RuntimeError, 'open() requires mode "r", "U", or "rU"'
 873         if not self.fp:
 874             raise RuntimeError, \
 875                   "Attempt to read ZIP archive that was already closed"
 876
 877         # Only open a new file for instances where we were not
 878         # given a file object in the constructor
 879         if self._filePassed:
 880             zef_file = self.fp
 881         else:
 882             zef_file = open(self.filename, 'rb')
 883
 884         # Make sure we have an info object
 885         if isinstance(name, ZipInfo):
 886             # 'name' is already an info object
 887             zinfo = name
 888         else:
 889             # Get info object for name
 890             zinfo = self.getinfo(name)
 891
 892         zef_file.seek(zinfo.header_offset, 0)
 893
 894         # Skip the file header:
 895         fheader = zef_file.read(sizeFileHeader)
 896         if fheader[0:4] != stringFileHeader:
 897             raise BadZipfile, "Bad magic number for file header"
 898
 899         fheader = struct.unpack(structFileHeader, fheader)
 900         fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
 901         if fheader[_FH_EXTRA_FIELD_LENGTH]:
 902             zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
 903
 904         if fname != zinfo.orig_filename:
 905             raise BadZipfile, \
 906                       'File name in directory "%s" and header "%s" differ.' % (
 907                           zinfo.orig_filename, fname)
 908
 909         # check for encrypted flag & handle password
 910         is_encrypted = zinfo.flag_bits & 0x1
 911         zd = None
 912         if is_encrypted:
 913             if not pwd:
 914                 pwd = self.pwd
 915             if not pwd:
 916                 raise RuntimeError, "File %s is encrypted, " \
 917                       "password required for extraction" % name
 918
 919             zd = _ZipDecrypter(pwd)
 920             # The first 12 bytes in the cypher stream is an encryption header
 921             #  used to strengthen the algorithm. The first 11 bytes are
 922             #  completely random, while the 12th contains the MSB of the CRC,
 923             #  or the MSB of the file time depending on the header type
 924             #  and is used to check the correctness of the password.
 925             bytes = zef_file.read(12)
 926             h = map(zd, bytes[0:12])
 927             if zinfo.flag_bits & 0x8:
 928                 # compare against the file type from extended local headers
 929                 check_byte = (zinfo._raw_time >> 8) & 0xff
 930             else:
 931                 # compare against the CRC otherwise
 932                 check_byte = (zinfo.CRC >> 24) & 0xff
 933             if ord(h[11]) != check_byte:
 934                 raise RuntimeError("Bad password for file", name)
 935
 936         return  ZipExtFile(zef_file, mode, zinfo, zd)
 937
 938     def extract(self, member, path=None, pwd=None):
 939         """Extract a member from the archive to the current working directory,
 940            using its full name. Its file information is extracted as accurately
 941            as possible. `member' may be a filename or a ZipInfo object. You can
 942            specify a different directory using `path'.
 943         """
 944         if not isinstance(member, ZipInfo):
 945             member = self.getinfo(member)
 946
 947         if path is None:
 948             path = os.getcwd()
 949
 950         return self._extract_member(member, path, pwd)
 951
 952     def extractall(self, path=None, members=None, pwd=None):
 953         """Extract all members from the archive to the current working
 954            directory. `path' specifies a different directory to extract to.
 955            `members' is optional and must be a subset of the list returned
 956            by namelist().
 957         """
 958         if members is None:
 959             members = self.namelist()
 960
 961         for zipinfo in members:
 962             self.extract(zipinfo, path, pwd)
 963
 964     def _extract_member(self, member, targetpath, pwd):
 965         """Extract the ZipInfo object 'member' to a physical
 966            file on the path targetpath.
 967         """
 968         # build the destination pathname, replacing
 969         # forward slashes to platform specific separators.
 970         # Strip trailing path separator, unless it represents the root.
 971         if (targetpath[-1:] in (os.path.sep, os.path.altsep)
 972             and len(os.path.splitdrive(targetpath)[1]) > 1):
 973             targetpath = targetpath[:-1]
 974
 975         # don't include leading "/" from file name if present
 976         if member.filename[0] == '/':
 977             targetpath = os.path.join(targetpath, member.filename[1:])
 978         else:
 979             targetpath = os.path.join(targetpath, member.filename)
 980
 981         targetpath = os.path.normpath(targetpath)
 982
 983         # Create all upper directories if necessary.
 984         upperdirs = os.path.dirname(targetpath)
 985         if upperdirs and not os.path.exists(upperdirs):
 986             os.makedirs(upperdirs)
 987
 988         if member.filename[-1] == '/':
 989             if not os.path.isdir(targetpath):
 990                 os.mkdir(targetpath)
 991             return targetpath
 992
 993         source = self.open(member, pwd=pwd)
 994         target = file(targetpath, "wb")
 995         shutil.copyfileobj(source, target)
 996         source.close()
 997         target.close()
 998
 999         return targetpath
1000
1001     def _writecheck(self, zinfo):
1002         """Check for errors before writing a file to the archive."""
1003         if zinfo.filename in self.NameToInfo:
1004             if self.debug:      # Warning for duplicate names
1005                 print "Duplicate name:", zinfo.filename
1006         if self.mode not in ("w", "a"):
1007             raise RuntimeError, 'write() requires mode "w" or "a"'
1008         if not self.fp:
1009             raise RuntimeError, \
1010                   "Attempt to write ZIP archive that was already closed"
1011         if zinfo.compress_type == ZIP_DEFLATED and not zlib:
1012             raise RuntimeError, \
1013                   "Compression requires the (missing) zlib module"
1014         if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
1015             raise RuntimeError, \
1016                   "That compression method is not supported"
1017         if zinfo.file_size > ZIP64_LIMIT:
1018             if not self._allowZip64:
1019                 raise LargeZipFile("Filesize would require ZIP64 extensions")
1020         if zinfo.header_offset > ZIP64_LIMIT:
1021             if not self._allowZip64:
1022                 raise LargeZipFile("Zipfile size would require ZIP64 extensions")
1023
1024     def write(self, filename, arcname=None, compress_type=None):
1025         """Put the bytes from filename into the archive under the name
1026         arcname."""
1027         if not self.fp:
1028             raise RuntimeError(
1029                   "Attempt to write to ZIP archive that was already closed")
1030
1031         st = os.stat(filename)
1032         isdir = stat.S_ISDIR(st.st_mode)
1033         mtime = time.localtime(st.st_mtime)
1034         date_time = mtime[0:6]
1035         # Create ZipInfo instance to store file information
1036         if arcname is None:
1037             arcname = filename
1038         arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
1039         while arcname[0] in (os.sep, os.altsep):
1040             arcname = arcname[1:]
1041         if isdir:
1042             arcname += '/'
1043         zinfo = ZipInfo(arcname, date_time)
1044         zinfo.external_attr = (st[0] & 0xFFFF) << 16L      # Unix attributes
1045         if compress_type is None:
1046             zinfo.compress_type = self.compression
1047         else:
1048             zinfo.compress_type = compress_type
1049
1050         zinfo.file_size = st.st_size
1051         zinfo.flag_bits = 0x00
1052         zinfo.header_offset = self.fp.tell()    # Start of header bytes
1053
1054         self._writecheck(zinfo)
1055         self._didModify = True
1056
1057         if isdir:
1058             zinfo.file_size = 0
1059             zinfo.compress_size = 0
1060             zinfo.CRC = 0
1061             self.filelist.append(zinfo)
1062             self.NameToInfo[zinfo.filename] = zinfo
1063             self.fp.write(zinfo.FileHeader())
1064             return
1065
1066         with open(filename, "rb") as fp:
1067             # Must overwrite CRC and sizes with correct data later
1068             zinfo.CRC = CRC = 0
1069             zinfo.compress_size = compress_size = 0
1070             zinfo.file_size = file_size = 0
1071             self.fp.write(zinfo.FileHeader())
1072             if zinfo.compress_type == ZIP_DEFLATED:
1073                 cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1074                      zlib.DEFLATED, -15)
1075             else:
1076                 cmpr = None
1077             while 1:
1078                 buf = fp.read(1024 * 8)
1079                 if not buf:
1080                     break
1081                 file_size = file_size + len(buf)
1082                 CRC = crc32(buf, CRC) & 0xffffffff
1083                 if cmpr:
1084                     buf = cmpr.compress(buf)
1085                     compress_size = compress_size + len(buf)
1086                 self.fp.write(buf)
1087         if cmpr:
1088             buf = cmpr.flush()
1089             compress_size = compress_size + len(buf)
1090             self.fp.write(buf)
1091             zinfo.compress_size = compress_size
1092         else:
1093             zinfo.compress_size = file_size
1094         zinfo.CRC = CRC
1095         zinfo.file_size = file_size
1096         # Seek backwards and write CRC and file sizes
1097         position = self.fp.tell()       # Preserve current position in file
1098         self.fp.seek(zinfo.header_offset + 14, 0)
1099         self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
1100               zinfo.file_size))
1101         self.fp.seek(position, 0)
1102         self.filelist.append(zinfo)
1103         self.NameToInfo[zinfo.filename] = zinfo
1104
1105     def writestr(self, zinfo_or_arcname, bytes, compress_type=None):
1106         """Write a file into the archive.  The contents is the string
1107         'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or
1108         the name of the file in the archive."""
1109         if not isinstance(zinfo_or_arcname, ZipInfo):
1110             zinfo = ZipInfo(filename=zinfo_or_arcname,
1111                             date_time=time.localtime(time.time())[:6])
1112
1113             zinfo.compress_type = self.compression
1114             zinfo.external_attr = 0600 << 16
1115         else:
1116             zinfo = zinfo_or_arcname
1117
1118         if not self.fp:
1119             raise RuntimeError(
1120                   "Attempt to write to ZIP archive that was already closed")
1121
1122         if compress_type is not None:
1123             zinfo.compress_type = compress_type
1124
1125         zinfo.file_size = len(bytes)            # Uncompressed size
1126         zinfo.header_offset = self.fp.tell()    # Start of header bytes
1127         self._writecheck(zinfo)
1128         self._didModify = True
1129         zinfo.CRC = crc32(bytes) & 0xffffffff       # CRC-32 checksum
1130         if zinfo.compress_type == ZIP_DEFLATED:
1131             co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1132                  zlib.DEFLATED, -15)
1133             bytes = co.compress(bytes) + co.flush()
1134             zinfo.compress_size = len(bytes)    # Compressed size
1135         else:
1136             zinfo.compress_size = zinfo.file_size
1137         zinfo.header_offset = self.fp.tell()    # Start of header bytes
1138         self.fp.write(zinfo.FileHeader())
1139         self.fp.write(bytes)
1140         self.fp.flush()
1141         if zinfo.flag_bits & 0x08:
1142             # Write CRC and file sizes after the file data
1143             self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
1144                   zinfo.file_size))
1145         self.filelist.append(zinfo)
1146         self.NameToInfo[zinfo.filename] = zinfo
1147
1148     def __del__(self):
1149         """Call the "close()" method in case the user forgot."""
1150         self.close()
1151
1152     def close(self):
1153         """Close the file, and for mode "w" and "a" write the ending
1154         records."""
1155         if self.fp is None:
1156             return
1157
1158         if self.mode in ("w", "a") and self._didModify: # write ending records
1159             count = 0
1160             pos1 = self.fp.tell()
1161             for zinfo in self.filelist:         # write central directory
1162                 count = count + 1
1163                 dt = zinfo.date_time
1164                 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
1165                 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
1166                 extra = []
1167                 if zinfo.file_size > ZIP64_LIMIT \
1168                         or zinfo.compress_size > ZIP64_LIMIT:
1169                     extra.append(zinfo.file_size)
1170                     extra.append(zinfo.compress_size)
1171                     file_size = 0xffffffff
1172                     compress_size = 0xffffffff
1173                 else:
1174                     file_size = zinfo.file_size
1175                     compress_size = zinfo.compress_size
1176
1177                 if zinfo.header_offset > ZIP64_LIMIT:
1178                     extra.append(zinfo.header_offset)
1179                     header_offset = 0xffffffffL
1180                 else:
1181                     header_offset = zinfo.header_offset
1182
1183                 extra_data = zinfo.extra
1184                 if extra:
1185                     # Append a ZIP64 field to the extra's
1186                     extra_data = struct.pack(
1187                             '<HH' + 'Q'*len(extra),
1188                             1, 8*len(extra), *extra) + extra_data
1189
1190                     extract_version = max(45, zinfo.extract_version)
1191                     create_version = max(45, zinfo.create_version)
1192                 else:
1193                     extract_version = zinfo.extract_version
1194                     create_version = zinfo.create_version
1195
1196                 try:
1197                     filename, flag_bits = zinfo._encodeFilenameFlags()
1198                     centdir = struct.pack(structCentralDir,
1199                      stringCentralDir, create_version,
1200                      zinfo.create_system, extract_version, zinfo.reserved,
1201                      flag_bits, zinfo.compress_type, dostime, dosdate,
1202                      zinfo.CRC, compress_size, file_size,
1203                      len(filename), len(extra_data), len(zinfo.comment),
1204                      0, zinfo.internal_attr, zinfo.external_attr,
1205                      header_offset)
1206                 except DeprecationWarning:
1207                     print >>sys.stderr, (structCentralDir,
1208                      stringCentralDir, create_version,
1209                      zinfo.create_system, extract_version, zinfo.reserved,
1210                      zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
1211                      zinfo.CRC, compress_size, file_size,
1212                      len(zinfo.filename), len(extra_data), len(zinfo.comment),
1213                      0, zinfo.internal_attr, zinfo.external_attr,
1214                      header_offset)
1215                     raise
1216                 self.fp.write(centdir)
1217                 self.fp.write(filename)
1218                 self.fp.write(extra_data)
1219                 self.fp.write(zinfo.comment)
1220
1221             pos2 = self.fp.tell()
1222             # Write end-of-zip-archive record
1223             centDirCount = count
1224             centDirSize = pos2 - pos1
1225             centDirOffset = pos1
1226             if (centDirCount >= ZIP_FILECOUNT_LIMIT or
1227                 centDirOffset > ZIP64_LIMIT or
1228                 centDirSize > ZIP64_LIMIT):
1229                 # Need to write the ZIP64 end-of-archive records
1230                 zip64endrec = struct.pack(
1231                         structEndArchive64, stringEndArchive64,
1232                         44, 45, 45, 0, 0, centDirCount, centDirCount,
1233                         centDirSize, centDirOffset)
1234                 self.fp.write(zip64endrec)
1235
1236                 zip64locrec = struct.pack(
1237                         structEndArchive64Locator,
1238                         stringEndArchive64Locator, 0, pos2, 1)
1239                 self.fp.write(zip64locrec)
1240                 centDirCount = min(centDirCount, 0xFFFF)
1241                 centDirSize = min(centDirSize, 0xFFFFFFFF)
1242                 centDirOffset = min(centDirOffset, 0xFFFFFFFF)
1243
1244             # check for valid comment length
1245             if len(self.comment) >= ZIP_MAX_COMMENT:
1246                 if self.debug > 0:
1247                     msg = 'Archive comment is too long; truncating to %d bytes' \
1248                           % ZIP_MAX_COMMENT
1249                 self.comment = self.comment[:ZIP_MAX_COMMENT]
1250
1251             endrec = struct.pack(structEndArchive, stringEndArchive,
1252                                  0, 0, centDirCount, centDirCount,
1253                                  centDirSize, centDirOffset, len(self.comment))
1254             self.fp.write(endrec)
1255             self.fp.write(self.comment)
1256             self.fp.flush()
1257
1258         if not self._filePassed:
1259             self.fp.close()
1260         self.fp = None
1261
1262
class PyZipFile(ZipFile):
    """ZipFile subclass for building archives of compiled Python modules
    and packages."""

    def writepy(self, pathname, basename = ""):
        """Add the compiled module(s) found at "pathname" to the archive.

        Three cases are handled:

        * a package directory (one containing __init__.py): the package and,
          recursively, all of its sub-packages are added below
          "basename/<package name>";
        * any other directory: every *.py file directly inside it is added;
        * anything else: must be a single file ending in ".py", otherwise
          RuntimeError is raised.

        What is stored is always the module's .pyo or .pyc file; the module
        is compiled first when necessary.
        """
        parent, name = os.path.split(pathname)

        if not os.path.isdir(pathname):
            # A single module file.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                      'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("%s %s" % ("Adding file", arcname))
            self.write(fname, arcname)
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("%s %s" % ("Adding files from directory", pathname))
            for filename in os.listdir(pathname):
                path = os.path.join(pathname, filename)
                if os.path.splitext(filename)[1] != ".py":
                    continue
                fname, arcname = self._get_codename(path[0:-3], basename)
                if self.debug:
                    print("%s %s" % ("Adding", arcname))
                self.write(fname, arcname)
            return

        # This is a package directory, add it
        if basename:
            basename = "%s/%s" % (basename, name)
        else:
            basename = name
        if self.debug:
            print("%s %s %s %s" % ("Adding package in", pathname,
                                   "as", basename))
        fname, arcname = self._get_codename(initname[0:-3], basename)
        if self.debug:
            print("%s %s" % ("Adding", arcname))
        self.write(fname, arcname)

        # Add all *.py files and package subdirectories
        entries = os.listdir(pathname)
        entries.remove("__init__.py")
        for filename in entries:
            path = os.path.join(pathname, filename)
            if os.path.isdir(path):
                # Only sub-directories that are packages are descended into.
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    self.writepy(path, basename)  # Recursive call
                continue
            if os.path.splitext(filename)[1] != ".py":
                continue
            fname, arcname = self._get_codename(path[0:-3], basename)
            if self.debug:
                print("%s %s" % ("Adding", arcname))
            self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the module at "pathname".

        "pathname" is the module path without extension.  The .pyo file is
        used when it exists and is at least as new as the .py source;
        otherwise the .pyc file is used, after compiling the source when the
        .pyc is missing or older than it.  A compile error is printed, not
        raised.  For example, given /python/lib/string, the result is
        (/python/lib/string.pyc, string.pyc), with "basename/" prefixed to
        the archive name when "basename" is non-empty.
        """
        source = pathname + ".py"
        compiled = pathname + ".pyc"
        optimized = pathname + ".pyo"

        if (os.path.isfile(optimized) and
                os.stat(optimized).st_mtime >= os.stat(source).st_mtime):
            fname = optimized    # Use .pyo file
        else:
            if (not os.path.isfile(compiled) or
                    os.stat(compiled).st_mtime < os.stat(source).st_mtime):
                import py_compile
                if self.debug:
                    print("%s %s" % ("Compiling", source))
                try:
                    py_compile.compile(source, compiled, None, True)
                except py_compile.PyCompileError as err:
                    print(err.msg)
            fname = compiled

        archivename = os.path.basename(fname)
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
1360
1361
def main(args = None):
    """Command-line interface: list, test, extract or create a zip archive.

    'args' is the argument list without the program name and defaults to
    sys.argv[1:].  On a usage error the usage text is printed and the
    process exits with status 1.  The archive is closed when the command
    finishes, whether it succeeded or failed.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)

    def usage_exit():
        # Print the usage text and terminate with an error status.
        print(USAGE)
        sys.exit(1)

    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        usage_exit()

    command = args[0]

    if command == '-l':
        if len(args) != 2:
            usage_exit()
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif command == '-t':
        if len(args) != 2:
            usage_exit()
        zf = ZipFile(args[1], 'r')
        try:
            badfile = zf.testzip()
        finally:
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif command == '-e':
        if len(args) != 3:
            usage_exit()

        zf = ZipFile(args[1], 'r')
        try:
            # extractall() creates directory entries as directories instead
            # of trying to open them as files.
            zf.extractall(args[2])
        finally:
            zf.close()

    elif command == '-c':
        if len(args) < 3:
            usage_exit()

        def addToZip(zf, path, zippath):
            # Add a file, or recurse into a directory; anything else is
            # ignored.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
1435
# Run the command-line interface when this file is executed as a script
# (not when it is imported as a module).
if __name__ == "__main__":
    main()