]> git.proxmox.com Git - mirror_edk2.git/blame - AppPkg/Applications/Python/Python-2.7.10/Lib/zipfile.py
AppPkg/Applications/Python/Python-2.7.10: Initial Checkin part 4/5.
[mirror_edk2.git] / AppPkg / Applications / Python / Python-2.7.10 / Lib / zipfile.py
CommitLineData
3257aa99
DM
1"""\r
2Read and write ZIP files.\r
3"""\r
4import struct, os, time, sys, shutil\r
5import binascii, cStringIO, stat\r
6import io\r
7import re\r
8import string\r
9\r
10try:\r
11 import zlib # We may need its compression method\r
12 crc32 = zlib.crc32\r
13except ImportError:\r
14 zlib = None\r
15 crc32 = binascii.crc32\r
16\r
# Names exported by "from zipfile import *".
__all__ = [
    "BadZipfile",
    "error",
    "ZIP_STORED",
    "ZIP_DEFLATED",
    "is_zipfile",
    "ZipInfo",
    "ZipFile",
    "PyZipFile",
    "LargeZipFile",
]
19\r
class BadZipfile(Exception):
    """Raised when a file is not a ZIP archive or its structures are corrupt."""
22\r
23\r
class LargeZipFile(Exception):
    """Signal that an archive needs ZIP64 extensions which are disabled.

    Raised while writing, when the data would require the ZIP64 format but
    the caller has not allowed it.
    """
29\r
# Public alias (listed in __all__) for BadZipfile, the exception raised by
# this module.
error = BadZipfile      # The exception raised by this module
31\r
# Thresholds above which the ZIP64 extensions are needed, and the largest
# archive comment that fits in its 16-bit length field.
ZIP64_LIMIT = 0x7FFFFFFF
ZIP_FILECOUNT_LIMIT = 0xFFFF
ZIP_MAX_COMMENT = 0xFFFF

# constants for Zip file compression methods
ZIP_STORED, ZIP_DEFLATED = 0, 8
# Other ZIP compression methods not supported

# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = "<4s4H2LH"
stringEndArchive = "PK\005\006"
sizeEndCentDir = struct.Struct(structEndArchive).size

# Field positions inside the unpacked record.  The final two positions are
# not part of the structure as defined in the spec; this module appends them
# to the unpacked list as a convenience.
(_ECD_SIGNATURE,
 _ECD_DISK_NUMBER,
 _ECD_DISK_START,
 _ECD_ENTRIES_THIS_DISK,
 _ECD_ENTRIES_TOTAL,
 _ECD_SIZE,
 _ECD_OFFSET,
 _ECD_COMMENT_SIZE,
 _ECD_COMMENT,
 _ECD_LOCATION) = range(10)

# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = "PK\001\002"
sizeCentralDir = struct.Struct(structCentralDir).size

# indexes of entries in the central directory structure
(_CD_SIGNATURE,
 _CD_CREATE_VERSION,
 _CD_CREATE_SYSTEM,
 _CD_EXTRACT_VERSION,
 _CD_EXTRACT_SYSTEM,
 _CD_FLAG_BITS,
 _CD_COMPRESS_TYPE,
 _CD_TIME,
 _CD_DATE,
 _CD_CRC,
 _CD_COMPRESSED_SIZE,
 _CD_UNCOMPRESSED_SIZE,
 _CD_FILENAME_LENGTH,
 _CD_EXTRA_FIELD_LENGTH,
 _CD_COMMENT_LENGTH,
 _CD_DISK_NUMBER_START,
 _CD_INTERNAL_FILE_ATTRIBUTES,
 _CD_EXTERNAL_FILE_ATTRIBUTES,
 _CD_LOCAL_HEADER_OFFSET) = range(19)

# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = "PK\003\004"
sizeFileHeader = struct.Struct(structFileHeader).size

(_FH_SIGNATURE,
 _FH_EXTRACT_VERSION,
 _FH_EXTRACT_SYSTEM,
 _FH_GENERAL_PURPOSE_FLAG_BITS,
 _FH_COMPRESSION_METHOD,
 _FH_LAST_MOD_TIME,
 _FH_LAST_MOD_DATE,
 _FH_CRC,
 _FH_COMPRESSED_SIZE,
 _FH_UNCOMPRESSED_SIZE,
 _FH_FILENAME_LENGTH,
 _FH_EXTRA_FIELD_LENGTH) = range(12)

# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = "PK\x06\x07"
sizeEndCentDir64Locator = struct.Struct(structEndArchive64Locator).size

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = "PK\x06\x06"
sizeEndCentDir64 = struct.Struct(structEndArchive64).size

(_CD64_SIGNATURE,
 _CD64_DIRECTORY_RECSIZE,
 _CD64_CREATE_VERSION,
 _CD64_EXTRACT_VERSION,
 _CD64_DISK_NUMBER,
 _CD64_DISK_NUMBER_START,
 _CD64_NUMBER_ENTRIES_THIS_DISK,
 _CD64_NUMBER_ENTRIES_TOTAL,
 _CD64_DIRECTORY_SIZE,
 _CD64_OFFSET_START_CENTDIR) = range(10)
133\r
def _check_zipfile(fp):
    """Return True when an end-of-central-directory record is found in fp.

    An IOError raised while probing the file is treated as "not a ZIP file".
    """
    try:
        endrec = _EndRecData(fp)
    except IOError:
        return False
    # A truthy record means the file has the correct magic number.
    return True if endrec else False
141\r
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a path, or a file or file-like object
    (anything with a "read" attribute).  An IOError while opening or probing
    the file yields whatever result had been established so far (False unless
    the check already succeeded).
    """
    outcome = False
    try:
        if not hasattr(filename, "read"):
            # A path: open it ourselves and close it when done.
            with open(filename, "rb") as handle:
                outcome = _check_zipfile(handle)
        else:
            outcome = _check_zipfile(fp=filename)
    except IOError:
        pass
    return outcome
157\r
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    fpin is the archive file object, offset is the position of the classic
    end-of-central-directory record relative to the end of the file, and
    endrec is the list built from that record.  endrec is returned unchanged
    when no ZIP64 locator/record is found; otherwise it is updated in place
    and returned.  BadZipfile is raised for multi-disk archives.
    """
    # The ZIP64 locator, if any, sits immediately before the classic record.
    locator_pos = offset - sizeEndCentDir64Locator
    try:
        fpin.seek(locator_pos, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    raw = fpin.read(sizeEndCentDir64Locator)
    if len(raw) != sizeEndCentDir64Locator:
        return endrec
    locator = struct.unpack(structEndArchive64Locator, raw)
    # locator fields: signature, disk number, relative offset, disk count
    if locator[0] != stringEndArchive64Locator:
        return endrec

    if not (locator[1] == 0 and locator[3] == 1):
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(locator_pos - sizeEndCentDir64, 2)
    raw = fpin.read(sizeEndCentDir64)
    if len(raw) != sizeEndCentDir64:
        return endrec
    record = struct.unpack(structEndArchive64, raw)
    if record[_CD64_SIGNATURE] != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = record[_CD64_SIGNATURE]
    endrec[_ECD_DISK_NUMBER] = record[_CD64_DISK_NUMBER]
    endrec[_ECD_DISK_START] = record[_CD64_DISK_NUMBER_START]
    endrec[_ECD_ENTRIES_THIS_DISK] = record[_CD64_NUMBER_ENTRIES_THIS_DISK]
    endrec[_ECD_ENTRIES_TOTAL] = record[_CD64_NUMBER_ENTRIES_TOTAL]
    endrec[_ECD_SIZE] = record[_CD64_DIRECTORY_SIZE]
    endrec[_ECD_OFFSET] = record[_CD64_OFFSET_START_CENTDIR]
    return endrec
199\r
200\r
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Fast path: an archive without a comment ends with the fixed-size
    # "end of central directory" structure.
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    tail = fpin.read()
    no_comment_record = (len(tail) == sizeEndCentDir and
                         tail[0:4] == stringEndArchive and
                         tail[-2:] == b"\000\000")
    if no_comment_record:
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, tail))

        # Append a blank comment and record start offset
        endrec.extend(["", filesize - sizeEndCentDir])

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start < 0:
        # Unable to find a valid end of central directory structure
        return None

    # found the magic number; attempt to unpack and interpret
    record_end = start + sizeEndCentDir
    recData = data[start:record_end]
    if len(recData) != sizeEndCentDir:
        # Zip file is corrupted.
        return None
    endrec = list(struct.unpack(structEndArchive, recData))
    commentSize = endrec[_ECD_COMMENT_SIZE]     # as claimed by the zip file
    endrec.append(data[record_end:record_end + commentSize])
    endrec.append(maxCommentStart + start)

    # Try to read the "Zip64 end of central directory" structure
    return _EndRecData64(fpin, maxCommentStart + start - filesize,
                         endrec)
260\r
261\r
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry description with default attribute values.

        filename is the member name; date_time is a tuple starting with
        (year, month, day, hour, min, sec).  Raises ValueError when the year
        is earlier than 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self, zip64=None):
        """Return the per-file header as a string.

        zip64 selects whether a ZIP64 extra record is appended: None (the
        default) decides from the sizes, a true value forces it, and a false
        value forbids it.  Raises LargeZipFile when a size exceeds
        ZIP64_LIMIT while zip64 is false.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            # Raise the creator version from its own previous value; deriving
            # it from extract_version would discard a higher create_version.
            self.create_version = max(45, self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        A unicode name is encoded as ASCII when possible; otherwise it is
        encoded as UTF-8 and bit 0x800 is set in the returned flags.
        """
        if isinstance(self.filename, unicode):
            try:
                return self.filename.encode('ascii'), self.flag_bits
            except UnicodeEncodeError:
                return self.filename.encode('utf-8'), self.flag_bits | 0x800
        else:
            return self.filename, self.flag_bits

    def _decodeFilename(self):
        """Return the filename, decoded from UTF-8 when flag bit 0x800 is set."""
        if self.flag_bits & 0x800:
            return self.filename.decode('utf-8')
        else:
            return self.filename

    def _decodeExtra(self):
        """Apply ZIP64 values found in the extra field to this entry.

        Walks the (type, length, payload) records in self.extra.  For a
        record of type 1, the 64-bit values replace file_size, compress_size
        and header_offset, in that order, for each attribute that currently
        holds its "see ZIP64 record" marker value.  Raises RuntimeError when
        the record has an unexpected length.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx+=1

            # Advance past this record's 4-byte header and its payload.
            extra = extra[ln+4:]
418\r
419\r
420class _ZipDecrypter:\r
421 """Class to handle decryption of files stored within a ZIP archive.\r
422\r
423 ZIP supports a password-based form of encryption. Even though known\r
424 plaintext attacks have been found against it, it is still useful\r
425 to be able to get data out of such a file.\r
426\r
427 Usage:\r
428 zd = _ZipDecrypter(mypwd)\r
429 plain_char = zd(cypher_char)\r
430 plain_text = map(zd, cypher_text)\r
431 """\r
432\r
433 def _GenerateCRCTable():\r
434 """Generate a CRC-32 table.\r
435\r
436 ZIP encryption uses the CRC32 one-byte primitive for scrambling some\r
437 internal keys. We noticed that a direct implementation is faster than\r
438 relying on binascii.crc32().\r
439 """\r
440 poly = 0xedb88320\r
441 table = [0] * 256\r
442 for i in range(256):\r
443 crc = i\r
444 for j in range(8):\r
445 if crc & 1:\r
446 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly\r
447 else:\r
448 crc = ((crc >> 1) & 0x7FFFFFFF)\r
449 table[i] = crc\r
450 return table\r
451 crctable = _GenerateCRCTable()\r
452\r
453 def _crc32(self, ch, crc):\r
454 """Compute the CRC32 primitive on one byte."""\r
455 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff]\r
456\r
457 def __init__(self, pwd):\r
458 self.key0 = 305419896\r
459 self.key1 = 591751049\r
460 self.key2 = 878082192\r
461 for p in pwd:\r
462 self._UpdateKeys(p)\r
463\r
464 def _UpdateKeys(self, c):\r
465 self.key0 = self._crc32(c, self.key0)\r
466 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295\r
467 self.key1 = (self.key1 * 134775813 + 1) & 4294967295\r
468 self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2)\r
469\r
470 def __call__(self, c):\r
471 """Decrypt a single character."""\r
472 c = ord(c)\r
473 k = self.key2 | 2\r
474 c = c ^ (((k * (k^1)) >> 8) & 255)\r
475 c = chr(c)\r
476 self._UpdateKeys(c)\r
477 return c\r
478\r
479\r
# Human-readable names of ZIP compression method numbers, used when
# reporting a method that cannot be handled.
compressor_names = {
    0: 'store', 1: 'shrink',
    6: 'implode', 7: 'tokenize',
    8: 'deflate', 9: 'deflate64',
    10: 'implode', 12: 'bzip2',
    14: 'lzma', 18: 'terse',
    19: 'lz77', 97: 'wavpack',
    98: 'ppmd',
}
# Methods 2 through 5 all share the name 'reduce'.
compressor_names.update(dict.fromkeys(range(2, 6), 'reduce'))
499\r
500\r
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.

    Is returned by ZipFile.open().
    """

    # Read size substituted for "no limit" requests.  "-" binds tighter than
    # "<<", so the original spelling "1 << 31 - 1" evaluates to 1 << 30; the
    # parentheses only make that explicit.
    MAX_N = 1 << (31 - 1)

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Search for universal newlines or line chunks.
    PATTERN = re.compile(r'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')

    def __init__(self, fileobj, mode, zipinfo, decrypter=None,
            close_fileobj=False):
        """Set up reading of one member from fileobj.

        fileobj is read from its current position; zipinfo supplies the
        compression type, compressed size, name and (if present) CRC;
        decrypter, when given, is applied to every character read;
        close_fileobj says whether close() also closes fileobj.  Raises
        NotImplementedError for compression types other than stored and
        deflated.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter
        self._close_fileobj = close_fileobj

        method = zipinfo.compress_type
        self._compress_type = method
        self._compress_size = zipinfo.compress_size
        self._compress_left = zipinfo.compress_size

        if method == ZIP_DEFLATED:
            self._decompressor = zlib.decompressobj(-15)
        elif method != ZIP_STORED:
            descr = compressor_names.get(method)
            if descr:
                raise NotImplementedError("compression type %d (%s)" % (method, descr))
            raise NotImplementedError("compression type %d" % (method,))
        self._unconsumed = ''   # compressed data not yet decompressed

        self._readbuffer = ''   # decoded data not yet handed to the caller
        self._offset = 0        # read position inside _readbuffer

        self._universal = 'U' in mode
        self.newlines = None

        self.mode = mode
        self.name = zipinfo.filename

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'') & 0xffffffff
        else:
            self._expected_crc = None

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """
        if not self._universal:
            if limit < 0:
                # Shortcut common case - newline found in buffer.
                end = self._readbuffer.find('\n', self._offset) + 1
                if end > 0:
                    line = self._readbuffer[self._offset: end]
                    self._offset = end
                    return line
            return io.BufferedIOBase.readline(self, limit)

        line = ''
        while limit < 0 or len(line) < limit:
            readahead = self.peek(2)
            if readahead == '':
                return line

            #
            # Search for universal newlines or line chunks.
            #
            # The pattern returns either a line chunk or a newline, but not
            # both. Combined with peek(2), we are assured that the sequence
            # '\r\n' is always retrieved completely and never split into
            # separate newlines - '\r', '\n' due to coincidental readaheads.
            #
            match = self.PATTERN.search(readahead)
            newline = match.group('newline')
            if newline is not None:
                # Record each distinct line ending seen, then return the
                # line with a normalised '\n' terminator.
                if self.newlines is None:
                    self.newlines = []
                if newline not in self.newlines:
                    self.newlines.append(newline)
                self._offset += len(newline)
                return line + '\n'

            chunk = match.group('chunk')
            if limit >= 0:
                chunk = chunk[: limit - len(line)]

            self._offset += len(chunk)
            line += chunk

        return line

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        available = len(self._readbuffer) - self._offset
        if n > available:
            chunk = self.read(n)
            # read() advanced the position; put the data back in front of
            # the unread part of the buffer.
            if len(chunk) > self._offset:
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        start = self._offset
        return self._readbuffer[start: start + 512]

    def readable(self):
        """Always True."""
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.

        If the argument is omitted, None, or negative, data is read and
        returned until EOF is reached.
        """
        if n is None:
            n = -1
        collected = ''
        while n < 0 or n > len(collected):
            piece = self.read1(n if n < 0 else n - len(collected))
            if len(piece) == 0:
                break
            collected += piece
        return collected

    def _update_crc(self, newdata, eof):
        """Fold newdata into the running CRC; verify it when eof is true.

        Does nothing when there is no expected CRC.  Raises BadZipfile on a
        mismatch at end of file.
        """
        expected = self._expected_crc
        if expected is not None:
            running = crc32(newdata, self._running_crc) & 0xffffffff
            self._running_crc = running
            # Check the CRC if we're at the end of the file
            if eof and running != expected:
                raise BadZipfile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call."""

        # Simplify algorithm (branching) by transforming negative n to large n.
        if n is None or n < 0:
            n = self.MAX_N

        # Bytes available in read buffer.
        buffered = len(self._readbuffer) - self._offset

        # Read from file.
        if self._compress_left > 0 and n > buffered + len(self._unconsumed):
            want = n - buffered - len(self._unconsumed)
            want = min(max(want, self.MIN_READ_SIZE), self._compress_left)

            raw = self._fileobj.read(want)
            self._compress_left -= len(raw)

            if raw and self._decrypter is not None:
                raw = ''.join(map(self._decrypter, raw))

            if self._compress_type == ZIP_STORED:
                self._update_crc(raw, eof=(self._compress_left==0))
                self._readbuffer = self._readbuffer[self._offset:] + raw
                self._offset = 0
            else:
                # Prepare deflated bytes for decompression.
                self._unconsumed += raw

        # Handle unconsumed data.
        if (len(self._unconsumed) > 0 and n > buffered and
            self._compress_type == ZIP_DEFLATED):
            inflated = self._decompressor.decompress(
                self._unconsumed,
                max(n - buffered, self.MIN_READ_SIZE)
            )

            self._unconsumed = self._decompressor.unconsumed_tail
            eof = len(self._unconsumed) == 0 and self._compress_left == 0
            if eof:
                inflated += self._decompressor.flush()

            self._update_crc(inflated, eof=eof)
            self._readbuffer = self._readbuffer[self._offset:] + inflated
            self._offset = 0

        # Read from buffer.
        start = self._offset
        result = self._readbuffer[start: start + n]
        self._offset = start + len(result)
        return result

    def close(self):
        """Close this object, and the underlying file if it is owned."""
        try :
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super(ZipExtFile, self).close()
707\r
708\r
709class ZipFile(object):\r
710 """ Class with methods to open, read, write, close, list zip files.\r
711\r
712 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)\r
713\r
714 file: Either the path to the file, or a file-like object.\r
715 If it is a path, the file will be opened and closed by ZipFile.\r
716 mode: The mode can be either read "r", write "w" or append "a".\r
717 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).\r
718 allowZip64: if True ZipFile will create files with ZIP64 extensions when\r
719 needed, otherwise it will raise an exception when this would\r
720 be necessary.\r
721\r
722 """\r
723\r
724 fp = None # Set here since __del__ checks it\r
725\r
726 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):\r
727 """Open the ZIP file with mode read "r", write "w" or append "a"."""\r
728 if mode not in ("r", "w", "a"):\r
729 raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')\r
730\r
731 if compression == ZIP_STORED:\r
732 pass\r
733 elif compression == ZIP_DEFLATED:\r
734 if not zlib:\r
735 raise RuntimeError,\\r
736 "Compression requires the (missing) zlib module"\r
737 else:\r
738 raise RuntimeError, "That compression method is not supported"\r
739\r
740 self._allowZip64 = allowZip64\r
741 self._didModify = False\r
742 self.debug = 0 # Level of printing: 0 through 3\r
743 self.NameToInfo = {} # Find file info given name\r
744 self.filelist = [] # List of ZipInfo instances for archive\r
745 self.compression = compression # Method of compression\r
746 self.mode = key = mode.replace('b', '')[0]\r
747 self.pwd = None\r
748 self._comment = ''\r
749\r
750 # Check if we were passed a file-like object\r
751 if isinstance(file, basestring):\r
752 self._filePassed = 0\r
753 self.filename = file\r
754 modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}\r
755 try:\r
756 self.fp = open(file, modeDict[mode])\r
757 except IOError:\r
758 if mode == 'a':\r
759 mode = key = 'w'\r
760 self.fp = open(file, modeDict[mode])\r
761 else:\r
762 raise\r
763 else:\r
764 self._filePassed = 1\r
765 self.fp = file\r
766 self.filename = getattr(file, 'name', None)\r
767\r
768 try:\r
769 if key == 'r':\r
770 self._RealGetContents()\r
771 elif key == 'w':\r
772 # set the modified flag so central directory gets written\r
773 # even if no files are added to the archive\r
774 self._didModify = True\r
775 elif key == 'a':\r
776 try:\r
777 # See if file is a zip file\r
778 self._RealGetContents()\r
779 # seek to start of directory and overwrite\r
780 self.fp.seek(self.start_dir, 0)\r
781 except BadZipfile:\r
782 # file is not a zip file, just append\r
783 self.fp.seek(0, 2)\r
784\r
785 # set the modified flag so central directory gets written\r
786 # even if no files are added to the archive\r
787 self._didModify = True\r
788 else:\r
789 raise RuntimeError('Mode must be "r", "w" or "a"')\r
790 except:\r
791 fp = self.fp\r
792 self.fp = None\r
793 if not self._filePassed:\r
794 fp.close()\r
795 raise\r
796\r
797 def __enter__(self):\r
798 return self\r
799\r
800 def __exit__(self, type, value, traceback):\r
801 self.close()\r
802\r
    def _RealGetContents(self):
        """Read in the table of contents for the ZIP file.

        Locates the end-of-central-directory record in self.fp, stores the
        archive comment and self.start_dir, then parses every central
        directory entry into a ZipInfo that is appended to self.filelist and
        registered in self.NameToInfo under its (decoded) filename.

        Raises BadZipfile when the end record cannot be found or read, when
        the central directory is truncated, or when an entry has the wrong
        magic number.
        """
        fp = self.fp
        try:
            endrec = _EndRecData(fp)
        except IOError:
            raise BadZipfile("File is not a zip file")
        if not endrec:
            raise BadZipfile, "File is not a zip file"
        if self.debug > 1:
            print endrec
        size_cd = endrec[_ECD_SIZE]             # bytes in central directory
        offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
        self._comment = endrec[_ECD_COMMENT]    # archive comment

        # "concat" is zero, unless zip was concatenated to another file
        concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
        if endrec[_ECD_SIGNATURE] == stringEndArchive64:
            # If Zip64 extension structures are present, account for them
            concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

        if self.debug > 2:
            inferred = concat + offset_cd
            print "given, inferred, offset", offset_cd, inferred, concat
        # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + concat
        fp.seek(self.start_dir, 0)
        data = fp.read(size_cd)
        # From here on "fp" is an in-memory copy of the central directory,
        # not the archive file itself.
        fp = cStringIO.StringIO(data)
        total = 0
        while total < size_cd:
            centdir = fp.read(sizeCentralDir)
            if len(centdir) != sizeCentralDir:
                raise BadZipfile("Truncated central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if centdir[_CD_SIGNATURE] != stringCentralDir:
                raise BadZipfile("Bad magic number for central directory")
            if self.debug > 2:
                print centdir
            # The variable-length fields follow the fixed part in this
            # order: file name, extra field, comment.
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            # Create ZipInfo instance to store file information
            x = ZipInfo(filename)
            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
            (x.create_version, x.create_system, x.extract_version, x.reserved,
                x.flag_bits, x.compress_type, t, d,
                x.CRC, x.compress_size, x.file_size) = centdir[1:12]
            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            x._raw_time = t
            x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                                     t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

            # ZIP64 values from the extra field are applied before the
            # header offset is shifted by "concat".
            x._decodeExtra()
            x.header_offset = x.header_offset + concat
            x.filename = x._decodeFilename()
            self.filelist.append(x)
            self.NameToInfo[x.filename] = x

            # update total bytes read from central directory
            total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])

            if self.debug > 2:
                print "total", total
870\r
871\r
872 def namelist(self):\r
873 """Return a list of file names in the archive."""\r
874 l = []\r
875 for data in self.filelist:\r
876 l.append(data.filename)\r
877 return l\r
878\r
879 def infolist(self):\r
880 """Return a list of class ZipInfo instances for files in the\r
881 archive."""\r
882 return self.filelist\r
883\r
884 def printdir(self):\r
885 """Print a table of contents for the zip file."""\r
886 print "%-46s %19s %12s" % ("File Name", "Modified ", "Size")\r
887 for zinfo in self.filelist:\r
888 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]\r
889 print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size)\r
890\r
891 def testzip(self):\r
892 """Read all the files and check the CRC."""\r
893 chunk_size = 2 ** 20\r
894 for zinfo in self.filelist:\r
895 try:\r
896 # Read by chunks, to avoid an OverflowError or a\r
897 # MemoryError with very large embedded files.\r
898 with self.open(zinfo.filename, "r") as f:\r
899 while f.read(chunk_size): # Check CRC-32\r
900 pass\r
901 except BadZipfile:\r
902 return zinfo.filename\r
903\r
904 def getinfo(self, name):\r
905 """Return the instance of ZipInfo given 'name'."""\r
906 info = self.NameToInfo.get(name)\r
907 if info is None:\r
908 raise KeyError(\r
909 'There is no item named %r in the archive' % name)\r
910\r
911 return info\r
912\r
913 def setpassword(self, pwd):\r
914 """Set default password for encrypted files."""\r
915 self.pwd = pwd\r
916\r
917 @property\r
918 def comment(self):\r
919 """The comment text associated with the ZIP file."""\r
920 return self._comment\r
921\r
922 @comment.setter\r
923 def comment(self, comment):\r
924 # check for valid comment length\r
925 if len(comment) > ZIP_MAX_COMMENT:\r
926 import warnings\r
927 warnings.warn('Archive comment is too long; truncating to %d bytes'\r
928 % ZIP_MAX_COMMENT, stacklevel=2)\r
929 comment = comment[:ZIP_MAX_COMMENT]\r
930 self._comment = comment\r
931 self._didModify = True\r
932\r
933 def read(self, name, pwd=None):\r
934 """Return file bytes (as a string) for name."""\r
935 return self.open(name, "r", pwd).read()\r
936\r
def open(self, name, mode="r", pwd=None):
    """Return a file-like object for reading the member 'name'.

    'name' is either a member name or a ZipInfo object.  'mode' must be
    "r", "U" or "rU".  'pwd' is the password for an encrypted member; when
    it is empty, the archive default (self.pwd) is used instead.

    Raises RuntimeError for an unsupported mode, a closed archive, a
    missing password or a failed password check, and BadZipfile when the
    member's file header is truncated, carries the wrong signature, or
    names a different file than the directory entry does.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to read ZIP archive that was already closed")

    # Reuse the file object handed to the constructor when there is one;
    # otherwise open a private handle that must be closed again.
    should_close = not self._filePassed
    if should_close:
        zef_file = open(self.filename, 'rb')
    else:
        zef_file = self.fp

    try:
        # Accept either a ready-made info object or a member name.
        zinfo = name if isinstance(name, ZipInfo) else self.getinfo(name)

        zef_file.seek(zinfo.header_offset, 0)

        # Read and validate the fixed-size part of the file header.
        raw_header = zef_file.read(sizeFileHeader)
        if len(raw_header) != sizeFileHeader:
            raise BadZipfile("Truncated file header")
        fields = struct.unpack(structFileHeader, raw_header)
        if fields[_FH_SIGNATURE] != stringFileHeader:
            raise BadZipfile("Bad magic number for file header")

        # The variable-length name and extra field follow; the extra field
        # is read only to move past it.
        header_name = zef_file.read(fields[_FH_FILENAME_LENGTH])
        extra_length = fields[_FH_EXTRA_FIELD_LENGTH]
        if extra_length:
            zef_file.read(extra_length)

        if header_name != zinfo.orig_filename:
            raise BadZipfile(
                'File name in directory "%s" and header "%s" differ.' % (
                    zinfo.orig_filename, header_name))

        # Bit 0 of the flags marks an encrypted member.
        zd = None
        if zinfo.flag_bits & 0x1:
            pwd = pwd or self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, "
                                   "password required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption
            # header used to strengthen the algorithm.  The first 11 bytes
            # are completely random, while the 12th contains the MSB of the
            # CRC, or the MSB of the file time depending on the header
            # type, and is used to check the correctness of the password.
            crypt_header = zef_file.read(12)
            plain_header = [zd(c) for c in crypt_header[0:12]]
            if zinfo.flag_bits & 0x8:
                # compare against the file time from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if ord(plain_header[11]) != check_byte:
                raise RuntimeError("Bad password for file", name)

        return ZipExtFile(zef_file, mode, zinfo, zd,
                          close_fileobj=should_close)
    except:
        # Deliberately bare: whatever went wrong, release the private
        # handle before letting the exception continue unchanged.
        if should_close:
            zef_file.close()
        raise
1015\r
def extract(self, member, path=None, pwd=None):
    """Extract one member of the archive and return the path it was
    written to.

    `member' may be a filename or a ZipInfo object; a filename is
    resolved with getinfo().  The member is extracted under its full name
    into `path', which defaults to the current working directory.  `pwd'
    is forwarded unchanged to the extraction helper.
    """
    zinfo = member if isinstance(member, ZipInfo) else self.getinfo(member)
    destination = os.getcwd() if path is None else path
    return self._extract_member(zinfo, destination, pwd)
1029\r
def extractall(self, path=None, members=None, pwd=None):
    """Extract several members of the archive by calling extract() on each.

    `members' is optional and must be a subset of the list returned by
    namelist(); when it is None, every name from namelist() is extracted.
    `path' and `pwd' are passed to extract() unchanged, so a `path' of
    None means the current working directory.
    """
    selected = self.namelist() if members is None else members
    for item in selected:
        self.extract(item, path, pwd)
1041\r
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' to a physical file beneath the
    directory 'targetpath' and return the path that was written.

    The member name is sanitised before it is joined to 'targetpath':
    separators are converted to the platform separator, any drive or UNC
    prefix is dropped, and empty, "." and ".." components are removed.
    When the platform separator is a backslash, characters that are
    illegal in Windows file names are replaced by "_" and trailing dots
    are stripped from every component.

    A member whose name ends in "/" is created as a directory; any other
    member is opened with self.open(member, pwd=pwd) and copied to disk.
    Missing parent directories are created first.
    """
    # build the destination pathname, replacing
    # forward slashes to platform specific separators.
    arcname = member.filename.replace('/', os.path.sep)

    if os.path.altsep:
        arcname = arcname.replace(os.path.altsep, os.path.sep)
    # interpret absolute pathname as relative, remove drive letter or
    # UNC path, redundant separators, "." and ".." components.
    arcname = os.path.splitdrive(arcname)[1]
    arcname = os.path.sep.join(
        x for x in arcname.split(os.path.sep)
        if x not in ('', os.path.curdir, os.path.pardir))
    if os.path.sep == '\\':
        # filter illegal characters on Windows
        illegal = ':<>|"?*'
        if isinstance(arcname, unicode):
            # unicode.translate() takes a mapping of code points
            table = {ord(c): ord('_') for c in illegal}
        else:
            # str.translate() takes a 256-character table
            table = string.maketrans(illegal, '_' * len(illegal))
        arcname = arcname.translate(table)
        # remove trailing dots
        arcname = (x.rstrip('.') for x in arcname.split(os.path.sep))
        arcname = os.path.sep.join(x for x in arcname if x)

    targetpath = os.path.join(targetpath, arcname)
    targetpath = os.path.normpath(targetpath)

    # Create all upper directories if necessary.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs)

    # endswith() instead of indexing [-1], so that an empty member name
    # does not raise IndexError here.
    if member.filename.endswith('/'):
        if not os.path.isdir(targetpath):
            os.mkdir(targetpath)
        return targetpath

    # open() rather than the Python 2-only file() constructor; inside a
    # method body this name is the builtin, not the open() method.
    with self.open(member, pwd=pwd) as source, \
            open(targetpath, "wb") as target:
        shutil.copyfileobj(source, target)

    return targetpath
1087\r
def _writecheck(self, zinfo):
    """Validate 'zinfo' and the archive state before a member is written.

    A name that is already in the archive only produces a warning.
    RuntimeError is raised when the archive is not in mode "w" or "a",
    when it is already closed, when deflate is requested without zlib, or
    when the compression method is unsupported.  LargeZipFile is raised
    when ZIP64 is disabled and the file count, file size or header offset
    would need it.
    """
    if zinfo.filename in self.NameToInfo:
        import warnings
        warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
    if self.mode not in ("w", "a"):
        raise RuntimeError('write() requires mode "w" or "a"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to write ZIP archive that was already closed")
    method = zinfo.compress_type
    if method == ZIP_DEFLATED and not zlib:
        raise RuntimeError(
            "Compression requires the (missing) zlib module")
    if method not in (ZIP_STORED, ZIP_DEFLATED):
        raise RuntimeError(
            "That compression method is not supported")
    if self._allowZip64:
        # With ZIP64 allowed none of the size limits below apply.
        return
    if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
        reason = "Files count"
    elif zinfo.file_size > ZIP64_LIMIT:
        reason = "Filesize"
    elif zinfo.header_offset > ZIP64_LIMIT:
        reason = "Zipfile size"
    else:
        return
    raise LargeZipFile(reason + " would require ZIP64 extensions")
1115\r
def write(self, filename, arcname=None, compress_type=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    'arcname' defaults to 'filename'; in either case the drive prefix and
    leading separators are removed and the path is normalised.  A
    directory is stored as an empty entry whose name ends in "/".
    'compress_type' overrides the archive's default compression for this
    member.

    Raises RuntimeError when the archive is already closed, or when the
    data turns out too large for a header that was written without ZIP64
    fields; errors from os.stat() and _writecheck() propagate.
    """
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    st = os.stat(filename)
    isdir = stat.S_ISDIR(st.st_mode)
    date_time = time.localtime(st.st_mtime)[0:6]

    # Work out the name the member gets inside the archive.
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    while arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    if isdir:
        arcname += '/'

    # Create ZipInfo instance to store file information
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
    zinfo.compress_type = (self.compression if compress_type is None
                           else compress_type)
    zinfo.file_size = st.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes

    self._writecheck(zinfo)
    self._didModify = True

    if isdir:
        # A directory has no data: register it and write only the header.
        zinfo.file_size = 0
        zinfo.compress_size = 0
        zinfo.CRC = 0
        zinfo.external_attr |= 0x10  # MS-DOS directory flag
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader(False))
        return

    with open(filename, "rb") as source:
        # Placeholder CRC and sizes; the header is rewritten with the real
        # values once the data has been copied.
        zinfo.CRC = crc = 0
        zinfo.compress_size = packed_size = 0
        # Compressed size can be larger than uncompressed size
        zip64 = self._allowZip64 and \
            zinfo.file_size * 1.05 > ZIP64_LIMIT
        self.fp.write(zinfo.FileHeader(zip64))
        if zinfo.compress_type == ZIP_DEFLATED:
            cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                    zlib.DEFLATED, -15)
        else:
            cmpr = None
        raw_size = 0
        chunk = source.read(1024 * 8)
        while chunk:
            raw_size += len(chunk)
            crc = crc32(chunk, crc) & 0xffffffff
            if cmpr:
                chunk = cmpr.compress(chunk)
                packed_size += len(chunk)
            self.fp.write(chunk)
            chunk = source.read(1024 * 8)
    if cmpr:
        # Emit whatever the compressor is still holding back.
        tail = cmpr.flush()
        packed_size += len(tail)
        self.fp.write(tail)
        zinfo.compress_size = packed_size
    else:
        zinfo.compress_size = raw_size
    zinfo.CRC = crc
    zinfo.file_size = raw_size
    if not zip64 and self._allowZip64:
        # The header already on disk has no ZIP64 fields, so sizes beyond
        # the limit can no longer be recorded.
        if raw_size > ZIP64_LIMIT:
            raise RuntimeError('File size has increased during compressing')
        if packed_size > ZIP64_LIMIT:
            raise RuntimeError('Compressed size larger than uncompressed size')
    # Seek backwards and write file header (which will now include
    # correct CRC and file sizes)
    resume_at = self.fp.tell()      # Preserve current position in file
    self.fp.seek(zinfo.header_offset, 0)
    self.fp.write(zinfo.FileHeader(zip64))
    self.fp.seek(resume_at, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1205\r
def writestr(self, zinfo_or_arcname, bytes, compress_type=None):
    """Write a file into the archive.  The contents is the string
    'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    When only a name is given, a ZipInfo is created with the current
    local time, the archive's default compression, and permission bits
    for a directory (name ending in "/") or a regular file.
    'compress_type', when not None, overrides the member's compression.

    Raises RuntimeError when the archive is already closed and
    LargeZipFile when the data needs ZIP64 but ZIP64 is not allowed.
    """
    if isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = zinfo_or_arcname
    else:
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])

        zinfo.compress_type = self.compression
        if zinfo.filename[-1] == '/':
            # drwxrwxr-x plus the MS-DOS directory flag
            zinfo.external_attr = (0o40775 << 16) | 0x10
        else:
            zinfo.external_attr = 0o600 << 16     # ?rw-------

    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    if compress_type is not None:
        zinfo.compress_type = compress_type

    data = bytes
    zinfo.file_size = len(data)             # Uncompressed size
    zinfo.header_offset = self.fp.tell()    # Start of header bytes
    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = crc32(data) & 0xffffffff    # CRC-32 checksum
    if zinfo.compress_type == ZIP_DEFLATED:
        co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                              zlib.DEFLATED, -15)
        data = co.compress(data) + co.flush()
        zinfo.compress_size = len(data)     # Compressed size
    else:
        zinfo.compress_size = zinfo.file_size
    zip64 = (zinfo.file_size > ZIP64_LIMIT
             or zinfo.compress_size > ZIP64_LIMIT)
    if zip64 and not self._allowZip64:
        raise LargeZipFile("Filesize would require ZIP64 extensions")
    self.fp.write(zinfo.FileHeader(zip64))
    self.fp.write(data)
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data
        fmt = '<LQQ' if zip64 else '<LLL'
        self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
                                  zinfo.file_size))
    self.fp.flush()
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1256\r
def __del__(self):
    """Finalizer: call close() in case the user forgot to."""
    self.close()
1260\r
def close(self):
    """Close the file, and for mode "w" and "a" write the ending
    records.

    Nothing happens when the archive is already closed.  For a modified
    archive in mode "w" or "a" the central directory is written, then the
    ZIP64 end records when they are required, then the end-of-archive
    record and the archive comment.  In every case self.fp is cleared
    afterwards, and the underlying file is closed unless it was supplied
    by the caller (self._filePassed).

    Raises LargeZipFile when the ending records need ZIP64 but ZIP64 is
    not allowed.
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify:  # write ending records
            dir_start = self.fp.tell()
            for zinfo in self.filelist:         # write central directory
                # Pack the timestamp into DOS-style date and time integers;
                # seconds are stored halved, years as an offset from 1980.
                dt = zinfo.date_time
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)

                # Values over the limit go into a ZIP64 extra field and
                # the regular field is filled with 0xffffffff instead.
                zip64_values = []
                if (zinfo.file_size > ZIP64_LIMIT
                        or zinfo.compress_size > ZIP64_LIMIT):
                    zip64_values.extend(
                        (zinfo.file_size, zinfo.compress_size))
                    file_size = compress_size = 0xffffffff
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    zip64_values.append(zinfo.header_offset)
                    header_offset = 0xffffffff
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                extract_version = zinfo.extract_version
                create_version = zinfo.create_version
                if zip64_values:
                    # Prepend a ZIP64 field to the extra's and require at
                    # least version 45 for reading and creating.
                    count = len(zip64_values)
                    extra_data = struct.pack(
                        '<HH' + 'Q' * count,
                        1, 8 * count, *zip64_values) + extra_data
                    extract_version = max(45, extract_version)
                    create_version = max(45, create_version)

                try:
                    filename, flag_bits = zinfo._encodeFilenameFlags()
                    centdir = struct.pack(
                        structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                except DeprecationWarning:
                    # Dump the values that were being packed, then let the
                    # warning-turned-error continue.
                    dump = (structCentralDir,
                            stringCentralDir, create_version,
                            zinfo.create_system, extract_version,
                            zinfo.reserved, zinfo.flag_bits,
                            zinfo.compress_type, dostime, dosdate,
                            zinfo.CRC, compress_size, file_size,
                            len(zinfo.filename), len(extra_data),
                            len(zinfo.comment),
                            0, zinfo.internal_attr, zinfo.external_attr,
                            header_offset)
                    sys.stderr.write("%s\n" % (dump,))
                    raise
                self.fp.write(centdir)
                self.fp.write(filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            dir_end = self.fp.tell()
            # Write end-of-zip-archive record
            centDirCount = len(self.filelist)
            centDirSize = dir_end - dir_start
            centDirOffset = dir_start
            requires_zip64 = None
            if centDirCount > ZIP_FILECOUNT_LIMIT:
                requires_zip64 = "Files count"
            elif centDirOffset > ZIP64_LIMIT:
                requires_zip64 = "Central directory offset"
            elif centDirSize > ZIP64_LIMIT:
                requires_zip64 = "Central directory size"
            if requires_zip64:
                # Need to write the ZIP64 end-of-archive records
                if not self._allowZip64:
                    raise LargeZipFile(requires_zip64 +
                                       " would require ZIP64 extensions")
                self.fp.write(struct.pack(
                    structEndArchive64, stringEndArchive64,
                    44, 45, 45, 0, 0, centDirCount, centDirCount,
                    centDirSize, centDirOffset))
                self.fp.write(struct.pack(
                    structEndArchive64Locator,
                    stringEndArchive64Locator, 0, dir_end, 1))
                # Clamp the values to what the regular end record can hold.
                centDirCount = min(centDirCount, 0xFFFF)
                centDirSize = min(centDirSize, 0xFFFFFFFF)
                centDirOffset = min(centDirOffset, 0xFFFFFFFF)

            self.fp.write(struct.pack(
                structEndArchive, stringEndArchive,
                0, 0, centDirCount, centDirCount,
                centDirSize, centDirOffset, len(self._comment)))
            self.fp.write(self._comment)
            self.fp.flush()
    finally:
        # Mark the archive closed even when writing the records failed.
        fp, self.fp = self.fp, None
        if not self._filePassed:
            fp.close()
1371\r
1372\r
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def writepy(self, pathname, basename=""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory (it contains __init__.py), the
        package and all of its sub-packages are added recursively, with
        'basename' extended by the package name.  If pathname is a plain
        directory, the *.py modules directly inside it are added.
        Otherwise pathname must be a *.py file, which is added on its own;
        anything else raises RuntimeError.

        What is stored is always the compiled module (module.pyo or
        module.pyc); module.py is compiled to module.pyc if necessary.
        """
        name = os.path.split(pathname)[1]

        if not os.path.isdir(pathname):
            # A single module file.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file %s" % arcname)
            self.write(fname, arcname)
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory %s" % pathname)
            for filename in os.listdir(pathname):
                if os.path.splitext(filename)[1] != ".py":
                    continue
                path = os.path.join(pathname, filename)
                fname, arcname = self._get_codename(path[0:-3], basename)
                if self.debug:
                    print("Adding %s" % arcname)
                self.write(fname, arcname)
            return

        # This is a package directory, add it
        if basename:
            basename = "%s/%s" % (basename, name)
        else:
            basename = name
        if self.debug:
            print("Adding package in %s as %s" % (pathname, basename))
        fname, arcname = self._get_codename(initname[0:-3], basename)
        if self.debug:
            print("Adding %s" % arcname)
        self.write(fname, arcname)

        # __init__.py is already in the archive; handle everything else.
        dirlist = os.listdir(pathname)
        dirlist.remove("__init__.py")
        # Add all *.py files and package subdirectories
        for filename in dirlist:
            path = os.path.join(pathname, filename)
            ext = os.path.splitext(filename)[1]
            if os.path.isdir(path):
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    # This is a package directory, add it
                    self.writepy(path, basename)  # Recursive call
            elif ext == ".py":
                fname, arcname = self._get_codename(path[0:-3], basename)
                if self.debug:
                    print("Adding %s" % arcname)
                self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        'pathname' is a module path without its extension.  The .pyo file
        is chosen when it exists and is at least as new as the .py file.
        Otherwise the .pyc file is chosen, and is compiled first when it
        is missing or older than the .py file; a compile error is printed
        rather than raised.  The archive name is the chosen file's base
        name, prefixed with "basename/" when 'basename' is not empty.
        For example, given /python/lib/string and an empty basename,
        return (/python/lib/string.pyc, string.pyc).
        """
        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"

        def mtime(path):
            return os.stat(path).st_mtime

        if os.path.isfile(file_pyo) and mtime(file_pyo) >= mtime(file_py):
            fname = file_pyo    # Use .pyo file
        else:
            fname = file_pyc
            if not os.path.isfile(file_pyc) or \
                    mtime(file_pyc) < mtime(file_py):
                import py_compile
                if self.debug:
                    print("Compiling %s" % file_py)
                try:
                    py_compile.compile(file_py, file_pyc, None, True)
                except py_compile.PyCompileError as err:
                    print(err.msg)
        archivename = os.path.split(fname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
1470\r
1471\r
def main(args=None):
    """Command-line interface: list, test, extract or create a zipfile.

    'args' is the argument list and defaults to sys.argv[1:].  The first
    argument selects the action (-l, -t, -e or -c).  An unknown action or
    a wrong number of arguments prints the usage text and exits with
    status 1.
    """
    import textwrap
    USAGE = textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip # Show listing of a zipfile
            zipfile.py -t zipfile.zip # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    def usage_exit():
        print(USAGE)
        sys.exit(1)

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        usage_exit()

    action = args[0]
    if action == '-l':
        if len(args) != 2:
            usage_exit()
        with ZipFile(args[1], 'r') as zf:
            zf.printdir()

    elif action == '-t':
        if len(args) != 2:
            usage_exit()
        with ZipFile(args[1], 'r') as zf:
            badfile = zf.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif action == '-e':
        if len(args) != 3:
            usage_exit()

        with ZipFile(args[1], 'r') as zf:
            zf.extractall(args[2])

    elif action == '-c':
        if len(args) < 3:
            usage_exit()

        def addToZip(zf, path, zippath):
            # Files are stored deflated; a directory gets its own entry
            # (unless its archive path is empty) and is then walked
            # recursively.  Anything else is ignored.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                if zippath:
                    zf.write(path, zippath)
                for child in os.listdir(path):
                    addToZip(zf,
                             os.path.join(path, child),
                             os.path.join(zippath, child))

        with ZipFile(args[1], 'w', allowZip64=True) as zf:
            for path in args[2:]:
                # Archive each source under its last path component; a
                # trailing separator means the component before it.
                zippath = os.path.basename(path)
                if not zippath:
                    zippath = os.path.basename(os.path.dirname(path))
                if zippath in ('', os.curdir, os.pardir):
                    zippath = ''
                addToZip(zf, path, zippath)
1537\r
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()