]> git.proxmox.com Git - mirror_edk2.git/blame - AppPkg/Applications/Python/Python-2.7.2/Lib/gzip.py
EmbeddedPkg: Extend NvVarStoreFormattedLib LIBRARY_CLASS
[mirror_edk2.git] / AppPkg / Applications / Python / Python-2.7.2 / Lib / gzip.py
CommitLineData
"""Functions that read and write gzipped files.

The user of the file doesn't have to worry about the compression,
but random access is not allowed."""
5\r
6# based on Andrew Kuchling's minigzip.py distributed with the zlib module\r
7\r
8import struct, sys, time, os\r
9import zlib\r
10import io\r
11import __builtin__\r
12\r
13__all__ = ["GzipFile","open"]\r
14\r
# Bit masks for the flag byte of a gzip member header.  GzipFile tests the
# byte it reads against these, and writes FNAME when a file name is stored.
FTEXT = 1
FHCRC = 2
FEXTRA = 4
FNAME = 8
FCOMMENT = 16

# Values stored in GzipFile.mode to record how the object was opened.
READ = 1
WRITE = 2
18\r
def write32u(output, value):
    """Write value to output as a 4-byte little-endian unsigned integer.

    output is any object with a write() method; value is packed with the
    struct format "<L" and written in a single call.
    """
    packed = struct.pack("<L", value)
    output.write(packed)
23\r
def read32(input):
    """Read 4 bytes from input and return them as an unsigned integer.

    The bytes are decoded as a little-endian 32-bit value (struct format
    "<I"); input is any object with a read() method.
    """
    raw = input.read(4)
    (value,) = struct.unpack("<I", raw)
    return value
26\r
def open(filename, mode="rb", compresslevel=9):
    """Create a GzipFile for filename; a shorthand for calling the class.

    filename must be supplied.  mode is 'rb' unless given, and
    compresslevel is 9 unless given; all three are handed to GzipFile
    unchanged.

    """
    return GzipFile(filename=filename,
                    mode=mode,
                    compresslevel=compresslevel)
35\r
class GzipFile(io.BufferedIOBase):
    """The GzipFile class simulates most of the methods of a file object with
    the exception of the readinto() and truncate() methods.

    """

    # File object opened by the constructor itself; it is closed by close().
    # Stays None when the caller supplied fileobj.
    myfileobj = None
    # Upper bound on the number of compressed bytes requested per _read().
    max_read_chunk = 10 * 1024 * 1024   # 10Mb

    def __init__(self, filename=None, mode=None,
                 compresslevel=9, fileobj=None, mtime=None):
        """Constructor for the GzipFile class.

        At least one of fileobj and filename must be given a
        non-trivial value.

        The new class instance is based on fileobj, which can be a regular
        file, a StringIO object, or any other object which simulates a file.
        It defaults to None, in which case filename is opened to provide
        a file object.

        When fileobj is not None, the filename argument is only used to be
        included in the gzip file header, which may include the original
        filename of the uncompressed file.  It defaults to the filename of
        fileobj, if discernible; otherwise, it defaults to the empty string,
        and in this case the original filename is not included in the header.

        The mode argument can be any of 'r', 'rb', 'a', 'ab', 'w', or 'wb',
        depending on whether the file will be read or written.  The default
        is the mode of fileobj if discernible; otherwise, the default is 'rb'.
        Be aware that only the 'rb', 'ab', and 'wb' values should be used
        for cross-platform portability.

        The compresslevel argument is an integer from 1 to 9 controlling the
        level of compression; 1 is fastest and produces the least compression,
        and 9 is slowest and produces the most compression.  The default is 9.

        The mtime argument is an optional numeric timestamp to be written
        to the stream when compressing.  All gzip compressed streams
        are required to contain a timestamp.  If omitted or None, the
        current time is used.  This module ignores the timestamp when
        decompressing; however, some programs, such as gunzip, make use
        of it.  The format of the timestamp is the same as that of the
        return value of time.time() and of the st_mtime member of the
        object returned by os.stat().

        Raises IOError if mode does not start with 'r', 'w' or 'a'.

        """

        # Make sure we don't inadvertently enable universal newlines on the
        # underlying file object - in read mode, this causes data corruption.
        if mode:
            mode = mode.replace('U', '')
        # guarantee the file is opened in binary mode on platforms
        # that care about that sort of thing
        if mode and 'b' not in mode:
            mode += 'b'
        if fileobj is None:
            fileobj = self.myfileobj = __builtin__.open(filename, mode or 'rb')
        if filename is None:
            # os.fdopen() gives its file objects the placeholder name
            # '<fdopen>', and some file-like objects expose a non-string
            # name.  Neither belongs in the header's file name field.
            filename = getattr(fileobj, 'name', '')
            if not isinstance(filename, basestring) or filename == '<fdopen>':
                filename = ''
        if mode is None:
            if hasattr(fileobj, 'mode'):
                mode = fileobj.mode
            else:
                mode = 'rb'

        if mode[0:1] == 'r':
            self.mode = READ
            # Set flag indicating start of a new member
            self._new_member = True
            # Buffer data read from gzip file. extrastart is offset in
            # stream where buffer starts. extrasize is number of
            # bytes remaining in buffer from current stream position.
            self.extrabuf = ""
            self.extrasize = 0
            self.extrastart = 0
            self.name = filename
            # Starts small, scales exponentially
            self.min_readsize = 100

        elif mode[0:1] == 'w' or mode[0:1] == 'a':
            self.mode = WRITE
            self._init_write(filename)
            # Negative wbits: produce a raw deflate stream; the gzip header
            # and trailer are written by this class itself.
            self.compress = zlib.compressobj(compresslevel,
                                             zlib.DEFLATED,
                                             -zlib.MAX_WBITS,
                                             zlib.DEF_MEM_LEVEL,
                                             0)
        else:
            raise IOError("Mode " + mode + " not supported")

        self.fileobj = fileobj
        # Current position in the uncompressed stream.
        self.offset = 0
        self.mtime = mtime

        if self.mode == WRITE:
            self._write_gzip_header()

    @property
    def filename(self):
        """Deprecated alias for the name attribute.

        Emits a DeprecationWarning.  In write mode a name that does not
        already end in ".gz" is returned with ".gz" appended.
        """
        import warnings
        warnings.warn("use the name attribute", DeprecationWarning, 2)
        if self.mode == WRITE and self.name[-3:] != ".gz":
            return self.name + ".gz"
        return self.name

    def __repr__(self):
        """Return '<gzip ...>' built from the repr of the wrapped file."""
        s = repr(self.fileobj)
        # s[1:-1] drops the first and last character of the inner repr
        # (the angle brackets of a typical file object repr).
        return '<gzip ' + s[1:-1] + ' ' + hex(id(self)) + '>'

    def _check_closed(self):
        """Raises a ValueError if the underlying file object has been closed.

        """
        if self.closed:
            raise ValueError('I/O operation on closed file.')

    def _init_write(self, filename):
        """Reset the per-stream write state (name, CRC, size, buffers)."""
        self.name = filename
        self.crc = zlib.crc32("") & 0xffffffff
        self.size = 0
        self.writebuf = []
        self.bufsize = 0

    def _write_gzip_header(self):
        """Write the gzip member header to the underlying file object.

        The stored file name is the basename of self.name with a trailing
        ".gz" removed; it is omitted when empty or when it cannot be
        encoded as Latin-1.
        """
        self.fileobj.write('\037\213')             # magic header
        self.fileobj.write('\010')                 # compression method
        try:
            # RFC 1952 requires the FNAME field to be Latin-1. Do not
            # include filenames that cannot be represented that way.
            fname = os.path.basename(self.name)
            if not isinstance(fname, str):
                fname = fname.encode('latin-1')
            if fname.endswith('.gz'):
                fname = fname[:-3]
        except UnicodeEncodeError:
            fname = ''
        flags = 0
        if fname:
            flags = FNAME
        self.fileobj.write(chr(flags))
        mtime = self.mtime
        if mtime is None:
            mtime = time.time()
        write32u(self.fileobj, long(mtime))
        self.fileobj.write('\002')                 # extra flags byte
        self.fileobj.write('\377')                 # OS byte (255: unknown)
        if fname:
            self.fileobj.write(fname + '\000')

    def _init_read(self):
        """Reset the running CRC and size before reading a new member."""
        self.crc = zlib.crc32("") & 0xffffffff
        self.size = 0

    def _read_gzip_header(self):
        """Read and validate a gzip member header from the file object.

        Stores the header timestamp in self.mtime and skips every optional
        field.  Raises IOError if the magic number or the compression
        method is not the expected one.
        """
        magic = self.fileobj.read(2)
        if magic != '\037\213':
            raise IOError('Not a gzipped file')
        method = ord(self.fileobj.read(1))
        if method != 8:
            raise IOError('Unknown compression method')
        flag = ord(self.fileobj.read(1))
        self.mtime = read32(self.fileobj)
        # Skip the extra-flags byte and the OS byte; neither is used.
        self.fileobj.read(2)

        if flag & FEXTRA:
            # Read & discard the extra field, if present
            xlen = ord(self.fileobj.read(1))
            xlen = xlen + 256 * ord(self.fileobj.read(1))
            self.fileobj.read(xlen)
        if flag & FNAME:
            # Read and discard a null-terminated string containing the filename
            while True:
                s = self.fileobj.read(1)
                if not s or s == '\000':
                    break
        if flag & FCOMMENT:
            # Read and discard a null-terminated string containing a comment
            while True:
                s = self.fileobj.read(1)
                if not s or s == '\000':
                    break
        if flag & FHCRC:
            self.fileobj.read(2)     # Read & discard the 16-bit header CRC

    def write(self, data):
        """Compress data and write it to the underlying file object.

        Returns len(data).  Raises ValueError if the file is closed and
        IOError (EBADF) if the object was opened for reading.
        """
        self._check_closed()
        if self.mode != WRITE:
            import errno
            raise IOError(errno.EBADF, "write() on read-only GzipFile object")

        if self.fileobj is None:
            raise ValueError("write() on closed GzipFile object")

        # Convert data type if called by io.BufferedWriter.
        if isinstance(data, memoryview):
            data = data.tobytes()

        if len(data) > 0:
            # Write first: if the underlying write fails, the size, CRC and
            # offset still describe only the data actually accepted.
            self.fileobj.write(self.compress.compress(data))
            self.size = self.size + len(data)
            self.crc = zlib.crc32(data, self.crc) & 0xffffffff
            self.offset += len(data)

        return len(data)

    def read(self, size=-1):
        """Return up to size bytes of uncompressed data.

        A negative size reads until end of file.  Fewer bytes are returned
        only when the end of the file is reached.  Raises ValueError if
        the file is closed and IOError (EBADF) in write mode.
        """
        self._check_closed()
        if self.mode != READ:
            import errno
            raise IOError(errno.EBADF, "read() on write-only GzipFile object")

        if self.extrasize <= 0 and self.fileobj is None:
            return ''

        readsize = 1024
        if size < 0:        # get the whole thing
            try:
                while True:
                    self._read(readsize)
                    readsize = min(self.max_read_chunk, readsize * 2)
            except EOFError:
                size = self.extrasize
        else:               # just get some more of it
            try:
                while size > self.extrasize:
                    self._read(readsize)
                    readsize = min(self.max_read_chunk, readsize * 2)
            except EOFError:
                if size > self.extrasize:
                    size = self.extrasize

        offset = self.offset - self.extrastart
        chunk = self.extrabuf[offset: offset + size]
        self.extrasize = self.extrasize - size

        self.offset += size
        return chunk

    def _unread(self, buf):
        """Push the bytes of buf back so the next read returns them again.

        Only the counters are adjusted: the bytes are still present in
        self.extrabuf because read() never removes data from it.
        """
        self.extrasize = len(buf) + self.extrasize
        self.offset -= len(buf)

    def _read(self, size=1024):
        """Read up to size compressed bytes and buffer the decompressed data.

        Starts a new member (header + fresh decompressor) when the
        _new_member flag is set.  Raises EOFError when there is no more
        data; any final data is added to the buffer before raising.
        """
        if self.fileobj is None:
            raise EOFError("Reached EOF")

        if self._new_member:
            # If the _new_member flag is set, we have to
            # jump to the next member, if there is one.
            #
            # First, check if we're at the end of the file;
            # if so, it's time to stop; no more members to read.
            pos = self.fileobj.tell()   # Save current position
            self.fileobj.seek(0, 2)     # Seek to end of file
            if pos == self.fileobj.tell():
                raise EOFError("Reached EOF")
            else:
                self.fileobj.seek(pos)  # Return to original position

            self._init_read()
            self._read_gzip_header()
            self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
            self._new_member = False

        # Read a chunk of data from the file
        buf = self.fileobj.read(size)

        # If the EOF has been reached, flush the decompression object
        # and mark this object as finished.

        if buf == "":
            uncompress = self.decompress.flush()
            self._read_eof()
            self._add_read_data(uncompress)
            raise EOFError('Reached EOF')

        uncompress = self.decompress.decompress(buf)
        self._add_read_data(uncompress)

        if self.decompress.unused_data != "":
            # Ending case: we've come to the end of a member in the file,
            # so seek back to the start of the unused data, finish up
            # this member, and read a new gzip header.
            # (The number of bytes to seek back is the length of the unused
            # data, minus 8 because _read_eof() will rewind a further 8 bytes)
            self.fileobj.seek(-len(self.decompress.unused_data) + 8, 1)

            # Check the CRC and file size, and set the flag so we read
            # a new member on the next call
            self._read_eof()
            self._new_member = True

    def _add_read_data(self, data):
        """Append decompressed data to the buffer and update CRC and size.

        The part of the buffer before the current stream position is
        dropped, so the buffer afterwards starts at self.offset.
        """
        self.crc = zlib.crc32(data, self.crc) & 0xffffffff
        offset = self.offset - self.extrastart
        self.extrabuf = self.extrabuf[offset:] + data
        self.extrasize = self.extrasize + len(data)
        self.extrastart = self.offset
        self.size = self.size + len(data)

    def _read_eof(self):
        """Verify the member trailer and skip any zero padding after it.

        Raises IOError if the stored CRC or length does not match the
        values computed while decompressing.
        """
        # We've read to the end of the file, so we have to rewind in order
        # to reread the 8 bytes containing the CRC and the file size.
        # We check that the computed CRC and size of the
        # uncompressed data matches the stored values.  Note that the size
        # stored is the true file size mod 2**32.
        self.fileobj.seek(-8, 1)
        crc32 = read32(self.fileobj)
        isize = read32(self.fileobj)  # may exceed 2GB
        if crc32 != self.crc:
            raise IOError("CRC check failed %s != %s" % (hex(crc32),
                                                         hex(self.crc)))
        elif isize != (self.size & 0xffffffff):
            raise IOError("Incorrect length of data produced")

        # Gzip files can be padded with zeroes and still have archives.
        # Consume all zero bytes and set the file position to the first
        # non-zero byte. See http://www.gzip.org/#faq8
        c = "\x00"
        while c == "\x00":
            c = self.fileobj.read(1)
        if c:
            self.fileobj.seek(-1, 1)

    @property
    def closed(self):
        """True once close() has detached the underlying file object."""
        return self.fileobj is None

    def close(self):
        """Finish the gzip stream and release the underlying file.

        In write mode the remaining compressed data and the CRC/size
        trailer are written first.  A file opened by the constructor is
        closed; a caller-supplied fileobj is left open.  Calling close()
        again is a no-op.
        """
        fileobj = self.fileobj
        if fileobj is None:
            return
        # Detach before writing so the object counts as closed even if
        # writing the trailer fails.
        self.fileobj = None
        try:
            if self.mode == WRITE:
                fileobj.write(self.compress.flush())
                write32u(fileobj, self.crc)
                # self.size may exceed 2GB, or even 4GB
                write32u(fileobj, self.size & 0xffffffff)
        finally:
            # Always release a file we opened ourselves, even on error.
            myfileobj = self.myfileobj
            if myfileobj:
                self.myfileobj = None
                myfileobj.close()

    def flush(self, zlib_mode=zlib.Z_SYNC_FLUSH):
        """Flush pending compressed data to the underlying file object.

        zlib_mode is passed to the compressor's flush().  Does nothing in
        read mode apart from the closed-file check.
        """
        self._check_closed()
        if self.mode == WRITE:
            # Ensure the compressor's buffer is flushed
            self.fileobj.write(self.compress.flush(zlib_mode))
            self.fileobj.flush()

    def fileno(self):
        """Invoke the underlying file object's fileno() method.

        This will raise AttributeError if the underlying file object
        doesn't support fileno().
        """
        return self.fileobj.fileno()

    def rewind(self):
        '''Return the uncompressed stream file position indicator to the
        beginning of the file'''
        if self.mode != READ:
            raise IOError("Can't rewind in write mode")
        self.fileobj.seek(0)
        self._new_member = True
        self.extrabuf = ""
        self.extrasize = 0
        self.extrastart = 0
        self.offset = 0

    def readable(self):
        """Return True if the object was opened for reading."""
        return self.mode == READ

    def writable(self):
        """Return True if the object was opened for writing."""
        return self.mode == WRITE

    def seekable(self):
        """Return True; seeking is emulated by seek()."""
        return True

    def seek(self, offset, whence=0):
        """Move to a position in the uncompressed stream and return it.

        whence may be 0 (absolute) or 1 (relative); any other value raises
        ValueError.  In write mode only forward seeks are possible and the
        gap is filled with zero bytes.  In read mode a backward seek
        rewinds and re-reads from the start.
        """
        if whence:
            if whence == 1:
                offset = self.offset + offset
            else:
                raise ValueError('Seek from end not supported')
        if self.mode == WRITE:
            if offset < self.offset:
                raise IOError('Negative seek in write mode')
            count = offset - self.offset
            for i in range(count // 1024):
                self.write(1024 * '\0')
            self.write((count % 1024) * '\0')
        elif self.mode == READ:
            if offset < self.offset:
                # for negative seek, rewind and do positive seek
                self.rewind()
            count = offset - self.offset
            for i in range(count // 1024):
                self.read(1024)
            self.read(count % 1024)

        return self.offset

    def readline(self, size=-1):
        """Read and return one line, including the trailing newline if any.

        A non-negative size limits the number of bytes returned.
        """
        if size < 0:
            # Shortcut common case - newline found in buffer.
            offset = self.offset - self.extrastart
            i = self.extrabuf.find('\n', offset) + 1
            if i > 0:
                self.extrasize -= i - offset
                self.offset += i - offset
                return self.extrabuf[offset: i]

            size = sys.maxint
            readsize = self.min_readsize
        else:
            readsize = size
        bufs = []
        while size != 0:
            c = self.read(readsize)
            i = c.find('\n')

            # We set i=size to break out of the loop under two
            # conditions: 1) there's no newline, and the chunk is
            # larger than size, or 2) there is a newline, but the
            # resulting line would be longer than 'size'.
            if (size <= i) or (i == -1 and len(c) > size):
                i = size - 1

            if i >= 0 or c == '':
                bufs.append(c[:i + 1])    # Add portion of last chunk
                self._unread(c[i + 1:])   # Push back rest of chunk
                break

            # Append chunk to list, decrease 'size',
            bufs.append(c)
            size = size - len(c)
            readsize = min(size, readsize * 2)
        if readsize > self.min_readsize:
            self.min_readsize = min(readsize, self.min_readsize * 2, 512)
        return ''.join(bufs)    # Return resulting line
468\r
469\r
def _test():
    """Act like gzip; with -d as the first argument, act like gunzip.

    Every remaining command-line argument is a file name; "-" (also the
    default when no name is given) means standard input and standard
    output.  The input file is not deleted, however, nor are any other
    gzip options or features supported.
    """
    args = sys.argv[1:]
    decompress = args and args[0] == "-d"
    if decompress:
        args = args[1:]
    if not args:
        args = ["-"]
    for arg in args:
        use_std_streams = (arg == "-")
        if decompress and not use_std_streams and arg[-3:] != ".gz":
            print("filename doesn't end in .gz: %r" % (arg,))
            continue

        # Open the source first, as the original code did.
        if decompress:
            if use_std_streams:
                f = GzipFile(filename="", mode="rb", fileobj=sys.stdin)
            else:
                f = open(arg, "rb")
        else:
            if use_std_streams:
                f = sys.stdin
            else:
                f = __builtin__.open(arg, "rb")

        # Nested try/finally: the source is closed even if opening the
        # destination fails, and both are closed if the copy fails.
        try:
            if decompress:
                if use_std_streams:
                    g = sys.stdout
                else:
                    g = __builtin__.open(arg[:-3], "wb")
            else:
                if use_std_streams:
                    g = GzipFile(filename="", mode="wb", fileobj=sys.stdout)
                else:
                    g = open(arg + ".gz", "wb")
            try:
                while True:
                    chunk = f.read(1024)
                    if not chunk:
                        break
                    g.write(chunk)
            finally:
                if g is not sys.stdout:
                    g.close()
        finally:
            if f is not sys.stdin:
                f.close()


if __name__ == '__main__':
    _test()