3 python-bz2 - python bz2 library interface
5 Copyright (c) 2002 Gustavo Niemeyer <niemeyer@conectiva.com>
6 Copyright (c) 2002 Python Software Foundation; All Rights Reserved
13 #include "structmember.h"
19 static char __author__
[] =
20 "The bz2 python module was written by:\n\
22 Gustavo Niemeyer <niemeyer@conectiva.com>\n\
25 /* Our very own off_t-like type, 64-bit if possible */
26 /* copied from Objects/fileobject.c */
27 #if !defined(HAVE_LARGEFILE_SUPPORT)
28 typedef off_t Py_off_t
;
29 #elif SIZEOF_OFF_T >= 8
30 typedef off_t Py_off_t
;
31 #elif SIZEOF_FPOS_T >= 8
32 typedef fpos_t Py_off_t
;
34 #error "Large file support, but neither off_t nor fpos_t is large enough."
/* Return the character buffer of string object v, after casting v to
 * PyStringObject *.  The argument is parenthesized so that an expression
 * argument (e.g. `base + offset`) is cast as a whole instead of the cast
 * binding only to its first operand. */
#define BUF(v) PyString_AS_STRING((PyStringObject *)(v))
41 #define MODE_READ_EOF 2
44 #define BZ2FileObject_Check(v) (Py_TYPE(v) == &BZ2File_Type)
47 #ifdef BZ_CONFIG_ERROR
/* Total number of output bytes recorded in bz_stream *bzs, assembled from
 * the split total_out_hi32 / total_out_lo32 counters using `long`
 * arithmetic.  The argument is parenthesized so that any pointer
 * expression (e.g. `&stream`) expands correctly. */
#define BZS_TOTAL_OUT(bzs) \
    (((long)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
52 #elif SIZEOF_LONG_LONG >= 8
/* Total number of output bytes recorded in bz_stream *bzs, assembled from
 * the split total_out_hi32 / total_out_lo32 counters using PY_LONG_LONG
 * arithmetic (this variant is selected when SIZEOF_LONG_LONG >= 8).
 * The argument is parenthesized so that any pointer expression
 * (e.g. `&stream`) expands correctly. */
#define BZS_TOTAL_OUT(bzs) \
    (((PY_LONG_LONG)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
56 #define BZS_TOTAL_OUT(bzs) \
60 #else /* ! BZ_CONFIG_ERROR */
/* Branch taken when BZ_CONFIG_ERROR is not defined: alias the BZ2_-prefixed
 * API names used throughout this file to the unprefixed bz* names. */
62 #define BZ2_bzRead bzRead
63 #define BZ2_bzReadOpen bzReadOpen
64 #define BZ2_bzReadClose bzReadClose
65 #define BZ2_bzWrite bzWrite
66 #define BZ2_bzWriteOpen bzWriteOpen
67 #define BZ2_bzWriteClose bzWriteClose
68 #define BZ2_bzCompress bzCompress
69 #define BZ2_bzCompressInit bzCompressInit
70 #define BZ2_bzCompressEnd bzCompressEnd
71 #define BZ2_bzDecompress bzDecompress
72 #define BZ2_bzDecompressInit bzDecompressInit
73 #define BZ2_bzDecompressEnd bzDecompressEnd
/* Total number of output bytes recorded in bz_stream *bzs for the
 * !BZ_CONFIG_ERROR branch, where the stream exposes a single total_out
 * field.  Both the argument and the whole expansion are parenthesized so
 * the macro is safe with any pointer expression and in any context. */
#define BZS_TOTAL_OUT(bzs) ((bzs)->total_out)
77 #endif /* ! BZ_CONFIG_ERROR */
81 #define ACQUIRE_LOCK(obj) do { \
82 if (!PyThread_acquire_lock(obj->lock, 0)) { \
83 Py_BEGIN_ALLOW_THREADS \
84 PyThread_acquire_lock(obj->lock, 1); \
85 Py_END_ALLOW_THREADS \
/* Release the lock stored in obj->lock via PyThread_release_lock().
 * The argument is parenthesized so that any pointer expression
 * (e.g. `&object`) expands correctly. */
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
89 #define ACQUIRE_LOCK(obj)
90 #define RELEASE_LOCK(obj)
93 /* Bits in f_newlinetypes */
94 #define NEWLINE_UNKNOWN 0 /* No newline seen, yet */
95 #define NEWLINE_CR 1 /* \r newline seen */
96 #define NEWLINE_LF 2 /* \n newline seen */
97 #define NEWLINE_CRLF 4 /* \r\n newline seen */
99 /* ===================================================================== */
100 /* Structure definitions. */
106 char* f_buf
; /* Allocated readahead buffer */
107 char* f_bufend
; /* Points after last occupied position */
108 char* f_bufptr
; /* Current buffer position */
110 int f_softspace
; /* Flag used by 'print' command */
112 int f_univ_newline
; /* Handle any newline convention */
113 int f_newlinetypes
; /* Types of newlines seen */
114 int f_skipnextlf
; /* Skip next \n */
121 PyThread_type_lock lock
;
130 PyThread_type_lock lock
;
138 PyObject
*unused_data
;
140 PyThread_type_lock lock
;
144 /* ===================================================================== */
145 /* Utility functions. */
147 /* Refuse regular I/O if there's data in the iteration-buffer.
148 * Mixing them would cause data to arrive out of order, as the read*
149 * methods don't use the iteration buffer. */
151 check_iterbuffered(BZ2FileObject
*f
)
153 if (f
->f_buf
!= NULL
&&
154 (f
->f_bufend
- f
->f_bufptr
) > 0 &&
155 f
->f_buf
[0] != '\0') {
156 PyErr_SetString(PyExc_ValueError
,
157 "Mixing iteration and read methods would lose data");
164 Util_CatchBZ2Error(int bzerror
)
172 #ifdef BZ_CONFIG_ERROR
173 case BZ_CONFIG_ERROR
:
174 PyErr_SetString(PyExc_SystemError
,
175 "the bz2 library was not compiled "
182 PyErr_SetString(PyExc_ValueError
,
183 "the bz2 library has received wrong "
194 case BZ_DATA_ERROR_MAGIC
:
195 PyErr_SetString(PyExc_IOError
, "invalid data stream");
200 PyErr_SetString(PyExc_IOError
, "unknown IO error");
204 case BZ_UNEXPECTED_EOF
:
205 PyErr_SetString(PyExc_EOFError
,
206 "compressed file ended before the "
207 "logical end-of-stream was detected");
211 case BZ_SEQUENCE_ERROR
:
212 PyErr_SetString(PyExc_RuntimeError
,
213 "wrong sequence of bz2 library "
222 #define SMALLCHUNK 8192
224 #define SMALLCHUNK BUFSIZ
228 #define BIGCHUNK (512 * 32)
230 #define BIGCHUNK (512 * 1024)
233 /* This is a hacked version of Python's fileobject.c:new_buffersize(). */
235 Util_NewBufferSize(size_t currentsize
)
237 if (currentsize
> SMALLCHUNK
) {
238 /* Keep doubling until we reach BIGCHUNK;
239 then keep adding BIGCHUNK. */
240 if (currentsize
<= BIGCHUNK
)
241 return currentsize
+ currentsize
;
243 return currentsize
+ BIGCHUNK
;
245 return currentsize
+ SMALLCHUNK
;
248 /* This is a hacked version of Python's fileobject.c:get_line(). */
250 Util_GetLine(BZ2FileObject
*f
, int n
)
254 size_t total_v_size
; /* total # of slots in buffer */
255 size_t used_v_size
; /* # used slots in buffer */
256 size_t increment
; /* amount to increment the buffer */
260 int newlinetypes
= f
->f_newlinetypes
;
261 int skipnextlf
= f
->f_skipnextlf
;
262 int univ_newline
= f
->f_univ_newline
;
264 total_v_size
= n
> 0 ? n
: 100;
265 v
= PyString_FromStringAndSize((char *)NULL
, total_v_size
);
270 end
= buf
+ total_v_size
;
273 Py_BEGIN_ALLOW_THREADS
275 bytes_read
= BZ2_bzRead(&bzerror
, f
->fp
, &c
, 1);
277 if (bytes_read
== 0) break;
282 /* Seeing a \n here with skipnextlf true means we
285 newlinetypes
|= NEWLINE_CRLF
;
286 if (bzerror
!= BZ_OK
) break;
287 bytes_read
= BZ2_bzRead(&bzerror
, f
->fp
, &c
, 1);
289 if (bytes_read
== 0) break;
291 newlinetypes
|= NEWLINE_CR
;
297 } else if (c
== '\n')
298 newlinetypes
|= NEWLINE_LF
;
301 if (bzerror
!= BZ_OK
|| c
== '\n') break;
303 if (univ_newline
&& bzerror
== BZ_STREAM_END
&& skipnextlf
)
304 newlinetypes
|= NEWLINE_CR
;
306 f
->f_newlinetypes
= newlinetypes
;
307 f
->f_skipnextlf
= skipnextlf
;
308 if (bzerror
== BZ_STREAM_END
) {
310 f
->mode
= MODE_READ_EOF
;
312 } else if (bzerror
!= BZ_OK
) {
313 Util_CatchBZ2Error(bzerror
);
319 /* Must be because buf == end */
322 used_v_size
= total_v_size
;
323 increment
= total_v_size
>> 2; /* mild exponential growth */
324 total_v_size
+= increment
;
325 if (total_v_size
> INT_MAX
) {
326 PyErr_SetString(PyExc_OverflowError
,
327 "line is longer than a Python string can hold");
331 if (_PyString_Resize(&v
, total_v_size
) < 0)
333 buf
= BUF(v
) + used_v_size
;
334 end
= BUF(v
) + total_v_size
;
337 used_v_size
= buf
- BUF(v
);
338 if (used_v_size
!= total_v_size
)
339 _PyString_Resize(&v
, used_v_size
);
343 /* This is a hacked version of Python's
344 * fileobject.c:Py_UniversalNewlineFread(). */
346 Util_UnivNewlineRead(int *bzerror
, BZFILE
*stream
,
347 char* buf
, size_t n
, BZ2FileObject
*f
)
350 int newlinetypes
, skipnextlf
;
353 assert(stream
!= NULL
);
355 if (!f
->f_univ_newline
)
356 return BZ2_bzRead(bzerror
, stream
, buf
, n
);
358 newlinetypes
= f
->f_newlinetypes
;
359 skipnextlf
= f
->f_skipnextlf
;
361 /* Invariant: n is the number of bytes remaining to be filled
369 nread
= BZ2_bzRead(bzerror
, stream
, dst
, n
);
371 n
-= nread
; /* assuming 1 byte out for each in; will adjust */
372 shortread
= n
!= 0; /* true iff EOF or error */
376 /* Save as LF and set flag to skip next LF. */
380 else if (skipnextlf
&& c
== '\n') {
381 /* Skip LF, and remember we saw CR LF. */
383 newlinetypes
|= NEWLINE_CRLF
;
387 /* Normal char to be stored in buffer. Also
388 * update the newlinetypes flag if either this
389 * is an LF or the previous char was a CR.
392 newlinetypes
|= NEWLINE_LF
;
394 newlinetypes
|= NEWLINE_CR
;
400 /* If this is EOF, update type flags. */
401 if (skipnextlf
&& *bzerror
== BZ_STREAM_END
)
402 newlinetypes
|= NEWLINE_CR
;
406 f
->f_newlinetypes
= newlinetypes
;
407 f
->f_skipnextlf
= skipnextlf
;
411 /* This is a hacked version of Python's fileobject.c:drop_readahead(). */
413 Util_DropReadAhead(BZ2FileObject
*f
)
415 if (f
->f_buf
!= NULL
) {
416 PyMem_Free(f
->f_buf
);
421 /* This is a hacked version of Python's fileobject.c:readahead(). */
423 Util_ReadAhead(BZ2FileObject
*f
, int bufsize
)
428 if (f
->f_buf
!= NULL
) {
429 if((f
->f_bufend
- f
->f_bufptr
) >= 1)
432 Util_DropReadAhead(f
);
434 if (f
->mode
== MODE_READ_EOF
) {
435 f
->f_bufptr
= f
->f_buf
;
436 f
->f_bufend
= f
->f_buf
;
439 if ((f
->f_buf
= PyMem_Malloc(bufsize
)) == NULL
) {
443 Py_BEGIN_ALLOW_THREADS
444 chunksize
= Util_UnivNewlineRead(&bzerror
, f
->fp
, f
->f_buf
,
448 if (bzerror
== BZ_STREAM_END
) {
450 f
->mode
= MODE_READ_EOF
;
451 } else if (bzerror
!= BZ_OK
) {
452 Util_CatchBZ2Error(bzerror
);
453 Util_DropReadAhead(f
);
456 f
->f_bufptr
= f
->f_buf
;
457 f
->f_bufend
= f
->f_buf
+ chunksize
;
461 /* This is a hacked version of Python's
462 * fileobject.c:readahead_get_line_skip(). */
463 static PyStringObject
*
464 Util_ReadAheadGetLineSkip(BZ2FileObject
*f
, int skip
, int bufsize
)
471 if (f
->f_buf
== NULL
)
472 if (Util_ReadAhead(f
, bufsize
) < 0)
475 len
= f
->f_bufend
- f
->f_bufptr
;
477 return (PyStringObject
*)
478 PyString_FromStringAndSize(NULL
, skip
);
479 bufptr
= memchr(f
->f_bufptr
, '\n', len
);
480 if (bufptr
!= NULL
) {
481 bufptr
++; /* Count the '\n' */
482 len
= bufptr
- f
->f_bufptr
;
483 s
= (PyStringObject
*)
484 PyString_FromStringAndSize(NULL
, skip
+len
);
487 memcpy(PyString_AS_STRING(s
)+skip
, f
->f_bufptr
, len
);
488 f
->f_bufptr
= bufptr
;
489 if (bufptr
== f
->f_bufend
)
490 Util_DropReadAhead(f
);
492 bufptr
= f
->f_bufptr
;
494 f
->f_buf
= NULL
; /* Force new readahead buffer */
495 s
= Util_ReadAheadGetLineSkip(f
, skip
+len
,
496 bufsize
+ (bufsize
>>2));
501 memcpy(PyString_AS_STRING(s
)+skip
, bufptr
, len
);
507 /* ===================================================================== */
508 /* Methods of BZ2File. */
510 PyDoc_STRVAR(BZ2File_read__doc__
,
511 "read([size]) -> string\n\
513 Read at most size uncompressed bytes, returned as a string. If the size\n\
514 argument is negative or omitted, read until EOF is reached.\n\
517 /* This is a hacked version of Python's fileobject.c:file_read(). */
519 BZ2File_read(BZ2FileObject
*self
, PyObject
*args
)
521 long bytesrequested
= -1;
522 size_t bytesread
, buffersize
, chunksize
;
524 PyObject
*ret
= NULL
;
526 if (!PyArg_ParseTuple(args
, "|l:read", &bytesrequested
))
530 switch (self
->mode
) {
534 ret
= PyString_FromString("");
537 PyErr_SetString(PyExc_ValueError
,
538 "I/O operation on closed file");
541 PyErr_SetString(PyExc_IOError
,
542 "file is not ready for reading");
546 /* refuse to mix with f.next() */
547 if (check_iterbuffered(self
))
550 if (bytesrequested
< 0)
551 buffersize
= Util_NewBufferSize((size_t)0);
553 buffersize
= bytesrequested
;
554 if (buffersize
> INT_MAX
) {
555 PyErr_SetString(PyExc_OverflowError
,
556 "requested number of bytes is "
557 "more than a Python string can hold");
560 ret
= PyString_FromStringAndSize((char *)NULL
, buffersize
);
566 Py_BEGIN_ALLOW_THREADS
567 chunksize
= Util_UnivNewlineRead(&bzerror
, self
->fp
,
569 buffersize
-bytesread
,
571 self
->pos
+= chunksize
;
573 bytesread
+= chunksize
;
574 if (bzerror
== BZ_STREAM_END
) {
575 self
->size
= self
->pos
;
576 self
->mode
= MODE_READ_EOF
;
578 } else if (bzerror
!= BZ_OK
) {
579 Util_CatchBZ2Error(bzerror
);
584 if (bytesrequested
< 0) {
585 buffersize
= Util_NewBufferSize(buffersize
);
586 if (_PyString_Resize(&ret
, buffersize
) < 0)
592 if (bytesread
!= buffersize
)
593 _PyString_Resize(&ret
, bytesread
);
600 PyDoc_STRVAR(BZ2File_readline__doc__
,
601 "readline([size]) -> string\n\
603 Return the next line from the file, as a string, retaining newline.\n\
604 A non-negative size argument will limit the maximum number of bytes to\n\
605 return (an incomplete line may be returned then). Return an empty\n\
610 BZ2File_readline(BZ2FileObject
*self
, PyObject
*args
)
612 PyObject
*ret
= NULL
;
615 if (!PyArg_ParseTuple(args
, "|i:readline", &sizehint
))
619 switch (self
->mode
) {
623 ret
= PyString_FromString("");
626 PyErr_SetString(PyExc_ValueError
,
627 "I/O operation on closed file");
630 PyErr_SetString(PyExc_IOError
,
631 "file is not ready for reading");
635 /* refuse to mix with f.next() */
636 if (check_iterbuffered(self
))
640 ret
= PyString_FromString("");
642 ret
= Util_GetLine(self
, (sizehint
< 0) ? 0 : sizehint
);
649 PyDoc_STRVAR(BZ2File_readlines__doc__
,
650 "readlines([size]) -> list\n\
652 Call readline() repeatedly and return a list of lines read.\n\
653 The optional size argument, if given, is an approximate bound on the\n\
654 total number of bytes in the lines returned.\n\
657 /* This is a hacked version of Python's fileobject.c:file_readlines(). */
659 BZ2File_readlines(BZ2FileObject
*self
, PyObject
*args
)
662 PyObject
*list
= NULL
;
664 char small_buffer
[SMALLCHUNK
];
665 char *buffer
= small_buffer
;
666 size_t buffersize
= SMALLCHUNK
;
667 PyObject
*big_buffer
= NULL
;
670 size_t totalread
= 0;
676 if (!PyArg_ParseTuple(args
, "|l:readlines", &sizehint
))
680 switch (self
->mode
) {
684 list
= PyList_New(0);
687 PyErr_SetString(PyExc_ValueError
,
688 "I/O operation on closed file");
691 PyErr_SetString(PyExc_IOError
,
692 "file is not ready for reading");
696 /* refuse to mix with f.next() */
697 if (check_iterbuffered(self
))
700 if ((list
= PyList_New(0)) == NULL
)
704 Py_BEGIN_ALLOW_THREADS
705 nread
= Util_UnivNewlineRead(&bzerror
, self
->fp
,
707 buffersize
-nfilled
, self
);
710 if (bzerror
== BZ_STREAM_END
) {
711 self
->size
= self
->pos
;
712 self
->mode
= MODE_READ_EOF
;
718 } else if (bzerror
!= BZ_OK
) {
719 Util_CatchBZ2Error(bzerror
);
726 p
= memchr(buffer
+nfilled
, '\n', nread
);
727 if (!shortread
&& p
== NULL
) {
728 /* Need a larger buffer to fit this line */
731 if (buffersize
> INT_MAX
) {
732 PyErr_SetString(PyExc_OverflowError
,
733 "line is longer than a Python string can hold");
736 if (big_buffer
== NULL
) {
737 /* Create the big buffer */
738 big_buffer
= PyString_FromStringAndSize(
740 if (big_buffer
== NULL
)
742 buffer
= PyString_AS_STRING(big_buffer
);
743 memcpy(buffer
, small_buffer
, nfilled
);
746 /* Grow the big buffer */
747 _PyString_Resize(&big_buffer
, buffersize
);
748 buffer
= PyString_AS_STRING(big_buffer
);
752 end
= buffer
+nfilled
+nread
;
755 /* Process complete lines */
757 line
= PyString_FromStringAndSize(q
, p
-q
);
760 err
= PyList_Append(list
, line
);
765 p
= memchr(q
, '\n', end
-q
);
767 /* Move the remaining incomplete line to the start */
769 memmove(buffer
, q
, nfilled
);
771 if (totalread
>= (size_t)sizehint
)
779 /* Partial last line */
780 line
= PyString_FromStringAndSize(buffer
, nfilled
);
784 /* Need to complete the last line */
785 PyObject
*rest
= Util_GetLine(self
, 0);
790 PyString_Concat(&line
, rest
);
795 err
= PyList_Append(list
, line
);
804 Py_DECREF(big_buffer
);
809 PyDoc_STRVAR(BZ2File_xreadlines__doc__
,
810 "xreadlines() -> self\n\
812 For backward compatibility. BZ2File objects now include the performance\n\
813 optimizations previously implemented in the xreadlines module.\n\
816 PyDoc_STRVAR(BZ2File_write__doc__
,
817 "write(data) -> None\n\
819 Write the 'data' string to file. Note that due to buffering, close() may\n\
820 be needed before the file on disk reflects the data written.\n\
823 /* This is a hacked version of Python's fileobject.c:file_write(). */
825 BZ2File_write(BZ2FileObject
*self
, PyObject
*args
)
827 PyObject
*ret
= NULL
;
833 if (!PyArg_ParseTuple(args
, "s*:write", &pbuf
))
839 switch (self
->mode
) {
844 PyErr_SetString(PyExc_ValueError
,
845 "I/O operation on closed file");
849 PyErr_SetString(PyExc_IOError
,
850 "file is not ready for writing");
854 self
->f_softspace
= 0;
856 Py_BEGIN_ALLOW_THREADS
857 BZ2_bzWrite (&bzerror
, self
->fp
, buf
, len
);
861 if (bzerror
!= BZ_OK
) {
862 Util_CatchBZ2Error(bzerror
);
870 PyBuffer_Release(&pbuf
);
875 PyDoc_STRVAR(BZ2File_writelines__doc__
,
876 "writelines(sequence_of_strings) -> None\n\
878 Write the sequence of strings to the file. Note that newlines are not\n\
879 added. The sequence can be any iterable object producing strings. This is\n\
880 equivalent to calling write() for each string.\n\
883 /* This is a hacked version of Python's fileobject.c:file_writelines(). */
885 BZ2File_writelines(BZ2FileObject
*self
, PyObject
*seq
)
887 #define CHUNKSIZE 1000
888 PyObject
*list
= NULL
;
889 PyObject
*iter
= NULL
;
890 PyObject
*ret
= NULL
;
892 int i
, j
, index
, len
, islist
;
896 switch (self
->mode
) {
901 PyErr_SetString(PyExc_ValueError
,
902 "I/O operation on closed file");
906 PyErr_SetString(PyExc_IOError
,
907 "file is not ready for writing");
911 islist
= PyList_Check(seq
);
913 iter
= PyObject_GetIter(seq
);
915 PyErr_SetString(PyExc_TypeError
,
916 "writelines() requires an iterable argument");
919 list
= PyList_New(CHUNKSIZE
);
924 /* Strategy: slurp CHUNKSIZE lines into a private list,
925 checking that they are all strings, then write that list
926 without holding the interpreter lock, then come back for more. */
927 for (index
= 0; ; index
+= CHUNKSIZE
) {
930 list
= PyList_GetSlice(seq
, index
, index
+CHUNKSIZE
);
933 j
= PyList_GET_SIZE(list
);
936 for (j
= 0; j
< CHUNKSIZE
; j
++) {
937 line
= PyIter_Next(iter
);
939 if (PyErr_Occurred())
943 PyList_SetItem(list
, j
, line
);
949 /* Check that all entries are indeed strings. If not,
950 apply the same rules as for file.write() and
951 convert the results to strings. This is slow, but
952 seems to be the only way since all conversion APIs
953 could potentially execute Python code. */
954 for (i
= 0; i
< j
; i
++) {
955 PyObject
*v
= PyList_GET_ITEM(list
, i
);
956 if (!PyString_Check(v
)) {
959 if (PyObject_AsCharBuffer(v
, &buffer
, &len
)) {
960 PyErr_SetString(PyExc_TypeError
,
967 line
= PyString_FromStringAndSize(buffer
,
972 PyList_SET_ITEM(list
, i
, line
);
976 self
->f_softspace
= 0;
978 /* Since we are releasing the global lock, the
979 following code may *not* execute Python code. */
980 Py_BEGIN_ALLOW_THREADS
981 for (i
= 0; i
< j
; i
++) {
982 line
= PyList_GET_ITEM(list
, i
);
983 len
= PyString_GET_SIZE(line
);
984 BZ2_bzWrite (&bzerror
, self
->fp
,
985 PyString_AS_STRING(line
), len
);
986 if (bzerror
!= BZ_OK
) {
988 Util_CatchBZ2Error(bzerror
);
1009 PyDoc_STRVAR(BZ2File_seek__doc__
,
1010 "seek(offset [, whence]) -> None\n\
1012 Move to new file position. Argument offset is a byte count. Optional\n\
1013 argument whence defaults to 0 (offset from start of file, offset\n\
1014 should be >= 0); other values are 1 (move relative to current position,\n\
1015 positive or negative), and 2 (move relative to end of file, usually\n\
1016 negative, although many platforms allow seeking beyond the end of a file).\n\
1018 Note that seeking of bz2 files is emulated, and depending on the parameters\n\
1019 the operation may be extremely slow.\n\
1023 BZ2File_seek(BZ2FileObject
*self
, PyObject
*args
)
1028 char small_buffer
[SMALLCHUNK
];
1029 char *buffer
= small_buffer
;
1030 size_t buffersize
= SMALLCHUNK
;
1031 Py_off_t bytesread
= 0;
1035 PyObject
*ret
= NULL
;
1037 if (!PyArg_ParseTuple(args
, "O|i:seek", &offobj
, &where
))
1039 #if !defined(HAVE_LARGEFILE_SUPPORT)
1040 offset
= PyInt_AsLong(offobj
);
1042 offset
= PyLong_Check(offobj
) ?
1043 PyLong_AsLongLong(offobj
) : PyInt_AsLong(offobj
);
1045 if (PyErr_Occurred())
1049 Util_DropReadAhead(self
);
1050 switch (self
->mode
) {
1056 PyErr_SetString(PyExc_ValueError
,
1057 "I/O operation on closed file");
1061 PyErr_SetString(PyExc_IOError
,
1062 "seek works only while reading");
1067 if (self
->size
== -1) {
1068 assert(self
->mode
!= MODE_READ_EOF
);
1070 Py_BEGIN_ALLOW_THREADS
1071 chunksize
= Util_UnivNewlineRead(
1075 self
->pos
+= chunksize
;
1076 Py_END_ALLOW_THREADS
1078 bytesread
+= chunksize
;
1079 if (bzerror
== BZ_STREAM_END
) {
1081 } else if (bzerror
!= BZ_OK
) {
1082 Util_CatchBZ2Error(bzerror
);
1086 self
->mode
= MODE_READ_EOF
;
1087 self
->size
= self
->pos
;
1090 offset
= self
->size
+ offset
;
1091 } else if (where
== 1) {
1092 offset
= self
->pos
+ offset
;
1095 /* Before getting here, offset must be the absolute position the file
1096 * pointer should be set to. */
1098 if (offset
>= self
->pos
) {
1099 /* we can move forward */
1100 offset
-= self
->pos
;
1102 /* we cannot move back, so rewind the stream */
1103 BZ2_bzReadClose(&bzerror
, self
->fp
);
1105 PyFile_DecUseCount((PyFileObject
*)self
->file
);
1108 if (bzerror
!= BZ_OK
) {
1109 Util_CatchBZ2Error(bzerror
);
1112 ret
= PyObject_CallMethod(self
->file
, "seek", "(i)", 0);
1118 self
->fp
= BZ2_bzReadOpen(&bzerror
, PyFile_AsFile(self
->file
),
1121 PyFile_IncUseCount((PyFileObject
*)self
->file
);
1122 if (bzerror
!= BZ_OK
) {
1123 Util_CatchBZ2Error(bzerror
);
1126 self
->mode
= MODE_READ
;
1129 if (offset
<= 0 || self
->mode
== MODE_READ_EOF
)
1132 /* Before getting here, offset must be set to the number of bytes
1133 * to walk forward. */
1135 if (offset
-bytesread
> buffersize
)
1136 readsize
= buffersize
;
1138 /* offset might be wider than readsize, but the result
1139 * of the subtraction is bound by buffersize (see the
1140 * condition above). buffersize is 8192. */
1141 readsize
= (size_t)(offset
-bytesread
);
1142 Py_BEGIN_ALLOW_THREADS
1143 chunksize
= Util_UnivNewlineRead(&bzerror
, self
->fp
,
1144 buffer
, readsize
, self
);
1145 self
->pos
+= chunksize
;
1146 Py_END_ALLOW_THREADS
1147 bytesread
+= chunksize
;
1148 if (bzerror
== BZ_STREAM_END
) {
1149 self
->size
= self
->pos
;
1150 self
->mode
= MODE_READ_EOF
;
1152 } else if (bzerror
!= BZ_OK
) {
1153 Util_CatchBZ2Error(bzerror
);
1156 if (bytesread
== offset
)
1169 PyDoc_STRVAR(BZ2File_tell__doc__
,
1172 Return the current file position, an integer (may be a long integer).\n\
1176 BZ2File_tell(BZ2FileObject
*self
, PyObject
*args
)
1178 PyObject
*ret
= NULL
;
1180 if (self
->mode
== MODE_CLOSED
) {
1181 PyErr_SetString(PyExc_ValueError
,
1182 "I/O operation on closed file");
1186 #if !defined(HAVE_LARGEFILE_SUPPORT)
1187 ret
= PyInt_FromLong(self
->pos
);
1189 ret
= PyLong_FromLongLong(self
->pos
);
1196 PyDoc_STRVAR(BZ2File_close__doc__
,
1197 "close() -> None or (perhaps) an integer\n\
1199 Close the file. Sets data attribute .closed to true. A closed file\n\
1200 cannot be used for further I/O operations. close() may be called more\n\
1201 than once without error.\n\
1205 BZ2File_close(BZ2FileObject
*self
)
1207 PyObject
*ret
= NULL
;
1208 int bzerror
= BZ_OK
;
1211 switch (self
->mode
) {
1214 BZ2_bzReadClose(&bzerror
, self
->fp
);
1217 BZ2_bzWriteClose(&bzerror
, self
->fp
,
1222 PyFile_DecUseCount((PyFileObject
*)self
->file
);
1225 self
->mode
= MODE_CLOSED
;
1226 ret
= PyObject_CallMethod(self
->file
, "close", NULL
);
1227 if (bzerror
!= BZ_OK
) {
1228 Util_CatchBZ2Error(bzerror
);
1237 PyDoc_STRVAR(BZ2File_enter_doc
,
1238 "__enter__() -> self.");
1241 BZ2File_enter(BZ2FileObject
*self
)
1243 if (self
->mode
== MODE_CLOSED
) {
1244 PyErr_SetString(PyExc_ValueError
,
1245 "I/O operation on closed file");
1249 return (PyObject
*) self
;
1252 PyDoc_STRVAR(BZ2File_exit_doc
,
1253 "__exit__(*excinfo) -> None. Closes the file.");
1256 BZ2File_exit(BZ2FileObject
*self
, PyObject
*args
)
1258 PyObject
*ret
= PyObject_CallMethod((PyObject
*) self
, "close", NULL
);
1260 /* If error occurred, pass through */
1267 static PyObject
*BZ2File_getiter(BZ2FileObject
*self
);
1269 static PyMethodDef BZ2File_methods
[] = {
1270 {"read", (PyCFunction
)BZ2File_read
, METH_VARARGS
, BZ2File_read__doc__
},
1271 {"readline", (PyCFunction
)BZ2File_readline
, METH_VARARGS
, BZ2File_readline__doc__
},
1272 {"readlines", (PyCFunction
)BZ2File_readlines
, METH_VARARGS
, BZ2File_readlines__doc__
},
1273 {"xreadlines", (PyCFunction
)BZ2File_getiter
, METH_VARARGS
, BZ2File_xreadlines__doc__
},
1274 {"write", (PyCFunction
)BZ2File_write
, METH_VARARGS
, BZ2File_write__doc__
},
1275 {"writelines", (PyCFunction
)BZ2File_writelines
, METH_O
, BZ2File_writelines__doc__
},
1276 {"seek", (PyCFunction
)BZ2File_seek
, METH_VARARGS
, BZ2File_seek__doc__
},
1277 {"tell", (PyCFunction
)BZ2File_tell
, METH_NOARGS
, BZ2File_tell__doc__
},
1278 {"close", (PyCFunction
)BZ2File_close
, METH_NOARGS
, BZ2File_close__doc__
},
1279 {"__enter__", (PyCFunction
)BZ2File_enter
, METH_NOARGS
, BZ2File_enter_doc
},
1280 {"__exit__", (PyCFunction
)BZ2File_exit
, METH_VARARGS
, BZ2File_exit_doc
},
1281 {NULL
, NULL
} /* sentinel */
1285 /* ===================================================================== */
1286 /* Getters and setters of BZ2File. */
1288 /* This is a hacked version of Python's fileobject.c:get_newlines(). */
1290 BZ2File_get_newlines(BZ2FileObject
*self
, void *closure
)
1292 switch (self
->f_newlinetypes
) {
1293 case NEWLINE_UNKNOWN
:
1297 return PyString_FromString("\r");
1299 return PyString_FromString("\n");
1300 case NEWLINE_CR
|NEWLINE_LF
:
1301 return Py_BuildValue("(ss)", "\r", "\n");
1303 return PyString_FromString("\r\n");
1304 case NEWLINE_CR
|NEWLINE_CRLF
:
1305 return Py_BuildValue("(ss)", "\r", "\r\n");
1306 case NEWLINE_LF
|NEWLINE_CRLF
:
1307 return Py_BuildValue("(ss)", "\n", "\r\n");
1308 case NEWLINE_CR
|NEWLINE_LF
|NEWLINE_CRLF
:
1309 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1311 PyErr_Format(PyExc_SystemError
,
1312 "Unknown newlines value 0x%x\n",
1313 self
->f_newlinetypes
);
1319 BZ2File_get_closed(BZ2FileObject
*self
, void *closure
)
1321 return PyInt_FromLong(self
->mode
== MODE_CLOSED
);
1325 BZ2File_get_mode(BZ2FileObject
*self
, void *closure
)
1327 return PyObject_GetAttrString(self
->file
, "mode");
1331 BZ2File_get_name(BZ2FileObject
*self
, void *closure
)
1333 return PyObject_GetAttrString(self
->file
, "name");
1336 static PyGetSetDef BZ2File_getset
[] = {
1337 {"closed", (getter
)BZ2File_get_closed
, NULL
,
1338 "True if the file is closed"},
1339 {"newlines", (getter
)BZ2File_get_newlines
, NULL
,
1340 "end-of-line convention used in this file"},
1341 {"mode", (getter
)BZ2File_get_mode
, NULL
,
1342 "file mode ('r', 'w', or 'U')"},
1343 {"name", (getter
)BZ2File_get_name
, NULL
,
1345 {NULL
} /* Sentinel */
1349 /* ===================================================================== */
1350 /* Members of BZ2File_Type. */
1353 #define OFF(x) offsetof(BZ2FileObject, x)
1355 static PyMemberDef BZ2File_members
[] = {
1356 {"softspace", T_INT
, OFF(f_softspace
), 0,
1357 "flag indicating that a space needs to be printed; used by print"},
1358 {NULL
} /* Sentinel */
1361 /* ===================================================================== */
1362 /* Slot definitions for BZ2File_Type. */
1365 BZ2File_init(BZ2FileObject
*self
, PyObject
*args
, PyObject
*kwargs
)
1367 static char *kwlist
[] = {"filename", "mode", "buffering",
1368 "compresslevel", 0};
1372 int compresslevel
= 9;
1378 if (!PyArg_ParseTupleAndKeywords(args
, kwargs
, "O|sii:BZ2File",
1379 kwlist
, &name
, &mode
, &buffering
,
1383 if (compresslevel
< 1 || compresslevel
> 9) {
1384 PyErr_SetString(PyExc_ValueError
,
1385 "compresslevel must be between 1 and 9");
1404 self
->f_univ_newline
= 0;
1406 self
->f_univ_newline
= 1;
1415 PyErr_Format(PyExc_ValueError
,
1416 "invalid mode char %c", *mode
);
1424 if (mode_char
== 0) {
1428 mode
= (mode_char
== 'r') ? "rb" : "wb";
1430 self
->file
= PyObject_CallFunction((PyObject
*)&PyFile_Type
, "(Osi)",
1431 name
, mode
, buffering
);
1432 if (self
->file
== NULL
)
1435 /* From now on, we have stuff to dealloc, so jump to error label
1436 * instead of returning */
1439 self
->lock
= PyThread_allocate_lock();
1441 PyErr_SetString(PyExc_MemoryError
, "unable to allocate lock");
1446 if (mode_char
== 'r')
1447 self
->fp
= BZ2_bzReadOpen(&bzerror
,
1448 PyFile_AsFile(self
->file
),
1451 self
->fp
= BZ2_bzWriteOpen(&bzerror
,
1452 PyFile_AsFile(self
->file
),
1453 compresslevel
, 0, 0);
1455 if (bzerror
!= BZ_OK
) {
1456 Util_CatchBZ2Error(bzerror
);
1459 PyFile_IncUseCount((PyFileObject
*)self
->file
);
1461 self
->mode
= (mode_char
== 'r') ? MODE_READ
: MODE_WRITE
;
1466 Py_CLEAR(self
->file
);
1469 PyThread_free_lock(self
->lock
);
1477 BZ2File_dealloc(BZ2FileObject
*self
)
1482 PyThread_free_lock(self
->lock
);
1484 switch (self
->mode
) {
1487 BZ2_bzReadClose(&bzerror
, self
->fp
);
1490 BZ2_bzWriteClose(&bzerror
, self
->fp
,
1495 PyFile_DecUseCount((PyFileObject
*)self
->file
);
1498 Util_DropReadAhead(self
);
1499 Py_XDECREF(self
->file
);
1500 Py_TYPE(self
)->tp_free((PyObject
*)self
);
1503 /* This is a hacked version of Python's fileobject.c:file_getiter(). */
1505 BZ2File_getiter(BZ2FileObject
*self
)
1507 if (self
->mode
== MODE_CLOSED
) {
1508 PyErr_SetString(PyExc_ValueError
,
1509 "I/O operation on closed file");
1512 Py_INCREF((PyObject
*)self
);
1513 return (PyObject
*)self
;
1516 /* This is a hacked version of Python's fileobject.c:file_iternext(). */
1517 #define READAHEAD_BUFSIZE 8192
1519 BZ2File_iternext(BZ2FileObject
*self
)
1521 PyStringObject
* ret
;
1523 if (self
->mode
== MODE_CLOSED
) {
1525 PyErr_SetString(PyExc_ValueError
,
1526 "I/O operation on closed file");
1529 ret
= Util_ReadAheadGetLineSkip(self
, 0, READAHEAD_BUFSIZE
);
1531 if (ret
== NULL
|| PyString_GET_SIZE(ret
) == 0) {
1535 return (PyObject
*)ret
;
1538 /* ===================================================================== */
1539 /* BZ2File_Type definition. */
1541 PyDoc_VAR(BZ2File__doc__
) =
1543 "BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1545 Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1546 writing. When opened for writing, the file will be created if it doesn't\n\
1547 exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1548 unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1549 is given, must be a number between 1 and 9.\n\
1553 Add a 'U' to mode to open the file for input with universal newline\n\
1554 support. Any line ending in the input file will be seen as a '\\n' in\n\
1555 Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
1556 for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
1557 '\\r\\n' or a tuple containing all the newline types seen. Universal\n\
1558 newlines are available only when reading.\n\
1562 static PyTypeObject BZ2File_Type
= {
1563 PyVarObject_HEAD_INIT(NULL
, 0)
1564 "bz2.BZ2File", /*tp_name*/
1565 sizeof(BZ2FileObject
), /*tp_basicsize*/
1567 (destructor
)BZ2File_dealloc
, /*tp_dealloc*/
1574 0, /*tp_as_sequence*/
1575 0, /*tp_as_mapping*/
1579 PyObject_GenericGetAttr
,/*tp_getattro*/
1580 PyObject_GenericSetAttr
,/*tp_setattro*/
1582 Py_TPFLAGS_DEFAULT
|Py_TPFLAGS_BASETYPE
, /*tp_flags*/
1583 BZ2File__doc__
, /*tp_doc*/
1586 0, /*tp_richcompare*/
1587 0, /*tp_weaklistoffset*/
1588 (getiterfunc
)BZ2File_getiter
, /*tp_iter*/
1589 (iternextfunc
)BZ2File_iternext
, /*tp_iternext*/
1590 BZ2File_methods
, /*tp_methods*/
1591 BZ2File_members
, /*tp_members*/
1592 BZ2File_getset
, /*tp_getset*/
1597 0, /*tp_dictoffset*/
1598 (initproc
)BZ2File_init
, /*tp_init*/
1599 PyType_GenericAlloc
, /*tp_alloc*/
1600 PyType_GenericNew
, /*tp_new*/
1601 _PyObject_Del
, /*tp_free*/
1606 /* ===================================================================== */
1607 /* Methods of BZ2Comp. */
1609 PyDoc_STRVAR(BZ2Comp_compress__doc__
,
1610 "compress(data) -> string\n\
1612 Provide more data to the compressor object. It will return chunks of\n\
1613 compressed data whenever possible. When you've finished providing data\n\
1614 to compress, call the flush() method to finish the compression process,\n\
1615 and return what is left in the internal buffers.\n\
1619 BZ2Comp_compress(BZ2CompObject
*self
, PyObject
*args
)
1624 int bufsize
= SMALLCHUNK
;
1625 PY_LONG_LONG totalout
;
1626 PyObject
*ret
= NULL
;
1627 bz_stream
*bzs
= &self
->bzs
;
1630 if (!PyArg_ParseTuple(args
, "s*:compress", &pdata
))
1633 datasize
= pdata
.len
;
1635 if (datasize
== 0) {
1636 PyBuffer_Release(&pdata
);
1637 return PyString_FromString("");
1641 if (!self
->running
) {
1642 PyErr_SetString(PyExc_ValueError
,
1643 "this object was already flushed");
1647 ret
= PyString_FromStringAndSize(NULL
, bufsize
);
1651 bzs
->next_in
= data
;
1652 bzs
->avail_in
= datasize
;
1653 bzs
->next_out
= BUF(ret
);
1654 bzs
->avail_out
= bufsize
;
1656 totalout
= BZS_TOTAL_OUT(bzs
);
1659 Py_BEGIN_ALLOW_THREADS
1660 bzerror
= BZ2_bzCompress(bzs
, BZ_RUN
);
1661 Py_END_ALLOW_THREADS
1662 if (bzerror
!= BZ_RUN_OK
) {
1663 Util_CatchBZ2Error(bzerror
);
1666 if (bzs
->avail_in
== 0)
1667 break; /* no more input data */
1668 if (bzs
->avail_out
== 0) {
1669 bufsize
= Util_NewBufferSize(bufsize
);
1670 if (_PyString_Resize(&ret
, bufsize
) < 0) {
1671 BZ2_bzCompressEnd(bzs
);
1674 bzs
->next_out
= BUF(ret
) + (BZS_TOTAL_OUT(bzs
)
1676 bzs
->avail_out
= bufsize
- (bzs
->next_out
- BUF(ret
));
1680 _PyString_Resize(&ret
, (Py_ssize_t
)(BZS_TOTAL_OUT(bzs
) - totalout
));
1683 PyBuffer_Release(&pdata
);
1688 PyBuffer_Release(&pdata
);
1693 PyDoc_STRVAR(BZ2Comp_flush__doc__
,
1694 "flush() -> string\n\
1696 Finish the compression process and return what is left in internal buffers.\n\
1697 You must not use the compressor object after calling this method.\n\
1701 BZ2Comp_flush(BZ2CompObject
*self
)
1703 int bufsize
= SMALLCHUNK
;
1704 PyObject
*ret
= NULL
;
1705 bz_stream
*bzs
= &self
->bzs
;
1706 PY_LONG_LONG totalout
;
1710 if (!self
->running
) {
1711 PyErr_SetString(PyExc_ValueError
, "object was already "
1717 ret
= PyString_FromStringAndSize(NULL
, bufsize
);
1721 bzs
->next_out
= BUF(ret
);
1722 bzs
->avail_out
= bufsize
;
1724 totalout
= BZS_TOTAL_OUT(bzs
);
1727 Py_BEGIN_ALLOW_THREADS
1728 bzerror
= BZ2_bzCompress(bzs
, BZ_FINISH
);
1729 Py_END_ALLOW_THREADS
1730 if (bzerror
== BZ_STREAM_END
) {
1732 } else if (bzerror
!= BZ_FINISH_OK
) {
1733 Util_CatchBZ2Error(bzerror
);
1736 if (bzs
->avail_out
== 0) {
1737 bufsize
= Util_NewBufferSize(bufsize
);
1738 if (_PyString_Resize(&ret
, bufsize
) < 0)
1740 bzs
->next_out
= BUF(ret
);
1741 bzs
->next_out
= BUF(ret
) + (BZS_TOTAL_OUT(bzs
)
1743 bzs
->avail_out
= bufsize
- (bzs
->next_out
- BUF(ret
));
1747 if (bzs
->avail_out
!= 0)
1748 _PyString_Resize(&ret
, (Py_ssize_t
)(BZS_TOTAL_OUT(bzs
) - totalout
));
1759 static PyMethodDef BZ2Comp_methods
[] = {
1760 {"compress", (PyCFunction
)BZ2Comp_compress
, METH_VARARGS
,
1761 BZ2Comp_compress__doc__
},
1762 {"flush", (PyCFunction
)BZ2Comp_flush
, METH_NOARGS
,
1763 BZ2Comp_flush__doc__
},
1764 {NULL
, NULL
} /* sentinel */
1768 /* ===================================================================== */
1769 /* Slot definitions for BZ2Comp_Type. */
1772 BZ2Comp_init(BZ2CompObject
*self
, PyObject
*args
, PyObject
*kwargs
)
1774 int compresslevel
= 9;
1776 static char *kwlist
[] = {"compresslevel", 0};
1778 if (!PyArg_ParseTupleAndKeywords(args
, kwargs
, "|i:BZ2Compressor",
1779 kwlist
, &compresslevel
))
1782 if (compresslevel
< 1 || compresslevel
> 9) {
1783 PyErr_SetString(PyExc_ValueError
,
1784 "compresslevel must be between 1 and 9");
1789 self
->lock
= PyThread_allocate_lock();
1791 PyErr_SetString(PyExc_MemoryError
, "unable to allocate lock");
1796 memset(&self
->bzs
, 0, sizeof(bz_stream
));
1797 bzerror
= BZ2_bzCompressInit(&self
->bzs
, compresslevel
, 0, 0);
1798 if (bzerror
!= BZ_OK
) {
1799 Util_CatchBZ2Error(bzerror
);
1809 PyThread_free_lock(self
->lock
);
1817 BZ2Comp_dealloc(BZ2CompObject
*self
)
1821 PyThread_free_lock(self
->lock
);
1823 BZ2_bzCompressEnd(&self
->bzs
);
1824 Py_TYPE(self
)->tp_free((PyObject
*)self
);
1828 /* ===================================================================== */
1829 /* BZ2Comp_Type definition. */
1831 PyDoc_STRVAR(BZ2Comp__doc__
,
1832 "BZ2Compressor([compresslevel=9]) -> compressor object\n\
1834 Create a new compressor object. This object may be used to compress\n\
1835 data sequentially. If you want to compress data in one shot, use the\n\
1836 compress() function instead. The compresslevel parameter, if given,\n\
1837 must be a number between 1 and 9.\n\
1840 static PyTypeObject BZ2Comp_Type
= {
1841 PyVarObject_HEAD_INIT(NULL
, 0)
1842 "bz2.BZ2Compressor", /*tp_name*/
1843 sizeof(BZ2CompObject
), /*tp_basicsize*/
1845 (destructor
)BZ2Comp_dealloc
, /*tp_dealloc*/
1852 0, /*tp_as_sequence*/
1853 0, /*tp_as_mapping*/
1857 PyObject_GenericGetAttr
,/*tp_getattro*/
1858 PyObject_GenericSetAttr
,/*tp_setattro*/
1860 Py_TPFLAGS_DEFAULT
|Py_TPFLAGS_BASETYPE
, /*tp_flags*/
1861 BZ2Comp__doc__
, /*tp_doc*/
1864 0, /*tp_richcompare*/
1865 0, /*tp_weaklistoffset*/
1868 BZ2Comp_methods
, /*tp_methods*/
1875 0, /*tp_dictoffset*/
1876 (initproc
)BZ2Comp_init
, /*tp_init*/
1877 PyType_GenericAlloc
, /*tp_alloc*/
1878 PyType_GenericNew
, /*tp_new*/
1879 _PyObject_Del
, /*tp_free*/
1884 /* ===================================================================== */
1885 /* Members of BZ2Decomp. */
1888 #define OFF(x) offsetof(BZ2DecompObject, x)
1890 static PyMemberDef BZ2Decomp_members
[] = {
1891 {"unused_data", T_OBJECT
, OFF(unused_data
), RO
},
1892 {NULL
} /* Sentinel */
1896 /* ===================================================================== */
1897 /* Methods of BZ2Decomp. */
1899 PyDoc_STRVAR(BZ2Decomp_decompress__doc__
,
1900 "decompress(data) -> string\n\
1902 Provide more data to the decompressor object. It will return chunks\n\
1903 of decompressed data whenever possible. If you try to decompress data\n\
1904 after the end of stream is found, EOFError will be raised. If any data\n\
1905 was found after the end of stream, it'll be ignored and saved in\n\
1906 unused_data attribute.\n\
1910 BZ2Decomp_decompress(BZ2DecompObject
*self
, PyObject
*args
)
1915 int bufsize
= SMALLCHUNK
;
1916 PY_LONG_LONG totalout
;
1917 PyObject
*ret
= NULL
;
1918 bz_stream
*bzs
= &self
->bzs
;
1921 if (!PyArg_ParseTuple(args
, "s*:decompress", &pdata
))
1924 datasize
= pdata
.len
;
1927 if (!self
->running
) {
1928 PyErr_SetString(PyExc_EOFError
, "end of stream was "
1933 ret
= PyString_FromStringAndSize(NULL
, bufsize
);
1937 bzs
->next_in
= data
;
1938 bzs
->avail_in
= datasize
;
1939 bzs
->next_out
= BUF(ret
);
1940 bzs
->avail_out
= bufsize
;
1942 totalout
= BZS_TOTAL_OUT(bzs
);
1945 Py_BEGIN_ALLOW_THREADS
1946 bzerror
= BZ2_bzDecompress(bzs
);
1947 Py_END_ALLOW_THREADS
1948 if (bzerror
== BZ_STREAM_END
) {
1949 if (bzs
->avail_in
!= 0) {
1950 Py_DECREF(self
->unused_data
);
1952 PyString_FromStringAndSize(bzs
->next_in
,
1958 if (bzerror
!= BZ_OK
) {
1959 Util_CatchBZ2Error(bzerror
);
1962 if (bzs
->avail_in
== 0)
1963 break; /* no more input data */
1964 if (bzs
->avail_out
== 0) {
1965 bufsize
= Util_NewBufferSize(bufsize
);
1966 if (_PyString_Resize(&ret
, bufsize
) < 0) {
1967 BZ2_bzDecompressEnd(bzs
);
1970 bzs
->next_out
= BUF(ret
);
1971 bzs
->next_out
= BUF(ret
) + (BZS_TOTAL_OUT(bzs
)
1973 bzs
->avail_out
= bufsize
- (bzs
->next_out
- BUF(ret
));
1977 if (bzs
->avail_out
!= 0)
1978 _PyString_Resize(&ret
, (Py_ssize_t
)(BZS_TOTAL_OUT(bzs
) - totalout
));
1981 PyBuffer_Release(&pdata
);
1986 PyBuffer_Release(&pdata
);
1991 static PyMethodDef BZ2Decomp_methods
[] = {
1992 {"decompress", (PyCFunction
)BZ2Decomp_decompress
, METH_VARARGS
, BZ2Decomp_decompress__doc__
},
1993 {NULL
, NULL
} /* sentinel */
1997 /* ===================================================================== */
1998 /* Slot definitions for BZ2Decomp_Type. */
2001 BZ2Decomp_init(BZ2DecompObject
*self
, PyObject
*args
, PyObject
*kwargs
)
2005 if (!PyArg_ParseTuple(args
, ":BZ2Decompressor"))
2009 self
->lock
= PyThread_allocate_lock();
2011 PyErr_SetString(PyExc_MemoryError
, "unable to allocate lock");
2016 self
->unused_data
= PyString_FromString("");
2017 if (!self
->unused_data
)
2020 memset(&self
->bzs
, 0, sizeof(bz_stream
));
2021 bzerror
= BZ2_bzDecompressInit(&self
->bzs
, 0, 0);
2022 if (bzerror
!= BZ_OK
) {
2023 Util_CatchBZ2Error(bzerror
);
2034 PyThread_free_lock(self
->lock
);
2038 Py_CLEAR(self
->unused_data
);
2043 BZ2Decomp_dealloc(BZ2DecompObject
*self
)
2047 PyThread_free_lock(self
->lock
);
2049 Py_XDECREF(self
->unused_data
);
2050 BZ2_bzDecompressEnd(&self
->bzs
);
2051 Py_TYPE(self
)->tp_free((PyObject
*)self
);
2055 /* ===================================================================== */
2056 /* BZ2Decomp_Type definition. */
2058 PyDoc_STRVAR(BZ2Decomp__doc__
,
2059 "BZ2Decompressor() -> decompressor object\n\
2061 Create a new decompressor object. This object may be used to decompress\n\
2062 data sequentially. If you want to decompress data in one shot, use the\n\
2063 decompress() function instead.\n\
2066 static PyTypeObject BZ2Decomp_Type
= {
2067 PyVarObject_HEAD_INIT(NULL
, 0)
2068 "bz2.BZ2Decompressor", /*tp_name*/
2069 sizeof(BZ2DecompObject
), /*tp_basicsize*/
2071 (destructor
)BZ2Decomp_dealloc
, /*tp_dealloc*/
2078 0, /*tp_as_sequence*/
2079 0, /*tp_as_mapping*/
2083 PyObject_GenericGetAttr
,/*tp_getattro*/
2084 PyObject_GenericSetAttr
,/*tp_setattro*/
2086 Py_TPFLAGS_DEFAULT
|Py_TPFLAGS_BASETYPE
, /*tp_flags*/
2087 BZ2Decomp__doc__
, /*tp_doc*/
2090 0, /*tp_richcompare*/
2091 0, /*tp_weaklistoffset*/
2094 BZ2Decomp_methods
, /*tp_methods*/
2095 BZ2Decomp_members
, /*tp_members*/
2101 0, /*tp_dictoffset*/
2102 (initproc
)BZ2Decomp_init
, /*tp_init*/
2103 PyType_GenericAlloc
, /*tp_alloc*/
2104 PyType_GenericNew
, /*tp_new*/
2105 _PyObject_Del
, /*tp_free*/
2110 /* ===================================================================== */
2111 /* Module functions. */
2113 PyDoc_STRVAR(bz2_compress__doc__
,
2114 "compress(data [, compresslevel=9]) -> string\n\
2116 Compress data in one shot. If you want to compress data sequentially,\n\
2117 use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
2118 given, must be a number between 1 and 9.\n\
2122 bz2_compress(PyObject
*self
, PyObject
*args
, PyObject
*kwargs
)
2124 int compresslevel
=9;
2129 PyObject
*ret
= NULL
;
2131 bz_stream
*bzs
= &_bzs
;
2133 static char *kwlist
[] = {"data", "compresslevel", 0};
2135 if (!PyArg_ParseTupleAndKeywords(args
, kwargs
, "s*|i",
2140 datasize
= pdata
.len
;
2142 if (compresslevel
< 1 || compresslevel
> 9) {
2143 PyErr_SetString(PyExc_ValueError
,
2144 "compresslevel must be between 1 and 9");
2145 PyBuffer_Release(&pdata
);
2149 /* Conforming to bz2 manual, this is large enough to fit compressed
2150 * data in one shot. We will check it later anyway. */
2151 bufsize
= datasize
+ (datasize
/100+1) + 600;
2153 ret
= PyString_FromStringAndSize(NULL
, bufsize
);
2155 PyBuffer_Release(&pdata
);
2159 memset(bzs
, 0, sizeof(bz_stream
));
2161 bzs
->next_in
= data
;
2162 bzs
->avail_in
= datasize
;
2163 bzs
->next_out
= BUF(ret
);
2164 bzs
->avail_out
= bufsize
;
2166 bzerror
= BZ2_bzCompressInit(bzs
, compresslevel
, 0, 0);
2167 if (bzerror
!= BZ_OK
) {
2168 Util_CatchBZ2Error(bzerror
);
2169 PyBuffer_Release(&pdata
);
2175 Py_BEGIN_ALLOW_THREADS
2176 bzerror
= BZ2_bzCompress(bzs
, BZ_FINISH
);
2177 Py_END_ALLOW_THREADS
2178 if (bzerror
== BZ_STREAM_END
) {
2180 } else if (bzerror
!= BZ_FINISH_OK
) {
2181 BZ2_bzCompressEnd(bzs
);
2182 Util_CatchBZ2Error(bzerror
);
2183 PyBuffer_Release(&pdata
);
2187 if (bzs
->avail_out
== 0) {
2188 bufsize
= Util_NewBufferSize(bufsize
);
2189 if (_PyString_Resize(&ret
, bufsize
) < 0) {
2190 BZ2_bzCompressEnd(bzs
);
2191 PyBuffer_Release(&pdata
);
2195 bzs
->next_out
= BUF(ret
) + BZS_TOTAL_OUT(bzs
);
2196 bzs
->avail_out
= bufsize
- (bzs
->next_out
- BUF(ret
));
2200 if (bzs
->avail_out
!= 0)
2201 _PyString_Resize(&ret
, (Py_ssize_t
)BZS_TOTAL_OUT(bzs
));
2202 BZ2_bzCompressEnd(bzs
);
2204 PyBuffer_Release(&pdata
);
2208 PyDoc_STRVAR(bz2_decompress__doc__
,
2209 "decompress(data) -> decompressed data\n\
2211 Decompress data in one shot. If you want to decompress data sequentially,\n\
2212 use an instance of BZ2Decompressor instead.\n\
2216 bz2_decompress(PyObject
*self
, PyObject
*args
)
2221 int bufsize
= SMALLCHUNK
;
2224 bz_stream
*bzs
= &_bzs
;
2227 if (!PyArg_ParseTuple(args
, "s*:decompress", &pdata
))
2230 datasize
= pdata
.len
;
2232 if (datasize
== 0) {
2233 PyBuffer_Release(&pdata
);
2234 return PyString_FromString("");
2237 ret
= PyString_FromStringAndSize(NULL
, bufsize
);
2239 PyBuffer_Release(&pdata
);
2243 memset(bzs
, 0, sizeof(bz_stream
));
2245 bzs
->next_in
= data
;
2246 bzs
->avail_in
= datasize
;
2247 bzs
->next_out
= BUF(ret
);
2248 bzs
->avail_out
= bufsize
;
2250 bzerror
= BZ2_bzDecompressInit(bzs
, 0, 0);
2251 if (bzerror
!= BZ_OK
) {
2252 Util_CatchBZ2Error(bzerror
);
2254 PyBuffer_Release(&pdata
);
2259 Py_BEGIN_ALLOW_THREADS
2260 bzerror
= BZ2_bzDecompress(bzs
);
2261 Py_END_ALLOW_THREADS
2262 if (bzerror
== BZ_STREAM_END
) {
2264 } else if (bzerror
!= BZ_OK
) {
2265 BZ2_bzDecompressEnd(bzs
);
2266 Util_CatchBZ2Error(bzerror
);
2267 PyBuffer_Release(&pdata
);
2271 if (bzs
->avail_in
== 0) {
2272 BZ2_bzDecompressEnd(bzs
);
2273 PyErr_SetString(PyExc_ValueError
,
2274 "couldn't find end of stream");
2275 PyBuffer_Release(&pdata
);
2279 if (bzs
->avail_out
== 0) {
2280 bufsize
= Util_NewBufferSize(bufsize
);
2281 if (_PyString_Resize(&ret
, bufsize
) < 0) {
2282 BZ2_bzDecompressEnd(bzs
);
2283 PyBuffer_Release(&pdata
);
2287 bzs
->next_out
= BUF(ret
) + BZS_TOTAL_OUT(bzs
);
2288 bzs
->avail_out
= bufsize
- (bzs
->next_out
- BUF(ret
));
2292 if (bzs
->avail_out
!= 0)
2293 _PyString_Resize(&ret
, (Py_ssize_t
)BZS_TOTAL_OUT(bzs
));
2294 BZ2_bzDecompressEnd(bzs
);
2295 PyBuffer_Release(&pdata
);
2300 static PyMethodDef bz2_methods
[] = {
2301 {"compress", (PyCFunction
) bz2_compress
, METH_VARARGS
|METH_KEYWORDS
,
2302 bz2_compress__doc__
},
2303 {"decompress", (PyCFunction
) bz2_decompress
, METH_VARARGS
,
2304 bz2_decompress__doc__
},
2305 {NULL
, NULL
} /* sentinel */
2308 /* ===================================================================== */
2309 /* Initialization function. */
2311 PyDoc_STRVAR(bz2__doc__
,
2312 "The python bz2 module provides a comprehensive interface for\n\
2313 the bz2 compression library. It implements a complete file\n\
2314 interface, one shot (de)compression functions, and types for\n\
2315 sequential (de)compression.\n\
2323 if (PyType_Ready(&BZ2File_Type
) < 0)
2325 if (PyType_Ready(&BZ2Comp_Type
) < 0)
2327 if (PyType_Ready(&BZ2Decomp_Type
) < 0)
2330 m
= Py_InitModule3("bz2", bz2_methods
, bz2__doc__
);
2334 PyModule_AddObject(m
, "__author__", PyString_FromString(__author__
));
2336 Py_INCREF(&BZ2File_Type
);
2337 PyModule_AddObject(m
, "BZ2File", (PyObject
*)&BZ2File_Type
);
2339 Py_INCREF(&BZ2Comp_Type
);
2340 PyModule_AddObject(m
, "BZ2Compressor", (PyObject
*)&BZ2Comp_Type
);
2342 Py_INCREF(&BZ2Decomp_Type
);
2343 PyModule_AddObject(m
, "BZ2Decompressor", (PyObject
*)&BZ2Decomp_Type
);