]> git.proxmox.com Git - mirror_edk2.git/blob - AppPkg/Applications/Python/Python-2.7.2/Modules/bz2module.c
EmbeddedPkg: Extend NvVarStoreFormattedLib LIBRARY_CLASS
[mirror_edk2.git] / AppPkg / Applications / Python / Python-2.7.2 / Modules / bz2module.c
1 /*
2
3 python-bz2 - python bz2 library interface
4
5 Copyright (c) 2002 Gustavo Niemeyer <niemeyer@conectiva.com>
6 Copyright (c) 2002 Python Software Foundation; All Rights Reserved
7
8 */
9
10 #include "Python.h"
11 #include <stdio.h>
12 #include <bzlib.h>
13 #include "structmember.h"
14
15 #ifdef WITH_THREAD
16 #include "pythread.h"
17 #endif
18
/* Author credit text for the bz2 module. */
static char __author__[] =
    "The bz2 python module was written by:\n"
    "\n"
    "Gustavo Niemeyer <niemeyer@conectiva.com>\n";
24
/* Py_off_t is the file-offset type used for stream positions and sizes
 * (selection logic copied from Objects/fileobject.c).
 *
 * It is plain off_t unless large-file support is enabled and off_t is
 * narrower than 8 bytes; in that case fpos_t is used when it is at least
 * 8 bytes wide, and the build fails otherwise. */
#if !defined(HAVE_LARGEFILE_SUPPORT) || SIZEOF_OFF_T >= 8
typedef off_t Py_off_t;
#elif SIZEOF_FPOS_T >= 8
typedef fpos_t Py_off_t;
#else
#error "Large file support, but neither off_t nor fpos_t is large enough."
#endif
36
/* Raw character buffer of a Python string object.  The argument is
 * parenthesised so that an expression argument is cast as a whole. */
#define BUF(v) PyString_AS_STRING((PyStringObject *)(v))

/* Values stored in BZ2FileObject.mode. */
#define MODE_CLOSED   0   /* no open stream */
#define MODE_READ     1   /* open for reading */
#define MODE_READ_EOF 2   /* open for reading, BZ_STREAM_END already seen */
#define MODE_WRITE    3   /* open for writing */

/* True when v's type is exactly BZ2File_Type. */
#define BZ2FileObject_Check(v) (Py_TYPE(v) == &BZ2File_Type)
45
46
#ifdef BZ_CONFIG_ERROR

/* bzlib.h defines BZ_CONFIG_ERROR: the BZ2_-prefixed API is used as is and
 * the output total is kept in two 32-bit halves.  BZS_TOTAL_OUT(bzs)
 * combines them in the widest integer type available; bzs is a pointer to
 * a bz_stream.  The argument is parenthesised so any pointer expression
 * may be passed. */
#if SIZEOF_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((long)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#elif SIZEOF_LONG_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((PY_LONG_LONG)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#else
#define BZS_TOTAL_OUT(bzs) \
    ((bzs)->total_out_lo32)
#endif

#else   /* ! BZ_CONFIG_ERROR */

/* No BZ_CONFIG_ERROR: map the BZ2_-prefixed names used throughout this
 * file onto the unprefixed ones.
 * NOTE(review): presumably an older libbz2 release -- confirm. */
#define BZ2_bzRead bzRead
#define BZ2_bzReadOpen bzReadOpen
#define BZ2_bzReadClose bzReadClose
#define BZ2_bzWrite bzWrite
#define BZ2_bzWriteOpen bzWriteOpen
#define BZ2_bzWriteClose bzWriteClose
#define BZ2_bzCompress bzCompress
#define BZ2_bzCompressInit bzCompressInit
#define BZ2_bzCompressEnd bzCompressEnd
#define BZ2_bzDecompress bzDecompress
#define BZ2_bzDecompressInit bzDecompressInit
#define BZ2_bzDecompressEnd bzDecompressEnd

#define BZS_TOTAL_OUT(bzs) ((bzs)->total_out)

#endif  /* ! BZ_CONFIG_ERROR */
78
79
#ifdef WITH_THREAD
/* Take obj->lock.  A non-blocking attempt is made first; only when that
 * fails are other Python threads allowed to run while this one blocks on
 * the lock.  obj is parenthesised so any pointer expression may be used. */
#define ACQUIRE_LOCK(obj) do { \
    if (!PyThread_acquire_lock((obj)->lock, 0)) { \
        Py_BEGIN_ALLOW_THREADS \
        PyThread_acquire_lock((obj)->lock, 1); \
        Py_END_ALLOW_THREADS \
    } } while(0)
/* Release obj->lock. */
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
/* Built without thread support: locking is a no-op. */
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif
92
/* Bit flags accumulated in f_newlinetypes. */
#define NEWLINE_UNKNOWN 0           /* no newline seen yet */
#define NEWLINE_CR      (1 << 0)    /* a lone \r was seen */
#define NEWLINE_LF      (1 << 1)    /* a lone \n was seen */
#define NEWLINE_CRLF    (1 << 2)    /* a \r\n pair was seen */
98
99 /* ===================================================================== */
100 /* Structure definitions. */
101
102 typedef struct {
103 PyObject_HEAD
104 PyObject *file;
105
106 char* f_buf; /* Allocated readahead buffer */
107 char* f_bufend; /* Points after last occupied position */
108 char* f_bufptr; /* Current buffer position */
109
110 int f_softspace; /* Flag used by 'print' command */
111
112 int f_univ_newline; /* Handle any newline convention */
113 int f_newlinetypes; /* Types of newlines seen */
114 int f_skipnextlf; /* Skip next \n */
115
116 BZFILE *fp;
117 int mode;
118 Py_off_t pos;
119 Py_off_t size;
120 #ifdef WITH_THREAD
121 PyThread_type_lock lock;
122 #endif
123 } BZ2FileObject;
124
125 typedef struct {
126 PyObject_HEAD
127 bz_stream bzs;
128 int running;
129 #ifdef WITH_THREAD
130 PyThread_type_lock lock;
131 #endif
132 } BZ2CompObject;
133
134 typedef struct {
135 PyObject_HEAD
136 bz_stream bzs;
137 int running;
138 PyObject *unused_data;
139 #ifdef WITH_THREAD
140 PyThread_type_lock lock;
141 #endif
142 } BZ2DecompObject;
143
144 /* ===================================================================== */
145 /* Utility functions. */
146
147 /* Refuse regular I/O if there's data in the iteration-buffer.
148 * Mixing them would cause data to arrive out of order, as the read*
149 * methods don't use the iteration buffer. */
150 static int
151 check_iterbuffered(BZ2FileObject *f)
152 {
153 if (f->f_buf != NULL &&
154 (f->f_bufend - f->f_bufptr) > 0 &&
155 f->f_buf[0] != '\0') {
156 PyErr_SetString(PyExc_ValueError,
157 "Mixing iteration and read methods would lose data");
158 return -1;
159 }
160 return 0;
161 }
162
163 static int
164 Util_CatchBZ2Error(int bzerror)
165 {
166 int ret = 0;
167 switch(bzerror) {
168 case BZ_OK:
169 case BZ_STREAM_END:
170 break;
171
172 #ifdef BZ_CONFIG_ERROR
173 case BZ_CONFIG_ERROR:
174 PyErr_SetString(PyExc_SystemError,
175 "the bz2 library was not compiled "
176 "correctly");
177 ret = 1;
178 break;
179 #endif
180
181 case BZ_PARAM_ERROR:
182 PyErr_SetString(PyExc_ValueError,
183 "the bz2 library has received wrong "
184 "parameters");
185 ret = 1;
186 break;
187
188 case BZ_MEM_ERROR:
189 PyErr_NoMemory();
190 ret = 1;
191 break;
192
193 case BZ_DATA_ERROR:
194 case BZ_DATA_ERROR_MAGIC:
195 PyErr_SetString(PyExc_IOError, "invalid data stream");
196 ret = 1;
197 break;
198
199 case BZ_IO_ERROR:
200 PyErr_SetString(PyExc_IOError, "unknown IO error");
201 ret = 1;
202 break;
203
204 case BZ_UNEXPECTED_EOF:
205 PyErr_SetString(PyExc_EOFError,
206 "compressed file ended before the "
207 "logical end-of-stream was detected");
208 ret = 1;
209 break;
210
211 case BZ_SEQUENCE_ERROR:
212 PyErr_SetString(PyExc_RuntimeError,
213 "wrong sequence of bz2 library "
214 "commands used");
215 ret = 1;
216 break;
217 }
218 return ret;
219 }
220
/* Buffer growth tuning.  SMALLCHUNK is the step used for small buffers
 * (BUFSIZ, but never below 8192); BIGCHUNK is the step used once a buffer
 * has grown past it. */
#if BUFSIZ >= 8192
#define SMALLCHUNK BUFSIZ
#else
#define SMALLCHUNK 8192
#endif

#if SIZEOF_INT >= 4
#define BIGCHUNK (512 * 1024)
#else
#define BIGCHUNK (512 * 32)
#endif

/* Adapted from Python's fileobject.c:new_buffersize().
 *
 * Given the current buffer size, return the size to grow it to:
 *   - up to and including SMALLCHUNK: add SMALLCHUNK;
 *   - above SMALLCHUNK, up to and including BIGCHUNK: double;
 *   - above BIGCHUNK: add BIGCHUNK. */
static size_t
Util_NewBufferSize(size_t currentsize)
{
    if (currentsize <= SMALLCHUNK)
        return currentsize + SMALLCHUNK;
    if (currentsize > BIGCHUNK)
        return currentsize + BIGCHUNK;
    return currentsize + currentsize;
}
247
248 /* This is a hacked version of Python's fileobject.c:get_line(). */
249 static PyObject *
250 Util_GetLine(BZ2FileObject *f, int n)
251 {
252 char c;
253 char *buf, *end;
254 size_t total_v_size; /* total # of slots in buffer */
255 size_t used_v_size; /* # used slots in buffer */
256 size_t increment; /* amount to increment the buffer */
257 PyObject *v;
258 int bzerror;
259 int bytes_read;
260 int newlinetypes = f->f_newlinetypes;
261 int skipnextlf = f->f_skipnextlf;
262 int univ_newline = f->f_univ_newline;
263
264 total_v_size = n > 0 ? n : 100;
265 v = PyString_FromStringAndSize((char *)NULL, total_v_size);
266 if (v == NULL)
267 return NULL;
268
269 buf = BUF(v);
270 end = buf + total_v_size;
271
272 for (;;) {
273 Py_BEGIN_ALLOW_THREADS
274 while (buf != end) {
275 bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
276 f->pos++;
277 if (bytes_read == 0) break;
278 if (univ_newline) {
279 if (skipnextlf) {
280 skipnextlf = 0;
281 if (c == '\n') {
282 /* Seeing a \n here with skipnextlf true means we
283 * saw a \r before.
284 */
285 newlinetypes |= NEWLINE_CRLF;
286 if (bzerror != BZ_OK) break;
287 bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
288 f->pos++;
289 if (bytes_read == 0) break;
290 } else {
291 newlinetypes |= NEWLINE_CR;
292 }
293 }
294 if (c == '\r') {
295 skipnextlf = 1;
296 c = '\n';
297 } else if (c == '\n')
298 newlinetypes |= NEWLINE_LF;
299 }
300 *buf++ = c;
301 if (bzerror != BZ_OK || c == '\n') break;
302 }
303 if (univ_newline && bzerror == BZ_STREAM_END && skipnextlf)
304 newlinetypes |= NEWLINE_CR;
305 Py_END_ALLOW_THREADS
306 f->f_newlinetypes = newlinetypes;
307 f->f_skipnextlf = skipnextlf;
308 if (bzerror == BZ_STREAM_END) {
309 f->size = f->pos;
310 f->mode = MODE_READ_EOF;
311 break;
312 } else if (bzerror != BZ_OK) {
313 Util_CatchBZ2Error(bzerror);
314 Py_DECREF(v);
315 return NULL;
316 }
317 if (c == '\n')
318 break;
319 /* Must be because buf == end */
320 if (n > 0)
321 break;
322 used_v_size = total_v_size;
323 increment = total_v_size >> 2; /* mild exponential growth */
324 total_v_size += increment;
325 if (total_v_size > INT_MAX) {
326 PyErr_SetString(PyExc_OverflowError,
327 "line is longer than a Python string can hold");
328 Py_DECREF(v);
329 return NULL;
330 }
331 if (_PyString_Resize(&v, total_v_size) < 0)
332 return NULL;
333 buf = BUF(v) + used_v_size;
334 end = BUF(v) + total_v_size;
335 }
336
337 used_v_size = buf - BUF(v);
338 if (used_v_size != total_v_size)
339 _PyString_Resize(&v, used_v_size);
340 return v;
341 }
342
343 /* This is a hacked version of Python's
344 * fileobject.c:Py_UniversalNewlineFread(). */
345 size_t
346 Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
347 char* buf, size_t n, BZ2FileObject *f)
348 {
349 char *dst = buf;
350 int newlinetypes, skipnextlf;
351
352 assert(buf != NULL);
353 assert(stream != NULL);
354
355 if (!f->f_univ_newline)
356 return BZ2_bzRead(bzerror, stream, buf, n);
357
358 newlinetypes = f->f_newlinetypes;
359 skipnextlf = f->f_skipnextlf;
360
361 /* Invariant: n is the number of bytes remaining to be filled
362 * in the buffer.
363 */
364 while (n) {
365 size_t nread;
366 int shortread;
367 char *src = dst;
368
369 nread = BZ2_bzRead(bzerror, stream, dst, n);
370 assert(nread <= n);
371 n -= nread; /* assuming 1 byte out for each in; will adjust */
372 shortread = n != 0; /* true iff EOF or error */
373 while (nread--) {
374 char c = *src++;
375 if (c == '\r') {
376 /* Save as LF and set flag to skip next LF. */
377 *dst++ = '\n';
378 skipnextlf = 1;
379 }
380 else if (skipnextlf && c == '\n') {
381 /* Skip LF, and remember we saw CR LF. */
382 skipnextlf = 0;
383 newlinetypes |= NEWLINE_CRLF;
384 ++n;
385 }
386 else {
387 /* Normal char to be stored in buffer. Also
388 * update the newlinetypes flag if either this
389 * is an LF or the previous char was a CR.
390 */
391 if (c == '\n')
392 newlinetypes |= NEWLINE_LF;
393 else if (skipnextlf)
394 newlinetypes |= NEWLINE_CR;
395 *dst++ = c;
396 skipnextlf = 0;
397 }
398 }
399 if (shortread) {
400 /* If this is EOF, update type flags. */
401 if (skipnextlf && *bzerror == BZ_STREAM_END)
402 newlinetypes |= NEWLINE_CR;
403 break;
404 }
405 }
406 f->f_newlinetypes = newlinetypes;
407 f->f_skipnextlf = skipnextlf;
408 return dst - buf;
409 }
410
411 /* This is a hacked version of Python's fileobject.c:drop_readahead(). */
412 static void
413 Util_DropReadAhead(BZ2FileObject *f)
414 {
415 if (f->f_buf != NULL) {
416 PyMem_Free(f->f_buf);
417 f->f_buf = NULL;
418 }
419 }
420
421 /* This is a hacked version of Python's fileobject.c:readahead(). */
422 static int
423 Util_ReadAhead(BZ2FileObject *f, int bufsize)
424 {
425 int chunksize;
426 int bzerror;
427
428 if (f->f_buf != NULL) {
429 if((f->f_bufend - f->f_bufptr) >= 1)
430 return 0;
431 else
432 Util_DropReadAhead(f);
433 }
434 if (f->mode == MODE_READ_EOF) {
435 f->f_bufptr = f->f_buf;
436 f->f_bufend = f->f_buf;
437 return 0;
438 }
439 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
440 PyErr_NoMemory();
441 return -1;
442 }
443 Py_BEGIN_ALLOW_THREADS
444 chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf,
445 bufsize, f);
446 Py_END_ALLOW_THREADS
447 f->pos += chunksize;
448 if (bzerror == BZ_STREAM_END) {
449 f->size = f->pos;
450 f->mode = MODE_READ_EOF;
451 } else if (bzerror != BZ_OK) {
452 Util_CatchBZ2Error(bzerror);
453 Util_DropReadAhead(f);
454 return -1;
455 }
456 f->f_bufptr = f->f_buf;
457 f->f_bufend = f->f_buf + chunksize;
458 return 0;
459 }
460
461 /* This is a hacked version of Python's
462 * fileobject.c:readahead_get_line_skip(). */
463 static PyStringObject *
464 Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
465 {
466 PyStringObject* s;
467 char *bufptr;
468 char *buf;
469 int len;
470
471 if (f->f_buf == NULL)
472 if (Util_ReadAhead(f, bufsize) < 0)
473 return NULL;
474
475 len = f->f_bufend - f->f_bufptr;
476 if (len == 0)
477 return (PyStringObject *)
478 PyString_FromStringAndSize(NULL, skip);
479 bufptr = memchr(f->f_bufptr, '\n', len);
480 if (bufptr != NULL) {
481 bufptr++; /* Count the '\n' */
482 len = bufptr - f->f_bufptr;
483 s = (PyStringObject *)
484 PyString_FromStringAndSize(NULL, skip+len);
485 if (s == NULL)
486 return NULL;
487 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
488 f->f_bufptr = bufptr;
489 if (bufptr == f->f_bufend)
490 Util_DropReadAhead(f);
491 } else {
492 bufptr = f->f_bufptr;
493 buf = f->f_buf;
494 f->f_buf = NULL; /* Force new readahead buffer */
495 s = Util_ReadAheadGetLineSkip(f, skip+len,
496 bufsize + (bufsize>>2));
497 if (s == NULL) {
498 PyMem_Free(buf);
499 return NULL;
500 }
501 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
502 PyMem_Free(buf);
503 }
504 return s;
505 }
506
507 /* ===================================================================== */
508 /* Methods of BZ2File. */
509
510 PyDoc_STRVAR(BZ2File_read__doc__,
511 "read([size]) -> string\n\
512 \n\
513 Read at most size uncompressed bytes, returned as a string. If the size\n\
514 argument is negative or omitted, read until EOF is reached.\n\
515 ");
516
517 /* This is a hacked version of Python's fileobject.c:file_read(). */
518 static PyObject *
519 BZ2File_read(BZ2FileObject *self, PyObject *args)
520 {
521 long bytesrequested = -1;
522 size_t bytesread, buffersize, chunksize;
523 int bzerror;
524 PyObject *ret = NULL;
525
526 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
527 return NULL;
528
529 ACQUIRE_LOCK(self);
530 switch (self->mode) {
531 case MODE_READ:
532 break;
533 case MODE_READ_EOF:
534 ret = PyString_FromString("");
535 goto cleanup;
536 case MODE_CLOSED:
537 PyErr_SetString(PyExc_ValueError,
538 "I/O operation on closed file");
539 goto cleanup;
540 default:
541 PyErr_SetString(PyExc_IOError,
542 "file is not ready for reading");
543 goto cleanup;
544 }
545
546 /* refuse to mix with f.next() */
547 if (check_iterbuffered(self))
548 goto cleanup;
549
550 if (bytesrequested < 0)
551 buffersize = Util_NewBufferSize((size_t)0);
552 else
553 buffersize = bytesrequested;
554 if (buffersize > INT_MAX) {
555 PyErr_SetString(PyExc_OverflowError,
556 "requested number of bytes is "
557 "more than a Python string can hold");
558 goto cleanup;
559 }
560 ret = PyString_FromStringAndSize((char *)NULL, buffersize);
561 if (ret == NULL)
562 goto cleanup;
563 bytesread = 0;
564
565 for (;;) {
566 Py_BEGIN_ALLOW_THREADS
567 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
568 BUF(ret)+bytesread,
569 buffersize-bytesread,
570 self);
571 self->pos += chunksize;
572 Py_END_ALLOW_THREADS
573 bytesread += chunksize;
574 if (bzerror == BZ_STREAM_END) {
575 self->size = self->pos;
576 self->mode = MODE_READ_EOF;
577 break;
578 } else if (bzerror != BZ_OK) {
579 Util_CatchBZ2Error(bzerror);
580 Py_DECREF(ret);
581 ret = NULL;
582 goto cleanup;
583 }
584 if (bytesrequested < 0) {
585 buffersize = Util_NewBufferSize(buffersize);
586 if (_PyString_Resize(&ret, buffersize) < 0)
587 goto cleanup;
588 } else {
589 break;
590 }
591 }
592 if (bytesread != buffersize)
593 _PyString_Resize(&ret, bytesread);
594
595 cleanup:
596 RELEASE_LOCK(self);
597 return ret;
598 }
599
600 PyDoc_STRVAR(BZ2File_readline__doc__,
601 "readline([size]) -> string\n\
602 \n\
603 Return the next line from the file, as a string, retaining newline.\n\
604 A non-negative size argument will limit the maximum number of bytes to\n\
605 return (an incomplete line may be returned then). Return an empty\n\
606 string at EOF.\n\
607 ");
608
609 static PyObject *
610 BZ2File_readline(BZ2FileObject *self, PyObject *args)
611 {
612 PyObject *ret = NULL;
613 int sizehint = -1;
614
615 if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
616 return NULL;
617
618 ACQUIRE_LOCK(self);
619 switch (self->mode) {
620 case MODE_READ:
621 break;
622 case MODE_READ_EOF:
623 ret = PyString_FromString("");
624 goto cleanup;
625 case MODE_CLOSED:
626 PyErr_SetString(PyExc_ValueError,
627 "I/O operation on closed file");
628 goto cleanup;
629 default:
630 PyErr_SetString(PyExc_IOError,
631 "file is not ready for reading");
632 goto cleanup;
633 }
634
635 /* refuse to mix with f.next() */
636 if (check_iterbuffered(self))
637 goto cleanup;
638
639 if (sizehint == 0)
640 ret = PyString_FromString("");
641 else
642 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
643
644 cleanup:
645 RELEASE_LOCK(self);
646 return ret;
647 }
648
649 PyDoc_STRVAR(BZ2File_readlines__doc__,
650 "readlines([size]) -> list\n\
651 \n\
652 Call readline() repeatedly and return a list of lines read.\n\
653 The optional size argument, if given, is an approximate bound on the\n\
654 total number of bytes in the lines returned.\n\
655 ");
656
657 /* This is a hacked version of Python's fileobject.c:file_readlines(). */
658 static PyObject *
659 BZ2File_readlines(BZ2FileObject *self, PyObject *args)
660 {
661 long sizehint = 0;
662 PyObject *list = NULL;
663 PyObject *line;
664 char small_buffer[SMALLCHUNK];
665 char *buffer = small_buffer;
666 size_t buffersize = SMALLCHUNK;
667 PyObject *big_buffer = NULL;
668 size_t nfilled = 0;
669 size_t nread;
670 size_t totalread = 0;
671 char *p, *q, *end;
672 int err;
673 int shortread = 0;
674 int bzerror;
675
676 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
677 return NULL;
678
679 ACQUIRE_LOCK(self);
680 switch (self->mode) {
681 case MODE_READ:
682 break;
683 case MODE_READ_EOF:
684 list = PyList_New(0);
685 goto cleanup;
686 case MODE_CLOSED:
687 PyErr_SetString(PyExc_ValueError,
688 "I/O operation on closed file");
689 goto cleanup;
690 default:
691 PyErr_SetString(PyExc_IOError,
692 "file is not ready for reading");
693 goto cleanup;
694 }
695
696 /* refuse to mix with f.next() */
697 if (check_iterbuffered(self))
698 goto cleanup;
699
700 if ((list = PyList_New(0)) == NULL)
701 goto cleanup;
702
703 for (;;) {
704 Py_BEGIN_ALLOW_THREADS
705 nread = Util_UnivNewlineRead(&bzerror, self->fp,
706 buffer+nfilled,
707 buffersize-nfilled, self);
708 self->pos += nread;
709 Py_END_ALLOW_THREADS
710 if (bzerror == BZ_STREAM_END) {
711 self->size = self->pos;
712 self->mode = MODE_READ_EOF;
713 if (nread == 0) {
714 sizehint = 0;
715 break;
716 }
717 shortread = 1;
718 } else if (bzerror != BZ_OK) {
719 Util_CatchBZ2Error(bzerror);
720 error:
721 Py_DECREF(list);
722 list = NULL;
723 goto cleanup;
724 }
725 totalread += nread;
726 p = memchr(buffer+nfilled, '\n', nread);
727 if (!shortread && p == NULL) {
728 /* Need a larger buffer to fit this line */
729 nfilled += nread;
730 buffersize *= 2;
731 if (buffersize > INT_MAX) {
732 PyErr_SetString(PyExc_OverflowError,
733 "line is longer than a Python string can hold");
734 goto error;
735 }
736 if (big_buffer == NULL) {
737 /* Create the big buffer */
738 big_buffer = PyString_FromStringAndSize(
739 NULL, buffersize);
740 if (big_buffer == NULL)
741 goto error;
742 buffer = PyString_AS_STRING(big_buffer);
743 memcpy(buffer, small_buffer, nfilled);
744 }
745 else {
746 /* Grow the big buffer */
747 _PyString_Resize(&big_buffer, buffersize);
748 buffer = PyString_AS_STRING(big_buffer);
749 }
750 continue;
751 }
752 end = buffer+nfilled+nread;
753 q = buffer;
754 while (p != NULL) {
755 /* Process complete lines */
756 p++;
757 line = PyString_FromStringAndSize(q, p-q);
758 if (line == NULL)
759 goto error;
760 err = PyList_Append(list, line);
761 Py_DECREF(line);
762 if (err != 0)
763 goto error;
764 q = p;
765 p = memchr(q, '\n', end-q);
766 }
767 /* Move the remaining incomplete line to the start */
768 nfilled = end-q;
769 memmove(buffer, q, nfilled);
770 if (sizehint > 0)
771 if (totalread >= (size_t)sizehint)
772 break;
773 if (shortread) {
774 sizehint = 0;
775 break;
776 }
777 }
778 if (nfilled != 0) {
779 /* Partial last line */
780 line = PyString_FromStringAndSize(buffer, nfilled);
781 if (line == NULL)
782 goto error;
783 if (sizehint > 0) {
784 /* Need to complete the last line */
785 PyObject *rest = Util_GetLine(self, 0);
786 if (rest == NULL) {
787 Py_DECREF(line);
788 goto error;
789 }
790 PyString_Concat(&line, rest);
791 Py_DECREF(rest);
792 if (line == NULL)
793 goto error;
794 }
795 err = PyList_Append(list, line);
796 Py_DECREF(line);
797 if (err != 0)
798 goto error;
799 }
800
801 cleanup:
802 RELEASE_LOCK(self);
803 if (big_buffer) {
804 Py_DECREF(big_buffer);
805 }
806 return list;
807 }
808
809 PyDoc_STRVAR(BZ2File_xreadlines__doc__,
810 "xreadlines() -> self\n\
811 \n\
812 For backward compatibility. BZ2File objects now include the performance\n\
813 optimizations previously implemented in the xreadlines module.\n\
814 ");
815
816 PyDoc_STRVAR(BZ2File_write__doc__,
817 "write(data) -> None\n\
818 \n\
819 Write the 'data' string to file. Note that due to buffering, close() may\n\
820 be needed before the file on disk reflects the data written.\n\
821 ");
822
823 /* This is a hacked version of Python's fileobject.c:file_write(). */
824 static PyObject *
825 BZ2File_write(BZ2FileObject *self, PyObject *args)
826 {
827 PyObject *ret = NULL;
828 Py_buffer pbuf;
829 char *buf;
830 int len;
831 int bzerror;
832
833 if (!PyArg_ParseTuple(args, "s*:write", &pbuf))
834 return NULL;
835 buf = pbuf.buf;
836 len = pbuf.len;
837
838 ACQUIRE_LOCK(self);
839 switch (self->mode) {
840 case MODE_WRITE:
841 break;
842
843 case MODE_CLOSED:
844 PyErr_SetString(PyExc_ValueError,
845 "I/O operation on closed file");
846 goto cleanup;
847
848 default:
849 PyErr_SetString(PyExc_IOError,
850 "file is not ready for writing");
851 goto cleanup;
852 }
853
854 self->f_softspace = 0;
855
856 Py_BEGIN_ALLOW_THREADS
857 BZ2_bzWrite (&bzerror, self->fp, buf, len);
858 self->pos += len;
859 Py_END_ALLOW_THREADS
860
861 if (bzerror != BZ_OK) {
862 Util_CatchBZ2Error(bzerror);
863 goto cleanup;
864 }
865
866 Py_INCREF(Py_None);
867 ret = Py_None;
868
869 cleanup:
870 PyBuffer_Release(&pbuf);
871 RELEASE_LOCK(self);
872 return ret;
873 }
874
875 PyDoc_STRVAR(BZ2File_writelines__doc__,
876 "writelines(sequence_of_strings) -> None\n\
877 \n\
878 Write the sequence of strings to the file. Note that newlines are not\n\
879 added. The sequence can be any iterable object producing strings. This is\n\
880 equivalent to calling write() for each string.\n\
881 ");
882
883 /* This is a hacked version of Python's fileobject.c:file_writelines(). */
884 static PyObject *
885 BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
886 {
887 #define CHUNKSIZE 1000
888 PyObject *list = NULL;
889 PyObject *iter = NULL;
890 PyObject *ret = NULL;
891 PyObject *line;
892 int i, j, index, len, islist;
893 int bzerror;
894
895 ACQUIRE_LOCK(self);
896 switch (self->mode) {
897 case MODE_WRITE:
898 break;
899
900 case MODE_CLOSED:
901 PyErr_SetString(PyExc_ValueError,
902 "I/O operation on closed file");
903 goto error;
904
905 default:
906 PyErr_SetString(PyExc_IOError,
907 "file is not ready for writing");
908 goto error;
909 }
910
911 islist = PyList_Check(seq);
912 if (!islist) {
913 iter = PyObject_GetIter(seq);
914 if (iter == NULL) {
915 PyErr_SetString(PyExc_TypeError,
916 "writelines() requires an iterable argument");
917 goto error;
918 }
919 list = PyList_New(CHUNKSIZE);
920 if (list == NULL)
921 goto error;
922 }
923
924 /* Strategy: slurp CHUNKSIZE lines into a private list,
925 checking that they are all strings, then write that list
926 without holding the interpreter lock, then come back for more. */
927 for (index = 0; ; index += CHUNKSIZE) {
928 if (islist) {
929 Py_XDECREF(list);
930 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
931 if (list == NULL)
932 goto error;
933 j = PyList_GET_SIZE(list);
934 }
935 else {
936 for (j = 0; j < CHUNKSIZE; j++) {
937 line = PyIter_Next(iter);
938 if (line == NULL) {
939 if (PyErr_Occurred())
940 goto error;
941 break;
942 }
943 PyList_SetItem(list, j, line);
944 }
945 }
946 if (j == 0)
947 break;
948
949 /* Check that all entries are indeed strings. If not,
950 apply the same rules as for file.write() and
951 convert the rets to strings. This is slow, but
952 seems to be the only way since all conversion APIs
953 could potentially execute Python code. */
954 for (i = 0; i < j; i++) {
955 PyObject *v = PyList_GET_ITEM(list, i);
956 if (!PyString_Check(v)) {
957 const char *buffer;
958 Py_ssize_t len;
959 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
960 PyErr_SetString(PyExc_TypeError,
961 "writelines() "
962 "argument must be "
963 "a sequence of "
964 "strings");
965 goto error;
966 }
967 line = PyString_FromStringAndSize(buffer,
968 len);
969 if (line == NULL)
970 goto error;
971 Py_DECREF(v);
972 PyList_SET_ITEM(list, i, line);
973 }
974 }
975
976 self->f_softspace = 0;
977
978 /* Since we are releasing the global lock, the
979 following code may *not* execute Python code. */
980 Py_BEGIN_ALLOW_THREADS
981 for (i = 0; i < j; i++) {
982 line = PyList_GET_ITEM(list, i);
983 len = PyString_GET_SIZE(line);
984 BZ2_bzWrite (&bzerror, self->fp,
985 PyString_AS_STRING(line), len);
986 if (bzerror != BZ_OK) {
987 Py_BLOCK_THREADS
988 Util_CatchBZ2Error(bzerror);
989 goto error;
990 }
991 }
992 Py_END_ALLOW_THREADS
993
994 if (j < CHUNKSIZE)
995 break;
996 }
997
998 Py_INCREF(Py_None);
999 ret = Py_None;
1000
1001 error:
1002 RELEASE_LOCK(self);
1003 Py_XDECREF(list);
1004 Py_XDECREF(iter);
1005 return ret;
1006 #undef CHUNKSIZE
1007 }
1008
1009 PyDoc_STRVAR(BZ2File_seek__doc__,
1010 "seek(offset [, whence]) -> None\n\
1011 \n\
1012 Move to new file position. Argument offset is a byte count. Optional\n\
1013 argument whence defaults to 0 (offset from start of file, offset\n\
1014 should be >= 0); other values are 1 (move relative to current position,\n\
1015 positive or negative), and 2 (move relative to end of file, usually\n\
1016 negative, although many platforms allow seeking beyond the end of a file).\n\
1017 \n\
1018 Note that seeking of bz2 files is emulated, and depending on the parameters\n\
1019 the operation may be extremely slow.\n\
1020 ");
1021
1022 static PyObject *
1023 BZ2File_seek(BZ2FileObject *self, PyObject *args)
1024 {
1025 int where = 0;
1026 PyObject *offobj;
1027 Py_off_t offset;
1028 char small_buffer[SMALLCHUNK];
1029 char *buffer = small_buffer;
1030 size_t buffersize = SMALLCHUNK;
1031 Py_off_t bytesread = 0;
1032 size_t readsize;
1033 int chunksize;
1034 int bzerror;
1035 PyObject *ret = NULL;
1036
1037 if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where))
1038 return NULL;
1039 #if !defined(HAVE_LARGEFILE_SUPPORT)
1040 offset = PyInt_AsLong(offobj);
1041 #else
1042 offset = PyLong_Check(offobj) ?
1043 PyLong_AsLongLong(offobj) : PyInt_AsLong(offobj);
1044 #endif
1045 if (PyErr_Occurred())
1046 return NULL;
1047
1048 ACQUIRE_LOCK(self);
1049 Util_DropReadAhead(self);
1050 switch (self->mode) {
1051 case MODE_READ:
1052 case MODE_READ_EOF:
1053 break;
1054
1055 case MODE_CLOSED:
1056 PyErr_SetString(PyExc_ValueError,
1057 "I/O operation on closed file");
1058 goto cleanup;
1059
1060 default:
1061 PyErr_SetString(PyExc_IOError,
1062 "seek works only while reading");
1063 goto cleanup;
1064 }
1065
1066 if (where == 2) {
1067 if (self->size == -1) {
1068 assert(self->mode != MODE_READ_EOF);
1069 for (;;) {
1070 Py_BEGIN_ALLOW_THREADS
1071 chunksize = Util_UnivNewlineRead(
1072 &bzerror, self->fp,
1073 buffer, buffersize,
1074 self);
1075 self->pos += chunksize;
1076 Py_END_ALLOW_THREADS
1077
1078 bytesread += chunksize;
1079 if (bzerror == BZ_STREAM_END) {
1080 break;
1081 } else if (bzerror != BZ_OK) {
1082 Util_CatchBZ2Error(bzerror);
1083 goto cleanup;
1084 }
1085 }
1086 self->mode = MODE_READ_EOF;
1087 self->size = self->pos;
1088 bytesread = 0;
1089 }
1090 offset = self->size + offset;
1091 } else if (where == 1) {
1092 offset = self->pos + offset;
1093 }
1094
1095 /* Before getting here, offset must be the absolute position the file
1096 * pointer should be set to. */
1097
1098 if (offset >= self->pos) {
1099 /* we can move forward */
1100 offset -= self->pos;
1101 } else {
1102 /* we cannot move back, so rewind the stream */
1103 BZ2_bzReadClose(&bzerror, self->fp);
1104 if (self->fp) {
1105 PyFile_DecUseCount((PyFileObject *)self->file);
1106 self->fp = NULL;
1107 }
1108 if (bzerror != BZ_OK) {
1109 Util_CatchBZ2Error(bzerror);
1110 goto cleanup;
1111 }
1112 ret = PyObject_CallMethod(self->file, "seek", "(i)", 0);
1113 if (!ret)
1114 goto cleanup;
1115 Py_DECREF(ret);
1116 ret = NULL;
1117 self->pos = 0;
1118 self->fp = BZ2_bzReadOpen(&bzerror, PyFile_AsFile(self->file),
1119 0, 0, NULL, 0);
1120 if (self->fp)
1121 PyFile_IncUseCount((PyFileObject *)self->file);
1122 if (bzerror != BZ_OK) {
1123 Util_CatchBZ2Error(bzerror);
1124 goto cleanup;
1125 }
1126 self->mode = MODE_READ;
1127 }
1128
1129 if (offset <= 0 || self->mode == MODE_READ_EOF)
1130 goto exit;
1131
1132 /* Before getting here, offset must be set to the number of bytes
1133 * to walk forward. */
1134 for (;;) {
1135 if (offset-bytesread > buffersize)
1136 readsize = buffersize;
1137 else
1138 /* offset might be wider that readsize, but the result
1139 * of the subtraction is bound by buffersize (see the
1140 * condition above). buffersize is 8192. */
1141 readsize = (size_t)(offset-bytesread);
1142 Py_BEGIN_ALLOW_THREADS
1143 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
1144 buffer, readsize, self);
1145 self->pos += chunksize;
1146 Py_END_ALLOW_THREADS
1147 bytesread += chunksize;
1148 if (bzerror == BZ_STREAM_END) {
1149 self->size = self->pos;
1150 self->mode = MODE_READ_EOF;
1151 break;
1152 } else if (bzerror != BZ_OK) {
1153 Util_CatchBZ2Error(bzerror);
1154 goto cleanup;
1155 }
1156 if (bytesread == offset)
1157 break;
1158 }
1159
1160 exit:
1161 Py_INCREF(Py_None);
1162 ret = Py_None;
1163
1164 cleanup:
1165 RELEASE_LOCK(self);
1166 return ret;
1167 }
1168
1169 PyDoc_STRVAR(BZ2File_tell__doc__,
1170 "tell() -> int\n\
1171 \n\
1172 Return the current file position, an integer (may be a long integer).\n\
1173 ");
1174
1175 static PyObject *
1176 BZ2File_tell(BZ2FileObject *self, PyObject *args)
1177 {
1178 PyObject *ret = NULL;
1179
1180 if (self->mode == MODE_CLOSED) {
1181 PyErr_SetString(PyExc_ValueError,
1182 "I/O operation on closed file");
1183 goto cleanup;
1184 }
1185
1186 #if !defined(HAVE_LARGEFILE_SUPPORT)
1187 ret = PyInt_FromLong(self->pos);
1188 #else
1189 ret = PyLong_FromLongLong(self->pos);
1190 #endif
1191
1192 cleanup:
1193 return ret;
1194 }
1195
1196 PyDoc_STRVAR(BZ2File_close__doc__,
1197 "close() -> None or (perhaps) an integer\n\
1198 \n\
1199 Close the file. Sets data attribute .closed to true. A closed file\n\
1200 cannot be used for further I/O operations. close() may be called more\n\
1201 than once without error.\n\
1202 ");
1203
1204 static PyObject *
1205 BZ2File_close(BZ2FileObject *self)
1206 {
1207 PyObject *ret = NULL;
1208 int bzerror = BZ_OK;
1209
1210 ACQUIRE_LOCK(self);
1211 switch (self->mode) {
1212 case MODE_READ:
1213 case MODE_READ_EOF:
1214 BZ2_bzReadClose(&bzerror, self->fp);
1215 break;
1216 case MODE_WRITE:
1217 BZ2_bzWriteClose(&bzerror, self->fp,
1218 0, NULL, NULL);
1219 break;
1220 }
1221 if (self->fp) {
1222 PyFile_DecUseCount((PyFileObject *)self->file);
1223 self->fp = NULL;
1224 }
1225 self->mode = MODE_CLOSED;
1226 ret = PyObject_CallMethod(self->file, "close", NULL);
1227 if (bzerror != BZ_OK) {
1228 Util_CatchBZ2Error(bzerror);
1229 Py_XDECREF(ret);
1230 ret = NULL;
1231 }
1232
1233 RELEASE_LOCK(self);
1234 return ret;
1235 }
1236
1237 PyDoc_STRVAR(BZ2File_enter_doc,
1238 "__enter__() -> self.");
1239
1240 static PyObject *
1241 BZ2File_enter(BZ2FileObject *self)
1242 {
1243 if (self->mode == MODE_CLOSED) {
1244 PyErr_SetString(PyExc_ValueError,
1245 "I/O operation on closed file");
1246 return NULL;
1247 }
1248 Py_INCREF(self);
1249 return (PyObject *) self;
1250 }
1251
1252 PyDoc_STRVAR(BZ2File_exit_doc,
1253 "__exit__(*excinfo) -> None. Closes the file.");
1254
1255 static PyObject *
1256 BZ2File_exit(BZ2FileObject *self, PyObject *args)
1257 {
1258 PyObject *ret = PyObject_CallMethod((PyObject *) self, "close", NULL);
1259 if (!ret)
1260 /* If error occurred, pass through */
1261 return NULL;
1262 Py_DECREF(ret);
1263 Py_RETURN_NONE;
1264 }
1265
1266
1267 static PyObject *BZ2File_getiter(BZ2FileObject *self);
1268
1269 static PyMethodDef BZ2File_methods[] = {
1270 {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
1271 {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
1272 {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
1273 {"xreadlines", (PyCFunction)BZ2File_getiter, METH_VARARGS, BZ2File_xreadlines__doc__},
1274 {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
1275 {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
1276 {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
1277 {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
1278 {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
1279 {"__enter__", (PyCFunction)BZ2File_enter, METH_NOARGS, BZ2File_enter_doc},
1280 {"__exit__", (PyCFunction)BZ2File_exit, METH_VARARGS, BZ2File_exit_doc},
1281 {NULL, NULL} /* sentinel */
1282 };
1283
1284
1285 /* ===================================================================== */
1286 /* Getters and setters of BZ2File. */
1287
1288 /* This is a hacked version of Python's fileobject.c:get_newlines(). */
1289 static PyObject *
1290 BZ2File_get_newlines(BZ2FileObject *self, void *closure)
1291 {
1292 switch (self->f_newlinetypes) {
1293 case NEWLINE_UNKNOWN:
1294 Py_INCREF(Py_None);
1295 return Py_None;
1296 case NEWLINE_CR:
1297 return PyString_FromString("\r");
1298 case NEWLINE_LF:
1299 return PyString_FromString("\n");
1300 case NEWLINE_CR|NEWLINE_LF:
1301 return Py_BuildValue("(ss)", "\r", "\n");
1302 case NEWLINE_CRLF:
1303 return PyString_FromString("\r\n");
1304 case NEWLINE_CR|NEWLINE_CRLF:
1305 return Py_BuildValue("(ss)", "\r", "\r\n");
1306 case NEWLINE_LF|NEWLINE_CRLF:
1307 return Py_BuildValue("(ss)", "\n", "\r\n");
1308 case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
1309 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1310 default:
1311 PyErr_Format(PyExc_SystemError,
1312 "Unknown newlines value 0x%x\n",
1313 self->f_newlinetypes);
1314 return NULL;
1315 }
1316 }
1317
1318 static PyObject *
1319 BZ2File_get_closed(BZ2FileObject *self, void *closure)
1320 {
1321 return PyInt_FromLong(self->mode == MODE_CLOSED);
1322 }
1323
1324 static PyObject *
1325 BZ2File_get_mode(BZ2FileObject *self, void *closure)
1326 {
1327 return PyObject_GetAttrString(self->file, "mode");
1328 }
1329
1330 static PyObject *
1331 BZ2File_get_name(BZ2FileObject *self, void *closure)
1332 {
1333 return PyObject_GetAttrString(self->file, "name");
1334 }
1335
1336 static PyGetSetDef BZ2File_getset[] = {
1337 {"closed", (getter)BZ2File_get_closed, NULL,
1338 "True if the file is closed"},
1339 {"newlines", (getter)BZ2File_get_newlines, NULL,
1340 "end-of-line convention used in this file"},
1341 {"mode", (getter)BZ2File_get_mode, NULL,
1342 "file mode ('r', 'w', or 'U')"},
1343 {"name", (getter)BZ2File_get_name, NULL,
1344 "file name"},
1345 {NULL} /* Sentinel */
1346 };
1347
1348
1349 /* ===================================================================== */
1350 /* Members of BZ2File_Type. */
1351
1352 #undef OFF
1353 #define OFF(x) offsetof(BZ2FileObject, x)
1354
1355 static PyMemberDef BZ2File_members[] = {
1356 {"softspace", T_INT, OFF(f_softspace), 0,
1357 "flag indicating that a space needs to be printed; used by print"},
1358 {NULL} /* Sentinel */
1359 };
1360
1361 /* ===================================================================== */
1362 /* Slot definitions for BZ2File_Type. */
1363
1364 static int
1365 BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1366 {
1367 static char *kwlist[] = {"filename", "mode", "buffering",
1368 "compresslevel", 0};
1369 PyObject *name;
1370 char *mode = "r";
1371 int buffering = -1;
1372 int compresslevel = 9;
1373 int bzerror;
1374 int mode_char = 0;
1375
1376 self->size = -1;
1377
1378 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File",
1379 kwlist, &name, &mode, &buffering,
1380 &compresslevel))
1381 return -1;
1382
1383 if (compresslevel < 1 || compresslevel > 9) {
1384 PyErr_SetString(PyExc_ValueError,
1385 "compresslevel must be between 1 and 9");
1386 return -1;
1387 }
1388
1389 for (;;) {
1390 int error = 0;
1391 switch (*mode) {
1392 case 'r':
1393 case 'w':
1394 if (mode_char)
1395 error = 1;
1396 mode_char = *mode;
1397 break;
1398
1399 case 'b':
1400 break;
1401
1402 case 'U':
1403 #ifdef __VMS
1404 self->f_univ_newline = 0;
1405 #else
1406 self->f_univ_newline = 1;
1407 #endif
1408 break;
1409
1410 default:
1411 error = 1;
1412 break;
1413 }
1414 if (error) {
1415 PyErr_Format(PyExc_ValueError,
1416 "invalid mode char %c", *mode);
1417 return -1;
1418 }
1419 mode++;
1420 if (*mode == '\0')
1421 break;
1422 }
1423
1424 if (mode_char == 0) {
1425 mode_char = 'r';
1426 }
1427
1428 mode = (mode_char == 'r') ? "rb" : "wb";
1429
1430 self->file = PyObject_CallFunction((PyObject*)&PyFile_Type, "(Osi)",
1431 name, mode, buffering);
1432 if (self->file == NULL)
1433 return -1;
1434
1435 /* From now on, we have stuff to dealloc, so jump to error label
1436 * instead of returning */
1437
1438 #ifdef WITH_THREAD
1439 self->lock = PyThread_allocate_lock();
1440 if (!self->lock) {
1441 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1442 goto error;
1443 }
1444 #endif
1445
1446 if (mode_char == 'r')
1447 self->fp = BZ2_bzReadOpen(&bzerror,
1448 PyFile_AsFile(self->file),
1449 0, 0, NULL, 0);
1450 else
1451 self->fp = BZ2_bzWriteOpen(&bzerror,
1452 PyFile_AsFile(self->file),
1453 compresslevel, 0, 0);
1454
1455 if (bzerror != BZ_OK) {
1456 Util_CatchBZ2Error(bzerror);
1457 goto error;
1458 }
1459 PyFile_IncUseCount((PyFileObject *)self->file);
1460
1461 self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1462
1463 return 0;
1464
1465 error:
1466 Py_CLEAR(self->file);
1467 #ifdef WITH_THREAD
1468 if (self->lock) {
1469 PyThread_free_lock(self->lock);
1470 self->lock = NULL;
1471 }
1472 #endif
1473 return -1;
1474 }
1475
1476 static void
1477 BZ2File_dealloc(BZ2FileObject *self)
1478 {
1479 int bzerror;
1480 #ifdef WITH_THREAD
1481 if (self->lock)
1482 PyThread_free_lock(self->lock);
1483 #endif
1484 switch (self->mode) {
1485 case MODE_READ:
1486 case MODE_READ_EOF:
1487 BZ2_bzReadClose(&bzerror, self->fp);
1488 break;
1489 case MODE_WRITE:
1490 BZ2_bzWriteClose(&bzerror, self->fp,
1491 0, NULL, NULL);
1492 break;
1493 }
1494 if (self->fp) {
1495 PyFile_DecUseCount((PyFileObject *)self->file);
1496 self->fp = NULL;
1497 }
1498 Util_DropReadAhead(self);
1499 Py_XDECREF(self->file);
1500 Py_TYPE(self)->tp_free((PyObject *)self);
1501 }
1502
1503 /* This is a hacked version of Python's fileobject.c:file_getiter(). */
1504 static PyObject *
1505 BZ2File_getiter(BZ2FileObject *self)
1506 {
1507 if (self->mode == MODE_CLOSED) {
1508 PyErr_SetString(PyExc_ValueError,
1509 "I/O operation on closed file");
1510 return NULL;
1511 }
1512 Py_INCREF((PyObject*)self);
1513 return (PyObject *)self;
1514 }
1515
1516 /* This is a hacked version of Python's fileobject.c:file_iternext(). */
1517 #define READAHEAD_BUFSIZE 8192
1518 static PyObject *
1519 BZ2File_iternext(BZ2FileObject *self)
1520 {
1521 PyStringObject* ret;
1522 ACQUIRE_LOCK(self);
1523 if (self->mode == MODE_CLOSED) {
1524 RELEASE_LOCK(self);
1525 PyErr_SetString(PyExc_ValueError,
1526 "I/O operation on closed file");
1527 return NULL;
1528 }
1529 ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1530 RELEASE_LOCK(self);
1531 if (ret == NULL || PyString_GET_SIZE(ret) == 0) {
1532 Py_XDECREF(ret);
1533 return NULL;
1534 }
1535 return (PyObject *)ret;
1536 }
1537
1538 /* ===================================================================== */
1539 /* BZ2File_Type definition. */
1540
1541 PyDoc_VAR(BZ2File__doc__) =
1542 PyDoc_STR(
1543 "BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1544 \n\
1545 Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1546 writing. When opened for writing, the file will be created if it doesn't\n\
1547 exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1548 unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1549 is given, must be a number between 1 and 9.\n\
1550 ")
1551 PyDoc_STR(
1552 "\n\
1553 Add a 'U' to mode to open the file for input with universal newline\n\
1554 support. Any line ending in the input file will be seen as a '\\n' in\n\
1555 Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
1556 for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
1557 '\\r\\n' or a tuple containing all the newline types seen. Universal\n\
1558 newlines are available only when reading.\n\
1559 ")
1560 ;
1561
1562 static PyTypeObject BZ2File_Type = {
1563 PyVarObject_HEAD_INIT(NULL, 0)
1564 "bz2.BZ2File", /*tp_name*/
1565 sizeof(BZ2FileObject), /*tp_basicsize*/
1566 0, /*tp_itemsize*/
1567 (destructor)BZ2File_dealloc, /*tp_dealloc*/
1568 0, /*tp_print*/
1569 0, /*tp_getattr*/
1570 0, /*tp_setattr*/
1571 0, /*tp_compare*/
1572 0, /*tp_repr*/
1573 0, /*tp_as_number*/
1574 0, /*tp_as_sequence*/
1575 0, /*tp_as_mapping*/
1576 0, /*tp_hash*/
1577 0, /*tp_call*/
1578 0, /*tp_str*/
1579 PyObject_GenericGetAttr,/*tp_getattro*/
1580 PyObject_GenericSetAttr,/*tp_setattro*/
1581 0, /*tp_as_buffer*/
1582 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1583 BZ2File__doc__, /*tp_doc*/
1584 0, /*tp_traverse*/
1585 0, /*tp_clear*/
1586 0, /*tp_richcompare*/
1587 0, /*tp_weaklistoffset*/
1588 (getiterfunc)BZ2File_getiter, /*tp_iter*/
1589 (iternextfunc)BZ2File_iternext, /*tp_iternext*/
1590 BZ2File_methods, /*tp_methods*/
1591 BZ2File_members, /*tp_members*/
1592 BZ2File_getset, /*tp_getset*/
1593 0, /*tp_base*/
1594 0, /*tp_dict*/
1595 0, /*tp_descr_get*/
1596 0, /*tp_descr_set*/
1597 0, /*tp_dictoffset*/
1598 (initproc)BZ2File_init, /*tp_init*/
1599 PyType_GenericAlloc, /*tp_alloc*/
1600 PyType_GenericNew, /*tp_new*/
1601 _PyObject_Del, /*tp_free*/
1602 0, /*tp_is_gc*/
1603 };
1604
1605
1606 /* ===================================================================== */
1607 /* Methods of BZ2Comp. */
1608
1609 PyDoc_STRVAR(BZ2Comp_compress__doc__,
1610 "compress(data) -> string\n\
1611 \n\
1612 Provide more data to the compressor object. It will return chunks of\n\
1613 compressed data whenever possible. When you've finished providing data\n\
1614 to compress, call the flush() method to finish the compression process,\n\
1615 and return what is left in the internal buffers.\n\
1616 ");
1617
1618 static PyObject *
1619 BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1620 {
1621 Py_buffer pdata;
1622 char *data;
1623 int datasize;
1624 int bufsize = SMALLCHUNK;
1625 PY_LONG_LONG totalout;
1626 PyObject *ret = NULL;
1627 bz_stream *bzs = &self->bzs;
1628 int bzerror;
1629
1630 if (!PyArg_ParseTuple(args, "s*:compress", &pdata))
1631 return NULL;
1632 data = pdata.buf;
1633 datasize = pdata.len;
1634
1635 if (datasize == 0) {
1636 PyBuffer_Release(&pdata);
1637 return PyString_FromString("");
1638 }
1639
1640 ACQUIRE_LOCK(self);
1641 if (!self->running) {
1642 PyErr_SetString(PyExc_ValueError,
1643 "this object was already flushed");
1644 goto error;
1645 }
1646
1647 ret = PyString_FromStringAndSize(NULL, bufsize);
1648 if (!ret)
1649 goto error;
1650
1651 bzs->next_in = data;
1652 bzs->avail_in = datasize;
1653 bzs->next_out = BUF(ret);
1654 bzs->avail_out = bufsize;
1655
1656 totalout = BZS_TOTAL_OUT(bzs);
1657
1658 for (;;) {
1659 Py_BEGIN_ALLOW_THREADS
1660 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1661 Py_END_ALLOW_THREADS
1662 if (bzerror != BZ_RUN_OK) {
1663 Util_CatchBZ2Error(bzerror);
1664 goto error;
1665 }
1666 if (bzs->avail_in == 0)
1667 break; /* no more input data */
1668 if (bzs->avail_out == 0) {
1669 bufsize = Util_NewBufferSize(bufsize);
1670 if (_PyString_Resize(&ret, bufsize) < 0) {
1671 BZ2_bzCompressEnd(bzs);
1672 goto error;
1673 }
1674 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1675 - totalout);
1676 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1677 }
1678 }
1679
1680 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
1681
1682 RELEASE_LOCK(self);
1683 PyBuffer_Release(&pdata);
1684 return ret;
1685
1686 error:
1687 RELEASE_LOCK(self);
1688 PyBuffer_Release(&pdata);
1689 Py_XDECREF(ret);
1690 return NULL;
1691 }
1692
1693 PyDoc_STRVAR(BZ2Comp_flush__doc__,
1694 "flush() -> string\n\
1695 \n\
1696 Finish the compression process and return what is left in internal buffers.\n\
1697 You must not use the compressor object after calling this method.\n\
1698 ");
1699
1700 static PyObject *
1701 BZ2Comp_flush(BZ2CompObject *self)
1702 {
1703 int bufsize = SMALLCHUNK;
1704 PyObject *ret = NULL;
1705 bz_stream *bzs = &self->bzs;
1706 PY_LONG_LONG totalout;
1707 int bzerror;
1708
1709 ACQUIRE_LOCK(self);
1710 if (!self->running) {
1711 PyErr_SetString(PyExc_ValueError, "object was already "
1712 "flushed");
1713 goto error;
1714 }
1715 self->running = 0;
1716
1717 ret = PyString_FromStringAndSize(NULL, bufsize);
1718 if (!ret)
1719 goto error;
1720
1721 bzs->next_out = BUF(ret);
1722 bzs->avail_out = bufsize;
1723
1724 totalout = BZS_TOTAL_OUT(bzs);
1725
1726 for (;;) {
1727 Py_BEGIN_ALLOW_THREADS
1728 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1729 Py_END_ALLOW_THREADS
1730 if (bzerror == BZ_STREAM_END) {
1731 break;
1732 } else if (bzerror != BZ_FINISH_OK) {
1733 Util_CatchBZ2Error(bzerror);
1734 goto error;
1735 }
1736 if (bzs->avail_out == 0) {
1737 bufsize = Util_NewBufferSize(bufsize);
1738 if (_PyString_Resize(&ret, bufsize) < 0)
1739 goto error;
1740 bzs->next_out = BUF(ret);
1741 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1742 - totalout);
1743 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1744 }
1745 }
1746
1747 if (bzs->avail_out != 0)
1748 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
1749
1750 RELEASE_LOCK(self);
1751 return ret;
1752
1753 error:
1754 RELEASE_LOCK(self);
1755 Py_XDECREF(ret);
1756 return NULL;
1757 }
1758
1759 static PyMethodDef BZ2Comp_methods[] = {
1760 {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1761 BZ2Comp_compress__doc__},
1762 {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1763 BZ2Comp_flush__doc__},
1764 {NULL, NULL} /* sentinel */
1765 };
1766
1767
1768 /* ===================================================================== */
1769 /* Slot definitions for BZ2Comp_Type. */
1770
1771 static int
1772 BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1773 {
1774 int compresslevel = 9;
1775 int bzerror;
1776 static char *kwlist[] = {"compresslevel", 0};
1777
1778 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1779 kwlist, &compresslevel))
1780 return -1;
1781
1782 if (compresslevel < 1 || compresslevel > 9) {
1783 PyErr_SetString(PyExc_ValueError,
1784 "compresslevel must be between 1 and 9");
1785 goto error;
1786 }
1787
1788 #ifdef WITH_THREAD
1789 self->lock = PyThread_allocate_lock();
1790 if (!self->lock) {
1791 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1792 goto error;
1793 }
1794 #endif
1795
1796 memset(&self->bzs, 0, sizeof(bz_stream));
1797 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1798 if (bzerror != BZ_OK) {
1799 Util_CatchBZ2Error(bzerror);
1800 goto error;
1801 }
1802
1803 self->running = 1;
1804
1805 return 0;
1806 error:
1807 #ifdef WITH_THREAD
1808 if (self->lock) {
1809 PyThread_free_lock(self->lock);
1810 self->lock = NULL;
1811 }
1812 #endif
1813 return -1;
1814 }
1815
1816 static void
1817 BZ2Comp_dealloc(BZ2CompObject *self)
1818 {
1819 #ifdef WITH_THREAD
1820 if (self->lock)
1821 PyThread_free_lock(self->lock);
1822 #endif
1823 BZ2_bzCompressEnd(&self->bzs);
1824 Py_TYPE(self)->tp_free((PyObject *)self);
1825 }
1826
1827
1828 /* ===================================================================== */
1829 /* BZ2Comp_Type definition. */
1830
1831 PyDoc_STRVAR(BZ2Comp__doc__,
1832 "BZ2Compressor([compresslevel=9]) -> compressor object\n\
1833 \n\
1834 Create a new compressor object. This object may be used to compress\n\
1835 data sequentially. If you want to compress data in one shot, use the\n\
1836 compress() function instead. The compresslevel parameter, if given,\n\
1837 must be a number between 1 and 9.\n\
1838 ");
1839
1840 static PyTypeObject BZ2Comp_Type = {
1841 PyVarObject_HEAD_INIT(NULL, 0)
1842 "bz2.BZ2Compressor", /*tp_name*/
1843 sizeof(BZ2CompObject), /*tp_basicsize*/
1844 0, /*tp_itemsize*/
1845 (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
1846 0, /*tp_print*/
1847 0, /*tp_getattr*/
1848 0, /*tp_setattr*/
1849 0, /*tp_compare*/
1850 0, /*tp_repr*/
1851 0, /*tp_as_number*/
1852 0, /*tp_as_sequence*/
1853 0, /*tp_as_mapping*/
1854 0, /*tp_hash*/
1855 0, /*tp_call*/
1856 0, /*tp_str*/
1857 PyObject_GenericGetAttr,/*tp_getattro*/
1858 PyObject_GenericSetAttr,/*tp_setattro*/
1859 0, /*tp_as_buffer*/
1860 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1861 BZ2Comp__doc__, /*tp_doc*/
1862 0, /*tp_traverse*/
1863 0, /*tp_clear*/
1864 0, /*tp_richcompare*/
1865 0, /*tp_weaklistoffset*/
1866 0, /*tp_iter*/
1867 0, /*tp_iternext*/
1868 BZ2Comp_methods, /*tp_methods*/
1869 0, /*tp_members*/
1870 0, /*tp_getset*/
1871 0, /*tp_base*/
1872 0, /*tp_dict*/
1873 0, /*tp_descr_get*/
1874 0, /*tp_descr_set*/
1875 0, /*tp_dictoffset*/
1876 (initproc)BZ2Comp_init, /*tp_init*/
1877 PyType_GenericAlloc, /*tp_alloc*/
1878 PyType_GenericNew, /*tp_new*/
1879 _PyObject_Del, /*tp_free*/
1880 0, /*tp_is_gc*/
1881 };
1882
1883
1884 /* ===================================================================== */
1885 /* Members of BZ2Decomp. */
1886
1887 #undef OFF
1888 #define OFF(x) offsetof(BZ2DecompObject, x)
1889
1890 static PyMemberDef BZ2Decomp_members[] = {
1891 {"unused_data", T_OBJECT, OFF(unused_data), RO},
1892 {NULL} /* Sentinel */
1893 };
1894
1895
1896 /* ===================================================================== */
1897 /* Methods of BZ2Decomp. */
1898
1899 PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1900 "decompress(data) -> string\n\
1901 \n\
1902 Provide more data to the decompressor object. It will return chunks\n\
1903 of decompressed data whenever possible. If you try to decompress data\n\
1904 after the end of stream is found, EOFError will be raised. If any data\n\
1905 was found after the end of stream, it'll be ignored and saved in\n\
1906 unused_data attribute.\n\
1907 ");
1908
1909 static PyObject *
1910 BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1911 {
1912 Py_buffer pdata;
1913 char *data;
1914 int datasize;
1915 int bufsize = SMALLCHUNK;
1916 PY_LONG_LONG totalout;
1917 PyObject *ret = NULL;
1918 bz_stream *bzs = &self->bzs;
1919 int bzerror;
1920
1921 if (!PyArg_ParseTuple(args, "s*:decompress", &pdata))
1922 return NULL;
1923 data = pdata.buf;
1924 datasize = pdata.len;
1925
1926 ACQUIRE_LOCK(self);
1927 if (!self->running) {
1928 PyErr_SetString(PyExc_EOFError, "end of stream was "
1929 "already found");
1930 goto error;
1931 }
1932
1933 ret = PyString_FromStringAndSize(NULL, bufsize);
1934 if (!ret)
1935 goto error;
1936
1937 bzs->next_in = data;
1938 bzs->avail_in = datasize;
1939 bzs->next_out = BUF(ret);
1940 bzs->avail_out = bufsize;
1941
1942 totalout = BZS_TOTAL_OUT(bzs);
1943
1944 for (;;) {
1945 Py_BEGIN_ALLOW_THREADS
1946 bzerror = BZ2_bzDecompress(bzs);
1947 Py_END_ALLOW_THREADS
1948 if (bzerror == BZ_STREAM_END) {
1949 if (bzs->avail_in != 0) {
1950 Py_DECREF(self->unused_data);
1951 self->unused_data =
1952 PyString_FromStringAndSize(bzs->next_in,
1953 bzs->avail_in);
1954 }
1955 self->running = 0;
1956 break;
1957 }
1958 if (bzerror != BZ_OK) {
1959 Util_CatchBZ2Error(bzerror);
1960 goto error;
1961 }
1962 if (bzs->avail_in == 0)
1963 break; /* no more input data */
1964 if (bzs->avail_out == 0) {
1965 bufsize = Util_NewBufferSize(bufsize);
1966 if (_PyString_Resize(&ret, bufsize) < 0) {
1967 BZ2_bzDecompressEnd(bzs);
1968 goto error;
1969 }
1970 bzs->next_out = BUF(ret);
1971 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1972 - totalout);
1973 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1974 }
1975 }
1976
1977 if (bzs->avail_out != 0)
1978 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
1979
1980 RELEASE_LOCK(self);
1981 PyBuffer_Release(&pdata);
1982 return ret;
1983
1984 error:
1985 RELEASE_LOCK(self);
1986 PyBuffer_Release(&pdata);
1987 Py_XDECREF(ret);
1988 return NULL;
1989 }
1990
1991 static PyMethodDef BZ2Decomp_methods[] = {
1992 {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
1993 {NULL, NULL} /* sentinel */
1994 };
1995
1996
1997 /* ===================================================================== */
1998 /* Slot definitions for BZ2Decomp_Type. */
1999
2000 static int
2001 BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
2002 {
2003 int bzerror;
2004
2005 if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
2006 return -1;
2007
2008 #ifdef WITH_THREAD
2009 self->lock = PyThread_allocate_lock();
2010 if (!self->lock) {
2011 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
2012 goto error;
2013 }
2014 #endif
2015
2016 self->unused_data = PyString_FromString("");
2017 if (!self->unused_data)
2018 goto error;
2019
2020 memset(&self->bzs, 0, sizeof(bz_stream));
2021 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
2022 if (bzerror != BZ_OK) {
2023 Util_CatchBZ2Error(bzerror);
2024 goto error;
2025 }
2026
2027 self->running = 1;
2028
2029 return 0;
2030
2031 error:
2032 #ifdef WITH_THREAD
2033 if (self->lock) {
2034 PyThread_free_lock(self->lock);
2035 self->lock = NULL;
2036 }
2037 #endif
2038 Py_CLEAR(self->unused_data);
2039 return -1;
2040 }
2041
2042 static void
2043 BZ2Decomp_dealloc(BZ2DecompObject *self)
2044 {
2045 #ifdef WITH_THREAD
2046 if (self->lock)
2047 PyThread_free_lock(self->lock);
2048 #endif
2049 Py_XDECREF(self->unused_data);
2050 BZ2_bzDecompressEnd(&self->bzs);
2051 Py_TYPE(self)->tp_free((PyObject *)self);
2052 }
2053
2054
2055 /* ===================================================================== */
2056 /* BZ2Decomp_Type definition. */
2057
2058 PyDoc_STRVAR(BZ2Decomp__doc__,
2059 "BZ2Decompressor() -> decompressor object\n\
2060 \n\
2061 Create a new decompressor object. This object may be used to decompress\n\
2062 data sequentially. If you want to decompress data in one shot, use the\n\
2063 decompress() function instead.\n\
2064 ");
2065
2066 static PyTypeObject BZ2Decomp_Type = {
2067 PyVarObject_HEAD_INIT(NULL, 0)
2068 "bz2.BZ2Decompressor", /*tp_name*/
2069 sizeof(BZ2DecompObject), /*tp_basicsize*/
2070 0, /*tp_itemsize*/
2071 (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
2072 0, /*tp_print*/
2073 0, /*tp_getattr*/
2074 0, /*tp_setattr*/
2075 0, /*tp_compare*/
2076 0, /*tp_repr*/
2077 0, /*tp_as_number*/
2078 0, /*tp_as_sequence*/
2079 0, /*tp_as_mapping*/
2080 0, /*tp_hash*/
2081 0, /*tp_call*/
2082 0, /*tp_str*/
2083 PyObject_GenericGetAttr,/*tp_getattro*/
2084 PyObject_GenericSetAttr,/*tp_setattro*/
2085 0, /*tp_as_buffer*/
2086 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
2087 BZ2Decomp__doc__, /*tp_doc*/
2088 0, /*tp_traverse*/
2089 0, /*tp_clear*/
2090 0, /*tp_richcompare*/
2091 0, /*tp_weaklistoffset*/
2092 0, /*tp_iter*/
2093 0, /*tp_iternext*/
2094 BZ2Decomp_methods, /*tp_methods*/
2095 BZ2Decomp_members, /*tp_members*/
2096 0, /*tp_getset*/
2097 0, /*tp_base*/
2098 0, /*tp_dict*/
2099 0, /*tp_descr_get*/
2100 0, /*tp_descr_set*/
2101 0, /*tp_dictoffset*/
2102 (initproc)BZ2Decomp_init, /*tp_init*/
2103 PyType_GenericAlloc, /*tp_alloc*/
2104 PyType_GenericNew, /*tp_new*/
2105 _PyObject_Del, /*tp_free*/
2106 0, /*tp_is_gc*/
2107 };
2108
2109
2110 /* ===================================================================== */
2111 /* Module functions. */
2112
2113 PyDoc_STRVAR(bz2_compress__doc__,
2114 "compress(data [, compresslevel=9]) -> string\n\
2115 \n\
2116 Compress data in one shot. If you want to compress data sequentially,\n\
2117 use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
2118 given, must be a number between 1 and 9.\n\
2119 ");
2120
2121 static PyObject *
2122 bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
2123 {
2124 int compresslevel=9;
2125 Py_buffer pdata;
2126 char *data;
2127 int datasize;
2128 int bufsize;
2129 PyObject *ret = NULL;
2130 bz_stream _bzs;
2131 bz_stream *bzs = &_bzs;
2132 int bzerror;
2133 static char *kwlist[] = {"data", "compresslevel", 0};
2134
2135 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|i",
2136 kwlist, &pdata,
2137 &compresslevel))
2138 return NULL;
2139 data = pdata.buf;
2140 datasize = pdata.len;
2141
2142 if (compresslevel < 1 || compresslevel > 9) {
2143 PyErr_SetString(PyExc_ValueError,
2144 "compresslevel must be between 1 and 9");
2145 PyBuffer_Release(&pdata);
2146 return NULL;
2147 }
2148
2149 /* Conforming to bz2 manual, this is large enough to fit compressed
2150 * data in one shot. We will check it later anyway. */
2151 bufsize = datasize + (datasize/100+1) + 600;
2152
2153 ret = PyString_FromStringAndSize(NULL, bufsize);
2154 if (!ret) {
2155 PyBuffer_Release(&pdata);
2156 return NULL;
2157 }
2158
2159 memset(bzs, 0, sizeof(bz_stream));
2160
2161 bzs->next_in = data;
2162 bzs->avail_in = datasize;
2163 bzs->next_out = BUF(ret);
2164 bzs->avail_out = bufsize;
2165
2166 bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
2167 if (bzerror != BZ_OK) {
2168 Util_CatchBZ2Error(bzerror);
2169 PyBuffer_Release(&pdata);
2170 Py_DECREF(ret);
2171 return NULL;
2172 }
2173
2174 for (;;) {
2175 Py_BEGIN_ALLOW_THREADS
2176 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
2177 Py_END_ALLOW_THREADS
2178 if (bzerror == BZ_STREAM_END) {
2179 break;
2180 } else if (bzerror != BZ_FINISH_OK) {
2181 BZ2_bzCompressEnd(bzs);
2182 Util_CatchBZ2Error(bzerror);
2183 PyBuffer_Release(&pdata);
2184 Py_DECREF(ret);
2185 return NULL;
2186 }
2187 if (bzs->avail_out == 0) {
2188 bufsize = Util_NewBufferSize(bufsize);
2189 if (_PyString_Resize(&ret, bufsize) < 0) {
2190 BZ2_bzCompressEnd(bzs);
2191 PyBuffer_Release(&pdata);
2192 Py_DECREF(ret);
2193 return NULL;
2194 }
2195 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2196 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2197 }
2198 }
2199
2200 if (bzs->avail_out != 0)
2201 _PyString_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs));
2202 BZ2_bzCompressEnd(bzs);
2203
2204 PyBuffer_Release(&pdata);
2205 return ret;
2206 }
2207
2208 PyDoc_STRVAR(bz2_decompress__doc__,
2209 "decompress(data) -> decompressed data\n\
2210 \n\
2211 Decompress data in one shot. If you want to decompress data sequentially,\n\
2212 use an instance of BZ2Decompressor instead.\n\
2213 ");
2214
2215 static PyObject *
2216 bz2_decompress(PyObject *self, PyObject *args)
2217 {
2218 Py_buffer pdata;
2219 char *data;
2220 int datasize;
2221 int bufsize = SMALLCHUNK;
2222 PyObject *ret;
2223 bz_stream _bzs;
2224 bz_stream *bzs = &_bzs;
2225 int bzerror;
2226
2227 if (!PyArg_ParseTuple(args, "s*:decompress", &pdata))
2228 return NULL;
2229 data = pdata.buf;
2230 datasize = pdata.len;
2231
2232 if (datasize == 0) {
2233 PyBuffer_Release(&pdata);
2234 return PyString_FromString("");
2235 }
2236
2237 ret = PyString_FromStringAndSize(NULL, bufsize);
2238 if (!ret) {
2239 PyBuffer_Release(&pdata);
2240 return NULL;
2241 }
2242
2243 memset(bzs, 0, sizeof(bz_stream));
2244
2245 bzs->next_in = data;
2246 bzs->avail_in = datasize;
2247 bzs->next_out = BUF(ret);
2248 bzs->avail_out = bufsize;
2249
2250 bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2251 if (bzerror != BZ_OK) {
2252 Util_CatchBZ2Error(bzerror);
2253 Py_DECREF(ret);
2254 PyBuffer_Release(&pdata);
2255 return NULL;
2256 }
2257
2258 for (;;) {
2259 Py_BEGIN_ALLOW_THREADS
2260 bzerror = BZ2_bzDecompress(bzs);
2261 Py_END_ALLOW_THREADS
2262 if (bzerror == BZ_STREAM_END) {
2263 break;
2264 } else if (bzerror != BZ_OK) {
2265 BZ2_bzDecompressEnd(bzs);
2266 Util_CatchBZ2Error(bzerror);
2267 PyBuffer_Release(&pdata);
2268 Py_DECREF(ret);
2269 return NULL;
2270 }
2271 if (bzs->avail_in == 0) {
2272 BZ2_bzDecompressEnd(bzs);
2273 PyErr_SetString(PyExc_ValueError,
2274 "couldn't find end of stream");
2275 PyBuffer_Release(&pdata);
2276 Py_DECREF(ret);
2277 return NULL;
2278 }
2279 if (bzs->avail_out == 0) {
2280 bufsize = Util_NewBufferSize(bufsize);
2281 if (_PyString_Resize(&ret, bufsize) < 0) {
2282 BZ2_bzDecompressEnd(bzs);
2283 PyBuffer_Release(&pdata);
2284 Py_DECREF(ret);
2285 return NULL;
2286 }
2287 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2288 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2289 }
2290 }
2291
2292 if (bzs->avail_out != 0)
2293 _PyString_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs));
2294 BZ2_bzDecompressEnd(bzs);
2295 PyBuffer_Release(&pdata);
2296
2297 return ret;
2298 }
2299
2300 static PyMethodDef bz2_methods[] = {
2301 {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2302 bz2_compress__doc__},
2303 {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2304 bz2_decompress__doc__},
2305 {NULL, NULL} /* sentinel */
2306 };
2307
2308 /* ===================================================================== */
2309 /* Initialization function. */
2310
2311 PyDoc_STRVAR(bz2__doc__,
2312 "The python bz2 module provides a comprehensive interface for\n\
2313 the bz2 compression library. It implements a complete file\n\
2314 interface, one shot (de)compression functions, and types for\n\
2315 sequential (de)compression.\n\
2316 ");
2317
2318 PyMODINIT_FUNC
2319 initbz2(void)
2320 {
2321 PyObject *m;
2322
2323 if (PyType_Ready(&BZ2File_Type) < 0)
2324 return;
2325 if (PyType_Ready(&BZ2Comp_Type) < 0)
2326 return;
2327 if (PyType_Ready(&BZ2Decomp_Type) < 0)
2328 return;
2329
2330 m = Py_InitModule3("bz2", bz2_methods, bz2__doc__);
2331 if (m == NULL)
2332 return;
2333
2334 PyModule_AddObject(m, "__author__", PyString_FromString(__author__));
2335
2336 Py_INCREF(&BZ2File_Type);
2337 PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2338
2339 Py_INCREF(&BZ2Comp_Type);
2340 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2341
2342 Py_INCREF(&BZ2Decomp_Type);
2343 PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
2344 }